npm - agentv - Versions diffs - 2.0.1 → 2.0.2 - Mend

agentv 2.0.1 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/README.md +11 -4
package/dist/{chunk-6SHT2QS6.js → chunk-5AJ7DFUO.js} +211 -7
package/dist/chunk-5AJ7DFUO.js.map +1 -0
package/dist/cli.js +4 -2
package/dist/cli.js.map +1 -1
package/dist/index.js +1 -1
package/dist/templates/.claude/skills/agentv-eval-builder/references/custom-evaluators.md +45 -43
package/package.json +4 -2
package/dist/chunk-6SHT2QS6.js.map +0 -1

package/dist/cli.js CHANGED Viewed

@@ -1,11 +1,13 @@
 #!/usr/bin/env node
 import {
   runCli
-} from "./chunk-6SHT2QS6.js";
+} from "./chunk-5AJ7DFUO.js";
 import "./chunk-UE4GLFVL.js";
 // src/cli.ts
-runCli().catch((error) => {
+runCli().then(() => {
+  process.exit(0);
+}).catch((error) => {
   console.error(error);
   process.exit(1);
 });

package/dist/cli.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"sources":["../src/cli.ts"],"sourcesContent":["#!/usr/bin/env node\nimport { runCli } from './index.js';\n\nrunCli().catch((error) => {\n console.error(error);\n process.exit(1);\n});\n"],"mappings":";;;;;;;AAGA,OAAO,EAAE,MAAM,CAAC,UAAU;AACxB,UAAQ,MAAM,KAAK;AACnB,UAAQ,KAAK,CAAC;AAChB,CAAC;","names":[]}
1	+ {"version":3,"sources":["../src/cli.ts"],"sourcesContent":["#!/usr/bin/env node\nimport { runCli } from './index.js';\n\nrunCli()\n .then(() => {\n process.exit(0);\n })\n .catch((error) => {\n console.error(error);\n process.exit(1);\n });\n"],"mappings":";;;;;;;AAGA,OAAO,EACJ,KAAK,MAAM;AACV,UAAQ,KAAK,CAAC;AAChB,CAAC,EACA,MAAM,CAAC,UAAU;AAChB,UAAQ,MAAM,KAAK;AACnB,UAAQ,KAAK,CAAC;AAChB,CAAC;","names":[]}

package/dist/index.js CHANGED Viewed

@@ -1,7 +1,7 @@
 import {
   app,
   runCli
-} from "./chunk-6SHT2QS6.js";
+} from "./chunk-5AJ7DFUO.js";
 import "./chunk-UE4GLFVL.js";
 export {
   app,

package/dist/templates/.claude/skills/agentv-eval-builder/references/custom-evaluators.md CHANGED Viewed

@@ -154,34 +154,27 @@ if __name__ == "__main__":
 ## TypeScript Code Evaluator Template (with SDK)
-The optional `@agentv/core` SDK provides type-safe payload parsing with camelCase properties (`candidateAnswer` vs `candidate_answer`).
+The `@agentv/eval` SDK provides a declarative API for code evaluators with automatic stdin/stdout handling, validation, and error handling.
-**Execution:** Keep evaluators as `.ts` files and run via Node loaders like `npx --yes tsx ./evaluators/my-check.ts` so users don't need Bun after `npm install -g agentv`.
-**Without SDK:** Skip the import and parse JSON from stdin directly (similar to the Python template above).
+**Execution:** Keep evaluators as `.ts` files and run via `bun run` or Node loaders like `npx --yes tsx ./evaluators/my-check.ts`.
 ```typescript
+#!/usr/bin/env bun
 /**
- * Example TypeScript code evaluator using the AgentV SDK
+ * Example TypeScript code evaluator using defineCodeJudge
  *
- * Run with: npx --yes tsx ./evaluators/example-check.ts
+ * Run with: bun run ./evaluators/example-check.ts
+ *        or: npx --yes tsx ./evaluators/example-check.ts
  *
- * The SDK provides:
- * - Type-safe CodeJudgePayload interface with all fields
- * - camelCase properties (candidateAnswer, expectedOutcome, etc.)
- * - Automatic conversion from snake_case wire format
+ * The SDK handles:
+ * - Reading JSON from stdin
+ * - Converting snake_case to camelCase
+ * - Validating input with Zod
+ * - Error handling and output formatting
  */
+import { defineCodeJudge } from '@agentv/eval';
-import { readCodeJudgePayload } from '@agentv/core';
-try {
-  // Read and parse stdin with automatic snake_case → camelCase conversion
-  const payload = readCodeJudgePayload();
-  // Type-safe camelCase access to all fields
-  const { candidateAnswer, expectedOutcome, inputFiles, guidelineFiles } = payload;
-  // Your validation logic here
+export default defineCodeJudge(({ candidateAnswer, expectedOutcome, inputFiles, guidelineFiles }) => {
   const hits: string[] = [];
   const misses: string[] = [];
@@ -207,38 +200,47 @@ try {
   const totalChecks = hits.length + misses.length;
   const score = totalChecks === 0 ? 0 : hits.length / totalChecks;
-  // Build result
-  const result = {
+  return {
     score,
     hits,
     misses,
-    reasoning: `Passed ${hits.length}/${totalChecks} checks`
+    reasoning: `Passed ${hits.length}/${totalChecks} checks`,
   };
-  console.log(JSON.stringify(result, null, 2));
-} catch (error) {
-  const message = error instanceof Error ? error.message : String(error);
-  console.log(JSON.stringify({
-    score: 0,
-    hits: [],
-    misses: [`Error: ${message}`],
-    reasoning: 'Evaluator error'
-  }, null, 2));
-  process.exit(1);
-}
+});
 ```
 **TypeScript SDK Benefits:**
-- **Type-safe**: `CodeJudgePayload` interface with all fields typed
+- **Zero boilerplate**: No try/catch, stdin parsing, or JSON.stringify needed
+- **Type-safe**: `CodeJudgeInput` interface with all fields typed
 - **camelCase**: Idiomatic TypeScript naming (`candidateAnswer` vs `candidate_answer`)
-- **Automatic conversion**: Handles snake_case wire format → camelCase objects
-- **Compile-time safety**: Catch typos and missing fields before runtime
+- **Validation**: Zod schemas validate input and output at runtime
+- **Error handling**: Exceptions automatically produce valid failure results
+**Available exports from `@agentv/eval`:**
+- `defineCodeJudge(handler)`: Define a code judge evaluator (recommended)
+- `CodeJudgeInput`: TypeScript type for input payload
+- `CodeJudgeResult`: TypeScript type for result
+- `TraceSummary`, `OutputMessage`: Types for trace data
+- `z`: Re-exported Zod for custom config schemas
-**Available in SDK:**
-- `readCodeJudgePayload()`: Read stdin and convert to camelCase (recommended)
-- `parseCodeJudgePayload(jsonString)`: Parse JSON string and convert to camelCase
-- `CodeJudgePayload`: TypeScript interface for type safety
+**Using execution metrics:**
+```typescript
+import { defineCodeJudge } from '@agentv/eval';
+export default defineCodeJudge(({ traceSummary }) => {
+  if (!traceSummary) {
+    return { score: 0.5, reasoning: 'No trace available' };
+  }
+  const efficient = traceSummary.eventCount <= 10;
+  return {
+    score: efficient ? 1.0 : 0.5,
+    hits: efficient ? ['Efficient execution'] : [],
+    misses: efficient ? [] : ['Too many tool calls'],
+  };
+});
+```
 **See also:** `examples/features/code-judge-sdk/` for complete working examples

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "agentv",
-  "version": "2.0.1",
+  "version": "2.0.2",
   "description": "CLI entry point for AgentV",
   "type": "module",
   "repository": {
@@ -31,7 +31,9 @@
     "test:watch": "bun test --watch"
   },
   "dependencies": {
-    "@agentv/core": "1.5.0",
+    "@agentv/core": "2.0.1",
+    "@mariozechner/pi-agent": "^0.9.0",
+    "@mariozechner/pi-ai": "^0.37.2",
     "cmd-ts": "^0.14.3",
     "dotenv": "^16.4.5",
     "fast-glob": "^3.3.3",