agentv 2.0.1 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1,11 +1,13 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
3
  runCli
4
- } from "./chunk-6SHT2QS6.js";
4
+ } from "./chunk-5AJ7DFUO.js";
5
5
  import "./chunk-UE4GLFVL.js";
6
6
 
7
7
  // src/cli.ts
8
- runCli().catch((error) => {
8
+ runCli().then(() => {
9
+ process.exit(0);
10
+ }).catch((error) => {
9
11
  console.error(error);
10
12
  process.exit(1);
11
13
  });
package/dist/cli.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/cli.ts"],"sourcesContent":["#!/usr/bin/env node\nimport { runCli } from './index.js';\n\nrunCli().catch((error) => {\n console.error(error);\n process.exit(1);\n});\n"],"mappings":";;;;;;;AAGA,OAAO,EAAE,MAAM,CAAC,UAAU;AACxB,UAAQ,MAAM,KAAK;AACnB,UAAQ,KAAK,CAAC;AAChB,CAAC;","names":[]}
1
+ {"version":3,"sources":["../src/cli.ts"],"sourcesContent":["#!/usr/bin/env node\nimport { runCli } from './index.js';\n\nrunCli()\n .then(() => {\n process.exit(0);\n })\n .catch((error) => {\n console.error(error);\n process.exit(1);\n });\n"],"mappings":";;;;;;;AAGA,OAAO,EACJ,KAAK,MAAM;AACV,UAAQ,KAAK,CAAC;AAChB,CAAC,EACA,MAAM,CAAC,UAAU;AAChB,UAAQ,MAAM,KAAK;AACnB,UAAQ,KAAK,CAAC;AAChB,CAAC;","names":[]}
package/dist/index.js CHANGED
@@ -1,7 +1,7 @@
1
1
  import {
2
2
  app,
3
3
  runCli
4
- } from "./chunk-6SHT2QS6.js";
4
+ } from "./chunk-5AJ7DFUO.js";
5
5
  import "./chunk-UE4GLFVL.js";
6
6
  export {
7
7
  app,
@@ -154,34 +154,27 @@ if __name__ == "__main__":
154
154
 
155
155
  ## TypeScript Code Evaluator Template (with SDK)
156
156
 
157
- The optional `@agentv/core` SDK provides type-safe payload parsing with camelCase properties (`candidateAnswer` vs `candidate_answer`).
157
+ The `@agentv/eval` SDK provides a declarative API for code evaluators with automatic stdin/stdout handling, validation, and error handling.
158
158
 
159
- **Execution:** Keep evaluators as `.ts` files and run via Node loaders like `npx --yes tsx ./evaluators/my-check.ts` so users don't need Bun after `npm install -g agentv`.
160
-
161
- **Without SDK:** Skip the import and parse JSON from stdin directly (similar to the Python template above).
159
+ **Execution:** Keep evaluators as `.ts` files and run via `bun run` or Node loaders like `npx --yes tsx ./evaluators/my-check.ts`.
162
160
 
163
161
  ```typescript
162
+ #!/usr/bin/env bun
164
163
  /**
165
- * Example TypeScript code evaluator using the AgentV SDK
164
+ * Example TypeScript code evaluator using defineCodeJudge
166
165
  *
167
- * Run with: npx --yes tsx ./evaluators/example-check.ts
166
+ * Run with: bun run ./evaluators/example-check.ts
167
+ * or: npx --yes tsx ./evaluators/example-check.ts
168
168
  *
169
- * The SDK provides:
170
- * - Type-safe CodeJudgePayload interface with all fields
171
- * - camelCase properties (candidateAnswer, expectedOutcome, etc.)
172
- * - Automatic conversion from snake_case wire format
169
+ * The SDK handles:
170
+ * - Reading JSON from stdin
171
+ * - Converting snake_case to camelCase
172
+ * - Validating input with Zod
173
+ * - Error handling and output formatting
173
174
  */
175
+ import { defineCodeJudge } from '@agentv/eval';
174
176
 
175
- import { readCodeJudgePayload } from '@agentv/core';
176
-
177
- try {
178
- // Read and parse stdin with automatic snake_case → camelCase conversion
179
- const payload = readCodeJudgePayload();
180
-
181
- // Type-safe camelCase access to all fields
182
- const { candidateAnswer, expectedOutcome, inputFiles, guidelineFiles } = payload;
183
-
184
- // Your validation logic here
177
+ export default defineCodeJudge(({ candidateAnswer, expectedOutcome, inputFiles, guidelineFiles }) => {
185
178
  const hits: string[] = [];
186
179
  const misses: string[] = [];
187
180
 
@@ -207,38 +200,47 @@ try {
207
200
  const totalChecks = hits.length + misses.length;
208
201
  const score = totalChecks === 0 ? 0 : hits.length / totalChecks;
209
202
 
210
- // Build result
211
- const result = {
203
+ return {
212
204
  score,
213
205
  hits,
214
206
  misses,
215
- reasoning: `Passed ${hits.length}/${totalChecks} checks`
207
+ reasoning: `Passed ${hits.length}/${totalChecks} checks`,
216
208
  };
217
-
218
- console.log(JSON.stringify(result, null, 2));
219
-
220
- } catch (error) {
221
- const message = error instanceof Error ? error.message : String(error);
222
- console.log(JSON.stringify({
223
- score: 0,
224
- hits: [],
225
- misses: [`Error: ${message}`],
226
- reasoning: 'Evaluator error'
227
- }, null, 2));
228
- process.exit(1);
229
- }
209
+ });
230
210
  ```
231
211
 
232
212
  **TypeScript SDK Benefits:**
233
- - **Type-safe**: `CodeJudgePayload` interface with all fields typed
213
+ - **Zero boilerplate**: No try/catch, stdin parsing, or JSON.stringify needed
214
+ - **Type-safe**: `CodeJudgeInput` interface with all fields typed
234
215
  - **camelCase**: Idiomatic TypeScript naming (`candidateAnswer` vs `candidate_answer`)
235
- - **Automatic conversion**: Converts the snake_case wire format to camelCase objects
236
- - **Compile-time safety**: Catch typos and missing fields before runtime
216
+ - **Validation**: Zod schemas validate input and output at runtime
217
+ - **Error handling**: Exceptions automatically produce valid failure results
218
+
219
+ **Available exports from `@agentv/eval`:**
220
+ - `defineCodeJudge(handler)`: Define a code judge evaluator (recommended)
221
+ - `CodeJudgeInput`: TypeScript type for input payload
222
+ - `CodeJudgeResult`: TypeScript type for result
223
+ - `TraceSummary`, `OutputMessage`: Types for trace data
224
+ - `z`: Re-exported Zod for custom config schemas
237
225
 
238
- **Available in SDK:**
239
- - `readCodeJudgePayload()`: Read stdin and convert to camelCase (recommended)
240
- - `parseCodeJudgePayload(jsonString)`: Parse JSON string and convert to camelCase
241
- - `CodeJudgePayload`: TypeScript interface for type safety
226
+ **Using execution metrics:**
227
+
228
+ ```typescript
229
+ import { defineCodeJudge } from '@agentv/eval';
230
+
231
+ export default defineCodeJudge(({ traceSummary }) => {
232
+ if (!traceSummary) {
233
+ return { score: 0.5, reasoning: 'No trace available' };
234
+ }
235
+
236
+ const efficient = traceSummary.eventCount <= 10;
237
+ return {
238
+ score: efficient ? 1.0 : 0.5,
239
+ hits: efficient ? ['Efficient execution'] : [],
240
+ misses: efficient ? [] : ['Too many tool calls'],
241
+ };
242
+ });
243
+ ```
242
244
 
243
245
  **See also:** `examples/features/code-judge-sdk/` for complete working examples
244
246
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentv",
3
- "version": "2.0.1",
3
+ "version": "2.0.2",
4
4
  "description": "CLI entry point for AgentV",
5
5
  "type": "module",
6
6
  "repository": {
@@ -31,7 +31,9 @@
31
31
  "test:watch": "bun test --watch"
32
32
  },
33
33
  "dependencies": {
34
- "@agentv/core": "1.5.0",
34
+ "@agentv/core": "2.0.1",
35
+ "@mariozechner/pi-agent": "^0.9.0",
36
+ "@mariozechner/pi-ai": "^0.37.2",
35
37
  "cmd-ts": "^0.14.3",
36
38
  "dotenv": "^16.4.5",
37
39
  "fast-glob": "^3.3.3",