agentv 2.19.0 → 3.0.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. package/README.md +62 -36
  2. package/dist/agentv-provider-5CJVBBGG-2XVZBW7L.js +9 -0
  3. package/dist/{chunk-GC6T3RD4.js → chunk-5WIB7A27.js} +598 -403
  4. package/dist/chunk-5WIB7A27.js.map +1 -0
  5. package/dist/chunk-6GSYTMXD.js +31520 -0
  6. package/dist/chunk-6GSYTMXD.js.map +1 -0
  7. package/dist/{chunk-4MSAOMCC.js → chunk-DY4ZDTTO.js} +1018 -140
  8. package/dist/chunk-DY4ZDTTO.js.map +1 -0
  9. package/dist/chunk-HF4X7ALN.js +24299 -0
  10. package/dist/chunk-HF4X7ALN.js.map +1 -0
  11. package/dist/{chunk-FV32QHPB.js → chunk-XOSNETAV.js} +1 -1
  12. package/dist/cli.js +5 -4
  13. package/dist/cli.js.map +1 -1
  14. package/dist/{dist-MQBGD6LP.js → dist-WN2QIOQR.js} +27 -11
  15. package/dist/{esm-DX3WQKEN.js → esm-CZAWIY6F.js} +2 -2
  16. package/dist/esm-CZAWIY6F.js.map +1 -0
  17. package/dist/index.js +5 -4
  18. package/dist/{interactive-3TDBCSDW.js → interactive-B432TCRZ.js} +5 -4
  19. package/dist/{interactive-3TDBCSDW.js.map → interactive-B432TCRZ.js.map} +1 -1
  20. package/dist/{src-2N5EJ2N6.js → src-ML4D2MC2.js} +2 -2
  21. package/dist/templates/.agentv/targets.yaml +8 -11
  22. package/package.json +2 -2
  23. package/dist/chunk-4MSAOMCC.js.map +0 -1
  24. package/dist/chunk-GC6T3RD4.js.map +0 -1
  25. package/dist/chunk-XTYMR4I5.js +0 -49811
  26. package/dist/chunk-XTYMR4I5.js.map +0 -1
  27. /package/dist/{dist-MQBGD6LP.js.map → agentv-provider-5CJVBBGG-2XVZBW7L.js.map} +0 -0
  28. /package/dist/{chunk-FV32QHPB.js.map → chunk-XOSNETAV.js.map} +0 -0
  29. /package/dist/{esm-DX3WQKEN.js.map → dist-WN2QIOQR.js.map} +0 -0
  30. /package/dist/{src-2N5EJ2N6.js.map → src-ML4D2MC2.js.map} +0 -0
@@ -562,4 +562,4 @@ export {
562
562
  serviceInstanceIdDetector,
563
563
  init_esm4 as init_esm
564
564
  };
565
- //# sourceMappingURL=chunk-FV32QHPB.js.map
565
+ //# sourceMappingURL=chunk-XOSNETAV.js.map
package/dist/cli.js CHANGED
@@ -2,12 +2,13 @@
2
2
  import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
3
3
  import {
4
4
  runCli
5
- } from "./chunk-GC6T3RD4.js";
6
- import "./chunk-4MSAOMCC.js";
7
- import "./chunk-XTYMR4I5.js";
8
- import "./chunk-JK6V4KVD.js";
5
+ } from "./chunk-5WIB7A27.js";
6
+ import "./chunk-DY4ZDTTO.js";
7
+ import "./chunk-HF4X7ALN.js";
9
8
  import "./chunk-C5GOHBQM.js";
9
+ import "./chunk-JK6V4KVD.js";
10
10
  import "./chunk-3L2L5GIL.js";
11
+ import "./chunk-6GSYTMXD.js";
11
12
  import "./chunk-LRULMAAA.js";
12
13
  import "./chunk-5H446C7X.js";
13
14
 
package/dist/cli.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/cli.ts"],"sourcesContent":["#!/usr/bin/env node\nimport { runCli } from './index.js';\n\nrunCli()\n .then(() => {\n process.exit(0);\n })\n .catch((error) => {\n if (error instanceof Error) {\n console.error(`Error: ${error.message}`);\n if (process.env.DEBUG) {\n console.error(error.stack);\n }\n } else {\n console.error(error);\n }\n process.exit(1);\n });\n"],"mappings":";;;;;;;;;;;;;;AAGA,OAAO,EACJ,KAAK,MAAM;AACV,UAAQ,KAAK,CAAC;AAChB,CAAC,EACA,MAAM,CAAC,UAAU;AAChB,MAAI,iBAAiB,OAAO;AAC1B,YAAQ,MAAM,UAAU,MAAM,OAAO,EAAE;AACvC,QAAI,QAAQ,IAAI,OAAO;AACrB,cAAQ,MAAM,MAAM,KAAK;AAAA,IAC3B;AAAA,EACF,OAAO;AACL,YAAQ,MAAM,KAAK;AAAA,EACrB;AACA,UAAQ,KAAK,CAAC;AAChB,CAAC;","names":[]}
1
+ {"version":3,"sources":["../src/cli.ts"],"sourcesContent":["#!/usr/bin/env node\nimport { runCli } from './index.js';\n\nrunCli()\n .then(() => {\n process.exit(0);\n })\n .catch((error) => {\n if (error instanceof Error) {\n console.error(`Error: ${error.message}`);\n if (process.env.DEBUG) {\n console.error(error.stack);\n }\n } else {\n console.error(error);\n }\n process.exit(1);\n });\n"],"mappings":";;;;;;;;;;;;;;;AAGA,OAAO,EACJ,KAAK,MAAM;AACV,UAAQ,KAAK,CAAC;AAChB,CAAC,EACA,MAAM,CAAC,UAAU;AAChB,MAAI,iBAAiB,OAAO;AAC1B,YAAQ,MAAM,UAAU,MAAM,OAAO,EAAE;AACvC,QAAI,QAAQ,IAAI,OAAO;AACrB,cAAQ,MAAM,MAAM,KAAK;AAAA,IAC3B;AAAA,EACF,OAAO;AACL,YAAQ,MAAM,KAAK;AAAA,EACrB;AACA,UAAQ,KAAK,CAAC;AAChB,CAAC;","names":[]}
@@ -1,6 +1,5 @@
1
1
  import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
2
2
  import {
3
- AgentJudgeEvaluator,
4
3
  CodeEvaluator,
5
4
  CompositeEvaluator,
6
5
  CostEvaluator,
@@ -12,13 +11,14 @@ import {
12
11
  ExecutionMetricsEvaluator,
13
12
  FieldAccuracyEvaluator,
14
13
  LatencyEvaluator,
15
- LlmJudgeEvaluator,
14
+ LlmGraderEvaluator,
16
15
  OTEL_BACKEND_PRESETS,
17
16
  OtelStreamingObserver,
18
17
  OtelTraceExporter,
19
18
  ProviderRegistry,
20
19
  RepoManager,
21
20
  ResponseCache,
21
+ SkillTriggerEvaluator,
22
22
  TEST_MESSAGE_ROLES,
23
23
  TemplateNotDirectoryError,
24
24
  TemplateNotFoundError,
@@ -26,7 +26,7 @@ import {
26
26
  ToolTrajectoryEvaluator,
27
27
  WorkspaceCreationError,
28
28
  WorkspacePoolManager,
29
- assembleLlmJudgePrompt,
29
+ assembleLlmGraderPrompt,
30
30
  avgToolDurationMs,
31
31
  buildDirectoryChain,
32
32
  buildOutputSchema,
@@ -55,6 +55,7 @@ import {
55
55
  defineConfig,
56
56
  detectFormat,
57
57
  discoverAssertions,
58
+ discoverGraders,
58
59
  discoverProviders,
59
60
  ensureVSCodeSubagents,
60
61
  evaluate,
@@ -74,12 +75,14 @@ import {
74
75
  generateRubrics,
75
76
  getAgentvHome,
76
77
  getHitCount,
78
+ getOutputFilenames,
77
79
  getSubagentsRoot,
78
80
  getTraceStateRoot,
79
81
  getWorkspacePath,
80
82
  getWorkspacePoolRoot,
81
83
  getWorkspacesRoot,
82
84
  initializeBaseline,
85
+ isAgentSkillsFormat,
83
86
  isEvaluatorKind,
84
87
  isGuidelineFile,
85
88
  isJsonObject,
@@ -99,6 +102,7 @@ import {
99
102
  mergeExecutionMetrics,
100
103
  negateScore,
101
104
  normalizeLineEndings,
105
+ parseAgentSkillsEvals,
102
106
  parseJsonFromText,
103
107
  parseJsonSafe,
104
108
  readJsonFile,
@@ -135,19 +139,21 @@ import {
135
139
  toCamelCaseDeep,
136
140
  toSnakeCaseDeep,
137
141
  tokensPerTool,
142
+ transpileEvalYaml,
143
+ transpileEvalYamlFile,
138
144
  trimBaselineResult
139
- } from "./chunk-XTYMR4I5.js";
140
- import {
141
- SimpleTraceFileExporter
142
- } from "./chunk-JK6V4KVD.js";
145
+ } from "./chunk-HF4X7ALN.js";
143
146
  import {
144
147
  OtlpJsonFileExporter
145
148
  } from "./chunk-C5GOHBQM.js";
149
+ import {
150
+ SimpleTraceFileExporter
151
+ } from "./chunk-JK6V4KVD.js";
146
152
  import "./chunk-3L2L5GIL.js";
153
+ import "./chunk-6GSYTMXD.js";
147
154
  import "./chunk-LRULMAAA.js";
148
155
  import "./chunk-5H446C7X.js";
149
156
  export {
150
- AgentJudgeEvaluator,
151
157
  CodeEvaluator,
152
158
  CompositeEvaluator,
153
159
  CostEvaluator,
@@ -159,7 +165,8 @@ export {
159
165
  ExecutionMetricsEvaluator,
160
166
  FieldAccuracyEvaluator,
161
167
  LatencyEvaluator,
162
- LlmJudgeEvaluator,
168
+ LlmGraderEvaluator,
169
+ LlmGraderEvaluator as LlmJudgeEvaluator,
163
170
  OTEL_BACKEND_PRESETS,
164
171
  OtelStreamingObserver,
165
172
  OtelTraceExporter,
@@ -168,6 +175,7 @@ export {
168
175
  RepoManager,
169
176
  ResponseCache,
170
177
  SimpleTraceFileExporter,
178
+ SkillTriggerEvaluator,
171
179
  TEST_MESSAGE_ROLES,
172
180
  TemplateNotDirectoryError,
173
181
  TemplateNotFoundError,
@@ -175,7 +183,8 @@ export {
175
183
  ToolTrajectoryEvaluator,
176
184
  WorkspaceCreationError,
177
185
  WorkspacePoolManager,
178
- assembleLlmJudgePrompt,
186
+ assembleLlmGraderPrompt,
187
+ assembleLlmGraderPrompt as assembleLlmJudgePrompt,
179
188
  avgToolDurationMs,
180
189
  buildDirectoryChain,
181
190
  buildOutputSchema,
@@ -204,6 +213,8 @@ export {
204
213
  defineConfig,
205
214
  detectFormat,
206
215
  discoverAssertions,
216
+ discoverGraders,
217
+ discoverGraders as discoverJudges,
207
218
  discoverProviders,
208
219
  ensureVSCodeSubagents,
209
220
  evaluate,
@@ -223,12 +234,14 @@ export {
223
234
  generateRubrics,
224
235
  getAgentvHome,
225
236
  getHitCount,
237
+ getOutputFilenames,
226
238
  getSubagentsRoot,
227
239
  getTraceStateRoot,
228
240
  getWorkspacePath,
229
241
  getWorkspacePoolRoot,
230
242
  getWorkspacesRoot,
231
243
  initializeBaseline,
244
+ isAgentSkillsFormat,
232
245
  isEvaluatorKind,
233
246
  isGuidelineFile,
234
247
  isJsonObject,
@@ -248,6 +261,7 @@ export {
248
261
  mergeExecutionMetrics,
249
262
  negateScore,
250
263
  normalizeLineEndings,
264
+ parseAgentSkillsEvals,
251
265
  parseJsonFromText,
252
266
  parseJsonSafe,
253
267
  readJsonFile,
@@ -284,6 +298,8 @@ export {
284
298
  toCamelCaseDeep,
285
299
  toSnakeCaseDeep,
286
300
  tokensPerTool,
301
+ transpileEvalYaml,
302
+ transpileEvalYamlFile,
287
303
  trimBaselineResult
288
304
  };
289
- //# sourceMappingURL=dist-MQBGD6LP.js.map
305
+ //# sourceMappingURL=dist-WN2QIOQR.js.map
@@ -11,7 +11,7 @@ import {
11
11
  processDetector,
12
12
  resourceFromAttributes,
13
13
  serviceInstanceIdDetector
14
- } from "./chunk-FV32QHPB.js";
14
+ } from "./chunk-XOSNETAV.js";
15
15
  import "./chunk-SR4I5KET.js";
16
16
  import "./chunk-VQ2ZO7XJ.js";
17
17
  import "./chunk-LRULMAAA.js";
@@ -29,4 +29,4 @@ export {
29
29
  resourceFromAttributes,
30
30
  serviceInstanceIdDetector
31
31
  };
32
- //# sourceMappingURL=esm-DX3WQKEN.js.map
32
+ //# sourceMappingURL=esm-CZAWIY6F.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
package/dist/index.js CHANGED
@@ -3,12 +3,13 @@ import {
3
3
  app,
4
4
  preprocessArgv,
5
5
  runCli
6
- } from "./chunk-GC6T3RD4.js";
7
- import "./chunk-4MSAOMCC.js";
8
- import "./chunk-XTYMR4I5.js";
9
- import "./chunk-JK6V4KVD.js";
6
+ } from "./chunk-5WIB7A27.js";
7
+ import "./chunk-DY4ZDTTO.js";
8
+ import "./chunk-HF4X7ALN.js";
10
9
  import "./chunk-C5GOHBQM.js";
10
+ import "./chunk-JK6V4KVD.js";
11
11
  import "./chunk-3L2L5GIL.js";
12
+ import "./chunk-6GSYTMXD.js";
12
13
  import "./chunk-LRULMAAA.js";
13
14
  import "./chunk-5H446C7X.js";
14
15
  export {
@@ -4,17 +4,18 @@ import {
4
4
  fileExists,
5
5
  findRepoRoot,
6
6
  runEvalCommand
7
- } from "./chunk-4MSAOMCC.js";
7
+ } from "./chunk-DY4ZDTTO.js";
8
8
  import {
9
9
  DEFAULT_EVAL_PATTERNS,
10
10
  getAgentvHome,
11
11
  listTargetNames,
12
12
  loadConfig,
13
13
  readTargetDefinitions
14
- } from "./chunk-XTYMR4I5.js";
15
- import "./chunk-JK6V4KVD.js";
14
+ } from "./chunk-HF4X7ALN.js";
16
15
  import "./chunk-C5GOHBQM.js";
16
+ import "./chunk-JK6V4KVD.js";
17
17
  import "./chunk-3L2L5GIL.js";
18
+ import "./chunk-6GSYTMXD.js";
18
19
  import "./chunk-LRULMAAA.js";
19
20
  import "./chunk-5H446C7X.js";
20
21
 
@@ -330,4 +331,4 @@ async function executeConfig(config) {
330
331
  export {
331
332
  launchInteractiveWizard
332
333
  };
333
- //# sourceMappingURL=interactive-3TDBCSDW.js.map
334
+ //# sourceMappingURL=interactive-B432TCRZ.js.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/commands/eval/interactive.ts","../src/commands/eval/discover.ts","../src/commands/eval/last-config.ts"],"sourcesContent":["import path from 'node:path';\nimport { listTargetNames, readTargetDefinitions } from '@agentv/core';\nimport { checkbox, confirm, number, search, select } from '@inquirer/prompts';\n\nimport { TARGET_FILE_CANDIDATES, fileExists } from '../../utils/targets.js';\nimport {\n type DiscoveredEvalFile,\n discoverEvalFiles,\n filterByCategory,\n getCategories,\n} from './discover.js';\nimport { type LastConfig, loadLastConfig, saveLastConfig } from './last-config.js';\nimport { runEvalCommand } from './run-eval.js';\nimport { findRepoRoot } from './shared.js';\n\nconst ANSI_BOLD = '\\x1b[1m';\nconst ANSI_DIM = '\\x1b[2m';\nconst ANSI_CYAN = '\\x1b[36m';\nconst ANSI_GREEN = '\\x1b[32m';\nconst ANSI_RESET = '\\x1b[0m';\n\nexport interface InteractiveConfig {\n readonly evalPaths: readonly string[];\n readonly target: string;\n readonly workers: number;\n readonly dryRun: boolean;\n readonly cache: boolean;\n}\n\n/**\n * Launch the interactive wizard when `agentv eval` is called with no arguments.\n */\nexport async function launchInteractiveWizard(): Promise<void> {\n const cwd = process.cwd();\n\n console.log(`\\n${ANSI_BOLD}${ANSI_CYAN}AgentV Interactive Mode${ANSI_RESET}\\n`);\n\n const lastConfig = await loadLastConfig();\n const action = await promptMainMenu(lastConfig);\n\n if (action === 'exit') {\n return;\n }\n\n if (action === 'rerun' && lastConfig) {\n console.log(`\\n${ANSI_DIM}Rerunning last configuration...${ANSI_RESET}\\n`);\n await executeConfig({\n evalPaths: lastConfig.evalPaths,\n target: lastConfig.target,\n workers: lastConfig.workers,\n dryRun: lastConfig.dryRun,\n cache: lastConfig.cache,\n });\n return;\n }\n\n // Run new evaluation flow\n const config = await promptNewEvaluation(cwd);\n if (!config) {\n return;\n }\n\n // Review & confirm\n const confirmed = await promptReviewAndConfirm(config, cwd);\n if (!confirmed) {\n return;\n }\n\n // Save last config\n await saveLastConfig({\n timestamp: new Date().toISOString(),\n cwd,\n evalPaths: config.evalPaths,\n target: config.target,\n workers: config.workers,\n dryRun: config.dryRun,\n cache: config.cache,\n });\n\n await executeConfig(config);\n}\n\nasync function promptMainMenu(\n lastConfig: LastConfig | undefined,\n): Promise<'new' | 'rerun' | 'exit'> {\n type MenuChoice = 'new' | 'rerun' | 'exit';\n const choices: Array<{ name: string; value: MenuChoice; description?: string }> = [];\n\n if (lastConfig) {\n const evalCount = lastConfig.evalPaths.length;\n choices.push({\n name: '🔄 Rerun last config',\n value: 'rerun',\n description: `${evalCount} eval file(s), target: ${lastConfig.target}`,\n });\n }\n\n choices.push({ name: '🚀 Run new evaluation', value: 'new' }, { name: '✕ Exit', value: 'exit' });\n\n return select<MenuChoice>({\n message: 'What would you like to do?',\n choices,\n });\n}\n\nasync function promptNewEvaluation(cwd: string): Promise<InteractiveConfig | undefined> {\n // Step 1: Discover eval files\n console.log(`\\n${ANSI_DIM}Scanning for eval files...${ANSI_RESET}`);\n const allFiles = await discoverEvalFiles(cwd);\n\n if (allFiles.length === 0) {\n console.log(\n '\\n⚠ No eval files found in the current directory.\\n' +\n ' Place .yaml or .jsonl eval files in your project, or use:\\n' +\n ' agentv eval <path-to-eval.yaml>\\n',\n );\n return undefined;\n }\n\n console.log(`${ANSI_DIM}Found ${allFiles.length} eval file(s)${ANSI_RESET}\\n`);\n\n // Step 2: Select eval files (optionally filter by category first)\n const selectedFiles = await promptEvalSelection(allFiles);\n if (selectedFiles.length === 0) {\n console.log('\\nNo eval files selected.');\n return undefined;\n }\n\n // Step 3: Select target\n const target = await promptTargetSelection(cwd, selectedFiles[0].path);\n\n // Step 4: Advanced options\n const advanced = await promptAdvancedOptions();\n\n return {\n evalPaths: selectedFiles.map((f) => f.path),\n target,\n ...advanced,\n };\n}\n\nasync function promptEvalSelection(\n allFiles: readonly DiscoveredEvalFile[],\n): Promise<DiscoveredEvalFile[]> {\n const categories = getCategories(allFiles);\n\n // If only one category or few files, skip category selection\n let filesToSelect: readonly DiscoveredEvalFile[];\n\n if (categories.length > 1) {\n const selectedCategory = await search<string>({\n message: 'Select a category (type to search)',\n source: async (term) => {\n const filtered = term\n ? categories.filter((c) => c.toLowerCase().includes(term.toLowerCase()))\n : categories;\n return [\n { name: '(all categories)', value: '__all__' },\n ...filtered.map((c) => {\n const count = filterByCategory(allFiles, c).length;\n return { name: `${c} (${count} file${count > 1 ? 's' : ''})`, value: c };\n }),\n ];\n },\n });\n\n filesToSelect =\n selectedCategory === '__all__' ? allFiles : filterByCategory(allFiles, selectedCategory);\n } else {\n filesToSelect = allFiles;\n }\n\n return checkbox<DiscoveredEvalFile>({\n message: 'Select eval files to run (space to toggle, enter to confirm)',\n choices: filesToSelect.map((f) => ({\n name: f.relativePath,\n value: f,\n checked: filesToSelect.length <= 5, // auto-select if few files\n })),\n required: true,\n });\n}\n\nasync function promptTargetSelection(cwd: string, firstEvalPath: string): Promise<string> {\n const repoRoot = await findRepoRoot(cwd);\n\n // Try to find targets.yaml — search near the eval file first, then cwd/repoRoot\n const targetsPath = await findTargetsFile(cwd, repoRoot, firstEvalPath);\n\n if (!targetsPath) {\n console.log(`${ANSI_DIM}No targets.yaml found. Using default target.${ANSI_RESET}`);\n return 'default';\n }\n\n const definitions = await readTargetDefinitions(targetsPath);\n const targetNames = listTargetNames(definitions);\n\n if (targetNames.length === 0) {\n return 'default';\n }\n\n if (targetNames.length === 1) {\n console.log(`${ANSI_DIM}Using target: ${targetNames[0]}${ANSI_RESET}`);\n return targetNames[0];\n }\n\n return search<string>({\n message: 'Select a target (type to search)',\n source: async (term) => {\n const filtered = term\n ? targetNames.filter((t) => t.toLowerCase().includes(term.toLowerCase()))\n : targetNames;\n return filtered.map((t) => {\n const def = definitions.find((d) => d.name === t);\n return {\n name: t,\n value: t,\n description: def ? `provider: ${def.provider}` : undefined,\n };\n });\n },\n });\n}\n\nasync function findTargetsFile(\n cwd: string,\n repoRoot: string,\n evalFilePath?: string,\n): Promise<string | undefined> {\n // Build directory chain: eval file dir → cwd → repoRoot (mirrors discoverTargetsFile)\n const dirsToSearch: string[] = [];\n\n if (evalFilePath) {\n const evalDir = path.dirname(evalFilePath);\n if (!dirsToSearch.includes(evalDir)) {\n dirsToSearch.push(evalDir);\n }\n }\n\n if (!dirsToSearch.includes(cwd)) {\n dirsToSearch.push(cwd);\n }\n\n if (repoRoot !== cwd && !dirsToSearch.includes(repoRoot)) {\n dirsToSearch.push(repoRoot);\n }\n\n for (const dir of dirsToSearch) {\n for (const candidate of TARGET_FILE_CANDIDATES) {\n const fullPath = `${dir}/${candidate}`;\n if (await fileExists(fullPath)) {\n return fullPath;\n }\n }\n }\n\n return undefined;\n}\n\nasync function promptAdvancedOptions(): Promise<{\n workers: number;\n dryRun: boolean;\n cache: boolean;\n}> {\n const customize = await confirm({\n message: 'Configure advanced options?',\n default: false,\n });\n\n if (!customize) {\n return { workers: 3, dryRun: false, cache: false };\n }\n\n const workers =\n (await number({\n message: 'Number of parallel workers (1-50)',\n default: 3,\n min: 1,\n max: 50,\n })) ?? 3;\n\n const dryRun = await confirm({\n message: 'Enable dry-run mode (mock responses)?',\n default: false,\n });\n\n const cache = await confirm({\n message: 'Enable response cache?',\n default: false,\n });\n\n return { workers, dryRun, cache };\n}\n\nasync function promptReviewAndConfirm(config: InteractiveConfig, cwd: string): Promise<boolean> {\n const evalDisplay = config.evalPaths\n .map((p) => {\n const rel = p.startsWith(cwd) ? p.slice(cwd.length + 1) : p;\n return ` ${rel}`;\n })\n .join('\\n');\n\n console.log(`\\n${ANSI_BOLD}Review Configuration${ANSI_RESET}`);\n console.log(`${ANSI_DIM}${'─'.repeat(40)}${ANSI_RESET}`);\n console.log(`${ANSI_GREEN}Eval files:${ANSI_RESET}\\n${evalDisplay}`);\n console.log(`${ANSI_GREEN}Target:${ANSI_RESET} ${config.target}`);\n console.log(`${ANSI_GREEN}Workers:${ANSI_RESET} ${config.workers}`);\n console.log(`${ANSI_GREEN}Dry run:${ANSI_RESET} ${config.dryRun ? 'yes' : 'no'}`);\n console.log(`${ANSI_GREEN}Cache:${ANSI_RESET} ${config.cache ? 'yes' : 'no'}`);\n console.log(`${ANSI_DIM}${'─'.repeat(40)}${ANSI_RESET}`);\n\n return confirm({\n message: 'Run evaluation with this configuration?',\n default: true,\n });\n}\n\nasync function executeConfig(config: InteractiveConfig): Promise<void> {\n const rawOptions: Record<string, unknown> = {\n target: config.target,\n workers: config.workers,\n dryRun: config.dryRun,\n cache: config.cache,\n outputFormat: 'jsonl',\n dryRunDelay: 0,\n dryRunDelayMin: 0,\n dryRunDelayMax: 0,\n agentTimeout: 120,\n maxRetries: 2,\n verbose: false,\n keepWorkspaces: false,\n cleanupWorkspaces: false,\n trace: false,\n };\n\n await runEvalCommand({\n testFiles: [...config.evalPaths],\n rawOptions,\n });\n}\n","import path from 'node:path';\nimport { DEFAULT_EVAL_PATTERNS, loadConfig } from '@agentv/core';\nimport fg from 'fast-glob';\n\nimport { findRepoRoot } from './shared.js';\n\nexport interface DiscoveredEvalFile {\n /** Absolute path to the eval file */\n readonly path: string;\n /** Relative path from cwd for display */\n readonly relativePath: string;\n /** Category derived from parent folder structure */\n readonly category: string;\n}\n\n/**\n * Discover eval files by glob pattern matching.\n *\n * Uses `eval_patterns` from `.agentv/config.yaml` if configured,\n * otherwise falls back to default patterns that match `dataset*.yaml`\n * and `eval.yaml` files under `evals/` directories.\n */\nexport async function discoverEvalFiles(cwd: string): Promise<readonly DiscoveredEvalFile[]> {\n const repoRoot = await findRepoRoot(cwd);\n\n // Load config to check for custom eval_patterns\n // Pass a dummy file path in cwd so buildDirectoryChain starts from cwd\n const config = await loadConfig(path.join(cwd, '_'), repoRoot);\n const patterns =\n config?.eval_patterns && config.eval_patterns.length > 0\n ? config.eval_patterns\n : DEFAULT_EVAL_PATTERNS;\n\n const ignore = ['**/node_modules/**', '**/dist/**'];\n\n const matches = await fg(patterns as string[], {\n cwd,\n absolute: true,\n onlyFiles: true,\n ignore,\n followSymbolicLinks: true,\n caseSensitiveMatch: false,\n });\n\n const evalFiles: DiscoveredEvalFile[] = matches.map((absPath) => {\n const relativePath = path.relative(cwd, absPath);\n const category = deriveCategory(relativePath);\n return { path: absPath, relativePath, category };\n });\n\n evalFiles.sort((a, b) => a.relativePath.localeCompare(b.relativePath));\n return evalFiles;\n}\n\n/** Derive a human-readable category from the relative path. */\nfunction deriveCategory(relativePath: string): string {\n const parts = relativePath.split(path.sep);\n // Use the first meaningful directory as category\n // e.g., \"examples/showcase/export-screening/evals/dataset.eval.yaml\" → \"showcase/export-screening\"\n // e.g., \"evals/dataset.eval.yaml\" → \"evals\"\n if (parts.length <= 1) {\n return 'root';\n }\n\n // Remove the filename and \"evals\" folder if present\n const dirs = parts.slice(0, -1).filter((d) => d !== 'evals');\n return dirs.length > 0 ? dirs.join('/') : 'root';\n}\n\n/** Get unique categories from discovered eval files. */\nexport function getCategories(files: readonly DiscoveredEvalFile[]): readonly string[] {\n const categories = new Set<string>();\n for (const file of files) {\n categories.add(file.category);\n }\n const sorted = Array.from(categories);\n sorted.sort();\n return sorted;\n}\n\n/** Filter eval files by category. */\nexport function filterByCategory(\n files: readonly DiscoveredEvalFile[],\n category: string,\n): readonly DiscoveredEvalFile[] {\n return files.filter((f) => f.category === category);\n}\n","import { mkdir, readFile, writeFile } from 'node:fs/promises';\nimport path from 'node:path';\nimport { getAgentvHome } from '@agentv/core';\n\nconst AGENTV_DIR = getAgentvHome();\nconst LAST_CONFIG_PATH = path.join(AGENTV_DIR, 'last-config.json');\n\nexport interface LastConfig {\n readonly timestamp: string;\n readonly cwd: string;\n readonly evalPaths: readonly string[];\n readonly target: string;\n readonly workers: number;\n readonly dryRun: boolean;\n readonly cache: boolean;\n}\n\nexport async function loadLastConfig(): Promise<LastConfig | undefined> {\n try {\n const content = await readFile(LAST_CONFIG_PATH, 'utf-8');\n return JSON.parse(content) as LastConfig;\n } catch {\n return undefined;\n }\n}\n\nexport async function saveLastConfig(config: LastConfig): Promise<void> {\n await mkdir(AGENTV_DIR, { recursive: true });\n await writeFile(LAST_CONFIG_PATH, JSON.stringify(config, null, 2), 'utf-8');\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;AAAA,OAAOA,WAAU;AAEjB,SAAS,UAAU,SAAS,QAAQ,QAAQ,cAAc;;;ACF1D,OAAO,UAAU;AAEjB,OAAO,QAAQ;AAoBf,eAAsB,kBAAkB,KAAqD;AAC3F,QAAM,WAAW,MAAM,aAAa,GAAG;AAIvC,QAAM,SAAS,MAAM,WAAW,KAAK,KAAK,KAAK,GAAG,GAAG,QAAQ;AAC7D,QAAM,WACJ,QAAQ,iBAAiB,OAAO,cAAc,SAAS,IACnD,OAAO,gBACP;AAEN,QAAM,SAAS,CAAC,sBAAsB,YAAY;AAElD,QAAM,UAAU,MAAM,GAAG,UAAsB;AAAA,IAC7C;AAAA,IACA,UAAU;AAAA,IACV,WAAW;AAAA,IACX;AAAA,IACA,qBAAqB;AAAA,IACrB,oBAAoB;AAAA,EACtB,CAAC;AAED,QAAM,YAAkC,QAAQ,IAAI,CAAC,YAAY;AAC/D,UAAM,eAAe,KAAK,SAAS,KAAK,OAAO;AAC/C,UAAM,WAAW,eAAe,YAAY;AAC5C,WAAO,EAAE,MAAM,SAAS,cAAc,SAAS;AAAA,EACjD,CAAC;AAED,YAAU,KAAK,CAAC,GAAG,MAAM,EAAE,aAAa,cAAc,EAAE,YAAY,CAAC;AACrE,SAAO;AACT;AAGA,SAAS,eAAe,cAA8B;AACpD,QAAM,QAAQ,aAAa,MAAM,KAAK,GAAG;AAIzC,MAAI,MAAM,UAAU,GAAG;AACrB,WAAO;AAAA,EACT;AAGA,QAAM,OAAO,MAAM,MAAM,GAAG,EAAE,EAAE,OAAO,CAAC,MAAM,MAAM,OAAO;AAC3D,SAAO,KAAK,SAAS,IAAI,KAAK,KAAK,GAAG,IAAI;AAC5C;AAGO,SAAS,cAAc,OAAyD;AACrF,QAAM,aAAa,oBAAI,IAAY;AACnC,aAAW,QAAQ,OAAO;AACxB,eAAW,IAAI,KAAK,QAAQ;AAAA,EAC9B;AACA,QAAM,SAAS,MAAM,KAAK,UAAU;AACpC,SAAO,KAAK;AACZ,SAAO;AACT;AAGO,SAAS,iBACd,OACA,UAC+B;AAC/B,SAAO,MAAM,OAAO,CAAC,MAAM,EAAE,aAAa,QAAQ;AACpD;;;ACtFA,SAAS,OAAO,UAAU,iBAAiB;AAC3C,OAAOC,WAAU;AAGjB,IAAM,aAAa,cAAc;AACjC,IAAM,mBAAmBC,MAAK,KAAK,YAAY,kBAAkB;AAYjE,eAAsB,iBAAkD;AACtE,MAAI;AACF,UAAM,UAAU,MAAM,SAAS,kBAAkB,OAAO;AACxD,WAAO,KAAK,MAAM,OAAO;AAAA,EAC3B,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEA,eAAsB,eAAe,QAAmC;AACtE,QAAM,MAAM,YAAY,EAAE,WAAW,KAAK,CAAC;AAC3C,QAAM,UAAU,kBAAkB,KAAK,UAAU,QAAQ,MAAM,CAAC,GAAG,OAAO;AAC5E;;;AFdA,IAAM,YAAY;AAClB,IAAM,WAAW;AACjB,IAAM,YAAY;AAClB,IAAM,aAAa;AACnB,IAAM,aAAa;AAanB,eAAsB,0BAAyC;AAC7D,QAAM,MAAM,QAAQ,IAAI;AAExB,UAAQ,IAAI;AAAA,EAAK,SAAS,GAAG,SAAS,0BAA0B,UAAU;AAAA,CAAI;AAE9E,QAAM,aAAa,MAAM,eAAe;AACxC,QAAM,SAAS,MAAM,eAAe,UAAU;AAE9C,MAAI,WAAW,QAAQ;AACrB;AAAA,EACF;AAEA,MAAI,WAAW,WAAW,YAAY;AACpC,YAAQ,IAAI;AAAA,EAAK,QAAQ,kCAAkC,UAAU;AAAA,CAAI;AACzE,UAAM,cAAc;AAAA,MAClB,WAAW,WAAW;AAAA,MACtB,QAAQ,WAAW;AAAA,MACnB,SAAS,WAAW;AAAA,MACpB,QAAQ,WAAW;AAAA,MACnB,OAAO,WAAW;AAAA,IACpB,CAAC;AACD;AAAA,EACF;AAGA,QAAM,SAAS,MAAM,oBAAoB,GAAG;AAC5C,MAAI,CAAC,QAAQ;AACX;AAAA,EACF;AAGA,QAAM,YAAY,MAAM,uBAAuB,QAAQ,GAAG;AAC1D,MAAI,CAAC,WAAW;AACd;AAAA,EACF;AAGA,QAAM,eAAe;AAAA,IACnB,YAAW,oBAAI,KAAK,GAAE,YAAY;AAAA,IAClC;AAAA,IACA,WAAW,OAAO;AAAA,IAClB,QAAQ,OAAO;AAAA,IACf,SAAS,OAAO;AAAA,IAChB,QAAQ,OAAO;AAAA,IACf,OAAO,OAAO;AAAA,EAChB,CAAC;AAED,QAAM,cAAc,MAAM;AAC5B;AAEA,eAAe,eACb,YACmC;AAEnC,QAAM,UAA4E,CAAC;AAEnF,MAAI,YAAY;AACd,UAAM,YAAY,WAAW,UAAU;AACvC,YAAQ,KAAK;AAAA,MACX,MAAM;AAAA,MACN,OAAO;AAAA,MACP,aAAa,GAAG,SAAS,0BAA0B,WAAW,MAAM;AAAA,IACtE,CAAC;AAAA,EACH;AAEA,UAAQ,KAAK,EAAE,MAAM,gCAAyB,OAAO,MAAM,GAAG,EAAE,MAAM,eAAU,OAAO,OAAO,CAAC;AAE/F,SAAO,OAAmB;AAAA,IACxB,SAAS;AAAA,IACT;AAAA,EACF,CAAC;AACH;AAEA,eAAe,oBAAoB,KAAqD;AAEtF,UAAQ,IAAI;AAAA,EAAK,QAAQ,6BAA6B,UAAU,EAAE;AAClE,QAAM,WAAW,MAAM,kBAAkB,GAAG;AAE5C,MAAI,SAAS,WAAW,GAAG;AACzB,YAAQ;AAAA,MACN;AAAA,IAGF;AACA,WAAO;AAAA,EACT;AAEA,UAAQ,IAAI,GAAG,QAAQ,SAAS,SAAS,MAAM,gBAAgB,UAAU;AAAA,CAAI;AAG7E,QAAM,gBAAgB,MAAM,oBAAoB,QAAQ;AACxD,MAAI,cAAc,WAAW,GAAG;AAC9B,YAAQ,IAAI,2BAA2B;AACvC,WAAO;AAAA,EACT;AAGA,QAAM,SAAS,MAAM,sBAAsB,KAAK,cAAc,CAAC,EAAE,IAAI;AAGrE,QAAM,WAAW,MAAM,sBAAsB;AAE7C,SAAO;AAAA,IACL,WAAW,cAAc,IAAI,CAAC,MAAM,EAAE,IAAI;AAAA,IAC1C;AAAA,IACA,GAAG;AAAA,EACL;AACF;AAEA,eAAe,oBACb,UAC+B;AAC/B,QAAM,aAAa,cAAc,QAAQ;AAGzC,MAAI;AAEJ,MAAI,WAAW,SAAS,GAAG;AACzB,UAAM,mBAAmB,MAAM,OAAe;AAAA,MAC5C,SAAS;AAAA,MACT,QAAQ,OAAO,SAAS;AACtB,cAAM,WAAW,OACb,WAAW,OAAO,CAAC,MAAM,EAAE,YAAY,EAAE,SAAS,KAAK,YAAY,CAAC,CAAC,IACrE;AACJ,eAAO;AAAA,UACL,EAAE,MAAM,oBAAoB,OAAO,UAAU;AAAA,UAC7C,GAAG,SAAS,IAAI,CAAC,MAAM;AACrB,kBAAM,QAAQ,iBAAiB,UAAU,CAAC,EAAE;AAC5C,mBAAO,EAAE,MAAM,GAAG,CAAC,KAAK,KAAK,QAAQ,QAAQ,IAAI,MAAM,EAAE,KAAK,OAAO,EAAE;AAAA,UACzE,CAAC;AAAA,QACH;AAAA,MACF;AAAA,IACF,CAAC;AAED,oBACE,qBAAqB,YAAY,WAAW,iBAAiB,UAAU,gBAAgB;AAAA,EAC3F,OAAO;AACL,oBAAgB;AAAA,EAClB;AAEA,SAAO,SAA6B;AAAA,IAClC,SAAS;AAAA,IACT,SAAS,cAAc,IAAI,CAAC,OAAO;AAAA,MACjC,MAAM,EAAE;AAAA,MACR,OAAO;AAAA,MACP,SAAS,cAAc,UAAU;AAAA;AAAA,IACnC,EAAE;AAAA,IACF,UAAU;AAAA,EACZ,CAAC;AACH;AAEA,eAAe,sBAAsB,KAAa,eAAwC;AACxF,QAAM,WAAW,MAAM,aAAa,GAAG;AAGvC,QAAM,cAAc,MAAM,gBAAgB,KAAK,UAAU,aAAa;AAEtE,MAAI,CAAC,aAAa;AAChB,YAAQ,IAAI,GAAG,QAAQ,+CAA+C,UAAU,EAAE;AAClF,WAAO;AAAA,EACT;AAEA,QAAM,cAAc,MAAM,sBAAsB,WAAW;AAC3D,QAAM,cAAc,gBAAgB,WAAW;AAE/C,MAAI,YAAY,WAAW,GAAG;AAC5B,WAAO;AAAA,EACT;AAEA,MAAI,YAAY,WAAW,GAAG;AAC5B,YAAQ,IAAI,GAAG,QAAQ,iBAAiB,YAAY,CAAC,CAAC,GAAG,UAAU,EAAE;AACrE,WAAO,YAAY,CAAC;AAAA,EACtB;AAEA,SAAO,OAAe;AAAA,IACpB,SAAS;AAAA,IACT,QAAQ,OAAO,SAAS;AACtB,YAAM,WAAW,OACb,YAAY,OAAO,CAAC,MAAM,EAAE,YAAY,EAAE,SAAS,KAAK,YAAY,CAAC,CAAC,IACtE;AACJ,aAAO,SAAS,IAAI,CAAC,MAAM;AACzB,cAAM,MAAM,YAAY,KAAK,CAAC,MAAM,EAAE,SAAS,CAAC;AAChD,eAAO;AAAA,UACL,MAAM;AAAA,UACN,OAAO;AAAA,UACP,aAAa,MAAM,aAAa,IAAI,QAAQ,KAAK;AAAA,QACnD;AAAA,MACF,CAAC;AAAA,IACH;AAAA,EACF,CAAC;AACH;AAEA,eAAe,gBACb,KACA,UACA,cAC6B;AAE7B,QAAM,eAAyB,CAAC;AAEhC,MAAI,cAAc;AAChB,UAAM,UAAUC,MAAK,QAAQ,YAAY;AACzC,QAAI,CAAC,aAAa,SAAS,OAAO,GAAG;AACnC,mBAAa,KAAK,OAAO;AAAA,IAC3B;AAAA,EACF;AAEA,MAAI,CAAC,aAAa,SAAS,GAAG,GAAG;AAC/B,iBAAa,KAAK,GAAG;AAAA,EACvB;AAEA,MAAI,aAAa,OAAO,CAAC,aAAa,SAAS,QAAQ,GAAG;AACxD,iBAAa,KAAK,QAAQ;AAAA,EAC5B;AAEA,aAAW,OAAO,cAAc;AAC9B,eAAW,aAAa,wBAAwB;AAC9C,YAAM,WAAW,GAAG,GAAG,IAAI,SAAS;AACpC,UAAI,MAAM,WAAW,QAAQ,GAAG;AAC9B,eAAO;AAAA,MACT;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AACT;AAEA,eAAe,wBAIZ;AACD,QAAM,YAAY,MAAM,QAAQ;AAAA,IAC9B,SAAS;AAAA,IACT,SAAS;AAAA,EACX,CAAC;AAED,MAAI,CAAC,WAAW;AACd,WAAO,EAAE,SAAS,GAAG,QAAQ,OAAO,OAAO,MAAM;AAAA,EACnD;AAEA,QAAM,UACH,MAAM,OAAO;AAAA,IACZ,SAAS;AAAA,IACT,SAAS;AAAA,IACT,KAAK;AAAA,IACL,KAAK;AAAA,EACP,CAAC,KAAM;AAET,QAAM,SAAS,MAAM,QAAQ;AAAA,IAC3B,SAAS;AAAA,IACT,SAAS;AAAA,EACX,CAAC;AAED,QAAM,QAAQ,MAAM,QAAQ;AAAA,IAC1B,SAAS;AAAA,IACT,SAAS;AAAA,EACX,CAAC;AAED,SAAO,EAAE,SAAS,QAAQ,MAAM;AAClC;AAEA,eAAe,uBAAuB,QAA2B,KAA+B;AAC9F,QAAM,cAAc,OAAO,UACxB,IAAI,CAAC,MAAM;AACV,UAAM,MAAM,EAAE,WAAW,GAAG,IAAI,EAAE,MAAM,IAAI,SAAS,CAAC,IAAI;AAC1D,WAAO,KAAK,GAAG;AAAA,EACjB,CAAC,EACA,KAAK,IAAI;AAEZ,UAAQ,IAAI;AAAA,EAAK,SAAS,uBAAuB,UAAU,EAAE;AAC7D,UAAQ,IAAI,GAAG,QAAQ,GAAG,SAAI,OAAO,EAAE,CAAC,GAAG,UAAU,EAAE;AACvD,UAAQ,IAAI,GAAG,UAAU,cAAc,UAAU;AAAA,EAAK,WAAW,EAAE;AACnE,UAAQ,IAAI,GAAG,UAAU,UAAU,UAAU,OAAO,OAAO,MAAM,EAAE;AACnE,UAAQ,IAAI,GAAG,UAAU,WAAW,UAAU,MAAM,OAAO,OAAO,EAAE;AACpE,UAAQ,IAAI,GAAG,UAAU,WAAW,UAAU,MAAM,OAAO,SAAS,QAAQ,IAAI,EAAE;AAClF,UAAQ,IAAI,GAAG,UAAU,SAAS,UAAU,QAAQ,OAAO,QAAQ,QAAQ,IAAI,EAAE;AACjF,UAAQ,IAAI,GAAG,QAAQ,GAAG,SAAI,OAAO,EAAE,CAAC,GAAG,UAAU,EAAE;AAEvD,SAAO,QAAQ;AAAA,IACb,SAAS;AAAA,IACT,SAAS;AAAA,EACX,CAAC;AACH;AAEA,eAAe,cAAc,QAA0C;AACrE,QAAM,aAAsC;AAAA,IAC1C,QAAQ,OAAO;AAAA,IACf,SAAS,OAAO;AAAA,IAChB,QAAQ,OAAO;AAAA,IACf,OAAO,OAAO;AAAA,IACd,cAAc;AAAA,IACd,aAAa;AAAA,IACb,gBAAgB;AAAA,IAChB,gBAAgB;AAAA,IAChB,cAAc;AAAA,IACd,YAAY;AAAA,IACZ,SAAS;AAAA,IACT,gBAAgB;AAAA,IAChB,mBAAmB;AAAA,IACnB,OAAO;AAAA,EACT;AAEA,QAAM,eAAe;AAAA,IACnB,WAAW,CAAC,GAAG,OAAO,SAAS;AAAA,IAC/B;AAAA,EACF,CAAC;AACH;","names":["path","path","path","path"]}
1
+ {"version":3,"sources":["../src/commands/eval/interactive.ts","../src/commands/eval/discover.ts","../src/commands/eval/last-config.ts"],"sourcesContent":["import path from 'node:path';\nimport { listTargetNames, readTargetDefinitions } from '@agentv/core';\nimport { checkbox, confirm, number, search, select } from '@inquirer/prompts';\n\nimport { TARGET_FILE_CANDIDATES, fileExists } from '../../utils/targets.js';\nimport {\n type DiscoveredEvalFile,\n discoverEvalFiles,\n filterByCategory,\n getCategories,\n} from './discover.js';\nimport { type LastConfig, loadLastConfig, saveLastConfig } from './last-config.js';\nimport { runEvalCommand } from './run-eval.js';\nimport { findRepoRoot } from './shared.js';\n\nconst ANSI_BOLD = '\\x1b[1m';\nconst ANSI_DIM = '\\x1b[2m';\nconst ANSI_CYAN = '\\x1b[36m';\nconst ANSI_GREEN = '\\x1b[32m';\nconst ANSI_RESET = '\\x1b[0m';\n\nexport interface InteractiveConfig {\n readonly evalPaths: readonly string[];\n readonly target: string;\n readonly workers: number;\n readonly dryRun: boolean;\n readonly cache: boolean;\n}\n\n/**\n * Launch the interactive wizard when `agentv eval` is called with no arguments.\n */\nexport async function launchInteractiveWizard(): Promise<void> {\n const cwd = process.cwd();\n\n console.log(`\\n${ANSI_BOLD}${ANSI_CYAN}AgentV Interactive Mode${ANSI_RESET}\\n`);\n\n const lastConfig = await loadLastConfig();\n const action = await promptMainMenu(lastConfig);\n\n if (action === 'exit') {\n return;\n }\n\n if (action === 'rerun' && lastConfig) {\n console.log(`\\n${ANSI_DIM}Rerunning last configuration...${ANSI_RESET}\\n`);\n await executeConfig({\n evalPaths: lastConfig.evalPaths,\n target: lastConfig.target,\n workers: lastConfig.workers,\n dryRun: lastConfig.dryRun,\n cache: lastConfig.cache,\n });\n return;\n }\n\n // Run new evaluation flow\n const config = await promptNewEvaluation(cwd);\n if (!config) {\n return;\n }\n\n // Review & confirm\n const confirmed = await promptReviewAndConfirm(config, cwd);\n if (!confirmed) {\n return;\n }\n\n // Save last config\n await saveLastConfig({\n timestamp: new Date().toISOString(),\n cwd,\n evalPaths: config.evalPaths,\n target: config.target,\n workers: config.workers,\n dryRun: config.dryRun,\n cache: config.cache,\n });\n\n await executeConfig(config);\n}\n\nasync function promptMainMenu(\n lastConfig: LastConfig | undefined,\n): Promise<'new' | 'rerun' | 'exit'> {\n type MenuChoice = 'new' | 'rerun' | 'exit';\n const choices: Array<{ name: string; value: MenuChoice; description?: string }> = [];\n\n if (lastConfig) {\n const evalCount = lastConfig.evalPaths.length;\n choices.push({\n name: '🔄 Rerun last config',\n value: 'rerun',\n description: `${evalCount} eval file(s), target: ${lastConfig.target}`,\n });\n }\n\n choices.push({ name: '🚀 Run new evaluation', value: 'new' }, { name: '✕ Exit', value: 'exit' });\n\n return select<MenuChoice>({\n message: 'What would you like to do?',\n choices,\n });\n}\n\nasync function promptNewEvaluation(cwd: string): Promise<InteractiveConfig | undefined> {\n // Step 1: Discover eval files\n console.log(`\\n${ANSI_DIM}Scanning for eval files...${ANSI_RESET}`);\n const allFiles = await discoverEvalFiles(cwd);\n\n if (allFiles.length === 0) {\n console.log(\n '\\n⚠ No eval files found in the current directory.\\n' +\n ' Place .yaml or .jsonl eval files in your project, or use:\\n' +\n ' agentv eval <path-to-eval.yaml>\\n',\n );\n return undefined;\n }\n\n console.log(`${ANSI_DIM}Found ${allFiles.length} eval file(s)${ANSI_RESET}\\n`);\n\n // Step 2: Select eval files (optionally filter by category first)\n const selectedFiles = await promptEvalSelection(allFiles);\n if (selectedFiles.length === 0) {\n console.log('\\nNo eval files selected.');\n return undefined;\n }\n\n // Step 3: Select target\n const target = await promptTargetSelection(cwd, selectedFiles[0].path);\n\n // Step 4: Advanced options\n const advanced = await promptAdvancedOptions();\n\n return {\n evalPaths: selectedFiles.map((f) => f.path),\n target,\n ...advanced,\n };\n}\n\nasync function promptEvalSelection(\n allFiles: readonly DiscoveredEvalFile[],\n): Promise<DiscoveredEvalFile[]> {\n const categories = getCategories(allFiles);\n\n // If only one category or few files, skip category selection\n let filesToSelect: readonly DiscoveredEvalFile[];\n\n if (categories.length > 1) {\n const selectedCategory = await search<string>({\n message: 'Select a category (type to search)',\n source: async (term) => {\n const filtered = term\n ? categories.filter((c) => c.toLowerCase().includes(term.toLowerCase()))\n : categories;\n return [\n { name: '(all categories)', value: '__all__' },\n ...filtered.map((c) => {\n const count = filterByCategory(allFiles, c).length;\n return { name: `${c} (${count} file${count > 1 ? 's' : ''})`, value: c };\n }),\n ];\n },\n });\n\n filesToSelect =\n selectedCategory === '__all__' ? allFiles : filterByCategory(allFiles, selectedCategory);\n } else {\n filesToSelect = allFiles;\n }\n\n return checkbox<DiscoveredEvalFile>({\n message: 'Select eval files to run (space to toggle, enter to confirm)',\n choices: filesToSelect.map((f) => ({\n name: f.relativePath,\n value: f,\n checked: filesToSelect.length <= 5, // auto-select if few files\n })),\n required: true,\n });\n}\n\nasync function promptTargetSelection(cwd: string, firstEvalPath: string): Promise<string> {\n const repoRoot = await findRepoRoot(cwd);\n\n // Try to find targets.yaml — search near the eval file first, then cwd/repoRoot\n const targetsPath = await findTargetsFile(cwd, repoRoot, firstEvalPath);\n\n if (!targetsPath) {\n console.log(`${ANSI_DIM}No targets.yaml found. Using default target.${ANSI_RESET}`);\n return 'default';\n }\n\n const definitions = await readTargetDefinitions(targetsPath);\n const targetNames = listTargetNames(definitions);\n\n if (targetNames.length === 0) {\n return 'default';\n }\n\n if (targetNames.length === 1) {\n console.log(`${ANSI_DIM}Using target: ${targetNames[0]}${ANSI_RESET}`);\n return targetNames[0];\n }\n\n return search<string>({\n message: 'Select a target (type to search)',\n source: async (term) => {\n const filtered = term\n ? targetNames.filter((t) => t.toLowerCase().includes(term.toLowerCase()))\n : targetNames;\n return filtered.map((t) => {\n const def = definitions.find((d) => d.name === t);\n return {\n name: t,\n value: t,\n description: def ? `provider: ${def.provider}` : undefined,\n };\n });\n },\n });\n}\n\nasync function findTargetsFile(\n cwd: string,\n repoRoot: string,\n evalFilePath?: string,\n): Promise<string | undefined> {\n // Build directory chain: eval file dir → cwd → repoRoot (mirrors discoverTargetsFile)\n const dirsToSearch: string[] = [];\n\n if (evalFilePath) {\n const evalDir = path.dirname(evalFilePath);\n if (!dirsToSearch.includes(evalDir)) {\n dirsToSearch.push(evalDir);\n }\n }\n\n if (!dirsToSearch.includes(cwd)) {\n dirsToSearch.push(cwd);\n }\n\n if (repoRoot !== cwd && !dirsToSearch.includes(repoRoot)) {\n dirsToSearch.push(repoRoot);\n }\n\n for (const dir of dirsToSearch) {\n for (const candidate of TARGET_FILE_CANDIDATES) {\n const fullPath = `${dir}/${candidate}`;\n if (await fileExists(fullPath)) {\n return fullPath;\n }\n }\n }\n\n return undefined;\n}\n\nasync function promptAdvancedOptions(): Promise<{\n workers: number;\n dryRun: boolean;\n cache: boolean;\n}> {\n const customize = await confirm({\n message: 'Configure advanced options?',\n default: false,\n });\n\n if (!customize) {\n return { workers: 3, dryRun: false, cache: false };\n }\n\n const workers =\n (await number({\n message: 'Number of parallel workers (1-50)',\n default: 3,\n min: 1,\n max: 50,\n })) ?? 3;\n\n const dryRun = await confirm({\n message: 'Enable dry-run mode (mock responses)?',\n default: false,\n });\n\n const cache = await confirm({\n message: 'Enable response cache?',\n default: false,\n });\n\n return { workers, dryRun, cache };\n}\n\nasync function promptReviewAndConfirm(config: InteractiveConfig, cwd: string): Promise<boolean> {\n const evalDisplay = config.evalPaths\n .map((p) => {\n const rel = p.startsWith(cwd) ? p.slice(cwd.length + 1) : p;\n return ` ${rel}`;\n })\n .join('\\n');\n\n console.log(`\\n${ANSI_BOLD}Review Configuration${ANSI_RESET}`);\n console.log(`${ANSI_DIM}${'─'.repeat(40)}${ANSI_RESET}`);\n console.log(`${ANSI_GREEN}Eval files:${ANSI_RESET}\\n${evalDisplay}`);\n console.log(`${ANSI_GREEN}Target:${ANSI_RESET} ${config.target}`);\n console.log(`${ANSI_GREEN}Workers:${ANSI_RESET} ${config.workers}`);\n console.log(`${ANSI_GREEN}Dry run:${ANSI_RESET} ${config.dryRun ? 'yes' : 'no'}`);\n console.log(`${ANSI_GREEN}Cache:${ANSI_RESET} ${config.cache ? 'yes' : 'no'}`);\n console.log(`${ANSI_DIM}${'─'.repeat(40)}${ANSI_RESET}`);\n\n return confirm({\n message: 'Run evaluation with this configuration?',\n default: true,\n });\n}\n\nasync function executeConfig(config: InteractiveConfig): Promise<void> {\n const rawOptions: Record<string, unknown> = {\n target: config.target,\n workers: config.workers,\n dryRun: config.dryRun,\n cache: config.cache,\n outputFormat: 'jsonl',\n dryRunDelay: 0,\n dryRunDelayMin: 0,\n dryRunDelayMax: 0,\n agentTimeout: 120,\n maxRetries: 2,\n verbose: false,\n keepWorkspaces: false,\n cleanupWorkspaces: false,\n trace: false,\n };\n\n await runEvalCommand({\n testFiles: [...config.evalPaths],\n rawOptions,\n });\n}\n","import path from 'node:path';\nimport { DEFAULT_EVAL_PATTERNS, loadConfig } from '@agentv/core';\nimport fg from 'fast-glob';\n\nimport { findRepoRoot } from './shared.js';\n\nexport interface DiscoveredEvalFile {\n /** Absolute path to the eval file */\n readonly path: string;\n /** Relative path from cwd for display */\n readonly relativePath: string;\n /** Category derived from parent folder structure */\n readonly category: string;\n}\n\n/**\n * Discover eval files by glob pattern matching.\n *\n * Uses `eval_patterns` from `.agentv/config.yaml` if configured,\n * otherwise falls back to default patterns that match `dataset*.yaml`\n * and `eval.yaml` files under `evals/` directories.\n */\nexport async function discoverEvalFiles(cwd: string): Promise<readonly DiscoveredEvalFile[]> {\n const repoRoot = await findRepoRoot(cwd);\n\n // Load config to check for custom eval_patterns\n // Pass a dummy file path in cwd so buildDirectoryChain starts from cwd\n const config = await loadConfig(path.join(cwd, '_'), repoRoot);\n const patterns =\n config?.eval_patterns && config.eval_patterns.length > 0\n ? config.eval_patterns\n : DEFAULT_EVAL_PATTERNS;\n\n const ignore = ['**/node_modules/**', '**/dist/**'];\n\n const matches = await fg(patterns as string[], {\n cwd,\n absolute: true,\n onlyFiles: true,\n ignore,\n followSymbolicLinks: true,\n caseSensitiveMatch: false,\n });\n\n const evalFiles: DiscoveredEvalFile[] = matches.map((absPath) => {\n const relativePath = path.relative(cwd, absPath);\n const category = deriveCategory(relativePath);\n return { path: absPath, relativePath, category };\n });\n\n evalFiles.sort((a, b) => a.relativePath.localeCompare(b.relativePath));\n return evalFiles;\n}\n\n/** Derive a human-readable category from the relative path. */\nfunction deriveCategory(relativePath: string): string {\n const parts = relativePath.split(path.sep);\n // Use the first meaningful directory as category\n // e.g., \"examples/showcase/export-screening/evals/dataset.eval.yaml\" → \"showcase/export-screening\"\n // e.g., \"evals/dataset.eval.yaml\" → \"evals\"\n if (parts.length <= 1) {\n return 'root';\n }\n\n // Remove the filename and \"evals\" folder if present\n const dirs = parts.slice(0, -1).filter((d) => d !== 'evals');\n return dirs.length > 0 ? dirs.join('/') : 'root';\n}\n\n/** Get unique categories from discovered eval files. */\nexport function getCategories(files: readonly DiscoveredEvalFile[]): readonly string[] {\n const categories = new Set<string>();\n for (const file of files) {\n categories.add(file.category);\n }\n const sorted = Array.from(categories);\n sorted.sort();\n return sorted;\n}\n\n/** Filter eval files by category. */\nexport function filterByCategory(\n files: readonly DiscoveredEvalFile[],\n category: string,\n): readonly DiscoveredEvalFile[] {\n return files.filter((f) => f.category === category);\n}\n","import { mkdir, readFile, writeFile } from 'node:fs/promises';\nimport path from 'node:path';\nimport { getAgentvHome } from '@agentv/core';\n\nconst AGENTV_DIR = getAgentvHome();\nconst LAST_CONFIG_PATH = path.join(AGENTV_DIR, 'last-config.json');\n\nexport interface LastConfig {\n readonly timestamp: string;\n readonly cwd: string;\n readonly evalPaths: readonly string[];\n readonly target: string;\n readonly workers: number;\n readonly dryRun: boolean;\n readonly cache: boolean;\n}\n\nexport async function loadLastConfig(): Promise<LastConfig | undefined> {\n try {\n const content = await readFile(LAST_CONFIG_PATH, 'utf-8');\n return JSON.parse(content) as LastConfig;\n } catch {\n return undefined;\n }\n}\n\nexport async function saveLastConfig(config: LastConfig): Promise<void> {\n await mkdir(AGENTV_DIR, { recursive: true });\n await writeFile(LAST_CONFIG_PATH, JSON.stringify(config, null, 2), 'utf-8');\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;AAAA,OAAOA,WAAU;AAEjB,SAAS,UAAU,SAAS,QAAQ,QAAQ,cAAc;;;ACF1D,OAAO,UAAU;AAEjB,OAAO,QAAQ;AAoBf,eAAsB,kBAAkB,KAAqD;AAC3F,QAAM,WAAW,MAAM,aAAa,GAAG;AAIvC,QAAM,SAAS,MAAM,WAAW,KAAK,KAAK,KAAK,GAAG,GAAG,QAAQ;AAC7D,QAAM,WACJ,QAAQ,iBAAiB,OAAO,cAAc,SAAS,IACnD,OAAO,gBACP;AAEN,QAAM,SAAS,CAAC,sBAAsB,YAAY;AAElD,QAAM,UAAU,MAAM,GAAG,UAAsB;AAAA,IAC7C;AAAA,IACA,UAAU;AAAA,IACV,WAAW;AAAA,IACX;AAAA,IACA,qBAAqB;AAAA,IACrB,oBAAoB;AAAA,EACtB,CAAC;AAED,QAAM,YAAkC,QAAQ,IAAI,CAAC,YAAY;AAC/D,UAAM,eAAe,KAAK,SAAS,KAAK,OAAO;AAC/C,UAAM,WAAW,eAAe,YAAY;AAC5C,WAAO,EAAE,MAAM,SAAS,cAAc,SAAS;AAAA,EACjD,CAAC;AAED,YAAU,KAAK,CAAC,GAAG,MAAM,EAAE,aAAa,cAAc,EAAE,YAAY,CAAC;AACrE,SAAO;AACT;AAGA,SAAS,eAAe,cAA8B;AACpD,QAAM,QAAQ,aAAa,MAAM,KAAK,GAAG;AAIzC,MAAI,MAAM,UAAU,GAAG;AACrB,WAAO;AAAA,EACT;AAGA,QAAM,OAAO,MAAM,MAAM,GAAG,EAAE,EAAE,OAAO,CAAC,MAAM,MAAM,OAAO;AAC3D,SAAO,KAAK,SAAS,IAAI,KAAK,KAAK,GAAG,IAAI;AAC5C;AAGO,SAAS,cAAc,OAAyD;AACrF,QAAM,aAAa,oBAAI,IAAY;AACnC,aAAW,QAAQ,OAAO;AACxB,eAAW,IAAI,KAAK,QAAQ;AAAA,EAC9B;AACA,QAAM,SAAS,MAAM,KAAK,UAAU;AACpC,SAAO,KAAK;AACZ,SAAO;AACT;AAGO,SAAS,iBACd,OACA,UAC+B;AAC/B,SAAO,MAAM,OAAO,CAAC,MAAM,EAAE,aAAa,QAAQ;AACpD;;;ACtFA,SAAS,OAAO,UAAU,iBAAiB;AAC3C,OAAOC,WAAU;AAGjB,IAAM,aAAa,cAAc;AACjC,IAAM,mBAAmBC,MAAK,KAAK,YAAY,kBAAkB;AAYjE,eAAsB,iBAAkD;AACtE,MAAI;AACF,UAAM,UAAU,MAAM,SAAS,kBAAkB,OAAO;AACxD,WAAO,KAAK,MAAM,OAAO;AAAA,EAC3B,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEA,eAAsB,eAAe,QAAmC;AACtE,QAAM,MAAM,YAAY,EAAE,WAAW,KAAK,CAAC;AAC3C,QAAM,UAAU,kBAAkB,KAAK,UAAU,QAAQ,MAAM,CAAC,GAAG,OAAO;AAC5E;;;AFdA,IAAM,YAAY;AAClB,IAAM,WAAW;AACjB,IAAM,YAAY;AAClB,IAAM,aAAa;AACnB,IAAM,aAAa;AAanB,eAAsB,0BAAyC;AAC7D,QAAM,MAAM,QAAQ,IAAI;AAExB,UAAQ,IAAI;AAAA,EAAK,SAAS,GAAG,SAAS,0BAA0B,UAAU;AAAA,CAAI;AAE9E,QAAM,aAAa,MAAM,eAAe;AACxC,QAAM,SAAS,MAAM,eAAe,UAAU;AAE9C,MAAI,WAAW,QAAQ;AACrB;AAAA,EACF;AAEA,MAAI,WAAW,WAAW,YAAY;AACpC,YAAQ,IAAI;AAAA,EAAK,QAAQ,kCAAkC,UAAU;AAAA,CAAI;AACzE,UAAM,cAAc;AAAA,MAClB,WAAW,WAAW;AAAA,MACtB,QAAQ,WAAW;AAAA,MACnB,SAAS,WAAW;AAAA,MACpB,QAAQ,WAAW;AAAA,MACnB,OAAO,WAAW;AAAA,IACpB,CAAC;AACD;AAAA,EACF;AAGA,QAAM,SAAS,MAAM,oBAAoB,GAAG;AAC5C,MAAI,CAAC,QAAQ;AACX;AAAA,EACF;AAGA,QAAM,YAAY,MAAM,uBAAuB,QAAQ,GAAG;AAC1D,MAAI,CAAC,WAAW;AACd;AAAA,EACF;AAGA,QAAM,eAAe;AAAA,IACnB,YAAW,oBAAI,KAAK,GAAE,YAAY;AAAA,IAClC;AAAA,IACA,WAAW,OAAO;AAAA,IAClB,QAAQ,OAAO;AAAA,IACf,SAAS,OAAO;AAAA,IAChB,QAAQ,OAAO;AAAA,IACf,OAAO,OAAO;AAAA,EAChB,CAAC;AAED,QAAM,cAAc,MAAM;AAC5B;AAEA,eAAe,eACb,YACmC;AAEnC,QAAM,UAA4E,CAAC;AAEnF,MAAI,YAAY;AACd,UAAM,YAAY,WAAW,UAAU;AACvC,YAAQ,KAAK;AAAA,MACX,MAAM;AAAA,MACN,OAAO;AAAA,MACP,aAAa,GAAG,SAAS,0BAA0B,WAAW,MAAM;AAAA,IACtE,CAAC;AAAA,EACH;AAEA,UAAQ,KAAK,EAAE,MAAM,gCAAyB,OAAO,MAAM,GAAG,EAAE,MAAM,eAAU,OAAO,OAAO,CAAC;AAE/F,SAAO,OAAmB;AAAA,IACxB,SAAS;AAAA,IACT;AAAA,EACF,CAAC;AACH;AAEA,eAAe,oBAAoB,KAAqD;AAEtF,UAAQ,IAAI;AAAA,EAAK,QAAQ,6BAA6B,UAAU,EAAE;AAClE,QAAM,WAAW,MAAM,kBAAkB,GAAG;AAE5C,MAAI,SAAS,WAAW,GAAG;AACzB,YAAQ;AAAA,MACN;AAAA,IAGF;AACA,WAAO;AAAA,EACT;AAEA,UAAQ,IAAI,GAAG,QAAQ,SAAS,SAAS,MAAM,gBAAgB,UAAU;AAAA,CAAI;AAG7E,QAAM,gBAAgB,MAAM,oBAAoB,QAAQ;AACxD,MAAI,cAAc,WAAW,GAAG;AAC9B,YAAQ,IAAI,2BAA2B;AACvC,WAAO;AAAA,EACT;AAGA,QAAM,SAAS,MAAM,sBAAsB,KAAK,cAAc,CAAC,EAAE,IAAI;AAGrE,QAAM,WAAW,MAAM,sBAAsB;AAE7C,SAAO;AAAA,IACL,WAAW,cAAc,IAAI,CAAC,MAAM,EAAE,IAAI;AAAA,IAC1C;AAAA,IACA,GAAG;AAAA,EACL;AACF;AAEA,eAAe,oBACb,UAC+B;AAC/B,QAAM,aAAa,cAAc,QAAQ;AAGzC,MAAI;AAEJ,MAAI,WAAW,SAAS,GAAG;AACzB,UAAM,mBAAmB,MAAM,OAAe;AAAA,MAC5C,SAAS;AAAA,MACT,QAAQ,OAAO,SAAS;AACtB,cAAM,WAAW,OACb,WAAW,OAAO,CAAC,MAAM,EAAE,YAAY,EAAE,SAAS,KAAK,YAAY,CAAC,CAAC,IACrE;AACJ,eAAO;AAAA,UACL,EAAE,MAAM,oBAAoB,OAAO,UAAU;AAAA,UAC7C,GAAG,SAAS,IAAI,CAAC,MAAM;AACrB,kBAAM,QAAQ,iBAAiB,UAAU,CAAC,EAAE;AAC5C,mBAAO,EAAE,MAAM,GAAG,CAAC,KAAK,KAAK,QAAQ,QAAQ,IAAI,MAAM,EAAE,KAAK,OAAO,EAAE;AAAA,UACzE,CAAC;AAAA,QACH;AAAA,MACF;AAAA,IACF,CAAC;AAED,oBACE,qBAAqB,YAAY,WAAW,iBAAiB,UAAU,gBAAgB;AAAA,EAC3F,OAAO;AACL,oBAAgB;AAAA,EAClB;AAEA,SAAO,SAA6B;AAAA,IAClC,SAAS;AAAA,IACT,SAAS,cAAc,IAAI,CAAC,OAAO;AAAA,MACjC,MAAM,EAAE;AAAA,MACR,OAAO;AAAA,MACP,SAAS,cAAc,UAAU;AAAA;AAAA,IACnC,EAAE;AAAA,IACF,UAAU;AAAA,EACZ,CAAC;AACH;AAEA,eAAe,sBAAsB,KAAa,eAAwC;AACxF,QAAM,WAAW,MAAM,aAAa,GAAG;AAGvC,QAAM,cAAc,MAAM,gBAAgB,KAAK,UAAU,aAAa;AAEtE,MAAI,CAAC,aAAa;AAChB,YAAQ,IAAI,GAAG,QAAQ,+CAA+C,UAAU,EAAE;AAClF,WAAO;AAAA,EACT;AAEA,QAAM,cAAc,MAAM,sBAAsB,WAAW;AAC3D,QAAM,cAAc,gBAAgB,WAAW;AAE/C,MAAI,YAAY,WAAW,GAAG;AAC5B,WAAO;AAAA,EACT;AAEA,MAAI,YAAY,WAAW,GAAG;AAC5B,YAAQ,IAAI,GAAG,QAAQ,iBAAiB,YAAY,CAAC,CAAC,GAAG,UAAU,EAAE;AACrE,WAAO,YAAY,CAAC;AAAA,EACtB;AAEA,SAAO,OAAe;AAAA,IACpB,SAAS;AAAA,IACT,QAAQ,OAAO,SAAS;AACtB,YAAM,WAAW,OACb,YAAY,OAAO,CAAC,MAAM,EAAE,YAAY,EAAE,SAAS,KAAK,YAAY,CAAC,CAAC,IACtE;AACJ,aAAO,SAAS,IAAI,CAAC,MAAM;AACzB,cAAM,MAAM,YAAY,KAAK,CAAC,MAAM,EAAE,SAAS,CAAC;AAChD,eAAO;AAAA,UACL,MAAM;AAAA,UACN,OAAO;AAAA,UACP,aAAa,MAAM,aAAa,IAAI,QAAQ,KAAK;AAAA,QACnD;AAAA,MACF,CAAC;AAAA,IACH;AAAA,EACF,CAAC;AACH;AAEA,eAAe,gBACb,KACA,UACA,cAC6B;AAE7B,QAAM,eAAyB,CAAC;AAEhC,MAAI,cAAc;AAChB,UAAM,UAAUC,MAAK,QAAQ,YAAY;AACzC,QAAI,CAAC,aAAa,SAAS,OAAO,GAAG;AACnC,mBAAa,KAAK,OAAO;AAAA,IAC3B;AAAA,EACF;AAEA,MAAI,CAAC,aAAa,SAAS,GAAG,GAAG;AAC/B,iBAAa,KAAK,GAAG;AAAA,EACvB;AAEA,MAAI,aAAa,OAAO,CAAC,aAAa,SAAS,QAAQ,GAAG;AACxD,iBAAa,KAAK,QAAQ;AAAA,EAC5B;AAEA,aAAW,OAAO,cAAc;AAC9B,eAAW,aAAa,wBAAwB;AAC9C,YAAM,WAAW,GAAG,GAAG,IAAI,SAAS;AACpC,UAAI,MAAM,WAAW,QAAQ,GAAG;AAC9B,eAAO;AAAA,MACT;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AACT;AAEA,eAAe,wBAIZ;AACD,QAAM,YAAY,MAAM,QAAQ;AAAA,IAC9B,SAAS;AAAA,IACT,SAAS;AAAA,EACX,CAAC;AAED,MAAI,CAAC,WAAW;AACd,WAAO,EAAE,SAAS,GAAG,QAAQ,OAAO,OAAO,MAAM;AAAA,EACnD;AAEA,QAAM,UACH,MAAM,OAAO;AAAA,IACZ,SAAS;AAAA,IACT,SAAS;AAAA,IACT,KAAK;AAAA,IACL,KAAK;AAAA,EACP,CAAC,KAAM;AAET,QAAM,SAAS,MAAM,QAAQ;AAAA,IAC3B,SAAS;AAAA,IACT,SAAS;AAAA,EACX,CAAC;AAED,QAAM,QAAQ,MAAM,QAAQ;AAAA,IAC1B,SAAS;AAAA,IACT,SAAS;AAAA,EACX,CAAC;AAED,SAAO,EAAE,SAAS,QAAQ,MAAM;AAClC;AAEA,eAAe,uBAAuB,QAA2B,KAA+B;AAC9F,QAAM,cAAc,OAAO,UACxB,IAAI,CAAC,MAAM;AACV,UAAM,MAAM,EAAE,WAAW,GAAG,IAAI,EAAE,MAAM,IAAI,SAAS,CAAC,IAAI;AAC1D,WAAO,KAAK,GAAG;AAAA,EACjB,CAAC,EACA,KAAK,IAAI;AAEZ,UAAQ,IAAI;AAAA,EAAK,SAAS,uBAAuB,UAAU,EAAE;AAC7D,UAAQ,IAAI,GAAG,QAAQ,GAAG,SAAI,OAAO,EAAE,CAAC,GAAG,UAAU,EAAE;AACvD,UAAQ,IAAI,GAAG,UAAU,cAAc,UAAU;AAAA,EAAK,WAAW,EAAE;AACnE,UAAQ,IAAI,GAAG,UAAU,UAAU,UAAU,OAAO,OAAO,MAAM,EAAE;AACnE,UAAQ,IAAI,GAAG,UAAU,WAAW,UAAU,MAAM,OAAO,OAAO,EAAE;AACpE,UAAQ,IAAI,GAAG,UAAU,WAAW,UAAU,MAAM,OAAO,SAAS,QAAQ,IAAI,EAAE;AAClF,UAAQ,IAAI,GAAG,UAAU,SAAS,UAAU,QAAQ,OAAO,QAAQ,QAAQ,IAAI,EAAE;AACjF,UAAQ,IAAI,GAAG,QAAQ,GAAG,SAAI,OAAO,EAAE,CAAC,GAAG,UAAU,EAAE;AAEvD,SAAO,QAAQ;AAAA,IACb,SAAS;AAAA,IACT,SAAS;AAAA,EACX,CAAC;AACH;AAEA,eAAe,cAAc,QAA0C;AACrE,QAAM,aAAsC;AAAA,IAC1C,QAAQ,OAAO;AAAA,IACf,SAAS,OAAO;AAAA,IAChB,QAAQ,OAAO;AAAA,IACf,OAAO,OAAO;AAAA,IACd,cAAc;AAAA,IACd,aAAa;AAAA,IACb,gBAAgB;AAAA,IAChB,gBAAgB;AAAA,IAChB,cAAc;AAAA,IACd,YAAY;AAAA,IACZ,SAAS;AAAA,IACT,gBAAgB;AAAA,IAChB,mBAAmB;AAAA,IACnB,OAAO;AAAA,EACT;AAEA,QAAM,eAAe;AAAA,IACnB,WAAW,CAAC,GAAG,OAAO,SAAS;AAAA,IAC/B;AAAA,EACF,CAAC;AACH;","names":["path","path","path","path"]}
@@ -2,7 +2,7 @@ import { createRequire } from 'node:module'; const require = createRequire(impor
2
2
  import {
3
3
  defaultResource,
4
4
  init_esm as init_esm4
5
- } from "./chunk-FV32QHPB.js";
5
+ } from "./chunk-XOSNETAV.js";
6
6
  import {
7
7
  BindOnceFuture,
8
8
  ExportResultCode,
@@ -1730,4 +1730,4 @@ var require_src2 = __commonJS({
1730
1730
  }
1731
1731
  });
1732
1732
  export default require_src2();
1733
- //# sourceMappingURL=src-2N5EJ2N6.js.map
1733
+ //# sourceMappingURL=src-ML4D2MC2.js.map
@@ -12,11 +12,11 @@ targets:
12
12
 
13
13
  - name: vscode
14
14
  provider: vscode
15
- judge_target: azure_base
15
+ judge_target: azure-llm
16
16
 
17
17
  - name: codex
18
18
  provider: codex
19
- judge_target: azure_base
19
+ judge_target: azure-llm
20
20
  # Uses the Codex CLI (defaults to `codex` on PATH)
21
21
  # executable: ${{ CODEX_CLI_PATH }} # Optional: override executable path
22
22
  # args: # Optional additional CLI arguments
@@ -26,7 +26,6 @@ targets:
26
26
  # - ${{ CODEX_MODEL }}
27
27
  # - --ask-for-approval
28
28
  # - ${{ CODEX_APPROVAL_PRESET }}
29
- timeout_seconds: 180
30
29
  cwd: ${{ CODEX_WORKSPACE_DIR }} # Where scratch workspaces are created
31
30
  log_dir: ${{ CODEX_LOG_DIR }} # Optional: where Codex CLI stream logs are stored (defaults to ./.agentv/logs/codex)
32
31
  log_format: json # Optional: 'summary' (default) or 'json' for raw event logs
@@ -34,10 +33,9 @@ targets:
34
33
  # Claude - Anthropic's Claude Agent SDK
35
34
  - name: claude
36
35
  provider: claude
37
- judge_target: azure_base
36
+ judge_target: azure-llm
38
37
  # Uses the @anthropic-ai/claude-agent-sdk
39
38
  # model: claude-sonnet-4-20250514 # Optional: override model
40
- timeout_seconds: 180
41
39
  # cwd: ${{ CLAUDE_WORKSPACE_DIR }} # Optional: working directory (defaults to process.cwd())
42
40
  # max_turns: 50 # Optional: max conversation turns
43
41
  # max_budget_usd: 5.0 # Optional: max cost budget in USD
@@ -49,29 +47,29 @@ targets:
49
47
  provider: vscode
50
48
  workspace_template: ${{ PROJECTX_WORKSPACE_PATH }}
51
49
  provider_batching: false
52
- judge_target: azure_base
50
+ judge_target: azure-llm
53
51
 
54
52
  - name: vscode_insiders_projectx
55
53
  provider: vscode-insiders
56
54
  workspace_template: ${{ PROJECTX_WORKSPACE_PATH }}
57
55
  provider_batching: false
58
- judge_target: azure_base
56
+ judge_target: azure-llm
59
57
 
60
- - name: azure_base
58
+ - name: azure-llm
61
59
  provider: azure
62
60
  endpoint: ${{ AZURE_OPENAI_ENDPOINT }}
63
61
  api_key: ${{ AZURE_OPENAI_API_KEY }}
64
62
  model: ${{ AZURE_DEPLOYMENT_NAME }}
65
63
  version: ${{ AZURE_OPENAI_API_VERSION }}
66
64
 
67
- - name: gemini_base
65
+ - name: gemini-llm
68
66
  provider: gemini
69
67
  api_key: ${{ GOOGLE_GENERATIVE_AI_API_KEY }}
70
68
  model: ${{ GEMINI_MODEL_NAME }}
71
69
 
72
70
  - name: local_cli
73
71
  provider: cli
74
- judge_target: azure_base
72
+ judge_target: azure-llm
75
73
  # Passes the fully rendered prompt and any attached files to a local Python script
76
74
  # NOTE: Do not add quotes around {PROMPT} or {FILES} - they are already shell-escaped
77
75
  command: uv run ./mock_cli.py --prompt {PROMPT} {FILES} --output {OUTPUT_FILE}
@@ -79,6 +77,5 @@ targets:
79
77
  files_format: --file {path}
80
78
  # Optional working directory resolved from .env
81
79
  cwd: ${{ CLI_EVALS_DIR }}
82
- timeout_seconds: 30
83
80
  healthcheck:
84
81
  command: uv run ./mock_cli.py --healthcheck
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentv",
3
- "version": "2.19.0",
3
+ "version": "3.0.0-next.1",
4
4
  "description": "CLI entry point for AgentV",
5
5
  "type": "module",
6
6
  "repository": {
@@ -43,7 +43,7 @@
43
43
  "yaml": "^2.6.1"
44
44
  },
45
45
  "devDependencies": {
46
- "@agentv/core": "2.12.0",
46
+ "@agentv/core": "2.19.0",
47
47
  "@types/semver": "^7.7.1",
48
48
  "execa": "^9.3.0"
49
49
  }