agentv 4.11.2 → 4.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -2,9 +2,9 @@
2
2
  import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
3
3
  import {
4
4
  runCli
5
- } from "./chunk-7TJ2PON3.js";
6
- import "./chunk-MIP46NEN.js";
7
- import "./chunk-FQGY6QXQ.js";
5
+ } from "./chunk-VVWPD4CN.js";
6
+ import "./chunk-4MEGL2E3.js";
7
+ import "./chunk-CXAO4VPP.js";
8
8
  import "./chunk-QOBQ5XYF.js";
9
9
  import "./chunk-NPVGBFF6.js";
10
10
  import "./chunk-HQDCIXVH.js";
@@ -31,7 +31,7 @@ import {
31
31
  TranscriptProvider,
32
32
  WorkspaceCreationError,
33
33
  WorkspacePoolManager,
34
- addProject,
34
+ addBenchmark,
35
35
  assembleLlmGraderPrompt,
36
36
  avgToolDurationMs,
37
37
  buildDirectoryChain,
@@ -62,16 +62,16 @@ import {
62
62
  createTempWorkspace,
63
63
  deepEqual,
64
64
  defineConfig,
65
+ deriveBenchmarkId,
65
66
  deriveCategory,
66
- deriveProjectId,
67
67
  detectFormat,
68
68
  directorySizeBytes,
69
69
  discoverAssertions,
70
+ discoverBenchmarks,
70
71
  discoverClaudeSessions,
71
72
  discoverCodexSessions,
72
73
  discoverCopilotSessions,
73
74
  discoverGraders,
74
- discoverProjects,
75
75
  discoverProviders,
76
76
  ensureResultsRepoClone,
77
77
  ensureVSCodeSubagents,
@@ -95,9 +95,9 @@ import {
95
95
  freeformEvaluationSchema,
96
96
  generateRubrics,
97
97
  getAgentvHome,
98
+ getBenchmark,
99
+ getBenchmarksRegistryPath,
98
100
  getOutputFilenames,
99
- getProject,
100
- getProjectsRegistryPath,
101
101
  getResultsRepoCachePaths,
102
102
  getResultsRepoStatus,
103
103
  getSubagentsRoot,
@@ -117,11 +117,11 @@ import {
117
117
  isTestMessage,
118
118
  isTestMessageRole,
119
119
  listTargetNames,
120
+ loadBenchmarkRegistry,
120
121
  loadConfig,
121
122
  loadEvalCaseById,
122
123
  loadEvalCases,
123
124
  loadEvalSuite,
124
- loadProjectRegistry,
125
125
  loadTestById,
126
126
  loadTestSuite,
127
127
  loadTests,
@@ -144,7 +144,7 @@ import {
144
144
  readTextFile,
145
145
  readTranscriptFile,
146
146
  readTranscriptJsonl,
147
- removeProject,
147
+ removeBenchmark,
148
148
  resolveAndCreateProvider,
149
149
  resolveDelegatedTargetDefinition,
150
150
  resolveFileReference,
@@ -166,7 +166,7 @@ import {
166
166
  runIsJsonAssertion,
167
167
  runRegexAssertion,
168
168
  runStartsWithAssertion,
169
- saveProjectRegistry,
169
+ saveBenchmarkRegistry,
170
170
  scanRepoDeps,
171
171
  scoreToVerdict,
172
172
  shouldEnableCache,
@@ -183,11 +183,11 @@ import {
183
183
  toSnakeCaseDeep,
184
184
  toTranscriptJsonLine,
185
185
  tokensPerTool,
186
- touchProject,
186
+ touchBenchmark,
187
187
  transpileEvalYaml,
188
188
  transpileEvalYamlFile,
189
189
  trimBaselineResult
190
- } from "./chunk-FQGY6QXQ.js";
190
+ } from "./chunk-CXAO4VPP.js";
191
191
  import {
192
192
  OtlpJsonFileExporter
193
193
  } from "./chunk-QOBQ5XYF.js";
@@ -234,7 +234,7 @@ export {
234
234
  TranscriptProvider,
235
235
  WorkspaceCreationError,
236
236
  WorkspacePoolManager,
237
- addProject,
237
+ addBenchmark,
238
238
  assembleLlmGraderPrompt,
239
239
  assembleLlmGraderPrompt as assembleLlmJudgePrompt,
240
240
  avgToolDurationMs,
@@ -266,17 +266,17 @@ export {
266
266
  createTempWorkspace,
267
267
  deepEqual,
268
268
  defineConfig,
269
+ deriveBenchmarkId,
269
270
  deriveCategory,
270
- deriveProjectId,
271
271
  detectFormat,
272
272
  directorySizeBytes,
273
273
  discoverAssertions,
274
+ discoverBenchmarks,
274
275
  discoverClaudeSessions,
275
276
  discoverCodexSessions,
276
277
  discoverCopilotSessions,
277
278
  discoverGraders,
278
279
  discoverGraders as discoverJudges,
279
- discoverProjects,
280
280
  discoverProviders,
281
281
  ensureResultsRepoClone,
282
282
  ensureVSCodeSubagents,
@@ -300,9 +300,9 @@ export {
300
300
  freeformEvaluationSchema,
301
301
  generateRubrics,
302
302
  getAgentvHome,
303
+ getBenchmark,
304
+ getBenchmarksRegistryPath,
303
305
  getOutputFilenames,
304
- getProject,
305
- getProjectsRegistryPath,
306
306
  getResultsRepoCachePaths,
307
307
  getResultsRepoStatus,
308
308
  getSubagentsRoot,
@@ -322,11 +322,11 @@ export {
322
322
  isTestMessage,
323
323
  isTestMessageRole,
324
324
  listTargetNames,
325
+ loadBenchmarkRegistry,
325
326
  loadConfig,
326
327
  loadEvalCaseById,
327
328
  loadEvalCases,
328
329
  loadEvalSuite,
329
- loadProjectRegistry,
330
330
  loadTestById,
331
331
  loadTestSuite,
332
332
  loadTests,
@@ -349,7 +349,7 @@ export {
349
349
  readTextFile,
350
350
  readTranscriptFile,
351
351
  readTranscriptJsonl,
352
- removeProject,
352
+ removeBenchmark,
353
353
  resolveAndCreateProvider,
354
354
  resolveDelegatedTargetDefinition,
355
355
  resolveFileReference,
@@ -371,7 +371,7 @@ export {
371
371
  runIsJsonAssertion,
372
372
  runRegexAssertion,
373
373
  runStartsWithAssertion,
374
- saveProjectRegistry,
374
+ saveBenchmarkRegistry,
375
375
  scanRepoDeps,
376
376
  scoreToVerdict,
377
377
  shouldEnableCache,
@@ -388,9 +388,9 @@ export {
388
388
  toSnakeCaseDeep,
389
389
  toTranscriptJsonLine,
390
390
  tokensPerTool,
391
- touchProject,
391
+ touchBenchmark,
392
392
  transpileEvalYaml,
393
393
  transpileEvalYamlFile,
394
394
  trimBaselineResult
395
395
  };
396
- //# sourceMappingURL=dist-HNSXNRVK.js.map
396
+ //# sourceMappingURL=dist-D6EJ3O7Q.js.map
package/dist/index.js CHANGED
@@ -3,9 +3,9 @@ import {
3
3
  app,
4
4
  preprocessArgv,
5
5
  runCli
6
- } from "./chunk-7TJ2PON3.js";
7
- import "./chunk-MIP46NEN.js";
8
- import "./chunk-FQGY6QXQ.js";
6
+ } from "./chunk-VVWPD4CN.js";
7
+ import "./chunk-4MEGL2E3.js";
8
+ import "./chunk-CXAO4VPP.js";
9
9
  import "./chunk-QOBQ5XYF.js";
10
10
  import "./chunk-NPVGBFF6.js";
11
11
  import "./chunk-HQDCIXVH.js";
@@ -7,12 +7,12 @@ import {
7
7
  findRepoRoot,
8
8
  getCategories,
9
9
  runEvalCommand
10
- } from "./chunk-MIP46NEN.js";
10
+ } from "./chunk-4MEGL2E3.js";
11
11
  import {
12
12
  getAgentvHome,
13
13
  listTargetNames,
14
14
  readTargetDefinitions
15
- } from "./chunk-FQGY6QXQ.js";
15
+ } from "./chunk-CXAO4VPP.js";
16
16
  import "./chunk-QOBQ5XYF.js";
17
17
  import "./chunk-NPVGBFF6.js";
18
18
  import "./chunk-HQDCIXVH.js";
@@ -328,4 +328,4 @@ ${ANSI_DIM}Retrying execution errors...${ANSI_RESET}
328
328
  export {
329
329
  launchInteractiveWizard
330
330
  };
331
- //# sourceMappingURL=interactive-LRW3X5OF.js.map
331
+ //# sourceMappingURL=interactive-SP2LWOQX.js.map