agentv 4.11.2 → 4.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-MIP46NEN.js → chunk-4MEGL2E3.js} +5 -5
- package/dist/{chunk-MIP46NEN.js.map → chunk-4MEGL2E3.js.map} +1 -1
- package/dist/{chunk-FQGY6QXQ.js → chunk-CXAO4VPP.js} +43 -43
- package/dist/chunk-CXAO4VPP.js.map +1 -0
- package/dist/{chunk-7TJ2PON3.js → chunk-VVWPD4CN.js} +104 -89
- package/dist/chunk-VVWPD4CN.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-HNSXNRVK.js → dist-D6EJ3O7Q.js} +20 -20
- package/dist/index.js +3 -3
- package/dist/{interactive-LRW3X5OF.js → interactive-SP2LWOQX.js} +3 -3
- package/dist/studio/assets/index-BdR2qr8G.js +65 -0
- package/dist/studio/assets/{index-VyDFrnoK.js → index-CkXzhDmw.js} +1 -1
- package/dist/studio/assets/index-XVVBVabi.css +1 -0
- package/dist/studio/index.html +2 -2
- package/package.json +1 -1
- package/dist/chunk-7TJ2PON3.js.map +0 -1
- package/dist/chunk-FQGY6QXQ.js.map +0 -1
- package/dist/studio/assets/index-Bi-KHfNm.js +0 -65
- package/dist/studio/assets/index-D_j-w4UO.css +0 -1
- /package/dist/{dist-HNSXNRVK.js.map → dist-D6EJ3O7Q.js.map} +0 -0
- /package/dist/{interactive-LRW3X5OF.js.map → interactive-SP2LWOQX.js.map} +0 -0
package/dist/cli.js
CHANGED
|
@@ -2,9 +2,9 @@
|
|
|
2
2
|
import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
|
|
3
3
|
import {
|
|
4
4
|
runCli
|
|
5
|
-
} from "./chunk-7TJ2PON3.js";
|
|
6
|
-
import "./chunk-MIP46NEN.js";
|
|
7
|
-
import "./chunk-FQGY6QXQ.js";
|
|
5
|
+
} from "./chunk-VVWPD4CN.js";
|
|
6
|
+
import "./chunk-4MEGL2E3.js";
|
|
7
|
+
import "./chunk-CXAO4VPP.js";
|
|
8
8
|
import "./chunk-QOBQ5XYF.js";
|
|
9
9
|
import "./chunk-NPVGBFF6.js";
|
|
10
10
|
import "./chunk-HQDCIXVH.js";
|
|
package/dist/{dist-HNSXNRVK.js → dist-D6EJ3O7Q.js}
CHANGED
|
@@ -31,7 +31,7 @@ import {
|
|
|
31
31
|
TranscriptProvider,
|
|
32
32
|
WorkspaceCreationError,
|
|
33
33
|
WorkspacePoolManager,
|
|
34
|
-
|
|
34
|
+
addBenchmark,
|
|
35
35
|
assembleLlmGraderPrompt,
|
|
36
36
|
avgToolDurationMs,
|
|
37
37
|
buildDirectoryChain,
|
|
@@ -62,16 +62,16 @@ import {
|
|
|
62
62
|
createTempWorkspace,
|
|
63
63
|
deepEqual,
|
|
64
64
|
defineConfig,
|
|
65
|
+
deriveBenchmarkId,
|
|
65
66
|
deriveCategory,
|
|
66
|
-
deriveProjectId,
|
|
67
67
|
detectFormat,
|
|
68
68
|
directorySizeBytes,
|
|
69
69
|
discoverAssertions,
|
|
70
|
+
discoverBenchmarks,
|
|
70
71
|
discoverClaudeSessions,
|
|
71
72
|
discoverCodexSessions,
|
|
72
73
|
discoverCopilotSessions,
|
|
73
74
|
discoverGraders,
|
|
74
|
-
discoverProjects,
|
|
75
75
|
discoverProviders,
|
|
76
76
|
ensureResultsRepoClone,
|
|
77
77
|
ensureVSCodeSubagents,
|
|
@@ -95,9 +95,9 @@ import {
|
|
|
95
95
|
freeformEvaluationSchema,
|
|
96
96
|
generateRubrics,
|
|
97
97
|
getAgentvHome,
|
|
98
|
+
getBenchmark,
|
|
99
|
+
getBenchmarksRegistryPath,
|
|
98
100
|
getOutputFilenames,
|
|
99
|
-
getProject,
|
|
100
|
-
getProjectsRegistryPath,
|
|
101
101
|
getResultsRepoCachePaths,
|
|
102
102
|
getResultsRepoStatus,
|
|
103
103
|
getSubagentsRoot,
|
|
@@ -117,11 +117,11 @@ import {
|
|
|
117
117
|
isTestMessage,
|
|
118
118
|
isTestMessageRole,
|
|
119
119
|
listTargetNames,
|
|
120
|
+
loadBenchmarkRegistry,
|
|
120
121
|
loadConfig,
|
|
121
122
|
loadEvalCaseById,
|
|
122
123
|
loadEvalCases,
|
|
123
124
|
loadEvalSuite,
|
|
124
|
-
loadProjectRegistry,
|
|
125
125
|
loadTestById,
|
|
126
126
|
loadTestSuite,
|
|
127
127
|
loadTests,
|
|
@@ -144,7 +144,7 @@ import {
|
|
|
144
144
|
readTextFile,
|
|
145
145
|
readTranscriptFile,
|
|
146
146
|
readTranscriptJsonl,
|
|
147
|
-
|
|
147
|
+
removeBenchmark,
|
|
148
148
|
resolveAndCreateProvider,
|
|
149
149
|
resolveDelegatedTargetDefinition,
|
|
150
150
|
resolveFileReference,
|
|
@@ -166,7 +166,7 @@ import {
|
|
|
166
166
|
runIsJsonAssertion,
|
|
167
167
|
runRegexAssertion,
|
|
168
168
|
runStartsWithAssertion,
|
|
169
|
-
|
|
169
|
+
saveBenchmarkRegistry,
|
|
170
170
|
scanRepoDeps,
|
|
171
171
|
scoreToVerdict,
|
|
172
172
|
shouldEnableCache,
|
|
@@ -183,11 +183,11 @@ import {
|
|
|
183
183
|
toSnakeCaseDeep,
|
|
184
184
|
toTranscriptJsonLine,
|
|
185
185
|
tokensPerTool,
|
|
186
|
-
|
|
186
|
+
touchBenchmark,
|
|
187
187
|
transpileEvalYaml,
|
|
188
188
|
transpileEvalYamlFile,
|
|
189
189
|
trimBaselineResult
|
|
190
|
-
} from "./chunk-FQGY6QXQ.js";
|
|
190
|
+
} from "./chunk-CXAO4VPP.js";
|
|
191
191
|
import {
|
|
192
192
|
OtlpJsonFileExporter
|
|
193
193
|
} from "./chunk-QOBQ5XYF.js";
|
|
@@ -234,7 +234,7 @@ export {
|
|
|
234
234
|
TranscriptProvider,
|
|
235
235
|
WorkspaceCreationError,
|
|
236
236
|
WorkspacePoolManager,
|
|
237
|
-
|
|
237
|
+
addBenchmark,
|
|
238
238
|
assembleLlmGraderPrompt,
|
|
239
239
|
assembleLlmGraderPrompt as assembleLlmJudgePrompt,
|
|
240
240
|
avgToolDurationMs,
|
|
@@ -266,17 +266,17 @@ export {
|
|
|
266
266
|
createTempWorkspace,
|
|
267
267
|
deepEqual,
|
|
268
268
|
defineConfig,
|
|
269
|
+
deriveBenchmarkId,
|
|
269
270
|
deriveCategory,
|
|
270
|
-
deriveProjectId,
|
|
271
271
|
detectFormat,
|
|
272
272
|
directorySizeBytes,
|
|
273
273
|
discoverAssertions,
|
|
274
|
+
discoverBenchmarks,
|
|
274
275
|
discoverClaudeSessions,
|
|
275
276
|
discoverCodexSessions,
|
|
276
277
|
discoverCopilotSessions,
|
|
277
278
|
discoverGraders,
|
|
278
279
|
discoverGraders as discoverJudges,
|
|
279
|
-
discoverProjects,
|
|
280
280
|
discoverProviders,
|
|
281
281
|
ensureResultsRepoClone,
|
|
282
282
|
ensureVSCodeSubagents,
|
|
@@ -300,9 +300,9 @@ export {
|
|
|
300
300
|
freeformEvaluationSchema,
|
|
301
301
|
generateRubrics,
|
|
302
302
|
getAgentvHome,
|
|
303
|
+
getBenchmark,
|
|
304
|
+
getBenchmarksRegistryPath,
|
|
303
305
|
getOutputFilenames,
|
|
304
|
-
getProject,
|
|
305
|
-
getProjectsRegistryPath,
|
|
306
306
|
getResultsRepoCachePaths,
|
|
307
307
|
getResultsRepoStatus,
|
|
308
308
|
getSubagentsRoot,
|
|
@@ -322,11 +322,11 @@ export {
|
|
|
322
322
|
isTestMessage,
|
|
323
323
|
isTestMessageRole,
|
|
324
324
|
listTargetNames,
|
|
325
|
+
loadBenchmarkRegistry,
|
|
325
326
|
loadConfig,
|
|
326
327
|
loadEvalCaseById,
|
|
327
328
|
loadEvalCases,
|
|
328
329
|
loadEvalSuite,
|
|
329
|
-
loadProjectRegistry,
|
|
330
330
|
loadTestById,
|
|
331
331
|
loadTestSuite,
|
|
332
332
|
loadTests,
|
|
@@ -349,7 +349,7 @@ export {
|
|
|
349
349
|
readTextFile,
|
|
350
350
|
readTranscriptFile,
|
|
351
351
|
readTranscriptJsonl,
|
|
352
|
-
|
|
352
|
+
removeBenchmark,
|
|
353
353
|
resolveAndCreateProvider,
|
|
354
354
|
resolveDelegatedTargetDefinition,
|
|
355
355
|
resolveFileReference,
|
|
@@ -371,7 +371,7 @@ export {
|
|
|
371
371
|
runIsJsonAssertion,
|
|
372
372
|
runRegexAssertion,
|
|
373
373
|
runStartsWithAssertion,
|
|
374
|
-
|
|
374
|
+
saveBenchmarkRegistry,
|
|
375
375
|
scanRepoDeps,
|
|
376
376
|
scoreToVerdict,
|
|
377
377
|
shouldEnableCache,
|
|
@@ -388,9 +388,9 @@ export {
|
|
|
388
388
|
toSnakeCaseDeep,
|
|
389
389
|
toTranscriptJsonLine,
|
|
390
390
|
tokensPerTool,
|
|
391
|
-
|
|
391
|
+
touchBenchmark,
|
|
392
392
|
transpileEvalYaml,
|
|
393
393
|
transpileEvalYamlFile,
|
|
394
394
|
trimBaselineResult
|
|
395
395
|
};
|
|
396
|
-
//# sourceMappingURL=dist-HNSXNRVK.js.map
|
|
396
|
+
//# sourceMappingURL=dist-D6EJ3O7Q.js.map
|
package/dist/index.js
CHANGED
|
@@ -3,9 +3,9 @@ import {
|
|
|
3
3
|
app,
|
|
4
4
|
preprocessArgv,
|
|
5
5
|
runCli
|
|
6
|
-
} from "./chunk-7TJ2PON3.js";
|
|
7
|
-
import "./chunk-MIP46NEN.js";
|
|
8
|
-
import "./chunk-FQGY6QXQ.js";
|
|
6
|
+
} from "./chunk-VVWPD4CN.js";
|
|
7
|
+
import "./chunk-4MEGL2E3.js";
|
|
8
|
+
import "./chunk-CXAO4VPP.js";
|
|
9
9
|
import "./chunk-QOBQ5XYF.js";
|
|
10
10
|
import "./chunk-NPVGBFF6.js";
|
|
11
11
|
import "./chunk-HQDCIXVH.js";
|
|
package/dist/{interactive-LRW3X5OF.js → interactive-SP2LWOQX.js}
CHANGED
|
@@ -7,12 +7,12 @@ import {
|
|
|
7
7
|
findRepoRoot,
|
|
8
8
|
getCategories,
|
|
9
9
|
runEvalCommand
|
|
10
|
-
} from "./chunk-MIP46NEN.js";
|
|
10
|
+
} from "./chunk-4MEGL2E3.js";
|
|
11
11
|
import {
|
|
12
12
|
getAgentvHome,
|
|
13
13
|
listTargetNames,
|
|
14
14
|
readTargetDefinitions
|
|
15
|
-
} from "./chunk-FQGY6QXQ.js";
|
|
15
|
+
} from "./chunk-CXAO4VPP.js";
|
|
16
16
|
import "./chunk-QOBQ5XYF.js";
|
|
17
17
|
import "./chunk-NPVGBFF6.js";
|
|
18
18
|
import "./chunk-HQDCIXVH.js";
|
|
@@ -328,4 +328,4 @@ ${ANSI_DIM}Retrying execution errors...${ANSI_RESET}
|
|
|
328
328
|
export {
|
|
329
329
|
launchInteractiveWizard
|
|
330
330
|
};
|
|
331
|
-
//# sourceMappingURL=interactive-LRW3X5OF.js.map
|
|
331
|
+
//# sourceMappingURL=interactive-SP2LWOQX.js.map
|