agentv 4.35.1 → 4.37.0-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{artifact-writer-G57MG52C.js → artifact-writer-GFNKYREE.js} +4 -4
- package/dist/{chunk-INOKS5LF.js → chunk-M7AMFWBZ.js} +275 -58
- package/dist/chunk-M7AMFWBZ.js.map +1 -0
- package/dist/{chunk-KJGYL3M3.js → chunk-N6E5XFOM.js} +213 -85
- package/dist/chunk-N6E5XFOM.js.map +1 -0
- package/dist/{chunk-KNF3AGCI.js → chunk-OYI35QFW.js} +314 -49
- package/dist/chunk-OYI35QFW.js.map +1 -0
- package/dist/{chunk-CRMGUVRZ.js → chunk-P4LSNFZR.js} +85 -19
- package/dist/chunk-P4LSNFZR.js.map +1 -0
- package/dist/{chunk-6QEIZ33V.js → chunk-RL4S2FBZ.js} +2700 -456
- package/dist/chunk-RL4S2FBZ.js.map +1 -0
- package/dist/cli.js +5 -5
- package/dist/dashboard/assets/index-9tV-u4HJ.css +1 -0
- package/dist/dashboard/assets/{index-Bdk-9a_8.js → index-BDRYJsGF.js} +1 -1
- package/dist/dashboard/assets/index-DuESU7zZ.js +118 -0
- package/dist/dashboard/index.html +2 -2
- package/dist/{dist-M4B77IW4.js → dist-OY3JSP6Z.js} +125 -3
- package/dist/index.js +5 -5
- package/dist/{interactive-VYQ5SYMR.js → interactive-CQELHITQ.js} +5 -5
- package/dist/skills/agentv-eval-writer/SKILL.md +6 -0
- package/dist/{ts-eval-loader-EQJX3OLT-THE7D3GR.js → ts-eval-loader-RBTB2HG2-H5TRXZLO.js} +2 -2
- package/package.json +1 -1
- package/dist/chunk-6QEIZ33V.js.map +0 -1
- package/dist/chunk-CRMGUVRZ.js.map +0 -1
- package/dist/chunk-INOKS5LF.js.map +0 -1
- package/dist/chunk-KJGYL3M3.js.map +0 -1
- package/dist/chunk-KNF3AGCI.js.map +0 -1
- package/dist/dashboard/assets/index-BPMAZqjE.css +0 -1
- package/dist/dashboard/assets/index-BWO0UcxG.js +0 -118
- /package/dist/{artifact-writer-G57MG52C.js.map → artifact-writer-GFNKYREE.js.map} +0 -0
- /package/dist/{dist-M4B77IW4.js.map → dist-OY3JSP6Z.js.map} +0 -0
- /package/dist/{interactive-VYQ5SYMR.js.map → interactive-CQELHITQ.js.map} +0 -0
- /package/dist/{ts-eval-loader-EQJX3OLT-THE7D3GR.js.map → ts-eval-loader-RBTB2HG2-H5TRXZLO.js.map} +0 -0
|
@@ -4,8 +4,8 @@
|
|
|
4
4
|
<meta charset="UTF-8" />
|
|
5
5
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
6
6
|
<title>AgentV</title>
|
|
7
|
-
<script type="module" crossorigin src="/assets/index-BWO0UcxG.js"></script>
|
|
8
|
-
<link rel="stylesheet" crossorigin href="/assets/index-BPMAZqjE.css">
|
|
7
|
+
<script type="module" crossorigin src="/assets/index-DuESU7zZ.js"></script>
|
|
8
|
+
<link rel="stylesheet" crossorigin href="/assets/index-9tV-u4HJ.css">
|
|
9
9
|
</head>
|
|
10
10
|
<body class="bg-gray-950 text-gray-100">
|
|
11
11
|
<div id="root"></div>
|
|
@@ -7,11 +7,13 @@ import {
|
|
|
7
7
|
RunBudgetTracker,
|
|
8
8
|
TranscriptProvider,
|
|
9
9
|
addProject,
|
|
10
|
+
buildWipBranchName,
|
|
10
11
|
checkoutResultsRepoBranch,
|
|
11
12
|
commitAndPushResultsBranch,
|
|
12
13
|
createAgentKernel,
|
|
13
14
|
createDraftResultsPr,
|
|
14
15
|
defineConfig,
|
|
16
|
+
deleteWipBranch,
|
|
15
17
|
deriveCategory,
|
|
16
18
|
deriveProjectId,
|
|
17
19
|
directPushResults,
|
|
@@ -39,6 +41,7 @@ import {
|
|
|
39
41
|
parseEnvOutput,
|
|
40
42
|
prepareResultsRepoBranch,
|
|
41
43
|
pushResultsRepoBranch,
|
|
44
|
+
pushWipCheckpoint,
|
|
42
45
|
readTranscriptFile,
|
|
43
46
|
readTranscriptJsonl,
|
|
44
47
|
removeProject,
|
|
@@ -47,6 +50,7 @@ import {
|
|
|
47
50
|
runBeforeSessionHook,
|
|
48
51
|
saveProjectRegistry,
|
|
49
52
|
scanRepoDeps,
|
|
53
|
+
setupWipWorktree,
|
|
50
54
|
stageResultsArtifacts,
|
|
51
55
|
syncProject,
|
|
52
56
|
syncProjects,
|
|
@@ -54,10 +58,12 @@ import {
|
|
|
54
58
|
syncResultsRepoForProject,
|
|
55
59
|
toTranscriptJsonLines,
|
|
56
60
|
touchProject,
|
|
61
|
+
traceFromTranscriptJsonLines,
|
|
62
|
+
traceToTranscriptJsonLines,
|
|
57
63
|
transpileEvalYaml,
|
|
58
64
|
transpileEvalYamlFile,
|
|
59
65
|
trimBaselineResult
|
|
60
|
-
} from "./chunk-KNF3AGCI.js";
|
|
66
|
+
} from "./chunk-OYI35QFW.js";
|
|
61
67
|
import {
|
|
62
68
|
OtlpJsonFileExporter
|
|
63
69
|
} from "./chunk-QOBQ5XYF.js";
|
|
@@ -98,25 +104,62 @@ import {
|
|
|
98
104
|
NormalizedTrajectoryWireSchema,
|
|
99
105
|
PASS_THRESHOLD,
|
|
100
106
|
ProviderRegistry,
|
|
107
|
+
REPLAY_FIXTURE_SCHEMA_VERSION,
|
|
101
108
|
RUBRIC_OPERATOR_VALUES,
|
|
109
|
+
ReplayProvider,
|
|
102
110
|
RepoManager,
|
|
103
111
|
ResponseCache,
|
|
104
112
|
SkillTriggerGrader,
|
|
105
113
|
TEST_MESSAGE_ROLES,
|
|
114
|
+
TRACE_ENVELOPE_SCHEMA_VERSION,
|
|
115
|
+
TRACE_EVENT_TYPES,
|
|
116
|
+
TRACE_REDACTION_LEVELS,
|
|
117
|
+
TRACE_SCHEMA_VERSION,
|
|
118
|
+
TRACE_SOURCE_KINDS,
|
|
119
|
+
TRACE_TOOL_STATUSES,
|
|
106
120
|
TemplateNotDirectoryError,
|
|
107
121
|
TemplateNotFoundError,
|
|
108
122
|
TokenUsageGrader,
|
|
109
123
|
ToolTrajectoryGrader,
|
|
124
|
+
TraceArtifactWireSchema,
|
|
125
|
+
TraceBranchWireSchema,
|
|
126
|
+
TraceEnvelopeBodyWireSchema,
|
|
127
|
+
TraceEnvelopeCaptureWireSchema,
|
|
128
|
+
TraceEnvelopeConversionWarningWireSchema,
|
|
129
|
+
TraceEnvelopeEvalWireSchema,
|
|
130
|
+
TraceEnvelopeReplayWireSchema,
|
|
131
|
+
TraceEnvelopeScoreWireSchema,
|
|
132
|
+
TraceEnvelopeSourceRefWireSchema,
|
|
133
|
+
TraceEnvelopeSourceWireSchema,
|
|
134
|
+
TraceEnvelopeSpanEventWireSchema,
|
|
135
|
+
TraceEnvelopeSpanStatusWireSchema,
|
|
136
|
+
TraceEnvelopeSpanWireSchema,
|
|
137
|
+
TraceEnvelopeWireSchema,
|
|
138
|
+
TraceErrorWireSchema,
|
|
139
|
+
TraceEventWireSchema,
|
|
140
|
+
TraceMessageWireSchema,
|
|
141
|
+
TraceModelWireSchema,
|
|
142
|
+
TraceRawEvidenceWireSchema,
|
|
143
|
+
TraceRedactionStateWireSchema,
|
|
144
|
+
TraceSessionWireSchema,
|
|
145
|
+
TraceSourceRefWireSchema,
|
|
146
|
+
TraceSourceWireSchema,
|
|
147
|
+
TraceToolWireSchema,
|
|
110
148
|
WorkspaceCreationError,
|
|
111
149
|
WorkspacePoolManager,
|
|
150
|
+
appendErrorEventToTrace,
|
|
151
|
+
appendReplayFixtureRecord,
|
|
112
152
|
assembleLlmGraderPrompt,
|
|
113
153
|
avgToolDurationMs,
|
|
114
154
|
buildDirectoryChain,
|
|
115
155
|
buildOutputSchema,
|
|
116
156
|
buildPromptInputs,
|
|
157
|
+
buildReplayFixtureRecord,
|
|
117
158
|
buildRubricOutputSchema,
|
|
118
159
|
buildScoreRangeOutputSchema,
|
|
119
160
|
buildSearchRoots,
|
|
161
|
+
buildTraceEnvelopeFromEvaluationResult,
|
|
162
|
+
buildTraceFromMessages,
|
|
120
163
|
calculateRubricScore,
|
|
121
164
|
captureFileChanges,
|
|
122
165
|
clampScore,
|
|
@@ -159,15 +202,21 @@ import {
|
|
|
159
202
|
extractWorkersFromSuite,
|
|
160
203
|
fileExists,
|
|
161
204
|
findGitRoot,
|
|
205
|
+
findReplayFixtureRecord,
|
|
206
|
+
findTraceEnvelopeReplayRecord,
|
|
207
|
+
formatReplayLookupKey,
|
|
162
208
|
formatToolCalls,
|
|
163
209
|
freeformEvaluationSchema,
|
|
164
210
|
fromNormalizedTrajectoryWire,
|
|
211
|
+
fromTraceArtifactWire,
|
|
212
|
+
fromTraceEnvelopeWire,
|
|
165
213
|
getAgentvConfigDir,
|
|
166
214
|
getAgentvDataDir,
|
|
167
215
|
getAgentvHome,
|
|
168
216
|
getSelectedTrajectoryEvents,
|
|
169
217
|
getSubagentsRoot,
|
|
170
218
|
getTextContent,
|
|
219
|
+
getTraceEnvelopeSummary,
|
|
171
220
|
getTraceStateRoot,
|
|
172
221
|
getWorkspacePath,
|
|
173
222
|
getWorkspacePoolRoot,
|
|
@@ -201,9 +250,13 @@ import {
|
|
|
201
250
|
parseJsonSafe,
|
|
202
251
|
parseYamlValue,
|
|
203
252
|
readJsonFile,
|
|
253
|
+
readReplayFixtureRecords,
|
|
204
254
|
readTargetDefinitions,
|
|
205
255
|
readTestSuiteMetadata,
|
|
206
256
|
readTextFile,
|
|
257
|
+
readTraceEnvelopeReplayRecords,
|
|
258
|
+
replayFixtureRecordToProviderResponse,
|
|
259
|
+
replayLookupIdentityMatches,
|
|
207
260
|
resolveAndCreateProvider,
|
|
208
261
|
resolveDelegatedTargetDefinition,
|
|
209
262
|
resolveFileReference,
|
|
@@ -224,8 +277,10 @@ import {
|
|
|
224
277
|
runIsJsonAssertion,
|
|
225
278
|
runRegexAssertion,
|
|
226
279
|
runStartsWithAssertion,
|
|
280
|
+
sameReplayEvalPath,
|
|
227
281
|
scoreRangeEvaluationSchema,
|
|
228
282
|
scoreToVerdict,
|
|
283
|
+
serializeReplayFixtureRecord,
|
|
229
284
|
shouldEnableCache,
|
|
230
285
|
shouldSkipCacheForTemperature,
|
|
231
286
|
subscribeToClaudeLogEntries,
|
|
@@ -237,10 +292,16 @@ import {
|
|
|
237
292
|
toCamelCaseDeep,
|
|
238
293
|
toNormalizedTrajectoryWire,
|
|
239
294
|
toSnakeCaseDeep,
|
|
295
|
+
toTraceArtifactWire,
|
|
296
|
+
toTraceEnvelopeWire,
|
|
240
297
|
tokensPerTool,
|
|
298
|
+
traceEnvelopeReplayRecordToProviderResponse,
|
|
299
|
+
traceEnvelopeToMessages,
|
|
300
|
+
traceEnvelopeToTraceArtifact,
|
|
301
|
+
traceEnvelopeToTraceSummary,
|
|
241
302
|
trackChild,
|
|
242
303
|
trackedChildCount
|
|
243
|
-
} from "./chunk-6QEIZ33V.js";
|
|
304
|
+
} from "./chunk-RL4S2FBZ.js";
|
|
244
305
|
import "./chunk-NPVGBFF6.js";
|
|
245
306
|
import "./chunk-M7BUKBAF.js";
|
|
246
307
|
import "./chunk-5H446C7X.js";
|
|
@@ -284,28 +345,66 @@ export {
|
|
|
284
345
|
OtlpJsonFileExporter,
|
|
285
346
|
PASS_THRESHOLD,
|
|
286
347
|
ProviderRegistry,
|
|
348
|
+
REPLAY_FIXTURE_SCHEMA_VERSION,
|
|
287
349
|
RUBRIC_OPERATOR_VALUES,
|
|
350
|
+
ReplayProvider,
|
|
288
351
|
RepoManager,
|
|
289
352
|
ResponseCache,
|
|
290
353
|
RunBudgetTracker,
|
|
291
354
|
SkillTriggerGrader,
|
|
292
355
|
TEST_MESSAGE_ROLES,
|
|
356
|
+
TRACE_ENVELOPE_SCHEMA_VERSION,
|
|
357
|
+
TRACE_EVENT_TYPES,
|
|
358
|
+
TRACE_REDACTION_LEVELS,
|
|
359
|
+
TRACE_SCHEMA_VERSION,
|
|
360
|
+
TRACE_SOURCE_KINDS,
|
|
361
|
+
TRACE_TOOL_STATUSES,
|
|
293
362
|
TemplateNotDirectoryError,
|
|
294
363
|
TemplateNotFoundError,
|
|
295
364
|
TokenUsageGrader,
|
|
296
365
|
ToolTrajectoryGrader,
|
|
366
|
+
TraceArtifactWireSchema,
|
|
367
|
+
TraceBranchWireSchema,
|
|
368
|
+
TraceEnvelopeBodyWireSchema,
|
|
369
|
+
TraceEnvelopeCaptureWireSchema,
|
|
370
|
+
TraceEnvelopeConversionWarningWireSchema,
|
|
371
|
+
TraceEnvelopeEvalWireSchema,
|
|
372
|
+
TraceEnvelopeReplayWireSchema,
|
|
373
|
+
TraceEnvelopeScoreWireSchema,
|
|
374
|
+
TraceEnvelopeSourceRefWireSchema,
|
|
375
|
+
TraceEnvelopeSourceWireSchema,
|
|
376
|
+
TraceEnvelopeSpanEventWireSchema,
|
|
377
|
+
TraceEnvelopeSpanStatusWireSchema,
|
|
378
|
+
TraceEnvelopeSpanWireSchema,
|
|
379
|
+
TraceEnvelopeWireSchema,
|
|
380
|
+
TraceErrorWireSchema,
|
|
381
|
+
TraceEventWireSchema,
|
|
382
|
+
TraceMessageWireSchema,
|
|
383
|
+
TraceModelWireSchema,
|
|
384
|
+
TraceRawEvidenceWireSchema,
|
|
385
|
+
TraceRedactionStateWireSchema,
|
|
386
|
+
TraceSessionWireSchema,
|
|
387
|
+
TraceSourceRefWireSchema,
|
|
388
|
+
TraceSourceWireSchema,
|
|
389
|
+
TraceToolWireSchema,
|
|
297
390
|
TranscriptProvider,
|
|
298
391
|
WorkspaceCreationError,
|
|
299
392
|
WorkspacePoolManager,
|
|
300
393
|
addProject,
|
|
394
|
+
appendErrorEventToTrace,
|
|
395
|
+
appendReplayFixtureRecord,
|
|
301
396
|
assembleLlmGraderPrompt,
|
|
302
397
|
avgToolDurationMs,
|
|
303
398
|
buildDirectoryChain,
|
|
304
399
|
buildOutputSchema,
|
|
305
400
|
buildPromptInputs,
|
|
401
|
+
buildReplayFixtureRecord,
|
|
306
402
|
buildRubricOutputSchema,
|
|
307
403
|
buildScoreRangeOutputSchema,
|
|
308
404
|
buildSearchRoots,
|
|
405
|
+
buildTraceEnvelopeFromEvaluationResult,
|
|
406
|
+
buildTraceFromMessages,
|
|
407
|
+
buildWipBranchName,
|
|
309
408
|
calculateRubricScore,
|
|
310
409
|
captureFileChanges,
|
|
311
410
|
checkoutResultsRepoBranch,
|
|
@@ -329,6 +428,7 @@ export {
|
|
|
329
428
|
createTempWorkspace,
|
|
330
429
|
deepEqual,
|
|
331
430
|
defineConfig,
|
|
431
|
+
deleteWipBranch,
|
|
332
432
|
deriveCategory,
|
|
333
433
|
deriveProjectId,
|
|
334
434
|
detectFormat,
|
|
@@ -361,9 +461,14 @@ export {
|
|
|
361
461
|
extractWorkersFromSuite,
|
|
362
462
|
fileExists,
|
|
363
463
|
findGitRoot,
|
|
464
|
+
findReplayFixtureRecord,
|
|
465
|
+
findTraceEnvelopeReplayRecord,
|
|
466
|
+
formatReplayLookupKey,
|
|
364
467
|
formatToolCalls,
|
|
365
468
|
freeformEvaluationSchema,
|
|
366
469
|
fromNormalizedTrajectoryWire,
|
|
470
|
+
fromTraceArtifactWire,
|
|
471
|
+
fromTraceEnvelopeWire,
|
|
367
472
|
generateRubrics,
|
|
368
473
|
getAgentvConfigDir,
|
|
369
474
|
getAgentvDataDir,
|
|
@@ -378,6 +483,7 @@ export {
|
|
|
378
483
|
getSelectedTrajectoryEvents,
|
|
379
484
|
getSubagentsRoot,
|
|
380
485
|
getTextContent,
|
|
486
|
+
getTraceEnvelopeSummary,
|
|
381
487
|
getTraceStateRoot,
|
|
382
488
|
getWorkspacePath,
|
|
383
489
|
getWorkspacePoolRoot,
|
|
@@ -421,13 +527,18 @@ export {
|
|
|
421
527
|
parseYamlValue,
|
|
422
528
|
prepareResultsRepoBranch,
|
|
423
529
|
pushResultsRepoBranch,
|
|
530
|
+
pushWipCheckpoint,
|
|
424
531
|
readJsonFile,
|
|
532
|
+
readReplayFixtureRecords,
|
|
425
533
|
readTargetDefinitions,
|
|
426
534
|
readTestSuiteMetadata,
|
|
427
535
|
readTextFile,
|
|
536
|
+
readTraceEnvelopeReplayRecords,
|
|
428
537
|
readTranscriptFile,
|
|
429
538
|
readTranscriptJsonl,
|
|
430
539
|
removeProject,
|
|
540
|
+
replayFixtureRecordToProviderResponse,
|
|
541
|
+
replayLookupIdentityMatches,
|
|
431
542
|
resolveAndCreateProvider,
|
|
432
543
|
resolveDelegatedTargetDefinition,
|
|
433
544
|
resolveFileReference,
|
|
@@ -451,10 +562,13 @@ export {
|
|
|
451
562
|
runIsJsonAssertion,
|
|
452
563
|
runRegexAssertion,
|
|
453
564
|
runStartsWithAssertion,
|
|
565
|
+
sameReplayEvalPath,
|
|
454
566
|
saveProjectRegistry,
|
|
455
567
|
scanRepoDeps,
|
|
456
568
|
scoreRangeEvaluationSchema,
|
|
457
569
|
scoreToVerdict,
|
|
570
|
+
serializeReplayFixtureRecord,
|
|
571
|
+
setupWipWorktree,
|
|
458
572
|
shouldEnableCache,
|
|
459
573
|
shouldSkipCacheForTemperature,
|
|
460
574
|
stageResultsArtifacts,
|
|
@@ -471,13 +585,21 @@ export {
|
|
|
471
585
|
toCamelCaseDeep,
|
|
472
586
|
toNormalizedTrajectoryWire,
|
|
473
587
|
toSnakeCaseDeep,
|
|
588
|
+
toTraceArtifactWire,
|
|
589
|
+
toTraceEnvelopeWire,
|
|
474
590
|
toTranscriptJsonLines,
|
|
475
591
|
tokensPerTool,
|
|
476
592
|
touchProject,
|
|
593
|
+
traceEnvelopeReplayRecordToProviderResponse,
|
|
594
|
+
traceEnvelopeToMessages,
|
|
595
|
+
traceEnvelopeToTraceArtifact,
|
|
596
|
+
traceEnvelopeToTraceSummary,
|
|
597
|
+
traceFromTranscriptJsonLines,
|
|
598
|
+
traceToTranscriptJsonLines,
|
|
477
599
|
trackChild,
|
|
478
600
|
trackedChildCount,
|
|
479
601
|
transpileEvalYaml,
|
|
480
602
|
transpileEvalYamlFile,
|
|
481
603
|
trimBaselineResult
|
|
482
604
|
};
|
|
483
|
-
//# sourceMappingURL=dist-M4B77IW4.js.map
|
|
605
|
+
//# sourceMappingURL=dist-OY3JSP6Z.js.map
|
package/dist/index.js
CHANGED
|
@@ -4,13 +4,13 @@ import {
|
|
|
4
4
|
preprocessArgv,
|
|
5
5
|
runCli,
|
|
6
6
|
usesDeprecatedStudioAlias
|
|
7
|
-
} from "./chunk-CRMGUVRZ.js";
|
|
8
|
-
import "./chunk-INOKS5LF.js";
|
|
9
|
-
import "./chunk-KJGYL3M3.js";
|
|
10
|
-
import "./chunk-KNF3AGCI.js";
|
|
7
|
+
} from "./chunk-P4LSNFZR.js";
|
|
8
|
+
import "./chunk-M7AMFWBZ.js";
|
|
9
|
+
import "./chunk-N6E5XFOM.js";
|
|
10
|
+
import "./chunk-OYI35QFW.js";
|
|
11
11
|
import "./chunk-QOBQ5XYF.js";
|
|
12
12
|
import "./chunk-BPGJ4HBU.js";
|
|
13
|
-
import "./chunk-6QEIZ33V.js";
|
|
13
|
+
import "./chunk-RL4S2FBZ.js";
|
|
14
14
|
import "./chunk-NPVGBFF6.js";
|
|
15
15
|
import "./chunk-M7BUKBAF.js";
|
|
16
16
|
import "./chunk-5H446C7X.js";
|
|
@@ -7,16 +7,16 @@ import {
|
|
|
7
7
|
findRepoRoot,
|
|
8
8
|
getCategories,
|
|
9
9
|
runEvalCommand
|
|
10
|
-
} from "./chunk-INOKS5LF.js";
|
|
11
|
-
import "./chunk-KJGYL3M3.js";
|
|
12
|
-
import "./chunk-KNF3AGCI.js";
|
|
10
|
+
} from "./chunk-M7AMFWBZ.js";
|
|
11
|
+
import "./chunk-N6E5XFOM.js";
|
|
12
|
+
import "./chunk-OYI35QFW.js";
|
|
13
13
|
import "./chunk-QOBQ5XYF.js";
|
|
14
14
|
import "./chunk-BPGJ4HBU.js";
|
|
15
15
|
import {
|
|
16
16
|
getAgentvConfigDir,
|
|
17
17
|
listTargetNames,
|
|
18
18
|
readTargetDefinitions
|
|
19
|
-
} from "./chunk-6QEIZ33V.js";
|
|
19
|
+
} from "./chunk-RL4S2FBZ.js";
|
|
20
20
|
import "./chunk-NPVGBFF6.js";
|
|
21
21
|
import "./chunk-M7BUKBAF.js";
|
|
22
22
|
import "./chunk-5H446C7X.js";
|
|
@@ -360,4 +360,4 @@ ${ANSI_DIM}Retrying execution errors...${ANSI_RESET}
|
|
|
360
360
|
export {
|
|
361
361
|
launchInteractiveWizard
|
|
362
362
|
};
|
|
363
|
-
//# sourceMappingURL=interactive-VYQ5SYMR.js.map
|
|
363
|
+
//# sourceMappingURL=interactive-CQELHITQ.js.map
|
|
@@ -544,6 +544,10 @@ agentv eval <file.yaml> [--test-id <id>] [--target <name>] [--dry-run] [--thresh
|
|
|
544
544
|
# Run with OTLP JSON file (importable by OTel backends)
|
|
545
545
|
agentv eval <file.yaml> --otel-file traces/eval.otlp.json
|
|
546
546
|
|
|
547
|
+
# Record live target output for later target substitution
|
|
548
|
+
agentv eval <file.yaml> --target live_agent --record-replay fixtures/target-output.jsonl
|
|
549
|
+
agentv eval <file.yaml> --target replay_agent
|
|
550
|
+
|
|
547
551
|
# Run a single assertion in isolation (no API keys needed)
|
|
548
552
|
agentv eval assert <grader-name> --agent-output "..." --agent-input "..."
|
|
549
553
|
|
|
@@ -567,6 +571,8 @@ agentv compare .agentv/results/runs/<baseline-timestamp>/index.jsonl .agentv/res
|
|
|
567
571
|
agentv validate <file.yaml>
|
|
568
572
|
```
|
|
569
573
|
|
|
574
|
+
**Replay targets:** Add `provider: replay`, `fixtures: <jsonl>`, and `source_target: <live target name>` in `.agentv/targets.yaml`. Optional `suite`, `eval_path`, and `variant` tighten lookup. The eval YAML and graders stay unchanged; replay only substitutes recorded target output, and graders run fresh.
|
|
575
|
+
|
|
570
576
|
## Code Judge SDK
|
|
571
577
|
|
|
572
578
|
Use `@agentv/eval` to build custom graders in TypeScript/JavaScript:
|
|
@@ -2,7 +2,7 @@ import { createRequire } from 'node:module'; const require = createRequire(impor
|
|
|
2
2
|
import {
|
|
3
3
|
loadTsEvalFile,
|
|
4
4
|
loadTsEvalSuite
|
|
5
|
-
} from "./chunk-6QEIZ33V.js";
|
|
5
|
+
} from "./chunk-RL4S2FBZ.js";
|
|
6
6
|
import "./chunk-NPVGBFF6.js";
|
|
7
7
|
import "./chunk-M7BUKBAF.js";
|
|
8
8
|
import "./chunk-5H446C7X.js";
|
|
@@ -10,4 +10,4 @@ export {
|
|
|
10
10
|
loadTsEvalFile,
|
|
11
11
|
loadTsEvalSuite
|
|
12
12
|
};
|
|
13
|
-
//# sourceMappingURL=ts-eval-loader-EQJX3OLT-THE7D3GR.js.map
|
|
13
|
+
//# sourceMappingURL=ts-eval-loader-RBTB2HG2-H5TRXZLO.js.map
|