agentv 4.18.0-next.1 → 4.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{artifact-writer-WH3OE42V.js → artifact-writer-YATMDPWI.js} +6 -5
- package/dist/{chunk-MCBERRMC.js → chunk-62M5MR5K.js} +22 -20
- package/dist/chunk-62M5MR5K.js.map +1 -0
- package/dist/{chunk-VRPCMCLQ.js → chunk-IWI4AJRS.js} +80 -42
- package/dist/chunk-IWI4AJRS.js.map +1 -0
- package/dist/{chunk-HBDOJJFY.js → chunk-NL6P5MUH.js} +5 -3
- package/dist/{chunk-HBDOJJFY.js.map → chunk-NL6P5MUH.js.map} +1 -1
- package/dist/{chunk-RCOAXXHP.js → chunk-PTYQS37Y.js} +28906 -30884
- package/dist/chunk-PTYQS37Y.js.map +1 -0
- package/dist/chunk-R2QDYORI.js +2178 -0
- package/dist/chunk-R2QDYORI.js.map +1 -0
- package/dist/cli.js +7 -6
- package/dist/cli.js.map +1 -1
- package/dist/{dist-7W4OI3X2.js → dist-RTIUSC6L.js} +63 -59
- package/dist/index.js +7 -6
- package/dist/{interactive-J4QEU5FG.js → interactive-7AZMOH2V.js} +8 -7
- package/dist/{interactive-J4QEU5FG.js.map → interactive-7AZMOH2V.js.map} +1 -1
- package/dist/ts-eval-loader-XFQ6S4DT-S7P2UUBX.js +15 -0
- package/dist/ts-eval-loader-XFQ6S4DT-S7P2UUBX.js.map +1 -0
- package/package.json +1 -1
- package/dist/chunk-MCBERRMC.js.map +0 -1
- package/dist/chunk-RCOAXXHP.js.map +0 -1
- package/dist/chunk-VRPCMCLQ.js.map +0 -1
- /package/dist/{artifact-writer-WH3OE42V.js.map → artifact-writer-YATMDPWI.js.map} +0 -0
- /package/dist/{dist-7W4OI3X2.js.map → dist-RTIUSC6L.js.map} +0 -0
|
@@ -15,7 +15,22 @@ import {
|
|
|
15
15
|
resolveWorkspaceOrFilePath,
|
|
16
16
|
toSnakeCaseDeep,
|
|
17
17
|
writeArtifactsFromResults
|
|
18
|
-
} from "./chunk-HBDOJJFY.js";
|
|
18
|
+
} from "./chunk-NL6P5MUH.js";
|
|
19
|
+
import {
|
|
20
|
+
ResponseCache,
|
|
21
|
+
commitAndPushResultsBranch,
|
|
22
|
+
createDraftResultsPr,
|
|
23
|
+
deriveCategory,
|
|
24
|
+
directorySizeBytes,
|
|
25
|
+
getResultsRepoStatus,
|
|
26
|
+
loadTsConfig,
|
|
27
|
+
prepareResultsRepoBranch,
|
|
28
|
+
resolveResultsRepoRunsDir,
|
|
29
|
+
shouldEnableCache,
|
|
30
|
+
shouldSkipCacheForTemperature,
|
|
31
|
+
stageResultsArtifacts,
|
|
32
|
+
syncResultsRepo
|
|
33
|
+
} from "./chunk-R2QDYORI.js";
|
|
19
34
|
import {
|
|
20
35
|
CLI_PLACEHOLDERS,
|
|
21
36
|
COMMON_TARGET_SETTINGS,
|
|
@@ -23,47 +38,34 @@ import {
|
|
|
23
38
|
DEFAULT_THRESHOLD,
|
|
24
39
|
KNOWN_PROVIDERS,
|
|
25
40
|
PROVIDER_ALIASES,
|
|
26
|
-
ResponseCache,
|
|
27
41
|
buildDirectoryChain,
|
|
28
42
|
buildSearchRoots,
|
|
29
|
-
commitAndPushResultsBranch,
|
|
30
|
-
createDraftResultsPr,
|
|
31
|
-
deriveCategory,
|
|
32
|
-
directorySizeBytes,
|
|
33
43
|
ensureVSCodeSubagents,
|
|
34
44
|
findDeprecatedCamelCaseTargetWarnings,
|
|
35
45
|
findGitRoot,
|
|
36
|
-
getResultsRepoStatus,
|
|
37
46
|
interpolateEnv,
|
|
38
47
|
isGraderKind,
|
|
39
48
|
listTargetNames,
|
|
40
49
|
loadCasesFromFile,
|
|
41
50
|
loadConfig,
|
|
42
51
|
loadTestSuite,
|
|
43
|
-
loadTsConfig,
|
|
44
52
|
normalizeLineEndings,
|
|
45
|
-
prepareResultsRepoBranch,
|
|
46
53
|
readTargetDefinitions,
|
|
47
54
|
readTestSuiteMetadata,
|
|
48
55
|
resolveFileReference,
|
|
49
|
-
resolveResultsRepoRunsDir,
|
|
50
56
|
resolveTargetDefinition,
|
|
51
57
|
runEvaluation,
|
|
52
|
-
shouldEnableCache,
|
|
53
|
-
shouldSkipCacheForTemperature,
|
|
54
|
-
stageResultsArtifacts,
|
|
55
58
|
subscribeToCodexLogEntries,
|
|
56
59
|
subscribeToCopilotCliLogEntries,
|
|
57
60
|
subscribeToCopilotSdkLogEntries,
|
|
58
61
|
subscribeToPiLogEntries,
|
|
59
|
-
syncResultsRepo,
|
|
60
62
|
toCamelCaseDeep
|
|
61
|
-
} from "./chunk-RCOAXXHP.js";
|
|
63
|
+
} from "./chunk-PTYQS37Y.js";
|
|
62
64
|
|
|
63
65
|
// package.json
|
|
64
66
|
var package_default = {
|
|
65
67
|
name: "agentv",
|
|
66
|
-
version: "4.18.0-next.1",
|
|
68
|
+
version: "4.19.0",
|
|
67
69
|
description: "CLI entry point for AgentV",
|
|
68
70
|
type: "module",
|
|
69
71
|
repository: {
|
|
@@ -148,12 +150,15 @@ async function resolveEvalPaths(evalPaths, cwd) {
|
|
|
148
150
|
const candidatePath = path.isAbsolute(pattern) ? path.normalize(pattern) : path.resolve(cwd, pattern);
|
|
149
151
|
try {
|
|
150
152
|
const stats = await stat(candidatePath);
|
|
151
|
-
if (stats.isFile() && /\.(ya?ml|jsonl|json)$/i.test(candidatePath)) {
|
|
153
|
+
if (stats.isFile() && /\.(ya?ml|jsonl|json|[cm]?ts)$/i.test(candidatePath)) {
|
|
152
154
|
results.add(candidatePath);
|
|
153
155
|
continue;
|
|
154
156
|
}
|
|
155
157
|
if (stats.isDirectory()) {
|
|
156
|
-
const dirGlob = path.posix.join(
|
|
158
|
+
const dirGlob = path.posix.join(
|
|
159
|
+
candidatePath.replace(/\\/g, "/"),
|
|
160
|
+
"**/{*.eval.yaml,*.eval.yml,eval.yaml,eval.yml,*.eval.ts,*.eval.mts}"
|
|
161
|
+
);
|
|
157
162
|
const dirMatches = await fg(dirGlob, {
|
|
158
163
|
absolute: true,
|
|
159
164
|
onlyFiles: true,
|
|
@@ -179,7 +184,9 @@ async function resolveEvalPaths(evalPaths, cwd) {
|
|
|
179
184
|
followSymbolicLinks: true,
|
|
180
185
|
ignore: ignorePatterns
|
|
181
186
|
});
|
|
182
|
-
const yamlMatches = matches.filter(
|
|
187
|
+
const yamlMatches = matches.filter(
|
|
188
|
+
(filePath) => /\.(ya?ml|jsonl|json|[cm]?ts)$/i.test(filePath)
|
|
189
|
+
);
|
|
183
190
|
for (const filePath of yamlMatches) {
|
|
184
191
|
results.add(path.normalize(filePath));
|
|
185
192
|
}
|
|
@@ -201,7 +208,7 @@ async function resolveEvalPaths(evalPaths, cwd) {
|
|
|
201
208
|
throw new Error(
|
|
202
209
|
`No eval files matched any provided paths or globs: ${includePatterns.join(
|
|
203
210
|
", "
|
|
204
|
-
)}. Provide YAML, JSONL, or
|
|
211
|
+
)}. Provide YAML, JSONL, JSON, or TypeScript paths or globs (e.g., "evals/**/eval.yaml", "evals/**/*.eval.ts").`
|
|
205
212
|
);
|
|
206
213
|
}
|
|
207
214
|
const sorted = Array.from(results);
|
|
@@ -4874,6 +4881,52 @@ async function prepareFileMetadata(params) {
|
|
|
4874
4881
|
inlineTargetLabel: `transcript (${path15.basename(options.transcript)})`
|
|
4875
4882
|
}
|
|
4876
4883
|
];
|
|
4884
|
+
} else if (suite.inlineTarget && options.cliTargets.length === 0) {
|
|
4885
|
+
const targetDefinition = suite.inlineTarget;
|
|
4886
|
+
const resolvedTarget = options.dryRun ? {
|
|
4887
|
+
kind: "mock",
|
|
4888
|
+
name: `${targetDefinition.name}-dry-run`,
|
|
4889
|
+
graderTarget: void 0,
|
|
4890
|
+
config: {
|
|
4891
|
+
response: '{"answer":"Mock dry-run response"}',
|
|
4892
|
+
delayMs: options.dryRunDelay,
|
|
4893
|
+
delayMinMs: options.dryRunDelayMin,
|
|
4894
|
+
delayMaxMs: options.dryRunDelayMax
|
|
4895
|
+
}
|
|
4896
|
+
} : resolveTargetDefinition(targetDefinition, process.env, testFilePath, {
|
|
4897
|
+
emitDeprecationWarnings: false
|
|
4898
|
+
});
|
|
4899
|
+
selections = [
|
|
4900
|
+
{
|
|
4901
|
+
selection: {
|
|
4902
|
+
definitions: [targetDefinition],
|
|
4903
|
+
resolvedTarget,
|
|
4904
|
+
targetName: targetDefinition.name,
|
|
4905
|
+
targetSource: "test-file",
|
|
4906
|
+
targetsFilePath: testFilePath
|
|
4907
|
+
},
|
|
4908
|
+
inlineTargetLabel: resolveTargetLabel(targetDefinition.name, resolvedTarget.name)
|
|
4909
|
+
}
|
|
4910
|
+
];
|
|
4911
|
+
} else if (suite.providerFactory && options.cliTargets.length === 0) {
|
|
4912
|
+
const taskTarget = {
|
|
4913
|
+
kind: "mock",
|
|
4914
|
+
name: "custom-task",
|
|
4915
|
+
graderTarget: void 0,
|
|
4916
|
+
config: {}
|
|
4917
|
+
};
|
|
4918
|
+
selections = [
|
|
4919
|
+
{
|
|
4920
|
+
selection: {
|
|
4921
|
+
definitions: [],
|
|
4922
|
+
resolvedTarget: taskTarget,
|
|
4923
|
+
targetName: "custom-task",
|
|
4924
|
+
targetSource: "test-file",
|
|
4925
|
+
targetsFilePath: testFilePath
|
|
4926
|
+
},
|
|
4927
|
+
inlineTargetLabel: "custom-task"
|
|
4928
|
+
}
|
|
4929
|
+
];
|
|
4877
4930
|
} else {
|
|
4878
4931
|
const cliTargets = options.cliTargets;
|
|
4879
4932
|
const suiteTargets2 = suite.targets;
|
|
@@ -4944,7 +4997,8 @@ async function prepareFileMetadata(params) {
|
|
|
4944
4997
|
budgetUsd: suite.budgetUsd,
|
|
4945
4998
|
failOnError: suite.failOnError,
|
|
4946
4999
|
threshold: suite.threshold,
|
|
4947
|
-
tags: suite.metadata?.tags
|
|
5000
|
+
tags: suite.metadata?.tags,
|
|
5001
|
+
providerFactory: suite.providerFactory
|
|
4948
5002
|
};
|
|
4949
5003
|
}
|
|
4950
5004
|
async function runSingleEvalFile(params) {
|
|
@@ -5223,7 +5277,7 @@ async function runEvalCommand(input) {
|
|
|
5223
5277
|
const useFileExport = !!options.otelFile;
|
|
5224
5278
|
if (options.exportOtel || useFileExport) {
|
|
5225
5279
|
try {
|
|
5226
|
-
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-7W4OI3X2.js");
|
|
5280
|
+
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-RTIUSC6L.js");
|
|
5227
5281
|
let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
|
|
5228
5282
|
let headers = {};
|
|
5229
5283
|
if (options.otelBackend) {
|
|
@@ -5283,23 +5337,7 @@ async function runEvalCommand(input) {
|
|
|
5283
5337
|
const displayIdTracker = createDisplayIdTracker();
|
|
5284
5338
|
const perFileWorkers = options.workers;
|
|
5285
5339
|
const fileMetadata = /* @__PURE__ */ new Map();
|
|
5286
|
-
const tsFiles = [];
|
|
5287
|
-
const yamlFiles = [];
|
|
5288
5340
|
for (const testFilePath of resolvedTestFiles) {
|
|
5289
|
-
if (/\.(ts|js|mts|mjs)$/.test(testFilePath)) {
|
|
5290
|
-
tsFiles.push(testFilePath);
|
|
5291
|
-
} else {
|
|
5292
|
-
yamlFiles.push(testFilePath);
|
|
5293
|
-
}
|
|
5294
|
-
}
|
|
5295
|
-
for (const tsFile of tsFiles) {
|
|
5296
|
-
await ensureFileExists(tsFile, "TypeScript eval file");
|
|
5297
|
-
await import(pathToFileURL(tsFile).href);
|
|
5298
|
-
}
|
|
5299
|
-
if (yamlFiles.length === 0 && tsFiles.length > 0) {
|
|
5300
|
-
return;
|
|
5301
|
-
}
|
|
5302
|
-
for (const testFilePath of yamlFiles) {
|
|
5303
5341
|
const meta = await prepareFileMetadata({
|
|
5304
5342
|
testFilePath,
|
|
5305
5343
|
repoRoot,
|
|
@@ -5434,7 +5472,7 @@ async function runEvalCommand(input) {
|
|
|
5434
5472
|
const activeTestFiles = resolvedTestFiles.filter((f) => fileMetadata.has(f));
|
|
5435
5473
|
let transcriptProviderFactory;
|
|
5436
5474
|
if (options.transcript) {
|
|
5437
|
-
const { TranscriptProvider } = await import("./dist-7W4OI3X2.js");
|
|
5475
|
+
const { TranscriptProvider } = await import("./dist-RTIUSC6L.js");
|
|
5438
5476
|
const transcriptProvider = await TranscriptProvider.fromFile(options.transcript);
|
|
5439
5477
|
const totalTests = [...fileMetadata.values()].reduce(
|
|
5440
5478
|
(sum, meta) => sum + meta.testCases.length,
|
|
@@ -5494,7 +5532,7 @@ async function runEvalCommand(input) {
|
|
|
5494
5532
|
budgetUsd: targetPrep.budgetUsd,
|
|
5495
5533
|
failOnError: targetPrep.failOnError,
|
|
5496
5534
|
threshold: resolvedThreshold,
|
|
5497
|
-
providerFactory: transcriptProviderFactory
|
|
5535
|
+
providerFactory: transcriptProviderFactory ?? targetPrep.providerFactory
|
|
5498
5536
|
});
|
|
5499
5537
|
const evalFile = path15.relative(cwd, testFilePath);
|
|
5500
5538
|
const existingSummary = remoteEvalSummaries.find(
|
|
@@ -5574,7 +5612,7 @@ async function runEvalCommand(input) {
|
|
|
5574
5612
|
if (usesDefaultArtifactWorkspace && allResults.length > 0) {
|
|
5575
5613
|
const evalFile = activeTestFiles.length === 1 ? activeTestFiles[0] : "";
|
|
5576
5614
|
if (isResumeAppend) {
|
|
5577
|
-
const { writePerTestArtifacts } = await import("./artifact-writer-WH3OE42V.js");
|
|
5615
|
+
const { writePerTestArtifacts } = await import("./artifact-writer-YATMDPWI.js");
|
|
5578
5616
|
await writePerTestArtifacts(allResults, runDir, {
|
|
5579
5617
|
experiment: normalizeExperimentName(options.experiment)
|
|
5580
5618
|
});
|
|
@@ -5784,4 +5822,4 @@ export {
|
|
|
5784
5822
|
getCategories,
|
|
5785
5823
|
filterByCategory
|
|
5786
5824
|
};
|
|
5787
|
-
//# sourceMappingURL=chunk-VRPCMCLQ.js.map
|
|
5825
|
+
//# sourceMappingURL=chunk-IWI4AJRS.js.map
|