agentv 4.20.0 → 4.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. package/dist/{artifact-writer-RFXWXUOV.js → artifact-writer-E775664W.js} +4 -4
  2. package/dist/{chunk-36HXBYUY.js → chunk-27VT2KU2.js} +32 -27
  3. package/dist/chunk-27VT2KU2.js.map +1 -0
  4. package/dist/{chunk-LP4Y5D2Z.js → chunk-ERSBQAGK.js} +95 -27
  5. package/dist/chunk-ERSBQAGK.js.map +1 -0
  6. package/dist/{chunk-ZNS74WKH.js → chunk-FZUNMXBT.js} +3 -3
  7. package/dist/{chunk-PHGEGHKR.js → chunk-GPGX24OI.js} +186 -30
  8. package/dist/chunk-GPGX24OI.js.map +1 -0
  9. package/dist/{chunk-KJZ7PZCE.js → chunk-LPSUVXHV.js} +78 -132
  10. package/dist/{chunk-KJZ7PZCE.js.map → chunk-LPSUVXHV.js.map} +1 -1
  11. package/dist/cli.js +5 -5
  12. package/dist/{dist-GURCO6IS.js → dist-RSAA3T6F.js} +3 -3
  13. package/dist/index.js +5 -5
  14. package/dist/{interactive-GLRASSKM.js → interactive-RLMRNXXD.js} +5 -5
  15. package/dist/studio/assets/{index-KfPHd-QM.js → index-BVSHI8Eo.js} +1 -1
  16. package/dist/studio/assets/{index-BTsTcivx.js → index-SZVrc1UE.js} +20 -20
  17. package/dist/studio/index.html +1 -1
  18. package/dist/{ts-eval-loader-32COE32J-TCT4RIRT.js → ts-eval-loader-HPIPE72C-GDYGJVIA.js} +2 -2
  19. package/package.json +1 -1
  20. package/dist/chunk-36HXBYUY.js.map +0 -1
  21. package/dist/chunk-LP4Y5D2Z.js.map +0 -1
  22. package/dist/chunk-PHGEGHKR.js.map +0 -1
  23. /package/dist/{artifact-writer-RFXWXUOV.js.map → artifact-writer-E775664W.js.map} +0 -0
  24. /package/dist/{chunk-ZNS74WKH.js.map → chunk-FZUNMXBT.js.map} +0 -0
  25. /package/dist/{dist-GURCO6IS.js.map → dist-RSAA3T6F.js.map} +0 -0
  26. /package/dist/{interactive-GLRASSKM.js.map → interactive-RLMRNXXD.js.map} +0 -0
  27. /package/dist/{ts-eval-loader-32COE32J-TCT4RIRT.js.map → ts-eval-loader-HPIPE72C-GDYGJVIA.js.map} +0 -0
@@ -1,10 +1,10 @@
1
1
  import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
2
2
  import {
3
3
  toTranscriptJsonLines
4
- } from "./chunk-36HXBYUY.js";
4
+ } from "./chunk-27VT2KU2.js";
5
5
  import {
6
6
  DEFAULT_THRESHOLD
7
- } from "./chunk-LP4Y5D2Z.js";
7
+ } from "./chunk-ERSBQAGK.js";
8
8
 
9
9
  // src/commands/eval/artifact-writer.ts
10
10
  import { mkdir, readFile, writeFile } from "node:fs/promises";
@@ -688,4 +688,4 @@ export {
688
688
  writePerTestArtifacts,
689
689
  writeArtifactsFromResults
690
690
  };
691
- //# sourceMappingURL=chunk-ZNS74WKH.js.map
691
+ //# sourceMappingURL=chunk-FZUNMXBT.js.map
@@ -15,7 +15,7 @@ import {
15
15
  resolveWorkspaceOrFilePath,
16
16
  toSnakeCaseDeep,
17
17
  writeArtifactsFromResults
18
- } from "./chunk-ZNS74WKH.js";
18
+ } from "./chunk-FZUNMXBT.js";
19
19
  import {
20
20
  ResponseCache,
21
21
  RunBudgetTracker,
@@ -31,7 +31,7 @@ import {
31
31
  shouldSkipCacheForTemperature,
32
32
  stageResultsArtifacts,
33
33
  syncResultsRepo
34
- } from "./chunk-36HXBYUY.js";
34
+ } from "./chunk-27VT2KU2.js";
35
35
  import {
36
36
  CLI_PLACEHOLDERS,
37
37
  COMMON_TARGET_SETTINGS,
@@ -47,6 +47,7 @@ import {
47
47
  interpolateEnv,
48
48
  isGraderKind,
49
49
  listTargetNames,
50
+ loadCasesFromDirectory,
50
51
  loadCasesFromFile,
51
52
  loadConfig,
52
53
  loadTestSuite,
@@ -61,12 +62,12 @@ import {
61
62
  subscribeToCopilotSdkLogEntries,
62
63
  subscribeToPiLogEntries,
63
64
  toCamelCaseDeep
64
- } from "./chunk-LP4Y5D2Z.js";
65
+ } from "./chunk-ERSBQAGK.js";
65
66
 
66
67
  // package.json
67
68
  var package_default = {
68
69
  name: "agentv",
69
- version: "4.20.0",
70
+ version: "4.21.0",
70
71
  description: "CLI entry point for AgentV",
71
72
  type: "module",
72
73
  repository: {
@@ -291,9 +292,108 @@ import path15 from "node:path";
291
292
  import { pathToFileURL } from "node:url";
292
293
 
293
294
  // src/version-check.ts
294
- import { coerce, satisfies, validRange } from "semver";
295
+ import { coerce, major, satisfies, validRange } from "semver";
296
+
297
+ // src/self-update.ts
298
+ import { spawn } from "node:child_process";
299
+ import { get } from "node:https";
300
+ var NPM_REGISTRY_URL = "https://registry.npmjs.org/agentv/latest";
301
+ function detectPackageManagerFromPath(scriptPath) {
302
+ if (scriptPath.includes(".bun")) {
303
+ return "bun";
304
+ }
305
+ return "npm";
306
+ }
307
+ function detectPackageManager() {
308
+ return detectPackageManagerFromPath(process.argv[1] ?? "");
309
+ }
310
+ function detectInstallScopeFromPath(scriptPath) {
311
+ const hasSegment = scriptPath.includes("/node_modules/") || scriptPath.includes("\\node_modules\\");
312
+ return hasSegment ? "local" : "global";
313
+ }
314
+ function detectInstallScope() {
315
+ return detectInstallScopeFromPath(process.argv[1] ?? "");
316
+ }
317
+ function runCommand(cmd, args) {
318
+ return new Promise((resolve, reject) => {
319
+ const child = spawn(cmd, args, { stdio: ["inherit", "pipe", "inherit"] });
320
+ let stdout = "";
321
+ child.stdout?.on("data", (data) => {
322
+ process.stdout.write(data);
323
+ stdout += data.toString();
324
+ });
325
+ child.on("error", reject);
326
+ child.on("close", (code) => resolve({ exitCode: code ?? 1, stdout }));
327
+ });
328
+ }
329
+ function fetchLatestVersion() {
330
+ return new Promise((resolve) => {
331
+ const req = get(NPM_REGISTRY_URL, { timeout: 5e3 }, (res) => {
332
+ if (res.statusCode !== 200) {
333
+ res.resume();
334
+ resolve(null);
335
+ return;
336
+ }
337
+ let body = "";
338
+ res.on("data", (chunk) => {
339
+ body += chunk.toString();
340
+ });
341
+ res.on("end", () => {
342
+ try {
343
+ const version = JSON.parse(body).version;
344
+ resolve(typeof version === "string" ? version : null);
345
+ } catch {
346
+ resolve(null);
347
+ }
348
+ });
349
+ });
350
+ req.on("error", () => resolve(null));
351
+ req.on("timeout", () => {
352
+ req.destroy();
353
+ resolve(null);
354
+ });
355
+ });
356
+ }
357
+ function getInstallArgs(pm, versionSpec, scope) {
358
+ const pkg = `agentv@${versionSpec}`;
359
+ const baseCmd = pm === "npm" ? "install" : "add";
360
+ return scope === "global" ? [baseCmd, "-g", pkg] : [baseCmd, pkg];
361
+ }
362
+ async function performSelfUpdate(options) {
363
+ const pm = options?.pm ?? detectPackageManager();
364
+ const currentVersion = options?.currentVersion ?? "unknown";
365
+ const versionSpec = options?.versionRange ?? "latest";
366
+ const scope = options?.scope ?? detectInstallScope();
367
+ const args = getInstallArgs(pm, versionSpec, scope);
368
+ try {
369
+ const result = await runCommand(pm, args);
370
+ if (result.exitCode !== 0) {
371
+ return { success: false, currentVersion, scope };
372
+ }
373
+ let newVersion;
374
+ try {
375
+ const versionResult = await runCommand("agentv", ["--version"]);
376
+ newVersion = versionResult.stdout.trim();
377
+ } catch {
378
+ }
379
+ return { success: true, currentVersion, newVersion, scope };
380
+ } catch (error) {
381
+ if (error instanceof Error) {
382
+ if (error.message.includes("ENOENT") || error.message.includes("not found")) {
383
+ const alternative = pm === "npm" ? "bun" : "npm";
384
+ console.error(`Error: ${pm} not found. Try using --${alternative} flag.`);
385
+ } else {
386
+ console.error(`Error: ${error.message}`);
387
+ }
388
+ }
389
+ return { success: false, currentVersion, scope };
390
+ }
391
+ }
392
+
393
+ // src/version-check.ts
295
394
  var ANSI_YELLOW = "\x1B[33m";
296
395
  var ANSI_RED = "\x1B[31m";
396
+ var ANSI_GREEN = "\x1B[32m";
297
397
  var ANSI_RESET = "\x1B[0m";
298
398
  function checkVersion(requiredVersion) {
299
399
  const currentVersion = package_default.version;
@@ -319,10 +419,10 @@ async function enforceRequiredVersion(requiredVersion, options) {
319
419
  if (result.satisfied) {
320
420
  return;
321
421
  }
322
- const warning = `${ANSI_YELLOW}Warning: This project requires agentv ${result.requiredRange} but you have ${result.currentVersion}.${ANSI_RESET}
323
- Run \`agentv self update\` to upgrade.`;
422
+ const warning = `${ANSI_YELLOW}Warning: This project requires agentv ${result.requiredRange} but you have ${result.currentVersion}.${ANSI_RESET}`;
324
423
  if (options?.strict) {
325
- console.error(warning);
424
+ console.error(`${warning}
425
+ Run \`agentv self update\` to upgrade.`);
326
426
  console.error(
327
427
  `${ANSI_RED}Aborting: --strict mode requires the installed version to satisfy the required range.${ANSI_RESET}`
328
428
  );
@@ -330,18 +430,40 @@ async function enforceRequiredVersion(requiredVersion, options) {
330
430
  }
331
431
  if (process.stdin.isTTY && process.stdout.isTTY) {
332
432
  console.warn(warning);
333
- const shouldContinue = await promptContinue();
334
- if (!shouldContinue) {
335
- process.exit(1);
433
+ const shouldUpdate = await promptUpdate();
434
+ if (shouldUpdate) {
435
+ await runInlineUpdate(result.currentVersion, result.requiredRange);
336
436
  }
337
437
  } else {
338
438
  process.stderr.write(`${warning}
439
+ Run \`agentv self update\` to upgrade.
339
440
  `);
340
441
  }
341
442
  }
342
- async function promptContinue() {
443
+ async function promptUpdate() {
343
444
  const { confirm } = await import("@inquirer/prompts");
344
- return confirm({ message: "Continue anyway?", default: false });
445
+ return confirm({ message: "Update now?", default: true });
446
+ }
447
+ async function runInlineUpdate(currentVersion, versionRange) {
448
+ const currentMajor = major(coerce(currentVersion) ?? currentVersion);
449
+ const safeRange = `${versionRange} <${currentMajor + 1}.0.0`;
450
+ console.log("");
451
+ const result = await performSelfUpdate({ currentVersion, versionRange: safeRange });
452
+ if (!result.success) {
453
+ console.error(`${ANSI_RED}Update failed. Run \`agentv self update\` manually.${ANSI_RESET}`);
454
+ process.exit(1);
455
+ }
456
+ if (result.newVersion) {
457
+ console.log(
458
+ `
459
+ ${ANSI_GREEN}Update complete: ${currentVersion} \u2192 ${result.newVersion}${ANSI_RESET}`
460
+ );
461
+ } else {
462
+ console.log(`
463
+ ${ANSI_GREEN}Update complete.${ANSI_RESET}`);
464
+ }
465
+ console.log("Please re-run your command.");
466
+ process.exit(0);
345
467
  }
346
468
 
347
469
  // src/commands/results/remote.ts
@@ -2212,7 +2334,7 @@ function createWriterFromPath(filePath, options) {
2212
2334
 
2213
2335
  // src/commands/eval/progress-display.ts
2214
2336
  var ANSI_BOLD = "\x1B[1m";
2215
- var ANSI_GREEN = "\x1B[32m";
2337
+ var ANSI_GREEN2 = "\x1B[32m";
2216
2338
  var ANSI_RED2 = "\x1B[31m";
2217
2339
  var ANSI_YELLOW2 = "\x1B[33m";
2218
2340
  var ANSI_RESET2 = "\x1B[0m";
@@ -2226,7 +2348,7 @@ function formatVerdict(score, verdict) {
2226
2348
  const scoreStr = score !== void 0 ? `${Math.round(score * 100)}%` : "";
2227
2349
  const verdictLabel = verdict === "ERROR" ? "ERROR" : `${scoreStr} ${verdict}`;
2228
2350
  if (!colors2) return ` | ${verdictLabel}`;
2229
- const color = verdict === "PASS" ? ANSI_GREEN : verdict === "FAIL" ? ANSI_RED2 : ANSI_YELLOW2;
2351
+ const color = verdict === "PASS" ? ANSI_GREEN2 : verdict === "FAIL" ? ANSI_RED2 : ANSI_YELLOW2;
2230
2352
  return ` | ${color}${ANSI_BOLD}${verdictLabel}${ANSI_RESET2}`;
2231
2353
  }
2232
2354
  var ProgressDisplay = class {
@@ -2644,7 +2766,7 @@ function formatMatrixSummary(results) {
2644
2766
  import { readFile as readFile2 } from "node:fs/promises";
2645
2767
  import path14 from "node:path";
2646
2768
  import { parse } from "yaml";
2647
- import { readFile as readFile22, readdir } from "node:fs/promises";
2769
+ import { readFile as readFile22, readdir, stat as stat2 } from "node:fs/promises";
2648
2770
  import path22 from "node:path";
2649
2771
  import { parse as parse2 } from "yaml";
2650
2772
  import { readFile as readFile3 } from "node:fs/promises";
@@ -2880,17 +3002,21 @@ async function validateEvalFile(filePath) {
2880
3002
  }
2881
3003
  const cases = parsed.tests;
2882
3004
  if (typeof cases === "string") {
2883
- validateTestsStringPath(cases, absolutePath, errors);
2884
3005
  await validateWorkspaceConfig(parsed.workspace, absolutePath, errors, "workspace");
2885
- const ext = path22.extname(cases).toLowerCase();
2886
- if (VALID_TEST_FILE_EXTENSIONS.has(ext)) {
2887
- const externalCasesPath = path22.resolve(path22.dirname(absolutePath), cases);
3006
+ const externalCasesPath = path22.resolve(path22.dirname(absolutePath), cases);
3007
+ let isDir = false;
3008
+ try {
3009
+ const pathStat = await stat2(externalCasesPath);
3010
+ isDir = pathStat.isDirectory();
3011
+ } catch {
3012
+ }
3013
+ if (isDir) {
2888
3014
  try {
2889
- const externalCases = await loadCasesFromFile(externalCasesPath);
2890
- for (let i = 0; i < externalCases.length; i++) {
2891
- const externalCase = externalCases[i];
3015
+ const dirCases = await loadCasesFromDirectory(externalCasesPath);
3016
+ for (let i = 0; i < dirCases.length; i++) {
3017
+ const dirCase = dirCases[i];
2892
3018
  await validateWorkspaceConfig(
2893
- externalCase.workspace,
3019
+ dirCase.workspace,
2894
3020
  absolutePath,
2895
3021
  errors,
2896
3022
  `tests[${i}].workspace`
@@ -2905,6 +3031,31 @@ async function validateEvalFile(filePath) {
2905
3031
  message
2906
3032
  });
2907
3033
  }
3034
+ } else {
3035
+ validateTestsStringPath(cases, absolutePath, errors);
3036
+ const ext = path22.extname(cases).toLowerCase();
3037
+ if (VALID_TEST_FILE_EXTENSIONS.has(ext)) {
3038
+ try {
3039
+ const externalCases = await loadCasesFromFile(externalCasesPath);
3040
+ for (let i = 0; i < externalCases.length; i++) {
3041
+ const externalCase = externalCases[i];
3042
+ await validateWorkspaceConfig(
3043
+ externalCase.workspace,
3044
+ absolutePath,
3045
+ errors,
3046
+ `tests[${i}].workspace`
3047
+ );
3048
+ }
3049
+ } catch (error) {
3050
+ const message = error instanceof Error ? error.message : String(error);
3051
+ errors.push({
3052
+ severity: "error",
3053
+ filePath: absolutePath,
3054
+ location: "tests",
3055
+ message
3056
+ });
3057
+ }
3058
+ }
2908
3059
  }
2909
3060
  return {
2910
3061
  valid: errors.filter((e) => e.severity === "error").length === 0,
@@ -5224,8 +5375,8 @@ async function runEvalCommand(input) {
5224
5375
  if (options.workspacePath) {
5225
5376
  const resolvedWorkspace = path15.resolve(options.workspacePath);
5226
5377
  try {
5227
- const { stat: stat2 } = await import("node:fs/promises");
5228
- const stats = await stat2(resolvedWorkspace);
5378
+ const { stat: stat3 } = await import("node:fs/promises");
5379
+ const stats = await stat3(resolvedWorkspace);
5229
5380
  if (!stats.isDirectory()) {
5230
5381
  throw new Error(`--workspace-path is not a directory: ${resolvedWorkspace}`);
5231
5382
  }
@@ -5281,7 +5432,7 @@ async function runEvalCommand(input) {
5281
5432
  const useFileExport = !!options.otelFile;
5282
5433
  if (options.exportOtel || useFileExport) {
5283
5434
  try {
5284
- const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-GURCO6IS.js");
5435
+ const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-RSAA3T6F.js");
5285
5436
  let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
5286
5437
  let headers = {};
5287
5438
  if (options.otelBackend) {
@@ -5480,7 +5631,7 @@ async function runEvalCommand(input) {
5480
5631
  const activeTestFiles = resolvedTestFiles.filter((f) => fileMetadata.has(f));
5481
5632
  let transcriptProviderFactory;
5482
5633
  if (options.transcript) {
5483
- const { TranscriptProvider } = await import("./dist-GURCO6IS.js");
5634
+ const { TranscriptProvider } = await import("./dist-RSAA3T6F.js");
5484
5635
  const transcriptProvider = await TranscriptProvider.fromFile(options.transcript);
5485
5636
  const totalTests = [...fileMetadata.values()].reduce(
5486
5637
  (sum, meta) => sum + meta.testCases.length,
@@ -5649,7 +5800,7 @@ async function runEvalCommand(input) {
5649
5800
  if (usesDefaultArtifactWorkspace && allResults.length > 0) {
5650
5801
  const evalFile = activeTestFiles.length === 1 ? activeTestFiles[0] : "";
5651
5802
  if (isResumeAppend) {
5652
- const { writePerTestArtifacts } = await import("./artifact-writer-RFXWXUOV.js");
5803
+ const { writePerTestArtifacts } = await import("./artifact-writer-E775664W.js");
5653
5804
  await writePerTestArtifacts(allResults, runDir, {
5654
5805
  experiment: normalizeExperimentName(options.experiment)
5655
5806
  });
@@ -5830,6 +5981,11 @@ export {
5830
5981
  loadManifestResults,
5831
5982
  loadLightweightResults,
5832
5983
  HtmlWriter,
5984
+ detectPackageManager,
5985
+ detectInstallScope,
5986
+ fetchLatestVersion,
5987
+ performSelfUpdate,
5988
+ enforceRequiredVersion,
5833
5989
  resolveEvalPaths,
5834
5990
  findRepoRoot,
5835
5991
  c,
@@ -5867,4 +6023,4 @@ export {
5867
6023
  getCategories,
5868
6024
  filterByCategory
5869
6025
  };
5870
- //# sourceMappingURL=chunk-PHGEGHKR.js.map
6026
+ //# sourceMappingURL=chunk-GPGX24OI.js.map