agentv 3.7.0 → 3.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -164,7 +164,7 @@ For large-scale evaluations, AgentV supports JSONL (JSON Lines) format as an alt
164
164
  Optional sidecar YAML metadata file (`dataset.eval.yaml` alongside `dataset.jsonl`):
165
165
  ```yaml
166
166
  description: Math evaluation dataset
167
- dataset: math-tests
167
+ name: math-tests
168
168
  execution:
169
169
  target: azure-llm
170
170
  assertions:
@@ -8,8 +8,10 @@ import {
8
8
  buildSearchRoots,
9
9
  ensureVSCodeSubagents,
10
10
  findGitRoot,
11
+ interpolateEnv,
11
12
  isEvaluatorKind,
12
13
  listTargetNames,
14
+ loadCasesFromFile,
13
15
  loadConfig,
14
16
  loadTestSuite,
15
17
  loadTsConfig,
@@ -25,12 +27,12 @@ import {
25
27
  subscribeToCopilotCliLogEntries,
26
28
  subscribeToCopilotSdkLogEntries,
27
29
  subscribeToPiLogEntries
28
- } from "./chunk-XGG64VIY.js";
30
+ } from "./chunk-TXDPYXHY.js";
29
31
 
30
32
  // package.json
31
33
  var package_default = {
32
34
  name: "agentv",
33
- version: "3.7.0",
35
+ version: "3.9.0",
34
36
  description: "CLI entry point for AgentV",
35
37
  type: "module",
36
38
  repository: {
@@ -1326,9 +1328,9 @@ var SCRIPT = `
1326
1328
  /* input / output */
1327
1329
  h+='<div class="detail-grid">';
1328
1330
  if(r.input!=null){
1329
- h+='<div class="detail-block"><h4>Input</h4><pre class="detail-pre">'+esc(typeof r.input==="string"?r.input:JSON.stringify(r.input,null,2))+"</pre></div>";
1331
+ h+='<div class="detail-block"><h4>Input</h4><pre class="detail-pre">'+esc(JSON.stringify(r.input,null,2))+"</pre></div>";
1330
1332
  }
1331
- h+='<div class="detail-block"><h4>Output</h4><pre class="detail-pre">'+esc(r.outputText||"")+"</pre></div>";
1333
+ h+='<div class="detail-block"><h4>Output</h4><pre class="detail-pre">'+esc(r.output?JSON.stringify(r.output,null,2):"")+"</pre></div>";
1332
1334
  h+="</div>";
1333
1335
 
1334
1336
  /* evaluator results */
@@ -1522,7 +1524,7 @@ var JunitWriter = class _JunitWriter {
1522
1524
  this.closed = true;
1523
1525
  const grouped = /* @__PURE__ */ new Map();
1524
1526
  for (const result of this.results) {
1525
- const suite = result.dataset ?? "default";
1527
+ const suite = result.eval_set ?? "default";
1526
1528
  const existing = grouped.get(suite);
1527
1529
  if (existing) {
1528
1530
  existing.push(result);
@@ -2186,7 +2188,7 @@ async function validateEvalFile(filePath) {
2186
2188
  let parsed;
2187
2189
  try {
2188
2190
  const content = await readFile22(absolutePath, "utf8");
2189
- parsed = parse2(content);
2191
+ parsed = interpolateEnv(parse2(content), process.env);
2190
2192
  } catch (error) {
2191
2193
  errors.push({
2192
2194
  severity: "error",
@@ -2249,6 +2251,31 @@ async function validateEvalFile(filePath) {
2249
2251
  }
2250
2252
  if (typeof cases === "string") {
2251
2253
  validateTestsStringPath(cases, absolutePath, errors);
2254
+ await validateWorkspaceConfig(parsed.workspace, absolutePath, errors, "workspace");
2255
+ const ext = path22.extname(cases).toLowerCase();
2256
+ if (VALID_TEST_FILE_EXTENSIONS.has(ext)) {
2257
+ const externalCasesPath = path22.resolve(path22.dirname(absolutePath), cases);
2258
+ try {
2259
+ const externalCases = await loadCasesFromFile(externalCasesPath);
2260
+ for (let i = 0; i < externalCases.length; i++) {
2261
+ const externalCase = externalCases[i];
2262
+ await validateWorkspaceConfig(
2263
+ externalCase.workspace,
2264
+ absolutePath,
2265
+ errors,
2266
+ `tests[${i}].workspace`
2267
+ );
2268
+ }
2269
+ } catch (error) {
2270
+ const message = error instanceof Error ? error.message : String(error);
2271
+ errors.push({
2272
+ severity: "error",
2273
+ filePath: absolutePath,
2274
+ location: "tests",
2275
+ message
2276
+ });
2277
+ }
2278
+ }
2252
2279
  return {
2253
2280
  valid: errors.filter((e) => e.severity === "error").length === 0,
2254
2281
  filePath: absolutePath,
@@ -2356,10 +2383,14 @@ async function validateEvalFile(filePath) {
2356
2383
  if (assertField !== void 0) {
2357
2384
  validateAssertArray(assertField, location, absolutePath, errors);
2358
2385
  }
2386
+ await validateWorkspaceConfig(
2387
+ evalCase.workspace,
2388
+ absolutePath,
2389
+ errors,
2390
+ `${location}.workspace`
2391
+ );
2359
2392
  }
2360
- if (isObject(parsed.workspace)) {
2361
- validateWorkspaceRepoConfig(parsed.workspace, absolutePath, errors);
2362
- }
2393
+ await validateWorkspaceConfig(parsed.workspace, absolutePath, errors, "workspace");
2363
2394
  return {
2364
2395
  valid: errors.filter((e) => e.severity === "error").length === 0,
2365
2396
  filePath: absolutePath,
@@ -2367,6 +2398,41 @@ async function validateEvalFile(filePath) {
2367
2398
  errors
2368
2399
  };
2369
2400
  }
2401
+ async function validateWorkspaceConfig(workspace, evalFilePath, errors, location) {
2402
+ if (workspace === void 0) {
2403
+ return;
2404
+ }
2405
+ if (isObject(workspace)) {
2406
+ validateWorkspaceRepoConfig(workspace, evalFilePath, errors);
2407
+ return;
2408
+ }
2409
+ if (typeof workspace !== "string") {
2410
+ return;
2411
+ }
2412
+ const workspacePath = path22.resolve(path22.dirname(evalFilePath), workspace);
2413
+ try {
2414
+ const workspaceContent = await readFile22(workspacePath, "utf8");
2415
+ const parsedWorkspace = interpolateEnv(parse2(workspaceContent), process.env);
2416
+ if (!isObject(parsedWorkspace)) {
2417
+ errors.push({
2418
+ severity: "error",
2419
+ filePath: evalFilePath,
2420
+ location,
2421
+ message: `External workspace file must contain a YAML object: ${workspace}`
2422
+ });
2423
+ return;
2424
+ }
2425
+ validateWorkspaceRepoConfig(parsedWorkspace, workspacePath, errors);
2426
+ } catch (error) {
2427
+ const message = error instanceof Error ? error.message : String(error);
2428
+ errors.push({
2429
+ severity: "error",
2430
+ filePath: evalFilePath,
2431
+ location,
2432
+ message: `Failed to load external workspace file '${workspace}': ${message}`
2433
+ });
2434
+ }
2435
+ }
2370
2436
  function validateWorkspaceRepoConfig(workspace, filePath, errors) {
2371
2437
  const repos = workspace.repos;
2372
2438
  const hooks = workspace.hooks;
@@ -2375,8 +2441,21 @@ function validateWorkspaceRepoConfig(workspace, filePath, errors) {
2375
2441
  if (Array.isArray(repos)) {
2376
2442
  for (const repo of repos) {
2377
2443
  if (!isObject(repo)) continue;
2444
+ const source = repo.source;
2378
2445
  const checkout = repo.checkout;
2379
2446
  const clone = repo.clone;
2447
+ if (isObject(source) && isObject(checkout)) {
2448
+ const sourceType = source.type;
2449
+ const resolve = checkout.resolve;
2450
+ if (sourceType === "local" && typeof resolve === "string") {
2451
+ errors.push({
2452
+ severity: "warning",
2453
+ filePath,
2454
+ location: `workspace.repos[path=${repo.path}]`,
2455
+ message: "checkout.resolve has no effect for a local source. Use source.type to choose where the repo comes from; keep checkout.ref or checkout.ancestor only when pinning a local source."
2456
+ });
2457
+ }
2458
+ }
2380
2459
  if (isObject(checkout) && isObject(clone)) {
2381
2460
  const ancestor = checkout.ancestor;
2382
2461
  const depth = clone.depth;
@@ -3141,31 +3220,6 @@ async function validateConfigFile(filePath) {
3141
3220
  return { valid: false, filePath, fileType: "config", errors };
3142
3221
  }
3143
3222
  const config = parsed;
3144
- const guidelinePatterns = config.guideline_patterns;
3145
- if (guidelinePatterns !== void 0) {
3146
- if (!Array.isArray(guidelinePatterns)) {
3147
- errors.push({
3148
- severity: "error",
3149
- filePath,
3150
- location: "guideline_patterns",
3151
- message: "Field 'guideline_patterns' must be an array"
3152
- });
3153
- } else if (!guidelinePatterns.every((p) => typeof p === "string")) {
3154
- errors.push({
3155
- severity: "error",
3156
- filePath,
3157
- location: "guideline_patterns",
3158
- message: "All entries in 'guideline_patterns' must be strings"
3159
- });
3160
- } else if (guidelinePatterns.length === 0) {
3161
- errors.push({
3162
- severity: "warning",
3163
- filePath,
3164
- location: "guideline_patterns",
3165
- message: "Field 'guideline_patterns' is empty. Consider removing it or adding patterns."
3166
- });
3167
- }
3168
- }
3169
3223
  const evalPatterns = config.eval_patterns;
3170
3224
  if (evalPatterns !== void 0) {
3171
3225
  if (!Array.isArray(evalPatterns)) {
@@ -3202,13 +3256,7 @@ async function validateConfigFile(filePath) {
3202
3256
  });
3203
3257
  }
3204
3258
  }
3205
- const allowedFields = /* @__PURE__ */ new Set([
3206
- "$schema",
3207
- "guideline_patterns",
3208
- "eval_patterns",
3209
- "required_version",
3210
- "execution"
3211
- ]);
3259
+ const allowedFields = /* @__PURE__ */ new Set(["$schema", "eval_patterns", "required_version", "execution"]);
3212
3260
  const unexpectedFields = Object.keys(config).filter((key) => !allowedFields.has(key));
3213
3261
  if (unexpectedFields.length > 0) {
3214
3262
  errors.push({
@@ -3614,6 +3662,34 @@ function normalizeOptionalNumber(value) {
3614
3662
  function normalizeWorkspaceMode(value) {
3615
3663
  return value === "pooled" || value === "temp" || value === "static" ? value : void 0;
3616
3664
  }
3665
+ function normalizeOutputMessages(cliValue) {
3666
+ if (cliValue === void 0) {
3667
+ return 1;
3668
+ }
3669
+ if (cliValue === "all") {
3670
+ return "all";
3671
+ }
3672
+ const parsed = Number.parseInt(cliValue, 10);
3673
+ if (Number.isNaN(parsed) || !Number.isInteger(parsed) || parsed < 1) {
3674
+ console.warn(
3675
+ `Warning: Invalid --output-messages value '${cliValue}'. Must be a positive integer or 'all'. Defaulting to 1.`
3676
+ );
3677
+ return 1;
3678
+ }
3679
+ return parsed;
3680
+ }
3681
+ function trimOutputMessages(output, outputMessages) {
3682
+ const messages = output ?? [];
3683
+ if (outputMessages === "all") {
3684
+ return messages.map((m) => ({ role: m.role, content: m.content }));
3685
+ }
3686
+ if (outputMessages === 1) {
3687
+ const lastAssistant = messages.filter((m) => m.role === "assistant").at(-1);
3688
+ return lastAssistant ? [{ role: lastAssistant.role, content: lastAssistant.content }] : [];
3689
+ }
3690
+ const sliced = messages.slice(-outputMessages);
3691
+ return sliced.map((m) => ({ role: m.role, content: m.content }));
3692
+ }
3617
3693
  function normalizeOptions(rawOptions, config, yamlExecution) {
3618
3694
  const cliFormat = normalizeString(rawOptions.outputFormat);
3619
3695
  const configFormat = config?.output?.format;
@@ -3693,7 +3769,8 @@ function normalizeOptions(rawOptions, config, yamlExecution) {
3693
3769
  benchmarkJson: normalizeString(rawOptions.benchmarkJson),
3694
3770
  artifacts: normalizeString(rawOptions.artifacts),
3695
3771
  graderTarget: normalizeString(rawOptions.graderTarget),
3696
- model: normalizeString(rawOptions.model)
3772
+ model: normalizeString(rawOptions.model),
3773
+ outputMessages: normalizeOutputMessages(normalizeString(rawOptions.outputMessages))
3697
3774
  };
3698
3775
  }
3699
3776
  async function ensureFileExists(filePath, description) {
@@ -3942,8 +4019,12 @@ async function runSingleEvalFile(params) {
3942
4019
  streamCallbacks: streamingObserver?.getStreamCallbacks(),
3943
4020
  onResult: async (result) => {
3944
4021
  streamingObserver?.finalizeEvalCase(result.score, result.error);
3945
- const { output: _, ...resultWithoutTrace } = result;
3946
- await outputWriter.append(resultWithoutTrace);
4022
+ const trimmedOutput = trimOutputMessages(result.output, options.outputMessages);
4023
+ const trimmedResult = {
4024
+ ...result,
4025
+ output: trimmedOutput
4026
+ };
4027
+ await outputWriter.append(trimmedResult);
3947
4028
  if (otelExporter && !streamingObserver) {
3948
4029
  try {
3949
4030
  await otelExporter.exportResult(result);
@@ -4044,7 +4125,7 @@ async function runEvalCommand(input) {
4044
4125
  const useFileExport = !!(options.otelFile || options.traceFile);
4045
4126
  if (options.exportOtel || useFileExport) {
4046
4127
  try {
4047
- const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-VP6AXX6B.js");
4128
+ const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-PIOSPBKX.js");
4048
4129
  let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
4049
4130
  let headers = {};
4050
4131
  if (options.otelBackend) {
@@ -4386,4 +4467,4 @@ export {
4386
4467
  selectTarget,
4387
4468
  runEvalCommand
4388
4469
  };
4389
- //# sourceMappingURL=chunk-7YS6YNJZ.js.map
4470
+ //# sourceMappingURL=chunk-GC5P5HHZ.js.map