agentv 4.14.0-next.1 → 4.15.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -41,12 +41,12 @@ import {
41
41
  subscribeToPiLogEntries,
42
42
  syncResultsRepo,
43
43
  toCamelCaseDeep
44
- } from "./chunk-HP3ZUKYB.js";
44
+ } from "./chunk-VSWJTL3T.js";
45
45
 
46
46
  // package.json
47
47
  var package_default = {
48
48
  name: "agentv",
49
- version: "4.14.0-next.1",
49
+ version: "4.15.0-next.1",
50
50
  description: "CLI entry point for AgentV",
51
51
  type: "module",
52
52
  repository: {
@@ -1491,6 +1491,37 @@ async function writeArtifactsFromResults(results, outputDir, options) {
1491
1491
  await writeFile(benchmarkPath, `${JSON.stringify(benchmark, null, 2)}
1492
1492
  `, "utf8");
1493
1493
  await writeJsonlFile(indexPath, indexRecords);
1494
+ const transcriptPath = path7.join(outputDir, "transcript.jsonl");
1495
+ const transcriptLines = results.map((result) => {
1496
+ let inputText = "";
1497
+ if (typeof result.input === "string") {
1498
+ inputText = result.input;
1499
+ } else if (Array.isArray(result.input)) {
1500
+ const firstUserMsg = result.input.find((m) => m.role === "user");
1501
+ inputText = typeof firstUserMsg?.content === "string" ? firstUserMsg.content : "";
1502
+ }
1503
+ return {
1504
+ input: inputText,
1505
+ output: result.output,
1506
+ token_usage: result.tokenUsage ? {
1507
+ input: result.tokenUsage.input,
1508
+ output: result.tokenUsage.output,
1509
+ cached: result.tokenUsage.cached
1510
+ } : void 0,
1511
+ duration_ms: result.durationMs,
1512
+ cost_usd: result.costUsd,
1513
+ source: {
1514
+ provider: result.target,
1515
+ session_id: result.conversationId ?? result.testId,
1516
+ timestamp: result.timestamp
1517
+ }
1518
+ };
1519
+ });
1520
+ await writeFile(
1521
+ transcriptPath,
1522
+ transcriptLines.map((line) => JSON.stringify(line)).join("\n") + (transcriptLines.length ? "\n" : ""),
1523
+ "utf8"
1524
+ );
1494
1525
  return { testArtifactDir, timingPath, benchmarkPath, indexPath };
1495
1526
  }
1496
1527
 
@@ -3147,7 +3178,14 @@ var KNOWN_TEST_FIELDS = /* @__PURE__ */ new Set([
3147
3178
  "metadata",
3148
3179
  "conversation_id",
3149
3180
  "suite",
3150
- "note"
3181
+ "note",
3182
+ "depends_on",
3183
+ "on_dependency_failure",
3184
+ "mode",
3185
+ "turns",
3186
+ "aggregation",
3187
+ "on_turn_failure",
3188
+ "window_size"
3151
3189
  ]);
3152
3190
  var NAME_PATTERN = /^[a-z0-9-]+$/;
3153
3191
  function isObject(value) {
@@ -3360,6 +3398,7 @@ async function validateEvalFile(filePath) {
3360
3398
  if (assertField !== void 0) {
3361
3399
  validateAssertArray(assertField, location, absolutePath, errors);
3362
3400
  }
3401
+ validateConversationMode(evalCase, location, absolutePath, errors);
3363
3402
  await validateWorkspaceConfig(
3364
3403
  evalCase.workspace,
3365
3404
  absolutePath,
@@ -3456,16 +3495,6 @@ function validateWorkspaceRepoConfig(workspace, filePath, errors) {
3456
3495
  }
3457
3496
  }
3458
3497
  }
3459
- if (isObject(afterEachHook) && afterEachHook.reset && afterEachHook.reset !== "none") {
3460
- if (!Array.isArray(repos) || repos.length === 0) {
3461
- errors.push({
3462
- severity: "warning",
3463
- filePath,
3464
- location: "workspace.hooks.after_each",
3465
- message: `hooks.after_each.reset '${afterEachHook.reset}' has no effect without repos.`
3466
- });
3467
- }
3468
- }
3469
3498
  if (isObject(afterEachHook) && afterEachHook.reset && isolation === "per_test") {
3470
3499
  errors.push({
3471
3500
  severity: "warning",
@@ -3712,6 +3741,86 @@ function validateContentForRoleMarkers(content, location, filePath, errors) {
3712
3741
  }
3713
3742
  }
3714
3743
  }
3744
/**
 * Validates the conversation-mode fields of one eval case and appends every
 * problem found to `errors` (nothing is thrown or returned).
 *
 * Rules enforced:
 * - `turns`, `aggregation`, `on_turn_failure` and `window_size` are rejected
 *   unless `mode` is exactly "conversation".
 * - `mode: conversation` requires `turns` to be a non-empty array.
 * - A top-level `expected_output` is rejected when `mode: conversation` is
 *   combined with a `turns` array.
 * - Every turn must be an object whose `input` is present and non-empty
 *   (not undefined/null, not a blank string, not an empty array).
 *
 * @param {object} evalCase - Raw eval case whose fields are inspected.
 * @param {string} location - Prefix used to build each error's `location`.
 * @param {string} filePath - Copied verbatim onto each reported error.
 * @param {Array<object>} errors - Accumulator; mutated in place via `push`.
 * @returns {void}
 */
function validateConversationMode(evalCase, location, filePath, errors) {
  const isConversationMode = evalCase.mode === "conversation";

  if (!isConversationMode) {
    // These fields only make sense for conversation cases. The list order is
    // the order in which the errors are reported.
    const conversationOnlyFields = ["turns", "aggregation", "on_turn_failure", "window_size"];
    for (const field of conversationOnlyFields) {
      if (evalCase[field] !== void 0) {
        errors.push({
          severity: "error",
          filePath,
          location: `${location}.${field}`,
          message: `'${field}' requires mode: conversation`
        });
      }
    }
    return;
  }

  const turns = evalCase.turns;
  if (!Array.isArray(turns) || turns.length === 0) {
    errors.push({
      severity: "error",
      filePath,
      location: `${location}.mode`,
      message: "mode: conversation requires a non-empty 'turns' array"
    });
  }
  if (!Array.isArray(turns)) {
    // Nothing further can be checked without a turns array.
    return;
  }

  if (evalCase.expected_output !== void 0) {
    errors.push({
      severity: "error",
      filePath,
      location: `${location}.expected_output`,
      message: "Top-level 'expected_output' is not allowed with mode: conversation (use per-turn expected_output instead)"
    });
  }

  for (let i = 0; i < turns.length; i++) {
    const turn = turns[i];
    if (!isObject(turn)) {
      errors.push({
        severity: "error",
        filePath,
        location: `${location}.turns[${i}]`,
        message: "Turn must be an object"
      });
      continue;
    }
    const turnInput = turn.input;
    // `null` is treated like a missing input: a key written with no value
    // would otherwise slip past the non-empty check.
    const isMissing = turnInput === void 0 || turnInput === null;
    const isBlankString = typeof turnInput === "string" && turnInput.trim() === "";
    const isEmptyArray = Array.isArray(turnInput) && turnInput.length === 0;
    if (isMissing || isBlankString || isEmptyArray) {
      errors.push({
        severity: "error",
        filePath,
        location: `${location}.turns[${i}].input`,
        message: "Each turn must have a non-empty input"
      });
    }
  }
}
3715
3824
  function isObject2(value) {
3716
3825
  return typeof value === "object" && value !== null && !Array.isArray(value);
3717
3826
  }
@@ -5242,7 +5351,7 @@ async function runEvalCommand(input) {
5242
5351
  const useFileExport = !!options.otelFile;
5243
5352
  if (options.exportOtel || useFileExport) {
5244
5353
  try {
5245
- const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-Y3NRIBUT.js");
5354
+ const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-G6XAYD4R.js");
5246
5355
  let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
5247
5356
  let headers = {};
5248
5357
  if (options.otelBackend) {
@@ -5438,7 +5547,7 @@ async function runEvalCommand(input) {
5438
5547
  const activeTestFiles = resolvedTestFiles.filter((f) => fileMetadata.has(f));
5439
5548
  let transcriptProviderFactory;
5440
5549
  if (options.transcript) {
5441
- const { TranscriptProvider } = await import("./dist-Y3NRIBUT.js");
5550
+ const { TranscriptProvider } = await import("./dist-G6XAYD4R.js");
5442
5551
  const transcriptProvider = await TranscriptProvider.fromFile(options.transcript);
5443
5552
  const totalTests = [...fileMetadata.values()].reduce(
5444
5553
  (sum, meta) => sum + meta.testCases.length,
@@ -5760,4 +5869,4 @@ export {
5760
5869
  getCategories,
5761
5870
  filterByCategory
5762
5871
  };
5763
- //# sourceMappingURL=chunk-YFQMID6V.js.map
5872
+ //# sourceMappingURL=chunk-CJFA423D.js.map