@agentv/core 4.4.1 → 4.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1491,6 +1491,7 @@ __export(index_exports, {
1491
1491
  deriveProjectId: () => deriveProjectId,
1492
1492
  detectFormat: () => detectFormat,
1493
1493
  discoverAssertions: () => discoverAssertions,
1494
+ discoverClaudeSessions: () => discoverClaudeSessions,
1494
1495
  discoverCopilotSessions: () => discoverCopilotSessions,
1495
1496
  discoverGraders: () => discoverGraders,
1496
1497
  discoverJudges: () => discoverGraders,
@@ -1550,12 +1551,14 @@ __export(index_exports, {
1550
1551
  negateScore: () => negateScore,
1551
1552
  normalizeLineEndings: () => normalizeLineEndings,
1552
1553
  parseAgentSkillsEvals: () => parseAgentSkillsEvals,
1554
+ parseClaudeSession: () => parseClaudeSession,
1553
1555
  parseJsonFromText: () => parseJsonFromText,
1554
1556
  parseJsonSafe: () => parseJsonSafe,
1555
1557
  readJsonFile: () => readJsonFile,
1556
1558
  readTargetDefinitions: () => readTargetDefinitions,
1557
1559
  readTestSuiteMetadata: () => readTestSuiteMetadata,
1558
1560
  readTextFile: () => readTextFile,
1561
+ readTranscriptFile: () => readTranscriptFile,
1559
1562
  removeProject: () => removeProject,
1560
1563
  resolveAndCreateProvider: () => resolveAndCreateProvider,
1561
1564
  resolveFileReference: () => resolveFileReference3,
@@ -8763,6 +8766,7 @@ var CopilotLogProvider = class {
8763
8766
  // src/evaluation/providers/copilot-sdk.ts
8764
8767
  init_cjs_shims();
8765
8768
  var import_node_crypto6 = require("crypto");
8769
+ var import_node_fs8 = require("fs");
8766
8770
  var import_promises17 = require("fs/promises");
8767
8771
  var import_node_path20 = __toESM(require("path"), 1);
8768
8772
 
@@ -8879,6 +8883,7 @@ var CopilotSdkProvider = class {
8879
8883
  const cwd = this.resolveCwd(request.cwd);
8880
8884
  if (cwd) {
8881
8885
  sessionOptions.workingDirectory = cwd;
8886
+ sessionOptions.skillDirectories = resolveSkillDirectories(cwd);
8882
8887
  }
8883
8888
  const systemPrompt = this.config.systemPrompt;
8884
8889
  if (systemPrompt) {
@@ -9110,6 +9115,14 @@ var CopilotSdkProvider = class {
9110
9115
  }
9111
9116
  }
9112
9117
  };
9118
+ function resolveSkillDirectories(cwd) {
9119
+ const candidates = [
9120
+ import_node_path20.default.join(cwd, ".claude", "skills"),
9121
+ import_node_path20.default.join(cwd, ".agents", "skills"),
9122
+ import_node_path20.default.join(cwd, ".codex", "skills")
9123
+ ];
9124
+ return candidates.filter((dir) => (0, import_node_fs8.existsSync)(dir));
9125
+ }
9113
9126
  function summarizeSdkEvent(eventType, data) {
9114
9127
  if (!data || typeof data !== "object") {
9115
9128
  return eventType;
@@ -9179,7 +9192,7 @@ var MockProvider = class {
9179
9192
  init_cjs_shims();
9180
9193
  var import_node_child_process4 = require("child_process");
9181
9194
  var import_node_crypto7 = require("crypto");
9182
- var import_node_fs8 = require("fs");
9195
+ var import_node_fs9 = require("fs");
9183
9196
  var import_promises18 = require("fs/promises");
9184
9197
  var import_node_os5 = require("os");
9185
9198
  var import_node_path21 = __toESM(require("path"), 1);
@@ -9552,7 +9565,7 @@ var PiStreamLogger = class _PiStreamLogger {
9552
9565
  constructor(filePath, format) {
9553
9566
  this.filePath = filePath;
9554
9567
  this.format = format;
9555
- this.stream = (0, import_node_fs8.createWriteStream)(filePath, { flags: "a" });
9568
+ this.stream = (0, import_node_fs9.createWriteStream)(filePath, { flags: "a" });
9556
9569
  }
9557
9570
  static async create(options) {
9558
9571
  const logger = new _PiStreamLogger(options.filePath, options.format);
@@ -10025,7 +10038,7 @@ async function defaultPiRunner(options) {
10025
10038
  init_cjs_shims();
10026
10039
  var import_node_child_process5 = require("child_process");
10027
10040
  var import_node_crypto8 = require("crypto");
10028
- var import_node_fs9 = require("fs");
10041
+ var import_node_fs10 = require("fs");
10029
10042
  var import_promises19 = require("fs/promises");
10030
10043
  var import_node_path22 = __toESM(require("path"), 1);
10031
10044
  var import_node_readline = require("readline");
@@ -10053,7 +10066,7 @@ function findAgentvRoot() {
10053
10066
  for (let i = 0; i < 10; i++) {
10054
10067
  try {
10055
10068
  const pkg = import_node_path22.default.join(dir, "package.json");
10056
- (0, import_node_fs9.accessSync)(pkg);
10069
+ (0, import_node_fs10.accessSync)(pkg);
10057
10070
  return dir;
10058
10071
  } catch {
10059
10072
  const parent = import_node_path22.default.dirname(dir);
@@ -10143,6 +10156,11 @@ var PiCodingAgentProvider = class {
10143
10156
  const modelId = this.config.model ?? "gemini-2.5-flash";
10144
10157
  this.setApiKeyEnv(providerName);
10145
10158
  const model = sdk.getModel(providerName, modelId);
10159
+ if (!model) {
10160
+ throw new Error(
10161
+ `pi-coding-agent: getModel('${providerName}', '${modelId}') returned undefined. The model '${modelId}' is not registered for provider '${providerName}' in pi-ai. Check that subprovider and model are correct in your target config.`
10162
+ );
10163
+ }
10146
10164
  const tools = this.resolveTools(sdk);
10147
10165
  const { session } = await sdk.createAgentSession({
10148
10166
  cwd,
@@ -10257,6 +10275,15 @@ ${fileList}`;
10257
10275
  await session.prompt(prompt);
10258
10276
  }
10259
10277
  const agentMessages = session.agent.state.messages;
10278
+ const lastAssistant = [...agentMessages].reverse().find(
10279
+ (m) => !!m && typeof m === "object" && m.role === "assistant"
10280
+ );
10281
+ if (lastAssistant?.stopReason === "error") {
10282
+ const errorMsg = typeof lastAssistant.errorMessage === "string" ? lastAssistant.errorMessage : "unknown SDK error";
10283
+ throw new Error(
10284
+ `pi-coding-agent SDK error (provider: ${lastAssistant.provider ?? providerName}, model: ${lastAssistant.model ?? modelId}): ${errorMsg}`
10285
+ );
10286
+ }
10260
10287
  const output = [];
10261
10288
  for (const msg of agentMessages) {
10262
10289
  output.push(convertAgentMessage(msg, toolTrackers, completedToolResults));
@@ -10372,7 +10399,7 @@ var PiStreamLogger2 = class _PiStreamLogger {
10372
10399
  constructor(filePath, format) {
10373
10400
  this.filePath = filePath;
10374
10401
  this.format = format;
10375
- this.stream = (0, import_node_fs9.createWriteStream)(filePath, { flags: "a" });
10402
+ this.stream = (0, import_node_fs10.createWriteStream)(filePath, { flags: "a" });
10376
10403
  }
10377
10404
  static async create(options) {
10378
10405
  const logger = new _PiStreamLogger(options.filePath, options.format);
@@ -11565,8 +11592,8 @@ function resolveCliConfig(target, env, evalFilePath) {
11565
11592
  const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
11566
11593
  if (!parseResult.success) {
11567
11594
  const firstError = parseResult.error.errors[0];
11568
- const path51 = firstError?.path.join(".") || "";
11569
- const prefix = path51 ? `${target.name} ${path51}: ` : `${target.name}: `;
11595
+ const path52 = firstError?.path.join(".") || "";
11596
+ const prefix = path52 ? `${target.name} ${path52}: ` : `${target.name}: `;
11570
11597
  throw new Error(`${prefix}${firstError?.message}`);
11571
11598
  }
11572
11599
  const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
@@ -11827,12 +11854,12 @@ var import_node_path33 = __toESM(require("path"), 1);
11827
11854
 
11828
11855
  // src/evaluation/providers/vscode/utils/fs.ts
11829
11856
  init_cjs_shims();
11830
- var import_node_fs10 = require("fs");
11857
+ var import_node_fs11 = require("fs");
11831
11858
  var import_promises20 = require("fs/promises");
11832
11859
  var import_node_path24 = __toESM(require("path"), 1);
11833
11860
  async function pathExists(target) {
11834
11861
  try {
11835
- await (0, import_promises20.access)(target, import_node_fs10.constants.F_OK);
11862
+ await (0, import_promises20.access)(target, import_node_fs11.constants.F_OK);
11836
11863
  return true;
11837
11864
  } catch {
11838
11865
  return false;
@@ -13312,7 +13339,7 @@ function isAgentProvider(provider) {
13312
13339
 
13313
13340
  // src/evaluation/providers/targets-file.ts
13314
13341
  init_cjs_shims();
13315
- var import_node_fs11 = require("fs");
13342
+ var import_node_fs12 = require("fs");
13316
13343
  var import_promises27 = require("fs/promises");
13317
13344
  var import_node_path36 = __toESM(require("path"), 1);
13318
13345
  var import_yaml7 = require("yaml");
@@ -13344,7 +13371,7 @@ function assertTargetDefinition(value, index, filePath) {
13344
13371
  }
13345
13372
  async function fileExists3(filePath) {
13346
13373
  try {
13347
- await (0, import_promises27.access)(filePath, import_node_fs11.constants.F_OK);
13374
+ await (0, import_promises27.access)(filePath, import_node_fs12.constants.F_OK);
13348
13375
  return true;
13349
13376
  } catch {
13350
13377
  return false;
@@ -13629,15 +13656,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
13629
13656
  });
13630
13657
  }
13631
13658
  async function execShellWithStdin(command, stdinPayload, options = {}) {
13632
- const { mkdir: mkdir17, readFile: readFile16, rm: rm6, writeFile: writeFile9 } = await import("fs/promises");
13659
+ const { mkdir: mkdir17, readFile: readFile17, rm: rm6, writeFile: writeFile9 } = await import("fs/promises");
13633
13660
  const { tmpdir: tmpdir3 } = await import("os");
13634
- const path51 = await import("path");
13661
+ const path52 = await import("path");
13635
13662
  const { randomUUID: randomUUID10 } = await import("crypto");
13636
- const dir = path51.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
13663
+ const dir = path52.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
13637
13664
  await mkdir17(dir, { recursive: true });
13638
- const stdinPath = path51.join(dir, "stdin.txt");
13639
- const stdoutPath = path51.join(dir, "stdout.txt");
13640
- const stderrPath = path51.join(dir, "stderr.txt");
13665
+ const stdinPath = path52.join(dir, "stdin.txt");
13666
+ const stdoutPath = path52.join(dir, "stdout.txt");
13667
+ const stderrPath = path52.join(dir, "stderr.txt");
13641
13668
  await writeFile9(stdinPath, stdinPayload, "utf8");
13642
13669
  const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
13643
13670
  const { spawn: spawn5 } = await import("child_process");
@@ -13667,8 +13694,8 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
13667
13694
  resolve(code ?? 0);
13668
13695
  });
13669
13696
  });
13670
- const stdout = (await readFile16(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
13671
- const stderr = (await readFile16(stderrPath, "utf8")).replace(/\r\n/g, "\n");
13697
+ const stdout = (await readFile17(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
13698
+ const stderr = (await readFile17(stderrPath, "utf8")).replace(/\r\n/g, "\n");
13672
13699
  return { stdout, stderr, exitCode };
13673
13700
  } finally {
13674
13701
  await rm6(dir, { recursive: true, force: true });
@@ -15148,11 +15175,11 @@ function createFilesystemTools(workspacePath) {
15148
15175
  execute: async (input) => {
15149
15176
  try {
15150
15177
  const resolved = resolveSandboxed(workspacePath, input.path);
15151
- const stat9 = await import_promises29.default.stat(resolved);
15152
- if (stat9.isDirectory()) {
15178
+ const stat10 = await import_promises29.default.stat(resolved);
15179
+ if (stat10.isDirectory()) {
15153
15180
  return { error: `'${input.path}' is a directory, not a file` };
15154
15181
  }
15155
- const buffer = Buffer.alloc(Math.min(stat9.size, MAX_FILE_SIZE));
15182
+ const buffer = Buffer.alloc(Math.min(stat10.size, MAX_FILE_SIZE));
15156
15183
  const fd = await import_promises29.default.open(resolved, "r");
15157
15184
  try {
15158
15185
  await fd.read(buffer, 0, buffer.length, 0);
@@ -15160,8 +15187,8 @@ function createFilesystemTools(workspacePath) {
15160
15187
  await fd.close();
15161
15188
  }
15162
15189
  const content = buffer.toString("utf-8");
15163
- const truncated = stat9.size > MAX_FILE_SIZE;
15164
- return { content, truncated, size: stat9.size };
15190
+ const truncated = stat10.size > MAX_FILE_SIZE;
15191
+ return { content, truncated, size: stat10.size };
15165
15192
  } catch (error) {
15166
15193
  return { error: error instanceof Error ? error.message : String(error) };
15167
15194
  }
@@ -15212,8 +15239,8 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
15212
15239
  const ext = import_node_path39.default.extname(entry.name).toLowerCase();
15213
15240
  if (BINARY_EXTENSIONS.has(ext)) continue;
15214
15241
  try {
15215
- const stat9 = await import_promises29.default.stat(fullPath);
15216
- if (stat9.size > MAX_FILE_SIZE) continue;
15242
+ const stat10 = await import_promises29.default.stat(fullPath);
15243
+ if (stat10.size > MAX_FILE_SIZE) continue;
15217
15244
  const content = await import_promises29.default.readFile(fullPath, "utf-8");
15218
15245
  const lines = content.split("\n");
15219
15246
  for (let i = 0; i < lines.length; i++) {
@@ -15857,115 +15884,115 @@ var FieldAccuracyEvaluator = class {
15857
15884
  * Evaluate a single field against the expected value.
15858
15885
  */
15859
15886
  evaluateField(fieldConfig, candidateData, expectedData) {
15860
- const { path: path51, match, required = true, weight = 1 } = fieldConfig;
15861
- const candidateValue = resolvePath(candidateData, path51);
15862
- const expectedValue = resolvePath(expectedData, path51);
15887
+ const { path: path52, match, required = true, weight = 1 } = fieldConfig;
15888
+ const candidateValue = resolvePath(candidateData, path52);
15889
+ const expectedValue = resolvePath(expectedData, path52);
15863
15890
  if (expectedValue === void 0) {
15864
15891
  return {
15865
- path: path51,
15892
+ path: path52,
15866
15893
  score: 1,
15867
15894
  // No expected value means no comparison needed
15868
15895
  weight,
15869
15896
  hit: true,
15870
- message: `${path51}: no expected value`
15897
+ message: `${path52}: no expected value`
15871
15898
  };
15872
15899
  }
15873
15900
  if (candidateValue === void 0) {
15874
15901
  if (required) {
15875
15902
  return {
15876
- path: path51,
15903
+ path: path52,
15877
15904
  score: 0,
15878
15905
  weight,
15879
15906
  hit: false,
15880
- message: `${path51} (required, missing)`
15907
+ message: `${path52} (required, missing)`
15881
15908
  };
15882
15909
  }
15883
15910
  return {
15884
- path: path51,
15911
+ path: path52,
15885
15912
  score: 1,
15886
15913
  // Don't penalize missing optional fields
15887
15914
  weight: 0,
15888
15915
  // Zero weight means it won't affect the score
15889
15916
  hit: true,
15890
- message: `${path51}: optional field missing`
15917
+ message: `${path52}: optional field missing`
15891
15918
  };
15892
15919
  }
15893
15920
  switch (match) {
15894
15921
  case "exact":
15895
- return this.compareExact(path51, candidateValue, expectedValue, weight);
15922
+ return this.compareExact(path52, candidateValue, expectedValue, weight);
15896
15923
  case "numeric_tolerance":
15897
15924
  return this.compareNumericTolerance(
15898
- path51,
15925
+ path52,
15899
15926
  candidateValue,
15900
15927
  expectedValue,
15901
15928
  fieldConfig,
15902
15929
  weight
15903
15930
  );
15904
15931
  case "date":
15905
- return this.compareDate(path51, candidateValue, expectedValue, fieldConfig, weight);
15932
+ return this.compareDate(path52, candidateValue, expectedValue, fieldConfig, weight);
15906
15933
  default:
15907
15934
  return {
15908
- path: path51,
15935
+ path: path52,
15909
15936
  score: 0,
15910
15937
  weight,
15911
15938
  hit: false,
15912
- message: `${path51}: unknown match type "${match}"`
15939
+ message: `${path52}: unknown match type "${match}"`
15913
15940
  };
15914
15941
  }
15915
15942
  }
15916
15943
  /**
15917
15944
  * Exact equality comparison.
15918
15945
  */
15919
- compareExact(path51, candidateValue, expectedValue, weight) {
15946
+ compareExact(path52, candidateValue, expectedValue, weight) {
15920
15947
  if (deepEqual(candidateValue, expectedValue)) {
15921
15948
  return {
15922
- path: path51,
15949
+ path: path52,
15923
15950
  score: 1,
15924
15951
  weight,
15925
15952
  hit: true,
15926
- message: path51
15953
+ message: path52
15927
15954
  };
15928
15955
  }
15929
15956
  if (typeof candidateValue !== typeof expectedValue) {
15930
15957
  return {
15931
- path: path51,
15958
+ path: path52,
15932
15959
  score: 0,
15933
15960
  weight,
15934
15961
  hit: false,
15935
- message: `${path51} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
15962
+ message: `${path52} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
15936
15963
  };
15937
15964
  }
15938
15965
  return {
15939
- path: path51,
15966
+ path: path52,
15940
15967
  score: 0,
15941
15968
  weight,
15942
15969
  hit: false,
15943
- message: `${path51} (value mismatch)`
15970
+ message: `${path52} (value mismatch)`
15944
15971
  };
15945
15972
  }
15946
15973
  /**
15947
15974
  * Numeric comparison with absolute or relative tolerance.
15948
15975
  */
15949
- compareNumericTolerance(path51, candidateValue, expectedValue, fieldConfig, weight) {
15976
+ compareNumericTolerance(path52, candidateValue, expectedValue, fieldConfig, weight) {
15950
15977
  const { tolerance = 0, relative = false } = fieldConfig;
15951
15978
  const candidateNum = toNumber(candidateValue);
15952
15979
  const expectedNum = toNumber(expectedValue);
15953
15980
  if (candidateNum === null || expectedNum === null) {
15954
15981
  return {
15955
- path: path51,
15982
+ path: path52,
15956
15983
  score: 0,
15957
15984
  weight,
15958
15985
  hit: false,
15959
- message: `${path51} (non-numeric value)`
15986
+ message: `${path52} (non-numeric value)`
15960
15987
  };
15961
15988
  }
15962
15989
  if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
15963
15990
  return {
15964
- path: path51,
15991
+ path: path52,
15965
15992
  score: 0,
15966
15993
  weight,
15967
15994
  hit: false,
15968
- message: `${path51} (invalid numeric value)`
15995
+ message: `${path52} (invalid numeric value)`
15969
15996
  };
15970
15997
  }
15971
15998
  const diff = Math.abs(candidateNum - expectedNum);
@@ -15978,61 +16005,61 @@ var FieldAccuracyEvaluator = class {
15978
16005
  }
15979
16006
  if (withinTolerance) {
15980
16007
  return {
15981
- path: path51,
16008
+ path: path52,
15982
16009
  score: 1,
15983
16010
  weight,
15984
16011
  hit: true,
15985
- message: `${path51} (within tolerance: diff=${diff.toFixed(2)})`
16012
+ message: `${path52} (within tolerance: diff=${diff.toFixed(2)})`
15986
16013
  };
15987
16014
  }
15988
16015
  return {
15989
- path: path51,
16016
+ path: path52,
15990
16017
  score: 0,
15991
16018
  weight,
15992
16019
  hit: false,
15993
- message: `${path51} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
16020
+ message: `${path52} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
15994
16021
  };
15995
16022
  }
15996
16023
  /**
15997
16024
  * Date comparison with format normalization.
15998
16025
  */
15999
- compareDate(path51, candidateValue, expectedValue, fieldConfig, weight) {
16026
+ compareDate(path52, candidateValue, expectedValue, fieldConfig, weight) {
16000
16027
  const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
16001
16028
  const candidateDate = parseDate(String(candidateValue), formats);
16002
16029
  const expectedDate = parseDate(String(expectedValue), formats);
16003
16030
  if (candidateDate === null) {
16004
16031
  return {
16005
- path: path51,
16032
+ path: path52,
16006
16033
  score: 0,
16007
16034
  weight,
16008
16035
  hit: false,
16009
- message: `${path51} (unparseable candidate date)`
16036
+ message: `${path52} (unparseable candidate date)`
16010
16037
  };
16011
16038
  }
16012
16039
  if (expectedDate === null) {
16013
16040
  return {
16014
- path: path51,
16041
+ path: path52,
16015
16042
  score: 0,
16016
16043
  weight,
16017
16044
  hit: false,
16018
- message: `${path51} (unparseable expected date)`
16045
+ message: `${path52} (unparseable expected date)`
16019
16046
  };
16020
16047
  }
16021
16048
  if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
16022
16049
  return {
16023
- path: path51,
16050
+ path: path52,
16024
16051
  score: 1,
16025
16052
  weight,
16026
16053
  hit: true,
16027
- message: path51
16054
+ message: path52
16028
16055
  };
16029
16056
  }
16030
16057
  return {
16031
- path: path51,
16058
+ path: path52,
16032
16059
  score: 0,
16033
16060
  weight,
16034
16061
  hit: false,
16035
- message: `${path51} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
16062
+ message: `${path52} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
16036
16063
  };
16037
16064
  }
16038
16065
  /**
@@ -16065,11 +16092,11 @@ var FieldAccuracyEvaluator = class {
16065
16092
  };
16066
16093
  }
16067
16094
  };
16068
- function resolvePath(obj, path51) {
16069
- if (!path51 || !obj) {
16095
+ function resolvePath(obj, path52) {
16096
+ if (!path52 || !obj) {
16070
16097
  return void 0;
16071
16098
  }
16072
- const parts = path51.split(/\.|\[|\]/).filter((p) => p.length > 0);
16099
+ const parts = path52.split(/\.|\[|\]/).filter((p) => p.length > 0);
16073
16100
  let current = obj;
16074
16101
  for (const part of parts) {
16075
16102
  if (current === null || current === void 0) {
@@ -16295,6 +16322,14 @@ var SkillTriggerEvaluator = class {
16295
16322
  evidence = `Read tool loaded skill file via tool name "${toolName}"`;
16296
16323
  break;
16297
16324
  }
16325
+ if (!triggered && toolCall.output != null) {
16326
+ const outputStr = typeof toolCall.output === "string" ? toolCall.output : JSON.stringify(toolCall.output);
16327
+ if (outputStr.includes(`skills/${skillName}/`)) {
16328
+ triggered = true;
16329
+ evidence = `Tool "${toolName}" output referenced skill file for "${skillName}"`;
16330
+ break;
16331
+ }
16332
+ }
16298
16333
  }
16299
16334
  const pass = triggered === shouldTrigger;
16300
16335
  if (pass) {
@@ -16558,8 +16593,8 @@ var TokenUsageEvaluator = class {
16558
16593
 
16559
16594
  // src/evaluation/evaluators/tool-trajectory.ts
16560
16595
  init_cjs_shims();
16561
- function getNestedValue(obj, path51) {
16562
- const parts = path51.split(".");
16596
+ function getNestedValue(obj, path52) {
16597
+ const parts = path52.split(".");
16563
16598
  let current = obj;
16564
16599
  for (const part of parts) {
16565
16600
  if (current === null || current === void 0 || typeof current !== "object") {
@@ -17957,7 +17992,7 @@ function getTCritical(df) {
17957
17992
  // src/evaluation/workspace/file-changes.ts
17958
17993
  init_cjs_shims();
17959
17994
  var import_node_child_process8 = require("child_process");
17960
- var import_node_fs12 = require("fs");
17995
+ var import_node_fs13 = require("fs");
17961
17996
  var import_node_path43 = __toESM(require("path"), 1);
17962
17997
  var import_node_util4 = require("util");
17963
17998
  var execAsync4 = (0, import_node_util4.promisify)(import_node_child_process8.exec);
@@ -17986,7 +18021,7 @@ async function captureFileChanges(workspacePath, baselineCommit) {
17986
18021
  async function stageNestedRepoChanges(workspacePath) {
17987
18022
  let entries;
17988
18023
  try {
17989
- entries = (0, import_node_fs12.readdirSync)(workspacePath);
18024
+ entries = (0, import_node_fs13.readdirSync)(workspacePath);
17990
18025
  } catch {
17991
18026
  return;
17992
18027
  }
@@ -17994,8 +18029,8 @@ async function stageNestedRepoChanges(workspacePath) {
17994
18029
  if (entry === ".git" || entry === "node_modules") continue;
17995
18030
  const childPath = import_node_path43.default.join(workspacePath, entry);
17996
18031
  try {
17997
- if (!(0, import_node_fs12.statSync)(childPath).isDirectory()) continue;
17998
- if (!(0, import_node_fs12.statSync)(import_node_path43.default.join(childPath, ".git")).isDirectory()) continue;
18032
+ if (!(0, import_node_fs13.statSync)(childPath).isDirectory()) continue;
18033
+ if (!(0, import_node_fs13.statSync)(import_node_path43.default.join(childPath, ".git")).isDirectory()) continue;
17999
18034
  } catch {
18000
18035
  continue;
18001
18036
  }
@@ -18115,7 +18150,7 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
18115
18150
  init_cjs_shims();
18116
18151
  var import_node_child_process9 = require("child_process");
18117
18152
  var import_node_crypto10 = require("crypto");
18118
- var import_node_fs13 = require("fs");
18153
+ var import_node_fs14 = require("fs");
18119
18154
  var import_promises31 = require("fs/promises");
18120
18155
  var import_node_path45 = __toESM(require("path"), 1);
18121
18156
  var import_node_util5 = require("util");
@@ -18221,7 +18256,7 @@ var WorkspacePoolManager = class {
18221
18256
  if (!locked) {
18222
18257
  continue;
18223
18258
  }
18224
- const slotExists = (0, import_node_fs13.existsSync)(slotPath);
18259
+ const slotExists = (0, import_node_fs14.existsSync)(slotPath);
18225
18260
  if (slotExists) {
18226
18261
  await this.resetSlot(slotPath, templatePath, repos, poolReset);
18227
18262
  return {
@@ -18327,7 +18362,7 @@ var WorkspacePoolManager = class {
18327
18362
  for (const entry of entries) {
18328
18363
  if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
18329
18364
  const lockPath = import_node_path45.default.join(poolDir, `${entry}.lock`);
18330
- if ((0, import_node_fs13.existsSync)(lockPath)) {
18365
+ if ((0, import_node_fs14.existsSync)(lockPath)) {
18331
18366
  try {
18332
18367
  const pidStr = await (0, import_promises31.readFile)(lockPath, "utf-8");
18333
18368
  const pid = Number.parseInt(pidStr.trim(), 10);
@@ -18358,7 +18393,7 @@ var WorkspacePoolManager = class {
18358
18393
  async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
18359
18394
  for (const repo of repos) {
18360
18395
  const repoDir = import_node_path45.default.join(slotPath, repo.path);
18361
- if (!(0, import_node_fs13.existsSync)(repoDir)) {
18396
+ if (!(0, import_node_fs14.existsSync)(repoDir)) {
18362
18397
  continue;
18363
18398
  }
18364
18399
  if (poolReset === "none") {
@@ -18384,7 +18419,7 @@ var WorkspacePoolManager = class {
18384
18419
  // src/evaluation/workspace/repo-manager.ts
18385
18420
  init_cjs_shims();
18386
18421
  var import_node_child_process10 = require("child_process");
18387
- var import_node_fs14 = require("fs");
18422
+ var import_node_fs15 = require("fs");
18388
18423
  var import_node_path46 = __toESM(require("path"), 1);
18389
18424
  var import_node_util6 = require("util");
18390
18425
  var execFileAsync2 = (0, import_node_util6.promisify)(import_node_child_process10.execFile);
@@ -18436,7 +18471,7 @@ var RepoManager = class {
18436
18471
  resolvedSourcePath: sourcePath ?? "",
18437
18472
  reason: "empty_path"
18438
18473
  });
18439
- } else if (!(0, import_node_fs14.existsSync)(sourcePath)) {
18474
+ } else if (!(0, import_node_fs15.existsSync)(sourcePath)) {
18440
18475
  errors.push({
18441
18476
  repoPath: repo.path,
18442
18477
  resolvedSourcePath: sourcePath,
@@ -18665,8 +18700,8 @@ async function executeWorkspaceScript(config, context2, failureMode = "fatal") {
18665
18700
  }
18666
18701
 
18667
18702
  // src/evaluation/orchestrator.ts
18668
- function classifyQualityStatus(score) {
18669
- return score >= PASS_THRESHOLD ? "ok" : "quality_failure";
18703
+ function classifyQualityStatus(score, threshold = PASS_THRESHOLD) {
18704
+ return score >= threshold ? "ok" : "quality_failure";
18670
18705
  }
18671
18706
  function buildSkippedEvaluatorError(scores) {
18672
18707
  const skippedScores = scores?.filter((score) => score.verdict === "skip") ?? [];
@@ -18742,7 +18777,8 @@ async function runEvaluation(options) {
18742
18777
  retainOnSuccess,
18743
18778
  retainOnFailure,
18744
18779
  graderTarget: cliGraderTarget,
18745
- model: cliModel
18780
+ model: cliModel,
18781
+ threshold: scoreThreshold
18746
18782
  } = options;
18747
18783
  let useCache = options.useCache;
18748
18784
  if (trials && trials.count > 1 && useCache) {
@@ -18871,7 +18907,8 @@ async function runEvaluation(options) {
18871
18907
  resolveGraderProvider,
18872
18908
  agentTimeoutMs,
18873
18909
  targetResolver,
18874
- availableTargets
18910
+ availableTargets,
18911
+ threshold: scoreThreshold
18875
18912
  });
18876
18913
  } catch (error) {
18877
18914
  if (verbose) {
@@ -19240,7 +19277,8 @@ async function runEvaluation(options) {
19240
19277
  typeRegistry,
19241
19278
  repoManager,
19242
19279
  evalDir,
19243
- verbose
19280
+ verbose,
19281
+ threshold: scoreThreshold
19244
19282
  };
19245
19283
  let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
19246
19284
  if (totalBudgetUsd !== void 0) {
@@ -19398,7 +19436,8 @@ async function runBatchEvaluation(options) {
19398
19436
  resolveGraderProvider,
19399
19437
  agentTimeoutMs,
19400
19438
  targetResolver,
19401
- availableTargets
19439
+ availableTargets,
19440
+ threshold: batchThreshold
19402
19441
  } = options;
19403
19442
  const promptInputsList = [];
19404
19443
  const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
@@ -19483,7 +19522,8 @@ async function runBatchEvaluation(options) {
19483
19522
  endTime,
19484
19523
  targetResolver,
19485
19524
  availableTargets,
19486
- verbose
19525
+ verbose,
19526
+ threshold: batchThreshold
19487
19527
  });
19488
19528
  if (providerError) {
19489
19529
  result = {
@@ -19569,7 +19609,8 @@ async function runEvalCase(options) {
19569
19609
  typeRegistry: providedTypeRegistry,
19570
19610
  repoManager,
19571
19611
  evalDir,
19572
- verbose
19612
+ verbose,
19613
+ threshold: caseThreshold
19573
19614
  } = options;
19574
19615
  const setupDebug = process.env.AGENTV_SETUP_DEBUG === "1";
19575
19616
  const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
@@ -19801,7 +19842,9 @@ async function runEvalCase(options) {
19801
19842
  });
19802
19843
  } catch (error) {
19803
19844
  lastError = error;
19804
- if (isTimeoutLike(error) && attempt + 1 < attemptBudget) {
19845
+ if (attempt + 1 < attemptBudget) {
19846
+ const delayMs = retryBackoffMs(attempt);
19847
+ await sleep3(delayMs, signal);
19805
19848
  attempt += 1;
19806
19849
  continue;
19807
19850
  }
@@ -19932,7 +19975,8 @@ async function runEvalCase(options) {
19932
19975
  availableTargets,
19933
19976
  fileChanges,
19934
19977
  workspacePath,
19935
- verbose
19978
+ verbose,
19979
+ threshold: caseThreshold
19936
19980
  });
19937
19981
  const totalDurationMs = Date.now() - caseStartMs;
19938
19982
  const graderTokens = aggregateEvaluatorTokenUsage(result.scores);
@@ -19947,7 +19991,7 @@ async function runEvalCase(options) {
19947
19991
  ...evalRunTokenUsage ? { tokenUsage: evalRunTokenUsage } : {}
19948
19992
  };
19949
19993
  const skippedEvaluatorError = buildSkippedEvaluatorError(result.scores);
19950
- const executionStatus = providerError || skippedEvaluatorError ? "execution_error" : classifyQualityStatus(result.score);
19994
+ const executionStatus = providerError || skippedEvaluatorError ? "execution_error" : classifyQualityStatus(result.score, caseThreshold);
19951
19995
  const finalResult = providerError ? {
19952
19996
  ...result,
19953
19997
  evalRun,
@@ -20118,7 +20162,8 @@ async function evaluateCandidate(options) {
20118
20162
  targetResolver,
20119
20163
  availableTargets,
20120
20164
  fileChanges,
20121
- workspacePath
20165
+ workspacePath,
20166
+ threshold: evalThreshold
20122
20167
  } = options;
20123
20168
  const gradeTimestamp = nowFn();
20124
20169
  const { score, scores } = await runEvaluatorsForCase({
@@ -20191,7 +20236,7 @@ async function evaluateCandidate(options) {
20191
20236
  scores,
20192
20237
  trace: trace2,
20193
20238
  fileChanges,
20194
- executionStatus: classifyQualityStatus(score.score)
20239
+ executionStatus: classifyQualityStatus(score.score, evalThreshold)
20195
20240
  };
20196
20241
  }
20197
20242
  async function runEvaluatorsForCase(options) {
@@ -20495,7 +20540,7 @@ async function invokeProvider(provider, options) {
20495
20540
  }
20496
20541
  }
20497
20542
  function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs, provider, failureStage, failureReasonCode, verbose) {
20498
- const message = error instanceof Error ? error.message : String(error);
20543
+ const message = extractErrorMessage(error);
20499
20544
  let agentRequest;
20500
20545
  let lmRequest;
20501
20546
  if (isAgentProvider(provider)) {
@@ -20612,20 +20657,45 @@ function aggregateEvaluatorTokenUsage(scores) {
20612
20657
  ...hasCached ? { cached } : {}
20613
20658
  };
20614
20659
  }
20615
- function isTimeoutLike(error) {
20616
- if (!error) {
20617
- return false;
20618
- }
20619
- if (typeof DOMException !== "undefined" && error instanceof DOMException && error.name === "AbortError") {
20620
- return true;
20621
- }
20660
+ function extractErrorMessage(error) {
20622
20661
  if (error instanceof Error) {
20623
- const name = error.name?.toLowerCase();
20624
- const message = error.message?.toLowerCase();
20625
- return name.includes("timeout") || message.includes("timeout");
20662
+ return error.message;
20626
20663
  }
20627
- const value = String(error).toLowerCase();
20628
- return value.includes("timeout");
20664
+ if (error !== null && typeof error === "object") {
20665
+ const obj = error;
20666
+ const parts = [];
20667
+ if (typeof obj.message === "string" && obj.message) {
20668
+ parts.push(obj.message);
20669
+ }
20670
+ if (typeof obj.code === "number") {
20671
+ parts.push(`(code ${obj.code})`);
20672
+ }
20673
+ if (parts.length > 0) {
20674
+ return parts.join(" ");
20675
+ }
20676
+ try {
20677
+ return JSON.stringify(error);
20678
+ } catch {
20679
+ }
20680
+ }
20681
+ return String(error);
20682
+ }
20683
+ function retryBackoffMs(attempt) {
20684
+ return Math.min(2 ** attempt * 1e3, 3e4);
20685
+ }
20686
+ function sleep3(ms, signal) {
20687
+ if (signal?.aborted) return Promise.resolve();
20688
+ return new Promise((resolve) => {
20689
+ const timer = setTimeout(resolve, ms);
20690
+ signal?.addEventListener(
20691
+ "abort",
20692
+ () => {
20693
+ clearTimeout(timer);
20694
+ resolve();
20695
+ },
20696
+ { once: true }
20697
+ );
20698
+ });
20629
20699
  }
20630
20700
  function mapChildResults(children) {
20631
20701
  if (!children || children.length === 0) {
@@ -20657,7 +20727,7 @@ function computeWeightedMean(entries) {
20657
20727
 
20658
20728
  // src/evaluation/evaluate.ts
20659
20729
  init_cjs_shims();
20660
- var import_node_fs15 = require("fs");
20730
+ var import_node_fs16 = require("fs");
20661
20731
  var import_node_path49 = __toESM(require("path"), 1);
20662
20732
 
20663
20733
  // src/evaluation/providers/function-provider.ts
@@ -20814,7 +20884,7 @@ async function discoverDefaultTarget(repoRoot) {
20814
20884
  for (const dir of chain) {
20815
20885
  for (const candidate of TARGET_FILE_CANDIDATES) {
20816
20886
  const targetsPath = import_node_path49.default.join(dir, candidate);
20817
- if (!(0, import_node_fs15.existsSync)(targetsPath)) continue;
20887
+ if (!(0, import_node_fs16.existsSync)(targetsPath)) continue;
20818
20888
  try {
20819
20889
  const definitions = await readTargetDefinitions(targetsPath);
20820
20890
  const defaultTarget = definitions.find((d) => d.name === "default");
@@ -20831,7 +20901,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
20831
20901
  const envFiles = [];
20832
20902
  for (const dir of chain) {
20833
20903
  const envPath = import_node_path49.default.join(dir, ".env");
20834
- if ((0, import_node_fs15.existsSync)(envPath)) envFiles.push(envPath);
20904
+ if ((0, import_node_fs16.existsSync)(envPath)) envFiles.push(envPath);
20835
20905
  }
20836
20906
  for (let i = 0; i < envFiles.length; i++) {
20837
20907
  try {
@@ -20908,12 +20978,12 @@ var CONFIG_FILE_NAMES = [
20908
20978
  ".agentv/config.js"
20909
20979
  ];
20910
20980
  async function loadTsConfig(projectRoot) {
20911
- const { existsSync: existsSync6 } = await import("fs");
20981
+ const { existsSync: existsSync7 } = await import("fs");
20912
20982
  const { pathToFileURL } = await import("url");
20913
20983
  const { join: join2 } = await import("path");
20914
20984
  for (const fileName of CONFIG_FILE_NAMES) {
20915
20985
  const filePath = join2(projectRoot, fileName);
20916
- if (!existsSync6(filePath)) {
20986
+ if (!existsSync7(filePath)) {
20917
20987
  continue;
20918
20988
  }
20919
20989
  try {
@@ -21060,7 +21130,7 @@ function shouldSkipCacheForTemperature(targetConfig) {
21060
21130
 
21061
21131
  // src/projects.ts
21062
21132
  init_cjs_shims();
21063
- var import_node_fs16 = require("fs");
21133
+ var import_node_fs17 = require("fs");
21064
21134
  var import_node_path51 = __toESM(require("path"), 1);
21065
21135
  var import_yaml8 = require("yaml");
21066
21136
  function getProjectsRegistryPath() {
@@ -21068,11 +21138,11 @@ function getProjectsRegistryPath() {
21068
21138
  }
21069
21139
  function loadProjectRegistry() {
21070
21140
  const registryPath = getProjectsRegistryPath();
21071
- if (!(0, import_node_fs16.existsSync)(registryPath)) {
21141
+ if (!(0, import_node_fs17.existsSync)(registryPath)) {
21072
21142
  return { projects: [] };
21073
21143
  }
21074
21144
  try {
21075
- const raw = (0, import_node_fs16.readFileSync)(registryPath, "utf-8");
21145
+ const raw = (0, import_node_fs17.readFileSync)(registryPath, "utf-8");
21076
21146
  const parsed = (0, import_yaml8.parse)(raw);
21077
21147
  if (!parsed || !Array.isArray(parsed.projects)) {
21078
21148
  return { projects: [] };
@@ -21085,10 +21155,10 @@ function loadProjectRegistry() {
21085
21155
  function saveProjectRegistry(registry) {
21086
21156
  const registryPath = getProjectsRegistryPath();
21087
21157
  const dir = import_node_path51.default.dirname(registryPath);
21088
- if (!(0, import_node_fs16.existsSync)(dir)) {
21089
- (0, import_node_fs16.mkdirSync)(dir, { recursive: true });
21158
+ if (!(0, import_node_fs17.existsSync)(dir)) {
21159
+ (0, import_node_fs17.mkdirSync)(dir, { recursive: true });
21090
21160
  }
21091
- (0, import_node_fs16.writeFileSync)(registryPath, (0, import_yaml8.stringify)(registry), "utf-8");
21161
+ (0, import_node_fs17.writeFileSync)(registryPath, (0, import_yaml8.stringify)(registry), "utf-8");
21092
21162
  }
21093
21163
  function deriveProjectId(dirPath, existingIds) {
21094
21164
  const base = import_node_path51.default.basename(dirPath).toLowerCase().replace(/[^a-z0-9-]/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
@@ -21102,10 +21172,10 @@ function deriveProjectId(dirPath, existingIds) {
21102
21172
  }
21103
21173
  function addProject(projectPath) {
21104
21174
  const absPath = import_node_path51.default.resolve(projectPath);
21105
- if (!(0, import_node_fs16.existsSync)(absPath)) {
21175
+ if (!(0, import_node_fs17.existsSync)(absPath)) {
21106
21176
  throw new Error(`Directory not found: ${absPath}`);
21107
21177
  }
21108
- if (!(0, import_node_fs16.existsSync)(import_node_path51.default.join(absPath, ".agentv"))) {
21178
+ if (!(0, import_node_fs17.existsSync)(import_node_path51.default.join(absPath, ".agentv"))) {
21109
21179
  throw new Error(`No .agentv/ directory found in ${absPath}. Run an evaluation first.`);
21110
21180
  }
21111
21181
  const registry = loadProjectRegistry();
@@ -21149,19 +21219,19 @@ function touchProject(projectId) {
21149
21219
  }
21150
21220
  function discoverProjects(rootDir, maxDepth = 2) {
21151
21221
  const absRoot = import_node_path51.default.resolve(rootDir);
21152
- if (!(0, import_node_fs16.existsSync)(absRoot) || !(0, import_node_fs16.statSync)(absRoot).isDirectory()) {
21222
+ if (!(0, import_node_fs17.existsSync)(absRoot) || !(0, import_node_fs17.statSync)(absRoot).isDirectory()) {
21153
21223
  return [];
21154
21224
  }
21155
21225
  const results = [];
21156
21226
  function scan(dir, depth) {
21157
21227
  if (depth > maxDepth) return;
21158
- if ((0, import_node_fs16.existsSync)(import_node_path51.default.join(dir, ".agentv"))) {
21228
+ if ((0, import_node_fs17.existsSync)(import_node_path51.default.join(dir, ".agentv"))) {
21159
21229
  results.push(dir);
21160
21230
  return;
21161
21231
  }
21162
21232
  if (depth === maxDepth) return;
21163
21233
  try {
21164
- const entries = (0, import_node_fs16.readdirSync)(dir, { withFileTypes: true });
21234
+ const entries = (0, import_node_fs17.readdirSync)(dir, { withFileTypes: true });
21165
21235
  for (const entry of entries) {
21166
21236
  if (!entry.isDirectory()) continue;
21167
21237
  if (entry.name.startsWith(".") || entry.name === "node_modules") continue;
@@ -21737,6 +21807,254 @@ function toHrTime(iso) {
21737
21807
  // src/observability/index.ts
21738
21808
  init_otlp_json_file_exporter();
21739
21809
 
21810
+ // src/import/index.ts
21811
+ init_cjs_shims();
21812
+
21813
+ // src/import/claude-parser.ts
21814
+ init_cjs_shims();
21815
+ var SKIPPED_TYPES = /* @__PURE__ */ new Set(["progress", "system", "file-history-snapshot"]);
21816
+ function parseClaudeSession(jsonl) {
21817
+ const messages = [];
21818
+ let sessionId = "";
21819
+ let projectPath;
21820
+ let model;
21821
+ let startTimestamp;
21822
+ let endTimestamp;
21823
+ const usageByRequestId = /* @__PURE__ */ new Map();
21824
+ let lastAssistantRequestId;
21825
+ let lastAssistantIdx = -1;
21826
+ const pendingToolCalls = /* @__PURE__ */ new Map();
21827
+ const lines = jsonl.split("\n").filter((l) => l.trim().length > 0);
21828
+ for (const line of lines) {
21829
+ let event;
21830
+ try {
21831
+ event = JSON.parse(line);
21832
+ } catch {
21833
+ continue;
21834
+ }
21835
+ if (!event.type) continue;
21836
+ if (event.timestamp) {
21837
+ if (!startTimestamp) startTimestamp = event.timestamp;
21838
+ endTimestamp = event.timestamp;
21839
+ }
21840
+ if (SKIPPED_TYPES.has(event.type)) continue;
21841
+ if (event.isSidechain) continue;
21842
+ if (!sessionId && event.sessionId) {
21843
+ sessionId = event.sessionId;
21844
+ }
21845
+ if (!projectPath && event.cwd) {
21846
+ projectPath = event.cwd;
21847
+ }
21848
+ switch (event.type) {
21849
+ case "user": {
21850
+ const msg = event.message;
21851
+ if (!msg) break;
21852
+ const contentArr = msg.content;
21853
+ if (Array.isArray(contentArr)) {
21854
+ for (const block of contentArr) {
21855
+ if (block.type === "tool_result" && block.tool_use_id) {
21856
+ const pending = pendingToolCalls.get(block.tool_use_id);
21857
+ if (pending) {
21858
+ const existingMsg = messages[pending.msgIdx];
21859
+ const existingCalls = [...existingMsg.toolCalls ?? []];
21860
+ existingCalls[pending.toolIdx] = {
21861
+ ...existingCalls[pending.toolIdx],
21862
+ output: extractToolResultContent(block.content)
21863
+ };
21864
+ messages[pending.msgIdx] = { ...existingMsg, toolCalls: existingCalls };
21865
+ pendingToolCalls.delete(block.tool_use_id);
21866
+ }
21867
+ }
21868
+ }
21869
+ }
21870
+ const text = extractTextContent2(contentArr);
21871
+ if (text !== void 0) {
21872
+ messages.push({ role: "user", content: text });
21873
+ }
21874
+ break;
21875
+ }
21876
+ case "assistant": {
21877
+ const msg = event.message;
21878
+ if (!msg) break;
21879
+ if (!model && msg.model) {
21880
+ model = msg.model;
21881
+ }
21882
+ if (msg.usage && event.requestId) {
21883
+ usageByRequestId.set(event.requestId, msg.usage);
21884
+ }
21885
+ const { text, toolCalls } = extractAssistantContent(msg.content);
21886
+ if (event.requestId && event.requestId === lastAssistantRequestId && lastAssistantIdx >= 0) {
21887
+ messages[lastAssistantIdx] = {
21888
+ role: "assistant",
21889
+ content: text || void 0,
21890
+ toolCalls: toolCalls.length > 0 ? toolCalls : void 0
21891
+ };
21892
+ registerPendingToolCalls(toolCalls, lastAssistantIdx, pendingToolCalls);
21893
+ } else {
21894
+ if (text || toolCalls.length > 0) {
21895
+ lastAssistantIdx = messages.length;
21896
+ messages.push({
21897
+ role: "assistant",
21898
+ content: text || void 0,
21899
+ toolCalls: toolCalls.length > 0 ? toolCalls : void 0
21900
+ });
21901
+ registerPendingToolCalls(toolCalls, lastAssistantIdx, pendingToolCalls);
21902
+ }
21903
+ }
21904
+ lastAssistantRequestId = event.requestId;
21905
+ break;
21906
+ }
21907
+ }
21908
+ }
21909
+ let totalInputTokens = 0;
21910
+ let totalOutputTokens = 0;
21911
+ for (const usage of usageByRequestId.values()) {
21912
+ totalInputTokens += Number(usage.input_tokens ?? 0);
21913
+ totalOutputTokens += Number(usage.output_tokens ?? 0);
21914
+ }
21915
+ const hasUsage = usageByRequestId.size > 0;
21916
+ let durationMs;
21917
+ if (startTimestamp && endTimestamp) {
21918
+ durationMs = new Date(endTimestamp).getTime() - new Date(startTimestamp).getTime();
21919
+ }
21920
+ const source = {
21921
+ provider: "claude",
21922
+ sessionId,
21923
+ projectPath,
21924
+ startedAt: startTimestamp,
21925
+ model
21926
+ };
21927
+ return {
21928
+ messages,
21929
+ source,
21930
+ tokenUsage: hasUsage ? { input: totalInputTokens, output: totalOutputTokens } : void 0,
21931
+ durationMs,
21932
+ costUsd: null
21933
+ };
21934
+ }
21935
+ function registerPendingToolCalls(toolCalls, msgIdx, pending) {
21936
+ for (let i = 0; i < toolCalls.length; i++) {
21937
+ const id = toolCalls[i].id;
21938
+ if (id) {
21939
+ pending.set(id, { msgIdx, toolIdx: i });
21940
+ }
21941
+ }
21942
+ }
21943
+ function extractTextContent2(content) {
21944
+ if (content === void 0 || content === null) return void 0;
21945
+ if (typeof content === "string") return content;
21946
+ const textParts = [];
21947
+ for (const block of content) {
21948
+ if (block.type === "text" && block.text) {
21949
+ textParts.push(block.text);
21950
+ }
21951
+ }
21952
+ return textParts.length > 0 ? textParts.join("") : void 0;
21953
+ }
21954
+ function extractAssistantContent(content) {
21955
+ if (content === void 0 || content === null) {
21956
+ return { text: void 0, toolCalls: [] };
21957
+ }
21958
+ if (typeof content === "string") {
21959
+ return { text: content, toolCalls: [] };
21960
+ }
21961
+ const textParts = [];
21962
+ const toolCalls = [];
21963
+ for (const block of content) {
21964
+ switch (block.type) {
21965
+ case "text":
21966
+ if (block.text) textParts.push(block.text);
21967
+ break;
21968
+ case "tool_use":
21969
+ if (block.name) {
21970
+ toolCalls.push({
21971
+ tool: block.name,
21972
+ input: block.input,
21973
+ id: block.id
21974
+ });
21975
+ }
21976
+ break;
21977
+ }
21978
+ }
21979
+ return {
21980
+ text: textParts.length > 0 ? textParts.join("") : void 0,
21981
+ toolCalls
21982
+ };
21983
+ }
21984
+ function extractToolResultContent(content) {
21985
+ if (content === void 0 || content === null) return void 0;
21986
+ if (typeof content === "string") return content;
21987
+ const parts = [];
21988
+ for (const block of content) {
21989
+ if (block.type === "text" && block.text) {
21990
+ parts.push(block.text);
21991
+ }
21992
+ }
21993
+ return parts.length > 0 ? parts.join("") : void 0;
21994
+ }
21995
+
21996
+ // src/import/session-discovery.ts
21997
+ init_cjs_shims();
21998
+ var import_promises36 = require("fs/promises");
21999
+ var import_node_os8 = require("os");
22000
+ var import_node_path53 = __toESM(require("path"), 1);
22001
+ var DEFAULT_PROJECTS_DIR = () => import_node_path53.default.join((0, import_node_os8.homedir)(), ".claude", "projects");
22002
+ function encodeProjectPath(projectPath) {
22003
+ return projectPath.replace(/\//g, "-");
22004
+ }
22005
+ async function discoverClaudeSessions(opts) {
22006
+ const projectsDir = opts?.projectsDir ?? DEFAULT_PROJECTS_DIR();
22007
+ const limit = opts?.latest ? 1 : opts?.limit ?? 10;
22008
+ let projectDirs;
22009
+ try {
22010
+ projectDirs = await (0, import_promises36.readdir)(projectsDir);
22011
+ } catch {
22012
+ return [];
22013
+ }
22014
+ if (opts?.projectPath) {
22015
+ const encoded = encodeProjectPath(opts.projectPath);
22016
+ projectDirs = projectDirs.filter((dir) => dir === encoded || dir.includes(encoded));
22017
+ }
22018
+ const sessions = [];
22019
+ for (const projectDir of projectDirs) {
22020
+ const dirPath = import_node_path53.default.join(projectsDir, projectDir);
22021
+ let entries;
22022
+ try {
22023
+ entries = await (0, import_promises36.readdir)(dirPath);
22024
+ } catch {
22025
+ continue;
22026
+ }
22027
+ for (const entry of entries) {
22028
+ if (!entry.endsWith(".jsonl")) continue;
22029
+ const sessionId = entry.replace(/\.jsonl$/, "");
22030
+ if (opts?.sessionId && sessionId !== opts.sessionId) continue;
22031
+ const filePath = import_node_path53.default.join(dirPath, entry);
22032
+ let updatedAt;
22033
+ try {
22034
+ const fileStat = await (0, import_promises36.stat)(filePath);
22035
+ updatedAt = fileStat.mtime;
22036
+ } catch {
22037
+ updatedAt = /* @__PURE__ */ new Date(0);
22038
+ }
22039
+ sessions.push({
22040
+ sessionId,
22041
+ filePath,
22042
+ projectDir,
22043
+ updatedAt
22044
+ });
22045
+ }
22046
+ }
22047
+ sessions.sort((a, b) => b.updatedAt.getTime() - a.updatedAt.getTime());
22048
+ return sessions.slice(0, limit);
22049
+ }
22050
+
22051
+ // src/import/types.ts
22052
+ init_cjs_shims();
22053
+ var import_promises37 = require("fs/promises");
22054
+ async function readTranscriptFile(filePath) {
22055
+ return (0, import_promises37.readFile)(filePath, "utf8");
22056
+ }
22057
+
21740
22058
  // src/index.ts
21741
22059
  function createAgentKernel() {
21742
22060
  return { status: "stub" };
@@ -21807,6 +22125,7 @@ function createAgentKernel() {
21807
22125
  deriveProjectId,
21808
22126
  detectFormat,
21809
22127
  discoverAssertions,
22128
+ discoverClaudeSessions,
21810
22129
  discoverCopilotSessions,
21811
22130
  discoverGraders,
21812
22131
  discoverJudges,
@@ -21866,12 +22185,14 @@ function createAgentKernel() {
21866
22185
  negateScore,
21867
22186
  normalizeLineEndings,
21868
22187
  parseAgentSkillsEvals,
22188
+ parseClaudeSession,
21869
22189
  parseJsonFromText,
21870
22190
  parseJsonSafe,
21871
22191
  readJsonFile,
21872
22192
  readTargetDefinitions,
21873
22193
  readTestSuiteMetadata,
21874
22194
  readTextFile,
22195
+ readTranscriptFile,
21875
22196
  removeProject,
21876
22197
  resolveAndCreateProvider,
21877
22198
  resolveFileReference,