@agentv/core 4.4.0 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -6803,6 +6803,7 @@ var CopilotLogProvider = class {
6803
6803
 
6804
6804
  // src/evaluation/providers/copilot-sdk.ts
6805
6805
  import { randomUUID as randomUUID6 } from "node:crypto";
6806
+ import { existsSync as existsSync2 } from "node:fs";
6806
6807
  import { mkdir as mkdir5 } from "node:fs/promises";
6807
6808
  import path18 from "node:path";
6808
6809
 
@@ -6918,6 +6919,7 @@ var CopilotSdkProvider = class {
6918
6919
  const cwd = this.resolveCwd(request.cwd);
6919
6920
  if (cwd) {
6920
6921
  sessionOptions.workingDirectory = cwd;
6922
+ sessionOptions.skillDirectories = resolveSkillDirectories(cwd);
6921
6923
  }
6922
6924
  const systemPrompt = this.config.systemPrompt;
6923
6925
  if (systemPrompt) {
@@ -7149,6 +7151,14 @@ var CopilotSdkProvider = class {
7149
7151
  }
7150
7152
  }
7151
7153
  };
7154
+ function resolveSkillDirectories(cwd) {
7155
+ const candidates = [
7156
+ path18.join(cwd, ".claude", "skills"),
7157
+ path18.join(cwd, ".agents", "skills"),
7158
+ path18.join(cwd, ".codex", "skills")
7159
+ ];
7160
+ return candidates.filter((dir) => existsSync2(dir));
7161
+ }
7152
7162
  function summarizeSdkEvent(eventType, data) {
7153
7163
  if (!data || typeof data !== "object") {
7154
7164
  return eventType;
@@ -8177,6 +8187,11 @@ var PiCodingAgentProvider = class {
8177
8187
  const modelId = this.config.model ?? "gemini-2.5-flash";
8178
8188
  this.setApiKeyEnv(providerName);
8179
8189
  const model = sdk.getModel(providerName, modelId);
8190
+ if (!model) {
8191
+ throw new Error(
8192
+ `pi-coding-agent: getModel('${providerName}', '${modelId}') returned undefined. The model '${modelId}' is not registered for provider '${providerName}' in pi-ai. Check that subprovider and model are correct in your target config.`
8193
+ );
8194
+ }
8180
8195
  const tools = this.resolveTools(sdk);
8181
8196
  const { session } = await sdk.createAgentSession({
8182
8197
  cwd,
@@ -8291,6 +8306,15 @@ ${fileList}`;
8291
8306
  await session.prompt(prompt);
8292
8307
  }
8293
8308
  const agentMessages = session.agent.state.messages;
8309
+ const lastAssistant = [...agentMessages].reverse().find(
8310
+ (m) => !!m && typeof m === "object" && m.role === "assistant"
8311
+ );
8312
+ if (lastAssistant?.stopReason === "error") {
8313
+ const errorMsg = typeof lastAssistant.errorMessage === "string" ? lastAssistant.errorMessage : "unknown SDK error";
8314
+ throw new Error(
8315
+ `pi-coding-agent SDK error (provider: ${lastAssistant.provider ?? providerName}, model: ${lastAssistant.model ?? modelId}): ${errorMsg}`
8316
+ );
8317
+ }
8294
8318
  const output = [];
8295
8319
  for (const msg of agentMessages) {
8296
8320
  output.push(convertAgentMessage(msg, toolTrackers, completedToolResults));
@@ -10340,15 +10364,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
10340
10364
  });
10341
10365
  }
10342
10366
  async function execShellWithStdin(command, stdinPayload, options = {}) {
10343
- const { mkdir: mkdir16, readFile: readFile14, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
10367
+ const { mkdir: mkdir16, readFile: readFile15, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
10344
10368
  const { tmpdir: tmpdir3 } = await import("node:os");
10345
- const path48 = await import("node:path");
10369
+ const path49 = await import("node:path");
10346
10370
  const { randomUUID: randomUUID10 } = await import("node:crypto");
10347
- const dir = path48.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
10371
+ const dir = path49.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
10348
10372
  await mkdir16(dir, { recursive: true });
10349
- const stdinPath = path48.join(dir, "stdin.txt");
10350
- const stdoutPath = path48.join(dir, "stdout.txt");
10351
- const stderrPath = path48.join(dir, "stderr.txt");
10373
+ const stdinPath = path49.join(dir, "stdin.txt");
10374
+ const stdoutPath = path49.join(dir, "stdout.txt");
10375
+ const stderrPath = path49.join(dir, "stderr.txt");
10352
10376
  await writeFile9(stdinPath, stdinPayload, "utf8");
10353
10377
  const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
10354
10378
  const { spawn: spawn5 } = await import("node:child_process");
@@ -10378,8 +10402,8 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
10378
10402
  resolve(code ?? 0);
10379
10403
  });
10380
10404
  });
10381
- const stdout = (await readFile14(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
10382
- const stderr = (await readFile14(stderrPath, "utf8")).replace(/\r\n/g, "\n");
10405
+ const stdout = (await readFile15(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
10406
+ const stderr = (await readFile15(stderrPath, "utf8")).replace(/\r\n/g, "\n");
10383
10407
  return { stdout, stderr, exitCode };
10384
10408
  } finally {
10385
10409
  await rm6(dir, { recursive: true, force: true });
@@ -11855,11 +11879,11 @@ function createFilesystemTools(workspacePath) {
11855
11879
  execute: async (input) => {
11856
11880
  try {
11857
11881
  const resolved = resolveSandboxed(workspacePath, input.path);
11858
- const stat9 = await fs2.stat(resolved);
11859
- if (stat9.isDirectory()) {
11882
+ const stat10 = await fs2.stat(resolved);
11883
+ if (stat10.isDirectory()) {
11860
11884
  return { error: `'${input.path}' is a directory, not a file` };
11861
11885
  }
11862
- const buffer = Buffer.alloc(Math.min(stat9.size, MAX_FILE_SIZE));
11886
+ const buffer = Buffer.alloc(Math.min(stat10.size, MAX_FILE_SIZE));
11863
11887
  const fd = await fs2.open(resolved, "r");
11864
11888
  try {
11865
11889
  await fd.read(buffer, 0, buffer.length, 0);
@@ -11867,8 +11891,8 @@ function createFilesystemTools(workspacePath) {
11867
11891
  await fd.close();
11868
11892
  }
11869
11893
  const content = buffer.toString("utf-8");
11870
- const truncated = stat9.size > MAX_FILE_SIZE;
11871
- return { content, truncated, size: stat9.size };
11894
+ const truncated = stat10.size > MAX_FILE_SIZE;
11895
+ return { content, truncated, size: stat10.size };
11872
11896
  } catch (error) {
11873
11897
  return { error: error instanceof Error ? error.message : String(error) };
11874
11898
  }
@@ -11919,8 +11943,8 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
11919
11943
  const ext = path35.extname(entry.name).toLowerCase();
11920
11944
  if (BINARY_EXTENSIONS.has(ext)) continue;
11921
11945
  try {
11922
- const stat9 = await fs2.stat(fullPath);
11923
- if (stat9.size > MAX_FILE_SIZE) continue;
11946
+ const stat10 = await fs2.stat(fullPath);
11947
+ if (stat10.size > MAX_FILE_SIZE) continue;
11924
11948
  const content = await fs2.readFile(fullPath, "utf-8");
11925
11949
  const lines = content.split("\n");
11926
11950
  for (let i = 0; i < lines.length; i++) {
@@ -12561,115 +12585,115 @@ var FieldAccuracyEvaluator = class {
12561
12585
  * Evaluate a single field against the expected value.
12562
12586
  */
12563
12587
  evaluateField(fieldConfig, candidateData, expectedData) {
12564
- const { path: path48, match, required = true, weight = 1 } = fieldConfig;
12565
- const candidateValue = resolvePath(candidateData, path48);
12566
- const expectedValue = resolvePath(expectedData, path48);
12588
+ const { path: path49, match, required = true, weight = 1 } = fieldConfig;
12589
+ const candidateValue = resolvePath(candidateData, path49);
12590
+ const expectedValue = resolvePath(expectedData, path49);
12567
12591
  if (expectedValue === void 0) {
12568
12592
  return {
12569
- path: path48,
12593
+ path: path49,
12570
12594
  score: 1,
12571
12595
  // No expected value means no comparison needed
12572
12596
  weight,
12573
12597
  hit: true,
12574
- message: `${path48}: no expected value`
12598
+ message: `${path49}: no expected value`
12575
12599
  };
12576
12600
  }
12577
12601
  if (candidateValue === void 0) {
12578
12602
  if (required) {
12579
12603
  return {
12580
- path: path48,
12604
+ path: path49,
12581
12605
  score: 0,
12582
12606
  weight,
12583
12607
  hit: false,
12584
- message: `${path48} (required, missing)`
12608
+ message: `${path49} (required, missing)`
12585
12609
  };
12586
12610
  }
12587
12611
  return {
12588
- path: path48,
12612
+ path: path49,
12589
12613
  score: 1,
12590
12614
  // Don't penalize missing optional fields
12591
12615
  weight: 0,
12592
12616
  // Zero weight means it won't affect the score
12593
12617
  hit: true,
12594
- message: `${path48}: optional field missing`
12618
+ message: `${path49}: optional field missing`
12595
12619
  };
12596
12620
  }
12597
12621
  switch (match) {
12598
12622
  case "exact":
12599
- return this.compareExact(path48, candidateValue, expectedValue, weight);
12623
+ return this.compareExact(path49, candidateValue, expectedValue, weight);
12600
12624
  case "numeric_tolerance":
12601
12625
  return this.compareNumericTolerance(
12602
- path48,
12626
+ path49,
12603
12627
  candidateValue,
12604
12628
  expectedValue,
12605
12629
  fieldConfig,
12606
12630
  weight
12607
12631
  );
12608
12632
  case "date":
12609
- return this.compareDate(path48, candidateValue, expectedValue, fieldConfig, weight);
12633
+ return this.compareDate(path49, candidateValue, expectedValue, fieldConfig, weight);
12610
12634
  default:
12611
12635
  return {
12612
- path: path48,
12636
+ path: path49,
12613
12637
  score: 0,
12614
12638
  weight,
12615
12639
  hit: false,
12616
- message: `${path48}: unknown match type "${match}"`
12640
+ message: `${path49}: unknown match type "${match}"`
12617
12641
  };
12618
12642
  }
12619
12643
  }
12620
12644
  /**
12621
12645
  * Exact equality comparison.
12622
12646
  */
12623
- compareExact(path48, candidateValue, expectedValue, weight) {
12647
+ compareExact(path49, candidateValue, expectedValue, weight) {
12624
12648
  if (deepEqual(candidateValue, expectedValue)) {
12625
12649
  return {
12626
- path: path48,
12650
+ path: path49,
12627
12651
  score: 1,
12628
12652
  weight,
12629
12653
  hit: true,
12630
- message: path48
12654
+ message: path49
12631
12655
  };
12632
12656
  }
12633
12657
  if (typeof candidateValue !== typeof expectedValue) {
12634
12658
  return {
12635
- path: path48,
12659
+ path: path49,
12636
12660
  score: 0,
12637
12661
  weight,
12638
12662
  hit: false,
12639
- message: `${path48} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
12663
+ message: `${path49} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
12640
12664
  };
12641
12665
  }
12642
12666
  return {
12643
- path: path48,
12667
+ path: path49,
12644
12668
  score: 0,
12645
12669
  weight,
12646
12670
  hit: false,
12647
- message: `${path48} (value mismatch)`
12671
+ message: `${path49} (value mismatch)`
12648
12672
  };
12649
12673
  }
12650
12674
  /**
12651
12675
  * Numeric comparison with absolute or relative tolerance.
12652
12676
  */
12653
- compareNumericTolerance(path48, candidateValue, expectedValue, fieldConfig, weight) {
12677
+ compareNumericTolerance(path49, candidateValue, expectedValue, fieldConfig, weight) {
12654
12678
  const { tolerance = 0, relative = false } = fieldConfig;
12655
12679
  const candidateNum = toNumber(candidateValue);
12656
12680
  const expectedNum = toNumber(expectedValue);
12657
12681
  if (candidateNum === null || expectedNum === null) {
12658
12682
  return {
12659
- path: path48,
12683
+ path: path49,
12660
12684
  score: 0,
12661
12685
  weight,
12662
12686
  hit: false,
12663
- message: `${path48} (non-numeric value)`
12687
+ message: `${path49} (non-numeric value)`
12664
12688
  };
12665
12689
  }
12666
12690
  if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
12667
12691
  return {
12668
- path: path48,
12692
+ path: path49,
12669
12693
  score: 0,
12670
12694
  weight,
12671
12695
  hit: false,
12672
- message: `${path48} (invalid numeric value)`
12696
+ message: `${path49} (invalid numeric value)`
12673
12697
  };
12674
12698
  }
12675
12699
  const diff = Math.abs(candidateNum - expectedNum);
@@ -12682,61 +12706,61 @@ var FieldAccuracyEvaluator = class {
12682
12706
  }
12683
12707
  if (withinTolerance) {
12684
12708
  return {
12685
- path: path48,
12709
+ path: path49,
12686
12710
  score: 1,
12687
12711
  weight,
12688
12712
  hit: true,
12689
- message: `${path48} (within tolerance: diff=${diff.toFixed(2)})`
12713
+ message: `${path49} (within tolerance: diff=${diff.toFixed(2)})`
12690
12714
  };
12691
12715
  }
12692
12716
  return {
12693
- path: path48,
12717
+ path: path49,
12694
12718
  score: 0,
12695
12719
  weight,
12696
12720
  hit: false,
12697
- message: `${path48} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
12721
+ message: `${path49} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
12698
12722
  };
12699
12723
  }
12700
12724
  /**
12701
12725
  * Date comparison with format normalization.
12702
12726
  */
12703
- compareDate(path48, candidateValue, expectedValue, fieldConfig, weight) {
12727
+ compareDate(path49, candidateValue, expectedValue, fieldConfig, weight) {
12704
12728
  const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
12705
12729
  const candidateDate = parseDate(String(candidateValue), formats);
12706
12730
  const expectedDate = parseDate(String(expectedValue), formats);
12707
12731
  if (candidateDate === null) {
12708
12732
  return {
12709
- path: path48,
12733
+ path: path49,
12710
12734
  score: 0,
12711
12735
  weight,
12712
12736
  hit: false,
12713
- message: `${path48} (unparseable candidate date)`
12737
+ message: `${path49} (unparseable candidate date)`
12714
12738
  };
12715
12739
  }
12716
12740
  if (expectedDate === null) {
12717
12741
  return {
12718
- path: path48,
12742
+ path: path49,
12719
12743
  score: 0,
12720
12744
  weight,
12721
12745
  hit: false,
12722
- message: `${path48} (unparseable expected date)`
12746
+ message: `${path49} (unparseable expected date)`
12723
12747
  };
12724
12748
  }
12725
12749
  if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
12726
12750
  return {
12727
- path: path48,
12751
+ path: path49,
12728
12752
  score: 1,
12729
12753
  weight,
12730
12754
  hit: true,
12731
- message: path48
12755
+ message: path49
12732
12756
  };
12733
12757
  }
12734
12758
  return {
12735
- path: path48,
12759
+ path: path49,
12736
12760
  score: 0,
12737
12761
  weight,
12738
12762
  hit: false,
12739
- message: `${path48} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
12763
+ message: `${path49} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
12740
12764
  };
12741
12765
  }
12742
12766
  /**
@@ -12769,11 +12793,11 @@ var FieldAccuracyEvaluator = class {
12769
12793
  };
12770
12794
  }
12771
12795
  };
12772
- function resolvePath(obj, path48) {
12773
- if (!path48 || !obj) {
12796
+ function resolvePath(obj, path49) {
12797
+ if (!path49 || !obj) {
12774
12798
  return void 0;
12775
12799
  }
12776
- const parts = path48.split(/\.|\[|\]/).filter((p) => p.length > 0);
12800
+ const parts = path49.split(/\.|\[|\]/).filter((p) => p.length > 0);
12777
12801
  let current = obj;
12778
12802
  for (const part of parts) {
12779
12803
  if (current === null || current === void 0) {
@@ -12997,6 +13021,14 @@ var SkillTriggerEvaluator = class {
12997
13021
  evidence = `Read tool loaded skill file via tool name "${toolName}"`;
12998
13022
  break;
12999
13023
  }
13024
+ if (!triggered && toolCall.output != null) {
13025
+ const outputStr = typeof toolCall.output === "string" ? toolCall.output : JSON.stringify(toolCall.output);
13026
+ if (outputStr.includes(`skills/${skillName}/`)) {
13027
+ triggered = true;
13028
+ evidence = `Tool "${toolName}" output referenced skill file for "${skillName}"`;
13029
+ break;
13030
+ }
13031
+ }
13000
13032
  }
13001
13033
  const pass = triggered === shouldTrigger;
13002
13034
  if (pass) {
@@ -13257,8 +13289,8 @@ var TokenUsageEvaluator = class {
13257
13289
  };
13258
13290
 
13259
13291
  // src/evaluation/evaluators/tool-trajectory.ts
13260
- function getNestedValue(obj, path48) {
13261
- const parts = path48.split(".");
13292
+ function getNestedValue(obj, path49) {
13293
+ const parts = path49.split(".");
13262
13294
  let current = obj;
13263
13295
  for (const part of parts) {
13264
13296
  if (current === null || current === void 0 || typeof current !== "object") {
@@ -14793,7 +14825,7 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
14793
14825
  // src/evaluation/workspace/pool-manager.ts
14794
14826
  import { execFile } from "node:child_process";
14795
14827
  import { createHash } from "node:crypto";
14796
- import { existsSync as existsSync2 } from "node:fs";
14828
+ import { existsSync as existsSync3 } from "node:fs";
14797
14829
  import { cp as cp2, mkdir as mkdir13, readFile as readFile12, readdir as readdir5, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
14798
14830
  import path41 from "node:path";
14799
14831
  import { promisify as promisify5 } from "node:util";
@@ -14899,7 +14931,7 @@ var WorkspacePoolManager = class {
14899
14931
  if (!locked) {
14900
14932
  continue;
14901
14933
  }
14902
- const slotExists = existsSync2(slotPath);
14934
+ const slotExists = existsSync3(slotPath);
14903
14935
  if (slotExists) {
14904
14936
  await this.resetSlot(slotPath, templatePath, repos, poolReset);
14905
14937
  return {
@@ -15005,7 +15037,7 @@ var WorkspacePoolManager = class {
15005
15037
  for (const entry of entries) {
15006
15038
  if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
15007
15039
  const lockPath = path41.join(poolDir, `${entry}.lock`);
15008
- if (existsSync2(lockPath)) {
15040
+ if (existsSync3(lockPath)) {
15009
15041
  try {
15010
15042
  const pidStr = await readFile12(lockPath, "utf-8");
15011
15043
  const pid = Number.parseInt(pidStr.trim(), 10);
@@ -15036,7 +15068,7 @@ var WorkspacePoolManager = class {
15036
15068
  async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
15037
15069
  for (const repo of repos) {
15038
15070
  const repoDir = path41.join(slotPath, repo.path);
15039
- if (!existsSync2(repoDir)) {
15071
+ if (!existsSync3(repoDir)) {
15040
15072
  continue;
15041
15073
  }
15042
15074
  if (poolReset === "none") {
@@ -15061,7 +15093,7 @@ var WorkspacePoolManager = class {
15061
15093
 
15062
15094
  // src/evaluation/workspace/repo-manager.ts
15063
15095
  import { execFile as execFile2 } from "node:child_process";
15064
- import { existsSync as existsSync3 } from "node:fs";
15096
+ import { existsSync as existsSync4 } from "node:fs";
15065
15097
  import path42 from "node:path";
15066
15098
  import { promisify as promisify6 } from "node:util";
15067
15099
  var execFileAsync2 = promisify6(execFile2);
@@ -15113,7 +15145,7 @@ var RepoManager = class {
15113
15145
  resolvedSourcePath: sourcePath ?? "",
15114
15146
  reason: "empty_path"
15115
15147
  });
15116
- } else if (!existsSync3(sourcePath)) {
15148
+ } else if (!existsSync4(sourcePath)) {
15117
15149
  errors.push({
15118
15150
  repoPath: repo.path,
15119
15151
  resolvedSourcePath: sourcePath,
@@ -15340,8 +15372,8 @@ async function executeWorkspaceScript(config, context, failureMode = "fatal") {
15340
15372
  }
15341
15373
 
15342
15374
  // src/evaluation/orchestrator.ts
15343
- function classifyQualityStatus(score) {
15344
- return score >= PASS_THRESHOLD ? "ok" : "quality_failure";
15375
+ function classifyQualityStatus(score, threshold = PASS_THRESHOLD) {
15376
+ return score >= threshold ? "ok" : "quality_failure";
15345
15377
  }
15346
15378
  function buildSkippedEvaluatorError(scores) {
15347
15379
  const skippedScores = scores?.filter((score) => score.verdict === "skip") ?? [];
@@ -15417,7 +15449,8 @@ async function runEvaluation(options) {
15417
15449
  retainOnSuccess,
15418
15450
  retainOnFailure,
15419
15451
  graderTarget: cliGraderTarget,
15420
- model: cliModel
15452
+ model: cliModel,
15453
+ threshold: scoreThreshold
15421
15454
  } = options;
15422
15455
  let useCache = options.useCache;
15423
15456
  if (trials && trials.count > 1 && useCache) {
@@ -15546,7 +15579,8 @@ async function runEvaluation(options) {
15546
15579
  resolveGraderProvider,
15547
15580
  agentTimeoutMs,
15548
15581
  targetResolver,
15549
- availableTargets
15582
+ availableTargets,
15583
+ threshold: scoreThreshold
15550
15584
  });
15551
15585
  } catch (error) {
15552
15586
  if (verbose) {
@@ -15915,7 +15949,8 @@ async function runEvaluation(options) {
15915
15949
  typeRegistry,
15916
15950
  repoManager,
15917
15951
  evalDir,
15918
- verbose
15952
+ verbose,
15953
+ threshold: scoreThreshold
15919
15954
  };
15920
15955
  let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
15921
15956
  if (totalBudgetUsd !== void 0) {
@@ -16073,7 +16108,8 @@ async function runBatchEvaluation(options) {
16073
16108
  resolveGraderProvider,
16074
16109
  agentTimeoutMs,
16075
16110
  targetResolver,
16076
- availableTargets
16111
+ availableTargets,
16112
+ threshold: batchThreshold
16077
16113
  } = options;
16078
16114
  const promptInputsList = [];
16079
16115
  const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
@@ -16158,7 +16194,8 @@ async function runBatchEvaluation(options) {
16158
16194
  endTime,
16159
16195
  targetResolver,
16160
16196
  availableTargets,
16161
- verbose
16197
+ verbose,
16198
+ threshold: batchThreshold
16162
16199
  });
16163
16200
  if (providerError) {
16164
16201
  result = {
@@ -16244,7 +16281,8 @@ async function runEvalCase(options) {
16244
16281
  typeRegistry: providedTypeRegistry,
16245
16282
  repoManager,
16246
16283
  evalDir,
16247
- verbose
16284
+ verbose,
16285
+ threshold: caseThreshold
16248
16286
  } = options;
16249
16287
  const setupDebug = process.env.AGENTV_SETUP_DEBUG === "1";
16250
16288
  const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
@@ -16607,7 +16645,8 @@ async function runEvalCase(options) {
16607
16645
  availableTargets,
16608
16646
  fileChanges,
16609
16647
  workspacePath,
16610
- verbose
16648
+ verbose,
16649
+ threshold: caseThreshold
16611
16650
  });
16612
16651
  const totalDurationMs = Date.now() - caseStartMs;
16613
16652
  const graderTokens = aggregateEvaluatorTokenUsage(result.scores);
@@ -16622,7 +16661,7 @@ async function runEvalCase(options) {
16622
16661
  ...evalRunTokenUsage ? { tokenUsage: evalRunTokenUsage } : {}
16623
16662
  };
16624
16663
  const skippedEvaluatorError = buildSkippedEvaluatorError(result.scores);
16625
- const executionStatus = providerError || skippedEvaluatorError ? "execution_error" : classifyQualityStatus(result.score);
16664
+ const executionStatus = providerError || skippedEvaluatorError ? "execution_error" : classifyQualityStatus(result.score, caseThreshold);
16626
16665
  const finalResult = providerError ? {
16627
16666
  ...result,
16628
16667
  evalRun,
@@ -16793,7 +16832,8 @@ async function evaluateCandidate(options) {
16793
16832
  targetResolver,
16794
16833
  availableTargets,
16795
16834
  fileChanges,
16796
- workspacePath
16835
+ workspacePath,
16836
+ threshold: evalThreshold
16797
16837
  } = options;
16798
16838
  const gradeTimestamp = nowFn();
16799
16839
  const { score, scores } = await runEvaluatorsForCase({
@@ -16866,7 +16906,7 @@ async function evaluateCandidate(options) {
16866
16906
  scores,
16867
16907
  trace,
16868
16908
  fileChanges,
16869
- executionStatus: classifyQualityStatus(score.score)
16909
+ executionStatus: classifyQualityStatus(score.score, evalThreshold)
16870
16910
  };
16871
16911
  }
16872
16912
  async function runEvaluatorsForCase(options) {
@@ -17331,7 +17371,7 @@ function computeWeightedMean(entries) {
17331
17371
  }
17332
17372
 
17333
17373
  // src/evaluation/evaluate.ts
17334
- import { existsSync as existsSync4 } from "node:fs";
17374
+ import { existsSync as existsSync5 } from "node:fs";
17335
17375
  import path45 from "node:path";
17336
17376
 
17337
17377
  // src/evaluation/providers/function-provider.ts
@@ -17487,7 +17527,7 @@ async function discoverDefaultTarget(repoRoot) {
17487
17527
  for (const dir of chain) {
17488
17528
  for (const candidate of TARGET_FILE_CANDIDATES) {
17489
17529
  const targetsPath = path45.join(dir, candidate);
17490
- if (!existsSync4(targetsPath)) continue;
17530
+ if (!existsSync5(targetsPath)) continue;
17491
17531
  try {
17492
17532
  const definitions = await readTargetDefinitions(targetsPath);
17493
17533
  const defaultTarget = definitions.find((d) => d.name === "default");
@@ -17504,7 +17544,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
17504
17544
  const envFiles = [];
17505
17545
  for (const dir of chain) {
17506
17546
  const envPath = path45.join(dir, ".env");
17507
- if (existsSync4(envPath)) envFiles.push(envPath);
17547
+ if (existsSync5(envPath)) envFiles.push(envPath);
17508
17548
  }
17509
17549
  for (let i = 0; i < envFiles.length; i++) {
17510
17550
  try {
@@ -17580,12 +17620,12 @@ var CONFIG_FILE_NAMES = [
17580
17620
  ".agentv/config.js"
17581
17621
  ];
17582
17622
  async function loadTsConfig(projectRoot) {
17583
- const { existsSync: existsSync6 } = await import("node:fs");
17623
+ const { existsSync: existsSync7 } = await import("node:fs");
17584
17624
  const { pathToFileURL } = await import("node:url");
17585
17625
  const { join: join2 } = await import("node:path");
17586
17626
  for (const fileName of CONFIG_FILE_NAMES) {
17587
17627
  const filePath = join2(projectRoot, fileName);
17588
- if (!existsSync6(filePath)) {
17628
+ if (!existsSync7(filePath)) {
17589
17629
  continue;
17590
17630
  }
17591
17631
  try {
@@ -17723,7 +17763,7 @@ function shouldSkipCacheForTemperature(targetConfig) {
17723
17763
  }
17724
17764
 
17725
17765
  // src/projects.ts
17726
- import { existsSync as existsSync5, mkdirSync, readFileSync as readFileSync2, readdirSync as readdirSync3, statSync as statSync2, writeFileSync } from "node:fs";
17766
+ import { existsSync as existsSync6, mkdirSync, readFileSync as readFileSync2, readdirSync as readdirSync3, statSync as statSync2, writeFileSync } from "node:fs";
17727
17767
  import path47 from "node:path";
17728
17768
  import { parse as parseYaml3, stringify as stringifyYaml } from "yaml";
17729
17769
  function getProjectsRegistryPath() {
@@ -17731,7 +17771,7 @@ function getProjectsRegistryPath() {
17731
17771
  }
17732
17772
  function loadProjectRegistry() {
17733
17773
  const registryPath = getProjectsRegistryPath();
17734
- if (!existsSync5(registryPath)) {
17774
+ if (!existsSync6(registryPath)) {
17735
17775
  return { projects: [] };
17736
17776
  }
17737
17777
  try {
@@ -17748,7 +17788,7 @@ function loadProjectRegistry() {
17748
17788
  function saveProjectRegistry(registry) {
17749
17789
  const registryPath = getProjectsRegistryPath();
17750
17790
  const dir = path47.dirname(registryPath);
17751
- if (!existsSync5(dir)) {
17791
+ if (!existsSync6(dir)) {
17752
17792
  mkdirSync(dir, { recursive: true });
17753
17793
  }
17754
17794
  writeFileSync(registryPath, stringifyYaml(registry), "utf-8");
@@ -17765,10 +17805,10 @@ function deriveProjectId(dirPath, existingIds) {
17765
17805
  }
17766
17806
  function addProject(projectPath) {
17767
17807
  const absPath = path47.resolve(projectPath);
17768
- if (!existsSync5(absPath)) {
17808
+ if (!existsSync6(absPath)) {
17769
17809
  throw new Error(`Directory not found: ${absPath}`);
17770
17810
  }
17771
- if (!existsSync5(path47.join(absPath, ".agentv"))) {
17811
+ if (!existsSync6(path47.join(absPath, ".agentv"))) {
17772
17812
  throw new Error(`No .agentv/ directory found in ${absPath}. Run an evaluation first.`);
17773
17813
  }
17774
17814
  const registry = loadProjectRegistry();
@@ -17812,13 +17852,13 @@ function touchProject(projectId) {
17812
17852
  }
17813
17853
  function discoverProjects(rootDir, maxDepth = 2) {
17814
17854
  const absRoot = path47.resolve(rootDir);
17815
- if (!existsSync5(absRoot) || !statSync2(absRoot).isDirectory()) {
17855
+ if (!existsSync6(absRoot) || !statSync2(absRoot).isDirectory()) {
17816
17856
  return [];
17817
17857
  }
17818
17858
  const results = [];
17819
17859
  function scan(dir, depth) {
17820
17860
  if (depth > maxDepth) return;
17821
- if (existsSync5(path47.join(dir, ".agentv"))) {
17861
+ if (existsSync6(path47.join(dir, ".agentv"))) {
17822
17862
  results.push(dir);
17823
17863
  return;
17824
17864
  }
@@ -18391,6 +18431,248 @@ function toHrTime(iso) {
18391
18431
  return new Date(iso).getTime();
18392
18432
  }
18393
18433
 
18434
+ // src/import/claude-parser.ts
18435
// Transcript event types that parseClaudeSession does not turn into messages.
// (Their timestamps are still used for the session start/end time.)
var SKIPPED_TYPES = /* @__PURE__ */ new Set([
  "progress",
  "system",
  "file-history-snapshot"
]);
18436
/**
 * Parse a Claude session transcript (JSONL text, one JSON event per line)
 * into a flat message list plus session metadata.
 *
 * Behavior:
 * - Blank lines, lines that are not valid JSON, and events without a `type`
 *   are skipped.
 * - Every event carrying a `timestamp` (including otherwise skipped types)
 *   contributes to the start/end time used for `durationMs`.
 * - Events whose type is in SKIPPED_TYPES, and sidechain events, are ignored.
 * - `user` events: each `tool_result` block is attached as `output` to the
 *   matching earlier tool call (by `tool_use_id`); any text becomes a user
 *   message.
 * - `assistant` events: consecutive events sharing a `requestId` update a
 *   single message (the latest event's content wins); usage is recorded once
 *   per `requestId`.
 *
 * @param {string} jsonl Raw transcript text.
 * @returns {{
 *   messages: Array<object>,
 *   source: { provider: string, sessionId: string, projectPath?: string, startedAt?: string, model?: string },
 *   tokenUsage: { input: number, output: number } | undefined,
 *   durationMs: number | undefined,
 *   costUsd: null
 * }}
 */
function parseClaudeSession(jsonl) {
  const messages = [];
  let sessionId = "";
  let projectPath;
  let model;
  let startTimestamp;
  let endTimestamp;
  const usageByRequestId = /* @__PURE__ */ new Map();
  let lastAssistantRequestId;
  // Index in `messages` of the message owned by the *current* request, or -1
  // when the current request has not produced a message yet.
  let lastAssistantIdx = -1;
  // tool_use id -> { msgIdx, toolIdx } of the call still waiting for a result.
  const pendingToolCalls = /* @__PURE__ */ new Map();
  const lines = jsonl.split("\n").filter((l) => l.trim().length > 0);
  for (const line of lines) {
    let event;
    try {
      event = JSON.parse(line);
    } catch {
      continue;
    }
    // `?.` also covers a line that is the JSON literal `null`.
    if (!event?.type) continue;
    if (event.timestamp) {
      if (!startTimestamp) startTimestamp = event.timestamp;
      endTimestamp = event.timestamp;
    }
    if (SKIPPED_TYPES.has(event.type)) continue;
    if (event.isSidechain) continue;
    if (!sessionId && event.sessionId) {
      sessionId = event.sessionId;
    }
    if (!projectPath && event.cwd) {
      projectPath = event.cwd;
    }
    switch (event.type) {
      case "user": {
        const msg = event.message;
        if (!msg) break;
        const contentArr = msg.content;
        if (Array.isArray(contentArr)) {
          for (const block of contentArr) {
            if (block.type === "tool_result" && block.tool_use_id) {
              const pending = pendingToolCalls.get(block.tool_use_id);
              if (pending) {
                // Copy-on-write so previously returned message objects are
                // never mutated in place.
                const existingMsg = messages[pending.msgIdx];
                const existingCalls = [...existingMsg.toolCalls ?? []];
                existingCalls[pending.toolIdx] = {
                  ...existingCalls[pending.toolIdx],
                  output: extractToolResultContent(block.content)
                };
                messages[pending.msgIdx] = { ...existingMsg, toolCalls: existingCalls };
                pendingToolCalls.delete(block.tool_use_id);
              }
            }
          }
        }
        const text = extractTextContent2(contentArr);
        if (text !== void 0) {
          messages.push({ role: "user", content: text });
        }
        break;
      }
      case "assistant": {
        const msg = event.message;
        if (!msg) break;
        if (!model && msg.model) {
          model = msg.model;
        }
        if (msg.usage && event.requestId) {
          // Keyed by requestId so repeated events of one request count once.
          usageByRequestId.set(event.requestId, msg.usage);
        }
        const { text, toolCalls } = extractAssistantContent(msg.content);
        const sameRequest = Boolean(event.requestId) && event.requestId === lastAssistantRequestId;
        if (!sameRequest) {
          // A new request must never reuse the previous request's slot: if its
          // first event carries no text/tool calls (e.g. a thinking-only
          // block), a later event of this request would otherwise overwrite
          // the previous request's assistant message.
          lastAssistantIdx = -1;
        }
        if (sameRequest && lastAssistantIdx >= 0) {
          // NOTE(review): the latest event replaces the earlier content of the
          // same request rather than merging with it — confirm this matches
          // how the transcript writer emits multi-block responses.
          messages[lastAssistantIdx] = {
            role: "assistant",
            content: text || void 0,
            toolCalls: toolCalls.length > 0 ? toolCalls : void 0
          };
          // Drop bookkeeping for the replaced message so a late tool_result
          // cannot attach to the wrong call or create a phantom one.
          for (const [id, slot] of pendingToolCalls) {
            if (slot.msgIdx === lastAssistantIdx) pendingToolCalls.delete(id);
          }
          registerPendingToolCalls(toolCalls, lastAssistantIdx, pendingToolCalls);
        } else if (text || toolCalls.length > 0) {
          lastAssistantIdx = messages.length;
          messages.push({
            role: "assistant",
            content: text || void 0,
            toolCalls: toolCalls.length > 0 ? toolCalls : void 0
          });
          registerPendingToolCalls(toolCalls, lastAssistantIdx, pendingToolCalls);
        }
        lastAssistantRequestId = event.requestId;
        break;
      }
    }
  }
  let totalInputTokens = 0;
  let totalOutputTokens = 0;
  for (const usage of usageByRequestId.values()) {
    totalInputTokens += Number(usage.input_tokens ?? 0);
    totalOutputTokens += Number(usage.output_tokens ?? 0);
  }
  const hasUsage = usageByRequestId.size > 0;
  let durationMs;
  if (startTimestamp && endTimestamp) {
    durationMs = new Date(endTimestamp).getTime() - new Date(startTimestamp).getTime();
  }
  const source = {
    provider: "claude",
    sessionId,
    projectPath,
    startedAt: startTimestamp,
    model
  };
  return {
    messages,
    source,
    tokenUsage: hasUsage ? { input: totalInputTokens, output: totalOutputTokens } : void 0,
    durationMs,
    costUsd: null
  };
}
18555
/**
 * Record where each identified tool call lives so that a later result can be
 * matched back to it.
 *
 * @param {Array<{id?: string}>} toolCalls Tool calls of one message.
 * @param {number} msgIdx Index of that message in the message list.
 * @param {Map<string, {msgIdx: number, toolIdx: number}>} pending Map to fill;
 *   calls without an id are not recorded.
 */
function registerPendingToolCalls(toolCalls, msgIdx, pending) {
  for (const [toolIdx, call] of toolCalls.entries()) {
    const callId = call.id;
    if (!callId) continue;
    pending.set(callId, { msgIdx, toolIdx });
  }
}
18563
/**
 * Collect the text of a message's content.
 *
 * @param {unknown} content Either a plain string or an array of content
 *   blocks; anything else is treated as "no text".
 * @returns {string | undefined} The string itself, or the concatenation of all
 *   non-empty `text` blocks; `undefined` when there is no text.
 */
function extractTextContent2(content) {
  if (content === void 0 || content === null) return void 0;
  if (typeof content === "string") return content;
  // Unexpected shapes (object, number, ...) would make `for...of` throw and
  // abort the whole parse; treat them as carrying no text instead.
  if (!Array.isArray(content)) return void 0;
  const textParts = [];
  for (const block of content) {
    // `?.` tolerates null/primitive entries inside the array.
    if (block?.type === "text" && block.text) {
      textParts.push(block.text);
    }
  }
  return textParts.length > 0 ? textParts.join("") : void 0;
}
18574
/**
 * Split an assistant message's content into its text and its tool calls.
 *
 * @param {unknown} content Either a plain string or an array of content
 *   blocks; anything else is treated as empty content.
 * @returns {{ text: string | undefined, toolCalls: Array<{tool: string, input: unknown, id: unknown}> }}
 *   `text` is the concatenation of non-empty `text` blocks (or the string
 *   itself); `toolCalls` holds one entry per named `tool_use` block, in order.
 */
function extractAssistantContent(content) {
  if (content === void 0 || content === null) {
    return { text: void 0, toolCalls: [] };
  }
  if (typeof content === "string") {
    return { text: content, toolCalls: [] };
  }
  // Unexpected shapes (object, number, ...) would make `for...of` throw and
  // abort the whole parse; treat them as empty content instead.
  if (!Array.isArray(content)) {
    return { text: void 0, toolCalls: [] };
  }
  const textParts = [];
  const toolCalls = [];
  for (const block of content) {
    // `?.` tolerates null/primitive entries inside the array.
    switch (block?.type) {
      case "text":
        if (block.text) textParts.push(block.text);
        break;
      case "tool_use":
        // A tool_use block without a name cannot be represented; skip it.
        if (block.name) {
          toolCalls.push({
            tool: block.name,
            input: block.input,
            id: block.id
          });
        }
        break;
    }
  }
  return {
    text: textParts.length > 0 ? textParts.join("") : void 0,
    toolCalls
  };
}
18604
/**
 * Reduce a `tool_result` block's content to plain text.
 *
 * @param {unknown} content Either a plain string or an array of content
 *   blocks; anything else is treated as "no output".
 * @returns {string | undefined} The string itself, or the concatenation of all
 *   non-empty `text` blocks; `undefined` when there is no text.
 */
function extractToolResultContent(content) {
  if (content === void 0 || content === null) return void 0;
  if (typeof content === "string") return content;
  // Unexpected shapes (object, number, ...) would make `for...of` throw and
  // abort the whole parse; treat them as carrying no output instead.
  if (!Array.isArray(content)) return void 0;
  const parts = [];
  for (const block of content) {
    // `?.` tolerates null/primitive entries inside the array.
    if (block?.type === "text" && block.text) {
      parts.push(block.text);
    }
  }
  return parts.length > 0 ? parts.join("") : void 0;
}
18615
+
18616
+ // src/import/session-discovery.ts
18617
+ import { readdir as readdir8, stat as stat9 } from "node:fs/promises";
18618
+ import { homedir as homedir3 } from "node:os";
18619
+ import path48 from "node:path";
18620
// Default location searched for session transcripts: <home>/.claude/projects.
// Kept as a function so the home directory is looked up at call time.
var DEFAULT_PROJECTS_DIR = () => {
  const home = homedir3();
  return path48.join(home, ".claude", "projects");
};
18621
/**
 * Turn a project path into the directory name used under the projects dir.
 *
 * Every character that is not an ASCII letter or digit becomes "-", so
 * "/Users/a/my.app" -> "-Users-a-my-app" and "C:\\Users\\a" -> "C--Users-a".
 * Replacing only "/" (the previous behavior) never matched Windows paths or
 * paths containing ".", "_", spaces, etc.
 * NOTE(review): this mirrors how Claude Code appears to name its project
 * directories — confirm against a real ~/.claude/projects listing.
 *
 * @param {string} projectPath Project path as given by the caller.
 * @returns {string} Encoded directory name.
 */
function encodeProjectPath(projectPath) {
  return projectPath.replace(/[^a-zA-Z0-9]/g, "-");
}
18624
/**
 * List session transcript files (`*.jsonl`) found one level below the
 * projects directory, newest first.
 *
 * @param {object} [opts]
 * @param {string} [opts.projectsDir] Directory to scan; defaults to DEFAULT_PROJECTS_DIR().
 * @param {string} [opts.projectPath] Only scan project directories whose name contains the encoded form of this path.
 * @param {string} [opts.sessionId] Only return the session with this id (file name without ".jsonl").
 * @param {boolean} [opts.latest] When truthy, return at most one session.
 * @param {number} [opts.limit] Maximum number of sessions when `latest` is not set (default 10).
 * @returns {Promise<Array<{sessionId: string, filePath: string, projectDir: string, updatedAt: Date}>>}
 *   Sessions sorted by modification time, most recent first. Unreadable
 *   directories are skipped; an unreadable projects dir yields [].
 */
async function discoverClaudeSessions(opts) {
  const root = opts?.projectsDir ?? DEFAULT_PROJECTS_DIR();
  const maxSessions = opts?.latest ? 1 : (opts?.limit ?? 10);

  let projectNames;
  try {
    projectNames = await readdir8(root);
  } catch {
    return [];
  }

  const wantedProject = opts?.projectPath;
  if (wantedProject) {
    const encoded = encodeProjectPath(wantedProject);
    // A substring test also covers the exact-match case.
    projectNames = projectNames.filter((name) => name.includes(encoded));
  }

  const wantedSession = opts?.sessionId;
  const found = [];
  for (const projectDir of projectNames) {
    const dirPath = path48.join(root, projectDir);
    let fileNames;
    try {
      fileNames = await readdir8(dirPath);
    } catch {
      // Not a directory, or not readable: nothing to collect here.
      continue;
    }
    for (const fileName of fileNames) {
      if (!fileName.endsWith(".jsonl")) continue;
      const sessionId = fileName.replace(/\.jsonl$/, "");
      if (wantedSession && sessionId !== wantedSession) continue;
      const filePath = path48.join(dirPath, fileName);
      // If the file cannot be stat'ed, fall back to the epoch so it sorts last.
      const updatedAt = await stat9(filePath).then(
        (info) => info.mtime,
        () => /* @__PURE__ */ new Date(0)
      );
      found.push({ sessionId, filePath, projectDir, updatedAt });
    }
  }

  found.sort((left, right) => right.updatedAt.getTime() - left.updatedAt.getTime());
  return found.slice(0, maxSessions);
}
18669
+
18670
+ // src/import/types.ts
18671
+ import { readFile as readFile14 } from "node:fs/promises";
18672
/**
 * Read a transcript file as UTF-8 text.
 *
 * @param {string} filePath Path of the file to read.
 * @returns {Promise<string>} The file's contents; rejects if the read fails.
 */
async function readTranscriptFile(filePath) {
  const contents = await readFile14(filePath, { encoding: "utf8" });
  return contents;
}
18675
+
18394
18676
  // src/index.ts
18395
18677
  function createAgentKernel() {
18396
18678
  return { status: "stub" };
@@ -18460,6 +18742,7 @@ export {
18460
18742
  deriveProjectId,
18461
18743
  detectFormat,
18462
18744
  discoverAssertions,
18745
+ discoverClaudeSessions,
18463
18746
  discoverCopilotSessions,
18464
18747
  discoverGraders,
18465
18748
  discoverGraders as discoverJudges,
@@ -18519,12 +18802,14 @@ export {
18519
18802
  negateScore,
18520
18803
  normalizeLineEndings,
18521
18804
  parseAgentSkillsEvals,
18805
+ parseClaudeSession,
18522
18806
  parseJsonFromText,
18523
18807
  parseJsonSafe,
18524
18808
  readJsonFile,
18525
18809
  readTargetDefinitions,
18526
18810
  readTestSuiteMetadata,
18527
18811
  readTextFile,
18812
+ readTranscriptFile,
18528
18813
  removeProject,
18529
18814
  resolveAndCreateProvider,
18530
18815
  resolveFileReference,