npm - @tangle-network/agent-eval - Versions diffs - 0.61.0 → 0.62.0 - Mend

@tangle-network/agent-eval 0.61.0 → 0.62.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47)

package/CHANGELOG.md +14 -8
package/dist/adapters/http.d.ts +4 -1
package/dist/adapters/langchain.d.ts +4 -1
package/dist/adapters/otel.d.ts +4 -4
package/dist/{agent-profile-9J9hxdm2.d.ts → agent-profile-DzcPHR1Z.d.ts} +1 -1
package/dist/benchmarks/index.d.ts +2 -2
package/dist/campaign/index.d.ts +8 -8
package/dist/campaign/index.js +4 -3
package/dist/campaign/index.js.map +1 -1
package/dist/{chunk-OLULBECP.js → chunk-7TPYV2ER.js} +27 -1
package/dist/chunk-7TPYV2ER.js.map +1 -0
package/dist/{chunk-SUGME4OT.js → chunk-CV2BS2OV.js} +7 -5
package/dist/chunk-CV2BS2OV.js.map +1 -0
package/dist/chunk-E22YUOAL.js +111 -0
package/dist/chunk-E22YUOAL.js.map +1 -0
package/dist/{chunk-GMXHLSLL.js → chunk-SS2SOBBT.js} +1 -106
package/dist/chunk-SS2SOBBT.js.map +1 -0
package/dist/contract/index.d.ts +9 -9
package/dist/contract/index.js +4 -3
package/dist/contract/index.js.map +1 -1
package/dist/{control-Bf8owbuG.d.ts → control-DxvZeV5X.d.ts} +1 -1
package/dist/control.d.ts +2 -2
package/dist/hosted/index.d.ts +4 -4
package/dist/{index-Bvk35ils.d.ts → index-DsnOpCO6.d.ts} +1 -1
package/dist/{index-D9dwa00f.d.ts → index-DxfmYUjC.d.ts} +2 -2
package/dist/index.d.ts +98 -14
package/dist/index.js +324 -45
package/dist/index.js.map +1 -1
package/dist/meta-eval/index.d.ts +2 -2
package/dist/multishot/index.js.map +1 -1
package/dist/openapi.json +1 -1
package/dist/{provenance-D0WeCXt1.d.ts → provenance-CYBV9Ox6.d.ts} +14 -3
package/dist/{registry-qmbYT3Eo.d.ts → registry-DPly4_hZ.d.ts} +1 -1
package/dist/{release-report-DszkgvJ3.d.ts → release-report-DGoeObZT.d.ts} +2 -2
package/dist/reporting.d.ts +4 -4
package/dist/{researcher-BaVsy0sW.d.ts → researcher-WJvIpX3L.d.ts} +2 -2
package/dist/rl.d.ts +6 -6
package/dist/{rubric-predictive-validity-DgBHWsh7.d.ts → rubric-predictive-validity-D_4BSXGV.d.ts} +1 -1
package/dist/{run-campaign-HXPJAUZ3.js → run-campaign-5J3ED2UJ.js} +3 -2
package/dist/{run-record-DgUVo5pw.d.ts → run-record-BgTFzO2r.d.ts} +1 -1
package/dist/{summary-report-BQvXpvaR.d.ts → summary-report-ByiOUrHj.d.ts} +1 -1
package/dist/{types-Beb6KPqZ.d.ts → types-DH22o8hM.d.ts} +15 -11
package/package.json +1 -1
package/dist/chunk-GMXHLSLL.js.map +0 -1
package/dist/chunk-OLULBECP.js.map +0 -1
package/dist/chunk-SUGME4OT.js.map +0 -1
package/dist/{run-campaign-HXPJAUZ3.js.map → run-campaign-5J3ED2UJ.js.map} +0 -0

package/dist/index.js CHANGED

@@ -9,12 +9,10 @@ import {
   runBehavioralCanaries
 } from "./chunk-SHTXZ4O2.js";
 import {
-  BackendIntegrityError,
   DEFAULT_MUTATION_PRIMITIVES,
   DEFAULT_RED_TEAM_CORPUS,
   Dataset,
   HoldoutLockedError,
-  assertRealBackend,
   buildReflectionPrompt,
   hashScenarios,
   parseReflectionResponse,
@@ -22,9 +20,13 @@ import {
   redTeamReport,
   runCanaries,
   scoreRedTeamOutput,
-  summarizeBackendIntegrity,
   toolNamesForRun
-} from "./chunk-GMXHLSLL.js";
+} from "./chunk-SS2SOBBT.js";
+import {
+  BackendIntegrityError,
+  assertRealBackend,
+  summarizeBackendIntegrity
+} from "./chunk-E22YUOAL.js";
 import {
   BENCHMARK_SPLIT_SEED,
   benchmarks_exports,
@@ -723,8 +725,8 @@ function createVerifierAdapter(opts) {
       const report = await opts.verifier.run({ env, ...opts.options });
       const out = [];
       for (const layer of report.layers) {
-        for (const finding of layer.findings) {
-          out.push(liftLayerFinding(id, area, layer.layer, finding));
+        for (const finding2 of layer.findings) {
+          out.push(liftLayerFinding(id, area, layer.layer, finding2));
         }
         if (layer.status === "fail" || layer.status === "error" || layer.status === "timeout") {
           out.push(
@@ -1754,6 +1756,279 @@ var DEFAULT_TRACE_ANALYST_KINDS = [
   IMPROVEMENT_KIND_SPEC
 ];
+// src/analyst/kinds/skill-usage.ts
+import { existsSync as existsSync3, readdirSync, readFileSync as readFileSync2, statSync } from "fs";
+import { join } from "path";
+var BLOAT_LINE_THRESHOLD = 300;
+var TANGLE_PRIVATE_RE = /\b(cli-bridge|tangletools|ops-board|drew-gtr-pro|@tangle-network\/|~\/company|tangle\.tools|gtm-agent)\b|\bkimi\b|\btcloud\b/gi;
+var TRIGGER_RE = /triggers?\s*[:\-]/i;
+function listSkillDirs(root) {
+  if (!existsSync3(root)) return [];
+  const out = [];
+  for (const entry of readdirSync(root, { withFileTypes: true })) {
+    if (!entry.isDirectory() && !entry.isSymbolicLink()) continue;
+    const skillMd = join(root, entry.name, "SKILL.md");
+    if (existsSync3(skillMd)) out.push({ name: entry.name, path: skillMd });
+  }
+  return out;
+}
+function walkJsonl(dir, cap) {
+  if (!existsSync3(dir)) return [];
+  const files = [];
+  const stack = [dir];
+  while (stack.length) {
+    const cur = stack.pop();
+    let entries;
+    try {
+      entries = readdirSync(cur, { withFileTypes: true });
+    } catch {
+      continue;
+    }
+    for (const e of entries) {
+      const full = join(cur, e.name);
+      if (e.isDirectory()) stack.push(full);
+      else if (e.name.endsWith(".jsonl")) {
+        files.push(full);
+        if (cap > 0 && files.length >= cap) return files;
+      }
+    }
+  }
+  return files;
+}
+function frontmatterDescription(body) {
+  const fm = /^---\n([\s\S]*?)\n---/.exec(body);
+  const block = fm?.[1] ?? "";
+  const m = /description:\s*(.+)/i.exec(block);
+  return m?.[1] ?? "";
+}
+function countArtifacts(roots, name, aliases) {
+  let n = 0;
+  for (const root of roots) {
+    const candidates = [join(root, ".evolve", name), ...aliases.map((a) => join(root, a))];
+    for (const dir of candidates) {
+      if (!existsSync3(dir)) continue;
+      try {
+        if (statSync(dir).isDirectory()) n += readdirSync(dir).length;
+        else n += 1;
+      } catch {
+      }
+    }
+  }
+  return n;
+}
+function buildSkillUsageReport(config) {
+  const skills = config.skillRoots.flatMap(
+    ({ root, kind }) => listSkillDirs(root).map((s) => ({ ...s, kind }))
+  );
+  const names = skills.map((s) => s.name);
+  const direct = new Map(names.map((n) => [n, 0]));
+  const slash = new Map(names.map((n) => [n, 0]));
+  const skillRe = /"skill"\s*:\s*"([a-z0-9_:-]+)"/g;
+  const cmdRe = /<command-name>\/?([a-z0-9_:-]+)<\/command-name>/g;
+  let transcripts = 0;
+  for (const dir of config.transcriptDirs) {
+    for (const file of walkJsonl(dir, config.maxTranscriptsPerDir ?? 0)) {
+      transcripts += 1;
+      let data;
+      try {
+        data = readFileSync2(file, "utf8");
+      } catch {
+        continue;
+      }
+      for (const m of data.matchAll(skillRe)) {
+        const g = m[1];
+        if (!g) continue;
+        const n = g.split(":").pop() ?? g;
+        const prev = direct.get(n);
+        if (prev !== void 0) direct.set(n, prev + 1);
+      }
+      for (const m of data.matchAll(cmdRe)) {
+        const g = m[1];
+        if (g === void 0) continue;
+        const prev = slash.get(g);
+        if (prev !== void 0) slash.set(g, prev + 1);
+      }
+    }
+  }
+  const bodies = /* @__PURE__ */ new Map();
+  for (const s of skills) {
+    try {
+      bodies.set(s.name, readFileSync2(s.path, "utf8"));
+    } catch {
+      bodies.set(s.name, "");
+    }
+  }
+  const inbound = new Map(names.map((n) => [n, 0]));
+  for (const target of names) {
+    const ref = new RegExp(`/${target}\\b|\\[\\[${target}\\]\\]`);
+    for (const s of skills) {
+      if (s.name === target) continue;
+      if (ref.test(bodies.get(s.name) ?? "")) inbound.set(target, inbound.get(target) + 1);
+    }
+  }
+  const records = skills.map((s) => {
+    const body = bodies.get(s.name) ?? "";
+    const dir = s.path.replace(/\/SKILL\.md$/, "");
+    return {
+      name: s.name,
+      kind: s.kind,
+      path: s.path,
+      lines: body ? body.split("\n").length : 0,
+      directInvocations: direct.get(s.name) ?? 0,
+      slashInvocations: slash.get(s.name) ?? 0,
+      inboundRefs: inbound.get(s.name) ?? 0,
+      artifactCount: countArtifacts(
+        config.artifactRoots ?? [],
+        s.name,
+        config.artifactAliases?.[s.name] ?? []
+      ),
+      tanglePrivateRefs: (body.match(TANGLE_PRIVATE_RE) ?? []).length,
+      hasReferencesDir: existsSync3(join(dir, "references")),
+      hasEvalsDir: existsSync3(join(dir, "evals")),
+      logsRuns: body.includes("skill-runs.jsonl"),
+      hasTriggerPhrases: TRIGGER_RE.test(frontmatterDescription(body) || body.slice(0, 600))
+    };
+  });
+  return { generatedFromTraces: transcripts, records };
+}
+var ANALYST_ID = "skill-usage";
+function finding(area, subject, claim, severity, confidence, producedAt, recommended, evidenceUri, rationale) {
+  return {
+    schema_version: "1.0.0",
+    finding_id: computeFindingId({ analyst_id: ANALYST_ID, area, subject, claim }),
+    analyst_id: ANALYST_ID,
+    produced_at: producedAt,
+    severity,
+    area,
+    claim,
+    rationale,
+    evidence_refs: [{ kind: "artifact", uri: evidenceUri }],
+    recommended_action: recommended,
+    confidence,
+    subject
+  };
+}
+function emitSkillUsageFindings(report, producedAt) {
+  const out = [];
+  for (const r of report.records) {
+    const directTotal = r.directInvocations + r.slashInvocations;
+    const trueUsage = directTotal + r.inboundRefs + r.artifactCount;
+    if (trueUsage === 0) {
+      out.push(
+        finding(
+          "skill-usage",
+          r.name,
+          `Skill '${r.name}' has zero usage across all signals (direct, slash, inbound-refs, artifacts)`,
+          "high",
+          0.6,
+          producedAt,
+          "Confirm the skill covers a real recurring job; if not, deprecate. Zero true usage is the only deterministic deprecation candidate.",
+          r.path,
+          "No Skill-tool call, no slash invocation, no sibling dispatches to it, and no on-disk artifacts."
+        )
+      );
+    } else if (directTotal === 0 && r.inboundRefs + r.artifactCount > 0) {
+      out.push(
+        finding(
+          "skill-usage",
+          r.name,
+          `Skill '${r.name}' shows 0 direct invocations but is used via orchestration/artifacts (inbound=${r.inboundRefs}, artifacts=${r.artifactCount})`,
+          "info",
+          0.8,
+          producedAt,
+          "Do NOT treat as unused \u2014 usage is real but logged under parent skills or on disk. Strengthen direct-invocation discovery only if direct use is desired.",
+          r.path,
+          "The Skill-tool counter undercounts orchestrated/chained leaf skills."
+        )
+      );
+    }
+    if (directTotal <= 2 && !r.hasTriggerPhrases) {
+      out.push(
+        finding(
+          "discoverability",
+          r.name,
+          `Skill '${r.name}' is rarely invoked directly and its description has no explicit trigger phrases`,
+          "medium",
+          0.7,
+          producedAt,
+          "Add a `Triggers:` clause with verbatim user phrases to the frontmatter description so the model auto-invokes it.",
+          r.path
+        )
+      );
+    }
+    if (r.kind === "public" && r.tanglePrivateRefs > 0) {
+      out.push(
+        finding(
+          "safety",
+          r.name,
+          `Public skill '${r.name}' carries ${r.tanglePrivateRefs} Tangle-private reference(s)`,
+          "high",
+          0.75,
+          producedAt,
+          "Sanitize incidental internal refs (cli-bridge/kimi/tcloud/~company/private repos) or relocate to a private repo. Verify @tangle-network/* refs are to PUBLISHED packages before treating as a leak.",
+          r.path
+        )
+      );
+    }
+    if (r.lines > BLOAT_LINE_THRESHOLD && !r.hasReferencesDir) {
+      out.push(
+        finding(
+          "maintainability",
+          r.name,
+          `Skill '${r.name}' is ${r.lines} lines with no references/ split (progressive disclosure)`,
+          "medium",
+          0.8,
+          producedAt,
+          `Split detail into references/ loaded on demand; keep SKILL.md a short overview. ${r.lines} lines load into every session's context budget.`,
+          r.path
+        )
+      );
+    }
+    if (!r.hasEvalsDir) {
+      out.push(
+        finding(
+          "data-quality",
+          r.name,
+          `Skill '${r.name}' ships no evals/`,
+          "low",
+          0.6,
+          producedAt,
+          "Add evals/evals.json with >=3 scenarios proving the skill beats baseline; gives regression coverage.",
+          r.path
+        )
+      );
+    }
+    if (!r.logsRuns) {
+      out.push(
+        finding(
+          "observability",
+          r.name,
+          `Skill '${r.name}' never appends to .evolve/skill-runs.jsonl`,
+          "low",
+          0.55,
+          producedAt,
+          "Append one run line to .evolve/skill-runs.jsonl on completion, or declare it a non-logging leaf, so the self-improvement loop can see it ran.",
+          r.path
+        )
+      );
+    }
+  }
+  return out;
+}
+var SkillUsageAnalyst = class {
+  id = ANALYST_ID;
+  description = "Deterministic multi-signal skill-usage analysis: flags dead skills, measurement-invisible (orchestrated) usage, discovery gaps, public-repo leaks, bloat, missing evals, and missing run-logging.";
+  inputKind = "custom";
+  cost = { kind: "deterministic", est_usd_per_run: 0 };
+  version = "1.0.0";
+  async analyze(input, ctx) {
+    const producedAt = ctx.tags?.producedAt ?? (/* @__PURE__ */ new Date()).toISOString();
+    ctx.log?.(`skill-usage: ${input.records.length} skills over ${input.generatedFromTraces} transcripts`);
+    return emitSkillUsageFindings(input, producedAt);
+  }
+};
+var SKILL_USAGE_ANALYST = new SkillUsageAnalyst();
 // src/analyst/registry.ts
 import { randomUUID } from "crypto";
 var AnalystRegistry = class {
@@ -2188,12 +2463,12 @@ function ghCliClient(opts = {}) {
       await exec("git", ["branch", "-D", input.branchName], { cwd });
       await run("git", ["checkout", "-b", input.branchName]);
       const { mkdir, writeFile } = await import("fs/promises");
-      const { dirname: dirname4, join: join4, resolve } = await import("path");
+      const { dirname: dirname4, join: join5, resolve } = await import("path");
       for (const change of input.fileChanges) {
         const abs = resolve(cwd, change.path);
         await mkdir(dirname4(abs), { recursive: true });
         await writeFile(abs, change.contents, "utf8");
-        await run("git", ["add", join4(change.path)]);
+        await run("git", ["add", join5(change.path)]);
       }
       const env = {};
       if (input.authorName) env.GIT_AUTHOR_NAME = input.authorName;
@@ -3224,10 +3499,10 @@ var FileSystemFeedbackTrajectoryStore = class {
   }
   async append(record) {
     const { appendFile, mkdir } = await import("fs/promises");
-    const { join: join4 } = await import("path");
+    const { join: join5 } = await import("path");
     await mkdir(this.dir, { recursive: true });
     await appendFile(
-      join4(this.dir, "feedback-trajectories.ndjson"),
+      join5(this.dir, "feedback-trajectories.ndjson"),
       `${JSON.stringify(record)}
 `,
       "utf8"
@@ -3236,8 +3511,8 @@ var FileSystemFeedbackTrajectoryStore = class {
   async load() {
     if (this.loaded) return;
     const { readFile } = await import("fs/promises");
-    const { join: join4 } = await import("path");
-    const file = join4(this.dir, "feedback-trajectories.ndjson");
+    const { join: join5 } = await import("path");
+    const file = join5(this.dir, "feedback-trajectories.ndjson");
     try {
       const raw = await readFile(file, "utf8");
       for (const line of raw.split("\n")) {
@@ -6048,8 +6323,8 @@ function assertNonNegative(n, name) {
 }
 // src/muffled-gate-scanner.ts
-import { existsSync as existsSync3, readdirSync, readFileSync as readFileSync2, statSync } from "fs";
-import { join } from "path";
+import { existsSync as existsSync4, readdirSync as readdirSync2, readFileSync as readFileSync3, statSync as statSync2 } from "fs";
+import { join as join2 } from "path";
 function codeOf(line) {
   return line.replace(/\/\/.*$/, "").replace(/^\s*\*.*$/, "");
 }
@@ -6161,14 +6436,14 @@ var UNIVERSAL_FINDERS = [findConstructorCwdDropped];
 function autoDeriveImporters(repoRoot, roots, extensions, importsContain) {
   const matches = [];
   const walk = (rel) => {
-    const abs = join(repoRoot, rel);
-    if (!existsSync3(abs)) return;
-    for (const entry of readdirSync(abs)) {
-      const sub = join(rel, entry);
-      const subAbs = join(repoRoot, sub);
+    const abs = join2(repoRoot, rel);
+    if (!existsSync4(abs)) return;
+    for (const entry of readdirSync2(abs)) {
+      const sub = join2(rel, entry);
+      const subAbs = join2(repoRoot, sub);
       let st;
       try {
-        st = statSync(subAbs);
+        st = statSync2(subAbs);
       } catch {
         continue;
       }
@@ -6181,7 +6456,7 @@ function autoDeriveImporters(repoRoot, roots, extensions, importsContain) {
           continue;
         let text;
         try {
-          text = readFileSync2(subAbs, "utf8");
+          text = readFileSync3(subAbs, "utf8");
         } catch {
           continue;
         }
@@ -6196,9 +6471,9 @@ function scanForMuffledGates(opts) {
   const findings = [];
   const scanned = /* @__PURE__ */ new Set();
   for (const file of opts.scanFiles) {
-    const abs = join(opts.repoRoot, file);
-    if (!existsSync3(abs)) continue;
-    const text = readFileSync2(abs, "utf8");
+    const abs = join2(opts.repoRoot, file);
+    if (!existsSync4(abs)) continue;
+    const text = readFileSync3(abs, "utf8");
     for (const find of opts.finders) findings.push(...find(file, text));
     scanned.add(file);
   }
@@ -6211,9 +6486,9 @@ function scanForMuffledGates(opts) {
     );
     for (const file of importers) {
       if (scanned.has(file)) continue;
-      const abs = join(opts.repoRoot, file);
-      if (!existsSync3(abs)) continue;
-      const text = readFileSync2(abs, "utf8");
+      const abs = join2(opts.repoRoot, file);
+      if (!existsSync4(abs)) continue;
+      const text = readFileSync3(abs, "utf8");
       for (const find of opts.autoDerive.universalFinders) findings.push(...find(file, text));
     }
   }
@@ -6363,7 +6638,7 @@ function isObject(v) {
 }
 // src/scorecard.ts
-import { appendFileSync as appendFileSync2, existsSync as existsSync4, mkdirSync as mkdirSync2, readFileSync as readFileSync3 } from "fs";
+import { appendFileSync as appendFileSync2, existsSync as existsSync5, mkdirSync as mkdirSync2, readFileSync as readFileSync4 } from "fs";
 import { dirname as dirname2 } from "path";
 function median(xs) {
   if (xs.length === 0) return 0;
@@ -6439,10 +6714,10 @@ function recordRunsToScorecard(logPath, runs, opts) {
   return lines;
 }
 function loadScorecard(logPath) {
-  if (!existsSync4(logPath)) return { cells: [], profiles: {} };
+  if (!existsSync5(logPath)) return { cells: [], profiles: {} };
   const cells = /* @__PURE__ */ new Map();
   const profiles = {};
-  for (const raw of readFileSync3(logPath, "utf8").split("\n")) {
+  for (const raw of readFileSync4(logPath, "utf8").split("\n")) {
     const line = raw.trim();
     if (!line) continue;
     let parsed;
@@ -7319,7 +7594,7 @@ async function commitBisect(options) {
 }
 async function promptBisect(options) {
   const split = options.paragraphSplitter ?? ((p) => p.split(/\n\s*\n/));
-  const join4 = (paragraphs) => paragraphs.join("\n\n");
+  const join5 = (paragraphs) => paragraphs.join("\n\n");
   const goodParas = split(options.good);
   const badParas = split(options.bad);
   if (goodParas.length !== badParas.length) {
@@ -7339,7 +7614,7 @@ async function promptBisect(options) {
   const result = await bisect({
     good: goodMask,
     bad: badMask,
-    runEval: (mask) => options.runEval(join4(paragraphsFor(mask))),
+    runEval: (mask) => options.runEval(join5(paragraphsFor(mask))),
     maxIterations: options.maxIterations ?? n + 5,
     halfway: (g, b) => {
       for (let i = 0; i < g.length; i++) {
@@ -7370,12 +7645,12 @@ async function promptBisect(options) {
     }
   }
   const materializedPath = result.path.map((s) => ({
-    state: join4(paragraphsFor(s.state)),
+    state: join5(paragraphsFor(s.state)),
     score: s.score,
     pass: s.pass
   }));
   return {
-    culprit: join4(paragraphsFor(culprit)),
+    culprit: join5(paragraphsFor(culprit)),
     path: materializedPath,
     converged: result.converged,
     inputInconsistent: result.inputInconsistent,
@@ -7869,8 +8144,8 @@ async function runSelfPlay(proposer, scorer, targets, options = {}) {
 // src/command-runner.ts
 import { spawnSync } from "child_process";
-import { existsSync as existsSync5, readdirSync as readdirSync2, readFileSync as readFileSync4, statSync as statSync2 } from "fs";
-import { join as join2 } from "path";
+import { existsSync as existsSync6, readdirSync as readdirSync3, readFileSync as readFileSync5, statSync as statSync3 } from "fs";
+import { join as join3 } from "path";
 var localCommandRunner = {
   name: "local",
   async run(input) {
@@ -7898,11 +8173,11 @@ var localCommandRunner = {
     return r.status === 0 && (r.stdout ?? "").trim().length > 0;
   },
   async fileExists(path) {
-    return existsSync5(path);
+    return existsSync6(path);
   },
   async readFile(path) {
     try {
-      return readFileSync4(path, "utf8");
+      return readFileSync5(path, "utf8");
     } catch {
       return null;
     }
@@ -7910,14 +8185,14 @@ var localCommandRunner = {
   async readDir(path) {
     let entries;
     try {
-      entries = readdirSync2(path);
+      entries = readdirSync3(path);
     } catch {
       return [];
     }
     const out = [];
     for (const name of entries) {
       try {
-        const st = statSync2(join2(path, name));
+        const st = statSync3(join3(path, name));
         out.push({
           name,
           isDirectory: st.isDirectory(),
@@ -8834,7 +9109,7 @@ function multiToolchainLayer(config) {
 }
 // src/reference-replay.ts
-import { appendFileSync as appendFileSync3, existsSync as existsSync6, mkdirSync as mkdirSync3, readFileSync as readFileSync5 } from "fs";
+import { appendFileSync as appendFileSync3, existsSync as existsSync7, mkdirSync as mkdirSync3, readFileSync as readFileSync6 } from "fs";
 import { dirname as dirname3 } from "path";
 var DEFAULT_MATCH_THRESHOLD = 0.55;
 var ALL_SPLITS = ["train", "dev", "test", "holdout"];
@@ -8960,7 +9235,7 @@ function jsonlReferenceReplayStore(path) {
     },
     async list() {
       return lock.runExclusive(() => {
-        if (!existsSync6(path)) return [];
+        if (!existsSync7(path)) return [];
         return readJsonl(path);
       });
     }
@@ -9303,7 +9578,7 @@ function throwIfAborted(signal) {
   throw new Error(signal.reason ? String(signal.reason) : "reference replay aborted");
 }
 function readJsonl(path) {
-  const raw = readFileSync5(path, "utf8");
+  const raw = readFileSync6(path, "utf8");
   const out = [];
   for (const line of raw.split("\n")) {
     const trimmed = line.trim();
@@ -9460,7 +9735,7 @@ function createDefaultReviewer(options) {
 // src/discover-personas.ts
 import { promises as fs } from "fs";
-import { basename, extname, join as join3 } from "path";
+import { basename, extname, join as join4 } from "path";
 var DEFAULT_PATTERN = /^\d{2}-.+\.(yaml|yml|json|md)$/;
 async function discoverPersonas(dir, opts = {}) {
   const pattern = opts.pattern ?? DEFAULT_PATTERN;
@@ -9478,7 +9753,7 @@ async function discoverPersonas(dir, opts = {}) {
     }
     const out = [];
     for (const entry of entries) {
-      const full = join3(d, entry.name);
+      const full = join4(d, entry.name);
       if (entry.isDir) {
         if (opts.recursive) out.push(...await walk(full));
         continue;
@@ -10322,9 +10597,11 @@ export {
   RunIntegrityError,
   RunRecordValidationError,
   SEMANTIC_CONCEPT_JUDGE_VERSION,
+  SKILL_USAGE_ANALYST,
   SandboxHarness,
   ScenarioRegistry,
   SingleBackendError,
+  SkillUsageAnalyst,
   SpanNotFoundError,
   SubprocessSandboxDriver,
   TRACE_ANALYST_ACTOR_DESCRIPTION,
@@ -10375,6 +10652,7 @@ export {
   buildReflectionPrompt,
   buildReviewerPrompt,
   buildSandboxAgentProfileCell,
+  buildSkillUsageReport,
   buildTraceAnalystTools,
   buildTraceInsightContext,
   buildTraceInsightPrompt,
@@ -10455,6 +10733,7 @@ export {
   distillPlaybook,
   domainEvidencePattern,
   dominates,
+  emitSkillUsageFindings,
   estimateCost,
   estimateTokens,
   euAiActReport,