@tangle-network/agent-eval 0.60.0 → 0.62.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/CHANGELOG.md +27 -0
  2. package/dist/adapters/http.d.ts +4 -1
  3. package/dist/adapters/langchain.d.ts +4 -1
  4. package/dist/adapters/otel.d.ts +5 -5
  5. package/dist/agent-profile-DzcPHR1Z.d.ts +114 -0
  6. package/dist/benchmarks/index.d.ts +3 -3
  7. package/dist/builder-eval/index.js +2 -2
  8. package/dist/campaign/index.d.ts +151 -11
  9. package/dist/campaign/index.js +212 -10
  10. package/dist/campaign/index.js.map +1 -1
  11. package/dist/{chunk-QDOSODID.js → chunk-3B7Y5AUR.js} +2 -2
  12. package/dist/{chunk-QYJT52YW.js → chunk-3BFEG2F6.js} +1 -1
  13. package/dist/chunk-3BFEG2F6.js.map +1 -0
  14. package/dist/{chunk-J4DIMSRK.js → chunk-6EKXFFGQ.js} +2 -2
  15. package/dist/{chunk-63EPZQUZ.js → chunk-6REHLN5J.js} +2 -2
  16. package/dist/{chunk-NOPYCRNG.js → chunk-7TPYV2ER.js} +39 -2
  17. package/dist/chunk-7TPYV2ER.js.map +1 -0
  18. package/dist/{chunk-GM476SZU.js → chunk-AIWHLG7J.js} +5 -5
  19. package/dist/{chunk-AIXHUIHG.js → chunk-B26KI423.js} +3 -3
  20. package/dist/{chunk-LBSXXH56.js → chunk-CV2BS2OV.js} +8 -6
  21. package/dist/chunk-CV2BS2OV.js.map +1 -0
  22. package/dist/chunk-E22YUOAL.js +111 -0
  23. package/dist/chunk-E22YUOAL.js.map +1 -0
  24. package/dist/{chunk-NCK5QLGT.js → chunk-F3SRAAZO.js} +2 -2
  25. package/dist/{chunk-VXNVVBZO.js → chunk-IHDHUN2X.js} +2 -2
  26. package/dist/{chunk-S3SDD56V.js → chunk-ITBRCT73.js} +2 -2
  27. package/dist/{chunk-OLIBRKRD.js → chunk-KX6F6NCG.js} +2 -2
  28. package/dist/chunk-PQV2TKC3.js +27 -0
  29. package/dist/chunk-PQV2TKC3.js.map +1 -0
  30. package/dist/{chunk-UBPIXOC4.js → chunk-SBCB6VZY.js} +2 -2
  31. package/dist/{chunk-GBHRUAOF.js → chunk-SS2SOBBT.js} +2 -107
  32. package/dist/chunk-SS2SOBBT.js.map +1 -0
  33. package/dist/{chunk-YTMXBHFM.js → chunk-T375SUOZ.js} +2 -2
  34. package/dist/{chunk-PIEAE33T.js → chunk-Z4ZCBC7M.js} +2 -2
  35. package/dist/cli.js +3 -3
  36. package/dist/contract/index.d.ts +13 -13
  37. package/dist/contract/index.js +8 -7
  38. package/dist/contract/index.js.map +1 -1
  39. package/dist/{control-DjEgwWNo.d.ts → control-DxvZeV5X.d.ts} +2 -2
  40. package/dist/control.d.ts +5 -5
  41. package/dist/control.js +3 -3
  42. package/dist/{dataset-BlwAtYYf.d.ts → dataset-B2kL-fSM.d.ts} +1 -1
  43. package/dist/{errors-mje_cKOs.d.ts → errors-Dwqw-T_m.d.ts} +1 -1
  44. package/dist/{feedback-trajectory-DpUmE90J.d.ts → feedback-trajectory-8hKC5EOb.d.ts} +1 -1
  45. package/dist/governance/index.d.ts +3 -3
  46. package/dist/hosted/index.d.ts +5 -5
  47. package/dist/{index-wlaiph9Y.d.ts → index-DsnOpCO6.d.ts} +1 -1
  48. package/dist/{index-BIkvdkSU.d.ts → index-DxfmYUjC.d.ts} +2 -2
  49. package/dist/index.d.ts +108 -132
  50. package/dist/index.js +339 -73
  51. package/dist/index.js.map +1 -1
  52. package/dist/{integrity-CfXjSqEv.d.ts → integrity-CJzrpUua.d.ts} +1 -1
  53. package/dist/{llm-client-BXVRUZyX.d.ts → llm-client-DbjLfz-K.d.ts} +1 -1
  54. package/dist/meta-eval/index.d.ts +3 -3
  55. package/dist/multishot/index.js.map +1 -1
  56. package/dist/openapi.json +1 -1
  57. package/dist/pipelines/index.js +3 -3
  58. package/dist/{provenance-BM8vmMBa.d.ts → provenance-CYBV9Ox6.d.ts} +16 -5
  59. package/dist/{red-team-CrC5MZYd.d.ts → red-team-DW9Ca_tj.d.ts} +1 -1
  60. package/dist/{registry-DK9kqXvb.d.ts → registry-DPly4_hZ.d.ts} +2 -2
  61. package/dist/{release-report-DmPjIce3.d.ts → release-report-DGoeObZT.d.ts} +3 -3
  62. package/dist/reporting.d.ts +6 -6
  63. package/dist/reporting.js +4 -4
  64. package/dist/{researcher-JP8EvnLv.d.ts → researcher-WJvIpX3L.d.ts} +4 -4
  65. package/dist/rl.d.ts +9 -9
  66. package/dist/rl.js +7 -7
  67. package/dist/{rubric-predictive-validity-B3qNa4aY.d.ts → rubric-predictive-validity-D_4BSXGV.d.ts} +1 -1
  68. package/dist/run-campaign-5J3ED2UJ.js +11 -0
  69. package/dist/{run-record-etiCMsUq.d.ts → run-record-BgTFzO2r.d.ts} +2 -2
  70. package/dist/{summary-report-DLxh4yWk.d.ts → summary-report-ByiOUrHj.d.ts} +1 -1
  71. package/dist/traces.d.ts +2 -2
  72. package/dist/traces.js +3 -3
  73. package/dist/{types-VCIXx_yo.d.ts → types-DH22o8hM.d.ts} +28 -4
  74. package/dist/wire/index.d.ts +3 -3
  75. package/dist/wire/index.js +3 -3
  76. package/package.json +12 -25
  77. package/dist/chunk-GBHRUAOF.js.map +0 -1
  78. package/dist/chunk-LBSXXH56.js.map +0 -1
  79. package/dist/chunk-NOPYCRNG.js.map +0 -1
  80. package/dist/chunk-QYJT52YW.js.map +0 -1
  81. package/dist/run-campaign-5XENUKRF.js +0 -10
  82. /package/dist/{chunk-QDOSODID.js.map → chunk-3B7Y5AUR.js.map} +0 -0
  83. /package/dist/{chunk-J4DIMSRK.js.map → chunk-6EKXFFGQ.js.map} +0 -0
  84. /package/dist/{chunk-63EPZQUZ.js.map → chunk-6REHLN5J.js.map} +0 -0
  85. /package/dist/{chunk-GM476SZU.js.map → chunk-AIWHLG7J.js.map} +0 -0
  86. /package/dist/{chunk-AIXHUIHG.js.map → chunk-B26KI423.js.map} +0 -0
  87. /package/dist/{chunk-NCK5QLGT.js.map → chunk-F3SRAAZO.js.map} +0 -0
  88. /package/dist/{chunk-VXNVVBZO.js.map → chunk-IHDHUN2X.js.map} +0 -0
  89. /package/dist/{chunk-S3SDD56V.js.map → chunk-ITBRCT73.js.map} +0 -0
  90. /package/dist/{chunk-OLIBRKRD.js.map → chunk-KX6F6NCG.js.map} +0 -0
  91. /package/dist/{chunk-UBPIXOC4.js.map → chunk-SBCB6VZY.js.map} +0 -0
  92. /package/dist/{chunk-YTMXBHFM.js.map → chunk-T375SUOZ.js.map} +0 -0
  93. /package/dist/{chunk-PIEAE33T.js.map → chunk-Z4ZCBC7M.js.map} +0 -0
  94. /package/dist/{run-campaign-5XENUKRF.js.map → run-campaign-5J3ED2UJ.js.map} +0 -0
package/dist/index.js CHANGED
@@ -1,3 +1,6 @@
1
+ import {
2
+ agentProfileHash
3
+ } from "./chunk-PQV2TKC3.js";
1
4
  import {
2
5
  HoldoutAuditor,
3
6
  canaryLeakView,
@@ -6,12 +9,10 @@ import {
6
9
  runBehavioralCanaries
7
10
  } from "./chunk-SHTXZ4O2.js";
8
11
  import {
9
- BackendIntegrityError,
10
12
  DEFAULT_MUTATION_PRIMITIVES,
11
13
  DEFAULT_RED_TEAM_CORPUS,
12
14
  Dataset,
13
15
  HoldoutLockedError,
14
- assertRealBackend,
15
16
  buildReflectionPrompt,
16
17
  hashScenarios,
17
18
  parseReflectionResponse,
@@ -19,9 +20,13 @@ import {
19
20
  redTeamReport,
20
21
  runCanaries,
21
22
  scoreRedTeamOutput,
22
- summarizeBackendIntegrity,
23
23
  toolNamesForRun
24
- } from "./chunk-GBHRUAOF.js";
24
+ } from "./chunk-SS2SOBBT.js";
25
+ import {
26
+ BackendIntegrityError,
27
+ assertRealBackend,
28
+ summarizeBackendIntegrity
29
+ } from "./chunk-E22YUOAL.js";
25
30
  import {
26
31
  BENCHMARK_SPLIT_SEED,
27
32
  benchmarks_exports,
@@ -34,7 +39,7 @@ import {
34
39
  computeToolUseMetrics,
35
40
  iqr,
36
41
  welchsTTest
37
- } from "./chunk-QDOSODID.js";
42
+ } from "./chunk-3B7Y5AUR.js";
38
43
  import {
39
44
  exportTrainingData,
40
45
  toNdjson
@@ -51,7 +56,7 @@ import {
51
56
  pytestTestParser,
52
57
  runTestGradedScenario,
53
58
  vitestTestParser
54
- } from "./chunk-YTMXBHFM.js";
59
+ } from "./chunk-T375SUOZ.js";
55
60
  import {
56
61
  classifyEuAiRisk,
57
62
  euAiActReport,
@@ -77,7 +82,7 @@ import {
77
82
  runProposeReview,
78
83
  runProposeReviewAsControlLoop,
79
84
  scoreFromEvals
80
- } from "./chunk-J4DIMSRK.js";
85
+ } from "./chunk-6EKXFFGQ.js";
81
86
  import {
82
87
  allCriticalPassed,
83
88
  objectiveEval,
@@ -92,10 +97,10 @@ import {
92
97
  evaluateReleaseConfidence,
93
98
  judgeReplayGate,
94
99
  renderReleaseReport
95
- } from "./chunk-AIXHUIHG.js";
100
+ } from "./chunk-B26KI423.js";
96
101
  import {
97
102
  runEvalCampaign
98
- } from "./chunk-GM476SZU.js";
103
+ } from "./chunk-AIWHLG7J.js";
99
104
  import {
100
105
  AGENT_PROFILE_KINDS,
101
106
  AgentProfileCellValidationError,
@@ -114,7 +119,7 @@ import {
114
119
  validateAgentProfileCell,
115
120
  validateRunRecord,
116
121
  verifyAgentProfileCell
117
- } from "./chunk-NCK5QLGT.js";
122
+ } from "./chunk-F3SRAAZO.js";
118
123
  import {
119
124
  evaluateInterimReleaseConfidence,
120
125
  pairedEvalueSequence
@@ -125,7 +130,7 @@ import {
125
130
  paretoChart,
126
131
  researchReport,
127
132
  summaryTable
128
- } from "./chunk-OLIBRKRD.js";
133
+ } from "./chunk-KX6F6NCG.js";
129
134
  import {
130
135
  benjaminiHochberg,
131
136
  bonferroni,
@@ -152,7 +157,7 @@ import {
152
157
  weightedComposite,
153
158
  weightedMean,
154
159
  wilcoxonSignedRank
155
- } from "./chunk-S3SDD56V.js";
160
+ } from "./chunk-ITBRCT73.js";
156
161
  import {
157
162
  DEFAULT_TRACE_ANALYST_BUDGETS,
158
163
  FileSystemTraceStore,
@@ -189,7 +194,7 @@ import {
189
194
  tokenizeDomainWords,
190
195
  traceAnalystFunctionGroup,
191
196
  traceAnalystOnRunComplete
192
- } from "./chunk-PIEAE33T.js";
197
+ } from "./chunk-Z4ZCBC7M.js";
193
198
  import {
194
199
  DEFAULT_REDACTION_RULES,
195
200
  REDACTION_VERSION,
@@ -219,7 +224,7 @@ import {
219
224
  RunIntegrityError,
220
225
  assertRunCaptured,
221
226
  throwIfRunIncomplete
222
- } from "./chunk-UBPIXOC4.js";
227
+ } from "./chunk-SBCB6VZY.js";
223
228
  import {
224
229
  TraceEmitter,
225
230
  llmSpanFromProvider
@@ -242,7 +247,7 @@ import {
242
247
  isTransientLlmError,
243
248
  probeLlm,
244
249
  stripFencedJson
245
- } from "./chunk-VXNVVBZO.js";
250
+ } from "./chunk-IHDHUN2X.js";
246
251
  import {
247
252
  FileSystemRawProviderSink,
248
253
  InMemoryRawProviderSink,
@@ -259,7 +264,7 @@ import {
259
264
  ReplayError,
260
265
  ValidationError,
261
266
  VerificationError
262
- } from "./chunk-QYJT52YW.js";
267
+ } from "./chunk-3BFEG2F6.js";
263
268
  import "./chunk-PZ5AY32C.js";
264
269
 
265
270
  // src/run-score.ts
@@ -720,8 +725,8 @@ function createVerifierAdapter(opts) {
720
725
  const report = await opts.verifier.run({ env, ...opts.options });
721
726
  const out = [];
722
727
  for (const layer of report.layers) {
723
- for (const finding of layer.findings) {
724
- out.push(liftLayerFinding(id, area, layer.layer, finding));
728
+ for (const finding2 of layer.findings) {
729
+ out.push(liftLayerFinding(id, area, layer.layer, finding2));
725
730
  }
726
731
  if (layer.status === "fail" || layer.status === "error" || layer.status === "timeout") {
727
732
  out.push(
@@ -1751,6 +1756,279 @@ var DEFAULT_TRACE_ANALYST_KINDS = [
1751
1756
  IMPROVEMENT_KIND_SPEC
1752
1757
  ];
1753
1758
 
1759
+ // src/analyst/kinds/skill-usage.ts
1760
+ import { existsSync as existsSync3, readdirSync, readFileSync as readFileSync2, statSync } from "fs";
1761
+ import { join } from "path";
1762
+ var BLOAT_LINE_THRESHOLD = 300;
1763
+ var TANGLE_PRIVATE_RE = /\b(cli-bridge|tangletools|ops-board|drew-gtr-pro|@tangle-network\/|~\/company|tangle\.tools|gtm-agent)\b|\bkimi\b|\btcloud\b/gi;
1764
+ var TRIGGER_RE = /triggers?\s*[:\-]/i;
1765
+ function listSkillDirs(root) {
1766
+ if (!existsSync3(root)) return [];
1767
+ const out = [];
1768
+ for (const entry of readdirSync(root, { withFileTypes: true })) {
1769
+ if (!entry.isDirectory() && !entry.isSymbolicLink()) continue;
1770
+ const skillMd = join(root, entry.name, "SKILL.md");
1771
+ if (existsSync3(skillMd)) out.push({ name: entry.name, path: skillMd });
1772
+ }
1773
+ return out;
1774
+ }
1775
+ function walkJsonl(dir, cap) {
1776
+ if (!existsSync3(dir)) return [];
1777
+ const files = [];
1778
+ const stack = [dir];
1779
+ while (stack.length) {
1780
+ const cur = stack.pop();
1781
+ let entries;
1782
+ try {
1783
+ entries = readdirSync(cur, { withFileTypes: true });
1784
+ } catch {
1785
+ continue;
1786
+ }
1787
+ for (const e of entries) {
1788
+ const full = join(cur, e.name);
1789
+ if (e.isDirectory()) stack.push(full);
1790
+ else if (e.name.endsWith(".jsonl")) {
1791
+ files.push(full);
1792
+ if (cap > 0 && files.length >= cap) return files;
1793
+ }
1794
+ }
1795
+ }
1796
+ return files;
1797
+ }
1798
+ function frontmatterDescription(body) {
1799
+ const fm = /^---\n([\s\S]*?)\n---/.exec(body);
1800
+ const block = fm?.[1] ?? "";
1801
+ const m = /description:\s*(.+)/i.exec(block);
1802
+ return m?.[1] ?? "";
1803
+ }
1804
+ function countArtifacts(roots, name, aliases) {
1805
+ let n = 0;
1806
+ for (const root of roots) {
1807
+ const candidates = [join(root, ".evolve", name), ...aliases.map((a) => join(root, a))];
1808
+ for (const dir of candidates) {
1809
+ if (!existsSync3(dir)) continue;
1810
+ try {
1811
+ if (statSync(dir).isDirectory()) n += readdirSync(dir).length;
1812
+ else n += 1;
1813
+ } catch {
1814
+ }
1815
+ }
1816
+ }
1817
+ return n;
1818
+ }
1819
+ function buildSkillUsageReport(config) {
1820
+ const skills = config.skillRoots.flatMap(
1821
+ ({ root, kind }) => listSkillDirs(root).map((s) => ({ ...s, kind }))
1822
+ );
1823
+ const names = skills.map((s) => s.name);
1824
+ const direct = new Map(names.map((n) => [n, 0]));
1825
+ const slash = new Map(names.map((n) => [n, 0]));
1826
+ const skillRe = /"skill"\s*:\s*"([a-z0-9_:-]+)"/g;
1827
+ const cmdRe = /<command-name>\/?([a-z0-9_:-]+)<\/command-name>/g;
1828
+ let transcripts = 0;
1829
+ for (const dir of config.transcriptDirs) {
1830
+ for (const file of walkJsonl(dir, config.maxTranscriptsPerDir ?? 0)) {
1831
+ transcripts += 1;
1832
+ let data;
1833
+ try {
1834
+ data = readFileSync2(file, "utf8");
1835
+ } catch {
1836
+ continue;
1837
+ }
1838
+ for (const m of data.matchAll(skillRe)) {
1839
+ const g = m[1];
1840
+ if (!g) continue;
1841
+ const n = g.split(":").pop() ?? g;
1842
+ const prev = direct.get(n);
1843
+ if (prev !== void 0) direct.set(n, prev + 1);
1844
+ }
1845
+ for (const m of data.matchAll(cmdRe)) {
1846
+ const g = m[1];
1847
+ if (g === void 0) continue;
1848
+ const prev = slash.get(g);
1849
+ if (prev !== void 0) slash.set(g, prev + 1);
1850
+ }
1851
+ }
1852
+ }
1853
+ const bodies = /* @__PURE__ */ new Map();
1854
+ for (const s of skills) {
1855
+ try {
1856
+ bodies.set(s.name, readFileSync2(s.path, "utf8"));
1857
+ } catch {
1858
+ bodies.set(s.name, "");
1859
+ }
1860
+ }
1861
+ const inbound = new Map(names.map((n) => [n, 0]));
1862
+ for (const target of names) {
1863
+ const ref = new RegExp(`/${target}\\b|\\[\\[${target}\\]\\]`);
1864
+ for (const s of skills) {
1865
+ if (s.name === target) continue;
1866
+ if (ref.test(bodies.get(s.name) ?? "")) inbound.set(target, inbound.get(target) + 1);
1867
+ }
1868
+ }
1869
+ const records = skills.map((s) => {
1870
+ const body = bodies.get(s.name) ?? "";
1871
+ const dir = s.path.replace(/\/SKILL\.md$/, "");
1872
+ return {
1873
+ name: s.name,
1874
+ kind: s.kind,
1875
+ path: s.path,
1876
+ lines: body ? body.split("\n").length : 0,
1877
+ directInvocations: direct.get(s.name) ?? 0,
1878
+ slashInvocations: slash.get(s.name) ?? 0,
1879
+ inboundRefs: inbound.get(s.name) ?? 0,
1880
+ artifactCount: countArtifacts(
1881
+ config.artifactRoots ?? [],
1882
+ s.name,
1883
+ config.artifactAliases?.[s.name] ?? []
1884
+ ),
1885
+ tanglePrivateRefs: (body.match(TANGLE_PRIVATE_RE) ?? []).length,
1886
+ hasReferencesDir: existsSync3(join(dir, "references")),
1887
+ hasEvalsDir: existsSync3(join(dir, "evals")),
1888
+ logsRuns: body.includes("skill-runs.jsonl"),
1889
+ hasTriggerPhrases: TRIGGER_RE.test(frontmatterDescription(body) || body.slice(0, 600))
1890
+ };
1891
+ });
1892
+ return { generatedFromTraces: transcripts, records };
1893
+ }
1894
+ var ANALYST_ID = "skill-usage";
1895
+ function finding(area, subject, claim, severity, confidence, producedAt, recommended, evidenceUri, rationale) {
1896
+ return {
1897
+ schema_version: "1.0.0",
1898
+ finding_id: computeFindingId({ analyst_id: ANALYST_ID, area, subject, claim }),
1899
+ analyst_id: ANALYST_ID,
1900
+ produced_at: producedAt,
1901
+ severity,
1902
+ area,
1903
+ claim,
1904
+ rationale,
1905
+ evidence_refs: [{ kind: "artifact", uri: evidenceUri }],
1906
+ recommended_action: recommended,
1907
+ confidence,
1908
+ subject
1909
+ };
1910
+ }
1911
+ function emitSkillUsageFindings(report, producedAt) {
1912
+ const out = [];
1913
+ for (const r of report.records) {
1914
+ const directTotal = r.directInvocations + r.slashInvocations;
1915
+ const trueUsage = directTotal + r.inboundRefs + r.artifactCount;
1916
+ if (trueUsage === 0) {
1917
+ out.push(
1918
+ finding(
1919
+ "skill-usage",
1920
+ r.name,
1921
+ `Skill '${r.name}' has zero usage across all signals (direct, slash, inbound-refs, artifacts)`,
1922
+ "high",
1923
+ 0.6,
1924
+ producedAt,
1925
+ "Confirm the skill covers a real recurring job; if not, deprecate. Zero true usage is the only deterministic deprecation candidate.",
1926
+ r.path,
1927
+ "No Skill-tool call, no slash invocation, no sibling dispatches to it, and no on-disk artifacts."
1928
+ )
1929
+ );
1930
+ } else if (directTotal === 0 && r.inboundRefs + r.artifactCount > 0) {
1931
+ out.push(
1932
+ finding(
1933
+ "skill-usage",
1934
+ r.name,
1935
+ `Skill '${r.name}' shows 0 direct invocations but is used via orchestration/artifacts (inbound=${r.inboundRefs}, artifacts=${r.artifactCount})`,
1936
+ "info",
1937
+ 0.8,
1938
+ producedAt,
1939
+ "Do NOT treat as unused \u2014 usage is real but logged under parent skills or on disk. Strengthen direct-invocation discovery only if direct use is desired.",
1940
+ r.path,
1941
+ "The Skill-tool counter undercounts orchestrated/chained leaf skills."
1942
+ )
1943
+ );
1944
+ }
1945
+ if (directTotal <= 2 && !r.hasTriggerPhrases) {
1946
+ out.push(
1947
+ finding(
1948
+ "discoverability",
1949
+ r.name,
1950
+ `Skill '${r.name}' is rarely invoked directly and its description has no explicit trigger phrases`,
1951
+ "medium",
1952
+ 0.7,
1953
+ producedAt,
1954
+ "Add a `Triggers:` clause with verbatim user phrases to the frontmatter description so the model auto-invokes it.",
1955
+ r.path
1956
+ )
1957
+ );
1958
+ }
1959
+ if (r.kind === "public" && r.tanglePrivateRefs > 0) {
1960
+ out.push(
1961
+ finding(
1962
+ "safety",
1963
+ r.name,
1964
+ `Public skill '${r.name}' carries ${r.tanglePrivateRefs} Tangle-private reference(s)`,
1965
+ "high",
1966
+ 0.75,
1967
+ producedAt,
1968
+ "Sanitize incidental internal refs (cli-bridge/kimi/tcloud/~company/private repos) or relocate to a private repo. Verify @tangle-network/* refs are to PUBLISHED packages before treating as a leak.",
1969
+ r.path
1970
+ )
1971
+ );
1972
+ }
1973
+ if (r.lines > BLOAT_LINE_THRESHOLD && !r.hasReferencesDir) {
1974
+ out.push(
1975
+ finding(
1976
+ "maintainability",
1977
+ r.name,
1978
+ `Skill '${r.name}' is ${r.lines} lines with no references/ split (progressive disclosure)`,
1979
+ "medium",
1980
+ 0.8,
1981
+ producedAt,
1982
+ `Split detail into references/ loaded on demand; keep SKILL.md a short overview. ${r.lines} lines load into every session's context budget.`,
1983
+ r.path
1984
+ )
1985
+ );
1986
+ }
1987
+ if (!r.hasEvalsDir) {
1988
+ out.push(
1989
+ finding(
1990
+ "data-quality",
1991
+ r.name,
1992
+ `Skill '${r.name}' ships no evals/`,
1993
+ "low",
1994
+ 0.6,
1995
+ producedAt,
1996
+ "Add evals/evals.json with >=3 scenarios proving the skill beats baseline; gives regression coverage.",
1997
+ r.path
1998
+ )
1999
+ );
2000
+ }
2001
+ if (!r.logsRuns) {
2002
+ out.push(
2003
+ finding(
2004
+ "observability",
2005
+ r.name,
2006
+ `Skill '${r.name}' never appends to .evolve/skill-runs.jsonl`,
2007
+ "low",
2008
+ 0.55,
2009
+ producedAt,
2010
+ "Append one run line to .evolve/skill-runs.jsonl on completion, or declare it a non-logging leaf, so the self-improvement loop can see it ran.",
2011
+ r.path
2012
+ )
2013
+ );
2014
+ }
2015
+ }
2016
+ return out;
2017
+ }
2018
+ var SkillUsageAnalyst = class {
2019
+ id = ANALYST_ID;
2020
+ description = "Deterministic multi-signal skill-usage analysis: flags dead skills, measurement-invisible (orchestrated) usage, discovery gaps, public-repo leaks, bloat, missing evals, and missing run-logging.";
2021
+ inputKind = "custom";
2022
+ cost = { kind: "deterministic", est_usd_per_run: 0 };
2023
+ version = "1.0.0";
2024
+ async analyze(input, ctx) {
2025
+ const producedAt = ctx.tags?.producedAt ?? (/* @__PURE__ */ new Date()).toISOString();
2026
+ ctx.log?.(`skill-usage: ${input.records.length} skills over ${input.generatedFromTraces} transcripts`);
2027
+ return emitSkillUsageFindings(input, producedAt);
2028
+ }
2029
+ };
2030
+ var SKILL_USAGE_ANALYST = new SkillUsageAnalyst();
2031
+
1754
2032
  // src/analyst/registry.ts
1755
2033
  import { randomUUID } from "crypto";
1756
2034
  var AnalystRegistry = class {
@@ -2185,12 +2463,12 @@ function ghCliClient(opts = {}) {
2185
2463
  await exec("git", ["branch", "-D", input.branchName], { cwd });
2186
2464
  await run("git", ["checkout", "-b", input.branchName]);
2187
2465
  const { mkdir, writeFile } = await import("fs/promises");
2188
- const { dirname: dirname4, join: join4, resolve } = await import("path");
2466
+ const { dirname: dirname4, join: join5, resolve } = await import("path");
2189
2467
  for (const change of input.fileChanges) {
2190
2468
  const abs = resolve(cwd, change.path);
2191
2469
  await mkdir(dirname4(abs), { recursive: true });
2192
2470
  await writeFile(abs, change.contents, "utf8");
2193
- await run("git", ["add", join4(change.path)]);
2471
+ await run("git", ["add", join5(change.path)]);
2194
2472
  }
2195
2473
  const env = {};
2196
2474
  if (input.authorName) env.GIT_AUTHOR_NAME = input.authorName;
@@ -3221,10 +3499,10 @@ var FileSystemFeedbackTrajectoryStore = class {
3221
3499
  }
3222
3500
  async append(record) {
3223
3501
  const { appendFile, mkdir } = await import("fs/promises");
3224
- const { join: join4 } = await import("path");
3502
+ const { join: join5 } = await import("path");
3225
3503
  await mkdir(this.dir, { recursive: true });
3226
3504
  await appendFile(
3227
- join4(this.dir, "feedback-trajectories.ndjson"),
3505
+ join5(this.dir, "feedback-trajectories.ndjson"),
3228
3506
  `${JSON.stringify(record)}
3229
3507
  `,
3230
3508
  "utf8"
@@ -3233,8 +3511,8 @@ var FileSystemFeedbackTrajectoryStore = class {
3233
3511
  async load() {
3234
3512
  if (this.loaded) return;
3235
3513
  const { readFile } = await import("fs/promises");
3236
- const { join: join4 } = await import("path");
3237
- const file = join4(this.dir, "feedback-trajectories.ndjson");
3514
+ const { join: join5 } = await import("path");
3515
+ const file = join5(this.dir, "feedback-trajectories.ndjson");
3238
3516
  try {
3239
3517
  const raw = await readFile(file, "utf8");
3240
3518
  for (const line of raw.split("\n")) {
@@ -5953,22 +6231,6 @@ var BudgetGuard = class {
5953
6231
  }
5954
6232
  };
5955
6233
 
5956
- // src/agent-profile.ts
5957
- import { createHash as createHash2 } from "crypto";
5958
- function agentProfileHash(profile) {
5959
- if (typeof profile.model !== "string" || profile.model.trim().length === 0) {
5960
- throw new ValidationError(`AgentProfile "${profile.id}" has no model \u2014 cannot hash`);
5961
- }
5962
- const behaviour = {
5963
- model: profile.model.trim(),
5964
- skills: [...profile.skills ?? []].sort(),
5965
- promptVersion: profile.promptVersion ?? null,
5966
- tools: [...profile.tools ?? []].sort(),
5967
- metadata: profile.metadata ?? {}
5968
- };
5969
- return createHash2("sha256").update(JSON.stringify(canonicalize(behaviour))).digest("hex");
5970
- }
5971
-
5972
6234
  // src/cost-tracker.ts
5973
6235
  var CostTracker = class {
5974
6236
  byScenario = /* @__PURE__ */ new Map();
@@ -6061,8 +6323,8 @@ function assertNonNegative(n, name) {
6061
6323
  }
6062
6324
 
6063
6325
  // src/muffled-gate-scanner.ts
6064
- import { existsSync as existsSync3, readdirSync, readFileSync as readFileSync2, statSync } from "fs";
6065
- import { join } from "path";
6326
+ import { existsSync as existsSync4, readdirSync as readdirSync2, readFileSync as readFileSync3, statSync as statSync2 } from "fs";
6327
+ import { join as join2 } from "path";
6066
6328
  function codeOf(line) {
6067
6329
  return line.replace(/\/\/.*$/, "").replace(/^\s*\*.*$/, "");
6068
6330
  }
@@ -6174,14 +6436,14 @@ var UNIVERSAL_FINDERS = [findConstructorCwdDropped];
6174
6436
  function autoDeriveImporters(repoRoot, roots, extensions, importsContain) {
6175
6437
  const matches = [];
6176
6438
  const walk = (rel) => {
6177
- const abs = join(repoRoot, rel);
6178
- if (!existsSync3(abs)) return;
6179
- for (const entry of readdirSync(abs)) {
6180
- const sub = join(rel, entry);
6181
- const subAbs = join(repoRoot, sub);
6439
+ const abs = join2(repoRoot, rel);
6440
+ if (!existsSync4(abs)) return;
6441
+ for (const entry of readdirSync2(abs)) {
6442
+ const sub = join2(rel, entry);
6443
+ const subAbs = join2(repoRoot, sub);
6182
6444
  let st;
6183
6445
  try {
6184
- st = statSync(subAbs);
6446
+ st = statSync2(subAbs);
6185
6447
  } catch {
6186
6448
  continue;
6187
6449
  }
@@ -6194,7 +6456,7 @@ function autoDeriveImporters(repoRoot, roots, extensions, importsContain) {
6194
6456
  continue;
6195
6457
  let text;
6196
6458
  try {
6197
- text = readFileSync2(subAbs, "utf8");
6459
+ text = readFileSync3(subAbs, "utf8");
6198
6460
  } catch {
6199
6461
  continue;
6200
6462
  }
@@ -6209,9 +6471,9 @@ function scanForMuffledGates(opts) {
6209
6471
  const findings = [];
6210
6472
  const scanned = /* @__PURE__ */ new Set();
6211
6473
  for (const file of opts.scanFiles) {
6212
- const abs = join(opts.repoRoot, file);
6213
- if (!existsSync3(abs)) continue;
6214
- const text = readFileSync2(abs, "utf8");
6474
+ const abs = join2(opts.repoRoot, file);
6475
+ if (!existsSync4(abs)) continue;
6476
+ const text = readFileSync3(abs, "utf8");
6215
6477
  for (const find of opts.finders) findings.push(...find(file, text));
6216
6478
  scanned.add(file);
6217
6479
  }
@@ -6224,9 +6486,9 @@ function scanForMuffledGates(opts) {
6224
6486
  );
6225
6487
  for (const file of importers) {
6226
6488
  if (scanned.has(file)) continue;
6227
- const abs = join(opts.repoRoot, file);
6228
- if (!existsSync3(abs)) continue;
6229
- const text = readFileSync2(abs, "utf8");
6489
+ const abs = join2(opts.repoRoot, file);
6490
+ if (!existsSync4(abs)) continue;
6491
+ const text = readFileSync3(abs, "utf8");
6230
6492
  for (const find of opts.autoDerive.universalFinders) findings.push(...find(file, text));
6231
6493
  }
6232
6494
  }
@@ -6376,7 +6638,7 @@ function isObject(v) {
6376
6638
  }
6377
6639
 
6378
6640
  // src/scorecard.ts
6379
- import { appendFileSync as appendFileSync2, existsSync as existsSync4, mkdirSync as mkdirSync2, readFileSync as readFileSync3 } from "fs";
6641
+ import { appendFileSync as appendFileSync2, existsSync as existsSync5, mkdirSync as mkdirSync2, readFileSync as readFileSync4 } from "fs";
6380
6642
  import { dirname as dirname2 } from "path";
6381
6643
  function median(xs) {
6382
6644
  if (xs.length === 0) return 0;
@@ -6452,10 +6714,10 @@ function recordRunsToScorecard(logPath, runs, opts) {
6452
6714
  return lines;
6453
6715
  }
6454
6716
  function loadScorecard(logPath) {
6455
- if (!existsSync4(logPath)) return { cells: [], profiles: {} };
6717
+ if (!existsSync5(logPath)) return { cells: [], profiles: {} };
6456
6718
  const cells = /* @__PURE__ */ new Map();
6457
6719
  const profiles = {};
6458
- for (const raw of readFileSync3(logPath, "utf8").split("\n")) {
6720
+ for (const raw of readFileSync4(logPath, "utf8").split("\n")) {
6459
6721
  const line = raw.trim();
6460
6722
  if (!line) continue;
6461
6723
  let parsed;
@@ -7332,7 +7594,7 @@ async function commitBisect(options) {
7332
7594
  }
7333
7595
  async function promptBisect(options) {
7334
7596
  const split = options.paragraphSplitter ?? ((p) => p.split(/\n\s*\n/));
7335
- const join4 = (paragraphs) => paragraphs.join("\n\n");
7597
+ const join5 = (paragraphs) => paragraphs.join("\n\n");
7336
7598
  const goodParas = split(options.good);
7337
7599
  const badParas = split(options.bad);
7338
7600
  if (goodParas.length !== badParas.length) {
@@ -7352,7 +7614,7 @@ async function promptBisect(options) {
7352
7614
  const result = await bisect({
7353
7615
  good: goodMask,
7354
7616
  bad: badMask,
7355
- runEval: (mask) => options.runEval(join4(paragraphsFor(mask))),
7617
+ runEval: (mask) => options.runEval(join5(paragraphsFor(mask))),
7356
7618
  maxIterations: options.maxIterations ?? n + 5,
7357
7619
  halfway: (g, b) => {
7358
7620
  for (let i = 0; i < g.length; i++) {
@@ -7383,12 +7645,12 @@ async function promptBisect(options) {
7383
7645
  }
7384
7646
  }
7385
7647
  const materializedPath = result.path.map((s) => ({
7386
- state: join4(paragraphsFor(s.state)),
7648
+ state: join5(paragraphsFor(s.state)),
7387
7649
  score: s.score,
7388
7650
  pass: s.pass
7389
7651
  }));
7390
7652
  return {
7391
- culprit: join4(paragraphsFor(culprit)),
7653
+ culprit: join5(paragraphsFor(culprit)),
7392
7654
  path: materializedPath,
7393
7655
  converged: result.converged,
7394
7656
  inputInconsistent: result.inputInconsistent,
@@ -7882,8 +8144,8 @@ async function runSelfPlay(proposer, scorer, targets, options = {}) {
7882
8144
 
7883
8145
  // src/command-runner.ts
7884
8146
  import { spawnSync } from "child_process";
7885
- import { existsSync as existsSync5, readdirSync as readdirSync2, readFileSync as readFileSync4, statSync as statSync2 } from "fs";
7886
- import { join as join2 } from "path";
8147
+ import { existsSync as existsSync6, readdirSync as readdirSync3, readFileSync as readFileSync5, statSync as statSync3 } from "fs";
8148
+ import { join as join3 } from "path";
7887
8149
  var localCommandRunner = {
7888
8150
  name: "local",
7889
8151
  async run(input) {
@@ -7911,11 +8173,11 @@ var localCommandRunner = {
7911
8173
  return r.status === 0 && (r.stdout ?? "").trim().length > 0;
7912
8174
  },
7913
8175
  async fileExists(path) {
7914
- return existsSync5(path);
8176
+ return existsSync6(path);
7915
8177
  },
7916
8178
  async readFile(path) {
7917
8179
  try {
7918
- return readFileSync4(path, "utf8");
8180
+ return readFileSync5(path, "utf8");
7919
8181
  } catch {
7920
8182
  return null;
7921
8183
  }
@@ -7923,14 +8185,14 @@ var localCommandRunner = {
7923
8185
  async readDir(path) {
7924
8186
  let entries;
7925
8187
  try {
7926
- entries = readdirSync2(path);
8188
+ entries = readdirSync3(path);
7927
8189
  } catch {
7928
8190
  return [];
7929
8191
  }
7930
8192
  const out = [];
7931
8193
  for (const name of entries) {
7932
8194
  try {
7933
- const st = statSync2(join2(path, name));
8195
+ const st = statSync3(join3(path, name));
7934
8196
  out.push({
7935
8197
  name,
7936
8198
  isDirectory: st.isDirectory(),
@@ -8847,7 +9109,7 @@ function multiToolchainLayer(config) {
8847
9109
  }
8848
9110
 
8849
9111
  // src/reference-replay.ts
8850
- import { appendFileSync as appendFileSync3, existsSync as existsSync6, mkdirSync as mkdirSync3, readFileSync as readFileSync5 } from "fs";
9112
+ import { appendFileSync as appendFileSync3, existsSync as existsSync7, mkdirSync as mkdirSync3, readFileSync as readFileSync6 } from "fs";
8851
9113
  import { dirname as dirname3 } from "path";
8852
9114
  var DEFAULT_MATCH_THRESHOLD = 0.55;
8853
9115
  var ALL_SPLITS = ["train", "dev", "test", "holdout"];
@@ -8973,7 +9235,7 @@ function jsonlReferenceReplayStore(path) {
8973
9235
  },
8974
9236
  async list() {
8975
9237
  return lock.runExclusive(() => {
8976
- if (!existsSync6(path)) return [];
9238
+ if (!existsSync7(path)) return [];
8977
9239
  return readJsonl(path);
8978
9240
  });
8979
9241
  }
@@ -9316,7 +9578,7 @@ function throwIfAborted(signal) {
9316
9578
  throw new Error(signal.reason ? String(signal.reason) : "reference replay aborted");
9317
9579
  }
9318
9580
  function readJsonl(path) {
9319
- const raw = readFileSync5(path, "utf8");
9581
+ const raw = readFileSync6(path, "utf8");
9320
9582
  const out = [];
9321
9583
  for (const line of raw.split("\n")) {
9322
9584
  const trimmed = line.trim();
@@ -9473,7 +9735,7 @@ function createDefaultReviewer(options) {
9473
9735
 
9474
9736
  // src/discover-personas.ts
9475
9737
  import { promises as fs } from "fs";
9476
- import { basename, extname, join as join3 } from "path";
9738
+ import { basename, extname, join as join4 } from "path";
9477
9739
  var DEFAULT_PATTERN = /^\d{2}-.+\.(yaml|yml|json|md)$/;
9478
9740
  async function discoverPersonas(dir, opts = {}) {
9479
9741
  const pattern = opts.pattern ?? DEFAULT_PATTERN;
@@ -9491,7 +9753,7 @@ async function discoverPersonas(dir, opts = {}) {
9491
9753
  }
9492
9754
  const out = [];
9493
9755
  for (const entry of entries) {
9494
- const full = join3(d, entry.name);
9756
+ const full = join4(d, entry.name);
9495
9757
  if (entry.isDir) {
9496
9758
  if (opts.recursive) out.push(...await walk(full));
9497
9759
  continue;
@@ -10335,9 +10597,11 @@ export {
10335
10597
  RunIntegrityError,
10336
10598
  RunRecordValidationError,
10337
10599
  SEMANTIC_CONCEPT_JUDGE_VERSION,
10600
+ SKILL_USAGE_ANALYST,
10338
10601
  SandboxHarness,
10339
10602
  ScenarioRegistry,
10340
10603
  SingleBackendError,
10604
+ SkillUsageAnalyst,
10341
10605
  SpanNotFoundError,
10342
10606
  SubprocessSandboxDriver,
10343
10607
  TRACE_ANALYST_ACTOR_DESCRIPTION,
@@ -10388,6 +10652,7 @@ export {
10388
10652
  buildReflectionPrompt,
10389
10653
  buildReviewerPrompt,
10390
10654
  buildSandboxAgentProfileCell,
10655
+ buildSkillUsageReport,
10391
10656
  buildTraceAnalystTools,
10392
10657
  buildTraceInsightContext,
10393
10658
  buildTraceInsightPrompt,
@@ -10468,6 +10733,7 @@ export {
10468
10733
  distillPlaybook,
10469
10734
  domainEvidencePattern,
10470
10735
  dominates,
10736
+ emitSkillUsageFindings,
10471
10737
  estimateCost,
10472
10738
  estimateTokens,
10473
10739
  euAiActReport,