@tangle-network/agent-eval 0.23.1 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. package/CHANGELOG.md +80 -0
  2. package/README.md +141 -79
  3. package/dist/baseline-4R5deP0N.d.ts +108 -0
  4. package/dist/benchmarks/index.d.ts +3 -2
  5. package/dist/benchmarks/index.js +1 -1
  6. package/dist/builder-eval/index.d.ts +249 -0
  7. package/dist/builder-eval/index.js +391 -0
  8. package/dist/builder-eval/index.js.map +1 -0
  9. package/dist/{chunk-IOXMGMHQ.js → chunk-2A5XJB43.js} +142 -318
  10. package/dist/chunk-2A5XJB43.js.map +1 -0
  11. package/dist/chunk-47X6LRCE.js +76 -0
  12. package/dist/chunk-47X6LRCE.js.map +1 -0
  13. package/dist/{chunk-6M774GY6.js → chunk-4F5DQN55.js} +1 -1
  14. package/dist/chunk-4F5DQN55.js.map +1 -0
  15. package/dist/{chunk-KAO3Q65R.js → chunk-4S4BM3QQ.js} +15 -13
  16. package/dist/chunk-4S4BM3QQ.js.map +1 -0
  17. package/dist/chunk-5BKGXME7.js +65 -0
  18. package/dist/chunk-5BKGXME7.js.map +1 -0
  19. package/dist/{chunk-42I2QC2L.js → chunk-6QDKWHLS.js} +18 -14
  20. package/dist/chunk-6QDKWHLS.js.map +1 -0
  21. package/dist/chunk-I4MBDTY5.js +272 -0
  22. package/dist/chunk-I4MBDTY5.js.map +1 -0
  23. package/dist/chunk-K2TPS5LB.js +569 -0
  24. package/dist/chunk-K2TPS5LB.js.map +1 -0
  25. package/dist/chunk-KKHDIONI.js +414 -0
  26. package/dist/chunk-KKHDIONI.js.map +1 -0
  27. package/dist/chunk-KMPRBJK4.js +74 -0
  28. package/dist/chunk-KMPRBJK4.js.map +1 -0
  29. package/dist/{chunk-QUKKGHTZ.js → chunk-KTGTIOFD.js} +6 -3
  30. package/dist/chunk-KTGTIOFD.js.map +1 -0
  31. package/dist/chunk-LSH4MMOZ.js +838 -0
  32. package/dist/chunk-LSH4MMOZ.js.map +1 -0
  33. package/dist/chunk-NG236HPC.js +57 -0
  34. package/dist/chunk-NG236HPC.js.map +1 -0
  35. package/dist/{chunk-QBW3YBTR.js → chunk-NLMNWKVM.js} +14 -6
  36. package/dist/chunk-NLMNWKVM.js.map +1 -0
  37. package/dist/chunk-NU65VQ7M.js +99 -0
  38. package/dist/chunk-NU65VQ7M.js.map +1 -0
  39. package/dist/chunk-OHEPNJQN.js +554 -0
  40. package/dist/chunk-OHEPNJQN.js.map +1 -0
  41. package/dist/chunk-OWLAAMME.js +250 -0
  42. package/dist/chunk-OWLAAMME.js.map +1 -0
  43. package/dist/{chunk-SQQLHODJ.js → chunk-PC4UYEBM.js} +7 -4
  44. package/dist/chunk-PC4UYEBM.js.map +1 -0
  45. package/dist/{chunk-7EAUOUQS.js → chunk-RAF443UI.js} +213 -115
  46. package/dist/chunk-RAF443UI.js.map +1 -0
  47. package/dist/chunk-RZTMDUO7.js +49 -0
  48. package/dist/chunk-RZTMDUO7.js.map +1 -0
  49. package/dist/{chunk-EXGR4XEM.js → chunk-SESZDQPX.js} +23 -19
  50. package/dist/chunk-SESZDQPX.js.map +1 -0
  51. package/dist/{chunk-6KQG5HAH.js → chunk-SY6WAAAD.js} +84 -71
  52. package/dist/chunk-SY6WAAAD.js.map +1 -0
  53. package/dist/{chunk-5IIQKMD5.js → chunk-TVVP3ZZQ.js} +14 -4
  54. package/dist/chunk-TVVP3ZZQ.js.map +1 -0
  55. package/dist/{chunk-VQQSPGSM.js → chunk-VRJVTXRV.js} +169 -111
  56. package/dist/chunk-VRJVTXRV.js.map +1 -0
  57. package/dist/chunk-WWYCWKUM.js +196 -0
  58. package/dist/chunk-WWYCWKUM.js.map +1 -0
  59. package/dist/{chunk-AXHNWLIX.js → chunk-YRZ4M5GS.js} +2 -90
  60. package/dist/chunk-YRZ4M5GS.js.map +1 -0
  61. package/dist/chunk-ZN274SWR.js +613 -0
  62. package/dist/chunk-ZN274SWR.js.map +1 -0
  63. package/dist/cli.js +10 -6
  64. package/dist/cli.js.map +1 -1
  65. package/dist/{control-DvkH87qJ.d.ts → control-CBShYYA6.d.ts} +32 -33
  66. package/dist/control-runtime-BuJHoLg0.d.ts +180 -0
  67. package/dist/control.d.ts +8 -6
  68. package/dist/control.js +10 -7
  69. package/dist/{dataset-B9qvlm_o.d.ts → dataset-CiK_3LDr.d.ts} +5 -2
  70. package/dist/{emitter-B2XqDKFU.d.ts → emitter-DP_cSSiw.d.ts} +1 -1
  71. package/dist/errors-BZ9sTdz7.d.ts +70 -0
  72. package/dist/failure-cluster-C2EGSDiT.d.ts +76 -0
  73. package/dist/feedback-trajectory-DfFdrraJ.d.ts +169 -0
  74. package/dist/governance/index.d.ts +5 -0
  75. package/dist/governance/index.js +18 -0
  76. package/dist/governance/index.js.map +1 -0
  77. package/dist/{index-DDTlbHEK.d.ts → index--fVrWDiR.d.ts} +1 -1
  78. package/dist/index-Oj9fAPPN.d.ts +270 -0
  79. package/dist/index.d.ts +1866 -3151
  80. package/dist/index.js +5457 -7809
  81. package/dist/index.js.map +1 -1
  82. package/dist/{integrity-Cr5YodSY.d.ts → integrity-DK2EBVZC.d.ts} +4 -3
  83. package/dist/knowledge/index.d.ts +102 -0
  84. package/dist/knowledge/index.js +18 -0
  85. package/dist/knowledge/index.js.map +1 -0
  86. package/dist/meta-eval/index.d.ts +99 -0
  87. package/dist/meta-eval/index.js +324 -0
  88. package/dist/meta-eval/index.js.map +1 -0
  89. package/dist/multi-layer-verifier-LkP3LVKj.d.ts +141 -0
  90. package/dist/openapi.json +1 -1
  91. package/dist/optimization.d.ts +11 -8
  92. package/dist/optimization.js +11 -9
  93. package/dist/outcome-store-D6KWmYvj.d.ts +63 -0
  94. package/dist/pipelines/index.d.ts +172 -0
  95. package/dist/pipelines/index.js +409 -0
  96. package/dist/pipelines/index.js.map +1 -0
  97. package/dist/prm/index.d.ts +99 -0
  98. package/dist/prm/index.js +222 -0
  99. package/dist/prm/index.js.map +1 -0
  100. package/dist/query-DODUYdPg.d.ts +30 -0
  101. package/dist/release-report-TDPn1cxq.d.ts +292 -0
  102. package/dist/replay-BL96gCEP.d.ts +226 -0
  103. package/dist/reporting.d.ts +10 -295
  104. package/dist/reporting.js +10 -6
  105. package/dist/{eval-campaign-Ds5QljIh.d.ts → researcher-CUOiGcGv.d.ts} +148 -146
  106. package/dist/rl.d.ts +1762 -8
  107. package/dist/rl.js +2035 -58
  108. package/dist/rl.js.map +1 -1
  109. package/dist/rubric-D5tjHNJQ.d.ts +72 -0
  110. package/dist/rubric-predictive-validity-C0uDYwG6.d.ts +105 -0
  111. package/dist/{run-record-DNiOMBrZ.d.ts → run-record-CqzahIbx.d.ts} +4 -1
  112. package/dist/sequential-Dgz1n51-.d.ts +139 -0
  113. package/dist/{store-u47QaJ9G.d.ts → store-Db2Bv8Cf.d.ts} +1 -1
  114. package/dist/{summary-report-Ce1r4EYo.d.ts → summary-report-BXGs_9V0.d.ts} +3 -76
  115. package/dist/telemetry/file.js +4 -1
  116. package/dist/telemetry/file.js.map +1 -1
  117. package/dist/telemetry/index.js +57 -57
  118. package/dist/telemetry/index.js.map +1 -1
  119. package/dist/test-graded-scenario-B2kWEdh9.d.ts +146 -0
  120. package/dist/traces.d.ts +142 -387
  121. package/dist/traces.js +1302 -40
  122. package/dist/traces.js.map +1 -1
  123. package/dist/trajectory-CnoBo-JY.d.ts +32 -0
  124. package/dist/wire/index.d.ts +22 -22
  125. package/dist/wire/index.js +4 -3
  126. package/package.json +44 -18
  127. package/dist/chunk-42I2QC2L.js.map +0 -1
  128. package/dist/chunk-5IIQKMD5.js.map +0 -1
  129. package/dist/chunk-6KQG5HAH.js.map +0 -1
  130. package/dist/chunk-6M774GY6.js.map +0 -1
  131. package/dist/chunk-7EAUOUQS.js.map +0 -1
  132. package/dist/chunk-AXHNWLIX.js.map +0 -1
  133. package/dist/chunk-EXGR4XEM.js.map +0 -1
  134. package/dist/chunk-IOXMGMHQ.js.map +0 -1
  135. package/dist/chunk-KAO3Q65R.js.map +0 -1
  136. package/dist/chunk-LZKIOBG2.js +0 -2026
  137. package/dist/chunk-LZKIOBG2.js.map +0 -1
  138. package/dist/chunk-QBW3YBTR.js.map +0 -1
  139. package/dist/chunk-QUKKGHTZ.js.map +0 -1
  140. package/dist/chunk-SQQLHODJ.js.map +0 -1
  141. package/dist/chunk-V5QSWN7L.js +0 -1310
  142. package/dist/chunk-V5QSWN7L.js.map +0 -1
  143. package/dist/chunk-VQQSPGSM.js.map +0 -1
  144. package/dist/chunk-XPHOZPOM.js +0 -1947
  145. package/dist/chunk-XPHOZPOM.js.map +0 -1
  146. package/dist/feedback-trajectory-c43WGtTX.d.ts +0 -346
  147. package/dist/index-ekBXweiQ.d.ts +0 -1894
  148. package/dist/sequential-DgU2mFsE.d.ts +0 -304
@@ -0,0 +1,414 @@
1
+ // src/governance/types.ts
2
/**
 * Render a governance report as Markdown.
 *
 * Output layout: a title line, a context header (organization, period,
 * owner, generation time), a per-severity summary listing only severities
 * with a positive count, then one `###` section per finding with optional
 * evidence and remediation paragraphs.
 *
 * @param {object} report - Report object exposing `framework`, `context`,
 *   `summary` (`overall`, `findings`, `byeverity`), `findings` and
 *   `generatedAt`.
 * @returns {string} The Markdown document, lines joined with "\n".
 */
function renderMarkdown(report) {
  const sevEmoji = {
    info: "\u2139\uFE0E",
    low: "\xB7",
    medium: "!",
    high: "!!",
    critical: "\u203C"
  };
  // Unknown severities get a visible placeholder instead of the text "undefined".
  const glyphFor = (severity) => sevEmoji[severity] ?? "?";
  const { context, summary } = report;

  // The report generators flag a missing owner as a finding and still pass
  // `owner` through, so it may be absent or partial here; rendering must not
  // throw on exactly the reports that need to be read.
  const owner = context.owner;
  const ownerParts = [owner?.role, owner?.name].filter(Boolean);
  if (owner?.email) ownerParts.push(`<${owner.email}>`);
  const ownerText = ownerParts.length > 0 ? ownerParts.join(" ") : "(unassigned)";

  const lines = [];
  lines.push(`# ${report.framework} report \u2014 ${context.systemName}`);
  lines.push("");
  lines.push(`- Organization: **${context.organization}**`);
  lines.push(`- Period: ${context.periodStart} \u2192 ${context.periodEnd}`);
  lines.push(`- Owner: ${ownerText}`);
  lines.push(`- Generated: ${report.generatedAt}`);
  lines.push("");
  lines.push(`## Summary \u2014 ${summary.overall}`);
  lines.push("");
  lines.push(`${summary.findings} finding(s).`);
  for (const [sev, n] of Object.entries(summary.byeverity)) {
    if (n > 0) lines.push(`- ${glyphFor(sev)} ${sev}: ${n}`);
  }
  lines.push("");
  lines.push("## Findings");
  lines.push("");
  for (const f of report.findings) {
    lines.push(`### ${glyphFor(f.severity)} ${f.id} \u2014 ${f.control}`);
    lines.push("");
    lines.push(f.summary);
    if (f.evidence) {
      lines.push("");
      lines.push(`**Evidence:** ${f.evidence}`);
    }
    if (f.remediation) {
      lines.push("");
      lines.push(`**Remediation:** ${f.remediation}`);
    }
    lines.push("");
  }
  return lines.join("\n");
}
45
/**
 * Collapse a list of findings into the report summary.
 *
 * Counts findings per severity and derives the overall verdict:
 * any critical/high finding => "non-compliant"; otherwise any medium/low
 * finding => "compliant-with-findings"; otherwise "compliant".
 *
 * @param {Array<{severity: string}>} findings - Findings to tally.
 * @returns {{findings: number, byeverity: object, overall: string}}
 */
function summarize(findings) {
  const tally = { info: 0, low: 0, medium: 0, high: 0, critical: 0 };
  for (const { severity } of findings) {
    tally[severity] += 1;
  }

  let overall = "compliant";
  if (tally.critical + tally.high > 0) {
    overall = "non-compliant";
  } else if (tally.medium + tally.low > 0) {
    overall = "compliant-with-findings";
  }

  return { findings: findings.length, byeverity: tally, overall };
}
57
+
58
+ // src/governance/eu-ai-act.ts
59
/**
 * Map use-case signals to a risk tier.
 *
 * Tiers are checked from most to least severe and the first tier with any
 * truthy signal wins; with no truthy signal the result is "minimal".
 *
 * @param {object} signals - Flags read: `biometricPublic`, `socialScoring`,
 *   `subliminal`, `annexIII`, `chatbot`, `generatesSyntheticMedia`.
 * @returns {"unacceptable"|"high"|"limited"|"minimal"}
 */
function classifyEuAiRisk(signals) {
  const tiers = [
    ["unacceptable", ["biometricPublic", "socialScoring", "subliminal"]],
    ["high", ["annexIII"]],
    ["limited", ["chatbot", "generatesSyntheticMedia"]]
  ];
  for (const [tier, flags] of tiers) {
    if (flags.some((flag) => signals[flag])) {
      return tier;
    }
  }
  return "minimal";
}
65
/**
 * Build the EU AI Act governance report for a system.
 *
 * The risk class comes from `classifyEuAiRisk(signals)` and decides which
 * checks run:
 * - "unacceptable": a single critical Article 5 finding.
 * - "high": checks for Articles 9, 10, 11, 13, 14 and 15 against `ctx`
 *   (red-team report, datasets, recorded runs, transparency signals, owner
 *   email, outcome store).
 * - "limited": an informational Article 52 transparency finding.
 * - "minimal": no findings.
 *
 * `ctx.traceStore.listRuns` is only queried for the "high" class.
 *
 * @param {object} ctx - Governance context; fields read here: `organization`,
 *   `systemName`, `periodStart`, `periodEnd`, `datasets`, `traceStore`,
 *   `outcomeStore`, `redTeam`, `owner`.
 * @param {object} signals - Use-case flags forwarded to `classifyEuAiRisk`
 *   and echoed in the payload.
 * @returns {Promise<object>} Report with `framework`, `version`, `context`,
 *   `summary`, `findings`, `payload` and `generatedAt`.
 */
async function euAiActReport(ctx, signals) {
  const riskClass = classifyEuAiRisk(signals);
  const findings = [];

  if (riskClass === "unacceptable") {
    findings.push({
      id: "EU-ART-5",
      severity: "critical",
      control: "EU-AI-ACT:Article-5",
      summary: "Use case matches a prohibited practice under Article 5.",
      remediation: "Discontinue or substantially redesign the use case."
    });
  }

  if (riskClass === "high") {
    if (!ctx.redTeam) {
      findings.push({
        id: "EU-ART-9",
        severity: "high",
        control: "EU-AI-ACT:Article-9",
        summary: "High-risk system lacks documented adversarial-testing evidence (Art. 9 risk mgmt).",
        remediation: "Run redTeamDataset() + attach the report."
      });
    }
    if (ctx.datasets.length === 0) {
      findings.push({
        id: "EU-ART-10",
        severity: "high",
        control: "EU-AI-ACT:Article-10",
        summary: "No training/eval datasets recorded with provenance (Art. 10)."
      });
    }
    const runs = await ctx.traceStore.listRuns({
      since: Date.parse(ctx.periodStart),
      until: Date.parse(ctx.periodEnd)
    });
    if (runs.length === 0) {
      findings.push({
        id: "EU-ART-11",
        severity: "high",
        control: "EU-AI-ACT:Article-11",
        summary: "No eval runs recorded (Art. 11 technical documentation)."
      });
    }
    // Stated positively; the original used an empty `if` with the logic in `else`.
    if (signals.chatbot || signals.generatesSyntheticMedia) {
      findings.push({
        id: "EU-ART-13",
        severity: "info",
        control: "EU-AI-ACT:Article-13",
        summary: "Chatbot/synthetic-media transparency obligations apply; verify user-facing disclosures."
      });
    }
    if (!ctx.owner?.email) {
      findings.push({
        id: "EU-ART-14",
        severity: "high",
        control: "EU-AI-ACT:Article-14",
        summary: "No designated human overseer (Art. 14).",
        remediation: "Populate GovernanceContext.owner with the responsible individual."
      });
    }
    if (!ctx.outcomeStore) {
      findings.push({
        id: "EU-ART-15",
        severity: "medium",
        control: "EU-AI-ACT:Article-15",
        summary: "No post-deployment outcome measurement; accuracy + robustness are un-attested.",
        remediation: "Attach an OutcomeStore + run correlationStudy() over the reporting period."
      });
    }
  }

  if (riskClass === "limited") {
    findings.push({
      id: "EU-ART-52",
      severity: "info",
      control: "EU-AI-ACT:Article-52",
      summary: "Transparency obligations apply: disclose AI nature + synthetic content labeling.",
      remediation: "Ensure user-facing surfaces label AI-generated content."
    });
  }

  // Articles actually assessed per risk class. "unacceptable" lists Article 5
  // because that is the article its finding is raised under; previously it
  // fell through to ["none"], contradicting the finding in the same report.
  const articlesByClass = {
    unacceptable: ["5"],
    high: ["5", "9", "10", "11", "13", "14", "15"],
    limited: ["52"]
  };
  const payload = {
    riskClass,
    signals,
    articlesReviewed: articlesByClass[riskClass] ?? ["none"]
  };

  return {
    framework: "EU-AI-ACT",
    version: "Regulation-2024-1689",
    context: {
      organization: ctx.organization,
      systemName: ctx.systemName,
      periodStart: ctx.periodStart,
      periodEnd: ctx.periodEnd,
      owner: ctx.owner
    },
    summary: summarize(findings),
    findings,
    payload,
    generatedAt: new Date().toISOString()
  };
}
165
+
166
+ // src/governance/nist-ai-rmf.ts
167
/**
 * Build the NIST AI RMF governance report for a system.
 *
 * Controls evaluated, in the order findings are emitted: GOVERN-1.1 (owner
 * email), GOVERN-1.3 (datasets and their content hashes), MEASURE-2.6
 * (red-team report and pass rate), MEASURE-2.1 (recorded runs), MEASURE-2.11
 * (judge calibration) and MANAGE-1.1 (outcome store contents).
 *
 * @param {object} ctx - Governance context; fields read here: `organization`,
 *   `systemName`, `periodStart`, `periodEnd`, `datasets`, `traceStore`,
 *   `outcomeStore`, `redTeam`, `judgeCalibration`, `owner`.
 * @returns {Promise<object>} Report with `framework`, `version`, `context`,
 *   `summary`, `findings`, `payload` and `generatedAt`.
 */
async function nistAiRmfReport(ctx) {
  // One definition of an acceptable content hash (64 lowercase hex chars),
  // used for both the GOVERN-1.3 finding and payload.datasetHashChecks.
  // Previously the finding only fired for hashes shorter than 16 chars, so
  // the payload could mark a hash `ok: false` with no finding explaining it.
  const sha256Hex = /^[0-9a-f]{64}$/;
  const period = {
    since: Date.parse(ctx.periodStart),
    until: Date.parse(ctx.periodEnd)
  };
  const findings = [];
  const hashChecks = [];

  if (!ctx.owner?.email) {
    findings.push({
      id: "G-1.1",
      severity: "high",
      control: "NIST-AI-RMF:GOVERN-1.1",
      summary: "No responsible owner recorded for the AI system.",
      remediation: "Assign an accountable individual + email in GovernanceContext.owner."
    });
  }

  if (ctx.datasets.length === 0) {
    findings.push({
      id: "G-1.3",
      severity: "high",
      control: "NIST-AI-RMF:GOVERN-1.3",
      summary: "No versioned datasets recorded for the evaluation period.",
      remediation: "Register each dataset with a Dataset manifest (content hash + provenance)."
    });
  } else {
    for (const manifest of ctx.datasets) {
      const ok = typeof manifest.contentHash === "string" && sha256Hex.test(manifest.contentHash);
      hashChecks.push({ name: manifest.name, ok });
      if (!ok) {
        findings.push({
          id: "G-1.3-hash",
          severity: "medium",
          control: "NIST-AI-RMF:GOVERN-1.3",
          summary: `Dataset "${manifest.name}" has weak or missing content hash.`,
          evidence: `contentHash="${manifest.contentHash}"`,
          remediation: "Call dataset.manifest() to compute SHA-256; commit the manifest alongside releases."
        });
      }
    }
  }

  if (!ctx.redTeam) {
    findings.push({
      id: "M-2.6",
      severity: "high",
      control: "NIST-AI-RMF:MEASURE-2.6",
      summary: "No red-team evaluation attached to the report period.",
      remediation: "Run redTeamDataset() against the system and attach the RedTeamReport to context.redTeam."
    });
  } else if (ctx.redTeam.overallPassRate < 0.8) {
    findings.push({
      id: "M-2.6-rate",
      severity: "high",
      control: "NIST-AI-RMF:MEASURE-2.6",
      summary: `Red-team pass rate ${(ctx.redTeam.overallPassRate * 100).toFixed(1)}% below 80% threshold.`,
      evidence: JSON.stringify(ctx.redTeam.passRateByCategory),
      remediation: "Harden the failing categories; rerun the battery."
    });
  }

  const runs = await ctx.traceStore.listRuns({ ...period });
  if (runs.length === 0) {
    findings.push({
      id: "M-2.1",
      severity: "critical",
      control: "NIST-AI-RMF:MEASURE-2.1",
      summary: "No eval runs recorded for the reporting period.",
      remediation: "Emit traces for every deployment-relevant evaluation."
    });
  }

  if (!ctx.judgeCalibration || ctx.judgeCalibration.length === 0) {
    findings.push({
      id: "M-2.11",
      severity: "medium",
      control: "NIST-AI-RMF:MEASURE-2.11",
      summary: "No judge-vs-human calibration recorded.",
      remediation: "Build a human golden set; run calibrateJudge() before trusting LLM judge scores."
    });
  } else {
    // Only finite correlations are compared; non-finite values are not counted as weak.
    const weak = ctx.judgeCalibration.filter((c) => Number.isFinite(c.pearson) && c.pearson < 0.6);
    if (weak.length > 0) {
      findings.push({
        id: "M-2.11-weak",
        severity: "medium",
        control: "NIST-AI-RMF:MEASURE-2.11",
        summary: `${weak.length} judge(s) show weak agreement with humans (Pearson < 0.6).`,
        remediation: "Retrain or replace the underperforming judges."
      });
    }
  }

  if (!ctx.outcomeStore) {
    findings.push({
      id: "MN-1.1",
      severity: "medium",
      control: "NIST-AI-RMF:MANAGE-1.1",
      summary: "No deployment outcomes captured \u2014 meta-eval correlation cannot be computed.",
      remediation: "Attach an OutcomeStore and ingest production outcome metrics."
    });
  } else {
    const outcomes = await ctx.outcomeStore.list({ ...period });
    if (outcomes.length === 0) {
      findings.push({
        id: "MN-1.1-empty",
        severity: "medium",
        control: "NIST-AI-RMF:MANAGE-1.1",
        summary: "OutcomeStore present but no outcomes captured for the period."
      });
    }
  }

  const payload = {
    controlsEvaluated: [
      "GOVERN-1.1",
      "GOVERN-1.3",
      "MEASURE-2.1",
      "MEASURE-2.6",
      "MEASURE-2.11",
      "MANAGE-1.1"
    ],
    runCount: runs.length,
    redTeamPassRate: ctx.redTeam?.overallPassRate ?? null,
    datasetHashChecks: hashChecks
  };

  return {
    framework: "NIST-AI-RMF",
    version: "1.0.0",
    context: {
      organization: ctx.organization,
      systemName: ctx.systemName,
      periodStart: ctx.periodStart,
      periodEnd: ctx.periodEnd,
      owner: ctx.owner
    },
    summary: summarize(findings),
    findings,
    payload,
    generatedAt: new Date().toISOString()
  };
}
306
+
307
+ // src/governance/soc2.ts
308
/**
 * Build the SOC2 (CC7.x) governance report for a system.
 *
 * Reads runs plus "policy_violation" and "error" events for the reporting
 * period from `ctx.traceStore` and derives findings for:
 * - CC7.1: failure rate above 20%, or no runs at all;
 * - CC7.2: aborted runs (more than 5% of runs and at least 3);
 * - CC7.3: any incident-class events recorded;
 * - CC7.4: no `codeSha` / no `promptSha` recorded on any run.
 *
 * @param {object} ctx - Governance context; fields read here: `organization`,
 *   `systemName`, `periodStart`, `periodEnd`, `traceStore`, `owner`.
 * @returns {Promise<object>} Report with `framework`, `version`, `context`,
 *   `summary`, `findings`, `payload` and `generatedAt`.
 */
async function soc2Report(ctx) {
  const findings = [];
  const start = Date.parse(ctx.periodStart);
  const end = Date.parse(ctx.periodEnd);

  // The three store queries do not depend on each other, so issue them
  // together instead of waiting for each in turn.
  const [runs, incidentEvents, errorEvents] = await Promise.all([
    ctx.traceStore.listRuns({ since: start, until: end }),
    ctx.traceStore.events({ kind: "policy_violation", since: start, until: end }),
    ctx.traceStore.events({ kind: "error", since: start, until: end })
  ]);

  // null (not 0) when there are no runs, so "no data" is distinguishable in the payload.
  const failureRate = runs.length > 0 ? runs.filter((r) => r.outcome?.pass === false).length / runs.length : null;
  if (failureRate !== null && failureRate > 0.2) {
    findings.push({
      id: "CC7.1-fail-rate",
      severity: "medium",
      control: "SOC2:CC7.1",
      summary: `System failure rate ${(failureRate * 100).toFixed(1)}% over the period exceeds 20%.`,
      remediation: "Investigate failure clusters (failureClusterView) + prioritize remediation."
    });
  }
  if (runs.length === 0) {
    findings.push({
      id: "CC7.1-coverage",
      severity: "high",
      control: "SOC2:CC7.1",
      summary: "No telemetry runs recorded for the period \u2014 monitoring regime is incomplete."
    });
  }

  const aborted = runs.filter((r) => r.status === "aborted");
  if (aborted.length > runs.length * 0.05 && aborted.length >= 3) {
    findings.push({
      id: "CC7.2-abort",
      severity: "medium",
      control: "SOC2:CC7.2",
      summary: `${aborted.length} run(s) aborted \u2014 investigate pattern.`,
      remediation: "Use the bisector + failureClusterView to localize the trigger."
    });
  }

  const totalIncidents = incidentEvents.length + errorEvents.length;
  if (totalIncidents > 0) {
    findings.push({
      id: "CC7.3-resolution",
      severity: "low",
      control: "SOC2:CC7.3",
      summary: `${totalIncidents} incident-class event(s) recorded; resolution tracking is informal.`,
      remediation: 'Emit a resolution event (kind="log" with payload.resolves=<eventId>) per remediated incident.'
    });
  }

  // Distinct, truthy identifiers seen across runs.
  const modelFingerprints = new Set(runs.map((r) => r.modelFingerprint).filter(Boolean));
  const promptHashes = new Set(runs.map((r) => r.promptSha).filter(Boolean));
  const codeSha = new Set(runs.map((r) => r.codeSha).filter(Boolean));
  if (codeSha.size === 0) {
    findings.push({
      id: "CC7.4-code",
      severity: "high",
      control: "SOC2:CC7.4",
      summary: "No codeSha recorded on runs \u2014 cannot attribute scores to a specific release.",
      remediation: "Populate Run.codeSha with the git SHA of the system at run time."
    });
  }
  if (promptHashes.size === 0) {
    findings.push({
      id: "CC7.4-prompt",
      severity: "medium",
      control: "SOC2:CC7.4",
      summary: "No promptSha recorded \u2014 prompt changes are untracked."
    });
  }

  const payload = {
    controls: ["CC7.1", "CC7.2", "CC7.3", "CC7.4"],
    runCount: runs.length,
    failureRate,
    abortedCount: aborted.length,
    incidentEventCount: totalIncidents,
    distinctReleases: {
      codeShas: codeSha.size,
      promptHashes: promptHashes.size,
      modelFingerprints: modelFingerprints.size
    }
  };

  return {
    framework: "SOC2",
    version: "2017-Common-Criteria",
    context: {
      organization: ctx.organization,
      systemName: ctx.systemName,
      periodStart: ctx.periodStart,
      periodEnd: ctx.periodEnd,
      owner: ctx.owner
    },
    summary: summarize(findings),
    findings,
    payload,
    generatedAt: new Date().toISOString()
  };
}
405
+
406
+ export {
407
+ renderMarkdown,
408
+ summarize,
409
+ classifyEuAiRisk,
410
+ euAiActReport,
411
+ nistAiRmfReport,
412
+ soc2Report
413
+ };
414
+ //# sourceMappingURL=chunk-KKHDIONI.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/governance/types.ts","../src/governance/eu-ai-act.ts","../src/governance/nist-ai-rmf.ts","../src/governance/soc2.ts"],"sourcesContent":["/**\n * Governance reporting — shared types.\n *\n * The framework collects a `GovernanceContext` (traces + outcomes +\n * dataset manifests + red-team results + judge calibration) and each\n * specific template (NIST AI RMF, SOC2, EU AI Act) renders a\n * structured report from it.\n *\n * Reports are machine-readable JSON first; human-readable Markdown is a\n * pure transform on top. External auditors consume the Markdown; CI\n * consumes the JSON.\n */\n\nimport type { DatasetManifest } from '../dataset'\nimport type { CalibrationResult } from '../judge-calibration'\nimport type { OutcomeStore } from '../meta-eval/outcome-store'\nimport type { RedTeamReport } from '../red-team'\nimport type { TraceStore } from '../trace/store'\n\nexport interface GovernanceContext {\n /** Legal / org identity for the report. */\n organization: string\n /** System / agent identifier. */\n systemName: string\n /** ISO8601 period the report covers. */\n periodStart: string\n periodEnd: string\n /** Versioned dataset manifests used during the period. */\n datasets: DatasetManifest[]\n traceStore: TraceStore\n outcomeStore?: OutcomeStore\n /** Cached red-team results for the period, if available. */\n redTeam?: RedTeamReport\n /** Judge-vs-human calibration results, if measured. */\n judgeCalibration?: CalibrationResult[]\n /** Responsible owner for the system — role + name + email. */\n owner: { role: string; name: string; email: string }\n}\n\nexport interface GovernanceFinding {\n id: string\n severity: 'info' | 'low' | 'medium' | 'high' | 'critical'\n /** Control reference the finding maps to (e.g. \"NIST-AI-RMF:MEASURE-2.1\"). 
*/\n control: string\n summary: string\n evidence?: string\n remediation?: string\n}\n\nexport interface GovernanceReport {\n framework: 'NIST-AI-RMF' | 'SOC2' | 'EU-AI-ACT'\n version: string\n context: Pick<\n GovernanceContext,\n 'organization' | 'systemName' | 'periodStart' | 'periodEnd' | 'owner'\n >\n summary: {\n findings: number\n byeverity: Record<GovernanceFinding['severity'], number>\n overall: 'compliant' | 'compliant-with-findings' | 'non-compliant'\n }\n findings: GovernanceFinding[]\n /** Framework-specific structured payload (mapped controls, risk class, etc.). */\n payload: Record<string, unknown>\n generatedAt: string\n}\n\nexport function renderMarkdown(report: GovernanceReport): string {\n const sevEmoji: Record<GovernanceFinding['severity'], string> = {\n info: 'ℹ︎',\n low: '·',\n medium: '!',\n high: '!!',\n critical: '‼',\n }\n const lines: string[] = []\n lines.push(`# ${report.framework} report — ${report.context.systemName}`)\n lines.push('')\n lines.push(`- Organization: **${report.context.organization}**`)\n lines.push(`- Period: ${report.context.periodStart} → ${report.context.periodEnd}`)\n lines.push(\n `- Owner: ${report.context.owner.role} ${report.context.owner.name} <${report.context.owner.email}>`,\n )\n lines.push(`- Generated: ${report.generatedAt}`)\n lines.push('')\n lines.push(`## Summary — ${report.summary.overall}`)\n lines.push('')\n lines.push(`${report.summary.findings} finding(s).`)\n for (const [sev, n] of Object.entries(report.summary.byeverity) as Array<\n [GovernanceFinding['severity'], number]\n >) {\n if (n > 0) lines.push(`- ${sevEmoji[sev]} ${sev}: ${n}`)\n }\n lines.push('')\n lines.push('## Findings')\n lines.push('')\n for (const f of report.findings) {\n lines.push(`### ${sevEmoji[f.severity]} ${f.id} — ${f.control}`)\n lines.push('')\n lines.push(f.summary)\n if (f.evidence) {\n lines.push('')\n lines.push(`**Evidence:** ${f.evidence}`)\n }\n if (f.remediation) {\n lines.push('')\n 
lines.push(`**Remediation:** ${f.remediation}`)\n }\n lines.push('')\n }\n return lines.join('\\n')\n}\n\nexport function summarize(findings: GovernanceFinding[]): GovernanceReport['summary'] {\n const byeverity: GovernanceReport['summary']['byeverity'] = {\n info: 0,\n low: 0,\n medium: 0,\n high: 0,\n critical: 0,\n }\n for (const f of findings) byeverity[f.severity]++\n const overall: GovernanceReport['summary']['overall'] =\n byeverity.critical + byeverity.high > 0\n ? 'non-compliant'\n : byeverity.medium + byeverity.low > 0\n ? 'compliant-with-findings'\n : 'compliant'\n return { findings: findings.length, byeverity, overall }\n}\n","/**\n * EU AI Act — risk-class classification + compliance checklist.\n *\n * Classification is declarative: caller supplies the domain/use-case\n * signals (biometric? critical infrastructure? education? employment?\n * access to services?) and we map to the Act's risk tiers:\n * - \"unacceptable\" (prohibited)\n * - \"high\" (Annex III — strict obligations)\n * - \"limited\" (transparency obligations)\n * - \"minimal\" (voluntary codes of conduct)\n *\n * Then the compliance checklist enumerates Article 9 (risk mgmt),\n * 10 (data + data governance), 11 (technical documentation), 13\n * (transparency), 14 (human oversight), 15 (accuracy + robustness)\n * requirements and flags gaps.\n */\n\nimport type { GovernanceContext, GovernanceFinding, GovernanceReport } from './types'\nimport { summarize } from './types'\n\nexport type EuRiskClass = 'unacceptable' | 'high' | 'limited' | 'minimal'\n\nexport interface UseCaseSignals {\n /** Used for biometric identification in public spaces? (Art. 5 — unacceptable). */\n biometricPublic?: boolean\n /** Social scoring by public authorities? (Art. 5). */\n socialScoring?: boolean\n /** Subliminal manipulation? (Art. 5). 
*/\n subliminal?: boolean\n /** Annex III sector: critical infrastructure / education / employment /\n * access to essential services / law enforcement / migration /\n * administration of justice / democratic processes? */\n annexIII?: boolean\n /** Interacts directly with natural persons (chatbot, agent)? — limited risk. */\n chatbot?: boolean\n /** Generates synthetic media (image/audio/video/text deepfakes)? — limited risk. */\n generatesSyntheticMedia?: boolean\n}\n\nexport function classifyEuAiRisk(signals: UseCaseSignals): EuRiskClass {\n if (signals.biometricPublic || signals.socialScoring || signals.subliminal) return 'unacceptable'\n if (signals.annexIII) return 'high'\n if (signals.chatbot || signals.generatesSyntheticMedia) return 'limited'\n return 'minimal'\n}\n\nexport async function euAiActReport(\n ctx: GovernanceContext,\n signals: UseCaseSignals,\n): Promise<GovernanceReport> {\n const riskClass = classifyEuAiRisk(signals)\n const findings: GovernanceFinding[] = []\n\n if (riskClass === 'unacceptable') {\n findings.push({\n id: 'EU-ART-5',\n severity: 'critical',\n control: 'EU-AI-ACT:Article-5',\n summary: 'Use case matches a prohibited practice under Article 5.',\n remediation: 'Discontinue or substantially redesign the use case.',\n })\n }\n\n if (riskClass === 'high') {\n // Article 9 — risk management\n if (!ctx.redTeam) {\n findings.push({\n id: 'EU-ART-9',\n severity: 'high',\n control: 'EU-AI-ACT:Article-9',\n summary:\n 'High-risk system lacks documented adversarial-testing evidence (Art. 9 risk mgmt).',\n remediation: 'Run redTeamDataset() + attach the report.',\n })\n }\n // Article 10 — data + data governance\n if (ctx.datasets.length === 0) {\n findings.push({\n id: 'EU-ART-10',\n severity: 'high',\n control: 'EU-AI-ACT:Article-10',\n summary: 'No training/eval datasets recorded with provenance (Art. 
10).',\n })\n }\n // Article 11 — technical documentation (traces + runs)\n const runs = await ctx.traceStore.listRuns({\n since: Date.parse(ctx.periodStart),\n until: Date.parse(ctx.periodEnd),\n })\n if (runs.length === 0) {\n findings.push({\n id: 'EU-ART-11',\n severity: 'high',\n control: 'EU-AI-ACT:Article-11',\n summary: 'No eval runs recorded (Art. 11 technical documentation).',\n })\n }\n // Article 13 — transparency to users\n if (!signals.chatbot && !signals.generatesSyntheticMedia) {\n // High-risk but not a chatbot — transparency may still apply; flag informational\n } else {\n findings.push({\n id: 'EU-ART-13',\n severity: 'info',\n control: 'EU-AI-ACT:Article-13',\n summary:\n 'Chatbot/synthetic-media transparency obligations apply; verify user-facing disclosures.',\n })\n }\n // Article 14 — human oversight\n if (!ctx.owner?.email) {\n findings.push({\n id: 'EU-ART-14',\n severity: 'high',\n control: 'EU-AI-ACT:Article-14',\n summary: 'No designated human overseer (Art. 14).',\n remediation: 'Populate GovernanceContext.owner with the responsible individual.',\n })\n }\n // Article 15 — accuracy + robustness\n if (!ctx.outcomeStore) {\n findings.push({\n id: 'EU-ART-15',\n severity: 'medium',\n control: 'EU-AI-ACT:Article-15',\n summary: 'No post-deployment outcome measurement; accuracy + robustness are un-attested.',\n remediation: 'Attach an OutcomeStore + run correlationStudy() over the reporting period.',\n })\n }\n }\n\n if (riskClass === 'limited') {\n findings.push({\n id: 'EU-ART-52',\n severity: 'info',\n control: 'EU-AI-ACT:Article-52',\n summary: 'Transparency obligations apply: disclose AI nature + synthetic content labeling.',\n remediation: 'Ensure user-facing surfaces label AI-generated content.',\n })\n }\n\n const payload = {\n riskClass,\n signals,\n articlesReviewed:\n riskClass === 'high'\n ? ['5', '9', '10', '11', '13', '14', '15']\n : riskClass === 'limited'\n ? 
['52']\n : ['none'],\n }\n\n return {\n framework: 'EU-AI-ACT',\n version: 'Regulation-2024-1689',\n context: {\n organization: ctx.organization,\n systemName: ctx.systemName,\n periodStart: ctx.periodStart,\n periodEnd: ctx.periodEnd,\n owner: ctx.owner,\n },\n summary: summarize(findings),\n findings,\n payload,\n generatedAt: new Date().toISOString(),\n }\n}\n","/**\n * NIST AI RMF 1.0 — Govern / Map / Measure / Manage mapping.\n *\n * Each subcategory derives its status from concrete framework state:\n * MEASURE 2.x: do we have a calibration regime? contamination controls?\n * MEASURE 2.7: are red-team results available?\n * MANAGE 1.x: are outcome metrics captured? correlation measured?\n * GOVERN 1.x: dataset + prompt provenance recorded?\n *\n * We ship the mapping and the derivation rules; consumers supply the\n * GovernanceContext.\n */\n\nimport type { GovernanceContext, GovernanceFinding, GovernanceReport } from './types'\nimport { summarize } from './types'\n\nexport async function nistAiRmfReport(ctx: GovernanceContext): Promise<GovernanceReport> {\n const findings: GovernanceFinding[] = []\n\n // GOVERN 1.1 — \"Accountable individual identified\"\n if (!ctx.owner?.email) {\n findings.push({\n id: 'G-1.1',\n severity: 'high',\n control: 'NIST-AI-RMF:GOVERN-1.1',\n summary: 'No responsible owner recorded for the AI system.',\n remediation: 'Assign an accountable individual + email in GovernanceContext.owner.',\n })\n }\n\n // GOVERN 1.3 — \"Inventory + lifecycle tracking\"\n if (ctx.datasets.length === 0) {\n findings.push({\n id: 'G-1.3',\n severity: 'high',\n control: 'NIST-AI-RMF:GOVERN-1.3',\n summary: 'No versioned datasets recorded for the evaluation period.',\n remediation: 'Register each dataset with a Dataset manifest (content hash + provenance).',\n })\n } else {\n // Validate content hashes are stable\n for (const manifest of ctx.datasets) {\n if (!manifest.contentHash || manifest.contentHash.length < 16) {\n findings.push({\n id: 
'G-1.3-hash',\n severity: 'medium',\n control: 'NIST-AI-RMF:GOVERN-1.3',\n summary: `Dataset \"${manifest.name}\" has weak or missing content hash.`,\n evidence: `contentHash=\"${manifest.contentHash}\"`,\n remediation:\n 'Call dataset.manifest() to compute SHA-256; commit the manifest alongside releases.',\n })\n }\n }\n }\n\n // MEASURE 2.6 — \"Safety + adversarial testing\"\n if (!ctx.redTeam) {\n findings.push({\n id: 'M-2.6',\n severity: 'high',\n control: 'NIST-AI-RMF:MEASURE-2.6',\n summary: 'No red-team evaluation attached to the report period.',\n remediation:\n 'Run redTeamDataset() against the system and attach the RedTeamReport to context.redTeam.',\n })\n } else if (ctx.redTeam.overallPassRate < 0.8) {\n findings.push({\n id: 'M-2.6-rate',\n severity: 'high',\n control: 'NIST-AI-RMF:MEASURE-2.6',\n summary: `Red-team pass rate ${(ctx.redTeam.overallPassRate * 100).toFixed(1)}% below 80% threshold.`,\n evidence: JSON.stringify(ctx.redTeam.passRateByCategory),\n remediation: 'Harden the failing categories; rerun the battery.',\n })\n }\n\n // MEASURE 2.1 — \"Test results against defined metrics\"\n const runs = await ctx.traceStore.listRuns({\n since: Date.parse(ctx.periodStart),\n until: Date.parse(ctx.periodEnd),\n })\n if (runs.length === 0) {\n findings.push({\n id: 'M-2.1',\n severity: 'critical',\n control: 'NIST-AI-RMF:MEASURE-2.1',\n summary: 'No eval runs recorded for the reporting period.',\n remediation: 'Emit traces for every deployment-relevant evaluation.',\n })\n }\n\n // MEASURE 2.11 — \"Calibration + validation regime\"\n if (!ctx.judgeCalibration || ctx.judgeCalibration.length === 0) {\n findings.push({\n id: 'M-2.11',\n severity: 'medium',\n control: 'NIST-AI-RMF:MEASURE-2.11',\n summary: 'No judge-vs-human calibration recorded.',\n remediation:\n 'Build a human golden set; run calibrateJudge() before trusting LLM judge scores.',\n })\n } else {\n const weak = ctx.judgeCalibration.filter((c) => Number.isFinite(c.pearson) && c.pearson < 
0.6)\n if (weak.length > 0) {\n findings.push({\n id: 'M-2.11-weak',\n severity: 'medium',\n control: 'NIST-AI-RMF:MEASURE-2.11',\n summary: `${weak.length} judge(s) show weak agreement with humans (Pearson < 0.6).`,\n remediation: 'Retrain or replace the underperforming judges.',\n })\n }\n }\n\n // MANAGE 1.1 — \"Outcomes tracked post-deployment\"\n if (!ctx.outcomeStore) {\n findings.push({\n id: 'MN-1.1',\n severity: 'medium',\n control: 'NIST-AI-RMF:MANAGE-1.1',\n summary: 'No deployment outcomes captured — meta-eval correlation cannot be computed.',\n remediation: 'Attach an OutcomeStore and ingest production outcome metrics.',\n })\n } else {\n const outcomes = await ctx.outcomeStore.list({\n since: Date.parse(ctx.periodStart),\n until: Date.parse(ctx.periodEnd),\n })\n if (outcomes.length === 0) {\n findings.push({\n id: 'MN-1.1-empty',\n severity: 'medium',\n control: 'NIST-AI-RMF:MANAGE-1.1',\n summary: 'OutcomeStore present but no outcomes captured for the period.',\n })\n }\n }\n\n // Validate that dataset manifests carry strong SHA-256-shaped content hashes.\n const hashChecks: Array<{ name: string; ok: boolean }> = []\n for (const manifest of ctx.datasets) {\n // We don't persist the scenarios here; the check is that the caller's\n // manifest already carries a hash in the expected hex format.\n hashChecks.push({ name: manifest.name, ok: /^[0-9a-f]{64}$/.test(manifest.contentHash) })\n }\n\n const payload = {\n controlsEvaluated: [\n 'GOVERN-1.1',\n 'GOVERN-1.3',\n 'MEASURE-2.1',\n 'MEASURE-2.6',\n 'MEASURE-2.11',\n 'MANAGE-1.1',\n ],\n runCount: runs.length,\n redTeamPassRate: ctx.redTeam?.overallPassRate ?? 
null,\n datasetHashChecks: hashChecks,\n }\n\n return {\n framework: 'NIST-AI-RMF',\n version: '1.0.0',\n context: {\n organization: ctx.organization,\n systemName: ctx.systemName,\n periodStart: ctx.periodStart,\n periodEnd: ctx.periodEnd,\n owner: ctx.owner,\n },\n summary: summarize(findings),\n findings,\n payload,\n generatedAt: new Date().toISOString(),\n }\n}\n","/**\n * SOC 2 — Common Criteria 7 (system operations + change management)\n * audit trail derived from the trace corpus.\n *\n * This is NOT a formal SOC2 report — that requires an external\n * auditor. What we ship is the machine-readable *evidence* package\n * that an auditor consumes: run counts, deploy events, access log\n * summary, anomaly tracking, response-time SLOs.\n */\n\nimport type { GovernanceContext, GovernanceFinding, GovernanceReport } from './types'\nimport { summarize } from './types'\n\nexport async function soc2Report(ctx: GovernanceContext): Promise<GovernanceReport> {\n const findings: GovernanceFinding[] = []\n const start = Date.parse(ctx.periodStart)\n const end = Date.parse(ctx.periodEnd)\n const runs = await ctx.traceStore.listRuns({ since: start, until: end })\n\n // CC7.1 — \"Monitoring to detect anomalies\"\n const failureRate =\n runs.length > 0 ? 
runs.filter((r) => r.outcome?.pass === false).length / runs.length : null\n if (failureRate !== null && failureRate > 0.2) {\n findings.push({\n id: 'CC7.1-fail-rate',\n severity: 'medium',\n control: 'SOC2:CC7.1',\n summary: `System failure rate ${(failureRate * 100).toFixed(1)}% over the period exceeds 20%.`,\n remediation: 'Investigate failure clusters (failureClusterView) + prioritize remediation.',\n })\n }\n if (runs.length === 0) {\n findings.push({\n id: 'CC7.1-coverage',\n severity: 'high',\n control: 'SOC2:CC7.1',\n summary: 'No telemetry runs recorded for the period — monitoring regime is incomplete.',\n })\n }\n\n // CC7.2 — \"Anomaly investigation\"\n const aborted = runs.filter((r) => r.status === 'aborted')\n if (aborted.length > runs.length * 0.05 && aborted.length >= 3) {\n findings.push({\n id: 'CC7.2-abort',\n severity: 'medium',\n control: 'SOC2:CC7.2',\n summary: `${aborted.length} run(s) aborted — investigate pattern.`,\n remediation: 'Use the bisector + failureClusterView to localize the trigger.',\n })\n }\n\n // CC7.3 — \"Response to incidents\" — require an event tag for resolved incidents\n const incidentEvents = await ctx.traceStore.events({\n kind: 'policy_violation',\n since: start,\n until: end,\n })\n const errorEvents = await ctx.traceStore.events({ kind: 'error', since: start, until: end })\n const totalIncidents = incidentEvents.length + errorEvents.length\n if (totalIncidents > 0) {\n // No formal resolution tracking yet — flag medium by default\n findings.push({\n id: 'CC7.3-resolution',\n severity: 'low',\n control: 'SOC2:CC7.3',\n summary: `${totalIncidents} incident-class event(s) recorded; resolution tracking is informal.`,\n remediation:\n 'Emit a resolution event (kind=\"log\" with payload.resolves=<eventId>) per remediated incident.',\n })\n }\n\n // CC7.4 — \"Configuration change tracking\"\n const modelFingerprints = new Set(runs.map((r) => r.modelFingerprint).filter(Boolean) as string[])\n const promptHashes = new 
Set(runs.map((r) => r.promptSha).filter(Boolean) as string[])\n const codeSha = new Set(runs.map((r) => r.codeSha).filter(Boolean) as string[])\n if (codeSha.size === 0) {\n findings.push({\n id: 'CC7.4-code',\n severity: 'high',\n control: 'SOC2:CC7.4',\n summary: 'No codeSha recorded on runs — cannot attribute scores to a specific release.',\n remediation: 'Populate Run.codeSha with the git SHA of the system at run time.',\n })\n }\n if (promptHashes.size === 0) {\n findings.push({\n id: 'CC7.4-prompt',\n severity: 'medium',\n control: 'SOC2:CC7.4',\n summary: 'No promptSha recorded — prompt changes are untracked.',\n })\n }\n\n const payload = {\n controls: ['CC7.1', 'CC7.2', 'CC7.3', 'CC7.4'],\n runCount: runs.length,\n failureRate,\n abortedCount: aborted.length,\n incidentEventCount: totalIncidents,\n distinctReleases: {\n codeShas: codeSha.size,\n promptHashes: promptHashes.size,\n modelFingerprints: modelFingerprints.size,\n },\n }\n\n return {\n framework: 'SOC2',\n version: '2017-Common-Criteria',\n context: {\n organization: ctx.organization,\n systemName: ctx.systemName,\n periodStart: ctx.periodStart,\n periodEnd: ctx.periodEnd,\n owner: ctx.owner,\n },\n summary: summarize(findings),\n findings,\n payload,\n generatedAt: new Date().toISOString(),\n 
}\n}\n"],"mappings":";AAmEO,SAAS,eAAe,QAAkC;AAC/D,QAAM,WAA0D;AAAA,IAC9D,MAAM;AAAA,IACN,KAAK;AAAA,IACL,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,UAAU;AAAA,EACZ;AACA,QAAM,QAAkB,CAAC;AACzB,QAAM,KAAK,KAAK,OAAO,SAAS,kBAAa,OAAO,QAAQ,UAAU,EAAE;AACxE,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,qBAAqB,OAAO,QAAQ,YAAY,IAAI;AAC/D,QAAM,KAAK,aAAa,OAAO,QAAQ,WAAW,WAAM,OAAO,QAAQ,SAAS,EAAE;AAClF,QAAM;AAAA,IACJ,YAAY,OAAO,QAAQ,MAAM,IAAI,IAAI,OAAO,QAAQ,MAAM,IAAI,KAAK,OAAO,QAAQ,MAAM,KAAK;AAAA,EACnG;AACA,QAAM,KAAK,gBAAgB,OAAO,WAAW,EAAE;AAC/C,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,qBAAgB,OAAO,QAAQ,OAAO,EAAE;AACnD,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,GAAG,OAAO,QAAQ,QAAQ,cAAc;AACnD,aAAW,CAAC,KAAK,CAAC,KAAK,OAAO,QAAQ,OAAO,QAAQ,SAAS,GAE3D;AACD,QAAI,IAAI,EAAG,OAAM,KAAK,KAAK,SAAS,GAAG,CAAC,IAAI,GAAG,KAAK,CAAC,EAAE;AAAA,EACzD;AACA,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,aAAa;AACxB,QAAM,KAAK,EAAE;AACb,aAAW,KAAK,OAAO,UAAU;AAC/B,UAAM,KAAK,OAAO,SAAS,EAAE,QAAQ,CAAC,IAAI,EAAE,EAAE,WAAM,EAAE,OAAO,EAAE;AAC/D,UAAM,KAAK,EAAE;AACb,UAAM,KAAK,EAAE,OAAO;AACpB,QAAI,EAAE,UAAU;AACd,YAAM,KAAK,EAAE;AACb,YAAM,KAAK,iBAAiB,EAAE,QAAQ,EAAE;AAAA,IAC1C;AACA,QAAI,EAAE,aAAa;AACjB,YAAM,KAAK,EAAE;AACb,YAAM,KAAK,oBAAoB,EAAE,WAAW,EAAE;AAAA,IAChD;AACA,UAAM,KAAK,EAAE;AAAA,EACf;AACA,SAAO,MAAM,KAAK,IAAI;AACxB;AAEO,SAAS,UAAU,UAA4D;AACpF,QAAM,YAAsD;AAAA,IAC1D,MAAM;AAAA,IACN,KAAK;AAAA,IACL,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,UAAU;AAAA,EACZ;AACA,aAAW,KAAK,SAAU,WAAU,EAAE,QAAQ;AAC9C,QAAM,UACJ,UAAU,WAAW,UAAU,OAAO,IAClC,kBACA,UAAU,SAAS,UAAU,MAAM,IACjC,4BACA;AACR,SAAO,EAAE,UAAU,SAAS,QAAQ,WAAW,QAAQ;AACzD;;;AC1FO,SAAS,iBAAiB,SAAsC;AACrE,MAAI,QAAQ,mBAAmB,QAAQ,iBAAiB,QAAQ,WAAY,QAAO;AACnF,MAAI,QAAQ,SAAU,QAAO;AAC7B,MAAI,QAAQ,WAAW,QAAQ,wBAAyB,QAAO;AAC/D,SAAO;AACT;AAEA,eAAsB,cACpB,KACA,SAC2B;AAC3B,QAAM,YAAY,iBAAiB,OAAO;AAC1C,QAAM,WAAgC,CAAC;AAEvC,MAAI,cAAc,gBAAgB;AAChC,aAAS,KAAK;AAAA,MACZ,IAAI;AAAA,MACJ,UAAU;AAAA,MACV,SAAS;AAAA,MACT,SAAS;AAAA,MACT,aAAa;AAAA,IACf,CAAC;AAAA,EACH;AAEA,MAAI,cAAc,QAAQ;AAExB,QAAI,CAAC,IAAI,SAAS;AAChB,eAAS,KAAK;AAAA,QACZ,IAAI;AAAA,QACJ,UAAU;AAAA,QACV,SAAS;AAAA,QACT,SACE;AAAA,QAC
F,aAAa;AAAA,MACf,CAAC;AAAA,IACH;AAEA,QAAI,IAAI,SAAS,WAAW,GAAG;AAC7B,eAAS,KAAK;AAAA,QACZ,IAAI;AAAA,QACJ,UAAU;AAAA,QACV,SAAS;AAAA,QACT,SAAS;AAAA,MACX,CAAC;AAAA,IACH;AAEA,UAAM,OAAO,MAAM,IAAI,WAAW,SAAS;AAAA,MACzC,OAAO,KAAK,MAAM,IAAI,WAAW;AAAA,MACjC,OAAO,KAAK,MAAM,IAAI,SAAS;AAAA,IACjC,CAAC;AACD,QAAI,KAAK,WAAW,GAAG;AACrB,eAAS,KAAK;AAAA,QACZ,IAAI;AAAA,QACJ,UAAU;AAAA,QACV,SAAS;AAAA,QACT,SAAS;AAAA,MACX,CAAC;AAAA,IACH;AAEA,QAAI,CAAC,QAAQ,WAAW,CAAC,QAAQ,yBAAyB;AAAA,IAE1D,OAAO;AACL,eAAS,KAAK;AAAA,QACZ,IAAI;AAAA,QACJ,UAAU;AAAA,QACV,SAAS;AAAA,QACT,SACE;AAAA,MACJ,CAAC;AAAA,IACH;AAEA,QAAI,CAAC,IAAI,OAAO,OAAO;AACrB,eAAS,KAAK;AAAA,QACZ,IAAI;AAAA,QACJ,UAAU;AAAA,QACV,SAAS;AAAA,QACT,SAAS;AAAA,QACT,aAAa;AAAA,MACf,CAAC;AAAA,IACH;AAEA,QAAI,CAAC,IAAI,cAAc;AACrB,eAAS,KAAK;AAAA,QACZ,IAAI;AAAA,QACJ,UAAU;AAAA,QACV,SAAS;AAAA,QACT,SAAS;AAAA,QACT,aAAa;AAAA,MACf,CAAC;AAAA,IACH;AAAA,EACF;AAEA,MAAI,cAAc,WAAW;AAC3B,aAAS,KAAK;AAAA,MACZ,IAAI;AAAA,MACJ,UAAU;AAAA,MACV,SAAS;AAAA,MACT,SAAS;AAAA,MACT,aAAa;AAAA,IACf,CAAC;AAAA,EACH;AAEA,QAAM,UAAU;AAAA,IACd;AAAA,IACA;AAAA,IACA,kBACE,cAAc,SACV,CAAC,KAAK,KAAK,MAAM,MAAM,MAAM,MAAM,IAAI,IACvC,cAAc,YACZ,CAAC,IAAI,IACL,CAAC,MAAM;AAAA,EACjB;AAEA,SAAO;AAAA,IACL,WAAW;AAAA,IACX,SAAS;AAAA,IACT,SAAS;AAAA,MACP,cAAc,IAAI;AAAA,MAClB,YAAY,IAAI;AAAA,MAChB,aAAa,IAAI;AAAA,MACjB,WAAW,IAAI;AAAA,MACf,OAAO,IAAI;AAAA,IACb;AAAA,IACA,SAAS,UAAU,QAAQ;AAAA,IAC3B;AAAA,IACA;AAAA,IACA,cAAa,oBAAI,KAAK,GAAE,YAAY;AAAA,EACtC;AACF;;;ACvJA,eAAsB,gBAAgB,KAAmD;AACvF,QAAM,WAAgC,CAAC;AAGvC,MAAI,CAAC,IAAI,OAAO,OAAO;AACrB,aAAS,KAAK;AAAA,MACZ,IAAI;AAAA,MACJ,UAAU;AAAA,MACV,SAAS;AAAA,MACT,SAAS;AAAA,MACT,aAAa;AAAA,IACf,CAAC;AAAA,EACH;AAGA,MAAI,IAAI,SAAS,WAAW,GAAG;AAC7B,aAAS,KAAK;AAAA,MACZ,IAAI;AAAA,MACJ,UAAU;AAAA,MACV,SAAS;AAAA,MACT,SAAS;AAAA,MACT,aAAa;AAAA,IACf,CAAC;AAAA,EACH,OAAO;AAEL,eAAW,YAAY,IAAI,UAAU;AACnC,UAAI,CAAC,SAAS,eAAe,SAAS,YAAY,SAAS,IAAI;AAC7D,iBAAS,KAAK;AAAA,UACZ,IAAI;AAAA,UACJ,UAAU;AAAA,UACV,SAAS;AAAA,UACT,SAAS,YAAY,SAAS,IAAI;AAAA,UAClC,UAAU,gBAAgB,SAAS,WAAW;AAAA,UAC9C,aACE;AAAA,QACJ,CAAC;AAAA
,MACH;AAAA,IACF;AAAA,EACF;AAGA,MAAI,CAAC,IAAI,SAAS;AAChB,aAAS,KAAK;AAAA,MACZ,IAAI;AAAA,MACJ,UAAU;AAAA,MACV,SAAS;AAAA,MACT,SAAS;AAAA,MACT,aACE;AAAA,IACJ,CAAC;AAAA,EACH,WAAW,IAAI,QAAQ,kBAAkB,KAAK;AAC5C,aAAS,KAAK;AAAA,MACZ,IAAI;AAAA,MACJ,UAAU;AAAA,MACV,SAAS;AAAA,MACT,SAAS,uBAAuB,IAAI,QAAQ,kBAAkB,KAAK,QAAQ,CAAC,CAAC;AAAA,MAC7E,UAAU,KAAK,UAAU,IAAI,QAAQ,kBAAkB;AAAA,MACvD,aAAa;AAAA,IACf,CAAC;AAAA,EACH;AAGA,QAAM,OAAO,MAAM,IAAI,WAAW,SAAS;AAAA,IACzC,OAAO,KAAK,MAAM,IAAI,WAAW;AAAA,IACjC,OAAO,KAAK,MAAM,IAAI,SAAS;AAAA,EACjC,CAAC;AACD,MAAI,KAAK,WAAW,GAAG;AACrB,aAAS,KAAK;AAAA,MACZ,IAAI;AAAA,MACJ,UAAU;AAAA,MACV,SAAS;AAAA,MACT,SAAS;AAAA,MACT,aAAa;AAAA,IACf,CAAC;AAAA,EACH;AAGA,MAAI,CAAC,IAAI,oBAAoB,IAAI,iBAAiB,WAAW,GAAG;AAC9D,aAAS,KAAK;AAAA,MACZ,IAAI;AAAA,MACJ,UAAU;AAAA,MACV,SAAS;AAAA,MACT,SAAS;AAAA,MACT,aACE;AAAA,IACJ,CAAC;AAAA,EACH,OAAO;AACL,UAAM,OAAO,IAAI,iBAAiB,OAAO,CAAC,MAAM,OAAO,SAAS,EAAE,OAAO,KAAK,EAAE,UAAU,GAAG;AAC7F,QAAI,KAAK,SAAS,GAAG;AACnB,eAAS,KAAK;AAAA,QACZ,IAAI;AAAA,QACJ,UAAU;AAAA,QACV,SAAS;AAAA,QACT,SAAS,GAAG,KAAK,MAAM;AAAA,QACvB,aAAa;AAAA,MACf,CAAC;AAAA,IACH;AAAA,EACF;AAGA,MAAI,CAAC,IAAI,cAAc;AACrB,aAAS,KAAK;AAAA,MACZ,IAAI;AAAA,MACJ,UAAU;AAAA,MACV,SAAS;AAAA,MACT,SAAS;AAAA,MACT,aAAa;AAAA,IACf,CAAC;AAAA,EACH,OAAO;AACL,UAAM,WAAW,MAAM,IAAI,aAAa,KAAK;AAAA,MAC3C,OAAO,KAAK,MAAM,IAAI,WAAW;AAAA,MACjC,OAAO,KAAK,MAAM,IAAI,SAAS;AAAA,IACjC,CAAC;AACD,QAAI,SAAS,WAAW,GAAG;AACzB,eAAS,KAAK;AAAA,QACZ,IAAI;AAAA,QACJ,UAAU;AAAA,QACV,SAAS;AAAA,QACT,SAAS;AAAA,MACX,CAAC;AAAA,IACH;AAAA,EACF;AAGA,QAAM,aAAmD,CAAC;AAC1D,aAAW,YAAY,IAAI,UAAU;AAGnC,eAAW,KAAK,EAAE,MAAM,SAAS,MAAM,IAAI,iBAAiB,KAAK,SAAS,WAAW,EAAE,CAAC;AAAA,EAC1F;AAEA,QAAM,UAAU;AAAA,IACd,mBAAmB;AAAA,MACjB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,IACA,UAAU,KAAK;AAAA,IACf,iBAAiB,IAAI,SAAS,mBAAmB;AAAA,IACjD,mBAAmB;AAAA,EACrB;AAEA,SAAO;AAAA,IACL,WAAW;AAAA,IACX,SAAS;AAAA,IACT,SAAS;AAAA,MACP,cAAc,IAAI;AAAA,MAClB,YAAY,IAAI;AAAA,MAChB,aAAa,IAAI;AAAA,MACjB,WAAW,IAAI;AAAA,MACf,OAAO,IAAI;AAAA,IACb;AAAA,IACA,SAAS,UAAU,QAAQ;AAA
A,IAC3B;AAAA,IACA;AAAA,IACA,cAAa,oBAAI,KAAK,GAAE,YAAY;AAAA,EACtC;AACF;;;ACnKA,eAAsB,WAAW,KAAmD;AAClF,QAAM,WAAgC,CAAC;AACvC,QAAM,QAAQ,KAAK,MAAM,IAAI,WAAW;AACxC,QAAM,MAAM,KAAK,MAAM,IAAI,SAAS;AACpC,QAAM,OAAO,MAAM,IAAI,WAAW,SAAS,EAAE,OAAO,OAAO,OAAO,IAAI,CAAC;AAGvE,QAAM,cACJ,KAAK,SAAS,IAAI,KAAK,OAAO,CAAC,MAAM,EAAE,SAAS,SAAS,KAAK,EAAE,SAAS,KAAK,SAAS;AACzF,MAAI,gBAAgB,QAAQ,cAAc,KAAK;AAC7C,aAAS,KAAK;AAAA,MACZ,IAAI;AAAA,MACJ,UAAU;AAAA,MACV,SAAS;AAAA,MACT,SAAS,wBAAwB,cAAc,KAAK,QAAQ,CAAC,CAAC;AAAA,MAC9D,aAAa;AAAA,IACf,CAAC;AAAA,EACH;AACA,MAAI,KAAK,WAAW,GAAG;AACrB,aAAS,KAAK;AAAA,MACZ,IAAI;AAAA,MACJ,UAAU;AAAA,MACV,SAAS;AAAA,MACT,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AAGA,QAAM,UAAU,KAAK,OAAO,CAAC,MAAM,EAAE,WAAW,SAAS;AACzD,MAAI,QAAQ,SAAS,KAAK,SAAS,QAAQ,QAAQ,UAAU,GAAG;AAC9D,aAAS,KAAK;AAAA,MACZ,IAAI;AAAA,MACJ,UAAU;AAAA,MACV,SAAS;AAAA,MACT,SAAS,GAAG,QAAQ,MAAM;AAAA,MAC1B,aAAa;AAAA,IACf,CAAC;AAAA,EACH;AAGA,QAAM,iBAAiB,MAAM,IAAI,WAAW,OAAO;AAAA,IACjD,MAAM;AAAA,IACN,OAAO;AAAA,IACP,OAAO;AAAA,EACT,CAAC;AACD,QAAM,cAAc,MAAM,IAAI,WAAW,OAAO,EAAE,MAAM,SAAS,OAAO,OAAO,OAAO,IAAI,CAAC;AAC3F,QAAM,iBAAiB,eAAe,SAAS,YAAY;AAC3D,MAAI,iBAAiB,GAAG;AAEtB,aAAS,KAAK;AAAA,MACZ,IAAI;AAAA,MACJ,UAAU;AAAA,MACV,SAAS;AAAA,MACT,SAAS,GAAG,cAAc;AAAA,MAC1B,aACE;AAAA,IACJ,CAAC;AAAA,EACH;AAGA,QAAM,oBAAoB,IAAI,IAAI,KAAK,IAAI,CAAC,MAAM,EAAE,gBAAgB,EAAE,OAAO,OAAO,CAAa;AACjG,QAAM,eAAe,IAAI,IAAI,KAAK,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,OAAO,OAAO,CAAa;AACrF,QAAM,UAAU,IAAI,IAAI,KAAK,IAAI,CAAC,MAAM,EAAE,OAAO,EAAE,OAAO,OAAO,CAAa;AAC9E,MAAI,QAAQ,SAAS,GAAG;AACtB,aAAS,KAAK;AAAA,MACZ,IAAI;AAAA,MACJ,UAAU;AAAA,MACV,SAAS;AAAA,MACT,SAAS;AAAA,MACT,aAAa;AAAA,IACf,CAAC;AAAA,EACH;AACA,MAAI,aAAa,SAAS,GAAG;AAC3B,aAAS,KAAK;AAAA,MACZ,IAAI;AAAA,MACJ,UAAU;AAAA,MACV,SAAS;AAAA,MACT,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AAEA,QAAM,UAAU;AAAA,IACd,UAAU,CAAC,SAAS,SAAS,SAAS,OAAO;AAAA,IAC7C,UAAU,KAAK;AAAA,IACf;AAAA,IACA,cAAc,QAAQ;AAAA,IACtB,oBAAoB;AAAA,IACpB,kBAAkB;AAAA,MAChB,UAAU,QAAQ;AAAA,MAClB,cAAc,aAAa;AAAA,MAC3B,mBAAmB,kBAAkB;AAAA,IACvC;AAAA,EACF;AAEA,SAAO;AAAA,IACL,WAAW;AAA
A,IACX,SAAS;AAAA,IACT,SAAS;AAAA,MACP,cAAc,IAAI;AAAA,MAClB,YAAY,IAAI;AAAA,MAChB,aAAa,IAAI;AAAA,MACjB,WAAW,IAAI;AAAA,MACf,OAAO,IAAI;AAAA,IACb;AAAA,IACA,SAAS,UAAU,QAAQ;AAAA,IAC3B;AAAA,IACA;AAAA,IACA,cAAa,oBAAI,KAAK,GAAE,YAAY;AAAA,EACtC;AACF;","names":[]}
@@ -0,0 +1,74 @@
1
+ import {
2
+ buildTrajectory
3
+ } from "./chunk-RZTMDUO7.js";
4
+ import {
5
+ isLlmSpan,
6
+ isToolSpan
7
+ } from "./chunk-5BKGXME7.js";
8
+
9
+ // src/prm/training-export.ts
10
/**
 * Flatten PRM-graded runs into per-step training samples.
 *
 * For each graded run the trajectory is rebuilt via `buildTrajectory`, and
 * every graded step whose `spanId` is present in that trajectory yields one
 * sample: the step itself plus up to `contextWindow` immediately preceding
 * trajectory steps rendered as prior turns (spans that `spanToTurn` cannot
 * render are dropped). Graded steps whose `spanId` is not in the trajectory
 * are skipped silently.
 *
 * @param {object} store - Trace store, passed through to `buildTrajectory`.
 * @param {Array<{runId: string, steps: Array<object>}>} graded - Graded runs;
 *   each graded step supplies `spanId`, `rubricId`, `score` and optionally
 *   `rationale` / `evidence`.
 * @param {{contextWindow?: number}} [options] - `contextWindow` is the maximum
 *   number of preceding steps to include as context (default 5).
 * @returns {Promise<Array<object>>} Samples, in the order the graded steps
 *   were supplied.
 */
async function exportTrainingData(store, graded, options = {}) {
  const window = options.contextWindow ?? 5;
  const out = [];
  for (const g of graded) {
    const trajectory = await buildTrajectory(store, g.runId);
    const steps = trajectory.steps;

    // Index the trajectory once per run so each graded step resolves in O(1)
    // instead of re-scanning the step list with indexOf().
    // - stepBySpanId: the last step wins for a repeated spanId (same as
    //   building a Map from [spanId, step] pairs).
    // - firstIndexByStep: the first position of each step object (same answer
    //   indexOf() would give).
    const stepBySpanId = new Map();
    const firstIndexByStep = new Map();
    for (let i = 0; i < steps.length; i++) {
      const step = steps[i];
      stepBySpanId.set(step.span.spanId, step);
      if (!firstIndexByStep.has(step)) firstIndexByStep.set(step, i);
    }

    for (const gs of g.steps) {
      const node = stepBySpanId.get(gs.spanId);
      if (!node) continue; // graded span is not part of this trajectory
      const idx = firstIndexByStep.get(node);
      const priorSpans = steps.slice(Math.max(0, idx - window), idx).map((s) => s.span);
      out.push({
        runId: g.runId,
        spanId: gs.spanId,
        rubricId: gs.rubricId,
        score: gs.score,
        context: {
          priorTurns: priorSpans.map(spanToTurn).filter((t) => t !== null),
          step: { kind: node.span.kind, text: spanToText(node.span) }
        },
        rationale: gs.rationale,
        evidence: gs.evidence
      });
    }
  }
  return out;
}
37
/**
 * Serialize samples as NDJSON: one JSON document per line, each line
 * terminated by "\n".
 *
 * An empty sample list serializes to the empty string. Emitting a lone "\n"
 * there would produce a blank line, which is not a JSON record and ends up in
 * the middle of the output when several batches are concatenated or streamed.
 *
 * @param {Array<object>} samples - Samples to serialize.
 * @returns {string} NDJSON text ("" when `samples` is empty).
 */
function toNdjson(samples) {
  if (samples.length === 0) return "";
  return samples.map((sample) => `${JSON.stringify(sample)}\n`).join("");
}
41
/**
 * Render a span as a conversation turn for use as training context.
 *
 * - LLM span: an "assistant" turn whose content is `span.output`, or, when
 *   that is null/undefined, the span's messages joined as "role: content"
 *   lines.
 * - Tool span: a "tool" turn of the form "toolName(args) → result".
 * - Anything else: null.
 *
 * @param {object} span - Trace span.
 * @returns {{role: string, content: string} | null}
 */
function spanToTurn(span) {
  if (isLlmSpan(span)) {
    const content =
      span.output ??
      span.messages.map(({ role, content: body }) => `${role}: ${body}`).join("\n");
    return { role: "assistant", content };
  }
  if (!isToolSpan(span)) {
    return null;
  }
  const call = `${span.toolName}(${safeStringify(span.args)})`;
  const outcome = safeStringify(span.result);
  return { role: "tool", content: `${call} \u2192 ${outcome}` };
}
54
/**
 * Render the graded step itself as plain text.
 *
 * LLM spans yield their output (empty string when it is null/undefined), tool
 * spans yield "toolName(args) → result", and every other span falls back to
 * its `name`.
 *
 * @param {object} span - Trace span.
 * @returns {string}
 */
function spanToText(span) {
  if (isLlmSpan(span)) {
    return span.output ?? "";
  }
  if (!isToolSpan(span)) {
    return span.name;
  }
  const call = `${span.toolName}(${safeStringify(span.args)})`;
  const outcome = safeStringify(span.result);
  return `${call} \u2192 ${outcome}`;
}
60
/**
 * Convert any value to a string without throwing on values JSON cannot encode.
 *
 * - null / undefined -> ""
 * - strings are returned unchanged (no added quotes)
 * - everything else is JSON-encoded; if that throws (cycles, BigInt) or
 *   produces no output, the value is coerced with String().
 *
 * @param {unknown} v - Value to render.
 * @returns {string} Always a string.
 */
function safeStringify(v) {
  if (v === null || v === undefined) return "";
  if (typeof v === "string") return v;
  try {
    // JSON.stringify returns undefined (not a string) for functions, symbols
    // and objects whose toJSON() yields undefined; fall back so callers never
    // interpolate the literal text "undefined".
    return JSON.stringify(v) ?? String(v);
  } catch {
    return String(v);
  }
}
69
+
70
+ export {
71
+ exportTrainingData,
72
+ toNdjson
73
+ };
74
+ //# sourceMappingURL=chunk-KMPRBJK4.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/prm/training-export.ts"],"sourcesContent":["/**\n * Export PRM-graded traces as training data for downstream reward-model\n * fine-tuning. Canonical format is NDJSON of\n * `{ trajectory_text, step_index, rubric, score }` so a small model can\n * learn to predict step rewards from step context.\n *\n * The framework doesn't train the model — we emit the data; callers\n * plug it into their preferred trainer (TRL, Unsloth, custom).\n */\n\nimport type { LlmSpan, Span } from '../trace/schema'\nimport { isLlmSpan, isToolSpan } from '../trace/schema'\nimport type { TraceStore } from '../trace/store'\nimport { buildTrajectory } from '../trajectory'\nimport type { PrmGradedTrace } from './rubric'\n\nexport interface PrmTrainingSample {\n runId: string\n spanId: string\n rubricId: string\n score: number\n /** Serialized step context — step + surrounding conversation. */\n context: {\n priorTurns: Array<{ role: string; content: string }>\n step: { kind: Span['kind']; text: string }\n }\n /** Optional evidence + rationale for auditability. */\n rationale?: string\n evidence?: string\n}\n\nexport async function exportTrainingData(\n store: TraceStore,\n graded: PrmGradedTrace[],\n options: { contextWindow?: number } = {},\n): Promise<PrmTrainingSample[]> {\n const window = options.contextWindow ?? 
5\n const out: PrmTrainingSample[] = []\n for (const g of graded) {\n const trajectory = await buildTrajectory(store, g.runId)\n const spanById = new Map(trajectory.steps.map((s) => [s.span.spanId, s]))\n for (const gs of g.steps) {\n const node = spanById.get(gs.spanId)\n if (!node) continue\n const idx = trajectory.steps.indexOf(node)\n const priorSpans = trajectory.steps.slice(Math.max(0, idx - window), idx).map((s) => s.span)\n out.push({\n runId: g.runId,\n spanId: gs.spanId,\n rubricId: gs.rubricId,\n score: gs.score,\n context: {\n priorTurns: priorSpans\n .map(spanToTurn)\n .filter((t): t is { role: string; content: string } => t !== null),\n step: { kind: node.span.kind, text: spanToText(node.span) },\n },\n rationale: gs.rationale,\n evidence: gs.evidence,\n })\n }\n }\n return out\n}\n\n/** NDJSON serialization — write to file or stream directly to a trainer. */\nexport function toNdjson(samples: PrmTrainingSample[]): string {\n return `${samples.map((s) => JSON.stringify(s)).join('\\n')}\\n`\n}\n\nfunction spanToTurn(span: Span): { role: string; content: string } | null {\n if (isLlmSpan(span)) {\n const text = span.output ?? span.messages.map((m) => `${m.role}: ${m.content}`).join('\\n')\n return { role: 'assistant', content: text }\n }\n if (isToolSpan(span)) {\n return {\n role: 'tool',\n content: `${span.toolName}(${safeStringify(span.args)}) → ${safeStringify(span.result)}`,\n }\n }\n return null\n}\n\nfunction spanToText(span: Span): string {\n if (isLlmSpan(span)) return (span as LlmSpan).output ?? 
''\n if (isToolSpan(span))\n return `${span.toolName}(${safeStringify(span.args)}) → ${safeStringify(span.result)}`\n return span.name\n}\n\nfunction safeStringify(v: unknown): string {\n if (v === null || v === undefined) return ''\n if (typeof v === 'string') return v\n try {\n return JSON.stringify(v)\n } catch {\n return String(v)\n }\n}\n"],"mappings":";;;;;;;;;AA+BA,eAAsB,mBACpB,OACA,QACA,UAAsC,CAAC,GACT;AAC9B,QAAM,SAAS,QAAQ,iBAAiB;AACxC,QAAM,MAA2B,CAAC;AAClC,aAAW,KAAK,QAAQ;AACtB,UAAM,aAAa,MAAM,gBAAgB,OAAO,EAAE,KAAK;AACvD,UAAM,WAAW,IAAI,IAAI,WAAW,MAAM,IAAI,CAAC,MAAM,CAAC,EAAE,KAAK,QAAQ,CAAC,CAAC,CAAC;AACxE,eAAW,MAAM,EAAE,OAAO;AACxB,YAAM,OAAO,SAAS,IAAI,GAAG,MAAM;AACnC,UAAI,CAAC,KAAM;AACX,YAAM,MAAM,WAAW,MAAM,QAAQ,IAAI;AACzC,YAAM,aAAa,WAAW,MAAM,MAAM,KAAK,IAAI,GAAG,MAAM,MAAM,GAAG,GAAG,EAAE,IAAI,CAAC,MAAM,EAAE,IAAI;AAC3F,UAAI,KAAK;AAAA,QACP,OAAO,EAAE;AAAA,QACT,QAAQ,GAAG;AAAA,QACX,UAAU,GAAG;AAAA,QACb,OAAO,GAAG;AAAA,QACV,SAAS;AAAA,UACP,YAAY,WACT,IAAI,UAAU,EACd,OAAO,CAAC,MAA8C,MAAM,IAAI;AAAA,UACnE,MAAM,EAAE,MAAM,KAAK,KAAK,MAAM,MAAM,WAAW,KAAK,IAAI,EAAE;AAAA,QAC5D;AAAA,QACA,WAAW,GAAG;AAAA,QACd,UAAU,GAAG;AAAA,MACf,CAAC;AAAA,IACH;AAAA,EACF;AACA,SAAO;AACT;AAGO,SAAS,SAAS,SAAsC;AAC7D,SAAO,GAAG,QAAQ,IAAI,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,EAAE,KAAK,IAAI,CAAC;AAAA;AAC5D;AAEA,SAAS,WAAW,MAAsD;AACxE,MAAI,UAAU,IAAI,GAAG;AACnB,UAAM,OAAO,KAAK,UAAU,KAAK,SAAS,IAAI,CAAC,MAAM,GAAG,EAAE,IAAI,KAAK,EAAE,OAAO,EAAE,EAAE,KAAK,IAAI;AACzF,WAAO,EAAE,MAAM,aAAa,SAAS,KAAK;AAAA,EAC5C;AACA,MAAI,WAAW,IAAI,GAAG;AACpB,WAAO;AAAA,MACL,MAAM;AAAA,MACN,SAAS,GAAG,KAAK,QAAQ,IAAI,cAAc,KAAK,IAAI,CAAC,YAAO,cAAc,KAAK,MAAM,CAAC;AAAA,IACxF;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,WAAW,MAAoB;AACtC,MAAI,UAAU,IAAI,EAAG,QAAQ,KAAiB,UAAU;AACxD,MAAI,WAAW,IAAI;AACjB,WAAO,GAAG,KAAK,QAAQ,IAAI,cAAc,KAAK,IAAI,CAAC,YAAO,cAAc,KAAK,MAAM,CAAC;AACtF,SAAO,KAAK;AACd;AAEA,SAAS,cAAc,GAAoB;AACzC,MAAI,MAAM,QAAQ,MAAM,OAAW,QAAO;AAC1C,MAAI,OAAO,MAAM,SAAU,QAAO;AAClC,MAAI;AACF,WAAO,KAAK,UAAU,CAAC;AAAA,EACzB,QAAQ;AACN,WAAO,OAAO,CAAC;AAAA,EACjB;AACF;","nam
es":[]}
@@ -1,11 +1,14 @@
1
+ import {
2
+ CaptureIntegrityError
3
+ } from "./chunk-NG236HPC.js";
4
+
1
5
  // src/trace/integrity.ts
2
- var RunIntegrityError = class extends Error {
6
+ var RunIntegrityError = class extends CaptureIntegrityError {
3
7
  constructor(report) {
4
8
  super(
5
9
  `Run ${report.runId} failed integrity check: ${report.issues.map((i) => i.code).join(", ")}`
6
10
  );
7
11
  this.report = report;
8
- this.name = "RunIntegrityError";
9
12
  }
10
13
  report;
11
14
  };
@@ -118,4 +121,4 @@ export {
118
121
  assertRunCaptured,
119
122
  throwIfRunIncomplete
120
123
  };
121
- //# sourceMappingURL=chunk-QUKKGHTZ.js.map
124
+ //# sourceMappingURL=chunk-KTGTIOFD.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/trace/integrity.ts"],"sourcesContent":["/**\n * Run-completion integrity check — at end of run, verify the expected event\n * types were actually captured. The point is the launch-review failure mode:\n * a run *appears* successful but the raw provider events were never written,\n * so a downstream reviewer can't reconstruct what happened.\n *\n * Pattern:\n *\n * const report = await assertRunCaptured(store, runId, {\n * llmSpansMin: 1,\n * judgeSpansMin: 1,\n * rawSink: providerSink, // must have ≥ 1 event for this run\n * requireRawCoverageOfLlmSpans: true, // every llm span has matching raw events\n * })\n * if (!report.ok) throwIfRunIncomplete(report) // or mark run failed and continue\n *\n * The function is read-only on the store and returns a structured report;\n * the caller chooses the failure mode (throw, mark run failed, log warning).\n * `throwIfRunIncomplete` is the convenient strict mode.\n */\n\nimport { CaptureIntegrityError } from '../errors'\nimport type { RawProviderSink } from './raw-provider-sink'\nimport type { TraceStore } from './store'\n\nexport interface RunIntegrityExpectations {\n /** Minimum LLM span count. Default 0 (no requirement). */\n llmSpansMin?: number\n /** Minimum judge span count. Default 0. */\n judgeSpansMin?: number\n /** Minimum tool span count. Default 0. */\n toolSpansMin?: number\n /**\n * Raw provider sink to consult for capture verification. When present,\n * the check requires at least one raw event for the run.\n */\n rawSink?: RawProviderSink\n /** Minimum raw provider event count. Default 0; ignored when `rawSink` absent. */\n rawProviderEventsMin?: number\n /**\n * Every LLM span must have at least one matching raw `request` event\n * (matched by spanId). Catches the common bug where the structured span\n * was emitted but the raw HTTP capture was wired to a different sink.\n */\n requireRawCoverageOfLlmSpans?: boolean\n /** Run outcome must be set (not null/undefined). 
Default false. */\n requireOutcome?: boolean\n}\n\nexport type RunIntegrityIssueCode =\n | 'no_run'\n | 'missing_llm_spans'\n | 'missing_judge_spans'\n | 'missing_tool_spans'\n | 'missing_raw_events'\n | 'no_raw_sink'\n | 'orphan_llm_span'\n | 'missing_outcome'\n\nexport interface RunIntegrityIssue {\n code: RunIntegrityIssueCode\n message: string\n detail?: Record<string, unknown>\n}\n\nexport interface RunIntegrityReport {\n ok: boolean\n runId: string\n llmSpanCount: number\n judgeSpanCount: number\n toolSpanCount: number\n rawProviderEventCount: number\n /**\n * Coverage of LLM spans by raw provider events keyed on spanId.\n * `total` is the number of LLM spans; `covered` is the count with at\n * least one matching `request` raw event.\n */\n rawSpanCoverage: { covered: number; total: number }\n issues: RunIntegrityIssue[]\n}\n\nexport class RunIntegrityError extends CaptureIntegrityError {\n constructor(public readonly report: RunIntegrityReport) {\n super(\n `Run ${report.runId} failed integrity check: ${report.issues.map((i) => i.code).join(', ')}`,\n )\n }\n}\n\nexport async function assertRunCaptured(\n store: TraceStore,\n runId: string,\n expectations: RunIntegrityExpectations = {},\n): Promise<RunIntegrityReport> {\n const issues: RunIntegrityIssue[] = []\n const run = await store.getRun(runId)\n if (!run) {\n return {\n ok: false,\n runId,\n llmSpanCount: 0,\n judgeSpanCount: 0,\n toolSpanCount: 0,\n rawProviderEventCount: 0,\n rawSpanCoverage: { covered: 0, total: 0 },\n issues: [{ code: 'no_run', message: `Run ${runId} not found in store.` }],\n }\n }\n\n const spans = await store.spans({ runId })\n const llmSpans = spans.filter((s) => s.kind === 'llm')\n const judgeSpans = spans.filter((s) => s.kind === 'judge')\n const toolSpans = spans.filter((s) => s.kind === 'tool')\n\n const llmMin = expectations.llmSpansMin ?? 0\n const judgeMin = expectations.judgeSpansMin ?? 0\n const toolMin = expectations.toolSpansMin ?? 
0\n\n if (llmSpans.length < llmMin) {\n issues.push({\n code: 'missing_llm_spans',\n message: `Expected ≥ ${llmMin} LLM spans, found ${llmSpans.length}.`,\n detail: { expected: llmMin, found: llmSpans.length },\n })\n }\n if (judgeSpans.length < judgeMin) {\n issues.push({\n code: 'missing_judge_spans',\n message: `Expected ≥ ${judgeMin} judge spans, found ${judgeSpans.length}.`,\n detail: { expected: judgeMin, found: judgeSpans.length },\n })\n }\n if (toolSpans.length < toolMin) {\n issues.push({\n code: 'missing_tool_spans',\n message: `Expected ≥ ${toolMin} tool spans, found ${toolSpans.length}.`,\n detail: { expected: toolMin, found: toolSpans.length },\n })\n }\n\n let rawEventCount = 0\n let coverage = { covered: 0, total: llmSpans.length }\n\n if (expectations.rawSink) {\n if (!expectations.rawSink.list) {\n issues.push({\n code: 'no_raw_sink',\n message: 'Provided rawSink does not implement list(); cannot verify capture.',\n })\n } else {\n const events = await expectations.rawSink.list({ runId })\n rawEventCount = events.length\n const rawMin = expectations.rawProviderEventsMin ?? 
1\n if (rawEventCount < rawMin) {\n issues.push({\n code: 'missing_raw_events',\n message: `Expected ≥ ${rawMin} raw provider events, found ${rawEventCount}.`,\n detail: { expected: rawMin, found: rawEventCount },\n })\n }\n if (expectations.requireRawCoverageOfLlmSpans) {\n const requestEventsBySpan = new Set(\n events.filter((e) => e.direction === 'request' && e.spanId).map((e) => e.spanId!),\n )\n const orphaned = llmSpans.filter((s) => !requestEventsBySpan.has(s.spanId))\n coverage = { covered: llmSpans.length - orphaned.length, total: llmSpans.length }\n if (orphaned.length > 0) {\n issues.push({\n code: 'orphan_llm_span',\n message: `${orphaned.length} LLM span(s) have no matching raw provider request event.`,\n detail: { orphanedSpanIds: orphaned.map((s) => s.spanId) },\n })\n }\n }\n }\n } else if (expectations.requireRawCoverageOfLlmSpans || expectations.rawProviderEventsMin) {\n issues.push({\n code: 'no_raw_sink',\n message: 'Raw coverage required but no rawSink supplied to the integrity check.',\n })\n }\n\n if (expectations.requireOutcome && (run.outcome === undefined || run.outcome === null)) {\n issues.push({\n code: 'missing_outcome',\n message: `Run ${runId} has no outcome recorded.`,\n })\n }\n\n return {\n ok: issues.length === 0,\n runId,\n llmSpanCount: llmSpans.length,\n judgeSpanCount: judgeSpans.length,\n toolSpanCount: toolSpans.length,\n rawProviderEventCount: rawEventCount,\n rawSpanCoverage: coverage,\n issues,\n }\n}\n\n/** Strict mode: throws `RunIntegrityError` when the report isn't ok. 
*/\nexport function throwIfRunIncomplete(report: RunIntegrityReport): void {\n if (!report.ok) throw new RunIntegrityError(report)\n}\n"],"mappings":";;;;;AAiFO,IAAM,oBAAN,cAAgC,sBAAsB;AAAA,EAC3D,YAA4B,QAA4B;AACtD;AAAA,MACE,OAAO,OAAO,KAAK,4BAA4B,OAAO,OAAO,IAAI,CAAC,MAAM,EAAE,IAAI,EAAE,KAAK,IAAI,CAAC;AAAA,IAC5F;AAH0B;AAAA,EAI5B;AAAA,EAJ4B;AAK9B;AAEA,eAAsB,kBACpB,OACA,OACA,eAAyC,CAAC,GACb;AAC7B,QAAM,SAA8B,CAAC;AACrC,QAAM,MAAM,MAAM,MAAM,OAAO,KAAK;AACpC,MAAI,CAAC,KAAK;AACR,WAAO;AAAA,MACL,IAAI;AAAA,MACJ;AAAA,MACA,cAAc;AAAA,MACd,gBAAgB;AAAA,MAChB,eAAe;AAAA,MACf,uBAAuB;AAAA,MACvB,iBAAiB,EAAE,SAAS,GAAG,OAAO,EAAE;AAAA,MACxC,QAAQ,CAAC,EAAE,MAAM,UAAU,SAAS,OAAO,KAAK,uBAAuB,CAAC;AAAA,IAC1E;AAAA,EACF;AAEA,QAAM,QAAQ,MAAM,MAAM,MAAM,EAAE,MAAM,CAAC;AACzC,QAAM,WAAW,MAAM,OAAO,CAAC,MAAM,EAAE,SAAS,KAAK;AACrD,QAAM,aAAa,MAAM,OAAO,CAAC,MAAM,EAAE,SAAS,OAAO;AACzD,QAAM,YAAY,MAAM,OAAO,CAAC,MAAM,EAAE,SAAS,MAAM;AAEvD,QAAM,SAAS,aAAa,eAAe;AAC3C,QAAM,WAAW,aAAa,iBAAiB;AAC/C,QAAM,UAAU,aAAa,gBAAgB;AAE7C,MAAI,SAAS,SAAS,QAAQ;AAC5B,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,SAAS,mBAAc,MAAM,qBAAqB,SAAS,MAAM;AAAA,MACjE,QAAQ,EAAE,UAAU,QAAQ,OAAO,SAAS,OAAO;AAAA,IACrD,CAAC;AAAA,EACH;AACA,MAAI,WAAW,SAAS,UAAU;AAChC,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,SAAS,mBAAc,QAAQ,uBAAuB,WAAW,MAAM;AAAA,MACvE,QAAQ,EAAE,UAAU,UAAU,OAAO,WAAW,OAAO;AAAA,IACzD,CAAC;AAAA,EACH;AACA,MAAI,UAAU,SAAS,SAAS;AAC9B,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,SAAS,mBAAc,OAAO,sBAAsB,UAAU,MAAM;AAAA,MACpE,QAAQ,EAAE,UAAU,SAAS,OAAO,UAAU,OAAO;AAAA,IACvD,CAAC;AAAA,EACH;AAEA,MAAI,gBAAgB;AACpB,MAAI,WAAW,EAAE,SAAS,GAAG,OAAO,SAAS,OAAO;AAEpD,MAAI,aAAa,SAAS;AACxB,QAAI,CAAC,aAAa,QAAQ,MAAM;AAC9B,aAAO,KAAK;AAAA,QACV,MAAM;AAAA,QACN,SAAS;AAAA,MACX,CAAC;AAAA,IACH,OAAO;AACL,YAAM,SAAS,MAAM,aAAa,QAAQ,KAAK,EAAE,MAAM,CAAC;AACxD,sBAAgB,OAAO;AACvB,YAAM,SAAS,aAAa,wBAAwB;AACpD,UAAI,gBAAgB,QAAQ;AAC1B,eAAO,KAAK;AAAA,UACV,MAAM;AAAA,UACN,SAAS,mBAAc,MAAM,+BAA+B,aAAa;AAAA,UACzE,QAAQ,EAAE,UAAU,QAAQ,OAAO,cAAc;AAAA,QACnD,CAAC;AAAA,MACH;AACA,UAAI,aAAa,8BAA8B;AAC7C,cAAM,sBAAsB,IAAI;AAAA,UAC9B,OAAO,OAA
O,CAAC,MAAM,EAAE,cAAc,aAAa,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,MAAO;AAAA,QAClF;AACA,cAAM,WAAW,SAAS,OAAO,CAAC,MAAM,CAAC,oBAAoB,IAAI,EAAE,MAAM,CAAC;AAC1E,mBAAW,EAAE,SAAS,SAAS,SAAS,SAAS,QAAQ,OAAO,SAAS,OAAO;AAChF,YAAI,SAAS,SAAS,GAAG;AACvB,iBAAO,KAAK;AAAA,YACV,MAAM;AAAA,YACN,SAAS,GAAG,SAAS,MAAM;AAAA,YAC3B,QAAQ,EAAE,iBAAiB,SAAS,IAAI,CAAC,MAAM,EAAE,MAAM,EAAE;AAAA,UAC3D,CAAC;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAAA,EACF,WAAW,aAAa,gCAAgC,aAAa,sBAAsB;AACzF,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AAEA,MAAI,aAAa,mBAAmB,IAAI,YAAY,UAAa,IAAI,YAAY,OAAO;AACtF,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,SAAS,OAAO,KAAK;AAAA,IACvB,CAAC;AAAA,EACH;AAEA,SAAO;AAAA,IACL,IAAI,OAAO,WAAW;AAAA,IACtB;AAAA,IACA,cAAc,SAAS;AAAA,IACvB,gBAAgB,WAAW;AAAA,IAC3B,eAAe,UAAU;AAAA,IACzB,uBAAuB;AAAA,IACvB,iBAAiB;AAAA,IACjB;AAAA,EACF;AACF;AAGO,SAAS,qBAAqB,QAAkC;AACrE,MAAI,CAAC,OAAO,GAAI,OAAM,IAAI,kBAAkB,MAAM;AACpD;","names":[]}