audrey 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227)
  1. package/CHANGELOG.md +30 -0
  2. package/README.md +5 -3
  3. package/benchmarks/adapter-self-test.mjs +6 -2
  4. package/benchmarks/adapters/example-allow.mjs +5 -2
  5. package/benchmarks/adapters/mem0-platform.mjs +19 -12
  6. package/benchmarks/adapters/zep-cloud.mjs +51 -27
  7. package/benchmarks/baselines.js +11 -6
  8. package/benchmarks/build-leaderboard.mjs +36 -23
  9. package/benchmarks/cases.js +24 -12
  10. package/benchmarks/create-conformance-card.mjs +12 -3
  11. package/benchmarks/create-submission-bundle.mjs +22 -8
  12. package/benchmarks/dry-run-external-adapters.mjs +24 -12
  13. package/benchmarks/guardbench.js +263 -123
  14. package/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +4 -4
  15. package/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
  16. package/benchmarks/output/external/guardbench-external-evidence.json +1 -1
  17. package/benchmarks/output/guardbench-conformance-card.json +11 -11
  18. package/benchmarks/output/guardbench-raw.json +107 -108
  19. package/benchmarks/output/guardbench-summary.json +170 -172
  20. package/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
  21. package/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
  22. package/benchmarks/output/submission-bundle/guardbench-conformance-card.json +11 -11
  23. package/benchmarks/output/submission-bundle/guardbench-raw.json +107 -108
  24. package/benchmarks/output/submission-bundle/guardbench-summary.json +170 -172
  25. package/benchmarks/output/submission-bundle/submission-manifest.json +11 -11
  26. package/benchmarks/output/submission-bundle/validation-report.json +1 -1
  27. package/benchmarks/output/summary.json +57 -57
  28. package/benchmarks/perf-snapshot.js +12 -9
  29. package/benchmarks/perf.bench.js +14 -6
  30. package/benchmarks/public-paths.mjs +11 -5
  31. package/benchmarks/reference-results.js +10 -5
  32. package/benchmarks/report.js +48 -27
  33. package/benchmarks/run-external-guardbench.mjs +47 -25
  34. package/benchmarks/run.js +112 -59
  35. package/benchmarks/validate-adapter-module.mjs +13 -10
  36. package/benchmarks/validate-adapter-registry.mjs +16 -5
  37. package/benchmarks/validate-guardbench-artifacts.mjs +76 -19
  38. package/benchmarks/verify-external-evidence.mjs +86 -31
  39. package/benchmarks/verify-publication-artifacts.mjs +34 -11
  40. package/benchmarks/verify-submission-bundle.mjs +9 -4
  41. package/dist/mcp-server/config.d.ts +1 -1
  42. package/dist/mcp-server/config.d.ts.map +1 -1
  43. package/dist/mcp-server/config.js +5 -3
  44. package/dist/mcp-server/config.js.map +1 -1
  45. package/dist/mcp-server/index.d.ts +4 -3
  46. package/dist/mcp-server/index.d.ts.map +1 -1
  47. package/dist/mcp-server/index.js +479 -172
  48. package/dist/mcp-server/index.js.map +1 -1
  49. package/dist/src/action-key.d.ts.map +1 -1
  50. package/dist/src/action-key.js +6 -2
  51. package/dist/src/action-key.js.map +1 -1
  52. package/dist/src/adaptive.d.ts.map +1 -1
  53. package/dist/src/adaptive.js +4 -2
  54. package/dist/src/adaptive.js.map +1 -1
  55. package/dist/src/affect.d.ts.map +1 -1
  56. package/dist/src/affect.js +8 -5
  57. package/dist/src/affect.js.map +1 -1
  58. package/dist/src/audrey.d.ts +1 -1
  59. package/dist/src/audrey.d.ts.map +1 -1
  60. package/dist/src/audrey.js +93 -49
  61. package/dist/src/audrey.js.map +1 -1
  62. package/dist/src/capsule.d.ts.map +1 -1
  63. package/dist/src/capsule.js +37 -15
  64. package/dist/src/capsule.js.map +1 -1
  65. package/dist/src/causal.d.ts +1 -1
  66. package/dist/src/causal.d.ts.map +1 -1
  67. package/dist/src/causal.js +4 -2
  68. package/dist/src/causal.js.map +1 -1
  69. package/dist/src/confidence.d.ts.map +1 -1
  70. package/dist/src/confidence.js +5 -5
  71. package/dist/src/confidence.js.map +1 -1
  72. package/dist/src/consolidate.d.ts.map +1 -1
  73. package/dist/src/consolidate.js +17 -9
  74. package/dist/src/consolidate.js.map +1 -1
  75. package/dist/src/context.js +1 -1
  76. package/dist/src/context.js.map +1 -1
  77. package/dist/src/controller.d.ts.map +1 -1
  78. package/dist/src/controller.js +24 -13
  79. package/dist/src/controller.js.map +1 -1
  80. package/dist/src/db.d.ts.map +1 -1
  81. package/dist/src/db.js +78 -27
  82. package/dist/src/db.js.map +1 -1
  83. package/dist/src/decay.d.ts +1 -1
  84. package/dist/src/decay.d.ts.map +1 -1
  85. package/dist/src/decay.js +1 -1
  86. package/dist/src/decay.js.map +1 -1
  87. package/dist/src/embedding.d.ts +12 -4
  88. package/dist/src/embedding.d.ts.map +1 -1
  89. package/dist/src/embedding.js +18 -16
  90. package/dist/src/embedding.js.map +1 -1
  91. package/dist/src/encode.d.ts.map +1 -1
  92. package/dist/src/encode.js +5 -4
  93. package/dist/src/encode.js.map +1 -1
  94. package/dist/src/events.d.ts +3 -2
  95. package/dist/src/events.d.ts.map +1 -1
  96. package/dist/src/events.js +7 -3
  97. package/dist/src/events.js.map +1 -1
  98. package/dist/src/export.d.ts.map +1 -1
  99. package/dist/src/export.js +21 -7
  100. package/dist/src/export.js.map +1 -1
  101. package/dist/src/feedback.d.ts.map +1 -1
  102. package/dist/src/feedback.js +1 -1
  103. package/dist/src/feedback.js.map +1 -1
  104. package/dist/src/forget.d.ts.map +1 -1
  105. package/dist/src/forget.js +12 -6
  106. package/dist/src/forget.js.map +1 -1
  107. package/dist/src/fts.d.ts.map +1 -1
  108. package/dist/src/fts.js +20 -8
  109. package/dist/src/fts.js.map +1 -1
  110. package/dist/src/hybrid-recall.d.ts.map +1 -1
  111. package/dist/src/hybrid-recall.js +12 -6
  112. package/dist/src/hybrid-recall.js.map +1 -1
  113. package/dist/src/impact.d.ts.map +1 -1
  114. package/dist/src/impact.js +26 -10
  115. package/dist/src/impact.js.map +1 -1
  116. package/dist/src/import.d.ts.map +1 -1
  117. package/dist/src/import.js +11 -6
  118. package/dist/src/import.js.map +1 -1
  119. package/dist/src/index.d.ts +3 -3
  120. package/dist/src/index.d.ts.map +1 -1
  121. package/dist/src/index.js +3 -3
  122. package/dist/src/index.js.map +1 -1
  123. package/dist/src/interference.d.ts.map +1 -1
  124. package/dist/src/interference.js +10 -5
  125. package/dist/src/interference.js.map +1 -1
  126. package/dist/src/introspect.d.ts.map +1 -1
  127. package/dist/src/introspect.js +12 -6
  128. package/dist/src/introspect.js.map +1 -1
  129. package/dist/src/llm.d.ts +2 -2
  130. package/dist/src/llm.d.ts.map +1 -1
  131. package/dist/src/llm.js +6 -6
  132. package/dist/src/llm.js.map +1 -1
  133. package/dist/src/migrate.d.ts.map +1 -1
  134. package/dist/src/migrate.js +10 -4
  135. package/dist/src/migrate.js.map +1 -1
  136. package/dist/src/preflight.d.ts.map +1 -1
  137. package/dist/src/preflight.js +6 -8
  138. package/dist/src/preflight.js.map +1 -1
  139. package/dist/src/profile.d.ts.map +1 -1
  140. package/dist/src/profile.js.map +1 -1
  141. package/dist/src/promote.d.ts.map +1 -1
  142. package/dist/src/promote.js +16 -7
  143. package/dist/src/promote.js.map +1 -1
  144. package/dist/src/prompts.d.ts.map +1 -1
  145. package/dist/src/prompts.js +1 -2
  146. package/dist/src/prompts.js.map +1 -1
  147. package/dist/src/recall.d.ts.map +1 -1
  148. package/dist/src/recall.js +85 -18
  149. package/dist/src/recall.js.map +1 -1
  150. package/dist/src/redact.d.ts.map +1 -1
  151. package/dist/src/redact.js +9 -4
  152. package/dist/src/redact.js.map +1 -1
  153. package/dist/src/reflexes.d.ts.map +1 -1
  154. package/dist/src/reflexes.js +1 -7
  155. package/dist/src/reflexes.js.map +1 -1
  156. package/dist/src/rollback.d.ts.map +1 -1
  157. package/dist/src/rollback.js +4 -2
  158. package/dist/src/rollback.js.map +1 -1
  159. package/dist/src/routes.d.ts.map +1 -1
  160. package/dist/src/routes.js +33 -13
  161. package/dist/src/routes.js.map +1 -1
  162. package/dist/src/rules-compiler.d.ts.map +1 -1
  163. package/dist/src/rules-compiler.js +24 -2
  164. package/dist/src/rules-compiler.js.map +1 -1
  165. package/dist/src/server.js +2 -2
  166. package/dist/src/server.js.map +1 -1
  167. package/dist/src/tool-trace.d.ts +2 -2
  168. package/dist/src/tool-trace.d.ts.map +1 -1
  169. package/dist/src/tool-trace.js +12 -4
  170. package/dist/src/tool-trace.js.map +1 -1
  171. package/dist/src/types.d.ts.map +1 -1
  172. package/dist/src/ulid.js +1 -1
  173. package/dist/src/ulid.js.map +1 -1
  174. package/dist/src/utils.d.ts.map +1 -1
  175. package/dist/src/utils.js.map +1 -1
  176. package/dist/src/validate.d.ts.map +1 -1
  177. package/dist/src/validate.js +20 -10
  178. package/dist/src/validate.js.map +1 -1
  179. package/docs/paper/07-evaluation.md +5 -5
  180. package/docs/paper/audrey-paper-v1.md +5 -5
  181. package/docs/paper/evidence-ledger.md +1 -1
  182. package/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
  183. package/docs/paper/output/arxiv/main.tex +5 -5
  184. package/docs/paper/output/arxiv-compile-report.json +3 -3
  185. package/docs/paper/output/submission-bundle/README.md +5 -3
  186. package/docs/paper/output/submission-bundle/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +4 -4
  187. package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
  188. package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-evidence.json +1 -1
  189. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-conformance-card.json +11 -11
  190. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-raw.json +107 -108
  191. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-summary.json +170 -172
  192. package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
  193. package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
  194. package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/submission-manifest.json +11 -11
  195. package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/validation-report.json +1 -1
  196. package/docs/paper/output/submission-bundle/benchmarks/output/summary.json +58 -58
  197. package/docs/paper/output/submission-bundle/docs/paper/07-evaluation.md +5 -5
  198. package/docs/paper/output/submission-bundle/docs/paper/audrey-paper-v1.md +5 -5
  199. package/docs/paper/output/submission-bundle/docs/paper/evidence-ledger.md +1 -1
  200. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
  201. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/main.tex +5 -5
  202. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv-compile-report.json +3 -3
  203. package/docs/paper/output/submission-bundle/package.json +17 -4
  204. package/docs/paper/output/submission-bundle/paper-submission-manifest.json +36 -36
  205. package/examples/fintech-ops-demo.js +12 -5
  206. package/examples/healthcare-ops-demo.js +8 -4
  207. package/examples/ollama-memory-agent.js +41 -13
  208. package/examples/stripe-demo.js +12 -5
  209. package/package.json +17 -4
  210. package/scripts/audit-release-completion.mjs +179 -101
  211. package/scripts/create-arxiv-source.mjs +20 -14
  212. package/scripts/create-paper-submission-bundle.mjs +6 -2
  213. package/scripts/finalize-release.mjs +111 -36
  214. package/scripts/prepare-release-cut.mjs +14 -6
  215. package/scripts/publish-release-bundle.mjs +62 -23
  216. package/scripts/publish-release-github-api.mjs +89 -24
  217. package/scripts/smoke-cli.js +9 -9
  218. package/scripts/sync-paper-artifacts.mjs +5 -1
  219. package/scripts/verify-arxiv-compile.mjs +52 -16
  220. package/scripts/verify-arxiv-source.mjs +45 -15
  221. package/scripts/verify-browser-launch-plan.mjs +28 -11
  222. package/scripts/verify-browser-launch-results.mjs +32 -14
  223. package/scripts/verify-paper-artifacts.mjs +539 -79
  224. package/scripts/verify-paper-claims.mjs +48 -20
  225. package/scripts/verify-paper-submission-bundle.mjs +22 -11
  226. package/scripts/verify-publication-pack.mjs +23 -9
  227. package/scripts/verify-release-readiness.mjs +211 -76
@@ -103,13 +103,25 @@ function validateSchema(value, schema, label, root = schema) {
103
103
  if (currentSchema.minLength != null && String(current).length < currentSchema.minLength) {
104
104
  errors.push(`${path}: shorter than minLength ${currentSchema.minLength}`);
105
105
  }
106
- if (currentSchema.pattern && typeof current === 'string' && !(new RegExp(currentSchema.pattern).test(current))) {
106
+ if (
107
+ currentSchema.pattern &&
108
+ typeof current === 'string' &&
109
+ !new RegExp(currentSchema.pattern).test(current)
110
+ ) {
107
111
  errors.push(`${path}: does not match ${currentSchema.pattern}`);
108
112
  }
109
- if (currentSchema.minimum != null && typeof current === 'number' && current < currentSchema.minimum) {
113
+ if (
114
+ currentSchema.minimum != null &&
115
+ typeof current === 'number' &&
116
+ current < currentSchema.minimum
117
+ ) {
110
118
  errors.push(`${path}: below minimum ${currentSchema.minimum}`);
111
119
  }
112
- if (currentSchema.maximum != null && typeof current === 'number' && current > currentSchema.maximum) {
120
+ if (
121
+ currentSchema.maximum != null &&
122
+ typeof current === 'number' &&
123
+ current > currentSchema.maximum
124
+ ) {
113
125
  errors.push(`${path}: above maximum ${currentSchema.maximum}`);
114
126
  }
115
127
 
@@ -121,14 +133,18 @@ function validateSchema(value, schema, label, root = schema) {
121
133
  current.forEach((item, index) => validate(item, currentSchema.items, `${path}[${index}]`));
122
134
  }
123
135
  if (currentSchema.contains) {
124
- const matched = current.some(item => validateSchema(item, currentSchema.contains, `${path}.contains`, root).length === 0);
136
+ const matched = current.some(
137
+ item =>
138
+ validateSchema(item, currentSchema.contains, `${path}.contains`, root).length === 0,
139
+ );
125
140
  if (!matched) errors.push(`${path}: no item matched contains constraint`);
126
141
  }
127
142
  }
128
143
 
129
144
  if (currentSchema.type === 'object') {
130
145
  for (const required of currentSchema.required ?? []) {
131
- if (!Object.hasOwn(current, required)) errors.push(`${path}: missing required property ${required}`);
146
+ if (!Object.hasOwn(current, required))
147
+ errors.push(`${path}: missing required property ${required}`);
132
148
  }
133
149
  if (currentSchema.additionalProperties === false) {
134
150
  for (const key of Object.keys(current)) {
@@ -153,18 +169,32 @@ const summary = readJson('benchmarks/output/summary.json');
153
169
  const guardSummary = readJson('benchmarks/output/guardbench-summary.json');
154
170
  const guardManifest = readJson('benchmarks/output/guardbench-manifest.json');
155
171
  const guardRaw = readJson('benchmarks/output/guardbench-raw.json');
156
- const guardAdapterSelfTest = readJson('benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json');
172
+ const guardAdapterSelfTest = readJson(
173
+ 'benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json',
174
+ );
157
175
  const guardAdapterRegistry = readJson('benchmarks/adapters/registry.json');
158
176
  const guardExternalDryRun = readJson('benchmarks/output/external/guardbench-external-dry-run.json');
159
- const guardExternalEvidence = readJson('benchmarks/output/external/guardbench-external-evidence.json');
177
+ const guardExternalEvidence = readJson(
178
+ 'benchmarks/output/external/guardbench-external-evidence.json',
179
+ );
160
180
  const guardManifestSchema = readJson('benchmarks/schemas/guardbench-manifest.schema.json');
161
181
  const guardSummarySchema = readJson('benchmarks/schemas/guardbench-summary.schema.json');
162
182
  const guardRawSchema = readJson('benchmarks/schemas/guardbench-raw.schema.json');
163
- const guardAdapterSelfTestSchema = readJson('benchmarks/schemas/guardbench-adapter-self-test.schema.json');
164
- const guardAdapterRegistrySchema = readJson('benchmarks/schemas/guardbench-adapter-registry.schema.json');
165
- const guardExternalDryRunSchema = readJson('benchmarks/schemas/guardbench-external-dry-run.schema.json');
166
- const guardExternalEvidenceSchema = readJson('benchmarks/schemas/guardbench-external-evidence.schema.json');
167
- const guardPublicationVerificationSchema = readJson('benchmarks/schemas/guardbench-publication-verification.schema.json');
183
+ const guardAdapterSelfTestSchema = readJson(
184
+ 'benchmarks/schemas/guardbench-adapter-self-test.schema.json',
185
+ );
186
+ const guardAdapterRegistrySchema = readJson(
187
+ 'benchmarks/schemas/guardbench-adapter-registry.schema.json',
188
+ );
189
+ const guardExternalDryRunSchema = readJson(
190
+ 'benchmarks/schemas/guardbench-external-dry-run.schema.json',
191
+ );
192
+ const guardExternalEvidenceSchema = readJson(
193
+ 'benchmarks/schemas/guardbench-external-evidence.schema.json',
194
+ );
195
+ const guardPublicationVerificationSchema = readJson(
196
+ 'benchmarks/schemas/guardbench-publication-verification.schema.json',
197
+ );
168
198
  const packageJsonText = readText('package.json');
169
199
  const readme = readText('README.md');
170
200
  const evaluation = readText('docs/paper/07-evaluation.md');
@@ -186,78 +216,366 @@ const local = Object.fromEntries(summary.local.overall.map(row => [row.system, r
186
216
  const evidenceRows = countEvidenceRows(ledger);
187
217
  const bibEntries = countBibEntries(references);
188
218
 
189
- assert(evidenceRows >= 97, `Expected at least 97 evidence ledger rows, found ${evidenceRows}`, failures);
190
- assert(submission.includes(`Evidence ledger with ${evidenceRows} rows`), 'SUBMISSION_README ledger row count is stale', failures);
219
+ assert(
220
+ evidenceRows >= 97,
221
+ `Expected at least 97 evidence ledger rows, found ${evidenceRows}`,
222
+ failures,
223
+ );
224
+ assert(
225
+ submission.includes(`Evidence ledger with ${evidenceRows} rows`),
226
+ 'SUBMISSION_README ledger row count is stale',
227
+ failures,
228
+ );
191
229
  assert(bibEntries === 21, `Expected 21 bibliography entries, found ${bibEntries}`, failures);
192
- assert(submission.includes(`Primary-source bibliography with ${bibEntries} entries`), 'SUBMISSION_README bibliography count is stale', failures);
230
+ assert(
231
+ submission.includes(`Primary-source bibliography with ${bibEntries} entries`),
232
+ 'SUBMISSION_README bibliography count is stale',
233
+ failures,
234
+ );
193
235
 
194
- ensureContainsAll(ledger, ['| E46 -', '| E47 -', '| E48 -', '| E49 -', '| E50 -', '| E51 -', '| E52 -', '| E53 -', '| E54 -', '| E55 -', '| E56 -', '| E57 -', '| E58 -', '| E59 -', '| E60 -', '| E61 -', '| E62 -', '| E63 -', '| E64 -', '| E65 -', '| E66 -', '| E67 -', '| E68 -', '| E69 -', '| E70 -', '| E71 -', '| E72 -', '| E73 -', '| E74 -', '| E75 -', '| E76 -', '| E77 -', '| E78 -', '| E79 -', '| E80 -', '| E81 -', '| E82 -', '| E83 -', '| E84 -', '| E85 -', '| E86 -', '| E87 -', '| E88 -', '| E89 -', '| E90 -', '| E91 -', '| E92 -', '| E93 -', '| E94 -', '| E95 -', '| E96 -', '| E97 -'], 'evidence-ledger.md', failures);
195
- ensureContainsAll(submission, ['Ledger: E46-E51', 'artifact redaction sweep', 'local absolute-path sweep', 'public-paths.mjs', 'adapter-kit.mjs', 'registry.json', 'claim-register.json', 'publication-pack.json', 'reservedUrlChars', 'arxiv-source.schema.json', 'arxiv-compile-report.schema.json', 'arxiv-compile-report.json', 'docs/paper/output/arxiv', 'paper:arxiv', 'paper:arxiv:verify', 'paper:arxiv:compile', 'paper:arxiv:compile:strict', 'browser-launch-plan.json', 'browser-launch-plan.schema.json', 'browser-launch-results.json', 'browser-launch-results.schema.json', 'artifactUrl', 'x-counting-characters', 'paper-submission-bundle.schema.json', 'docs/paper/output/submission-bundle', 'paper:bundle', 'paper:bundle:verify', 'paper:launch-plan', 'paper:launch-results', 'paper:launch-results:strict', 'release:cut:plan', 'release:cut:apply', 'release:readiness', 'release:readiness:strict', 'python:release:check', 'Python package release verifier', 'npm audit --omit=dev --audit-level=moderate', 'bench:guard:adapter-registry:validate', 'bench:guard:adapter-module:validate', 'bench:guard:adapter-self-test', 'bench:guard:adapter-self-test:validate', 'bench:guard:publication:verify', 'bench:guard:external:dry-run', 'bench:guard:external:evidence', 'bench:guard:external:evidence:strict', 'paper:claims', 'paper:publication-pack', 'guardbench-adapter-self-test.schema.json', 'guardbench-adapter-registry.schema.json', 'guardbench-external-dry-run.schema.json', 'guardbench-external-evidence.schema.json', 'guardbench-publication-verification.schema.json', 'zep-cloud.mjs', 'bench:guard:zep', 'ZEP_API_KEY'], 'SUBMISSION_README.md', failures);
196
- ensureContainsAllProse(submission, ['source-control release-state check', 'live remote-head verification', 'git ls-remote', 'npm registry/auth readiness', 'npm whoami', 'audrey@1.0.0', 'PyPI publish readiness'], 'SUBMISSION_README.md', failures);
197
- ensureContainsAll(packageJsonText, ['"scripts/*.py"', '"python:release:check"', '"paper:arxiv:compile"', '"paper:arxiv:compile:strict"'], 'package.json', failures);
236
+ ensureContainsAll(
237
+ ledger,
238
+ [
239
+ '| E46 -',
240
+ '| E47 -',
241
+ '| E48 -',
242
+ '| E49 -',
243
+ '| E50 -',
244
+ '| E51 -',
245
+ '| E52 -',
246
+ '| E53 -',
247
+ '| E54 -',
248
+ '| E55 -',
249
+ '| E56 -',
250
+ '| E57 -',
251
+ '| E58 -',
252
+ '| E59 -',
253
+ '| E60 -',
254
+ '| E61 -',
255
+ '| E62 -',
256
+ '| E63 -',
257
+ '| E64 -',
258
+ '| E65 -',
259
+ '| E66 -',
260
+ '| E67 -',
261
+ '| E68 -',
262
+ '| E69 -',
263
+ '| E70 -',
264
+ '| E71 -',
265
+ '| E72 -',
266
+ '| E73 -',
267
+ '| E74 -',
268
+ '| E75 -',
269
+ '| E76 -',
270
+ '| E77 -',
271
+ '| E78 -',
272
+ '| E79 -',
273
+ '| E80 -',
274
+ '| E81 -',
275
+ '| E82 -',
276
+ '| E83 -',
277
+ '| E84 -',
278
+ '| E85 -',
279
+ '| E86 -',
280
+ '| E87 -',
281
+ '| E88 -',
282
+ '| E89 -',
283
+ '| E90 -',
284
+ '| E91 -',
285
+ '| E92 -',
286
+ '| E93 -',
287
+ '| E94 -',
288
+ '| E95 -',
289
+ '| E96 -',
290
+ '| E97 -',
291
+ ],
292
+ 'evidence-ledger.md',
293
+ failures,
294
+ );
295
+ ensureContainsAll(
296
+ submission,
297
+ [
298
+ 'Ledger: E46-E51',
299
+ 'artifact redaction sweep',
300
+ 'local absolute-path sweep',
301
+ 'public-paths.mjs',
302
+ 'adapter-kit.mjs',
303
+ 'registry.json',
304
+ 'claim-register.json',
305
+ 'publication-pack.json',
306
+ 'reservedUrlChars',
307
+ 'arxiv-source.schema.json',
308
+ 'arxiv-compile-report.schema.json',
309
+ 'arxiv-compile-report.json',
310
+ 'docs/paper/output/arxiv',
311
+ 'paper:arxiv',
312
+ 'paper:arxiv:verify',
313
+ 'paper:arxiv:compile',
314
+ 'paper:arxiv:compile:strict',
315
+ 'browser-launch-plan.json',
316
+ 'browser-launch-plan.schema.json',
317
+ 'browser-launch-results.json',
318
+ 'browser-launch-results.schema.json',
319
+ 'artifactUrl',
320
+ 'x-counting-characters',
321
+ 'paper-submission-bundle.schema.json',
322
+ 'docs/paper/output/submission-bundle',
323
+ 'paper:bundle',
324
+ 'paper:bundle:verify',
325
+ 'paper:launch-plan',
326
+ 'paper:launch-results',
327
+ 'paper:launch-results:strict',
328
+ 'release:cut:plan',
329
+ 'release:cut:apply',
330
+ 'release:readiness',
331
+ 'release:readiness:strict',
332
+ 'python:release:check',
333
+ 'Python package release verifier',
334
+ 'npm audit --omit=dev --audit-level=moderate',
335
+ 'bench:guard:adapter-registry:validate',
336
+ 'bench:guard:adapter-module:validate',
337
+ 'bench:guard:adapter-self-test',
338
+ 'bench:guard:adapter-self-test:validate',
339
+ 'bench:guard:publication:verify',
340
+ 'bench:guard:external:dry-run',
341
+ 'bench:guard:external:evidence',
342
+ 'bench:guard:external:evidence:strict',
343
+ 'paper:claims',
344
+ 'paper:publication-pack',
345
+ 'guardbench-adapter-self-test.schema.json',
346
+ 'guardbench-adapter-registry.schema.json',
347
+ 'guardbench-external-dry-run.schema.json',
348
+ 'guardbench-external-evidence.schema.json',
349
+ 'guardbench-publication-verification.schema.json',
350
+ 'zep-cloud.mjs',
351
+ 'bench:guard:zep',
352
+ 'ZEP_API_KEY',
353
+ ],
354
+ 'SUBMISSION_README.md',
355
+ failures,
356
+ );
357
+ ensureContainsAllProse(
358
+ submission,
359
+ [
360
+ 'source-control release-state check',
361
+ 'live remote-head verification',
362
+ 'git ls-remote',
363
+ 'npm registry/auth readiness',
364
+ 'npm whoami',
365
+ 'audrey@1.0.0',
366
+ 'PyPI publish readiness',
367
+ ],
368
+ 'SUBMISSION_README.md',
369
+ failures,
370
+ );
371
+ ensureContainsAll(
372
+ packageJsonText,
373
+ [
374
+ '"scripts/*.py"',
375
+ '"python:release:check"',
376
+ '"paper:arxiv:compile"',
377
+ '"paper:arxiv:compile:strict"',
378
+ ],
379
+ 'package.json',
380
+ failures,
381
+ );
198
382
  if (!claimReport.ok) {
199
- failures.push(...claimReport.failures.map(failure => `Paper claim verification failed: ${failure}`));
383
+ failures.push(
384
+ ...claimReport.failures.map(failure => `Paper claim verification failed: ${failure}`),
385
+ );
200
386
  }
201
387
  if (!publicationPackReport.ok) {
202
- failures.push(...publicationPackReport.failures.map(failure => `Publication pack verification failed: ${failure}`));
388
+ failures.push(
389
+ ...publicationPackReport.failures.map(
390
+ failure => `Publication pack verification failed: ${failure}`,
391
+ ),
392
+ );
203
393
  }
204
394
  if (!arxivSourceReport.ok) {
205
- failures.push(...arxivSourceReport.failures.map(failure => `arXiv source package verification failed: ${failure}`));
395
+ failures.push(
396
+ ...arxivSourceReport.failures.map(
397
+ failure => `arXiv source package verification failed: ${failure}`,
398
+ ),
399
+ );
206
400
  }
207
401
  if (!arxivCompileReport.ok) {
208
- failures.push(...arxivCompileReport.failures.map(failure => `arXiv compile report verification failed: ${failure}`));
402
+ failures.push(
403
+ ...arxivCompileReport.failures.map(
404
+ failure => `arXiv compile report verification failed: ${failure}`,
405
+ ),
406
+ );
209
407
  }
210
408
  if (!browserLaunchReport.ok) {
211
- failures.push(...browserLaunchReport.failures.map(failure => `Browser launch plan verification failed: ${failure}`));
409
+ failures.push(
410
+ ...browserLaunchReport.failures.map(
411
+ failure => `Browser launch plan verification failed: ${failure}`,
412
+ ),
413
+ );
212
414
  }
213
415
  if (!browserLaunchResultsReport.ok) {
214
- failures.push(...browserLaunchResultsReport.failures.map(failure => `Browser launch results verification failed: ${failure}`));
416
+ failures.push(
417
+ ...browserLaunchResultsReport.failures.map(
418
+ failure => `Browser launch results verification failed: ${failure}`,
419
+ ),
420
+ );
215
421
  }
216
422
  if (!paperBundleReport.ok) {
217
- failures.push(...paperBundleReport.failures.map(failure => `Paper submission bundle verification failed: ${failure}`));
423
+ failures.push(
424
+ ...paperBundleReport.failures.map(
425
+ failure => `Paper submission bundle verification failed: ${failure}`,
426
+ ),
427
+ );
218
428
  }
219
429
  if (arxivCompileReport.status === 'passed') {
220
- assert(paperBundleReport.files.includes('docs/paper/output/arxiv-compile/main.pdf'), 'Paper submission bundle missing compiled arXiv PDF', failures);
221
- assert(paperBundleReport.files.includes('docs/paper/output/arxiv-compile/arxiv-compile.log'), 'Paper submission bundle missing arXiv compile log', failures);
430
+ assert(
431
+ paperBundleReport.files.includes('docs/paper/output/arxiv-compile/main.pdf'),
432
+ 'Paper submission bundle missing compiled arXiv PDF',
433
+ failures,
434
+ );
435
+ assert(
436
+ paperBundleReport.files.includes('docs/paper/output/arxiv-compile/arxiv-compile.log'),
437
+ 'Paper submission bundle missing arXiv compile log',
438
+ failures,
439
+ );
222
440
  }
223
441
  const firstXPost = publicationPackReport.entries.find(entry => entry.id === 'x-post-1');
224
442
  assert(firstXPost?.requiresArtifactUrl === true, 'x-post-1 must require an artifact URL', failures);
225
- assert(firstXPost?.reservedUrlChars >= 24, 'x-post-1 must reserve at least 24 characters for an X URL plus separator', failures);
226
- assert(firstXPost?.effectiveChars <= 280, 'x-post-1 text plus URL reserve must fit within 280 characters', failures);
227
- ensureContainsAll(browserPlan, ['x-counting-characters', 'https://docs.x.com/fundamentals/counting-characters', 'reservedUrlChars'], 'browser-launch-plan.json', failures);
228
- ensureContainsAll(browserLaunchResultsVerifier, ['submitted artifact-url target must record artifactUrl'], 'verify-browser-launch-results.mjs', failures);
443
+ assert(
444
+ firstXPost?.reservedUrlChars >= 24,
445
+ 'x-post-1 must reserve at least 24 characters for an X URL plus separator',
446
+ failures,
447
+ );
448
+ assert(
449
+ firstXPost?.effectiveChars <= 280,
450
+ 'x-post-1 text plus URL reserve must fit within 280 characters',
451
+ failures,
452
+ );
453
+ ensureContainsAll(
454
+ browserPlan,
455
+ [
456
+ 'x-counting-characters',
457
+ 'https://docs.x.com/fundamentals/counting-characters',
458
+ 'reservedUrlChars',
459
+ ],
460
+ 'browser-launch-plan.json',
461
+ failures,
462
+ );
463
+ ensureContainsAll(
464
+ browserLaunchResultsVerifier,
465
+ ['submitted artifact-url target must record artifactUrl'],
466
+ 'verify-browser-launch-results.mjs',
467
+ failures,
468
+ );
229
469
 
230
- const manifestSchemaErrors = validateSchema(guardManifest, guardManifestSchema, 'guardbench-manifest');
231
- for (const error of manifestSchemaErrors) failures.push(`GuardBench manifest schema violation: ${error}`);
470
+ const manifestSchemaErrors = validateSchema(
471
+ guardManifest,
472
+ guardManifestSchema,
473
+ 'guardbench-manifest',
474
+ );
475
+ for (const error of manifestSchemaErrors)
476
+ failures.push(`GuardBench manifest schema violation: ${error}`);
232
477
  const summarySchemaErrors = validateSchema(guardSummary, guardSummarySchema, 'guardbench-summary');
233
- for (const error of summarySchemaErrors) failures.push(`GuardBench summary schema violation: ${error}`);
478
+ for (const error of summarySchemaErrors)
479
+ failures.push(`GuardBench summary schema violation: ${error}`);
234
480
  const rawSchemaErrors = validateSchema(guardRaw, guardRawSchema, 'guardbench-raw');
235
481
  for (const error of rawSchemaErrors) failures.push(`GuardBench raw schema violation: ${error}`);
236
- const adapterSelfTestSchemaErrors = validateSchema(guardAdapterSelfTest, guardAdapterSelfTestSchema, 'guardbench-adapter-self-test');
237
- for (const error of adapterSelfTestSchemaErrors) failures.push(`GuardBench adapter self-test schema violation: ${error}`);
238
- const adapterRegistrySchemaErrors = validateSchema(guardAdapterRegistry, guardAdapterRegistrySchema, 'guardbench-adapter-registry');
239
- for (const error of adapterRegistrySchemaErrors) failures.push(`GuardBench adapter registry schema violation: ${error}`);
240
- const externalDryRunSchemaErrors = validateSchema(guardExternalDryRun, guardExternalDryRunSchema, 'guardbench-external-dry-run');
241
- for (const error of externalDryRunSchemaErrors) failures.push(`GuardBench external dry-run schema violation: ${error}`);
242
- const externalEvidenceSchemaErrors = validateSchema(guardExternalEvidence, guardExternalEvidenceSchema, 'guardbench-external-evidence');
243
- for (const error of externalEvidenceSchemaErrors) failures.push(`GuardBench external evidence schema violation: ${error}`);
482
+ const adapterSelfTestSchemaErrors = validateSchema(
483
+ guardAdapterSelfTest,
484
+ guardAdapterSelfTestSchema,
485
+ 'guardbench-adapter-self-test',
486
+ );
487
+ for (const error of adapterSelfTestSchemaErrors)
488
+ failures.push(`GuardBench adapter self-test schema violation: ${error}`);
489
+ const adapterRegistrySchemaErrors = validateSchema(
490
+ guardAdapterRegistry,
491
+ guardAdapterRegistrySchema,
492
+ 'guardbench-adapter-registry',
493
+ );
494
+ for (const error of adapterRegistrySchemaErrors)
495
+ failures.push(`GuardBench adapter registry schema violation: ${error}`);
496
+ const externalDryRunSchemaErrors = validateSchema(
497
+ guardExternalDryRun,
498
+ guardExternalDryRunSchema,
499
+ 'guardbench-external-dry-run',
500
+ );
501
+ for (const error of externalDryRunSchemaErrors)
502
+ failures.push(`GuardBench external dry-run schema violation: ${error}`);
503
+ const externalEvidenceSchemaErrors = validateSchema(
504
+ guardExternalEvidence,
505
+ guardExternalEvidenceSchema,
506
+ 'guardbench-external-evidence',
507
+ );
508
+ for (const error of externalEvidenceSchemaErrors)
509
+ failures.push(`GuardBench external evidence schema violation: ${error}`);
244
510
  const registryIds = guardAdapterRegistry.adapters.map(adapter => adapter.id);
245
- assert(registryIds.includes('mem0-platform'), 'GuardBench adapter registry missing mem0-platform', failures);
246
- assert(registryIds.includes('zep-cloud'), 'GuardBench adapter registry missing zep-cloud', failures);
511
+ assert(
512
+ registryIds.includes('mem0-platform'),
513
+ 'GuardBench adapter registry missing mem0-platform',
514
+ failures,
515
+ );
516
+ assert(
517
+ registryIds.includes('zep-cloud'),
518
+ 'GuardBench adapter registry missing zep-cloud',
519
+ failures,
520
+ );
247
521
  const dryRunIds = guardExternalDryRun.adapters.map(adapter => adapter.id);
248
- assert(dryRunIds.includes('mem0-platform'), 'GuardBench external dry-run matrix missing mem0-platform', failures);
249
- assert(dryRunIds.includes('zep-cloud'), 'GuardBench external dry-run matrix missing zep-cloud', failures);
250
- assert(guardExternalDryRun.adapters.every(adapter => !JSON.stringify(adapter).includes('runtime-key')), 'GuardBench external dry-run matrix contains a test secret', failures);
522
+ assert(
523
+ dryRunIds.includes('mem0-platform'),
524
+ 'GuardBench external dry-run matrix missing mem0-platform',
525
+ failures,
526
+ );
527
+ assert(
528
+ dryRunIds.includes('zep-cloud'),
529
+ 'GuardBench external dry-run matrix missing zep-cloud',
530
+ failures,
531
+ );
532
+ assert(
533
+ guardExternalDryRun.adapters.every(adapter => !JSON.stringify(adapter).includes('runtime-key')),
534
+ 'GuardBench external dry-run matrix contains a test secret',
535
+ failures,
536
+ );
251
537
  const evidenceIds = guardExternalEvidence.adapters.map(adapter => adapter.id);
252
- assert(guardExternalEvidence.allowPending === true, 'GuardBench external evidence report should allow pending live runs in the release gate', failures);
253
- assert(evidenceIds.includes('mem0-platform'), 'GuardBench external evidence report missing mem0-platform', failures);
254
- assert(evidenceIds.includes('zep-cloud'), 'GuardBench external evidence report missing zep-cloud', failures);
255
- assert(guardExternalEvidence.adapters.every(adapter => ['pending', 'verified'].includes(adapter.status)), 'GuardBench external evidence report has an invalid adapter status', failures);
256
- assert(guardExternalEvidence.adapters.every(adapter => !JSON.stringify(adapter).includes('runtime-key')), 'GuardBench external evidence report contains a test secret', failures);
538
+ assert(
539
+ guardExternalEvidence.allowPending === true,
540
+ 'GuardBench external evidence report should allow pending live runs in the release gate',
541
+ failures,
542
+ );
543
+ assert(
544
+ evidenceIds.includes('mem0-platform'),
545
+ 'GuardBench external evidence report missing mem0-platform',
546
+ failures,
547
+ );
548
+ assert(
549
+ evidenceIds.includes('zep-cloud'),
550
+ 'GuardBench external evidence report missing zep-cloud',
551
+ failures,
552
+ );
553
+ assert(
554
+ guardExternalEvidence.adapters.every(adapter => ['pending', 'verified'].includes(adapter.status)),
555
+ 'GuardBench external evidence report has an invalid adapter status',
556
+ failures,
557
+ );
558
+ assert(
559
+ guardExternalEvidence.adapters.every(adapter => !JSON.stringify(adapter).includes('runtime-key')),
560
+ 'GuardBench external evidence report contains a test secret',
561
+ failures,
562
+ );
257
563
  const zepAdapter = guardAdapterRegistry.adapters.find(adapter => adapter.id === 'zep-cloud');
258
- assert(zepAdapter?.credentialMode === 'runtime-env', 'Zep adapter must require runtime environment credentials', failures);
259
- assert(zepAdapter?.requiredEnv?.includes('ZEP_API_KEY'), 'Zep adapter registry entry missing ZEP_API_KEY', failures);
260
- assert(zepAdapter?.commands?.externalRun === 'npm run bench:guard:zep', 'Zep adapter external-run command is stale', failures);
564
+ assert(
565
+ zepAdapter?.credentialMode === 'runtime-env',
566
+ 'Zep adapter must require runtime environment credentials',
567
+ failures,
568
+ );
569
+ assert(
570
+ zepAdapter?.requiredEnv?.includes('ZEP_API_KEY'),
571
+ 'Zep adapter registry entry missing ZEP_API_KEY',
572
+ failures,
573
+ );
574
+ assert(
575
+ zepAdapter?.commands?.externalRun === 'npm run bench:guard:zep',
576
+ 'Zep adapter external-run command is stale',
577
+ failures,
578
+ );
261
579
  const publicationVerificationFixture = {
262
580
  schemaVersion: '1.0.0',
263
581
  suite: 'GuardBench publication artifact verification',
@@ -281,7 +599,8 @@ const publicationVerificationSchemaErrors = validateSchema(
281
599
  guardPublicationVerificationSchema,
282
600
  'guardbench-publication-verification',
283
601
  );
284
- for (const error of publicationVerificationSchemaErrors) failures.push(`GuardBench publication verifier schema violation: ${error}`);
602
+ for (const error of publicationVerificationSchemaErrors)
603
+ failures.push(`GuardBench publication verifier schema violation: ${error}`);
285
604
 
286
605
  const benchmarkNeedles = [
287
606
  summary.generatedAt,
@@ -294,30 +613,165 @@ ensureContainsAll(paper, benchmarkNeedles, 'audrey-paper-v1.md', failures);
294
613
 
295
614
  const latency = guardSummary.latency;
296
615
  const guardLatencyText = `${formatMetric(latency.p50Ms)} ms / ${formatMetric(latency.p95Ms)} ms`;
297
- ensureContainsAll(evaluation, [guardLatencyText, '| Published artifact raw-secret leaks | 0 |'], '07-evaluation.md', failures);
298
- ensureContainsAll(paper, [guardLatencyText, '| Published artifact raw-secret leaks | 0 |'], 'audrey-paper-v1.md', failures);
299
- ensureContainsAll(readme, [`${formatMetric(latency.p50Ms)}ms / ${formatMetric(latency.p95Ms)}ms`, '0 published artifact leaks'], 'README.md', failures);
300
- ensureContainsAll(readme, ['bench:guard:zep', 'bench:guard:external:dry-run', 'bench:guard:external:evidence', 'bench:guard:external:evidence:strict', 'paper:arxiv:compile', 'paper:arxiv:compile:strict', 'paper:launch-results', 'paper:launch-results:strict', 'release:cut:plan', 'release:cut:apply', 'release:readiness', 'release:readiness:strict', 'python:release:check', 'absolute-path sweep', 'X URL reserve', 'submitted artifact-url targets', 'external dry-run matrix', 'external evidence verification', 'ZEP_API_KEY', 'ZEP_GUARDBENCH_INGEST_DELAY_MS'], 'README.md', failures);
301
- ensureContainsAllProse(readme, ['source-control state', 'live remote-head verification', 'npm registry/auth readiness', 'PyPI publish readiness'], 'README.md', failures);
302
- ensureContainsAll(paper, ['Zep Cloud', 'ZEP_API_KEY', 'Mem0 and Zep adapters', 'external dry-run matrix', 'external evidence verification', 'reserved URL budget', 'submitted artifact-url targets', 'arXiv compile report', 'release-readiness verifier', 'release-cut planner', 'Python package verifier'], 'audrey-paper-v1.md', failures);
303
- ensureContainsAllProse(paper, ['source-control release-state check', 'live remote-head verification', 'npm registry/auth readiness', 'npm whoami', 'audrey@1.0.0', 'PyPI publish readiness'], 'audrey-paper-v1.md', failures);
304
- ensureContainsAll(ledger, [`${formatMetric(latency.p50Ms)}ms/${formatMetric(latency.p95Ms)}ms`, 'zero published artifact raw-secret leaks'], 'evidence-ledger.md', failures);
616
+ ensureContainsAll(
617
+ evaluation,
618
+ [guardLatencyText, '| Published artifact raw-secret leaks | 0 |'],
619
+ '07-evaluation.md',
620
+ failures,
621
+ );
622
+ ensureContainsAll(
623
+ paper,
624
+ [guardLatencyText, '| Published artifact raw-secret leaks | 0 |'],
625
+ 'audrey-paper-v1.md',
626
+ failures,
627
+ );
628
+ ensureContainsAll(
629
+ readme,
630
+ [
631
+ `${formatMetric(latency.p50Ms)}ms / ${formatMetric(latency.p95Ms)}ms`,
632
+ '0 published artifact leaks',
633
+ ],
634
+ 'README.md',
635
+ failures,
636
+ );
637
+ ensureContainsAll(
638
+ readme,
639
+ [
640
+ 'bench:guard:zep',
641
+ 'bench:guard:external:dry-run',
642
+ 'bench:guard:external:evidence',
643
+ 'bench:guard:external:evidence:strict',
644
+ 'paper:arxiv:compile',
645
+ 'paper:arxiv:compile:strict',
646
+ 'paper:launch-results',
647
+ 'paper:launch-results:strict',
648
+ 'release:cut:plan',
649
+ 'release:cut:apply',
650
+ 'release:readiness',
651
+ 'release:readiness:strict',
652
+ 'python:release:check',
653
+ 'absolute-path sweep',
654
+ 'X URL reserve',
655
+ 'submitted artifact-url targets',
656
+ 'external dry-run matrix',
657
+ 'external evidence verification',
658
+ 'ZEP_API_KEY',
659
+ 'ZEP_GUARDBENCH_INGEST_DELAY_MS',
660
+ ],
661
+ 'README.md',
662
+ failures,
663
+ );
664
+ ensureContainsAllProse(
665
+ readme,
666
+ [
667
+ 'source-control state',
668
+ 'live remote-head verification',
669
+ 'npm registry/auth readiness',
670
+ 'PyPI publish readiness',
671
+ ],
672
+ 'README.md',
673
+ failures,
674
+ );
675
+ ensureContainsAll(
676
+ paper,
677
+ [
678
+ 'Zep Cloud',
679
+ 'ZEP_API_KEY',
680
+ 'Mem0 and Zep adapters',
681
+ 'external dry-run matrix',
682
+ 'external evidence verification',
683
+ 'reserved URL budget',
684
+ 'submitted artifact-url targets',
685
+ 'arXiv compile report',
686
+ 'release-readiness verifier',
687
+ 'release-cut planner',
688
+ 'Python package verifier',
689
+ ],
690
+ 'audrey-paper-v1.md',
691
+ failures,
692
+ );
693
+ ensureContainsAllProse(
694
+ paper,
695
+ [
696
+ 'source-control release-state check',
697
+ 'live remote-head verification',
698
+ 'npm registry/auth readiness',
699
+ 'npm whoami',
700
+ 'audrey@1.0.0',
701
+ 'PyPI publish readiness',
702
+ ],
703
+ 'audrey-paper-v1.md',
704
+ failures,
705
+ );
706
+ ensureContainsAll(
707
+ ledger,
708
+ [
709
+ `${formatMetric(latency.p50Ms)}ms/${formatMetric(latency.p95Ms)}ms`,
710
+ 'zero published artifact raw-secret leaks',
711
+ ],
712
+ 'evidence-ledger.md',
713
+ failures,
714
+ );
305
715
 
306
- assert(guardSummary.passed === 10, `GuardBench expected 10 passed scenarios, got ${guardSummary.passed}`, failures);
307
- assert(guardSummary.scenarios === 10, `GuardBench expected 10 scenarios, got ${guardSummary.scenarios}`, failures);
308
- assert(guardSummary.redactionLeaks === 0, `GuardBench decision-output leaks expected 0, got ${guardSummary.redactionLeaks}`, failures);
309
- assert(guardSummary.artifactRedactionSweep?.passed === true, 'GuardBench artifactRedactionSweep did not pass', failures);
310
- assert(guardSummary.artifactRedactionSweep?.leakCount === 0, `GuardBench artifact leak count expected 0, got ${guardSummary.artifactRedactionSweep?.leakCount}`, failures);
311
- assert(guardRaw.artifactRedactionSweep?.passed === true, 'Raw GuardBench artifactRedactionSweep did not pass', failures);
716
+ assert(
717
+ guardSummary.passed === 10,
718
+ `GuardBench expected 10 passed scenarios, got ${guardSummary.passed}`,
719
+ failures,
720
+ );
721
+ assert(
722
+ guardSummary.scenarios === 10,
723
+ `GuardBench expected 10 scenarios, got ${guardSummary.scenarios}`,
724
+ failures,
725
+ );
726
+ assert(
727
+ guardSummary.redactionLeaks === 0,
728
+ `GuardBench decision-output leaks expected 0, got ${guardSummary.redactionLeaks}`,
729
+ failures,
730
+ );
731
+ assert(
732
+ guardSummary.artifactRedactionSweep?.passed === true,
733
+ 'GuardBench artifactRedactionSweep did not pass',
734
+ failures,
735
+ );
736
+ assert(
737
+ guardSummary.artifactRedactionSweep?.leakCount === 0,
738
+ `GuardBench artifact leak count expected 0, got ${guardSummary.artifactRedactionSweep?.leakCount}`,
739
+ failures,
740
+ );
741
+ assert(
742
+ guardRaw.artifactRedactionSweep?.passed === true,
743
+ 'Raw GuardBench artifactRedactionSweep did not pass',
744
+ failures,
745
+ );
312
746
 
313
747
  const manifestText = JSON.stringify(guardManifest);
314
748
  const summaryText = JSON.stringify(guardSummary);
315
749
  const rawText = JSON.stringify(guardRaw);
316
- assert(!manifestText.includes(SEEDED_SECRET), 'GuardBench manifest contains the raw seeded secret', failures);
317
- assert(!summaryText.includes(SEEDED_SECRET), 'GuardBench summary contains the raw seeded secret', failures);
318
- assert(!rawText.includes(SEEDED_SECRET), 'GuardBench raw output contains the raw seeded secret', failures);
319
- assert(manifestText.includes('seededSecretRefs'), 'GuardBench manifest missing seededSecretRefs', failures);
320
- assert(!manifestText.includes('"seededSecrets"'), 'GuardBench manifest still publishes seededSecrets', failures);
750
+ assert(
751
+ !manifestText.includes(SEEDED_SECRET),
752
+ 'GuardBench manifest contains the raw seeded secret',
753
+ failures,
754
+ );
755
+ assert(
756
+ !summaryText.includes(SEEDED_SECRET),
757
+ 'GuardBench summary contains the raw seeded secret',
758
+ failures,
759
+ );
760
+ assert(
761
+ !rawText.includes(SEEDED_SECRET),
762
+ 'GuardBench raw output contains the raw seeded secret',
763
+ failures,
764
+ );
765
+ assert(
766
+ manifestText.includes('seededSecretRefs'),
767
+ 'GuardBench manifest missing seededSecretRefs',
768
+ failures,
769
+ );
770
+ assert(
771
+ !manifestText.includes('"seededSecrets"'),
772
+ 'GuardBench manifest still publishes seededSecrets',
773
+ failures,
774
+ );
321
775
 
322
776
  if (failures.length) {
323
777
  console.error('Paper artifact verification failed:');
@@ -330,9 +784,15 @@ console.log(`Evidence rows: ${evidenceRows}`);
330
784
  console.log(`Bibliography entries: ${bibEntries}`);
331
785
  console.log(`Paper claims: ${claimReport.claims.length}`);
332
786
  console.log(`Publication pack entries: ${publicationPackReport.entries.length}`);
333
- console.log(`arXiv source files: ${arxivSourceReport.files.length}, citations ${arxivSourceReport.citationCount}`);
787
+ console.log(
788
+ `arXiv source files: ${arxivSourceReport.files.length}, citations ${arxivSourceReport.citationCount}`,
789
+ );
334
790
  console.log(`arXiv compile status: ${arxivCompileReport.status}`);
335
791
  console.log(`Browser launch targets: ${browserLaunchReport.targets.length}`);
336
- console.log(`Browser launch results: ${browserLaunchResultsReport.targets.length} targets, ready=${browserLaunchResultsReport.ready}`);
792
+ console.log(
793
+ `Browser launch results: ${browserLaunchResultsReport.targets.length} targets, ready=${browserLaunchResultsReport.ready}`,
794
+ );
337
795
  console.log(`Paper bundle files: ${paperBundleReport.files.length}`);
338
- console.log(`GuardBench: ${guardSummary.passed}/${guardSummary.scenarios}, latency ${latency.p50Ms}ms/${latency.p95Ms}ms, artifact leaks ${guardSummary.artifactRedactionSweep.leakCount}`);
796
+ console.log(
797
+ `GuardBench: ${guardSummary.passed}/${guardSummary.scenarios}, latency ${latency.p50Ms}ms/${latency.p95Ms}ms, artifact leaks ${guardSummary.artifactRedactionSweep.leakCount}`,
798
+ );