audrey 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +57 -0
- package/README.md +13 -3
- package/benchmarks/adapter-self-test.mjs +6 -2
- package/benchmarks/adapters/example-allow.mjs +5 -2
- package/benchmarks/adapters/mem0-platform.mjs +19 -12
- package/benchmarks/adapters/zep-cloud.mjs +51 -27
- package/benchmarks/baselines.js +11 -6
- package/benchmarks/build-leaderboard.mjs +36 -23
- package/benchmarks/cases.js +24 -12
- package/benchmarks/create-conformance-card.mjs +12 -3
- package/benchmarks/create-submission-bundle.mjs +22 -8
- package/benchmarks/dry-run-external-adapters.mjs +24 -12
- package/benchmarks/guardbench.js +263 -123
- package/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +4 -4
- package/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
- package/benchmarks/output/external/guardbench-external-evidence.json +1 -1
- package/benchmarks/output/guardbench-conformance-card.json +12 -12
- package/benchmarks/output/guardbench-raw.json +106 -106
- package/benchmarks/output/guardbench-summary.json +168 -168
- package/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
- package/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
- package/benchmarks/output/submission-bundle/guardbench-conformance-card.json +12 -12
- package/benchmarks/output/submission-bundle/guardbench-raw.json +106 -106
- package/benchmarks/output/submission-bundle/guardbench-summary.json +168 -168
- package/benchmarks/output/submission-bundle/submission-manifest.json +11 -11
- package/benchmarks/output/submission-bundle/validation-report.json +1 -1
- package/benchmarks/output/summary.json +58 -58
- package/benchmarks/perf-snapshot.js +12 -9
- package/benchmarks/perf.bench.js +14 -6
- package/benchmarks/public-paths.mjs +11 -5
- package/benchmarks/reference-results.js +10 -5
- package/benchmarks/report.js +48 -27
- package/benchmarks/run-external-guardbench.mjs +47 -25
- package/benchmarks/run.js +112 -59
- package/benchmarks/validate-adapter-module.mjs +13 -10
- package/benchmarks/validate-adapter-registry.mjs +16 -5
- package/benchmarks/validate-guardbench-artifacts.mjs +76 -19
- package/benchmarks/verify-external-evidence.mjs +86 -31
- package/benchmarks/verify-publication-artifacts.mjs +34 -11
- package/benchmarks/verify-submission-bundle.mjs +9 -4
- package/dist/mcp-server/config.d.ts +1 -1
- package/dist/mcp-server/config.d.ts.map +1 -1
- package/dist/mcp-server/config.js +5 -3
- package/dist/mcp-server/config.js.map +1 -1
- package/dist/mcp-server/index.d.ts +7 -347
- package/dist/mcp-server/index.d.ts.map +1 -1
- package/dist/mcp-server/index.js +289 -256
- package/dist/mcp-server/index.js.map +1 -1
- package/dist/mcp-server/tool-schemas.d.ts +341 -0
- package/dist/mcp-server/tool-schemas.d.ts.map +1 -0
- package/dist/mcp-server/tool-schemas.js +248 -0
- package/dist/mcp-server/tool-schemas.js.map +1 -0
- package/dist/mcp-server/tool-validation.d.ts +17 -0
- package/dist/mcp-server/tool-validation.d.ts.map +1 -0
- package/dist/mcp-server/tool-validation.js +41 -0
- package/dist/mcp-server/tool-validation.js.map +1 -0
- package/dist/src/action-key.d.ts.map +1 -1
- package/dist/src/action-key.js +6 -2
- package/dist/src/action-key.js.map +1 -1
- package/dist/src/adaptive.d.ts.map +1 -1
- package/dist/src/adaptive.js +4 -2
- package/dist/src/adaptive.js.map +1 -1
- package/dist/src/affect.d.ts.map +1 -1
- package/dist/src/affect.js +8 -5
- package/dist/src/affect.js.map +1 -1
- package/dist/src/audrey.d.ts +1 -1
- package/dist/src/audrey.d.ts.map +1 -1
- package/dist/src/audrey.js +93 -49
- package/dist/src/audrey.js.map +1 -1
- package/dist/src/capsule.d.ts.map +1 -1
- package/dist/src/capsule.js +37 -15
- package/dist/src/capsule.js.map +1 -1
- package/dist/src/causal.d.ts +1 -1
- package/dist/src/causal.d.ts.map +1 -1
- package/dist/src/causal.js +4 -2
- package/dist/src/causal.js.map +1 -1
- package/dist/src/confidence.d.ts.map +1 -1
- package/dist/src/confidence.js +5 -5
- package/dist/src/confidence.js.map +1 -1
- package/dist/src/consolidate.d.ts.map +1 -1
- package/dist/src/consolidate.js +17 -9
- package/dist/src/consolidate.js.map +1 -1
- package/dist/src/context.js +1 -1
- package/dist/src/context.js.map +1 -1
- package/dist/src/controller.d.ts.map +1 -1
- package/dist/src/controller.js +24 -13
- package/dist/src/controller.js.map +1 -1
- package/dist/src/db.d.ts.map +1 -1
- package/dist/src/db.js +78 -27
- package/dist/src/db.js.map +1 -1
- package/dist/src/decay.d.ts +1 -1
- package/dist/src/decay.d.ts.map +1 -1
- package/dist/src/decay.js +1 -1
- package/dist/src/decay.js.map +1 -1
- package/dist/src/embedding.d.ts +12 -4
- package/dist/src/embedding.d.ts.map +1 -1
- package/dist/src/embedding.js +18 -16
- package/dist/src/embedding.js.map +1 -1
- package/dist/src/encode.d.ts.map +1 -1
- package/dist/src/encode.js +5 -4
- package/dist/src/encode.js.map +1 -1
- package/dist/src/events.d.ts +3 -2
- package/dist/src/events.d.ts.map +1 -1
- package/dist/src/events.js +7 -3
- package/dist/src/events.js.map +1 -1
- package/dist/src/export.d.ts.map +1 -1
- package/dist/src/export.js +21 -7
- package/dist/src/export.js.map +1 -1
- package/dist/src/feedback.d.ts.map +1 -1
- package/dist/src/feedback.js +1 -1
- package/dist/src/feedback.js.map +1 -1
- package/dist/src/forget.d.ts.map +1 -1
- package/dist/src/forget.js +12 -6
- package/dist/src/forget.js.map +1 -1
- package/dist/src/fts.d.ts.map +1 -1
- package/dist/src/fts.js +20 -8
- package/dist/src/fts.js.map +1 -1
- package/dist/src/hybrid-recall.d.ts.map +1 -1
- package/dist/src/hybrid-recall.js +12 -6
- package/dist/src/hybrid-recall.js.map +1 -1
- package/dist/src/impact.d.ts.map +1 -1
- package/dist/src/impact.js +26 -10
- package/dist/src/impact.js.map +1 -1
- package/dist/src/import.d.ts.map +1 -1
- package/dist/src/import.js +11 -6
- package/dist/src/import.js.map +1 -1
- package/dist/src/index.d.ts +3 -3
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +3 -3
- package/dist/src/index.js.map +1 -1
- package/dist/src/interference.d.ts.map +1 -1
- package/dist/src/interference.js +10 -5
- package/dist/src/interference.js.map +1 -1
- package/dist/src/introspect.d.ts.map +1 -1
- package/dist/src/introspect.js +12 -6
- package/dist/src/introspect.js.map +1 -1
- package/dist/src/llm.d.ts +2 -2
- package/dist/src/llm.d.ts.map +1 -1
- package/dist/src/llm.js +6 -6
- package/dist/src/llm.js.map +1 -1
- package/dist/src/migrate.d.ts.map +1 -1
- package/dist/src/migrate.js +10 -4
- package/dist/src/migrate.js.map +1 -1
- package/dist/src/preflight.d.ts.map +1 -1
- package/dist/src/preflight.js +6 -8
- package/dist/src/preflight.js.map +1 -1
- package/dist/src/profile.d.ts.map +1 -1
- package/dist/src/profile.js.map +1 -1
- package/dist/src/promote.d.ts.map +1 -1
- package/dist/src/promote.js +16 -7
- package/dist/src/promote.js.map +1 -1
- package/dist/src/prompts.d.ts.map +1 -1
- package/dist/src/prompts.js +1 -2
- package/dist/src/prompts.js.map +1 -1
- package/dist/src/recall.d.ts.map +1 -1
- package/dist/src/recall.js +85 -18
- package/dist/src/recall.js.map +1 -1
- package/dist/src/redact.d.ts.map +1 -1
- package/dist/src/redact.js +9 -4
- package/dist/src/redact.js.map +1 -1
- package/dist/src/reflexes.d.ts.map +1 -1
- package/dist/src/reflexes.js +1 -7
- package/dist/src/reflexes.js.map +1 -1
- package/dist/src/rollback.d.ts.map +1 -1
- package/dist/src/rollback.js +4 -2
- package/dist/src/rollback.js.map +1 -1
- package/dist/src/routes.d.ts.map +1 -1
- package/dist/src/routes.js +33 -13
- package/dist/src/routes.js.map +1 -1
- package/dist/src/rules-compiler.d.ts.map +1 -1
- package/dist/src/rules-compiler.js +24 -2
- package/dist/src/rules-compiler.js.map +1 -1
- package/dist/src/server.js +2 -2
- package/dist/src/server.js.map +1 -1
- package/dist/src/tool-trace.d.ts +2 -2
- package/dist/src/tool-trace.d.ts.map +1 -1
- package/dist/src/tool-trace.js +12 -4
- package/dist/src/tool-trace.js.map +1 -1
- package/dist/src/types.d.ts.map +1 -1
- package/dist/src/ulid.js +1 -1
- package/dist/src/ulid.js.map +1 -1
- package/dist/src/utils.d.ts.map +1 -1
- package/dist/src/utils.js.map +1 -1
- package/dist/src/validate.d.ts.map +1 -1
- package/dist/src/validate.js +20 -10
- package/dist/src/validate.js.map +1 -1
- package/docs/paper/07-evaluation.md +5 -5
- package/docs/paper/audrey-paper-v1.md +5 -5
- package/docs/paper/evidence-ledger.md +1 -1
- package/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
- package/docs/paper/output/arxiv/main.tex +5 -5
- package/docs/paper/output/arxiv-compile-report.json +3 -3
- package/docs/paper/output/submission-bundle/README.md +13 -3
- package/docs/paper/output/submission-bundle/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +4 -4
- package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-evidence.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-conformance-card.json +12 -12
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-raw.json +106 -106
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-summary.json +168 -168
- package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
- package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
- package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/submission-manifest.json +11 -11
- package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/validation-report.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/summary.json +64 -64
- package/docs/paper/output/submission-bundle/docs/paper/07-evaluation.md +5 -5
- package/docs/paper/output/submission-bundle/docs/paper/audrey-paper-v1.md +5 -5
- package/docs/paper/output/submission-bundle/docs/paper/evidence-ledger.md +1 -1
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/main.tex +5 -5
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv-compile-report.json +3 -3
- package/docs/paper/output/submission-bundle/package.json +17 -4
- package/docs/paper/output/submission-bundle/paper-submission-manifest.json +34 -34
- package/examples/fintech-ops-demo.js +12 -5
- package/examples/healthcare-ops-demo.js +8 -4
- package/examples/ollama-memory-agent.js +41 -13
- package/examples/stripe-demo.js +12 -5
- package/package.json +17 -4
- package/scripts/audit-release-completion.mjs +179 -101
- package/scripts/create-arxiv-source.mjs +20 -14
- package/scripts/create-paper-submission-bundle.mjs +6 -2
- package/scripts/finalize-release.mjs +111 -36
- package/scripts/prepare-release-cut.mjs +14 -6
- package/scripts/publish-release-bundle.mjs +62 -23
- package/scripts/publish-release-github-api.mjs +89 -24
- package/scripts/smoke-cli.js +9 -9
- package/scripts/sync-paper-artifacts.mjs +5 -1
- package/scripts/verify-arxiv-compile.mjs +52 -16
- package/scripts/verify-arxiv-source.mjs +45 -15
- package/scripts/verify-browser-launch-plan.mjs +28 -11
- package/scripts/verify-browser-launch-results.mjs +32 -14
- package/scripts/verify-paper-artifacts.mjs +539 -79
- package/scripts/verify-paper-claims.mjs +48 -20
- package/scripts/verify-paper-submission-bundle.mjs +22 -11
- package/scripts/verify-publication-pack.mjs +23 -9
- package/scripts/verify-release-readiness.mjs +211 -76
|
@@ -103,13 +103,25 @@ function validateSchema(value, schema, label, root = schema) {
|
|
|
103
103
|
if (currentSchema.minLength != null && String(current).length < currentSchema.minLength) {
|
|
104
104
|
errors.push(`${path}: shorter than minLength ${currentSchema.minLength}`);
|
|
105
105
|
}
|
|
106
|
-
if (
|
|
106
|
+
if (
|
|
107
|
+
currentSchema.pattern &&
|
|
108
|
+
typeof current === 'string' &&
|
|
109
|
+
!new RegExp(currentSchema.pattern).test(current)
|
|
110
|
+
) {
|
|
107
111
|
errors.push(`${path}: does not match ${currentSchema.pattern}`);
|
|
108
112
|
}
|
|
109
|
-
if (
|
|
113
|
+
if (
|
|
114
|
+
currentSchema.minimum != null &&
|
|
115
|
+
typeof current === 'number' &&
|
|
116
|
+
current < currentSchema.minimum
|
|
117
|
+
) {
|
|
110
118
|
errors.push(`${path}: below minimum ${currentSchema.minimum}`);
|
|
111
119
|
}
|
|
112
|
-
if (
|
|
120
|
+
if (
|
|
121
|
+
currentSchema.maximum != null &&
|
|
122
|
+
typeof current === 'number' &&
|
|
123
|
+
current > currentSchema.maximum
|
|
124
|
+
) {
|
|
113
125
|
errors.push(`${path}: above maximum ${currentSchema.maximum}`);
|
|
114
126
|
}
|
|
115
127
|
|
|
@@ -121,14 +133,18 @@ function validateSchema(value, schema, label, root = schema) {
|
|
|
121
133
|
current.forEach((item, index) => validate(item, currentSchema.items, `${path}[${index}]`));
|
|
122
134
|
}
|
|
123
135
|
if (currentSchema.contains) {
|
|
124
|
-
const matched = current.some(
|
|
136
|
+
const matched = current.some(
|
|
137
|
+
item =>
|
|
138
|
+
validateSchema(item, currentSchema.contains, `${path}.contains`, root).length === 0,
|
|
139
|
+
);
|
|
125
140
|
if (!matched) errors.push(`${path}: no item matched contains constraint`);
|
|
126
141
|
}
|
|
127
142
|
}
|
|
128
143
|
|
|
129
144
|
if (currentSchema.type === 'object') {
|
|
130
145
|
for (const required of currentSchema.required ?? []) {
|
|
131
|
-
if (!Object.hasOwn(current, required))
|
|
146
|
+
if (!Object.hasOwn(current, required))
|
|
147
|
+
errors.push(`${path}: missing required property ${required}`);
|
|
132
148
|
}
|
|
133
149
|
if (currentSchema.additionalProperties === false) {
|
|
134
150
|
for (const key of Object.keys(current)) {
|
|
@@ -153,18 +169,32 @@ const summary = readJson('benchmarks/output/summary.json');
|
|
|
153
169
|
const guardSummary = readJson('benchmarks/output/guardbench-summary.json');
|
|
154
170
|
const guardManifest = readJson('benchmarks/output/guardbench-manifest.json');
|
|
155
171
|
const guardRaw = readJson('benchmarks/output/guardbench-raw.json');
|
|
156
|
-
const guardAdapterSelfTest = readJson(
|
|
172
|
+
const guardAdapterSelfTest = readJson(
|
|
173
|
+
'benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json',
|
|
174
|
+
);
|
|
157
175
|
const guardAdapterRegistry = readJson('benchmarks/adapters/registry.json');
|
|
158
176
|
const guardExternalDryRun = readJson('benchmarks/output/external/guardbench-external-dry-run.json');
|
|
159
|
-
const guardExternalEvidence = readJson(
|
|
177
|
+
const guardExternalEvidence = readJson(
|
|
178
|
+
'benchmarks/output/external/guardbench-external-evidence.json',
|
|
179
|
+
);
|
|
160
180
|
const guardManifestSchema = readJson('benchmarks/schemas/guardbench-manifest.schema.json');
|
|
161
181
|
const guardSummarySchema = readJson('benchmarks/schemas/guardbench-summary.schema.json');
|
|
162
182
|
const guardRawSchema = readJson('benchmarks/schemas/guardbench-raw.schema.json');
|
|
163
|
-
const guardAdapterSelfTestSchema = readJson(
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
const
|
|
167
|
-
|
|
183
|
+
const guardAdapterSelfTestSchema = readJson(
|
|
184
|
+
'benchmarks/schemas/guardbench-adapter-self-test.schema.json',
|
|
185
|
+
);
|
|
186
|
+
const guardAdapterRegistrySchema = readJson(
|
|
187
|
+
'benchmarks/schemas/guardbench-adapter-registry.schema.json',
|
|
188
|
+
);
|
|
189
|
+
const guardExternalDryRunSchema = readJson(
|
|
190
|
+
'benchmarks/schemas/guardbench-external-dry-run.schema.json',
|
|
191
|
+
);
|
|
192
|
+
const guardExternalEvidenceSchema = readJson(
|
|
193
|
+
'benchmarks/schemas/guardbench-external-evidence.schema.json',
|
|
194
|
+
);
|
|
195
|
+
const guardPublicationVerificationSchema = readJson(
|
|
196
|
+
'benchmarks/schemas/guardbench-publication-verification.schema.json',
|
|
197
|
+
);
|
|
168
198
|
const packageJsonText = readText('package.json');
|
|
169
199
|
const readme = readText('README.md');
|
|
170
200
|
const evaluation = readText('docs/paper/07-evaluation.md');
|
|
@@ -186,78 +216,366 @@ const local = Object.fromEntries(summary.local.overall.map(row => [row.system, r
|
|
|
186
216
|
const evidenceRows = countEvidenceRows(ledger);
|
|
187
217
|
const bibEntries = countBibEntries(references);
|
|
188
218
|
|
|
189
|
-
assert(
|
|
190
|
-
|
|
219
|
+
assert(
|
|
220
|
+
evidenceRows >= 97,
|
|
221
|
+
`Expected at least 97 evidence ledger rows, found ${evidenceRows}`,
|
|
222
|
+
failures,
|
|
223
|
+
);
|
|
224
|
+
assert(
|
|
225
|
+
submission.includes(`Evidence ledger with ${evidenceRows} rows`),
|
|
226
|
+
'SUBMISSION_README ledger row count is stale',
|
|
227
|
+
failures,
|
|
228
|
+
);
|
|
191
229
|
assert(bibEntries === 21, `Expected 21 bibliography entries, found ${bibEntries}`, failures);
|
|
192
|
-
assert(
|
|
230
|
+
assert(
|
|
231
|
+
submission.includes(`Primary-source bibliography with ${bibEntries} entries`),
|
|
232
|
+
'SUBMISSION_README bibliography count is stale',
|
|
233
|
+
failures,
|
|
234
|
+
);
|
|
193
235
|
|
|
194
|
-
ensureContainsAll(
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
236
|
+
ensureContainsAll(
|
|
237
|
+
ledger,
|
|
238
|
+
[
|
|
239
|
+
'| E46 -',
|
|
240
|
+
'| E47 -',
|
|
241
|
+
'| E48 -',
|
|
242
|
+
'| E49 -',
|
|
243
|
+
'| E50 -',
|
|
244
|
+
'| E51 -',
|
|
245
|
+
'| E52 -',
|
|
246
|
+
'| E53 -',
|
|
247
|
+
'| E54 -',
|
|
248
|
+
'| E55 -',
|
|
249
|
+
'| E56 -',
|
|
250
|
+
'| E57 -',
|
|
251
|
+
'| E58 -',
|
|
252
|
+
'| E59 -',
|
|
253
|
+
'| E60 -',
|
|
254
|
+
'| E61 -',
|
|
255
|
+
'| E62 -',
|
|
256
|
+
'| E63 -',
|
|
257
|
+
'| E64 -',
|
|
258
|
+
'| E65 -',
|
|
259
|
+
'| E66 -',
|
|
260
|
+
'| E67 -',
|
|
261
|
+
'| E68 -',
|
|
262
|
+
'| E69 -',
|
|
263
|
+
'| E70 -',
|
|
264
|
+
'| E71 -',
|
|
265
|
+
'| E72 -',
|
|
266
|
+
'| E73 -',
|
|
267
|
+
'| E74 -',
|
|
268
|
+
'| E75 -',
|
|
269
|
+
'| E76 -',
|
|
270
|
+
'| E77 -',
|
|
271
|
+
'| E78 -',
|
|
272
|
+
'| E79 -',
|
|
273
|
+
'| E80 -',
|
|
274
|
+
'| E81 -',
|
|
275
|
+
'| E82 -',
|
|
276
|
+
'| E83 -',
|
|
277
|
+
'| E84 -',
|
|
278
|
+
'| E85 -',
|
|
279
|
+
'| E86 -',
|
|
280
|
+
'| E87 -',
|
|
281
|
+
'| E88 -',
|
|
282
|
+
'| E89 -',
|
|
283
|
+
'| E90 -',
|
|
284
|
+
'| E91 -',
|
|
285
|
+
'| E92 -',
|
|
286
|
+
'| E93 -',
|
|
287
|
+
'| E94 -',
|
|
288
|
+
'| E95 -',
|
|
289
|
+
'| E96 -',
|
|
290
|
+
'| E97 -',
|
|
291
|
+
],
|
|
292
|
+
'evidence-ledger.md',
|
|
293
|
+
failures,
|
|
294
|
+
);
|
|
295
|
+
ensureContainsAll(
|
|
296
|
+
submission,
|
|
297
|
+
[
|
|
298
|
+
'Ledger: E46-E51',
|
|
299
|
+
'artifact redaction sweep',
|
|
300
|
+
'local absolute-path sweep',
|
|
301
|
+
'public-paths.mjs',
|
|
302
|
+
'adapter-kit.mjs',
|
|
303
|
+
'registry.json',
|
|
304
|
+
'claim-register.json',
|
|
305
|
+
'publication-pack.json',
|
|
306
|
+
'reservedUrlChars',
|
|
307
|
+
'arxiv-source.schema.json',
|
|
308
|
+
'arxiv-compile-report.schema.json',
|
|
309
|
+
'arxiv-compile-report.json',
|
|
310
|
+
'docs/paper/output/arxiv',
|
|
311
|
+
'paper:arxiv',
|
|
312
|
+
'paper:arxiv:verify',
|
|
313
|
+
'paper:arxiv:compile',
|
|
314
|
+
'paper:arxiv:compile:strict',
|
|
315
|
+
'browser-launch-plan.json',
|
|
316
|
+
'browser-launch-plan.schema.json',
|
|
317
|
+
'browser-launch-results.json',
|
|
318
|
+
'browser-launch-results.schema.json',
|
|
319
|
+
'artifactUrl',
|
|
320
|
+
'x-counting-characters',
|
|
321
|
+
'paper-submission-bundle.schema.json',
|
|
322
|
+
'docs/paper/output/submission-bundle',
|
|
323
|
+
'paper:bundle',
|
|
324
|
+
'paper:bundle:verify',
|
|
325
|
+
'paper:launch-plan',
|
|
326
|
+
'paper:launch-results',
|
|
327
|
+
'paper:launch-results:strict',
|
|
328
|
+
'release:cut:plan',
|
|
329
|
+
'release:cut:apply',
|
|
330
|
+
'release:readiness',
|
|
331
|
+
'release:readiness:strict',
|
|
332
|
+
'python:release:check',
|
|
333
|
+
'Python package release verifier',
|
|
334
|
+
'npm audit --omit=dev --audit-level=moderate',
|
|
335
|
+
'bench:guard:adapter-registry:validate',
|
|
336
|
+
'bench:guard:adapter-module:validate',
|
|
337
|
+
'bench:guard:adapter-self-test',
|
|
338
|
+
'bench:guard:adapter-self-test:validate',
|
|
339
|
+
'bench:guard:publication:verify',
|
|
340
|
+
'bench:guard:external:dry-run',
|
|
341
|
+
'bench:guard:external:evidence',
|
|
342
|
+
'bench:guard:external:evidence:strict',
|
|
343
|
+
'paper:claims',
|
|
344
|
+
'paper:publication-pack',
|
|
345
|
+
'guardbench-adapter-self-test.schema.json',
|
|
346
|
+
'guardbench-adapter-registry.schema.json',
|
|
347
|
+
'guardbench-external-dry-run.schema.json',
|
|
348
|
+
'guardbench-external-evidence.schema.json',
|
|
349
|
+
'guardbench-publication-verification.schema.json',
|
|
350
|
+
'zep-cloud.mjs',
|
|
351
|
+
'bench:guard:zep',
|
|
352
|
+
'ZEP_API_KEY',
|
|
353
|
+
],
|
|
354
|
+
'SUBMISSION_README.md',
|
|
355
|
+
failures,
|
|
356
|
+
);
|
|
357
|
+
ensureContainsAllProse(
|
|
358
|
+
submission,
|
|
359
|
+
[
|
|
360
|
+
'source-control release-state check',
|
|
361
|
+
'live remote-head verification',
|
|
362
|
+
'git ls-remote',
|
|
363
|
+
'npm registry/auth readiness',
|
|
364
|
+
'npm whoami',
|
|
365
|
+
'audrey@1.0.0',
|
|
366
|
+
'PyPI publish readiness',
|
|
367
|
+
],
|
|
368
|
+
'SUBMISSION_README.md',
|
|
369
|
+
failures,
|
|
370
|
+
);
|
|
371
|
+
ensureContainsAll(
|
|
372
|
+
packageJsonText,
|
|
373
|
+
[
|
|
374
|
+
'"scripts/*.py"',
|
|
375
|
+
'"python:release:check"',
|
|
376
|
+
'"paper:arxiv:compile"',
|
|
377
|
+
'"paper:arxiv:compile:strict"',
|
|
378
|
+
],
|
|
379
|
+
'package.json',
|
|
380
|
+
failures,
|
|
381
|
+
);
|
|
198
382
|
if (!claimReport.ok) {
|
|
199
|
-
failures.push(
|
|
383
|
+
failures.push(
|
|
384
|
+
...claimReport.failures.map(failure => `Paper claim verification failed: ${failure}`),
|
|
385
|
+
);
|
|
200
386
|
}
|
|
201
387
|
if (!publicationPackReport.ok) {
|
|
202
|
-
failures.push(
|
|
388
|
+
failures.push(
|
|
389
|
+
...publicationPackReport.failures.map(
|
|
390
|
+
failure => `Publication pack verification failed: ${failure}`,
|
|
391
|
+
),
|
|
392
|
+
);
|
|
203
393
|
}
|
|
204
394
|
if (!arxivSourceReport.ok) {
|
|
205
|
-
failures.push(
|
|
395
|
+
failures.push(
|
|
396
|
+
...arxivSourceReport.failures.map(
|
|
397
|
+
failure => `arXiv source package verification failed: ${failure}`,
|
|
398
|
+
),
|
|
399
|
+
);
|
|
206
400
|
}
|
|
207
401
|
if (!arxivCompileReport.ok) {
|
|
208
|
-
failures.push(
|
|
402
|
+
failures.push(
|
|
403
|
+
...arxivCompileReport.failures.map(
|
|
404
|
+
failure => `arXiv compile report verification failed: ${failure}`,
|
|
405
|
+
),
|
|
406
|
+
);
|
|
209
407
|
}
|
|
210
408
|
if (!browserLaunchReport.ok) {
|
|
211
|
-
failures.push(
|
|
409
|
+
failures.push(
|
|
410
|
+
...browserLaunchReport.failures.map(
|
|
411
|
+
failure => `Browser launch plan verification failed: ${failure}`,
|
|
412
|
+
),
|
|
413
|
+
);
|
|
212
414
|
}
|
|
213
415
|
if (!browserLaunchResultsReport.ok) {
|
|
214
|
-
failures.push(
|
|
416
|
+
failures.push(
|
|
417
|
+
...browserLaunchResultsReport.failures.map(
|
|
418
|
+
failure => `Browser launch results verification failed: ${failure}`,
|
|
419
|
+
),
|
|
420
|
+
);
|
|
215
421
|
}
|
|
216
422
|
if (!paperBundleReport.ok) {
|
|
217
|
-
failures.push(
|
|
423
|
+
failures.push(
|
|
424
|
+
...paperBundleReport.failures.map(
|
|
425
|
+
failure => `Paper submission bundle verification failed: ${failure}`,
|
|
426
|
+
),
|
|
427
|
+
);
|
|
218
428
|
}
|
|
219
429
|
if (arxivCompileReport.status === 'passed') {
|
|
220
|
-
assert(
|
|
221
|
-
|
|
430
|
+
assert(
|
|
431
|
+
paperBundleReport.files.includes('docs/paper/output/arxiv-compile/main.pdf'),
|
|
432
|
+
'Paper submission bundle missing compiled arXiv PDF',
|
|
433
|
+
failures,
|
|
434
|
+
);
|
|
435
|
+
assert(
|
|
436
|
+
paperBundleReport.files.includes('docs/paper/output/arxiv-compile/arxiv-compile.log'),
|
|
437
|
+
'Paper submission bundle missing arXiv compile log',
|
|
438
|
+
failures,
|
|
439
|
+
);
|
|
222
440
|
}
|
|
223
441
|
const firstXPost = publicationPackReport.entries.find(entry => entry.id === 'x-post-1');
|
|
224
442
|
assert(firstXPost?.requiresArtifactUrl === true, 'x-post-1 must require an artifact URL', failures);
|
|
225
|
-
assert(
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
443
|
+
assert(
|
|
444
|
+
firstXPost?.reservedUrlChars >= 24,
|
|
445
|
+
'x-post-1 must reserve at least 24 characters for an X URL plus separator',
|
|
446
|
+
failures,
|
|
447
|
+
);
|
|
448
|
+
assert(
|
|
449
|
+
firstXPost?.effectiveChars <= 280,
|
|
450
|
+
'x-post-1 text plus URL reserve must fit within 280 characters',
|
|
451
|
+
failures,
|
|
452
|
+
);
|
|
453
|
+
ensureContainsAll(
|
|
454
|
+
browserPlan,
|
|
455
|
+
[
|
|
456
|
+
'x-counting-characters',
|
|
457
|
+
'https://docs.x.com/fundamentals/counting-characters',
|
|
458
|
+
'reservedUrlChars',
|
|
459
|
+
],
|
|
460
|
+
'browser-launch-plan.json',
|
|
461
|
+
failures,
|
|
462
|
+
);
|
|
463
|
+
ensureContainsAll(
|
|
464
|
+
browserLaunchResultsVerifier,
|
|
465
|
+
['submitted artifact-url target must record artifactUrl'],
|
|
466
|
+
'verify-browser-launch-results.mjs',
|
|
467
|
+
failures,
|
|
468
|
+
);
|
|
229
469
|
|
|
230
|
-
const manifestSchemaErrors = validateSchema(
|
|
231
|
-
|
|
470
|
+
const manifestSchemaErrors = validateSchema(
|
|
471
|
+
guardManifest,
|
|
472
|
+
guardManifestSchema,
|
|
473
|
+
'guardbench-manifest',
|
|
474
|
+
);
|
|
475
|
+
for (const error of manifestSchemaErrors)
|
|
476
|
+
failures.push(`GuardBench manifest schema violation: ${error}`);
|
|
232
477
|
const summarySchemaErrors = validateSchema(guardSummary, guardSummarySchema, 'guardbench-summary');
|
|
233
|
-
for (const error of summarySchemaErrors)
|
|
478
|
+
for (const error of summarySchemaErrors)
|
|
479
|
+
failures.push(`GuardBench summary schema violation: ${error}`);
|
|
234
480
|
const rawSchemaErrors = validateSchema(guardRaw, guardRawSchema, 'guardbench-raw');
|
|
235
481
|
for (const error of rawSchemaErrors) failures.push(`GuardBench raw schema violation: ${error}`);
|
|
236
|
-
const adapterSelfTestSchemaErrors = validateSchema(
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
for (const error of
|
|
242
|
-
|
|
243
|
-
|
|
482
|
+
const adapterSelfTestSchemaErrors = validateSchema(
|
|
483
|
+
guardAdapterSelfTest,
|
|
484
|
+
guardAdapterSelfTestSchema,
|
|
485
|
+
'guardbench-adapter-self-test',
|
|
486
|
+
);
|
|
487
|
+
for (const error of adapterSelfTestSchemaErrors)
|
|
488
|
+
failures.push(`GuardBench adapter self-test schema violation: ${error}`);
|
|
489
|
+
const adapterRegistrySchemaErrors = validateSchema(
|
|
490
|
+
guardAdapterRegistry,
|
|
491
|
+
guardAdapterRegistrySchema,
|
|
492
|
+
'guardbench-adapter-registry',
|
|
493
|
+
);
|
|
494
|
+
for (const error of adapterRegistrySchemaErrors)
|
|
495
|
+
failures.push(`GuardBench adapter registry schema violation: ${error}`);
|
|
496
|
+
const externalDryRunSchemaErrors = validateSchema(
|
|
497
|
+
guardExternalDryRun,
|
|
498
|
+
guardExternalDryRunSchema,
|
|
499
|
+
'guardbench-external-dry-run',
|
|
500
|
+
);
|
|
501
|
+
for (const error of externalDryRunSchemaErrors)
|
|
502
|
+
failures.push(`GuardBench external dry-run schema violation: ${error}`);
|
|
503
|
+
const externalEvidenceSchemaErrors = validateSchema(
|
|
504
|
+
guardExternalEvidence,
|
|
505
|
+
guardExternalEvidenceSchema,
|
|
506
|
+
'guardbench-external-evidence',
|
|
507
|
+
);
|
|
508
|
+
for (const error of externalEvidenceSchemaErrors)
|
|
509
|
+
failures.push(`GuardBench external evidence schema violation: ${error}`);
|
|
244
510
|
const registryIds = guardAdapterRegistry.adapters.map(adapter => adapter.id);
|
|
245
|
-
assert(
|
|
246
|
-
|
|
511
|
+
assert(
|
|
512
|
+
registryIds.includes('mem0-platform'),
|
|
513
|
+
'GuardBench adapter registry missing mem0-platform',
|
|
514
|
+
failures,
|
|
515
|
+
);
|
|
516
|
+
assert(
|
|
517
|
+
registryIds.includes('zep-cloud'),
|
|
518
|
+
'GuardBench adapter registry missing zep-cloud',
|
|
519
|
+
failures,
|
|
520
|
+
);
|
|
247
521
|
const dryRunIds = guardExternalDryRun.adapters.map(adapter => adapter.id);
|
|
248
|
-
assert(
|
|
249
|
-
|
|
250
|
-
|
|
522
|
+
assert(
|
|
523
|
+
dryRunIds.includes('mem0-platform'),
|
|
524
|
+
'GuardBench external dry-run matrix missing mem0-platform',
|
|
525
|
+
failures,
|
|
526
|
+
);
|
|
527
|
+
assert(
|
|
528
|
+
dryRunIds.includes('zep-cloud'),
|
|
529
|
+
'GuardBench external dry-run matrix missing zep-cloud',
|
|
530
|
+
failures,
|
|
531
|
+
);
|
|
532
|
+
assert(
|
|
533
|
+
guardExternalDryRun.adapters.every(adapter => !JSON.stringify(adapter).includes('runtime-key')),
|
|
534
|
+
'GuardBench external dry-run matrix contains a test secret',
|
|
535
|
+
failures,
|
|
536
|
+
);
|
|
251
537
|
const evidenceIds = guardExternalEvidence.adapters.map(adapter => adapter.id);
|
|
252
|
-
assert(
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
538
|
+
assert(
|
|
539
|
+
guardExternalEvidence.allowPending === true,
|
|
540
|
+
'GuardBench external evidence report should allow pending live runs in the release gate',
|
|
541
|
+
failures,
|
|
542
|
+
);
|
|
543
|
+
assert(
|
|
544
|
+
evidenceIds.includes('mem0-platform'),
|
|
545
|
+
'GuardBench external evidence report missing mem0-platform',
|
|
546
|
+
failures,
|
|
547
|
+
);
|
|
548
|
+
assert(
|
|
549
|
+
evidenceIds.includes('zep-cloud'),
|
|
550
|
+
'GuardBench external evidence report missing zep-cloud',
|
|
551
|
+
failures,
|
|
552
|
+
);
|
|
553
|
+
assert(
|
|
554
|
+
guardExternalEvidence.adapters.every(adapter => ['pending', 'verified'].includes(adapter.status)),
|
|
555
|
+
'GuardBench external evidence report has an invalid adapter status',
|
|
556
|
+
failures,
|
|
557
|
+
);
|
|
558
|
+
assert(
|
|
559
|
+
guardExternalEvidence.adapters.every(adapter => !JSON.stringify(adapter).includes('runtime-key')),
|
|
560
|
+
'GuardBench external evidence report contains a test secret',
|
|
561
|
+
failures,
|
|
562
|
+
);
|
|
257
563
|
const zepAdapter = guardAdapterRegistry.adapters.find(adapter => adapter.id === 'zep-cloud');
|
|
258
|
-
assert(
|
|
259
|
-
|
|
260
|
-
|
|
564
|
+
assert(
|
|
565
|
+
zepAdapter?.credentialMode === 'runtime-env',
|
|
566
|
+
'Zep adapter must require runtime environment credentials',
|
|
567
|
+
failures,
|
|
568
|
+
);
|
|
569
|
+
assert(
|
|
570
|
+
zepAdapter?.requiredEnv?.includes('ZEP_API_KEY'),
|
|
571
|
+
'Zep adapter registry entry missing ZEP_API_KEY',
|
|
572
|
+
failures,
|
|
573
|
+
);
|
|
574
|
+
assert(
|
|
575
|
+
zepAdapter?.commands?.externalRun === 'npm run bench:guard:zep',
|
|
576
|
+
'Zep adapter external-run command is stale',
|
|
577
|
+
failures,
|
|
578
|
+
);
|
|
261
579
|
const publicationVerificationFixture = {
|
|
262
580
|
schemaVersion: '1.0.0',
|
|
263
581
|
suite: 'GuardBench publication artifact verification',
|
|
@@ -281,7 +599,8 @@ const publicationVerificationSchemaErrors = validateSchema(
|
|
|
281
599
|
guardPublicationVerificationSchema,
|
|
282
600
|
'guardbench-publication-verification',
|
|
283
601
|
);
|
|
284
|
-
for (const error of publicationVerificationSchemaErrors)
|
|
602
|
+
for (const error of publicationVerificationSchemaErrors)
|
|
603
|
+
failures.push(`GuardBench publication verifier schema violation: ${error}`);
|
|
285
604
|
|
|
286
605
|
const benchmarkNeedles = [
|
|
287
606
|
summary.generatedAt,
|
|
@@ -294,30 +613,165 @@ ensureContainsAll(paper, benchmarkNeedles, 'audrey-paper-v1.md', failures);
|
|
|
294
613
|
|
|
295
614
|
const latency = guardSummary.latency;
|
|
296
615
|
const guardLatencyText = `${formatMetric(latency.p50Ms)} ms / ${formatMetric(latency.p95Ms)} ms`;
|
|
297
|
-
ensureContainsAll(
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
616
|
+
ensureContainsAll(
|
|
617
|
+
evaluation,
|
|
618
|
+
[guardLatencyText, '| Published artifact raw-secret leaks | 0 |'],
|
|
619
|
+
'07-evaluation.md',
|
|
620
|
+
failures,
|
|
621
|
+
);
|
|
622
|
+
ensureContainsAll(
|
|
623
|
+
paper,
|
|
624
|
+
[guardLatencyText, '| Published artifact raw-secret leaks | 0 |'],
|
|
625
|
+
'audrey-paper-v1.md',
|
|
626
|
+
failures,
|
|
627
|
+
);
|
|
628
|
+
ensureContainsAll(
|
|
629
|
+
readme,
|
|
630
|
+
[
|
|
631
|
+
`${formatMetric(latency.p50Ms)}ms / ${formatMetric(latency.p95Ms)}ms`,
|
|
632
|
+
'0 published artifact leaks',
|
|
633
|
+
],
|
|
634
|
+
'README.md',
|
|
635
|
+
failures,
|
|
636
|
+
);
|
|
637
|
+
ensureContainsAll(
|
|
638
|
+
readme,
|
|
639
|
+
[
|
|
640
|
+
'bench:guard:zep',
|
|
641
|
+
'bench:guard:external:dry-run',
|
|
642
|
+
'bench:guard:external:evidence',
|
|
643
|
+
'bench:guard:external:evidence:strict',
|
|
644
|
+
'paper:arxiv:compile',
|
|
645
|
+
'paper:arxiv:compile:strict',
|
|
646
|
+
'paper:launch-results',
|
|
647
|
+
'paper:launch-results:strict',
|
|
648
|
+
'release:cut:plan',
|
|
649
|
+
'release:cut:apply',
|
|
650
|
+
'release:readiness',
|
|
651
|
+
'release:readiness:strict',
|
|
652
|
+
'python:release:check',
|
|
653
|
+
'absolute-path sweep',
|
|
654
|
+
'X URL reserve',
|
|
655
|
+
'submitted artifact-url targets',
|
|
656
|
+
'external dry-run matrix',
|
|
657
|
+
'external evidence verification',
|
|
658
|
+
'ZEP_API_KEY',
|
|
659
|
+
'ZEP_GUARDBENCH_INGEST_DELAY_MS',
|
|
660
|
+
],
|
|
661
|
+
'README.md',
|
|
662
|
+
failures,
|
|
663
|
+
);
|
|
664
|
+
ensureContainsAllProse(
|
|
665
|
+
readme,
|
|
666
|
+
[
|
|
667
|
+
'source-control state',
|
|
668
|
+
'live remote-head verification',
|
|
669
|
+
'npm registry/auth readiness',
|
|
670
|
+
'PyPI publish readiness',
|
|
671
|
+
],
|
|
672
|
+
'README.md',
|
|
673
|
+
failures,
|
|
674
|
+
);
|
|
675
|
+
ensureContainsAll(
|
|
676
|
+
paper,
|
|
677
|
+
[
|
|
678
|
+
'Zep Cloud',
|
|
679
|
+
'ZEP_API_KEY',
|
|
680
|
+
'Mem0 and Zep adapters',
|
|
681
|
+
'external dry-run matrix',
|
|
682
|
+
'external evidence verification',
|
|
683
|
+
'reserved URL budget',
|
|
684
|
+
'submitted artifact-url targets',
|
|
685
|
+
'arXiv compile report',
|
|
686
|
+
'release-readiness verifier',
|
|
687
|
+
'release-cut planner',
|
|
688
|
+
'Python package verifier',
|
|
689
|
+
],
|
|
690
|
+
'audrey-paper-v1.md',
|
|
691
|
+
failures,
|
|
692
|
+
);
|
|
693
|
+
ensureContainsAllProse(
|
|
694
|
+
paper,
|
|
695
|
+
[
|
|
696
|
+
'source-control release-state check',
|
|
697
|
+
'live remote-head verification',
|
|
698
|
+
'npm registry/auth readiness',
|
|
699
|
+
'npm whoami',
|
|
700
|
+
'audrey@1.0.0',
|
|
701
|
+
'PyPI publish readiness',
|
|
702
|
+
],
|
|
703
|
+
'audrey-paper-v1.md',
|
|
704
|
+
failures,
|
|
705
|
+
);
|
|
706
|
+
ensureContainsAll(
|
|
707
|
+
ledger,
|
|
708
|
+
[
|
|
709
|
+
`${formatMetric(latency.p50Ms)}ms/${formatMetric(latency.p95Ms)}ms`,
|
|
710
|
+
'zero published artifact raw-secret leaks',
|
|
711
|
+
],
|
|
712
|
+
'evidence-ledger.md',
|
|
713
|
+
failures,
|
|
714
|
+
);
|
|
305
715
|
|
|
306
|
-
assert(
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
assert(
|
|
716
|
+
assert(
|
|
717
|
+
guardSummary.passed === 10,
|
|
718
|
+
`GuardBench expected 10 passed scenarios, got ${guardSummary.passed}`,
|
|
719
|
+
failures,
|
|
720
|
+
);
|
|
721
|
+
assert(
|
|
722
|
+
guardSummary.scenarios === 10,
|
|
723
|
+
`GuardBench expected 10 scenarios, got ${guardSummary.scenarios}`,
|
|
724
|
+
failures,
|
|
725
|
+
);
|
|
726
|
+
assert(
|
|
727
|
+
guardSummary.redactionLeaks === 0,
|
|
728
|
+
`GuardBench decision-output leaks expected 0, got ${guardSummary.redactionLeaks}`,
|
|
729
|
+
failures,
|
|
730
|
+
);
|
|
731
|
+
assert(
|
|
732
|
+
guardSummary.artifactRedactionSweep?.passed === true,
|
|
733
|
+
'GuardBench artifactRedactionSweep did not pass',
|
|
734
|
+
failures,
|
|
735
|
+
);
|
|
736
|
+
assert(
|
|
737
|
+
guardSummary.artifactRedactionSweep?.leakCount === 0,
|
|
738
|
+
`GuardBench artifact leak count expected 0, got ${guardSummary.artifactRedactionSweep?.leakCount}`,
|
|
739
|
+
failures,
|
|
740
|
+
);
|
|
741
|
+
assert(
|
|
742
|
+
guardRaw.artifactRedactionSweep?.passed === true,
|
|
743
|
+
'Raw GuardBench artifactRedactionSweep did not pass',
|
|
744
|
+
failures,
|
|
745
|
+
);
|
|
312
746
|
|
|
313
747
|
const manifestText = JSON.stringify(guardManifest);
|
|
314
748
|
const summaryText = JSON.stringify(guardSummary);
|
|
315
749
|
const rawText = JSON.stringify(guardRaw);
|
|
316
|
-
assert(
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
750
|
+
assert(
|
|
751
|
+
!manifestText.includes(SEEDED_SECRET),
|
|
752
|
+
'GuardBench manifest contains the raw seeded secret',
|
|
753
|
+
failures,
|
|
754
|
+
);
|
|
755
|
+
assert(
|
|
756
|
+
!summaryText.includes(SEEDED_SECRET),
|
|
757
|
+
'GuardBench summary contains the raw seeded secret',
|
|
758
|
+
failures,
|
|
759
|
+
);
|
|
760
|
+
assert(
|
|
761
|
+
!rawText.includes(SEEDED_SECRET),
|
|
762
|
+
'GuardBench raw output contains the raw seeded secret',
|
|
763
|
+
failures,
|
|
764
|
+
);
|
|
765
|
+
assert(
|
|
766
|
+
manifestText.includes('seededSecretRefs'),
|
|
767
|
+
'GuardBench manifest missing seededSecretRefs',
|
|
768
|
+
failures,
|
|
769
|
+
);
|
|
770
|
+
assert(
|
|
771
|
+
!manifestText.includes('"seededSecrets"'),
|
|
772
|
+
'GuardBench manifest still publishes seededSecrets',
|
|
773
|
+
failures,
|
|
774
|
+
);
|
|
321
775
|
|
|
322
776
|
if (failures.length) {
|
|
323
777
|
console.error('Paper artifact verification failed:');
|
|
@@ -330,9 +784,15 @@ console.log(`Evidence rows: ${evidenceRows}`);
|
|
|
330
784
|
console.log(`Bibliography entries: ${bibEntries}`);
|
|
331
785
|
console.log(`Paper claims: ${claimReport.claims.length}`);
|
|
332
786
|
console.log(`Publication pack entries: ${publicationPackReport.entries.length}`);
|
|
333
|
-
console.log(
|
|
787
|
+
console.log(
|
|
788
|
+
`arXiv source files: ${arxivSourceReport.files.length}, citations ${arxivSourceReport.citationCount}`,
|
|
789
|
+
);
|
|
334
790
|
console.log(`arXiv compile status: ${arxivCompileReport.status}`);
|
|
335
791
|
console.log(`Browser launch targets: ${browserLaunchReport.targets.length}`);
|
|
336
|
-
console.log(
|
|
792
|
+
console.log(
|
|
793
|
+
`Browser launch results: ${browserLaunchResultsReport.targets.length} targets, ready=${browserLaunchResultsReport.ready}`,
|
|
794
|
+
);
|
|
337
795
|
console.log(`Paper bundle files: ${paperBundleReport.files.length}`);
|
|
338
|
-
console.log(
|
|
796
|
+
console.log(
|
|
797
|
+
`GuardBench: ${guardSummary.passed}/${guardSummary.scenarios}, latency ${latency.p50Ms}ms/${latency.p95Ms}ms, artifact leaks ${guardSummary.artifactRedactionSweep.leakCount}`,
|
|
798
|
+
);
|