audrey 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +57 -0
- package/README.md +13 -3
- package/benchmarks/adapter-self-test.mjs +6 -2
- package/benchmarks/adapters/example-allow.mjs +5 -2
- package/benchmarks/adapters/mem0-platform.mjs +19 -12
- package/benchmarks/adapters/zep-cloud.mjs +51 -27
- package/benchmarks/baselines.js +11 -6
- package/benchmarks/build-leaderboard.mjs +36 -23
- package/benchmarks/cases.js +24 -12
- package/benchmarks/create-conformance-card.mjs +12 -3
- package/benchmarks/create-submission-bundle.mjs +22 -8
- package/benchmarks/dry-run-external-adapters.mjs +24 -12
- package/benchmarks/guardbench.js +263 -123
- package/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +4 -4
- package/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
- package/benchmarks/output/external/guardbench-external-evidence.json +1 -1
- package/benchmarks/output/guardbench-conformance-card.json +12 -12
- package/benchmarks/output/guardbench-raw.json +106 -106
- package/benchmarks/output/guardbench-summary.json +168 -168
- package/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
- package/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
- package/benchmarks/output/submission-bundle/guardbench-conformance-card.json +12 -12
- package/benchmarks/output/submission-bundle/guardbench-raw.json +106 -106
- package/benchmarks/output/submission-bundle/guardbench-summary.json +168 -168
- package/benchmarks/output/submission-bundle/submission-manifest.json +11 -11
- package/benchmarks/output/submission-bundle/validation-report.json +1 -1
- package/benchmarks/output/summary.json +58 -58
- package/benchmarks/perf-snapshot.js +12 -9
- package/benchmarks/perf.bench.js +14 -6
- package/benchmarks/public-paths.mjs +11 -5
- package/benchmarks/reference-results.js +10 -5
- package/benchmarks/report.js +48 -27
- package/benchmarks/run-external-guardbench.mjs +47 -25
- package/benchmarks/run.js +112 -59
- package/benchmarks/validate-adapter-module.mjs +13 -10
- package/benchmarks/validate-adapter-registry.mjs +16 -5
- package/benchmarks/validate-guardbench-artifacts.mjs +76 -19
- package/benchmarks/verify-external-evidence.mjs +86 -31
- package/benchmarks/verify-publication-artifacts.mjs +34 -11
- package/benchmarks/verify-submission-bundle.mjs +9 -4
- package/dist/mcp-server/config.d.ts +1 -1
- package/dist/mcp-server/config.d.ts.map +1 -1
- package/dist/mcp-server/config.js +5 -3
- package/dist/mcp-server/config.js.map +1 -1
- package/dist/mcp-server/index.d.ts +7 -347
- package/dist/mcp-server/index.d.ts.map +1 -1
- package/dist/mcp-server/index.js +289 -256
- package/dist/mcp-server/index.js.map +1 -1
- package/dist/mcp-server/tool-schemas.d.ts +341 -0
- package/dist/mcp-server/tool-schemas.d.ts.map +1 -0
- package/dist/mcp-server/tool-schemas.js +248 -0
- package/dist/mcp-server/tool-schemas.js.map +1 -0
- package/dist/mcp-server/tool-validation.d.ts +17 -0
- package/dist/mcp-server/tool-validation.d.ts.map +1 -0
- package/dist/mcp-server/tool-validation.js +41 -0
- package/dist/mcp-server/tool-validation.js.map +1 -0
- package/dist/src/action-key.d.ts.map +1 -1
- package/dist/src/action-key.js +6 -2
- package/dist/src/action-key.js.map +1 -1
- package/dist/src/adaptive.d.ts.map +1 -1
- package/dist/src/adaptive.js +4 -2
- package/dist/src/adaptive.js.map +1 -1
- package/dist/src/affect.d.ts.map +1 -1
- package/dist/src/affect.js +8 -5
- package/dist/src/affect.js.map +1 -1
- package/dist/src/audrey.d.ts +1 -1
- package/dist/src/audrey.d.ts.map +1 -1
- package/dist/src/audrey.js +93 -49
- package/dist/src/audrey.js.map +1 -1
- package/dist/src/capsule.d.ts.map +1 -1
- package/dist/src/capsule.js +37 -15
- package/dist/src/capsule.js.map +1 -1
- package/dist/src/causal.d.ts +1 -1
- package/dist/src/causal.d.ts.map +1 -1
- package/dist/src/causal.js +4 -2
- package/dist/src/causal.js.map +1 -1
- package/dist/src/confidence.d.ts.map +1 -1
- package/dist/src/confidence.js +5 -5
- package/dist/src/confidence.js.map +1 -1
- package/dist/src/consolidate.d.ts.map +1 -1
- package/dist/src/consolidate.js +17 -9
- package/dist/src/consolidate.js.map +1 -1
- package/dist/src/context.js +1 -1
- package/dist/src/context.js.map +1 -1
- package/dist/src/controller.d.ts.map +1 -1
- package/dist/src/controller.js +24 -13
- package/dist/src/controller.js.map +1 -1
- package/dist/src/db.d.ts.map +1 -1
- package/dist/src/db.js +78 -27
- package/dist/src/db.js.map +1 -1
- package/dist/src/decay.d.ts +1 -1
- package/dist/src/decay.d.ts.map +1 -1
- package/dist/src/decay.js +1 -1
- package/dist/src/decay.js.map +1 -1
- package/dist/src/embedding.d.ts +12 -4
- package/dist/src/embedding.d.ts.map +1 -1
- package/dist/src/embedding.js +18 -16
- package/dist/src/embedding.js.map +1 -1
- package/dist/src/encode.d.ts.map +1 -1
- package/dist/src/encode.js +5 -4
- package/dist/src/encode.js.map +1 -1
- package/dist/src/events.d.ts +3 -2
- package/dist/src/events.d.ts.map +1 -1
- package/dist/src/events.js +7 -3
- package/dist/src/events.js.map +1 -1
- package/dist/src/export.d.ts.map +1 -1
- package/dist/src/export.js +21 -7
- package/dist/src/export.js.map +1 -1
- package/dist/src/feedback.d.ts.map +1 -1
- package/dist/src/feedback.js +1 -1
- package/dist/src/feedback.js.map +1 -1
- package/dist/src/forget.d.ts.map +1 -1
- package/dist/src/forget.js +12 -6
- package/dist/src/forget.js.map +1 -1
- package/dist/src/fts.d.ts.map +1 -1
- package/dist/src/fts.js +20 -8
- package/dist/src/fts.js.map +1 -1
- package/dist/src/hybrid-recall.d.ts.map +1 -1
- package/dist/src/hybrid-recall.js +12 -6
- package/dist/src/hybrid-recall.js.map +1 -1
- package/dist/src/impact.d.ts.map +1 -1
- package/dist/src/impact.js +26 -10
- package/dist/src/impact.js.map +1 -1
- package/dist/src/import.d.ts.map +1 -1
- package/dist/src/import.js +11 -6
- package/dist/src/import.js.map +1 -1
- package/dist/src/index.d.ts +3 -3
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +3 -3
- package/dist/src/index.js.map +1 -1
- package/dist/src/interference.d.ts.map +1 -1
- package/dist/src/interference.js +10 -5
- package/dist/src/interference.js.map +1 -1
- package/dist/src/introspect.d.ts.map +1 -1
- package/dist/src/introspect.js +12 -6
- package/dist/src/introspect.js.map +1 -1
- package/dist/src/llm.d.ts +2 -2
- package/dist/src/llm.d.ts.map +1 -1
- package/dist/src/llm.js +6 -6
- package/dist/src/llm.js.map +1 -1
- package/dist/src/migrate.d.ts.map +1 -1
- package/dist/src/migrate.js +10 -4
- package/dist/src/migrate.js.map +1 -1
- package/dist/src/preflight.d.ts.map +1 -1
- package/dist/src/preflight.js +6 -8
- package/dist/src/preflight.js.map +1 -1
- package/dist/src/profile.d.ts.map +1 -1
- package/dist/src/profile.js.map +1 -1
- package/dist/src/promote.d.ts.map +1 -1
- package/dist/src/promote.js +16 -7
- package/dist/src/promote.js.map +1 -1
- package/dist/src/prompts.d.ts.map +1 -1
- package/dist/src/prompts.js +1 -2
- package/dist/src/prompts.js.map +1 -1
- package/dist/src/recall.d.ts.map +1 -1
- package/dist/src/recall.js +85 -18
- package/dist/src/recall.js.map +1 -1
- package/dist/src/redact.d.ts.map +1 -1
- package/dist/src/redact.js +9 -4
- package/dist/src/redact.js.map +1 -1
- package/dist/src/reflexes.d.ts.map +1 -1
- package/dist/src/reflexes.js +1 -7
- package/dist/src/reflexes.js.map +1 -1
- package/dist/src/rollback.d.ts.map +1 -1
- package/dist/src/rollback.js +4 -2
- package/dist/src/rollback.js.map +1 -1
- package/dist/src/routes.d.ts.map +1 -1
- package/dist/src/routes.js +33 -13
- package/dist/src/routes.js.map +1 -1
- package/dist/src/rules-compiler.d.ts.map +1 -1
- package/dist/src/rules-compiler.js +24 -2
- package/dist/src/rules-compiler.js.map +1 -1
- package/dist/src/server.js +2 -2
- package/dist/src/server.js.map +1 -1
- package/dist/src/tool-trace.d.ts +2 -2
- package/dist/src/tool-trace.d.ts.map +1 -1
- package/dist/src/tool-trace.js +12 -4
- package/dist/src/tool-trace.js.map +1 -1
- package/dist/src/types.d.ts.map +1 -1
- package/dist/src/ulid.js +1 -1
- package/dist/src/ulid.js.map +1 -1
- package/dist/src/utils.d.ts.map +1 -1
- package/dist/src/utils.js.map +1 -1
- package/dist/src/validate.d.ts.map +1 -1
- package/dist/src/validate.js +20 -10
- package/dist/src/validate.js.map +1 -1
- package/docs/paper/07-evaluation.md +5 -5
- package/docs/paper/audrey-paper-v1.md +5 -5
- package/docs/paper/evidence-ledger.md +1 -1
- package/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
- package/docs/paper/output/arxiv/main.tex +5 -5
- package/docs/paper/output/arxiv-compile-report.json +3 -3
- package/docs/paper/output/submission-bundle/README.md +13 -3
- package/docs/paper/output/submission-bundle/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +4 -4
- package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-evidence.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-conformance-card.json +12 -12
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-raw.json +106 -106
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-summary.json +168 -168
- package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
- package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
- package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/submission-manifest.json +11 -11
- package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/validation-report.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/summary.json +64 -64
- package/docs/paper/output/submission-bundle/docs/paper/07-evaluation.md +5 -5
- package/docs/paper/output/submission-bundle/docs/paper/audrey-paper-v1.md +5 -5
- package/docs/paper/output/submission-bundle/docs/paper/evidence-ledger.md +1 -1
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/main.tex +5 -5
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv-compile-report.json +3 -3
- package/docs/paper/output/submission-bundle/package.json +17 -4
- package/docs/paper/output/submission-bundle/paper-submission-manifest.json +34 -34
- package/examples/fintech-ops-demo.js +12 -5
- package/examples/healthcare-ops-demo.js +8 -4
- package/examples/ollama-memory-agent.js +41 -13
- package/examples/stripe-demo.js +12 -5
- package/package.json +17 -4
- package/scripts/audit-release-completion.mjs +179 -101
- package/scripts/create-arxiv-source.mjs +20 -14
- package/scripts/create-paper-submission-bundle.mjs +6 -2
- package/scripts/finalize-release.mjs +111 -36
- package/scripts/prepare-release-cut.mjs +14 -6
- package/scripts/publish-release-bundle.mjs +62 -23
- package/scripts/publish-release-github-api.mjs +89 -24
- package/scripts/smoke-cli.js +9 -9
- package/scripts/sync-paper-artifacts.mjs +5 -1
- package/scripts/verify-arxiv-compile.mjs +52 -16
- package/scripts/verify-arxiv-source.mjs +45 -15
- package/scripts/verify-browser-launch-plan.mjs +28 -11
- package/scripts/verify-browser-launch-results.mjs +32 -14
- package/scripts/verify-paper-artifacts.mjs +539 -79
- package/scripts/verify-paper-claims.mjs +48 -20
- package/scripts/verify-paper-submission-bundle.mjs +22 -11
- package/scripts/verify-publication-pack.mjs +23 -9
- package/scripts/verify-release-readiness.mjs +211 -76
|
@@ -54,7 +54,7 @@ Options:
|
|
|
54
54
|
|
|
55
55
|
function assertTextNeedles(needles, shouldExist, failures) {
|
|
56
56
|
for (const needle of needles) {
|
|
57
|
-
let text
|
|
57
|
+
let text;
|
|
58
58
|
try {
|
|
59
59
|
text = readText(needle.path);
|
|
60
60
|
} catch (error) {
|
|
@@ -64,8 +64,10 @@ function assertTextNeedles(needles, shouldExist, failures) {
|
|
|
64
64
|
const normalizedText = text.replace(/\s+/g, ' ');
|
|
65
65
|
const normalizedNeedle = needle.text.replace(/\s+/g, ' ');
|
|
66
66
|
const found = text.includes(needle.text) || normalizedText.includes(normalizedNeedle);
|
|
67
|
-
if (shouldExist && !found)
|
|
68
|
-
|
|
67
|
+
if (shouldExist && !found)
|
|
68
|
+
failures.push(`${needle.path} is missing claim text: ${needle.text}`);
|
|
69
|
+
if (!shouldExist && found)
|
|
70
|
+
failures.push(`${needle.path} contains forbidden claim text: ${needle.text}`);
|
|
69
71
|
}
|
|
70
72
|
}
|
|
71
73
|
|
|
@@ -73,11 +75,16 @@ function guardbenchLocalPassed() {
|
|
|
73
75
|
const summary = readJson('benchmarks/output/guardbench-summary.json');
|
|
74
76
|
const failures = [];
|
|
75
77
|
if (summary.passed !== 10) failures.push(`GuardBench passed expected 10, got ${summary.passed}`);
|
|
76
|
-
if (summary.scenarios !== 10)
|
|
77
|
-
|
|
78
|
-
if (summary.
|
|
78
|
+
if (summary.scenarios !== 10)
|
|
79
|
+
failures.push(`GuardBench scenarios expected 10, got ${summary.scenarios}`);
|
|
80
|
+
if (summary.redactionLeaks !== 0)
|
|
81
|
+
failures.push(`GuardBench decision redaction leaks expected 0, got ${summary.redactionLeaks}`);
|
|
82
|
+
if (summary.artifactRedactionSweep?.passed !== true)
|
|
83
|
+
failures.push('GuardBench artifact redaction sweep did not pass');
|
|
79
84
|
if (summary.artifactRedactionSweep?.leakCount !== 0) {
|
|
80
|
-
failures.push(
|
|
85
|
+
failures.push(
|
|
86
|
+
`GuardBench artifact leak count expected 0, got ${summary.artifactRedactionSweep?.leakCount}`,
|
|
87
|
+
);
|
|
81
88
|
}
|
|
82
89
|
return failures;
|
|
83
90
|
}
|
|
@@ -88,9 +95,9 @@ function noPublishedSecretLeaks() {
|
|
|
88
95
|
'benchmarks/output/guardbench-summary.json',
|
|
89
96
|
'benchmarks/output/guardbench-raw.json',
|
|
90
97
|
];
|
|
91
|
-
return paths.flatMap(path =>
|
|
92
|
-
? [`${path} contains the seeded raw secret`]
|
|
93
|
-
|
|
98
|
+
return paths.flatMap(path =>
|
|
99
|
+
readText(path).includes(SEEDED_SECRET) ? [`${path} contains the seeded raw secret`] : [],
|
|
100
|
+
);
|
|
94
101
|
}
|
|
95
102
|
|
|
96
103
|
function adapterRegistryHasMem0Zep() {
|
|
@@ -104,15 +111,26 @@ function adapterRegistryHasMem0Zep() {
|
|
|
104
111
|
|
|
105
112
|
function externalEvidencePending() {
|
|
106
113
|
const evidence = readJson('benchmarks/output/external/guardbench-external-evidence.json');
|
|
107
|
-
const rows = (evidence.adapters ?? []).filter(adapter =>
|
|
114
|
+
const rows = (evidence.adapters ?? []).filter(adapter =>
|
|
115
|
+
['mem0-platform', 'zep-cloud'].includes(adapter.id),
|
|
116
|
+
);
|
|
108
117
|
const failures = [];
|
|
109
|
-
if (rows.length !== 2)
|
|
118
|
+
if (rows.length !== 2)
|
|
119
|
+
failures.push(`External evidence expected Mem0 and Zep rows, got ${rows.length}`);
|
|
110
120
|
if (rows.every(row => row.status === 'verified')) {
|
|
111
|
-
failures.push(
|
|
121
|
+
failures.push(
|
|
122
|
+
'External evidence is fully verified but claim register still marks external scores pending',
|
|
123
|
+
);
|
|
112
124
|
}
|
|
113
125
|
for (const row of rows) {
|
|
114
|
-
if (row.status !== 'pending')
|
|
115
|
-
|
|
126
|
+
if (row.status !== 'pending')
|
|
127
|
+
failures.push(
|
|
128
|
+
`External evidence row ${row.id} should remain pending until strict live evidence passes`,
|
|
129
|
+
);
|
|
130
|
+
if (row.evidenceKind !== 'dry-run')
|
|
131
|
+
failures.push(
|
|
132
|
+
`External evidence row ${row.id} should be dry-run evidence before live credentials`,
|
|
133
|
+
);
|
|
116
134
|
}
|
|
117
135
|
return failures;
|
|
118
136
|
}
|
|
@@ -121,9 +139,13 @@ function externalEvidenceNoSecrets() {
|
|
|
121
139
|
const text = readText('benchmarks/output/external/guardbench-external-evidence.json');
|
|
122
140
|
const evidence = JSON.parse(text);
|
|
123
141
|
const failures = [];
|
|
124
|
-
if (text.includes('runtime-key'))
|
|
142
|
+
if (text.includes('runtime-key'))
|
|
143
|
+
failures.push('External evidence report contains test runtime-key');
|
|
125
144
|
for (const row of evidence.adapters ?? []) {
|
|
126
|
-
if (row.secretLeakCount !== 0)
|
|
145
|
+
if (row.secretLeakCount !== 0)
|
|
146
|
+
failures.push(
|
|
147
|
+
`External evidence row ${row.id} reports ${row.secretLeakCount} credential leak(s)`,
|
|
148
|
+
);
|
|
127
149
|
}
|
|
128
150
|
return failures;
|
|
129
151
|
}
|
|
@@ -134,7 +156,11 @@ function paperStageBoundaryExcludesExternalScores() {
|
|
|
134
156
|
if (!paper.includes('this paper does not report external-system GuardBench scores')) {
|
|
135
157
|
failures.push('Paper missing explicit external-score exclusion');
|
|
136
158
|
}
|
|
137
|
-
if (
|
|
159
|
+
if (
|
|
160
|
+
!paper.includes(
|
|
161
|
+
'External scores added only when live adapter runs and raw outputs are published',
|
|
162
|
+
)
|
|
163
|
+
) {
|
|
138
164
|
failures.push('Paper missing Stage-B external-score condition');
|
|
139
165
|
}
|
|
140
166
|
return failures;
|
|
@@ -151,7 +177,8 @@ async function runArtifactCheck(name) {
|
|
|
151
177
|
if (name === 'external-evidence-pending') return externalEvidencePending();
|
|
152
178
|
if (name === 'guardbench-local-passed') return guardbenchLocalPassed();
|
|
153
179
|
if (name === 'no-published-secret-leaks') return noPublishedSecretLeaks();
|
|
154
|
-
if (name === 'paper-stage-boundary-excludes-external-scores')
|
|
180
|
+
if (name === 'paper-stage-boundary-excludes-external-scores')
|
|
181
|
+
return paperStageBoundaryExcludesExternalScores();
|
|
155
182
|
if (name === 'publication-verifier-ok') return publicationVerifierOk();
|
|
156
183
|
return [`Unknown claim artifact check: ${name}`];
|
|
157
184
|
}
|
|
@@ -168,7 +195,8 @@ export async function verifyPaperClaims(options = {}) {
|
|
|
168
195
|
assertTextNeedles(claim.forbiddenText ?? [], false, failures);
|
|
169
196
|
for (const evidence of claim.evidence ?? []) {
|
|
170
197
|
const [path] = evidence.split('#');
|
|
171
|
-
if (!existsSync(fromRoot(path)))
|
|
198
|
+
if (!existsSync(fromRoot(path)))
|
|
199
|
+
failures.push(`Missing evidence file for ${claim.id}: ${path}`);
|
|
172
200
|
}
|
|
173
201
|
for (const check of claim.artifactChecks ?? []) {
|
|
174
202
|
failures.push(...(await runArtifactCheck(check)));
|
|
@@ -94,7 +94,7 @@ export function verifyPaperSubmissionBundle(options = {}) {
|
|
|
94
94
|
const checkSourceFreshness = options.checkSourceFreshness !== false;
|
|
95
95
|
const manifestPath = join(dir, 'paper-submission-manifest.json');
|
|
96
96
|
const failures = [];
|
|
97
|
-
let manifest
|
|
97
|
+
let manifest;
|
|
98
98
|
|
|
99
99
|
try {
|
|
100
100
|
manifest = readJson(manifestPath);
|
|
@@ -116,14 +116,16 @@ export function verifyPaperSubmissionBundle(options = {}) {
|
|
|
116
116
|
|
|
117
117
|
const listed = new Map((manifest.files ?? []).map(file => [file.path, file]));
|
|
118
118
|
for (const file of REQUIRED_FILES) {
|
|
119
|
-
if (!listed.has(file))
|
|
119
|
+
if (!listed.has(file))
|
|
120
|
+
failures.push(`paper-submission-manifest.json: missing required file record ${file}`);
|
|
120
121
|
}
|
|
121
122
|
const compileReport = listed.has('docs/paper/output/arxiv-compile-report.json')
|
|
122
123
|
? readJson(join(dir, 'docs/paper/output/arxiv-compile-report.json'))
|
|
123
124
|
: null;
|
|
124
125
|
if (compileReport?.status === 'passed') {
|
|
125
126
|
for (const file of PASSED_COMPILE_FILES) {
|
|
126
|
-
if (!listed.has(file))
|
|
127
|
+
if (!listed.has(file))
|
|
128
|
+
failures.push(`paper-submission-manifest.json: missing compile-proof file record ${file}`);
|
|
127
129
|
}
|
|
128
130
|
}
|
|
129
131
|
if (listed.has('paper-submission-manifest.json')) {
|
|
@@ -152,7 +154,9 @@ export function verifyPaperSubmissionBundle(options = {}) {
|
|
|
152
154
|
}
|
|
153
155
|
}
|
|
154
156
|
|
|
155
|
-
const actualFiles = walkFiles(dir)
|
|
157
|
+
const actualFiles = walkFiles(dir)
|
|
158
|
+
.filter(file => file !== 'paper-submission-manifest.json')
|
|
159
|
+
.sort();
|
|
156
160
|
const listedFiles = [...listed.keys()].sort();
|
|
157
161
|
const actualSet = new Set(actualFiles);
|
|
158
162
|
const listedSet = new Set(listedFiles);
|
|
@@ -160,17 +164,24 @@ export function verifyPaperSubmissionBundle(options = {}) {
|
|
|
160
164
|
if (!listedSet.has(file)) failures.push(`${file}: present in bundle but missing from manifest`);
|
|
161
165
|
}
|
|
162
166
|
for (const file of listedFiles) {
|
|
163
|
-
if (!actualSet.has(file))
|
|
167
|
+
if (!actualSet.has(file))
|
|
168
|
+
failures.push(`${file}: listed in manifest but not present in bundle`);
|
|
164
169
|
}
|
|
165
170
|
for (const file of scanFilesForLocalPaths(dir, actualFiles)) {
|
|
166
171
|
failures.push(`${file}: contains a local absolute path`);
|
|
167
172
|
}
|
|
168
|
-
if (manifest.claimVerification?.ok !== true)
|
|
169
|
-
|
|
170
|
-
if (manifest.
|
|
171
|
-
|
|
172
|
-
if (manifest.guardBenchSnapshot?.
|
|
173
|
-
|
|
173
|
+
if (manifest.claimVerification?.ok !== true)
|
|
174
|
+
failures.push('paper-submission-manifest.json: claimVerification is not ok');
|
|
175
|
+
if (manifest.publicationPackVerification?.ok !== true)
|
|
176
|
+
failures.push('paper-submission-manifest.json: publicationPackVerification is not ok');
|
|
177
|
+
if (manifest.guardBenchSnapshot?.passed !== 10)
|
|
178
|
+
failures.push('paper-submission-manifest.json: GuardBench passed count is not 10');
|
|
179
|
+
if (manifest.guardBenchSnapshot?.scenarios !== 10)
|
|
180
|
+
failures.push('paper-submission-manifest.json: GuardBench scenario count is not 10');
|
|
181
|
+
if (manifest.guardBenchSnapshot?.redactionLeaks !== 0)
|
|
182
|
+
failures.push('paper-submission-manifest.json: GuardBench decision redaction leaks are not 0');
|
|
183
|
+
if (manifest.guardBenchSnapshot?.artifactLeaks !== 0)
|
|
184
|
+
failures.push('paper-submission-manifest.json: GuardBench artifact leaks are not 0');
|
|
174
185
|
|
|
175
186
|
return {
|
|
176
187
|
ok: failures.length === 0,
|
|
@@ -70,25 +70,33 @@ function referencesPendingClaim(entry, claimMap) {
|
|
|
70
70
|
}
|
|
71
71
|
|
|
72
72
|
function hasPendingBoundaryLanguage(text) {
|
|
73
|
-
return /\b(pending|deferred|does not report|not reporting|not claimed|Stage-B|live credentialed)\b/i.test(
|
|
73
|
+
return /\b(pending|deferred|does not report|not reporting|not claimed|Stage-B|live credentialed)\b/i.test(
|
|
74
|
+
text,
|
|
75
|
+
);
|
|
74
76
|
}
|
|
75
77
|
|
|
76
78
|
function validateEntry(entry, claimMap, forbiddenNeedles) {
|
|
77
79
|
const failures = [];
|
|
78
80
|
const reservedUrlChars = Number.isInteger(entry.reservedUrlChars) ? entry.reservedUrlChars : 0;
|
|
79
81
|
if (entry.text.length > entry.maxChars) {
|
|
80
|
-
failures.push(
|
|
82
|
+
failures.push(
|
|
83
|
+
`${entry.id}: text length ${entry.text.length} exceeds maxChars ${entry.maxChars}`,
|
|
84
|
+
);
|
|
81
85
|
}
|
|
82
86
|
if (entry.text.includes(SEEDED_SECRET)) failures.push(`${entry.id}: contains seeded raw secret`);
|
|
83
|
-
if (entry.text.includes('runtime-key'))
|
|
87
|
+
if (entry.text.includes('runtime-key'))
|
|
88
|
+
failures.push(`${entry.id}: contains runtime-key test credential`);
|
|
84
89
|
for (const claimId of entry.claimIds) {
|
|
85
90
|
if (!claimMap.has(claimId)) failures.push(`${entry.id}: unknown claim id ${claimId}`);
|
|
86
91
|
}
|
|
87
92
|
for (const needle of forbiddenNeedles) {
|
|
88
|
-
if (entry.text.includes(needle))
|
|
93
|
+
if (entry.text.includes(needle))
|
|
94
|
+
failures.push(`${entry.id}: contains forbidden claim text: ${needle}`);
|
|
89
95
|
}
|
|
90
96
|
if (referencesPendingClaim(entry, claimMap) && !hasPendingBoundaryLanguage(entry.text)) {
|
|
91
|
-
failures.push(
|
|
97
|
+
failures.push(
|
|
98
|
+
`${entry.id}: references a pending claim without explicit pending/deferred boundary language`,
|
|
99
|
+
);
|
|
92
100
|
}
|
|
93
101
|
if (/10\/10/.test(entry.text) && !/\b(local|Stage-A)\b/i.test(entry.text)) {
|
|
94
102
|
failures.push(`${entry.id}: 10/10 claim must be scoped as local or Stage-A`);
|
|
@@ -106,10 +114,14 @@ function validateEntry(entry, claimMap, forbiddenNeedles) {
|
|
|
106
114
|
if (!Number.isInteger(entry.reservedUrlChars)) {
|
|
107
115
|
failures.push(`${entry.id}: X post requiring an artifact URL must set reservedUrlChars`);
|
|
108
116
|
} else if (entry.reservedUrlChars < X_URL_RESERVED_CHARS) {
|
|
109
|
-
failures.push(
|
|
117
|
+
failures.push(
|
|
118
|
+
`${entry.id}: X artifact URL reserve must be at least ${X_URL_RESERVED_CHARS} characters`,
|
|
119
|
+
);
|
|
110
120
|
}
|
|
111
121
|
if (entry.text.length + reservedUrlChars > entry.maxChars) {
|
|
112
|
-
failures.push(
|
|
122
|
+
failures.push(
|
|
123
|
+
`${entry.id}: text length ${entry.text.length} plus URL reserve ${reservedUrlChars} exceeds maxChars ${entry.maxChars}`,
|
|
124
|
+
);
|
|
113
125
|
}
|
|
114
126
|
}
|
|
115
127
|
return failures;
|
|
@@ -122,14 +134,16 @@ export async function verifyPublicationPack(options = {}) {
|
|
|
122
134
|
const claimRegister = readJson(pack.claimRegister);
|
|
123
135
|
const claimMap = new Map((claimRegister.claims ?? []).map(claim => [claim.id, claim]));
|
|
124
136
|
const forbiddenNeedles = (claimRegister.claims ?? []).flatMap(claim =>
|
|
125
|
-
(claim.forbiddenText ?? []).map(needle => needle.text)
|
|
137
|
+
(claim.forbiddenText ?? []).map(needle => needle.text),
|
|
138
|
+
);
|
|
126
139
|
|
|
127
140
|
const schemaFailures = validateSchema(pack, schema, 'audrey-publication-pack');
|
|
128
141
|
const ids = new Set();
|
|
129
142
|
const entryReports = [];
|
|
130
143
|
const failures = [...schemaFailures.map(failure => `publication pack schema: ${failure}`)];
|
|
131
144
|
|
|
132
|
-
if (!claimReport.ok)
|
|
145
|
+
if (!claimReport.ok)
|
|
146
|
+
failures.push(...claimReport.failures.map(failure => `claim verifier: ${failure}`));
|
|
133
147
|
|
|
134
148
|
for (const entry of pack.entries ?? []) {
|
|
135
149
|
const entryFailures = [];
|