audrey 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +57 -0
- package/README.md +13 -3
- package/benchmarks/adapter-self-test.mjs +6 -2
- package/benchmarks/adapters/example-allow.mjs +5 -2
- package/benchmarks/adapters/mem0-platform.mjs +19 -12
- package/benchmarks/adapters/zep-cloud.mjs +51 -27
- package/benchmarks/baselines.js +11 -6
- package/benchmarks/build-leaderboard.mjs +36 -23
- package/benchmarks/cases.js +24 -12
- package/benchmarks/create-conformance-card.mjs +12 -3
- package/benchmarks/create-submission-bundle.mjs +22 -8
- package/benchmarks/dry-run-external-adapters.mjs +24 -12
- package/benchmarks/guardbench.js +263 -123
- package/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +4 -4
- package/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
- package/benchmarks/output/external/guardbench-external-evidence.json +1 -1
- package/benchmarks/output/guardbench-conformance-card.json +12 -12
- package/benchmarks/output/guardbench-raw.json +106 -106
- package/benchmarks/output/guardbench-summary.json +168 -168
- package/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
- package/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
- package/benchmarks/output/submission-bundle/guardbench-conformance-card.json +12 -12
- package/benchmarks/output/submission-bundle/guardbench-raw.json +106 -106
- package/benchmarks/output/submission-bundle/guardbench-summary.json +168 -168
- package/benchmarks/output/submission-bundle/submission-manifest.json +11 -11
- package/benchmarks/output/submission-bundle/validation-report.json +1 -1
- package/benchmarks/output/summary.json +58 -58
- package/benchmarks/perf-snapshot.js +12 -9
- package/benchmarks/perf.bench.js +14 -6
- package/benchmarks/public-paths.mjs +11 -5
- package/benchmarks/reference-results.js +10 -5
- package/benchmarks/report.js +48 -27
- package/benchmarks/run-external-guardbench.mjs +47 -25
- package/benchmarks/run.js +112 -59
- package/benchmarks/validate-adapter-module.mjs +13 -10
- package/benchmarks/validate-adapter-registry.mjs +16 -5
- package/benchmarks/validate-guardbench-artifacts.mjs +76 -19
- package/benchmarks/verify-external-evidence.mjs +86 -31
- package/benchmarks/verify-publication-artifacts.mjs +34 -11
- package/benchmarks/verify-submission-bundle.mjs +9 -4
- package/dist/mcp-server/config.d.ts +1 -1
- package/dist/mcp-server/config.d.ts.map +1 -1
- package/dist/mcp-server/config.js +5 -3
- package/dist/mcp-server/config.js.map +1 -1
- package/dist/mcp-server/index.d.ts +7 -347
- package/dist/mcp-server/index.d.ts.map +1 -1
- package/dist/mcp-server/index.js +289 -256
- package/dist/mcp-server/index.js.map +1 -1
- package/dist/mcp-server/tool-schemas.d.ts +341 -0
- package/dist/mcp-server/tool-schemas.d.ts.map +1 -0
- package/dist/mcp-server/tool-schemas.js +248 -0
- package/dist/mcp-server/tool-schemas.js.map +1 -0
- package/dist/mcp-server/tool-validation.d.ts +17 -0
- package/dist/mcp-server/tool-validation.d.ts.map +1 -0
- package/dist/mcp-server/tool-validation.js +41 -0
- package/dist/mcp-server/tool-validation.js.map +1 -0
- package/dist/src/action-key.d.ts.map +1 -1
- package/dist/src/action-key.js +6 -2
- package/dist/src/action-key.js.map +1 -1
- package/dist/src/adaptive.d.ts.map +1 -1
- package/dist/src/adaptive.js +4 -2
- package/dist/src/adaptive.js.map +1 -1
- package/dist/src/affect.d.ts.map +1 -1
- package/dist/src/affect.js +8 -5
- package/dist/src/affect.js.map +1 -1
- package/dist/src/audrey.d.ts +1 -1
- package/dist/src/audrey.d.ts.map +1 -1
- package/dist/src/audrey.js +93 -49
- package/dist/src/audrey.js.map +1 -1
- package/dist/src/capsule.d.ts.map +1 -1
- package/dist/src/capsule.js +37 -15
- package/dist/src/capsule.js.map +1 -1
- package/dist/src/causal.d.ts +1 -1
- package/dist/src/causal.d.ts.map +1 -1
- package/dist/src/causal.js +4 -2
- package/dist/src/causal.js.map +1 -1
- package/dist/src/confidence.d.ts.map +1 -1
- package/dist/src/confidence.js +5 -5
- package/dist/src/confidence.js.map +1 -1
- package/dist/src/consolidate.d.ts.map +1 -1
- package/dist/src/consolidate.js +17 -9
- package/dist/src/consolidate.js.map +1 -1
- package/dist/src/context.js +1 -1
- package/dist/src/context.js.map +1 -1
- package/dist/src/controller.d.ts.map +1 -1
- package/dist/src/controller.js +24 -13
- package/dist/src/controller.js.map +1 -1
- package/dist/src/db.d.ts.map +1 -1
- package/dist/src/db.js +78 -27
- package/dist/src/db.js.map +1 -1
- package/dist/src/decay.d.ts +1 -1
- package/dist/src/decay.d.ts.map +1 -1
- package/dist/src/decay.js +1 -1
- package/dist/src/decay.js.map +1 -1
- package/dist/src/embedding.d.ts +12 -4
- package/dist/src/embedding.d.ts.map +1 -1
- package/dist/src/embedding.js +18 -16
- package/dist/src/embedding.js.map +1 -1
- package/dist/src/encode.d.ts.map +1 -1
- package/dist/src/encode.js +5 -4
- package/dist/src/encode.js.map +1 -1
- package/dist/src/events.d.ts +3 -2
- package/dist/src/events.d.ts.map +1 -1
- package/dist/src/events.js +7 -3
- package/dist/src/events.js.map +1 -1
- package/dist/src/export.d.ts.map +1 -1
- package/dist/src/export.js +21 -7
- package/dist/src/export.js.map +1 -1
- package/dist/src/feedback.d.ts.map +1 -1
- package/dist/src/feedback.js +1 -1
- package/dist/src/feedback.js.map +1 -1
- package/dist/src/forget.d.ts.map +1 -1
- package/dist/src/forget.js +12 -6
- package/dist/src/forget.js.map +1 -1
- package/dist/src/fts.d.ts.map +1 -1
- package/dist/src/fts.js +20 -8
- package/dist/src/fts.js.map +1 -1
- package/dist/src/hybrid-recall.d.ts.map +1 -1
- package/dist/src/hybrid-recall.js +12 -6
- package/dist/src/hybrid-recall.js.map +1 -1
- package/dist/src/impact.d.ts.map +1 -1
- package/dist/src/impact.js +26 -10
- package/dist/src/impact.js.map +1 -1
- package/dist/src/import.d.ts.map +1 -1
- package/dist/src/import.js +11 -6
- package/dist/src/import.js.map +1 -1
- package/dist/src/index.d.ts +3 -3
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +3 -3
- package/dist/src/index.js.map +1 -1
- package/dist/src/interference.d.ts.map +1 -1
- package/dist/src/interference.js +10 -5
- package/dist/src/interference.js.map +1 -1
- package/dist/src/introspect.d.ts.map +1 -1
- package/dist/src/introspect.js +12 -6
- package/dist/src/introspect.js.map +1 -1
- package/dist/src/llm.d.ts +2 -2
- package/dist/src/llm.d.ts.map +1 -1
- package/dist/src/llm.js +6 -6
- package/dist/src/llm.js.map +1 -1
- package/dist/src/migrate.d.ts.map +1 -1
- package/dist/src/migrate.js +10 -4
- package/dist/src/migrate.js.map +1 -1
- package/dist/src/preflight.d.ts.map +1 -1
- package/dist/src/preflight.js +6 -8
- package/dist/src/preflight.js.map +1 -1
- package/dist/src/profile.d.ts.map +1 -1
- package/dist/src/profile.js.map +1 -1
- package/dist/src/promote.d.ts.map +1 -1
- package/dist/src/promote.js +16 -7
- package/dist/src/promote.js.map +1 -1
- package/dist/src/prompts.d.ts.map +1 -1
- package/dist/src/prompts.js +1 -2
- package/dist/src/prompts.js.map +1 -1
- package/dist/src/recall.d.ts.map +1 -1
- package/dist/src/recall.js +85 -18
- package/dist/src/recall.js.map +1 -1
- package/dist/src/redact.d.ts.map +1 -1
- package/dist/src/redact.js +9 -4
- package/dist/src/redact.js.map +1 -1
- package/dist/src/reflexes.d.ts.map +1 -1
- package/dist/src/reflexes.js +1 -7
- package/dist/src/reflexes.js.map +1 -1
- package/dist/src/rollback.d.ts.map +1 -1
- package/dist/src/rollback.js +4 -2
- package/dist/src/rollback.js.map +1 -1
- package/dist/src/routes.d.ts.map +1 -1
- package/dist/src/routes.js +33 -13
- package/dist/src/routes.js.map +1 -1
- package/dist/src/rules-compiler.d.ts.map +1 -1
- package/dist/src/rules-compiler.js +24 -2
- package/dist/src/rules-compiler.js.map +1 -1
- package/dist/src/server.js +2 -2
- package/dist/src/server.js.map +1 -1
- package/dist/src/tool-trace.d.ts +2 -2
- package/dist/src/tool-trace.d.ts.map +1 -1
- package/dist/src/tool-trace.js +12 -4
- package/dist/src/tool-trace.js.map +1 -1
- package/dist/src/types.d.ts.map +1 -1
- package/dist/src/ulid.js +1 -1
- package/dist/src/ulid.js.map +1 -1
- package/dist/src/utils.d.ts.map +1 -1
- package/dist/src/utils.js.map +1 -1
- package/dist/src/validate.d.ts.map +1 -1
- package/dist/src/validate.js +20 -10
- package/dist/src/validate.js.map +1 -1
- package/docs/paper/07-evaluation.md +5 -5
- package/docs/paper/audrey-paper-v1.md +5 -5
- package/docs/paper/evidence-ledger.md +1 -1
- package/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
- package/docs/paper/output/arxiv/main.tex +5 -5
- package/docs/paper/output/arxiv-compile-report.json +3 -3
- package/docs/paper/output/submission-bundle/README.md +13 -3
- package/docs/paper/output/submission-bundle/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +4 -4
- package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-evidence.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-conformance-card.json +12 -12
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-raw.json +106 -106
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-summary.json +168 -168
- package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
- package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
- package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/submission-manifest.json +11 -11
- package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/validation-report.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/summary.json +64 -64
- package/docs/paper/output/submission-bundle/docs/paper/07-evaluation.md +5 -5
- package/docs/paper/output/submission-bundle/docs/paper/audrey-paper-v1.md +5 -5
- package/docs/paper/output/submission-bundle/docs/paper/evidence-ledger.md +1 -1
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/main.tex +5 -5
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv-compile-report.json +3 -3
- package/docs/paper/output/submission-bundle/package.json +17 -4
- package/docs/paper/output/submission-bundle/paper-submission-manifest.json +34 -34
- package/examples/fintech-ops-demo.js +12 -5
- package/examples/healthcare-ops-demo.js +8 -4
- package/examples/ollama-memory-agent.js +41 -13
- package/examples/stripe-demo.js +12 -5
- package/package.json +17 -4
- package/scripts/audit-release-completion.mjs +179 -101
- package/scripts/create-arxiv-source.mjs +20 -14
- package/scripts/create-paper-submission-bundle.mjs +6 -2
- package/scripts/finalize-release.mjs +111 -36
- package/scripts/prepare-release-cut.mjs +14 -6
- package/scripts/publish-release-bundle.mjs +62 -23
- package/scripts/publish-release-github-api.mjs +89 -24
- package/scripts/smoke-cli.js +9 -9
- package/scripts/sync-paper-artifacts.mjs +5 -1
- package/scripts/verify-arxiv-compile.mjs +52 -16
- package/scripts/verify-arxiv-source.mjs +45 -15
- package/scripts/verify-browser-launch-plan.mjs +28 -11
- package/scripts/verify-browser-launch-results.mjs +32 -14
- package/scripts/verify-paper-artifacts.mjs +539 -79
- package/scripts/verify-paper-claims.mjs +48 -20
- package/scripts/verify-paper-submission-bundle.mjs +22 -11
- package/scripts/verify-publication-pack.mjs +23 -9
- package/scripts/verify-release-readiness.mjs +211 -76
|
@@ -106,4 +106,8 @@ for (const [path, updater] of updates) {
|
|
|
106
106
|
}
|
|
107
107
|
}
|
|
108
108
|
|
|
109
|
-
console.log(
|
|
109
|
+
console.log(
|
|
110
|
+
changed.length
|
|
111
|
+
? `Synced paper artifacts: ${changed.join(', ')}`
|
|
112
|
+
: 'Paper artifacts already in sync.',
|
|
113
|
+
);
|
|
@@ -37,9 +37,12 @@ function pathForReport(path) {
|
|
|
37
37
|
}
|
|
38
38
|
|
|
39
39
|
function commandExists(command) {
|
|
40
|
-
const result =
|
|
41
|
-
|
|
42
|
-
|
|
40
|
+
const result =
|
|
41
|
+
process.platform === 'win32'
|
|
42
|
+
? spawnSync(process.env.ComSpec ?? 'cmd.exe', ['/d', '/c', 'where', command], {
|
|
43
|
+
encoding: 'utf-8',
|
|
44
|
+
})
|
|
45
|
+
: spawnSync('sh', ['-lc', `command -v ${command}`], { encoding: 'utf-8' });
|
|
43
46
|
return result.status === 0;
|
|
44
47
|
}
|
|
45
48
|
|
|
@@ -47,16 +50,17 @@ function compilerPlan(exists = commandExists) {
|
|
|
47
50
|
if (exists('tectonic')) {
|
|
48
51
|
return {
|
|
49
52
|
name: 'tectonic',
|
|
50
|
-
stages: [
|
|
51
|
-
{ command: 'tectonic', args: ['--keep-logs', '--keep-intermediates', MAIN_TEX] },
|
|
52
|
-
],
|
|
53
|
+
stages: [{ command: 'tectonic', args: ['--keep-logs', '--keep-intermediates', MAIN_TEX] }],
|
|
53
54
|
};
|
|
54
55
|
}
|
|
55
56
|
if (exists('latexmk')) {
|
|
56
57
|
return {
|
|
57
58
|
name: 'latexmk',
|
|
58
59
|
stages: [
|
|
59
|
-
{
|
|
60
|
+
{
|
|
61
|
+
command: 'latexmk',
|
|
62
|
+
args: ['-pdf', '-interaction=nonstopmode', '-halt-on-error', MAIN_TEX],
|
|
63
|
+
},
|
|
60
64
|
],
|
|
61
65
|
};
|
|
62
66
|
}
|
|
@@ -78,7 +82,18 @@ function compilerPlan(exists = commandExists) {
|
|
|
78
82
|
stages: [
|
|
79
83
|
{
|
|
80
84
|
command: 'uvx',
|
|
81
|
-
args: [
|
|
85
|
+
args: [
|
|
86
|
+
'tecto',
|
|
87
|
+
'-X',
|
|
88
|
+
'compile',
|
|
89
|
+
'--bundle',
|
|
90
|
+
'__TECTONIC_BUNDLE_URL__',
|
|
91
|
+
'--keep-logs',
|
|
92
|
+
'--keep-intermediates',
|
|
93
|
+
'--reruns',
|
|
94
|
+
'2',
|
|
95
|
+
MAIN_TEX,
|
|
96
|
+
],
|
|
82
97
|
},
|
|
83
98
|
],
|
|
84
99
|
};
|
|
@@ -164,7 +179,14 @@ async function startTectonicBundleProxy(bundleUrl = TECTONIC_BUNDLE_URL) {
|
|
|
164
179
|
if (request.headers.range) headers.range = request.headers.range;
|
|
165
180
|
const upstream = await fetch(remoteUrl, { headers });
|
|
166
181
|
response.statusCode = upstream.status;
|
|
167
|
-
for (const header of [
|
|
182
|
+
for (const header of [
|
|
183
|
+
'accept-ranges',
|
|
184
|
+
'content-length',
|
|
185
|
+
'content-range',
|
|
186
|
+
'content-type',
|
|
187
|
+
'etag',
|
|
188
|
+
'last-modified',
|
|
189
|
+
]) {
|
|
168
190
|
const value = upstream.headers.get(header);
|
|
169
191
|
if (value) response.setHeader(header, value);
|
|
170
192
|
}
|
|
@@ -196,7 +218,7 @@ async function startTectonicBundleProxy(bundleUrl = TECTONIC_BUNDLE_URL) {
|
|
|
196
218
|
function stageWithBundle(stage, bundleUrl) {
|
|
197
219
|
return {
|
|
198
220
|
command: stage.command,
|
|
199
|
-
args: stage.args.map(arg => arg === '__TECTONIC_BUNDLE_URL__' ? bundleUrl : arg),
|
|
221
|
+
args: stage.args.map(arg => (arg === '__TECTONIC_BUNDLE_URL__' ? bundleUrl : arg)),
|
|
200
222
|
};
|
|
201
223
|
}
|
|
202
224
|
|
|
@@ -210,7 +232,12 @@ function runStage(stage, cwd) {
|
|
|
210
232
|
child.kill();
|
|
211
233
|
if (!settled) {
|
|
212
234
|
settled = true;
|
|
213
|
-
resolveRun({
|
|
235
|
+
resolveRun({
|
|
236
|
+
status: 1,
|
|
237
|
+
signal: 'TIMEOUT',
|
|
238
|
+
stdout,
|
|
239
|
+
stderr: `${stderr}\nTimed out after 120000ms`.trim(),
|
|
240
|
+
});
|
|
214
241
|
}
|
|
215
242
|
}, 120000);
|
|
216
243
|
|
|
@@ -297,7 +324,9 @@ export async function verifyArxivCompile(options = {}) {
|
|
|
297
324
|
const logLines = [];
|
|
298
325
|
try {
|
|
299
326
|
proxy = plan.bundleProxy ? await startTectonicBundleProxy() : null;
|
|
300
|
-
const stages = proxy
|
|
327
|
+
const stages = proxy
|
|
328
|
+
? plan.stages.map(stage => stageWithBundle(stage, proxy.url))
|
|
329
|
+
: plan.stages;
|
|
301
330
|
for (const stage of stages) {
|
|
302
331
|
logLines.push(`$ ${stage.command} ${stage.args.join(' ')}`);
|
|
303
332
|
const result = await runStage(stage, outDir);
|
|
@@ -344,7 +373,7 @@ export function verifyArxivCompileReport(options = {}) {
|
|
|
344
373
|
const allowPending = options.allowPending !== false;
|
|
345
374
|
const failures = [];
|
|
346
375
|
const blockers = [];
|
|
347
|
-
let report
|
|
376
|
+
let report;
|
|
348
377
|
|
|
349
378
|
try {
|
|
350
379
|
report = JSON.parse(readFileSync(reportPath, 'utf-8'));
|
|
@@ -359,7 +388,9 @@ export function verifyArxivCompileReport(options = {}) {
|
|
|
359
388
|
}
|
|
360
389
|
|
|
361
390
|
try {
|
|
362
|
-
failures.push(
|
|
391
|
+
failures.push(
|
|
392
|
+
...validateSchema(report, readJson(pathForReport(schemaPath)), 'audrey-arxiv-compile-report'),
|
|
393
|
+
);
|
|
363
394
|
} catch (error) {
|
|
364
395
|
failures.push(`schema: ${error.message}`);
|
|
365
396
|
}
|
|
@@ -386,8 +417,13 @@ export function verifyArxivCompileReport(options = {}) {
|
|
|
386
417
|
} else if (report.status === 'failed') {
|
|
387
418
|
failures.push(...(report.failures?.length ? report.failures : ['arXiv compile failed']));
|
|
388
419
|
} else if (report.status === 'passed') {
|
|
389
|
-
if (!report.outputPdf || !existsSync(fromRoot(report.outputPdf)))
|
|
390
|
-
|
|
420
|
+
if (!report.outputPdf || !existsSync(fromRoot(report.outputPdf)))
|
|
421
|
+
failures.push('arxiv-compile-report.json: outputPdf is missing');
|
|
422
|
+
if (
|
|
423
|
+
report.outputPdf &&
|
|
424
|
+
report.outputPdfSha256 &&
|
|
425
|
+
sha256File(fromRoot(report.outputPdf)) !== report.outputPdfSha256
|
|
426
|
+
) {
|
|
391
427
|
failures.push('arxiv-compile-report.json: outputPdfSha256 is stale');
|
|
392
428
|
}
|
|
393
429
|
}
|
|
@@ -41,7 +41,8 @@ function checkSourceHash(label, sourcePath, expectedHash, failures) {
|
|
|
41
41
|
failures.push(`arxiv-manifest.json: missing source file for ${label}: ${sourcePath}`);
|
|
42
42
|
return;
|
|
43
43
|
}
|
|
44
|
-
if (expectedHash !== sha256File(absolute))
|
|
44
|
+
if (expectedHash !== sha256File(absolute))
|
|
45
|
+
failures.push(`arxiv-manifest.json: ${label} hash is stale`);
|
|
45
46
|
}
|
|
46
47
|
|
|
47
48
|
function parseArgs(argv = process.argv.slice(2)) {
|
|
@@ -76,7 +77,7 @@ export function verifyArxivSourcePackage(options = {}) {
|
|
|
76
77
|
const schemaPath = fromRoot(options.schema ?? DEFAULT_SCHEMA);
|
|
77
78
|
const manifestPath = join(dir, 'arxiv-manifest.json');
|
|
78
79
|
const failures = [];
|
|
79
|
-
let manifest
|
|
80
|
+
let manifest;
|
|
80
81
|
|
|
81
82
|
try {
|
|
82
83
|
manifest = readJson(manifestPath);
|
|
@@ -99,12 +100,29 @@ export function verifyArxivSourcePackage(options = {}) {
|
|
|
99
100
|
|
|
100
101
|
const listed = new Map((manifest.files ?? []).map(file => [file.path, file]));
|
|
101
102
|
for (const file of REQUIRED_FILES) {
|
|
102
|
-
if (!listed.has(file))
|
|
103
|
+
if (!listed.has(file))
|
|
104
|
+
failures.push(`arxiv-manifest.json: missing required file record ${file}`);
|
|
103
105
|
}
|
|
104
|
-
if (listed.has('arxiv-manifest.json'))
|
|
105
|
-
|
|
106
|
-
checkSourceHash(
|
|
107
|
-
|
|
106
|
+
if (listed.has('arxiv-manifest.json'))
|
|
107
|
+
failures.push('arxiv-manifest.json: must not include a self-hash file record');
|
|
108
|
+
checkSourceHash(
|
|
109
|
+
'sourceMarkdown',
|
|
110
|
+
manifest.sourceMarkdown,
|
|
111
|
+
manifest.sourceHashes?.sourceMarkdown,
|
|
112
|
+
failures,
|
|
113
|
+
);
|
|
114
|
+
checkSourceHash(
|
|
115
|
+
'publicationPack',
|
|
116
|
+
manifest.publicationPack,
|
|
117
|
+
manifest.sourceHashes?.publicationPack,
|
|
118
|
+
failures,
|
|
119
|
+
);
|
|
120
|
+
checkSourceHash(
|
|
121
|
+
'referencesBib',
|
|
122
|
+
'docs/paper/references.bib',
|
|
123
|
+
manifest.sourceHashes?.referencesBib,
|
|
124
|
+
failures,
|
|
125
|
+
);
|
|
108
126
|
|
|
109
127
|
for (const [file, record] of listed) {
|
|
110
128
|
const path = join(dir, file);
|
|
@@ -126,11 +144,14 @@ export function verifyArxivSourcePackage(options = {}) {
|
|
|
126
144
|
}
|
|
127
145
|
}
|
|
128
146
|
|
|
129
|
-
const actualFiles = walkFiles(dir)
|
|
147
|
+
const actualFiles = walkFiles(dir)
|
|
148
|
+
.filter(file => file !== 'arxiv-manifest.json')
|
|
149
|
+
.sort();
|
|
130
150
|
const listedFiles = [...listed.keys()].sort();
|
|
131
151
|
const listedSet = new Set(listedFiles);
|
|
132
152
|
for (const file of actualFiles) {
|
|
133
|
-
if (!listedSet.has(file))
|
|
153
|
+
if (!listedSet.has(file))
|
|
154
|
+
failures.push(`${file}: present in package but missing from manifest`);
|
|
134
155
|
}
|
|
135
156
|
|
|
136
157
|
const mainPath = join(dir, 'main.tex');
|
|
@@ -138,24 +159,33 @@ export function verifyArxivSourcePackage(options = {}) {
|
|
|
138
159
|
const main = existsSync(mainPath) ? readFileSync(mainPath, 'utf-8') : '';
|
|
139
160
|
const bib = existsSync(bibPath) ? readFileSync(bibPath, 'utf-8') : '';
|
|
140
161
|
const citationCount = [...main.matchAll(/\\cite\{([^}]+)\}/g)].length;
|
|
141
|
-
const citedIds = new Set(
|
|
162
|
+
const citedIds = new Set(
|
|
163
|
+
[...main.matchAll(/\\cite\{([^}]+)\}/g)].flatMap(match =>
|
|
164
|
+
match[1].split(',').map(id => id.trim()),
|
|
165
|
+
),
|
|
166
|
+
);
|
|
142
167
|
const bibIds = new Set([...bib.matchAll(/@\w+\s*\{\s*([^,\s]+)/g)].map(match => match[1].trim()));
|
|
143
168
|
const bibEntries = countBibEntries(bib);
|
|
144
169
|
|
|
145
170
|
if (!main.includes('\\documentclass')) failures.push('main.tex: missing documentclass');
|
|
146
171
|
if (!main.includes('\\begin{abstract}')) failures.push('main.tex: missing abstract');
|
|
147
|
-
if (!main.includes('\\bibliography{references}'))
|
|
172
|
+
if (!main.includes('\\bibliography{references}'))
|
|
173
|
+
failures.push('main.tex: missing bibliography command');
|
|
148
174
|
if (main.includes('[@')) failures.push('main.tex: contains unconverted Markdown citation syntax');
|
|
149
|
-
if (/^#{1,6}\s/m.test(main))
|
|
175
|
+
if (/^#{1,6}\s/m.test(main))
|
|
176
|
+
failures.push('main.tex: contains unconverted Markdown heading syntax');
|
|
150
177
|
if (main.includes(SEEDED_SECRET)) failures.push('main.tex: contains seeded raw secret');
|
|
151
|
-
if (/([A-Z]:\\|file:\/\/|C:\\Users\\|B:\\Projects\\)/i.test(main))
|
|
178
|
+
if (/([A-Z]:\\|file:\/\/|C:\\Users\\|B:\\Projects\\)/i.test(main))
|
|
179
|
+
failures.push('main.tex: contains a local absolute path');
|
|
152
180
|
if (citationCount < 1) failures.push('main.tex: expected at least one citation');
|
|
153
181
|
if (bibEntries !== 21) failures.push(`references.bib: expected 21 entries, found ${bibEntries}`);
|
|
154
182
|
for (const id of citedIds) {
|
|
155
183
|
if (!bibIds.has(id)) failures.push(`main.tex: cites missing bibliography id ${id}`);
|
|
156
184
|
}
|
|
157
|
-
if (manifest.tex?.citationCount !== citationCount)
|
|
158
|
-
|
|
185
|
+
if (manifest.tex?.citationCount !== citationCount)
|
|
186
|
+
failures.push('arxiv-manifest.json: citation count is stale');
|
|
187
|
+
if (manifest.tex?.bibEntryCount !== bibEntries)
|
|
188
|
+
failures.push('arxiv-manifest.json: bibliography count is stale');
|
|
159
189
|
|
|
160
190
|
return {
|
|
161
191
|
ok: failures.length === 0,
|
|
@@ -89,7 +89,9 @@ function isAllowedHost(platform, value) {
|
|
|
89
89
|
}
|
|
90
90
|
|
|
91
91
|
function hasPendingBoundary(text) {
|
|
92
|
-
return /\b(pending|not claim|not claimed|does not report|remain pending|live evidence|strict evidence)\b/i.test(
|
|
92
|
+
return /\b(pending|not claim|not claimed|does not report|remain pending|live evidence|strict evidence)\b/i.test(
|
|
93
|
+
text,
|
|
94
|
+
);
|
|
93
95
|
}
|
|
94
96
|
|
|
95
97
|
function validateTarget(target, entryMap, sourceIds) {
|
|
@@ -107,7 +109,9 @@ function validateTarget(target, entryMap, sourceIds) {
|
|
|
107
109
|
continue;
|
|
108
110
|
}
|
|
109
111
|
if (entry.platform !== target.platform) {
|
|
110
|
-
failures.push(
|
|
112
|
+
failures.push(
|
|
113
|
+
`${target.id}: entry ${entryId} belongs to ${entry.platform}, not ${target.platform}`,
|
|
114
|
+
);
|
|
111
115
|
}
|
|
112
116
|
if (!allowedEntries.has(entryId)) {
|
|
113
117
|
failures.push(`${target.id}: entry ${entryId} is not approved for ${target.platform}`);
|
|
@@ -116,7 +120,9 @@ function validateTarget(target, entryMap, sourceIds) {
|
|
|
116
120
|
failures.push(`${target.id}: entry ${entryId} exceeds maxChars`);
|
|
117
121
|
}
|
|
118
122
|
if (/\b(Mem0|Zep)\b/.test(entry.text) && !hasPendingBoundary(entry.text)) {
|
|
119
|
-
failures.push(
|
|
123
|
+
failures.push(
|
|
124
|
+
`${target.id}: entry ${entryId} mentions Mem0/Zep without pending boundary language`,
|
|
125
|
+
);
|
|
120
126
|
}
|
|
121
127
|
targetEntries.push(entry);
|
|
122
128
|
}
|
|
@@ -124,7 +130,8 @@ function validateTarget(target, entryMap, sourceIds) {
|
|
|
124
130
|
if (!sourceIds.has(sourceId)) failures.push(`${target.id}: unknown sourceRef ${sourceId}`);
|
|
125
131
|
}
|
|
126
132
|
for (const artifact of target.artifactRefs) {
|
|
127
|
-
if (!existsSync(fromRoot(artifact)))
|
|
133
|
+
if (!existsSync(fromRoot(artifact)))
|
|
134
|
+
failures.push(`${target.id}: missing artifactRef ${artifact}`);
|
|
128
135
|
}
|
|
129
136
|
if (target.platform === 'reddit' && target.manualRuleCheckRequired !== true) {
|
|
130
137
|
failures.push(`${target.id}: Reddit target must require a manual subreddit rule check`);
|
|
@@ -135,16 +142,21 @@ function validateTarget(target, entryMap, sourceIds) {
|
|
|
135
142
|
if (target.platform === 'arxiv' && target.manualRuleCheckRequired !== true) {
|
|
136
143
|
failures.push(`${target.id}: arXiv target must require a manual category/metadata check`);
|
|
137
144
|
}
|
|
138
|
-
if (!target.humanRequired)
|
|
139
|
-
|
|
140
|
-
if (target.
|
|
145
|
+
if (!target.humanRequired)
|
|
146
|
+
failures.push(`${target.id}: browser launch targets must require a human operator`);
|
|
147
|
+
if (!target.authRequired)
|
|
148
|
+
failures.push(`${target.id}: browser launch targets must require authenticated account review`);
|
|
149
|
+
if (target.operatorChecks.length < 2)
|
|
150
|
+
failures.push(`${target.id}: operator checklist is too thin`);
|
|
141
151
|
if (target.postSubmitChecks.length < 1) failures.push(`${target.id}: missing post-submit checks`);
|
|
142
152
|
if (
|
|
143
153
|
target.platform === 'x' &&
|
|
144
154
|
target.status === 'blocked-until-artifact-url' &&
|
|
145
155
|
!targetEntries.some(entry => entry.requiresArtifactUrl === true)
|
|
146
156
|
) {
|
|
147
|
-
failures.push(
|
|
157
|
+
failures.push(
|
|
158
|
+
`${target.id}: X artifact-url launch target must include a publication entry with reserved URL budget`,
|
|
159
|
+
);
|
|
148
160
|
}
|
|
149
161
|
|
|
150
162
|
return failures;
|
|
@@ -160,7 +172,9 @@ export async function verifyBrowserLaunchPlan(options = {}) {
|
|
|
160
172
|
const ids = new Set();
|
|
161
173
|
const targetReports = [];
|
|
162
174
|
const failures = [
|
|
163
|
-
...validateSchema(plan, schema, 'audrey-browser-launch-plan').map(
|
|
175
|
+
...validateSchema(plan, schema, 'audrey-browser-launch-plan').map(
|
|
176
|
+
failure => `browser launch plan schema: ${failure}`,
|
|
177
|
+
),
|
|
164
178
|
];
|
|
165
179
|
|
|
166
180
|
if (!publicationReport.ok) {
|
|
@@ -170,7 +184,8 @@ export async function verifyBrowserLaunchPlan(options = {}) {
|
|
|
170
184
|
failures.push('browser launch plan must point at docs/paper/publication-pack.json');
|
|
171
185
|
}
|
|
172
186
|
for (const command of REQUIRED_PREFLIGHT_COMMANDS) {
|
|
173
|
-
if (!(plan.preflightCommands ?? []).includes(command))
|
|
187
|
+
if (!(plan.preflightCommands ?? []).includes(command))
|
|
188
|
+
failures.push(`Missing browser-launch preflight command: ${command}`);
|
|
174
189
|
}
|
|
175
190
|
for (const target of plan.targets ?? []) {
|
|
176
191
|
const targetFailures = [];
|
|
@@ -192,7 +207,9 @@ export async function verifyBrowserLaunchPlan(options = {}) {
|
|
|
192
207
|
for (const id of REQUIRED_TARGETS) {
|
|
193
208
|
if (!ids.has(id)) failures.push(`Missing browser-launch target: ${id}`);
|
|
194
209
|
}
|
|
195
|
-
const ordered = [...(plan.targets ?? [])]
|
|
210
|
+
const ordered = [...(plan.targets ?? [])]
|
|
211
|
+
.sort((a, b) => a.order - b.order)
|
|
212
|
+
.map(target => target.id);
|
|
196
213
|
if (ordered.join('|') !== REQUIRED_TARGETS.join('|')) {
|
|
197
214
|
failures.push(`Browser-launch target order must be ${REQUIRED_TARGETS.join(', ')}`);
|
|
198
215
|
}
|
|
@@ -102,9 +102,11 @@ function matchesTrustedGitHubRepoUrl(value) {
|
|
|
102
102
|
try {
|
|
103
103
|
const url = new URL(value);
|
|
104
104
|
const pathname = url.pathname.toLowerCase();
|
|
105
|
-
return
|
|
106
|
-
|
|
107
|
-
|
|
105
|
+
return (
|
|
106
|
+
url.protocol === TRUSTED_GITHUB_REPO.protocol &&
|
|
107
|
+
url.hostname === TRUSTED_GITHUB_REPO.hostname &&
|
|
108
|
+
(pathname === TRUSTED_GITHUB_REPO_PATH || pathname.startsWith(`${TRUSTED_GITHUB_REPO_PATH}/`))
|
|
109
|
+
);
|
|
108
110
|
} catch {
|
|
109
111
|
return false;
|
|
110
112
|
}
|
|
@@ -136,7 +138,9 @@ function validateResultTarget(result, planTarget) {
|
|
|
136
138
|
}
|
|
137
139
|
|
|
138
140
|
if (result.platform !== planTarget.platform) {
|
|
139
|
-
failures.push(
|
|
141
|
+
failures.push(
|
|
142
|
+
`${result.id}: platform ${result.platform} does not match launch plan ${planTarget.platform}`,
|
|
143
|
+
);
|
|
140
144
|
}
|
|
141
145
|
if (!isAllowedPlatformUrl(result.platform, result.publicUrl)) {
|
|
142
146
|
failures.push(`${result.id}: publicUrl host is not allowed for ${result.platform}`);
|
|
@@ -144,14 +148,18 @@ function validateResultTarget(result, planTarget) {
|
|
|
144
148
|
if (!isHttpsUrl(result.artifactUrl)) {
|
|
145
149
|
failures.push(`${result.id}: artifactUrl must be null or https`);
|
|
146
150
|
}
|
|
147
|
-
if (text.includes(SEEDED_SECRET))
|
|
151
|
+
if (text.includes(SEEDED_SECRET))
|
|
152
|
+
failures.push(`${result.id}: contains raw seeded GuardBench secret`);
|
|
148
153
|
if (containsLocalPath(text)) failures.push(`${result.id}: contains local absolute path`);
|
|
149
154
|
|
|
150
155
|
if (result.status === 'pending') {
|
|
151
156
|
if (!result.blocker) failures.push(`${result.id}: pending result must record a blocker`);
|
|
152
|
-
if (result.publicUrl !== null)
|
|
153
|
-
|
|
154
|
-
if (result.
|
|
157
|
+
if (result.publicUrl !== null)
|
|
158
|
+
failures.push(`${result.id}: pending result must not record a publicUrl`);
|
|
159
|
+
if (result.submittedAt !== null)
|
|
160
|
+
failures.push(`${result.id}: pending result must not record submittedAt`);
|
|
161
|
+
if (result.operatorVerified)
|
|
162
|
+
failures.push(`${result.id}: pending result must not be operator verified`);
|
|
155
163
|
blockers.push(`${result.id}: ${result.blocker ?? 'pending launch target'}`);
|
|
156
164
|
}
|
|
157
165
|
|
|
@@ -160,8 +168,10 @@ function validateResultTarget(result, planTarget) {
|
|
|
160
168
|
if (planTarget.status === 'blocked-until-artifact-url' && !result.artifactUrl) {
|
|
161
169
|
failures.push(`${result.id}: submitted artifact-url target must record artifactUrl`);
|
|
162
170
|
}
|
|
163
|
-
if (!result.submittedAt)
|
|
164
|
-
|
|
171
|
+
if (!result.submittedAt)
|
|
172
|
+
failures.push(`${result.id}: submitted result must record submittedAt`);
|
|
173
|
+
if (!result.operatorVerified)
|
|
174
|
+
failures.push(`${result.id}: submitted result must be operator verified`);
|
|
165
175
|
if (planTarget.manualRuleCheckRequired && !result.manualRuleCheckCompleted) {
|
|
166
176
|
failures.push(`${result.id}: submitted result must record manual rule check completion`);
|
|
167
177
|
}
|
|
@@ -192,7 +202,9 @@ export async function verifyBrowserLaunchResults(options = {}) {
|
|
|
192
202
|
const planReport = await verifyBrowserLaunchPlan({ plan: planPath });
|
|
193
203
|
const planTargets = new Map((plan.targets ?? []).map(target => [target.id, target]));
|
|
194
204
|
const failures = [
|
|
195
|
-
...validateSchema(results, schema, 'audrey-browser-launch-results').map(
|
|
205
|
+
...validateSchema(results, schema, 'audrey-browser-launch-results').map(
|
|
206
|
+
failure => `browser launch results schema: ${failure}`,
|
|
207
|
+
),
|
|
196
208
|
];
|
|
197
209
|
const blockers = [];
|
|
198
210
|
const seen = new Set();
|
|
@@ -225,7 +237,9 @@ export async function verifyBrowserLaunchResults(options = {}) {
|
|
|
225
237
|
});
|
|
226
238
|
}
|
|
227
239
|
|
|
228
|
-
const planOrder = [...(plan.targets ?? [])]
|
|
240
|
+
const planOrder = [...(plan.targets ?? [])]
|
|
241
|
+
.sort((a, b) => a.order - b.order)
|
|
242
|
+
.map(target => target.id);
|
|
229
243
|
const resultOrder = [...(results.targets ?? [])].map(target => target.id);
|
|
230
244
|
if (resultOrder.join('|') !== planOrder.join('|')) {
|
|
231
245
|
failures.push(`browser launch results order must be ${planOrder.join(', ')}`);
|
|
@@ -234,7 +248,9 @@ export async function verifyBrowserLaunchResults(options = {}) {
|
|
|
234
248
|
if (!seen.has(id)) failures.push(`Missing browser launch result: ${id}`);
|
|
235
249
|
}
|
|
236
250
|
|
|
237
|
-
const notSubmitted = targetReports
|
|
251
|
+
const notSubmitted = targetReports
|
|
252
|
+
.filter(target => target.status !== 'submitted')
|
|
253
|
+
.map(target => target.id);
|
|
238
254
|
const ready = failures.length === 0 && notSubmitted.length === 0;
|
|
239
255
|
if (options.strict === true && notSubmitted.length > 0) {
|
|
240
256
|
failures.push(`strict launch readiness requires submitted targets: ${notSubmitted.join(', ')}`);
|
|
@@ -268,7 +284,9 @@ async function main() {
|
|
|
268
284
|
} else if (report.ok) {
|
|
269
285
|
const submitted = report.targets.filter(target => target.status === 'submitted').length;
|
|
270
286
|
const pending = report.targets.length - submitted;
|
|
271
|
-
console.log(
|
|
287
|
+
console.log(
|
|
288
|
+
`Browser launch results verification passed: ${submitted} submitted, ${pending} pending`,
|
|
289
|
+
);
|
|
272
290
|
} else {
|
|
273
291
|
console.error('Browser launch results verification failed:');
|
|
274
292
|
for (const failure of report.failures) console.error(`- ${failure}`);
|