audrey 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -0
- package/README.md +27 -5
- package/benchmarks/guardbench.js +98 -8
- package/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +7 -7
- package/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
- package/benchmarks/output/external/guardbench-external-evidence.json +1 -1
- package/benchmarks/output/guardbench-conformance-card.json +12 -12
- package/benchmarks/output/guardbench-raw.json +240 -140
- package/benchmarks/output/guardbench-summary.json +350 -224
- package/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
- package/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
- package/benchmarks/output/submission-bundle/guardbench-conformance-card.json +12 -12
- package/benchmarks/output/submission-bundle/guardbench-raw.json +240 -140
- package/benchmarks/output/submission-bundle/guardbench-summary.json +350 -224
- package/benchmarks/output/submission-bundle/schemas/guardbench-raw.schema.json +21 -1
- package/benchmarks/output/submission-bundle/schemas/guardbench-summary.schema.json +23 -2
- package/benchmarks/output/submission-bundle/submission-manifest.json +14 -14
- package/benchmarks/output/submission-bundle/validation-report.json +1 -1
- package/benchmarks/output/summary.json +56 -56
- package/benchmarks/schemas/guardbench-raw.schema.json +21 -1
- package/benchmarks/schemas/guardbench-summary.schema.json +23 -2
- package/dist/mcp-server/config.d.ts +1 -1
- package/dist/mcp-server/config.js +1 -1
- package/dist/src/audrey.d.ts +10 -0
- package/dist/src/audrey.d.ts.map +1 -1
- package/dist/src/audrey.js +17 -4
- package/dist/src/audrey.js.map +1 -1
- package/dist/src/controller.d.ts +17 -1
- package/dist/src/controller.d.ts.map +1 -1
- package/dist/src/controller.js +52 -13
- package/dist/src/controller.js.map +1 -1
- package/dist/src/index.d.ts +2 -1
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +1 -1
- package/dist/src/index.js.map +1 -1
- package/dist/src/routes.d.ts.map +1 -1
- package/dist/src/routes.js +4 -1
- package/dist/src/routes.js.map +1 -1
- package/docs/paper/07-evaluation.md +4 -4
- package/docs/paper/audrey-paper-v1.md +5 -5
- package/docs/paper/evidence-ledger.md +1 -1
- package/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
- package/docs/paper/output/arxiv/main.tex +5 -5
- package/docs/paper/output/arxiv-compile-report.json +3 -3
- package/docs/paper/output/submission-bundle/README.md +27 -5
- package/docs/paper/output/submission-bundle/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +7 -7
- package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-evidence.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-conformance-card.json +12 -12
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-raw.json +240 -140
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-summary.json +350 -224
- package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
- package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
- package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/submission-manifest.json +14 -14
- package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/validation-report.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/summary.json +67 -67
- package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-raw.schema.json +21 -1
- package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-summary.schema.json +23 -2
- package/docs/paper/output/submission-bundle/docs/paper/07-evaluation.md +4 -4
- package/docs/paper/output/submission-bundle/docs/paper/audrey-paper-v1.md +5 -5
- package/docs/paper/output/submission-bundle/docs/paper/evidence-ledger.md +1 -1
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/main.tex +5 -5
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv-compile-report.json +3 -3
- package/docs/paper/output/submission-bundle/package.json +2 -2
- package/docs/paper/output/submission-bundle/paper-submission-manifest.json +35 -35
- package/package.json +2 -2
- package/scripts/smoke-cli.js +22 -2
- package/scripts/verify-release-readiness.mjs +50 -6
|
@@ -344,7 +344,7 @@ The deterministic demo, `audrey demo --scenario repeated-failure`, constructs a
|
|
|
344
344
|
|
|
345
345
|
The current paper version has two implemented empirical anchors. First, `benchmarks/snapshots/perf-0.22.2.json` reports canonical local performance under the mock-provider methodology: generated on 2026-05-01 from git SHA `e2e821b`, using mock 64-dimensional in-process embeddings, hybrid recall limit 5, and corpus sizes 100, 1,000, and 5,000 on Node 25.5.0 with a 24-core Ryzen 9 7900X3D and 62.9 GB RAM (Ledger: E20). Under that methodology, hybrid recall p95 is 1.82 ms, 2.364 ms, and 3.417 ms for those three sizes, and encode p95 is 0.589 ms, 2.147 ms, and 1.838 ms (Ledger: E21-E22).
|
|
346
346
|
|
|
347
|
-
Second, `bench:memory:check` is wired into the release gate
|
|
347
|
+
Second, `bench:memory:check` is wired into the release gate as an **internal regression suite**, not a competitive benchmark. It exists to catch retrieval/lifecycle regressions in Audrey itself. The suite includes hand-tuned weak local baselines (vector-only, keyword-plus-recency, recent-window) whose role is to anchor a relative pass margin — they are not stand-ins for production memory systems, and their scores should not be cited as comparative claims about any external system (Ledger: E23). The current checked-in output reports a 2026-05-15 mock-provider run in which Audrey scores 100% with 100% pass rate and the listed local stub baselines score in the 25-42% range in that same run (Ledger: E24). These numbers support regression-gate honesty inside this repository; they are not cross-system results and they do not replace GuardBench.
|
|
348
348
|
|
|
349
349
|
The README benchmark table currently differs from the canonical JSON snapshot, so the paper quotes only the JSON snapshot and tracks the README correction as a follow-up (Ledger: E28).
|
|
350
350
|
|
|
@@ -895,13 +895,13 @@ These numbers measure Audrey's local call path under an in-process mock embeddin
|
|
|
895
895
|
|
|
896
896
|
### Behavioral Regression Result
|
|
897
897
|
|
|
898
|
-
The current `benchmarks/output/summary.json` was generated on 2026-05-
|
|
898
|
+
The current `benchmarks/output/summary.json` was generated on 2026-05-15T17:52:00.842Z with command `node benchmarks/run.js --provider mock --dimensions 64` (Ledger: E24). It reports:
|
|
899
899
|
|
|
900
900
|
| System | Score Percent | Pass Rate | Average Duration Ms |
|
|
901
901
|
|---|---:|---:|---:|
|
|
902
|
-
| Audrey | 100 | 100 |
|
|
902
|
+
| Audrey | 100 | 100 | 93.58333333333333 |
|
|
903
903
|
| Vector Only | 41.66666666666667 | 25 | 0.25 |
|
|
904
|
-
| Keyword + Recency | 41.66666666666667 | 25 | 0.
|
|
904
|
+
| Keyword + Recency | 41.66666666666667 | 25 | 0.5833333333333334 |
|
|
905
905
|
| Recent Window | 37.5 | 25 | 0 |
|
|
906
906
|
|
|
907
907
|
This output is a regression-gate result. The baselines are toy local baselines used to catch retrieval and lifecycle regressions in the Audrey codebase. They are not external systems, not tuned competitor implementations, and not GuardBench baselines (Ledger: E23-E24). The current suite covers retrieval and operation families such as information extraction, knowledge updates, multi-session reasoning, conflict resolution, procedural learning, privacy boundary, overwrite, delete-and-abstain, semantic merge, and procedural merge (Ledger: E23-E24).
|
|
@@ -924,7 +924,7 @@ It reports local adapters only, not external-system comparisons (Ledger: E46):
|
|
|
924
924
|
| Evidence recall | 100% |
|
|
925
925
|
| Redaction leaks | 0 |
|
|
926
926
|
| Recall-degradation detection | 100% |
|
|
927
|
-
| Guard latency p50 / p95 |
|
|
927
|
+
| Guard latency p50 / p95 | 2.465 ms / 30.791 ms |
|
|
928
928
|
| Published artifact raw-secret leaks | 0 |
|
|
929
929
|
| Audrey Guard decision accuracy | 100% |
|
|
930
930
|
| No-memory decision accuracy | 10% |
|
|
@@ -49,7 +49,7 @@ Every implementation claim in the paper should point to one or more ledger IDs i
|
|
|
49
49
|
| E43 - Audrey exposes a Claude Code hook generator, guarded settings apply path, and hook-mode Guard command: `hook-config claude-code` emits hooks, `hook-config claude-code --apply --scope project|user` merges them into Claude Code settings with backup/idempotence, `guard --hook --fail-on-warn` consumes PreToolUse JSON and returns `hookSpecificOutput.permissionDecision`, and `observe-tool` records post-tool events. Codex hook wiring remains pending on a stable host hook surface. | Hook integration boundary | README.md; mcp-server/index.ts; tests/mcp-server.test.js | Yes, focused Vitest and CLI hook smoke passed on 2026-05-12 |
|
|
50
50
|
| E44 - Audrey preflight events now persist `preflight_evidence_ids` and `audrey_guard_action_key`; `memory_validate` accepts optional `preflight_event_id`, action key, and evidence ids, persists them on the validation audit event, and rejects validation lineage when the memory id was not evidence for that preflight. | Validation lineage implementation | src/action-key.ts; src/controller.ts; src/preflight.ts; src/audrey.ts; mcp-server/index.ts; tests/controller.test.js | Yes, focused Vitest passed on 2026-05-12 |
|
|
51
51
|
| E45 - Preflight risk scoring uses a fixed severity map (`info=0.1`, `low=0.25`, `medium=0.55`, `high=0.85`), sorts warnings by severity, and strict mode blocks on high-severity warnings; the scoring path does not consume validation feedback. | Fixed risk scoring boundary | src/preflight.ts:6-60,291-299,332-338; src/feedback.ts:3-18,70-163 | Yes, 2026-05-08 |
|
|
52
|
-
| E46 - `benchmarks/guardbench.js` runs ten local comparative GuardBench scenarios across Audrey Guard, no-memory, recent-window, vector-only, and FTS-only adapters and writes `benchmarks/output/guardbench-summary.json`, `benchmarks/output/guardbench-manifest.json`, and `benchmarks/output/guardbench-raw.json`; the latest local run has Audrey Guard passing 10/10 scenarios with 100% prevention rate, 0% false-block rate, 100% evidence recall, zero decision-output redaction leaks, zero published artifact raw-secret leaks, 100% recall-degradation detection, 100% decision accuracy, and
|
|
52
|
+
| E46 - `benchmarks/guardbench.js` runs ten local comparative GuardBench scenarios across Audrey Guard, no-memory, recent-window, vector-only, and FTS-only adapters and writes `benchmarks/output/guardbench-summary.json`, `benchmarks/output/guardbench-manifest.json`, and `benchmarks/output/guardbench-raw.json`; the latest local run has Audrey Guard passing 10/10 scenarios with 100% prevention rate, 0% false-block rate, 100% evidence recall, zero decision-output redaction leaks, zero published artifact raw-secret leaks, 100% recall-degradation detection, 100% decision accuracy, and 2.465ms/30.791ms p50/p95 guard latency under the mock-provider methodology. Baseline decision accuracy was no-memory 10%, recent-window 60%, vector-only 40%, and FTS-only 10%, with 0% full-contract pass rate for each baseline. | GuardBench local comparative results | benchmarks/guardbench.js; benchmarks/output/guardbench-summary.json; benchmarks/output/guardbench-manifest.json; benchmarks/output/guardbench-raw.json; package.json | Yes, `npm run bench:guard:check` passed on 2026-05-13 |
|
|
53
53
|
| E47 - GuardBench accepts external ESM adapters through `--adapter`, supports `default`, `adapter`, or `createGuardBenchAdapter()` exports, withholds `expectedDecision` and `requiredEvidence` during adapter execution, then scores adapter output against the same full-contract decision/evidence/redaction checks. | GuardBench external adapter contract | benchmarks/guardbench.js; tests/guardbench.test.js; package.json | Yes, `node scripts/run-vitest.mjs run tests/guardbench.test.js` passed on 2026-05-12 |
|
|
54
54
|
| E48 - Audrey ships a Mem0 Platform GuardBench adapter that uses the current Mem0 REST shape: V3 async memory add with event polling, V2 filtered memory search, and user-entity cleanup. It requires runtime `MEM0_API_KEY` and is not run by default. | First external-system GuardBench adapter | benchmarks/adapters/mem0-platform.mjs; tests/guardbench.test.js; README.md | Import/contract and mocked REST-flow tests passed on 2026-05-12; live Mem0 run not yet executed |
|
|
55
55
|
| E49 - GuardBench ships a credential-free example external adapter and a `bench:guard:adapter-smoke` script so the adapter loader can be exercised through the real CLI path without external credentials. | External adapter smoke path | benchmarks/adapters/example-allow.mjs; package.json; README.md; tests/guardbench.test.js | Yes, `npm run bench:guard:adapter-smoke` passed on 2026-05-12 |
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"schemaVersion": "1.0.0",
|
|
3
3
|
"suite": "Audrey arXiv source package",
|
|
4
|
-
"generatedAt": "2026-05-
|
|
4
|
+
"generatedAt": "2026-05-15T17:52:21.991Z",
|
|
5
5
|
"sourceMarkdown": "docs/paper/audrey-paper-v1.md",
|
|
6
6
|
"publicationPack": "docs/paper/publication-pack.json",
|
|
7
7
|
"sourceHashes": {
|
|
8
|
-
"sourceMarkdown": "
|
|
8
|
+
"sourceMarkdown": "1d6f2c4fc4f1337dbb7f829d7d309753c38154e0eee2dd1b1e0fad9eab007554",
|
|
9
9
|
"publicationPack": "a1a523d5938faea72be568b843ac3890e61cea6070b0cfa46acf22ad3d2fb974",
|
|
10
10
|
"referencesBib": "c0bfcaf7bfe37d6933c812e46352be8a95397eaa430a0f5bc94037600a53f654"
|
|
11
11
|
},
|
|
@@ -13,8 +13,8 @@
|
|
|
13
13
|
{
|
|
14
14
|
"path": "main.tex",
|
|
15
15
|
"source": "docs/paper/audrey-paper-v1.md",
|
|
16
|
-
"bytes":
|
|
17
|
-
"sha256": "
|
|
16
|
+
"bytes": 122653,
|
|
17
|
+
"sha256": "6a20a37058c038f1ec4a1b6f0d4dde592094ffd46d08735170e0a5eb8f39cb20"
|
|
18
18
|
},
|
|
19
19
|
{
|
|
20
20
|
"path": "references.bib",
|
|
@@ -385,7 +385,7 @@ The deterministic demo, \texttt{audrey demo --scenario repeated-failure}, constr
|
|
|
385
385
|
|
|
386
386
|
The current paper version has two implemented empirical anchors. First, \texttt{benchmarks/snapshots/perf-0.22.2.json} reports canonical local performance under the mock-provider methodology: generated on 2026-05-01 from git SHA \texttt{e2e821b}, using mock 64-dimensional in-process embeddings, hybrid recall limit 5, and corpus sizes 100, 1,000, and 5,000 on Node 25.5.0 with a 24-core Ryzen 9 7900X3D and 62.9 GB RAM (Ledger: E20). Under that methodology, hybrid recall p95 is 1.82 ms, 2.364 ms, and 3.417 ms for those three sizes, and encode p95 is 0.589 ms, 2.147 ms, and 1.838 ms (Ledger: E21-E22).
|
|
387
387
|
|
|
388
|
-
Second, \texttt{bench:memory:check} is wired into the release gate
|
|
388
|
+
Second, \texttt{bench:memory:check} is wired into the release gate as an \textbf{internal regression suite}, not a competitive benchmark. It exists to catch retrieval/lifecycle regressions in Audrey itself. The suite includes hand-tuned weak local baselines (vector-only, keyword-plus-recency, recent-window) whose role is to anchor a relative pass margin — they are not stand-ins for production memory systems, and their scores should not be cited as comparative claims about any external system (Ledger: E23). The current checked-in output reports a 2026-05-15 mock-provider run in which Audrey scores 100\% with 100\% pass rate and the listed local stub baselines score in the 25-42\% range in that same run (Ledger: E24). These numbers support regression-gate honesty inside this repository; they are not cross-system results and they do not replace GuardBench.
|
|
389
389
|
|
|
390
390
|
The README benchmark table currently differs from the canonical JSON snapshot, so the paper quotes only the JSON snapshot and tracks the README correction as a follow-up (Ledger: E28).
|
|
391
391
|
|
|
@@ -729,13 +729,13 @@ These numbers measure Audrey's local call path under an in-process mock embeddin
|
|
|
729
729
|
|
|
730
730
|
\subsection{Behavioral Regression Result}
|
|
731
731
|
|
|
732
|
-
The current \texttt{benchmarks/output/summary.json} was generated on 2026-05-
|
|
732
|
+
The current \texttt{benchmarks/output/summary.json} was generated on 2026-05-15T17:52:00.842Z with command \texttt{node benchmarks/run.js --provider mock --dimensions 64} (Ledger: E24). It reports:
|
|
733
733
|
|
|
734
734
|
\begin{verbatim}
|
|
735
735
|
| System | Score Percent | Pass Rate | Average Duration Ms |
|
|
736
|
-
| Audrey | 100 | 100 |
|
|
736
|
+
| Audrey | 100 | 100 | 93.58333333333333 |
|
|
737
737
|
| Vector Only | 41.66666666666667 | 25 | 0.25 |
|
|
738
|
-
| Keyword + Recency | 41.66666666666667 | 25 | 0.
|
|
738
|
+
| Keyword + Recency | 41.66666666666667 | 25 | 0.5833333333333334 |
|
|
739
739
|
| Recent Window | 37.5 | 25 | 0 |
|
|
740
740
|
\end{verbatim}
|
|
741
741
|
|
|
@@ -759,7 +759,7 @@ It reports local adapters only, not external-system comparisons (Ledger: E46):
|
|
|
759
759
|
| Evidence recall | 100% |
|
|
760
760
|
| Redaction leaks | 0 |
|
|
761
761
|
| Recall-degradation detection | 100% |
|
|
762
|
-
| Guard latency p50 / p95 |
|
|
762
|
+
| Guard latency p50 / p95 | 2.465 ms / 30.791 ms |
|
|
763
763
|
| Published artifact raw-secret leaks | 0 |
|
|
764
764
|
| Audrey Guard decision accuracy | 100% |
|
|
765
765
|
| No-memory decision accuracy | 10% |
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"schemaVersion": "1.0.0",
|
|
3
3
|
"suite": "Audrey arXiv compile check",
|
|
4
|
-
"generatedAt": "2026-05-
|
|
4
|
+
"generatedAt": "2026-05-15T17:52:22.233Z",
|
|
5
5
|
"source": {
|
|
6
6
|
"sourceDir": "docs/paper/output/arxiv",
|
|
7
7
|
"manifest": "docs/paper/output/arxiv/arxiv-manifest.json",
|
|
8
|
-
"manifestSha256": "
|
|
8
|
+
"manifestSha256": "b2676a107e6ff8955acb874e13c6cede29010200a0e1d43860a53624923b3b7e",
|
|
9
9
|
"mainTex": "docs/paper/output/arxiv/main.tex",
|
|
10
|
-
"mainTexSha256": "
|
|
10
|
+
"mainTexSha256": "6a20a37058c038f1ec4a1b6f0d4dde592094ffd46d08735170e0a5eb8f39cb20",
|
|
11
11
|
"referencesBib": "docs/paper/output/arxiv/references.bib",
|
|
12
12
|
"referencesBibSha256": "c0bfcaf7bfe37d6933c812e46352be8a95397eaa430a0f5bc94037600a53f654"
|
|
13
13
|
},
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "audrey",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.1",
|
|
4
4
|
"description": "Local-first memory runtime for AI agents with recall, consolidation, memory reflexes, contradiction detection, and tool-trace learning",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/src/index.js",
|
|
@@ -71,7 +71,7 @@
|
|
|
71
71
|
"pretest": "npm run build && npm run test:artifacts",
|
|
72
72
|
"test": "node scripts/run-vitest.mjs",
|
|
73
73
|
"test:watch": "node scripts/run-vitest.mjs watch",
|
|
74
|
-
"test:artifacts": "npm run bench:perf && npm run bench:memory:check && npm run bench:guard:check && npm run bench:guard:card && npm run bench:guard:bundle && npm run bench:guard:bundle:verify && npm run bench:guard:leaderboard && npm run bench:guard:adapter-self-test && npm run bench:guard:adapter-self-test:validate && npm run bench:guard:external:dry-run && npm run bench:guard:external:evidence && npm run paper:arxiv && npm run paper:arxiv:verify && npm run paper:arxiv:compile && npm run paper:bundle && npm run paper:bundle:verify",
|
|
74
|
+
"test:artifacts": "npm run bench:perf && npm run bench:memory:check && npm run bench:guard:check && npm run bench:guard:card && npm run bench:guard:bundle && npm run bench:guard:bundle:verify && npm run bench:guard:leaderboard && npm run bench:guard:adapter-self-test && npm run bench:guard:adapter-self-test:validate && npm run bench:guard:external:dry-run && npm run bench:guard:external:evidence && npm run paper:sync && npm run paper:arxiv && npm run paper:arxiv:verify && npm run paper:arxiv:compile && npm run paper:bundle && npm run paper:bundle:verify && npm run paper:verify",
|
|
75
75
|
"release:gate": "npm run typecheck && npm test && npm run bench:memory:check && npm run bench:guard:check && npm run bench:guard:card && npm run bench:guard:bundle && npm run bench:guard:bundle:verify && npm run bench:guard:leaderboard && npm run bench:guard:adapter-registry:validate && npm run bench:guard:external:dry-run && npm run bench:guard:external:evidence && npm run bench:guard:adapter-module:validate && npm run bench:guard:adapter-self-test && npm run bench:guard:adapter-self-test:validate && npm run bench:guard:validate && npm run bench:guard:publication:verify && npm run python:release:check && npm run smoke:cli && npm run security:audit && npm run pack:check",
|
|
76
76
|
"release:gate:sandbox": "npm run build && npm run typecheck && npm run bench:perf && npm run bench:memory:check && npm run bench:guard:check && npm run bench:guard:card && npm run bench:guard:bundle && npm run bench:guard:bundle:verify && npm run bench:guard:leaderboard && npm run bench:guard:adapter-registry:validate && npm run bench:guard:external:dry-run && npm run bench:guard:external:evidence && npm run bench:guard:adapter-module:validate && npm run bench:guard:adapter-self-test && npm run bench:guard:adapter-self-test:validate && npm run bench:guard:validate && npm run bench:guard:publication:verify && npm run python:release:check && npm run smoke:cli && npm run security:audit && npm run pack:check",
|
|
77
77
|
"release:gate:paper": "npm run build && npm run typecheck && npm run bench:perf && npm run bench:memory:check && npm run bench:guard:check && npm run bench:guard:card && npm run bench:guard:bundle && npm run bench:guard:bundle:verify && npm run bench:guard:leaderboard && npm run bench:guard:adapter-registry:validate && npm run bench:guard:external:dry-run && npm run bench:guard:external:evidence && npm run bench:guard:adapter-module:validate && npm run bench:guard:adapter-self-test && npm run bench:guard:adapter-self-test:validate && npm run bench:guard:validate && npm run bench:guard:publication:verify && npm run python:release:check && npm run paper:sync && npm run paper:arxiv && npm run paper:arxiv:verify && npm run paper:arxiv:compile && npm run paper:launch-plan && npm run paper:launch-results && npm run paper:bundle && npm run paper:bundle:verify && npm run paper:verify && npm run release:readiness && npm run smoke:cli && npm run security:audit && npm run pack:check",
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"schemaVersion": "1.0.0",
|
|
3
3
|
"suite": "Audrey paper submission bundle",
|
|
4
|
-
"generatedAt": "2026-05-
|
|
4
|
+
"generatedAt": "2026-05-15T17:52:23.862Z",
|
|
5
5
|
"sourceRoot": ".",
|
|
6
6
|
"outDir": "docs/paper/output/submission-bundle",
|
|
7
7
|
"claimVerification": {
|
|
@@ -17,33 +17,33 @@
|
|
|
17
17
|
"scenarios": 10,
|
|
18
18
|
"redactionLeaks": 0,
|
|
19
19
|
"artifactLeaks": 0,
|
|
20
|
-
"latencyP50Ms":
|
|
21
|
-
"latencyP95Ms":
|
|
20
|
+
"latencyP50Ms": 2.465,
|
|
21
|
+
"latencyP95Ms": 30.791
|
|
22
22
|
},
|
|
23
23
|
"files": [
|
|
24
24
|
{
|
|
25
25
|
"path": "benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json",
|
|
26
26
|
"source": "benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json",
|
|
27
|
-
"bytes":
|
|
28
|
-
"sha256": "
|
|
27
|
+
"bytes": 1267,
|
|
28
|
+
"sha256": "376f40f1ddcaf7ec80673c5262a9b6288fa63c4edca2bc9be83f001242202535"
|
|
29
29
|
},
|
|
30
30
|
{
|
|
31
31
|
"path": "benchmarks/output/external/guardbench-external-dry-run.json",
|
|
32
32
|
"source": "benchmarks/output/external/guardbench-external-dry-run.json",
|
|
33
33
|
"bytes": 1831,
|
|
34
|
-
"sha256": "
|
|
34
|
+
"sha256": "c2ddf803edabc02ebb02c50fd135fa44a6de1b24d86cf5e7a54eeda35d7da0a6"
|
|
35
35
|
},
|
|
36
36
|
{
|
|
37
37
|
"path": "benchmarks/output/external/guardbench-external-evidence.json",
|
|
38
38
|
"source": "benchmarks/output/external/guardbench-external-evidence.json",
|
|
39
39
|
"bytes": 1638,
|
|
40
|
-
"sha256": "
|
|
40
|
+
"sha256": "850c41b067419af71b888d994d0d947e0cd7c127c88f606cac2753da9b7aed77"
|
|
41
41
|
},
|
|
42
42
|
{
|
|
43
43
|
"path": "benchmarks/output/guardbench-conformance-card.json",
|
|
44
44
|
"source": "benchmarks/output/guardbench-conformance-card.json",
|
|
45
45
|
"bytes": 1735,
|
|
46
|
-
"sha256": "
|
|
46
|
+
"sha256": "65d4a10afe10178e4e4e3d508e0e06ce5a4b28713debc27cf4d9b60a46050d2f"
|
|
47
47
|
},
|
|
48
48
|
{
|
|
49
49
|
"path": "benchmarks/output/guardbench-manifest.json",
|
|
@@ -54,44 +54,44 @@
|
|
|
54
54
|
{
|
|
55
55
|
"path": "benchmarks/output/guardbench-raw.json",
|
|
56
56
|
"source": "benchmarks/output/guardbench-raw.json",
|
|
57
|
-
"bytes":
|
|
58
|
-
"sha256": "
|
|
57
|
+
"bytes": 43455,
|
|
58
|
+
"sha256": "3b78d1a2432e7d72752f96d9ac4b2b49cf6f59eb65548fbadb21ea6adbb86b37"
|
|
59
59
|
},
|
|
60
60
|
{
|
|
61
61
|
"path": "benchmarks/output/guardbench-summary.json",
|
|
62
62
|
"source": "benchmarks/output/guardbench-summary.json",
|
|
63
|
-
"bytes":
|
|
64
|
-
"sha256": "
|
|
63
|
+
"bytes": 69350,
|
|
64
|
+
"sha256": "21023f230b761f1b43f8ecabe519dd6b320c62ad56f0b6aa28bbcf7a2c8838f5"
|
|
65
65
|
},
|
|
66
66
|
{
|
|
67
67
|
"path": "benchmarks/output/leaderboard/guardbench-leaderboard.json",
|
|
68
68
|
"source": "benchmarks/output/leaderboard/guardbench-leaderboard.json",
|
|
69
69
|
"bytes": 2798,
|
|
70
|
-
"sha256": "
|
|
70
|
+
"sha256": "9024b05b018dd5c930e4a7dd1dd01b7b7118552ef4c2d8e69d117a4fd83d20d5"
|
|
71
71
|
},
|
|
72
72
|
{
|
|
73
73
|
"path": "benchmarks/output/leaderboard/guardbench-leaderboard.md",
|
|
74
74
|
"source": "benchmarks/output/leaderboard/guardbench-leaderboard.md",
|
|
75
75
|
"bytes": 364,
|
|
76
|
-
"sha256": "
|
|
76
|
+
"sha256": "2711a8be95bbeaa1daddb088002892d629c3901a0cd333c5ab46bc4bb373a928"
|
|
77
77
|
},
|
|
78
78
|
{
|
|
79
79
|
"path": "benchmarks/output/submission-bundle/submission-manifest.json",
|
|
80
80
|
"source": "benchmarks/output/submission-bundle/submission-manifest.json",
|
|
81
81
|
"bytes": 3986,
|
|
82
|
-
"sha256": "
|
|
82
|
+
"sha256": "40aab20f49a86c464b86f589a36774fbc89a6568807c58b9ab6c94413a01d61a"
|
|
83
83
|
},
|
|
84
84
|
{
|
|
85
85
|
"path": "benchmarks/output/submission-bundle/validation-report.json",
|
|
86
86
|
"source": "benchmarks/output/submission-bundle/validation-report.json",
|
|
87
87
|
"bytes": 739,
|
|
88
|
-
"sha256": "
|
|
88
|
+
"sha256": "570d28760ad3611ccf4f08e98281a3fe659730463d13e96ff31073c7ee10ce65"
|
|
89
89
|
},
|
|
90
90
|
{
|
|
91
91
|
"path": "benchmarks/output/summary.json",
|
|
92
92
|
"source": "benchmarks/output/summary.json",
|
|
93
|
-
"bytes":
|
|
94
|
-
"sha256": "
|
|
93
|
+
"bytes": 100747,
|
|
94
|
+
"sha256": "839c3b3686ba61c45119cb87d0277857adf5cc2a291390bd21bf6726598b9b1b"
|
|
95
95
|
},
|
|
96
96
|
{
|
|
97
97
|
"path": "benchmarks/schemas/guardbench-adapter-registry.schema.json",
|
|
@@ -150,8 +150,8 @@
|
|
|
150
150
|
{
|
|
151
151
|
"path": "benchmarks/schemas/guardbench-raw.schema.json",
|
|
152
152
|
"source": "benchmarks/schemas/guardbench-raw.schema.json",
|
|
153
|
-
"bytes":
|
|
154
|
-
"sha256": "
|
|
153
|
+
"bytes": 5653,
|
|
154
|
+
"sha256": "2893204554696b69e4e1478f0d147cb95603b5f698ff112ebe9de45904b44045"
|
|
155
155
|
},
|
|
156
156
|
{
|
|
157
157
|
"path": "benchmarks/schemas/guardbench-submission-manifest.schema.json",
|
|
@@ -162,8 +162,8 @@
|
|
|
162
162
|
{
|
|
163
163
|
"path": "benchmarks/schemas/guardbench-summary.schema.json",
|
|
164
164
|
"source": "benchmarks/schemas/guardbench-summary.schema.json",
|
|
165
|
-
"bytes":
|
|
166
|
-
"sha256": "
|
|
165
|
+
"bytes": 8004,
|
|
166
|
+
"sha256": "2545b83cdd1d5d5d9abd535950779b5bbf3e5a2b04c3a36c35c2bbacc5efd9d9"
|
|
167
167
|
},
|
|
168
168
|
{
|
|
169
169
|
"path": "docs/AUDREY_PAPER_OUTLINE.md",
|
|
@@ -216,8 +216,8 @@
|
|
|
216
216
|
{
|
|
217
217
|
"path": "docs/paper/07-evaluation.md",
|
|
218
218
|
"source": "docs/paper/07-evaluation.md",
|
|
219
|
-
"bytes":
|
|
220
|
-
"sha256": "
|
|
219
|
+
"bytes": 11254,
|
|
220
|
+
"sha256": "6c2e903d0f59710dc06db472df1837da99297ed69133984d677a62a15882b77e"
|
|
221
221
|
},
|
|
222
222
|
{
|
|
223
223
|
"path": "docs/paper/08-discussion-limitations.md",
|
|
@@ -252,8 +252,8 @@
|
|
|
252
252
|
{
|
|
253
253
|
"path": "docs/paper/audrey-paper-v1.md",
|
|
254
254
|
"source": "docs/paper/audrey-paper-v1.md",
|
|
255
|
-
"bytes":
|
|
256
|
-
"sha256": "
|
|
255
|
+
"bytes": 116602,
|
|
256
|
+
"sha256": "1d6f2c4fc4f1337dbb7f829d7d309753c38154e0eee2dd1b1e0fad9eab007554"
|
|
257
257
|
},
|
|
258
258
|
{
|
|
259
259
|
"path": "docs/paper/browser-launch-plan.json",
|
|
@@ -295,25 +295,25 @@
|
|
|
295
295
|
"path": "docs/paper/evidence-ledger.md",
|
|
296
296
|
"source": "docs/paper/evidence-ledger.md",
|
|
297
297
|
"bytes": 61844,
|
|
298
|
-
"sha256": "
|
|
298
|
+
"sha256": "fe26c96385b5b8dd90977be7fef0c4b97ce5952b8ff6464651308d9ec809f065"
|
|
299
299
|
},
|
|
300
300
|
{
|
|
301
301
|
"path": "docs/paper/output/arxiv-compile-report.json",
|
|
302
302
|
"source": "docs/paper/output/arxiv-compile-report.json",
|
|
303
303
|
"bytes": 924,
|
|
304
|
-
"sha256": "
|
|
304
|
+
"sha256": "8592b56bef6fff37eb0e5b5de10f72865f79a1a1b50203d1b5f51b13fb632943"
|
|
305
305
|
},
|
|
306
306
|
{
|
|
307
307
|
"path": "docs/paper/output/arxiv/arxiv-manifest.json",
|
|
308
308
|
"source": "docs/paper/output/arxiv/arxiv-manifest.json",
|
|
309
309
|
"bytes": 1316,
|
|
310
|
-
"sha256": "
|
|
310
|
+
"sha256": "b2676a107e6ff8955acb874e13c6cede29010200a0e1d43860a53624923b3b7e"
|
|
311
311
|
},
|
|
312
312
|
{
|
|
313
313
|
"path": "docs/paper/output/arxiv/main.tex",
|
|
314
314
|
"source": "docs/paper/output/arxiv/main.tex",
|
|
315
|
-
"bytes":
|
|
316
|
-
"sha256": "
|
|
315
|
+
"bytes": 122653,
|
|
316
|
+
"sha256": "6a20a37058c038f1ec4a1b6f0d4dde592094ffd46d08735170e0a5eb8f39cb20"
|
|
317
317
|
},
|
|
318
318
|
{
|
|
319
319
|
"path": "docs/paper/output/arxiv/README-arxiv.txt",
|
|
@@ -366,14 +366,14 @@
|
|
|
366
366
|
{
|
|
367
367
|
"path": "package.json",
|
|
368
368
|
"source": "package.json",
|
|
369
|
-
"bytes":
|
|
370
|
-
"sha256": "
|
|
369
|
+
"bytes": 11714,
|
|
370
|
+
"sha256": "6bf6a70f006acba918526d14f0e28257f44593e60d0b602eb351b6111626e490"
|
|
371
371
|
},
|
|
372
372
|
{
|
|
373
373
|
"path": "README.md",
|
|
374
374
|
"source": "README.md",
|
|
375
|
-
"bytes":
|
|
376
|
-
"sha256": "
|
|
375
|
+
"bytes": 26041,
|
|
376
|
+
"sha256": "9d41650ff29b393912f4ea20a450bd99002202fe6fda6d6529e25cbbdd7352db"
|
|
377
377
|
}
|
|
378
378
|
]
|
|
379
379
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "audrey",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.1",
|
|
4
4
|
"description": "Local-first memory runtime for AI agents with recall, consolidation, memory reflexes, contradiction detection, and tool-trace learning",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/src/index.js",
|
|
@@ -71,7 +71,7 @@
|
|
|
71
71
|
"pretest": "npm run build && npm run test:artifacts",
|
|
72
72
|
"test": "node scripts/run-vitest.mjs",
|
|
73
73
|
"test:watch": "node scripts/run-vitest.mjs watch",
|
|
74
|
-
"test:artifacts": "npm run bench:perf && npm run bench:memory:check && npm run bench:guard:check && npm run bench:guard:card && npm run bench:guard:bundle && npm run bench:guard:bundle:verify && npm run bench:guard:leaderboard && npm run bench:guard:adapter-self-test && npm run bench:guard:adapter-self-test:validate && npm run bench:guard:external:dry-run && npm run bench:guard:external:evidence && npm run paper:arxiv && npm run paper:arxiv:verify && npm run paper:arxiv:compile && npm run paper:bundle && npm run paper:bundle:verify",
|
|
74
|
+
"test:artifacts": "npm run bench:perf && npm run bench:memory:check && npm run bench:guard:check && npm run bench:guard:card && npm run bench:guard:bundle && npm run bench:guard:bundle:verify && npm run bench:guard:leaderboard && npm run bench:guard:adapter-self-test && npm run bench:guard:adapter-self-test:validate && npm run bench:guard:external:dry-run && npm run bench:guard:external:evidence && npm run paper:sync && npm run paper:arxiv && npm run paper:arxiv:verify && npm run paper:arxiv:compile && npm run paper:bundle && npm run paper:bundle:verify && npm run paper:verify",
|
|
75
75
|
"release:gate": "npm run typecheck && npm test && npm run bench:memory:check && npm run bench:guard:check && npm run bench:guard:card && npm run bench:guard:bundle && npm run bench:guard:bundle:verify && npm run bench:guard:leaderboard && npm run bench:guard:adapter-registry:validate && npm run bench:guard:external:dry-run && npm run bench:guard:external:evidence && npm run bench:guard:adapter-module:validate && npm run bench:guard:adapter-self-test && npm run bench:guard:adapter-self-test:validate && npm run bench:guard:validate && npm run bench:guard:publication:verify && npm run python:release:check && npm run smoke:cli && npm run security:audit && npm run pack:check",
|
|
76
76
|
"release:gate:sandbox": "npm run build && npm run typecheck && npm run bench:perf && npm run bench:memory:check && npm run bench:guard:check && npm run bench:guard:card && npm run bench:guard:bundle && npm run bench:guard:bundle:verify && npm run bench:guard:leaderboard && npm run bench:guard:adapter-registry:validate && npm run bench:guard:external:dry-run && npm run bench:guard:external:evidence && npm run bench:guard:adapter-module:validate && npm run bench:guard:adapter-self-test && npm run bench:guard:adapter-self-test:validate && npm run bench:guard:validate && npm run bench:guard:publication:verify && npm run python:release:check && npm run smoke:cli && npm run security:audit && npm run pack:check",
|
|
77
77
|
"release:gate:paper": "npm run build && npm run typecheck && npm run bench:perf && npm run bench:memory:check && npm run bench:guard:check && npm run bench:guard:card && npm run bench:guard:bundle && npm run bench:guard:bundle:verify && npm run bench:guard:leaderboard && npm run bench:guard:adapter-registry:validate && npm run bench:guard:external:dry-run && npm run bench:guard:external:evidence && npm run bench:guard:adapter-module:validate && npm run bench:guard:adapter-self-test && npm run bench:guard:adapter-self-test:validate && npm run bench:guard:validate && npm run bench:guard:publication:verify && npm run python:release:check && npm run paper:sync && npm run paper:arxiv && npm run paper:arxiv:verify && npm run paper:arxiv:compile && npm run paper:launch-plan && npm run paper:launch-results && npm run paper:bundle && npm run paper:bundle:verify && npm run paper:verify && npm run release:readiness && npm run smoke:cli && npm run security:audit && npm run pack:check",
|
package/scripts/smoke-cli.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
3
|
import { spawnSync } from 'node:child_process';
|
|
4
|
-
import { existsSync, mkdtempSync, rmSync, readFileSync } from 'node:fs';
|
|
4
|
+
import { existsSync, mkdirSync, mkdtempSync, rmSync, readFileSync } from 'node:fs';
|
|
5
5
|
import { tmpdir } from 'node:os';
|
|
6
6
|
import { dirname, join, resolve } from 'node:path';
|
|
7
7
|
import { fileURLToPath } from 'node:url';
|
|
@@ -23,7 +23,27 @@ if (!existsSync(cli)) {
|
|
|
23
23
|
fail(`missing built CLI at ${cli}; run npm run build first`);
|
|
24
24
|
}
|
|
25
25
|
|
|
26
|
-
|
|
26
|
+
function createTempRoot() {
|
|
27
|
+
const candidates = [
|
|
28
|
+
process.env.AUDREY_SMOKE_TMPDIR,
|
|
29
|
+
tmpdir(),
|
|
30
|
+
join(root, '.tmp'),
|
|
31
|
+
].filter(Boolean);
|
|
32
|
+
const failures = [];
|
|
33
|
+
|
|
34
|
+
for (const candidate of candidates) {
|
|
35
|
+
try {
|
|
36
|
+
mkdirSync(candidate, { recursive: true });
|
|
37
|
+
return mkdtempSync(join(candidate, 'audrey-smoke-'));
|
|
38
|
+
} catch (error) {
|
|
39
|
+
failures.push(`${candidate}: ${error.code ?? error.message}`);
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
fail(`unable to create smoke temp directory (${failures.join('; ')})`);
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
const tempRoot = createTempRoot();
|
|
27
47
|
const env = {
|
|
28
48
|
...process.env,
|
|
29
49
|
AUDREY_DATA_DIR: join(tempRoot, 'store'),
|
|
@@ -16,6 +16,7 @@ const ROOT = process.cwd();
|
|
|
16
16
|
const DEFAULT_TARGET_VERSION = '1.0.0';
|
|
17
17
|
const PYPI_CREDENTIAL_ENVS = ['TWINE_PASSWORD', 'PYPI_API_TOKEN', 'UV_PUBLISH_TOKEN'];
|
|
18
18
|
const NPM_REGISTRY = 'https://registry.npmjs.org/';
|
|
19
|
+
const PYPI_JSON_BASE = 'https://pypi.org/pypi';
|
|
19
20
|
|
|
20
21
|
function fromRoot(path) {
|
|
21
22
|
return resolve(ROOT, path);
|
|
@@ -35,6 +36,7 @@ function parseArgs(argv = process.argv.slice(2)) {
|
|
|
35
36
|
const args = {
|
|
36
37
|
targetVersion: DEFAULT_TARGET_VERSION,
|
|
37
38
|
allowPending: false,
|
|
39
|
+
checkPypiRegistry: true,
|
|
38
40
|
json: false,
|
|
39
41
|
};
|
|
40
42
|
|
|
@@ -42,6 +44,7 @@ function parseArgs(argv = process.argv.slice(2)) {
|
|
|
42
44
|
const token = argv[i];
|
|
43
45
|
if ((token === '--target-version' || token === '--version') && argv[i + 1]) args.targetVersion = argv[++i];
|
|
44
46
|
else if (token === '--allow-pending') args.allowPending = true;
|
|
47
|
+
else if (token === '--skip-pypi-registry') args.checkPypiRegistry = false;
|
|
45
48
|
else if (token === '--json') args.json = true;
|
|
46
49
|
else if (token === '--help' || token === '-h') args.help = true;
|
|
47
50
|
else throw new Error(`Unknown argument: ${token}`);
|
|
@@ -56,6 +59,7 @@ function usage() {
|
|
|
56
59
|
Options:
|
|
57
60
|
--target-version <version> Target release version. Default: ${DEFAULT_TARGET_VERSION}.
|
|
58
61
|
--allow-pending Exit 0 when only publish/account/credential blockers remain.
|
|
62
|
+
--skip-pypi-registry Do not check whether the target PyPI version is already public.
|
|
59
63
|
--json Print the machine-readable readiness report.
|
|
60
64
|
`;
|
|
61
65
|
}
|
|
@@ -501,10 +505,21 @@ function pythonDistCheck(targetVersion) {
|
|
|
501
505
|
);
|
|
502
506
|
}
|
|
503
507
|
|
|
504
|
-
function
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
+
async function pypiRegistryVersionStatus(packageName, targetVersion, fetchImpl = fetch) {
|
|
509
|
+
try {
|
|
510
|
+
const response = await fetchImpl(`${PYPI_JSON_BASE}/${encodeURIComponent(packageName)}/${encodeURIComponent(targetVersion)}/json`, {
|
|
511
|
+
headers: { accept: 'application/json' },
|
|
512
|
+
});
|
|
513
|
+
if (response.ok) return { ok: true, published: true, status: response.status };
|
|
514
|
+
if (response.status === 404) return { ok: true, published: false, status: response.status };
|
|
515
|
+
return { ok: false, published: false, status: response.status, error: `PyPI returned HTTP ${response.status}` };
|
|
516
|
+
} catch (error) {
|
|
517
|
+
return { ok: false, published: false, status: 'network-error', error: error.message };
|
|
518
|
+
}
|
|
519
|
+
}
|
|
520
|
+
|
|
521
|
+
export async function pypiPackageTargetStatus({ packageName, version }, targetVersion, options = {}) {
|
|
522
|
+
const env = options.env ?? process.env;
|
|
508
523
|
const evidence = [`python package=${packageName}`, `python version=${version ?? 'missing'}`];
|
|
509
524
|
|
|
510
525
|
if (version !== targetVersion) {
|
|
@@ -516,7 +531,26 @@ function pypiPublishCheck(targetVersion) {
|
|
|
516
531
|
);
|
|
517
532
|
}
|
|
518
533
|
|
|
519
|
-
|
|
534
|
+
if (options.checkRegistry === true) {
|
|
535
|
+
const registry = await pypiRegistryVersionStatus(packageName, targetVersion, options.fetchImpl);
|
|
536
|
+
if (registry.ok && registry.published) {
|
|
537
|
+
return ok('pypi-package-target', `PyPI package is already published as ${targetVersion}`, [
|
|
538
|
+
...evidence,
|
|
539
|
+
`registry=${packageName}==${targetVersion}`,
|
|
540
|
+
]);
|
|
541
|
+
}
|
|
542
|
+
if (!registry.ok) {
|
|
543
|
+
return pending(
|
|
544
|
+
'pypi-package-target',
|
|
545
|
+
`PyPI package is ready to publish as ${targetVersion}`,
|
|
546
|
+
evidence,
|
|
547
|
+
[`Verify PyPI registry availability before publishing (${registry.error ?? `status=${registry.status}`})`],
|
|
548
|
+
);
|
|
549
|
+
}
|
|
550
|
+
|
|
551
|
+
evidence.push(`registry=${packageName}==${targetVersion}:unpublished`);
|
|
552
|
+
}
|
|
553
|
+
const credentialEnv = PYPI_CREDENTIAL_ENVS.find(name => Boolean(env[name]));
|
|
520
554
|
if (!credentialEnv) {
|
|
521
555
|
return pending(
|
|
522
556
|
'pypi-package-target',
|
|
@@ -529,6 +563,16 @@ function pypiPublishCheck(targetVersion) {
|
|
|
529
563
|
return ok('pypi-package-target', `PyPI package is ready to publish as ${targetVersion}`, [...evidence, `credentialEnv=${credentialEnv}`]);
|
|
530
564
|
}
|
|
531
565
|
|
|
566
|
+
async function pypiPublishCheck(targetVersion, options = {}) {
|
|
567
|
+
const pyproject = readText('python/pyproject.toml');
|
|
568
|
+
const packageName = pyproject.match(/^name\s*=\s*"([^"]+)"/m)?.[1] ?? 'unknown';
|
|
569
|
+
return pypiPackageTargetStatus(
|
|
570
|
+
{ packageName, version: pythonVersion() },
|
|
571
|
+
targetVersion,
|
|
572
|
+
{ checkRegistry: options.checkPypiRegistry === true },
|
|
573
|
+
);
|
|
574
|
+
}
|
|
575
|
+
|
|
532
576
|
async function paperChecks() {
|
|
533
577
|
const claimReport = await verifyPaperClaims();
|
|
534
578
|
const publicationPackReport = await verifyPublicationPack();
|
|
@@ -688,7 +732,7 @@ export async function verifyReleaseReadiness(options = {}) {
|
|
|
688
732
|
await browserPublicationCheck(),
|
|
689
733
|
await externalEvidenceCheck(),
|
|
690
734
|
packageDryRunCheck(targetVersion),
|
|
691
|
-
pypiPublishCheck(targetVersion),
|
|
735
|
+
await pypiPublishCheck(targetVersion, options),
|
|
692
736
|
];
|
|
693
737
|
const failures = checks.flatMap(row => row.failures.map(failure => `${row.id}: ${failure}`));
|
|
694
738
|
const blockers = checks.flatMap(row => row.blockers.map(blocker => `${row.id}: ${blocker}`));
|