npm - audrey - Versions diffs - 0.21.0 → 1.0.0 - Mend

audrey 0.21.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (346)

package/CHANGELOG.md +238 -0
package/LICENSE +21 -21
package/README.md +281 -33
package/SECURITY.md +30 -0
package/benchmarks/adapter-kit.mjs +20 -0
package/benchmarks/adapter-self-test.mjs +166 -0
package/benchmarks/adapters/example-allow.mjs +28 -0
package/benchmarks/adapters/mem0-platform.mjs +267 -0
package/benchmarks/adapters/registry.json +51 -0
package/benchmarks/adapters/zep-cloud.mjs +280 -0
package/benchmarks/baselines.js +169 -0
package/benchmarks/build-leaderboard.mjs +170 -0
package/benchmarks/cases.js +537 -0
package/benchmarks/create-conformance-card.mjs +139 -0
package/benchmarks/create-submission-bundle.mjs +176 -0
package/benchmarks/dry-run-external-adapters.mjs +165 -0
package/benchmarks/guardbench.js +1035 -0
package/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +50 -0
package/benchmarks/output/external/guardbench-external-dry-run.json +69 -0
package/benchmarks/output/external/guardbench-external-evidence.json +56 -0
package/benchmarks/output/guardbench-conformance-card.json +63 -0
package/benchmarks/output/guardbench-manifest.json +414 -0
package/benchmarks/output/guardbench-raw.json +1171 -0
package/benchmarks/output/guardbench-summary.json +1981 -0
package/benchmarks/output/leaderboard/guardbench-leaderboard.json +93 -0
package/benchmarks/output/leaderboard/guardbench-leaderboard.md +7 -0
package/benchmarks/output/submission-bundle/guardbench-conformance-card.json +63 -0
package/benchmarks/output/submission-bundle/guardbench-manifest.json +414 -0
package/benchmarks/output/submission-bundle/guardbench-raw.json +1171 -0
package/benchmarks/output/submission-bundle/guardbench-summary.json +1981 -0
package/benchmarks/output/submission-bundle/schemas/guardbench-adapter-registry.schema.json +69 -0
package/benchmarks/output/submission-bundle/schemas/guardbench-adapter-self-test.schema.json +156 -0
package/benchmarks/output/submission-bundle/schemas/guardbench-conformance-card.schema.json +184 -0
package/benchmarks/output/submission-bundle/schemas/guardbench-external-dry-run.schema.json +74 -0
package/benchmarks/output/submission-bundle/schemas/guardbench-external-evidence.schema.json +108 -0
package/benchmarks/output/submission-bundle/schemas/guardbench-external-run.schema.json +160 -0
package/benchmarks/output/submission-bundle/schemas/guardbench-leaderboard.schema.json +179 -0
package/benchmarks/output/submission-bundle/schemas/guardbench-manifest.schema.json +213 -0
package/benchmarks/output/submission-bundle/schemas/guardbench-publication-verification.schema.json +47 -0
package/benchmarks/output/submission-bundle/schemas/guardbench-raw.schema.json +164 -0
package/benchmarks/output/submission-bundle/schemas/guardbench-submission-manifest.schema.json +151 -0
package/benchmarks/output/submission-bundle/schemas/guardbench-summary.schema.json +228 -0
package/benchmarks/output/submission-bundle/submission-manifest.json +131 -0
package/benchmarks/output/submission-bundle/validation-report.json +31 -0
package/benchmarks/output/summary.json +2354 -0
package/benchmarks/perf-snapshot.js +304 -0
package/benchmarks/perf.bench.js +161 -0
package/benchmarks/public-paths.mjs +78 -0
package/benchmarks/reference-results.js +70 -0
package/benchmarks/report.js +259 -0
package/benchmarks/run-external-guardbench.mjs +281 -0
package/benchmarks/run.js +682 -0
package/benchmarks/schemas/guardbench-adapter-registry.schema.json +69 -0
package/benchmarks/schemas/guardbench-adapter-self-test.schema.json +156 -0
package/benchmarks/schemas/guardbench-conformance-card.schema.json +184 -0
package/benchmarks/schemas/guardbench-external-dry-run.schema.json +74 -0
package/benchmarks/schemas/guardbench-external-evidence.schema.json +108 -0
package/benchmarks/schemas/guardbench-external-run.schema.json +160 -0
package/benchmarks/schemas/guardbench-leaderboard.schema.json +179 -0
package/benchmarks/schemas/guardbench-manifest.schema.json +213 -0
package/benchmarks/schemas/guardbench-publication-verification.schema.json +47 -0
package/benchmarks/schemas/guardbench-raw.schema.json +164 -0
package/benchmarks/schemas/guardbench-submission-manifest.schema.json +151 -0
package/benchmarks/schemas/guardbench-summary.schema.json +228 -0
package/benchmarks/snapshots/perf-0.22.2.json +123 -0
package/benchmarks/snapshots/perf-0.23.0.json +123 -0
package/benchmarks/validate-adapter-module.mjs +104 -0
package/benchmarks/validate-adapter-registry.mjs +134 -0
package/benchmarks/validate-adapter-self-test.mjs +96 -0
package/benchmarks/validate-guardbench-artifacts.mjs +343 -0
package/benchmarks/verify-external-evidence.mjs +296 -0
package/benchmarks/verify-publication-artifacts.mjs +286 -0
package/benchmarks/verify-submission-bundle.mjs +167 -0
package/dist/mcp-server/config.d.ts +5 -4
package/dist/mcp-server/config.d.ts.map +1 -1
package/dist/mcp-server/config.js +6 -8
package/dist/mcp-server/config.js.map +1 -1
package/dist/mcp-server/index.d.ts +281 -23
package/dist/mcp-server/index.d.ts.map +1 -1
package/dist/mcp-server/index.js +1186 -82
package/dist/mcp-server/index.js.map +1 -1
package/dist/src/action-key.d.ts +9 -0
package/dist/src/action-key.d.ts.map +1 -0
package/dist/src/action-key.js +49 -0
package/dist/src/action-key.js.map +1 -0
package/dist/src/adaptive.d.ts.map +1 -1
package/dist/src/adaptive.js +8 -6
package/dist/src/adaptive.js.map +1 -1
package/dist/src/affect.d.ts +4 -1
package/dist/src/affect.d.ts.map +1 -1
package/dist/src/affect.js +14 -12
package/dist/src/affect.js.map +1 -1
package/dist/src/audrey.d.ts +57 -4
package/dist/src/audrey.d.ts.map +1 -1
package/dist/src/audrey.js +512 -65
package/dist/src/audrey.js.map +1 -1
package/dist/src/capsule.d.ts +2 -1
package/dist/src/capsule.d.ts.map +1 -1
package/dist/src/capsule.js +18 -8
package/dist/src/capsule.js.map +1 -1
package/dist/src/causal.d.ts.map +1 -1
package/dist/src/causal.js +23 -5
package/dist/src/causal.js.map +1 -1
package/dist/src/confidence.d.ts.map +1 -1
package/dist/src/confidence.js +3 -0
package/dist/src/confidence.js.map +1 -1
package/dist/src/consolidate.d.ts +1 -0
package/dist/src/consolidate.d.ts.map +1 -1
package/dist/src/consolidate.js +70 -54
package/dist/src/consolidate.js.map +1 -1
package/dist/src/controller.d.ts +94 -0
package/dist/src/controller.d.ts.map +1 -0
package/dist/src/controller.js +350 -0
package/dist/src/controller.js.map +1 -0
package/dist/src/db.d.ts.map +1 -1
package/dist/src/db.js +181 -169
package/dist/src/db.js.map +1 -1
package/dist/src/decay.d.ts.map +1 -1
package/dist/src/decay.js +62 -55
package/dist/src/decay.js.map +1 -1
package/dist/src/embedding.d.ts +2 -1
package/dist/src/embedding.d.ts.map +1 -1
package/dist/src/embedding.js +60 -22
package/dist/src/embedding.js.map +1 -1
package/dist/src/encode.d.ts +9 -2
package/dist/src/encode.d.ts.map +1 -1
package/dist/src/encode.js +25 -12
package/dist/src/encode.js.map +1 -1
package/dist/src/export.d.ts.map +1 -1
package/dist/src/export.js +5 -3
package/dist/src/export.js.map +1 -1
package/dist/src/feedback.d.ts +35 -0
package/dist/src/feedback.d.ts.map +1 -0
package/dist/src/feedback.js +129 -0
package/dist/src/feedback.js.map +1 -0
package/dist/src/forget.d.ts.map +1 -1
package/dist/src/forget.js +68 -60
package/dist/src/forget.js.map +1 -1
package/dist/src/fts.js +1 -1
package/dist/src/fts.js.map +1 -1
package/dist/src/hybrid-recall.d.ts +2 -1
package/dist/src/hybrid-recall.d.ts.map +1 -1
package/dist/src/hybrid-recall.js +41 -32
package/dist/src/hybrid-recall.js.map +1 -1
package/dist/src/impact.d.ts +47 -0
package/dist/src/impact.d.ts.map +1 -0
package/dist/src/impact.js +146 -0
package/dist/src/impact.js.map +1 -0
package/dist/src/import.d.ts +177 -1
package/dist/src/import.d.ts.map +1 -1
package/dist/src/import.js +235 -46
package/dist/src/import.js.map +1 -1
package/dist/src/index.d.ts +5 -1
package/dist/src/index.d.ts.map +1 -1
package/dist/src/index.js +3 -1
package/dist/src/index.js.map +1 -1
package/dist/src/interference.d.ts +5 -2
package/dist/src/interference.d.ts.map +1 -1
package/dist/src/interference.js +39 -32
package/dist/src/interference.js.map +1 -1
package/dist/src/introspect.js +18 -18
package/dist/src/llm.d.ts.map +1 -1
package/dist/src/llm.js +1 -0
package/dist/src/llm.js.map +1 -1
package/dist/src/migrate.d.ts.map +1 -1
package/dist/src/migrate.js +21 -9
package/dist/src/migrate.js.map +1 -1
package/dist/src/preflight.d.ts +2 -1
package/dist/src/preflight.d.ts.map +1 -1
package/dist/src/preflight.js +66 -5
package/dist/src/preflight.js.map +1 -1
package/dist/src/profile.d.ts +23 -0
package/dist/src/profile.d.ts.map +1 -0
package/dist/src/profile.js +51 -0
package/dist/src/profile.js.map +1 -0
package/dist/src/promote.d.ts.map +1 -1
package/dist/src/promote.js +8 -9
package/dist/src/promote.js.map +1 -1
package/dist/src/prompts.d.ts.map +1 -1
package/dist/src/prompts.js +165 -136
package/dist/src/prompts.js.map +1 -1
package/dist/src/recall.d.ts +9 -6
package/dist/src/recall.d.ts.map +1 -1
package/dist/src/recall.js +204 -62
package/dist/src/recall.js.map +1 -1
package/dist/src/redact.d.ts +7 -1
package/dist/src/redact.d.ts.map +1 -1
package/dist/src/redact.js +94 -11
package/dist/src/redact.js.map +1 -1
package/dist/src/reflexes.d.ts +1 -0
package/dist/src/reflexes.d.ts.map +1 -1
package/dist/src/reflexes.js +3 -0
package/dist/src/reflexes.js.map +1 -1
package/dist/src/rollback.d.ts.map +1 -1
package/dist/src/rollback.js +13 -8
package/dist/src/rollback.js.map +1 -1
package/dist/src/routes.d.ts +1 -0
package/dist/src/routes.d.ts.map +1 -1
package/dist/src/routes.js +251 -6
package/dist/src/routes.js.map +1 -1
package/dist/src/rules-compiler.d.ts.map +1 -1
package/dist/src/rules-compiler.js +36 -6
package/dist/src/rules-compiler.js.map +1 -1
package/dist/src/server.d.ts +2 -1
package/dist/src/server.d.ts.map +1 -1
package/dist/src/server.js +42 -4
package/dist/src/server.js.map +1 -1
package/dist/src/tool-trace.d.ts.map +1 -1
package/dist/src/tool-trace.js +42 -29
package/dist/src/tool-trace.js.map +1 -1
package/dist/src/types.d.ts +28 -1
package/dist/src/types.d.ts.map +1 -1
package/dist/src/ulid.d.ts.map +1 -1
package/dist/src/ulid.js +52 -2
package/dist/src/ulid.js.map +1 -1
package/dist/src/utils.d.ts.map +1 -1
package/dist/src/utils.js +8 -1
package/dist/src/utils.js.map +1 -1
package/dist/src/validate.d.ts +2 -0
package/dist/src/validate.d.ts.map +1 -1
package/dist/src/validate.js +77 -46
package/dist/src/validate.js.map +1 -1
package/docs/AUDREY_PAPER_OUTLINE.md +175 -0
package/docs/MEMORY_BENCHMARKING.md +59 -0
package/docs/PRODUCTION_BACKLOG.md +304 -0
package/docs/paper/00-master.md +48 -0
package/docs/paper/01-introduction.md +27 -0
package/docs/paper/02-related-work.md +47 -0
package/docs/paper/03-problem-definition.md +108 -0
package/docs/paper/04-design.md +164 -0
package/docs/paper/05-guardbench-spec.md +412 -0
package/docs/paper/06-implementation.md +113 -0
package/docs/paper/07-evaluation.md +168 -0
package/docs/paper/08-discussion-limitations.md +61 -0
package/docs/paper/09-conclusion.md +11 -0
package/docs/paper/SUBMISSION_README.md +162 -0
package/docs/paper/appendix-a-demo-transcript.md +114 -0
package/docs/paper/arxiv-compile-report.schema.json +116 -0
package/docs/paper/arxiv-source.schema.json +61 -0
package/docs/paper/audrey-paper-v1.md +1106 -0
package/docs/paper/browser-launch-plan.json +209 -0
package/docs/paper/browser-launch-plan.schema.json +100 -0
package/docs/paper/browser-launch-results.json +86 -0
package/docs/paper/browser-launch-results.schema.json +66 -0
package/docs/paper/claim-register.json +138 -0
package/docs/paper/claim-register.schema.json +81 -0
package/docs/paper/evidence-ledger.md +103 -0
package/docs/paper/output/arxiv/README-arxiv.txt +8 -0
package/docs/paper/output/arxiv/arxiv-manifest.json +41 -0
package/docs/paper/output/arxiv/main.tex +949 -0
package/docs/paper/output/arxiv/references.bib +222 -0
package/docs/paper/output/arxiv-compile-report.json +24 -0
package/docs/paper/output/submission-bundle/LICENSE +21 -0
package/docs/paper/output/submission-bundle/README.md +533 -0
package/docs/paper/output/submission-bundle/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +50 -0
package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-dry-run.json +69 -0
package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-evidence.json +56 -0
package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-conformance-card.json +63 -0
package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-manifest.json +414 -0
package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-raw.json +1171 -0
package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-summary.json +1981 -0
package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.json +93 -0
package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.md +7 -0
package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/submission-manifest.json +131 -0
package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/validation-report.json +31 -0
package/docs/paper/output/submission-bundle/benchmarks/output/summary.json +2354 -0
package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-adapter-registry.schema.json +69 -0
package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-adapter-self-test.schema.json +156 -0
package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-conformance-card.schema.json +184 -0
package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-external-dry-run.schema.json +74 -0
package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-external-evidence.schema.json +108 -0
package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-external-run.schema.json +160 -0
package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-leaderboard.schema.json +179 -0
package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-manifest.schema.json +213 -0
package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-publication-verification.schema.json +47 -0
package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-raw.schema.json +164 -0
package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-submission-manifest.schema.json +151 -0
package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-summary.schema.json +228 -0
package/docs/paper/output/submission-bundle/docs/AUDREY_PAPER_OUTLINE.md +175 -0
package/docs/paper/output/submission-bundle/docs/paper/00-master.md +48 -0
package/docs/paper/output/submission-bundle/docs/paper/01-introduction.md +27 -0
package/docs/paper/output/submission-bundle/docs/paper/02-related-work.md +47 -0
package/docs/paper/output/submission-bundle/docs/paper/03-problem-definition.md +108 -0
package/docs/paper/output/submission-bundle/docs/paper/04-design.md +164 -0
package/docs/paper/output/submission-bundle/docs/paper/05-guardbench-spec.md +412 -0
package/docs/paper/output/submission-bundle/docs/paper/06-implementation.md +113 -0
package/docs/paper/output/submission-bundle/docs/paper/07-evaluation.md +168 -0
package/docs/paper/output/submission-bundle/docs/paper/08-discussion-limitations.md +61 -0
package/docs/paper/output/submission-bundle/docs/paper/09-conclusion.md +11 -0
package/docs/paper/output/submission-bundle/docs/paper/SUBMISSION_README.md +162 -0
package/docs/paper/output/submission-bundle/docs/paper/appendix-a-demo-transcript.md +114 -0
package/docs/paper/output/submission-bundle/docs/paper/arxiv-compile-report.schema.json +116 -0
package/docs/paper/output/submission-bundle/docs/paper/arxiv-source.schema.json +61 -0
package/docs/paper/output/submission-bundle/docs/paper/audrey-paper-v1.md +1106 -0
package/docs/paper/output/submission-bundle/docs/paper/browser-launch-plan.json +209 -0
package/docs/paper/output/submission-bundle/docs/paper/browser-launch-plan.schema.json +100 -0
package/docs/paper/output/submission-bundle/docs/paper/browser-launch-results.json +86 -0
package/docs/paper/output/submission-bundle/docs/paper/browser-launch-results.schema.json +66 -0
package/docs/paper/output/submission-bundle/docs/paper/claim-register.json +138 -0
package/docs/paper/output/submission-bundle/docs/paper/claim-register.schema.json +81 -0
package/docs/paper/output/submission-bundle/docs/paper/evidence-ledger.md +103 -0
package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/README-arxiv.txt +8 -0
package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/arxiv-manifest.json +41 -0
package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/main.tex +949 -0
package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/references.bib +222 -0
package/docs/paper/output/submission-bundle/docs/paper/output/arxiv-compile-report.json +24 -0
package/docs/paper/output/submission-bundle/docs/paper/paper-submission-bundle.schema.json +70 -0
package/docs/paper/output/submission-bundle/docs/paper/publication-pack.json +81 -0
package/docs/paper/output/submission-bundle/docs/paper/publication-pack.schema.json +60 -0
package/docs/paper/output/submission-bundle/docs/paper/references.bib +222 -0
package/docs/paper/output/submission-bundle/package.json +212 -0
package/docs/paper/output/submission-bundle/paper-submission-manifest.json +379 -0
package/docs/paper/paper-submission-bundle.schema.json +70 -0
package/docs/paper/publication-pack.json +81 -0
package/docs/paper/publication-pack.schema.json +60 -0
package/docs/paper/references.bib +222 -0
package/package.json +103 -26
package/scripts/audit-release-completion.mjs +362 -0
package/scripts/create-arxiv-source.mjs +362 -0
package/scripts/create-paper-submission-bundle.mjs +210 -0
package/scripts/finalize-release.mjs +526 -0
package/scripts/prepare-release-cut.mjs +269 -0
package/scripts/publish-release-bundle.mjs +209 -0
package/scripts/publish-release-github-api.mjs +429 -0
package/scripts/run-vitest.mjs +34 -0
package/scripts/smoke-cli.js +72 -0
package/scripts/sync-paper-artifacts.mjs +109 -0
package/scripts/verify-arxiv-compile.mjs +440 -0
package/scripts/verify-arxiv-source.mjs +194 -0
package/scripts/verify-browser-launch-plan.mjs +237 -0
package/scripts/verify-browser-launch-results.mjs +285 -0
package/scripts/verify-paper-artifacts.mjs +338 -0
package/scripts/verify-paper-claims.mjs +226 -0
package/scripts/verify-paper-submission-bundle.mjs +207 -0
package/scripts/verify-publication-pack.mjs +196 -0
package/scripts/verify-python-package.py +201 -0
package/scripts/verify-release-readiness.mjs +741 -0
package/docs/assets/benchmarks/local-benchmark.svg +0 -45
package/docs/assets/benchmarks/operations-benchmark.svg +0 -45
package/docs/assets/benchmarks/published-memory-standards.svg +0 -50
package/docs/audrey-for-dummies.md +0 -670
package/docs/benchmarking.md +0 -151
package/docs/future-of-llm-memory.md +0 -452
package/docs/mcp-hosts.md +0 -206
package/docs/ollama-local-agents.md +0 -128
package/docs/production-readiness.md +0 -128

package/README.md CHANGED

@@ -1,11 +1,11 @@
 <div align="center">
   <img src="docs/assets/audrey-wordmark.png" alt="Audrey wordmark" width="760">
-  <p><strong>The local-first memory control plane for AI agents.</strong></p>
+  <p><strong>The local-first memory firewall for AI agents.</strong></p>
   <p>
     Give Codex, Claude Code, Claude Desktop, Cursor, Windsurf, VS Code, JetBrains, Ollama-backed agents,
-    and custom agent services one durable memory layer they can check before they act.
+    and custom agent services one durable memory layer they can check before they touch tools.
   </p>
   <p>
@@ -19,13 +19,17 @@
 Agents forget the exact mistakes they made yesterday. They repeat broken commands, lose project-specific rules, miss contradictions, and treat every new session like a cold start.
+Audrey Guard is the headline loop: record what happened, remember what mattered, check before action, return `allow`, `warn`, or `block` with evidence, then validate whether the memory helped.
 Audrey turns those hard-won lessons into a local memory runtime:
+- `audrey guard --tool Bash "npm run deploy"` runs memory-before-action from the terminal.
 - `memory_recall` finds durable context by semantic similarity.
 - `memory_preflight` checks prior failures, risks, rules, and relevant procedures before an action.
 - `memory_reflexes` converts remembered evidence into trigger-response guidance agents can follow.
+- `memory_validate` closes the loop after the action: `helpful`, `used`, or `wrong` outcomes feed salience and can bind back to the exact preflight event, evidence ids, and Guard action fingerprint.
 - `memory_dream` consolidates episodes into principles and applies decay.
-- `audrey doctor` tells a human or CI system whether the runtime is actually ready.
+- `audrey impact` and `audrey doctor` tell a human or CI system whether the runtime is doing real work and is actually ready.
 It is not a hosted vector database, a notes app, or a Claude-only plugin. Audrey is a SQLite-backed continuity layer that can sit under any local or sidecar agent loop.
@@ -39,15 +43,16 @@ Requires Node.js 20+.
 ```bash
 npx audrey doctor
-npx audrey demo
+npx audrey demo --scenario repeated-failure
+npx audrey guard --tool Bash "npm run deploy"
 ```
-`doctor` verifies Node, the MCP entrypoint, provider selection, memory-store health, and host config generation. `demo` runs a no-key, no-host, no-network proof: it creates temporary memories, records a redacted failed tool trace, generates a Memory Capsule, proves recall, prints Memory Reflexes, and deletes the demo store.
+`doctor` verifies Node, the MCP entrypoint, provider selection, memory-store health, and host config generation. The repeated-failure demo is no-key, no-host, and no-network: it creates a temporary store, records a failed deploy, teaches Audrey the fix, then shows Audrey Guard blocking the repeat attempt with evidence.
 Expected first-run shape:
 ```text
-Audrey Doctor v0.21.0
+Audrey Doctor v1.0.0
 Store health: not initialized
 Verdict: ready
 ```
@@ -70,6 +75,7 @@ Generate raw config blocks:
 npx audrey mcp-config codex
 npx audrey mcp-config generic
 npx audrey mcp-config vscode
+npx audrey hook-config claude-code
 ```
 Claude Code can be registered directly:
@@ -79,8 +85,19 @@ npx audrey install
 claude mcp list
 ```
+For memory-before-action hooks, preview with `npx audrey hook-config
+claude-code`, then apply with `npx audrey hook-config claude-code --apply
+--scope project` for `.claude/settings.local.json` or `--scope user` for
+`~/.claude/settings.json`. Audrey merges the hook block into existing settings
+and writes a timestamped backup before changing a non-empty file. The generated
+`PreToolUse` hook runs `audrey guard --hook --fail-on-warn`; the `PostToolUse`
+and `PostToolUseFailure` hooks record redacted tool traces. Verify the active
+hook set inside Claude Code with `/hooks`.
 All local MCP paths default to local embeddings and one shared SQLite-backed memory directory. Use `AUDREY_DATA_DIR` to isolate projects, tenants, or host identities.
+Installer-generated host config does not include provider API keys by default. Prefer setting `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, `GOOGLE_API_KEY`, or `GEMINI_API_KEY` in the host runtime environment; use `npx audrey install --include-secrets` only if you explicitly accept argv/config exposure.
 ## Use With Ollama And Local Agents
 Ollama runs models; Audrey supplies memory. Start Audrey as a local REST sidecar and expose its routes as tools in your agent loop:
@@ -113,9 +130,9 @@ Core sidecar tools:
 | Surface | Status |
 |---|---|
-| MCP stdio server | 19 tools, resources, and prompt templates |
-| CLI | `doctor`, `demo`, `install`, `mcp-config`, `status`, `dream`, `reembed`, `observe-tool`, `promote` |
-| REST API | Hono server with `/health`, `/openapi.json`, `/docs`, and `/v1/*` routes |
+| MCP stdio server | 20 tools plus status/recent/principles resources and briefing/recall/reflection prompts |
+| CLI | `doctor`, `demo`, `guard`, `install`, `mcp-config`, `hook-config`, `status`, `dream`, `reembed`, `observe-tool`, `promote`, `impact` |
+| REST API | Hono server with `/health` and `/v1/*` routes |
 | JavaScript SDK | Direct TypeScript/Node import from `audrey` |
 | Python client | `pip install audrey-memory`, calls the REST sidecar |
 | Storage | Local SQLite plus `sqlite-vec`, no hosted database required |
@@ -183,10 +200,10 @@ Audrey is close to a 1.0-ready local memory runtime, but production depends on h
 Release gates used for this package:
 ```bash
-npm run build
-npm run typecheck
-npm run bench:memory:check
-npm pack --dry-run
+npm run release:gate
+npm run python:release:check
+npm run bench:guard:card
+npm run bench:guard:validate
 npx audrey doctor
 npx audrey demo
 ```
@@ -209,22 +226,250 @@ Production controls you still own:
 - Run `npx audrey dream` on a schedule so consolidation and decay stay current.
 - Add application-level encryption, retention, access control, and audit logging for regulated environments.
-Read the full guide: [docs/production-readiness.md](docs/production-readiness.md).
+## Environment Variables
+| Variable | Default | Purpose |
+|---|---|---|
+| `AUDREY_DATA_DIR` | `~/.audrey/data` | SQLite memory store path. Use one per tenant or agent identity for isolation. |
+| `AUDREY_AGENT` | `local-agent` | Logical agent identity stamped on writes. |
+| `AUDREY_EMBEDDING_PROVIDER` | `local` | `local`, `gemini`, `openai`, or `mock`. Cloud providers require explicit opt-in. |
+| `AUDREY_LLM_PROVIDER` | auto | `anthropic`, `openai`, or `mock`. |
+| `AUDREY_DEVICE` | `gpu` | Local embedding device (`gpu` or `cpu`). Falls back to CPU if GPU init fails. |
+| `AUDREY_PORT` | `7437` | REST sidecar port. |
+| `AUDREY_HOST` | `127.0.0.1` | REST sidecar bind address. Set to `0.0.0.0` only with `AUDREY_API_KEY`. |
+| `AUDREY_API_KEY` | unset | Bearer token required for non-loopback REST traffic. |
+| `AUDREY_ALLOW_NO_AUTH` | `0` | Set to `1` to allow non-loopback bind without an API key. Don't. |
+| `AUDREY_ENABLE_ADMIN_TOOLS` | `0` | Set to `1` to enable export, import, and forget routes/tools. Disabled by default. |
+| `AUDREY_PROMOTE_ROOTS` | unset | Colon/semicolon-separated extra roots for `audrey promote --yes` writes. By default writes are restricted to `process.cwd()`. |
+| `AUDREY_DEBUG` | `0` | Set to `1` to print MCP info logs (server started, warmup completed). Errors always log. |
+| `AUDREY_PROFILE` | `0` | Set to `1` to emit per-stage timings via MCP `_meta.diagnostics`. |
+| `AUDREY_DISABLE_WARMUP` | `0` | Set to `1` to skip background embedding warmup at MCP boot. |
+| `AUDREY_ONNX_VERBOSE` | `0` | Set to `1` to restore ONNX runtime EP-assignment warnings (suppressed by default). |
+| `AUDREY_PRAGMA_DEFAULTS` | `1` | Set to `0` to revert SQLite PRAGMA tuning to better-sqlite3 defaults. |
+| `AUDREY_CONTEXT_BUDGET_CHARS` | `4000` | Default Memory Capsule character budget. |
 ## Benchmarks
-Audrey ships with a benchmark harness and release gate:
+Audrey ships three benchmark families.
+### Performance snapshot
+`npm run bench:perf-snapshot` measures encode and hybrid recall latency at multiple corpus sizes against the in-process mock provider. It reports p50/p95/p99 plus machine provenance so the numbers are reproducible and honest about what they cover.
 ```bash
-npm run bench:memory
-npm run bench:memory:check
+npm run build
+npm run bench:perf-snapshot                                 # default sizes 100, 1000, 5000
+node benchmarks/perf-snapshot.js --sizes 1000,10000 --json  # custom shape
 ```
-Current repo snapshot:
+Sample output from `benchmarks/snapshots/perf-0.22.2.json` (Ryzen 9 7900X3D with 24 logical cores, Node 25.5.0, mock 64-dim embedding, hybrid recall, limit 5):
+| Corpus size | Encode p50 (ms) | Encode p95 (ms) | Recall p50 (ms) | Recall p95 (ms) | Recall p99 (ms) |
+|---|---|---|---|---|---|
+| 100 | 0.33 | 0.59 | 0.54 | 1.82 | 2.71 |
+| 1,000 | 0.31 | 2.15 | 1.57 | 2.36 | 21.18 |
+| 5,000 | 0.31 | 1.84 | 2.09 | 3.42 | 16.58 |
-![Audrey local benchmark](docs/assets/benchmarks/local-benchmark.svg)
+These numbers cover Audrey's own pipeline (SQLite + sqlite-vec + hybrid ranking) and exclude embedding-provider cost. Real-world recall p95 with a local 384-dim provider is typically 5-15x higher; with a hosted provider it is dominated by the API round-trip. Run on your own hardware before quoting numbers anywhere.
-The benchmark suite covers retrieval behavior, overwrite behavior, delete/abstain behavior, and semantic/procedural merge behavior. For methodology and comparison anchors, see [docs/benchmarking.md](docs/benchmarking.md).
+### Behavioral regression suite
+`npm run bench:memory:check` is a release gate. It runs a small set of retrieval and lifecycle scenarios (information extraction, knowledge updates, multi-session reasoning, conflict resolution, privacy boundary, overwrite, delete-and-abstain, semantic/procedural merge) against Audrey and three weak baselines (vector-only, keyword+recency, recent-window) and asserts Audrey doesn't regress. The baseline comparisons exist to catch correctness regressions in retrieval logic, not to make marketing claims.
+```bash
+npm run bench:memory          # full regression suite (writes JSON + report)
+npm run bench:memory:check    # release gate, exits non-zero on regression
+```
+### GuardBench comparative suite
+`npm run bench:guard:check` runs Audrey's local GuardBench comparative suite:
+ten pre-action scenarios across Audrey Guard, no-memory, recent-window,
+vector-only, and FTS-only adapters. The scenarios cover exact repeated
+failures, required procedures, changed file scopes, changed commands,
+recovered failures, recall degradation, redaction safety, conflicting
+instructions, and noisy stores. It writes
+`benchmarks/output/guardbench-summary.json`,
+`benchmarks/output/guardbench-manifest.json`, and
+`benchmarks/output/guardbench-raw.json`. The emitted manifest, summary, and raw
+output shapes are validated by JSON schemas under `benchmarks/schemas/`.
+Latest local result in this checkout: 10/10 scenarios passed, 100% prevention
+rate, 0% false-block rate, 0 raw secret leaks, 0 published artifact leaks in
+the raw-secret sweep, and 3.214ms / 21.395ms
+p50/p95 guard latency under the mock-provider methodology. Local baseline
+decision accuracy was: no-memory 10%, recent-window 60%, vector-only 40%, and
+FTS-only 10%; none passed the full GuardBench decision-plus-evidence contract.
+```bash
+npm run bench:guard
+npm run bench:guard:check
+npm run bench:guard:manifest
+npm run bench:guard:validate
+npm run bench:guard:card
+npm run bench:guard:bundle
+npm run bench:guard:bundle:verify
+npm run bench:guard:leaderboard
+npm run bench:guard:adapter-registry:validate
+npm run bench:guard:adapter-module:validate
+npm run bench:guard:adapter-self-test
+npm run bench:guard:adapter-self-test:validate
+npm run bench:guard:publication:verify
+npm run bench:guard:adapter-smoke
+npm run bench:guard:adapter-conformance
+npm run bench:guard:external:dry-run
+npm run bench:guard:mem0 -- --dry-run
+npm run bench:guard:zep -- --dry-run
+node benchmarks/adapter-self-test.mjs --adapter ./path/to/adapter.mjs
+node benchmarks/guardbench.js --adapter ./path/to/adapter.mjs --check
+```
+External GuardBench adapters are ESM modules that export either `default`,
+`adapter`, or `createGuardBenchAdapter()`. The adapter receives scenario seed
+data and the proposed action, but the harness withholds `expectedDecision` and
+`requiredEvidence` until scoring. Start from
+`benchmarks/adapters/example-allow.mjs` when wiring a new system. Adapter
+authors can import `defineGuardBenchAdapter()` and `defineGuardBenchResult()`
+from `benchmarks/adapter-kit.mjs` to validate module shape and decision output
+while developing.
+The published adapter registry lives at `benchmarks/adapters/registry.json`.
+Run `npm run bench:guard:adapter-registry:validate` to verify registry shape,
+adapter paths, and credential-free module loading.
+Before running the full self-test, validate the ESM module shape quickly:
+```bash
+npm run bench:guard:adapter-module:validate -- --adapter ./path/to/adapter.mjs
+```
+Before publishing a new adapter, run `npm run bench:guard:adapter-self-test --
+--adapter ./path/to/adapter.mjs`. The self-test validates the external adapter
+contract and row conformance while explicitly allowing low benchmark scores, so
+authors can separate "valid submission shape" from "competitive GuardBench
+performance." The generated self-test report is validated against
+`benchmarks/schemas/guardbench-adapter-self-test.schema.json`. Reviewers can
+validate a submitted report without rerunning an adapter through `npm run
+bench:guard:adapter-self-test:validate -- --report ./guardbench-adapter-self-test.json`.
+Audrey ships external adapters for Mem0 Platform and Zep Cloud. Run them only
+with runtime API keys:
+```bash
+export MEM0_API_KEY=...   # Windows cmd: set MEM0_API_KEY=...
+npm run bench:guard:mem0
+export ZEP_API_KEY=...    # Windows cmd: set ZEP_API_KEY=...
+npm run bench:guard:zep
+```
+The Zep adapter uses the current REST surface for users, sessions, `memory.add`,
+`graph.search`, and benchmark-user cleanup. If Zep graph ingestion needs more
+time in a live account, set `ZEP_GUARDBENCH_INGEST_DELAY_MS` before the run.
+Run `npm run bench:guard:external:dry-run` before coordinating credentialed
+runs. It walks the runtime-env adapter registry, writes non-secret
+`external-run-metadata.json` files for each adapter, and reports which runtime
+environment variables are still missing. The external dry-run matrix report is schema-bound by
+`benchmarks/schemas/guardbench-external-dry-run.schema.json` and written to
+`benchmarks/output/external/guardbench-external-dry-run.json`.
+Run `npm run bench:guard:external:evidence` after dry-runs or live runs to
+write `benchmarks/output/external/guardbench-external-evidence.json`. This
+external evidence verification report is schema-bound by
+`benchmarks/schemas/guardbench-external-evidence.schema.json`, treats dry-run
+or missing-key rows as pending in normal release gates, and checks that saved
+metadata does not contain runtime credential values. Use
+`npm run bench:guard:external:evidence:strict` when Mem0/Zep keys have been
+provided; strict mode fails until every runtime-env adapter has a passed live
+bundle.
+External runs write `external-run-metadata.json` alongside the GuardBench
+summary, manifest, and raw output bundle under
+`benchmarks/output/external/<adapter>/`. The external runner validates the
+emitted bundle with `benchmarks/validate-guardbench-artifacts.mjs` before
+marking the run passed, and separately records adapter conformance so a valid
+low-scoring adapter is distinguished from a malformed adapter. When
+`external-run-metadata.json` is present, the validator also checks it against
+`benchmarks/schemas/guardbench-external-run.schema.json` and verifies any
+recorded SHA-256 artifact hashes against the bundle on disk.
+For a shareable submission artifact, run `npm run bench:guard:card -- --dir
+<output-dir>`. This writes `guardbench-conformance-card.json` with the subject
+name, run status, score, conformance result, artifact hashes, optional
+external-run metadata hash, and machine provenance. The standalone validator
+checks the card when it is present.
+For a portable submission directory, run `npm run bench:guard:bundle -- --dir
+<output-dir>`. This creates `submission-bundle/` with the raw GuardBench
+artifacts, conformance card, JSON schemas, validation report, and
+`submission-manifest.json` with SHA-256 hashes for every bundled file.
+Reviewers can run `npm run bench:guard:bundle:verify -- --dir
+<submission-bundle>` to check manifest hashes, bundled schemas, and artifact
+validation from the bundle alone.
+For benchmark aggregation, run `npm run bench:guard:leaderboard -- --bundle
+<submission-bundle>`. The leaderboard builder verifies each bundle before
+ranking and writes JSON plus Markdown reports under `benchmarks/output/leaderboard/`.
+Before publishing benchmark artifacts, run `npm run
+bench:guard:publication:verify`. This single benchmark-focused verifier checks
+the adapter registry, default adapter module, adapter self-test report,
+GuardBench manifest/summary/raw artifacts, submission bundle, external dry-run
+matrix, external evidence verification report, leaderboard, and a local
+absolute-path sweep over the public artifact set.
+The verifier validates its own machine-readable report against
+`benchmarks/schemas/guardbench-publication-verification.schema.json` before it
+exits.
+Before turning the paper into public posts or submissions, run `npm run
+paper:claims`. It validates `docs/paper/claim-register.json` against the
+current paper, README, GuardBench artifacts, publication verifier, and external
+evidence status so pending Mem0/Zep live-score claims cannot slip into public
+copy.
+Run `npm run paper:publication-pack` to verify the ready-to-use arXiv, Hacker
+News, Reddit, X, and LinkedIn drafts in `docs/paper/publication-pack.json`
+before browser-based submission. The X URL reserve is explicit: the first X
+post carries `reservedUrlChars: 24`, and submitted artifact-url targets in
+`browser-launch-results.json` must record the final `artifactUrl`.
+Run `npm run paper:arxiv` to generate a deterministic TeX source package under
+`docs/paper/output/arxiv/`, and `npm run paper:arxiv:verify` to check hashes,
+citation conversion, bibliography coverage, seeded-secret redaction, and local
+absolute-path leakage before arXiv upload.
+Run `npm run paper:arxiv:compile` to record a schema-bound compile report at
+`docs/paper/output/arxiv-compile-report.json`. It attempts `tectonic`,
+`latexmk`, `pdflatex`/`bibtex`, or `uvx tecto` with a local bundle proxy when
+available; `npm run paper:arxiv:compile:strict` stays blocked on hosts without
+supported TeX tooling.
+Run `npm run paper:launch-plan` to verify
+`docs/paper/browser-launch-plan.json`, which maps those drafts to manual
+browser targets, login/captcha expectations, platform-rule checks, source
+URLs, and post-submit URL capture.
+Run `npm run paper:launch-results` to validate
+`docs/paper/browser-launch-results.json`, the post-submit ledger for arXiv,
+Hacker News, Reddit, X, and LinkedIn targets. The normal verifier allows
+pending rows with explicit blockers; `npm run paper:launch-results:strict`
+fails until every target has a submitted, operator-verified public URL.
+Run `npm run paper:bundle` to generate
+`docs/paper/output/submission-bundle/`, a hash-manifested package containing
+paper sources, claim and publication registers, GuardBench outputs, schemas,
+and package metadata. `npm run paper:bundle:verify` checks the manifest and
+file hashes before browser upload.
+Run `npm run release:readiness` for the pending-aware Audrey 1.0 checklist.
+It keeps code/paper readiness separate from publish blockers; `npm run
+release:readiness:strict` fails until the 1.0 version surfaces,
+source-control state, live remote-head verification, Python artifacts, npm
+registry/auth readiness, PyPI publish readiness, arXiv compile proof, browser
+publication URLs, and live Mem0/Zep evidence are complete.
+Run `npm run release:cut:plan` to preview the exact 1.0 version/changelog
+edits across npm, lockfile, MCP, and Python surfaces. `npm run
+release:cut:apply -- --target-version 1.0.0` writes those edits only when the
+final cut is intentional. The generated changelog section is release-note copy,
+not a TODO scaffold; `release:readiness:strict` rejects placeholder changelog
+markers before publication.
+Run `npm run security:audit` before packaging or publishing; the release gates
+call it after artifact verification so production dependency advisories cannot
+slip past the final package check.
 ## Command Reference
@@ -237,6 +482,7 @@ npx audrey demo
 npx audrey install --host codex --dry-run
 npx audrey mcp-config codex
 npx audrey mcp-config generic
+npx audrey hook-config claude-code
 npx audrey install
 npx audrey uninstall
@@ -246,39 +492,41 @@ npx audrey status --json --fail-on-unhealthy
 npx audrey dream
 npx audrey reembed
+# Closed-loop visibility
+npx audrey impact
+npx audrey impact --json --window 7 --limit 5
 # Tool-trace learning
 npx audrey observe-tool --event PostToolUse --tool Bash --outcome failed
 npx audrey promote --dry-run
 # REST sidecar
 npx audrey serve
+cp .env.docker.example .env   # Windows cmd: copy .env.docker.example .env
+# edit AUDREY_API_KEY in .env
 docker compose up -d --build
 ```
+The Node sidecar defaults to `127.0.0.1:7437`. The Docker image intentionally binds inside the container on `3487`, so Compose requires `AUDREY_API_KEY` in `.env` before startup. Override the published host port with `AUDREY_PUBLISHED_PORT` when using Compose.
 ## Documentation
-- [Audrey for Dummies](docs/audrey-for-dummies.md)
-- [MCP host guide](docs/mcp-hosts.md)
-- [Ollama and local agents](docs/ollama-local-agents.md)
-- [Production readiness](docs/production-readiness.md)
-- [Future of LLM memory](docs/future-of-llm-memory.md)
-- [Benchmarking](docs/benchmarking.md)
 - [Security policy](SECURITY.md)
+- [Audrey paper outline](docs/AUDREY_PAPER_OUTLINE.md)
+- Public setup, runtime, benchmark, and command guidance is maintained in this README.
 ## Development
 ```bash
 npm ci
-npm run build
-npm run typecheck
-npm test
-npm run bench:memory:check
-npm run pack:check
+npm run release:gate
 python -m unittest discover -s python/tests -v
-python -m build --no-isolation python
+npm run python:release:check
 ```
-On some locked-down Windows hosts, Vitest/Vite can fail before tests start with `spawn EPERM`. That is an environment process-spawn blocker, not an Audrey runtime failure. Use build, typecheck, benchmark, pack dry-run, direct `dist/` smokes, and GitHub Actions as the release evidence path.
+`npm test` uses a repo-local Vitest launcher so locked-down Windows temp
+directories do not block test startup. `npm run release:gate:sandbox` remains
+available for hosts that block child-process spawning entirely.
 ## License

package/SECURITY.md ADDED Viewed

@@ -0,0 +1,30 @@
+# Security Policy
+## Supported Versions
+Security fixes are best-effort for the current published release line and the current default branch.
+| Version | Supported |
+|---|---|
+| `0.23.x` | Yes |
+| `0.22.x` | Best effort |
+| `< 0.22.0` | No |
+## Reporting a Vulnerability
+Do not open a public GitHub issue for a security vulnerability.
+Report vulnerabilities privately through this channel:
+- GitHub Security Advisories for this repository
+Include:
+- affected version
+- reproduction steps or proof of concept
+- impact description
+- suggested mitigation, if you have one
+## Scope Notes
+Audrey is a memory layer. Security posture also depends on the host application, deployment environment, provider configuration, access controls, and data-handling rules around it.

package/benchmarks/adapter-kit.mjs ADDED Viewed

@@ -0,0 +1,20 @@
+import { validateGuardBenchAdapter, validateAdapterResult } from './guardbench.js';
+// Version string for the adapter contract exposed by this kit.
+export const GUARDBENCH_ADAPTER_CONTRACT_VERSION = '1.0.0';
+// Decision values listed for adapter results; frozen so importers cannot mutate the shared array.
+export const GUARDBENCH_DECISIONS = Object.freeze(['allow', 'warn', 'block']);
+// Field names listed for an adapter decision result; frozen for the same reason.
+export const GUARDBENCH_RESULT_FIELDS = Object.freeze([
+  'decision',
+  'riskScore',
+  'evidenceIds',
+  'recommendedActions',
+  'summary',
+  'recallErrors',
+]);
+export function defineGuardBenchAdapter(adapter) {
+  return validateGuardBenchAdapter(adapter, adapter?.name ?? 'inline adapter');
+}
+/**
+ * Validate a single adapter decision result through the GuardBench harness validator.
+ *
+ * @param {object} result - Decision result produced by an adapter.
+ * @param {string} [adapterName='adapter'] - Adapter label forwarded to the validator.
+ * @param {string} [scenarioId='scenario'] - Scenario label forwarded to the validator.
+ * @returns {*} Whatever `validateAdapterResult` returns for this result.
+ */
+export function defineGuardBenchResult(result, adapterName = 'adapter', scenarioId = 'scenario') {
+  return validateAdapterResult(result, adapterName, scenarioId);
+}

package/benchmarks/adapter-self-test.mjs ADDED Viewed

@@ -0,0 +1,166 @@
+import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
+import { basename, dirname, resolve } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { loadExternalAdapters, runGuardBench } from './guardbench.js';
+import { evaluateAdapterConformance } from './run-external-guardbench.mjs';
+import { validateSchema } from './validate-guardbench-artifacts.mjs';
+import { publicPath } from './public-paths.mjs';
+// Parent of the directory containing this module; relative paths below are resolved against it.
+const ROOT = resolve(dirname(fileURLToPath(import.meta.url)), '..');
+// Defaults used when the caller does not supply an adapter, output path, or schema.
+const DEFAULT_ADAPTER = 'benchmarks/adapters/example-allow.mjs';
+const DEFAULT_OUT = 'benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json';
+const DEFAULT_SCHEMA = 'benchmarks/schemas/guardbench-adapter-self-test.schema.json';
+// Result field names recorded in the report's contract.requiredResultFields.
+const RESULT_FIELDS = [
+  'decision',
+  'riskScore',
+  'evidenceIds',
+  'recommendedActions',
+  'summary',
+  'recallErrors',
+];
+export function parseAdapterSelfTestArgs(argv = process.argv.slice(2)) {
+  const args = {
+    adapter: DEFAULT_ADAPTER,
+    out: DEFAULT_OUT,
+    json: false,
+    noWrite: false,
+  };
+  for (let i = 0; i < argv.length; i++) {
+    const token = argv[i];
+    if (token === '--adapter' && argv[i + 1]) args.adapter = argv[++i];
+    else if (token === '--out' && argv[i + 1]) args.out = argv[++i];
+    else if (token === '--json') args.json = true;
+    else if (token === '--no-write') args.noWrite = true;
+    else if (token === '--help' || token === '-h') args.help = true;
+    else throw new Error(`Unknown argument: ${token}`);
+  }
+  return args;
+}
+function usage() {
+  return `Usage: node benchmarks/adapter-self-test.mjs [options]
+Options:
+  --adapter <path>   ESM GuardBench adapter path. Default: ${DEFAULT_ADAPTER}.
+  --out <path>       JSON report path. Default: ${DEFAULT_OUT}.
+  --json             Print the full JSON report.
+  --no-write         Do not write the JSON report.
+`;
+}
+/**
+ * Find the summary row for one system in a GuardBench report.
+ *
+ * @param {{systemSummaries: Array<{system: string}>}} report - Report to search.
+ * @param {string} adapterName - Value to match against each row's `system`.
+ * @returns {object|null} The first matching row, or null when none matches.
+ */
+function systemSummary(report, adapterName) {
+  return report.systemSummaries.find(row => row.system === adapterName) ?? null;
+}
+function scoreFromReport(report, adapterName) {
+  const summary = systemSummary(report, adapterName);
+  return {
+    scenarios: summary?.scenarios ?? 0,
+    fullContractPassRate: summary?.passRate ?? null,
+    decisionAccuracy: summary?.decisionAccuracy ?? null,
+    evidenceRecall: summary?.evidenceRecall ?? null,
+    redactionLeaks: summary?.redactionLeaks ?? null,
+    latency: summary?.latency ?? null,
+  };
+}
+/**
+ * Read a file synchronously as UTF-8 and parse it as JSON.
+ *
+ * @param {string} path - File to read.
+ * @returns {*} The parsed JSON value.
+ */
+function readJson(path) {
+  return JSON.parse(readFileSync(path, 'utf-8'));
+}
+export function validateAdapterSelfTestReport(report, options = {}) {
+  const schemaPath = resolve(ROOT, options.schema ?? DEFAULT_SCHEMA);
+  const schema = options.schemaObject ?? readJson(schemaPath);
+  return validateSchema(report, schema, 'guardbench-adapter-self-test');
+}
+export async function runGuardBenchAdapterSelfTest(options = {}) {
+  const adapterPath = resolve(ROOT, options.adapterPath ?? options.adapter ?? DEFAULT_ADAPTER);
+  if (!existsSync(adapterPath)) {
+    throw new Error(`GuardBench adapter not found: ${adapterPath}`);
+  }
+  const adapters = await loadExternalAdapters([adapterPath]);
+  if (adapters.length !== 1) {
+    throw new Error(`GuardBench adapter self-test expected 1 adapter, got ${adapters.length}`);
+  }
+  const [adapter] = adapters;
+  const report = await runGuardBench({ externalAdapters: adapters });
+  const conformance = evaluateAdapterConformance(report, adapter.name);
+  const score = scoreFromReport(report, conformance.adapter);
+  const selfTest = {
+    schemaVersion: '1.0.0',
+    suite: 'GuardBench adapter self-test',
+    generatedAt: new Date().toISOString(),
+    ok: conformance.ok,
+    adapter: {
+      name: adapter.name,
+      path: publicPath(adapterPath),
+      moduleFile: basename(adapterPath),
+      description: adapter.description ?? null,
+    },
+    conformance,
+    score,
+    contract: {
+      expectedAnswersWithheld: true,
+      lowScoreAllowed: true,
+      requiredScenarioRows: report.scenarios,
+      requiredResultFields: RESULT_FIELDS,
+      redactionLeakTolerance: 0,
+    },
+    failures: conformance.failures,
+  };
+  const schemaErrors = validateAdapterSelfTestReport(selfTest);
+  if (schemaErrors.length > 0) {
+    throw new Error(`GuardBench adapter self-test schema validation failed: ${schemaErrors.join('; ')}`);
+  }
+  if (options.out && options.write !== false) {
+    const outPath = resolve(ROOT, options.out);
+    mkdirSync(dirname(outPath), { recursive: true });
+    writeFileSync(outPath, `${JSON.stringify(selfTest, null, 2)}\n`, 'utf-8');
+    selfTest.outPath = publicPath(outPath);
+  }
+  return selfTest;
+}
+async function main() {
+  const args = parseAdapterSelfTestArgs();
+  if (args.help) {
+    console.log(usage());
+    return;
+  }
+  const result = await runGuardBenchAdapterSelfTest({
+    adapter: args.adapter,
+    out: args.noWrite ? null : args.out,
+    write: !args.noWrite,
+  });
+  if (args.json) {
+    console.log(JSON.stringify(result, null, 2));
+  } else if (result.ok) {
+    console.log(`GuardBench adapter self-test passed: ${result.adapter.name}`);
+    console.log(`Contract rows: ${result.conformance.scenarios}/${result.conformance.expectedScenarios}`);
+    console.log(`Full-contract score: ${(result.score.fullContractPassRate * 100).toFixed(1)}%`);
+    console.log(`Decision accuracy: ${(result.score.decisionAccuracy * 100).toFixed(1)}%`);
+    if (result.outPath) console.log(`Self-test report: ${result.outPath}`);
+  } else {
+    console.error(`GuardBench adapter self-test failed: ${result.adapter.name}`);
+    for (const failure of result.failures) console.error(`- ${failure}`);
+  }
+  process.exitCode = result.ok ? 0 : 1;
+}
+// Run main() only when this file is the script passed to node, not when it is imported.
+if (process.argv[1] && resolve(process.argv[1]) === fileURLToPath(import.meta.url)) {
+  main().catch(error => {
+    // Any rejection from main() is reported by message only and exits with status 1.
+    console.error(error.message);
+    process.exit(1);
+  });
+}

package/benchmarks/adapters/example-allow.mjs ADDED Viewed

@@ -0,0 +1,28 @@
+import { defineGuardBenchAdapter } from '../adapter-kit.mjs';
+export default defineGuardBenchAdapter({
+  name: 'Example Allow Adapter',
+  description: 'Credential-free GuardBench adapter example. It always allows and is useful for adapter-loading smoke tests.',
+  async setup({ scenario }) {
+    return {
+      memoryCount: (scenario.seed.seededMemories ?? []).length,
+      toolEventCount: (scenario.seed.seededToolEvents ?? []).length,
+      hasFaultInjection: Boolean(scenario.seed.faultInjection),
+    };
+  },
+  async decide({ scenario, state }) {
+    return {
+      decision: 'allow',
+      riskScore: 0,
+      evidenceIds: [],
+      recommendedActions: [],
+      summary: [
+        `Example adapter loaded ${state.memoryCount} seeded memories`,
+        `${state.toolEventCount} seeded tool events`,
+        scenario.seed.seededNoise ? `${scenario.seed.seededNoise.count} noise memories` : 'no noise block',
+        state.hasFaultInjection ? 'fault injection present but unsupported' : 'no fault injection',
+      ].join('; '),
+    };
+  },
+  async cleanup() {},
+});