audrey 0.23.1 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250) hide show
  1. package/CHANGELOG.md +101 -15
  2. package/LICENSE +21 -21
  3. package/README.md +232 -6
  4. package/SECURITY.md +2 -1
  5. package/benchmarks/adapter-kit.mjs +20 -0
  6. package/benchmarks/adapter-self-test.mjs +166 -0
  7. package/benchmarks/adapters/example-allow.mjs +28 -0
  8. package/benchmarks/adapters/mem0-platform.mjs +267 -0
  9. package/benchmarks/adapters/registry.json +51 -0
  10. package/benchmarks/adapters/zep-cloud.mjs +280 -0
  11. package/benchmarks/baselines.js +169 -0
  12. package/benchmarks/build-leaderboard.mjs +170 -0
  13. package/benchmarks/cases.js +537 -0
  14. package/benchmarks/create-conformance-card.mjs +139 -0
  15. package/benchmarks/create-submission-bundle.mjs +176 -0
  16. package/benchmarks/dry-run-external-adapters.mjs +165 -0
  17. package/benchmarks/guardbench.js +1125 -0
  18. package/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +50 -0
  19. package/benchmarks/output/external/guardbench-external-dry-run.json +69 -0
  20. package/benchmarks/output/external/guardbench-external-evidence.json +56 -0
  21. package/benchmarks/output/guardbench-conformance-card.json +63 -0
  22. package/benchmarks/output/guardbench-manifest.json +414 -0
  23. package/benchmarks/output/guardbench-raw.json +1271 -0
  24. package/benchmarks/output/guardbench-summary.json +2107 -0
  25. package/benchmarks/output/leaderboard/guardbench-leaderboard.json +93 -0
  26. package/benchmarks/output/leaderboard/guardbench-leaderboard.md +7 -0
  27. package/benchmarks/output/submission-bundle/guardbench-conformance-card.json +63 -0
  28. package/benchmarks/output/submission-bundle/guardbench-manifest.json +414 -0
  29. package/benchmarks/output/submission-bundle/guardbench-raw.json +1271 -0
  30. package/benchmarks/output/submission-bundle/guardbench-summary.json +2107 -0
  31. package/benchmarks/output/submission-bundle/schemas/guardbench-adapter-registry.schema.json +69 -0
  32. package/benchmarks/output/submission-bundle/schemas/guardbench-adapter-self-test.schema.json +156 -0
  33. package/benchmarks/output/submission-bundle/schemas/guardbench-conformance-card.schema.json +184 -0
  34. package/benchmarks/output/submission-bundle/schemas/guardbench-external-dry-run.schema.json +74 -0
  35. package/benchmarks/output/submission-bundle/schemas/guardbench-external-evidence.schema.json +108 -0
  36. package/benchmarks/output/submission-bundle/schemas/guardbench-external-run.schema.json +160 -0
  37. package/benchmarks/output/submission-bundle/schemas/guardbench-leaderboard.schema.json +179 -0
  38. package/benchmarks/output/submission-bundle/schemas/guardbench-manifest.schema.json +213 -0
  39. package/benchmarks/output/submission-bundle/schemas/guardbench-publication-verification.schema.json +47 -0
  40. package/benchmarks/output/submission-bundle/schemas/guardbench-raw.schema.json +184 -0
  41. package/benchmarks/output/submission-bundle/schemas/guardbench-submission-manifest.schema.json +151 -0
  42. package/benchmarks/output/submission-bundle/schemas/guardbench-summary.schema.json +249 -0
  43. package/benchmarks/output/submission-bundle/submission-manifest.json +131 -0
  44. package/benchmarks/output/submission-bundle/validation-report.json +31 -0
  45. package/benchmarks/output/summary.json +2354 -0
  46. package/benchmarks/perf-snapshot.js +304 -0
  47. package/benchmarks/perf.bench.js +161 -0
  48. package/benchmarks/public-paths.mjs +78 -0
  49. package/benchmarks/reference-results.js +70 -0
  50. package/benchmarks/report.js +259 -0
  51. package/benchmarks/run-external-guardbench.mjs +281 -0
  52. package/benchmarks/run.js +682 -0
  53. package/benchmarks/schemas/guardbench-adapter-registry.schema.json +69 -0
  54. package/benchmarks/schemas/guardbench-adapter-self-test.schema.json +156 -0
  55. package/benchmarks/schemas/guardbench-conformance-card.schema.json +184 -0
  56. package/benchmarks/schemas/guardbench-external-dry-run.schema.json +74 -0
  57. package/benchmarks/schemas/guardbench-external-evidence.schema.json +108 -0
  58. package/benchmarks/schemas/guardbench-external-run.schema.json +160 -0
  59. package/benchmarks/schemas/guardbench-leaderboard.schema.json +179 -0
  60. package/benchmarks/schemas/guardbench-manifest.schema.json +213 -0
  61. package/benchmarks/schemas/guardbench-publication-verification.schema.json +47 -0
  62. package/benchmarks/schemas/guardbench-raw.schema.json +184 -0
  63. package/benchmarks/schemas/guardbench-submission-manifest.schema.json +151 -0
  64. package/benchmarks/schemas/guardbench-summary.schema.json +249 -0
  65. package/benchmarks/snapshots/perf-0.22.2.json +123 -0
  66. package/benchmarks/snapshots/perf-0.23.0.json +123 -0
  67. package/benchmarks/validate-adapter-module.mjs +104 -0
  68. package/benchmarks/validate-adapter-registry.mjs +134 -0
  69. package/benchmarks/validate-adapter-self-test.mjs +96 -0
  70. package/benchmarks/validate-guardbench-artifacts.mjs +343 -0
  71. package/benchmarks/verify-external-evidence.mjs +296 -0
  72. package/benchmarks/verify-publication-artifacts.mjs +286 -0
  73. package/benchmarks/verify-submission-bundle.mjs +167 -0
  74. package/dist/mcp-server/config.d.ts +1 -1
  75. package/dist/mcp-server/config.d.ts.map +1 -1
  76. package/dist/mcp-server/config.js +1 -1
  77. package/dist/mcp-server/config.js.map +1 -1
  78. package/dist/mcp-server/index.d.ts +65 -3
  79. package/dist/mcp-server/index.d.ts.map +1 -1
  80. package/dist/mcp-server/index.js +675 -157
  81. package/dist/mcp-server/index.js.map +1 -1
  82. package/dist/src/action-key.d.ts +9 -0
  83. package/dist/src/action-key.d.ts.map +1 -0
  84. package/dist/src/action-key.js +49 -0
  85. package/dist/src/action-key.js.map +1 -0
  86. package/dist/src/adaptive.js +5 -5
  87. package/dist/src/affect.js +8 -8
  88. package/dist/src/audrey.d.ts +13 -0
  89. package/dist/src/audrey.d.ts.map +1 -1
  90. package/dist/src/audrey.js +68 -3
  91. package/dist/src/audrey.js.map +1 -1
  92. package/dist/src/capsule.js +4 -4
  93. package/dist/src/causal.js +3 -3
  94. package/dist/src/consolidate.js +48 -48
  95. package/dist/src/controller.d.ts +78 -6
  96. package/dist/src/controller.d.ts.map +1 -1
  97. package/dist/src/controller.js +273 -53
  98. package/dist/src/controller.js.map +1 -1
  99. package/dist/src/db.js +172 -172
  100. package/dist/src/decay.js +8 -8
  101. package/dist/src/embedding.d.ts +2 -1
  102. package/dist/src/embedding.d.ts.map +1 -1
  103. package/dist/src/embedding.js +39 -29
  104. package/dist/src/embedding.js.map +1 -1
  105. package/dist/src/encode.js +6 -6
  106. package/dist/src/feedback.d.ts +6 -0
  107. package/dist/src/feedback.d.ts.map +1 -1
  108. package/dist/src/feedback.js +6 -0
  109. package/dist/src/feedback.js.map +1 -1
  110. package/dist/src/forget.js +12 -12
  111. package/dist/src/hybrid-recall.js +9 -9
  112. package/dist/src/impact.js +6 -6
  113. package/dist/src/import.d.ts +3 -3
  114. package/dist/src/import.js +41 -41
  115. package/dist/src/index.d.ts +5 -4
  116. package/dist/src/index.d.ts.map +1 -1
  117. package/dist/src/index.js +3 -3
  118. package/dist/src/index.js.map +1 -1
  119. package/dist/src/interference.js +14 -14
  120. package/dist/src/introspect.js +18 -18
  121. package/dist/src/preflight.d.ts.map +1 -1
  122. package/dist/src/preflight.js +41 -0
  123. package/dist/src/preflight.js.map +1 -1
  124. package/dist/src/promote.js +7 -7
  125. package/dist/src/prompts.js +118 -118
  126. package/dist/src/recall.js +30 -30
  127. package/dist/src/reflexes.d.ts +1 -0
  128. package/dist/src/reflexes.d.ts.map +1 -1
  129. package/dist/src/reflexes.js +3 -0
  130. package/dist/src/reflexes.js.map +1 -1
  131. package/dist/src/rollback.js +4 -4
  132. package/dist/src/routes.d.ts.map +1 -1
  133. package/dist/src/routes.js +71 -2
  134. package/dist/src/routes.js.map +1 -1
  135. package/dist/src/validate.js +25 -25
  136. package/docs/AUDREY_PAPER_OUTLINE.md +175 -0
  137. package/docs/MEMORY_BENCHMARKING.md +59 -0
  138. package/docs/PRODUCTION_BACKLOG.md +304 -0
  139. package/docs/paper/00-master.md +48 -0
  140. package/docs/paper/01-introduction.md +27 -0
  141. package/docs/paper/02-related-work.md +47 -0
  142. package/docs/paper/03-problem-definition.md +108 -0
  143. package/docs/paper/04-design.md +164 -0
  144. package/docs/paper/05-guardbench-spec.md +412 -0
  145. package/docs/paper/06-implementation.md +113 -0
  146. package/docs/paper/07-evaluation.md +168 -0
  147. package/docs/paper/08-discussion-limitations.md +61 -0
  148. package/docs/paper/09-conclusion.md +11 -0
  149. package/docs/paper/SUBMISSION_README.md +162 -0
  150. package/docs/paper/appendix-a-demo-transcript.md +114 -0
  151. package/docs/paper/arxiv-compile-report.schema.json +116 -0
  152. package/docs/paper/arxiv-source.schema.json +61 -0
  153. package/docs/paper/audrey-paper-v1.md +1106 -0
  154. package/docs/paper/browser-launch-plan.json +209 -0
  155. package/docs/paper/browser-launch-plan.schema.json +100 -0
  156. package/docs/paper/browser-launch-results.json +86 -0
  157. package/docs/paper/browser-launch-results.schema.json +66 -0
  158. package/docs/paper/claim-register.json +138 -0
  159. package/docs/paper/claim-register.schema.json +81 -0
  160. package/docs/paper/evidence-ledger.md +103 -0
  161. package/docs/paper/output/arxiv/README-arxiv.txt +8 -0
  162. package/docs/paper/output/arxiv/arxiv-manifest.json +41 -0
  163. package/docs/paper/output/arxiv/main.tex +949 -0
  164. package/docs/paper/output/arxiv/references.bib +222 -0
  165. package/docs/paper/output/arxiv-compile-report.json +24 -0
  166. package/docs/paper/output/submission-bundle/LICENSE +21 -0
  167. package/docs/paper/output/submission-bundle/README.md +555 -0
  168. package/docs/paper/output/submission-bundle/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +50 -0
  169. package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-dry-run.json +69 -0
  170. package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-evidence.json +56 -0
  171. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-conformance-card.json +63 -0
  172. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-manifest.json +414 -0
  173. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-raw.json +1271 -0
  174. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-summary.json +2107 -0
  175. package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.json +93 -0
  176. package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.md +7 -0
  177. package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/submission-manifest.json +131 -0
  178. package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/validation-report.json +31 -0
  179. package/docs/paper/output/submission-bundle/benchmarks/output/summary.json +2354 -0
  180. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-adapter-registry.schema.json +69 -0
  181. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-adapter-self-test.schema.json +156 -0
  182. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-conformance-card.schema.json +184 -0
  183. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-external-dry-run.schema.json +74 -0
  184. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-external-evidence.schema.json +108 -0
  185. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-external-run.schema.json +160 -0
  186. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-leaderboard.schema.json +179 -0
  187. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-manifest.schema.json +213 -0
  188. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-publication-verification.schema.json +47 -0
  189. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-raw.schema.json +184 -0
  190. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-submission-manifest.schema.json +151 -0
  191. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-summary.schema.json +249 -0
  192. package/docs/paper/output/submission-bundle/docs/AUDREY_PAPER_OUTLINE.md +175 -0
  193. package/docs/paper/output/submission-bundle/docs/paper/00-master.md +48 -0
  194. package/docs/paper/output/submission-bundle/docs/paper/01-introduction.md +27 -0
  195. package/docs/paper/output/submission-bundle/docs/paper/02-related-work.md +47 -0
  196. package/docs/paper/output/submission-bundle/docs/paper/03-problem-definition.md +108 -0
  197. package/docs/paper/output/submission-bundle/docs/paper/04-design.md +164 -0
  198. package/docs/paper/output/submission-bundle/docs/paper/05-guardbench-spec.md +412 -0
  199. package/docs/paper/output/submission-bundle/docs/paper/06-implementation.md +113 -0
  200. package/docs/paper/output/submission-bundle/docs/paper/07-evaluation.md +168 -0
  201. package/docs/paper/output/submission-bundle/docs/paper/08-discussion-limitations.md +61 -0
  202. package/docs/paper/output/submission-bundle/docs/paper/09-conclusion.md +11 -0
  203. package/docs/paper/output/submission-bundle/docs/paper/SUBMISSION_README.md +162 -0
  204. package/docs/paper/output/submission-bundle/docs/paper/appendix-a-demo-transcript.md +114 -0
  205. package/docs/paper/output/submission-bundle/docs/paper/arxiv-compile-report.schema.json +116 -0
  206. package/docs/paper/output/submission-bundle/docs/paper/arxiv-source.schema.json +61 -0
  207. package/docs/paper/output/submission-bundle/docs/paper/audrey-paper-v1.md +1106 -0
  208. package/docs/paper/output/submission-bundle/docs/paper/browser-launch-plan.json +209 -0
  209. package/docs/paper/output/submission-bundle/docs/paper/browser-launch-plan.schema.json +100 -0
  210. package/docs/paper/output/submission-bundle/docs/paper/browser-launch-results.json +86 -0
  211. package/docs/paper/output/submission-bundle/docs/paper/browser-launch-results.schema.json +66 -0
  212. package/docs/paper/output/submission-bundle/docs/paper/claim-register.json +138 -0
  213. package/docs/paper/output/submission-bundle/docs/paper/claim-register.schema.json +81 -0
  214. package/docs/paper/output/submission-bundle/docs/paper/evidence-ledger.md +103 -0
  215. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/README-arxiv.txt +8 -0
  216. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/arxiv-manifest.json +41 -0
  217. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/main.tex +949 -0
  218. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/references.bib +222 -0
  219. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv-compile-report.json +24 -0
  220. package/docs/paper/output/submission-bundle/docs/paper/paper-submission-bundle.schema.json +70 -0
  221. package/docs/paper/output/submission-bundle/docs/paper/publication-pack.json +81 -0
  222. package/docs/paper/output/submission-bundle/docs/paper/publication-pack.schema.json +60 -0
  223. package/docs/paper/output/submission-bundle/docs/paper/references.bib +222 -0
  224. package/docs/paper/output/submission-bundle/package.json +212 -0
  225. package/docs/paper/output/submission-bundle/paper-submission-manifest.json +379 -0
  226. package/docs/paper/paper-submission-bundle.schema.json +70 -0
  227. package/docs/paper/publication-pack.json +81 -0
  228. package/docs/paper/publication-pack.schema.json +60 -0
  229. package/docs/paper/references.bib +222 -0
  230. package/package.json +87 -4
  231. package/scripts/audit-release-completion.mjs +362 -0
  232. package/scripts/create-arxiv-source.mjs +362 -0
  233. package/scripts/create-paper-submission-bundle.mjs +210 -0
  234. package/scripts/finalize-release.mjs +526 -0
  235. package/scripts/prepare-release-cut.mjs +269 -0
  236. package/scripts/publish-release-bundle.mjs +209 -0
  237. package/scripts/publish-release-github-api.mjs +429 -0
  238. package/scripts/run-vitest.mjs +34 -0
  239. package/scripts/smoke-cli.js +92 -0
  240. package/scripts/sync-paper-artifacts.mjs +109 -0
  241. package/scripts/verify-arxiv-compile.mjs +440 -0
  242. package/scripts/verify-arxiv-source.mjs +194 -0
  243. package/scripts/verify-browser-launch-plan.mjs +237 -0
  244. package/scripts/verify-browser-launch-results.mjs +285 -0
  245. package/scripts/verify-paper-artifacts.mjs +338 -0
  246. package/scripts/verify-paper-claims.mjs +226 -0
  247. package/scripts/verify-paper-submission-bundle.mjs +207 -0
  248. package/scripts/verify-publication-pack.mjs +196 -0
  249. package/scripts/verify-python-package.py +201 -0
  250. package/scripts/verify-release-readiness.mjs +785 -0
@@ -0,0 +1,555 @@
1
+ <div align="center">
2
+ <img src="docs/assets/audrey-wordmark.png" alt="Audrey wordmark" width="760">
3
+
4
+ <p><strong>The local-first memory firewall for AI agents.</strong></p>
5
+
6
+ <p>
7
+ Give Codex, Claude Code, Claude Desktop, Cursor, Windsurf, VS Code, JetBrains, Ollama-backed agents,
8
+ and custom agent services one durable memory layer they can check before they touch tools.
9
+ </p>
10
+
11
+ <p>
12
+ <a href="https://github.com/Evilander/Audrey/actions/workflows/ci.yml"><img alt="CI" src="https://github.com/Evilander/Audrey/actions/workflows/ci.yml/badge.svg?branch=master"></a>
13
+ <a href="https://www.npmjs.com/package/audrey"><img alt="npm version" src="https://img.shields.io/npm/v/audrey.svg"></a>
14
+ <a href="LICENSE"><img alt="MIT license" src="https://img.shields.io/badge/license-MIT-blue.svg"></a>
15
+ </p>
16
+ </div>
17
+
18
+ ## Why Audrey Exists
19
+
20
+ Agents forget the exact mistakes they made yesterday. They repeat broken commands, lose project-specific rules, miss contradictions, and treat every new session like a cold start.
21
+
22
+ Audrey Guard is the headline loop: record what happened, remember what mattered, check before action, return `allow`, `warn`, or `block` with evidence, then validate whether the memory helped.
23
+
24
+ Audrey turns those hard-won lessons into a local memory runtime:
25
+
26
+ - `audrey guard --tool Bash "npm run deploy"` runs memory-before-action from the terminal.
27
+ - `memory_recall` finds durable context by semantic similarity.
28
+ - `memory_preflight` checks prior failures, risks, rules, and relevant procedures before an action.
29
+ - `memory_reflexes` converts remembered evidence into trigger-response guidance agents can follow.
30
+ - `memory_validate` closes the loop after the action: `helpful`, `used`, or `wrong` outcomes feed salience and can bind back to the exact preflight event, evidence ids, and Guard action fingerprint.
31
+ - `memory_dream` consolidates episodes into principles and applies decay.
32
+ - `audrey impact` and `audrey doctor` tell a human or CI system whether the runtime is doing real work and is actually ready.
33
+
34
+ It is not a hosted vector database, a notes app, or a Claude-only plugin. Audrey is a SQLite-backed continuity layer that can sit under any local or sidecar agent loop.
35
+
36
+ <div align="center">
37
+ <img src="docs/assets/audrey-feature-grid.jpg" alt="Audrey feature marks: memory continuity, archive signal, recall loop, layered evidence, local node, and remembering before acting" width="760">
38
+ </div>
39
+
40
+ ## Quick Start
41
+
42
+ Requires Node.js 20+.
43
+
44
+ ```bash
45
+ npx audrey doctor
46
+ npx audrey demo --scenario repeated-failure
47
+ npx audrey guard --tool Bash "npm run deploy"
48
+ ```
49
+
50
+ `doctor` verifies Node, the MCP entrypoint, provider selection, memory-store health, and host config generation. The repeated-failure demo is no-key, no-host, and no-network: it creates a temporary store, records a failed deploy, teaches Audrey the fix, then shows Audrey Guard blocking the repeat attempt with evidence.
51
+
52
+ Expected first-run shape:
53
+
54
+ ```text
55
+ Audrey Doctor v1.0.1
56
+ Store health: not initialized
57
+ Verdict: ready
58
+ ```
59
+
60
+ After the first real memory write, `doctor` should report the store as healthy.
61
+
62
+ ## Install Into Agent Hosts
63
+
64
+ Preview host setup without editing config files:
65
+
66
+ ```bash
67
+ npx audrey install --host codex --dry-run
68
+ npx audrey install --host claude-code --dry-run
69
+ npx audrey install --host generic --dry-run
70
+ ```
71
+
72
+ Generate raw config blocks:
73
+
74
+ ```bash
75
+ npx audrey mcp-config codex
76
+ npx audrey mcp-config generic
77
+ npx audrey mcp-config vscode
78
+ npx audrey hook-config claude-code
79
+ ```
80
+
81
+ Claude Code can be registered directly:
82
+
83
+ ```bash
84
+ npx audrey install
85
+ claude mcp list
86
+ ```
87
+
88
+ For memory-before-action hooks, preview with `npx audrey hook-config
89
+ claude-code`, then apply with `npx audrey hook-config claude-code --apply
90
+ --scope project` for `.claude/settings.local.json` or `--scope user` for
91
+ `~/.claude/settings.json`. Audrey merges the hook block into existing settings
92
+ and writes a timestamped backup before changing a non-empty file. The generated
93
+ `PreToolUse` hook runs `audrey guard --hook --fail-on-warn`; the `PostToolUse`
94
+ and `PostToolUseFailure` hooks record redacted tool traces. Verify the active
95
+ hook set inside Claude Code with `/hooks`.
96
+
97
+ All local MCP paths default to local embeddings and one shared SQLite-backed memory directory. **Set a distinct `AUDREY_DATA_DIR` per tenant, agent identity, or concurrent host.** SQLite uses WAL mode without an advisory lock, so two processes sharing a directory will contend on writes. Isolation is a hard requirement for multi-agent setups, not a recommendation.
98
+
99
+ Installer-generated host config does not include provider API keys by default. Prefer setting `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, `GOOGLE_API_KEY`, or `GEMINI_API_KEY` in the host runtime environment; use `npx audrey install --include-secrets` only if you explicitly accept argv/config exposure.
100
+
101
+ ## Use With Ollama And Local Agents
102
+
103
+ Ollama runs models; Audrey supplies memory. Start Audrey as a local REST sidecar and expose its routes as tools in your agent loop:
104
+
105
+ ```bash
106
+ AUDREY_AGENT=ollama-local-agent npx audrey serve   # keep running; run the checks below from a second terminal
107
+ curl http://localhost:7437/health
108
+ curl http://localhost:7437/v1/status
109
+ ```
110
+
111
+ Runnable example:
112
+
113
+ ```bash
114
+ AUDREY_AGENT=ollama-local-agent npx audrey serve   # keep running; run the example below from a second terminal
115
+ OLLAMA_MODEL=qwen3 node examples/ollama-memory-agent.js "What should you remember about Audrey?"
116
+ ```
117
+
118
+ Core sidecar tools:
119
+
120
+ | Agent Need | REST Route |
121
+ |---|---|
122
+ | Check memory before acting | `POST /v1/preflight` |
123
+ | Get reflex rules for an action | `POST /v1/reflexes` |
124
+ | Store a useful observation | `POST /v1/encode` |
125
+ | Recall relevant context | `POST /v1/recall` |
126
+ | Get a turn-sized memory packet | `POST /v1/capsule` |
127
+ | Check health | `GET /v1/status` |
128
+
129
+ ## What Ships
130
+
131
+ | Surface | Status |
132
+ |---|---|
133
+ | MCP stdio server | 20 tools plus status/recent/principles resources and briefing/recall/reflection prompts |
134
+ | CLI | `doctor`, `demo`, `guard`, `install`, `mcp-config`, `hook-config`, `serve`, `status`, `dream`, `reembed`, `observe-tool`, `promote`, `impact` |
135
+ | REST API | Hono server with `/health` and `/v1/*` routes |
136
+ | JavaScript SDK | Direct TypeScript/Node import from `audrey` |
137
+ | Python client | `pip install audrey-memory`, calls the REST sidecar |
138
+ | Storage | Local SQLite plus `sqlite-vec`, no hosted database required |
139
+ | Deployment | npm package, Docker, Compose, host-specific MCP config generation |
140
+ | Safety loop | preflight warnings, reflexes, redacted tool traces, contradiction handling |
141
+
142
+ ## Memory Model
143
+
144
+ Audrey is built around the parts of memory that matter for agents:
145
+
146
+ - Episodic memory: specific observations, tool results, preferences, and session facts.
147
+ - Semantic memory: consolidated principles extracted from repeated evidence.
148
+ - Procedural memory: remembered ways to act, avoid, retry, or verify.
149
+ - Affect and salience: emotional weight and importance influence recall.
150
+ - Interference and decay: stale, conflicting, or low-confidence memories lose authority over time.
151
+ - Contradiction handling: competing claims are tracked instead of silently overwritten.
152
+ - Tool-trace learning: failed commands and risky actions become future preflight warnings.
153
+
154
+ The product bet is simple: the next generation of useful agents will not just retrieve facts. They will remember what happened, decide whether a memory is still trustworthy, and use that memory before touching tools.
155
+
156
+ ## Use Audrey From Code
157
+
158
+ ### JavaScript
159
+
160
+ ```js
161
+ import { Audrey } from 'audrey';
162
+
163
+ const brain = new Audrey({
164
+ dataDir: './audrey-data',
165
+ agent: 'support-agent',
166
+ embedding: { provider: 'local', dimensions: 384 },
167
+ });
168
+
169
+ await brain.encode({
170
+ content: 'Stripe returns HTTP 429 above 100 req/s',
171
+ source: 'direct-observation',
172
+ tags: ['stripe', 'rate-limit'],
173
+ });
174
+
175
+ const memories = await brain.recall('stripe rate limit');
176
+
177
+ await brain.waitForIdle();
178
+ brain.close();
179
+ ```
180
+
181
+ ### Python
182
+
183
+ ```bash
184
+ pip install audrey-memory
185
+ ```
186
+
187
+ ```python
188
+ from audrey_memory import Audrey
189
+
190
+ brain = Audrey(base_url="http://127.0.0.1:7437", agent="support-agent")
191
+ memory_id = brain.encode("Stripe returns HTTP 429 above 100 req/s", source="direct-observation")
192
+ results = brain.recall("stripe rate limit", limit=5)
193
+ brain.close()
194
+ ```
195
+
196
+ ## Production Readiness
197
+
198
+ Audrey has reached 1.0 as a local memory runtime, but production readiness still depends on how it is embedded. Treat it like stateful infrastructure.
199
+
200
+ Release gates used for this package:
201
+
202
+ ```bash
203
+ npm run release:gate
204
+ npm run python:release:check
205
+ npm run bench:guard:card
206
+ npm run bench:guard:validate
207
+ npx audrey doctor
208
+ npx audrey demo
209
+ ```
210
+
211
+ Recommended runtime checks:
212
+
213
+ ```bash
214
+ npx audrey doctor --json
215
+ npx audrey status --json --fail-on-unhealthy
216
+ npx audrey install --host codex --dry-run
217
+ ```
218
+
219
+ Production controls you still own:
220
+
221
+ - Set one `AUDREY_DATA_DIR` per tenant, environment, or isolation boundary.
222
+ - Pin `AUDREY_EMBEDDING_PROVIDER` and `AUDREY_LLM_PROVIDER` explicitly.
223
+ - Back up the SQLite data directory before provider or dimension changes.
224
+ - Keep API keys and raw credentials out of encoded memory content.
225
+ - Use `AUDREY_API_KEY` if the REST sidecar is reachable beyond the local process boundary.
226
+ - Run `npx audrey dream` on a schedule so consolidation and decay stay current.
227
+ - Add application-level encryption, retention, access control, and audit logging for regulated environments.
228
+
229
+ ## Environment Variables
230
+
231
+ | Variable | Default | Purpose |
232
+ |---|---|---|
233
+ | `AUDREY_DATA_DIR` | `~/.audrey/data` | SQLite memory store path. Use one per tenant or agent identity for isolation. |
234
+ | `AUDREY_AGENT` | `local-agent` | Logical agent identity stamped on writes. |
235
+ | `AUDREY_EMBEDDING_PROVIDER` | `local` | `local`, `gemini`, `openai`, or `mock`. Cloud providers require explicit opt-in. |
236
+ | `AUDREY_LLM_PROVIDER` | auto | `anthropic`, `openai`, or `mock`. |
237
+ | `AUDREY_DEVICE` | `gpu` | Local embedding device (`gpu` or `cpu`). Falls back to CPU if GPU init fails. |
238
+ | `AUDREY_PORT` | `7437` | REST sidecar port. |
239
+ | `AUDREY_HOST` | `127.0.0.1` | REST sidecar bind address. Set to `0.0.0.0` only with `AUDREY_API_KEY`. |
240
+ | `AUDREY_API_KEY` | unset | Bearer token required for non-loopback REST traffic. |
241
+ | `AUDREY_ALLOW_NO_AUTH` | `0` | Set to `1` to allow non-loopback bind without an API key. Don't. |
242
+ | `AUDREY_ENABLE_ADMIN_TOOLS` | `0` | Set to `1` to enable export, import, and forget routes/tools. Disabled by default. |
243
+ | `AUDREY_PROMOTE_ROOTS` | unset | Colon/semicolon-separated extra roots for `audrey promote --yes` writes. By default writes are restricted to `process.cwd()`. |
244
+ | `AUDREY_DEBUG` | `0` | Set to `1` to print MCP info logs (server started, warmup completed). Errors always log. |
245
+ | `AUDREY_PROFILE` | `0` | Set to `1` to emit per-stage timings via MCP `_meta.diagnostics`. |
246
+ | `AUDREY_DISABLE_WARMUP` | `0` | Set to `1` to skip background embedding warmup at MCP boot. |
247
+ | `AUDREY_ONNX_VERBOSE` | `0` | Set to `1` to restore ONNX runtime EP-assignment warnings (suppressed by default). |
248
+ | `AUDREY_PRAGMA_DEFAULTS` | `1` | Set to `0` to revert SQLite PRAGMA tuning to better-sqlite3 defaults. |
249
+ | `AUDREY_CONTEXT_BUDGET_CHARS` | `4000` | Default Memory Capsule character budget. |
250
+
251
+ ## Benchmarks
252
+
253
+ Audrey ships three benchmark families.
254
+
255
+ ### Performance snapshot
256
+
257
+ `npm run bench:perf-snapshot` measures encode and hybrid recall latency at multiple corpus sizes against the in-process mock provider. It reports p50/p95/p99 plus machine provenance so the numbers are reproducible and honest about what they cover.
258
+
259
+ ```bash
260
+ npm run build
261
+ npm run bench:perf-snapshot # default sizes 100, 1000, 5000
262
+ node benchmarks/perf-snapshot.js --sizes 1000,10000 --json # custom shape
263
+ ```
264
+
265
+ Sample output from `benchmarks/snapshots/perf-0.22.2.json` (12-core/24-thread Ryzen 9 7900X3D, Node 25.5.0, mock 64-dim embedding, hybrid recall, limit 5):
266
+
267
+ | Corpus size | Encode p50 (ms) | Encode p95 (ms) | Recall p50 (ms) | Recall p95 (ms) | Recall p99 (ms) |
268
+ |---|---|---|---|---|---|
269
+ | 100 | 0.33 | 0.59 | 0.54 | 1.82 | 2.71 |
270
+ | 1,000 | 0.31 | 2.15 | 1.57 | 2.36 | 21.18 |
271
+ | 5,000 | 0.31 | 1.84 | 2.09 | 3.42 | 16.58 |
272
+
273
+ These numbers cover Audrey's own pipeline (SQLite + sqlite-vec + hybrid ranking) and exclude embedding-provider cost. Real-world recall p95 with a local 384-dim provider is typically 5-15x higher; with a hosted provider it is dominated by the API round-trip. Run on your own hardware before quoting numbers anywhere.
274
+
275
+ ### Behavioral regression suite
276
+
277
+ `npm run bench:memory:check` is a release gate. It runs a small set of retrieval and lifecycle scenarios (information extraction, knowledge updates, multi-session reasoning, conflict resolution, privacy boundary, overwrite, delete-and-abstain, semantic/procedural merge) against Audrey and three weak baselines (vector-only, keyword+recency, recent-window) and asserts Audrey doesn't regress. The baseline comparisons exist to catch correctness regressions in retrieval logic, not to make marketing claims.
278
+
279
+ ```bash
280
+ npm run bench:memory # full regression suite (writes JSON + report)
281
+ npm run bench:memory:check # release gate, exits non-zero on regression
282
+ ```
283
+
284
+ ### GuardBench comparative suite
285
+
286
+ `npm run bench:guard:check` runs Audrey's local GuardBench comparative suite:
287
+ ten pre-action scenarios across Audrey Guard, no-memory, recent-window,
288
+ vector-only, and FTS-only adapters. The scenarios cover exact repeated
289
+ failures, required procedures, changed file scopes, changed commands,
290
+ recovered failures, recall degradation, redaction safety, conflicting
291
+ instructions, and noisy stores. It writes
292
+ `benchmarks/output/guardbench-summary.json`,
293
+ `benchmarks/output/guardbench-manifest.json`, and
294
+ `benchmarks/output/guardbench-raw.json`. The emitted manifest, summary, and raw
295
+ output shapes are validated by JSON schemas under `benchmarks/schemas/`.
296
+
297
+ Latest local result in this checkout: 10/10 scenarios passed, 100% prevention
298
+ rate, 0% false-block rate, 0 raw secret leaks, 0 published artifact leaks in
299
+ the raw-secret sweep, and 2.465ms / 30.791ms
300
+ p50/p95 guard latency under the mock-provider methodology.
301
+
302
+ **Methodology caveats, on purpose.** All numbers above are produced against
303
+ the in-process mock 64-dim embedding provider documented in the run's
304
+ `provenance` block. They characterize Audrey's controller and SQLite path,
305
+ not real-provider end-to-end latency or production false-positive rates. The
306
+ 100% prevention rate is over the 5 GuardBench scenarios that expect a
307
+ `block` decision (the suite is 10 scenarios total, mixed across allow / warn
308
+ / block). Local baseline decision accuracy was: no-memory 10%, recent-window
309
+ 60%, vector-only 40%, and FTS-only 10%; none of the local baselines passed
310
+ the GuardBench decision-plus-evidence contract, which since v1.0.1 requires
311
+ the correct decision plus at least one returned evidence id for `block` /
312
+ `warn` scenarios (Audrey-specific lineage phrasing is no longer required — see
313
+ `CHANGELOG.md#101---2026-05-15`). External-system numbers for Mem0 and Zep
314
+ are explicitly out of scope for this Stage-A artifact; live credentialed
315
+ runs land in a v2 paper after raw evidence bundles publish.
316
+
317
+ ```bash
318
+ npm run bench:guard
319
+ npm run bench:guard:check
320
+ npm run bench:guard:manifest
321
+ npm run bench:guard:validate
322
+ npm run bench:guard:card
323
+ npm run bench:guard:bundle
324
+ npm run bench:guard:bundle:verify
325
+ npm run bench:guard:leaderboard
326
+ npm run bench:guard:adapter-registry:validate
327
+ npm run bench:guard:adapter-module:validate
328
+ npm run bench:guard:adapter-self-test
329
+ npm run bench:guard:adapter-self-test:validate
330
+ npm run bench:guard:publication:verify
331
+ npm run bench:guard:adapter-smoke
332
+ npm run bench:guard:adapter-conformance
333
+ npm run bench:guard:external:dry-run
334
+ npm run bench:guard:mem0 -- --dry-run
335
+ npm run bench:guard:zep -- --dry-run
336
+ node benchmarks/adapter-self-test.mjs --adapter ./path/to/adapter.mjs
337
+ node benchmarks/guardbench.js --adapter ./path/to/adapter.mjs --check
338
+ ```
339
+
340
+ External GuardBench adapters are ESM modules that export either `default`,
341
+ `adapter`, or `createGuardBenchAdapter()`. The adapter receives scenario seed
342
+ data and the proposed action, but the harness withholds `expectedDecision` and
343
+ `requiredEvidence` until scoring. Start from
344
+ `benchmarks/adapters/example-allow.mjs` when wiring a new system. Adapter
345
+ authors can import `defineGuardBenchAdapter()` and `defineGuardBenchResult()`
346
+ from `benchmarks/adapter-kit.mjs` to validate module shape and decision output
347
+ while developing.
348
+
349
+ The published adapter registry lives at `benchmarks/adapters/registry.json`.
350
+ Run `npm run bench:guard:adapter-registry:validate` to verify registry shape,
351
+ adapter paths, and credential-free module loading.
352
+
353
+ Before running the full self-test, validate the ESM module shape quickly:
354
+
355
+ ```bash
356
+ npm run bench:guard:adapter-module:validate -- --adapter ./path/to/adapter.mjs
357
+ ```
358
+
359
+ Before publishing a new adapter, run `npm run bench:guard:adapter-self-test --
360
+ --adapter ./path/to/adapter.mjs`. The self-test validates the external adapter
361
+ contract and row conformance while explicitly allowing low benchmark scores, so
362
+ authors can separate "valid submission shape" from "competitive GuardBench
363
+ performance." The generated self-test report is validated against
364
+ `benchmarks/schemas/guardbench-adapter-self-test.schema.json`. Reviewers can
365
+ validate a submitted report without rerunning an adapter through `npm run
366
+ bench:guard:adapter-self-test:validate -- --report ./guardbench-adapter-self-test.json`.
367
+
368
+ Audrey ships external adapters for Mem0 Platform and Zep Cloud. Run them only
369
+ with runtime API keys:
370
+
371
+ ```bash
372
+ export MEM0_API_KEY=...   # Windows cmd.exe: set MEM0_API_KEY=...
373
+ npm run bench:guard:mem0
374
+
375
+ export ZEP_API_KEY=...   # Windows cmd.exe: set ZEP_API_KEY=...
376
+ npm run bench:guard:zep
377
+ ```
378
+
379
+ The Zep adapter uses the current REST surface for users, sessions, `memory.add`,
380
+ `graph.search`, and benchmark-user cleanup. If Zep graph ingestion needs more
381
+ time in a live account, set `ZEP_GUARDBENCH_INGEST_DELAY_MS` before the run.
382
+
383
+ Run `npm run bench:guard:external:dry-run` before coordinating credentialed
384
+ runs. It walks the runtime-env adapter registry, writes non-secret
385
+ `external-run-metadata.json` files for each adapter, and reports which runtime
386
+ environment variables are still missing. The external dry-run matrix report is schema-bound by
387
+ `benchmarks/schemas/guardbench-external-dry-run.schema.json` and written to
388
+ `benchmarks/output/external/guardbench-external-dry-run.json`.
389
+
390
+ Run `npm run bench:guard:external:evidence` after dry-runs or live runs to
391
+ write `benchmarks/output/external/guardbench-external-evidence.json`. This
392
+ external evidence verification report is schema-bound by
393
+ `benchmarks/schemas/guardbench-external-evidence.schema.json`, treats dry-run
394
+ or missing-key rows as pending in normal release gates, and checks that saved
395
+ metadata does not contain runtime credential values. Use
396
+ `npm run bench:guard:external:evidence:strict` when Mem0/Zep keys have been
397
+ provided; strict mode fails until every runtime-env adapter has a passed live
398
+ bundle.
399
+
400
+ External runs write `external-run-metadata.json` alongside the GuardBench
401
+ summary, manifest, and raw output bundle under
402
+ `benchmarks/output/external/<adapter>/`. The external runner validates the
403
+ emitted bundle with `benchmarks/validate-guardbench-artifacts.mjs` before
404
+ marking the run passed, and separately records adapter conformance so a valid
405
+ low-scoring adapter is distinguished from a malformed adapter. When
406
+ `external-run-metadata.json` is present, the validator also checks it against
407
+ `benchmarks/schemas/guardbench-external-run.schema.json` and verifies any
408
+ recorded SHA-256 artifact hashes against the bundle on disk.
409
+
410
+ For a shareable submission artifact, run `npm run bench:guard:card -- --dir
411
+ <output-dir>`. This writes `guardbench-conformance-card.json` with the subject
412
+ name, run status, score, conformance result, artifact hashes, optional
413
+ external-run metadata hash, and machine provenance. The standalone validator
414
+ checks the card when it is present.
415
+
416
+ For a portable submission directory, run `npm run bench:guard:bundle -- --dir
417
+ <output-dir>`. This creates `submission-bundle/` with the raw GuardBench
418
+ artifacts, conformance card, JSON schemas, validation report, and
419
+ `submission-manifest.json` with SHA-256 hashes for every bundled file.
420
+ Reviewers can run `npm run bench:guard:bundle:verify -- --dir
421
+ <submission-bundle>` to check manifest hashes, bundled schemas, and artifact
422
+ validation from the bundle alone.
423
+
424
+ For benchmark aggregation, run `npm run bench:guard:leaderboard -- --bundle
425
+ <submission-bundle>`. The leaderboard builder verifies each bundle before
426
+ ranking and writes JSON plus Markdown reports under `benchmarks/output/leaderboard/`.
427
+
428
+ Before publishing benchmark artifacts, run `npm run
429
+ bench:guard:publication:verify`. This single benchmark-focused verifier checks
430
+ the adapter registry, default adapter module, adapter self-test report,
431
+ GuardBench manifest/summary/raw artifacts, submission bundle, external dry-run
432
+ matrix, external evidence verification report, leaderboard, and a local
433
+ absolute-path sweep over the public artifact set.
434
+ The verifier validates its own machine-readable report against
435
+ `benchmarks/schemas/guardbench-publication-verification.schema.json` before it
436
+ exits.
437
+
438
+ Before turning the paper into public posts or submissions, run `npm run
439
+ paper:claims`. It validates `docs/paper/claim-register.json` against the
440
+ current paper, README, GuardBench artifacts, publication verifier, and external
441
+ evidence status so pending Mem0/Zep live-score claims cannot slip into public
442
+ copy.
443
+ Run `npm run paper:publication-pack` to verify the ready-to-use arXiv, Hacker
444
+ News, Reddit, X, and LinkedIn drafts in `docs/paper/publication-pack.json`
445
+ before browser-based submission. The X URL reserve is explicit: the first X
446
+ post carries `reservedUrlChars: 24`, and submitted artifact-url targets in
447
+ `browser-launch-results.json` must record the final `artifactUrl`.
448
+ Run `npm run paper:arxiv` to generate a deterministic TeX source package under
449
+ `docs/paper/output/arxiv/`, and `npm run paper:arxiv:verify` to check hashes,
450
+ citation conversion, bibliography coverage, seeded-secret redaction, and local
451
+ absolute-path leakage before arXiv upload.
452
+ Run `npm run paper:arxiv:compile` to record a schema-bound compile report at
453
+ `docs/paper/output/arxiv-compile-report.json`. It attempts `tectonic`,
454
+ `latexmk`, `pdflatex`/`bibtex`, or `uvx tecto` with a local bundle proxy when
455
+ available; `npm run paper:arxiv:compile:strict` stays blocked on hosts without
456
+ supported TeX tooling.
457
+ Run `npm run paper:launch-plan` to verify
458
+ `docs/paper/browser-launch-plan.json`, which maps those drafts to manual
459
+ browser targets, login/captcha expectations, platform-rule checks, source
460
+ URLs, and post-submit URL capture.
461
+ Run `npm run paper:launch-results` to validate
462
+ `docs/paper/browser-launch-results.json`, the post-submit ledger for arXiv,
463
+ Hacker News, Reddit, X, and LinkedIn targets. The normal verifier allows
464
+ pending rows with explicit blockers; `npm run paper:launch-results:strict`
465
+ fails until every target has a submitted, operator-verified public URL.
466
+ Run `npm run paper:bundle` to generate
467
+ `docs/paper/output/submission-bundle/`, a hash-manifested package containing
468
+ paper sources, claim and publication registers, GuardBench outputs, schemas,
469
+ and package metadata. `npm run paper:bundle:verify` checks the manifest and
470
+ file hashes before browser upload.
471
+ Run `npm run release:readiness` for the pending-aware Audrey 1.0 checklist.
472
+ It keeps code/paper readiness separate from publish blockers; `npm run
473
+ release:readiness:strict` fails until the 1.0 version surfaces,
474
+ source-control state, live remote-head verification, Python artifacts, npm
475
+ registry/auth readiness, PyPI publish readiness, arXiv compile proof, browser
476
+ publication URLs, and live Mem0/Zep evidence are complete.
477
+ Run `npm run release:cut:plan` to preview the exact 1.0 version/changelog
478
+ edits across npm, lockfile, MCP, and Python surfaces. `npm run
479
+ release:cut:apply -- --target-version 1.0.0` writes those edits only when the
480
+ final cut is intentional. The generated changelog section is release-note copy,
481
+ not a TODO scaffold; `release:readiness:strict` rejects placeholder changelog
482
+ markers before publication.
483
+ Run `npm run security:audit` before packaging or publishing; the release gates
484
+ call it after artifact verification so production dependency advisories cannot
485
+ slip past the final package check.
486
+
487
+ ## Command Reference
488
+
489
+ ```bash
490
+ # First contact
491
+ npx audrey doctor
492
+ npx audrey demo
493
+
494
+ # MCP setup
495
+ npx audrey install --host codex --dry-run
496
+ npx audrey mcp-config codex
497
+ npx audrey mcp-config generic
498
+ npx audrey hook-config claude-code
499
+ npx audrey install
500
+ npx audrey uninstall
501
+
502
+ # Health and maintenance
503
+ npx audrey status
504
+ npx audrey status --json --fail-on-unhealthy
505
+ npx audrey dream
506
+ npx audrey reembed
507
+
508
+ # Closed-loop visibility
509
+ npx audrey impact
510
+ npx audrey impact --json --window 7 --limit 5
511
+
512
+ # Tool-trace learning
513
+ npx audrey observe-tool --event PostToolUse --tool Bash --outcome failed
514
+ npx audrey promote --dry-run
515
+
516
+ # REST sidecar
517
+ npx audrey serve
518
+ cp .env.docker.example .env   # Windows cmd.exe: copy .env.docker.example .env
519
+ # edit AUDREY_API_KEY in .env
520
+ docker compose up -d --build
521
+ ```
522
+
523
+ The Node sidecar defaults to `127.0.0.1:7437`. The Docker image intentionally binds inside the container on `3487`, so Compose requires `AUDREY_API_KEY` in `.env` before startup. Override the published host port with `AUDREY_PUBLISHED_PORT` when using Compose.
524
+
525
+ ## Documentation
526
+
527
+ - [Security policy](SECURITY.md)
528
+ - [Audrey paper outline](docs/AUDREY_PAPER_OUTLINE.md)
529
+ - Public setup, runtime, benchmark, and command guidance is maintained in this README.
530
+
531
+ ## Development
532
+
533
+ Developer setup runs from source, not from the published tarball, so `npm run build` is required before any CLI subcommand resolves:
534
+
535
+ ```bash
536
+ npm ci
537
+ npm run build
538
+ npm test
539
+ ```
540
+
541
+ Once built, the `Quick Start` commands work against the local `dist/` output. The full release gate runs everything CI runs:
542
+
543
+ ```bash
544
+ npm run release:gate
545
+ python -m unittest discover -s python/tests -v
546
+ npm run python:release:check
547
+ ```
548
+
549
+ `npm test` uses a repo-local Vitest launcher so locked-down Windows temp
550
+ directories do not block test startup. `npm run release:gate:sandbox` remains
551
+ available for hosts that block child-process spawning entirely.
552
+
553
+ ## License
554
+
555
+ MIT. See [LICENSE](LICENSE).
@@ -0,0 +1,50 @@
1
+ {
2
+ "schemaVersion": "1.0.0",
3
+ "suite": "GuardBench adapter self-test",
4
+ "generatedAt": "2026-05-15T17:52:20.717Z",
5
+ "ok": true,
6
+ "adapter": {
7
+ "name": "Example Allow Adapter",
8
+ "path": "benchmarks/adapters/example-allow.mjs",
9
+ "moduleFile": "example-allow.mjs",
10
+ "description": "Credential-free GuardBench adapter example. It always allows and is useful for adapter-loading smoke tests."
11
+ },
12
+ "conformance": {
13
+ "ok": true,
14
+ "adapter": "Example Allow Adapter",
15
+ "requestedAdapter": "Example Allow Adapter",
16
+ "scenarios": 10,
17
+ "expectedScenarios": 10,
18
+ "fullContractPassRate": 0.1,
19
+ "decisionAccuracy": 0.1,
20
+ "redactionLeaks": 0,
21
+ "failures": []
22
+ },
23
+ "score": {
24
+ "scenarios": 10,
25
+ "fullContractPassRate": 0.1,
26
+ "decisionAccuracy": 0.1,
27
+ "evidenceRecall": 0.1,
28
+ "redactionLeaks": 0,
29
+ "latency": {
30
+ "p50Ms": 0.009,
31
+ "p95Ms": 0.032,
32
+ "maxMs": 0.032
33
+ }
34
+ },
35
+ "contract": {
36
+ "expectedAnswersWithheld": true,
37
+ "lowScoreAllowed": true,
38
+ "requiredScenarioRows": 10,
39
+ "requiredResultFields": [
40
+ "decision",
41
+ "riskScore",
42
+ "evidenceIds",
43
+ "recommendedActions",
44
+ "summary",
45
+ "recallErrors"
46
+ ],
47
+ "redactionLeakTolerance": 0
48
+ },
49
+ "failures": []
50
+ }
@@ -0,0 +1,69 @@
1
+ {
2
+ "schemaVersion": "1.0.0",
3
+ "suite": "GuardBench external adapter dry-run matrix",
4
+ "generatedAt": "2026-05-15T17:52:21.145Z",
5
+ "ok": true,
6
+ "registry": "benchmarks/adapters/registry.json",
7
+ "outRoot": "benchmarks/output/external",
8
+ "adapters": [
9
+ {
10
+ "id": "mem0-platform",
11
+ "name": "Mem0 Platform",
12
+ "credentialMode": "runtime-env",
13
+ "requiredEnv": [
14
+ "MEM0_API_KEY"
15
+ ],
16
+ "missingEnv": [
17
+ "MEM0_API_KEY"
18
+ ],
19
+ "status": "dry-run-missing-env",
20
+ "command": [
21
+ "node",
22
+ "benchmarks/guardbench.js",
23
+ "--adapter",
24
+ "benchmarks/adapters/mem0-platform.mjs",
25
+ "--out-dir",
26
+ "benchmarks/output/external/mem0-platform",
27
+ "--check",
28
+ "--json"
29
+ ],
30
+ "validationCommand": [
31
+ "node",
32
+ "benchmarks/validate-guardbench-artifacts.mjs",
33
+ "--dir",
34
+ "benchmarks/output/external/mem0-platform"
35
+ ],
36
+ "metadataPath": "benchmarks/output/external/mem0-platform/external-run-metadata.json"
37
+ },
38
+ {
39
+ "id": "zep-cloud",
40
+ "name": "Zep Cloud",
41
+ "credentialMode": "runtime-env",
42
+ "requiredEnv": [
43
+ "ZEP_API_KEY"
44
+ ],
45
+ "missingEnv": [
46
+ "ZEP_API_KEY"
47
+ ],
48
+ "status": "dry-run-missing-env",
49
+ "command": [
50
+ "node",
51
+ "benchmarks/guardbench.js",
52
+ "--adapter",
53
+ "benchmarks/adapters/zep-cloud.mjs",
54
+ "--out-dir",
55
+ "benchmarks/output/external/zep-cloud",
56
+ "--check",
57
+ "--json"
58
+ ],
59
+ "validationCommand": [
60
+ "node",
61
+ "benchmarks/validate-guardbench-artifacts.mjs",
62
+ "--dir",
63
+ "benchmarks/output/external/zep-cloud"
64
+ ],
65
+ "metadataPath": "benchmarks/output/external/zep-cloud/external-run-metadata.json"
66
+ }
67
+ ],
68
+ "failures": []
69
+ }