audrey 0.21.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (346)
  1. package/CHANGELOG.md +238 -0
  2. package/LICENSE +21 -21
  3. package/README.md +281 -33
  4. package/SECURITY.md +30 -0
  5. package/benchmarks/adapter-kit.mjs +20 -0
  6. package/benchmarks/adapter-self-test.mjs +166 -0
  7. package/benchmarks/adapters/example-allow.mjs +28 -0
  8. package/benchmarks/adapters/mem0-platform.mjs +267 -0
  9. package/benchmarks/adapters/registry.json +51 -0
  10. package/benchmarks/adapters/zep-cloud.mjs +280 -0
  11. package/benchmarks/baselines.js +169 -0
  12. package/benchmarks/build-leaderboard.mjs +170 -0
  13. package/benchmarks/cases.js +537 -0
  14. package/benchmarks/create-conformance-card.mjs +139 -0
  15. package/benchmarks/create-submission-bundle.mjs +176 -0
  16. package/benchmarks/dry-run-external-adapters.mjs +165 -0
  17. package/benchmarks/guardbench.js +1035 -0
  18. package/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +50 -0
  19. package/benchmarks/output/external/guardbench-external-dry-run.json +69 -0
  20. package/benchmarks/output/external/guardbench-external-evidence.json +56 -0
  21. package/benchmarks/output/guardbench-conformance-card.json +63 -0
  22. package/benchmarks/output/guardbench-manifest.json +414 -0
  23. package/benchmarks/output/guardbench-raw.json +1171 -0
  24. package/benchmarks/output/guardbench-summary.json +1981 -0
  25. package/benchmarks/output/leaderboard/guardbench-leaderboard.json +93 -0
  26. package/benchmarks/output/leaderboard/guardbench-leaderboard.md +7 -0
  27. package/benchmarks/output/submission-bundle/guardbench-conformance-card.json +63 -0
  28. package/benchmarks/output/submission-bundle/guardbench-manifest.json +414 -0
  29. package/benchmarks/output/submission-bundle/guardbench-raw.json +1171 -0
  30. package/benchmarks/output/submission-bundle/guardbench-summary.json +1981 -0
  31. package/benchmarks/output/submission-bundle/schemas/guardbench-adapter-registry.schema.json +69 -0
  32. package/benchmarks/output/submission-bundle/schemas/guardbench-adapter-self-test.schema.json +156 -0
  33. package/benchmarks/output/submission-bundle/schemas/guardbench-conformance-card.schema.json +184 -0
  34. package/benchmarks/output/submission-bundle/schemas/guardbench-external-dry-run.schema.json +74 -0
  35. package/benchmarks/output/submission-bundle/schemas/guardbench-external-evidence.schema.json +108 -0
  36. package/benchmarks/output/submission-bundle/schemas/guardbench-external-run.schema.json +160 -0
  37. package/benchmarks/output/submission-bundle/schemas/guardbench-leaderboard.schema.json +179 -0
  38. package/benchmarks/output/submission-bundle/schemas/guardbench-manifest.schema.json +213 -0
  39. package/benchmarks/output/submission-bundle/schemas/guardbench-publication-verification.schema.json +47 -0
  40. package/benchmarks/output/submission-bundle/schemas/guardbench-raw.schema.json +164 -0
  41. package/benchmarks/output/submission-bundle/schemas/guardbench-submission-manifest.schema.json +151 -0
  42. package/benchmarks/output/submission-bundle/schemas/guardbench-summary.schema.json +228 -0
  43. package/benchmarks/output/submission-bundle/submission-manifest.json +131 -0
  44. package/benchmarks/output/submission-bundle/validation-report.json +31 -0
  45. package/benchmarks/output/summary.json +2354 -0
  46. package/benchmarks/perf-snapshot.js +304 -0
  47. package/benchmarks/perf.bench.js +161 -0
  48. package/benchmarks/public-paths.mjs +78 -0
  49. package/benchmarks/reference-results.js +70 -0
  50. package/benchmarks/report.js +259 -0
  51. package/benchmarks/run-external-guardbench.mjs +281 -0
  52. package/benchmarks/run.js +682 -0
  53. package/benchmarks/schemas/guardbench-adapter-registry.schema.json +69 -0
  54. package/benchmarks/schemas/guardbench-adapter-self-test.schema.json +156 -0
  55. package/benchmarks/schemas/guardbench-conformance-card.schema.json +184 -0
  56. package/benchmarks/schemas/guardbench-external-dry-run.schema.json +74 -0
  57. package/benchmarks/schemas/guardbench-external-evidence.schema.json +108 -0
  58. package/benchmarks/schemas/guardbench-external-run.schema.json +160 -0
  59. package/benchmarks/schemas/guardbench-leaderboard.schema.json +179 -0
  60. package/benchmarks/schemas/guardbench-manifest.schema.json +213 -0
  61. package/benchmarks/schemas/guardbench-publication-verification.schema.json +47 -0
  62. package/benchmarks/schemas/guardbench-raw.schema.json +164 -0
  63. package/benchmarks/schemas/guardbench-submission-manifest.schema.json +151 -0
  64. package/benchmarks/schemas/guardbench-summary.schema.json +228 -0
  65. package/benchmarks/snapshots/perf-0.22.2.json +123 -0
  66. package/benchmarks/snapshots/perf-0.23.0.json +123 -0
  67. package/benchmarks/validate-adapter-module.mjs +104 -0
  68. package/benchmarks/validate-adapter-registry.mjs +134 -0
  69. package/benchmarks/validate-adapter-self-test.mjs +96 -0
  70. package/benchmarks/validate-guardbench-artifacts.mjs +343 -0
  71. package/benchmarks/verify-external-evidence.mjs +296 -0
  72. package/benchmarks/verify-publication-artifacts.mjs +286 -0
  73. package/benchmarks/verify-submission-bundle.mjs +167 -0
  74. package/dist/mcp-server/config.d.ts +5 -4
  75. package/dist/mcp-server/config.d.ts.map +1 -1
  76. package/dist/mcp-server/config.js +6 -8
  77. package/dist/mcp-server/config.js.map +1 -1
  78. package/dist/mcp-server/index.d.ts +281 -23
  79. package/dist/mcp-server/index.d.ts.map +1 -1
  80. package/dist/mcp-server/index.js +1186 -82
  81. package/dist/mcp-server/index.js.map +1 -1
  82. package/dist/src/action-key.d.ts +9 -0
  83. package/dist/src/action-key.d.ts.map +1 -0
  84. package/dist/src/action-key.js +49 -0
  85. package/dist/src/action-key.js.map +1 -0
  86. package/dist/src/adaptive.d.ts.map +1 -1
  87. package/dist/src/adaptive.js +8 -6
  88. package/dist/src/adaptive.js.map +1 -1
  89. package/dist/src/affect.d.ts +4 -1
  90. package/dist/src/affect.d.ts.map +1 -1
  91. package/dist/src/affect.js +14 -12
  92. package/dist/src/affect.js.map +1 -1
  93. package/dist/src/audrey.d.ts +57 -4
  94. package/dist/src/audrey.d.ts.map +1 -1
  95. package/dist/src/audrey.js +512 -65
  96. package/dist/src/audrey.js.map +1 -1
  97. package/dist/src/capsule.d.ts +2 -1
  98. package/dist/src/capsule.d.ts.map +1 -1
  99. package/dist/src/capsule.js +18 -8
  100. package/dist/src/capsule.js.map +1 -1
  101. package/dist/src/causal.d.ts.map +1 -1
  102. package/dist/src/causal.js +23 -5
  103. package/dist/src/causal.js.map +1 -1
  104. package/dist/src/confidence.d.ts.map +1 -1
  105. package/dist/src/confidence.js +3 -0
  106. package/dist/src/confidence.js.map +1 -1
  107. package/dist/src/consolidate.d.ts +1 -0
  108. package/dist/src/consolidate.d.ts.map +1 -1
  109. package/dist/src/consolidate.js +70 -54
  110. package/dist/src/consolidate.js.map +1 -1
  111. package/dist/src/controller.d.ts +94 -0
  112. package/dist/src/controller.d.ts.map +1 -0
  113. package/dist/src/controller.js +350 -0
  114. package/dist/src/controller.js.map +1 -0
  115. package/dist/src/db.d.ts.map +1 -1
  116. package/dist/src/db.js +181 -169
  117. package/dist/src/db.js.map +1 -1
  118. package/dist/src/decay.d.ts.map +1 -1
  119. package/dist/src/decay.js +62 -55
  120. package/dist/src/decay.js.map +1 -1
  121. package/dist/src/embedding.d.ts +2 -1
  122. package/dist/src/embedding.d.ts.map +1 -1
  123. package/dist/src/embedding.js +60 -22
  124. package/dist/src/embedding.js.map +1 -1
  125. package/dist/src/encode.d.ts +9 -2
  126. package/dist/src/encode.d.ts.map +1 -1
  127. package/dist/src/encode.js +25 -12
  128. package/dist/src/encode.js.map +1 -1
  129. package/dist/src/export.d.ts.map +1 -1
  130. package/dist/src/export.js +5 -3
  131. package/dist/src/export.js.map +1 -1
  132. package/dist/src/feedback.d.ts +35 -0
  133. package/dist/src/feedback.d.ts.map +1 -0
  134. package/dist/src/feedback.js +129 -0
  135. package/dist/src/feedback.js.map +1 -0
  136. package/dist/src/forget.d.ts.map +1 -1
  137. package/dist/src/forget.js +68 -60
  138. package/dist/src/forget.js.map +1 -1
  139. package/dist/src/fts.js +1 -1
  140. package/dist/src/fts.js.map +1 -1
  141. package/dist/src/hybrid-recall.d.ts +2 -1
  142. package/dist/src/hybrid-recall.d.ts.map +1 -1
  143. package/dist/src/hybrid-recall.js +41 -32
  144. package/dist/src/hybrid-recall.js.map +1 -1
  145. package/dist/src/impact.d.ts +47 -0
  146. package/dist/src/impact.d.ts.map +1 -0
  147. package/dist/src/impact.js +146 -0
  148. package/dist/src/impact.js.map +1 -0
  149. package/dist/src/import.d.ts +177 -1
  150. package/dist/src/import.d.ts.map +1 -1
  151. package/dist/src/import.js +235 -46
  152. package/dist/src/import.js.map +1 -1
  153. package/dist/src/index.d.ts +5 -1
  154. package/dist/src/index.d.ts.map +1 -1
  155. package/dist/src/index.js +3 -1
  156. package/dist/src/index.js.map +1 -1
  157. package/dist/src/interference.d.ts +5 -2
  158. package/dist/src/interference.d.ts.map +1 -1
  159. package/dist/src/interference.js +39 -32
  160. package/dist/src/interference.js.map +1 -1
  161. package/dist/src/introspect.js +18 -18
  162. package/dist/src/llm.d.ts.map +1 -1
  163. package/dist/src/llm.js +1 -0
  164. package/dist/src/llm.js.map +1 -1
  165. package/dist/src/migrate.d.ts.map +1 -1
  166. package/dist/src/migrate.js +21 -9
  167. package/dist/src/migrate.js.map +1 -1
  168. package/dist/src/preflight.d.ts +2 -1
  169. package/dist/src/preflight.d.ts.map +1 -1
  170. package/dist/src/preflight.js +66 -5
  171. package/dist/src/preflight.js.map +1 -1
  172. package/dist/src/profile.d.ts +23 -0
  173. package/dist/src/profile.d.ts.map +1 -0
  174. package/dist/src/profile.js +51 -0
  175. package/dist/src/profile.js.map +1 -0
  176. package/dist/src/promote.d.ts.map +1 -1
  177. package/dist/src/promote.js +8 -9
  178. package/dist/src/promote.js.map +1 -1
  179. package/dist/src/prompts.d.ts.map +1 -1
  180. package/dist/src/prompts.js +165 -136
  181. package/dist/src/prompts.js.map +1 -1
  182. package/dist/src/recall.d.ts +9 -6
  183. package/dist/src/recall.d.ts.map +1 -1
  184. package/dist/src/recall.js +204 -62
  185. package/dist/src/recall.js.map +1 -1
  186. package/dist/src/redact.d.ts +7 -1
  187. package/dist/src/redact.d.ts.map +1 -1
  188. package/dist/src/redact.js +94 -11
  189. package/dist/src/redact.js.map +1 -1
  190. package/dist/src/reflexes.d.ts +1 -0
  191. package/dist/src/reflexes.d.ts.map +1 -1
  192. package/dist/src/reflexes.js +3 -0
  193. package/dist/src/reflexes.js.map +1 -1
  194. package/dist/src/rollback.d.ts.map +1 -1
  195. package/dist/src/rollback.js +13 -8
  196. package/dist/src/rollback.js.map +1 -1
  197. package/dist/src/routes.d.ts +1 -0
  198. package/dist/src/routes.d.ts.map +1 -1
  199. package/dist/src/routes.js +251 -6
  200. package/dist/src/routes.js.map +1 -1
  201. package/dist/src/rules-compiler.d.ts.map +1 -1
  202. package/dist/src/rules-compiler.js +36 -6
  203. package/dist/src/rules-compiler.js.map +1 -1
  204. package/dist/src/server.d.ts +2 -1
  205. package/dist/src/server.d.ts.map +1 -1
  206. package/dist/src/server.js +42 -4
  207. package/dist/src/server.js.map +1 -1
  208. package/dist/src/tool-trace.d.ts.map +1 -1
  209. package/dist/src/tool-trace.js +42 -29
  210. package/dist/src/tool-trace.js.map +1 -1
  211. package/dist/src/types.d.ts +28 -1
  212. package/dist/src/types.d.ts.map +1 -1
  213. package/dist/src/ulid.d.ts.map +1 -1
  214. package/dist/src/ulid.js +52 -2
  215. package/dist/src/ulid.js.map +1 -1
  216. package/dist/src/utils.d.ts.map +1 -1
  217. package/dist/src/utils.js +8 -1
  218. package/dist/src/utils.js.map +1 -1
  219. package/dist/src/validate.d.ts +2 -0
  220. package/dist/src/validate.d.ts.map +1 -1
  221. package/dist/src/validate.js +77 -46
  222. package/dist/src/validate.js.map +1 -1
  223. package/docs/AUDREY_PAPER_OUTLINE.md +175 -0
  224. package/docs/MEMORY_BENCHMARKING.md +59 -0
  225. package/docs/PRODUCTION_BACKLOG.md +304 -0
  226. package/docs/paper/00-master.md +48 -0
  227. package/docs/paper/01-introduction.md +27 -0
  228. package/docs/paper/02-related-work.md +47 -0
  229. package/docs/paper/03-problem-definition.md +108 -0
  230. package/docs/paper/04-design.md +164 -0
  231. package/docs/paper/05-guardbench-spec.md +412 -0
  232. package/docs/paper/06-implementation.md +113 -0
  233. package/docs/paper/07-evaluation.md +168 -0
  234. package/docs/paper/08-discussion-limitations.md +61 -0
  235. package/docs/paper/09-conclusion.md +11 -0
  236. package/docs/paper/SUBMISSION_README.md +162 -0
  237. package/docs/paper/appendix-a-demo-transcript.md +114 -0
  238. package/docs/paper/arxiv-compile-report.schema.json +116 -0
  239. package/docs/paper/arxiv-source.schema.json +61 -0
  240. package/docs/paper/audrey-paper-v1.md +1106 -0
  241. package/docs/paper/browser-launch-plan.json +209 -0
  242. package/docs/paper/browser-launch-plan.schema.json +100 -0
  243. package/docs/paper/browser-launch-results.json +86 -0
  244. package/docs/paper/browser-launch-results.schema.json +66 -0
  245. package/docs/paper/claim-register.json +138 -0
  246. package/docs/paper/claim-register.schema.json +81 -0
  247. package/docs/paper/evidence-ledger.md +103 -0
  248. package/docs/paper/output/arxiv/README-arxiv.txt +8 -0
  249. package/docs/paper/output/arxiv/arxiv-manifest.json +41 -0
  250. package/docs/paper/output/arxiv/main.tex +949 -0
  251. package/docs/paper/output/arxiv/references.bib +222 -0
  252. package/docs/paper/output/arxiv-compile-report.json +24 -0
  253. package/docs/paper/output/submission-bundle/LICENSE +21 -0
  254. package/docs/paper/output/submission-bundle/README.md +533 -0
  255. package/docs/paper/output/submission-bundle/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +50 -0
  256. package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-dry-run.json +69 -0
  257. package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-evidence.json +56 -0
  258. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-conformance-card.json +63 -0
  259. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-manifest.json +414 -0
  260. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-raw.json +1171 -0
  261. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-summary.json +1981 -0
  262. package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.json +93 -0
  263. package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.md +7 -0
  264. package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/submission-manifest.json +131 -0
  265. package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/validation-report.json +31 -0
  266. package/docs/paper/output/submission-bundle/benchmarks/output/summary.json +2354 -0
  267. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-adapter-registry.schema.json +69 -0
  268. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-adapter-self-test.schema.json +156 -0
  269. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-conformance-card.schema.json +184 -0
  270. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-external-dry-run.schema.json +74 -0
  271. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-external-evidence.schema.json +108 -0
  272. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-external-run.schema.json +160 -0
  273. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-leaderboard.schema.json +179 -0
  274. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-manifest.schema.json +213 -0
  275. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-publication-verification.schema.json +47 -0
  276. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-raw.schema.json +164 -0
  277. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-submission-manifest.schema.json +151 -0
  278. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-summary.schema.json +228 -0
  279. package/docs/paper/output/submission-bundle/docs/AUDREY_PAPER_OUTLINE.md +175 -0
  280. package/docs/paper/output/submission-bundle/docs/paper/00-master.md +48 -0
  281. package/docs/paper/output/submission-bundle/docs/paper/01-introduction.md +27 -0
  282. package/docs/paper/output/submission-bundle/docs/paper/02-related-work.md +47 -0
  283. package/docs/paper/output/submission-bundle/docs/paper/03-problem-definition.md +108 -0
  284. package/docs/paper/output/submission-bundle/docs/paper/04-design.md +164 -0
  285. package/docs/paper/output/submission-bundle/docs/paper/05-guardbench-spec.md +412 -0
  286. package/docs/paper/output/submission-bundle/docs/paper/06-implementation.md +113 -0
  287. package/docs/paper/output/submission-bundle/docs/paper/07-evaluation.md +168 -0
  288. package/docs/paper/output/submission-bundle/docs/paper/08-discussion-limitations.md +61 -0
  289. package/docs/paper/output/submission-bundle/docs/paper/09-conclusion.md +11 -0
  290. package/docs/paper/output/submission-bundle/docs/paper/SUBMISSION_README.md +162 -0
  291. package/docs/paper/output/submission-bundle/docs/paper/appendix-a-demo-transcript.md +114 -0
  292. package/docs/paper/output/submission-bundle/docs/paper/arxiv-compile-report.schema.json +116 -0
  293. package/docs/paper/output/submission-bundle/docs/paper/arxiv-source.schema.json +61 -0
  294. package/docs/paper/output/submission-bundle/docs/paper/audrey-paper-v1.md +1106 -0
  295. package/docs/paper/output/submission-bundle/docs/paper/browser-launch-plan.json +209 -0
  296. package/docs/paper/output/submission-bundle/docs/paper/browser-launch-plan.schema.json +100 -0
  297. package/docs/paper/output/submission-bundle/docs/paper/browser-launch-results.json +86 -0
  298. package/docs/paper/output/submission-bundle/docs/paper/browser-launch-results.schema.json +66 -0
  299. package/docs/paper/output/submission-bundle/docs/paper/claim-register.json +138 -0
  300. package/docs/paper/output/submission-bundle/docs/paper/claim-register.schema.json +81 -0
  301. package/docs/paper/output/submission-bundle/docs/paper/evidence-ledger.md +103 -0
  302. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/README-arxiv.txt +8 -0
  303. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/arxiv-manifest.json +41 -0
  304. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/main.tex +949 -0
  305. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/references.bib +222 -0
  306. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv-compile-report.json +24 -0
  307. package/docs/paper/output/submission-bundle/docs/paper/paper-submission-bundle.schema.json +70 -0
  308. package/docs/paper/output/submission-bundle/docs/paper/publication-pack.json +81 -0
  309. package/docs/paper/output/submission-bundle/docs/paper/publication-pack.schema.json +60 -0
  310. package/docs/paper/output/submission-bundle/docs/paper/references.bib +222 -0
  311. package/docs/paper/output/submission-bundle/package.json +212 -0
  312. package/docs/paper/output/submission-bundle/paper-submission-manifest.json +379 -0
  313. package/docs/paper/paper-submission-bundle.schema.json +70 -0
  314. package/docs/paper/publication-pack.json +81 -0
  315. package/docs/paper/publication-pack.schema.json +60 -0
  316. package/docs/paper/references.bib +222 -0
  317. package/package.json +103 -26
  318. package/scripts/audit-release-completion.mjs +362 -0
  319. package/scripts/create-arxiv-source.mjs +362 -0
  320. package/scripts/create-paper-submission-bundle.mjs +210 -0
  321. package/scripts/finalize-release.mjs +526 -0
  322. package/scripts/prepare-release-cut.mjs +269 -0
  323. package/scripts/publish-release-bundle.mjs +209 -0
  324. package/scripts/publish-release-github-api.mjs +429 -0
  325. package/scripts/run-vitest.mjs +34 -0
  326. package/scripts/smoke-cli.js +72 -0
  327. package/scripts/sync-paper-artifacts.mjs +109 -0
  328. package/scripts/verify-arxiv-compile.mjs +440 -0
  329. package/scripts/verify-arxiv-source.mjs +194 -0
  330. package/scripts/verify-browser-launch-plan.mjs +237 -0
  331. package/scripts/verify-browser-launch-results.mjs +285 -0
  332. package/scripts/verify-paper-artifacts.mjs +338 -0
  333. package/scripts/verify-paper-claims.mjs +226 -0
  334. package/scripts/verify-paper-submission-bundle.mjs +207 -0
  335. package/scripts/verify-publication-pack.mjs +196 -0
  336. package/scripts/verify-python-package.py +201 -0
  337. package/scripts/verify-release-readiness.mjs +741 -0
  338. package/docs/assets/benchmarks/local-benchmark.svg +0 -45
  339. package/docs/assets/benchmarks/operations-benchmark.svg +0 -45
  340. package/docs/assets/benchmarks/published-memory-standards.svg +0 -50
  341. package/docs/audrey-for-dummies.md +0 -670
  342. package/docs/benchmarking.md +0 -151
  343. package/docs/future-of-llm-memory.md +0 -452
  344. package/docs/mcp-hosts.md +0 -206
  345. package/docs/ollama-local-agents.md +0 -128
  346. package/docs/production-readiness.md +0 -128
package/README.md CHANGED
@@ -1,11 +1,11 @@
1
1
  <div align="center">
2
2
  <img src="docs/assets/audrey-wordmark.png" alt="Audrey wordmark" width="760">
3
3
 
4
- <p><strong>The local-first memory control plane for AI agents.</strong></p>
4
+ <p><strong>The local-first memory firewall for AI agents.</strong></p>
5
5
 
6
6
  <p>
7
7
  Give Codex, Claude Code, Claude Desktop, Cursor, Windsurf, VS Code, JetBrains, Ollama-backed agents,
8
- and custom agent services one durable memory layer they can check before they act.
8
+ and custom agent services one durable memory layer they can check before they touch tools.
9
9
  </p>
10
10
 
11
11
  <p>
@@ -19,13 +19,17 @@
19
19
 
20
20
  Agents forget the exact mistakes they made yesterday. They repeat broken commands, lose project-specific rules, miss contradictions, and treat every new session like a cold start.
21
21
 
22
+ Audrey Guard is the headline loop: record what happened, remember what mattered, check before action, return `allow`, `warn`, or `block` with evidence, then validate whether the memory helped.
23
+
22
24
  Audrey turns those hard-won lessons into a local memory runtime:
23
25
 
26
+ - `audrey guard --tool Bash "npm run deploy"` runs memory-before-action from the terminal.
24
27
  - `memory_recall` finds durable context by semantic similarity.
25
28
  - `memory_preflight` checks prior failures, risks, rules, and relevant procedures before an action.
26
29
  - `memory_reflexes` converts remembered evidence into trigger-response guidance agents can follow.
30
+ - `memory_validate` closes the loop after the action: `helpful`, `used`, or `wrong` outcomes feed salience and can bind back to the exact preflight event, evidence ids, and Guard action fingerprint.
27
31
  - `memory_dream` consolidates episodes into principles and applies decay.
28
- - `audrey doctor` tells a human or CI system whether the runtime is actually ready.
32
+ - `audrey impact` and `audrey doctor` tell a human or CI system whether the runtime is doing real work and is actually ready.
29
33
 
30
34
  It is not a hosted vector database, a notes app, or a Claude-only plugin. Audrey is a SQLite-backed continuity layer that can sit under any local or sidecar agent loop.
31
35
 
@@ -39,15 +43,16 @@ Requires Node.js 20+.
39
43
 
40
44
  ```bash
41
45
  npx audrey doctor
42
- npx audrey demo
46
+ npx audrey demo --scenario repeated-failure
47
+ npx audrey guard --tool Bash "npm run deploy"
43
48
  ```
44
49
 
45
- `doctor` verifies Node, the MCP entrypoint, provider selection, memory-store health, and host config generation. `demo` runs a no-key, no-host, no-network proof: it creates temporary memories, records a redacted failed tool trace, generates a Memory Capsule, proves recall, prints Memory Reflexes, and deletes the demo store.
50
+ `doctor` verifies Node, the MCP entrypoint, provider selection, memory-store health, and host config generation. The repeated-failure demo is no-key, no-host, and no-network: it creates a temporary store, records a failed deploy, teaches Audrey the fix, then shows Audrey Guard blocking the repeat attempt with evidence.
46
51
 
47
52
  Expected first-run shape:
48
53
 
49
54
  ```text
50
- Audrey Doctor v0.21.0
55
+ Audrey Doctor v1.0.0
51
56
  Store health: not initialized
52
57
  Verdict: ready
53
58
  ```
@@ -70,6 +75,7 @@ Generate raw config blocks:
70
75
  npx audrey mcp-config codex
71
76
  npx audrey mcp-config generic
72
77
  npx audrey mcp-config vscode
78
+ npx audrey hook-config claude-code
73
79
  ```
74
80
 
75
81
  Claude Code can be registered directly:
@@ -79,8 +85,19 @@ npx audrey install
79
85
  claude mcp list
80
86
  ```
81
87
 
88
+ For memory-before-action hooks, preview with `npx audrey hook-config
89
+ claude-code`, then apply with `npx audrey hook-config claude-code --apply
90
+ --scope project` for `.claude/settings.local.json` or `--scope user` for
91
+ `~/.claude/settings.json`. Audrey merges the hook block into existing settings
92
+ and writes a timestamped backup before changing a non-empty file. The generated
93
+ `PreToolUse` hook runs `audrey guard --hook --fail-on-warn`; the `PostToolUse`
94
+ and `PostToolUseFailure` hooks record redacted tool traces. Verify the active
95
+ hook set inside Claude Code with `/hooks`.
96
+
82
97
  All local MCP paths default to local embeddings and one shared SQLite-backed memory directory. Use `AUDREY_DATA_DIR` to isolate projects, tenants, or host identities.
83
98
 
99
+ Installer-generated host config does not include provider API keys by default. Prefer setting `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, `GOOGLE_API_KEY`, or `GEMINI_API_KEY` in the host runtime environment; use `npx audrey install --include-secrets` only if you explicitly accept argv/config exposure.
100
+
84
101
  ## Use With Ollama And Local Agents
85
102
 
86
103
  Ollama runs models; Audrey supplies memory. Start Audrey as a local REST sidecar and expose its routes as tools in your agent loop:
@@ -113,9 +130,9 @@ Core sidecar tools:
113
130
 
114
131
  | Surface | Status |
115
132
  |---|---|
116
- | MCP stdio server | 19 tools, resources, and prompt templates |
117
- | CLI | `doctor`, `demo`, `install`, `mcp-config`, `status`, `dream`, `reembed`, `observe-tool`, `promote` |
118
- | REST API | Hono server with `/health`, `/openapi.json`, `/docs`, and `/v1/*` routes |
133
+ | MCP stdio server | 20 tools plus status/recent/principles resources and briefing/recall/reflection prompts |
134
+ | CLI | `doctor`, `demo`, `guard`, `install`, `mcp-config`, `hook-config`, `status`, `dream`, `reembed`, `observe-tool`, `promote`, `impact` |
135
+ | REST API | Hono server with `/health` and `/v1/*` routes |
119
136
  | JavaScript SDK | Direct TypeScript/Node import from `audrey` |
120
137
  | Python client | `pip install audrey-memory`, calls the REST sidecar |
121
138
  | Storage | Local SQLite plus `sqlite-vec`, no hosted database required |
@@ -183,10 +200,10 @@ Audrey is close to a 1.0-ready local memory runtime, but production depends on h
183
200
  Release gates used for this package:
184
201
 
185
202
  ```bash
186
- npm run build
187
- npm run typecheck
188
- npm run bench:memory:check
189
- npm pack --dry-run
203
+ npm run release:gate
204
+ npm run python:release:check
205
+ npm run bench:guard:card
206
+ npm run bench:guard:validate
190
207
  npx audrey doctor
191
208
  npx audrey demo
192
209
  ```
@@ -209,22 +226,250 @@ Production controls you still own:
209
226
  - Run `npx audrey dream` on a schedule so consolidation and decay stay current.
210
227
  - Add application-level encryption, retention, access control, and audit logging for regulated environments.
211
228
 
212
- Read the full guide: [docs/production-readiness.md](docs/production-readiness.md).
229
+ ## Environment Variables
230
+
231
+ | Variable | Default | Purpose |
232
+ |---|---|---|
233
+ | `AUDREY_DATA_DIR` | `~/.audrey/data` | SQLite memory store path. Use one per tenant or agent identity for isolation. |
234
+ | `AUDREY_AGENT` | `local-agent` | Logical agent identity stamped on writes. |
235
+ | `AUDREY_EMBEDDING_PROVIDER` | `local` | `local`, `gemini`, `openai`, or `mock`. Cloud providers require explicit opt-in. |
236
+ | `AUDREY_LLM_PROVIDER` | auto | `anthropic`, `openai`, or `mock`. |
237
+ | `AUDREY_DEVICE` | `gpu` | Local embedding device (`gpu` or `cpu`). Falls back to CPU if GPU init fails. |
238
+ | `AUDREY_PORT` | `7437` | REST sidecar port. |
239
+ | `AUDREY_HOST` | `127.0.0.1` | REST sidecar bind address. Set to `0.0.0.0` only with `AUDREY_API_KEY`. |
240
+ | `AUDREY_API_KEY` | unset | Bearer token required for non-loopback REST traffic. |
241
+ | `AUDREY_ALLOW_NO_AUTH` | `0` | Set to `1` to allow non-loopback bind without an API key. Don't. |
242
+ | `AUDREY_ENABLE_ADMIN_TOOLS` | `0` | Set to `1` to enable export, import, and forget routes/tools. Disabled by default. |
243
+ | `AUDREY_PROMOTE_ROOTS` | unset | Colon/semicolon-separated extra roots for `audrey promote --yes` writes. By default writes are restricted to `process.cwd()`. |
244
+ | `AUDREY_DEBUG` | `0` | Set to `1` to print MCP info logs (server started, warmup completed). Errors always log. |
245
+ | `AUDREY_PROFILE` | `0` | Set to `1` to emit per-stage timings via MCP `_meta.diagnostics`. |
246
+ | `AUDREY_DISABLE_WARMUP` | `0` | Set to `1` to skip background embedding warmup at MCP boot. |
247
+ | `AUDREY_ONNX_VERBOSE` | `0` | Set to `1` to restore ONNX runtime EP-assignment warnings (suppressed by default). |
248
+ | `AUDREY_PRAGMA_DEFAULTS` | `1` | Set to `0` to revert SQLite PRAGMA tuning to better-sqlite3 defaults. |
249
+ | `AUDREY_CONTEXT_BUDGET_CHARS` | `4000` | Default Memory Capsule character budget. |
213
250
 
214
251
  ## Benchmarks
215
252
 
216
- Audrey ships with a benchmark harness and release gate:
253
+ Audrey ships three benchmark families.
254
+
255
+ ### Performance snapshot
256
+
257
+ `npm run bench:perf-snapshot` measures encode and hybrid recall latency at multiple corpus sizes against the in-process mock provider. It reports p50/p95/p99 plus machine provenance so the numbers are reproducible and honest about what they cover.
217
258
 
218
259
  ```bash
219
- npm run bench:memory
220
- npm run bench:memory:check
260
+ npm run build
261
+ npm run bench:perf-snapshot # default sizes 100, 1000, 5000
262
+ node benchmarks/perf-snapshot.js --sizes 1000,10000 --json # custom shape
221
263
  ```
222
264
 
223
- Current repo snapshot:
265
+ Sample output from `benchmarks/snapshots/perf-0.22.2.json` (Ryzen 9 7900X3D, 12 cores / 24 threads, Node 25.5.0, mock 64-dim embedding, hybrid recall, limit 5):
266
+
267
+ | Corpus size | Encode p50 (ms) | Encode p95 (ms) | Recall p50 (ms) | Recall p95 (ms) | Recall p99 (ms) |
268
+ |---|---|---|---|---|---|
269
+ | 100 | 0.33 | 0.59 | 0.54 | 1.82 | 2.71 |
270
+ | 1,000 | 0.31 | 2.15 | 1.57 | 2.36 | 21.18 |
271
+ | 5,000 | 0.31 | 1.84 | 2.09 | 3.42 | 16.58 |
224
272
 
225
- ![Audrey local benchmark](docs/assets/benchmarks/local-benchmark.svg)
273
+ These numbers cover Audrey's own pipeline (SQLite + sqlite-vec + hybrid ranking) and exclude embedding-provider cost. Real-world recall p95 with a local 384-dim provider is typically 5-15x higher; with a hosted provider it is dominated by the API round-trip. Run on your own hardware before quoting numbers anywhere.
226
274
 
227
- The benchmark suite covers retrieval behavior, overwrite behavior, delete/abstain behavior, and semantic/procedural merge behavior. For methodology and comparison anchors, see [docs/benchmarking.md](docs/benchmarking.md).
275
+ ### Behavioral regression suite
276
+
277
+ `npm run bench:memory:check` is a release gate. It runs a small set of retrieval and lifecycle scenarios (information extraction, knowledge updates, multi-session reasoning, conflict resolution, privacy boundary, overwrite, delete-and-abstain, semantic/procedural merge) against Audrey and three weak baselines (vector-only, keyword+recency, recent-window) and asserts Audrey doesn't regress. The baseline comparisons exist to catch correctness regressions in retrieval logic, not to make marketing claims.
278
+
279
+ ```bash
280
+ npm run bench:memory # full regression suite (writes JSON + report)
281
+ npm run bench:memory:check # release gate, exits non-zero on regression
282
+ ```
283
+
284
+ ### GuardBench comparative suite
285
+
286
+ `npm run bench:guard:check` runs Audrey's local GuardBench comparative suite:
287
+ ten pre-action scenarios across Audrey Guard, no-memory, recent-window,
288
+ vector-only, and FTS-only adapters. The scenarios cover exact repeated
289
+ failures, required procedures, changed file scopes, changed commands,
290
+ recovered failures, recall degradation, redaction safety, conflicting
291
+ instructions, and noisy stores. It writes
292
+ `benchmarks/output/guardbench-summary.json`,
293
+ `benchmarks/output/guardbench-manifest.json`, and
294
+ `benchmarks/output/guardbench-raw.json`. The emitted manifest, summary, and raw
295
+ output shapes are validated by JSON schemas under `benchmarks/schemas/`.
296
+
297
+ Latest local result in this checkout: 10/10 scenarios passed, 100% prevention
298
+ rate, 0% false-block rate, 0 raw secret leaks, 0 published artifact leaks in
299
+ the raw-secret sweep, and 3.214ms / 21.395ms
300
+ p50/p95 guard latency under the mock-provider methodology. Local baseline
301
+ decision accuracy was: no-memory 10%, recent-window 60%, vector-only 40%, and
302
+ FTS-only 10%; none passed the full GuardBench decision-plus-evidence contract.
303
+
304
+ ```bash
305
+ npm run bench:guard
306
+ npm run bench:guard:check
307
+ npm run bench:guard:manifest
308
+ npm run bench:guard:validate
309
+ npm run bench:guard:card
310
+ npm run bench:guard:bundle
311
+ npm run bench:guard:bundle:verify
312
+ npm run bench:guard:leaderboard
313
+ npm run bench:guard:adapter-registry:validate
314
+ npm run bench:guard:adapter-module:validate
315
+ npm run bench:guard:adapter-self-test
316
+ npm run bench:guard:adapter-self-test:validate
317
+ npm run bench:guard:publication:verify
318
+ npm run bench:guard:adapter-smoke
319
+ npm run bench:guard:adapter-conformance
320
+ npm run bench:guard:external:dry-run
321
+ npm run bench:guard:mem0 -- --dry-run
322
+ npm run bench:guard:zep -- --dry-run
323
+ node benchmarks/adapter-self-test.mjs --adapter ./path/to/adapter.mjs
324
+ node benchmarks/guardbench.js --adapter ./path/to/adapter.mjs --check
325
+ ```
326
+
327
+ External GuardBench adapters are ESM modules that export one of `default`,
328
+ `adapter`, or `createGuardBenchAdapter()`. The adapter receives scenario seed
329
+ data and the proposed action, but the harness withholds `expectedDecision` and
330
+ `requiredEvidence` until scoring. Start from
331
+ `benchmarks/adapters/example-allow.mjs` when wiring a new system. Adapter
332
+ authors can import `defineGuardBenchAdapter()` and `defineGuardBenchResult()`
333
+ from `benchmarks/adapter-kit.mjs` to validate module shape and decision output
334
+ while developing.
335
+
336
+ The published adapter registry lives at `benchmarks/adapters/registry.json`.
337
+ Run `npm run bench:guard:adapter-registry:validate` to verify registry shape,
338
+ adapter paths, and credential-free module loading.
339
+
340
+ Before running the full self-test, validate the ESM module shape quickly:
341
+
342
+ ```bash
343
+ npm run bench:guard:adapter-module:validate -- --adapter ./path/to/adapter.mjs
344
+ ```
345
+
346
+ Before publishing a new adapter, run `npm run bench:guard:adapter-self-test --
347
+ --adapter ./path/to/adapter.mjs`. The self-test validates the external adapter
348
+ contract and row conformance while explicitly allowing low benchmark scores, so
349
+ authors can separate "valid submission shape" from "competitive GuardBench
350
+ performance." The generated self-test report is validated against
351
+ `benchmarks/schemas/guardbench-adapter-self-test.schema.json`. Reviewers can
352
+ validate a submitted report without rerunning an adapter through `npm run
353
+ bench:guard:adapter-self-test:validate -- --report ./guardbench-adapter-self-test.json`.
354
+
355
+ Audrey ships external adapters for Mem0 Platform and Zep Cloud. Run them only
356
+ with runtime API keys:
357
+
358
+ ```bash
359
+ export MEM0_API_KEY=... # Windows cmd: set MEM0_API_KEY=...
360
+ npm run bench:guard:mem0
361
+
362
+ export ZEP_API_KEY=... # Windows cmd: set ZEP_API_KEY=...
363
+ npm run bench:guard:zep
364
+ ```
365
+
366
+ The Zep adapter uses the current REST surface for users, sessions, `memory.add`,
367
+ `graph.search`, and benchmark-user cleanup. If Zep graph ingestion needs more
368
+ time in a live account, set `ZEP_GUARDBENCH_INGEST_DELAY_MS` before the run.
369
+
370
+ Run `npm run bench:guard:external:dry-run` before coordinating credentialed
371
+ runs. It walks the runtime-env adapter registry, writes non-secret
372
+ `external-run-metadata.json` files for each adapter, and reports which runtime
373
+ environment variables are still missing. The external dry-run matrix report is schema-bound by
374
+ `benchmarks/schemas/guardbench-external-dry-run.schema.json` and written to
375
+ `benchmarks/output/external/guardbench-external-dry-run.json`.
376
+
377
+ Run `npm run bench:guard:external:evidence` after dry-runs or live runs to
378
+ write `benchmarks/output/external/guardbench-external-evidence.json`. This
379
+ external evidence verification report is schema-bound by
380
+ `benchmarks/schemas/guardbench-external-evidence.schema.json`, treats dry-run
381
+ or missing-key rows as pending in normal release gates, and checks that saved
382
+ metadata does not contain runtime credential values. Use
383
+ `npm run bench:guard:external:evidence:strict` when Mem0/Zep keys have been
384
+ provided; strict mode fails until every runtime-env adapter has a passed live
385
+ bundle.
386
+
387
+ External runs write `external-run-metadata.json` alongside the GuardBench
388
+ summary, manifest, and raw output bundle under
389
+ `benchmarks/output/external/<adapter>/`. The external runner validates the
390
+ emitted bundle with `benchmarks/validate-guardbench-artifacts.mjs` before
391
+ marking the run passed, and separately records adapter conformance so a valid
392
+ low-scoring adapter is distinguished from a malformed adapter. When
393
+ `external-run-metadata.json` is present, the validator also checks it against
394
+ `benchmarks/schemas/guardbench-external-run.schema.json` and verifies any
395
+ recorded SHA-256 artifact hashes against the bundle on disk.
396
+
397
+ For a shareable submission artifact, run `npm run bench:guard:card -- --dir
398
+ <output-dir>`. This writes `guardbench-conformance-card.json` with the subject
399
+ name, run status, score, conformance result, artifact hashes, optional
400
+ external-run metadata hash, and machine provenance. The standalone validator
401
+ checks the card when it is present.
402
+
403
+ For a portable submission directory, run `npm run bench:guard:bundle -- --dir
404
+ <output-dir>`. This creates `submission-bundle/` with the raw GuardBench
405
+ artifacts, conformance card, JSON schemas, validation report, and
406
+ `submission-manifest.json` with SHA-256 hashes for every bundled file.
407
+ Reviewers can run `npm run bench:guard:bundle:verify -- --dir
408
+ <submission-bundle>` to check manifest hashes, bundled schemas, and artifact
409
+ validation from the bundle alone.
410
+
411
+ For benchmark aggregation, run `npm run bench:guard:leaderboard -- --bundle
412
+ <submission-bundle>`. The leaderboard builder verifies each bundle before
413
+ ranking and writes JSON plus Markdown reports under `benchmarks/output/leaderboard/`.
414
+
415
+ Before publishing benchmark artifacts, run `npm run
416
+ bench:guard:publication:verify`. This single benchmark-focused verifier checks
417
+ the adapter registry, default adapter module, adapter self-test report,
418
+ GuardBench manifest/summary/raw artifacts, submission bundle, external dry-run
419
+ matrix, external evidence verification report, leaderboard, and a local
420
+ absolute-path sweep over the public artifact set.
421
+ The verifier validates its own machine-readable report against
422
+ `benchmarks/schemas/guardbench-publication-verification.schema.json` before it
423
+ exits.
424
+
425
+ Before turning the paper into public posts or submissions, run `npm run
426
+ paper:claims`. It validates `docs/paper/claim-register.json` against the
427
+ current paper, README, GuardBench artifacts, publication verifier, and external
428
+ evidence status so pending Mem0/Zep live-score claims cannot slip into public
429
+ copy.
430
+ Run `npm run paper:publication-pack` to verify the ready-to-use arXiv, Hacker
431
+ News, Reddit, X, and LinkedIn drafts in `docs/paper/publication-pack.json`
432
+ before browser-based submission. The X URL reserve is explicit: the first X
433
+ post carries `reservedUrlChars: 24`, and submitted artifact-url targets in
434
+ `browser-launch-results.json` must record the final `artifactUrl`.
435
+ Run `npm run paper:arxiv` to generate a deterministic TeX source package under
436
+ `docs/paper/output/arxiv/`, and `npm run paper:arxiv:verify` to check hashes,
437
+ citation conversion, bibliography coverage, seeded-secret redaction, and local
438
+ absolute-path leakage before arXiv upload.
439
+ Run `npm run paper:arxiv:compile` to record a schema-bound compile report at
440
+ `docs/paper/output/arxiv-compile-report.json`. It attempts `tectonic`,
441
+ `latexmk`, `pdflatex`/`bibtex`, or `uvx tecto` with a local bundle proxy when
442
+ available; `npm run paper:arxiv:compile:strict` stays blocked on hosts without
443
+ supported TeX tooling.
444
+ Run `npm run paper:launch-plan` to verify
445
+ `docs/paper/browser-launch-plan.json`, which maps those drafts to manual
446
+ browser targets, login/captcha expectations, platform-rule checks, source
447
+ URLs, and post-submit URL capture.
448
+ Run `npm run paper:launch-results` to validate
449
+ `docs/paper/browser-launch-results.json`, the post-submit ledger for arXiv,
450
+ Hacker News, Reddit, X, and LinkedIn targets. The normal verifier allows
451
+ pending rows with explicit blockers; `npm run paper:launch-results:strict`
452
+ fails until every target has a submitted, operator-verified public URL.
453
+ Run `npm run paper:bundle` to generate
454
+ `docs/paper/output/submission-bundle/`, a hash-manifested package containing
455
+ paper sources, claim and publication registers, GuardBench outputs, schemas,
456
+ and package metadata. `npm run paper:bundle:verify` checks the manifest and
457
+ file hashes before browser upload.
458
+ Run `npm run release:readiness` for the pending-aware Audrey 1.0 checklist.
459
+ It keeps code/paper readiness separate from publish blockers; `npm run
460
+ release:readiness:strict` fails until the 1.0 version surfaces,
461
+ source-control state, live remote-head verification, Python artifacts, npm
462
+ registry/auth readiness, PyPI publish readiness, arXiv compile proof, browser
463
+ publication URLs, and live Mem0/Zep evidence are complete.
464
+ Run `npm run release:cut:plan` to preview the exact 1.0 version/changelog
465
+ edits across npm, lockfile, MCP, and Python surfaces. `npm run
466
+ release:cut:apply -- --target-version 1.0.0` writes those edits only when the
467
+ final cut is intentional. The generated changelog section is release-note copy,
468
+ not a TODO scaffold; `release:readiness:strict` rejects placeholder changelog
469
+ markers before publication.
470
+ Run `npm run security:audit` before packaging or publishing; the release gates
471
+ call it after artifact verification so production dependency advisories cannot
472
+ slip past the final package check.
228
473
 
229
474
  ## Command Reference
230
475
 
@@ -237,6 +482,7 @@ npx audrey demo
237
482
  npx audrey install --host codex --dry-run
238
483
  npx audrey mcp-config codex
239
484
  npx audrey mcp-config generic
485
+ npx audrey hook-config claude-code
240
486
  npx audrey install
241
487
  npx audrey uninstall
242
488
 
@@ -246,39 +492,41 @@ npx audrey status --json --fail-on-unhealthy
246
492
  npx audrey dream
247
493
  npx audrey reembed
248
494
 
495
+ # Closed-loop visibility
496
+ npx audrey impact
497
+ npx audrey impact --json --window 7 --limit 5
498
+
249
499
  # Tool-trace learning
250
500
  npx audrey observe-tool --event PostToolUse --tool Bash --outcome failed
251
501
  npx audrey promote --dry-run
252
502
 
253
503
  # REST sidecar
254
504
  npx audrey serve
505
+ cp .env.docker.example .env # Windows cmd: copy .env.docker.example .env
506
+ # edit AUDREY_API_KEY in .env
255
507
  docker compose up -d --build
256
508
  ```
257
509
 
510
+ The Node sidecar defaults to `127.0.0.1:7437`. The Docker image intentionally binds to a non-loopback address inside the container (port `3487`), and non-loopback REST traffic requires a bearer token, so Compose requires `AUDREY_API_KEY` in `.env` before startup. Override the published host port with `AUDREY_PUBLISHED_PORT` when using Compose.
511
+
258
512
  ## Documentation
259
513
 
260
- - [Audrey for Dummies](docs/audrey-for-dummies.md)
261
- - [MCP host guide](docs/mcp-hosts.md)
262
- - [Ollama and local agents](docs/ollama-local-agents.md)
263
- - [Production readiness](docs/production-readiness.md)
264
- - [Future of LLM memory](docs/future-of-llm-memory.md)
265
- - [Benchmarking](docs/benchmarking.md)
266
514
  - [Security policy](SECURITY.md)
515
+ - [Audrey paper outline](docs/AUDREY_PAPER_OUTLINE.md)
516
+ - Public setup, runtime, benchmark, and command guidance is maintained in this README.
267
517
 
268
518
  ## Development
269
519
 
270
520
  ```bash
271
521
  npm ci
272
- npm run build
273
- npm run typecheck
274
- npm test
275
- npm run bench:memory:check
276
- npm run pack:check
522
+ npm run release:gate
277
523
  python -m unittest discover -s python/tests -v
278
- python -m build --no-isolation python
524
+ npm run python:release:check
279
525
  ```
280
526
 
281
- On some locked-down Windows hosts, Vitest/Vite can fail before tests start with `spawn EPERM`. That is an environment process-spawn blocker, not an Audrey runtime failure. Use build, typecheck, benchmark, pack dry-run, direct `dist/` smokes, and GitHub Actions as the release evidence path.
527
+ `npm test` uses a repo-local Vitest launcher so locked-down Windows temp
528
+ directories do not block test startup. `npm run release:gate:sandbox` remains
529
+ available for hosts that block child-process spawning entirely.
282
530
 
283
531
  ## License
284
532
 
package/SECURITY.md ADDED
@@ -0,0 +1,30 @@
1
+ # Security Policy
2
+
3
+ ## Supported Versions
4
+
5
+ Security fixes are best-effort for the current published release line and the current default branch.
6
+
7
+ | Version | Supported |
8
+ |---|---|
9
+ | `0.23.x` | Yes |
10
+ | `0.22.x` | Best effort |
11
+ | `< 0.22.0` | No |
12
+
13
+ ## Reporting a Vulnerability
14
+
15
+ Do not open a public GitHub issue for a security vulnerability.
16
+
17
+ Report vulnerabilities through the following channel:
18
+
19
+ - GitHub Security Advisories for this repository
20
+
21
+ Include:
22
+
23
+ - affected version
24
+ - reproduction steps or proof of concept
25
+ - impact description
26
+ - suggested mitigation, if you have one
27
+
28
+ ## Scope Notes
29
+
30
+ Audrey is a memory layer. Security posture also depends on the host application, deployment environment, provider configuration, access controls, and data-handling rules around it.
@@ -0,0 +1,20 @@
1
+ import { validateGuardBenchAdapter, validateAdapterResult } from './guardbench.js';
2
+
3
+ export const GUARDBENCH_ADAPTER_CONTRACT_VERSION = '1.0.0';
4
+ export const GUARDBENCH_DECISIONS = Object.freeze(['allow', 'warn', 'block']);
5
+ export const GUARDBENCH_RESULT_FIELDS = Object.freeze([
6
+ 'decision',
7
+ 'riskScore',
8
+ 'evidenceIds',
9
+ 'recommendedActions',
10
+ 'summary',
11
+ 'recallErrors',
12
+ ]);
13
+
14
+ export function defineGuardBenchAdapter(adapter) {
15
+ return validateGuardBenchAdapter(adapter, adapter?.name ?? 'inline adapter');
16
+ }
17
+
18
+ export function defineGuardBenchResult(result, adapterName = 'adapter', scenarioId = 'scenario') {
19
+ return validateAdapterResult(result, adapterName, scenarioId);
20
+ }
@@ -0,0 +1,166 @@
1
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
2
+ import { basename, dirname, resolve } from 'node:path';
3
+ import { fileURLToPath } from 'node:url';
4
+ import { loadExternalAdapters, runGuardBench } from './guardbench.js';
5
+ import { evaluateAdapterConformance } from './run-external-guardbench.mjs';
6
+ import { validateSchema } from './validate-guardbench-artifacts.mjs';
7
+ import { publicPath } from './public-paths.mjs';
8
+
9
+ const ROOT = resolve(dirname(fileURLToPath(import.meta.url)), '..');
10
+ const DEFAULT_ADAPTER = 'benchmarks/adapters/example-allow.mjs';
11
+ const DEFAULT_OUT = 'benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json';
12
+ const DEFAULT_SCHEMA = 'benchmarks/schemas/guardbench-adapter-self-test.schema.json';
13
+ const RESULT_FIELDS = [
14
+ 'decision',
15
+ 'riskScore',
16
+ 'evidenceIds',
17
+ 'recommendedActions',
18
+ 'summary',
19
+ 'recallErrors',
20
+ ];
21
+
22
+ export function parseAdapterSelfTestArgs(argv = process.argv.slice(2)) {
23
+ const args = {
24
+ adapter: DEFAULT_ADAPTER,
25
+ out: DEFAULT_OUT,
26
+ json: false,
27
+ noWrite: false,
28
+ };
29
+
30
+ for (let i = 0; i < argv.length; i++) {
31
+ const token = argv[i];
32
+ if (token === '--adapter' && argv[i + 1]) args.adapter = argv[++i];
33
+ else if (token === '--out' && argv[i + 1]) args.out = argv[++i];
34
+ else if (token === '--json') args.json = true;
35
+ else if (token === '--no-write') args.noWrite = true;
36
+ else if (token === '--help' || token === '-h') args.help = true;
37
+ else throw new Error(`Unknown argument: ${token}`);
38
+ }
39
+
40
+ return args;
41
+ }
42
+
43
+ function usage() {
44
+ return `Usage: node benchmarks/adapter-self-test.mjs [options]
45
+
46
+ Options:
47
+ --adapter <path> ESM GuardBench adapter path. Default: ${DEFAULT_ADAPTER}.
48
+ --out <path> JSON report path. Default: ${DEFAULT_OUT}.
49
+ --json Print the full JSON report.
50
+ --no-write Do not write the JSON report.
51
+ `;
52
+ }
53
+
54
+ function systemSummary(report, adapterName) {
55
+ return report.systemSummaries.find(row => row.system === adapterName) ?? null;
56
+ }
57
+
58
+ function scoreFromReport(report, adapterName) {
59
+ const summary = systemSummary(report, adapterName);
60
+ return {
61
+ scenarios: summary?.scenarios ?? 0,
62
+ fullContractPassRate: summary?.passRate ?? null,
63
+ decisionAccuracy: summary?.decisionAccuracy ?? null,
64
+ evidenceRecall: summary?.evidenceRecall ?? null,
65
+ redactionLeaks: summary?.redactionLeaks ?? null,
66
+ latency: summary?.latency ?? null,
67
+ };
68
+ }
69
+
70
+ function readJson(path) {
71
+ return JSON.parse(readFileSync(path, 'utf-8'));
72
+ }
73
+
74
+ export function validateAdapterSelfTestReport(report, options = {}) {
75
+ const schemaPath = resolve(ROOT, options.schema ?? DEFAULT_SCHEMA);
76
+ const schema = options.schemaObject ?? readJson(schemaPath);
77
+ return validateSchema(report, schema, 'guardbench-adapter-self-test');
78
+ }
79
+
80
+ export async function runGuardBenchAdapterSelfTest(options = {}) {
81
+ const adapterPath = resolve(ROOT, options.adapterPath ?? options.adapter ?? DEFAULT_ADAPTER);
82
+ if (!existsSync(adapterPath)) {
83
+ throw new Error(`GuardBench adapter not found: ${adapterPath}`);
84
+ }
85
+
86
+ const adapters = await loadExternalAdapters([adapterPath]);
87
+ if (adapters.length !== 1) {
88
+ throw new Error(`GuardBench adapter self-test expected 1 adapter, got ${adapters.length}`);
89
+ }
90
+
91
+ const [adapter] = adapters;
92
+ const report = await runGuardBench({ externalAdapters: adapters });
93
+ const conformance = evaluateAdapterConformance(report, adapter.name);
94
+ const score = scoreFromReport(report, conformance.adapter);
95
+ const selfTest = {
96
+ schemaVersion: '1.0.0',
97
+ suite: 'GuardBench adapter self-test',
98
+ generatedAt: new Date().toISOString(),
99
+ ok: conformance.ok,
100
+ adapter: {
101
+ name: adapter.name,
102
+ path: publicPath(adapterPath),
103
+ moduleFile: basename(adapterPath),
104
+ description: adapter.description ?? null,
105
+ },
106
+ conformance,
107
+ score,
108
+ contract: {
109
+ expectedAnswersWithheld: true,
110
+ lowScoreAllowed: true,
111
+ requiredScenarioRows: report.scenarios,
112
+ requiredResultFields: RESULT_FIELDS,
113
+ redactionLeakTolerance: 0,
114
+ },
115
+ failures: conformance.failures,
116
+ };
117
+ const schemaErrors = validateAdapterSelfTestReport(selfTest);
118
+ if (schemaErrors.length > 0) {
119
+ throw new Error(`GuardBench adapter self-test schema validation failed: ${schemaErrors.join('; ')}`);
120
+ }
121
+
122
+ if (options.out && options.write !== false) {
123
+ const outPath = resolve(ROOT, options.out);
124
+ mkdirSync(dirname(outPath), { recursive: true });
125
+ writeFileSync(outPath, `${JSON.stringify(selfTest, null, 2)}\n`, 'utf-8');
126
+ selfTest.outPath = publicPath(outPath);
127
+ }
128
+
129
+ return selfTest;
130
+ }
131
+
132
+ async function main() {
133
+ const args = parseAdapterSelfTestArgs();
134
+ if (args.help) {
135
+ console.log(usage());
136
+ return;
137
+ }
138
+
139
+ const result = await runGuardBenchAdapterSelfTest({
140
+ adapter: args.adapter,
141
+ out: args.noWrite ? null : args.out,
142
+ write: !args.noWrite,
143
+ });
144
+
145
+ if (args.json) {
146
+ console.log(JSON.stringify(result, null, 2));
147
+ } else if (result.ok) {
148
+ console.log(`GuardBench adapter self-test passed: ${result.adapter.name}`);
149
+ console.log(`Contract rows: ${result.conformance.scenarios}/${result.conformance.expectedScenarios}`);
150
+ console.log(`Full-contract score: ${(result.score.fullContractPassRate * 100).toFixed(1)}%`);
151
+ console.log(`Decision accuracy: ${(result.score.decisionAccuracy * 100).toFixed(1)}%`);
152
+ if (result.outPath) console.log(`Self-test report: ${result.outPath}`);
153
+ } else {
154
+ console.error(`GuardBench adapter self-test failed: ${result.adapter.name}`);
155
+ for (const failure of result.failures) console.error(`- ${failure}`);
156
+ }
157
+
158
+ process.exitCode = result.ok ? 0 : 1;
159
+ }
160
+
161
+ if (process.argv[1] && resolve(process.argv[1]) === fileURLToPath(import.meta.url)) {
162
+ main().catch(error => {
163
+ console.error(error.message);
164
+ process.exit(1);
165
+ });
166
+ }
@@ -0,0 +1,28 @@
1
+ import { defineGuardBenchAdapter } from '../adapter-kit.mjs';
2
+
3
+ export default defineGuardBenchAdapter({
4
+ name: 'Example Allow Adapter',
5
+ description: 'Credential-free GuardBench adapter example. It always allows and is useful for adapter-loading smoke tests.',
6
+ async setup({ scenario }) {
7
+ return {
8
+ memoryCount: (scenario.seed.seededMemories ?? []).length,
9
+ toolEventCount: (scenario.seed.seededToolEvents ?? []).length,
10
+ hasFaultInjection: Boolean(scenario.seed.faultInjection),
11
+ };
12
+ },
13
+ async decide({ scenario, state }) {
14
+ return {
15
+ decision: 'allow',
16
+ riskScore: 0,
17
+ evidenceIds: [],
18
+ recommendedActions: [],
19
+ summary: [
20
+ `Example adapter loaded ${state.memoryCount} seeded memories`,
21
+ `${state.toolEventCount} seeded tool events`,
22
+ scenario.seed.seededNoise ? `${scenario.seed.seededNoise.count} noise memories` : 'no noise block',
23
+ state.hasFaultInjection ? 'fault injection present but unsupported' : 'no fault injection',
24
+ ].join('; '),
25
+ };
26
+ },
27
+ async cleanup() {},
28
+ });