audrey 0.21.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (346)
  1. package/CHANGELOG.md +238 -0
  2. package/LICENSE +21 -21
  3. package/README.md +281 -33
  4. package/SECURITY.md +30 -0
  5. package/benchmarks/adapter-kit.mjs +20 -0
  6. package/benchmarks/adapter-self-test.mjs +166 -0
  7. package/benchmarks/adapters/example-allow.mjs +28 -0
  8. package/benchmarks/adapters/mem0-platform.mjs +267 -0
  9. package/benchmarks/adapters/registry.json +51 -0
  10. package/benchmarks/adapters/zep-cloud.mjs +280 -0
  11. package/benchmarks/baselines.js +169 -0
  12. package/benchmarks/build-leaderboard.mjs +170 -0
  13. package/benchmarks/cases.js +537 -0
  14. package/benchmarks/create-conformance-card.mjs +139 -0
  15. package/benchmarks/create-submission-bundle.mjs +176 -0
  16. package/benchmarks/dry-run-external-adapters.mjs +165 -0
  17. package/benchmarks/guardbench.js +1035 -0
  18. package/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +50 -0
  19. package/benchmarks/output/external/guardbench-external-dry-run.json +69 -0
  20. package/benchmarks/output/external/guardbench-external-evidence.json +56 -0
  21. package/benchmarks/output/guardbench-conformance-card.json +63 -0
  22. package/benchmarks/output/guardbench-manifest.json +414 -0
  23. package/benchmarks/output/guardbench-raw.json +1171 -0
  24. package/benchmarks/output/guardbench-summary.json +1981 -0
  25. package/benchmarks/output/leaderboard/guardbench-leaderboard.json +93 -0
  26. package/benchmarks/output/leaderboard/guardbench-leaderboard.md +7 -0
  27. package/benchmarks/output/submission-bundle/guardbench-conformance-card.json +63 -0
  28. package/benchmarks/output/submission-bundle/guardbench-manifest.json +414 -0
  29. package/benchmarks/output/submission-bundle/guardbench-raw.json +1171 -0
  30. package/benchmarks/output/submission-bundle/guardbench-summary.json +1981 -0
  31. package/benchmarks/output/submission-bundle/schemas/guardbench-adapter-registry.schema.json +69 -0
  32. package/benchmarks/output/submission-bundle/schemas/guardbench-adapter-self-test.schema.json +156 -0
  33. package/benchmarks/output/submission-bundle/schemas/guardbench-conformance-card.schema.json +184 -0
  34. package/benchmarks/output/submission-bundle/schemas/guardbench-external-dry-run.schema.json +74 -0
  35. package/benchmarks/output/submission-bundle/schemas/guardbench-external-evidence.schema.json +108 -0
  36. package/benchmarks/output/submission-bundle/schemas/guardbench-external-run.schema.json +160 -0
  37. package/benchmarks/output/submission-bundle/schemas/guardbench-leaderboard.schema.json +179 -0
  38. package/benchmarks/output/submission-bundle/schemas/guardbench-manifest.schema.json +213 -0
  39. package/benchmarks/output/submission-bundle/schemas/guardbench-publication-verification.schema.json +47 -0
  40. package/benchmarks/output/submission-bundle/schemas/guardbench-raw.schema.json +164 -0
  41. package/benchmarks/output/submission-bundle/schemas/guardbench-submission-manifest.schema.json +151 -0
  42. package/benchmarks/output/submission-bundle/schemas/guardbench-summary.schema.json +228 -0
  43. package/benchmarks/output/submission-bundle/submission-manifest.json +131 -0
  44. package/benchmarks/output/submission-bundle/validation-report.json +31 -0
  45. package/benchmarks/output/summary.json +2354 -0
  46. package/benchmarks/perf-snapshot.js +304 -0
  47. package/benchmarks/perf.bench.js +161 -0
  48. package/benchmarks/public-paths.mjs +78 -0
  49. package/benchmarks/reference-results.js +70 -0
  50. package/benchmarks/report.js +259 -0
  51. package/benchmarks/run-external-guardbench.mjs +281 -0
  52. package/benchmarks/run.js +682 -0
  53. package/benchmarks/schemas/guardbench-adapter-registry.schema.json +69 -0
  54. package/benchmarks/schemas/guardbench-adapter-self-test.schema.json +156 -0
  55. package/benchmarks/schemas/guardbench-conformance-card.schema.json +184 -0
  56. package/benchmarks/schemas/guardbench-external-dry-run.schema.json +74 -0
  57. package/benchmarks/schemas/guardbench-external-evidence.schema.json +108 -0
  58. package/benchmarks/schemas/guardbench-external-run.schema.json +160 -0
  59. package/benchmarks/schemas/guardbench-leaderboard.schema.json +179 -0
  60. package/benchmarks/schemas/guardbench-manifest.schema.json +213 -0
  61. package/benchmarks/schemas/guardbench-publication-verification.schema.json +47 -0
  62. package/benchmarks/schemas/guardbench-raw.schema.json +164 -0
  63. package/benchmarks/schemas/guardbench-submission-manifest.schema.json +151 -0
  64. package/benchmarks/schemas/guardbench-summary.schema.json +228 -0
  65. package/benchmarks/snapshots/perf-0.22.2.json +123 -0
  66. package/benchmarks/snapshots/perf-0.23.0.json +123 -0
  67. package/benchmarks/validate-adapter-module.mjs +104 -0
  68. package/benchmarks/validate-adapter-registry.mjs +134 -0
  69. package/benchmarks/validate-adapter-self-test.mjs +96 -0
  70. package/benchmarks/validate-guardbench-artifacts.mjs +343 -0
  71. package/benchmarks/verify-external-evidence.mjs +296 -0
  72. package/benchmarks/verify-publication-artifacts.mjs +286 -0
  73. package/benchmarks/verify-submission-bundle.mjs +167 -0
  74. package/dist/mcp-server/config.d.ts +5 -4
  75. package/dist/mcp-server/config.d.ts.map +1 -1
  76. package/dist/mcp-server/config.js +6 -8
  77. package/dist/mcp-server/config.js.map +1 -1
  78. package/dist/mcp-server/index.d.ts +281 -23
  79. package/dist/mcp-server/index.d.ts.map +1 -1
  80. package/dist/mcp-server/index.js +1186 -82
  81. package/dist/mcp-server/index.js.map +1 -1
  82. package/dist/src/action-key.d.ts +9 -0
  83. package/dist/src/action-key.d.ts.map +1 -0
  84. package/dist/src/action-key.js +49 -0
  85. package/dist/src/action-key.js.map +1 -0
  86. package/dist/src/adaptive.d.ts.map +1 -1
  87. package/dist/src/adaptive.js +8 -6
  88. package/dist/src/adaptive.js.map +1 -1
  89. package/dist/src/affect.d.ts +4 -1
  90. package/dist/src/affect.d.ts.map +1 -1
  91. package/dist/src/affect.js +14 -12
  92. package/dist/src/affect.js.map +1 -1
  93. package/dist/src/audrey.d.ts +57 -4
  94. package/dist/src/audrey.d.ts.map +1 -1
  95. package/dist/src/audrey.js +512 -65
  96. package/dist/src/audrey.js.map +1 -1
  97. package/dist/src/capsule.d.ts +2 -1
  98. package/dist/src/capsule.d.ts.map +1 -1
  99. package/dist/src/capsule.js +18 -8
  100. package/dist/src/capsule.js.map +1 -1
  101. package/dist/src/causal.d.ts.map +1 -1
  102. package/dist/src/causal.js +23 -5
  103. package/dist/src/causal.js.map +1 -1
  104. package/dist/src/confidence.d.ts.map +1 -1
  105. package/dist/src/confidence.js +3 -0
  106. package/dist/src/confidence.js.map +1 -1
  107. package/dist/src/consolidate.d.ts +1 -0
  108. package/dist/src/consolidate.d.ts.map +1 -1
  109. package/dist/src/consolidate.js +70 -54
  110. package/dist/src/consolidate.js.map +1 -1
  111. package/dist/src/controller.d.ts +94 -0
  112. package/dist/src/controller.d.ts.map +1 -0
  113. package/dist/src/controller.js +350 -0
  114. package/dist/src/controller.js.map +1 -0
  115. package/dist/src/db.d.ts.map +1 -1
  116. package/dist/src/db.js +181 -169
  117. package/dist/src/db.js.map +1 -1
  118. package/dist/src/decay.d.ts.map +1 -1
  119. package/dist/src/decay.js +62 -55
  120. package/dist/src/decay.js.map +1 -1
  121. package/dist/src/embedding.d.ts +2 -1
  122. package/dist/src/embedding.d.ts.map +1 -1
  123. package/dist/src/embedding.js +60 -22
  124. package/dist/src/embedding.js.map +1 -1
  125. package/dist/src/encode.d.ts +9 -2
  126. package/dist/src/encode.d.ts.map +1 -1
  127. package/dist/src/encode.js +25 -12
  128. package/dist/src/encode.js.map +1 -1
  129. package/dist/src/export.d.ts.map +1 -1
  130. package/dist/src/export.js +5 -3
  131. package/dist/src/export.js.map +1 -1
  132. package/dist/src/feedback.d.ts +35 -0
  133. package/dist/src/feedback.d.ts.map +1 -0
  134. package/dist/src/feedback.js +129 -0
  135. package/dist/src/feedback.js.map +1 -0
  136. package/dist/src/forget.d.ts.map +1 -1
  137. package/dist/src/forget.js +68 -60
  138. package/dist/src/forget.js.map +1 -1
  139. package/dist/src/fts.js +1 -1
  140. package/dist/src/fts.js.map +1 -1
  141. package/dist/src/hybrid-recall.d.ts +2 -1
  142. package/dist/src/hybrid-recall.d.ts.map +1 -1
  143. package/dist/src/hybrid-recall.js +41 -32
  144. package/dist/src/hybrid-recall.js.map +1 -1
  145. package/dist/src/impact.d.ts +47 -0
  146. package/dist/src/impact.d.ts.map +1 -0
  147. package/dist/src/impact.js +146 -0
  148. package/dist/src/impact.js.map +1 -0
  149. package/dist/src/import.d.ts +177 -1
  150. package/dist/src/import.d.ts.map +1 -1
  151. package/dist/src/import.js +235 -46
  152. package/dist/src/import.js.map +1 -1
  153. package/dist/src/index.d.ts +5 -1
  154. package/dist/src/index.d.ts.map +1 -1
  155. package/dist/src/index.js +3 -1
  156. package/dist/src/index.js.map +1 -1
  157. package/dist/src/interference.d.ts +5 -2
  158. package/dist/src/interference.d.ts.map +1 -1
  159. package/dist/src/interference.js +39 -32
  160. package/dist/src/interference.js.map +1 -1
  161. package/dist/src/introspect.js +18 -18
  162. package/dist/src/llm.d.ts.map +1 -1
  163. package/dist/src/llm.js +1 -0
  164. package/dist/src/llm.js.map +1 -1
  165. package/dist/src/migrate.d.ts.map +1 -1
  166. package/dist/src/migrate.js +21 -9
  167. package/dist/src/migrate.js.map +1 -1
  168. package/dist/src/preflight.d.ts +2 -1
  169. package/dist/src/preflight.d.ts.map +1 -1
  170. package/dist/src/preflight.js +66 -5
  171. package/dist/src/preflight.js.map +1 -1
  172. package/dist/src/profile.d.ts +23 -0
  173. package/dist/src/profile.d.ts.map +1 -0
  174. package/dist/src/profile.js +51 -0
  175. package/dist/src/profile.js.map +1 -0
  176. package/dist/src/promote.d.ts.map +1 -1
  177. package/dist/src/promote.js +8 -9
  178. package/dist/src/promote.js.map +1 -1
  179. package/dist/src/prompts.d.ts.map +1 -1
  180. package/dist/src/prompts.js +165 -136
  181. package/dist/src/prompts.js.map +1 -1
  182. package/dist/src/recall.d.ts +9 -6
  183. package/dist/src/recall.d.ts.map +1 -1
  184. package/dist/src/recall.js +204 -62
  185. package/dist/src/recall.js.map +1 -1
  186. package/dist/src/redact.d.ts +7 -1
  187. package/dist/src/redact.d.ts.map +1 -1
  188. package/dist/src/redact.js +94 -11
  189. package/dist/src/redact.js.map +1 -1
  190. package/dist/src/reflexes.d.ts +1 -0
  191. package/dist/src/reflexes.d.ts.map +1 -1
  192. package/dist/src/reflexes.js +3 -0
  193. package/dist/src/reflexes.js.map +1 -1
  194. package/dist/src/rollback.d.ts.map +1 -1
  195. package/dist/src/rollback.js +13 -8
  196. package/dist/src/rollback.js.map +1 -1
  197. package/dist/src/routes.d.ts +1 -0
  198. package/dist/src/routes.d.ts.map +1 -1
  199. package/dist/src/routes.js +251 -6
  200. package/dist/src/routes.js.map +1 -1
  201. package/dist/src/rules-compiler.d.ts.map +1 -1
  202. package/dist/src/rules-compiler.js +36 -6
  203. package/dist/src/rules-compiler.js.map +1 -1
  204. package/dist/src/server.d.ts +2 -1
  205. package/dist/src/server.d.ts.map +1 -1
  206. package/dist/src/server.js +42 -4
  207. package/dist/src/server.js.map +1 -1
  208. package/dist/src/tool-trace.d.ts.map +1 -1
  209. package/dist/src/tool-trace.js +42 -29
  210. package/dist/src/tool-trace.js.map +1 -1
  211. package/dist/src/types.d.ts +28 -1
  212. package/dist/src/types.d.ts.map +1 -1
  213. package/dist/src/ulid.d.ts.map +1 -1
  214. package/dist/src/ulid.js +52 -2
  215. package/dist/src/ulid.js.map +1 -1
  216. package/dist/src/utils.d.ts.map +1 -1
  217. package/dist/src/utils.js +8 -1
  218. package/dist/src/utils.js.map +1 -1
  219. package/dist/src/validate.d.ts +2 -0
  220. package/dist/src/validate.d.ts.map +1 -1
  221. package/dist/src/validate.js +77 -46
  222. package/dist/src/validate.js.map +1 -1
  223. package/docs/AUDREY_PAPER_OUTLINE.md +175 -0
  224. package/docs/MEMORY_BENCHMARKING.md +59 -0
  225. package/docs/PRODUCTION_BACKLOG.md +304 -0
  226. package/docs/paper/00-master.md +48 -0
  227. package/docs/paper/01-introduction.md +27 -0
  228. package/docs/paper/02-related-work.md +47 -0
  229. package/docs/paper/03-problem-definition.md +108 -0
  230. package/docs/paper/04-design.md +164 -0
  231. package/docs/paper/05-guardbench-spec.md +412 -0
  232. package/docs/paper/06-implementation.md +113 -0
  233. package/docs/paper/07-evaluation.md +168 -0
  234. package/docs/paper/08-discussion-limitations.md +61 -0
  235. package/docs/paper/09-conclusion.md +11 -0
  236. package/docs/paper/SUBMISSION_README.md +162 -0
  237. package/docs/paper/appendix-a-demo-transcript.md +114 -0
  238. package/docs/paper/arxiv-compile-report.schema.json +116 -0
  239. package/docs/paper/arxiv-source.schema.json +61 -0
  240. package/docs/paper/audrey-paper-v1.md +1106 -0
  241. package/docs/paper/browser-launch-plan.json +209 -0
  242. package/docs/paper/browser-launch-plan.schema.json +100 -0
  243. package/docs/paper/browser-launch-results.json +86 -0
  244. package/docs/paper/browser-launch-results.schema.json +66 -0
  245. package/docs/paper/claim-register.json +138 -0
  246. package/docs/paper/claim-register.schema.json +81 -0
  247. package/docs/paper/evidence-ledger.md +103 -0
  248. package/docs/paper/output/arxiv/README-arxiv.txt +8 -0
  249. package/docs/paper/output/arxiv/arxiv-manifest.json +41 -0
  250. package/docs/paper/output/arxiv/main.tex +949 -0
  251. package/docs/paper/output/arxiv/references.bib +222 -0
  252. package/docs/paper/output/arxiv-compile-report.json +24 -0
  253. package/docs/paper/output/submission-bundle/LICENSE +21 -0
  254. package/docs/paper/output/submission-bundle/README.md +533 -0
  255. package/docs/paper/output/submission-bundle/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +50 -0
  256. package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-dry-run.json +69 -0
  257. package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-evidence.json +56 -0
  258. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-conformance-card.json +63 -0
  259. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-manifest.json +414 -0
  260. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-raw.json +1171 -0
  261. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-summary.json +1981 -0
  262. package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.json +93 -0
  263. package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.md +7 -0
  264. package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/submission-manifest.json +131 -0
  265. package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/validation-report.json +31 -0
  266. package/docs/paper/output/submission-bundle/benchmarks/output/summary.json +2354 -0
  267. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-adapter-registry.schema.json +69 -0
  268. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-adapter-self-test.schema.json +156 -0
  269. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-conformance-card.schema.json +184 -0
  270. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-external-dry-run.schema.json +74 -0
  271. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-external-evidence.schema.json +108 -0
  272. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-external-run.schema.json +160 -0
  273. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-leaderboard.schema.json +179 -0
  274. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-manifest.schema.json +213 -0
  275. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-publication-verification.schema.json +47 -0
  276. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-raw.schema.json +164 -0
  277. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-submission-manifest.schema.json +151 -0
  278. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-summary.schema.json +228 -0
  279. package/docs/paper/output/submission-bundle/docs/AUDREY_PAPER_OUTLINE.md +175 -0
  280. package/docs/paper/output/submission-bundle/docs/paper/00-master.md +48 -0
  281. package/docs/paper/output/submission-bundle/docs/paper/01-introduction.md +27 -0
  282. package/docs/paper/output/submission-bundle/docs/paper/02-related-work.md +47 -0
  283. package/docs/paper/output/submission-bundle/docs/paper/03-problem-definition.md +108 -0
  284. package/docs/paper/output/submission-bundle/docs/paper/04-design.md +164 -0
  285. package/docs/paper/output/submission-bundle/docs/paper/05-guardbench-spec.md +412 -0
  286. package/docs/paper/output/submission-bundle/docs/paper/06-implementation.md +113 -0
  287. package/docs/paper/output/submission-bundle/docs/paper/07-evaluation.md +168 -0
  288. package/docs/paper/output/submission-bundle/docs/paper/08-discussion-limitations.md +61 -0
  289. package/docs/paper/output/submission-bundle/docs/paper/09-conclusion.md +11 -0
  290. package/docs/paper/output/submission-bundle/docs/paper/SUBMISSION_README.md +162 -0
  291. package/docs/paper/output/submission-bundle/docs/paper/appendix-a-demo-transcript.md +114 -0
  292. package/docs/paper/output/submission-bundle/docs/paper/arxiv-compile-report.schema.json +116 -0
  293. package/docs/paper/output/submission-bundle/docs/paper/arxiv-source.schema.json +61 -0
  294. package/docs/paper/output/submission-bundle/docs/paper/audrey-paper-v1.md +1106 -0
  295. package/docs/paper/output/submission-bundle/docs/paper/browser-launch-plan.json +209 -0
  296. package/docs/paper/output/submission-bundle/docs/paper/browser-launch-plan.schema.json +100 -0
  297. package/docs/paper/output/submission-bundle/docs/paper/browser-launch-results.json +86 -0
  298. package/docs/paper/output/submission-bundle/docs/paper/browser-launch-results.schema.json +66 -0
  299. package/docs/paper/output/submission-bundle/docs/paper/claim-register.json +138 -0
  300. package/docs/paper/output/submission-bundle/docs/paper/claim-register.schema.json +81 -0
  301. package/docs/paper/output/submission-bundle/docs/paper/evidence-ledger.md +103 -0
  302. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/README-arxiv.txt +8 -0
  303. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/arxiv-manifest.json +41 -0
  304. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/main.tex +949 -0
  305. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/references.bib +222 -0
  306. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv-compile-report.json +24 -0
  307. package/docs/paper/output/submission-bundle/docs/paper/paper-submission-bundle.schema.json +70 -0
  308. package/docs/paper/output/submission-bundle/docs/paper/publication-pack.json +81 -0
  309. package/docs/paper/output/submission-bundle/docs/paper/publication-pack.schema.json +60 -0
  310. package/docs/paper/output/submission-bundle/docs/paper/references.bib +222 -0
  311. package/docs/paper/output/submission-bundle/package.json +212 -0
  312. package/docs/paper/output/submission-bundle/paper-submission-manifest.json +379 -0
  313. package/docs/paper/paper-submission-bundle.schema.json +70 -0
  314. package/docs/paper/publication-pack.json +81 -0
  315. package/docs/paper/publication-pack.schema.json +60 -0
  316. package/docs/paper/references.bib +222 -0
  317. package/package.json +103 -26
  318. package/scripts/audit-release-completion.mjs +362 -0
  319. package/scripts/create-arxiv-source.mjs +362 -0
  320. package/scripts/create-paper-submission-bundle.mjs +210 -0
  321. package/scripts/finalize-release.mjs +526 -0
  322. package/scripts/prepare-release-cut.mjs +269 -0
  323. package/scripts/publish-release-bundle.mjs +209 -0
  324. package/scripts/publish-release-github-api.mjs +429 -0
  325. package/scripts/run-vitest.mjs +34 -0
  326. package/scripts/smoke-cli.js +72 -0
  327. package/scripts/sync-paper-artifacts.mjs +109 -0
  328. package/scripts/verify-arxiv-compile.mjs +440 -0
  329. package/scripts/verify-arxiv-source.mjs +194 -0
  330. package/scripts/verify-browser-launch-plan.mjs +237 -0
  331. package/scripts/verify-browser-launch-results.mjs +285 -0
  332. package/scripts/verify-paper-artifacts.mjs +338 -0
  333. package/scripts/verify-paper-claims.mjs +226 -0
  334. package/scripts/verify-paper-submission-bundle.mjs +207 -0
  335. package/scripts/verify-publication-pack.mjs +196 -0
  336. package/scripts/verify-python-package.py +201 -0
  337. package/scripts/verify-release-readiness.mjs +741 -0
  338. package/docs/assets/benchmarks/local-benchmark.svg +0 -45
  339. package/docs/assets/benchmarks/operations-benchmark.svg +0 -45
  340. package/docs/assets/benchmarks/published-memory-standards.svg +0 -50
  341. package/docs/audrey-for-dummies.md +0 -670
  342. package/docs/benchmarking.md +0 -151
  343. package/docs/future-of-llm-memory.md +0 -452
  344. package/docs/mcp-hosts.md +0 -206
  345. package/docs/ollama-local-agents.md +0 -128
  346. package/docs/production-readiness.md +0 -128
@@ -0,0 +1,259 @@
1
+ import { mkdirSync, writeFileSync } from 'node:fs';
2
+ import { join } from 'node:path';
3
+
4
/**
 * Colour tokens shared by the SVG charts and the HTML report stylesheet.
 * Frozen so that no renderer can accidentally mutate the shared palette.
 */
const PALETTE = Object.freeze({
  audrey: '#0f766e',
  vector: '#0369a1',
  keyword: '#6d28d9',
  recent: '#b45309',
  external: '#1d4ed8',
  accent: '#111827',
  muted: '#6b7280',
  surface: '#f8fafc',
  border: '#cbd5e1',
});
15
+
16
/**
 * Escape a value so it can be interpolated into HTML/SVG text content or a
 * quoted attribute value.
 *
 * The ampersand is replaced first so that the entities produced by the later
 * replacements are not escaped a second time.
 *
 * @param {*} text - Any value; it is coerced with `String()` before escaping.
 * @returns {string} The coerced value with `&`, `<`, `>`, `"` and `'` replaced
 *   by character references.
 */
function escapeHtml(text) {
  return String(text)
    .replaceAll('&', '&amp;')
    .replaceAll('<', '&lt;')
    .replaceAll('>', '&gt;')
    .replaceAll('"', '&quot;')
    .replaceAll("'", '&#39;');
}
23
+
24
/**
 * Choose the fill colour for a chart bar from its label.
 *
 * The exact label "Audrey" gets the Audrey colour. Otherwise the first of
 * "Vector", "Keyword", "Recent" contained in the label decides the colour,
 * and anything else falls back to the external colour.
 *
 * @param {string} label - Bar label (the system name).
 * @returns {string} A colour value taken from PALETTE.
 */
function chartBarColor(label) {
  if (label === 'Audrey') {
    return PALETTE.audrey;
  }

  const substringColors = [
    ['Vector', PALETTE.vector],
    ['Keyword', PALETTE.keyword],
    ['Recent', PALETTE.recent],
  ];
  for (const [needle, color] of substringColors) {
    if (label.includes(needle)) {
      return color;
    }
  }

  return PALETTE.external;
}
31
+
32
/**
 * Render a vertical bar chart as a standalone SVG document string.
 *
 * @param {object} options
 * @param {string} options.title - Chart heading; also used as the SVG aria-label.
 * @param {Array<{label: string, value: number}>} options.rows - One bar per row, drawn left to right.
 * @param {string} [options.valueSuffix='%'] - Appended to bar values and axis tick labels.
 * @param {number} [options.maxValue=100] - Value mapped to the full plot height.
 *   Bar values are clamped to [0, maxValue]; a non-finite or non-positive
 *   maxValue falls back to 100.
 * @returns {string} SVG markup starting with an XML declaration.
 */
function renderBarChart({ title, rows, valueSuffix = '%', maxValue = 100 }) {
  const width = 960;
  const height = 420;
  const margin = { top: 56, right: 32, bottom: 88, left: 64 };
  const plotWidth = width - margin.left - margin.right;
  const plotHeight = height - margin.top - margin.bottom;

  // Guard the scale so a zero/negative/NaN maxValue cannot produce NaN or
  // Infinity coordinates.
  const scaleMax = Number.isFinite(maxValue) && maxValue > 0 ? maxValue : 100;
  const suffix = escapeHtml(valueSuffix);

  // Bars are at least 32px wide, but never wider than their share of the plot,
  // so a large number of rows cannot produce a negative gap (overlapping bars).
  const slotCount = Math.max(rows.length, 1);
  const barWidth = Math.min(
    Math.max(32, Math.floor(plotWidth / slotCount) - 18),
    plotWidth / slotCount,
  );
  const gap = rows.length > 1 ? (plotWidth - barWidth * rows.length) / (rows.length - 1) : 0;

  const bars = rows.map((row, index) => {
    // Missing or non-numeric values are drawn as an empty bar instead of
    // leaking "NaN" into the SVG attributes.
    const numeric = Number(row.value);
    const value = Number.isFinite(numeric) ? Math.max(0, Math.min(scaleMax, numeric)) : 0;
    const barHeight = (value / scaleMax) * plotHeight;
    const x = margin.left + index * (barWidth + gap);
    const y = margin.top + plotHeight - barHeight;
    return `
    <rect x="${x}" y="${y}" width="${barWidth}" height="${barHeight}" rx="8" fill="${chartBarColor(row.label)}" />
    <text x="${x + barWidth / 2}" y="${y - 10}" text-anchor="middle" font-size="15" fill="${PALETTE.accent}">${value.toFixed(1)}${suffix}</text>
    <text x="${x + barWidth / 2}" y="${height - 42}" text-anchor="middle" font-size="14" fill="${PALETTE.muted}">${escapeHtml(row.label)}</text>
  `;
  }).join('\n');

  // Gridlines sit at fixed fractions of the scale so they stay inside the plot
  // for any maxValue; with the default scale this yields 0, 25, 50, 75, 100.
  const grid = [0, 0.25, 0.5, 0.75, 1].map(fraction => {
    const tick = Number((fraction * scaleMax).toFixed(2));
    const y = margin.top + plotHeight - fraction * plotHeight;
    return `
    <line x1="${margin.left}" y1="${y}" x2="${width - margin.right}" y2="${y}" stroke="${PALETTE.border}" stroke-dasharray="4 4" />
    <text x="${margin.left - 10}" y="${y + 5}" text-anchor="end" font-size="13" fill="${PALETTE.muted}">${tick}${suffix}</text>
  `;
  }).join('\n');

  return `<?xml version="1.0" encoding="UTF-8"?>
<svg xmlns="http://www.w3.org/2000/svg" width="${width}" height="${height}" viewBox="0 0 ${width} ${height}" role="img" aria-label="${escapeHtml(title)}">
  <rect width="100%" height="100%" fill="white" />
  <text x="${margin.left}" y="34" font-size="24" font-weight="700" fill="${PALETTE.accent}">${escapeHtml(title)}</text>
  ${grid}
  ${bars}
</svg>`;
}
69
+
70
/**
 * Render the trend entries as `<li>` items for the report's trends list.
 *
 * The source URL is escaped both as link text and inside the `href`
 * attribute, so a URL containing `"`, `&`, `<` or `>` cannot break out of the
 * attribute or produce invalid markup.
 *
 * @param {Array<{title: string, summary: string, source: string}>} trends
 * @returns {string} The `<li>` fragments joined with newlines.
 */
function renderTrendList(trends) {
  return trends.map(trend => {
    const source = escapeHtml(trend.source);
    return `
      <li>
        <strong>${escapeHtml(trend.title)}</strong><br />
        ${escapeHtml(trend.summary)}<br />
        <a href="${source}">${source}</a>
      </li>
    `;
  }).join('\n');
}
79
+
80
/**
 * Render one `<tr>` per benchmark case for the report's case matrix.
 *
 * Each row shows the case title, suite and family, followed by one cell per
 * entry in `caseResult.results`. A cell is tinted green when the result
 * passed, amber when it did not pass but scored at least 0.5, and red
 * otherwise.
 *
 * @param {Array<object>} localCases - Cases with `title`, `suite`, `family`
 *   and a `results` array of `{ passed, score, summary }`.
 * @returns {string} The `<tr>` fragments joined with newlines.
 */
function renderCaseRows(localCases) {
  const toneFor = result => {
    if (result.passed) {
      return { bg: '#ecfdf5', fg: '#065f46' };
    }
    if (result.score >= 0.5) {
      return { bg: '#fff7ed', fg: '#9a3412' };
    }
    return { bg: '#fef2f2', fg: '#991b1b' };
  };

  const renderCell = result => {
    const { bg, fg } = toneFor(result);
    return `<td style="background:${bg};color:${fg}">${result.score.toFixed(2)}<br /><span style="font-size:12px">${escapeHtml(result.summary)}</span></td>`;
  };

  const rows = [];
  for (const caseResult of localCases) {
    rows.push(`
      <tr>
        <td>${escapeHtml(caseResult.title)}</td>
        <td>${escapeHtml(caseResult.suite)}</td>
        <td>${escapeHtml(caseResult.family)}</td>
        ${caseResult.results.map(result => renderCell(result)).join('')}
      </tr>
    `);
  }
  return rows.join('\n');
}
94
+
95
/**
 * Render one callout `<section>` per suite chart (heading, description and
 * an `<img>` pointing at the chart file next to the report).
 *
 * @param {Array<{title: string, description: string, fileName: string}>} suiteCharts
 * @returns {string} The sections joined with newlines, or an empty string
 *   when there are no suite charts.
 */
function renderSuiteSections(suiteCharts) {
  if (suiteCharts.length === 0) {
    return '';
  }

  const sections = [];
  for (const chart of suiteCharts) {
    const heading = escapeHtml(chart.title);
    const description = escapeHtml(chart.description);
    const imageFile = escapeHtml(chart.fileName);
    const altTitle = escapeHtml(chart.title);
    sections.push(`
      <section class="callout">
        <h2>${heading}</h2>
        <p>${description}</p>
        <img src="./${imageFile}" alt="${altTitle} chart" />
      </section>
    `);
  }
  return sections.join('\n');
}
105
+
106
/**
 * Write the benchmark charts, the JSON summary and the HTML report to disk.
 *
 * Files written into `outputDir`: `local-overall.svg`, `published-locomo.svg`,
 * `summary.json`, one `<suite.id>-overall.svg` per local suite, and
 * `report.html`. When `readmeAssetsDir` is given, `local-benchmark.svg` and
 * `published-memory-standards.svg` are also written there, plus
 * `operations-benchmark.svg` if a suite with id `operations` exists.
 *
 * `summary.local` is treated as optional throughout: a summary without it
 * yields a report with an empty case matrix instead of throwing after some
 * files have already been written.
 *
 * @param {object} options
 * @param {string} options.outputDir - Directory for the report artifacts (created if missing).
 * @param {object} options.summary - Benchmark summary; serialized as-is to `summary.json`.
 * @param {Array<{system: string, scorePercent: number}>} options.localOverall - Rows for the combined local chart.
 * @param {Array<object>} options.localSuites - Suites with `id`, `title`, `description` and `overall` rows.
 * @param {Array<{system: string, score: number}>} options.externalOverall - Rows for the published LoCoMo chart.
 * @param {Array<{title: string, summary: string, source: string}>} options.trends - Entries for the trends list.
 * @param {string} [options.readmeAssetsDir] - Optional directory for README chart copies.
 * @returns {{json: string, html: string, localChart: string, suiteCharts: Array<object>, externalChart: string, readmeAssets: (object|null)}}
 *   Paths of the written artifacts.
 */
export function writeBenchmarkArtifacts({
  outputDir,
  summary,
  localOverall,
  localSuites,
  externalOverall,
  trends,
  readmeAssetsDir,
}) {
  mkdirSync(outputDir, { recursive: true });

  // Normalize the optional `summary.local` section once so every consumer
  // below applies the same fallbacks.
  const local = summary.local ?? {};
  const localColumns = local.overall ?? [];
  const localCases = local.cases ?? [];
  const toPercentRows = rows => rows.map(row => ({ label: row.system, value: row.scorePercent }));

  const jsonPath = join(outputDir, 'summary.json');
  const htmlPath = join(outputDir, 'report.html');
  const localChartPath = join(outputDir, 'local-overall.svg');
  const externalChartPath = join(outputDir, 'published-locomo.svg');

  const localChartTitle = local.overall_scope === 'comparable_suites'
    ? 'Audrey vs Comparable Local Memory Baselines'
    : 'Selected Audrey Regression Suite';
  const localChart = renderBarChart({
    title: localChartTitle,
    rows: toPercentRows(localOverall),
  });
  const externalChart = renderBarChart({
    title: 'Published LLM Memory Standards (LoCoMo)',
    rows: externalOverall.map(row => ({ label: row.system, value: row.score })),
  });

  writeFileSync(localChartPath, localChart, 'utf8');
  writeFileSync(externalChartPath, externalChart, 'utf8');
  writeFileSync(jsonPath, JSON.stringify(summary, null, 2), 'utf8');

  const suiteCharts = localSuites.map(suite => {
    const fileName = `${suite.id}-overall.svg`;
    const title = `${suite.title} Benchmark`;
    const path = join(outputDir, fileName);
    writeFileSync(path, renderBarChart({ title, rows: toPercentRows(suite.overall) }), 'utf8');
    return {
      id: suite.id,
      title,
      description: suite.description,
      fileName,
      path,
    };
  });

  let readmeAssets = null;
  if (readmeAssetsDir) {
    mkdirSync(readmeAssetsDir, { recursive: true });
    const localReadmeChart = join(readmeAssetsDir, 'local-benchmark.svg');
    const externalReadmeChart = join(readmeAssetsDir, 'published-memory-standards.svg');
    writeFileSync(localReadmeChart, localChart, 'utf8');
    writeFileSync(externalReadmeChart, externalChart, 'utf8');

    // The README copy of the operations chart carries its own title, so it is
    // re-rendered rather than copied from the per-suite chart.
    const operationsSuite = localSuites.find(suite => suite.id === 'operations');
    let operationsReadmeChart = null;
    if (operationsSuite) {
      operationsReadmeChart = join(readmeAssetsDir, 'operations-benchmark.svg');
      writeFileSync(
        operationsReadmeChart,
        renderBarChart({
          title: 'Audrey Memory Operations Benchmark',
          rows: toPercentRows(operationsSuite.overall || []),
        }),
        'utf8',
      );
    }

    readmeAssets = {
      localChart: localReadmeChart,
      operationsChart: operationsReadmeChart,
      externalChart: externalReadmeChart,
    };
  }

  const html = `<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8" />
  <title>Audrey Memory Benchmark</title>
  <style>
    body { font-family: "Segoe UI", Arial, sans-serif; margin: 32px; color: ${PALETTE.accent}; background: ${PALETTE.surface}; }
    main { max-width: 1120px; margin: 0 auto; }
    h1, h2 { margin-bottom: 12px; }
    p, li { line-height: 1.5; }
    .callout { background: white; border: 1px solid ${PALETTE.border}; border-radius: 16px; padding: 20px; margin-bottom: 24px; }
    .grid { display: grid; gap: 24px; grid-template-columns: 1fr; }
    img { width: 100%; border: 1px solid ${PALETTE.border}; border-radius: 16px; background: white; }
    table { width: 100%; border-collapse: collapse; background: white; border-radius: 16px; overflow: hidden; }
    th, td { border: 1px solid ${PALETTE.border}; padding: 12px; vertical-align: top; text-align: left; }
    th { background: #e2e8f0; }
    code { background: #e2e8f0; padding: 2px 6px; border-radius: 6px; }
  </style>
</head>
<body>
  <main>
    <h1>Audrey Memory Benchmark</h1>
    <div class="callout">
      <p><strong>Method:</strong> Audrey is scored on a local regression suite inspired by LongMemEval-style retrieval, operation-level lifecycle behavior, and agent guard-loop benchmarks. The combined local chart uses comparable retrieval/lifecycle suites when available; the guard loop is reported as its own controller regression suite. Published external LoCoMo numbers stay separate so the comparison remains honest.</p>
      <p><strong>Scope:</strong> ${escapeHtml(local.overall_scope ?? 'unknown')} across ${escapeHtml((local.overall_suite_ids ?? []).join(', '))}; ${localCases.length ?? 0} total cases.</p>
      <p><strong>Run:</strong> <code>${escapeHtml(summary.command)}</code></p>
      <p><strong>Generated:</strong> ${escapeHtml(summary.generatedAt)}</p>
    </div>

    <div class="grid">
      <section class="callout">
        <h2>Combined Local Benchmark</h2>
        <img src="./local-overall.svg" alt="Combined local benchmark bar chart" />
      </section>

      ${renderSuiteSections(suiteCharts)}

      <section class="callout">
        <h2>Published Leaderboard</h2>
        <img src="./published-locomo.svg" alt="Published LoCoMo leaderboard bar chart" />
      </section>
    </div>

    <section class="callout">
      <h2>Case Matrix</h2>
      <table>
        <thead>
          <tr>
            <th>Case</th>
            <th>Suite</th>
            <th>Family</th>
            ${localColumns.map(row => `<th>${escapeHtml(row.system)}</th>`).join('')}
          </tr>
        </thead>
        <tbody>
          ${renderCaseRows(localCases)}
        </tbody>
      </table>
    </section>

    <section class="callout">
      <h2>March 23, 2026 Memory Trends</h2>
      <ul>
        ${renderTrendList(trends)}
      </ul>
    </section>
  </main>
</body>
</html>`;

  writeFileSync(htmlPath, html, 'utf8');

  return {
    json: jsonPath,
    html: htmlPath,
    localChart: localChartPath,
    suiteCharts,
    externalChart: externalChartPath,
    readmeAssets,
  };
}
@@ -0,0 +1,281 @@
1
+ import { spawnSync } from 'node:child_process';
2
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
3
+ import { basename, dirname, resolve } from 'node:path';
4
+ import { fileURLToPath } from 'node:url';
5
+ import { writeGuardBenchConformanceCard } from './create-conformance-card.mjs';
6
+ import { computeGuardBenchArtifactHashes, validateGuardBenchArtifacts } from './validate-guardbench-artifacts.mjs';
7
+ import { publicArtifactValue } from './public-paths.mjs';
8
+
9
// Repository root: one directory above the folder that contains this script.
const ROOT = resolve(dirname(fileURLToPath(import.meta.url)), '..');

// Alias table for the bundled external adapters. Each row is
// [alias, canonical adapter name, required environment variable]; every alias
// receives its own spec object so entries never share state.
const KNOWN_ADAPTERS = new Map(
  [
    ['mem0', 'mem0-platform', 'MEM0_API_KEY'],
    ['mem0-platform', 'mem0-platform', 'MEM0_API_KEY'],
    ['zep', 'zep-cloud', 'ZEP_API_KEY'],
    ['zep-cloud', 'zep-cloud', 'ZEP_API_KEY'],
  ].map(([alias, name, envVar]) => [
    alias,
    {
      name,
      path: `benchmarks/adapters/${name}.mjs`,
      requiredEnv: [envVar],
    },
  ]),
);
32
+
33
/**
 * Parse the command-line options of the external GuardBench runner.
 *
 * @param {string[]} [argv] - Raw argument tokens. Defaults to
 *   `process.argv.slice(2)`.
 * @returns {object} Parsed options: `adapter`, `outDir`, `check`, `dryRun`,
 *   `json`, `minPassRate` (kept as the raw string) and `allowMissingEnv`.
 *   A `help` key is added only when `--help` or `-h` is present.
 * @throws {Error} When a token is not a recognised option, or when an option
 *   that takes a value is not followed by one.
 */
export function parseExternalArgs(argv = process.argv.slice(2)) {
  const args = {
    adapter: 'mem0-platform',
    outDir: null,
    check: false,
    dryRun: false,
    json: false,
    minPassRate: null,
    allowMissingEnv: false,
  };

  // Options that consume the following token as their value.
  const valueOptions = new Map([
    ['--adapter', 'adapter'],
    ['--out-dir', 'outDir'],
    ['--min-pass-rate', 'minPassRate'],
  ]);
  // Boolean switches.
  const flagOptions = new Map([
    ['--check', 'check'],
    ['--dry-run', 'dryRun'],
    ['--json', 'json'],
    ['--allow-missing-env', 'allowMissingEnv'],
    ['--help', 'help'],
    ['-h', 'help'],
  ]);

  for (let i = 0; i < argv.length; i++) {
    const token = argv[i];

    const valueKey = valueOptions.get(token);
    if (valueKey) {
      const value = argv[i + 1];
      // A missing value, or another option where the value should be, is a
      // usage error. Report it as such instead of "Unknown argument", and do
      // not silently swallow the next flag as the value.
      if (!value || valueOptions.has(value) || flagOptions.has(value)) {
        throw new Error(`Missing value for ${token}`);
      }
      args[valueKey] = value;
      i++;
      continue;
    }

    const flagKey = flagOptions.get(token);
    if (flagKey) {
      args[flagKey] = true;
      continue;
    }

    throw new Error(`Unknown argument: ${token}`);
  }

  return args;
}
59
+
60
/**
 * Load a UTF-8 encoded JSON file and return the parsed value.
 *
 * @param {string} path - Location of the JSON file.
 * @returns {*} The value produced by `JSON.parse`.
 */
function readJson(path) {
  const raw = readFileSync(path, 'utf-8');
  return JSON.parse(raw);
}
63
+
64
/**
 * Resolve an adapter alias or path into an adapter spec.
 *
 * A key of KNOWN_ADAPTERS yields its registered spec. Any other value is
 * treated as a path resolved against ROOT; its name is the file's base name
 * without a trailing .js/.mjs/.cjs extension, and it requires no environment
 * variables.
 *
 * @param {string} adapter - Adapter alias or adapter module path.
 * @returns {{name: string, path: string, requiredEnv: string[]}} Adapter spec.
 */
function adapterSpec(adapter) {
  return KNOWN_ADAPTERS.get(adapter) || {
    name: basename(adapter).replace(/\.[cm]?js$/i, ''),
    path: resolve(ROOT, adapter),
    requiredEnv: [],
  };
}
75
+
76
/**
 * Describe an external GuardBench run without executing it.
 *
 * @param {object} [args] - Parsed options; reads `adapter`, `outDir`, `check`,
 *   `json` and `minPassRate`.
 * @param {object} [env] - Environment used to detect missing required
 *   variables. Defaults to `process.env`.
 * @returns {object} Run description with `adapter`, `adapterPath`, `outDir`,
 *   `requiredEnv`, `missingEnv`, and the argv arrays `command` and
 *   `validationCommand`.
 */
export function buildExternalGuardBenchRun(args = {}, env = process.env) {
  const { name, path, requiredEnv } = adapterSpec(args.adapter ?? 'mem0-platform');
  const adapterPath = resolve(ROOT, path);
  const outDir = resolve(ROOT, args.outDir ?? `benchmarks/output/external/${name}`);

  // A required variable counts as missing when it is unset or empty.
  const missingEnv = requiredEnv.filter(key => !env[key]);

  // Optional switches forwarded to the GuardBench script, in a fixed order.
  const forwarded = [
    ...(args.check ? ['--check'] : []),
    ...(args.json ? ['--json'] : []),
    ...(args.minPassRate != null ? ['--min-pass-rate', String(args.minPassRate)] : []),
  ];

  return {
    adapter: name,
    adapterPath,
    outDir,
    requiredEnv,
    missingEnv,
    command: [
      process.execPath,
      resolve(ROOT, 'benchmarks/guardbench.js'),
      '--adapter',
      adapterPath,
      '--out-dir',
      outDir,
      ...forwarded,
    ],
    validationCommand: [
      process.execPath,
      resolve(ROOT, 'benchmarks/validate-guardbench-artifacts.mjs'),
      '--dir',
      outDir,
    ],
  };
}
110
+
111
/**
 * Check whether one adapter's rows in a GuardBench summary satisfy the
 * adapter contract.
 *
 * The adapter is looked up by `adapterName` in `summary.systemSummaries`. If
 * no entry has that name and the manifest lists exactly one external subject,
 * that subject's name is used instead.
 *
 * @param {object} summary - Parsed GuardBench summary.
 * @param {string} adapterName - Adapter name that was requested.
 * @returns {object} Result with `ok`, `adapter` (resolved name),
 *   `requestedAdapter`, `scenarios`, `expectedScenarios`,
 *   `fullContractPassRate`, `decisionAccuracy`, `redactionLeaks` and the list
 *   of `failures`.
 */
export function evaluateAdapterConformance(summary, adapterName) {
  const summaries = summary.systemSummaries;
  const externals = (summary.manifest?.subjects ?? []).filter(candidate => candidate.external);

  // Fall back to the sole external subject only when the requested name has
  // no system summary of its own.
  let resolvedName = adapterName;
  const requestedIsKnown = summaries?.some(entry => entry.system === adapterName);
  if (!requestedIsKnown && externals.length === 1) {
    resolvedName = externals[0].name;
  }

  const failures = [];
  const own = summaries?.find(entry => entry.system === resolvedName);
  if (!own) {
    failures.push(`Missing system summary for adapter ${resolvedName}`);
  }

  const expected = summary.scenarios ?? summary.manifest?.scenarios?.length ?? 0;
  const rows = (summary.cases ?? [])
    .map(entry => entry.results ?? [])
    .flat()
    .filter(row => row.system === resolvedName);

  if (rows.length !== expected) {
    failures.push(`Adapter ${resolvedName} returned ${rows.length}/${expected} scenario rows`);
  }
  if (own) {
    if (own.scenarios !== expected) {
      failures.push(`Adapter ${resolvedName} system summary has ${own.scenarios}/${expected} scenarios`);
    }
    if (own.redactionLeaks !== 0) {
      failures.push(`Adapter ${resolvedName} leaked ${own.redactionLeaks} seeded secret(s) in decision output`);
    }
  }
  if (!rows.every(row => row.external === true)) {
    failures.push(`Adapter ${resolvedName} rows are not marked external`);
  }

  return {
    ok: failures.length === 0,
    adapter: resolvedName,
    requestedAdapter: adapterName,
    scenarios: own?.scenarios ?? rows.length,
    expectedScenarios: expected,
    fullContractPassRate: own?.passRate ?? null,
    decisionAccuracy: own?.decisionAccuracy ?? null,
    redactionLeaks: own?.redactionLeaks ?? null,
    failures,
  };
}
154
+
155
/**
 * Build the command-line help text for the external GuardBench runner.
 *
 * Lists every option accepted by the argument parser, including `-h`/`--help`.
 *
 * @returns {string} Multi-line usage text ending with a newline.
 */
function usage() {
  return `Usage: node benchmarks/run-external-guardbench.mjs [options]

Options:
  --adapter <name|path>      Adapter alias or ESM adapter path. Default: mem0-platform.
  --out-dir <path>           Output directory. Default: benchmarks/output/external/<adapter>.
  --check                    Fail if Audrey Guard pass rate is below the threshold.
  --min-pass-rate <percent>  GuardBench pass-rate threshold for --check.
  --json                     Forward JSON output from GuardBench.
  --dry-run                  Print the resolved command and metadata without running.
  --allow-missing-env        Permit running even when known runtime env vars are absent.
  -h, --help                 Show this help text and exit.
`;
}
168
+
169
/**
 * Write run metadata to `external-run-metadata.json` inside a directory.
 *
 * The directory is created (recursively) when needed. The metadata is passed
 * through `publicArtifactValue` and serialised as 2-space-indented JSON with
 * a trailing newline.
 *
 * @param {string} path - Directory that receives the metadata file.
 * @param {object} metadata - Metadata to record.
 * @returns {string} Resolved path of the written file.
 */
export function writeExternalRunMetadata(path, metadata) {
  mkdirSync(path, { recursive: true });
  const target = resolve(path, 'external-run-metadata.json');
  const serialized = JSON.stringify(publicArtifactValue(metadata), null, 2);
  writeFileSync(target, `${serialized}\n`, 'utf-8');
  return target;
}
175
+
176
/**
 * CLI entry point: run GuardBench against an external adapter, validate the
 * produced artifacts, evaluate adapter conformance and record run metadata.
 *
 * Flow:
 *  1. Parse arguments; print usage and return on --help.
 *  2. Resolve the run description and fail if the adapter file is missing.
 *  3. If required environment variables are missing (and neither
 *     --allow-missing-env nor --dry-run was given), write "blocked" metadata
 *     and throw.
 *  4. On --dry-run, write metadata, print the resolved command and return.
 *  5. Otherwise spawn GuardBench synchronously, validate artifacts, evaluate
 *     conformance, write the final metadata (and a conformance card when the
 *     child succeeded) and set `process.exitCode`.
 *
 * The process exit code is 0 only when the child exited 0 AND artifact
 * validation AND adapter conformance passed; otherwise it is non-zero.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the adapter file does not exist or required
 *   environment variables are missing.
 */
async function main() {
  const args = parseExternalArgs();
  if (args.help) {
    console.log(usage());
    return;
  }

  const run = buildExternalGuardBenchRun(args);
  const startedAt = new Date().toISOString();
  const metadata = {
    suite: 'GuardBench external adapter run',
    startedAt,
    adapter: run.adapter,
    adapterPath: run.adapterPath,
    outDir: run.outDir,
    requiredEnv: run.requiredEnv,
    missingEnv: run.missingEnv,
    command: run.command,
    validationCommand: run.validationCommand,
    dryRun: args.dryRun,
  };

  if (!existsSync(run.adapterPath)) {
    throw new Error(`Adapter not found: ${run.adapterPath}`);
  }

  if (run.missingEnv.length && !args.allowMissingEnv && !args.dryRun) {
    metadata.status = 'blocked';
    metadata.blockReason = `Missing runtime environment: ${run.missingEnv.join(', ')}`;
    const metadataPath = writeExternalRunMetadata(run.outDir, metadata);
    throw new Error(`${metadata.blockReason}. Metadata written to ${metadataPath}`);
  }

  if (args.dryRun) {
    metadata.status = run.missingEnv.length ? 'dry-run-missing-env' : 'dry-run-ready';
    const metadataPath = writeExternalRunMetadata(run.outDir, metadata);
    if (args.json) {
      console.log(JSON.stringify({ ...metadata, metadataPath }, null, 2));
    } else {
      console.log(`External GuardBench dry run: ${run.adapter}`);
      console.log(`Command: ${run.command.map(part => JSON.stringify(part)).join(' ')}`);
      console.log(`Metadata: ${metadataPath}`);
      if (run.missingEnv.length) console.log(`Missing runtime env: ${run.missingEnv.join(', ')}`);
    }
    return;
  }

  writeExternalRunMetadata(run.outDir, { ...metadata, status: 'running' });
  const child = spawnSync(run.command[0], run.command.slice(1), {
    cwd: ROOT,
    env: process.env,
    stdio: 'inherit',
  });
  // When the child could not be launched at all, spawnSync reports it through
  // `error` (with a null status); surface it instead of failing silently.
  if (child.error) {
    console.error(`External GuardBench run could not be started: ${child.error.message}`);
  }

  const validation = validateGuardBenchArtifacts({ dir: run.outDir });
  let adapterConformance = {
    ok: false,
    adapter: run.adapter,
    failures: ['GuardBench summary was not available for adapter conformance evaluation'],
  };
  if (child.status === 0) {
    try {
      const summary = readJson(resolve(run.outDir, 'guardbench-summary.json'));
      adapterConformance = evaluateAdapterConformance(summary, run.adapter);
    } catch (error) {
      // An unreadable or malformed summary is recorded as a conformance
      // failure rather than aborting the run.
      adapterConformance = {
        ok: false,
        adapter: run.adapter,
        failures: [error.message],
      };
    }
  }

  if (validation.ok) {
    console.log(`External GuardBench artifact validation passed: ${run.outDir}`);
  } else {
    console.error('External GuardBench artifact validation failed:');
    for (const failure of validation.failures) console.error(`- ${failure}`);
  }
  if (adapterConformance.ok) {
    console.log(`External GuardBench adapter conformance passed: ${adapterConformance.adapter}`);
  } else {
    console.error('External GuardBench adapter conformance failed:');
    for (const failure of adapterConformance.failures) console.error(`- ${failure}`);
  }

  const passed = child.status === 0 && validation.ok && adapterConformance.ok;
  const completed = {
    ...metadata,
    completedAt: new Date().toISOString(),
    status: passed ? 'passed' : 'failed',
    exitCode: child.status,
    signal: child.signal,
    // Hashes are only computed when the child succeeded.
    artifactHashes: child.status === 0 ? computeGuardBenchArtifactHashes(run.outDir) : undefined,
    artifactValidation: validation,
    adapterConformance,
  };
  const metadataPath = writeExternalRunMetadata(run.outDir, completed);
  const card = child.status === 0 ? writeGuardBenchConformanceCard({ dir: run.outDir }) : null;
  console.log(`External GuardBench metadata: ${metadataPath}`);
  if (card) console.log(`External GuardBench conformance card: ${card.path}`);

  // `||` rather than `??`: a child that exited 0 while validation or
  // conformance failed must still yield a non-zero exit code (0 ?? 1 === 0).
  process.exitCode = passed ? 0 : (child.status || 1);
}
275
+
276
// Run the CLI only when this file is the script being executed (the script
// path ends with this file's name); importing the module does not trigger it.
if (process.argv[1]?.endsWith('run-external-guardbench.mjs')) {
  main().then(undefined, (failure) => {
    // Print just the message and exit with a failing status.
    console.error(failure.message);
    process.exit(1);
  });
}