audrey 0.23.1 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250) hide show
  1. package/CHANGELOG.md +101 -15
  2. package/LICENSE +21 -21
  3. package/README.md +232 -6
  4. package/SECURITY.md +2 -1
  5. package/benchmarks/adapter-kit.mjs +20 -0
  6. package/benchmarks/adapter-self-test.mjs +166 -0
  7. package/benchmarks/adapters/example-allow.mjs +28 -0
  8. package/benchmarks/adapters/mem0-platform.mjs +267 -0
  9. package/benchmarks/adapters/registry.json +51 -0
  10. package/benchmarks/adapters/zep-cloud.mjs +280 -0
  11. package/benchmarks/baselines.js +169 -0
  12. package/benchmarks/build-leaderboard.mjs +170 -0
  13. package/benchmarks/cases.js +537 -0
  14. package/benchmarks/create-conformance-card.mjs +139 -0
  15. package/benchmarks/create-submission-bundle.mjs +176 -0
  16. package/benchmarks/dry-run-external-adapters.mjs +165 -0
  17. package/benchmarks/guardbench.js +1125 -0
  18. package/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +50 -0
  19. package/benchmarks/output/external/guardbench-external-dry-run.json +69 -0
  20. package/benchmarks/output/external/guardbench-external-evidence.json +56 -0
  21. package/benchmarks/output/guardbench-conformance-card.json +63 -0
  22. package/benchmarks/output/guardbench-manifest.json +414 -0
  23. package/benchmarks/output/guardbench-raw.json +1271 -0
  24. package/benchmarks/output/guardbench-summary.json +2107 -0
  25. package/benchmarks/output/leaderboard/guardbench-leaderboard.json +93 -0
  26. package/benchmarks/output/leaderboard/guardbench-leaderboard.md +7 -0
  27. package/benchmarks/output/submission-bundle/guardbench-conformance-card.json +63 -0
  28. package/benchmarks/output/submission-bundle/guardbench-manifest.json +414 -0
  29. package/benchmarks/output/submission-bundle/guardbench-raw.json +1271 -0
  30. package/benchmarks/output/submission-bundle/guardbench-summary.json +2107 -0
  31. package/benchmarks/output/submission-bundle/schemas/guardbench-adapter-registry.schema.json +69 -0
  32. package/benchmarks/output/submission-bundle/schemas/guardbench-adapter-self-test.schema.json +156 -0
  33. package/benchmarks/output/submission-bundle/schemas/guardbench-conformance-card.schema.json +184 -0
  34. package/benchmarks/output/submission-bundle/schemas/guardbench-external-dry-run.schema.json +74 -0
  35. package/benchmarks/output/submission-bundle/schemas/guardbench-external-evidence.schema.json +108 -0
  36. package/benchmarks/output/submission-bundle/schemas/guardbench-external-run.schema.json +160 -0
  37. package/benchmarks/output/submission-bundle/schemas/guardbench-leaderboard.schema.json +179 -0
  38. package/benchmarks/output/submission-bundle/schemas/guardbench-manifest.schema.json +213 -0
  39. package/benchmarks/output/submission-bundle/schemas/guardbench-publication-verification.schema.json +47 -0
  40. package/benchmarks/output/submission-bundle/schemas/guardbench-raw.schema.json +184 -0
  41. package/benchmarks/output/submission-bundle/schemas/guardbench-submission-manifest.schema.json +151 -0
  42. package/benchmarks/output/submission-bundle/schemas/guardbench-summary.schema.json +249 -0
  43. package/benchmarks/output/submission-bundle/submission-manifest.json +131 -0
  44. package/benchmarks/output/submission-bundle/validation-report.json +31 -0
  45. package/benchmarks/output/summary.json +2354 -0
  46. package/benchmarks/perf-snapshot.js +304 -0
  47. package/benchmarks/perf.bench.js +161 -0
  48. package/benchmarks/public-paths.mjs +78 -0
  49. package/benchmarks/reference-results.js +70 -0
  50. package/benchmarks/report.js +259 -0
  51. package/benchmarks/run-external-guardbench.mjs +281 -0
  52. package/benchmarks/run.js +682 -0
  53. package/benchmarks/schemas/guardbench-adapter-registry.schema.json +69 -0
  54. package/benchmarks/schemas/guardbench-adapter-self-test.schema.json +156 -0
  55. package/benchmarks/schemas/guardbench-conformance-card.schema.json +184 -0
  56. package/benchmarks/schemas/guardbench-external-dry-run.schema.json +74 -0
  57. package/benchmarks/schemas/guardbench-external-evidence.schema.json +108 -0
  58. package/benchmarks/schemas/guardbench-external-run.schema.json +160 -0
  59. package/benchmarks/schemas/guardbench-leaderboard.schema.json +179 -0
  60. package/benchmarks/schemas/guardbench-manifest.schema.json +213 -0
  61. package/benchmarks/schemas/guardbench-publication-verification.schema.json +47 -0
  62. package/benchmarks/schemas/guardbench-raw.schema.json +184 -0
  63. package/benchmarks/schemas/guardbench-submission-manifest.schema.json +151 -0
  64. package/benchmarks/schemas/guardbench-summary.schema.json +249 -0
  65. package/benchmarks/snapshots/perf-0.22.2.json +123 -0
  66. package/benchmarks/snapshots/perf-0.23.0.json +123 -0
  67. package/benchmarks/validate-adapter-module.mjs +104 -0
  68. package/benchmarks/validate-adapter-registry.mjs +134 -0
  69. package/benchmarks/validate-adapter-self-test.mjs +96 -0
  70. package/benchmarks/validate-guardbench-artifacts.mjs +343 -0
  71. package/benchmarks/verify-external-evidence.mjs +296 -0
  72. package/benchmarks/verify-publication-artifacts.mjs +286 -0
  73. package/benchmarks/verify-submission-bundle.mjs +167 -0
  74. package/dist/mcp-server/config.d.ts +1 -1
  75. package/dist/mcp-server/config.d.ts.map +1 -1
  76. package/dist/mcp-server/config.js +1 -1
  77. package/dist/mcp-server/config.js.map +1 -1
  78. package/dist/mcp-server/index.d.ts +65 -3
  79. package/dist/mcp-server/index.d.ts.map +1 -1
  80. package/dist/mcp-server/index.js +675 -157
  81. package/dist/mcp-server/index.js.map +1 -1
  82. package/dist/src/action-key.d.ts +9 -0
  83. package/dist/src/action-key.d.ts.map +1 -0
  84. package/dist/src/action-key.js +49 -0
  85. package/dist/src/action-key.js.map +1 -0
  86. package/dist/src/adaptive.js +5 -5
  87. package/dist/src/affect.js +8 -8
  88. package/dist/src/audrey.d.ts +13 -0
  89. package/dist/src/audrey.d.ts.map +1 -1
  90. package/dist/src/audrey.js +68 -3
  91. package/dist/src/audrey.js.map +1 -1
  92. package/dist/src/capsule.js +4 -4
  93. package/dist/src/causal.js +3 -3
  94. package/dist/src/consolidate.js +48 -48
  95. package/dist/src/controller.d.ts +78 -6
  96. package/dist/src/controller.d.ts.map +1 -1
  97. package/dist/src/controller.js +273 -53
  98. package/dist/src/controller.js.map +1 -1
  99. package/dist/src/db.js +172 -172
  100. package/dist/src/decay.js +8 -8
  101. package/dist/src/embedding.d.ts +2 -1
  102. package/dist/src/embedding.d.ts.map +1 -1
  103. package/dist/src/embedding.js +39 -29
  104. package/dist/src/embedding.js.map +1 -1
  105. package/dist/src/encode.js +6 -6
  106. package/dist/src/feedback.d.ts +6 -0
  107. package/dist/src/feedback.d.ts.map +1 -1
  108. package/dist/src/feedback.js +6 -0
  109. package/dist/src/feedback.js.map +1 -1
  110. package/dist/src/forget.js +12 -12
  111. package/dist/src/hybrid-recall.js +9 -9
  112. package/dist/src/impact.js +6 -6
  113. package/dist/src/import.d.ts +3 -3
  114. package/dist/src/import.js +41 -41
  115. package/dist/src/index.d.ts +5 -4
  116. package/dist/src/index.d.ts.map +1 -1
  117. package/dist/src/index.js +3 -3
  118. package/dist/src/index.js.map +1 -1
  119. package/dist/src/interference.js +14 -14
  120. package/dist/src/introspect.js +18 -18
  121. package/dist/src/preflight.d.ts.map +1 -1
  122. package/dist/src/preflight.js +41 -0
  123. package/dist/src/preflight.js.map +1 -1
  124. package/dist/src/promote.js +7 -7
  125. package/dist/src/prompts.js +118 -118
  126. package/dist/src/recall.js +30 -30
  127. package/dist/src/reflexes.d.ts +1 -0
  128. package/dist/src/reflexes.d.ts.map +1 -1
  129. package/dist/src/reflexes.js +3 -0
  130. package/dist/src/reflexes.js.map +1 -1
  131. package/dist/src/rollback.js +4 -4
  132. package/dist/src/routes.d.ts.map +1 -1
  133. package/dist/src/routes.js +71 -2
  134. package/dist/src/routes.js.map +1 -1
  135. package/dist/src/validate.js +25 -25
  136. package/docs/AUDREY_PAPER_OUTLINE.md +175 -0
  137. package/docs/MEMORY_BENCHMARKING.md +59 -0
  138. package/docs/PRODUCTION_BACKLOG.md +304 -0
  139. package/docs/paper/00-master.md +48 -0
  140. package/docs/paper/01-introduction.md +27 -0
  141. package/docs/paper/02-related-work.md +47 -0
  142. package/docs/paper/03-problem-definition.md +108 -0
  143. package/docs/paper/04-design.md +164 -0
  144. package/docs/paper/05-guardbench-spec.md +412 -0
  145. package/docs/paper/06-implementation.md +113 -0
  146. package/docs/paper/07-evaluation.md +168 -0
  147. package/docs/paper/08-discussion-limitations.md +61 -0
  148. package/docs/paper/09-conclusion.md +11 -0
  149. package/docs/paper/SUBMISSION_README.md +162 -0
  150. package/docs/paper/appendix-a-demo-transcript.md +114 -0
  151. package/docs/paper/arxiv-compile-report.schema.json +116 -0
  152. package/docs/paper/arxiv-source.schema.json +61 -0
  153. package/docs/paper/audrey-paper-v1.md +1106 -0
  154. package/docs/paper/browser-launch-plan.json +209 -0
  155. package/docs/paper/browser-launch-plan.schema.json +100 -0
  156. package/docs/paper/browser-launch-results.json +86 -0
  157. package/docs/paper/browser-launch-results.schema.json +66 -0
  158. package/docs/paper/claim-register.json +138 -0
  159. package/docs/paper/claim-register.schema.json +81 -0
  160. package/docs/paper/evidence-ledger.md +103 -0
  161. package/docs/paper/output/arxiv/README-arxiv.txt +8 -0
  162. package/docs/paper/output/arxiv/arxiv-manifest.json +41 -0
  163. package/docs/paper/output/arxiv/main.tex +949 -0
  164. package/docs/paper/output/arxiv/references.bib +222 -0
  165. package/docs/paper/output/arxiv-compile-report.json +24 -0
  166. package/docs/paper/output/submission-bundle/LICENSE +21 -0
  167. package/docs/paper/output/submission-bundle/README.md +555 -0
  168. package/docs/paper/output/submission-bundle/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +50 -0
  169. package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-dry-run.json +69 -0
  170. package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-evidence.json +56 -0
  171. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-conformance-card.json +63 -0
  172. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-manifest.json +414 -0
  173. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-raw.json +1271 -0
  174. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-summary.json +2107 -0
  175. package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.json +93 -0
  176. package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.md +7 -0
  177. package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/submission-manifest.json +131 -0
  178. package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/validation-report.json +31 -0
  179. package/docs/paper/output/submission-bundle/benchmarks/output/summary.json +2354 -0
  180. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-adapter-registry.schema.json +69 -0
  181. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-adapter-self-test.schema.json +156 -0
  182. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-conformance-card.schema.json +184 -0
  183. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-external-dry-run.schema.json +74 -0
  184. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-external-evidence.schema.json +108 -0
  185. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-external-run.schema.json +160 -0
  186. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-leaderboard.schema.json +179 -0
  187. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-manifest.schema.json +213 -0
  188. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-publication-verification.schema.json +47 -0
  189. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-raw.schema.json +184 -0
  190. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-submission-manifest.schema.json +151 -0
  191. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-summary.schema.json +249 -0
  192. package/docs/paper/output/submission-bundle/docs/AUDREY_PAPER_OUTLINE.md +175 -0
  193. package/docs/paper/output/submission-bundle/docs/paper/00-master.md +48 -0
  194. package/docs/paper/output/submission-bundle/docs/paper/01-introduction.md +27 -0
  195. package/docs/paper/output/submission-bundle/docs/paper/02-related-work.md +47 -0
  196. package/docs/paper/output/submission-bundle/docs/paper/03-problem-definition.md +108 -0
  197. package/docs/paper/output/submission-bundle/docs/paper/04-design.md +164 -0
  198. package/docs/paper/output/submission-bundle/docs/paper/05-guardbench-spec.md +412 -0
  199. package/docs/paper/output/submission-bundle/docs/paper/06-implementation.md +113 -0
  200. package/docs/paper/output/submission-bundle/docs/paper/07-evaluation.md +168 -0
  201. package/docs/paper/output/submission-bundle/docs/paper/08-discussion-limitations.md +61 -0
  202. package/docs/paper/output/submission-bundle/docs/paper/09-conclusion.md +11 -0
  203. package/docs/paper/output/submission-bundle/docs/paper/SUBMISSION_README.md +162 -0
  204. package/docs/paper/output/submission-bundle/docs/paper/appendix-a-demo-transcript.md +114 -0
  205. package/docs/paper/output/submission-bundle/docs/paper/arxiv-compile-report.schema.json +116 -0
  206. package/docs/paper/output/submission-bundle/docs/paper/arxiv-source.schema.json +61 -0
  207. package/docs/paper/output/submission-bundle/docs/paper/audrey-paper-v1.md +1106 -0
  208. package/docs/paper/output/submission-bundle/docs/paper/browser-launch-plan.json +209 -0
  209. package/docs/paper/output/submission-bundle/docs/paper/browser-launch-plan.schema.json +100 -0
  210. package/docs/paper/output/submission-bundle/docs/paper/browser-launch-results.json +86 -0
  211. package/docs/paper/output/submission-bundle/docs/paper/browser-launch-results.schema.json +66 -0
  212. package/docs/paper/output/submission-bundle/docs/paper/claim-register.json +138 -0
  213. package/docs/paper/output/submission-bundle/docs/paper/claim-register.schema.json +81 -0
  214. package/docs/paper/output/submission-bundle/docs/paper/evidence-ledger.md +103 -0
  215. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/README-arxiv.txt +8 -0
  216. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/arxiv-manifest.json +41 -0
  217. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/main.tex +949 -0
  218. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/references.bib +222 -0
  219. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv-compile-report.json +24 -0
  220. package/docs/paper/output/submission-bundle/docs/paper/paper-submission-bundle.schema.json +70 -0
  221. package/docs/paper/output/submission-bundle/docs/paper/publication-pack.json +81 -0
  222. package/docs/paper/output/submission-bundle/docs/paper/publication-pack.schema.json +60 -0
  223. package/docs/paper/output/submission-bundle/docs/paper/references.bib +222 -0
  224. package/docs/paper/output/submission-bundle/package.json +212 -0
  225. package/docs/paper/output/submission-bundle/paper-submission-manifest.json +379 -0
  226. package/docs/paper/paper-submission-bundle.schema.json +70 -0
  227. package/docs/paper/publication-pack.json +81 -0
  228. package/docs/paper/publication-pack.schema.json +60 -0
  229. package/docs/paper/references.bib +222 -0
  230. package/package.json +87 -4
  231. package/scripts/audit-release-completion.mjs +362 -0
  232. package/scripts/create-arxiv-source.mjs +362 -0
  233. package/scripts/create-paper-submission-bundle.mjs +210 -0
  234. package/scripts/finalize-release.mjs +526 -0
  235. package/scripts/prepare-release-cut.mjs +269 -0
  236. package/scripts/publish-release-bundle.mjs +209 -0
  237. package/scripts/publish-release-github-api.mjs +429 -0
  238. package/scripts/run-vitest.mjs +34 -0
  239. package/scripts/smoke-cli.js +92 -0
  240. package/scripts/sync-paper-artifacts.mjs +109 -0
  241. package/scripts/verify-arxiv-compile.mjs +440 -0
  242. package/scripts/verify-arxiv-source.mjs +194 -0
  243. package/scripts/verify-browser-launch-plan.mjs +237 -0
  244. package/scripts/verify-browser-launch-results.mjs +285 -0
  245. package/scripts/verify-paper-artifacts.mjs +338 -0
  246. package/scripts/verify-paper-claims.mjs +226 -0
  247. package/scripts/verify-paper-submission-bundle.mjs +207 -0
  248. package/scripts/verify-publication-pack.mjs +196 -0
  249. package/scripts/verify-python-package.py +201 -0
  250. package/scripts/verify-release-readiness.mjs +785 -0
@@ -0,0 +1,123 @@
1
+ {
2
+ "generatedAt": "2026-05-01T02:15:29.400Z",
3
+ "durationMs": 4600,
4
+ "audreyVersion": null,
5
+ "gitSha": "e2e821b",
6
+ "methodology": {
7
+ "embedding": "mock provider, 64 dimensions (in-process, no network)",
8
+ "llm": "mock provider (in-process)",
9
+ "retrieval": "hybrid (vector + lexical) with limit=5",
10
+ "sizes": [
11
+ 100,
12
+ 1000,
13
+ 5000
14
+ ],
15
+ "recallRunsPerSize": 50,
16
+ "notes": "Latency is wall-clock for a single call from a JS caller. Cloud and local 384-dim providers will report higher recall latency dominated by embedding cost and network. Run on your own hardware before quoting."
17
+ },
18
+ "machine": {
19
+ "node": "25.5.0",
20
+ "v8": "14.1.146.11-node.18",
21
+ "platform": "win32",
22
+ "arch": "x64",
23
+ "osRelease": "10.0.26200",
24
+ "cpuCount": 24,
25
+ "cpuModel": "AMD Ryzen 9 7900X3D 12-Core Processor ",
26
+ "memoryGb": 62.9
27
+ },
28
+ "sizes": [
29
+ {
30
+ "corpusSize": 100,
31
+ "encodeMs": {
32
+ "samples": 100,
33
+ "p50": 0.331,
34
+ "p95": 0.589,
35
+ "p99": 7.65,
36
+ "min": 0.214,
37
+ "max": 10.978,
38
+ "mean": 0.577
39
+ },
40
+ "hybridRecallMs": {
41
+ "samples": 50,
42
+ "p50": 0.539,
43
+ "p95": 1.82,
44
+ "p99": 2.712,
45
+ "min": 0.448,
46
+ "max": 2.712,
47
+ "mean": 0.659
48
+ },
49
+ "postEncodeQueueMs": {
50
+ "samples": 100,
51
+ "p50": 0.344,
52
+ "p95": 0.73,
53
+ "p99": 9.715,
54
+ "min": 0.079,
55
+ "max": 13.218,
56
+ "mean": 0.533
57
+ },
58
+ "queueEvents": 100
59
+ },
60
+ {
61
+ "corpusSize": 1000,
62
+ "encodeMs": {
63
+ "samples": 1000,
64
+ "p50": 0.307,
65
+ "p95": 2.147,
66
+ "p99": 9.672,
67
+ "min": 0.183,
68
+ "max": 13.514,
69
+ "mean": 0.639
70
+ },
71
+ "hybridRecallMs": {
72
+ "samples": 50,
73
+ "p50": 1.566,
74
+ "p95": 2.364,
75
+ "p99": 21.177,
76
+ "min": 0.608,
77
+ "max": 21.177,
78
+ "mean": 1.803
79
+ },
80
+ "postEncodeQueueMs": {
81
+ "samples": 1000,
82
+ "p50": 0.321,
83
+ "p95": 1.468,
84
+ "p99": 9.744,
85
+ "min": 0.07,
86
+ "max": 15.61,
87
+ "mean": 0.553
88
+ },
89
+ "queueEvents": 1000
90
+ },
91
+ {
92
+ "corpusSize": 5000,
93
+ "encodeMs": {
94
+ "samples": 5000,
95
+ "p50": 0.308,
96
+ "p95": 1.838,
97
+ "p99": 10.45,
98
+ "min": 0.171,
99
+ "max": 19.247,
100
+ "mean": 0.653
101
+ },
102
+ "hybridRecallMs": {
103
+ "samples": 50,
104
+ "p50": 2.091,
105
+ "p95": 3.417,
106
+ "p99": 16.58,
107
+ "min": 2.005,
108
+ "max": 16.58,
109
+ "mean": 2.492
110
+ },
111
+ "postEncodeQueueMs": {
112
+ "samples": 5000,
113
+ "p50": 0.313,
114
+ "p95": 1.217,
115
+ "p99": 10.532,
116
+ "min": 0.074,
117
+ "max": 19.378,
118
+ "mean": 0.554
119
+ },
120
+ "queueEvents": 5000
121
+ }
122
+ ]
123
+ }
@@ -0,0 +1,123 @@
1
+ {
2
+ "generatedAt": "2026-05-05T17:32:45.578Z",
3
+ "durationMs": 1042,
4
+ "audreyVersion": "0.23.0",
5
+ "gitSha": "20cdde0",
6
+ "methodology": {
7
+ "embedding": "mock provider, 64 dimensions (in-process, no network)",
8
+ "llm": "mock provider (in-process)",
9
+ "retrieval": "hybrid (vector + lexical) with limit=5",
10
+ "sizes": [
11
+ 100,
12
+ 1000,
13
+ 5000
14
+ ],
15
+ "recallRunsPerSize": 50,
16
+ "notes": "Latency is wall-clock for a single call from a JS caller. Cloud and local 384-dim providers will report higher recall latency dominated by embedding cost and network. Run on your own hardware before quoting."
17
+ },
18
+ "machine": {
19
+ "node": "25.9.0",
20
+ "v8": "14.1.146.11-node.25",
21
+ "platform": "darwin",
22
+ "arch": "arm64",
23
+ "osRelease": "25.4.0",
24
+ "cpuCount": 18,
25
+ "cpuModel": "Apple M5 Max",
26
+ "memoryGb": 64
27
+ },
28
+ "sizes": [
29
+ {
30
+ "corpusSize": 100,
31
+ "encodeMs": {
32
+ "samples": 100,
33
+ "p50": 0.136,
34
+ "p95": 0.246,
35
+ "p99": 1.054,
36
+ "min": 0.084,
37
+ "max": 2.211,
38
+ "mean": 0.178
39
+ },
40
+ "hybridRecallMs": {
41
+ "samples": 50,
42
+ "p50": 0.215,
43
+ "p95": 0.693,
44
+ "p99": 1.285,
45
+ "min": 0.166,
46
+ "max": 1.285,
47
+ "mean": 0.261
48
+ },
49
+ "postEncodeQueueMs": {
50
+ "samples": 100,
51
+ "p50": 0.134,
52
+ "p95": 0.35,
53
+ "p99": 1.079,
54
+ "min": 0.036,
55
+ "max": 1.244,
56
+ "mean": 0.156
57
+ },
58
+ "queueEvents": 100
59
+ },
60
+ {
61
+ "corpusSize": 1000,
62
+ "encodeMs": {
63
+ "samples": 1000,
64
+ "p50": 0.114,
65
+ "p95": 0.187,
66
+ "p99": 0.865,
67
+ "min": 0.074,
68
+ "max": 1.378,
69
+ "mean": 0.139
70
+ },
71
+ "hybridRecallMs": {
72
+ "samples": 50,
73
+ "p50": 0.272,
74
+ "p95": 0.48,
75
+ "p99": 2.081,
76
+ "min": 0.254,
77
+ "max": 2.081,
78
+ "mean": 0.322
79
+ },
80
+ "postEncodeQueueMs": {
81
+ "samples": 1000,
82
+ "p50": 0.122,
83
+ "p95": 0.256,
84
+ "p99": 0.921,
85
+ "min": 0.035,
86
+ "max": 1.49,
87
+ "mean": 0.127
88
+ },
89
+ "queueEvents": 1000
90
+ },
91
+ {
92
+ "corpusSize": 5000,
93
+ "encodeMs": {
94
+ "samples": 5000,
95
+ "p50": 0.109,
96
+ "p95": 0.174,
97
+ "p99": 0.938,
98
+ "min": 0.07,
99
+ "max": 3.618,
100
+ "mean": 0.136
101
+ },
102
+ "hybridRecallMs": {
103
+ "samples": 50,
104
+ "p50": 0.735,
105
+ "p95": 0.867,
106
+ "p99": 4.228,
107
+ "min": 0.688,
108
+ "max": 4.228,
109
+ "mean": 0.816
110
+ },
111
+ "postEncodeQueueMs": {
112
+ "samples": 5000,
113
+ "p50": 0.116,
114
+ "p95": 0.242,
115
+ "p99": 0.978,
116
+ "min": 0.034,
117
+ "max": 6.272,
118
+ "mean": 0.124
119
+ },
120
+ "queueEvents": 5000
121
+ }
122
+ ]
123
+ }
@@ -0,0 +1,104 @@
1
+ import { existsSync } from 'node:fs';
2
+ import { basename, resolve } from 'node:path';
3
+ import { fileURLToPath, pathToFileURL } from 'node:url';
4
+ import { validateGuardBenchAdapter } from './guardbench.js';
5
+ import { publicPath } from './public-paths.mjs';
6
+
7
const DEFAULT_ADAPTER = 'benchmarks/adapters/example-allow.mjs';

/**
 * Parse command-line arguments for the GuardBench adapter module validator.
 *
 * Recognized tokens: `--adapter <path>`, `--json`, `--help` / `-h`.
 *
 * @param {string[]} [argv] - Argument tokens; defaults to `process.argv.slice(2)`.
 * @returns {{ adapter: string, json: boolean, help?: boolean }} Parsed options.
 *   `help` is only present when a help flag was passed.
 * @throws {Error} When a token is not recognized, or when `--adapter` is not
 *   followed by a value.
 */
export function parseAdapterModuleValidatorArgs(argv = process.argv.slice(2)) {
  const args = {
    adapter: DEFAULT_ADAPTER,
    json: false,
  };

  for (let i = 0; i < argv.length; i++) {
    const token = argv[i];
    if (token === '--adapter') {
      // A value-less `--adapter` used to be reported as "Unknown argument",
      // which hid the real problem: the flag is known, its value is missing.
      const value = argv[++i];
      if (!value) throw new Error(`Missing value for ${token}`);
      args.adapter = value;
    } else if (token === '--json') {
      args.json = true;
    } else if (token === '--help' || token === '-h') {
      args.help = true;
    } else {
      throw new Error(`Unknown argument: ${token}`);
    }
  }

  return args;
}
25
+
26
/**
 * Build the help text that `main()` prints for `--help` / `-h`.
 *
 * @returns {string} Usage text with the default adapter path interpolated.
 */
function usage() {
  return `Usage: node benchmarks/validate-adapter-module.mjs [options]

Options:
--adapter <path> ESM GuardBench adapter module. Default: ${DEFAULT_ADAPTER}.
--json Print the machine-readable validation report.
`;
}
34
+
35
/**
 * Load a GuardBench adapter module from disk and check it with
 * `validateGuardBenchAdapter`.
 *
 * The adapter candidate comes from the module's `createGuardBenchAdapter()`
 * factory when one is exported; otherwise from the `default` export, falling
 * back to the `adapter` export. A missing file and any error raised while
 * importing or validating are recorded in `failures` instead of being thrown.
 *
 * @param {{ adapter?: string }} [options] - `adapter` is the module path;
 *   DEFAULT_ADAPTER is used when it is absent.
 * @returns {Promise<object>} Report with `ok`, `adapterPath`, `moduleFile`,
 *   `adapter` (summary or null), `contract`, and `failures`.
 */
export async function validateAdapterModuleFile(options = {}) {
  const adapterPath = resolve(options.adapter ?? DEFAULT_ADAPTER);
  const failures = [];
  let adapter = null;

  if (existsSync(adapterPath)) {
    try {
      const moduleNamespace = await import(pathToFileURL(adapterPath).href);
      let candidate;
      if (typeof moduleNamespace.createGuardBenchAdapter === 'function') {
        candidate = await moduleNamespace.createGuardBenchAdapter();
      } else {
        candidate = moduleNamespace.default ?? moduleNamespace.adapter;
      }
      adapter = validateGuardBenchAdapter(candidate, adapterPath);
    } catch (error) {
      failures.push(error.message);
    }
  } else {
    failures.push(`Adapter not found: ${adapterPath}`);
  }

  const ok = failures.length === 0;
  const publicAdapterPath = publicPath(adapterPath);
  const moduleFile = basename(adapterPath);

  // Summarize only what the report needs; the adapter object itself is not returned.
  let adapterSummary = null;
  if (adapter) {
    adapterSummary = {
      name: adapter.name,
      description: adapter.description ?? null,
      hasSetup: typeof adapter.setup === 'function',
      hasDecide: typeof adapter.decide === 'function',
      hasCleanup: typeof adapter.cleanup === 'function',
    };
  }

  return {
    ok,
    adapterPath: publicAdapterPath,
    moduleFile,
    adapter: adapterSummary,
    contract: {
      moduleFormat: 'ESM',
      exports: ['default', 'adapter', 'createGuardBenchAdapter'],
      requiredMethods: ['decide'],
      optionalMethods: ['setup', 'cleanup'],
    },
    failures,
  };
}
76
+
77
/**
 * CLI driver: parse arguments, validate the adapter module, and print the
 * result either as JSON (`--json`) or as human-readable lines.
 * Prints the usage text and returns when help was requested.
 * Exits the process with code 1 when validation fails.
 */
async function main() {
  const args = parseAdapterModuleValidatorArgs();
  if (args.help) {
    console.log(usage());
    return;
  }

  const validation = await validateAdapterModuleFile(args);
  const passed = validation.ok;

  if (args.json) {
    console.log(JSON.stringify(validation, null, 2));
  } else if (!passed) {
    console.error('GuardBench adapter module validation failed:');
    for (const failure of validation.failures) {
      console.error(`- ${failure}`);
    }
  } else {
    console.log(`GuardBench adapter module validation passed: ${validation.adapterPath}`);
    console.log(`Adapter: ${validation.adapter.name}`);
    console.log(`Methods: setup=${validation.adapter.hasSetup}, decide=${validation.adapter.hasDecide}, cleanup=${validation.adapter.hasCleanup}`);
  }

  if (!passed) {
    process.exit(1);
  }
}
98
+
99
// Run the CLI only when this file is the script Node was started with,
// so importing the module for its exports has no side effects.
if (process.argv[1] && fileURLToPath(import.meta.url) === resolve(process.argv[1])) {
  main().catch((failure) => {
    console.error(failure.stack ?? failure.message);
    process.exit(1);
  });
}
@@ -0,0 +1,134 @@
1
+ import { existsSync, readFileSync } from 'node:fs';
2
+ import { resolve } from 'node:path';
3
+ import { fileURLToPath } from 'node:url';
4
+ import { validateSchema } from './validate-guardbench-artifacts.mjs';
5
+ import { validateAdapterModuleFile } from './validate-adapter-module.mjs';
6
+ import { publicPath } from './public-paths.mjs';
7
+
8
const DEFAULT_REGISTRY = 'benchmarks/adapters/registry.json';
const DEFAULT_SCHEMA = 'benchmarks/schemas/guardbench-adapter-registry.schema.json';

/**
 * Parse command-line arguments for the GuardBench adapter registry validator.
 *
 * Recognized tokens: `--registry <path>`, `--schema <path>`, `--json`,
 * `--help` / `-h`.
 *
 * @param {string[]} [argv] - Argument tokens; defaults to `process.argv.slice(2)`.
 * @returns {{ registry: string, schema: string, json: boolean, help?: boolean }}
 *   Parsed options. `help` is only present when a help flag was passed.
 * @throws {Error} When a token is not recognized, or when `--registry` /
 *   `--schema` is not followed by a value.
 */
export function parseAdapterRegistryValidatorArgs(argv = process.argv.slice(2)) {
  const args = {
    registry: DEFAULT_REGISTRY,
    schema: DEFAULT_SCHEMA,
    json: false,
  };

  for (let i = 0; i < argv.length; i++) {
    const token = argv[i];
    if (token === '--registry' || token === '--schema') {
      // A value-less flag used to be reported as "Unknown argument",
      // which hid the real problem: the flag is known, its value is missing.
      const value = argv[++i];
      if (!value) throw new Error(`Missing value for ${token}`);
      if (token === '--registry') args.registry = value;
      else args.schema = value;
    } else if (token === '--json') {
      args.json = true;
    } else if (token === '--help' || token === '-h') {
      args.help = true;
    } else {
      throw new Error(`Unknown argument: ${token}`);
    }
  }

  return args;
}
29
+
30
/**
 * Build the help text that `main()` prints for `--help` / `-h`.
 *
 * @returns {string} Usage text with the default registry and schema paths interpolated.
 */
function usage() {
  return `Usage: node benchmarks/validate-adapter-registry.mjs [options]

Options:
--registry <path> Adapter registry JSON. Default: ${DEFAULT_REGISTRY}.
--schema <path> Adapter registry JSON schema. Default: ${DEFAULT_SCHEMA}.
--json Print the machine-readable validation report.
`;
}
39
+
40
/**
 * Read a file as UTF-8 text and parse it as JSON.
 *
 * @param {string} path - File to read.
 * @returns {*} The parsed JSON value.
 * @throws {Error} When the file cannot be read or does not contain valid JSON.
 */
function readJson(path) {
  const text = readFileSync(path, 'utf-8');
  return JSON.parse(text);
}
43
+
44
/**
 * Validate the GuardBench adapter registry: schema conformance, unique ids,
 * credential-mode / requiredEnv consistency, command references, and that each
 * registered module exists and passes `validateAdapterModuleFile`.
 *
 * Read, parse, and schema problems are collected into `failures` rather than
 * thrown. Because a schema-invalid registry is still inspected afterwards, the
 * per-adapter checks guard against malformed entries (non-array `adapters`,
 * non-object entries, missing `requiredEnv`, non-string `path`, commands
 * without an `includes` method) and report them as failures instead of
 * crashing with a TypeError.
 *
 * @param {{ registry?: string, schema?: string }} [options] - Paths to the
 *   registry JSON and its JSON schema; DEFAULT_REGISTRY / DEFAULT_SCHEMA are
 *   used when absent.
 * @returns {Promise<{ ok: boolean, registry: *, schema: *, adapters: object[], failures: string[] }>}
 *   Validation report. `adapters` holds one entry per adapter whose module
 *   file exists on disk.
 */
export async function validateAdapterRegistry(options = {}) {
  const registryPath = resolve(options.registry ?? DEFAULT_REGISTRY);
  const schemaPath = resolve(options.schema ?? DEFAULT_SCHEMA);
  const failures = [];
  let registry = null;

  try {
    registry = readJson(registryPath);
  } catch (error) {
    failures.push(error.message);
  }

  try {
    const schema = readJson(schemaPath);
    if (registry) failures.push(...validateSchema(registry, schema, 'guardbench-adapter-registry'));
  } catch (error) {
    failures.push(error.message);
  }

  // Schema violations were only recorded above, so entries may be malformed here.
  const entries = Array.isArray(registry?.adapters) ? registry.adapters : [];
  const ids = new Set();
  const adapterReports = [];
  for (const [index, adapter] of entries.entries()) {
    if (adapter === null || typeof adapter !== 'object') {
      failures.push(`Adapter entry ${index} is not an object`);
      continue;
    }

    if (ids.has(adapter.id)) failures.push(`Duplicate adapter id: ${adapter.id}`);
    ids.add(adapter.id);

    const { credentialMode } = adapter;
    if (credentialMode === 'none' || credentialMode === 'runtime-env') {
      const requiredEnvCount = adapter.requiredEnv?.length;
      if (typeof requiredEnvCount !== 'number') {
        failures.push(`Adapter ${adapter.id} has credentialMode=${credentialMode} but does not declare a requiredEnv list`);
      } else if (credentialMode === 'none' && requiredEnvCount !== 0) {
        failures.push(`Adapter ${adapter.id} has credentialMode=none but declares requiredEnv`);
      } else if (credentialMode === 'runtime-env' && requiredEnvCount === 0) {
        failures.push(`Adapter ${adapter.id} has credentialMode=runtime-env but declares no requiredEnv`);
      }
    }

    for (const [commandName, command] of Object.entries(adapter.commands ?? {})) {
      if (commandName !== 'moduleValidate' && commandName !== 'selfTest') continue;
      // A command that cannot be searched (null, number, ...) cannot reference the path.
      const referencesPath = typeof command?.includes === 'function' && command.includes(adapter.path);
      if (!referencesPath) {
        failures.push(`Adapter ${adapter.id} command ${commandName} does not reference ${adapter.path}`);
      }
    }

    // path.resolve() throws on non-string input, so check the type first.
    if (typeof adapter.path !== 'string') {
      failures.push(`Adapter ${adapter.id} path must be a string`);
      continue;
    }
    if (!existsSync(resolve(adapter.path))) {
      failures.push(`Adapter ${adapter.id} path does not exist: ${adapter.path}`);
      continue;
    }

    const report = await validateAdapterModuleFile({ adapter: adapter.path });
    adapterReports.push({
      id: adapter.id,
      ok: report.ok,
      adapter: report.adapter,
      credentialMode: adapter.credentialMode,
      failures: report.failures,
    });
    if (!report.ok) {
      failures.push(`Adapter ${adapter.id} failed module validation: ${report.failures.join('; ')}`);
    }
    if (report.adapter?.name && report.adapter.name !== adapter.name) {
      failures.push(`Adapter ${adapter.id} registry name ${adapter.name} does not match module name ${report.adapter.name}`);
    }
  }

  return {
    ok: failures.length === 0,
    registry: publicPath(registryPath),
    schema: publicPath(schemaPath),
    adapters: adapterReports,
    failures,
  };
}
107
+
108
/**
 * CLI driver: parse arguments, validate the adapter registry, and print the
 * result either as JSON (`--json`) or as human-readable lines.
 * Prints the usage text and returns when help was requested.
 * Exits the process with code 1 when validation fails.
 */
async function main() {
  const args = parseAdapterRegistryValidatorArgs();
  if (args.help) {
    console.log(usage());
    return;
  }

  const validation = await validateAdapterRegistry(args);
  const passed = validation.ok;

  if (args.json) {
    console.log(JSON.stringify(validation, null, 2));
  } else if (!passed) {
    console.error('GuardBench adapter registry validation failed:');
    for (const failure of validation.failures) {
      console.error(`- ${failure}`);
    }
  } else {
    console.log(`GuardBench adapter registry validation passed: ${validation.registry}`);
    console.log(`Adapters: ${validation.adapters.length}`);
  }

  if (!passed) {
    process.exit(1);
  }
}
128
+
129
// Run the CLI only when this file is the script Node was started with,
// so importing the module for its exports has no side effects.
if (process.argv[1] && fileURLToPath(import.meta.url) === resolve(process.argv[1])) {
  main().catch((failure) => {
    console.error(failure.stack ?? failure.message);
    process.exit(1);
  });
}
@@ -0,0 +1,96 @@
1
+ import { existsSync, readFileSync } from 'node:fs';
2
+ import { resolve } from 'node:path';
3
+ import { fileURLToPath } from 'node:url';
4
+ import { validateAdapterSelfTestReport } from './adapter-self-test.mjs';
5
+ import { publicPath } from './public-paths.mjs';
6
+
7
const DEFAULT_REPORT = 'benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json';

/**
 * Parse command-line arguments for the GuardBench adapter self-test validator.
 *
 * Recognized tokens: `--report <path>` (alias `--file <path>`),
 * `--schema <path>`, `--json`, `--help` / `-h`.
 *
 * @param {string[]} [argv] - Argument tokens; defaults to `process.argv.slice(2)`.
 * @returns {{ report: string, schema: (string|undefined), json: boolean, help?: boolean }}
 *   Parsed options. `schema` stays `undefined` unless `--schema` is given;
 *   `help` is only present when a help flag was passed.
 * @throws {Error} When a token is not recognized, or when a flag that takes a
 *   value is not followed by one.
 */
export function parseAdapterSelfTestValidatorArgs(argv = process.argv.slice(2)) {
  const args = {
    report: DEFAULT_REPORT,
    schema: undefined,
    json: false,
  };

  for (let i = 0; i < argv.length; i++) {
    const token = argv[i];
    if (token === '--report' || token === '--file' || token === '--schema') {
      // A value-less flag used to be reported as "Unknown argument",
      // which hid the real problem: the flag is known, its value is missing.
      const value = argv[++i];
      if (!value) throw new Error(`Missing value for ${token}`);
      if (token === '--schema') args.schema = value;
      else args.report = value;
    } else if (token === '--json') {
      args.json = true;
    } else if (token === '--help' || token === '-h') {
      args.help = true;
    } else {
      throw new Error(`Unknown argument: ${token}`);
    }
  }

  return args;
}
27
+
28
/**
 * Build the help text that `main()` prints for `--help` / `-h`.
 *
 * @returns {string} Usage text with the default report path interpolated.
 */
function usage() {
  return `Usage: node benchmarks/validate-adapter-self-test.mjs [options]

Options:
--report <path> Adapter self-test JSON report. Default: ${DEFAULT_REPORT}.
--schema <path> Optional alternate schema path.
--json Print the machine-readable validation report.
`;
}
37
+
38
/**
 * Read a file as UTF-8 text and parse it as JSON.
 *
 * @param {string} path - File to read.
 * @returns {*} The parsed JSON value.
 * @throws {Error} When the file cannot be read or does not contain valid JSON.
 */
function readJson(path) {
  const text = readFileSync(path, 'utf-8');
  return JSON.parse(text);
}
41
+
42
/**
 * Validate an adapter self-test report file with `validateAdapterSelfTestReport`.
 *
 * A missing file, a read/parse error, and any error thrown by the report
 * validator are recorded in `failures` instead of being thrown.
 *
 * @param {{ report?: string, schema?: string }} [options] - `report` is the
 *   JSON report path (DEFAULT_REPORT when absent); `schema` is forwarded to the
 *   report validator.
 * @returns {object} Summary with `ok`, `report`, `adapter`, `scenarios`,
 *   `expectedScenarios`, `lowScoreAllowed` (each null when not present in the
 *   report), and `failures`.
 */
export function validateAdapterSelfTestFile(options = {}) {
  const reportPath = resolve(options.report ?? DEFAULT_REPORT);
  const failures = [];
  let report = null;

  if (existsSync(reportPath)) {
    try {
      report = readJson(reportPath);
      const reportFailures = validateAdapterSelfTestReport(report, { schema: options.schema });
      failures.push(...reportFailures);
    } catch (error) {
      failures.push(error.message);
    }
  } else {
    failures.push(`Missing adapter self-test report: ${reportPath}`);
  }

  const conformance = report?.conformance;
  return {
    ok: failures.length === 0,
    report: publicPath(reportPath),
    adapter: report?.adapter?.name ?? null,
    scenarios: conformance?.scenarios ?? null,
    expectedScenarios: conformance?.expectedScenarios ?? null,
    lowScoreAllowed: report?.contract?.lowScoreAllowed ?? null,
    failures,
  };
}
68
+
69
/**
 * CLI driver: parse arguments, validate the adapter self-test report, and
 * print the result either as JSON (`--json`) or as human-readable lines.
 * Prints the usage text and returns when help was requested.
 * Exits the process with code 1 when validation fails.
 */
async function main() {
  const args = parseAdapterSelfTestValidatorArgs();
  if (args.help) {
    console.log(usage());
    return;
  }

  const validation = validateAdapterSelfTestFile(args);
  const passed = validation.ok;

  if (args.json) {
    console.log(JSON.stringify(validation, null, 2));
  } else if (!passed) {
    console.error('GuardBench adapter self-test validation failed:');
    for (const failure of validation.failures) {
      console.error(`- ${failure}`);
    }
  } else {
    console.log(`GuardBench adapter self-test validation passed: ${validation.report}`);
    console.log(`Adapter: ${validation.adapter}`);
    console.log(`Rows: ${validation.scenarios}/${validation.expectedScenarios}`);
  }

  if (!passed) {
    process.exit(1);
  }
}
90
+
91
// Run the CLI only when this file is the script Node was started with,
// so importing the module for its exports has no side effects.
if (process.argv[1] && fileURLToPath(import.meta.url) === resolve(process.argv[1])) {
  main().catch((failure) => {
    console.error(failure.stack ?? failure.message);
    process.exit(1);
  });
}