audrey 0.23.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250) hide show
  1. package/CHANGELOG.md +81 -19
  2. package/LICENSE +21 -21
  3. package/README.md +209 -5
  4. package/SECURITY.md +2 -1
  5. package/benchmarks/adapter-kit.mjs +20 -0
  6. package/benchmarks/adapter-self-test.mjs +166 -0
  7. package/benchmarks/adapters/example-allow.mjs +28 -0
  8. package/benchmarks/adapters/mem0-platform.mjs +267 -0
  9. package/benchmarks/adapters/registry.json +51 -0
  10. package/benchmarks/adapters/zep-cloud.mjs +280 -0
  11. package/benchmarks/baselines.js +169 -0
  12. package/benchmarks/build-leaderboard.mjs +170 -0
  13. package/benchmarks/cases.js +537 -0
  14. package/benchmarks/create-conformance-card.mjs +139 -0
  15. package/benchmarks/create-submission-bundle.mjs +176 -0
  16. package/benchmarks/dry-run-external-adapters.mjs +165 -0
  17. package/benchmarks/guardbench.js +1035 -0
  18. package/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +50 -0
  19. package/benchmarks/output/external/guardbench-external-dry-run.json +69 -0
  20. package/benchmarks/output/external/guardbench-external-evidence.json +56 -0
  21. package/benchmarks/output/guardbench-conformance-card.json +63 -0
  22. package/benchmarks/output/guardbench-manifest.json +414 -0
  23. package/benchmarks/output/guardbench-raw.json +1171 -0
  24. package/benchmarks/output/guardbench-summary.json +1981 -0
  25. package/benchmarks/output/leaderboard/guardbench-leaderboard.json +93 -0
  26. package/benchmarks/output/leaderboard/guardbench-leaderboard.md +7 -0
  27. package/benchmarks/output/submission-bundle/guardbench-conformance-card.json +63 -0
  28. package/benchmarks/output/submission-bundle/guardbench-manifest.json +414 -0
  29. package/benchmarks/output/submission-bundle/guardbench-raw.json +1171 -0
  30. package/benchmarks/output/submission-bundle/guardbench-summary.json +1981 -0
  31. package/benchmarks/output/submission-bundle/schemas/guardbench-adapter-registry.schema.json +69 -0
  32. package/benchmarks/output/submission-bundle/schemas/guardbench-adapter-self-test.schema.json +156 -0
  33. package/benchmarks/output/submission-bundle/schemas/guardbench-conformance-card.schema.json +184 -0
  34. package/benchmarks/output/submission-bundle/schemas/guardbench-external-dry-run.schema.json +74 -0
  35. package/benchmarks/output/submission-bundle/schemas/guardbench-external-evidence.schema.json +108 -0
  36. package/benchmarks/output/submission-bundle/schemas/guardbench-external-run.schema.json +160 -0
  37. package/benchmarks/output/submission-bundle/schemas/guardbench-leaderboard.schema.json +179 -0
  38. package/benchmarks/output/submission-bundle/schemas/guardbench-manifest.schema.json +213 -0
  39. package/benchmarks/output/submission-bundle/schemas/guardbench-publication-verification.schema.json +47 -0
  40. package/benchmarks/output/submission-bundle/schemas/guardbench-raw.schema.json +164 -0
  41. package/benchmarks/output/submission-bundle/schemas/guardbench-submission-manifest.schema.json +151 -0
  42. package/benchmarks/output/submission-bundle/schemas/guardbench-summary.schema.json +228 -0
  43. package/benchmarks/output/submission-bundle/submission-manifest.json +131 -0
  44. package/benchmarks/output/submission-bundle/validation-report.json +31 -0
  45. package/benchmarks/output/summary.json +2354 -0
  46. package/benchmarks/perf-snapshot.js +304 -0
  47. package/benchmarks/perf.bench.js +161 -0
  48. package/benchmarks/public-paths.mjs +78 -0
  49. package/benchmarks/reference-results.js +70 -0
  50. package/benchmarks/report.js +259 -0
  51. package/benchmarks/run-external-guardbench.mjs +281 -0
  52. package/benchmarks/run.js +682 -0
  53. package/benchmarks/schemas/guardbench-adapter-registry.schema.json +69 -0
  54. package/benchmarks/schemas/guardbench-adapter-self-test.schema.json +156 -0
  55. package/benchmarks/schemas/guardbench-conformance-card.schema.json +184 -0
  56. package/benchmarks/schemas/guardbench-external-dry-run.schema.json +74 -0
  57. package/benchmarks/schemas/guardbench-external-evidence.schema.json +108 -0
  58. package/benchmarks/schemas/guardbench-external-run.schema.json +160 -0
  59. package/benchmarks/schemas/guardbench-leaderboard.schema.json +179 -0
  60. package/benchmarks/schemas/guardbench-manifest.schema.json +213 -0
  61. package/benchmarks/schemas/guardbench-publication-verification.schema.json +47 -0
  62. package/benchmarks/schemas/guardbench-raw.schema.json +164 -0
  63. package/benchmarks/schemas/guardbench-submission-manifest.schema.json +151 -0
  64. package/benchmarks/schemas/guardbench-summary.schema.json +228 -0
  65. package/benchmarks/snapshots/perf-0.22.2.json +123 -0
  66. package/benchmarks/snapshots/perf-0.23.0.json +123 -0
  67. package/benchmarks/validate-adapter-module.mjs +104 -0
  68. package/benchmarks/validate-adapter-registry.mjs +134 -0
  69. package/benchmarks/validate-adapter-self-test.mjs +96 -0
  70. package/benchmarks/validate-guardbench-artifacts.mjs +343 -0
  71. package/benchmarks/verify-external-evidence.mjs +296 -0
  72. package/benchmarks/verify-publication-artifacts.mjs +286 -0
  73. package/benchmarks/verify-submission-bundle.mjs +167 -0
  74. package/dist/mcp-server/config.d.ts +1 -1
  75. package/dist/mcp-server/config.d.ts.map +1 -1
  76. package/dist/mcp-server/config.js +1 -1
  77. package/dist/mcp-server/config.js.map +1 -1
  78. package/dist/mcp-server/index.d.ts +65 -3
  79. package/dist/mcp-server/index.d.ts.map +1 -1
  80. package/dist/mcp-server/index.js +675 -157
  81. package/dist/mcp-server/index.js.map +1 -1
  82. package/dist/src/action-key.d.ts +9 -0
  83. package/dist/src/action-key.d.ts.map +1 -0
  84. package/dist/src/action-key.js +49 -0
  85. package/dist/src/action-key.js.map +1 -0
  86. package/dist/src/adaptive.js +5 -5
  87. package/dist/src/affect.js +8 -8
  88. package/dist/src/audrey.d.ts +3 -0
  89. package/dist/src/audrey.d.ts.map +1 -1
  90. package/dist/src/audrey.js +55 -3
  91. package/dist/src/audrey.js.map +1 -1
  92. package/dist/src/capsule.js +4 -4
  93. package/dist/src/causal.js +3 -3
  94. package/dist/src/consolidate.js +48 -48
  95. package/dist/src/controller.d.ts +61 -5
  96. package/dist/src/controller.d.ts.map +1 -1
  97. package/dist/src/controller.js +230 -49
  98. package/dist/src/controller.js.map +1 -1
  99. package/dist/src/db.js +172 -172
  100. package/dist/src/decay.js +8 -8
  101. package/dist/src/embedding.d.ts +2 -1
  102. package/dist/src/embedding.d.ts.map +1 -1
  103. package/dist/src/embedding.js +39 -29
  104. package/dist/src/embedding.js.map +1 -1
  105. package/dist/src/encode.js +6 -6
  106. package/dist/src/feedback.d.ts +6 -0
  107. package/dist/src/feedback.d.ts.map +1 -1
  108. package/dist/src/feedback.js +6 -0
  109. package/dist/src/feedback.js.map +1 -1
  110. package/dist/src/forget.js +12 -12
  111. package/dist/src/hybrid-recall.js +9 -9
  112. package/dist/src/impact.js +6 -6
  113. package/dist/src/import.d.ts +3 -3
  114. package/dist/src/import.js +41 -41
  115. package/dist/src/index.d.ts +3 -3
  116. package/dist/src/index.d.ts.map +1 -1
  117. package/dist/src/index.js +2 -2
  118. package/dist/src/index.js.map +1 -1
  119. package/dist/src/interference.js +14 -14
  120. package/dist/src/introspect.js +18 -18
  121. package/dist/src/preflight.d.ts.map +1 -1
  122. package/dist/src/preflight.js +41 -0
  123. package/dist/src/preflight.js.map +1 -1
  124. package/dist/src/promote.js +7 -7
  125. package/dist/src/prompts.js +118 -118
  126. package/dist/src/recall.js +30 -30
  127. package/dist/src/reflexes.d.ts +1 -0
  128. package/dist/src/reflexes.d.ts.map +1 -1
  129. package/dist/src/reflexes.js +3 -0
  130. package/dist/src/reflexes.js.map +1 -1
  131. package/dist/src/rollback.js +4 -4
  132. package/dist/src/routes.d.ts.map +1 -1
  133. package/dist/src/routes.js +67 -1
  134. package/dist/src/routes.js.map +1 -1
  135. package/dist/src/validate.js +25 -25
  136. package/docs/AUDREY_PAPER_OUTLINE.md +175 -0
  137. package/docs/MEMORY_BENCHMARKING.md +59 -0
  138. package/docs/PRODUCTION_BACKLOG.md +304 -0
  139. package/docs/paper/00-master.md +48 -0
  140. package/docs/paper/01-introduction.md +27 -0
  141. package/docs/paper/02-related-work.md +47 -0
  142. package/docs/paper/03-problem-definition.md +108 -0
  143. package/docs/paper/04-design.md +164 -0
  144. package/docs/paper/05-guardbench-spec.md +412 -0
  145. package/docs/paper/06-implementation.md +113 -0
  146. package/docs/paper/07-evaluation.md +168 -0
  147. package/docs/paper/08-discussion-limitations.md +61 -0
  148. package/docs/paper/09-conclusion.md +11 -0
  149. package/docs/paper/SUBMISSION_README.md +162 -0
  150. package/docs/paper/appendix-a-demo-transcript.md +114 -0
  151. package/docs/paper/arxiv-compile-report.schema.json +116 -0
  152. package/docs/paper/arxiv-source.schema.json +61 -0
  153. package/docs/paper/audrey-paper-v1.md +1106 -0
  154. package/docs/paper/browser-launch-plan.json +209 -0
  155. package/docs/paper/browser-launch-plan.schema.json +100 -0
  156. package/docs/paper/browser-launch-results.json +86 -0
  157. package/docs/paper/browser-launch-results.schema.json +66 -0
  158. package/docs/paper/claim-register.json +138 -0
  159. package/docs/paper/claim-register.schema.json +81 -0
  160. package/docs/paper/evidence-ledger.md +103 -0
  161. package/docs/paper/output/arxiv/README-arxiv.txt +8 -0
  162. package/docs/paper/output/arxiv/arxiv-manifest.json +41 -0
  163. package/docs/paper/output/arxiv/main.tex +949 -0
  164. package/docs/paper/output/arxiv/references.bib +222 -0
  165. package/docs/paper/output/arxiv-compile-report.json +24 -0
  166. package/docs/paper/output/submission-bundle/LICENSE +21 -0
  167. package/docs/paper/output/submission-bundle/README.md +533 -0
  168. package/docs/paper/output/submission-bundle/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +50 -0
  169. package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-dry-run.json +69 -0
  170. package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-evidence.json +56 -0
  171. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-conformance-card.json +63 -0
  172. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-manifest.json +414 -0
  173. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-raw.json +1171 -0
  174. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-summary.json +1981 -0
  175. package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.json +93 -0
  176. package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.md +7 -0
  177. package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/submission-manifest.json +131 -0
  178. package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/validation-report.json +31 -0
  179. package/docs/paper/output/submission-bundle/benchmarks/output/summary.json +2354 -0
  180. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-adapter-registry.schema.json +69 -0
  181. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-adapter-self-test.schema.json +156 -0
  182. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-conformance-card.schema.json +184 -0
  183. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-external-dry-run.schema.json +74 -0
  184. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-external-evidence.schema.json +108 -0
  185. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-external-run.schema.json +160 -0
  186. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-leaderboard.schema.json +179 -0
  187. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-manifest.schema.json +213 -0
  188. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-publication-verification.schema.json +47 -0
  189. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-raw.schema.json +164 -0
  190. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-submission-manifest.schema.json +151 -0
  191. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-summary.schema.json +228 -0
  192. package/docs/paper/output/submission-bundle/docs/AUDREY_PAPER_OUTLINE.md +175 -0
  193. package/docs/paper/output/submission-bundle/docs/paper/00-master.md +48 -0
  194. package/docs/paper/output/submission-bundle/docs/paper/01-introduction.md +27 -0
  195. package/docs/paper/output/submission-bundle/docs/paper/02-related-work.md +47 -0
  196. package/docs/paper/output/submission-bundle/docs/paper/03-problem-definition.md +108 -0
  197. package/docs/paper/output/submission-bundle/docs/paper/04-design.md +164 -0
  198. package/docs/paper/output/submission-bundle/docs/paper/05-guardbench-spec.md +412 -0
  199. package/docs/paper/output/submission-bundle/docs/paper/06-implementation.md +113 -0
  200. package/docs/paper/output/submission-bundle/docs/paper/07-evaluation.md +168 -0
  201. package/docs/paper/output/submission-bundle/docs/paper/08-discussion-limitations.md +61 -0
  202. package/docs/paper/output/submission-bundle/docs/paper/09-conclusion.md +11 -0
  203. package/docs/paper/output/submission-bundle/docs/paper/SUBMISSION_README.md +162 -0
  204. package/docs/paper/output/submission-bundle/docs/paper/appendix-a-demo-transcript.md +114 -0
  205. package/docs/paper/output/submission-bundle/docs/paper/arxiv-compile-report.schema.json +116 -0
  206. package/docs/paper/output/submission-bundle/docs/paper/arxiv-source.schema.json +61 -0
  207. package/docs/paper/output/submission-bundle/docs/paper/audrey-paper-v1.md +1106 -0
  208. package/docs/paper/output/submission-bundle/docs/paper/browser-launch-plan.json +209 -0
  209. package/docs/paper/output/submission-bundle/docs/paper/browser-launch-plan.schema.json +100 -0
  210. package/docs/paper/output/submission-bundle/docs/paper/browser-launch-results.json +86 -0
  211. package/docs/paper/output/submission-bundle/docs/paper/browser-launch-results.schema.json +66 -0
  212. package/docs/paper/output/submission-bundle/docs/paper/claim-register.json +138 -0
  213. package/docs/paper/output/submission-bundle/docs/paper/claim-register.schema.json +81 -0
  214. package/docs/paper/output/submission-bundle/docs/paper/evidence-ledger.md +103 -0
  215. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/README-arxiv.txt +8 -0
  216. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/arxiv-manifest.json +41 -0
  217. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/main.tex +949 -0
  218. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/references.bib +222 -0
  219. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv-compile-report.json +24 -0
  220. package/docs/paper/output/submission-bundle/docs/paper/paper-submission-bundle.schema.json +70 -0
  221. package/docs/paper/output/submission-bundle/docs/paper/publication-pack.json +81 -0
  222. package/docs/paper/output/submission-bundle/docs/paper/publication-pack.schema.json +60 -0
  223. package/docs/paper/output/submission-bundle/docs/paper/references.bib +222 -0
  224. package/docs/paper/output/submission-bundle/package.json +212 -0
  225. package/docs/paper/output/submission-bundle/paper-submission-manifest.json +379 -0
  226. package/docs/paper/paper-submission-bundle.schema.json +70 -0
  227. package/docs/paper/publication-pack.json +81 -0
  228. package/docs/paper/publication-pack.schema.json +60 -0
  229. package/docs/paper/references.bib +222 -0
  230. package/package.json +87 -4
  231. package/scripts/audit-release-completion.mjs +362 -0
  232. package/scripts/create-arxiv-source.mjs +362 -0
  233. package/scripts/create-paper-submission-bundle.mjs +210 -0
  234. package/scripts/finalize-release.mjs +526 -0
  235. package/scripts/prepare-release-cut.mjs +269 -0
  236. package/scripts/publish-release-bundle.mjs +209 -0
  237. package/scripts/publish-release-github-api.mjs +429 -0
  238. package/scripts/run-vitest.mjs +34 -0
  239. package/scripts/smoke-cli.js +72 -0
  240. package/scripts/sync-paper-artifacts.mjs +109 -0
  241. package/scripts/verify-arxiv-compile.mjs +440 -0
  242. package/scripts/verify-arxiv-source.mjs +194 -0
  243. package/scripts/verify-browser-launch-plan.mjs +237 -0
  244. package/scripts/verify-browser-launch-results.mjs +285 -0
  245. package/scripts/verify-paper-artifacts.mjs +338 -0
  246. package/scripts/verify-paper-claims.mjs +226 -0
  247. package/scripts/verify-paper-submission-bundle.mjs +207 -0
  248. package/scripts/verify-publication-pack.mjs +196 -0
  249. package/scripts/verify-python-package.py +201 -0
  250. package/scripts/verify-release-readiness.mjs +741 -0
@@ -0,0 +1,160 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://audrey-memory.org/schemas/guardbench-external-run.schema.json",
4
+ "title": "GuardBench External Run Metadata",
5
+ "type": "object",
6
+ "additionalProperties": false,
7
+ "required": [
8
+ "suite",
9
+ "startedAt",
10
+ "adapter",
11
+ "adapterPath",
12
+ "outDir",
13
+ "requiredEnv",
14
+ "missingEnv",
15
+ "command",
16
+ "validationCommand",
17
+ "dryRun",
18
+ "status"
19
+ ],
20
+ "properties": {
21
+ "suite": { "const": "GuardBench external adapter run" },
22
+ "startedAt": { "type": "string", "minLength": 1 },
23
+ "completedAt": { "type": "string", "minLength": 1 },
24
+ "adapter": { "type": "string", "minLength": 1 },
25
+ "adapterPath": { "type": "string", "minLength": 1 },
26
+ "outDir": { "type": "string", "minLength": 1 },
27
+ "requiredEnv": {
28
+ "type": "array",
29
+ "items": { "type": "string", "minLength": 1 }
30
+ },
31
+ "missingEnv": {
32
+ "type": "array",
33
+ "items": { "type": "string", "minLength": 1 }
34
+ },
35
+ "command": {
36
+ "type": "array",
37
+ "minItems": 1,
38
+ "items": { "type": "string", "minLength": 1 }
39
+ },
40
+ "validationCommand": {
41
+ "type": "array",
42
+ "minItems": 1,
43
+ "items": { "type": "string", "minLength": 1 }
44
+ },
45
+ "dryRun": { "type": "boolean" },
46
+ "status": {
47
+ "enum": [
48
+ "blocked",
49
+ "dry-run-missing-env",
50
+ "dry-run-ready",
51
+ "failed",
52
+ "passed",
53
+ "running"
54
+ ]
55
+ },
56
+ "blockReason": { "type": "string", "minLength": 1 },
57
+ "exitCode": {
58
+ "anyOf": [
59
+ { "type": "integer" },
60
+ { "type": "null" }
61
+ ]
62
+ },
63
+ "signal": {
64
+ "anyOf": [
65
+ { "type": "string", "minLength": 1 },
66
+ { "type": "null" }
67
+ ]
68
+ },
69
+ "artifactHashes": { "$ref": "#/$defs/artifactHashes" },
70
+ "artifactValidation": { "$ref": "#/$defs/artifactValidation" },
71
+ "adapterConformance": { "$ref": "#/$defs/adapterConformance" }
72
+ },
73
+ "$defs": {
74
+ "artifactHashes": {
75
+ "type": "object",
76
+ "additionalProperties": false,
77
+ "required": [
78
+ "guardbench-manifest.json",
79
+ "guardbench-summary.json",
80
+ "guardbench-raw.json"
81
+ ],
82
+ "properties": {
83
+ "guardbench-manifest.json": { "$ref": "#/$defs/sha256" },
84
+ "guardbench-summary.json": { "$ref": "#/$defs/sha256" },
85
+ "guardbench-raw.json": { "$ref": "#/$defs/sha256" }
86
+ }
87
+ },
88
+ "sha256": {
89
+ "type": "string",
90
+ "pattern": "^[a-f0-9]{64}$"
91
+ },
92
+ "artifactValidation": {
93
+ "type": "object",
94
+ "additionalProperties": false,
95
+ "required": ["ok", "dir", "schemasDir", "files", "failures"],
96
+ "properties": {
97
+ "ok": { "type": "boolean" },
98
+ "dir": { "type": "string", "minLength": 1 },
99
+ "schemasDir": { "type": "string", "minLength": 1 },
100
+ "files": {
101
+ "type": "array",
102
+ "minItems": 3,
103
+ "items": { "type": "string", "minLength": 1 }
104
+ },
105
+ "optionalFiles": {
106
+ "type": "array",
107
+ "items": { "type": "string", "minLength": 1 }
108
+ },
109
+ "failures": {
110
+ "type": "array",
111
+ "items": { "type": "string" }
112
+ }
113
+ }
114
+ },
115
+ "adapterConformance": {
116
+ "type": "object",
117
+ "additionalProperties": false,
118
+ "required": [
119
+ "ok",
120
+ "adapter",
121
+ "requestedAdapter",
122
+ "scenarios",
123
+ "expectedScenarios",
124
+ "fullContractPassRate",
125
+ "decisionAccuracy",
126
+ "redactionLeaks",
127
+ "failures"
128
+ ],
129
+ "properties": {
130
+ "ok": { "type": "boolean" },
131
+ "adapter": { "type": "string", "minLength": 1 },
132
+ "requestedAdapter": { "type": "string", "minLength": 1 },
133
+ "scenarios": { "type": "integer", "minimum": 0 },
134
+ "expectedScenarios": { "type": "integer", "minimum": 0 },
135
+ "fullContractPassRate": {
136
+ "anyOf": [
137
+ { "type": "null" },
138
+ { "type": "number", "minimum": 0, "maximum": 1 }
139
+ ]
140
+ },
141
+ "decisionAccuracy": {
142
+ "anyOf": [
143
+ { "type": "null" },
144
+ { "type": "number", "minimum": 0, "maximum": 1 }
145
+ ]
146
+ },
147
+ "redactionLeaks": {
148
+ "anyOf": [
149
+ { "type": "null" },
150
+ { "type": "integer", "minimum": 0 }
151
+ ]
152
+ },
153
+ "failures": {
154
+ "type": "array",
155
+ "items": { "type": "string" }
156
+ }
157
+ }
158
+ }
159
+ }
160
+ }
@@ -0,0 +1,179 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://audrey-memory.org/schemas/guardbench-leaderboard.schema.json",
4
+ "title": "GuardBench Leaderboard",
5
+ "type": "object",
6
+ "additionalProperties": false,
7
+ "required": ["schemaVersion", "suite", "generatedAt", "ranking", "rows", "failures"],
8
+ "properties": {
9
+ "schemaVersion": { "const": "1.0.0" },
10
+ "suite": { "const": "GuardBench leaderboard" },
11
+ "generatedAt": { "type": "string", "minLength": 1 },
12
+ "ranking": {
13
+ "type": "array",
14
+ "minItems": 1,
15
+ "items": { "type": "string", "minLength": 1 }
16
+ },
17
+ "rows": {
18
+ "type": "array",
19
+ "items": { "$ref": "#/$defs/row" }
20
+ },
21
+ "failures": {
22
+ "type": "array",
23
+ "items": { "type": "string" }
24
+ }
25
+ },
26
+ "$defs": {
27
+ "row": {
28
+ "type": "object",
29
+ "additionalProperties": false,
30
+ "required": [
31
+ "rank",
32
+ "subject",
33
+ "score",
34
+ "conformance",
35
+ "source",
36
+ "verification"
37
+ ],
38
+ "properties": {
39
+ "rank": { "type": "integer", "minimum": 1 },
40
+ "subject": { "$ref": "#/$defs/subject" },
41
+ "score": { "$ref": "#/$defs/score" },
42
+ "conformance": { "$ref": "#/$defs/conformance" },
43
+ "source": { "$ref": "#/$defs/source" },
44
+ "verification": { "$ref": "#/$defs/verification" }
45
+ }
46
+ },
47
+ "subject": {
48
+ "type": "object",
49
+ "additionalProperties": false,
50
+ "required": ["name", "requestedAdapter", "external"],
51
+ "properties": {
52
+ "name": { "type": "string", "minLength": 1 },
53
+ "requestedAdapter": {
54
+ "anyOf": [
55
+ { "type": "string", "minLength": 1 },
56
+ { "type": "null" }
57
+ ]
58
+ },
59
+ "external": { "type": "boolean" }
60
+ }
61
+ },
62
+ "score": {
63
+ "type": "object",
64
+ "additionalProperties": false,
65
+ "required": [
66
+ "scenarios",
67
+ "fullContractPassed",
68
+ "fullContractPassRate",
69
+ "decisionAccuracy",
70
+ "evidenceRecall",
71
+ "redactionLeaks",
72
+ "latency"
73
+ ],
74
+ "properties": {
75
+ "scenarios": { "type": "integer", "minimum": 0 },
76
+ "fullContractPassed": { "$ref": "#/$defs/nullableNumber" },
77
+ "fullContractPassRate": { "$ref": "#/$defs/nullableRate" },
78
+ "decisionAccuracy": { "$ref": "#/$defs/nullableRate" },
79
+ "evidenceRecall": { "$ref": "#/$defs/nullableRate" },
80
+ "redactionLeaks": { "$ref": "#/$defs/nullableNumber" },
81
+ "latency": {
82
+ "anyOf": [
83
+ {
84
+ "type": "object",
85
+ "additionalProperties": false,
86
+ "required": ["p50Ms", "p95Ms", "maxMs"],
87
+ "properties": {
88
+ "p50Ms": { "type": "number", "minimum": 0 },
89
+ "p95Ms": { "type": "number", "minimum": 0 },
90
+ "maxMs": { "type": "number", "minimum": 0 }
91
+ }
92
+ },
93
+ { "type": "null" }
94
+ ]
95
+ }
96
+ }
97
+ },
98
+ "conformance": {
99
+ "type": "object",
100
+ "additionalProperties": false,
101
+ "required": ["ok", "failures", "artifactValidationOk", "artifactValidationFailures"],
102
+ "properties": {
103
+ "ok": { "type": "boolean" },
104
+ "failures": {
105
+ "type": "array",
106
+ "items": { "type": "string" }
107
+ },
108
+ "artifactValidationOk": { "type": "boolean" },
109
+ "artifactValidationFailures": {
110
+ "type": "array",
111
+ "items": { "type": "string" }
112
+ }
113
+ }
114
+ },
115
+ "source": {
116
+ "type": "object",
117
+ "additionalProperties": false,
118
+ "required": ["dir", "manifestGeneratedAt", "fileCount"],
119
+ "properties": {
120
+ "dir": { "type": "string", "minLength": 1 },
121
+ "manifestGeneratedAt": { "type": "string", "minLength": 1 },
122
+ "fileCount": { "type": "integer", "minimum": 0 }
123
+ }
124
+ },
125
+ "verification": {
126
+ "type": "object",
127
+ "additionalProperties": false,
128
+ "required": ["ok", "dir", "subject", "files", "artifactValidation", "failures"],
129
+ "properties": {
130
+ "ok": { "type": "boolean" },
131
+ "dir": { "type": "string", "minLength": 1 },
132
+ "subject": { "$ref": "#/$defs/subject" },
133
+ "files": {
134
+ "type": "array",
135
+ "items": { "type": "string", "minLength": 1 }
136
+ },
137
+ "artifactValidation": { "$ref": "#/$defs/artifactValidation" },
138
+ "failures": {
139
+ "type": "array",
140
+ "items": { "type": "string" }
141
+ }
142
+ }
143
+ },
144
+ "artifactValidation": {
145
+ "type": "object",
146
+ "additionalProperties": false,
147
+ "required": ["ok", "dir", "schemasDir", "files", "optionalFiles", "failures"],
148
+ "properties": {
149
+ "ok": { "type": "boolean" },
150
+ "dir": { "type": "string", "minLength": 1 },
151
+ "schemasDir": { "type": "string", "minLength": 1 },
152
+ "files": {
153
+ "type": "array",
154
+ "items": { "type": "string", "minLength": 1 }
155
+ },
156
+ "optionalFiles": {
157
+ "type": "array",
158
+ "items": { "type": "string", "minLength": 1 }
159
+ },
160
+ "failures": {
161
+ "type": "array",
162
+ "items": { "type": "string" }
163
+ }
164
+ }
165
+ },
166
+ "nullableNumber": {
167
+ "anyOf": [
168
+ { "type": "number", "minimum": 0 },
169
+ { "type": "null" }
170
+ ]
171
+ },
172
+ "nullableRate": {
173
+ "anyOf": [
174
+ { "type": "number", "minimum": 0, "maximum": 1 },
175
+ { "type": "null" }
176
+ ]
177
+ }
178
+ }
179
+ }
@@ -0,0 +1,213 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://audrey-memory.org/schemas/guardbench-manifest.schema.json",
4
+ "title": "GuardBench Manifest",
5
+ "type": "object",
6
+ "additionalProperties": false,
7
+ "required": [
8
+ "manifestVersion",
9
+ "suiteId",
10
+ "suiteName",
11
+ "generatedBy",
12
+ "decisionVocabulary",
13
+ "subjects",
14
+ "metrics",
15
+ "contract",
16
+ "scenarios"
17
+ ],
18
+ "properties": {
19
+ "manifestVersion": { "type": "string", "minLength": 1 },
20
+ "suiteId": { "type": "string", "minLength": 1 },
21
+ "suiteName": { "type": "string", "minLength": 1 },
22
+ "generatedBy": { "type": "string", "minLength": 1 },
23
+ "decisionVocabulary": {
24
+ "type": "array",
25
+ "minItems": 3,
26
+ "items": { "enum": ["allow", "warn", "block"] },
27
+ "contains": { "const": "allow" }
28
+ },
29
+ "subjects": {
30
+ "type": "array",
31
+ "minItems": 5,
32
+ "items": { "$ref": "#/$defs/subject" }
33
+ },
34
+ "metrics": {
35
+ "type": "array",
36
+ "minItems": 9,
37
+ "items": {
38
+ "enum": [
39
+ "fullContractPassRate",
40
+ "decisionAccuracy",
41
+ "preventionRate",
42
+ "falseBlockRate",
43
+ "evidenceRecall",
44
+ "redactionLeaks",
45
+ "recallDegradationDetectionRate",
46
+ "latencyP50Ms",
47
+ "latencyP95Ms"
48
+ ]
49
+ }
50
+ },
51
+ "contract": {
52
+ "type": "object",
53
+ "additionalProperties": false,
54
+ "required": ["fullContractPass", "externalAdapterRule"],
55
+ "properties": {
56
+ "fullContractPass": { "type": "string", "minLength": 1 },
57
+ "externalAdapterRule": { "type": "string", "minLength": 1 }
58
+ }
59
+ },
60
+ "scenarios": {
61
+ "type": "array",
62
+ "minItems": 10,
63
+ "items": { "$ref": "#/$defs/scenario" }
64
+ }
65
+ },
66
+ "$defs": {
67
+ "subject": {
68
+ "type": "object",
69
+ "additionalProperties": false,
70
+ "required": ["id", "name", "description", "external"],
71
+ "properties": {
72
+ "id": { "type": "string", "minLength": 1 },
73
+ "name": { "type": "string", "minLength": 1 },
74
+ "description": { "type": "string", "minLength": 1 },
75
+ "external": { "type": "boolean" }
76
+ }
77
+ },
78
+ "scenario": {
79
+ "type": "object",
80
+ "additionalProperties": false,
81
+ "required": [
82
+ "id",
83
+ "name",
84
+ "expectedDecision",
85
+ "action",
86
+ "seed",
87
+ "expectedEvidenceClass",
88
+ "requiredEvidence",
89
+ "redactionSecretsSeeded"
90
+ ],
91
+ "properties": {
92
+ "id": { "type": "string", "pattern": "^GB-[0-9]{2}$" },
93
+ "name": { "type": "string", "minLength": 1 },
94
+ "expectedDecision": { "enum": ["allow", "warn", "block"] },
95
+ "action": { "$ref": "#/$defs/action" },
96
+ "seed": { "$ref": "#/$defs/seed" },
97
+ "expectedEvidenceClass": { "type": "string", "minLength": 1 },
98
+ "requiredEvidence": {
99
+ "type": "array",
100
+ "items": { "type": "string", "minLength": 1 }
101
+ },
102
+ "redactionSecretsSeeded": { "type": "integer", "minimum": 0 }
103
+ }
104
+ },
105
+ "action": {
106
+ "type": "object",
107
+ "additionalProperties": false,
108
+ "required": ["tool", "action"],
109
+ "properties": {
110
+ "tool": { "type": "string", "minLength": 1 },
111
+ "action": { "type": "string", "minLength": 1 },
112
+ "command": { "type": "string", "minLength": 1 },
113
+ "cwd": { "type": "string" },
114
+ "files": {
115
+ "type": "array",
116
+ "items": { "type": "string", "minLength": 1 }
117
+ }
118
+ }
119
+ },
120
+ "seed": {
121
+ "type": "object",
122
+ "additionalProperties": false,
123
+ "required": [
124
+ "seededMemories",
125
+ "seededToolEvents",
126
+ "seededNoise",
127
+ "faultInjection",
128
+ "seededSecretRefs"
129
+ ],
130
+ "properties": {
131
+ "seededMemories": {
132
+ "type": "array",
133
+ "items": { "$ref": "#/$defs/memorySeed" }
134
+ },
135
+ "seededToolEvents": {
136
+ "type": "array",
137
+ "items": { "$ref": "#/$defs/toolEventSeed" }
138
+ },
139
+ "seededNoise": {
140
+ "anyOf": [
141
+ { "type": "null" },
142
+ { "$ref": "#/$defs/noiseSeed" }
143
+ ]
144
+ },
145
+ "faultInjection": {
146
+ "anyOf": [
147
+ { "type": "null" },
148
+ { "type": "string", "minLength": 1 }
149
+ ]
150
+ },
151
+ "seededSecretRefs": {
152
+ "type": "array",
153
+ "items": { "$ref": "#/$defs/secretRef" }
154
+ }
155
+ }
156
+ },
157
+ "memorySeed": {
158
+ "type": "object",
159
+ "additionalProperties": false,
160
+ "required": ["content", "source", "tags", "salience"],
161
+ "properties": {
162
+ "content": { "type": "string", "minLength": 1 },
163
+ "source": { "type": "string", "minLength": 1 },
164
+ "tags": {
165
+ "type": "array",
166
+ "items": { "type": "string", "minLength": 1 }
167
+ },
168
+ "salience": { "type": "number", "minimum": 0, "maximum": 1 }
169
+ }
170
+ },
171
+ "toolEventSeed": {
172
+ "type": "object",
173
+ "additionalProperties": false,
174
+ "required": ["tool"],
175
+ "properties": {
176
+ "tool": { "type": "string", "minLength": 1 },
177
+ "action": { "type": "string", "minLength": 1 },
178
+ "files": {
179
+ "type": "array",
180
+ "items": { "type": "string", "minLength": 1 }
181
+ },
182
+ "outcome": { "type": "string", "minLength": 1 },
183
+ "errorSummary": { "type": "string", "minLength": 1 },
184
+ "errorSummaryPattern": { "type": "string", "minLength": 1 },
185
+ "output": { "type": "string", "minLength": 1 }
186
+ }
187
+ },
188
+ "noiseSeed": {
189
+ "type": "object",
190
+ "additionalProperties": false,
191
+ "required": ["count", "source", "tags", "salience"],
192
+ "properties": {
193
+ "count": { "type": "integer", "minimum": 0 },
194
+ "source": { "type": "string", "minLength": 1 },
195
+ "tags": {
196
+ "type": "array",
197
+ "items": { "type": "string", "minLength": 1 }
198
+ },
199
+ "salience": { "type": "number", "minimum": 0, "maximum": 1 }
200
+ }
201
+ },
202
+ "secretRef": {
203
+ "type": "object",
204
+ "additionalProperties": false,
205
+ "required": ["id", "fingerprint", "length"],
206
+ "properties": {
207
+ "id": { "type": "string", "minLength": 1 },
208
+ "fingerprint": { "type": "string", "pattern": "^[a-f0-9]{16}$" },
209
+ "length": { "type": "integer", "minimum": 1 }
210
+ }
211
+ }
212
+ }
213
+ }
@@ -0,0 +1,47 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://audrey-memory.org/schemas/guardbench-publication-verification.schema.json",
4
+ "title": "GuardBench Publication Artifact Verification",
5
+ "type": "object",
6
+ "additionalProperties": false,
7
+ "required": ["schemaVersion", "suite", "generatedAt", "ok", "checks", "failures"],
8
+ "properties": {
9
+ "schemaVersion": { "const": "1.0.0" },
10
+ "suite": { "const": "GuardBench publication artifact verification" },
11
+ "generatedAt": { "type": "string", "minLength": 1 },
12
+ "ok": { "type": "boolean" },
13
+ "checks": {
14
+ "type": "object",
15
+ "additionalProperties": false,
16
+ "required": ["registry", "adapterModule", "selfTest", "artifacts", "bundle", "externalDryRun", "externalEvidence", "leaderboard", "localPaths"],
17
+ "properties": {
18
+ "registry": { "$ref": "#/$defs/basicCheck" },
19
+ "adapterModule": { "$ref": "#/$defs/basicCheck" },
20
+ "selfTest": { "$ref": "#/$defs/basicCheck" },
21
+ "artifacts": { "$ref": "#/$defs/basicCheck" },
22
+ "bundle": { "$ref": "#/$defs/basicCheck" },
23
+ "externalDryRun": { "$ref": "#/$defs/basicCheck" },
24
+ "externalEvidence": { "$ref": "#/$defs/basicCheck" },
25
+ "leaderboard": { "$ref": "#/$defs/basicCheck" },
26
+ "localPaths": { "$ref": "#/$defs/basicCheck" }
27
+ }
28
+ },
29
+ "failures": {
30
+ "type": "array",
31
+ "items": { "type": "string" }
32
+ }
33
+ },
34
+ "$defs": {
35
+ "basicCheck": {
36
+ "type": "object",
37
+ "required": ["ok", "failures"],
38
+ "properties": {
39
+ "ok": { "type": "boolean" },
40
+ "failures": {
41
+ "type": "array",
42
+ "items": { "type": "string" }
43
+ }
44
+ }
45
+ }
46
+ }
47
+ }