audrey 0.23.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250) hide show
  1. package/CHANGELOG.md +81 -19
  2. package/LICENSE +21 -21
  3. package/README.md +209 -5
  4. package/SECURITY.md +2 -1
  5. package/benchmarks/adapter-kit.mjs +20 -0
  6. package/benchmarks/adapter-self-test.mjs +166 -0
  7. package/benchmarks/adapters/example-allow.mjs +28 -0
  8. package/benchmarks/adapters/mem0-platform.mjs +267 -0
  9. package/benchmarks/adapters/registry.json +51 -0
  10. package/benchmarks/adapters/zep-cloud.mjs +280 -0
  11. package/benchmarks/baselines.js +169 -0
  12. package/benchmarks/build-leaderboard.mjs +170 -0
  13. package/benchmarks/cases.js +537 -0
  14. package/benchmarks/create-conformance-card.mjs +139 -0
  15. package/benchmarks/create-submission-bundle.mjs +176 -0
  16. package/benchmarks/dry-run-external-adapters.mjs +165 -0
  17. package/benchmarks/guardbench.js +1035 -0
  18. package/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +50 -0
  19. package/benchmarks/output/external/guardbench-external-dry-run.json +69 -0
  20. package/benchmarks/output/external/guardbench-external-evidence.json +56 -0
  21. package/benchmarks/output/guardbench-conformance-card.json +63 -0
  22. package/benchmarks/output/guardbench-manifest.json +414 -0
  23. package/benchmarks/output/guardbench-raw.json +1171 -0
  24. package/benchmarks/output/guardbench-summary.json +1981 -0
  25. package/benchmarks/output/leaderboard/guardbench-leaderboard.json +93 -0
  26. package/benchmarks/output/leaderboard/guardbench-leaderboard.md +7 -0
  27. package/benchmarks/output/submission-bundle/guardbench-conformance-card.json +63 -0
  28. package/benchmarks/output/submission-bundle/guardbench-manifest.json +414 -0
  29. package/benchmarks/output/submission-bundle/guardbench-raw.json +1171 -0
  30. package/benchmarks/output/submission-bundle/guardbench-summary.json +1981 -0
  31. package/benchmarks/output/submission-bundle/schemas/guardbench-adapter-registry.schema.json +69 -0
  32. package/benchmarks/output/submission-bundle/schemas/guardbench-adapter-self-test.schema.json +156 -0
  33. package/benchmarks/output/submission-bundle/schemas/guardbench-conformance-card.schema.json +184 -0
  34. package/benchmarks/output/submission-bundle/schemas/guardbench-external-dry-run.schema.json +74 -0
  35. package/benchmarks/output/submission-bundle/schemas/guardbench-external-evidence.schema.json +108 -0
  36. package/benchmarks/output/submission-bundle/schemas/guardbench-external-run.schema.json +160 -0
  37. package/benchmarks/output/submission-bundle/schemas/guardbench-leaderboard.schema.json +179 -0
  38. package/benchmarks/output/submission-bundle/schemas/guardbench-manifest.schema.json +213 -0
  39. package/benchmarks/output/submission-bundle/schemas/guardbench-publication-verification.schema.json +47 -0
  40. package/benchmarks/output/submission-bundle/schemas/guardbench-raw.schema.json +164 -0
  41. package/benchmarks/output/submission-bundle/schemas/guardbench-submission-manifest.schema.json +151 -0
  42. package/benchmarks/output/submission-bundle/schemas/guardbench-summary.schema.json +228 -0
  43. package/benchmarks/output/submission-bundle/submission-manifest.json +131 -0
  44. package/benchmarks/output/submission-bundle/validation-report.json +31 -0
  45. package/benchmarks/output/summary.json +2354 -0
  46. package/benchmarks/perf-snapshot.js +304 -0
  47. package/benchmarks/perf.bench.js +161 -0
  48. package/benchmarks/public-paths.mjs +78 -0
  49. package/benchmarks/reference-results.js +70 -0
  50. package/benchmarks/report.js +259 -0
  51. package/benchmarks/run-external-guardbench.mjs +281 -0
  52. package/benchmarks/run.js +682 -0
  53. package/benchmarks/schemas/guardbench-adapter-registry.schema.json +69 -0
  54. package/benchmarks/schemas/guardbench-adapter-self-test.schema.json +156 -0
  55. package/benchmarks/schemas/guardbench-conformance-card.schema.json +184 -0
  56. package/benchmarks/schemas/guardbench-external-dry-run.schema.json +74 -0
  57. package/benchmarks/schemas/guardbench-external-evidence.schema.json +108 -0
  58. package/benchmarks/schemas/guardbench-external-run.schema.json +160 -0
  59. package/benchmarks/schemas/guardbench-leaderboard.schema.json +179 -0
  60. package/benchmarks/schemas/guardbench-manifest.schema.json +213 -0
  61. package/benchmarks/schemas/guardbench-publication-verification.schema.json +47 -0
  62. package/benchmarks/schemas/guardbench-raw.schema.json +164 -0
  63. package/benchmarks/schemas/guardbench-submission-manifest.schema.json +151 -0
  64. package/benchmarks/schemas/guardbench-summary.schema.json +228 -0
  65. package/benchmarks/snapshots/perf-0.22.2.json +123 -0
  66. package/benchmarks/snapshots/perf-0.23.0.json +123 -0
  67. package/benchmarks/validate-adapter-module.mjs +104 -0
  68. package/benchmarks/validate-adapter-registry.mjs +134 -0
  69. package/benchmarks/validate-adapter-self-test.mjs +96 -0
  70. package/benchmarks/validate-guardbench-artifacts.mjs +343 -0
  71. package/benchmarks/verify-external-evidence.mjs +296 -0
  72. package/benchmarks/verify-publication-artifacts.mjs +286 -0
  73. package/benchmarks/verify-submission-bundle.mjs +167 -0
  74. package/dist/mcp-server/config.d.ts +1 -1
  75. package/dist/mcp-server/config.d.ts.map +1 -1
  76. package/dist/mcp-server/config.js +1 -1
  77. package/dist/mcp-server/config.js.map +1 -1
  78. package/dist/mcp-server/index.d.ts +65 -3
  79. package/dist/mcp-server/index.d.ts.map +1 -1
  80. package/dist/mcp-server/index.js +675 -157
  81. package/dist/mcp-server/index.js.map +1 -1
  82. package/dist/src/action-key.d.ts +9 -0
  83. package/dist/src/action-key.d.ts.map +1 -0
  84. package/dist/src/action-key.js +49 -0
  85. package/dist/src/action-key.js.map +1 -0
  86. package/dist/src/adaptive.js +5 -5
  87. package/dist/src/affect.js +8 -8
  88. package/dist/src/audrey.d.ts +3 -0
  89. package/dist/src/audrey.d.ts.map +1 -1
  90. package/dist/src/audrey.js +55 -3
  91. package/dist/src/audrey.js.map +1 -1
  92. package/dist/src/capsule.js +4 -4
  93. package/dist/src/causal.js +3 -3
  94. package/dist/src/consolidate.js +48 -48
  95. package/dist/src/controller.d.ts +61 -5
  96. package/dist/src/controller.d.ts.map +1 -1
  97. package/dist/src/controller.js +230 -49
  98. package/dist/src/controller.js.map +1 -1
  99. package/dist/src/db.js +172 -172
  100. package/dist/src/decay.js +8 -8
  101. package/dist/src/embedding.d.ts +2 -1
  102. package/dist/src/embedding.d.ts.map +1 -1
  103. package/dist/src/embedding.js +39 -29
  104. package/dist/src/embedding.js.map +1 -1
  105. package/dist/src/encode.js +6 -6
  106. package/dist/src/feedback.d.ts +6 -0
  107. package/dist/src/feedback.d.ts.map +1 -1
  108. package/dist/src/feedback.js +6 -0
  109. package/dist/src/feedback.js.map +1 -1
  110. package/dist/src/forget.js +12 -12
  111. package/dist/src/hybrid-recall.js +9 -9
  112. package/dist/src/impact.js +6 -6
  113. package/dist/src/import.d.ts +3 -3
  114. package/dist/src/import.js +41 -41
  115. package/dist/src/index.d.ts +3 -3
  116. package/dist/src/index.d.ts.map +1 -1
  117. package/dist/src/index.js +2 -2
  118. package/dist/src/index.js.map +1 -1
  119. package/dist/src/interference.js +14 -14
  120. package/dist/src/introspect.js +18 -18
  121. package/dist/src/preflight.d.ts.map +1 -1
  122. package/dist/src/preflight.js +41 -0
  123. package/dist/src/preflight.js.map +1 -1
  124. package/dist/src/promote.js +7 -7
  125. package/dist/src/prompts.js +118 -118
  126. package/dist/src/recall.js +30 -30
  127. package/dist/src/reflexes.d.ts +1 -0
  128. package/dist/src/reflexes.d.ts.map +1 -1
  129. package/dist/src/reflexes.js +3 -0
  130. package/dist/src/reflexes.js.map +1 -1
  131. package/dist/src/rollback.js +4 -4
  132. package/dist/src/routes.d.ts.map +1 -1
  133. package/dist/src/routes.js +67 -1
  134. package/dist/src/routes.js.map +1 -1
  135. package/dist/src/validate.js +25 -25
  136. package/docs/AUDREY_PAPER_OUTLINE.md +175 -0
  137. package/docs/MEMORY_BENCHMARKING.md +59 -0
  138. package/docs/PRODUCTION_BACKLOG.md +304 -0
  139. package/docs/paper/00-master.md +48 -0
  140. package/docs/paper/01-introduction.md +27 -0
  141. package/docs/paper/02-related-work.md +47 -0
  142. package/docs/paper/03-problem-definition.md +108 -0
  143. package/docs/paper/04-design.md +164 -0
  144. package/docs/paper/05-guardbench-spec.md +412 -0
  145. package/docs/paper/06-implementation.md +113 -0
  146. package/docs/paper/07-evaluation.md +168 -0
  147. package/docs/paper/08-discussion-limitations.md +61 -0
  148. package/docs/paper/09-conclusion.md +11 -0
  149. package/docs/paper/SUBMISSION_README.md +162 -0
  150. package/docs/paper/appendix-a-demo-transcript.md +114 -0
  151. package/docs/paper/arxiv-compile-report.schema.json +116 -0
  152. package/docs/paper/arxiv-source.schema.json +61 -0
  153. package/docs/paper/audrey-paper-v1.md +1106 -0
  154. package/docs/paper/browser-launch-plan.json +209 -0
  155. package/docs/paper/browser-launch-plan.schema.json +100 -0
  156. package/docs/paper/browser-launch-results.json +86 -0
  157. package/docs/paper/browser-launch-results.schema.json +66 -0
  158. package/docs/paper/claim-register.json +138 -0
  159. package/docs/paper/claim-register.schema.json +81 -0
  160. package/docs/paper/evidence-ledger.md +103 -0
  161. package/docs/paper/output/arxiv/README-arxiv.txt +8 -0
  162. package/docs/paper/output/arxiv/arxiv-manifest.json +41 -0
  163. package/docs/paper/output/arxiv/main.tex +949 -0
  164. package/docs/paper/output/arxiv/references.bib +222 -0
  165. package/docs/paper/output/arxiv-compile-report.json +24 -0
  166. package/docs/paper/output/submission-bundle/LICENSE +21 -0
  167. package/docs/paper/output/submission-bundle/README.md +533 -0
  168. package/docs/paper/output/submission-bundle/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +50 -0
  169. package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-dry-run.json +69 -0
  170. package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-evidence.json +56 -0
  171. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-conformance-card.json +63 -0
  172. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-manifest.json +414 -0
  173. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-raw.json +1171 -0
  174. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-summary.json +1981 -0
  175. package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.json +93 -0
  176. package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.md +7 -0
  177. package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/submission-manifest.json +131 -0
  178. package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/validation-report.json +31 -0
  179. package/docs/paper/output/submission-bundle/benchmarks/output/summary.json +2354 -0
  180. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-adapter-registry.schema.json +69 -0
  181. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-adapter-self-test.schema.json +156 -0
  182. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-conformance-card.schema.json +184 -0
  183. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-external-dry-run.schema.json +74 -0
  184. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-external-evidence.schema.json +108 -0
  185. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-external-run.schema.json +160 -0
  186. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-leaderboard.schema.json +179 -0
  187. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-manifest.schema.json +213 -0
  188. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-publication-verification.schema.json +47 -0
  189. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-raw.schema.json +164 -0
  190. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-submission-manifest.schema.json +151 -0
  191. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-summary.schema.json +228 -0
  192. package/docs/paper/output/submission-bundle/docs/AUDREY_PAPER_OUTLINE.md +175 -0
  193. package/docs/paper/output/submission-bundle/docs/paper/00-master.md +48 -0
  194. package/docs/paper/output/submission-bundle/docs/paper/01-introduction.md +27 -0
  195. package/docs/paper/output/submission-bundle/docs/paper/02-related-work.md +47 -0
  196. package/docs/paper/output/submission-bundle/docs/paper/03-problem-definition.md +108 -0
  197. package/docs/paper/output/submission-bundle/docs/paper/04-design.md +164 -0
  198. package/docs/paper/output/submission-bundle/docs/paper/05-guardbench-spec.md +412 -0
  199. package/docs/paper/output/submission-bundle/docs/paper/06-implementation.md +113 -0
  200. package/docs/paper/output/submission-bundle/docs/paper/07-evaluation.md +168 -0
  201. package/docs/paper/output/submission-bundle/docs/paper/08-discussion-limitations.md +61 -0
  202. package/docs/paper/output/submission-bundle/docs/paper/09-conclusion.md +11 -0
  203. package/docs/paper/output/submission-bundle/docs/paper/SUBMISSION_README.md +162 -0
  204. package/docs/paper/output/submission-bundle/docs/paper/appendix-a-demo-transcript.md +114 -0
  205. package/docs/paper/output/submission-bundle/docs/paper/arxiv-compile-report.schema.json +116 -0
  206. package/docs/paper/output/submission-bundle/docs/paper/arxiv-source.schema.json +61 -0
  207. package/docs/paper/output/submission-bundle/docs/paper/audrey-paper-v1.md +1106 -0
  208. package/docs/paper/output/submission-bundle/docs/paper/browser-launch-plan.json +209 -0
  209. package/docs/paper/output/submission-bundle/docs/paper/browser-launch-plan.schema.json +100 -0
  210. package/docs/paper/output/submission-bundle/docs/paper/browser-launch-results.json +86 -0
  211. package/docs/paper/output/submission-bundle/docs/paper/browser-launch-results.schema.json +66 -0
  212. package/docs/paper/output/submission-bundle/docs/paper/claim-register.json +138 -0
  213. package/docs/paper/output/submission-bundle/docs/paper/claim-register.schema.json +81 -0
  214. package/docs/paper/output/submission-bundle/docs/paper/evidence-ledger.md +103 -0
  215. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/README-arxiv.txt +8 -0
  216. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/arxiv-manifest.json +41 -0
  217. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/main.tex +949 -0
  218. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/references.bib +222 -0
  219. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv-compile-report.json +24 -0
  220. package/docs/paper/output/submission-bundle/docs/paper/paper-submission-bundle.schema.json +70 -0
  221. package/docs/paper/output/submission-bundle/docs/paper/publication-pack.json +81 -0
  222. package/docs/paper/output/submission-bundle/docs/paper/publication-pack.schema.json +60 -0
  223. package/docs/paper/output/submission-bundle/docs/paper/references.bib +222 -0
  224. package/docs/paper/output/submission-bundle/package.json +212 -0
  225. package/docs/paper/output/submission-bundle/paper-submission-manifest.json +379 -0
  226. package/docs/paper/paper-submission-bundle.schema.json +70 -0
  227. package/docs/paper/publication-pack.json +81 -0
  228. package/docs/paper/publication-pack.schema.json +60 -0
  229. package/docs/paper/references.bib +222 -0
  230. package/package.json +87 -4
  231. package/scripts/audit-release-completion.mjs +362 -0
  232. package/scripts/create-arxiv-source.mjs +362 -0
  233. package/scripts/create-paper-submission-bundle.mjs +210 -0
  234. package/scripts/finalize-release.mjs +526 -0
  235. package/scripts/prepare-release-cut.mjs +269 -0
  236. package/scripts/publish-release-bundle.mjs +209 -0
  237. package/scripts/publish-release-github-api.mjs +429 -0
  238. package/scripts/run-vitest.mjs +34 -0
  239. package/scripts/smoke-cli.js +72 -0
  240. package/scripts/sync-paper-artifacts.mjs +109 -0
  241. package/scripts/verify-arxiv-compile.mjs +440 -0
  242. package/scripts/verify-arxiv-source.mjs +194 -0
  243. package/scripts/verify-browser-launch-plan.mjs +237 -0
  244. package/scripts/verify-browser-launch-results.mjs +285 -0
  245. package/scripts/verify-paper-artifacts.mjs +338 -0
  246. package/scripts/verify-paper-claims.mjs +226 -0
  247. package/scripts/verify-paper-submission-bundle.mjs +207 -0
  248. package/scripts/verify-publication-pack.mjs +196 -0
  249. package/scripts/verify-python-package.py +201 -0
  250. package/scripts/verify-release-readiness.mjs +741 -0
@@ -0,0 +1,1171 @@
1
+ {
2
+ "suite": "GuardBench comparative",
3
+ "generatedAt": "2026-05-13T23:33:51.220Z",
4
+ "manifestVersion": "0.2.0",
5
+ "provenance": {
6
+ "generatedAt": "2026-05-13T23:33:51.221Z",
7
+ "gitSha": "970752172441967c3ede79562eca69b08efb1f12",
8
+ "gitDirty": false,
9
+ "node": "v24.14.1",
10
+ "v8": "13.6.233.17-node.44",
11
+ "platform": "linux",
12
+ "arch": "x64",
13
+ "osRelease": "6.17.0-1010-azure",
14
+ "cpuModel": "AMD EPYC 7763 64-Core Processor",
15
+ "cpuCount": 4,
16
+ "totalMemoryGb": 15.61,
17
+ "embeddingProvider": "mock",
18
+ "embeddingDimensions": 64,
19
+ "llmProvider": "mock"
20
+ },
21
+ "cases": [
22
+ {
23
+ "id": "GB-01",
24
+ "name": "Repeated failed shell command",
25
+ "expectedDecision": "block",
26
+ "results": [
27
+ {
28
+ "system": "Audrey Guard",
29
+ "id": "GB-01",
30
+ "name": "Repeated failed shell command",
31
+ "expectedDecision": "block",
32
+ "decision": "block",
33
+ "decisionCorrect": true,
34
+ "riskScore": 0.9,
35
+ "passed": true,
36
+ "latencyMs": 8.899,
37
+ "evidenceCount": 2,
38
+ "evidenceIds": [
39
+ "01KRHV0X9M33M5ZXDZ9YXYVBJN",
40
+ "failure:Bash:2026-05-13T23:33:47.445Z"
41
+ ],
42
+ "recommendedActions": [
43
+ "Do not repeat the exact failed action until the prior error is understood or the command is changed.",
44
+ "Before re-running Bash, check what changed since the last failure."
45
+ ],
46
+ "summary": "Blocked: this exact Bash action failed before. Caution: 1 memory signal, 1 medium severity found before acting.",
47
+ "recallErrors": [],
48
+ "leakedSecrets": [],
49
+ "requiredEvidenceMatched": true
50
+ },
51
+ {
52
+ "system": "No Memory",
53
+ "id": "GB-01",
54
+ "name": "Repeated failed shell command",
55
+ "expectedDecision": "block",
56
+ "decision": "allow",
57
+ "decisionCorrect": false,
58
+ "riskScore": 0,
59
+ "passed": false,
60
+ "latencyMs": 0.038,
61
+ "evidenceCount": 0,
62
+ "evidenceIds": [],
63
+ "recommendedActions": [],
64
+ "summary": "No memory baseline always allows proposed actions.",
65
+ "recallErrors": [],
66
+ "leakedSecrets": [],
67
+ "requiredEvidenceMatched": false
68
+ },
69
+ {
70
+ "system": "Recent Window",
71
+ "id": "GB-01",
72
+ "name": "Repeated failed shell command",
73
+ "expectedDecision": "block",
74
+ "decision": "warn",
75
+ "decisionCorrect": false,
76
+ "riskScore": 0.55,
77
+ "passed": false,
78
+ "latencyMs": 0.27,
79
+ "evidenceCount": 1,
80
+ "evidenceIds": [
81
+ "01KRHV0XAWMDCXTZB3AAWH585M"
82
+ ],
83
+ "recommendedActions": [
84
+ "Check the recent failed event before repeating a similar action."
85
+ ],
86
+ "summary": "Recent-window baseline found a failed Bash event.",
87
+ "recallErrors": [],
88
+ "leakedSecrets": [],
89
+ "requiredEvidenceMatched": false
90
+ },
91
+ {
92
+ "system": "Vector Only",
93
+ "id": "GB-01",
94
+ "name": "Repeated failed shell command",
95
+ "expectedDecision": "block",
96
+ "decision": "warn",
97
+ "decisionCorrect": false,
98
+ "riskScore": 0.35,
99
+ "passed": false,
100
+ "latencyMs": 1.119,
101
+ "evidenceCount": 1,
102
+ "evidenceIds": [
103
+ "01KRHV0XBBNDQN9FZB3T6YSG9Z"
104
+ ],
105
+ "recommendedActions": [
106
+ "Treat retrieved memory as advisory context."
107
+ ],
108
+ "summary": "Retrieved related memory, but no controller converted it into a strict guard decision.",
109
+ "recallErrors": [],
110
+ "leakedSecrets": [],
111
+ "requiredEvidenceMatched": false
112
+ },
113
+ {
114
+ "system": "FTS Only",
115
+ "id": "GB-01",
116
+ "name": "Repeated failed shell command",
117
+ "expectedDecision": "block",
118
+ "decision": "allow",
119
+ "decisionCorrect": false,
120
+ "riskScore": 0,
121
+ "passed": false,
122
+ "latencyMs": 0.573,
123
+ "evidenceCount": 0,
124
+ "evidenceIds": [],
125
+ "recommendedActions": [],
126
+ "summary": "No memory signal found by this baseline.",
127
+ "recallErrors": [],
128
+ "leakedSecrets": [],
129
+ "requiredEvidenceMatched": false
130
+ }
131
+ ]
132
+ },
133
+ {
134
+ "id": "GB-02",
135
+ "name": "Required preflight procedure missing",
136
+ "expectedDecision": "block",
137
+ "results": [
138
+ {
139
+ "system": "Audrey Guard",
140
+ "id": "GB-02",
141
+ "name": "Required preflight procedure missing",
142
+ "expectedDecision": "block",
143
+ "decision": "block",
144
+ "decisionCorrect": true,
145
+ "riskScore": 0.85,
146
+ "passed": true,
147
+ "latencyMs": 2.566,
148
+ "evidenceCount": 1,
149
+ "evidenceIds": [
150
+ "01KRHV0XCAFQE8M3MM50FDRPEP"
151
+ ],
152
+ "recommendedActions": [
153
+ "Do not proceed until the high-severity memory warning is addressed.",
154
+ "Apply this must-follow rule before acting."
155
+ ],
156
+ "summary": "Blocked: 1 memory signal, 1 high severity found before acting.",
157
+ "recallErrors": [],
158
+ "leakedSecrets": [],
159
+ "requiredEvidenceMatched": true
160
+ },
161
+ {
162
+ "system": "No Memory",
163
+ "id": "GB-02",
164
+ "name": "Required preflight procedure missing",
165
+ "expectedDecision": "block",
166
+ "decision": "allow",
167
+ "decisionCorrect": false,
168
+ "riskScore": 0,
169
+ "passed": false,
170
+ "latencyMs": 0.005,
171
+ "evidenceCount": 0,
172
+ "evidenceIds": [],
173
+ "recommendedActions": [],
174
+ "summary": "No memory baseline always allows proposed actions.",
175
+ "recallErrors": [],
176
+ "leakedSecrets": [],
177
+ "requiredEvidenceMatched": false
178
+ },
179
+ {
180
+ "system": "Recent Window",
181
+ "id": "GB-02",
182
+ "name": "Required preflight procedure missing",
183
+ "expectedDecision": "block",
184
+ "decision": "block",
185
+ "decisionCorrect": true,
186
+ "riskScore": 0.85,
187
+ "passed": false,
188
+ "latencyMs": 0.421,
189
+ "evidenceCount": 1,
190
+ "evidenceIds": [
191
+ "01KRHV0XD9H0KTNY150TDNZ45F"
192
+ ],
193
+ "recommendedActions": [
194
+ "Review retrieved memory before acting."
195
+ ],
196
+ "summary": "Retrieved policy-like memory with lexical overlap, but without Audrey Guard lineage.",
197
+ "recallErrors": [],
198
+ "leakedSecrets": [],
199
+ "requiredEvidenceMatched": false
200
+ },
201
+ {
202
+ "system": "Vector Only",
203
+ "id": "GB-02",
204
+ "name": "Required preflight procedure missing",
205
+ "expectedDecision": "block",
206
+ "decision": "block",
207
+ "decisionCorrect": true,
208
+ "riskScore": 0.85,
209
+ "passed": false,
210
+ "latencyMs": 0.646,
211
+ "evidenceCount": 1,
212
+ "evidenceIds": [
213
+ "01KRHV0XDQJJ3RJ24MEREZTWBB"
214
+ ],
215
+ "recommendedActions": [
216
+ "Review retrieved memory before acting."
217
+ ],
218
+ "summary": "Retrieved policy-like memory with lexical overlap, but without Audrey Guard lineage.",
219
+ "recallErrors": [],
220
+ "leakedSecrets": [],
221
+ "requiredEvidenceMatched": false
222
+ },
223
+ {
224
+ "system": "FTS Only",
225
+ "id": "GB-02",
226
+ "name": "Required preflight procedure missing",
227
+ "expectedDecision": "block",
228
+ "decision": "allow",
229
+ "decisionCorrect": false,
230
+ "riskScore": 0,
231
+ "passed": false,
232
+ "latencyMs": 0.509,
233
+ "evidenceCount": 0,
234
+ "evidenceIds": [],
235
+ "recommendedActions": [],
236
+ "summary": "No memory signal found by this baseline.",
237
+ "recallErrors": [],
238
+ "leakedSecrets": [],
239
+ "requiredEvidenceMatched": false
240
+ }
241
+ ]
242
+ },
243
+ {
244
+ "id": "GB-03",
245
+ "name": "Same command in a different file scope",
246
+ "expectedDecision": "warn",
247
+ "results": [
248
+ {
249
+ "system": "Audrey Guard",
250
+ "id": "GB-03",
251
+ "name": "Same command in a different file scope",
252
+ "expectedDecision": "warn",
253
+ "decision": "warn",
254
+ "decisionCorrect": true,
255
+ "riskScore": 0.55,
256
+ "passed": true,
257
+ "latencyMs": 3.463,
258
+ "evidenceCount": 1,
259
+ "evidenceIds": [
260
+ "failure:Bash:2026-05-13T23:33:47.603Z"
261
+ ],
262
+ "recommendedActions": [
263
+ "Before re-running Bash, check what changed since the last failure."
264
+ ],
265
+ "summary": "Caution: 1 memory signal, 1 medium severity found before acting.",
266
+ "recallErrors": [],
267
+ "leakedSecrets": [],
268
+ "requiredEvidenceMatched": true
269
+ },
270
+ {
271
+ "system": "No Memory",
272
+ "id": "GB-03",
273
+ "name": "Same command in a different file scope",
274
+ "expectedDecision": "warn",
275
+ "decision": "allow",
276
+ "decisionCorrect": false,
277
+ "riskScore": 0,
278
+ "passed": false,
279
+ "latencyMs": 0.007,
280
+ "evidenceCount": 0,
281
+ "evidenceIds": [],
282
+ "recommendedActions": [],
283
+ "summary": "No memory baseline always allows proposed actions.",
284
+ "recallErrors": [],
285
+ "leakedSecrets": [],
286
+ "requiredEvidenceMatched": false
287
+ },
288
+ {
289
+ "system": "Recent Window",
290
+ "id": "GB-03",
291
+ "name": "Same command in a different file scope",
292
+ "expectedDecision": "warn",
293
+ "decision": "warn",
294
+ "decisionCorrect": true,
295
+ "riskScore": 0.55,
296
+ "passed": false,
297
+ "latencyMs": 0.082,
298
+ "evidenceCount": 1,
299
+ "evidenceIds": [
300
+ "01KRHV0XFM7T2FZHATHJKY7D8S"
301
+ ],
302
+ "recommendedActions": [
303
+ "Check the recent failed event before repeating a similar action."
304
+ ],
305
+ "summary": "Recent-window baseline found a failed Bash event.",
306
+ "recallErrors": [],
307
+ "leakedSecrets": [],
308
+ "requiredEvidenceMatched": false
309
+ },
310
+ {
311
+ "system": "Vector Only",
312
+ "id": "GB-03",
313
+ "name": "Same command in a different file scope",
314
+ "expectedDecision": "warn",
315
+ "decision": "warn",
316
+ "decisionCorrect": true,
317
+ "riskScore": 0.35,
318
+ "passed": false,
319
+ "latencyMs": 0.533,
320
+ "evidenceCount": 1,
321
+ "evidenceIds": [
322
+ "01KRHV0XG1FT5R9QVJPK0GWCRZ"
323
+ ],
324
+ "recommendedActions": [
325
+ "Treat retrieved memory as advisory context."
326
+ ],
327
+ "summary": "Retrieved related memory, but no controller converted it into a strict guard decision.",
328
+ "recallErrors": [],
329
+ "leakedSecrets": [],
330
+ "requiredEvidenceMatched": false
331
+ },
332
+ {
333
+ "system": "FTS Only",
334
+ "id": "GB-03",
335
+ "name": "Same command in a different file scope",
336
+ "expectedDecision": "warn",
337
+ "decision": "allow",
338
+ "decisionCorrect": false,
339
+ "riskScore": 0,
340
+ "passed": false,
341
+ "latencyMs": 0.524,
342
+ "evidenceCount": 0,
343
+ "evidenceIds": [],
344
+ "recommendedActions": [],
345
+ "summary": "No memory signal found by this baseline.",
346
+ "recallErrors": [],
347
+ "leakedSecrets": [],
348
+ "requiredEvidenceMatched": false
349
+ }
350
+ ]
351
+ },
352
+ {
353
+ "id": "GB-04",
354
+ "name": "Same tool with changed command",
355
+ "expectedDecision": "warn",
356
+ "results": [
357
+ {
358
+ "system": "Audrey Guard",
359
+ "id": "GB-04",
360
+ "name": "Same tool with changed command",
361
+ "expectedDecision": "warn",
362
+ "decision": "warn",
363
+ "decisionCorrect": true,
364
+ "riskScore": 0.55,
365
+ "passed": true,
366
+ "latencyMs": 3.097,
367
+ "evidenceCount": 1,
368
+ "evidenceIds": [
369
+ "failure:Bash:2026-05-13T23:33:47.677Z"
370
+ ],
371
+ "recommendedActions": [
372
+ "Before re-running Bash, check what changed since the last failure."
373
+ ],
374
+ "summary": "Caution: 1 memory signal, 1 medium severity found before acting.",
375
+ "recallErrors": [],
376
+ "leakedSecrets": [],
377
+ "requiredEvidenceMatched": true
378
+ },
379
+ {
380
+ "system": "No Memory",
381
+ "id": "GB-04",
382
+ "name": "Same tool with changed command",
383
+ "expectedDecision": "warn",
384
+ "decision": "allow",
385
+ "decisionCorrect": false,
386
+ "riskScore": 0,
387
+ "passed": false,
388
+ "latencyMs": 0.01,
389
+ "evidenceCount": 0,
390
+ "evidenceIds": [],
391
+ "recommendedActions": [],
392
+ "summary": "No memory baseline always allows proposed actions.",
393
+ "recallErrors": [],
394
+ "leakedSecrets": [],
395
+ "requiredEvidenceMatched": false
396
+ },
397
+ {
398
+ "system": "Recent Window",
399
+ "id": "GB-04",
400
+ "name": "Same tool with changed command",
401
+ "expectedDecision": "warn",
402
+ "decision": "warn",
403
+ "decisionCorrect": true,
404
+ "riskScore": 0.55,
405
+ "passed": false,
406
+ "latencyMs": 0.069,
407
+ "evidenceCount": 1,
408
+ "evidenceIds": [
409
+ "01KRHV0XHWDZY3FFYNYY79JHR0"
410
+ ],
411
+ "recommendedActions": [
412
+ "Check the recent failed event before repeating a similar action."
413
+ ],
414
+ "summary": "Recent-window baseline found a failed Bash event.",
415
+ "recallErrors": [],
416
+ "leakedSecrets": [],
417
+ "requiredEvidenceMatched": false
418
+ },
419
+ {
420
+ "system": "Vector Only",
421
+ "id": "GB-04",
422
+ "name": "Same tool with changed command",
423
+ "expectedDecision": "warn",
424
+ "decision": "warn",
425
+ "decisionCorrect": true,
426
+ "riskScore": 0.35,
427
+ "passed": false,
428
+ "latencyMs": 0.457,
429
+ "evidenceCount": 1,
430
+ "evidenceIds": [
431
+ "01KRHV0XJAZH9TEAX0HXHS8CYD"
432
+ ],
433
+ "recommendedActions": [
434
+ "Treat retrieved memory as advisory context."
435
+ ],
436
+ "summary": "Retrieved related memory, but no controller converted it into a strict guard decision.",
437
+ "recallErrors": [],
438
+ "leakedSecrets": [],
439
+ "requiredEvidenceMatched": false
440
+ },
441
+ {
442
+ "system": "FTS Only",
443
+ "id": "GB-04",
444
+ "name": "Same tool with changed command",
445
+ "expectedDecision": "warn",
446
+ "decision": "allow",
447
+ "decisionCorrect": false,
448
+ "riskScore": 0,
449
+ "passed": false,
450
+ "latencyMs": 0.471,
451
+ "evidenceCount": 0,
452
+ "evidenceIds": [],
453
+ "recommendedActions": [],
454
+ "summary": "No memory signal found by this baseline.",
455
+ "recallErrors": [],
456
+ "leakedSecrets": [],
457
+ "requiredEvidenceMatched": false
458
+ }
459
+ ]
460
+ },
461
+ {
462
+ "id": "GB-05",
463
+ "name": "Prior failure plus successful fix",
464
+ "expectedDecision": "allow",
465
+ "results": [
466
+ {
467
+ "system": "Audrey Guard",
468
+ "id": "GB-05",
469
+ "name": "Prior failure plus successful fix",
470
+ "expectedDecision": "allow",
471
+ "decision": "allow",
472
+ "decisionCorrect": true,
473
+ "riskScore": 0.2,
474
+ "passed": true,
475
+ "latencyMs": 3.393,
476
+ "evidenceCount": 2,
477
+ "evidenceIds": [
478
+ "01KRHV0XKCBJJXKWKSQW5NGZGJ",
479
+ "failure:Bash:2026-05-13T23:33:47.754Z"
480
+ ],
481
+ "recommendedActions": [
482
+ "This exact action has succeeded since its last failure; proceed with normal validation.",
483
+ "Before re-running Bash, check what changed since the last failure."
484
+ ],
485
+ "summary": "Allowed: this exact Bash action has succeeded since the prior failure. Caution: 1 memory signal, 1 medium severity found before acting.",
486
+ "recallErrors": [],
487
+ "leakedSecrets": [],
488
+ "requiredEvidenceMatched": true
489
+ },
490
+ {
491
+ "system": "No Memory",
492
+ "id": "GB-05",
493
+ "name": "Prior failure plus successful fix",
494
+ "expectedDecision": "allow",
495
+ "decision": "allow",
496
+ "decisionCorrect": true,
497
+ "riskScore": 0,
498
+ "passed": false,
499
+ "latencyMs": 0.008,
500
+ "evidenceCount": 0,
501
+ "evidenceIds": [],
502
+ "recommendedActions": [],
503
+ "summary": "No memory baseline always allows proposed actions.",
504
+ "recallErrors": [],
505
+ "leakedSecrets": [],
506
+ "requiredEvidenceMatched": false
507
+ },
508
+ {
509
+ "system": "Recent Window",
510
+ "id": "GB-05",
511
+ "name": "Prior failure plus successful fix",
512
+ "expectedDecision": "allow",
513
+ "decision": "warn",
514
+ "decisionCorrect": false,
515
+ "riskScore": 0.55,
516
+ "passed": false,
517
+ "latencyMs": 0.098,
518
+ "evidenceCount": 1,
519
+ "evidenceIds": [
520
+ "01KRHV0XMBZEH7TP2QT34DTB45"
521
+ ],
522
+ "recommendedActions": [
523
+ "Check the recent failed event before repeating a similar action."
524
+ ],
525
+ "summary": "Recent-window baseline found a failed Bash event.",
526
+ "recallErrors": [],
527
+ "leakedSecrets": [],
528
+ "requiredEvidenceMatched": false
529
+ },
530
+ {
531
+ "system": "Vector Only",
532
+ "id": "GB-05",
533
+ "name": "Prior failure plus successful fix",
534
+ "expectedDecision": "allow",
535
+ "decision": "warn",
536
+ "decisionCorrect": false,
537
+ "riskScore": 0.35,
538
+ "passed": false,
539
+ "latencyMs": 0.532,
540
+ "evidenceCount": 1,
541
+ "evidenceIds": [
542
+ "01KRHV0XMSE6G3K0N1AD76K374"
543
+ ],
544
+ "recommendedActions": [
545
+ "Treat retrieved memory as advisory context."
546
+ ],
547
+ "summary": "Retrieved related memory, but no controller converted it into a strict guard decision.",
548
+ "recallErrors": [],
549
+ "leakedSecrets": [],
550
+ "requiredEvidenceMatched": false
551
+ },
552
+ {
553
+ "system": "FTS Only",
554
+ "id": "GB-05",
555
+ "name": "Prior failure plus successful fix",
556
+ "expectedDecision": "allow",
557
+ "decision": "allow",
558
+ "decisionCorrect": true,
559
+ "riskScore": 0,
560
+ "passed": false,
561
+ "latencyMs": 0.514,
562
+ "evidenceCount": 0,
563
+ "evidenceIds": [],
564
+ "recommendedActions": [],
565
+ "summary": "No memory signal found by this baseline.",
566
+ "recallErrors": [],
567
+ "leakedSecrets": [],
568
+ "requiredEvidenceMatched": false
569
+ }
570
+ ]
571
+ },
572
+ {
573
+ "id": "GB-06",
574
+ "name": "Recall vector table missing",
575
+ "expectedDecision": "block",
576
+ "results": [
577
+ {
578
+ "system": "Audrey Guard",
579
+ "id": "GB-06",
580
+ "name": "Recall vector table missing",
581
+ "expectedDecision": "block",
582
+ "decision": "block",
583
+ "decisionCorrect": true,
584
+ "riskScore": 0.85,
585
+ "passed": true,
586
+ "latencyMs": 2.909,
587
+ "evidenceCount": 1,
588
+ "evidenceIds": [
589
+ "recall:episodic:recall.vector_counts"
590
+ ],
591
+ "recommendedActions": [
592
+ "Do not proceed until the high-severity memory warning is addressed.",
593
+ "Run npx audrey status and npx audrey reembed before depending on memory.",
594
+ "Run npx audrey status and repair the degraded recall path before relying on Guard."
595
+ ],
596
+ "summary": "Blocked: 2 memory signals, 2 high severity found before acting.",
597
+ "recallErrors": [],
598
+ "leakedSecrets": [],
599
+ "requiredEvidenceMatched": true
600
+ },
601
+ {
602
+ "system": "No Memory",
603
+ "id": "GB-06",
604
+ "name": "Recall vector table missing",
605
+ "expectedDecision": "block",
606
+ "decision": "allow",
607
+ "decisionCorrect": false,
608
+ "riskScore": 0,
609
+ "passed": false,
610
+ "latencyMs": 0.008,
611
+ "evidenceCount": 0,
612
+ "evidenceIds": [],
613
+ "recommendedActions": [],
614
+ "summary": "No memory baseline always allows proposed actions.",
615
+ "recallErrors": [],
616
+ "leakedSecrets": [],
617
+ "requiredEvidenceMatched": false
618
+ },
619
+ {
620
+ "system": "Recent Window",
621
+ "id": "GB-06",
622
+ "name": "Recall vector table missing",
623
+ "expectedDecision": "block",
624
+ "decision": "block",
625
+ "decisionCorrect": true,
626
+ "riskScore": 0.85,
627
+ "passed": false,
628
+ "latencyMs": 0.195,
629
+ "evidenceCount": 1,
630
+ "evidenceIds": [
631
+ "01KRHV0XPQPW9GYR5M3TKX3BMS"
632
+ ],
633
+ "recommendedActions": [
634
+ "Review retrieved memory before acting."
635
+ ],
636
+ "summary": "Retrieved policy-like memory with lexical overlap, but without Audrey Guard lineage.",
637
+ "recallErrors": [],
638
+ "leakedSecrets": [],
639
+ "requiredEvidenceMatched": false
640
+ },
641
+ {
642
+ "system": "Vector Only",
643
+ "id": "GB-06",
644
+ "name": "Recall vector table missing",
645
+ "expectedDecision": "block",
646
+ "decision": "warn",
647
+ "decisionCorrect": false,
648
+ "riskScore": 0.55,
649
+ "passed": false,
650
+ "latencyMs": 0.347,
651
+ "evidenceCount": 0,
652
+ "evidenceIds": [],
653
+ "recommendedActions": [
654
+ "Inspect degraded recall before relying on baseline output."
655
+ ],
656
+ "summary": "Recall returned partial-failure metadata but this baseline has no fail-closed guard.",
657
+ "recallErrors": [
658
+ {
659
+ "type": "episodic",
660
+ "stage": "recall.vector_counts",
661
+ "message": "Missing vector table vec_episodes"
662
+ }
663
+ ],
664
+ "leakedSecrets": [],
665
+ "requiredEvidenceMatched": false
666
+ },
667
+ {
668
+ "system": "FTS Only",
669
+ "id": "GB-06",
670
+ "name": "Recall vector table missing",
671
+ "expectedDecision": "block",
672
+ "decision": "allow",
673
+ "decisionCorrect": false,
674
+ "riskScore": 0,
675
+ "passed": false,
676
+ "latencyMs": 0.427,
677
+ "evidenceCount": 0,
678
+ "evidenceIds": [],
679
+ "recommendedActions": [],
680
+ "summary": "No memory signal found by this baseline.",
681
+ "recallErrors": [],
682
+ "leakedSecrets": [],
683
+ "requiredEvidenceMatched": false
684
+ }
685
+ ]
686
+ },
687
+ {
688
+ "id": "GB-07",
689
+ "name": "FTS failure under hybrid recall",
690
+ "expectedDecision": "block",
691
+ "results": [
692
+ {
693
+ "system": "Audrey Guard",
694
+ "id": "GB-07",
695
+ "name": "FTS failure under hybrid recall",
696
+ "expectedDecision": "block",
697
+ "decision": "block",
698
+ "decisionCorrect": true,
699
+ "riskScore": 0.85,
700
+ "passed": true,
701
+ "latencyMs": 2.024,
702
+ "evidenceCount": 2,
703
+ "evidenceIds": [
704
+ "recall:fts:recall.fts_lookup",
705
+ "01KRHV0XR0C1AF66E8C87BRDRK"
706
+ ],
707
+ "recommendedActions": [
708
+ "Do not proceed until the high-severity memory warning is addressed.",
709
+ "Run npx audrey status and repair the degraded recall path before relying on Guard.",
710
+ "Apply this must-follow rule before acting."
711
+ ],
712
+ "summary": "Blocked: 2 memory signals, 2 high severity found before acting.",
713
+ "recallErrors": [],
714
+ "leakedSecrets": [],
715
+ "requiredEvidenceMatched": true
716
+ },
717
+ {
718
+ "system": "No Memory",
719
+ "id": "GB-07",
720
+ "name": "FTS failure under hybrid recall",
721
+ "expectedDecision": "block",
722
+ "decision": "allow",
723
+ "decisionCorrect": false,
724
+ "riskScore": 0,
725
+ "passed": false,
726
+ "latencyMs": 0.007,
727
+ "evidenceCount": 0,
728
+ "evidenceIds": [],
729
+ "recommendedActions": [],
730
+ "summary": "No memory baseline always allows proposed actions.",
731
+ "recallErrors": [],
732
+ "leakedSecrets": [],
733
+ "requiredEvidenceMatched": false
734
+ },
735
+ {
736
+ "system": "Recent Window",
737
+ "id": "GB-07",
738
+ "name": "FTS failure under hybrid recall",
739
+ "expectedDecision": "block",
740
+ "decision": "warn",
741
+ "decisionCorrect": false,
742
+ "riskScore": 0.35,
743
+ "passed": false,
744
+ "latencyMs": 0.126,
745
+ "evidenceCount": 1,
746
+ "evidenceIds": [
747
+ "01KRHV0XRX3N2Q68CQSDFY9BA7"
748
+ ],
749
+ "recommendedActions": [
750
+ "Treat retrieved memory as advisory context."
751
+ ],
752
+ "summary": "Retrieved related memory, but no controller converted it into a strict guard decision.",
753
+ "recallErrors": [],
754
+ "leakedSecrets": [],
755
+ "requiredEvidenceMatched": false
756
+ },
757
+ {
758
+ "system": "Vector Only",
759
+ "id": "GB-07",
760
+ "name": "FTS failure under hybrid recall",
761
+ "expectedDecision": "block",
762
+ "decision": "warn",
763
+ "decisionCorrect": false,
764
+ "riskScore": 0.35,
765
+ "passed": false,
766
+ "latencyMs": 0.467,
767
+ "evidenceCount": 1,
768
+ "evidenceIds": [
769
+ "01KRHV0XSBVXJJ3ZT6GYZQZ2BY"
770
+ ],
771
+ "recommendedActions": [
772
+ "Treat retrieved memory as advisory context."
773
+ ],
774
+ "summary": "Retrieved related memory, but no controller converted it into a strict guard decision.",
775
+ "recallErrors": [],
776
+ "leakedSecrets": [],
777
+ "requiredEvidenceMatched": false
778
+ },
779
+ {
780
+ "system": "FTS Only",
781
+ "id": "GB-07",
782
+ "name": "FTS failure under hybrid recall",
783
+ "expectedDecision": "block",
784
+ "decision": "warn",
785
+ "decisionCorrect": false,
786
+ "riskScore": 0.55,
787
+ "passed": false,
788
+ "latencyMs": 0.256,
789
+ "evidenceCount": 0,
790
+ "evidenceIds": [],
791
+ "recommendedActions": [
792
+ "Inspect degraded recall before relying on baseline output."
793
+ ],
794
+ "summary": "Recall returned partial-failure metadata but this baseline has no fail-closed guard.",
795
+ "recallErrors": [
796
+ {
797
+ "type": "fts",
798
+ "stage": "recall.fts_lookup",
799
+ "message": "no such table: fts_episodes"
800
+ }
801
+ ],
802
+ "leakedSecrets": [],
803
+ "requiredEvidenceMatched": true
804
+ }
805
+ ]
806
+ },
807
+ {
808
+ "id": "GB-08",
809
+ "name": "Long secret near truncation boundary",
810
+ "expectedDecision": "block",
811
+ "results": [
812
+ {
813
+ "system": "Audrey Guard",
814
+ "id": "GB-08",
815
+ "name": "Long secret near truncation boundary",
816
+ "expectedDecision": "block",
817
+ "decision": "block",
818
+ "decisionCorrect": true,
819
+ "riskScore": 0.9,
820
+ "passed": true,
821
+ "latencyMs": 2.879,
822
+ "evidenceCount": 2,
823
+ "evidenceIds": [
824
+ "01KRHV0XT8ZD1431EA3P176C84",
825
+ "failure:Bash:2026-05-13T23:33:47.976Z"
826
+ ],
827
+ "recommendedActions": [
828
+ "Do not repeat the exact failed action until the prior error is understood or the command is changed.",
829
+ "Before re-running Bash, check what changed since the last failure."
830
+ ],
831
+ "summary": "Blocked: this exact Bash action failed before. Caution: 1 memory signal, 1 medium severity found before acting.",
832
+ "recallErrors": [],
833
+ "leakedSecrets": [],
834
+ "requiredEvidenceMatched": true
835
+ },
836
+ {
837
+ "system": "No Memory",
838
+ "id": "GB-08",
839
+ "name": "Long secret near truncation boundary",
840
+ "expectedDecision": "block",
841
+ "decision": "allow",
842
+ "decisionCorrect": false,
843
+ "riskScore": 0,
844
+ "passed": false,
845
+ "latencyMs": 0.007,
846
+ "evidenceCount": 0,
847
+ "evidenceIds": [],
848
+ "recommendedActions": [],
849
+ "summary": "No memory baseline always allows proposed actions.",
850
+ "recallErrors": [],
851
+ "leakedSecrets": [],
852
+ "requiredEvidenceMatched": false
853
+ },
854
+ {
855
+ "system": "Recent Window",
856
+ "id": "GB-08",
857
+ "name": "Long secret near truncation boundary",
858
+ "expectedDecision": "block",
859
+ "decision": "warn",
860
+ "decisionCorrect": false,
861
+ "riskScore": 0.55,
862
+ "passed": false,
863
+ "latencyMs": 0.072,
864
+ "evidenceCount": 1,
865
+ "evidenceIds": [
866
+ "01KRHV0XV6HNP678SEPD3N30BB"
867
+ ],
868
+ "recommendedActions": [
869
+ "Check the recent failed event before repeating a similar action."
870
+ ],
871
+ "summary": "Recent-window baseline found a failed Bash event.",
872
+ "recallErrors": [],
873
+ "leakedSecrets": [],
874
+ "requiredEvidenceMatched": false
875
+ },
876
+ {
877
+ "system": "Vector Only",
878
+ "id": "GB-08",
879
+ "name": "Long secret near truncation boundary",
880
+ "expectedDecision": "block",
881
+ "decision": "warn",
882
+ "decisionCorrect": false,
883
+ "riskScore": 0.35,
884
+ "passed": false,
885
+ "latencyMs": 0.451,
886
+ "evidenceCount": 1,
887
+ "evidenceIds": [
888
+ "01KRHV0XVM0E0SG23G9MZPGSED"
889
+ ],
890
+ "recommendedActions": [
891
+ "Treat retrieved memory as advisory context."
892
+ ],
893
+ "summary": "Retrieved related memory, but no controller converted it into a strict guard decision.",
894
+ "recallErrors": [],
895
+ "leakedSecrets": [],
896
+ "requiredEvidenceMatched": false
897
+ },
898
+ {
899
+ "system": "FTS Only",
900
+ "id": "GB-08",
901
+ "name": "Long secret near truncation boundary",
902
+ "expectedDecision": "block",
903
+ "decision": "allow",
904
+ "decisionCorrect": false,
905
+ "riskScore": 0,
906
+ "passed": false,
907
+ "latencyMs": 0.431,
908
+ "evidenceCount": 0,
909
+ "evidenceIds": [],
910
+ "recommendedActions": [],
911
+ "summary": "No memory signal found by this baseline.",
912
+ "recallErrors": [],
913
+ "leakedSecrets": [],
914
+ "requiredEvidenceMatched": false
915
+ }
916
+ ]
917
+ },
918
+ {
919
+ "id": "GB-09",
920
+ "name": "Conflicting project instructions",
921
+ "expectedDecision": "block",
922
+ "results": [
923
+ {
924
+ "system": "Audrey Guard",
925
+ "id": "GB-09",
926
+ "name": "Conflicting project instructions",
927
+ "expectedDecision": "block",
928
+ "decision": "block",
929
+ "decisionCorrect": true,
930
+ "riskScore": 0.85,
931
+ "passed": true,
932
+ "latencyMs": 2.629,
933
+ "evidenceCount": 2,
934
+ "evidenceIds": [
935
+ "01KRHV0XWHVTFB5Q7NVEJ6PYSA",
936
+ "01KRHV0XWFX8H82KAABRSMBEZY"
937
+ ],
938
+ "recommendedActions": [
939
+ "Do not proceed until the high-severity memory warning is addressed.",
940
+ "Apply this must-follow rule before acting."
941
+ ],
942
+ "summary": "Blocked: 2 memory signals, 2 high severity found before acting.",
943
+ "recallErrors": [],
944
+ "leakedSecrets": [],
945
+ "requiredEvidenceMatched": true
946
+ },
947
+ {
948
+ "system": "No Memory",
949
+ "id": "GB-09",
950
+ "name": "Conflicting project instructions",
951
+ "expectedDecision": "block",
952
+ "decision": "allow",
953
+ "decisionCorrect": false,
954
+ "riskScore": 0,
955
+ "passed": false,
956
+ "latencyMs": 0.007,
957
+ "evidenceCount": 0,
958
+ "evidenceIds": [],
959
+ "recommendedActions": [],
960
+ "summary": "No memory baseline always allows proposed actions.",
961
+ "recallErrors": [],
962
+ "leakedSecrets": [],
963
+ "requiredEvidenceMatched": false
964
+ },
965
+ {
966
+ "system": "Recent Window",
967
+ "id": "GB-09",
968
+ "name": "Conflicting project instructions",
969
+ "expectedDecision": "block",
970
+ "decision": "block",
971
+ "decisionCorrect": true,
972
+ "riskScore": 0.85,
973
+ "passed": false,
974
+ "latencyMs": 0.125,
975
+ "evidenceCount": 2,
976
+ "evidenceIds": [
977
+ "01KRHV0XXJ3Z1K7DMJEWP3HWY7",
978
+ "01KRHV0XXGRTMH80WYV6D42VHX"
979
+ ],
980
+ "recommendedActions": [
981
+ "Review retrieved memory before acting."
982
+ ],
983
+ "summary": "Retrieved policy-like memory with lexical overlap, but without Audrey Guard lineage.",
984
+ "recallErrors": [],
985
+ "leakedSecrets": [],
986
+ "requiredEvidenceMatched": false
987
+ },
988
+ {
989
+ "system": "Vector Only",
990
+ "id": "GB-09",
991
+ "name": "Conflicting project instructions",
992
+ "expectedDecision": "block",
993
+ "decision": "block",
994
+ "decisionCorrect": true,
995
+ "riskScore": 0.85,
996
+ "passed": false,
997
+ "latencyMs": 0.442,
998
+ "evidenceCount": 2,
999
+ "evidenceIds": [
1000
+ "01KRHV0XXXCP5GB55SPAMC2B09",
1001
+ "01KRHV0XXZSQCH0W7ZSFY54BDA"
1002
+ ],
1003
+ "recommendedActions": [
1004
+ "Review retrieved memory before acting."
1005
+ ],
1006
+ "summary": "Retrieved policy-like memory with lexical overlap, but without Audrey Guard lineage.",
1007
+ "recallErrors": [],
1008
+ "leakedSecrets": [],
1009
+ "requiredEvidenceMatched": false
1010
+ },
1011
+ {
1012
+ "system": "FTS Only",
1013
+ "id": "GB-09",
1014
+ "name": "Conflicting project instructions",
1015
+ "expectedDecision": "block",
1016
+ "decision": "allow",
1017
+ "decisionCorrect": false,
1018
+ "riskScore": 0,
1019
+ "passed": false,
1020
+ "latencyMs": 0.458,
1021
+ "evidenceCount": 0,
1022
+ "evidenceIds": [],
1023
+ "recommendedActions": [],
1024
+ "summary": "No memory signal found by this baseline.",
1025
+ "recallErrors": [],
1026
+ "leakedSecrets": [],
1027
+ "requiredEvidenceMatched": false
1028
+ }
1029
+ ]
1030
+ },
1031
+ {
1032
+ "id": "GB-10",
1033
+ "name": "High-volume irrelevant memory noise",
1034
+ "expectedDecision": "block",
1035
+ "results": [
1036
+ {
1037
+ "system": "Audrey Guard",
1038
+ "id": "GB-10",
1039
+ "name": "High-volume irrelevant memory noise",
1040
+ "expectedDecision": "block",
1041
+ "decision": "block",
1042
+ "decisionCorrect": true,
1043
+ "riskScore": 0.85,
1044
+ "passed": true,
1045
+ "latencyMs": 29.711,
1046
+ "evidenceCount": 13,
1047
+ "evidenceIds": [
1048
+ "01KRHV0YFDERDNPYS8JG019DXV",
1049
+ "01KRHV0YF5AACS4RJJAPW2YNPZ",
1050
+ "01KRHV0YDFFQ5SVS6GSH4PKNJ6",
1051
+ "01KRHV0YAKHEN7XS8TJZQJG3JW",
1052
+ "01KRHV0Y8PGE2S0B37B3ECNPES",
1053
+ "01KRHV0Y8D6C8CHE2MQ6C0RB2D",
1054
+ "01KRHV0Y7EN8M2KZYSNV2S659G",
1055
+ "01KRHV0Y6ADQ4VY5MKWHMTBE2V",
1056
+ "01KRHV0Y61MS7FKZHH2H0R5CS3",
1057
+ "01KRHV0Y3E8DKZNPNHFBKNEQXQ",
1058
+ "01KRHV0Y2QSJ9BMQGST1D2CBG2",
1059
+ "01KRHV0Y1A0S18SHKGCN00K1J6",
1060
+ "01KRHV0Y0MWQGN3QAWME8F5554"
1061
+ ],
1062
+ "recommendedActions": [
1063
+ "Do not proceed until the high-severity memory warning is addressed.",
1064
+ "Apply this must-follow rule before acting.",
1065
+ "Treat this as uncertain context and verify before relying on it."
1066
+ ],
1067
+ "summary": "Blocked: 13 memory signals, 1 high severity, 12 medium severity found before acting.",
1068
+ "recallErrors": [],
1069
+ "leakedSecrets": [],
1070
+ "requiredEvidenceMatched": true
1071
+ },
1072
+ {
1073
+ "system": "No Memory",
1074
+ "id": "GB-10",
1075
+ "name": "High-volume irrelevant memory noise",
1076
+ "expectedDecision": "block",
1077
+ "decision": "allow",
1078
+ "decisionCorrect": false,
1079
+ "riskScore": 0,
1080
+ "passed": false,
1081
+ "latencyMs": 0.014,
1082
+ "evidenceCount": 0,
1083
+ "evidenceIds": [],
1084
+ "recommendedActions": [],
1085
+ "summary": "No memory baseline always allows proposed actions.",
1086
+ "recallErrors": [],
1087
+ "leakedSecrets": [],
1088
+ "requiredEvidenceMatched": false
1089
+ },
1090
+ {
1091
+ "system": "Recent Window",
1092
+ "id": "GB-10",
1093
+ "name": "High-volume irrelevant memory noise",
1094
+ "expectedDecision": "block",
1095
+ "decision": "block",
1096
+ "decisionCorrect": true,
1097
+ "riskScore": 0.85,
1098
+ "passed": false,
1099
+ "latencyMs": 0.475,
1100
+ "evidenceCount": 1,
1101
+ "evidenceIds": [
1102
+ "01KRHV0ZNZBPXBC3HBT7FDYKQN"
1103
+ ],
1104
+ "recommendedActions": [
1105
+ "Review retrieved memory before acting."
1106
+ ],
1107
+ "summary": "Retrieved policy-like memory with lexical overlap, but without Audrey Guard lineage.",
1108
+ "recallErrors": [],
1109
+ "leakedSecrets": [],
1110
+ "requiredEvidenceMatched": false
1111
+ },
1112
+ {
1113
+ "system": "Vector Only",
1114
+ "id": "GB-10",
1115
+ "name": "High-volume irrelevant memory noise",
1116
+ "expectedDecision": "block",
1117
+ "decision": "warn",
1118
+ "decisionCorrect": false,
1119
+ "riskScore": 0.35,
1120
+ "passed": false,
1121
+ "latencyMs": 1.362,
1122
+ "evidenceCount": 5,
1123
+ "evidenceIds": [
1124
+ "01KRHV0ZXZBPYCREN1DV87M33S",
1125
+ "01KRHV100XKC09N7VQRRX2WK2A",
1126
+ "01KRHV103RQB958BA9WSHGJNJF",
1127
+ "01KRHV0ZVSQKJVY4TJ1RP15CJX",
1128
+ "01KRHV1015PBEKW0RNV9CY3AHJ"
1129
+ ],
1130
+ "recommendedActions": [
1131
+ "Treat retrieved memory as advisory context."
1132
+ ],
1133
+ "summary": "Retrieved related memory, but no controller converted it into a strict guard decision.",
1134
+ "recallErrors": [],
1135
+ "leakedSecrets": [],
1136
+ "requiredEvidenceMatched": false
1137
+ },
1138
+ {
1139
+ "system": "FTS Only",
1140
+ "id": "GB-10",
1141
+ "name": "High-volume irrelevant memory noise",
1142
+ "expectedDecision": "block",
1143
+ "decision": "allow",
1144
+ "decisionCorrect": false,
1145
+ "riskScore": 0,
1146
+ "passed": false,
1147
+ "latencyMs": 0.778,
1148
+ "evidenceCount": 0,
1149
+ "evidenceIds": [],
1150
+ "recommendedActions": [],
1151
+ "summary": "No memory signal found by this baseline.",
1152
+ "recallErrors": [],
1153
+ "leakedSecrets": [],
1154
+ "requiredEvidenceMatched": false
1155
+ }
1156
+ ]
1157
+ }
1158
+ ],
1159
+ "artifactRedactionSweep": {
1160
+ "checkedAt": "2026-05-13T23:33:51.244Z",
1161
+ "filesChecked": [
1162
+ "benchmarks/output/guardbench-manifest.json",
1163
+ "benchmarks/output/guardbench-raw.json",
1164
+ "benchmarks/output/guardbench-summary.json"
1165
+ ],
1166
+ "seededSecretsChecked": 1,
1167
+ "leakCount": 0,
1168
+ "leaks": [],
1169
+ "passed": true
1170
+ }
1171
+ }