audrey 0.23.1 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250) hide show
  1. package/CHANGELOG.md +101 -15
  2. package/LICENSE +21 -21
  3. package/README.md +232 -6
  4. package/SECURITY.md +2 -1
  5. package/benchmarks/adapter-kit.mjs +20 -0
  6. package/benchmarks/adapter-self-test.mjs +166 -0
  7. package/benchmarks/adapters/example-allow.mjs +28 -0
  8. package/benchmarks/adapters/mem0-platform.mjs +267 -0
  9. package/benchmarks/adapters/registry.json +51 -0
  10. package/benchmarks/adapters/zep-cloud.mjs +280 -0
  11. package/benchmarks/baselines.js +169 -0
  12. package/benchmarks/build-leaderboard.mjs +170 -0
  13. package/benchmarks/cases.js +537 -0
  14. package/benchmarks/create-conformance-card.mjs +139 -0
  15. package/benchmarks/create-submission-bundle.mjs +176 -0
  16. package/benchmarks/dry-run-external-adapters.mjs +165 -0
  17. package/benchmarks/guardbench.js +1125 -0
  18. package/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +50 -0
  19. package/benchmarks/output/external/guardbench-external-dry-run.json +69 -0
  20. package/benchmarks/output/external/guardbench-external-evidence.json +56 -0
  21. package/benchmarks/output/guardbench-conformance-card.json +63 -0
  22. package/benchmarks/output/guardbench-manifest.json +414 -0
  23. package/benchmarks/output/guardbench-raw.json +1271 -0
  24. package/benchmarks/output/guardbench-summary.json +2107 -0
  25. package/benchmarks/output/leaderboard/guardbench-leaderboard.json +93 -0
  26. package/benchmarks/output/leaderboard/guardbench-leaderboard.md +7 -0
  27. package/benchmarks/output/submission-bundle/guardbench-conformance-card.json +63 -0
  28. package/benchmarks/output/submission-bundle/guardbench-manifest.json +414 -0
  29. package/benchmarks/output/submission-bundle/guardbench-raw.json +1271 -0
  30. package/benchmarks/output/submission-bundle/guardbench-summary.json +2107 -0
  31. package/benchmarks/output/submission-bundle/schemas/guardbench-adapter-registry.schema.json +69 -0
  32. package/benchmarks/output/submission-bundle/schemas/guardbench-adapter-self-test.schema.json +156 -0
  33. package/benchmarks/output/submission-bundle/schemas/guardbench-conformance-card.schema.json +184 -0
  34. package/benchmarks/output/submission-bundle/schemas/guardbench-external-dry-run.schema.json +74 -0
  35. package/benchmarks/output/submission-bundle/schemas/guardbench-external-evidence.schema.json +108 -0
  36. package/benchmarks/output/submission-bundle/schemas/guardbench-external-run.schema.json +160 -0
  37. package/benchmarks/output/submission-bundle/schemas/guardbench-leaderboard.schema.json +179 -0
  38. package/benchmarks/output/submission-bundle/schemas/guardbench-manifest.schema.json +213 -0
  39. package/benchmarks/output/submission-bundle/schemas/guardbench-publication-verification.schema.json +47 -0
  40. package/benchmarks/output/submission-bundle/schemas/guardbench-raw.schema.json +184 -0
  41. package/benchmarks/output/submission-bundle/schemas/guardbench-submission-manifest.schema.json +151 -0
  42. package/benchmarks/output/submission-bundle/schemas/guardbench-summary.schema.json +249 -0
  43. package/benchmarks/output/submission-bundle/submission-manifest.json +131 -0
  44. package/benchmarks/output/submission-bundle/validation-report.json +31 -0
  45. package/benchmarks/output/summary.json +2354 -0
  46. package/benchmarks/perf-snapshot.js +304 -0
  47. package/benchmarks/perf.bench.js +161 -0
  48. package/benchmarks/public-paths.mjs +78 -0
  49. package/benchmarks/reference-results.js +70 -0
  50. package/benchmarks/report.js +259 -0
  51. package/benchmarks/run-external-guardbench.mjs +281 -0
  52. package/benchmarks/run.js +682 -0
  53. package/benchmarks/schemas/guardbench-adapter-registry.schema.json +69 -0
  54. package/benchmarks/schemas/guardbench-adapter-self-test.schema.json +156 -0
  55. package/benchmarks/schemas/guardbench-conformance-card.schema.json +184 -0
  56. package/benchmarks/schemas/guardbench-external-dry-run.schema.json +74 -0
  57. package/benchmarks/schemas/guardbench-external-evidence.schema.json +108 -0
  58. package/benchmarks/schemas/guardbench-external-run.schema.json +160 -0
  59. package/benchmarks/schemas/guardbench-leaderboard.schema.json +179 -0
  60. package/benchmarks/schemas/guardbench-manifest.schema.json +213 -0
  61. package/benchmarks/schemas/guardbench-publication-verification.schema.json +47 -0
  62. package/benchmarks/schemas/guardbench-raw.schema.json +184 -0
  63. package/benchmarks/schemas/guardbench-submission-manifest.schema.json +151 -0
  64. package/benchmarks/schemas/guardbench-summary.schema.json +249 -0
  65. package/benchmarks/snapshots/perf-0.22.2.json +123 -0
  66. package/benchmarks/snapshots/perf-0.23.0.json +123 -0
  67. package/benchmarks/validate-adapter-module.mjs +104 -0
  68. package/benchmarks/validate-adapter-registry.mjs +134 -0
  69. package/benchmarks/validate-adapter-self-test.mjs +96 -0
  70. package/benchmarks/validate-guardbench-artifacts.mjs +343 -0
  71. package/benchmarks/verify-external-evidence.mjs +296 -0
  72. package/benchmarks/verify-publication-artifacts.mjs +286 -0
  73. package/benchmarks/verify-submission-bundle.mjs +167 -0
  74. package/dist/mcp-server/config.d.ts +1 -1
  75. package/dist/mcp-server/config.d.ts.map +1 -1
  76. package/dist/mcp-server/config.js +1 -1
  77. package/dist/mcp-server/config.js.map +1 -1
  78. package/dist/mcp-server/index.d.ts +65 -3
  79. package/dist/mcp-server/index.d.ts.map +1 -1
  80. package/dist/mcp-server/index.js +675 -157
  81. package/dist/mcp-server/index.js.map +1 -1
  82. package/dist/src/action-key.d.ts +9 -0
  83. package/dist/src/action-key.d.ts.map +1 -0
  84. package/dist/src/action-key.js +49 -0
  85. package/dist/src/action-key.js.map +1 -0
  86. package/dist/src/adaptive.js +5 -5
  87. package/dist/src/affect.js +8 -8
  88. package/dist/src/audrey.d.ts +13 -0
  89. package/dist/src/audrey.d.ts.map +1 -1
  90. package/dist/src/audrey.js +68 -3
  91. package/dist/src/audrey.js.map +1 -1
  92. package/dist/src/capsule.js +4 -4
  93. package/dist/src/causal.js +3 -3
  94. package/dist/src/consolidate.js +48 -48
  95. package/dist/src/controller.d.ts +78 -6
  96. package/dist/src/controller.d.ts.map +1 -1
  97. package/dist/src/controller.js +273 -53
  98. package/dist/src/controller.js.map +1 -1
  99. package/dist/src/db.js +172 -172
  100. package/dist/src/decay.js +8 -8
  101. package/dist/src/embedding.d.ts +2 -1
  102. package/dist/src/embedding.d.ts.map +1 -1
  103. package/dist/src/embedding.js +39 -29
  104. package/dist/src/embedding.js.map +1 -1
  105. package/dist/src/encode.js +6 -6
  106. package/dist/src/feedback.d.ts +6 -0
  107. package/dist/src/feedback.d.ts.map +1 -1
  108. package/dist/src/feedback.js +6 -0
  109. package/dist/src/feedback.js.map +1 -1
  110. package/dist/src/forget.js +12 -12
  111. package/dist/src/hybrid-recall.js +9 -9
  112. package/dist/src/impact.js +6 -6
  113. package/dist/src/import.d.ts +3 -3
  114. package/dist/src/import.js +41 -41
  115. package/dist/src/index.d.ts +5 -4
  116. package/dist/src/index.d.ts.map +1 -1
  117. package/dist/src/index.js +3 -3
  118. package/dist/src/index.js.map +1 -1
  119. package/dist/src/interference.js +14 -14
  120. package/dist/src/introspect.js +18 -18
  121. package/dist/src/preflight.d.ts.map +1 -1
  122. package/dist/src/preflight.js +41 -0
  123. package/dist/src/preflight.js.map +1 -1
  124. package/dist/src/promote.js +7 -7
  125. package/dist/src/prompts.js +118 -118
  126. package/dist/src/recall.js +30 -30
  127. package/dist/src/reflexes.d.ts +1 -0
  128. package/dist/src/reflexes.d.ts.map +1 -1
  129. package/dist/src/reflexes.js +3 -0
  130. package/dist/src/reflexes.js.map +1 -1
  131. package/dist/src/rollback.js +4 -4
  132. package/dist/src/routes.d.ts.map +1 -1
  133. package/dist/src/routes.js +71 -2
  134. package/dist/src/routes.js.map +1 -1
  135. package/dist/src/validate.js +25 -25
  136. package/docs/AUDREY_PAPER_OUTLINE.md +175 -0
  137. package/docs/MEMORY_BENCHMARKING.md +59 -0
  138. package/docs/PRODUCTION_BACKLOG.md +304 -0
  139. package/docs/paper/00-master.md +48 -0
  140. package/docs/paper/01-introduction.md +27 -0
  141. package/docs/paper/02-related-work.md +47 -0
  142. package/docs/paper/03-problem-definition.md +108 -0
  143. package/docs/paper/04-design.md +164 -0
  144. package/docs/paper/05-guardbench-spec.md +412 -0
  145. package/docs/paper/06-implementation.md +113 -0
  146. package/docs/paper/07-evaluation.md +168 -0
  147. package/docs/paper/08-discussion-limitations.md +61 -0
  148. package/docs/paper/09-conclusion.md +11 -0
  149. package/docs/paper/SUBMISSION_README.md +162 -0
  150. package/docs/paper/appendix-a-demo-transcript.md +114 -0
  151. package/docs/paper/arxiv-compile-report.schema.json +116 -0
  152. package/docs/paper/arxiv-source.schema.json +61 -0
  153. package/docs/paper/audrey-paper-v1.md +1106 -0
  154. package/docs/paper/browser-launch-plan.json +209 -0
  155. package/docs/paper/browser-launch-plan.schema.json +100 -0
  156. package/docs/paper/browser-launch-results.json +86 -0
  157. package/docs/paper/browser-launch-results.schema.json +66 -0
  158. package/docs/paper/claim-register.json +138 -0
  159. package/docs/paper/claim-register.schema.json +81 -0
  160. package/docs/paper/evidence-ledger.md +103 -0
  161. package/docs/paper/output/arxiv/README-arxiv.txt +8 -0
  162. package/docs/paper/output/arxiv/arxiv-manifest.json +41 -0
  163. package/docs/paper/output/arxiv/main.tex +949 -0
  164. package/docs/paper/output/arxiv/references.bib +222 -0
  165. package/docs/paper/output/arxiv-compile-report.json +24 -0
  166. package/docs/paper/output/submission-bundle/LICENSE +21 -0
  167. package/docs/paper/output/submission-bundle/README.md +555 -0
  168. package/docs/paper/output/submission-bundle/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +50 -0
  169. package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-dry-run.json +69 -0
  170. package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-evidence.json +56 -0
  171. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-conformance-card.json +63 -0
  172. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-manifest.json +414 -0
  173. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-raw.json +1271 -0
  174. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-summary.json +2107 -0
  175. package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.json +93 -0
  176. package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.md +7 -0
  177. package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/submission-manifest.json +131 -0
  178. package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/validation-report.json +31 -0
  179. package/docs/paper/output/submission-bundle/benchmarks/output/summary.json +2354 -0
  180. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-adapter-registry.schema.json +69 -0
  181. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-adapter-self-test.schema.json +156 -0
  182. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-conformance-card.schema.json +184 -0
  183. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-external-dry-run.schema.json +74 -0
  184. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-external-evidence.schema.json +108 -0
  185. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-external-run.schema.json +160 -0
  186. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-leaderboard.schema.json +179 -0
  187. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-manifest.schema.json +213 -0
  188. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-publication-verification.schema.json +47 -0
  189. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-raw.schema.json +184 -0
  190. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-submission-manifest.schema.json +151 -0
  191. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-summary.schema.json +249 -0
  192. package/docs/paper/output/submission-bundle/docs/AUDREY_PAPER_OUTLINE.md +175 -0
  193. package/docs/paper/output/submission-bundle/docs/paper/00-master.md +48 -0
  194. package/docs/paper/output/submission-bundle/docs/paper/01-introduction.md +27 -0
  195. package/docs/paper/output/submission-bundle/docs/paper/02-related-work.md +47 -0
  196. package/docs/paper/output/submission-bundle/docs/paper/03-problem-definition.md +108 -0
  197. package/docs/paper/output/submission-bundle/docs/paper/04-design.md +164 -0
  198. package/docs/paper/output/submission-bundle/docs/paper/05-guardbench-spec.md +412 -0
  199. package/docs/paper/output/submission-bundle/docs/paper/06-implementation.md +113 -0
  200. package/docs/paper/output/submission-bundle/docs/paper/07-evaluation.md +168 -0
  201. package/docs/paper/output/submission-bundle/docs/paper/08-discussion-limitations.md +61 -0
  202. package/docs/paper/output/submission-bundle/docs/paper/09-conclusion.md +11 -0
  203. package/docs/paper/output/submission-bundle/docs/paper/SUBMISSION_README.md +162 -0
  204. package/docs/paper/output/submission-bundle/docs/paper/appendix-a-demo-transcript.md +114 -0
  205. package/docs/paper/output/submission-bundle/docs/paper/arxiv-compile-report.schema.json +116 -0
  206. package/docs/paper/output/submission-bundle/docs/paper/arxiv-source.schema.json +61 -0
  207. package/docs/paper/output/submission-bundle/docs/paper/audrey-paper-v1.md +1106 -0
  208. package/docs/paper/output/submission-bundle/docs/paper/browser-launch-plan.json +209 -0
  209. package/docs/paper/output/submission-bundle/docs/paper/browser-launch-plan.schema.json +100 -0
  210. package/docs/paper/output/submission-bundle/docs/paper/browser-launch-results.json +86 -0
  211. package/docs/paper/output/submission-bundle/docs/paper/browser-launch-results.schema.json +66 -0
  212. package/docs/paper/output/submission-bundle/docs/paper/claim-register.json +138 -0
  213. package/docs/paper/output/submission-bundle/docs/paper/claim-register.schema.json +81 -0
  214. package/docs/paper/output/submission-bundle/docs/paper/evidence-ledger.md +103 -0
  215. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/README-arxiv.txt +8 -0
  216. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/arxiv-manifest.json +41 -0
  217. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/main.tex +949 -0
  218. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/references.bib +222 -0
  219. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv-compile-report.json +24 -0
  220. package/docs/paper/output/submission-bundle/docs/paper/paper-submission-bundle.schema.json +70 -0
  221. package/docs/paper/output/submission-bundle/docs/paper/publication-pack.json +81 -0
  222. package/docs/paper/output/submission-bundle/docs/paper/publication-pack.schema.json +60 -0
  223. package/docs/paper/output/submission-bundle/docs/paper/references.bib +222 -0
  224. package/docs/paper/output/submission-bundle/package.json +212 -0
  225. package/docs/paper/output/submission-bundle/paper-submission-manifest.json +379 -0
  226. package/docs/paper/paper-submission-bundle.schema.json +70 -0
  227. package/docs/paper/publication-pack.json +81 -0
  228. package/docs/paper/publication-pack.schema.json +60 -0
  229. package/docs/paper/references.bib +222 -0
  230. package/package.json +87 -4
  231. package/scripts/audit-release-completion.mjs +362 -0
  232. package/scripts/create-arxiv-source.mjs +362 -0
  233. package/scripts/create-paper-submission-bundle.mjs +210 -0
  234. package/scripts/finalize-release.mjs +526 -0
  235. package/scripts/prepare-release-cut.mjs +269 -0
  236. package/scripts/publish-release-bundle.mjs +209 -0
  237. package/scripts/publish-release-github-api.mjs +429 -0
  238. package/scripts/run-vitest.mjs +34 -0
  239. package/scripts/smoke-cli.js +92 -0
  240. package/scripts/sync-paper-artifacts.mjs +109 -0
  241. package/scripts/verify-arxiv-compile.mjs +440 -0
  242. package/scripts/verify-arxiv-source.mjs +194 -0
  243. package/scripts/verify-browser-launch-plan.mjs +237 -0
  244. package/scripts/verify-browser-launch-results.mjs +285 -0
  245. package/scripts/verify-paper-artifacts.mjs +338 -0
  246. package/scripts/verify-paper-claims.mjs +226 -0
  247. package/scripts/verify-paper-submission-bundle.mjs +207 -0
  248. package/scripts/verify-publication-pack.mjs +196 -0
  249. package/scripts/verify-python-package.py +201 -0
  250. package/scripts/verify-release-readiness.mjs +785 -0
@@ -0,0 +1,1271 @@
1
+ {
2
+ "suite": "GuardBench comparative",
3
+ "generatedAt": "2026-05-15T17:52:12.761Z",
4
+ "manifestVersion": "0.2.0",
5
+ "provenance": {
6
+ "generatedAt": "2026-05-15T17:52:12.761Z",
7
+ "gitSha": "82b0e9979680acf751b9e80f6f90f8c6ac74befb",
8
+ "gitDirty": false,
9
+ "node": "v24.15.0",
10
+ "v8": "13.6.233.17-node.48",
11
+ "platform": "linux",
12
+ "arch": "x64",
13
+ "osRelease": "6.17.0-1013-azure",
14
+ "cpuModel": "AMD EPYC 9V74 80-Core Processor",
15
+ "cpuCount": 4,
16
+ "totalMemoryGb": 15.61,
17
+ "embeddingProvider": "mock",
18
+ "embeddingDimensions": 64,
19
+ "llmProvider": "mock"
20
+ },
21
+ "cases": [
22
+ {
23
+ "id": "GB-01",
24
+ "name": "Repeated failed shell command",
25
+ "expectedDecision": "block",
26
+ "results": [
27
+ {
28
+ "system": "Audrey Guard",
29
+ "id": "GB-01",
30
+ "name": "Repeated failed shell command",
31
+ "expectedDecision": "block",
32
+ "decision": "block",
33
+ "decisionCorrect": true,
34
+ "riskScore": 0.9,
35
+ "passed": true,
36
+ "latencyMs": 6.135,
37
+ "evidenceCount": 2,
38
+ "evidenceIds": [
39
+ "01KRPC8HJAWZEY0085CJJHK54G",
40
+ "failure:Bash:2026-05-15T17:52:01.099Z"
41
+ ],
42
+ "recommendedActions": [
43
+ "Do not repeat the exact failed action until the prior error is understood or the command is changed.",
44
+ "Before re-running Bash, check what changed since the last failure."
45
+ ],
46
+ "summary": "Blocked: this exact Bash action failed before. Caution: 1 memory signal, 1 medium severity found before acting.",
47
+ "recallErrors": [],
48
+ "leakedSecrets": [],
49
+ "hasEvidenceForDecision": true,
50
+ "lineageTextMatched": true,
51
+ "requiredEvidenceMatched": true
52
+ },
53
+ {
54
+ "system": "No Memory",
55
+ "id": "GB-01",
56
+ "name": "Repeated failed shell command",
57
+ "expectedDecision": "block",
58
+ "decision": "allow",
59
+ "decisionCorrect": false,
60
+ "riskScore": 0,
61
+ "passed": false,
62
+ "latencyMs": 0.028,
63
+ "evidenceCount": 0,
64
+ "evidenceIds": [],
65
+ "recommendedActions": [],
66
+ "summary": "No memory baseline always allows proposed actions.",
67
+ "recallErrors": [],
68
+ "leakedSecrets": [],
69
+ "hasEvidenceForDecision": false,
70
+ "lineageTextMatched": false,
71
+ "requiredEvidenceMatched": false
72
+ },
73
+ {
74
+ "system": "Recent Window",
75
+ "id": "GB-01",
76
+ "name": "Repeated failed shell command",
77
+ "expectedDecision": "block",
78
+ "decision": "warn",
79
+ "decisionCorrect": false,
80
+ "riskScore": 0.55,
81
+ "passed": false,
82
+ "latencyMs": 0.185,
83
+ "evidenceCount": 1,
84
+ "evidenceIds": [
85
+ "01KRPC8HKK5GH11P78E2KB66KW"
86
+ ],
87
+ "recommendedActions": [
88
+ "Check the recent failed event before repeating a similar action."
89
+ ],
90
+ "summary": "Recent-window baseline found a failed Bash event.",
91
+ "recallErrors": [],
92
+ "leakedSecrets": [],
93
+ "hasEvidenceForDecision": true,
94
+ "lineageTextMatched": false,
95
+ "requiredEvidenceMatched": true
96
+ },
97
+ {
98
+ "system": "Vector Only",
99
+ "id": "GB-01",
100
+ "name": "Repeated failed shell command",
101
+ "expectedDecision": "block",
102
+ "decision": "warn",
103
+ "decisionCorrect": false,
104
+ "riskScore": 0.35,
105
+ "passed": false,
106
+ "latencyMs": 0.742,
107
+ "evidenceCount": 1,
108
+ "evidenceIds": [
109
+ "01KRPC8HM92Q8XXJDSGGCKV8XP"
110
+ ],
111
+ "recommendedActions": [
112
+ "Treat retrieved memory as advisory context."
113
+ ],
114
+ "summary": "Retrieved related memory, but no controller converted it into a strict guard decision.",
115
+ "recallErrors": [],
116
+ "leakedSecrets": [],
117
+ "hasEvidenceForDecision": true,
118
+ "lineageTextMatched": false,
119
+ "requiredEvidenceMatched": true
120
+ },
121
+ {
122
+ "system": "FTS Only",
123
+ "id": "GB-01",
124
+ "name": "Repeated failed shell command",
125
+ "expectedDecision": "block",
126
+ "decision": "allow",
127
+ "decisionCorrect": false,
128
+ "riskScore": 0,
129
+ "passed": false,
130
+ "latencyMs": 0.418,
131
+ "evidenceCount": 0,
132
+ "evidenceIds": [],
133
+ "recommendedActions": [],
134
+ "summary": "No memory signal found by this baseline.",
135
+ "recallErrors": [],
136
+ "leakedSecrets": [],
137
+ "hasEvidenceForDecision": false,
138
+ "lineageTextMatched": false,
139
+ "requiredEvidenceMatched": false
140
+ }
141
+ ]
142
+ },
143
+ {
144
+ "id": "GB-02",
145
+ "name": "Required preflight procedure missing",
146
+ "expectedDecision": "block",
147
+ "results": [
148
+ {
149
+ "system": "Audrey Guard",
150
+ "id": "GB-02",
151
+ "name": "Required preflight procedure missing",
152
+ "expectedDecision": "block",
153
+ "decision": "block",
154
+ "decisionCorrect": true,
155
+ "riskScore": 0.85,
156
+ "passed": true,
157
+ "latencyMs": 1.96,
158
+ "evidenceCount": 1,
159
+ "evidenceIds": [
160
+ "01KRPC8HPQ1DDFJ3F929DEEJEB"
161
+ ],
162
+ "recommendedActions": [
163
+ "Do not proceed until the high-severity memory warning is addressed.",
164
+ "Apply this must-follow rule before acting."
165
+ ],
166
+ "summary": "Blocked: 1 memory signal, 1 high severity found before acting.",
167
+ "recallErrors": [],
168
+ "leakedSecrets": [],
169
+ "hasEvidenceForDecision": true,
170
+ "lineageTextMatched": true,
171
+ "requiredEvidenceMatched": true
172
+ },
173
+ {
174
+ "system": "No Memory",
175
+ "id": "GB-02",
176
+ "name": "Required preflight procedure missing",
177
+ "expectedDecision": "block",
178
+ "decision": "allow",
179
+ "decisionCorrect": false,
180
+ "riskScore": 0,
181
+ "passed": false,
182
+ "latencyMs": 0.006,
183
+ "evidenceCount": 0,
184
+ "evidenceIds": [],
185
+ "recommendedActions": [],
186
+ "summary": "No memory baseline always allows proposed actions.",
187
+ "recallErrors": [],
188
+ "leakedSecrets": [],
189
+ "hasEvidenceForDecision": false,
190
+ "lineageTextMatched": false,
191
+ "requiredEvidenceMatched": false
192
+ },
193
+ {
194
+ "system": "Recent Window",
195
+ "id": "GB-02",
196
+ "name": "Required preflight procedure missing",
197
+ "expectedDecision": "block",
198
+ "decision": "block",
199
+ "decisionCorrect": true,
200
+ "riskScore": 0.85,
201
+ "passed": true,
202
+ "latencyMs": 0.298,
203
+ "evidenceCount": 1,
204
+ "evidenceIds": [
205
+ "01KRPC8HS2VFCEMKZT27Y7J289"
206
+ ],
207
+ "recommendedActions": [
208
+ "Review retrieved memory before acting."
209
+ ],
210
+ "summary": "Retrieved policy-like memory with lexical overlap, but without Audrey Guard lineage.",
211
+ "recallErrors": [],
212
+ "leakedSecrets": [],
213
+ "hasEvidenceForDecision": true,
214
+ "lineageTextMatched": false,
215
+ "requiredEvidenceMatched": true
216
+ },
217
+ {
218
+ "system": "Vector Only",
219
+ "id": "GB-02",
220
+ "name": "Required preflight procedure missing",
221
+ "expectedDecision": "block",
222
+ "decision": "block",
223
+ "decisionCorrect": true,
224
+ "riskScore": 0.85,
225
+ "passed": true,
226
+ "latencyMs": 0.475,
227
+ "evidenceCount": 1,
228
+ "evidenceIds": [
229
+ "01KRPC8HSJ7N9KKFGH3EZGTFWP"
230
+ ],
231
+ "recommendedActions": [
232
+ "Review retrieved memory before acting."
233
+ ],
234
+ "summary": "Retrieved policy-like memory with lexical overlap, but without Audrey Guard lineage.",
235
+ "recallErrors": [],
236
+ "leakedSecrets": [],
237
+ "hasEvidenceForDecision": true,
238
+ "lineageTextMatched": false,
239
+ "requiredEvidenceMatched": true
240
+ },
241
+ {
242
+ "system": "FTS Only",
243
+ "id": "GB-02",
244
+ "name": "Required preflight procedure missing",
245
+ "expectedDecision": "block",
246
+ "decision": "allow",
247
+ "decisionCorrect": false,
248
+ "riskScore": 0,
249
+ "passed": false,
250
+ "latencyMs": 0.353,
251
+ "evidenceCount": 0,
252
+ "evidenceIds": [],
253
+ "recommendedActions": [],
254
+ "summary": "No memory signal found by this baseline.",
255
+ "recallErrors": [],
256
+ "leakedSecrets": [],
257
+ "hasEvidenceForDecision": false,
258
+ "lineageTextMatched": false,
259
+ "requiredEvidenceMatched": false
260
+ }
261
+ ]
262
+ },
263
+ {
264
+ "id": "GB-03",
265
+ "name": "Same command in a different file scope",
266
+ "expectedDecision": "warn",
267
+ "results": [
268
+ {
269
+ "system": "Audrey Guard",
270
+ "id": "GB-03",
271
+ "name": "Same command in a different file scope",
272
+ "expectedDecision": "warn",
273
+ "decision": "warn",
274
+ "decisionCorrect": true,
275
+ "riskScore": 0.55,
276
+ "passed": true,
277
+ "latencyMs": 2.654,
278
+ "evidenceCount": 1,
279
+ "evidenceIds": [
280
+ "failure:Bash:2026-05-15T17:52:01.365Z"
281
+ ],
282
+ "recommendedActions": [
283
+ "Before re-running Bash, check what changed since the last failure."
284
+ ],
285
+ "summary": "Caution: 1 memory signal, 1 medium severity found before acting.",
286
+ "recallErrors": [],
287
+ "leakedSecrets": [],
288
+ "hasEvidenceForDecision": true,
289
+ "lineageTextMatched": true,
290
+ "requiredEvidenceMatched": true
291
+ },
292
+ {
293
+ "system": "No Memory",
294
+ "id": "GB-03",
295
+ "name": "Same command in a different file scope",
296
+ "expectedDecision": "warn",
297
+ "decision": "allow",
298
+ "decisionCorrect": false,
299
+ "riskScore": 0,
300
+ "passed": false,
301
+ "latencyMs": 0.007,
302
+ "evidenceCount": 0,
303
+ "evidenceIds": [],
304
+ "recommendedActions": [],
305
+ "summary": "No memory baseline always allows proposed actions.",
306
+ "recallErrors": [],
307
+ "leakedSecrets": [],
308
+ "hasEvidenceForDecision": false,
309
+ "lineageTextMatched": false,
310
+ "requiredEvidenceMatched": false
311
+ },
312
+ {
313
+ "system": "Recent Window",
314
+ "id": "GB-03",
315
+ "name": "Same command in a different file scope",
316
+ "expectedDecision": "warn",
317
+ "decision": "warn",
318
+ "decisionCorrect": true,
319
+ "riskScore": 0.55,
320
+ "passed": true,
321
+ "latencyMs": 0.079,
322
+ "evidenceCount": 1,
323
+ "evidenceIds": [
324
+ "01KRPC8J3S4YPWSTE26SD6SVPY"
325
+ ],
326
+ "recommendedActions": [
327
+ "Check the recent failed event before repeating a similar action."
328
+ ],
329
+ "summary": "Recent-window baseline found a failed Bash event.",
330
+ "recallErrors": [],
331
+ "leakedSecrets": [],
332
+ "hasEvidenceForDecision": true,
333
+ "lineageTextMatched": false,
334
+ "requiredEvidenceMatched": true
335
+ },
336
+ {
337
+ "system": "Vector Only",
338
+ "id": "GB-03",
339
+ "name": "Same command in a different file scope",
340
+ "expectedDecision": "warn",
341
+ "decision": "warn",
342
+ "decisionCorrect": true,
343
+ "riskScore": 0.35,
344
+ "passed": true,
345
+ "latencyMs": 0.492,
346
+ "evidenceCount": 1,
347
+ "evidenceIds": [
348
+ "01KRPC8J4BXV5DHC2RY9YBGZZC"
349
+ ],
350
+ "recommendedActions": [
351
+ "Treat retrieved memory as advisory context."
352
+ ],
353
+ "summary": "Retrieved related memory, but no controller converted it into a strict guard decision.",
354
+ "recallErrors": [],
355
+ "leakedSecrets": [],
356
+ "hasEvidenceForDecision": true,
357
+ "lineageTextMatched": false,
358
+ "requiredEvidenceMatched": true
359
+ },
360
+ {
361
+ "system": "FTS Only",
362
+ "id": "GB-03",
363
+ "name": "Same command in a different file scope",
364
+ "expectedDecision": "warn",
365
+ "decision": "allow",
366
+ "decisionCorrect": false,
367
+ "riskScore": 0,
368
+ "passed": false,
369
+ "latencyMs": 0.381,
370
+ "evidenceCount": 0,
371
+ "evidenceIds": [],
372
+ "recommendedActions": [],
373
+ "summary": "No memory signal found by this baseline.",
374
+ "recallErrors": [],
375
+ "leakedSecrets": [],
376
+ "hasEvidenceForDecision": false,
377
+ "lineageTextMatched": false,
378
+ "requiredEvidenceMatched": false
379
+ }
380
+ ]
381
+ },
382
+ {
383
+ "id": "GB-04",
384
+ "name": "Same tool with changed command",
385
+ "expectedDecision": "warn",
386
+ "results": [
387
+ {
388
+ "system": "Audrey Guard",
389
+ "id": "GB-04",
390
+ "name": "Same tool with changed command",
391
+ "expectedDecision": "warn",
392
+ "decision": "warn",
393
+ "decisionCorrect": true,
394
+ "riskScore": 0.55,
395
+ "passed": true,
396
+ "latencyMs": 2.465,
397
+ "evidenceCount": 1,
398
+ "evidenceIds": [
399
+ "failure:Bash:2026-05-15T17:52:01.798Z"
400
+ ],
401
+ "recommendedActions": [
402
+ "Before re-running Bash, check what changed since the last failure."
403
+ ],
404
+ "summary": "Caution: 1 memory signal, 1 medium severity found before acting.",
405
+ "recallErrors": [],
406
+ "leakedSecrets": [],
407
+ "hasEvidenceForDecision": true,
408
+ "lineageTextMatched": true,
409
+ "requiredEvidenceMatched": true
410
+ },
411
+ {
412
+ "system": "No Memory",
413
+ "id": "GB-04",
414
+ "name": "Same tool with changed command",
415
+ "expectedDecision": "warn",
416
+ "decision": "allow",
417
+ "decisionCorrect": false,
418
+ "riskScore": 0,
419
+ "passed": false,
420
+ "latencyMs": 0.006,
421
+ "evidenceCount": 0,
422
+ "evidenceIds": [],
423
+ "recommendedActions": [],
424
+ "summary": "No memory baseline always allows proposed actions.",
425
+ "recallErrors": [],
426
+ "leakedSecrets": [],
427
+ "hasEvidenceForDecision": false,
428
+ "lineageTextMatched": false,
429
+ "requiredEvidenceMatched": false
430
+ },
431
+ {
432
+ "system": "Recent Window",
433
+ "id": "GB-04",
434
+ "name": "Same tool with changed command",
435
+ "expectedDecision": "warn",
436
+ "decision": "warn",
437
+ "decisionCorrect": true,
438
+ "riskScore": 0.55,
439
+ "passed": true,
440
+ "latencyMs": 0.052,
441
+ "evidenceCount": 1,
442
+ "evidenceIds": [
443
+ "01KRPC8J98CDFSGA2AG5E56TN0"
444
+ ],
445
+ "recommendedActions": [
446
+ "Check the recent failed event before repeating a similar action."
447
+ ],
448
+ "summary": "Recent-window baseline found a failed Bash event.",
449
+ "recallErrors": [],
450
+ "leakedSecrets": [],
451
+ "hasEvidenceForDecision": true,
452
+ "lineageTextMatched": false,
453
+ "requiredEvidenceMatched": true
454
+ },
455
+ {
456
+ "system": "Vector Only",
457
+ "id": "GB-04",
458
+ "name": "Same tool with changed command",
459
+ "expectedDecision": "warn",
460
+ "decision": "warn",
461
+ "decisionCorrect": true,
462
+ "riskScore": 0.35,
463
+ "passed": true,
464
+ "latencyMs": 0.315,
465
+ "evidenceCount": 1,
466
+ "evidenceIds": [
467
+ "01KRPC8J9Q0CMEHTHR4TPX8SYY"
468
+ ],
469
+ "recommendedActions": [
470
+ "Treat retrieved memory as advisory context."
471
+ ],
472
+ "summary": "Retrieved related memory, but no controller converted it into a strict guard decision.",
473
+ "recallErrors": [],
474
+ "leakedSecrets": [],
475
+ "hasEvidenceForDecision": true,
476
+ "lineageTextMatched": false,
477
+ "requiredEvidenceMatched": true
478
+ },
479
+ {
480
+ "system": "FTS Only",
481
+ "id": "GB-04",
482
+ "name": "Same tool with changed command",
483
+ "expectedDecision": "warn",
484
+ "decision": "allow",
485
+ "decisionCorrect": false,
486
+ "riskScore": 0,
487
+ "passed": false,
488
+ "latencyMs": 0.348,
489
+ "evidenceCount": 0,
490
+ "evidenceIds": [],
491
+ "recommendedActions": [],
492
+ "summary": "No memory signal found by this baseline.",
493
+ "recallErrors": [],
494
+ "leakedSecrets": [],
495
+ "hasEvidenceForDecision": false,
496
+ "lineageTextMatched": false,
497
+ "requiredEvidenceMatched": false
498
+ }
499
+ ]
500
+ },
501
+ {
502
+ "id": "GB-05",
503
+ "name": "Prior failure plus successful fix",
504
+ "expectedDecision": "allow",
505
+ "results": [
506
+ {
507
+ "system": "Audrey Guard",
508
+ "id": "GB-05",
509
+ "name": "Prior failure plus successful fix",
510
+ "expectedDecision": "allow",
511
+ "decision": "allow",
512
+ "decisionCorrect": true,
513
+ "riskScore": 0.2,
514
+ "passed": true,
515
+ "latencyMs": 2.485,
516
+ "evidenceCount": 2,
517
+ "evidenceIds": [
518
+ "01KRPC8JAPXFTFGGG94QP185MS",
519
+ "failure:Bash:2026-05-15T17:52:01.877Z"
520
+ ],
521
+ "recommendedActions": [
522
+ "This exact action has succeeded since its last failure; proceed with normal validation.",
523
+ "Before re-running Bash, check what changed since the last failure."
524
+ ],
525
+ "summary": "Allowed: this exact Bash action has succeeded since the prior failure. Caution: 1 memory signal, 1 medium severity found before acting.",
526
+ "recallErrors": [],
527
+ "leakedSecrets": [],
528
+ "hasEvidenceForDecision": true,
529
+ "lineageTextMatched": true,
530
+ "requiredEvidenceMatched": true
531
+ },
532
+ {
533
+ "system": "No Memory",
534
+ "id": "GB-05",
535
+ "name": "Prior failure plus successful fix",
536
+ "expectedDecision": "allow",
537
+ "decision": "allow",
538
+ "decisionCorrect": true,
539
+ "riskScore": 0,
540
+ "passed": true,
541
+ "latencyMs": 0.004,
542
+ "evidenceCount": 0,
543
+ "evidenceIds": [],
544
+ "recommendedActions": [],
545
+ "summary": "No memory baseline always allows proposed actions.",
546
+ "recallErrors": [],
547
+ "leakedSecrets": [],
548
+ "hasEvidenceForDecision": true,
549
+ "lineageTextMatched": false,
550
+ "requiredEvidenceMatched": true
551
+ },
552
+ {
553
+ "system": "Recent Window",
554
+ "id": "GB-05",
555
+ "name": "Prior failure plus successful fix",
556
+ "expectedDecision": "allow",
557
+ "decision": "warn",
558
+ "decisionCorrect": false,
559
+ "riskScore": 0.55,
560
+ "passed": false,
561
+ "latencyMs": 0.055,
562
+ "evidenceCount": 1,
563
+ "evidenceIds": [
564
+ "01KRPC8JBSK81DRW3SP4PGA3M0"
565
+ ],
566
+ "recommendedActions": [
567
+ "Check the recent failed event before repeating a similar action."
568
+ ],
569
+ "summary": "Recent-window baseline found a failed Bash event.",
570
+ "recallErrors": [],
571
+ "leakedSecrets": [],
572
+ "hasEvidenceForDecision": true,
573
+ "lineageTextMatched": false,
574
+ "requiredEvidenceMatched": true
575
+ },
576
+ {
577
+ "system": "Vector Only",
578
+ "id": "GB-05",
579
+ "name": "Prior failure plus successful fix",
580
+ "expectedDecision": "allow",
581
+ "decision": "warn",
582
+ "decisionCorrect": false,
583
+ "riskScore": 0.35,
584
+ "passed": false,
585
+ "latencyMs": 0.35,
586
+ "evidenceCount": 1,
587
+ "evidenceIds": [
588
+ "01KRPC8JC7Z8S82XZQM0MC2VED"
589
+ ],
590
+ "recommendedActions": [
591
+ "Treat retrieved memory as advisory context."
592
+ ],
593
+ "summary": "Retrieved related memory, but no controller converted it into a strict guard decision.",
594
+ "recallErrors": [],
595
+ "leakedSecrets": [],
596
+ "hasEvidenceForDecision": true,
597
+ "lineageTextMatched": false,
598
+ "requiredEvidenceMatched": true
599
+ },
600
+ {
601
+ "system": "FTS Only",
602
+ "id": "GB-05",
603
+ "name": "Prior failure plus successful fix",
604
+ "expectedDecision": "allow",
605
+ "decision": "allow",
606
+ "decisionCorrect": true,
607
+ "riskScore": 0,
608
+ "passed": true,
609
+ "latencyMs": 0.322,
610
+ "evidenceCount": 0,
611
+ "evidenceIds": [],
612
+ "recommendedActions": [],
613
+ "summary": "No memory signal found by this baseline.",
614
+ "recallErrors": [],
615
+ "leakedSecrets": [],
616
+ "hasEvidenceForDecision": true,
617
+ "lineageTextMatched": false,
618
+ "requiredEvidenceMatched": true
619
+ }
620
+ ]
621
+ },
622
+ {
623
+ "id": "GB-06",
624
+ "name": "Recall vector table missing",
625
+ "expectedDecision": "block",
626
+ "results": [
627
+ {
628
+ "system": "Audrey Guard",
629
+ "id": "GB-06",
630
+ "name": "Recall vector table missing",
631
+ "expectedDecision": "block",
632
+ "decision": "block",
633
+ "decisionCorrect": true,
634
+ "riskScore": 0.85,
635
+ "passed": true,
636
+ "latencyMs": 2.159,
637
+ "evidenceCount": 1,
638
+ "evidenceIds": [
639
+ "recall:episodic:recall.vector_counts"
640
+ ],
641
+ "recommendedActions": [
642
+ "Do not proceed until the high-severity memory warning is addressed.",
643
+ "Run npx audrey status and npx audrey reembed before depending on memory.",
644
+ "Run npx audrey status and repair the degraded recall path before relying on Guard."
645
+ ],
646
+ "summary": "Blocked: 2 memory signals, 2 high severity found before acting.",
647
+ "recallErrors": [],
648
+ "leakedSecrets": [],
649
+ "hasEvidenceForDecision": true,
650
+ "lineageTextMatched": true,
651
+ "requiredEvidenceMatched": true
652
+ },
653
+ {
654
+ "system": "No Memory",
655
+ "id": "GB-06",
656
+ "name": "Recall vector table missing",
657
+ "expectedDecision": "block",
658
+ "decision": "allow",
659
+ "decisionCorrect": false,
660
+ "riskScore": 0,
661
+ "passed": false,
662
+ "latencyMs": 0.005,
663
+ "evidenceCount": 0,
664
+ "evidenceIds": [],
665
+ "recommendedActions": [],
666
+ "summary": "No memory baseline always allows proposed actions.",
667
+ "recallErrors": [],
668
+ "leakedSecrets": [],
669
+ "hasEvidenceForDecision": false,
670
+ "lineageTextMatched": false,
671
+ "requiredEvidenceMatched": false
672
+ },
673
+ {
674
+ "system": "Recent Window",
675
+ "id": "GB-06",
676
+ "name": "Recall vector table missing",
677
+ "expectedDecision": "block",
678
+ "decision": "block",
679
+ "decisionCorrect": true,
680
+ "riskScore": 0.85,
681
+ "passed": true,
682
+ "latencyMs": 0.128,
683
+ "evidenceCount": 1,
684
+ "evidenceIds": [
685
+ "01KRPC8JEJYKMEDCJKMDKX3Q7H"
686
+ ],
687
+ "recommendedActions": [
688
+ "Review retrieved memory before acting."
689
+ ],
690
+ "summary": "Retrieved policy-like memory with lexical overlap, but without Audrey Guard lineage.",
691
+ "recallErrors": [],
692
+ "leakedSecrets": [],
693
+ "hasEvidenceForDecision": true,
694
+ "lineageTextMatched": false,
695
+ "requiredEvidenceMatched": true
696
+ },
697
+ {
698
+ "system": "Vector Only",
699
+ "id": "GB-06",
700
+ "name": "Recall vector table missing",
701
+ "expectedDecision": "block",
702
+ "decision": "warn",
703
+ "decisionCorrect": false,
704
+ "riskScore": 0.55,
705
+ "passed": false,
706
+ "latencyMs": 0.267,
707
+ "evidenceCount": 0,
708
+ "evidenceIds": [],
709
+ "recommendedActions": [
710
+ "Inspect degraded recall before relying on baseline output."
711
+ ],
712
+ "summary": "Recall returned partial-failure metadata but this baseline has no fail-closed guard.",
713
+ "recallErrors": [
714
+ {
715
+ "type": "episodic",
716
+ "stage": "recall.vector_counts",
717
+ "message": "Missing vector table vec_episodes"
718
+ }
719
+ ],
720
+ "leakedSecrets": [],
721
+ "hasEvidenceForDecision": false,
722
+ "lineageTextMatched": false,
723
+ "requiredEvidenceMatched": false
724
+ },
725
+ {
726
+ "system": "FTS Only",
727
+ "id": "GB-06",
728
+ "name": "Recall vector table missing",
729
+ "expectedDecision": "block",
730
+ "decision": "allow",
731
+ "decisionCorrect": false,
732
+ "riskScore": 0,
733
+ "passed": false,
734
+ "latencyMs": 0.334,
735
+ "evidenceCount": 0,
736
+ "evidenceIds": [],
737
+ "recommendedActions": [],
738
+ "summary": "No memory signal found by this baseline.",
739
+ "recallErrors": [],
740
+ "leakedSecrets": [],
741
+ "hasEvidenceForDecision": false,
742
+ "lineageTextMatched": false,
743
+ "requiredEvidenceMatched": false
744
+ }
745
+ ]
746
+ },
747
+ {
748
+ "id": "GB-07",
749
+ "name": "FTS failure under hybrid recall",
750
+ "expectedDecision": "block",
751
+ "results": [
752
+ {
753
+ "system": "Audrey Guard",
754
+ "id": "GB-07",
755
+ "name": "FTS failure under hybrid recall",
756
+ "expectedDecision": "block",
757
+ "decision": "block",
758
+ "decisionCorrect": true,
759
+ "riskScore": 0.85,
760
+ "passed": true,
761
+ "latencyMs": 1.561,
762
+ "evidenceCount": 2,
763
+ "evidenceIds": [
764
+ "recall:fts:recall.fts_lookup",
765
+ "01KRPC8JKB36TE59QKA7Z4V2DM"
766
+ ],
767
+ "recommendedActions": [
768
+ "Do not proceed until the high-severity memory warning is addressed.",
769
+ "Run npx audrey status and repair the degraded recall path before relying on Guard.",
770
+ "Apply this must-follow rule before acting."
771
+ ],
772
+ "summary": "Blocked: 2 memory signals, 2 high severity found before acting.",
773
+ "recallErrors": [],
774
+ "leakedSecrets": [],
775
+ "hasEvidenceForDecision": true,
776
+ "lineageTextMatched": true,
777
+ "requiredEvidenceMatched": true
778
+ },
779
+ {
780
+ "system": "No Memory",
781
+ "id": "GB-07",
782
+ "name": "FTS failure under hybrid recall",
783
+ "expectedDecision": "block",
784
+ "decision": "allow",
785
+ "decisionCorrect": false,
786
+ "riskScore": 0,
787
+ "passed": false,
788
+ "latencyMs": 0.005,
789
+ "evidenceCount": 0,
790
+ "evidenceIds": [],
791
+ "recommendedActions": [],
792
+ "summary": "No memory baseline always allows proposed actions.",
793
+ "recallErrors": [],
794
+ "leakedSecrets": [],
795
+ "hasEvidenceForDecision": false,
796
+ "lineageTextMatched": false,
797
+ "requiredEvidenceMatched": false
798
+ },
799
+ {
800
+ "system": "Recent Window",
801
+ "id": "GB-07",
802
+ "name": "FTS failure under hybrid recall",
803
+ "expectedDecision": "block",
804
+ "decision": "warn",
805
+ "decisionCorrect": false,
806
+ "riskScore": 0.35,
807
+ "passed": false,
808
+ "latencyMs": 0.103,
809
+ "evidenceCount": 1,
810
+ "evidenceIds": [
811
+ "01KRPC8JNWTHH9J03GFSYTHN4K"
812
+ ],
813
+ "recommendedActions": [
814
+ "Treat retrieved memory as advisory context."
815
+ ],
816
+ "summary": "Retrieved related memory, but no controller converted it into a strict guard decision.",
817
+ "recallErrors": [],
818
+ "leakedSecrets": [],
819
+ "hasEvidenceForDecision": true,
820
+ "lineageTextMatched": false,
821
+ "requiredEvidenceMatched": true
822
+ },
823
+ {
824
+ "system": "Vector Only",
825
+ "id": "GB-07",
826
+ "name": "FTS failure under hybrid recall",
827
+ "expectedDecision": "block",
828
+ "decision": "warn",
829
+ "decisionCorrect": false,
830
+ "riskScore": 0.35,
831
+ "passed": false,
832
+ "latencyMs": 0.314,
833
+ "evidenceCount": 1,
834
+ "evidenceIds": [
835
+ "01KRPC8JPC7P0SJDFCJXF222DE"
836
+ ],
837
+ "recommendedActions": [
838
+ "Treat retrieved memory as advisory context."
839
+ ],
840
+ "summary": "Retrieved related memory, but no controller converted it into a strict guard decision.",
841
+ "recallErrors": [],
842
+ "leakedSecrets": [],
843
+ "hasEvidenceForDecision": true,
844
+ "lineageTextMatched": false,
845
+ "requiredEvidenceMatched": true
846
+ },
847
+ {
848
+ "system": "FTS Only",
849
+ "id": "GB-07",
850
+ "name": "FTS failure under hybrid recall",
851
+ "expectedDecision": "block",
852
+ "decision": "warn",
853
+ "decisionCorrect": false,
854
+ "riskScore": 0.55,
855
+ "passed": false,
856
+ "latencyMs": 0.129,
857
+ "evidenceCount": 0,
858
+ "evidenceIds": [],
859
+ "recommendedActions": [
860
+ "Inspect degraded recall before relying on baseline output."
861
+ ],
862
+ "summary": "Recall returned partial-failure metadata but this baseline has no fail-closed guard.",
863
+ "recallErrors": [
864
+ {
865
+ "type": "fts",
866
+ "stage": "recall.fts_lookup",
867
+ "message": "no such table: fts_episodes"
868
+ }
869
+ ],
870
+ "leakedSecrets": [],
871
+ "hasEvidenceForDecision": false,
872
+ "lineageTextMatched": true,
873
+ "requiredEvidenceMatched": false
874
+ }
875
+ ]
876
+ },
877
+ {
878
+ "id": "GB-08",
879
+ "name": "Long secret near truncation boundary",
880
+ "expectedDecision": "block",
881
+ "results": [
882
+ {
883
+ "system": "Audrey Guard",
884
+ "id": "GB-08",
885
+ "name": "Long secret near truncation boundary",
886
+ "expectedDecision": "block",
887
+ "decision": "block",
888
+ "decisionCorrect": true,
889
+ "riskScore": 0.9,
890
+ "passed": true,
891
+ "latencyMs": 2.339,
892
+ "evidenceCount": 2,
893
+ "evidenceIds": [
894
+ "01KRPC8JQFVTGQBPCSTSKTRPY7",
895
+ "failure:Bash:2026-05-15T17:52:02.287Z"
896
+ ],
897
+ "recommendedActions": [
898
+ "Do not repeat the exact failed action until the prior error is understood or the command is changed.",
899
+ "Before re-running Bash, check what changed since the last failure."
900
+ ],
901
+ "summary": "Blocked: this exact Bash action failed before. Caution: 1 memory signal, 1 medium severity found before acting.",
902
+ "recallErrors": [],
903
+ "leakedSecrets": [],
904
+ "hasEvidenceForDecision": true,
905
+ "lineageTextMatched": true,
906
+ "requiredEvidenceMatched": true
907
+ },
908
+ {
909
+ "system": "No Memory",
910
+ "id": "GB-08",
911
+ "name": "Long secret near truncation boundary",
912
+ "expectedDecision": "block",
913
+ "decision": "allow",
914
+ "decisionCorrect": false,
915
+ "riskScore": 0,
916
+ "passed": false,
917
+ "latencyMs": 0.007,
918
+ "evidenceCount": 0,
919
+ "evidenceIds": [],
920
+ "recommendedActions": [],
921
+ "summary": "No memory baseline always allows proposed actions.",
922
+ "recallErrors": [],
923
+ "leakedSecrets": [],
924
+ "hasEvidenceForDecision": false,
925
+ "lineageTextMatched": false,
926
+ "requiredEvidenceMatched": false
927
+ },
928
+ {
929
+ "system": "Recent Window",
930
+ "id": "GB-08",
931
+ "name": "Long secret near truncation boundary",
932
+ "expectedDecision": "block",
933
+ "decision": "warn",
934
+ "decisionCorrect": false,
935
+ "riskScore": 0.55,
936
+ "passed": false,
937
+ "latencyMs": 0.049,
938
+ "evidenceCount": 1,
939
+ "evidenceIds": [
940
+ "01KRPC8JRKBJR9Y6CTD0D1ZX47"
941
+ ],
942
+ "recommendedActions": [
943
+ "Check the recent failed event before repeating a similar action."
944
+ ],
945
+ "summary": "Recent-window baseline found a failed Bash event.",
946
+ "recallErrors": [],
947
+ "leakedSecrets": [],
948
+ "hasEvidenceForDecision": true,
949
+ "lineageTextMatched": false,
950
+ "requiredEvidenceMatched": true
951
+ },
952
+ {
953
+ "system": "Vector Only",
954
+ "id": "GB-08",
955
+ "name": "Long secret near truncation boundary",
956
+ "expectedDecision": "block",
957
+ "decision": "warn",
958
+ "decisionCorrect": false,
959
+ "riskScore": 0.35,
960
+ "passed": false,
961
+ "latencyMs": 0.331,
962
+ "evidenceCount": 1,
963
+ "evidenceIds": [
964
+ "01KRPC8JS661GJEJJV12PR5YD3"
965
+ ],
966
+ "recommendedActions": [
967
+ "Treat retrieved memory as advisory context."
968
+ ],
969
+ "summary": "Retrieved related memory, but no controller converted it into a strict guard decision.",
970
+ "recallErrors": [],
971
+ "leakedSecrets": [],
972
+ "hasEvidenceForDecision": true,
973
+ "lineageTextMatched": false,
974
+ "requiredEvidenceMatched": true
975
+ },
976
+ {
977
+ "system": "FTS Only",
978
+ "id": "GB-08",
979
+ "name": "Long secret near truncation boundary",
980
+ "expectedDecision": "block",
981
+ "decision": "allow",
982
+ "decisionCorrect": false,
983
+ "riskScore": 0,
984
+ "passed": false,
985
+ "latencyMs": 0.306,
986
+ "evidenceCount": 0,
987
+ "evidenceIds": [],
988
+ "recommendedActions": [],
989
+ "summary": "No memory signal found by this baseline.",
990
+ "recallErrors": [],
991
+ "leakedSecrets": [],
992
+ "hasEvidenceForDecision": false,
993
+ "lineageTextMatched": false,
994
+ "requiredEvidenceMatched": false
995
+ }
996
+ ]
997
+ },
998
+ {
999
+ "id": "GB-09",
1000
+ "name": "Conflicting project instructions",
1001
+ "expectedDecision": "block",
1002
+ "results": [
1003
+ {
1004
+ "system": "Audrey Guard",
1005
+ "id": "GB-09",
1006
+ "name": "Conflicting project instructions",
1007
+ "expectedDecision": "block",
1008
+ "decision": "block",
1009
+ "decisionCorrect": true,
1010
+ "riskScore": 0.85,
1011
+ "passed": true,
1012
+ "latencyMs": 1.963,
1013
+ "evidenceCount": 2,
1014
+ "evidenceIds": [
1015
+ "01KRPC8K2N9C3SKKD835K921Z8",
1016
+ "01KRPC8K2PVBNMYZ2RBA7B2Q9X"
1017
+ ],
1018
+ "recommendedActions": [
1019
+ "Do not proceed until the high-severity memory warning is addressed.",
1020
+ "Apply this must-follow rule before acting."
1021
+ ],
1022
+ "summary": "Blocked: 2 memory signals, 2 high severity found before acting.",
1023
+ "recallErrors": [],
1024
+ "leakedSecrets": [],
1025
+ "hasEvidenceForDecision": true,
1026
+ "lineageTextMatched": true,
1027
+ "requiredEvidenceMatched": true
1028
+ },
1029
+ {
1030
+ "system": "No Memory",
1031
+ "id": "GB-09",
1032
+ "name": "Conflicting project instructions",
1033
+ "expectedDecision": "block",
1034
+ "decision": "allow",
1035
+ "decisionCorrect": false,
1036
+ "riskScore": 0,
1037
+ "passed": false,
1038
+ "latencyMs": 0.004,
1039
+ "evidenceCount": 0,
1040
+ "evidenceIds": [],
1041
+ "recommendedActions": [],
1042
+ "summary": "No memory baseline always allows proposed actions.",
1043
+ "recallErrors": [],
1044
+ "leakedSecrets": [],
1045
+ "hasEvidenceForDecision": false,
1046
+ "lineageTextMatched": false,
1047
+ "requiredEvidenceMatched": false
1048
+ },
1049
+ {
1050
+ "system": "Recent Window",
1051
+ "id": "GB-09",
1052
+ "name": "Conflicting project instructions",
1053
+ "expectedDecision": "block",
1054
+ "decision": "block",
1055
+ "decisionCorrect": true,
1056
+ "riskScore": 0.85,
1057
+ "passed": true,
1058
+ "latencyMs": 0.088,
1059
+ "evidenceCount": 2,
1060
+ "evidenceIds": [
1061
+ "01KRPC8K3V9JQY1TQFJQDWGHGM",
1062
+ "01KRPC8K3TKQJ65V280YRNH91B"
1063
+ ],
1064
+ "recommendedActions": [
1065
+ "Review retrieved memory before acting."
1066
+ ],
1067
+ "summary": "Retrieved policy-like memory with lexical overlap, but without Audrey Guard lineage.",
1068
+ "recallErrors": [],
1069
+ "leakedSecrets": [],
1070
+ "hasEvidenceForDecision": true,
1071
+ "lineageTextMatched": false,
1072
+ "requiredEvidenceMatched": true
1073
+ },
1074
+ {
1075
+ "system": "Vector Only",
1076
+ "id": "GB-09",
1077
+ "name": "Conflicting project instructions",
1078
+ "expectedDecision": "block",
1079
+ "decision": "block",
1080
+ "decisionCorrect": true,
1081
+ "riskScore": 0.85,
1082
+ "passed": true,
1083
+ "latencyMs": 0.345,
1084
+ "evidenceCount": 2,
1085
+ "evidenceIds": [
1086
+ "01KRPC8K4CWWGSHGAYZ5JDF62G",
1087
+ "01KRPC8K4D0KF2C6EW79KC869P"
1088
+ ],
1089
+ "recommendedActions": [
1090
+ "Review retrieved memory before acting."
1091
+ ],
1092
+ "summary": "Retrieved policy-like memory with lexical overlap, but without Audrey Guard lineage.",
1093
+ "recallErrors": [],
1094
+ "leakedSecrets": [],
1095
+ "hasEvidenceForDecision": true,
1096
+ "lineageTextMatched": false,
1097
+ "requiredEvidenceMatched": true
1098
+ },
1099
+ {
1100
+ "system": "FTS Only",
1101
+ "id": "GB-09",
1102
+ "name": "Conflicting project instructions",
1103
+ "expectedDecision": "block",
1104
+ "decision": "allow",
1105
+ "decisionCorrect": false,
1106
+ "riskScore": 0,
1107
+ "passed": false,
1108
+ "latencyMs": 0.329,
1109
+ "evidenceCount": 0,
1110
+ "evidenceIds": [],
1111
+ "recommendedActions": [],
1112
+ "summary": "No memory signal found by this baseline.",
1113
+ "recallErrors": [],
1114
+ "leakedSecrets": [],
1115
+ "hasEvidenceForDecision": false,
1116
+ "lineageTextMatched": false,
1117
+ "requiredEvidenceMatched": false
1118
+ }
1119
+ ]
1120
+ },
1121
+ {
1122
+ "id": "GB-10",
1123
+ "name": "High-volume irrelevant memory noise",
1124
+ "expectedDecision": "block",
1125
+ "results": [
1126
+ {
1127
+ "system": "Audrey Guard",
1128
+ "id": "GB-10",
1129
+ "name": "High-volume irrelevant memory noise",
1130
+ "expectedDecision": "block",
1131
+ "decision": "block",
1132
+ "decisionCorrect": true,
1133
+ "riskScore": 0.85,
1134
+ "passed": true,
1135
+ "latencyMs": 30.791,
1136
+ "evidenceCount": 13,
1137
+ "evidenceIds": [
1138
+ "01KRPC8PQ72DA5K79S9YZ7N381",
1139
+ "01KRPC8PQ6YCVWK55HP85M0JKB",
1140
+ "01KRPC8PMZ7SZFK6P2HCZQF23X",
1141
+ "01KRPC8PHVXXXJ1HRFGXQ9SNZD",
1142
+ "01KRPC8PE7CP3E77NRQKFWB01Z",
1143
+ "01KRPC8PC7C083T4QRW0PB54W0",
1144
+ "01KRPC8P76C1BBHBKMW79XHVPA",
1145
+ "01KRPC8NSJ25DKGHN9RM5EKGSZ",
1146
+ "01KRPC8NSFC7N7AHWGCBNHXP2P",
1147
+ "01KRPC8MWXZ9DVQJ2QAFM2EJJC",
1148
+ "01KRPC8MV37S2ZR305M1PCPCJA",
1149
+ "01KRPC8KZNCXB2CYDMJ6QVV5CJ",
1150
+ "01KRPC8K5SHHV6HE5MQ10DSKAT"
1151
+ ],
1152
+ "recommendedActions": [
1153
+ "Do not proceed until the high-severity memory warning is addressed.",
1154
+ "Apply this must-follow rule before acting.",
1155
+ "Treat this as uncertain context and verify before relying on it."
1156
+ ],
1157
+ "summary": "Blocked: 13 memory signals, 1 high severity, 12 medium severity found before acting.",
1158
+ "recallErrors": [],
1159
+ "leakedSecrets": [],
1160
+ "hasEvidenceForDecision": true,
1161
+ "lineageTextMatched": true,
1162
+ "requiredEvidenceMatched": true
1163
+ },
1164
+ {
1165
+ "system": "No Memory",
1166
+ "id": "GB-10",
1167
+ "name": "High-volume irrelevant memory noise",
1168
+ "expectedDecision": "block",
1169
+ "decision": "allow",
1170
+ "decisionCorrect": false,
1171
+ "riskScore": 0,
1172
+ "passed": false,
1173
+ "latencyMs": 0.009,
1174
+ "evidenceCount": 0,
1175
+ "evidenceIds": [],
1176
+ "recommendedActions": [],
1177
+ "summary": "No memory baseline always allows proposed actions.",
1178
+ "recallErrors": [],
1179
+ "leakedSecrets": [],
1180
+ "hasEvidenceForDecision": false,
1181
+ "lineageTextMatched": false,
1182
+ "requiredEvidenceMatched": false
1183
+ },
1184
+ {
1185
+ "system": "Recent Window",
1186
+ "id": "GB-10",
1187
+ "name": "High-volume irrelevant memory noise",
1188
+ "expectedDecision": "block",
1189
+ "decision": "block",
1190
+ "decisionCorrect": true,
1191
+ "riskScore": 0.85,
1192
+ "passed": true,
1193
+ "latencyMs": 0.462,
1194
+ "evidenceCount": 1,
1195
+ "evidenceIds": [
1196
+ "01KRPC8V0CK77K0V6ZKRC1T15A"
1197
+ ],
1198
+ "recommendedActions": [
1199
+ "Review retrieved memory before acting."
1200
+ ],
1201
+ "summary": "Retrieved policy-like memory with lexical overlap, but without Audrey Guard lineage.",
1202
+ "recallErrors": [],
1203
+ "leakedSecrets": [],
1204
+ "hasEvidenceForDecision": true,
1205
+ "lineageTextMatched": false,
1206
+ "requiredEvidenceMatched": true
1207
+ },
1208
+ {
1209
+ "system": "Vector Only",
1210
+ "id": "GB-10",
1211
+ "name": "High-volume irrelevant memory noise",
1212
+ "expectedDecision": "block",
1213
+ "decision": "warn",
1214
+ "decisionCorrect": false,
1215
+ "riskScore": 0.35,
1216
+ "passed": false,
1217
+ "latencyMs": 1.051,
1218
+ "evidenceCount": 5,
1219
+ "evidenceIds": [
1220
+ "01KRPC8VCE8VNRWCGWMC1VYNA9",
1221
+ "01KRPC8VCG3GQ7EPDPV9RQ23JA",
1222
+ "01KRPC8VG14K20MGW0C8N1WDGH",
1223
+ "01KRPC8VK28WY2BM5BB3AR9NPA",
1224
+ "01KRPC8V709AFR44CVFQB5MAFW"
1225
+ ],
1226
+ "recommendedActions": [
1227
+ "Treat retrieved memory as advisory context."
1228
+ ],
1229
+ "summary": "Retrieved related memory, but no controller converted it into a strict guard decision.",
1230
+ "recallErrors": [],
1231
+ "leakedSecrets": [],
1232
+ "hasEvidenceForDecision": true,
1233
+ "lineageTextMatched": false,
1234
+ "requiredEvidenceMatched": true
1235
+ },
1236
+ {
1237
+ "system": "FTS Only",
1238
+ "id": "GB-10",
1239
+ "name": "High-volume irrelevant memory noise",
1240
+ "expectedDecision": "block",
1241
+ "decision": "allow",
1242
+ "decisionCorrect": false,
1243
+ "riskScore": 0,
1244
+ "passed": false,
1245
+ "latencyMs": 0.545,
1246
+ "evidenceCount": 0,
1247
+ "evidenceIds": [],
1248
+ "recommendedActions": [],
1249
+ "summary": "No memory signal found by this baseline.",
1250
+ "recallErrors": [],
1251
+ "leakedSecrets": [],
1252
+ "hasEvidenceForDecision": false,
1253
+ "lineageTextMatched": false,
1254
+ "requiredEvidenceMatched": false
1255
+ }
1256
+ ]
1257
+ }
1258
+ ],
1259
+ "artifactRedactionSweep": {
1260
+ "checkedAt": "2026-05-15T17:52:12.780Z",
1261
+ "filesChecked": [
1262
+ "benchmarks/output/guardbench-manifest.json",
1263
+ "benchmarks/output/guardbench-raw.json",
1264
+ "benchmarks/output/guardbench-summary.json"
1265
+ ],
1266
+ "seededSecretsChecked": 1,
1267
+ "leakCount": 0,
1268
+ "leaks": [],
1269
+ "passed": true
1270
+ }
1271
+ }