audrey 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227) hide show
  1. package/CHANGELOG.md +30 -0
  2. package/README.md +5 -3
  3. package/benchmarks/adapter-self-test.mjs +6 -2
  4. package/benchmarks/adapters/example-allow.mjs +5 -2
  5. package/benchmarks/adapters/mem0-platform.mjs +19 -12
  6. package/benchmarks/adapters/zep-cloud.mjs +51 -27
  7. package/benchmarks/baselines.js +11 -6
  8. package/benchmarks/build-leaderboard.mjs +36 -23
  9. package/benchmarks/cases.js +24 -12
  10. package/benchmarks/create-conformance-card.mjs +12 -3
  11. package/benchmarks/create-submission-bundle.mjs +22 -8
  12. package/benchmarks/dry-run-external-adapters.mjs +24 -12
  13. package/benchmarks/guardbench.js +263 -123
  14. package/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +4 -4
  15. package/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
  16. package/benchmarks/output/external/guardbench-external-evidence.json +1 -1
  17. package/benchmarks/output/guardbench-conformance-card.json +11 -11
  18. package/benchmarks/output/guardbench-raw.json +107 -108
  19. package/benchmarks/output/guardbench-summary.json +170 -172
  20. package/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
  21. package/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
  22. package/benchmarks/output/submission-bundle/guardbench-conformance-card.json +11 -11
  23. package/benchmarks/output/submission-bundle/guardbench-raw.json +107 -108
  24. package/benchmarks/output/submission-bundle/guardbench-summary.json +170 -172
  25. package/benchmarks/output/submission-bundle/submission-manifest.json +11 -11
  26. package/benchmarks/output/submission-bundle/validation-report.json +1 -1
  27. package/benchmarks/output/summary.json +57 -57
  28. package/benchmarks/perf-snapshot.js +12 -9
  29. package/benchmarks/perf.bench.js +14 -6
  30. package/benchmarks/public-paths.mjs +11 -5
  31. package/benchmarks/reference-results.js +10 -5
  32. package/benchmarks/report.js +48 -27
  33. package/benchmarks/run-external-guardbench.mjs +47 -25
  34. package/benchmarks/run.js +112 -59
  35. package/benchmarks/validate-adapter-module.mjs +13 -10
  36. package/benchmarks/validate-adapter-registry.mjs +16 -5
  37. package/benchmarks/validate-guardbench-artifacts.mjs +76 -19
  38. package/benchmarks/verify-external-evidence.mjs +86 -31
  39. package/benchmarks/verify-publication-artifacts.mjs +34 -11
  40. package/benchmarks/verify-submission-bundle.mjs +9 -4
  41. package/dist/mcp-server/config.d.ts +1 -1
  42. package/dist/mcp-server/config.d.ts.map +1 -1
  43. package/dist/mcp-server/config.js +5 -3
  44. package/dist/mcp-server/config.js.map +1 -1
  45. package/dist/mcp-server/index.d.ts +4 -3
  46. package/dist/mcp-server/index.d.ts.map +1 -1
  47. package/dist/mcp-server/index.js +479 -172
  48. package/dist/mcp-server/index.js.map +1 -1
  49. package/dist/src/action-key.d.ts.map +1 -1
  50. package/dist/src/action-key.js +6 -2
  51. package/dist/src/action-key.js.map +1 -1
  52. package/dist/src/adaptive.d.ts.map +1 -1
  53. package/dist/src/adaptive.js +4 -2
  54. package/dist/src/adaptive.js.map +1 -1
  55. package/dist/src/affect.d.ts.map +1 -1
  56. package/dist/src/affect.js +8 -5
  57. package/dist/src/affect.js.map +1 -1
  58. package/dist/src/audrey.d.ts +1 -1
  59. package/dist/src/audrey.d.ts.map +1 -1
  60. package/dist/src/audrey.js +93 -49
  61. package/dist/src/audrey.js.map +1 -1
  62. package/dist/src/capsule.d.ts.map +1 -1
  63. package/dist/src/capsule.js +37 -15
  64. package/dist/src/capsule.js.map +1 -1
  65. package/dist/src/causal.d.ts +1 -1
  66. package/dist/src/causal.d.ts.map +1 -1
  67. package/dist/src/causal.js +4 -2
  68. package/dist/src/causal.js.map +1 -1
  69. package/dist/src/confidence.d.ts.map +1 -1
  70. package/dist/src/confidence.js +5 -5
  71. package/dist/src/confidence.js.map +1 -1
  72. package/dist/src/consolidate.d.ts.map +1 -1
  73. package/dist/src/consolidate.js +17 -9
  74. package/dist/src/consolidate.js.map +1 -1
  75. package/dist/src/context.js +1 -1
  76. package/dist/src/context.js.map +1 -1
  77. package/dist/src/controller.d.ts.map +1 -1
  78. package/dist/src/controller.js +24 -13
  79. package/dist/src/controller.js.map +1 -1
  80. package/dist/src/db.d.ts.map +1 -1
  81. package/dist/src/db.js +78 -27
  82. package/dist/src/db.js.map +1 -1
  83. package/dist/src/decay.d.ts +1 -1
  84. package/dist/src/decay.d.ts.map +1 -1
  85. package/dist/src/decay.js +1 -1
  86. package/dist/src/decay.js.map +1 -1
  87. package/dist/src/embedding.d.ts +12 -4
  88. package/dist/src/embedding.d.ts.map +1 -1
  89. package/dist/src/embedding.js +18 -16
  90. package/dist/src/embedding.js.map +1 -1
  91. package/dist/src/encode.d.ts.map +1 -1
  92. package/dist/src/encode.js +5 -4
  93. package/dist/src/encode.js.map +1 -1
  94. package/dist/src/events.d.ts +3 -2
  95. package/dist/src/events.d.ts.map +1 -1
  96. package/dist/src/events.js +7 -3
  97. package/dist/src/events.js.map +1 -1
  98. package/dist/src/export.d.ts.map +1 -1
  99. package/dist/src/export.js +21 -7
  100. package/dist/src/export.js.map +1 -1
  101. package/dist/src/feedback.d.ts.map +1 -1
  102. package/dist/src/feedback.js +1 -1
  103. package/dist/src/feedback.js.map +1 -1
  104. package/dist/src/forget.d.ts.map +1 -1
  105. package/dist/src/forget.js +12 -6
  106. package/dist/src/forget.js.map +1 -1
  107. package/dist/src/fts.d.ts.map +1 -1
  108. package/dist/src/fts.js +20 -8
  109. package/dist/src/fts.js.map +1 -1
  110. package/dist/src/hybrid-recall.d.ts.map +1 -1
  111. package/dist/src/hybrid-recall.js +12 -6
  112. package/dist/src/hybrid-recall.js.map +1 -1
  113. package/dist/src/impact.d.ts.map +1 -1
  114. package/dist/src/impact.js +26 -10
  115. package/dist/src/impact.js.map +1 -1
  116. package/dist/src/import.d.ts.map +1 -1
  117. package/dist/src/import.js +11 -6
  118. package/dist/src/import.js.map +1 -1
  119. package/dist/src/index.d.ts +3 -3
  120. package/dist/src/index.d.ts.map +1 -1
  121. package/dist/src/index.js +3 -3
  122. package/dist/src/index.js.map +1 -1
  123. package/dist/src/interference.d.ts.map +1 -1
  124. package/dist/src/interference.js +10 -5
  125. package/dist/src/interference.js.map +1 -1
  126. package/dist/src/introspect.d.ts.map +1 -1
  127. package/dist/src/introspect.js +12 -6
  128. package/dist/src/introspect.js.map +1 -1
  129. package/dist/src/llm.d.ts +2 -2
  130. package/dist/src/llm.d.ts.map +1 -1
  131. package/dist/src/llm.js +6 -6
  132. package/dist/src/llm.js.map +1 -1
  133. package/dist/src/migrate.d.ts.map +1 -1
  134. package/dist/src/migrate.js +10 -4
  135. package/dist/src/migrate.js.map +1 -1
  136. package/dist/src/preflight.d.ts.map +1 -1
  137. package/dist/src/preflight.js +6 -8
  138. package/dist/src/preflight.js.map +1 -1
  139. package/dist/src/profile.d.ts.map +1 -1
  140. package/dist/src/profile.js.map +1 -1
  141. package/dist/src/promote.d.ts.map +1 -1
  142. package/dist/src/promote.js +16 -7
  143. package/dist/src/promote.js.map +1 -1
  144. package/dist/src/prompts.d.ts.map +1 -1
  145. package/dist/src/prompts.js +1 -2
  146. package/dist/src/prompts.js.map +1 -1
  147. package/dist/src/recall.d.ts.map +1 -1
  148. package/dist/src/recall.js +85 -18
  149. package/dist/src/recall.js.map +1 -1
  150. package/dist/src/redact.d.ts.map +1 -1
  151. package/dist/src/redact.js +9 -4
  152. package/dist/src/redact.js.map +1 -1
  153. package/dist/src/reflexes.d.ts.map +1 -1
  154. package/dist/src/reflexes.js +1 -7
  155. package/dist/src/reflexes.js.map +1 -1
  156. package/dist/src/rollback.d.ts.map +1 -1
  157. package/dist/src/rollback.js +4 -2
  158. package/dist/src/rollback.js.map +1 -1
  159. package/dist/src/routes.d.ts.map +1 -1
  160. package/dist/src/routes.js +33 -13
  161. package/dist/src/routes.js.map +1 -1
  162. package/dist/src/rules-compiler.d.ts.map +1 -1
  163. package/dist/src/rules-compiler.js +24 -2
  164. package/dist/src/rules-compiler.js.map +1 -1
  165. package/dist/src/server.js +2 -2
  166. package/dist/src/server.js.map +1 -1
  167. package/dist/src/tool-trace.d.ts +2 -2
  168. package/dist/src/tool-trace.d.ts.map +1 -1
  169. package/dist/src/tool-trace.js +12 -4
  170. package/dist/src/tool-trace.js.map +1 -1
  171. package/dist/src/types.d.ts.map +1 -1
  172. package/dist/src/ulid.js +1 -1
  173. package/dist/src/ulid.js.map +1 -1
  174. package/dist/src/utils.d.ts.map +1 -1
  175. package/dist/src/utils.js.map +1 -1
  176. package/dist/src/validate.d.ts.map +1 -1
  177. package/dist/src/validate.js +20 -10
  178. package/dist/src/validate.js.map +1 -1
  179. package/docs/paper/07-evaluation.md +5 -5
  180. package/docs/paper/audrey-paper-v1.md +5 -5
  181. package/docs/paper/evidence-ledger.md +1 -1
  182. package/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
  183. package/docs/paper/output/arxiv/main.tex +5 -5
  184. package/docs/paper/output/arxiv-compile-report.json +3 -3
  185. package/docs/paper/output/submission-bundle/README.md +5 -3
  186. package/docs/paper/output/submission-bundle/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +4 -4
  187. package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
  188. package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-evidence.json +1 -1
  189. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-conformance-card.json +11 -11
  190. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-raw.json +107 -108
  191. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-summary.json +170 -172
  192. package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
  193. package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
  194. package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/submission-manifest.json +11 -11
  195. package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/validation-report.json +1 -1
  196. package/docs/paper/output/submission-bundle/benchmarks/output/summary.json +58 -58
  197. package/docs/paper/output/submission-bundle/docs/paper/07-evaluation.md +5 -5
  198. package/docs/paper/output/submission-bundle/docs/paper/audrey-paper-v1.md +5 -5
  199. package/docs/paper/output/submission-bundle/docs/paper/evidence-ledger.md +1 -1
  200. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
  201. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/main.tex +5 -5
  202. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv-compile-report.json +3 -3
  203. package/docs/paper/output/submission-bundle/package.json +17 -4
  204. package/docs/paper/output/submission-bundle/paper-submission-manifest.json +36 -36
  205. package/examples/fintech-ops-demo.js +12 -5
  206. package/examples/healthcare-ops-demo.js +8 -4
  207. package/examples/ollama-memory-agent.js +41 -13
  208. package/examples/stripe-demo.js +12 -5
  209. package/package.json +17 -4
  210. package/scripts/audit-release-completion.mjs +179 -101
  211. package/scripts/create-arxiv-source.mjs +20 -14
  212. package/scripts/create-paper-submission-bundle.mjs +6 -2
  213. package/scripts/finalize-release.mjs +111 -36
  214. package/scripts/prepare-release-cut.mjs +14 -6
  215. package/scripts/publish-release-bundle.mjs +62 -23
  216. package/scripts/publish-release-github-api.mjs +89 -24
  217. package/scripts/smoke-cli.js +9 -9
  218. package/scripts/sync-paper-artifacts.mjs +5 -1
  219. package/scripts/verify-arxiv-compile.mjs +52 -16
  220. package/scripts/verify-arxiv-source.mjs +45 -15
  221. package/scripts/verify-browser-launch-plan.mjs +28 -11
  222. package/scripts/verify-browser-launch-results.mjs +32 -14
  223. package/scripts/verify-paper-artifacts.mjs +539 -79
  224. package/scripts/verify-paper-claims.mjs +48 -20
  225. package/scripts/verify-paper-submission-bundle.mjs +22 -11
  226. package/scripts/verify-publication-pack.mjs +23 -9
  227. package/scripts/verify-release-readiness.mjs +211 -76
@@ -1,16 +1,16 @@
1
1
  {
2
2
  "suite": "GuardBench comparative",
3
- "generatedAt": "2026-05-15T17:52:12.761Z",
3
+ "generatedAt": "2026-05-29T03:45:36.607Z",
4
4
  "manifestVersion": "0.2.0",
5
5
  "provenance": {
6
- "generatedAt": "2026-05-15T17:52:12.761Z",
7
- "gitSha": "82b0e9979680acf751b9e80f6f90f8c6ac74befb",
6
+ "generatedAt": "2026-05-29T03:45:36.607Z",
7
+ "gitSha": "ceed2f51b615175c8bb412b96b5e5a501561189f",
8
8
  "gitDirty": false,
9
- "node": "v24.15.0",
10
- "v8": "13.6.233.17-node.48",
9
+ "node": "v24.16.0",
10
+ "v8": "13.6.233.17-node.49",
11
11
  "platform": "linux",
12
12
  "arch": "x64",
13
- "osRelease": "6.17.0-1013-azure",
13
+ "osRelease": "6.17.0-1015-azure",
14
14
  "cpuModel": "AMD EPYC 9V74 80-Core Processor",
15
15
  "cpuCount": 4,
16
16
  "totalMemoryGb": 15.61,
@@ -33,11 +33,11 @@
33
33
  "decisionCorrect": true,
34
34
  "riskScore": 0.9,
35
35
  "passed": true,
36
- "latencyMs": 6.135,
36
+ "latencyMs": 8.156,
37
37
  "evidenceCount": 2,
38
38
  "evidenceIds": [
39
- "01KRPC8HJAWZEY0085CJJHK54G",
40
- "failure:Bash:2026-05-15T17:52:01.099Z"
39
+ "01KSRXCP2KSTEGGHFKA5WRR1S8",
40
+ "failure:Bash:2026-05-29T03:45:33.268Z"
41
41
  ],
42
42
  "recommendedActions": [
43
43
  "Do not repeat the exact failed action until the prior error is understood or the command is changed.",
@@ -59,7 +59,7 @@
59
59
  "decisionCorrect": false,
60
60
  "riskScore": 0,
61
61
  "passed": false,
62
- "latencyMs": 0.028,
62
+ "latencyMs": 0.035,
63
63
  "evidenceCount": 0,
64
64
  "evidenceIds": [],
65
65
  "recommendedActions": [],
@@ -79,10 +79,10 @@
79
79
  "decisionCorrect": false,
80
80
  "riskScore": 0.55,
81
81
  "passed": false,
82
- "latencyMs": 0.185,
82
+ "latencyMs": 0.223,
83
83
  "evidenceCount": 1,
84
84
  "evidenceIds": [
85
- "01KRPC8HKK5GH11P78E2KB66KW"
85
+ "01KSRXCP3QFKSFEB9TZNQV7P48"
86
86
  ],
87
87
  "recommendedActions": [
88
88
  "Check the recent failed event before repeating a similar action."
@@ -103,10 +103,10 @@
103
103
  "decisionCorrect": false,
104
104
  "riskScore": 0.35,
105
105
  "passed": false,
106
- "latencyMs": 0.742,
106
+ "latencyMs": 0.945,
107
107
  "evidenceCount": 1,
108
108
  "evidenceIds": [
109
- "01KRPC8HM92Q8XXJDSGGCKV8XP"
109
+ "01KSRXCP44MHYS3JJJN1C3H0SR"
110
110
  ],
111
111
  "recommendedActions": [
112
112
  "Treat retrieved memory as advisory context."
@@ -127,7 +127,7 @@
127
127
  "decisionCorrect": false,
128
128
  "riskScore": 0,
129
129
  "passed": false,
130
- "latencyMs": 0.418,
130
+ "latencyMs": 0.467,
131
131
  "evidenceCount": 0,
132
132
  "evidenceIds": [],
133
133
  "recommendedActions": [],
@@ -154,10 +154,10 @@
154
154
  "decisionCorrect": true,
155
155
  "riskScore": 0.85,
156
156
  "passed": true,
157
- "latencyMs": 1.96,
157
+ "latencyMs": 2.277,
158
158
  "evidenceCount": 1,
159
159
  "evidenceIds": [
160
- "01KRPC8HPQ1DDFJ3F929DEEJEB"
160
+ "01KSRXCP4XYS02ZWPSKVYA7A0G"
161
161
  ],
162
162
  "recommendedActions": [
163
163
  "Do not proceed until the high-severity memory warning is addressed.",
@@ -179,7 +179,7 @@
179
179
  "decisionCorrect": false,
180
180
  "riskScore": 0,
181
181
  "passed": false,
182
- "latencyMs": 0.006,
182
+ "latencyMs": 0.004,
183
183
  "evidenceCount": 0,
184
184
  "evidenceIds": [],
185
185
  "recommendedActions": [],
@@ -199,10 +199,10 @@
199
199
  "decisionCorrect": true,
200
200
  "riskScore": 0.85,
201
201
  "passed": true,
202
- "latencyMs": 0.298,
202
+ "latencyMs": 0.322,
203
203
  "evidenceCount": 1,
204
204
  "evidenceIds": [
205
- "01KRPC8HS2VFCEMKZT27Y7J289"
205
+ "01KSRXCP5QE5HDWCC6T278MZFJ"
206
206
  ],
207
207
  "recommendedActions": [
208
208
  "Review retrieved memory before acting."
@@ -223,10 +223,10 @@
223
223
  "decisionCorrect": true,
224
224
  "riskScore": 0.85,
225
225
  "passed": true,
226
- "latencyMs": 0.475,
226
+ "latencyMs": 0.508,
227
227
  "evidenceCount": 1,
228
228
  "evidenceIds": [
229
- "01KRPC8HSJ7N9KKFGH3EZGTFWP"
229
+ "01KSRXCP63JE2K5CC3CMMWRZVM"
230
230
  ],
231
231
  "recommendedActions": [
232
232
  "Review retrieved memory before acting."
@@ -247,7 +247,7 @@
247
247
  "decisionCorrect": false,
248
248
  "riskScore": 0,
249
249
  "passed": false,
250
- "latencyMs": 0.353,
250
+ "latencyMs": 0.428,
251
251
  "evidenceCount": 0,
252
252
  "evidenceIds": [],
253
253
  "recommendedActions": [],
@@ -274,10 +274,10 @@
274
274
  "decisionCorrect": true,
275
275
  "riskScore": 0.55,
276
276
  "passed": true,
277
- "latencyMs": 2.654,
277
+ "latencyMs": 3.184,
278
278
  "evidenceCount": 1,
279
279
  "evidenceIds": [
280
- "failure:Bash:2026-05-15T17:52:01.365Z"
280
+ "failure:Bash:2026-05-29T03:45:33.404Z"
281
281
  ],
282
282
  "recommendedActions": [
283
283
  "Before re-running Bash, check what changed since the last failure."
@@ -298,7 +298,7 @@
298
298
  "decisionCorrect": false,
299
299
  "riskScore": 0,
300
300
  "passed": false,
301
- "latencyMs": 0.007,
301
+ "latencyMs": 0.005,
302
302
  "evidenceCount": 0,
303
303
  "evidenceIds": [],
304
304
  "recommendedActions": [],
@@ -318,10 +318,10 @@
318
318
  "decisionCorrect": true,
319
319
  "riskScore": 0.55,
320
320
  "passed": true,
321
- "latencyMs": 0.079,
321
+ "latencyMs": 0.071,
322
322
  "evidenceCount": 1,
323
323
  "evidenceIds": [
324
- "01KRPC8J3S4YPWSTE26SD6SVPY"
324
+ "01KSRXCP7QG82ZMEVDA0WPBDGS"
325
325
  ],
326
326
  "recommendedActions": [
327
327
  "Check the recent failed event before repeating a similar action."
@@ -342,10 +342,10 @@
342
342
  "decisionCorrect": true,
343
343
  "riskScore": 0.35,
344
344
  "passed": true,
345
- "latencyMs": 0.492,
345
+ "latencyMs": 0.523,
346
346
  "evidenceCount": 1,
347
347
  "evidenceIds": [
348
- "01KRPC8J4BXV5DHC2RY9YBGZZC"
348
+ "01KSRXCP85A8VZCV4YFYWDBZY4"
349
349
  ],
350
350
  "recommendedActions": [
351
351
  "Treat retrieved memory as advisory context."
@@ -366,7 +366,7 @@
366
366
  "decisionCorrect": false,
367
367
  "riskScore": 0,
368
368
  "passed": false,
369
- "latencyMs": 0.381,
369
+ "latencyMs": 0.457,
370
370
  "evidenceCount": 0,
371
371
  "evidenceIds": [],
372
372
  "recommendedActions": [],
@@ -393,10 +393,10 @@
393
393
  "decisionCorrect": true,
394
394
  "riskScore": 0.55,
395
395
  "passed": true,
396
- "latencyMs": 2.465,
396
+ "latencyMs": 2.916,
397
397
  "evidenceCount": 1,
398
398
  "evidenceIds": [
399
- "failure:Bash:2026-05-15T17:52:01.798Z"
399
+ "failure:Bash:2026-05-29T03:45:33.469Z"
400
400
  ],
401
401
  "recommendedActions": [
402
402
  "Before re-running Bash, check what changed since the last failure."
@@ -417,7 +417,7 @@
417
417
  "decisionCorrect": false,
418
418
  "riskScore": 0,
419
419
  "passed": false,
420
- "latencyMs": 0.006,
420
+ "latencyMs": 0.008,
421
421
  "evidenceCount": 0,
422
422
  "evidenceIds": [],
423
423
  "recommendedActions": [],
@@ -437,10 +437,10 @@
437
437
  "decisionCorrect": true,
438
438
  "riskScore": 0.55,
439
439
  "passed": true,
440
- "latencyMs": 0.052,
440
+ "latencyMs": 0.079,
441
441
  "evidenceCount": 1,
442
442
  "evidenceIds": [
443
- "01KRPC8J98CDFSGA2AG5E56TN0"
443
+ "01KSRXCP9QC1E626F1EMNFSYVM"
444
444
  ],
445
445
  "recommendedActions": [
446
446
  "Check the recent failed event before repeating a similar action."
@@ -461,10 +461,10 @@
461
461
  "decisionCorrect": true,
462
462
  "riskScore": 0.35,
463
463
  "passed": true,
464
- "latencyMs": 0.315,
464
+ "latencyMs": 0.427,
465
465
  "evidenceCount": 1,
466
466
  "evidenceIds": [
467
- "01KRPC8J9Q0CMEHTHR4TPX8SYY"
467
+ "01KSRXCPA32S387Y36NHQRFNPT"
468
468
  ],
469
469
  "recommendedActions": [
470
470
  "Treat retrieved memory as advisory context."
@@ -485,7 +485,7 @@
485
485
  "decisionCorrect": false,
486
486
  "riskScore": 0,
487
487
  "passed": false,
488
- "latencyMs": 0.348,
488
+ "latencyMs": 0.43,
489
489
  "evidenceCount": 0,
490
490
  "evidenceIds": [],
491
491
  "recommendedActions": [],
@@ -512,11 +512,11 @@
512
512
  "decisionCorrect": true,
513
513
  "riskScore": 0.2,
514
514
  "passed": true,
515
- "latencyMs": 2.485,
515
+ "latencyMs": 3.161,
516
516
  "evidenceCount": 2,
517
517
  "evidenceIds": [
518
- "01KRPC8JAPXFTFGGG94QP185MS",
519
- "failure:Bash:2026-05-15T17:52:01.877Z"
518
+ "01KSRXCPAXZX9BGBD93N5CDDCM",
519
+ "failure:Bash:2026-05-29T03:45:33.531Z"
520
520
  ],
521
521
  "recommendedActions": [
522
522
  "This exact action has succeeded since its last failure; proceed with normal validation.",
@@ -538,7 +538,7 @@
538
538
  "decisionCorrect": true,
539
539
  "riskScore": 0,
540
540
  "passed": true,
541
- "latencyMs": 0.004,
541
+ "latencyMs": 0.007,
542
542
  "evidenceCount": 0,
543
543
  "evidenceIds": [],
544
544
  "recommendedActions": [],
@@ -558,10 +558,10 @@
558
558
  "decisionCorrect": false,
559
559
  "riskScore": 0.55,
560
560
  "passed": false,
561
- "latencyMs": 0.055,
561
+ "latencyMs": 0.077,
562
562
  "evidenceCount": 1,
563
563
  "evidenceIds": [
564
- "01KRPC8JBSK81DRW3SP4PGA3M0"
564
+ "01KSRXCPBRH43HFGR41QC4C6S1"
565
565
  ],
566
566
  "recommendedActions": [
567
567
  "Check the recent failed event before repeating a similar action."
@@ -582,10 +582,10 @@
582
582
  "decisionCorrect": false,
583
583
  "riskScore": 0.35,
584
584
  "passed": false,
585
- "latencyMs": 0.35,
585
+ "latencyMs": 0.451,
586
586
  "evidenceCount": 1,
587
587
  "evidenceIds": [
588
- "01KRPC8JC7Z8S82XZQM0MC2VED"
588
+ "01KSRXCPC5FRNJEG2MV4DA0M6B"
589
589
  ],
590
590
  "recommendedActions": [
591
591
  "Treat retrieved memory as advisory context."
@@ -606,7 +606,7 @@
606
606
  "decisionCorrect": true,
607
607
  "riskScore": 0,
608
608
  "passed": true,
609
- "latencyMs": 0.322,
609
+ "latencyMs": 0.44,
610
610
  "evidenceCount": 0,
611
611
  "evidenceIds": [],
612
612
  "recommendedActions": [],
@@ -633,7 +633,7 @@
633
633
  "decisionCorrect": true,
634
634
  "riskScore": 0.85,
635
635
  "passed": true,
636
- "latencyMs": 2.159,
636
+ "latencyMs": 2.647,
637
637
  "evidenceCount": 1,
638
638
  "evidenceIds": [
639
639
  "recall:episodic:recall.vector_counts"
@@ -659,7 +659,7 @@
659
659
  "decisionCorrect": false,
660
660
  "riskScore": 0,
661
661
  "passed": false,
662
- "latencyMs": 0.005,
662
+ "latencyMs": 0.01,
663
663
  "evidenceCount": 0,
664
664
  "evidenceIds": [],
665
665
  "recommendedActions": [],
@@ -679,10 +679,10 @@
679
679
  "decisionCorrect": true,
680
680
  "riskScore": 0.85,
681
681
  "passed": true,
682
- "latencyMs": 0.128,
682
+ "latencyMs": 0.153,
683
683
  "evidenceCount": 1,
684
684
  "evidenceIds": [
685
- "01KRPC8JEJYKMEDCJKMDKX3Q7H"
685
+ "01KSRXCPDRK36MH6YDNH3JKEXF"
686
686
  ],
687
687
  "recommendedActions": [
688
688
  "Review retrieved memory before acting."
@@ -703,7 +703,7 @@
703
703
  "decisionCorrect": false,
704
704
  "riskScore": 0.55,
705
705
  "passed": false,
706
- "latencyMs": 0.267,
706
+ "latencyMs": 0.304,
707
707
  "evidenceCount": 0,
708
708
  "evidenceIds": [],
709
709
  "recommendedActions": [
@@ -731,7 +731,7 @@
731
731
  "decisionCorrect": false,
732
732
  "riskScore": 0,
733
733
  "passed": false,
734
- "latencyMs": 0.334,
734
+ "latencyMs": 0.376,
735
735
  "evidenceCount": 0,
736
736
  "evidenceIds": [],
737
737
  "recommendedActions": [],
@@ -758,11 +758,11 @@
758
758
  "decisionCorrect": true,
759
759
  "riskScore": 0.85,
760
760
  "passed": true,
761
- "latencyMs": 1.561,
761
+ "latencyMs": 1.934,
762
762
  "evidenceCount": 2,
763
763
  "evidenceIds": [
764
764
  "recall:fts:recall.fts_lookup",
765
- "01KRPC8JKB36TE59QKA7Z4V2DM"
765
+ "01KSRXCPEXC1RDR4VFSV3ZV759"
766
766
  ],
767
767
  "recommendedActions": [
768
768
  "Do not proceed until the high-severity memory warning is addressed.",
@@ -785,7 +785,7 @@
785
785
  "decisionCorrect": false,
786
786
  "riskScore": 0,
787
787
  "passed": false,
788
- "latencyMs": 0.005,
788
+ "latencyMs": 0.006,
789
789
  "evidenceCount": 0,
790
790
  "evidenceIds": [],
791
791
  "recommendedActions": [],
@@ -805,10 +805,10 @@
805
805
  "decisionCorrect": false,
806
806
  "riskScore": 0.35,
807
807
  "passed": false,
808
- "latencyMs": 0.103,
808
+ "latencyMs": 0.105,
809
809
  "evidenceCount": 1,
810
810
  "evidenceIds": [
811
- "01KRPC8JNWTHH9J03GFSYTHN4K"
811
+ "01KSRXCPFQ579DG3V402TKWYPM"
812
812
  ],
813
813
  "recommendedActions": [
814
814
  "Treat retrieved memory as advisory context."
@@ -829,10 +829,10 @@
829
829
  "decisionCorrect": false,
830
830
  "riskScore": 0.35,
831
831
  "passed": false,
832
- "latencyMs": 0.314,
832
+ "latencyMs": 0.347,
833
833
  "evidenceCount": 1,
834
834
  "evidenceIds": [
835
- "01KRPC8JPC7P0SJDFCJXF222DE"
835
+ "01KSRXCPG3Q8K0YSYA2SAVRPMM"
836
836
  ],
837
837
  "recommendedActions": [
838
838
  "Treat retrieved memory as advisory context."
@@ -853,7 +853,7 @@
853
853
  "decisionCorrect": false,
854
854
  "riskScore": 0.55,
855
855
  "passed": false,
856
- "latencyMs": 0.129,
856
+ "latencyMs": 0.13,
857
857
  "evidenceCount": 0,
858
858
  "evidenceIds": [],
859
859
  "recommendedActions": [
@@ -888,11 +888,11 @@
888
888
  "decisionCorrect": true,
889
889
  "riskScore": 0.9,
890
890
  "passed": true,
891
- "latencyMs": 2.339,
891
+ "latencyMs": 2.599,
892
892
  "evidenceCount": 2,
893
893
  "evidenceIds": [
894
- "01KRPC8JQFVTGQBPCSTSKTRPY7",
895
- "failure:Bash:2026-05-15T17:52:02.287Z"
894
+ "01KSRXCPGV1X3H49QBRCN72084",
895
+ "failure:Bash:2026-05-29T03:45:33.723Z"
896
896
  ],
897
897
  "recommendedActions": [
898
898
  "Do not repeat the exact failed action until the prior error is understood or the command is changed.",
@@ -914,7 +914,7 @@
914
914
  "decisionCorrect": false,
915
915
  "riskScore": 0,
916
916
  "passed": false,
917
- "latencyMs": 0.007,
917
+ "latencyMs": 0.005,
918
918
  "evidenceCount": 0,
919
919
  "evidenceIds": [],
920
920
  "recommendedActions": [],
@@ -934,10 +934,10 @@
934
934
  "decisionCorrect": false,
935
935
  "riskScore": 0.55,
936
936
  "passed": false,
937
- "latencyMs": 0.049,
937
+ "latencyMs": 0.062,
938
938
  "evidenceCount": 1,
939
939
  "evidenceIds": [
940
- "01KRPC8JRKBJR9Y6CTD0D1ZX47"
940
+ "01KSRXCPHPYBHWZKFJ5XCHY1X6"
941
941
  ],
942
942
  "recommendedActions": [
943
943
  "Check the recent failed event before repeating a similar action."
@@ -958,10 +958,10 @@
958
958
  "decisionCorrect": false,
959
959
  "riskScore": 0.35,
960
960
  "passed": false,
961
- "latencyMs": 0.331,
961
+ "latencyMs": 0.396,
962
962
  "evidenceCount": 1,
963
963
  "evidenceIds": [
964
- "01KRPC8JS661GJEJJV12PR5YD3"
964
+ "01KSRXCPJ2NXZ1VNNKPQ5RH818"
965
965
  ],
966
966
  "recommendedActions": [
967
967
  "Treat retrieved memory as advisory context."
@@ -982,7 +982,7 @@
982
982
  "decisionCorrect": false,
983
983
  "riskScore": 0,
984
984
  "passed": false,
985
- "latencyMs": 0.306,
985
+ "latencyMs": 0.35,
986
986
  "evidenceCount": 0,
987
987
  "evidenceIds": [],
988
988
  "recommendedActions": [],
@@ -1009,11 +1009,11 @@
1009
1009
  "decisionCorrect": true,
1010
1010
  "riskScore": 0.85,
1011
1011
  "passed": true,
1012
- "latencyMs": 1.963,
1012
+ "latencyMs": 2.391,
1013
1013
  "evidenceCount": 2,
1014
1014
  "evidenceIds": [
1015
- "01KRPC8K2N9C3SKKD835K921Z8",
1016
- "01KRPC8K2PVBNMYZ2RBA7B2Q9X"
1015
+ "01KSRXCPJTXVN9X36WASHM2QY6",
1016
+ "01KSRXCPJV1JQBFZ19K6H796AG"
1017
1017
  ],
1018
1018
  "recommendedActions": [
1019
1019
  "Do not proceed until the high-severity memory warning is addressed.",
@@ -1055,11 +1055,11 @@
1055
1055
  "decisionCorrect": true,
1056
1056
  "riskScore": 0.85,
1057
1057
  "passed": true,
1058
- "latencyMs": 0.088,
1058
+ "latencyMs": 0.106,
1059
1059
  "evidenceCount": 2,
1060
1060
  "evidenceIds": [
1061
- "01KRPC8K3V9JQY1TQFJQDWGHGM",
1062
- "01KRPC8K3TKQJ65V280YRNH91B"
1061
+ "01KSRXCPKNY5BNX2TH3M407J48",
1062
+ "01KSRXCPKMTBHPCWYJWJ3REV9J"
1063
1063
  ],
1064
1064
  "recommendedActions": [
1065
1065
  "Review retrieved memory before acting."
@@ -1080,11 +1080,11 @@
1080
1080
  "decisionCorrect": true,
1081
1081
  "riskScore": 0.85,
1082
1082
  "passed": true,
1083
- "latencyMs": 0.345,
1083
+ "latencyMs": 0.4,
1084
1084
  "evidenceCount": 2,
1085
1085
  "evidenceIds": [
1086
- "01KRPC8K4CWWGSHGAYZ5JDF62G",
1087
- "01KRPC8K4D0KF2C6EW79KC869P"
1086
+ "01KSRXCPM08WEJAJ579D9KS053",
1087
+ "01KSRXCPM1V6CVS8380AN3F39Y"
1088
1088
  ],
1089
1089
  "recommendedActions": [
1090
1090
  "Review retrieved memory before acting."
@@ -1105,7 +1105,7 @@
1105
1105
  "decisionCorrect": false,
1106
1106
  "riskScore": 0,
1107
1107
  "passed": false,
1108
- "latencyMs": 0.329,
1108
+ "latencyMs": 0.378,
1109
1109
  "evidenceCount": 0,
1110
1110
  "evidenceIds": [],
1111
1111
  "recommendedActions": [],
@@ -1132,29 +1132,28 @@
1132
1132
  "decisionCorrect": true,
1133
1133
  "riskScore": 0.85,
1134
1134
  "passed": true,
1135
- "latencyMs": 30.791,
1136
- "evidenceCount": 13,
1135
+ "latencyMs": 21.17,
1136
+ "evidenceCount": 12,
1137
1137
  "evidenceIds": [
1138
- "01KRPC8PQ72DA5K79S9YZ7N381",
1139
- "01KRPC8PQ6YCVWK55HP85M0JKB",
1140
- "01KRPC8PMZ7SZFK6P2HCZQF23X",
1141
- "01KRPC8PHVXXXJ1HRFGXQ9SNZD",
1142
- "01KRPC8PE7CP3E77NRQKFWB01Z",
1143
- "01KRPC8PC7C083T4QRW0PB54W0",
1144
- "01KRPC8P76C1BBHBKMW79XHVPA",
1145
- "01KRPC8NSJ25DKGHN9RM5EKGSZ",
1146
- "01KRPC8NSFC7N7AHWGCBNHXP2P",
1147
- "01KRPC8MWXZ9DVQJ2QAFM2EJJC",
1148
- "01KRPC8MV37S2ZR305M1PCPCJA",
1149
- "01KRPC8KZNCXB2CYDMJ6QVV5CJ",
1150
- "01KRPC8K5SHHV6HE5MQ10DSKAT"
1138
+ "01KSRXCQ4DK284E35ZKNYDXWBQ",
1139
+ "01KSRXCQ3H1TVR4E552DQVV9MG",
1140
+ "01KSRXCQ39QKSSNZWEFZBHMNT9",
1141
+ "01KSRXCPYP6VKM8AZC7KZ4SN6W",
1142
+ "01KSRXCPVWWCPWE3M38G6VM1BG",
1143
+ "01KSRXCPV7YMCBAT0602VZ3DQG",
1144
+ "01KSRXCPTM8GHZXKXNKH5FMRG6",
1145
+ "01KSRXCPSNJTZHJK1MWE6WNNYW",
1146
+ "01KSRXCPS3K2GR6MFXTMTDEKD3",
1147
+ "01KSRXCPS3K2GR6MFXTMTDEKD2",
1148
+ "01KSRXCPRGC2EN41NQD4MYJ1Q1",
1149
+ "01KSRXCPNG135506TFPF1WMAVB"
1151
1150
  ],
1152
1151
  "recommendedActions": [
1153
1152
  "Do not proceed until the high-severity memory warning is addressed.",
1154
1153
  "Apply this must-follow rule before acting.",
1155
1154
  "Treat this as uncertain context and verify before relying on it."
1156
1155
  ],
1157
- "summary": "Blocked: 13 memory signals, 1 high severity, 12 medium severity found before acting.",
1156
+ "summary": "Blocked: 12 memory signals, 1 high severity, 11 medium severity found before acting.",
1158
1157
  "recallErrors": [],
1159
1158
  "leakedSecrets": [],
1160
1159
  "hasEvidenceForDecision": true,
@@ -1170,7 +1169,7 @@
1170
1169
  "decisionCorrect": false,
1171
1170
  "riskScore": 0,
1172
1171
  "passed": false,
1173
- "latencyMs": 0.009,
1172
+ "latencyMs": 0.011,
1174
1173
  "evidenceCount": 0,
1175
1174
  "evidenceIds": [],
1176
1175
  "recommendedActions": [],
@@ -1190,10 +1189,10 @@
1190
1189
  "decisionCorrect": true,
1191
1190
  "riskScore": 0.85,
1192
1191
  "passed": true,
1193
- "latencyMs": 0.462,
1192
+ "latencyMs": 0.421,
1194
1193
  "evidenceCount": 1,
1195
1194
  "evidenceIds": [
1196
- "01KRPC8V0CK77K0V6ZKRC1T15A"
1195
+ "01KSRXCR6FARVQ7ATWYWC5QAF9"
1197
1196
  ],
1198
1197
  "recommendedActions": [
1199
1198
  "Review retrieved memory before acting."
@@ -1214,14 +1213,14 @@
1214
1213
  "decisionCorrect": false,
1215
1214
  "riskScore": 0.35,
1216
1215
  "passed": false,
1217
- "latencyMs": 1.051,
1216
+ "latencyMs": 1.551,
1218
1217
  "evidenceCount": 5,
1219
1218
  "evidenceIds": [
1220
- "01KRPC8VCE8VNRWCGWMC1VYNA9",
1221
- "01KRPC8VCG3GQ7EPDPV9RQ23JA",
1222
- "01KRPC8VG14K20MGW0C8N1WDGH",
1223
- "01KRPC8VK28WY2BM5BB3AR9NPA",
1224
- "01KRPC8V709AFR44CVFQB5MAFW"
1219
+ "01KSRXCRC5YBFBKT1RM4SPXRZZ",
1220
+ "01KSRXCR9R09K2J5HM1BGN1PSW",
1221
+ "01KSRXCRJKF9PWQG7YRGGK1TP6",
1222
+ "01KSRXCRGTW1V1VGWWT869D36Q",
1223
+ "01KSRXCRP3KVXPGD7WMNS3KWKF"
1225
1224
  ],
1226
1225
  "recommendedActions": [
1227
1226
  "Treat retrieved memory as advisory context."
@@ -1242,7 +1241,7 @@
1242
1241
  "decisionCorrect": false,
1243
1242
  "riskScore": 0,
1244
1243
  "passed": false,
1245
- "latencyMs": 0.545,
1244
+ "latencyMs": 0.717,
1246
1245
  "evidenceCount": 0,
1247
1246
  "evidenceIds": [],
1248
1247
  "recommendedActions": [],
@@ -1257,7 +1256,7 @@
1257
1256
  }
1258
1257
  ],
1259
1258
  "artifactRedactionSweep": {
1260
- "checkedAt": "2026-05-15T17:52:12.780Z",
1259
+ "checkedAt": "2026-05-29T03:45:36.646Z",
1261
1260
  "filesChecked": [
1262
1261
  "benchmarks/output/guardbench-manifest.json",
1263
1262
  "benchmarks/output/guardbench-raw.json",