audrey 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +57 -0
- package/README.md +13 -3
- package/benchmarks/adapter-self-test.mjs +6 -2
- package/benchmarks/adapters/example-allow.mjs +5 -2
- package/benchmarks/adapters/mem0-platform.mjs +19 -12
- package/benchmarks/adapters/zep-cloud.mjs +51 -27
- package/benchmarks/baselines.js +11 -6
- package/benchmarks/build-leaderboard.mjs +36 -23
- package/benchmarks/cases.js +24 -12
- package/benchmarks/create-conformance-card.mjs +12 -3
- package/benchmarks/create-submission-bundle.mjs +22 -8
- package/benchmarks/dry-run-external-adapters.mjs +24 -12
- package/benchmarks/guardbench.js +263 -123
- package/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +4 -4
- package/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
- package/benchmarks/output/external/guardbench-external-evidence.json +1 -1
- package/benchmarks/output/guardbench-conformance-card.json +12 -12
- package/benchmarks/output/guardbench-raw.json +106 -106
- package/benchmarks/output/guardbench-summary.json +168 -168
- package/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
- package/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
- package/benchmarks/output/submission-bundle/guardbench-conformance-card.json +12 -12
- package/benchmarks/output/submission-bundle/guardbench-raw.json +106 -106
- package/benchmarks/output/submission-bundle/guardbench-summary.json +168 -168
- package/benchmarks/output/submission-bundle/submission-manifest.json +11 -11
- package/benchmarks/output/submission-bundle/validation-report.json +1 -1
- package/benchmarks/output/summary.json +58 -58
- package/benchmarks/perf-snapshot.js +12 -9
- package/benchmarks/perf.bench.js +14 -6
- package/benchmarks/public-paths.mjs +11 -5
- package/benchmarks/reference-results.js +10 -5
- package/benchmarks/report.js +48 -27
- package/benchmarks/run-external-guardbench.mjs +47 -25
- package/benchmarks/run.js +112 -59
- package/benchmarks/validate-adapter-module.mjs +13 -10
- package/benchmarks/validate-adapter-registry.mjs +16 -5
- package/benchmarks/validate-guardbench-artifacts.mjs +76 -19
- package/benchmarks/verify-external-evidence.mjs +86 -31
- package/benchmarks/verify-publication-artifacts.mjs +34 -11
- package/benchmarks/verify-submission-bundle.mjs +9 -4
- package/dist/mcp-server/config.d.ts +1 -1
- package/dist/mcp-server/config.d.ts.map +1 -1
- package/dist/mcp-server/config.js +5 -3
- package/dist/mcp-server/config.js.map +1 -1
- package/dist/mcp-server/index.d.ts +7 -347
- package/dist/mcp-server/index.d.ts.map +1 -1
- package/dist/mcp-server/index.js +289 -256
- package/dist/mcp-server/index.js.map +1 -1
- package/dist/mcp-server/tool-schemas.d.ts +341 -0
- package/dist/mcp-server/tool-schemas.d.ts.map +1 -0
- package/dist/mcp-server/tool-schemas.js +248 -0
- package/dist/mcp-server/tool-schemas.js.map +1 -0
- package/dist/mcp-server/tool-validation.d.ts +17 -0
- package/dist/mcp-server/tool-validation.d.ts.map +1 -0
- package/dist/mcp-server/tool-validation.js +41 -0
- package/dist/mcp-server/tool-validation.js.map +1 -0
- package/dist/src/action-key.d.ts.map +1 -1
- package/dist/src/action-key.js +6 -2
- package/dist/src/action-key.js.map +1 -1
- package/dist/src/adaptive.d.ts.map +1 -1
- package/dist/src/adaptive.js +4 -2
- package/dist/src/adaptive.js.map +1 -1
- package/dist/src/affect.d.ts.map +1 -1
- package/dist/src/affect.js +8 -5
- package/dist/src/affect.js.map +1 -1
- package/dist/src/audrey.d.ts +1 -1
- package/dist/src/audrey.d.ts.map +1 -1
- package/dist/src/audrey.js +93 -49
- package/dist/src/audrey.js.map +1 -1
- package/dist/src/capsule.d.ts.map +1 -1
- package/dist/src/capsule.js +37 -15
- package/dist/src/capsule.js.map +1 -1
- package/dist/src/causal.d.ts +1 -1
- package/dist/src/causal.d.ts.map +1 -1
- package/dist/src/causal.js +4 -2
- package/dist/src/causal.js.map +1 -1
- package/dist/src/confidence.d.ts.map +1 -1
- package/dist/src/confidence.js +5 -5
- package/dist/src/confidence.js.map +1 -1
- package/dist/src/consolidate.d.ts.map +1 -1
- package/dist/src/consolidate.js +17 -9
- package/dist/src/consolidate.js.map +1 -1
- package/dist/src/context.js +1 -1
- package/dist/src/context.js.map +1 -1
- package/dist/src/controller.d.ts.map +1 -1
- package/dist/src/controller.js +24 -13
- package/dist/src/controller.js.map +1 -1
- package/dist/src/db.d.ts.map +1 -1
- package/dist/src/db.js +78 -27
- package/dist/src/db.js.map +1 -1
- package/dist/src/decay.d.ts +1 -1
- package/dist/src/decay.d.ts.map +1 -1
- package/dist/src/decay.js +1 -1
- package/dist/src/decay.js.map +1 -1
- package/dist/src/embedding.d.ts +12 -4
- package/dist/src/embedding.d.ts.map +1 -1
- package/dist/src/embedding.js +18 -16
- package/dist/src/embedding.js.map +1 -1
- package/dist/src/encode.d.ts.map +1 -1
- package/dist/src/encode.js +5 -4
- package/dist/src/encode.js.map +1 -1
- package/dist/src/events.d.ts +3 -2
- package/dist/src/events.d.ts.map +1 -1
- package/dist/src/events.js +7 -3
- package/dist/src/events.js.map +1 -1
- package/dist/src/export.d.ts.map +1 -1
- package/dist/src/export.js +21 -7
- package/dist/src/export.js.map +1 -1
- package/dist/src/feedback.d.ts.map +1 -1
- package/dist/src/feedback.js +1 -1
- package/dist/src/feedback.js.map +1 -1
- package/dist/src/forget.d.ts.map +1 -1
- package/dist/src/forget.js +12 -6
- package/dist/src/forget.js.map +1 -1
- package/dist/src/fts.d.ts.map +1 -1
- package/dist/src/fts.js +20 -8
- package/dist/src/fts.js.map +1 -1
- package/dist/src/hybrid-recall.d.ts.map +1 -1
- package/dist/src/hybrid-recall.js +12 -6
- package/dist/src/hybrid-recall.js.map +1 -1
- package/dist/src/impact.d.ts.map +1 -1
- package/dist/src/impact.js +26 -10
- package/dist/src/impact.js.map +1 -1
- package/dist/src/import.d.ts.map +1 -1
- package/dist/src/import.js +11 -6
- package/dist/src/import.js.map +1 -1
- package/dist/src/index.d.ts +3 -3
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +3 -3
- package/dist/src/index.js.map +1 -1
- package/dist/src/interference.d.ts.map +1 -1
- package/dist/src/interference.js +10 -5
- package/dist/src/interference.js.map +1 -1
- package/dist/src/introspect.d.ts.map +1 -1
- package/dist/src/introspect.js +12 -6
- package/dist/src/introspect.js.map +1 -1
- package/dist/src/llm.d.ts +2 -2
- package/dist/src/llm.d.ts.map +1 -1
- package/dist/src/llm.js +6 -6
- package/dist/src/llm.js.map +1 -1
- package/dist/src/migrate.d.ts.map +1 -1
- package/dist/src/migrate.js +10 -4
- package/dist/src/migrate.js.map +1 -1
- package/dist/src/preflight.d.ts.map +1 -1
- package/dist/src/preflight.js +6 -8
- package/dist/src/preflight.js.map +1 -1
- package/dist/src/profile.d.ts.map +1 -1
- package/dist/src/profile.js.map +1 -1
- package/dist/src/promote.d.ts.map +1 -1
- package/dist/src/promote.js +16 -7
- package/dist/src/promote.js.map +1 -1
- package/dist/src/prompts.d.ts.map +1 -1
- package/dist/src/prompts.js +1 -2
- package/dist/src/prompts.js.map +1 -1
- package/dist/src/recall.d.ts.map +1 -1
- package/dist/src/recall.js +85 -18
- package/dist/src/recall.js.map +1 -1
- package/dist/src/redact.d.ts.map +1 -1
- package/dist/src/redact.js +9 -4
- package/dist/src/redact.js.map +1 -1
- package/dist/src/reflexes.d.ts.map +1 -1
- package/dist/src/reflexes.js +1 -7
- package/dist/src/reflexes.js.map +1 -1
- package/dist/src/rollback.d.ts.map +1 -1
- package/dist/src/rollback.js +4 -2
- package/dist/src/rollback.js.map +1 -1
- package/dist/src/routes.d.ts.map +1 -1
- package/dist/src/routes.js +33 -13
- package/dist/src/routes.js.map +1 -1
- package/dist/src/rules-compiler.d.ts.map +1 -1
- package/dist/src/rules-compiler.js +24 -2
- package/dist/src/rules-compiler.js.map +1 -1
- package/dist/src/server.js +2 -2
- package/dist/src/server.js.map +1 -1
- package/dist/src/tool-trace.d.ts +2 -2
- package/dist/src/tool-trace.d.ts.map +1 -1
- package/dist/src/tool-trace.js +12 -4
- package/dist/src/tool-trace.js.map +1 -1
- package/dist/src/types.d.ts.map +1 -1
- package/dist/src/ulid.js +1 -1
- package/dist/src/ulid.js.map +1 -1
- package/dist/src/utils.d.ts.map +1 -1
- package/dist/src/utils.js.map +1 -1
- package/dist/src/validate.d.ts.map +1 -1
- package/dist/src/validate.js +20 -10
- package/dist/src/validate.js.map +1 -1
- package/docs/paper/07-evaluation.md +5 -5
- package/docs/paper/audrey-paper-v1.md +5 -5
- package/docs/paper/evidence-ledger.md +1 -1
- package/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
- package/docs/paper/output/arxiv/main.tex +5 -5
- package/docs/paper/output/arxiv-compile-report.json +3 -3
- package/docs/paper/output/submission-bundle/README.md +13 -3
- package/docs/paper/output/submission-bundle/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +4 -4
- package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-evidence.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-conformance-card.json +12 -12
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-raw.json +106 -106
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-summary.json +168 -168
- package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
- package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
- package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/submission-manifest.json +11 -11
- package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/validation-report.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/summary.json +64 -64
- package/docs/paper/output/submission-bundle/docs/paper/07-evaluation.md +5 -5
- package/docs/paper/output/submission-bundle/docs/paper/audrey-paper-v1.md +5 -5
- package/docs/paper/output/submission-bundle/docs/paper/evidence-ledger.md +1 -1
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/main.tex +5 -5
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv-compile-report.json +3 -3
- package/docs/paper/output/submission-bundle/package.json +17 -4
- package/docs/paper/output/submission-bundle/paper-submission-manifest.json +34 -34
- package/examples/fintech-ops-demo.js +12 -5
- package/examples/healthcare-ops-demo.js +8 -4
- package/examples/ollama-memory-agent.js +41 -13
- package/examples/stripe-demo.js +12 -5
- package/package.json +17 -4
- package/scripts/audit-release-completion.mjs +179 -101
- package/scripts/create-arxiv-source.mjs +20 -14
- package/scripts/create-paper-submission-bundle.mjs +6 -2
- package/scripts/finalize-release.mjs +111 -36
- package/scripts/prepare-release-cut.mjs +14 -6
- package/scripts/publish-release-bundle.mjs +62 -23
- package/scripts/publish-release-github-api.mjs +89 -24
- package/scripts/smoke-cli.js +9 -9
- package/scripts/sync-paper-artifacts.mjs +5 -1
- package/scripts/verify-arxiv-compile.mjs +52 -16
- package/scripts/verify-arxiv-source.mjs +45 -15
- package/scripts/verify-browser-launch-plan.mjs +28 -11
- package/scripts/verify-browser-launch-results.mjs +32 -14
- package/scripts/verify-paper-artifacts.mjs +539 -79
- package/scripts/verify-paper-claims.mjs +48 -20
- package/scripts/verify-paper-submission-bundle.mjs +22 -11
- package/scripts/verify-publication-pack.mjs +23 -9
- package/scripts/verify-release-readiness.mjs +211 -76
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"suite": "GuardBench comparative",
|
|
3
|
-
"generatedAt": "2026-05-
|
|
3
|
+
"generatedAt": "2026-05-29T13:33:23.188Z",
|
|
4
4
|
"manifest": {
|
|
5
5
|
"manifestVersion": "0.2.0",
|
|
6
6
|
"suiteId": "guardbench-local-comparative",
|
|
@@ -416,15 +416,15 @@
|
|
|
416
416
|
]
|
|
417
417
|
},
|
|
418
418
|
"provenance": {
|
|
419
|
-
"generatedAt": "2026-05-
|
|
420
|
-
"gitSha": "
|
|
419
|
+
"generatedAt": "2026-05-29T13:33:23.189Z",
|
|
420
|
+
"gitSha": "9f771bae94f5ce4cfd5d5425e300a6a440c833d2",
|
|
421
421
|
"gitDirty": false,
|
|
422
|
-
"node": "v24.
|
|
423
|
-
"v8": "13.6.233.17-node.
|
|
422
|
+
"node": "v24.16.0",
|
|
423
|
+
"v8": "13.6.233.17-node.49",
|
|
424
424
|
"platform": "linux",
|
|
425
425
|
"arch": "x64",
|
|
426
|
-
"osRelease": "6.17.0-
|
|
427
|
-
"cpuModel": "
|
|
426
|
+
"osRelease": "6.17.0-1015-azure",
|
|
427
|
+
"cpuModel": "Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz",
|
|
428
428
|
"cpuCount": 4,
|
|
429
429
|
"totalMemoryGb": 15.61,
|
|
430
430
|
"embeddingProvider": "mock",
|
|
@@ -449,14 +449,14 @@
|
|
|
449
449
|
"redactionLeaks": 0,
|
|
450
450
|
"recallDegradationDetectionRate": 1,
|
|
451
451
|
"latency": {
|
|
452
|
-
"p50Ms":
|
|
453
|
-
"p95Ms":
|
|
454
|
-
"maxMs":
|
|
452
|
+
"p50Ms": 3.09,
|
|
453
|
+
"p95Ms": 28.181,
|
|
454
|
+
"maxMs": 28.181
|
|
455
455
|
},
|
|
456
456
|
"systemSummaries": [
|
|
457
457
|
{
|
|
458
458
|
"system": "Audrey Guard",
|
|
459
|
-
"generatedAt": "2026-05-
|
|
459
|
+
"generatedAt": "2026-05-29T13:33:23.188Z",
|
|
460
460
|
"scenarios": 10,
|
|
461
461
|
"passed": 10,
|
|
462
462
|
"passRate": 1,
|
|
@@ -470,14 +470,14 @@
|
|
|
470
470
|
"redactionLeaks": 0,
|
|
471
471
|
"recallDegradationDetectionRate": 1,
|
|
472
472
|
"latency": {
|
|
473
|
-
"p50Ms":
|
|
474
|
-
"p95Ms":
|
|
475
|
-
"maxMs":
|
|
473
|
+
"p50Ms": 3.09,
|
|
474
|
+
"p95Ms": 28.181,
|
|
475
|
+
"maxMs": 28.181
|
|
476
476
|
}
|
|
477
477
|
},
|
|
478
478
|
{
|
|
479
479
|
"system": "No Memory",
|
|
480
|
-
"generatedAt": "2026-05-
|
|
480
|
+
"generatedAt": "2026-05-29T13:33:23.188Z",
|
|
481
481
|
"scenarios": 10,
|
|
482
482
|
"passed": 1,
|
|
483
483
|
"passRate": 0.1,
|
|
@@ -491,14 +491,14 @@
|
|
|
491
491
|
"redactionLeaks": 0,
|
|
492
492
|
"recallDegradationDetectionRate": 0,
|
|
493
493
|
"latency": {
|
|
494
|
-
"p50Ms": 0.
|
|
495
|
-
"p95Ms": 0.
|
|
496
|
-
"maxMs": 0.
|
|
494
|
+
"p50Ms": 0.008,
|
|
495
|
+
"p95Ms": 0.061,
|
|
496
|
+
"maxMs": 0.061
|
|
497
497
|
}
|
|
498
498
|
},
|
|
499
499
|
{
|
|
500
500
|
"system": "Recent Window",
|
|
501
|
-
"generatedAt": "2026-05-
|
|
501
|
+
"generatedAt": "2026-05-29T13:33:23.188Z",
|
|
502
502
|
"scenarios": 10,
|
|
503
503
|
"passed": 6,
|
|
504
504
|
"passRate": 0.6,
|
|
@@ -512,14 +512,14 @@
|
|
|
512
512
|
"redactionLeaks": 0,
|
|
513
513
|
"recallDegradationDetectionRate": 0.5,
|
|
514
514
|
"latency": {
|
|
515
|
-
"p50Ms": 0.
|
|
516
|
-
"p95Ms": 0.
|
|
517
|
-
"maxMs": 0.
|
|
515
|
+
"p50Ms": 0.138,
|
|
516
|
+
"p95Ms": 0.434,
|
|
517
|
+
"maxMs": 0.434
|
|
518
518
|
}
|
|
519
519
|
},
|
|
520
520
|
{
|
|
521
521
|
"system": "Vector Only",
|
|
522
|
-
"generatedAt": "2026-05-
|
|
522
|
+
"generatedAt": "2026-05-29T13:33:23.188Z",
|
|
523
523
|
"scenarios": 10,
|
|
524
524
|
"passed": 4,
|
|
525
525
|
"passRate": 0.4,
|
|
@@ -533,14 +533,14 @@
|
|
|
533
533
|
"redactionLeaks": 0,
|
|
534
534
|
"recallDegradationDetectionRate": 0,
|
|
535
535
|
"latency": {
|
|
536
|
-
"p50Ms": 0.
|
|
537
|
-
"p95Ms": 1.
|
|
538
|
-
"maxMs": 1.
|
|
536
|
+
"p50Ms": 0.529,
|
|
537
|
+
"p95Ms": 1.356,
|
|
538
|
+
"maxMs": 1.356
|
|
539
539
|
}
|
|
540
540
|
},
|
|
541
541
|
{
|
|
542
542
|
"system": "FTS Only",
|
|
543
|
-
"generatedAt": "2026-05-
|
|
543
|
+
"generatedAt": "2026-05-29T13:33:23.188Z",
|
|
544
544
|
"scenarios": 10,
|
|
545
545
|
"passed": 1,
|
|
546
546
|
"passRate": 0.1,
|
|
@@ -554,16 +554,16 @@
|
|
|
554
554
|
"redactionLeaks": 0,
|
|
555
555
|
"recallDegradationDetectionRate": 0,
|
|
556
556
|
"latency": {
|
|
557
|
-
"p50Ms": 0.
|
|
558
|
-
"p95Ms": 0.
|
|
559
|
-
"maxMs": 0.
|
|
557
|
+
"p50Ms": 0.421,
|
|
558
|
+
"p95Ms": 0.633,
|
|
559
|
+
"maxMs": 0.633
|
|
560
560
|
}
|
|
561
561
|
}
|
|
562
562
|
],
|
|
563
563
|
"comparisons": {
|
|
564
564
|
"bestBaseline": {
|
|
565
565
|
"system": "Recent Window",
|
|
566
|
-
"generatedAt": "2026-05-
|
|
566
|
+
"generatedAt": "2026-05-29T13:33:23.188Z",
|
|
567
567
|
"scenarios": 10,
|
|
568
568
|
"passed": 6,
|
|
569
569
|
"passRate": 0.6,
|
|
@@ -577,9 +577,9 @@
|
|
|
577
577
|
"redactionLeaks": 0,
|
|
578
578
|
"recallDegradationDetectionRate": 0.5,
|
|
579
579
|
"latency": {
|
|
580
|
-
"p50Ms": 0.
|
|
581
|
-
"p95Ms": 0.
|
|
582
|
-
"maxMs": 0.
|
|
580
|
+
"p50Ms": 0.138,
|
|
581
|
+
"p95Ms": 0.434,
|
|
582
|
+
"maxMs": 0.434
|
|
583
583
|
}
|
|
584
584
|
},
|
|
585
585
|
"audreyMarginOverBestBaseline": 0.4
|
|
@@ -594,11 +594,11 @@
|
|
|
594
594
|
"decisionCorrect": true,
|
|
595
595
|
"riskScore": 0.9,
|
|
596
596
|
"passed": true,
|
|
597
|
-
"latencyMs":
|
|
597
|
+
"latencyMs": 7.93,
|
|
598
598
|
"evidenceCount": 2,
|
|
599
599
|
"evidenceIds": [
|
|
600
|
-
"
|
|
601
|
-
"failure:Bash:2026-05-
|
|
600
|
+
"01KSSZ0Y9FJAC1S37EWC7YK3BX",
|
|
601
|
+
"failure:Bash:2026-05-29T13:33:20.048Z"
|
|
602
602
|
],
|
|
603
603
|
"recommendedActions": [
|
|
604
604
|
"Do not repeat the exact failed action until the prior error is understood or the command is changed.",
|
|
@@ -620,10 +620,10 @@
|
|
|
620
620
|
"decisionCorrect": true,
|
|
621
621
|
"riskScore": 0.85,
|
|
622
622
|
"passed": true,
|
|
623
|
-
"latencyMs":
|
|
623
|
+
"latencyMs": 2.263,
|
|
624
624
|
"evidenceCount": 1,
|
|
625
625
|
"evidenceIds": [
|
|
626
|
-
"
|
|
626
|
+
"01KSSZ0YBP031Y5JXGRJTC5FS8"
|
|
627
627
|
],
|
|
628
628
|
"recommendedActions": [
|
|
629
629
|
"Do not proceed until the high-severity memory warning is addressed.",
|
|
@@ -645,10 +645,10 @@
|
|
|
645
645
|
"decisionCorrect": true,
|
|
646
646
|
"riskScore": 0.55,
|
|
647
647
|
"passed": true,
|
|
648
|
-
"latencyMs":
|
|
648
|
+
"latencyMs": 3.152,
|
|
649
649
|
"evidenceCount": 1,
|
|
650
650
|
"evidenceIds": [
|
|
651
|
-
"failure:Bash:2026-05-
|
|
651
|
+
"failure:Bash:2026-05-29T13:33:20.179Z"
|
|
652
652
|
],
|
|
653
653
|
"recommendedActions": [
|
|
654
654
|
"Before re-running Bash, check what changed since the last failure."
|
|
@@ -669,10 +669,10 @@
|
|
|
669
669
|
"decisionCorrect": true,
|
|
670
670
|
"riskScore": 0.55,
|
|
671
671
|
"passed": true,
|
|
672
|
-
"latencyMs": 2.
|
|
672
|
+
"latencyMs": 2.846,
|
|
673
673
|
"evidenceCount": 1,
|
|
674
674
|
"evidenceIds": [
|
|
675
|
-
"failure:Bash:2026-05-
|
|
675
|
+
"failure:Bash:2026-05-29T13:33:20.243Z"
|
|
676
676
|
],
|
|
677
677
|
"recommendedActions": [
|
|
678
678
|
"Before re-running Bash, check what changed since the last failure."
|
|
@@ -693,11 +693,11 @@
|
|
|
693
693
|
"decisionCorrect": true,
|
|
694
694
|
"riskScore": 0.2,
|
|
695
695
|
"passed": true,
|
|
696
|
-
"latencyMs":
|
|
696
|
+
"latencyMs": 3.09,
|
|
697
697
|
"evidenceCount": 2,
|
|
698
698
|
"evidenceIds": [
|
|
699
|
-
"
|
|
700
|
-
"failure:Bash:2026-05-
|
|
699
|
+
"01KSSZ0YHGH45B4CPKET1W5VZW",
|
|
700
|
+
"failure:Bash:2026-05-29T13:33:20.302Z"
|
|
701
701
|
],
|
|
702
702
|
"recommendedActions": [
|
|
703
703
|
"This exact action has succeeded since its last failure; proceed with normal validation.",
|
|
@@ -719,7 +719,7 @@
|
|
|
719
719
|
"decisionCorrect": true,
|
|
720
720
|
"riskScore": 0.85,
|
|
721
721
|
"passed": true,
|
|
722
|
-
"latencyMs": 2.
|
|
722
|
+
"latencyMs": 2.534,
|
|
723
723
|
"evidenceCount": 1,
|
|
724
724
|
"evidenceIds": [
|
|
725
725
|
"recall:episodic:recall.vector_counts"
|
|
@@ -745,11 +745,11 @@
|
|
|
745
745
|
"decisionCorrect": true,
|
|
746
746
|
"riskScore": 0.85,
|
|
747
747
|
"passed": true,
|
|
748
|
-
"latencyMs":
|
|
748
|
+
"latencyMs": 3.585,
|
|
749
749
|
"evidenceCount": 2,
|
|
750
750
|
"evidenceIds": [
|
|
751
751
|
"recall:fts:recall.fts_lookup",
|
|
752
|
-
"
|
|
752
|
+
"01KSSZ0YN75S42CB2APNH97S32"
|
|
753
753
|
],
|
|
754
754
|
"recommendedActions": [
|
|
755
755
|
"Do not proceed until the high-severity memory warning is addressed.",
|
|
@@ -772,11 +772,11 @@
|
|
|
772
772
|
"decisionCorrect": true,
|
|
773
773
|
"riskScore": 0.9,
|
|
774
774
|
"passed": true,
|
|
775
|
-
"latencyMs": 2.
|
|
775
|
+
"latencyMs": 2.599,
|
|
776
776
|
"evidenceCount": 2,
|
|
777
777
|
"evidenceIds": [
|
|
778
|
-
"
|
|
779
|
-
"failure:Bash:2026-05-
|
|
778
|
+
"01KSSZ0YQ26JKF6YQETHJEAWCE",
|
|
779
|
+
"failure:Bash:2026-05-29T13:33:20.483Z"
|
|
780
780
|
],
|
|
781
781
|
"recommendedActions": [
|
|
782
782
|
"Do not repeat the exact failed action until the prior error is understood or the command is changed.",
|
|
@@ -798,11 +798,11 @@
|
|
|
798
798
|
"decisionCorrect": true,
|
|
799
799
|
"riskScore": 0.85,
|
|
800
800
|
"passed": true,
|
|
801
|
-
"latencyMs":
|
|
801
|
+
"latencyMs": 2.341,
|
|
802
802
|
"evidenceCount": 2,
|
|
803
803
|
"evidenceIds": [
|
|
804
|
-
"
|
|
805
|
-
"
|
|
804
|
+
"01KSSZ0YRX53XKT4C23A12RFND",
|
|
805
|
+
"01KSSZ0YRY86WACDBFSD3W0N04"
|
|
806
806
|
],
|
|
807
807
|
"recommendedActions": [
|
|
808
808
|
"Do not proceed until the high-severity memory warning is addressed.",
|
|
@@ -824,22 +824,22 @@
|
|
|
824
824
|
"decisionCorrect": true,
|
|
825
825
|
"riskScore": 0.85,
|
|
826
826
|
"passed": true,
|
|
827
|
-
"latencyMs":
|
|
827
|
+
"latencyMs": 28.181,
|
|
828
828
|
"evidenceCount": 13,
|
|
829
829
|
"evidenceIds": [
|
|
830
|
-
"
|
|
831
|
-
"
|
|
832
|
-
"
|
|
833
|
-
"
|
|
834
|
-
"
|
|
835
|
-
"
|
|
836
|
-
"
|
|
837
|
-
"
|
|
838
|
-
"
|
|
839
|
-
"
|
|
840
|
-
"
|
|
841
|
-
"
|
|
842
|
-
"
|
|
830
|
+
"01KSSZ0Z92H6MV9SGY92Q4JAF3",
|
|
831
|
+
"01KSSZ0Z8W4Z9J8GT3CK8FC6VS",
|
|
832
|
+
"01KSSZ0Z7B5ZVWXVZKT4VW8A34",
|
|
833
|
+
"01KSSZ0Z7A1AFDK0520E1RH72M",
|
|
834
|
+
"01KSSZ0Z4Y150G4Q3STE6WJBD8",
|
|
835
|
+
"01KSSZ0Z4WMY0QV4VGJD4CCG1B",
|
|
836
|
+
"01KSSZ0Z48NTHT9ZB0PM7ESE4Q",
|
|
837
|
+
"01KSSZ0Z44J6MF93GDY4510PKC",
|
|
838
|
+
"01KSSZ0Z22A7QWQ2KEPKXQM7FK",
|
|
839
|
+
"01KSSZ0Z21X7YP9FJCGMZ61E7X",
|
|
840
|
+
"01KSSZ0YYRG8809DR6R80GG89T",
|
|
841
|
+
"01KSSZ0YYKFZYZKPGJ8TJG319N",
|
|
842
|
+
"01KSSZ0YY3A2N08TE8S3VQ7GJE"
|
|
843
843
|
],
|
|
844
844
|
"recommendedActions": [
|
|
845
845
|
"Do not proceed until the high-severity memory warning is addressed.",
|
|
@@ -869,11 +869,11 @@
|
|
|
869
869
|
"decisionCorrect": true,
|
|
870
870
|
"riskScore": 0.9,
|
|
871
871
|
"passed": true,
|
|
872
|
-
"latencyMs":
|
|
872
|
+
"latencyMs": 7.93,
|
|
873
873
|
"evidenceCount": 2,
|
|
874
874
|
"evidenceIds": [
|
|
875
|
-
"
|
|
876
|
-
"failure:Bash:2026-05-
|
|
875
|
+
"01KSSZ0Y9FJAC1S37EWC7YK3BX",
|
|
876
|
+
"failure:Bash:2026-05-29T13:33:20.048Z"
|
|
877
877
|
],
|
|
878
878
|
"recommendedActions": [
|
|
879
879
|
"Do not repeat the exact failed action until the prior error is understood or the command is changed.",
|
|
@@ -895,7 +895,7 @@
|
|
|
895
895
|
"decisionCorrect": false,
|
|
896
896
|
"riskScore": 0,
|
|
897
897
|
"passed": false,
|
|
898
|
-
"latencyMs": 0.
|
|
898
|
+
"latencyMs": 0.061,
|
|
899
899
|
"evidenceCount": 0,
|
|
900
900
|
"evidenceIds": [],
|
|
901
901
|
"recommendedActions": [],
|
|
@@ -915,10 +915,10 @@
|
|
|
915
915
|
"decisionCorrect": false,
|
|
916
916
|
"riskScore": 0.55,
|
|
917
917
|
"passed": false,
|
|
918
|
-
"latencyMs": 0.
|
|
918
|
+
"latencyMs": 0.221,
|
|
919
919
|
"evidenceCount": 1,
|
|
920
920
|
"evidenceIds": [
|
|
921
|
-
"
|
|
921
|
+
"01KSSZ0YAJTGPGFWKAJRPXMRDB"
|
|
922
922
|
],
|
|
923
923
|
"recommendedActions": [
|
|
924
924
|
"Check the recent failed event before repeating a similar action."
|
|
@@ -939,10 +939,10 @@
|
|
|
939
939
|
"decisionCorrect": false,
|
|
940
940
|
"riskScore": 0.35,
|
|
941
941
|
"passed": false,
|
|
942
|
-
"latencyMs": 0.
|
|
942
|
+
"latencyMs": 0.991,
|
|
943
943
|
"evidenceCount": 1,
|
|
944
944
|
"evidenceIds": [
|
|
945
|
-
"
|
|
945
|
+
"01KSSZ0YAYKH485SPZ8S9ZB50R"
|
|
946
946
|
],
|
|
947
947
|
"recommendedActions": [
|
|
948
948
|
"Treat retrieved memory as advisory context."
|
|
@@ -963,7 +963,7 @@
|
|
|
963
963
|
"decisionCorrect": false,
|
|
964
964
|
"riskScore": 0,
|
|
965
965
|
"passed": false,
|
|
966
|
-
"latencyMs": 0.
|
|
966
|
+
"latencyMs": 0.501,
|
|
967
967
|
"evidenceCount": 0,
|
|
968
968
|
"evidenceIds": [],
|
|
969
969
|
"recommendedActions": [],
|
|
@@ -990,10 +990,10 @@
|
|
|
990
990
|
"decisionCorrect": true,
|
|
991
991
|
"riskScore": 0.85,
|
|
992
992
|
"passed": true,
|
|
993
|
-
"latencyMs":
|
|
993
|
+
"latencyMs": 2.263,
|
|
994
994
|
"evidenceCount": 1,
|
|
995
995
|
"evidenceIds": [
|
|
996
|
-
"
|
|
996
|
+
"01KSSZ0YBP031Y5JXGRJTC5FS8"
|
|
997
997
|
],
|
|
998
998
|
"recommendedActions": [
|
|
999
999
|
"Do not proceed until the high-severity memory warning is addressed.",
|
|
@@ -1035,10 +1035,10 @@
|
|
|
1035
1035
|
"decisionCorrect": true,
|
|
1036
1036
|
"riskScore": 0.85,
|
|
1037
1037
|
"passed": true,
|
|
1038
|
-
"latencyMs": 0.
|
|
1038
|
+
"latencyMs": 0.355,
|
|
1039
1039
|
"evidenceCount": 1,
|
|
1040
1040
|
"evidenceIds": [
|
|
1041
|
-
"
|
|
1041
|
+
"01KSSZ0YCF1X2GEC0505YYCRTG"
|
|
1042
1042
|
],
|
|
1043
1043
|
"recommendedActions": [
|
|
1044
1044
|
"Review retrieved memory before acting."
|
|
@@ -1059,10 +1059,10 @@
|
|
|
1059
1059
|
"decisionCorrect": true,
|
|
1060
1060
|
"riskScore": 0.85,
|
|
1061
1061
|
"passed": true,
|
|
1062
|
-
"latencyMs": 0.
|
|
1062
|
+
"latencyMs": 0.548,
|
|
1063
1063
|
"evidenceCount": 1,
|
|
1064
1064
|
"evidenceIds": [
|
|
1065
|
-
"
|
|
1065
|
+
"01KSSZ0YCV9P16GZ1RKY1V9ZZQ"
|
|
1066
1066
|
],
|
|
1067
1067
|
"recommendedActions": [
|
|
1068
1068
|
"Review retrieved memory before acting."
|
|
@@ -1083,7 +1083,7 @@
|
|
|
1083
1083
|
"decisionCorrect": false,
|
|
1084
1084
|
"riskScore": 0,
|
|
1085
1085
|
"passed": false,
|
|
1086
|
-
"latencyMs": 0.
|
|
1086
|
+
"latencyMs": 0.432,
|
|
1087
1087
|
"evidenceCount": 0,
|
|
1088
1088
|
"evidenceIds": [],
|
|
1089
1089
|
"recommendedActions": [],
|
|
@@ -1110,10 +1110,10 @@
|
|
|
1110
1110
|
"decisionCorrect": true,
|
|
1111
1111
|
"riskScore": 0.55,
|
|
1112
1112
|
"passed": true,
|
|
1113
|
-
"latencyMs":
|
|
1113
|
+
"latencyMs": 3.152,
|
|
1114
1114
|
"evidenceCount": 1,
|
|
1115
1115
|
"evidenceIds": [
|
|
1116
|
-
"failure:Bash:2026-05-
|
|
1116
|
+
"failure:Bash:2026-05-29T13:33:20.179Z"
|
|
1117
1117
|
],
|
|
1118
1118
|
"recommendedActions": [
|
|
1119
1119
|
"Before re-running Bash, check what changed since the last failure."
|
|
@@ -1134,7 +1134,7 @@
|
|
|
1134
1134
|
"decisionCorrect": false,
|
|
1135
1135
|
"riskScore": 0,
|
|
1136
1136
|
"passed": false,
|
|
1137
|
-
"latencyMs": 0.
|
|
1137
|
+
"latencyMs": 0.008,
|
|
1138
1138
|
"evidenceCount": 0,
|
|
1139
1139
|
"evidenceIds": [],
|
|
1140
1140
|
"recommendedActions": [],
|
|
@@ -1154,10 +1154,10 @@
|
|
|
1154
1154
|
"decisionCorrect": true,
|
|
1155
1155
|
"riskScore": 0.55,
|
|
1156
1156
|
"passed": true,
|
|
1157
|
-
"latencyMs": 0.
|
|
1157
|
+
"latencyMs": 0.085,
|
|
1158
1158
|
"evidenceCount": 1,
|
|
1159
1159
|
"evidenceIds": [
|
|
1160
|
-
"
|
|
1160
|
+
"01KSSZ0YEGG95X6ZRDJMM6QV01"
|
|
1161
1161
|
],
|
|
1162
1162
|
"recommendedActions": [
|
|
1163
1163
|
"Check the recent failed event before repeating a similar action."
|
|
@@ -1178,10 +1178,10 @@
|
|
|
1178
1178
|
"decisionCorrect": true,
|
|
1179
1179
|
"riskScore": 0.35,
|
|
1180
1180
|
"passed": true,
|
|
1181
|
-
"latencyMs": 0.
|
|
1181
|
+
"latencyMs": 0.546,
|
|
1182
1182
|
"evidenceCount": 1,
|
|
1183
1183
|
"evidenceIds": [
|
|
1184
|
-
"
|
|
1184
|
+
"01KSSZ0YEWVT8CP0CFKKTE21FV"
|
|
1185
1185
|
],
|
|
1186
1186
|
"recommendedActions": [
|
|
1187
1187
|
"Treat retrieved memory as advisory context."
|
|
@@ -1202,7 +1202,7 @@
|
|
|
1202
1202
|
"decisionCorrect": false,
|
|
1203
1203
|
"riskScore": 0,
|
|
1204
1204
|
"passed": false,
|
|
1205
|
-
"latencyMs": 0.
|
|
1205
|
+
"latencyMs": 0.42,
|
|
1206
1206
|
"evidenceCount": 0,
|
|
1207
1207
|
"evidenceIds": [],
|
|
1208
1208
|
"recommendedActions": [],
|
|
@@ -1229,10 +1229,10 @@
|
|
|
1229
1229
|
"decisionCorrect": true,
|
|
1230
1230
|
"riskScore": 0.55,
|
|
1231
1231
|
"passed": true,
|
|
1232
|
-
"latencyMs": 2.
|
|
1232
|
+
"latencyMs": 2.846,
|
|
1233
1233
|
"evidenceCount": 1,
|
|
1234
1234
|
"evidenceIds": [
|
|
1235
|
-
"failure:Bash:2026-05-
|
|
1235
|
+
"failure:Bash:2026-05-29T13:33:20.243Z"
|
|
1236
1236
|
],
|
|
1237
1237
|
"recommendedActions": [
|
|
1238
1238
|
"Before re-running Bash, check what changed since the last failure."
|
|
@@ -1253,7 +1253,7 @@
|
|
|
1253
1253
|
"decisionCorrect": false,
|
|
1254
1254
|
"riskScore": 0,
|
|
1255
1255
|
"passed": false,
|
|
1256
|
-
"latencyMs": 0.
|
|
1256
|
+
"latencyMs": 0.017,
|
|
1257
1257
|
"evidenceCount": 0,
|
|
1258
1258
|
"evidenceIds": [],
|
|
1259
1259
|
"recommendedActions": [],
|
|
@@ -1273,10 +1273,10 @@
|
|
|
1273
1273
|
"decisionCorrect": true,
|
|
1274
1274
|
"riskScore": 0.55,
|
|
1275
1275
|
"passed": true,
|
|
1276
|
-
"latencyMs": 0.
|
|
1276
|
+
"latencyMs": 0.07,
|
|
1277
1277
|
"evidenceCount": 1,
|
|
1278
1278
|
"evidenceIds": [
|
|
1279
|
-
"
|
|
1279
|
+
"01KSSZ0YGC3GGGPFAFRWY19ZA1"
|
|
1280
1280
|
],
|
|
1281
1281
|
"recommendedActions": [
|
|
1282
1282
|
"Check the recent failed event before repeating a similar action."
|
|
@@ -1297,10 +1297,10 @@
|
|
|
1297
1297
|
"decisionCorrect": true,
|
|
1298
1298
|
"riskScore": 0.35,
|
|
1299
1299
|
"passed": true,
|
|
1300
|
-
"latencyMs": 0.
|
|
1300
|
+
"latencyMs": 0.416,
|
|
1301
1301
|
"evidenceCount": 1,
|
|
1302
1302
|
"evidenceIds": [
|
|
1303
|
-
"
|
|
1303
|
+
"01KSSZ0YGR98XGFHTA2G7DDF41"
|
|
1304
1304
|
],
|
|
1305
1305
|
"recommendedActions": [
|
|
1306
1306
|
"Treat retrieved memory as advisory context."
|
|
@@ -1321,7 +1321,7 @@
|
|
|
1321
1321
|
"decisionCorrect": false,
|
|
1322
1322
|
"riskScore": 0,
|
|
1323
1323
|
"passed": false,
|
|
1324
|
-
"latencyMs": 0.
|
|
1324
|
+
"latencyMs": 0.421,
|
|
1325
1325
|
"evidenceCount": 0,
|
|
1326
1326
|
"evidenceIds": [],
|
|
1327
1327
|
"recommendedActions": [],
|
|
@@ -1348,11 +1348,11 @@
|
|
|
1348
1348
|
"decisionCorrect": true,
|
|
1349
1349
|
"riskScore": 0.2,
|
|
1350
1350
|
"passed": true,
|
|
1351
|
-
"latencyMs":
|
|
1351
|
+
"latencyMs": 3.09,
|
|
1352
1352
|
"evidenceCount": 2,
|
|
1353
1353
|
"evidenceIds": [
|
|
1354
|
-
"
|
|
1355
|
-
"failure:Bash:2026-05-
|
|
1354
|
+
"01KSSZ0YHGH45B4CPKET1W5VZW",
|
|
1355
|
+
"failure:Bash:2026-05-29T13:33:20.302Z"
|
|
1356
1356
|
],
|
|
1357
1357
|
"recommendedActions": [
|
|
1358
1358
|
"This exact action has succeeded since its last failure; proceed with normal validation.",
|
|
@@ -1374,7 +1374,7 @@
|
|
|
1374
1374
|
"decisionCorrect": true,
|
|
1375
1375
|
"riskScore": 0,
|
|
1376
1376
|
"passed": true,
|
|
1377
|
-
"latencyMs": 0.
|
|
1377
|
+
"latencyMs": 0.006,
|
|
1378
1378
|
"evidenceCount": 0,
|
|
1379
1379
|
"evidenceIds": [],
|
|
1380
1380
|
"recommendedActions": [],
|
|
@@ -1394,10 +1394,10 @@
|
|
|
1394
1394
|
"decisionCorrect": false,
|
|
1395
1395
|
"riskScore": 0.55,
|
|
1396
1396
|
"passed": false,
|
|
1397
|
-
"latencyMs": 0.
|
|
1397
|
+
"latencyMs": 0.077,
|
|
1398
1398
|
"evidenceCount": 1,
|
|
1399
1399
|
"evidenceIds": [
|
|
1400
|
-
"
|
|
1400
|
+
"01KSSZ0YJAN3G93CGTDHVBC7M2"
|
|
1401
1401
|
],
|
|
1402
1402
|
"recommendedActions": [
|
|
1403
1403
|
"Check the recent failed event before repeating a similar action."
|
|
@@ -1418,10 +1418,10 @@
|
|
|
1418
1418
|
"decisionCorrect": false,
|
|
1419
1419
|
"riskScore": 0.35,
|
|
1420
1420
|
"passed": false,
|
|
1421
|
-
"latencyMs": 0.
|
|
1421
|
+
"latencyMs": 0.529,
|
|
1422
1422
|
"evidenceCount": 1,
|
|
1423
1423
|
"evidenceIds": [
|
|
1424
|
-
"
|
|
1424
|
+
"01KSSZ0YJPGBVHZC5MY8RDBCH7"
|
|
1425
1425
|
],
|
|
1426
1426
|
"recommendedActions": [
|
|
1427
1427
|
"Treat retrieved memory as advisory context."
|
|
@@ -1442,7 +1442,7 @@
|
|
|
1442
1442
|
"decisionCorrect": true,
|
|
1443
1443
|
"riskScore": 0,
|
|
1444
1444
|
"passed": true,
|
|
1445
|
-
"latencyMs": 0.
|
|
1445
|
+
"latencyMs": 0.434,
|
|
1446
1446
|
"evidenceCount": 0,
|
|
1447
1447
|
"evidenceIds": [],
|
|
1448
1448
|
"recommendedActions": [],
|
|
@@ -1469,7 +1469,7 @@
|
|
|
1469
1469
|
"decisionCorrect": true,
|
|
1470
1470
|
"riskScore": 0.85,
|
|
1471
1471
|
"passed": true,
|
|
1472
|
-
"latencyMs": 2.
|
|
1472
|
+
"latencyMs": 2.534,
|
|
1473
1473
|
"evidenceCount": 1,
|
|
1474
1474
|
"evidenceIds": [
|
|
1475
1475
|
"recall:episodic:recall.vector_counts"
|
|
@@ -1495,7 +1495,7 @@
|
|
|
1495
1495
|
"decisionCorrect": false,
|
|
1496
1496
|
"riskScore": 0,
|
|
1497
1497
|
"passed": false,
|
|
1498
|
-
"latencyMs": 0.
|
|
1498
|
+
"latencyMs": 0.007,
|
|
1499
1499
|
"evidenceCount": 0,
|
|
1500
1500
|
"evidenceIds": [],
|
|
1501
1501
|
"recommendedActions": [],
|
|
@@ -1515,10 +1515,10 @@
|
|
|
1515
1515
|
"decisionCorrect": true,
|
|
1516
1516
|
"riskScore": 0.85,
|
|
1517
1517
|
"passed": true,
|
|
1518
|
-
"latencyMs": 0.
|
|
1518
|
+
"latencyMs": 0.15,
|
|
1519
1519
|
"evidenceCount": 1,
|
|
1520
1520
|
"evidenceIds": [
|
|
1521
|
-
"
|
|
1521
|
+
"01KSSZ0YM51C8FJSRZG6XYKB9Y"
|
|
1522
1522
|
],
|
|
1523
1523
|
"recommendedActions": [
|
|
1524
1524
|
"Review retrieved memory before acting."
|
|
@@ -1539,7 +1539,7 @@
|
|
|
1539
1539
|
"decisionCorrect": false,
|
|
1540
1540
|
"riskScore": 0.55,
|
|
1541
1541
|
"passed": false,
|
|
1542
|
-
"latencyMs": 0.
|
|
1542
|
+
"latencyMs": 0.309,
|
|
1543
1543
|
"evidenceCount": 0,
|
|
1544
1544
|
"evidenceIds": [],
|
|
1545
1545
|
"recommendedActions": [
|
|
@@ -1567,7 +1567,7 @@
|
|
|
1567
1567
|
"decisionCorrect": false,
|
|
1568
1568
|
"riskScore": 0,
|
|
1569
1569
|
"passed": false,
|
|
1570
|
-
"latencyMs": 0.
|
|
1570
|
+
"latencyMs": 0.382,
|
|
1571
1571
|
"evidenceCount": 0,
|
|
1572
1572
|
"evidenceIds": [],
|
|
1573
1573
|
"recommendedActions": [],
|
|
@@ -1594,11 +1594,11 @@
|
|
|
1594
1594
|
"decisionCorrect": true,
|
|
1595
1595
|
"riskScore": 0.85,
|
|
1596
1596
|
"passed": true,
|
|
1597
|
-
"latencyMs":
|
|
1597
|
+
"latencyMs": 3.585,
|
|
1598
1598
|
"evidenceCount": 2,
|
|
1599
1599
|
"evidenceIds": [
|
|
1600
1600
|
"recall:fts:recall.fts_lookup",
|
|
1601
|
-
"
|
|
1601
|
+
"01KSSZ0YN75S42CB2APNH97S32"
|
|
1602
1602
|
],
|
|
1603
1603
|
"recommendedActions": [
|
|
1604
1604
|
"Do not proceed until the high-severity memory warning is addressed.",
|
|
@@ -1621,7 +1621,7 @@
|
|
|
1621
1621
|
"decisionCorrect": false,
|
|
1622
1622
|
"riskScore": 0,
|
|
1623
1623
|
"passed": false,
|
|
1624
|
-
"latencyMs": 0.
|
|
1624
|
+
"latencyMs": 0.008,
|
|
1625
1625
|
"evidenceCount": 0,
|
|
1626
1626
|
"evidenceIds": [],
|
|
1627
1627
|
"recommendedActions": [],
|
|
@@ -1641,10 +1641,10 @@
|
|
|
1641
1641
|
"decisionCorrect": false,
|
|
1642
1642
|
"riskScore": 0.35,
|
|
1643
1643
|
"passed": false,
|
|
1644
|
-
"latencyMs": 0.
|
|
1644
|
+
"latencyMs": 0.123,
|
|
1645
1645
|
"evidenceCount": 1,
|
|
1646
1646
|
"evidenceIds": [
|
|
1647
|
-
"
|
|
1647
|
+
"01KSSZ0YP099TTMQ40MRD6FAQ3"
|
|
1648
1648
|
],
|
|
1649
1649
|
"recommendedActions": [
|
|
1650
1650
|
"Treat retrieved memory as advisory context."
|
|
@@ -1665,10 +1665,10 @@
|
|
|
1665
1665
|
"decisionCorrect": false,
|
|
1666
1666
|
"riskScore": 0.35,
|
|
1667
1667
|
"passed": false,
|
|
1668
|
-
"latencyMs": 0.
|
|
1668
|
+
"latencyMs": 0.403,
|
|
1669
1669
|
"evidenceCount": 1,
|
|
1670
1670
|
"evidenceIds": [
|
|
1671
|
-
"
|
|
1671
|
+
"01KSSZ0YPBFF5602JAW36JN22Y"
|
|
1672
1672
|
],
|
|
1673
1673
|
"recommendedActions": [
|
|
1674
1674
|
"Treat retrieved memory as advisory context."
|
|
@@ -1689,7 +1689,7 @@
|
|
|
1689
1689
|
"decisionCorrect": false,
|
|
1690
1690
|
"riskScore": 0.55,
|
|
1691
1691
|
"passed": false,
|
|
1692
|
-
"latencyMs": 0.
|
|
1692
|
+
"latencyMs": 0.152,
|
|
1693
1693
|
"evidenceCount": 0,
|
|
1694
1694
|
"evidenceIds": [],
|
|
1695
1695
|
"recommendedActions": [
|
|
@@ -1724,11 +1724,11 @@
|
|
|
1724
1724
|
"decisionCorrect": true,
|
|
1725
1725
|
"riskScore": 0.9,
|
|
1726
1726
|
"passed": true,
|
|
1727
|
-
"latencyMs": 2.
|
|
1727
|
+
"latencyMs": 2.599,
|
|
1728
1728
|
"evidenceCount": 2,
|
|
1729
1729
|
"evidenceIds": [
|
|
1730
|
-
"
|
|
1731
|
-
"failure:Bash:2026-05-
|
|
1730
|
+
"01KSSZ0YQ26JKF6YQETHJEAWCE",
|
|
1731
|
+
"failure:Bash:2026-05-29T13:33:20.483Z"
|
|
1732
1732
|
],
|
|
1733
1733
|
"recommendedActions": [
|
|
1734
1734
|
"Do not repeat the exact failed action until the prior error is understood or the command is changed.",
|
|
@@ -1770,10 +1770,10 @@
|
|
|
1770
1770
|
"decisionCorrect": false,
|
|
1771
1771
|
"riskScore": 0.55,
|
|
1772
1772
|
"passed": false,
|
|
1773
|
-
"latencyMs": 0.
|
|
1773
|
+
"latencyMs": 0.072,
|
|
1774
1774
|
"evidenceCount": 1,
|
|
1775
1775
|
"evidenceIds": [
|
|
1776
|
-
"
|
|
1776
|
+
"01KSSZ0YQWMP8RBH8GCDJ01C38"
|
|
1777
1777
|
],
|
|
1778
1778
|
"recommendedActions": [
|
|
1779
1779
|
"Check the recent failed event before repeating a similar action."
|
|
@@ -1794,10 +1794,10 @@
|
|
|
1794
1794
|
"decisionCorrect": false,
|
|
1795
1795
|
"riskScore": 0.35,
|
|
1796
1796
|
"passed": false,
|
|
1797
|
-
"latencyMs": 0.
|
|
1797
|
+
"latencyMs": 0.427,
|
|
1798
1798
|
"evidenceCount": 1,
|
|
1799
1799
|
"evidenceIds": [
|
|
1800
|
-
"
|
|
1800
|
+
"01KSSZ0YR72GCV7F2M1A5JJ8R0"
|
|
1801
1801
|
],
|
|
1802
1802
|
"recommendedActions": [
|
|
1803
1803
|
"Treat retrieved memory as advisory context."
|
|
@@ -1818,7 +1818,7 @@
|
|
|
1818
1818
|
"decisionCorrect": false,
|
|
1819
1819
|
"riskScore": 0,
|
|
1820
1820
|
"passed": false,
|
|
1821
|
-
"latencyMs": 0.
|
|
1821
|
+
"latencyMs": 0.368,
|
|
1822
1822
|
"evidenceCount": 0,
|
|
1823
1823
|
"evidenceIds": [],
|
|
1824
1824
|
"recommendedActions": [],
|
|
@@ -1845,11 +1845,11 @@
|
|
|
1845
1845
|
"decisionCorrect": true,
|
|
1846
1846
|
"riskScore": 0.85,
|
|
1847
1847
|
"passed": true,
|
|
1848
|
-
"latencyMs":
|
|
1848
|
+
"latencyMs": 2.341,
|
|
1849
1849
|
"evidenceCount": 2,
|
|
1850
1850
|
"evidenceIds": [
|
|
1851
|
-
"
|
|
1852
|
-
"
|
|
1851
|
+
"01KSSZ0YRX53XKT4C23A12RFND",
|
|
1852
|
+
"01KSSZ0YRY86WACDBFSD3W0N04"
|
|
1853
1853
|
],
|
|
1854
1854
|
"recommendedActions": [
|
|
1855
1855
|
"Do not proceed until the high-severity memory warning is addressed.",
|
|
@@ -1871,7 +1871,7 @@
|
|
|
1871
1871
|
"decisionCorrect": false,
|
|
1872
1872
|
"riskScore": 0,
|
|
1873
1873
|
"passed": false,
|
|
1874
|
-
"latencyMs": 0.
|
|
1874
|
+
"latencyMs": 0.006,
|
|
1875
1875
|
"evidenceCount": 0,
|
|
1876
1876
|
"evidenceIds": [],
|
|
1877
1877
|
"recommendedActions": [],
|
|
@@ -1891,11 +1891,11 @@
|
|
|
1891
1891
|
"decisionCorrect": true,
|
|
1892
1892
|
"riskScore": 0.85,
|
|
1893
1893
|
"passed": true,
|
|
1894
|
-
"latencyMs": 0.
|
|
1894
|
+
"latencyMs": 0.138,
|
|
1895
1895
|
"evidenceCount": 2,
|
|
1896
1896
|
"evidenceIds": [
|
|
1897
|
-
"
|
|
1898
|
-
"
|
|
1897
|
+
"01KSSZ0YSQHY993KVPVAJWNZC7",
|
|
1898
|
+
"01KSSZ0YSP6P69G6SYWYSEJ27M"
|
|
1899
1899
|
],
|
|
1900
1900
|
"recommendedActions": [
|
|
1901
1901
|
"Review retrieved memory before acting."
|
|
@@ -1916,11 +1916,11 @@
|
|
|
1916
1916
|
"decisionCorrect": true,
|
|
1917
1917
|
"riskScore": 0.85,
|
|
1918
1918
|
"passed": true,
|
|
1919
|
-
"latencyMs": 0.
|
|
1919
|
+
"latencyMs": 0.395,
|
|
1920
1920
|
"evidenceCount": 2,
|
|
1921
1921
|
"evidenceIds": [
|
|
1922
|
-
"
|
|
1923
|
-
"
|
|
1922
|
+
"01KSSZ0YT1VBYF3GHSXNS8GR7Y",
|
|
1923
|
+
"01KSSZ0YT2Q1MWN007FYJB8V6N"
|
|
1924
1924
|
],
|
|
1925
1925
|
"recommendedActions": [
|
|
1926
1926
|
"Review retrieved memory before acting."
|
|
@@ -1941,7 +1941,7 @@
|
|
|
1941
1941
|
"decisionCorrect": false,
|
|
1942
1942
|
"riskScore": 0,
|
|
1943
1943
|
"passed": false,
|
|
1944
|
-
"latencyMs": 0.
|
|
1944
|
+
"latencyMs": 0.338,
|
|
1945
1945
|
"evidenceCount": 0,
|
|
1946
1946
|
"evidenceIds": [],
|
|
1947
1947
|
"recommendedActions": [],
|
|
@@ -1968,22 +1968,22 @@
|
|
|
1968
1968
|
"decisionCorrect": true,
|
|
1969
1969
|
"riskScore": 0.85,
|
|
1970
1970
|
"passed": true,
|
|
1971
|
-
"latencyMs":
|
|
1971
|
+
"latencyMs": 28.181,
|
|
1972
1972
|
"evidenceCount": 13,
|
|
1973
1973
|
"evidenceIds": [
|
|
1974
|
-
"
|
|
1975
|
-
"
|
|
1976
|
-
"
|
|
1977
|
-
"
|
|
1978
|
-
"
|
|
1979
|
-
"
|
|
1980
|
-
"
|
|
1981
|
-
"
|
|
1982
|
-
"
|
|
1983
|
-
"
|
|
1984
|
-
"
|
|
1985
|
-
"
|
|
1986
|
-
"
|
|
1974
|
+
"01KSSZ0Z92H6MV9SGY92Q4JAF3",
|
|
1975
|
+
"01KSSZ0Z8W4Z9J8GT3CK8FC6VS",
|
|
1976
|
+
"01KSSZ0Z7B5ZVWXVZKT4VW8A34",
|
|
1977
|
+
"01KSSZ0Z7A1AFDK0520E1RH72M",
|
|
1978
|
+
"01KSSZ0Z4Y150G4Q3STE6WJBD8",
|
|
1979
|
+
"01KSSZ0Z4WMY0QV4VGJD4CCG1B",
|
|
1980
|
+
"01KSSZ0Z48NTHT9ZB0PM7ESE4Q",
|
|
1981
|
+
"01KSSZ0Z44J6MF93GDY4510PKC",
|
|
1982
|
+
"01KSSZ0Z22A7QWQ2KEPKXQM7FK",
|
|
1983
|
+
"01KSSZ0Z21X7YP9FJCGMZ61E7X",
|
|
1984
|
+
"01KSSZ0YYRG8809DR6R80GG89T",
|
|
1985
|
+
"01KSSZ0YYKFZYZKPGJ8TJG319N",
|
|
1986
|
+
"01KSSZ0YY3A2N08TE8S3VQ7GJE"
|
|
1987
1987
|
],
|
|
1988
1988
|
"recommendedActions": [
|
|
1989
1989
|
"Do not proceed until the high-severity memory warning is addressed.",
|
|
@@ -2006,7 +2006,7 @@
|
|
|
2006
2006
|
"decisionCorrect": false,
|
|
2007
2007
|
"riskScore": 0,
|
|
2008
2008
|
"passed": false,
|
|
2009
|
-
"latencyMs": 0.
|
|
2009
|
+
"latencyMs": 0.011,
|
|
2010
2010
|
"evidenceCount": 0,
|
|
2011
2011
|
"evidenceIds": [],
|
|
2012
2012
|
"recommendedActions": [],
|
|
@@ -2026,10 +2026,10 @@
|
|
|
2026
2026
|
"decisionCorrect": true,
|
|
2027
2027
|
"riskScore": 0.85,
|
|
2028
2028
|
"passed": true,
|
|
2029
|
-
"latencyMs": 0.
|
|
2029
|
+
"latencyMs": 0.434,
|
|
2030
2030
|
"evidenceCount": 1,
|
|
2031
2031
|
"evidenceIds": [
|
|
2032
|
-
"
|
|
2032
|
+
"01KSSZ109E1PA94YM4KMJQTD1D"
|
|
2033
2033
|
],
|
|
2034
2034
|
"recommendedActions": [
|
|
2035
2035
|
"Review retrieved memory before acting."
|
|
@@ -2050,14 +2050,14 @@
|
|
|
2050
2050
|
"decisionCorrect": false,
|
|
2051
2051
|
"riskScore": 0.35,
|
|
2052
2052
|
"passed": false,
|
|
2053
|
-
"latencyMs": 1.
|
|
2053
|
+
"latencyMs": 1.356,
|
|
2054
2054
|
"evidenceCount": 5,
|
|
2055
2055
|
"evidenceIds": [
|
|
2056
|
-
"
|
|
2057
|
-
"
|
|
2058
|
-
"
|
|
2059
|
-
"
|
|
2060
|
-
"
|
|
2056
|
+
"01KSSZ10NJ12JB0518EN4MBBS2",
|
|
2057
|
+
"01KSSZ10J6YEQ8YYRKRQEMTXC7",
|
|
2058
|
+
"01KSSZ10EKE4AFRXSMK5VJYNXZ",
|
|
2059
|
+
"01KSSZ10CECQDV0CD902G694Z5",
|
|
2060
|
+
"01KSSZ10EBZBKXSER4HB4T0WGK"
|
|
2061
2061
|
],
|
|
2062
2062
|
"recommendedActions": [
|
|
2063
2063
|
"Treat retrieved memory as advisory context."
|
|
@@ -2078,7 +2078,7 @@
|
|
|
2078
2078
|
"decisionCorrect": false,
|
|
2079
2079
|
"riskScore": 0,
|
|
2080
2080
|
"passed": false,
|
|
2081
|
-
"latencyMs": 0.
|
|
2081
|
+
"latencyMs": 0.633,
|
|
2082
2082
|
"evidenceCount": 0,
|
|
2083
2083
|
"evidenceIds": [],
|
|
2084
2084
|
"recommendedActions": [],
|
|
@@ -2093,7 +2093,7 @@
|
|
|
2093
2093
|
}
|
|
2094
2094
|
],
|
|
2095
2095
|
"artifactRedactionSweep": {
|
|
2096
|
-
"checkedAt": "2026-05-
|
|
2096
|
+
"checkedAt": "2026-05-29T13:33:23.214Z",
|
|
2097
2097
|
"filesChecked": [
|
|
2098
2098
|
"benchmarks/output/guardbench-manifest.json",
|
|
2099
2099
|
"benchmarks/output/guardbench-raw.json",
|