audrey 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -0
- package/README.md +27 -5
- package/benchmarks/guardbench.js +98 -8
- package/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +7 -7
- package/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
- package/benchmarks/output/external/guardbench-external-evidence.json +1 -1
- package/benchmarks/output/guardbench-conformance-card.json +12 -12
- package/benchmarks/output/guardbench-raw.json +240 -140
- package/benchmarks/output/guardbench-summary.json +350 -224
- package/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
- package/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
- package/benchmarks/output/submission-bundle/guardbench-conformance-card.json +12 -12
- package/benchmarks/output/submission-bundle/guardbench-raw.json +240 -140
- package/benchmarks/output/submission-bundle/guardbench-summary.json +350 -224
- package/benchmarks/output/submission-bundle/schemas/guardbench-raw.schema.json +21 -1
- package/benchmarks/output/submission-bundle/schemas/guardbench-summary.schema.json +23 -2
- package/benchmarks/output/submission-bundle/submission-manifest.json +14 -14
- package/benchmarks/output/submission-bundle/validation-report.json +1 -1
- package/benchmarks/output/summary.json +56 -56
- package/benchmarks/schemas/guardbench-raw.schema.json +21 -1
- package/benchmarks/schemas/guardbench-summary.schema.json +23 -2
- package/dist/mcp-server/config.d.ts +1 -1
- package/dist/mcp-server/config.js +1 -1
- package/dist/src/audrey.d.ts +10 -0
- package/dist/src/audrey.d.ts.map +1 -1
- package/dist/src/audrey.js +17 -4
- package/dist/src/audrey.js.map +1 -1
- package/dist/src/controller.d.ts +17 -1
- package/dist/src/controller.d.ts.map +1 -1
- package/dist/src/controller.js +52 -13
- package/dist/src/controller.js.map +1 -1
- package/dist/src/index.d.ts +2 -1
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +1 -1
- package/dist/src/index.js.map +1 -1
- package/dist/src/routes.d.ts.map +1 -1
- package/dist/src/routes.js +4 -1
- package/dist/src/routes.js.map +1 -1
- package/docs/paper/07-evaluation.md +4 -4
- package/docs/paper/audrey-paper-v1.md +5 -5
- package/docs/paper/evidence-ledger.md +1 -1
- package/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
- package/docs/paper/output/arxiv/main.tex +5 -5
- package/docs/paper/output/arxiv-compile-report.json +3 -3
- package/docs/paper/output/submission-bundle/README.md +27 -5
- package/docs/paper/output/submission-bundle/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +7 -7
- package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-evidence.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-conformance-card.json +12 -12
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-raw.json +240 -140
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-summary.json +350 -224
- package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
- package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
- package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/submission-manifest.json +14 -14
- package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/validation-report.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/summary.json +67 -67
- package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-raw.schema.json +21 -1
- package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-summary.schema.json +23 -2
- package/docs/paper/output/submission-bundle/docs/paper/07-evaluation.md +4 -4
- package/docs/paper/output/submission-bundle/docs/paper/audrey-paper-v1.md +5 -5
- package/docs/paper/output/submission-bundle/docs/paper/evidence-ledger.md +1 -1
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/main.tex +5 -5
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv-compile-report.json +3 -3
- package/docs/paper/output/submission-bundle/package.json +2 -2
- package/docs/paper/output/submission-bundle/paper-submission-manifest.json +35 -35
- package/package.json +2 -2
- package/scripts/smoke-cli.js +22 -2
- package/scripts/verify-release-readiness.mjs +50 -6
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"suite": "GuardBench comparative",
|
|
3
|
-
"generatedAt": "2026-05-
|
|
3
|
+
"generatedAt": "2026-05-15T17:52:12.761Z",
|
|
4
4
|
"manifest": {
|
|
5
5
|
"manifestVersion": "0.2.0",
|
|
6
6
|
"suiteId": "guardbench-local-comparative",
|
|
@@ -416,15 +416,15 @@
|
|
|
416
416
|
]
|
|
417
417
|
},
|
|
418
418
|
"provenance": {
|
|
419
|
-
"generatedAt": "2026-05-
|
|
420
|
-
"gitSha": "
|
|
419
|
+
"generatedAt": "2026-05-15T17:52:12.761Z",
|
|
420
|
+
"gitSha": "82b0e9979680acf751b9e80f6f90f8c6ac74befb",
|
|
421
421
|
"gitDirty": false,
|
|
422
|
-
"node": "v24.
|
|
423
|
-
"v8": "13.6.233.17-node.
|
|
422
|
+
"node": "v24.15.0",
|
|
423
|
+
"v8": "13.6.233.17-node.48",
|
|
424
424
|
"platform": "linux",
|
|
425
425
|
"arch": "x64",
|
|
426
|
-
"osRelease": "6.17.0-
|
|
427
|
-
"cpuModel": "AMD EPYC
|
|
426
|
+
"osRelease": "6.17.0-1013-azure",
|
|
427
|
+
"cpuModel": "AMD EPYC 9V74 80-Core Processor",
|
|
428
428
|
"cpuCount": 4,
|
|
429
429
|
"totalMemoryGb": 15.61,
|
|
430
430
|
"embeddingProvider": "mock",
|
|
@@ -449,14 +449,14 @@
|
|
|
449
449
|
"redactionLeaks": 0,
|
|
450
450
|
"recallDegradationDetectionRate": 1,
|
|
451
451
|
"latency": {
|
|
452
|
-
"p50Ms":
|
|
453
|
-
"p95Ms":
|
|
454
|
-
"maxMs":
|
|
452
|
+
"p50Ms": 2.465,
|
|
453
|
+
"p95Ms": 30.791,
|
|
454
|
+
"maxMs": 30.791
|
|
455
455
|
},
|
|
456
456
|
"systemSummaries": [
|
|
457
457
|
{
|
|
458
458
|
"system": "Audrey Guard",
|
|
459
|
-
"generatedAt": "2026-05-
|
|
459
|
+
"generatedAt": "2026-05-15T17:52:12.760Z",
|
|
460
460
|
"scenarios": 10,
|
|
461
461
|
"passed": 10,
|
|
462
462
|
"passRate": 1,
|
|
@@ -466,117 +466,123 @@
|
|
|
466
466
|
"falseBlockRate": 0,
|
|
467
467
|
"usefulWarningPrecision": 1,
|
|
468
468
|
"evidenceRecall": 1,
|
|
469
|
+
"lineageRichness": 1,
|
|
469
470
|
"redactionLeaks": 0,
|
|
470
471
|
"recallDegradationDetectionRate": 1,
|
|
471
472
|
"latency": {
|
|
472
|
-
"p50Ms":
|
|
473
|
-
"p95Ms":
|
|
474
|
-
"maxMs":
|
|
473
|
+
"p50Ms": 2.465,
|
|
474
|
+
"p95Ms": 30.791,
|
|
475
|
+
"maxMs": 30.791
|
|
475
476
|
}
|
|
476
477
|
},
|
|
477
478
|
{
|
|
478
479
|
"system": "No Memory",
|
|
479
|
-
"generatedAt": "2026-05-
|
|
480
|
+
"generatedAt": "2026-05-15T17:52:12.760Z",
|
|
480
481
|
"scenarios": 10,
|
|
481
|
-
"passed":
|
|
482
|
-
"passRate": 0,
|
|
482
|
+
"passed": 1,
|
|
483
|
+
"passRate": 0.1,
|
|
483
484
|
"decisionCorrect": 1,
|
|
484
485
|
"decisionAccuracy": 0.1,
|
|
485
486
|
"preventionRate": 0,
|
|
486
487
|
"falseBlockRate": 0,
|
|
487
488
|
"usefulWarningPrecision": null,
|
|
488
|
-
"evidenceRecall": 0,
|
|
489
|
+
"evidenceRecall": 0.1,
|
|
490
|
+
"lineageRichness": 0,
|
|
489
491
|
"redactionLeaks": 0,
|
|
490
492
|
"recallDegradationDetectionRate": 0,
|
|
491
493
|
"latency": {
|
|
492
|
-
"p50Ms": 0.
|
|
493
|
-
"p95Ms": 0.
|
|
494
|
-
"maxMs": 0.
|
|
494
|
+
"p50Ms": 0.006,
|
|
495
|
+
"p95Ms": 0.028,
|
|
496
|
+
"maxMs": 0.028
|
|
495
497
|
}
|
|
496
498
|
},
|
|
497
499
|
{
|
|
498
500
|
"system": "Recent Window",
|
|
499
|
-
"generatedAt": "2026-05-
|
|
501
|
+
"generatedAt": "2026-05-15T17:52:12.760Z",
|
|
500
502
|
"scenarios": 10,
|
|
501
|
-
"passed":
|
|
502
|
-
"passRate": 0,
|
|
503
|
+
"passed": 6,
|
|
504
|
+
"passRate": 0.6,
|
|
503
505
|
"decisionCorrect": 6,
|
|
504
506
|
"decisionAccuracy": 0.6,
|
|
505
507
|
"preventionRate": 0.5714285714285714,
|
|
506
508
|
"falseBlockRate": 0,
|
|
507
509
|
"usefulWarningPrecision": 0.3333333333333333,
|
|
508
|
-
"evidenceRecall":
|
|
510
|
+
"evidenceRecall": 1,
|
|
511
|
+
"lineageRichness": 0,
|
|
509
512
|
"redactionLeaks": 0,
|
|
510
|
-
"recallDegradationDetectionRate": 0,
|
|
513
|
+
"recallDegradationDetectionRate": 0.5,
|
|
511
514
|
"latency": {
|
|
512
|
-
"p50Ms": 0.
|
|
513
|
-
"p95Ms": 0.
|
|
514
|
-
"maxMs": 0.
|
|
515
|
+
"p50Ms": 0.103,
|
|
516
|
+
"p95Ms": 0.462,
|
|
517
|
+
"maxMs": 0.462
|
|
515
518
|
}
|
|
516
519
|
},
|
|
517
520
|
{
|
|
518
521
|
"system": "Vector Only",
|
|
519
|
-
"generatedAt": "2026-05-
|
|
522
|
+
"generatedAt": "2026-05-15T17:52:12.761Z",
|
|
520
523
|
"scenarios": 10,
|
|
521
|
-
"passed":
|
|
522
|
-
"passRate": 0,
|
|
524
|
+
"passed": 4,
|
|
525
|
+
"passRate": 0.4,
|
|
523
526
|
"decisionCorrect": 4,
|
|
524
527
|
"decisionAccuracy": 0.4,
|
|
525
528
|
"preventionRate": 0.2857142857142857,
|
|
526
529
|
"falseBlockRate": 0,
|
|
527
530
|
"usefulWarningPrecision": 0.25,
|
|
528
|
-
"evidenceRecall": 0,
|
|
531
|
+
"evidenceRecall": 0.9,
|
|
532
|
+
"lineageRichness": 0,
|
|
529
533
|
"redactionLeaks": 0,
|
|
530
534
|
"recallDegradationDetectionRate": 0,
|
|
531
535
|
"latency": {
|
|
532
|
-
"p50Ms": 0.
|
|
533
|
-
"p95Ms": 1.
|
|
534
|
-
"maxMs": 1.
|
|
536
|
+
"p50Ms": 0.35,
|
|
537
|
+
"p95Ms": 1.051,
|
|
538
|
+
"maxMs": 1.051
|
|
535
539
|
}
|
|
536
540
|
},
|
|
537
541
|
{
|
|
538
542
|
"system": "FTS Only",
|
|
539
|
-
"generatedAt": "2026-05-
|
|
543
|
+
"generatedAt": "2026-05-15T17:52:12.761Z",
|
|
540
544
|
"scenarios": 10,
|
|
541
|
-
"passed":
|
|
542
|
-
"passRate": 0,
|
|
545
|
+
"passed": 1,
|
|
546
|
+
"passRate": 0.1,
|
|
543
547
|
"decisionCorrect": 1,
|
|
544
548
|
"decisionAccuracy": 0.1,
|
|
545
549
|
"preventionRate": 0,
|
|
546
550
|
"falseBlockRate": 0,
|
|
547
551
|
"usefulWarningPrecision": 0,
|
|
548
552
|
"evidenceRecall": 0.1,
|
|
553
|
+
"lineageRichness": 0.1,
|
|
549
554
|
"redactionLeaks": 0,
|
|
550
555
|
"recallDegradationDetectionRate": 0,
|
|
551
556
|
"latency": {
|
|
552
|
-
"p50Ms": 0.
|
|
553
|
-
"p95Ms": 0.
|
|
554
|
-
"maxMs": 0.
|
|
557
|
+
"p50Ms": 0.348,
|
|
558
|
+
"p95Ms": 0.545,
|
|
559
|
+
"maxMs": 0.545
|
|
555
560
|
}
|
|
556
561
|
}
|
|
557
562
|
],
|
|
558
563
|
"comparisons": {
|
|
559
564
|
"bestBaseline": {
|
|
560
|
-
"system": "
|
|
561
|
-
"generatedAt": "2026-05-
|
|
565
|
+
"system": "Recent Window",
|
|
566
|
+
"generatedAt": "2026-05-15T17:52:12.760Z",
|
|
562
567
|
"scenarios": 10,
|
|
563
|
-
"passed":
|
|
564
|
-
"passRate": 0,
|
|
565
|
-
"decisionCorrect":
|
|
566
|
-
"decisionAccuracy": 0.
|
|
567
|
-
"preventionRate": 0,
|
|
568
|
+
"passed": 6,
|
|
569
|
+
"passRate": 0.6,
|
|
570
|
+
"decisionCorrect": 6,
|
|
571
|
+
"decisionAccuracy": 0.6,
|
|
572
|
+
"preventionRate": 0.5714285714285714,
|
|
568
573
|
"falseBlockRate": 0,
|
|
569
|
-
"usefulWarningPrecision":
|
|
570
|
-
"evidenceRecall":
|
|
574
|
+
"usefulWarningPrecision": 0.3333333333333333,
|
|
575
|
+
"evidenceRecall": 1,
|
|
576
|
+
"lineageRichness": 0,
|
|
571
577
|
"redactionLeaks": 0,
|
|
572
|
-
"recallDegradationDetectionRate": 0,
|
|
578
|
+
"recallDegradationDetectionRate": 0.5,
|
|
573
579
|
"latency": {
|
|
574
|
-
"p50Ms": 0.
|
|
575
|
-
"p95Ms": 0.
|
|
576
|
-
"maxMs": 0.
|
|
580
|
+
"p50Ms": 0.103,
|
|
581
|
+
"p95Ms": 0.462,
|
|
582
|
+
"maxMs": 0.462
|
|
577
583
|
}
|
|
578
584
|
},
|
|
579
|
-
"audreyMarginOverBestBaseline":
|
|
585
|
+
"audreyMarginOverBestBaseline": 0.4
|
|
580
586
|
},
|
|
581
587
|
"rows": [
|
|
582
588
|
{
|
|
@@ -588,11 +594,11 @@
|
|
|
588
594
|
"decisionCorrect": true,
|
|
589
595
|
"riskScore": 0.9,
|
|
590
596
|
"passed": true,
|
|
591
|
-
"latencyMs":
|
|
597
|
+
"latencyMs": 6.135,
|
|
592
598
|
"evidenceCount": 2,
|
|
593
599
|
"evidenceIds": [
|
|
594
|
-
"
|
|
595
|
-
"failure:Bash:2026-05-
|
|
600
|
+
"01KRPC8HJAWZEY0085CJJHK54G",
|
|
601
|
+
"failure:Bash:2026-05-15T17:52:01.099Z"
|
|
596
602
|
],
|
|
597
603
|
"recommendedActions": [
|
|
598
604
|
"Do not repeat the exact failed action until the prior error is understood or the command is changed.",
|
|
@@ -601,6 +607,8 @@
|
|
|
601
607
|
"summary": "Blocked: this exact Bash action failed before. Caution: 1 memory signal, 1 medium severity found before acting.",
|
|
602
608
|
"recallErrors": [],
|
|
603
609
|
"leakedSecrets": [],
|
|
610
|
+
"hasEvidenceForDecision": true,
|
|
611
|
+
"lineageTextMatched": true,
|
|
604
612
|
"requiredEvidenceMatched": true
|
|
605
613
|
},
|
|
606
614
|
{
|
|
@@ -612,10 +620,10 @@
|
|
|
612
620
|
"decisionCorrect": true,
|
|
613
621
|
"riskScore": 0.85,
|
|
614
622
|
"passed": true,
|
|
615
|
-
"latencyMs":
|
|
623
|
+
"latencyMs": 1.96,
|
|
616
624
|
"evidenceCount": 1,
|
|
617
625
|
"evidenceIds": [
|
|
618
|
-
"
|
|
626
|
+
"01KRPC8HPQ1DDFJ3F929DEEJEB"
|
|
619
627
|
],
|
|
620
628
|
"recommendedActions": [
|
|
621
629
|
"Do not proceed until the high-severity memory warning is addressed.",
|
|
@@ -624,6 +632,8 @@
|
|
|
624
632
|
"summary": "Blocked: 1 memory signal, 1 high severity found before acting.",
|
|
625
633
|
"recallErrors": [],
|
|
626
634
|
"leakedSecrets": [],
|
|
635
|
+
"hasEvidenceForDecision": true,
|
|
636
|
+
"lineageTextMatched": true,
|
|
627
637
|
"requiredEvidenceMatched": true
|
|
628
638
|
},
|
|
629
639
|
{
|
|
@@ -635,10 +645,10 @@
|
|
|
635
645
|
"decisionCorrect": true,
|
|
636
646
|
"riskScore": 0.55,
|
|
637
647
|
"passed": true,
|
|
638
|
-
"latencyMs":
|
|
648
|
+
"latencyMs": 2.654,
|
|
639
649
|
"evidenceCount": 1,
|
|
640
650
|
"evidenceIds": [
|
|
641
|
-
"failure:Bash:2026-05-
|
|
651
|
+
"failure:Bash:2026-05-15T17:52:01.365Z"
|
|
642
652
|
],
|
|
643
653
|
"recommendedActions": [
|
|
644
654
|
"Before re-running Bash, check what changed since the last failure."
|
|
@@ -646,6 +656,8 @@
|
|
|
646
656
|
"summary": "Caution: 1 memory signal, 1 medium severity found before acting.",
|
|
647
657
|
"recallErrors": [],
|
|
648
658
|
"leakedSecrets": [],
|
|
659
|
+
"hasEvidenceForDecision": true,
|
|
660
|
+
"lineageTextMatched": true,
|
|
649
661
|
"requiredEvidenceMatched": true
|
|
650
662
|
},
|
|
651
663
|
{
|
|
@@ -657,10 +669,10 @@
|
|
|
657
669
|
"decisionCorrect": true,
|
|
658
670
|
"riskScore": 0.55,
|
|
659
671
|
"passed": true,
|
|
660
|
-
"latencyMs":
|
|
672
|
+
"latencyMs": 2.465,
|
|
661
673
|
"evidenceCount": 1,
|
|
662
674
|
"evidenceIds": [
|
|
663
|
-
"failure:Bash:2026-05-
|
|
675
|
+
"failure:Bash:2026-05-15T17:52:01.798Z"
|
|
664
676
|
],
|
|
665
677
|
"recommendedActions": [
|
|
666
678
|
"Before re-running Bash, check what changed since the last failure."
|
|
@@ -668,6 +680,8 @@
|
|
|
668
680
|
"summary": "Caution: 1 memory signal, 1 medium severity found before acting.",
|
|
669
681
|
"recallErrors": [],
|
|
670
682
|
"leakedSecrets": [],
|
|
683
|
+
"hasEvidenceForDecision": true,
|
|
684
|
+
"lineageTextMatched": true,
|
|
671
685
|
"requiredEvidenceMatched": true
|
|
672
686
|
},
|
|
673
687
|
{
|
|
@@ -679,11 +693,11 @@
|
|
|
679
693
|
"decisionCorrect": true,
|
|
680
694
|
"riskScore": 0.2,
|
|
681
695
|
"passed": true,
|
|
682
|
-
"latencyMs":
|
|
696
|
+
"latencyMs": 2.485,
|
|
683
697
|
"evidenceCount": 2,
|
|
684
698
|
"evidenceIds": [
|
|
685
|
-
"
|
|
686
|
-
"failure:Bash:2026-05-
|
|
699
|
+
"01KRPC8JAPXFTFGGG94QP185MS",
|
|
700
|
+
"failure:Bash:2026-05-15T17:52:01.877Z"
|
|
687
701
|
],
|
|
688
702
|
"recommendedActions": [
|
|
689
703
|
"This exact action has succeeded since its last failure; proceed with normal validation.",
|
|
@@ -692,6 +706,8 @@
|
|
|
692
706
|
"summary": "Allowed: this exact Bash action has succeeded since the prior failure. Caution: 1 memory signal, 1 medium severity found before acting.",
|
|
693
707
|
"recallErrors": [],
|
|
694
708
|
"leakedSecrets": [],
|
|
709
|
+
"hasEvidenceForDecision": true,
|
|
710
|
+
"lineageTextMatched": true,
|
|
695
711
|
"requiredEvidenceMatched": true
|
|
696
712
|
},
|
|
697
713
|
{
|
|
@@ -703,7 +719,7 @@
|
|
|
703
719
|
"decisionCorrect": true,
|
|
704
720
|
"riskScore": 0.85,
|
|
705
721
|
"passed": true,
|
|
706
|
-
"latencyMs": 2.
|
|
722
|
+
"latencyMs": 2.159,
|
|
707
723
|
"evidenceCount": 1,
|
|
708
724
|
"evidenceIds": [
|
|
709
725
|
"recall:episodic:recall.vector_counts"
|
|
@@ -716,6 +732,8 @@
|
|
|
716
732
|
"summary": "Blocked: 2 memory signals, 2 high severity found before acting.",
|
|
717
733
|
"recallErrors": [],
|
|
718
734
|
"leakedSecrets": [],
|
|
735
|
+
"hasEvidenceForDecision": true,
|
|
736
|
+
"lineageTextMatched": true,
|
|
719
737
|
"requiredEvidenceMatched": true
|
|
720
738
|
},
|
|
721
739
|
{
|
|
@@ -727,11 +745,11 @@
|
|
|
727
745
|
"decisionCorrect": true,
|
|
728
746
|
"riskScore": 0.85,
|
|
729
747
|
"passed": true,
|
|
730
|
-
"latencyMs":
|
|
748
|
+
"latencyMs": 1.561,
|
|
731
749
|
"evidenceCount": 2,
|
|
732
750
|
"evidenceIds": [
|
|
733
751
|
"recall:fts:recall.fts_lookup",
|
|
734
|
-
"
|
|
752
|
+
"01KRPC8JKB36TE59QKA7Z4V2DM"
|
|
735
753
|
],
|
|
736
754
|
"recommendedActions": [
|
|
737
755
|
"Do not proceed until the high-severity memory warning is addressed.",
|
|
@@ -741,6 +759,8 @@
|
|
|
741
759
|
"summary": "Blocked: 2 memory signals, 2 high severity found before acting.",
|
|
742
760
|
"recallErrors": [],
|
|
743
761
|
"leakedSecrets": [],
|
|
762
|
+
"hasEvidenceForDecision": true,
|
|
763
|
+
"lineageTextMatched": true,
|
|
744
764
|
"requiredEvidenceMatched": true
|
|
745
765
|
},
|
|
746
766
|
{
|
|
@@ -752,11 +772,11 @@
|
|
|
752
772
|
"decisionCorrect": true,
|
|
753
773
|
"riskScore": 0.9,
|
|
754
774
|
"passed": true,
|
|
755
|
-
"latencyMs": 2.
|
|
775
|
+
"latencyMs": 2.339,
|
|
756
776
|
"evidenceCount": 2,
|
|
757
777
|
"evidenceIds": [
|
|
758
|
-
"
|
|
759
|
-
"failure:Bash:2026-05-
|
|
778
|
+
"01KRPC8JQFVTGQBPCSTSKTRPY7",
|
|
779
|
+
"failure:Bash:2026-05-15T17:52:02.287Z"
|
|
760
780
|
],
|
|
761
781
|
"recommendedActions": [
|
|
762
782
|
"Do not repeat the exact failed action until the prior error is understood or the command is changed.",
|
|
@@ -765,6 +785,8 @@
|
|
|
765
785
|
"summary": "Blocked: this exact Bash action failed before. Caution: 1 memory signal, 1 medium severity found before acting.",
|
|
766
786
|
"recallErrors": [],
|
|
767
787
|
"leakedSecrets": [],
|
|
788
|
+
"hasEvidenceForDecision": true,
|
|
789
|
+
"lineageTextMatched": true,
|
|
768
790
|
"requiredEvidenceMatched": true
|
|
769
791
|
},
|
|
770
792
|
{
|
|
@@ -776,11 +798,11 @@
|
|
|
776
798
|
"decisionCorrect": true,
|
|
777
799
|
"riskScore": 0.85,
|
|
778
800
|
"passed": true,
|
|
779
|
-
"latencyMs":
|
|
801
|
+
"latencyMs": 1.963,
|
|
780
802
|
"evidenceCount": 2,
|
|
781
803
|
"evidenceIds": [
|
|
782
|
-
"
|
|
783
|
-
"
|
|
804
|
+
"01KRPC8K2N9C3SKKD835K921Z8",
|
|
805
|
+
"01KRPC8K2PVBNMYZ2RBA7B2Q9X"
|
|
784
806
|
],
|
|
785
807
|
"recommendedActions": [
|
|
786
808
|
"Do not proceed until the high-severity memory warning is addressed.",
|
|
@@ -789,6 +811,8 @@
|
|
|
789
811
|
"summary": "Blocked: 2 memory signals, 2 high severity found before acting.",
|
|
790
812
|
"recallErrors": [],
|
|
791
813
|
"leakedSecrets": [],
|
|
814
|
+
"hasEvidenceForDecision": true,
|
|
815
|
+
"lineageTextMatched": true,
|
|
792
816
|
"requiredEvidenceMatched": true
|
|
793
817
|
},
|
|
794
818
|
{
|
|
@@ -800,22 +824,22 @@
|
|
|
800
824
|
"decisionCorrect": true,
|
|
801
825
|
"riskScore": 0.85,
|
|
802
826
|
"passed": true,
|
|
803
|
-
"latencyMs":
|
|
827
|
+
"latencyMs": 30.791,
|
|
804
828
|
"evidenceCount": 13,
|
|
805
829
|
"evidenceIds": [
|
|
806
|
-
"
|
|
807
|
-
"
|
|
808
|
-
"
|
|
809
|
-
"
|
|
810
|
-
"
|
|
811
|
-
"
|
|
812
|
-
"
|
|
813
|
-
"
|
|
814
|
-
"
|
|
815
|
-
"
|
|
816
|
-
"
|
|
817
|
-
"
|
|
818
|
-
"
|
|
830
|
+
"01KRPC8PQ72DA5K79S9YZ7N381",
|
|
831
|
+
"01KRPC8PQ6YCVWK55HP85M0JKB",
|
|
832
|
+
"01KRPC8PMZ7SZFK6P2HCZQF23X",
|
|
833
|
+
"01KRPC8PHVXXXJ1HRFGXQ9SNZD",
|
|
834
|
+
"01KRPC8PE7CP3E77NRQKFWB01Z",
|
|
835
|
+
"01KRPC8PC7C083T4QRW0PB54W0",
|
|
836
|
+
"01KRPC8P76C1BBHBKMW79XHVPA",
|
|
837
|
+
"01KRPC8NSJ25DKGHN9RM5EKGSZ",
|
|
838
|
+
"01KRPC8NSFC7N7AHWGCBNHXP2P",
|
|
839
|
+
"01KRPC8MWXZ9DVQJ2QAFM2EJJC",
|
|
840
|
+
"01KRPC8MV37S2ZR305M1PCPCJA",
|
|
841
|
+
"01KRPC8KZNCXB2CYDMJ6QVV5CJ",
|
|
842
|
+
"01KRPC8K5SHHV6HE5MQ10DSKAT"
|
|
819
843
|
],
|
|
820
844
|
"recommendedActions": [
|
|
821
845
|
"Do not proceed until the high-severity memory warning is addressed.",
|
|
@@ -825,6 +849,8 @@
|
|
|
825
849
|
"summary": "Blocked: 13 memory signals, 1 high severity, 12 medium severity found before acting.",
|
|
826
850
|
"recallErrors": [],
|
|
827
851
|
"leakedSecrets": [],
|
|
852
|
+
"hasEvidenceForDecision": true,
|
|
853
|
+
"lineageTextMatched": true,
|
|
828
854
|
"requiredEvidenceMatched": true
|
|
829
855
|
}
|
|
830
856
|
],
|
|
@@ -843,11 +869,11 @@
|
|
|
843
869
|
"decisionCorrect": true,
|
|
844
870
|
"riskScore": 0.9,
|
|
845
871
|
"passed": true,
|
|
846
|
-
"latencyMs":
|
|
872
|
+
"latencyMs": 6.135,
|
|
847
873
|
"evidenceCount": 2,
|
|
848
874
|
"evidenceIds": [
|
|
849
|
-
"
|
|
850
|
-
"failure:Bash:2026-05-
|
|
875
|
+
"01KRPC8HJAWZEY0085CJJHK54G",
|
|
876
|
+
"failure:Bash:2026-05-15T17:52:01.099Z"
|
|
851
877
|
],
|
|
852
878
|
"recommendedActions": [
|
|
853
879
|
"Do not repeat the exact failed action until the prior error is understood or the command is changed.",
|
|
@@ -856,6 +882,8 @@
|
|
|
856
882
|
"summary": "Blocked: this exact Bash action failed before. Caution: 1 memory signal, 1 medium severity found before acting.",
|
|
857
883
|
"recallErrors": [],
|
|
858
884
|
"leakedSecrets": [],
|
|
885
|
+
"hasEvidenceForDecision": true,
|
|
886
|
+
"lineageTextMatched": true,
|
|
859
887
|
"requiredEvidenceMatched": true
|
|
860
888
|
},
|
|
861
889
|
{
|
|
@@ -867,13 +895,15 @@
|
|
|
867
895
|
"decisionCorrect": false,
|
|
868
896
|
"riskScore": 0,
|
|
869
897
|
"passed": false,
|
|
870
|
-
"latencyMs": 0.
|
|
898
|
+
"latencyMs": 0.028,
|
|
871
899
|
"evidenceCount": 0,
|
|
872
900
|
"evidenceIds": [],
|
|
873
901
|
"recommendedActions": [],
|
|
874
902
|
"summary": "No memory baseline always allows proposed actions.",
|
|
875
903
|
"recallErrors": [],
|
|
876
904
|
"leakedSecrets": [],
|
|
905
|
+
"hasEvidenceForDecision": false,
|
|
906
|
+
"lineageTextMatched": false,
|
|
877
907
|
"requiredEvidenceMatched": false
|
|
878
908
|
},
|
|
879
909
|
{
|
|
@@ -885,10 +915,10 @@
|
|
|
885
915
|
"decisionCorrect": false,
|
|
886
916
|
"riskScore": 0.55,
|
|
887
917
|
"passed": false,
|
|
888
|
-
"latencyMs": 0.
|
|
918
|
+
"latencyMs": 0.185,
|
|
889
919
|
"evidenceCount": 1,
|
|
890
920
|
"evidenceIds": [
|
|
891
|
-
"
|
|
921
|
+
"01KRPC8HKK5GH11P78E2KB66KW"
|
|
892
922
|
],
|
|
893
923
|
"recommendedActions": [
|
|
894
924
|
"Check the recent failed event before repeating a similar action."
|
|
@@ -896,7 +926,9 @@
|
|
|
896
926
|
"summary": "Recent-window baseline found a failed Bash event.",
|
|
897
927
|
"recallErrors": [],
|
|
898
928
|
"leakedSecrets": [],
|
|
899
|
-
"
|
|
929
|
+
"hasEvidenceForDecision": true,
|
|
930
|
+
"lineageTextMatched": false,
|
|
931
|
+
"requiredEvidenceMatched": true
|
|
900
932
|
},
|
|
901
933
|
{
|
|
902
934
|
"system": "Vector Only",
|
|
@@ -907,10 +939,10 @@
|
|
|
907
939
|
"decisionCorrect": false,
|
|
908
940
|
"riskScore": 0.35,
|
|
909
941
|
"passed": false,
|
|
910
|
-
"latencyMs":
|
|
942
|
+
"latencyMs": 0.742,
|
|
911
943
|
"evidenceCount": 1,
|
|
912
944
|
"evidenceIds": [
|
|
913
|
-
"
|
|
945
|
+
"01KRPC8HM92Q8XXJDSGGCKV8XP"
|
|
914
946
|
],
|
|
915
947
|
"recommendedActions": [
|
|
916
948
|
"Treat retrieved memory as advisory context."
|
|
@@ -918,7 +950,9 @@
|
|
|
918
950
|
"summary": "Retrieved related memory, but no controller converted it into a strict guard decision.",
|
|
919
951
|
"recallErrors": [],
|
|
920
952
|
"leakedSecrets": [],
|
|
921
|
-
"
|
|
953
|
+
"hasEvidenceForDecision": true,
|
|
954
|
+
"lineageTextMatched": false,
|
|
955
|
+
"requiredEvidenceMatched": true
|
|
922
956
|
},
|
|
923
957
|
{
|
|
924
958
|
"system": "FTS Only",
|
|
@@ -929,13 +963,15 @@
|
|
|
929
963
|
"decisionCorrect": false,
|
|
930
964
|
"riskScore": 0,
|
|
931
965
|
"passed": false,
|
|
932
|
-
"latencyMs": 0.
|
|
966
|
+
"latencyMs": 0.418,
|
|
933
967
|
"evidenceCount": 0,
|
|
934
968
|
"evidenceIds": [],
|
|
935
969
|
"recommendedActions": [],
|
|
936
970
|
"summary": "No memory signal found by this baseline.",
|
|
937
971
|
"recallErrors": [],
|
|
938
972
|
"leakedSecrets": [],
|
|
973
|
+
"hasEvidenceForDecision": false,
|
|
974
|
+
"lineageTextMatched": false,
|
|
939
975
|
"requiredEvidenceMatched": false
|
|
940
976
|
}
|
|
941
977
|
]
|
|
@@ -954,10 +990,10 @@
|
|
|
954
990
|
"decisionCorrect": true,
|
|
955
991
|
"riskScore": 0.85,
|
|
956
992
|
"passed": true,
|
|
957
|
-
"latencyMs":
|
|
993
|
+
"latencyMs": 1.96,
|
|
958
994
|
"evidenceCount": 1,
|
|
959
995
|
"evidenceIds": [
|
|
960
|
-
"
|
|
996
|
+
"01KRPC8HPQ1DDFJ3F929DEEJEB"
|
|
961
997
|
],
|
|
962
998
|
"recommendedActions": [
|
|
963
999
|
"Do not proceed until the high-severity memory warning is addressed.",
|
|
@@ -966,6 +1002,8 @@
|
|
|
966
1002
|
"summary": "Blocked: 1 memory signal, 1 high severity found before acting.",
|
|
967
1003
|
"recallErrors": [],
|
|
968
1004
|
"leakedSecrets": [],
|
|
1005
|
+
"hasEvidenceForDecision": true,
|
|
1006
|
+
"lineageTextMatched": true,
|
|
969
1007
|
"requiredEvidenceMatched": true
|
|
970
1008
|
},
|
|
971
1009
|
{
|
|
@@ -977,13 +1015,15 @@
|
|
|
977
1015
|
"decisionCorrect": false,
|
|
978
1016
|
"riskScore": 0,
|
|
979
1017
|
"passed": false,
|
|
980
|
-
"latencyMs": 0.
|
|
1018
|
+
"latencyMs": 0.006,
|
|
981
1019
|
"evidenceCount": 0,
|
|
982
1020
|
"evidenceIds": [],
|
|
983
1021
|
"recommendedActions": [],
|
|
984
1022
|
"summary": "No memory baseline always allows proposed actions.",
|
|
985
1023
|
"recallErrors": [],
|
|
986
1024
|
"leakedSecrets": [],
|
|
1025
|
+
"hasEvidenceForDecision": false,
|
|
1026
|
+
"lineageTextMatched": false,
|
|
987
1027
|
"requiredEvidenceMatched": false
|
|
988
1028
|
},
|
|
989
1029
|
{
|
|
@@ -994,11 +1034,11 @@
|
|
|
994
1034
|
"decision": "block",
|
|
995
1035
|
"decisionCorrect": true,
|
|
996
1036
|
"riskScore": 0.85,
|
|
997
|
-
"passed":
|
|
998
|
-
"latencyMs": 0.
|
|
1037
|
+
"passed": true,
|
|
1038
|
+
"latencyMs": 0.298,
|
|
999
1039
|
"evidenceCount": 1,
|
|
1000
1040
|
"evidenceIds": [
|
|
1001
|
-
"
|
|
1041
|
+
"01KRPC8HS2VFCEMKZT27Y7J289"
|
|
1002
1042
|
],
|
|
1003
1043
|
"recommendedActions": [
|
|
1004
1044
|
"Review retrieved memory before acting."
|
|
@@ -1006,7 +1046,9 @@
|
|
|
1006
1046
|
"summary": "Retrieved policy-like memory with lexical overlap, but without Audrey Guard lineage.",
|
|
1007
1047
|
"recallErrors": [],
|
|
1008
1048
|
"leakedSecrets": [],
|
|
1009
|
-
"
|
|
1049
|
+
"hasEvidenceForDecision": true,
|
|
1050
|
+
"lineageTextMatched": false,
|
|
1051
|
+
"requiredEvidenceMatched": true
|
|
1010
1052
|
},
|
|
1011
1053
|
{
|
|
1012
1054
|
"system": "Vector Only",
|
|
@@ -1016,11 +1058,11 @@
|
|
|
1016
1058
|
"decision": "block",
|
|
1017
1059
|
"decisionCorrect": true,
|
|
1018
1060
|
"riskScore": 0.85,
|
|
1019
|
-
"passed":
|
|
1020
|
-
"latencyMs": 0.
|
|
1061
|
+
"passed": true,
|
|
1062
|
+
"latencyMs": 0.475,
|
|
1021
1063
|
"evidenceCount": 1,
|
|
1022
1064
|
"evidenceIds": [
|
|
1023
|
-
"
|
|
1065
|
+
"01KRPC8HSJ7N9KKFGH3EZGTFWP"
|
|
1024
1066
|
],
|
|
1025
1067
|
"recommendedActions": [
|
|
1026
1068
|
"Review retrieved memory before acting."
|
|
@@ -1028,7 +1070,9 @@
|
|
|
1028
1070
|
"summary": "Retrieved policy-like memory with lexical overlap, but without Audrey Guard lineage.",
|
|
1029
1071
|
"recallErrors": [],
|
|
1030
1072
|
"leakedSecrets": [],
|
|
1031
|
-
"
|
|
1073
|
+
"hasEvidenceForDecision": true,
|
|
1074
|
+
"lineageTextMatched": false,
|
|
1075
|
+
"requiredEvidenceMatched": true
|
|
1032
1076
|
},
|
|
1033
1077
|
{
|
|
1034
1078
|
"system": "FTS Only",
|
|
@@ -1039,13 +1083,15 @@
|
|
|
1039
1083
|
"decisionCorrect": false,
|
|
1040
1084
|
"riskScore": 0,
|
|
1041
1085
|
"passed": false,
|
|
1042
|
-
"latencyMs": 0.
|
|
1086
|
+
"latencyMs": 0.353,
|
|
1043
1087
|
"evidenceCount": 0,
|
|
1044
1088
|
"evidenceIds": [],
|
|
1045
1089
|
"recommendedActions": [],
|
|
1046
1090
|
"summary": "No memory signal found by this baseline.",
|
|
1047
1091
|
"recallErrors": [],
|
|
1048
1092
|
"leakedSecrets": [],
|
|
1093
|
+
"hasEvidenceForDecision": false,
|
|
1094
|
+
"lineageTextMatched": false,
|
|
1049
1095
|
"requiredEvidenceMatched": false
|
|
1050
1096
|
}
|
|
1051
1097
|
]
|
|
@@ -1064,10 +1110,10 @@
|
|
|
1064
1110
|
"decisionCorrect": true,
|
|
1065
1111
|
"riskScore": 0.55,
|
|
1066
1112
|
"passed": true,
|
|
1067
|
-
"latencyMs":
|
|
1113
|
+
"latencyMs": 2.654,
|
|
1068
1114
|
"evidenceCount": 1,
|
|
1069
1115
|
"evidenceIds": [
|
|
1070
|
-
"failure:Bash:2026-05-
|
|
1116
|
+
"failure:Bash:2026-05-15T17:52:01.365Z"
|
|
1071
1117
|
],
|
|
1072
1118
|
"recommendedActions": [
|
|
1073
1119
|
"Before re-running Bash, check what changed since the last failure."
|
|
@@ -1075,6 +1121,8 @@
|
|
|
1075
1121
|
"summary": "Caution: 1 memory signal, 1 medium severity found before acting.",
|
|
1076
1122
|
"recallErrors": [],
|
|
1077
1123
|
"leakedSecrets": [],
|
|
1124
|
+
"hasEvidenceForDecision": true,
|
|
1125
|
+
"lineageTextMatched": true,
|
|
1078
1126
|
"requiredEvidenceMatched": true
|
|
1079
1127
|
},
|
|
1080
1128
|
{
|
|
@@ -1093,6 +1141,8 @@
|
|
|
1093
1141
|
"summary": "No memory baseline always allows proposed actions.",
|
|
1094
1142
|
"recallErrors": [],
|
|
1095
1143
|
"leakedSecrets": [],
|
|
1144
|
+
"hasEvidenceForDecision": false,
|
|
1145
|
+
"lineageTextMatched": false,
|
|
1096
1146
|
"requiredEvidenceMatched": false
|
|
1097
1147
|
},
|
|
1098
1148
|
{
|
|
@@ -1103,11 +1153,11 @@
|
|
|
1103
1153
|
"decision": "warn",
|
|
1104
1154
|
"decisionCorrect": true,
|
|
1105
1155
|
"riskScore": 0.55,
|
|
1106
|
-
"passed":
|
|
1107
|
-
"latencyMs": 0.
|
|
1156
|
+
"passed": true,
|
|
1157
|
+
"latencyMs": 0.079,
|
|
1108
1158
|
"evidenceCount": 1,
|
|
1109
1159
|
"evidenceIds": [
|
|
1110
|
-
"
|
|
1160
|
+
"01KRPC8J3S4YPWSTE26SD6SVPY"
|
|
1111
1161
|
],
|
|
1112
1162
|
"recommendedActions": [
|
|
1113
1163
|
"Check the recent failed event before repeating a similar action."
|
|
@@ -1115,7 +1165,9 @@
|
|
|
1115
1165
|
"summary": "Recent-window baseline found a failed Bash event.",
|
|
1116
1166
|
"recallErrors": [],
|
|
1117
1167
|
"leakedSecrets": [],
|
|
1118
|
-
"
|
|
1168
|
+
"hasEvidenceForDecision": true,
|
|
1169
|
+
"lineageTextMatched": false,
|
|
1170
|
+
"requiredEvidenceMatched": true
|
|
1119
1171
|
},
|
|
1120
1172
|
{
|
|
1121
1173
|
"system": "Vector Only",
|
|
@@ -1125,11 +1177,11 @@
|
|
|
1125
1177
|
"decision": "warn",
|
|
1126
1178
|
"decisionCorrect": true,
|
|
1127
1179
|
"riskScore": 0.35,
|
|
1128
|
-
"passed":
|
|
1129
|
-
"latencyMs": 0.
|
|
1180
|
+
"passed": true,
|
|
1181
|
+
"latencyMs": 0.492,
|
|
1130
1182
|
"evidenceCount": 1,
|
|
1131
1183
|
"evidenceIds": [
|
|
1132
|
-
"
|
|
1184
|
+
"01KRPC8J4BXV5DHC2RY9YBGZZC"
|
|
1133
1185
|
],
|
|
1134
1186
|
"recommendedActions": [
|
|
1135
1187
|
"Treat retrieved memory as advisory context."
|
|
@@ -1137,7 +1189,9 @@
|
|
|
1137
1189
|
"summary": "Retrieved related memory, but no controller converted it into a strict guard decision.",
|
|
1138
1190
|
"recallErrors": [],
|
|
1139
1191
|
"leakedSecrets": [],
|
|
1140
|
-
"
|
|
1192
|
+
"hasEvidenceForDecision": true,
|
|
1193
|
+
"lineageTextMatched": false,
|
|
1194
|
+
"requiredEvidenceMatched": true
|
|
1141
1195
|
},
|
|
1142
1196
|
{
|
|
1143
1197
|
"system": "FTS Only",
|
|
@@ -1148,13 +1202,15 @@
|
|
|
1148
1202
|
"decisionCorrect": false,
|
|
1149
1203
|
"riskScore": 0,
|
|
1150
1204
|
"passed": false,
|
|
1151
|
-
"latencyMs": 0.
|
|
1205
|
+
"latencyMs": 0.381,
|
|
1152
1206
|
"evidenceCount": 0,
|
|
1153
1207
|
"evidenceIds": [],
|
|
1154
1208
|
"recommendedActions": [],
|
|
1155
1209
|
"summary": "No memory signal found by this baseline.",
|
|
1156
1210
|
"recallErrors": [],
|
|
1157
1211
|
"leakedSecrets": [],
|
|
1212
|
+
"hasEvidenceForDecision": false,
|
|
1213
|
+
"lineageTextMatched": false,
|
|
1158
1214
|
"requiredEvidenceMatched": false
|
|
1159
1215
|
}
|
|
1160
1216
|
]
|
|
@@ -1173,10 +1229,10 @@
|
|
|
1173
1229
|
"decisionCorrect": true,
|
|
1174
1230
|
"riskScore": 0.55,
|
|
1175
1231
|
"passed": true,
|
|
1176
|
-
"latencyMs":
|
|
1232
|
+
"latencyMs": 2.465,
|
|
1177
1233
|
"evidenceCount": 1,
|
|
1178
1234
|
"evidenceIds": [
|
|
1179
|
-
"failure:Bash:2026-05-
|
|
1235
|
+
"failure:Bash:2026-05-15T17:52:01.798Z"
|
|
1180
1236
|
],
|
|
1181
1237
|
"recommendedActions": [
|
|
1182
1238
|
"Before re-running Bash, check what changed since the last failure."
|
|
@@ -1184,6 +1240,8 @@
|
|
|
1184
1240
|
"summary": "Caution: 1 memory signal, 1 medium severity found before acting.",
|
|
1185
1241
|
"recallErrors": [],
|
|
1186
1242
|
"leakedSecrets": [],
|
|
1243
|
+
"hasEvidenceForDecision": true,
|
|
1244
|
+
"lineageTextMatched": true,
|
|
1187
1245
|
"requiredEvidenceMatched": true
|
|
1188
1246
|
},
|
|
1189
1247
|
{
|
|
@@ -1195,13 +1253,15 @@
|
|
|
1195
1253
|
"decisionCorrect": false,
|
|
1196
1254
|
"riskScore": 0,
|
|
1197
1255
|
"passed": false,
|
|
1198
|
-
"latencyMs": 0.
|
|
1256
|
+
"latencyMs": 0.006,
|
|
1199
1257
|
"evidenceCount": 0,
|
|
1200
1258
|
"evidenceIds": [],
|
|
1201
1259
|
"recommendedActions": [],
|
|
1202
1260
|
"summary": "No memory baseline always allows proposed actions.",
|
|
1203
1261
|
"recallErrors": [],
|
|
1204
1262
|
"leakedSecrets": [],
|
|
1263
|
+
"hasEvidenceForDecision": false,
|
|
1264
|
+
"lineageTextMatched": false,
|
|
1205
1265
|
"requiredEvidenceMatched": false
|
|
1206
1266
|
},
|
|
1207
1267
|
{
|
|
@@ -1212,11 +1272,11 @@
|
|
|
1212
1272
|
"decision": "warn",
|
|
1213
1273
|
"decisionCorrect": true,
|
|
1214
1274
|
"riskScore": 0.55,
|
|
1215
|
-
"passed":
|
|
1216
|
-
"latencyMs": 0.
|
|
1275
|
+
"passed": true,
|
|
1276
|
+
"latencyMs": 0.052,
|
|
1217
1277
|
"evidenceCount": 1,
|
|
1218
1278
|
"evidenceIds": [
|
|
1219
|
-
"
|
|
1279
|
+
"01KRPC8J98CDFSGA2AG5E56TN0"
|
|
1220
1280
|
],
|
|
1221
1281
|
"recommendedActions": [
|
|
1222
1282
|
"Check the recent failed event before repeating a similar action."
|
|
@@ -1224,7 +1284,9 @@
|
|
|
1224
1284
|
"summary": "Recent-window baseline found a failed Bash event.",
|
|
1225
1285
|
"recallErrors": [],
|
|
1226
1286
|
"leakedSecrets": [],
|
|
1227
|
-
"
|
|
1287
|
+
"hasEvidenceForDecision": true,
|
|
1288
|
+
"lineageTextMatched": false,
|
|
1289
|
+
"requiredEvidenceMatched": true
|
|
1228
1290
|
},
|
|
1229
1291
|
{
|
|
1230
1292
|
"system": "Vector Only",
|
|
@@ -1234,11 +1296,11 @@
|
|
|
1234
1296
|
"decision": "warn",
|
|
1235
1297
|
"decisionCorrect": true,
|
|
1236
1298
|
"riskScore": 0.35,
|
|
1237
|
-
"passed":
|
|
1238
|
-
"latencyMs": 0.
|
|
1299
|
+
"passed": true,
|
|
1300
|
+
"latencyMs": 0.315,
|
|
1239
1301
|
"evidenceCount": 1,
|
|
1240
1302
|
"evidenceIds": [
|
|
1241
|
-
"
|
|
1303
|
+
"01KRPC8J9Q0CMEHTHR4TPX8SYY"
|
|
1242
1304
|
],
|
|
1243
1305
|
"recommendedActions": [
|
|
1244
1306
|
"Treat retrieved memory as advisory context."
|
|
@@ -1246,7 +1308,9 @@
|
|
|
1246
1308
|
"summary": "Retrieved related memory, but no controller converted it into a strict guard decision.",
|
|
1247
1309
|
"recallErrors": [],
|
|
1248
1310
|
"leakedSecrets": [],
|
|
1249
|
-
"
|
|
1311
|
+
"hasEvidenceForDecision": true,
|
|
1312
|
+
"lineageTextMatched": false,
|
|
1313
|
+
"requiredEvidenceMatched": true
|
|
1250
1314
|
},
|
|
1251
1315
|
{
|
|
1252
1316
|
"system": "FTS Only",
|
|
@@ -1257,13 +1321,15 @@
|
|
|
1257
1321
|
"decisionCorrect": false,
|
|
1258
1322
|
"riskScore": 0,
|
|
1259
1323
|
"passed": false,
|
|
1260
|
-
"latencyMs": 0.
|
|
1324
|
+
"latencyMs": 0.348,
|
|
1261
1325
|
"evidenceCount": 0,
|
|
1262
1326
|
"evidenceIds": [],
|
|
1263
1327
|
"recommendedActions": [],
|
|
1264
1328
|
"summary": "No memory signal found by this baseline.",
|
|
1265
1329
|
"recallErrors": [],
|
|
1266
1330
|
"leakedSecrets": [],
|
|
1331
|
+
"hasEvidenceForDecision": false,
|
|
1332
|
+
"lineageTextMatched": false,
|
|
1267
1333
|
"requiredEvidenceMatched": false
|
|
1268
1334
|
}
|
|
1269
1335
|
]
|
|
@@ -1282,11 +1348,11 @@
|
|
|
1282
1348
|
"decisionCorrect": true,
|
|
1283
1349
|
"riskScore": 0.2,
|
|
1284
1350
|
"passed": true,
|
|
1285
|
-
"latencyMs":
|
|
1351
|
+
"latencyMs": 2.485,
|
|
1286
1352
|
"evidenceCount": 2,
|
|
1287
1353
|
"evidenceIds": [
|
|
1288
|
-
"
|
|
1289
|
-
"failure:Bash:2026-05-
|
|
1354
|
+
"01KRPC8JAPXFTFGGG94QP185MS",
|
|
1355
|
+
"failure:Bash:2026-05-15T17:52:01.877Z"
|
|
1290
1356
|
],
|
|
1291
1357
|
"recommendedActions": [
|
|
1292
1358
|
"This exact action has succeeded since its last failure; proceed with normal validation.",
|
|
@@ -1295,6 +1361,8 @@
|
|
|
1295
1361
|
"summary": "Allowed: this exact Bash action has succeeded since the prior failure. Caution: 1 memory signal, 1 medium severity found before acting.",
|
|
1296
1362
|
"recallErrors": [],
|
|
1297
1363
|
"leakedSecrets": [],
|
|
1364
|
+
"hasEvidenceForDecision": true,
|
|
1365
|
+
"lineageTextMatched": true,
|
|
1298
1366
|
"requiredEvidenceMatched": true
|
|
1299
1367
|
},
|
|
1300
1368
|
{
|
|
@@ -1305,15 +1373,17 @@
|
|
|
1305
1373
|
"decision": "allow",
|
|
1306
1374
|
"decisionCorrect": true,
|
|
1307
1375
|
"riskScore": 0,
|
|
1308
|
-
"passed":
|
|
1309
|
-
"latencyMs": 0.
|
|
1376
|
+
"passed": true,
|
|
1377
|
+
"latencyMs": 0.004,
|
|
1310
1378
|
"evidenceCount": 0,
|
|
1311
1379
|
"evidenceIds": [],
|
|
1312
1380
|
"recommendedActions": [],
|
|
1313
1381
|
"summary": "No memory baseline always allows proposed actions.",
|
|
1314
1382
|
"recallErrors": [],
|
|
1315
1383
|
"leakedSecrets": [],
|
|
1316
|
-
"
|
|
1384
|
+
"hasEvidenceForDecision": true,
|
|
1385
|
+
"lineageTextMatched": false,
|
|
1386
|
+
"requiredEvidenceMatched": true
|
|
1317
1387
|
},
|
|
1318
1388
|
{
|
|
1319
1389
|
"system": "Recent Window",
|
|
@@ -1324,10 +1394,10 @@
|
|
|
1324
1394
|
"decisionCorrect": false,
|
|
1325
1395
|
"riskScore": 0.55,
|
|
1326
1396
|
"passed": false,
|
|
1327
|
-
"latencyMs": 0.
|
|
1397
|
+
"latencyMs": 0.055,
|
|
1328
1398
|
"evidenceCount": 1,
|
|
1329
1399
|
"evidenceIds": [
|
|
1330
|
-
"
|
|
1400
|
+
"01KRPC8JBSK81DRW3SP4PGA3M0"
|
|
1331
1401
|
],
|
|
1332
1402
|
"recommendedActions": [
|
|
1333
1403
|
"Check the recent failed event before repeating a similar action."
|
|
@@ -1335,7 +1405,9 @@
|
|
|
1335
1405
|
"summary": "Recent-window baseline found a failed Bash event.",
|
|
1336
1406
|
"recallErrors": [],
|
|
1337
1407
|
"leakedSecrets": [],
|
|
1338
|
-
"
|
|
1408
|
+
"hasEvidenceForDecision": true,
|
|
1409
|
+
"lineageTextMatched": false,
|
|
1410
|
+
"requiredEvidenceMatched": true
|
|
1339
1411
|
},
|
|
1340
1412
|
{
|
|
1341
1413
|
"system": "Vector Only",
|
|
@@ -1346,10 +1418,10 @@
|
|
|
1346
1418
|
"decisionCorrect": false,
|
|
1347
1419
|
"riskScore": 0.35,
|
|
1348
1420
|
"passed": false,
|
|
1349
|
-
"latencyMs": 0.
|
|
1421
|
+
"latencyMs": 0.35,
|
|
1350
1422
|
"evidenceCount": 1,
|
|
1351
1423
|
"evidenceIds": [
|
|
1352
|
-
"
|
|
1424
|
+
"01KRPC8JC7Z8S82XZQM0MC2VED"
|
|
1353
1425
|
],
|
|
1354
1426
|
"recommendedActions": [
|
|
1355
1427
|
"Treat retrieved memory as advisory context."
|
|
@@ -1357,7 +1429,9 @@
|
|
|
1357
1429
|
"summary": "Retrieved related memory, but no controller converted it into a strict guard decision.",
|
|
1358
1430
|
"recallErrors": [],
|
|
1359
1431
|
"leakedSecrets": [],
|
|
1360
|
-
"
|
|
1432
|
+
"hasEvidenceForDecision": true,
|
|
1433
|
+
"lineageTextMatched": false,
|
|
1434
|
+
"requiredEvidenceMatched": true
|
|
1361
1435
|
},
|
|
1362
1436
|
{
|
|
1363
1437
|
"system": "FTS Only",
|
|
@@ -1367,15 +1441,17 @@
|
|
|
1367
1441
|
"decision": "allow",
|
|
1368
1442
|
"decisionCorrect": true,
|
|
1369
1443
|
"riskScore": 0,
|
|
1370
|
-
"passed":
|
|
1371
|
-
"latencyMs": 0.
|
|
1444
|
+
"passed": true,
|
|
1445
|
+
"latencyMs": 0.322,
|
|
1372
1446
|
"evidenceCount": 0,
|
|
1373
1447
|
"evidenceIds": [],
|
|
1374
1448
|
"recommendedActions": [],
|
|
1375
1449
|
"summary": "No memory signal found by this baseline.",
|
|
1376
1450
|
"recallErrors": [],
|
|
1377
1451
|
"leakedSecrets": [],
|
|
1378
|
-
"
|
|
1452
|
+
"hasEvidenceForDecision": true,
|
|
1453
|
+
"lineageTextMatched": false,
|
|
1454
|
+
"requiredEvidenceMatched": true
|
|
1379
1455
|
}
|
|
1380
1456
|
]
|
|
1381
1457
|
},
|
|
@@ -1393,7 +1469,7 @@
|
|
|
1393
1469
|
"decisionCorrect": true,
|
|
1394
1470
|
"riskScore": 0.85,
|
|
1395
1471
|
"passed": true,
|
|
1396
|
-
"latencyMs": 2.
|
|
1472
|
+
"latencyMs": 2.159,
|
|
1397
1473
|
"evidenceCount": 1,
|
|
1398
1474
|
"evidenceIds": [
|
|
1399
1475
|
"recall:episodic:recall.vector_counts"
|
|
@@ -1406,6 +1482,8 @@
|
|
|
1406
1482
|
"summary": "Blocked: 2 memory signals, 2 high severity found before acting.",
|
|
1407
1483
|
"recallErrors": [],
|
|
1408
1484
|
"leakedSecrets": [],
|
|
1485
|
+
"hasEvidenceForDecision": true,
|
|
1486
|
+
"lineageTextMatched": true,
|
|
1409
1487
|
"requiredEvidenceMatched": true
|
|
1410
1488
|
},
|
|
1411
1489
|
{
|
|
@@ -1417,13 +1495,15 @@
|
|
|
1417
1495
|
"decisionCorrect": false,
|
|
1418
1496
|
"riskScore": 0,
|
|
1419
1497
|
"passed": false,
|
|
1420
|
-
"latencyMs": 0.
|
|
1498
|
+
"latencyMs": 0.005,
|
|
1421
1499
|
"evidenceCount": 0,
|
|
1422
1500
|
"evidenceIds": [],
|
|
1423
1501
|
"recommendedActions": [],
|
|
1424
1502
|
"summary": "No memory baseline always allows proposed actions.",
|
|
1425
1503
|
"recallErrors": [],
|
|
1426
1504
|
"leakedSecrets": [],
|
|
1505
|
+
"hasEvidenceForDecision": false,
|
|
1506
|
+
"lineageTextMatched": false,
|
|
1427
1507
|
"requiredEvidenceMatched": false
|
|
1428
1508
|
},
|
|
1429
1509
|
{
|
|
@@ -1434,11 +1514,11 @@
|
|
|
1434
1514
|
"decision": "block",
|
|
1435
1515
|
"decisionCorrect": true,
|
|
1436
1516
|
"riskScore": 0.85,
|
|
1437
|
-
"passed":
|
|
1438
|
-
"latencyMs": 0.
|
|
1517
|
+
"passed": true,
|
|
1518
|
+
"latencyMs": 0.128,
|
|
1439
1519
|
"evidenceCount": 1,
|
|
1440
1520
|
"evidenceIds": [
|
|
1441
|
-
"
|
|
1521
|
+
"01KRPC8JEJYKMEDCJKMDKX3Q7H"
|
|
1442
1522
|
],
|
|
1443
1523
|
"recommendedActions": [
|
|
1444
1524
|
"Review retrieved memory before acting."
|
|
@@ -1446,7 +1526,9 @@
|
|
|
1446
1526
|
"summary": "Retrieved policy-like memory with lexical overlap, but without Audrey Guard lineage.",
|
|
1447
1527
|
"recallErrors": [],
|
|
1448
1528
|
"leakedSecrets": [],
|
|
1449
|
-
"
|
|
1529
|
+
"hasEvidenceForDecision": true,
|
|
1530
|
+
"lineageTextMatched": false,
|
|
1531
|
+
"requiredEvidenceMatched": true
|
|
1450
1532
|
},
|
|
1451
1533
|
{
|
|
1452
1534
|
"system": "Vector Only",
|
|
@@ -1457,7 +1539,7 @@
|
|
|
1457
1539
|
"decisionCorrect": false,
|
|
1458
1540
|
"riskScore": 0.55,
|
|
1459
1541
|
"passed": false,
|
|
1460
|
-
"latencyMs": 0.
|
|
1542
|
+
"latencyMs": 0.267,
|
|
1461
1543
|
"evidenceCount": 0,
|
|
1462
1544
|
"evidenceIds": [],
|
|
1463
1545
|
"recommendedActions": [
|
|
@@ -1472,6 +1554,8 @@
|
|
|
1472
1554
|
}
|
|
1473
1555
|
],
|
|
1474
1556
|
"leakedSecrets": [],
|
|
1557
|
+
"hasEvidenceForDecision": false,
|
|
1558
|
+
"lineageTextMatched": false,
|
|
1475
1559
|
"requiredEvidenceMatched": false
|
|
1476
1560
|
},
|
|
1477
1561
|
{
|
|
@@ -1483,13 +1567,15 @@
|
|
|
1483
1567
|
"decisionCorrect": false,
|
|
1484
1568
|
"riskScore": 0,
|
|
1485
1569
|
"passed": false,
|
|
1486
|
-
"latencyMs": 0.
|
|
1570
|
+
"latencyMs": 0.334,
|
|
1487
1571
|
"evidenceCount": 0,
|
|
1488
1572
|
"evidenceIds": [],
|
|
1489
1573
|
"recommendedActions": [],
|
|
1490
1574
|
"summary": "No memory signal found by this baseline.",
|
|
1491
1575
|
"recallErrors": [],
|
|
1492
1576
|
"leakedSecrets": [],
|
|
1577
|
+
"hasEvidenceForDecision": false,
|
|
1578
|
+
"lineageTextMatched": false,
|
|
1493
1579
|
"requiredEvidenceMatched": false
|
|
1494
1580
|
}
|
|
1495
1581
|
]
|
|
@@ -1508,11 +1594,11 @@
|
|
|
1508
1594
|
"decisionCorrect": true,
|
|
1509
1595
|
"riskScore": 0.85,
|
|
1510
1596
|
"passed": true,
|
|
1511
|
-
"latencyMs":
|
|
1597
|
+
"latencyMs": 1.561,
|
|
1512
1598
|
"evidenceCount": 2,
|
|
1513
1599
|
"evidenceIds": [
|
|
1514
1600
|
"recall:fts:recall.fts_lookup",
|
|
1515
|
-
"
|
|
1601
|
+
"01KRPC8JKB36TE59QKA7Z4V2DM"
|
|
1516
1602
|
],
|
|
1517
1603
|
"recommendedActions": [
|
|
1518
1604
|
"Do not proceed until the high-severity memory warning is addressed.",
|
|
@@ -1522,6 +1608,8 @@
|
|
|
1522
1608
|
"summary": "Blocked: 2 memory signals, 2 high severity found before acting.",
|
|
1523
1609
|
"recallErrors": [],
|
|
1524
1610
|
"leakedSecrets": [],
|
|
1611
|
+
"hasEvidenceForDecision": true,
|
|
1612
|
+
"lineageTextMatched": true,
|
|
1525
1613
|
"requiredEvidenceMatched": true
|
|
1526
1614
|
},
|
|
1527
1615
|
{
|
|
@@ -1533,13 +1621,15 @@
|
|
|
1533
1621
|
"decisionCorrect": false,
|
|
1534
1622
|
"riskScore": 0,
|
|
1535
1623
|
"passed": false,
|
|
1536
|
-
"latencyMs": 0.
|
|
1624
|
+
"latencyMs": 0.005,
|
|
1537
1625
|
"evidenceCount": 0,
|
|
1538
1626
|
"evidenceIds": [],
|
|
1539
1627
|
"recommendedActions": [],
|
|
1540
1628
|
"summary": "No memory baseline always allows proposed actions.",
|
|
1541
1629
|
"recallErrors": [],
|
|
1542
1630
|
"leakedSecrets": [],
|
|
1631
|
+
"hasEvidenceForDecision": false,
|
|
1632
|
+
"lineageTextMatched": false,
|
|
1543
1633
|
"requiredEvidenceMatched": false
|
|
1544
1634
|
},
|
|
1545
1635
|
{
|
|
@@ -1551,10 +1641,10 @@
|
|
|
1551
1641
|
"decisionCorrect": false,
|
|
1552
1642
|
"riskScore": 0.35,
|
|
1553
1643
|
"passed": false,
|
|
1554
|
-
"latencyMs": 0.
|
|
1644
|
+
"latencyMs": 0.103,
|
|
1555
1645
|
"evidenceCount": 1,
|
|
1556
1646
|
"evidenceIds": [
|
|
1557
|
-
"
|
|
1647
|
+
"01KRPC8JNWTHH9J03GFSYTHN4K"
|
|
1558
1648
|
],
|
|
1559
1649
|
"recommendedActions": [
|
|
1560
1650
|
"Treat retrieved memory as advisory context."
|
|
@@ -1562,7 +1652,9 @@
|
|
|
1562
1652
|
"summary": "Retrieved related memory, but no controller converted it into a strict guard decision.",
|
|
1563
1653
|
"recallErrors": [],
|
|
1564
1654
|
"leakedSecrets": [],
|
|
1565
|
-
"
|
|
1655
|
+
"hasEvidenceForDecision": true,
|
|
1656
|
+
"lineageTextMatched": false,
|
|
1657
|
+
"requiredEvidenceMatched": true
|
|
1566
1658
|
},
|
|
1567
1659
|
{
|
|
1568
1660
|
"system": "Vector Only",
|
|
@@ -1573,10 +1665,10 @@
|
|
|
1573
1665
|
"decisionCorrect": false,
|
|
1574
1666
|
"riskScore": 0.35,
|
|
1575
1667
|
"passed": false,
|
|
1576
|
-
"latencyMs": 0.
|
|
1668
|
+
"latencyMs": 0.314,
|
|
1577
1669
|
"evidenceCount": 1,
|
|
1578
1670
|
"evidenceIds": [
|
|
1579
|
-
"
|
|
1671
|
+
"01KRPC8JPC7P0SJDFCJXF222DE"
|
|
1580
1672
|
],
|
|
1581
1673
|
"recommendedActions": [
|
|
1582
1674
|
"Treat retrieved memory as advisory context."
|
|
@@ -1584,7 +1676,9 @@
|
|
|
1584
1676
|
"summary": "Retrieved related memory, but no controller converted it into a strict guard decision.",
|
|
1585
1677
|
"recallErrors": [],
|
|
1586
1678
|
"leakedSecrets": [],
|
|
1587
|
-
"
|
|
1679
|
+
"hasEvidenceForDecision": true,
|
|
1680
|
+
"lineageTextMatched": false,
|
|
1681
|
+
"requiredEvidenceMatched": true
|
|
1588
1682
|
},
|
|
1589
1683
|
{
|
|
1590
1684
|
"system": "FTS Only",
|
|
@@ -1595,7 +1689,7 @@
|
|
|
1595
1689
|
"decisionCorrect": false,
|
|
1596
1690
|
"riskScore": 0.55,
|
|
1597
1691
|
"passed": false,
|
|
1598
|
-
"latencyMs": 0.
|
|
1692
|
+
"latencyMs": 0.129,
|
|
1599
1693
|
"evidenceCount": 0,
|
|
1600
1694
|
"evidenceIds": [],
|
|
1601
1695
|
"recommendedActions": [
|
|
@@ -1610,7 +1704,9 @@
|
|
|
1610
1704
|
}
|
|
1611
1705
|
],
|
|
1612
1706
|
"leakedSecrets": [],
|
|
1613
|
-
"
|
|
1707
|
+
"hasEvidenceForDecision": false,
|
|
1708
|
+
"lineageTextMatched": true,
|
|
1709
|
+
"requiredEvidenceMatched": false
|
|
1614
1710
|
}
|
|
1615
1711
|
]
|
|
1616
1712
|
},
|
|
@@ -1628,11 +1724,11 @@
|
|
|
1628
1724
|
"decisionCorrect": true,
|
|
1629
1725
|
"riskScore": 0.9,
|
|
1630
1726
|
"passed": true,
|
|
1631
|
-
"latencyMs": 2.
|
|
1727
|
+
"latencyMs": 2.339,
|
|
1632
1728
|
"evidenceCount": 2,
|
|
1633
1729
|
"evidenceIds": [
|
|
1634
|
-
"
|
|
1635
|
-
"failure:Bash:2026-05-
|
|
1730
|
+
"01KRPC8JQFVTGQBPCSTSKTRPY7",
|
|
1731
|
+
"failure:Bash:2026-05-15T17:52:02.287Z"
|
|
1636
1732
|
],
|
|
1637
1733
|
"recommendedActions": [
|
|
1638
1734
|
"Do not repeat the exact failed action until the prior error is understood or the command is changed.",
|
|
@@ -1641,6 +1737,8 @@
|
|
|
1641
1737
|
"summary": "Blocked: this exact Bash action failed before. Caution: 1 memory signal, 1 medium severity found before acting.",
|
|
1642
1738
|
"recallErrors": [],
|
|
1643
1739
|
"leakedSecrets": [],
|
|
1740
|
+
"hasEvidenceForDecision": true,
|
|
1741
|
+
"lineageTextMatched": true,
|
|
1644
1742
|
"requiredEvidenceMatched": true
|
|
1645
1743
|
},
|
|
1646
1744
|
{
|
|
@@ -1659,6 +1757,8 @@
|
|
|
1659
1757
|
"summary": "No memory baseline always allows proposed actions.",
|
|
1660
1758
|
"recallErrors": [],
|
|
1661
1759
|
"leakedSecrets": [],
|
|
1760
|
+
"hasEvidenceForDecision": false,
|
|
1761
|
+
"lineageTextMatched": false,
|
|
1662
1762
|
"requiredEvidenceMatched": false
|
|
1663
1763
|
},
|
|
1664
1764
|
{
|
|
@@ -1670,10 +1770,10 @@
|
|
|
1670
1770
|
"decisionCorrect": false,
|
|
1671
1771
|
"riskScore": 0.55,
|
|
1672
1772
|
"passed": false,
|
|
1673
|
-
"latencyMs": 0.
|
|
1773
|
+
"latencyMs": 0.049,
|
|
1674
1774
|
"evidenceCount": 1,
|
|
1675
1775
|
"evidenceIds": [
|
|
1676
|
-
"
|
|
1776
|
+
"01KRPC8JRKBJR9Y6CTD0D1ZX47"
|
|
1677
1777
|
],
|
|
1678
1778
|
"recommendedActions": [
|
|
1679
1779
|
"Check the recent failed event before repeating a similar action."
|
|
@@ -1681,7 +1781,9 @@
|
|
|
1681
1781
|
"summary": "Recent-window baseline found a failed Bash event.",
|
|
1682
1782
|
"recallErrors": [],
|
|
1683
1783
|
"leakedSecrets": [],
|
|
1684
|
-
"
|
|
1784
|
+
"hasEvidenceForDecision": true,
|
|
1785
|
+
"lineageTextMatched": false,
|
|
1786
|
+
"requiredEvidenceMatched": true
|
|
1685
1787
|
},
|
|
1686
1788
|
{
|
|
1687
1789
|
"system": "Vector Only",
|
|
@@ -1692,10 +1794,10 @@
|
|
|
1692
1794
|
"decisionCorrect": false,
|
|
1693
1795
|
"riskScore": 0.35,
|
|
1694
1796
|
"passed": false,
|
|
1695
|
-
"latencyMs": 0.
|
|
1797
|
+
"latencyMs": 0.331,
|
|
1696
1798
|
"evidenceCount": 1,
|
|
1697
1799
|
"evidenceIds": [
|
|
1698
|
-
"
|
|
1800
|
+
"01KRPC8JS661GJEJJV12PR5YD3"
|
|
1699
1801
|
],
|
|
1700
1802
|
"recommendedActions": [
|
|
1701
1803
|
"Treat retrieved memory as advisory context."
|
|
@@ -1703,7 +1805,9 @@
|
|
|
1703
1805
|
"summary": "Retrieved related memory, but no controller converted it into a strict guard decision.",
|
|
1704
1806
|
"recallErrors": [],
|
|
1705
1807
|
"leakedSecrets": [],
|
|
1706
|
-
"
|
|
1808
|
+
"hasEvidenceForDecision": true,
|
|
1809
|
+
"lineageTextMatched": false,
|
|
1810
|
+
"requiredEvidenceMatched": true
|
|
1707
1811
|
},
|
|
1708
1812
|
{
|
|
1709
1813
|
"system": "FTS Only",
|
|
@@ -1714,13 +1818,15 @@
|
|
|
1714
1818
|
"decisionCorrect": false,
|
|
1715
1819
|
"riskScore": 0,
|
|
1716
1820
|
"passed": false,
|
|
1717
|
-
"latencyMs": 0.
|
|
1821
|
+
"latencyMs": 0.306,
|
|
1718
1822
|
"evidenceCount": 0,
|
|
1719
1823
|
"evidenceIds": [],
|
|
1720
1824
|
"recommendedActions": [],
|
|
1721
1825
|
"summary": "No memory signal found by this baseline.",
|
|
1722
1826
|
"recallErrors": [],
|
|
1723
1827
|
"leakedSecrets": [],
|
|
1828
|
+
"hasEvidenceForDecision": false,
|
|
1829
|
+
"lineageTextMatched": false,
|
|
1724
1830
|
"requiredEvidenceMatched": false
|
|
1725
1831
|
}
|
|
1726
1832
|
]
|
|
@@ -1739,11 +1845,11 @@
|
|
|
1739
1845
|
"decisionCorrect": true,
|
|
1740
1846
|
"riskScore": 0.85,
|
|
1741
1847
|
"passed": true,
|
|
1742
|
-
"latencyMs":
|
|
1848
|
+
"latencyMs": 1.963,
|
|
1743
1849
|
"evidenceCount": 2,
|
|
1744
1850
|
"evidenceIds": [
|
|
1745
|
-
"
|
|
1746
|
-
"
|
|
1851
|
+
"01KRPC8K2N9C3SKKD835K921Z8",
|
|
1852
|
+
"01KRPC8K2PVBNMYZ2RBA7B2Q9X"
|
|
1747
1853
|
],
|
|
1748
1854
|
"recommendedActions": [
|
|
1749
1855
|
"Do not proceed until the high-severity memory warning is addressed.",
|
|
@@ -1752,6 +1858,8 @@
|
|
|
1752
1858
|
"summary": "Blocked: 2 memory signals, 2 high severity found before acting.",
|
|
1753
1859
|
"recallErrors": [],
|
|
1754
1860
|
"leakedSecrets": [],
|
|
1861
|
+
"hasEvidenceForDecision": true,
|
|
1862
|
+
"lineageTextMatched": true,
|
|
1755
1863
|
"requiredEvidenceMatched": true
|
|
1756
1864
|
},
|
|
1757
1865
|
{
|
|
@@ -1763,13 +1871,15 @@
|
|
|
1763
1871
|
"decisionCorrect": false,
|
|
1764
1872
|
"riskScore": 0,
|
|
1765
1873
|
"passed": false,
|
|
1766
|
-
"latencyMs": 0.
|
|
1874
|
+
"latencyMs": 0.004,
|
|
1767
1875
|
"evidenceCount": 0,
|
|
1768
1876
|
"evidenceIds": [],
|
|
1769
1877
|
"recommendedActions": [],
|
|
1770
1878
|
"summary": "No memory baseline always allows proposed actions.",
|
|
1771
1879
|
"recallErrors": [],
|
|
1772
1880
|
"leakedSecrets": [],
|
|
1881
|
+
"hasEvidenceForDecision": false,
|
|
1882
|
+
"lineageTextMatched": false,
|
|
1773
1883
|
"requiredEvidenceMatched": false
|
|
1774
1884
|
},
|
|
1775
1885
|
{
|
|
@@ -1780,12 +1890,12 @@
|
|
|
1780
1890
|
"decision": "block",
|
|
1781
1891
|
"decisionCorrect": true,
|
|
1782
1892
|
"riskScore": 0.85,
|
|
1783
|
-
"passed":
|
|
1784
|
-
"latencyMs": 0.
|
|
1893
|
+
"passed": true,
|
|
1894
|
+
"latencyMs": 0.088,
|
|
1785
1895
|
"evidenceCount": 2,
|
|
1786
1896
|
"evidenceIds": [
|
|
1787
|
-
"
|
|
1788
|
-
"
|
|
1897
|
+
"01KRPC8K3V9JQY1TQFJQDWGHGM",
|
|
1898
|
+
"01KRPC8K3TKQJ65V280YRNH91B"
|
|
1789
1899
|
],
|
|
1790
1900
|
"recommendedActions": [
|
|
1791
1901
|
"Review retrieved memory before acting."
|
|
@@ -1793,7 +1903,9 @@
|
|
|
1793
1903
|
"summary": "Retrieved policy-like memory with lexical overlap, but without Audrey Guard lineage.",
|
|
1794
1904
|
"recallErrors": [],
|
|
1795
1905
|
"leakedSecrets": [],
|
|
1796
|
-
"
|
|
1906
|
+
"hasEvidenceForDecision": true,
|
|
1907
|
+
"lineageTextMatched": false,
|
|
1908
|
+
"requiredEvidenceMatched": true
|
|
1797
1909
|
},
|
|
1798
1910
|
{
|
|
1799
1911
|
"system": "Vector Only",
|
|
@@ -1803,12 +1915,12 @@
|
|
|
1803
1915
|
"decision": "block",
|
|
1804
1916
|
"decisionCorrect": true,
|
|
1805
1917
|
"riskScore": 0.85,
|
|
1806
|
-
"passed":
|
|
1807
|
-
"latencyMs": 0.
|
|
1918
|
+
"passed": true,
|
|
1919
|
+
"latencyMs": 0.345,
|
|
1808
1920
|
"evidenceCount": 2,
|
|
1809
1921
|
"evidenceIds": [
|
|
1810
|
-
"
|
|
1811
|
-
"
|
|
1922
|
+
"01KRPC8K4CWWGSHGAYZ5JDF62G",
|
|
1923
|
+
"01KRPC8K4D0KF2C6EW79KC869P"
|
|
1812
1924
|
],
|
|
1813
1925
|
"recommendedActions": [
|
|
1814
1926
|
"Review retrieved memory before acting."
|
|
@@ -1816,7 +1928,9 @@
|
|
|
1816
1928
|
"summary": "Retrieved policy-like memory with lexical overlap, but without Audrey Guard lineage.",
|
|
1817
1929
|
"recallErrors": [],
|
|
1818
1930
|
"leakedSecrets": [],
|
|
1819
|
-
"
|
|
1931
|
+
"hasEvidenceForDecision": true,
|
|
1932
|
+
"lineageTextMatched": false,
|
|
1933
|
+
"requiredEvidenceMatched": true
|
|
1820
1934
|
},
|
|
1821
1935
|
{
|
|
1822
1936
|
"system": "FTS Only",
|
|
@@ -1827,13 +1941,15 @@
|
|
|
1827
1941
|
"decisionCorrect": false,
|
|
1828
1942
|
"riskScore": 0,
|
|
1829
1943
|
"passed": false,
|
|
1830
|
-
"latencyMs": 0.
|
|
1944
|
+
"latencyMs": 0.329,
|
|
1831
1945
|
"evidenceCount": 0,
|
|
1832
1946
|
"evidenceIds": [],
|
|
1833
1947
|
"recommendedActions": [],
|
|
1834
1948
|
"summary": "No memory signal found by this baseline.",
|
|
1835
1949
|
"recallErrors": [],
|
|
1836
1950
|
"leakedSecrets": [],
|
|
1951
|
+
"hasEvidenceForDecision": false,
|
|
1952
|
+
"lineageTextMatched": false,
|
|
1837
1953
|
"requiredEvidenceMatched": false
|
|
1838
1954
|
}
|
|
1839
1955
|
]
|
|
@@ -1852,22 +1968,22 @@
|
|
|
1852
1968
|
"decisionCorrect": true,
|
|
1853
1969
|
"riskScore": 0.85,
|
|
1854
1970
|
"passed": true,
|
|
1855
|
-
"latencyMs":
|
|
1971
|
+
"latencyMs": 30.791,
|
|
1856
1972
|
"evidenceCount": 13,
|
|
1857
1973
|
"evidenceIds": [
|
|
1858
|
-
"
|
|
1859
|
-
"
|
|
1860
|
-
"
|
|
1861
|
-
"
|
|
1862
|
-
"
|
|
1863
|
-
"
|
|
1864
|
-
"
|
|
1865
|
-
"
|
|
1866
|
-
"
|
|
1867
|
-
"
|
|
1868
|
-
"
|
|
1869
|
-
"
|
|
1870
|
-
"
|
|
1974
|
+
"01KRPC8PQ72DA5K79S9YZ7N381",
|
|
1975
|
+
"01KRPC8PQ6YCVWK55HP85M0JKB",
|
|
1976
|
+
"01KRPC8PMZ7SZFK6P2HCZQF23X",
|
|
1977
|
+
"01KRPC8PHVXXXJ1HRFGXQ9SNZD",
|
|
1978
|
+
"01KRPC8PE7CP3E77NRQKFWB01Z",
|
|
1979
|
+
"01KRPC8PC7C083T4QRW0PB54W0",
|
|
1980
|
+
"01KRPC8P76C1BBHBKMW79XHVPA",
|
|
1981
|
+
"01KRPC8NSJ25DKGHN9RM5EKGSZ",
|
|
1982
|
+
"01KRPC8NSFC7N7AHWGCBNHXP2P",
|
|
1983
|
+
"01KRPC8MWXZ9DVQJ2QAFM2EJJC",
|
|
1984
|
+
"01KRPC8MV37S2ZR305M1PCPCJA",
|
|
1985
|
+
"01KRPC8KZNCXB2CYDMJ6QVV5CJ",
|
|
1986
|
+
"01KRPC8K5SHHV6HE5MQ10DSKAT"
|
|
1871
1987
|
],
|
|
1872
1988
|
"recommendedActions": [
|
|
1873
1989
|
"Do not proceed until the high-severity memory warning is addressed.",
|
|
@@ -1877,6 +1993,8 @@
|
|
|
1877
1993
|
"summary": "Blocked: 13 memory signals, 1 high severity, 12 medium severity found before acting.",
|
|
1878
1994
|
"recallErrors": [],
|
|
1879
1995
|
"leakedSecrets": [],
|
|
1996
|
+
"hasEvidenceForDecision": true,
|
|
1997
|
+
"lineageTextMatched": true,
|
|
1880
1998
|
"requiredEvidenceMatched": true
|
|
1881
1999
|
},
|
|
1882
2000
|
{
|
|
@@ -1888,13 +2006,15 @@
|
|
|
1888
2006
|
"decisionCorrect": false,
|
|
1889
2007
|
"riskScore": 0,
|
|
1890
2008
|
"passed": false,
|
|
1891
|
-
"latencyMs": 0.
|
|
2009
|
+
"latencyMs": 0.009,
|
|
1892
2010
|
"evidenceCount": 0,
|
|
1893
2011
|
"evidenceIds": [],
|
|
1894
2012
|
"recommendedActions": [],
|
|
1895
2013
|
"summary": "No memory baseline always allows proposed actions.",
|
|
1896
2014
|
"recallErrors": [],
|
|
1897
2015
|
"leakedSecrets": [],
|
|
2016
|
+
"hasEvidenceForDecision": false,
|
|
2017
|
+
"lineageTextMatched": false,
|
|
1898
2018
|
"requiredEvidenceMatched": false
|
|
1899
2019
|
},
|
|
1900
2020
|
{
|
|
@@ -1905,11 +2025,11 @@
|
|
|
1905
2025
|
"decision": "block",
|
|
1906
2026
|
"decisionCorrect": true,
|
|
1907
2027
|
"riskScore": 0.85,
|
|
1908
|
-
"passed":
|
|
1909
|
-
"latencyMs": 0.
|
|
2028
|
+
"passed": true,
|
|
2029
|
+
"latencyMs": 0.462,
|
|
1910
2030
|
"evidenceCount": 1,
|
|
1911
2031
|
"evidenceIds": [
|
|
1912
|
-
"
|
|
2032
|
+
"01KRPC8V0CK77K0V6ZKRC1T15A"
|
|
1913
2033
|
],
|
|
1914
2034
|
"recommendedActions": [
|
|
1915
2035
|
"Review retrieved memory before acting."
|
|
@@ -1917,7 +2037,9 @@
|
|
|
1917
2037
|
"summary": "Retrieved policy-like memory with lexical overlap, but without Audrey Guard lineage.",
|
|
1918
2038
|
"recallErrors": [],
|
|
1919
2039
|
"leakedSecrets": [],
|
|
1920
|
-
"
|
|
2040
|
+
"hasEvidenceForDecision": true,
|
|
2041
|
+
"lineageTextMatched": false,
|
|
2042
|
+
"requiredEvidenceMatched": true
|
|
1921
2043
|
},
|
|
1922
2044
|
{
|
|
1923
2045
|
"system": "Vector Only",
|
|
@@ -1928,14 +2050,14 @@
|
|
|
1928
2050
|
"decisionCorrect": false,
|
|
1929
2051
|
"riskScore": 0.35,
|
|
1930
2052
|
"passed": false,
|
|
1931
|
-
"latencyMs": 1.
|
|
2053
|
+
"latencyMs": 1.051,
|
|
1932
2054
|
"evidenceCount": 5,
|
|
1933
2055
|
"evidenceIds": [
|
|
1934
|
-
"
|
|
1935
|
-
"
|
|
1936
|
-
"
|
|
1937
|
-
"
|
|
1938
|
-
"
|
|
2056
|
+
"01KRPC8VCE8VNRWCGWMC1VYNA9",
|
|
2057
|
+
"01KRPC8VCG3GQ7EPDPV9RQ23JA",
|
|
2058
|
+
"01KRPC8VG14K20MGW0C8N1WDGH",
|
|
2059
|
+
"01KRPC8VK28WY2BM5BB3AR9NPA",
|
|
2060
|
+
"01KRPC8V709AFR44CVFQB5MAFW"
|
|
1939
2061
|
],
|
|
1940
2062
|
"recommendedActions": [
|
|
1941
2063
|
"Treat retrieved memory as advisory context."
|
|
@@ -1943,7 +2065,9 @@
|
|
|
1943
2065
|
"summary": "Retrieved related memory, but no controller converted it into a strict guard decision.",
|
|
1944
2066
|
"recallErrors": [],
|
|
1945
2067
|
"leakedSecrets": [],
|
|
1946
|
-
"
|
|
2068
|
+
"hasEvidenceForDecision": true,
|
|
2069
|
+
"lineageTextMatched": false,
|
|
2070
|
+
"requiredEvidenceMatched": true
|
|
1947
2071
|
},
|
|
1948
2072
|
{
|
|
1949
2073
|
"system": "FTS Only",
|
|
@@ -1954,20 +2078,22 @@
|
|
|
1954
2078
|
"decisionCorrect": false,
|
|
1955
2079
|
"riskScore": 0,
|
|
1956
2080
|
"passed": false,
|
|
1957
|
-
"latencyMs": 0.
|
|
2081
|
+
"latencyMs": 0.545,
|
|
1958
2082
|
"evidenceCount": 0,
|
|
1959
2083
|
"evidenceIds": [],
|
|
1960
2084
|
"recommendedActions": [],
|
|
1961
2085
|
"summary": "No memory signal found by this baseline.",
|
|
1962
2086
|
"recallErrors": [],
|
|
1963
2087
|
"leakedSecrets": [],
|
|
2088
|
+
"hasEvidenceForDecision": false,
|
|
2089
|
+
"lineageTextMatched": false,
|
|
1964
2090
|
"requiredEvidenceMatched": false
|
|
1965
2091
|
}
|
|
1966
2092
|
]
|
|
1967
2093
|
}
|
|
1968
2094
|
],
|
|
1969
2095
|
"artifactRedactionSweep": {
|
|
1970
|
-
"checkedAt": "2026-05-
|
|
2096
|
+
"checkedAt": "2026-05-15T17:52:12.780Z",
|
|
1971
2097
|
"filesChecked": [
|
|
1972
2098
|
"benchmarks/output/guardbench-manifest.json",
|
|
1973
2099
|
"benchmarks/output/guardbench-raw.json",
|