principles-disciple 1.92.0 → 1.94.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
- package/src/hooks/after-tool-call-helpers.ts +577 -0
- package/src/hooks/after-tool-call-types.ts +105 -0
- package/src/hooks/gate-block-helper.ts +72 -29
- package/src/hooks/llm.ts +49 -29
- package/src/hooks/pain.ts +176 -462
- package/src/hooks/trajectory-evidence.ts +75 -0
- package/src/hooks/triage-adapter.ts +156 -0
- package/tests/hooks/gate-block-helper-profile.test.ts +186 -0
- package/tests/hooks/pain.test.ts +288 -0
- package/tests/hooks/triage-adapter.test.ts +260 -0
package/tests/hooks/pain.test.ts
CHANGED
|
@@ -8,9 +8,14 @@ import { WorkspaceContext } from '../../src/core/workspace-context.js';
|
|
|
8
8
|
import { EventLogService } from '../../src/core/event-log.js';
|
|
9
9
|
import { setInjectedProbationIds, clearSession } from '../../src/core/session-tracker.js';
|
|
10
10
|
import { resetPainDiagnosticGateForTest } from '../../src/core/pain-diagnostic-gate.js';
|
|
11
|
+
import { loadFeatureFlagFromConfig } from '../../src/core/pd-config-loader.js';
|
|
11
12
|
|
|
12
13
|
vi.mock('fs');
|
|
13
14
|
vi.mock('../../src/utils/io.js');
|
|
15
|
+
vi.mock('../../src/core/pd-config-loader.js', () => ({
|
|
16
|
+
loadPdConfigForPlugin: vi.fn(() => ({ ok: true, source: 'mock', effective: {}, errors: [] })),
|
|
17
|
+
loadFeatureFlagFromConfig: vi.fn(() => ({ enabled: false, source: 'mock' })),
|
|
18
|
+
}));
|
|
14
19
|
vi.mock('../../src/core/evolution-engine.js', () => ({
|
|
15
20
|
recordEvolutionSuccess: vi.fn(),
|
|
16
21
|
recordEvolutionFailure: vi.fn(),
|
|
@@ -480,4 +485,287 @@ describe('Post-Write Checks & Pain Hook', () => {
|
|
|
480
485
|
}));
|
|
481
486
|
});
|
|
482
487
|
|
|
488
|
+
it('PEAT-B1: triage evidence_only returns early before PainDiagnosticGate (no cooldown pollution)', () => {
|
|
489
|
+
// Enable the evidence triage feature flag
|
|
490
|
+
vi.mocked(loadFeatureFlagFromConfig).mockReturnValue({ enabled: true, source: 'test' });
|
|
491
|
+
|
|
492
|
+
const mockCtx = { workspaceDir, sessionId: 's-triage-evidence', api: { logger: {} } };
|
|
493
|
+
const mockEvent = {
|
|
494
|
+
toolName: 'write',
|
|
495
|
+
params: { file_path: 'src/main.ts' },
|
|
496
|
+
error: 'Permission denied',
|
|
497
|
+
result: { exitCode: 1 },
|
|
498
|
+
};
|
|
499
|
+
|
|
500
|
+
vi.mocked(ioUtils.normalizePath).mockReturnValue('src/main.ts');
|
|
501
|
+
vi.mocked(ioUtils.isRisky).mockReturnValue(false);
|
|
502
|
+
vi.mocked(fs.existsSync).mockReturnValue(false);
|
|
503
|
+
|
|
504
|
+
handleAfterToolCall(mockEvent as any, mockCtx as any);
|
|
505
|
+
|
|
506
|
+
// Core assertion: pain_detected event is NOT emitted — gate was not reached
|
|
507
|
+
expect(mockEmitSync).not.toHaveBeenCalled();
|
|
508
|
+
// Core assertion: recordPainSignal is NOT called — triage prevented gate evaluation
|
|
509
|
+
expect(mockEventLog.recordPainSignal).not.toHaveBeenCalled();
|
|
510
|
+
// Core assertion: trajectory pain event is NOT recorded — cooldown not polluted
|
|
511
|
+
expect(mockWctx.trajectory.recordPainEvent).not.toHaveBeenCalled();
|
|
512
|
+
// But tool call IS still tracked (friction tracking, not diagnosis)
|
|
513
|
+
expect(mockWctx.trajectory.recordToolCall).toHaveBeenCalledWith(expect.objectContaining({
|
|
514
|
+
sessionId: 's-triage-evidence',
|
|
515
|
+
toolName: 'write',
|
|
516
|
+
outcome: 'failure',
|
|
517
|
+
}));
|
|
518
|
+
});
|
|
519
|
+
|
|
520
|
+
it('PEAT-B1: triage admit proceeds to PainDiagnosticGate and cooldown', () => {
|
|
521
|
+
// For owner_reported source kinds, triage admits, so gate IS reached
|
|
522
|
+
vi.mocked(loadFeatureFlagFromConfig).mockReturnValue({ enabled: true, source: 'test' });
|
|
523
|
+
|
|
524
|
+
const mockCtx = { workspaceDir, sessionId: 's-triage-admit', api: { logger: {} } };
|
|
525
|
+
// Manual pain command — triggers the manual pain path
|
|
526
|
+
const mockEvent = {
|
|
527
|
+
toolName: 'pain',
|
|
528
|
+
params: { input: 'test pain' },
|
|
529
|
+
result: { exitCode: 0 },
|
|
530
|
+
error: undefined,
|
|
531
|
+
};
|
|
532
|
+
|
|
533
|
+
handleAfterToolCall(mockEvent as any, mockCtx as any);
|
|
534
|
+
|
|
535
|
+
// Core assertion: pain_detected event IS emitted — gate WAS reached
|
|
536
|
+
expect(mockEmitSync).toHaveBeenCalledWith(expect.objectContaining({
|
|
537
|
+
type: 'pain_detected',
|
|
538
|
+
}));
|
|
539
|
+
// Core assertion: recordPainSignal IS called
|
|
540
|
+
expect(mockEventLog.recordPainSignal).toHaveBeenCalledWith(
|
|
541
|
+
's-triage-admit',
|
|
542
|
+
expect.objectContaining({ score: 100, source: 'manual' }),
|
|
543
|
+
);
|
|
544
|
+
});
|
|
545
|
+
|
|
546
|
+
});
|
|
547
|
+
|
|
548
|
+
// ── PRI-326: Decomposed Pipeline Tests ────────────────────────────────────────
|
|
549
|
+
|
|
550
|
+
import {
|
|
551
|
+
classifyToolCallOutcome,
|
|
552
|
+
buildToolCallObservation,
|
|
553
|
+
handleProbationFeedback,
|
|
554
|
+
evaluatePainAdmissionForToolCall,
|
|
555
|
+
} from '../../src/hooks/after-tool-call-helpers.js';
|
|
556
|
+
import type { ToolCallOutcome, ToolCallObservation } from '../../src/hooks/after-tool-call-types.js';
|
|
557
|
+
|
|
558
|
+
describe('PRI-326: classifyToolCallOutcome', () => {
|
|
559
|
+
it('returns success for exitCode 0 with no error', () => {
|
|
560
|
+
const result = classifyToolCallOutcome({
|
|
561
|
+
toolName: 'read',
|
|
562
|
+
params: {},
|
|
563
|
+
result: { exitCode: 0 },
|
|
564
|
+
error: undefined,
|
|
565
|
+
} as any);
|
|
566
|
+
expect(result.isFailure).toBe(false);
|
|
567
|
+
expect(result.exitCode).toBe(0);
|
|
568
|
+
expect(result.failureSource).toBeUndefined();
|
|
569
|
+
});
|
|
570
|
+
|
|
571
|
+
it('detects failure from top-level exitCode', () => {
|
|
572
|
+
const result = classifyToolCallOutcome({
|
|
573
|
+
toolName: 'bash',
|
|
574
|
+
params: {},
|
|
575
|
+
result: { exitCode: 1 },
|
|
576
|
+
error: undefined,
|
|
577
|
+
} as any);
|
|
578
|
+
expect(result.isFailure).toBe(true);
|
|
579
|
+
expect(result.exitCode).toBe(1);
|
|
580
|
+
expect(result.failureSource).toBe('tool_failure');
|
|
581
|
+
});
|
|
582
|
+
|
|
583
|
+
it('falls back to nested details.exitCode', () => {
|
|
584
|
+
const result = classifyToolCallOutcome({
|
|
585
|
+
toolName: 'bash',
|
|
586
|
+
params: {},
|
|
587
|
+
result: { details: { exitCode: 2 } },
|
|
588
|
+
error: undefined,
|
|
589
|
+
} as any);
|
|
590
|
+
expect(result.isFailure).toBe(true);
|
|
591
|
+
expect(result.exitCode).toBe(2);
|
|
592
|
+
});
|
|
593
|
+
|
|
594
|
+
it('prefers top-level exitCode over nested', () => {
|
|
595
|
+
const result = classifyToolCallOutcome({
|
|
596
|
+
toolName: 'bash',
|
|
597
|
+
params: {},
|
|
598
|
+
result: { exitCode: 0, details: { exitCode: 1 } },
|
|
599
|
+
error: undefined,
|
|
600
|
+
} as any);
|
|
601
|
+
expect(result.isFailure).toBe(false);
|
|
602
|
+
expect(result.exitCode).toBe(0);
|
|
603
|
+
});
|
|
604
|
+
|
|
605
|
+
it('detects failure from error field even with exitCode 0', () => {
|
|
606
|
+
const result = classifyToolCallOutcome({
|
|
607
|
+
toolName: 'write',
|
|
608
|
+
params: {},
|
|
609
|
+
result: { exitCode: 0 },
|
|
610
|
+
error: 'Permission denied',
|
|
611
|
+
} as any);
|
|
612
|
+
expect(result.isFailure).toBe(true);
|
|
613
|
+
expect(result.failureSource).toBe('tool_failure');
|
|
614
|
+
});
|
|
615
|
+
|
|
616
|
+
it('classifies dispatch_error for tool not found', () => {
|
|
617
|
+
const result = classifyToolCallOutcome({
|
|
618
|
+
toolName: 'read',
|
|
619
|
+
params: {},
|
|
620
|
+
result: { exitCode: 1 },
|
|
621
|
+
error: 'tool read_file not found',
|
|
622
|
+
} as any);
|
|
623
|
+
expect(result.isFailure).toBe(true);
|
|
624
|
+
expect(result.failureSource).toBe('dispatch_error');
|
|
625
|
+
});
|
|
626
|
+
|
|
627
|
+
it('treats non-numeric exitCode as 0', () => {
|
|
628
|
+
const result = classifyToolCallOutcome({
|
|
629
|
+
toolName: 'bash',
|
|
630
|
+
params: {},
|
|
631
|
+
result: { exitCode: '0' as any },
|
|
632
|
+
error: undefined,
|
|
633
|
+
} as any);
|
|
634
|
+
expect(result.isFailure).toBe(false);
|
|
635
|
+
});
|
|
636
|
+
});
|
|
637
|
+
|
|
638
|
+
describe('PRI-326: evaluatePainAdmissionForToolCall', () => {
|
|
639
|
+
const workspaceDir = '/mock/workspace';
|
|
640
|
+
const mockConfig = { get: vi.fn().mockReturnValue(undefined) };
|
|
641
|
+
const baseOutcome: ToolCallOutcome = { isFailure: true, exitCode: 1, failureSource: 'tool_failure' };
|
|
642
|
+
const baseObservation: ToolCallObservation = {
|
|
643
|
+
params: { filePath: 'src/main.ts' },
|
|
644
|
+
relPath: 'src/main.ts',
|
|
645
|
+
isRisk: false,
|
|
646
|
+
errorType: 'Other',
|
|
647
|
+
errorHash: 'abc123',
|
|
648
|
+
errorText: 'Permission denied',
|
|
649
|
+
painScore: 10,
|
|
650
|
+
traceId: 'trace-123',
|
|
651
|
+
};
|
|
652
|
+
|
|
653
|
+
beforeEach(() => {
|
|
654
|
+
vi.clearAllMocks();
|
|
655
|
+
resetPainDiagnosticGateForTest();
|
|
656
|
+
vi.mocked(loadFeatureFlagFromConfig).mockReturnValue({ enabled: false, source: 'test' });
|
|
657
|
+
});
|
|
658
|
+
|
|
659
|
+
it('returns not_applicable for non-write tool', () => {
|
|
660
|
+
const result = evaluatePainAdmissionForToolCall(
|
|
661
|
+
{ toolName: 'read' } as any, baseObservation, baseOutcome, undefined, undefined, 's1', workspaceDir, mockConfig
|
|
662
|
+
);
|
|
663
|
+
expect(result.stage).toBe('not_applicable');
|
|
664
|
+
expect(result.admitted).toBe(false);
|
|
665
|
+
});
|
|
666
|
+
|
|
667
|
+
it('returns not_applicable for success', () => {
|
|
668
|
+
const successOutcome: ToolCallOutcome = { isFailure: false, exitCode: 0, failureSource: undefined };
|
|
669
|
+
const result = evaluatePainAdmissionForToolCall(
|
|
670
|
+
{ toolName: 'write' } as any, baseObservation, successOutcome, undefined, undefined, 's1', workspaceDir, mockConfig
|
|
671
|
+
);
|
|
672
|
+
expect(result.stage).toBe('not_applicable');
|
|
673
|
+
});
|
|
674
|
+
|
|
675
|
+
it('returns triage_evidence_only when feature flag on and tool_failure triage rejects', () => {
|
|
676
|
+
vi.mocked(loadFeatureFlagFromConfig).mockReturnValue({ enabled: true, source: 'test' });
|
|
677
|
+
|
|
678
|
+
const result = evaluatePainAdmissionForToolCall(
|
|
679
|
+
{ toolName: 'write' } as any, baseObservation, baseOutcome, undefined, undefined, 's1', workspaceDir, mockConfig
|
|
680
|
+
);
|
|
681
|
+
expect(result.stage).toBe('triage_evidence_only');
|
|
682
|
+
expect(result.admitted).toBe(false);
|
|
683
|
+
expect(result.reason).toBeTruthy();
|
|
684
|
+
});
|
|
685
|
+
|
|
686
|
+
it('returns gate_admitted when consecutive errors exceed repeatedFailure threshold', () => {
|
|
687
|
+
vi.mocked(loadFeatureFlagFromConfig).mockReturnValue({ enabled: false, source: 'test' });
|
|
688
|
+
// consecutiveErrors=5 >= default repeatedFailure threshold of 4 → gate admits via repeated_failure
|
|
689
|
+
const highConsecutiveState = { currentGfi: 0, consecutiveErrors: 5, lastErrorHash: 'abc123' } as any;
|
|
690
|
+
|
|
691
|
+
const result = evaluatePainAdmissionForToolCall(
|
|
692
|
+
{ toolName: 'write' } as any, baseObservation, baseOutcome, highConsecutiveState, undefined, 's-gate-admitted-test', workspaceDir, mockConfig
|
|
693
|
+
);
|
|
694
|
+
expect(result.stage).toBe('gate_admitted');
|
|
695
|
+
expect(result.admitted).toBe(true);
|
|
696
|
+
expect(result.gateResult?.shouldDiagnose).toBe(true);
|
|
697
|
+
expect(result.gateResult?.reason).toBe('repeated_failure');
|
|
698
|
+
});
|
|
699
|
+
|
|
700
|
+
it('includes reason and detail in every decision', () => {
|
|
701
|
+
const result = evaluatePainAdmissionForToolCall(
|
|
702
|
+
{ toolName: 'read' } as any, baseObservation, baseOutcome, undefined, undefined, 's1', workspaceDir, mockConfig
|
|
703
|
+
);
|
|
704
|
+
expect(result.reason).toBeTruthy();
|
|
705
|
+
expect(result.detail).toBeTruthy();
|
|
706
|
+
});
|
|
707
|
+
});
|
|
708
|
+
|
|
709
|
+
describe('PRI-326: buildToolCallObservation params defense', () => {
|
|
710
|
+
const profile = { risk_paths: [] } as any;
|
|
711
|
+
|
|
712
|
+
it('handles null params without crashing', () => {
|
|
713
|
+
const outcome: ToolCallOutcome = { isFailure: true, exitCode: 1, failureSource: 'tool_failure' };
|
|
714
|
+
const result = buildToolCallObservation(
|
|
715
|
+
{ params: null, error: 'fail', result: {} } as any,
|
|
716
|
+
outcome, '/workspace', profile
|
|
717
|
+
);
|
|
718
|
+
expect(result.relPath).toBe('unknown');
|
|
719
|
+
expect(result.params.filePath).toBeUndefined();
|
|
720
|
+
});
|
|
721
|
+
|
|
722
|
+
it('handles undefined params without crashing', () => {
|
|
723
|
+
const outcome: ToolCallOutcome = { isFailure: true, exitCode: 1, failureSource: 'tool_failure' };
|
|
724
|
+
const result = buildToolCallObservation(
|
|
725
|
+
{ params: undefined, error: 'fail', result: {} } as any,
|
|
726
|
+
outcome, '/workspace', profile
|
|
727
|
+
);
|
|
728
|
+
expect(result.relPath).toBe('unknown');
|
|
729
|
+
});
|
|
730
|
+
|
|
731
|
+
it('handles array params without crashing', () => {
|
|
732
|
+
const outcome: ToolCallOutcome = { isFailure: true, exitCode: 1, failureSource: 'tool_failure' };
|
|
733
|
+
const result = buildToolCallObservation(
|
|
734
|
+
{ params: ['bad'], error: 'fail', result: {} } as any,
|
|
735
|
+
outcome, '/workspace', profile
|
|
736
|
+
);
|
|
737
|
+
expect(result.relPath).toBe('unknown');
|
|
738
|
+
});
|
|
739
|
+
|
|
740
|
+
it('handles string params without crashing', () => {
|
|
741
|
+
const outcome: ToolCallOutcome = { isFailure: true, exitCode: 1, failureSource: 'tool_failure' };
|
|
742
|
+
const result = buildToolCallObservation(
|
|
743
|
+
{ params: 'not-an-object', error: 'fail', result: {} } as any,
|
|
744
|
+
outcome, '/workspace', profile
|
|
745
|
+
);
|
|
746
|
+
expect(result.relPath).toBe('unknown');
|
|
747
|
+
});
|
|
748
|
+
});
|
|
749
|
+
|
|
750
|
+
describe('PRI-326: buildToolCallObservation unserializable result defense', () => {
|
|
751
|
+
const profile = { risk_paths: [] } as any;
|
|
752
|
+
const outcome: ToolCallOutcome = { isFailure: true, exitCode: 1, failureSource: 'tool_failure' };
|
|
753
|
+
|
|
754
|
+
it('handles BigInt result without crashing', () => {
|
|
755
|
+
const result = buildToolCallObservation(
|
|
756
|
+
{ params: {}, error: undefined, result: { val: BigInt(42) } } as any,
|
|
757
|
+
outcome, '/workspace', profile
|
|
758
|
+
);
|
|
759
|
+
expect(result.errorText).toContain('unserializable result');
|
|
760
|
+
});
|
|
761
|
+
|
|
762
|
+
it('handles circular reference result without crashing', () => {
|
|
763
|
+
const circular: any = { name: 'loop' };
|
|
764
|
+
circular.self = circular;
|
|
765
|
+
const result = buildToolCallObservation(
|
|
766
|
+
{ params: {}, error: undefined, result: circular } as any,
|
|
767
|
+
outcome, '/workspace', profile
|
|
768
|
+
);
|
|
769
|
+
expect(result.errorText).toContain('unserializable result');
|
|
770
|
+
});
|
|
483
771
|
});
|
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Triage Adapter Tests — PEAT-B1
|
|
3
|
+
*
|
|
4
|
+
* Tests the plugin-side adapter that maps hook context to SourceKind
|
|
5
|
+
* and calls the pure triage policy from principles-core.
|
|
6
|
+
*
|
|
7
|
+
* ERR checklist:
|
|
8
|
+
* - ERR-001: Source kind resolved from runtime values, not `as` casts.
|
|
9
|
+
* - ERR-002: Every triage result has reason + nextAction.
|
|
10
|
+
* - ERR-024/025/048: Production-path tests for the adapter.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { describe, it, expect } from 'vitest';
|
|
14
|
+
import {
|
|
15
|
+
resolveSourceKindFromToolFailure,
|
|
16
|
+
resolveSourceKindFromLlmDetection,
|
|
17
|
+
resolveSourceKindFromGateBlock,
|
|
18
|
+
resolveSourceKindFromCommand,
|
|
19
|
+
resolveSourceKindFromProvider,
|
|
20
|
+
resolveSourceKindFromSubagent,
|
|
21
|
+
evaluateEvidenceTriage,
|
|
22
|
+
isHighConfidenceUnsafeAction,
|
|
23
|
+
} from '../../src/hooks/triage-adapter.js';
|
|
24
|
+
|
|
25
|
+
// ── resolveSourceKindFromToolFailure ────────────────────────────────────────
|
|
26
|
+
|
|
27
|
+
describe('resolveSourceKindFromToolFailure', () => {
|
|
28
|
+
it('maps pain tool to agent_on_owner_request with openclaw_context_bound', () => {
|
|
29
|
+
expect(resolveSourceKindFromToolFailure('pain', 'tool_failure', 'openclaw_context_bound')).toBe('agent_on_owner_request');
|
|
30
|
+
});
|
|
31
|
+
|
|
32
|
+
it('maps pain tool to owner_reported without openclaw_context_bound', () => {
|
|
33
|
+
expect(resolveSourceKindFromToolFailure('pain', 'tool_failure')).toBe('owner_reported');
|
|
34
|
+
expect(resolveSourceKindFromToolFailure('pain', 'tool_failure', 'automatic_hook')).toBe('owner_reported');
|
|
35
|
+
});
|
|
36
|
+
|
|
37
|
+
it('maps skill:pain to agent_on_owner_request with openclaw_context_bound', () => {
|
|
38
|
+
expect(resolveSourceKindFromToolFailure('skill:pain', 'tool_failure', 'openclaw_context_bound')).toBe('agent_on_owner_request');
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
it('maps dispatch_error to dispatch_error', () => {
|
|
42
|
+
expect(resolveSourceKindFromToolFailure('read', 'dispatch_error')).toBe('dispatch_error');
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
it('maps regular tool failure to tool_failure', () => {
|
|
46
|
+
expect(resolveSourceKindFromToolFailure('write', 'tool_failure')).toBe('tool_failure');
|
|
47
|
+
expect(resolveSourceKindFromToolFailure('exec', 'tool_failure')).toBe('tool_failure');
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
it('maps undefined tool name with tool_failure to tool_failure', () => {
|
|
51
|
+
expect(resolveSourceKindFromToolFailure(undefined, 'tool_failure')).toBe('tool_failure');
|
|
52
|
+
});
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
// ── resolveSourceKindFromLlmDetection ───────────────────────────────────────
|
|
56
|
+
|
|
57
|
+
describe('resolveSourceKindFromLlmDetection', () => {
|
|
58
|
+
it('maps gfi triggered to gfi_threshold', () => {
|
|
59
|
+
expect(resolveSourceKindFromLlmDetection('llm_some_rule', true)).toBe('gfi_threshold');
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
it('maps llm_paralysis to llm_paralysis', () => {
|
|
63
|
+
expect(resolveSourceKindFromLlmDetection('llm_paralysis', false)).toBe('llm_paralysis');
|
|
64
|
+
});
|
|
65
|
+
|
|
66
|
+
it('maps llm_* detection rules to semantic', () => {
|
|
67
|
+
expect(resolveSourceKindFromLlmDetection('llm_repetition', false)).toBe('semantic');
|
|
68
|
+
expect(resolveSourceKindFromLlmDetection('llm_loop', false)).toBe('semantic');
|
|
69
|
+
});
|
|
70
|
+
|
|
71
|
+
it('maps user_empathy to empathy_inferred', () => {
|
|
72
|
+
expect(resolveSourceKindFromLlmDetection('user_empathy', false)).toBe('empathy_inferred');
|
|
73
|
+
});
|
|
74
|
+
|
|
75
|
+
it('maps unknown source to unknown', () => {
|
|
76
|
+
expect(resolveSourceKindFromLlmDetection('something_else', false)).toBe('unknown');
|
|
77
|
+
});
|
|
78
|
+
});
|
|
79
|
+
|
|
80
|
+
// ── Other resolve functions ─────────────────────────────────────────────────
|
|
81
|
+
|
|
82
|
+
describe('resolveSourceKindFromGateBlock', () => {
|
|
83
|
+
it('returns rulehost_block', () => {
|
|
84
|
+
expect(resolveSourceKindFromGateBlock()).toBe('rulehost_block');
|
|
85
|
+
});
|
|
86
|
+
});
|
|
87
|
+
|
|
88
|
+
describe('resolveSourceKindFromCommand', () => {
|
|
89
|
+
it('returns owner_reported', () => {
|
|
90
|
+
expect(resolveSourceKindFromCommand()).toBe('owner_reported');
|
|
91
|
+
});
|
|
92
|
+
});
|
|
93
|
+
|
|
94
|
+
describe('resolveSourceKindFromProvider', () => {
|
|
95
|
+
it('returns provider_failure for non-rate-limit', () => {
|
|
96
|
+
expect(resolveSourceKindFromProvider(false)).toBe('provider_failure');
|
|
97
|
+
});
|
|
98
|
+
|
|
99
|
+
it('returns rate_limit for rate-limit', () => {
|
|
100
|
+
expect(resolveSourceKindFromProvider(true)).toBe('rate_limit');
|
|
101
|
+
});
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
describe('resolveSourceKindFromSubagent', () => {
|
|
105
|
+
it('returns subagent_error', () => {
|
|
106
|
+
expect(resolveSourceKindFromSubagent()).toBe('subagent_error');
|
|
107
|
+
});
|
|
108
|
+
});
|
|
109
|
+
|
|
110
|
+
// ── evaluateEvidenceTriage ──────────────────────────────────────────────────
|
|
111
|
+
|
|
112
|
+
describe('evaluateEvidenceTriage', () => {
|
|
113
|
+
it('admits owner_reported regardless of score', () => {
|
|
114
|
+
const result = evaluateEvidenceTriage('owner_reported', 100);
|
|
115
|
+
expect(result.decision).toBe('admit');
|
|
116
|
+
expect(result.reason).toBeTruthy();
|
|
117
|
+
expect(result.nextAction).toBeTruthy();
|
|
118
|
+
});
|
|
119
|
+
|
|
120
|
+
it('returns evidence_only for tool_failure', () => {
|
|
121
|
+
const result = evaluateEvidenceTriage('tool_failure', 70);
|
|
122
|
+
expect(result.decision).toBe('evidence_only');
|
|
123
|
+
expect(result.reason).toBeTruthy();
|
|
124
|
+
expect(result.nextAction).toBeTruthy();
|
|
125
|
+
});
|
|
126
|
+
|
|
127
|
+
it('returns health_only for provider_failure', () => {
|
|
128
|
+
const result = evaluateEvidenceTriage('provider_failure', 60);
|
|
129
|
+
expect(result.decision).toBe('health_only');
|
|
130
|
+
});
|
|
131
|
+
|
|
132
|
+
it('returns owner_confirm for empathy_inferred', () => {
|
|
133
|
+
const result = evaluateEvidenceTriage('empathy_inferred', 80);
|
|
134
|
+
expect(result.decision).toBe('owner_confirm');
|
|
135
|
+
});
|
|
136
|
+
|
|
137
|
+
it('admits rulehost_block when isUnsafeHighConfidence is true', () => {
|
|
138
|
+
const result = evaluateEvidenceTriage('rulehost_block', 80, { isUnsafeHighConfidence: true });
|
|
139
|
+
expect(result.decision).toBe('admit');
|
|
140
|
+
});
|
|
141
|
+
|
|
142
|
+
it('returns evidence_only for rulehost_block when isUnsafeHighConfidence is false', () => {
|
|
143
|
+
const result = evaluateEvidenceTriage('rulehost_block', 80, { isUnsafeHighConfidence: false });
|
|
144
|
+
expect(result.decision).toBe('evidence_only');
|
|
145
|
+
});
|
|
146
|
+
});
|
|
147
|
+
|
|
148
|
+
// ── isHighConfidenceUnsafeAction ─────────────────────────────────────────────
|
|
149
|
+
|
|
150
|
+
describe('isHighConfidenceUnsafeAction', () => {
|
|
151
|
+
it('returns true when isRisky and score >= 70', () => {
|
|
152
|
+
expect(isHighConfidenceUnsafeAction(70, true)).toBe(true);
|
|
153
|
+
expect(isHighConfidenceUnsafeAction(90, true)).toBe(true);
|
|
154
|
+
});
|
|
155
|
+
|
|
156
|
+
it('returns false when score < 70', () => {
|
|
157
|
+
expect(isHighConfidenceUnsafeAction(45, true)).toBe(false);
|
|
158
|
+
expect(isHighConfidenceUnsafeAction(69, true)).toBe(false);
|
|
159
|
+
});
|
|
160
|
+
|
|
161
|
+
it('returns false when not risky', () => {
|
|
162
|
+
expect(isHighConfidenceUnsafeAction(90, false)).toBe(false);
|
|
163
|
+
});
|
|
164
|
+
});
|
|
165
|
+
|
|
166
|
+
// ── Evidence-Only Cooldown Contract ──────────────────────────────────────────
|
|
167
|
+
//
|
|
168
|
+
// Core contract: when triage returns evidence_only/owner_confirm/health_only,
|
|
169
|
+
// the caller (hook) MUST NOT proceed to evaluatePainDiagnosticGate, which writes
|
|
170
|
+
// cooldown. These tests verify the adapter-level guarantee: non-admit decisions
|
|
171
|
+
// are surfaced clearly with the right nextAction, so the caller can distinguish
|
|
172
|
+
// evidence-only from admit.
|
|
173
|
+
|
|
174
|
+
describe('evidence-only cooldown contract', () => {
|
|
175
|
+
it('tool_failure returns evidence_only — no admit, caller must skip gate', () => {
|
|
176
|
+
const result = evaluateEvidenceTriage('tool_failure', 70);
|
|
177
|
+
expect(result.decision).toBe('evidence_only');
|
|
178
|
+
expect(result.decision).not.toBe('admit');
|
|
179
|
+
expect(result.nextAction).toContain('evidence');
|
|
180
|
+
});
|
|
181
|
+
|
|
182
|
+
it('dispatch_error returns evidence_only — no admit', () => {
|
|
183
|
+
const result = evaluateEvidenceTriage('dispatch_error', 50);
|
|
184
|
+
expect(result.decision).toBe('evidence_only');
|
|
185
|
+
expect(result.decision).not.toBe('admit');
|
|
186
|
+
});
|
|
187
|
+
|
|
188
|
+
it('semantic (LLM detection) returns evidence_only — no admit', () => {
|
|
189
|
+
const result = evaluateEvidenceTriage('semantic', 55);
|
|
190
|
+
expect(result.decision).toBe('evidence_only');
|
|
191
|
+
expect(result.decision).not.toBe('admit');
|
|
192
|
+
});
|
|
193
|
+
|
|
194
|
+
it('llm_paralysis returns evidence_only — no admit', () => {
|
|
195
|
+
const result = evaluateEvidenceTriage('llm_paralysis', 40);
|
|
196
|
+
expect(result.decision).toBe('evidence_only');
|
|
197
|
+
expect(result.decision).not.toBe('admit');
|
|
198
|
+
});
|
|
199
|
+
|
|
200
|
+
it('gfi_threshold returns evidence_only — no admit', () => {
|
|
201
|
+
const result = evaluateEvidenceTriage('gfi_threshold', 70);
|
|
202
|
+
expect(result.decision).toBe('evidence_only');
|
|
203
|
+
expect(result.decision).not.toBe('admit');
|
|
204
|
+
});
|
|
205
|
+
|
|
206
|
+
it('empathy_inferred returns owner_confirm — no admit', () => {
|
|
207
|
+
const result = evaluateEvidenceTriage('empathy_inferred', 80);
|
|
208
|
+
expect(result.decision).toBe('owner_confirm');
|
|
209
|
+
expect(result.decision).not.toBe('admit');
|
|
210
|
+
});
|
|
211
|
+
|
|
212
|
+
it('provider_failure returns health_only — no admit', () => {
|
|
213
|
+
const result = evaluateEvidenceTriage('provider_failure', 60);
|
|
214
|
+
expect(result.decision).toBe('health_only');
|
|
215
|
+
expect(result.decision).not.toBe('admit');
|
|
216
|
+
});
|
|
217
|
+
|
|
218
|
+
it('rulehost_block WITHOUT isUnsafeHighConfidence returns evidence_only — no admit', () => {
|
|
219
|
+
const result = evaluateEvidenceTriage('rulehost_block', 45);
|
|
220
|
+
expect(result.decision).toBe('evidence_only');
|
|
221
|
+
expect(result.decision).not.toBe('admit');
|
|
222
|
+
});
|
|
223
|
+
|
|
224
|
+
it('rulehost_block WITH isUnsafeHighConfidence=true upgrades to admit', () => {
|
|
225
|
+
// This is the ONLY path where rulehost_block reaches the gate
|
|
226
|
+
const result = evaluateEvidenceTriage('rulehost_block', 80, { isUnsafeHighConfidence: true });
|
|
227
|
+
expect(result.decision).toBe('admit');
|
|
228
|
+
expect(result.reason).toContain('unsafe');
|
|
229
|
+
});
|
|
230
|
+
|
|
231
|
+
it('every LLM-typical source kind produces non-admit decision (cooldown-safe)', () => {
|
|
232
|
+
// These are the source kinds that handleLlmOutput produces
|
|
233
|
+
const llmSources = [
|
|
234
|
+
{ kind: 'semantic' as const, score: 55 },
|
|
235
|
+
{ kind: 'llm_paralysis' as const, score: 40 },
|
|
236
|
+
{ kind: 'gfi_threshold' as const, score: 70 },
|
|
237
|
+
{ kind: 'empathy_inferred' as const, score: 80 },
|
|
238
|
+
];
|
|
239
|
+
for (const { kind, score } of llmSources) {
|
|
240
|
+
const result = evaluateEvidenceTriage(kind, score);
|
|
241
|
+
expect(result.decision).not.toBe('admit');
|
|
242
|
+
expect(result.reason).toBeTruthy();
|
|
243
|
+
expect(result.nextAction).toBeTruthy();
|
|
244
|
+
}
|
|
245
|
+
});
|
|
246
|
+
|
|
247
|
+
it('every after_tool_call-typical source kind produces non-admit decision (cooldown-safe)', () => {
|
|
248
|
+
// These are the source kinds that handleAfterToolCall produces
|
|
249
|
+
const toolSources = [
|
|
250
|
+
{ kind: 'tool_failure' as const, score: 70 },
|
|
251
|
+
{ kind: 'dispatch_error' as const, score: 50 },
|
|
252
|
+
];
|
|
253
|
+
for (const { kind, score } of toolSources) {
|
|
254
|
+
const result = evaluateEvidenceTriage(kind, score);
|
|
255
|
+
expect(result.decision).not.toBe('admit');
|
|
256
|
+
expect(result.reason).toBeTruthy();
|
|
257
|
+
expect(result.nextAction).toBeTruthy();
|
|
258
|
+
}
|
|
259
|
+
});
|
|
260
|
+
});
|