principles-disciple 1.93.0 → 1.94.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
- package/src/hooks/after-tool-call-helpers.ts +577 -0
- package/src/hooks/after-tool-call-types.ts +105 -0
- package/src/hooks/pain.ts +176 -482
- package/src/hooks/trajectory-evidence.ts +75 -0
- package/tests/hooks/pain.test.ts +225 -0
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Trajectory Evidence Builder — PRI-326
|
|
3
|
+
*
|
|
4
|
+
* Extracted from pain.ts to avoid circular imports between
|
|
5
|
+
* pain.ts and after-tool-call-helpers.ts.
|
|
6
|
+
*
|
|
7
|
+
* Pure data extraction — reads from trajectory DB, sanitizes, returns evidence entries.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { sanitizeAssistantText } from './message-sanitize.js';
|
|
11
|
+
import type { PainEvidenceEntry } from '@principles/core/runtime-v2';
|
|
12
|
+
import { MAX_EVIDENCE_ENTRIES, MAX_EVIDENCE_NOTE_CHARS } from '@principles/core/runtime-v2';
|
|
13
|
+
import type { WorkspaceContext } from '../core/workspace-context.js';
|
|
14
|
+
|
|
15
|
+
/**
 * Assembles pain-evidence entries for a session from the workspace trajectory store.
 *
 * Entries are produced in this order:
 *  1. the most recent user turn flagged `correctionDetected` (at most one entry),
 *  2. up to the last three assistant turns.
 *
 * When a lookup throws, a placeholder entry describing the failure is recorded
 * instead of propagating the error. When the trajectory store is missing or the
 * session id is the literal `'unknown'`, a single "unavailable" entry is returned.
 *
 * @param wctx - Workspace context; only `wctx.trajectory` is read.
 * @param sessionId - Session whose turns are inspected.
 * @returns At most `MAX_EVIDENCE_ENTRIES` entries.
 */
export function buildTrajectoryEvidence(wctx: WorkspaceContext, sessionId: string): PainEvidenceEntry[] {
  const entries: PainEvidenceEntry[] = [];

  // Every exit path caps the list the same way.
  const capped = (): PainEvidenceEntry[] => entries.slice(0, MAX_EVIDENCE_ENTRIES);

  // Text is truncated first, so the sanitizer only ever sees the first
  // MAX_EVIDENCE_NOTE_CHARS characters of a turn.
  const toNote = (text: string | null | undefined) =>
    sanitizeAssistantText((text ?? '').slice(0, MAX_EVIDENCE_NOTE_CHARS));

  // Error descriptions embedded in notes are limited to 100 characters.
  const describeError = (err: unknown): string => String(err).slice(0, 100);

  if (!wctx.trajectory || sessionId === 'unknown') {
    const cause = wctx.trajectory ? 'unknown_session' : 'no_trajectory_db';
    entries.push({
      sourceRef: 'owner_message:unavailable',
      note: `trajectory_unavailable: ${cause}`,
    });
    return capped();
  }

  // Owner side: only the latest turn flagged as a correction is reported.
  try {
    const ownerTurns = wctx.trajectory.listUserTurnsForSession(sessionId) ?? [];
    const latestCorrection = ownerTurns.filter(turn => turn.correctionDetected).pop();
    if (latestCorrection) {
      const ownerNote = toNote(latestCorrection.rawExcerpt);
      entries.push({
        sourceRef: `owner_message:${latestCorrection.createdAt}`,
        note: ownerNote,
      });
    }
  } catch (err) {
    entries.push({
      sourceRef: 'owner_message:unavailable',
      note: `trajectory_user_turns_unavailable: ${describeError(err)}`,
    });
  }

  // Agent side: the last three assistant turns, as long as there is room left.
  try {
    const agentTurns = wctx.trajectory.listAssistantTurns(sessionId) ?? [];
    for (const agentTurn of agentTurns.slice(-3)) {
      if (entries.length >= MAX_EVIDENCE_ENTRIES) {
        break;
      }
      const agentNote = toNote(agentTurn.sanitizedText);
      entries.push({
        sourceRef: `agent_turn:${agentTurn.createdAt}`,
        note: agentNote,
      });
    }
  } catch (err) {
    if (entries.length < MAX_EVIDENCE_ENTRIES) {
      entries.push({
        sourceRef: 'agent_turn:unavailable',
        note: `trajectory_assistant_turns_unavailable: ${describeError(err)}`,
      });
    }
  }

  // Neither lookup yielded anything and neither failed: say so explicitly.
  if (entries.length === 0) {
    entries.push({
      sourceRef: 'trajectory:empty',
      note: 'trajectory_available_but_empty: no user correction or assistant turns found',
    });
  }

  return capped();
}
|
package/tests/hooks/pain.test.ts
CHANGED
|
@@ -544,3 +544,228 @@ describe('Post-Write Checks & Pain Hook', () => {
|
|
|
544
544
|
});
|
|
545
545
|
|
|
546
546
|
});
|
|
547
|
+
|
|
548
|
+
// ── PRI-326: Decomposed Pipeline Tests ────────────────────────────────────────
|
|
549
|
+
|
|
550
|
+
import {
|
|
551
|
+
classifyToolCallOutcome,
|
|
552
|
+
buildToolCallObservation,
|
|
553
|
+
handleProbationFeedback,
|
|
554
|
+
evaluatePainAdmissionForToolCall,
|
|
555
|
+
} from '../../src/hooks/after-tool-call-helpers.js';
|
|
556
|
+
import type { ToolCallOutcome, ToolCallObservation } from '../../src/hooks/after-tool-call-types.js';
|
|
557
|
+
|
|
558
|
+
describe('PRI-326: classifyToolCallOutcome', () => {
  // Every case sends the same event shape with empty params; only the tool
  // name, the result payload and the error field vary between cases.
  const classify = (toolName: string, result: unknown, error?: string) =>
    classifyToolCallOutcome({ toolName, params: {}, result, error } as any);

  it('returns success for exitCode 0 with no error', () => {
    const outcome = classify('read', { exitCode: 0 });

    expect(outcome.isFailure).toBe(false);
    expect(outcome.exitCode).toBe(0);
    expect(outcome.failureSource).toBeUndefined();
  });

  it('detects failure from top-level exitCode', () => {
    const outcome = classify('bash', { exitCode: 1 });

    expect(outcome.isFailure).toBe(true);
    expect(outcome.exitCode).toBe(1);
    expect(outcome.failureSource).toBe('tool_failure');
  });

  it('falls back to nested details.exitCode', () => {
    const outcome = classify('bash', { details: { exitCode: 2 } });

    expect(outcome.isFailure).toBe(true);
    expect(outcome.exitCode).toBe(2);
  });

  it('prefers top-level exitCode over nested', () => {
    const outcome = classify('bash', { exitCode: 0, details: { exitCode: 1 } });

    expect(outcome.isFailure).toBe(false);
    expect(outcome.exitCode).toBe(0);
  });

  it('detects failure from error field even with exitCode 0', () => {
    const outcome = classify('write', { exitCode: 0 }, 'Permission denied');

    expect(outcome.isFailure).toBe(true);
    expect(outcome.failureSource).toBe('tool_failure');
  });

  it('classifies dispatch_error for tool not found', () => {
    const outcome = classify('read', { exitCode: 1 }, 'tool read_file not found');

    expect(outcome.isFailure).toBe(true);
    expect(outcome.failureSource).toBe('dispatch_error');
  });

  it('treats non-numeric exitCode as 0', () => {
    // The exit code is deliberately a string here, not a number.
    const outcome = classify('bash', { exitCode: '0' });

    expect(outcome.isFailure).toBe(false);
  });
});
|
|
637
|
+
|
|
638
|
+
describe('PRI-326: evaluatePainAdmissionForToolCall', () => {
  const workspaceDir = '/mock/workspace';
  const mockConfig = { get: vi.fn().mockReturnValue(undefined) };

  const baseOutcome: ToolCallOutcome = {
    isFailure: true,
    exitCode: 1,
    failureSource: 'tool_failure',
  };

  const baseObservation: ToolCallObservation = {
    params: { filePath: 'src/main.ts' },
    relPath: 'src/main.ts',
    isRisk: false,
    errorType: 'Other',
    errorHash: 'abc123',
    errorText: 'Permission denied',
    painScore: 10,
    traceId: 'trace-123',
  };

  // Wraps the eight-argument call: the observation, workspace dir and config are
  // fixed, and the fifth argument is always undefined in these cases.
  const evaluate = (
    toolName: string,
    outcome: ToolCallOutcome,
    fourthArg: unknown = undefined,
    sessionId = 's1',
  ) =>
    evaluatePainAdmissionForToolCall(
      { toolName } as any,
      baseObservation,
      outcome,
      fourthArg as any,
      undefined,
      sessionId,
      workspaceDir,
      mockConfig,
    );

  // Each case starts with cleared mocks, a reset gate and the feature flag off.
  const setFeatureFlag = (enabled: boolean): void => {
    vi.mocked(loadFeatureFlagFromConfig).mockReturnValue({ enabled, source: 'test' });
  };

  beforeEach(() => {
    vi.clearAllMocks();
    resetPainDiagnosticGateForTest();
    setFeatureFlag(false);
  });

  it('returns not_applicable for non-write tool', () => {
    const decision = evaluate('read', baseOutcome);

    expect(decision.stage).toBe('not_applicable');
    expect(decision.admitted).toBe(false);
  });

  it('returns not_applicable for success', () => {
    const successOutcome: ToolCallOutcome = {
      isFailure: false,
      exitCode: 0,
      failureSource: undefined,
    };

    const decision = evaluate('write', successOutcome);

    expect(decision.stage).toBe('not_applicable');
  });

  it('returns triage_evidence_only when feature flag on and tool_failure triage rejects', () => {
    setFeatureFlag(true);

    const decision = evaluate('write', baseOutcome);

    expect(decision.stage).toBe('triage_evidence_only');
    expect(decision.admitted).toBe(false);
    expect(decision.reason).toBeTruthy();
  });

  it('returns gate_admitted when consecutive errors exceed repeatedFailure threshold', () => {
    setFeatureFlag(false);
    // consecutiveErrors=5 >= default repeatedFailure threshold of 4 → gate admits via repeated_failure
    const highConsecutiveState = {
      currentGfi: 0,
      consecutiveErrors: 5,
      lastErrorHash: 'abc123',
    } as any;

    const decision = evaluate('write', baseOutcome, highConsecutiveState, 's-gate-admitted-test');

    expect(decision.stage).toBe('gate_admitted');
    expect(decision.admitted).toBe(true);
    expect(decision.gateResult?.shouldDiagnose).toBe(true);
    expect(decision.gateResult?.reason).toBe('repeated_failure');
  });

  it('includes reason and detail in every decision', () => {
    const decision = evaluate('read', baseOutcome);

    expect(decision.reason).toBeTruthy();
    expect(decision.detail).toBeTruthy();
  });
});
|
|
708
|
+
|
|
709
|
+
describe('PRI-326: buildToolCallObservation params defense', () => {
  const profile = { risk_paths: [] } as any;

  // Builds an observation for a failing call whose `params` is the given
  // (possibly malformed) value. A fresh outcome object is created per call.
  const observeWithParams = (params: unknown) => {
    const failedOutcome: ToolCallOutcome = {
      isFailure: true,
      exitCode: 1,
      failureSource: 'tool_failure',
    };
    return buildToolCallObservation(
      { params, error: 'fail', result: {} } as any,
      failedOutcome,
      '/workspace',
      profile,
    );
  };

  it('handles null params without crashing', () => {
    const observation = observeWithParams(null);

    expect(observation.relPath).toBe('unknown');
    expect(observation.params.filePath).toBeUndefined();
  });

  it('handles undefined params without crashing', () => {
    const observation = observeWithParams(undefined);

    expect(observation.relPath).toBe('unknown');
  });

  it('handles array params without crashing', () => {
    const observation = observeWithParams(['bad']);

    expect(observation.relPath).toBe('unknown');
  });

  it('handles string params without crashing', () => {
    const observation = observeWithParams('not-an-object');

    expect(observation.relPath).toBe('unknown');
  });
});
|
|
749
|
+
|
|
750
|
+
describe('PRI-326: buildToolCallObservation unserializable result defense', () => {
  const profile = { risk_paths: [] } as any;
  const outcome: ToolCallOutcome = {
    isFailure: true,
    exitCode: 1,
    failureSource: 'tool_failure',
  };

  // Builds an observation for a failing call with empty params, no error field
  // and the given result payload.
  const observeResult = (payload: unknown) =>
    buildToolCallObservation(
      { params: {}, error: undefined, result: payload } as any,
      outcome,
      '/workspace',
      profile,
    );

  it('handles BigInt result without crashing', () => {
    const observation = observeResult({ val: BigInt(42) });

    expect(observation.errorText).toContain('unserializable result');
  });

  it('handles circular reference result without crashing', () => {
    // An object that references itself.
    const circular: any = { name: 'loop' };
    circular.self = circular;

    const observation = observeResult(circular);

    expect(observation.errorText).toContain('unserializable result');
  });
});
|