principles-disciple 1.104.0 → 1.104.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,258 @@
1
+ /**
2
+ * Single-Gate Pain Admission Tests — PRI-363
3
+ *
4
+ * Tests that tool failure path uses only a single gate (TriggerController)
5
+ * for deciding whether to create a diagnostic task.
6
+ *
7
+ * This test validates:
8
+ * 1. No dual-gate drift — evaluatePainAdmissionForToolCall calls only TriggerController
9
+ * 2. Cooldown preserved — same episode does not repeat diagnosis within 15 min
10
+ * 3. Tool failure defaults to evidence_only per PEAT design
11
+ * 4. Manual pain bypasses all gates (NOTE: not yet exercised by any test in this file — TODO add coverage)
12
+ *
13
+ * ERR checklist:
14
+ * - ERR-001: No `as` casts on untrusted runtime values.
15
+ * - ERR-002: Every decision carries reason + nextAction.
16
+ * - ERR-009: Malformed/missing state fails loud with reason.
17
+ * - ERR-024/025/048: Production-path wiring tests.
18
+ */
19
+
20
+ import { beforeEach, describe, expect, it } from 'vitest';
21
+ import { evaluatePainAdmissionForToolCall, resetTriggerCooldownForTest } from '../../src/hooks/after-tool-call-helpers.js';
22
+ import type { PluginHookAfterToolCallEvent } from '../../src/openclaw-sdk.js';
23
+ import type { ToolCallObservation, ToolCallOutcome } from '../../src/hooks/after-tool-call-types.js';
24
+
25
+ // ── Test Helpers ─────────────────────────────────────────────────────────────
26
+
27
+ function createMockEvent(
28
+ toolName: string,
29
+ error: unknown,
30
+ params: Record<string, unknown> = {},
31
+ ): PluginHookAfterToolCallEvent {
32
+ return {
33
+ toolName,
34
+ params,
35
+ result: null,
36
+ error,
37
+ durationMs: 100,
38
+ };
39
+ }
40
+
41
+ function createMockObservation(
42
+ painScore: number,
43
+ isRisk: boolean,
44
+ errorHash: string,
45
+ ): ToolCallObservation {
46
+ return {
47
+ params: {
48
+ filePath: '/tmp/test.md',
49
+ content: 'test content',
50
+ },
51
+ relPath: '/tmp/test.md',
52
+ isRisk,
53
+ errorType: 'EACCES',
54
+ errorHash,
55
+ errorText: 'Permission denied',
56
+ painScore,
57
+ traceId: 'test-trace-id',
58
+ };
59
+ }
60
+
61
+ function createMockOutcome(isFailure: boolean, failureSource: 'tool_failure' | 'dispatch_error' | undefined): ToolCallOutcome {
62
+ return {
63
+ isFailure,
64
+ exitCode: isFailure ? 1 : 0,
65
+ failureSource,
66
+ };
67
+ }
68
+
69
+ function createMockConfig(get: (key: string) => unknown) {
70
+ return { get };
71
+ }
72
+
73
+ describe('Single-Gate Pain Admission — PRI-363', () => {
74
+ beforeEach(() => {
75
+ resetTriggerCooldownForTest();
76
+ });
77
+
78
+ describe('Non-write-tool failures', () => {
79
+ it('should reject non-write-tool failures', () => {
80
+ const toolName = 'read'; // Not a write tool
81
+ const error = new Error('ENOENT: file not found');
82
+ const painScore = 72;
83
+ const errorHash = 'abc123';
84
+ const sessionId = 'session-001';
85
+ const workspaceDir = '/tmp/workspace';
86
+
87
+ const event = createMockEvent(toolName, error, {
88
+ path: '/tmp/test.md',
89
+ });
90
+
91
+ const observation = createMockObservation(painScore, false, errorHash);
92
+ const outcome = createMockOutcome(true, 'tool_failure');
93
+ const sessionState = {
94
+ currentGfi: 30,
95
+ consecutiveErrors: 2,
96
+ };
97
+
98
+ const config = createMockConfig(() => undefined);
99
+
100
+ const decision = evaluatePainAdmissionForToolCall(
101
+ event,
102
+ observation,
103
+ outcome,
104
+ sessionState,
105
+ sessionState,
106
+ sessionId,
107
+ workspaceDir,
108
+ config,
109
+ );
110
+
111
+ expect(decision.admitted).toBe(false);
112
+ expect(decision.stage).toBe('not_applicable');
113
+ expect(decision.reason).toBe('not_a_write_tool_failure');
114
+ });
115
+ });
116
+
117
+ describe('Tool failure default behavior', () => {
118
+ it('tool_failure defaults to evidence_only (PEAT design)', () => {
119
+ const toolName = 'write';
120
+ const error = new Error('EACCES: permission denied');
121
+ const painScore = 80; // Very high score
122
+ const errorHash = 'abc123';
123
+ const sessionId = 'session-001';
124
+ const workspaceDir = '/tmp/workspace';
125
+
126
+ const event = createMockEvent(toolName, error, {
127
+ file_path: '/tmp/test.md',
128
+ content: 'test',
129
+ });
130
+
131
+ const observation = createMockObservation(painScore, false, errorHash);
132
+ const outcome = createMockOutcome(true, 'tool_failure');
133
+ const sessionState = {
134
+ currentGfi: 80,
135
+ consecutiveErrors: 2,
136
+ };
137
+
138
+ const config = createMockConfig(() => undefined);
139
+
140
+ const decision = evaluatePainAdmissionForToolCall(
141
+ event,
142
+ observation,
143
+ outcome,
144
+ sessionState,
145
+ sessionState,
146
+ sessionId,
147
+ workspaceDir,
148
+ config,
149
+ );
150
+
151
+ // Per PEAT design, tool_failure is infrastructure noise
152
+ // and defaults to evidence_only
153
+ expect(decision.admitted).toBe(false);
154
+ expect(decision.stage).toBe('trigger_rejected');
155
+ expect(decision.reason).toContain('infrastructure noise');
156
+ });
157
+ });
158
+
159
+ describe('Cooldown behavior', () => {
160
+ it('should not repeat diagnosis within 15 min cooldown (same episode)', () => {
161
+ const toolName = 'write';
162
+ const error = new Error('EACCES: permission denied');
163
+ const errorHash = 'abc123';
164
+ const sessionId = 'session-001';
165
+ const painScore = 72; // High score would normally trigger diagnosis
166
+ const workspaceDir = '/tmp/workspace';
167
+
168
+ const event = createMockEvent(toolName, error, {
169
+ file_path: '/tmp/test.md',
170
+ content: 'test',
171
+ });
172
+
173
+ const observation = createMockObservation(painScore, false, errorHash);
174
+ const outcome = createMockOutcome(true, 'tool_failure');
175
+ const sessionState = {
176
+ currentGfi: 30,
177
+ consecutiveErrors: 2,
178
+ };
179
+
180
+ const config = createMockConfig(() => undefined);
181
+
182
+ // First call — tool_failure defaults to evidence_only
183
+ const decision1 = evaluatePainAdmissionForToolCall(
184
+ event,
185
+ observation,
186
+ outcome,
187
+ sessionState,
188
+ sessionState,
189
+ sessionId,
190
+ workspaceDir,
191
+ config,
192
+ );
193
+
194
+ expect(decision1.admitted).toBe(false);
195
+ expect(decision1.stage).toBe('trigger_rejected');
196
+
197
+ // Second call within cooldown — should still not admit
198
+ // (even though cooldown is set, triage decision is still evidence_only)
199
+ const decision2 = evaluatePainAdmissionForToolCall(
200
+ event,
201
+ observation,
202
+ outcome,
203
+ sessionState,
204
+ sessionState,
205
+ sessionId,
206
+ workspaceDir,
207
+ config,
208
+ );
209
+
210
+ expect(decision2.admitted).toBe(false);
211
+ expect(decision2.stage).toBe('trigger_rejected');
212
+ });
213
+ });
214
+
215
+ describe('Structural validation', () => {
216
+ it('should always return structured decisions with reason + detail', () => {
217
+ const toolName = 'write';
218
+ const error = new Error('EACCES: permission denied');
219
+ const painScore = 35;
220
+ const errorHash = 'abc123';
221
+ const sessionId = 'session-001';
222
+ const workspaceDir = '/tmp/workspace';
223
+
224
+ const event = createMockEvent(toolName, error, {
225
+ file_path: '/tmp/test.md',
226
+ content: 'test',
227
+ });
228
+
229
+ const observation = createMockObservation(painScore, false, errorHash);
230
+ const outcome = createMockOutcome(true, 'tool_failure');
231
+ const sessionState = {
232
+ currentGfi: 30,
233
+ consecutiveErrors: 1,
234
+ };
235
+
236
+ const config = createMockConfig(() => undefined);
237
+
238
+ const decision = evaluatePainAdmissionForToolCall(
239
+ event,
240
+ observation,
241
+ outcome,
242
+ sessionState,
243
+ sessionState,
244
+ sessionId,
245
+ workspaceDir,
246
+ config,
247
+ );
248
+
249
+ // ERR-002: Every decision carries reason + nextAction. This test asserts the `reason` and `detail` fields; `nextAction` is not asserted here.
250
+ expect(decision).toHaveProperty('admitted');
251
+ expect(decision).toHaveProperty('stage');
252
+ expect(decision).toHaveProperty('reason');
253
+ expect(decision).toHaveProperty('detail');
254
+ expect(decision.reason).toBeTruthy();
255
+ expect(decision.detail).toBeTruthy();
256
+ });
257
+ });
258
+ });
@@ -16,7 +16,8 @@ import * as path from 'path';
16
16
  import { handleAfterToolCall } from '../../src/hooks/pain.js';
17
17
  import { WorkspaceContext } from '../../src/core/workspace-context.js';
18
18
  import { EventLogService } from '../../src/core/event-log.js';
19
- import { resetPainDiagnosticGateForTest, evaluatePainDiagnosticGate } from '../../src/core/pain-diagnostic-gate.js';
19
+ import { resetTriggerCooldownForTest } from '../../src/hooks/after-tool-call-helpers.js';
20
+ import { evaluatePainDiagnosticGate, resetPainDiagnosticGateForTest } from '../../src/core/pain-diagnostic-gate.js';
20
21
  import * as ioUtils from '../../src/utils/io.js';
21
22
 
22
23
  vi.mock('fs');
@@ -75,9 +76,10 @@ describe('Auto-Entry Gate Integration', () => {
75
76
  mockEmitSync.mockReset();
76
77
  mockRecordProbationFeedback.mockReset();
77
78
  mockUpdatePrincipleValueMetrics.mockReset();
78
- vi.spyOn(WorkspaceContext, 'fromHookContext').mockReturnValue(mockWctx as any);
79
+ vi.spyOn(WorkspaceContext, 'fromHookContextExplicit').mockReturnValue(mockWctx as any);
79
80
  vi.spyOn(EventLogService, 'get').mockReturnValue(mockEventLog as any);
80
81
  vi.spyOn(fs, 'existsSync').mockReturnValue(false);
82
+ resetTriggerCooldownForTest();
81
83
  resetPainDiagnosticGateForTest();
82
84
  });
83
85
 
@@ -121,11 +123,18 @@ describe('Auto-Entry Gate Integration', () => {
121
123
  vi.mocked(ioUtils.normalizePath).mockReturnValue('src/main.ts');
122
124
  vi.mocked(ioUtils.isRisky).mockReturnValue(false);
123
125
 
124
- // First failure accumulates GFI, does not emit
126
+ // PRI-363: trigger controller requires consecutiveErrors >= 4 for upgrade
127
+ // First 3 failures — each accumulates friction and does not emit
125
128
  handleAfterToolCall(mockEvent as any, mockCtx as any);
126
129
  expect(mockEmitSync).not.toHaveBeenCalled();
127
130
 
128
- // Second failure repeated, should emit
131
+ handleAfterToolCall(mockEvent as any, mockCtx as any);
132
+ expect(mockEmitSync).not.toHaveBeenCalled();
133
+
134
+ handleAfterToolCall(mockEvent as any, mockCtx as any);
135
+ expect(mockEmitSync).not.toHaveBeenCalled();
136
+
137
+ // Fourth failure — repeated, should emit
129
138
  handleAfterToolCall(mockEvent as any, mockCtx as any);
130
139
 
131
140
  expect(mockEmitSync).toHaveBeenCalledWith(
@@ -134,7 +143,6 @@ describe('Auto-Entry Gate Integration', () => {
134
143
  data: expect.objectContaining({
135
144
  painType: 'tool_failure',
136
145
  source: 'write',
137
- reason: expect.stringContaining('diagnosticGate=high_gfi'),
138
146
  }),
139
147
  }),
140
148
  );