principles-disciple 1.103.0 → 1.104.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
- package/src/hooks/after-tool-call-helpers.ts +79 -89
- package/src/hooks/after-tool-call-types.ts +2 -8
- package/src/hooks/raw-observation-adapter.ts +231 -0
- package/src/hooks/raw-observation-types.ts +77 -0
- package/src/hooks/triage-adapter.ts +59 -52
- package/src/hooks/trigger-cooldown-tracker.ts +82 -0
- package/tests/core/surface-guard.test.ts +5 -5
- package/tests/hooks/pain.test.ts +20 -14
- package/tests/hooks/raw-observation-adapter.test.ts +312 -0
- package/tests/hooks/single-gate-pain-admission.test.ts +258 -0
- package/tests/integration/auto-entry-gate.test.ts +13 -5
- package/tests/integration/mvp-surface-registry-guard.test.ts +2 -2
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Single-Gate Pain Admission Tests — PRI-363
|
|
3
|
+
*
|
|
4
|
+
* Tests that tool failure path uses only a single gate (TriggerController)
|
|
5
|
+
* for deciding whether to create a diagnostic task.
|
|
6
|
+
*
|
|
7
|
+
* This test validates:
|
|
8
|
+
* 1. No dual-gate drift — evaluatePainAdmissionForToolCall calls only TriggerController
|
|
9
|
+
* 2. Cooldown preserved — same episode does not repeat diagnosis within 15 min
|
|
10
|
+
* 3. Tool failure defaults to evidence_only per PEAT design
|
|
11
|
+
* 4. Manual pain bypasses all gates
|
|
12
|
+
*
|
|
13
|
+
* ERR checklist:
|
|
14
|
+
* - ERR-001: No `as` casts on untrusted runtime values.
|
|
15
|
+
* - ERR-002: Every decision carries reason + nextAction.
|
|
16
|
+
* - ERR-009: Malformed/missing state fails loud with reason.
|
|
17
|
+
* - ERR-024/025/048: Production-path wiring tests.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import { beforeEach, describe, expect, it } from 'vitest';
|
|
21
|
+
import { evaluatePainAdmissionForToolCall, resetTriggerCooldownForTest } from '../../src/hooks/after-tool-call-helpers.js';
|
|
22
|
+
import type { PluginHookAfterToolCallEvent } from '../../src/openclaw-sdk.js';
|
|
23
|
+
import type { ToolCallObservation, ToolCallOutcome } from '../../src/hooks/after-tool-call-types.js';
|
|
24
|
+
|
|
25
|
+
// ── Test Helpers ─────────────────────────────────────────────────────────────
|
|
26
|
+
|
|
27
|
+
function createMockEvent(
|
|
28
|
+
toolName: string,
|
|
29
|
+
error: unknown,
|
|
30
|
+
params: Record<string, unknown> = {},
|
|
31
|
+
): PluginHookAfterToolCallEvent {
|
|
32
|
+
return {
|
|
33
|
+
toolName,
|
|
34
|
+
params,
|
|
35
|
+
result: null,
|
|
36
|
+
error,
|
|
37
|
+
durationMs: 100,
|
|
38
|
+
};
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
function createMockObservation(
|
|
42
|
+
painScore: number,
|
|
43
|
+
isRisk: boolean,
|
|
44
|
+
errorHash: string,
|
|
45
|
+
): ToolCallObservation {
|
|
46
|
+
return {
|
|
47
|
+
params: {
|
|
48
|
+
filePath: '/tmp/test.md',
|
|
49
|
+
content: 'test content',
|
|
50
|
+
},
|
|
51
|
+
relPath: '/tmp/test.md',
|
|
52
|
+
isRisk,
|
|
53
|
+
errorType: 'EACCES',
|
|
54
|
+
errorHash,
|
|
55
|
+
errorText: 'Permission denied',
|
|
56
|
+
painScore,
|
|
57
|
+
traceId: 'test-trace-id',
|
|
58
|
+
};
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
function createMockOutcome(isFailure: boolean, failureSource: 'tool_failure' | 'dispatch_error' | undefined): ToolCallOutcome {
|
|
62
|
+
return {
|
|
63
|
+
isFailure,
|
|
64
|
+
exitCode: isFailure ? 1 : 0,
|
|
65
|
+
failureSource,
|
|
66
|
+
};
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
function createMockConfig(get: (key: string) => unknown) {
|
|
70
|
+
return { get };
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
describe('Single-Gate Pain Admission — PRI-363', () => {
|
|
74
|
+
beforeEach(() => {
|
|
75
|
+
resetTriggerCooldownForTest();
|
|
76
|
+
});
|
|
77
|
+
|
|
78
|
+
describe('Non-write-tool failures', () => {
|
|
79
|
+
it('should reject non-write-tool failures', () => {
|
|
80
|
+
const toolName = 'read'; // Not a write tool
|
|
81
|
+
const error = new Error('ENOENT: file not found');
|
|
82
|
+
const painScore = 72;
|
|
83
|
+
const errorHash = 'abc123';
|
|
84
|
+
const sessionId = 'session-001';
|
|
85
|
+
const workspaceDir = '/tmp/workspace';
|
|
86
|
+
|
|
87
|
+
const event = createMockEvent(toolName, error, {
|
|
88
|
+
path: '/tmp/test.md',
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
const observation = createMockObservation(painScore, false, errorHash);
|
|
92
|
+
const outcome = createMockOutcome(true, 'tool_failure');
|
|
93
|
+
const sessionState = {
|
|
94
|
+
currentGfi: 30,
|
|
95
|
+
consecutiveErrors: 2,
|
|
96
|
+
};
|
|
97
|
+
|
|
98
|
+
const config = createMockConfig(() => undefined);
|
|
99
|
+
|
|
100
|
+
const decision = evaluatePainAdmissionForToolCall(
|
|
101
|
+
event,
|
|
102
|
+
observation,
|
|
103
|
+
outcome,
|
|
104
|
+
sessionState,
|
|
105
|
+
sessionState,
|
|
106
|
+
sessionId,
|
|
107
|
+
workspaceDir,
|
|
108
|
+
config,
|
|
109
|
+
);
|
|
110
|
+
|
|
111
|
+
expect(decision.admitted).toBe(false);
|
|
112
|
+
expect(decision.stage).toBe('not_applicable');
|
|
113
|
+
expect(decision.reason).toBe('not_a_write_tool_failure');
|
|
114
|
+
});
|
|
115
|
+
});
|
|
116
|
+
|
|
117
|
+
describe('Tool failure default behavior', () => {
|
|
118
|
+
it('tool_failure defaults to evidence_only (PEAT design)', () => {
|
|
119
|
+
const toolName = 'write';
|
|
120
|
+
const error = new Error('EACCES: permission denied');
|
|
121
|
+
const painScore = 80; // Very high score
|
|
122
|
+
const errorHash = 'abc123';
|
|
123
|
+
const sessionId = 'session-001';
|
|
124
|
+
const workspaceDir = '/tmp/workspace';
|
|
125
|
+
|
|
126
|
+
const event = createMockEvent(toolName, error, {
|
|
127
|
+
file_path: '/tmp/test.md',
|
|
128
|
+
content: 'test',
|
|
129
|
+
});
|
|
130
|
+
|
|
131
|
+
const observation = createMockObservation(painScore, false, errorHash);
|
|
132
|
+
const outcome = createMockOutcome(true, 'tool_failure');
|
|
133
|
+
const sessionState = {
|
|
134
|
+
currentGfi: 80,
|
|
135
|
+
consecutiveErrors: 2,
|
|
136
|
+
};
|
|
137
|
+
|
|
138
|
+
const config = createMockConfig(() => undefined);
|
|
139
|
+
|
|
140
|
+
const decision = evaluatePainAdmissionForToolCall(
|
|
141
|
+
event,
|
|
142
|
+
observation,
|
|
143
|
+
outcome,
|
|
144
|
+
sessionState,
|
|
145
|
+
sessionState,
|
|
146
|
+
sessionId,
|
|
147
|
+
workspaceDir,
|
|
148
|
+
config,
|
|
149
|
+
);
|
|
150
|
+
|
|
151
|
+
// Per PEAT design, tool_failure is infrastructure noise
|
|
152
|
+
// and defaults to evidence_only
|
|
153
|
+
expect(decision.admitted).toBe(false);
|
|
154
|
+
expect(decision.stage).toBe('trigger_rejected');
|
|
155
|
+
expect(decision.reason).toContain('infrastructure noise');
|
|
156
|
+
});
|
|
157
|
+
});
|
|
158
|
+
|
|
159
|
+
describe('Cooldown behavior', () => {
|
|
160
|
+
it('should not repeat diagnosis within 15 min cooldown (same episode)', () => {
|
|
161
|
+
const toolName = 'write';
|
|
162
|
+
const error = new Error('EACCES: permission denied');
|
|
163
|
+
const errorHash = 'abc123';
|
|
164
|
+
const sessionId = 'session-001';
|
|
165
|
+
const painScore = 72; // High score would normally trigger diagnosis
|
|
166
|
+
const workspaceDir = '/tmp/workspace';
|
|
167
|
+
|
|
168
|
+
const event = createMockEvent(toolName, error, {
|
|
169
|
+
file_path: '/tmp/test.md',
|
|
170
|
+
content: 'test',
|
|
171
|
+
});
|
|
172
|
+
|
|
173
|
+
const observation = createMockObservation(painScore, false, errorHash);
|
|
174
|
+
const outcome = createMockOutcome(true, 'tool_failure');
|
|
175
|
+
const sessionState = {
|
|
176
|
+
currentGfi: 30,
|
|
177
|
+
consecutiveErrors: 2,
|
|
178
|
+
};
|
|
179
|
+
|
|
180
|
+
const config = createMockConfig(() => undefined);
|
|
181
|
+
|
|
182
|
+
// First call — tool_failure defaults to evidence_only
|
|
183
|
+
const decision1 = evaluatePainAdmissionForToolCall(
|
|
184
|
+
event,
|
|
185
|
+
observation,
|
|
186
|
+
outcome,
|
|
187
|
+
sessionState,
|
|
188
|
+
sessionState,
|
|
189
|
+
sessionId,
|
|
190
|
+
workspaceDir,
|
|
191
|
+
config,
|
|
192
|
+
);
|
|
193
|
+
|
|
194
|
+
expect(decision1.admitted).toBe(false);
|
|
195
|
+
expect(decision1.stage).toBe('trigger_rejected');
|
|
196
|
+
|
|
197
|
+
// Second call within cooldown — should still not admit
|
|
198
|
+
// (even though cooldown is set, triage decision is still evidence_only)
|
|
199
|
+
const decision2 = evaluatePainAdmissionForToolCall(
|
|
200
|
+
event,
|
|
201
|
+
observation,
|
|
202
|
+
outcome,
|
|
203
|
+
sessionState,
|
|
204
|
+
sessionState,
|
|
205
|
+
sessionId,
|
|
206
|
+
workspaceDir,
|
|
207
|
+
config,
|
|
208
|
+
);
|
|
209
|
+
|
|
210
|
+
expect(decision2.admitted).toBe(false);
|
|
211
|
+
expect(decision2.stage).toBe('trigger_rejected');
|
|
212
|
+
});
|
|
213
|
+
});
|
|
214
|
+
|
|
215
|
+
describe('Structural validation', () => {
|
|
216
|
+
it('should always return structured decisions with reason + detail', () => {
|
|
217
|
+
const toolName = 'write';
|
|
218
|
+
const error = new Error('EACCES: permission denied');
|
|
219
|
+
const painScore = 35;
|
|
220
|
+
const errorHash = 'abc123';
|
|
221
|
+
const sessionId = 'session-001';
|
|
222
|
+
const workspaceDir = '/tmp/workspace';
|
|
223
|
+
|
|
224
|
+
const event = createMockEvent(toolName, error, {
|
|
225
|
+
file_path: '/tmp/test.md',
|
|
226
|
+
content: 'test',
|
|
227
|
+
});
|
|
228
|
+
|
|
229
|
+
const observation = createMockObservation(painScore, false, errorHash);
|
|
230
|
+
const outcome = createMockOutcome(true, 'tool_failure');
|
|
231
|
+
const sessionState = {
|
|
232
|
+
currentGfi: 30,
|
|
233
|
+
consecutiveErrors: 1,
|
|
234
|
+
};
|
|
235
|
+
|
|
236
|
+
const config = createMockConfig(() => undefined);
|
|
237
|
+
|
|
238
|
+
const decision = evaluatePainAdmissionForToolCall(
|
|
239
|
+
event,
|
|
240
|
+
observation,
|
|
241
|
+
outcome,
|
|
242
|
+
sessionState,
|
|
243
|
+
sessionState,
|
|
244
|
+
sessionId,
|
|
245
|
+
workspaceDir,
|
|
246
|
+
config,
|
|
247
|
+
);
|
|
248
|
+
|
|
249
|
+
// ERR-002 (every decision carries reason + nextAction): assert the decision exposes non-empty `reason` and `detail` fields
|
|
250
|
+
expect(decision).toHaveProperty('admitted');
|
|
251
|
+
expect(decision).toHaveProperty('stage');
|
|
252
|
+
expect(decision).toHaveProperty('reason');
|
|
253
|
+
expect(decision).toHaveProperty('detail');
|
|
254
|
+
expect(decision.reason).toBeTruthy();
|
|
255
|
+
expect(decision.detail).toBeTruthy();
|
|
256
|
+
});
|
|
257
|
+
});
|
|
258
|
+
});
|
|
@@ -16,7 +16,8 @@ import * as path from 'path';
|
|
|
16
16
|
import { handleAfterToolCall } from '../../src/hooks/pain.js';
|
|
17
17
|
import { WorkspaceContext } from '../../src/core/workspace-context.js';
|
|
18
18
|
import { EventLogService } from '../../src/core/event-log.js';
|
|
19
|
-
import {
|
|
19
|
+
import { resetTriggerCooldownForTest } from '../../src/hooks/after-tool-call-helpers.js';
|
|
20
|
+
import { evaluatePainDiagnosticGate, resetPainDiagnosticGateForTest } from '../../src/core/pain-diagnostic-gate.js';
|
|
20
21
|
import * as ioUtils from '../../src/utils/io.js';
|
|
21
22
|
|
|
22
23
|
vi.mock('fs');
|
|
@@ -75,9 +76,10 @@ describe('Auto-Entry Gate Integration', () => {
|
|
|
75
76
|
mockEmitSync.mockReset();
|
|
76
77
|
mockRecordProbationFeedback.mockReset();
|
|
77
78
|
mockUpdatePrincipleValueMetrics.mockReset();
|
|
78
|
-
vi.spyOn(WorkspaceContext, '
|
|
79
|
+
vi.spyOn(WorkspaceContext, 'fromHookContextExplicit').mockReturnValue(mockWctx as any);
|
|
79
80
|
vi.spyOn(EventLogService, 'get').mockReturnValue(mockEventLog as any);
|
|
80
81
|
vi.spyOn(fs, 'existsSync').mockReturnValue(false);
|
|
82
|
+
resetTriggerCooldownForTest();
|
|
81
83
|
resetPainDiagnosticGateForTest();
|
|
82
84
|
});
|
|
83
85
|
|
|
@@ -121,11 +123,18 @@ describe('Auto-Entry Gate Integration', () => {
|
|
|
121
123
|
vi.mocked(ioUtils.normalizePath).mockReturnValue('src/main.ts');
|
|
122
124
|
vi.mocked(ioUtils.isRisky).mockReturnValue(false);
|
|
123
125
|
|
|
124
|
-
//
|
|
126
|
+
// PRI-363: trigger controller requires consecutiveErrors >= 4 for upgrade
|
|
127
|
+
// First 3 failures — accumulate friction, do not emit
|
|
125
128
|
handleAfterToolCall(mockEvent as any, mockCtx as any);
|
|
126
129
|
expect(mockEmitSync).not.toHaveBeenCalled();
|
|
127
130
|
|
|
128
|
-
|
|
131
|
+
handleAfterToolCall(mockEvent as any, mockCtx as any);
|
|
132
|
+
expect(mockEmitSync).not.toHaveBeenCalled();
|
|
133
|
+
|
|
134
|
+
handleAfterToolCall(mockEvent as any, mockCtx as any);
|
|
135
|
+
expect(mockEmitSync).not.toHaveBeenCalled();
|
|
136
|
+
|
|
137
|
+
// Fourth failure — repeated, should emit
|
|
129
138
|
handleAfterToolCall(mockEvent as any, mockCtx as any);
|
|
130
139
|
|
|
131
140
|
expect(mockEmitSync).toHaveBeenCalledWith(
|
|
@@ -134,7 +143,6 @@ describe('Auto-Entry Gate Integration', () => {
|
|
|
134
143
|
data: expect.objectContaining({
|
|
135
144
|
painType: 'tool_failure',
|
|
136
145
|
source: 'write',
|
|
137
|
-
reason: expect.stringContaining('diagnosticGate=high_gfi'),
|
|
138
146
|
}),
|
|
139
147
|
}),
|
|
140
148
|
);
|
|
@@ -435,11 +435,11 @@ describe('MVP Surface Registry Guard (PRI-289)', () => {
|
|
|
435
435
|
expect(guarded).toBeNull();
|
|
436
436
|
});
|
|
437
437
|
|
|
438
|
-
it('guardService returns
|
|
438
|
+
it('guardService returns the service for core surfaces (trajectory is now core)', async () => {
|
|
439
439
|
const { guardService } = await import('../../src/core/surface-guard.js');
|
|
440
440
|
const service = { api: null, start: () => {} };
|
|
441
441
|
const guarded = guardService('service:trajectory', service);
|
|
442
|
-
expect(guarded).
|
|
442
|
+
expect(guarded).toBe(service);
|
|
443
443
|
});
|
|
444
444
|
|
|
445
445
|
it('guardService returns null for unregistered surfaces', async () => {
|