principles-disciple 1.32.0 → 1.33.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
- package/src/core/correction-cue-learner.ts +203 -0
- package/src/core/correction-types.ts +88 -0
- package/src/core/init.ts +67 -0
- package/src/service/correction-observer-types.ts +58 -0
- package/src/service/correction-observer-workflow-manager.ts +218 -0
- package/src/service/evolution-worker.ts +161 -140
- package/src/service/nocturnal-service.ts +4 -1
- package/src/service/subagent-workflow/index.ts +14 -0
- package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +3 -1
- package/tests/service/evolution-worker.nocturnal.test.ts +14 -1
- package/tests/commands/implementation-lifecycle.test.ts +0 -362
- package/tests/core/detection-funnel.test.ts +0 -63
- package/tests/core/evolution-e2e.test.ts +0 -58
- package/tests/core/evolution-engine-gate-integration.test.ts +0 -543
- package/tests/core/evolution-engine.test.ts +0 -562
- package/tests/core/evolution-reducer.test.ts +0 -180
- package/tests/core/evolution-user-stories.e2e.test.ts +0 -249
- package/tests/core/local-worker-routing.test.ts +0 -757
- package/tests/core/rule-host.test.ts +0 -389
- package/tests/core/trajectory-correction-pain.test.ts +0 -180
- package/tests/hooks/gate-edit-verification.test.ts +0 -435
- package/tests/hooks/llm.test.ts +0 -308
- package/tests/hooks/progressive-trust-gate.test.ts +0 -277
- package/tests/hooks/prompt.test.ts +0 -1473
- package/tests/index.integration.test.ts +0 -179
- package/tests/index.shadow-routing.integration.test.ts +0 -140
- package/tests/service/evolution-worker.test.ts +0 -462
- package/tests/service/nocturnal-service.test.ts +0 -577
- package/tests/service/nocturnal-workflow-manager.test.ts +0 -441
- package/tests/tools/critique-prompt.test.ts +0 -260
- package/tests/tools/deep-reflect.test.ts +0 -232
- package/tests/tools/model-index.test.ts +0 -246
- package/tests/ui/app.test.tsx +0 -114
|
@@ -1,389 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Rule Host Tests
|
|
3
|
-
*
|
|
4
|
-
* PURPOSE: Verify that the RuleHost class:
|
|
5
|
-
* - Returns undefined when no active implementations exist
|
|
6
|
-
* - Returns block when an implementation returns block (with short-circuit)
|
|
7
|
-
* - Returns requireApproval when an implementation returns requireApproval
|
|
8
|
-
* - Returns undefined when all implementations return allow or matched=false
|
|
9
|
-
* - Degrades conservatively on vm load or execution errors
|
|
10
|
-
* - Merges multiple implementation decisions correctly
|
|
11
|
-
*/
|
|
12
|
-
|
|
13
|
-
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
|
14
|
-
import { RuleHost } from '../../src/core/rule-host.js';
|
|
15
|
-
import type { RuleHostInput, RuleHostResult } from '../../src/core/rule-host-types.js';
|
|
16
|
-
|
|
17
|
-
// Mock the ledger module
|
|
18
|
-
vi.mock('../../src/core/principle-tree-ledger.js', () => ({
|
|
19
|
-
loadLedger: vi.fn(),
|
|
20
|
-
listImplementationsByLifecycleState: vi.fn(() => []),
|
|
21
|
-
findActiveImplementation: vi.fn(() => null),
|
|
22
|
-
}));
|
|
23
|
-
|
|
24
|
-
vi.mock('../../src/core/code-implementation-storage.js', () => ({
|
|
25
|
-
loadEntrySource: vi.fn(() => null),
|
|
26
|
-
}));
|
|
27
|
-
|
|
28
|
-
// Mock fs to avoid actual file reads
|
|
29
|
-
vi.mock('fs', () => ({
|
|
30
|
-
existsSync: vi.fn(() => false),
|
|
31
|
-
readFileSync: vi.fn(),
|
|
32
|
-
}));
|
|
33
|
-
|
|
34
|
-
vi.mock('../../src/core/rule-implementation-runtime.js', () => ({
|
|
35
|
-
loadRuleImplementationModule: vi.fn(),
|
|
36
|
-
}));
|
|
37
|
-
|
|
38
|
-
import { listImplementationsByLifecycleState } from '../../src/core/principle-tree-ledger.js';
|
|
39
|
-
import { loadEntrySource } from '../../src/core/code-implementation-storage.js';
|
|
40
|
-
import { loadRuleImplementationModule } from '../../src/core/rule-implementation-runtime.js';
|
|
41
|
-
import * as fs from 'fs';
|
|
42
|
-
|
|
43
|
-
const mockedListImplementations = vi.mocked(listImplementationsByLifecycleState);
|
|
44
|
-
const mockedLoadEntrySource = vi.mocked(loadEntrySource);
|
|
45
|
-
const mockedLoadRuleImplementationModule = vi.mocked(loadRuleImplementationModule);
|
|
46
|
-
const mockedExistsSync = vi.mocked(fs.existsSync);
|
|
47
|
-
const mockedReadFileSync = vi.mocked(fs.readFileSync);
|
|
48
|
-
|
|
49
|
-
function makeInput(overrides?: Partial<RuleHostInput>): RuleHostInput {
|
|
50
|
-
return {
|
|
51
|
-
action: {
|
|
52
|
-
toolName: 'write',
|
|
53
|
-
normalizedPath: 'src/test.ts',
|
|
54
|
-
paramsSummary: {},
|
|
55
|
-
},
|
|
56
|
-
workspace: {
|
|
57
|
-
isRiskPath: false,
|
|
58
|
-
planStatus: 'READY',
|
|
59
|
-
hasPlanFile: true,
|
|
60
|
-
},
|
|
61
|
-
session: {
|
|
62
|
-
sessionId: 'test-session',
|
|
63
|
-
currentGfi: 10,
|
|
64
|
-
recentThinking: false,
|
|
65
|
-
},
|
|
66
|
-
evolution: {
|
|
67
|
-
epTier: 3,
|
|
68
|
-
},
|
|
69
|
-
derived: {
|
|
70
|
-
estimatedLineChanges: 50,
|
|
71
|
-
bashRisk: 'normal',
|
|
72
|
-
},
|
|
73
|
-
...overrides,
|
|
74
|
-
};
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
function makeMockImpl(id: string, type: string = 'code', lifecycleState: string = 'active') {
|
|
78
|
-
return {
|
|
79
|
-
id,
|
|
80
|
-
ruleId: `RULE_${id}`,
|
|
81
|
-
type,
|
|
82
|
-
path: `/impls/${id}.js`,
|
|
83
|
-
version: '1.0.0',
|
|
84
|
-
coversCondition: 'test',
|
|
85
|
-
coveragePercentage: 100,
|
|
86
|
-
lifecycleState,
|
|
87
|
-
createdAt: '2026-01-01T00:00:00Z',
|
|
88
|
-
updatedAt: '2026-01-01T00:00:00Z',
|
|
89
|
-
};
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
describe('RuleHost', () => {
|
|
93
|
-
beforeEach(() => {
|
|
94
|
-
vi.clearAllMocks();
|
|
95
|
-
mockedLoadEntrySource.mockReturnValue(null);
|
|
96
|
-
});
|
|
97
|
-
|
|
98
|
-
it('should return undefined when no active implementations exist (empty ledger)', () => {
|
|
99
|
-
mockedListImplementations.mockReturnValue([]);
|
|
100
|
-
const host = new RuleHost('/mock/state');
|
|
101
|
-
const result = host.evaluate(makeInput());
|
|
102
|
-
expect(result).toBeUndefined();
|
|
103
|
-
});
|
|
104
|
-
|
|
105
|
-
it('should return undefined when implementations exist but none are type=code', () => {
|
|
106
|
-
mockedListImplementations.mockReturnValue([
|
|
107
|
-
makeMockImpl('IMPL_01', 'skill', 'active'),
|
|
108
|
-
makeMockImpl('IMPL_02', 'lora', 'active'),
|
|
109
|
-
] as any);
|
|
110
|
-
const host = new RuleHost('/mock/state');
|
|
111
|
-
const result = host.evaluate(makeInput());
|
|
112
|
-
expect(result).toBeUndefined();
|
|
113
|
-
});
|
|
114
|
-
|
|
115
|
-
it('should return undefined when implementations exist but none are lifecycleState=active', () => {
|
|
116
|
-
// listImplementationsByLifecycleState('active') only returns active ones
|
|
117
|
-
mockedListImplementations.mockReturnValue([]);
|
|
118
|
-
const host = new RuleHost('/mock/state');
|
|
119
|
-
const result = host.evaluate(makeInput());
|
|
120
|
-
expect(result).toBeUndefined();
|
|
121
|
-
});
|
|
122
|
-
|
|
123
|
-
it('should return block when a loaded implementation returns decision=block', () => {
|
|
124
|
-
const blockResult: RuleHostResult = {
|
|
125
|
-
decision: 'block',
|
|
126
|
-
matched: true,
|
|
127
|
-
reason: 'Dangerous operation',
|
|
128
|
-
};
|
|
129
|
-
const mockImpl = makeMockImpl('IMPL_BLOCK');
|
|
130
|
-
|
|
131
|
-
mockedListImplementations.mockReturnValue([mockImpl] as any);
|
|
132
|
-
mockedExistsSync.mockReturnValue(true);
|
|
133
|
-
mockedReadFileSync.mockReturnValue('module.exports = {}');
|
|
134
|
-
mockedLoadRuleImplementationModule.mockReturnValue({
|
|
135
|
-
meta: { name: 'block-test', version: '1.0.0', ruleId: 'RULE_BLOCK', coversCondition: 'test' },
|
|
136
|
-
evaluate: (_input: any, _helpers: any) => blockResult,
|
|
137
|
-
});
|
|
138
|
-
|
|
139
|
-
const host = new RuleHost('/mock/state');
|
|
140
|
-
const result = host.evaluate(makeInput());
|
|
141
|
-
expect(result).toBeDefined();
|
|
142
|
-
expect(result?.decision).toBe('block');
|
|
143
|
-
expect(result?.matched).toBe(true);
|
|
144
|
-
expect(result?.reason).toBe('Dangerous operation');
|
|
145
|
-
});
|
|
146
|
-
|
|
147
|
-
it('should return block (short-circuit) when first of two implementations returns block', () => {
|
|
148
|
-
const blockResult: RuleHostResult = {
|
|
149
|
-
decision: 'block',
|
|
150
|
-
matched: true,
|
|
151
|
-
reason: 'First impl blocks',
|
|
152
|
-
};
|
|
153
|
-
const allowResult: RuleHostResult = {
|
|
154
|
-
decision: 'allow',
|
|
155
|
-
matched: true,
|
|
156
|
-
reason: 'Second impl allows',
|
|
157
|
-
};
|
|
158
|
-
|
|
159
|
-
const impl1 = makeMockImpl('IMPL_01');
|
|
160
|
-
const impl2 = makeMockImpl('IMPL_02');
|
|
161
|
-
|
|
162
|
-
mockedListImplementations.mockReturnValue([impl1, impl2] as any);
|
|
163
|
-
mockedExistsSync.mockReturnValue(true);
|
|
164
|
-
mockedReadFileSync.mockReturnValue('module.exports = {}');
|
|
165
|
-
|
|
166
|
-
let callCount = 0;
|
|
167
|
-
mockedLoadRuleImplementationModule.mockImplementation(() => {
|
|
168
|
-
callCount++;
|
|
169
|
-
if (callCount === 1) {
|
|
170
|
-
return {
|
|
171
|
-
meta: { name: 'block-first', version: '1.0.0', ruleId: 'RULE_01', coversCondition: 'test' },
|
|
172
|
-
evaluate: () => blockResult,
|
|
173
|
-
};
|
|
174
|
-
}
|
|
175
|
-
return {
|
|
176
|
-
meta: { name: 'allow-second', version: '1.0.0', ruleId: 'RULE_02', coversCondition: 'test' },
|
|
177
|
-
evaluate: () => allowResult,
|
|
178
|
-
};
|
|
179
|
-
});
|
|
180
|
-
|
|
181
|
-
const host = new RuleHost('/mock/state');
|
|
182
|
-
const result = host.evaluate(makeInput());
|
|
183
|
-
|
|
184
|
-
expect(result?.decision).toBe('block');
|
|
185
|
-
expect(result?.reason).toBe('First impl blocks');
|
|
186
|
-
});
|
|
187
|
-
|
|
188
|
-
it('should return requireApproval when a loaded implementation returns decision=requireApproval', () => {
|
|
189
|
-
const approvalResult: RuleHostResult = {
|
|
190
|
-
decision: 'requireApproval',
|
|
191
|
-
matched: true,
|
|
192
|
-
reason: 'High-risk path modification',
|
|
193
|
-
};
|
|
194
|
-
const mockImpl = makeMockImpl('IMPL_APPROVAL');
|
|
195
|
-
|
|
196
|
-
mockedListImplementations.mockReturnValue([mockImpl] as any);
|
|
197
|
-
mockedExistsSync.mockReturnValue(true);
|
|
198
|
-
mockedReadFileSync.mockReturnValue('module.exports = {}');
|
|
199
|
-
mockedLoadRuleImplementationModule.mockReturnValue({
|
|
200
|
-
meta: { name: 'approval-test', version: '1.0.0', ruleId: 'RULE_APPROVAL', coversCondition: 'test' },
|
|
201
|
-
evaluate: () => approvalResult,
|
|
202
|
-
});
|
|
203
|
-
|
|
204
|
-
const host = new RuleHost('/mock/state');
|
|
205
|
-
const result = host.evaluate(makeInput());
|
|
206
|
-
expect(result?.decision).toBe('requireApproval');
|
|
207
|
-
expect(result?.matched).toBe(true);
|
|
208
|
-
expect(result?.reason).toBe('High-risk path modification');
|
|
209
|
-
});
|
|
210
|
-
|
|
211
|
-
it('should return undefined when all implementations return allow or matched=false', () => {
|
|
212
|
-
const allowResult: RuleHostResult = {
|
|
213
|
-
decision: 'allow',
|
|
214
|
-
matched: true,
|
|
215
|
-
reason: 'OK',
|
|
216
|
-
};
|
|
217
|
-
const unmatchedResult: RuleHostResult = {
|
|
218
|
-
decision: 'allow',
|
|
219
|
-
matched: false,
|
|
220
|
-
reason: 'Not applicable',
|
|
221
|
-
};
|
|
222
|
-
|
|
223
|
-
const impl1 = makeMockImpl('IMPL_01');
|
|
224
|
-
const impl2 = makeMockImpl('IMPL_02');
|
|
225
|
-
|
|
226
|
-
mockedListImplementations.mockReturnValue([impl1, impl2] as any);
|
|
227
|
-
mockedExistsSync.mockReturnValue(true);
|
|
228
|
-
mockedReadFileSync.mockReturnValue('module.exports = {}');
|
|
229
|
-
|
|
230
|
-
let callCount = 0;
|
|
231
|
-
mockedLoadRuleImplementationModule.mockImplementation(() => {
|
|
232
|
-
callCount++;
|
|
233
|
-
if (callCount === 1) {
|
|
234
|
-
return {
|
|
235
|
-
meta: { name: 'allow-1', version: '1.0.0', ruleId: 'RULE_01', coversCondition: 'test' },
|
|
236
|
-
evaluate: () => allowResult,
|
|
237
|
-
};
|
|
238
|
-
}
|
|
239
|
-
return {
|
|
240
|
-
meta: { name: 'unmatched-2', version: '1.0.0', ruleId: 'RULE_02', coversCondition: 'test' },
|
|
241
|
-
evaluate: () => unmatchedResult,
|
|
242
|
-
};
|
|
243
|
-
});
|
|
244
|
-
|
|
245
|
-
const host = new RuleHost('/mock/state');
|
|
246
|
-
const result = host.evaluate(makeInput());
|
|
247
|
-
expect(result).toBeUndefined();
|
|
248
|
-
});
|
|
249
|
-
|
|
250
|
-
it('should return undefined on vm load error (conservative degradation)', () => {
|
|
251
|
-
const mockImpl = makeMockImpl('IMPL_BAD');
|
|
252
|
-
mockedListImplementations.mockReturnValue([mockImpl] as any);
|
|
253
|
-
mockedExistsSync.mockReturnValue(true);
|
|
254
|
-
mockedReadFileSync.mockReturnValue('bad syntax {{{');
|
|
255
|
-
mockedLoadRuleImplementationModule.mockImplementation(() => {
|
|
256
|
-
throw new Error('Compilation failed: unexpected token');
|
|
257
|
-
});
|
|
258
|
-
|
|
259
|
-
const host = new RuleHost('/mock/state');
|
|
260
|
-
const result = host.evaluate(makeInput());
|
|
261
|
-
expect(result).toBeUndefined();
|
|
262
|
-
});
|
|
263
|
-
|
|
264
|
-
it('should use injected logger for degradation warnings instead of direct console writes', () => {
|
|
265
|
-
const warn = vi.fn();
|
|
266
|
-
const mockImpl = makeMockImpl('IMPL_BAD');
|
|
267
|
-
mockedListImplementations.mockReturnValue([mockImpl] as any);
|
|
268
|
-
mockedExistsSync.mockReturnValue(true);
|
|
269
|
-
mockedReadFileSync.mockReturnValue('bad syntax {{{');
|
|
270
|
-
mockedLoadRuleImplementationModule.mockImplementation(() => {
|
|
271
|
-
throw new Error('Compilation failed: unexpected token');
|
|
272
|
-
});
|
|
273
|
-
|
|
274
|
-
const host = new RuleHost('/mock/state', { warn });
|
|
275
|
-
const result = host.evaluate(makeInput());
|
|
276
|
-
|
|
277
|
-
expect(result).toBeUndefined();
|
|
278
|
-
expect(warn).toHaveBeenCalledWith(
|
|
279
|
-
expect.stringContaining('Failed to compile implementation IMPL_BAD')
|
|
280
|
-
);
|
|
281
|
-
});
|
|
282
|
-
|
|
283
|
-
it('should return undefined on vm execution error (conservative degradation)', () => {
|
|
284
|
-
const mockImpl = makeMockImpl('IMPL_EXEC_ERR');
|
|
285
|
-
mockedListImplementations.mockReturnValue([mockImpl] as any);
|
|
286
|
-
mockedExistsSync.mockReturnValue(true);
|
|
287
|
-
mockedReadFileSync.mockReturnValue('module.exports = {}');
|
|
288
|
-
mockedLoadRuleImplementationModule.mockReturnValue({
|
|
289
|
-
meta: { name: 'exec-err', version: '1.0.0', ruleId: 'RULE_ERR', coversCondition: 'test' },
|
|
290
|
-
evaluate: () => {
|
|
291
|
-
throw new Error('Runtime error during evaluation');
|
|
292
|
-
},
|
|
293
|
-
});
|
|
294
|
-
|
|
295
|
-
const host = new RuleHost('/mock/state');
|
|
296
|
-
const result = host.evaluate(makeInput());
|
|
297
|
-
expect(result).toBeUndefined();
|
|
298
|
-
});
|
|
299
|
-
|
|
300
|
-
it('should return undefined when implementation asset path does not exist', () => {
|
|
301
|
-
const mockImpl = makeMockImpl('IMPL_NOFILE');
|
|
302
|
-
mockedListImplementations.mockReturnValue([mockImpl] as any);
|
|
303
|
-
mockedExistsSync.mockReturnValue(false);
|
|
304
|
-
|
|
305
|
-
const host = new RuleHost('/mock/state');
|
|
306
|
-
const result = host.evaluate(makeInput());
|
|
307
|
-
expect(result).toBeUndefined();
|
|
308
|
-
});
|
|
309
|
-
|
|
310
|
-
it('loads code implementations from storage assets before falling back to impl.path', () => {
|
|
311
|
-
const blockResult: RuleHostResult = {
|
|
312
|
-
decision: 'block',
|
|
313
|
-
matched: true,
|
|
314
|
-
reason: 'Loaded from storage asset',
|
|
315
|
-
};
|
|
316
|
-
const mockImpl = makeMockImpl('IMPL_STORAGE');
|
|
317
|
-
|
|
318
|
-
mockedListImplementations.mockReturnValue([mockImpl] as any);
|
|
319
|
-
mockedLoadEntrySource.mockReturnValue('export const meta = {}; export function evaluate() {}');
|
|
320
|
-
mockedExistsSync.mockReturnValue(false);
|
|
321
|
-
mockedLoadRuleImplementationModule.mockReturnValue({
|
|
322
|
-
meta: { name: 'storage-test', version: '1.0.0', ruleId: 'RULE_STORAGE', coversCondition: 'test' },
|
|
323
|
-
evaluate: () => blockResult,
|
|
324
|
-
});
|
|
325
|
-
|
|
326
|
-
const host = new RuleHost('/mock/state');
|
|
327
|
-
const result = host.evaluate(makeInput());
|
|
328
|
-
|
|
329
|
-
expect(mockedLoadEntrySource).toHaveBeenCalledWith('/mock/state', 'IMPL_STORAGE');
|
|
330
|
-
expect(mockedReadFileSync).not.toHaveBeenCalled();
|
|
331
|
-
expect(result?.decision).toBe('block');
|
|
332
|
-
expect(result?.reason).toBe('Loaded from storage asset');
|
|
333
|
-
});
|
|
334
|
-
|
|
335
|
-
it('should merge multiple requireApproval results', () => {
|
|
336
|
-
const approval1: RuleHostResult = {
|
|
337
|
-
decision: 'requireApproval',
|
|
338
|
-
matched: true,
|
|
339
|
-
reason: 'Risk path',
|
|
340
|
-
diagnostics: { riskLevel: 'high' },
|
|
341
|
-
};
|
|
342
|
-
const approval2: RuleHostResult = {
|
|
343
|
-
decision: 'requireApproval',
|
|
344
|
-
matched: true,
|
|
345
|
-
reason: 'Large change',
|
|
346
|
-
diagnostics: { lineCount: 500 },
|
|
347
|
-
};
|
|
348
|
-
|
|
349
|
-
const impl1 = makeMockImpl('IMPL_01');
|
|
350
|
-
const impl2 = makeMockImpl('IMPL_02');
|
|
351
|
-
|
|
352
|
-
mockedListImplementations.mockReturnValue([impl1, impl2] as any);
|
|
353
|
-
mockedExistsSync.mockReturnValue(true);
|
|
354
|
-
mockedReadFileSync.mockReturnValue('module.exports = {}');
|
|
355
|
-
|
|
356
|
-
let callCount = 0;
|
|
357
|
-
mockedLoadRuleImplementationModule.mockImplementation(() => {
|
|
358
|
-
callCount++;
|
|
359
|
-
if (callCount === 1) {
|
|
360
|
-
return {
|
|
361
|
-
meta: { name: 'approval-1', version: '1.0.0', ruleId: 'RULE_01', coversCondition: 'test' },
|
|
362
|
-
evaluate: () => approval1,
|
|
363
|
-
};
|
|
364
|
-
}
|
|
365
|
-
return {
|
|
366
|
-
meta: { name: 'approval-2', version: '1.0.0', ruleId: 'RULE_02', coversCondition: 'test' },
|
|
367
|
-
evaluate: () => approval2,
|
|
368
|
-
};
|
|
369
|
-
});
|
|
370
|
-
|
|
371
|
-
const host = new RuleHost('/mock/state');
|
|
372
|
-
const result = host.evaluate(makeInput());
|
|
373
|
-
|
|
374
|
-
expect(result?.decision).toBe('requireApproval');
|
|
375
|
-
expect(result?.reason).toContain('Risk path');
|
|
376
|
-
expect(result?.reason).toContain('Large change');
|
|
377
|
-
expect(result?.diagnostics).toEqual({ riskLevel: 'high', lineCount: 500 });
|
|
378
|
-
});
|
|
379
|
-
|
|
380
|
-
it('should return undefined when ledger access fails', () => {
|
|
381
|
-
mockedListImplementations.mockImplementation(() => {
|
|
382
|
-
throw new Error('Ledger file not found');
|
|
383
|
-
});
|
|
384
|
-
|
|
385
|
-
const host = new RuleHost('/mock/state');
|
|
386
|
-
const result = host.evaluate(makeInput());
|
|
387
|
-
expect(result).toBeUndefined();
|
|
388
|
-
});
|
|
389
|
-
});
|
|
@@ -1,180 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
|
2
|
-
import * as fs from 'fs';
|
|
3
|
-
import * as os from 'os';
|
|
4
|
-
import * as path from 'path';
|
|
5
|
-
import { TrajectoryDatabase } from '../../src/core/trajectory.js';
|
|
6
|
-
|
|
7
|
-
function safeRmDir(dir: string): void {
|
|
8
|
-
try { fs.rmSync(dir, { recursive: true, force: true }); } catch { /* ignore */ }
|
|
9
|
-
}
|
|
10
|
-
|
|
11
|
-
describe('Trajectory — correction_rejected pain event (Phase 2b)', () => {
|
|
12
|
-
let workspaceDir: string;
|
|
13
|
-
let trajectory: TrajectoryDatabase;
|
|
14
|
-
|
|
15
|
-
beforeEach(() => {
|
|
16
|
-
workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-correction-pain-'));
|
|
17
|
-
trajectory = new TrajectoryDatabase({ workspaceDir });
|
|
18
|
-
});
|
|
19
|
-
|
|
20
|
-
afterEach(() => {
|
|
21
|
-
trajectory?.dispose();
|
|
22
|
-
safeRmDir(workspaceDir);
|
|
23
|
-
});
|
|
24
|
-
|
|
25
|
-
it('emits a pain event when a correction sample is rejected', async () => {
|
|
26
|
-
// Step 1: Create a session
|
|
27
|
-
trajectory.recordSession({
|
|
28
|
-
sessionId: 'test-session-001',
|
|
29
|
-
startedAt: new Date().toISOString(),
|
|
30
|
-
});
|
|
31
|
-
|
|
32
|
-
// Step 2: Create an assistant turn (to be referenced)
|
|
33
|
-
const assistantTurnId = trajectory.recordAssistantTurn({
|
|
34
|
-
sessionId: 'test-session-001',
|
|
35
|
-
turnIndex: 0,
|
|
36
|
-
rawText: 'Here is some code I wrote',
|
|
37
|
-
createdAt: new Date().toISOString(),
|
|
38
|
-
});
|
|
39
|
-
|
|
40
|
-
// Step 3: Create a user turn with correction_cue (triggers auto-creation)
|
|
41
|
-
trajectory.recordUserTurn({
|
|
42
|
-
sessionId: 'test-session-001',
|
|
43
|
-
turnIndex: 1,
|
|
44
|
-
rawText: 'This is wrong. Fix it properly.',
|
|
45
|
-
correctionDetected: true,
|
|
46
|
-
correctionCue: 'This is wrong. Fix it properly.',
|
|
47
|
-
referencesAssistantTurnId: assistantTurnId,
|
|
48
|
-
createdAt: new Date().toISOString(),
|
|
49
|
-
});
|
|
50
|
-
|
|
51
|
-
// Wait for async sample creation
|
|
52
|
-
await new Promise(resolve => setTimeout(resolve, 50));
|
|
53
|
-
|
|
54
|
-
// Verify sample was created as pending
|
|
55
|
-
const pendingSamples = trajectory.listCorrectionSamples('pending');
|
|
56
|
-
expect(pendingSamples.length).toBe(1);
|
|
57
|
-
const sampleId = pendingSamples[0].sampleId;
|
|
58
|
-
|
|
59
|
-
// Verify no pain events yet
|
|
60
|
-
const painEventsBefore = trajectory.listPainEventsForSession('test-session-001');
|
|
61
|
-
expect(painEventsBefore.length).toBe(0);
|
|
62
|
-
|
|
63
|
-
// Step 4: Review as rejected
|
|
64
|
-
trajectory.reviewCorrectionSample(sampleId, 'rejected', 'Does not match requirements');
|
|
65
|
-
|
|
66
|
-
// Step 5: Verify pain event was created
|
|
67
|
-
const painEventsAfter = trajectory.listPainEventsForSession('test-session-001');
|
|
68
|
-
expect(painEventsAfter.length).toBe(1);
|
|
69
|
-
|
|
70
|
-
const painEvent = painEventsAfter[0];
|
|
71
|
-
expect(painEvent.source).toBe('correction_rejected');
|
|
72
|
-
expect(painEvent.reason).toContain('Correction rejected');
|
|
73
|
-
expect(painEvent.origin).toBe('system_infer');
|
|
74
|
-
});
|
|
75
|
-
|
|
76
|
-
it('does NOT emit a pain event when a correction sample is approved', async () => {
|
|
77
|
-
// Setup: session + assistant turn + user correction turn
|
|
78
|
-
trajectory.recordSession({
|
|
79
|
-
sessionId: 'test-session-002',
|
|
80
|
-
startedAt: new Date().toISOString(),
|
|
81
|
-
});
|
|
82
|
-
const assistantTurnId = trajectory.recordAssistantTurn({
|
|
83
|
-
sessionId: 'test-session-002',
|
|
84
|
-
turnIndex: 0,
|
|
85
|
-
rawText: 'Here is some code',
|
|
86
|
-
createdAt: new Date().toISOString(),
|
|
87
|
-
});
|
|
88
|
-
trajectory.recordUserTurn({
|
|
89
|
-
sessionId: 'test-session-002',
|
|
90
|
-
turnIndex: 1,
|
|
91
|
-
rawText: 'This needs work',
|
|
92
|
-
correctionDetected: true,
|
|
93
|
-
correctionCue: 'This needs work',
|
|
94
|
-
referencesAssistantTurnId: assistantTurnId,
|
|
95
|
-
createdAt: new Date().toISOString(),
|
|
96
|
-
});
|
|
97
|
-
|
|
98
|
-
// Wait for async sample creation
|
|
99
|
-
await new Promise(resolve => setTimeout(resolve, 50));
|
|
100
|
-
|
|
101
|
-
// Get pending sample
|
|
102
|
-
const pendingSamples = trajectory.listCorrectionSamples('pending');
|
|
103
|
-
expect(pendingSamples.length).toBe(1);
|
|
104
|
-
const sampleId = pendingSamples[0].sampleId;
|
|
105
|
-
|
|
106
|
-
// Review as approved - should NOT trigger pain event
|
|
107
|
-
trajectory.reviewCorrectionSample(sampleId, 'approved', 'Looks good');
|
|
108
|
-
|
|
109
|
-
// Verify NO pain event was created (approved != rejected)
|
|
110
|
-
const painEvents = trajectory.listPainEventsForSession('test-session-002');
|
|
111
|
-
expect(painEvents.length).toBe(0);
|
|
112
|
-
});
|
|
113
|
-
|
|
114
|
-
it('maps quality_score to pain_score correctly (0-100 range)', async () => {
|
|
115
|
-
// Setup: with quality score components
|
|
116
|
-
trajectory.recordSession({
|
|
117
|
-
sessionId: 'test-session-003',
|
|
118
|
-
startedAt: new Date().toISOString(),
|
|
119
|
-
});
|
|
120
|
-
const assistantTurnId = trajectory.recordAssistantTurn({
|
|
121
|
-
sessionId: 'test-session-003',
|
|
122
|
-
turnIndex: 0,
|
|
123
|
-
rawText: 'Code here',
|
|
124
|
-
createdAt: new Date().toISOString(),
|
|
125
|
-
});
|
|
126
|
-
|
|
127
|
-
// Create user turn with correction_cue (adds 20 points)
|
|
128
|
-
trajectory.recordUserTurn({
|
|
129
|
-
sessionId: 'test-session-003',
|
|
130
|
-
turnIndex: 1,
|
|
131
|
-
rawText: 'Wrong approach. Try a different algorithm.',
|
|
132
|
-
correctionDetected: true,
|
|
133
|
-
correctionCue: 'Wrong approach. Try a different algorithm.',
|
|
134
|
-
referencesAssistantTurnId: assistantTurnId,
|
|
135
|
-
createdAt: new Date().toISOString(),
|
|
136
|
-
});
|
|
137
|
-
|
|
138
|
-
// Add a failed tool call (adds 20 points)
|
|
139
|
-
trajectory.recordToolCall({
|
|
140
|
-
sessionId: 'test-session-003',
|
|
141
|
-
turnIndex: 2,
|
|
142
|
-
toolName: 'write',
|
|
143
|
-
toolCallIndex: 0,
|
|
144
|
-
paramsJson: { path: '/tmp/test.txt', content: 'test' },
|
|
145
|
-
outcome: 'failure',
|
|
146
|
-
errorMessage: 'Permission denied',
|
|
147
|
-
errorType: 'PermissionError',
|
|
148
|
-
createdAt: new Date().toISOString(),
|
|
149
|
-
});
|
|
150
|
-
|
|
151
|
-
// Add successful calls (adds 25 points)
|
|
152
|
-
trajectory.recordToolCall({
|
|
153
|
-
sessionId: 'test-session-003',
|
|
154
|
-
turnIndex: 3,
|
|
155
|
-
toolName: 'read',
|
|
156
|
-
toolCallIndex: 1,
|
|
157
|
-
paramsJson: { path: '/tmp/test.txt' },
|
|
158
|
-
outcome: 'success',
|
|
159
|
-
resultJson: { content: 'file content' },
|
|
160
|
-
createdAt: new Date().toISOString(),
|
|
161
|
-
});
|
|
162
|
-
|
|
163
|
-
// Wait for async sample creation
|
|
164
|
-
await new Promise(resolve => setTimeout(resolve, 50));
|
|
165
|
-
|
|
166
|
-
// Get pending sample (quality_score ~65: 20 + 20 + 25)
|
|
167
|
-
const pendingSamples = trajectory.listCorrectionSamples('pending');
|
|
168
|
-
expect(pendingSamples.length).toBe(1);
|
|
169
|
-
const sampleId = pendingSamples[0].sampleId;
|
|
170
|
-
|
|
171
|
-
// Review as rejected
|
|
172
|
-
trajectory.reviewCorrectionSample(sampleId, 'rejected', 'Test rejection');
|
|
173
|
-
|
|
174
|
-
// Verify pain score is clamped to 0-100
|
|
175
|
-
const painEvents = trajectory.listPainEventsForSession('test-session-003');
|
|
176
|
-
expect(painEvents.length).toBe(1);
|
|
177
|
-
expect(painEvents[0].score).toBeGreaterThanOrEqual(0);
|
|
178
|
-
expect(painEvents[0].score).toBeLessThanOrEqual(100);
|
|
179
|
-
});
|
|
180
|
-
});
|