principles-disciple 1.32.0 → 1.34.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (37)
  1. package/openclaw.plugin.json +4 -4
  2. package/package.json +1 -1
  3. package/src/core/correction-cue-learner.ts +203 -0
  4. package/src/core/correction-types.ts +88 -0
  5. package/src/core/evolution-logger.ts +3 -3
  6. package/src/core/init.ts +67 -0
  7. package/src/service/correction-observer-types.ts +58 -0
  8. package/src/service/correction-observer-workflow-manager.ts +218 -0
  9. package/src/service/evolution-worker.ts +172 -146
  10. package/src/service/nocturnal-service.ts +4 -1
  11. package/src/service/subagent-workflow/index.ts +14 -0
  12. package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +3 -1
  13. package/tests/service/evolution-worker.nocturnal.test.ts +14 -1
  14. package/tests/service/evolution-worker.timeout.test.ts +350 -0
  15. package/tests/commands/implementation-lifecycle.test.ts +0 -362
  16. package/tests/core/detection-funnel.test.ts +0 -63
  17. package/tests/core/evolution-e2e.test.ts +0 -58
  18. package/tests/core/evolution-engine-gate-integration.test.ts +0 -543
  19. package/tests/core/evolution-engine.test.ts +0 -562
  20. package/tests/core/evolution-reducer.test.ts +0 -180
  21. package/tests/core/evolution-user-stories.e2e.test.ts +0 -249
  22. package/tests/core/local-worker-routing.test.ts +0 -757
  23. package/tests/core/rule-host.test.ts +0 -389
  24. package/tests/core/trajectory-correction-pain.test.ts +0 -180
  25. package/tests/hooks/gate-edit-verification.test.ts +0 -435
  26. package/tests/hooks/llm.test.ts +0 -308
  27. package/tests/hooks/progressive-trust-gate.test.ts +0 -277
  28. package/tests/hooks/prompt.test.ts +0 -1473
  29. package/tests/index.integration.test.ts +0 -179
  30. package/tests/index.shadow-routing.integration.test.ts +0 -140
  31. package/tests/service/evolution-worker.test.ts +0 -462
  32. package/tests/service/nocturnal-service.test.ts +0 -577
  33. package/tests/service/nocturnal-workflow-manager.test.ts +0 -441
  34. package/tests/tools/critique-prompt.test.ts +0 -260
  35. package/tests/tools/deep-reflect.test.ts +0 -232
  36. package/tests/tools/model-index.test.ts +0 -246
  37. package/tests/ui/app.test.tsx +0 -114
--- package/tests/service/evolution-worker.test.ts
+++ /dev/null
@@ -1,462 +0,0 @@
1
- import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
2
- import {
3
- EvolutionWorkerService,
4
- createEvolutionTaskId,
5
- extractEvolutionTaskId,
6
- hasRecentDuplicateTask,
7
- hasEquivalentPromotedRule,
8
- registerEvolutionTaskSession,
9
- } from '../../src/service/evolution-worker.js';
10
- import { DictionaryService } from '../../src/core/dictionary-service.js';
11
- import * as sessionTracker from '../../src/core/session-tracker.js';
12
- import * as eventLog from '../../src/core/event-log.js';
13
- import * as fs from 'fs';
14
- import * as os from 'os';
15
- import * as path from 'path';
16
- import { evaluatePhase3Inputs } from '../../src/service/phase3-input-filter.js';
17
- import { safeRmDir } from '../test-utils.js';
18
-
19
- vi.mock('../../src/core/dictionary-service');
20
- vi.mock('../../src/core/session-tracker', () => ({
21
- initPersistence: vi.fn(),
22
- flushAllSessions: vi.fn(),
23
- listSessions: vi.fn(() => []), // Returns empty sessions for idle detection
24
- }));
25
-
26
- describe('EvolutionWorkerService', () => {
27
- beforeEach(() => {
28
- vi.useFakeTimers();
29
- });
30
-
31
- afterEach(() => {
32
- vi.useRealTimers();
33
- vi.clearAllMocks();
34
- });
35
-
36
-
37
- it('should detect recent duplicate tasks by source and preview', () => {
38
- const now = new Date('2026-03-18T00:30:00.000Z').getTime();
39
- const queue = [
40
- {
41
- id: 'a1',
42
- score: 50,
43
- source: 'llm_p_frustration_023',
44
- reason: 'pain',
45
- trigger_text_preview: '[EVOLUTION_ACK] 有失败记录',
46
- timestamp: '2026-03-18T00:10:00.000Z',
47
- status: 'pending',
48
- },
49
- ];
50
-
51
- expect(hasRecentDuplicateTask(queue as any, 'llm_p_frustration_023', '[EVOLUTION_ACK] 有失败记录', now, 'pain')).toBe(true);
52
- expect(hasRecentDuplicateTask(queue as any, 'llm_p_frustration_023', 'different preview', now, 'pain')).toBe(false);
53
- // Different reason should not be considered duplicate
54
- expect(hasRecentDuplicateTask(queue as any, 'llm_p_frustration_023', '[EVOLUTION_ACK] 有失败记录', now, 'different_reason')).toBe(false);
55
- });
56
-
57
- it('should skip promoting duplicate exact-match rules', () => {
58
- const dictionary = {
59
- getAllRules: () => ({
60
- EXISTING: {
61
- type: 'exact_match',
62
- phrases: ['Need more evidence'],
63
- status: 'active',
64
- },
65
- }),
66
- };
67
-
68
- expect(hasEquivalentPromotedRule(dictionary as any, 'Need more evidence')).toBe(true);
69
- expect(hasEquivalentPromotedRule(dictionary as any, 'Another phrase')).toBe(false);
70
- });
71
-
72
- it('should generate distinct ids for different pain reasons with the same preview', () => {
73
- const now = new Date('2026-03-20T06:38:32.222Z').getTime();
74
-
75
- const idA = createEvolutionTaskId(
76
- 'tool_failure',
77
- 50,
78
- '',
79
- 'Tool edit failed on memory/.scratchpad.md',
80
- now
81
- );
82
- const idB = createEvolutionTaskId(
83
- 'tool_failure',
84
- 50,
85
- '',
86
- 'Tool edit failed on MEMORY.md',
87
- now
88
- );
89
-
90
- expect(idA).not.toBe(idB);
91
- });
92
-
93
- it('should extract evolution task ids from diagnostician payloads', () => {
94
- expect(extractEvolutionTaskId('Diagnose systemic pain [ID: ab12cd34]. Source: tool_failure.')).toBe('ab12cd34');
95
- expect(extractEvolutionTaskId('plain task without id')).toBeNull();
96
- });
97
-
98
- it('should register assigned diagnostician session on the matching in-progress task', async () => {
99
- const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-evolution-session-'));
100
- const queuePath = path.join(dir, 'evolution_queue.json');
101
-
102
- fs.writeFileSync(queuePath, JSON.stringify([
103
- { id: 'task-a', status: 'pending', score: 40, source: 'pain', reason: 'a', timestamp: '2026-03-20T00:00:00.000Z' },
104
- { id: 'task-b', status: 'in_progress', score: 80, source: 'pain', reason: 'b', timestamp: '2026-03-20T00:00:00.000Z' }
105
- ], null, 2), 'utf8');
106
-
107
- try {
108
- const registered = await registerEvolutionTaskSession(
109
- () => queuePath,
110
- 'task-b',
111
- 'agent:diagnostician:session-1',
112
- { warn: vi.fn() }
113
- );
114
-
115
- expect(registered).toBe(true);
116
- const saved = JSON.parse(fs.readFileSync(queuePath, 'utf8'));
117
- expect(saved[1].assigned_session_key).toBe('agent:diagnostician:session-1');
118
- expect(saved[1].started_at).toBeDefined();
119
- } finally {
120
- safeRmDir(dir);
121
- }
122
- });
123
-
124
- it('should process queue work without persisting a legacy directive file', async () => {
125
- const mockDict = {
126
- flush: vi.fn()
127
- };
128
- vi.mocked(DictionaryService.get).mockReturnValue(mockDict as any);
129
-
130
- const workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-evolution-worker-'));
131
- const stateDir = path.join(workspaceDir, '.state');
132
- fs.mkdirSync(path.join(stateDir, 'sessions'), { recursive: true });
133
- fs.mkdirSync(path.join(stateDir, 'logs'), { recursive: true });
134
- fs.writeFileSync(
135
- path.join(stateDir, 'evolution_queue.json'),
136
- JSON.stringify([
137
- { id: 'task-1', score: 90, source: 'tool_failure', reason: 'write failed', timestamp: '2026-03-20T00:00:00.000Z', status: 'pending' },
138
- ], null, 2),
139
- 'utf8'
140
- );
141
-
142
- const ctx = {
143
- workspaceDir,
144
- stateDir,
145
- logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() }
146
- };
147
-
148
- try {
149
- EvolutionWorkerService.start(ctx as any);
150
-
151
- await vi.advanceTimersByTimeAsync(5000);
152
-
153
- const queue = JSON.parse(fs.readFileSync(path.join(stateDir, 'evolution_queue.json'), 'utf8'));
154
- expect(queue[0].status).toBe('in_progress');
155
- expect(fs.existsSync(path.join(stateDir, 'evolution_directive.json'))).toBe(false);
156
- } finally {
157
- EvolutionWorkerService.stop(ctx as any);
158
- safeRmDir(workspaceDir);
159
- }
160
- });
161
-
162
- describe('sleep_reflection stuck in_progress recovery', () => {
163
- it('should recover stuck in_progress sleep_reflection tasks older than timeout', async () => {
164
- const mockDict = {
165
- flush: vi.fn()
166
- };
167
- vi.mocked(DictionaryService.get).mockReturnValue(mockDict as any);
168
-
169
- const workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-sleep-recovery-'));
170
- const stateDir = path.join(workspaceDir, '.state');
171
- fs.mkdirSync(path.join(stateDir, 'sessions'), { recursive: true });
172
- fs.mkdirSync(path.join(stateDir, 'logs'), { recursive: true });
173
-
174
- // Create a sleep_reflection task that's been in_progress for 2 hours
175
- const twoHoursAgo = new Date(Date.now() - 2 * 60 * 60 * 1000).toISOString();
176
- fs.writeFileSync(
177
- path.join(stateDir, 'evolution_queue.json'),
178
- JSON.stringify([
179
- {
180
- id: 'sleep-stuck',
181
- taskKind: 'sleep_reflection',
182
- priority: 'medium',
183
- score: 50,
184
- source: 'nocturnal',
185
- reason: 'Sleep-mode reflection',
186
- trigger_text_preview: 'Idle workspace detected',
187
- timestamp: twoHoursAgo,
188
- enqueued_at: twoHoursAgo,
189
- started_at: twoHoursAgo,
190
- status: 'in_progress',
191
- traceId: 'sleep-stuck',
192
- retryCount: 0,
193
- maxRetries: 1,
194
- },
195
- ], null, 2),
196
- 'utf8'
197
- );
198
-
199
- const ctx = {
200
- workspaceDir,
201
- stateDir,
202
- logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() }
203
- };
204
-
205
- try {
206
- EvolutionWorkerService.start(ctx as any);
207
- await vi.advanceTimersByTimeAsync(5000);
208
-
209
- const queue = JSON.parse(fs.readFileSync(path.join(stateDir, 'evolution_queue.json'), 'utf8'));
210
- expect(queue[0].status).toBe('failed');
211
- expect(queue[0].resolution).toBe('failed_max_retries');
212
- expect(queue[0].completed_at).toBeDefined();
213
- expect(queue[0].lastError).toContain('timed out');
214
- } finally {
215
- EvolutionWorkerService.stop(ctx as any);
216
- safeRmDir(workspaceDir);
217
- }
218
- });
219
-
220
- it('should not recover sleep_reflection tasks within timeout', async () => {
221
- const mockDict = { flush: vi.fn() };
222
- vi.mocked(DictionaryService.get).mockReturnValue(mockDict as any);
223
-
224
- const workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-sleep-recent-'));
225
- const stateDir = path.join(workspaceDir, '.state');
226
- fs.mkdirSync(path.join(stateDir, 'sessions'), { recursive: true });
227
- fs.mkdirSync(path.join(stateDir, 'logs'), { recursive: true });
228
-
229
- // Task started 10 minutes ago — well within 1-hour timeout
230
- const tenMinutesAgo = new Date(Date.now() - 10 * 60 * 1000).toISOString();
231
- fs.writeFileSync(
232
- path.join(stateDir, 'evolution_queue.json'),
233
- JSON.stringify([
234
- {
235
- id: 'sleep-recent',
236
- taskKind: 'sleep_reflection',
237
- priority: 'medium',
238
- score: 50,
239
- source: 'nocturnal',
240
- reason: 'Sleep-mode reflection',
241
- trigger_text_preview: 'Idle workspace detected',
242
- timestamp: tenMinutesAgo,
243
- enqueued_at: tenMinutesAgo,
244
- started_at: tenMinutesAgo,
245
- status: 'in_progress',
246
- traceId: 'sleep-recent',
247
- retryCount: 0,
248
- maxRetries: 1,
249
- },
250
- ], null, 2),
251
- 'utf8'
252
- );
253
-
254
- const ctx = {
255
- workspaceDir,
256
- stateDir,
257
- logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() }
258
- };
259
-
260
- try {
261
- EvolutionWorkerService.start(ctx as any);
262
- await vi.advanceTimersByTimeAsync(5000);
263
-
264
- const queue = JSON.parse(fs.readFileSync(path.join(stateDir, 'evolution_queue.json'), 'utf8'));
265
- // Still in_progress — not old enough to recover
266
- expect(queue[0].status).toBe('in_progress');
267
- } finally {
268
- EvolutionWorkerService.stop(ctx as any);
269
- safeRmDir(workspaceDir);
270
- }
271
- });
272
-
273
- it('should not affect pain_diagnosis in_progress timeout logic', async () => {
274
- const mockDict = { flush: vi.fn() };
275
- vi.mocked(DictionaryService.get).mockReturnValue(mockDict as any);
276
-
277
- const workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-pain-unchanged-'));
278
- const stateDir = path.join(workspaceDir, '.state');
279
- fs.mkdirSync(path.join(stateDir, 'sessions'), { recursive: true });
280
- fs.mkdirSync(path.join(stateDir, 'logs'), { recursive: true });
281
-
282
- // pain_diagnosis task that's been in_progress for 2 hours — should be auto-completed
283
- const twoHoursAgo = new Date(Date.now() - 2 * 60 * 60 * 1000).toISOString();
284
- fs.writeFileSync(
285
- path.join(stateDir, 'evolution_queue.json'),
286
- JSON.stringify([
287
- {
288
- id: 'pain-old',
289
- taskKind: 'pain_diagnosis',
290
- priority: 'high',
291
- score: 90,
292
- source: 'tool_failure',
293
- reason: 'write failed',
294
- trigger_text_preview: 'Tool edit failed',
295
- timestamp: twoHoursAgo,
296
- enqueued_at: twoHoursAgo,
297
- started_at: twoHoursAgo,
298
- status: 'in_progress',
299
- traceId: 'pain-old',
300
- retryCount: 0,
301
- maxRetries: 3,
302
- },
303
- ], null, 2),
304
- 'utf8'
305
- );
306
-
307
- const ctx = {
308
- workspaceDir,
309
- stateDir,
310
- logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() }
311
- };
312
-
313
- try {
314
- EvolutionWorkerService.start(ctx as any);
315
- await vi.advanceTimersByTimeAsync(5000);
316
-
317
- const queue = JSON.parse(fs.readFileSync(path.join(stateDir, 'evolution_queue.json'), 'utf8'));
318
- // pain_diagnosis uses auto_completed_timeout, NOT failed
319
- expect(queue[0].status).toBe('completed');
320
- expect(queue[0].resolution).toBe('auto_completed_timeout');
321
- } finally {
322
- EvolutionWorkerService.stop(ctx as any);
323
- safeRmDir(workspaceDir);
324
- }
325
- });
326
- });
327
-
328
- describe('Phase 3 Eligibility - Queue Only (Trust Removed)', () => {
329
- it('makes queue eligible when tasks are valid', () => {
330
- const result = evaluatePhase3Inputs(
331
- [{ id: 'task-1', status: 'completed', completed_at: '2026-03-25T10:00:00.000Z' }]
332
- );
333
-
334
- expect(result.phase3ShadowEligible).toBe(true);
335
- expect(result.queueTruthReady).toBe(true);
336
- expect(result.evolution.eligible).toHaveLength(1);
337
- expect(result.evolution.rejected).toHaveLength(0);
338
- });
339
-
340
- it('makes queue eligible when directive is stale (production scenario)', () => {
341
- const result = evaluatePhase3Inputs(
342
- [{ id: 'task-1', status: 'completed', completed_at: '2026-03-25T10:00:00.000Z' }]
343
- );
344
-
345
- expect(result.phase3ShadowEligible).toBe(true);
346
- expect(result.queueTruthReady).toBe(true);
347
- });
348
-
349
- it('rejects empty queue', () => {
350
- const result = evaluatePhase3Inputs([]);
351
-
352
- expect(result.phase3ShadowEligible).toBe(false);
353
- expect(result.queueTruthReady).toBe(false);
354
- });
355
-
356
- it('rejects invalid queue status', () => {
357
- const result = evaluatePhase3Inputs(
358
- [{ id: 'task-1', status: 'invalid' }]
359
- );
360
-
361
- expect(result.phase3ShadowEligible).toBe(false);
362
- expect(result.queueTruthReady).toBe(false);
363
- expect(result.evolution.rejected[0].reasons).toContain('invalid_status');
364
- });
365
-
366
- it('eligible requires queue with valid completed tasks', () => {
367
- const result = evaluatePhase3Inputs(
368
- [{ id: 'task-1', status: 'completed', completed_at: '2026-03-25T10:00:00.000Z' }]
369
- );
370
-
371
- expect(result.phase3ShadowEligible).toBe(true);
372
- expect(result.queueTruthReady).toBe(true);
373
- });
374
-
375
- it('does not accept directive as a parameter (API design)', () => {
376
- const func = evaluatePhase3Inputs;
377
- const funcString = func.toString();
378
-
379
- expect(funcString).not.toMatch(/directive\s*:/);
380
- expect(funcString).not.toMatch(/directive\s*\)/);
381
- });
382
-
383
- it('handles multiple queue items correctly', () => {
384
- const result = evaluatePhase3Inputs([
385
- { id: 'task-1', status: 'completed', completed_at: '2026-03-25T10:00:00.000Z' },
386
- { id: 'task-2', status: 'in_progress', started_at: '2026-03-25T11:00:00.000Z' },
387
- { id: 'task-3', status: 'pending' }
388
- ]);
389
-
390
- expect(result.phase3ShadowEligible).toBe(true);
391
- expect(result.evolution.eligible).toHaveLength(3);
392
- expect(result.evolution.rejected).toHaveLength(0);
393
- });
394
- });
395
- });
396
-
397
- // ── P0-3 / P1: purgeStaleFailedTasks tests ──
398
-
399
- import { purgeStaleFailedTasks } from '../../src/service/evolution-worker.js';
400
-
401
- describe('purgeStaleFailedTasks', () => {
402
- const makeTask = (id: string, status: string, hoursAgo: number) => ({
403
- id,
404
- taskKind: 'sleep_reflection' as const,
405
- status,
406
- timestamp: new Date(Date.now() - hoursAgo * 60 * 60 * 1000).toISOString(),
407
- enqueued_at: new Date(Date.now() - hoursAgo * 60 * 60 * 1000).toISOString(),
408
- source: 'test',
409
- score: 50,
410
- reason: 'test',
411
- retryCount: 1,
412
- maxRetries: 1,
413
- lastError: 'Nocturnal reflection failed: no_evaluable_principles',
414
- resolution: 'failed_max_retries' as const,
415
- });
416
-
417
- it('should purge failed tasks older than 24 hours', () => {
418
- const queue: any[] = [
419
- makeTask('old-1', 'failed', 30), // 30h old — should be purged
420
- makeTask('old-2', 'failed', 48), // 48h old — should be purged
421
- makeTask('recent', 'failed', 12), // 12h old — should be kept
422
- makeTask('pending', 'pending', 1),
423
- ];
424
-
425
- const result = purgeStaleFailedTasks(queue, console as any);
426
-
427
- expect(result.purged).toBe(2);
428
- expect(result.remaining).toBe(2);
429
- expect(queue.length).toBe(2);
430
- expect(queue.find((t) => t.id === 'old-1')).toBeUndefined();
431
- expect(queue.find((t) => t.id === 'recent')).toBeDefined();
432
- expect(queue.find((t) => t.id === 'pending')).toBeDefined();
433
- });
434
-
435
- it('should not purge non-failed tasks regardless of age', () => {
436
- const queue: any[] = [
437
- makeTask('old-completed', 'completed', 72),
438
- makeTask('old-pending', 'pending', 72),
439
- makeTask('old-in-progress', 'in_progress', 72),
440
- ];
441
-
442
- const result = purgeStaleFailedTasks(queue, console as any);
443
-
444
- expect(result.purged).toBe(0);
445
- expect(result.remaining).toBe(3);
446
- expect(queue.length).toBe(3);
447
- });
448
-
449
- it('should group purge results by failure reason', () => {
450
- const queue: any[] = [
451
- { ...makeTask('fail-1', 'failed', 30), lastError: 'Nocturnal reflection failed: no_evaluable_principles' },
452
- { ...makeTask('fail-2', 'failed', 30), lastError: 'Nocturnal reflection failed: no_evaluable_principles' },
453
- { ...makeTask('fail-3', 'failed', 30), lastError: 'Nocturnal reflection failed: validation_failed' },
454
- ];
455
-
456
- const result = purgeStaleFailedTasks(queue, console as any);
457
-
458
- expect(result.purged).toBe(3);
459
- expect(result.byReason['Nocturnal reflection failed: no_evaluable_principles']).toBe(2);
460
- expect(result.byReason['Nocturnal reflection failed: validation_failed']).toBe(1);
461
- });
462
- });