principles-disciple 1.32.0 → 1.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/openclaw.plugin.json +1 -1
  2. package/package.json +1 -1
  3. package/src/core/correction-cue-learner.ts +203 -0
  4. package/src/core/correction-types.ts +88 -0
  5. package/src/core/init.ts +67 -0
  6. package/src/service/correction-observer-types.ts +58 -0
  7. package/src/service/correction-observer-workflow-manager.ts +218 -0
  8. package/src/service/evolution-worker.ts +161 -140
  9. package/src/service/nocturnal-service.ts +4 -1
  10. package/src/service/subagent-workflow/index.ts +14 -0
  11. package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +3 -1
  12. package/tests/service/evolution-worker.nocturnal.test.ts +14 -1
  13. package/tests/commands/implementation-lifecycle.test.ts +0 -362
  14. package/tests/core/detection-funnel.test.ts +0 -63
  15. package/tests/core/evolution-e2e.test.ts +0 -58
  16. package/tests/core/evolution-engine-gate-integration.test.ts +0 -543
  17. package/tests/core/evolution-engine.test.ts +0 -562
  18. package/tests/core/evolution-reducer.test.ts +0 -180
  19. package/tests/core/evolution-user-stories.e2e.test.ts +0 -249
  20. package/tests/core/local-worker-routing.test.ts +0 -757
  21. package/tests/core/rule-host.test.ts +0 -389
  22. package/tests/core/trajectory-correction-pain.test.ts +0 -180
  23. package/tests/hooks/gate-edit-verification.test.ts +0 -435
  24. package/tests/hooks/llm.test.ts +0 -308
  25. package/tests/hooks/progressive-trust-gate.test.ts +0 -277
  26. package/tests/hooks/prompt.test.ts +0 -1473
  27. package/tests/index.integration.test.ts +0 -179
  28. package/tests/index.shadow-routing.integration.test.ts +0 -140
  29. package/tests/service/evolution-worker.test.ts +0 -462
  30. package/tests/service/nocturnal-service.test.ts +0 -577
  31. package/tests/service/nocturnal-workflow-manager.test.ts +0 -441
  32. package/tests/tools/critique-prompt.test.ts +0 -260
  33. package/tests/tools/deep-reflect.test.ts +0 -232
  34. package/tests/tools/model-index.test.ts +0 -246
  35. package/tests/ui/app.test.tsx +0 -114
@@ -1,462 +0,0 @@
1
- import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
2
- import {
3
- EvolutionWorkerService,
4
- createEvolutionTaskId,
5
- extractEvolutionTaskId,
6
- hasRecentDuplicateTask,
7
- hasEquivalentPromotedRule,
8
- registerEvolutionTaskSession,
9
- } from '../../src/service/evolution-worker.js';
10
- import { DictionaryService } from '../../src/core/dictionary-service.js';
11
- import * as sessionTracker from '../../src/core/session-tracker.js';
12
- import * as eventLog from '../../src/core/event-log.js';
13
- import * as fs from 'fs';
14
- import * as os from 'os';
15
- import * as path from 'path';
16
- import { evaluatePhase3Inputs } from '../../src/service/phase3-input-filter.js';
17
- import { safeRmDir } from '../test-utils.js';
18
-
19
- vi.mock('../../src/core/dictionary-service');
20
- vi.mock('../../src/core/session-tracker', () => ({
21
- initPersistence: vi.fn(),
22
- flushAllSessions: vi.fn(),
23
- listSessions: vi.fn(() => []), // Returns empty sessions for idle detection
24
- }));
25
-
26
- describe('EvolutionWorkerService', () => {
27
- beforeEach(() => {
28
- vi.useFakeTimers();
29
- });
30
-
31
- afterEach(() => {
32
- vi.useRealTimers();
33
- vi.clearAllMocks();
34
- });
35
-
36
-
37
- it('should detect recent duplicate tasks by source and preview', () => {
38
- const now = new Date('2026-03-18T00:30:00.000Z').getTime();
39
- const queue = [
40
- {
41
- id: 'a1',
42
- score: 50,
43
- source: 'llm_p_frustration_023',
44
- reason: 'pain',
45
- trigger_text_preview: '[EVOLUTION_ACK] 有失败记录',
46
- timestamp: '2026-03-18T00:10:00.000Z',
47
- status: 'pending',
48
- },
49
- ];
50
-
51
- expect(hasRecentDuplicateTask(queue as any, 'llm_p_frustration_023', '[EVOLUTION_ACK] 有失败记录', now, 'pain')).toBe(true);
52
- expect(hasRecentDuplicateTask(queue as any, 'llm_p_frustration_023', 'different preview', now, 'pain')).toBe(false);
53
- // Different reason should not be considered duplicate
54
- expect(hasRecentDuplicateTask(queue as any, 'llm_p_frustration_023', '[EVOLUTION_ACK] 有失败记录', now, 'different_reason')).toBe(false);
55
- });
56
-
57
- it('should skip promoting duplicate exact-match rules', () => {
58
- const dictionary = {
59
- getAllRules: () => ({
60
- EXISTING: {
61
- type: 'exact_match',
62
- phrases: ['Need more evidence'],
63
- status: 'active',
64
- },
65
- }),
66
- };
67
-
68
- expect(hasEquivalentPromotedRule(dictionary as any, 'Need more evidence')).toBe(true);
69
- expect(hasEquivalentPromotedRule(dictionary as any, 'Another phrase')).toBe(false);
70
- });
71
-
72
- it('should generate distinct ids for different pain reasons with the same preview', () => {
73
- const now = new Date('2026-03-20T06:38:32.222Z').getTime();
74
-
75
- const idA = createEvolutionTaskId(
76
- 'tool_failure',
77
- 50,
78
- '',
79
- 'Tool edit failed on memory/.scratchpad.md',
80
- now
81
- );
82
- const idB = createEvolutionTaskId(
83
- 'tool_failure',
84
- 50,
85
- '',
86
- 'Tool edit failed on MEMORY.md',
87
- now
88
- );
89
-
90
- expect(idA).not.toBe(idB);
91
- });
92
-
93
- it('should extract evolution task ids from diagnostician payloads', () => {
94
- expect(extractEvolutionTaskId('Diagnose systemic pain [ID: ab12cd34]. Source: tool_failure.')).toBe('ab12cd34');
95
- expect(extractEvolutionTaskId('plain task without id')).toBeNull();
96
- });
97
-
98
- it('should register assigned diagnostician session on the matching in-progress task', async () => {
99
- const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-evolution-session-'));
100
- const queuePath = path.join(dir, 'evolution_queue.json');
101
-
102
- fs.writeFileSync(queuePath, JSON.stringify([
103
- { id: 'task-a', status: 'pending', score: 40, source: 'pain', reason: 'a', timestamp: '2026-03-20T00:00:00.000Z' },
104
- { id: 'task-b', status: 'in_progress', score: 80, source: 'pain', reason: 'b', timestamp: '2026-03-20T00:00:00.000Z' }
105
- ], null, 2), 'utf8');
106
-
107
- try {
108
- const registered = await registerEvolutionTaskSession(
109
- () => queuePath,
110
- 'task-b',
111
- 'agent:diagnostician:session-1',
112
- { warn: vi.fn() }
113
- );
114
-
115
- expect(registered).toBe(true);
116
- const saved = JSON.parse(fs.readFileSync(queuePath, 'utf8'));
117
- expect(saved[1].assigned_session_key).toBe('agent:diagnostician:session-1');
118
- expect(saved[1].started_at).toBeDefined();
119
- } finally {
120
- safeRmDir(dir);
121
- }
122
- });
123
-
124
- it('should process queue work without persisting a legacy directive file', async () => {
125
- const mockDict = {
126
- flush: vi.fn()
127
- };
128
- vi.mocked(DictionaryService.get).mockReturnValue(mockDict as any);
129
-
130
- const workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-evolution-worker-'));
131
- const stateDir = path.join(workspaceDir, '.state');
132
- fs.mkdirSync(path.join(stateDir, 'sessions'), { recursive: true });
133
- fs.mkdirSync(path.join(stateDir, 'logs'), { recursive: true });
134
- fs.writeFileSync(
135
- path.join(stateDir, 'evolution_queue.json'),
136
- JSON.stringify([
137
- { id: 'task-1', score: 90, source: 'tool_failure', reason: 'write failed', timestamp: '2026-03-20T00:00:00.000Z', status: 'pending' },
138
- ], null, 2),
139
- 'utf8'
140
- );
141
-
142
- const ctx = {
143
- workspaceDir,
144
- stateDir,
145
- logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() }
146
- };
147
-
148
- try {
149
- EvolutionWorkerService.start(ctx as any);
150
-
151
- await vi.advanceTimersByTimeAsync(5000);
152
-
153
- const queue = JSON.parse(fs.readFileSync(path.join(stateDir, 'evolution_queue.json'), 'utf8'));
154
- expect(queue[0].status).toBe('in_progress');
155
- expect(fs.existsSync(path.join(stateDir, 'evolution_directive.json'))).toBe(false);
156
- } finally {
157
- EvolutionWorkerService.stop(ctx as any);
158
- safeRmDir(workspaceDir);
159
- }
160
- });
161
-
162
- describe('sleep_reflection stuck in_progress recovery', () => {
163
- it('should recover stuck in_progress sleep_reflection tasks older than timeout', async () => {
164
- const mockDict = {
165
- flush: vi.fn()
166
- };
167
- vi.mocked(DictionaryService.get).mockReturnValue(mockDict as any);
168
-
169
- const workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-sleep-recovery-'));
170
- const stateDir = path.join(workspaceDir, '.state');
171
- fs.mkdirSync(path.join(stateDir, 'sessions'), { recursive: true });
172
- fs.mkdirSync(path.join(stateDir, 'logs'), { recursive: true });
173
-
174
- // Create a sleep_reflection task that's been in_progress for 2 hours
175
- const twoHoursAgo = new Date(Date.now() - 2 * 60 * 60 * 1000).toISOString();
176
- fs.writeFileSync(
177
- path.join(stateDir, 'evolution_queue.json'),
178
- JSON.stringify([
179
- {
180
- id: 'sleep-stuck',
181
- taskKind: 'sleep_reflection',
182
- priority: 'medium',
183
- score: 50,
184
- source: 'nocturnal',
185
- reason: 'Sleep-mode reflection',
186
- trigger_text_preview: 'Idle workspace detected',
187
- timestamp: twoHoursAgo,
188
- enqueued_at: twoHoursAgo,
189
- started_at: twoHoursAgo,
190
- status: 'in_progress',
191
- traceId: 'sleep-stuck',
192
- retryCount: 0,
193
- maxRetries: 1,
194
- },
195
- ], null, 2),
196
- 'utf8'
197
- );
198
-
199
- const ctx = {
200
- workspaceDir,
201
- stateDir,
202
- logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() }
203
- };
204
-
205
- try {
206
- EvolutionWorkerService.start(ctx as any);
207
- await vi.advanceTimersByTimeAsync(5000);
208
-
209
- const queue = JSON.parse(fs.readFileSync(path.join(stateDir, 'evolution_queue.json'), 'utf8'));
210
- expect(queue[0].status).toBe('failed');
211
- expect(queue[0].resolution).toBe('failed_max_retries');
212
- expect(queue[0].completed_at).toBeDefined();
213
- expect(queue[0].lastError).toContain('timed out');
214
- } finally {
215
- EvolutionWorkerService.stop(ctx as any);
216
- safeRmDir(workspaceDir);
217
- }
218
- });
219
-
220
- it('should not recover sleep_reflection tasks within timeout', async () => {
221
- const mockDict = { flush: vi.fn() };
222
- vi.mocked(DictionaryService.get).mockReturnValue(mockDict as any);
223
-
224
- const workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-sleep-recent-'));
225
- const stateDir = path.join(workspaceDir, '.state');
226
- fs.mkdirSync(path.join(stateDir, 'sessions'), { recursive: true });
227
- fs.mkdirSync(path.join(stateDir, 'logs'), { recursive: true });
228
-
229
- // Task started 10 minutes ago — well within 1-hour timeout
230
- const tenMinutesAgo = new Date(Date.now() - 10 * 60 * 1000).toISOString();
231
- fs.writeFileSync(
232
- path.join(stateDir, 'evolution_queue.json'),
233
- JSON.stringify([
234
- {
235
- id: 'sleep-recent',
236
- taskKind: 'sleep_reflection',
237
- priority: 'medium',
238
- score: 50,
239
- source: 'nocturnal',
240
- reason: 'Sleep-mode reflection',
241
- trigger_text_preview: 'Idle workspace detected',
242
- timestamp: tenMinutesAgo,
243
- enqueued_at: tenMinutesAgo,
244
- started_at: tenMinutesAgo,
245
- status: 'in_progress',
246
- traceId: 'sleep-recent',
247
- retryCount: 0,
248
- maxRetries: 1,
249
- },
250
- ], null, 2),
251
- 'utf8'
252
- );
253
-
254
- const ctx = {
255
- workspaceDir,
256
- stateDir,
257
- logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() }
258
- };
259
-
260
- try {
261
- EvolutionWorkerService.start(ctx as any);
262
- await vi.advanceTimersByTimeAsync(5000);
263
-
264
- const queue = JSON.parse(fs.readFileSync(path.join(stateDir, 'evolution_queue.json'), 'utf8'));
265
- // Still in_progress — not old enough to recover
266
- expect(queue[0].status).toBe('in_progress');
267
- } finally {
268
- EvolutionWorkerService.stop(ctx as any);
269
- safeRmDir(workspaceDir);
270
- }
271
- });
272
-
273
- it('should not affect pain_diagnosis in_progress timeout logic', async () => {
274
- const mockDict = { flush: vi.fn() };
275
- vi.mocked(DictionaryService.get).mockReturnValue(mockDict as any);
276
-
277
- const workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-pain-unchanged-'));
278
- const stateDir = path.join(workspaceDir, '.state');
279
- fs.mkdirSync(path.join(stateDir, 'sessions'), { recursive: true });
280
- fs.mkdirSync(path.join(stateDir, 'logs'), { recursive: true });
281
-
282
- // pain_diagnosis task that's been in_progress for 2 hours — should be auto-completed
283
- const twoHoursAgo = new Date(Date.now() - 2 * 60 * 60 * 1000).toISOString();
284
- fs.writeFileSync(
285
- path.join(stateDir, 'evolution_queue.json'),
286
- JSON.stringify([
287
- {
288
- id: 'pain-old',
289
- taskKind: 'pain_diagnosis',
290
- priority: 'high',
291
- score: 90,
292
- source: 'tool_failure',
293
- reason: 'write failed',
294
- trigger_text_preview: 'Tool edit failed',
295
- timestamp: twoHoursAgo,
296
- enqueued_at: twoHoursAgo,
297
- started_at: twoHoursAgo,
298
- status: 'in_progress',
299
- traceId: 'pain-old',
300
- retryCount: 0,
301
- maxRetries: 3,
302
- },
303
- ], null, 2),
304
- 'utf8'
305
- );
306
-
307
- const ctx = {
308
- workspaceDir,
309
- stateDir,
310
- logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() }
311
- };
312
-
313
- try {
314
- EvolutionWorkerService.start(ctx as any);
315
- await vi.advanceTimersByTimeAsync(5000);
316
-
317
- const queue = JSON.parse(fs.readFileSync(path.join(stateDir, 'evolution_queue.json'), 'utf8'));
318
- // pain_diagnosis uses auto_completed_timeout, NOT failed
319
- expect(queue[0].status).toBe('completed');
320
- expect(queue[0].resolution).toBe('auto_completed_timeout');
321
- } finally {
322
- EvolutionWorkerService.stop(ctx as any);
323
- safeRmDir(workspaceDir);
324
- }
325
- });
326
- });
327
-
328
- describe('Phase 3 Eligibility - Queue Only (Trust Removed)', () => {
329
- it('makes queue eligible when tasks are valid', () => {
330
- const result = evaluatePhase3Inputs(
331
- [{ id: 'task-1', status: 'completed', completed_at: '2026-03-25T10:00:00.000Z' }]
332
- );
333
-
334
- expect(result.phase3ShadowEligible).toBe(true);
335
- expect(result.queueTruthReady).toBe(true);
336
- expect(result.evolution.eligible).toHaveLength(1);
337
- expect(result.evolution.rejected).toHaveLength(0);
338
- });
339
-
340
- it('makes queue eligible when directive is stale (production scenario)', () => {
341
- const result = evaluatePhase3Inputs(
342
- [{ id: 'task-1', status: 'completed', completed_at: '2026-03-25T10:00:00.000Z' }]
343
- );
344
-
345
- expect(result.phase3ShadowEligible).toBe(true);
346
- expect(result.queueTruthReady).toBe(true);
347
- });
348
-
349
- it('rejects empty queue', () => {
350
- const result = evaluatePhase3Inputs([]);
351
-
352
- expect(result.phase3ShadowEligible).toBe(false);
353
- expect(result.queueTruthReady).toBe(false);
354
- });
355
-
356
- it('rejects invalid queue status', () => {
357
- const result = evaluatePhase3Inputs(
358
- [{ id: 'task-1', status: 'invalid' }]
359
- );
360
-
361
- expect(result.phase3ShadowEligible).toBe(false);
362
- expect(result.queueTruthReady).toBe(false);
363
- expect(result.evolution.rejected[0].reasons).toContain('invalid_status');
364
- });
365
-
366
- it('eligible requires queue with valid completed tasks', () => {
367
- const result = evaluatePhase3Inputs(
368
- [{ id: 'task-1', status: 'completed', completed_at: '2026-03-25T10:00:00.000Z' }]
369
- );
370
-
371
- expect(result.phase3ShadowEligible).toBe(true);
372
- expect(result.queueTruthReady).toBe(true);
373
- });
374
-
375
- it('does not accept directive as a parameter (API design)', () => {
376
- const func = evaluatePhase3Inputs;
377
- const funcString = func.toString();
378
-
379
- expect(funcString).not.toMatch(/directive\s*:/);
380
- expect(funcString).not.toMatch(/directive\s*\)/);
381
- });
382
-
383
- it('handles multiple queue items correctly', () => {
384
- const result = evaluatePhase3Inputs([
385
- { id: 'task-1', status: 'completed', completed_at: '2026-03-25T10:00:00.000Z' },
386
- { id: 'task-2', status: 'in_progress', started_at: '2026-03-25T11:00:00.000Z' },
387
- { id: 'task-3', status: 'pending' }
388
- ]);
389
-
390
- expect(result.phase3ShadowEligible).toBe(true);
391
- expect(result.evolution.eligible).toHaveLength(3);
392
- expect(result.evolution.rejected).toHaveLength(0);
393
- });
394
- });
395
- });
396
-
397
- // ── P0-3 / P1: purgeStaleFailedTasks tests ──
398
-
399
- import { purgeStaleFailedTasks } from '../../src/service/evolution-worker.js';
400
-
401
- describe('purgeStaleFailedTasks', () => {
402
- const makeTask = (id: string, status: string, hoursAgo: number) => ({
403
- id,
404
- taskKind: 'sleep_reflection' as const,
405
- status,
406
- timestamp: new Date(Date.now() - hoursAgo * 60 * 60 * 1000).toISOString(),
407
- enqueued_at: new Date(Date.now() - hoursAgo * 60 * 60 * 1000).toISOString(),
408
- source: 'test',
409
- score: 50,
410
- reason: 'test',
411
- retryCount: 1,
412
- maxRetries: 1,
413
- lastError: 'Nocturnal reflection failed: no_evaluable_principles',
414
- resolution: 'failed_max_retries' as const,
415
- });
416
-
417
- it('should purge failed tasks older than 24 hours', () => {
418
- const queue: any[] = [
419
- makeTask('old-1', 'failed', 30), // 30h old — should be purged
420
- makeTask('old-2', 'failed', 48), // 48h old — should be purged
421
- makeTask('recent', 'failed', 12), // 12h old — should be kept
422
- makeTask('pending', 'pending', 1),
423
- ];
424
-
425
- const result = purgeStaleFailedTasks(queue, console as any);
426
-
427
- expect(result.purged).toBe(2);
428
- expect(result.remaining).toBe(2);
429
- expect(queue.length).toBe(2);
430
- expect(queue.find((t) => t.id === 'old-1')).toBeUndefined();
431
- expect(queue.find((t) => t.id === 'recent')).toBeDefined();
432
- expect(queue.find((t) => t.id === 'pending')).toBeDefined();
433
- });
434
-
435
- it('should not purge non-failed tasks regardless of age', () => {
436
- const queue: any[] = [
437
- makeTask('old-completed', 'completed', 72),
438
- makeTask('old-pending', 'pending', 72),
439
- makeTask('old-in-progress', 'in_progress', 72),
440
- ];
441
-
442
- const result = purgeStaleFailedTasks(queue, console as any);
443
-
444
- expect(result.purged).toBe(0);
445
- expect(result.remaining).toBe(3);
446
- expect(queue.length).toBe(3);
447
- });
448
-
449
- it('should group purge results by failure reason', () => {
450
- const queue: any[] = [
451
- { ...makeTask('fail-1', 'failed', 30), lastError: 'Nocturnal reflection failed: no_evaluable_principles' },
452
- { ...makeTask('fail-2', 'failed', 30), lastError: 'Nocturnal reflection failed: no_evaluable_principles' },
453
- { ...makeTask('fail-3', 'failed', 30), lastError: 'Nocturnal reflection failed: validation_failed' },
454
- ];
455
-
456
- const result = purgeStaleFailedTasks(queue, console as any);
457
-
458
- expect(result.purged).toBe(3);
459
- expect(result.byReason['Nocturnal reflection failed: no_evaluable_principles']).toBe(2);
460
- expect(result.byReason['Nocturnal reflection failed: validation_failed']).toBe(1);
461
- });
462
- });