principles-disciple 1.32.0 → 1.34.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/openclaw.plugin.json +4 -4
  2. package/package.json +1 -1
  3. package/src/core/correction-cue-learner.ts +203 -0
  4. package/src/core/correction-types.ts +88 -0
  5. package/src/core/evolution-logger.ts +3 -3
  6. package/src/core/init.ts +67 -0
  7. package/src/service/correction-observer-types.ts +58 -0
  8. package/src/service/correction-observer-workflow-manager.ts +218 -0
  9. package/src/service/evolution-worker.ts +172 -146
  10. package/src/service/nocturnal-service.ts +4 -1
  11. package/src/service/subagent-workflow/index.ts +14 -0
  12. package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +3 -1
  13. package/tests/service/evolution-worker.nocturnal.test.ts +14 -1
  14. package/tests/service/evolution-worker.timeout.test.ts +350 -0
  15. package/tests/commands/implementation-lifecycle.test.ts +0 -362
  16. package/tests/core/detection-funnel.test.ts +0 -63
  17. package/tests/core/evolution-e2e.test.ts +0 -58
  18. package/tests/core/evolution-engine-gate-integration.test.ts +0 -543
  19. package/tests/core/evolution-engine.test.ts +0 -562
  20. package/tests/core/evolution-reducer.test.ts +0 -180
  21. package/tests/core/evolution-user-stories.e2e.test.ts +0 -249
  22. package/tests/core/local-worker-routing.test.ts +0 -757
  23. package/tests/core/rule-host.test.ts +0 -389
  24. package/tests/core/trajectory-correction-pain.test.ts +0 -180
  25. package/tests/hooks/gate-edit-verification.test.ts +0 -435
  26. package/tests/hooks/llm.test.ts +0 -308
  27. package/tests/hooks/progressive-trust-gate.test.ts +0 -277
  28. package/tests/hooks/prompt.test.ts +0 -1473
  29. package/tests/index.integration.test.ts +0 -179
  30. package/tests/index.shadow-routing.integration.test.ts +0 -140
  31. package/tests/service/evolution-worker.test.ts +0 -462
  32. package/tests/service/nocturnal-service.test.ts +0 -577
  33. package/tests/service/nocturnal-workflow-manager.test.ts +0 -441
  34. package/tests/tools/critique-prompt.test.ts +0 -260
  35. package/tests/tools/deep-reflect.test.ts +0 -232
  36. package/tests/tools/model-index.test.ts +0 -246
  37. package/tests/ui/app.test.tsx +0 -114
@@ -1,543 +0,0 @@
1
- /**
2
- * Evolution Engine Gate Integration Tests
3
- *
4
- * 集成测试:验证 Gate 系统在实际场景下的表现
5
- */
6
-
7
- import { describe, it, test, expect, beforeEach, afterEach } from 'vitest';
8
- import * as fs from 'fs';
9
- import * as path from 'path';
10
- import * as os from 'os';
11
- import {
12
- EvolutionEngine,
13
- getEvolutionEngine,
14
- } from '../../src/core/evolution-engine.js';
15
- import {
16
- EvolutionTier,
17
- TIER_DEFINITIONS,
18
- TASK_DIFFICULTY_CONFIG,
19
- getTierByPoints,
20
- ToolCallContext,
21
- } from '../../src/core/evolution-types.js';
22
-
23
- // ===== 测试工具 =====
24
-
25
/**
 * Creates a fresh, uniquely named workspace directory under the OS temp
 * directory (prefix `ep-gate-test-`) containing an empty `.state`
 * subdirectory.
 *
 * @returns The path of the newly created workspace root.
 */
function createTempWorkspace(): string {
  const workspaceRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'ep-gate-test-'));
  fs.mkdirSync(path.join(workspaceRoot, '.state'), { recursive: true });
  return workspaceRoot;
}
31
-
32
/**
 * Best-effort recursive removal of a workspace directory.
 *
 * Any error thrown by the removal is deliberately swallowed so that teardown
 * never fails the calling test.
 *
 * @param dir Path of the directory tree to delete.
 */
function cleanupWorkspace(dir: string): void {
  const removalOptions = { recursive: true, force: true };
  try {
    fs.rmSync(dir, removalOptions);
  } catch {
    // Intentionally ignored: cleanup is best-effort.
  }
}
37
-
38
- // ===== 集成测试套件 =====
39
-
40
/**
 * Gate integration: tier progression.
 *
 * Every case starts from a fresh engine on a temporary workspace, records
 * successful 'write' calls, and then checks the tier and the permissions the
 * engine reports.
 */
describe('Gate Integration - Tier Progression Flow', () => {
  let workspace: string;
  let engine: EvolutionEngine;

  /** Records `times` successful 'write' calls at the given difficulty. */
  const earn = (times: number, difficulty: 'normal' | 'hard'): void => {
    let remaining = times;
    while (remaining > 0) {
      engine.recordSuccess('write', { difficulty });
      remaining -= 1;
    }
  };

  beforeEach(() => {
    workspace = createTempWorkspace();
    engine = new EvolutionEngine(workspace);
  });

  afterEach(() => {
    cleanupWorkspace(workspace);
  });

  test('Seed tier: maxLinesPerWrite = 150 (updated for modern AI capabilities)', () => {
    const { permissions } = engine.getTierDefinition();

    expect(permissions.maxLinesPerWrite).toBe(150);
    expect(permissions.maxFilesPerTask).toBe(3);
    expect(permissions.allowRiskPath).toBe(false);
    // Subagent spawning is expected to be permitted already at Seed.
    expect(permissions.allowSubagentSpawn).toBe(true);
  });

  test('Seed → Sprout: line limit increases to 300', () => {
    // Threshold noted by the original author: 50 points = Sprout.
    earn(17, 'normal');

    expect(engine.getTier()).toBeGreaterThanOrEqual(EvolutionTier.Sprout);
    expect(engine.getTierDefinition().permissions.maxLinesPerWrite).toBe(300);
  });

  test('Seed → Sapling: line limit increases to 500, risk path unlocks', () => {
    // Threshold noted by the original author: 200 points = Sapling.
    earn(26, 'hard');

    expect(engine.getTier()).toBeGreaterThanOrEqual(EvolutionTier.Sapling);

    const { permissions } = engine.getTierDefinition();
    expect(permissions.maxLinesPerWrite).toBe(500);
    // Risk path is expected to unlock at Sapling.
    expect(permissions.allowRiskPath).toBe(true);
    expect(permissions.allowSubagentSpawn).toBe(true);
  });

  test('Full progression: Seed → Sprout → Sapling → Tree → Forest', () => {
    // One entry per leg: successes to record, then the minimum tier expected.
    const legs = [
      { times: 17, difficulty: 'normal', atLeast: EvolutionTier.Sprout }, // Seed (0) → Sprout (50)
      { times: 20, difficulty: 'hard', atLeast: EvolutionTier.Sapling }, // Sprout (50) → Sapling (200)
      { times: 38, difficulty: 'hard', atLeast: EvolutionTier.Tree }, // Sapling (200) → Tree (500)
    ] as const;

    for (const leg of legs) {
      earn(leg.times, leg.difficulty);
      expect(engine.getTier()).toBeGreaterThanOrEqual(leg.atLeast);
    }

    // Tree (500) → Forest (1000): the final tier is checked for an exact match.
    earn(63, 'hard');
    expect(engine.getTier()).toBe(EvolutionTier.Forest);

    // Forest: no limits
    const { permissions } = engine.getTierDefinition();
    expect(permissions.maxLinesPerWrite).toBe(Infinity);
    expect(permissions.allowRiskPath).toBe(true);
    expect(permissions.allowSubagentSpawn).toBe(true);
  });
});
114
-
115
/**
 * Gate integration: behaviour after a call has been blocked.
 *
 * Checks that a blocked call does not prevent later allowed calls, and that
 * calls blocked at Seed become allowed once enough successes are recorded.
 */
describe('Gate Integration - Blocking Recovery', () => {
  let workspace: string;
  let engine: EvolutionEngine;

  /** Asks the gate about a 'write' whose content has `lineTotal` lines. */
  const probeWrite = (lineTotal: number) =>
    engine.beforeToolCall({
      toolName: 'write',
      content: new Array(lineTotal).fill('line').join('\n'),
    });

  /** Asks the gate about a 10-line 'write' flagged as a risk path. */
  const probeRiskWrite = () =>
    engine.beforeToolCall({
      toolName: 'write',
      isRiskPath: true,
      lineCount: 10,
    });

  /** Records `times` successful 'write' calls at the given difficulty. */
  const earn = (times: number, difficulty: 'normal' | 'hard'): void => {
    for (let done = 0; done !== times; done += 1) {
      engine.recordSuccess('write', { difficulty });
    }
  };

  beforeEach(() => {
    workspace = createTempWorkspace();
    engine = new EvolutionEngine(workspace);
  });

  afterEach(() => {
    cleanupWorkspace(workspace);
  });

  test('blocked operation: agent can continue with allowed operations', () => {
    // Seed tier: 150 line limit - so 200 lines should be blocked
    const rejected = probeWrite(200);
    expect(rejected.allowed).toBe(false);
    expect(rejected.reason).toContain('150');

    // But 100-line write should work (within 150 limit)
    const accepted = probeWrite(100);
    expect(accepted.allowed).toBe(true);
  });

  test('after promotion: previously blocked operations now allowed', () => {
    // Initially Seed: 150 line limit
    expect(probeWrite(200).allowed).toBe(false);

    // Earn points and promote to Sprout
    earn(17, 'normal');

    // Now Sprout: 300 line limit
    expect(probeWrite(200).allowed).toBe(true);
  });

  test('risk path access unlocks after promotion to Sapling', () => {
    // Seed: risk path blocked
    expect(probeRiskWrite().allowed).toBe(false);

    // Promote to Sapling (where risk path unlocks)
    earn(26, 'hard');

    expect(probeRiskWrite().allowed).toBe(true);
  });
});
188
-
189
/**
 * Gate integration: consistency across tool names.
 *
 * All cases run against a fresh engine (no successes recorded) and check how
 * the gate answers for 'write', 'edit', 'read', 'sessions_spawn' and the
 * high-risk tool names.
 */
describe('Gate Integration - Multi-tool Consistency', () => {
  let workspace: string;
  let engine: EvolutionEngine;

  /** Asks the gate about `toolName` with content made of `lineTotal` lines. */
  const probe = (toolName: string, lineTotal: number) =>
    engine.beforeToolCall({
      toolName,
      content: new Array(lineTotal).fill('line').join('\n'),
    });

  beforeEach(() => {
    workspace = createTempWorkspace();
    engine = new EvolutionEngine(workspace);
  });

  afterEach(() => {
    cleanupWorkspace(workspace);
  });

  test('write tool respects line limit', () => {
    // Exactly at limit (150) - should allow
    const atLimit = probe('write', 150);
    expect(atLimit.allowed).toBe(true);

    // 1 over limit (151) - should block
    const pastLimit = probe('write', 151);
    expect(pastLimit.allowed).toBe(false);
  });

  test('edit tool respects line limit', () => {
    const underLimit = probe('edit', 100);
    expect(underLimit.allowed).toBe(true);

    const overLimit = probe('edit', 200);
    expect(overLimit.allowed).toBe(false);
  });

  test('high-risk tools blocked at Seed tier for risk paths', () => {
    // run_shell_command and delete_file are high-risk, blocked for risk paths
    ['run_shell_command', 'delete_file'].forEach((toolName) => {
      const verdict = engine.beforeToolCall({ toolName, isRiskPath: true });
      expect(verdict.allowed).toBe(false);
    });

    // sessions_spawn is now allowed at Seed tier
    const spawnVerdict = engine.beforeToolCall({ toolName: 'sessions_spawn' });
    expect(spawnVerdict.allowed).toBe(true);
  });

  test('read tool always allowed (no content restriction)', () => {
    const verdict = probe('read', 1000);
    expect(verdict.allowed).toBe(true);
  });
});
254
-
255
/**
 * Gate integration: edge cases.
 *
 * Exercises the gate on a fresh engine with unusual inputs: empty content, a
 * single very long line, `lineCount` versus `content`, the explicit
 * `isRiskPath` flag, tool-name casing, and a call carrying no size information.
 */
describe('Gate Integration - Edge Cases', () => {
  let workspace: string;
  let engine: EvolutionEngine;

  beforeEach(() => {
    workspace = createTempWorkspace();
    engine = new EvolutionEngine(workspace);
  });

  afterEach(() => {
    cleanupWorkspace(workspace);
  });

  test('empty content allowed', () => {
    const { allowed } = engine.beforeToolCall({ toolName: 'write', content: '' });
    expect(allowed).toBe(true);
  });

  test('single long line not counted as multiple lines', () => {
    // 10000 characters without any newline: one line.
    const oneLongLine = 'a'.repeat(10000);
    const { allowed } = engine.beforeToolCall({ toolName: 'write', content: oneLongLine });
    expect(allowed).toBe(true);
  });

  test('lineCount option works the same as content', () => {
    const fromContent = engine.beforeToolCall({
      toolName: 'write',
      content: new Array(21).fill('line').join('\n'),
    });
    const fromLineCount = engine.beforeToolCall({ toolName: 'write', lineCount: 21 });

    expect(fromContent.allowed).toBe(fromLineCount.allowed);
  });

  test('risk path detection at Seed tier', () => {
    const filePath = 'src/core/trust-engine.ts';

    // Without isRiskPath flag
    const unflagged = engine.beforeToolCall({ toolName: 'write', filePath });
    expect(unflagged.allowed).toBe(true);

    // With isRiskPath flag
    const flagged = engine.beforeToolCall({ toolName: 'write', filePath, isRiskPath: true });
    expect(flagged.allowed).toBe(false);
  });

  test('tool name case sensitivity', () => {
    // Exact match required
    const lower = engine.beforeToolCall({ toolName: 'write' });
    expect(lower.allowed).toBe(true);

    // Original author's note: 'WRITE' is not in the HIGH_RISK_TOOLS set, so it is not blocked.
    const upper = engine.beforeToolCall({ toolName: 'WRITE' });
    expect(upper.allowed).toBe(true);
  });

  test('no content, no line count - allowed', () => {
    const { allowed } = engine.beforeToolCall({ toolName: 'write' });
    expect(allowed).toBe(true);
  });
});
333
-
334
/**
 * Gate integration: persistence across engine instances.
 *
 * Each case constructs an engine on the workspace, mutates its state, then
 * constructs a second engine on the same workspace (simulating a restart) and
 * checks what the second instance reports.
 */
describe('Gate Integration - Persistence', () => {
  let workspace: string;
  let engine: EvolutionEngine;

  /** Whether the current engine allows a 10-line 'write' flagged as a risk path. */
  const riskWriteAllowed = () =>
    engine.beforeToolCall({ toolName: 'write', isRiskPath: true, lineCount: 10 }).allowed;

  // The engine is intentionally NOT created here; every test builds its own
  // so it can also rebuild it mid-test.
  beforeEach(() => {
    workspace = createTempWorkspace();
  });

  afterEach(() => {
    cleanupWorkspace(workspace);
  });

  test('gate permissions restored after restart', () => {
    // Initial engine: Seed tier
    engine = new EvolutionEngine(workspace);
    expect(engine.getTier()).toBe(EvolutionTier.Seed);

    // Risk path should be blocked at Seed
    expect(riskWriteAllowed()).toBe(false);

    // Earn points
    for (let round = 1; round <= 26; round += 1) {
      engine.recordSuccess('write', { difficulty: 'hard' });
    }

    // Now Sapling - risk path allowed
    expect(engine.getTier()).toBeGreaterThanOrEqual(EvolutionTier.Sapling);
    expect(riskWriteAllowed()).toBe(true);

    // Restart engine (simulating process restart)
    engine = new EvolutionEngine(workspace);

    // Should still be Sapling with same permissions
    expect(engine.getTier()).toBeGreaterThanOrEqual(EvolutionTier.Sapling);
    expect(riskWriteAllowed()).toBe(true);
  });

  test('points persisted correctly after restart', () => {
    engine = new EvolutionEngine(workspace);

    // Record some successes
    for (const difficulty of ['hard', 'hard'] as const) {
      engine.recordSuccess('write', { difficulty });
    }
    const beforeRestart = engine.getPoints();

    // Restart
    engine = new EvolutionEngine(workspace);
    const afterRestart = engine.getPoints();

    expect(afterRestart).toBe(beforeRestart);
    expect(afterRestart).toBe(TASK_DIFFICULTY_CONFIG.hard.basePoints * 2);
  });

  test('double reward persisted correctly', () => {
    engine = new EvolutionEngine(workspace);

    // Failure then success = double reward
    engine.recordFailure('write', { filePath: 'test.ts' });
    const first = engine.recordSuccess('write', { filePath: 'test.ts', difficulty: 'normal' });
    expect(first.isDoubleReward).toBe(true);

    // Restart and verify double reward no longer applies (original note: 1hr cooldown)
    engine = new EvolutionEngine(workspace);
    const second = engine.recordSuccess('write', { filePath: 'test.ts', difficulty: 'normal' });
    expect(second.isDoubleReward).toBe(false);
  });

  test('stats persisted correctly', () => {
    engine = new EvolutionEngine(workspace);

    engine.recordSuccess('write', { difficulty: 'normal' });
    engine.recordSuccess('write', { difficulty: 'normal' });
    engine.recordFailure('write');

    const before = engine.getStats();

    // Restart
    engine = new EvolutionEngine(workspace);
    const after = engine.getStats();

    expect(after.totalSuccesses).toBe(before.totalSuccesses);
    expect(after.totalFailures).toBe(before.totalFailures);
    // The success streak is expected to read 0 on the rebuilt engine.
    expect(after.consecutiveSuccesses).toBe(0);
  });
});
422
-
423
/**
 * Gate integration: end-to-end style scenarios.
 *
 * Covers an agent growing from Seed to Forest, failures not costing points,
 * the status summary mirroring gate permissions, and two workspaces keeping
 * independent state.
 */
describe('Gate Integration - Real World Scenarios', () => {
  let workspace: string;
  let engine: EvolutionEngine;

  beforeEach(() => {
    workspace = createTempWorkspace();
    engine = new EvolutionEngine(workspace);
  });

  afterEach(() => {
    cleanupWorkspace(workspace);
  });

  test('agent starts small, grows capability', () => {
    // New agent at Seed
    expect(engine.getTier()).toBe(EvolutionTier.Seed);

    // Attempt 200-line write - blocked (Seed limit is 150)
    let decision = engine.beforeToolCall({
      toolName: 'write',
      content: Array(200).fill('line').join('\n'),
    });
    expect(decision.allowed).toBe(false);

    // Subagent spawn is now allowed at Seed
    decision = engine.beforeToolCall({
      toolName: 'sessions_spawn',
    });
    expect(decision.allowed).toBe(true);

    // Risk path is blocked at Seed
    decision = engine.beforeToolCall({
      toolName: 'write',
      isRiskPath: true,
      lineCount: 10,
    });
    expect(decision.allowed).toBe(false);

    // Work hard, grow to Forest
    for (let i = 0; i < 125; i++) {
      engine.recordSuccess('write', { difficulty: 'hard' });
    }

    // Now Forest - can do anything
    decision = engine.beforeToolCall({
      toolName: 'write',
      content: Array(1000).fill('line').join('\n'),
    });
    expect(decision.allowed).toBe(true);

    decision = engine.beforeToolCall({
      toolName: 'sessions_spawn',
    });
    expect(decision.allowed).toBe(true);

    decision = engine.beforeToolCall({
      toolName: 'write',
      filePath: 'src/core/trust-engine.ts',
      isRiskPath: true,
    });
    expect(decision.allowed).toBe(true);
  });

  test('agent recovers from failure without losing progress', () => {
    // Record some successes
    for (let i = 0; i < 10; i++) {
      engine.recordSuccess('write', { difficulty: 'normal' });
    }
    const pointsBeforeFailure = engine.getPoints();

    // Record failures
    engine.recordFailure('write', { filePath: 'test.ts' });
    engine.recordFailure('write', { filePath: 'test2.ts' });

    // Points should not decrease
    expect(engine.getPoints()).toBe(pointsBeforeFailure);

    // Recover with double reward
    const result = engine.recordSuccess('write', { filePath: 'test.ts', difficulty: 'normal' });
    expect(result.isDoubleReward).toBe(true);
    expect(engine.getPoints()).toBeGreaterThan(pointsBeforeFailure);
  });

  test('status summary reflects gate permissions', () => {
    const summary = engine.getStatusSummary();

    expect(summary.tier).toBe(EvolutionTier.Seed);
    expect(summary.permissions.maxLinesPerWrite).toBe(150);
    expect(summary.permissions.allowRiskPath).toBe(false);
    expect(summary.permissions.allowSubagentSpawn).toBe(true); // Allowed at Seed tier

    // Earn promotion to Sapling (risk path unlocks)
    for (let i = 0; i < 26; i++) {
      engine.recordSuccess('write', { difficulty: 'hard' });
    }

    const summaryAfter = engine.getStatusSummary();
    expect(summaryAfter.permissions.allowRiskPath).toBe(true);
  });

  test('different workspaces have independent gate state', () => {
    const engine1 = new EvolutionEngine(workspace);
    const workspace2 = createTempWorkspace();

    // The second workspace is not covered by afterEach, so it is removed in a
    // `finally` block: a failing assertion must not leave the directory behind.
    try {
      const engine2 = new EvolutionEngine(workspace2);

      // Engine 1 promotes to Sapling
      for (let i = 0; i < 26; i++) {
        engine1.recordSuccess('write', { difficulty: 'hard' });
      }

      // Engine 1 has risk path permission (Sapling tier)
      const decision1 = engine1.beforeToolCall({ toolName: 'write', isRiskPath: true, lineCount: 10 });
      expect(decision1.allowed).toBe(true);

      // Engine 2 is still Seed - risk path blocked
      const decision2 = engine2.beforeToolCall({ toolName: 'write', isRiskPath: true, lineCount: 10 });
      expect(decision2.allowed).toBe(false);
    } finally {
      cleanupWorkspace(workspace2);
    }
  });
});