principles-disciple 1.32.0 → 1.34.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openclaw.plugin.json +4 -4
- package/package.json +1 -1
- package/src/core/correction-cue-learner.ts +203 -0
- package/src/core/correction-types.ts +88 -0
- package/src/core/evolution-logger.ts +3 -3
- package/src/core/init.ts +67 -0
- package/src/service/correction-observer-types.ts +58 -0
- package/src/service/correction-observer-workflow-manager.ts +218 -0
- package/src/service/evolution-worker.ts +172 -146
- package/src/service/nocturnal-service.ts +4 -1
- package/src/service/subagent-workflow/index.ts +14 -0
- package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +3 -1
- package/tests/service/evolution-worker.nocturnal.test.ts +14 -1
- package/tests/service/evolution-worker.timeout.test.ts +350 -0
- package/tests/commands/implementation-lifecycle.test.ts +0 -362
- package/tests/core/detection-funnel.test.ts +0 -63
- package/tests/core/evolution-e2e.test.ts +0 -58
- package/tests/core/evolution-engine-gate-integration.test.ts +0 -543
- package/tests/core/evolution-engine.test.ts +0 -562
- package/tests/core/evolution-reducer.test.ts +0 -180
- package/tests/core/evolution-user-stories.e2e.test.ts +0 -249
- package/tests/core/local-worker-routing.test.ts +0 -757
- package/tests/core/rule-host.test.ts +0 -389
- package/tests/core/trajectory-correction-pain.test.ts +0 -180
- package/tests/hooks/gate-edit-verification.test.ts +0 -435
- package/tests/hooks/llm.test.ts +0 -308
- package/tests/hooks/progressive-trust-gate.test.ts +0 -277
- package/tests/hooks/prompt.test.ts +0 -1473
- package/tests/index.integration.test.ts +0 -179
- package/tests/index.shadow-routing.integration.test.ts +0 -140
- package/tests/service/evolution-worker.test.ts +0 -462
- package/tests/service/nocturnal-service.test.ts +0 -577
- package/tests/service/nocturnal-workflow-manager.test.ts +0 -441
- package/tests/tools/critique-prompt.test.ts +0 -260
- package/tests/tools/deep-reflect.test.ts +0 -232
- package/tests/tools/model-index.test.ts +0 -246
- package/tests/ui/app.test.tsx +0 -114
|
@@ -1,543 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Evolution Engine Gate Integration Tests
|
|
3
|
-
*
|
|
4
|
-
* 集成测试:验证 Gate 系统在实际场景下的表现
|
|
5
|
-
*/
|
|
6
|
-
|
|
7
|
-
import { describe, it, test, expect, beforeEach, afterEach } from 'vitest';
|
|
8
|
-
import * as fs from 'fs';
|
|
9
|
-
import * as path from 'path';
|
|
10
|
-
import * as os from 'os';
|
|
11
|
-
import {
|
|
12
|
-
EvolutionEngine,
|
|
13
|
-
getEvolutionEngine,
|
|
14
|
-
} from '../../src/core/evolution-engine.js';
|
|
15
|
-
import {
|
|
16
|
-
EvolutionTier,
|
|
17
|
-
TIER_DEFINITIONS,
|
|
18
|
-
TASK_DIFFICULTY_CONFIG,
|
|
19
|
-
getTierByPoints,
|
|
20
|
-
ToolCallContext,
|
|
21
|
-
} from '../../src/core/evolution-types.js';
|
|
22
|
-
|
|
23
|
-
// ===== 测试工具 =====
|
|
24
|
-
|
|
25
|
-
function createTempWorkspace(): string {
|
|
26
|
-
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'ep-gate-test-'));
|
|
27
|
-
const stateDir = path.join(tmpDir, '.state');
|
|
28
|
-
fs.mkdirSync(stateDir, { recursive: true });
|
|
29
|
-
return tmpDir;
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
function cleanupWorkspace(dir: string): void {
|
|
33
|
-
try {
|
|
34
|
-
fs.rmSync(dir, { recursive: true, force: true });
|
|
35
|
-
} catch {}
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
// ===== 集成测试套件 =====
|
|
39
|
-
|
|
40
|
-
describe('Gate Integration - Tier Progression Flow', () => {
|
|
41
|
-
let workspace: string;
|
|
42
|
-
let engine: EvolutionEngine;
|
|
43
|
-
|
|
44
|
-
beforeEach(() => {
|
|
45
|
-
workspace = createTempWorkspace();
|
|
46
|
-
engine = new EvolutionEngine(workspace);
|
|
47
|
-
});
|
|
48
|
-
|
|
49
|
-
afterEach(() => {
|
|
50
|
-
cleanupWorkspace(workspace);
|
|
51
|
-
});
|
|
52
|
-
|
|
53
|
-
test('Seed tier: maxLinesPerWrite = 150 (updated for modern AI capabilities)', () => {
|
|
54
|
-
const tierDef = engine.getTierDefinition();
|
|
55
|
-
expect(tierDef.permissions.maxLinesPerWrite).toBe(150);
|
|
56
|
-
expect(tierDef.permissions.maxFilesPerTask).toBe(3);
|
|
57
|
-
expect(tierDef.permissions.allowRiskPath).toBe(false);
|
|
58
|
-
expect(tierDef.permissions.allowSubagentSpawn).toBe(true); // Now allowed at Seed tier
|
|
59
|
-
});
|
|
60
|
-
|
|
61
|
-
test('Seed → Sprout: line limit increases to 300', () => {
|
|
62
|
-
// 50 points = Sprout
|
|
63
|
-
for (let i = 0; i < 17; i++) {
|
|
64
|
-
engine.recordSuccess('write', { difficulty: 'normal' });
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
const tier = engine.getTier();
|
|
68
|
-
expect(tier).toBeGreaterThanOrEqual(EvolutionTier.Sprout);
|
|
69
|
-
|
|
70
|
-
const tierDef = engine.getTierDefinition();
|
|
71
|
-
expect(tierDef.permissions.maxLinesPerWrite).toBe(300);
|
|
72
|
-
});
|
|
73
|
-
|
|
74
|
-
test('Seed → Sapling: line limit increases to 500, risk path unlocks', () => {
|
|
75
|
-
// 200 points = Sapling
|
|
76
|
-
for (let i = 0; i < 26; i++) {
|
|
77
|
-
engine.recordSuccess('write', { difficulty: 'hard' });
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
const tier = engine.getTier();
|
|
81
|
-
expect(tier).toBeGreaterThanOrEqual(EvolutionTier.Sapling);
|
|
82
|
-
|
|
83
|
-
const tierDef = engine.getTierDefinition();
|
|
84
|
-
expect(tierDef.permissions.maxLinesPerWrite).toBe(500);
|
|
85
|
-
expect(tierDef.permissions.allowRiskPath).toBe(true); // Risk path unlocks at Sapling
|
|
86
|
-
expect(tierDef.permissions.allowSubagentSpawn).toBe(true);
|
|
87
|
-
});
|
|
88
|
-
|
|
89
|
-
test('Full progression: Seed → Sprout → Sapling → Tree → Forest', () => {
|
|
90
|
-
// Seed (0) → Sprout (50)
|
|
91
|
-
for (let i = 0; i < 17; i++) engine.recordSuccess('write', { difficulty: 'normal' });
|
|
92
|
-
expect(engine.getTier()).toBeGreaterThanOrEqual(EvolutionTier.Sprout);
|
|
93
|
-
|
|
94
|
-
// Sprout (50) → Sapling (200)
|
|
95
|
-
for (let i = 0; i < 20; i++) engine.recordSuccess('write', { difficulty: 'hard' });
|
|
96
|
-
expect(engine.getTier()).toBeGreaterThanOrEqual(EvolutionTier.Sapling);
|
|
97
|
-
|
|
98
|
-
// Sapling (200) → Tree (500)
|
|
99
|
-
for (let i = 0; i < 38; i++) engine.recordSuccess('write', { difficulty: 'hard' });
|
|
100
|
-
expect(engine.getTier()).toBeGreaterThanOrEqual(EvolutionTier.Tree);
|
|
101
|
-
|
|
102
|
-
// Tree (500) → Forest (1000)
|
|
103
|
-
for (let i = 0; i < 63; i++) engine.recordSuccess('write', { difficulty: 'hard' });
|
|
104
|
-
expect(engine.getTier()).toBe(EvolutionTier.Forest);
|
|
105
|
-
|
|
106
|
-
// Forest: no limits
|
|
107
|
-
const tierDef = engine.getTierDefinition();
|
|
108
|
-
const perms = tierDef.permissions;
|
|
109
|
-
expect(perms.maxLinesPerWrite).toBe(Infinity);
|
|
110
|
-
expect(perms.allowRiskPath).toBe(true);
|
|
111
|
-
expect(perms.allowSubagentSpawn).toBe(true);
|
|
112
|
-
});
|
|
113
|
-
});
|
|
114
|
-
|
|
115
|
-
describe('Gate Integration - Blocking Recovery', () => {
|
|
116
|
-
let workspace: string;
|
|
117
|
-
let engine: EvolutionEngine;
|
|
118
|
-
|
|
119
|
-
beforeEach(() => {
|
|
120
|
-
workspace = createTempWorkspace();
|
|
121
|
-
engine = new EvolutionEngine(workspace);
|
|
122
|
-
});
|
|
123
|
-
|
|
124
|
-
afterEach(() => {
|
|
125
|
-
cleanupWorkspace(workspace);
|
|
126
|
-
});
|
|
127
|
-
|
|
128
|
-
test('blocked operation: agent can continue with allowed operations', () => {
|
|
129
|
-
// Seed tier: 150 line limit - so 200 lines should be blocked
|
|
130
|
-
const blocked = engine.beforeToolCall({
|
|
131
|
-
toolName: 'write',
|
|
132
|
-
content: Array(200).fill('line').join('\n'),
|
|
133
|
-
});
|
|
134
|
-
expect(blocked.allowed).toBe(false);
|
|
135
|
-
expect(blocked.reason).toContain('150');
|
|
136
|
-
|
|
137
|
-
// But 100-line write should work (within 150 limit)
|
|
138
|
-
const allowed = engine.beforeToolCall({
|
|
139
|
-
toolName: 'write',
|
|
140
|
-
content: Array(100).fill('line').join('\n'),
|
|
141
|
-
});
|
|
142
|
-
expect(allowed.allowed).toBe(true);
|
|
143
|
-
});
|
|
144
|
-
|
|
145
|
-
test('after promotion: previously blocked operations now allowed', () => {
|
|
146
|
-
// Initially Seed: 150 line limit
|
|
147
|
-
const blocked = engine.beforeToolCall({
|
|
148
|
-
toolName: 'write',
|
|
149
|
-
content: Array(200).fill('line').join('\n'),
|
|
150
|
-
});
|
|
151
|
-
expect(blocked.allowed).toBe(false);
|
|
152
|
-
|
|
153
|
-
// Earn points and promote to Sprout
|
|
154
|
-
for (let i = 0; i < 17; i++) {
|
|
155
|
-
engine.recordSuccess('write', { difficulty: 'normal' });
|
|
156
|
-
}
|
|
157
|
-
|
|
158
|
-
// Now Sprout: 300 line limit
|
|
159
|
-
const nowAllowed = engine.beforeToolCall({
|
|
160
|
-
toolName: 'write',
|
|
161
|
-
content: Array(200).fill('line').join('\n'),
|
|
162
|
-
});
|
|
163
|
-
expect(nowAllowed.allowed).toBe(true);
|
|
164
|
-
});
|
|
165
|
-
|
|
166
|
-
test('risk path access unlocks after promotion to Sapling', () => {
|
|
167
|
-
// Seed: risk path blocked
|
|
168
|
-
const blocked = engine.beforeToolCall({
|
|
169
|
-
toolName: 'write',
|
|
170
|
-
isRiskPath: true,
|
|
171
|
-
lineCount: 10,
|
|
172
|
-
});
|
|
173
|
-
expect(blocked.allowed).toBe(false);
|
|
174
|
-
|
|
175
|
-
// Promote to Sapling (where risk path unlocks)
|
|
176
|
-
for (let i = 0; i < 26; i++) {
|
|
177
|
-
engine.recordSuccess('write', { difficulty: 'hard' });
|
|
178
|
-
}
|
|
179
|
-
|
|
180
|
-
const allowed = engine.beforeToolCall({
|
|
181
|
-
toolName: 'write',
|
|
182
|
-
isRiskPath: true,
|
|
183
|
-
lineCount: 10,
|
|
184
|
-
});
|
|
185
|
-
expect(allowed.allowed).toBe(true);
|
|
186
|
-
});
|
|
187
|
-
});
|
|
188
|
-
|
|
189
|
-
describe('Gate Integration - Multi-tool Consistency', () => {
|
|
190
|
-
let workspace: string;
|
|
191
|
-
let engine: EvolutionEngine;
|
|
192
|
-
|
|
193
|
-
beforeEach(() => {
|
|
194
|
-
workspace = createTempWorkspace();
|
|
195
|
-
engine = new EvolutionEngine(workspace);
|
|
196
|
-
});
|
|
197
|
-
|
|
198
|
-
afterEach(() => {
|
|
199
|
-
cleanupWorkspace(workspace);
|
|
200
|
-
});
|
|
201
|
-
|
|
202
|
-
test('write tool respects line limit', () => {
|
|
203
|
-
// Exactly at limit (150) - should allow
|
|
204
|
-
const exact = engine.beforeToolCall({
|
|
205
|
-
toolName: 'write',
|
|
206
|
-
content: Array(150).fill('line').join('\n'),
|
|
207
|
-
});
|
|
208
|
-
expect(exact.allowed).toBe(true);
|
|
209
|
-
|
|
210
|
-
// 1 over limit (151) - should block
|
|
211
|
-
const over = engine.beforeToolCall({
|
|
212
|
-
toolName: 'write',
|
|
213
|
-
content: Array(151).fill('line').join('\n'),
|
|
214
|
-
});
|
|
215
|
-
expect(over.allowed).toBe(false);
|
|
216
|
-
});
|
|
217
|
-
|
|
218
|
-
test('edit tool respects line limit', () => {
|
|
219
|
-
const allowed = engine.beforeToolCall({
|
|
220
|
-
toolName: 'edit',
|
|
221
|
-
content: Array(100).fill('line').join('\n'),
|
|
222
|
-
});
|
|
223
|
-
expect(allowed.allowed).toBe(true);
|
|
224
|
-
|
|
225
|
-
const blocked = engine.beforeToolCall({
|
|
226
|
-
toolName: 'edit',
|
|
227
|
-
content: Array(200).fill('line').join('\n'),
|
|
228
|
-
});
|
|
229
|
-
expect(blocked.allowed).toBe(false);
|
|
230
|
-
});
|
|
231
|
-
|
|
232
|
-
test('high-risk tools blocked at Seed tier for risk paths', () => {
|
|
233
|
-
// run_shell_command and delete_file are high-risk, blocked for risk paths
|
|
234
|
-
const highRiskTools = ['run_shell_command', 'delete_file'];
|
|
235
|
-
|
|
236
|
-
for (const tool of highRiskTools) {
|
|
237
|
-
const result = engine.beforeToolCall({ toolName: tool, isRiskPath: true });
|
|
238
|
-
expect(result.allowed).toBe(false);
|
|
239
|
-
}
|
|
240
|
-
|
|
241
|
-
// sessions_spawn is now allowed at Seed tier
|
|
242
|
-
const spawnResult = engine.beforeToolCall({ toolName: 'sessions_spawn' });
|
|
243
|
-
expect(spawnResult.allowed).toBe(true);
|
|
244
|
-
});
|
|
245
|
-
|
|
246
|
-
test('read tool always allowed (no content restriction)', () => {
|
|
247
|
-
const result = engine.beforeToolCall({
|
|
248
|
-
toolName: 'read',
|
|
249
|
-
content: Array(1000).fill('line').join('\n'),
|
|
250
|
-
});
|
|
251
|
-
expect(result.allowed).toBe(true);
|
|
252
|
-
});
|
|
253
|
-
});
|
|
254
|
-
|
|
255
|
-
describe('Gate Integration - Edge Cases', () => {
|
|
256
|
-
let workspace: string;
|
|
257
|
-
let engine: EvolutionEngine;
|
|
258
|
-
|
|
259
|
-
beforeEach(() => {
|
|
260
|
-
workspace = createTempWorkspace();
|
|
261
|
-
engine = new EvolutionEngine(workspace);
|
|
262
|
-
});
|
|
263
|
-
|
|
264
|
-
afterEach(() => {
|
|
265
|
-
cleanupWorkspace(workspace);
|
|
266
|
-
});
|
|
267
|
-
|
|
268
|
-
test('empty content allowed', () => {
|
|
269
|
-
const result = engine.beforeToolCall({
|
|
270
|
-
toolName: 'write',
|
|
271
|
-
content: '',
|
|
272
|
-
});
|
|
273
|
-
expect(result.allowed).toBe(true);
|
|
274
|
-
});
|
|
275
|
-
|
|
276
|
-
test('single long line not counted as multiple lines', () => {
|
|
277
|
-
// One very long line (not multiple lines)
|
|
278
|
-
const result = engine.beforeToolCall({
|
|
279
|
-
toolName: 'write',
|
|
280
|
-
content: 'a'.repeat(10000), // 10000 chars, 1 line
|
|
281
|
-
});
|
|
282
|
-
expect(result.allowed).toBe(true);
|
|
283
|
-
});
|
|
284
|
-
|
|
285
|
-
test('lineCount option works the same as content', () => {
|
|
286
|
-
const viaContent = engine.beforeToolCall({
|
|
287
|
-
toolName: 'write',
|
|
288
|
-
content: Array(21).fill('line').join('\n'),
|
|
289
|
-
});
|
|
290
|
-
|
|
291
|
-
const viaLineCount = engine.beforeToolCall({
|
|
292
|
-
toolName: 'write',
|
|
293
|
-
lineCount: 21,
|
|
294
|
-
});
|
|
295
|
-
|
|
296
|
-
expect(viaContent.allowed).toBe(viaLineCount.allowed);
|
|
297
|
-
});
|
|
298
|
-
|
|
299
|
-
test('risk path detection at Seed tier', () => {
|
|
300
|
-
// Without isRiskPath flag
|
|
301
|
-
const normalWrite = engine.beforeToolCall({
|
|
302
|
-
toolName: 'write',
|
|
303
|
-
filePath: 'src/core/trust-engine.ts',
|
|
304
|
-
});
|
|
305
|
-
expect(normalWrite.allowed).toBe(true);
|
|
306
|
-
|
|
307
|
-
// With isRiskPath flag
|
|
308
|
-
const riskWrite = engine.beforeToolCall({
|
|
309
|
-
toolName: 'write',
|
|
310
|
-
filePath: 'src/core/trust-engine.ts',
|
|
311
|
-
isRiskPath: true,
|
|
312
|
-
});
|
|
313
|
-
expect(riskWrite.allowed).toBe(false);
|
|
314
|
-
});
|
|
315
|
-
|
|
316
|
-
test('tool name case sensitivity', () => {
|
|
317
|
-
// Exact match required
|
|
318
|
-
const lowercase = engine.beforeToolCall({ toolName: 'write' });
|
|
319
|
-
expect(lowercase.allowed).toBe(true);
|
|
320
|
-
|
|
321
|
-
const uppercase = engine.beforeToolCall({ toolName: 'WRITE' });
|
|
322
|
-
// Not in HIGH_RISK_TOOLS set, so it's not blocked
|
|
323
|
-
expect(uppercase.allowed).toBe(true);
|
|
324
|
-
});
|
|
325
|
-
|
|
326
|
-
test('no content, no line count - allowed', () => {
|
|
327
|
-
const result = engine.beforeToolCall({
|
|
328
|
-
toolName: 'write',
|
|
329
|
-
});
|
|
330
|
-
expect(result.allowed).toBe(true);
|
|
331
|
-
});
|
|
332
|
-
});
|
|
333
|
-
|
|
334
|
-
describe('Gate Integration - Persistence', () => {
|
|
335
|
-
let workspace: string;
|
|
336
|
-
let engine: EvolutionEngine;
|
|
337
|
-
|
|
338
|
-
beforeEach(() => {
|
|
339
|
-
workspace = createTempWorkspace();
|
|
340
|
-
});
|
|
341
|
-
|
|
342
|
-
afterEach(() => {
|
|
343
|
-
cleanupWorkspace(workspace);
|
|
344
|
-
});
|
|
345
|
-
|
|
346
|
-
test('gate permissions restored after restart', () => {
|
|
347
|
-
// Initial engine: Seed tier
|
|
348
|
-
engine = new EvolutionEngine(workspace);
|
|
349
|
-
expect(engine.getTier()).toBe(EvolutionTier.Seed);
|
|
350
|
-
|
|
351
|
-
// Risk path should be blocked at Seed
|
|
352
|
-
let blocked = engine.beforeToolCall({ toolName: 'write', isRiskPath: true, lineCount: 10 });
|
|
353
|
-
expect(blocked.allowed).toBe(false);
|
|
354
|
-
|
|
355
|
-
// Earn points
|
|
356
|
-
for (let i = 0; i < 26; i++) {
|
|
357
|
-
engine.recordSuccess('write', { difficulty: 'hard' });
|
|
358
|
-
}
|
|
359
|
-
|
|
360
|
-
// Now Sapling - risk path allowed
|
|
361
|
-
expect(engine.getTier()).toBeGreaterThanOrEqual(EvolutionTier.Sapling);
|
|
362
|
-
let allowed = engine.beforeToolCall({ toolName: 'write', isRiskPath: true, lineCount: 10 });
|
|
363
|
-
expect(allowed.allowed).toBe(true);
|
|
364
|
-
|
|
365
|
-
// Restart engine (simulating process restart)
|
|
366
|
-
engine = new EvolutionEngine(workspace);
|
|
367
|
-
|
|
368
|
-
// Should still be Sapling with same permissions
|
|
369
|
-
expect(engine.getTier()).toBeGreaterThanOrEqual(EvolutionTier.Sapling);
|
|
370
|
-
allowed = engine.beforeToolCall({ toolName: 'write', isRiskPath: true, lineCount: 10 });
|
|
371
|
-
expect(allowed.allowed).toBe(true);
|
|
372
|
-
});
|
|
373
|
-
|
|
374
|
-
test('points persisted correctly after restart', () => {
|
|
375
|
-
engine = new EvolutionEngine(workspace);
|
|
376
|
-
|
|
377
|
-
// Record some successes
|
|
378
|
-
engine.recordSuccess('write', { difficulty: 'hard' });
|
|
379
|
-
engine.recordSuccess('write', { difficulty: 'hard' });
|
|
380
|
-
const pointsBefore = engine.getPoints();
|
|
381
|
-
|
|
382
|
-
// Restart
|
|
383
|
-
engine = new EvolutionEngine(workspace);
|
|
384
|
-
const pointsAfter = engine.getPoints();
|
|
385
|
-
|
|
386
|
-
expect(pointsAfter).toBe(pointsBefore);
|
|
387
|
-
expect(pointsAfter).toBe(TASK_DIFFICULTY_CONFIG.hard.basePoints * 2);
|
|
388
|
-
});
|
|
389
|
-
|
|
390
|
-
test('double reward persisted correctly', () => {
|
|
391
|
-
engine = new EvolutionEngine(workspace);
|
|
392
|
-
|
|
393
|
-
// Failure then success = double reward
|
|
394
|
-
engine.recordFailure('write', { filePath: 'test.ts' });
|
|
395
|
-
const result = engine.recordSuccess('write', { filePath: 'test.ts', difficulty: 'normal' });
|
|
396
|
-
expect(result.isDoubleReward).toBe(true);
|
|
397
|
-
|
|
398
|
-
// Restart and verify double reward no longer applies (1hr cooldown)
|
|
399
|
-
engine = new EvolutionEngine(workspace);
|
|
400
|
-
const result2 = engine.recordSuccess('write', { filePath: 'test.ts', difficulty: 'normal' });
|
|
401
|
-
expect(result2.isDoubleReward).toBe(false);
|
|
402
|
-
});
|
|
403
|
-
|
|
404
|
-
test('stats persisted correctly', () => {
|
|
405
|
-
engine = new EvolutionEngine(workspace);
|
|
406
|
-
|
|
407
|
-
engine.recordSuccess('write', { difficulty: 'normal' });
|
|
408
|
-
engine.recordSuccess('write', { difficulty: 'normal' });
|
|
409
|
-
engine.recordFailure('write');
|
|
410
|
-
|
|
411
|
-
const statsBefore = engine.getStats();
|
|
412
|
-
|
|
413
|
-
// Restart
|
|
414
|
-
engine = new EvolutionEngine(workspace);
|
|
415
|
-
const statsAfter = engine.getStats();
|
|
416
|
-
|
|
417
|
-
expect(statsAfter.totalSuccesses).toBe(statsBefore.totalSuccesses);
|
|
418
|
-
expect(statsAfter.totalFailures).toBe(statsBefore.totalFailures);
|
|
419
|
-
expect(statsAfter.consecutiveSuccesses).toBe(0); // Reset on restart
|
|
420
|
-
});
|
|
421
|
-
});
|
|
422
|
-
|
|
423
|
-
describe('Gate Integration - Real World Scenarios', () => {
|
|
424
|
-
let workspace: string;
|
|
425
|
-
let engine: EvolutionEngine;
|
|
426
|
-
|
|
427
|
-
beforeEach(() => {
|
|
428
|
-
workspace = createTempWorkspace();
|
|
429
|
-
engine = new EvolutionEngine(workspace);
|
|
430
|
-
});
|
|
431
|
-
|
|
432
|
-
afterEach(() => {
|
|
433
|
-
cleanupWorkspace(workspace);
|
|
434
|
-
});
|
|
435
|
-
|
|
436
|
-
test('agent starts small, grows capability', () => {
|
|
437
|
-
// New agent at Seed
|
|
438
|
-
expect(engine.getTier()).toBe(EvolutionTier.Seed);
|
|
439
|
-
|
|
440
|
-
// Attempt 200-line write - blocked (Seed limit is 150)
|
|
441
|
-
let decision = engine.beforeToolCall({
|
|
442
|
-
toolName: 'write',
|
|
443
|
-
content: Array(200).fill('line').join('\n'),
|
|
444
|
-
});
|
|
445
|
-
expect(decision.allowed).toBe(false);
|
|
446
|
-
|
|
447
|
-
// Subagent spawn is now allowed at Seed
|
|
448
|
-
decision = engine.beforeToolCall({
|
|
449
|
-
toolName: 'sessions_spawn',
|
|
450
|
-
});
|
|
451
|
-
expect(decision.allowed).toBe(true);
|
|
452
|
-
|
|
453
|
-
// Risk path is blocked at Seed
|
|
454
|
-
decision = engine.beforeToolCall({
|
|
455
|
-
toolName: 'write',
|
|
456
|
-
isRiskPath: true,
|
|
457
|
-
lineCount: 10,
|
|
458
|
-
});
|
|
459
|
-
expect(decision.allowed).toBe(false);
|
|
460
|
-
|
|
461
|
-
// Work hard, grow to Forest
|
|
462
|
-
for (let i = 0; i < 125; i++) {
|
|
463
|
-
engine.recordSuccess('write', { difficulty: 'hard' });
|
|
464
|
-
}
|
|
465
|
-
|
|
466
|
-
// Now Forest - can do anything
|
|
467
|
-
decision = engine.beforeToolCall({
|
|
468
|
-
toolName: 'write',
|
|
469
|
-
content: Array(1000).fill('line').join('\n'),
|
|
470
|
-
});
|
|
471
|
-
expect(decision.allowed).toBe(true);
|
|
472
|
-
|
|
473
|
-
decision = engine.beforeToolCall({
|
|
474
|
-
toolName: 'sessions_spawn',
|
|
475
|
-
});
|
|
476
|
-
expect(decision.allowed).toBe(true);
|
|
477
|
-
|
|
478
|
-
decision = engine.beforeToolCall({
|
|
479
|
-
toolName: 'write',
|
|
480
|
-
filePath: 'src/core/trust-engine.ts',
|
|
481
|
-
isRiskPath: true,
|
|
482
|
-
});
|
|
483
|
-
expect(decision.allowed).toBe(true);
|
|
484
|
-
});
|
|
485
|
-
|
|
486
|
-
test('agent recovers from failure without losing progress', () => {
|
|
487
|
-
// Record some successes
|
|
488
|
-
for (let i = 0; i < 10; i++) {
|
|
489
|
-
engine.recordSuccess('write', { difficulty: 'normal' });
|
|
490
|
-
}
|
|
491
|
-
const pointsBeforeFailure = engine.getPoints();
|
|
492
|
-
|
|
493
|
-
// Record failures
|
|
494
|
-
engine.recordFailure('write', { filePath: 'test.ts' });
|
|
495
|
-
engine.recordFailure('write', { filePath: 'test2.ts' });
|
|
496
|
-
|
|
497
|
-
// Points should not decrease
|
|
498
|
-
expect(engine.getPoints()).toBe(pointsBeforeFailure);
|
|
499
|
-
|
|
500
|
-
// Recover with double reward
|
|
501
|
-
const result = engine.recordSuccess('write', { filePath: 'test.ts', difficulty: 'normal' });
|
|
502
|
-
expect(result.isDoubleReward).toBe(true);
|
|
503
|
-
expect(engine.getPoints()).toBeGreaterThan(pointsBeforeFailure);
|
|
504
|
-
});
|
|
505
|
-
|
|
506
|
-
test('status summary reflects gate permissions', () => {
|
|
507
|
-
const summary = engine.getStatusSummary();
|
|
508
|
-
|
|
509
|
-
expect(summary.tier).toBe(EvolutionTier.Seed);
|
|
510
|
-
expect(summary.permissions.maxLinesPerWrite).toBe(150);
|
|
511
|
-
expect(summary.permissions.allowRiskPath).toBe(false);
|
|
512
|
-
expect(summary.permissions.allowSubagentSpawn).toBe(true); // Allowed at Seed tier
|
|
513
|
-
|
|
514
|
-
// Earn promotion to Sapling (risk path unlocks)
|
|
515
|
-
for (let i = 0; i < 26; i++) {
|
|
516
|
-
engine.recordSuccess('write', { difficulty: 'hard' });
|
|
517
|
-
}
|
|
518
|
-
|
|
519
|
-
const summaryAfter = engine.getStatusSummary();
|
|
520
|
-
expect(summaryAfter.permissions.allowRiskPath).toBe(true);
|
|
521
|
-
});
|
|
522
|
-
|
|
523
|
-
test('different workspaces have independent gate state', () => {
|
|
524
|
-
const engine1 = new EvolutionEngine(workspace);
|
|
525
|
-
const workspace2 = createTempWorkspace();
|
|
526
|
-
const engine2 = new EvolutionEngine(workspace2);
|
|
527
|
-
|
|
528
|
-
// Engine 1 promotes to Sapling
|
|
529
|
-
for (let i = 0; i < 26; i++) {
|
|
530
|
-
engine1.recordSuccess('write', { difficulty: 'hard' });
|
|
531
|
-
}
|
|
532
|
-
|
|
533
|
-
// Engine 1 has risk path permission (Sapling tier)
|
|
534
|
-
let decision1 = engine1.beforeToolCall({ toolName: 'write', isRiskPath: true, lineCount: 10 });
|
|
535
|
-
expect(decision1.allowed).toBe(true);
|
|
536
|
-
|
|
537
|
-
// Engine 2 is still Seed - risk path blocked
|
|
538
|
-
let decision2 = engine2.beforeToolCall({ toolName: 'write', isRiskPath: true, lineCount: 10 });
|
|
539
|
-
expect(decision2.allowed).toBe(false);
|
|
540
|
-
|
|
541
|
-
cleanupWorkspace(workspace2);
|
|
542
|
-
});
|
|
543
|
-
});
|