principles-disciple 1.36.0 → 1.37.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
- package/src/commands/nocturnal-train.ts +1 -0
- package/src/core/event-log.ts +3 -0
- package/src/core/evolution-engine.ts +1 -0
- package/src/core/nocturnal-trinity-types.ts +124 -0
- package/src/core/session-tracker.ts +1 -0
- package/src/core/training-program.ts +1 -0
- package/src/hooks/gate-block-helper.ts +1 -1
- package/src/index.ts +2 -1
- package/src/service/central-sync-service.ts +2 -0
- package/src/service/evolution-dedup.ts +74 -0
- package/src/service/evolution-pain-context.ts +79 -0
- package/src/service/evolution-queue-lock.ts +47 -0
- package/src/service/evolution-queue-migration.ts +173 -0
- package/src/service/evolution-worker.ts +4 -0
- package/src/service/subagent-workflow/workflow-manager-base.ts +1 -0
- package/tests/core/pain-score.property.test.ts +205 -0
- package/tests/integration/chaos-resilience.test.ts +348 -0
- package/tests/integration/gate-real-io.e2e.test.ts +251 -0
- package/tests/integration/pain-diagnostician-loop.e2e.test.ts +380 -0
- package/tests/integration/tool-hooks-workspace-dir.e2e.test.ts +8 -2
- package/tests/integration/trajectory-lifecycle.e2e.test.ts +523 -0
- package/vitest.config.ts +23 -4
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Gate Real I/O E2E Tests
|
|
3
|
+
*
|
|
4
|
+
* PURPOSE: Verify Gate decision chain with real file system operations.
|
|
5
|
+
* These tests are designed to DISCOVER bugs, not just confirm existing behavior.
|
|
6
|
+
*
|
|
7
|
+
* DESIGN PRINCIPLES:
|
|
8
|
+
* 1. Use real file system (no mocks for I/O)
|
|
9
|
+
* 2. Test business invariants: blocks MUST be persisted, state MUST be consistent
|
|
10
|
+
* 3. Use independent Oracle: read files directly for verification
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
|
14
|
+
import * as fs from 'fs';
|
|
15
|
+
import * as os from 'os';
|
|
16
|
+
import * as path from 'path';
|
|
17
|
+
import { TrajectoryDatabase } from '../../src/core/trajectory.js';
|
|
18
|
+
import { EventLog } from '../../src/core/event-log.js';
|
|
19
|
+
|
|
20
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
21
|
+
// Helper functions
|
|
22
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
23
|
+
|
|
24
|
+
interface TestWorkspace {
|
|
25
|
+
workspaceDir: string;
|
|
26
|
+
stateDir: string;
|
|
27
|
+
profilePath: string;
|
|
28
|
+
planPath: string;
|
|
29
|
+
trajectory: TrajectoryDatabase;
|
|
30
|
+
eventLog: EventLog;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
/**
 * Builds an isolated, throw-away workspace on the real file system.
 *
 * Layout created under a unique temp directory:
 *   - `.state/`                    (passed to EventLog)
 *   - `.principles/PROFILE.json`   (default gate profile, pretty-printed)
 *   - `PLAN.md`                    (minimal plan with `Status: READY`)
 *
 * A TrajectoryDatabase and an EventLog are then opened against that directory.
 *
 * @returns Paths of the created files/directories plus the two opened handles.
 */
function createTestWorkspace(): TestWorkspace {
  const workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-e2e-gate-'));
  const stateDir = path.join(workspaceDir, '.state');
  const principlesDir = path.join(workspaceDir, '.principles');
  const profilePath = path.join(principlesDir, 'PROFILE.json');
  const planPath = path.join(workspaceDir, 'PLAN.md');

  for (const dir of [stateDir, principlesDir]) {
    fs.mkdirSync(dir, { recursive: true });
  }

  // Default profile: plan required for risky paths, progressive gate on,
  // plan approvals and thinking checkpoint off, edit verification on (10 MiB cap).
  const planApprovals = {
    enabled: false,
    max_lines_override: -1,
    allowed_patterns: [],
    allowed_operations: [],
  };
  const editVerification = {
    enabled: true,
    max_file_size_bytes: 10 * 1024 * 1024,
    fuzzy_match_enabled: true,
    fuzzy_match_threshold: 0.8,
    skip_large_file_action: 'warn',
  };
  const profileJson = JSON.stringify(
    {
      risk_paths: ['/etc/', '/usr/', '~/.ssh/'],
      gate: { require_plan_for_risk_paths: true },
      progressive_gate: { enabled: true, plan_approvals: planApprovals },
      edit_verification: editVerification,
      thinking_checkpoint: { enabled: false },
    },
    null,
    2,
  );
  fs.writeFileSync(profilePath, profileJson);

  // Minimal PLAN.md
  fs.writeFileSync(planPath, '# PLAN\n\nStatus: READY\n');

  // Handles are opened last, once the on-disk layout exists
  // (trajectory database first, then the event log).
  return {
    workspaceDir,
    stateDir,
    profilePath,
    planPath,
    trajectory: new TrajectoryDatabase({ workspaceDir }),
    eventLog: new EventLog(stateDir),
  };
}
|
|
83
|
+
|
|
84
|
+
/**
 * Best-effort teardown for a workspace produced by `createTestWorkspace`.
 *
 * Disposes the trajectory database, then deletes the temp directory. The
 * removal runs in a `finally` so that a throwing `dispose()` cannot leave the
 * temp directory behind. Any error is swallowed: teardown must never fail a test.
 *
 * @param ws - Workspace to tear down; `null` is a no-op.
 */
function cleanupWorkspace(ws: TestWorkspace | null): void {
  if (!ws) return;
  try {
    try {
      ws.trajectory?.dispose();
    } finally {
      // Always attempt removal, even when dispose() threw.
      fs.rmSync(ws.workspaceDir, { recursive: true, force: true });
    }
  } catch {
    // ignore — cleanup is best-effort
  }
}
|
|
93
|
+
|
|
94
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
95
|
+
// PART 1: EventLog Invariants
|
|
96
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
97
|
+
|
|
98
|
+
describe('Gate: EventLog Invariants', () => {
|
|
99
|
+
let ws: TestWorkspace | null = null;
|
|
100
|
+
|
|
101
|
+
beforeEach(() => {
|
|
102
|
+
ws = createTestWorkspace();
|
|
103
|
+
});
|
|
104
|
+
|
|
105
|
+
afterEach(() => {
|
|
106
|
+
cleanupWorkspace(ws);
|
|
107
|
+
ws = null;
|
|
108
|
+
});
|
|
109
|
+
|
|
110
|
+
describe('INVARIANT: Gate block events must be logged', () => {
|
|
111
|
+
it('EventLog MUST persist gate block events', () => {
|
|
112
|
+
ws!.eventLog.recordGateBlock('test-session', {
|
|
113
|
+
toolName: 'run_shell_command',
|
|
114
|
+
filePath: '/etc/passwd',
|
|
115
|
+
reason: 'Risky path detected',
|
|
116
|
+
blockSource: 'e2e_test',
|
|
117
|
+
});
|
|
118
|
+
|
|
119
|
+
// Independent verification: check events file
|
|
120
|
+
const today = new Date().toISOString().split('T')[0];
|
|
121
|
+
const eventsFile = path.join(ws!.stateDir, 'logs', `events_${today}.jsonl`);
|
|
122
|
+
|
|
123
|
+
// EventLog buffers, need to flush
|
|
124
|
+
ws!.eventLog.flush();
|
|
125
|
+
|
|
126
|
+
expect(fs.existsSync(eventsFile)).toBe(true);
|
|
127
|
+
|
|
128
|
+
const content = fs.readFileSync(eventsFile, 'utf-8');
|
|
129
|
+
expect(content).toContain('gate_block');
|
|
130
|
+
expect(content).toContain('run_shell_command');
|
|
131
|
+
});
|
|
132
|
+
});
|
|
133
|
+
});
|
|
134
|
+
|
|
135
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
136
|
+
// PART 2: Edit Verification Invariants
|
|
137
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
138
|
+
|
|
139
|
+
describe('Gate: Edit Verification Invariants', () => {
|
|
140
|
+
let ws: TestWorkspace | null = null;
|
|
141
|
+
let testFilePath: string;
|
|
142
|
+
|
|
143
|
+
beforeEach(() => {
|
|
144
|
+
ws = createTestWorkspace();
|
|
145
|
+
testFilePath = path.join(ws!.workspaceDir, 'test-file.ts');
|
|
146
|
+
fs.writeFileSync(testFilePath, `function hello() {
|
|
147
|
+
console.log('Hello, World!');
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
function goodbye() {
|
|
151
|
+
console.log('Goodbye!');
|
|
152
|
+
}
|
|
153
|
+
`);
|
|
154
|
+
});
|
|
155
|
+
|
|
156
|
+
afterEach(() => {
|
|
157
|
+
cleanupWorkspace(ws);
|
|
158
|
+
ws = null;
|
|
159
|
+
});
|
|
160
|
+
|
|
161
|
+
describe('INVARIANT: Edit verification format', () => {
|
|
162
|
+
it('Exact match MUST succeed for correct oldText', () => {
|
|
163
|
+
const fileContent = fs.readFileSync(testFilePath, 'utf-8');
|
|
164
|
+
const oldText = "console.log('Hello, World!');";
|
|
165
|
+
|
|
166
|
+
// Verify the text exists - this is what edit verification checks
|
|
167
|
+
expect(fileContent).toContain(oldText);
|
|
168
|
+
});
|
|
169
|
+
|
|
170
|
+
it('Edit verification MUST fail for non-existent text', () => {
|
|
171
|
+
const fileContent = fs.readFileSync(testFilePath, 'utf-8');
|
|
172
|
+
const nonExistentText = "this text does not exist in the file 12345";
|
|
173
|
+
|
|
174
|
+
// Verify the text does NOT exist
|
|
175
|
+
expect(fileContent).not.toContain(nonExistentText);
|
|
176
|
+
});
|
|
177
|
+
});
|
|
178
|
+
|
|
179
|
+
describe('INVARIANT: File size handling', () => {
|
|
180
|
+
it('Large file MUST be detectable', () => {
|
|
181
|
+
// Create a large file (over 10MB)
|
|
182
|
+
const largeContent = 'x'.repeat(11 * 1024 * 1024);
|
|
183
|
+
const largeFilePath = path.join(ws!.workspaceDir, 'large-file.ts');
|
|
184
|
+
fs.writeFileSync(largeFilePath, largeContent);
|
|
185
|
+
|
|
186
|
+
const stats = fs.statSync(largeFilePath);
|
|
187
|
+
|
|
188
|
+
// INVARIANT: Large file must be detectable
|
|
189
|
+
expect(stats.size).toBeGreaterThan(10 * 1024 * 1024);
|
|
190
|
+
|
|
191
|
+
// Cleanup
|
|
192
|
+
fs.unlinkSync(largeFilePath);
|
|
193
|
+
});
|
|
194
|
+
});
|
|
195
|
+
});
|
|
196
|
+
|
|
197
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
198
|
+
// PART 3: Resilience Tests
|
|
199
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
200
|
+
|
|
201
|
+
describe('Gate: Resilience', () => {
|
|
202
|
+
let ws: TestWorkspace | null = null;
|
|
203
|
+
|
|
204
|
+
beforeEach(() => {
|
|
205
|
+
ws = createTestWorkspace();
|
|
206
|
+
});
|
|
207
|
+
|
|
208
|
+
afterEach(() => {
|
|
209
|
+
cleanupWorkspace(ws);
|
|
210
|
+
ws = null;
|
|
211
|
+
});
|
|
212
|
+
|
|
213
|
+
describe('RESILIENCE: Missing configuration', () => {
|
|
214
|
+
it('Profile file MUST be readable when valid JSON', () => {
|
|
215
|
+
const profileContent = fs.readFileSync(ws!.profilePath, 'utf-8');
|
|
216
|
+
const profile = JSON.parse(profileContent);
|
|
217
|
+
|
|
218
|
+
// INVARIANT: Valid profile must have expected structure
|
|
219
|
+
expect(profile).toBeDefined();
|
|
220
|
+
expect(profile.gate).toBeDefined();
|
|
221
|
+
});
|
|
222
|
+
|
|
223
|
+
it('Gate MUST handle corrupted PROFILE.json gracefully', () => {
  // Write invalid JSON
  fs.writeFileSync(ws!.profilePath, 'not valid json {{{');

  const raw = fs.readFileSync(ws!.profilePath, 'utf-8');

  // Oracle check: the fixture on disk really is unparseable. A callback that
  // catches its own exception can never fail `.not.toThrow()`, so the parse
  // error is asserted directly instead.
  expect(() => JSON.parse(raw)).toThrow();

  // NOTE(review): this only validates the corrupted fixture; the gate's own
  // profile loader is not invoked from this file. TODO: call the gate entry
  // point here and assert it falls back to a safe default.
});
|
|
236
|
+
});
|
|
237
|
+
|
|
238
|
+
describe('RESILIENCE: Missing state directory', () => {
|
|
239
|
+
it('EventLog MUST handle missing logs directory', () => {
|
|
240
|
+
// Remove state directory
|
|
241
|
+
fs.rmSync(ws!.stateDir, { recursive: true, force: true });
|
|
242
|
+
|
|
243
|
+
// Attempt to create event log
|
|
244
|
+
// Should recreate the directory
|
|
245
|
+
expect(() => new EventLog(ws!.stateDir)).not.toThrow();
|
|
246
|
+
|
|
247
|
+
// Verify directory was created
|
|
248
|
+
expect(fs.existsSync(ws!.stateDir)).toBe(true);
|
|
249
|
+
});
|
|
250
|
+
});
|
|
251
|
+
});
|
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pain → Diagnostician Loop E2E Tests
|
|
3
|
+
*
|
|
4
|
+
* PURPOSE: Verify business invariants in the Pain → Diagnostician loop.
|
|
5
|
+
* These tests are designed to DISCOVER bugs, not just confirm existing behavior.
|
|
6
|
+
*
|
|
7
|
+
* DESIGN PRINCIPLES:
|
|
8
|
+
* 1. Use real file system (no mocks for I/O)
|
|
9
|
+
* 2. Test business invariants, not implementation details
|
|
10
|
+
* 3. Use independent Oracle data sources for verification
|
|
11
|
+
* 4. Test resilience (corruption recovery, concurrency safety)
|
|
12
|
+
*
|
|
13
|
+
* DATA FLOW:
|
|
14
|
+
* after_tool_call (hooks/pain.ts)
|
|
15
|
+
* → writePainFlag → .state/.pain_flag
|
|
16
|
+
* Evolution Worker (service/evolution-worker.ts)
|
|
17
|
+
* → enqueues pain_diagnosis task → evolution_queue.json
|
|
18
|
+
* → addDiagnosticianTask → diagnostician_tasks.json
|
|
19
|
+
* before_prompt_build (hooks/prompt.ts)
|
|
20
|
+
* → getPendingDiagnosticianTasks → inject into prompt
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
|
24
|
+
import * as fs from 'fs';
|
|
25
|
+
import * as os from 'os';
|
|
26
|
+
import * as path from 'path';
|
|
27
|
+
import {
|
|
28
|
+
buildPainFlag,
|
|
29
|
+
writePainFlag,
|
|
30
|
+
readPainFlagData,
|
|
31
|
+
validatePainFlag,
|
|
32
|
+
} from '../../src/core/pain.js';
|
|
33
|
+
import { getPendingDiagnosticianTasks, addDiagnosticianTask } from '../../src/core/diagnostician-task-store.js';
|
|
34
|
+
|
|
35
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
36
|
+
// Helper functions
|
|
37
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
38
|
+
|
|
39
|
+
/**
 * Creates a fresh temp workspace containing empty `.state` and `.principles`
 * directories.
 *
 * @returns The workspace root and the path of its `.state` directory.
 */
function createTestWorkspace(): { workspaceDir: string; stateDir: string } {
  const root = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-e2e-pain-'));
  const subdirs = {
    state: path.join(root, '.state'),
    principles: path.join(root, '.principles'),
  };

  // `.state` first, then `.principles`.
  for (const dir of [subdirs.state, subdirs.principles]) {
    fs.mkdirSync(dir, { recursive: true });
  }

  return { workspaceDir: root, stateDir: subdirs.state };
}
|
|
46
|
+
|
|
47
|
+
/**
 * Recursively deletes a test workspace directory.
 *
 * Best-effort: any error raised by the removal is swallowed so that teardown
 * never fails a test.
 *
 * @param workspaceDir - Root directory of the workspace to delete.
 */
function cleanupWorkspace(workspaceDir: string): void {
  const removeOptions = { recursive: true, force: true };
  try {
    fs.rmSync(workspaceDir, removeOptions);
  } catch {
    // ignore
    return;
  }
}
|
|
54
|
+
|
|
55
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
56
|
+
// PART 1: Business Invariants
|
|
57
|
+
// Tests that verify system MUST maintain these rules
|
|
58
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
59
|
+
|
|
60
|
+
describe('Pain → Diagnostician: Business Invariants', () => {
|
|
61
|
+
let workspaceDir: string;
|
|
62
|
+
let stateDir: string;
|
|
63
|
+
|
|
64
|
+
beforeEach(() => {
|
|
65
|
+
const ws = createTestWorkspace();
|
|
66
|
+
workspaceDir = ws.workspaceDir;
|
|
67
|
+
stateDir = ws.stateDir;
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
afterEach(() => {
|
|
71
|
+
cleanupWorkspace(workspaceDir);
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
// ── INVARIANT 1: Pain flag format contract ──
|
|
75
|
+
describe('INVARIANT: Pain flag format contract', () => {
|
|
76
|
+
it('MUST contain all required fields after writePainFlag', () => {
|
|
77
|
+
const data = buildPainFlag({
|
|
78
|
+
source: 'tool_failure',
|
|
79
|
+
score: '70',
|
|
80
|
+
reason: 'Command failed with exit code 1',
|
|
81
|
+
session_id: 'test-session-123',
|
|
82
|
+
agent_id: 'main',
|
|
83
|
+
is_risky: true,
|
|
84
|
+
});
|
|
85
|
+
|
|
86
|
+
writePainFlag(workspaceDir, data);
|
|
87
|
+
|
|
88
|
+
// Independent verification: read file directly, don't trust writePainFlag
|
|
89
|
+
const painFlagPath = path.join(stateDir, '.pain_flag');
|
|
90
|
+
expect(fs.existsSync(painFlagPath)).toBe(true);
|
|
91
|
+
|
|
92
|
+
const content = fs.readFileSync(painFlagPath, 'utf-8');
|
|
93
|
+
|
|
94
|
+
// INVARIANT: All required fields MUST be present
|
|
95
|
+
expect(content).toContain('source: tool_failure');
|
|
96
|
+
expect(content).toContain('score: 70');
|
|
97
|
+
expect(content).toMatch(/time: \d{4}-\d{2}-\d{2}T/); // ISO timestamp with space
|
|
98
|
+
expect(content).toContain('reason:');
|
|
99
|
+
expect(content).toContain('is_risky: true');
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
it('MUST NOT write empty optional fields to disk', () => {
|
|
103
|
+
const data = buildPainFlag({
|
|
104
|
+
source: 'human_intervention',
|
|
105
|
+
score: '50',
|
|
106
|
+
reason: 'User feedback',
|
|
107
|
+
// Optional fields omitted
|
|
108
|
+
});
|
|
109
|
+
|
|
110
|
+
writePainFlag(workspaceDir, data);
|
|
111
|
+
|
|
112
|
+
const content = fs.readFileSync(path.join(stateDir, '.pain_flag'), 'utf-8');
|
|
113
|
+
|
|
114
|
+
// INVARIANT: Empty optional fields MUST NOT appear in file
|
|
115
|
+
// This prevents confusion when reading the file
|
|
116
|
+
expect(content).not.toMatch(/trace_id:\s*$/m);
|
|
117
|
+
expect(content).not.toMatch(/trigger_text_preview:\s*$/m);
|
|
118
|
+
});
|
|
119
|
+
|
|
120
|
+
it('score MUST be in valid range 0-100', () => {
|
|
121
|
+
// Test boundary values
|
|
122
|
+
const scores = ['0', '50', '100'];
|
|
123
|
+
|
|
124
|
+
for (const score of scores) {
|
|
125
|
+
const data = buildPainFlag({
|
|
126
|
+
source: 'test',
|
|
127
|
+
score,
|
|
128
|
+
reason: 'Test',
|
|
129
|
+
});
|
|
130
|
+
|
|
131
|
+
writePainFlag(workspaceDir, data);
|
|
132
|
+
|
|
133
|
+
const read = readPainFlagData(workspaceDir);
|
|
134
|
+
const numScore = Number(read.score);
|
|
135
|
+
|
|
136
|
+
// INVARIANT: Score MUST be in valid range
|
|
137
|
+
expect(numScore).toBeGreaterThanOrEqual(0);
|
|
138
|
+
expect(numScore).toBeLessThanOrEqual(100);
|
|
139
|
+
}
|
|
140
|
+
});
|
|
141
|
+
});
|
|
142
|
+
|
|
143
|
+
// ── INVARIANT 2: Pain flag validation contract ──
|
|
144
|
+
describe('INVARIANT: Pain flag validation', () => {
|
|
145
|
+
it('validatePainFlag MUST reject flags missing required fields', () => {
|
|
146
|
+
const invalidFlags = [
|
|
147
|
+
{ source: '', score: '50', time: '2024-01-01', reason: 'test' }, // empty source
|
|
148
|
+
{ source: 'test', score: '', time: '2024-01-01', reason: 'test' }, // empty score
|
|
149
|
+
{ source: 'test', score: '50', time: '', reason: 'test' }, // empty time
|
|
150
|
+
{ source: 'test', score: '50', time: '2024-01-01', reason: '' }, // empty reason
|
|
151
|
+
];
|
|
152
|
+
|
|
153
|
+
for (const flag of invalidFlags) {
|
|
154
|
+
const missing = validatePainFlag(flag);
|
|
155
|
+
// INVARIANT: Missing required fields MUST be detected
|
|
156
|
+
expect(missing.length).toBeGreaterThan(0);
|
|
157
|
+
}
|
|
158
|
+
});
|
|
159
|
+
|
|
160
|
+
it('validatePainFlag MUST accept valid flags', () => {
|
|
161
|
+
const validFlag = {
|
|
162
|
+
source: 'tool_failure',
|
|
163
|
+
score: '70',
|
|
164
|
+
time: new Date().toISOString(),
|
|
165
|
+
reason: 'Command failed',
|
|
166
|
+
session_id: 'test',
|
|
167
|
+
agent_id: 'main',
|
|
168
|
+
is_risky: 'false',
|
|
169
|
+
};
|
|
170
|
+
|
|
171
|
+
const missing = validatePainFlag(validFlag);
|
|
172
|
+
// INVARIANT: Valid flags MUST pass validation
|
|
173
|
+
expect(missing).toEqual([]);
|
|
174
|
+
});
|
|
175
|
+
});
|
|
176
|
+
|
|
177
|
+
// ── INVARIANT 3: Diagnostician task store contract ──
|
|
178
|
+
describe('INVARIANT: Diagnostician task store', () => {
|
|
179
|
+
it('MUST persist tasks with correct structure', async () => {
|
|
180
|
+
const taskId = `task-${Date.now()}`;
|
|
181
|
+
const prompt = 'Diagnose the following pain signal:\n- source: tool_failure\n- score: 70';
|
|
182
|
+
|
|
183
|
+
await addDiagnosticianTask(stateDir, taskId, prompt);
|
|
184
|
+
|
|
185
|
+
// Independent verification: read file directly
|
|
186
|
+
const tasksPath = path.join(stateDir, 'diagnostician_tasks.json');
|
|
187
|
+
expect(fs.existsSync(tasksPath)).toBe(true);
|
|
188
|
+
|
|
189
|
+
const store = JSON.parse(fs.readFileSync(tasksPath, 'utf-8'));
|
|
190
|
+
|
|
191
|
+
// INVARIANT: Task MUST be in store with correct structure
|
|
192
|
+
expect(store.tasks).toBeDefined();
|
|
193
|
+
expect(store.tasks[taskId]).toBeDefined();
|
|
194
|
+
expect(store.tasks[taskId].prompt).toBe(prompt);
|
|
195
|
+
expect(store.tasks[taskId].status).toBe('pending');
|
|
196
|
+
expect(store.tasks[taskId].createdAt).toBeDefined();
|
|
197
|
+
});
|
|
198
|
+
|
|
199
|
+
it('getPendingDiagnosticianTasks MUST only return pending tasks', async () => {
  // The pending task goes through the real write path.
  await addDiagnosticianTask(stateDir, 'pending-task', 'Test prompt');

  // Inject a completed task by editing the store file directly. The entry is
  // merged into the existing store rather than replacing it, so the task
  // written by addDiagnosticianTask above is preserved and actually exercised.
  const tasksPath = path.join(stateDir, 'diagnostician_tasks.json');
  const store: { tasks: Record<string, unknown> } = JSON.parse(
    fs.readFileSync(tasksPath, 'utf-8'),
  );
  store.tasks['completed-task'] = {
    prompt: 'Completed',
    status: 'completed',
    createdAt: new Date().toISOString(),
  };
  fs.writeFileSync(tasksPath, JSON.stringify(store));

  const pending = getPendingDiagnosticianTasks(stateDir);

  // INVARIANT: Only pending tasks MUST be returned
  expect(pending.some(t => t.id === 'pending-task')).toBe(true);
  expect(pending.some(t => t.id === 'completed-task')).toBe(false);
});
|
|
228
|
+
});
|
|
229
|
+
});
|
|
230
|
+
|
|
231
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
232
|
+
// PART 2: Resilience Tests
|
|
233
|
+
// Tests that verify system behavior under abnormal conditions
|
|
234
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
235
|
+
|
|
236
|
+
describe('Pain → Diagnostician: Resilience', () => {
|
|
237
|
+
let workspaceDir: string;
|
|
238
|
+
let stateDir: string;
|
|
239
|
+
|
|
240
|
+
beforeEach(() => {
|
|
241
|
+
const ws = createTestWorkspace();
|
|
242
|
+
workspaceDir = ws.workspaceDir;
|
|
243
|
+
stateDir = ws.stateDir;
|
|
244
|
+
});
|
|
245
|
+
|
|
246
|
+
afterEach(() => {
|
|
247
|
+
cleanupWorkspace(workspaceDir);
|
|
248
|
+
});
|
|
249
|
+
|
|
250
|
+
// ── RESILIENCE 1: Corruption recovery ──
|
|
251
|
+
describe('RESILIENCE: Corruption recovery', () => {
|
|
252
|
+
it('readPainFlagData MUST NOT crash on corrupted file', () => {
|
|
253
|
+
// Write corrupted content
|
|
254
|
+
fs.writeFileSync(path.join(stateDir, '.pain_flag'), 'invalid {{{ json');
|
|
255
|
+
|
|
256
|
+
// This should NOT throw
|
|
257
|
+
expect(() => readPainFlagData(workspaceDir)).not.toThrow();
|
|
258
|
+
|
|
259
|
+
const data = readPainFlagData(workspaceDir);
|
|
260
|
+
|
|
261
|
+
// INVARIANT: Should return safe default, not undefined/null
|
|
262
|
+
expect(data).toBeDefined();
|
|
263
|
+
expect(typeof data).toBe('object');
|
|
264
|
+
});
|
|
265
|
+
|
|
266
|
+
it('getPendingDiagnosticianTasks MUST NOT crash on missing file', () => {
|
|
267
|
+
// Don't create diagnostician_tasks.json
|
|
268
|
+
|
|
269
|
+
// This should NOT throw
|
|
270
|
+
expect(() => getPendingDiagnosticianTasks(stateDir)).not.toThrow();
|
|
271
|
+
|
|
272
|
+
const pending = getPendingDiagnosticianTasks(stateDir);
|
|
273
|
+
|
|
274
|
+
// INVARIANT: Should return empty array, not crash
|
|
275
|
+
expect(Array.isArray(pending)).toBe(true);
|
|
276
|
+
expect(pending.length).toBe(0);
|
|
277
|
+
});
|
|
278
|
+
|
|
279
|
+
it('getPendingDiagnosticianTasks MUST NOT crash on corrupted JSON', () => {
|
|
280
|
+
fs.writeFileSync(
|
|
281
|
+
path.join(stateDir, 'diagnostician_tasks.json'),
|
|
282
|
+
'not valid json {{{'
|
|
283
|
+
);
|
|
284
|
+
|
|
285
|
+
// This should NOT throw
|
|
286
|
+
expect(() => getPendingDiagnosticianTasks(stateDir)).not.toThrow();
|
|
287
|
+
|
|
288
|
+
const pending = getPendingDiagnosticianTasks(stateDir);
|
|
289
|
+
|
|
290
|
+
// INVARIANT: Should return empty array as fallback
|
|
291
|
+
expect(Array.isArray(pending)).toBe(true);
|
|
292
|
+
});
|
|
293
|
+
});
|
|
294
|
+
|
|
295
|
+
// ── RESILIENCE 2: Concurrent access safety ──
|
|
296
|
+
describe('RESILIENCE: Concurrent access', () => {
|
|
297
|
+
it('writePainFlag MUST handle rapid sequential writes', () => {
  const writeCount = 10;

  // Simulate rapid consecutive writes
  for (let i = 0; i < writeCount; i++) {
    const data = buildPainFlag({
      source: `test-${i}`,
      score: String(i * 10),
      reason: `Test ${i}`,
    });
    writePainFlag(workspaceDir, data);
  }

  // INVARIANT: File must exist and be valid after rapid writes
  const painFlagPath = path.join(stateDir, '.pain_flag');
  expect(fs.existsSync(painFlagPath)).toBe(true);

  const content = fs.readFileSync(painFlagPath, 'utf-8');

  // Should not have corruption artifacts
  expect(content).not.toContain('undefined');
  expect(content).not.toContain('[object Object]');
  expect(content).not.toContain('null');

  // INVARIANT: the last write wins — a stale or interleaved file would
  // otherwise pass the artifact checks above unnoticed.
  const last = writeCount - 1;
  expect(content).toContain(`source: test-${last}`);
  expect(content).toContain(`score: ${last * 10}`);
});
|
|
319
|
+
});
|
|
320
|
+
});
|
|
321
|
+
|
|
322
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
323
|
+
// PART 3: Round-trip Tests
|
|
324
|
+
// Tests that verify data survives the full write → read cycle
|
|
325
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
326
|
+
|
|
327
|
+
describe('Pain → Diagnostician: Round-trip', () => {
|
|
328
|
+
let workspaceDir: string;
|
|
329
|
+
let stateDir: string;
|
|
330
|
+
|
|
331
|
+
beforeEach(() => {
|
|
332
|
+
const ws = createTestWorkspace();
|
|
333
|
+
workspaceDir = ws.workspaceDir;
|
|
334
|
+
stateDir = ws.stateDir;
|
|
335
|
+
});
|
|
336
|
+
|
|
337
|
+
afterEach(() => {
|
|
338
|
+
cleanupWorkspace(workspaceDir);
|
|
339
|
+
});
|
|
340
|
+
|
|
341
|
+
it('Pain flag round-trip: write → read → verify', () => {
|
|
342
|
+
const original = buildPainFlag({
|
|
343
|
+
source: 'tool_failure',
|
|
344
|
+
score: '75',
|
|
345
|
+
reason: 'npm test failed with exit code 1',
|
|
346
|
+
session_id: 'session-abc123',
|
|
347
|
+
agent_id: 'main',
|
|
348
|
+
is_risky: false,
|
|
349
|
+
trace_id: 'trace-xyz789',
|
|
350
|
+
trigger_text_preview: 'npm test',
|
|
351
|
+
});
|
|
352
|
+
|
|
353
|
+
writePainFlag(workspaceDir, original);
|
|
354
|
+
const read = readPainFlagData(workspaceDir);
|
|
355
|
+
|
|
356
|
+
// INVARIANT: All fields MUST survive round-trip
|
|
357
|
+
expect(read.source).toBe(original.source);
|
|
358
|
+
expect(read.score).toBe(original.score);
|
|
359
|
+
expect(read.reason).toBe(original.reason);
|
|
360
|
+
expect(read.session_id).toBe(original.session_id);
|
|
361
|
+
expect(read.agent_id).toBe(original.agent_id);
|
|
362
|
+
expect(read.is_risky).toBe(original.is_risky);
|
|
363
|
+
expect(read.trace_id).toBe(original.trace_id);
|
|
364
|
+
expect(read.trigger_text_preview).toBe(original.trigger_text_preview);
|
|
365
|
+
});
|
|
366
|
+
|
|
367
|
+
it('Diagnostician task round-trip: add → get → verify', async () => {
|
|
368
|
+
const taskId = 'round-trip-task';
|
|
369
|
+
const prompt = 'Analyze the following error:\n```\nError: ENOENT\n```';
|
|
370
|
+
|
|
371
|
+
await addDiagnosticianTask(stateDir, taskId, prompt);
|
|
372
|
+
const pending = getPendingDiagnosticianTasks(stateDir);
|
|
373
|
+
const task = pending.find(t => t.id === taskId);
|
|
374
|
+
|
|
375
|
+
// INVARIANT: Task MUST survive round-trip
|
|
376
|
+
expect(task).toBeDefined();
|
|
377
|
+
expect(task!.task.prompt).toBe(prompt);
|
|
378
|
+
expect(task!.task.status).toBe('pending');
|
|
379
|
+
});
|
|
380
|
+
});
|
|
@@ -31,11 +31,17 @@ const createMockApi = (workspaceDir: string) => ({
|
|
|
31
31
|
pluginConfig: {},
|
|
32
32
|
});
|
|
33
33
|
|
|
34
|
+
/**
 * Resolves the path of the date-stamped events file for a given day
 * (EventLog uses date-stamped files).
 *
 * The stamp is the `YYYY-MM-DD` part of `now.toISOString()`, i.e. the UTC
 * calendar date. NOTE(review): assumes EventLog names its files by UTC date
 * as well — confirm against the EventLog implementation.
 *
 * @param logsDir - Directory that holds the `events_*.jsonl` files.
 * @param now - Moment whose date selects the file; defaults to the current
 *   time, so existing single-argument callers behave as before.
 * @returns `<logsDir>/events_<YYYY-MM-DD>.jsonl`
 */
const getTodayEventsFile = (logsDir: string, now: Date = new Date()): string => {
  const [dateStamp] = now.toISOString().split('T');
  return path.join(logsDir, `events_${dateStamp}.jsonl`);
};
|
|
39
|
+
|
|
34
40
|
describe('E2E: Tool Hooks workspaceDir Resolution', () => {
|
|
35
41
|
const testWorkspaceDir = path.join(os.tmpdir(), 'pd-tool-hooks-e2e-test');
|
|
36
42
|
const stateDir = path.join(testWorkspaceDir, '.state');
|
|
37
43
|
const logsDir = path.join(stateDir, 'logs');
|
|
38
|
-
const eventsFile =
|
|
44
|
+
const eventsFile = getTodayEventsFile(logsDir);
|
|
39
45
|
|
|
40
46
|
beforeAll(() => {
|
|
41
47
|
// Create test workspace structure
|
|
@@ -149,7 +155,7 @@ describe('E2E: EventLog flushImmediately', () => {
|
|
|
149
155
|
const testWorkspaceDir = path.join(os.tmpdir(), 'pd-eventlog-flush-test');
|
|
150
156
|
const stateDir = path.join(testWorkspaceDir, '.state');
|
|
151
157
|
const logsDir = path.join(stateDir, 'logs');
|
|
152
|
-
const eventsFile =
|
|
158
|
+
const eventsFile = getTodayEventsFile(logsDir);
|
|
153
159
|
|
|
154
160
|
beforeAll(() => {
|
|
155
161
|
fs.mkdirSync(logsDir, { recursive: true });
|