principles-disciple 1.36.0 → 1.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,523 @@
1
+ /**
2
+ * Trajectory Lifecycle E2E Tests
3
+ *
4
+ * PURPOSE: Verify Trajectory database lifecycle with real SQLite operations.
5
+ * These tests are designed to DISCOVER bugs, not just confirm existing behavior.
6
+ *
7
+ * DESIGN PRINCIPLES:
8
+ * 1. Use real SQLite database (no mocks)
9
+ * 2. Test business invariants: data MUST persist, relationships MUST be valid
10
+ * 3. Use independent Oracle: query database directly for verification
11
+ *
12
+ * DATA FLOW:
13
+ * Tool Call → recordToolCall → SQLite
14
+ * LLM Output → recordAssistantTurn → SQLite (+ blob storage for large text)
15
+ * User Turn → recordUserTurn → SQLite
16
+ * Pain Event → recordPainEvent → SQLite
17
+ * Gate Block → recordGateBlock → SQLite
18
+ */
19
+
20
+ import { describe, it, expect, beforeEach, afterEach } from 'vitest';
21
+ import * as fs from 'fs';
22
+ import * as os from 'os';
23
+ import * as path from 'path';
24
+ import { TrajectoryDatabase } from '../../src/core/trajectory.js';
25
+
26
+ // ─────────────────────────────────────────────────────────────────────
27
+ // Helper functions
28
+ // ─────────────────────────────────────────────────────────────────────
29
+
30
/**
 * Per-test fixture bundle handed from `beforeEach` to the test body and on to cleanup.
 */
interface TestContext {
  /** Trajectory store under test, opened on `workspaceDir`. */
  trajectory: TrajectoryDatabase;
  /**
   * Handle read from `trajectory['db']`; tests call `prepare(...)` on it to query
   * tables directly instead of going through the trajectory API.
   * NOTE(review): left as `any` because the member's declared type is not visible here.
   */
  db: any;
  /** Temporary directory created for this test. */
  workspaceDir: string;
}
35
+
36
/**
 * Creates an isolated fixture: a fresh temp directory plus a TrajectoryDatabase opened on it.
 *
 * @returns The workspace path, the trajectory instance, and the handle read from
 *          `trajectory['db']` so tests can issue raw SQL for verification.
 * @throws Re-throws whatever the TrajectoryDatabase constructor throws, after
 *         removing the temp directory that was just created.
 */
function createTestContext(): TestContext {
  const workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-e2e-trajectory-'));
  try {
    const trajectory = new TrajectoryDatabase({ workspaceDir });
    // Bracket access is used to reach `db` (presumably a non-public member — confirm).
    const db = trajectory['db'];
    return { workspaceDir, trajectory, db };
  } catch (error) {
    // The caller never receives a context in this case, so nobody else would
    // clean the directory up; remove it here before propagating the failure.
    fs.rmSync(workspaceDir, { recursive: true, force: true });
    throw error;
  }
}
42
+
43
/**
 * Best-effort teardown for a fixture: disposes the trajectory and deletes its workspace.
 *
 * Never throws. The directory removal runs even when `dispose()` fails (for example
 * when a test already disposed the instance itself), so temp directories do not pile up.
 *
 * @param ctx Fixture to tear down; `null` is a no-op.
 */
function cleanupContext(ctx: TestContext | null): void {
  if (!ctx) return;

  try {
    ctx.trajectory?.dispose();
  } catch {
    // Deliberately ignored: a failed/duplicate dispose must not block directory removal.
  }

  try {
    fs.rmSync(ctx.workspaceDir, { recursive: true, force: true });
  } catch {
    // Deliberately ignored: teardown problems should not mask the test's own result.
  }
}
52
+
53
/**
 * Current wall-clock time rendered as an ISO-8601 string.
 *
 * @returns The result of `Date.prototype.toISOString()` for "now".
 */
function isoNow(): string {
  const currentMoment = new Date(Date.now());
  return currentMoment.toISOString();
}
56
+
57
+ // ─────────────────────────────────────────────────────────────────────
58
+ // PART 1: Session Lifecycle Invariants
59
+ // ─────────────────────────────────────────────────────────────────────
60
+
61
/**
 * PART 1 — session rows.
 *
 * Every test runs against a fresh fixture from `createTestContext()` and is torn
 * down with `cleanupContext()`. Verification reads the `sessions` table directly
 * through the raw database handle rather than through the trajectory API.
 */
describe('Trajectory: Session Lifecycle Invariants', () => {
  let ctx: TestContext | null = null;

  beforeEach(() => {
    ctx = createTestContext();
  });

  afterEach(() => {
    cleanupContext(ctx);
    ctx = null;
  });

  describe('INVARIANT: Session must be unique', () => {
    it('Recording same session twice MUST not create duplicates', () => {
      const sessionId = 'session-unique-test';

      // Record the same session id twice; the second call must not add a row.
      ctx!.trajectory.recordSession({ sessionId, startedAt: isoNow() });
      ctx!.trajectory.recordSession({ sessionId, startedAt: isoNow() });

      // Independent verification: count sessions in database
      const sessions = ctx!.db!.prepare('SELECT * FROM sessions WHERE session_id = ?').all(sessionId);

      // INVARIANT: Should have exactly one session
      expect(sessions.length).toBe(1);
    });

    it('Session MUST have valid startedAt timestamp', () => {
      const sessionId = 'session-timestamp-test';
      const startedAt = isoNow();

      ctx!.trajectory.recordSession({ sessionId, startedAt });

      // Independent verification
      const session = ctx!.db!.prepare('SELECT * FROM sessions WHERE session_id = ?').get(sessionId) as any;

      // INVARIANT: Timestamp must be valid ISO string
      expect(session).toBeDefined();
      expect(session.started_at).toBe(startedAt);
      // `new Date(badInput)` never throws — it produces an Invalid Date — so a
      // `.not.toThrow()` check can never fail. Test the parsed value instead.
      expect(Number.isNaN(Date.parse(session.started_at))).toBe(false);
    });
  });
});
103
+
104
+ // ─────────────────────────────────────────────────────────────────────
105
+ // PART 2: Tool Call Invariants
106
+ // ─────────────────────────────────────────────────────────────────────
107
+
108
/**
 * PART 2 — tool call rows.
 *
 * Each test records a session plus one or more tool calls through the trajectory
 * API and then inspects the `tool_calls` table directly via the raw database handle.
 */
describe('Trajectory: Tool Call Invariants', () => {
  let ctx: TestContext | null = null;

  beforeEach(() => {
    ctx = createTestContext();
  });

  afterEach(() => {
    cleanupContext(ctx);
    ctx = null;
  });

  describe('INVARIANT: Tool calls must be linked to session', () => {
    it('Tool call MUST reference valid session', () => {
      const sessionId = 'session-tool-test';

      // Create session first
      ctx!.trajectory.recordSession({ sessionId, startedAt: isoNow() });

      // Record tool call
      ctx!.trajectory.recordToolCall({
        sessionId,
        toolName: 'read_file',
        outcome: 'success',
        createdAt: isoNow(),
      });

      // Independent verification
      const toolCalls = ctx!.db!.prepare('SELECT * FROM tool_calls WHERE session_id = ?').all(sessionId) as any[];

      // INVARIANT: Tool call must be linked to session
      expect(toolCalls.length).toBe(1);
      expect(toolCalls[0].tool_name).toBe('read_file');
      expect(toolCalls[0].outcome).toBe('success');
    });

    it('Failed tool calls MUST have error info', () => {
      const sessionId = 'session-tool-fail';
      const errorMessage = 'Command failed';

      ctx!.trajectory.recordSession({ sessionId, startedAt: isoNow() });

      ctx!.trajectory.recordToolCall({
        sessionId,
        toolName: 'run_shell_command',
        outcome: 'failure',
        errorMessage,
        exitCode: 1,
        createdAt: isoNow(),
      });

      // Independent verification
      const toolCalls = ctx!.db!.prepare('SELECT * FROM tool_calls WHERE session_id = ?').all(sessionId) as any[];

      // INVARIANT: Failed tool call must have error info
      expect(toolCalls.length).toBe(1);
      expect(toolCalls[0].outcome).toBe('failure');
      // A NULL column is read back as `null`, which still satisfies
      // `toBeDefined()`; compare against the recorded message so a dropped
      // error actually fails the test.
      expect(toolCalls[0].error_message).toBe(errorMessage);
    });

    it('Multiple tool calls MUST preserve order', () => {
      const sessionId = 'session-tool-order';
      const callCount = 5;

      ctx!.trajectory.recordSession({ sessionId, startedAt: isoNow() });

      // Give every call its own millisecond. Timestamps taken inside a tight
      // loop routinely collide, and rows with equal `created_at` have no
      // defined order under `ORDER BY created_at`, which made this test flaky.
      const baseMs = Date.now();
      for (let i = 0; i < callCount; i++) {
        ctx!.trajectory.recordToolCall({
          sessionId,
          toolName: `tool_${i}`,
          outcome: 'success',
          createdAt: new Date(baseMs + i).toISOString(),
        });
      }

      // Independent verification
      const toolCalls = ctx!.db!.prepare('SELECT * FROM tool_calls WHERE session_id = ? ORDER BY created_at').all(sessionId) as any[];

      // INVARIANT: Order must be preserved
      expect(toolCalls.length).toBe(callCount);
      for (let i = 0; i < callCount; i++) {
        expect(toolCalls[i].tool_name).toBe(`tool_${i}`);
      }
    });
  });
});
192
+
193
+ // ─────────────────────────────────────────────────────────────────────
194
+ // PART 3: Assistant Turn Invariants
195
+ // ─────────────────────────────────────────────────────────────────────
196
+
197
/**
 * PART 3 — assistant turn rows.
 *
 * Records assistant turns through the trajectory API and inspects the
 * `assistant_turns` table directly via the raw database handle.
 */
describe('Trajectory: Assistant Turn Invariants', () => {
  let ctx: TestContext | null = null;

  beforeEach(() => {
    ctx = createTestContext();
  });

  afterEach(() => {
    cleanupContext(ctx);
    ctx = null;
  });

  describe('INVARIANT: Assistant turns must have valid content', () => {
    it('Assistant turn MUST store sanitized text', () => {
      const sessionId = 'session-assistant-test';
      const sanitizedText = 'This is the sanitized assistant response';

      ctx!.trajectory.recordSession({ sessionId, startedAt: isoNow() });

      const turnId = ctx!.trajectory.recordAssistantTurn({
        sessionId,
        runId: 'run-1',
        provider: 'openai',
        model: 'gpt-4',
        rawText: 'This is the raw assistant response',
        sanitizedText,
        usageJson: { prompt_tokens: 100, completion_tokens: 50 },
        empathySignalJson: {},
        createdAt: isoNow(),
      });

      // Independent verification
      const turns = ctx!.db!.prepare('SELECT * FROM assistant_turns WHERE session_id = ?').all(sessionId) as any[];

      // INVARIANT: Turn must be stored with correct content
      expect(turns.length).toBe(1);
      expect(turns[0].sanitized_text).toBe(sanitizedText);
      expect(turnId).toBeGreaterThan(0);
    });

    it('Large assistant text MUST be stored in blob storage', () => {
      const sessionId = 'session-large-text';

      ctx!.trajectory.recordSession({ sessionId, startedAt: isoNow() });

      // Create large text (> 16KB inline threshold)
      const largeText = 'x'.repeat(20 * 1024);

      const turnId = ctx!.trajectory.recordAssistantTurn({
        sessionId,
        runId: 'run-1',
        provider: 'openai',
        model: 'gpt-4',
        rawText: largeText,
        sanitizedText: largeText,
        usageJson: {},
        empathySignalJson: {},
        createdAt: isoNow(),
      });

      // Independent verification
      const turns = ctx!.db!.prepare('SELECT * FROM assistant_turns WHERE id = ?').all(turnId) as any[];

      // INVARIANT: Large text must not be stored inline
      expect(turns.length).toBe(1);
      // The earlier check accepted "null OR the full text", i.e. it also passed
      // when the payload WAS stored inline, so it could not detect a violation
      // of the invariant above.
      // NOTE(review): assumes `raw_text` is the inline column for the raw payload
      // and that oversized payloads leave it without the full text — confirm
      // against the schema, and consider also asserting the blob reference once
      // its column name is known.
      expect(turns[0].raw_text).not.toBe(largeText);
    });
  });
});
267
+
268
+ // ─────────────────────────────────────────────────────────────────────
269
+ // PART 4: User Turn Invariants
270
+ // ─────────────────────────────────────────────────────────────────────
271
+
272
/**
 * PART 4 — user turn rows.
 *
 * Each test records a session, an assistant turn and a user turn that references
 * it, then inspects the `user_turns` table directly via the raw database handle.
 */
describe('Trajectory: User Turn Invariants', () => {
  let ctx: TestContext | null = null;

  beforeEach(() => {
    ctx = createTestContext();
  });

  afterEach(() => {
    cleanupContext(ctx);
    ctx = null;
  });

  /**
   * Shared arrangement: records the session and one assistant turn whose raw and
   * sanitized text are both `text`, and returns the assistant turn id.
   */
  const seedAssistantTurn = (sessionId: string, text: string) => {
    ctx!.trajectory.recordSession({ sessionId, startedAt: isoNow() });

    return ctx!.trajectory.recordAssistantTurn({
      sessionId,
      runId: 'run-1',
      provider: 'openai',
      model: 'gpt-4',
      rawText: text,
      sanitizedText: text,
      usageJson: {},
      empathySignalJson: {},
      createdAt: isoNow(),
    });
  };

  describe('INVARIANT: User turns must capture corrections', () => {
    it('Correction detected MUST be recorded', () => {
      const sessionId = 'session-correction-test';
      const atId = seedAssistantTurn(sessionId, 'Here is my suggestion');

      ctx!.trajectory.recordUserTurn({
        sessionId,
        turnIndex: 1,
        rawText: 'That is wrong, try again',
        correctionDetected: true,
        correctionCue: 'wrong',
        referencesAssistantTurnId: atId,
        createdAt: isoNow(),
      });

      // Independent verification
      const userTurns = ctx!.db!.prepare('SELECT * FROM user_turns WHERE session_id = ?').all(sessionId) as any[];

      // INVARIANT: Correction must be recorded
      expect(userTurns.length).toBe(1);
      expect(userTurns[0].correction_detected).toBe(1); // SQLite stores as 1/0
      expect(userTurns[0].correction_cue).toBe('wrong');
    });

    it('User turn MUST reference assistant turn', () => {
      const sessionId = 'session-ref-test';
      const atId = seedAssistantTurn(sessionId, 'Response');

      ctx!.trajectory.recordUserTurn({
        sessionId,
        turnIndex: 1,
        rawText: 'User feedback',
        correctionDetected: false,
        referencesAssistantTurnId: atId,
        createdAt: isoNow(),
      });

      // Independent verification
      const userTurns = ctx!.db!.prepare('SELECT * FROM user_turns WHERE session_id = ?').all(sessionId) as any[];

      // Assert the row exists first: without this, a missing row surfaces as a
      // TypeError on `userTurns[0]` instead of a readable assertion failure.
      expect(userTurns.length).toBe(1);

      // INVARIANT: Reference must be valid
      expect(userTurns[0].references_assistant_turn_id).toBe(atId);
    });
  });
});
355
+
356
+ // ─────────────────────────────────────────────────────────────────────
357
+ // PART 5: Pain Event Invariants
358
+ // ─────────────────────────────────────────────────────────────────────
359
+
360
/**
 * PART 5 — pain event rows.
 *
 * Records pain events through the trajectory API and inspects the `pain_events`
 * table directly via the raw database handle.
 */
describe('Trajectory: Pain Event Invariants', () => {
  let ctx: TestContext | null = null;

  beforeEach(() => {
    ctx = createTestContext();
  });

  afterEach(() => {
    cleanupContext(ctx);
    ctx = null;
  });

  describe('INVARIANT: Pain events must have valid scores', () => {
    it('Pain event MUST have score in valid range', () => {
      const sessionId = 'session-pain-test';
      const recordedScore = 75;

      ctx!.trajectory.recordSession({ sessionId, startedAt: isoNow() });

      ctx!.trajectory.recordPainEvent({
        sessionId,
        source: 'tool_failure',
        score: recordedScore,
        reason: 'Command failed',
        origin: 'after_tool_call',
        text: 'npm test failed',
        createdAt: isoNow(),
      });

      // Independent verification
      const painEvents = ctx!.db!.prepare('SELECT * FROM pain_events WHERE session_id = ?').all(sessionId) as any[];

      // INVARIANT: Score must be in valid range
      expect(painEvents.length).toBe(1);
      expect(painEvents[0].score).toBeGreaterThanOrEqual(0);
      expect(painEvents[0].score).toBeLessThanOrEqual(100);
      // The range check alone would also accept a corrupted value (e.g. 0), so
      // additionally require that the recorded score came back unchanged.
      expect(painEvents[0].score).toBe(recordedScore);
      expect(painEvents[0].source).toBe('tool_failure');
    });

    it('Multiple pain events MUST accumulate correctly', () => {
      const sessionId = 'session-multi-pain';

      ctx!.trajectory.recordSession({ sessionId, startedAt: isoNow() });

      const scores = [30, 50, 70];
      for (const score of scores) {
        ctx!.trajectory.recordPainEvent({
          sessionId,
          source: 'test',
          score,
          reason: `Pain ${score}`,
          origin: 'test',
          text: '',
          createdAt: isoNow(),
        });
      }

      // Independent verification
      const painEvents = ctx!.db!.prepare('SELECT * FROM pain_events WHERE session_id = ?').all(sessionId) as any[];

      // INVARIANT: All events must be recorded
      expect(painEvents.length).toBe(scores.length);
      // The query has no ORDER BY, so compare as a sorted list: every recorded
      // score must be present exactly once, regardless of row order.
      const storedScores = painEvents
        .map(event => event.score as number)
        .sort((a: number, b: number) => a - b);
      expect(storedScores).toEqual(scores);
    });
  });
});
425
+
426
+ // ─────────────────────────────────────────────────────────────────────
427
+ // PART 6: Resilience Tests
428
+ // ─────────────────────────────────────────────────────────────────────
429
+
430
/**
 * PART 6 — resilience checks: persistence across dispose/reopen, a burst of
 * queued writes, and the counters returned by `getDataStats()`.
 */
describe('Trajectory: Resilience', () => {
  let ctx: TestContext | null = null;

  beforeEach(() => {
    ctx = createTestContext();
  });

  afterEach(() => {
    cleanupContext(ctx);
    ctx = null;
  });

  describe('RESILIENCE: Database consistency', () => {
    it('Database MUST remain consistent after dispose and reopen', () => {
      const sessionId = 'session-reopen-test';
      const { workspaceDir } = ctx!;

      ctx!.trajectory.recordSession({ sessionId, startedAt: isoNow() });
      ctx!.trajectory.recordToolCall({
        sessionId,
        toolName: 'read_file',
        outcome: 'success',
        createdAt: isoNow(),
      });

      // Dispose
      ctx!.trajectory.dispose();

      // Reopen
      const reopened = new TrajectoryDatabase({ workspaceDir });
      const reopenedDb = reopened['db'];

      // Hand the reopened instance to the fixture so `afterEach` disposes the
      // live handle exactly once — even when an assertion below fails — rather
      // than disposing the already-closed original a second time.
      ctx = { workspaceDir, trajectory: reopened, db: reopenedDb };

      // Independent verification
      const sessions = reopenedDb!.prepare('SELECT * FROM sessions WHERE session_id = ?').all(sessionId);
      const toolCalls = reopenedDb!.prepare('SELECT * FROM tool_calls WHERE session_id = ?').all(sessionId);

      // INVARIANT: Data must persist after reopen
      expect(sessions.length).toBe(1);
      expect(toolCalls.length).toBe(1);
    });

    it('Concurrent writes MUST not corrupt database', async () => {
      const sessionId = 'session-concurrent-test';
      const writeCount = 10;

      ctx!.trajectory.recordSession({ sessionId, startedAt: isoNow() });

      // Queue every write as its own task and wait for all of them.
      // NOTE(review): `recordToolCall` is never awaited anywhere in this file, so
      // it looks synchronous; these writes are then interleaved on the microtask
      // queue rather than truly parallel. Real contention would need several
      // connections/processes — confirm whether that is the intended coverage.
      const writes: Promise<void>[] = Array.from({ length: writeCount }, (_unused, i) =>
        Promise.resolve().then(() => {
          ctx!.trajectory.recordToolCall({
            sessionId,
            toolName: `concurrent_tool_${i}`,
            outcome: 'success',
            createdAt: isoNow(),
          });
        }),
      );

      await Promise.all(writes);

      // Independent verification
      const toolCalls = ctx!.db!.prepare('SELECT * FROM tool_calls WHERE session_id = ?').all(sessionId) as any[];

      // INVARIANT: All concurrent writes must be recorded
      expect(toolCalls.length).toBe(writeCount);
      // A bare row count would also pass if one write were stored twice and
      // another lost; require every distinct tool name to be present.
      const storedNames = new Set(toolCalls.map(row => row.tool_name as string));
      expect(storedNames.size).toBe(writeCount);
    });
  });

  describe('RESILIENCE: Statistics integrity', () => {
    it('Daily metrics MUST reflect actual data', () => {
      const sessionId = 'session-metrics-test';

      ctx!.trajectory.recordSession({ sessionId, startedAt: isoNow() });

      // Record various events
      ctx!.trajectory.recordToolCall({ sessionId, toolName: 'read', outcome: 'success', createdAt: isoNow() });
      ctx!.trajectory.recordToolCall({ sessionId, toolName: 'write', outcome: 'failure', createdAt: isoNow() });
      ctx!.trajectory.recordPainEvent({ sessionId, source: 'test', score: 50, reason: 'test', origin: 'test', text: '', createdAt: isoNow() });

      // Get stats
      const stats = ctx!.trajectory.getDataStats();

      // INVARIANT: Stats must reflect actual data
      expect(stats).toBeDefined();
      expect(stats.toolCalls).toBeGreaterThanOrEqual(2);
      expect(stats.painEvents).toBeGreaterThanOrEqual(1);
    });
  });
});
package/vitest.config.ts CHANGED
@@ -18,17 +18,33 @@ import { defineConfig } from 'vitest/config';
18
18
  */
19
19
 
20
20
  // Integration tests: use real SQLite database
21
+ // These tests require better-sqlite3 to be compiled
21
22
  const integrationTests = [
23
+ // Core DB tests
22
24
  'tests/core/control-ui-db.test.ts',
23
25
  'tests/core/evolution-logger.test.ts',
24
26
  'tests/core/nocturnal-e2e.test.ts',
25
27
  'tests/core/nocturnal-trajectory-extractor.test.ts',
26
28
  'tests/core/replay-engine.test.ts',
27
29
  'tests/core/trajectory.test.ts',
28
- 'tests/integration/**/*.test.ts',
29
- 'tests/integration/**/*.test.tsx',
30
+ 'tests/core/workspace-context.test.ts',
31
+ // Service tests with DB dependencies
30
32
  'tests/service/nocturnal-service-code-candidate.test.ts',
31
33
  'tests/service/nocturnal-target-selector.test.ts',
34
+ 'tests/service/evolution-worker.nocturnal.test.ts',
35
+ 'tests/service/evolution-worker.timeout.test.ts',
36
+ 'tests/service/data-endpoints-regression.test.ts',
37
+ 'tests/service/control-ui-query-service.test.ts',
38
+ 'tests/service/keyword-optimization-service.test.ts',
39
+ // Hook tests with DB dependencies
40
+ 'tests/hooks/subagent.test.ts',
41
+ 'tests/hooks/gate-pipeline-integration.test.ts',
42
+ 'tests/hooks/gate-rule-host-pipeline.test.ts',
43
+ // Script tests with DB
44
+ 'tests/scripts/validate-live-path.test.ts',
45
+ // Integration test directory
46
+ 'tests/integration/**/*.test.ts',
47
+ 'tests/integration/**/*.test.tsx',
32
48
  ];
33
49
 
34
50
  export default defineConfig({
@@ -55,14 +71,17 @@ export default defineConfig({
55
71
  name: 'unit',
56
72
  include: ['tests/**/*.test.ts', 'tests/**/*.test.tsx'],
57
73
  exclude: integrationTests,
58
- pool: 'threads',
74
+ // Use forks pool to avoid better-sqlite3 teardown hangs
75
+ // Native modules don't clean up properly in threads pool
76
+ pool: 'forks',
59
77
  },
60
78
  },
61
79
  {
62
80
  test: {
63
81
  name: 'integration',
64
82
  include: integrationTests,
65
- pool: 'threads',
83
+ // Use forks pool for integration tests too - better-sqlite3 cleanup issues
84
+ pool: 'forks',
66
85
  },
67
86
  },
68
87
  ],