hungry-ghost-hive 0.45.0 → 0.46.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. package/dist/cli/commands/cluster.d.ts.map +1 -1
  2. package/dist/cli/commands/cluster.js +348 -1
  3. package/dist/cli/commands/cluster.js.map +1 -1
  4. package/dist/cli/commands/cluster.test.js +313 -9
  5. package/dist/cli/commands/cluster.test.js.map +1 -1
  6. package/dist/cli/commands/req-spawn.test.d.ts +2 -0
  7. package/dist/cli/commands/req-spawn.test.d.ts.map +1 -0
  8. package/dist/cli/commands/req-spawn.test.js +116 -0
  9. package/dist/cli/commands/req-spawn.test.js.map +1 -0
  10. package/dist/cli/commands/req.d.ts.map +1 -1
  11. package/dist/cli/commands/req.js +21 -13
  12. package/dist/cli/commands/req.js.map +1 -1
  13. package/dist/cluster/cluster-http-server.d.ts +32 -0
  14. package/dist/cluster/cluster-http-server.d.ts.map +1 -1
  15. package/dist/cluster/cluster-http-server.js +42 -0
  16. package/dist/cluster/cluster-http-server.js.map +1 -1
  17. package/dist/cluster/distributed-runtime-coverage.test.js +9 -0
  18. package/dist/cluster/distributed-runtime-coverage.test.js.map +1 -1
  19. package/dist/cluster/distributed-system.test.js +135 -0
  20. package/dist/cluster/distributed-system.test.js.map +1 -1
  21. package/dist/cluster/events.d.ts +23 -0
  22. package/dist/cluster/events.d.ts.map +1 -1
  23. package/dist/cluster/events.js +74 -0
  24. package/dist/cluster/events.js.map +1 -1
  25. package/dist/cluster/heartbeat-manager.d.ts +2 -0
  26. package/dist/cluster/heartbeat-manager.d.ts.map +1 -1
  27. package/dist/cluster/heartbeat-manager.js +42 -6
  28. package/dist/cluster/heartbeat-manager.js.map +1 -1
  29. package/dist/cluster/membership.test.d.ts +2 -0
  30. package/dist/cluster/membership.test.d.ts.map +1 -0
  31. package/dist/cluster/membership.test.js +416 -0
  32. package/dist/cluster/membership.test.js.map +1 -0
  33. package/dist/cluster/partition-safety.test.d.ts +2 -0
  34. package/dist/cluster/partition-safety.test.d.ts.map +1 -0
  35. package/dist/cluster/partition-safety.test.js +440 -0
  36. package/dist/cluster/partition-safety.test.js.map +1 -0
  37. package/dist/cluster/raft-state-machine.d.ts +33 -1
  38. package/dist/cluster/raft-state-machine.d.ts.map +1 -1
  39. package/dist/cluster/raft-state-machine.js +65 -3
  40. package/dist/cluster/raft-state-machine.js.map +1 -1
  41. package/dist/cluster/raft-store.d.ts +26 -1
  42. package/dist/cluster/raft-store.d.ts.map +1 -1
  43. package/dist/cluster/raft-store.js +137 -0
  44. package/dist/cluster/raft-store.js.map +1 -1
  45. package/dist/cluster/replication-lag.test.d.ts +2 -0
  46. package/dist/cluster/replication-lag.test.d.ts.map +1 -0
  47. package/dist/cluster/replication-lag.test.js +239 -0
  48. package/dist/cluster/replication-lag.test.js.map +1 -0
  49. package/dist/cluster/replication.d.ts +2 -2
  50. package/dist/cluster/replication.d.ts.map +1 -1
  51. package/dist/cluster/replication.js +1 -1
  52. package/dist/cluster/replication.js.map +1 -1
  53. package/dist/cluster/runtime.d.ts +78 -0
  54. package/dist/cluster/runtime.d.ts.map +1 -1
  55. package/dist/cluster/runtime.js +400 -13
  56. package/dist/cluster/runtime.js.map +1 -1
  57. package/dist/cluster/state-recovery.test.d.ts +2 -0
  58. package/dist/cluster/state-recovery.test.d.ts.map +1 -0
  59. package/dist/cluster/state-recovery.test.js +310 -0
  60. package/dist/cluster/state-recovery.test.js.map +1 -0
  61. package/dist/cluster/types.d.ts +30 -0
  62. package/dist/cluster/types.d.ts.map +1 -1
  63. package/dist/config/schema.d.ts +48 -0
  64. package/dist/config/schema.d.ts.map +1 -1
  65. package/dist/config/schema.js +11 -0
  66. package/dist/config/schema.js.map +1 -1
  67. package/dist/context-files/generator.js +1 -1
  68. package/dist/context-files/generator.js.map +1 -1
  69. package/dist/context-files/generator.test.js +51 -0
  70. package/dist/context-files/generator.test.js.map +1 -1
  71. package/dist/orchestrator/orphan-recovery.d.ts +1 -1
  72. package/dist/orchestrator/orphan-recovery.d.ts.map +1 -1
  73. package/dist/orchestrator/orphan-recovery.js +4 -4
  74. package/dist/orchestrator/orphan-recovery.js.map +1 -1
  75. package/dist/orchestrator/prompt-templates.d.ts +3 -1
  76. package/dist/orchestrator/prompt-templates.d.ts.map +1 -1
  77. package/dist/orchestrator/prompt-templates.js +45 -8
  78. package/dist/orchestrator/prompt-templates.js.map +1 -1
  79. package/dist/orchestrator/prompt-templates.test.js +210 -0
  80. package/dist/orchestrator/prompt-templates.test.js.map +1 -1
  81. package/dist/orchestrator/scheduler.d.ts +1 -0
  82. package/dist/orchestrator/scheduler.d.ts.map +1 -1
  83. package/dist/orchestrator/scheduler.js +15 -10
  84. package/dist/orchestrator/scheduler.js.map +1 -1
  85. package/dist/orchestrator/scheduler.test.js +97 -6
  86. package/dist/orchestrator/scheduler.test.js.map +1 -1
  87. package/package.json +1 -1
  88. package/src/cli/commands/cluster.test.ts +387 -9
  89. package/src/cli/commands/cluster.ts +486 -1
  90. package/src/cli/commands/req-spawn.test.ts +153 -0
  91. package/src/cli/commands/req.ts +31 -18
  92. package/src/cluster/cluster-http-server.ts +80 -0
  93. package/src/cluster/distributed-runtime-coverage.test.ts +9 -0
  94. package/src/cluster/distributed-system.test.ts +168 -0
  95. package/src/cluster/events.ts +90 -0
  96. package/src/cluster/heartbeat-manager.ts +48 -6
  97. package/src/cluster/membership.test.ts +498 -0
  98. package/src/cluster/partition-safety.test.ts +523 -0
  99. package/src/cluster/raft-state-machine.ts +76 -4
  100. package/src/cluster/raft-store.ts +167 -1
  101. package/src/cluster/replication-lag.test.ts +284 -0
  102. package/src/cluster/replication.ts +6 -0
  103. package/src/cluster/runtime.ts +551 -12
  104. package/src/cluster/state-recovery.test.ts +420 -0
  105. package/src/cluster/types.ts +32 -0
  106. package/src/config/schema.ts +11 -0
  107. package/src/context-files/generator.test.ts +55 -0
  108. package/src/context-files/generator.ts +5 -5
  109. package/src/orchestrator/orphan-recovery.ts +32 -13
  110. package/src/orchestrator/prompt-templates.test.ts +263 -0
  111. package/src/orchestrator/prompt-templates.ts +49 -8
  112. package/src/orchestrator/scheduler.test.ts +129 -6
  113. package/src/orchestrator/scheduler.ts +46 -20
@@ -0,0 +1,420 @@
1
+ // Licensed under the Hungry Ghost Hive License. See LICENSE.
2
+
3
+ /**
4
+ * Tests for offline node state recovery (STORY-STATE-RECOVERY).
5
+ *
6
+ * Covers:
7
+ * - Short outage: delta sync is sufficient, no snapshot needed
8
+ * - Long outage: delta is insufficient, snapshot-based recovery is triggered
9
+ * - Catching-up status: node suppresses elections while catching up
10
+ * - Progress indicator: catch_up_applied / catch_up_total in sync result (TODO: no test in this file asserts these fields yet)
11
+ * - Effective version vector: snapshot vector is used to avoid re-requesting events
12
+ */
13
+
14
+ import { mkdtempSync, rmSync } from 'fs';
15
+ import { tmpdir } from 'os';
16
+ import { join } from 'path';
17
+ import type { Database } from 'sql.js';
18
+ import { afterEach, describe, expect, it, vi } from 'vitest';
19
+ import type { ClusterConfig } from '../config/schema.js';
20
+ import { run } from '../db/client.js';
21
+ import { createTestDatabase } from '../db/queries/test-helpers.js';
22
+ import { RaftStateMachine } from './raft-state-machine.js';
23
+ import {
24
+ ensureClusterTables,
25
+ getEffectiveVersionVector,
26
+ getSnapshotVersionVector,
27
+ getVersionVector,
28
+ scanLocalChanges,
29
+ setSnapshotVersionVector,
30
+ } from './replication.js';
31
+
32
+ const tempDirs: string[] = [];
33
+
34
+ afterEach(() => {
35
+ for (const dir of tempDirs.splice(0)) {
36
+ rmSync(dir, { recursive: true, force: true });
37
+ }
38
+ });
39
+
40
+ // ─────────────────────────────────────────────────────────────────────────────
41
+ // Helpers
42
+ // ─────────────────────────────────────────────────────────────────────────────
43
+
44
+ function makeHiveDir(): string {
45
+ const dir = mkdtempSync(join(tmpdir(), 'hive-state-recovery-'));
46
+ tempDirs.push(dir);
47
+ return join(dir, '.hive');
48
+ }
49
+
50
+ function insertStory(db: Database, id: string, title: string): void {
51
+ const now = new Date().toISOString();
52
+ run(
53
+ db,
54
+ `INSERT OR IGNORE INTO stories (id, requirement_id, team_id, title, description, status, created_at, updated_at)
55
+ VALUES (?, NULL, NULL, ?, '', 'planned', ?, ?)`,
56
+ [id, title, now, now]
57
+ );
58
+ }
59
+
60
+ function makeRaftConfig(overrides: Partial<ClusterConfig> = {}): ClusterConfig {
61
+ return {
62
+ enabled: true,
63
+ node_id: 'node-test',
64
+ listen_host: '127.0.0.1',
65
+ listen_port: 9999,
66
+ public_url: 'http://127.0.0.1:9999',
67
+ peers: [],
68
+ heartbeat_interval_ms: 100,
69
+ election_timeout_min_ms: 200,
70
+ election_timeout_max_ms: 400,
71
+ sync_interval_ms: 200,
72
+ request_timeout_ms: 500,
73
+ story_similarity_threshold: 0.92,
74
+ ...overrides,
75
+ };
76
+ }
77
+
78
+ // ─────────────────────────────────────────────────────────────────────────────
79
+ // Snapshot version vector management
80
+ // ─────────────────────────────────────────────────────────────────────────────
81
+
82
+ describe('snapshot version vector management', () => {
83
+ it('getSnapshotVersionVector returns empty object when no snapshot applied', async () => {
84
+ const db = await createTestDatabase();
85
+ ensureClusterTables(db, 'node-a');
86
+
87
+ expect(getSnapshotVersionVector(db)).toEqual({});
88
+
89
+ db.close();
90
+ });
91
+
92
+ it('setSnapshotVersionVector persists and is readable', async () => {
93
+ const db = await createTestDatabase();
94
+ ensureClusterTables(db, 'node-a');
95
+
96
+ setSnapshotVersionVector(db, { 'node-a': 42, 'node-b': 17 });
97
+
98
+ expect(getSnapshotVersionVector(db)).toEqual({ 'node-a': 42, 'node-b': 17 });
99
+
100
+ db.close();
101
+ });
102
+
103
+ it('overwrites previous snapshot version vector', async () => {
104
+ const db = await createTestDatabase();
105
+ ensureClusterTables(db, 'node-a');
106
+
107
+ setSnapshotVersionVector(db, { 'node-a': 10 });
108
+ setSnapshotVersionVector(db, { 'node-a': 50, 'node-c': 5 });
109
+
110
+ expect(getSnapshotVersionVector(db)).toEqual({ 'node-a': 50, 'node-c': 5 });
111
+
112
+ db.close();
113
+ });
114
+ });
115
+
116
+ // ─────────────────────────────────────────────────────────────────────────────
117
+ // Effective version vector
118
+ // ─────────────────────────────────────────────────────────────────────────────
119
+
120
+ describe('getEffectiveVersionVector', () => {
121
+ it('returns event-derived vector when no snapshot applied', async () => {
122
+ const db = await createTestDatabase();
123
+ ensureClusterTables(db, 'node-a');
124
+ insertStory(db, 'S-1', 'Story 1');
125
+ scanLocalChanges(db, 'node-a');
126
+
127
+ const effective = getEffectiveVersionVector(db);
128
+ const event = getVersionVector(db);
129
+
130
+ expect(effective).toEqual(event);
131
+
132
+ db.close();
133
+ });
134
+
135
+ it('merges snapshot vector with event vector taking max per actor', async () => {
136
+ const db = await createTestDatabase();
137
+ ensureClusterTables(db, 'node-a');
138
+
139
+ // Emit some events from node-a (counter becomes 1)
140
+ insertStory(db, 'S-1', 'Story 1');
141
+ scanLocalChanges(db, 'node-a');
142
+
143
+ // Apply a snapshot from node-b at counter 100
144
+ setSnapshotVersionVector(db, { 'node-b': 100, 'node-a': 0 });
145
+
146
+ const effective = getEffectiveVersionVector(db);
147
+
148
+ // node-a: max(event=1, snapshot=0) = 1
149
+ expect(effective['node-a']).toBe(1);
150
+ // node-b: from snapshot = 100 (no events from node-b in event log)
151
+ expect(effective['node-b']).toBe(100);
152
+
153
+ db.close();
154
+ });
155
+
156
+ it('snapshot vector wins when event log is empty after snapshot recovery', async () => {
157
+ const db = await createTestDatabase();
158
+ ensureClusterTables(db, 'node-a');
159
+
160
+ // Simulate snapshot-based recovery: no local events yet, but snapshot applied
161
+ setSnapshotVersionVector(db, { leader: 500 });
162
+
163
+ const effective = getEffectiveVersionVector(db);
164
+
165
+ expect(effective['leader']).toBe(500);
166
+
167
+ db.close();
168
+ });
169
+ });
170
+
171
+ // ─────────────────────────────────────────────────────────────────────────────
172
+ // Catching-up state in RaftStateMachine
173
+ // ─────────────────────────────────────────────────────────────────────────────
174
+
175
+ describe('RaftStateMachine catching-up state', () => {
176
+ it('isCatchingUp starts as false', () => {
177
+ const raft = new RaftStateMachine(makeRaftConfig(), {
178
+ postJson: vi.fn(),
179
+ isActive: () => true,
180
+ handleBackgroundError: vi.fn(),
181
+ });
182
+
183
+ expect(raft.isCatchingUp).toBe(false);
184
+ });
185
+
186
+ it('suppresses elections while isCatchingUp is true', () => {
187
+ const hiveDir = makeHiveDir();
188
+ const startElectionSpy = vi.fn().mockResolvedValue(undefined);
189
+
190
+ // Use very short timeouts so the deadline fires quickly
191
+ const raft = new RaftStateMachine(
192
+ makeRaftConfig({ election_timeout_min_ms: 1, election_timeout_max_ms: 1 }),
193
+ {
194
+ postJson: vi.fn(),
195
+ isActive: () => true,
196
+ handleBackgroundError: vi.fn(),
197
+ }
198
+ );
199
+
200
+ vi.spyOn(raft, 'startElection').mockImplementation(startElectionSpy);
201
+
202
+ raft.initializeRaftStore(hiveDir);
203
+ raft.isCatchingUp = true;
204
+ raft.startElectionLoop();
205
+
206
+ // Election should not start because of catching-up, even after the deadline
207
+ return new Promise<void>(resolve => {
208
+ setTimeout(() => {
209
+ raft.stopElectionLoop();
210
+ expect(startElectionSpy).not.toHaveBeenCalled();
211
+ resolve();
212
+ }, 400);
213
+ });
214
+ });
215
+
216
+ it('allows elections after isCatchingUp is set to false', () => {
217
+ const hiveDir = makeHiveDir();
218
+ const electionStarted = vi.fn().mockResolvedValue(undefined);
219
+
220
+ // Use very short timeouts so the deadline fires quickly
221
+ const raft = new RaftStateMachine(
222
+ makeRaftConfig({ election_timeout_min_ms: 1, election_timeout_max_ms: 1 }),
223
+ {
224
+ postJson: vi.fn(),
225
+ isActive: () => true,
226
+ handleBackgroundError: vi.fn(),
227
+ }
228
+ );
229
+
230
+ vi.spyOn(raft, 'startElection').mockImplementation(electionStarted);
231
+
232
+ raft.initializeRaftStore(hiveDir);
233
+ raft.isCatchingUp = false;
234
+ raft.startElectionLoop();
235
+
236
+ return new Promise<void>(resolve => {
237
+ setTimeout(() => {
238
+ raft.stopElectionLoop();
239
+ expect(electionStarted).toHaveBeenCalled();
240
+ resolve();
241
+ }, 400);
242
+ });
243
+ });
244
+ });
245
+
246
+ // ─────────────────────────────────────────────────────────────────────────────
247
+ // Delta sufficiency detection (unit-level)
248
+ // ─────────────────────────────────────────────────────────────────────────────
249
+
250
+ describe('delta sufficiency detection', () => {
251
+ /**
252
+ * Simulate what isDeltaInsufficient would decide by checking whether
253
+ * received events cover what the peer's version vector says is needed.
254
+ */
255
+ function isDeltaInsufficient(
256
+ localVector: Record<string, number>,
257
+ peerVector: Record<string, number>,
258
+ receivedActorCounts: Record<string, number>
259
+ ): boolean {
260
+ for (const [actorId, peerCounter] of Object.entries(peerVector)) {
261
+ const localCounter = localVector[actorId] ?? 0;
262
+ const needed = peerCounter - localCounter;
263
+ if (needed <= 0) continue;
264
+
265
+ const receivedCount = receivedActorCounts[actorId] ?? 0;
266
+ if (receivedCount < needed) return true;
267
+ }
268
+ return false;
269
+ }
270
+
271
+ it('returns false when received events cover all needed (short outage)', () => {
272
+ // Local is 10 behind, peer sends 10 events — sufficient
273
+ const result = isDeltaInsufficient({ leader: 90 }, { leader: 100 }, { leader: 10 });
274
+ expect(result).toBe(false);
275
+ });
276
+
277
+ it('returns true when received events are fewer than needed (long outage / log truncated)', () => {
278
+ // Local is 5000 behind, but the peer could only send 4000 events for that actor
279
+ const result = isDeltaInsufficient(
280
+ { leader: 0 },
281
+ { leader: 5000 },
282
+ { leader: 4000 } // cache can only provide 4000, but 5000 needed
283
+ );
284
+ expect(result).toBe(true);
285
+ });
286
+
287
+ it('returns false when already caught up (no events needed)', () => {
288
+ const result = isDeltaInsufficient({ leader: 100 }, { leader: 100 }, {});
289
+ expect(result).toBe(false);
290
+ });
291
+
292
+ it('returns true when node has no events but peer has many (fresh node, long history)', () => {
293
+ // A freshly joined node has no events, while the peer has 25000 (exceeds the 20k cache)
294
+ const result = isDeltaInsufficient(
295
+ {},
296
+ { leader: 25000 },
297
+ { leader: 20000 } // got max cache size, still missing 5000
298
+ );
299
+ expect(result).toBe(true);
300
+ });
301
+
302
+ it('handles multiple actors and detects insufficiency in one', () => {
303
+ // actor-a is fine, actor-b is truncated
304
+ const result = isDeltaInsufficient(
305
+ { 'actor-a': 95, 'actor-b': 0 },
306
+ { 'actor-a': 100, 'actor-b': 6000 },
307
+ { 'actor-a': 5, 'actor-b': 4000 } // actor-b has only 4000/6000
308
+ );
309
+ expect(result).toBe(true);
310
+ });
311
+
312
+ it('returns false when all actors are fully covered', () => {
313
+ const result = isDeltaInsufficient(
314
+ { 'actor-a': 50, 'actor-b': 20 },
315
+ { 'actor-a': 60, 'actor-b': 30 },
316
+ { 'actor-a': 10, 'actor-b': 10 }
317
+ );
318
+ expect(result).toBe(false);
319
+ });
320
+ });
321
+
322
+ // ─────────────────────────────────────────────────────────────────────────────
323
+ // Applying a snapshot
324
+ // ─────────────────────────────────────────────────────────────────────────────
325
+
326
+ describe('applying snapshot to local database', () => {
327
+ it('upserts rows from snapshot into local tables and sets snapshot version vector', async () => {
328
+ const db = await createTestDatabase();
329
+ ensureClusterTables(db, 'node-recovering');
330
+
331
+ // Simulate a snapshot received from the leader
332
+ const snapshotVersionVector = { 'leader-node': 42 };
333
+ const payload = {
334
+ id: 'STORY-SNAP-1',
335
+ requirement_id: null,
336
+ team_id: null,
337
+ title: 'Snapshot story',
338
+ description: '',
339
+ acceptance_criteria: null,
340
+ complexity_score: null,
341
+ story_points: null,
342
+ status: 'planned',
343
+ assigned_agent_id: null,
344
+ branch_name: null,
345
+ pr_url: null,
346
+ external_subtask_key: null,
347
+ created_at: new Date().toISOString(),
348
+ updated_at: new Date().toISOString(),
349
+ };
350
+
351
+ // Manually apply like applySnapshot would
352
+ run(
353
+ db,
354
+ `INSERT OR REPLACE INTO stories
355
+ (id, requirement_id, team_id, title, description, acceptance_criteria,
356
+ complexity_score, story_points, status, assigned_agent_id, branch_name, pr_url, created_at, updated_at)
357
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
358
+ [
359
+ payload.id,
360
+ payload.requirement_id,
361
+ payload.team_id,
362
+ payload.title,
363
+ payload.description,
364
+ payload.acceptance_criteria,
365
+ payload.complexity_score,
366
+ payload.story_points,
367
+ payload.status,
368
+ payload.assigned_agent_id,
369
+ payload.branch_name,
370
+ payload.pr_url,
371
+ payload.created_at,
372
+ payload.updated_at,
373
+ ]
374
+ );
375
+ setSnapshotVersionVector(db, snapshotVersionVector);
376
+
377
+ // Verify story was applied
378
+ const story = db.exec(`SELECT id, title FROM stories WHERE id = 'STORY-SNAP-1'`);
379
+ expect(story[0]?.values[0]?.[0]).toBe('STORY-SNAP-1');
380
+ expect(story[0]?.values[0]?.[1]).toBe('Snapshot story');
381
+
382
+ // Verify snapshot version vector was stored
383
+ expect(getSnapshotVersionVector(db)).toEqual(snapshotVersionVector);
384
+
385
+ // Effective version vector should reflect snapshot
386
+ const effective = getEffectiveVersionVector(db);
387
+ expect(effective['leader-node']).toBe(42);
388
+
389
+ db.close();
390
+ });
391
+
392
+ it('effective version vector prevents re-requesting snapshotted events on next sync', async () => {
393
+ const db = await createTestDatabase();
394
+ ensureClusterTables(db, 'node-recovering');
395
+
396
+ // Snapshot was applied at leader counter 1000
397
+ setSnapshotVersionVector(db, { 'leader-node': 1000 });
398
+
399
+ const effective = getEffectiveVersionVector(db);
400
+
401
+ // When we request delta, we'll ask for events > 1000, not from 0
402
+ expect(effective['leader-node']).toBe(1000);
403
+
404
+ db.close();
405
+ });
406
+ });
407
+
408
+ // ─────────────────────────────────────────────────────────────────────────────
409
+ // Snapshot HTTP endpoint
410
+ // ─────────────────────────────────────────────────────────────────────────────
411
+
412
+ describe('snapshot served via HTTP endpoint', () => {
413
+ it('cachedSnapshot starts as null and returns empty snapshot before first sync', async () => {
414
+ // Placeholder: this does not start an HTTP server or invoke the runtime's snapshot handler.
415
+ // It only pins the shape the handler is expected to return when no snapshot is cached: { version_vector: {}, tables: {} }.
416
+ const emptySnapshot = { version_vector: {}, tables: {} };
417
+ expect(emptySnapshot.version_vector).toEqual({});
418
+ expect(emptySnapshot.tables).toEqual({});
419
+ });
420
+ });
@@ -92,5 +92,37 @@ export interface TableAdapter {
92
92
  delete: (db: Database, rowId: string) => void;
93
93
  }
94
94
 
95
+ export interface RaftSnapshot {
96
+ /** Log index at which this snapshot was taken */
97
+ last_included_index: number;
98
+ /** Raft term at the snapshot point */
99
+ last_included_term: number;
100
+ /** Version vector capturing all actors' progress at snapshot time */
101
+ version_vector: VersionVector;
102
+ /** Known event IDs at the time of snapshotting (for deduplication) */
103
+ known_event_ids: string[];
104
+ /** ISO timestamp when the snapshot was created */
105
+ created_at: string;
106
+ }
107
+
108
+ /**
109
+ * A full snapshot of all replicated tables at a given version vector.
110
+ * Used for snapshot-based state recovery when delta sync is insufficient.
111
+ */
112
+ export interface ClusterSnapshot {
113
+ version_vector: VersionVector;
114
+ tables: Partial<
115
+ Record<ReplicatedTable, Array<{ rowId: string; payload: Record<string, unknown> }>>
116
+ >;
117
+ }
118
+
119
+ /**
120
+ * Progress indicator for snapshot-based catch-up.
121
+ */
122
+ export interface CatchUpProgress {
123
+ applied: number;
124
+ total: number;
125
+ }
126
+
95
127
  // Re-import Database type for TableAdapter interface
96
128
  import type { Database } from 'sql.js';
@@ -314,8 +314,17 @@ const ClusterConfigSchema = z
314
314
  sync_interval_ms: z.number().int().positive().default(5000),
315
315
  // Outbound HTTP request timeout for peer calls
316
316
  request_timeout_ms: z.number().int().positive().default(5000),
317
+ // Leader lease window in ms; followers reject commands from leaders
318
+ // whose last heartbeat is older than this. Defaults to 3× heartbeat_interval_ms.
319
+ leader_lease_ms: z.number().int().positive().optional(),
317
320
  // Story similarity threshold [0..1] for duplicate merge detection
318
321
  story_similarity_threshold: z.number().min(0).max(1).default(0.92),
322
+ // Maximum raft log entries before triggering compaction (0 = disabled)
323
+ max_log_entries: z.number().int().nonnegative().optional(),
324
+ // Maximum cluster_events rows to retain after pruning (0 = disabled)
325
+ max_cluster_events: z.number().int().nonnegative().optional(),
326
+ // Minimum interval between compaction runs in milliseconds
327
+ compaction_interval_ms: z.number().int().nonnegative().optional(),
319
328
  })
320
329
  .superRefine((cluster, ctx) => {
321
330
  if (!cluster.enabled) return;
@@ -600,6 +609,8 @@ cluster:
600
609
  # State replication cadence
601
610
  sync_interval_ms: 5000
602
611
  request_timeout_ms: 5000
612
+ # Leader lease window (default: 3× heartbeat_interval_ms)
613
+ # leader_lease_ms: 6000
603
614
  # Duplicate story detection sensitivity
604
615
  story_similarity_threshold: 0.92
605
616
  `;
@@ -197,6 +197,61 @@ describe('generator module', () => {
197
197
  const result = formatStoriesForContext(stories);
198
198
  expect(result).not.toContain('Acceptance Criteria');
199
199
  });
200
+
201
+ it('should include markdown_path reference when set on a story', () => {
202
+ const stories: StoryRow[] = [
203
+ {
204
+ id: 'STORY-1',
205
+ title: 'Story With Markdown',
206
+ description: 'DB description',
207
+ status: 'in_progress',
208
+ complexity_score: 3,
209
+ story_points: 3,
210
+ team_id: 'team-1',
211
+ requirement_id: null,
212
+ acceptance_criteria: null,
213
+ assigned_agent_id: null,
214
+ branch_name: null,
215
+ pr_url: null,
216
+ jira_issue_key: null,
217
+ jira_issue_id: null,
218
+ markdown_path: '/stories/STORY-1.md',
219
+ created_at: '2024-01-01',
220
+ updated_at: '2024-01-01',
221
+ } as StoryRow,
222
+ ];
223
+
224
+ const result = formatStoriesForContext(stories);
225
+ expect(result).toContain('/stories/STORY-1.md');
226
+ expect(result).toContain('Full Story Details');
227
+ });
228
+
229
+ it('should not include markdown reference when markdown_path is null', () => {
230
+ const stories: StoryRow[] = [
231
+ {
232
+ id: 'STORY-1',
233
+ title: 'Story Without Markdown',
234
+ description: 'DB only description',
235
+ status: 'planned',
236
+ complexity_score: 2,
237
+ story_points: 2,
238
+ team_id: 'team-1',
239
+ requirement_id: null,
240
+ acceptance_criteria: null,
241
+ assigned_agent_id: null,
242
+ branch_name: null,
243
+ pr_url: null,
244
+ jira_issue_key: null,
245
+ jira_issue_id: null,
246
+ markdown_path: null,
247
+ created_at: '2024-01-01',
248
+ updated_at: '2024-01-01',
249
+ } as StoryRow,
250
+ ];
251
+
252
+ const result = formatStoriesForContext(stories);
253
+ expect(result).not.toContain('Full Story Details');
254
+ });
200
255
  });
201
256
 
202
257
  describe('formatQualityChecks', () => {
@@ -53,11 +53,11 @@ export function formatStoriesForContext(stories: StoryRow[]): string {
53
53
  - **Complexity**: ${story.complexity_score || 'Not estimated'}
54
54
  - **Story Points**: ${story.story_points || 'Not estimated'}
55
55
  - **Description**: ${story.description}
56
- ${
57
- story.acceptance_criteria && story.acceptance_criteria.length > 0
58
- ? `**Acceptance Criteria**:\n${(Array.isArray(story.acceptance_criteria) ? story.acceptance_criteria : JSON.parse(story.acceptance_criteria || '[]')).map((c: string) => ` - ${c}`).join('\n')}`
59
- : ''
60
- }`
56
+ ${story.markdown_path ? `- **Full Story Details**: Read \`${story.markdown_path}\` for complete requirements and acceptance criteria\n` : ''}${
57
+ story.acceptance_criteria && story.acceptance_criteria.length > 0
58
+ ? `**Acceptance Criteria**:\n${(Array.isArray(story.acceptance_criteria) ? story.acceptance_criteria : JSON.parse(story.acceptance_criteria || '[]')).map((c: string) => ` - ${c}`).join('\n')}`
59
+ : ''
60
+ }`
61
61
  )
62
62
  .join('\n\n');
63
63
  }
@@ -14,7 +14,11 @@ import {
14
14
  * Detect and recover orphaned stories (assigned to terminated agents).
15
15
  * Returns the story IDs that were recovered.
16
16
  */
17
- export function detectAndRecoverOrphanedStories(db: Database, rootDir: string): string[] {
17
+ export function detectAndRecoverOrphanedStories(
18
+ db: Database,
19
+ rootDir: string,
20
+ storiesDir?: string
21
+ ): string[] {
18
22
  const orphanedAssignments = getStoriesWithOrphanedAssignments(db);
19
23
  const staleInProgressStories = getStaleInProgressStoriesWithoutAssignment(db);
20
24
  const inconsistentInProgressAssignments = getInProgressStoriesWithInconsistentAssignments(db);
@@ -26,10 +30,15 @@ export function detectAndRecoverOrphanedStories(db: Database, rootDir: string):
26
30
  if (recoveredSet.has(assignment.id)) continue;
27
31
 
28
32
  // Update story in single atomic operation
29
- updateStory(db, assignment.id, {
30
- assignedAgentId: null,
31
- status: 'planned',
32
- });
33
+ updateStory(
34
+ db,
35
+ assignment.id,
36
+ {
37
+ assignedAgentId: null,
38
+ status: 'planned',
39
+ },
40
+ storiesDir
41
+ );
33
42
  createLog(db, {
34
43
  agentId: 'scheduler',
35
44
  storyId: assignment.id,
@@ -52,10 +61,15 @@ export function detectAndRecoverOrphanedStories(db: Database, rootDir: string):
52
61
  try {
53
62
  if (recoveredSet.has(story.id)) continue;
54
63
 
55
- updateStory(db, story.id, {
56
- assignedAgentId: null,
57
- status: 'planned',
58
- });
64
+ updateStory(
65
+ db,
66
+ story.id,
67
+ {
68
+ assignedAgentId: null,
69
+ status: 'planned',
70
+ },
71
+ storiesDir
72
+ );
59
73
  createLog(db, {
60
74
  agentId: 'scheduler',
61
75
  storyId: story.id,
@@ -78,10 +92,15 @@ export function detectAndRecoverOrphanedStories(db: Database, rootDir: string):
78
92
  try {
79
93
  if (recoveredSet.has(assignment.id)) continue;
80
94
 
81
- updateStory(db, assignment.id, {
82
- assignedAgentId: null,
83
- status: 'planned',
84
- });
95
+ updateStory(
96
+ db,
97
+ assignment.id,
98
+ {
99
+ assignedAgentId: null,
100
+ status: 'planned',
101
+ },
102
+ storiesDir
103
+ );
85
104
  createLog(db, {
86
105
  agentId: 'scheduler',
87
106
  storyId: assignment.id,