trekoon 0.4.5 → 0.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,7 +20,7 @@ import { type Database } from "bun:sqlite";
20
20
 
21
21
  import { TrackerDomain } from "../domain/tracker-domain";
22
22
  import { type BoardEventBus } from "./event-bus";
23
- import { buildBoardSnapshot, type BoardSnapshot } from "./snapshot";
23
+ import { buildBoardSnapshot, buildBoardSnapshotDelta, type BoardSnapshot } from "./snapshot";
24
24
 
25
25
  const IN_PROCESS_WAL_SUPPRESS_MS = 500;
26
26
 
@@ -66,7 +66,25 @@ function changeKeyEqual(
66
66
  return a.version === b.version && a.updatedAt === b.updatedAt;
67
67
  }
68
68
 
69
+ /**
70
+ * Test-only call counter for {@link derivedRecordFingerprint}. Tests assert the
71
+ * leaf short-circuit path never enters this function. Production callers ignore
72
+ * the counter entirely.
73
+ */
74
+ let derivedFingerprintCalls = 0;
75
+
76
+ /** @internal — exposed for tests to verify the leaf no-stringify invariant. */
77
+ export function __resetDerivedFingerprintCallCount(): void {
78
+ derivedFingerprintCalls = 0;
79
+ }
80
+
81
+ /** @internal — exposed for tests to verify the leaf no-stringify invariant. */
82
+ export function __getDerivedFingerprintCallCount(): number {
83
+ return derivedFingerprintCalls;
84
+ }
85
+
69
86
  function derivedRecordFingerprint(value: unknown): string {
87
+ derivedFingerprintCalls += 1;
70
88
  if (!value || typeof value !== "object") {
71
89
  return JSON.stringify(value);
72
90
  }
@@ -106,26 +124,63 @@ function derivedRecordFingerprint(value: unknown): string {
106
124
  return JSON.stringify(record);
107
125
  }
108
126
 
109
- function recordMatchesPublishedDelta(record: unknown, publishedRecord: unknown): boolean {
127
+ function recordMatchesPublishedDelta(
128
+ record: unknown,
129
+ publishedRecord: unknown,
130
+ options: { readonly isLeaf: boolean },
131
+ ): boolean {
110
132
  const recordKey = recordChangeKey(record);
111
133
  const publishedKey = recordChangeKey(publishedRecord);
112
- return changeKeyEqual(recordKey, publishedKey) &&
113
- derivedRecordFingerprint(record) === derivedRecordFingerprint(publishedRecord);
134
+ if (!changeKeyEqual(recordKey, publishedKey)) {
135
+ return false;
136
+ }
137
+
138
+ // Leaf entities (subtask, dependency) have no derived-field fan-in beyond
139
+ // dependency rows themselves, and dependency rows always ship as their own
140
+ // collection delta. A matching (version, updatedAt) tuple is therefore
141
+ // sufficient to confirm the leaf record has not diverged from what the
142
+ // route handler already published — no JSON.stringify needed.
143
+ if (options.isLeaf) {
144
+ return true;
145
+ }
146
+
147
+ return derivedRecordFingerprint(record) === derivedRecordFingerprint(publishedRecord);
114
148
  }
115
149
 
116
- function recordChanged(previousRecord: unknown, currentRecord: unknown): boolean {
150
+ function recordChanged(
151
+ previousRecord: unknown,
152
+ currentRecord: unknown,
153
+ options: { readonly isLeaf: boolean },
154
+ ): boolean {
117
155
  if (!changeKeyEqual(recordChangeKey(previousRecord), recordChangeKey(currentRecord))) {
118
156
  return true;
119
157
  }
120
158
 
121
- // The board snapshot includes derived parent fields (for example epic task
122
- // counts/search text and task subtask lists). Child writes do not bump the
123
- // parent row version, but those derived fields still need to reach connected
124
- // boards through WAL deltas.
159
+ // Leaf entities (subtask, dependency) carry only fields that are mutated
160
+ // through their own row writes and those writes bump (version, updatedAt)
161
+ // in lockstep. Matching tuples therefore mean the leaf row is genuinely
162
+ // unchanged; we can short-circuit without paying the JSON.stringify cost.
163
+ //
164
+ // Subtask derived fields (blockedBy/blocks/dependencyIds/dependentIds) are
165
+ // recomputed by the client from the dependency-row collection (see
166
+ // src/board/assets/state/utils.js), so any dep change reaches subscribers
167
+ // via the dependencies delta even when the subtask short-circuits here.
168
+ if (options.isLeaf) {
169
+ return false;
170
+ }
171
+
172
+ // Parent entities (epic, task) carry derived fields (task counts, taskIds,
173
+ // subtasks list, searchText, blocks/blockedBy) that can shift without the
174
+ // parent row's version moving. Keep the fingerprint comparison so child
175
+ // writes still surface through the parent record.
125
176
  return derivedRecordFingerprint(previousRecord) !== derivedRecordFingerprint(currentRecord);
126
177
  }
127
178
 
128
- function diffById(previous: readonly unknown[] | undefined, current: readonly unknown[] | undefined): CollectionDiff {
179
+ function diffById(
180
+ previous: readonly unknown[] | undefined,
181
+ current: readonly unknown[] | undefined,
182
+ options: { readonly isLeaf: boolean },
183
+ ): CollectionDiff {
129
184
  const previousIndex = new Map<string, unknown>();
130
185
  for (const record of previous ?? []) {
131
186
  const id = recordId(record);
@@ -148,7 +203,7 @@ function diffById(previous: readonly unknown[] | undefined, current: readonly un
148
203
  upserted.push(record);
149
204
  continue;
150
205
  }
151
- if (recordChanged(previousRecord, record)) {
206
+ if (recordChanged(previousRecord, record, options)) {
152
207
  upserted.push(record);
153
208
  }
154
209
  }
@@ -193,6 +248,7 @@ function suppressAlreadyPublishedDiff(
193
248
  diff: CollectionDiff,
194
249
  publishedRecords: Map<string, unknown>,
195
250
  publishedDeletedIds: Set<string>,
251
+ options: { readonly isLeaf: boolean },
196
252
  ): CollectionDiff {
197
253
  return {
198
254
  upserted: diff.upserted.filter((record) => {
@@ -202,7 +258,7 @@ function suppressAlreadyPublishedDiff(
202
258
  }
203
259
 
204
260
  const publishedRecord = publishedRecords.get(id);
205
- return publishedRecord === undefined || !recordMatchesPublishedDelta(record, publishedRecord);
261
+ return publishedRecord === undefined || !recordMatchesPublishedDelta(record, publishedRecord, options);
206
262
  }),
207
263
  deletedIds: diff.deletedIds.filter((id) => !publishedDeletedIds.has(id)),
208
264
  };
@@ -212,6 +268,298 @@ function hasDiffChanges(...diffs: readonly CollectionDiff[]): boolean {
212
268
  return diffs.some((diff) => diff.upserted.length > 0 || diff.deletedIds.length > 0);
213
269
  }
214
270
 
271
+ // -- Event-cursor reconciliation -------------------------------------------
272
+ //
273
+ // Reads canonical mutation events appended by `appendEventWithGitContext`
274
+ // (src/sync/event-writes.ts) and translates them into the minimal set of
275
+ // entity IDs whose snapshot rows must be re-read. This lets the watcher avoid
276
+ // a full board read on every WAL tick — the dominant cost on large boards.
277
+ //
278
+ // The full-snapshot diff path is kept as a fallback for cases where the event
279
+ // stream is not safely consumable (cursor pruned, parse failure, first-tick
280
+ // warm-up, or any unexpected event shape).
281
+
282
+ interface EventRow {
283
+ readonly id: string;
284
+ readonly entity_kind: string;
285
+ readonly entity_id: string;
286
+ readonly operation: string;
287
+ readonly payload: string;
288
+ readonly created_at: number;
289
+ }
290
+
291
+ interface EventCursor {
292
+ readonly createdAt: number;
293
+ readonly id: string;
294
+ }
295
+
296
+ interface EventCursorDelta {
297
+ readonly epicIds: string[];
298
+ readonly taskIds: string[];
299
+ readonly subtaskIds: string[];
300
+ readonly dependencyIds: string[];
301
+ readonly deletedEpicIds: string[];
302
+ readonly deletedTaskIds: string[];
303
+ readonly deletedSubtaskIds: string[];
304
+ readonly deletedDependencyIds: string[];
305
+ }
306
+
307
+ type EventCursorReconcileResult =
308
+ | { readonly kind: "ok"; readonly newCursor: EventCursor; readonly delta: EventCursorDelta }
309
+ | { readonly kind: "fallback"; readonly reason: string };
310
+
311
+ /** Read the most recent event row to seed the cursor at watcher start. */
312
+ function readLatestEventCursor(db: Database): EventCursor | null {
313
+ const row = db
314
+ .query(
315
+ "SELECT id, created_at FROM events ORDER BY created_at DESC, id DESC LIMIT 1;",
316
+ )
317
+ .get() as { id: string; created_at: number } | null;
318
+ if (!row) {
319
+ return null;
320
+ }
321
+ return { createdAt: row.created_at, id: row.id };
322
+ }
323
+
324
+ /**
325
+ * Determine whether a non-null cursor predates the retained-events window —
326
+ * i.e. the event the cursor points at is no longer present in the live
327
+ * `events` table (for example because it was pruned). When that happens the
328
+ * watcher cannot derive the diff from events alone and must fall back.
329
+ *
330
+ * We avoid the more expensive per-branch retention check that `sync/service.ts`
331
+ * does for sync cursors: the watcher consumes events across all branches, so a
332
+ * single "is this cursor.id still present in events?" check is enough — if the
333
+ * row is gone, the safe move is fallback.
334
+ */
335
+ function isCursorStale(db: Database, cursor: EventCursor): boolean {
336
+ const row = db
337
+ .query("SELECT 1 AS hit FROM events WHERE id = ? LIMIT 1;")
338
+ .get(cursor.id) as { hit: number } | null;
339
+ return row === null;
340
+ }
341
+
342
+ /**
343
+ * Read events after `cursor` ordered by (created_at, id). When `cursor` is
344
+ * null the caller must already be on the fallback path; this helper is not
345
+ * invoked.
346
+ */
347
+ function readEventsSinceCursor(db: Database, cursor: EventCursor): EventRow[] {
348
+ return db
349
+ .query(
350
+ `SELECT id, entity_kind, entity_id, operation, payload, created_at
351
+ FROM events
352
+ WHERE (created_at > ?) OR (created_at = ? AND id > ?)
353
+ ORDER BY created_at ASC, id ASC;`,
354
+ )
355
+ .all(cursor.createdAt, cursor.createdAt, cursor.id) as EventRow[];
356
+ }
357
+
358
+ /**
359
+ * Translate a list of event rows into per-kind upsert/delete ID sets that
360
+ * can be fed to {@link buildBoardSnapshotDelta}. Returns `null` if any event
361
+ * payload fails to parse or the entity_kind/operation pair is unknown — the
362
+ * caller treats `null` as a signal to fall back to the full-snapshot path.
363
+ *
364
+ * Parent-ascendant fan-in: when a task event fires, the parent epic must also
365
+ * be included so derived fields (taskIds, counts, searchText) reach the
366
+ * client. When a subtask event fires, the parent task and grandparent epic
367
+ * must also be included. We read the parent id from the payload (`epic_id`,
368
+ * `task_id`) first and fall back to a domain lookup when the payload omits
369
+ * it; task deletions skip that lookup and rely on the payload alone.
370
+ *
371
+ * Dependency events fan in both endpoints' parents so blocked-by/blocks
372
+ * derived arrays on the endpoints' epic/task records stay in sync.
373
+ */
374
+ function eventsToCursorDelta(events: readonly EventRow[], domain: TrackerDomain): EventCursorDelta | null {
375
+ const epicIds = new Set<string>();
376
+ const taskIds = new Set<string>();
377
+ const subtaskIds = new Set<string>();
378
+ const dependencyIds = new Set<string>();
379
+ const deletedEpicIds = new Set<string>();
380
+ const deletedTaskIds = new Set<string>();
381
+ const deletedSubtaskIds = new Set<string>();
382
+ const deletedDependencyIds = new Set<string>();
383
+
384
+ const includeTaskAndEpicForTaskId = (taskId: string, payloadEpicId: unknown): void => {
385
+ taskIds.add(taskId);
386
+ if (typeof payloadEpicId === "string" && payloadEpicId.length > 0) {
387
+ epicIds.add(payloadEpicId);
388
+ return;
389
+ }
390
+ const task = domain.getTask(taskId);
391
+ if (task) {
392
+ epicIds.add(task.epicId);
393
+ }
394
+ };
395
+
396
+ const includeSubtaskWithAscendants = (subtaskId: string, payloadTaskId: unknown): void => {
397
+ subtaskIds.add(subtaskId);
398
+ let resolvedTaskId: string | null = null;
399
+ if (typeof payloadTaskId === "string" && payloadTaskId.length > 0) {
400
+ resolvedTaskId = payloadTaskId;
401
+ } else {
402
+ resolvedTaskId = domain.getSubtask(subtaskId)?.taskId ?? null;
403
+ }
404
+ if (resolvedTaskId !== null) {
405
+ includeTaskAndEpicForTaskId(resolvedTaskId, undefined);
406
+ }
407
+ };
408
+
409
+ const includeDependencyEndpointParents = (sourceId: unknown, sourceKind: unknown, targetId: unknown, targetKind: unknown): void => {
410
+ const endpoints: Array<{ id: string; kind: string }> = [];
411
+ if (typeof sourceId === "string" && sourceId.length > 0) {
412
+ endpoints.push({ id: sourceId, kind: typeof sourceKind === "string" ? sourceKind : "" });
413
+ }
414
+ if (typeof targetId === "string" && targetId.length > 0) {
415
+ endpoints.push({ id: targetId, kind: typeof targetKind === "string" ? targetKind : "" });
416
+ }
417
+ for (const endpoint of endpoints) {
418
+ if (endpoint.kind === "subtask") {
419
+ includeSubtaskWithAscendants(endpoint.id, undefined);
420
+ } else {
421
+ includeTaskAndEpicForTaskId(endpoint.id, undefined);
422
+ }
423
+ }
424
+ };
425
+
426
+ for (const event of events) {
427
+ let parsedPayload: unknown;
428
+ try {
429
+ parsedPayload = JSON.parse(event.payload);
430
+ } catch {
431
+ return null;
432
+ }
433
+
434
+ const fields = (parsedPayload as { fields?: Record<string, unknown> })?.fields ?? {};
435
+
436
+ switch (event.entity_kind) {
437
+ case "epic": {
438
+ if (event.operation === "epic.created" || event.operation === "epic.updated") {
439
+ epicIds.add(event.entity_id);
440
+ } else if (event.operation === "epic.deleted") {
441
+ deletedEpicIds.add(event.entity_id);
442
+ } else {
443
+ return null;
444
+ }
445
+ break;
446
+ }
447
+ case "task": {
448
+ if (event.operation === "task.created" || event.operation === "task.updated") {
449
+ includeTaskAndEpicForTaskId(event.entity_id, fields.epic_id);
450
+ } else if (event.operation === "task.deleted") {
451
+ // Non-cascade task deletes carry `epic_id` in fields so the watcher
452
+ // can fan-in the parent epic (taskIds / counts / searchText all
453
+ // change). Cascade deletes omit `epic_id` because the matching
454
+ // `epic.deleted` event already surfaces the epic-level change —
455
+ // including the parent there would emit an upsert for a doomed
456
+ // epic alongside its deletedEpicIds entry.
457
+ if (typeof fields.epic_id === "string" && fields.epic_id.length > 0) {
458
+ epicIds.add(fields.epic_id);
459
+ }
460
+ deletedTaskIds.add(event.entity_id);
461
+ } else {
462
+ return null;
463
+ }
464
+ break;
465
+ }
466
+ case "subtask": {
467
+ if (event.operation === "subtask.created" || event.operation === "subtask.updated") {
468
+ includeSubtaskWithAscendants(event.entity_id, fields.task_id);
469
+ } else if (event.operation === "subtask.deleted") {
470
+ deletedSubtaskIds.add(event.entity_id);
471
+ // Parent task's subtasks list / searchText changed too: re-emit it.
472
+ const parentTaskId = typeof fields.task_id === "string" && fields.task_id.length > 0
473
+ ? fields.task_id
474
+ : domain.getSubtask(event.entity_id)?.taskId ?? null;
475
+ if (parentTaskId !== null) {
476
+ includeTaskAndEpicForTaskId(parentTaskId, undefined);
477
+ }
478
+ } else {
479
+ return null;
480
+ }
481
+ break;
482
+ }
483
+ case "dependency": {
484
+ // Dependency entity_id is the composite "sourceKind:sourceId->dependsOnKind:dependsOnId".
485
+ // The actual dependency row id lives in payload.fields.dependency_id (see
486
+ // mutation-service.#dependencyEventFields). Without that field we cannot
487
+ // safely surface the dependency delta — fall back.
488
+ const dependencyId = fields.dependency_id;
489
+ if (typeof dependencyId !== "string" || dependencyId.length === 0) {
490
+ return null;
491
+ }
492
+ if (event.operation === "dependency.added") {
493
+ dependencyIds.add(dependencyId);
494
+ } else if (event.operation === "dependency.removed") {
495
+ deletedDependencyIds.add(dependencyId);
496
+ } else {
497
+ return null;
498
+ }
499
+ includeDependencyEndpointParents(fields.source_id, fields.source_kind, fields.depends_on_id, fields.depends_on_kind);
500
+ break;
501
+ }
502
+ default:
503
+ return null;
504
+ }
505
+ }
506
+
507
+ return {
508
+ epicIds: [...epicIds],
509
+ taskIds: [...taskIds],
510
+ subtaskIds: [...subtaskIds],
511
+ dependencyIds: [...dependencyIds],
512
+ deletedEpicIds: [...deletedEpicIds],
513
+ deletedTaskIds: [...deletedTaskIds],
514
+ deletedSubtaskIds: [...deletedSubtaskIds],
515
+ deletedDependencyIds: [...deletedDependencyIds],
516
+ };
517
+ }
518
+
519
+ function tryEventCursorReconcile(
520
+ db: Database,
521
+ domain: TrackerDomain,
522
+ cursor: EventCursor | null,
523
+ ): EventCursorReconcileResult {
524
+ if (cursor === null) {
525
+ return { kind: "fallback", reason: "warm-up" };
526
+ }
527
+
528
+ if (isCursorStale(db, cursor)) {
529
+ return { kind: "fallback", reason: "cursor-stale" };
530
+ }
531
+
532
+ const events = readEventsSinceCursor(db, cursor);
533
+ if (events.length === 0) {
534
+ return {
535
+ kind: "ok",
536
+ newCursor: cursor,
537
+ delta: {
538
+ epicIds: [],
539
+ taskIds: [],
540
+ subtaskIds: [],
541
+ dependencyIds: [],
542
+ deletedEpicIds: [],
543
+ deletedTaskIds: [],
544
+ deletedSubtaskIds: [],
545
+ deletedDependencyIds: [],
546
+ },
547
+ };
548
+ }
549
+
550
+ const delta = eventsToCursorDelta(events, domain);
551
+ if (delta === null) {
552
+ return { kind: "fallback", reason: "event-parse-or-shape" };
553
+ }
554
+
555
+ const lastEvent = events[events.length - 1]!;
556
+ return {
557
+ kind: "ok",
558
+ newCursor: { createdAt: lastEvent.created_at, id: lastEvent.id },
559
+ delta,
560
+ };
561
+ }
562
+
215
563
  function readMtime(path: string): number {
216
564
  if (!existsSync(path)) {
217
565
  return 0;
@@ -247,6 +595,20 @@ export interface WalWatcherOptions {
247
595
  * Tests inject a throwing or stubbed builder to exercise failure paths.
248
596
  */
249
597
  readonly buildSnapshot?: (domain: TrackerDomain) => BoardSnapshot;
598
+ /**
599
+ * When `true`, the watcher always runs the legacy full-snapshot diff path
600
+ * even when a usable event cursor is available. Used by tests that verify
601
+ * the fallback contract is bit-identical to the optimized path.
602
+ */
603
+ readonly forceFullSnapshotReconcile?: boolean;
604
+ /**
605
+ * Optional reconcile observer for tests. Reports which path each tick used,
606
+ * along with the reason for any fallback. Production code ignores this.
607
+ */
608
+ readonly onReconcile?: (info: {
609
+ readonly path: "event-cursor" | "full-snapshot";
610
+ readonly reason?: string;
611
+ }) => void;
250
612
  }
251
613
 
252
614
  export interface WalWatcher {
@@ -283,90 +645,285 @@ export function startWalWatcher(options: WalWatcherOptions): WalWatcher {
283
645
  const buildSnapshot = options.buildSnapshot ?? buildBoardSnapshot;
284
646
 
285
647
  let lastSnapshot = buildSnapshot(domain);
648
+ let lastEventCursor: EventCursor | null = readLatestEventCursor(options.db);
286
649
 
287
650
  let debounceTimer: ReturnType<typeof setTimeout> | null = null;
288
651
  let closed = false;
289
652
  let failures = 0;
290
653
  let lastSuppressedInProcessWriteAt = 0;
654
+ let lastReconcileAt = 0;
655
+ // The event-cursor hot path no longer rebuilds `lastSnapshot` on every
656
+ // successful tick — that full snapshot read was the dominant cost on large
657
+ // boards. Setting this flag tells the fallback path that the baseline may
658
+ // be older than what subscribers have already received via cursor deltas.
659
+ // The fallback diff against a stale baseline can over-publish, but that is
660
+ // strictly a recovery operation triggered only when the cursor path bails
661
+ // (warm-up, cursor pruned, unknown event shape) — already a heavier path.
662
+ let lastSnapshotStale = false;
663
+ // Tracks the most recently logged reconcile-failure message so the catch
664
+ // block in reconcile() can emit a log on every distinct message even when
665
+ // the modulo counter would otherwise throttle it. Empty string means "no
666
+ // failure logged yet" — the first failure of any kind will surface.
667
+ let lastLoggedFailureMessage = "";
668
+
669
+ function runFullSnapshotReconcile(shouldSuppressInProcessTick: boolean, inProcessWriteAt: number): void {
670
+ const fresh = buildSnapshot(domain);
671
+ const epicsDiff = diffById(lastSnapshot.epics, fresh.epics, { isLeaf: false });
672
+ const tasksDiff = diffById(lastSnapshot.tasks, fresh.tasks, { isLeaf: false });
673
+ const subtasksDiff = diffById(lastSnapshot.subtasks, fresh.subtasks, { isLeaf: true });
674
+ const dependenciesDiff = diffById(lastSnapshot.dependencies, fresh.dependencies, { isLeaf: true });
675
+
676
+ const shouldSuppressDiff = shouldSuppressInProcessTick
677
+ ? {
678
+ epics: suppressAlreadyPublishedDiff(
679
+ epicsDiff,
680
+ recordsByIdFromDelta(options.eventBus.lastInProcessSnapshotDelta, "epics"),
681
+ deletedIdsFromDelta(options.eventBus.lastInProcessSnapshotDelta, "deletedEpicIds"),
682
+ { isLeaf: false },
683
+ ),
684
+ tasks: suppressAlreadyPublishedDiff(
685
+ tasksDiff,
686
+ recordsByIdFromDelta(options.eventBus.lastInProcessSnapshotDelta, "tasks"),
687
+ deletedIdsFromDelta(options.eventBus.lastInProcessSnapshotDelta, "deletedTaskIds"),
688
+ { isLeaf: false },
689
+ ),
690
+ subtasks: suppressAlreadyPublishedDiff(
691
+ subtasksDiff,
692
+ recordsByIdFromDelta(options.eventBus.lastInProcessSnapshotDelta, "subtasks"),
693
+ deletedIdsFromDelta(options.eventBus.lastInProcessSnapshotDelta, "deletedSubtaskIds"),
694
+ { isLeaf: true },
695
+ ),
696
+ dependencies: suppressAlreadyPublishedDiff(
697
+ dependenciesDiff,
698
+ recordsByIdFromDelta(options.eventBus.lastInProcessSnapshotDelta, "dependencies"),
699
+ deletedIdsFromDelta(options.eventBus.lastInProcessSnapshotDelta, "deletedDependencyIds"),
700
+ { isLeaf: true },
701
+ ),
702
+ }
703
+ : null;
291
704
 
292
- function reconcile(): void {
293
- if (closed) {
705
+ if (shouldSuppressInProcessTick) {
706
+ lastSuppressedInProcessWriteAt = inProcessWriteAt;
707
+ }
708
+
709
+ const publishEpicsDiff = shouldSuppressDiff?.epics ?? epicsDiff;
710
+ const publishTasksDiff = shouldSuppressDiff?.tasks ?? tasksDiff;
711
+ const publishSubtasksDiff = shouldSuppressDiff?.subtasks ?? subtasksDiff;
712
+ const publishDependenciesDiff = shouldSuppressDiff?.dependencies ?? dependenciesDiff;
713
+
714
+ const hasChanges = hasDiffChanges(publishEpicsDiff, publishTasksDiff, publishSubtasksDiff, publishDependenciesDiff);
715
+
716
+ lastSnapshot = fresh;
717
+ // The fallback just rebuilt the snapshot from the live domain, so the
718
+ // baseline is no longer stale. Future event-cursor ticks may set the
719
+ // flag again as they advance without rebuilding lastSnapshot.
720
+ lastSnapshotStale = false;
721
+ // Reseat the cursor at the latest event so the next tick can attempt the
722
+ // optimized path again. Without this, every subsequent tick would also
723
+ // see "cursor stale" on a freshly-recovered watcher.
724
+ lastEventCursor = readLatestEventCursor(options.db);
725
+
726
+ if (!hasChanges) {
294
727
  return;
295
728
  }
296
- const inProcessWriteAt = options.eventBus.lastInProcessWriteAt;
297
- const shouldSuppressInProcessTick =
298
- inProcessWriteAt > lastSuppressedInProcessWriteAt &&
299
- Date.now() - inProcessWriteAt <= IN_PROCESS_WAL_SUPPRESS_MS;
300
729
 
301
- try {
302
- const fresh = buildSnapshot(domain);
303
- const epicsDiff = diffById(lastSnapshot.epics, fresh.epics);
304
- const tasksDiff = diffById(lastSnapshot.tasks, fresh.tasks);
305
- const subtasksDiff = diffById(lastSnapshot.subtasks, fresh.subtasks);
306
- const dependenciesDiff = diffById(lastSnapshot.dependencies, fresh.dependencies);
307
-
308
- const shouldSuppressDiff = shouldSuppressInProcessTick
309
- ? {
310
- epics: suppressAlreadyPublishedDiff(
311
- epicsDiff,
312
- recordsByIdFromDelta(options.eventBus.lastInProcessSnapshotDelta, "epics"),
313
- deletedIdsFromDelta(options.eventBus.lastInProcessSnapshotDelta, "deletedEpicIds"),
314
- ),
315
- tasks: suppressAlreadyPublishedDiff(
316
- tasksDiff,
317
- recordsByIdFromDelta(options.eventBus.lastInProcessSnapshotDelta, "tasks"),
318
- deletedIdsFromDelta(options.eventBus.lastInProcessSnapshotDelta, "deletedTaskIds"),
319
- ),
320
- subtasks: suppressAlreadyPublishedDiff(
321
- subtasksDiff,
322
- recordsByIdFromDelta(options.eventBus.lastInProcessSnapshotDelta, "subtasks"),
323
- deletedIdsFromDelta(options.eventBus.lastInProcessSnapshotDelta, "deletedSubtaskIds"),
324
- ),
325
- dependencies: suppressAlreadyPublishedDiff(
326
- dependenciesDiff,
327
- recordsByIdFromDelta(options.eventBus.lastInProcessSnapshotDelta, "dependencies"),
328
- deletedIdsFromDelta(options.eventBus.lastInProcessSnapshotDelta, "deletedDependencyIds"),
329
- ),
330
- }
331
- : null;
730
+ options.eventBus.publishSnapshotDelta({
731
+ generatedAt: Date.now(),
732
+ source: "wal-watcher",
733
+ epics: publishEpicsDiff.upserted,
734
+ tasks: publishTasksDiff.upserted,
735
+ subtasks: publishSubtasksDiff.upserted,
736
+ dependencies: publishDependenciesDiff.upserted,
737
+ deletedEpicIds: publishEpicsDiff.deletedIds,
738
+ deletedTaskIds: publishTasksDiff.deletedIds,
739
+ deletedSubtaskIds: publishSubtasksDiff.deletedIds,
740
+ deletedDependencyIds: publishDependenciesDiff.deletedIds,
741
+ });
742
+ }
332
743
 
744
+ function runEventCursorReconcile(
745
+ cursorResult: Extract<EventCursorReconcileResult, { kind: "ok" }>,
746
+ shouldSuppressInProcessTick: boolean,
747
+ inProcessWriteAt: number,
748
+ ): void {
749
+ const { newCursor, delta } = cursorResult;
750
+ const noChanges =
751
+ delta.epicIds.length === 0 &&
752
+ delta.taskIds.length === 0 &&
753
+ delta.subtaskIds.length === 0 &&
754
+ delta.dependencyIds.length === 0 &&
755
+ delta.deletedEpicIds.length === 0 &&
756
+ delta.deletedTaskIds.length === 0 &&
757
+ delta.deletedSubtaskIds.length === 0 &&
758
+ delta.deletedDependencyIds.length === 0;
759
+
760
+ // The event-cursor hot path used to rebuild `lastSnapshot` on every tick
761
+ // via `buildSnapshot(domain)` — the full board read that dominates CPU on
762
+ // large boards. We now skip it entirely and mark the baseline stale so
763
+ // the next fallback tick (cursor pruned / parse failure / etc.) knows it
764
+ // may need to publish a recovery delta against an older baseline.
765
+ //
766
+ // Both `lastEventCursor` and the staleness flag advance ONLY after the
767
+ // publish call below returns successfully (or when there is nothing to
768
+ // publish). If `publishSnapshotDelta` throws, leaving these at their
769
+ // prior values ensures the next tick re-runs the same cursor delta —
770
+ // subscribers never miss a row because of a transient listener error.
771
+
772
+ if (noChanges) {
773
+ // An empty delta means no events were read, so newCursor is the current
774
+ // cursor (see tryEventCursorReconcile): no real advance, nothing stale.
775
+ lastEventCursor = newCursor;
333
776
  if (shouldSuppressInProcessTick) {
334
777
  lastSuppressedInProcessWriteAt = inProcessWriteAt;
335
778
  }
779
+ return;
780
+ }
336
781
 
337
- const publishEpicsDiff = shouldSuppressDiff?.epics ?? epicsDiff;
338
- const publishTasksDiff = shouldSuppressDiff?.tasks ?? tasksDiff;
339
- const publishSubtasksDiff = shouldSuppressDiff?.subtasks ?? subtasksDiff;
340
- const publishDependenciesDiff = shouldSuppressDiff?.dependencies ?? dependenciesDiff;
782
+ const snapshotDelta = buildBoardSnapshotDelta(domain, {
783
+ epicIds: delta.epicIds,
784
+ taskIds: delta.taskIds,
785
+ subtaskIds: delta.subtaskIds,
786
+ dependencyIds: delta.dependencyIds,
787
+ deletedEpicIds: delta.deletedEpicIds,
788
+ deletedTaskIds: delta.deletedTaskIds,
789
+ deletedSubtaskIds: delta.deletedSubtaskIds,
790
+ deletedDependencyIds: delta.deletedDependencyIds,
791
+ });
341
792
 
342
- const hasChanges = hasDiffChanges(publishEpicsDiff, publishTasksDiff, publishSubtasksDiff, publishDependenciesDiff);
793
+ // Pack the targeted-read result into the same CollectionDiff shape the
794
+ // suppression helper expects, then run the standard in-process duplicate
795
+ // filter against the route handler's last published delta.
796
+ const epicsDiff: CollectionDiff = {
797
+ upserted: Array.isArray(snapshotDelta.epics) ? (snapshotDelta.epics as unknown[]) : [],
798
+ deletedIds: [...delta.deletedEpicIds],
799
+ };
800
+ const tasksDiff: CollectionDiff = {
801
+ upserted: Array.isArray(snapshotDelta.tasks) ? (snapshotDelta.tasks as unknown[]) : [],
802
+ deletedIds: [...delta.deletedTaskIds],
803
+ };
804
+ const subtasksDiff: CollectionDiff = {
805
+ upserted: Array.isArray(snapshotDelta.subtasks) ? (snapshotDelta.subtasks as unknown[]) : [],
806
+ deletedIds: [...delta.deletedSubtaskIds],
807
+ };
808
+ const dependenciesDiff: CollectionDiff = {
809
+ upserted: Array.isArray(snapshotDelta.dependencies) ? (snapshotDelta.dependencies as unknown[]) : [],
810
+ deletedIds: [...delta.deletedDependencyIds],
811
+ };
812
+
813
+ const suppressed = shouldSuppressInProcessTick
814
+ ? {
815
+ epics: suppressAlreadyPublishedDiff(
816
+ epicsDiff,
817
+ recordsByIdFromDelta(options.eventBus.lastInProcessSnapshotDelta, "epics"),
818
+ deletedIdsFromDelta(options.eventBus.lastInProcessSnapshotDelta, "deletedEpicIds"),
819
+ { isLeaf: false },
820
+ ),
821
+ tasks: suppressAlreadyPublishedDiff(
822
+ tasksDiff,
823
+ recordsByIdFromDelta(options.eventBus.lastInProcessSnapshotDelta, "tasks"),
824
+ deletedIdsFromDelta(options.eventBus.lastInProcessSnapshotDelta, "deletedTaskIds"),
825
+ { isLeaf: false },
826
+ ),
827
+ subtasks: suppressAlreadyPublishedDiff(
828
+ subtasksDiff,
829
+ recordsByIdFromDelta(options.eventBus.lastInProcessSnapshotDelta, "subtasks"),
830
+ deletedIdsFromDelta(options.eventBus.lastInProcessSnapshotDelta, "deletedSubtaskIds"),
831
+ { isLeaf: true },
832
+ ),
833
+ dependencies: suppressAlreadyPublishedDiff(
834
+ dependenciesDiff,
835
+ recordsByIdFromDelta(options.eventBus.lastInProcessSnapshotDelta, "dependencies"),
836
+ deletedIdsFromDelta(options.eventBus.lastInProcessSnapshotDelta, "deletedDependencyIds"),
837
+ { isLeaf: true },
838
+ ),
839
+ }
840
+ : null;
841
+
842
+ const publishEpicsDiff = suppressed?.epics ?? epicsDiff;
843
+ const publishTasksDiff = suppressed?.tasks ?? tasksDiff;
844
+ const publishSubtasksDiff = suppressed?.subtasks ?? subtasksDiff;
845
+ const publishDependenciesDiff = suppressed?.dependencies ?? dependenciesDiff;
846
+
847
+ if (!hasDiffChanges(publishEpicsDiff, publishTasksDiff, publishSubtasksDiff, publishDependenciesDiff)) {
848
+ // Nothing to publish (suppression filtered the in-process duplicate, or
849
+ // the targeted snapshot read returned no rows for the touched IDs).
850
+ // Advance cursor since the canonical events have been accounted for;
851
+ // replaying them would not produce a different result. Mark the
852
+ // baseline stale because the underlying domain has moved even though
853
+ // no delta needed to ship.
854
+ lastEventCursor = newCursor;
855
+ lastSnapshotStale = true;
856
+ if (shouldSuppressInProcessTick) {
857
+ lastSuppressedInProcessWriteAt = inProcessWriteAt;
858
+ }
859
+ return;
860
+ }
343
861
 
344
- lastSnapshot = fresh;
862
+ options.eventBus.publishSnapshotDelta({
863
+ generatedAt: Date.now(),
864
+ source: "wal-watcher",
865
+ epics: publishEpicsDiff.upserted,
866
+ tasks: publishTasksDiff.upserted,
867
+ subtasks: publishSubtasksDiff.upserted,
868
+ dependencies: publishDependenciesDiff.upserted,
869
+ deletedEpicIds: publishEpicsDiff.deletedIds,
870
+ deletedTaskIds: publishTasksDiff.deletedIds,
871
+ deletedSubtaskIds: publishSubtasksDiff.deletedIds,
872
+ deletedDependencyIds: publishDependenciesDiff.deletedIds,
873
+ });
345
874
 
346
- if (!hasChanges) {
347
- return;
348
- }
875
+ // Publish succeeded — only now is it safe to advance cursor and mark the
876
+ // baseline stale. If the call above threw, the outer reconcile() catch
877
+ // handles it and leaves these unchanged so the next tick replays the
878
+ // same delta.
879
+ lastEventCursor = newCursor;
880
+ lastSnapshotStale = true;
881
+ if (shouldSuppressInProcessTick) {
882
+ lastSuppressedInProcessWriteAt = inProcessWriteAt;
883
+ }
884
+ }
885
+
886
+ function reconcile(): void {
887
+ if (closed) {
888
+ return;
889
+ }
890
+ lastReconcileAt = Date.now();
891
+ const inProcessWriteAt = options.eventBus.lastInProcessWriteAt;
892
+ const shouldSuppressInProcessTick =
893
+ inProcessWriteAt > lastSuppressedInProcessWriteAt &&
894
+ Date.now() - inProcessWriteAt <= IN_PROCESS_WAL_SUPPRESS_MS;
349
895
 
350
- options.eventBus.publishSnapshotDelta({
351
- generatedAt: Date.now(),
352
- source: "wal-watcher",
353
- epics: publishEpicsDiff.upserted,
354
- tasks: publishTasksDiff.upserted,
355
- subtasks: publishSubtasksDiff.upserted,
356
- dependencies: publishDependenciesDiff.upserted,
357
- deletedEpicIds: publishEpicsDiff.deletedIds,
358
- deletedTaskIds: publishTasksDiff.deletedIds,
359
- deletedSubtaskIds: publishSubtasksDiff.deletedIds,
360
- deletedDependencyIds: publishDependenciesDiff.deletedIds,
361
- });
896
+ try {
897
+ if (!options.forceFullSnapshotReconcile) {
898
+ const cursorResult = tryEventCursorReconcile(options.db, domain, lastEventCursor);
899
+ if (cursorResult.kind === "ok") {
900
+ options.onReconcile?.({ path: "event-cursor" });
901
+ runEventCursorReconcile(cursorResult, shouldSuppressInProcessTick, inProcessWriteAt);
902
+ return;
903
+ }
904
+ options.onReconcile?.({ path: "full-snapshot", reason: cursorResult.reason });
905
+ } else {
906
+ options.onReconcile?.({ path: "full-snapshot", reason: "forced" });
907
+ }
908
+ runFullSnapshotReconcile(shouldSuppressInProcessTick, inProcessWriteAt);
362
909
  } catch (error) {
363
910
  // Reconciliation must never crash the server. Errors here usually mean
364
911
  // the database is mid-write or a downstream snapshot builder threw; the
365
- // next mtime tick will retry. Log every Nth failure to keep operators
366
- // informed without flooding stderr on persistent faults.
912
+ // next mtime tick will retry.
913
+ //
914
+ // Logging policy: always log the first occurrence of every distinct
915
+ // failure message so operators see new fault modes immediately, then
916
+ // throttle subsequent identical messages via the modulo counter to
917
+ // keep stderr quiet on persistent faults. Without the first-occurrence
918
+ // guarantee, a transient one-shot failure with `logEveryNthFailure=5`
919
+ // would be silenced entirely until four more identical failures piled
920
+ // up — exactly the wrong signal for an operator.
367
921
  failures += 1;
368
- if (failures % logEveryNthFailure === 0) {
922
+ const failureMessage = error instanceof Error ? error.message : String(error);
923
+ const isNewMessage = failureMessage !== lastLoggedFailureMessage;
924
+ if (isNewMessage || failures % logEveryNthFailure === 0) {
369
925
  logger(`wal-watcher: reconcile failed (${failures} total failures)`, error);
926
+ lastLoggedFailureMessage = failureMessage;
370
927
  }
371
928
  }
372
929
  }
@@ -392,7 +949,13 @@ export function startWalWatcher(options: WalWatcherOptions): WalWatcher {
392
949
  const currentMtime = readMtime(walFile);
393
950
  // mtime can equal 0 when the WAL was just checkpointed and removed; treat
394
951
  // any change (including transitions to/from 0) as worth reconciling.
395
- if (currentMtime !== lastWalMtime) {
952
+ // Additionally, reconcile when mtime is unchanged but more than debounceMs
953
+ // of wall-clock time has elapsed since the last reconcile. This catches
954
+ // writes that land in the same filesystem mtime tick, at the cost of also
955
+ // reconciling on idle polls once that interval has passed.
956
+ const mtimeChanged = currentMtime !== lastWalMtime;
957
+ const staleEnough = Date.now() - lastReconcileAt > debounceMs;
958
+ if (mtimeChanged || staleEnough) {
396
959
  lastWalMtime = currentMtime;
397
960
  scheduleReconcile();
398
961
  }