@git-stunts/git-warp 10.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. package/LICENSE +201 -0
  2. package/NOTICE +16 -0
  3. package/README.md +480 -0
  4. package/SECURITY.md +30 -0
  5. package/bin/git-warp +24 -0
  6. package/bin/warp-graph.js +1574 -0
  7. package/index.d.ts +2366 -0
  8. package/index.js +180 -0
  9. package/package.json +129 -0
  10. package/scripts/install-git-warp.sh +258 -0
  11. package/scripts/uninstall-git-warp.sh +139 -0
  12. package/src/domain/WarpGraph.js +3157 -0
  13. package/src/domain/crdt/Dot.js +160 -0
  14. package/src/domain/crdt/LWW.js +154 -0
  15. package/src/domain/crdt/ORSet.js +371 -0
  16. package/src/domain/crdt/VersionVector.js +222 -0
  17. package/src/domain/entities/GraphNode.js +60 -0
  18. package/src/domain/errors/EmptyMessageError.js +47 -0
  19. package/src/domain/errors/ForkError.js +30 -0
  20. package/src/domain/errors/IndexError.js +23 -0
  21. package/src/domain/errors/OperationAbortedError.js +22 -0
  22. package/src/domain/errors/QueryError.js +39 -0
  23. package/src/domain/errors/SchemaUnsupportedError.js +17 -0
  24. package/src/domain/errors/ShardCorruptionError.js +56 -0
  25. package/src/domain/errors/ShardLoadError.js +57 -0
  26. package/src/domain/errors/ShardValidationError.js +61 -0
  27. package/src/domain/errors/StorageError.js +57 -0
  28. package/src/domain/errors/SyncError.js +30 -0
  29. package/src/domain/errors/TraversalError.js +23 -0
  30. package/src/domain/errors/WarpError.js +31 -0
  31. package/src/domain/errors/WormholeError.js +28 -0
  32. package/src/domain/errors/WriterError.js +39 -0
  33. package/src/domain/errors/index.js +21 -0
  34. package/src/domain/services/AnchorMessageCodec.js +99 -0
  35. package/src/domain/services/BitmapIndexBuilder.js +225 -0
  36. package/src/domain/services/BitmapIndexReader.js +435 -0
  37. package/src/domain/services/BoundaryTransitionRecord.js +463 -0
  38. package/src/domain/services/CheckpointMessageCodec.js +147 -0
  39. package/src/domain/services/CheckpointSerializerV5.js +281 -0
  40. package/src/domain/services/CheckpointService.js +384 -0
  41. package/src/domain/services/CommitDagTraversalService.js +156 -0
  42. package/src/domain/services/DagPathFinding.js +712 -0
  43. package/src/domain/services/DagTopology.js +239 -0
  44. package/src/domain/services/DagTraversal.js +245 -0
  45. package/src/domain/services/Frontier.js +108 -0
  46. package/src/domain/services/GCMetrics.js +101 -0
  47. package/src/domain/services/GCPolicy.js +122 -0
  48. package/src/domain/services/GitLogParser.js +205 -0
  49. package/src/domain/services/HealthCheckService.js +246 -0
  50. package/src/domain/services/HookInstaller.js +326 -0
  51. package/src/domain/services/HttpSyncServer.js +262 -0
  52. package/src/domain/services/IndexRebuildService.js +426 -0
  53. package/src/domain/services/IndexStalenessChecker.js +103 -0
  54. package/src/domain/services/JoinReducer.js +582 -0
  55. package/src/domain/services/KeyCodec.js +113 -0
  56. package/src/domain/services/LegacyAnchorDetector.js +67 -0
  57. package/src/domain/services/LogicalTraversal.js +351 -0
  58. package/src/domain/services/MessageCodecInternal.js +132 -0
  59. package/src/domain/services/MessageSchemaDetector.js +145 -0
  60. package/src/domain/services/MigrationService.js +55 -0
  61. package/src/domain/services/ObserverView.js +265 -0
  62. package/src/domain/services/PatchBuilderV2.js +669 -0
  63. package/src/domain/services/PatchMessageCodec.js +140 -0
  64. package/src/domain/services/ProvenanceIndex.js +337 -0
  65. package/src/domain/services/ProvenancePayload.js +242 -0
  66. package/src/domain/services/QueryBuilder.js +835 -0
  67. package/src/domain/services/StateDiff.js +300 -0
  68. package/src/domain/services/StateSerializerV5.js +156 -0
  69. package/src/domain/services/StreamingBitmapIndexBuilder.js +709 -0
  70. package/src/domain/services/SyncProtocol.js +593 -0
  71. package/src/domain/services/TemporalQuery.js +201 -0
  72. package/src/domain/services/TranslationCost.js +221 -0
  73. package/src/domain/services/TraversalService.js +8 -0
  74. package/src/domain/services/WarpMessageCodec.js +29 -0
  75. package/src/domain/services/WarpStateIndexBuilder.js +127 -0
  76. package/src/domain/services/WormholeService.js +353 -0
  77. package/src/domain/types/TickReceipt.js +285 -0
  78. package/src/domain/types/WarpTypes.js +209 -0
  79. package/src/domain/types/WarpTypesV2.js +200 -0
  80. package/src/domain/utils/CachedValue.js +140 -0
  81. package/src/domain/utils/EventId.js +89 -0
  82. package/src/domain/utils/LRUCache.js +112 -0
  83. package/src/domain/utils/MinHeap.js +114 -0
  84. package/src/domain/utils/RefLayout.js +280 -0
  85. package/src/domain/utils/WriterId.js +205 -0
  86. package/src/domain/utils/cancellation.js +33 -0
  87. package/src/domain/utils/canonicalStringify.js +42 -0
  88. package/src/domain/utils/defaultClock.js +20 -0
  89. package/src/domain/utils/defaultCodec.js +51 -0
  90. package/src/domain/utils/nullLogger.js +21 -0
  91. package/src/domain/utils/roaring.js +181 -0
  92. package/src/domain/utils/shardVersion.js +9 -0
  93. package/src/domain/warp/PatchSession.js +217 -0
  94. package/src/domain/warp/Writer.js +181 -0
  95. package/src/hooks/post-merge.sh +60 -0
  96. package/src/infrastructure/adapters/BunHttpAdapter.js +225 -0
  97. package/src/infrastructure/adapters/ClockAdapter.js +57 -0
  98. package/src/infrastructure/adapters/ConsoleLogger.js +150 -0
  99. package/src/infrastructure/adapters/DenoHttpAdapter.js +230 -0
  100. package/src/infrastructure/adapters/GitGraphAdapter.js +787 -0
  101. package/src/infrastructure/adapters/GlobalClockAdapter.js +5 -0
  102. package/src/infrastructure/adapters/NoOpLogger.js +62 -0
  103. package/src/infrastructure/adapters/NodeCryptoAdapter.js +32 -0
  104. package/src/infrastructure/adapters/NodeHttpAdapter.js +98 -0
  105. package/src/infrastructure/adapters/PerformanceClockAdapter.js +5 -0
  106. package/src/infrastructure/adapters/WebCryptoAdapter.js +121 -0
  107. package/src/infrastructure/codecs/CborCodec.js +384 -0
  108. package/src/ports/BlobPort.js +30 -0
  109. package/src/ports/ClockPort.js +25 -0
  110. package/src/ports/CodecPort.js +25 -0
  111. package/src/ports/CommitPort.js +114 -0
  112. package/src/ports/ConfigPort.js +31 -0
  113. package/src/ports/CryptoPort.js +38 -0
  114. package/src/ports/GraphPersistencePort.js +57 -0
  115. package/src/ports/HttpServerPort.js +25 -0
  116. package/src/ports/IndexStoragePort.js +39 -0
  117. package/src/ports/LoggerPort.js +68 -0
  118. package/src/ports/RefPort.js +51 -0
  119. package/src/ports/TreePort.js +51 -0
  120. package/src/visualization/index.js +26 -0
  121. package/src/visualization/layouts/converters.js +75 -0
  122. package/src/visualization/layouts/elkAdapter.js +86 -0
  123. package/src/visualization/layouts/elkLayout.js +95 -0
  124. package/src/visualization/layouts/index.js +29 -0
  125. package/src/visualization/renderers/ascii/box.js +16 -0
  126. package/src/visualization/renderers/ascii/check.js +271 -0
  127. package/src/visualization/renderers/ascii/colors.js +13 -0
  128. package/src/visualization/renderers/ascii/formatters.js +73 -0
  129. package/src/visualization/renderers/ascii/graph.js +344 -0
  130. package/src/visualization/renderers/ascii/history.js +335 -0
  131. package/src/visualization/renderers/ascii/index.js +14 -0
  132. package/src/visualization/renderers/ascii/info.js +245 -0
  133. package/src/visualization/renderers/ascii/materialize.js +255 -0
  134. package/src/visualization/renderers/ascii/path.js +240 -0
  135. package/src/visualization/renderers/ascii/progress.js +32 -0
  136. package/src/visualization/renderers/ascii/symbols.js +33 -0
  137. package/src/visualization/renderers/ascii/table.js +19 -0
  138. package/src/visualization/renderers/browser/index.js +1 -0
  139. package/src/visualization/renderers/svg/index.js +159 -0
  140. package/src/visualization/utils/ansi.js +14 -0
  141. package/src/visualization/utils/time.js +40 -0
  142. package/src/visualization/utils/truncate.js +40 -0
  143. package/src/visualization/utils/unicode.js +52 -0
@@ -0,0 +1,426 @@
1
+ import defaultCodec from '../utils/defaultCodec.js';
2
+ import BitmapIndexBuilder from './BitmapIndexBuilder.js';
3
+ import BitmapIndexReader from './BitmapIndexReader.js';
4
+ import StreamingBitmapIndexBuilder from './StreamingBitmapIndexBuilder.js';
5
+ import { loadIndexFrontier, checkStaleness } from './IndexStalenessChecker.js';
6
+ import nullLogger from '../utils/nullLogger.js';
7
+ import { checkAborted } from '../utils/cancellation.js';
8
+
9
+ /**
10
+ * Service for building and loading the bitmap index from the graph.
11
+ *
12
+ * This service orchestrates index creation by walking the graph and persisting
13
+ * the resulting bitmap shards to storage via the IndexStoragePort. The bitmap
14
+ * index enables O(1) neighbor lookups (children/parents) after a one-time
15
+ * O(N) rebuild cost.
16
+ *
17
+ * **Build Modes**:
18
+ * - **In-memory** (default): Fast, but requires O(N) memory. Best for graphs
19
+ * under ~1M nodes or systems with ample RAM.
20
+ * - **Streaming**: Memory-bounded, flushes to storage periodically. Required
21
+ * for very large graphs that exceed available memory.
22
+ *
23
+ * **Index Structure**: The index is stored as a Git tree containing:
24
+ * - `meta_XX.json`: SHA-to-numeric-ID mappings (256 shards by SHA prefix)
25
+ * - `shards_fwd_XX.json`: Forward edge bitmaps (for child lookups)
26
+ * - `shards_rev_XX.json`: Reverse edge bitmaps (for parent lookups)
27
+ * - `frontier.json`: Writer frontier snapshot (for staleness detection)
28
+ *
29
+ * **Staleness Detection**: The index stores the frontier at build time.
30
+ * On load, the current frontier can be compared to detect if new patches
31
+ * have been written since the index was built.
32
+ *
33
+ * @module domain/services/IndexRebuildService
34
+ * @see BitmapIndexBuilder
35
+ * @see BitmapIndexReader
36
+ * @see StreamingBitmapIndexBuilder
37
+ */
38
+ export default class IndexRebuildService {
39
+ /**
40
+ * Creates an IndexRebuildService instance.
41
+ *
42
+ * @param {Object} options - Configuration options
43
+ * @param {Object} options.graphService - Graph service providing node iteration.
44
+ * Must implement `iterateNodes({ ref, limit }) => AsyncGenerator<GraphNode>`.
45
+ * @param {import('../../ports/IndexStoragePort.js').default} options.storage - Storage adapter
46
+ * for persisting index blobs and trees. Typically GitGraphAdapter.
47
+ * @param {import('../../ports/LoggerPort.js').default} [options.logger] - Logger for
48
+ * structured logging. Defaults to null logger (no logging).
49
+ * @param {import('../../ports/CryptoPort.js').default} [options.crypto] - Crypto adapter for checksums
50
+ * @throws {Error} If graphService is not provided
51
+ * @throws {Error} If storage adapter is not provided
52
+ */
53
+ constructor({ graphService, storage, logger = nullLogger, codec, crypto }) {
54
+ if (!graphService) {
55
+ throw new Error('IndexRebuildService requires a graphService');
56
+ }
57
+ if (!storage) {
58
+ throw new Error('IndexRebuildService requires a storage adapter');
59
+ }
60
+ this.graphService = graphService;
61
+ this.storage = storage;
62
+ this.logger = logger;
63
+ /** @type {import('../../ports/CodecPort.js').default|undefined} */
64
+ this._codec = codec || defaultCodec;
65
+ /** @type {import('../../ports/CryptoPort.js').default|undefined} */
66
+ this._crypto = crypto;
67
+ }
68
+
69
+ /**
70
+ * Rebuilds the bitmap index by walking the graph from a ref.
71
+ *
72
+ * **Build Modes**:
73
+ *
74
+ * *In-memory mode* (default, when `maxMemoryBytes` not specified):
75
+ * - Fastest option, single pass with bulk serialization at end
76
+ * - Memory: O(N) where N is number of nodes (~150-200MB for 1M nodes)
77
+ *
78
+ * *Streaming mode* (when `maxMemoryBytes` is specified):
79
+ * - Memory-bounded operation for very large graphs
80
+ * - Flushes bitmap data to storage when threshold exceeded
81
+ * - Merges chunks at finalization
82
+ * - More I/O operations, but constant memory ceiling
83
+ *
84
+ * **Persistence**: Creates a Git tree containing sharded JSON blobs:
85
+ * - `meta_XX.json`: SHA→ID mappings (256 shards by SHA prefix)
86
+ * - `shards_fwd_XX.json`: Forward edge bitmaps (child lookups)
87
+ * - `shards_rev_XX.json`: Reverse edge bitmaps (parent lookups)
88
+ *
89
+ * @param {string} ref - Git ref to start traversal from (e.g., 'HEAD', branch name, SHA)
90
+ * @param {Object} [options] - Rebuild options
91
+ * @param {number} [options.limit=10000000] - Maximum nodes to process (1 to 10,000,000)
92
+ * @param {number} [options.maxMemoryBytes] - Enable streaming mode with this memory threshold.
93
+ * When bitmap memory exceeds this value, data is flushed to storage.
94
+ * Recommended: 50-100MB for most systems. Minimum: 1MB.
95
+ * @param {Function} [options.onFlush] - Callback invoked on each flush (streaming mode only).
96
+ * Receives { flushedBytes, totalFlushedBytes, flushCount }.
97
+ * @param {Function} [options.onProgress] - Callback invoked periodically during processing.
98
+ * Receives { processedNodes, currentMemoryBytes }.
99
+ * @param {AbortSignal} [options.signal] - Optional AbortSignal for cancellation support.
100
+ * When aborted, throws OperationAbortedError at the next loop boundary.
101
+ * @param {Map<string, string>} [options.frontier] - Frontier to persist alongside the rebuilt index.
102
+ * Maps writer IDs to their tip SHAs; stored in the index tree for staleness detection.
103
+ * @returns {Promise<string>} OID of the created tree containing the index
104
+ * @throws {Error} If maxMemoryBytes is specified but not positive
105
+ * @throws {OperationAbortedError} If the signal is aborted during rebuild
106
+ * @throws {Error} If graphService.iterateNodes() fails (e.g., invalid ref)
107
+ * @throws {Error} If storage.writeBlob() or storage.writeTree() fails
108
+ *
109
+ * @example
110
+ * // In-memory rebuild (default, fast)
111
+ * const treeOid = await rebuildService.rebuild('HEAD');
112
+ *
113
+ * @example
114
+ * // Streaming rebuild with 50MB memory limit
115
+ * const treeOid = await rebuildService.rebuild('HEAD', {
116
+ * maxMemoryBytes: 50 * 1024 * 1024,
117
+ * onFlush: ({ flushCount }) => console.log(`Flush #${flushCount}`),
118
+ * });
119
+ */
120
+ async rebuild(ref, { limit = 10_000_000, maxMemoryBytes, onFlush, onProgress, signal, frontier } = {}) {
121
+ if (maxMemoryBytes !== undefined && maxMemoryBytes <= 0) {
122
+ throw new Error('maxMemoryBytes must be a positive number');
123
+ }
124
+ const mode = maxMemoryBytes !== undefined ? 'streaming' : 'in-memory';
125
+ this.logger.info('Starting index rebuild', {
126
+ operation: 'rebuild',
127
+ ref,
128
+ limit,
129
+ mode,
130
+ maxMemoryBytes: maxMemoryBytes ?? null,
131
+ });
132
+
133
+ const startTime = performance.now();
134
+
135
+ try {
136
+ let treeOid;
137
+ if (maxMemoryBytes !== undefined) {
138
+ treeOid = await this._rebuildStreaming(ref, { limit, maxMemoryBytes, onFlush, onProgress, signal, frontier });
139
+ } else {
140
+ treeOid = await this._rebuildInMemory(ref, { limit, onProgress, signal, frontier });
141
+ }
142
+
143
+ const durationMs = performance.now() - startTime;
144
+ this.logger.info('Index rebuild complete', {
145
+ operation: 'rebuild',
146
+ ref,
147
+ mode,
148
+ treeOid,
149
+ durationMs,
150
+ });
151
+
152
+ return treeOid;
153
+ } catch (err) {
154
+ const durationMs = performance.now() - startTime;
155
+ this.logger.error('Index rebuild failed', {
156
+ operation: 'rebuild',
157
+ ref,
158
+ mode,
159
+ error: err.message,
160
+ durationMs,
161
+ });
162
+ throw err;
163
+ }
164
+ }
165
+
166
+ /**
167
+ * In-memory rebuild implementation (original behavior).
168
+ *
169
+ * Loads all nodes into memory, builds the complete index, then persists
170
+ * in a single batch. This is the fastest approach but requires O(N) memory
171
+ * where N is the number of nodes.
172
+ *
173
+ * **Memory usage**: Approximately 150-200 bytes per node for the bitmap
174
+ * data structures, plus temporary overhead during serialization.
175
+ *
176
+ * @param {string} ref - Git ref to traverse from
177
+ * @param {Object} options - Options
178
+ * @param {number} options.limit - Maximum nodes to process
179
+ * @param {Function} [options.onProgress] - Progress callback invoked every 10,000 nodes.
180
+ * Receives `{ processedNodes: number, currentMemoryBytes: null }`.
181
+ * @param {AbortSignal} [options.signal] - Abort signal for cancellation. Checked every
182
+ * 10,000 nodes to balance responsiveness with performance.
183
+ * @param {Map<string, string>} [options.frontier] - Frontier to persist with the index
184
+ * @returns {Promise<string>} Tree OID of the persisted index
185
+ * @throws {OperationAbortedError} If the signal is aborted during iteration
186
+ * @throws {Error} If node iteration fails (e.g., invalid ref, Git error)
187
+ * @throws {Error} If index persistence fails (storage error)
188
+ * @private
189
+ */
190
+ async _rebuildInMemory(ref, { limit, onProgress, signal, frontier }) {
191
+ const builder = new BitmapIndexBuilder({ crypto: this._crypto });
192
+ let processedNodes = 0;
193
+
194
+ for await (const node of this.graphService.iterateNodes({ ref, limit })) {
195
+ builder.registerNode(node.sha);
196
+ for (const parentSha of node.parents) {
197
+ builder.addEdge(parentSha, node.sha);
198
+ }
199
+
200
+ processedNodes++;
201
+ if (processedNodes % 10000 === 0) {
202
+ checkAborted(signal, 'rebuild');
203
+ if (onProgress) {
204
+ onProgress({ processedNodes, currentMemoryBytes: null });
205
+ }
206
+ }
207
+ }
208
+
209
+ return await this._persistIndex(builder, { frontier });
210
+ }
211
+
212
+ /**
213
+ * Streaming rebuild implementation with memory-bounded operation.
214
+ *
215
+ * Uses StreamingBitmapIndexBuilder to flush bitmap data to storage when
216
+ * memory usage exceeds the threshold. Multiple chunks are written during
217
+ * iteration, then merged at finalization.
218
+ *
219
+ * **Memory usage**: Bounded by `maxMemoryBytes`. When exceeded, current
220
+ * bitmap data is serialized and flushed to storage, freeing memory for
221
+ * continued iteration.
222
+ *
223
+ * **I/O pattern**: Higher I/O than in-memory mode due to intermediate
224
+ * flushes. Each flush writes partial shards that are later merged.
225
+ *
226
+ * **Trade-offs**: Use streaming mode when:
227
+ * - Graph is too large to fit in memory
228
+ * - Memory is constrained (container limits, shared systems)
229
+ * - You can tolerate longer rebuild times for lower memory usage
230
+ *
231
+ * @param {string} ref - Git ref to traverse from
232
+ * @param {Object} options - Options
233
+ * @param {number} options.limit - Maximum nodes to process
234
+ * @param {number} options.maxMemoryBytes - Memory threshold in bytes. When estimated
235
+ * bitmap memory exceeds this, a flush is triggered.
236
+ * @param {Function} [options.onFlush] - Flush callback invoked after each flush.
237
+ * Receives `{ flushedBytes, totalFlushedBytes, flushCount }`.
238
+ * @param {Function} [options.onProgress] - Progress callback invoked every 10,000 nodes.
239
+ * Receives `{ processedNodes, currentMemoryBytes }`.
240
+ * @param {AbortSignal} [options.signal] - Abort signal for cancellation. Checked every
241
+ * 10,000 nodes during iteration and at finalization.
242
+ * @param {Map<string, string>} [options.frontier] - Frontier to persist with the index
243
+ * @returns {Promise<string>} Tree OID of the persisted index
244
+ * @throws {OperationAbortedError} If the signal is aborted during iteration or finalization
245
+ * @throws {Error} If node iteration fails (e.g., invalid ref, Git error)
246
+ * @throws {Error} If flush or finalization fails (storage error)
247
+ * @private
248
+ */
249
+ async _rebuildStreaming(ref, { limit, maxMemoryBytes, onFlush, onProgress, signal, frontier }) {
250
+ const builder = new StreamingBitmapIndexBuilder({
251
+ storage: this.storage,
252
+ maxMemoryBytes,
253
+ onFlush,
254
+ crypto: this._crypto,
255
+ });
256
+
257
+ let processedNodes = 0;
258
+
259
+ for await (const node of this.graphService.iterateNodes({ ref, limit })) {
260
+ await builder.registerNode(node.sha);
261
+ for (const parentSha of node.parents) {
262
+ await builder.addEdge(parentSha, node.sha);
263
+ }
264
+
265
+ processedNodes++;
266
+ if (processedNodes % 10000 === 0) {
267
+ checkAborted(signal, 'rebuild');
268
+ if (onProgress) {
269
+ const stats = builder.getMemoryStats();
270
+ onProgress({
271
+ processedNodes,
272
+ currentMemoryBytes: stats.estimatedBitmapBytes,
273
+ });
274
+ }
275
+ }
276
+ }
277
+
278
+ return await builder.finalize({ signal, frontier });
279
+ }
280
+
281
+ /**
282
+ * Persists a built index to storage (in-memory builder only).
283
+ *
284
+ * Serializes the builder's state and writes each shard as a blob,
285
+ * then creates a tree containing all shards.
286
+ *
287
+ * **Persistence format**: Creates a flat tree with entries like:
288
+ * - `100644 blob <oid>\tmeta_00.json`
289
+ * - `100644 blob <oid>\tshards_fwd_00.json`
290
+ * - `100644 blob <oid>\tshards_rev_00.json`
291
+ * - `100644 blob <oid>\tfrontier.json` (if frontier provided)
292
+ *
293
+ * @param {BitmapIndexBuilder} builder - The builder containing index data
294
+ * @param {Object} [options] - Persistence options
295
+ * @param {Map<string, string>} [options.frontier] - Frontier to include in the tree
296
+ * @returns {Promise<string>} OID of the created tree
297
+ * @throws {Error} If storage.writeBlob() fails for any shard
298
+ * @throws {Error} If storage.writeTree() fails
299
+ * @private
300
+ */
301
+ async _persistIndex(builder, { frontier } = {}) {
302
+ const treeStructure = await builder.serialize({ frontier });
303
+ const flatEntries = [];
304
+ for (const [path, buffer] of Object.entries(treeStructure)) {
305
+ const oid = await this.storage.writeBlob(buffer);
306
+ flatEntries.push(`100644 blob ${oid}\t${path}`);
307
+ }
308
+ return await this.storage.writeTree(flatEntries);
309
+ }
310
+
311
+ /**
312
+ * Loads a previously built index from a tree OID.
313
+ *
314
+ * **Memory cost**: Lazy loading - only shards accessed are loaded into memory.
315
+ * - Initial load: O(1) - just stores shard OID mappings (~50KB for 256 shards)
316
+ * - Per-query: Loads 1-3 shards on demand (~1-5KB each, cached after first access)
317
+ * - Worst case (all shards loaded): Similar to rebuild memory (~150-200MB for 1M nodes)
318
+ *
319
+ * **Persistence**: Reads from storage. The tree OID can be stored
320
+ * in a ref (e.g., 'refs/warp/index') for persistence across sessions.
321
+ *
322
+ * **Strict Mode** (default: `true`):
323
+ * When `strict` is enabled (fail-closed behavior), the reader will validate
324
+ * shard integrity during loading. If corruption or validation failures are
325
+ * detected, errors are thrown immediately, allowing callers to trigger rebuilds.
326
+ *
327
+ * When `strict` is disabled (graceful degradation), the reader will attempt
328
+ * to continue operation despite integrity issues, which may result in
329
+ * incomplete or incorrect query results.
330
+ *
331
+ * @param {string} treeOid - OID of the index tree (from rebuild() or a saved ref)
332
+ * @param {Object} [options] - Load options
333
+ * @param {boolean} [options.strict=true] - Enable strict integrity verification (fail-closed).
334
+ * When true, throws on any shard validation or corruption errors.
335
+ * When false, attempts graceful degradation.
336
+ * @param {Map<string, string>} [options.currentFrontier] - Frontier to compare for staleness.
337
+ * Maps writer IDs to their current tip SHAs. When provided, triggers a staleness
338
+ * check against the frontier stored in the index.
339
+ * @param {boolean} [options.autoRebuild=false] - Auto-rebuild when a stale index is detected.
340
+ * Requires `rebuildRef` to be set.
341
+ * @param {string} [options.rebuildRef] - Git ref to rebuild from when `autoRebuild` is true.
342
+ * Required if `autoRebuild` is true.
343
+ * @returns {Promise<BitmapIndexReader>} Configured reader ready for O(1) queries.
344
+ * The reader lazily loads shards on demand; initial load is O(1).
345
+ * @throws {Error} If treeOid is invalid or tree cannot be read from storage
346
+ * @throws {Error} If autoRebuild is true but rebuildRef is not provided
347
+ * @throws {ShardValidationError} (strict mode) If shard structure validation fails
348
+ * (e.g., missing required fields, invalid format)
349
+ * @throws {ShardCorruptionError} (strict mode) If shard data integrity check fails
350
+ * (e.g., checksum mismatch, truncated data)
351
+ * @throws {ShardLoadError} (strict mode) If shard cannot be loaded from storage
352
+ * (e.g., blob not found, I/O error)
353
+ *
354
+ * @example
355
+ * // Load with strict integrity checking (default)
356
+ * try {
357
+ * const reader = await service.load(treeOid);
358
+ * } catch (err) {
359
+ * if (err instanceof ShardValidationError || err instanceof ShardCorruptionError) {
360
+ * // Integrity failure - trigger rebuild
361
+ * const newTreeOid = await service.rebuild(ref);
362
+ * const reader = await service.load(newTreeOid);
363
+ * }
364
+ * }
365
+ *
366
+ * @example
367
+ * // Load with graceful degradation (non-strict)
368
+ * const reader = await service.load(treeOid, { strict: false });
369
+ *
370
+ * @example
371
+ * // Load from a saved ref
372
+ * const savedOid = await storage.readRef('refs/warp/index');
373
+ * const reader = await rebuildService.load(savedOid);
374
+ */
375
+ async load(treeOid, { strict = true, currentFrontier, autoRebuild = false, rebuildRef } = {}) {
376
+ this.logger.debug('Loading index', {
377
+ operation: 'load',
378
+ treeOid,
379
+ strict,
380
+ });
381
+
382
+ if (autoRebuild && !rebuildRef) {
383
+ throw new Error('rebuildRef is required when autoRebuild is true');
384
+ }
385
+
386
+ const startTime = performance.now();
387
+ const shardOids = await this.storage.readTreeOids(treeOid);
388
+ const shardCount = Object.keys(shardOids).length;
389
+
390
+ // Staleness check
391
+ if (currentFrontier) {
392
+ const indexFrontier = await loadIndexFrontier(shardOids, this.storage, { codec: this._codec });
393
+ if (indexFrontier) {
394
+ const result = checkStaleness(indexFrontier, currentFrontier);
395
+ if (result.stale) {
396
+ this.logger.warn('Index is stale', {
397
+ operation: 'load',
398
+ reason: result.reason,
399
+ hint: 'Rebuild the index or pass autoRebuild: true',
400
+ });
401
+ if (autoRebuild && rebuildRef) {
402
+ const newTreeOid = await this.rebuild(rebuildRef, { frontier: currentFrontier });
403
+ return await this.load(newTreeOid, { strict });
404
+ }
405
+ }
406
+ } else {
407
+ this.logger.debug('No frontier in index (legacy); skipping staleness check', {
408
+ operation: 'load',
409
+ });
410
+ }
411
+ }
412
+
413
+ const reader = new BitmapIndexReader({ storage: this.storage, strict, logger: this.logger.child({ component: 'BitmapIndexReader' }), crypto: this._crypto });
414
+ reader.setup(shardOids);
415
+
416
+ const durationMs = performance.now() - startTime;
417
+ this.logger.debug('Index loaded', {
418
+ operation: 'load',
419
+ treeOid,
420
+ shardCount,
421
+ durationMs,
422
+ });
423
+
424
+ return reader;
425
+ }
426
+ }
@@ -0,0 +1,103 @@
1
+ /**
2
+ * IndexStalenessChecker - Detects stale bitmap indexes by comparing
3
+ * frontier metadata stored at build time against current writer refs.
4
+ */
5
+
6
+ import defaultCodec from '../utils/defaultCodec.js';
7
+
8
/**
 * Ensures a decoded frontier blob has the expected envelope shape:
 * an object carrying an object-valued `frontier` property.
 *
 * @param {*} envelope - Decoded blob content
 * @param {string} label - Blob path, used in the error message
 * @throws {Error} If the envelope is missing or malformed
 * @private
 */
function validateEnvelope(envelope, label) {
  const wellFormed = Boolean(envelope) && typeof envelope === 'object'
    && Boolean(envelope.frontier) && typeof envelope.frontier === 'object';
  if (!wellFormed) {
    throw new Error(`invalid frontier envelope for ${label}`);
  }
}

/**
 * Loads the frontier from an index tree's shard OIDs.
 *
 * Prefers the CBOR-encoded blob (`frontier.cbor`) and falls back to the
 * JSON blob (`frontier.json`); returns null when neither exists, which
 * indicates a legacy index built before frontiers were persisted.
 *
 * @param {Record<string, string>} shardOids - Map of path → blob OID from readTreeOids
 * @param {import('../../ports/IndexStoragePort.js').default} storage - Storage adapter
 * @param {Object} [options]
 * @param {import('../../ports/CodecPort.js').default} [options.codec] - Codec for deserialization
 * @returns {Promise<Map<string, string>|null>} Frontier map, or null if not present (legacy index)
 */
export async function loadIndexFrontier(shardOids, storage, { codec } = {}) {
  const activeCodec = codec || defaultCodec;
  // Ordered by preference: binary CBOR first, JSON as the fallback format.
  const sources = [
    ['frontier.cbor', (buffer) => activeCodec.decode(buffer)],
    ['frontier.json', (buffer) => JSON.parse(buffer.toString('utf-8'))],
  ];

  for (const [path, decode] of sources) {
    const oid = shardOids[path];
    if (!oid) {
      continue;
    }
    const envelope = decode(await storage.readBlob(oid));
    validateEnvelope(envelope, path);
    return new Map(Object.entries(envelope.frontier));
  }

  return null;
}
44
+
45
+ /**
46
+ * @typedef {Object} StalenessResult
47
+ * @property {boolean} stale - Whether the index is stale
48
+ * @property {string} reason - Human-readable summary
49
+ * @property {string[]} advancedWriters - Writers whose tips changed
50
+ * @property {string[]} newWriters - Writers not in index frontier
51
+ * @property {string[]} removedWriters - Writers in index but not current
52
+ */
53
+
54
/**
 * Builds a human-readable summary from a staleness comparison.
 *
 * @param {Object} diff - Comparison outcome
 * @param {boolean} diff.stale - Whether any difference was found
 * @param {string[]} diff.advancedWriters - Writers whose tips changed
 * @param {string[]} diff.newWriters - Writers absent from the index frontier
 * @param {string[]} diff.removedWriters - Writers absent from the current frontier
 * @returns {string} Summary such as "2 writer(s) advanced, 1 new writer(s)"
 * @private
 */
function buildReason({ stale, advancedWriters, newWriters, removedWriters }) {
  if (!stale) {
    return 'index is current';
  }
  const fragments = [
    [advancedWriters.length, `${advancedWriters.length} writer(s) advanced`],
    [newWriters.length, `${newWriters.length} new writer(s)`],
    [removedWriters.length, `${removedWriters.length} writer(s) removed`],
  ]
    .filter(([count]) => count > 0)
    .map(([, text]) => text);
  return fragments.join(', ');
}

/**
 * Compares index frontier against current frontier to detect staleness.
 *
 * The index is stale when any writer advanced its tip, any writer appears
 * only in the current frontier, or any writer appears only in the index.
 *
 * @param {Map<string, string>} indexFrontier - Frontier stored in the index
 * @param {Map<string, string>} currentFrontier - Current frontier from refs
 * @returns {StalenessResult}
 */
export function checkStaleness(indexFrontier, currentFrontier) {
  const advancedWriters = [];
  const newWriters = [];

  for (const [writerId, tipSha] of currentFrontier.entries()) {
    const recordedTip = indexFrontier.get(writerId);
    if (recordedTip === undefined) {
      newWriters.push(writerId);
    } else if (recordedTip !== tipSha) {
      advancedWriters.push(writerId);
    }
  }

  // Writers present at build time but gone from the current refs.
  const removedWriters = [...indexFrontier.keys()].filter(
    (writerId) => !currentFrontier.has(writerId),
  );

  const stale = [advancedWriters, newWriters, removedWriters].some(
    (group) => group.length > 0,
  );
  const reason = buildReason({ stale, advancedWriters, newWriters, removedWriters });

  return { stale, reason, advancedWriters, newWriters, removedWriters };
}