@git-stunts/git-warp 10.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. package/LICENSE +201 -0
  2. package/NOTICE +16 -0
  3. package/README.md +480 -0
  4. package/SECURITY.md +30 -0
  5. package/bin/git-warp +24 -0
  6. package/bin/warp-graph.js +1574 -0
  7. package/index.d.ts +2366 -0
  8. package/index.js +180 -0
  9. package/package.json +129 -0
  10. package/scripts/install-git-warp.sh +258 -0
  11. package/scripts/uninstall-git-warp.sh +139 -0
  12. package/src/domain/WarpGraph.js +3157 -0
  13. package/src/domain/crdt/Dot.js +160 -0
  14. package/src/domain/crdt/LWW.js +154 -0
  15. package/src/domain/crdt/ORSet.js +371 -0
  16. package/src/domain/crdt/VersionVector.js +222 -0
  17. package/src/domain/entities/GraphNode.js +60 -0
  18. package/src/domain/errors/EmptyMessageError.js +47 -0
  19. package/src/domain/errors/ForkError.js +30 -0
  20. package/src/domain/errors/IndexError.js +23 -0
  21. package/src/domain/errors/OperationAbortedError.js +22 -0
  22. package/src/domain/errors/QueryError.js +39 -0
  23. package/src/domain/errors/SchemaUnsupportedError.js +17 -0
  24. package/src/domain/errors/ShardCorruptionError.js +56 -0
  25. package/src/domain/errors/ShardLoadError.js +57 -0
  26. package/src/domain/errors/ShardValidationError.js +61 -0
  27. package/src/domain/errors/StorageError.js +57 -0
  28. package/src/domain/errors/SyncError.js +30 -0
  29. package/src/domain/errors/TraversalError.js +23 -0
  30. package/src/domain/errors/WarpError.js +31 -0
  31. package/src/domain/errors/WormholeError.js +28 -0
  32. package/src/domain/errors/WriterError.js +39 -0
  33. package/src/domain/errors/index.js +21 -0
  34. package/src/domain/services/AnchorMessageCodec.js +99 -0
  35. package/src/domain/services/BitmapIndexBuilder.js +225 -0
  36. package/src/domain/services/BitmapIndexReader.js +435 -0
  37. package/src/domain/services/BoundaryTransitionRecord.js +463 -0
  38. package/src/domain/services/CheckpointMessageCodec.js +147 -0
  39. package/src/domain/services/CheckpointSerializerV5.js +281 -0
  40. package/src/domain/services/CheckpointService.js +384 -0
  41. package/src/domain/services/CommitDagTraversalService.js +156 -0
  42. package/src/domain/services/DagPathFinding.js +712 -0
  43. package/src/domain/services/DagTopology.js +239 -0
  44. package/src/domain/services/DagTraversal.js +245 -0
  45. package/src/domain/services/Frontier.js +108 -0
  46. package/src/domain/services/GCMetrics.js +101 -0
  47. package/src/domain/services/GCPolicy.js +122 -0
  48. package/src/domain/services/GitLogParser.js +205 -0
  49. package/src/domain/services/HealthCheckService.js +246 -0
  50. package/src/domain/services/HookInstaller.js +326 -0
  51. package/src/domain/services/HttpSyncServer.js +262 -0
  52. package/src/domain/services/IndexRebuildService.js +426 -0
  53. package/src/domain/services/IndexStalenessChecker.js +103 -0
  54. package/src/domain/services/JoinReducer.js +582 -0
  55. package/src/domain/services/KeyCodec.js +113 -0
  56. package/src/domain/services/LegacyAnchorDetector.js +67 -0
  57. package/src/domain/services/LogicalTraversal.js +351 -0
  58. package/src/domain/services/MessageCodecInternal.js +132 -0
  59. package/src/domain/services/MessageSchemaDetector.js +145 -0
  60. package/src/domain/services/MigrationService.js +55 -0
  61. package/src/domain/services/ObserverView.js +265 -0
  62. package/src/domain/services/PatchBuilderV2.js +669 -0
  63. package/src/domain/services/PatchMessageCodec.js +140 -0
  64. package/src/domain/services/ProvenanceIndex.js +337 -0
  65. package/src/domain/services/ProvenancePayload.js +242 -0
  66. package/src/domain/services/QueryBuilder.js +835 -0
  67. package/src/domain/services/StateDiff.js +300 -0
  68. package/src/domain/services/StateSerializerV5.js +156 -0
  69. package/src/domain/services/StreamingBitmapIndexBuilder.js +709 -0
  70. package/src/domain/services/SyncProtocol.js +593 -0
  71. package/src/domain/services/TemporalQuery.js +201 -0
  72. package/src/domain/services/TranslationCost.js +221 -0
  73. package/src/domain/services/TraversalService.js +8 -0
  74. package/src/domain/services/WarpMessageCodec.js +29 -0
  75. package/src/domain/services/WarpStateIndexBuilder.js +127 -0
  76. package/src/domain/services/WormholeService.js +353 -0
  77. package/src/domain/types/TickReceipt.js +285 -0
  78. package/src/domain/types/WarpTypes.js +209 -0
  79. package/src/domain/types/WarpTypesV2.js +200 -0
  80. package/src/domain/utils/CachedValue.js +140 -0
  81. package/src/domain/utils/EventId.js +89 -0
  82. package/src/domain/utils/LRUCache.js +112 -0
  83. package/src/domain/utils/MinHeap.js +114 -0
  84. package/src/domain/utils/RefLayout.js +280 -0
  85. package/src/domain/utils/WriterId.js +205 -0
  86. package/src/domain/utils/cancellation.js +33 -0
  87. package/src/domain/utils/canonicalStringify.js +42 -0
  88. package/src/domain/utils/defaultClock.js +20 -0
  89. package/src/domain/utils/defaultCodec.js +51 -0
  90. package/src/domain/utils/nullLogger.js +21 -0
  91. package/src/domain/utils/roaring.js +181 -0
  92. package/src/domain/utils/shardVersion.js +9 -0
  93. package/src/domain/warp/PatchSession.js +217 -0
  94. package/src/domain/warp/Writer.js +181 -0
  95. package/src/hooks/post-merge.sh +60 -0
  96. package/src/infrastructure/adapters/BunHttpAdapter.js +225 -0
  97. package/src/infrastructure/adapters/ClockAdapter.js +57 -0
  98. package/src/infrastructure/adapters/ConsoleLogger.js +150 -0
  99. package/src/infrastructure/adapters/DenoHttpAdapter.js +230 -0
  100. package/src/infrastructure/adapters/GitGraphAdapter.js +787 -0
  101. package/src/infrastructure/adapters/GlobalClockAdapter.js +5 -0
  102. package/src/infrastructure/adapters/NoOpLogger.js +62 -0
  103. package/src/infrastructure/adapters/NodeCryptoAdapter.js +32 -0
  104. package/src/infrastructure/adapters/NodeHttpAdapter.js +98 -0
  105. package/src/infrastructure/adapters/PerformanceClockAdapter.js +5 -0
  106. package/src/infrastructure/adapters/WebCryptoAdapter.js +121 -0
  107. package/src/infrastructure/codecs/CborCodec.js +384 -0
  108. package/src/ports/BlobPort.js +30 -0
  109. package/src/ports/ClockPort.js +25 -0
  110. package/src/ports/CodecPort.js +25 -0
  111. package/src/ports/CommitPort.js +114 -0
  112. package/src/ports/ConfigPort.js +31 -0
  113. package/src/ports/CryptoPort.js +38 -0
  114. package/src/ports/GraphPersistencePort.js +57 -0
  115. package/src/ports/HttpServerPort.js +25 -0
  116. package/src/ports/IndexStoragePort.js +39 -0
  117. package/src/ports/LoggerPort.js +68 -0
  118. package/src/ports/RefPort.js +51 -0
  119. package/src/ports/TreePort.js +51 -0
  120. package/src/visualization/index.js +26 -0
  121. package/src/visualization/layouts/converters.js +75 -0
  122. package/src/visualization/layouts/elkAdapter.js +86 -0
  123. package/src/visualization/layouts/elkLayout.js +95 -0
  124. package/src/visualization/layouts/index.js +29 -0
  125. package/src/visualization/renderers/ascii/box.js +16 -0
  126. package/src/visualization/renderers/ascii/check.js +271 -0
  127. package/src/visualization/renderers/ascii/colors.js +13 -0
  128. package/src/visualization/renderers/ascii/formatters.js +73 -0
  129. package/src/visualization/renderers/ascii/graph.js +344 -0
  130. package/src/visualization/renderers/ascii/history.js +335 -0
  131. package/src/visualization/renderers/ascii/index.js +14 -0
  132. package/src/visualization/renderers/ascii/info.js +245 -0
  133. package/src/visualization/renderers/ascii/materialize.js +255 -0
  134. package/src/visualization/renderers/ascii/path.js +240 -0
  135. package/src/visualization/renderers/ascii/progress.js +32 -0
  136. package/src/visualization/renderers/ascii/symbols.js +33 -0
  137. package/src/visualization/renderers/ascii/table.js +19 -0
  138. package/src/visualization/renderers/browser/index.js +1 -0
  139. package/src/visualization/renderers/svg/index.js +159 -0
  140. package/src/visualization/utils/ansi.js +14 -0
  141. package/src/visualization/utils/time.js +40 -0
  142. package/src/visualization/utils/truncate.js +40 -0
  143. package/src/visualization/utils/unicode.js +52 -0
@@ -0,0 +1,709 @@
1
+ import defaultCodec from '../utils/defaultCodec.js';
2
+ import ShardCorruptionError from '../errors/ShardCorruptionError.js';
3
+ import ShardValidationError from '../errors/ShardValidationError.js';
4
+ import nullLogger from '../utils/nullLogger.js';
5
+ import { checkAborted } from '../utils/cancellation.js';
6
+ import { getRoaringBitmap32 } from '../utils/roaring.js';
7
+ import { canonicalStringify } from '../utils/canonicalStringify.js';
8
+ import { SHARD_VERSION } from '../utils/shardVersion.js';
9
+
10
+ // Re-export for backwards compatibility
11
+ export { SHARD_VERSION };
12
+
13
/**
 * Bitmap memory budget applied when the caller does not pass `maxMemoryBytes`.
 * 50 * 2^20 = 52,428,800 bytes.
 * @const {number}
 */
const DEFAULT_MAX_MEMORY_BYTES = 50 * 2 ** 20;

/**
 * Rough per-entry cost of one SHA→ID mapping, used only for reporting in
 * `getMemoryStats()`. Budgeted as a 40-char string (~80 bytes with overhead),
 * an 8-byte number, and Map bookkeeping.
 * @const {number}
 */
const BYTES_PER_ID_MAPPING = 120;

/**
 * Bytes charged to the memory estimate each time a new (empty)
 * RoaringBitmap32 is created.
 * @const {number}
 */
const BITMAP_BASE_OVERHEAD = 64;
31
+
32
/**
 * Produces a SHA-256 checksum for a shard payload.
 *
 * The payload is first rendered with `canonicalStringify` so that the same
 * data always yields the same string (and therefore the same digest),
 * then handed to `crypto.hash('sha256', ...)`.
 *
 * @param {Object} data - The data object to checksum
 * @param {import('../../ports/CryptoPort.js').default} [crypto] - CryptoPort instance
 * @returns {Promise<string|null>} The digest returned by `crypto.hash`, or
 *   `null` when no crypto implementation was supplied
 */
const computeChecksum = async (data, crypto) => {
  if (crypto) {
    const canonicalJson = canonicalStringify(data);
    const digest = await crypto.hash('sha256', canonicalJson);
    return digest;
  }
  // No crypto port available: checksums are recorded as null.
  return null;
};
46
+
47
+ /**
48
+ * Streaming bitmap index builder with memory-bounded operation.
49
+ *
50
+ * Unlike {@link BitmapIndexBuilder}, this builder flushes bitmap data to storage
51
+ * periodically when memory usage exceeds a threshold. This enables indexing
52
+ * arbitrarily large graphs without OOM.
53
+ *
54
+ * **Memory Model**:
55
+ * - SHA→ID mappings are kept in memory (required for global ID consistency)
56
+ * - Bitmap data is flushed to storage when threshold exceeded
57
+ * - Flushed chunks are merged at finalization
58
+ *
59
+ * **Trade-offs**:
60
+ * - More I/O operations than in-memory builder
61
+ * - Requires storage adapter (not pure domain)
62
+ * - Merge step at finalization adds overhead
63
+ *
64
+ * @example
65
+ * const builder = new StreamingBitmapIndexBuilder({
66
+ * storage: gitAdapter,
67
+ * maxMemoryBytes: 100 * 1024 * 1024, // 100MB
68
+ * });
69
+ *
70
+ * for await (const node of nodes) {
71
+ * await builder.registerNode(node.sha);
72
+ * for (const parent of node.parents) {
73
+ * await builder.addEdge(parent, node.sha);
74
+ * }
75
+ * }
76
+ *
77
+ * const treeOid = await builder.finalize();
78
+ */
79
+ export default class StreamingBitmapIndexBuilder {
80
+ /**
81
+ * Creates a new StreamingBitmapIndexBuilder instance.
82
+ *
83
+ * @param {Object} options - Configuration options
84
+ * @param {Object} options.storage - Storage adapter implementing IndexStoragePort.
85
+ * Required methods: writeBlob, writeTree, readBlob
86
+ * @param {number} [options.maxMemoryBytes=52428800] - Maximum bitmap memory before flush (default 50MB).
87
+ * Note: SHA→ID mappings are not counted against this limit as they must remain in memory.
88
+ * @param {Function} [options.onFlush] - Optional callback invoked on each flush.
89
+ * Receives { flushedBytes, totalFlushedBytes, flushCount }.
90
+ * @param {import('../../ports/LoggerPort.js').default} [options.logger] - Logger for structured logging.
91
+ * Defaults to NoOpLogger (no logging).
92
+ */
93
+ constructor({ storage, maxMemoryBytes = DEFAULT_MAX_MEMORY_BYTES, onFlush, logger = nullLogger, crypto, codec }) {
94
+ if (!storage) {
95
+ throw new Error('StreamingBitmapIndexBuilder requires a storage adapter');
96
+ }
97
+ if (maxMemoryBytes <= 0) {
98
+ throw new Error('maxMemoryBytes must be a positive number');
99
+ }
100
+
101
+ /** @type {import('../../ports/CryptoPort.js').default} */
102
+ this._crypto = crypto;
103
+
104
+ /** @type {import('../../ports/CodecPort.js').default|undefined} */
105
+ this._codec = codec || defaultCodec;
106
+
107
+ /** @type {Object} */
108
+ this.storage = storage;
109
+
110
+ /** @type {number} */
111
+ this.maxMemoryBytes = maxMemoryBytes;
112
+
113
+ /** @type {Function|undefined} */
114
+ this.onFlush = onFlush;
115
+
116
+ /** @type {import('../../ports/LoggerPort.js').default} */
117
+ this.logger = logger;
118
+
119
+ /** @type {Map<string, number>} SHA → numeric ID (kept in memory) */
120
+ this.shaToId = new Map();
121
+
122
+ /** @type {string[]} ID → SHA reverse mapping (kept in memory) */
123
+ this.idToSha = [];
124
+
125
+ /** @type {Map<string, RoaringBitmap32>} Current in-memory bitmaps */
126
+ this.bitmaps = new Map();
127
+
128
+ /** @type {number} Estimated bytes used by current bitmaps */
129
+ this.estimatedBitmapBytes = 0;
130
+
131
+ /** @type {Map<string, string[]>} path → array of blob OIDs (for multi-chunk shards) */
132
+ this.flushedChunks = new Map();
133
+
134
+ /** @type {number} Total bytes flushed to storage */
135
+ this.totalFlushedBytes = 0;
136
+
137
+ /** @type {number} Number of flush operations performed */
138
+ this.flushCount = 0;
139
+
140
+ /** @type {typeof import('roaring').RoaringBitmap32} Cached constructor */
141
+ this._RoaringBitmap32 = getRoaringBitmap32();
142
+ }
143
+
144
+ /**
145
+ * Registers a node without adding edges.
146
+ *
147
+ * This method assigns a numeric ID to the given SHA if it hasn't been
148
+ * registered before. The ID is used internally for bitmap indexing.
149
+ * If the node has already been registered, returns the existing ID.
150
+ *
151
+ * @param {string} sha - The node's SHA (40-character hex string)
152
+ * @returns {Promise<number>} The assigned numeric ID (0-indexed, monotonically increasing)
153
+ */
154
+ registerNode(sha) {
155
+ return Promise.resolve(this._getOrCreateId(sha));
156
+ }
157
+
158
+ /**
159
+ * Adds a directed edge from source to target node.
160
+ *
161
+ * Creates or updates bitmap entries for both forward (src → tgt) and
162
+ * reverse (tgt → src) edge lookups. Both nodes are automatically registered
163
+ * if not already present.
164
+ *
165
+ * May trigger an automatic flush if memory usage exceeds the configured
166
+ * `maxMemoryBytes` threshold after adding the edge.
167
+ *
168
+ * @param {string} srcSha - Source node SHA (parent, 40-character hex string)
169
+ * @param {string} tgtSha - Target node SHA (child, 40-character hex string)
170
+ * @returns {Promise<void>} Resolves when edge is added (and flushed if necessary)
171
+ * @async
172
+ */
173
+ async addEdge(srcSha, tgtSha) {
174
+ const srcId = this._getOrCreateId(srcSha);
175
+ const tgtId = this._getOrCreateId(tgtSha);
176
+
177
+ this._addToBitmap({ sha: srcSha, id: tgtId, type: 'fwd' });
178
+ this._addToBitmap({ sha: tgtSha, id: srcId, type: 'rev' });
179
+
180
+ // Check if we need to flush
181
+ if (this.estimatedBitmapBytes >= this.maxMemoryBytes) {
182
+ await this.flush();
183
+ }
184
+ }
185
+
186
+ /**
187
+ * Serializes current in-memory bitmaps into a shard structure.
188
+ *
189
+ * Groups bitmaps by type ('fwd' or 'rev') and SHA prefix (first 2 hex chars).
190
+ * Each bitmap is serialized to a portable format and base64-encoded.
191
+ *
192
+ * @returns {{fwd: Object<string, Object<string, string>>, rev: Object<string, Object<string, string>>}}
193
+ * Object with 'fwd' and 'rev' keys, each mapping prefix to SHA→base64Bitmap entries
194
+ * @private
195
+ */
196
+ _serializeBitmapsToShards() {
197
+ const bitmapShards = { fwd: {}, rev: {} };
198
+ for (const [key, bitmap] of this.bitmaps) {
199
+ const type = key.substring(0, 3);
200
+ const sha = key.substring(4);
201
+ const prefix = sha.substring(0, 2);
202
+
203
+ if (!bitmapShards[type][prefix]) {
204
+ bitmapShards[type][prefix] = {};
205
+ }
206
+ bitmapShards[type][prefix][sha] = bitmap.serialize(true).toString('base64');
207
+ }
208
+ return bitmapShards;
209
+ }
210
+
211
+ /**
212
+ * Writes serialized bitmap shards to storage and tracks their OIDs.
213
+ *
214
+ * Each shard is wrapped in a versioned envelope with a checksum before writing.
215
+ * The resulting blob OIDs are tracked in `flushedChunks` for later merging.
216
+ * Writes are performed in parallel for efficiency.
217
+ *
218
+ * @param {{fwd: Object<string, Object<string, string>>, rev: Object<string, Object<string, string>>}} bitmapShards
219
+ * Object with 'fwd' and 'rev' keys containing prefix-grouped bitmap data
220
+ * @returns {Promise<void>} Resolves when all shards have been written
221
+ * @async
222
+ * @private
223
+ */
224
+ async _writeShardsToStorage(bitmapShards) {
225
+ const tasks = [];
226
+
227
+ for (const type of ['fwd', 'rev']) {
228
+ for (const [prefix, shardData] of Object.entries(bitmapShards[type])) {
229
+ const path = `shards_${type}_${prefix}.json`;
230
+ tasks.push(
231
+ computeChecksum(shardData, this._crypto).then(async (checksum) => {
232
+ const envelope = {
233
+ version: SHARD_VERSION,
234
+ checksum,
235
+ data: shardData,
236
+ };
237
+ const buffer = Buffer.from(JSON.stringify(envelope));
238
+ const oid = await this.storage.writeBlob(buffer);
239
+ if (!this.flushedChunks.has(path)) {
240
+ this.flushedChunks.set(path, []);
241
+ }
242
+ this.flushedChunks.get(path).push(oid);
243
+ })
244
+ );
245
+ }
246
+ }
247
+
248
+ await Promise.all(tasks);
249
+ }
250
+
251
+ /**
252
+ * Flushes current bitmap data to storage.
253
+ *
254
+ * Serializes all in-memory bitmaps, writes them as versioned blob chunks,
255
+ * and clears the bitmap map to free memory. SHA→ID mappings are preserved
256
+ * in memory as they are required for global ID consistency.
257
+ *
258
+ * This method is called automatically when memory usage exceeds
259
+ * `maxMemoryBytes`, but can also be called manually to force a flush.
260
+ *
261
+ * If no bitmaps are in memory (e.g., after a previous flush), this
262
+ * method returns immediately without performing any I/O.
263
+ *
264
+ * Invokes the `onFlush` callback (if configured) after successful flush.
265
+ *
266
+ * @returns {Promise<void>} Resolves when flush is complete
267
+ * @async
268
+ */
269
+ async flush() {
270
+ if (this.bitmaps.size === 0) {
271
+ return;
272
+ }
273
+
274
+ const flushedBytes = this.estimatedBitmapBytes;
275
+ const bitmapShards = this._serializeBitmapsToShards();
276
+ await this._writeShardsToStorage(bitmapShards);
277
+
278
+ // Clear bitmaps and reset memory counter
279
+ this.bitmaps.clear();
280
+ this.totalFlushedBytes += flushedBytes;
281
+ this.estimatedBitmapBytes = 0;
282
+ this.flushCount++;
283
+
284
+ this.logger.debug('Flushed bitmap data', {
285
+ operation: 'flush',
286
+ flushedBytes,
287
+ totalFlushedBytes: this.totalFlushedBytes,
288
+ flushCount: this.flushCount,
289
+ });
290
+
291
+ // Invoke callback if provided
292
+ if (this.onFlush) {
293
+ this.onFlush({
294
+ flushedBytes,
295
+ totalFlushedBytes: this.totalFlushedBytes,
296
+ flushCount: this.flushCount,
297
+ });
298
+ }
299
+ }
300
+
301
+ /**
302
+ * Builds meta shards (SHA→ID mappings) grouped by SHA prefix.
303
+ *
304
+ * Groups all registered SHA→ID mappings by the first two hex characters
305
+ * of the SHA. This enables efficient loading of only relevant shards
306
+ * during index reads.
307
+ *
308
+ * @returns {Object<string, Object<string, number>>} Object mapping 2-char hex prefix
309
+ * to objects of SHA→numeric ID mappings
310
+ * @private
311
+ */
312
+ _buildMetaShards() {
313
+ const idShards = {};
314
+ for (const [sha, id] of this.shaToId) {
315
+ const prefix = sha.substring(0, 2);
316
+ if (!idShards[prefix]) {
317
+ idShards[prefix] = {};
318
+ }
319
+ idShards[prefix][sha] = id;
320
+ }
321
+ return idShards;
322
+ }
323
+
324
+ /**
325
+ * Writes meta shards to storage in parallel.
326
+ *
327
+ * Each shard is wrapped in a versioned envelope with checksum before writing.
328
+ * Writes are performed in parallel using Promise.all for efficiency.
329
+ *
330
+ * @param {Object<string, Object<string, number>>} idShards - Object mapping 2-char hex prefix
331
+ * to objects of SHA→numeric ID mappings
332
+ * @returns {Promise<string[]>} Array of Git tree entry strings in format
333
+ * "100644 blob <oid>\tmeta_<prefix>.json"
334
+ * @async
335
+ * @private
336
+ */
337
+ async _writeMetaShards(idShards) {
338
+ return await Promise.all(
339
+ Object.entries(idShards).map(async ([prefix, map]) => {
340
+ const path = `meta_${prefix}.json`;
341
+ const envelope = {
342
+ version: SHARD_VERSION,
343
+ checksum: await computeChecksum(map, this._crypto),
344
+ data: map,
345
+ };
346
+ const buffer = Buffer.from(JSON.stringify(envelope));
347
+ const oid = await this.storage.writeBlob(buffer);
348
+ return `100644 blob ${oid}\t${path}`;
349
+ })
350
+ );
351
+ }
352
+
353
+ /**
354
+ * Processes bitmap shards, merging multiple chunks if necessary.
355
+ *
356
+ * For each shard path, if multiple chunks were flushed during the build,
357
+ * they are merged by ORing their bitmaps together. Single-chunk shards
358
+ * are used directly without merging.
359
+ *
360
+ * Processing is parallelized across shard paths for efficiency.
361
+ *
362
+ * @param {Object} [options] - Options
363
+ * @param {AbortSignal} [options.signal] - Optional AbortSignal for cancellation.
364
+ * If aborted, throws an error with code 'ABORT_ERR'.
365
+ * @returns {Promise<string[]>} Array of Git tree entry strings in format
366
+ * "100644 blob <oid>\tshards_<type>_<prefix>.json"
367
+ * @throws {Error} If the operation is aborted via signal
368
+ * @throws {ShardValidationError} If a chunk has an unsupported version (from _mergeChunks)
369
+ * @throws {ShardCorruptionError} If a chunk's checksum is invalid (from _mergeChunks)
370
+ * @async
371
+ * @private
372
+ */
373
+ async _processBitmapShards({ signal } = {}) {
374
+ return await Promise.all(
375
+ Array.from(this.flushedChunks.entries()).map(async ([path, oids]) => {
376
+ checkAborted(signal, 'processBitmapShards');
377
+ const finalOid = oids.length === 1 ? oids[0] : await this._mergeChunks(oids, { signal });
378
+ return `100644 blob ${finalOid}\t${path}`;
379
+ })
380
+ );
381
+ }
382
+
383
+ /**
384
+ * Finalizes the index and returns the tree OID.
385
+ *
386
+ * Performs the following steps:
387
+ * 1. Flushes any remaining in-memory bitmap data to storage
388
+ * 2. Builds and writes meta shards (SHA→ID mappings) grouped by prefix
389
+ * 3. Merges multi-chunk bitmap shards by ORing bitmaps together
390
+ * 4. Optionally writes frontier metadata for staleness detection
391
+ * 5. Creates and returns the final Git tree containing all shards
392
+ *
393
+ * Meta shards and bitmap shards are processed using Promise.all
394
+ * since they are independent (prefix-based partitioning).
395
+ *
396
+ * The resulting tree structure:
397
+ * ```
398
+ * index-tree/
399
+ * meta_00.json ... meta_ff.json # SHA→ID mappings by prefix
400
+ * shards_fwd_00.json ... shards_fwd_ff.json # Forward edge bitmaps
401
+ * shards_rev_00.json ... shards_rev_ff.json # Reverse edge bitmaps
402
+ * frontier.cbor # Optional: CBOR-encoded frontier
403
+ * frontier.json # Optional: JSON-encoded frontier
404
+ * ```
405
+ *
406
+ * @param {Object} [options] - Finalization options
407
+ * @param {AbortSignal} [options.signal] - Optional AbortSignal for cancellation.
408
+ * If aborted, throws an error with code 'ABORT_ERR'.
409
+ * @param {Map<string, number>} [options.frontier] - Optional version vector frontier
410
+ * (writerId → clock) for staleness detection. If provided, frontier.cbor and
411
+ * frontier.json files are included in the tree.
412
+ * @returns {Promise<string>} OID of the created Git tree containing the complete index
413
+ * @throws {Error} If the operation is aborted via signal
414
+ * @throws {ShardValidationError} If a chunk has an unsupported version during merge
415
+ * @throws {ShardCorruptionError} If a chunk's checksum is invalid during merge
416
+ * @async
417
+ */
418
+ async finalize({ signal, frontier } = {}) {
419
+ this.logger.debug('Finalizing index', {
420
+ operation: 'finalize',
421
+ nodeCount: this.shaToId.size,
422
+ totalFlushedBytes: this.totalFlushedBytes,
423
+ flushCount: this.flushCount,
424
+ });
425
+
426
+ checkAborted(signal, 'finalize');
427
+ await this.flush();
428
+
429
+ checkAborted(signal, 'finalize');
430
+ const idShards = this._buildMetaShards();
431
+ const metaEntries = await this._writeMetaShards(idShards);
432
+
433
+ checkAborted(signal, 'finalize');
434
+ const bitmapEntries = await this._processBitmapShards({ signal });
435
+ const flatEntries = [...metaEntries, ...bitmapEntries];
436
+
437
+ // Store frontier metadata for staleness detection
438
+ if (frontier) {
439
+ const sorted = {};
440
+ for (const key of Array.from(frontier.keys()).sort()) {
441
+ sorted[key] = frontier.get(key);
442
+ }
443
+ const envelope = { version: 1, writerCount: frontier.size, frontier: sorted };
444
+ const cborOid = await this.storage.writeBlob(Buffer.from(this._codec.encode(envelope)));
445
+ flatEntries.push(`100644 blob ${cborOid}\tfrontier.cbor`);
446
+ const jsonOid = await this.storage.writeBlob(Buffer.from(canonicalStringify(envelope)));
447
+ flatEntries.push(`100644 blob ${jsonOid}\tfrontier.json`);
448
+ }
449
+
450
+ const treeOid = await this.storage.writeTree(flatEntries);
451
+
452
+ this.logger.debug('Index finalized', {
453
+ operation: 'finalize',
454
+ treeOid,
455
+ shardCount: flatEntries.length,
456
+ nodeCount: this.shaToId.size,
457
+ });
458
+
459
+ return treeOid;
460
+ }
461
+
462
+ /**
463
+ * Returns current memory statistics for monitoring and debugging.
464
+ *
465
+ * Useful for understanding memory pressure during index building and
466
+ * tuning the `maxMemoryBytes` threshold.
467
+ *
468
+ * @returns {Object} Memory statistics object
469
+ * @property {number} estimatedBitmapBytes - Current estimated size of in-memory bitmaps in bytes.
470
+ * This is an approximation based on bitmap operations; actual memory usage may vary.
471
+ * @property {number} estimatedMappingBytes - Estimated size of SHA→ID mappings in bytes.
472
+ * Calculated as nodeCount * BYTES_PER_ID_MAPPING (120 bytes per entry).
473
+ * @property {number} totalFlushedBytes - Total bytes flushed to storage across all flush operations.
474
+ * @property {number} flushCount - Number of flush operations performed so far.
475
+ * @property {number} nodeCount - Total number of unique nodes registered (by SHA).
476
+ * @property {number} bitmapCount - Number of bitmaps currently held in memory.
477
+ */
478
+ getMemoryStats() {
479
+ return {
480
+ estimatedBitmapBytes: this.estimatedBitmapBytes,
481
+ estimatedMappingBytes: this.shaToId.size * BYTES_PER_ID_MAPPING,
482
+ totalFlushedBytes: this.totalFlushedBytes,
483
+ flushCount: this.flushCount,
484
+ nodeCount: this.shaToId.size,
485
+ bitmapCount: this.bitmaps.size,
486
+ };
487
+ }
488
+
489
+ /**
490
+ * Gets or creates a numeric ID for a SHA.
491
+ *
492
+ * If the SHA has been seen before, returns its existing ID.
493
+ * Otherwise, assigns the next available ID (equal to current array length)
494
+ * and stores the bidirectional mapping.
495
+ *
496
+ * IDs are assigned sequentially starting from 0 in the order nodes are first seen.
497
+ *
498
+ * @param {string} sha - The SHA to look up or register (40-character hex string)
499
+ * @returns {number} The numeric ID (0-indexed, monotonically increasing)
500
+ * @private
501
+ */
502
+ _getOrCreateId(sha) {
503
+ if (this.shaToId.has(sha)) {
504
+ return this.shaToId.get(sha);
505
+ }
506
+ const id = this.idToSha.length;
507
+ this.idToSha.push(sha);
508
+ this.shaToId.set(sha, id);
509
+ return id;
510
+ }
511
+
512
+ /**
513
+ * Adds an ID to a node's bitmap and updates memory estimate.
514
+ *
515
+ * Creates a new RoaringBitmap32 if this is the first edge for the given
516
+ * SHA and type combination. Updates the `estimatedBitmapBytes` counter
517
+ * to track memory usage for automatic flushing.
518
+ *
519
+ * Memory estimation:
520
+ * - New bitmap: adds BITMAP_BASE_OVERHEAD (64 bytes)
521
+ * - New entry in existing bitmap: adds ~4 bytes (approximation)
522
+ *
523
+ * @param {Object} opts - Options object
524
+ * @param {string} opts.sha - The SHA to use as bitmap key (40-character hex string)
525
+ * @param {number} opts.id - The numeric ID to add to the bitmap
526
+ * @param {'fwd'|'rev'} opts.type - Edge direction type: 'fwd' for forward edges
527
+ * (this node's children), 'rev' for reverse edges (this node's parents)
528
+ * @private
529
+ */
530
+ _addToBitmap({ sha, id, type }) {
531
+ const key = `${type}_${sha}`;
532
+ if (!this.bitmaps.has(key)) {
533
+ this.bitmaps.set(key, new this._RoaringBitmap32());
534
+ this.estimatedBitmapBytes += BITMAP_BASE_OVERHEAD;
535
+ }
536
+
537
+ const bitmap = this.bitmaps.get(key);
538
+ const sizeBefore = bitmap.size;
539
+ bitmap.add(id);
540
+ const sizeAfter = bitmap.size;
541
+
542
+ // Estimate ~4 bytes per new entry (approximation; actual Roaring compression varies widely based on data distribution)
543
+ if (sizeAfter > sizeBefore) {
544
+ this.estimatedBitmapBytes += 4;
545
+ }
546
+ }
547
+
548
+ /**
549
+ * Loads a chunk from storage, parses JSON, and validates version and checksum.
550
+ *
551
+ * Performs the following validation steps:
552
+ * 1. Reads blob from storage by OID
553
+ * 2. Parses JSON envelope (throws ShardCorruptionError if invalid)
554
+ * 3. Validates version matches SHARD_VERSION (throws ShardValidationError if mismatch)
555
+ * 4. Recomputes and validates checksum (throws ShardCorruptionError if mismatch)
556
+ *
557
+ * @param {string} oid - Git blob OID of the chunk to load (40-character hex string)
558
+ * @returns {Promise<Object<string, string>>} The validated chunk data (SHA→base64Bitmap mappings)
559
+ * @throws {ShardCorruptionError} If the chunk cannot be parsed as JSON or checksum is invalid.
560
+ * Error context includes: oid, reason ('invalid_format' or 'invalid_checksum'), originalError
561
+ * @throws {ShardValidationError} If the chunk has an unsupported version.
562
+ * Error context includes: oid, expected version, actual version, field
563
+ * @async
564
+ * @private
565
+ */
566
+ async _loadAndValidateChunk(oid) {
567
+ const buffer = await this.storage.readBlob(oid);
568
+ let envelope;
569
+ try {
570
+ envelope = JSON.parse(buffer.toString('utf-8'));
571
+ } catch (err) {
572
+ throw new ShardCorruptionError('Failed to parse shard JSON', {
573
+ oid,
574
+ reason: 'invalid_format',
575
+ originalError: err.message,
576
+ });
577
+ }
578
+
579
+ // Validate version
580
+ if (envelope.version !== SHARD_VERSION) {
581
+ throw new ShardValidationError('Shard version mismatch', {
582
+ oid,
583
+ expected: SHARD_VERSION,
584
+ actual: envelope.version,
585
+ field: 'version',
586
+ });
587
+ }
588
+
589
+ // Validate checksum
590
+ const expectedChecksum = await computeChecksum(envelope.data, this._crypto);
591
+ if (envelope.checksum !== expectedChecksum) {
592
+ throw new ShardCorruptionError('Shard checksum mismatch', {
593
+ oid,
594
+ reason: 'invalid_checksum',
595
+ context: {
596
+ expected: expectedChecksum,
597
+ actual: envelope.checksum,
598
+ },
599
+ });
600
+ }
601
+
602
+ return envelope.data;
603
+ }
604
+
605
+ /**
606
+ * Deserializes a base64-encoded bitmap and merges it into the merged object.
607
+ *
608
+ * If no bitmap exists for the SHA in the merged object, the deserialized bitmap
609
+ * is stored directly. If a bitmap already exists, the new bitmap is ORed into
610
+ * it using `orInPlace` to combine edge sets.
611
+ *
612
+ * @param {Object} opts - Options object
613
+ * @param {Object<string, RoaringBitmap32>} opts.merged - Object mapping SHA to
614
+ * RoaringBitmap32 instances (mutated in place)
615
+ * @param {string} opts.sha - The SHA key for this bitmap (40-character hex string)
616
+ * @param {string} opts.base64Bitmap - Base64-encoded serialized RoaringBitmap32 data
617
+ * @param {string} opts.oid - Git blob OID of the source chunk (for error reporting)
618
+ * @throws {ShardCorruptionError} If the bitmap cannot be deserialized from base64.
619
+ * Error context includes: oid, reason ('invalid_bitmap'), originalError
620
+ * @private
621
+ */
622
+ _mergeDeserializedBitmap({ merged, sha, base64Bitmap, oid }) {
623
+ let bitmap;
624
+ try {
625
+ bitmap = this._RoaringBitmap32.deserialize(Buffer.from(base64Bitmap, 'base64'), true);
626
+ } catch (err) {
627
+ throw new ShardCorruptionError('Failed to deserialize bitmap', {
628
+ oid,
629
+ reason: 'invalid_bitmap',
630
+ originalError: err.message,
631
+ });
632
+ }
633
+
634
+ if (!merged[sha]) {
635
+ merged[sha] = bitmap;
636
+ } else {
637
+ // OR the bitmaps together
638
+ merged[sha].orInPlace(bitmap);
639
+ }
640
+ }
641
+
642
+ /**
643
+ * Merges multiple shard chunks by ORing their bitmaps together.
644
+ *
645
+ * This is called during finalization when a shard path has multiple flushed
646
+ * chunks that need to be combined. Each chunk is loaded, validated, and its
647
+ * bitmaps are ORed together by SHA key.
648
+ *
649
+ * The merge process:
650
+ * 1. Iterates through each chunk OID
651
+ * 2. Loads and validates each chunk (version + checksum)
652
+ * 3. Deserializes bitmaps and ORs them together by SHA
653
+ * 4. Serializes the merged result with new checksum
654
+ * 5. Writes the merged blob to storage
655
+ *
656
+ * Supports cancellation via AbortSignal between chunk processing iterations.
657
+ *
658
+ * @param {string[]} oids - Git blob OIDs of chunks to merge (40-character hex strings)
659
+ * @param {Object} [options] - Options object
660
+ * @param {AbortSignal} [options.signal] - Optional AbortSignal for cancellation.
661
+ * Checked between chunk iterations; if aborted, throws with code 'ABORT_ERR'.
662
+ * @returns {Promise<string>} Git blob OID of the merged shard (40-character hex string)
663
+ * @throws {Error} If the operation is aborted via signal
664
+ * @throws {ShardValidationError} If a chunk has an unsupported version.
665
+ * Contains context: oid, expected version, actual version
666
+ * @throws {ShardCorruptionError} If a chunk's checksum does not match, JSON parsing fails,
667
+ * bitmap deserialization fails, or final serialization fails.
668
+ * Contains context: oid/reason and relevant details
669
+ * @async
670
+ * @private
671
+ */
672
+ async _mergeChunks(oids, { signal } = {}) {
673
+ // Load all chunks and merge bitmaps by SHA
674
+ const merged = {};
675
+
676
+ for (const oid of oids) {
677
+ checkAborted(signal, 'mergeChunks');
678
+ const chunk = await this._loadAndValidateChunk(oid);
679
+
680
+ for (const [sha, base64Bitmap] of Object.entries(chunk)) {
681
+ this._mergeDeserializedBitmap({ merged, sha, base64Bitmap, oid });
682
+ }
683
+ }
684
+
685
+ // Serialize merged result
686
+ const result = {};
687
+ for (const [sha, bitmap] of Object.entries(merged)) {
688
+ result[sha] = bitmap.serialize(true).toString('base64');
689
+ }
690
+
691
+ // Wrap merged result in envelope with version and checksum
692
+ const mergedEnvelope = {
693
+ version: SHARD_VERSION,
694
+ checksum: await computeChecksum(result, this._crypto),
695
+ data: result,
696
+ };
697
+
698
+ let serialized;
699
+ try {
700
+ serialized = Buffer.from(JSON.stringify(mergedEnvelope));
701
+ } catch (err) {
702
+ throw new ShardCorruptionError('Failed to serialize merged shard', {
703
+ reason: 'serialization_error',
704
+ originalError: err.message,
705
+ });
706
+ }
707
+ return this.storage.writeBlob(serialized);
708
+ }
709
+ }