@git-stunts/git-warp 10.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/NOTICE +16 -0
- package/README.md +480 -0
- package/SECURITY.md +30 -0
- package/bin/git-warp +24 -0
- package/bin/warp-graph.js +1574 -0
- package/index.d.ts +2366 -0
- package/index.js +180 -0
- package/package.json +129 -0
- package/scripts/install-git-warp.sh +258 -0
- package/scripts/uninstall-git-warp.sh +139 -0
- package/src/domain/WarpGraph.js +3157 -0
- package/src/domain/crdt/Dot.js +160 -0
- package/src/domain/crdt/LWW.js +154 -0
- package/src/domain/crdt/ORSet.js +371 -0
- package/src/domain/crdt/VersionVector.js +222 -0
- package/src/domain/entities/GraphNode.js +60 -0
- package/src/domain/errors/EmptyMessageError.js +47 -0
- package/src/domain/errors/ForkError.js +30 -0
- package/src/domain/errors/IndexError.js +23 -0
- package/src/domain/errors/OperationAbortedError.js +22 -0
- package/src/domain/errors/QueryError.js +39 -0
- package/src/domain/errors/SchemaUnsupportedError.js +17 -0
- package/src/domain/errors/ShardCorruptionError.js +56 -0
- package/src/domain/errors/ShardLoadError.js +57 -0
- package/src/domain/errors/ShardValidationError.js +61 -0
- package/src/domain/errors/StorageError.js +57 -0
- package/src/domain/errors/SyncError.js +30 -0
- package/src/domain/errors/TraversalError.js +23 -0
- package/src/domain/errors/WarpError.js +31 -0
- package/src/domain/errors/WormholeError.js +28 -0
- package/src/domain/errors/WriterError.js +39 -0
- package/src/domain/errors/index.js +21 -0
- package/src/domain/services/AnchorMessageCodec.js +99 -0
- package/src/domain/services/BitmapIndexBuilder.js +225 -0
- package/src/domain/services/BitmapIndexReader.js +435 -0
- package/src/domain/services/BoundaryTransitionRecord.js +463 -0
- package/src/domain/services/CheckpointMessageCodec.js +147 -0
- package/src/domain/services/CheckpointSerializerV5.js +281 -0
- package/src/domain/services/CheckpointService.js +384 -0
- package/src/domain/services/CommitDagTraversalService.js +156 -0
- package/src/domain/services/DagPathFinding.js +712 -0
- package/src/domain/services/DagTopology.js +239 -0
- package/src/domain/services/DagTraversal.js +245 -0
- package/src/domain/services/Frontier.js +108 -0
- package/src/domain/services/GCMetrics.js +101 -0
- package/src/domain/services/GCPolicy.js +122 -0
- package/src/domain/services/GitLogParser.js +205 -0
- package/src/domain/services/HealthCheckService.js +246 -0
- package/src/domain/services/HookInstaller.js +326 -0
- package/src/domain/services/HttpSyncServer.js +262 -0
- package/src/domain/services/IndexRebuildService.js +426 -0
- package/src/domain/services/IndexStalenessChecker.js +103 -0
- package/src/domain/services/JoinReducer.js +582 -0
- package/src/domain/services/KeyCodec.js +113 -0
- package/src/domain/services/LegacyAnchorDetector.js +67 -0
- package/src/domain/services/LogicalTraversal.js +351 -0
- package/src/domain/services/MessageCodecInternal.js +132 -0
- package/src/domain/services/MessageSchemaDetector.js +145 -0
- package/src/domain/services/MigrationService.js +55 -0
- package/src/domain/services/ObserverView.js +265 -0
- package/src/domain/services/PatchBuilderV2.js +669 -0
- package/src/domain/services/PatchMessageCodec.js +140 -0
- package/src/domain/services/ProvenanceIndex.js +337 -0
- package/src/domain/services/ProvenancePayload.js +242 -0
- package/src/domain/services/QueryBuilder.js +835 -0
- package/src/domain/services/StateDiff.js +300 -0
- package/src/domain/services/StateSerializerV5.js +156 -0
- package/src/domain/services/StreamingBitmapIndexBuilder.js +709 -0
- package/src/domain/services/SyncProtocol.js +593 -0
- package/src/domain/services/TemporalQuery.js +201 -0
- package/src/domain/services/TranslationCost.js +221 -0
- package/src/domain/services/TraversalService.js +8 -0
- package/src/domain/services/WarpMessageCodec.js +29 -0
- package/src/domain/services/WarpStateIndexBuilder.js +127 -0
- package/src/domain/services/WormholeService.js +353 -0
- package/src/domain/types/TickReceipt.js +285 -0
- package/src/domain/types/WarpTypes.js +209 -0
- package/src/domain/types/WarpTypesV2.js +200 -0
- package/src/domain/utils/CachedValue.js +140 -0
- package/src/domain/utils/EventId.js +89 -0
- package/src/domain/utils/LRUCache.js +112 -0
- package/src/domain/utils/MinHeap.js +114 -0
- package/src/domain/utils/RefLayout.js +280 -0
- package/src/domain/utils/WriterId.js +205 -0
- package/src/domain/utils/cancellation.js +33 -0
- package/src/domain/utils/canonicalStringify.js +42 -0
- package/src/domain/utils/defaultClock.js +20 -0
- package/src/domain/utils/defaultCodec.js +51 -0
- package/src/domain/utils/nullLogger.js +21 -0
- package/src/domain/utils/roaring.js +181 -0
- package/src/domain/utils/shardVersion.js +9 -0
- package/src/domain/warp/PatchSession.js +217 -0
- package/src/domain/warp/Writer.js +181 -0
- package/src/hooks/post-merge.sh +60 -0
- package/src/infrastructure/adapters/BunHttpAdapter.js +225 -0
- package/src/infrastructure/adapters/ClockAdapter.js +57 -0
- package/src/infrastructure/adapters/ConsoleLogger.js +150 -0
- package/src/infrastructure/adapters/DenoHttpAdapter.js +230 -0
- package/src/infrastructure/adapters/GitGraphAdapter.js +787 -0
- package/src/infrastructure/adapters/GlobalClockAdapter.js +5 -0
- package/src/infrastructure/adapters/NoOpLogger.js +62 -0
- package/src/infrastructure/adapters/NodeCryptoAdapter.js +32 -0
- package/src/infrastructure/adapters/NodeHttpAdapter.js +98 -0
- package/src/infrastructure/adapters/PerformanceClockAdapter.js +5 -0
- package/src/infrastructure/adapters/WebCryptoAdapter.js +121 -0
- package/src/infrastructure/codecs/CborCodec.js +384 -0
- package/src/ports/BlobPort.js +30 -0
- package/src/ports/ClockPort.js +25 -0
- package/src/ports/CodecPort.js +25 -0
- package/src/ports/CommitPort.js +114 -0
- package/src/ports/ConfigPort.js +31 -0
- package/src/ports/CryptoPort.js +38 -0
- package/src/ports/GraphPersistencePort.js +57 -0
- package/src/ports/HttpServerPort.js +25 -0
- package/src/ports/IndexStoragePort.js +39 -0
- package/src/ports/LoggerPort.js +68 -0
- package/src/ports/RefPort.js +51 -0
- package/src/ports/TreePort.js +51 -0
- package/src/visualization/index.js +26 -0
- package/src/visualization/layouts/converters.js +75 -0
- package/src/visualization/layouts/elkAdapter.js +86 -0
- package/src/visualization/layouts/elkLayout.js +95 -0
- package/src/visualization/layouts/index.js +29 -0
- package/src/visualization/renderers/ascii/box.js +16 -0
- package/src/visualization/renderers/ascii/check.js +271 -0
- package/src/visualization/renderers/ascii/colors.js +13 -0
- package/src/visualization/renderers/ascii/formatters.js +73 -0
- package/src/visualization/renderers/ascii/graph.js +344 -0
- package/src/visualization/renderers/ascii/history.js +335 -0
- package/src/visualization/renderers/ascii/index.js +14 -0
- package/src/visualization/renderers/ascii/info.js +245 -0
- package/src/visualization/renderers/ascii/materialize.js +255 -0
- package/src/visualization/renderers/ascii/path.js +240 -0
- package/src/visualization/renderers/ascii/progress.js +32 -0
- package/src/visualization/renderers/ascii/symbols.js +33 -0
- package/src/visualization/renderers/ascii/table.js +19 -0
- package/src/visualization/renderers/browser/index.js +1 -0
- package/src/visualization/renderers/svg/index.js +159 -0
- package/src/visualization/utils/ansi.js +14 -0
- package/src/visualization/utils/time.js +40 -0
- package/src/visualization/utils/truncate.js +40 -0
- package/src/visualization/utils/unicode.js +52 -0
|
@@ -0,0 +1,426 @@
|
|
|
1
|
+
import defaultCodec from '../utils/defaultCodec.js';
|
|
2
|
+
import BitmapIndexBuilder from './BitmapIndexBuilder.js';
|
|
3
|
+
import BitmapIndexReader from './BitmapIndexReader.js';
|
|
4
|
+
import StreamingBitmapIndexBuilder from './StreamingBitmapIndexBuilder.js';
|
|
5
|
+
import { loadIndexFrontier, checkStaleness } from './IndexStalenessChecker.js';
|
|
6
|
+
import nullLogger from '../utils/nullLogger.js';
|
|
7
|
+
import { checkAborted } from '../utils/cancellation.js';
|
|
8
|
+
|
|
9
|
+
/**
|
|
10
|
+
* Service for building and loading the bitmap index from the graph.
|
|
11
|
+
*
|
|
12
|
+
* This service orchestrates index creation by walking the graph and persisting
|
|
13
|
+
* the resulting bitmap shards to storage via the IndexStoragePort. The bitmap
|
|
14
|
+
* index enables O(1) neighbor lookups (children/parents) after a one-time
|
|
15
|
+
* O(N) rebuild cost.
|
|
16
|
+
*
|
|
17
|
+
* **Build Modes**:
|
|
18
|
+
* - **In-memory** (default): Fast, but requires O(N) memory. Best for graphs
|
|
19
|
+
* under ~1M nodes or systems with ample RAM.
|
|
20
|
+
* - **Streaming**: Memory-bounded, flushes to storage periodically. Required
|
|
21
|
+
* for very large graphs that exceed available memory.
|
|
22
|
+
*
|
|
23
|
+
* **Index Structure**: The index is stored as a Git tree containing:
|
|
24
|
+
* - `meta_XX.json`: SHA-to-numeric-ID mappings (256 shards by SHA prefix)
|
|
25
|
+
* - `shards_fwd_XX.json`: Forward edge bitmaps (for child lookups)
|
|
26
|
+
* - `shards_rev_XX.json`: Reverse edge bitmaps (for parent lookups)
|
|
27
|
+
* - `frontier.json`: Writer frontier snapshot (for staleness detection)
|
|
28
|
+
*
|
|
29
|
+
* **Staleness Detection**: The index stores the frontier at build time.
|
|
30
|
+
* On load, the current frontier can be compared to detect if new patches
|
|
31
|
+
* have been written since the index was built.
|
|
32
|
+
*
|
|
33
|
+
* @module domain/services/IndexRebuildService
|
|
34
|
+
* @see BitmapIndexBuilder
|
|
35
|
+
* @see BitmapIndexReader
|
|
36
|
+
* @see StreamingBitmapIndexBuilder
|
|
37
|
+
*/
|
|
38
|
+
export default class IndexRebuildService {
  /**
   * Creates an IndexRebuildService instance.
   *
   * @param {Object} options - Configuration options
   * @param {Object} options.graphService - Graph service providing node iteration.
   *   Must implement `iterateNodes({ ref, limit }) => AsyncGenerator<GraphNode>`.
   * @param {import('../../ports/IndexStoragePort.js').default} options.storage - Storage adapter
   *   for persisting index blobs and trees. Typically GitGraphAdapter.
   * @param {import('../../ports/LoggerPort.js').default} [options.logger] - Logger for
   *   structured logging. Defaults to null logger (no logging).
   * @param {import('../../ports/CodecPort.js').default} [options.codec] - Codec used to decode
   *   the frontier envelope stored in the index (staleness checks). Defaults to defaultCodec.
   * @param {import('../../ports/CryptoPort.js').default} [options.crypto] - Crypto adapter for checksums
   * @throws {Error} If graphService is not provided
   * @throws {Error} If storage adapter is not provided
   */
  constructor({ graphService, storage, logger = nullLogger, codec, crypto }) {
    if (!graphService) {
      throw new Error('IndexRebuildService requires a graphService');
    }
    if (!storage) {
      throw new Error('IndexRebuildService requires a storage adapter');
    }
    this.graphService = graphService;
    this.storage = storage;
    this.logger = logger;
    /** @type {import('../../ports/CodecPort.js').default|undefined} */
    this._codec = codec || defaultCodec;
    /** @type {import('../../ports/CryptoPort.js').default|undefined} */
    this._crypto = crypto;
  }

  /**
   * Rebuilds the bitmap index by walking the graph from a ref.
   *
   * **Build Modes**:
   *
   * *In-memory mode* (default, when `maxMemoryBytes` not specified):
   * - Fastest option, single pass with bulk serialization at end
   * - Memory: O(N) where N is number of nodes (~150-200MB for 1M nodes)
   *
   * *Streaming mode* (when `maxMemoryBytes` is specified):
   * - Memory-bounded operation for very large graphs
   * - Flushes bitmap data to storage when threshold exceeded
   * - Merges chunks at finalization
   * - More I/O operations, but constant memory ceiling
   *
   * **Persistence**: Creates a Git tree containing sharded JSON blobs:
   * - `meta_XX.json`: SHA→ID mappings (256 shards by SHA prefix)
   * - `shards_fwd_XX.json`: Forward edge bitmaps (child lookups)
   * - `shards_rev_XX.json`: Reverse edge bitmaps (parent lookups)
   *
   * @param {string} ref - Git ref to start traversal from (e.g., 'HEAD', branch name, SHA)
   * @param {Object} [options] - Rebuild options
   * @param {number} [options.limit=10000000] - Maximum nodes to process (1 to 10,000,000)
   * @param {number} [options.maxMemoryBytes] - Enable streaming mode with this memory threshold.
   *   When bitmap memory exceeds this value, data is flushed to storage.
   *   Recommended: 50-100MB for most systems. Minimum: 1MB.
   * @param {Function} [options.onFlush] - Callback invoked on each flush (streaming mode only).
   *   Receives { flushedBytes, totalFlushedBytes, flushCount }.
   * @param {Function} [options.onProgress] - Callback invoked periodically during processing.
   *   Receives { processedNodes, currentMemoryBytes }.
   * @param {AbortSignal} [options.signal] - Optional AbortSignal for cancellation support.
   *   When aborted, throws OperationAbortedError at the next loop boundary.
   * @param {Map<string, string>} [options.frontier] - Frontier to persist alongside the rebuilt index.
   *   Maps writer IDs to their tip SHAs; stored in the index tree for staleness detection.
   * @returns {Promise<string>} OID of the created tree containing the index
   * @throws {Error} If maxMemoryBytes is specified but is not a positive finite number
   * @throws {OperationAbortedError} If the signal is aborted during rebuild
   * @throws {Error} If graphService.iterateNodes() fails (e.g., invalid ref)
   * @throws {Error} If storage.writeBlob() or storage.writeTree() fails
   *
   * @example
   * // In-memory rebuild (default, fast)
   * const treeOid = await rebuildService.rebuild('HEAD');
   *
   * @example
   * // Streaming rebuild with 50MB memory limit
   * const treeOid = await rebuildService.rebuild('HEAD', {
   *   maxMemoryBytes: 50 * 1024 * 1024,
   *   onFlush: ({ flushCount }) => console.log(`Flush #${flushCount}`),
   * });
   */
  async rebuild(ref, { limit = 10_000_000, maxMemoryBytes, onFlush, onProgress, signal, frontier } = {}) {
    // Fail fast on invalid thresholds. Explicitly reject NaN, Infinity, and
    // non-numeric values: `maxMemoryBytes <= 0` alone would let them through
    // and silently select streaming mode with a meaningless threshold.
    if (
      maxMemoryBytes !== undefined &&
      !(typeof maxMemoryBytes === 'number' && Number.isFinite(maxMemoryBytes) && maxMemoryBytes > 0)
    ) {
      throw new Error('maxMemoryBytes must be a positive number');
    }
    const mode = maxMemoryBytes !== undefined ? 'streaming' : 'in-memory';
    this.logger.info('Starting index rebuild', {
      operation: 'rebuild',
      ref,
      limit,
      mode,
      maxMemoryBytes: maxMemoryBytes ?? null,
    });

    const startTime = performance.now();

    try {
      let treeOid;
      if (maxMemoryBytes !== undefined) {
        treeOid = await this._rebuildStreaming(ref, { limit, maxMemoryBytes, onFlush, onProgress, signal, frontier });
      } else {
        treeOid = await this._rebuildInMemory(ref, { limit, onProgress, signal, frontier });
      }

      const durationMs = performance.now() - startTime;
      this.logger.info('Index rebuild complete', {
        operation: 'rebuild',
        ref,
        mode,
        treeOid,
        durationMs,
      });

      return treeOid;
    } catch (err) {
      const durationMs = performance.now() - startTime;
      this.logger.error('Index rebuild failed', {
        operation: 'rebuild',
        ref,
        mode,
        error: err.message,
        durationMs,
      });
      throw err;
    }
  }

  /**
   * In-memory rebuild implementation (original behavior).
   *
   * Loads all nodes into memory, builds the complete index, then persists
   * in a single batch. Fastest approach but requires O(N) memory.
   *
   * @param {string} ref - Git ref to traverse from
   * @param {Object} options - Options
   * @param {number} options.limit - Maximum nodes to process
   * @param {Function} [options.onProgress] - Progress callback invoked every 10,000 nodes.
   *   Receives `{ processedNodes: number, currentMemoryBytes: null }`.
   * @param {AbortSignal} [options.signal] - Abort signal for cancellation. Checked every
   *   10,000 nodes to balance responsiveness with performance.
   * @param {Map<string, string>} [options.frontier] - Frontier to persist with the index
   * @returns {Promise<string>} Tree OID of the persisted index
   * @throws {OperationAbortedError} If the signal is aborted during iteration
   * @throws {Error} If node iteration or index persistence fails
   * @private
   */
  async _rebuildInMemory(ref, { limit, onProgress, signal, frontier }) {
    const builder = new BitmapIndexBuilder({ crypto: this._crypto });
    let processedNodes = 0;

    for await (const node of this.graphService.iterateNodes({ ref, limit })) {
      builder.registerNode(node.sha);
      for (const parentSha of node.parents) {
        builder.addEdge(parentSha, node.sha);
      }

      processedNodes++;
      // Abort/progress checks are batched to keep the hot loop cheap.
      if (processedNodes % 10000 === 0) {
        checkAborted(signal, 'rebuild');
        if (onProgress) {
          onProgress({ processedNodes, currentMemoryBytes: null });
        }
      }
    }

    return await this._persistIndex(builder, { frontier });
  }

  /**
   * Streaming rebuild implementation with memory-bounded operation.
   *
   * Uses StreamingBitmapIndexBuilder to flush bitmap data to storage when
   * memory usage exceeds the threshold. Multiple chunks are written during
   * iteration, then merged at finalization. Memory is bounded by
   * `maxMemoryBytes` at the cost of extra I/O.
   *
   * @param {string} ref - Git ref to traverse from
   * @param {Object} options - Options
   * @param {number} options.limit - Maximum nodes to process
   * @param {number} options.maxMemoryBytes - Memory threshold in bytes. When estimated
   *   bitmap memory exceeds this, a flush is triggered.
   * @param {Function} [options.onFlush] - Flush callback invoked after each flush.
   *   Receives `{ flushedBytes, totalFlushedBytes, flushCount }`.
   * @param {Function} [options.onProgress] - Progress callback invoked every 10,000 nodes.
   *   Receives `{ processedNodes, currentMemoryBytes }`.
   * @param {AbortSignal} [options.signal] - Abort signal for cancellation. Checked every
   *   10,000 nodes during iteration and at finalization.
   * @param {Map<string, string>} [options.frontier] - Frontier to persist with the index
   * @returns {Promise<string>} Tree OID of the persisted index
   * @throws {OperationAbortedError} If the signal is aborted during iteration or finalization
   * @throws {Error} If node iteration, flush, or finalization fails
   * @private
   */
  async _rebuildStreaming(ref, { limit, maxMemoryBytes, onFlush, onProgress, signal, frontier }) {
    const builder = new StreamingBitmapIndexBuilder({
      storage: this.storage,
      maxMemoryBytes,
      onFlush,
      crypto: this._crypto,
    });

    let processedNodes = 0;

    for await (const node of this.graphService.iterateNodes({ ref, limit })) {
      // Builder calls are async here: they may trigger a flush to storage.
      await builder.registerNode(node.sha);
      for (const parentSha of node.parents) {
        await builder.addEdge(parentSha, node.sha);
      }

      processedNodes++;
      if (processedNodes % 10000 === 0) {
        checkAborted(signal, 'rebuild');
        if (onProgress) {
          const stats = builder.getMemoryStats();
          onProgress({
            processedNodes,
            currentMemoryBytes: stats.estimatedBitmapBytes,
          });
        }
      }
    }

    return await builder.finalize({ signal, frontier });
  }

  /**
   * Persists a built index to storage (in-memory builder only).
   *
   * Serializes the builder's state, writes each shard as a blob, then
   * creates a flat tree with entries like:
   * - `100644 blob <oid>\tmeta_00.json`
   * - `100644 blob <oid>\tshards_fwd_00.json`
   * - `100644 blob <oid>\tshards_rev_00.json`
   * - `100644 blob <oid>\tfrontier.json` (if frontier provided)
   *
   * @param {BitmapIndexBuilder} builder - The builder containing index data
   * @param {Object} [options] - Persistence options
   * @param {Map<string, string>} [options.frontier] - Frontier to include in the tree
   * @returns {Promise<string>} OID of the created tree
   * @throws {Error} If storage.writeBlob() or storage.writeTree() fails
   * @private
   */
  async _persistIndex(builder, { frontier } = {}) {
    const treeStructure = await builder.serialize({ frontier });
    const flatEntries = [];
    for (const [path, buffer] of Object.entries(treeStructure)) {
      const oid = await this.storage.writeBlob(buffer);
      flatEntries.push(`100644 blob ${oid}\t${path}`);
    }
    return await this.storage.writeTree(flatEntries);
  }

  /**
   * Loads a previously built index from a tree OID.
   *
   * **Memory cost**: Lazy loading - only shards accessed are loaded into memory.
   * Initial load is O(1) (shard OID mappings only); shards load on demand and
   * are cached after first access.
   *
   * **Strict Mode** (default: `true`): fail-closed — shard integrity is
   * validated during loading and errors are thrown immediately, allowing
   * callers to trigger rebuilds. When disabled, the reader attempts graceful
   * degradation, which may yield incomplete or incorrect query results.
   *
   * @param {string} treeOid - OID of the index tree (from rebuild() or a saved ref)
   * @param {Object} [options] - Load options
   * @param {boolean} [options.strict=true] - Enable strict integrity verification (fail-closed).
   * @param {Map<string, string>} [options.currentFrontier] - Frontier to compare for staleness.
   *   Maps writer IDs to their current tip SHAs. When provided, triggers a staleness
   *   check against the frontier stored in the index.
   * @param {boolean} [options.autoRebuild=false] - Auto-rebuild when a stale index is detected.
   *   Requires `rebuildRef` to be set.
   * @param {string} [options.rebuildRef] - Git ref to rebuild from when `autoRebuild` is true.
   * @returns {Promise<BitmapIndexReader>} Configured reader ready for O(1) queries.
   * @throws {Error} If treeOid is invalid or tree cannot be read from storage
   * @throws {Error} If autoRebuild is true but rebuildRef is not provided
   * @throws {ShardValidationError} (strict mode) If shard structure validation fails
   * @throws {ShardCorruptionError} (strict mode) If shard data integrity check fails
   * @throws {ShardLoadError} (strict mode) If shard cannot be loaded from storage
   *
   * @example
   * // Load with strict integrity checking (default)
   * try {
   *   const reader = await service.load(treeOid);
   * } catch (err) {
   *   if (err instanceof ShardValidationError || err instanceof ShardCorruptionError) {
   *     // Integrity failure - trigger rebuild
   *     const newTreeOid = await service.rebuild(ref);
   *     const reader = await service.load(newTreeOid);
   *   }
   * }
   *
   * @example
   * // Load with graceful degradation (non-strict)
   * const reader = await service.load(treeOid, { strict: false });
   */
  async load(treeOid, { strict = true, currentFrontier, autoRebuild = false, rebuildRef } = {}) {
    this.logger.debug('Loading index', {
      operation: 'load',
      treeOid,
      strict,
    });

    if (autoRebuild && !rebuildRef) {
      throw new Error('rebuildRef is required when autoRebuild is true');
    }

    const startTime = performance.now();
    const shardOids = await this.storage.readTreeOids(treeOid);
    const shardCount = Object.keys(shardOids).length;

    // Staleness check: compare the frontier captured at build time against
    // the caller-supplied current frontier. Legacy indexes (no stored
    // frontier) skip the check entirely.
    if (currentFrontier) {
      const indexFrontier = await loadIndexFrontier(shardOids, this.storage, { codec: this._codec });
      if (indexFrontier) {
        const result = checkStaleness(indexFrontier, currentFrontier);
        if (result.stale) {
          this.logger.warn('Index is stale', {
            operation: 'load',
            reason: result.reason,
            hint: 'Rebuild the index or pass autoRebuild: true',
          });
          if (autoRebuild && rebuildRef) {
            // The rebuilt index embeds currentFrontier, so the recursive
            // load intentionally omits it (no repeated staleness check).
            const newTreeOid = await this.rebuild(rebuildRef, { frontier: currentFrontier });
            return await this.load(newTreeOid, { strict });
          }
        }
      } else {
        this.logger.debug('No frontier in index (legacy); skipping staleness check', {
          operation: 'load',
        });
      }
    }

    const reader = new BitmapIndexReader({ storage: this.storage, strict, logger: this.logger.child({ component: 'BitmapIndexReader' }), crypto: this._crypto });
    reader.setup(shardOids);

    const durationMs = performance.now() - startTime;
    this.logger.debug('Index loaded', {
      operation: 'load',
      treeOid,
      shardCount,
      durationMs,
    });

    return reader;
  }
}
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* IndexStalenessChecker - Detects stale bitmap indexes by comparing
|
|
3
|
+
* frontier metadata stored at build time against current writer refs.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import defaultCodec from '../utils/defaultCodec.js';
|
|
7
|
+
|
|
8
|
+
/**
 * Guards that a decoded blob looks like a frontier envelope:
 * an object carrying an object-valued `frontier` property.
 * @private
 */
function validateEnvelope(envelope, label) {
  const isRecord = (value) => Boolean(value) && typeof value === 'object';
  if (!isRecord(envelope) || !isRecord(envelope.frontier)) {
    throw new Error(`invalid frontier envelope for ${label}`);
  }
}

/**
 * Loads the frontier from an index tree's shard OIDs.
 *
 * Prefers the CBOR-encoded envelope (`frontier.cbor`) and falls back to the
 * JSON envelope (`frontier.json`); returns null when neither is present
 * (legacy index without a stored frontier).
 *
 * @param {Record<string, string>} shardOids - Map of path → blob OID from readTreeOids
 * @param {import('../../ports/IndexStoragePort.js').default} storage - Storage adapter
 * @param {Object} [options]
 * @param {import('../../ports/CodecPort.js').default} [options.codec] - Codec for deserialization
 * @returns {Promise<Map<string, string>|null>} Frontier map, or null if not present (legacy index)
 */
export async function loadIndexFrontier(shardOids, storage, { codec } = {}) {
  const activeCodec = codec || defaultCodec;

  // Shared read-decode-validate pipeline for both envelope encodings.
  const readEnvelope = async (path, decode) => {
    const oid = shardOids[path];
    if (!oid) {
      return null;
    }
    const raw = await storage.readBlob(oid);
    const envelope = decode(raw);
    validateEnvelope(envelope, path);
    return new Map(Object.entries(envelope.frontier));
  };

  const fromCbor = await readEnvelope('frontier.cbor', (raw) => activeCodec.decode(raw));
  if (fromCbor) {
    return fromCbor;
  }

  return readEnvelope('frontier.json', (raw) => JSON.parse(raw.toString('utf-8')));
}
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* @typedef {Object} StalenessResult
|
|
47
|
+
* @property {boolean} stale - Whether the index is stale
|
|
48
|
+
* @property {string} reason - Human-readable summary
|
|
49
|
+
* @property {string[]} advancedWriters - Writers whose tips changed
|
|
50
|
+
* @property {string[]} newWriters - Writers not in index frontier
|
|
51
|
+
* @property {string[]} removedWriters - Writers in index but not current
|
|
52
|
+
*/
|
|
53
|
+
|
|
54
|
+
/**
 * Renders a human-readable summary for a staleness comparison.
 * @private
 */
function buildReason({ stale, advancedWriters, newWriters, removedWriters }) {
  if (!stale) {
    return 'index is current';
  }
  const fragments = [
    [advancedWriters.length, 'writer(s) advanced'],
    [newWriters.length, 'new writer(s)'],
    [removedWriters.length, 'writer(s) removed'],
  ];
  return fragments
    .filter(([count]) => count > 0)
    .map(([count, label]) => (label === 'new writer(s)' ? `${count} ${label}` : `${count} ${label}`))
    .join(', ');
}

/**
 * Compares index frontier against current frontier to detect staleness.
 *
 * A writer is "advanced" when its tip differs, "new" when absent from the
 * index frontier, and "removed" when absent from the current frontier.
 * Any of the three conditions marks the index stale.
 *
 * @param {Map<string, string>} indexFrontier - Frontier stored in the index
 * @param {Map<string, string>} currentFrontier - Current frontier from refs
 * @returns {StalenessResult}
 */
export function checkStaleness(indexFrontier, currentFrontier) {
  const advancedWriters = [];
  const newWriters = [];

  for (const [writerId, currentTip] of currentFrontier) {
    const recordedTip = indexFrontier.get(writerId);
    if (recordedTip === undefined) {
      newWriters.push(writerId);
    } else if (recordedTip !== currentTip) {
      advancedWriters.push(writerId);
    }
  }

  // Writers known to the index but no longer present in the current refs.
  const removedWriters = [...indexFrontier.keys()].filter(
    (writerId) => !currentFrontier.has(writerId),
  );

  const stale = [advancedWriters, newWriters, removedWriters].some(
    (list) => list.length > 0,
  );
  const reason = buildReason({ stale, advancedWriters, newWriters, removedWriters });

  return { stale, reason, advancedWriters, newWriters, removedWriters };
}
|