sweet-search 2.5.2 → 2.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/core/cli.js +24 -3
- package/core/graph/graph-expansion.js +215 -36
- package/core/graph/graph-extractor.js +196 -11
- package/core/graph/graph-search.js +395 -92
- package/core/graph/hcgs-generator.js +2 -1
- package/core/graph/index.js +2 -0
- package/core/graph/repo-map.js +28 -6
- package/core/graph/structural-answer-cues.js +168 -0
- package/core/graph/structural-callsite-hints.js +40 -0
- package/core/graph/structural-context-format.js +40 -0
- package/core/graph/structural-context.js +450 -0
- package/core/graph/structural-forward-push.js +156 -0
- package/core/graph/structural-header-context.js +19 -0
- package/core/graph/structural-importance.js +148 -0
- package/core/graph/structural-pagerank.js +197 -0
- package/core/graph/summary-manager.js +13 -9
- package/core/incremental-indexing/application/dirty-scan.mjs +236 -0
- package/core/incremental-indexing/application/file-watcher.mjs +197 -0
- package/core/incremental-indexing/application/maintenance-handlers.mjs +519 -0
- package/core/incremental-indexing/application/maintenance-worker.mjs +380 -0
- package/core/incremental-indexing/application/operator-cli.mjs +554 -0
- package/core/incremental-indexing/application/production-li-delta.mjs +192 -0
- package/core/incremental-indexing/application/production-reconciler-helpers.mjs +107 -0
- package/core/incremental-indexing/application/production-reconciler.mjs +583 -0
- package/core/incremental-indexing/application/reconciler.mjs +477 -0
- package/core/incremental-indexing/application/tombstone-injector.mjs +148 -0
- package/core/incremental-indexing/domain/chunk-identity.mjs +260 -0
- package/core/incremental-indexing/domain/encoder-deps.mjs +193 -0
- package/core/incremental-indexing/domain/encoder-input.mjs +225 -0
- package/core/incremental-indexing/domain/interval-autotune.mjs +255 -0
- package/core/incremental-indexing/domain/reconcile-counters.mjs +149 -0
- package/core/incremental-indexing/domain/watermark-scheduler.mjs +239 -0
- package/core/incremental-indexing/infrastructure/artifact-temp-sweep.mjs +163 -0
- package/core/incremental-indexing/infrastructure/baseline-readiness.mjs +121 -0
- package/core/incremental-indexing/infrastructure/dirty-set.mjs +233 -0
- package/core/incremental-indexing/infrastructure/graph-gc.mjs +314 -0
- package/core/incremental-indexing/infrastructure/hashing.mjs +298 -0
- package/core/incremental-indexing/infrastructure/hcgs-invalidation.mjs +182 -0
- package/core/incremental-indexing/infrastructure/li-segment-merge.mjs +278 -0
- package/core/incremental-indexing/infrastructure/li-segment-state.mjs +173 -0
- package/core/incremental-indexing/infrastructure/lockfile.mjs +119 -0
- package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +283 -0
- package/core/incremental-indexing/infrastructure/manifest.mjs +194 -0
- package/core/incremental-indexing/infrastructure/path-filter.mjs +190 -0
- package/core/incremental-indexing/infrastructure/reader-heartbeat.mjs +201 -0
- package/core/incremental-indexing/infrastructure/schema-migrations.mjs +257 -0
- package/core/incremental-indexing/infrastructure/sparse-gram-delta.mjs +335 -0
- package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +176 -0
- package/core/incremental-indexing/infrastructure/staleness-display.mjs +105 -0
- package/core/incremental-indexing/infrastructure/tombstone-bitmap.mjs +234 -0
- package/core/incremental-indexing/infrastructure/vector-delta-writer.mjs +359 -0
- package/core/incremental-indexing/infrastructure/vector-gc.mjs +133 -0
- package/core/incremental-indexing/infrastructure/worktree-stamp.mjs +155 -0
- package/core/incremental-indexing/infrastructure/wsl2-detect.mjs +115 -0
- package/core/indexing/admission-policy.js +139 -0
- package/core/indexing/artifact-builder.js +29 -12
- package/core/indexing/ast-chunker.js +107 -30
- package/core/indexing/dedup/exemplar-selector.js +19 -1
- package/core/indexing/gitignore-filter.js +223 -0
- package/core/indexing/incremental-tracker.js +99 -30
- package/core/indexing/index-codebase-v21.js +6 -5
- package/core/indexing/index-maintainer.mjs +698 -6
- package/core/indexing/indexer-ann.js +99 -15
- package/core/indexing/indexer-build.js +158 -45
- package/core/indexing/indexer-empty-baseline.js +80 -0
- package/core/indexing/indexer-manifest.js +66 -0
- package/core/indexing/indexer-phases.js +56 -23
- package/core/indexing/indexer-sparse-gram.js +54 -13
- package/core/indexing/indexer-utils.js +26 -208
- package/core/indexing/indexing-file-policy.js +32 -7
- package/core/indexing/maintainer-launcher.mjs +137 -0
- package/core/indexing/merkle-tracker.js +251 -244
- package/core/indexing/model-pool.js +46 -5
- package/core/infrastructure/code-graph-repository.js +758 -6
- package/core/infrastructure/code-graph-visibility.js +157 -0
- package/core/infrastructure/codebase-repository.js +100 -13
- package/core/infrastructure/config/search.js +1 -1
- package/core/infrastructure/db-utils.js +118 -0
- package/core/infrastructure/dedup-hashing.js +10 -13
- package/core/infrastructure/hardware-capability.js +17 -7
- package/core/infrastructure/index.js +8 -2
- package/core/infrastructure/language-patterns/maps.js +4 -1
- package/core/infrastructure/language-patterns/registry-core.js +56 -17
- package/core/infrastructure/language-patterns/registry-object-oriented.js +12 -5
- package/core/infrastructure/language-patterns.js +69 -0
- package/core/infrastructure/model-registry.js +20 -0
- package/core/infrastructure/native-inference.js +7 -12
- package/core/infrastructure/native-resolver.js +52 -37
- package/core/infrastructure/native-sparse-gram.js +261 -20
- package/core/infrastructure/native-tokenizer.js +6 -15
- package/core/infrastructure/simd-distance.js +10 -16
- package/core/infrastructure/sparse-gram-delta-reader.js +76 -0
- package/core/infrastructure/structural-alias-resolver.js +122 -0
- package/core/infrastructure/structural-candidate-ranker.js +34 -0
- package/core/infrastructure/structural-context-repository.js +472 -0
- package/core/infrastructure/structural-context-utils.js +51 -0
- package/core/infrastructure/structural-graph-signals.js +121 -0
- package/core/infrastructure/structural-qualified-resolution.js +15 -0
- package/core/infrastructure/structural-source-definitions.js +100 -0
- package/core/infrastructure/tombstone-bitmap-reader.js +139 -0
- package/core/infrastructure/tree-sitter-provider.js +811 -37
- package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md +50 -0
- package/core/query/query-router.js +55 -5
- package/core/ranking/file-kind-ranking.js +2192 -15
- package/core/ranking/late-interaction-index.js +87 -12
- package/core/search/cli-decoration.js +290 -0
- package/core/search/context-expander.js +988 -78
- package/core/search/index.js +1 -0
- package/core/search/output-policy.js +275 -0
- package/core/search/search-anchor.js +499 -0
- package/core/search/search-boost.js +93 -1
- package/core/search/search-cli.js +61 -204
- package/core/search/search-hybrid.js +250 -10
- package/core/search/search-pattern-chunks.js +57 -8
- package/core/search/search-pattern-planner.js +68 -9
- package/core/search/search-pattern-prefilter.js +30 -10
- package/core/search/search-pattern-ripgrep.js +40 -4
- package/core/search/search-pattern-sparse-overlay.js +256 -0
- package/core/search/search-pattern.js +117 -29
- package/core/search/search-postprocess.js +479 -5
- package/core/search/search-read-semantic.js +260 -23
- package/core/search/search-read.js +82 -64
- package/core/search/search-reader-pin.js +71 -0
- package/core/search/search-rrf.js +279 -0
- package/core/search/search-semantic.js +110 -5
- package/core/search/search-server.js +130 -57
- package/core/search/search-trace.js +107 -0
- package/core/search/server-identity.js +93 -0
- package/core/search/session-daemon-prewarm.mjs +33 -10
- package/core/search/sweet-search.js +399 -7
- package/core/skills/sweet-index/SKILL.md +8 -6
- package/core/vector-store/binary-hnsw-index.js +194 -30
- package/core/vector-store/float-vector-store.js +96 -6
- package/core/vector-store/hnsw-index.js +220 -49
- package/eval/agent-read-workflows/bin/_ss-helpers.mjs +471 -0
- package/eval/agent-read-workflows/bin/ss-find +15 -0
- package/eval/agent-read-workflows/bin/ss-grep +12 -0
- package/eval/agent-read-workflows/bin/ss-read +14 -0
- package/eval/agent-read-workflows/bin/ss-search +18 -0
- package/eval/agent-read-workflows/bin/ss-semantic +12 -0
- package/eval/agent-read-workflows/bin/ss-trace +11 -0
- package/mcp/read-tool.js +109 -0
- package/mcp/server.js +55 -15
- package/mcp/tool-handlers.js +14 -124
- package/mcp/trace-tool.js +81 -0
- package/package.json +25 -10
- package/scripts/hooks/intercept-read.mjs +55 -0
- package/scripts/hooks/remind-tools.mjs +40 -0
- package/scripts/init.js +698 -54
- package/scripts/inject-agent-instructions.js +431 -0
- package/scripts/install-prompt-reminders.js +188 -0
- package/scripts/install-tool-enforcement.js +220 -0
- package/scripts/smoke-test.js +12 -9
- package/scripts/uninstall.js +276 -18
- package/scripts/write-claude-rules.js +110 -0
|
@@ -13,14 +13,24 @@
|
|
|
13
13
|
*/
|
|
14
14
|
|
|
15
15
|
import fs from 'fs/promises';
|
|
16
|
-
import { existsSync } from 'fs';
|
|
16
|
+
import { existsSync, statSync } from 'fs';
|
|
17
17
|
import path from 'path';
|
|
18
18
|
import { HNSW_CONFIG, DB_PATHS, EMBEDDING_CONFIG } from '../infrastructure/config/index.js';
|
|
19
|
+
import {
|
|
20
|
+
createBitmap,
|
|
21
|
+
loadBitmap,
|
|
22
|
+
resizeBitmap,
|
|
23
|
+
saveBitmap,
|
|
24
|
+
setBit,
|
|
25
|
+
isSet,
|
|
26
|
+
} from '../infrastructure/tombstone-bitmap-reader.js';
|
|
19
27
|
|
|
20
28
|
// =============================================================================
|
|
21
29
|
// HNSW INDEX CLASS (USearch Implementation)
|
|
22
30
|
// =============================================================================
|
|
23
31
|
|
|
32
|
+
const HNSW_MAX_ELEMENTS_HARD_CEILING = 100_000_000;
|
|
33
|
+
|
|
24
34
|
export class HNSWIndex {
|
|
25
35
|
constructor(options = {}) {
|
|
26
36
|
this.dimension = options.dimension || EMBEDDING_CONFIG.hnswDimension;
|
|
@@ -30,6 +40,7 @@ export class HNSWIndex {
|
|
|
30
40
|
this.efSearch = options.efSearch || HNSW_CONFIG.efSearch;
|
|
31
41
|
this.metric = options.metric || HNSW_CONFIG.metric;
|
|
32
42
|
this.indexPath = options.indexPath || DB_PATHS.hnswIndex;
|
|
43
|
+
this.stalePath = options.stalePath || `${this.indexPath}.stale.bin`;
|
|
33
44
|
|
|
34
45
|
this.index = null;
|
|
35
46
|
this.idMap = new Map(); // string id -> numeric key
|
|
@@ -41,6 +52,7 @@ export class HNSWIndex {
|
|
|
41
52
|
this.useFallback = false;
|
|
42
53
|
this.vectors = []; // Fallback: store all vectors
|
|
43
54
|
this.usearchModule = null;
|
|
55
|
+
this._staleBitmapCache = null;
|
|
44
56
|
}
|
|
45
57
|
|
|
46
58
|
/**
|
|
@@ -73,6 +85,8 @@ export class HNSWIndex {
|
|
|
73
85
|
dimensions: this.dimension,
|
|
74
86
|
quantization: 'f32',
|
|
75
87
|
});
|
|
88
|
+
this._reserveNativeCapacity(this.maxElements);
|
|
89
|
+
this.useFallback = false;
|
|
76
90
|
|
|
77
91
|
console.log(`HNSW: Using USearch (${usearchMetric}, dim=${this.dimension}, M=${this.M})`);
|
|
78
92
|
} catch (err) {
|
|
@@ -111,20 +125,58 @@ export class HNSWIndex {
|
|
|
111
125
|
return key;
|
|
112
126
|
}
|
|
113
127
|
|
|
114
|
-
// Add new
|
|
115
|
-
|
|
128
|
+
// Add new. Commit maps only after native add succeeds; otherwise a
|
|
129
|
+
// transient native failure would leave a row visible without a graph node.
|
|
130
|
+
const key = this.nextKey;
|
|
131
|
+
|
|
132
|
+
if (!this.useFallback && this.index) {
|
|
133
|
+
this._addNativeVector(key, vecArray);
|
|
134
|
+
} else {
|
|
135
|
+
this.vectors[key] = { id, vector: normalized, metadata };
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
this.nextKey++;
|
|
116
139
|
this.idMap.set(id, key);
|
|
117
140
|
this.reverseMap.set(key, id);
|
|
118
141
|
this.metadata.set(id, metadata);
|
|
119
142
|
|
|
120
|
-
|
|
121
|
-
|
|
143
|
+
return key;
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
_addNativeVector(key, vecArray) {
|
|
147
|
+
try {
|
|
122
148
|
this.index.add(BigInt(key), vecArray);
|
|
123
|
-
|
|
124
|
-
|
|
149
|
+
return;
|
|
150
|
+
} catch (err) {
|
|
151
|
+
if (!isNativeCapacityError(err)) {
|
|
152
|
+
throw err;
|
|
153
|
+
}
|
|
125
154
|
}
|
|
126
155
|
|
|
127
|
-
|
|
156
|
+
const nextCapacity = this._nextNativeCapacity();
|
|
157
|
+
if (!this._reserveNativeCapacity(nextCapacity)) {
|
|
158
|
+
throw new Error(
|
|
159
|
+
`HNSW capacity exhausted at ${this.maxElements} elements and native reserve() is unavailable`
|
|
160
|
+
);
|
|
161
|
+
}
|
|
162
|
+
this.index.add(BigInt(key), vecArray);
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
_nextNativeCapacity() {
|
|
166
|
+
const current = Math.max(1, this.maxElements, this.nextKey + 1);
|
|
167
|
+
return Math.min(
|
|
168
|
+
HNSW_MAX_ELEMENTS_HARD_CEILING,
|
|
169
|
+
Math.max(current + 1, Math.ceil(current * 1.25), this.nextKey + 1)
|
|
170
|
+
);
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
_reserveNativeCapacity(capacity) {
|
|
174
|
+
if (!this.index || typeof this.index.reserve !== 'function') return false;
|
|
175
|
+
const nextCapacity = Math.min(HNSW_MAX_ELEMENTS_HARD_CEILING, Math.ceil(capacity));
|
|
176
|
+
if (!Number.isFinite(nextCapacity) || nextCapacity <= 0) return false;
|
|
177
|
+
this.index.reserve(nextCapacity);
|
|
178
|
+
this.maxElements = Math.max(this.maxElements, nextCapacity);
|
|
179
|
+
return true;
|
|
128
180
|
}
|
|
129
181
|
|
|
130
182
|
/**
|
|
@@ -144,10 +196,89 @@ export class HNSWIndex {
|
|
|
144
196
|
/**
|
|
145
197
|
* Search for k nearest neighbors
|
|
146
198
|
*/
|
|
199
|
+
_loadStaleBitmap() {
|
|
200
|
+
let stat;
|
|
201
|
+
try {
|
|
202
|
+
stat = statSync(this.stalePath, { bigint: true });
|
|
203
|
+
} catch {
|
|
204
|
+
this._staleBitmapCache = null;
|
|
205
|
+
return null;
|
|
206
|
+
}
|
|
207
|
+
const statKey = `${stat.mtimeNs}:${stat.ctimeNs}:${stat.size}`;
|
|
208
|
+
|
|
209
|
+
if (
|
|
210
|
+
this._staleBitmapCache
|
|
211
|
+
&& this._staleBitmapCache.statKey === statKey
|
|
212
|
+
) {
|
|
213
|
+
return this._staleBitmapCache.bitmap;
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
try {
|
|
217
|
+
const bitmap = loadBitmap(this.stalePath);
|
|
218
|
+
this._staleBitmapCache = { statKey, bitmap };
|
|
219
|
+
return bitmap;
|
|
220
|
+
} catch (err) {
|
|
221
|
+
if (process.env.SWEET_DEBUG) {
|
|
222
|
+
console.debug(`[HNSW] ignoring unreadable stale bitmap ${this.stalePath}: ${err.message}`);
|
|
223
|
+
}
|
|
224
|
+
this._staleBitmapCache = { statKey, bitmap: null };
|
|
225
|
+
return null;
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
_isKeyStale(key, bitmap) {
|
|
230
|
+
return bitmap ? isSet(bitmap, key) : false;
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
_markKeyStale(key) {
|
|
234
|
+
const capacity = Math.max(key + 1, this.nextKey, 1);
|
|
235
|
+
let bitmap = null;
|
|
236
|
+
try {
|
|
237
|
+
bitmap = loadBitmap(this.stalePath);
|
|
238
|
+
} catch (err) {
|
|
239
|
+
if (process.env.SWEET_DEBUG) {
|
|
240
|
+
console.debug(`[HNSW] replacing unreadable stale bitmap ${this.stalePath}: ${err.message}`);
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
bitmap = bitmap ? resizeBitmap(bitmap, capacity) : createBitmap(capacity);
|
|
244
|
+
setBit(bitmap, key);
|
|
245
|
+
saveBitmap(this.stalePath, bitmap);
|
|
246
|
+
this._staleBitmapCache = null;
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
async clearStaleBitmap() {
|
|
250
|
+
await fs.rm(this.stalePath, { force: true });
|
|
251
|
+
this._staleBitmapCache = null;
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
_oversampleTarget(k, bitmap) {
|
|
255
|
+
const searchable = this._searchableKeyCount();
|
|
256
|
+
const live = this._liveCount(bitmap);
|
|
257
|
+
const tombstoned = Math.max(0, searchable - live);
|
|
258
|
+
if (tombstoned === 0) return k;
|
|
259
|
+
const s = Math.max(0, Math.min(tombstoned / Math.max(1, searchable), 0.5));
|
|
260
|
+
return Math.min(Math.max(k + 64, Math.ceil(k / Math.max(0.05, 1 - s) * 2)), k * 20);
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
_searchableKeyCount() {
|
|
264
|
+
if (this.useFallback) return this.vectors.length;
|
|
265
|
+
return Math.max(this.nextKey, this.idMap.size);
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
_liveCount(bitmap) {
|
|
269
|
+
if (!bitmap) return this.idMap.size;
|
|
270
|
+
let live = 0;
|
|
271
|
+
for (const key of this.reverseMap.keys()) {
|
|
272
|
+
if (!this._isKeyStale(key, bitmap)) live++;
|
|
273
|
+
}
|
|
274
|
+
return live;
|
|
275
|
+
}
|
|
276
|
+
|
|
147
277
|
async search(queryVector, k = 10) {
|
|
148
278
|
await this.init();
|
|
149
279
|
|
|
150
280
|
const start = performance.now();
|
|
281
|
+
const staleBitmap = this._loadStaleBitmap();
|
|
151
282
|
|
|
152
283
|
// Truncate and normalize query
|
|
153
284
|
const truncated = queryVector.length > this.dimension
|
|
@@ -160,38 +291,50 @@ export class HNSWIndex {
|
|
|
160
291
|
if (!this.useFallback && this.index) {
|
|
161
292
|
// Use native USearch
|
|
162
293
|
const vecArray = new Float32Array(normalized);
|
|
163
|
-
const
|
|
294
|
+
const candidateK = this._oversampleTarget(k, staleBitmap);
|
|
295
|
+
const actualK = Math.min(candidateK, this._searchableKeyCount());
|
|
164
296
|
|
|
165
297
|
if (actualK === 0) {
|
|
166
298
|
results = [];
|
|
167
299
|
} else {
|
|
168
|
-
const
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
300
|
+
const collect = (limit) => {
|
|
301
|
+
const searchResult = this.index.search(vecArray, limit);
|
|
302
|
+
const collected = [];
|
|
303
|
+
// USearch returns { keys: BigUint64Array, distances: Float32Array, count: number }
|
|
304
|
+
const count = searchResult.count || searchResult.keys?.length || 0;
|
|
305
|
+
|
|
306
|
+
for (let i = 0; i < count; i++) {
|
|
307
|
+
const key = Number(searchResult.keys[i]);
|
|
308
|
+
if (this._isKeyStale(key, staleBitmap)) continue;
|
|
309
|
+
const id = this.reverseMap.get(key);
|
|
310
|
+
if (id) {
|
|
311
|
+
// Convert distance to similarity (cosine distance to similarity)
|
|
312
|
+
const distance = searchResult.distances[i];
|
|
313
|
+
const score = this.metric === 'cosine' ? 1 - distance : -distance;
|
|
314
|
+
|
|
315
|
+
collected.push({
|
|
316
|
+
id,
|
|
317
|
+
score,
|
|
318
|
+
metadata: this.metadata.get(id) || {},
|
|
319
|
+
});
|
|
320
|
+
if (collected.length >= k) break;
|
|
321
|
+
}
|
|
187
322
|
}
|
|
323
|
+
return collected;
|
|
324
|
+
};
|
|
325
|
+
|
|
326
|
+
results = collect(actualK);
|
|
327
|
+
const retryK = Math.min(actualK * 2, this._searchableKeyCount());
|
|
328
|
+
if (results.length < k && retryK > actualK) {
|
|
329
|
+
results = collect(retryK);
|
|
188
330
|
}
|
|
189
331
|
}
|
|
190
332
|
} else {
|
|
191
333
|
// Pure JS fallback: O(N) scan
|
|
192
334
|
results = this.vectors
|
|
193
|
-
.
|
|
194
|
-
.
|
|
335
|
+
.map((v, key) => ({ v, key }))
|
|
336
|
+
.filter(({ v, key }) => v !== null && !this._isKeyStale(key, staleBitmap))
|
|
337
|
+
.map(({ v }) => ({
|
|
195
338
|
id: v.id,
|
|
196
339
|
score: this.cosineSimilarity(normalized, v.vector),
|
|
197
340
|
metadata: v.metadata || {},
|
|
@@ -207,7 +350,7 @@ export class HNSWIndex {
|
|
|
207
350
|
latency_us: Math.round(latency * 1000), // microseconds
|
|
208
351
|
latency_ms: latency.toFixed(3),
|
|
209
352
|
k,
|
|
210
|
-
total: this.
|
|
353
|
+
total: this._liveCount(staleBitmap),
|
|
211
354
|
usedFallback: this.useFallback,
|
|
212
355
|
};
|
|
213
356
|
}
|
|
@@ -217,9 +360,10 @@ export class HNSWIndex {
|
|
|
217
360
|
*/
|
|
218
361
|
async get(id) {
|
|
219
362
|
if (!this.idMap.has(id)) return null;
|
|
363
|
+
const key = this.idMap.get(id);
|
|
364
|
+
if (this._isKeyStale(key, this._loadStaleBitmap())) return null;
|
|
220
365
|
|
|
221
366
|
if (this.useFallback) {
|
|
222
|
-
const key = this.idMap.get(id);
|
|
223
367
|
return this.vectors[key];
|
|
224
368
|
}
|
|
225
369
|
|
|
@@ -236,15 +380,7 @@ export class HNSWIndex {
|
|
|
236
380
|
if (!this.idMap.has(id)) return false;
|
|
237
381
|
|
|
238
382
|
const key = this.idMap.get(id);
|
|
239
|
-
|
|
240
|
-
// USearch supports remove
|
|
241
|
-
if (!this.useFallback && this.index?.remove) {
|
|
242
|
-
try {
|
|
243
|
-
this.index.remove(BigInt(key));
|
|
244
|
-
} catch (err) {
|
|
245
|
-
// Ignore removal errors
|
|
246
|
-
}
|
|
247
|
-
}
|
|
383
|
+
this._markKeyStale(key);
|
|
248
384
|
|
|
249
385
|
if (this.useFallback) {
|
|
250
386
|
this.vectors[key] = null;
|
|
@@ -258,7 +394,22 @@ export class HNSWIndex {
|
|
|
258
394
|
}
|
|
259
395
|
|
|
260
396
|
/**
|
|
261
|
-
* Save index to disk
|
|
397
|
+
* Save index to disk.
|
|
398
|
+
*
|
|
399
|
+
* Publish semantics: each sidecar is written to a sibling
|
|
400
|
+
* `<path>.tmp.<pid>` and then `renameSync`'d into its canonical name.
|
|
401
|
+
* On POSIX, atomic rename keeps existing mmaps valid against the
|
|
402
|
+
* unlinked old inode — without this, a cross-process reader that
|
|
403
|
+
* holds a `usearch.view()` mmap over the canonical .usearch file
|
|
404
|
+
* would SIGBUS / SIGSEGV the moment the next reconcile tick or
|
|
405
|
+
* maintenance pass truncates+writes the file in place.
|
|
406
|
+
*
|
|
407
|
+
* Publish ORDER: data first (.usearch / .vectors.json), then
|
|
408
|
+
* .meta.json LAST. A fresh reader that successfully reads the new
|
|
409
|
+
* meta.json is guaranteed to read the matching data sidecar
|
|
410
|
+
* alongside it. The brief residual window — `(OLD meta, NEW
|
|
411
|
+
* .usearch)` — yields MISSING results (keys beyond the new index
|
|
412
|
+
* size are absent) instead of GARBAGE results.
|
|
262
413
|
*/
|
|
263
414
|
async save(indexPath = this.indexPath) {
|
|
264
415
|
await fs.mkdir(path.dirname(indexPath), { recursive: true });
|
|
@@ -277,19 +428,24 @@ export class HNSWIndex {
|
|
|
277
428
|
useFallback: this.useFallback,
|
|
278
429
|
};
|
|
279
430
|
|
|
280
|
-
// Save metadata
|
|
281
431
|
const metaPath = indexPath.replace('.idx', '.meta.json');
|
|
282
|
-
|
|
432
|
+
const metaTmpPath = `${metaPath}.tmp.${process.pid}`;
|
|
433
|
+
await fs.writeFile(metaTmpPath, JSON.stringify(state, null, 2));
|
|
283
434
|
|
|
284
435
|
if (!this.useFallback && this.index) {
|
|
285
|
-
// Save USearch index (uses .usearch extension)
|
|
286
436
|
const usearchPath = indexPath.replace('.idx', '.usearch');
|
|
287
|
-
|
|
437
|
+
const usearchTmpPath = `${usearchPath}.tmp.${process.pid}`;
|
|
438
|
+
this.index.save(usearchTmpPath);
|
|
439
|
+
// Atomic rename: data first, descriptor last.
|
|
440
|
+
await fs.rename(usearchTmpPath, usearchPath);
|
|
441
|
+
await fs.rename(metaTmpPath, metaPath);
|
|
288
442
|
console.log(`HNSW: Saved ${this.nextKey} vectors to ${usearchPath} (USearch)`);
|
|
289
443
|
} else {
|
|
290
|
-
// Save fallback vectors
|
|
291
444
|
const vectorsPath = indexPath.replace('.idx', '.vectors.json');
|
|
292
|
-
|
|
445
|
+
const vectorsTmpPath = `${vectorsPath}.tmp.${process.pid}`;
|
|
446
|
+
await fs.writeFile(vectorsTmpPath, JSON.stringify(this.vectors));
|
|
447
|
+
await fs.rename(vectorsTmpPath, vectorsPath);
|
|
448
|
+
await fs.rename(metaTmpPath, metaPath);
|
|
293
449
|
console.log(`HNSW: Saved ${this.vectors.length} vectors to ${vectorsPath} (fallback)`);
|
|
294
450
|
}
|
|
295
451
|
}
|
|
@@ -366,6 +522,10 @@ export class HNSWIndex {
|
|
|
366
522
|
this.vectors = JSON.parse(await fs.readFile(vectorsPath, 'utf-8'));
|
|
367
523
|
this.useFallback = true;
|
|
368
524
|
console.log(`HNSW: Loaded ${this.vectors.length} vectors from ${vectorsPath} (fallback)`);
|
|
525
|
+
} else if (!state.useFallback && (state.idMap?.length || state.nextKey || 0) > 0) {
|
|
526
|
+
throw new Error(
|
|
527
|
+
`HNSW native artifact is missing or unreadable for ${indexPath}; refusing to serve stale metadata without vectors`
|
|
528
|
+
);
|
|
369
529
|
} else {
|
|
370
530
|
// Initialize empty fallback
|
|
371
531
|
this.useFallback = true;
|
|
@@ -432,11 +592,17 @@ export class HNSWIndex {
|
|
|
432
592
|
this.metadata.clear();
|
|
433
593
|
this.nextKey = 0;
|
|
434
594
|
this.vectors = [];
|
|
595
|
+
await this.clearStaleBitmap();
|
|
435
596
|
this.index = null;
|
|
436
597
|
await this.init();
|
|
437
598
|
}
|
|
438
599
|
}
|
|
439
600
|
|
|
601
|
+
function isNativeCapacityError(err) {
|
|
602
|
+
const message = String(err?.message || err).toLowerCase();
|
|
603
|
+
return /\b(capacity|reserve|max\s*elements?|max_elements|full|allocation|out of memory|oom)\b/.test(message);
|
|
604
|
+
}
|
|
605
|
+
|
|
440
606
|
// =============================================================================
|
|
441
607
|
// FACTORY FUNCTION
|
|
442
608
|
// =============================================================================
|
|
@@ -447,7 +613,7 @@ export class HNSWIndex {
|
|
|
447
613
|
export async function createHNSWIndex(options = {}) {
|
|
448
614
|
const index = new HNSWIndex(options);
|
|
449
615
|
|
|
450
|
-
if (options.load &&
|
|
616
|
+
if (options.load && hnswArtifactsExist(options.indexPath || DB_PATHS.hnswIndex)) {
|
|
451
617
|
await index.load(options.indexPath);
|
|
452
618
|
} else {
|
|
453
619
|
await index.init();
|
|
@@ -456,6 +622,11 @@ export async function createHNSWIndex(options = {}) {
|
|
|
456
622
|
return index;
|
|
457
623
|
}
|
|
458
624
|
|
|
625
|
+
function hnswArtifactsExist(indexPath) {
|
|
626
|
+
const metaPath = indexPath.replace('.idx', '.meta.json');
|
|
627
|
+
return existsSync(metaPath);
|
|
628
|
+
}
|
|
629
|
+
|
|
459
630
|
// =============================================================================
|
|
460
631
|
// CLI
|
|
461
632
|
// =============================================================================
|