sweet-search 2.5.13 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -9
- package/core/cli.js +41 -3
- package/core/embedding/embedding-local-model.js +106 -10
- package/core/embedding/embedding-service.js +59 -1
- package/core/embedding/model-client.mjs +257 -0
- package/core/embedding/model-server.mjs +217 -0
- package/core/incremental-indexing/application/maintenance-handlers.mjs +19 -98
- package/core/incremental-indexing/application/maintenance-worker.mjs +46 -9
- package/core/incremental-indexing/application/operator-cli.mjs +14 -5
- package/core/incremental-indexing/application/production-reconciler-helpers.mjs +40 -0
- package/core/incremental-indexing/application/production-reconciler.mjs +718 -54
- package/core/incremental-indexing/application/reconciler.mjs +87 -15
- package/core/incremental-indexing/domain/cutoff-cache.mjs +191 -0
- package/core/incremental-indexing/domain/interval-autotune.mjs +84 -1
- package/core/incremental-indexing/domain/reconcile-counters.mjs +0 -4
- package/core/incremental-indexing/domain/watermark-scheduler.mjs +0 -24
- package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +2 -26
- package/core/incremental-indexing/infrastructure/manifest.mjs +1 -9
- package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +72 -0
- package/core/indexing/artifact-builder.js +1 -1
- package/core/indexing/dedup/dedup-phase.js +36 -17
- package/core/indexing/dedup/exemplar-selector.js +5 -0
- package/core/indexing/index-codebase-v21.js +37 -14
- package/core/indexing/index-maintainer.mjs +337 -6
- package/core/indexing/indexer-ann.js +27 -434
- package/core/indexing/indexer-build.js +30 -14
- package/core/indexing/indexer-manifest.js +0 -3
- package/core/indexing/indexer-phases.js +101 -25
- package/core/indexing/maintainer-launcher.mjs +22 -0
- package/core/indexing/maintainer-watcher.mjs +397 -0
- package/core/indexing/os-priority.mjs +160 -0
- package/core/indexing/rss-budget.mjs +425 -0
- package/core/indexing/streaming-vectors.js +450 -0
- package/core/infrastructure/config/platform.js +14 -10
- package/core/infrastructure/onnx-session-utils.js +37 -0
- package/core/infrastructure/sparse-gram-delta-reader.js +11 -1
- package/core/ranking/late-interaction-index.js +58 -7
- package/core/search/daemon-registry.js +199 -0
- package/core/search/search-read-semantic.js +9 -3
- package/core/search/search-semantic.js +6 -29
- package/core/search/search-server.js +527 -27
- package/core/search/session-daemon-prewarm.mjs +110 -1
- package/core/search/sweet-search.js +0 -38
- package/core/vector-store/binary-hnsw-index.js +692 -78
- package/core/vector-store/index.js +1 -4
- package/eval/agent-read-workflows/bin/_ss-argparse.mjs +51 -5
- package/eval/agent-read-workflows/bin/_ss-helpers.mjs +95 -44
- package/eval/agent-read-workflows/bin/ss-read +2 -0
- package/mcp/tool-handlers.js +1 -2
- package/package.json +11 -8
- package/scripts/uninstall.js +2 -0
- package/core/vector-store/hnsw-index.js +0 -751
|
@@ -376,6 +376,21 @@ export class LateInteractionIndex {
|
|
|
376
376
|
this._segmentSize = options.segmentSize || LI_SEGMENT_SIZE;
|
|
377
377
|
this._docSegmentPositions = new Map(); // doc id -> { segmentPath, docIndex }
|
|
378
378
|
this._staleBitmapCache = new Map(); // segment path -> { mtimeMs, size, bitmap }
|
|
379
|
+
|
|
380
|
+
// Bounded build mode (Phase C completion). When `buildEvict` is set,
|
|
381
|
+
// _flushSegment() drops each flushed segment's per-token slabs from
|
|
382
|
+
// `this.documents` so peak indexing memory stays O(one segment) instead of
|
|
383
|
+
// O(all docs) — the regression that let large repos accumulate the entire
|
|
384
|
+
// per-token corpus in the heap. Only safe during a from-scratch build (no
|
|
385
|
+
// search reads, no rewrite-from-documents save path). The fast-path save()
|
|
386
|
+
// writes the manifest from the already-flushed segment files, so it never
|
|
387
|
+
// needs the evicted docs back. A lightweight id set keeps alias-pointer
|
|
388
|
+
// registration valid after the exemplar's tokens are gone, and running
|
|
389
|
+
// doc/token totals keep getStats() + the save() doc-count accurate.
|
|
390
|
+
this._evictMode = !!options.buildEvict;
|
|
391
|
+
this._evictedDocs = 0;
|
|
392
|
+
this._evictedTokens = 0;
|
|
393
|
+
this._addedIds = this._evictMode ? new Set() : null;
|
|
379
394
|
}
|
|
380
395
|
|
|
381
396
|
/**
|
|
@@ -406,6 +421,21 @@ export class LateInteractionIndex {
|
|
|
406
421
|
this._finalIndexPath = finalIndexPath;
|
|
407
422
|
this._segments = [];
|
|
408
423
|
this._currentSegment = new Map();
|
|
424
|
+
// Reset bounded-build counters for the fresh staged save.
|
|
425
|
+
this._evictedDocs = 0;
|
|
426
|
+
this._evictedTokens = 0;
|
|
427
|
+
if (this._addedIds) this._addedIds.clear();
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
/**
|
|
431
|
+
* True if `id` is (or was) a document in this build — checks both the live
|
|
432
|
+
* `documents` map and, in bounded build mode, the lightweight id set that
|
|
433
|
+
* survives segment eviction. Alias-pointer registration uses this to verify
|
|
434
|
+
* an exemplar exists even after its per-token slab has been flushed+evicted.
|
|
435
|
+
*/
|
|
436
|
+
hasDoc(id) {
|
|
437
|
+
if (this.documents.has(id)) return true;
|
|
438
|
+
return this._addedIds ? this._addedIds.has(id) : false;
|
|
409
439
|
}
|
|
410
440
|
|
|
411
441
|
/**
|
|
@@ -568,6 +598,7 @@ export class LateInteractionIndex {
|
|
|
568
598
|
this.documents.set(id, docEntry);
|
|
569
599
|
if (docEntry.minArray) this._hasPerTokenQuant = true;
|
|
570
600
|
this._currentSegment.set(id, docEntry);
|
|
601
|
+
if (this._evictMode) this._addedIds.add(id);
|
|
571
602
|
|
|
572
603
|
// Flush segment to disk when full — releases memory for completed segments
|
|
573
604
|
if (this._currentSegment.size >= this._segmentSize) {
|
|
@@ -595,6 +626,18 @@ export class LateInteractionIndex {
|
|
|
595
626
|
await this._writeSegmentFile(segPath, this._currentSegment);
|
|
596
627
|
this._segments.push({ path: segPath, count: this._currentSegment.size });
|
|
597
628
|
|
|
629
|
+
// Bounded build mode: drop this segment's per-token slabs from the live
|
|
630
|
+
// documents map now that they're durable on disk. Keeps peak heap O(one
|
|
631
|
+
// segment). The id set + running totals preserve everything later stages
|
|
632
|
+
// need (alias validity via hasDoc(), doc/token counts for save()+stats).
|
|
633
|
+
if (this._evictMode) {
|
|
634
|
+
for (const [id, doc] of this._currentSegment) {
|
|
635
|
+
this.documents.delete(id);
|
|
636
|
+
this._evictedDocs++;
|
|
637
|
+
this._evictedTokens += doc.numTokens || 0;
|
|
638
|
+
}
|
|
639
|
+
}
|
|
640
|
+
|
|
598
641
|
// Release segment memory — these docs will be reloaded from segments during load()
|
|
599
642
|
this._currentSegment = new Map();
|
|
600
643
|
}
|
|
@@ -1594,11 +1637,16 @@ export class LateInteractionIndex {
|
|
|
1594
1637
|
async save() {
|
|
1595
1638
|
await fs.mkdir(path.dirname(this.indexPath), { recursive: true });
|
|
1596
1639
|
|
|
1640
|
+
// Total doc count including any flushed-and-evicted segments (bounded build
|
|
1641
|
+
// mode). In normal mode `_evictedDocs` is 0, so this is byte-identical to
|
|
1642
|
+
// `this.documents.size`.
|
|
1643
|
+
const effectiveTotal = this.documents.size + this._evictedDocs;
|
|
1644
|
+
|
|
1597
1645
|
// Use segmented format when the doc count exceeds one segment.
|
|
1598
1646
|
// Always rewrite ALL segments from this.documents (the authoritative
|
|
1599
1647
|
// state) — never reuse stale segment files from a previous load,
|
|
1600
1648
|
// because documents may have been removed since then.
|
|
1601
|
-
const useSegmented =
|
|
1649
|
+
const useSegmented = effectiveTotal >= this._segmentSize;
|
|
1602
1650
|
|
|
1603
1651
|
if (useSegmented) {
|
|
1604
1652
|
if (!this._loadedExisting) {
|
|
@@ -1607,7 +1655,7 @@ export class LateInteractionIndex {
|
|
|
1607
1655
|
}
|
|
1608
1656
|
|
|
1609
1657
|
const flushedCount = this._segments.reduce((sum, segment) => sum + segment.count, 0);
|
|
1610
|
-
if (flushedCount ===
|
|
1658
|
+
if (flushedCount === effectiveTotal && this._segments.length > 0) {
|
|
1611
1659
|
// Staging-aware segment directory. _segmentDir was pre-seeded by
|
|
1612
1660
|
// resetForSave() when staging; otherwise derive from indexPath.
|
|
1613
1661
|
const segDir = this._segmentDir || (this.indexPath + '.segments');
|
|
@@ -1623,7 +1671,7 @@ export class LateInteractionIndex {
|
|
|
1623
1671
|
poolFactor: this.poolFactor,
|
|
1624
1672
|
whtSeed: this.whtSeed || 0,
|
|
1625
1673
|
whtOrdering: this.whtOrdering,
|
|
1626
|
-
totalDocuments:
|
|
1674
|
+
totalDocuments: effectiveTotal,
|
|
1627
1675
|
segments: this._segments.map((segment) => ({
|
|
1628
1676
|
path: path.basename(segment.path),
|
|
1629
1677
|
count: segment.count,
|
|
@@ -2362,13 +2410,16 @@ export class LateInteractionIndex {
|
|
|
2362
2410
|
* Get index statistics
|
|
2363
2411
|
*/
|
|
2364
2412
|
getStats() {
|
|
2365
|
-
let totalTokens = 0;
|
|
2413
|
+
let totalTokens = this._evictedTokens || 0;
|
|
2366
2414
|
for (const doc of this.documents.values()) {
|
|
2367
2415
|
totalTokens += doc.numTokens;
|
|
2368
2416
|
}
|
|
2369
2417
|
|
|
2370
|
-
|
|
2371
|
-
|
|
2418
|
+
// In bounded build mode, flushed docs are evicted from `documents` but their
|
|
2419
|
+
// counts live in `_evictedDocs`/`_evictedTokens` so stats stay accurate.
|
|
2420
|
+
const docCount = this.documents.size + (this._evictedDocs || 0);
|
|
2421
|
+
const avgTokens = docCount > 0 ?
|
|
2422
|
+
(totalTokens / docCount).toFixed(1) : 0;
|
|
2372
2423
|
|
|
2373
2424
|
let bytesPerToken;
|
|
2374
2425
|
if (this.quantBits === 4) {
|
|
@@ -2381,7 +2432,7 @@ export class LateInteractionIndex {
|
|
|
2381
2432
|
const estimatedMB = (totalTokens * bytesPerToken / 1024 / 1024).toFixed(2);
|
|
2382
2433
|
|
|
2383
2434
|
return {
|
|
2384
|
-
documents:
|
|
2435
|
+
documents: docCount,
|
|
2385
2436
|
totalTokens,
|
|
2386
2437
|
avgTokensPerDoc: avgTokens,
|
|
2387
2438
|
tokenDim: this.tokenDim,
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Best-effort resident search-daemon registry (footprint cap support).
|
|
3
|
+
*
|
|
4
|
+
* Backs the optional SWEET_SEARCH_MAX_DAEMONS cap (Part 2 of the daemon
|
|
5
|
+
* footprint work). Each warm search daemon, WHEN the cap is opted into,
|
|
6
|
+
* upserts a single entry describing itself into one shared JSON file and
|
|
7
|
+
* refreshes it on a coarse timer. A daemon enforcing the cap reads the file,
|
|
8
|
+
* prunes entries whose process is gone or whose socket no longer answers, and
|
|
9
|
+
* (when more daemons are resident than the cap allows) sends /stop to the
|
|
10
|
+
* least-recently-active peers — never itself, never the most-recently-active.
|
|
11
|
+
*
|
|
12
|
+
* Properties:
|
|
13
|
+
* - ONLY search daemons ever call this module. The index maintainer
|
|
14
|
+
* (core/indexing/*) never imports it, so a maintainer can never be
|
|
15
|
+
* enumerated, listed, or signalled through the registry.
|
|
16
|
+
* - Every operation is best-effort: a redundant eviction is harmless and a
|
|
17
|
+
* read/write race resolves to "do nothing this tick". All I/O is
|
|
18
|
+
* try/caught; writes are atomic (tmp + rename) so a crash mid-write never
|
|
19
|
+
* leaves a torn file.
|
|
20
|
+
* - lastActivityMs stores REAL query activity (the daemon's /search and
|
|
21
|
+
* /read-semantic wall-clock), so "least-recently-active" == least-recently
|
|
22
|
+
* queried. The actively-used repo's daemon is never evicted by an
|
|
23
|
+
* equally-or-less-recently-active peer; the one residual race is a
|
|
24
|
+
* newly-STARTED peer, which is freshest-by-construction (its startedAt
|
|
25
|
+
* seeds lastActivityMs) and may evict a recently-active-but-stale-stamped
|
|
26
|
+
* peer within one registry-refresh interval, because the registry reflects
|
|
27
|
+
* activity only as of each daemon's coarse registryTouchSelf tick.
|
|
28
|
+
*/
|
|
29
|
+
|
|
30
|
+
import fs from 'node:fs/promises';
|
|
31
|
+
import { readFileSync } from 'node:fs';
|
|
32
|
+
import http from 'node:http';
|
|
33
|
+
|
|
34
|
+
const DEFAULT_REGISTRY_PATH = '/tmp/sweet-search-daemons.json';
|
|
35
|
+
|
|
36
|
+
/**
 * Resolve the location of the shared daemon registry file.
 *
 * @param {Object} [env=process.env] - environment map to consult.
 * @returns {string} `env.SWEET_SEARCH_DAEMON_REGISTRY` when it holds a truthy
 *   value (tests use this to redirect the registry), otherwise the module
 *   default path.
 */
export function registryPath(env = process.env) {
  const override = env.SWEET_SEARCH_DAEMON_REGISTRY;
  if (override) return override;
  return DEFAULT_REGISTRY_PATH;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Liveness probe for a process id, equivalent to `kill -0`.
 *
 * @param {*} pid - candidate pid; coerced with `Number()`.
 * @returns {boolean} false when the value is not a positive integer or the
 *   signal-0 probe fails; true when the probe succeeds. An EPERM failure
 *   (the process exists but belongs to another user) counts as alive.
 */
export function pidAlive(pid) {
  const candidate = Number(pid);
  const isUsablePid = Number.isInteger(candidate) && candidate > 0;
  if (!isUsablePid) return false;

  try {
    // Signal 0 performs the existence/permission check without delivering anything.
    process.kill(candidate, 0);
  } catch (err) {
    return err && err.code === 'EPERM';
  }
  return true;
}
|
|
55
|
+
|
|
56
|
+
/**
 * Read and parse the shared registry file.
 *
 * Best-effort: a missing file, unreadable file, invalid JSON, or a document
 * whose `daemons` member is not a plain key/value object all yield `{}`.
 *
 * @param {Object} [env=process.env] - environment used to resolve the path.
 * @returns {Promise<Object>} a `{ "<pid>": entry }` map.
 */
export async function readRegistry(env = process.env) {
  try {
    const raw = await fs.readFile(registryPath(env), 'utf-8');
    const daemons = JSON.parse(raw)?.daemons;
    // `typeof [] === 'object'`, so arrays must be rejected explicitly: callers
    // assign pid-named keys on the result, which on an array would create a
    // huge sparse list that serialises as thousands of nulls.
    const isEntryMap =
      daemons !== null && typeof daemons === 'object' && !Array.isArray(daemons);
    return isEntryMap ? daemons : {};
  } catch {
    return {};
  }
}
|
|
67
|
+
|
|
68
|
+
/**
 * Atomically persist the daemon map: write a temporary sibling file, then
 * rename it over the registry so readers only ever see a whole document.
 *
 * Best-effort: every failure is swallowed and reported as `false`.
 *
 * @param {Object} daemons - `{ "<pid>": entry }` map to store.
 * @param {Object} [env=process.env] - environment used to resolve the path.
 * @returns {Promise<boolean>} true when the registry was replaced.
 */
async function writeRegistryAtomic(daemons, env = process.env) {
  const target = registryPath(env);
  // The tmp name is unique per CALL, not just per pid: two overlapping writes
  // from the same process (e.g. a refresh tick racing a prune) would otherwise
  // share one tmp file, so one rename fails and the other can publish a
  // half-written document.
  const nonce = `${Date.now().toString(36)}${Math.random().toString(36).slice(2)}`;
  const tmp = `${target}.${process.pid}.${nonce}.tmp`;
  try {
    // 'wx' refuses to open a path that already exists, so a file or symlink
    // planted at the tmp location is never written through.
    await fs.writeFile(tmp, JSON.stringify({ daemons }), { mode: 0o600, flag: 'wx' });
    await fs.rename(tmp, target);
    return true;
  } catch (err) {
    // On EEXIST the tmp path belongs to someone else — leave it alone.
    if (!err || err.code !== 'EEXIST') {
      try { await fs.unlink(tmp); } catch { /* ignore */ }
    }
    return false;
  }
}
|
|
83
|
+
|
|
84
|
+
/**
 * Register (or overwrite) a daemon's entry in the shared registry.
 *
 * The entry is stored under its stringified `pid` as a shallow copy, so later
 * mutation of the caller's object does not affect what is written.
 *
 * @param {Object} entry - daemon description; `entry.pid` supplies the key.
 * @param {Object} [env=process.env] - environment used to resolve the path.
 * @returns {Promise<boolean>} result of the atomic write.
 */
export async function upsertSelf(entry, env = process.env) {
  const registry = await readRegistry(env);
  const key = String(entry.pid);
  const snapshot = { ...entry };
  registry[key] = snapshot;
  return writeRegistryAtomic(registry, env);
}
|
|
90
|
+
|
|
91
|
+
/**
 * Refresh a daemon's `lastActivityMs` stamp in the shared registry.
 *
 * Does nothing (and returns false) when the daemon has no entry, or when the
 * stored value is not an object — a malformed entry must not make this
 * best-effort call throw.
 *
 * @param {number|string} pid - pid whose entry should be refreshed.
 * @param {number} lastActivityMs - new activity timestamp to store.
 * @param {Object} [env=process.env] - environment used to resolve the path.
 * @returns {Promise<boolean>} true when the refreshed registry was written.
 */
export async function touchSelf(pid, lastActivityMs, env = process.env) {
  const daemons = await readRegistry(env);
  const key = String(pid);
  const entry = daemons[key];
  // Assigning a property on a primitive throws in strict-mode (module) code,
  // so only object entries are refreshed.
  if (!entry || typeof entry !== 'object') return false;
  entry.lastActivityMs = lastActivityMs;
  return writeRegistryAtomic(daemons, env);
}
|
|
99
|
+
|
|
100
|
+
/**
 * Drop a daemon's entry from the shared registry (graceful-shutdown path).
 *
 * @param {number|string} pid - pid whose entry should be removed.
 * @param {Object} [env=process.env] - environment used to resolve the path.
 * @returns {Promise<boolean>} false when no entry existed (nothing is
 *   written); otherwise the result of the atomic write.
 */
export async function removeSelf(pid, env = process.env) {
  const registry = await readRegistry(env);
  const selfKey = String(pid);
  const isRegistered = selfKey in registry;
  if (isRegistered) {
    delete registry[selfKey];
    return writeRegistryAtomic(registry, env);
  }
  return false;
}
|
|
108
|
+
|
|
109
|
+
/**
 * GET /health over an explicit unix socket.
 *
 * Parameterised by socket path so peers can be probed, not just this daemon.
 * The returned promise always settles and never rejects.
 *
 * @param {string} socketPath - unix socket of the daemon to probe.
 * @param {number} [timeoutMs=500] - socket inactivity timeout for the request.
 * @returns {Promise<boolean>} true only when the response completes with
 *   status 200; false when the peer is unreachable, answers non-200, times
 *   out, or drops the connection before the response finishes.
 */
export function socketHealthy(socketPath, timeoutMs = 500) {
  return new Promise((resolve) => {
    // resolve() is idempotent, so whichever outcome fires first wins and the
    // later handlers are harmless no-ops.
    try {
      const req = http.request({ socketPath, path: '/health', method: 'GET' }, (res) => {
        res.on('end', () => resolve(res.statusCode === 200));
        // A connection closed after the headers but before the body ends emits
        // neither 'end' on the response nor 'error' on the request; without
        // these two handlers the promise would never settle.
        res.on('error', () => resolve(false));
        res.on('close', () => resolve(false));
        // Drain the body so 'end' can fire.
        res.resume();
      });
      req.on('error', () => resolve(false));
      req.setTimeout(timeoutMs, () => { req.destroy(); resolve(false); });
      req.end();
    } catch {
      resolve(false);
    }
  });
}
|
|
129
|
+
|
|
130
|
+
/**
 * Prune registry entries whose daemon is gone and return the live ones.
 *
 * An entry is dropped when it is not an object or when its liveness check
 * fails. The default check is `pidAlive(entry.pid)` followed by
 * `socketHealthy(entry.socketPath, timeoutMs)`; `probe` replaces it and may
 * return a value or a promise (tests use it to inject liveness).
 *
 * Entries are probed one at a time, which can take up to `timeoutMs` each, so
 * the registry is re-read immediately before the pruned map is written:
 * entries added by other daemons in the meantime are kept, and a stale entry
 * is only deleted if it is still identical to the one that was probed.
 *
 * @param {Object} [options]
 * @param {Object} [options.env=process.env] - environment used to resolve the path.
 * @param {?function(Object): (boolean|Promise<boolean>)} [options.probe=null]
 *   - liveness override.
 * @param {number} [options.timeoutMs=500] - per-entry socket probe timeout.
 * @returns {Promise<Object[]>} the entries that passed the liveness check.
 */
export async function pruneAndList({ env = process.env, probe = null, timeoutMs = 500 } = {}) {
  const daemons = await readRegistry(env);
  const live = [];
  const staleKeys = [];
  for (const [key, entry] of Object.entries(daemons)) {
    if (!entry || typeof entry !== 'object') {
      staleKeys.push(key);
      continue;
    }
    let ok;
    if (probe) {
      ok = await probe(entry);
    } else {
      ok = pidAlive(entry.pid) && await socketHealthy(entry.socketPath, timeoutMs);
    }
    if (ok) {
      live.push(entry);
    } else {
      staleKeys.push(key);
    }
  }

  if (staleKeys.length > 0) {
    // Writing back the snapshot taken before probing would erase every peer
    // that registered or refreshed itself while the probes were running.
    const current = await readRegistry(env);
    let removed = 0;
    for (const key of staleKeys) {
      if (!(key in current)) continue;
      // An entry that changed since it was probed was rewritten by a running
      // daemon, so it is left in place.
      if (JSON.stringify(current[key]) === JSON.stringify(daemons[key])) {
        delete current[key];
        removed += 1;
      }
    }
    if (removed > 0) {
      await writeRegistryAtomic(current, env);
    }
  }
  return live;
}
|
|
159
|
+
|
|
160
|
+
/**
 * Choose up to `count` peers to evict, least-recently-active first.
 *
 * A peer qualifies only when it is not the caller and its `lastActivityMs`
 * (missing values count as 0) is strictly lower than the caller's own. Every
 * resident daemon runs this selection independently, so the "older than
 * self" gate is what keeps concurrent enforcement from shedding a daemon
 * that is more recently active than the one doing the shedding. The one
 * acknowledged exception is a newly started peer: its stamp is freshest by
 * construction, so it may evict a recently active peer whose registry stamp
 * has not yet been refreshed.
 *
 * When the caller is not present in `liveEntries` (unregistered caller,
 * tests), the gate is open and any non-self entry qualifies.
 *
 * @param {Object[]} liveEntries - live registry entries; falsy items are skipped.
 * @param {number|string} selfPid - pid of the calling daemon.
 * @param {number} count - maximum number of targets to return.
 * @returns {Object[]} selected entries, oldest activity first; empty when
 *   `liveEntries` is not an array or `count` is not positive.
 */
export function selectEvictionTargets(liveEntries, selfPid, count) {
  if (!Array.isArray(liveEntries) || count <= 0) return [];

  const stampOf = (entry) => entry.lastActivityMs ?? 0;
  const selfKey = String(selfPid);
  const isSelf = (entry) => String(entry.pid) === selfKey;

  const selfEntry = liveEntries.find((entry) => entry && isSelf(entry));
  const cutoff = selfEntry ? stampOf(selfEntry) : Infinity;

  const candidates = [];
  for (const entry of liveEntries) {
    if (!entry || isSelf(entry)) continue;
    if (stampOf(entry) < cutoff) candidates.push(entry);
  }
  candidates.sort((left, right) => stampOf(left) - stampOf(right));
  return candidates.slice(0, count);
}
|
|
189
|
+
|
|
190
|
+
/**
 * Synchronous registry read (used only by diagnostics/tests).
 *
 * Best-effort: a missing or unreadable file, invalid JSON, or a document
 * whose `daemons` member is not a plain key/value object all yield `{}`.
 *
 * @param {Object} [env=process.env] - environment used to resolve the path.
 * @returns {Object} a `{ "<pid>": entry }` map.
 */
export function readRegistrySync(env = process.env) {
  try {
    const daemons = JSON.parse(readFileSync(registryPath(env), 'utf-8'))?.daemons;
    // `typeof [] === 'object'`, so an array must be rejected explicitly to
    // keep the documented "pid -> entry" map shape.
    const isEntryMap =
      daemons !== null && typeof daemons === 'object' && !Array.isArray(daemons);
    return isEntryMap ? daemons : {};
  } catch {
    return {};
  }
}
|
|
@@ -445,9 +445,9 @@ function _scoreSymbol(chunks, queryTerms, queryRaw) {
|
|
|
445
445
|
return scores;
|
|
446
446
|
}
|
|
447
447
|
|
|
448
|
-
async function _scoreLateInteraction(chunks, query, projectRoot) {
|
|
448
|
+
async function _scoreLateInteraction(chunks, query, projectRoot, lateInteractionIndexOverride = null) {
|
|
449
449
|
if (chunks.length === 0) return { scores: new Map(), ran: false };
|
|
450
|
-
const liIndex = await _getLateInteractionIndex(projectRoot);
|
|
450
|
+
const liIndex = lateInteractionIndexOverride || await _getLateInteractionIndex(projectRoot);
|
|
451
451
|
if (!liIndex) return { scores: new Map(), ran: false };
|
|
452
452
|
|
|
453
453
|
// Only score chunks whose IDs actually appear in the LI index. Use the
|
|
@@ -625,6 +625,7 @@ function _fallbackSpanFromText(fileText, totalLines, maxChars) {
|
|
|
625
625
|
* @param {number} [req.maxTokens] - Convenience: ~maxChars / 4
|
|
626
626
|
* @param {string} [req.projectRoot]
|
|
627
627
|
* @param {boolean} [req.verbose=false] - include timings + signal contributions
|
|
628
|
+
* @param {Object} [req._lateInteractionIndex] - private daemon injection; same-project index only
|
|
628
629
|
* @returns {Promise<Object>}
|
|
629
630
|
*/
|
|
630
631
|
async function _readSemanticUnpinned(req) {
|
|
@@ -686,7 +687,12 @@ async function _readSemanticUnpinned(req) {
|
|
|
686
687
|
const tLex1 = performance.now();
|
|
687
688
|
|
|
688
689
|
const tLi0 = performance.now();
|
|
689
|
-
const { scores: maxsimScores, ran: liRan } = await _scoreLateInteraction(
|
|
690
|
+
const { scores: maxsimScores, ran: liRan } = await _scoreLateInteraction(
|
|
691
|
+
chunks,
|
|
692
|
+
req.query,
|
|
693
|
+
projectRoot,
|
|
694
|
+
req._lateInteractionIndex || null,
|
|
695
|
+
);
|
|
690
696
|
const tLi1 = performance.now();
|
|
691
697
|
|
|
692
698
|
// Threshold gate on MaxSim — drop chunks whose LI score is too low. This
|
|
@@ -617,16 +617,12 @@ export async function semanticSearchStandard(query, options = {}) {
|
|
|
617
617
|
|
|
618
618
|
let candidates;
|
|
619
619
|
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
candidates = hnswResult.results;
|
|
627
|
-
this.log(`HNSW: ${hnswResult.latency_us}us for ${hnswResult.k} candidates (adaptive: ${numCandidates})`);
|
|
628
|
-
} else if (this.hasCodebaseIndex) {
|
|
629
|
-
// Fallback: O(N) scan from SQLite
|
|
620
|
+
// Non-3-stage ("Standard") path: the binary 3-stage cascade is the default
|
|
621
|
+
// (see semanticSearch dispatcher). This path is reached only when 3-stage is
|
|
622
|
+
// disabled or no binary index exists, and scans float vectors directly from
|
|
623
|
+
// SQLite. (The legacy usearch float-HNSW shortcut was removed.)
|
|
624
|
+
if (this.hasCodebaseIndex) {
|
|
625
|
+
// O(N) scan from SQLite
|
|
630
626
|
candidates = await this.vectorScan(queryEmbedding, rerank ? 100 : k);
|
|
631
627
|
this.log(`Vector scan: ${candidates.length} candidates`);
|
|
632
628
|
} else {
|
|
@@ -790,22 +786,3 @@ export function shouldSkipRerank(scores, options = {}) {
|
|
|
790
786
|
return { skip: false, reason: 'needs_rerank' };
|
|
791
787
|
}
|
|
792
788
|
|
|
793
|
-
/**
 * Scale the candidate budget to the apparent complexity of a query.
 *
 * - Trimmed length under 15 characters (likely an identifier): half of
 *   `baseCount`, but never fewer than 20.
 * - Trimmed length under 30 characters with no question word
 *   (how/what/where/why/when/which): three quarters of `baseCount`, but never
 *   fewer than 30.
 * - Anything else: `baseCount` unchanged.
 *
 * @param {string} query - raw query text.
 * @param {number} baseCount - candidate count for a full-complexity query.
 * @returns {number} candidate count to use.
 */
export function getAdaptiveCandidateCount(query, baseCount) {
  const text = query.trim();
  const scaled = (fraction, floor) => Math.max(Math.floor(baseCount * fraction), floor);

  if (text.length < 15) {
    return scaled(0.5, 20);
  }

  const isShort = text.length < 30;
  if (isShort && !/\b(how|what|where|why|when|which)\b/i.test(text)) {
    return scaled(0.75, 30);
  }

  return baseCount;
}
|