sweet-search 2.4.2 → 2.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/core/cli.js +43 -5
  2. package/core/embedding/embedding-cache.js +266 -18
  3. package/core/embedding/embedding-service.js +45 -9
  4. package/core/graph/graph-expansion.js +52 -12
  5. package/core/graph/graph-extractor.js +30 -1
  6. package/core/indexing/ast-chunker.js +331 -16
  7. package/core/indexing/chunking/chunk-builder.js +34 -1
  8. package/core/indexing/index-codebase-v21.js +31 -2
  9. package/core/indexing/index.js +6 -3
  10. package/core/indexing/indexer-ann.js +45 -6
  11. package/core/indexing/indexer-build.js +9 -1
  12. package/core/indexing/indexer-phases.js +6 -4
  13. package/core/indexing/indexing-file-policy.js +140 -0
  14. package/core/indexing/li-skip-policy.js +11 -220
  15. package/core/infrastructure/codebase-repository.js +21 -0
  16. package/core/infrastructure/config/embedding.js +20 -1
  17. package/core/infrastructure/config/graph.js +2 -2
  18. package/core/infrastructure/config/ranking.js +10 -0
  19. package/core/infrastructure/config/vector-store.js +1 -1
  20. package/core/infrastructure/coreml-cascade.js +236 -30
  21. package/core/infrastructure/coreml-cascade.json +25 -0
  22. package/core/infrastructure/index.js +17 -0
  23. package/core/infrastructure/init-config.js +216 -0
  24. package/core/infrastructure/language-patterns/registry-core.js +18 -0
  25. package/core/infrastructure/model-registry.js +12 -0
  26. package/core/infrastructure/native-inference.js +143 -51
  27. package/core/infrastructure/tree-sitter-provider.js +92 -2
  28. package/core/ranking/cascaded-scorer.js +6 -2
  29. package/core/ranking/file-kind-ranking.js +264 -0
  30. package/core/ranking/late-interaction-index.js +10 -4
  31. package/core/ranking/late-interaction-policy.js +304 -0
  32. package/core/search/context-expander.js +267 -28
  33. package/core/search/index.js +4 -0
  34. package/core/search/search-cli.js +3 -1
  35. package/core/search/search-pattern.js +4 -3
  36. package/core/search/search-postprocess.js +189 -8
  37. package/core/search/search-read-semantic.js +734 -0
  38. package/core/search/search-read.js +481 -0
  39. package/core/search/search-server.js +153 -5
  40. package/core/search/sweet-search.js +133 -16
  41. package/core/start-server.js +13 -2
  42. package/mcp/server.js +41 -0
  43. package/mcp/tool-handlers.js +117 -6
  44. package/package.json +9 -7
  45. package/scripts/init.js +386 -5
  46. package/scripts/uninstall.js +152 -6
@@ -0,0 +1,734 @@
1
+ /**
2
+ * sweet-search read-semantic — span selection by hybrid retrieval, content from disk.
3
+ *
4
+ * Pipeline:
5
+ * 1. Enumerate candidate spans for the target file from the vectors index.
6
+ * 2. Build a candidate union from three signals:
7
+ * - lexical: term matches (regex over query terms) on chunk text + symbol
8
+ * - symbol: exact substring match against the chunk's symbol/signature
9
+ * - MaxSim: ColBERT-style late interaction (token-level), if the LI
10
+ * index is available for these chunk IDs
11
+ * 3. Rank by Reciprocal Rank Fusion (RRF). If MaxSim ran, do a final
12
+ * LI-only re-rank over the fused top-K and use the LI score as the
13
+ * authoritative score on returned spans.
14
+ * 4. Re-read the selected spans from disk (filesystem ground truth).
15
+ * 5. Expand by contextLines, merge adjacent/overlapping spans, enforce a
16
+ * character/token budget.
17
+ *
18
+ * Why hybrid: a pure single-vector dense path is known to be weaker on code
19
+ * than ColBERT-style late interaction, and even MaxSim alone underperforms
20
+ * BM25+MaxSim fusion on out-of-domain queries (AllianceCoder 2025; ECIR 2026
21
+ * Late Interaction workshop survey). For per-file span selection we don't
22
+ * have a strong corpus-level lexical index to lean on — symbol-name and
23
+ * regex token candidates are the cheap and effective substitutes.
24
+ *
25
+ * DDD: search/ application layer. Allowed to import infrastructure (DB,
26
+ * config) and ranking (LI). Never imports indexing/ or query/. Single-file
27
+ * scope, so no graph-domain dependency required here; the candidate union
28
+ * has a documented seam where graph 1-hop neighbors can plug in later
29
+ * (cross-file would belong in a separate corpus-level read tool).
30
+ */
31
+
32
+ import path from 'node:path';
33
+ import { CodebaseRepository } from '../infrastructure/codebase-repository.js';
34
+ import { DB_PATHS, LATE_INTERACTION_CONFIG } from '../infrastructure/config/index.js';
35
+ import { applyPersistedLiModel } from '../infrastructure/init-config.js';
36
+ import { readFile as readFileExact } from './search-read.js';
37
+
38
// Tracks the project roots whose persisted late-interaction (LI) model has
// already been applied in this process, so the lookup happens at most once per
// root. Rationale carried over from the original note: without this step an
// edge-only init would encode queries with the standard 768d model while the
// on-disk LI index was built with the 256d edge model, so every score would be
// meaningless (the dimension mismatch trips the modelMismatch guard, but only
// after the query has already been encoded with the wrong model).
const _appliedLiPerRoot = new Map(); // projectRoot -> appliedModel

/**
 * Applies the persisted LI model for a project root the first time that root
 * is seen; later calls for the same root are no-ops.
 *
 * @param {string} [projectRoot] - falls back to process.cwd() when falsy
 */
function _ensurePersistedLiModelApplied(projectRoot) {
  const rootKey = projectRoot || process.cwd();
  if (!_appliedLiPerRoot.has(rootKey)) {
    const { applied } = applyPersistedLiModel(rootKey);
    _appliedLiPerRoot.set(rootKey, applied);
  }
}
51
+
52
+ // ---------------------------------------------------------------------------
53
+ // Defaults — keep modest so a one-file call stays under ~100ms after warmup.
54
+ // ---------------------------------------------------------------------------
55
+
56
// Request defaults used when the caller leaves an option unset.
const DEFAULTS = {
  // Number of ranked spans kept before merging.
  topK: 5,
  // MaxSim score floor, applied only when late interaction ranks.
  threshold: 0.4,
  // Selected spans are widened by this many lines on each side.
  contextLines: 2,
  // Hard cap on the amount of exact text returned.
  maxChars: 8000,
  // Standard Reciprocal Rank Fusion constant.
  rrfK: 60,
  // Per-signal RRF weights: symbol-name hits are stronger evidence within a
  // single file, and late interaction is weighted to win ties.
  lexicalWeight: 1.0,
  symbolWeight: 1.5,
  maxsimWeight: 1.6,
};

// Rough characters-per-token ratio used to convert between the two budgets.
const APPROX_CHARS_PER_TOKEN = 4;
68
+
69
+ // ---------------------------------------------------------------------------
70
+ // Module-level lazy singletons
71
+ // ---------------------------------------------------------------------------
72
+
73
// Tri-state cache: null = not attempted yet, false = construction failed,
// otherwise the live repository instance.
let _repo = null;

/**
 * Lazily constructs the shared CodebaseRepository.
 *
 * @returns {Object|null} the repository, or null when construction failed
 *   (the failure is remembered, so construction is not retried)
 */
function _getRepo() {
  if (_repo !== null) return _repo || null;
  try {
    _repo = new CodebaseRepository(DB_PATHS.codebase);
  } catch {
    _repo = false;
  }
  return _repo || null;
}
81
+
82
// Tri-state cache: null = not attempted yet, false = known unavailable (init
// failed or the index is empty), otherwise the initialised index.
let _liIndex = null;
// In-flight initialisation, shared so concurrent callers do not init twice.
let _liInitPromise = null;

/**
 * Lazily loads and initialises the late-interaction (LI) index.
 *
 * The outcome is cached in both directions: once the index has been found
 * unavailable, later calls return null immediately instead of repeating the
 * dynamic import and `init()` on every request.
 *
 * @returns {Promise<Object|null>} the index, or null when LI is disabled,
 *   failed to initialise, or holds no documents
 */
async function _getLateInteractionIndex() {
  // Honour the `false` sentinel as well as a live index; a plain truthiness
  // check would treat "known unavailable" as "never tried".
  if (_liIndex !== null) return _liIndex || null;
  if (_liInitPromise) return _liInitPromise;
  if (!LATE_INTERACTION_CONFIG?.enabled) return null;

  _liInitPromise = (async () => {
    try {
      const { LateInteractionIndex } = await import('../ranking/late-interaction-index.js');
      const idx = new LateInteractionIndex({});
      await idx.init();
      // If the index is empty (no segments, no docs), treat as unavailable —
      // saves a noisy warning later when scoreWithLateInteraction runs.
      if (!idx.documents || idx.documents.size === 0) {
        _liIndex = false;
        return null;
      }
      _liIndex = idx;
      return idx;
    } catch {
      // Best effort: LI is optional, so any load/init failure just disables it.
      _liIndex = false;
      return null;
    } finally {
      _liInitPromise = null;
    }
  })();
  return _liInitPromise;
}
110
+
111
// Cached `encodeQuery` export of the late-interaction model module.
let _encodeQueryFn = null;

/**
 * Lazily resolves the query encoder via a dynamic import.
 *
 * @returns {Promise<Function|null>} the encoder, or null when the module
 *   cannot be imported (a failed import is not cached)
 */
async function _getEncodeQuery() {
  if (!_encodeQueryFn) {
    try {
      const { encodeQuery } = await import('../ranking/late-interaction-model.js');
      _encodeQueryFn = encodeQuery;
    } catch {
      return null;
    }
  }
  return _encodeQueryFn;
}
122
+
123
+ // ---------------------------------------------------------------------------
124
+ // Helpers
125
+ // ---------------------------------------------------------------------------
126
+
127
/**
 * Converts a path to project-relative form when it lies inside the project.
 *
 * @param {string} absOrRelPath - absolute path, or path relative to the root
 * @param {string} [projectRoot] - falls back to process.cwd() when falsy
 * @returns {string} the path relative to the root, or the absolute path when
 *   the target is outside the root
 */
function _projectRelative(absOrRelPath, projectRoot) {
  const root = projectRoot || process.cwd();
  const abs = path.isAbsolute(absOrRelPath)
    ? absOrRelPath
    : path.resolve(root, absOrRelPath);
  const rel = path.relative(root, abs);
  // Only a leading ".." *segment* means the target escapes the root; a bare
  // prefix test would also reject in-root names such as "..hidden/file.js".
  const escapesRoot = rel === '..'
    || rel.startsWith(`..${path.sep}`)
    || path.isAbsolute(rel);
  return escapesRoot ? abs : rel;
}
135
+
136
/**
 * Normalises chunk metadata that may arrive as an object or as JSON text.
 *
 * @param {Object|string|null|undefined} rawMeta
 * @returns {*} the object as-is, the parsed JSON value, or null when the
 *   input is falsy or not valid JSON
 */
function _parseMeta(rawMeta) {
  if (rawMeta && typeof rawMeta === 'object') return rawMeta;
  if (!rawMeta) return null;
  try {
    return JSON.parse(rawMeta);
  } catch {
    return null;
  }
}
141
+
142
/**
 * Reads the chunk's symbol name, accepting either metadata key.
 *
 * @param {Object} meta
 * @returns {*} `meta.name`, else `meta.symbol`, else null
 */
function _metaSymbol(meta) {
  const { name, symbol } = meta;
  if (name != null) return name;
  return symbol ?? null;
}
145
+
146
/**
 * Reads the chunk's type, accepting either metadata key.
 *
 * @param {Object} meta
 * @returns {*} `meta.type`, else `meta.chunk_type`, else null
 */
function _metaType(meta) {
  const { type, chunk_type: chunkType } = meta;
  if (type != null) return type;
  return chunkType ?? null;
}
149
+
150
/**
 * Reads the chunk's first line number, accepting either metadata key.
 *
 * @param {Object} meta
 * @returns {number|null} `startLine` or `line_start` when numeric, else null
 */
function _metaStartLine(meta) {
  for (const key of ['startLine', 'line_start']) {
    const value = meta[key];
    if (typeof value === 'number') return value;
  }
  return null;
}
155
+
156
/**
 * Reads the chunk's last line number, accepting either metadata key.
 *
 * @param {Object} meta
 * @returns {number|null} `endLine` or `line_end` when numeric, else null
 */
function _metaEndLine(meta) {
  for (const key of ['endLine', 'line_end']) {
    const value = meta[key];
    if (typeof value === 'number') return value;
  }
  return null;
}
161
+
162
/**
 * Splits a query into unique lowercase terms for per-file term matching.
 * Terms are runs of ASCII letters, digits and underscores; terms shorter than
 * two characters are dropped. First-occurrence order is kept.
 *
 * @param {*} q - query; coerced with String()
 * @returns {string[]}
 */
function _tokenizeQuery(q) {
  const unique = new Set();
  const pieces = String(q).toLowerCase().split(/[^a-zA-Z0-9_]+/g);
  for (const piece of pieces) {
    if (piece.length >= 2) unique.add(piece);
  }
  return [...unique];
}
169
+
170
/**
 * Backslash-escapes regex metacharacters so the text matches literally.
 *
 * @param {string} s
 * @returns {string}
 */
function _escapeRegex(s) {
  return s.replace(/[.*+?^${}()|[\]\\]/g, (metaChar) => `\\${metaChar}`);
}
173
+
174
+ // ---------------------------------------------------------------------------
175
+ // Candidate enumeration — load chunk metadata + per-chunk on-disk text slice
176
+ // ---------------------------------------------------------------------------
177
+
178
/**
 * Loads the indexed chunks of one file and pairs each with its text as it
 * currently exists on disk.
 *
 * @param {string} filePathRel - path handed to the repository and file reader
 * @param {string} [projectRoot]
 * @returns {Promise<Object>} `{ chunks, language, totalLines, fileText }` on
 *   success; `{ chunks: [], language: null }` when the repository is
 *   unavailable, the file has no chunks, or the disk read fails
 */
async function _loadFileChunks(filePathRel, projectRoot) {
  const nothing = () => ({ chunks: [], language: null });

  const repo = _getRepo();
  if (!repo) return nothing();
  const rows = repo.getChunksByFilePath(filePathRel);
  if (rows.length === 0) return nothing();

  // The filesystem is ground truth: read the file once, slice every span from it.
  let diskRead;
  try {
    diskRead = await readFileExact({ path: filePathRel, projectRoot, includeMetadata: false });
  } catch {
    return nothing();
  }
  if (!diskRead.ok) return nothing();

  const fileText = diskRead.text;

  // lineToOffset[i] is the offset at which (1-based) line i + 1 begins.
  const lineToOffset = [0];
  let newlineAt = fileText.indexOf('\n');
  while (newlineAt !== -1) {
    lineToOffset.push(newlineAt + 1);
    newlineAt = fileText.indexOf('\n', newlineAt + 1);
  }
  const totalLines = lineToOffset.length;

  let language = null;
  const chunks = [];
  for (const row of rows) {
    const meta = _parseMeta(row.metadata) || {};
    if (!language && meta.language) language = meta.language;

    const startLine = _metaStartLine(meta);
    const endLine = _metaEndLine(meta);
    const hasRange = startLine != null && endLine != null;
    if (!hasRange || startLine < 1 || startLine > totalLines) continue;

    // Clamp the end into the file and never let it precede the start.
    const first = Math.max(1, startLine);
    const last = Math.min(totalLines, Math.max(first, endLine));
    const from = lineToOffset[first - 1];
    const to = last < totalLines ? lineToOffset[last] : fileText.length;

    chunks.push({
      id: row.id,
      symbol: _metaSymbol(meta),
      type: _metaType(meta),
      signature: meta.signature ?? null,
      startLine: first,
      endLine: last,
      // Disk-exact slice (keeps a trailing newline if present); used for
      // lexical scoring rather than returned to the caller.
      exactText: fileText.slice(from, to),
    });
  }

  chunks.sort((left, right) => left.startLine - right.startLine);
  return { chunks, language, totalLines, fileText };
}
238
+
239
+ // ---------------------------------------------------------------------------
240
+ // Candidate scoring signals (per file)
241
+ // ---------------------------------------------------------------------------
242
+
243
/**
 * Scores chunks by how often any query term occurs as a whole word
 * (case-insensitive) in the chunk text.
 *
 * @param {Array<{id: *, exactText: string}>} chunks
 * @param {string[]} queryTerms
 * @returns {Map<*, number>} chunk id -> log2(1 + hits); chunks with no hits
 *   are absent
 */
function _scoreLexical(chunks, queryTerms) {
  const scores = new Map();
  if (queryTerms.length === 0) return scores;

  const alternation = queryTerms.map(_escapeRegex).join('|');
  const re = new RegExp(`\\b(?:${alternation})\\b`, 'gi');

  for (const chunk of chunks) {
    re.lastIndex = 0; // the global regex is stateful; restart for each chunk
    let hits = 0;
    // Stop counting once the tally has gone past 50 so huge chunks stay cheap.
    while (hits <= 50 && re.exec(chunk.exactText) !== null) {
      hits += 1;
    }
    if (hits === 0) continue;
    // Diminishing returns — early hits matter more than later ones.
    scores.set(chunk.id, Math.log2(1 + hits));
  }
  return scores;
}
262
+
263
/**
 * Scores chunks by how well their symbol name matches the query.
 * Points: +2 when the raw query text contains the symbol name, +3 for each
 * term equal to the name, +1 for each term the name merely contains.
 * Comparison is case-insensitive.
 *
 * @param {Array<{id: *, symbol: ?string}>} chunks
 * @param {string[]} queryTerms - lowercase terms
 * @param {*} queryRaw - original query text
 * @returns {Map<*, number>} chunk id -> points; zero-point chunks are absent
 */
function _scoreSymbol(chunks, queryTerms, queryRaw) {
  const scores = new Map();
  if (queryTerms.length === 0) return scores;

  const rawLower = String(queryRaw).toLowerCase();
  for (const chunk of chunks) {
    const name = (chunk.symbol || '').toLowerCase();
    if (name === '') continue;

    const mentionPoints = rawLower.includes(name) ? 2 : 0;
    const termPoints = queryTerms.reduce((sum, term) => {
      if (name === term) return sum + 3;
      return name.includes(term) ? sum + 1 : sum;
    }, 0);

    const total = mentionPoints + termPoints;
    if (total > 0) scores.set(chunk.id, total);
  }
  return scores;
}
280
+
281
/**
 * Scores chunks with the late-interaction (MaxSim) index when it is usable.
 *
 * @param {Array<{id: *}>} chunks
 * @param {string} query
 * @returns {Promise<{scores: Map<*, number>, ran: boolean}>} `ran` is false
 *   (with an empty map) when there are no chunks, the index or encoder is
 *   unavailable, none of the chunks are in the index, the query encodes to
 *   nothing, or encoding/scoring throws
 */
async function _scoreLateInteraction(chunks, query) {
  const skipped = () => ({ scores: new Map(), ran: false });
  if (chunks.length === 0) return skipped();

  const liIndex = await _getLateInteractionIndex();
  if (!liIndex) return skipped();

  // Only chunks the LI index actually knows about can be scored.
  const candidates = [];
  for (const { id } of chunks) {
    if (liIndex.documents.has(id)) candidates.push({ id, score: 0 });
  }
  if (candidates.length === 0) return skipped();

  const encodeQuery = await _getEncodeQuery();
  if (!encodeQuery) return skipped();

  let scored;
  try {
    const qTokens = await encodeQuery(query);
    if (!qTokens || qTokens.length === 0) return skipped();
    scored = await liIndex.scoreWithLateInteraction(qTokens, candidates);
  } catch {
    return skipped();
  }

  const scores = new Map();
  for (const row of scored) {
    scores.set(row.id, row.lateInteractionScore ?? row.score ?? 0);
  }
  return { scores, ran: true };
}
311
+
312
+ // ---------------------------------------------------------------------------
313
+ // Reciprocal Rank Fusion over multiple signal maps
314
+ // ---------------------------------------------------------------------------
315
+
316
/**
 * Weighted Reciprocal Rank Fusion over several score maps.
 * Each non-empty map is ranked by descending score and every id receives
 * `weight / (rrfK + rank)` with rank starting at 1; contributions are summed
 * per id.
 *
 * @param {Array<Map<*, number>|null|undefined>} signalMaps
 * @param {number[]} weights - parallel to `signalMaps`; a missing weight is 1
 * @param {number} rrfK
 * @returns {Map<*, number>} id -> fused score
 */
function _rrfFuse(signalMaps, weights, rrfK) {
  const fused = new Map();
  signalMaps.forEach((signal, i) => {
    if (!signal || signal.size === 0) return;
    const weight = weights[i] ?? 1;
    const byScoreDesc = [...signal].sort(([, left], [, right]) => right - left);
    byScoreDesc.forEach(([id], rank) => {
      const contribution = weight / (rrfK + rank + 1);
      fused.set(id, (fused.get(id) || 0) + contribution);
    });
  });
  return fused;
}
332
+
333
+ // ---------------------------------------------------------------------------
334
+ // Span post-processing — context expansion, merging, budget enforcement
335
+ // ---------------------------------------------------------------------------
336
+
337
/**
 * Widens each selected span by `contextLines` on both sides (clamped to the
 * file) and merges spans that overlap or touch.
 *
 * @param {Array<Object>} selected - ranked spans with id, symbol, type,
 *   startLine, endLine and score
 * @param {number} totalLines
 * @param {number} contextLines
 * @returns {Array<Object>} merged spans in line order, each carrying
 *   startLine, endLine, the highest member score, and the de-duplicated
 *   symbols, types and chunkIds of its members
 */
function _expandAndMergeSpans(selected, totalLines, contextLines) {
  if (selected.length === 0) return [];

  const byStart = selected
    .map((span) => {
      const from = Math.max(1, span.startLine - contextLines);
      const to = Math.min(totalLines, span.endLine + contextLines);
      return { ...span, startLine: from, endLine: to };
    })
    .sort((left, right) => left.startLine - right.startLine);

  const merged = [];
  for (const { id, symbol, type, score, startLine, endLine } of byStart) {
    const tail = merged[merged.length - 1];
    const joinsTail = tail !== undefined && startLine <= tail.endLine + 1;

    if (!joinsTail) {
      merged.push({
        startLine,
        endLine,
        score,
        symbols: symbol ? [symbol] : [],
        types: type ? [type] : [],
        chunkIds: [id],
      });
      continue;
    }

    // Overlapping or adjacent: fold this span into the previous one.
    tail.endLine = Math.max(tail.endLine, endLine);
    tail.score = Math.max(tail.score, score);
    if (symbol && !tail.symbols.includes(symbol)) tail.symbols.push(symbol);
    if (type && !tail.types.includes(type)) tail.types.push(type);
    tail.chunkIds.push(id);
  }
  return merged;
}
376
+
377
/**
 * Returns the exact text of a 1-based, inclusive line range.
 *
 * @param {string} fileText
 * @param {number[]} lineOffsets - start offset of each line
 * @param {number} startLine - truncated to an integer and clamped to >= 1
 * @param {number} endLine - truncated to an integer and clamped to
 *   [startLine, line count]
 * @returns {string} the slice with newlines untouched, or '' when
 *   `lineOffsets` is empty
 */
function _sliceSpanFromDisk(fileText, lineOffsets, startLine, endLine) {
  const lineCount = lineOffsets.length;
  if (lineCount === 0) return '';

  const first = Math.max(1, startLine | 0);
  const last = Math.min(lineCount, Math.max(first, endLine | 0));

  const from = lineOffsets[first - 1];
  // A range ending on the final line runs to the end of the text.
  const to = last < lineCount ? lineOffsets[last] : fileText.length;
  return fileText.slice(from, to);
}
387
+
388
/**
 * Chooses which spans fit in the character budget, best score first.
 * The top-scoring span is always returned: if it alone exceeds the budget it
 * is cut to its first `maxChars` characters, flagged `truncated`, and nothing
 * else is added. Lower-ranked spans that do not fit are skipped.
 *
 * @param {Array<Object>} spans - spans with startLine, endLine and score
 * @param {string} fileText
 * @param {number[]} lineOffsets
 * @param {number} maxChars
 * @returns {{spans: Array<Object>, charsUsed: number}} kept spans (each with
 *   `text`) in line order, and the total characters they occupy
 */
function _enforceCharBudget(spans, fileText, lineOffsets, maxChars) {
  const byScore = spans.slice().sort((left, right) => right.score - left.score);
  const kept = [];
  let used = 0;

  for (const span of byScore) {
    const text = _sliceSpanFromDisk(fileText, lineOffsets, span.startLine, span.endLine);
    const cost = text.length;
    const isTopSpan = kept.length === 0;

    if (isTopSpan && cost > maxChars) {
      // Keep the head of the span so the definition comes first.
      const head = text.slice(0, maxChars);
      kept.push({ ...span, text: head, truncated: true });
      used += head.length;
      break;
    }
    if (used + cost > maxChars) continue;

    kept.push({ ...span, text });
    used += cost;
  }

  // Present the survivors in file order for readability.
  kept.sort((left, right) => left.startLine - right.startLine);
  return { spans: kept, charsUsed: used };
}
413
+
414
/**
 * Wraps a plain whole-file read in a single zero-score span.
 *
 * @param {{text: ?string, totalLines: number}} fallback - plain-read result
 * @param {number} maxChars - text longer than this is cut to this length
 * @returns {Object} span covering lines 1..totalLines; `truncated` is true
 *   when text was cut and undefined otherwise
 */
function _fallbackSpanFromRead(fallback, maxChars) {
  const full = fallback.text || '';
  const shown = full.length > maxChars ? full.slice(0, maxChars) : full;
  const wasCut = shown.length < full.length;
  return {
    startLine: 1,
    endLine: fallback.totalLines,
    score: 0,
    symbols: [],
    types: [],
    chunkIds: [],
    text: shown,
    truncated: wasCut ? true : undefined,
  };
}
428
+
429
/**
 * Wraps already-loaded file text in a single zero-score span.
 *
 * @param {string} fileText
 * @param {number} totalLines
 * @param {number} maxChars - text longer than this is cut to this length
 * @returns {Object} span covering lines 1..totalLines; `truncated` is true
 *   when text was cut and undefined otherwise
 */
function _fallbackSpanFromText(fileText, totalLines, maxChars) {
  const shown = fileText.length > maxChars ? fileText.slice(0, maxChars) : fileText;
  const wasCut = shown.length < fileText.length;
  return {
    startLine: 1,
    endLine: totalLines,
    score: 0,
    symbols: [],
    types: [],
    chunkIds: [],
    text: shown,
    truncated: wasCut ? true : undefined,
  };
}
442
+
443
+ // ---------------------------------------------------------------------------
444
+ // Public API
445
+ // ---------------------------------------------------------------------------
446
+
447
// Coerces a numeric request option. Nullish, non-numeric, non-finite (NaN,
// ±Infinity) and below-`min` values all fall back, so a bad CLI/tool argument
// behaves like an omitted one instead of leaking NaN into slicing and budgets.
function _numericOption(value, fallback, min = -Infinity) {
  if (value == null) return fallback;
  const n = Number(value);
  return Number.isFinite(n) && n >= min ? n : fallback;
}

/**
 * Returns the spans of one file that best match a query, with their text read
 * from disk.
 *
 * Chunks are scored by lexical, symbol and (when available) late-interaction
 * signals, fused with Reciprocal Rank Fusion, re-ranked by the LI score where
 * one exists, widened by `contextLines`, merged, and trimmed to the character
 * budget. When the file has no usable chunks, or no chunk matches any signal,
 * the whole file (capped at the budget) is returned with `fellBack: true`.
 *
 * Numeric options that are missing, non-numeric, non-finite or negative
 * (threshold may be negative) fall back to their defaults.
 *
 * @param {Object} req
 * @param {string} req.path - File path (project-relative or absolute)
 * @param {string} req.query - Natural language query
 * @param {number} [req.topK=5]
 * @param {number} [req.threshold=0.4] - MaxSim score floor when LI runs
 * @param {number} [req.contextLines=2]
 * @param {number} [req.maxChars=8000]
 * @param {number} [req.maxTokens] - Convenience: ~maxChars / 4; used only
 *   when maxChars is not usable
 * @param {string} [req.projectRoot]
 * @param {boolean} [req.verbose=false] - include timings + signal contributions
 * @returns {Promise<Object>}
 * @throws {Error} when `path` or `query` is missing or blank
 */
export async function readSemantic(req) {
  const t0 = performance.now();
  if (!req || !req.path) throw new Error('path is required');
  if (!req.query || !String(req.query).trim()) throw new Error('query is required');

  const projectRoot = req.projectRoot || process.cwd();
  _ensurePersistedLiModelApplied(projectRoot);
  const filePathRel = _projectRelative(req.path, projectRoot);

  const topK = _numericOption(req.topK, DEFAULTS.topK, 0);
  const threshold = _numericOption(req.threshold, DEFAULTS.threshold);
  const contextLines = _numericOption(req.contextLines, DEFAULTS.contextLines, 0);
  const tokenBudget = _numericOption(req.maxTokens, null, 0);
  const maxChars = _numericOption(
    req.maxChars,
    tokenBudget != null ? tokenBudget * APPROX_CHARS_PER_TOKEN : DEFAULTS.maxChars,
    0,
  );
  const verbose = !!req.verbose;
  const msBetween = (from, to) => +(to - from).toFixed(2);

  const tLoad0 = performance.now();
  const { chunks, language, totalLines, fileText } = await _loadFileChunks(filePathRel, projectRoot);
  const tLoad1 = performance.now();

  // No chunks at all → fall back to plain read so the caller still gets
  // exact text. The fallback is flagged in the response.
  if (!chunks || chunks.length === 0) {
    const fallback = await readFileExact({ path: req.path, projectRoot });
    const charsReturned = fallback.ok
      ? Math.min((fallback.text || '').length, maxChars)
      : 0;
    return {
      file: filePathRel,
      query: req.query,
      ok: fallback.ok,
      indexed: false,
      fellBack: true,
      reason: 'file not indexed for semantic span selection — returning whole file via plain read',
      language: fallback.language,
      totalLines: fallback.totalLines,
      spans: fallback.ok ? [_fallbackSpanFromRead(fallback, maxChars)] : [],
      charsReturned,
      approxTokensReturned: Math.ceil(charsReturned / APPROX_CHARS_PER_TOKEN),
      timings: { totalMs: msBetween(t0, performance.now()) },
    };
  }

  // Line-offset table over the disk text, built once for span re-reads.
  const lineOffsets = [0];
  for (let i = 0; i < fileText.length; i++) {
    if (fileText.charCodeAt(i) === 10) lineOffsets.push(i + 1);
  }

  const queryTerms = _tokenizeQuery(req.query);

  const tLex0 = performance.now();
  const lexicalScores = _scoreLexical(chunks, queryTerms);
  const symbolScores = _scoreSymbol(chunks, queryTerms, req.query);
  const tLex1 = performance.now();

  const tLi0 = performance.now();
  const { scores: maxsimScores, ran: liRan } = await _scoreLateInteraction(chunks, req.query);
  const tLi1 = performance.now();

  // Timings are assembled at return time so totalMs covers the whole call.
  const timings = () => {
    const totalMs = msBetween(t0, performance.now());
    return verbose
      ? {
        loadMs: msBetween(tLoad0, tLoad1),
        lexicalMs: msBetween(tLex0, tLex1),
        liMs: msBetween(tLi0, tLi1),
        totalMs,
      }
      : { totalMs };
  };

  // Threshold gate on MaxSim — drop chunks whose LI score is too low. This is
  // only a MaxSim score floor: a chunk removed here can still be selected
  // through its lexical/symbol signals.
  if (liRan && threshold > 0) {
    for (const [id, s] of [...maxsimScores]) {
      if (s < threshold) maxsimScores.delete(id);
    }
  }

  // Fuse — all three signals contribute via RRF.
  const fused = _rrfFuse(
    [lexicalScores, symbolScores, maxsimScores],
    [DEFAULTS.lexicalWeight, DEFAULTS.symbolWeight, DEFAULTS.maxsimWeight],
    DEFAULTS.rrfK,
  );

  // Nothing matched any signal: return the whole file as a graceful fallback
  // (flagged via fellBack/reason) rather than nothing.
  if (fused.size === 0) {
    const charsReturned = Math.min(fileText.length, maxChars);
    return {
      file: filePathRel,
      query: req.query,
      ok: true,
      indexed: true,
      fellBack: true,
      reason: 'no chunk matched query signals — returning whole file',
      language,
      totalLines,
      spans: [_fallbackSpanFromText(fileText, totalLines, maxChars)],
      charsReturned,
      approxTokensReturned: Math.ceil(charsReturned / APPROX_CHARS_PER_TOKEN),
      signals: verbose ? { liRan, lexicalHits: 0, symbolHits: 0, maxsimHits: 0 } : undefined,
      timings: timings(),
    };
  }

  // Take a 2×topK pool by fused score so the LI re-rank has room to reorder.
  const fusedTop = [...fused.entries()]
    .sort((a, b) => b[1] - a[1])
    .slice(0, topK * 2);
  const idToChunk = new Map(chunks.map((c) => [c.id, c]));

  // Final re-rank: a chunk's LI score is authoritative when LI ran and the
  // chunk has one; otherwise its fused RRF score is used.
  const ranked = fusedTop
    .map(([id, fusedScore]) => {
      const c = idToChunk.get(id);
      if (!c) return null;
      const li = maxsimScores.get(id);
      const finalScore = liRan && li != null ? li : fusedScore;
      return {
        id,
        symbol: c.symbol,
        type: c.type,
        startLine: c.startLine,
        endLine: c.endLine,
        score: finalScore,
        signals: {
          lexical: lexicalScores.get(id) || 0,
          symbol: symbolScores.get(id) || 0,
          maxsim: liRan ? (li ?? null) : null,
          fused: fusedScore,
        },
      };
    })
    .filter(Boolean)
    .sort((a, b) => b.score - a.score)
    .slice(0, topK);

  const merged = _expandAndMergeSpans(ranked, totalLines, contextLines);
  const { spans, charsUsed } = _enforceCharBudget(merged, fileText, lineOffsets, maxChars);

  return {
    file: filePathRel,
    query: req.query,
    ok: true,
    indexed: true,
    fellBack: false,
    language,
    totalLines,
    spans,
    charsReturned: charsUsed,
    approxTokensReturned: Math.ceil(charsUsed / APPROX_CHARS_PER_TOKEN),
    signals: verbose ? {
      liRan,
      lexicalHits: lexicalScores.size,
      symbolHits: symbolScores.size,
      maxsimHits: maxsimScores.size,
      fusedCandidates: fused.size,
      preMergeRanked: ranked,
    } : undefined,
    timings: timings(),
  };
}
626
+
627
+ // ---------------------------------------------------------------------------
628
+ // Formatting
629
+ // ---------------------------------------------------------------------------
630
+
631
/**
 * Renders a readSemantic result for output.
 *
 * @param {Object} result - value returned by readSemantic
 * @param {string} [format='agent'] - 'json' emits pretty-printed JSON; any
 *   other value emits a header followed by one fenced code block per span
 * @returns {string}
 */
export function formatReadSemanticResult(result, format = 'agent') {
  if (format === 'json') return JSON.stringify(result, null, 2);

  const openFence = result.language ? '```' + result.language : '```';
  const title = result.fellBack
    ? `### ${result.file} — full file (${result.reason || 'fallback'})`
    : `### ${result.file} — top spans for: ${JSON.stringify(result.query)}`;

  const out = [title];
  if (!result.ok) {
    out.push(`[error]`);
    return out.join('\n');
  }

  for (const span of result.spans) {
    const range = `lines ${span.startLine}-${span.endLine}`;
    const hasSymbols = span.symbols && span.symbols.length;
    const label = hasSymbols ? `${span.symbols.join(', ')} (${range})` : range;
    const scoreTag = typeof span.score === 'number'
      ? ` — score=${span.score.toFixed(3)}`
      : '';
    out.push(`-- ${label}${scoreTag}`, openFence, span.text, '```');
  }
  return out.join('\n');
}
654
+
655
+ // ---------------------------------------------------------------------------
656
+ // CLI handler
657
+ // sweet-search read-semantic path/to/file.ts "how does X work"
658
+ // sweet-search read-semantic path/to/file.ts "..." --top 5 --threshold 0.4
659
+ // sweet-search read-semantic path/to/file.ts "..." --json --verbose
660
+ // ---------------------------------------------------------------------------
661
+
662
/**
 * Parses `read-semantic` CLI arguments.
 *
 * Numeric flags require a finite numeric value; a missing or non-numeric
 * value is rejected here rather than being passed on as NaN.
 *
 * @param {string[]} args - arguments after the subcommand name
 * @returns {Object} `{ help: true }` when help was requested, otherwise
 *   `{ positional, format, topK, threshold, contextLines, maxChars,
 *   maxTokens, verbose }` with unset numeric options left undefined
 * @throws {Error} on an unknown `--flag` or an invalid numeric flag value
 */
function _parseArgs(args) {
  // Flag spelling -> option name, for every flag that consumes a number.
  const numericFlags = new Map([
    ['--top', 'topK'],
    ['--top-k', 'topK'],
    ['-k', 'topK'],
    ['--threshold', 'threshold'],
    ['--context', 'contextLines'],
    ['--max-chars', 'maxChars'],
    ['--max-tokens', 'maxTokens'],
  ]);
  const numbers = {
    topK: undefined,
    threshold: undefined,
    contextLines: undefined,
    maxChars: undefined,
    maxTokens: undefined,
  };
  const positional = [];
  let format = 'agent';
  let verbose = false;

  for (let i = 0; i < args.length; i++) {
    const a = args[i];
    if (a === '--json') format = 'json';
    else if (a === '--agent') format = 'agent';
    else if (a === '--verbose') verbose = true;
    else if (numericFlags.has(a)) {
      const raw = args[++i];
      // Number('') is 0, so blank values are rejected explicitly.
      const value = raw === undefined || String(raw).trim() === '' ? NaN : Number(raw);
      if (!Number.isFinite(value)) {
        throw new Error(`${a} expects a numeric value, got: ${raw === undefined ? '(nothing)' : raw}`);
      }
      numbers[numericFlags.get(a)] = value;
    } else if (a === '--help' || a === '-h') return { help: true };
    else if (a.startsWith('--')) throw new Error(`unknown flag: ${a}`);
    else positional.push(a);
  }

  const { topK, threshold, contextLines, maxChars, maxTokens } = numbers;
  return { positional, format, topK, threshold, contextLines, maxChars, maxTokens, verbose };
}
682
+
683
/** Writes the `read-semantic` usage text to stdout. */
function _printHelp() {
  const usage = [
    ' sweet-search read-semantic <file> "<query>"',
  ];
  const options = [
    ' --top, -k <n> Max ranked spans before merging (default: 5)',
    ' --threshold <f> MaxSim score floor when LI runs (default: 0.4)',
    ' --context <n> Lines of pre/post context per selected span (default: 2)',
    ' --max-chars <n> Hard cap on returned text (default: 8000)',
    ' --max-tokens <n> Convenience cap (~chars/4)',
    ' --json Emit JSON',
    ' --verbose Include timings + per-signal scores',
  ];
  const text = [
    'sweet-search read-semantic — return only the file spans relevant to a query',
    '',
    'Usage:',
    ...usage,
    '',
    'Options:',
    ...options,
    '', // trailing entry so the output ends with a newline
  ].join('\n');
  process.stdout.write(text);
}
701
+
702
/**
 * Entry point for `sweet-search read-semantic <file> "<query>" [options]`.
 *
 * Always terminates the process. Exit codes: 0 on success (or `--help`),
 * 1 when the read failed or reported `ok: false`, 2 on a usage error.
 *
 * @param {string[]} args - CLI arguments after the subcommand name
 * @returns {Promise<void>}
 */
export async function handleReadSemanticCli(args) {
  const tag = '[sweet-search read-semantic]';

  let parsed;
  try {
    parsed = _parseArgs(args);
  } catch (err) {
    process.stderr.write(`${tag} ${err.message}\n`);
    process.exit(2);
    return; // only reached when process.exit is stubbed
  }
  if (parsed.help || !parsed.positional || parsed.positional.length < 2) {
    _printHelp();
    process.exit(parsed.help ? 0 : 2);
    return;
  }

  // First positional is the file; everything after it is the query text.
  const [file, ...queryParts] = parsed.positional;
  const query = queryParts.join(' ');

  let result;
  try {
    result = await readSemantic({
      path: file,
      query,
      topK: parsed.topK,
      threshold: parsed.threshold,
      contextLines: parsed.contextLines,
      maxChars: parsed.maxChars,
      maxTokens: parsed.maxTokens,
      verbose: parsed.verbose,
    });
  } catch (err) {
    // Report a one-line error (e.g. a blank query) rather than letting the
    // rejection escape as an unhandled stack trace.
    process.stderr.write(`${tag} ${err?.message ?? err}\n`);
    process.exit(1);
    return;
  }

  const rendered = formatReadSemanticResult(result, parsed.format);
  const output = parsed.format === 'json' ? rendered : `${rendered}\n`;
  // stdout writes can be asynchronous, and process.exit() does not wait for
  // them — exit only once the write has been handed off.
  await new Promise((resolve) => {
    process.stdout.write(output, () => resolve());
  });
  process.exit(result.ok ? 0 : 1);
}
726
+
727
/**
 * Test-only export: returns every module-level cache to its initial state so
 * unit tests do not leak state into one another.
 */
export function __resetReadSemanticCachesForTests() {
  _appliedLiPerRoot.clear();
  _encodeQueryFn = null;
  _liInitPromise = null;
  _liIndex = null;
  _repo = null;
}