sweet-search 2.4.2 → 2.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/core/cli.js +43 -5
  2. package/core/embedding/embedding-cache.js +266 -18
  3. package/core/embedding/embedding-service.js +45 -9
  4. package/core/graph/graph-expansion.js +52 -12
  5. package/core/graph/graph-extractor.js +30 -1
  6. package/core/indexing/ast-chunker.js +331 -16
  7. package/core/indexing/chunking/chunk-builder.js +34 -1
  8. package/core/indexing/index-codebase-v21.js +31 -2
  9. package/core/indexing/index.js +6 -3
  10. package/core/indexing/indexer-ann.js +45 -6
  11. package/core/indexing/indexer-build.js +9 -1
  12. package/core/indexing/indexer-phases.js +6 -4
  13. package/core/indexing/indexing-file-policy.js +140 -0
  14. package/core/indexing/li-skip-policy.js +11 -220
  15. package/core/infrastructure/codebase-repository.js +21 -0
  16. package/core/infrastructure/config/embedding.js +20 -1
  17. package/core/infrastructure/config/graph.js +2 -2
  18. package/core/infrastructure/config/ranking.js +10 -0
  19. package/core/infrastructure/config/vector-store.js +1 -1
  20. package/core/infrastructure/coreml-cascade.js +236 -30
  21. package/core/infrastructure/coreml-cascade.json +25 -0
  22. package/core/infrastructure/index.js +17 -0
  23. package/core/infrastructure/init-config.js +216 -0
  24. package/core/infrastructure/language-patterns/registry-core.js +18 -0
  25. package/core/infrastructure/model-registry.js +12 -0
  26. package/core/infrastructure/native-inference.js +143 -51
  27. package/core/infrastructure/tree-sitter-provider.js +92 -2
  28. package/core/ranking/cascaded-scorer.js +6 -2
  29. package/core/ranking/file-kind-ranking.js +264 -0
  30. package/core/ranking/late-interaction-index.js +10 -4
  31. package/core/ranking/late-interaction-policy.js +304 -0
  32. package/core/search/context-expander.js +267 -28
  33. package/core/search/index.js +4 -0
  34. package/core/search/search-cli.js +3 -1
  35. package/core/search/search-pattern.js +4 -3
  36. package/core/search/search-postprocess.js +189 -8
  37. package/core/search/search-read-semantic.js +734 -0
  38. package/core/search/search-read.js +481 -0
  39. package/core/search/search-server.js +153 -5
  40. package/core/search/sweet-search.js +133 -16
  41. package/core/start-server.js +13 -2
  42. package/mcp/server.js +41 -0
  43. package/mcp/tool-handlers.js +117 -6
  44. package/package.json +9 -7
  45. package/scripts/init.js +386 -5
  46. package/scripts/uninstall.js +152 -6
@@ -0,0 +1,481 @@
1
+ /**
2
+ * sweet-search read — filesystem-grounded file reader.
3
+ *
4
+ * Returns exact bytes from disk. The vectors index may attach symbol/chunk
5
+ * metadata for indexed files, but the returned `text` always comes from
6
+ * `node:fs`, never from the (truncated) DB column.
7
+ *
8
+ * Design notes:
9
+ * - Filesystem is ground truth. Never return DB-stored text as content.
10
+ * - Batch up to 20 files; per-file errors do not fail the batch.
11
+ * - Warm-process cache keyed by `path|size|mtimeMs` avoids re-reading hot
12
+ * files; line-offset table lets line-range reads avoid materialising the
13
+ * whole content for large files.
14
+ *
15
+ * DDD: this module lives in the search/ application layer (allowed to import
16
+ * infrastructure for filesystem grounding and chunk metadata).
17
+ */
18
+
19
+ import { promises as fs, statSync } from 'node:fs';
20
+ import path from 'node:path';
21
+ import { CodebaseRepository } from '../infrastructure/codebase-repository.js';
22
+ import { DB_PATHS } from '../infrastructure/config/index.js';
23
+
24
+ // ---------------------------------------------------------------------------
25
+ // Cache — keyed by absolutePath|size|mtimeMs (any change invalidates).
26
+ // Bounded LRU. Entries hold either the full text + line-offset table, or just
27
+ // the line-offset table for very large files where we deliberately avoid
28
+ // caching the whole content.
29
+ // ---------------------------------------------------------------------------
30
+
31
+ const CACHE_MAX_ENTRIES = 64;
32
+ const CACHE_LARGE_FILE_BYTES = 4 * 1024 * 1024; // 4MB — switch to range-read mode
33
+ const _cache = new Map(); // key -> { text|null, lineOffsets, size, mtimeMs }
34
+
35
/**
 * Build the cache key for one on-disk version of a file.
 * A different size or mtime produces a different key, so stale entries are
 * simply never looked up again.
 */
function _cacheKey(absPath, size, mtimeMs) {
  const version = `${size}|${mtimeMs}`;
  return `${absPath}|${version}`;
}
38
+
39
/**
 * Insert or refresh `key` as the most-recently-used cache entry, then evict
 * from the least-recently-used end until the cache is within its bound.
 */
function _cacheTouch(key, value) {
  // Map iterates in insertion order: delete + set moves the key to the
  // newest position (deleting an absent key is a no-op).
  _cache.delete(key);
  _cache.set(key, value);
  for (const staleKey of _cache.keys()) {
    if (_cache.size <= CACHE_MAX_ENTRIES) break;
    _cache.delete(staleKey);
  }
}
47
+
48
+ // ---------------------------------------------------------------------------
49
+ // Repository singleton — lazy and tolerant of a missing/empty DB.
50
+ // ---------------------------------------------------------------------------
51
+
52
+ let _repo = null;
53
/**
 * Lazily construct the shared CodebaseRepository.
 * `_repo` is tri-state: `null` = not attempted yet, `false` = construction
 * failed (no retry), otherwise the live repository instance.
 *
 * @returns {CodebaseRepository|null} the repository, or null when unavailable
 */
function _getRepo() {
  if (_repo !== null) return _repo || null;
  try {
    _repo = new CodebaseRepository(DB_PATHS.codebase);
  } catch {
    _repo = false;
  }
  return _repo || null;
}
60
+
61
+ // ---------------------------------------------------------------------------
62
+ // Path resolution helpers
63
+ // ---------------------------------------------------------------------------
64
+
65
/**
 * Turn a caller-supplied path into an absolute path.
 * Absolute inputs are returned untouched; relative inputs are resolved
 * against `projectRoot` (or the current working directory).
 *
 * @throws {Error} when `p` is empty / missing
 */
function _resolvePath(p, projectRoot) {
  if (!p) throw new Error('path is required');
  return path.isAbsolute(p)
    ? p
    : path.resolve(projectRoot || process.cwd(), p);
}
70
+
71
/**
 * Express `absPath` relative to the project root when it lives inside the
 * root; otherwise return it unchanged.
 *
 * The relative form is what gets looked up in the index; a path outside the
 * root keeps its absolute form.
 */
function _projectRelative(absPath, projectRoot) {
  const root = projectRoot || process.cwd();
  const rel = path.relative(root, absPath);
  // Only a leading ".." *segment* means "outside the root". A plain
  // startsWith('..') would also reject in-root names such as "..cache/x.js".
  const escapesRoot = rel === '..'
    || rel.startsWith(`..${path.sep}`)
    || path.isAbsolute(rel);
  return escapesRoot ? absPath : rel;
}
78
+
79
+ // ---------------------------------------------------------------------------
80
+ // Line-offset table — index of byte offsets where each line starts.
81
+ // lineOffsets[i] = byte offset of start of line (i+1). lineOffsets has
82
+ // totalLines entries. To slice lines [a..b] (1-based, inclusive):
83
+ // start = lineOffsets[a-1]
84
+ // end = (b < totalLines) ? lineOffsets[b] : buffer.length
85
+ // ---------------------------------------------------------------------------
86
+
87
/**
 * Build the table of byte offsets at which each line starts.
 *
 * @param {Buffer|Uint8Array} buf - raw file bytes
 * @returns {number[]} offsets; `offsets.length` is the line count
 */
function _buildLineOffsets(buf) {
  const offsets = [0];
  // Native indexOf scan: every "\n" byte starts a new line right after it.
  let nl = buf.indexOf(0x0A, 0);
  while (nl !== -1) {
    offsets.push(nl + 1);
    nl = buf.indexOf(0x0A, nl + 1);
  }
  // When the last offset equals the buffer length it does not start a real
  // line: either the file ends WITH a trailing newline, or the buffer is
  // empty (offsets is just [0]). Drop it so offsets.length is the line count.
  if (offsets[offsets.length - 1] === buf.length) offsets.pop();
  return offsets;
}
97
+
98
+ // ---------------------------------------------------------------------------
99
+ // Read implementation
100
+ // ---------------------------------------------------------------------------
101
+
102
/**
 * Read a file from disk, going through the warm-process cache.
 *
 * @param {string} absPath - absolute path of the file
 * @returns {Promise<{text: (string|null), lineOffsets: number[], size: number, mtimeMs: number, key: string}>}
 *   `text` is null only on a cache hit for a large file (its content is
 *   deliberately not cached); callers then read the bytes they need.
 * @throws {Error} `stat failed: <code>` when stat fails (original error in
 *   `cause`), or `not a regular file`.
 */
async function _readFromDisk(absPath) {
  // statSync is a deliberate choice here (a single cheap syscall).
  let stat;
  try {
    stat = statSync(absPath);
  } catch (err) {
    throw new Error(`stat failed: ${err.code || err.message}`, { cause: err });
  }
  if (!stat.isFile()) throw new Error('not a regular file');

  const { size, mtimeMs } = stat;
  const key = _cacheKey(absPath, size, mtimeMs);
  const cached = _cache.get(key);
  if (cached) {
    _cacheTouch(key, cached); // refresh LRU position
    return { ...cached, key, size, mtimeMs };
  }

  // First sight of this file version: read it fully once to build the
  // line-offset table. For large files only the table is cached, so later
  // line-range reads go through _sliceLinesFromDisk() instead of re-reading
  // the whole file.
  const buf = await fs.readFile(absPath);
  const lineOffsets = _buildLineOffsets(buf);
  const text = buf.toString('utf8');
  const isLarge = size > CACHE_LARGE_FILE_BYTES;
  _cacheTouch(key, {
    text: isLarge ? null : text,
    lineOffsets,
    size,
    mtimeMs,
  });

  // The freshly decoded text is returned even for large files so the first
  // read never needs a second pass over the disk.
  return { text, lineOffsets, size, mtimeMs, key };
}
143
+
144
/**
 * Clamp a requested 1-based inclusive line range to the lines that exist.
 *
 * Both ends are clamped into [1, totalLines] and `endLine` is never below
 * `startLine`, so `startByte` is always a real entry of `lineOffsets`.
 * A start past EOF therefore resolves to the last line instead of an
 * undefined byte offset. `endByte` is left null for non-empty files: the
 * end offset depends on the content length, which only the caller knows.
 *
 * @param {number[]} lineOffsets - byte offset of the start of each line
 * @param {number} startLine - requested first line (1-based)
 * @param {number|null|undefined} endLine - requested last line; nullish = EOF
 * @returns {{startLine: number, endLine: number, totalLines: number, startByte: number, endByte: (number|null)}}
 */
function _normalizeLineRange(lineOffsets, startLine, endLine) {
  const total = lineOffsets.length;
  if (total === 0) return { startLine: 1, endLine: 0, totalLines: 0, startByte: 0, endByte: 0 };
  const s = Math.min(total, Math.max(1, startLine | 0));
  const eRaw = (endLine == null) ? total : (endLine | 0);
  const e = Math.min(total, Math.max(s, eRaw));
  return { startLine: s, endLine: e, totalLines: total, startByte: lineOffsets[s - 1], endByte: null };
}
157
+
158
/**
 * Slice lines [startLine..endLine] (1-based, inclusive) out of in-memory text.
 *
 * `lineOffsets` are BYTE offsets, so the slice is taken on the UTF-8 encoded
 * form and decoded afterwards; newlines inside the range are preserved.
 *
 * @returns {{text: string, startLine: number, endLine: number, totalLines: number}}
 */
function _sliceLines(text, lineOffsets, startLine, endLine) {
  const range = _normalizeLineRange(lineOffsets, startLine, endLine);
  if (range.totalLines === 0) return { text: '', startLine: 1, endLine: 0, totalLines: 0 };
  // Encode once and reuse the buffer length as the end-of-file offset.
  const buf = Buffer.from(text, 'utf8');
  const endByte = (range.endLine < range.totalLines)
    ? lineOffsets[range.endLine]
    : buf.length;
  // A missing start offset (start beyond the last line) must yield an empty
  // slice; subarray(undefined, end) would silently return the whole file.
  const startByte = range.startByte ?? endByte;
  const slice = buf.subarray(startByte, endByte).toString('utf8');
  return { text: slice, startLine: range.startLine, endLine: range.endLine, totalLines: range.totalLines };
}
169
+
170
/**
 * Read lines [startLine..endLine] (1-based, inclusive) straight from disk
 * using a previously built line-offset table, without loading the whole file.
 *
 * @param {string} absPath - absolute path of the file
 * @param {number[]} lineOffsets - byte offset of the start of each line
 * @param {number} fileSize - file size in bytes (end offset of the last line)
 * @param {number} startLine
 * @param {number|null} endLine - nullish = through end of file
 * @returns {Promise<{text: string, startLine: number, endLine: number, totalLines: number}>}
 */
async function _sliceLinesFromDisk(absPath, lineOffsets, fileSize, startLine, endLine) {
  const range = _normalizeLineRange(lineOffsets, startLine, endLine);
  if (range.totalLines === 0) return { text: '', startLine: 1, endLine: 0, totalLines: 0 };
  const endByte = (range.endLine < range.totalLines) ? lineOffsets[range.endLine] : fileSize;
  // A missing start offset (start beyond the last line) means "nothing to
  // read"; without this the length below would be NaN.
  const startByte = range.startByte ?? endByte;
  const len = Math.max(0, endByte - startByte);
  const handle = await fs.open(absPath, 'r');
  try {
    const buf = Buffer.allocUnsafe(len);
    // read() may return fewer bytes than requested (short read, or the file
    // shrank). Keep reading until the range is filled or EOF is reached, and
    // decode only the bytes actually read — the rest of an allocUnsafe
    // buffer is uninitialised memory.
    let filled = 0;
    while (filled < len) {
      const { bytesRead } = await handle.read(buf, filled, len - filled, startByte + filled);
      if (bytesRead === 0) break;
      filled += bytesRead;
    }
    return {
      text: buf.toString('utf8', 0, filled),
      startLine: range.startLine,
      endLine: range.endLine,
      totalLines: range.totalLines,
    };
  } finally {
    await handle.close();
  }
}
189
+
190
+ // ---------------------------------------------------------------------------
191
+ // Index metadata enrichment
192
+ // ---------------------------------------------------------------------------
193
+
194
/**
 * Normalise a chunk's stored metadata into an object.
 *
 * @param {string|Object|null|undefined} rawMeta - JSON text or an
 *   already-parsed object
 * @returns {Object|null} the metadata object, or null when absent, not valid
 *   JSON, or valid JSON that is not an object (e.g. `"5"` or `"true"`)
 */
function _parseMeta(rawMeta) {
  if (!rawMeta) return null;
  if (typeof rawMeta === 'object') return rawMeta;
  try {
    const parsed = JSON.parse(rawMeta);
    // JSON primitives are not metadata records.
    return parsed !== null && typeof parsed === 'object' ? parsed : null;
  } catch {
    return null;
  }
}
199
+
200
/** Symbol name of a chunk: `name`, else `symbol`, else null. */
function _metaSymbol(meta) {
  if (meta.name != null) return meta.name;
  if (meta.symbol != null) return meta.symbol;
  return null;
}
203
+
204
/** Chunk kind: `type`, else `chunk_type`, else null. */
function _metaType(meta) {
  if (meta.type != null) return meta.type;
  if (meta.chunk_type != null) return meta.chunk_type;
  return null;
}
207
+
208
/** First line of a chunk: numeric `startLine`, else numeric `line_start`, else null. */
function _metaStartLine(meta) {
  for (const candidate of [meta.startLine, meta.line_start]) {
    if (typeof candidate === 'number') return candidate;
  }
  return null;
}
213
+
214
/** Last line of a chunk: numeric `endLine`, else numeric `line_end`, else null. */
function _metaEndLine(meta) {
  for (const candidate of [meta.endLine, meta.line_end]) {
    if (typeof candidate === 'number') return candidate;
  }
  return null;
}
219
+
220
/**
 * Look up index metadata (chunks + language) for a project-relative path.
 *
 * Enrichment is best-effort: when the repository is unavailable, the lookup
 * throws, or no rows match, the file is reported as not indexed rather than
 * failing the read.
 *
 * @param {string} filePathRel - path as passed to repo.getChunksByFilePath
 * @returns {{indexed: boolean, chunks: Object[], language: (string|null)}}
 */
function _attachIndexMetadata(filePathRel) {
  const notIndexed = () => ({ indexed: false, chunks: [], language: null });

  const repo = _getRepo();
  if (!repo) return notIndexed();

  let rows;
  try {
    rows = repo.getChunksByFilePath(filePathRel);
  } catch {
    // A broken or locked index must not break a filesystem-grounded read.
    return notIndexed();
  }
  if (!Array.isArray(rows) || rows.length === 0) return notIndexed();

  const chunks = [];
  let language = null;
  for (const row of rows) {
    const meta = _parseMeta(row.metadata) || {};
    // The first chunk that declares a language wins.
    if (!language && meta.language) language = meta.language;
    chunks.push({
      id: row.id,
      symbol: _metaSymbol(meta),
      type: _metaType(meta),
      startLine: _metaStartLine(meta),
      endLine: _metaEndLine(meta),
      signature: meta.signature ?? null,
    });
  }
  // Order by startLine for predictable consumption (unknown lines sort first).
  chunks.sort((a, b) => (a.startLine ?? 0) - (b.startLine ?? 0));
  return { indexed: true, chunks, language };
}
245
+
246
+ // ---------------------------------------------------------------------------
247
+ // Public API — single read
248
+ // ---------------------------------------------------------------------------
249
+
250
/**
 * Read one file (or one line range of one file).
 *
 * The returned `text` always comes from disk. Every disk-side failure
 * (missing path, stat/read error, file vanishing between stat and a range
 * read) is reported as `{ ok: false, error }` rather than a rejection, so a
 * batch of reads is never failed by one file. Index metadata is best-effort:
 * if the lookup throws, the file is returned as not indexed.
 *
 * @param {Object} req
 * @param {string} req.path - File path. Absolute or relative to projectRoot.
 * @param {number} [req.startLine] - 1-based, inclusive
 * @param {number} [req.endLine] - 1-based, inclusive
 * @param {string} [req.projectRoot] - default: process.cwd()
 * @param {boolean} [req.includeMetadata=true] - attach index chunks/language
 * @returns {Promise<Object>}
 */
export async function readFile(req) {
  const t0 = performance.now();
  const wantsRange = req.startLine != null || req.endLine != null;

  let absPath;
  let relForIndex;
  let disk;
  let sliced;
  try {
    const projectRoot = req.projectRoot || process.cwd();
    absPath = _resolvePath(req.path, projectRoot);
    relForIndex = _projectRelative(absPath, projectRoot);
    disk = await _readFromDisk(absPath);

    if (wantsRange) {
      // disk.text is null when only the line-offset table is cached (large
      // file): read just the requested byte range from disk.
      sliced = disk.text == null
        ? await _sliceLinesFromDisk(absPath, disk.lineOffsets, disk.size, req.startLine ?? 1, req.endLine ?? null)
        : _sliceLines(disk.text, disk.lineOffsets, req.startLine ?? 1, req.endLine ?? null);
    } else {
      const fullText = disk.text == null
        ? await fs.readFile(absPath, 'utf8')
        : disk.text;
      const totalLines = disk.lineOffsets.length;
      sliced = { text: fullText, startLine: 1, endLine: totalLines, totalLines };
    }
  } catch (err) {
    return {
      file: req.path,
      ok: false,
      error: err.message || String(err),
      exact: true,
      indexed: false,
    };
  }

  let language = null;
  let chunks = [];
  let indexed = false;
  if (req.includeMetadata !== false) {
    try {
      const meta = _attachIndexMetadata(relForIndex);
      indexed = meta.indexed;
      chunks = meta.chunks;
      language = meta.language;
    } catch {
      // Filesystem is ground truth: an index failure only drops enrichment.
    }
  }

  // If a line range was requested, narrow attached chunks to the overlap.
  // Chunks without line information are kept.
  if (wantsRange && chunks.length) {
    chunks = chunks.filter(c =>
      c.startLine == null || c.endLine == null
        ? true
        : (c.endLine >= sliced.startLine && c.startLine <= sliced.endLine),
    );
  }

  return {
    file: req.path,
    absolutePath: absPath,
    ok: true,
    exact: true,
    indexed,
    language,
    totalLines: sliced.totalLines,
    bytes: disk.size,
    mtimeMs: disk.mtimeMs,
    range: wantsRange ? { startLine: sliced.startLine, endLine: sliced.endLine } : null,
    text: sliced.text,
    chunks,
    timings: { totalMs: +(performance.now() - t0).toFixed(2) },
  };
}
325
+
326
/**
 * Batch read — up to 20 files in parallel. Per-file failures (including a
 * malformed entry or an unexpected rejection from readFile) are returned
 * inline as `{ ok: false, error }`; the batch only throws when more than 20
 * files are requested.
 *
 * @param {Object[]} files - [{ path, startLine?, endLine? }, ...]
 * @param {Object} [opts]
 * @param {string} [opts.projectRoot]
 * @param {boolean} [opts.includeMetadata=true]
 * @returns {Promise<{files: Object[], totalMs: number}>}
 */
export async function readFiles(files, opts = {}) {
  if (!Array.isArray(files) || files.length === 0) {
    return { files: [], totalMs: 0 };
  }
  if (files.length > 20) {
    throw new Error(`read accepts at most 20 files; got ${files.length}`);
  }
  const t0 = performance.now();
  const results = await Promise.all(files.map(async (f) => {
    try {
      return await readFile({
        path: f?.path,
        startLine: f?.startLine,
        endLine: f?.endLine,
        projectRoot: opts.projectRoot,
        includeMetadata: opts.includeMetadata !== false,
      });
    } catch (err) {
      // Same shape readFile uses for its own per-file errors.
      return {
        file: f?.path,
        ok: false,
        error: err?.message || String(err),
        exact: true,
        indexed: false,
      };
    }
  }));
  return { files: results, totalMs: +(performance.now() - t0).toFixed(2) };
}
353
+
354
+ // ---------------------------------------------------------------------------
355
+ // Formatting
356
+ // ---------------------------------------------------------------------------
357
+
358
/**
 * Render one read result as an agent-friendly markdown section: a heading
 * with the line span, an optional symbol hint (only when 1-12 chunks are
 * attached), and the text in a fenced code block.
 *
 * The fence is always longer than the longest backtick run in the text, so
 * content that itself contains a triple-backtick fence cannot close the
 * block early.
 *
 * @param {Object} result - one entry produced by readFile()
 * @returns {string}
 */
function _formatAgent(result) {
  if (!result.ok) {
    return `### ${result.file}\n[error] ${result.error}\n`;
  }
  let longestRun = 0;
  for (const run of String(result.text).match(/`+/g) ?? []) {
    if (run.length > longestRun) longestRun = run.length;
  }
  const ticks = '`'.repeat(Math.max(3, longestRun + 1));
  const fence = result.language ? ticks + result.language : ticks;
  const range = result.range
    ? ` (lines ${result.range.startLine}-${result.range.endLine} of ${result.totalLines})`
    : ` (${result.totalLines} lines)`;
  let symbolHint = '';
  if (result.chunks && result.chunks.length > 0 && result.chunks.length <= 12) {
    const names = result.chunks
      .map(c => c.symbol ? `${c.type || 'symbol'}:${c.symbol}` : null)
      .filter(Boolean);
    if (names.length) symbolHint = `\nsymbols: ${names.join(', ')}`;
  }
  return `### ${result.file}${range}${symbolHint}\n${fence}\n${result.text}\n${ticks}\n`;
}
375
+
376
+ export function formatReadResults(results, format = 'agent') {
377
+ if (format === 'json') {
378
+ return JSON.stringify({ files: results.files, totalMs: results.totalMs }, null, 2);
379
+ }
380
+ if (format === 'raw') {
381
+ return results.files.map(r => r.ok ? r.text : `[error: ${r.file}] ${r.error}`).join('\n\n');
382
+ }
383
+ return results.files.map(_formatAgent).join('\n');
384
+ }
385
+
386
+ // ---------------------------------------------------------------------------
387
+ // CLI handler
388
+ // Usage:
389
+ // sweet-search read path/to/file.ts
390
+ // sweet-search read path/to/file.ts --lines 45-92
391
+ // sweet-search read a.ts b.ts c.ts
392
+ // sweet-search read path/to/file.ts --json
393
+ // sweet-search read path/to/file.ts --raw
394
+ // ---------------------------------------------------------------------------
395
+
396
/**
 * Parse a `--lines` spec into `[start, end]`.
 *
 * Accepts "45-92", "45:92", "45" (single line → end = start), or "45-" /
 * "45:" (open end → end = null). A falsy spec yields `[null, null]`.
 *
 * @param {string|number|null|undefined} spec
 * @returns {[number|null, number|null]}
 * @throws {Error} on any other shape
 */
function _parseLineRange(spec) {
  if (!spec) return [null, null];
  // Work on the stringified spec throughout so a numeric spec is accepted
  // instead of failing on a missing String method.
  const match = String(spec).match(/^(\d+)(?:([-:])(\d+)?)?$/);
  if (!match) throw new Error(`invalid --lines spec: ${spec}`);
  const [, startText, separator, endText] = match;
  const start = Number(startText);
  if (endText != null) return [start, Number(endText)];
  // A separator without an end means "open end"; no separator means one line.
  return [start, separator ? null : start];
}
405
+
406
/**
 * Parse the argv tail of `sweet-search read`.
 *
 * @param {string[]} args
 * @returns {{help: true}|{positional: string[], format: string, startLine: (number|null), endLine: (number|null), includeMetadata: boolean}}
 * @throws {Error} on an unknown `--flag`, a `--lines` without a value, or an
 *   invalid `--lines` spec
 */
function _parseArgs(args) {
  const positional = [];
  let format = 'agent';
  let startLine = null;
  let endLine = null;
  let includeMetadata = true;
  for (let i = 0; i < args.length; i++) {
    const a = args[i];
    if (a === '--json') format = 'json';
    else if (a === '--raw') format = 'raw';
    else if (a === '--agent') format = 'agent';
    else if (a === '--no-metadata') includeMetadata = false;
    else if (a === '--lines') {
      const spec = args[++i];
      // A dangling --lines used to be ignored silently, which reads the
      // whole file when the user asked for a range.
      if (spec == null || spec === '') {
        throw new Error('--lines requires a value (e.g. --lines 45-92)');
      }
      [startLine, endLine] = _parseLineRange(spec);
    } else if (a === '--help' || a === '-h') {
      return { help: true };
    } else if (a.startsWith('--')) {
      // Unknown flag — surface clearly rather than silently swallowing.
      throw new Error(`unknown flag: ${a}`);
    } else {
      positional.push(a);
    }
  }
  return { positional, format, startLine, endLine, includeMetadata };
}
432
+
433
/** Write the usage text for `sweet-search read` to stdout. */
function _printHelp() {
  const usage = [
    'sweet-search read — filesystem-grounded file reader',
    '',
    'Usage:',
    ' sweet-search read <path> [...path] Read 1-20 files',
    ' sweet-search read <path> --lines 45-92',
    '',
  ];
  const options = [
    'Options:',
    ' --lines <a-b> 1-based inclusive range. Use "45-" for open end, "45" for one line.',
    ' --json Emit JSON (machine-readable)',
    ' --raw Emit raw text only (no fences/headers)',
    ' --agent Default — markdown fenced block + symbol hints',
    ' --no-metadata Skip index metadata attachment',
    '',
  ];
  process.stdout.write([...usage, ...options].join('\n'));
}
450
+
451
/**
 * CLI entry point for `sweet-search read`. Always terminates the process.
 *
 * Exit codes: 0 = at least one file was read (or --help), 1 = every file
 * failed, 2 = usage error (bad flag, no path, --lines with several paths,
 * more than 20 paths).
 *
 * @param {string[]} args - argv tail after the `read` sub-command
 */
export async function handleReadCli(args) {
  let parsed;
  try {
    parsed = _parseArgs(args);
  } catch (err) {
    process.stderr.write(`[sweet-search read] ${err.message}\n`);
    process.exit(2);
    return;
  }
  if (parsed.help || !parsed.positional || parsed.positional.length === 0) {
    _printHelp();
    process.exit(parsed.help ? 0 : 2);
    return;
  }
  const wantsRange = parsed.startLine != null || parsed.endLine != null;
  if (wantsRange && parsed.positional.length > 1) {
    process.stderr.write('[sweet-search read] --lines requires exactly one path\n');
    process.exit(2);
    return;
  }
  const files = parsed.positional.map(p => ({
    path: p,
    startLine: wantsRange ? parsed.startLine : undefined,
    endLine: wantsRange ? parsed.endLine : undefined,
  }));

  let out;
  try {
    out = await readFiles(files, { includeMetadata: parsed.includeMetadata });
  } catch (err) {
    // readFiles rejects when the batch itself is invalid (e.g. > 20 paths).
    process.stderr.write(`[sweet-search read] ${err.message}\n`);
    process.exit(2);
    return;
  }

  const rendered = formatReadResults(out, parsed.format)
    + (parsed.format !== 'json' ? '\n' : '');
  // stdout writes can be asynchronous (pipes); exiting right after write()
  // may truncate the output, so wait for the write callback first.
  await new Promise((resolve) => { process.stdout.write(rendered, () => resolve()); });

  // Non-zero exit if every file failed (so shell pipelines see the error).
  const allFailed = out.files.length > 0 && out.files.every(f => !f.ok);
  process.exit(allFailed ? 1 : 0);
}
476
+
477
/**
 * Test-only export. Forgets the repository singleton (so the next lookup
 * constructs it again) and empties the file cache.
 */
export function __resetReadCachesForTests() {
  _repo = null;
  _cache.clear();
}
@@ -220,7 +220,7 @@ export async function startServer() {
220
220
 
221
221
  // Agent mode: context packaging (ColGrep agent format)
222
222
  const rawFormat = url.searchParams.get('format');
223
- const AGENT_FORMATS = new Set(['agent', 'agent_preview', 'agent_full']);
223
+ const AGENT_FORMATS = new Set(['agent', 'agent_preview', 'agent_full', 'agent_full_xl']);
224
224
  const agentFormat = AGENT_FORMATS.has(rawFormat) ? rawFormat : undefined;
225
225
  const tokenBudget = url.searchParams.has('budget')
226
226
  ? parseInt(url.searchParams.get('budget'), 10)
@@ -257,8 +257,13 @@ export async function startServer() {
257
257
  ...(agentFormat && { format: agentFormat, tokenBudget }),
258
258
  });
259
259
 
260
- // Agent mode: return the packaged response directly as JSON
260
+ // Agent mode: return the packaged response directly as JSON.
261
+ // Inject server-side repo identity so callers can prove which repo
262
+ // produced these results (defends against multi-repo bench reusing
263
+ // a stale daemon — see eval/agent-read-workflows/run-bench.js).
261
264
  if (searchResult.format === 'agent') {
265
+ searchResult.serverProjectRoot = searcher.projectRoot || null;
266
+ searchResult.serverPid = process.pid;
262
267
  res.writeHead(200, { 'Content-Type': 'application/json' });
263
268
  res.end(JSON.stringify(searchResult));
264
269
  } else {
@@ -286,12 +291,22 @@ export async function startServer() {
286
291
  }
287
292
  } else if (req.method === 'GET' && reqUrl === '/health') {
288
293
  const status = initError ? 'failed' : (serverReady ? 'ready' : 'starting');
294
+ // Repo identity — harness uses these to verify the daemon serves the
295
+ // expected repo, not a leftover from a previous benchmark subprocess.
296
+ // We resolve the path lexically (path.resolve) so relative differences are normalised; symlinks are NOT followed.
297
+ const rawProjectRoot = searcher.projectRoot || null;
298
+ let resolvedProjectRoot = null;
299
+ try { if (rawProjectRoot) resolvedProjectRoot = (await import('path')).default.resolve(rawProjectRoot); } catch { /* */ }
289
300
  res.writeHead(200, { 'Content-Type': 'application/json' });
290
301
  res.end(JSON.stringify({
291
302
  status,
292
303
  warm: serverReady,
293
304
  pid: process.pid,
294
305
  uptimeSec: Math.round(process.uptime()),
306
+ projectRoot: rawProjectRoot,
307
+ resolvedProjectRoot,
308
+ codebaseDbPath: searcher.codebaseDbPath || null,
309
+ initialized: serverReady && !initError,
295
310
  init: {
296
311
  startedAt: new Date(initStartedAt).toISOString(),
297
312
  elapsedMs: initTimeMs ?? (Date.now() - initStartedAt),
@@ -481,6 +496,137 @@ export async function queryServer(query, options = {}) {
481
496
  });
482
497
  }
483
498
 
499
/**
 * Fetch /health from the running daemon. Returns the parsed body, or null if
 * the daemon is unreachable, times out, replies non-200, replies with
 * invalid JSON, or the response stream errors.
 *
 * Use this (not isServerRunning alone) when you need repo identity to make a
 * decision — e.g., the agent-bench harness must know which repo the daemon
 * is currently serving so it can refuse cross-repo contamination.
 *
 * @param {Object} [options]
 * @param {number} [options.timeoutMs=1000] - socket inactivity timeout
 * @returns {Promise<Object|null>}
 */
export async function getServerHealth({ timeoutMs = 1000 } = {}) {
  try {
    const http = await import('http');
    return await new Promise((resolve) => {
      const req = http.get(`http://localhost:${SEARCH_SERVER_PORT}/health`, (res) => {
        // Decode as UTF-8 at the stream level: concatenating raw Buffer
        // chunks corrupts a multi-byte character split across two chunks
        // (e.g. a non-ASCII project path).
        res.setEncoding('utf8');
        let payload = '';
        res.on('data', (chunk) => { payload += chunk; });
        res.on('error', () => resolve(null));
        res.on('end', () => {
          if (res.statusCode !== 200) { resolve(null); return; }
          try { resolve(JSON.parse(payload)); }
          catch { resolve(null); }
        });
      });
      req.on('error', () => resolve(null));
      req.setTimeout(timeoutMs, () => { req.destroy(); resolve(null); });
    });
  } catch {
    return null;
  }
}
527
+
528
/**
 * Ask the running daemon to shut down by requesting /stop over its Unix
 * socket (the request is never sent over TCP).
 *
 * Resolves true when the daemon answered, or when the connection failed with
 * ECONNRESET / EPIPE / ENOENT (treated as "daemon is going or gone").
 * Resolves false on any other error or on timeout. The caller is expected to
 * poll until the daemon is really gone.
 *
 * @param {Object} [options]
 * @param {number} [options.timeoutMs=5000]
 * @returns {Promise<boolean>}
 */
export async function stopServer({ timeoutMs = 5000 } = {}) {
  try {
    const http = await import('http');
    return await new Promise((resolve) => {
      const requestOptions = {
        socketPath: SEARCH_SERVER_SOCKET, path: '/stop', method: 'GET',
      };
      const onResponse = (res) => {
        // Drain the body; only the end of the response matters.
        res.on('data', () => {});
        res.on('end', () => resolve(true));
      };
      const req = http.request(requestOptions, onResponse);
      // The server may drop the socket while exiting, before the response
      // is complete. Those error codes still count as success.
      req.on('error', (err) => {
        const code = (err && err.code) || '';
        switch (code) {
          case 'ECONNRESET':
          case 'EPIPE':
          case 'ENOENT':
            resolve(true);
            break;
          default:
            resolve(false);
        }
      });
      req.setTimeout(timeoutMs, () => { req.destroy(); resolve(false); });
      req.end();
    });
  } catch {
    return false;
  }
}
558
+
559
/**
 * Best-effort wait for the daemon to exit. Polls isServerRunning() every
 * `intervalMs`; resolves true as soon as it reports the daemon is down, or
 * false once `timeoutMs` has elapsed.
 *
 * @param {Object} [options]
 * @param {number} [options.timeoutMs=8000]
 * @param {number} [options.intervalMs=200]
 * @returns {Promise<boolean>}
 */
export async function waitForServerExit({ timeoutMs = 8000, intervalMs = 200 } = {}) {
  const pause = (ms) => new Promise((resolve) => { setTimeout(resolve, ms); });
  const giveUpAt = Date.now() + timeoutMs;
  for (;;) {
    if (Date.now() >= giveUpAt) return false;
    const stillUp = await isServerRunning();
    if (!stillUp) return true;
    await pause(intervalMs);
  }
}
571
+
572
/**
 * Make sure the warm daemon serves `expectedProjectRoot`.
 *
 * - Daemon already reports the expected root → reuse it.
 * - Daemon reports a different root → stop it, wait for it to exit, respawn.
 * - No daemon (or no root reported by /health) → spawn one.
 *
 * After spawning, /health is polled every `intervalMs` until the daemon
 * reports the expected root and is warm/ready, or `timeoutMs` elapses.
 *
 * Returns:
 *   { ok: true, health, action: 'reused'|'spawned'|'restarted' }
 *   { ok: false, reason, health? }
 *
 * Used by the agent-bench harness to fail closed against cross-repo
 * contamination (see eval/agent-read-workflows/run-bench.js warmup phase).
 */
export async function ensureDaemonForProjectRoot(expectedProjectRoot, {
  timeoutMs = 60000, intervalMs = 500,
} = {}) {
  const { default: path } = await import('path');
  const expected = path.resolve(expectedProjectRoot);

  let health = await getServerHealth();
  const servingRoot = health ? health.resolvedProjectRoot : null;
  if (servingRoot && servingRoot === expected) {
    return { ok: true, health, action: 'reused' };
  }

  let action = 'spawned';
  if (servingRoot) {
    // A daemon is up but serves another repo: stop it before respawning.
    await stopServer();
    const exited = await waitForServerExit();
    if (!exited) {
      return { ok: false, reason: 'previous-daemon-failed-to-exit', health };
    }
    action = 'restarted';
  }

  // The spawned daemon is expected to pick its root up from the inherited
  // environment, so a conflicting SWEET_SEARCH_PROJECT_ROOT is rejected.
  const envRoot = process.env.SWEET_SEARCH_PROJECT_ROOT;
  if (envRoot) {
    const envResolved = path.resolve(envRoot);
    if (envResolved !== expected) {
      return {
        ok: false,
        reason: `caller env SWEET_SEARCH_PROJECT_ROOT=${envResolved} differs from expected=${expected}`,
      };
    }
  }

  await autoSpawnServer();
  const deadline = Date.now() + timeoutMs;
  while (Date.now() < deadline) {
    health = await getServerHealth();
    const rootMatches = Boolean(health) && health.resolvedProjectRoot === expected;
    if (rootMatches && (health.warm === true || health.status === 'ready')) {
      return { ok: true, health, action };
    }
    await new Promise((resolve) => { setTimeout(resolve, intervalMs); });
  }
  return { ok: false, reason: 'daemon-did-not-become-ready-with-expected-root', health };
}
629
+
484
630
  export async function isServerRunning() {
485
631
  try {
486
632
  const http = await import('http');
@@ -519,13 +665,15 @@ export async function autoSpawnServer() {
519
665
  const { fileURLToPath } = await import('url');
520
666
  const path = await import('path');
521
667
 
522
- // Dynamic import to get the sweet-search.js file path
668
+ // Spawn the real CLI entrypoint with --serve. sweet-search.js is a library
669
+ // module and does not process argv, so launching it directly never starts
670
+ // the daemon.
523
671
  const __filename = fileURLToPath(import.meta.url);
524
- const sweetSearchPath = path.join(path.dirname(__filename), 'sweet-search.js');
672
+ const sweetSearchPath = path.join(path.dirname(__filename), '..', 'cli.js');
525
673
 
526
674
  console.error('[AutoStart] Starting warm server in background...');
527
675
 
528
- // Spawn detached process — run sweet-search.js with --serve
676
+ // Spawn detached process — run sweet-search with --serve
529
677
  const child = spawn(process.execPath, [sweetSearchPath, '--serve'], {
530
678
  detached: true,
531
679
  stdio: 'ignore',