sweet-search 2.5.13 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/README.md +36 -9
  2. package/core/cli.js +41 -3
  3. package/core/embedding/embedding-local-model.js +106 -10
  4. package/core/embedding/embedding-service.js +59 -1
  5. package/core/embedding/model-client.mjs +257 -0
  6. package/core/embedding/model-server.mjs +217 -0
  7. package/core/incremental-indexing/application/maintenance-handlers.mjs +19 -98
  8. package/core/incremental-indexing/application/maintenance-worker.mjs +46 -9
  9. package/core/incremental-indexing/application/operator-cli.mjs +14 -5
  10. package/core/incremental-indexing/application/production-reconciler-helpers.mjs +40 -0
  11. package/core/incremental-indexing/application/production-reconciler.mjs +718 -54
  12. package/core/incremental-indexing/application/reconciler.mjs +87 -15
  13. package/core/incremental-indexing/domain/cutoff-cache.mjs +191 -0
  14. package/core/incremental-indexing/domain/interval-autotune.mjs +84 -1
  15. package/core/incremental-indexing/domain/reconcile-counters.mjs +0 -4
  16. package/core/incremental-indexing/domain/watermark-scheduler.mjs +0 -24
  17. package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +2 -26
  18. package/core/incremental-indexing/infrastructure/manifest.mjs +1 -9
  19. package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +72 -0
  20. package/core/indexing/artifact-builder.js +1 -1
  21. package/core/indexing/dedup/dedup-phase.js +36 -17
  22. package/core/indexing/dedup/exemplar-selector.js +5 -0
  23. package/core/indexing/index-codebase-v21.js +37 -14
  24. package/core/indexing/index-maintainer.mjs +337 -6
  25. package/core/indexing/indexer-ann.js +27 -434
  26. package/core/indexing/indexer-build.js +30 -14
  27. package/core/indexing/indexer-manifest.js +0 -3
  28. package/core/indexing/indexer-phases.js +101 -25
  29. package/core/indexing/maintainer-launcher.mjs +22 -0
  30. package/core/indexing/maintainer-watcher.mjs +397 -0
  31. package/core/indexing/os-priority.mjs +160 -0
  32. package/core/indexing/rss-budget.mjs +425 -0
  33. package/core/indexing/streaming-vectors.js +450 -0
  34. package/core/infrastructure/config/platform.js +14 -10
  35. package/core/infrastructure/onnx-session-utils.js +37 -0
  36. package/core/infrastructure/sparse-gram-delta-reader.js +11 -1
  37. package/core/ranking/late-interaction-index.js +58 -7
  38. package/core/search/daemon-registry.js +199 -0
  39. package/core/search/search-read-semantic.js +9 -3
  40. package/core/search/search-semantic.js +6 -29
  41. package/core/search/search-server.js +527 -27
  42. package/core/search/session-daemon-prewarm.mjs +110 -1
  43. package/core/search/sweet-search.js +0 -38
  44. package/core/vector-store/binary-hnsw-index.js +692 -78
  45. package/core/vector-store/index.js +1 -4
  46. package/eval/agent-read-workflows/bin/_ss-argparse.mjs +51 -5
  47. package/eval/agent-read-workflows/bin/_ss-helpers.mjs +95 -44
  48. package/eval/agent-read-workflows/bin/ss-read +2 -0
  49. package/mcp/tool-handlers.js +1 -2
  50. package/package.json +11 -8
  51. package/scripts/uninstall.js +2 -0
  52. package/core/vector-store/hnsw-index.js +0 -751
@@ -9,11 +9,19 @@
9
9
  */
10
10
 
11
11
  import fs from 'fs/promises';
12
- import { existsSync } from 'fs';
12
+ import { existsSync, realpathSync } from 'fs';
13
+ import path from 'node:path';
13
14
  import { LATE_INTERACTION_CONFIG } from '../infrastructure/config/index.js';
14
15
  import { clearCache } from '../embedding/embedding-cache.js';
15
16
  import { launchMaintainer } from '../indexing/maintainer-launcher.mjs';
16
- import { projectSocketPath, projectPidFile, tcpPort } from './server-identity.js';
17
+ import { projectSocketPath, projectPidFile, tcpPort, resolveProjectRoot } from './server-identity.js';
18
+ import {
19
+ upsertSelf as registryUpsertSelf,
20
+ touchSelf as registryTouchSelf,
21
+ removeSelf as registryRemoveSelf,
22
+ pruneAndList as registryPruneAndList,
23
+ selectEvictionTargets as registrySelectEvictionTargets,
24
+ } from './daemon-registry.js';
17
25
 
18
26
  // =============================================================================
19
27
  // Server constants
@@ -27,6 +35,313 @@ export const SEARCH_SERVER_PORT = 9876;
27
35
  export const SEARCH_SERVER_TIMEOUT_MS = 30_000;
28
36
  export const SEARCH_SERVER_MAX_URL_LENGTH = 16_384;
29
37
  export const SEARCH_SERVER_MAX_QUERY_LENGTH = 2_000;
38
+ export const SEARCH_SERVER_MAX_READ_PATH_LENGTH = 8_192;
39
+
40
/**
 * Resolve a project root to an absolute, symlink-free path.
 *
 * @param {string} [root] - Candidate root; a falsy value means the current
 *   working directory.
 * @returns {string} The native realpath of the resolved root, or the plain
 *   absolute path when realpath resolution throws.
 */
function canonicalProjectRoot(root) {
  const absolute = path.resolve(root ? root : process.cwd());
  let canonical = absolute;
  try {
    canonical = realpathSync.native(absolute);
  } catch {
    // realpath failed — keep the absolute (non-canonical) path.
  }
  return canonical;
}
48
+
49
/**
 * Parse an optional numeric query parameter.
 *
 * @param {string|null|undefined} value - Raw parameter value.
 * @param {string} name - Parameter name used in the error message.
 * @returns {number|undefined} The parsed number, or `undefined` when the
 *   parameter is absent (`null`, `undefined`, or the empty string).
 * @throws {Error} When the value is present but is not a finite number.
 */
function parseFiniteNumber(value, name) {
  if (value == null || value === '') return undefined;
  const text = String(value).trim();
  // Number('   ') coerces to 0; a whitespace-only value is malformed input,
  // not zero, so reject it explicitly (parseInteger rejects it as well).
  const n = text === '' ? Number.NaN : Number(text);
  if (!Number.isFinite(n)) throw new Error(`${name} must be a finite number`);
  return n;
}
55
+
56
/**
 * Parse an optional integer query parameter.
 *
 * Only an optionally signed run of decimal digits is accepted (surrounding
 * whitespace is ignored). Values such as `'12abc'` or `'1.5'` are rejected
 * rather than silently truncated.
 *
 * @param {string|null|undefined} value - Raw parameter value.
 * @param {string} name - Parameter name used in the error message.
 * @returns {number|undefined} The parsed integer, or `undefined` when the
 *   parameter is absent (`null`, `undefined`, or the empty string).
 * @throws {Error} When the value is present but is not an integer.
 */
function parseInteger(value, name) {
  if (value == null || value === '') return undefined;
  const text = String(value).trim();
  // Validate the whole token: Number.parseInt stops at the first non-digit
  // and would accept a numeric prefix followed by garbage.
  const n = /^[+-]?\d+$/.test(text) ? Number(text) : Number.NaN;
  if (!Number.isInteger(n)) throw new Error(`${name} must be an integer`);
  return n;
}
62
+
63
/**
 * Pick the searcher's late-interaction index when it is safe to reuse.
 *
 * @param {object|null|undefined} searcher - Object that may carry a
 *   `lateInteractionIndex` property.
 * @returns {object|null} The index, or `null` when it is missing, flagged with
 *   `modelMismatch === true`, has no `documents`, or `documents.size` is 0.
 */
function reusableLateInteractionIndex(searcher) {
  const candidate = searcher?.lateInteractionIndex;
  const usable = Boolean(candidate)
    && candidate.modelMismatch !== true
    && Boolean(candidate.documents)
    && candidate.documents.size !== 0;
  return usable ? candidate : null;
}
70
+
71
/**
 * Build a JSON error response descriptor for the daemon endpoints.
 *
 * @param {number} status - HTTP status code.
 * @param {string} message - Text stored under the `error` key.
 * @param {object} [extra] - Extra fields merged into the JSON body after
 *   `error`.
 * @returns {{status: number, contentType: string, body: string}}
 */
function readSemanticError(status, message, extra = {}) {
  const payload = { error: message, ...extra };
  return { status, contentType: 'application/json', body: JSON.stringify(payload) };
}
78
+
79
/**
 * Build the daemon response for `GET /read-semantic`.
 *
 * Checks run in this order: Unix-socket only (403), server readiness (503),
 * URL length (414), URL parse (400), `path`/`file` presence and length
 * (400/413), `q`/`query` presence and length (400/413), `projectRoot` presence
 * (400) and equality with the daemon's canonical root (409), numeric options
 * (400). Any failure while running or formatting the read becomes a 500.
 *
 * @param {string} reqUrl - Request path and query string.
 * @param {object} [options]
 * @param {boolean} [options.isUnixSocket=false] - Whether the request arrived on the Unix socket.
 * @param {boolean} [options.serverReady=false] - Whether initialization has finished.
 * @param {Error|null} [options.initError=null] - Terminal initialization error, if any.
 * @param {object|null} [options.searcher=null] - Source of `projectRoot` and the late-interaction index.
 * @param {Function|null} [options.readSemanticFn=null] - Override for `readSemantic`.
 * @param {Function|null} [options.formatReadSemanticResultFn=null] - Override for `formatReadSemanticResult`.
 * @returns {Promise<{status: number, contentType: string, body: string}>}
 */
export async function buildReadSemanticDaemonResponse(reqUrl, {
  isUnixSocket = false,
  serverReady = false,
  initError = null,
  searcher = null,
  readSemanticFn = null,
  formatReadSemanticResultFn = null,
} = {}) {
  if (!isUnixSocket) {
    return readSemanticError(403, '/read-semantic is only available via Unix socket');
  }
  if (!serverReady) {
    const failureDetail = initError?.message;
    return readSemanticError(
      503,
      failureDetail ? `Server initialization failed: ${failureDetail}` : 'Server is starting, please retry',
      { status: initError ? 'failed' : 'starting' },
    );
  }
  if (reqUrl.length > SEARCH_SERVER_MAX_URL_LENGTH) {
    return readSemanticError(414, `Request URL too long (max ${SEARCH_SERVER_MAX_URL_LENGTH} chars)`);
  }

  let params;
  try {
    ({ searchParams: params } = new URL(reqUrl, `http://localhost:${SEARCH_SERVER_PORT}`));
  } catch {
    return readSemanticError(400, 'Invalid request URL');
  }

  const file = params.get('path') || params.get('file') || '';
  const query = params.get('q') || params.get('query') || '';
  const requestedRoot = params.get('projectRoot') || '';
  const isJson = params.get('format') === 'json';
  const format = isJson ? 'json' : 'agent';

  if (!file) return readSemanticError(400, 'Missing path parameter ?path=');
  if (file.length > SEARCH_SERVER_MAX_READ_PATH_LENGTH) {
    return readSemanticError(413, `Path too long (max ${SEARCH_SERVER_MAX_READ_PATH_LENGTH} chars)`);
  }
  if (!query) return readSemanticError(400, 'Missing query parameter ?q=');
  if (query.length > SEARCH_SERVER_MAX_QUERY_LENGTH) {
    return readSemanticError(413, `Query too long (max ${SEARCH_SERVER_MAX_QUERY_LENGTH} chars)`);
  }
  if (!requestedRoot) return readSemanticError(400, 'Missing projectRoot parameter');

  // The daemon serves exactly one project; refuse requests for any other root.
  const serverRoot = canonicalProjectRoot(searcher?.projectRoot || process.cwd());
  const clientRoot = canonicalProjectRoot(requestedRoot);
  if (serverRoot !== clientRoot) {
    return readSemanticError(409, 'Daemon project root mismatch', {
      serverProjectRoot: serverRoot,
      requestedProjectRoot: clientRoot,
    });
  }

  // Parsed in a fixed order so the first malformed option decides the message.
  const numeric = {};
  try {
    numeric.topK = parseInteger(params.get('k') ?? params.get('topK'), 'topK');
    numeric.threshold = parseFiniteNumber(params.get('threshold'), 'threshold');
    numeric.contextLines = parseInteger(params.get('contextLines') ?? params.get('context'), 'contextLines');
    numeric.maxChars = parseInteger(params.get('maxChars'), 'maxChars');
    numeric.maxTokens = parseInteger(params.get('maxTokens'), 'maxTokens');
  } catch (err) {
    return readSemanticError(400, err.message);
  }
  const verbose = params.get('verbose') === 'true';

  try {
    // Load the real implementation only when an override is missing.
    const needsModule = !readSemanticFn || !formatReadSemanticResultFn;
    const mod = needsModule ? await import('./search-read-semantic.js') : null;
    const readSemantic = readSemanticFn || mod.readSemantic;
    const formatReadSemanticResult = formatReadSemanticResultFn || mod.formatReadSemanticResult;

    const result = await readSemantic({
      path: file,
      query,
      projectRoot: serverRoot,
      ...numeric,
      verbose,
      _lateInteractionIndex: reusableLateInteractionIndex(searcher),
    });
    const formatted = formatReadSemanticResult(result, format);
    return {
      status: result?.ok === false ? 404 : 200,
      contentType: isJson ? 'application/json' : 'text/plain; charset=utf-8',
      // Non-JSON output gets one trailing newline; JSON is returned as-is.
      body: isJson ? formatted : `${formatted}\n`,
    };
  } catch (err) {
    return readSemanticError(500, err.message || String(err));
  }
}
173
+
174
/**
 * Build the daemon response for `GET /trace` (structural callers / callees /
 * impact for a symbol).
 *
 * Uses `traceSymbol` and `formatStructuralContext` from `search-trace.js`, and
 * mirrors that module's CLI defaults (depth 3, `null` budget when omitted).
 * The body always ends with exactly one trailing newline, in both text and
 * JSON modes.
 *
 * Checks run in this order: Unix-socket only (403), server readiness (503),
 * URL length (414), URL parse (400), `symbol`/`q` presence and length
 * (400/413), `projectRoot` presence (400) and equality with the daemon's
 * canonical root (409), `depth`/`budget` integers (400). Any failure while
 * tracing becomes a 500.
 *
 * @param {string} reqUrl - Request path and query string.
 * @param {object} [options]
 * @param {boolean} [options.isUnixSocket=false] - Whether the request arrived on the Unix socket.
 * @param {boolean} [options.serverReady=false] - Whether initialization has finished.
 * @param {Error|null} [options.initError=null] - Terminal initialization error, if any.
 * @param {object|null} [options.searcher=null] - Source of `projectRoot`.
 * @returns {Promise<{status: number, contentType: string, body: string}>}
 */
export async function buildTraceDaemonResponse(reqUrl, {
  isUnixSocket = false,
  serverReady = false,
  initError = null,
  searcher = null,
} = {}) {
  if (!isUnixSocket) {
    return readSemanticError(403, '/trace is only available via Unix socket');
  }
  if (!serverReady) {
    const failureDetail = initError?.message;
    return readSemanticError(
      503,
      failureDetail ? `Server initialization failed: ${failureDetail}` : 'Server is starting, please retry',
      { status: initError ? 'failed' : 'starting' },
    );
  }
  if (reqUrl.length > SEARCH_SERVER_MAX_URL_LENGTH) {
    return readSemanticError(414, `Request URL too long (max ${SEARCH_SERVER_MAX_URL_LENGTH} chars)`);
  }

  let params;
  try {
    ({ searchParams: params } = new URL(reqUrl, `http://localhost:${SEARCH_SERVER_PORT}`));
  } catch {
    return readSemanticError(400, 'Invalid request URL');
  }

  const symbol = params.get('symbol') || params.get('q') || '';
  const requestedRoot = params.get('projectRoot') || '';
  const wantsJson = params.get('format') === 'json';

  if (!symbol) return readSemanticError(400, 'Missing symbol parameter ?symbol=');
  if (symbol.length > SEARCH_SERVER_MAX_QUERY_LENGTH) {
    return readSemanticError(413, `Symbol too long (max ${SEARCH_SERVER_MAX_QUERY_LENGTH} chars)`);
  }
  if (!requestedRoot) return readSemanticError(400, 'Missing projectRoot parameter');

  // The daemon serves exactly one project; refuse requests for any other root.
  const serverRoot = canonicalProjectRoot(searcher?.projectRoot || process.cwd());
  const clientRoot = canonicalProjectRoot(requestedRoot);
  if (serverRoot !== clientRoot) {
    return readSemanticError(409, 'Daemon project root mismatch', {
      serverProjectRoot: serverRoot,
      requestedProjectRoot: clientRoot,
    });
  }

  const filePath = params.get('file') || undefined;
  const queryHint = params.get('hint') || '';
  let maxDepth;
  let tokenBudget;
  try {
    maxDepth = parseInteger(params.get('depth'), 'depth');
    tokenBudget = parseInteger(params.get('budget'), 'budget');
  } catch (err) {
    return readSemanticError(400, err.message);
  }

  try {
    const { traceSymbol, formatStructuralContext } = await import('./search-trace.js');
    // Same defaults as the search-trace.js CLI: depth 3, adaptive (null) budget.
    const traceOptions = {
      projectRoot: serverRoot,
      filePath,
      queryHint,
      maxDepth: maxDepth ?? 3,
      tokenBudget: tokenBudget ?? null,
    };
    const result = traceSymbol(symbol, traceOptions);
    const rendered = wantsJson ? JSON.stringify(result, null, 2) : formatStructuralContext(result);
    return {
      status: 200,
      contentType: wantsJson ? 'application/json' : 'text/plain; charset=utf-8',
      // The CLI prints via console.log, i.e. one trailing newline in both modes.
      body: `${rendered}\n`,
    };
  } catch (err) {
    return readSemanticError(500, err.message || String(err));
  }
}
256
+
257
/**
 * Build the daemon response for `GET /read` (multi-file reader).
 *
 * Uses `readFiles` and `formatReadResults` from `search-read.js`. Unlike the
 * other daemon endpoints this one deliberately does NOT gate on `serverReady`:
 * the only searcher access in this function is the null-tolerant
 * `searcher?.projectRoot`, so the endpoint is usable during cold start.
 *
 * Checks run in this order: Unix-socket only (403), URL length (414), URL
 * parse (400), at least one and at most 20 `path` values (400/413), per-path
 * length (413), `projectRoot` presence (400) and equality with the daemon's
 * canonical root (409), `startLine`/`endLine` integers (400), a line range
 * with more than one path (400). Any failure while reading becomes a 500.
 *
 * @param {string} reqUrl - Request path and query string.
 * @param {object} [options]
 * @param {boolean} [options.isUnixSocket=false] - Whether the request arrived on the Unix socket.
 * @param {boolean} [options.serverReady=false] - Accepted for signature parity; unused.
 * @param {Error|null} [options.initError=null] - Accepted for signature parity; unused.
 * @param {object|null} [options.searcher=null] - Source of `projectRoot`.
 * @returns {Promise<{status: number, contentType: string, body: string}>}
 */
export async function buildReadDaemonResponse(reqUrl, {
  isUnixSocket = false,
  serverReady = false,
  initError = null,
  searcher = null,
} = {}) {
  if (!isUnixSocket) {
    return readSemanticError(403, '/read is only available via Unix socket');
  }
  if (reqUrl.length > SEARCH_SERVER_MAX_URL_LENGTH) {
    return readSemanticError(414, `Request URL too long (max ${SEARCH_SERVER_MAX_URL_LENGTH} chars)`);
  }

  let params;
  try {
    ({ searchParams: params } = new URL(reqUrl, `http://localhost:${SEARCH_SERVER_PORT}`));
  } catch {
    return readSemanticError(400, 'Invalid request URL');
  }

  const paths = params.getAll('path');
  const requestedRoot = params.get('projectRoot') || '';
  const requestedFormat = params.get('format') || 'agent';
  const format = ['json', 'raw'].includes(requestedFormat) ? requestedFormat : 'agent';

  if (paths.length === 0) return readSemanticError(400, 'Missing path parameter ?path=');
  if (paths.length > 20) return readSemanticError(413, 'read accepts at most 20 files');
  if (paths.some((candidate) => candidate.length > SEARCH_SERVER_MAX_READ_PATH_LENGTH)) {
    return readSemanticError(413, `Path too long (max ${SEARCH_SERVER_MAX_READ_PATH_LENGTH} chars)`);
  }
  if (!requestedRoot) return readSemanticError(400, 'Missing projectRoot parameter');

  // The daemon serves exactly one project; refuse requests for any other root.
  const serverRoot = canonicalProjectRoot(searcher?.projectRoot || process.cwd());
  const clientRoot = canonicalProjectRoot(requestedRoot);
  if (serverRoot !== clientRoot) {
    return readSemanticError(409, 'Daemon project root mismatch', {
      serverProjectRoot: serverRoot,
      requestedProjectRoot: clientRoot,
    });
  }

  let startLine;
  let endLine;
  try {
    startLine = parseInteger(params.get('startLine'), 'startLine');
    endLine = parseInteger(params.get('endLine'), 'endLine');
  } catch (err) {
    return readSemanticError(400, err.message);
  }
  const includeMetadata = params.get('metadata') !== 'false';
  const wantsRange = startLine != null || endLine != null;
  if (wantsRange && paths.length > 1) {
    return readSemanticError(400, '--lines requires exactly one path');
  }
  const files = paths.map((filePath) => ({
    path: filePath,
    startLine: wantsRange ? startLine : undefined,
    endLine: wantsRange ? endLine : undefined,
  }));

  try {
    const { readFiles, formatReadResults } = await import('./search-read.js');
    const out = await readFiles(files, { projectRoot: serverRoot, includeMetadata });
    const rendered = formatReadResults(out, format);
    const isJson = format === 'json';
    // 404 only when at least one file was requested and none could be read.
    const allFailed = out.files.length > 0 && out.files.every((entry) => !entry.ok);
    return {
      status: allFailed ? 404 : 200,
      contentType: isJson ? 'application/json' : 'text/plain; charset=utf-8',
      // Non-JSON output gets one trailing newline; JSON is returned as-is.
      body: isJson ? rendered : `${rendered}\n`,
    };
  } catch (err) {
    return readSemanticError(500, err.message || String(err));
  }
}
30
345
 
31
346
  function buildTextSearchResponse(results, stats, totalTime, { summary = false, mid = false, color = true, decorate = true } = {}) {
32
347
  const routeMode = stats?.routing?.mode || 'auto';
@@ -189,6 +504,22 @@ export async function startServer() {
189
504
  let initError = null;
190
505
  let initTimeMs = null;
191
506
 
507
+ // Bounded readiness wait for the INDEX-DEPENDENT endpoints (read-semantic /
508
+ // trace). The Unix socket is bound before searcher.init() finishes (below),
509
+ // and the native client treats "connectable" as "ready" and fires its request
510
+ // immediately — so a cold-start request used to hit a 503 and the client
511
+ // exited (Codex finding). Waiting here (up to ~10s, or until a terminal
512
+ // initError) turns that race into a short latency hit instead of a hard
513
+ // failure. /read does NOT use this (it needs no indexes); /health is instant.
514
+ const READINESS_WAIT_MS = 10_000;
515
// Poll (every 50 ms) until the server is ready, initialization has failed, or
// the budget is spent. Resolves with no value in every case; callers re-check
// serverReady / initError themselves afterwards.
const waitForServerReady = async (budgetMs = READINESS_WAIT_MS) => {
  const isSettled = () => Boolean(serverReady || initError);
  if (isSettled()) return;
  const deadline = Date.now() + budgetMs;
  const pause = () => new Promise((wake) => { setTimeout(wake, 50); });
  for (;;) {
    if (isSettled() || !(Date.now() < deadline)) return;
    await pause();
  }
};
522
+
192
523
  // Track request count for periodic cache clearing in long-running sessions.
193
524
  let requestCount = 0;
194
525
  const CACHE_CLEAR_INTERVAL = 1000; // Clear caches every 1000 requests
@@ -196,13 +527,91 @@ export async function startServer() {
196
527
  let tcpServer;
197
528
  let unixServer;
198
529
 
530
+ // ---------------------------------------------------------------------------
531
+ // Daemon lifecycle (footprint bound). A warm per-repo daemon holds ~1–2 GB
532
+ // (HNSW + vocab + float sidecar) and historically NEVER self-terminated, so
533
+ // resident daemons accumulated unbounded across repos/sessions. Two bounds:
534
+ // (1) idle-TTL eviction (default ON): self-stop after no QUERY traffic for
535
+ // SWEET_SEARCH_DAEMON_IDLE_TTL_MS. Tracked by WALL CLOCK, not
536
+ // requestCount, because an idle daemon never increments requestCount.
537
+ // (2) resident-daemon LRU cap (default OFF; SWEET_SEARCH_MAX_DAEMONS): a
538
+ // hard ceiling on concurrently-resident daemons via a shared registry.
539
+ // NOTE: lastActivityMs is set ONLY by real query routes (/search,
540
+ // /read-semantic) — never by /health or /stop, so liveness probes (prewarm,
541
+ // isServerRunning) can never keep an idle daemon alive.
542
+ // ---------------------------------------------------------------------------
543
+ let lastActivityMs = Date.now();
544
+ let idleTimer = null;
545
+ let registryTimer = null;
546
+ let shuttingDown = false;
547
+
548
// Close a server, letting in-flight requests finish but never letting an idle
// keep-alive socket block exit (bounded grace, then resolve regardless).
//
// srv      - object with close(cb) and, optionally, closeIdleConnections();
//            a falsy value resolves immediately.
// graceMs  - upper bound on the wait for close() to call back.
// Resolves with no value and never rejects; a synchronous throw from close()
// resolves immediately.
const closeServerGracefully = (srv, graceMs = 3000) => new Promise((resolve) => {
  if (!srv) { resolve(); return; }
  let graceTimer = null;
  let settled = false;
  const finish = () => {
    if (settled) return;
    settled = true;
    // Drop the pending grace timer once the close has completed so it does
    // not linger after a clean shutdown.
    if (graceTimer) clearTimeout(graceTimer);
    resolve();
  };
  try {
    srv.close(finish);
    // Drop idle keep-alive connections immediately; active requests still drain.
    srv.closeIdleConnections?.();
  } catch {
    finish();
    return;
  }
  if (settled) return; // close() called back synchronously — no timer needed.
  graceTimer = setTimeout(finish, graceMs);
  // Unref'd so the grace timer alone never keeps the process alive.
  graceTimer.unref?.();
});
562
+
563
// Single idempotent teardown shared by SIGINT, /stop, and the idle timer.
// Order: stop the lifecycle timers, deregister from the daemon registry (only
// when the cap is enabled), close the TCP then the Unix server, close the
// searcher, remove the pid file and the socket file, log, then exit(0).
// Every step is best-effort: failures are reported only under DEBUG_CATCHES.
const gracefulShutdown = async (reason) => {
  if (shuttingDown) return;
  shuttingDown = true;

  const reportNonFatal = (prefix, err) => {
    if (process.env.DEBUG_CATCHES) process.stderr.write(`[non-fatal] ${prefix}${err?.message || err}\n`);
  };

  if (idleTimer) {
    clearInterval(idleTimer);
    idleTimer = null;
  }
  if (registryTimer) {
    clearInterval(registryTimer);
    registryTimer = null;
  }

  if (capEnabled) {
    try {
      await registryRemoveSelf(process.pid);
    } catch (err) {
      reportNonFatal('registry remove: ', err);
    }
  }

  await closeServerGracefully(tcpServer);
  await closeServerGracefully(unixServer);

  try {
    searcher.close();
  } catch (err) {
    reportNonFatal('searcher close: ', err);
  }

  for (const leftover of [pidFile, socketPath]) {
    try {
      await fs.unlink(leftover);
    } catch (err) {
      reportNonFatal('', err);
    }
  }

  console.log(`[Server] Shutdown (${reason}).`);
  process.exit(0);
};
588
+
589
+ // Resident-daemon cap (default OFF). Read once at startup to gate registry
590
+ // participation; the numeric cap is re-read per enforcement tick.
591
+ const capEnabled = Number(process.env.SWEET_SEARCH_MAX_DAEMONS ?? 0) > 0;
592
+
593
// Enforce the LRU cap: prune dead registry entries, then /stop the
// least-recently-active peers that are NOT self until we're within the cap.
// The cap is re-read from SWEET_SEARCH_MAX_DAEMONS on every call; a value that
// is not greater than 0 makes this a no-op. Best-effort: never throws, and
// failures are reported only under DEBUG_CATCHES.
const enforceDaemonCap = async () => {
  const cap = Number(process.env.SWEET_SEARCH_MAX_DAEMONS ?? 0);
  if (!(cap > 0)) return;

  let live;
  try {
    live = await registryPruneAndList();
  } catch (err) {
    // Report under DEBUG_CATCHES like every other non-fatal failure in this
    // file instead of swallowing the registry error silently.
    if (process.env.DEBUG_CATCHES) process.stderr.write(`[non-fatal] registry prune: ${err?.message || err}\n`);
    return;
  }
  if (!Array.isArray(live) || live.length <= cap) return;

  const excess = live.length - cap;
  const targets = registrySelectEvictionTargets(live, process.pid, excess);
  for (const t of targets) {
    try {
      await sendStopToSocket(t.socketPath);
    } catch (err) {
      if (process.env.DEBUG_CATCHES) process.stderr.write(`[non-fatal] cap evict ${t?.socketPath}: ${err?.message || err}\n`);
    }
  }
};
608
+
199
609
  // Shared request handler for both TCP and Unix socket
200
610
  const handleRequest = async (req, res) => {
201
611
  const reqUrl = req.url || '';
202
612
 
203
613
  const componentState = {
204
614
  graphIndex: Boolean(searcher.hasGraphIndex),
205
- hnswIndex: Boolean(searcher.hasHnswIndex),
206
615
  binaryHnswIndex: Boolean(searcher.hasBinaryHnswIndex),
207
616
  lateInteractionIndex: Boolean(searcher.hasLateInteractionIndex && searcher.useLateInteraction),
208
617
  embeddingService: serverReady,
@@ -222,6 +631,8 @@ export async function startServer() {
222
631
  }
223
632
 
224
633
  if (req.method === 'GET' && reqUrl.startsWith('/search?')) {
634
+ // Real query traffic — reset the idle-TTL clock (NOT /health or /stop).
635
+ lastActivityMs = Date.now();
225
636
  if (!serverReady) {
226
637
  const reason = initError?.message
227
638
  ? `Server initialization failed: ${initError.message}`
@@ -344,6 +755,45 @@ export async function startServer() {
344
755
  res.writeHead(500, { 'Content-Type': 'application/json' });
345
756
  res.end(JSON.stringify({ error: err.message }));
346
757
  }
758
+ } else if (req.method === 'GET' && reqUrl.startsWith('/read-semantic?')) {
759
+ // Real query traffic — reset the idle-TTL clock (NOT /health or /stop).
760
+ lastActivityMs = Date.now();
761
+ // read-semantic needs the indexes — wait out the cold-start init race
762
+ // (bounded) so a freshly-spawned daemon doesn't 503 the first request.
763
+ await waitForServerReady();
764
+ const response = await buildReadSemanticDaemonResponse(reqUrl, {
765
+ isUnixSocket: !req.socket.remoteAddress,
766
+ serverReady,
767
+ initError,
768
+ searcher,
769
+ });
770
+ res.writeHead(response.status, { 'Content-Type': response.contentType });
771
+ res.end(response.body);
772
+ } else if (req.method === 'GET' && reqUrl.startsWith('/trace?')) {
773
+ // Real query traffic — reset the idle-TTL clock (NOT /health or /stop).
774
+ lastActivityMs = Date.now();
775
+ // trace needs the code-graph — wait out the cold-start init race (bounded)
776
+ // so a freshly-spawned daemon doesn't 503 the first request.
777
+ await waitForServerReady();
778
+ const response = await buildTraceDaemonResponse(reqUrl, {
779
+ isUnixSocket: !req.socket.remoteAddress,
780
+ serverReady,
781
+ initError,
782
+ searcher,
783
+ });
784
+ res.writeHead(response.status, { 'Content-Type': response.contentType });
785
+ res.end(response.body);
786
+ } else if (req.method === 'GET' && reqUrl.startsWith('/read?')) {
787
+ // Real query traffic — reset the idle-TTL clock (NOT /health or /stop).
788
+ lastActivityMs = Date.now();
789
+ const response = await buildReadDaemonResponse(reqUrl, {
790
+ isUnixSocket: !req.socket.remoteAddress,
791
+ serverReady,
792
+ initError,
793
+ searcher,
794
+ });
795
+ res.writeHead(response.status, { 'Content-Type': response.contentType });
796
+ res.end(response.body);
347
797
  } else if (req.method === 'GET' && reqUrl === '/health') {
348
798
  const status = initError ? 'failed' : (serverReady ? 'ready' : 'starting');
349
799
  // Repo identity — harness uses these to verify the daemon serves the
@@ -379,15 +829,7 @@ export async function startServer() {
379
829
  }
380
830
  res.writeHead(200, { 'Content-Type': 'text/plain' });
381
831
  res.end('Shutting down...\n');
382
- if (tcpServer) tcpServer.close();
383
- if (unixServer) unixServer.close();
384
- try { await fs.unlink(pidFile); } catch (err) {
385
- if (process.env.DEBUG_CATCHES) process.stderr.write(`[non-fatal] ${err?.message || err}\n`);
386
- }
387
- try { await fs.unlink(socketPath); } catch (err) {
388
- if (process.env.DEBUG_CATCHES) process.stderr.write(`[non-fatal] ${err?.message || err}\n`);
389
- }
390
- process.exit(0);
832
+ await gracefulShutdown('stop');
391
833
  } else {
392
834
  res.writeHead(404, { 'Content-Type': 'text/plain' });
393
835
  res.end('Not found. Use GET /search?q=<query>&mode=auto&k=10\n');
@@ -474,23 +916,53 @@ export async function startServer() {
474
916
  if (process.env.DEBUG_CATCHES) process.stderr.write(`[non-fatal] maintainer launch: ${err?.message || err}\n`);
475
917
  }
476
918
 
477
- // Alias for graceful shutdown
478
- const server = tcpServer;
919
+ // Handle graceful shutdown (shared idempotent teardown).
920
+ process.on('SIGINT', () => { gracefulShutdown('sigint'); });
479
921
 
480
- // Handle graceful shutdown
481
- process.on('SIGINT', async () => {
482
- console.log('\n[Server] Shutting down...');
483
- if (tcpServer) tcpServer.close();
484
- unixServer.close();
485
- searcher.close();
486
- try { await fs.unlink(pidFile); } catch (err) {
487
- if (process.env.DEBUG_CATCHES) process.stderr.write(`[non-fatal] ${err?.message || err}\n`);
488
- }
489
- try { await fs.unlink(socketPath); } catch (err) {
490
- if (process.env.DEBUG_CATCHES) process.stderr.write(`[non-fatal] ${err?.message || err}\n`);
922
+ // (1) Idle-TTL eviction — default ON. Unref'd so it never keeps the event
923
+ // loop alive on its own. The TTL is read every tick so tests/operators can
924
+ // tune it live; 0 disables. Self-stops once no QUERY route has been hit for
925
+ // longer than the TTL — the actively-used repo's daemon keeps resetting
926
+ // lastActivityMs and is therefore never idle-evicted while it is being
927
+ // queried. (The separate LRU cap below is the only path that can stop an
928
+ // active peer, and only via a newly-started peer within one registry-refresh
929
+ // interval; see daemon-registry.js selectEvictionTargets.)
930
+ idleTimer = setInterval(() => {
931
+ const ttl = Number(process.env.SWEET_SEARCH_DAEMON_IDLE_TTL_MS ?? 1_200_000);
932
+ if (ttl > 0 && Date.now() - lastActivityMs > ttl) {
933
+ gracefulShutdown('idle-ttl');
491
934
  }
492
- process.exit(0);
493
- });
935
+ }, Number(process.env.SWEET_SEARCH_DAEMON_IDLE_CHECK_MS ?? 60_000));
936
+ if (idleTimer.unref) idleTimer.unref();
937
+
938
+ // (2) Resident-daemon LRU cap — default OFF. Only when SWEET_SEARCH_MAX_DAEMONS
939
+ // is opted into do we touch the shared registry at all: register self, then
940
+ // refresh our real query activity + prune dead peers + enforce the cap on a
941
+ // coarse unref'd timer. The maintainer is never enumerated (it never registers).
942
+ if (capEnabled) {
943
+ const entry = {
944
+ pid: process.pid,
945
+ projectRoot: resolveProjectRoot(),
946
+ socketPath,
947
+ pidFile,
948
+ startedAt: Date.now(),
949
+ lastActivityMs,
950
+ };
951
+ registryUpsertSelf(entry)
952
+ .then(() => enforceDaemonCap())
953
+ .catch((err) => {
954
+ if (process.env.DEBUG_CATCHES) process.stderr.write(`[non-fatal] registry init: ${err?.message || err}\n`);
955
+ });
956
+
957
+ registryTimer = setInterval(() => {
958
+ registryTouchSelf(process.pid, lastActivityMs)
959
+ .then(() => enforceDaemonCap())
960
+ .catch((err) => {
961
+ if (process.env.DEBUG_CATCHES) process.stderr.write(`[non-fatal] registry tick: ${err?.message || err}\n`);
962
+ });
963
+ }, Number(process.env.SWEET_SEARCH_DAEMON_REGISTRY_REFRESH_MS ?? 45_000));
964
+ if (registryTimer.unref) registryTimer.unref();
965
+ }
494
966
  }
495
967
 
496
968
  // =============================================================================
@@ -626,6 +1098,34 @@ export async function stopServer({ timeoutMs = 5000 } = {}) {
626
1098
  }
627
1099
  }
628
1100
 
1101
/**
 * Send /stop to a daemon on an EXPLICIT socket (variant of stopServer, which
 * always targets this process's own socket). Used by the resident-daemon LRU
 * cap to evict a least-recently-active PEER.
 *
 * The returned promise always settles: besides a completed response, a
 * response stream that closes or errors before `'end'` (the daemon exiting
 * mid-reply) also counts as "reached", so callers can never hang on it.
 *
 * @param {string} socketPath - Unix socket path of the target daemon.
 * @param {object} [options]
 * @param {number} [options.timeoutMs=5000] - Socket inactivity timeout.
 * @returns {Promise<boolean>} `true` if the request reached the daemon (a
 *   response arrived, or the connection failed with ECONNRESET / EPIPE /
 *   ENOENT); `false` for a missing socketPath, a timeout, or any other error.
 */
export async function sendStopToSocket(socketPath, { timeoutMs = 5000 } = {}) {
  if (!socketPath) return false;
  try {
    const http = await import('http');
    return await new Promise((resolve) => {
      let settled = false;
      const settle = (reached) => {
        if (settled) return;
        settled = true;
        resolve(reached);
      };
      const req = http.request({ socketPath, path: '/stop', method: 'GET' }, (res) => {
        res.on('data', () => {}); // drain so 'end' can fire
        res.on('end', () => settle(true));
        // Headers already arrived, so the daemon was reached even if the body
        // is cut short by its exit.
        res.on('error', () => settle(true));
        res.on('close', () => settle(true));
      });
      req.on('error', (err) => {
        const code = err?.code ?? '';
        settle(code === 'ECONNRESET' || code === 'EPIPE' || code === 'ENOENT');
      });
      // Settle before destroying so the resulting socket error cannot turn a
      // timeout into a "reached" result.
      req.setTimeout(timeoutMs, () => { settle(false); req.destroy(); });
      req.end();
    });
  } catch {
    return false;
  }
}
1128
+
629
1129
  /**
630
1130
  * Best-effort wait for the daemon to exit. Returns true once /health stops
631
1131
  * answering at all (within timeoutMs); false otherwise. This intentionally