grepmax 0.17.4 → 0.17.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -66,6 +66,7 @@ gmax impact handleAuth # Dependents + affected tests
66
66
  gmax similar handleAuth # Find similar code patterns
67
67
  gmax dead handleAuth # Unused-symbol check via call graph (DEAD / PUBLIC EXPORT / LIVE)
68
68
  gmax context "auth system" --budget 4000 # Token-budgeted topic summary
69
+ gmax context src/lib/auth.ts --budget 4000 # Deterministic file/path context
69
70
  ```
70
71
 
71
72
  ### Project Commands
@@ -117,7 +118,7 @@ Plugins auto-update when you run `npm install -g grepmax@latest` — no need to
117
118
 
118
119
  | Tool | Description |
119
120
  | --- | --- |
120
- | `semantic_search` | Search by meaning. 16+ params: query, limit, role, language, scope (project/all), project filtering, etc. |
121
+ | `semantic_search` | Search by meaning. Pointer mode matches CLI `--agent` output; `detail=code/full` returns snippets. |
121
122
  | `code_skeleton` | File structure with bodies collapsed (~4x fewer tokens). |
122
123
  | `trace_calls` | Call graph: importers, callers (multi-hop), callees with file:line. |
123
124
  | `extract_symbol` | Complete function/class body by symbol name. |
@@ -76,6 +76,7 @@ const daemon_client_1 = require("../utils/daemon-client");
76
76
  const index_config_1 = require("../index/index-config");
77
77
  const log_rotate_1 = require("../utils/log-rotate");
78
78
  const pool_1 = require("../workers/pool");
79
+ const daemon_launcher_1 = require("../utils/daemon-launcher");
79
80
  const node_child_process_1 = require("node:child_process");
80
81
  const http = __importStar(require("node:http"));
81
82
  // 30 min was too aggressive — every shutdown is a chance for races, FSEvents
@@ -93,6 +94,23 @@ const IDLE_TIMEOUT_MS = (() => {
93
94
  return parsed; // <= 0 disables the idle check below
94
95
  })();
95
96
  const HEARTBEAT_INTERVAL_MS = 60 * 1000;
97
+ // Self-recycle. Under continuous load (a busy monorepo) the idle timeout never
98
+ // fires, so a long-lived daemon never gets a fresh start. The 24h age trigger
99
+ // is the primary hygiene mechanism. The RSS trigger is a backstop for a genuine
100
+ // runaway only: the daemon's memory is dominated by LanceDB working set, which
101
+ // legitimately spikes to ~1.7 GB during compaction (then frees) on a ~250k-chunk
102
+ // store, so the ceiling sits well above that to avoid recycling on normal
103
+ // spikes. The maintenance-active guard in maybeRecycle() also defers during
104
+ // compaction. Either ceiling <= 0 disables that trigger.
105
+ const envNum = (name, fallback) => {
106
+ const raw = process.env[name];
107
+ if (raw == null)
108
+ return fallback;
109
+ const parsed = Number(raw);
110
+ return Number.isFinite(parsed) ? parsed : fallback;
111
+ };
112
+ const MAX_LIFETIME_MS = envNum("GMAX_DAEMON_MAX_LIFETIME_MS", 24 * 60 * 60 * 1000);
113
+ const RSS_WATERMARK_MB = envNum("GMAX_DAEMON_RSS_WATERMARK_MB", 2560);
96
114
  // Watcher health windows used for FSEvents auto-recovery.
97
115
  const FSEVENTS_RECOVERY_INTERVAL_MS = 60 * 60 * 1000; // try recovery hourly
98
116
  const FSEVENTS_HEALTH_WINDOW_MS = 5 * 60 * 1000; // 5 min of quiet = "healthy"
@@ -112,6 +130,11 @@ class Daemon {
112
130
  this.heartbeatTick = 0;
113
131
  this.mlxRecoveryInFlight = false;
114
132
  this.shuttingDown = false;
133
+ this.recycling = false;
134
+ // PIDs flagged as orphan workers on the previous sweep. A worker must look
135
+ // orphaned twice in a row before we kill it, so a worker the pool forked
136
+ // between our process snapshot and its array update is never killed by a race.
137
+ this.suspectedOrphanWorkers = new Set();
115
138
  this.pendingOps = new Set();
116
139
  this.watcherFailCount = new Map();
117
140
  this.pollIntervals = new Map();
@@ -298,6 +321,8 @@ class Daemon {
298
321
  this.heartbeatTick++;
299
322
  if (this.heartbeatTick % 5 === 0) {
300
323
  void this.checkMlxHealth();
324
+ this.sweepOrphanWorkers();
325
+ this.maybeRecycle();
301
326
  }
302
327
  }, HEARTBEAT_INTERVAL_MS);
303
328
  // 10. Idle timeout (skip when disabled via env)
@@ -1303,6 +1328,77 @@ class Daemon {
1303
1328
  * the PID file, which becomes stale when a daemon is orphaned through
1304
1329
  * the lock-compromise path.
1305
1330
  */
1331
+ /**
1332
+ * Gracefully hand off to a fresh daemon when this one has grown too old or
1333
+ * too large. Only fires when quiet — no active compaction and no in-flight
1334
+ * project operations — so a recycle never interrupts indexing work. The
1335
+ * successor re-runs catchup on startup, so nothing is lost.
1336
+ */
1337
+ maybeRecycle() {
1338
+ var _a;
1339
+ if (this.shuttingDown || this.recycling)
1340
+ return;
1341
+ const ageMs = process.uptime() * 1000;
1342
+ const rssMb = process.memoryUsage().rss / (1024 * 1024);
1343
+ const ageExceeded = MAX_LIFETIME_MS > 0 && ageMs > MAX_LIFETIME_MS;
1344
+ const rssExceeded = RSS_WATERMARK_MB > 0 && rssMb > RSS_WATERMARK_MB;
1345
+ if (!ageExceeded && !rssExceeded)
1346
+ return;
1347
+ // Defer while busy; we'll re-check next tick.
1348
+ if ((_a = this.vectorDb) === null || _a === void 0 ? void 0 : _a.isMaintenanceActive())
1349
+ return;
1350
+ if (this.projectLocks.size > 0)
1351
+ return;
1352
+ const reason = ageExceeded
1353
+ ? `age ${(ageMs / 3600000).toFixed(1)}h > ${(MAX_LIFETIME_MS / 3600000).toFixed(1)}h`
1354
+ : `rss ${Math.round(rssMb)}MB > ${RSS_WATERMARK_MB}MB`;
1355
+ console.log(`[daemon] Recycling (${reason}) — handing off to a fresh daemon`);
1356
+ this.recycling = true;
1357
+ void this.shutdown({ relaunch: true }).finally(() => process.exit(0));
1358
+ }
1359
+ /**
1360
+ * Kill gmax-worker processes that are children of THIS daemon but the worker
1361
+ * pool no longer tracks — strays left behind if a kill ever failed silently.
1362
+ * Filters by parent PID so a per-project `gmax watch`'s own workers are never
1363
+ * touched. Requires a worker to look orphaned on two consecutive sweeps so a
1364
+ * just-forked worker can't be killed by a snapshot race.
1365
+ */
1366
+ sweepOrphanWorkers() {
1367
+ if (this.shuttingDown || !(0, pool_1.isWorkerPoolInitialized)())
1368
+ return;
1369
+ const tracked = new Set((0, pool_1.getWorkerPool)().getWorkerPids());
1370
+ const workerPids = new Set(this.findProcessesByTitle("gmax-worker"));
1371
+ const ourChildren = this.findChildPids();
1372
+ const orphans = ourChildren.filter((pid) => workerPids.has(pid) && !tracked.has(pid));
1373
+ const confirmed = orphans.filter((pid) => this.suspectedOrphanWorkers.has(pid));
1374
+ this.suspectedOrphanWorkers = new Set(orphans);
1375
+ for (const pid of confirmed) {
1376
+ console.log(`[daemon] Killing orphan worker PID:${pid} (untracked by pool)`);
1377
+ try {
1378
+ process.kill(pid, "SIGKILL");
1379
+ }
1380
+ catch (_a) { }
1381
+ this.suspectedOrphanWorkers.delete(pid);
1382
+ }
1383
+ }
1384
+ /** Child PIDs of this process (workers, MLX, llama-server). */
1385
+ findChildPids() {
1386
+ try {
1387
+ const out = (0, node_child_process_1.execSync)(`pgrep -P ${process.pid}`, {
1388
+ timeout: 5000,
1389
+ encoding: "utf-8",
1390
+ }).trim();
1391
+ if (!out)
1392
+ return [];
1393
+ return out
1394
+ .split("\n")
1395
+ .map((s) => parseInt(s.trim(), 10))
1396
+ .filter((n) => Number.isFinite(n) && n > 0);
1397
+ }
1398
+ catch (_a) {
1399
+ return [];
1400
+ }
1401
+ }
1306
1402
  killStaleProcesses() {
1307
1403
  return __awaiter(this, void 0, void 0, function* () {
1308
1404
  // 1. Check for other daemon processes
@@ -1359,7 +1455,7 @@ class Daemon {
1359
1455
  }
1360
1456
  }
1361
1457
  shutdown() {
1362
- return __awaiter(this, void 0, void 0, function* () {
1458
+ return __awaiter(this, arguments, void 0, function* (opts = {}) {
1363
1459
  var _a, _b, _c, _d;
1364
1460
  if (this.shuttingDown)
1365
1461
  return;
@@ -1449,6 +1545,14 @@ class Daemon {
1449
1545
  yield ((_d = this.vectorDb) === null || _d === void 0 ? void 0 : _d.close());
1450
1546
  }
1451
1547
  catch (_l) { }
1548
+ // Hand off to a successor only after every resource is released and the
1549
+ // liveness markers (socket/pid/lock) are already gone — so the fresh
1550
+ // daemon's singleton check sees a clean slate and opens LanceDB/LMDB
1551
+ // without contending with this exiting process.
1552
+ if (opts.relaunch) {
1553
+ const pid = (0, daemon_launcher_1.spawnDaemon)();
1554
+ console.log(`[daemon] Spawned successor daemon${pid ? ` (PID: ${pid})` : " (spawn failed)"}`);
1555
+ }
1452
1556
  console.log("[daemon] Shutdown complete");
1453
1557
  });
1454
1558
  }
@@ -78,6 +78,11 @@ class ColbertModel {
78
78
  intraOpNumThreads: ONNX_THREADS,
79
79
  interOpNumThreads: 1,
80
80
  graphOptimizationLevel: "all",
81
+ // ColBERT runs locally on every batch (MLX only covers dense), so its
82
+ // arena is the worker's main memory hog. Variable-length inputs mean the
83
+ // arena/mem-pattern never amortize — disable both to keep RSS bounded.
84
+ enableCpuMemArena: false,
85
+ enableMemPattern: false,
81
86
  };
82
87
  log(`Worker: Loading ColBERT ONNX session from ${modelPath}`);
83
88
  this.session = yield ort.InferenceSession.create(modelPath, sessionOptions);
@@ -86,6 +86,13 @@ class GraniteModel {
86
86
  intraOpNumThreads: ONNX_THREADS,
87
87
  interOpNumThreads: 1,
88
88
  graphOptimizationLevel: "all",
89
+ // Embedding inputs are variable-length, so the CPU memory arena and
90
+ // memory-pattern optimizer don't get reused across batches — they just
91
+ // retain the largest tensor a worker ever saw (a long/minified file can
92
+ // pin ~2 GB for the worker's lifetime). Disabling both bounds native
93
+ // memory at the cost of a small per-inference allocation.
94
+ enableCpuMemArena: false,
95
+ enableMemPattern: false,
89
96
  };
90
97
  this.session = yield ort.InferenceSession.create(modelPath, sessionOptions);
91
98
  });
@@ -137,6 +144,10 @@ class GraniteModel {
137
144
  runBatch(texts) {
138
145
  return __awaiter(this, void 0, void 0, function* () {
139
146
  var _a, _b, _c;
147
+ // Lazy-load: in the normal path MLX/GPU handles dense embedding and this
148
+ // ONNX model is never used, so we avoid paying its resident cost in every
149
+ // worker. load() is idempotent and only runs on the first fallback batch.
150
+ yield this.load();
140
151
  if (!this.session || !this.tokenizer)
141
152
  return [];
142
153
  const encoded = yield this.tokenizer(texts, {
@@ -125,7 +125,10 @@ class WorkerOrchestrator {
125
125
  }
126
126
  ensureReady() {
127
127
  return __awaiter(this, void 0, void 0, function* () {
128
- if (this.granite.isReady() && this.colbert.isReady()) {
128
+ // Granite (dense ONNX) is loaded lazily on first fallback use inside
129
+ // granite.runBatch — in the normal MLX/GPU path it's never loaded, so we
130
+ // don't gate readiness on it or pay its resident cost per worker.
131
+ if (this.colbert.isReady()) {
129
132
  return;
130
133
  }
131
134
  if (this.initPromise)
@@ -136,7 +139,6 @@ class WorkerOrchestrator {
136
139
  yield Promise.all([
137
140
  this.chunker.init(),
138
141
  this.skeletonizer.init(),
139
- this.granite.load(),
140
142
  this.colbert.load(),
141
143
  ]);
142
144
  stopTimer();
@@ -82,6 +82,26 @@ const TASK_TIMEOUT_MS = (() => {
82
82
  return fromEnv;
83
83
  return 120000;
84
84
  })();
85
+ // Absolute per-task ceiling. Unlike TASK_TIMEOUT_MS (a no-progress timeout that
86
+ // every heartbeat resets), this is wall-clock from dispatch and is NEVER reset
87
+ // by heartbeats. It bounds a task that keeps emitting progress but never
88
+ // finishes — e.g. a worker wedged on a hung MLX request that still services its
89
+ // heartbeat timer. A single processFile (one file → batches of 16 chunks) is
90
+ // seconds even for huge files, so 5 min is generous headroom, never a real cap.
91
+ const HARD_DEADLINE_MS = (() => {
92
+ var _a;
93
+ const fromEnv = Number.parseInt((_a = process.env.GMAX_WORKER_HARD_DEADLINE_MS) !== null && _a !== void 0 ? _a : "", 10);
94
+ if (Number.isFinite(fromEnv) && fromEnv > 0)
95
+ return fromEnv;
96
+ return 300000;
97
+ })();
98
+ // Backstop for the leak that motivated all of the above: a worker left
99
+ // busy=true with no live timer to rescue it (a dropped IPC result, or a
100
+ // timeout-kill whose SIGKILL threw and left the process alive but de-listed).
101
+ // The reaper skips busy workers, so without this such a worker is immortal —
102
+ // we saw six survive 4-5 days. Set above HARD_DEADLINE so the per-task timer
103
+ // normally wins and this only fires for the no-timer case.
104
+ const STUCK_BUSY_MS = HARD_DEADLINE_MS + 60000;
85
105
  const FORCE_KILL_GRACE_MS = 200;
86
106
  // Longer grace for idle reaps: the worker isn't urgently in the way, and a
87
107
  // graceful SIGTERM lets ONNX free ~1GB of model memory. But if SIGTERM is
@@ -97,6 +117,11 @@ class ProcessWorker {
97
117
  this.busy = false;
98
118
  this.pendingTaskId = null;
99
119
  this.lastBusyTime = Date.now();
120
+ // Wall-clock at which this worker became busy with its current task; null
121
+ // when idle. Used by the reaper to detect workers wedged in busy=true.
122
+ this.busySince = null;
123
+ // Most recent RSS (bytes) the worker reported, for memory-based recycling.
124
+ this.lastRssBytes = 0;
100
125
  // Set when the pool has cleaned up after this worker (via exit or error
101
126
  // event). Guards against handleWorkerExit running twice when both events
102
127
  // fire for the same crash.
@@ -122,6 +147,23 @@ function resolveProcessWorker() {
122
147
  throw new Error("Process worker file not found");
123
148
  }
124
149
  const IDLE_WORKER_TIMEOUT_MS = 60000; // reap idle workers after 60s
150
+ // Idle-worker floor. Kept at 1 (not 2) to favour low resident memory over
151
+ // search warmth: an idle worker holds ~300 MB-1 GB, and on this deployment
152
+ // searches are infrequent, so paying a one-off cold start (~10-15s to boot +
153
+ // load models) on the rare search is preferable to keeping a second worker
154
+ // warm. The pool still scales up to maxWorkers on demand for indexing bursts.
155
+ const MIN_KEEP_WORKERS = 1;
156
+ // Recycle an idle worker whose RSS has grown past this. ONNX native memory
157
+ // (model arenas) lives outside V8, so --max-old-space-size can't bound it — a
158
+ // worker that processed one big file can stay pinned at ~2 GB. Replacing it
159
+ // with a fresh worker reclaims that. 0 (or negative) disables the check.
160
+ const WORKER_RSS_RECYCLE_MB = (() => {
161
+ var _a;
162
+ const fromEnv = Number.parseInt((_a = process.env.GMAX_WORKER_RSS_RECYCLE_MB) !== null && _a !== void 0 ? _a : "", 10);
163
+ if (Number.isFinite(fromEnv))
164
+ return fromEnv;
165
+ return 800;
166
+ })();
125
167
  // Methods that must skip the indexing backlog. encodeQuery is the search hot
126
168
  // path: a single query is ~17ms but waits behind every queued processFile.
127
169
  // rerank is similarly small and latency-sensitive.
@@ -147,17 +189,33 @@ class WorkerPool {
147
189
  this.maxWorkers = Math.max(1, config_1.CONFIG.WORKER_THREADS);
148
190
  // Lazy spawn: start with 1 worker, scale up on demand
149
191
  this.spawnWorker();
150
- // Periodically reap idle workers back down to 1
151
- this.idleReapInterval = setInterval(() => this.reapIdleWorkers(), IDLE_WORKER_TIMEOUT_MS);
192
+ // Periodically reap idle workers back to MIN_KEEP, and force-kill any
193
+ // worker wedged in busy=true (the leak backstop — see STUCK_BUSY_MS).
194
+ this.idleReapInterval = setInterval(() => {
195
+ this.reapStuckWorkers();
196
+ this.reapBloatedWorkers();
197
+ this.reapIdleWorkers();
198
+ }, IDLE_WORKER_TIMEOUT_MS);
152
199
  }
153
200
  isHealthy() {
154
201
  return !this.destroyed && this.workers.length > 0;
155
202
  }
203
+ /** PIDs of workers the pool currently tracks. Used by the daemon's orphan
204
+ * sweep to distinguish live, accounted-for workers from de-listed strays. */
205
+ getWorkerPids() {
206
+ return this.workers
207
+ .map((w) => w.child.pid)
208
+ .filter((pid) => pid !== undefined);
209
+ }
156
210
  clearTaskTimeout(task) {
157
211
  if (task.timeout) {
158
212
  clearTimeout(task.timeout);
159
213
  task.timeout = undefined;
160
214
  }
215
+ if (task.hardTimeout) {
216
+ clearTimeout(task.hardTimeout);
217
+ task.hardTimeout = undefined;
218
+ }
161
219
  }
162
220
  removeFromQueue(taskId) {
163
221
  const pi = this.priorityQueue.indexOf(taskId);
@@ -174,6 +232,7 @@ class WorkerPool {
174
232
  if (worker) {
175
233
  worker.busy = false;
176
234
  worker.pendingTaskId = null;
235
+ worker.busySince = null;
177
236
  worker.lastBusyTime = Date.now();
178
237
  }
179
238
  }
@@ -222,12 +281,15 @@ class WorkerPool {
222
281
  (0, logger_1.log)("pool", `spawn PID:${worker.child.pid} (${this.workers.length + 1}/${Math.max(1, config_1.CONFIG.WORKER_THREADS)})`);
223
282
  const onMessage = (msg) => {
224
283
  var _a, _b, _c, _d, _e, _f, _g, _h;
284
+ if (typeof msg.rss === "number")
285
+ worker.lastRssBytes = msg.rss;
225
286
  // Fast cleanup for tasks that were aborted while running
226
287
  if (this.abortedTasks.has(msg.id)) {
227
288
  this.abortedTasks.delete(msg.id);
228
289
  const task = this.tasks.get(msg.id);
229
290
  if (task) {
230
291
  this.completeTask(task, worker);
292
+ this.recycleIfBloated(worker);
231
293
  this.dispatch();
232
294
  }
233
295
  return;
@@ -236,10 +298,15 @@ class WorkerPool {
236
298
  if (!task)
237
299
  return;
238
300
  if ("heartbeat" in msg) {
239
- // Reset timeout
240
- this.clearTaskTimeout(task);
301
+ // Reset only the no-progress timeout. The hard deadline is left
302
+ // untouched on purpose — heartbeats must not be able to extend a task
303
+ // past its absolute ceiling.
304
+ if (task.timeout) {
305
+ clearTimeout(task.timeout);
306
+ task.timeout = undefined;
307
+ }
241
308
  if (task.worker) {
242
- task.timeout = setTimeout(() => this.handleTaskTimeout(task, task.worker), TASK_TIMEOUT_MS);
309
+ task.timeout = setTimeout(() => this.handleTaskTimeout(task, task.worker, "no progress"), TASK_TIMEOUT_MS);
243
310
  }
244
311
  return;
245
312
  }
@@ -260,6 +327,7 @@ class WorkerPool {
260
327
  }
261
328
  this.completeTask(task, worker);
262
329
  this.consecutiveRespawns = 0;
330
+ this.recycleIfBloated(worker);
263
331
  this.dispatch();
264
332
  };
265
333
  const onExit = (code, signal) => this.handleWorkerExit(worker, code, signal, "exit");
@@ -341,15 +409,16 @@ class WorkerPool {
341
409
  this.dispatch();
342
410
  });
343
411
  }
344
- handleTaskTimeout(task, worker) {
412
+ handleTaskTimeout(task, worker, reason = "no progress") {
345
413
  var _a, _b, _c, _d;
346
414
  if (this.destroyed || !this.tasks.has(task.id))
347
415
  return;
348
416
  this.clearTaskTimeout(task);
417
+ const limitMs = reason === "hard deadline" ? HARD_DEADLINE_MS : TASK_TIMEOUT_MS;
349
418
  const filePath = (_d = (_b = (_a = task.payload) === null || _a === void 0 ? void 0 : _a.path) !== null && _b !== void 0 ? _b : (_c = task.payload) === null || _c === void 0 ? void 0 : _c.absolutePath) !== null && _d !== void 0 ? _d : "unknown";
350
- (0, logger_1.log)("pool", `timeout task=${task.id} method=${task.method} file=${filePath} after ${TASK_TIMEOUT_MS}ms — killing worker PID:${worker.child.pid}`);
419
+ (0, logger_1.log)("pool", `timeout task=${task.id} method=${task.method} file=${filePath} (${reason}, ${limitMs}ms) — killing worker PID:${worker.child.pid}`);
351
420
  this.completeTask(task, null);
352
- task.reject(new Error(`Worker task ${task.method} timed out after ${TASK_TIMEOUT_MS}ms on ${filePath}`));
421
+ task.reject(new Error(`Worker task ${task.method} exceeded ${reason} limit (${limitMs}ms) on ${filePath}`));
353
422
  worker.cleanedUp = true;
354
423
  worker.child.removeAllListeners("message");
355
424
  worker.child.removeAllListeners("exit");
@@ -393,9 +462,13 @@ class WorkerPool {
393
462
  }
394
463
  idle.busy = true;
395
464
  idle.pendingTaskId = task.id;
465
+ idle.busySince = Date.now();
396
466
  task.worker = idle;
397
467
  task.startTime = Date.now();
398
- task.timeout = setTimeout(() => this.handleTaskTimeout(task, idle), TASK_TIMEOUT_MS);
468
+ task.timeout = setTimeout(() => this.handleTaskTimeout(task, idle, "no progress"), TASK_TIMEOUT_MS);
469
+ // Absolute deadline: never cleared/re-armed by heartbeats, so a task that
470
+ // keeps emitting progress but never completes is still bounded.
471
+ task.hardTimeout = setTimeout(() => this.handleTaskTimeout(task, idle, "hard deadline"), HARD_DEADLINE_MS);
399
472
  const filePath = (_e = (_c = (_b = task.payload) === null || _b === void 0 ? void 0 : _b.path) !== null && _c !== void 0 ? _c : (_d = task.payload) === null || _d === void 0 ? void 0 : _d.absolutePath) !== null && _e !== void 0 ? _e : "";
400
473
  const busyCount = this.workers.filter((w) => w.busy).length;
401
474
  (0, logger_1.debug)("pool", `dispatch task=${task.id} method=${task.method}${filePath ? ` file=${filePath}` : ""} → PID:${idle.child.pid} (busy=${busyCount}/${this.workers.length} queue=${this.taskQueue.length}+${this.priorityQueue.length}p)`);
@@ -425,14 +498,113 @@ class WorkerPool {
425
498
  return this.enqueue("rerank", input, signal);
426
499
  }
427
500
  /**
428
- * Reap idle workers back down to MIN_KEEP. Keeps the most recently active.
429
- * Called on a timer never removes busy workers. Min=2 so a search task
430
- * always has spare capacity even when one worker is busy with a long
431
- * indexing batch (a fresh worker takes 10–15s to boot + load models, which
432
- * dwarfs a ~13ms encodeQuery).
501
+ * Force-kill workers wedged in busy=true past STUCK_BUSY_MS. The per-task
502
+ * hard deadline normally rescues these first; this catches the case where no
503
+ * live timer exists — a dropped IPC result, or a prior SIGKILL that threw and
504
+ * left the process alive but de-listed. Unlike the idle reaper this ignores
505
+ * MIN_KEEP (a stuck worker is dead weight even at the floor) and goes straight
506
+ * to SIGKILL, letting the natural 'exit' handler fail the task and respawn.
507
+ */
508
+ reapStuckWorkers() {
509
+ if (this.destroyed)
510
+ return;
511
+ const now = Date.now();
512
+ const stuck = this.workers.filter((w) => w.busy && w.busySince !== null && now - w.busySince > STUCK_BUSY_MS);
513
+ for (const w of stuck) {
514
+ const busyMs = w.busySince !== null ? now - w.busySince : 0;
515
+ (0, logger_1.log)("pool", `stuck worker PID:${w.child.pid} busy ${Math.round(busyMs / 1000)}s (>${STUCK_BUSY_MS}ms) — SIGKILL`);
516
+ // Leave listeners attached so handleWorkerExit runs on the resulting
517
+ // 'exit' event: it fails any task still bound to this worker and respawns
518
+ // if work is pending. Do not pre-set cleanedUp for the same reason.
519
+ try {
520
+ w.child.kill("SIGKILL");
521
+ }
522
+ catch (_a) { }
523
+ }
524
+ }
525
+ /**
526
+ * Recycle idle workers whose RSS has grown past WORKER_RSS_RECYCLE_MB. Unlike
527
+ * the idle reaper this ignores MIN_KEEP — a bloated worker is replaced rather
528
+ * than merely trimmed: we SIGTERM it (graceful, lets ONNX free its arenas)
529
+ * and respawn a fresh one if that drops us below MIN_KEEP. Only idle workers
530
+ * are touched, so an in-flight task is never interrupted.
531
+ */
532
+ reapBloatedWorkers() {
533
+ if (this.destroyed || WORKER_RSS_RECYCLE_MB <= 0)
534
+ return;
535
+ const limitBytes = WORKER_RSS_RECYCLE_MB * 1024 * 1024;
536
+ const bloated = this.workers.filter((w) => !w.busy && w.lastRssBytes > limitBytes);
537
+ for (const w of bloated)
538
+ this.recycleWorker(w, "idle");
539
+ }
540
+ /**
541
+ * Recycle a worker whose RSS exceeds the threshold the instant it goes free
542
+ * between tasks. The idle reaper alone can't catch this: under continuous
543
+ * churn (a busy monorepo trickling one small file at a time) a worker is
544
+ * dispatched again within the 60s idle window, so a worker that peaked at
545
+ * ~1.4 GB on one large file never looks "idle" and stays pinned. Checking at
546
+ * task completion — when busy was just cleared and before the next dispatch —
547
+ * bounds RSS regardless of how steady the churn is. No-op while busy, so an
548
+ * in-flight task is never interrupted.
549
+ */
550
+ recycleIfBloated(worker) {
551
+ if (this.destroyed ||
552
+ WORKER_RSS_RECYCLE_MB <= 0 ||
553
+ worker.busy ||
554
+ worker.cleanedUp) {
555
+ return;
556
+ }
557
+ if (worker.lastRssBytes > WORKER_RSS_RECYCLE_MB * 1024 * 1024) {
558
+ this.recycleWorker(worker, "post-task");
559
+ }
560
+ }
561
+ /**
562
+ * SIGTERM a worker (graceful, lets ONNX free its arenas), drop it from the
563
+ * pool, escalate to SIGKILL if it ignores the signal, and refill back to the
564
+ * floor with a fresh, lean worker. Shared by the idle and post-task RSS paths.
565
+ */
566
+ recycleWorker(w, reason) {
567
+ if (w.cleanedUp)
568
+ return;
569
+ (0, logger_1.log)("pool", `recycle bloated worker PID:${w.child.pid} (${reason}, rss ${Math.round(w.lastRssBytes / 1048576)}MB > ${WORKER_RSS_RECYCLE_MB}MB)`);
570
+ w.cleanedUp = true;
571
+ w.child.removeAllListeners("message");
572
+ w.child.removeAllListeners("exit");
573
+ w.child.removeAllListeners("error");
574
+ const pid = w.child.pid;
575
+ try {
576
+ w.child.kill("SIGTERM");
577
+ }
578
+ catch (_a) { }
579
+ this.workers = this.workers.filter((x) => x !== w);
580
+ // Escalate to SIGKILL if SIGTERM is ignored (a worker mid native-call
581
+ // won't service signals).
582
+ if (pid !== undefined) {
583
+ setTimeout(() => {
584
+ try {
585
+ process.kill(pid, 0);
586
+ try {
587
+ process.kill(pid, "SIGKILL");
588
+ }
589
+ catch (_a) { }
590
+ }
591
+ catch (_b) {
592
+ // ESRCH — already gone.
593
+ }
594
+ }, REAP_FORCE_KILL_GRACE_MS);
595
+ }
596
+ // Replace anything we dropped below the floor with fresh, lean workers.
597
+ while (!this.destroyed && this.workers.length < MIN_KEEP_WORKERS) {
598
+ this.spawnWorker();
599
+ }
600
+ }
601
+ /**
602
+ * Reap idle workers back down to MIN_KEEP_WORKERS. Keeps the most recently
603
+ * active. Called on a timer — never removes busy workers. See
604
+ * MIN_KEEP_WORKERS for the memory-vs-warmth tradeoff behind the floor.
433
605
  */
434
606
  reapIdleWorkers() {
435
- const MIN_KEEP = 2;
607
+ const MIN_KEEP = MIN_KEEP_WORKERS;
436
608
  if (this.destroyed || this.workers.length <= MIN_KEEP)
437
609
  return;
438
610
  const now = Date.now();
@@ -49,9 +49,12 @@ const node_process_1 = __importDefault(require("node:process"));
49
49
  node_process_1.default.title = "gmax-worker";
50
50
  const worker_1 = __importStar(require("./worker"));
51
51
  const logger_1 = require("../utils/logger");
52
+ // Every outgoing message also carries `rss` (see send()).
52
53
  const send = (msg) => {
53
54
  if (node_process_1.default.send) {
54
- node_process_1.default.send(msg);
55
+ // Attach current RSS so the pool can recycle workers whose native (ONNX)
56
+ // memory has ballooned — the V8 --max-old-space-size cap can't see it.
57
+ node_process_1.default.send(Object.assign(Object.assign({}, msg), { rss: node_process_1.default.memoryUsage().rss }));
55
58
  }
56
59
  };
57
60
  node_process_1.default.on("message", (msg) => __awaiter(void 0, void 0, void 0, function* () {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "grepmax",
3
- "version": "0.17.4",
3
+ "version": "0.17.5",
4
4
  "author": "Robert Owens <78518764+reowens@users.noreply.github.com>",
5
5
  "homepage": "https://github.com/reowens/grepmax",
6
6
  "bugs": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "grepmax",
3
- "version": "0.17.4",
3
+ "version": "0.17.5",
4
4
  "description": "Semantic code search for Claude Code. Automatically indexes your project and provides intelligent search capabilities.",
5
5
  "author": {
6
6
  "name": "Robert Owens",
@@ -192,7 +192,7 @@ Understand:
192
192
  Survey:
193
193
  gmax project codebase overview (langs, structure, key symbols)
194
194
  gmax skeleton <file> file structure (file path, NOT a directory)
195
- gmax context "topic" --budget 4000 multi-file topic summary
195
+ gmax context "topic-or-path" --budget 4000 topic summary or deterministic file/dir context
196
196
  gmax log <path-or-symbol> git commits (replaces recent/diff)
197
197
  gmax status indexed projects
198
198
 
@@ -186,12 +186,14 @@ gmax dead handleAuth --in src/ # restrict to a sub-path
186
186
  ```
187
187
  Status is `DEAD` (no callers, not exported), `PUBLIC EXPORT` (no internal callers but the defining chunk is exported — check external usage), or `LIVE` (with caller count + top-3 file:line). The call graph reflects what tree-sitter chunked: dynamic dispatch, reflection, eval, and string-built call sites won't show up — `DEAD` is a hypothesis, not a proof.
188
188
 
189
- ### Context — `gmax context <topic> --budget <tokens>`
189
+ ### Context — `gmax context <topic-or-path> --budget <tokens>`
190
190
  ```
191
191
  gmax context "authentication system" --budget 4000
192
192
  gmax context "payment flow" --budget 8000
193
+ gmax context src/lib/auth.ts --budget 3000
193
194
  gmax context src/lib/auth/ --budget 3000
194
195
  ```
196
+ Use the path form when you already know the file or directory; it skips semantic search and gives deterministic structure/excerpt context.
195
197
 
196
198
  ### Investigate — `gmax investigate "question"` (requires LLM)
197
199
  ```
@@ -229,7 +231,7 @@ gmax llm on/off/start/stop/status # manage local LLM server
229
231
  11. **Impact** — `Bash(gmax impact <symbol>)` for blast radius before significant changes
230
232
  12. **Similar** — `Bash(gmax similar <symbol>)` to find similar patterns for DRY analysis
231
233
  13. **Dead** — `Bash(gmax dead <symbol>)` to check if a symbol has zero inbound callers (hypothesis, not proof)
232
- 14. **Context** — `Bash(gmax context "topic" --budget 4000)` for a token-budgeted topic summary
234
+ 14. **Context** — `Bash(gmax context "topic-or-path" --budget 4000)` for token-budgeted topic or path context
233
235
  15. **Related** — `Bash(gmax related <file>)` to see what else to look at
234
236
  16. **Status** — `Bash(gmax status)` to check index state across all projects
235
237