grepmax 0.17.4 → 0.17.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/dist/lib/daemon/daemon.js +105 -1
- package/dist/lib/workers/embeddings/colbert.js +5 -0
- package/dist/lib/workers/embeddings/granite.js +11 -0
- package/dist/lib/workers/orchestrator.js +4 -2
- package/dist/lib/workers/pool.js +187 -15
- package/dist/lib/workers/process-child.js +4 -1
- package/package.json +1 -1
- package/plugins/grepmax/.claude-plugin/plugin.json +1 -1
- package/plugins/grepmax/hooks/start.js +1 -1
- package/plugins/grepmax/skills/grepmax/SKILL.md +4 -2
package/README.md
CHANGED
|
@@ -66,6 +66,7 @@ gmax impact handleAuth # Dependents + affected tests
|
|
|
66
66
|
gmax similar handleAuth # Find similar code patterns
|
|
67
67
|
gmax dead handleAuth # Unused-symbol check via call graph (DEAD / PUBLIC EXPORT / LIVE)
|
|
68
68
|
gmax context "auth system" --budget 4000 # Token-budgeted topic summary
|
|
69
|
+
gmax context src/lib/auth.ts --budget 4000 # Deterministic file/path context
|
|
69
70
|
```
|
|
70
71
|
|
|
71
72
|
### Project Commands
|
|
@@ -117,7 +118,7 @@ Plugins auto-update when you run `npm install -g grepmax@latest` — no need to
|
|
|
117
118
|
|
|
118
119
|
| Tool | Description |
|
|
119
120
|
| --- | --- |
|
|
120
|
-
| `semantic_search` | Search by meaning.
|
|
121
|
+
| `semantic_search` | Search by meaning. Pointer mode matches CLI `--agent` output; `detail=code/full` returns snippets. |
|
|
121
122
|
| `code_skeleton` | File structure with bodies collapsed (~4x fewer tokens). |
|
|
122
123
|
| `trace_calls` | Call graph: importers, callers (multi-hop), callees with file:line. |
|
|
123
124
|
| `extract_symbol` | Complete function/class body by symbol name. |
|
|
@@ -76,6 +76,7 @@ const daemon_client_1 = require("../utils/daemon-client");
|
|
|
76
76
|
const index_config_1 = require("../index/index-config");
|
|
77
77
|
const log_rotate_1 = require("../utils/log-rotate");
|
|
78
78
|
const pool_1 = require("../workers/pool");
|
|
79
|
+
const daemon_launcher_1 = require("../utils/daemon-launcher");
|
|
79
80
|
const node_child_process_1 = require("node:child_process");
|
|
80
81
|
const http = __importStar(require("node:http"));
|
|
81
82
|
// 30 min was too aggressive — every shutdown is a chance for races, FSEvents
|
|
@@ -93,6 +94,23 @@ const IDLE_TIMEOUT_MS = (() => {
|
|
|
93
94
|
return parsed; // <= 0 disables the idle check below
|
|
94
95
|
})();
|
|
95
96
|
const HEARTBEAT_INTERVAL_MS = 60 * 1000;
|
|
97
|
+
// Self-recycle. Under continuous load (a busy monorepo) the idle timeout never
|
|
98
|
+
// fires, so a long-lived daemon never gets a fresh start. The 24h age trigger
|
|
99
|
+
// is the primary hygiene mechanism. The RSS trigger is a backstop for a genuine
|
|
100
|
+
// runaway only: the daemon's memory is dominated by LanceDB working set, which
|
|
101
|
+
// legitimately spikes to ~1.7 GB during compaction (then frees) on a ~250k-chunk
|
|
102
|
+
// store, so the ceiling sits well above that to avoid recycling on normal
|
|
103
|
+
// spikes. The maintenance-active guard in maybeRecycle() also defers during
|
|
104
|
+
// compaction. Either ceiling <= 0 disables that trigger.
|
|
105
|
+
/**
 * Read a numeric tuning knob from the environment.
 *
 * @param {string} name - Environment variable to read.
 * @param {number} fallback - Value used when the variable is unset, blank,
 *   or does not parse to a finite number.
 * @returns {number} The parsed value (may be <= 0, which callers treat as
 *   "disabled"), or `fallback`.
 */
const envNum = (name, fallback) => {
    const raw = process.env[name];
    // Number("") and Number("   ") are both 0. Without this guard a blank
    // variable (e.g. `GMAX_DAEMON_MAX_LIFETIME_MS=`) would silently turn into
    // 0 and disable the trigger instead of falling back to the default.
    if (raw == null || raw.trim() === "")
        return fallback;
    const parsed = Number(raw);
    return Number.isFinite(parsed) ? parsed : fallback;
};
|
|
112
|
+
const MAX_LIFETIME_MS = envNum("GMAX_DAEMON_MAX_LIFETIME_MS", 24 * 60 * 60 * 1000);
|
|
113
|
+
const RSS_WATERMARK_MB = envNum("GMAX_DAEMON_RSS_WATERMARK_MB", 2560);
|
|
96
114
|
// Watcher health windows used for FSEvents auto-recovery.
|
|
97
115
|
const FSEVENTS_RECOVERY_INTERVAL_MS = 60 * 60 * 1000; // try recovery hourly
|
|
98
116
|
const FSEVENTS_HEALTH_WINDOW_MS = 5 * 60 * 1000; // 5 min of quiet = "healthy"
|
|
@@ -112,6 +130,11 @@ class Daemon {
|
|
|
112
130
|
this.heartbeatTick = 0;
|
|
113
131
|
this.mlxRecoveryInFlight = false;
|
|
114
132
|
this.shuttingDown = false;
|
|
133
|
+
this.recycling = false;
|
|
134
|
+
// PIDs flagged as orphan workers on the previous sweep. A worker must look
|
|
135
|
+
// orphaned twice in a row before we kill it, so a worker the pool forked
|
|
136
|
+
// between our process snapshot and its array update is never killed by a race.
|
|
137
|
+
this.suspectedOrphanWorkers = new Set();
|
|
115
138
|
this.pendingOps = new Set();
|
|
116
139
|
this.watcherFailCount = new Map();
|
|
117
140
|
this.pollIntervals = new Map();
|
|
@@ -298,6 +321,8 @@ class Daemon {
|
|
|
298
321
|
this.heartbeatTick++;
|
|
299
322
|
if (this.heartbeatTick % 5 === 0) {
|
|
300
323
|
void this.checkMlxHealth();
|
|
324
|
+
this.sweepOrphanWorkers();
|
|
325
|
+
this.maybeRecycle();
|
|
301
326
|
}
|
|
302
327
|
}, HEARTBEAT_INTERVAL_MS);
|
|
303
328
|
// 10. Idle timeout (skip when disabled via env)
|
|
@@ -1303,6 +1328,77 @@ class Daemon {
|
|
|
1303
1328
|
* the PID file, which becomes stale when a daemon is orphaned through
|
|
1304
1329
|
* the lock-compromise path.
|
|
1305
1330
|
*/
|
|
1331
|
+
/**
|
|
1332
|
+
* Gracefully hand off to a fresh daemon when this one has grown too old or
|
|
1333
|
+
* too large. Only fires when quiet — no active compaction and no in-flight
|
|
1334
|
+
* project operations — so a recycle never interrupts indexing work. The
|
|
1335
|
+
* successor re-runs catchup on startup, so nothing is lost.
|
|
1336
|
+
*/
|
|
1337
|
+
maybeRecycle() {
|
|
1338
|
+
var _a;
|
|
1339
|
+
if (this.shuttingDown || this.recycling)
|
|
1340
|
+
return;
|
|
1341
|
+
const ageMs = process.uptime() * 1000;
|
|
1342
|
+
const rssMb = process.memoryUsage().rss / (1024 * 1024);
|
|
1343
|
+
const ageExceeded = MAX_LIFETIME_MS > 0 && ageMs > MAX_LIFETIME_MS;
|
|
1344
|
+
const rssExceeded = RSS_WATERMARK_MB > 0 && rssMb > RSS_WATERMARK_MB;
|
|
1345
|
+
if (!ageExceeded && !rssExceeded)
|
|
1346
|
+
return;
|
|
1347
|
+
// Defer while busy; we'll re-check next tick.
|
|
1348
|
+
if ((_a = this.vectorDb) === null || _a === void 0 ? void 0 : _a.isMaintenanceActive())
|
|
1349
|
+
return;
|
|
1350
|
+
if (this.projectLocks.size > 0)
|
|
1351
|
+
return;
|
|
1352
|
+
const reason = ageExceeded
|
|
1353
|
+
? `age ${(ageMs / 3600000).toFixed(1)}h > ${(MAX_LIFETIME_MS / 3600000).toFixed(1)}h`
|
|
1354
|
+
: `rss ${Math.round(rssMb)}MB > ${RSS_WATERMARK_MB}MB`;
|
|
1355
|
+
console.log(`[daemon] Recycling (${reason}) — handing off to a fresh daemon`);
|
|
1356
|
+
this.recycling = true;
|
|
1357
|
+
void this.shutdown({ relaunch: true }).finally(() => process.exit(0));
|
|
1358
|
+
}
|
|
1359
|
+
/**
|
|
1360
|
+
* Kill gmax-worker processes that are children of THIS daemon but the worker
|
|
1361
|
+
* pool no longer tracks — strays left behind if a kill ever failed silently.
|
|
1362
|
+
* Filters by parent PID so a per-project `gmax watch`'s own workers are never
|
|
1363
|
+
* touched. Requires a worker to look orphaned on two consecutive sweeps so a
|
|
1364
|
+
* just-forked worker can't be killed by a snapshot race.
|
|
1365
|
+
*/
|
|
1366
|
+
sweepOrphanWorkers() {
|
|
1367
|
+
if (this.shuttingDown || !(0, pool_1.isWorkerPoolInitialized)())
|
|
1368
|
+
return;
|
|
1369
|
+
const tracked = new Set((0, pool_1.getWorkerPool)().getWorkerPids());
|
|
1370
|
+
const workerPids = new Set(this.findProcessesByTitle("gmax-worker"));
|
|
1371
|
+
const ourChildren = this.findChildPids();
|
|
1372
|
+
const orphans = ourChildren.filter((pid) => workerPids.has(pid) && !tracked.has(pid));
|
|
1373
|
+
const confirmed = orphans.filter((pid) => this.suspectedOrphanWorkers.has(pid));
|
|
1374
|
+
this.suspectedOrphanWorkers = new Set(orphans);
|
|
1375
|
+
for (const pid of confirmed) {
|
|
1376
|
+
console.log(`[daemon] Killing orphan worker PID:${pid} (untracked by pool)`);
|
|
1377
|
+
try {
|
|
1378
|
+
process.kill(pid, "SIGKILL");
|
|
1379
|
+
}
|
|
1380
|
+
catch (_a) { }
|
|
1381
|
+
this.suspectedOrphanWorkers.delete(pid);
|
|
1382
|
+
}
|
|
1383
|
+
}
|
|
1384
|
+
/** Child PIDs of this process (workers, MLX, llama-server). */
|
|
1385
|
+
findChildPids() {
|
|
1386
|
+
try {
|
|
1387
|
+
const out = (0, node_child_process_1.execSync)(`pgrep -P ${process.pid}`, {
|
|
1388
|
+
timeout: 5000,
|
|
1389
|
+
encoding: "utf-8",
|
|
1390
|
+
}).trim();
|
|
1391
|
+
if (!out)
|
|
1392
|
+
return [];
|
|
1393
|
+
return out
|
|
1394
|
+
.split("\n")
|
|
1395
|
+
.map((s) => parseInt(s.trim(), 10))
|
|
1396
|
+
.filter((n) => Number.isFinite(n) && n > 0);
|
|
1397
|
+
}
|
|
1398
|
+
catch (_a) {
|
|
1399
|
+
return [];
|
|
1400
|
+
}
|
|
1401
|
+
}
|
|
1306
1402
|
killStaleProcesses() {
|
|
1307
1403
|
return __awaiter(this, void 0, void 0, function* () {
|
|
1308
1404
|
// 1. Check for other daemon processes
|
|
@@ -1359,7 +1455,7 @@ class Daemon {
|
|
|
1359
1455
|
}
|
|
1360
1456
|
}
|
|
1361
1457
|
shutdown() {
|
|
1362
|
-
return __awaiter(this,
|
|
1458
|
+
return __awaiter(this, arguments, void 0, function* (opts = {}) {
|
|
1363
1459
|
var _a, _b, _c, _d;
|
|
1364
1460
|
if (this.shuttingDown)
|
|
1365
1461
|
return;
|
|
@@ -1449,6 +1545,14 @@ class Daemon {
|
|
|
1449
1545
|
yield ((_d = this.vectorDb) === null || _d === void 0 ? void 0 : _d.close());
|
|
1450
1546
|
}
|
|
1451
1547
|
catch (_l) { }
|
|
1548
|
+
// Hand off to a successor only after every resource is released and the
|
|
1549
|
+
// liveness markers (socket/pid/lock) are already gone — so the fresh
|
|
1550
|
+
// daemon's singleton check sees a clean slate and opens LanceDB/LMDB
|
|
1551
|
+
// without contending with this exiting process.
|
|
1552
|
+
if (opts.relaunch) {
|
|
1553
|
+
const pid = (0, daemon_launcher_1.spawnDaemon)();
|
|
1554
|
+
console.log(`[daemon] Spawned successor daemon${pid ? ` (PID: ${pid})` : " (spawn failed)"}`);
|
|
1555
|
+
}
|
|
1452
1556
|
console.log("[daemon] Shutdown complete");
|
|
1453
1557
|
});
|
|
1454
1558
|
}
|
|
@@ -78,6 +78,11 @@ class ColbertModel {
|
|
|
78
78
|
intraOpNumThreads: ONNX_THREADS,
|
|
79
79
|
interOpNumThreads: 1,
|
|
80
80
|
graphOptimizationLevel: "all",
|
|
81
|
+
// ColBERT runs locally on every batch (MLX only covers dense), so its
|
|
82
|
+
// arena is the worker's main memory hog. Variable-length inputs mean the
|
|
83
|
+
// arena/mem-pattern never amortize — disable both to keep RSS bounded.
|
|
84
|
+
enableCpuMemArena: false,
|
|
85
|
+
enableMemPattern: false,
|
|
81
86
|
};
|
|
82
87
|
log(`Worker: Loading ColBERT ONNX session from ${modelPath}`);
|
|
83
88
|
this.session = yield ort.InferenceSession.create(modelPath, sessionOptions);
|
|
@@ -86,6 +86,13 @@ class GraniteModel {
|
|
|
86
86
|
intraOpNumThreads: ONNX_THREADS,
|
|
87
87
|
interOpNumThreads: 1,
|
|
88
88
|
graphOptimizationLevel: "all",
|
|
89
|
+
// Embedding inputs are variable-length, so the CPU memory arena and
|
|
90
|
+
// memory-pattern optimizer don't get reused across batches — they just
|
|
91
|
+
// retain the largest tensor a worker ever saw (a long/minified file can
|
|
92
|
+
// pin ~2 GB for the worker's lifetime). Disabling both bounds native
|
|
93
|
+
// memory at the cost of a small per-inference allocation.
|
|
94
|
+
enableCpuMemArena: false,
|
|
95
|
+
enableMemPattern: false,
|
|
89
96
|
};
|
|
90
97
|
this.session = yield ort.InferenceSession.create(modelPath, sessionOptions);
|
|
91
98
|
});
|
|
@@ -137,6 +144,10 @@ class GraniteModel {
|
|
|
137
144
|
runBatch(texts) {
|
|
138
145
|
return __awaiter(this, void 0, void 0, function* () {
|
|
139
146
|
var _a, _b, _c;
|
|
147
|
+
// Lazy-load: in the normal path MLX/GPU handles dense embedding and this
|
|
148
|
+
// ONNX model is never used, so we avoid paying its resident cost in every
|
|
149
|
+
// worker. load() is idempotent and only runs on the first fallback batch.
|
|
150
|
+
yield this.load();
|
|
140
151
|
if (!this.session || !this.tokenizer)
|
|
141
152
|
return [];
|
|
142
153
|
const encoded = yield this.tokenizer(texts, {
|
|
@@ -125,7 +125,10 @@ class WorkerOrchestrator {
|
|
|
125
125
|
}
|
|
126
126
|
ensureReady() {
|
|
127
127
|
return __awaiter(this, void 0, void 0, function* () {
|
|
128
|
-
|
|
128
|
+
// Granite (dense ONNX) is loaded lazily on first fallback use inside
|
|
129
|
+
// granite.runBatch — in the normal MLX/GPU path it's never loaded, so we
|
|
130
|
+
// don't gate readiness on it or pay its resident cost per worker.
|
|
131
|
+
if (this.colbert.isReady()) {
|
|
129
132
|
return;
|
|
130
133
|
}
|
|
131
134
|
if (this.initPromise)
|
|
@@ -136,7 +139,6 @@ class WorkerOrchestrator {
|
|
|
136
139
|
yield Promise.all([
|
|
137
140
|
this.chunker.init(),
|
|
138
141
|
this.skeletonizer.init(),
|
|
139
|
-
this.granite.load(),
|
|
140
142
|
this.colbert.load(),
|
|
141
143
|
]);
|
|
142
144
|
stopTimer();
|
package/dist/lib/workers/pool.js
CHANGED
|
@@ -82,6 +82,26 @@ const TASK_TIMEOUT_MS = (() => {
|
|
|
82
82
|
return fromEnv;
|
|
83
83
|
return 120000;
|
|
84
84
|
})();
|
|
85
|
+
// Absolute per-task ceiling. Unlike TASK_TIMEOUT_MS (a no-progress timeout that
|
|
86
|
+
// every heartbeat resets), this is wall-clock from dispatch and is NEVER reset
|
|
87
|
+
// by heartbeats. It bounds a task that keeps emitting progress but never
|
|
88
|
+
// finishes — e.g. a worker wedged on a hung MLX request that still services its
|
|
89
|
+
// heartbeat timer. A single processFile (one file → batches of 16 chunks) is
|
|
90
|
+
// seconds even for huge files, so 5 min is generous headroom, never a real cap.
|
|
91
|
+
const HARD_DEADLINE_MS = (() => {
    const DEFAULT_DEADLINE_MS = 300000;
    const raw = process.env.GMAX_WORKER_HARD_DEADLINE_MS;
    const parsed = Number.parseInt(raw !== null && raw !== undefined ? raw : "", 10);
    // Only a positive integer override is honoured; anything else uses the default.
    return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_DEADLINE_MS;
})();
|
|
98
|
+
// Backstop for the leak that motivated all of the above: a worker left
|
|
99
|
+
// busy=true with no live timer to rescue it (a dropped IPC result, or a
|
|
100
|
+
// timeout-kill whose SIGKILL threw and left the process alive but de-listed).
|
|
101
|
+
// The reaper skips busy workers, so without this such a worker is immortal —
|
|
102
|
+
// we saw six survive 4-5 days. Set above HARD_DEADLINE so the per-task timer
|
|
103
|
+
// normally wins and this only fires for the no-timer case.
|
|
104
|
+
const STUCK_BUSY_MS = HARD_DEADLINE_MS + 60000;
|
|
85
105
|
const FORCE_KILL_GRACE_MS = 200;
|
|
86
106
|
// Longer grace for idle reaps: the worker isn't urgently in the way, and a
|
|
87
107
|
// graceful SIGTERM lets ONNX free ~1GB of model memory. But if SIGTERM is
|
|
@@ -97,6 +117,11 @@ class ProcessWorker {
|
|
|
97
117
|
this.busy = false;
|
|
98
118
|
this.pendingTaskId = null;
|
|
99
119
|
this.lastBusyTime = Date.now();
|
|
120
|
+
// Wall-clock at which this worker became busy with its current task; null
|
|
121
|
+
// when idle. Used by the reaper to detect workers wedged in busy=true.
|
|
122
|
+
this.busySince = null;
|
|
123
|
+
// Most recent RSS (bytes) the worker reported, for memory-based recycling.
|
|
124
|
+
this.lastRssBytes = 0;
|
|
100
125
|
// Set when the pool has cleaned up after this worker (via exit or error
|
|
101
126
|
// event). Guards against handleWorkerExit running twice when both events
|
|
102
127
|
// fire for the same crash.
|
|
@@ -122,6 +147,23 @@ function resolveProcessWorker() {
|
|
|
122
147
|
throw new Error("Process worker file not found");
|
|
123
148
|
}
|
|
124
149
|
const IDLE_WORKER_TIMEOUT_MS = 60000; // reap idle workers after 60s
|
|
150
|
+
// Idle-worker floor. Kept at 1 (not 2) to favour low resident memory over
|
|
151
|
+
// search warmth: an idle worker holds ~300 MB-1 GB, and on this deployment
|
|
152
|
+
// searches are infrequent, so paying a one-off cold start (~10-15s to boot +
|
|
153
|
+
// load models) on the rare search is preferable to keeping a second worker
|
|
154
|
+
// warm. The pool still scales up to maxWorkers on demand for indexing bursts.
|
|
155
|
+
const MIN_KEEP_WORKERS = 1;
|
|
156
|
+
// Recycle an idle worker whose RSS has grown past this. ONNX native memory
|
|
157
|
+
// (model arenas) lives outside V8, so --max-old-space-size can't bound it — a
|
|
158
|
+
// worker that processed one big file can stay pinned at ~2 GB. Replacing it
|
|
159
|
+
// with a fresh worker reclaims that. 0 (or negative) disables the check.
|
|
160
|
+
const WORKER_RSS_RECYCLE_MB = (() => {
    const DEFAULT_RECYCLE_MB = 800;
    const raw = process.env.GMAX_WORKER_RSS_RECYCLE_MB;
    const parsed = Number.parseInt(raw !== null && raw !== undefined ? raw : "", 10);
    // Zero and negative overrides are kept as-is; callers treat them as "disabled".
    return Number.isFinite(parsed) ? parsed : DEFAULT_RECYCLE_MB;
})();
|
|
125
167
|
// Methods that must skip the indexing backlog. encodeQuery is the search hot
|
|
126
168
|
// path: a single query is ~17ms but waits behind every queued processFile.
|
|
127
169
|
// rerank is similarly small and latency-sensitive.
|
|
@@ -147,17 +189,33 @@ class WorkerPool {
|
|
|
147
189
|
this.maxWorkers = Math.max(1, config_1.CONFIG.WORKER_THREADS);
|
|
148
190
|
// Lazy spawn: start with 1 worker, scale up on demand
|
|
149
191
|
this.spawnWorker();
|
|
150
|
-
// Periodically reap idle workers back
|
|
151
|
-
|
|
192
|
+
// Periodically reap idle workers back to MIN_KEEP, and force-kill any
|
|
193
|
+
// worker wedged in busy=true (the leak backstop — see STUCK_BUSY_MS).
|
|
194
|
+
this.idleReapInterval = setInterval(() => {
|
|
195
|
+
this.reapStuckWorkers();
|
|
196
|
+
this.reapBloatedWorkers();
|
|
197
|
+
this.reapIdleWorkers();
|
|
198
|
+
}, IDLE_WORKER_TIMEOUT_MS);
|
|
152
199
|
}
|
|
153
200
|
isHealthy() {
|
|
154
201
|
return !this.destroyed && this.workers.length > 0;
|
|
155
202
|
}
|
|
203
|
+
/** PIDs of workers the pool currently tracks. Used by the daemon's orphan
|
|
204
|
+
* sweep to distinguish live, accounted-for workers from de-listed strays. */
|
|
205
|
+
getWorkerPids() {
|
|
206
|
+
return this.workers
|
|
207
|
+
.map((w) => w.child.pid)
|
|
208
|
+
.filter((pid) => pid !== undefined);
|
|
209
|
+
}
|
|
156
210
|
clearTaskTimeout(task) {
|
|
157
211
|
if (task.timeout) {
|
|
158
212
|
clearTimeout(task.timeout);
|
|
159
213
|
task.timeout = undefined;
|
|
160
214
|
}
|
|
215
|
+
if (task.hardTimeout) {
|
|
216
|
+
clearTimeout(task.hardTimeout);
|
|
217
|
+
task.hardTimeout = undefined;
|
|
218
|
+
}
|
|
161
219
|
}
|
|
162
220
|
removeFromQueue(taskId) {
|
|
163
221
|
const pi = this.priorityQueue.indexOf(taskId);
|
|
@@ -174,6 +232,7 @@ class WorkerPool {
|
|
|
174
232
|
if (worker) {
|
|
175
233
|
worker.busy = false;
|
|
176
234
|
worker.pendingTaskId = null;
|
|
235
|
+
worker.busySince = null;
|
|
177
236
|
worker.lastBusyTime = Date.now();
|
|
178
237
|
}
|
|
179
238
|
}
|
|
@@ -222,12 +281,15 @@ class WorkerPool {
|
|
|
222
281
|
(0, logger_1.log)("pool", `spawn PID:${worker.child.pid} (${this.workers.length + 1}/${Math.max(1, config_1.CONFIG.WORKER_THREADS)})`);
|
|
223
282
|
const onMessage = (msg) => {
|
|
224
283
|
var _a, _b, _c, _d, _e, _f, _g, _h;
|
|
284
|
+
if (typeof msg.rss === "number")
|
|
285
|
+
worker.lastRssBytes = msg.rss;
|
|
225
286
|
// Fast cleanup for tasks that were aborted while running
|
|
226
287
|
if (this.abortedTasks.has(msg.id)) {
|
|
227
288
|
this.abortedTasks.delete(msg.id);
|
|
228
289
|
const task = this.tasks.get(msg.id);
|
|
229
290
|
if (task) {
|
|
230
291
|
this.completeTask(task, worker);
|
|
292
|
+
this.recycleIfBloated(worker);
|
|
231
293
|
this.dispatch();
|
|
232
294
|
}
|
|
233
295
|
return;
|
|
@@ -236,10 +298,15 @@ class WorkerPool {
|
|
|
236
298
|
if (!task)
|
|
237
299
|
return;
|
|
238
300
|
if ("heartbeat" in msg) {
|
|
239
|
-
// Reset timeout
|
|
240
|
-
|
|
301
|
+
// Reset only the no-progress timeout. The hard deadline is left
|
|
302
|
+
// untouched on purpose — heartbeats must not be able to extend a task
|
|
303
|
+
// past its absolute ceiling.
|
|
304
|
+
if (task.timeout) {
|
|
305
|
+
clearTimeout(task.timeout);
|
|
306
|
+
task.timeout = undefined;
|
|
307
|
+
}
|
|
241
308
|
if (task.worker) {
|
|
242
|
-
task.timeout = setTimeout(() => this.handleTaskTimeout(task, task.worker), TASK_TIMEOUT_MS);
|
|
309
|
+
task.timeout = setTimeout(() => this.handleTaskTimeout(task, task.worker, "no progress"), TASK_TIMEOUT_MS);
|
|
243
310
|
}
|
|
244
311
|
return;
|
|
245
312
|
}
|
|
@@ -260,6 +327,7 @@ class WorkerPool {
|
|
|
260
327
|
}
|
|
261
328
|
this.completeTask(task, worker);
|
|
262
329
|
this.consecutiveRespawns = 0;
|
|
330
|
+
this.recycleIfBloated(worker);
|
|
263
331
|
this.dispatch();
|
|
264
332
|
};
|
|
265
333
|
const onExit = (code, signal) => this.handleWorkerExit(worker, code, signal, "exit");
|
|
@@ -341,15 +409,16 @@ class WorkerPool {
|
|
|
341
409
|
this.dispatch();
|
|
342
410
|
});
|
|
343
411
|
}
|
|
344
|
-
handleTaskTimeout(task, worker) {
|
|
412
|
+
handleTaskTimeout(task, worker, reason = "no progress") {
|
|
345
413
|
var _a, _b, _c, _d;
|
|
346
414
|
if (this.destroyed || !this.tasks.has(task.id))
|
|
347
415
|
return;
|
|
348
416
|
this.clearTaskTimeout(task);
|
|
417
|
+
const limitMs = reason === "hard deadline" ? HARD_DEADLINE_MS : TASK_TIMEOUT_MS;
|
|
349
418
|
const filePath = (_d = (_b = (_a = task.payload) === null || _a === void 0 ? void 0 : _a.path) !== null && _b !== void 0 ? _b : (_c = task.payload) === null || _c === void 0 ? void 0 : _c.absolutePath) !== null && _d !== void 0 ? _d : "unknown";
|
|
350
|
-
(0, logger_1.log)("pool", `timeout task=${task.id} method=${task.method} file=${filePath}
|
|
419
|
+
(0, logger_1.log)("pool", `timeout task=${task.id} method=${task.method} file=${filePath} (${reason}, ${limitMs}ms) — killing worker PID:${worker.child.pid}`);
|
|
351
420
|
this.completeTask(task, null);
|
|
352
|
-
task.reject(new Error(`Worker task ${task.method}
|
|
421
|
+
task.reject(new Error(`Worker task ${task.method} exceeded ${reason} limit (${limitMs}ms) on ${filePath}`));
|
|
353
422
|
worker.cleanedUp = true;
|
|
354
423
|
worker.child.removeAllListeners("message");
|
|
355
424
|
worker.child.removeAllListeners("exit");
|
|
@@ -393,9 +462,13 @@ class WorkerPool {
|
|
|
393
462
|
}
|
|
394
463
|
idle.busy = true;
|
|
395
464
|
idle.pendingTaskId = task.id;
|
|
465
|
+
idle.busySince = Date.now();
|
|
396
466
|
task.worker = idle;
|
|
397
467
|
task.startTime = Date.now();
|
|
398
|
-
task.timeout = setTimeout(() => this.handleTaskTimeout(task, idle), TASK_TIMEOUT_MS);
|
|
468
|
+
task.timeout = setTimeout(() => this.handleTaskTimeout(task, idle, "no progress"), TASK_TIMEOUT_MS);
|
|
469
|
+
// Absolute deadline: never cleared/re-armed by heartbeats, so a task that
|
|
470
|
+
// keeps emitting progress but never completes is still bounded.
|
|
471
|
+
task.hardTimeout = setTimeout(() => this.handleTaskTimeout(task, idle, "hard deadline"), HARD_DEADLINE_MS);
|
|
399
472
|
const filePath = (_e = (_c = (_b = task.payload) === null || _b === void 0 ? void 0 : _b.path) !== null && _c !== void 0 ? _c : (_d = task.payload) === null || _d === void 0 ? void 0 : _d.absolutePath) !== null && _e !== void 0 ? _e : "";
|
|
400
473
|
const busyCount = this.workers.filter((w) => w.busy).length;
|
|
401
474
|
(0, logger_1.debug)("pool", `dispatch task=${task.id} method=${task.method}${filePath ? ` file=${filePath}` : ""} → PID:${idle.child.pid} (busy=${busyCount}/${this.workers.length} queue=${this.taskQueue.length}+${this.priorityQueue.length}p)`);
|
|
@@ -425,14 +498,113 @@ class WorkerPool {
|
|
|
425
498
|
return this.enqueue("rerank", input, signal);
|
|
426
499
|
}
|
|
427
500
|
/**
|
|
428
|
-
*
|
|
429
|
-
*
|
|
430
|
-
*
|
|
431
|
-
*
|
|
432
|
-
*
|
|
501
|
+
* Force-kill workers wedged in busy=true past STUCK_BUSY_MS. The per-task
|
|
502
|
+
* hard deadline normally rescues these first; this catches the case where no
|
|
503
|
+
* live timer exists — a dropped IPC result, or a prior SIGKILL that threw and
|
|
504
|
+
* left the process alive but de-listed. Unlike the idle reaper this ignores
|
|
505
|
+
* MIN_KEEP (a stuck worker is dead weight even at the floor) and goes straight
|
|
506
|
+
* to SIGKILL, letting the natural 'exit' handler fail the task and respawn.
|
|
507
|
+
*/
|
|
508
|
+
reapStuckWorkers() {
|
|
509
|
+
if (this.destroyed)
|
|
510
|
+
return;
|
|
511
|
+
const now = Date.now();
|
|
512
|
+
const stuck = this.workers.filter((w) => w.busy && w.busySince !== null && now - w.busySince > STUCK_BUSY_MS);
|
|
513
|
+
for (const w of stuck) {
|
|
514
|
+
const busyMs = w.busySince !== null ? now - w.busySince : 0;
|
|
515
|
+
(0, logger_1.log)("pool", `stuck worker PID:${w.child.pid} busy ${Math.round(busyMs / 1000)}s (>${STUCK_BUSY_MS}ms) — SIGKILL`);
|
|
516
|
+
// Leave listeners attached so handleWorkerExit runs on the resulting
|
|
517
|
+
// 'exit' event: it fails any task still bound to this worker and respawns
|
|
518
|
+
// if work is pending. Do not pre-set cleanedUp for the same reason.
|
|
519
|
+
try {
|
|
520
|
+
w.child.kill("SIGKILL");
|
|
521
|
+
}
|
|
522
|
+
catch (_a) { }
|
|
523
|
+
}
|
|
524
|
+
}
|
|
525
|
+
/**
|
|
526
|
+
* Recycle idle workers whose RSS has grown past WORKER_RSS_RECYCLE_MB. Unlike
|
|
527
|
+
* the idle reaper this ignores MIN_KEEP — a bloated worker is replaced rather
|
|
528
|
+
* than merely trimmed: we SIGTERM it (graceful, lets ONNX free its arenas)
|
|
529
|
+
* and respawn a fresh one if that drops us below MIN_KEEP. Only idle workers
|
|
530
|
+
* are touched, so an in-flight task is never interrupted.
|
|
531
|
+
*/
|
|
532
|
+
reapBloatedWorkers() {
|
|
533
|
+
if (this.destroyed || WORKER_RSS_RECYCLE_MB <= 0)
|
|
534
|
+
return;
|
|
535
|
+
const limitBytes = WORKER_RSS_RECYCLE_MB * 1024 * 1024;
|
|
536
|
+
const bloated = this.workers.filter((w) => !w.busy && w.lastRssBytes > limitBytes);
|
|
537
|
+
for (const w of bloated)
|
|
538
|
+
this.recycleWorker(w, "idle");
|
|
539
|
+
}
|
|
540
|
+
/**
|
|
541
|
+
* Recycle a worker whose RSS exceeds the threshold the instant it goes free
|
|
542
|
+
* between tasks. The idle reaper alone can't catch this: under continuous
|
|
543
|
+
* churn (a busy monorepo trickling one small file at a time) a worker is
|
|
544
|
+
* dispatched again within the 60s idle window, so a worker that peaked at
|
|
545
|
+
* ~1.4 GB on one large file never looks "idle" and stays pinned. Checking at
|
|
546
|
+
* task completion — when busy was just cleared and before the next dispatch —
|
|
547
|
+
* bounds RSS regardless of how steady the churn is. No-op while busy, so an
|
|
548
|
+
* in-flight task is never interrupted.
|
|
549
|
+
*/
|
|
550
|
+
recycleIfBloated(worker) {
|
|
551
|
+
if (this.destroyed ||
|
|
552
|
+
WORKER_RSS_RECYCLE_MB <= 0 ||
|
|
553
|
+
worker.busy ||
|
|
554
|
+
worker.cleanedUp) {
|
|
555
|
+
return;
|
|
556
|
+
}
|
|
557
|
+
if (worker.lastRssBytes > WORKER_RSS_RECYCLE_MB * 1024 * 1024) {
|
|
558
|
+
this.recycleWorker(worker, "post-task");
|
|
559
|
+
}
|
|
560
|
+
}
|
|
561
|
+
/**
|
|
562
|
+
* SIGTERM a worker (graceful, lets ONNX free its arenas), drop it from the
|
|
563
|
+
* pool, escalate to SIGKILL if it ignores the signal, and refill back to the
|
|
564
|
+
* floor with a fresh, lean worker. Shared by the idle and post-task RSS paths.
|
|
565
|
+
*/
|
|
566
|
+
recycleWorker(w, reason) {
|
|
567
|
+
if (w.cleanedUp)
|
|
568
|
+
return;
|
|
569
|
+
(0, logger_1.log)("pool", `recycle bloated worker PID:${w.child.pid} (${reason}, rss ${Math.round(w.lastRssBytes / 1048576)}MB > ${WORKER_RSS_RECYCLE_MB}MB)`);
|
|
570
|
+
w.cleanedUp = true;
|
|
571
|
+
w.child.removeAllListeners("message");
|
|
572
|
+
w.child.removeAllListeners("exit");
|
|
573
|
+
w.child.removeAllListeners("error");
|
|
574
|
+
const pid = w.child.pid;
|
|
575
|
+
try {
|
|
576
|
+
w.child.kill("SIGTERM");
|
|
577
|
+
}
|
|
578
|
+
catch (_a) { }
|
|
579
|
+
this.workers = this.workers.filter((x) => x !== w);
|
|
580
|
+
// Escalate to SIGKILL if SIGTERM is ignored (a worker mid native-call
|
|
581
|
+
// won't service signals).
|
|
582
|
+
if (pid !== undefined) {
|
|
583
|
+
setTimeout(() => {
|
|
584
|
+
try {
|
|
585
|
+
process.kill(pid, 0);
|
|
586
|
+
try {
|
|
587
|
+
process.kill(pid, "SIGKILL");
|
|
588
|
+
}
|
|
589
|
+
catch (_a) { }
|
|
590
|
+
}
|
|
591
|
+
catch (_b) {
|
|
592
|
+
// ESRCH — already gone.
|
|
593
|
+
}
|
|
594
|
+
}, REAP_FORCE_KILL_GRACE_MS);
|
|
595
|
+
}
|
|
596
|
+
// Replace anything we dropped below the floor with fresh, lean workers.
|
|
597
|
+
while (!this.destroyed && this.workers.length < MIN_KEEP_WORKERS) {
|
|
598
|
+
this.spawnWorker();
|
|
599
|
+
}
|
|
600
|
+
}
|
|
601
|
+
/**
|
|
602
|
+
* Reap idle workers back down to MIN_KEEP_WORKERS. Keeps the most recently
|
|
603
|
+
* active. Called on a timer — never removes busy workers. See
|
|
604
|
+
* MIN_KEEP_WORKERS for the memory-vs-warmth tradeoff behind the floor.
|
|
433
605
|
*/
|
|
434
606
|
reapIdleWorkers() {
|
|
435
|
-
const MIN_KEEP =
|
|
607
|
+
const MIN_KEEP = MIN_KEEP_WORKERS;
|
|
436
608
|
if (this.destroyed || this.workers.length <= MIN_KEEP)
|
|
437
609
|
return;
|
|
438
610
|
const now = Date.now();
|
|
@@ -49,9 +49,12 @@ const node_process_1 = __importDefault(require("node:process"));
|
|
|
49
49
|
node_process_1.default.title = "gmax-worker";
|
|
50
50
|
const worker_1 = __importStar(require("./worker"));
|
|
51
51
|
const logger_1 = require("../utils/logger");
|
|
52
|
+
// Every outgoing message also carries `rss` (see send()).
|
|
52
53
|
const send = (msg) => {
    // process.send is falsy here — nothing to send to.
    if (!node_process_1.default.send) {
        return;
    }
    // Copy the message and stamp the current RSS on it so the receiver can
    // see this process's resident memory on every message.
    const outgoing = Object.assign({}, msg);
    outgoing.rss = node_process_1.default.memoryUsage().rss;
    node_process_1.default.send(outgoing);
};
|
|
57
60
|
node_process_1.default.on("message", (msg) => __awaiter(void 0, void 0, void 0, function* () {
|
package/package.json
CHANGED
|
@@ -192,7 +192,7 @@ Understand:
|
|
|
192
192
|
Survey:
|
|
193
193
|
gmax project codebase overview (langs, structure, key symbols)
|
|
194
194
|
gmax skeleton <file> file structure (file path, NOT a directory)
|
|
195
|
-
gmax context "topic" --budget 4000
|
|
195
|
+
gmax context "topic-or-path" --budget 4000 topic summary or deterministic file/dir context
|
|
196
196
|
gmax log <path-or-symbol> git commits (replaces recent/diff)
|
|
197
197
|
gmax status indexed projects
|
|
198
198
|
|
|
@@ -186,12 +186,14 @@ gmax dead handleAuth --in src/ # restrict to a sub-path
|
|
|
186
186
|
```
|
|
187
187
|
Status is `DEAD` (no callers, not exported), `PUBLIC EXPORT` (no internal callers but the defining chunk is exported — check external usage), or `LIVE` (with caller count + top-3 file:line). The call graph reflects what tree-sitter chunked: dynamic dispatch, reflection, eval, and string-built call sites won't show up — `DEAD` is a hypothesis, not a proof.
|
|
188
188
|
|
|
189
|
-
### Context — `gmax context <topic> --budget <tokens>`
|
|
189
|
+
### Context — `gmax context <topic-or-path> --budget <tokens>`
|
|
190
190
|
```
|
|
191
191
|
gmax context "authentication system" --budget 4000
|
|
192
192
|
gmax context "payment flow" --budget 8000
|
|
193
|
+
gmax context src/lib/auth.ts --budget 3000
|
|
193
194
|
gmax context src/lib/auth/ --budget 3000
|
|
194
195
|
```
|
|
196
|
+
Use the path form when you already know the file or directory; it skips semantic search and gives deterministic structure/excerpt context.
|
|
195
197
|
|
|
196
198
|
### Investigate — `gmax investigate "question"` (requires LLM)
|
|
197
199
|
```
|
|
@@ -229,7 +231,7 @@ gmax llm on/off/start/stop/status # manage local LLM server
|
|
|
229
231
|
11. **Impact** — `Bash(gmax impact <symbol>)` for blast radius before significant changes
|
|
230
232
|
12. **Similar** — `Bash(gmax similar <symbol>)` to find similar patterns for DRY analysis
|
|
231
233
|
13. **Dead** — `Bash(gmax dead <symbol>)` to check if a symbol has zero inbound callers (hypothesis, not proof)
|
|
232
|
-
14. **Context** — `Bash(gmax context "topic" --budget 4000)` for
|
|
234
|
+
14. **Context** — `Bash(gmax context "topic-or-path" --budget 4000)` for token-budgeted topic or path context
|
|
233
235
|
15. **Related** — `Bash(gmax related <file>)` to see what else to look at
|
|
234
236
|
16. **Status** — `Bash(gmax status)` to check index state across all projects
|
|
235
237
|
|