omnius 1.0.152 → 1.0.153

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -529083,14 +529083,16 @@ async function scanOllamaProcesses(options2 = {}) {
529083
529083
  ]);
529084
529084
  const snapshots = [];
529085
529085
  for (const row of psRows) {
529086
- if (!isOllamaServeCommand(row.command))
529086
+ const serve = isOllamaServeCommand(row.command);
529087
+ const runner = !serve && isOllamaRunnerCommand(row.command);
529088
+ if (!serve && !runner)
529087
529089
  continue;
529088
- const env2 = await readProcEnv(system, row.pid);
529090
+ const env2 = serve ? await readProcEnv(system, row.pid) : { readable: false, values: {} };
529089
529091
  const ports = uniqueNumbers([
529090
529092
  ...portMap.get(row.pid) ?? [],
529091
- ...portsFromEnv(env2.values)
529093
+ ...serve ? portsFromEnv(env2.values) : []
529092
529094
  ]);
529093
- const ollamaApiPorts = await probeOllamaPorts(system, ports);
529095
+ const ollamaApiPorts = serve ? await probeOllamaPorts(system, ports) : [];
529094
529096
  snapshots.push({
529095
529097
  pid: row.pid,
529096
529098
  ppid: row.ppid,
@@ -529103,7 +529105,9 @@ async function scanOllamaProcesses(options2 = {}) {
529103
529105
  envReadable: env2.readable,
529104
529106
  env: env2.values,
529105
529107
  gpuMemoryMb: gpuMemoryByPid.get(row.pid) ?? null,
529106
- isOllamaServe: true
529108
+ isOllamaServe: serve,
529109
+ isOllamaRunner: runner,
529110
+ runnerModelBlob: runner ? parseRunnerModelBlob(row.command) : null
529107
529111
  });
529108
529112
  }
529109
529113
  const decisions = classifyOllamaProcesses(snapshots, {
@@ -529223,6 +529227,20 @@ function classifyOllamaProcesses(processes, options2 = {}) {
529223
529227
  const hasPortEvidence = proc.ports.length > 0 && proc.ollamaApiPorts.length > 0;
529224
529228
  const lowCpu = proc.cpuPct === null || proc.cpuPct <= cpuThresholdPct;
529225
529229
  const oldEnough = proc.elapsedMs === null || proc.elapsedMs >= minAgeMs;
529230
+ if (proc.isOllamaRunner) {
529231
+ const liveServePids = new Set(processes.filter((p2) => p2.isOllamaServe).map((p2) => p2.pid));
529232
+ const parentIsLiveServe = proc.ppid !== null && liveServePids.has(proc.ppid);
529233
+ if (parentIsLiveServe) {
529234
+ return decision(proc, "keep", "active-pool-owned", true, false, "deterministic", [`runner managed by live ollama serve pid=${proc.ppid}`]);
529235
+ }
529236
+ reasons.push(`orphan ollama runner (ppid=${proc.ppid ?? "?"} is not a live ollama serve)`);
529237
+ if (proc.runnerModelBlob)
529238
+ reasons.push(`holds model blob ${basename18(proc.runnerModelBlob)}`);
529239
+ if (proc.gpuMemoryMb !== null && proc.gpuMemoryMb > 0) {
529240
+ reasons.push(`holding ${proc.gpuMemoryMb} MB GPU memory`);
529241
+ }
529242
+ return decision(proc, "terminate", "orphan-runner", false, true, "deterministic", reasons);
529243
+ }
529226
529244
  if (!proc.isOllamaServe) {
529227
529245
  return decision(proc, "keep", "unknown-ollama", false, false, "deterministic", ["not an ollama serve process"]);
529228
529246
  }
@@ -529577,6 +529595,14 @@ function isOllamaServeCommand(command) {
529577
529595
  const [exe, subcommand] = command.trim().split(/\s+/);
529578
529596
  return basename18(exe ?? "") === "ollama" && subcommand === "serve";
529579
529597
  }
/**
 * Reports whether a process command line is an `ollama runner` invocation.
 *
 * The command is trimmed and split on runs of whitespace; the basename of
 * the first token must be `ollama` and the second token must be `runner`.
 *
 * @param {string} command - Full command line of the process.
 * @returns {boolean} True when the command is `ollama runner ...`.
 */
function isOllamaRunnerCommand(command) {
  const tokens = command.trim().split(/\s+/);
  const binaryName = basename18(tokens[0] ?? "");
  if (binaryName !== "ollama") {
    return false;
  }
  return tokens[1] === "runner";
}
/**
 * Extracts the value of the `--model` flag from a runner command line.
 *
 * Both `--model <value>` and `--model=<value>` are recognised. The value is
 * the first whitespace-delimited token after the flag, so a value that
 * itself contains whitespace is cut at the first space.
 *
 * @param {string} command - Full command line of the process.
 * @returns {string | null} The flag value, or null when the flag is absent,
 *   has no value, or `command` is not a string.
 */
function parseRunnerModelBlob(command) {
  if (typeof command !== "string") {
    return null;
  }
  // `=` or whitespace may separate the flag from its value.
  const match = command.match(/--model(?:=|\s+)(\S+)/);
  return match?.[1] ?? null;
}
529580
529606
  function portsFromEnv(env2) {
529581
529607
  const host = env2["OLLAMA_HOST"];
529582
529608
  if (!host)
@@ -530144,15 +530170,31 @@ var init_ollama_pool = __esm({
530144
530170
  const child = spawn23(config.ollamaBinary, ["serve"], {
530145
530171
  env: env2,
530146
530172
  stdio: ["ignore", "pipe", "pipe"],
530147
- detached: false
530173
+ detached: true
530148
530174
  });
530175
+ child.unref();
530149
530176
  const proc = {
530150
530177
  pid: child.pid ?? -1,
530151
530178
  kill: () => {
530152
- try {
530153
- child.kill("SIGTERM");
530154
- } catch {
530155
- }
530179
+ const pid = child.pid;
530180
+ if (!pid || pid <= 0)
530181
+ return;
530182
+ const tryKill = (target, sig) => {
530183
+ try {
530184
+ process.kill(target, sig);
530185
+ return true;
530186
+ } catch {
530187
+ return false;
530188
+ }
530189
+ };
530190
+ const groupKilled = tryKill(-pid, "SIGTERM");
530191
+ if (!groupKilled)
530192
+ tryKill(pid, "SIGTERM");
530193
+ setTimeout(() => {
530194
+ if (tryKill(-pid, "SIGKILL"))
530195
+ return;
530196
+ tryKill(pid, "SIGKILL");
530197
+ }, 8e3).unref();
530156
530198
  }
530157
530199
  };
530158
530200
  child.stdout?.on("data", () => {
@@ -530242,6 +530284,65 @@ var init_ollama_pool = __esm({
530242
530284
  }, null));
530243
530285
  this.startReaper();
530244
530286
  this.scheduleStartupCleanupScan();
530287
+ this.installProcessExitHooks();
530288
+ }
530289
+ /**
530290
+ * Process-exit hooks that kill every spawned `ollama serve` subprocess
530291
+ * group on Omnius shutdown. Catches all the ways the previous orphan
530292
+ * runners were created:
530293
+ *
530294
+ * - SIGTERM / SIGINT (systemd stop, Ctrl-C, kill PID)
530295
+ * - process.exit() from a fatal error path
530296
+ * - 'beforeExit' (graceful natural shutdown)
530297
+ *
530298
+ * The 'exit' listener is intentionally synchronous-only because Node has
530299
+ * already started tearing down by then — it just fires `process.kill` on
530300
+ * the recorded PGIDs and returns. Async work is impossible at that stage.
530301
+ *
530302
+ * Signal handlers also forward the signal after teardown so any other
530303
+ * shutdown plumbing (web framework graceful close, etc.) still observes
530304
+ * the original signal — we don't swallow it.
530305
+ *
530306
+ * Idempotent across multiple pool instances in the same process: every
530307
+ * pool only kills its OWN pids.
530308
+ */
530309
+ installProcessExitHooks() {
530310
+ if (process.env["NODE_ENV"] === "test" || process.env["VITEST"])
530311
+ return;
530312
+ if (process.env["OMNIUS_OLLAMA_PROCESS_EXIT_HOOK"] === "0")
530313
+ return;
530314
+ const killAllPoolOwnedSync = (sig) => {
530315
+ for (const inst of this.instances) {
530316
+ if (!inst.state.poolOwned)
530317
+ continue;
530318
+ const pid = inst.state.pid;
530319
+ if (!pid || pid <= 0)
530320
+ continue;
530321
+ try {
530322
+ process.kill(-pid, sig);
530323
+ } catch {
530324
+ try {
530325
+ process.kill(pid, sig);
530326
+ } catch {
530327
+ }
530328
+ }
530329
+ }
530330
+ };
530331
+ const onSignal = (sig) => {
530332
+ killAllPoolOwnedSync(sig);
530333
+ process.removeListener("SIGTERM", onSigterm);
530334
+ process.removeListener("SIGINT", onSigint);
530335
+ try {
530336
+ process.kill(process.pid, sig);
530337
+ } catch {
530338
+ }
530339
+ };
530340
+ const onSigterm = () => onSignal("SIGTERM");
530341
+ const onSigint = () => onSignal("SIGINT");
530342
+ process.on("SIGTERM", onSigterm);
530343
+ process.on("SIGINT", onSigint);
530344
+ process.on("exit", () => killAllPoolOwnedSync("SIGTERM"));
530345
+ process.on("beforeExit", () => killAllPoolOwnedSync("SIGTERM"));
530245
530346
  }
530246
530347
  /**
530247
530348
  * Resolve the effective agent id for an acquire request. Explicit option
@@ -530834,7 +530935,7 @@ var init_ollama_pool = __esm({
530834
530935
  return;
530835
530936
  const handle2 = setTimeout(async () => {
530836
530937
  try {
530837
- const cleanupOnStart = process.env["OMNIUS_OLLAMA_CLEANUP_ON_START"] === "1";
530938
+ const cleanupOnStart = process.env["OMNIUS_OLLAMA_CLEANUP_ON_START"] !== "0";
530838
530939
  const { cleanupStaleOllamaProcesses: cleanupStaleOllamaProcesses2 } = await Promise.resolve().then(() => (init_ollama_pool_cleanup(), ollama_pool_cleanup_exports));
530839
530940
  const report2 = await cleanupStaleOllamaProcesses2({
530840
530941
  dryRun: !cleanupOnStart,
@@ -530847,6 +530948,9 @@ var init_ollama_pool = __esm({
530847
530948
  if (stale.length > 0) {
530848
530949
  this.emit(cleanupOnStart ? "stale-process-cleanup" : "stale-processes-found", {
530849
530950
  staleCount: stale.length,
530951
+ // Surface orphan-runner counts separately so observability can
530952
+ // alert on the VRAM-hostage failure mode specifically.
530953
+ orphanRunnerCount: report2.decisions.filter((d2) => d2.classification === "orphan-runner" && d2.action === "terminate").length,
530850
530954
  dryRun: report2.dryRun,
530851
530955
  terminated: report2.terminated,
530852
530956
  skipped: report2.skipped,
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.152",
3
+ "version": "1.0.153",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.152",
9
+ "version": "1.0.153",
10
10
  "bundleDependencies": [
11
11
  "image-to-ascii"
12
12
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.152",
3
+ "version": "1.0.153",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",