omnius 1.0.105 → 1.0.107

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -578020,10 +578020,51 @@ async function needsTextToolMode(modelName, backendUrl2) {
578020
578020
  const hasTools = await checkToolSupport(modelName, backendUrl2);
578021
578021
  return !hasTools;
578022
578022
  }
578023
/**
 * Parse the output of
 * `nvidia-smi --query-gpu=memory.total,memory.free,name --format=csv,noheader,nounits`.
 *
 * Each non-empty line describes one GPU as `total, free, name`. Memory values
 * are divided by 1024 and summed over all GPUs; the name of the first GPU that
 * reports one is returned.
 *
 * @param {string} stdout - Raw stdout of the nvidia-smi command.
 * @returns {{ total: number, free: number, name: string }} Summed total and
 *   free memory (input values / 1024) and the first GPU name ("" if none).
 */
function parseNvidiaSmi(stdout) {
  let total = 0;
  let free = 0;
  let name = "";
  for (const line of stdout.trim().split("\n")) {
    if (!line) continue;
    // The name is the last queried field and nvidia-smi does not quote CSV
    // values, so any extra commas belong to the name: re-join them instead of
    // truncating at the first one.
    const [totalField = "0", freeField = "0", ...nameFields] = line
      .split(",")
      .map((field) => field.trim());
    const totalMB = Number.parseInt(totalField, 10);
    const freeMB = Number.parseInt(freeField, 10);
    // Non-numeric placeholders such as "[N/A]" parse to NaN and are skipped.
    if (!Number.isNaN(totalMB)) total += totalMB / 1024;
    if (!Number.isNaN(freeMB)) free += freeMB / 1024;
    if (!name) name = nameFields.join(", ");
  }
  return { total, free, name };
}
578038
/**
 * Extract total and available memory from `free -b` output.
 *
 * The values come from the line starting with `Mem:`. The "available" column
 * is located through the header line when one is present, because the column
 * layout differs between procps versions:
 *   - current: total used free shared buff/cache available
 *   - legacy:  total used free shared buffers    cached
 * On the legacy layout the sixth number is `cached`, not `available`, so 0 is
 * returned for `available` and the caller can apply its own estimate.
 *
 * @param {string} stdout - Raw stdout of `free -b`.
 * @returns {{ total: number, available: number }} Values as printed on the
 *   `Mem:` line; 0 for anything that could not be determined.
 */
function parseFreeBytes(stdout) {
  const lines = stdout.split("\n");
  const memIndex = lines.findIndex((line) => /^Mem:/i.test(line));
  if (memIndex === -1) return { total: 0, available: 0 };

  const nums = lines[memIndex].match(/\d+/g);
  if (!nums || nums.length === 0) return { total: 0, available: 0 };
  const total = Number.parseInt(nums[0], 10) || 0;

  // The "Mem:" label holds no digits, so header column i lines up with nums[i].
  const header = lines
    .slice(0, memIndex)
    .map((line) => line.trim().toLowerCase().split(/\s+/))
    .find((cols) => cols[0] === "total");

  // Default to the sixth column (headerless or localized header output).
  let availableIdx = 5;
  if (header) {
    const idx = header.indexOf("available");
    if (idx !== -1) {
      availableIdx = idx;
    } else if (header.includes("cached")) {
      // Legacy layout: there is no "available" column at all.
      availableIdx = -1;
    }
  }

  const available =
    availableIdx >= 0 && availableIdx < nums.length
      ? Number.parseInt(nums[availableIdx], 10) || 0
      : 0;
  return { total, available };
}
578046
/**
 * Parse the output of `rocm-smi --showmeminfo vram --csv`.
 *
 * The CSV header names the columns (e.g. `device,VRAM Total Memory (B),
 * VRAM Total Used Memory (B)`), so the total/used columns are located from the
 * header rather than assumed by position. If no usable header is seen, the
 * larger of the two values in a row is taken as the total, since used memory
 * cannot exceed total memory.
 *
 * Byte counts are converted by dividing by 1024 ** 3 and summed over all rows.
 *
 * @param {string} stdout - Raw stdout of the rocm-smi command.
 * @returns {{ total: number, free: number, name: string }} Summed total,
 *   `total - used` clamped at 0, and `AMD <first device id>` or "AMD GPU".
 */
function parseRocmSmi(stdout) {
  const BYTES_PER_GIB = 1024 ** 3;
  let total = 0;
  let used = 0;
  let name = "";
  let totalCol = -1;
  let usedCol = -1;

  for (const rawLine of stdout.trim().split("\n")) {
    const line = rawLine.trim();
    // Skip blank lines and "====" banner lines.
    if (!line || line.startsWith("=")) continue;
    const parts = line.split(",").map((field) => field.trim());

    if (line.toLowerCase().startsWith("device")) {
      // Header row: "... Total Used Memory" contains both words, so the total
      // column is the one mentioning "total" but not "used".
      const headers = parts.map((field) => field.toLowerCase());
      usedCol = headers.findIndex((h) => h.includes("used"));
      totalCol = headers.findIndex((h) => h.includes("total") && !h.includes("used"));
      continue;
    }
    if (parts.length < 3) continue;

    let totalBytes;
    let usedBytes;
    if (totalCol > 0 && usedCol > 0) {
      totalBytes = Number.parseInt(parts[totalCol] ?? "", 10);
      usedBytes = Number.parseInt(parts[usedCol] ?? "", 10);
    } else {
      // No header to go by: the larger value must be the total.
      const first = Number.parseInt(parts[1], 10);
      const second = Number.parseInt(parts[2], 10);
      totalBytes = Math.max(first, second);
      usedBytes = Math.min(first, second);
    }

    // Rows without a numeric total (warnings, notes) are not devices.
    if (!Number.isFinite(totalBytes)) continue;
    total += totalBytes / BYTES_PER_GIB;
    if (Number.isFinite(usedBytes)) used += usedBytes / BYTES_PER_GIB;
    if (!name && parts[0]) name = parts[0];
  }

  return {
    total,
    free: Math.max(0, total - used),
    name: name ? `AMD ${name}` : "AMD GPU"
  };
}
578023
578063
  function detectSystemSpecs() {
578024
578064
  let totalRamGB = 0;
578025
578065
  let availableRamGB = 0;
578026
578066
  let gpuVramGB = 0;
578067
+ let availableVramGB = 0;
578027
578068
  let gpuName = "";
578028
578069
  try {
578029
578070
  const memInfo = execSync50("free -b 2>/dev/null || sysctl -n hw.memsize 2>/dev/null", {
@@ -578031,14 +578072,14 @@ function detectSystemSpecs() {
578031
578072
  timeout: 5e3
578032
578073
  });
578033
578074
  if (memInfo.includes("Mem:")) {
578034
- const match = memInfo.match(/^Mem:\s+(\d+)\s+\d+\s+\d+\s+\d+\s+\d+\s+(\d+)/m);
578035
- if (match) {
578036
- totalRamGB = parseInt(match[1], 10) / 1024 ** 3;
578037
- availableRamGB = parseInt(match[2], 10) / 1024 ** 3;
578075
+ const { total, available } = parseFreeBytes(memInfo);
578076
+ if (total > 0) {
578077
+ totalRamGB = total / 1024 ** 3;
578078
+ availableRamGB = available > 0 ? available / 1024 ** 3 : totalRamGB * 0.75;
578038
578079
  }
578039
578080
  } else {
578040
578081
  const bytes = parseInt(memInfo.trim(), 10);
578041
- if (!isNaN(bytes)) {
578082
+ if (!isNaN(bytes) && bytes > 0) {
578042
578083
  totalRamGB = bytes / 1024 ** 3;
578043
578084
  availableRamGB = totalRamGB * 0.7;
578044
578085
  }
@@ -578047,24 +578088,33 @@ function detectSystemSpecs() {
578047
578088
  }
578048
578089
  try {
578049
578090
  const nvidiaSmi = execSync50(
578050
- "nvidia-smi --query-gpu=memory.total,name --format=csv,noheader,nounits 2>/dev/null",
578091
+ "nvidia-smi --query-gpu=memory.total,memory.free,name --format=csv,noheader,nounits 2>/dev/null",
578051
578092
  { encoding: "utf8", timeout: 5e3 }
578052
578093
  );
578053
- const lines = nvidiaSmi.trim().split("\n");
578054
- if (lines.length > 0) {
578055
- for (const line of lines) {
578056
- const parts = line.split(",").map((s2) => s2.trim());
578057
- const vramMB = parseInt(parts[0] ?? "0", 10);
578058
- if (!isNaN(vramMB)) gpuVramGB += vramMB / 1024;
578059
- if (!gpuName && parts[1]) gpuName = parts[1];
578060
- }
578061
- }
578094
+ const r2 = parseNvidiaSmi(nvidiaSmi);
578095
+ gpuVramGB += r2.total;
578096
+ availableVramGB += r2.free;
578097
+ if (!gpuName && r2.name) gpuName = r2.name;
578062
578098
  } catch {
578063
578099
  }
578100
+ if (gpuVramGB === 0) {
578101
+ try {
578102
+ const rocmSmi = execSync50(
578103
+ "rocm-smi --showmeminfo vram --csv 2>/dev/null",
578104
+ { encoding: "utf8", timeout: 5e3 }
578105
+ );
578106
+ const r2 = parseRocmSmi(rocmSmi);
578107
+ gpuVramGB += r2.total;
578108
+ availableVramGB += r2.free;
578109
+ if (!gpuName && r2.name) gpuName = r2.name;
578110
+ } catch {
578111
+ }
578112
+ }
578064
578113
  return {
578065
578114
  totalRamGB: Math.round(totalRamGB * 10) / 10,
578066
578115
  availableRamGB: Math.round(availableRamGB * 10) / 10,
578067
578116
  gpuVramGB: Math.round(gpuVramGB * 10) / 10,
578117
+ availableVramGB: Math.round(availableVramGB * 10) / 10,
578068
578118
  gpuName
578069
578119
  };
578070
578120
  }
@@ -578072,6 +578122,7 @@ async function detectSystemSpecsAsync() {
578072
578122
  let totalRamGB = 0;
578073
578123
  let availableRamGB = 0;
578074
578124
  let gpuVramGB = 0;
578125
+ let availableVramGB = 0;
578075
578126
  let gpuName = "";
578076
578127
  try {
578077
578128
  const { stdout: memInfo } = await execAsync2(
@@ -578079,14 +578130,14 @@ async function detectSystemSpecsAsync() {
578079
578130
  { timeout: 5e3 }
578080
578131
  );
578081
578132
  if (memInfo.includes("Mem:")) {
578082
- const match = memInfo.match(/^Mem:\s+(\d+)\s+\d+\s+\d+\s+\d+\s+\d+\s+(\d+)/m);
578083
- if (match) {
578084
- totalRamGB = parseInt(match[1], 10) / 1024 ** 3;
578085
- availableRamGB = parseInt(match[2], 10) / 1024 ** 3;
578133
+ const { total, available } = parseFreeBytes(memInfo);
578134
+ if (total > 0) {
578135
+ totalRamGB = total / 1024 ** 3;
578136
+ availableRamGB = available > 0 ? available / 1024 ** 3 : totalRamGB * 0.75;
578086
578137
  }
578087
578138
  } else {
578088
578139
  const bytes = parseInt(memInfo.trim(), 10);
578089
- if (!isNaN(bytes)) {
578140
+ if (!isNaN(bytes) && bytes > 0) {
578090
578141
  totalRamGB = bytes / 1024 ** 3;
578091
578142
  availableRamGB = totalRamGB * 0.7;
578092
578143
  }
@@ -578095,24 +578146,33 @@ async function detectSystemSpecsAsync() {
578095
578146
  }
578096
578147
  try {
578097
578148
  const { stdout: nvidiaSmi } = await execAsync2(
578098
- "nvidia-smi --query-gpu=memory.total,name --format=csv,noheader,nounits 2>/dev/null",
578149
+ "nvidia-smi --query-gpu=memory.total,memory.free,name --format=csv,noheader,nounits 2>/dev/null",
578099
578150
  { timeout: 5e3 }
578100
578151
  );
578101
- const lines = nvidiaSmi.trim().split("\n");
578102
- if (lines.length > 0) {
578103
- for (const line of lines) {
578104
- const parts = line.split(",").map((s2) => s2.trim());
578105
- const vramMB = parseInt(parts[0] ?? "0", 10);
578106
- if (!isNaN(vramMB)) gpuVramGB += vramMB / 1024;
578107
- if (!gpuName && parts[1]) gpuName = parts[1];
578108
- }
578109
- }
578152
+ const r2 = parseNvidiaSmi(nvidiaSmi);
578153
+ gpuVramGB += r2.total;
578154
+ availableVramGB += r2.free;
578155
+ if (!gpuName && r2.name) gpuName = r2.name;
578110
578156
  } catch {
578111
578157
  }
578158
+ if (gpuVramGB === 0) {
578159
+ try {
578160
+ const { stdout: rocmSmi } = await execAsync2(
578161
+ "rocm-smi --showmeminfo vram --csv 2>/dev/null",
578162
+ { timeout: 5e3 }
578163
+ );
578164
+ const r2 = parseRocmSmi(rocmSmi);
578165
+ gpuVramGB += r2.total;
578166
+ availableVramGB += r2.free;
578167
+ if (!gpuName && r2.name) gpuName = r2.name;
578168
+ } catch {
578169
+ }
578170
+ }
578112
578171
  return {
578113
578172
  totalRamGB: Math.round(totalRamGB * 10) / 10,
578114
578173
  availableRamGB: Math.round(availableRamGB * 10) / 10,
578115
578174
  gpuVramGB: Math.round(gpuVramGB * 10) / 10,
578175
+ availableVramGB: Math.round(availableVramGB * 10) / 10,
578116
578176
  gpuName
578117
578177
  };
578118
578178
  }
@@ -578127,9 +578187,10 @@ function recommendModel(specs) {
578127
578187
  }
578128
578188
  return QWEN_VARIANTS.find((v) => v.tag === "qwen3.5:cloud");
578129
578189
  }
578130
- function calculateContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
578190
+ function calculateMemoryBoundedNumCtx(specs, modelSizeGB2, kvBytesPerToken, archMax) {
578131
578191
  const ramBudget = specs.availableRamGB > 0 ? specs.availableRamGB : specs.totalRamGB;
578132
- const totalAvail = Math.max(specs.gpuVramGB, ramBudget);
578192
+ const vramBudget = specs.availableVramGB > 0 ? specs.availableVramGB : specs.gpuVramGB;
578193
+ const totalAvail = Math.max(vramBudget, ramBudget);
578133
578194
  const remaining = Math.max(0, totalAvail - modelSizeGB2);
578134
578195
  const usableGB = remaining * 0.85;
578135
578196
  let numCtx;
@@ -578141,7 +578202,6 @@ function calculateContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
578141
578202
  const maxTokens = Math.floor(usableGB * 1024 ** 3 / kvEstimate);
578142
578203
  numCtx = Math.max(2048, Math.floor(maxTokens / 1024) * 1024);
578143
578204
  }
578144
- numCtx = Math.min(numCtx, 131072);
578145
578205
  if (archMax && archMax > 0) numCtx = Math.min(numCtx, archMax);
578146
578206
  if (kvBytesPerToken && kvBytesPerToken > 0 && modelSizeGB2 > 0) {
578147
578207
  const maxKVBytes = modelSizeGB2 * 4 * 1024 ** 3;
@@ -578149,6 +578209,11 @@ function calculateContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
578149
578209
  const budgetCap = Math.max(2048, Math.floor(maxTokensFromBudget / 1024) * 1024);
578150
578210
  numCtx = Math.min(numCtx, budgetCap);
578151
578211
  }
578212
+ return numCtx;
578213
+ }
578214
/**
 * Compute the context window for a model: the memory-bounded token count from
 * `calculateMemoryBoundedNumCtx`, capped at 131072 tokens, together with a
 * display label produced by `formatContextLabel`.
 *
 * @param specs - System specs, forwarded unchanged.
 * @param modelSizeGB2 - Model size in GB, forwarded unchanged.
 * @param kvBytesPerToken - KV-cache bytes per token, forwarded unchanged.
 * @param archMax - Architecture maximum context, forwarded unchanged.
 * @returns {{ numCtx: number, label: string }} Capped token count and label.
 */
function calculateContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
  const memoryBounded = calculateMemoryBoundedNumCtx(
    specs,
    modelSizeGB2,
    kvBytesPerToken,
    archMax
  );
  const numCtx = Math.min(memoryBounded, 131072);
  // Use the shared formatter so the label cannot drift from
  // calculateExpandedVariantContextWindow's labels.
  return { numCtx, label: formatContextLabel(numCtx) };
}
@@ -578156,18 +578221,17 @@ function formatContextLabel(numCtx) {
578156
578221
  return numCtx >= 1024 ? `${Math.floor(numCtx / 1024)}K` : String(numCtx);
578157
578222
  }
578158
578223
  function calculateExpandedVariantContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
578159
- const memoryBudget = calculateContextWindow(
578224
+ const memoryFit = calculateMemoryBoundedNumCtx(
578160
578225
  specs,
578161
578226
  modelSizeGB2,
578162
578227
  kvBytesPerToken,
578163
578228
  archMax
578164
578229
  );
578165
- if (archMax && archMax > 0) {
578166
- const archCtx = Math.max(2048, Math.floor(archMax / 1024) * 1024);
578167
- const numCtx = Math.min(archCtx, memoryBudget.numCtx);
578168
- return { numCtx, label: formatContextLabel(numCtx) };
578169
- }
578170
- return memoryBudget;
578230
+ const archCtx = archMax && archMax > 0 ? Math.max(2048, Math.floor(archMax / 1024) * 1024) : Number.POSITIVE_INFINITY;
578231
+ const floor = Math.min(EXPANDED_VARIANT_MIN_NUM_CTX, archCtx);
578232
+ const fits = Math.min(memoryFit, archCtx);
578233
+ const numCtx = Math.max(floor, fits);
578234
+ return { numCtx, label: formatContextLabel(numCtx) };
578171
578235
  }
578172
578236
  function ask(rl, question) {
578173
578237
  return new Promise((resolve52) => {
@@ -580359,7 +580423,7 @@ export PATH="${binDir}:$PATH" # Added by omnius for nvim
580359
580423
  } catch {
580360
580424
  }
580361
580425
  }
580362
- var execAsync2, OMNIUS_FIRST_RUN_BANNER, ANSI_RE, visibleLen2, QWEN_VARIANTS, _toolSupportCache, _cloudflaredInstallPromise;
580426
+ var execAsync2, OMNIUS_FIRST_RUN_BANNER, ANSI_RE, visibleLen2, QWEN_VARIANTS, _toolSupportCache, EXPANDED_VARIANT_MIN_NUM_CTX, _cloudflaredInstallPromise;
580363
580427
  var init_setup = __esm({
580364
580428
  "packages/cli/src/tui/setup.ts"() {
580365
580429
  "use strict";
@@ -580392,6 +580456,7 @@ var init_setup = __esm({
580392
580456
  { tag: "qwen3.5:397b-cloud", sizeGB: 0, label: "397B Cloud (Ollama Cloud)", cloud: true }
580393
580457
  ];
580394
580458
  _toolSupportCache = /* @__PURE__ */ new Map();
580459
+ EXPANDED_VARIANT_MIN_NUM_CTX = 32768;
580395
580460
  _cloudflaredInstallPromise = null;
580396
580461
  }
580397
580462
  });
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.105",
3
+ "version": "1.0.107",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.105",
9
+ "version": "1.0.107",
10
10
  "bundleDependencies": [
11
11
  "image-to-ascii"
12
12
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.105",
3
+ "version": "1.0.107",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",