omnius 1.0.106 → 1.0.108

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -578020,6 +578020,23 @@ async function needsTextToolMode(modelName, backendUrl2) {
578020
578020
  const hasTools = await checkToolSupport(modelName, backendUrl2);
578021
578021
  return !hasTools;
578022
578022
  }
578023
/**
 * Report whether this host looks like a unified-memory machine.
 *
 * Returns true for macOS on arm64, and for Linux hosts where either
 * `/etc/nv_tegra_release` exists or `/sys/devices/soc0/family` contains
 * "tegra" (case-insensitive). Every other platform returns false.
 *
 * Filesystem probing is best-effort: any error raised while probing is
 * ignored and treated as "marker not found".
 *
 * @returns {boolean} true when one of the markers above is detected.
 */
function detectUnifiedMemory() {
  const { platform, arch } = process;
  if (platform === "darwin" && arch === "arm64") {
    return true;
  }
  if (platform !== "linux") {
    return false;
  }

  // First marker: presence of the Tegra release file is enough on its own.
  let hasTegraRelease = false;
  try {
    hasTegraRelease = existsSync91("/etc/nv_tegra_release");
  } catch {
    // Best-effort probe; fall through to the next marker.
  }
  if (hasTegraRelease) {
    return true;
  }

  // Second marker: the SoC family string exposed through sysfs.
  const socFamilyPath = "/sys/devices/soc0/family";
  try {
    if (existsSync91(socFamilyPath)) {
      const socFamily = readFileSync74(socFamilyPath, "utf8").trim().toLowerCase();
      return socFamily.includes("tegra");
    }
  } catch {
    // Unreadable sysfs entry counts as "not detected".
  }
  return false;
}
578023
578040
  function parseNvidiaSmi(stdout) {
578024
578041
  let total = 0;
578025
578042
  let free = 0;
@@ -578035,6 +578052,14 @@ function parseNvidiaSmi(stdout) {
578035
578052
  }
578036
578053
  return { total, free, name: name10 };
578037
578054
  }
578055
/**
 * Parse the `Mem:` row of `free`-style output into total/available figures.
 *
 * The "available" value is located through the column header row when one
 * precedes the `Mem:` row. This keeps the result correct for layouts where
 * "available" is not the sixth column (e.g. `free -w`, which prints separate
 * `buffers` and `cache` columns), and yields 0 for layouts that have no
 * "available" column at all (where the sixth column is `cached`) rather than
 * reporting an unrelated number. When no header row is present, the sixth
 * number on the `Mem:` row is used.
 *
 * @param {string} stdout - Raw text printed by `free`.
 * @returns {{ total: number, available: number }} Parsed values in the unit
 *   `free` printed them in; 0 for anything that could not be read.
 */
function parseFreeBytes(stdout) {
  const lines = String(stdout ?? "").split(/\r?\n/);
  const memIndex = lines.findIndex((line) => /^\s*Mem:/i.test(line));
  if (memIndex === -1) return { total: 0, available: 0 };

  const nums = lines[memIndex].match(/\d+/g);
  if (!nums || nums.length === 0) return { total: 0, available: 0 };
  const total = Number.parseInt(nums[0], 10) || 0;

  // The header row is the closest preceding line that names the "total" column.
  const header = lines
    .slice(0, memIndex)
    .reverse()
    .find((line) => /\btotal\b/i.test(line));

  // Without a header we keep the positional assumption (sixth value).
  let availableIndex = 5;
  if (header !== undefined) {
    availableIndex = header
      .trim()
      .split(/\s+/)
      .findIndex((column) => column.toLowerCase() === "available");
  }

  const rawAvailable = availableIndex >= 0 ? nums[availableIndex] : undefined;
  const available = rawAvailable === undefined ? 0 : Number.parseInt(rawAvailable, 10) || 0;
  return { total, available };
}
578038
578063
  function parseRocmSmi(stdout) {
578039
578064
  let total = 0;
578040
578065
  let used = 0;
@@ -578064,14 +578089,14 @@ function detectSystemSpecs() {
578064
578089
  timeout: 5e3
578065
578090
  });
578066
578091
  if (memInfo.includes("Mem:")) {
578067
- const match = memInfo.match(/^Mem:\s+(\d+)\s+\d+\s+\d+\s+\d+\s+\d+\s+(\d+)/m);
578068
- if (match) {
578069
- totalRamGB = parseInt(match[1], 10) / 1024 ** 3;
578070
- availableRamGB = parseInt(match[2], 10) / 1024 ** 3;
578092
+ const { total, available } = parseFreeBytes(memInfo);
578093
+ if (total > 0) {
578094
+ totalRamGB = total / 1024 ** 3;
578095
+ availableRamGB = available > 0 ? available / 1024 ** 3 : totalRamGB * 0.75;
578071
578096
  }
578072
578097
  } else {
578073
578098
  const bytes = parseInt(memInfo.trim(), 10);
578074
- if (!isNaN(bytes)) {
578099
+ if (!isNaN(bytes) && bytes > 0) {
578075
578100
  totalRamGB = bytes / 1024 ** 3;
578076
578101
  availableRamGB = totalRamGB * 0.7;
578077
578102
  }
@@ -578102,12 +578127,18 @@ function detectSystemSpecs() {
578102
578127
  } catch {
578103
578128
  }
578104
578129
  }
578130
+ const unifiedMemory = detectUnifiedMemory();
578131
+ if (unifiedMemory && totalRamGB > 0) {
578132
+ const floorGB = totalRamGB * 0.8;
578133
+ if (availableRamGB < floorGB) availableRamGB = floorGB;
578134
+ }
578105
578135
  return {
578106
578136
  totalRamGB: Math.round(totalRamGB * 10) / 10,
578107
578137
  availableRamGB: Math.round(availableRamGB * 10) / 10,
578108
578138
  gpuVramGB: Math.round(gpuVramGB * 10) / 10,
578109
578139
  availableVramGB: Math.round(availableVramGB * 10) / 10,
578110
- gpuName
578140
+ gpuName,
578141
+ unifiedMemory
578111
578142
  };
578112
578143
  }
578113
578144
  async function detectSystemSpecsAsync() {
@@ -578122,14 +578153,14 @@ async function detectSystemSpecsAsync() {
578122
578153
  { timeout: 5e3 }
578123
578154
  );
578124
578155
  if (memInfo.includes("Mem:")) {
578125
- const match = memInfo.match(/^Mem:\s+(\d+)\s+\d+\s+\d+\s+\d+\s+\d+\s+(\d+)/m);
578126
- if (match) {
578127
- totalRamGB = parseInt(match[1], 10) / 1024 ** 3;
578128
- availableRamGB = parseInt(match[2], 10) / 1024 ** 3;
578156
+ const { total, available } = parseFreeBytes(memInfo);
578157
+ if (total > 0) {
578158
+ totalRamGB = total / 1024 ** 3;
578159
+ availableRamGB = available > 0 ? available / 1024 ** 3 : totalRamGB * 0.75;
578129
578160
  }
578130
578161
  } else {
578131
578162
  const bytes = parseInt(memInfo.trim(), 10);
578132
- if (!isNaN(bytes)) {
578163
+ if (!isNaN(bytes) && bytes > 0) {
578133
578164
  totalRamGB = bytes / 1024 ** 3;
578134
578165
  availableRamGB = totalRamGB * 0.7;
578135
578166
  }
@@ -578160,12 +578191,18 @@ async function detectSystemSpecsAsync() {
578160
578191
  } catch {
578161
578192
  }
578162
578193
  }
578194
+ const unifiedMemory = detectUnifiedMemory();
578195
+ if (unifiedMemory && totalRamGB > 0) {
578196
+ const floorGB = totalRamGB * 0.8;
578197
+ if (availableRamGB < floorGB) availableRamGB = floorGB;
578198
+ }
578163
578199
  return {
578164
578200
  totalRamGB: Math.round(totalRamGB * 10) / 10,
578165
578201
  availableRamGB: Math.round(availableRamGB * 10) / 10,
578166
578202
  gpuVramGB: Math.round(gpuVramGB * 10) / 10,
578167
578203
  availableVramGB: Math.round(availableVramGB * 10) / 10,
578168
- gpuName
578204
+ gpuName,
578205
+ unifiedMemory
578169
578206
  };
578170
578207
  }
578171
578208
  function recommendModel(specs) {
@@ -578179,7 +578216,7 @@ function recommendModel(specs) {
578179
578216
  }
578180
578217
  return QWEN_VARIANTS.find((v) => v.tag === "qwen3.5:cloud");
578181
578218
  }
578182
- function calculateContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
578219
+ function calculateMemoryBoundedNumCtx(specs, modelSizeGB2, kvBytesPerToken, archMax) {
578183
578220
  const ramBudget = specs.availableRamGB > 0 ? specs.availableRamGB : specs.totalRamGB;
578184
578221
  const vramBudget = specs.availableVramGB > 0 ? specs.availableVramGB : specs.gpuVramGB;
578185
578222
  const totalAvail = Math.max(vramBudget, ramBudget);
@@ -578190,11 +578227,19 @@ function calculateContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
578190
578227
  const maxTokens = Math.floor(usableGB * 1024 ** 3 / kvBytesPerToken);
578191
578228
  numCtx = Math.max(2048, Math.floor(maxTokens / 1024) * 1024);
578192
578229
  } else {
578193
- const kvEstimate = modelSizeGB2 <= 5 ? 524288 : modelSizeGB2 <= 20 ? 1048576 : 1572864;
578230
+ const kvEstimate = modelSizeGB2 <= 5 ? 64 * 1024 : (
578231
+ // 4B-class
578232
+ modelSizeGB2 <= 12 ? 160 * 1024 : (
578233
+ // 9B-class
578234
+ modelSizeGB2 <= 25 ? 256 * 1024 : (
578235
+ // 30B-class
578236
+ 384 * 1024
578237
+ )
578238
+ )
578239
+ );
578194
578240
  const maxTokens = Math.floor(usableGB * 1024 ** 3 / kvEstimate);
578195
578241
  numCtx = Math.max(2048, Math.floor(maxTokens / 1024) * 1024);
578196
578242
  }
578197
- numCtx = Math.min(numCtx, 131072);
578198
578243
  if (archMax && archMax > 0) numCtx = Math.min(numCtx, archMax);
578199
578244
  if (kvBytesPerToken && kvBytesPerToken > 0 && modelSizeGB2 > 0) {
578200
578245
  const maxKVBytes = modelSizeGB2 * 4 * 1024 ** 3;
@@ -578202,6 +578247,11 @@ function calculateContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
578202
578247
  const budgetCap = Math.max(2048, Math.floor(maxTokensFromBudget / 1024) * 1024);
578203
578248
  numCtx = Math.min(numCtx, budgetCap);
578204
578249
  }
578250
+ return numCtx;
578251
+ }
578252
/**
 * Pick a context window size and a short display label for it.
 *
 * Delegates the memory/architecture-bounded estimate to
 * `calculateMemoryBoundedNumCtx`, then caps the result at 131072 (128 * 1024).
 *
 * @param specs - System specs object forwarded unchanged to the estimator.
 * @param modelSizeGB2 - Model size forwarded unchanged to the estimator.
 * @param kvBytesPerToken - Per-token KV figure forwarded unchanged to the estimator.
 * @param archMax - Architecture maximum forwarded unchanged to the estimator.
 * @returns {{ numCtx: number, label: string }} The capped size and its label
 *   (`<n>K` for values of at least 1024, the plain number otherwise).
 */
function calculateContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
  const memoryBounded = calculateMemoryBoundedNumCtx(
    specs,
    modelSizeGB2,
    kvBytesPerToken,
    archMax
  );
  const numCtx = Math.min(memoryBounded, 131072);
  return { numCtx, label: formatContextLabel(numCtx) };
}
@@ -578209,18 +578259,17 @@ function formatContextLabel(numCtx) {
578209
578259
  return numCtx >= 1024 ? `${Math.floor(numCtx / 1024)}K` : String(numCtx);
578210
578260
  }
578211
578261
  function calculateExpandedVariantContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
578212
- const memoryBudget = calculateContextWindow(
578262
+ const memoryFit = calculateMemoryBoundedNumCtx(
578213
578263
  specs,
578214
578264
  modelSizeGB2,
578215
578265
  kvBytesPerToken,
578216
578266
  archMax
578217
578267
  );
578218
- if (archMax && archMax > 0) {
578219
- const archCtx = Math.max(2048, Math.floor(archMax / 1024) * 1024);
578220
- const numCtx = Math.min(archCtx, memoryBudget.numCtx);
578221
- return { numCtx, label: formatContextLabel(numCtx) };
578222
- }
578223
- return memoryBudget;
578268
+ const archCtx = archMax && archMax > 0 ? Math.max(2048, Math.floor(archMax / 1024) * 1024) : Number.POSITIVE_INFINITY;
578269
+ const floor = Math.min(EXPANDED_VARIANT_MIN_NUM_CTX, archCtx);
578270
+ const fits = Math.min(memoryFit, archCtx);
578271
+ const numCtx = Math.max(floor, fits);
578272
+ return { numCtx, label: formatContextLabel(numCtx) };
578224
578273
  }
578225
578274
  function ask(rl, question) {
578226
578275
  return new Promise((resolve52) => {
@@ -579988,6 +580037,17 @@ function modelSizeGB(models, modelName) {
579988
580037
  const known = QWEN_VARIANTS.find((v) => modelName.includes(v.tag.split(":")[1] ?? ""));
579989
580038
  return known?.sizeGB ?? 4;
579990
580039
  }
580040
/**
 * Return a hard-coded fallback layer count for an architecture name.
 *
 * Matching is a case-insensitive substring test against a fixed, ordered
 * list of family names; the first match wins. Names that match nothing, and
 * values that are not strings (the name originates from external model
 * metadata), get the generic default of 32 instead of raising.
 *
 * @param {string} arch3 - Architecture identifier, e.g. "qwen3" or "llama".
 * @returns {number} Fallback layer count for that architecture family.
 */
function defaultLayersForArch(arch3) {
  const fallbackLayers = 32;
  if (typeof arch3 !== "string") return fallbackLayers;

  // Order is significant: the first family whose name occurs in the input wins.
  const layersByFamily = [
    ["qwen", 36],
    ["llama", 32],
    ["mistral", 32],
    ["phi", 32],
    ["gemma", 42],
    ["granite", 40],
    ["command", 40],
  ];
  const name = arch3.toLowerCase();
  const match = layersByFamily.find(([family]) => name.includes(family));
  return match ? match[1] : fallbackLayers;
}
579991
580051
  async function queryModelKVInfo(backendUrl2, modelName) {
579992
580052
  try {
579993
580053
  const normalized = backendUrl2.replace(/\/+$/, "");
@@ -580003,13 +580063,17 @@ async function queryModelKVInfo(backendUrl2, modelName) {
580003
580063
  const info = data.model_info;
580004
580064
  const arch3 = info["general.architecture"];
580005
580065
  if (!arch3) return null;
580006
- const nLayers = info[`${arch3}.block_count`];
580007
- const nKVHeads = info[`${arch3}.attention.head_count_kv`] ?? info[`${arch3}.attention.head_count`];
580008
- const keyDim = info[`${arch3}.attention.key_length`];
580009
- const valDim = info[`${arch3}.attention.value_length`] ?? keyDim;
580066
+ const nLayersRaw = info[`${arch3}.block_count`];
580067
+ const nKVHeadsRaw = info[`${arch3}.attention.head_count_kv`] ?? info[`${arch3}.attention.head_count`];
580068
+ const keyDimRaw = info[`${arch3}.attention.key_length`];
580069
+ const valDimRaw = info[`${arch3}.attention.value_length`] ?? keyDimRaw;
580010
580070
  const archMax = info[`${arch3}.context_length`];
580011
580071
  if (!archMax) return null;
580012
- if (!nLayers || !nKVHeads || !keyDim || !valDim) return { archMax };
580072
+ const keyDim = keyDimRaw ?? 128;
580073
+ const valDim = valDimRaw ?? 128;
580074
+ const nLayers = nLayersRaw ?? defaultLayersForArch(arch3);
580075
+ const nKVHeads = nKVHeadsRaw ?? 32;
580076
+ if (!nLayers) return { archMax };
580013
580077
  const kvBytesPerToken = nLayers * nKVHeads * (keyDim + valDim) * 2;
580014
580078
  return { kvBytesPerToken, archMax };
580015
580079
  } catch {
@@ -580412,7 +580476,7 @@ export PATH="${binDir}:$PATH" # Added by omnius for nvim
580412
580476
  } catch {
580413
580477
  }
580414
580478
  }
580415
- var execAsync2, OMNIUS_FIRST_RUN_BANNER, ANSI_RE, visibleLen2, QWEN_VARIANTS, _toolSupportCache, _cloudflaredInstallPromise;
580479
+ var execAsync2, OMNIUS_FIRST_RUN_BANNER, ANSI_RE, visibleLen2, QWEN_VARIANTS, _toolSupportCache, EXPANDED_VARIANT_MIN_NUM_CTX, _cloudflaredInstallPromise;
580416
580480
  var init_setup = __esm({
580417
580481
  "packages/cli/src/tui/setup.ts"() {
580418
580482
  "use strict";
@@ -580445,6 +580509,7 @@ var init_setup = __esm({
580445
580509
  { tag: "qwen3.5:397b-cloud", sizeGB: 0, label: "397B Cloud (Ollama Cloud)", cloud: true }
580446
580510
  ];
580447
580511
  _toolSupportCache = /* @__PURE__ */ new Map();
580512
+ EXPANDED_VARIANT_MIN_NUM_CTX = 32768;
580448
580513
  _cloudflaredInstallPromise = null;
580449
580514
  }
580450
580515
  });
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.106",
3
+ "version": "1.0.108",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.106",
9
+ "version": "1.0.108",
10
10
  "bundleDependencies": [
11
11
  "image-to-ascii"
12
12
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.106",
3
+ "version": "1.0.108",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",