omnius 1.0.107 → 1.0.108

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -578020,6 +578020,23 @@ async function needsTextToolMode(modelName, backendUrl2) {
578020
578020
  const hasTools = await checkToolSupport(modelName, backendUrl2);
578021
578021
  return !hasTools;
578022
578022
  }
578023
/**
 * Best-effort check for hosts that this file treats as "unified memory".
 *
 * Returns true when running on macOS/arm64, or on Linux when either
 * `/etc/nv_tegra_release` exists or `/sys/devices/soc0/family` contains
 * "tegra" (case-insensitive). Every other host yields false.
 * NOTE(review): presumably these markers identify Apple Silicon and NVIDIA
 * Jetson/Tegra boards — confirm against the hardware you intend to support.
 *
 * @returns {boolean} true if one of the probes above matched.
 */
function detectUnifiedMemory() {
  const { platform, arch } = process;

  if (platform === "darwin" && arch === "arm64") {
    return true;
  }
  if (platform !== "linux") {
    return false;
  }

  // Probe 1: marker file. Filesystem errors are deliberately ignored so a
  // failing probe only means "not detected".
  try {
    if (existsSync91("/etc/nv_tegra_release")) {
      return true;
    }
  } catch {
  }

  // Probe 2: SoC family string exposed through sysfs.
  const socFamilyPath = "/sys/devices/soc0/family";
  try {
    if (existsSync91(socFamilyPath)) {
      const socFamily = readFileSync74(socFamilyPath, "utf8").trim().toLowerCase();
      if (socFamily.includes("tegra")) {
        return true;
      }
    }
  } catch {
  }

  return false;
}
578023
578040
  function parseNvidiaSmi(stdout) {
578024
578041
  let total = 0;
578025
578042
  let free = 0;
@@ -578110,12 +578127,18 @@ function detectSystemSpecs() {
578110
578127
  } catch {
578111
578128
  }
578112
578129
  }
578130
+ const unifiedMemory = detectUnifiedMemory();
578131
+ if (unifiedMemory && totalRamGB > 0) {
578132
+ const floorGB = totalRamGB * 0.8;
578133
+ if (availableRamGB < floorGB) availableRamGB = floorGB;
578134
+ }
578113
578135
  return {
578114
578136
  totalRamGB: Math.round(totalRamGB * 10) / 10,
578115
578137
  availableRamGB: Math.round(availableRamGB * 10) / 10,
578116
578138
  gpuVramGB: Math.round(gpuVramGB * 10) / 10,
578117
578139
  availableVramGB: Math.round(availableVramGB * 10) / 10,
578118
- gpuName
578140
+ gpuName,
578141
+ unifiedMemory
578119
578142
  };
578120
578143
  }
578121
578144
  async function detectSystemSpecsAsync() {
@@ -578168,12 +578191,18 @@ async function detectSystemSpecsAsync() {
578168
578191
  } catch {
578169
578192
  }
578170
578193
  }
578194
+ const unifiedMemory = detectUnifiedMemory();
578195
+ if (unifiedMemory && totalRamGB > 0) {
578196
+ const floorGB = totalRamGB * 0.8;
578197
+ if (availableRamGB < floorGB) availableRamGB = floorGB;
578198
+ }
578171
578199
  return {
578172
578200
  totalRamGB: Math.round(totalRamGB * 10) / 10,
578173
578201
  availableRamGB: Math.round(availableRamGB * 10) / 10,
578174
578202
  gpuVramGB: Math.round(gpuVramGB * 10) / 10,
578175
578203
  availableVramGB: Math.round(availableVramGB * 10) / 10,
578176
- gpuName
578204
+ gpuName,
578205
+ unifiedMemory
578177
578206
  };
578178
578207
  }
578179
578208
  function recommendModel(specs) {
@@ -578198,7 +578227,16 @@ function calculateMemoryBoundedNumCtx(specs, modelSizeGB2, kvBytesPerToken, arch
578198
578227
  const maxTokens = Math.floor(usableGB * 1024 ** 3 / kvBytesPerToken);
578199
578228
  numCtx = Math.max(2048, Math.floor(maxTokens / 1024) * 1024);
578200
578229
  } else {
578201
- const kvEstimate = modelSizeGB2 <= 5 ? 524288 : modelSizeGB2 <= 20 ? 1048576 : 1572864;
578230
+ const kvEstimate = modelSizeGB2 <= 5 ? 64 * 1024 : (
578231
+ // 4B-class
578232
+ modelSizeGB2 <= 12 ? 160 * 1024 : (
578233
+ // 9B-class
578234
+ modelSizeGB2 <= 25 ? 256 * 1024 : (
578235
+ // 30B-class
578236
+ 384 * 1024
578237
+ )
578238
+ )
578239
+ );
578202
578240
  const maxTokens = Math.floor(usableGB * 1024 ** 3 / kvEstimate);
578203
578241
  numCtx = Math.max(2048, Math.floor(maxTokens / 1024) * 1024);
578204
578242
  }
@@ -579999,6 +580037,17 @@ function modelSizeGB(models, modelName) {
579999
580037
  const known = QWEN_VARIANTS.find((v) => modelName.includes(v.tag.split(":")[1] ?? ""));
580000
580038
  return known?.sizeGB ?? 4;
580001
580039
  }
580040
/**
 * Fallback layer count for a model architecture name.
 *
 * The visible caller uses this when the model metadata has no
 * `<arch>.block_count` entry. The name is matched case-insensitively by
 * substring; the first matching entry in the table wins, so table order is
 * significant. The numbers are hard-coded estimates, not values read from
 * the model.
 *
 * @param {string} arch3 - Architecture identifier (e.g. "qwen2", "llama").
 * @returns {number} Estimated layer count; 32 when nothing matches or when
 *   `arch3` is not a string.
 */
function defaultLayersForArch(arch3) {
  const fallbackLayers = 32;

  // A non-string would throw on toLowerCase(); return the default instead so
  // the caller does not end up in its catch-all handler.
  if (typeof arch3 !== "string") {
    return fallbackLayers;
  }

  // Ordered: earlier entries take priority when several substrings match.
  const layersByFamily = [
    ["qwen", 36],
    ["llama", 32],
    ["mistral", 32],
    ["phi", 32],
    ["gemma", 42],
    ["granite", 40],
    ["command", 40],
  ];

  const normalized = arch3.toLowerCase();
  for (const [family, layers] of layersByFamily) {
    if (normalized.includes(family)) {
      return layers;
    }
  }
  return fallbackLayers;
}
580002
580051
  async function queryModelKVInfo(backendUrl2, modelName) {
580003
580052
  try {
580004
580053
  const normalized = backendUrl2.replace(/\/+$/, "");
@@ -580014,13 +580063,17 @@ async function queryModelKVInfo(backendUrl2, modelName) {
580014
580063
  const info = data.model_info;
580015
580064
  const arch3 = info["general.architecture"];
580016
580065
  if (!arch3) return null;
580017
- const nLayers = info[`${arch3}.block_count`];
580018
- const nKVHeads = info[`${arch3}.attention.head_count_kv`] ?? info[`${arch3}.attention.head_count`];
580019
- const keyDim = info[`${arch3}.attention.key_length`];
580020
- const valDim = info[`${arch3}.attention.value_length`] ?? keyDim;
580066
+ const nLayersRaw = info[`${arch3}.block_count`];
580067
+ const nKVHeadsRaw = info[`${arch3}.attention.head_count_kv`] ?? info[`${arch3}.attention.head_count`];
580068
+ const keyDimRaw = info[`${arch3}.attention.key_length`];
580069
+ const valDimRaw = info[`${arch3}.attention.value_length`] ?? keyDimRaw;
580021
580070
  const archMax = info[`${arch3}.context_length`];
580022
580071
  if (!archMax) return null;
580023
- if (!nLayers || !nKVHeads || !keyDim || !valDim) return { archMax };
580072
+ const keyDim = keyDimRaw ?? 128;
580073
+ const valDim = valDimRaw ?? 128;
580074
+ const nLayers = nLayersRaw ?? defaultLayersForArch(arch3);
580075
+ const nKVHeads = nKVHeadsRaw ?? 32;
580076
+ if (!nLayers) return { archMax };
580024
580077
  const kvBytesPerToken = nLayers * nKVHeads * (keyDim + valDim) * 2;
580025
580078
  return { kvBytesPerToken, archMax };
580026
580079
  } catch {
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.107",
3
+ "version": "1.0.108",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.107",
9
+ "version": "1.0.108",
10
10
  "bundleDependencies": [
11
11
  "image-to-ascii"
12
12
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.107",
3
+ "version": "1.0.108",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",