npm - omnius - Versions diffs - 1.0.107 → 1.0.108 - Mend

omnius 1.0.107 → 1.0.108

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/index.js CHANGED Viewed

@@ -578020,6 +578020,23 @@ async function needsTextToolMode(modelName, backendUrl2) {
   const hasTools = await checkToolSupport(modelName, backendUrl2);
   return !hasTools;
 }
+function detectUnifiedMemory() { // Heuristic probe: returns true if the host matches one of the checks below, otherwise false.
+  if (process.platform === "darwin" && process.arch === "arm64") return true; // macOS on arm64 needs no filesystem probe.
+  if (process.platform === "linux") { // The filesystem probes below are attempted on Linux only.
+    try {
+      if (existsSync91("/etc/nv_tegra_release")) return true; // Mere presence of this file counts as a match (presumably an NVIDIA Tegra marker; confirm).
+    } catch { // Best-effort: an error here is ignored so the second probe still runs.
+    }
+    try {
+      if (existsSync91("/sys/devices/soc0/family")) { // Only read the file when it exists.
+        const family = readFileSync74("/sys/devices/soc0/family", "utf8").trim().toLowerCase(); // Trim and lower-case so the match ignores case and surrounding whitespace.
+        if (family.includes("tegra")) return true; // Substring match, not equality.
+      }
+    } catch { // Best-effort: a read failure falls through to the final `return false`.
+    }
+  }
+  return false; // No probe matched (or the platform is neither darwin/arm64 nor linux).
+}
 function parseNvidiaSmi(stdout) {
   let total = 0;
   let free = 0;
@@ -578110,12 +578127,18 @@ function detectSystemSpecs() {
     } catch {
     }
   }
+  const unifiedMemory = detectUnifiedMemory();
+  if (unifiedMemory && totalRamGB > 0) {
+    const floorGB = totalRamGB * 0.8;
+    if (availableRamGB < floorGB) availableRamGB = floorGB;
+  }
   return {
     totalRamGB: Math.round(totalRamGB * 10) / 10,
     availableRamGB: Math.round(availableRamGB * 10) / 10,
     gpuVramGB: Math.round(gpuVramGB * 10) / 10,
     availableVramGB: Math.round(availableVramGB * 10) / 10,
-    gpuName
+    gpuName,
+    unifiedMemory
   };
 }
 async function detectSystemSpecsAsync() {
@@ -578168,12 +578191,18 @@ async function detectSystemSpecsAsync() {
     } catch {
     }
   }
+  const unifiedMemory = detectUnifiedMemory();
+  if (unifiedMemory && totalRamGB > 0) {
+    const floorGB = totalRamGB * 0.8;
+    if (availableRamGB < floorGB) availableRamGB = floorGB;
+  }
   return {
     totalRamGB: Math.round(totalRamGB * 10) / 10,
     availableRamGB: Math.round(availableRamGB * 10) / 10,
     gpuVramGB: Math.round(gpuVramGB * 10) / 10,
     availableVramGB: Math.round(availableVramGB * 10) / 10,
-    gpuName
+    gpuName,
+    unifiedMemory
   };
 }
 function recommendModel(specs) {
@@ -578198,7 +578227,16 @@ function calculateMemoryBoundedNumCtx(specs, modelSizeGB2, kvBytesPerToken, arch
     const maxTokens = Math.floor(usableGB * 1024 ** 3 / kvBytesPerToken);
     numCtx = Math.max(2048, Math.floor(maxTokens / 1024) * 1024);
   } else {
-    const kvEstimate = modelSizeGB2 <= 5 ? 524288 : modelSizeGB2 <= 20 ? 1048576 : 1572864;
+    const kvEstimate = modelSizeGB2 <= 5 ? 64 * 1024 : (
+      // 4B-class
+      modelSizeGB2 <= 12 ? 160 * 1024 : (
+        // 9B-class
+        modelSizeGB2 <= 25 ? 256 * 1024 : (
+          // 30B-class
+          384 * 1024
+        )
+      )
+    );
     const maxTokens = Math.floor(usableGB * 1024 ** 3 / kvEstimate);
     numCtx = Math.max(2048, Math.floor(maxTokens / 1024) * 1024);
   }
@@ -579999,6 +580037,17 @@ function modelSizeGB(models, modelName) {
   const known = QWEN_VARIANTS.find((v) => modelName.includes(v.tag.split(":")[1] ?? ""));
   return known?.sizeGB ?? 4;
 }
+function defaultLayersForArch(arch3) { // Returns a fallback layer count for an architecture name; arch3 must be a string (toLowerCase is called on it).
+  const a2 = arch3.toLowerCase(); // Lower-case once so every substring test below is case-insensitive.
+  if (a2.includes("qwen")) return 36; // Checks run top to bottom; the first substring that matches wins.
+  if (a2.includes("llama")) return 32;
+  if (a2.includes("mistral")) return 32;
+  if (a2.includes("phi")) return 32;
+  if (a2.includes("gemma")) return 42;
+  if (a2.includes("granite")) return 40;
+  if (a2.includes("command")) return 40;
+  return 32; // Default for unrecognized names; same value as the llama/mistral/phi branches.
+}
 async function queryModelKVInfo(backendUrl2, modelName) {
   try {
     const normalized = backendUrl2.replace(/\/+$/, "");
@@ -580014,13 +580063,17 @@ async function queryModelKVInfo(backendUrl2, modelName) {
     const info = data.model_info;
     const arch3 = info["general.architecture"];
     if (!arch3) return null;
-    const nLayers = info[`${arch3}.block_count`];
-    const nKVHeads = info[`${arch3}.attention.head_count_kv`] ?? info[`${arch3}.attention.head_count`];
-    const keyDim = info[`${arch3}.attention.key_length`];
-    const valDim = info[`${arch3}.attention.value_length`] ?? keyDim;
+    const nLayersRaw = info[`${arch3}.block_count`];
+    const nKVHeadsRaw = info[`${arch3}.attention.head_count_kv`] ?? info[`${arch3}.attention.head_count`];
+    const keyDimRaw = info[`${arch3}.attention.key_length`];
+    const valDimRaw = info[`${arch3}.attention.value_length`] ?? keyDimRaw;
     const archMax = info[`${arch3}.context_length`];
     if (!archMax) return null;
-    if (!nLayers || !nKVHeads || !keyDim || !valDim) return { archMax };
+    const keyDim = keyDimRaw ?? 128;
+    const valDim = valDimRaw ?? 128;
+    const nLayers = nLayersRaw ?? defaultLayersForArch(arch3);
+    const nKVHeads = nKVHeadsRaw ?? 32;
+    if (!nLayers) return { archMax };
     const kvBytesPerToken = nLayers * nKVHeads * (keyDim + valDim) * 2;
     return { kvBytesPerToken, archMax };
   } catch {

package/npm-shrinkwrap.json CHANGED Viewed

@@ -1,12 +1,12 @@
 {
   "name": "omnius",
-  "version": "1.0.107",
+  "version": "1.0.108",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "omnius",
-      "version": "1.0.107",
+      "version": "1.0.108",
       "bundleDependencies": [
         "image-to-ascii"
       ],

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "omnius",
-  "version": "1.0.107",
+  "version": "1.0.108",
   "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
   "type": "module",
   "main": "./dist/index.js",