npm - omnius - Versions diffs - 1.0.106 → 1.0.108 - Mend

omnius 1.0.106 → 1.0.108

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

Files changed (3) hide show

package/dist/index.js CHANGED Viewed

@@ -578020,6 +578020,23 @@ async function needsTextToolMode(modelName, backendUrl2) {
   const hasTools = await checkToolSupport(modelName, backendUrl2);
   return !hasTools;
 }
+function detectUnifiedMemory() {
+  if (process.platform === "darwin" && process.arch === "arm64") return true;
+  if (process.platform === "linux") {
+    try {
+      if (existsSync91("/etc/nv_tegra_release")) return true;
+    } catch {
+    }
+    try {
+      if (existsSync91("/sys/devices/soc0/family")) {
+        const family = readFileSync74("/sys/devices/soc0/family", "utf8").trim().toLowerCase();
+        if (family.includes("tegra")) return true;
+      }
+    } catch {
+    }
+  }
+  return false;
+}
 function parseNvidiaSmi(stdout) {
   let total = 0;
   let free = 0;
@@ -578035,6 +578052,14 @@ function parseNvidiaSmi(stdout) {
   }
   return { total, free, name: name10 };
 }
+function parseFreeBytes(stdout) {
+  const memLine = stdout.split("\n").find((l2) => /^Mem:/i.test(l2)) ?? "";
+  const nums = memLine.match(/\d+/g);
+  if (!nums || nums.length === 0) return { total: 0, available: 0 };
+  const total = parseInt(nums[0], 10) || 0;
+  const available = nums.length >= 6 ? parseInt(nums[5], 10) || 0 : 0;
+  return { total, available };
+}
 function parseRocmSmi(stdout) {
   let total = 0;
   let used = 0;
@@ -578064,14 +578089,14 @@ function detectSystemSpecs() {
       timeout: 5e3
     });
     if (memInfo.includes("Mem:")) {
-      const match = memInfo.match(/^Mem:\s+(\d+)\s+\d+\s+\d+\s+\d+\s+\d+\s+(\d+)/m);
-      if (match) {
-        totalRamGB = parseInt(match[1], 10) / 1024 ** 3;
-        availableRamGB = parseInt(match[2], 10) / 1024 ** 3;
+      const { total, available } = parseFreeBytes(memInfo);
+      if (total > 0) {
+        totalRamGB = total / 1024 ** 3;
+        availableRamGB = available > 0 ? available / 1024 ** 3 : totalRamGB * 0.75;
       }
     } else {
       const bytes = parseInt(memInfo.trim(), 10);
-      if (!isNaN(bytes)) {
+      if (!isNaN(bytes) && bytes > 0) {
         totalRamGB = bytes / 1024 ** 3;
         availableRamGB = totalRamGB * 0.7;
       }
@@ -578102,12 +578127,18 @@ function detectSystemSpecs() {
     } catch {
     }
   }
+  const unifiedMemory = detectUnifiedMemory();
+  if (unifiedMemory && totalRamGB > 0) {
+    const floorGB = totalRamGB * 0.8;
+    if (availableRamGB < floorGB) availableRamGB = floorGB;
+  }
   return {
     totalRamGB: Math.round(totalRamGB * 10) / 10,
     availableRamGB: Math.round(availableRamGB * 10) / 10,
     gpuVramGB: Math.round(gpuVramGB * 10) / 10,
     availableVramGB: Math.round(availableVramGB * 10) / 10,
-    gpuName
+    gpuName,
+    unifiedMemory
   };
 }
 async function detectSystemSpecsAsync() {
@@ -578122,14 +578153,14 @@ async function detectSystemSpecsAsync() {
       { timeout: 5e3 }
     );
     if (memInfo.includes("Mem:")) {
-      const match = memInfo.match(/^Mem:\s+(\d+)\s+\d+\s+\d+\s+\d+\s+\d+\s+(\d+)/m);
-      if (match) {
-        totalRamGB = parseInt(match[1], 10) / 1024 ** 3;
-        availableRamGB = parseInt(match[2], 10) / 1024 ** 3;
+      const { total, available } = parseFreeBytes(memInfo);
+      if (total > 0) {
+        totalRamGB = total / 1024 ** 3;
+        availableRamGB = available > 0 ? available / 1024 ** 3 : totalRamGB * 0.75;
       }
     } else {
       const bytes = parseInt(memInfo.trim(), 10);
-      if (!isNaN(bytes)) {
+      if (!isNaN(bytes) && bytes > 0) {
         totalRamGB = bytes / 1024 ** 3;
         availableRamGB = totalRamGB * 0.7;
       }
@@ -578160,12 +578191,18 @@ async function detectSystemSpecsAsync() {
     } catch {
     }
   }
+  const unifiedMemory = detectUnifiedMemory();
+  if (unifiedMemory && totalRamGB > 0) {
+    const floorGB = totalRamGB * 0.8;
+    if (availableRamGB < floorGB) availableRamGB = floorGB;
+  }
   return {
     totalRamGB: Math.round(totalRamGB * 10) / 10,
     availableRamGB: Math.round(availableRamGB * 10) / 10,
     gpuVramGB: Math.round(gpuVramGB * 10) / 10,
     availableVramGB: Math.round(availableVramGB * 10) / 10,
-    gpuName
+    gpuName,
+    unifiedMemory
   };
 }
 function recommendModel(specs) {
@@ -578179,7 +578216,7 @@ function recommendModel(specs) {
   }
   return QWEN_VARIANTS.find((v) => v.tag === "qwen3.5:cloud");
 }
-function calculateContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
+function calculateMemoryBoundedNumCtx(specs, modelSizeGB2, kvBytesPerToken, archMax) {
   const ramBudget = specs.availableRamGB > 0 ? specs.availableRamGB : specs.totalRamGB;
   const vramBudget = specs.availableVramGB > 0 ? specs.availableVramGB : specs.gpuVramGB;
   const totalAvail = Math.max(vramBudget, ramBudget);
@@ -578190,11 +578227,19 @@ function calculateContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
     const maxTokens = Math.floor(usableGB * 1024 ** 3 / kvBytesPerToken);
     numCtx = Math.max(2048, Math.floor(maxTokens / 1024) * 1024);
   } else {
-    const kvEstimate = modelSizeGB2 <= 5 ? 524288 : modelSizeGB2 <= 20 ? 1048576 : 1572864;
+    const kvEstimate = modelSizeGB2 <= 5 ? 64 * 1024 : (
+      // 4B-class
+      modelSizeGB2 <= 12 ? 160 * 1024 : (
+        // 9B-class
+        modelSizeGB2 <= 25 ? 256 * 1024 : (
+          // 30B-class
+          384 * 1024
+        )
+      )
+    );
     const maxTokens = Math.floor(usableGB * 1024 ** 3 / kvEstimate);
     numCtx = Math.max(2048, Math.floor(maxTokens / 1024) * 1024);
   }
-  numCtx = Math.min(numCtx, 131072);
   if (archMax && archMax > 0) numCtx = Math.min(numCtx, archMax);
   if (kvBytesPerToken && kvBytesPerToken > 0 && modelSizeGB2 > 0) {
     const maxKVBytes = modelSizeGB2 * 4 * 1024 ** 3;
@@ -578202,6 +578247,11 @@ function calculateContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
     const budgetCap = Math.max(2048, Math.floor(maxTokensFromBudget / 1024) * 1024);
     numCtx = Math.min(numCtx, budgetCap);
   }
+  return numCtx;
+}
+function calculateContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
+  let numCtx = calculateMemoryBoundedNumCtx(specs, modelSizeGB2, kvBytesPerToken, archMax);
+  numCtx = Math.min(numCtx, 131072);
   const label = numCtx >= 1024 ? `${Math.floor(numCtx / 1024)}K` : String(numCtx);
   return { numCtx, label };
 }
@@ -578209,18 +578259,17 @@ function formatContextLabel(numCtx) {
   return numCtx >= 1024 ? `${Math.floor(numCtx / 1024)}K` : String(numCtx);
 }
 function calculateExpandedVariantContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
-  const memoryBudget = calculateContextWindow(
+  const memoryFit = calculateMemoryBoundedNumCtx(
     specs,
     modelSizeGB2,
     kvBytesPerToken,
     archMax
   );
-  if (archMax && archMax > 0) {
-    const archCtx = Math.max(2048, Math.floor(archMax / 1024) * 1024);
-    const numCtx = Math.min(archCtx, memoryBudget.numCtx);
-    return { numCtx, label: formatContextLabel(numCtx) };
-  }
-  return memoryBudget;
+  const archCtx = archMax && archMax > 0 ? Math.max(2048, Math.floor(archMax / 1024) * 1024) : Number.POSITIVE_INFINITY;
+  const floor = Math.min(EXPANDED_VARIANT_MIN_NUM_CTX, archCtx);
+  const fits = Math.min(memoryFit, archCtx);
+  const numCtx = Math.max(floor, fits);
+  return { numCtx, label: formatContextLabel(numCtx) };
 }
 function ask(rl, question) {
   return new Promise((resolve52) => {
@@ -579988,6 +580037,17 @@ function modelSizeGB(models, modelName) {
   const known = QWEN_VARIANTS.find((v) => modelName.includes(v.tag.split(":")[1] ?? ""));
   return known?.sizeGB ?? 4;
 }
+function defaultLayersForArch(arch3) {
+  const a2 = arch3.toLowerCase();
+  if (a2.includes("qwen")) return 36;
+  if (a2.includes("llama")) return 32;
+  if (a2.includes("mistral")) return 32;
+  if (a2.includes("phi")) return 32;
+  if (a2.includes("gemma")) return 42;
+  if (a2.includes("granite")) return 40;
+  if (a2.includes("command")) return 40;
+  return 32;
+}
 async function queryModelKVInfo(backendUrl2, modelName) {
   try {
     const normalized = backendUrl2.replace(/\/+$/, "");
@@ -580003,13 +580063,17 @@ async function queryModelKVInfo(backendUrl2, modelName) {
     const info = data.model_info;
     const arch3 = info["general.architecture"];
     if (!arch3) return null;
-    const nLayers = info[`${arch3}.block_count`];
-    const nKVHeads = info[`${arch3}.attention.head_count_kv`] ?? info[`${arch3}.attention.head_count`];
-    const keyDim = info[`${arch3}.attention.key_length`];
-    const valDim = info[`${arch3}.attention.value_length`] ?? keyDim;
+    const nLayersRaw = info[`${arch3}.block_count`];
+    const nKVHeadsRaw = info[`${arch3}.attention.head_count_kv`] ?? info[`${arch3}.attention.head_count`];
+    const keyDimRaw = info[`${arch3}.attention.key_length`];
+    const valDimRaw = info[`${arch3}.attention.value_length`] ?? keyDimRaw;
     const archMax = info[`${arch3}.context_length`];
     if (!archMax) return null;
-    if (!nLayers || !nKVHeads || !keyDim || !valDim) return { archMax };
+    const keyDim = keyDimRaw ?? 128;
+    const valDim = valDimRaw ?? 128;
+    const nLayers = nLayersRaw ?? defaultLayersForArch(arch3);
+    const nKVHeads = nKVHeadsRaw ?? 32;
+    if (!nLayers) return { archMax };
     const kvBytesPerToken = nLayers * nKVHeads * (keyDim + valDim) * 2;
     return { kvBytesPerToken, archMax };
   } catch {
@@ -580412,7 +580476,7 @@ export PATH="${binDir}:$PATH"  # Added by omnius for nvim
   } catch {
   }
 }
-var execAsync2, OMNIUS_FIRST_RUN_BANNER, ANSI_RE, visibleLen2, QWEN_VARIANTS, _toolSupportCache, _cloudflaredInstallPromise;
+var execAsync2, OMNIUS_FIRST_RUN_BANNER, ANSI_RE, visibleLen2, QWEN_VARIANTS, _toolSupportCache, EXPANDED_VARIANT_MIN_NUM_CTX, _cloudflaredInstallPromise;
 var init_setup = __esm({
   "packages/cli/src/tui/setup.ts"() {
     "use strict";
@@ -580445,6 +580509,7 @@ var init_setup = __esm({
       { tag: "qwen3.5:397b-cloud", sizeGB: 0, label: "397B Cloud (Ollama Cloud)", cloud: true }
     ];
     _toolSupportCache = /* @__PURE__ */ new Map();
+    EXPANDED_VARIANT_MIN_NUM_CTX = 32768;
     _cloudflaredInstallPromise = null;
   }
 });

package/npm-shrinkwrap.json CHANGED Viewed

@@ -1,12 +1,12 @@
 {
   "name": "omnius",
-  "version": "1.0.106",
+  "version": "1.0.108",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "omnius",
-      "version": "1.0.106",
+      "version": "1.0.108",
       "bundleDependencies": [
         "image-to-ascii"
       ],

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "omnius",
-  "version": "1.0.106",
+  "version": "1.0.108",
   "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
   "type": "module",
   "main": "./dist/index.js",