omnius 1.0.107 → 1.0.109

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -577955,6 +577955,7 @@ __export(setup_exports, {
577955
577955
  ensurePythonVenv: () => ensurePythonVenv,
577956
577956
  ensureVisionDeps: () => ensureVisionDeps,
577957
577957
  expandedModelName: () => expandedModelName,
577958
+ formatExpandedContextDiagnostic: () => formatExpandedContextDiagnostic,
577958
577959
  getLatestOllamaVersion: () => getLatestOllamaVersion,
577959
577960
  getOllamaVersion: () => getOllamaVersion,
577960
577961
  hasCmd: () => hasCmd,
@@ -578020,6 +578021,33 @@ async function needsTextToolMode(modelName, backendUrl2) {
578020
578021
  const hasTools = await checkToolSupport(modelName, backendUrl2);
578021
578022
  return !hasTools;
578022
578023
  }
578024
/**
 * Best-effort check for whether this host appears to share one memory pool
 * between CPU and GPU (unified memory).
 *
 * Probes, in order, and returns true on the first hit:
 *   1. darwin + arm64.
 *   2. Linux: the file /etc/nv_tegra_release exists.
 *   3. Linux: /sys/devices/soc0/family contains "tegra".
 *   4. Linux: /proc/device-tree/model matches a known SoC/board family name.
 *   5. Linux: the process is arm64 and the caller reported no discrete GPU.
 * Every other platform returns false.
 *
 * @param {boolean} [hasDiscreteGpu=false] - Whether the caller already
 *   detected a discrete GPU; only consulted by the final arm64 fallback.
 * @returns {boolean} true when one of the probes above matches.
 */
function detectUnifiedMemory(hasDiscreteGpu = false) {
  if (process.platform === "darwin" && process.arch === "arm64") return true;
  if (process.platform !== "linux") return false;

  // Reading directly (instead of exists-then-read) avoids the check/use race;
  // a missing or unreadable file simply means "this probe did not match".
  const readProbe = (path) => {
    try {
      return readFileSync74(path, "utf8");
    } catch {
      return null;
    }
  };

  // Presence of the marker file alone is enough; its content is not inspected.
  if (existsSync91("/etc/nv_tegra_release")) return true;

  const socFamily = readProbe("/sys/devices/soc0/family");
  if (socFamily !== null && socFamily.trim().toLowerCase().includes("tegra")) {
    return true;
  }

  const boardModel = readProbe("/proc/device-tree/model");
  if (boardModel !== null) {
    // Strip trailing NUL bytes from the model string before matching.
    const normalized = boardModel.replace(/\0+$/, "").toLowerCase();
    if (/jetson|tegra|orin|xavier|nano|raspberry|rockchip|rk\d{4}|mt\d{4}/.test(normalized)) {
      return true;
    }
  }

  // Last resort: treat arm64 Linux without a discrete GPU as unified memory.
  return process.arch === "arm64" && !hasDiscreteGpu;
}
578023
578051
  function parseNvidiaSmi(stdout) {
578024
578052
  let total = 0;
578025
578053
  let free = 0;
@@ -578110,12 +578138,22 @@ function detectSystemSpecs() {
578110
578138
  } catch {
578111
578139
  }
578112
578140
  }
578141
+ const unifiedMemory = detectUnifiedMemory(gpuVramGB > 0);
578142
+ if (unifiedMemory && totalRamGB > 0) {
578143
+ const floorGB = totalRamGB * 0.8;
578144
+ if (availableRamGB < floorGB) availableRamGB = floorGB;
578145
+ }
578146
+ if (!unifiedMemory && totalRamGB > 0) {
578147
+ const floorGB = totalRamGB * 0.75;
578148
+ if (availableRamGB < floorGB) availableRamGB = floorGB;
578149
+ }
578113
578150
  return {
578114
578151
  totalRamGB: Math.round(totalRamGB * 10) / 10,
578115
578152
  availableRamGB: Math.round(availableRamGB * 10) / 10,
578116
578153
  gpuVramGB: Math.round(gpuVramGB * 10) / 10,
578117
578154
  availableVramGB: Math.round(availableVramGB * 10) / 10,
578118
- gpuName
578155
+ gpuName,
578156
+ unifiedMemory
578119
578157
  };
578120
578158
  }
578121
578159
  async function detectSystemSpecsAsync() {
@@ -578168,12 +578206,22 @@ async function detectSystemSpecsAsync() {
578168
578206
  } catch {
578169
578207
  }
578170
578208
  }
578209
+ const unifiedMemory = detectUnifiedMemory(gpuVramGB > 0);
578210
+ if (unifiedMemory && totalRamGB > 0) {
578211
+ const floorGB = totalRamGB * 0.8;
578212
+ if (availableRamGB < floorGB) availableRamGB = floorGB;
578213
+ }
578214
+ if (!unifiedMemory && totalRamGB > 0) {
578215
+ const floorGB = totalRamGB * 0.75;
578216
+ if (availableRamGB < floorGB) availableRamGB = floorGB;
578217
+ }
578171
578218
  return {
578172
578219
  totalRamGB: Math.round(totalRamGB * 10) / 10,
578173
578220
  availableRamGB: Math.round(availableRamGB * 10) / 10,
578174
578221
  gpuVramGB: Math.round(gpuVramGB * 10) / 10,
578175
578222
  availableVramGB: Math.round(availableVramGB * 10) / 10,
578176
- gpuName
578223
+ gpuName,
578224
+ unifiedMemory
578177
578225
  };
578178
578226
  }
578179
578227
  function recommendModel(specs) {
@@ -578198,7 +578246,16 @@ function calculateMemoryBoundedNumCtx(specs, modelSizeGB2, kvBytesPerToken, arch
578198
578246
  const maxTokens = Math.floor(usableGB * 1024 ** 3 / kvBytesPerToken);
578199
578247
  numCtx = Math.max(2048, Math.floor(maxTokens / 1024) * 1024);
578200
578248
  } else {
578201
- const kvEstimate = modelSizeGB2 <= 5 ? 524288 : modelSizeGB2 <= 20 ? 1048576 : 1572864;
578249
+ const kvEstimate = modelSizeGB2 <= 5 ? 64 * 1024 : (
578250
+ // 4B-class
578251
+ modelSizeGB2 <= 12 ? 160 * 1024 : (
578252
+ // 9B-class
578253
+ modelSizeGB2 <= 25 ? 256 * 1024 : (
578254
+ // 30B-class
578255
+ 384 * 1024
578256
+ )
578257
+ )
578258
+ );
578202
578259
  const maxTokens = Math.floor(usableGB * 1024 ** 3 / kvEstimate);
578203
578260
  numCtx = Math.max(2048, Math.floor(maxTokens / 1024) * 1024);
578204
578261
  }
@@ -578221,17 +578278,41 @@ function formatContextLabel(numCtx) {
578221
578278
  return numCtx >= 1024 ? `${Math.floor(numCtx / 1024)}K` : String(numCtx);
578222
578279
  }
578223
578280
  function calculateExpandedVariantContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
578224
- const memoryFit = calculateMemoryBoundedNumCtx(
578225
- specs,
578226
- modelSizeGB2,
578227
- kvBytesPerToken,
578228
- archMax
578229
- );
578230
- const archCtx = archMax && archMax > 0 ? Math.max(2048, Math.floor(archMax / 1024) * 1024) : Number.POSITIVE_INFINITY;
578281
+ const ramBudget = specs.availableRamGB > 0 ? specs.availableRamGB : specs.totalRamGB;
578282
+ const vramBudget = specs.availableVramGB > 0 ? specs.availableVramGB : specs.gpuVramGB;
578283
+ const totalAvail = Math.max(vramBudget, ramBudget);
578284
+ const remaining = Math.max(0, totalAvail - modelSizeGB2);
578285
+ const usableGB = remaining * 0.85;
578286
+ const memoryFit = calculateMemoryBoundedNumCtx(specs, modelSizeGB2, kvBytesPerToken, archMax);
578287
+ const archCtxValue = archMax && archMax > 0 ? Math.max(2048, Math.floor(archMax / 1024) * 1024) : null;
578288
+ const archCtx = archCtxValue ?? Number.POSITIVE_INFINITY;
578231
578289
  const floor = Math.min(EXPANDED_VARIANT_MIN_NUM_CTX, archCtx);
578232
578290
  const fits = Math.min(memoryFit, archCtx);
578233
578291
  const numCtx = Math.max(floor, fits);
578234
- return { numCtx, label: formatContextLabel(numCtx) };
578292
+ let limitedBy;
578293
+ if (numCtx === floor && fits < floor) limitedBy = "floor";
578294
+ else if (archCtxValue !== null && numCtx === archCtxValue) limitedBy = "arch";
578295
+ else limitedBy = "memory";
578296
+ const effectiveKvBpt = kvBytesPerToken && kvBytesPerToken > 0 ? kvBytesPerToken : (modelSizeGB2 <= 5 ? 64 : modelSizeGB2 <= 12 ? 160 : modelSizeGB2 <= 25 ? 256 : 384) * 1024;
578297
+ return {
578298
+ numCtx,
578299
+ label: formatContextLabel(numCtx),
578300
+ math: {
578301
+ numCtx,
578302
+ label: formatContextLabel(numCtx),
578303
+ modelSizeGB: modelSizeGB2,
578304
+ kvBytesPerToken: effectiveKvBpt,
578305
+ kvSource: kvBytesPerToken && kvBytesPerToken > 0 ? "model_info" : "fallback",
578306
+ archMax: archMax && archMax > 0 ? archMax : null,
578307
+ ramBudgetGB: ramBudget,
578308
+ vramBudgetGB: vramBudget,
578309
+ usableGB,
578310
+ memoryFit,
578311
+ archCtx: archCtxValue,
578312
+ floor,
578313
+ limitedBy
578314
+ }
578315
+ };
578235
578316
  }
578236
578317
  function ask(rl, question) {
578237
578318
  return new Promise((resolve52) => {
@@ -579999,6 +580080,17 @@ function modelSizeGB(models, modelName) {
579999
580080
  const known = QWEN_VARIANTS.find((v) => modelName.includes(v.tag.split(":")[1] ?? ""));
580000
580081
  return known?.sizeGB ?? 4;
580001
580082
  }
580083
/**
 * Returns a default layer (block) count for a model architecture name, for
 * use when the model metadata does not report one.
 *
 * Matching is a case-insensitive substring test against a fixed, ordered list
 * of family names; the first match wins. Unknown families, and any
 * non-string input, get the generic default of 32 instead of throwing.
 *
 * @param {string} arch3 - Architecture identifier (e.g. "qwen3", "llama").
 * @returns {number} Default layer count for that family.
 */
function defaultLayersForArch(arch3) {
  const GENERIC_DEFAULT = 32;
  // The value originates from untyped JSON; never call string methods on a non-string.
  if (typeof arch3 !== "string") return GENERIC_DEFAULT;

  // Order matters: the first family whose name occurs in the identifier wins.
  const familyDefaults = [
    ["qwen", 36],
    ["llama", 32],
    ["mistral", 32],
    ["phi", 32],
    ["gemma", 42],
    ["granite", 40],
    ["command", 40]
  ];
  const lowered = arch3.toLowerCase();
  const match = familyDefaults.find(([family]) => lowered.includes(family));
  return match ? match[1] : GENERIC_DEFAULT;
}
580002
580094
  async function queryModelKVInfo(backendUrl2, modelName) {
580003
580095
  try {
580004
580096
  const normalized = backendUrl2.replace(/\/+$/, "");
@@ -580014,13 +580106,17 @@ async function queryModelKVInfo(backendUrl2, modelName) {
580014
580106
  const info = data.model_info;
580015
580107
  const arch3 = info["general.architecture"];
580016
580108
  if (!arch3) return null;
580017
- const nLayers = info[`${arch3}.block_count`];
580018
- const nKVHeads = info[`${arch3}.attention.head_count_kv`] ?? info[`${arch3}.attention.head_count`];
580019
- const keyDim = info[`${arch3}.attention.key_length`];
580020
- const valDim = info[`${arch3}.attention.value_length`] ?? keyDim;
580109
+ const nLayersRaw = info[`${arch3}.block_count`];
580110
+ const nKVHeadsRaw = info[`${arch3}.attention.head_count_kv`];
580111
+ const keyDimRaw = info[`${arch3}.attention.key_length`];
580112
+ const valDimRaw = info[`${arch3}.attention.value_length`] ?? keyDimRaw;
580021
580113
  const archMax = info[`${arch3}.context_length`];
580022
580114
  if (!archMax) return null;
580023
- if (!nLayers || !nKVHeads || !keyDim || !valDim) return { archMax };
580115
+ const keyDim = keyDimRaw ?? 128;
580116
+ const valDim = valDimRaw ?? 128;
580117
+ const nLayers = nLayersRaw ?? defaultLayersForArch(arch3);
580118
+ const nKVHeads = nKVHeadsRaw ?? 8;
580119
+ if (!nLayers) return { archMax };
580024
580120
  const kvBytesPerToken = nLayers * nKVHeads * (keyDim + valDim) * 2;
580025
580121
  return { kvBytesPerToken, archMax };
580026
580122
  } catch {
@@ -580151,6 +580247,21 @@ async function createExpandedVariantAsync(baseModel, specs, sizeGB, kvBytesPerTo
580151
580247
  archMax
580152
580248
  );
580153
580249
  }
580250
/**
 * Builds a one-line, bracketed diagnostic describing how an expanded context
 * size was chosen, e.g.
 *   [VRAM 20.0GB/24.0GB, RAM 48.0GB/64.0GB | model 5.5GB | KV 160KB/tok (fallback) | fit 128K, arch 256K, floor 32K → 128K (memory-fit)]
 *
 * This is display-only output, so it must never throw: any numeric field that
 * is missing or not a finite number is rendered as "n/a".
 *
 * @param {object} specs - System specs; reads gpuVramGB, availableVramGB,
 *   availableRamGB, totalRamGB and unifiedMemory.
 * @param {object} math - Context-size breakdown; reads modelSizeGB,
 *   kvBytesPerToken, kvSource, memoryFit, archCtx (null when unknown), floor,
 *   numCtx and limitedBy ("floor" | "arch" | anything else = memory).
 * @returns {string} The formatted diagnostic line.
 */
function formatExpandedContextDiagnostic(specs, math) {
  const NOT_AVAILABLE = "n/a";
  const isUsableNumber = (value) => typeof value === "number" && Number.isFinite(value);

  const fmtGB = (value) => (isUsableNumber(value) ? `${value.toFixed(1)}GB` : NOT_AVAILABLE);
  const fmtKB = (value) => (isUsableNumber(value) ? `${Math.round(value / 1024)}KB` : NOT_AVAILABLE);
  const fmtK = (value) => {
    if (!isUsableNumber(value)) return NOT_AVAILABLE;
    return value >= 1024 ? `${Math.floor(value / 1024)}K` : String(value);
  };

  // Any limitedBy value other than "floor"/"arch" is reported as a memory fit.
  const limitLabels = new Map([
    ["floor", "min floor"],
    ["arch", "arch-capped"]
  ]);
  const limitLabel = limitLabels.get(math.limitedBy) ?? "memory-fit";

  const memoryParts = [];
  if (specs.gpuVramGB > 0) {
    // Fall back to total VRAM when the available figure is 0 or missing.
    const shownVram = specs.availableVramGB || specs.gpuVramGB;
    memoryParts.push(`VRAM ${fmtGB(shownVram)}/${fmtGB(specs.gpuVramGB)}`);
  }
  const unifiedTag = specs.unifiedMemory ? " unified" : "";
  memoryParts.push(`RAM ${fmtGB(specs.availableRamGB)}/${fmtGB(specs.totalRamGB)}${unifiedTag}`);

  const sections = [
    memoryParts.join(", "),
    `model ${fmtGB(math.modelSizeGB)}`,
    `KV ${fmtKB(math.kvBytesPerToken)}/tok (${math.kvSource})`,
    `fit ${fmtK(math.memoryFit)}, arch ${fmtK(math.archCtx)}, floor ${fmtK(math.floor)} → ${fmtK(math.numCtx)} (${limitLabel})`
  ];
  return `[${sections.join(" | ")}]`;
}
580154
580265
  async function ensureExpandedContext(modelName, backendUrl2) {
580155
580266
  if (modelName.includes("cloud") || modelName.includes(":cloud")) {
580156
580267
  return { model: modelName, created: false, contextLabel: "remote", numCtx: 0 };
@@ -580177,11 +580288,11 @@ async function ensureExpandedContext(modelName, backendUrl2) {
580177
580288
  kvInfo?.kvBytesPerToken,
580178
580289
  kvInfo?.archMax
580179
580290
  ).catch(() => ({ repaired: false, currentNumCtx: 0, baseModel: null, resolvedModel: modelName }));
580180
- return { model: repair.resolvedModel, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
580291
+ return { model: repair.resolvedModel, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
580181
580292
  }
580182
580293
  const existing = await checkExpandedVariant(modelName, backendUrl2);
580183
580294
  if (existing === null) {
580184
- return { model: modelName, created: false, contextLabel: "", numCtx: 0 };
580295
+ return { model: modelName, created: false, contextLabel: "", numCtx: 0, specs, math: ctx3.math };
580185
580296
  }
580186
580297
  if (typeof existing === "string") {
580187
580298
  const lostTools = await wrapperLacksToolsCapability(backendUrl2, existing).catch(() => false);
@@ -580196,7 +580307,7 @@ async function ensureExpandedContext(modelName, backendUrl2) {
580196
580307
  kvInfo?.archMax
580197
580308
  );
580198
580309
  if (rebuilt) {
580199
- return { model: rebuilt, created: true, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
580310
+ return { model: rebuilt, created: true, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
580200
580311
  }
580201
580312
  } catch {
580202
580313
  }
@@ -580211,13 +580322,13 @@ async function ensureExpandedContext(modelName, backendUrl2) {
580211
580322
  kvInfo?.kvBytesPerToken,
580212
580323
  kvInfo?.archMax
580213
580324
  ).catch(() => ({ repaired: false, currentNumCtx: 0, baseModel: null, resolvedModel: existing }));
580214
- return { model: repair.resolvedModel, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
580325
+ return { model: repair.resolvedModel, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
580215
580326
  }
580216
580327
  const created = await createExpandedVariantAsync(modelName, specs, sizeGB, kvInfo?.kvBytesPerToken, kvInfo?.archMax);
580217
580328
  if (created) {
580218
- return { model: created, created: true, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
580329
+ return { model: created, created: true, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
580219
580330
  }
580220
- return { model: modelName, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
580331
+ return { model: modelName, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
580221
580332
  }
580222
580333
  function guessBaseFromVariant(variantName, models) {
580223
580334
  const stripped = stripVariantTag(variantName);
@@ -602188,14 +602299,15 @@ async function switchModel(query, ctx3, local = false) {
602188
602299
  match.name,
602189
602300
  ctx3.config.backendUrl
602190
602301
  );
602302
+ const diag = result.specs && result.math ? "\n " + c3.dim(formatExpandedContextDiagnostic(result.specs, result.math)) : "";
602191
602303
  if (result.created) {
602192
602304
  renderInfo(
602193
- `Created expanded context variant: ${c3.bold(result.model)} (${result.contextLabel}, ${result.numCtx} tokens)`
602305
+ `Created expanded context variant: ${c3.bold(result.model)} (${result.contextLabel}, ${result.numCtx} tokens)${diag}`
602194
602306
  );
602195
602307
  finalModel = result.model;
602196
602308
  } else if (result.model !== match.name) {
602197
602309
  renderInfo(
602198
- `Using expanded context variant: ${c3.bold(result.model)} (${result.contextLabel})`
602310
+ `Using expanded context variant: ${c3.bold(result.model)} (${result.contextLabel})${diag}`
602199
602311
  );
602200
602312
  finalModel = result.model;
602201
602313
  }
@@ -653743,13 +653855,14 @@ This is an independent background session started from /background.`
653743
653855
  currentConfig.model,
653744
653856
  currentConfig.backendUrl
653745
653857
  );
653858
+ const diag = expandResult.specs && expandResult.math ? "\n " + c3.dim(formatExpandedContextDiagnostic(expandResult.specs, expandResult.math)) : "";
653746
653859
  if (expandResult.created) {
653747
653860
  config = { ...config, model: expandResult.model };
653748
653861
  currentConfig = { ...currentConfig, model: expandResult.model };
653749
653862
  statusBar.setModelName(expandResult.model);
653750
653863
  writeContent(
653751
653864
  () => renderInfo(
653752
- `Created expanded context model: ${expandResult.model} (${expandResult.contextLabel}, ${expandResult.numCtx} tokens)`
653865
+ `Created expanded context model: ${expandResult.model} (${expandResult.contextLabel}, ${expandResult.numCtx} tokens)${diag}`
653753
653866
  )
653754
653867
  );
653755
653868
  } else if (expandResult.model !== currentConfig.model) {
@@ -653758,7 +653871,7 @@ This is an independent background session started from /background.`
653758
653871
  statusBar.setModelName(expandResult.model);
653759
653872
  writeContent(
653760
653873
  () => renderInfo(
653761
- `Using expanded context model: ${expandResult.model} (${expandResult.contextLabel})`
653874
+ `Using expanded context model: ${expandResult.model} (${expandResult.contextLabel})${diag}`
653762
653875
  )
653763
653876
  );
653764
653877
  }
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.107",
3
+ "version": "1.0.109",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.107",
9
+ "version": "1.0.109",
10
10
  "bundleDependencies": [
11
11
  "image-to-ascii"
12
12
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.107",
3
+ "version": "1.0.109",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",