omnius 1.0.108 → 1.0.109

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -577955,6 +577955,7 @@ __export(setup_exports, {
577955
577955
  ensurePythonVenv: () => ensurePythonVenv,
577956
577956
  ensureVisionDeps: () => ensureVisionDeps,
577957
577957
  expandedModelName: () => expandedModelName,
577958
+ formatExpandedContextDiagnostic: () => formatExpandedContextDiagnostic,
577958
577959
  getLatestOllamaVersion: () => getLatestOllamaVersion,
577959
577960
  getOllamaVersion: () => getOllamaVersion,
577960
577961
  hasCmd: () => hasCmd,
@@ -578020,7 +578021,7 @@ async function needsTextToolMode(modelName, backendUrl2) {
578020
578021
  const hasTools = await checkToolSupport(modelName, backendUrl2);
578021
578022
  return !hasTools;
578022
578023
  }
578023
- function detectUnifiedMemory() {
578024
+ function detectUnifiedMemory(hasDiscreteGpu = false) {
578024
578025
  if (process.platform === "darwin" && process.arch === "arm64") return true;
578025
578026
  if (process.platform === "linux") {
578026
578027
  try {
@@ -578034,6 +578035,16 @@ function detectUnifiedMemory() {
578034
578035
  }
578035
578036
  } catch {
578036
578037
  }
578038
+ try {
578039
+ if (existsSync91("/proc/device-tree/model")) {
578040
+ const model = readFileSync74("/proc/device-tree/model", "utf8").replace(/\0+$/, "").toLowerCase();
578041
+ if (/jetson|tegra|orin|xavier|nano|raspberry|rockchip|rk\d{4}|mt\d{4}/.test(model)) {
578042
+ return true;
578043
+ }
578044
+ }
578045
+ } catch {
578046
+ }
578047
+ if (process.arch === "arm64" && !hasDiscreteGpu) return true;
578037
578048
  }
578038
578049
  return false;
578039
578050
  }
@@ -578127,11 +578138,15 @@ function detectSystemSpecs() {
578127
578138
  } catch {
578128
578139
  }
578129
578140
  }
578130
- const unifiedMemory = detectUnifiedMemory();
578141
+ const unifiedMemory = detectUnifiedMemory(gpuVramGB > 0);
578131
578142
  if (unifiedMemory && totalRamGB > 0) {
578132
578143
  const floorGB = totalRamGB * 0.8;
578133
578144
  if (availableRamGB < floorGB) availableRamGB = floorGB;
578134
578145
  }
578146
+ if (!unifiedMemory && totalRamGB > 0) {
578147
+ const floorGB = totalRamGB * 0.75;
578148
+ if (availableRamGB < floorGB) availableRamGB = floorGB;
578149
+ }
578135
578150
  return {
578136
578151
  totalRamGB: Math.round(totalRamGB * 10) / 10,
578137
578152
  availableRamGB: Math.round(availableRamGB * 10) / 10,
@@ -578191,11 +578206,15 @@ async function detectSystemSpecsAsync() {
578191
578206
  } catch {
578192
578207
  }
578193
578208
  }
578194
- const unifiedMemory = detectUnifiedMemory();
578209
+ const unifiedMemory = detectUnifiedMemory(gpuVramGB > 0);
578195
578210
  if (unifiedMemory && totalRamGB > 0) {
578196
578211
  const floorGB = totalRamGB * 0.8;
578197
578212
  if (availableRamGB < floorGB) availableRamGB = floorGB;
578198
578213
  }
578214
+ if (!unifiedMemory && totalRamGB > 0) {
578215
+ const floorGB = totalRamGB * 0.75;
578216
+ if (availableRamGB < floorGB) availableRamGB = floorGB;
578217
+ }
578199
578218
  return {
578200
578219
  totalRamGB: Math.round(totalRamGB * 10) / 10,
578201
578220
  availableRamGB: Math.round(availableRamGB * 10) / 10,
@@ -578259,17 +578278,41 @@ function formatContextLabel(numCtx) {
578259
578278
  return numCtx >= 1024 ? `${Math.floor(numCtx / 1024)}K` : String(numCtx);
578260
578279
  }
578261
578280
  function calculateExpandedVariantContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
578262
- const memoryFit = calculateMemoryBoundedNumCtx(
578263
- specs,
578264
- modelSizeGB2,
578265
- kvBytesPerToken,
578266
- archMax
578267
- );
578268
- const archCtx = archMax && archMax > 0 ? Math.max(2048, Math.floor(archMax / 1024) * 1024) : Number.POSITIVE_INFINITY;
578281
+ const ramBudget = specs.availableRamGB > 0 ? specs.availableRamGB : specs.totalRamGB;
578282
+ const vramBudget = specs.availableVramGB > 0 ? specs.availableVramGB : specs.gpuVramGB;
578283
+ const totalAvail = Math.max(vramBudget, ramBudget);
578284
+ const remaining = Math.max(0, totalAvail - modelSizeGB2);
578285
+ const usableGB = remaining * 0.85;
578286
+ const memoryFit = calculateMemoryBoundedNumCtx(specs, modelSizeGB2, kvBytesPerToken, archMax);
578287
+ const archCtxValue = archMax && archMax > 0 ? Math.max(2048, Math.floor(archMax / 1024) * 1024) : null;
578288
+ const archCtx = archCtxValue ?? Number.POSITIVE_INFINITY;
578269
578289
  const floor = Math.min(EXPANDED_VARIANT_MIN_NUM_CTX, archCtx);
578270
578290
  const fits = Math.min(memoryFit, archCtx);
578271
578291
  const numCtx = Math.max(floor, fits);
578272
- return { numCtx, label: formatContextLabel(numCtx) };
578292
+ let limitedBy;
578293
+ if (numCtx === floor && fits < floor) limitedBy = "floor";
578294
+ else if (archCtxValue !== null && numCtx === archCtxValue) limitedBy = "arch";
578295
+ else limitedBy = "memory";
578296
+ const effectiveKvBpt = kvBytesPerToken && kvBytesPerToken > 0 ? kvBytesPerToken : (modelSizeGB2 <= 5 ? 64 : modelSizeGB2 <= 12 ? 160 : modelSizeGB2 <= 25 ? 256 : 384) * 1024;
578297
+ return {
578298
+ numCtx,
578299
+ label: formatContextLabel(numCtx),
578300
+ math: {
578301
+ numCtx,
578302
+ label: formatContextLabel(numCtx),
578303
+ modelSizeGB: modelSizeGB2,
578304
+ kvBytesPerToken: effectiveKvBpt,
578305
+ kvSource: kvBytesPerToken && kvBytesPerToken > 0 ? "model_info" : "fallback",
578306
+ archMax: archMax && archMax > 0 ? archMax : null,
578307
+ ramBudgetGB: ramBudget,
578308
+ vramBudgetGB: vramBudget,
578309
+ usableGB,
578310
+ memoryFit,
578311
+ archCtx: archCtxValue,
578312
+ floor,
578313
+ limitedBy
578314
+ }
578315
+ };
578273
578316
  }
578274
578317
  function ask(rl, question) {
578275
578318
  return new Promise((resolve52) => {
@@ -580064,7 +580107,7 @@ async function queryModelKVInfo(backendUrl2, modelName) {
580064
580107
  const arch3 = info["general.architecture"];
580065
580108
  if (!arch3) return null;
580066
580109
  const nLayersRaw = info[`${arch3}.block_count`];
580067
- const nKVHeadsRaw = info[`${arch3}.attention.head_count_kv`] ?? info[`${arch3}.attention.head_count`];
580110
+ const nKVHeadsRaw = info[`${arch3}.attention.head_count_kv`];
580068
580111
  const keyDimRaw = info[`${arch3}.attention.key_length`];
580069
580112
  const valDimRaw = info[`${arch3}.attention.value_length`] ?? keyDimRaw;
580070
580113
  const archMax = info[`${arch3}.context_length`];
@@ -580072,7 +580115,7 @@ async function queryModelKVInfo(backendUrl2, modelName) {
580072
580115
  const keyDim = keyDimRaw ?? 128;
580073
580116
  const valDim = valDimRaw ?? 128;
580074
580117
  const nLayers = nLayersRaw ?? defaultLayersForArch(arch3);
580075
- const nKVHeads = nKVHeadsRaw ?? 32;
580118
+ const nKVHeads = nKVHeadsRaw ?? 8;
580076
580119
  if (!nLayers) return { archMax };
580077
580120
  const kvBytesPerToken = nLayers * nKVHeads * (keyDim + valDim) * 2;
580078
580121
  return { kvBytesPerToken, archMax };
@@ -580204,6 +580247,21 @@ async function createExpandedVariantAsync(baseModel, specs, sizeGB, kvBytesPerTo
580204
580247
  archMax
580205
580248
  );
580206
580249
  }
580250
+ function formatExpandedContextDiagnostic(specs, math) {
580251
+ const fmtGB = (n2) => `${n2.toFixed(1)}GB`;
580252
+ const fmtKB = (n2) => `${Math.round(n2 / 1024)}KB`;
580253
+ const fmtK = (n2) => n2 >= 1024 ? `${Math.floor(n2 / 1024)}K` : String(n2);
580254
+ const memBits = [];
580255
+ if (specs.gpuVramGB > 0) {
580256
+ memBits.push(`VRAM ${fmtGB(specs.availableVramGB || specs.gpuVramGB)}/${fmtGB(specs.gpuVramGB)}`);
580257
+ }
580258
+ memBits.push(`RAM ${fmtGB(specs.availableRamGB)}/${fmtGB(specs.totalRamGB)}${specs.unifiedMemory ? " unified" : ""}`);
580259
+ const mem = memBits.join(", ");
580260
+ const kv = `KV ${fmtKB(math.kvBytesPerToken)}/tok (${math.kvSource})`;
580261
+ const fit2 = `fit ${fmtK(math.memoryFit)}, arch ${math.archCtx !== null ? fmtK(math.archCtx) : "n/a"}, floor ${fmtK(math.floor)}`;
580262
+ const limit = `→ ${fmtK(math.numCtx)} (${math.limitedBy === "floor" ? "min floor" : math.limitedBy === "arch" ? "arch-capped" : "memory-fit"})`;
580263
+ return `[${mem} | model ${fmtGB(math.modelSizeGB)} | ${kv} | ${fit2} ${limit}]`;
580264
+ }
580207
580265
  async function ensureExpandedContext(modelName, backendUrl2) {
580208
580266
  if (modelName.includes("cloud") || modelName.includes(":cloud")) {
580209
580267
  return { model: modelName, created: false, contextLabel: "remote", numCtx: 0 };
@@ -580230,11 +580288,11 @@ async function ensureExpandedContext(modelName, backendUrl2) {
580230
580288
  kvInfo?.kvBytesPerToken,
580231
580289
  kvInfo?.archMax
580232
580290
  ).catch(() => ({ repaired: false, currentNumCtx: 0, baseModel: null, resolvedModel: modelName }));
580233
- return { model: repair.resolvedModel, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
580291
+ return { model: repair.resolvedModel, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
580234
580292
  }
580235
580293
  const existing = await checkExpandedVariant(modelName, backendUrl2);
580236
580294
  if (existing === null) {
580237
- return { model: modelName, created: false, contextLabel: "", numCtx: 0 };
580295
+ return { model: modelName, created: false, contextLabel: "", numCtx: 0, specs, math: ctx3.math };
580238
580296
  }
580239
580297
  if (typeof existing === "string") {
580240
580298
  const lostTools = await wrapperLacksToolsCapability(backendUrl2, existing).catch(() => false);
@@ -580249,7 +580307,7 @@ async function ensureExpandedContext(modelName, backendUrl2) {
580249
580307
  kvInfo?.archMax
580250
580308
  );
580251
580309
  if (rebuilt) {
580252
- return { model: rebuilt, created: true, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
580310
+ return { model: rebuilt, created: true, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
580253
580311
  }
580254
580312
  } catch {
580255
580313
  }
@@ -580264,13 +580322,13 @@ async function ensureExpandedContext(modelName, backendUrl2) {
580264
580322
  kvInfo?.kvBytesPerToken,
580265
580323
  kvInfo?.archMax
580266
580324
  ).catch(() => ({ repaired: false, currentNumCtx: 0, baseModel: null, resolvedModel: existing }));
580267
- return { model: repair.resolvedModel, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
580325
+ return { model: repair.resolvedModel, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
580268
580326
  }
580269
580327
  const created = await createExpandedVariantAsync(modelName, specs, sizeGB, kvInfo?.kvBytesPerToken, kvInfo?.archMax);
580270
580328
  if (created) {
580271
- return { model: created, created: true, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
580329
+ return { model: created, created: true, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
580272
580330
  }
580273
- return { model: modelName, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
580331
+ return { model: modelName, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
580274
580332
  }
580275
580333
  function guessBaseFromVariant(variantName, models) {
580276
580334
  const stripped = stripVariantTag(variantName);
@@ -602241,14 +602299,15 @@ async function switchModel(query, ctx3, local = false) {
602241
602299
  match.name,
602242
602300
  ctx3.config.backendUrl
602243
602301
  );
602302
+ const diag = result.specs && result.math ? "\n " + c3.dim(formatExpandedContextDiagnostic(result.specs, result.math)) : "";
602244
602303
  if (result.created) {
602245
602304
  renderInfo(
602246
- `Created expanded context variant: ${c3.bold(result.model)} (${result.contextLabel}, ${result.numCtx} tokens)`
602305
+ `Created expanded context variant: ${c3.bold(result.model)} (${result.contextLabel}, ${result.numCtx} tokens)${diag}`
602247
602306
  );
602248
602307
  finalModel = result.model;
602249
602308
  } else if (result.model !== match.name) {
602250
602309
  renderInfo(
602251
- `Using expanded context variant: ${c3.bold(result.model)} (${result.contextLabel})`
602310
+ `Using expanded context variant: ${c3.bold(result.model)} (${result.contextLabel})${diag}`
602252
602311
  );
602253
602312
  finalModel = result.model;
602254
602313
  }
@@ -653796,13 +653855,14 @@ This is an independent background session started from /background.`
653796
653855
  currentConfig.model,
653797
653856
  currentConfig.backendUrl
653798
653857
  );
653858
+ const diag = expandResult.specs && expandResult.math ? "\n " + c3.dim(formatExpandedContextDiagnostic(expandResult.specs, expandResult.math)) : "";
653799
653859
  if (expandResult.created) {
653800
653860
  config = { ...config, model: expandResult.model };
653801
653861
  currentConfig = { ...currentConfig, model: expandResult.model };
653802
653862
  statusBar.setModelName(expandResult.model);
653803
653863
  writeContent(
653804
653864
  () => renderInfo(
653805
- `Created expanded context model: ${expandResult.model} (${expandResult.contextLabel}, ${expandResult.numCtx} tokens)`
653865
+ `Created expanded context model: ${expandResult.model} (${expandResult.contextLabel}, ${expandResult.numCtx} tokens)${diag}`
653806
653866
  )
653807
653867
  );
653808
653868
  } else if (expandResult.model !== currentConfig.model) {
@@ -653811,7 +653871,7 @@ This is an independent background session started from /background.`
653811
653871
  statusBar.setModelName(expandResult.model);
653812
653872
  writeContent(
653813
653873
  () => renderInfo(
653814
- `Using expanded context model: ${expandResult.model} (${expandResult.contextLabel})`
653874
+ `Using expanded context model: ${expandResult.model} (${expandResult.contextLabel})${diag}`
653815
653875
  )
653816
653876
  );
653817
653877
  }
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.108",
3
+ "version": "1.0.109",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.108",
9
+ "version": "1.0.109",
10
10
  "bundleDependencies": [
11
11
  "image-to-ascii"
12
12
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.108",
3
+ "version": "1.0.109",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",