omnius 1.0.106 → 1.0.108
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +93 -28
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -578020,6 +578020,23 @@ async function needsTextToolMode(modelName, backendUrl2) {
|
|
|
578020
578020
|
const hasTools = await checkToolSupport(modelName, backendUrl2);
|
|
578021
578021
|
return !hasTools;
|
|
578022
578022
|
}
|
|
578023
|
+
function detectUnifiedMemory() {
|
|
578024
|
+
if (process.platform === "darwin" && process.arch === "arm64") return true;
|
|
578025
|
+
if (process.platform === "linux") {
|
|
578026
|
+
try {
|
|
578027
|
+
if (existsSync91("/etc/nv_tegra_release")) return true;
|
|
578028
|
+
} catch {
|
|
578029
|
+
}
|
|
578030
|
+
try {
|
|
578031
|
+
if (existsSync91("/sys/devices/soc0/family")) {
|
|
578032
|
+
const family = readFileSync74("/sys/devices/soc0/family", "utf8").trim().toLowerCase();
|
|
578033
|
+
if (family.includes("tegra")) return true;
|
|
578034
|
+
}
|
|
578035
|
+
} catch {
|
|
578036
|
+
}
|
|
578037
|
+
}
|
|
578038
|
+
return false;
|
|
578039
|
+
}
|
|
578023
578040
|
function parseNvidiaSmi(stdout) {
|
|
578024
578041
|
let total = 0;
|
|
578025
578042
|
let free = 0;
|
|
@@ -578035,6 +578052,14 @@ function parseNvidiaSmi(stdout) {
|
|
|
578035
578052
|
}
|
|
578036
578053
|
return { total, free, name: name10 };
|
|
578037
578054
|
}
|
|
578055
|
+
function parseFreeBytes(stdout) {
|
|
578056
|
+
const memLine = stdout.split("\n").find((l2) => /^Mem:/i.test(l2)) ?? "";
|
|
578057
|
+
const nums = memLine.match(/\d+/g);
|
|
578058
|
+
if (!nums || nums.length === 0) return { total: 0, available: 0 };
|
|
578059
|
+
const total = parseInt(nums[0], 10) || 0;
|
|
578060
|
+
const available = nums.length >= 6 ? parseInt(nums[5], 10) || 0 : 0;
|
|
578061
|
+
return { total, available };
|
|
578062
|
+
}
|
|
578038
578063
|
function parseRocmSmi(stdout) {
|
|
578039
578064
|
let total = 0;
|
|
578040
578065
|
let used = 0;
|
|
@@ -578064,14 +578089,14 @@ function detectSystemSpecs() {
|
|
|
578064
578089
|
timeout: 5e3
|
|
578065
578090
|
});
|
|
578066
578091
|
if (memInfo.includes("Mem:")) {
|
|
578067
|
-
const
|
|
578068
|
-
if (
|
|
578069
|
-
totalRamGB =
|
|
578070
|
-
availableRamGB =
|
|
578092
|
+
const { total, available } = parseFreeBytes(memInfo);
|
|
578093
|
+
if (total > 0) {
|
|
578094
|
+
totalRamGB = total / 1024 ** 3;
|
|
578095
|
+
availableRamGB = available > 0 ? available / 1024 ** 3 : totalRamGB * 0.75;
|
|
578071
578096
|
}
|
|
578072
578097
|
} else {
|
|
578073
578098
|
const bytes = parseInt(memInfo.trim(), 10);
|
|
578074
|
-
if (!isNaN(bytes)) {
|
|
578099
|
+
if (!isNaN(bytes) && bytes > 0) {
|
|
578075
578100
|
totalRamGB = bytes / 1024 ** 3;
|
|
578076
578101
|
availableRamGB = totalRamGB * 0.7;
|
|
578077
578102
|
}
|
|
@@ -578102,12 +578127,18 @@ function detectSystemSpecs() {
|
|
|
578102
578127
|
} catch {
|
|
578103
578128
|
}
|
|
578104
578129
|
}
|
|
578130
|
+
const unifiedMemory = detectUnifiedMemory();
|
|
578131
|
+
if (unifiedMemory && totalRamGB > 0) {
|
|
578132
|
+
const floorGB = totalRamGB * 0.8;
|
|
578133
|
+
if (availableRamGB < floorGB) availableRamGB = floorGB;
|
|
578134
|
+
}
|
|
578105
578135
|
return {
|
|
578106
578136
|
totalRamGB: Math.round(totalRamGB * 10) / 10,
|
|
578107
578137
|
availableRamGB: Math.round(availableRamGB * 10) / 10,
|
|
578108
578138
|
gpuVramGB: Math.round(gpuVramGB * 10) / 10,
|
|
578109
578139
|
availableVramGB: Math.round(availableVramGB * 10) / 10,
|
|
578110
|
-
gpuName
|
|
578140
|
+
gpuName,
|
|
578141
|
+
unifiedMemory
|
|
578111
578142
|
};
|
|
578112
578143
|
}
|
|
578113
578144
|
async function detectSystemSpecsAsync() {
|
|
@@ -578122,14 +578153,14 @@ async function detectSystemSpecsAsync() {
|
|
|
578122
578153
|
{ timeout: 5e3 }
|
|
578123
578154
|
);
|
|
578124
578155
|
if (memInfo.includes("Mem:")) {
|
|
578125
|
-
const
|
|
578126
|
-
if (
|
|
578127
|
-
totalRamGB =
|
|
578128
|
-
availableRamGB =
|
|
578156
|
+
const { total, available } = parseFreeBytes(memInfo);
|
|
578157
|
+
if (total > 0) {
|
|
578158
|
+
totalRamGB = total / 1024 ** 3;
|
|
578159
|
+
availableRamGB = available > 0 ? available / 1024 ** 3 : totalRamGB * 0.75;
|
|
578129
578160
|
}
|
|
578130
578161
|
} else {
|
|
578131
578162
|
const bytes = parseInt(memInfo.trim(), 10);
|
|
578132
|
-
if (!isNaN(bytes)) {
|
|
578163
|
+
if (!isNaN(bytes) && bytes > 0) {
|
|
578133
578164
|
totalRamGB = bytes / 1024 ** 3;
|
|
578134
578165
|
availableRamGB = totalRamGB * 0.7;
|
|
578135
578166
|
}
|
|
@@ -578160,12 +578191,18 @@ async function detectSystemSpecsAsync() {
|
|
|
578160
578191
|
} catch {
|
|
578161
578192
|
}
|
|
578162
578193
|
}
|
|
578194
|
+
const unifiedMemory = detectUnifiedMemory();
|
|
578195
|
+
if (unifiedMemory && totalRamGB > 0) {
|
|
578196
|
+
const floorGB = totalRamGB * 0.8;
|
|
578197
|
+
if (availableRamGB < floorGB) availableRamGB = floorGB;
|
|
578198
|
+
}
|
|
578163
578199
|
return {
|
|
578164
578200
|
totalRamGB: Math.round(totalRamGB * 10) / 10,
|
|
578165
578201
|
availableRamGB: Math.round(availableRamGB * 10) / 10,
|
|
578166
578202
|
gpuVramGB: Math.round(gpuVramGB * 10) / 10,
|
|
578167
578203
|
availableVramGB: Math.round(availableVramGB * 10) / 10,
|
|
578168
|
-
gpuName
|
|
578204
|
+
gpuName,
|
|
578205
|
+
unifiedMemory
|
|
578169
578206
|
};
|
|
578170
578207
|
}
|
|
578171
578208
|
function recommendModel(specs) {
|
|
@@ -578179,7 +578216,7 @@ function recommendModel(specs) {
|
|
|
578179
578216
|
}
|
|
578180
578217
|
return QWEN_VARIANTS.find((v) => v.tag === "qwen3.5:cloud");
|
|
578181
578218
|
}
|
|
578182
|
-
function calculateContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
|
|
578219
|
+
function calculateMemoryBoundedNumCtx(specs, modelSizeGB2, kvBytesPerToken, archMax) {
|
|
578183
578220
|
const ramBudget = specs.availableRamGB > 0 ? specs.availableRamGB : specs.totalRamGB;
|
|
578184
578221
|
const vramBudget = specs.availableVramGB > 0 ? specs.availableVramGB : specs.gpuVramGB;
|
|
578185
578222
|
const totalAvail = Math.max(vramBudget, ramBudget);
|
|
@@ -578190,11 +578227,19 @@ function calculateContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
|
|
|
578190
578227
|
const maxTokens = Math.floor(usableGB * 1024 ** 3 / kvBytesPerToken);
|
|
578191
578228
|
numCtx = Math.max(2048, Math.floor(maxTokens / 1024) * 1024);
|
|
578192
578229
|
} else {
|
|
578193
|
-
const kvEstimate = modelSizeGB2 <= 5 ?
|
|
578230
|
+
const kvEstimate = modelSizeGB2 <= 5 ? 64 * 1024 : (
|
|
578231
|
+
// 4B-class
|
|
578232
|
+
modelSizeGB2 <= 12 ? 160 * 1024 : (
|
|
578233
|
+
// 9B-class
|
|
578234
|
+
modelSizeGB2 <= 25 ? 256 * 1024 : (
|
|
578235
|
+
// 30B-class
|
|
578236
|
+
384 * 1024
|
|
578237
|
+
)
|
|
578238
|
+
)
|
|
578239
|
+
);
|
|
578194
578240
|
const maxTokens = Math.floor(usableGB * 1024 ** 3 / kvEstimate);
|
|
578195
578241
|
numCtx = Math.max(2048, Math.floor(maxTokens / 1024) * 1024);
|
|
578196
578242
|
}
|
|
578197
|
-
numCtx = Math.min(numCtx, 131072);
|
|
578198
578243
|
if (archMax && archMax > 0) numCtx = Math.min(numCtx, archMax);
|
|
578199
578244
|
if (kvBytesPerToken && kvBytesPerToken > 0 && modelSizeGB2 > 0) {
|
|
578200
578245
|
const maxKVBytes = modelSizeGB2 * 4 * 1024 ** 3;
|
|
@@ -578202,6 +578247,11 @@ function calculateContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
|
|
|
578202
578247
|
const budgetCap = Math.max(2048, Math.floor(maxTokensFromBudget / 1024) * 1024);
|
|
578203
578248
|
numCtx = Math.min(numCtx, budgetCap);
|
|
578204
578249
|
}
|
|
578250
|
+
return numCtx;
|
|
578251
|
+
}
|
|
578252
|
+
function calculateContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
|
|
578253
|
+
let numCtx = calculateMemoryBoundedNumCtx(specs, modelSizeGB2, kvBytesPerToken, archMax);
|
|
578254
|
+
numCtx = Math.min(numCtx, 131072);
|
|
578205
578255
|
const label = numCtx >= 1024 ? `${Math.floor(numCtx / 1024)}K` : String(numCtx);
|
|
578206
578256
|
return { numCtx, label };
|
|
578207
578257
|
}
|
|
@@ -578209,18 +578259,17 @@ function formatContextLabel(numCtx) {
|
|
|
578209
578259
|
return numCtx >= 1024 ? `${Math.floor(numCtx / 1024)}K` : String(numCtx);
|
|
578210
578260
|
}
|
|
578211
578261
|
function calculateExpandedVariantContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
|
|
578212
|
-
const
|
|
578262
|
+
const memoryFit = calculateMemoryBoundedNumCtx(
|
|
578213
578263
|
specs,
|
|
578214
578264
|
modelSizeGB2,
|
|
578215
578265
|
kvBytesPerToken,
|
|
578216
578266
|
archMax
|
|
578217
578267
|
);
|
|
578218
|
-
|
|
578219
|
-
|
|
578220
|
-
|
|
578221
|
-
|
|
578222
|
-
}
|
|
578223
|
-
return memoryBudget;
|
|
578268
|
+
const archCtx = archMax && archMax > 0 ? Math.max(2048, Math.floor(archMax / 1024) * 1024) : Number.POSITIVE_INFINITY;
|
|
578269
|
+
const floor = Math.min(EXPANDED_VARIANT_MIN_NUM_CTX, archCtx);
|
|
578270
|
+
const fits = Math.min(memoryFit, archCtx);
|
|
578271
|
+
const numCtx = Math.max(floor, fits);
|
|
578272
|
+
return { numCtx, label: formatContextLabel(numCtx) };
|
|
578224
578273
|
}
|
|
578225
578274
|
function ask(rl, question) {
|
|
578226
578275
|
return new Promise((resolve52) => {
|
|
@@ -579988,6 +580037,17 @@ function modelSizeGB(models, modelName) {
|
|
|
579988
580037
|
const known = QWEN_VARIANTS.find((v) => modelName.includes(v.tag.split(":")[1] ?? ""));
|
|
579989
580038
|
return known?.sizeGB ?? 4;
|
|
579990
580039
|
}
|
|
580040
|
+
function defaultLayersForArch(arch3) {
|
|
580041
|
+
const a2 = arch3.toLowerCase();
|
|
580042
|
+
if (a2.includes("qwen")) return 36;
|
|
580043
|
+
if (a2.includes("llama")) return 32;
|
|
580044
|
+
if (a2.includes("mistral")) return 32;
|
|
580045
|
+
if (a2.includes("phi")) return 32;
|
|
580046
|
+
if (a2.includes("gemma")) return 42;
|
|
580047
|
+
if (a2.includes("granite")) return 40;
|
|
580048
|
+
if (a2.includes("command")) return 40;
|
|
580049
|
+
return 32;
|
|
580050
|
+
}
|
|
579991
580051
|
async function queryModelKVInfo(backendUrl2, modelName) {
|
|
579992
580052
|
try {
|
|
579993
580053
|
const normalized = backendUrl2.replace(/\/+$/, "");
|
|
@@ -580003,13 +580063,17 @@ async function queryModelKVInfo(backendUrl2, modelName) {
|
|
|
580003
580063
|
const info = data.model_info;
|
|
580004
580064
|
const arch3 = info["general.architecture"];
|
|
580005
580065
|
if (!arch3) return null;
|
|
580006
|
-
const
|
|
580007
|
-
const
|
|
580008
|
-
const
|
|
580009
|
-
const
|
|
580066
|
+
const nLayersRaw = info[`${arch3}.block_count`];
|
|
580067
|
+
const nKVHeadsRaw = info[`${arch3}.attention.head_count_kv`] ?? info[`${arch3}.attention.head_count`];
|
|
580068
|
+
const keyDimRaw = info[`${arch3}.attention.key_length`];
|
|
580069
|
+
const valDimRaw = info[`${arch3}.attention.value_length`] ?? keyDimRaw;
|
|
580010
580070
|
const archMax = info[`${arch3}.context_length`];
|
|
580011
580071
|
if (!archMax) return null;
|
|
580012
|
-
|
|
580072
|
+
const keyDim = keyDimRaw ?? 128;
|
|
580073
|
+
const valDim = valDimRaw ?? 128;
|
|
580074
|
+
const nLayers = nLayersRaw ?? defaultLayersForArch(arch3);
|
|
580075
|
+
const nKVHeads = nKVHeadsRaw ?? 32;
|
|
580076
|
+
if (!nLayers) return { archMax };
|
|
580013
580077
|
const kvBytesPerToken = nLayers * nKVHeads * (keyDim + valDim) * 2;
|
|
580014
580078
|
return { kvBytesPerToken, archMax };
|
|
580015
580079
|
} catch {
|
|
@@ -580412,7 +580476,7 @@ export PATH="${binDir}:$PATH" # Added by omnius for nvim
|
|
|
580412
580476
|
} catch {
|
|
580413
580477
|
}
|
|
580414
580478
|
}
|
|
580415
|
-
var execAsync2, OMNIUS_FIRST_RUN_BANNER, ANSI_RE, visibleLen2, QWEN_VARIANTS, _toolSupportCache, _cloudflaredInstallPromise;
|
|
580479
|
+
var execAsync2, OMNIUS_FIRST_RUN_BANNER, ANSI_RE, visibleLen2, QWEN_VARIANTS, _toolSupportCache, EXPANDED_VARIANT_MIN_NUM_CTX, _cloudflaredInstallPromise;
|
|
580416
580480
|
var init_setup = __esm({
|
|
580417
580481
|
"packages/cli/src/tui/setup.ts"() {
|
|
580418
580482
|
"use strict";
|
|
@@ -580445,6 +580509,7 @@ var init_setup = __esm({
|
|
|
580445
580509
|
{ tag: "qwen3.5:397b-cloud", sizeGB: 0, label: "397B Cloud (Ollama Cloud)", cloud: true }
|
|
580446
580510
|
];
|
|
580447
580511
|
_toolSupportCache = /* @__PURE__ */ new Map();
|
|
580512
|
+
EXPANDED_VARIANT_MIN_NUM_CTX = 32768;
|
|
580448
580513
|
_cloudflaredInstallPromise = null;
|
|
580449
580514
|
}
|
|
580450
580515
|
});
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "omnius",
|
|
3
|
-
"version": "1.0.106",
|
|
3
|
+
"version": "1.0.108",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "omnius",
|
|
9
|
-
"version": "1.0.106",
|
|
9
|
+
"version": "1.0.108",
|
|
10
10
|
"bundleDependencies": [
|
|
11
11
|
"image-to-ascii"
|
|
12
12
|
],
|
package/package.json
CHANGED