omnius 1.0.105 → 1.0.107
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +106 -41
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -578020,10 +578020,51 @@ async function needsTextToolMode(modelName, backendUrl2) {
|
|
|
578020
578020
|
const hasTools = await checkToolSupport(modelName, backendUrl2);
|
|
578021
578021
|
return !hasTools;
|
|
578022
578022
|
}
|
|
578023
|
+
/**
 * Parses the CSV rows printed by
 * `nvidia-smi --query-gpu=memory.total,memory.free,name --format=csv,noheader,nounits`
 * and aggregates them over every GPU listed.
 *
 * Each row is expected to look like `<total>, <free>, <name>`. The two numeric
 * fields are divided by 1024 before being summed (nvidia-smi reports MiB with
 * `nounits`, so the sums are in GiB).
 *
 * @param {string} stdout - Raw command output; `null`/`undefined` is treated as empty.
 * @returns {{ total: number, free: number, name: string }} Summed total and
 *   free memory, plus the name of the first row that carries one ("" if none).
 */
function parseNvidiaSmi(stdout) {
  let total = 0;
  let free = 0;
  let name10 = "";
  // Split on both LF and CRLF and drop blank rows so trailing newlines are harmless.
  const rows = String(stdout ?? "")
    .split(/\r?\n/)
    .map((row) => row.trim())
    .filter(Boolean);
  for (const row of rows) {
    const parts = row.split(",").map((s2) => s2.trim());
    const totMB = Number.parseInt(parts[0] ?? "", 10);
    const freeMB = Number.parseInt(parts[1] ?? "", 10);
    // A row only contributes memory when its total is usable; otherwise a
    // placeholder such as "[N/A]" in the total column would let the free
    // figure be counted on its own and exceed the reported total.
    if (!Number.isNaN(totMB) && totMB >= 0) {
      total += totMB / 1024;
      if (!Number.isNaN(freeMB) && freeMB >= 0) {
        // Free memory can never be larger than the card's total.
        free += Math.min(freeMB, totMB) / 1024;
      }
    }
    if (!name10 && parts.length > 2) {
      // Re-join the remainder so a name that itself contains a comma survives.
      const candidate = parts.slice(2).join(", ").trim();
      if (candidate) name10 = candidate;
    }
  }
  return { total, free, name: name10 };
}
|
|
578038
|
+
/**
 * Extracts total and available memory from the output of `free -b`.
 *
 * The first number on the `Mem:` row is taken as the total. The "available"
 * figure is located by name using the header row, because its position is not
 * fixed: older `free` layouts (total/used/free/shared/buffers/cached) have no
 * such column, and the wide layout has it in seventh place. When the header
 * has no "available" column, 0 is returned. When no header row is present at
 * all, the sixth number on the `Mem:` row is used.
 *
 * @param {string} stdout - Raw command output; `null`/`undefined` is treated as empty.
 * @returns {{ total: number, available: number }} Values as printed on the
 *   `Mem:` row (bytes for `free -b`); both are 0 when nothing could be parsed.
 */
function parseFreeBytes(stdout) {
  const lines = String(stdout ?? "").split(/\r?\n/);
  const memIdx = lines.findIndex((l2) => /^\s*Mem:/i.test(l2));
  if (memIdx === -1) return { total: 0, available: 0 };

  const nums = lines[memIdx].match(/\d+/g);
  if (!nums || nums.length === 0) return { total: 0, available: 0 };
  const total = Number.parseInt(nums[0], 10) || 0;

  // Find the nearest preceding row that names the columns.
  let header;
  for (let i = memIdx - 1; i >= 0; i -= 1) {
    if (/\btotal\b/i.test(lines[i])) {
      header = lines[i];
      break;
    }
  }

  let availableIdx;
  if (header !== undefined) {
    // Header tokens line up one-to-one with the numbers on the Mem: row
    // (the "Mem:" label itself contains no digits).
    const columns = header.trim().toLowerCase().split(/\s+/);
    availableIdx = columns.indexOf("available");
  } else {
    // No header to consult: fall back to the sixth number on the row.
    availableIdx = nums.length >= 6 ? 5 : -1;
  }

  const available =
    availableIdx >= 0 && availableIdx < nums.length
      ? Number.parseInt(nums[availableIdx], 10) || 0
      : 0;
  return { total, available };
}
|
|
578046
|
+
/**
 * Parses the CSV printed by `rocm-smi --showmeminfo vram --csv` and aggregates
 * VRAM figures over every device row.
 *
 * The "total" and "used" columns are identified from the header row (the row
 * beginning with "device") rather than by fixed position, since assuming
 * `device,used,total` mis-reads output whose header lists the total first.
 * NOTE(review): rocm-smi is believed to emit "Total Memory" before
 * "Total Used Memory"; confirm against a real device. When no usable header is
 * seen, columns 1 and 2 are read as used/total and swapped if that would make
 * used exceed total.
 *
 * Byte counts are divided by 1024 ** 3 before being summed.
 *
 * @param {string} stdout - Raw command output; `null`/`undefined` is treated as empty.
 * @returns {{ total: number, free: number, name: string }} Summed total, the
 *   non-negative difference total - used, and "AMD <first device id>" (or
 *   "AMD GPU" when no device row was found).
 */
function parseRocmSmi(stdout) {
  let total = 0;
  let used = 0;
  let name10 = "";
  let totalCol = -1;
  let usedCol = -1;
  const rows = String(stdout ?? "")
    .split(/\r?\n/)
    .map((row) => row.trim())
    .filter(Boolean);
  for (const row of rows) {
    // Separator rows made of "=" carry no data.
    if (row.startsWith("=")) continue;
    const parts = row.split(",").map((s2) => s2.trim());
    if (row.toLowerCase().startsWith("device")) {
      // Header row: remember which column holds which figure. "used" is
      // checked first because the used-memory label also contains "total".
      parts.forEach((label, idx) => {
        if (idx === 0) return;
        const lower = label.toLowerCase();
        if (lower.includes("used")) {
          if (usedCol === -1) usedCol = idx;
        } else if (lower.includes("total")) {
          if (totalCol === -1) totalCol = idx;
        }
      });
      continue;
    }
    if (parts.length < 3) continue;

    let usedBytes;
    let totBytes;
    if (totalCol > 0 && usedCol > 0) {
      totBytes = Number.parseInt(parts[totalCol] ?? "", 10);
      usedBytes = Number.parseInt(parts[usedCol] ?? "", 10);
    } else {
      usedBytes = Number.parseInt(parts[1] ?? "", 10);
      totBytes = Number.parseInt(parts[2] ?? "", 10);
      // Used memory cannot exceed the total, so a larger "used" value means
      // the two columns are in the opposite order.
      if (!Number.isNaN(usedBytes) && !Number.isNaN(totBytes) && usedBytes > totBytes) {
        [usedBytes, totBytes] = [totBytes, usedBytes];
      }
    }
    if (!Number.isNaN(usedBytes)) used += usedBytes / 1024 ** 3;
    if (!Number.isNaN(totBytes)) total += totBytes / 1024 ** 3;
    if (!name10 && parts[0]) name10 = parts[0];
  }
  return { total, free: Math.max(0, total - used), name: name10 ? `AMD ${name10}` : "AMD GPU" };
}
|
|
578023
578063
|
function detectSystemSpecs() {
|
|
578024
578064
|
let totalRamGB = 0;
|
|
578025
578065
|
let availableRamGB = 0;
|
|
578026
578066
|
let gpuVramGB = 0;
|
|
578067
|
+
let availableVramGB = 0;
|
|
578027
578068
|
let gpuName = "";
|
|
578028
578069
|
try {
|
|
578029
578070
|
const memInfo = execSync50("free -b 2>/dev/null || sysctl -n hw.memsize 2>/dev/null", {
|
|
@@ -578031,14 +578072,14 @@ function detectSystemSpecs() {
|
|
|
578031
578072
|
timeout: 5e3
|
|
578032
578073
|
});
|
|
578033
578074
|
if (memInfo.includes("Mem:")) {
|
|
578034
|
-
const
|
|
578035
|
-
if (
|
|
578036
|
-
totalRamGB =
|
|
578037
|
-
availableRamGB =
|
|
578075
|
+
const { total, available } = parseFreeBytes(memInfo);
|
|
578076
|
+
if (total > 0) {
|
|
578077
|
+
totalRamGB = total / 1024 ** 3;
|
|
578078
|
+
availableRamGB = available > 0 ? available / 1024 ** 3 : totalRamGB * 0.75;
|
|
578038
578079
|
}
|
|
578039
578080
|
} else {
|
|
578040
578081
|
const bytes = parseInt(memInfo.trim(), 10);
|
|
578041
|
-
if (!isNaN(bytes)) {
|
|
578082
|
+
if (!isNaN(bytes) && bytes > 0) {
|
|
578042
578083
|
totalRamGB = bytes / 1024 ** 3;
|
|
578043
578084
|
availableRamGB = totalRamGB * 0.7;
|
|
578044
578085
|
}
|
|
@@ -578047,24 +578088,33 @@ function detectSystemSpecs() {
|
|
|
578047
578088
|
}
|
|
578048
578089
|
try {
|
|
578049
578090
|
const nvidiaSmi = execSync50(
|
|
578050
|
-
"nvidia-smi --query-gpu=memory.total,name --format=csv,noheader,nounits 2>/dev/null",
|
|
578091
|
+
"nvidia-smi --query-gpu=memory.total,memory.free,name --format=csv,noheader,nounits 2>/dev/null",
|
|
578051
578092
|
{ encoding: "utf8", timeout: 5e3 }
|
|
578052
578093
|
);
|
|
578053
|
-
const
|
|
578054
|
-
|
|
578055
|
-
|
|
578056
|
-
|
|
578057
|
-
const vramMB = parseInt(parts[0] ?? "0", 10);
|
|
578058
|
-
if (!isNaN(vramMB)) gpuVramGB += vramMB / 1024;
|
|
578059
|
-
if (!gpuName && parts[1]) gpuName = parts[1];
|
|
578060
|
-
}
|
|
578061
|
-
}
|
|
578094
|
+
const r2 = parseNvidiaSmi(nvidiaSmi);
|
|
578095
|
+
gpuVramGB += r2.total;
|
|
578096
|
+
availableVramGB += r2.free;
|
|
578097
|
+
if (!gpuName && r2.name) gpuName = r2.name;
|
|
578062
578098
|
} catch {
|
|
578063
578099
|
}
|
|
578100
|
+
if (gpuVramGB === 0) {
|
|
578101
|
+
try {
|
|
578102
|
+
const rocmSmi = execSync50(
|
|
578103
|
+
"rocm-smi --showmeminfo vram --csv 2>/dev/null",
|
|
578104
|
+
{ encoding: "utf8", timeout: 5e3 }
|
|
578105
|
+
);
|
|
578106
|
+
const r2 = parseRocmSmi(rocmSmi);
|
|
578107
|
+
gpuVramGB += r2.total;
|
|
578108
|
+
availableVramGB += r2.free;
|
|
578109
|
+
if (!gpuName && r2.name) gpuName = r2.name;
|
|
578110
|
+
} catch {
|
|
578111
|
+
}
|
|
578112
|
+
}
|
|
578064
578113
|
return {
|
|
578065
578114
|
totalRamGB: Math.round(totalRamGB * 10) / 10,
|
|
578066
578115
|
availableRamGB: Math.round(availableRamGB * 10) / 10,
|
|
578067
578116
|
gpuVramGB: Math.round(gpuVramGB * 10) / 10,
|
|
578117
|
+
availableVramGB: Math.round(availableVramGB * 10) / 10,
|
|
578068
578118
|
gpuName
|
|
578069
578119
|
};
|
|
578070
578120
|
}
|
|
@@ -578072,6 +578122,7 @@ async function detectSystemSpecsAsync() {
|
|
|
578072
578122
|
let totalRamGB = 0;
|
|
578073
578123
|
let availableRamGB = 0;
|
|
578074
578124
|
let gpuVramGB = 0;
|
|
578125
|
+
let availableVramGB = 0;
|
|
578075
578126
|
let gpuName = "";
|
|
578076
578127
|
try {
|
|
578077
578128
|
const { stdout: memInfo } = await execAsync2(
|
|
@@ -578079,14 +578130,14 @@ async function detectSystemSpecsAsync() {
|
|
|
578079
578130
|
{ timeout: 5e3 }
|
|
578080
578131
|
);
|
|
578081
578132
|
if (memInfo.includes("Mem:")) {
|
|
578082
|
-
const
|
|
578083
|
-
if (
|
|
578084
|
-
totalRamGB =
|
|
578085
|
-
availableRamGB =
|
|
578133
|
+
const { total, available } = parseFreeBytes(memInfo);
|
|
578134
|
+
if (total > 0) {
|
|
578135
|
+
totalRamGB = total / 1024 ** 3;
|
|
578136
|
+
availableRamGB = available > 0 ? available / 1024 ** 3 : totalRamGB * 0.75;
|
|
578086
578137
|
}
|
|
578087
578138
|
} else {
|
|
578088
578139
|
const bytes = parseInt(memInfo.trim(), 10);
|
|
578089
|
-
if (!isNaN(bytes)) {
|
|
578140
|
+
if (!isNaN(bytes) && bytes > 0) {
|
|
578090
578141
|
totalRamGB = bytes / 1024 ** 3;
|
|
578091
578142
|
availableRamGB = totalRamGB * 0.7;
|
|
578092
578143
|
}
|
|
@@ -578095,24 +578146,33 @@ async function detectSystemSpecsAsync() {
|
|
|
578095
578146
|
}
|
|
578096
578147
|
try {
|
|
578097
578148
|
const { stdout: nvidiaSmi } = await execAsync2(
|
|
578098
|
-
"nvidia-smi --query-gpu=memory.total,name --format=csv,noheader,nounits 2>/dev/null",
|
|
578149
|
+
"nvidia-smi --query-gpu=memory.total,memory.free,name --format=csv,noheader,nounits 2>/dev/null",
|
|
578099
578150
|
{ timeout: 5e3 }
|
|
578100
578151
|
);
|
|
578101
|
-
const
|
|
578102
|
-
|
|
578103
|
-
|
|
578104
|
-
|
|
578105
|
-
const vramMB = parseInt(parts[0] ?? "0", 10);
|
|
578106
|
-
if (!isNaN(vramMB)) gpuVramGB += vramMB / 1024;
|
|
578107
|
-
if (!gpuName && parts[1]) gpuName = parts[1];
|
|
578108
|
-
}
|
|
578109
|
-
}
|
|
578152
|
+
const r2 = parseNvidiaSmi(nvidiaSmi);
|
|
578153
|
+
gpuVramGB += r2.total;
|
|
578154
|
+
availableVramGB += r2.free;
|
|
578155
|
+
if (!gpuName && r2.name) gpuName = r2.name;
|
|
578110
578156
|
} catch {
|
|
578111
578157
|
}
|
|
578158
|
+
if (gpuVramGB === 0) {
|
|
578159
|
+
try {
|
|
578160
|
+
const { stdout: rocmSmi } = await execAsync2(
|
|
578161
|
+
"rocm-smi --showmeminfo vram --csv 2>/dev/null",
|
|
578162
|
+
{ timeout: 5e3 }
|
|
578163
|
+
);
|
|
578164
|
+
const r2 = parseRocmSmi(rocmSmi);
|
|
578165
|
+
gpuVramGB += r2.total;
|
|
578166
|
+
availableVramGB += r2.free;
|
|
578167
|
+
if (!gpuName && r2.name) gpuName = r2.name;
|
|
578168
|
+
} catch {
|
|
578169
|
+
}
|
|
578170
|
+
}
|
|
578112
578171
|
return {
|
|
578113
578172
|
totalRamGB: Math.round(totalRamGB * 10) / 10,
|
|
578114
578173
|
availableRamGB: Math.round(availableRamGB * 10) / 10,
|
|
578115
578174
|
gpuVramGB: Math.round(gpuVramGB * 10) / 10,
|
|
578175
|
+
availableVramGB: Math.round(availableVramGB * 10) / 10,
|
|
578116
578176
|
gpuName
|
|
578117
578177
|
};
|
|
578118
578178
|
}
|
|
@@ -578127,9 +578187,10 @@ function recommendModel(specs) {
|
|
|
578127
578187
|
}
|
|
578128
578188
|
return QWEN_VARIANTS.find((v) => v.tag === "qwen3.5:cloud");
|
|
578129
578189
|
}
|
|
578130
|
-
function calculateContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
|
|
578190
|
+
function calculateMemoryBoundedNumCtx(specs, modelSizeGB2, kvBytesPerToken, archMax) {
|
|
578131
578191
|
const ramBudget = specs.availableRamGB > 0 ? specs.availableRamGB : specs.totalRamGB;
|
|
578132
|
-
const
|
|
578192
|
+
const vramBudget = specs.availableVramGB > 0 ? specs.availableVramGB : specs.gpuVramGB;
|
|
578193
|
+
const totalAvail = Math.max(vramBudget, ramBudget);
|
|
578133
578194
|
const remaining = Math.max(0, totalAvail - modelSizeGB2);
|
|
578134
578195
|
const usableGB = remaining * 0.85;
|
|
578135
578196
|
let numCtx;
|
|
@@ -578141,7 +578202,6 @@ function calculateContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
|
|
|
578141
578202
|
const maxTokens = Math.floor(usableGB * 1024 ** 3 / kvEstimate);
|
|
578142
578203
|
numCtx = Math.max(2048, Math.floor(maxTokens / 1024) * 1024);
|
|
578143
578204
|
}
|
|
578144
|
-
numCtx = Math.min(numCtx, 131072);
|
|
578145
578205
|
if (archMax && archMax > 0) numCtx = Math.min(numCtx, archMax);
|
|
578146
578206
|
if (kvBytesPerToken && kvBytesPerToken > 0 && modelSizeGB2 > 0) {
|
|
578147
578207
|
const maxKVBytes = modelSizeGB2 * 4 * 1024 ** 3;
|
|
@@ -578149,6 +578209,11 @@ function calculateContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
|
|
|
578149
578209
|
const budgetCap = Math.max(2048, Math.floor(maxTokensFromBudget / 1024) * 1024);
|
|
578150
578210
|
numCtx = Math.min(numCtx, budgetCap);
|
|
578151
578211
|
}
|
|
578212
|
+
return numCtx;
|
|
578213
|
+
}
|
|
578214
|
+
/**
 * Computes the context window to use for a model: the memory-bounded token
 * count from calculateMemoryBoundedNumCtx, capped at 131072 tokens.
 *
 * @param specs - System specs object, forwarded unchanged.
 * @param modelSizeGB2 - Model size, forwarded unchanged.
 * @param kvBytesPerToken - KV-cache bytes per token, forwarded unchanged.
 * @param archMax - Architectural context limit, forwarded unchanged.
 * @returns {{ numCtx: number, label: string }} The capped token count and its
 *   display label as produced by formatContextLabel.
 */
function calculateContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
  const memoryBounded = calculateMemoryBoundedNumCtx(specs, modelSizeGB2, kvBytesPerToken, archMax);
  const numCtx = Math.min(memoryBounded, 131072);
  // Use the shared formatter instead of repeating its expression here, so the
  // label always matches the one calculateExpandedVariantContextWindow produces.
  return { numCtx, label: formatContextLabel(numCtx) };
}
|
|
@@ -578156,18 +578221,17 @@ function formatContextLabel(numCtx) {
|
|
|
578156
578221
|
return numCtx >= 1024 ? `${Math.floor(numCtx / 1024)}K` : String(numCtx);
|
|
578157
578222
|
}
|
|
578158
578223
|
/**
 * Picks the context window for an "expanded" model variant.
 *
 * Starts from the memory-bounded token count, limits it to the architectural
 * ceiling (archMax rounded down to a multiple of 1024, never below 2048), and
 * then raises it to at least EXPANDED_VARIANT_MIN_NUM_CTX, with that minimum
 * itself limited to the architectural ceiling. When archMax is missing or not
 * positive, no architectural ceiling applies.
 *
 * @returns {{ numCtx: number, label: string }} The chosen token count and its
 *   display label from formatContextLabel.
 */
function calculateExpandedVariantContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
  const memoryFit = calculateMemoryBoundedNumCtx(specs, modelSizeGB2, kvBytesPerToken, archMax);

  let archCeiling = Number.POSITIVE_INFINITY;
  if (archMax && archMax > 0) {
    const roundedDown = Math.floor(archMax / 1024) * 1024;
    archCeiling = Math.max(2048, roundedDown);
  }

  const minimumCtx = Math.min(EXPANDED_VARIANT_MIN_NUM_CTX, archCeiling);
  const boundedFit = Math.min(memoryFit, archCeiling);
  const numCtx = Math.max(minimumCtx, boundedFit);

  return { numCtx, label: formatContextLabel(numCtx) };
}
|
|
578172
578236
|
function ask(rl, question) {
|
|
578173
578237
|
return new Promise((resolve52) => {
|
|
@@ -580359,7 +580423,7 @@ export PATH="${binDir}:$PATH" # Added by omnius for nvim
|
|
|
580359
580423
|
} catch {
|
|
580360
580424
|
}
|
|
580361
580425
|
}
|
|
580362
|
-
var execAsync2, OMNIUS_FIRST_RUN_BANNER, ANSI_RE, visibleLen2, QWEN_VARIANTS, _toolSupportCache, _cloudflaredInstallPromise;
|
|
580426
|
+
var execAsync2, OMNIUS_FIRST_RUN_BANNER, ANSI_RE, visibleLen2, QWEN_VARIANTS, _toolSupportCache, EXPANDED_VARIANT_MIN_NUM_CTX, _cloudflaredInstallPromise;
|
|
580363
580427
|
var init_setup = __esm({
|
|
580364
580428
|
"packages/cli/src/tui/setup.ts"() {
|
|
580365
580429
|
"use strict";
|
|
@@ -580392,6 +580456,7 @@ var init_setup = __esm({
|
|
|
580392
580456
|
{ tag: "qwen3.5:397b-cloud", sizeGB: 0, label: "397B Cloud (Ollama Cloud)", cloud: true }
|
|
580393
580457
|
];
|
|
580394
580458
|
_toolSupportCache = /* @__PURE__ */ new Map();
|
|
580459
|
+
EXPANDED_VARIANT_MIN_NUM_CTX = 32768;
|
|
580395
580460
|
_cloudflaredInstallPromise = null;
|
|
580396
580461
|
}
|
|
580397
580462
|
});
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "omnius",
|
|
3
|
-
"version": "1.0.105",
|
|
3
|
+
"version": "1.0.107",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "omnius",
|
|
9
|
-
"version": "1.0.105",
|
|
9
|
+
"version": "1.0.107",
|
|
10
10
|
"bundleDependencies": [
|
|
11
11
|
"image-to-ascii"
|
|
12
12
|
],
|
package/package.json
CHANGED