omnius 1.0.107 → 1.0.108
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +61 -8
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -578020,6 +578020,23 @@ async function needsTextToolMode(modelName, backendUrl2) {
|
|
|
578020
578020
|
const hasTools = await checkToolSupport(modelName, backendUrl2);
|
|
578021
578021
|
return !hasTools;
|
|
578022
578022
|
}
|
|
578023
|
+
function detectUnifiedMemory() {
|
|
578024
|
+
if (process.platform === "darwin" && process.arch === "arm64") return true;
|
|
578025
|
+
if (process.platform === "linux") {
|
|
578026
|
+
try {
|
|
578027
|
+
if (existsSync91("/etc/nv_tegra_release")) return true;
|
|
578028
|
+
} catch {
|
|
578029
|
+
}
|
|
578030
|
+
try {
|
|
578031
|
+
if (existsSync91("/sys/devices/soc0/family")) {
|
|
578032
|
+
const family = readFileSync74("/sys/devices/soc0/family", "utf8").trim().toLowerCase();
|
|
578033
|
+
if (family.includes("tegra")) return true;
|
|
578034
|
+
}
|
|
578035
|
+
} catch {
|
|
578036
|
+
}
|
|
578037
|
+
}
|
|
578038
|
+
return false;
|
|
578039
|
+
}
|
|
578023
578040
|
function parseNvidiaSmi(stdout) {
|
|
578024
578041
|
let total = 0;
|
|
578025
578042
|
let free = 0;
|
|
@@ -578110,12 +578127,18 @@ function detectSystemSpecs() {
|
|
|
578110
578127
|
} catch {
|
|
578111
578128
|
}
|
|
578112
578129
|
}
|
|
578130
|
+
const unifiedMemory = detectUnifiedMemory();
|
|
578131
|
+
if (unifiedMemory && totalRamGB > 0) {
|
|
578132
|
+
const floorGB = totalRamGB * 0.8;
|
|
578133
|
+
if (availableRamGB < floorGB) availableRamGB = floorGB;
|
|
578134
|
+
}
|
|
578113
578135
|
return {
|
|
578114
578136
|
totalRamGB: Math.round(totalRamGB * 10) / 10,
|
|
578115
578137
|
availableRamGB: Math.round(availableRamGB * 10) / 10,
|
|
578116
578138
|
gpuVramGB: Math.round(gpuVramGB * 10) / 10,
|
|
578117
578139
|
availableVramGB: Math.round(availableVramGB * 10) / 10,
|
|
578118
|
-
gpuName
|
|
578140
|
+
gpuName,
|
|
578141
|
+
unifiedMemory
|
|
578119
578142
|
};
|
|
578120
578143
|
}
|
|
578121
578144
|
async function detectSystemSpecsAsync() {
|
|
@@ -578168,12 +578191,18 @@ async function detectSystemSpecsAsync() {
|
|
|
578168
578191
|
} catch {
|
|
578169
578192
|
}
|
|
578170
578193
|
}
|
|
578194
|
+
const unifiedMemory = detectUnifiedMemory();
|
|
578195
|
+
if (unifiedMemory && totalRamGB > 0) {
|
|
578196
|
+
const floorGB = totalRamGB * 0.8;
|
|
578197
|
+
if (availableRamGB < floorGB) availableRamGB = floorGB;
|
|
578198
|
+
}
|
|
578171
578199
|
return {
|
|
578172
578200
|
totalRamGB: Math.round(totalRamGB * 10) / 10,
|
|
578173
578201
|
availableRamGB: Math.round(availableRamGB * 10) / 10,
|
|
578174
578202
|
gpuVramGB: Math.round(gpuVramGB * 10) / 10,
|
|
578175
578203
|
availableVramGB: Math.round(availableVramGB * 10) / 10,
|
|
578176
|
-
gpuName
|
|
578204
|
+
gpuName,
|
|
578205
|
+
unifiedMemory
|
|
578177
578206
|
};
|
|
578178
578207
|
}
|
|
578179
578208
|
function recommendModel(specs) {
|
|
@@ -578198,7 +578227,16 @@ function calculateMemoryBoundedNumCtx(specs, modelSizeGB2, kvBytesPerToken, arch
|
|
|
578198
578227
|
const maxTokens = Math.floor(usableGB * 1024 ** 3 / kvBytesPerToken);
|
|
578199
578228
|
numCtx = Math.max(2048, Math.floor(maxTokens / 1024) * 1024);
|
|
578200
578229
|
} else {
|
|
578201
|
-
const kvEstimate = modelSizeGB2 <= 5 ?
|
|
578230
|
+
const kvEstimate = modelSizeGB2 <= 5 ? 64 * 1024 : (
|
|
578231
|
+
// 4B-class
|
|
578232
|
+
modelSizeGB2 <= 12 ? 160 * 1024 : (
|
|
578233
|
+
// 9B-class
|
|
578234
|
+
modelSizeGB2 <= 25 ? 256 * 1024 : (
|
|
578235
|
+
// 30B-class
|
|
578236
|
+
384 * 1024
|
|
578237
|
+
)
|
|
578238
|
+
)
|
|
578239
|
+
);
|
|
578202
578240
|
const maxTokens = Math.floor(usableGB * 1024 ** 3 / kvEstimate);
|
|
578203
578241
|
numCtx = Math.max(2048, Math.floor(maxTokens / 1024) * 1024);
|
|
578204
578242
|
}
|
|
@@ -579999,6 +580037,17 @@ function modelSizeGB(models, modelName) {
|
|
|
579999
580037
|
const known = QWEN_VARIANTS.find((v) => modelName.includes(v.tag.split(":")[1] ?? ""));
|
|
580000
580038
|
return known?.sizeGB ?? 4;
|
|
580001
580039
|
}
|
|
580040
|
+
function defaultLayersForArch(arch3) {
|
|
580041
|
+
const a2 = arch3.toLowerCase();
|
|
580042
|
+
if (a2.includes("qwen")) return 36;
|
|
580043
|
+
if (a2.includes("llama")) return 32;
|
|
580044
|
+
if (a2.includes("mistral")) return 32;
|
|
580045
|
+
if (a2.includes("phi")) return 32;
|
|
580046
|
+
if (a2.includes("gemma")) return 42;
|
|
580047
|
+
if (a2.includes("granite")) return 40;
|
|
580048
|
+
if (a2.includes("command")) return 40;
|
|
580049
|
+
return 32;
|
|
580050
|
+
}
|
|
580002
580051
|
async function queryModelKVInfo(backendUrl2, modelName) {
|
|
580003
580052
|
try {
|
|
580004
580053
|
const normalized = backendUrl2.replace(/\/+$/, "");
|
|
@@ -580014,13 +580063,17 @@ async function queryModelKVInfo(backendUrl2, modelName) {
|
|
|
580014
580063
|
const info = data.model_info;
|
|
580015
580064
|
const arch3 = info["general.architecture"];
|
|
580016
580065
|
if (!arch3) return null;
|
|
580017
|
-
const
|
|
580018
|
-
const
|
|
580019
|
-
const
|
|
580020
|
-
const
|
|
580066
|
+
const nLayersRaw = info[`${arch3}.block_count`];
|
|
580067
|
+
const nKVHeadsRaw = info[`${arch3}.attention.head_count_kv`] ?? info[`${arch3}.attention.head_count`];
|
|
580068
|
+
const keyDimRaw = info[`${arch3}.attention.key_length`];
|
|
580069
|
+
const valDimRaw = info[`${arch3}.attention.value_length`] ?? keyDimRaw;
|
|
580021
580070
|
const archMax = info[`${arch3}.context_length`];
|
|
580022
580071
|
if (!archMax) return null;
|
|
580023
|
-
|
|
580072
|
+
const keyDim = keyDimRaw ?? 128;
|
|
580073
|
+
const valDim = valDimRaw ?? 128;
|
|
580074
|
+
const nLayers = nLayersRaw ?? defaultLayersForArch(arch3);
|
|
580075
|
+
const nKVHeads = nKVHeadsRaw ?? 32;
|
|
580076
|
+
if (!nLayers) return { archMax };
|
|
580024
580077
|
const kvBytesPerToken = nLayers * nKVHeads * (keyDim + valDim) * 2;
|
|
580025
580078
|
return { kvBytesPerToken, archMax };
|
|
580026
580079
|
} catch {
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "omnius",
|
|
3
|
-
"version": "1.0.107",
|
|
3
|
+
"version": "1.0.108",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "omnius",
|
|
9
|
-
"version": "1.0.107",
|
|
9
|
+
"version": "1.0.108",
|
|
10
10
|
"bundleDependencies": [
|
|
11
11
|
"image-to-ascii"
|
|
12
12
|
],
|
package/package.json
CHANGED