omnius 1.0.107 → 1.0.109
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +139 -26
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -577955,6 +577955,7 @@ __export(setup_exports, {
|
|
|
577955
577955
|
ensurePythonVenv: () => ensurePythonVenv,
|
|
577956
577956
|
ensureVisionDeps: () => ensureVisionDeps,
|
|
577957
577957
|
expandedModelName: () => expandedModelName,
|
|
577958
|
+
formatExpandedContextDiagnostic: () => formatExpandedContextDiagnostic,
|
|
577958
577959
|
getLatestOllamaVersion: () => getLatestOllamaVersion,
|
|
577959
577960
|
getOllamaVersion: () => getOllamaVersion,
|
|
577960
577961
|
hasCmd: () => hasCmd,
|
|
@@ -578020,6 +578021,33 @@ async function needsTextToolMode(modelName, backendUrl2) {
|
|
|
578020
578021
|
const hasTools = await checkToolSupport(modelName, backendUrl2);
|
|
578021
578022
|
return !hasTools;
|
|
578022
578023
|
}
|
|
578024
|
+
/**
 * Best-effort guess at whether this machine should be treated as having
 * unified (CPU/GPU shared) memory.
 *
 * Checks, in order:
 *   1. darwin + arm64                                  -> true
 *   2. any platform other than linux                   -> false
 *   3. linux: `/etc/nv_tegra_release` exists           -> true
 *   4. linux: `/sys/devices/soc0/family` mentions tegra -> true
 *   5. linux: `/proc/device-tree/model` matches one of the
 *      board/SoC names in the pattern below            -> true
 *   6. linux: arm64 and no discrete GPU reported       -> true
 *
 * Every filesystem probe is deliberately best-effort: a missing or unreadable
 * file simply counts as "no match" and detection continues.
 *
 * @param {boolean} [hasDiscreteGpu=false] - Caller's hint that a discrete GPU
 *   was detected; only consulted by the final arm64 fallback (step 6).
 * @returns {boolean} `true` when one of the heuristics above matches.
 */
function detectUnifiedMemory(hasDiscreteGpu = false) {
  if (process.platform === "darwin" && process.arch === "arm64") return true;
  if (process.platform !== "linux") return false;

  // Reads a probe file as text; any failure (absent, unreadable, not a file)
  // yields null so the caller can move on to the next heuristic.
  const readProbe = (path) => {
    try {
      return readFileSync74(path, "utf8");
    } catch {
      return null;
    }
  };

  try {
    // Presence alone is the signal here; the contents are not inspected.
    if (existsSync91("/etc/nv_tegra_release")) return true;
  } catch {
    // Best-effort probe: fall through to the remaining checks.
  }

  const socFamily = readProbe("/sys/devices/soc0/family");
  if (socFamily !== null && socFamily.trim().toLowerCase().includes("tegra")) {
    return true;
  }

  const boardModel = readProbe("/proc/device-tree/model");
  if (boardModel !== null) {
    // Trailing NUL bytes are stripped before matching.
    const normalized = boardModel.replace(/\0+$/, "").toLowerCase();
    if (/jetson|tegra|orin|xavier|nano|raspberry|rockchip|rk\d{4}|mt\d{4}/.test(normalized)) {
      return true;
    }
  }

  // Last resort on Linux: arm64 with no discrete GPU reported by the caller.
  return process.arch === "arm64" && !hasDiscreteGpu;
}
|
|
578023
578051
|
function parseNvidiaSmi(stdout) {
|
|
578024
578052
|
let total = 0;
|
|
578025
578053
|
let free = 0;
|
|
@@ -578110,12 +578138,22 @@ function detectSystemSpecs() {
|
|
|
578110
578138
|
} catch {
|
|
578111
578139
|
}
|
|
578112
578140
|
}
|
|
578141
|
+
const unifiedMemory = detectUnifiedMemory(gpuVramGB > 0);
|
|
578142
|
+
if (unifiedMemory && totalRamGB > 0) {
|
|
578143
|
+
const floorGB = totalRamGB * 0.8;
|
|
578144
|
+
if (availableRamGB < floorGB) availableRamGB = floorGB;
|
|
578145
|
+
}
|
|
578146
|
+
if (!unifiedMemory && totalRamGB > 0) {
|
|
578147
|
+
const floorGB = totalRamGB * 0.75;
|
|
578148
|
+
if (availableRamGB < floorGB) availableRamGB = floorGB;
|
|
578149
|
+
}
|
|
578113
578150
|
return {
|
|
578114
578151
|
totalRamGB: Math.round(totalRamGB * 10) / 10,
|
|
578115
578152
|
availableRamGB: Math.round(availableRamGB * 10) / 10,
|
|
578116
578153
|
gpuVramGB: Math.round(gpuVramGB * 10) / 10,
|
|
578117
578154
|
availableVramGB: Math.round(availableVramGB * 10) / 10,
|
|
578118
|
-
gpuName
|
|
578155
|
+
gpuName,
|
|
578156
|
+
unifiedMemory
|
|
578119
578157
|
};
|
|
578120
578158
|
}
|
|
578121
578159
|
async function detectSystemSpecsAsync() {
|
|
@@ -578168,12 +578206,22 @@ async function detectSystemSpecsAsync() {
|
|
|
578168
578206
|
} catch {
|
|
578169
578207
|
}
|
|
578170
578208
|
}
|
|
578209
|
+
const unifiedMemory = detectUnifiedMemory(gpuVramGB > 0);
|
|
578210
|
+
if (unifiedMemory && totalRamGB > 0) {
|
|
578211
|
+
const floorGB = totalRamGB * 0.8;
|
|
578212
|
+
if (availableRamGB < floorGB) availableRamGB = floorGB;
|
|
578213
|
+
}
|
|
578214
|
+
if (!unifiedMemory && totalRamGB > 0) {
|
|
578215
|
+
const floorGB = totalRamGB * 0.75;
|
|
578216
|
+
if (availableRamGB < floorGB) availableRamGB = floorGB;
|
|
578217
|
+
}
|
|
578171
578218
|
return {
|
|
578172
578219
|
totalRamGB: Math.round(totalRamGB * 10) / 10,
|
|
578173
578220
|
availableRamGB: Math.round(availableRamGB * 10) / 10,
|
|
578174
578221
|
gpuVramGB: Math.round(gpuVramGB * 10) / 10,
|
|
578175
578222
|
availableVramGB: Math.round(availableVramGB * 10) / 10,
|
|
578176
|
-
gpuName
|
|
578223
|
+
gpuName,
|
|
578224
|
+
unifiedMemory
|
|
578177
578225
|
};
|
|
578178
578226
|
}
|
|
578179
578227
|
function recommendModel(specs) {
|
|
@@ -578198,7 +578246,16 @@ function calculateMemoryBoundedNumCtx(specs, modelSizeGB2, kvBytesPerToken, arch
|
|
|
578198
578246
|
const maxTokens = Math.floor(usableGB * 1024 ** 3 / kvBytesPerToken);
|
|
578199
578247
|
numCtx = Math.max(2048, Math.floor(maxTokens / 1024) * 1024);
|
|
578200
578248
|
} else {
|
|
578201
|
-
const kvEstimate = modelSizeGB2 <= 5 ?
|
|
578249
|
+
const kvEstimate = modelSizeGB2 <= 5 ? 64 * 1024 : (
|
|
578250
|
+
// 4B-class
|
|
578251
|
+
modelSizeGB2 <= 12 ? 160 * 1024 : (
|
|
578252
|
+
// 9B-class
|
|
578253
|
+
modelSizeGB2 <= 25 ? 256 * 1024 : (
|
|
578254
|
+
// 30B-class
|
|
578255
|
+
384 * 1024
|
|
578256
|
+
)
|
|
578257
|
+
)
|
|
578258
|
+
);
|
|
578202
578259
|
const maxTokens = Math.floor(usableGB * 1024 ** 3 / kvEstimate);
|
|
578203
578260
|
numCtx = Math.max(2048, Math.floor(maxTokens / 1024) * 1024);
|
|
578204
578261
|
}
|
|
@@ -578221,17 +578278,41 @@ function formatContextLabel(numCtx) {
|
|
|
578221
578278
|
return numCtx >= 1024 ? `${Math.floor(numCtx / 1024)}K` : String(numCtx);
|
|
578222
578279
|
}
|
|
578223
578280
|
/**
 * Picks the context window (num_ctx) for an expanded model variant and
 * returns it together with the intermediate values that led to it.
 *
 * The result is the memory-bounded context (from calculateMemoryBoundedNumCtx)
 * capped by the architecture maximum, but never below
 * min(EXPANDED_VARIANT_MIN_NUM_CTX, architecture cap).
 *
 * @param {object} specs - System specs; reads availableRamGB, totalRamGB,
 *   availableVramGB and gpuVramGB.
 * @param {number} modelSizeGB2 - Model size in GB.
 * @param {number} [kvBytesPerToken] - KV-cache bytes per token when known;
 *   otherwise a size-tiered fallback is reported in `math`.
 * @param {number} [archMax] - Architecture context length, when known.
 * @returns {{numCtx: number, label: string, math: object}} Chosen context,
 *   its display label, and a `math` record of the inputs/intermediates
 *   (`limitedBy` is one of "floor", "arch", "memory").
 */
function calculateExpandedVariantContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
  // Memory budgets: prefer the "available" figure, fall back to the total.
  let ramBudgetGB = specs.totalRamGB;
  if (specs.availableRamGB > 0) ramBudgetGB = specs.availableRamGB;
  let vramBudgetGB = specs.gpuVramGB;
  if (specs.availableVramGB > 0) vramBudgetGB = specs.availableVramGB;

  const headroomGB = Math.max(0, Math.max(vramBudgetGB, ramBudgetGB) - modelSizeGB2);
  const usableGB = headroomGB * 0.85;

  const memoryFit = calculateMemoryBoundedNumCtx(specs, modelSizeGB2, kvBytesPerToken, archMax);

  // Architecture cap, rounded down to a multiple of 1024 with a 2048 minimum;
  // null when the architecture maximum is unknown.
  const hasArchMax = archMax > 0;
  const archCtxValue = hasArchMax ? Math.max(2048, Math.floor(archMax / 1024) * 1024) : null;
  const archCap = archCtxValue === null ? Number.POSITIVE_INFINITY : archCtxValue;

  const floor = Math.min(EXPANDED_VARIANT_MIN_NUM_CTX, archCap);
  const fits = Math.min(memoryFit, archCap);
  const numCtx = Math.max(floor, fits);

  // Which constraint produced numCtx.
  let limitedBy = "memory";
  if (numCtx === floor && fits < floor) {
    limitedBy = "floor";
  } else if (archCtxValue !== null && numCtx === archCtxValue) {
    limitedBy = "arch";
  }

  // KV bytes/token reported in diagnostics: the supplied value when positive,
  // otherwise a fallback tiered by model size.
  const kvKnown = kvBytesPerToken > 0;
  let effectiveKvBpt = kvBytesPerToken;
  if (!kvKnown) {
    let fallbackKiB = 384;
    if (modelSizeGB2 <= 5) fallbackKiB = 64;
    else if (modelSizeGB2 <= 12) fallbackKiB = 160;
    else if (modelSizeGB2 <= 25) fallbackKiB = 256;
    effectiveKvBpt = fallbackKiB * 1024;
  }

  const label = formatContextLabel(numCtx);
  return {
    numCtx,
    label,
    math: {
      numCtx,
      label,
      modelSizeGB: modelSizeGB2,
      kvBytesPerToken: effectiveKvBpt,
      kvSource: kvKnown ? "model_info" : "fallback",
      archMax: hasArchMax ? archMax : null,
      ramBudgetGB,
      vramBudgetGB,
      usableGB,
      memoryFit,
      archCtx: archCtxValue,
      floor,
      limitedBy
    }
  };
}
|
|
578236
578317
|
function ask(rl, question) {
|
|
578237
578318
|
return new Promise((resolve52) => {
|
|
@@ -579999,6 +580080,17 @@ function modelSizeGB(models, modelName) {
|
|
|
579999
580080
|
const known = QWEN_VARIANTS.find((v) => modelName.includes(v.tag.split(":")[1] ?? ""));
|
|
580000
580081
|
return known?.sizeGB ?? 4;
|
|
580001
580082
|
}
|
|
580083
|
+
/**
 * Heuristic default block (layer) count for a model architecture name,
 * for use when the model metadata does not report one.
 *
 * Matching is case-insensitive substring matching; the first family that
 * matches wins, and anything unrecognised gets the generic default of 32.
 *
 * @param {string} arch3 - Architecture identifier (e.g. the value of
 *   `general.architecture`). Non-string values are treated as unknown
 *   instead of throwing, since the value comes from untyped JSON.
 * @returns {number} Default layer count for the family, or 32.
 */
function defaultLayersForArch(arch3) {
  const GENERIC_DEFAULT = 32;
  // Ordered: earlier entries take precedence when several substrings match.
  const FAMILY_DEFAULTS = [
    ["qwen", 36],
    ["llama", 32],
    ["mistral", 32],
    ["phi", 32],
    ["gemma", 42],
    ["granite", 40],
    ["command", 40]
  ];
  if (typeof arch3 !== "string") return GENERIC_DEFAULT;
  const name = arch3.toLowerCase();
  const hit = FAMILY_DEFAULTS.find(([family]) => name.includes(family));
  return hit ? hit[1] : GENERIC_DEFAULT;
}
|
|
580002
580094
|
async function queryModelKVInfo(backendUrl2, modelName) {
|
|
580003
580095
|
try {
|
|
580004
580096
|
const normalized = backendUrl2.replace(/\/+$/, "");
|
|
@@ -580014,13 +580106,17 @@ async function queryModelKVInfo(backendUrl2, modelName) {
|
|
|
580014
580106
|
const info = data.model_info;
|
|
580015
580107
|
const arch3 = info["general.architecture"];
|
|
580016
580108
|
if (!arch3) return null;
|
|
580017
|
-
const
|
|
580018
|
-
const
|
|
580019
|
-
const
|
|
580020
|
-
const
|
|
580109
|
+
const nLayersRaw = info[`${arch3}.block_count`];
|
|
580110
|
+
const nKVHeadsRaw = info[`${arch3}.attention.head_count_kv`];
|
|
580111
|
+
const keyDimRaw = info[`${arch3}.attention.key_length`];
|
|
580112
|
+
const valDimRaw = info[`${arch3}.attention.value_length`] ?? keyDimRaw;
|
|
580021
580113
|
const archMax = info[`${arch3}.context_length`];
|
|
580022
580114
|
if (!archMax) return null;
|
|
580023
|
-
|
|
580115
|
+
const keyDim = keyDimRaw ?? 128;
|
|
580116
|
+
const valDim = valDimRaw ?? 128;
|
|
580117
|
+
const nLayers = nLayersRaw ?? defaultLayersForArch(arch3);
|
|
580118
|
+
const nKVHeads = nKVHeadsRaw ?? 8;
|
|
580119
|
+
if (!nLayers) return { archMax };
|
|
580024
580120
|
const kvBytesPerToken = nLayers * nKVHeads * (keyDim + valDim) * 2;
|
|
580025
580121
|
return { kvBytesPerToken, archMax };
|
|
580026
580122
|
} catch {
|
|
@@ -580151,6 +580247,21 @@ async function createExpandedVariantAsync(baseModel, specs, sizeGB, kvBytesPerTo
|
|
|
580151
580247
|
archMax
|
|
580152
580248
|
);
|
|
580153
580249
|
}
|
|
580250
|
+
/**
 * Renders a one-line, bracketed summary of how an expanded context size was
 * chosen, e.g.
 * `[VRAM 20.5GB/24.0GB, RAM 51.2GB/64.0GB | model 9.0GB | KV 160KB/tok (fallback) | fit 32K, arch 128K, floor 8K → 32K (memory-fit)]`.
 *
 * @param {object} specs - System specs; reads gpuVramGB, availableVramGB,
 *   availableRamGB, totalRamGB and unifiedMemory.
 * @param {object} math - Context calculation record; reads kvBytesPerToken,
 *   kvSource, memoryFit, archCtx (null when unknown), floor, numCtx,
 *   limitedBy and modelSizeGB.
 * @returns {string} The formatted diagnostic line.
 */
function formatExpandedContextDiagnostic(specs, math) {
  const gigabytes = (value) => `${value.toFixed(1)}GB`;
  const kilobytes = (bytes) => `${Math.round(bytes / 1024)}KB`;
  const tokens = (count) => {
    if (count >= 1024) return `${Math.floor(count / 1024)}K`;
    return String(count);
  };

  // Memory section: VRAM only appears when a GPU with VRAM was detected.
  const memorySections = [];
  if (specs.gpuVramGB > 0) {
    const shownVram = specs.availableVramGB || specs.gpuVramGB;
    memorySections.push(`VRAM ${gigabytes(shownVram)}/${gigabytes(specs.gpuVramGB)}`);
  }
  const unifiedSuffix = specs.unifiedMemory ? " unified" : "";
  memorySections.push(`RAM ${gigabytes(specs.availableRamGB)}/${gigabytes(specs.totalRamGB)}${unifiedSuffix}`);

  const archText = math.archCtx !== null ? tokens(math.archCtx) : "n/a";

  let reason;
  switch (math.limitedBy) {
    case "floor":
      reason = "min floor";
      break;
    case "arch":
      reason = "arch-capped";
      break;
    default:
      reason = "memory-fit";
  }

  const sections = [
    memorySections.join(", "),
    `model ${gigabytes(math.modelSizeGB)}`,
    `KV ${kilobytes(math.kvBytesPerToken)}/tok (${math.kvSource})`,
    `fit ${tokens(math.memoryFit)}, arch ${archText}, floor ${tokens(math.floor)} → ${tokens(math.numCtx)} (${reason})`
  ];
  return `[${sections.join(" | ")}]`;
}
|
|
580154
580265
|
async function ensureExpandedContext(modelName, backendUrl2) {
|
|
580155
580266
|
if (modelName.includes("cloud") || modelName.includes(":cloud")) {
|
|
580156
580267
|
return { model: modelName, created: false, contextLabel: "remote", numCtx: 0 };
|
|
@@ -580177,11 +580288,11 @@ async function ensureExpandedContext(modelName, backendUrl2) {
|
|
|
580177
580288
|
kvInfo?.kvBytesPerToken,
|
|
580178
580289
|
kvInfo?.archMax
|
|
580179
580290
|
).catch(() => ({ repaired: false, currentNumCtx: 0, baseModel: null, resolvedModel: modelName }));
|
|
580180
|
-
return { model: repair.resolvedModel, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
|
|
580291
|
+
return { model: repair.resolvedModel, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
|
|
580181
580292
|
}
|
|
580182
580293
|
const existing = await checkExpandedVariant(modelName, backendUrl2);
|
|
580183
580294
|
if (existing === null) {
|
|
580184
|
-
return { model: modelName, created: false, contextLabel: "", numCtx: 0 };
|
|
580295
|
+
return { model: modelName, created: false, contextLabel: "", numCtx: 0, specs, math: ctx3.math };
|
|
580185
580296
|
}
|
|
580186
580297
|
if (typeof existing === "string") {
|
|
580187
580298
|
const lostTools = await wrapperLacksToolsCapability(backendUrl2, existing).catch(() => false);
|
|
@@ -580196,7 +580307,7 @@ async function ensureExpandedContext(modelName, backendUrl2) {
|
|
|
580196
580307
|
kvInfo?.archMax
|
|
580197
580308
|
);
|
|
580198
580309
|
if (rebuilt) {
|
|
580199
|
-
return { model: rebuilt, created: true, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
|
|
580310
|
+
return { model: rebuilt, created: true, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
|
|
580200
580311
|
}
|
|
580201
580312
|
} catch {
|
|
580202
580313
|
}
|
|
@@ -580211,13 +580322,13 @@ async function ensureExpandedContext(modelName, backendUrl2) {
|
|
|
580211
580322
|
kvInfo?.kvBytesPerToken,
|
|
580212
580323
|
kvInfo?.archMax
|
|
580213
580324
|
).catch(() => ({ repaired: false, currentNumCtx: 0, baseModel: null, resolvedModel: existing }));
|
|
580214
|
-
return { model: repair.resolvedModel, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
|
|
580325
|
+
return { model: repair.resolvedModel, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
|
|
580215
580326
|
}
|
|
580216
580327
|
const created = await createExpandedVariantAsync(modelName, specs, sizeGB, kvInfo?.kvBytesPerToken, kvInfo?.archMax);
|
|
580217
580328
|
if (created) {
|
|
580218
|
-
return { model: created, created: true, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
|
|
580329
|
+
return { model: created, created: true, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
|
|
580219
580330
|
}
|
|
580220
|
-
return { model: modelName, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
|
|
580331
|
+
return { model: modelName, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
|
|
580221
580332
|
}
|
|
580222
580333
|
function guessBaseFromVariant(variantName, models) {
|
|
580223
580334
|
const stripped = stripVariantTag(variantName);
|
|
@@ -602188,14 +602299,15 @@ async function switchModel(query, ctx3, local = false) {
|
|
|
602188
602299
|
match.name,
|
|
602189
602300
|
ctx3.config.backendUrl
|
|
602190
602301
|
);
|
|
602302
|
+
const diag = result.specs && result.math ? "\n " + c3.dim(formatExpandedContextDiagnostic(result.specs, result.math)) : "";
|
|
602191
602303
|
if (result.created) {
|
|
602192
602304
|
renderInfo(
|
|
602193
|
-
`Created expanded context variant: ${c3.bold(result.model)} (${result.contextLabel}, ${result.numCtx} tokens)`
|
|
602305
|
+
`Created expanded context variant: ${c3.bold(result.model)} (${result.contextLabel}, ${result.numCtx} tokens)${diag}`
|
|
602194
602306
|
);
|
|
602195
602307
|
finalModel = result.model;
|
|
602196
602308
|
} else if (result.model !== match.name) {
|
|
602197
602309
|
renderInfo(
|
|
602198
|
-
`Using expanded context variant: ${c3.bold(result.model)} (${result.contextLabel})`
|
|
602310
|
+
`Using expanded context variant: ${c3.bold(result.model)} (${result.contextLabel})${diag}`
|
|
602199
602311
|
);
|
|
602200
602312
|
finalModel = result.model;
|
|
602201
602313
|
}
|
|
@@ -653743,13 +653855,14 @@ This is an independent background session started from /background.`
|
|
|
653743
653855
|
currentConfig.model,
|
|
653744
653856
|
currentConfig.backendUrl
|
|
653745
653857
|
);
|
|
653858
|
+
const diag = expandResult.specs && expandResult.math ? "\n " + c3.dim(formatExpandedContextDiagnostic(expandResult.specs, expandResult.math)) : "";
|
|
653746
653859
|
if (expandResult.created) {
|
|
653747
653860
|
config = { ...config, model: expandResult.model };
|
|
653748
653861
|
currentConfig = { ...currentConfig, model: expandResult.model };
|
|
653749
653862
|
statusBar.setModelName(expandResult.model);
|
|
653750
653863
|
writeContent(
|
|
653751
653864
|
() => renderInfo(
|
|
653752
|
-
`Created expanded context model: ${expandResult.model} (${expandResult.contextLabel}, ${expandResult.numCtx} tokens)`
|
|
653865
|
+
`Created expanded context model: ${expandResult.model} (${expandResult.contextLabel}, ${expandResult.numCtx} tokens)${diag}`
|
|
653753
653866
|
)
|
|
653754
653867
|
);
|
|
653755
653868
|
} else if (expandResult.model !== currentConfig.model) {
|
|
@@ -653758,7 +653871,7 @@ This is an independent background session started from /background.`
|
|
|
653758
653871
|
statusBar.setModelName(expandResult.model);
|
|
653759
653872
|
writeContent(
|
|
653760
653873
|
() => renderInfo(
|
|
653761
|
-
`Using expanded context model: ${expandResult.model} (${expandResult.contextLabel})`
|
|
653874
|
+
`Using expanded context model: ${expandResult.model} (${expandResult.contextLabel})${diag}`
|
|
653762
653875
|
)
|
|
653763
653876
|
);
|
|
653764
653877
|
}
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "omnius",
|
|
3
|
-
"version": "1.0.107",
|
|
3
|
+
"version": "1.0.109",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "omnius",
|
|
9
|
-
"version": "1.0.107",
|
|
9
|
+
"version": "1.0.109",
|
|
10
10
|
"bundleDependencies": [
|
|
11
11
|
"image-to-ascii"
|
|
12
12
|
],
|
package/package.json
CHANGED