omnius 1.0.108 → 1.0.109
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +83 -23
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -577955,6 +577955,7 @@ __export(setup_exports, {
|
|
|
577955
577955
|
ensurePythonVenv: () => ensurePythonVenv,
|
|
577956
577956
|
ensureVisionDeps: () => ensureVisionDeps,
|
|
577957
577957
|
expandedModelName: () => expandedModelName,
|
|
577958
|
+
formatExpandedContextDiagnostic: () => formatExpandedContextDiagnostic,
|
|
577958
577959
|
getLatestOllamaVersion: () => getLatestOllamaVersion,
|
|
577959
577960
|
getOllamaVersion: () => getOllamaVersion,
|
|
577960
577961
|
hasCmd: () => hasCmd,
|
|
@@ -578020,7 +578021,7 @@ async function needsTextToolMode(modelName, backendUrl2) {
|
|
|
578020
578021
|
const hasTools = await checkToolSupport(modelName, backendUrl2);
|
|
578021
578022
|
return !hasTools;
|
|
578022
578023
|
}
|
|
578023
|
-
function detectUnifiedMemory() {
|
|
578024
|
+
function detectUnifiedMemory(hasDiscreteGpu = false) {
|
|
578024
578025
|
if (process.platform === "darwin" && process.arch === "arm64") return true;
|
|
578025
578026
|
if (process.platform === "linux") {
|
|
578026
578027
|
try {
|
|
@@ -578034,6 +578035,16 @@ function detectUnifiedMemory() {
|
|
|
578034
578035
|
}
|
|
578035
578036
|
} catch {
|
|
578036
578037
|
}
|
|
578038
|
+
try {
|
|
578039
|
+
if (existsSync91("/proc/device-tree/model")) {
|
|
578040
|
+
const model = readFileSync74("/proc/device-tree/model", "utf8").replace(/\0+$/, "").toLowerCase();
|
|
578041
|
+
if (/jetson|tegra|orin|xavier|nano|raspberry|rockchip|rk\d{4}|mt\d{4}/.test(model)) {
|
|
578042
|
+
return true;
|
|
578043
|
+
}
|
|
578044
|
+
}
|
|
578045
|
+
} catch {
|
|
578046
|
+
}
|
|
578047
|
+
if (process.arch === "arm64" && !hasDiscreteGpu) return true;
|
|
578037
578048
|
}
|
|
578038
578049
|
return false;
|
|
578039
578050
|
}
|
|
@@ -578127,11 +578138,15 @@ function detectSystemSpecs() {
|
|
|
578127
578138
|
} catch {
|
|
578128
578139
|
}
|
|
578129
578140
|
}
|
|
578130
|
-
const unifiedMemory = detectUnifiedMemory();
|
|
578141
|
+
const unifiedMemory = detectUnifiedMemory(gpuVramGB > 0);
|
|
578131
578142
|
if (unifiedMemory && totalRamGB > 0) {
|
|
578132
578143
|
const floorGB = totalRamGB * 0.8;
|
|
578133
578144
|
if (availableRamGB < floorGB) availableRamGB = floorGB;
|
|
578134
578145
|
}
|
|
578146
|
+
if (!unifiedMemory && totalRamGB > 0) {
|
|
578147
|
+
const floorGB = totalRamGB * 0.75;
|
|
578148
|
+
if (availableRamGB < floorGB) availableRamGB = floorGB;
|
|
578149
|
+
}
|
|
578135
578150
|
return {
|
|
578136
578151
|
totalRamGB: Math.round(totalRamGB * 10) / 10,
|
|
578137
578152
|
availableRamGB: Math.round(availableRamGB * 10) / 10,
|
|
@@ -578191,11 +578206,15 @@ async function detectSystemSpecsAsync() {
|
|
|
578191
578206
|
} catch {
|
|
578192
578207
|
}
|
|
578193
578208
|
}
|
|
578194
|
-
const unifiedMemory = detectUnifiedMemory();
|
|
578209
|
+
const unifiedMemory = detectUnifiedMemory(gpuVramGB > 0);
|
|
578195
578210
|
if (unifiedMemory && totalRamGB > 0) {
|
|
578196
578211
|
const floorGB = totalRamGB * 0.8;
|
|
578197
578212
|
if (availableRamGB < floorGB) availableRamGB = floorGB;
|
|
578198
578213
|
}
|
|
578214
|
+
if (!unifiedMemory && totalRamGB > 0) {
|
|
578215
|
+
const floorGB = totalRamGB * 0.75;
|
|
578216
|
+
if (availableRamGB < floorGB) availableRamGB = floorGB;
|
|
578217
|
+
}
|
|
578199
578218
|
return {
|
|
578200
578219
|
totalRamGB: Math.round(totalRamGB * 10) / 10,
|
|
578201
578220
|
availableRamGB: Math.round(availableRamGB * 10) / 10,
|
|
@@ -578259,17 +578278,41 @@ function formatContextLabel(numCtx) {
|
|
|
578259
578278
|
return numCtx >= 1024 ? `${Math.floor(numCtx / 1024)}K` : String(numCtx);
|
|
578260
578279
|
}
|
|
578261
578280
|
function calculateExpandedVariantContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
|
|
578262
|
-
const
|
|
578263
|
-
|
|
578264
|
-
|
|
578265
|
-
|
|
578266
|
-
|
|
578267
|
-
);
|
|
578268
|
-
const
|
|
578281
|
+
const ramBudget = specs.availableRamGB > 0 ? specs.availableRamGB : specs.totalRamGB;
|
|
578282
|
+
const vramBudget = specs.availableVramGB > 0 ? specs.availableVramGB : specs.gpuVramGB;
|
|
578283
|
+
const totalAvail = Math.max(vramBudget, ramBudget);
|
|
578284
|
+
const remaining = Math.max(0, totalAvail - modelSizeGB2);
|
|
578285
|
+
const usableGB = remaining * 0.85;
|
|
578286
|
+
const memoryFit = calculateMemoryBoundedNumCtx(specs, modelSizeGB2, kvBytesPerToken, archMax);
|
|
578287
|
+
const archCtxValue = archMax && archMax > 0 ? Math.max(2048, Math.floor(archMax / 1024) * 1024) : null;
|
|
578288
|
+
const archCtx = archCtxValue ?? Number.POSITIVE_INFINITY;
|
|
578269
578289
|
const floor = Math.min(EXPANDED_VARIANT_MIN_NUM_CTX, archCtx);
|
|
578270
578290
|
const fits = Math.min(memoryFit, archCtx);
|
|
578271
578291
|
const numCtx = Math.max(floor, fits);
|
|
578272
|
-
|
|
578292
|
+
let limitedBy;
|
|
578293
|
+
if (numCtx === floor && fits < floor) limitedBy = "floor";
|
|
578294
|
+
else if (archCtxValue !== null && numCtx === archCtxValue) limitedBy = "arch";
|
|
578295
|
+
else limitedBy = "memory";
|
|
578296
|
+
const effectiveKvBpt = kvBytesPerToken && kvBytesPerToken > 0 ? kvBytesPerToken : (modelSizeGB2 <= 5 ? 64 : modelSizeGB2 <= 12 ? 160 : modelSizeGB2 <= 25 ? 256 : 384) * 1024;
|
|
578297
|
+
return {
|
|
578298
|
+
numCtx,
|
|
578299
|
+
label: formatContextLabel(numCtx),
|
|
578300
|
+
math: {
|
|
578301
|
+
numCtx,
|
|
578302
|
+
label: formatContextLabel(numCtx),
|
|
578303
|
+
modelSizeGB: modelSizeGB2,
|
|
578304
|
+
kvBytesPerToken: effectiveKvBpt,
|
|
578305
|
+
kvSource: kvBytesPerToken && kvBytesPerToken > 0 ? "model_info" : "fallback",
|
|
578306
|
+
archMax: archMax && archMax > 0 ? archMax : null,
|
|
578307
|
+
ramBudgetGB: ramBudget,
|
|
578308
|
+
vramBudgetGB: vramBudget,
|
|
578309
|
+
usableGB,
|
|
578310
|
+
memoryFit,
|
|
578311
|
+
archCtx: archCtxValue,
|
|
578312
|
+
floor,
|
|
578313
|
+
limitedBy
|
|
578314
|
+
}
|
|
578315
|
+
};
|
|
578273
578316
|
}
|
|
578274
578317
|
function ask(rl, question) {
|
|
578275
578318
|
return new Promise((resolve52) => {
|
|
@@ -580064,7 +580107,7 @@ async function queryModelKVInfo(backendUrl2, modelName) {
|
|
|
580064
580107
|
const arch3 = info["general.architecture"];
|
|
580065
580108
|
if (!arch3) return null;
|
|
580066
580109
|
const nLayersRaw = info[`${arch3}.block_count`];
|
|
580067
|
-
const nKVHeadsRaw = info[`${arch3}.attention.head_count_kv`]
|
|
580110
|
+
const nKVHeadsRaw = info[`${arch3}.attention.head_count_kv`];
|
|
580068
580111
|
const keyDimRaw = info[`${arch3}.attention.key_length`];
|
|
580069
580112
|
const valDimRaw = info[`${arch3}.attention.value_length`] ?? keyDimRaw;
|
|
580070
580113
|
const archMax = info[`${arch3}.context_length`];
|
|
@@ -580072,7 +580115,7 @@ async function queryModelKVInfo(backendUrl2, modelName) {
|
|
|
580072
580115
|
const keyDim = keyDimRaw ?? 128;
|
|
580073
580116
|
const valDim = valDimRaw ?? 128;
|
|
580074
580117
|
const nLayers = nLayersRaw ?? defaultLayersForArch(arch3);
|
|
580075
|
-
const nKVHeads = nKVHeadsRaw ??
|
|
580118
|
+
const nKVHeads = nKVHeadsRaw ?? 8;
|
|
580076
580119
|
if (!nLayers) return { archMax };
|
|
580077
580120
|
const kvBytesPerToken = nLayers * nKVHeads * (keyDim + valDim) * 2;
|
|
580078
580121
|
return { kvBytesPerToken, archMax };
|
|
@@ -580204,6 +580247,21 @@ async function createExpandedVariantAsync(baseModel, specs, sizeGB, kvBytesPerTo
|
|
|
580204
580247
|
archMax
|
|
580205
580248
|
);
|
|
580206
580249
|
}
|
|
580250
|
+
function formatExpandedContextDiagnostic(specs, math) {
|
|
580251
|
+
const fmtGB = (n2) => `${n2.toFixed(1)}GB`;
|
|
580252
|
+
const fmtKB = (n2) => `${Math.round(n2 / 1024)}KB`;
|
|
580253
|
+
const fmtK = (n2) => n2 >= 1024 ? `${Math.floor(n2 / 1024)}K` : String(n2);
|
|
580254
|
+
const memBits = [];
|
|
580255
|
+
if (specs.gpuVramGB > 0) {
|
|
580256
|
+
memBits.push(`VRAM ${fmtGB(specs.availableVramGB || specs.gpuVramGB)}/${fmtGB(specs.gpuVramGB)}`);
|
|
580257
|
+
}
|
|
580258
|
+
memBits.push(`RAM ${fmtGB(specs.availableRamGB)}/${fmtGB(specs.totalRamGB)}${specs.unifiedMemory ? " unified" : ""}`);
|
|
580259
|
+
const mem = memBits.join(", ");
|
|
580260
|
+
const kv = `KV ${fmtKB(math.kvBytesPerToken)}/tok (${math.kvSource})`;
|
|
580261
|
+
const fit2 = `fit ${fmtK(math.memoryFit)}, arch ${math.archCtx !== null ? fmtK(math.archCtx) : "n/a"}, floor ${fmtK(math.floor)}`;
|
|
580262
|
+
const limit = `→ ${fmtK(math.numCtx)} (${math.limitedBy === "floor" ? "min floor" : math.limitedBy === "arch" ? "arch-capped" : "memory-fit"})`;
|
|
580263
|
+
return `[${mem} | model ${fmtGB(math.modelSizeGB)} | ${kv} | ${fit2} ${limit}]`;
|
|
580264
|
+
}
|
|
580207
580265
|
async function ensureExpandedContext(modelName, backendUrl2) {
|
|
580208
580266
|
if (modelName.includes("cloud") || modelName.includes(":cloud")) {
|
|
580209
580267
|
return { model: modelName, created: false, contextLabel: "remote", numCtx: 0 };
|
|
@@ -580230,11 +580288,11 @@ async function ensureExpandedContext(modelName, backendUrl2) {
|
|
|
580230
580288
|
kvInfo?.kvBytesPerToken,
|
|
580231
580289
|
kvInfo?.archMax
|
|
580232
580290
|
).catch(() => ({ repaired: false, currentNumCtx: 0, baseModel: null, resolvedModel: modelName }));
|
|
580233
|
-
return { model: repair.resolvedModel, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
|
|
580291
|
+
return { model: repair.resolvedModel, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
|
|
580234
580292
|
}
|
|
580235
580293
|
const existing = await checkExpandedVariant(modelName, backendUrl2);
|
|
580236
580294
|
if (existing === null) {
|
|
580237
|
-
return { model: modelName, created: false, contextLabel: "", numCtx: 0 };
|
|
580295
|
+
return { model: modelName, created: false, contextLabel: "", numCtx: 0, specs, math: ctx3.math };
|
|
580238
580296
|
}
|
|
580239
580297
|
if (typeof existing === "string") {
|
|
580240
580298
|
const lostTools = await wrapperLacksToolsCapability(backendUrl2, existing).catch(() => false);
|
|
@@ -580249,7 +580307,7 @@ async function ensureExpandedContext(modelName, backendUrl2) {
|
|
|
580249
580307
|
kvInfo?.archMax
|
|
580250
580308
|
);
|
|
580251
580309
|
if (rebuilt) {
|
|
580252
|
-
return { model: rebuilt, created: true, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
|
|
580310
|
+
return { model: rebuilt, created: true, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
|
|
580253
580311
|
}
|
|
580254
580312
|
} catch {
|
|
580255
580313
|
}
|
|
@@ -580264,13 +580322,13 @@ async function ensureExpandedContext(modelName, backendUrl2) {
|
|
|
580264
580322
|
kvInfo?.kvBytesPerToken,
|
|
580265
580323
|
kvInfo?.archMax
|
|
580266
580324
|
).catch(() => ({ repaired: false, currentNumCtx: 0, baseModel: null, resolvedModel: existing }));
|
|
580267
|
-
return { model: repair.resolvedModel, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
|
|
580325
|
+
return { model: repair.resolvedModel, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
|
|
580268
580326
|
}
|
|
580269
580327
|
const created = await createExpandedVariantAsync(modelName, specs, sizeGB, kvInfo?.kvBytesPerToken, kvInfo?.archMax);
|
|
580270
580328
|
if (created) {
|
|
580271
|
-
return { model: created, created: true, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
|
|
580329
|
+
return { model: created, created: true, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
|
|
580272
580330
|
}
|
|
580273
|
-
return { model: modelName, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
|
|
580331
|
+
return { model: modelName, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
|
|
580274
580332
|
}
|
|
580275
580333
|
function guessBaseFromVariant(variantName, models) {
|
|
580276
580334
|
const stripped = stripVariantTag(variantName);
|
|
@@ -602241,14 +602299,15 @@ async function switchModel(query, ctx3, local = false) {
|
|
|
602241
602299
|
match.name,
|
|
602242
602300
|
ctx3.config.backendUrl
|
|
602243
602301
|
);
|
|
602302
|
+
const diag = result.specs && result.math ? "\n " + c3.dim(formatExpandedContextDiagnostic(result.specs, result.math)) : "";
|
|
602244
602303
|
if (result.created) {
|
|
602245
602304
|
renderInfo(
|
|
602246
|
-
`Created expanded context variant: ${c3.bold(result.model)} (${result.contextLabel}, ${result.numCtx} tokens)`
|
|
602305
|
+
`Created expanded context variant: ${c3.bold(result.model)} (${result.contextLabel}, ${result.numCtx} tokens)${diag}`
|
|
602247
602306
|
);
|
|
602248
602307
|
finalModel = result.model;
|
|
602249
602308
|
} else if (result.model !== match.name) {
|
|
602250
602309
|
renderInfo(
|
|
602251
|
-
`Using expanded context variant: ${c3.bold(result.model)} (${result.contextLabel})`
|
|
602310
|
+
`Using expanded context variant: ${c3.bold(result.model)} (${result.contextLabel})${diag}`
|
|
602252
602311
|
);
|
|
602253
602312
|
finalModel = result.model;
|
|
602254
602313
|
}
|
|
@@ -653796,13 +653855,14 @@ This is an independent background session started from /background.`
|
|
|
653796
653855
|
currentConfig.model,
|
|
653797
653856
|
currentConfig.backendUrl
|
|
653798
653857
|
);
|
|
653858
|
+
const diag = expandResult.specs && expandResult.math ? "\n " + c3.dim(formatExpandedContextDiagnostic(expandResult.specs, expandResult.math)) : "";
|
|
653799
653859
|
if (expandResult.created) {
|
|
653800
653860
|
config = { ...config, model: expandResult.model };
|
|
653801
653861
|
currentConfig = { ...currentConfig, model: expandResult.model };
|
|
653802
653862
|
statusBar.setModelName(expandResult.model);
|
|
653803
653863
|
writeContent(
|
|
653804
653864
|
() => renderInfo(
|
|
653805
|
-
`Created expanded context model: ${expandResult.model} (${expandResult.contextLabel}, ${expandResult.numCtx} tokens)`
|
|
653865
|
+
`Created expanded context model: ${expandResult.model} (${expandResult.contextLabel}, ${expandResult.numCtx} tokens)${diag}`
|
|
653806
653866
|
)
|
|
653807
653867
|
);
|
|
653808
653868
|
} else if (expandResult.model !== currentConfig.model) {
|
|
@@ -653811,7 +653871,7 @@ This is an independent background session started from /background.`
|
|
|
653811
653871
|
statusBar.setModelName(expandResult.model);
|
|
653812
653872
|
writeContent(
|
|
653813
653873
|
() => renderInfo(
|
|
653814
|
-
`Using expanded context model: ${expandResult.model} (${expandResult.contextLabel})`
|
|
653874
|
+
`Using expanded context model: ${expandResult.model} (${expandResult.contextLabel})${diag}`
|
|
653815
653875
|
)
|
|
653816
653876
|
);
|
|
653817
653877
|
}
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "omnius",
|
|
3
|
-
"version": "1.0.108",
|
|
3
|
+
"version": "1.0.109",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "omnius",
|
|
9
|
-
"version": "1.0.108",
|
|
9
|
+
"version": "1.0.109",
|
|
10
10
|
"bundleDependencies": [
|
|
11
11
|
"image-to-ascii"
|
|
12
12
|
],
|
package/package.json
CHANGED