@vtstech/pi-shared 1.1.2-dev → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/format.js +11 -5
- package/ollama.js +2 -2
- package/package.json +1 -1
package/format.js
CHANGED
|
@@ -66,12 +66,18 @@ function sanitizeForReport(s, maxLines = 40) {
|
|
|
66
66
|
function padRight(s, n) {
|
|
67
67
|
return s + " ".repeat(Math.max(0, n - s.length));
|
|
68
68
|
}
|
|
69
|
-
function
|
|
69
|
+
function estimateMemory(parameterSize, quantizationLevel, contextLength) {
|
|
70
70
|
const params = parseParamCount(parameterSize);
|
|
71
71
|
if (params === void 0) return void 0;
|
|
72
72
|
const bitsPerParam = bitsPerParamForQuant(quantizationLevel);
|
|
73
73
|
const modelBytes = params * bitsPerParam / 8;
|
|
74
|
-
|
|
74
|
+
const cpuMultiplier = contextLength != null ? 1.5 + contextLength / 1e5 : 2.5;
|
|
75
|
+
return {
|
|
76
|
+
gpu: Math.ceil(modelBytes * 1.1),
|
|
77
|
+
// 10% overhead — GPU: weights dominate
|
|
78
|
+
cpu: Math.ceil(modelBytes * cpuMultiplier)
|
|
79
|
+
// context-aware — CPU: KV cache dominates
|
|
80
|
+
};
|
|
75
81
|
}
|
|
76
82
|
function parseParamCount(s) {
|
|
77
83
|
if (!s || typeof s !== "string") return void 0;
|
|
@@ -100,8 +106,8 @@ function parseParamCount(s) {
|
|
|
100
106
|
}
|
|
101
107
|
function bitsPerParamForQuant(quant) {
|
|
102
108
|
const q = quant.toUpperCase().replace(/[-_.]/g, "");
|
|
103
|
-
if (q.startsWith("FP32") || q === "
|
|
104
|
-
if (q.startsWith("F16") || q === "
|
|
109
|
+
if (q.startsWith("FP32") || q === "F32" || q === "TF32") return 32;
|
|
110
|
+
if (q.startsWith("F16") || q === "BF16") return 16;
|
|
105
111
|
if (q.startsWith("Q8")) return 8;
|
|
106
112
|
if (q.startsWith("IQ4")) return 4.5;
|
|
107
113
|
if (q.startsWith("IQ3")) return 3.5;
|
|
@@ -116,7 +122,7 @@ function bitsPerParamForQuant(quant) {
|
|
|
116
122
|
}
|
|
117
123
|
export {
|
|
118
124
|
bytesHuman,
|
|
119
|
-
|
|
125
|
+
estimateMemory,
|
|
120
126
|
fail,
|
|
121
127
|
fmtBytes,
|
|
122
128
|
fmtDur,
|
package/ollama.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
import * as fs from "node:fs";
|
|
3
3
|
import * as path from "node:path";
|
|
4
4
|
import os from "node:os";
|
|
5
|
-
var EXTENSION_VERSION = "1.1.2-dev";
|
|
5
|
+
var EXTENSION_VERSION = "1.1.2";
|
|
6
6
|
var MODELS_JSON_PATH = path.join(os.homedir(), ".pi", "agent", "models.json");
|
|
7
7
|
var _modelsJsonCache = null;
|
|
8
8
|
var _ollamaBaseUrlCache = null;
|
|
@@ -168,7 +168,7 @@ function detectProvider(ctx) {
|
|
|
168
168
|
return {
|
|
169
169
|
kind: "builtin",
|
|
170
170
|
name: providerName,
|
|
171
|
-
apiMode: apiMode ||
|
|
171
|
+
apiMode: apiMode || "openai-completions",
|
|
172
172
|
baseUrl,
|
|
173
173
|
apiKey
|
|
174
174
|
};
|