@vtstech/pi-model-test 1.0.9 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/model-test.js +67 -79
- package/package.json +2 -2
package/model-test.js
CHANGED
|
@@ -12,57 +12,17 @@ import {
|
|
|
12
12
|
truncate,
|
|
13
13
|
sanitizeForReport
|
|
14
14
|
} from "@vtstech/pi-shared/format";
|
|
15
|
-
import { getOllamaBaseUrl, detectModelFamily, readModelsJson,
|
|
16
|
-
function detectProvider(ctx) {
|
|
17
|
-
const model = ctx.model;
|
|
18
|
-
if (!model) return { kind: "unknown", name: "none" };
|
|
19
|
-
const providerName = model.provider || "";
|
|
20
|
-
if (!providerName) return { kind: "unknown", name: "none" };
|
|
21
|
-
const modelsJson = readModelsJson();
|
|
22
|
-
const userProviderCfg = (modelsJson.providers || {})[providerName];
|
|
23
|
-
if (userProviderCfg) {
|
|
24
|
-
const baseUrl = userProviderCfg.baseUrl || "";
|
|
25
|
-
const apiMode = userProviderCfg.api || "";
|
|
26
|
-
const apiKey = userProviderCfg.apiKey || "";
|
|
27
|
-
const isOllama = /ollama/i.test(providerName) || /localhost:\d+/.test(baseUrl) || /127\.0\.0\.1:\d+/.test(baseUrl) || /0\.0\.0\.0:\d+/.test(baseUrl) || /\/api\/chat/.test(baseUrl) || apiMode === "ollama";
|
|
28
|
-
if (isOllama) {
|
|
29
|
-
return { kind: "ollama", name: providerName, apiMode: "ollama", baseUrl, apiKey };
|
|
30
|
-
}
|
|
31
|
-
if (/\/api\/chat/.test(baseUrl)) {
|
|
32
|
-
return { kind: "ollama", name: providerName, apiMode: "ollama", baseUrl, apiKey };
|
|
33
|
-
}
|
|
34
|
-
return {
|
|
35
|
-
kind: "builtin",
|
|
36
|
-
name: providerName,
|
|
37
|
-
apiMode: apiMode || userProviderCfg.api || "openai-completions",
|
|
38
|
-
baseUrl,
|
|
39
|
-
apiKey
|
|
40
|
-
};
|
|
41
|
-
}
|
|
42
|
-
const builtin = BUILTIN_PROVIDERS[providerName];
|
|
43
|
-
if (builtin) {
|
|
44
|
-
const apiKey = process.env[builtin.envKey] || "";
|
|
45
|
-
return {
|
|
46
|
-
kind: "builtin",
|
|
47
|
-
name: providerName,
|
|
48
|
-
apiMode: builtin.api,
|
|
49
|
-
baseUrl: builtin.baseUrl,
|
|
50
|
-
envKey: builtin.envKey,
|
|
51
|
-
apiKey
|
|
52
|
-
};
|
|
53
|
-
}
|
|
54
|
-
return { kind: "unknown", name: providerName };
|
|
55
|
-
}
|
|
15
|
+
import { getOllamaBaseUrl, detectModelFamily, readModelsJson, writeModelsJson, fetchModelContextLength, EXTENSION_VERSION, detectProvider } from "@vtstech/pi-shared/ollama";
|
|
56
16
|
var CONFIG = {
|
|
57
17
|
// General API settings
|
|
58
18
|
DEFAULT_TIMEOUT_MS: 999999,
|
|
59
|
-
//
|
|
19
|
+
// ~16.7 minutes — effectively unlimited for slow models
|
|
60
20
|
CONNECT_TIMEOUT_S: 60,
|
|
61
|
-
//
|
|
21
|
+
// 60 seconds to establish connection
|
|
62
22
|
MAX_RETRIES: 1,
|
|
63
23
|
// Single retry for transient failures
|
|
64
24
|
RETRY_DELAY_MS: 1e4,
|
|
65
|
-
//
|
|
25
|
+
// 10 seconds between retries
|
|
66
26
|
EXEC_BUFFER_MS: 8e3,
|
|
67
27
|
// Extra buffer for exec timeout over curl timeout
|
|
68
28
|
// Model generation settings
|
|
@@ -74,31 +34,32 @@ var CONFIG = {
|
|
|
74
34
|
MIN_THINKING_LENGTH: 10,
|
|
75
35
|
// Minimum chars to consider thinking tokens valid
|
|
76
36
|
TOOL_TEST_TIMEOUT_MS: 999999,
|
|
77
|
-
//
|
|
37
|
+
// Effectively unlimited for slow tool usage tests
|
|
78
38
|
TOOL_TEST_MAX_TIME_S: 999999,
|
|
79
39
|
// Max curl time for tool tests (effectively unlimited)
|
|
80
40
|
TOOL_SUPPORT_TIMEOUT_MS: 999999,
|
|
81
|
-
//
|
|
41
|
+
// Effectively unlimited for tool support detection
|
|
82
42
|
TOOL_SUPPORT_MAX_TIME_S: 999999,
|
|
83
43
|
// Max curl time for tool support detection
|
|
84
44
|
// Metadata retrieval
|
|
85
45
|
TAGS_TIMEOUT_MS: 15e3,
|
|
86
46
|
// 15 seconds for /api/tags
|
|
87
47
|
TAGS_CONNECT_TIMEOUT_S: 30,
|
|
88
|
-
//
|
|
48
|
+
// 30 seconds connection timeout for tags
|
|
89
49
|
MODEL_INFO_TIMEOUT_MS: 3e4,
|
|
90
|
-
//
|
|
50
|
+
// 30 seconds for model info lookup
|
|
91
51
|
// Provider API settings
|
|
92
52
|
PROVIDER_TIMEOUT_MS: 999999,
|
|
93
|
-
//
|
|
53
|
+
// Effectively unlimited for cloud provider API calls
|
|
94
54
|
PROVIDER_TOOL_TIMEOUT_MS: 12e4,
|
|
95
|
-
//
|
|
55
|
+
// 120 seconds for tool usage tests on providers
|
|
96
56
|
// Rate limiting
|
|
97
57
|
TEST_DELAY_MS: 1e4
|
|
98
|
-
//
|
|
58
|
+
// 10 seconds between tests to avoid rate limiting
|
|
99
59
|
};
|
|
100
60
|
var TOOL_SUPPORT_CACHE_DIR = path.join(os.homedir(), ".pi", "agent", "cache");
|
|
101
61
|
var TOOL_SUPPORT_CACHE_PATH = path.join(TOOL_SUPPORT_CACHE_DIR, "tool_support.json");
|
|
62
|
+
var _toolSupportCacheInMemory = null;
|
|
102
63
|
function readToolSupportCache() {
|
|
103
64
|
try {
|
|
104
65
|
if (fs.existsSync(TOOL_SUPPORT_CACHE_PATH)) {
|
|
@@ -116,19 +77,21 @@ function writeToolSupportCache(cache) {
|
|
|
116
77
|
fs.writeFileSync(TOOL_SUPPORT_CACHE_PATH, JSON.stringify(cache, null, 2) + "\n", "utf-8");
|
|
117
78
|
}
|
|
118
79
|
function getCachedToolSupport(model) {
|
|
119
|
-
const cache = readToolSupportCache();
|
|
80
|
+
const cache = _toolSupportCacheInMemory || readToolSupportCache();
|
|
81
|
+
if (!_toolSupportCacheInMemory) _toolSupportCacheInMemory = cache;
|
|
120
82
|
const entry = cache[model];
|
|
121
83
|
if (!entry) return null;
|
|
122
84
|
if (!entry.support || !["native", "react", "none"].includes(entry.support)) return null;
|
|
123
85
|
return entry;
|
|
124
86
|
}
|
|
125
87
|
function cacheToolSupport(model, support, family) {
|
|
126
|
-
const cache = readToolSupportCache();
|
|
88
|
+
const cache = _toolSupportCacheInMemory || readToolSupportCache();
|
|
127
89
|
cache[model] = {
|
|
128
90
|
support,
|
|
129
91
|
testedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
130
92
|
family
|
|
131
93
|
};
|
|
94
|
+
_toolSupportCacheInMemory = cache;
|
|
132
95
|
writeToolSupportCache(cache);
|
|
133
96
|
}
|
|
134
97
|
function model_test_temp_default(pi) {
|
|
@@ -247,10 +210,8 @@ function model_test_temp_default(pi) {
|
|
|
247
210
|
{ role: "user", content: "Reply with exactly: PONG" }
|
|
248
211
|
], { maxTokens: 10, timeoutMs: 3e4 });
|
|
249
212
|
const elapsedMs = Date.now() - start;
|
|
250
|
-
const content = result.content.trim().toUpperCase();
|
|
251
213
|
const reachable = true;
|
|
252
214
|
const authValid = true;
|
|
253
|
-
const hasPong = content.includes("PONG");
|
|
254
215
|
return {
|
|
255
216
|
pass: reachable && authValid,
|
|
256
217
|
reachable,
|
|
@@ -259,7 +220,6 @@ function model_test_temp_default(pi) {
|
|
|
259
220
|
elapsedMs
|
|
260
221
|
};
|
|
261
222
|
} catch (e) {
|
|
262
|
-
const start = Date.now();
|
|
263
223
|
let reachable = false;
|
|
264
224
|
let authValid = false;
|
|
265
225
|
const msg = e.message || "";
|
|
@@ -290,7 +250,6 @@ function model_test_temp_default(pi) {
|
|
|
290
250
|
const prompt = `A snail climbs 3 feet up a wall each day, but slides back 2 feet each night. The wall is 10 feet tall. How many days does it take the snail to reach the top? Think step by step and give the final answer on its own line like: ANSWER: <number>`;
|
|
291
251
|
try {
|
|
292
252
|
let response, elapsedMs;
|
|
293
|
-
let usedThinkingFallback = false;
|
|
294
253
|
try {
|
|
295
254
|
const result = await ollamaChat(model, [
|
|
296
255
|
{ role: "user", content: prompt }
|
|
@@ -309,7 +268,6 @@ function model_test_temp_default(pi) {
|
|
|
309
268
|
], { think: true });
|
|
310
269
|
response = retry.response;
|
|
311
270
|
elapsedMs = retry.elapsedMs;
|
|
312
|
-
usedThinkingFallback = true;
|
|
313
271
|
} else {
|
|
314
272
|
throw firstErr;
|
|
315
273
|
}
|
|
@@ -917,12 +875,30 @@ The JSON object must have exactly these 4 keys:
|
|
|
917
875
|
parsed = JSON.parse(cleaned);
|
|
918
876
|
} catch {
|
|
919
877
|
const cleaned = msg.replace(/```json?\s*/gi, "").replace(/```/g, "").trim();
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
878
|
+
let braceDepth = 0, bracketDepth = 0;
|
|
879
|
+
let inString = false, escapeNext = false;
|
|
880
|
+
for (let i = 0; i < cleaned.length; i++) {
|
|
881
|
+
const c = cleaned[i];
|
|
882
|
+
if (escapeNext) {
|
|
883
|
+
escapeNext = false;
|
|
884
|
+
continue;
|
|
885
|
+
}
|
|
886
|
+
if (c === "\\") {
|
|
887
|
+
if (inString) escapeNext = true;
|
|
888
|
+
continue;
|
|
889
|
+
}
|
|
890
|
+
if (c === '"') {
|
|
891
|
+
inString = !inString;
|
|
892
|
+
continue;
|
|
893
|
+
}
|
|
894
|
+
if (inString) continue;
|
|
895
|
+
if (c === "{") braceDepth++;
|
|
896
|
+
else if (c === "}") braceDepth = Math.max(0, braceDepth - 1);
|
|
897
|
+
else if (c === "[") bracketDepth++;
|
|
898
|
+
else if (c === "]") bracketDepth = Math.max(0, bracketDepth - 1);
|
|
899
|
+
}
|
|
900
|
+
if (braceDepth > 0 || bracketDepth > 0) {
|
|
901
|
+
const repaired = cleaned + "}".repeat(braceDepth) + "]".repeat(bracketDepth);
|
|
926
902
|
try {
|
|
927
903
|
parsed = JSON.parse(repaired);
|
|
928
904
|
repairNote = " (repaired truncated JSON)";
|
|
@@ -976,12 +952,30 @@ The JSON object must have exactly these 4 keys:
|
|
|
976
952
|
parsed = JSON.parse(cleaned);
|
|
977
953
|
} catch {
|
|
978
954
|
const cleaned = msg.replace(/```json?\s*/gi, "").replace(/```/g, "").trim();
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
955
|
+
let braceDepth = 0, bracketDepth = 0;
|
|
956
|
+
let inString = false, escapeNext = false;
|
|
957
|
+
for (let i = 0; i < cleaned.length; i++) {
|
|
958
|
+
const c = cleaned[i];
|
|
959
|
+
if (escapeNext) {
|
|
960
|
+
escapeNext = false;
|
|
961
|
+
continue;
|
|
962
|
+
}
|
|
963
|
+
if (c === "\\") {
|
|
964
|
+
if (inString) escapeNext = true;
|
|
965
|
+
continue;
|
|
966
|
+
}
|
|
967
|
+
if (c === '"') {
|
|
968
|
+
inString = !inString;
|
|
969
|
+
continue;
|
|
970
|
+
}
|
|
971
|
+
if (inString) continue;
|
|
972
|
+
if (c === "{") braceDepth++;
|
|
973
|
+
else if (c === "}") braceDepth = Math.max(0, braceDepth - 1);
|
|
974
|
+
else if (c === "[") bracketDepth++;
|
|
975
|
+
else if (c === "]") bracketDepth = Math.max(0, bracketDepth - 1);
|
|
976
|
+
}
|
|
977
|
+
if (braceDepth > 0 || bracketDepth > 0) {
|
|
978
|
+
const repaired = cleaned + "}".repeat(braceDepth) + "]".repeat(bracketDepth);
|
|
985
979
|
try {
|
|
986
980
|
parsed = JSON.parse(repaired);
|
|
987
981
|
repairNote = " (repaired truncated JSON)";
|
|
@@ -1187,14 +1181,8 @@ The JSON object must have exactly these 4 keys:
|
|
|
1187
1181
|
return ctx.model?.id;
|
|
1188
1182
|
}
|
|
1189
1183
|
function updateModelsJsonReasoning(model, hasReasoning) {
|
|
1190
|
-
const agentDir = path.join(os.homedir(), ".pi", "agent");
|
|
1191
|
-
const modelsJsonPath = path.join(agentDir, "models.json");
|
|
1192
|
-
if (!fs.existsSync(modelsJsonPath)) {
|
|
1193
|
-
return { updated: false, message: "models.json not found \u2014 skipped" };
|
|
1194
|
-
}
|
|
1195
1184
|
try {
|
|
1196
|
-
const raw = fs.readFileSync(modelsJsonPath, "utf-8");
|
|
1197
|
-
const config = JSON.parse(raw);
|
|
1185
|
+
const config = readModelsJson();
|
|
1198
1186
|
let updated = false;
|
|
1199
1187
|
for (const provider of Object.values(config.providers || {})) {
|
|
1200
1188
|
const models = provider.models || [];
|
|
@@ -1214,7 +1202,7 @@ The JSON object must have exactly these 4 keys:
|
|
|
1214
1202
|
if (!updated) {
|
|
1215
1203
|
return { updated: false, message: `${model} not found in models.json \u2014 skipped` };
|
|
1216
1204
|
}
|
|
1217
|
-
|
|
1205
|
+
writeModelsJson(config);
|
|
1218
1206
|
const action = hasReasoning ? "set reasoning: true" : "set reasoning: false";
|
|
1219
1207
|
return { updated: true, message: `\u2705 Updated ${model}: ${action}` };
|
|
1220
1208
|
} catch (e) {
|
|
@@ -1222,7 +1210,7 @@ The JSON object must have exactly these 4 keys:
|
|
|
1222
1210
|
}
|
|
1223
1211
|
}
|
|
1224
1212
|
const branding = [
|
|
1225
|
-
` \u26A1 Pi Model Benchmark
|
|
1213
|
+
` \u26A1 Pi Model Benchmark v${EXTENSION_VERSION}`,
|
|
1226
1214
|
` Written by VTSTech`,
|
|
1227
1215
|
` GitHub: https://github.com/VTSTech`,
|
|
1228
1216
|
` Website: www.vts-tech.org`
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@vtstech/pi-model-test",
|
|
3
|
-
"version": "1.0.9",
|
|
3
|
+
"version": "1.1.0",
|
|
4
4
|
"description": "Model benchmark/testing extension for Pi Coding Agent",
|
|
5
5
|
"main": "model-test.js",
|
|
6
6
|
"keywords": ["pi-extensions"],
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
"url": "https://github.com/VTSTech/pi-coding-agent"
|
|
15
15
|
},
|
|
16
16
|
"dependencies": {
|
|
17
|
-
"@vtstech/pi-shared": "1.0.9"
|
|
17
|
+
"@vtstech/pi-shared": "1.1.0"
|
|
18
18
|
},
|
|
19
19
|
"peerDependencies": {
|
|
20
20
|
"@mariozechner/pi-coding-agent": ">=0.66"
|