@vtstech/pi-model-test 1.0.9 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -2
- package/model-test.js +234 -339
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -43,12 +43,13 @@ pi install "npm:@vtstech/pi-model-test"
|
|
|
43
43
|
## Features
|
|
44
44
|
|
|
45
45
|
- Auto-detects Ollama vs cloud provider (OpenRouter, Anthropic, Google, OpenAI, Groq, DeepSeek, Mistral, xAI, Together, Fireworks, Cohere)
|
|
46
|
-
-
|
|
46
|
+
- Uses native `fetch()` for all HTTP communication (no shell subprocess or curl dependency)
|
|
47
|
+
- Automatic remote Ollama URL resolution (reads from `models.json` on every call — picks up config changes immediately)
|
|
47
48
|
- Timeout resilience with auto-retry on empty responses
|
|
48
49
|
- Rate limit delay between tests (configurable)
|
|
49
50
|
- Thinking model fallback (retries with `think: true`)
|
|
50
51
|
- Tool support cache (`~/.pi/agent/cache/tool_support.json`)
|
|
51
|
-
- JSON repair for truncated output
|
|
52
|
+
- JSON repair for truncated output (stack-based nesting-aware parser)
|
|
52
53
|
- Tab-completion for model names
|
|
53
54
|
|
|
54
55
|
## Links
|
package/model-test.js
CHANGED
|
@@ -12,59 +12,17 @@ import {
|
|
|
12
12
|
truncate,
|
|
13
13
|
sanitizeForReport
|
|
14
14
|
} from "@vtstech/pi-shared/format";
|
|
15
|
-
import { getOllamaBaseUrl, detectModelFamily, readModelsJson,
|
|
16
|
-
function detectProvider(ctx) {
|
|
17
|
-
const model = ctx.model;
|
|
18
|
-
if (!model) return { kind: "unknown", name: "none" };
|
|
19
|
-
const providerName = model.provider || "";
|
|
20
|
-
if (!providerName) return { kind: "unknown", name: "none" };
|
|
21
|
-
const modelsJson = readModelsJson();
|
|
22
|
-
const userProviderCfg = (modelsJson.providers || {})[providerName];
|
|
23
|
-
if (userProviderCfg) {
|
|
24
|
-
const baseUrl = userProviderCfg.baseUrl || "";
|
|
25
|
-
const apiMode = userProviderCfg.api || "";
|
|
26
|
-
const apiKey = userProviderCfg.apiKey || "";
|
|
27
|
-
const isOllama = /ollama/i.test(providerName) || /localhost:\d+/.test(baseUrl) || /127\.0\.0\.1:\d+/.test(baseUrl) || /0\.0\.0\.0:\d+/.test(baseUrl) || /\/api\/chat/.test(baseUrl) || apiMode === "ollama";
|
|
28
|
-
if (isOllama) {
|
|
29
|
-
return { kind: "ollama", name: providerName, apiMode: "ollama", baseUrl, apiKey };
|
|
30
|
-
}
|
|
31
|
-
if (/\/api\/chat/.test(baseUrl)) {
|
|
32
|
-
return { kind: "ollama", name: providerName, apiMode: "ollama", baseUrl, apiKey };
|
|
33
|
-
}
|
|
34
|
-
return {
|
|
35
|
-
kind: "builtin",
|
|
36
|
-
name: providerName,
|
|
37
|
-
apiMode: apiMode || userProviderCfg.api || "openai-completions",
|
|
38
|
-
baseUrl,
|
|
39
|
-
apiKey
|
|
40
|
-
};
|
|
41
|
-
}
|
|
42
|
-
const builtin = BUILTIN_PROVIDERS[providerName];
|
|
43
|
-
if (builtin) {
|
|
44
|
-
const apiKey = process.env[builtin.envKey] || "";
|
|
45
|
-
return {
|
|
46
|
-
kind: "builtin",
|
|
47
|
-
name: providerName,
|
|
48
|
-
apiMode: builtin.api,
|
|
49
|
-
baseUrl: builtin.baseUrl,
|
|
50
|
-
envKey: builtin.envKey,
|
|
51
|
-
apiKey
|
|
52
|
-
};
|
|
53
|
-
}
|
|
54
|
-
return { kind: "unknown", name: providerName };
|
|
55
|
-
}
|
|
15
|
+
import { getOllamaBaseUrl, detectModelFamily, readModelsJson, writeModelsJson, fetchModelContextLength, EXTENSION_VERSION, detectProvider } from "@vtstech/pi-shared/ollama";
|
|
56
16
|
var CONFIG = {
|
|
57
17
|
// General API settings
|
|
58
18
|
DEFAULT_TIMEOUT_MS: 999999,
|
|
59
|
-
//
|
|
19
|
+
// ~16.7 minutes — effectively unlimited for slow models
|
|
60
20
|
CONNECT_TIMEOUT_S: 60,
|
|
61
|
-
//
|
|
21
|
+
// 60 seconds to establish connection
|
|
62
22
|
MAX_RETRIES: 1,
|
|
63
23
|
// Single retry for transient failures
|
|
64
24
|
RETRY_DELAY_MS: 1e4,
|
|
65
|
-
//
|
|
66
|
-
EXEC_BUFFER_MS: 8e3,
|
|
67
|
-
// Extra buffer for exec timeout over curl timeout
|
|
25
|
+
// 10 seconds between retries
|
|
68
26
|
// Model generation settings
|
|
69
27
|
NUM_PREDICT: 1024,
|
|
70
28
|
// Max tokens in response
|
|
@@ -74,31 +32,26 @@ var CONFIG = {
|
|
|
74
32
|
MIN_THINKING_LENGTH: 10,
|
|
75
33
|
// Minimum chars to consider thinking tokens valid
|
|
76
34
|
TOOL_TEST_TIMEOUT_MS: 999999,
|
|
77
|
-
//
|
|
78
|
-
TOOL_TEST_MAX_TIME_S: 999999,
|
|
79
|
-
// Max curl time for tool tests (effectively unlimited)
|
|
35
|
+
// Effectively unlimited for slow tool usage tests
|
|
80
36
|
TOOL_SUPPORT_TIMEOUT_MS: 999999,
|
|
81
|
-
//
|
|
82
|
-
TOOL_SUPPORT_MAX_TIME_S: 999999,
|
|
83
|
-
// Max curl time for tool support detection
|
|
37
|
+
// Effectively unlimited for tool support detection
|
|
84
38
|
// Metadata retrieval
|
|
85
39
|
TAGS_TIMEOUT_MS: 15e3,
|
|
86
40
|
// 15 seconds for /api/tags
|
|
87
|
-
TAGS_CONNECT_TIMEOUT_S: 30,
|
|
88
|
-
// 10 seconds connection timeout for tags
|
|
89
41
|
MODEL_INFO_TIMEOUT_MS: 3e4,
|
|
90
|
-
//
|
|
42
|
+
// 30 seconds for model info lookup
|
|
91
43
|
// Provider API settings
|
|
92
44
|
PROVIDER_TIMEOUT_MS: 999999,
|
|
93
|
-
//
|
|
45
|
+
// Effectively unlimited for cloud provider API calls
|
|
94
46
|
PROVIDER_TOOL_TIMEOUT_MS: 12e4,
|
|
95
|
-
//
|
|
47
|
+
// 120 seconds for tool usage tests on providers
|
|
96
48
|
// Rate limiting
|
|
97
49
|
TEST_DELAY_MS: 1e4
|
|
98
|
-
//
|
|
50
|
+
// 10 seconds between tests to avoid rate limiting
|
|
99
51
|
};
|
|
100
52
|
var TOOL_SUPPORT_CACHE_DIR = path.join(os.homedir(), ".pi", "agent", "cache");
|
|
101
53
|
var TOOL_SUPPORT_CACHE_PATH = path.join(TOOL_SUPPORT_CACHE_DIR, "tool_support.json");
|
|
54
|
+
var _toolSupportCacheInMemory = null;
|
|
102
55
|
function readToolSupportCache() {
|
|
103
56
|
try {
|
|
104
57
|
if (fs.existsSync(TOOL_SUPPORT_CACHE_PATH)) {
|
|
@@ -116,69 +69,138 @@ function writeToolSupportCache(cache) {
|
|
|
116
69
|
fs.writeFileSync(TOOL_SUPPORT_CACHE_PATH, JSON.stringify(cache, null, 2) + "\n", "utf-8");
|
|
117
70
|
}
|
|
118
71
|
function getCachedToolSupport(model) {
|
|
119
|
-
const cache = readToolSupportCache();
|
|
72
|
+
const cache = _toolSupportCacheInMemory || readToolSupportCache();
|
|
73
|
+
if (!_toolSupportCacheInMemory) _toolSupportCacheInMemory = cache;
|
|
120
74
|
const entry = cache[model];
|
|
121
75
|
if (!entry) return null;
|
|
122
76
|
if (!entry.support || !["native", "react", "none"].includes(entry.support)) return null;
|
|
123
77
|
return entry;
|
|
124
78
|
}
|
|
125
79
|
function cacheToolSupport(model, support, family) {
|
|
126
|
-
const cache = readToolSupportCache();
|
|
80
|
+
const cache = _toolSupportCacheInMemory || readToolSupportCache();
|
|
127
81
|
cache[model] = {
|
|
128
82
|
support,
|
|
129
83
|
testedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
130
84
|
family
|
|
131
85
|
};
|
|
86
|
+
_toolSupportCacheInMemory = cache;
|
|
132
87
|
writeToolSupportCache(cache);
|
|
133
88
|
}
|
|
134
89
|
function model_test_temp_default(pi) {
|
|
135
|
-
|
|
90
|
+
function ollamaBase() {
|
|
91
|
+
return getOllamaBaseUrl();
|
|
92
|
+
}
|
|
136
93
|
async function rateLimitDelay(lines) {
|
|
137
94
|
if (CONFIG.TEST_DELAY_MS > 0) {
|
|
138
95
|
lines.push(info(`Waiting ${msHuman(CONFIG.TEST_DELAY_MS)} to avoid rate limiting...`));
|
|
139
96
|
await new Promise((r) => setTimeout(r, CONFIG.TEST_DELAY_MS));
|
|
140
97
|
}
|
|
141
98
|
}
|
|
99
|
+
/**
 * Grade a model's reply to the snail-on-the-wall reasoning prompt.
 *
 * The last standalone integer found in the text is treated as the model's
 * final answer and compared against the expected value "8". The reply is also
 * checked for signs of worked reasoning: either one of a fixed list of
 * keywords, or a numbered-list line such as "1. ...".
 *
 * @param {string} msg - Raw text produced by the model. A non-string value
 *   (for example a response with no content) is scored as an empty reply
 *   instead of throwing.
 * @returns {{score: "STRONG"|"MODERATE"|"WEAK"|"FAIL", pass: boolean}}
 *   STRONG   - correct answer with visible reasoning (pass)
 *   MODERATE - correct answer without visible reasoning (pass)
 *   WEAK     - visible reasoning but wrong or missing answer (fail)
 *   FAIL     - neither (fail)
 */
function scoreReasoning(msg) {
  // Guard: callers may hand over undefined/null when the response had no text.
  const text = typeof msg === "string" ? msg : "";

  const allNumbers = text.match(/\b(\d+)\b/g) || [];
  const answer = allNumbers.length > 0 ? allNumbers[allNumbers.length - 1] : "?";
  const isCorrect = answer === "8";

  const reasoningPatterns = [
    "because",
    "therefore",
    "since",
    "step",
    "subtract",
    "minus",
    "each day",
    "each night",
    "slides",
    "climbs",
    "night",
    "reaches",
    "finally",
    "last day"
  ];
  // Lower-case once instead of once per pattern.
  const lowered = text.toLowerCase();
  const hasReasoningWords = reasoningPatterns.some((w) => lowered.includes(w));
  // A line that starts with "<digits>. " counts as an enumerated step.
  const hasNumberedSteps = /^\s*\d+\.\s/m.test(text);
  const hasReasoning = hasReasoningWords || hasNumberedSteps;

  if (isCorrect && hasReasoning) return { score: "STRONG", pass: true };
  if (isCorrect) return { score: "MODERATE", pass: true };
  if (hasReasoning) return { score: "WEAK", pass: false };
  return { score: "FAIL", pass: false };
}
|
|
127
|
+
/**
 * Grade a native (structured) tool call made in response to the
 * "weather in Paris" tool-usage prompt.
 *
 * @param {string} fnName - Name of the function the model chose to call.
 * @param {object} args - Arguments the model supplied. A null, undefined or
 *   non-object value is treated as "no arguments" instead of throwing.
 * @returns {{score: "STRONG"|"MODERATE"|"WEAK", pass: boolean}}
 *   STRONG   - `get_weather` with a location containing "paris" and a unit
 *              that is either omitted or one of celsius/fahrenheit (pass)
 *   MODERATE - `get_weather` with the right location but an invalid unit (pass)
 *   WEAK     - wrong tool or wrong/missing location (fail)
 */
function scoreNativeToolCall(fnName, args) {
  // Guard: a tool call may arrive without an arguments object.
  const safeArgs = args !== null && typeof args === "object" ? args : {};

  const hasCorrectTool = fnName === "get_weather";
  const hasLocation = typeof safeArgs.location === "string" && safeArgs.location.toLowerCase().includes("paris");
  // `unit` is optional; when present it must be a recognised temperature unit.
  const unitValid = safeArgs.unit === void 0 || (typeof safeArgs.unit === "string" && ["celsius", "fahrenheit"].includes(safeArgs.unit.toLowerCase()));

  if (hasCorrectTool && hasLocation && unitValid) return { score: "STRONG", pass: true };
  if (hasCorrectTool && hasLocation) return { score: "MODERATE", pass: true };
  return { score: "WEAK", pass: false };
}
|
|
135
|
+
/**
 * Grade a tool call that the model wrote out as JSON text (rather than as a
 * structured tool call) in response to the "weather in Paris" prompt.
 *
 * @param {string} fnName - Tool name extracted from the model's text.
 * @param {object} args - Arguments extracted from the model's text. A null,
 *   undefined or non-object value is treated as "no arguments" instead of
 *   throwing.
 * @returns {{score: "STRONG"|"MODERATE"|"WEAK", pass: boolean}}
 *   STRONG   - `get_weather` with a location containing "paris" (pass)
 *   MODERATE - `get_weather` without a matching location (pass)
 *   WEAK     - any other tool name (fail)
 */
function scoreTextToolCall(fnName, args) {
  // Guard: the parsed text may not have carried an arguments object.
  const safeArgs = args !== null && typeof args === "object" ? args : {};

  const isWeatherTool = fnName === "get_weather";
  const hasLocation = typeof safeArgs.location === "string" && safeArgs.location.toLowerCase().includes("paris");

  if (isWeatherTool && hasLocation) return { score: "STRONG", pass: true };
  if (isWeatherTool) return { score: "MODERATE", pass: true };
  return { score: "WEAK", pass: false };
}
|
|
142
|
+
/**
 * Extract a tool call that a model emitted as JSON inside free text.
 *
 * The span from the first "{" to the last "}" is parsed as JSON. Two shapes
 * are accepted:
 *   - nested: {"name": "tool", "arguments": {...}}; `arguments` may also be
 *     a JSON-encoded string, which is decoded
 *   - flat:   {"name": "tool", "location": "..."}; every key except `name`
 *     is treated as an argument
 *
 * @param {string} content - Text returned by the model.
 * @returns {{fnName: string, args: object} | null} The tool name and its
 *   arguments, or null when `content` is not a string, holds no parseable
 *   JSON object, or the object has no string `name`.
 */
function parseTextToolCall(content) {
  if (typeof content !== "string") return null;

  const firstBrace = content.indexOf("{");
  if (firstBrace === -1) return null;
  const lastBrace = content.lastIndexOf("}");
  if (lastBrace <= firstBrace) return null;

  let textToolParsed = null;
  try {
    textToolParsed = JSON.parse(content.slice(firstBrace, lastBrace + 1));
  } catch {
    return null;
  }
  if (!textToolParsed || typeof textToolParsed.name !== "string") return null;

  let rawArgs;
  if (!textToolParsed.arguments) {
    // Flat form: no usable `arguments` key, so the remaining keys are the arguments.
    rawArgs = { ...textToolParsed };
  } else {
    let declared = textToolParsed.arguments;
    if (typeof declared === "string") {
      // Arguments may be serialised as a JSON string; decode rather than
      // letting the rest-destructuring below spread the string's characters.
      try {
        declared = JSON.parse(declared);
      } catch {
        declared = null;
      }
    }
    const isPlainObject = declared !== null && typeof declared === "object" && !Array.isArray(declared);
    rawArgs = isPlainObject ? declared : {};
  }

  // `name` is dropped from the argument map in both forms.
  const { name: _, ...fnArgs } = rawArgs;
  return { fnName: textToolParsed.name, args: fnArgs };
}
|
|
142
159
|
async function ollamaChat(model, messages, options = {}, timeoutMs = CONFIG.DEFAULT_TIMEOUT_MS, retries = CONFIG.MAX_RETRIES) {
|
|
143
160
|
const body = { model, messages, stream: false, options: { num_predict: CONFIG.NUM_PREDICT, temperature: CONFIG.TEMPERATURE, ...options } };
|
|
161
|
+
const url = `${ollamaBase()}/api/chat`;
|
|
144
162
|
for (let attempt = 0; attempt <= retries; attempt++) {
|
|
145
163
|
const start = Date.now();
|
|
164
|
+
const controller = new AbortController();
|
|
165
|
+
const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
|
|
146
166
|
try {
|
|
147
|
-
const
|
|
148
|
-
"
|
|
149
|
-
"
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
String(CONFIG.CONNECT_TIMEOUT_S),
|
|
154
|
-
"--max-time",
|
|
155
|
-
String(Math.ceil(timeoutMs / 1e3)),
|
|
156
|
-
`${OLLAMA_BASE}/api/chat`,
|
|
157
|
-
"-H",
|
|
158
|
-
"Content-Type: application/json",
|
|
159
|
-
"-d",
|
|
160
|
-
JSON.stringify(body)
|
|
161
|
-
], { timeout: timeoutMs + CONFIG.EXEC_BUFFER_MS });
|
|
167
|
+
const res = await fetch(url, {
|
|
168
|
+
method: "POST",
|
|
169
|
+
headers: { "Content-Type": "application/json" },
|
|
170
|
+
body: JSON.stringify(body),
|
|
171
|
+
signal: controller.signal
|
|
172
|
+
});
|
|
162
173
|
const elapsedMs = Date.now() - start;
|
|
163
|
-
if (
|
|
164
|
-
const
|
|
165
|
-
throw new Error(`
|
|
174
|
+
if (!res.ok) {
|
|
175
|
+
const errorText = await res.text().catch(() => "unknown error");
|
|
176
|
+
throw new Error(`Ollama API returned ${res.status}: ${truncate(errorText, 200)}`);
|
|
166
177
|
}
|
|
167
|
-
|
|
178
|
+
const text = await res.text();
|
|
179
|
+
if (!text.trim()) {
|
|
168
180
|
if (attempt < retries) {
|
|
169
181
|
await new Promise((r) => setTimeout(r, CONFIG.RETRY_DELAY_MS));
|
|
170
182
|
continue;
|
|
171
183
|
}
|
|
172
184
|
throw new Error(`Empty response from Ollama after ${attempt + 1} attempt(s)`);
|
|
173
185
|
}
|
|
174
|
-
const parsed = JSON.parse(
|
|
186
|
+
const parsed = JSON.parse(text);
|
|
175
187
|
return { response: parsed, elapsedMs };
|
|
176
188
|
} catch (e) {
|
|
177
|
-
|
|
189
|
+
const msg = e instanceof Error ? e.message : String(e);
|
|
190
|
+
if (e instanceof Error && e.name === "AbortError") {
|
|
191
|
+
if (attempt < retries) {
|
|
192
|
+
await new Promise((r) => setTimeout(r, CONFIG.RETRY_DELAY_MS));
|
|
193
|
+
continue;
|
|
194
|
+
}
|
|
195
|
+
throw new Error(`Ollama API timed out after ${msHuman(timeoutMs)}`);
|
|
196
|
+
}
|
|
197
|
+
if (attempt < retries && (msg.includes("Empty response") || msg.includes("ECONNREFUSED") || msg.includes("ECONNRESET") || msg.includes("fetch failed"))) {
|
|
178
198
|
await new Promise((r) => setTimeout(r, CONFIG.RETRY_DELAY_MS));
|
|
179
199
|
continue;
|
|
180
200
|
}
|
|
181
201
|
throw e;
|
|
202
|
+
} finally {
|
|
203
|
+
clearTimeout(timeoutId);
|
|
182
204
|
}
|
|
183
205
|
}
|
|
184
206
|
throw new Error("Unreachable");
|
|
@@ -247,10 +269,8 @@ function model_test_temp_default(pi) {
|
|
|
247
269
|
{ role: "user", content: "Reply with exactly: PONG" }
|
|
248
270
|
], { maxTokens: 10, timeoutMs: 3e4 });
|
|
249
271
|
const elapsedMs = Date.now() - start;
|
|
250
|
-
const content = result.content.trim().toUpperCase();
|
|
251
272
|
const reachable = true;
|
|
252
273
|
const authValid = true;
|
|
253
|
-
const hasPong = content.includes("PONG");
|
|
254
274
|
return {
|
|
255
275
|
pass: reachable && authValid,
|
|
256
276
|
reachable,
|
|
@@ -259,7 +279,6 @@ function model_test_temp_default(pi) {
|
|
|
259
279
|
elapsedMs
|
|
260
280
|
};
|
|
261
281
|
} catch (e) {
|
|
262
|
-
const start = Date.now();
|
|
263
282
|
let reachable = false;
|
|
264
283
|
let authValid = false;
|
|
265
284
|
const msg = e.message || "";
|
|
@@ -290,7 +309,6 @@ function model_test_temp_default(pi) {
|
|
|
290
309
|
const prompt = `A snail climbs 3 feet up a wall each day, but slides back 2 feet each night. The wall is 10 feet tall. How many days does it take the snail to reach the top? Think step by step and give the final answer on its own line like: ANSWER: <number>`;
|
|
291
310
|
try {
|
|
292
311
|
let response, elapsedMs;
|
|
293
|
-
let usedThinkingFallback = false;
|
|
294
312
|
try {
|
|
295
313
|
const result = await ollamaChat(model, [
|
|
296
314
|
{ role: "user", content: prompt }
|
|
@@ -309,7 +327,6 @@ function model_test_temp_default(pi) {
|
|
|
309
327
|
], { think: true });
|
|
310
328
|
response = retry.response;
|
|
311
329
|
elapsedMs = retry.elapsedMs;
|
|
312
|
-
usedThinkingFallback = true;
|
|
313
330
|
} else {
|
|
314
331
|
throw firstErr;
|
|
315
332
|
}
|
|
@@ -322,41 +339,7 @@ function model_test_temp_default(pi) {
|
|
|
322
339
|
}
|
|
323
340
|
const allNumbers = effectiveMsg.match(/\b(\d+)\b/g) || [];
|
|
324
341
|
const answer = allNumbers.length > 0 ? allNumbers[allNumbers.length - 1] : "?";
|
|
325
|
-
const
|
|
326
|
-
const reasoningPatterns = [
|
|
327
|
-
"because",
|
|
328
|
-
"therefore",
|
|
329
|
-
"since",
|
|
330
|
-
"step",
|
|
331
|
-
"subtract",
|
|
332
|
-
"minus",
|
|
333
|
-
"each day",
|
|
334
|
-
"each night",
|
|
335
|
-
"slides",
|
|
336
|
-
"climbs",
|
|
337
|
-
"night",
|
|
338
|
-
"reaches",
|
|
339
|
-
"finally",
|
|
340
|
-
"last day"
|
|
341
|
-
];
|
|
342
|
-
const hasReasoningWords = reasoningPatterns.some((w) => effectiveMsg.toLowerCase().includes(w));
|
|
343
|
-
const hasNumberedSteps = /^\s*\d+\.\s/m.test(effectiveMsg);
|
|
344
|
-
const hasReasoning = hasReasoningWords || hasNumberedSteps;
|
|
345
|
-
let score;
|
|
346
|
-
let pass;
|
|
347
|
-
if (isCorrect && hasReasoning) {
|
|
348
|
-
score = "STRONG";
|
|
349
|
-
pass = true;
|
|
350
|
-
} else if (isCorrect) {
|
|
351
|
-
score = "MODERATE";
|
|
352
|
-
pass = true;
|
|
353
|
-
} else if (hasReasoning) {
|
|
354
|
-
score = "WEAK";
|
|
355
|
-
pass = false;
|
|
356
|
-
} else {
|
|
357
|
-
score = "FAIL";
|
|
358
|
-
pass = false;
|
|
359
|
-
}
|
|
342
|
+
const { score, pass } = scoreReasoning(effectiveMsg);
|
|
360
343
|
const displayMsg = msg.trim().length > 0 ? effectiveMsg : `[thinking tokens] ${effectiveMsg}`;
|
|
361
344
|
return { pass, score, reasoning: displayMsg, answer, elapsedMs };
|
|
362
345
|
} catch (e) {
|
|
@@ -375,41 +358,7 @@ function model_test_temp_default(pi) {
|
|
|
375
358
|
}
|
|
376
359
|
const allNumbers = msg.match(/\b(\d+)\b/g) || [];
|
|
377
360
|
const answer = allNumbers.length > 0 ? allNumbers[allNumbers.length - 1] : "?";
|
|
378
|
-
const
|
|
379
|
-
const reasoningPatterns = [
|
|
380
|
-
"because",
|
|
381
|
-
"therefore",
|
|
382
|
-
"since",
|
|
383
|
-
"step",
|
|
384
|
-
"subtract",
|
|
385
|
-
"minus",
|
|
386
|
-
"each day",
|
|
387
|
-
"each night",
|
|
388
|
-
"slides",
|
|
389
|
-
"climbs",
|
|
390
|
-
"night",
|
|
391
|
-
"reaches",
|
|
392
|
-
"finally",
|
|
393
|
-
"last day"
|
|
394
|
-
];
|
|
395
|
-
const hasReasoningWords = reasoningPatterns.some((w) => msg.toLowerCase().includes(w));
|
|
396
|
-
const hasNumberedSteps = /^\s*\d+\.\s/m.test(msg);
|
|
397
|
-
const hasReasoning = hasReasoningWords || hasNumberedSteps;
|
|
398
|
-
let score;
|
|
399
|
-
let pass;
|
|
400
|
-
if (isCorrect && hasReasoning) {
|
|
401
|
-
score = "STRONG";
|
|
402
|
-
pass = true;
|
|
403
|
-
} else if (isCorrect) {
|
|
404
|
-
score = "MODERATE";
|
|
405
|
-
pass = true;
|
|
406
|
-
} else if (hasReasoning) {
|
|
407
|
-
score = "WEAK";
|
|
408
|
-
pass = false;
|
|
409
|
-
} else {
|
|
410
|
-
score = "FAIL";
|
|
411
|
-
pass = false;
|
|
412
|
-
}
|
|
361
|
+
const { score, pass } = scoreReasoning(msg);
|
|
413
362
|
return { pass, score, reasoning: msg, answer, elapsedMs: result.elapsedMs };
|
|
414
363
|
} catch (e) {
|
|
415
364
|
return { pass: false, score: "ERROR", reasoning: e.message, answer: "?", elapsedMs: 0 };
|
|
@@ -465,29 +414,24 @@ function model_test_temp_default(pi) {
|
|
|
465
414
|
options: { num_predict: CONFIG.NUM_PREDICT, temperature: CONFIG.TEMPERATURE }
|
|
466
415
|
};
|
|
467
416
|
try {
|
|
417
|
+
const controller = new AbortController();
|
|
418
|
+
const timeoutId = setTimeout(() => controller.abort(), CONFIG.TOOL_TEST_TIMEOUT_MS);
|
|
468
419
|
const start = Date.now();
|
|
469
|
-
const
|
|
470
|
-
"
|
|
471
|
-
"
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
String(CONFIG.CONNECT_TIMEOUT_S),
|
|
476
|
-
"--max-time",
|
|
477
|
-
String(CONFIG.TOOL_TEST_MAX_TIME_S),
|
|
478
|
-
`${OLLAMA_BASE}/api/chat`,
|
|
479
|
-
"-H",
|
|
480
|
-
"Content-Type: application/json",
|
|
481
|
-
"-d",
|
|
482
|
-
JSON.stringify(body)
|
|
483
|
-
], { timeout: CONFIG.TOOL_TEST_TIMEOUT_MS });
|
|
420
|
+
const res = await fetch(`${ollamaBase()}/api/chat`, {
|
|
421
|
+
method: "POST",
|
|
422
|
+
headers: { "Content-Type": "application/json" },
|
|
423
|
+
body: JSON.stringify(body),
|
|
424
|
+
signal: controller.signal
|
|
425
|
+
});
|
|
484
426
|
const elapsedMs = Date.now() - start;
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
427
|
+
clearTimeout(timeoutId);
|
|
428
|
+
if (!res.ok) {
|
|
429
|
+
const errorText = await res.text().catch(() => "unknown error");
|
|
430
|
+
return { pass: false, score: "ERROR", hasToolCalls: false, toolCall: `fetch error: ${res.status}`, response: "", elapsedMs };
|
|
488
431
|
}
|
|
489
|
-
|
|
490
|
-
|
|
432
|
+
const text = await res.text();
|
|
433
|
+
if (!text.trim()) throw new Error("Empty response from Ollama");
|
|
434
|
+
const parsed = JSON.parse(text);
|
|
491
435
|
const toolCalls = parsed?.message?.tool_calls;
|
|
492
436
|
const content = parsed?.message?.content || "";
|
|
493
437
|
if (toolCalls && toolCalls.length > 0) {
|
|
@@ -506,20 +450,7 @@ function model_test_temp_default(pi) {
|
|
|
506
450
|
elapsedMs
|
|
507
451
|
};
|
|
508
452
|
}
|
|
509
|
-
const
|
|
510
|
-
const hasLocation = typeof args.location === "string" && args.location.toLowerCase().includes("paris");
|
|
511
|
-
const unitValid = args.unit === void 0 || typeof args.unit === "string" && ["celsius", "fahrenheit"].includes(args.unit.toLowerCase());
|
|
512
|
-
let score;
|
|
513
|
-
if (hasCorrectTool && hasLocation && unitValid) {
|
|
514
|
-
score = "STRONG";
|
|
515
|
-
} else if (hasCorrectTool && hasLocation) {
|
|
516
|
-
score = "MODERATE";
|
|
517
|
-
} else if (hasCorrectTool) {
|
|
518
|
-
score = "WEAK";
|
|
519
|
-
} else {
|
|
520
|
-
score = "WEAK";
|
|
521
|
-
}
|
|
522
|
-
const pass = score !== "WEAK";
|
|
453
|
+
const { score, pass } = scoreNativeToolCall(fn.name || "", args);
|
|
523
454
|
return {
|
|
524
455
|
pass,
|
|
525
456
|
score,
|
|
@@ -529,38 +460,14 @@ function model_test_temp_default(pi) {
|
|
|
529
460
|
elapsedMs
|
|
530
461
|
};
|
|
531
462
|
}
|
|
532
|
-
const
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
const lastBrace = content.lastIndexOf("}");
|
|
536
|
-
if (lastBrace > firstBrace) {
|
|
537
|
-
const jsonCandidate = content.slice(firstBrace, lastBrace + 1);
|
|
538
|
-
try {
|
|
539
|
-
textToolParsed = JSON.parse(jsonCandidate);
|
|
540
|
-
} catch {
|
|
541
|
-
}
|
|
542
|
-
}
|
|
543
|
-
}
|
|
544
|
-
if (textToolParsed && typeof textToolParsed.name === "string") {
|
|
545
|
-
const fnName = textToolParsed.name;
|
|
546
|
-
const rawArgs = textToolParsed.arguments || { ...textToolParsed };
|
|
547
|
-
const { name: _, ...fnArgs } = rawArgs;
|
|
548
|
-
const isWeatherTool = fnName === "get_weather";
|
|
549
|
-
const hasLocation = typeof fnArgs.location === "string" && fnArgs.location.toLowerCase().includes("paris");
|
|
550
|
-
let score;
|
|
551
|
-
if (isWeatherTool && hasLocation) {
|
|
552
|
-
score = "STRONG";
|
|
553
|
-
} else if (isWeatherTool) {
|
|
554
|
-
score = "MODERATE";
|
|
555
|
-
} else {
|
|
556
|
-
score = "WEAK";
|
|
557
|
-
}
|
|
558
|
-
const pass = score !== "WEAK";
|
|
463
|
+
const textParsed = parseTextToolCall(content);
|
|
464
|
+
if (textParsed) {
|
|
465
|
+
const { score, pass } = scoreTextToolCall(textParsed.fnName, textParsed.args);
|
|
559
466
|
return {
|
|
560
467
|
pass,
|
|
561
468
|
score,
|
|
562
469
|
hasToolCalls: true,
|
|
563
|
-
toolCall: `${fnName}(${JSON.stringify(
|
|
470
|
+
toolCall: `${textParsed.fnName}(${JSON.stringify(textParsed.args)})`,
|
|
564
471
|
response: content,
|
|
565
472
|
elapsedMs
|
|
566
473
|
};
|
|
@@ -622,20 +529,7 @@ function model_test_temp_default(pi) {
|
|
|
622
529
|
elapsedMs: result.elapsedMs
|
|
623
530
|
};
|
|
624
531
|
}
|
|
625
|
-
const
|
|
626
|
-
const hasLocation = typeof args.location === "string" && args.location.toLowerCase().includes("paris");
|
|
627
|
-
const unitValid = args.unit === void 0 || typeof args.unit === "string" && ["celsius", "fahrenheit"].includes(args.unit.toLowerCase());
|
|
628
|
-
let score;
|
|
629
|
-
if (hasCorrectTool && hasLocation && unitValid) {
|
|
630
|
-
score = "STRONG";
|
|
631
|
-
} else if (hasCorrectTool && hasLocation) {
|
|
632
|
-
score = "MODERATE";
|
|
633
|
-
} else if (hasCorrectTool) {
|
|
634
|
-
score = "WEAK";
|
|
635
|
-
} else {
|
|
636
|
-
score = "WEAK";
|
|
637
|
-
}
|
|
638
|
-
const pass = score !== "WEAK";
|
|
532
|
+
const { score, pass } = scoreNativeToolCall(fn.name || "", args);
|
|
639
533
|
return {
|
|
640
534
|
pass,
|
|
641
535
|
score,
|
|
@@ -645,38 +539,14 @@ function model_test_temp_default(pi) {
|
|
|
645
539
|
elapsedMs: result.elapsedMs
|
|
646
540
|
};
|
|
647
541
|
}
|
|
648
|
-
const
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
const lastBrace = content.lastIndexOf("}");
|
|
652
|
-
if (lastBrace > firstBrace) {
|
|
653
|
-
const jsonCandidate = content.slice(firstBrace, lastBrace + 1);
|
|
654
|
-
try {
|
|
655
|
-
textToolParsed = JSON.parse(jsonCandidate);
|
|
656
|
-
} catch {
|
|
657
|
-
}
|
|
658
|
-
}
|
|
659
|
-
}
|
|
660
|
-
if (textToolParsed && typeof textToolParsed.name === "string") {
|
|
661
|
-
const fnName = textToolParsed.name;
|
|
662
|
-
const rawArgs = textToolParsed.arguments || { ...textToolParsed };
|
|
663
|
-
const { name: _, ...fnArgs } = rawArgs;
|
|
664
|
-
const isWeatherTool = fnName === "get_weather";
|
|
665
|
-
const hasLocation = typeof fnArgs.location === "string" && fnArgs.location.toLowerCase().includes("paris");
|
|
666
|
-
let score;
|
|
667
|
-
if (isWeatherTool && hasLocation) {
|
|
668
|
-
score = "STRONG";
|
|
669
|
-
} else if (isWeatherTool) {
|
|
670
|
-
score = "MODERATE";
|
|
671
|
-
} else {
|
|
672
|
-
score = "WEAK";
|
|
673
|
-
}
|
|
674
|
-
const pass = score !== "WEAK";
|
|
542
|
+
const textParsed = parseTextToolCall(content);
|
|
543
|
+
if (textParsed) {
|
|
544
|
+
const { score, pass } = scoreTextToolCall(textParsed.fnName, textParsed.args);
|
|
675
545
|
return {
|
|
676
546
|
pass,
|
|
677
547
|
score,
|
|
678
548
|
hasToolCalls: true,
|
|
679
|
-
toolCall: `${fnName}(${JSON.stringify(
|
|
549
|
+
toolCall: `${textParsed.fnName}(${JSON.stringify(textParsed.args)})`,
|
|
680
550
|
response: content,
|
|
681
551
|
elapsedMs: result.elapsedMs
|
|
682
552
|
};
|
|
@@ -713,29 +583,24 @@ function model_test_temp_default(pi) {
|
|
|
713
583
|
options: { num_predict: CONFIG.NUM_PREDICT, temperature: CONFIG.TEMPERATURE }
|
|
714
584
|
};
|
|
715
585
|
try {
|
|
586
|
+
const controller = new AbortController();
|
|
587
|
+
const timeoutId = setTimeout(() => controller.abort(), CONFIG.TOOL_TEST_TIMEOUT_MS);
|
|
716
588
|
const start = Date.now();
|
|
717
|
-
const
|
|
718
|
-
"
|
|
719
|
-
"
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
String(CONFIG.CONNECT_TIMEOUT_S),
|
|
724
|
-
"--max-time",
|
|
725
|
-
String(CONFIG.TOOL_TEST_MAX_TIME_S),
|
|
726
|
-
`${OLLAMA_BASE}/api/chat`,
|
|
727
|
-
"-H",
|
|
728
|
-
"Content-Type: application/json",
|
|
729
|
-
"-d",
|
|
730
|
-
JSON.stringify(body)
|
|
731
|
-
], { timeout: CONFIG.TOOL_TEST_TIMEOUT_MS });
|
|
589
|
+
const res = await fetch(`${ollamaBase()}/api/chat`, {
|
|
590
|
+
method: "POST",
|
|
591
|
+
headers: { "Content-Type": "application/json" },
|
|
592
|
+
body: JSON.stringify(body),
|
|
593
|
+
signal: controller.signal
|
|
594
|
+
});
|
|
732
595
|
const elapsedMs = Date.now() - start;
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
596
|
+
clearTimeout(timeoutId);
|
|
597
|
+
if (!res.ok) {
|
|
598
|
+
const errorText = await res.text().catch(() => "unknown error");
|
|
599
|
+
return { pass: false, score: "ERROR", toolCall: `fetch error: ${res.status}`, thought: "", response: "", elapsedMs };
|
|
736
600
|
}
|
|
737
|
-
|
|
738
|
-
|
|
601
|
+
const text = await res.text();
|
|
602
|
+
if (!text.trim()) throw new Error("Empty response from Ollama");
|
|
603
|
+
const parsed = JSON.parse(text);
|
|
739
604
|
const content = (parsed?.message?.content || "").trim();
|
|
740
605
|
if (!content) {
|
|
741
606
|
return { pass: false, score: "FAIL", toolCall: "empty response", thought: "", response: "", elapsedMs };
|
|
@@ -744,20 +609,20 @@ function model_test_temp_default(pi) {
|
|
|
744
609
|
const sharedParser = pi._reactParser;
|
|
745
610
|
if (sharedParser?.ALL_DIALECT_PATTERNS) {
|
|
746
611
|
for (const dp of sharedParser.ALL_DIALECT_PATTERNS) {
|
|
747
|
-
const
|
|
748
|
-
if (
|
|
749
|
-
let toolName =
|
|
612
|
+
const result = sharedParser.parseReactWithPatterns(content, dp, true);
|
|
613
|
+
if (result) {
|
|
614
|
+
let toolName = result.name;
|
|
750
615
|
let argsStr;
|
|
751
|
-
const rawArgs =
|
|
616
|
+
const rawArgs = result.args ? JSON.stringify(result.args) : "";
|
|
752
617
|
if (rawArgs && rawArgs !== "{}") {
|
|
753
618
|
argsStr = rawArgs;
|
|
754
|
-
} else if (
|
|
755
|
-
const jsonStart =
|
|
619
|
+
} else if (result.raw) {
|
|
620
|
+
const jsonStart = result.raw.indexOf("{");
|
|
756
621
|
if (jsonStart !== -1) {
|
|
757
622
|
let depth = 0, jsonEnd = -1;
|
|
758
|
-
for (let i = jsonStart; i <
|
|
759
|
-
if (
|
|
760
|
-
else if (
|
|
623
|
+
for (let i = jsonStart; i < result.raw.length; i++) {
|
|
624
|
+
if (result.raw[i] === "{") depth++;
|
|
625
|
+
else if (result.raw[i] === "}") {
|
|
761
626
|
depth--;
|
|
762
627
|
if (depth === 0) {
|
|
763
628
|
jsonEnd = i;
|
|
@@ -765,14 +630,14 @@ function model_test_temp_default(pi) {
|
|
|
765
630
|
}
|
|
766
631
|
}
|
|
767
632
|
}
|
|
768
|
-
argsStr = jsonEnd !== -1 ?
|
|
633
|
+
argsStr = jsonEnd !== -1 ? result.raw.slice(jsonStart, jsonEnd + 1) : "";
|
|
769
634
|
} else {
|
|
770
635
|
argsStr = "";
|
|
771
636
|
}
|
|
772
637
|
} else {
|
|
773
638
|
argsStr = "";
|
|
774
639
|
}
|
|
775
|
-
parsedResult = { name: toolName, args: argsStr, thought:
|
|
640
|
+
parsedResult = { name: toolName, args: argsStr, thought: result.thought || "", dialect: result.dialect };
|
|
776
641
|
break;
|
|
777
642
|
}
|
|
778
643
|
}
|
|
@@ -917,12 +782,30 @@ The JSON object must have exactly these 4 keys:
|
|
|
917
782
|
parsed = JSON.parse(cleaned);
|
|
918
783
|
} catch {
|
|
919
784
|
const cleaned = msg.replace(/```json?\s*/gi, "").replace(/```/g, "").trim();
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
785
|
+
let braceDepth = 0, bracketDepth = 0;
|
|
786
|
+
let inString = false, escapeNext = false;
|
|
787
|
+
for (let i = 0; i < cleaned.length; i++) {
|
|
788
|
+
const c = cleaned[i];
|
|
789
|
+
if (escapeNext) {
|
|
790
|
+
escapeNext = false;
|
|
791
|
+
continue;
|
|
792
|
+
}
|
|
793
|
+
if (c === "\\") {
|
|
794
|
+
if (inString) escapeNext = true;
|
|
795
|
+
continue;
|
|
796
|
+
}
|
|
797
|
+
if (c === '"') {
|
|
798
|
+
inString = !inString;
|
|
799
|
+
continue;
|
|
800
|
+
}
|
|
801
|
+
if (inString) continue;
|
|
802
|
+
if (c === "{") braceDepth++;
|
|
803
|
+
else if (c === "}") braceDepth = Math.max(0, braceDepth - 1);
|
|
804
|
+
else if (c === "[") bracketDepth++;
|
|
805
|
+
else if (c === "]") bracketDepth = Math.max(0, bracketDepth - 1);
|
|
806
|
+
}
|
|
807
|
+
if (braceDepth > 0 || bracketDepth > 0) {
|
|
808
|
+
const repaired = cleaned + "}".repeat(braceDepth) + "]".repeat(bracketDepth);
|
|
926
809
|
try {
|
|
927
810
|
parsed = JSON.parse(repaired);
|
|
928
811
|
repairNote = " (repaired truncated JSON)";
|
|
@@ -976,12 +859,30 @@ The JSON object must have exactly these 4 keys:
|
|
|
976
859
|
parsed = JSON.parse(cleaned);
|
|
977
860
|
} catch {
|
|
978
861
|
const cleaned = msg.replace(/```json?\s*/gi, "").replace(/```/g, "").trim();
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
862
|
+
let braceDepth = 0, bracketDepth = 0;
|
|
863
|
+
let inString = false, escapeNext = false;
|
|
864
|
+
for (let i = 0; i < cleaned.length; i++) {
|
|
865
|
+
const c = cleaned[i];
|
|
866
|
+
if (escapeNext) {
|
|
867
|
+
escapeNext = false;
|
|
868
|
+
continue;
|
|
869
|
+
}
|
|
870
|
+
if (c === "\\") {
|
|
871
|
+
if (inString) escapeNext = true;
|
|
872
|
+
continue;
|
|
873
|
+
}
|
|
874
|
+
if (c === '"') {
|
|
875
|
+
inString = !inString;
|
|
876
|
+
continue;
|
|
877
|
+
}
|
|
878
|
+
if (inString) continue;
|
|
879
|
+
if (c === "{") braceDepth++;
|
|
880
|
+
else if (c === "}") braceDepth = Math.max(0, braceDepth - 1);
|
|
881
|
+
else if (c === "[") bracketDepth++;
|
|
882
|
+
else if (c === "]") bracketDepth = Math.max(0, bracketDepth - 1);
|
|
883
|
+
}
|
|
884
|
+
if (braceDepth > 0 || bracketDepth > 0) {
|
|
885
|
+
const repaired = cleaned + "}".repeat(braceDepth) + "]".repeat(bracketDepth);
|
|
985
886
|
try {
|
|
986
887
|
parsed = JSON.parse(repaired);
|
|
987
888
|
repairNote = " (repaired truncated JSON)";
|
|
@@ -1057,29 +958,29 @@ The JSON object must have exactly these 4 keys:
|
|
|
1057
958
|
};
|
|
1058
959
|
try {
|
|
1059
960
|
const start = Date.now();
|
|
1060
|
-
const
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
"
|
|
1064
|
-
"
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
"120",
|
|
1069
|
-
`${OLLAMA_BASE}/api/chat`,
|
|
1070
|
-
"-H",
|
|
1071
|
-
"Content-Type: application/json",
|
|
1072
|
-
"-d",
|
|
1073
|
-
JSON.stringify(body)
|
|
1074
|
-
], { timeout: 13e4 });
|
|
961
|
+
const controller = new AbortController();
|
|
962
|
+
const timeoutId = setTimeout(() => controller.abort(), 13e4);
|
|
963
|
+
const res = await fetch(`${ollamaBase()}/api/chat`, {
|
|
964
|
+
method: "POST",
|
|
965
|
+
headers: { "Content-Type": "application/json" },
|
|
966
|
+
body: JSON.stringify(body),
|
|
967
|
+
signal: controller.signal
|
|
968
|
+
});
|
|
1075
969
|
const elapsedMs = Date.now() - start;
|
|
1076
|
-
|
|
1077
|
-
|
|
970
|
+
clearTimeout(timeoutId);
|
|
971
|
+
if (!res.ok) {
|
|
972
|
+
const detail = await res.text().catch(() => "unknown error");
|
|
1078
973
|
const level2 = "none";
|
|
1079
974
|
cacheToolSupport(model, level2, family);
|
|
1080
|
-
return { level: level2, cached: false, evidence: `API error: ${truncate(detail, 100)}`, elapsedMs };
|
|
975
|
+
return { level: level2, cached: false, evidence: `API error ${res.status}: ${truncate(detail, 100)}`, elapsedMs };
|
|
1081
976
|
}
|
|
1082
|
-
const
|
|
977
|
+
const text = await res.text();
|
|
978
|
+
if (!text.trim()) {
|
|
979
|
+
const level2 = "none";
|
|
980
|
+
cacheToolSupport(model, level2, family);
|
|
981
|
+
return { level: level2, cached: false, evidence: "empty response from Ollama", elapsedMs };
|
|
982
|
+
}
|
|
983
|
+
const parsed = JSON.parse(text);
|
|
1083
984
|
const toolCalls = parsed?.message?.tool_calls;
|
|
1084
985
|
const content = (parsed?.message?.content || "").trim();
|
|
1085
986
|
if (toolCalls && Array.isArray(toolCalls) && toolCalls.length > 0) {
|
|
@@ -1175,9 +1076,9 @@ The JSON object must have exactly these 4 keys:
|
|
|
1175
1076
|
}
|
|
1176
1077
|
async function getOllamaModels() {
|
|
1177
1078
|
try {
|
|
1178
|
-
const
|
|
1179
|
-
if (
|
|
1180
|
-
const data =
|
|
1079
|
+
const res = await fetch(`${ollamaBase()}/api/tags`, { signal: AbortSignal.timeout(15e3) });
|
|
1080
|
+
if (!res.ok) return [];
|
|
1081
|
+
const data = await res.json();
|
|
1181
1082
|
return (data.models || []).map((m) => m.name).filter(Boolean);
|
|
1182
1083
|
} catch {
|
|
1183
1084
|
return [];
|
|
@@ -1187,14 +1088,8 @@ The JSON object must have exactly these 4 keys:
|
|
|
1187
1088
|
return ctx.model?.id;
|
|
1188
1089
|
}
|
|
1189
1090
|
function updateModelsJsonReasoning(model, hasReasoning) {
|
|
1190
|
-
const agentDir = path.join(os.homedir(), ".pi", "agent");
|
|
1191
|
-
const modelsJsonPath = path.join(agentDir, "models.json");
|
|
1192
|
-
if (!fs.existsSync(modelsJsonPath)) {
|
|
1193
|
-
return { updated: false, message: "models.json not found \u2014 skipped" };
|
|
1194
|
-
}
|
|
1195
1091
|
try {
|
|
1196
|
-
const
|
|
1197
|
-
const config = JSON.parse(raw);
|
|
1092
|
+
const config = readModelsJson();
|
|
1198
1093
|
let updated = false;
|
|
1199
1094
|
for (const provider of Object.values(config.providers || {})) {
|
|
1200
1095
|
const models = provider.models || [];
|
|
@@ -1214,7 +1109,7 @@ The JSON object must have exactly these 4 keys:
|
|
|
1214
1109
|
if (!updated) {
|
|
1215
1110
|
return { updated: false, message: `${model} not found in models.json \u2014 skipped` };
|
|
1216
1111
|
}
|
|
1217
|
-
|
|
1112
|
+
writeModelsJson(config);
|
|
1218
1113
|
const action = hasReasoning ? "set reasoning: true" : "set reasoning: false";
|
|
1219
1114
|
return { updated: true, message: `\u2705 Updated ${model}: ${action}` };
|
|
1220
1115
|
} catch (e) {
|
|
@@ -1222,7 +1117,7 @@ The JSON object must have exactly these 4 keys:
|
|
|
1222
1117
|
}
|
|
1223
1118
|
}
|
|
1224
1119
|
const branding = [
|
|
1225
|
-
` \u26A1 Pi Model Benchmark
|
|
1120
|
+
` \u26A1 Pi Model Benchmark v${EXTENSION_VERSION}`,
|
|
1226
1121
|
` Written by VTSTech`,
|
|
1227
1122
|
` GitHub: https://github.com/VTSTech`,
|
|
1228
1123
|
` Website: www.vts-tech.org`
|
|
@@ -1243,7 +1138,7 @@ The JSON object must have exactly these 4 keys:
|
|
|
1243
1138
|
}
|
|
1244
1139
|
}
|
|
1245
1140
|
lines.push(info(`API: ${apiMode}`));
|
|
1246
|
-
const nativeContext = await fetchModelContextLength(
|
|
1141
|
+
const nativeContext = await fetchModelContextLength(ollamaBase(), model);
|
|
1247
1142
|
if (nativeContext !== void 0) {
|
|
1248
1143
|
const ctxStr = nativeContext >= 1e3 ? `${(nativeContext / 1e3).toFixed(1)}k` : String(nativeContext);
|
|
1249
1144
|
lines.push(info(`Context: ${ctxStr} tokens (native max)`));
|
|
@@ -1254,9 +1149,9 @@ The JSON object must have exactly these 4 keys:
|
|
|
1254
1149
|
let modelQuant = "unknown";
|
|
1255
1150
|
let modelModified = "unknown";
|
|
1256
1151
|
try {
|
|
1257
|
-
const
|
|
1258
|
-
if (
|
|
1259
|
-
const tags =
|
|
1152
|
+
const tagsRes = await fetch(`${ollamaBase()}/api/tags`, { signal: AbortSignal.timeout(1e4) });
|
|
1153
|
+
if (tagsRes.ok) {
|
|
1154
|
+
const tags = await tagsRes.json();
|
|
1260
1155
|
const entry = (tags.models || []).find((m) => m.name === model);
|
|
1261
1156
|
if (entry) {
|
|
1262
1157
|
const details = entry.details || {};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@vtstech/pi-model-test",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.1.1",
|
|
4
4
|
"description": "Model benchmark/testing extension for Pi Coding Agent",
|
|
5
5
|
"main": "model-test.js",
|
|
6
6
|
"keywords": ["pi-extensions"],
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
"url": "https://github.com/VTSTech/pi-coding-agent"
|
|
15
15
|
},
|
|
16
16
|
"dependencies": {
|
|
17
|
-
"@vtstech/pi-shared": "1.
|
|
17
|
+
"@vtstech/pi-shared": "1.1.1"
|
|
18
18
|
},
|
|
19
19
|
"peerDependencies": {
|
|
20
20
|
"@mariozechner/pi-coding-agent": ">=0.66"
|