@vtstech/pi-model-test 1.0.9 → 1.1.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (2):
  1. package/model-test.js +67 -79
  2. package/package.json +2 -2
package/model-test.js CHANGED
@@ -12,57 +12,17 @@ import {
12
12
  truncate,
13
13
  sanitizeForReport
14
14
  } from "@vtstech/pi-shared/format";
15
- import { getOllamaBaseUrl, detectModelFamily, readModelsJson, BUILTIN_PROVIDERS, fetchModelContextLength } from "@vtstech/pi-shared/ollama";
16
- function detectProvider(ctx) {
17
- const model = ctx.model;
18
- if (!model) return { kind: "unknown", name: "none" };
19
- const providerName = model.provider || "";
20
- if (!providerName) return { kind: "unknown", name: "none" };
21
- const modelsJson = readModelsJson();
22
- const userProviderCfg = (modelsJson.providers || {})[providerName];
23
- if (userProviderCfg) {
24
- const baseUrl = userProviderCfg.baseUrl || "";
25
- const apiMode = userProviderCfg.api || "";
26
- const apiKey = userProviderCfg.apiKey || "";
27
- const isOllama = /ollama/i.test(providerName) || /localhost:\d+/.test(baseUrl) || /127\.0\.0\.1:\d+/.test(baseUrl) || /0\.0\.0\.0:\d+/.test(baseUrl) || /\/api\/chat/.test(baseUrl) || apiMode === "ollama";
28
- if (isOllama) {
29
- return { kind: "ollama", name: providerName, apiMode: "ollama", baseUrl, apiKey };
30
- }
31
- if (/\/api\/chat/.test(baseUrl)) {
32
- return { kind: "ollama", name: providerName, apiMode: "ollama", baseUrl, apiKey };
33
- }
34
- return {
35
- kind: "builtin",
36
- name: providerName,
37
- apiMode: apiMode || userProviderCfg.api || "openai-completions",
38
- baseUrl,
39
- apiKey
40
- };
41
- }
42
- const builtin = BUILTIN_PROVIDERS[providerName];
43
- if (builtin) {
44
- const apiKey = process.env[builtin.envKey] || "";
45
- return {
46
- kind: "builtin",
47
- name: providerName,
48
- apiMode: builtin.api,
49
- baseUrl: builtin.baseUrl,
50
- envKey: builtin.envKey,
51
- apiKey
52
- };
53
- }
54
- return { kind: "unknown", name: providerName };
55
- }
15
+ import { getOllamaBaseUrl, detectModelFamily, readModelsJson, writeModelsJson, fetchModelContextLength, EXTENSION_VERSION, detectProvider } from "@vtstech/pi-shared/ollama";
56
16
  var CONFIG = {
57
17
  // General API settings
58
18
  DEFAULT_TIMEOUT_MS: 999999,
59
- // 8.3 minutes - default timeout for model responses
19
+ // ~16.7 minutes effectively unlimited for slow models
60
20
  CONNECT_TIMEOUT_S: 60,
61
- // 30 seconds to establish connection
21
+ // 60 seconds to establish connection
62
22
  MAX_RETRIES: 1,
63
23
  // Single retry for transient failures
64
24
  RETRY_DELAY_MS: 1e4,
65
- // 2 seconds between retries
25
+ // 10 seconds between retries
66
26
  EXEC_BUFFER_MS: 8e3,
67
27
  // Extra buffer for exec timeout over curl timeout
68
28
  // Model generation settings
@@ -74,31 +34,32 @@ var CONFIG = {
74
34
  MIN_THINKING_LENGTH: 10,
75
35
  // Minimum chars to consider thinking tokens valid
76
36
  TOOL_TEST_TIMEOUT_MS: 999999,
77
- // 90 seconds for tool usage tests
37
+ // Effectively unlimited for slow tool usage tests
78
38
  TOOL_TEST_MAX_TIME_S: 999999,
79
39
  // Max curl time for tool tests (effectively unlimited)
80
40
  TOOL_SUPPORT_TIMEOUT_MS: 999999,
81
- // 2+ minutes for tool support detection
41
+ // Effectively unlimited for tool support detection
82
42
  TOOL_SUPPORT_MAX_TIME_S: 999999,
83
43
  // Max curl time for tool support detection
84
44
  // Metadata retrieval
85
45
  TAGS_TIMEOUT_MS: 15e3,
86
46
  // 15 seconds for /api/tags
87
47
  TAGS_CONNECT_TIMEOUT_S: 30,
88
- // 10 seconds connection timeout for tags
48
+ // 30 seconds connection timeout for tags
89
49
  MODEL_INFO_TIMEOUT_MS: 3e4,
90
- // 10 seconds for model info lookup
50
+ // 30 seconds for model info lookup
91
51
  // Provider API settings
92
52
  PROVIDER_TIMEOUT_MS: 999999,
93
- // 2 minutes for cloud provider API calls
53
+ // Effectively unlimited for cloud provider API calls
94
54
  PROVIDER_TOOL_TIMEOUT_MS: 12e4,
95
- // 60 seconds for tool usage tests on providers
55
+ // 120 seconds for tool usage tests on providers
96
56
  // Rate limiting
97
57
  TEST_DELAY_MS: 1e4
98
- // 30 seconds between tests to avoid rate limiting
58
+ // 10 seconds between tests to avoid rate limiting
99
59
  };
100
60
  var TOOL_SUPPORT_CACHE_DIR = path.join(os.homedir(), ".pi", "agent", "cache");
101
61
  var TOOL_SUPPORT_CACHE_PATH = path.join(TOOL_SUPPORT_CACHE_DIR, "tool_support.json");
62
+ var _toolSupportCacheInMemory = null;
102
63
  function readToolSupportCache() {
103
64
  try {
104
65
  if (fs.existsSync(TOOL_SUPPORT_CACHE_PATH)) {
@@ -116,19 +77,21 @@ function writeToolSupportCache(cache) {
116
77
  fs.writeFileSync(TOOL_SUPPORT_CACHE_PATH, JSON.stringify(cache, null, 2) + "\n", "utf-8");
117
78
  }
118
79
  function getCachedToolSupport(model) {
119
- const cache = readToolSupportCache();
80
+ const cache = _toolSupportCacheInMemory || readToolSupportCache();
81
+ if (!_toolSupportCacheInMemory) _toolSupportCacheInMemory = cache;
120
82
  const entry = cache[model];
121
83
  if (!entry) return null;
122
84
  if (!entry.support || !["native", "react", "none"].includes(entry.support)) return null;
123
85
  return entry;
124
86
  }
125
87
  function cacheToolSupport(model, support, family) {
126
- const cache = readToolSupportCache();
88
+ const cache = _toolSupportCacheInMemory || readToolSupportCache();
127
89
  cache[model] = {
128
90
  support,
129
91
  testedAt: (/* @__PURE__ */ new Date()).toISOString(),
130
92
  family
131
93
  };
94
+ _toolSupportCacheInMemory = cache;
132
95
  writeToolSupportCache(cache);
133
96
  }
134
97
  function model_test_temp_default(pi) {
@@ -247,10 +210,8 @@ function model_test_temp_default(pi) {
247
210
  { role: "user", content: "Reply with exactly: PONG" }
248
211
  ], { maxTokens: 10, timeoutMs: 3e4 });
249
212
  const elapsedMs = Date.now() - start;
250
- const content = result.content.trim().toUpperCase();
251
213
  const reachable = true;
252
214
  const authValid = true;
253
- const hasPong = content.includes("PONG");
254
215
  return {
255
216
  pass: reachable && authValid,
256
217
  reachable,
@@ -259,7 +220,6 @@ function model_test_temp_default(pi) {
259
220
  elapsedMs
260
221
  };
261
222
  } catch (e) {
262
- const start = Date.now();
263
223
  let reachable = false;
264
224
  let authValid = false;
265
225
  const msg = e.message || "";
@@ -290,7 +250,6 @@ function model_test_temp_default(pi) {
290
250
  const prompt = `A snail climbs 3 feet up a wall each day, but slides back 2 feet each night. The wall is 10 feet tall. How many days does it take the snail to reach the top? Think step by step and give the final answer on its own line like: ANSWER: <number>`;
291
251
  try {
292
252
  let response, elapsedMs;
293
- let usedThinkingFallback = false;
294
253
  try {
295
254
  const result = await ollamaChat(model, [
296
255
  { role: "user", content: prompt }
@@ -309,7 +268,6 @@ function model_test_temp_default(pi) {
309
268
  ], { think: true });
310
269
  response = retry.response;
311
270
  elapsedMs = retry.elapsedMs;
312
- usedThinkingFallback = true;
313
271
  } else {
314
272
  throw firstErr;
315
273
  }
@@ -917,12 +875,30 @@ The JSON object must have exactly these 4 keys:
917
875
  parsed = JSON.parse(cleaned);
918
876
  } catch {
919
877
  const cleaned = msg.replace(/```json?\s*/gi, "").replace(/```/g, "").trim();
920
- const openBraces = (cleaned.match(/\{/g) || []).length;
921
- const closeBraces = (cleaned.match(/\}/g) || []).length;
922
- const openBrackets = (cleaned.match(/\[/g) || []).length;
923
- const closeBrackets = (cleaned.match(/\]/g) || []).length;
924
- if (openBraces > closeBraces || openBrackets > closeBrackets) {
925
- const repaired = cleaned + "}".repeat(Math.max(0, openBraces - closeBraces)) + "]".repeat(Math.max(0, openBrackets - closeBrackets));
878
+ let braceDepth = 0, bracketDepth = 0;
879
+ let inString = false, escapeNext = false;
880
+ for (let i = 0; i < cleaned.length; i++) {
881
+ const c = cleaned[i];
882
+ if (escapeNext) {
883
+ escapeNext = false;
884
+ continue;
885
+ }
886
+ if (c === "\\") {
887
+ if (inString) escapeNext = true;
888
+ continue;
889
+ }
890
+ if (c === '"') {
891
+ inString = !inString;
892
+ continue;
893
+ }
894
+ if (inString) continue;
895
+ if (c === "{") braceDepth++;
896
+ else if (c === "}") braceDepth = Math.max(0, braceDepth - 1);
897
+ else if (c === "[") bracketDepth++;
898
+ else if (c === "]") bracketDepth = Math.max(0, bracketDepth - 1);
899
+ }
900
+ if (braceDepth > 0 || bracketDepth > 0) {
901
+ const repaired = cleaned + "}".repeat(braceDepth) + "]".repeat(bracketDepth);
926
902
  try {
927
903
  parsed = JSON.parse(repaired);
928
904
  repairNote = " (repaired truncated JSON)";
@@ -976,12 +952,30 @@ The JSON object must have exactly these 4 keys:
976
952
  parsed = JSON.parse(cleaned);
977
953
  } catch {
978
954
  const cleaned = msg.replace(/```json?\s*/gi, "").replace(/```/g, "").trim();
979
- const openBraces = (cleaned.match(/\{/g) || []).length;
980
- const closeBraces = (cleaned.match(/\}/g) || []).length;
981
- const openBrackets = (cleaned.match(/\[/g) || []).length;
982
- const closeBrackets = (cleaned.match(/\]/g) || []).length;
983
- if (openBraces > closeBraces || openBrackets > closeBrackets) {
984
- const repaired = cleaned + "}".repeat(Math.max(0, openBraces - closeBraces)) + "]".repeat(Math.max(0, openBrackets - closeBrackets));
955
+ let braceDepth = 0, bracketDepth = 0;
956
+ let inString = false, escapeNext = false;
957
+ for (let i = 0; i < cleaned.length; i++) {
958
+ const c = cleaned[i];
959
+ if (escapeNext) {
960
+ escapeNext = false;
961
+ continue;
962
+ }
963
+ if (c === "\\") {
964
+ if (inString) escapeNext = true;
965
+ continue;
966
+ }
967
+ if (c === '"') {
968
+ inString = !inString;
969
+ continue;
970
+ }
971
+ if (inString) continue;
972
+ if (c === "{") braceDepth++;
973
+ else if (c === "}") braceDepth = Math.max(0, braceDepth - 1);
974
+ else if (c === "[") bracketDepth++;
975
+ else if (c === "]") bracketDepth = Math.max(0, bracketDepth - 1);
976
+ }
977
+ if (braceDepth > 0 || bracketDepth > 0) {
978
+ const repaired = cleaned + "}".repeat(braceDepth) + "]".repeat(bracketDepth);
985
979
  try {
986
980
  parsed = JSON.parse(repaired);
987
981
  repairNote = " (repaired truncated JSON)";
@@ -1187,14 +1181,8 @@ The JSON object must have exactly these 4 keys:
1187
1181
  return ctx.model?.id;
1188
1182
  }
1189
1183
  function updateModelsJsonReasoning(model, hasReasoning) {
1190
- const agentDir = path.join(os.homedir(), ".pi", "agent");
1191
- const modelsJsonPath = path.join(agentDir, "models.json");
1192
- if (!fs.existsSync(modelsJsonPath)) {
1193
- return { updated: false, message: "models.json not found \u2014 skipped" };
1194
- }
1195
1184
  try {
1196
- const raw = fs.readFileSync(modelsJsonPath, "utf-8");
1197
- const config = JSON.parse(raw);
1185
+ const config = readModelsJson();
1198
1186
  let updated = false;
1199
1187
  for (const provider of Object.values(config.providers || {})) {
1200
1188
  const models = provider.models || [];
@@ -1214,7 +1202,7 @@ The JSON object must have exactly these 4 keys:
1214
1202
  if (!updated) {
1215
1203
  return { updated: false, message: `${model} not found in models.json \u2014 skipped` };
1216
1204
  }
1217
- fs.writeFileSync(modelsJsonPath, JSON.stringify(config, null, 2) + "\n", "utf-8");
1205
+ writeModelsJson(config);
1218
1206
  const action = hasReasoning ? "set reasoning: true" : "set reasoning: false";
1219
1207
  return { updated: true, message: `\u2705 Updated ${model}: ${action}` };
1220
1208
  } catch (e) {
@@ -1222,7 +1210,7 @@ The JSON object must have exactly these 4 keys:
1222
1210
  }
1223
1211
  }
1224
1212
  const branding = [
1225
- ` \u26A1 Pi Model Benchmark v1.0.9`,
1213
+ ` \u26A1 Pi Model Benchmark v${EXTENSION_VERSION}`,
1226
1214
  ` Written by VTSTech`,
1227
1215
  ` GitHub: https://github.com/VTSTech`,
1228
1216
  ` Website: www.vts-tech.org`
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vtstech/pi-model-test",
3
- "version": "1.0.9",
3
+ "version": "1.1.0",
4
4
  "description": "Model benchmark/testing extension for Pi Coding Agent",
5
5
  "main": "model-test.js",
6
6
  "keywords": ["pi-extensions"],
@@ -14,7 +14,7 @@
14
14
  "url": "https://github.com/VTSTech/pi-coding-agent"
15
15
  },
16
16
  "dependencies": {
17
- "@vtstech/pi-shared": "1.0.9"
17
+ "@vtstech/pi-shared": "1.1.0"
18
18
  },
19
19
  "peerDependencies": {
20
20
  "@mariozechner/pi-coding-agent": ">=0.66"