npm - @vtstech/pi-model-test - Versions diffs - 1.0.9 → 1.1.1 - Mend

@vtstech/pi-model-test 1.0.9 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/README.md CHANGED

@@ -43,12 +43,13 @@ pi install "npm:@vtstech/pi-model-test"
 ## Features
 - Auto-detects Ollama vs cloud provider (OpenRouter, Anthropic, Google, OpenAI, Groq, DeepSeek, Mistral, xAI, Together, Fireworks, Cohere)
-- Automatic remote Ollama URL resolution
+- Uses native `fetch()` for all HTTP communication (no shell subprocess or curl dependency)
+- Automatic remote Ollama URL resolution (reads from `models.json` on every call — picks up config changes immediately)
 - Timeout resilience with auto-retry on empty responses
 - Rate limit delay between tests (configurable)
 - Thinking model fallback (retries with `think: true`)
 - Tool support cache (`~/.pi/agent/cache/tool_support.json`)
-- JSON repair for truncated output
+- JSON repair for truncated output (stack-based nesting-aware parser)
 - Tab-completion for model names
 ## Links

package/model-test.js CHANGED

@@ -12,59 +12,17 @@ import {
   truncate,
   sanitizeForReport
 } from "@vtstech/pi-shared/format";
-import { getOllamaBaseUrl, detectModelFamily, readModelsJson, BUILTIN_PROVIDERS, fetchModelContextLength } from "@vtstech/pi-shared/ollama";
-function detectProvider(ctx) {
-  const model = ctx.model;
-  if (!model) return { kind: "unknown", name: "none" };
-  const providerName = model.provider || "";
-  if (!providerName) return { kind: "unknown", name: "none" };
-  const modelsJson = readModelsJson();
-  const userProviderCfg = (modelsJson.providers || {})[providerName];
-  if (userProviderCfg) {
-    const baseUrl = userProviderCfg.baseUrl || "";
-    const apiMode = userProviderCfg.api || "";
-    const apiKey = userProviderCfg.apiKey || "";
-    const isOllama = /ollama/i.test(providerName) || /localhost:\d+/.test(baseUrl) || /127\.0\.0\.1:\d+/.test(baseUrl) || /0\.0\.0\.0:\d+/.test(baseUrl) || /\/api\/chat/.test(baseUrl) || apiMode === "ollama";
-    if (isOllama) {
-      return { kind: "ollama", name: providerName, apiMode: "ollama", baseUrl, apiKey };
-    }
-    if (/\/api\/chat/.test(baseUrl)) {
-      return { kind: "ollama", name: providerName, apiMode: "ollama", baseUrl, apiKey };
-    }
-    return {
-      kind: "builtin",
-      name: providerName,
-      apiMode: apiMode || userProviderCfg.api || "openai-completions",
-      baseUrl,
-      apiKey
-    };
-  }
-  const builtin = BUILTIN_PROVIDERS[providerName];
-  if (builtin) {
-    const apiKey = process.env[builtin.envKey] || "";
-    return {
-      kind: "builtin",
-      name: providerName,
-      apiMode: builtin.api,
-      baseUrl: builtin.baseUrl,
-      envKey: builtin.envKey,
-      apiKey
-    };
-  }
-  return { kind: "unknown", name: providerName };
-}
+import { getOllamaBaseUrl, detectModelFamily, readModelsJson, writeModelsJson, fetchModelContextLength, EXTENSION_VERSION, detectProvider } from "@vtstech/pi-shared/ollama";
 var CONFIG = {
   // General API settings
   DEFAULT_TIMEOUT_MS: 999999,
-  // 8.3 minutes - default timeout for model responses
+  // ~16.7 minutes — effectively unlimited for slow models
   CONNECT_TIMEOUT_S: 60,
-  // 30 seconds to establish connection
+  // 60 seconds to establish connection
   MAX_RETRIES: 1,
   // Single retry for transient failures
   RETRY_DELAY_MS: 1e4,
-  // 2 seconds between retries
-  EXEC_BUFFER_MS: 8e3,
-  // Extra buffer for exec timeout over curl timeout
+  // 10 seconds between retries
   // Model generation settings
   NUM_PREDICT: 1024,
   // Max tokens in response
@@ -74,31 +32,26 @@ var CONFIG = {
   MIN_THINKING_LENGTH: 10,
   // Minimum chars to consider thinking tokens valid
   TOOL_TEST_TIMEOUT_MS: 999999,
-  // 90 seconds for tool usage tests
-  TOOL_TEST_MAX_TIME_S: 999999,
-  // Max curl time for tool tests (effectively unlimited)
+  // Effectively unlimited for slow tool usage tests
   TOOL_SUPPORT_TIMEOUT_MS: 999999,
-  // 2+ minutes for tool support detection
-  TOOL_SUPPORT_MAX_TIME_S: 999999,
-  // Max curl time for tool support detection
+  // Effectively unlimited for tool support detection
   // Metadata retrieval
   TAGS_TIMEOUT_MS: 15e3,
   // 15 seconds for /api/tags
-  TAGS_CONNECT_TIMEOUT_S: 30,
-  // 10 seconds connection timeout for tags
   MODEL_INFO_TIMEOUT_MS: 3e4,
-  // 10 seconds for model info lookup
+  // 30 seconds for model info lookup
   // Provider API settings
   PROVIDER_TIMEOUT_MS: 999999,
-  // 2 minutes for cloud provider API calls
+  // Effectively unlimited for cloud provider API calls
   PROVIDER_TOOL_TIMEOUT_MS: 12e4,
-  // 60 seconds for tool usage tests on providers
+  // 120 seconds for tool usage tests on providers
   // Rate limiting
   TEST_DELAY_MS: 1e4
-  // 30 seconds between tests to avoid rate limiting
+  // 10 seconds between tests to avoid rate limiting
 };
 var TOOL_SUPPORT_CACHE_DIR = path.join(os.homedir(), ".pi", "agent", "cache");
 var TOOL_SUPPORT_CACHE_PATH = path.join(TOOL_SUPPORT_CACHE_DIR, "tool_support.json");
+var _toolSupportCacheInMemory = null;
 function readToolSupportCache() {
   try {
     if (fs.existsSync(TOOL_SUPPORT_CACHE_PATH)) {
@@ -116,69 +69,138 @@ function writeToolSupportCache(cache) {
   fs.writeFileSync(TOOL_SUPPORT_CACHE_PATH, JSON.stringify(cache, null, 2) + "\n", "utf-8");
 }
 function getCachedToolSupport(model) {
-  const cache = readToolSupportCache();
+  const cache = _toolSupportCacheInMemory || readToolSupportCache();
+  if (!_toolSupportCacheInMemory) _toolSupportCacheInMemory = cache;
   const entry = cache[model];
   if (!entry) return null;
   if (!entry.support || !["native", "react", "none"].includes(entry.support)) return null;
   return entry;
 }
 function cacheToolSupport(model, support, family) {
-  const cache = readToolSupportCache();
+  const cache = _toolSupportCacheInMemory || readToolSupportCache();
   cache[model] = {
     support,
     testedAt: (/* @__PURE__ */ new Date()).toISOString(),
     family
   };
+  _toolSupportCacheInMemory = cache;
   writeToolSupportCache(cache);
 }
 function model_test_temp_default(pi) {
-  const OLLAMA_BASE = getOllamaBaseUrl();
+  function ollamaBase() {
+    return getOllamaBaseUrl();
+  }
   async function rateLimitDelay(lines) {
     if (CONFIG.TEST_DELAY_MS > 0) {
       lines.push(info(`Waiting ${msHuman(CONFIG.TEST_DELAY_MS)} to avoid rate limiting...`));
       await new Promise((r) => setTimeout(r, CONFIG.TEST_DELAY_MS));
     }
   }
+  function scoreReasoning(msg) {
+    const allNumbers = msg.match(/\b(\d+)\b/g) || [];
+    const answer = allNumbers.length > 0 ? allNumbers[allNumbers.length - 1] : "?";
+    const isCorrect = answer === "8";
+    const reasoningPatterns = [
+      "because",
+      "therefore",
+      "since",
+      "step",
+      "subtract",
+      "minus",
+      "each day",
+      "each night",
+      "slides",
+      "climbs",
+      "night",
+      "reaches",
+      "finally",
+      "last day"
+    ];
+    const hasReasoningWords = reasoningPatterns.some((w) => msg.toLowerCase().includes(w));
+    const hasNumberedSteps = /^\s*\d+\.\s/m.test(msg);
+    const hasReasoning = hasReasoningWords || hasNumberedSteps;
+    if (isCorrect && hasReasoning) return { score: "STRONG", pass: true };
+    if (isCorrect) return { score: "MODERATE", pass: true };
+    if (hasReasoning) return { score: "WEAK", pass: false };
+    return { score: "FAIL", pass: false };
+  }
+  function scoreNativeToolCall(fnName, args) {
+    const hasCorrectTool = fnName === "get_weather";
+    const hasLocation = typeof args.location === "string" && args.location.toLowerCase().includes("paris");
+    const unitValid = args.unit === void 0 || typeof args.unit === "string" && ["celsius", "fahrenheit"].includes(args.unit.toLowerCase());
+    if (hasCorrectTool && hasLocation && unitValid) return { score: "STRONG", pass: true };
+    if (hasCorrectTool && hasLocation) return { score: "MODERATE", pass: true };
+    return { score: "WEAK", pass: false };
+  }
+  function scoreTextToolCall(fnName, args) {
+    const isWeatherTool = fnName === "get_weather";
+    const hasLocation = typeof args.location === "string" && args.location.toLowerCase().includes("paris");
+    if (isWeatherTool && hasLocation) return { score: "STRONG", pass: true };
+    if (isWeatherTool) return { score: "MODERATE", pass: true };
+    return { score: "WEAK", pass: false };
+  }
+  function parseTextToolCall(content) {
+    const firstBrace = content.indexOf("{");
+    if (firstBrace === -1) return null;
+    const lastBrace = content.lastIndexOf("}");
+    if (lastBrace <= firstBrace) return null;
+    const jsonCandidate = content.slice(firstBrace, lastBrace + 1);
+    let textToolParsed = null;
+    try {
+      textToolParsed = JSON.parse(jsonCandidate);
+    } catch {
+      return null;
+    }
+    if (!textToolParsed || typeof textToolParsed.name !== "string") return null;
+    const rawArgs = textToolParsed.arguments || { ...textToolParsed };
+    const { name: _, ...fnArgs } = rawArgs;
+    return { fnName: textToolParsed.name, args: fnArgs };
+  }
   async function ollamaChat(model, messages, options = {}, timeoutMs = CONFIG.DEFAULT_TIMEOUT_MS, retries = CONFIG.MAX_RETRIES) {
     const body = { model, messages, stream: false, options: { num_predict: CONFIG.NUM_PREDICT, temperature: CONFIG.TEMPERATURE, ...options } };
+    const url = `${ollamaBase()}/api/chat`;
     for (let attempt = 0; attempt <= retries; attempt++) {
       const start = Date.now();
+      const controller = new AbortController();
+      const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
       try {
-        const result = await pi.exec("curl", [
-          "-s",
-          "--fail-with-body",
-          "-X",
-          "POST",
-          "--connect-timeout",
-          String(CONFIG.CONNECT_TIMEOUT_S),
-          "--max-time",
-          String(Math.ceil(timeoutMs / 1e3)),
-          `${OLLAMA_BASE}/api/chat`,
-          "-H",
-          "Content-Type: application/json",
-          "-d",
-          JSON.stringify(body)
-        ], { timeout: timeoutMs + CONFIG.EXEC_BUFFER_MS });
+        const res = await fetch(url, {
+          method: "POST",
+          headers: { "Content-Type": "application/json" },
+          body: JSON.stringify(body),
+          signal: controller.signal
+        });
         const elapsedMs = Date.now() - start;
-        if (result.code !== 0) {
-          const detail = result.stderr?.trim() || result.stdout?.trim() || "unknown error";
-          throw new Error(`curl exited ${result.code}: ${detail}`);
+        if (!res.ok) {
+          const errorText = await res.text().catch(() => "unknown error");
+          throw new Error(`Ollama API returned ${res.status}: ${truncate(errorText, 200)}`);
         }
-        if (!result.stdout.trim()) {
+        const text = await res.text();
+        if (!text.trim()) {
           if (attempt < retries) {
             await new Promise((r) => setTimeout(r, CONFIG.RETRY_DELAY_MS));
             continue;
           }
           throw new Error(`Empty response from Ollama after ${attempt + 1} attempt(s)`);
         }
-        const parsed = JSON.parse(result.stdout);
+        const parsed = JSON.parse(text);
         return { response: parsed, elapsedMs };
       } catch (e) {
-        if (attempt < retries && (e.message.includes("Empty response") || e.message.includes("timed out") || e.message.includes("curl exited 22") || e.message.includes("curl exited 28") || e.message.includes("curl exited 35") || e.message.includes("curl exited 52"))) {
+        const msg = e instanceof Error ? e.message : String(e);
+        if (e instanceof Error && e.name === "AbortError") {
+          if (attempt < retries) {
+            await new Promise((r) => setTimeout(r, CONFIG.RETRY_DELAY_MS));
+            continue;
+          }
+          throw new Error(`Ollama API timed out after ${msHuman(timeoutMs)}`);
+        }
+        if (attempt < retries && (msg.includes("Empty response") || msg.includes("ECONNREFUSED") || msg.includes("ECONNRESET") || msg.includes("fetch failed"))) {
           await new Promise((r) => setTimeout(r, CONFIG.RETRY_DELAY_MS));
           continue;
         }
         throw e;
+      } finally {
+        clearTimeout(timeoutId);
       }
     }
     throw new Error("Unreachable");
@@ -247,10 +269,8 @@ function model_test_temp_default(pi) {
         { role: "user", content: "Reply with exactly: PONG" }
       ], { maxTokens: 10, timeoutMs: 3e4 });
       const elapsedMs = Date.now() - start;
-      const content = result.content.trim().toUpperCase();
       const reachable = true;
       const authValid = true;
-      const hasPong = content.includes("PONG");
       return {
         pass: reachable && authValid,
         reachable,
@@ -259,7 +279,6 @@ function model_test_temp_default(pi) {
         elapsedMs
       };
     } catch (e) {
-      const start = Date.now();
       let reachable = false;
       let authValid = false;
       const msg = e.message || "";
@@ -290,7 +309,6 @@ function model_test_temp_default(pi) {
     const prompt = `A snail climbs 3 feet up a wall each day, but slides back 2 feet each night. The wall is 10 feet tall. How many days does it take the snail to reach the top? Think step by step and give the final answer on its own line like: ANSWER: <number>`;
     try {
       let response, elapsedMs;
-      let usedThinkingFallback = false;
       try {
         const result = await ollamaChat(model, [
           { role: "user", content: prompt }
@@ -309,7 +327,6 @@ function model_test_temp_default(pi) {
           ], { think: true });
           response = retry.response;
           elapsedMs = retry.elapsedMs;
-          usedThinkingFallback = true;
         } else {
           throw firstErr;
         }
@@ -322,41 +339,7 @@ function model_test_temp_default(pi) {
       }
       const allNumbers = effectiveMsg.match(/\b(\d+)\b/g) || [];
       const answer = allNumbers.length > 0 ? allNumbers[allNumbers.length - 1] : "?";
-      const isCorrect = answer === "8";
-      const reasoningPatterns = [
-        "because",
-        "therefore",
-        "since",
-        "step",
-        "subtract",
-        "minus",
-        "each day",
-        "each night",
-        "slides",
-        "climbs",
-        "night",
-        "reaches",
-        "finally",
-        "last day"
-      ];
-      const hasReasoningWords = reasoningPatterns.some((w) => effectiveMsg.toLowerCase().includes(w));
-      const hasNumberedSteps = /^\s*\d+\.\s/m.test(effectiveMsg);
-      const hasReasoning = hasReasoningWords || hasNumberedSteps;
-      let score;
-      let pass;
-      if (isCorrect && hasReasoning) {
-        score = "STRONG";
-        pass = true;
-      } else if (isCorrect) {
-        score = "MODERATE";
-        pass = true;
-      } else if (hasReasoning) {
-        score = "WEAK";
-        pass = false;
-      } else {
-        score = "FAIL";
-        pass = false;
-      }
+      const { score, pass } = scoreReasoning(effectiveMsg);
       const displayMsg = msg.trim().length > 0 ? effectiveMsg : `[thinking tokens] ${effectiveMsg}`;
       return { pass, score, reasoning: displayMsg, answer, elapsedMs };
     } catch (e) {
@@ -375,41 +358,7 @@ function model_test_temp_default(pi) {
       }
       const allNumbers = msg.match(/\b(\d+)\b/g) || [];
       const answer = allNumbers.length > 0 ? allNumbers[allNumbers.length - 1] : "?";
-      const isCorrect = answer === "8";
-      const reasoningPatterns = [
-        "because",
-        "therefore",
-        "since",
-        "step",
-        "subtract",
-        "minus",
-        "each day",
-        "each night",
-        "slides",
-        "climbs",
-        "night",
-        "reaches",
-        "finally",
-        "last day"
-      ];
-      const hasReasoningWords = reasoningPatterns.some((w) => msg.toLowerCase().includes(w));
-      const hasNumberedSteps = /^\s*\d+\.\s/m.test(msg);
-      const hasReasoning = hasReasoningWords || hasNumberedSteps;
-      let score;
-      let pass;
-      if (isCorrect && hasReasoning) {
-        score = "STRONG";
-        pass = true;
-      } else if (isCorrect) {
-        score = "MODERATE";
-        pass = true;
-      } else if (hasReasoning) {
-        score = "WEAK";
-        pass = false;
-      } else {
-        score = "FAIL";
-        pass = false;
-      }
+      const { score, pass } = scoreReasoning(msg);
       return { pass, score, reasoning: msg, answer, elapsedMs: result.elapsedMs };
     } catch (e) {
       return { pass: false, score: "ERROR", reasoning: e.message, answer: "?", elapsedMs: 0 };
@@ -465,29 +414,24 @@ function model_test_temp_default(pi) {
       options: { num_predict: CONFIG.NUM_PREDICT, temperature: CONFIG.TEMPERATURE }
     };
     try {
+      const controller = new AbortController();
+      const timeoutId = setTimeout(() => controller.abort(), CONFIG.TOOL_TEST_TIMEOUT_MS);
       const start = Date.now();
-      const result = await pi.exec("curl", [
-        "-s",
-        "--fail-with-body",
-        "-X",
-        "POST",
-        "--connect-timeout",
-        String(CONFIG.CONNECT_TIMEOUT_S),
-        "--max-time",
-        String(CONFIG.TOOL_TEST_MAX_TIME_S),
-        `${OLLAMA_BASE}/api/chat`,
-        "-H",
-        "Content-Type: application/json",
-        "-d",
-        JSON.stringify(body)
-      ], { timeout: CONFIG.TOOL_TEST_TIMEOUT_MS });
+      const res = await fetch(`${ollamaBase()}/api/chat`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify(body),
+        signal: controller.signal
+      });
       const elapsedMs = Date.now() - start;
-      if (result.code !== 0) {
-        const detail = result.stderr?.trim() || result.stdout?.trim() || "unknown error";
-        return { pass: false, score: "ERROR", hasToolCalls: false, toolCall: `curl error: ${result.code}: ${detail}`, response: "", elapsedMs };
+      clearTimeout(timeoutId);
+      if (!res.ok) {
+        const errorText = await res.text().catch(() => "unknown error");
+        return { pass: false, score: "ERROR", hasToolCalls: false, toolCall: `fetch error: ${res.status}`, response: "", elapsedMs };
       }
-      if (!result.stdout.trim()) throw new Error("Empty response from Ollama");
-      const parsed = JSON.parse(result.stdout);
+      const text = await res.text();
+      if (!text.trim()) throw new Error("Empty response from Ollama");
+      const parsed = JSON.parse(text);
       const toolCalls = parsed?.message?.tool_calls;
       const content = parsed?.message?.content || "";
       if (toolCalls && toolCalls.length > 0) {
@@ -506,20 +450,7 @@ function model_test_temp_default(pi) {
             elapsedMs
           };
         }
-        const hasCorrectTool = fn.name === "get_weather";
-        const hasLocation = typeof args.location === "string" && args.location.toLowerCase().includes("paris");
-        const unitValid = args.unit === void 0 || typeof args.unit === "string" && ["celsius", "fahrenheit"].includes(args.unit.toLowerCase());
-        let score;
-        if (hasCorrectTool && hasLocation && unitValid) {
-          score = "STRONG";
-        } else if (hasCorrectTool && hasLocation) {
-          score = "MODERATE";
-        } else if (hasCorrectTool) {
-          score = "WEAK";
-        } else {
-          score = "WEAK";
-        }
-        const pass = score !== "WEAK";
+        const { score, pass } = scoreNativeToolCall(fn.name || "", args);
         return {
           pass,
           score,
@@ -529,38 +460,14 @@ function model_test_temp_default(pi) {
           elapsedMs
         };
       }
-      const firstBrace = content.indexOf("{");
-      let textToolParsed = null;
-      if (firstBrace !== -1) {
-        const lastBrace = content.lastIndexOf("}");
-        if (lastBrace > firstBrace) {
-          const jsonCandidate = content.slice(firstBrace, lastBrace + 1);
-          try {
-            textToolParsed = JSON.parse(jsonCandidate);
-          } catch {
-          }
-        }
-      }
-      if (textToolParsed && typeof textToolParsed.name === "string") {
-        const fnName = textToolParsed.name;
-        const rawArgs = textToolParsed.arguments || { ...textToolParsed };
-        const { name: _, ...fnArgs } = rawArgs;
-        const isWeatherTool = fnName === "get_weather";
-        const hasLocation = typeof fnArgs.location === "string" && fnArgs.location.toLowerCase().includes("paris");
-        let score;
-        if (isWeatherTool && hasLocation) {
-          score = "STRONG";
-        } else if (isWeatherTool) {
-          score = "MODERATE";
-        } else {
-          score = "WEAK";
-        }
-        const pass = score !== "WEAK";
+      const textParsed = parseTextToolCall(content);
+      if (textParsed) {
+        const { score, pass } = scoreTextToolCall(textParsed.fnName, textParsed.args);
         return {
           pass,
           score,
           hasToolCalls: true,
-          toolCall: `${fnName}(${JSON.stringify(fnArgs)})`,
+          toolCall: `${textParsed.fnName}(${JSON.stringify(textParsed.args)})`,
           response: content,
           elapsedMs
         };
@@ -622,20 +529,7 @@ function model_test_temp_default(pi) {
             elapsedMs: result.elapsedMs
           };
         }
-        const hasCorrectTool = fn.name === "get_weather";
-        const hasLocation = typeof args.location === "string" && args.location.toLowerCase().includes("paris");
-        const unitValid = args.unit === void 0 || typeof args.unit === "string" && ["celsius", "fahrenheit"].includes(args.unit.toLowerCase());
-        let score;
-        if (hasCorrectTool && hasLocation && unitValid) {
-          score = "STRONG";
-        } else if (hasCorrectTool && hasLocation) {
-          score = "MODERATE";
-        } else if (hasCorrectTool) {
-          score = "WEAK";
-        } else {
-          score = "WEAK";
-        }
-        const pass = score !== "WEAK";
+        const { score, pass } = scoreNativeToolCall(fn.name || "", args);
         return {
           pass,
           score,
@@ -645,38 +539,14 @@ function model_test_temp_default(pi) {
           elapsedMs: result.elapsedMs
         };
       }
-      const firstBrace = content.indexOf("{");
-      let textToolParsed = null;
-      if (firstBrace !== -1) {
-        const lastBrace = content.lastIndexOf("}");
-        if (lastBrace > firstBrace) {
-          const jsonCandidate = content.slice(firstBrace, lastBrace + 1);
-          try {
-            textToolParsed = JSON.parse(jsonCandidate);
-          } catch {
-          }
-        }
-      }
-      if (textToolParsed && typeof textToolParsed.name === "string") {
-        const fnName = textToolParsed.name;
-        const rawArgs = textToolParsed.arguments || { ...textToolParsed };
-        const { name: _, ...fnArgs } = rawArgs;
-        const isWeatherTool = fnName === "get_weather";
-        const hasLocation = typeof fnArgs.location === "string" && fnArgs.location.toLowerCase().includes("paris");
-        let score;
-        if (isWeatherTool && hasLocation) {
-          score = "STRONG";
-        } else if (isWeatherTool) {
-          score = "MODERATE";
-        } else {
-          score = "WEAK";
-        }
-        const pass = score !== "WEAK";
+      const textParsed = parseTextToolCall(content);
+      if (textParsed) {
+        const { score, pass } = scoreTextToolCall(textParsed.fnName, textParsed.args);
         return {
           pass,
           score,
           hasToolCalls: true,
-          toolCall: `${fnName}(${JSON.stringify(fnArgs)})`,
+          toolCall: `${textParsed.fnName}(${JSON.stringify(textParsed.args)})`,
           response: content,
           elapsedMs: result.elapsedMs
         };
@@ -713,29 +583,24 @@ function model_test_temp_default(pi) {
       options: { num_predict: CONFIG.NUM_PREDICT, temperature: CONFIG.TEMPERATURE }
     };
     try {
+      const controller = new AbortController();
+      const timeoutId = setTimeout(() => controller.abort(), CONFIG.TOOL_TEST_TIMEOUT_MS);
       const start = Date.now();
-      const result = await pi.exec("curl", [
-        "-s",
-        "--fail-with-body",
-        "-X",
-        "POST",
-        "--connect-timeout",
-        String(CONFIG.CONNECT_TIMEOUT_S),
-        "--max-time",
-        String(CONFIG.TOOL_TEST_MAX_TIME_S),
-        `${OLLAMA_BASE}/api/chat`,
-        "-H",
-        "Content-Type: application/json",
-        "-d",
-        JSON.stringify(body)
-      ], { timeout: CONFIG.TOOL_TEST_TIMEOUT_MS });
+      const res = await fetch(`${ollamaBase()}/api/chat`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify(body),
+        signal: controller.signal
+      });
       const elapsedMs = Date.now() - start;
-      if (result.code !== 0) {
-        const detail = result.stderr?.trim() || result.stdout?.trim() || "unknown error";
-        return { pass: false, score: "ERROR", toolCall: `curl error: ${result.code}: ${detail}`, thought: "", response: "", elapsedMs };
+      clearTimeout(timeoutId);
+      if (!res.ok) {
+        const errorText = await res.text().catch(() => "unknown error");
+        return { pass: false, score: "ERROR", toolCall: `fetch error: ${res.status}`, thought: "", response: "", elapsedMs };
       }
-      if (!result.stdout.trim()) throw new Error("Empty response from Ollama");
-      const parsed = JSON.parse(result.stdout);
+      const text = await res.text();
+      if (!text.trim()) throw new Error("Empty response from Ollama");
+      const parsed = JSON.parse(text);
       const content = (parsed?.message?.content || "").trim();
       if (!content) {
         return { pass: false, score: "FAIL", toolCall: "empty response", thought: "", response: "", elapsedMs };
@@ -744,20 +609,20 @@ function model_test_temp_default(pi) {
       const sharedParser = pi._reactParser;
       if (sharedParser?.ALL_DIALECT_PATTERNS) {
         for (const dp of sharedParser.ALL_DIALECT_PATTERNS) {
-          const result2 = sharedParser.parseReactWithPatterns(content, dp, true);
-          if (result2) {
-            let toolName = result2.name;
+          const result = sharedParser.parseReactWithPatterns(content, dp, true);
+          if (result) {
+            let toolName = result.name;
             let argsStr;
-            const rawArgs = result2.args ? JSON.stringify(result2.args) : "";
+            const rawArgs = result.args ? JSON.stringify(result.args) : "";
             if (rawArgs && rawArgs !== "{}") {
               argsStr = rawArgs;
-            } else if (result2.raw) {
-              const jsonStart = result2.raw.indexOf("{");
+            } else if (result.raw) {
+              const jsonStart = result.raw.indexOf("{");
               if (jsonStart !== -1) {
                 let depth = 0, jsonEnd = -1;
-                for (let i = jsonStart; i < result2.raw.length; i++) {
-                  if (result2.raw[i] === "{") depth++;
-                  else if (result2.raw[i] === "}") {
+                for (let i = jsonStart; i < result.raw.length; i++) {
+                  if (result.raw[i] === "{") depth++;
+                  else if (result.raw[i] === "}") {
                     depth--;
                     if (depth === 0) {
                       jsonEnd = i;
@@ -765,14 +630,14 @@ function model_test_temp_default(pi) {
                     }
                   }
                 }
-                argsStr = jsonEnd !== -1 ? result2.raw.slice(jsonStart, jsonEnd + 1) : "";
+                argsStr = jsonEnd !== -1 ? result.raw.slice(jsonStart, jsonEnd + 1) : "";
               } else {
                 argsStr = "";
               }
             } else {
               argsStr = "";
             }
-            parsedResult = { name: toolName, args: argsStr, thought: result2.thought || "", dialect: result2.dialect };
+            parsedResult = { name: toolName, args: argsStr, thought: result.thought || "", dialect: result.dialect };
             break;
           }
         }
@@ -917,12 +782,30 @@ The JSON object must have exactly these 4 keys:
         parsed = JSON.parse(cleaned);
       } catch {
         const cleaned = msg.replace(/```json?\s*/gi, "").replace(/```/g, "").trim();
-        const openBraces = (cleaned.match(/\{/g) || []).length;
-        const closeBraces = (cleaned.match(/\}/g) || []).length;
-        const openBrackets = (cleaned.match(/\[/g) || []).length;
-        const closeBrackets = (cleaned.match(/\]/g) || []).length;
-        if (openBraces > closeBraces || openBrackets > closeBrackets) {
-          const repaired = cleaned + "}".repeat(Math.max(0, openBraces - closeBraces)) + "]".repeat(Math.max(0, openBrackets - closeBrackets));
+        let braceDepth = 0, bracketDepth = 0;
+        let inString = false, escapeNext = false;
+        for (let i = 0; i < cleaned.length; i++) {
+          const c = cleaned[i];
+          if (escapeNext) {
+            escapeNext = false;
+            continue;
+          }
+          if (c === "\\") {
+            if (inString) escapeNext = true;
+            continue;
+          }
+          if (c === '"') {
+            inString = !inString;
+            continue;
+          }
+          if (inString) continue;
+          if (c === "{") braceDepth++;
+          else if (c === "}") braceDepth = Math.max(0, braceDepth - 1);
+          else if (c === "[") bracketDepth++;
+          else if (c === "]") bracketDepth = Math.max(0, bracketDepth - 1);
+        }
+        if (braceDepth > 0 || bracketDepth > 0) {
+          const repaired = cleaned + "}".repeat(braceDepth) + "]".repeat(bracketDepth);
           try {
             parsed = JSON.parse(repaired);
             repairNote = " (repaired truncated JSON)";
@@ -976,12 +859,30 @@ The JSON object must have exactly these 4 keys:
         parsed = JSON.parse(cleaned);
       } catch {
         const cleaned = msg.replace(/```json?\s*/gi, "").replace(/```/g, "").trim();
-        const openBraces = (cleaned.match(/\{/g) || []).length;
-        const closeBraces = (cleaned.match(/\}/g) || []).length;
-        const openBrackets = (cleaned.match(/\[/g) || []).length;
-        const closeBrackets = (cleaned.match(/\]/g) || []).length;
-        if (openBraces > closeBraces || openBrackets > closeBrackets) {
-          const repaired = cleaned + "}".repeat(Math.max(0, openBraces - closeBraces)) + "]".repeat(Math.max(0, openBrackets - closeBrackets));
+        let braceDepth = 0, bracketDepth = 0;
+        let inString = false, escapeNext = false;
+        for (let i = 0; i < cleaned.length; i++) {
+          const c = cleaned[i];
+          if (escapeNext) {
+            escapeNext = false;
+            continue;
+          }
+          if (c === "\\") {
+            if (inString) escapeNext = true;
+            continue;
+          }
+          if (c === '"') {
+            inString = !inString;
+            continue;
+          }
+          if (inString) continue;
+          if (c === "{") braceDepth++;
+          else if (c === "}") braceDepth = Math.max(0, braceDepth - 1);
+          else if (c === "[") bracketDepth++;
+          else if (c === "]") bracketDepth = Math.max(0, bracketDepth - 1);
+        }
+        if (braceDepth > 0 || bracketDepth > 0) {
+          const repaired = cleaned + "}".repeat(braceDepth) + "]".repeat(bracketDepth);
           try {
             parsed = JSON.parse(repaired);
             repairNote = " (repaired truncated JSON)";
@@ -1057,29 +958,29 @@ The JSON object must have exactly these 4 keys:
     };
     try {
       const start = Date.now();
-      const result = await pi.exec("curl", [
-        "-s",
-        "--fail-with-body",
-        "-X",
-        "POST",
-        "--connect-timeout",
-        "30",
-        "--max-time",
-        "120",
-        `${OLLAMA_BASE}/api/chat`,
-        "-H",
-        "Content-Type: application/json",
-        "-d",
-        JSON.stringify(body)
-      ], { timeout: 13e4 });
+      const controller = new AbortController();
+      const timeoutId = setTimeout(() => controller.abort(), 13e4);
+      const res = await fetch(`${ollamaBase()}/api/chat`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify(body),
+        signal: controller.signal
+      });
       const elapsedMs = Date.now() - start;
-      if (result.code !== 0 || !result.stdout.trim()) {
-        const detail = result.stderr?.trim() || result.stdout?.trim() || "empty response";
+      clearTimeout(timeoutId);
+      if (!res.ok) {
+        const detail = await res.text().catch(() => "unknown error");
         const level2 = "none";
         cacheToolSupport(model, level2, family);
-        return { level: level2, cached: false, evidence: `API error: ${truncate(detail, 100)}`, elapsedMs };
+        return { level: level2, cached: false, evidence: `API error ${res.status}: ${truncate(detail, 100)}`, elapsedMs };
       }
-      const parsed = JSON.parse(result.stdout);
+      const text = await res.text();
+      if (!text.trim()) {
+        const level2 = "none";
+        cacheToolSupport(model, level2, family);
+        return { level: level2, cached: false, evidence: "empty response from Ollama", elapsedMs };
+      }
+      const parsed = JSON.parse(text);
       const toolCalls = parsed?.message?.tool_calls;
       const content = (parsed?.message?.content || "").trim();
       if (toolCalls && Array.isArray(toolCalls) && toolCalls.length > 0) {
@@ -1175,9 +1076,9 @@ The JSON object must have exactly these 4 keys:
   }
   async function getOllamaModels() {
     try {
-      const result = await pi.exec("curl", ["-s", "--connect-timeout", "10", `${OLLAMA_BASE}/api/tags`], { timeout: 15e3 });
-      if (result.code !== 0 || !result.stdout.trim()) return [];
-      const data = JSON.parse(result.stdout);
+      const res = await fetch(`${ollamaBase()}/api/tags`, { signal: AbortSignal.timeout(15e3) });
+      if (!res.ok) return [];
+      const data = await res.json();
       return (data.models || []).map((m) => m.name).filter(Boolean);
     } catch {
       return [];
@@ -1187,14 +1088,8 @@ The JSON object must have exactly these 4 keys:
     return ctx.model?.id;
   }
   function updateModelsJsonReasoning(model, hasReasoning) {
-    const agentDir = path.join(os.homedir(), ".pi", "agent");
-    const modelsJsonPath = path.join(agentDir, "models.json");
-    if (!fs.existsSync(modelsJsonPath)) {
-      return { updated: false, message: "models.json not found \u2014 skipped" };
-    }
     try {
-      const raw = fs.readFileSync(modelsJsonPath, "utf-8");
-      const config = JSON.parse(raw);
+      const config = readModelsJson();
       let updated = false;
       for (const provider of Object.values(config.providers || {})) {
         const models = provider.models || [];
@@ -1214,7 +1109,7 @@ The JSON object must have exactly these 4 keys:
       if (!updated) {
         return { updated: false, message: `${model} not found in models.json \u2014 skipped` };
       }
-      fs.writeFileSync(modelsJsonPath, JSON.stringify(config, null, 2) + "\n", "utf-8");
+      writeModelsJson(config);
       const action = hasReasoning ? "set reasoning: true" : "set reasoning: false";
       return { updated: true, message: `\u2705 Updated ${model}: ${action}` };
     } catch (e) {
@@ -1222,7 +1117,7 @@ The JSON object must have exactly these 4 keys:
     }
   }
   const branding = [
-    `  \u26A1 Pi Model Benchmark v1.0.9`,
+    `  \u26A1 Pi Model Benchmark v${EXTENSION_VERSION}`,
     `  Written by VTSTech`,
     `  GitHub: https://github.com/VTSTech`,
     `  Website: www.vts-tech.org`
@@ -1243,7 +1138,7 @@ The JSON object must have exactly these 4 keys:
       }
     }
     lines.push(info(`API: ${apiMode}`));
-    const nativeContext = await fetchModelContextLength(OLLAMA_BASE, model);
+    const nativeContext = await fetchModelContextLength(ollamaBase(), model);
     if (nativeContext !== void 0) {
       const ctxStr = nativeContext >= 1e3 ? `${(nativeContext / 1e3).toFixed(1)}k` : String(nativeContext);
       lines.push(info(`Context: ${ctxStr} tokens (native max)`));
@@ -1254,9 +1149,9 @@ The JSON object must have exactly these 4 keys:
     let modelQuant = "unknown";
     let modelModified = "unknown";
     try {
-      const tagsResult = await pi.exec("curl", ["-s", `${OLLAMA_BASE}/api/tags`], { timeout: 1e4 });
-      if (tagsResult.code === 0 && tagsResult.stdout.trim()) {
-        const tags = JSON.parse(tagsResult.stdout);
+      const tagsRes = await fetch(`${ollamaBase()}/api/tags`, { signal: AbortSignal.timeout(1e4) });
+      if (tagsRes.ok) {
+        const tags = await tagsRes.json();
         const entry = (tags.models || []).find((m) => m.name === model);
         if (entry) {
           const details = entry.details || {};

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@vtstech/pi-model-test",
-  "version": "1.0.9",
+  "version": "1.1.1",
   "description": "Model benchmark/testing extension for Pi Coding Agent",
   "main": "model-test.js",
   "keywords": ["pi-extensions"],
@@ -14,7 +14,7 @@
     "url": "https://github.com/VTSTech/pi-coding-agent"
   },
   "dependencies": {
-    "@vtstech/pi-shared": "1.0.9"
+    "@vtstech/pi-shared": "1.1.1"
   },
   "peerDependencies": {
     "@mariozechner/pi-coding-agent": ">=0.66"