npm - @vtstech/pi-model-test - Versions diffs - 1.0.7 → 1.0.9 - Mend

@vtstech/pi-model-test 1.0.7 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2)

package/model-test.js +176 -109
package/package.json +2 -2

package/model-test.js CHANGED Viewed

@@ -12,7 +12,7 @@ import {
   truncate,
   sanitizeForReport
 } from "@vtstech/pi-shared/format";
-import { getOllamaBaseUrl, detectModelFamily, readModelsJson, BUILTIN_PROVIDERS } from "@vtstech/pi-shared/ollama";
+import { getOllamaBaseUrl, detectModelFamily, readModelsJson, BUILTIN_PROVIDERS, fetchModelContextLength } from "@vtstech/pi-shared/ollama";
 function detectProvider(ctx) {
   const model = ctx.model;
   if (!model) return { kind: "unknown", name: "none" };
@@ -55,15 +55,15 @@ function detectProvider(ctx) {
 }
 var CONFIG = {
   // General API settings
-  DEFAULT_TIMEOUT_MS: 6e5,
+  DEFAULT_TIMEOUT_MS: 999999,
   // 8.3 minutes - default timeout for model responses
-  CONNECT_TIMEOUT_S: 30,
+  CONNECT_TIMEOUT_S: 60,
   // 30 seconds to establish connection
   MAX_RETRIES: 1,
   // Single retry for transient failures
-  RETRY_DELAY_MS: 2e3,
+  RETRY_DELAY_MS: 1e4,
   // 2 seconds between retries
-  EXEC_BUFFER_MS: 5e3,
+  EXEC_BUFFER_MS: 8e3,
   // Extra buffer for exec timeout over curl timeout
   // Model generation settings
   NUM_PREDICT: 1024,
@@ -73,28 +73,28 @@ var CONFIG = {
   // Test-specific settings
   MIN_THINKING_LENGTH: 10,
   // Minimum chars to consider thinking tokens valid
-  TOOL_TEST_TIMEOUT_MS: 9e4,
+  TOOL_TEST_TIMEOUT_MS: 999999,
   // 90 seconds for tool usage tests
-  TOOL_TEST_MAX_TIME_S: 9999,
+  TOOL_TEST_MAX_TIME_S: 999999,
   // Max curl time for tool tests (effectively unlimited)
-  TOOL_SUPPORT_TIMEOUT_MS: 26e4,
+  TOOL_SUPPORT_TIMEOUT_MS: 999999,
   // 2+ minutes for tool support detection
-  TOOL_SUPPORT_MAX_TIME_S: 240,
+  TOOL_SUPPORT_MAX_TIME_S: 999999,
   // Max curl time for tool support detection
   // Metadata retrieval
   TAGS_TIMEOUT_MS: 15e3,
   // 15 seconds for /api/tags
-  TAGS_CONNECT_TIMEOUT_S: 10,
+  TAGS_CONNECT_TIMEOUT_S: 30,
   // 10 seconds connection timeout for tags
-  MODEL_INFO_TIMEOUT_MS: 1e4,
+  MODEL_INFO_TIMEOUT_MS: 3e4,
   // 10 seconds for model info lookup
   // Provider API settings
-  PROVIDER_TIMEOUT_MS: 12e4,
+  PROVIDER_TIMEOUT_MS: 999999,
   // 2 minutes for cloud provider API calls
-  PROVIDER_TOOL_TIMEOUT_MS: 6e4,
+  PROVIDER_TOOL_TIMEOUT_MS: 12e4,
   // 60 seconds for tool usage tests on providers
   // Rate limiting
-  TEST_DELAY_MS: 3e4
+  TEST_DELAY_MS: 1e4
   // 30 seconds between tests to avoid rate limiting
 };
 var TOOL_SUPPORT_CACHE_DIR = path.join(os.homedir(), ".pi", "agent", "cache");
@@ -740,90 +740,111 @@ function model_test_temp_default(pi) {
       if (!content) {
         return { pass: false, score: "FAIL", toolCall: "empty response", thought: "", response: "", elapsedMs };
       }
-      const THOUGHT_RE = /Thought:\s*(.*?)(?=Action:|Final Answer:|$)/is;
-      const ACTION_RE = /Action:\s*[`"']?(\w+)[`"']?\s*\n?\s*Action Input:\s*(.*?)(?=\n\s*(?:Observation:|Thought:|Final Answer:|Action:)|$)/is;
-      const ACTION_RE_SAMELINE = /Action:\s*[`"']?(\w+)[`"']?\s+Action Input:\s*(.*?)(?=\n\s*(?:Observation:|Thought:|Final Answer:)|$)/is;
-      const ACTION_RE_LOOSE = /Action:\s*(.+?)\n\s*Action Input:\s*(.*?)(?=\n\s*(?:Observation:|Thought:|Final Answer:|Action:)|$)/is;
-      const ACTION_RE_PAREN = /Action:\s*(\w+)\s*\(([^)]*)\)/i;
-      let thought = "";
-      const thoughtMatch = THOUGHT_RE.exec(content);
-      if (thoughtMatch) thought = thoughtMatch[1].trim();
-      let match = ACTION_RE.exec(content);
-      if (!match) match = ACTION_RE_SAMELINE.exec(content);
-      let looseMatch = false;
-      if (!match) {
-        const looseResult = ACTION_RE_LOOSE.exec(content);
-        if (looseResult) {
-          const candidate = looseResult[1].trim().replace(/[`"']/g, "");
-          const isToolIdentifier = /^\w+$/.test(candidate) && (candidate.includes("_") || candidate.includes("-"));
-          const isKnownTool = /^(get_weather|calculate)$/i.test(candidate);
-          if (isToolIdentifier || isKnownTool) {
-            match = looseResult;
-            looseMatch = true;
-          }
-        }
-      }
-      let parenMatch = false;
-      if (!match) match = ACTION_RE_PAREN.exec(content), parenMatch = true;
-      if (match) {
-        let toolName = match[1].trim().replace(/[`"']/g, "");
-        if (looseMatch) {
-          const actionText = toolName.toLowerCase();
-          if (actionText.includes("get_weather")) toolName = "get_weather";
-          else {
-            const toolWords = actionText.match(/\b[a-z][a-z0-9]*(?:[_-][a-z0-9]+)+\b/gi) || [];
-            if (toolWords.length > 0) toolName = toolWords[0];
-          }
-        }
-        const rawArgs = parenMatch ? match[2].trim().replace(/^```\w*\s*/gm, "").replace(/```\s*$/gm, "").trim() : match[2].trim().replace(/^```\w*\s*/gm, "").replace(/```\s*$/gm, "").trim();
-        let argsParsed = false;
-        let argsStr = rawArgs;
-        if (parenMatch && rawArgs && !rawArgs.startsWith("{")) {
-          const pairs = rawArgs.match(/(\w+)\s*:\s*("[^"]*"|'[^']*'|\S+)/g);
-          if (pairs) {
-            const obj = {};
-            for (const p of pairs) {
-              const colonIdx = p.indexOf(":");
-              const key = p.slice(0, colonIdx).trim();
-              let val = p.slice(colonIdx + 1).trim();
-              if (val.startsWith('"') && val.endsWith('"') || val.startsWith("'") && val.endsWith("'")) {
-                val = val.slice(1, -1);
+      let parsedResult = null;
+      const sharedParser = pi._reactParser;
+      if (sharedParser?.ALL_DIALECT_PATTERNS) {
+        for (const dp of sharedParser.ALL_DIALECT_PATTERNS) {
+          const result2 = sharedParser.parseReactWithPatterns(content, dp, true);
+          if (result2) {
+            let toolName = result2.name;
+            let argsStr;
+            const rawArgs = result2.args ? JSON.stringify(result2.args) : "";
+            if (rawArgs && rawArgs !== "{}") {
+              argsStr = rawArgs;
+            } else if (result2.raw) {
+              const jsonStart = result2.raw.indexOf("{");
+              if (jsonStart !== -1) {
+                let depth = 0, jsonEnd = -1;
+                for (let i = jsonStart; i < result2.raw.length; i++) {
+                  if (result2.raw[i] === "{") depth++;
+                  else if (result2.raw[i] === "}") {
+                    depth--;
+                    if (depth === 0) {
+                      jsonEnd = i;
+                      break;
+                    }
+                  }
+                }
+                argsStr = jsonEnd !== -1 ? result2.raw.slice(jsonStart, jsonEnd + 1) : "";
+              } else {
+                argsStr = "";
               }
-              obj[key] = val;
-            }
-            try {
-              argsStr = JSON.stringify(obj);
-              argsParsed = true;
-            } catch {
+            } else {
+              argsStr = "";
             }
+            parsedResult = { name: toolName, args: argsStr, thought: result2.thought || "", dialect: result2.dialect };
+            break;
           }
         }
-        if (!argsParsed) {
-          const jsonStart = rawArgs.indexOf("{");
-          if (jsonStart !== -1) {
-            let depth = 0;
-            let jsonEnd = -1;
-            for (let i = jsonStart; i < rawArgs.length; i++) {
-              if (rawArgs[i] === "{") depth++;
-              else if (rawArgs[i] === "}") {
-                depth--;
-                if (depth === 0) {
-                  jsonEnd = i;
-                  break;
+      } else {
+        const dialectDefs = [
+          { name: "react", action: "Action:", input: "Action Input:" },
+          { name: "function", action: "Function:", input: "Function Input:" },
+          { name: "tool", action: "Tool:", input: "Tool Input:" },
+          { name: "call", action: "Call:", input: "Input:" }
+        ];
+        for (const dd of dialectDefs) {
+          const esc = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+          const aT = esc(dd.action);
+          const iT = esc(dd.input);
+          const primaryRe = new RegExp(`${aT}\\s*[\\x60"']?(\\w+)[\\x60"']?\\s*\\n?\\s*${iT}\\s*([\\s\\S]*?)(?=\\n\\s*(?:Observation:|Thought:|Final Answer:|${dd.action})|$)`, "is");
+          const sameRe = new RegExp(`${aT}\\s*[\\x60"']?(\\w+)[\\x60"']?\\s+${iT}\\s*([\\s\\S]*?)(?=\\n\\s*(?:Observation:|Thought:|Final Answer:|${dd.action})|$)`, "is");
+          const parenRe = new RegExp(`${aT}\\s*(\\w+)\\s*\\(([^)]*)\\)`, "i");
+          let m = primaryRe.exec(content) || sameRe.exec(content);
+          let isParen = false;
+          if (!m) {
+            m = parenRe.exec(content);
+            isParen = true;
+          }
+          if (m) {
+            const toolName = m[1].trim().replace(/[`"']/g, "");
+            const rawArgs = m[2].trim().replace(/^```\w*\s*/gm, "").replace(/```\s*$/gm, "").trim();
+            let argsStr = "";
+            if (isParen && rawArgs && !rawArgs.startsWith("{")) {
+              const pairs = rawArgs.match(/(\w+)\s*:\s*("[^"]*"|'[^']*'|\S+)/g);
+              if (pairs) {
+                const obj = {};
+                for (const p of pairs) {
+                  const ci = p.indexOf(":");
+                  let v = p.slice(ci + 1).trim();
+                  if (v.startsWith('"') && v.endsWith('"') || v.startsWith("'") && v.endsWith("'")) v = v.slice(1, -1);
+                  obj[p.slice(0, ci).trim()] = v;
                 }
+                argsStr = JSON.stringify(obj);
+              } else {
+                argsStr = rawArgs;
               }
-            }
-            if (jsonEnd !== -1) {
-              const jsonStr = rawArgs.slice(jsonStart, jsonEnd + 1);
-              try {
-                JSON.parse(jsonStr);
-                argsParsed = true;
-                argsStr = jsonStr;
-              } catch {
+            } else {
+              const js = rawArgs.indexOf("{");
+              if (js !== -1) {
+                let d = 0, je = -1;
+                for (let i = js; i < rawArgs.length; i++) {
+                  if (rawArgs[i] === "{") d++;
+                  else if (rawArgs[i] === "}") {
+                    d--;
+                    if (d === 0) {
+                      je = i;
+                      break;
+                    }
+                  }
+                }
+                argsStr = je !== -1 ? rawArgs.slice(js, je + 1) : rawArgs;
+              } else {
+                argsStr = rawArgs;
               }
             }
+            let thought = "";
+            const thoughtRe = /Thought:\s*(.*?)(?=Action:|Function:|Tool:|Call:|Final Answer:|$)/is;
+            const tm = thoughtRe.exec(content);
+            if (tm) thought = tm[1].trim();
+            parsedResult = { name: toolName, args: argsStr, thought, dialect: dd.name };
+            break;
           }
         }
+      }
+      if (parsedResult) {
+        let { name: toolName, args: argsStr, thought, dialect } = parsedResult;
+        const argsParsed = argsStr.length > 0;
         let score;
         const isWeatherTool = toolName.toLowerCase().includes("get_weather") || toolName.toLowerCase() === "get_weather";
         if (isWeatherTool && argsParsed) {
@@ -840,15 +861,25 @@ function model_test_temp_default(pi) {
           toolCall: `${toolName}(${argsStr})`,
           thought,
           response: content,
-          elapsedMs
+          elapsedMs,
+          dialect: dialect || "react"
         };
       }
+      const altTagPatterns = [
+        /^\s*Function:\s*/im,
+        /^\s*Tool:\s*/im,
+        /^\s*Call:\s*/im,
+        /<function_call/i,
+        /<invoke\s/i
+      ];
+      const hasAltTag = altTagPatterns.some((p) => p.test(content));
       const hasToolMention = /\bget_weather\b/i.test(content) || /\btool\b/i.test(content);
-      if (hasToolMention) {
+      if (hasAltTag || hasToolMention) {
+        const detail = hasAltTag ? "model used alternative tool-call tags but format was not parseable" : "model mentioned tool but not in ReAct format";
         return {
           pass: false,
           score: "FAIL",
-          toolCall: "none \u2014 model mentioned tool but not in ReAct format",
+          toolCall: `none \u2014 ${detail}`,
           thought: "",
           response: content,
           elapsedMs
@@ -1071,25 +1102,40 @@ The JSON object must have exactly these 4 keys:
         };
       }
       const reactPatterns = [
+        // Classic ReAct
         /^\s*Action:\s*/im,
-        // "Action: get_weather"
         /^\s*Action Input:\s*/im,
-        // "Action Input: {"location": "Tokyo"}"
         /^\s*Thought:\s*/im,
-        // "Thought: I need to look up the weather"
         /Action:\s*\w+/i,
-        // "Action: get_weather" anywhere
-        /Action Input:\s*\{/i
-        // "Action Input: {..." anywhere
+        /Action Input:\s*\{/i,
+        // Function dialect
+        /^\s*Function:\s*/im,
+        /^\s*Function Input:\s*/im,
+        /Function:\s*\w+/i,
+        // Tool dialect
+        /^\s*Tool:\s*/im,
+        /^\s*Tool Input:\s*/im,
+        /Tool:\s*\w+/i,
+        // Call dialect
+        /^\s*Call:\s*/im,
+        /^\s*Input:\s*/im,
+        /Call:\s*\w+/i
       ];
-      const hasReActPattern = reactPatterns.some((p) => p.test(content));
-      if (hasReActPattern) {
+      const matchedPatterns = [];
+      for (const p of reactPatterns) {
+        if (p.test(content)) matchedPatterns.push(p.source);
+      }
+      if (matchedPatterns.length > 0) {
+        let dialectName = "react";
+        if (/Function:/i.test(content)) dialectName = "function";
+        else if (/Tool:/i.test(content)) dialectName = "tool";
+        else if (/Call:/i.test(content)) dialectName = "call";
         const level2 = "react";
         cacheToolSupport(model, level2, family);
         return {
           level: level2,
           cached: false,
-          evidence: `ReAct format detected in text response`,
+          evidence: `ReAct format detected (${dialectName} dialect) in text response`,
           elapsedMs
         };
       }
@@ -1176,17 +1222,32 @@ The JSON object must have exactly these 4 keys:
     }
   }
   const branding = [
-    `  \u26A1 Pi Model Benchmark v1.0.7`,
+    `  \u26A1 Pi Model Benchmark v1.0.9`,
     `  Written by VTSTech`,
     `  GitHub: https://github.com/VTSTech`,
     `  Website: www.vts-tech.org`
   ].join("\n");
-  async function testModelOllama(model) {
+  async function testModelOllama(model, providerInfo, ctx) {
     const lines = [];
     const totalStart = Date.now();
     lines.push(branding);
     lines.push(section(`MODEL: ${model}`));
     lines.push(info("Provider: Ollama (local/remote)"));
+    const modelsJson = readModelsJson();
+    let apiMode = "ollama";
+    const providerName = ctx?.model?.provider || providerInfo?.name || "";
+    if (providerName && modelsJson) {
+      const providerCfg = (modelsJson.providers || {})[providerName];
+      if (providerCfg) {
+        apiMode = providerCfg.api || "ollama";
+      }
+    }
+    lines.push(info(`API: ${apiMode}`));
+    const nativeContext = await fetchModelContextLength(OLLAMA_BASE, model);
+    if (nativeContext !== void 0) {
+      const ctxStr = nativeContext >= 1e3 ? `${(nativeContext / 1e3).toFixed(1)}k` : String(nativeContext);
+      lines.push(info(`Context: ${ctxStr} tokens (native max)`));
+    }
     let modelSize = "unknown";
     let modelFamily = "unknown";
     let modelParams = "unknown";
@@ -1286,23 +1347,24 @@ The JSON object must have exactly these 4 keys:
     await rateLimitDelay(lines);
     const react = await testReactParsing(model);
     lines.push(info(`Time: ${msHuman(react.elapsedMs)}`));
+    const dialectTag = react.dialect && react.dialect !== "react" ? ` [${react.dialect} dialect]` : "";
     if (react.score === "STRONG") {
-      lines.push(ok(`ReAct parsed: ${react.toolCall} (${react.score})`));
+      lines.push(ok(`ReAct parsed: ${react.toolCall} (${react.score})${dialectTag}`));
       if (react.thought) {
         lines.push(info(`Thought: ${sanitizeForReport(react.thought)}`));
       }
     } else if (react.score === "MODERATE") {
-      lines.push(ok(`ReAct parsed: ${react.toolCall} (${react.score})`));
+      lines.push(ok(`ReAct parsed: ${react.toolCall} (${react.score})${dialectTag}`));
       if (react.thought) {
         lines.push(info(`Thought: ${sanitizeForReport(react.thought)}`));
       }
     } else if (react.score === "WEAK") {
-      lines.push(warn(`ReAct parsed: ${react.toolCall} (${react.score}) \u2014 wrong tool or malformed args`));
+      lines.push(warn(`ReAct parsed: ${react.toolCall} (${react.score}) \u2014 wrong tool or malformed args${dialectTag}`));
       if (react.thought) {
         lines.push(info(`Thought: ${sanitizeForReport(react.thought)}`));
       }
     } else if (react.score === "FAIL") {
-      lines.push(fail(`ReAct parsing: ${react.toolCall} (${react.score})`));
+      lines.push(fail(`ReAct parsing: ${react.toolCall} (${react.score})${dialectTag}`));
       if (react.response) {
         lines.push(info(`Response: ${sanitizeForReport(react.response)}`));
       }
@@ -1387,7 +1449,7 @@ The JSON object must have exactly these 4 keys:
     }
     return lines.join("\n");
   }
-  async function testModelProvider(providerInfo, model) {
+  async function testModelProvider(providerInfo, model, ctx) {
     const lines = [];
     const totalStart = Date.now();
     lines.push(branding);
@@ -1400,6 +1462,11 @@ The JSON object must have exactly these 4 keys:
     } else {
       lines.push(warn(`API Key: NOT SET (${providerInfo.envKey || "env var not found"})`));
     }
+    const contextWindow = ctx?.model?.contextWindow ?? null;
+    if (contextWindow !== null) {
+      const ctxStr = contextWindow >= 1e3 ? `${(contextWindow / 1e3).toFixed(1)}k` : String(contextWindow);
+      lines.push(info(`Context: ${ctxStr} tokens`));
+    }
     lines.push(section("CONNECTIVITY TEST"));
     lines.push(info("Sending minimal request to verify API reachability and key validity..."));
     const connectivity = await testConnectivity(providerInfo, model);
@@ -1520,9 +1587,9 @@ The JSON object must have exactly these 4 keys:
   async function testModel(model, ctx) {
     const providerInfo = ctx ? detectProvider(ctx) : { kind: "ollama", name: "ollama" };
     if (providerInfo.kind === "ollama") {
-      return testModelOllama(model);
+      return testModelOllama(model, providerInfo, ctx);
     } else if (providerInfo.kind === "builtin") {
-      return testModelProvider(providerInfo, model);
+      return testModelProvider(providerInfo, model, ctx);
     } else {
       return testModelOllama(model);
     }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@vtstech/pi-model-test",
-  "version": "1.0.7",
+  "version": "1.0.9",
   "description": "Model benchmark/testing extension for Pi Coding Agent",
   "main": "model-test.js",
   "keywords": ["pi-extensions"],
@@ -14,7 +14,7 @@
     "url": "https://github.com/VTSTech/pi-coding-agent"
   },
   "dependencies": {
-    "@vtstech/pi-shared": "1.0.7"
+    "@vtstech/pi-shared": "1.0.9"
   },
   "peerDependencies": {
     "@mariozechner/pi-coding-agent": ">=0.66"