npm - @vtstech/pi-model-test - Versions diffs - 1.0.5 → 1.0.7 - Mend

@vtstech/pi-model-test 1.0.5 → 1.0.7

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (2) [hide | show]

package/model-test.js +39 -28
package/package.json +2 -2

package/model-test.js CHANGED Viewed

@@ -12,20 +12,7 @@ import {
   truncate,
   sanitizeForReport
 } from "@vtstech/pi-shared/format";
-import { getOllamaBaseUrl, detectModelFamily, readModelsJson } from "@vtstech/pi-shared/ollama";
-var BUILTIN_PROVIDERS = {
-  openrouter: { api: "openai-completions", baseUrl: "https://openrouter.ai/api/v1", envKey: "OPENROUTER_API_KEY" },
-  anthropic: { api: "anthropic-messages", baseUrl: "https://api.anthropic.com/v1", envKey: "ANTHROPIC_API_KEY" },
-  google: { api: "gemini", baseUrl: "https://generativelanguage.googleapis.com", envKey: "GOOGLE_API_KEY" },
-  openai: { api: "openai-completions", baseUrl: "https://api.openai.com/v1", envKey: "OPENAI_API_KEY" },
-  groq: { api: "openai-completions", baseUrl: "https://api.groq.com/v1", envKey: "GROQ_API_KEY" },
-  deepseek: { api: "openai-completions", baseUrl: "https://api.deepseek.com/v1", envKey: "DEEPSEEK_API_KEY" },
-  mistral: { api: "openai-completions", baseUrl: "https://api.mistral.ai/v1", envKey: "MISTRAL_API_KEY" },
-  xai: { api: "openai-completions", baseUrl: "https://api.x.ai/v1", envKey: "XAI_API_KEY" },
-  together: { api: "openai-completions", baseUrl: "https://api.together.xyz/v1", envKey: "TOGETHER_API_KEY" },
-  fireworks: { api: "openai-completions", baseUrl: "https://api.fireworks.ai/inference/v1", envKey: "FIREWORKS_API_KEY" },
-  cohere: { api: "cohere-chat", baseUrl: "https://api.cohere.com/v1", envKey: "COHERE_API_KEY" }
-};
+import { getOllamaBaseUrl, detectModelFamily, readModelsJson, BUILTIN_PROVIDERS } from "@vtstech/pi-shared/ollama";
 function detectProvider(ctx) {
   const model = ctx.model;
   if (!model) return { kind: "unknown", name: "none" };
@@ -37,7 +24,7 @@ function detectProvider(ctx) {
     const baseUrl = userProviderCfg.baseUrl || "";
     const apiMode = userProviderCfg.api || "";
     const apiKey = userProviderCfg.apiKey || "";
-    const isOllama = /ollama/i.test(providerName) || /localhost:\d+/.test(baseUrl) || /127\.0\.0\.1:\d+/.test(baseUrl) || /\/api\/chat/.test(baseUrl) || apiMode === "ollama";
+    const isOllama = /ollama/i.test(providerName) || /localhost:\d+/.test(baseUrl) || /127\.0\.0\.1:\d+/.test(baseUrl) || /0\.0\.0\.0:\d+/.test(baseUrl) || /\/api\/chat/.test(baseUrl) || apiMode === "ollama";
     if (isOllama) {
       return { kind: "ollama", name: providerName, apiMode: "ollama", baseUrl, apiKey };
     }
@@ -521,16 +508,20 @@ function model_test_temp_default(pi) {
         }
         const hasCorrectTool = fn.name === "get_weather";
         const hasLocation = typeof args.location === "string" && args.location.toLowerCase().includes("paris");
+        const unitValid = args.unit === void 0 || typeof args.unit === "string" && ["celsius", "fahrenheit"].includes(args.unit.toLowerCase());
         let score;
-        if (hasCorrectTool && hasLocation) {
+        if (hasCorrectTool && hasLocation && unitValid) {
           score = "STRONG";
-        } else if (hasCorrectTool) {
+        } else if (hasCorrectTool && hasLocation) {
           score = "MODERATE";
+        } else if (hasCorrectTool) {
+          score = "WEAK";
         } else {
           score = "WEAK";
         }
+        const pass = score !== "WEAK";
         return {
-          pass: true,
+          pass,
           score,
           hasToolCalls: true,
           toolCall: `${fn.name}(${JSON.stringify(args)})`,
@@ -564,8 +555,9 @@ function model_test_temp_default(pi) {
         } else {
           score = "WEAK";
         }
+        const pass = score !== "WEAK";
         return {
-          pass: true,
+          pass,
           score,
           hasToolCalls: true,
           toolCall: `${fnName}(${JSON.stringify(fnArgs)})`,
@@ -632,16 +624,20 @@ function model_test_temp_default(pi) {
         }
         const hasCorrectTool = fn.name === "get_weather";
         const hasLocation = typeof args.location === "string" && args.location.toLowerCase().includes("paris");
+        const unitValid = args.unit === void 0 || typeof args.unit === "string" && ["celsius", "fahrenheit"].includes(args.unit.toLowerCase());
         let score;
-        if (hasCorrectTool && hasLocation) {
+        if (hasCorrectTool && hasLocation && unitValid) {
           score = "STRONG";
-        } else if (hasCorrectTool) {
+        } else if (hasCorrectTool && hasLocation) {
           score = "MODERATE";
+        } else if (hasCorrectTool) {
+          score = "WEAK";
         } else {
           score = "WEAK";
         }
+        const pass = score !== "WEAK";
         return {
-          pass: true,
+          pass,
           score,
           hasToolCalls: true,
           toolCall: `${fn.name}(${JSON.stringify(args)})`,
@@ -675,8 +671,9 @@ function model_test_temp_default(pi) {
         } else {
           score = "WEAK";
         }
+        const pass = score !== "WEAK";
         return {
-          pass: true,
+          pass,
           score,
           hasToolCalls: true,
           toolCall: `${fnName}(${JSON.stringify(fnArgs)})`,
@@ -754,7 +751,18 @@ function model_test_temp_default(pi) {
       let match = ACTION_RE.exec(content);
       if (!match) match = ACTION_RE_SAMELINE.exec(content);
       let looseMatch = false;
-      if (!match) match = ACTION_RE_LOOSE.exec(content), looseMatch = true;
+      if (!match) {
+        const looseResult = ACTION_RE_LOOSE.exec(content);
+        if (looseResult) {
+          const candidate = looseResult[1].trim().replace(/[`"']/g, "");
+          const isToolIdentifier = /^\w+$/.test(candidate) && (candidate.includes("_") || candidate.includes("-"));
+          const isKnownTool = /^(get_weather|calculate)$/i.test(candidate);
+          if (isToolIdentifier || isKnownTool) {
+            match = looseResult;
+            looseMatch = true;
+          }
+        }
+      }
       let parenMatch = false;
       if (!match) match = ACTION_RE_PAREN.exec(content), parenMatch = true;
       if (match) {
@@ -825,8 +833,9 @@ function model_test_temp_default(pi) {
         } else {
           score = "WEAK";
         }
+        const pass = score !== "WEAK";
         return {
-          pass: true,
+          pass,
           score,
           toolCall: `${toolName}(${argsStr})`,
           thought,
@@ -1167,7 +1176,7 @@ The JSON object must have exactly these 4 keys:
     }
   }
   const branding = [
-    `  \u26A1 Pi Model Benchmark v1.0.5`,
+    `  \u26A1 Pi Model Benchmark v1.0.7`,
     `  Written by VTSTech`,
     `  GitHub: https://github.com/VTSTech`,
     `  Website: www.vts-tech.org`
@@ -1349,11 +1358,13 @@ The JSON object must have exactly these 4 keys:
     lines.push(info(`Cache: ${TOOL_SUPPORT_CACHE_PATH}`));
     lines.push(section("SUMMARY"));
     const totalMs = Date.now() - totalStart;
+    const toolPass = tools.score === "STRONG" || tools.score === "MODERATE";
+    const reactPass = react.score === "STRONG" || react.score === "MODERATE";
     const tests = [
       { name: "Reasoning", pass: reasoning.score === "STRONG" || reasoning.score === "MODERATE", score: reasoning.score },
       { name: "Thinking", pass: thinking.supported, score: thinking.supported ? "YES" : "NO" },
-      { name: "Tool Usage", pass: tools.pass, score: tools.score },
-      { name: "ReAct Parse", pass: react.pass, score: react.score },
+      { name: "Tool Usage", pass: toolPass, score: tools.score },
+      { name: "ReAct Parse", pass: reactPass, score: react.score },
       { name: "Instructions", pass: instructions.pass, score: instructions.score },
       { name: "Tool Support", pass: toolSupport.level === "native" || toolSupport.level === "react", score: toolSupport.level.toUpperCase() }
     ];

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@vtstech/pi-model-test",
-  "version": "1.0.5",
+  "version": "1.0.7",
   "description": "Model benchmark/testing extension for Pi Coding Agent",
   "main": "model-test.js",
   "keywords": ["pi-extensions"],
@@ -14,7 +14,7 @@
     "url": "https://github.com/VTSTech/pi-coding-agent"
   },
   "dependencies": {
-    "@vtstech/pi-shared": "1.0.5"
+    "@vtstech/pi-shared": "1.0.7"
   },
   "peerDependencies": {
     "@mariozechner/pi-coding-agent": ">=0.66"