npm - @vtstech/pi-model-test - Versions diffs - 1.0.6 → 1.0.8 - Mend

@vtstech/pi-model-test 1.0.6 → 1.0.8

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (2)

package/model-test.js +75 -31
package/package.json +2 -2

package/model-test.js CHANGED Viewed

@@ -12,7 +12,7 @@ import {
   truncate,
   sanitizeForReport
 } from "@vtstech/pi-shared/format";
-import { getOllamaBaseUrl, detectModelFamily, readModelsJson, BUILTIN_PROVIDERS } from "@vtstech/pi-shared/ollama";
+import { getOllamaBaseUrl, detectModelFamily, readModelsJson, BUILTIN_PROVIDERS, fetchModelContextLength } from "@vtstech/pi-shared/ollama";
 function detectProvider(ctx) {
   const model = ctx.model;
   if (!model) return { kind: "unknown", name: "none" };
@@ -55,15 +55,15 @@ function detectProvider(ctx) {
 }
 var CONFIG = {
   // General API settings
-  DEFAULT_TIMEOUT_MS: 6e5,
+  DEFAULT_TIMEOUT_MS: 999999,
   // 8.3 minutes - default timeout for model responses
-  CONNECT_TIMEOUT_S: 30,
+  CONNECT_TIMEOUT_S: 60,
   // 30 seconds to establish connection
   MAX_RETRIES: 1,
   // Single retry for transient failures
-  RETRY_DELAY_MS: 2e3,
+  RETRY_DELAY_MS: 1e4,
   // 2 seconds between retries
-  EXEC_BUFFER_MS: 5e3,
+  EXEC_BUFFER_MS: 8e3,
   // Extra buffer for exec timeout over curl timeout
   // Model generation settings
   NUM_PREDICT: 1024,
@@ -73,28 +73,28 @@ var CONFIG = {
   // Test-specific settings
   MIN_THINKING_LENGTH: 10,
   // Minimum chars to consider thinking tokens valid
-  TOOL_TEST_TIMEOUT_MS: 9e4,
+  TOOL_TEST_TIMEOUT_MS: 999999,
   // 90 seconds for tool usage tests
-  TOOL_TEST_MAX_TIME_S: 9999,
+  TOOL_TEST_MAX_TIME_S: 999999,
   // Max curl time for tool tests (effectively unlimited)
-  TOOL_SUPPORT_TIMEOUT_MS: 26e4,
+  TOOL_SUPPORT_TIMEOUT_MS: 999999,
   // 2+ minutes for tool support detection
-  TOOL_SUPPORT_MAX_TIME_S: 240,
+  TOOL_SUPPORT_MAX_TIME_S: 999999,
   // Max curl time for tool support detection
   // Metadata retrieval
   TAGS_TIMEOUT_MS: 15e3,
   // 15 seconds for /api/tags
-  TAGS_CONNECT_TIMEOUT_S: 10,
+  TAGS_CONNECT_TIMEOUT_S: 30,
   // 10 seconds connection timeout for tags
-  MODEL_INFO_TIMEOUT_MS: 1e4,
+  MODEL_INFO_TIMEOUT_MS: 3e4,
   // 10 seconds for model info lookup
   // Provider API settings
-  PROVIDER_TIMEOUT_MS: 12e4,
+  PROVIDER_TIMEOUT_MS: 999999,
   // 2 minutes for cloud provider API calls
-  PROVIDER_TOOL_TIMEOUT_MS: 6e4,
+  PROVIDER_TOOL_TIMEOUT_MS: 12e4,
   // 60 seconds for tool usage tests on providers
   // Rate limiting
-  TEST_DELAY_MS: 3e4
+  TEST_DELAY_MS: 1e4
   // 30 seconds between tests to avoid rate limiting
 };
 var TOOL_SUPPORT_CACHE_DIR = path.join(os.homedir(), ".pi", "agent", "cache");
@@ -508,16 +508,20 @@ function model_test_temp_default(pi) {
         }
         const hasCorrectTool = fn.name === "get_weather";
         const hasLocation = typeof args.location === "string" && args.location.toLowerCase().includes("paris");
+        const unitValid = args.unit === void 0 || typeof args.unit === "string" && ["celsius", "fahrenheit"].includes(args.unit.toLowerCase());
         let score;
-        if (hasCorrectTool && hasLocation) {
+        if (hasCorrectTool && hasLocation && unitValid) {
           score = "STRONG";
-        } else if (hasCorrectTool) {
+        } else if (hasCorrectTool && hasLocation) {
           score = "MODERATE";
+        } else if (hasCorrectTool) {
+          score = "WEAK";
         } else {
           score = "WEAK";
         }
+        const pass = score !== "WEAK";
         return {
-          pass: true,
+          pass,
           score,
           hasToolCalls: true,
           toolCall: `${fn.name}(${JSON.stringify(args)})`,
@@ -551,8 +555,9 @@ function model_test_temp_default(pi) {
         } else {
           score = "WEAK";
         }
+        const pass = score !== "WEAK";
         return {
-          pass: true,
+          pass,
           score,
           hasToolCalls: true,
           toolCall: `${fnName}(${JSON.stringify(fnArgs)})`,
@@ -619,16 +624,20 @@ function model_test_temp_default(pi) {
         }
         const hasCorrectTool = fn.name === "get_weather";
         const hasLocation = typeof args.location === "string" && args.location.toLowerCase().includes("paris");
+        const unitValid = args.unit === void 0 || typeof args.unit === "string" && ["celsius", "fahrenheit"].includes(args.unit.toLowerCase());
         let score;
-        if (hasCorrectTool && hasLocation) {
+        if (hasCorrectTool && hasLocation && unitValid) {
           score = "STRONG";
-        } else if (hasCorrectTool) {
+        } else if (hasCorrectTool && hasLocation) {
           score = "MODERATE";
+        } else if (hasCorrectTool) {
+          score = "WEAK";
         } else {
           score = "WEAK";
         }
+        const pass = score !== "WEAK";
         return {
-          pass: true,
+          pass,
           score,
           hasToolCalls: true,
           toolCall: `${fn.name}(${JSON.stringify(args)})`,
@@ -662,8 +671,9 @@ function model_test_temp_default(pi) {
         } else {
           score = "WEAK";
         }
+        const pass = score !== "WEAK";
         return {
-          pass: true,
+          pass,
           score,
           hasToolCalls: true,
           toolCall: `${fnName}(${JSON.stringify(fnArgs)})`,
@@ -741,7 +751,18 @@ function model_test_temp_default(pi) {
       let match = ACTION_RE.exec(content);
       if (!match) match = ACTION_RE_SAMELINE.exec(content);
       let looseMatch = false;
-      if (!match) match = ACTION_RE_LOOSE.exec(content), looseMatch = true;
+      if (!match) {
+        const looseResult = ACTION_RE_LOOSE.exec(content);
+        if (looseResult) {
+          const candidate = looseResult[1].trim().replace(/[`"']/g, "");
+          const isToolIdentifier = /^\w+$/.test(candidate) && (candidate.includes("_") || candidate.includes("-"));
+          const isKnownTool = /^(get_weather|calculate)$/i.test(candidate);
+          if (isToolIdentifier || isKnownTool) {
+            match = looseResult;
+            looseMatch = true;
+          }
+        }
+      }
       let parenMatch = false;
       if (!match) match = ACTION_RE_PAREN.exec(content), parenMatch = true;
       if (match) {
@@ -812,8 +833,9 @@ function model_test_temp_default(pi) {
         } else {
           score = "WEAK";
         }
+        const pass = score !== "WEAK";
         return {
-          pass: true,
+          pass,
           score,
           toolCall: `${toolName}(${argsStr})`,
           thought,
@@ -1154,17 +1176,32 @@ The JSON object must have exactly these 4 keys:
     }
   }
   const branding = [
-    `  \u26A1 Pi Model Benchmark v1.0.6`,
+    `  \u26A1 Pi Model Benchmark v1.0.8`,
     `  Written by VTSTech`,
     `  GitHub: https://github.com/VTSTech`,
     `  Website: www.vts-tech.org`
   ].join("\n");
-  async function testModelOllama(model) {
+  async function testModelOllama(model, providerInfo, ctx) {
     const lines = [];
     const totalStart = Date.now();
     lines.push(branding);
     lines.push(section(`MODEL: ${model}`));
     lines.push(info("Provider: Ollama (local/remote)"));
+    const modelsJson = readModelsJson();
+    let apiMode = "ollama";
+    const providerName = ctx?.model?.provider || providerInfo?.name || "";
+    if (providerName && modelsJson) {
+      const providerCfg = (modelsJson.providers || {})[providerName];
+      if (providerCfg) {
+        apiMode = providerCfg.api || "ollama";
+      }
+    }
+    lines.push(info(`API: ${apiMode}`));
+    const nativeContext = await fetchModelContextLength(OLLAMA_BASE, model);
+    if (nativeContext !== void 0) {
+      const ctxStr = nativeContext >= 1e3 ? `${(nativeContext / 1e3).toFixed(1)}k` : String(nativeContext);
+      lines.push(info(`Context: ${ctxStr} tokens (native max)`));
+    }
     let modelSize = "unknown";
     let modelFamily = "unknown";
     let modelParams = "unknown";
@@ -1336,11 +1373,13 @@ The JSON object must have exactly these 4 keys:
     lines.push(info(`Cache: ${TOOL_SUPPORT_CACHE_PATH}`));
     lines.push(section("SUMMARY"));
     const totalMs = Date.now() - totalStart;
+    const toolPass = tools.score === "STRONG" || tools.score === "MODERATE";
+    const reactPass = react.score === "STRONG" || react.score === "MODERATE";
     const tests = [
       { name: "Reasoning", pass: reasoning.score === "STRONG" || reasoning.score === "MODERATE", score: reasoning.score },
       { name: "Thinking", pass: thinking.supported, score: thinking.supported ? "YES" : "NO" },
-      { name: "Tool Usage", pass: tools.pass, score: tools.score },
-      { name: "ReAct Parse", pass: react.pass, score: react.score },
+      { name: "Tool Usage", pass: toolPass, score: tools.score },
+      { name: "ReAct Parse", pass: reactPass, score: react.score },
       { name: "Instructions", pass: instructions.pass, score: instructions.score },
       { name: "Tool Support", pass: toolSupport.level === "native" || toolSupport.level === "react", score: toolSupport.level.toUpperCase() }
     ];
@@ -1363,7 +1402,7 @@ The JSON object must have exactly these 4 keys:
     }
     return lines.join("\n");
   }
-  async function testModelProvider(providerInfo, model) {
+  async function testModelProvider(providerInfo, model, ctx) {
     const lines = [];
     const totalStart = Date.now();
     lines.push(branding);
@@ -1376,6 +1415,11 @@ The JSON object must have exactly these 4 keys:
     } else {
       lines.push(warn(`API Key: NOT SET (${providerInfo.envKey || "env var not found"})`));
     }
+    const contextWindow = ctx?.model?.contextWindow ?? null;
+    if (contextWindow !== null) {
+      const ctxStr = contextWindow >= 1e3 ? `${(contextWindow / 1e3).toFixed(1)}k` : String(contextWindow);
+      lines.push(info(`Context: ${ctxStr} tokens`));
+    }
     lines.push(section("CONNECTIVITY TEST"));
     lines.push(info("Sending minimal request to verify API reachability and key validity..."));
     const connectivity = await testConnectivity(providerInfo, model);
@@ -1496,9 +1540,9 @@ The JSON object must have exactly these 4 keys:
   async function testModel(model, ctx) {
     const providerInfo = ctx ? detectProvider(ctx) : { kind: "ollama", name: "ollama" };
     if (providerInfo.kind === "ollama") {
-      return testModelOllama(model);
+      return testModelOllama(model, providerInfo, ctx);
     } else if (providerInfo.kind === "builtin") {
-      return testModelProvider(providerInfo, model);
+      return testModelProvider(providerInfo, model, ctx);
     } else {
       return testModelOllama(model);
     }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@vtstech/pi-model-test",
-  "version": "1.0.6",
+  "version": "1.0.8",
   "description": "Model benchmark/testing extension for Pi Coding Agent",
   "main": "model-test.js",
   "keywords": ["pi-extensions"],
@@ -14,7 +14,7 @@
     "url": "https://github.com/VTSTech/pi-coding-agent"
   },
   "dependencies": {
-    "@vtstech/pi-shared": "1.0.6"
+    "@vtstech/pi-shared": "1.0.8"
   },
   "peerDependencies": {
     "@mariozechner/pi-coding-agent": ">=0.66"