npm - @vtstech/pi-model-test - Versions diffs - 1.0.8 → 1.0.9 - Mend

@vtstech/pi-model-test 1.0.8 → 1.0.9

This diff shows the differences between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions exactly as they appear in their respective public registries.

Files changed (2) hide show

package/model-test.js +138 -91
package/package.json +2 -2

package/model-test.js CHANGED Viewed

@@ -740,90 +740,111 @@ function model_test_temp_default(pi) {
       if (!content) {
         return { pass: false, score: "FAIL", toolCall: "empty response", thought: "", response: "", elapsedMs };
       }
-      const THOUGHT_RE = /Thought:\s*(.*?)(?=Action:|Final Answer:|$)/is;
-      const ACTION_RE = /Action:\s*[`"']?(\w+)[`"']?\s*\n?\s*Action Input:\s*(.*?)(?=\n\s*(?:Observation:|Thought:|Final Answer:|Action:)|$)/is;
-      const ACTION_RE_SAMELINE = /Action:\s*[`"']?(\w+)[`"']?\s+Action Input:\s*(.*?)(?=\n\s*(?:Observation:|Thought:|Final Answer:)|$)/is;
-      const ACTION_RE_LOOSE = /Action:\s*(.+?)\n\s*Action Input:\s*(.*?)(?=\n\s*(?:Observation:|Thought:|Final Answer:|Action:)|$)/is;
-      const ACTION_RE_PAREN = /Action:\s*(\w+)\s*\(([^)]*)\)/i;
-      let thought = "";
-      const thoughtMatch = THOUGHT_RE.exec(content);
-      if (thoughtMatch) thought = thoughtMatch[1].trim();
-      let match = ACTION_RE.exec(content);
-      if (!match) match = ACTION_RE_SAMELINE.exec(content);
-      let looseMatch = false;
-      if (!match) {
-        const looseResult = ACTION_RE_LOOSE.exec(content);
-        if (looseResult) {
-          const candidate = looseResult[1].trim().replace(/[`"']/g, "");
-          const isToolIdentifier = /^\w+$/.test(candidate) && (candidate.includes("_") || candidate.includes("-"));
-          const isKnownTool = /^(get_weather|calculate)$/i.test(candidate);
-          if (isToolIdentifier || isKnownTool) {
-            match = looseResult;
-            looseMatch = true;
-          }
-        }
-      }
-      let parenMatch = false;
-      if (!match) match = ACTION_RE_PAREN.exec(content), parenMatch = true;
-      if (match) {
-        let toolName = match[1].trim().replace(/[`"']/g, "");
-        if (looseMatch) {
-          const actionText = toolName.toLowerCase();
-          if (actionText.includes("get_weather")) toolName = "get_weather";
-          else {
-            const toolWords = actionText.match(/\b[a-z][a-z0-9]*(?:[_-][a-z0-9]+)+\b/gi) || [];
-            if (toolWords.length > 0) toolName = toolWords[0];
-          }
-        }
-        const rawArgs = parenMatch ? match[2].trim().replace(/^```\w*\s*/gm, "").replace(/```\s*$/gm, "").trim() : match[2].trim().replace(/^```\w*\s*/gm, "").replace(/```\s*$/gm, "").trim();
-        let argsParsed = false;
-        let argsStr = rawArgs;
-        if (parenMatch && rawArgs && !rawArgs.startsWith("{")) {
-          const pairs = rawArgs.match(/(\w+)\s*:\s*("[^"]*"|'[^']*'|\S+)/g);
-          if (pairs) {
-            const obj = {};
-            for (const p of pairs) {
-              const colonIdx = p.indexOf(":");
-              const key = p.slice(0, colonIdx).trim();
-              let val = p.slice(colonIdx + 1).trim();
-              if (val.startsWith('"') && val.endsWith('"') || val.startsWith("'") && val.endsWith("'")) {
-                val = val.slice(1, -1);
+      let parsedResult = null;
+      const sharedParser = pi._reactParser;
+      if (sharedParser?.ALL_DIALECT_PATTERNS) {
+        for (const dp of sharedParser.ALL_DIALECT_PATTERNS) {
+          const result2 = sharedParser.parseReactWithPatterns(content, dp, true);
+          if (result2) {
+            let toolName = result2.name;
+            let argsStr;
+            const rawArgs = result2.args ? JSON.stringify(result2.args) : "";
+            if (rawArgs && rawArgs !== "{}") {
+              argsStr = rawArgs;
+            } else if (result2.raw) {
+              const jsonStart = result2.raw.indexOf("{");
+              if (jsonStart !== -1) {
+                let depth = 0, jsonEnd = -1;
+                for (let i = jsonStart; i < result2.raw.length; i++) {
+                  if (result2.raw[i] === "{") depth++;
+                  else if (result2.raw[i] === "}") {
+                    depth--;
+                    if (depth === 0) {
+                      jsonEnd = i;
+                      break;
+                    }
+                  }
+                }
+                argsStr = jsonEnd !== -1 ? result2.raw.slice(jsonStart, jsonEnd + 1) : "";
+              } else {
+                argsStr = "";
               }
-              obj[key] = val;
-            }
-            try {
-              argsStr = JSON.stringify(obj);
-              argsParsed = true;
-            } catch {
+            } else {
+              argsStr = "";
             }
+            parsedResult = { name: toolName, args: argsStr, thought: result2.thought || "", dialect: result2.dialect };
+            break;
           }
         }
-        if (!argsParsed) {
-          const jsonStart = rawArgs.indexOf("{");
-          if (jsonStart !== -1) {
-            let depth = 0;
-            let jsonEnd = -1;
-            for (let i = jsonStart; i < rawArgs.length; i++) {
-              if (rawArgs[i] === "{") depth++;
-              else if (rawArgs[i] === "}") {
-                depth--;
-                if (depth === 0) {
-                  jsonEnd = i;
-                  break;
+      } else {
+        const dialectDefs = [
+          { name: "react", action: "Action:", input: "Action Input:" },
+          { name: "function", action: "Function:", input: "Function Input:" },
+          { name: "tool", action: "Tool:", input: "Tool Input:" },
+          { name: "call", action: "Call:", input: "Input:" }
+        ];
+        for (const dd of dialectDefs) {
+          const esc = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+          const aT = esc(dd.action);
+          const iT = esc(dd.input);
+          const primaryRe = new RegExp(`${aT}\\s*[\\x60"']?(\\w+)[\\x60"']?\\s*\\n?\\s*${iT}\\s*([\\s\\S]*?)(?=\\n\\s*(?:Observation:|Thought:|Final Answer:|${dd.action})|$)`, "is");
+          const sameRe = new RegExp(`${aT}\\s*[\\x60"']?(\\w+)[\\x60"']?\\s+${iT}\\s*([\\s\\S]*?)(?=\\n\\s*(?:Observation:|Thought:|Final Answer:|${dd.action})|$)`, "is");
+          const parenRe = new RegExp(`${aT}\\s*(\\w+)\\s*\\(([^)]*)\\)`, "i");
+          let m = primaryRe.exec(content) || sameRe.exec(content);
+          let isParen = false;
+          if (!m) {
+            m = parenRe.exec(content);
+            isParen = true;
+          }
+          if (m) {
+            const toolName = m[1].trim().replace(/[`"']/g, "");
+            const rawArgs = m[2].trim().replace(/^```\w*\s*/gm, "").replace(/```\s*$/gm, "").trim();
+            let argsStr = "";
+            if (isParen && rawArgs && !rawArgs.startsWith("{")) {
+              const pairs = rawArgs.match(/(\w+)\s*:\s*("[^"]*"|'[^']*'|\S+)/g);
+              if (pairs) {
+                const obj = {};
+                for (const p of pairs) {
+                  const ci = p.indexOf(":");
+                  let v = p.slice(ci + 1).trim();
+                  if (v.startsWith('"') && v.endsWith('"') || v.startsWith("'") && v.endsWith("'")) v = v.slice(1, -1);
+                  obj[p.slice(0, ci).trim()] = v;
                 }
+                argsStr = JSON.stringify(obj);
+              } else {
+                argsStr = rawArgs;
               }
-            }
-            if (jsonEnd !== -1) {
-              const jsonStr = rawArgs.slice(jsonStart, jsonEnd + 1);
-              try {
-                JSON.parse(jsonStr);
-                argsParsed = true;
-                argsStr = jsonStr;
-              } catch {
+            } else {
+              const js = rawArgs.indexOf("{");
+              if (js !== -1) {
+                let d = 0, je = -1;
+                for (let i = js; i < rawArgs.length; i++) {
+                  if (rawArgs[i] === "{") d++;
+                  else if (rawArgs[i] === "}") {
+                    d--;
+                    if (d === 0) {
+                      je = i;
+                      break;
+                    }
+                  }
+                }
+                argsStr = je !== -1 ? rawArgs.slice(js, je + 1) : rawArgs;
+              } else {
+                argsStr = rawArgs;
               }
             }
+            let thought = "";
+            const thoughtRe = /Thought:\s*(.*?)(?=Action:|Function:|Tool:|Call:|Final Answer:|$)/is;
+            const tm = thoughtRe.exec(content);
+            if (tm) thought = tm[1].trim();
+            parsedResult = { name: toolName, args: argsStr, thought, dialect: dd.name };
+            break;
           }
         }
+      }
+      if (parsedResult) {
+        let { name: toolName, args: argsStr, thought, dialect } = parsedResult;
+        const argsParsed = argsStr.length > 0;
         let score;
         const isWeatherTool = toolName.toLowerCase().includes("get_weather") || toolName.toLowerCase() === "get_weather";
         if (isWeatherTool && argsParsed) {
@@ -840,15 +861,25 @@ function model_test_temp_default(pi) {
           toolCall: `${toolName}(${argsStr})`,
           thought,
           response: content,
-          elapsedMs
+          elapsedMs,
+          dialect: dialect || "react"
         };
       }
+      const altTagPatterns = [
+        /^\s*Function:\s*/im,
+        /^\s*Tool:\s*/im,
+        /^\s*Call:\s*/im,
+        /<function_call/i,
+        /<invoke\s/i
+      ];
+      const hasAltTag = altTagPatterns.some((p) => p.test(content));
       const hasToolMention = /\bget_weather\b/i.test(content) || /\btool\b/i.test(content);
-      if (hasToolMention) {
+      if (hasAltTag || hasToolMention) {
+        const detail = hasAltTag ? "model used alternative tool-call tags but format was not parseable" : "model mentioned tool but not in ReAct format";
         return {
           pass: false,
           score: "FAIL",
-          toolCall: "none \u2014 model mentioned tool but not in ReAct format",
+          toolCall: `none \u2014 ${detail}`,
           thought: "",
           response: content,
           elapsedMs
@@ -1071,25 +1102,40 @@ The JSON object must have exactly these 4 keys:
         };
       }
       const reactPatterns = [
+        // Classic ReAct
         /^\s*Action:\s*/im,
-        // "Action: get_weather"
         /^\s*Action Input:\s*/im,
-        // "Action Input: {"location": "Tokyo"}"
         /^\s*Thought:\s*/im,
-        // "Thought: I need to look up the weather"
         /Action:\s*\w+/i,
-        // "Action: get_weather" anywhere
-        /Action Input:\s*\{/i
-        // "Action Input: {..." anywhere
+        /Action Input:\s*\{/i,
+        // Function dialect
+        /^\s*Function:\s*/im,
+        /^\s*Function Input:\s*/im,
+        /Function:\s*\w+/i,
+        // Tool dialect
+        /^\s*Tool:\s*/im,
+        /^\s*Tool Input:\s*/im,
+        /Tool:\s*\w+/i,
+        // Call dialect
+        /^\s*Call:\s*/im,
+        /^\s*Input:\s*/im,
+        /Call:\s*\w+/i
       ];
-      const hasReActPattern = reactPatterns.some((p) => p.test(content));
-      if (hasReActPattern) {
+      const matchedPatterns = [];
+      for (const p of reactPatterns) {
+        if (p.test(content)) matchedPatterns.push(p.source);
+      }
+      if (matchedPatterns.length > 0) {
+        let dialectName = "react";
+        if (/Function:/i.test(content)) dialectName = "function";
+        else if (/Tool:/i.test(content)) dialectName = "tool";
+        else if (/Call:/i.test(content)) dialectName = "call";
         const level2 = "react";
         cacheToolSupport(model, level2, family);
         return {
           level: level2,
           cached: false,
-          evidence: `ReAct format detected in text response`,
+          evidence: `ReAct format detected (${dialectName} dialect) in text response`,
           elapsedMs
         };
       }
@@ -1176,7 +1222,7 @@ The JSON object must have exactly these 4 keys:
     }
   }
   const branding = [
-    `  \u26A1 Pi Model Benchmark v1.0.8`,
+    `  \u26A1 Pi Model Benchmark v1.0.9`,
     `  Written by VTSTech`,
     `  GitHub: https://github.com/VTSTech`,
     `  Website: www.vts-tech.org`
@@ -1301,23 +1347,24 @@ The JSON object must have exactly these 4 keys:
     await rateLimitDelay(lines);
     const react = await testReactParsing(model);
     lines.push(info(`Time: ${msHuman(react.elapsedMs)}`));
+    const dialectTag = react.dialect && react.dialect !== "react" ? ` [${react.dialect} dialect]` : "";
     if (react.score === "STRONG") {
-      lines.push(ok(`ReAct parsed: ${react.toolCall} (${react.score})`));
+      lines.push(ok(`ReAct parsed: ${react.toolCall} (${react.score})${dialectTag}`));
       if (react.thought) {
         lines.push(info(`Thought: ${sanitizeForReport(react.thought)}`));
       }
     } else if (react.score === "MODERATE") {
-      lines.push(ok(`ReAct parsed: ${react.toolCall} (${react.score})`));
+      lines.push(ok(`ReAct parsed: ${react.toolCall} (${react.score})${dialectTag}`));
       if (react.thought) {
         lines.push(info(`Thought: ${sanitizeForReport(react.thought)}`));
       }
     } else if (react.score === "WEAK") {
-      lines.push(warn(`ReAct parsed: ${react.toolCall} (${react.score}) \u2014 wrong tool or malformed args`));
+      lines.push(warn(`ReAct parsed: ${react.toolCall} (${react.score}) \u2014 wrong tool or malformed args${dialectTag}`));
       if (react.thought) {
         lines.push(info(`Thought: ${sanitizeForReport(react.thought)}`));
       }
     } else if (react.score === "FAIL") {
-      lines.push(fail(`ReAct parsing: ${react.toolCall} (${react.score})`));
+      lines.push(fail(`ReAct parsing: ${react.toolCall} (${react.score})${dialectTag}`));
       if (react.response) {
         lines.push(info(`Response: ${sanitizeForReport(react.response)}`));
       }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@vtstech/pi-model-test",
-  "version": "1.0.8",
+  "version": "1.0.9",
   "description": "Model benchmark/testing extension for Pi Coding Agent",
   "main": "model-test.js",
   "keywords": ["pi-extensions"],
@@ -14,7 +14,7 @@
     "url": "https://github.com/VTSTech/pi-coding-agent"
   },
   "dependencies": {
-    "@vtstech/pi-shared": "1.0.8"
+    "@vtstech/pi-shared": "1.0.9"
   },
   "peerDependencies": {
     "@mariozechner/pi-coding-agent": ">=0.66"