@vtstech/pi-model-test 1.0.8 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/model-test.js +204 -169
  2. package/package.json +2 -2
package/model-test.js CHANGED
@@ -12,57 +12,17 @@ import {
12
12
  truncate,
13
13
  sanitizeForReport
14
14
  } from "@vtstech/pi-shared/format";
15
- import { getOllamaBaseUrl, detectModelFamily, readModelsJson, BUILTIN_PROVIDERS, fetchModelContextLength } from "@vtstech/pi-shared/ollama";
16
- function detectProvider(ctx) {
17
- const model = ctx.model;
18
- if (!model) return { kind: "unknown", name: "none" };
19
- const providerName = model.provider || "";
20
- if (!providerName) return { kind: "unknown", name: "none" };
21
- const modelsJson = readModelsJson();
22
- const userProviderCfg = (modelsJson.providers || {})[providerName];
23
- if (userProviderCfg) {
24
- const baseUrl = userProviderCfg.baseUrl || "";
25
- const apiMode = userProviderCfg.api || "";
26
- const apiKey = userProviderCfg.apiKey || "";
27
- const isOllama = /ollama/i.test(providerName) || /localhost:\d+/.test(baseUrl) || /127\.0\.0\.1:\d+/.test(baseUrl) || /0\.0\.0\.0:\d+/.test(baseUrl) || /\/api\/chat/.test(baseUrl) || apiMode === "ollama";
28
- if (isOllama) {
29
- return { kind: "ollama", name: providerName, apiMode: "ollama", baseUrl, apiKey };
30
- }
31
- if (/\/api\/chat/.test(baseUrl)) {
32
- return { kind: "ollama", name: providerName, apiMode: "ollama", baseUrl, apiKey };
33
- }
34
- return {
35
- kind: "builtin",
36
- name: providerName,
37
- apiMode: apiMode || userProviderCfg.api || "openai-completions",
38
- baseUrl,
39
- apiKey
40
- };
41
- }
42
- const builtin = BUILTIN_PROVIDERS[providerName];
43
- if (builtin) {
44
- const apiKey = process.env[builtin.envKey] || "";
45
- return {
46
- kind: "builtin",
47
- name: providerName,
48
- apiMode: builtin.api,
49
- baseUrl: builtin.baseUrl,
50
- envKey: builtin.envKey,
51
- apiKey
52
- };
53
- }
54
- return { kind: "unknown", name: providerName };
55
- }
15
+ import { getOllamaBaseUrl, detectModelFamily, readModelsJson, writeModelsJson, fetchModelContextLength, EXTENSION_VERSION, detectProvider } from "@vtstech/pi-shared/ollama";
56
16
  var CONFIG = {
57
17
  // General API settings
58
18
  DEFAULT_TIMEOUT_MS: 999999,
59
- // 8.3 minutes - default timeout for model responses
19
+ // ~16.7 minutes; effectively unlimited for slow models
60
20
  CONNECT_TIMEOUT_S: 60,
61
- // 30 seconds to establish connection
21
+ // 60 seconds to establish connection
62
22
  MAX_RETRIES: 1,
63
23
  // Single retry for transient failures
64
24
  RETRY_DELAY_MS: 1e4,
65
- // 2 seconds between retries
25
+ // 10 seconds between retries
66
26
  EXEC_BUFFER_MS: 8e3,
67
27
  // Extra buffer for exec timeout over curl timeout
68
28
  // Model generation settings
@@ -74,31 +34,32 @@ var CONFIG = {
74
34
  MIN_THINKING_LENGTH: 10,
75
35
  // Minimum chars to consider thinking tokens valid
76
36
  TOOL_TEST_TIMEOUT_MS: 999999,
77
- // 90 seconds for tool usage tests
37
+ // Effectively unlimited for slow tool usage tests
78
38
  TOOL_TEST_MAX_TIME_S: 999999,
79
39
  // Max curl time for tool tests (effectively unlimited)
80
40
  TOOL_SUPPORT_TIMEOUT_MS: 999999,
81
- // 2+ minutes for tool support detection
41
+ // Effectively unlimited for tool support detection
82
42
  TOOL_SUPPORT_MAX_TIME_S: 999999,
83
43
  // Max curl time for tool support detection
84
44
  // Metadata retrieval
85
45
  TAGS_TIMEOUT_MS: 15e3,
86
46
  // 15 seconds for /api/tags
87
47
  TAGS_CONNECT_TIMEOUT_S: 30,
88
- // 10 seconds connection timeout for tags
48
+ // 30 seconds connection timeout for tags
89
49
  MODEL_INFO_TIMEOUT_MS: 3e4,
90
- // 10 seconds for model info lookup
50
+ // 30 seconds for model info lookup
91
51
  // Provider API settings
92
52
  PROVIDER_TIMEOUT_MS: 999999,
93
- // 2 minutes for cloud provider API calls
53
+ // Effectively unlimited for cloud provider API calls
94
54
  PROVIDER_TOOL_TIMEOUT_MS: 12e4,
95
- // 60 seconds for tool usage tests on providers
55
+ // 120 seconds for tool usage tests on providers
96
56
  // Rate limiting
97
57
  TEST_DELAY_MS: 1e4
98
- // 30 seconds between tests to avoid rate limiting
58
+ // 10 seconds between tests to avoid rate limiting
99
59
  };
100
60
  var TOOL_SUPPORT_CACHE_DIR = path.join(os.homedir(), ".pi", "agent", "cache");
101
61
  var TOOL_SUPPORT_CACHE_PATH = path.join(TOOL_SUPPORT_CACHE_DIR, "tool_support.json");
62
+ var _toolSupportCacheInMemory = null;
102
63
  function readToolSupportCache() {
103
64
  try {
104
65
  if (fs.existsSync(TOOL_SUPPORT_CACHE_PATH)) {
@@ -116,19 +77,21 @@ function writeToolSupportCache(cache) {
116
77
  fs.writeFileSync(TOOL_SUPPORT_CACHE_PATH, JSON.stringify(cache, null, 2) + "\n", "utf-8");
117
78
  }
118
79
  function getCachedToolSupport(model) {
119
- const cache = readToolSupportCache();
80
+ const cache = _toolSupportCacheInMemory || readToolSupportCache();
81
+ if (!_toolSupportCacheInMemory) _toolSupportCacheInMemory = cache;
120
82
  const entry = cache[model];
121
83
  if (!entry) return null;
122
84
  if (!entry.support || !["native", "react", "none"].includes(entry.support)) return null;
123
85
  return entry;
124
86
  }
125
87
  function cacheToolSupport(model, support, family) {
126
- const cache = readToolSupportCache();
88
+ const cache = _toolSupportCacheInMemory || readToolSupportCache();
127
89
  cache[model] = {
128
90
  support,
129
91
  testedAt: (/* @__PURE__ */ new Date()).toISOString(),
130
92
  family
131
93
  };
94
+ _toolSupportCacheInMemory = cache;
132
95
  writeToolSupportCache(cache);
133
96
  }
134
97
  function model_test_temp_default(pi) {
@@ -247,10 +210,8 @@ function model_test_temp_default(pi) {
247
210
  { role: "user", content: "Reply with exactly: PONG" }
248
211
  ], { maxTokens: 10, timeoutMs: 3e4 });
249
212
  const elapsedMs = Date.now() - start;
250
- const content = result.content.trim().toUpperCase();
251
213
  const reachable = true;
252
214
  const authValid = true;
253
- const hasPong = content.includes("PONG");
254
215
  return {
255
216
  pass: reachable && authValid,
256
217
  reachable,
@@ -259,7 +220,6 @@ function model_test_temp_default(pi) {
259
220
  elapsedMs
260
221
  };
261
222
  } catch (e) {
262
- const start = Date.now();
263
223
  let reachable = false;
264
224
  let authValid = false;
265
225
  const msg = e.message || "";
@@ -290,7 +250,6 @@ function model_test_temp_default(pi) {
290
250
  const prompt = `A snail climbs 3 feet up a wall each day, but slides back 2 feet each night. The wall is 10 feet tall. How many days does it take the snail to reach the top? Think step by step and give the final answer on its own line like: ANSWER: <number>`;
291
251
  try {
292
252
  let response, elapsedMs;
293
- let usedThinkingFallback = false;
294
253
  try {
295
254
  const result = await ollamaChat(model, [
296
255
  { role: "user", content: prompt }
@@ -309,7 +268,6 @@ function model_test_temp_default(pi) {
309
268
  ], { think: true });
310
269
  response = retry.response;
311
270
  elapsedMs = retry.elapsedMs;
312
- usedThinkingFallback = true;
313
271
  } else {
314
272
  throw firstErr;
315
273
  }
@@ -740,90 +698,111 @@ function model_test_temp_default(pi) {
740
698
  if (!content) {
741
699
  return { pass: false, score: "FAIL", toolCall: "empty response", thought: "", response: "", elapsedMs };
742
700
  }
743
- const THOUGHT_RE = /Thought:\s*(.*?)(?=Action:|Final Answer:|$)/is;
744
- const ACTION_RE = /Action:\s*[`"']?(\w+)[`"']?\s*\n?\s*Action Input:\s*(.*?)(?=\n\s*(?:Observation:|Thought:|Final Answer:|Action:)|$)/is;
745
- const ACTION_RE_SAMELINE = /Action:\s*[`"']?(\w+)[`"']?\s+Action Input:\s*(.*?)(?=\n\s*(?:Observation:|Thought:|Final Answer:)|$)/is;
746
- const ACTION_RE_LOOSE = /Action:\s*(.+?)\n\s*Action Input:\s*(.*?)(?=\n\s*(?:Observation:|Thought:|Final Answer:|Action:)|$)/is;
747
- const ACTION_RE_PAREN = /Action:\s*(\w+)\s*\(([^)]*)\)/i;
748
- let thought = "";
749
- const thoughtMatch = THOUGHT_RE.exec(content);
750
- if (thoughtMatch) thought = thoughtMatch[1].trim();
751
- let match = ACTION_RE.exec(content);
752
- if (!match) match = ACTION_RE_SAMELINE.exec(content);
753
- let looseMatch = false;
754
- if (!match) {
755
- const looseResult = ACTION_RE_LOOSE.exec(content);
756
- if (looseResult) {
757
- const candidate = looseResult[1].trim().replace(/[`"']/g, "");
758
- const isToolIdentifier = /^\w+$/.test(candidate) && (candidate.includes("_") || candidate.includes("-"));
759
- const isKnownTool = /^(get_weather|calculate)$/i.test(candidate);
760
- if (isToolIdentifier || isKnownTool) {
761
- match = looseResult;
762
- looseMatch = true;
763
- }
764
- }
765
- }
766
- let parenMatch = false;
767
- if (!match) match = ACTION_RE_PAREN.exec(content), parenMatch = true;
768
- if (match) {
769
- let toolName = match[1].trim().replace(/[`"']/g, "");
770
- if (looseMatch) {
771
- const actionText = toolName.toLowerCase();
772
- if (actionText.includes("get_weather")) toolName = "get_weather";
773
- else {
774
- const toolWords = actionText.match(/\b[a-z][a-z0-9]*(?:[_-][a-z0-9]+)+\b/gi) || [];
775
- if (toolWords.length > 0) toolName = toolWords[0];
776
- }
777
- }
778
- const rawArgs = parenMatch ? match[2].trim().replace(/^```\w*\s*/gm, "").replace(/```\s*$/gm, "").trim() : match[2].trim().replace(/^```\w*\s*/gm, "").replace(/```\s*$/gm, "").trim();
779
- let argsParsed = false;
780
- let argsStr = rawArgs;
781
- if (parenMatch && rawArgs && !rawArgs.startsWith("{")) {
782
- const pairs = rawArgs.match(/(\w+)\s*:\s*("[^"]*"|'[^']*'|\S+)/g);
783
- if (pairs) {
784
- const obj = {};
785
- for (const p of pairs) {
786
- const colonIdx = p.indexOf(":");
787
- const key = p.slice(0, colonIdx).trim();
788
- let val = p.slice(colonIdx + 1).trim();
789
- if (val.startsWith('"') && val.endsWith('"') || val.startsWith("'") && val.endsWith("'")) {
790
- val = val.slice(1, -1);
701
+ let parsedResult = null;
702
+ const sharedParser = pi._reactParser;
703
+ if (sharedParser?.ALL_DIALECT_PATTERNS) {
704
+ for (const dp of sharedParser.ALL_DIALECT_PATTERNS) {
705
+ const result2 = sharedParser.parseReactWithPatterns(content, dp, true);
706
+ if (result2) {
707
+ let toolName = result2.name;
708
+ let argsStr;
709
+ const rawArgs = result2.args ? JSON.stringify(result2.args) : "";
710
+ if (rawArgs && rawArgs !== "{}") {
711
+ argsStr = rawArgs;
712
+ } else if (result2.raw) {
713
+ const jsonStart = result2.raw.indexOf("{");
714
+ if (jsonStart !== -1) {
715
+ let depth = 0, jsonEnd = -1;
716
+ for (let i = jsonStart; i < result2.raw.length; i++) {
717
+ if (result2.raw[i] === "{") depth++;
718
+ else if (result2.raw[i] === "}") {
719
+ depth--;
720
+ if (depth === 0) {
721
+ jsonEnd = i;
722
+ break;
723
+ }
724
+ }
725
+ }
726
+ argsStr = jsonEnd !== -1 ? result2.raw.slice(jsonStart, jsonEnd + 1) : "";
727
+ } else {
728
+ argsStr = "";
791
729
  }
792
- obj[key] = val;
793
- }
794
- try {
795
- argsStr = JSON.stringify(obj);
796
- argsParsed = true;
797
- } catch {
730
+ } else {
731
+ argsStr = "";
798
732
  }
733
+ parsedResult = { name: toolName, args: argsStr, thought: result2.thought || "", dialect: result2.dialect };
734
+ break;
799
735
  }
800
736
  }
801
- if (!argsParsed) {
802
- const jsonStart = rawArgs.indexOf("{");
803
- if (jsonStart !== -1) {
804
- let depth = 0;
805
- let jsonEnd = -1;
806
- for (let i = jsonStart; i < rawArgs.length; i++) {
807
- if (rawArgs[i] === "{") depth++;
808
- else if (rawArgs[i] === "}") {
809
- depth--;
810
- if (depth === 0) {
811
- jsonEnd = i;
812
- break;
737
+ } else {
738
+ const dialectDefs = [
739
+ { name: "react", action: "Action:", input: "Action Input:" },
740
+ { name: "function", action: "Function:", input: "Function Input:" },
741
+ { name: "tool", action: "Tool:", input: "Tool Input:" },
742
+ { name: "call", action: "Call:", input: "Input:" }
743
+ ];
744
+ for (const dd of dialectDefs) {
745
+ const esc = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
746
+ const aT = esc(dd.action);
747
+ const iT = esc(dd.input);
748
+ const primaryRe = new RegExp(`${aT}\\s*[\\x60"']?(\\w+)[\\x60"']?\\s*\\n?\\s*${iT}\\s*([\\s\\S]*?)(?=\\n\\s*(?:Observation:|Thought:|Final Answer:|${dd.action})|$)`, "is");
749
+ const sameRe = new RegExp(`${aT}\\s*[\\x60"']?(\\w+)[\\x60"']?\\s+${iT}\\s*([\\s\\S]*?)(?=\\n\\s*(?:Observation:|Thought:|Final Answer:|${dd.action})|$)`, "is");
750
+ const parenRe = new RegExp(`${aT}\\s*(\\w+)\\s*\\(([^)]*)\\)`, "i");
751
+ let m = primaryRe.exec(content) || sameRe.exec(content);
752
+ let isParen = false;
753
+ if (!m) {
754
+ m = parenRe.exec(content);
755
+ isParen = true;
756
+ }
757
+ if (m) {
758
+ const toolName = m[1].trim().replace(/[`"']/g, "");
759
+ const rawArgs = m[2].trim().replace(/^```\w*\s*/gm, "").replace(/```\s*$/gm, "").trim();
760
+ let argsStr = "";
761
+ if (isParen && rawArgs && !rawArgs.startsWith("{")) {
762
+ const pairs = rawArgs.match(/(\w+)\s*:\s*("[^"]*"|'[^']*'|\S+)/g);
763
+ if (pairs) {
764
+ const obj = {};
765
+ for (const p of pairs) {
766
+ const ci = p.indexOf(":");
767
+ let v = p.slice(ci + 1).trim();
768
+ if (v.startsWith('"') && v.endsWith('"') || v.startsWith("'") && v.endsWith("'")) v = v.slice(1, -1);
769
+ obj[p.slice(0, ci).trim()] = v;
813
770
  }
771
+ argsStr = JSON.stringify(obj);
772
+ } else {
773
+ argsStr = rawArgs;
814
774
  }
815
- }
816
- if (jsonEnd !== -1) {
817
- const jsonStr = rawArgs.slice(jsonStart, jsonEnd + 1);
818
- try {
819
- JSON.parse(jsonStr);
820
- argsParsed = true;
821
- argsStr = jsonStr;
822
- } catch {
775
+ } else {
776
+ const js = rawArgs.indexOf("{");
777
+ if (js !== -1) {
778
+ let d = 0, je = -1;
779
+ for (let i = js; i < rawArgs.length; i++) {
780
+ if (rawArgs[i] === "{") d++;
781
+ else if (rawArgs[i] === "}") {
782
+ d--;
783
+ if (d === 0) {
784
+ je = i;
785
+ break;
786
+ }
787
+ }
788
+ }
789
+ argsStr = je !== -1 ? rawArgs.slice(js, je + 1) : rawArgs;
790
+ } else {
791
+ argsStr = rawArgs;
823
792
  }
824
793
  }
794
+ let thought = "";
795
+ const thoughtRe = /Thought:\s*(.*?)(?=Action:|Function:|Tool:|Call:|Final Answer:|$)/is;
796
+ const tm = thoughtRe.exec(content);
797
+ if (tm) thought = tm[1].trim();
798
+ parsedResult = { name: toolName, args: argsStr, thought, dialect: dd.name };
799
+ break;
825
800
  }
826
801
  }
802
+ }
803
+ if (parsedResult) {
804
+ let { name: toolName, args: argsStr, thought, dialect } = parsedResult;
805
+ const argsParsed = argsStr.length > 0;
827
806
  let score;
828
807
  const isWeatherTool = toolName.toLowerCase().includes("get_weather") || toolName.toLowerCase() === "get_weather";
829
808
  if (isWeatherTool && argsParsed) {
@@ -840,15 +819,25 @@ function model_test_temp_default(pi) {
840
819
  toolCall: `${toolName}(${argsStr})`,
841
820
  thought,
842
821
  response: content,
843
- elapsedMs
822
+ elapsedMs,
823
+ dialect: dialect || "react"
844
824
  };
845
825
  }
826
+ const altTagPatterns = [
827
+ /^\s*Function:\s*/im,
828
+ /^\s*Tool:\s*/im,
829
+ /^\s*Call:\s*/im,
830
+ /<function_call/i,
831
+ /<invoke\s/i
832
+ ];
833
+ const hasAltTag = altTagPatterns.some((p) => p.test(content));
846
834
  const hasToolMention = /\bget_weather\b/i.test(content) || /\btool\b/i.test(content);
847
- if (hasToolMention) {
835
+ if (hasAltTag || hasToolMention) {
836
+ const detail = hasAltTag ? "model used alternative tool-call tags but format was not parseable" : "model mentioned tool but not in ReAct format";
848
837
  return {
849
838
  pass: false,
850
839
  score: "FAIL",
851
- toolCall: "none \u2014 model mentioned tool but not in ReAct format",
840
+ toolCall: `none \u2014 ${detail}`,
852
841
  thought: "",
853
842
  response: content,
854
843
  elapsedMs
@@ -886,12 +875,30 @@ The JSON object must have exactly these 4 keys:
886
875
  parsed = JSON.parse(cleaned);
887
876
  } catch {
888
877
  const cleaned = msg.replace(/```json?\s*/gi, "").replace(/```/g, "").trim();
889
- const openBraces = (cleaned.match(/\{/g) || []).length;
890
- const closeBraces = (cleaned.match(/\}/g) || []).length;
891
- const openBrackets = (cleaned.match(/\[/g) || []).length;
892
- const closeBrackets = (cleaned.match(/\]/g) || []).length;
893
- if (openBraces > closeBraces || openBrackets > closeBrackets) {
894
- const repaired = cleaned + "}".repeat(Math.max(0, openBraces - closeBraces)) + "]".repeat(Math.max(0, openBrackets - closeBrackets));
878
+ let braceDepth = 0, bracketDepth = 0;
879
+ let inString = false, escapeNext = false;
880
+ for (let i = 0; i < cleaned.length; i++) {
881
+ const c = cleaned[i];
882
+ if (escapeNext) {
883
+ escapeNext = false;
884
+ continue;
885
+ }
886
+ if (c === "\\") {
887
+ if (inString) escapeNext = true;
888
+ continue;
889
+ }
890
+ if (c === '"') {
891
+ inString = !inString;
892
+ continue;
893
+ }
894
+ if (inString) continue;
895
+ if (c === "{") braceDepth++;
896
+ else if (c === "}") braceDepth = Math.max(0, braceDepth - 1);
897
+ else if (c === "[") bracketDepth++;
898
+ else if (c === "]") bracketDepth = Math.max(0, bracketDepth - 1);
899
+ }
900
+ if (braceDepth > 0 || bracketDepth > 0) {
901
+ const repaired = cleaned + "}".repeat(braceDepth) + "]".repeat(bracketDepth);
895
902
  try {
896
903
  parsed = JSON.parse(repaired);
897
904
  repairNote = " (repaired truncated JSON)";
@@ -945,12 +952,30 @@ The JSON object must have exactly these 4 keys:
945
952
  parsed = JSON.parse(cleaned);
946
953
  } catch {
947
954
  const cleaned = msg.replace(/```json?\s*/gi, "").replace(/```/g, "").trim();
948
- const openBraces = (cleaned.match(/\{/g) || []).length;
949
- const closeBraces = (cleaned.match(/\}/g) || []).length;
950
- const openBrackets = (cleaned.match(/\[/g) || []).length;
951
- const closeBrackets = (cleaned.match(/\]/g) || []).length;
952
- if (openBraces > closeBraces || openBrackets > closeBrackets) {
953
- const repaired = cleaned + "}".repeat(Math.max(0, openBraces - closeBraces)) + "]".repeat(Math.max(0, openBrackets - closeBrackets));
955
+ let braceDepth = 0, bracketDepth = 0;
956
+ let inString = false, escapeNext = false;
957
+ for (let i = 0; i < cleaned.length; i++) {
958
+ const c = cleaned[i];
959
+ if (escapeNext) {
960
+ escapeNext = false;
961
+ continue;
962
+ }
963
+ if (c === "\\") {
964
+ if (inString) escapeNext = true;
965
+ continue;
966
+ }
967
+ if (c === '"') {
968
+ inString = !inString;
969
+ continue;
970
+ }
971
+ if (inString) continue;
972
+ if (c === "{") braceDepth++;
973
+ else if (c === "}") braceDepth = Math.max(0, braceDepth - 1);
974
+ else if (c === "[") bracketDepth++;
975
+ else if (c === "]") bracketDepth = Math.max(0, bracketDepth - 1);
976
+ }
977
+ if (braceDepth > 0 || bracketDepth > 0) {
978
+ const repaired = cleaned + "}".repeat(braceDepth) + "]".repeat(bracketDepth);
954
979
  try {
955
980
  parsed = JSON.parse(repaired);
956
981
  repairNote = " (repaired truncated JSON)";
@@ -1071,25 +1096,40 @@ The JSON object must have exactly these 4 keys:
1071
1096
  };
1072
1097
  }
1073
1098
  const reactPatterns = [
1099
+ // Classic ReAct
1074
1100
  /^\s*Action:\s*/im,
1075
- // "Action: get_weather"
1076
1101
  /^\s*Action Input:\s*/im,
1077
- // "Action Input: {"location": "Tokyo"}"
1078
1102
  /^\s*Thought:\s*/im,
1079
- // "Thought: I need to look up the weather"
1080
1103
  /Action:\s*\w+/i,
1081
- // "Action: get_weather" anywhere
1082
- /Action Input:\s*\{/i
1083
- // "Action Input: {..." anywhere
1104
+ /Action Input:\s*\{/i,
1105
+ // Function dialect
1106
+ /^\s*Function:\s*/im,
1107
+ /^\s*Function Input:\s*/im,
1108
+ /Function:\s*\w+/i,
1109
+ // Tool dialect
1110
+ /^\s*Tool:\s*/im,
1111
+ /^\s*Tool Input:\s*/im,
1112
+ /Tool:\s*\w+/i,
1113
+ // Call dialect
1114
+ /^\s*Call:\s*/im,
1115
+ /^\s*Input:\s*/im,
1116
+ /Call:\s*\w+/i
1084
1117
  ];
1085
- const hasReActPattern = reactPatterns.some((p) => p.test(content));
1086
- if (hasReActPattern) {
1118
+ const matchedPatterns = [];
1119
+ for (const p of reactPatterns) {
1120
+ if (p.test(content)) matchedPatterns.push(p.source);
1121
+ }
1122
+ if (matchedPatterns.length > 0) {
1123
+ let dialectName = "react";
1124
+ if (/Function:/i.test(content)) dialectName = "function";
1125
+ else if (/Tool:/i.test(content)) dialectName = "tool";
1126
+ else if (/Call:/i.test(content)) dialectName = "call";
1087
1127
  const level2 = "react";
1088
1128
  cacheToolSupport(model, level2, family);
1089
1129
  return {
1090
1130
  level: level2,
1091
1131
  cached: false,
1092
- evidence: `ReAct format detected in text response`,
1132
+ evidence: `ReAct format detected (${dialectName} dialect) in text response`,
1093
1133
  elapsedMs
1094
1134
  };
1095
1135
  }
@@ -1141,14 +1181,8 @@ The JSON object must have exactly these 4 keys:
1141
1181
  return ctx.model?.id;
1142
1182
  }
1143
1183
  function updateModelsJsonReasoning(model, hasReasoning) {
1144
- const agentDir = path.join(os.homedir(), ".pi", "agent");
1145
- const modelsJsonPath = path.join(agentDir, "models.json");
1146
- if (!fs.existsSync(modelsJsonPath)) {
1147
- return { updated: false, message: "models.json not found \u2014 skipped" };
1148
- }
1149
1184
  try {
1150
- const raw = fs.readFileSync(modelsJsonPath, "utf-8");
1151
- const config = JSON.parse(raw);
1185
+ const config = readModelsJson();
1152
1186
  let updated = false;
1153
1187
  for (const provider of Object.values(config.providers || {})) {
1154
1188
  const models = provider.models || [];
@@ -1168,7 +1202,7 @@ The JSON object must have exactly these 4 keys:
1168
1202
  if (!updated) {
1169
1203
  return { updated: false, message: `${model} not found in models.json \u2014 skipped` };
1170
1204
  }
1171
- fs.writeFileSync(modelsJsonPath, JSON.stringify(config, null, 2) + "\n", "utf-8");
1205
+ writeModelsJson(config);
1172
1206
  const action = hasReasoning ? "set reasoning: true" : "set reasoning: false";
1173
1207
  return { updated: true, message: `\u2705 Updated ${model}: ${action}` };
1174
1208
  } catch (e) {
@@ -1176,7 +1210,7 @@ The JSON object must have exactly these 4 keys:
1176
1210
  }
1177
1211
  }
1178
1212
  const branding = [
1179
- ` \u26A1 Pi Model Benchmark v1.0.8`,
1213
+ ` \u26A1 Pi Model Benchmark v${EXTENSION_VERSION}`,
1180
1214
  ` Written by VTSTech`,
1181
1215
  ` GitHub: https://github.com/VTSTech`,
1182
1216
  ` Website: www.vts-tech.org`
@@ -1301,23 +1335,24 @@ The JSON object must have exactly these 4 keys:
1301
1335
  await rateLimitDelay(lines);
1302
1336
  const react = await testReactParsing(model);
1303
1337
  lines.push(info(`Time: ${msHuman(react.elapsedMs)}`));
1338
+ const dialectTag = react.dialect && react.dialect !== "react" ? ` [${react.dialect} dialect]` : "";
1304
1339
  if (react.score === "STRONG") {
1305
- lines.push(ok(`ReAct parsed: ${react.toolCall} (${react.score})`));
1340
+ lines.push(ok(`ReAct parsed: ${react.toolCall} (${react.score})${dialectTag}`));
1306
1341
  if (react.thought) {
1307
1342
  lines.push(info(`Thought: ${sanitizeForReport(react.thought)}`));
1308
1343
  }
1309
1344
  } else if (react.score === "MODERATE") {
1310
- lines.push(ok(`ReAct parsed: ${react.toolCall} (${react.score})`));
1345
+ lines.push(ok(`ReAct parsed: ${react.toolCall} (${react.score})${dialectTag}`));
1311
1346
  if (react.thought) {
1312
1347
  lines.push(info(`Thought: ${sanitizeForReport(react.thought)}`));
1313
1348
  }
1314
1349
  } else if (react.score === "WEAK") {
1315
- lines.push(warn(`ReAct parsed: ${react.toolCall} (${react.score}) \u2014 wrong tool or malformed args`));
1350
+ lines.push(warn(`ReAct parsed: ${react.toolCall} (${react.score}) \u2014 wrong tool or malformed args${dialectTag}`));
1316
1351
  if (react.thought) {
1317
1352
  lines.push(info(`Thought: ${sanitizeForReport(react.thought)}`));
1318
1353
  }
1319
1354
  } else if (react.score === "FAIL") {
1320
- lines.push(fail(`ReAct parsing: ${react.toolCall} (${react.score})`));
1355
+ lines.push(fail(`ReAct parsing: ${react.toolCall} (${react.score})${dialectTag}`));
1321
1356
  if (react.response) {
1322
1357
  lines.push(info(`Response: ${sanitizeForReport(react.response)}`));
1323
1358
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vtstech/pi-model-test",
3
- "version": "1.0.8",
3
+ "version": "1.1.0",
4
4
  "description": "Model benchmark/testing extension for Pi Coding Agent",
5
5
  "main": "model-test.js",
6
6
  "keywords": ["pi-extensions"],
@@ -14,7 +14,7 @@
14
14
  "url": "https://github.com/VTSTech/pi-coding-agent"
15
15
  },
16
16
  "dependencies": {
17
- "@vtstech/pi-shared": "1.0.8"
17
+ "@vtstech/pi-shared": "1.1.0"
18
18
  },
19
19
  "peerDependencies": {
20
20
  "@mariozechner/pi-coding-agent": ">=0.66"