@vtstech/pi-model-test 1.0.7 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/model-test.js +176 -109
- package/package.json +2 -2
package/model-test.js
CHANGED
|
@@ -12,7 +12,7 @@ import {
|
|
|
12
12
|
truncate,
|
|
13
13
|
sanitizeForReport
|
|
14
14
|
} from "@vtstech/pi-shared/format";
|
|
15
|
-
import { getOllamaBaseUrl, detectModelFamily, readModelsJson, BUILTIN_PROVIDERS } from "@vtstech/pi-shared/ollama";
|
|
15
|
+
import { getOllamaBaseUrl, detectModelFamily, readModelsJson, BUILTIN_PROVIDERS, fetchModelContextLength } from "@vtstech/pi-shared/ollama";
|
|
16
16
|
function detectProvider(ctx) {
|
|
17
17
|
const model = ctx.model;
|
|
18
18
|
if (!model) return { kind: "unknown", name: "none" };
|
|
@@ -55,15 +55,15 @@ function detectProvider(ctx) {
|
|
|
55
55
|
}
|
|
56
56
|
var CONFIG = {
|
|
57
57
|
// General API settings
|
|
58
|
-
DEFAULT_TIMEOUT_MS:
|
|
58
|
+
DEFAULT_TIMEOUT_MS: 999999,
|
|
59
59
|
// ~16.7 minutes (999999 ms) - default timeout for model responses
|
|
60
|
-
CONNECT_TIMEOUT_S:
|
|
60
|
+
CONNECT_TIMEOUT_S: 60,
|
|
61
61
|
// 60 seconds to establish connection
|
|
62
62
|
MAX_RETRIES: 1,
|
|
63
63
|
// Single retry for transient failures
|
|
64
|
-
RETRY_DELAY_MS:
|
|
64
|
+
RETRY_DELAY_MS: 1e4,
|
|
65
65
|
// 10 seconds between retries
|
|
66
|
-
EXEC_BUFFER_MS:
|
|
66
|
+
EXEC_BUFFER_MS: 8e3,
|
|
67
67
|
// Extra buffer for exec timeout over curl timeout
|
|
68
68
|
// Model generation settings
|
|
69
69
|
NUM_PREDICT: 1024,
|
|
@@ -73,28 +73,28 @@ var CONFIG = {
|
|
|
73
73
|
// Test-specific settings
|
|
74
74
|
MIN_THINKING_LENGTH: 10,
|
|
75
75
|
// Minimum chars to consider thinking tokens valid
|
|
76
|
-
TOOL_TEST_TIMEOUT_MS:
|
|
76
|
+
TOOL_TEST_TIMEOUT_MS: 999999,
|
|
77
77
|
// ~16.7 minutes (999999 ms) for tool usage tests
|
|
78
|
-
TOOL_TEST_MAX_TIME_S:
|
|
78
|
+
TOOL_TEST_MAX_TIME_S: 999999,
|
|
79
79
|
// Max curl time for tool tests (effectively unlimited)
|
|
80
|
-
TOOL_SUPPORT_TIMEOUT_MS:
|
|
80
|
+
TOOL_SUPPORT_TIMEOUT_MS: 999999,
|
|
81
81
|
// ~16.7 minutes (999999 ms) for tool support detection
|
|
82
|
-
TOOL_SUPPORT_MAX_TIME_S:
|
|
82
|
+
TOOL_SUPPORT_MAX_TIME_S: 999999,
|
|
83
83
|
// Max curl time for tool support detection
|
|
84
84
|
// Metadata retrieval
|
|
85
85
|
TAGS_TIMEOUT_MS: 15e3,
|
|
86
86
|
// 15 seconds for /api/tags
|
|
87
|
-
TAGS_CONNECT_TIMEOUT_S:
|
|
87
|
+
TAGS_CONNECT_TIMEOUT_S: 30,
|
|
88
88
|
// 30 seconds connection timeout for tags
|
|
89
|
-
MODEL_INFO_TIMEOUT_MS:
|
|
89
|
+
MODEL_INFO_TIMEOUT_MS: 3e4,
|
|
90
90
|
// 30 seconds for model info lookup
|
|
91
91
|
// Provider API settings
|
|
92
|
-
PROVIDER_TIMEOUT_MS:
|
|
92
|
+
PROVIDER_TIMEOUT_MS: 999999,
|
|
93
93
|
// ~16.7 minutes (999999 ms) for cloud provider API calls
|
|
94
|
-
PROVIDER_TOOL_TIMEOUT_MS:
|
|
94
|
+
PROVIDER_TOOL_TIMEOUT_MS: 12e4,
|
|
95
95
|
// 120 seconds for tool usage tests on providers
|
|
96
96
|
// Rate limiting
|
|
97
|
-
TEST_DELAY_MS:
|
|
97
|
+
TEST_DELAY_MS: 1e4
|
|
98
98
|
// 10 seconds between tests to avoid rate limiting
|
|
99
99
|
};
|
|
100
100
|
var TOOL_SUPPORT_CACHE_DIR = path.join(os.homedir(), ".pi", "agent", "cache");
|
|
@@ -740,90 +740,111 @@ function model_test_temp_default(pi) {
|
|
|
740
740
|
if (!content) {
|
|
741
741
|
return { pass: false, score: "FAIL", toolCall: "empty response", thought: "", response: "", elapsedMs };
|
|
742
742
|
}
|
|
743
|
-
|
|
744
|
-
const
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
const actionText = toolName.toLowerCase();
|
|
772
|
-
if (actionText.includes("get_weather")) toolName = "get_weather";
|
|
773
|
-
else {
|
|
774
|
-
const toolWords = actionText.match(/\b[a-z][a-z0-9]*(?:[_-][a-z0-9]+)+\b/gi) || [];
|
|
775
|
-
if (toolWords.length > 0) toolName = toolWords[0];
|
|
776
|
-
}
|
|
777
|
-
}
|
|
778
|
-
const rawArgs = parenMatch ? match[2].trim().replace(/^```\w*\s*/gm, "").replace(/```\s*$/gm, "").trim() : match[2].trim().replace(/^```\w*\s*/gm, "").replace(/```\s*$/gm, "").trim();
|
|
779
|
-
let argsParsed = false;
|
|
780
|
-
let argsStr = rawArgs;
|
|
781
|
-
if (parenMatch && rawArgs && !rawArgs.startsWith("{")) {
|
|
782
|
-
const pairs = rawArgs.match(/(\w+)\s*:\s*("[^"]*"|'[^']*'|\S+)/g);
|
|
783
|
-
if (pairs) {
|
|
784
|
-
const obj = {};
|
|
785
|
-
for (const p of pairs) {
|
|
786
|
-
const colonIdx = p.indexOf(":");
|
|
787
|
-
const key = p.slice(0, colonIdx).trim();
|
|
788
|
-
let val = p.slice(colonIdx + 1).trim();
|
|
789
|
-
if (val.startsWith('"') && val.endsWith('"') || val.startsWith("'") && val.endsWith("'")) {
|
|
790
|
-
val = val.slice(1, -1);
|
|
743
|
+
let parsedResult = null;
|
|
744
|
+
const sharedParser = pi._reactParser;
|
|
745
|
+
if (sharedParser?.ALL_DIALECT_PATTERNS) {
|
|
746
|
+
for (const dp of sharedParser.ALL_DIALECT_PATTERNS) {
|
|
747
|
+
const result2 = sharedParser.parseReactWithPatterns(content, dp, true);
|
|
748
|
+
if (result2) {
|
|
749
|
+
let toolName = result2.name;
|
|
750
|
+
let argsStr;
|
|
751
|
+
const rawArgs = result2.args ? JSON.stringify(result2.args) : "";
|
|
752
|
+
if (rawArgs && rawArgs !== "{}") {
|
|
753
|
+
argsStr = rawArgs;
|
|
754
|
+
} else if (result2.raw) {
|
|
755
|
+
const jsonStart = result2.raw.indexOf("{");
|
|
756
|
+
if (jsonStart !== -1) {
|
|
757
|
+
let depth = 0, jsonEnd = -1;
|
|
758
|
+
for (let i = jsonStart; i < result2.raw.length; i++) {
|
|
759
|
+
if (result2.raw[i] === "{") depth++;
|
|
760
|
+
else if (result2.raw[i] === "}") {
|
|
761
|
+
depth--;
|
|
762
|
+
if (depth === 0) {
|
|
763
|
+
jsonEnd = i;
|
|
764
|
+
break;
|
|
765
|
+
}
|
|
766
|
+
}
|
|
767
|
+
}
|
|
768
|
+
argsStr = jsonEnd !== -1 ? result2.raw.slice(jsonStart, jsonEnd + 1) : "";
|
|
769
|
+
} else {
|
|
770
|
+
argsStr = "";
|
|
791
771
|
}
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
try {
|
|
795
|
-
argsStr = JSON.stringify(obj);
|
|
796
|
-
argsParsed = true;
|
|
797
|
-
} catch {
|
|
772
|
+
} else {
|
|
773
|
+
argsStr = "";
|
|
798
774
|
}
|
|
775
|
+
parsedResult = { name: toolName, args: argsStr, thought: result2.thought || "", dialect: result2.dialect };
|
|
776
|
+
break;
|
|
799
777
|
}
|
|
800
778
|
}
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
779
|
+
} else {
|
|
780
|
+
const dialectDefs = [
|
|
781
|
+
{ name: "react", action: "Action:", input: "Action Input:" },
|
|
782
|
+
{ name: "function", action: "Function:", input: "Function Input:" },
|
|
783
|
+
{ name: "tool", action: "Tool:", input: "Tool Input:" },
|
|
784
|
+
{ name: "call", action: "Call:", input: "Input:" }
|
|
785
|
+
];
|
|
786
|
+
for (const dd of dialectDefs) {
|
|
787
|
+
const esc = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
788
|
+
const aT = esc(dd.action);
|
|
789
|
+
const iT = esc(dd.input);
|
|
790
|
+
const primaryRe = new RegExp(`${aT}\\s*[\\x60"']?(\\w+)[\\x60"']?\\s*\\n?\\s*${iT}\\s*([\\s\\S]*?)(?=\\n\\s*(?:Observation:|Thought:|Final Answer:|${dd.action})|$)`, "is");
|
|
791
|
+
const sameRe = new RegExp(`${aT}\\s*[\\x60"']?(\\w+)[\\x60"']?\\s+${iT}\\s*([\\s\\S]*?)(?=\\n\\s*(?:Observation:|Thought:|Final Answer:|${dd.action})|$)`, "is");
|
|
792
|
+
const parenRe = new RegExp(`${aT}\\s*(\\w+)\\s*\\(([^)]*)\\)`, "i");
|
|
793
|
+
let m = primaryRe.exec(content) || sameRe.exec(content);
|
|
794
|
+
let isParen = false;
|
|
795
|
+
if (!m) {
|
|
796
|
+
m = parenRe.exec(content);
|
|
797
|
+
isParen = true;
|
|
798
|
+
}
|
|
799
|
+
if (m) {
|
|
800
|
+
const toolName = m[1].trim().replace(/[`"']/g, "");
|
|
801
|
+
const rawArgs = m[2].trim().replace(/^```\w*\s*/gm, "").replace(/```\s*$/gm, "").trim();
|
|
802
|
+
let argsStr = "";
|
|
803
|
+
if (isParen && rawArgs && !rawArgs.startsWith("{")) {
|
|
804
|
+
const pairs = rawArgs.match(/(\w+)\s*:\s*("[^"]*"|'[^']*'|\S+)/g);
|
|
805
|
+
if (pairs) {
|
|
806
|
+
const obj = {};
|
|
807
|
+
for (const p of pairs) {
|
|
808
|
+
const ci = p.indexOf(":");
|
|
809
|
+
let v = p.slice(ci + 1).trim();
|
|
810
|
+
if (v.startsWith('"') && v.endsWith('"') || v.startsWith("'") && v.endsWith("'")) v = v.slice(1, -1);
|
|
811
|
+
obj[p.slice(0, ci).trim()] = v;
|
|
813
812
|
}
|
|
813
|
+
argsStr = JSON.stringify(obj);
|
|
814
|
+
} else {
|
|
815
|
+
argsStr = rawArgs;
|
|
814
816
|
}
|
|
815
|
-
}
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
817
|
+
} else {
|
|
818
|
+
const js = rawArgs.indexOf("{");
|
|
819
|
+
if (js !== -1) {
|
|
820
|
+
let d = 0, je = -1;
|
|
821
|
+
for (let i = js; i < rawArgs.length; i++) {
|
|
822
|
+
if (rawArgs[i] === "{") d++;
|
|
823
|
+
else if (rawArgs[i] === "}") {
|
|
824
|
+
d--;
|
|
825
|
+
if (d === 0) {
|
|
826
|
+
je = i;
|
|
827
|
+
break;
|
|
828
|
+
}
|
|
829
|
+
}
|
|
830
|
+
}
|
|
831
|
+
argsStr = je !== -1 ? rawArgs.slice(js, je + 1) : rawArgs;
|
|
832
|
+
} else {
|
|
833
|
+
argsStr = rawArgs;
|
|
823
834
|
}
|
|
824
835
|
}
|
|
836
|
+
let thought = "";
|
|
837
|
+
const thoughtRe = /Thought:\s*(.*?)(?=Action:|Function:|Tool:|Call:|Final Answer:|$)/is;
|
|
838
|
+
const tm = thoughtRe.exec(content);
|
|
839
|
+
if (tm) thought = tm[1].trim();
|
|
840
|
+
parsedResult = { name: toolName, args: argsStr, thought, dialect: dd.name };
|
|
841
|
+
break;
|
|
825
842
|
}
|
|
826
843
|
}
|
|
844
|
+
}
|
|
845
|
+
if (parsedResult) {
|
|
846
|
+
let { name: toolName, args: argsStr, thought, dialect } = parsedResult;
|
|
847
|
+
const argsParsed = argsStr.length > 0;
|
|
827
848
|
let score;
|
|
828
849
|
const isWeatherTool = toolName.toLowerCase().includes("get_weather") || toolName.toLowerCase() === "get_weather";
|
|
829
850
|
if (isWeatherTool && argsParsed) {
|
|
@@ -840,15 +861,25 @@ function model_test_temp_default(pi) {
|
|
|
840
861
|
toolCall: `${toolName}(${argsStr})`,
|
|
841
862
|
thought,
|
|
842
863
|
response: content,
|
|
843
|
-
elapsedMs
|
|
864
|
+
elapsedMs,
|
|
865
|
+
dialect: dialect || "react"
|
|
844
866
|
};
|
|
845
867
|
}
|
|
868
|
+
const altTagPatterns = [
|
|
869
|
+
/^\s*Function:\s*/im,
|
|
870
|
+
/^\s*Tool:\s*/im,
|
|
871
|
+
/^\s*Call:\s*/im,
|
|
872
|
+
/<function_call/i,
|
|
873
|
+
/<invoke\s/i
|
|
874
|
+
];
|
|
875
|
+
const hasAltTag = altTagPatterns.some((p) => p.test(content));
|
|
846
876
|
const hasToolMention = /\bget_weather\b/i.test(content) || /\btool\b/i.test(content);
|
|
847
|
-
if (hasToolMention) {
|
|
877
|
+
if (hasAltTag || hasToolMention) {
|
|
878
|
+
const detail = hasAltTag ? "model used alternative tool-call tags but format was not parseable" : "model mentioned tool but not in ReAct format";
|
|
848
879
|
return {
|
|
849
880
|
pass: false,
|
|
850
881
|
score: "FAIL",
|
|
851
|
-
toolCall:
|
|
882
|
+
toolCall: `none \u2014 ${detail}`,
|
|
852
883
|
thought: "",
|
|
853
884
|
response: content,
|
|
854
885
|
elapsedMs
|
|
@@ -1071,25 +1102,40 @@ The JSON object must have exactly these 4 keys:
|
|
|
1071
1102
|
};
|
|
1072
1103
|
}
|
|
1073
1104
|
const reactPatterns = [
|
|
1105
|
+
// Classic ReAct
|
|
1074
1106
|
/^\s*Action:\s*/im,
|
|
1075
|
-
// "Action: get_weather"
|
|
1076
1107
|
/^\s*Action Input:\s*/im,
|
|
1077
|
-
// "Action Input: {"location": "Tokyo"}"
|
|
1078
1108
|
/^\s*Thought:\s*/im,
|
|
1079
|
-
// "Thought: I need to look up the weather"
|
|
1080
1109
|
/Action:\s*\w+/i,
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1110
|
+
/Action Input:\s*\{/i,
|
|
1111
|
+
// Function dialect
|
|
1112
|
+
/^\s*Function:\s*/im,
|
|
1113
|
+
/^\s*Function Input:\s*/im,
|
|
1114
|
+
/Function:\s*\w+/i,
|
|
1115
|
+
// Tool dialect
|
|
1116
|
+
/^\s*Tool:\s*/im,
|
|
1117
|
+
/^\s*Tool Input:\s*/im,
|
|
1118
|
+
/Tool:\s*\w+/i,
|
|
1119
|
+
// Call dialect
|
|
1120
|
+
/^\s*Call:\s*/im,
|
|
1121
|
+
/^\s*Input:\s*/im,
|
|
1122
|
+
/Call:\s*\w+/i
|
|
1084
1123
|
];
|
|
1085
|
-
const
|
|
1086
|
-
|
|
1124
|
+
const matchedPatterns = [];
|
|
1125
|
+
for (const p of reactPatterns) {
|
|
1126
|
+
if (p.test(content)) matchedPatterns.push(p.source);
|
|
1127
|
+
}
|
|
1128
|
+
if (matchedPatterns.length > 0) {
|
|
1129
|
+
let dialectName = "react";
|
|
1130
|
+
if (/Function:/i.test(content)) dialectName = "function";
|
|
1131
|
+
else if (/Tool:/i.test(content)) dialectName = "tool";
|
|
1132
|
+
else if (/Call:/i.test(content)) dialectName = "call";
|
|
1087
1133
|
const level2 = "react";
|
|
1088
1134
|
cacheToolSupport(model, level2, family);
|
|
1089
1135
|
return {
|
|
1090
1136
|
level: level2,
|
|
1091
1137
|
cached: false,
|
|
1092
|
-
evidence: `ReAct format detected in text response`,
|
|
1138
|
+
evidence: `ReAct format detected (${dialectName} dialect) in text response`,
|
|
1093
1139
|
elapsedMs
|
|
1094
1140
|
};
|
|
1095
1141
|
}
|
|
@@ -1176,17 +1222,32 @@ The JSON object must have exactly these 4 keys:
|
|
|
1176
1222
|
}
|
|
1177
1223
|
}
|
|
1178
1224
|
const branding = [
|
|
1179
|
-
` \u26A1 Pi Model Benchmark v1.0.
|
|
1225
|
+
` \u26A1 Pi Model Benchmark v1.0.9`,
|
|
1180
1226
|
` Written by VTSTech`,
|
|
1181
1227
|
` GitHub: https://github.com/VTSTech`,
|
|
1182
1228
|
` Website: www.vts-tech.org`
|
|
1183
1229
|
].join("\n");
|
|
1184
|
-
async function testModelOllama(model) {
|
|
1230
|
+
async function testModelOllama(model, providerInfo, ctx) {
|
|
1185
1231
|
const lines = [];
|
|
1186
1232
|
const totalStart = Date.now();
|
|
1187
1233
|
lines.push(branding);
|
|
1188
1234
|
lines.push(section(`MODEL: ${model}`));
|
|
1189
1235
|
lines.push(info("Provider: Ollama (local/remote)"));
|
|
1236
|
+
const modelsJson = readModelsJson();
|
|
1237
|
+
let apiMode = "ollama";
|
|
1238
|
+
const providerName = ctx?.model?.provider || providerInfo?.name || "";
|
|
1239
|
+
if (providerName && modelsJson) {
|
|
1240
|
+
const providerCfg = (modelsJson.providers || {})[providerName];
|
|
1241
|
+
if (providerCfg) {
|
|
1242
|
+
apiMode = providerCfg.api || "ollama";
|
|
1243
|
+
}
|
|
1244
|
+
}
|
|
1245
|
+
lines.push(info(`API: ${apiMode}`));
|
|
1246
|
+
const nativeContext = await fetchModelContextLength(OLLAMA_BASE, model);
|
|
1247
|
+
if (nativeContext !== void 0) {
|
|
1248
|
+
const ctxStr = nativeContext >= 1e3 ? `${(nativeContext / 1e3).toFixed(1)}k` : String(nativeContext);
|
|
1249
|
+
lines.push(info(`Context: ${ctxStr} tokens (native max)`));
|
|
1250
|
+
}
|
|
1190
1251
|
let modelSize = "unknown";
|
|
1191
1252
|
let modelFamily = "unknown";
|
|
1192
1253
|
let modelParams = "unknown";
|
|
@@ -1286,23 +1347,24 @@ The JSON object must have exactly these 4 keys:
|
|
|
1286
1347
|
await rateLimitDelay(lines);
|
|
1287
1348
|
const react = await testReactParsing(model);
|
|
1288
1349
|
lines.push(info(`Time: ${msHuman(react.elapsedMs)}`));
|
|
1350
|
+
const dialectTag = react.dialect && react.dialect !== "react" ? ` [${react.dialect} dialect]` : "";
|
|
1289
1351
|
if (react.score === "STRONG") {
|
|
1290
|
-
lines.push(ok(`ReAct parsed: ${react.toolCall} (${react.score})`));
|
|
1352
|
+
lines.push(ok(`ReAct parsed: ${react.toolCall} (${react.score})${dialectTag}`));
|
|
1291
1353
|
if (react.thought) {
|
|
1292
1354
|
lines.push(info(`Thought: ${sanitizeForReport(react.thought)}`));
|
|
1293
1355
|
}
|
|
1294
1356
|
} else if (react.score === "MODERATE") {
|
|
1295
|
-
lines.push(ok(`ReAct parsed: ${react.toolCall} (${react.score})`));
|
|
1357
|
+
lines.push(ok(`ReAct parsed: ${react.toolCall} (${react.score})${dialectTag}`));
|
|
1296
1358
|
if (react.thought) {
|
|
1297
1359
|
lines.push(info(`Thought: ${sanitizeForReport(react.thought)}`));
|
|
1298
1360
|
}
|
|
1299
1361
|
} else if (react.score === "WEAK") {
|
|
1300
|
-
lines.push(warn(`ReAct parsed: ${react.toolCall} (${react.score}) \u2014 wrong tool or malformed args`));
|
|
1362
|
+
lines.push(warn(`ReAct parsed: ${react.toolCall} (${react.score}) \u2014 wrong tool or malformed args${dialectTag}`));
|
|
1301
1363
|
if (react.thought) {
|
|
1302
1364
|
lines.push(info(`Thought: ${sanitizeForReport(react.thought)}`));
|
|
1303
1365
|
}
|
|
1304
1366
|
} else if (react.score === "FAIL") {
|
|
1305
|
-
lines.push(fail(`ReAct parsing: ${react.toolCall} (${react.score})`));
|
|
1367
|
+
lines.push(fail(`ReAct parsing: ${react.toolCall} (${react.score})${dialectTag}`));
|
|
1306
1368
|
if (react.response) {
|
|
1307
1369
|
lines.push(info(`Response: ${sanitizeForReport(react.response)}`));
|
|
1308
1370
|
}
|
|
@@ -1387,7 +1449,7 @@ The JSON object must have exactly these 4 keys:
|
|
|
1387
1449
|
}
|
|
1388
1450
|
return lines.join("\n");
|
|
1389
1451
|
}
|
|
1390
|
-
async function testModelProvider(providerInfo, model) {
|
|
1452
|
+
async function testModelProvider(providerInfo, model, ctx) {
|
|
1391
1453
|
const lines = [];
|
|
1392
1454
|
const totalStart = Date.now();
|
|
1393
1455
|
lines.push(branding);
|
|
@@ -1400,6 +1462,11 @@ The JSON object must have exactly these 4 keys:
|
|
|
1400
1462
|
} else {
|
|
1401
1463
|
lines.push(warn(`API Key: NOT SET (${providerInfo.envKey || "env var not found"})`));
|
|
1402
1464
|
}
|
|
1465
|
+
const contextWindow = ctx?.model?.contextWindow ?? null;
|
|
1466
|
+
if (contextWindow !== null) {
|
|
1467
|
+
const ctxStr = contextWindow >= 1e3 ? `${(contextWindow / 1e3).toFixed(1)}k` : String(contextWindow);
|
|
1468
|
+
lines.push(info(`Context: ${ctxStr} tokens`));
|
|
1469
|
+
}
|
|
1403
1470
|
lines.push(section("CONNECTIVITY TEST"));
|
|
1404
1471
|
lines.push(info("Sending minimal request to verify API reachability and key validity..."));
|
|
1405
1472
|
const connectivity = await testConnectivity(providerInfo, model);
|
|
@@ -1520,9 +1587,9 @@ The JSON object must have exactly these 4 keys:
|
|
|
1520
1587
|
async function testModel(model, ctx) {
|
|
1521
1588
|
const providerInfo = ctx ? detectProvider(ctx) : { kind: "ollama", name: "ollama" };
|
|
1522
1589
|
if (providerInfo.kind === "ollama") {
|
|
1523
|
-
return testModelOllama(model);
|
|
1590
|
+
return testModelOllama(model, providerInfo, ctx);
|
|
1524
1591
|
} else if (providerInfo.kind === "builtin") {
|
|
1525
|
-
return testModelProvider(providerInfo, model);
|
|
1592
|
+
return testModelProvider(providerInfo, model, ctx);
|
|
1526
1593
|
} else {
|
|
1527
1594
|
return testModelOllama(model);
|
|
1528
1595
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@vtstech/pi-model-test",
|
|
3
|
-
"version": "1.0.7",
|
|
3
|
+
"version": "1.0.9",
|
|
4
4
|
"description": "Model benchmark/testing extension for Pi Coding Agent",
|
|
5
5
|
"main": "model-test.js",
|
|
6
6
|
"keywords": ["pi-extensions"],
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
"url": "https://github.com/VTSTech/pi-coding-agent"
|
|
15
15
|
},
|
|
16
16
|
"dependencies": {
|
|
17
|
-
"@vtstech/pi-shared": "1.0.
|
|
17
|
+
"@vtstech/pi-shared": "1.0.9"
|
|
18
18
|
},
|
|
19
19
|
"peerDependencies": {
|
|
20
20
|
"@mariozechner/pi-coding-agent": ">=0.66"
|