@vtstech/pi-shared 1.1.6 → 1.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/config-io.js +49 -0
- package/errors.js +56 -0
- package/format.js +1 -0
- package/model-test-utils.js +125 -0
- package/ollama.js +113 -25
- package/package.json +1 -1
- package/provider-sync.js +18 -0
- package/react-parser.js +18 -0
- package/security.js +128 -9
- package/test-report.js +89 -0
package/README.md
CHANGED
|
@@ -10,10 +10,10 @@ This is an internal dependency — you don't need to install it directly. It's p
|
|
|
10
10
|
|--------|-------------|
|
|
11
11
|
| `debug` | Conditional debug logging via `PI_EXTENSIONS_DEBUG=1` env var — `debugLog(module, message, ...args)` |
|
|
12
12
|
| `format` | Section headers, indicators (ok/fail/warn/info), numeric formatters (bytes, ms, percentages), string utilities |
|
|
13
|
-
| `model-test-utils` | Shared test utilities — `ChatFn` abstraction, unified test functions
|
|
14
|
-
| `ollama` | Ollama base URL resolution, models.json I/O with TTL cache, model family detection, provider detection, Ollama API helpers |
|
|
13
|
+
| `model-test-utils` | Shared test utilities — `ChatFn` abstraction, unified test functions, scoring helpers, tool support cache, user config (`~/.pi/agent/model-test-config.json`), test history with regression detection (`~/.pi/agent/cache/model-test-history.json`) |
|
|
14
|
+
| `ollama` | Ollama base URL resolution, models.json I/O with TTL cache, async write mutex (`acquireModelsJsonLock`, `readModifyWriteModelsJson`), exponential backoff retry (`withRetry`), model family detection, provider detection, Ollama API helpers |
|
|
15
15
|
| `react-parser` | Multi-dialect ReAct text parser — 4 dialects (react, function, tool, call), `parseReact()`, `detectReactDialect()`, `fuzzyMatchToolName()` |
|
|
16
|
-
| `security` | Security mode toggle (`basic`/`max`), partitioned command blocklist (41 CRITICAL + 25 EXTENDED), mode-aware SSRF (
|
|
16
|
+
| `security` | Security mode toggle (`basic`/`max`), partitioned command blocklist (41 CRITICAL + 25 EXTENDED) with full-word scanning, mode-aware SSRF (22 + 7 patterns), path validation with symlink dereference, URL validation, command sanitization, DNS rebinding protection (`resolveAndCheckHostname`), buffered audit logging with mode tracking (`AUDIT_LOG_PATH` exported) |
|
|
17
17
|
| `types` | Type definitions (ToolSupportLevel, AuditEntry, etc.) |
|
|
18
18
|
|
|
19
19
|
## Usage
|
package/config-io.js
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
// shared/config-io.ts
|
|
2
|
+
import * as fs from "node:fs";
|
|
3
|
+
import * as path from "node:path";
|
|
4
|
+
import os from "node:os";
|
|
5
|
+
var PI_AGENT_DIR = path.join(os.homedir(), ".pi", "agent");
|
|
6
|
+
/**
 * Load and parse a JSON file, falling back to a default on any failure.
 *
 * @param {string} filePath - Path of the JSON file to read.
 * @param {*} [defaultValue={}] - Value returned when the file does not exist
 *   or cannot be read or parsed.
 * @returns {*} The parsed JSON value, or `defaultValue`.
 */
function readJsonConfig(filePath, defaultValue = {}) {
  let result = defaultValue;
  try {
    if (fs.existsSync(filePath)) {
      const text = fs.readFileSync(filePath, "utf-8");
      result = JSON.parse(text);
    }
  } catch (err) {
    // Failures are only reported when extension debugging is switched on.
    const debugging = typeof process !== "undefined" && process.env.PI_EXTENSIONS_DEBUG === "1";
    if (debugging) {
      console.debug(`[config-io] Failed to read config: ${filePath}`, err);
    }
  }
  return result;
}
|
|
18
|
+
/**
 * Serialize `data` as pretty-printed JSON and write it to `filePath`.
 *
 * The content is first written to `<filePath>.tmp` and then renamed over the
 * target. If that fails, the temp file is removed (best effort) and the
 * content is written directly to `filePath`; an error from that direct write
 * propagates to the caller.
 *
 * @param {string} filePath - Destination file; its parent directory is created if missing.
 * @param {*} data - Any JSON-serializable value.
 * @returns {void}
 */
function writeJsonConfig(filePath, data) {
  const dir = path.dirname(filePath);
  if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
  const content = JSON.stringify(data, null, 2) + "\n";
  const tmpPath = filePath + ".tmp";
  try {
    fs.writeFileSync(tmpPath, content, "utf-8");
    fs.renameSync(tmpPath, filePath);
  } catch {
    // Do not leave an orphaned temp file behind when the rename path fails.
    try {
      fs.rmSync(tmpPath, { force: true });
    } catch {
      // Cleanup is best effort; the direct write below is what matters.
    }
    fs.writeFileSync(filePath, content, "utf-8");
  }
}
|
|
30
|
+
var SETTINGS_PATH = path.join(PI_AGENT_DIR, "settings.json");
|
|
31
|
+
var SECURITY_PATH = path.join(PI_AGENT_DIR, "security.json");
|
|
32
|
+
var REACT_MODE_PATH = path.join(PI_AGENT_DIR, "react-mode.json");
|
|
33
|
+
var MODEL_TEST_CONFIG_PATH = path.join(PI_AGENT_DIR, "model-test-config.json");
|
|
34
|
+
/**
 * Read the settings file located at `SETTINGS_PATH`.
 *
 * @returns {*} Whatever `readJsonConfig` yields for that path.
 */
function readSettings() {
  const settings = readJsonConfig(SETTINGS_PATH);
  return settings;
}
|
|
37
|
+
/**
 * Persist `data` to the settings file located at `SETTINGS_PATH`.
 *
 * @param {*} data - JSON-serializable settings value.
 * @returns {void}
 */
function writeSettings(data) {
  const target = SETTINGS_PATH;
  writeJsonConfig(target, data);
}
|
|
40
|
+
export {
|
|
41
|
+
MODEL_TEST_CONFIG_PATH,
|
|
42
|
+
REACT_MODE_PATH,
|
|
43
|
+
SECURITY_PATH,
|
|
44
|
+
SETTINGS_PATH,
|
|
45
|
+
readJsonConfig,
|
|
46
|
+
readSettings,
|
|
47
|
+
writeJsonConfig,
|
|
48
|
+
writeSettings
|
|
49
|
+
};
|
package/errors.js
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
var __defProp = Object.defineProperty;
|
|
2
|
+
var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
|
|
3
|
+
var __publicField = (obj, key, value) => __defNormalProp(obj, typeof key !== "symbol" ? key + "" : key, value);
|
|
4
|
+
|
|
5
|
+
// shared/errors.ts
|
|
6
|
+
/**
 * Base error type carrying a machine-readable `code` next to the message.
 */
var ExtensionError = class extends Error {
  /**
   * @param {string} message - Human-readable description.
   * @param {string} code - Error code stored on the instance as `code`.
   */
  constructor(message, code) {
    super(message);
    this.code = code;
    this.name = "ExtensionError";
  }
};
|
|
13
|
+
/**
 * Error with the fixed code "CONFIG_ERROR".
 */
var ConfigError = class extends ExtensionError {
  /**
   * @param {string} message - Human-readable description.
   */
  constructor(message) {
    const code = "CONFIG_ERROR";
    super(message, code);
    this.name = "ConfigError";
  }
};
|
|
19
|
+
/**
 * Error with the fixed code "API_ERROR" that also records a status code and URL.
 */
var ApiError = class extends ExtensionError {
  /**
   * @param {string} message - Human-readable description.
   * @param {*} statusCode - Stored on the instance as `statusCode`.
   * @param {*} url - Stored on the instance as `url`.
   */
  constructor(message, statusCode, url) {
    super(message, "API_ERROR");
    this.statusCode = statusCode;
    this.url = url;
    this.name = "ApiError";
  }
};
|
|
27
|
+
/**
 * Error with the fixed code "TIMEOUT" that also records the timeout value.
 */
var TimeoutError = class extends ExtensionError {
  /**
   * @param {string} message - Human-readable description.
   * @param {*} timeoutMs - Stored on the instance as `timeoutMs`.
   */
  constructor(message, timeoutMs) {
    super(message, "TIMEOUT");
    this.timeoutMs = timeoutMs;
    this.name = "TimeoutError";
  }
};
|
|
34
|
+
/**
 * Error with the fixed code "SECURITY_VIOLATION" that also records a rule and detail.
 */
var SecurityError = class extends ExtensionError {
  /**
   * @param {string} message - Human-readable description.
   * @param {*} rule - Stored on the instance as `rule`.
   * @param {*} detail - Stored on the instance as `detail`.
   */
  constructor(message, rule, detail) {
    super(message, "SECURITY_VIOLATION");
    this.rule = rule;
    this.detail = detail;
    this.name = "SecurityError";
  }
};
|
|
42
|
+
/**
 * Error with the fixed code "TOOL_ERROR" that also records a tool name.
 */
var ToolError = class extends ExtensionError {
  /**
   * @param {string} message - Human-readable description.
   * @param {*} toolName - Stored on the instance as `toolName`.
   */
  constructor(message, toolName) {
    super(message, "TOOL_ERROR");
    this.toolName = toolName;
    this.name = "ToolError";
  }
};
|
|
49
|
+
export {
|
|
50
|
+
ApiError,
|
|
51
|
+
ConfigError,
|
|
52
|
+
ExtensionError,
|
|
53
|
+
SecurityError,
|
|
54
|
+
TimeoutError,
|
|
55
|
+
ToolError
|
|
56
|
+
};
|
package/format.js
CHANGED
|
@@ -32,6 +32,7 @@ function msHuman(ms) {
|
|
|
32
32
|
}
|
|
33
33
|
function fmtBytes(b) {
|
|
34
34
|
if (b === 0) return "0B";
|
|
35
|
+
if (b < 1024) return `${b}B`;
|
|
35
36
|
if (b >= 1073741824) return `${(b / 1073741824).toFixed(1)}G`;
|
|
36
37
|
if (b >= 1048576) return `${(b / 1048576).toFixed(0)}M`;
|
|
37
38
|
return `${(b / 1024).toFixed(0)}K`;
|
package/model-test-utils.js
CHANGED
|
@@ -34,10 +34,42 @@ var CONFIG = {
|
|
|
34
34
|
// Effectively unlimited for cloud provider API calls
|
|
35
35
|
PROVIDER_TOOL_TIMEOUT_MS: 12e4,
|
|
36
36
|
// 120 seconds for tool usage tests on providers
|
|
37
|
+
// Context length fetching
|
|
38
|
+
CONTEXT_BATCH_SIZE: 3,
|
|
39
|
+
// Concurrent requests when fetching model context lengths
|
|
37
40
|
// Rate limiting
|
|
38
41
|
TEST_DELAY_MS: 1e4
|
|
39
42
|
// 10 seconds between tests to avoid rate limiting
|
|
40
43
|
};
|
|
44
|
+
var TEST_CONFIG_DIR = path.join(os.homedir(), ".pi", "agent");
var TEST_CONFIG_PATH = path.join(TEST_CONFIG_DIR, "model-test-config.json");
/**
 * Read the user's model-test configuration from `TEST_CONFIG_PATH`.
 *
 * Reading is best effort: a missing, unreadable or unparsable file yields an
 * empty object. A file whose JSON is not a plain object (for example `null`
 * or an array) is also treated as empty, so callers can always read
 * properties off the result.
 *
 * @returns {object} The parsed configuration object, or `{}`.
 */
function readTestConfig() {
  try {
    if (fs.existsSync(TEST_CONFIG_PATH)) {
      const raw = fs.readFileSync(TEST_CONFIG_PATH, "utf-8");
      const parsed = JSON.parse(raw);
      if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
        return parsed;
      }
    }
  } catch {
    // Deliberately ignored: fall through to the empty default.
  }
  return {};
}
|
|
56
|
+
/**
 * Build the configuration in effect: `CONFIG` with user overrides applied.
 *
 * Each listed `CONFIG` key is replaced by the matching camelCase key from the
 * user's test config when that value is neither `null` nor `undefined`.
 *
 * @returns {object} A new object; `CONFIG` itself is not modified.
 */
function getEffectiveConfig() {
  const userConfig = readTestConfig();
  // [CONFIG key, user-config key] pairs, in the order they are applied.
  const overridePairs = [
    ["DEFAULT_TIMEOUT_MS", "defaultTimeoutMs"],
    ["CONNECT_TIMEOUT_S", "connectTimeoutS"],
    ["MAX_RETRIES", "maxRetries"],
    ["RETRY_DELAY_MS", "retryDelayMs"],
    ["TEST_DELAY_MS", "testDelayMs"],
    ["TOOL_TEST_TIMEOUT_MS", "toolTestTimeoutMs"],
    ["PROVIDER_TIMEOUT_MS", "providerTimeoutMs"],
    ["PROVIDER_TOOL_TIMEOUT_MS", "providerToolTimeoutMs"],
    ["CONTEXT_BATCH_SIZE", "contextBatchSize"],
    ["NUM_PREDICT", "numPredict"],
    ["TEMPERATURE", "temperature"]
  ];
  const effective = { ...CONFIG };
  for (const [configKey, userKey] of overridePairs) {
    effective[configKey] = userConfig[userKey] ?? CONFIG[configKey];
  }
  return effective;
}
|
|
41
73
|
var WEATHER_TOOL_DEFINITION = {
|
|
42
74
|
type: "function",
|
|
43
75
|
function: {
|
|
@@ -150,6 +182,91 @@ function cacheToolSupport(model, support, family) {
|
|
|
150
182
|
_toolSupportCacheInMemory = cache;
|
|
151
183
|
writeToolSupportCache(cache);
|
|
152
184
|
}
|
|
185
|
+
var TEST_HISTORY_DIR = path.join(os.homedir(), ".pi", "agent", "cache");
var TEST_HISTORY_PATH = path.join(TEST_HISTORY_DIR, "model-test-history.json");
var MAX_HISTORY_PER_MODEL = 50;
var MAX_HISTORY_TOTAL = 500;
/**
 * Read the stored test history from `TEST_HISTORY_PATH`.
 *
 * Reading is best effort: a missing, unreadable or unparsable file yields an
 * empty object. JSON that is not a plain object (for example `null` or an
 * array) is also treated as empty, so callers can index the result by model.
 *
 * @returns {object} The parsed history object, or `{}`.
 */
function readTestHistory() {
  try {
    if (fs.existsSync(TEST_HISTORY_PATH)) {
      const raw = fs.readFileSync(TEST_HISTORY_PATH, "utf-8");
      const parsed = JSON.parse(raw);
      if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
        return parsed;
      }
    }
  } catch {
    // Deliberately ignored: fall through to the empty default.
  }
  return {};
}
|
|
199
|
+
/**
 * Trim the test history to the configured limits and write it to
 * `TEST_HISTORY_PATH`.
 *
 * Trimming keeps the last `MAX_HISTORY_PER_MODEL` entries of each model, then
 * walks models from most to least recent last-entry timestamp and keeps
 * entries until `MAX_HISTORY_TOTAL` is reached. The `history` argument is not
 * modified. Values that are not arrays are skipped, because they cannot be
 * trimmed or counted.
 *
 * @param {object} history - Map of model name to array of history entries.
 * @returns {void}
 */
function writeTestHistory(history) {
  const modelsByRecency = Object.entries(history)
    .filter(([, entries]) => Array.isArray(entries))
    .map(([model, entries]) => {
      // slice() copies, so the caller's arrays are left untouched.
      const kept = entries.length > MAX_HISTORY_PER_MODEL ? entries.slice(-MAX_HISTORY_PER_MODEL) : entries;
      return {
        model,
        entries: kept,
        // String() keeps localeCompare from throwing on a non-string timestamp.
        lastEntry: String(kept[kept.length - 1]?.timestamp || "")
      };
    })
    .sort((a, b) => b.lastEntry.localeCompare(a.lastEntry));
  const trimmedHistory = {};
  let totalEntries = 0;
  for (const { model, entries } of modelsByRecency) {
    if (totalEntries + entries.length > MAX_HISTORY_TOTAL) {
      const remaining = MAX_HISTORY_TOTAL - totalEntries;
      if (remaining <= 0) break;
      trimmedHistory[model] = entries.slice(-remaining);
      totalEntries += remaining;
    } else {
      trimmedHistory[model] = entries;
      totalEntries += entries.length;
    }
  }
  if (!fs.existsSync(TEST_HISTORY_DIR)) {
    fs.mkdirSync(TEST_HISTORY_DIR, { recursive: true });
  }
  fs.writeFileSync(TEST_HISTORY_PATH, JSON.stringify(trimmedHistory, null, 2) + "\n", "utf-8");
}
|
|
228
|
+
/**
 * Append one entry to the stored history of `entry.model` and persist it.
 *
 * @param {object} entry - History entry; `entry.model` selects the list it is added to.
 * @returns {void}
 */
function appendTestHistory(entry) {
  const history = readTestHistory();
  const modelKey = entry.model;
  // Start a fresh list the first time a model is seen.
  const bucket = history[modelKey] || (history[modelKey] = []);
  bucket.push(entry);
  writeTestHistory(history);
}
|
|
236
|
+
/**
 * Return the most recent stored history entries for a model.
 *
 * @param {string} model - Model name used as the history key.
 * @param {number} [limit=10] - Maximum number of entries to return.
 * @returns {Array} Up to `limit` entries taken from the end of the stored
 *   list; an empty array when the model has no history or `limit` is not a
 *   positive number.
 */
function getModelHistory(model, limit = 10) {
  const history = readTestHistory();
  const entries = history[model] || [];
  // slice(-0) is slice(0) and would return everything, so handle it explicitly.
  if (!(limit > 0)) return [];
  return entries.slice(-limit);
}
|
|
241
|
+
/**
 * Compare a test result with the second-to-last stored entry for the model
 * and list the tests whose result got worse.
 *
 * A test is skipped when either side has no section for it, so entries that
 * lack a section no longer cause a TypeError.
 *
 * @param {string} model - Model name used as the history key.
 * @param {object} current - Result to evaluate; read as `current.tests.<name>`.
 * @returns {Array<{test: string, previous: *, current: *}>} One item per
 *   regressed test; empty when fewer than two entries are stored.
 */
function detectRegression(model, current) {
  const history = readTestHistory();
  const entries = history[model] || [];
  if (entries.length < 2) return [];
  const previous = entries[entries.length - 2];
  // Lower index ranks better; values not in the list rank 99.
  // NOTE(review): "NO" ranks above "YES" here — confirm that ordering is intended.
  const scoreOrder = ["STRONG", "MODERATE", "WEAK", "FAIL", "ERROR", "NO", "YES"];
  const scoreRank = (s) => {
    const idx = scoreOrder.indexOf(s);
    return idx >= 0 ? idx : 99;
  };
  const supportRank = (s) => s === "native" ? 0 : s === "react" ? 1 : 2;
  const checks = [
    { test: "Reasoning", key: "reasoning", field: "score", rank: scoreRank },
    { test: "Tool Usage", key: "toolUsage", field: "score", rank: scoreRank },
    { test: "ReAct Parsing", key: "reactParsing", field: "score", rank: scoreRank },
    { test: "Instructions", key: "instructionFollowing", field: "score", rank: scoreRank },
    { test: "Tool Support", key: "toolSupport", field: "level", rank: supportRank }
  ];
  const regressions = [];
  for (const { test, key, field, rank } of checks) {
    const before = previous?.tests?.[key];
    const after = current?.tests?.[key];
    if (!before || !after) continue;
    if (rank(after[field]) > rank(before[field])) {
      regressions.push({ test, previous: before[field], current: after[field] });
    }
  }
  return regressions;
}
|
|
153
270
|
var REASONING_PROMPT = `A snail climbs 3 feet up a wall each day, but slides back 2 feet each night. The wall is 10 feet tall. How many days does it take the snail to reach the top? Think step by step and give the final answer on its own line like: ANSWER: <number>`;
|
|
154
271
|
var TOOL_SYSTEM_PROMPT = "You are a helpful assistant. Use the available tools when needed.";
|
|
155
272
|
var TOOL_USER_PROMPT = "What's the weather like in Paris right now?";
|
|
@@ -309,11 +426,18 @@ The JSON object must have exactly these 4 keys:
|
|
|
309
426
|
}
|
|
310
427
|
export {
|
|
311
428
|
CONFIG,
|
|
429
|
+
TEST_CONFIG_PATH,
|
|
312
430
|
TOOL_SUPPORT_CACHE_PATH,
|
|
313
431
|
WEATHER_TOOL_DEFINITION,
|
|
432
|
+
appendTestHistory,
|
|
314
433
|
cacheToolSupport,
|
|
434
|
+
detectRegression,
|
|
315
435
|
getCachedToolSupport,
|
|
436
|
+
getEffectiveConfig,
|
|
437
|
+
getModelHistory,
|
|
316
438
|
parseTextToolCall,
|
|
439
|
+
readTestConfig,
|
|
440
|
+
readTestHistory,
|
|
317
441
|
readToolSupportCache,
|
|
318
442
|
scoreNativeToolCall,
|
|
319
443
|
scoreReasoning,
|
|
@@ -321,5 +445,6 @@ export {
|
|
|
321
445
|
testInstructionFollowingUnified,
|
|
322
446
|
testReasoningUnified,
|
|
323
447
|
testToolUsageUnified,
|
|
448
|
+
writeTestHistory,
|
|
324
449
|
writeToolSupportCache
|
|
325
450
|
};
|
package/ollama.js
CHANGED
|
@@ -12,7 +12,7 @@ function debugLog(module, message, ...args) {
|
|
|
12
12
|
}
|
|
13
13
|
|
|
14
14
|
// shared/ollama.ts
|
|
15
|
-
var EXTENSION_VERSION = "1.1.
|
|
15
|
+
var EXTENSION_VERSION = "1.1.8";
|
|
16
16
|
var MODELS_JSON_PATH = path.join(os.homedir(), ".pi", "agent", "models.json");
|
|
17
17
|
var _modelsJsonCache = null;
|
|
18
18
|
var _ollamaBaseUrlCache = null;
|
|
@@ -71,35 +71,120 @@ function writeModelsJson(data) {
|
|
|
71
71
|
_modelsJsonCache = null;
|
|
72
72
|
_ollamaBaseUrlCache = null;
|
|
73
73
|
}
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
74
|
+
var _modelsJsonLock = null;
/**
 * Acquire the in-process lock that serializes models.json updates.
 *
 * Waits until no lock is held, then installs a new one. The returned
 * `release` is idempotent: calling it again, after another caller has taken
 * the lock, does not clear that caller's lock.
 *
 * @returns {Promise<{release: () => void}>} Handle whose `release()` frees the lock.
 */
async function acquireModelsJsonLock() {
  // Re-check after every wake-up: another waiter may have taken the lock first.
  while (_modelsJsonLock) {
    await _modelsJsonLock;
  }
  let releaseLock;
  const ownLock = new Promise((resolve) => {
    releaseLock = resolve;
  });
  _modelsJsonLock = ownLock;
  let released = false;
  return {
    release: () => {
      if (released) return;
      released = true;
      releaseLock();
      // Only clear the slot if it still holds the lock created by this call.
      if (_modelsJsonLock === ownLock) {
        _modelsJsonLock = null;
      }
    }
  };
}
|
|
82
|
-
async function
|
|
90
|
+
/**
 * Read models.json, pass it through `modifier`, and write the result back,
 * all while holding the models.json lock.
 *
 * @param {Function} modifier - Receives the current data and returns the data
 *   to write, or `null` to skip the write.
 * @returns {Promise<boolean>} `true` when a write happened, `false` when
 *   `modifier` returned `null`.
 */
async function readModifyWriteModelsJson(modifier) {
  const lock = await acquireModelsJsonLock();
  try {
    const next = modifier(readModelsJson());
    const shouldWrite = next !== null;
    if (shouldWrite) {
      writeModelsJson(next);
    }
    return shouldWrite;
  } finally {
    // Always free the lock, including when the modifier or the write throws.
    lock.release();
  }
}
|
|
102
|
+
var DEFAULT_RETRY_OPTIONS = {
|
|
103
|
+
maxRetries: 2,
|
|
104
|
+
baseDelayMs: 1e3,
|
|
105
|
+
maxDelayMs: 1e4,
|
|
106
|
+
retryOnTimeout: true,
|
|
107
|
+
retryOnConnectionError: true
|
|
108
|
+
};
|
|
109
|
+
/**
 * Compute a retry delay: exponential in `attempt`, capped, with random jitter.
 *
 * @param {number} attempt - Zero-based attempt number used as the exponent of 2.
 * @param {number} baseDelayMs - Delay for attempt 0 before jitter.
 * @param {number} maxDelayMs - Upper bound applied before jitter.
 * @returns {number} Non-negative whole number of milliseconds, within ±25% of
 *   the capped delay.
 */
function backoffDelay(attempt, baseDelayMs, maxDelayMs) {
  const exponential = baseDelayMs * 2 ** attempt;
  const delay = Math.min(exponential, maxDelayMs);
  const jitter = delay * 0.25 * (Math.random() * 2 - 1);
  const rounded = Math.round(delay + jitter);
  return Math.max(0, rounded);
}
|
|
114
|
+
// Substrings of error messages that mark a failure as a retryable connection problem.
var RETRYABLE_ERROR_PATTERNS = [
  "ECONNREFUSED",
  "ECONNRESET",
  "ENOTFOUND",
  "ETIMEDOUT",
  "fetch failed",
  "network error",
  "socket hang up",
  "Empty response"
];
/**
 * Decide whether a failed call is worth retrying.
 *
 * @param {*} error - The value that was thrown; anything that is not an
 *   `Error` instance is never retryable.
 * @param {{retryOnTimeout: boolean, retryOnConnectionError: boolean}} opts -
 *   Which failure classes may be retried.
 * @returns {boolean} `true` when the error is a timeout/abort and
 *   `retryOnTimeout` is set, or its message contains one of
 *   `RETRYABLE_ERROR_PATTERNS` and `retryOnConnectionError` is set.
 */
function isRetryableError(error, opts) {
  if (!(error instanceof Error)) return false;
  // AbortSignal.timeout() aborts with an error named "TimeoutError";
  // AbortController.abort() produces "AbortError". Both count as timeouts here.
  const timedOut = error.name === "AbortError" || error.name === "TimeoutError";
  if (timedOut && opts.retryOnTimeout) return true;
  const msg = error.message;
  if (opts.retryOnConnectionError && RETRYABLE_ERROR_PATTERNS.some((p) => msg.includes(p))) {
    return true;
  }
  return false;
}
|
|
134
|
+
/**
 * Run an async function, retrying retryable failures with backoff.
 *
 * @param {Function} fn - Called with no arguments; its awaited result is returned.
 * @param {object} [options] - Overrides merged over `DEFAULT_RETRY_OPTIONS`.
 * @returns {Promise<*>} The result of the first successful call to `fn`.
 * @throws The error from `fn` when it is not retryable or retries are used up.
 */
async function withRetry(fn, options) {
  const opts = { ...DEFAULT_RETRY_OPTIONS, ...options };
  let lastError;
  let attempt = 0;
  while (attempt <= opts.maxRetries) {
    try {
      return await fn();
    } catch (error) {
      lastError = error;
      const mayRetry = attempt < opts.maxRetries && isRetryableError(error, opts);
      if (!mayRetry) throw error;
      const delay = backoffDelay(attempt, opts.baseDelayMs, opts.maxDelayMs);
      debugLog("ollama", `Retry ${attempt + 1}/${opts.maxRetries} after ${delay}ms: ${error instanceof Error ? error.message : String(error)}`);
      await new Promise((r) => setTimeout(r, delay));
    }
    attempt += 1;
  }
  throw lastError;
}
|
|
153
|
+
/**
 * Fetch the model list from `<baseUrl>/api/tags`, retrying through `withRetry`.
 *
 * @param {string} baseUrl - Base URL of the Ollama server.
 * @returns {Promise<Array>} The response's `models` field, or `[]` when it is
 *   `null` or `undefined`.
 * @throws {Error} When the server answers with a non-OK status.
 */
async function fetchOllamaModels(baseUrl) {
  const requestTags = async () => {
    const response = await fetch(`${baseUrl}/api/tags`, {
      signal: AbortSignal.timeout(5e3)
    });
    if (!response.ok) throw new Error(`Ollama returned ${response.status}`);
    const payload = await response.json();
    return payload.models ?? [];
  };
  return withRetry(requestTags);
}
|
|
163
|
+
/**
 * Look up a model's context length via `POST <baseUrl>/api/show`.
 *
 * The first numeric `model_info` value whose key ends in ".context_length"
 * wins; otherwise a numeric `model_info.num_ctx` is used.
 *
 * @param {string} baseUrl - Base URL of the Ollama server.
 * @param {string} modelName - Model to query; sent as `name` in the request body.
 * @returns {Promise<number|undefined>} The context length, or `undefined`
 *   when the request fails, the status is not OK, or no numeric value is found.
 */
async function fetchModelContextLength(baseUrl, modelName) {
  // NOTE(review): every error is caught inside this callback, so withRetry
  // never sees a rejection and never retries this request — confirm intent.
  return withRetry(async () => {
    try {
      const res = await fetch(`${baseUrl}/api/show`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ name: modelName }),
        signal: AbortSignal.timeout(3e4)
      });
      if (!res.ok) return void 0;
      const data = await res.json();
      const modelInfo = data?.model_info ?? {};
      for (const key of Object.keys(modelInfo)) {
        if (key.endsWith(".context_length")) {
          const val = modelInfo[key];
          if (typeof val === "number") return val;
        }
      }
      const numCtx = modelInfo["num_ctx"];
      if (typeof numCtx === "number") return numCtx;
    } catch (err) {
      // Log with the actual parameter name; the previous identifier was not
      // defined in this scope, so the log line itself threw.
      debugLog("ollama", `failed to fetch context length for ${modelName}`, err);
      return void 0;
    }
    return void 0;
  });
}
|
|
104
189
|
async function fetchContextLengthsBatched(baseUrl, modelNames, batchSize = 3) {
|
|
105
190
|
const result = /* @__PURE__ */ new Map();
|
|
@@ -162,9 +247,9 @@ function detectModelFamily(modelName) {
|
|
|
162
247
|
return "unknown";
|
|
163
248
|
}
|
|
164
249
|
function detectProvider(ctx) {
|
|
165
|
-
const
|
|
166
|
-
if (!
|
|
167
|
-
const providerName =
|
|
250
|
+
const model2 = ctx.model;
|
|
251
|
+
if (!model2) return { kind: "unknown", name: "none" };
|
|
252
|
+
const providerName = model2.provider || "";
|
|
168
253
|
if (!providerName) return { kind: "unknown", name: "none" };
|
|
169
254
|
const modelsJson = readModelsJson();
|
|
170
255
|
const userProviderCfg = (modelsJson.providers || {})[providerName];
|
|
@@ -205,6 +290,7 @@ export {
|
|
|
205
290
|
BUILTIN_PROVIDERS,
|
|
206
291
|
EXTENSION_VERSION,
|
|
207
292
|
MODELS_JSON_PATH,
|
|
293
|
+
acquireModelsJsonLock,
|
|
208
294
|
detectModelFamily,
|
|
209
295
|
detectProvider,
|
|
210
296
|
fetchContextLengthsBatched,
|
|
@@ -213,5 +299,7 @@ export {
|
|
|
213
299
|
getOllamaBaseUrl,
|
|
214
300
|
isReasoningModel,
|
|
215
301
|
readModelsJson,
|
|
302
|
+
readModifyWriteModelsJson,
|
|
303
|
+
withRetry,
|
|
216
304
|
writeModelsJson
|
|
217
305
|
};
|
package/package.json
CHANGED
package/provider-sync.js
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
// shared/provider-sync.ts
|
|
2
|
+
/**
 * Carry over fields from previously known models onto a fresh model list.
 *
 * Models are matched by `id`. A matched model becomes a copy of the new model
 * plus every field of the old model that the new one does not have. Unmatched
 * new models are returned as the same object.
 *
 * @param {Array<object>} newModels - Fresh model list; defines the output order.
 * @param {Array<object>} oldModels - Previously stored models.
 * @returns {Array<object>} One element per entry of `newModels`.
 */
function mergeModels(newModels, oldModels) {
  const previousById = new Map();
  for (const oldModel of oldModels) {
    previousById.set(oldModel.id, oldModel);
  }
  return newModels.map((fresh) => {
    const prior = previousById.get(fresh.id);
    if (!prior) return fresh;
    const merged = { ...fresh };
    // Fields present on the new model always win; only gaps are filled in.
    Object.entries(prior)
      .filter(([key]) => !(key in fresh))
      .forEach(([key, value]) => {
        merged[key] = value;
      });
    return merged;
  });
}
|
|
16
|
+
export {
|
|
17
|
+
mergeModels
|
|
18
|
+
};
|
package/react-parser.js
CHANGED
|
@@ -111,6 +111,23 @@ function extractJsonArgs(rawArgs) {
|
|
|
111
111
|
if (cmdMatch) return { command: cmdMatch[1] };
|
|
112
112
|
return { input: jsonStr };
|
|
113
113
|
}
|
|
114
|
+
/**
 * Extract the first balanced `{ ... }` span from a string.
 *
 * Braces inside double-quoted strings (including escaped quotes) are ignored,
 * so `{"a":"}"}` is returned whole. If that scan finds no balanced span, for
 * example because of an unterminated quote, a plain brace count is used as a
 * fallback.
 *
 * @param {string} raw - Text that may contain a JSON object.
 * @returns {string} The extracted span including both braces, or "" when
 *   there is no `{` or no balanced span.
 */
function extractBraceJson(raw) {
  const jsonStart = raw.indexOf("{");
  if (jsonStart === -1) return "";
  // Returns the index of the closing brace matching jsonStart, or -1.
  const findEnd = (respectStrings) => {
    let depth = 0;
    let inString = false;
    let escaped = false;
    for (let i = jsonStart; i < raw.length; i++) {
      const ch = raw[i];
      if (inString) {
        if (escaped) escaped = false;
        else if (ch === "\\") escaped = true;
        else if (ch === '"') inString = false;
        continue;
      }
      if (respectStrings && ch === '"') {
        inString = true;
      } else if (ch === "{") {
        depth++;
      } else if (ch === "}") {
        depth--;
        if (depth === 0) return i;
      }
    }
    return -1;
  };
  let jsonEnd = findEnd(true);
  if (jsonEnd === -1) jsonEnd = findEnd(false);
  return jsonEnd !== -1 ? raw.slice(jsonStart, jsonEnd + 1) : "";
}
|
|
114
131
|
function parseReact(text) {
|
|
115
132
|
for (const dp of ALL_DIALECT_PATTERNS) {
|
|
116
133
|
const result = parseReactWithPatterns(text, dp);
|
|
@@ -382,6 +399,7 @@ export {
|
|
|
382
399
|
WORD_MAPPINGS,
|
|
383
400
|
buildDialectPatterns,
|
|
384
401
|
detectReactDialect,
|
|
402
|
+
extractBraceJson,
|
|
385
403
|
extractJsonArgs,
|
|
386
404
|
extractToolFromJson,
|
|
387
405
|
fuzzyMatchToolName,
|
package/security.js
CHANGED
|
@@ -12,6 +12,7 @@ function debugLog(module, message, ...args) {
|
|
|
12
12
|
}
|
|
13
13
|
|
|
14
14
|
// shared/security.ts
|
|
15
|
+
import dns from "node:dns";
|
|
15
16
|
var SETTINGS_PATH = path.join(os.homedir(), ".pi", "agent", "settings.json");
|
|
16
17
|
var SECURITY_CONFIG_PATH = path.join(os.homedir(), ".pi", "agent", "security.json");
|
|
17
18
|
function getSecurityMode() {
|
|
@@ -158,6 +159,8 @@ var BLOCKED_URL_ALWAYS = /* @__PURE__ */ new Set([
|
|
|
158
159
|
"172.29.",
|
|
159
160
|
"172.30.",
|
|
160
161
|
"172.31.",
|
|
162
|
+
// IPv6-mapped IPv4 cloud metadata (always blocked)
|
|
163
|
+
"::ffff:169.254.169.254",
|
|
161
164
|
// Internal service patterns
|
|
162
165
|
"internal.",
|
|
163
166
|
"private.",
|
|
@@ -171,6 +174,25 @@ var BLOCKED_URL_MAX_ONLY = /* @__PURE__ */ new Set([
|
|
|
171
174
|
"::1",
|
|
172
175
|
"::ffff:127.0.0.1",
|
|
173
176
|
"::ffff:0.0.0.0",
|
|
177
|
+
// IPv6-mapped IPv4 private ranges (always blocked in max mode)
|
|
178
|
+
"::ffff:10.",
|
|
179
|
+
"::ffff:192.168.",
|
|
180
|
+
"::ffff:172.16.",
|
|
181
|
+
"::ffff:172.17.",
|
|
182
|
+
"::ffff:172.18.",
|
|
183
|
+
"::ffff:172.19.",
|
|
184
|
+
"::ffff:172.20.",
|
|
185
|
+
"::ffff:172.21.",
|
|
186
|
+
"::ffff:172.22.",
|
|
187
|
+
"::ffff:172.23.",
|
|
188
|
+
"::ffff:172.24.",
|
|
189
|
+
"::ffff:172.25.",
|
|
190
|
+
"::ffff:172.26.",
|
|
191
|
+
"::ffff:172.27.",
|
|
192
|
+
"::ffff:172.28.",
|
|
193
|
+
"::ffff:172.29.",
|
|
194
|
+
"::ffff:172.30.",
|
|
195
|
+
"::ffff:172.31.",
|
|
174
196
|
// Local/internal patterns
|
|
175
197
|
"local."
|
|
176
198
|
]);
|
|
@@ -233,9 +255,9 @@ function validatePath(filePath, allowedDirs) {
|
|
|
233
255
|
}
|
|
234
256
|
}
|
|
235
257
|
const cwd = process.cwd();
|
|
236
|
-
const safePrefixes = ["/
|
|
258
|
+
const safePrefixes = ["/home", "/tmp", cwd];
|
|
237
259
|
for (const prefix of safePrefixes) {
|
|
238
|
-
if (resolved.startsWith(prefix)) return { valid: true, error: "" };
|
|
260
|
+
if (resolved.startsWith(prefix + "/") || resolved === prefix) return { valid: true, error: "" };
|
|
239
261
|
}
|
|
240
262
|
if (allowedDirs) {
|
|
241
263
|
for (const dir of allowedDirs) {
|
|
@@ -248,6 +270,53 @@ function validatePath(filePath, allowedDirs) {
|
|
|
248
270
|
}
|
|
249
271
|
return { valid: false, error: `Path not in allowed directories: ${filePath}` };
|
|
250
272
|
}
|
|
273
|
+
/**
 * Remove the "::ffff:" prefix from an address string.
 *
 * Strings beginning with "::ffff:0:0" are left untouched.
 *
 * @param {string} ip - Address string.
 * @returns {string} The part after "::ffff:", or `ip` unchanged.
 */
function stripIpv6Mapped(ip) {
  const hasMappedPrefix = ip.startsWith("::ffff:");
  const isExcludedForm = ip.startsWith("::ffff:0:0");
  return hasMappedPrefix && !isExcludedForm ? ip.slice(7) : ip;
}
|
|
279
|
+
/**
 * Report whether an address string is a loopback or unspecified address.
 *
 * @param {string} ip - Address string.
 * @returns {boolean} `true` for 127.x.x.x and 0.0.0.0 (also behind a
 *   "::ffff:" prefix) and for "::1".
 */
function isLoopbackIp(ip) {
  const v4 = stripIpv6Mapped(ip);
  const loopbackV4 = v4.startsWith("127.") || v4 === "0.0.0.0";
  const loopbackV6 = ip === "::1" || ip === "::ffff:0.0.0.0";
  return loopbackV4 || loopbackV6;
}
|
|
285
|
+
/**
 * Report whether an address string matches one of the blocked private ranges.
 *
 * @param {string} ip - Address string.
 * @returns {boolean} `true` for 10.x, 192.168.x, 172.16–172.31.x and
 *   169.254.169.254 (also behind a "::ffff:" prefix), and for strings
 *   starting with "fc", "fd" or "fe80:".
 */
function isPrivateIp(ip) {
  const v4 = stripIpv6Mapped(ip);
  const privateV4 =
    v4.startsWith("10.") ||
    v4.startsWith("192.168.") ||
    /^172\.(1[6-9]|2\d|3[01])\./.test(v4) ||
    v4 === "169.254.169.254";
  if (privateV4) return true;
  const privateV6 = ["fc", "fd", "fe80:"].some((prefix) => ip.startsWith(prefix));
  return privateV6;
}
|
|
294
|
+
/**
 * Resolve a hostname and reject it when any resolved address is blocked.
 *
 * The cloud metadata address 169.254.169.254 is always rejected. Loopback and
 * private addresses are rejected only when `blockPrivate` is true. A failed
 * lookup, an empty result, or any error while checking is reported as safe.
 *
 * @param {string} hostname - Host to resolve with `dns.lookup`.
 * @param {boolean} [blockPrivate=true] - Also reject loopback/private addresses.
 * @returns {Promise<{safe: boolean, error: string}>} `error` is "" when safe.
 */
async function resolveAndCheckHostname(hostname, blockPrivate = true) {
  const lookupAll = () => new Promise((resolve2, reject) => {
    dns.lookup(hostname, { all: true }, (err, found) => {
      if (err) {
        reject(err);
        return;
      }
      resolve2(found);
    });
  });
  try {
    const resolved = await lookupAll();
    if (!resolved || resolved.length === 0) {
      return { safe: true, error: "" };
    }
    for (const entry of resolved) {
      const ip = entry.address;
      if (stripIpv6Mapped(ip) === "169.254.169.254") {
        return { safe: false, error: `SSRF protection: hostname ${hostname} resolves to cloud metadata IP ${ip}` };
      }
      const internal = blockPrivate && (isLoopbackIp(ip) || isPrivateIp(ip));
      if (internal) {
        return { safe: false, error: `SSRF protection: hostname ${hostname} resolves to private/reserved IP ${ip} (DNS rebinding check)` };
      }
    }
    return { safe: true, error: "" };
  } catch {
    return { safe: true, error: "" };
  }
}
|
|
251
320
|
function isSafeUrl(url, blockSsrf = true) {
|
|
252
321
|
if (!url) return { safe: false, error: "URL cannot be empty" };
|
|
253
322
|
let parsed;
|
|
@@ -307,13 +376,25 @@ var INJECTION_PATTERNS = [
|
|
|
307
376
|
];
|
|
308
377
|
function sanitizeCommand(command) {
|
|
309
378
|
if (!command) return { isSafe: false, error: "Command cannot be empty", command: "" };
|
|
310
|
-
|
|
311
|
-
|
|
379
|
+
let normalizedCmd = command.normalize("NFKC");
|
|
380
|
+
normalizedCmd = normalizedCmd.replace(/[\u0000-\u001f\u007f-\u009f\u200b-\u200f\u2028-\u202e\ufeff\u2060-\u2069]/g, "");
|
|
381
|
+
if (normalizedCmd !== command.replace(/[\u0000-\u001f\u007f-\u009f\u200b-\u200f\u2028-\u202e\ufeff\u2060-\u2069]/g, "").normalize("NFKC")) {
|
|
382
|
+
debugLog("security", "command contained Unicode normalization variance (possible homoglyph bypass)", { original: command });
|
|
383
|
+
}
|
|
384
|
+
command = normalizedCmd;
|
|
385
|
+
const trimmed = command.trim();
|
|
386
|
+
if (!trimmed) return { isSafe: false, error: "Command cannot be empty", command: "" };
|
|
387
|
+
const parts = trimmed.split(/\s+/);
|
|
312
388
|
let baseCmd = parts[0].toLowerCase();
|
|
313
389
|
if (baseCmd.includes("/")) baseCmd = baseCmd.split("/").pop();
|
|
314
390
|
if (baseCmd.includes("\\")) baseCmd = baseCmd.split("\\").pop();
|
|
315
|
-
|
|
316
|
-
|
|
391
|
+
for (const raw of parts) {
|
|
392
|
+
let word = raw.toLowerCase();
|
|
393
|
+
if (word.includes("/")) word = word.split("/").pop();
|
|
394
|
+
if (word.includes("\\")) word = word.split("\\").pop();
|
|
395
|
+
if (CRITICAL_COMMANDS.has(word)) {
|
|
396
|
+
return { isSafe: false, error: `Blocked command: ${word} (critical)`, command: "" };
|
|
397
|
+
}
|
|
317
398
|
}
|
|
318
399
|
const mode = getSecurityMode();
|
|
319
400
|
if (mode === "max" && EXTENDED_COMMANDS.has(baseCmd)) {
|
|
@@ -332,16 +413,46 @@ function sanitizeCommand(command) {
|
|
|
332
413
|
}
|
|
333
414
|
var AUDIT_DIR = path.join(os.homedir(), ".pi", "agent");
|
|
334
415
|
var AUDIT_LOG_PATH = path.join(AUDIT_DIR, "audit.log");
|
|
335
|
-
|
|
416
|
+
var AUDIT_BUFFER_MAX_ENTRIES = 50;
|
|
417
|
+
var AUDIT_FLUSH_INTERVAL_MS = 500;
|
|
418
|
+
var _auditBuffer = [];
|
|
419
|
+
var _auditFlushTimer = null;
|
|
420
|
+
/**
 * Start the periodic audit-buffer flush once; later calls do nothing.
 *
 * The interval is unref'd when the timer supports it.
 *
 * @returns {void}
 */
function ensureAuditFlushTimer() {
  if (_auditFlushTimer) return;
  const onTick = () => {
    if (_auditBuffer.length > 0) {
      flushAuditBuffer();
    }
  };
  const timer = setInterval(onTick, AUDIT_FLUSH_INTERVAL_MS);
  _auditFlushTimer = timer;
  if (timer.unref) {
    timer.unref();
  }
}
|
|
432
|
+
/**
 * Append all buffered audit lines to `AUDIT_LOG_PATH` and empty the buffer.
 *
 * The buffer is emptied even when the write fails; the failure is only
 * reported through `debugLog`.
 *
 * @returns {void}
 */
function flushAuditBuffer() {
  const pending = _auditBuffer.length;
  if (pending === 0) return;
  try {
    const dirMissing = !fs.existsSync(AUDIT_DIR);
    if (dirMissing) {
      fs.mkdirSync(AUDIT_DIR, { recursive: true });
    }
    fs.appendFileSync(AUDIT_LOG_PATH, _auditBuffer.join(""), "utf-8");
  } catch (err) {
    debugLog("security", "audit buffer flush failure", err);
  }
  _auditBuffer = [];
}
|
|
445
|
+
/**
 * Queue one audit entry for writing.
 *
 * The entry is copied, tagged with the current security mode, serialized as
 * one JSON line and pushed onto the buffer. The buffer is flushed right away
 * once it holds `AUDIT_BUFFER_MAX_ENTRIES` lines. Errors are reported through
 * `debugLog` and not rethrown.
 *
 * @param {object} entry - Audit entry fields.
 * @returns {void}
 */
function appendAuditEntry(entry) {
  try {
    ensureAuditFlushTimer();
    const record = { ...entry, securityMode: getSecurityMode() };
    _auditBuffer.push(JSON.stringify(record) + "\n");
    const bufferFull = _auditBuffer.length >= AUDIT_BUFFER_MAX_ENTRIES;
    if (bufferFull) {
      flushAuditBuffer();
    }
  } catch (err) {
    debugLog("security", "audit log entry creation failure", err);
  }
}
|
|
347
458
|
function readRecentAuditEntries(count = 50) {
|
|
@@ -362,6 +473,12 @@ function readRecentAuditEntries(count = 50) {
|
|
|
362
473
|
return [];
|
|
363
474
|
}
|
|
364
475
|
}
|
|
476
|
+
// Write out any buffered audit entries when the process exits normally.
// "exit" listeners must be synchronous, which flushAuditBuffer is.
process.on("exit", () => {
  flushAuditBuffer();
});
// Installing a SIGTERM listener removes Node's default "terminate" behaviour,
// so a handler that only flushes would leave the process running after
// SIGTERM. Flush first; then, if no other SIGTERM listener is registered
// (this one is already detached because it was added with `once`), re-raise
// the signal so the process terminates with the default disposition. When the
// host application has its own SIGTERM listeners, shutdown is left to them.
process.once("SIGTERM", () => {
  flushAuditBuffer();
  if (process.listenerCount("SIGTERM") === 0) {
    process.kill(process.pid, "SIGTERM");
  }
});
|
|
365
482
|
function checkBashToolInput(input) {
|
|
366
483
|
const command = input.command ?? input.cmd ?? "";
|
|
367
484
|
if (!command) return { safe: true, rule: "", detail: "" };
|
|
@@ -432,9 +549,11 @@ export {
|
|
|
432
549
|
checkFileToolInput,
|
|
433
550
|
checkHttpToolInput,
|
|
434
551
|
checkInjectionPatterns,
|
|
552
|
+
flushAuditBuffer,
|
|
435
553
|
getSecurityMode,
|
|
436
554
|
isSafeUrl,
|
|
437
555
|
readRecentAuditEntries,
|
|
556
|
+
resolveAndCheckHostname,
|
|
438
557
|
sanitizeCommand,
|
|
439
558
|
setSecurityMode,
|
|
440
559
|
validatePath
|
package/test-report.js
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
// shared/format.ts
|
|
2
|
+
/**
 * Build a section header: a leading newline, two box-drawing dashes, the
 * title, and a trailing rule of dashes that pads the header toward 60 columns
 * (the rule is always at least one dash long).
 *
 * @param {string} title - Section title.
 * @returns {string} The formatted header line, prefixed with a newline.
 */
function section(title) {
  const dash = "\u2500";
  const ruleLength = Math.max(1, 60 - title.length - 4);
  return `\n${dash}${dash} ${title} ${dash.repeat(ruleLength)}`;
}
|
|
6
|
+
/**
 * Format a success line: the message prefixed with a check-mark indicator.
 *
 * @param {string} msg - Message text.
 * @returns {string} The prefixed line.
 */
function ok(msg) {
  const checkMark = "\u2705";
  return ` ${checkMark} ${msg}`;
}
|
|
9
|
+
/**
 * Format a failure line: the message prefixed with a cross-mark indicator.
 *
 * @param {string} msg - Message text.
 * @returns {string} The prefixed line.
 */
function fail(msg) {
  const crossMark = "\u274C";
  return ` ${crossMark} ${msg}`;
}
|
|
12
|
+
/**
 * Format a warning line: the message prefixed with a warning-sign indicator.
 *
 * @param {string} msg - Message text.
 * @returns {string} The prefixed line.
 */
function warn(msg) {
  const warningSign = "\u26A0\uFE0F";
  return ` ${warningSign} ${msg}`;
}
|
|
15
|
+
/**
 * Format an informational line: the message prefixed with an info indicator.
 *
 * @param {string} msg - Message text.
 * @returns {string} The prefixed line.
 */
function info(msg) {
  const infoSign = "\u2139\uFE0F";
  return ` ${infoSign} ${msg}`;
}
|
|
18
|
+
/**
 * Render a duration in milliseconds as a short human-readable string.
 *
 * Uses whole milliseconds below one second ("250ms"), seconds with one
 * decimal below one minute ("1.5s"), and minutes with one decimal otherwise
 * ("2.0m").
 *
 * The unit is chosen from the rounded value, not the raw one, so a duration
 * just under a boundary is promoted to the next unit instead of producing
 * "1000ms" or "60.0s".
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Formatted duration.
 */
function msHuman(ms) {
  if (ms < 1e3) {
    const wholeMs = ms.toFixed(0);
    // Rounding may reach 1000 (e.g. 999.6); fall through to seconds then.
    if (Number(wholeMs) < 1e3) return `${wholeMs}ms`;
  }
  if (ms < 6e4) {
    const seconds = (ms / 1e3).toFixed(1);
    // Rounding may reach 60.0 (e.g. 59999); fall through to minutes then.
    if (Number(seconds) < 60) return `${seconds}s`;
  }
  return `${(ms / 6e4).toFixed(1)}m`;
}
|
|
23
|
+
|
|
24
|
+
// shared/ollama.ts
|
|
25
|
+
import * as path from "node:path";
|
|
26
|
+
import os from "node:os";
|
|
27
|
+
|
|
28
|
+
// shared/debug.ts
|
|
29
|
+
// True only when the PI_EXTENSIONS_DEBUG environment variable is exactly "1".
const DEBUG_ENABLED = process.env.PI_EXTENSIONS_DEBUG === "1";

// shared/ollama.ts
// Version string interpolated into the benchmark branding banner.
const EXTENSION_VERSION = "1.1.8";
// Location of models.json: <home>/.pi/agent/models.json
const MODELS_JSON_PATH = path.join(os.homedir(), ".pi", "agent", "models.json");
|
|
34
|
+
|
|
35
|
+
// shared/test-report.ts
|
|
36
|
+
// Multi-line banner for benchmark reports: product name with version, author,
// and project links, one item per line.
const branding =
  ` \u26A1 Pi Model Benchmark v${EXTENSION_VERSION}\n` +
  ` Written by VTSTech\n` +
  ` GitHub: https://github.com/VTSTech\n` +
  ` Website: www.vts-tech.org`;
|
|
42
|
+
/**
 * Format a single test result line according to its score.
 *
 * "STRONG" and "MODERATE" render as success, "WEAK" as a warning, and "FAIL"
 * or any unrecognized score as failure, each as "<label> (<score>)".
 * "ERROR" renders as a failure line reading "Error: <label>".
 *
 * @param {string} score - Score keyword for the test.
 * @param {string} label - Test label (or error text when score is "ERROR").
 * @returns {string} The formatted result line.
 */
function formatTestScore(score, label) {
  if (score === "ERROR") {
    return fail(`Error: ${label}`);
  }
  const text = `${label} (${score})`;
  if (score === "STRONG" || score === "MODERATE") {
    return ok(text);
  }
  if (score === "WEAK") {
    return warn(text);
  }
  // "FAIL" and every unknown score are reported as failures.
  return fail(text);
}
|
|
58
|
+
/**
 * Build the lines of the SUMMARY section of a test report.
 *
 * Output order: the section header, one ok/fail line per test
 * ("<name>: <score>"), the total elapsed time, and the passed/total count.
 *
 * @param {Array<{name: string, pass: boolean, score: string}>} tests - Test results.
 * @param {number} totalMs - Total elapsed time in milliseconds.
 * @returns {string[]} Report lines, in display order.
 */
function formatTestSummary(tests, totalMs) {
  const report = [section("SUMMARY")];
  for (const { pass, name, score } of tests) {
    const text = `${name}: ${score}`;
    report.push(pass ? ok(text) : fail(text));
  }
  report.push(info(`Total time: ${msHuman(totalMs)}`));
  const passedCount = tests.filter((t) => t.pass).length;
  report.push(info(`Score: ${passedCount}/${tests.length} tests passed`));
  return report;
}
|
|
69
|
+
/**
 * Build the lines of the RECOMMENDATION section of a test report.
 *
 * Verdict tiers, checked in order:
 *   - all tests passed                      -> STRONG (ok)
 *   - at least one passed, at most 1 missed -> GOOD (ok)
 *   - at least one passed, at most 2 missed -> USABLE (warn)
 *   - anything else                         -> WEAK (fail)
 *
 * @param {string} model2 - Model name shown in the verdict.
 * @param {number} passed - Number of tests passed.
 * @param {number} total - Number of tests run.
 * @param {string} [via] - Optional provider/route name, appended as " via <via>".
 * @returns {string[]} The section header followed by the verdict line.
 */
function formatRecommendation(model2, passed, total, via) {
  let suffix = "";
  if (via) {
    suffix = ` via ${via}`;
  }
  const header = section("RECOMMENDATION");
  const anyPassed = passed > 0;
  let verdict;
  if (passed === total) {
    verdict = ok(`${model2} is a STRONG model${suffix} \u2014 full capability`);
  } else if (anyPassed && passed >= total - 1) {
    verdict = ok(`${model2} is a GOOD model${suffix} \u2014 most capabilities work`);
  } else if (anyPassed && passed >= total - 2) {
    verdict = warn(`${model2} is USABLE${suffix} \u2014 some capabilities are limited`);
  } else {
    verdict = fail(`${model2} is WEAK${suffix} \u2014 limited capabilities for agent use`);
  }
  return [header, verdict];
}
|
|
84
|
+
export {
|
|
85
|
+
branding,
|
|
86
|
+
formatRecommendation,
|
|
87
|
+
formatTestScore,
|
|
88
|
+
formatTestSummary
|
|
89
|
+
};
|