npm - @vtstech/pi-model-test - Versions diffs - 1.0.3 → 1.0.4-1 - Mend

@vtstech/pi-model-test 1.0.3 → 1.0.4-1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/README.md ADDED

@@ -0,0 +1,61 @@
+# @vtstech/pi-model-test
+Model benchmark extension for the [Pi Coding Agent](https://github.com/badlogic/pi-mono).
+Test any model for reasoning, tool usage, and instruction following — works with Ollama and cloud providers.
+## Install
+```bash
+pi install "npm:@vtstech/pi-model-test"
+```
+## Commands
+```bash
+/model-test                     Test current Pi model (auto-detects provider)
+/model-test qwen3:0.6b          Test a specific Ollama model
+/model-test --all               Test every Ollama model
+```
+## Test Suites
+### Ollama (6 tests)
+| Test | Scoring |
+|------|---------|
+| Reasoning (snail puzzle) | STRONG / MODERATE / WEAK / FAIL |
+| Thinking token support | SUPPORTED / NOT SUPPORTED |
+| Tool usage (native + text) | STRONG / MODERATE / WEAK / FAIL |
+| ReAct parsing | STRONG / MODERATE / WEAK / FAIL |
+| Instruction following (JSON) | STRONG / MODERATE / WEAK / FAIL |
+| Tool support detection | NATIVE / REACT / NONE |
+### Cloud Providers (4 tests)
+| Test | Scoring |
+|------|---------|
+| Connectivity | OK / FAIL |
+| Reasoning | STRONG / MODERATE / WEAK / FAIL |
+| Instruction following | STRONG / MODERATE / WEAK / FAIL |
+| Tool usage (function calling) | STRONG / MODERATE / WEAK / FAIL |
+## Features
+- Auto-detects Ollama vs cloud provider (OpenRouter, Anthropic, Google, OpenAI, Groq, DeepSeek, Mistral, xAI, Together, Fireworks, Cohere)
+- Automatic remote Ollama URL resolution
+- Timeout resilience with auto-retry on empty responses
+- Rate limit delay between tests (configurable)
+- Thinking model fallback (retries with `think: true`)
+- Tool support cache (`~/.pi/agent/cache/tool_support.json`)
+- JSON repair for truncated output
+- Tab-completion for model names
+## Links
+- [Full Documentation](https://github.com/VTSTech/pi-coding-agent#model-benchmark-model-testts)
+- [Changelog](https://github.com/VTSTech/pi-coding-agent/blob/main/CHANGELOG.md)
+## License
+MIT — [VTSTech](https://www.vts-tech.org)

package/model-test.js CHANGED

@@ -1,42 +1,18 @@
-var __create = Object.create;
-var __defProp = Object.defineProperty;
-var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
-var __getOwnPropNames = Object.getOwnPropertyNames;
-var __getProtoOf = Object.getPrototypeOf;
-var __hasOwnProp = Object.prototype.hasOwnProperty;
-var __export = (target, all) => {
-  for (var name in all)
-    __defProp(target, name, { get: all[name], enumerable: true });
-};
-var __copyProps = (to, from, except, desc) => {
-  if (from && typeof from === "object" || typeof from === "function") {
-    for (let key of __getOwnPropNames(from))
-      if (!__hasOwnProp.call(to, key) && key !== except)
-        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
-  }
-  return to;
-};
-var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
-  // If the importer is in node compatibility mode or this is not an ESM
-  // file that has been converted to a CommonJS file using a Babel-
-  // compatible transform (i.e. "__esModule" has not been set), then set
-  // "default" to the CommonJS "module.exports" for node compatibility.
-  isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
-  mod
-));
-var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
 // .build-npm/model-test/model-test.temp.ts
-var model_test_temp_exports = {};
-__export(model_test_temp_exports, {
-  default: () => model_test_temp_default
-});
-module.exports = __toCommonJS(model_test_temp_exports);
-var fs = __toESM(require("node:fs"));
-var os = __toESM(require("node:os"));
-var path = __toESM(require("node:path"));
-var import_format = require("@vtstech/pi-shared/format");
-var import_ollama = require("@vtstech/pi-shared/ollama");
+import * as fs from "node:fs";
+import * as os from "node:os";
+import * as path from "node:path";
+import {
+  section,
+  ok,
+  fail,
+  warn,
+  info,
+  msHuman,
+  truncate,
+  sanitizeForReport
+} from "@vtstech/pi-shared/format";
+import { getOllamaBaseUrl, detectModelFamily, readModelsJson } from "@vtstech/pi-shared/ollama";
 var BUILTIN_PROVIDERS = {
   openrouter: { api: "openai-completions", baseUrl: "https://openrouter.ai/api/v1", envKey: "OPENROUTER_API_KEY" },
   anthropic: { api: "anthropic-messages", baseUrl: "https://api.anthropic.com/v1", envKey: "ANTHROPIC_API_KEY" },
@@ -55,7 +31,7 @@ function detectProvider(ctx) {
   if (!model) return { kind: "unknown", name: "none" };
   const providerName = model.provider || "";
   if (!providerName) return { kind: "unknown", name: "none" };
-  const modelsJson = (0, import_ollama.readModelsJson)();
+  const modelsJson = readModelsJson();
   const userProviderCfg = (modelsJson.providers || {})[providerName];
   if (userProviderCfg) {
     const baseUrl = userProviderCfg.baseUrl || "";
@@ -169,10 +145,10 @@ function cacheToolSupport(model, support, family) {
   writeToolSupportCache(cache);
 }
 function model_test_temp_default(pi) {
-  const OLLAMA_BASE = (0, import_ollama.getOllamaBaseUrl)();
+  const OLLAMA_BASE = getOllamaBaseUrl();
   async function rateLimitDelay(lines) {
     if (CONFIG.TEST_DELAY_MS > 0) {
-      lines.push((0, import_format.info)(`Waiting ${(0, import_format.msHuman)(CONFIG.TEST_DELAY_MS)} to avoid rate limiting...`));
+      lines.push(info(`Waiting ${msHuman(CONFIG.TEST_DELAY_MS)} to avoid rate limiting...`));
       await new Promise((r) => setTimeout(r, CONFIG.TEST_DELAY_MS));
     }
   }
@@ -254,7 +230,7 @@ function model_test_temp_default(pi) {
       const elapsedMs = Date.now() - start;
       if (!res.ok) {
         const errorText = await res.text().catch(() => "unknown error");
-        throw new Error(`API returned ${res.status}: ${(0, import_format.truncate)(errorText, 200)}`);
+        throw new Error(`API returned ${res.status}: ${truncate(errorText, 200)}`);
       }
       const data = await res.json();
       const choice = data.choices?.[0];
@@ -270,7 +246,7 @@ function model_test_temp_default(pi) {
     } catch (e) {
       const elapsedMs = Date.now() - start;
       if (e.name === "AbortError") {
-        throw new Error(`Provider API timed out after ${(0, import_format.msHuman)(elapsedMs)}`);
+        throw new Error(`Provider API timed out after ${msHuman(elapsedMs)}`);
       }
       throw e;
     } finally {
@@ -915,7 +891,7 @@ The JSON object must have exactly these 4 keys:
         }
       }
       if (!parsed) {
-        return { pass: false, score: "FAIL", output: (0, import_format.sanitizeForReport)(msg), elapsedMs };
+        return { pass: false, score: "FAIL", output: sanitizeForReport(msg), elapsedMs };
       }
       const hasKeys = parsed.name && parsed.can_count !== void 0 && parsed.sum !== void 0 && parsed.language;
       const correctSum = parsed.sum === 42;
@@ -974,7 +950,7 @@ The JSON object must have exactly these 4 keys:
         }
       }
       if (!parsed) {
-        return { pass: false, score: "FAIL", output: (0, import_format.sanitizeForReport)(msg), elapsedMs: result.elapsedMs };
+        return { pass: false, score: "FAIL", output: sanitizeForReport(msg), elapsedMs: result.elapsedMs };
       }
       const hasKeys = parsed.name && parsed.can_count !== void 0 && parsed.sum !== void 0 && parsed.language;
       const correctSum = parsed.sum === 42;
@@ -1061,7 +1037,7 @@ The JSON object must have exactly these 4 keys:
         const detail = result.stderr?.trim() || result.stdout?.trim() || "empty response";
         const level2 = "none";
         cacheToolSupport(model, level2, family);
-        return { level: level2, cached: false, evidence: `API error: ${(0, import_format.truncate)(detail, 100)}`, elapsedMs };
+        return { level: level2, cached: false, evidence: `API error: ${truncate(detail, 100)}`, elapsedMs };
       }
       const parsed = JSON.parse(result.stdout);
       const toolCalls = parsed?.message?.tool_calls;
@@ -1133,7 +1109,7 @@ The JSON object must have exactly these 4 keys:
       }
       const level = "none";
       cacheToolSupport(model, level, family);
-      const cleanContent = (0, import_format.truncate)(strippedContent, 150);
+      const cleanContent = truncate(strippedContent, 150);
       const evidenceDetail = hasTextToolSignal ? `no structured tool calling (text mentions tool: ${cleanContent})` : `no tool calling patterns (text: ${cleanContent})`;
       return { level, cached: false, evidence: evidenceDetail, elapsedMs };
     } catch (e) {
@@ -1200,8 +1176,8 @@ The JSON object must have exactly these 4 keys:
     const lines = [];
     const totalStart = Date.now();
     lines.push(branding);
-    lines.push((0, import_format.section)(`MODEL: ${model}`));
-    lines.push((0, import_format.info)("Provider: Ollama (local/remote)"));
+    lines.push(section(`MODEL: ${model}`));
+    lines.push(info("Provider: Ollama (local/remote)"));
     let modelSize = "unknown";
     let modelFamily = "unknown";
     let modelParams = "unknown";
@@ -1227,125 +1203,125 @@ The JSON object must have exactly these 4 keys:
       }
     } catch {
     }
-    const detectedFamily = (0, import_ollama.detectModelFamily)(model);
-    lines.push((0, import_format.info)(`Size: ${modelSize}  |  Params: ${modelParams}  |  Quant: ${modelQuant}`));
-    lines.push((0, import_format.info)(`Family: ${modelFamily}  |  Detected: ${detectedFamily}  |  Modified: ${modelModified}`));
-    lines.push((0, import_format.section)("REASONING TEST"));
-    lines.push((0, import_format.info)("Prompt: A snail climbs 3ft up a wall each day, slides 2ft back each night. Wall is 10ft. How many days?"));
-    lines.push((0, import_format.info)("Testing..."));
+    const detectedFamily = detectModelFamily(model);
+    lines.push(info(`Size: ${modelSize}  |  Params: ${modelParams}  |  Quant: ${modelQuant}`));
+    lines.push(info(`Family: ${modelFamily}  |  Detected: ${detectedFamily}  |  Modified: ${modelModified}`));
+    lines.push(section("REASONING TEST"));
+    lines.push(info("Prompt: A snail climbs 3ft up a wall each day, slides 2ft back each night. Wall is 10ft. How many days?"));
+    lines.push(info("Testing..."));
     const reasoning = await testReasoning(model);
-    lines.push((0, import_format.info)(`Time: ${(0, import_format.msHuman)(reasoning.elapsedMs)}`));
+    lines.push(info(`Time: ${msHuman(reasoning.elapsedMs)}`));
     if (reasoning.score === "STRONG") {
-      lines.push((0, import_format.ok)(`Answer: ${reasoning.answer} \u2014 Correct with clear reasoning (${reasoning.score})`));
+      lines.push(ok(`Answer: ${reasoning.answer} \u2014 Correct with clear reasoning (${reasoning.score})`));
     } else if (reasoning.score === "MODERATE") {
-      lines.push((0, import_format.ok)(`Answer: ${reasoning.answer} \u2014 Correct but weak reasoning (${reasoning.score})`));
+      lines.push(ok(`Answer: ${reasoning.answer} \u2014 Correct but weak reasoning (${reasoning.score})`));
     } else if (reasoning.score === "WEAK") {
-      lines.push((0, import_format.fail)(`Answer: ${reasoning.answer} \u2014 Reasoned but wrong answer (${reasoning.score})`));
+      lines.push(fail(`Answer: ${reasoning.answer} \u2014 Reasoned but wrong answer (${reasoning.score})`));
     } else if (reasoning.score === "FAIL") {
-      lines.push((0, import_format.fail)(`Answer: ${reasoning.answer} \u2014 No reasoning detected (${reasoning.score})`));
+      lines.push(fail(`Answer: ${reasoning.answer} \u2014 No reasoning detected (${reasoning.score})`));
     } else {
-      const errMsg = reasoning.reasoning.includes("<!DOCTYPE") || reasoning.reasoning.includes("<html") ? reasoning.reasoning.split("\n")[0].slice(0, 100) + "..." : (0, import_format.truncate)(reasoning.reasoning, 300);
-      lines.push((0, import_format.fail)(`Error: ${errMsg}`));
+      const errMsg = reasoning.reasoning.includes("<!DOCTYPE") || reasoning.reasoning.includes("<html") ? reasoning.reasoning.split("\n")[0].slice(0, 100) + "..." : truncate(reasoning.reasoning, 300);
+      lines.push(fail(`Error: ${errMsg}`));
     }
-    lines.push((0, import_format.info)(`Response: ${(0, import_format.sanitizeForReport)(reasoning.reasoning)}`));
-    lines.push((0, import_format.section)("THINKING TEST"));
-    lines.push((0, import_format.info)('Prompt: "Multiply 37 by 43. Explain your reasoning step by step."'));
+    lines.push(info(`Response: ${sanitizeForReport(reasoning.reasoning)}`));
+    lines.push(section("THINKING TEST"));
+    lines.push(info('Prompt: "Multiply 37 by 43. Explain your reasoning step by step."'));
     await rateLimitDelay(lines);
     const thinking = await testThinking(model);
-    lines.push((0, import_format.info)(`Time: ${(0, import_format.msHuman)(thinking.elapsedMs)}`));
+    lines.push(info(`Time: ${msHuman(thinking.elapsedMs)}`));
     if (thinking.supported) {
-      lines.push((0, import_format.ok)(`Thinking/reasoning tokens: SUPPORTED`));
-      lines.push((0, import_format.info)(`Thinking content: ${(0, import_format.sanitizeForReport)(thinking.thinkingContent)}`));
+      lines.push(ok(`Thinking/reasoning tokens: SUPPORTED`));
+      lines.push(info(`Thinking content: ${sanitizeForReport(thinking.thinkingContent)}`));
     } else {
-      lines.push((0, import_format.fail)(`Thinking/reasoning tokens: NOT SUPPORTED`));
+      lines.push(fail(`Thinking/reasoning tokens: NOT SUPPORTED`));
     }
-    lines.push((0, import_format.info)(`Answer output: ${(0, import_format.sanitizeForReport)(thinking.answerContent)}`));
-    lines.push((0, import_format.section)("MODELS.JSON SYNC"));
+    lines.push(info(`Answer output: ${sanitizeForReport(thinking.answerContent)}`));
+    lines.push(section("MODELS.JSON SYNC"));
     const reasoningUpdate = updateModelsJsonReasoning(model, thinking.supported);
-    lines.push((0, import_format.info)(reasoningUpdate.message));
-    lines.push((0, import_format.section)("TOOL USAGE TEST"));
-    lines.push((0, import_format.info)(`Prompt: "What's the weather in Paris?" (with get_weather tool available)`));
-    lines.push((0, import_format.info)("Testing..."));
+    lines.push(info(reasoningUpdate.message));
+    lines.push(section("TOOL USAGE TEST"));
+    lines.push(info(`Prompt: "What's the weather in Paris?" (with get_weather tool available)`));
+    lines.push(info("Testing..."));
     await rateLimitDelay(lines);
     const tools = await testToolUsage(model);
-    lines.push((0, import_format.info)(`Time: ${(0, import_format.msHuman)(tools.elapsedMs)}`));
+    lines.push(info(`Time: ${msHuman(tools.elapsedMs)}`));
     if (tools.score === "STRONG") {
-      lines.push((0, import_format.ok)(`Tool call: ${tools.toolCall} (${tools.score})`));
+      lines.push(ok(`Tool call: ${tools.toolCall} (${tools.score})`));
       if (tools.response) {
-        lines.push((0, import_format.info)(`Raw response: ${(0, import_format.sanitizeForReport)(tools.response)}`));
+        lines.push(info(`Raw response: ${sanitizeForReport(tools.response)}`));
       }
     } else if (tools.score === "MODERATE") {
-      lines.push((0, import_format.ok)(`Tool call: ${tools.toolCall} (${tools.score})`));
+      lines.push(ok(`Tool call: ${tools.toolCall} (${tools.score})`));
       if (tools.response) {
-        lines.push((0, import_format.info)(`Raw response: ${(0, import_format.sanitizeForReport)(tools.response)}`));
+        lines.push(info(`Raw response: ${sanitizeForReport(tools.response)}`));
       }
     } else if (tools.score === "WEAK") {
-      lines.push((0, import_format.warn)(`Tool call: ${tools.toolCall} (${tools.score}) \u2014 malformed call`));
+      lines.push(warn(`Tool call: ${tools.toolCall} (${tools.score}) \u2014 malformed call`));
       if (tools.response) {
-        lines.push((0, import_format.info)(`Raw response: ${(0, import_format.sanitizeForReport)(tools.response)}`));
+        lines.push(info(`Raw response: ${sanitizeForReport(tools.response)}`));
       }
     } else if (tools.score === "FAIL") {
       const hasResponse = tools.response && tools.response.trim().length > 0;
-      lines.push((0, import_format.fail)(`Tool call: none \u2014 ${hasResponse ? "model responded in text instead" : "model returned empty response"} (${tools.score})`));
+      lines.push(fail(`Tool call: none \u2014 ${hasResponse ? "model responded in text instead" : "model returned empty response"} (${tools.score})`));
       if (hasResponse) {
-        lines.push((0, import_format.info)(`Text response: ${(0, import_format.sanitizeForReport)(tools.response)}`));
+        lines.push(info(`Text response: ${sanitizeForReport(tools.response)}`));
       } else {
-        lines.push((0, import_format.info)("Text response: (empty)"));
+        lines.push(info("Text response: (empty)"));
       }
     } else {
-      lines.push((0, import_format.fail)(`Error: ${tools.toolCall}`));
+      lines.push(fail(`Error: ${tools.toolCall}`));
     }
-    lines.push((0, import_format.section)("REACT PARSING TEST"));
-    lines.push((0, import_format.info)(`Prompt: "What's the weather in Tokyo?" (ReAct format, no native tools)`));
-    lines.push((0, import_format.info)("Testing..."));
+    lines.push(section("REACT PARSING TEST"));
+    lines.push(info(`Prompt: "What's the weather in Tokyo?" (ReAct format, no native tools)`));
+    lines.push(info("Testing..."));
     await rateLimitDelay(lines);
     const react = await testReactParsing(model);
-    lines.push((0, import_format.info)(`Time: ${(0, import_format.msHuman)(react.elapsedMs)}`));
+    lines.push(info(`Time: ${msHuman(react.elapsedMs)}`));
     if (react.score === "STRONG") {
-      lines.push((0, import_format.ok)(`ReAct parsed: ${react.toolCall} (${react.score})`));
+      lines.push(ok(`ReAct parsed: ${react.toolCall} (${react.score})`));
       if (react.thought) {
-        lines.push((0, import_format.info)(`Thought: ${(0, import_format.sanitizeForReport)(react.thought)}`));
+        lines.push(info(`Thought: ${sanitizeForReport(react.thought)}`));
       }
     } else if (react.score === "MODERATE") {
-      lines.push((0, import_format.ok)(`ReAct parsed: ${react.toolCall} (${react.score})`));
+      lines.push(ok(`ReAct parsed: ${react.toolCall} (${react.score})`));
       if (react.thought) {
-        lines.push((0, import_format.info)(`Thought: ${(0, import_format.sanitizeForReport)(react.thought)}`));
+        lines.push(info(`Thought: ${sanitizeForReport(react.thought)}`));
       }
     } else if (react.score === "WEAK") {
-      lines.push((0, import_format.warn)(`ReAct parsed: ${react.toolCall} (${react.score}) \u2014 wrong tool or malformed args`));
+      lines.push(warn(`ReAct parsed: ${react.toolCall} (${react.score}) \u2014 wrong tool or malformed args`));
       if (react.thought) {
-        lines.push((0, import_format.info)(`Thought: ${(0, import_format.sanitizeForReport)(react.thought)}`));
+        lines.push(info(`Thought: ${sanitizeForReport(react.thought)}`));
       }
     } else if (react.score === "FAIL") {
-      lines.push((0, import_format.fail)(`ReAct parsing: ${react.toolCall} (${react.score})`));
+      lines.push(fail(`ReAct parsing: ${react.toolCall} (${react.score})`));
       if (react.response) {
-        lines.push((0, import_format.info)(`Response: ${(0, import_format.sanitizeForReport)(react.response)}`));
+        lines.push(info(`Response: ${sanitizeForReport(react.response)}`));
       }
     } else {
-      lines.push((0, import_format.fail)(`Error: ${react.toolCall}`));
+      lines.push(fail(`Error: ${react.toolCall}`));
     }
-    lines.push((0, import_format.section)("INSTRUCTION FOLLOWING TEST"));
-    lines.push((0, import_format.info)("Prompt: Respond with ONLY a JSON object with keys: name, can_count, sum (15+27), language"));
-    lines.push((0, import_format.info)("Testing..."));
+    lines.push(section("INSTRUCTION FOLLOWING TEST"));
+    lines.push(info("Prompt: Respond with ONLY a JSON object with keys: name, can_count, sum (15+27), language"));
+    lines.push(info("Testing..."));
     await rateLimitDelay(lines);
     const instructions = await testInstructionFollowing(model);
-    lines.push((0, import_format.info)(`Time: ${(0, import_format.msHuman)(instructions.elapsedMs)}`));
+    lines.push(info(`Time: ${msHuman(instructions.elapsedMs)}`));
     if (instructions.score === "STRONG") {
-      lines.push((0, import_format.ok)(`JSON output valid with correct values (${instructions.score})`));
+      lines.push(ok(`JSON output valid with correct values (${instructions.score})`));
     } else if (instructions.score === "MODERATE") {
-      lines.push((0, import_format.ok)(`JSON output valid but some values incorrect (${instructions.score})`));
+      lines.push(ok(`JSON output valid but some values incorrect (${instructions.score})`));
     } else if (instructions.score === "WEAK") {
-      lines.push((0, import_format.warn)(`Partial JSON compliance (${instructions.score})`));
+      lines.push(warn(`Partial JSON compliance (${instructions.score})`));
     } else {
-      lines.push((0, import_format.fail)(`Failed to produce valid JSON (${instructions.score})`));
+      lines.push(fail(`Failed to produce valid JSON (${instructions.score})`));
     }
-    lines.push((0, import_format.info)(`Output: ${(0, import_format.sanitizeForReport)(instructions.output)}`));
-    lines.push((0, import_format.section)("TOOL SUPPORT DETECTION"));
-    lines.push((0, import_format.info)("Probing model for tool calling capability (native / ReAct / none)"));
-    lines.push((0, import_format.info)("Testing..."));
+    lines.push(info(`Output: ${sanitizeForReport(instructions.output)}`));
+    lines.push(section("TOOL SUPPORT DETECTION"));
+    lines.push(info("Probing model for tool calling capability (native / ReAct / none)"));
+    lines.push(info("Testing..."));
     await rateLimitDelay(lines);
     const toolSupport = await testToolSupport(model, detectedFamily);
-    lines.push((0, import_format.info)(`Time: ${(0, import_format.msHuman)(toolSupport.elapsedMs)}`));
+    lines.push(info(`Time: ${msHuman(toolSupport.elapsedMs)}`));
     const supportLabel = (level) => {
       switch (level) {
         case "native":
@@ -1359,19 +1335,19 @@ The JSON object must have exactly these 4 keys:
       }
     };
     if (toolSupport.cached) {
-      lines.push((0, import_format.info)(`Result: ${supportLabel(toolSupport.level)} \u2014 from cache`));
+      lines.push(info(`Result: ${supportLabel(toolSupport.level)} \u2014 from cache`));
     } else {
       if (toolSupport.level === "native") {
-        lines.push((0, import_format.ok)(`Tool support: ${supportLabel(toolSupport.level)}`));
+        lines.push(ok(`Tool support: ${supportLabel(toolSupport.level)}`));
       } else if (toolSupport.level === "react") {
-        lines.push((0, import_format.ok)(`Tool support: ${supportLabel(toolSupport.level)}`));
+        lines.push(ok(`Tool support: ${supportLabel(toolSupport.level)}`));
       } else {
-        lines.push((0, import_format.warn)(`Tool support: ${supportLabel(toolSupport.level)}`));
+        lines.push(warn(`Tool support: ${supportLabel(toolSupport.level)}`));
       }
     }
-    lines.push((0, import_format.info)(`Evidence: ${toolSupport.evidence}`));
-    lines.push((0, import_format.info)(`Cache: ${TOOL_SUPPORT_CACHE_PATH}`));
-    lines.push((0, import_format.section)("SUMMARY"));
+    lines.push(info(`Evidence: ${toolSupport.evidence}`));
+    lines.push(info(`Cache: ${TOOL_SUPPORT_CACHE_PATH}`));
+    lines.push(section("SUMMARY"));
     const totalMs = Date.now() - totalStart;
     const tests = [
       { name: "Reasoning", pass: reasoning.score === "STRONG" || reasoning.score === "MODERATE", score: reasoning.score },
@@ -1384,19 +1360,19 @@ The JSON object must have exactly these 4 keys:
     const passed = tests.filter((t) => t.pass).length;
     const total = tests.length;
     for (const t of tests) {
-      lines.push(t.pass ? (0, import_format.ok)(`${t.name}: ${t.score}`) : (0, import_format.fail)(`${t.name}: ${t.score}`));
+      lines.push(t.pass ? ok(`${t.name}: ${t.score}`) : fail(`${t.name}: ${t.score}`));
     }
-    lines.push((0, import_format.info)(`Total time: ${(0, import_format.msHuman)(totalMs)}`));
-    lines.push((0, import_format.info)(`Score: ${passed}/${total} tests passed`));
-    lines.push((0, import_format.section)("RECOMMENDATION"));
+    lines.push(info(`Total time: ${msHuman(totalMs)}`));
+    lines.push(info(`Score: ${passed}/${total} tests passed`));
+    lines.push(section("RECOMMENDATION"));
     if (passed === 6) {
-      lines.push((0, import_format.ok)(`${model} is a STRONG model \u2014 full capability`));
+      lines.push(ok(`${model} is a STRONG model \u2014 full capability`));
     } else if (passed >= 5) {
-      lines.push((0, import_format.ok)(`${model} is a GOOD model \u2014 most capabilities work`));
+      lines.push(ok(`${model} is a GOOD model \u2014 most capabilities work`));
     } else if (passed >= 4) {
-      lines.push((0, import_format.warn)(`${model} is USABLE \u2014 some capabilities are limited`));
+      lines.push(warn(`${model} is USABLE \u2014 some capabilities are limited`));
     } else {
-      lines.push((0, import_format.fail)(`${model} is WEAK \u2014 limited capabilities for agent use`));
+      lines.push(fail(`${model} is WEAK \u2014 limited capabilities for agent use`));
     }
     return lines.join("\n");
   }
@@ -1404,106 +1380,106 @@ The JSON object must have exactly these 4 keys:
     const lines = [];
     const totalStart = Date.now();
     lines.push(branding);
-    lines.push((0, import_format.section)(`MODEL: ${model}`));
-    lines.push((0, import_format.info)(`Provider: ${providerInfo.name} (built-in)`));
-    lines.push((0, import_format.info)(`API: ${providerInfo.apiMode || "openai-completions"}`));
-    lines.push((0, import_format.info)(`Base URL: ${providerInfo.baseUrl || "unknown"}`));
+    lines.push(section(`MODEL: ${model}`));
+    lines.push(info(`Provider: ${providerInfo.name} (built-in)`));
+    lines.push(info(`API: ${providerInfo.apiMode || "openai-completions"}`));
+    lines.push(info(`Base URL: ${providerInfo.baseUrl || "unknown"}`));
     if (providerInfo.apiKey) {
-      lines.push((0, import_format.info)(`API Key: ****${providerInfo.apiKey.slice(-4)}`));
+      lines.push(info(`API Key: ****${providerInfo.apiKey.slice(-4)}`));
     } else {
-      lines.push((0, import_format.warn)(`API Key: NOT SET (${providerInfo.envKey || "env var not found"})`));
+      lines.push(warn(`API Key: NOT SET (${providerInfo.envKey || "env var not found"})`));
     }
-    lines.push((0, import_format.section)("CONNECTIVITY TEST"));
-    lines.push((0, import_format.info)("Sending minimal request to verify API reachability and key validity..."));
+    lines.push(section("CONNECTIVITY TEST"));
+    lines.push(info("Sending minimal request to verify API reachability and key validity..."));
     const connectivity = await testConnectivity(providerInfo, model);
-    lines.push((0, import_format.info)(`Time: ${(0, import_format.msHuman)(connectivity.elapsedMs)}`));
+    lines.push(info(`Time: ${msHuman(connectivity.elapsedMs)}`));
     if (connectivity.pass) {
-      lines.push((0, import_format.ok)(`API reachable and authenticated`));
+      lines.push(ok(`API reachable and authenticated`));
     } else {
       if (!connectivity.reachable) {
-        lines.push((0, import_format.fail)(`API not reachable: ${connectivity.error || "unknown error"}`));
+        lines.push(fail(`API not reachable: ${connectivity.error || "unknown error"}`));
       } else if (!connectivity.authValid) {
-        lines.push((0, import_format.fail)(`Authentication failed: ${connectivity.error || "invalid or missing API key"}`));
+        lines.push(fail(`Authentication failed: ${connectivity.error || "invalid or missing API key"}`));
       } else {
-        lines.push((0, import_format.fail)(`Connectivity error: ${connectivity.error || "unknown"}`));
+        lines.push(fail(`Connectivity error: ${connectivity.error || "unknown"}`));
       }
-      lines.push((0, import_format.warn)("Skipping remaining tests \u2014 fix connectivity first"));
-      lines.push((0, import_format.info)("Tip: Check your API key is set correctly and the provider endpoint is accessible"));
+      lines.push(warn("Skipping remaining tests \u2014 fix connectivity first"));
+      lines.push(info("Tip: Check your API key is set correctly and the provider endpoint is accessible"));
       return lines.join("\n");
     }
-    lines.push((0, import_format.section)("REASONING TEST"));
-    lines.push((0, import_format.info)("Prompt: A snail climbs 3ft up a wall each day, slides 2ft back each night. Wall is 10ft. How many days?"));
-    lines.push((0, import_format.info)("Testing..."));
+    lines.push(section("REASONING TEST"));
+    lines.push(info("Prompt: A snail climbs 3ft up a wall each day, slides 2ft back each night. Wall is 10ft. How many days?"));
+    lines.push(info("Testing..."));
     await rateLimitDelay(lines);
     const reasoning = await testReasoningProvider(providerInfo, model);
-    lines.push((0, import_format.info)(`Time: ${(0, import_format.msHuman)(reasoning.elapsedMs)}`));
+    lines.push(info(`Time: ${msHuman(reasoning.elapsedMs)}`));
     if (reasoning.score === "STRONG") {
-      lines.push((0, import_format.ok)(`Answer: ${reasoning.answer} \u2014 Correct with clear reasoning (${reasoning.score})`));
+      lines.push(ok(`Answer: ${reasoning.answer} \u2014 Correct with clear reasoning (${reasoning.score})`));
     } else if (reasoning.score === "MODERATE") {
-      lines.push((0, import_format.ok)(`Answer: ${reasoning.answer} \u2014 Correct but weak reasoning (${reasoning.score})`));
+      lines.push(ok(`Answer: ${reasoning.answer} \u2014 Correct but weak reasoning (${reasoning.score})`));
     } else if (reasoning.score === "WEAK") {
-      lines.push((0, import_format.fail)(`Answer: ${reasoning.answer} \u2014 Reasoned but wrong answer (${reasoning.score})`));
+      lines.push(fail(`Answer: ${reasoning.answer} \u2014 Reasoned but wrong answer (${reasoning.score})`));
     } else if (reasoning.score === "FAIL") {
-      lines.push((0, import_format.fail)(`Answer: ${reasoning.answer} \u2014 No reasoning detected (${reasoning.score})`));
+      lines.push(fail(`Answer: ${reasoning.answer} \u2014 No reasoning detected (${reasoning.score})`));
     } else {
-      const errMsg = reasoning.reasoning.includes("<!DOCTYPE") || reasoning.reasoning.includes("<html") ? reasoning.reasoning.split("\n")[0].slice(0, 100) + "..." : (0, import_format.truncate)(reasoning.reasoning, 300);
-      lines.push((0, import_format.fail)(`Error: ${errMsg}`));
+      const errMsg = reasoning.reasoning.includes("<!DOCTYPE") || reasoning.reasoning.includes("<html") ? reasoning.reasoning.split("\n")[0].slice(0, 100) + "..." : truncate(reasoning.reasoning, 300);
+      lines.push(fail(`Error: ${errMsg}`));
     }
-    lines.push((0, import_format.info)(`Response: ${(0, import_format.sanitizeForReport)(reasoning.reasoning)}`));
-    lines.push((0, import_format.section)("INSTRUCTION FOLLOWING TEST"));
-    lines.push((0, import_format.info)("Prompt: Respond with ONLY a JSON object with keys: name, can_count, sum (15+27), language"));
-    lines.push((0, import_format.info)("Testing..."));
+    lines.push(info(`Response: ${sanitizeForReport(reasoning.reasoning)}`));
+    lines.push(section("INSTRUCTION FOLLOWING TEST"));
+    lines.push(info("Prompt: Respond with ONLY a JSON object with keys: name, can_count, sum (15+27), language"));
+    lines.push(info("Testing..."));
     await rateLimitDelay(lines);
     const instructions = await testInstructionFollowingProvider(providerInfo, model);
-    lines.push((0, import_format.info)(`Time: ${(0, import_format.msHuman)(instructions.elapsedMs)}`));
+    lines.push(info(`Time: ${msHuman(instructions.elapsedMs)}`));
     if (instructions.score === "STRONG") {
-      lines.push((0, import_format.ok)(`JSON output valid with correct values (${instructions.score})`));
+      lines.push(ok(`JSON output valid with correct values (${instructions.score})`));
     } else if (instructions.score === "MODERATE") {
-      lines.push((0, import_format.ok)(`JSON output valid but some values incorrect (${instructions.score})`));
+      lines.push(ok(`JSON output valid but some values incorrect (${instructions.score})`));
     } else if (instructions.score === "WEAK") {
-      lines.push((0, import_format.warn)(`Partial JSON compliance (${instructions.score})`));
+      lines.push(warn(`Partial JSON compliance (${instructions.score})`));
     } else {
-      lines.push((0, import_format.fail)(`Failed to produce valid JSON (${instructions.score})`));
+      lines.push(fail(`Failed to produce valid JSON (${instructions.score})`));
     }
-    lines.push((0, import_format.info)(`Output: ${(0, import_format.sanitizeForReport)(instructions.output)}`));
-    lines.push((0, import_format.section)("TOOL USAGE TEST"));
-    lines.push((0, import_format.info)(`Prompt: "What's the weather in Paris?" (with get_weather tool available)`));
-    lines.push((0, import_format.info)("Testing..."));
+    lines.push(info(`Output: ${sanitizeForReport(instructions.output)}`));
+    lines.push(section("TOOL USAGE TEST"));
+    lines.push(info(`Prompt: "What's the weather in Paris?" (with get_weather tool available)`));
+    lines.push(info("Testing..."));
     await rateLimitDelay(lines);
     const toolTest = await testToolUsageProvider(providerInfo, model);
-    lines.push((0, import_format.info)(`Time: ${(0, import_format.msHuman)(toolTest.elapsedMs)}`));
+    lines.push(info(`Time: ${msHuman(toolTest.elapsedMs)}`));
     if (toolTest.score === "STRONG") {
-      lines.push((0, import_format.ok)(`Tool call: ${toolTest.toolCall} (${toolTest.score})`));
+      lines.push(ok(`Tool call: ${toolTest.toolCall} (${toolTest.score})`));
       if (toolTest.response) {
-        lines.push((0, import_format.info)(`Raw response: ${(0, import_format.sanitizeForReport)(toolTest.response)}`));
+        lines.push(info(`Raw response: ${sanitizeForReport(toolTest.response)}`));
       }
     } else if (toolTest.score === "MODERATE") {
-      lines.push((0, import_format.ok)(`Tool call: ${toolTest.toolCall} (${toolTest.score})`));
+      lines.push(ok(`Tool call: ${toolTest.toolCall} (${toolTest.score})`));
       if (toolTest.response) {
-        lines.push((0, import_format.info)(`Raw response: ${(0, import_format.sanitizeForReport)(toolTest.response)}`));
+        lines.push(info(`Raw response: ${sanitizeForReport(toolTest.response)}`));
       }
     } else if (toolTest.score === "WEAK") {
-      lines.push((0, import_format.warn)(`Tool call: ${toolTest.toolCall} (${toolTest.score}) \u2014 malformed call`));
+      lines.push(warn(`Tool call: ${toolTest.toolCall} (${toolTest.score}) \u2014 malformed call`));
       if (toolTest.response) {
-        lines.push((0, import_format.info)(`Raw response: ${(0, import_format.sanitizeForReport)(toolTest.response)}`));
+        lines.push(info(`Raw response: ${sanitizeForReport(toolTest.response)}`));
       }
     } else if (toolTest.score === "FAIL") {
       const hasResponse = toolTest.response && toolTest.response.trim().length > 0;
-      lines.push((0, import_format.fail)(`Tool call: none \u2014 ${hasResponse ? "model responded in text instead" : "model returned empty response"} (${toolTest.score})`));
+      lines.push(fail(`Tool call: none \u2014 ${hasResponse ? "model responded in text instead" : "model returned empty response"} (${toolTest.score})`));
       if (hasResponse) {
-        lines.push((0, import_format.info)(`Text response: ${(0, import_format.sanitizeForReport)(toolTest.response)}`));
+        lines.push(info(`Text response: ${sanitizeForReport(toolTest.response)}`));
       } else {
-        lines.push((0, import_format.info)("Text response: (empty)"));
+        lines.push(info("Text response: (empty)"));
       }
     } else {
-      lines.push((0, import_format.fail)(`Error: ${toolTest.toolCall}`));
+      lines.push(fail(`Error: ${toolTest.toolCall}`));
     }
-    lines.push((0, import_format.section)("SKIPPED TESTS (OLLAMA-ONLY)"));
-    lines.push((0, import_format.warn)("Thinking test \u2014 Ollama-specific think:true option and message.thinking field"));
-    lines.push((0, import_format.warn)("ReAct parsing test \u2014 only relevant for Ollama models without native tool calling"));
-    lines.push((0, import_format.warn)("Tool support detection \u2014 Ollama-specific tool support cache"));
-    lines.push((0, import_format.warn)("Model metadata \u2014 Ollama-specific /api/tags endpoint"));
-    lines.push((0, import_format.section)("SUMMARY"));
+    lines.push(section("SKIPPED TESTS (OLLAMA-ONLY)"));
+    lines.push(warn("Thinking test \u2014 Ollama-specific think:true option and message.thinking field"));
+    lines.push(warn("ReAct parsing test \u2014 only relevant for Ollama models without native tool calling"));
+    lines.push(warn("Tool support detection \u2014 Ollama-specific tool support cache"));
+    lines.push(warn("Model metadata \u2014 Ollama-specific /api/tags endpoint"));
+    lines.push(section("SUMMARY"));
     const totalMs = Date.now() - totalStart;
     const tests = [
       { name: "Connectivity", pass: connectivity.pass, score: connectivity.pass ? "OK" : "FAIL" },
@@ -1514,19 +1490,19 @@ The JSON object must have exactly these 4 keys:
     const passed = tests.filter((t) => t.pass).length;
     const total = tests.length;
     for (const t of tests) {
-      lines.push(t.pass ? (0, import_format.ok)(`${t.name}: ${t.score}`) : (0, import_format.fail)(`${t.name}: ${t.score}`));
+      lines.push(t.pass ? ok(`${t.name}: ${t.score}`) : fail(`${t.name}: ${t.score}`));
     }
-    lines.push((0, import_format.info)(`Total time: ${(0, import_format.msHuman)(totalMs)}`));
-    lines.push((0, import_format.info)(`Score: ${passed}/${total} tests passed`));
-    lines.push((0, import_format.section)("RECOMMENDATION"));
+    lines.push(info(`Total time: ${msHuman(totalMs)}`));
+    lines.push(info(`Score: ${passed}/${total} tests passed`));
+    lines.push(section("RECOMMENDATION"));
     if (passed === 4) {
-      lines.push((0, import_format.ok)(`${model} is a STRONG model via ${providerInfo.name} \u2014 full capability`));
+      lines.push(ok(`${model} is a STRONG model via ${providerInfo.name} \u2014 full capability`));
     } else if (passed >= 3) {
-      lines.push((0, import_format.ok)(`${model} is a GOOD model via ${providerInfo.name} \u2014 most capabilities work`));
+      lines.push(ok(`${model} is a GOOD model via ${providerInfo.name} \u2014 most capabilities work`));
     } else if (passed >= 2) {
-      lines.push((0, import_format.warn)(`${model} is USABLE via ${providerInfo.name} \u2014 some capabilities are limited`));
+      lines.push(warn(`${model} is USABLE via ${providerInfo.name} \u2014 some capabilities are limited`));
     } else {
-      lines.push((0, import_format.fail)(`${model} is WEAK via ${providerInfo.name} \u2014 limited capabilities for agent use`));
+      lines.push(fail(`${model} is WEAK via ${providerInfo.name} \u2014 limited capabilities for agent use`));
     }
     return lines.join("\n");
   }
@@ -1647,3 +1623,6 @@ The JSON object must have exactly these 4 keys:
     }
   });
 }
+export {
+  model_test_temp_default as default
+};

package/package.json CHANGED Viewed

@@ -1,11 +1,12 @@
 {
   "name": "@vtstech/pi-model-test",
-  "version": "1.0.3",
+  "version": "1.0.4-1",
   "description": "Model benchmark/testing extension for Pi Coding Agent",
   "main": "model-test.js",
   "keywords": ["pi-package", "pi-extensions"],
   "license": "MIT",
   "access": "public",
+  "type": "module",
   "author": "VTSTech",
   "homepage": "https://www.vts-tech.org",
   "repository": {
@@ -13,7 +14,7 @@
     "url": "https://github.com/VTSTech/pi-coding-agent"
   },
   "dependencies": {
-    "@vtstech/pi-shared": "1.0.3"
+    "@vtstech/pi-shared": "1.0.4-1"
   },
   "peerDependencies": {
     "@mariozechner/pi-coding-agent": ">=0.66"