npm - llm-checker - Versions diffs - 3.4.0 → 3.4.1 - Mend

llm-checker 3.4.0 → 3.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/README.md +25 -3
package/bin/enhanced_cli.js +2 -2
package/bin/mcp-server.mjs +423 -1
package/package.json +1 -1
package/src/hardware/backends/cuda-detector.js +26 -8

package/README.md CHANGED Viewed

@@ -133,9 +133,22 @@ llm-checker ai-run --calibrated --category coding --prompt "Refactor this functi
 LLM Checker is published in all primary channels:
-- npm (latest): [`llm-checker@latest`](https://www.npmjs.com/package/llm-checker)
+- npm (latest, recommended): [`llm-checker@latest`](https://www.npmjs.com/package/llm-checker)
 - GitHub Releases: [Release history](https://github.com/Pavelevich/llm-checker/releases)
-- GitHub Packages: [`@pavelevich/llm-checker`](https://github.com/users/Pavelevich/packages/npm/package/llm-checker)
+- GitHub Packages (legacy mirror, may lag): [`@pavelevich/llm-checker`](https://github.com/users/Pavelevich/packages/npm/package/llm-checker)
+### Important: Use npm for Latest Builds
+If you need the newest release, install from npm (`llm-checker`), not the scoped GitHub Packages mirror.
+If you installed `@pavelevich/llm-checker` and version looks old:
+```bash
+npm uninstall -g @pavelevich/llm-checker
+npm install -g llm-checker@latest
+hash -r
+llm-checker --version
+```
 ### v3.3.0 Highlights
@@ -148,7 +161,9 @@ LLM Checker is published in all primary channels:
 - Hardened Jetson CUDA detection to avoid false CPU-only fallback.
 - Documentation reorganized under `docs/` with clearer onboarding paths.
-### Optional: Install from GitHub Packages
+### Optional (Legacy): Install from GitHub Packages
+Use this only if you explicitly need GitHub Packages. It may not match npm latest.
 ```bash
 # 1) Configure registry + token (PAT with read:packages)
@@ -261,6 +276,11 @@ Once connected, Claude can use these tools:
 | `installed` | Rank your already-downloaded Ollama models |
 | `search` | Search the Ollama model catalog with filters |
 | `smart_recommend` | Advanced recommendations using the full scoring engine |
+| `ollama_plan` | Build a capacity plan for local models with recommended context/parallel/memory settings |
+| `ollama_plan_env` | Return ready-to-paste `export ...` env vars from the recommended or fallback plan profile |
+| `policy_validate` | Validate a policy file against the v1 schema and return structured validation output |
+| `audit_export` | Run policy compliance export (`json`/`csv`/`sarif`/`all`) for `check` or `recommend` flows |
+| `calibrate` | Generate calibration artifacts from a prompt suite with typed MCP inputs |
 **Ollama Management:**
@@ -281,6 +301,8 @@ Once connected, Claude can use these tools:
 | `cleanup_models` | Analyze installed models — find redundancies, cloud-only models, oversized models, and upgrade candidates |
 | `project_recommend` | Scan a project directory (languages, frameworks, size) and recommend the best model for that codebase |
 | `ollama_monitor` | Real-time system status: RAM usage, loaded models, memory headroom analysis |
+| `cli_help` | List all allowlisted CLI commands exposed through MCP |
+| `cli_exec` | Execute any allowlisted `llm-checker` CLI command with custom args (policy/audit/calibrate/sync/ai-run/etc.) |
 ### Example Prompts

package/bin/enhanced_cli.js CHANGED Viewed

@@ -4155,8 +4155,8 @@ program
                 }
                 if (backend === 'cuda' && info.info) {
-                    console.log(`  Driver: ${info.info.driver}`);
-                    console.log(`  CUDA: ${info.info.cuda}`);
+                    console.log(`  Driver: ${info.info.driver || 'unknown'}`);
+                    console.log(`  CUDA: ${info.info.cuda || 'unknown'}`);
                     console.log(`  Total VRAM: ${info.info.totalVRAM}GB`);
                     for (const gpu of info.info.gpus) {
                         console.log(`  ${gpu.name}: ${gpu.memory.total}GB`);

package/bin/mcp-server.mjs CHANGED Viewed

@@ -101,13 +101,89 @@ function nsToSec(ns) {
   return (ns / 1e9).toFixed(2);
 }
+function tryParseJSON(text) {
+  try {
+    return JSON.parse(text);
+  } catch {
+    return null;
+  }
+}
+function formatExportBlock(envObject) {
+  if (!envObject || typeof envObject !== "object") return "";
+  const entries = Object.entries(envObject).filter(([, value]) => value !== undefined && value !== null);
+  if (entries.length === 0) return "";
+  return entries
+    .map(([key, value]) => `export ${key}="${String(value)}"`)
+    .join("\n");
+}
+function summarizeOllamaPlan(payload) {
+  if (!payload || typeof payload !== "object") return null;
+  const plan = payload.plan;
+  if (!plan || typeof plan !== "object") return null;
+  const selectedModels = Array.isArray(plan.models)
+    ? plan.models.map((model) => model?.name).filter(Boolean)
+    : [];
+  const hardware = plan.hardware || {};
+  const memory = plan.memory || {};
+  const recommendation = plan.recommendation || {};
+  const risk = plan.risk || {};
+  const lines = [
+    "OLLAMA CAPACITY PLAN",
+    `Hardware: ${hardware.backendName || hardware.backend || "unknown"}`,
+    `Models: ${selectedModels.length > 0 ? selectedModels.join(", ") : "none selected"}`,
+    "",
+    "Recommended envelope:",
+    `  Context: ${plan.envelope?.context?.recommended ?? "?"}`,
+    `  Parallel: ${plan.envelope?.parallel?.recommended ?? "?"}`,
+    `  Loaded models: ${plan.envelope?.loaded_models?.recommended ?? "?"}`,
+    `  Estimated memory: ${memory.recommendedEstimatedGB ?? "?"}GB / ${memory.budgetGB ?? "?"}GB (${memory.utilizationPercent ?? "?"}%)`,
+    `  Risk: ${(risk.level || "unknown").toUpperCase()} (${risk.score ?? "?"}/100)`,
+  ];
+  if (recommendation && Object.keys(recommendation).length > 0) {
+    lines.push("");
+    lines.push("Recommended env vars:");
+    if (recommendation.num_ctx !== undefined) lines.push(`  export OLLAMA_NUM_CTX="${recommendation.num_ctx}"`);
+    if (recommendation.num_parallel !== undefined) lines.push(`  export OLLAMA_NUM_PARALLEL="${recommendation.num_parallel}"`);
+    if (recommendation.max_loaded_models !== undefined) lines.push(`  export OLLAMA_MAX_LOADED_MODELS="${recommendation.max_loaded_models}"`);
+    if (recommendation.max_queue !== undefined) lines.push(`  export OLLAMA_MAX_QUEUE="${recommendation.max_queue}"`);
+    if (recommendation.keep_alive !== undefined) lines.push(`  export OLLAMA_KEEP_ALIVE="${recommendation.keep_alive}"`);
+    if (recommendation.flash_attention !== undefined) lines.push(`  export OLLAMA_FLASH_ATTENTION="${recommendation.flash_attention}"`);
+  }
+  return lines.join("\n");
+}
+const ALLOWED_CLI_COMMANDS = new Set([
+  "policy",
+  "audit",
+  "calibrate",
+  "check",
+  "ollama",
+  "installed",
+  "ollama-plan",
+  "recommend",
+  "list-models",
+  "ai-check",
+  "ai-run",
+  "demo",
+  "sync",
+  "search",
+  "smart-recommend",
+  "hw-detect",
+]);
 // ============================================================================
 // MCP SERVER
 // ============================================================================
 const server = new McpServer({
   name: "llm-checker",
-  version: "3.2.0",
+  version: "3.4.0",
 });
 // ============================================================================
@@ -198,6 +274,352 @@ server.tool(
   }
 );
+server.tool(
+  "ollama_plan",
+  "Build an Ollama capacity plan for selected local models and return recommended context/parallel/memory settings",
+  {
+    models: z
+      .array(z.string())
+      .optional()
+      .describe("Optional list of model tags/families to include (default: all local models)"),
+    ctx: z.number().int().positive().optional().describe("Target context window in tokens"),
+    concurrency: z.number().int().positive().optional().describe("Target parallel request count"),
+    objective: z
+      .enum(["latency", "balanced", "throughput"])
+      .optional()
+      .describe("Optimization objective"),
+    reserve_gb: z.number().min(0).optional().describe("Memory reserve in GB for OS/background workloads"),
+  },
+  async ({ models, ctx, concurrency, objective, reserve_gb }) => {
+    const args = ["ollama-plan", "--json"];
+    if (Array.isArray(models) && models.length > 0) args.push("--models", ...models);
+    if (ctx !== undefined) args.push("--ctx", String(ctx));
+    if (concurrency !== undefined) args.push("--concurrency", String(concurrency));
+    if (objective) args.push("--objective", objective);
+    if (reserve_gb !== undefined) args.push("--reserve-gb", String(reserve_gb));
+    const result = await run(args, 180000);
+    const payload = tryParseJSON(result);
+    if (!payload) {
+      return {
+        content: [{ type: "text", text: result }],
+      };
+    }
+    const summary = summarizeOllamaPlan(payload);
+    const output = summary
+      ? `${summary}\n\nRAW JSON:\n${JSON.stringify(payload, null, 2)}`
+      : JSON.stringify(payload, null, 2);
+    return {
+      content: [{ type: "text", text: output }],
+    };
+  }
+);
+server.tool(
+  "ollama_plan_env",
+  "Return shell export commands from an Ollama capacity plan (recommended or fallback profile)",
+  {
+    profile: z
+      .enum(["recommended", "fallback"])
+      .optional()
+      .describe("Which profile to return (default: recommended)"),
+    models: z
+      .array(z.string())
+      .optional()
+      .describe("Optional list of model tags/families to include (default: all local models)"),
+    ctx: z.number().int().positive().optional().describe("Target context window in tokens"),
+    concurrency: z.number().int().positive().optional().describe("Target parallel request count"),
+    objective: z
+      .enum(["latency", "balanced", "throughput"])
+      .optional()
+      .describe("Optimization objective"),
+    reserve_gb: z.number().min(0).optional().describe("Memory reserve in GB for OS/background workloads"),
+  },
+  async ({ profile, models, ctx, concurrency, objective, reserve_gb }) => {
+    const args = ["ollama-plan", "--json"];
+    if (Array.isArray(models) && models.length > 0) args.push("--models", ...models);
+    if (ctx !== undefined) args.push("--ctx", String(ctx));
+    if (concurrency !== undefined) args.push("--concurrency", String(concurrency));
+    if (objective) args.push("--objective", objective);
+    if (reserve_gb !== undefined) args.push("--reserve-gb", String(reserve_gb));
+    const result = await run(args, 180000);
+    const payload = tryParseJSON(result);
+    if (!payload?.plan) {
+      return {
+        content: [{ type: "text", text: `Failed to parse ollama-plan output:\n${result}` }],
+        isError: true,
+      };
+    }
+    const selectedProfile = profile || "recommended";
+    const plan = payload.plan;
+    let envValues = null;
+    if (selectedProfile === "fallback") {
+      const fallback = plan.fallback || {};
+      envValues = {
+        OLLAMA_NUM_CTX: fallback.num_ctx,
+        OLLAMA_NUM_PARALLEL: fallback.num_parallel,
+        OLLAMA_MAX_LOADED_MODELS: fallback.max_loaded_models,
+      };
+    } else {
+      envValues = plan.shell?.env || null;
+      if (!envValues) {
+        const recommendation = plan.recommendation || {};
+        envValues = {
+          OLLAMA_NUM_CTX: recommendation.num_ctx,
+          OLLAMA_NUM_PARALLEL: recommendation.num_parallel,
+          OLLAMA_MAX_LOADED_MODELS: recommendation.max_loaded_models,
+          OLLAMA_MAX_QUEUE: recommendation.max_queue,
+          OLLAMA_KEEP_ALIVE: recommendation.keep_alive,
+          OLLAMA_FLASH_ATTENTION: recommendation.flash_attention,
+        };
+      }
+    }
+    const exports = formatExportBlock(envValues);
+    if (!exports) {
+      return {
+        content: [{ type: "text", text: "No environment values available for this plan/profile." }],
+        isError: true,
+      };
+    }
+    return {
+      content: [
+        {
+          type: "text",
+          text: [`PROFILE: ${selectedProfile.toUpperCase()}`, "", exports].join("\n"),
+        },
+      ],
+    };
+  }
+);
+server.tool(
+  "cli_help",
+  "List all llm-checker CLI commands exposed via cli_exec",
+  {},
+  async () => {
+    const commands = [...ALLOWED_CLI_COMMANDS].sort();
+    const lines = [
+      "Available commands for cli_exec:",
+      ...commands.map((command) => `  - ${command}`),
+      "",
+      "Examples:",
+      '  cli_exec command="ollama-plan" args=["--json"]',
+      '  cli_exec command="policy" args=["validate","--file","policy.yaml","--json"]',
+      '  cli_exec command="search" args=["qwen","--use-case","coding","--limit","5"]',
+    ];
+    return { content: [{ type: "text", text: lines.join("\n") }] };
+  }
+);
+server.tool(
+  "cli_exec",
+  "Execute any supported llm-checker CLI command (allowlisted) with custom arguments",
+  {
+    command: z.string().describe("Top-level command (use cli_help to list allowed commands)"),
+    args: z
+      .array(z.string())
+      .optional()
+      .describe("Additional CLI args, exactly as used in terminal (without shell quoting)"),
+    timeout_ms: z.number().int().min(1000).max(600000).optional().describe("Execution timeout in milliseconds"),
+  },
+  async ({ command, args, timeout_ms }) => {
+    const trimmedCommand = String(command || "").trim();
+    if (!ALLOWED_CLI_COMMANDS.has(trimmedCommand)) {
+      return {
+        content: [
+          {
+            type: "text",
+            text: `Unsupported command "${trimmedCommand}". Use cli_help to list allowed commands.`,
+          },
+        ],
+        isError: true,
+      };
+    }
+    const safeArgs = Array.isArray(args) ? args : [];
+    if (safeArgs.length > 100) {
+      return {
+        content: [{ type: "text", text: "Too many arguments. Limit is 100." }],
+        isError: true,
+      };
+    }
+    const result = await run([trimmedCommand, ...safeArgs], timeout_ms || 180000);
+    return { content: [{ type: "text", text: result }] };
+  }
+);
+server.tool(
+  "policy_validate",
+  "Validate a policy file against the v1 schema and return structured validation output",
+  {
+    file: z.string().optional().describe("Policy file path (default: policy.yaml)"),
+  },
+  async ({ file }) => {
+    const args = ["policy", "validate", "--json"];
+    if (file) args.push("--file", file);
+    const result = await run(args, 120000);
+    const payload = tryParseJSON(result);
+    if (!payload) {
+      return {
+        content: [{ type: "text", text: result }],
+      };
+    }
+    const status = payload.valid ? "VALID" : "INVALID";
+    const header = [
+      `POLICY VALIDATION: ${status}`,
+      `File: ${payload.file || file || "policy.yaml"}`,
+      `Errors: ${payload.errorCount ?? (Array.isArray(payload.errors) ? payload.errors.length : 0)}`,
+    ].join("\n");
+    return {
+      content: [{ type: "text", text: `${header}\n\n${JSON.stringify(payload, null, 2)}` }],
+      isError: !payload.valid,
+    };
+  }
+);
+server.tool(
+  "audit_export",
+  "Run policy compliance audit export (json/csv/sarif/all) for check/recommend flows",
+  {
+    policy: z.string().describe("Policy file path"),
+    command: z
+      .enum(["check", "recommend"])
+      .optional()
+      .describe("Evaluation source (default: check)"),
+    format: z
+      .enum(["json", "csv", "sarif", "all"])
+      .optional()
+      .describe("Export format (default: json)"),
+    out: z.string().optional().describe("Output file path (single format only)"),
+    out_dir: z.string().optional().describe("Output directory when --out is omitted"),
+    use_case: z.string().optional().describe("Use case when command=check"),
+    category: z.string().optional().describe("Category hint when command=recommend"),
+    optimize: z
+      .enum(["balanced", "speed", "quality", "context", "coding"])
+      .optional()
+      .describe("Optimization profile when command=recommend"),
+    runtime: z
+      .enum(["ollama", "vllm", "mlx"])
+      .optional()
+      .describe("Runtime backend for check mode"),
+    include_cloud: z.boolean().optional().describe("Include cloud models in check-mode analysis"),
+    max_size: z.string().optional().describe('Maximum model size for check mode (example: "24B" or "12GB")'),
+    min_size: z.string().optional().describe('Minimum model size for check mode (example: "3B" or "2GB")'),
+    limit: z.number().int().positive().optional().describe("Model analysis limit for check mode"),
+    verbose: z.boolean().optional().describe("Enable verbose progress (default: true)"),
+  },
+  async ({
+    policy,
+    command,
+    format,
+    out,
+    out_dir,
+    use_case,
+    category,
+    optimize,
+    runtime,
+    include_cloud,
+    max_size,
+    min_size,
+    limit,
+    verbose,
+  }) => {
+    const args = ["audit", "export", "--policy", policy];
+    if (command) args.push("--command", command);
+    if (format) args.push("--format", format);
+    if (out) args.push("--out", out);
+    if (out_dir) args.push("--out-dir", out_dir);
+    if (use_case) args.push("--use-case", use_case);
+    if (category) args.push("--category", category);
+    if (optimize) args.push("--optimize", optimize);
+    if (runtime) args.push("--runtime", runtime);
+    if (include_cloud) args.push("--include-cloud");
+    if (max_size) args.push("--max-size", max_size);
+    if (min_size) args.push("--min-size", min_size);
+    if (limit !== undefined) args.push("--limit", String(limit));
+    if (verbose === false) args.push("--no-verbose");
+    const result = await run(args, 300000);
+    const hadFailure =
+      /audit export failed:/i.test(result) ||
+      /blocking violations detected/i.test(result) ||
+      /enforcement result:\s*blocking/i.test(result);
+    return {
+      content: [{ type: "text", text: result }],
+      isError: hadFailure,
+    };
+  }
+);
+server.tool(
+  "calibrate",
+  "Generate calibration artifacts from a JSONL prompt suite (dry-run, contract-only, or full benchmark mode)",
+  {
+    suite: z.string().describe("Prompt suite path in JSONL format"),
+    models: z.array(z.string()).describe("Model identifiers to include"),
+    output: z.string().describe("Calibration result output path (.json/.yaml/.yml)"),
+    runtime: z
+      .enum(["ollama", "vllm", "mlx"])
+      .optional()
+      .describe("Inference runtime backend"),
+    mode: z
+      .enum(["dry-run", "contract-only", "full"])
+      .optional()
+      .describe("Execution mode"),
+    objective: z
+      .enum(["speed", "quality", "balanced"])
+      .optional()
+      .describe("Calibration objective"),
+    policy_out: z.string().optional().describe("Optional calibration policy output path"),
+    warmup: z.number().int().positive().optional().describe("Warmup runs per prompt in full mode"),
+    iterations: z.number().int().positive().optional().describe("Measured iterations per prompt in full mode"),
+    timeout_ms: z.number().int().positive().optional().describe("Per-prompt timeout in full mode (ms)"),
+    dry_run: z.boolean().optional().describe("Shortcut flag for dry-run mode"),
+  },
+  async ({
+    suite,
+    models,
+    output,
+    runtime,
+    mode,
+    objective,
+    policy_out,
+    warmup,
+    iterations,
+    timeout_ms,
+    dry_run,
+  }) => {
+    const args = ["calibrate", "--suite", suite, "--models", ...models, "--output", output];
+    if (runtime) args.push("--runtime", runtime);
+    if (mode) args.push("--mode", mode);
+    if (objective) args.push("--objective", objective);
+    if (policy_out) args.push("--policy-out", policy_out);
+    if (warmup !== undefined) args.push("--warmup", String(warmup));
+    if (iterations !== undefined) args.push("--iterations", String(iterations));
+    if (timeout_ms !== undefined) args.push("--timeout-ms", String(timeout_ms));
+    if (dry_run) args.push("--dry-run");
+    const result = await run(args, 600000);
+    const hadFailure = /calibration failed:/i.test(result);
+    return {
+      content: [{ type: "text", text: result }],
+      isError: hadFailure,
+    };
+  }
+);
 // ============================================================================
 // OLLAMA MANAGEMENT TOOLS
 // ============================================================================

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "llm-checker",
-  "version": "3.4.0",
+  "version": "3.4.1",
   "description": "Intelligent CLI tool with AI-powered model selection that analyzes your hardware and recommends optimal LLM models for your system",
   "bin": {
     "llm-checker": "bin/cli.js",

package/src/hardware/backends/cuda-detector.js CHANGED Viewed

@@ -322,7 +322,7 @@ class CUDADetector {
         const modelRaw = this.readJetsonModel();
         const model = this.normalizeJetsonModel(modelRaw);
         const cudaVersion = this.detectJetsonCudaVersion();
-        const driverVersion = this.detectJetsonDriverVersion();
+        const driverVersion = this.detectJetsonDriverVersion() || 'unknown';
         const totalSystemGB = Math.max(1, Math.round(os.totalmem() / (1024 ** 3)));
         const sharedGpuMemoryGB = Math.max(1, Math.round(totalSystemGB * 0.85));
         const capabilities = this.getJetsonCapabilities(modelRaw || model);
@@ -423,11 +423,26 @@ class CUDADetector {
     }
     detectJetsonDriverVersion() {
-        const versionInfo = this.readFileIfExists('/proc/driver/nvidia/version');
-        if (!versionInfo) return null;
+        const driverSources = [
+            '/proc/driver/nvidia/version',
+            '/sys/module/nvidia/version'
+        ];
+        for (const source of driverSources) {
+            const versionInfo = this.readFileIfExists(source);
+            if (!versionInfo) continue;
+            const kernelMatch = versionInfo.match(/Kernel Module(?:\s+for\s+\w+)?\s+([0-9]+(?:\.[0-9]+){1,3})/i);
+            if (kernelMatch) return kernelMatch[1];
+            const nvrmMatch = versionInfo.match(/NVRM version:\s*.*?([0-9]+(?:\.[0-9]+){1,3})/i);
+            if (nvrmMatch) return nvrmMatch[1];
-        const match = versionInfo.match(/Kernel Module\s+([0-9.]+)/i);
-        return match ? match[1] : null;
+            const genericMatch = versionInfo.match(/\b([0-9]+(?:\.[0-9]+){1,3})\b/);
+            if (genericMatch) return genericMatch[1];
+        }
+        return null;
     }
     getJetsonCapabilities(model) {
@@ -734,10 +749,13 @@ class CUDADetector {
         const primary = this.getPrimaryGPU();
         const gpuName = primary.name.toLowerCase()
             .replace(/nvidia|geforce|quadro|tesla/gi, '')
-            .replace(/\s+/g, '-')
-            .trim();
+            .replace(/[^a-z0-9]+/gi, '-')
+            .replace(/-+/g, '-')
+            .replace(/^-|-$/g, '');
+        const normalizedGpuName = gpuName || 'gpu';
+        const normalizedVRAM = Number.isFinite(info.totalVRAM) ? Math.max(0, Math.round(info.totalVRAM)) : 0;
-        return `cuda-${gpuName}-${info.totalVRAM}gb${info.isMultiGPU ? '-x' + info.gpus.length : ''}`;
+        return `cuda-${normalizedGpuName}-${normalizedVRAM}gb${info.isMultiGPU ? '-x' + info.gpus.length : ''}`;
     }
     /**