npm - llm-checker - Versions diffs - 3.2.3 → 3.2.5 - Mend

llm-checker 3.2.3 → 3.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/README.md +107 -32
package/package.json +5 -4
package/src/models/deterministic-selector.js +101 -9
package/src/policy/policy-engine.js +14 -6

package/README.md CHANGED

@@ -93,14 +93,14 @@ npm install sql.js
 LLM Checker is published in all primary channels:
-- npm (latest): [`llm-checker@3.2.1`](https://www.npmjs.com/package/llm-checker)
-- GitHub Release: [`v3.2.1` (2026-02-17)](https://github.com/Pavelevich/llm-checker/releases/tag/v3.2.1)
+- npm (latest): [`llm-checker@3.2.4`](https://www.npmjs.com/package/llm-checker)
+- GitHub Release: [`v3.2.4`](https://github.com/Pavelevich/llm-checker/releases/tag/v3.2.4)
 - GitHub Packages: [`@pavelevich/llm-checker`](https://github.com/users/Pavelevich/packages/npm/package/llm-checker)
-### v3.2.1 Highlights
+### v3.2.4 Highlights
-- Added vLLM/MLX runtime support and speculative decoding estimation.
-- Improved GPU detection, added DGX Spark/GB10 support, strengthened Node runtime guards, and updated tooling comparison notes.
+- Fixed `recommend` hardware-profile handling so discrete VRAM limits are honored consistently.
+- Added deterministic selector regression coverage for 24GB VRAM fit behavior.
 ### Optional: Install from GitHub Packages
@@ -110,7 +110,7 @@ echo "@pavelevich:registry=https://npm.pkg.github.com" >> ~/.npmrc
 echo "//npm.pkg.github.com/:_authToken=${GITHUB_TOKEN}" >> ~/.npmrc
 # 2) Install
-npm install -g @pavelevich/llm-checker@3.2.1
+npm install -g @pavelevich/llm-checker@3.2.4
 ```
 ---
@@ -266,9 +266,47 @@ llm-checker audit export --policy ./policy.yaml --command check --format all --o
 - `--format all` honors `reporting.formats` in your policy (falls back to `json,csv,sarif`).
 - In `enforce` mode with blocking violations, reports are still written before non-zero exit.
+### Integration Examples (SIEM / CI Artifacts)
+```bash
+# CI artifact (JSON) for post-processing in pipeline jobs
+llm-checker audit export --policy ./policy.yaml --command check --format json --out ./reports/policy-report.json
+# Flat CSV for SIEM ingestion (Splunk/ELK/DataDog pipelines)
+llm-checker audit export --policy ./policy.yaml --command check --format csv --out ./reports/policy-report.csv
+# SARIF for security/code-scanning tooling integrations
+llm-checker audit export --policy ./policy.yaml --command check --format sarif --out ./reports/policy-report.sarif
+```
+### GitHub Actions Policy Gate (Copy-Paste)
+```yaml
+name: Policy Gate
+on: [pull_request]
+jobs:
+  policy-gate:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 20
+      - run: npm ci
+      - run: node bin/enhanced_cli.js check --policy ./policy.yaml --runtime ollama --no-verbose
+      - if: always()
+        run: node bin/enhanced_cli.js audit export --policy ./policy.yaml --command check --format all --runtime ollama --no-verbose --out-dir ./policy-reports
+      - if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: policy-audit-reports
+          path: ./policy-reports
+```
 ### Provenance Fields in Reports
-Each finding includes normalized model provenance fields:
+`check`, `recommend`, and `audit export` outputs include normalized model provenance fields:
 - `source`
 - `registry`
@@ -276,7 +314,8 @@ Each finding includes normalized model provenance fields:
 - `license`
 - `digest`
-If a field is unavailable from model metadata, reports use `"unknown"` instead of omitting the field. This keeps downstream parsers deterministic.
+If a field is unavailable from model metadata, outputs use `"unknown"` instead of omitting the field. This keeps downstream parsers deterministic.
+License values are canonicalized for policy checks (for example `MIT License` -> `mit`, `Apache 2.0` -> `apache-2.0`).
 ### AI Commands
@@ -359,7 +398,7 @@ llm-checker search qwen --quant Q4_K_M --max-size 8
 LLM Checker prioritizes the full scraped Ollama model cache (all families/sizes/variants) and falls back to a built-in curated catalog when cache is unavailable.
-The curated fallback catalog includes 35+ models from the most popular Ollama families:
+The curated fallback catalog includes 35+ models from the most popular Ollama families (used only when the dynamic scraped pool is unavailable):
 | Family | Models | Best For |
 |--------|--------|----------|
@@ -481,30 +520,66 @@ The selector automatically picks the best quantization that fits your available
 ## Architecture
+LLM Checker uses a deterministic pipeline so the same inputs produce the same ranked output, with explicit policy outcomes for governance workflows.
+```mermaid
+flowchart LR
+  subgraph Inputs
+    HW["Hardware detector<br/>CPU/GPU/RAM/backend"]
+    REG["Dynamic Ollama catalog<br/>(curated fallback if unavailable)"]
+    LOCAL["Installed local models"]
+    FLAGS["CLI options<br/>use-case/runtime/limits/policy"]
+  end
+  subgraph Pipeline["Selection Pipeline"]
+    NORMALIZE["Normalize and deduplicate model pool"]
+    PROFILE["Hardware profile and memory budget"]
+    FILTER["Use-case/category filtering"]
+    QUANT["Quantization fit selection"]
+    SCORE["Deterministic 4D scoring<br/>Q/S/F/C"]
+    POLICY["Policy evaluation (optional)<br/>audit or enforce"]
+    RANK["Rank and explain candidates"]
+  end
+  subgraph Outputs
+    REC["check / recommend output"]
+    AUDIT["audit export<br/>JSON / CSV / SARIF"]
+    RUN["pull/run-ready commands"]
+  end
+  REG --> NORMALIZE
+  LOCAL --> NORMALIZE
+  HW --> PROFILE
+  FLAGS --> FILTER
+  FLAGS --> POLICY
+  NORMALIZE --> FILTER
+  PROFILE --> QUANT
+  FILTER --> QUANT
+  QUANT --> SCORE
+  SCORE --> POLICY
+  SCORE --> RANK
+  POLICY --> RANK
+  RANK --> REC
+  POLICY --> AUDIT
+  RANK --> RUN
 ```
-┌─────────────────┐     ┌─────────────────┐     ┌─────────────────┐
-│  Hardware       │────>│  Model          │────>│  Deterministic  │
-│  Detection      │     │  Catalog (35+)  │     │  Selector       │
-└─────────────────┘     └─────────────────┘     └─────────────────┘
-        │                       │                       │
-   Detects GPU/CPU         JSON catalog +           4D scoring
-   Memory / Backend        Installed models         Per-category weights
-   Usable memory calc      Auto-dedup               Memory calibration
-                                                        │
-                                                        v
-                                               ┌─────────────────┐
-                                               │  Ranked         │
-                                               │  Recommendations│
-                                               └─────────────────┘
-```
-**Selector Pipeline:**
-1. **Hardware profiling** &mdash; CPU, GPU, RAM, acceleration backend
-2. **Model pool** &mdash; Merge full Ollama scraped pool (or curated fallback) + installed models (deduped)
-3. **Category filter** &mdash; Keep models relevant to the use case
-4. **Quantization selection** &mdash; Best quant that fits in memory budget
-5. **4D scoring** &mdash; Q, S, F, C with category-specific weights
-6. **Ranking** &mdash; Top N candidates returned
+### Component Responsibilities
+- **Input layer**: Collects runtime constraints from hardware detection, local inventory, dynamic registry data, and CLI flags.
+- **Normalization layer**: Deduplicates identifiers/tags and builds a canonical candidate set.
+- **Selection layer**: Filters by use case, selects the best fitting quantization, and computes deterministic Q/S/F/C scores.
+- **Governance layer**: Applies policy rules in `audit` or `enforce` mode and records explicit violation metadata.
+- **Output layer**: Returns ranked recommendations plus machine-readable compliance artifacts when requested.
+### Execution Stages
+1. **Hardware profiling**: Detect CPU/GPU/RAM and effective backend capabilities.
+2. **Model pool assembly**: Merge dynamic scraped catalog (or curated fallback) with locally installed models.
+3. **Candidate filtering**: Keep only relevant models for the requested use case.
+4. **Fit selection**: Choose the best quantization for available memory budget.
+5. **Deterministic scoring**: Score each candidate across quality, speed, fit, and context.
+6. **Policy + ranking**: Apply optional policy checks, then rank and return actionable commands.
 ---
@@ -559,7 +634,7 @@ src/
     deterministic-selector.js  # Primary selection algorithm
     scoring-config.js          # Centralized scoring weights
     scoring-engine.js          # Advanced scoring (smart-recommend)
-    catalog.json               # Curated fallback catalog (35+ models)
+    catalog.json               # Curated fallback catalog (35+ models, only if dynamic pool unavailable)
   ai/
     multi-objective-selector.js  # Multi-objective optimization
     ai-check-selector.js        # LLM-based evaluation

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "llm-checker",
-  "version": "3.2.3",
+  "version": "3.2.5",
   "description": "Intelligent CLI tool with AI-powered model selection that analyzes your hardware and recommends optimal LLM models for your system",
   "bin": {
     "llm-checker": "bin/cli.js",
@@ -10,14 +10,15 @@
   "main": "src/index.js",
   "scripts": {
     "test": "node tests/run-all-tests.js",
-    "test:gpu": "node tests/gpu-detection/multi-gpu.test.js",
-    "test:platform": "node tests/platform-tests/cross-platform.test.js",
-    "test:ui": "node tests/ui-tests/interface.test.js",
+    "test:gpu": "node tests/amd-gpu-detection.test.js",
+    "test:platform": "node tests/hardware-simulation-tests.js",
+    "test:ui": "node tests/ui-cli-smoke.test.js",
     "test:runtime": "node tests/runtime-specdec-tests.js",
     "test:deterministic-pool": "node tests/deterministic-model-pool-check.js",
     "test:policy": "node tests/policy-commands.test.js",
     "test:policy-cli": "node tests/policy-cli-enforcement.js",
     "test:policy-engine": "node tests/policy-engine.test.js",
+    "test:policy-audit": "node tests/policy-audit-reporter.test.js",
     "test:policy-e2e": "node tests/policy-e2e-integration.test.js",
     "test:hardware-detector": "node tests/hardware-detector-regression.js",
     "test:all": "node tests/run-all-tests.js",

package/src/models/deterministic-selector.js CHANGED

@@ -128,6 +128,95 @@ class DeterministicModelSelector {
         return hardware;
     }
+    /**
+     * Normalize hardware shape coming from different detectors/callers.
+     * Ensures deterministic selector always has:
+     * - memory.totalGB
+     * - gpu.vramGB
+     * - acceleration.supports_*
+     */
+    normalizeHardwareProfile(input = {}) {
+        const toNumber = (value) => {
+            if (typeof value === 'number' && Number.isFinite(value)) return value;
+            if (typeof value === 'string' && value.trim() !== '' && Number.isFinite(Number(value))) {
+                return Number(value);
+            }
+            return null;
+        };
+        const cpu = input.cpu || {};
+        const gpu = input.gpu || {};
+        const memory = input.memory || {};
+        const acceleration = input.acceleration || {};
+        const totalMemGB =
+            toNumber(memory.totalGB) ??
+            toNumber(memory.total) ??
+            toNumber(input.total_ram_gb) ??
+            toNumber(input.memoryGB) ??
+            8;
+        const usableMemGB =
+            toNumber(input.usableMemGB) ??
+            Math.max(1, Math.min(0.8 * totalMemGB, totalMemGB - 2));
+        const vramGB =
+            toNumber(gpu.vramGB) ??
+            toNumber(gpu.vram) ??
+            toNumber(gpu.totalVRAM) ??
+            toNumber(gpu.vramPerGPU) ??
+            0;
+        const modelHints = `${gpu.model || ''} ${gpu.vendor || ''} ${gpu.type || ''}`.toLowerCase();
+        const inferredUnified =
+            Boolean(gpu.unified) ||
+            /apple|m1|m2|m3|m4|unified/.test(modelHints);
+        let gpuType = gpu.type;
+        if (!gpuType) {
+            if (inferredUnified) gpuType = 'apple_silicon';
+            else if (/nvidia|rtx|gtx|tesla|quadro/.test(modelHints)) gpuType = 'nvidia';
+            else if (/amd|radeon|rx |instinct/.test(modelHints)) gpuType = 'amd';
+            else gpuType = 'cpu_only';
+        }
+        const normalizedAcceleration = {
+            supports_metal:
+                typeof acceleration.supports_metal === 'boolean'
+                    ? acceleration.supports_metal
+                    : gpuType === 'apple_silicon',
+            supports_cuda:
+                typeof acceleration.supports_cuda === 'boolean'
+                    ? acceleration.supports_cuda
+                    : gpuType === 'nvidia',
+            supports_rocm:
+                typeof acceleration.supports_rocm === 'boolean'
+                    ? acceleration.supports_rocm
+                    : gpuType === 'amd'
+        };
+        return {
+            ...input,
+            cpu: {
+                ...cpu,
+                architecture: cpu.architecture || cpu.arch || process.arch || 'x86_64',
+                cores: toNumber(cpu.cores) ?? toNumber(cpu.physicalCores) ?? 4
+            },
+            gpu: {
+                ...gpu,
+                type: gpuType,
+                vramGB,
+                unified: inferredUnified
+            },
+            memory: {
+                ...memory,
+                totalGB: totalMemGB
+            },
+            acceleration: normalizedAcceleration,
+            usableMemGB
+        };
+    }
     async getCPUInfo() {
         const os = require('os');
         return {
@@ -810,7 +899,8 @@ class DeterministicModelSelector {
         }
         // Phase 0: Gather data
-        const hardware = providedHardware || await this.getHardware();
+        const detectedHardware = providedHardware || await this.getHardware();
+        const hardware = this.normalizeHardwareProfile(detectedHardware);
         const installed = Array.isArray(installedModels) ? installedModels : await this.getInstalledModels();
         const externalPool = Array.isArray(modelPool) && modelPool.length > 0
             ? this.normalizeExternalModels(modelPool)
@@ -1299,22 +1389,22 @@ class DeterministicModelSelector {
         };
     }
-    mapHardwareTier(hardware) {
+    mapHardwareTier(hardware = {}) {
         let ram, cores;
-        if (hardware.memory && hardware.memory.totalGB) {
+        if (hardware?.memory?.totalGB) {
             ram = hardware.memory.totalGB;
-        } else if (hardware.memory && hardware.memory.total) {
+        } else if (hardware?.memory?.total) {
             ram = hardware.memory.total;
-        } else if (hardware.total_ram_gb) {
+        } else if (hardware?.total_ram_gb) {
             ram = hardware.total_ram_gb;
         } else {
             ram = 8;
         }
-        if (hardware.cpu && hardware.cpu.cores) {
+        if (hardware?.cpu?.cores) {
             cores = hardware.cpu.cores;
-        } else if (hardware.cpu_cores) {
+        } else if (hardware?.cpu_cores) {
             cores = hardware.cpu_cores;
         } else {
             cores = 4;
@@ -1366,6 +1456,7 @@ class DeterministicModelSelector {
         const recommendations = {};
         const normalizedPool = this.normalizeExternalModels(Array.isArray(allModels) ? allModels : []);
         const installedModels = await this.getInstalledModels();
+        const normalizedHardware = this.normalizeHardwareProfile(hardware || await this.getHardware());
         for (const category of categories) {
             try {
@@ -1373,19 +1464,20 @@ class DeterministicModelSelector {
                     topN: 3,
                     enableProbe: false,
                     silent: true,
+                    hardware: normalizedHardware,
                     installedModels,
                     modelPool: normalizedPool
                 });
                 recommendations[category] = {
-                    tier: this.mapHardwareTier(hardware),
+                    tier: this.mapHardwareTier(normalizedHardware),
                     bestModels: result.candidates.map(candidate => this.mapCandidateToLegacyFormat(candidate)),
                     totalEvaluated: result.total_evaluated,
                     category: this.getCategoryInfo(category)
                 };
             } catch (error) {
                 recommendations[category] = {
-                    tier: this.mapHardwareTier(hardware),
+                    tier: this.mapHardwareTier(normalizedHardware),
                     bestModels: [],
                     totalEvaluated: 0,
                     category: this.getCategoryInfo(category)

package/src/policy/policy-engine.js CHANGED

@@ -1,3 +1,5 @@
+const { normalizeLicense, UNKNOWN_VALUE } = require('../provenance/model-provenance');
 const NOOP_POLICY = {
     version: 1,
     org: 'default',
@@ -271,8 +273,8 @@ class PolicyEngine {
         if (!isPlainObject(complianceRules)) return;
         const approvedLicenses = asArray(complianceRules.approved_licenses)
-            .map((license) => toLowerString(license))
-            .filter(Boolean);
+            .map((license) => normalizeLicense(license))
+            .filter((license) => license && license !== UNKNOWN_VALUE);
         if (approvedLicenses.length === 0) return;
@@ -410,11 +412,17 @@ class PolicyEngine {
     getModelLicense(model) {
         const raw =
-            toLowerString(model.license) ||
-            toLowerString(model.license_id) ||
-            toLowerString(model.licenseId);
+            model?.provenance?.license ??
+            model?.license ??
+            model?.license_id ??
+            model?.licenseId;
+        const normalized = normalizeLicense(raw);
+        if (!normalized || normalized === UNKNOWN_VALUE) {
+            return null;
+        }
-        return raw;
+        return normalized;
     }
     resolveBackend(model, context) {