llm-checker 3.5.15 → 3.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/README.md +28 -8
  2. package/analyzer/compatibility.js +5 -0
  3. package/analyzer/performance.js +5 -4
  4. package/bin/cli.js +5 -39
  5. package/bin/enhanced_cli.js +449 -24
  6. package/bin/mcp-server.mjs +266 -101
  7. package/package.json +13 -8
  8. package/src/ai/multi-objective-selector.js +118 -11
  9. package/src/calibration/calibration-manager.js +4 -1
  10. package/src/data/model-database.js +489 -5
  11. package/src/data/registry-ingestors.js +751 -0
  12. package/src/data/registry-recommender.js +514 -0
  13. package/src/data/seed/README.md +11 -3
  14. package/src/data/seed/models.db +0 -0
  15. package/src/data/sync-manager.js +32 -18
  16. package/src/hardware/backends/apple-silicon.js +5 -1
  17. package/src/hardware/backends/cuda-detector.js +47 -19
  18. package/src/hardware/backends/intel-detector.js +6 -2
  19. package/src/hardware/backends/rocm-detector.js +6 -2
  20. package/src/hardware/detector.js +57 -30
  21. package/src/hardware/unified-detector.js +129 -25
  22. package/src/index.js +68 -4
  23. package/src/models/ai-check-selector.js +36 -5
  24. package/src/models/deterministic-selector.js +179 -18
  25. package/src/models/expanded_database.js +9 -5
  26. package/src/models/intelligent-selector.js +87 -1
  27. package/src/models/moe-assumptions.js +11 -0
  28. package/src/models/requirements.js +16 -11
  29. package/src/models/scoring-core.js +341 -0
  30. package/src/models/scoring-engine.js +9 -2
  31. package/src/ollama/capacity-planner.js +15 -2
  32. package/src/ollama/client.js +70 -30
  33. package/src/ollama/enhanced-client.js +20 -2
  34. package/src/ollama/manager.js +14 -2
  35. package/src/policy/cli-policy.js +8 -2
  36. package/src/policy/policy-engine.js +2 -1
  37. package/src/provenance/model-provenance.js +4 -1
  38. package/src/ui/cli-theme.js +47 -7
  39. package/src/ui/interactive-panel.js +162 -24
@@ -10,6 +10,7 @@
10
10
 
11
11
  const { MULTI_OBJECTIVE_WEIGHTS } = require('../models/scoring-config');
12
12
  const { normalizePlatform } = require('../utils/platform');
13
+ const { rankModels } = require('../models/scoring-core');
13
14
 
14
15
  class MultiObjectiveSelector {
15
16
  constructor() {
@@ -40,23 +41,124 @@ class MultiObjectiveSelector {
40
41
  }
41
42
 
42
43
  /**
43
- * Select best models using multi-objective ranking
44
+ * Select best models using the UNIFIED canonical scoring core (issue #88).
45
+ *
46
+ * `check` used to rank through this selector's own multi-objective math,
47
+ * which diverged from `recommend`/`smart-recommend` and never received the
48
+ * PR #89 high-capacity right-sizing fix. It now routes the ranking through
49
+ * the shared DeterministicModelSelector core (via scoring-core.rankModels)
50
+ * so identical (model, hardware) inputs score identically across all three
51
+ * commands and the high-capacity floor applies here too.
52
+ *
53
+ * The output shape is preserved exactly: `{ compatible, marginal,
54
+ * incompatible }`, each entry being the ORIGINAL model object spread with
55
+ * `totalScore`, `components { quality, speed, ttfb, context, hardwareMatch }`
56
+ * and `reasoning`, so downstream `check` rendering and the regression test
57
+ * (which calls `estimateModelParams` on the returned object) keep working.
44
58
  */
45
59
  async selectBestModels(hardware, models, category = 'general', topK = 10) {
46
- // Step 1: Hard filters - remove incompatible models
60
+ const inputModels = Array.isArray(models) ? models.filter(Boolean) : [];
61
+ if (inputModels.length === 0) {
62
+ return { compatible: [], marginal: [], incompatible: [] };
63
+ }
64
+
65
+ let ranking;
66
+ try {
67
+ ranking = await rankModels(inputModels, hardware, { category, topN: inputModels.length });
68
+ } catch (error) {
69
+ ranking = null;
70
+ }
71
+
72
+ // Defensive fallback: if the unified core is unavailable for any reason,
73
+ // fall back to the legacy multi-objective ranking so `check` still works.
74
+ if (!ranking || !Array.isArray(ranking.candidates)) {
75
+ return this.selectBestModelsLegacy(hardware, inputModels, category, topK);
76
+ }
77
+
78
+ const scoredModels = [];
79
+ const rankedSources = new Set();
80
+ for (const candidate of ranking.candidates) {
81
+ const source = candidate?.meta?.__source;
82
+ if (!source) continue;
83
+ rankedSources.add(source);
84
+ scoredModels.push(this.mapCoreCandidateToMultiObjective(candidate, source, hardware, category));
85
+ }
86
+
87
+ // Models the canonical core dropped (category filter / budget) are not
88
+ // viable on this hardware for this use case -> treat as incompatible,
89
+ // mirroring the previous hard-filter semantics.
90
+ const incompatibleExtras = inputModels
91
+ .filter((model) => !rankedSources.has(model))
92
+ .map((model) => ({
93
+ ...model,
94
+ totalScore: 0,
95
+ components: { quality: 0, speed: 0, ttfb: 0, context: 0, hardwareMatch: 0 },
96
+ reasoning: 'Filtered out by unified scoring core (does not fit hardware/use-case)'
97
+ }));
98
+
99
+ scoredModels.sort((a, b) => b.totalScore - a.totalScore);
100
+
101
+ const classified = this.classifyResults(scoredModels, topK);
102
+ classified.incompatible = [...classified.incompatible, ...incompatibleExtras].slice(0, 5);
103
+ return classified;
104
+ }
105
+
106
+ /**
107
+ * Map a unified-core candidate back into this selector's multi-objective
108
+ * output shape. The 0-100 `score` from the deterministic core becomes
109
+ * `totalScore`; component sub-scores are normalized to 0-1 to match the
110
+ * historical `components` contract consumed by `check` rendering.
111
+ */
112
+ mapCoreCandidateToMultiObjective(candidate, source, hardware, category) {
113
+ const components = candidate.components || {};
114
+ const to01 = (value) => {
115
+ const num = Number(value);
116
+ if (!Number.isFinite(num)) return 0;
117
+ return Math.max(0, Math.min(1, num / 100));
118
+ };
119
+
120
+ const quality = to01(components.Q);
121
+ const speed = to01(components.S);
122
+ const context = to01(components.C);
123
+ // The deterministic core folds hardware fitness into the `F` (fit) plus
124
+ // `H` (high-capacity right-sizing) components; surface that as the
125
+ // historical `hardwareMatch` signal so `check` insights stay meaningful.
126
+ const hardwareMatch = to01((Number(components.F) || 0) + (Number(components.H) || 0));
127
+
128
+ return {
129
+ ...source,
130
+ totalScore: Math.round(candidate.score * 100) / 100,
131
+ score: Math.round(candidate.score * 100) / 100,
132
+ components: {
133
+ quality,
134
+ speed,
135
+ ttfb: speed, // ttfb tracks speed; legacy field retained for shape
136
+ context,
137
+ hardwareMatch
138
+ },
139
+ quant: candidate.quant || source.quant,
140
+ estimatedRAM: candidate.requiredGB,
141
+ estimatedTPS: candidate.estTPS,
142
+ reasoning: candidate.rationale ||
143
+ this.generateReasoning(source, hardware, quality, hardwareMatch)
144
+ };
145
+ }
146
+
147
+ /**
148
+ * Legacy multi-objective ranking, retained ONLY as a defensive fallback if
149
+ * the unified core throws. Not used on the normal path.
150
+ */
151
+ selectBestModelsLegacy(hardware, models, category = 'general', topK = 10) {
47
152
  const compatibleModels = this.applyHardFilters(hardware, models);
48
-
153
+
49
154
  if (compatibleModels.length === 0) {
50
155
  return { compatible: [], marginal: [], incompatible: models };
51
156
  }
52
157
 
53
- // Step 2: Multi-objective scoring
54
- const scoredModels = compatibleModels.map(model =>
158
+ const scoredModels = compatibleModels.map(model =>
55
159
  this.calculateMultiObjectiveScore(hardware, model, category)
56
160
  ).filter(Boolean);
57
-
58
161
 
59
- // Step 3: Sort and classify
60
162
  scoredModels.sort((a, b) => b.totalScore - a.totalScore);
61
163
 
62
164
  return this.classifyResults(scoredModels, topK);
@@ -784,11 +886,16 @@ class MultiObjectiveSelector {
784
886
  }
785
887
 
786
888
  estimateKVCache(model, contextLength) {
787
- // Rough KV cache estimation: 2 * layers * hidden_size * seq_len * 2 bytes
889
+ // KV cache grows linearly with parameter count and context length. The old
890
+ // formula derived both "layers" and "hidden size" from params and multiplied
891
+ // them, making the estimate scale with params^2 — a 70B model at 8k came out
892
+ // at ~299 GB (real value ~11 GB), which made checkMemoryCompatibility reject
893
+ // every mid/large model. Use the same calibrated linear factor as the Ollama
894
+ // capacity planner (~0.08 GB per 1B params at 4k context).
788
895
  const params = this.estimateModelParams(model);
789
- const layers = Math.round(params * 2); // Rough approximation
790
- const hiddenSize = Math.round(params * 1000); // Rough approximation
791
- return (2 * layers * hiddenSize * contextLength * 2) / (1024 ** 3); // GB
896
+ const ctx = Number.isFinite(contextLength) && contextLength > 0 ? contextLength : 4096;
897
+ const kvFactorPer4k = 0.08; // GB per 1B params at 4k context (fp16)
898
+ return Math.max(0, params * kvFactorPer4k * (ctx / 4096)); // GB
792
899
  }
793
900
 
794
901
  estimateTokensPerSecond(hardware, model) {
@@ -320,7 +320,10 @@ class CalibrationManager {
320
320
  NO_COLOR: '1'
321
321
  }
322
322
  });
323
- const latencyMs = Number((process.hrtime.bigint() - started) / 1_000_000n);
323
+ // Convert ns->ms in floating point: dividing the BigInt first floored away
324
+ // all sub-millisecond precision (a 0.5 ms call read as 0 ms, skewing p50/p95,
325
+ // ttft and tokens/sec). The ns diff is well within Number's safe range.
326
+ const latencyMs = Number(process.hrtime.bigint() - started) / 1_000_000;
324
327
 
325
328
  if (result.error) {
326
329
  const error = new Error(result.error.message || 'Failed to execute runtime prompt.');