llm-checker 3.5.15 → 3.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/README.md +28 -8
  2. package/analyzer/compatibility.js +5 -0
  3. package/analyzer/performance.js +5 -4
  4. package/bin/cli.js +5 -39
  5. package/bin/enhanced_cli.js +449 -24
  6. package/bin/mcp-server.mjs +266 -101
  7. package/package.json +13 -8
  8. package/src/ai/multi-objective-selector.js +118 -11
  9. package/src/calibration/calibration-manager.js +4 -1
  10. package/src/data/model-database.js +489 -5
  11. package/src/data/registry-ingestors.js +751 -0
  12. package/src/data/registry-recommender.js +514 -0
  13. package/src/data/seed/README.md +11 -3
  14. package/src/data/seed/models.db +0 -0
  15. package/src/data/sync-manager.js +32 -18
  16. package/src/hardware/backends/apple-silicon.js +5 -1
  17. package/src/hardware/backends/cuda-detector.js +47 -19
  18. package/src/hardware/backends/intel-detector.js +6 -2
  19. package/src/hardware/backends/rocm-detector.js +6 -2
  20. package/src/hardware/detector.js +57 -30
  21. package/src/hardware/unified-detector.js +129 -25
  22. package/src/index.js +68 -4
  23. package/src/models/ai-check-selector.js +36 -5
  24. package/src/models/deterministic-selector.js +179 -18
  25. package/src/models/expanded_database.js +9 -5
  26. package/src/models/intelligent-selector.js +87 -1
  27. package/src/models/moe-assumptions.js +11 -0
  28. package/src/models/requirements.js +16 -11
  29. package/src/models/scoring-core.js +341 -0
  30. package/src/models/scoring-engine.js +9 -2
  31. package/src/ollama/capacity-planner.js +15 -2
  32. package/src/ollama/client.js +70 -30
  33. package/src/ollama/enhanced-client.js +20 -2
  34. package/src/ollama/manager.js +14 -2
  35. package/src/policy/cli-policy.js +8 -2
  36. package/src/policy/policy-engine.js +2 -1
  37. package/src/provenance/model-provenance.js +4 -1
  38. package/src/ui/cli-theme.js +47 -7
  39. package/src/ui/interactive-panel.js +162 -24
@@ -62,6 +62,25 @@ Respond with JSON only, no additional text.`;
62
62
  /**
63
63
  * Main AI-Check function
64
64
  */
65
+ /** Normalize the --models option (array, or comma/space-separated string) to a list. */
66
+ parseModelFilter(models) {
67
+ if (!models) return [];
68
+ const list = Array.isArray(models) ? models : String(models).split(/[,\s]+/);
69
+ return list.map((m) => String(m).trim().toLowerCase()).filter(Boolean);
70
+ }
71
+
72
+ /** True when an Ollama DB model matches a user-supplied name fragment. */
73
+ modelMatchesFilter(model, needle) {
74
+ const identifier = String(model?.model_identifier || '').toLowerCase();
75
+ const name = String(model?.model_name || '').toLowerCase();
76
+ return (
77
+ identifier === needle ||
78
+ name === needle ||
79
+ identifier.includes(needle) ||
80
+ name.includes(needle)
81
+ );
82
+ }
83
+
65
84
  async aiCheck(options = {}) {
66
85
  const {
67
86
  category = 'general',
@@ -90,11 +109,23 @@ Respond with JSON only, no additional text.`;
90
109
  const budget = hardware.gpu.unified ? hardware.usableMemGB :
91
110
  (hardware.gpu.vramGB || hardware.usableMemGB);
92
111
 
93
- // Filter models by category first
94
- const categoryModels = this.filterOllamaModelsByCategory(allOllamaModels, category);
95
-
96
- if (!silent) {
97
- console.log(chalk.cyan('│') + ` ${categoryModels.length} models match ${category} category`);
112
+ // Optional explicit model filter (--models qwen2.5,llama3.1). When present
113
+ // it overrides the category filter: the user asked for specific models.
114
+ const modelFilter = this.parseModelFilter(options.models);
115
+ let categoryModels;
116
+ if (modelFilter.length > 0) {
117
+ categoryModels = allOllamaModels.filter((model) =>
118
+ modelFilter.some((needle) => this.modelMatchesFilter(model, needle))
119
+ );
120
+ if (!silent) {
121
+ console.log(chalk.cyan('│') + ` Restricted to ${categoryModels.length} model(s) matching --models`);
122
+ }
123
+ } else {
124
+ // Filter models by category first
125
+ categoryModels = this.filterOllamaModelsByCategory(allOllamaModels, category);
126
+ if (!silent) {
127
+ console.log(chalk.cyan('│') + ` ${categoryModels.length} models match ${category} category`);
128
+ }
98
129
  }
99
130
 
100
131
  // Evaluate each model using deterministic scoring
@@ -1556,10 +1556,21 @@ class DeterministicModelSelector {
1556
1556
  const S = speedEstimate.score;
1557
1557
  const F = this.calculateFitScore(requiredGB, budget);
1558
1558
  const C = this.calculateContextScore(model, targetCtx);
1559
+ const capacityAdjustment = this.calculateHighCapacitySizeAdjustment(
1560
+ hardware,
1561
+ model,
1562
+ budget,
1563
+ category,
1564
+ optimizeFor
1565
+ );
1559
1566
 
1560
1567
  // 4. Calculate final weighted score
1561
1568
  const weights = this.getScoringWeights(category, optimizeFor);
1562
- const score = Math.round((Q * weights[0] + S * weights[1] + F * weights[2] + C * weights[3]) * 10) / 10;
1569
+ const weightedScore = Q * weights[0] + S * weights[1] + F * weights[2] + C * weights[3];
1570
+ const score = Math.max(
1571
+ 0,
1572
+ Math.min(100, Math.round((weightedScore + capacityAdjustment.score) * 10) / 10)
1573
+ );
1563
1574
 
1564
1575
  // 5. Build rationale
1565
1576
  const rationale = this.buildRationale(
@@ -1572,7 +1583,8 @@ class DeterministicModelSelector {
1572
1583
  Q,
1573
1584
  S,
1574
1585
  memoryEstimate,
1575
- speedEstimate
1586
+ speedEstimate,
1587
+ capacityAdjustment
1576
1588
  );
1577
1589
 
1578
1590
  return {
@@ -1599,7 +1611,8 @@ class DeterministicModelSelector {
1599
1611
  runtime: speedEstimate.runtime,
1600
1612
  moe: speedEstimate.moe
1601
1613
  },
1602
- components: { Q, S, F, C }
1614
+ components: { Q, S, F, C, H: capacityAdjustment.score },
1615
+ optimizeFor
1603
1616
  };
1604
1617
  }
1605
1618
 
@@ -1858,6 +1871,9 @@ class DeterministicModelSelector {
1858
1871
  if (hardware.cpu.cores >= 8) base *= 1.1;
1859
1872
  if (hardware.acceleration.supports_metal || hardware.acceleration.supports_cuda) base *= 1.2;
1860
1873
 
1874
+ const acceleratorScale = this.calculateAcceleratorSpeedScale(hardware, backend);
1875
+ base *= acceleratorScale.multiplier;
1876
+
1861
1877
  const normalizedRuntime = normalizeMoERuntime(runtime);
1862
1878
  const moe = estimateMoESpeedMultiplier({
1863
1879
  model,
@@ -1880,7 +1896,46 @@ class DeterministicModelSelector {
1880
1896
  estimatedTPS,
1881
1897
  score,
1882
1898
  runtime: normalizedRuntime,
1883
- moe
1899
+ moe,
1900
+ acceleratorScale
1901
+ };
1902
+ }
1903
+
1904
+ calculateAcceleratorSpeedScale(hardware = {}, backend = 'cpu_x86') {
1905
+ if (backend !== 'cuda' && backend !== 'metal') {
1906
+ return { multiplier: 1, reason: null };
1907
+ }
1908
+
1909
+ const gpu = hardware.gpu || {};
1910
+ const memory = hardware.memory || {};
1911
+ const toFiniteNumber = (value, fallback = 0) => {
1912
+ const parsed = Number(value);
1913
+ return Number.isFinite(parsed) ? parsed : fallback;
1914
+ };
1915
+ const vramGB = toFiniteNumber(gpu.vramGB ?? gpu.vram ?? gpu.totalVRAM, 0);
1916
+ const ramGB = toFiniteNumber(memory.totalGB ?? memory.total, 0);
1917
+ const acceleratorMemoryGB = backend === 'metal' && Boolean(gpu.unified)
1918
+ ? Math.max(vramGB, ramGB)
1919
+ : vramGB;
1920
+ const gpuCount = Math.max(1, toFiniteNumber(gpu.gpuCount ?? gpu.count, 1));
1921
+
1922
+ let multiplier = 1;
1923
+ if (acceleratorMemoryGB >= 160) multiplier *= 3.2;
1924
+ else if (acceleratorMemoryGB >= 96) multiplier *= 2.6;
1925
+ else if (acceleratorMemoryGB >= 80) multiplier *= 2.2;
1926
+ else if (acceleratorMemoryGB >= 48) multiplier *= 1.7;
1927
+ else if (acceleratorMemoryGB >= 24) multiplier *= 1.15;
1928
+
1929
+ if (backend === 'cuda' && gpuCount > 1) {
1930
+ multiplier *= Math.min(1.8, 1 + ((gpuCount - 1) * 0.25));
1931
+ }
1932
+
1933
+ const rounded = Math.round(multiplier * 100) / 100;
1934
+ return {
1935
+ multiplier: rounded,
1936
+ reason: rounded > 1
1937
+ ? `${backend.toUpperCase()} capacity x${rounded}`
1938
+ : null
1884
1939
  };
1885
1940
  }
1886
1941
 
@@ -1888,13 +1943,79 @@ class DeterministicModelSelector {
1888
1943
  const ratio = requiredGB / budgetGB;
1889
1944
  if (ratio <= 0.9) return 100;
1890
1945
  if (ratio <= 1.0) return 70;
1891
- return 0; // Should be filtered out earlier
1946
+ return 0; // Unreachable in practice: evaluateModel drops requiredGB > budget.
1892
1947
  }
1893
1948
 
1894
1949
  calculateContextScore(model, targetCtx) {
1895
- if (model.ctxMax >= targetCtx) return 100;
1896
- if (model.ctxMax >= targetCtx * 0.5) return 70;
1897
- return 0; // Should be filtered out earlier
1950
+ const ctxMax = Number(model?.ctxMax) || 0;
1951
+ if (ctxMax >= targetCtx) return 100;
1952
+ if (ctxMax >= targetCtx * 0.5) return 70;
1953
+ // Context is NOT pre-filtered: a model that cannot serve the requested
1954
+ // context still scores here (0 for this component) and stays eligible,
1955
+ // weighted down rather than excluded.
1956
+ return 0;
1957
+ }
1958
+
1959
+ getHighCapacitySizeTarget(budgetGB, hardware = {}) {
1960
+ if (!Number.isFinite(budgetGB) || budgetGB < 32) return null;
1961
+
1962
+ const isMultiGPU = Boolean(hardware?.gpu?.isMultiGPU);
1963
+ if (budgetGB >= 128) return { minParamsB: 30, sweetSpotParamsB: 70 };
1964
+ if (budgetGB >= 80) return { minParamsB: 30, sweetSpotParamsB: 70 };
1965
+ if (budgetGB >= 48) return { minParamsB: 20, sweetSpotParamsB: 34 };
1966
+ if (budgetGB >= 32 && isMultiGPU) return { minParamsB: 30, sweetSpotParamsB: 30 };
1967
+ if (budgetGB >= 32) return { minParamsB: 13, sweetSpotParamsB: 30 };
1968
+ return null;
1969
+ }
1970
+
1971
+ calculateHighCapacitySizeAdjustment(hardware, model, budgetGB, category, optimizeFor = 'balanced') {
1972
+ const objective = this.normalizeOptimizationObjective(optimizeFor);
1973
+ if (objective === 'speed' || category === 'embeddings') {
1974
+ return { score: 0, reason: null };
1975
+ }
1976
+
1977
+ const normalizedHardware = this.normalizeHardwareProfile(hardware || {});
1978
+ const tier = this.mapHardwareTier(normalizedHardware);
1979
+ const highCapacityTiers = new Set(['very_high', 'ultra_high', 'extreme', 'flagship']);
1980
+ const target = this.getHighCapacitySizeTarget(budgetGB, normalizedHardware);
1981
+ const hasHighCapacitySignal =
1982
+ Boolean(target) ||
1983
+ highCapacityTiers.has(tier) ||
1984
+ Number(normalizedHardware?.gpu?.vramGB || 0) >= 48;
1985
+
1986
+ if (!hasHighCapacitySignal || !target) {
1987
+ return { score: 0, reason: null };
1988
+ }
1989
+
1990
+ const params = this.parseBillionsValue(model?.paramsB);
1991
+ if (!Number.isFinite(params) || params <= 0) {
1992
+ return { score: 0, reason: null };
1993
+ }
1994
+
1995
+ const categoryMultiplier = category === 'multimodal' ? 0.6 : 1;
1996
+ if (params < target.minParamsB) {
1997
+ const deficitRatio = (target.minParamsB - params) / target.minParamsB;
1998
+ const penalty = -Math.min(24, deficitRatio * 24) * categoryMultiplier;
1999
+ const roundedPenalty = Math.round(penalty * 10) / 10;
2000
+ return {
2001
+ score: roundedPenalty,
2002
+ reason: `below ${target.minParamsB}B high-capacity floor`
2003
+ };
2004
+ }
2005
+
2006
+ const distanceRatio = Math.min(
2007
+ 1,
2008
+ Math.abs(params - target.sweetSpotParamsB) / target.sweetSpotParamsB
2009
+ );
2010
+ const bonus = Math.max(0, 12 * (1 - distanceRatio)) * categoryMultiplier;
2011
+ const roundedBonus = Math.round(bonus * 10) / 10;
2012
+
2013
+ return {
2014
+ score: roundedBonus,
2015
+ reason: roundedBonus > 0
2016
+ ? `${target.sweetSpotParamsB}B high-capacity target`
2017
+ : null
2018
+ };
1898
2019
  }
1899
2020
 
1900
2021
  estimatePracticalMaxParamsForBudget(budgetGB) {
@@ -1994,7 +2115,19 @@ class DeterministicModelSelector {
1994
2115
  return highCapacityPromoted;
1995
2116
  }
1996
2117
 
1997
- buildRationale(hardware, model, quant, requiredGB, budget, category, Q, S, memoryEstimate = null, speedEstimate = null) {
2118
+ buildRationale(
2119
+ hardware,
2120
+ model,
2121
+ quant,
2122
+ requiredGB,
2123
+ budget,
2124
+ category,
2125
+ Q,
2126
+ S,
2127
+ memoryEstimate = null,
2128
+ speedEstimate = null,
2129
+ capacityAdjustment = null
2130
+ ) {
1998
2131
  const parts = [];
1999
2132
 
2000
2133
  // Memory fit
@@ -2027,6 +2160,14 @@ class DeterministicModelSelector {
2027
2160
  const multiplier = Number(speedEstimate.moe.multiplier || 1).toFixed(2);
2028
2161
  parts.push(`MoE speed x${multiplier} (${runtimeLabel})`);
2029
2162
  }
2163
+
2164
+ if (speedEstimate?.acceleratorScale?.multiplier > 1) {
2165
+ parts.push(speedEstimate.acceleratorScale.reason);
2166
+ }
2167
+
2168
+ if (capacityAdjustment?.reason) {
2169
+ parts.push(capacityAdjustment.reason);
2170
+ }
2030
2171
 
2031
2172
  // Size sweet spot
2032
2173
  if (model.paramsB >= 7 && model.paramsB <= 13) {
@@ -2114,14 +2255,21 @@ class DeterministicModelSelector {
2114
2255
 
2115
2256
  updateCandidateWithMeasuredSpeed(candidate, measuredTPS, category) {
2116
2257
  const normalizedS = this.normalizeTPSToScore(measuredTPS, category);
2117
-
2118
- // Recalculate final score with measured speed
2119
- const weights = this.categoryWeights[category];
2120
- const { Q, F, C } = candidate.components;
2121
-
2258
+
2259
+ // Re-score with the measured speed using the SAME weighting source as
2260
+ // evaluateModel: getScoringWeights honours the user's optimizeFor profile and
2261
+ // falls back to the general weights for categories (e.g. 'talking') that have
2262
+ // no entry in DETERMINISTIC_WEIGHTS — indexing this.categoryWeights[category]
2263
+ // directly threw a TypeError for those. We also re-add the stored capacity
2264
+ // adjustment (H) and clamp, so a probed score stays comparable to a
2265
+ // non-probed one instead of being silently lower.
2266
+ const weights = this.getScoringWeights(category, candidate.optimizeFor || 'balanced');
2267
+ const { Q, F, C, H = 0 } = candidate.components;
2268
+
2122
2269
  candidate.estTPS = measuredTPS;
2123
2270
  candidate.components.S = normalizedS;
2124
- candidate.score = Math.round((Q * weights[0] + normalizedS * weights[1] + F * weights[2] + C * weights[3]) * 10) / 10;
2271
+ const weighted = Q * weights[0] + normalizedS * weights[1] + F * weights[2] + C * weights[3];
2272
+ candidate.score = Math.max(0, Math.min(100, Math.round((weighted + H) * 10) / 10));
2125
2273
  }
2126
2274
 
2127
2275
  normalizeTPSToScore(tps, category) {
@@ -2194,6 +2342,9 @@ class DeterministicModelSelector {
2194
2342
  estimatedRAM: candidate.requiredGB,
2195
2343
  reasoning: candidate.rationale,
2196
2344
  runtime: candidate.runtime || candidate.speed?.runtime || 'ollama',
2345
+ installCommand: candidate.meta.installCommand || provenance.install_command || '',
2346
+ downloadUrl: candidate.meta.downloadUrl || provenance.download_url || '',
2347
+ artifactFormat: candidate.meta.artifact?.format || '',
2197
2348
  memoryAssumptionSource: candidate.memory?.assumptionSource || 'dense_params',
2198
2349
  speedAssumptions: candidate.speed?.moe ? {
2199
2350
  applied: Boolean(candidate.speed.moe.applied),
@@ -2375,19 +2526,24 @@ class DeterministicModelSelector {
2375
2526
  Object.entries(recommendations).forEach(([category, data]) => {
2376
2527
  const bestModel = data.bestModels[0];
2377
2528
  if (bestModel) {
2529
+ const command = bestModel.installCommand ||
2530
+ bestModel.provenance?.install_command ||
2531
+ `ollama pull ${bestModel.model_identifier}`;
2378
2532
  summary.by_category[category] = {
2379
2533
  name: bestModel.model_name || bestModel.name,
2380
2534
  identifier: bestModel.model_identifier,
2381
2535
  score: Math.round(bestModel.categoryScore || bestModel.score),
2382
- command: `ollama pull ${bestModel.model_identifier}`,
2536
+ command,
2383
2537
  size: this.formatModelSize(bestModel),
2384
2538
  quantization: bestModel.quantization || bestModel.quant || 'Q4_K_M',
2539
+ runtime: bestModel.runtime || bestModel.provenance?.runtime || 'ollama',
2385
2540
  pulls: bestModel.pulls || 0,
2386
2541
  source: bestModel.source || bestModel.provenance?.source || 'unknown',
2387
2542
  registry: bestModel.registry || bestModel.provenance?.registry || 'unknown',
2388
2543
  version: bestModel.version || bestModel.provenance?.version || 'unknown',
2389
2544
  license: bestModel.license || bestModel.provenance?.license || 'unknown',
2390
2545
  digest: bestModel.digest || bestModel.provenance?.digest || 'unknown',
2546
+ download_url: bestModel.downloadUrl || bestModel.provenance?.download_url || '',
2391
2547
  provenance: bestModel.provenance || {
2392
2548
  source: bestModel.source || 'unknown',
2393
2549
  registry: bestModel.registry || 'unknown',
@@ -2397,7 +2553,7 @@ class DeterministicModelSelector {
2397
2553
  }
2398
2554
  };
2399
2555
 
2400
- summary.quick_commands.push(`ollama pull ${bestModel.model_identifier}`);
2556
+ summary.quick_commands.push(command);
2401
2557
 
2402
2558
  const isGeneralCategory = ['general', 'coding', 'talking', 'reading'].includes(category);
2403
2559
  const score = bestModel.categoryScore || bestModel.score || 0;
@@ -2411,18 +2567,23 @@ class DeterministicModelSelector {
2411
2567
  });
2412
2568
 
2413
2569
  if (bestOverallModel) {
2570
+ const command = bestOverallModel.installCommand ||
2571
+ bestOverallModel.provenance?.install_command ||
2572
+ `ollama pull ${bestOverallModel.model_identifier}`;
2414
2573
  summary.best_overall = {
2415
2574
  name: bestOverallModel.model_name || bestOverallModel.name,
2416
2575
  identifier: bestOverallModel.model_identifier,
2417
2576
  category: bestOverallCategory,
2418
2577
  score: Math.round(bestOverallScore),
2419
- command: `ollama pull ${bestOverallModel.model_identifier}`,
2578
+ command,
2420
2579
  quantization: bestOverallModel.quantization || bestOverallModel.quant || 'Q4_K_M',
2580
+ runtime: bestOverallModel.runtime || bestOverallModel.provenance?.runtime || 'ollama',
2421
2581
  source: bestOverallModel.source || bestOverallModel.provenance?.source || 'unknown',
2422
2582
  registry: bestOverallModel.registry || bestOverallModel.provenance?.registry || 'unknown',
2423
2583
  version: bestOverallModel.version || bestOverallModel.provenance?.version || 'unknown',
2424
2584
  license: bestOverallModel.license || bestOverallModel.provenance?.license || 'unknown',
2425
2585
  digest: bestOverallModel.digest || bestOverallModel.provenance?.digest || 'unknown',
2586
+ download_url: bestOverallModel.downloadUrl || bestOverallModel.provenance?.download_url || '',
2426
2587
  provenance: bestOverallModel.provenance || {
2427
2588
  source: bestOverallModel.source || 'unknown',
2428
2589
  registry: bestOverallModel.registry || 'unknown',
@@ -1007,18 +1007,22 @@ class ExpandedModelsDatabase {
1007
1007
  }
1008
1008
 
1009
1009
  estimateMemoryUsage(model) {
1010
- const sizeGB = parseFloat(model.size.replace(/[^\d.]/g, ''));
1010
+ // Derive footprint from parameter count, not by stripping the unit off the
1011
+ // size string and treating the bare number as gigabytes — that read a 774M
1012
+ // model ("774M") as ~774 GB and a 22M model as ~22 GB. ~0.7 GB per 1B params
1013
+ // is a reasonable quantized-runtime footprint baseline.
1014
+ const sizeGB = this.extractModelParams(model) * 0.7;
1011
1015
 
1012
1016
  // Rough estimates including model loading overhead
1013
1017
  return {
1014
- minimal: Math.round(sizeGB * 1.2), // With quantization
1015
- typical: Math.round(sizeGB * 1.5), // Standard loading
1016
- maximum: Math.round(sizeGB * 2.0) // With full context
1018
+ minimal: Math.max(1, Math.round(sizeGB * 1.2)), // With quantization
1019
+ typical: Math.max(1, Math.round(sizeGB * 1.5)), // Standard loading
1020
+ maximum: Math.max(1, Math.round(sizeGB * 2.0)) // With full context
1017
1021
  };
1018
1022
  }
1019
1023
 
1020
1024
  estimatePowerConsumption(model, hardware) {
1021
- const sizeGB = parseFloat(model.size.replace(/[^\d.]/g, ''));
1025
+ const sizeGB = this.extractModelParams(model) * 0.7;
1022
1026
  const tier = this.getHardwareTier(hardware);
1023
1027
 
1024
1028
  const basePower = {
@@ -10,6 +10,7 @@ const ScoringEngine = require('./scoring-engine');
10
10
  const UnifiedDetector = require('../hardware/unified-detector');
11
11
  const PolicyManager = require('../policy/policy-manager');
12
12
  const PolicyEngine = require('../policy/policy-engine');
13
+ const { rankModels } = require('./scoring-core');
13
14
 
14
15
  function isPlainObject(value) {
15
16
  return typeof value === 'object' && value !== null && !Array.isArray(value);
@@ -66,7 +67,9 @@ class IntelligentSelector {
66
67
  // Apply filters
67
68
  const filtered = this.applyFilters(variants, opts, hardware);
68
69
 
69
- // Score all filtered variants
70
+ // Score all filtered variants. ScoringEngine still produces the
71
+ // per-variant `score` objects (final/components/meta) consumed by the
72
+ // smart-recommend display, but the RANKING is unified below.
70
73
  const scored = this.scoring.filterAndScore(filtered, hardware, {
71
74
  useCase: opts.useCase,
72
75
  targetContext: opts.targetContext,
@@ -75,6 +78,12 @@ class IntelligentSelector {
75
78
  headroom: opts.headroom || 2
76
79
  });
77
80
 
81
+ // Unify ranking with the canonical scoring core (issue #88): re-order
82
+ // the scored list and rewrite each item's final score using the shared
83
+ // DeterministicModelSelector so smart-recommend agrees with
84
+ // `check`/`recommend` and inherits the PR #89 high-capacity floor.
85
+ await this.applyUnifiedRanking(scored, hardware, opts);
86
+
78
87
  const policyEngine = this.resolvePolicyEngine(opts);
79
88
  const scoredWithPolicy = policyEngine.evaluateScoredVariants(
80
89
  scored,
@@ -114,6 +123,83 @@ class IntelligentSelector {
114
123
  };
115
124
  }
116
125
 
126
+ /**
127
+ * Re-rank the ScoringEngine-scored variants using the canonical scoring
128
+ * core so smart-recommend's ordering and headline scores match
129
+ * `check`/`recommend` and inherit the high-capacity right-sizing floor.
130
+ *
131
+ * Mutates `scored` in place: it is sorted by the unified score and each
132
+ * item's `score.final` is overwritten with the canonical 0-100 score.
133
+ * Component/meta sub-scores are left intact so the existing display (which
134
+ * shows Q/S/F and estimated TPS) keeps working. If the core cannot rank a
135
+ * variant (or throws), that item keeps its original ScoringEngine score and
136
+ * sorts after the unified ones, preserving a sensible fallback ordering.
137
+ */
138
+ async applyUnifiedRanking(scored, hardware, opts = {}) {
139
+ if (!Array.isArray(scored) || scored.length === 0) return scored;
140
+
141
+ let ranking;
142
+ try {
143
+ ranking = await rankModels(
144
+ scored.map((item) => item.variant),
145
+ hardware,
146
+ {
147
+ category: opts.useCase || 'general',
148
+ optimizeFor: opts.optimizeFor || opts.optimize || 'balanced',
149
+ runtime: opts.runtime || 'ollama',
150
+ topN: scored.length
151
+ }
152
+ );
153
+ } catch (error) {
154
+ return scored; // Defensive: keep original ScoringEngine ordering.
155
+ }
156
+
157
+ if (!ranking || !Array.isArray(ranking.candidates)) return scored;
158
+
159
+ // Map each source variant -> its unified score + ordering index.
160
+ const unifiedByVariant = new Map();
161
+ ranking.candidates.forEach((candidate, index) => {
162
+ const source = candidate?.meta?.__source;
163
+ if (!source) return;
164
+ unifiedByVariant.set(source, {
165
+ unifiedScore: Math.round(candidate.score * 10) / 10,
166
+ rank: index,
167
+ quant: candidate.quant,
168
+ estimatedTPS: candidate.estTPS
169
+ });
170
+ });
171
+
172
+ for (const item of scored) {
173
+ const unified = unifiedByVariant.get(item.variant);
174
+ if (!unified) {
175
+ // Not ranked by the core (e.g. filtered out): sort last and tag
176
+ // so any downstream tie-breaks are deterministic.
177
+ item.__unifiedRank = Number.MAX_SAFE_INTEGER;
178
+ continue;
179
+ }
180
+ item.__unifiedRank = unified.rank;
181
+ if (item.score) {
182
+ item.score.final = Math.min(100, Math.max(0, Math.round(unified.unifiedScore)));
183
+ if (item.score.meta) {
184
+ item.score.meta.unifiedScore = unified.unifiedScore;
185
+ }
186
+ }
187
+ }
188
+
189
+ scored.sort((a, b) => {
190
+ const ra = Number.isFinite(a.__unifiedRank) ? a.__unifiedRank : Number.MAX_SAFE_INTEGER;
191
+ const rb = Number.isFinite(b.__unifiedRank) ? b.__unifiedRank : Number.MAX_SAFE_INTEGER;
192
+ if (ra !== rb) return ra - rb;
193
+ return (b.score?.final || 0) - (a.score?.final || 0);
194
+ });
195
+
196
+ for (const item of scored) {
197
+ delete item.__unifiedRank;
198
+ }
199
+
200
+ return scored;
201
+ }
202
+
117
203
  /**
118
204
  * Resolve policy engine from explicit options, in-memory policy, or policy file.
119
205
  */
@@ -24,6 +24,14 @@ const MOE_RUNTIME_PROFILES = Object.freeze({
24
24
  maxEffectiveGain: 2.65,
25
25
  notes: ['optimized scheduler', 'better expert batching', 'lower offload pressure']
26
26
  }),
27
+ transformers: Object.freeze({
28
+ runtime: 'transformers',
29
+ routingOverhead: 0.15,
30
+ communicationOverhead: 0.10,
31
+ offloadOverhead: 0.06,
32
+ maxEffectiveGain: 2.45,
33
+ notes: ['general Hugging Face path', 'broad architecture support', 'higher Python overhead than vLLM']
34
+ }),
27
35
  mlx: Object.freeze({
28
36
  runtime: 'mlx',
29
37
  routingOverhead: 0.16,
@@ -45,6 +53,9 @@ const MOE_RUNTIME_PROFILES = Object.freeze({
45
53
  const RUNTIME_ALIASES = Object.freeze({
46
54
  ollama: 'ollama',
47
55
  vllm: 'vllm',
56
+ transformers: 'transformers',
57
+ 'huggingface-transformers': 'transformers',
58
+ hf: 'transformers',
48
59
  mlx: 'mlx',
49
60
  'mlx-lm': 'mlx',
50
61
  mlx_lm: 'mlx',
@@ -134,17 +134,22 @@ class RequirementsCalculator {
134
134
  }
135
135
 
136
136
  parseModelSize(sizeString) {
137
- const normalized = sizeString.toLowerCase().replace(/[^0-9.kmb]/g, '');
138
-
139
- if (normalized.includes('k')) {
140
- return parseFloat(normalized.replace('k', '')) / 1000;
141
- } else if (normalized.includes('m')) {
142
- return parseFloat(normalized.replace('m', '')) / 1000;
143
- } else if (normalized.includes('b')) {
144
- return parseFloat(normalized.replace('b', ''));
145
- } else {
146
- return parseFloat(normalized);
147
- }
137
+ // Anchor the number to its unit instead of globally stripping every char
138
+ // that isn't 0-9.kmb: the old approach kept stray k/m/b from model words, so
139
+ // "Llama 3.2 3B" normalized to "m3.23b" and parsed as 0.003B, and unit-only
140
+ // inputs produced NaN. Prefer a number that carries a B/M/K unit (the real
141
+ // size token, "3B") over a bare number (a version like "3.2").
142
+ const text = String(sizeString || '');
143
+ const match = text.match(/(\d+(?:\.\d+)?)\s*([kmb])\b/i) || text.match(/(\d+(?:\.\d+)?)/);
144
+ if (!match) return 1;
145
+
146
+ const value = parseFloat(match[1]);
147
+ if (!Number.isFinite(value)) return 1;
148
+
149
+ const unit = (match[2] || 'b').toLowerCase();
150
+ if (unit === 'k') return value / 1_000_000; // thousands of params -> billions
151
+ if (unit === 'm') return value / 1000; // millions of params -> billions
152
+ return value; // billions
148
153
  }
149
154
 
150
155
  getContextMultiplier(contextLength) {