llm-checker 3.5.14 → 3.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,6 +13,21 @@ const si = require('systeminformation');
13
13
  const { execSync } = require('child_process');
14
14
  const { normalizePlatform } = require('../utils/platform');
15
15
 
16
/**
 * Curated names for recent GPUs whose PCI device id the distro pci.ids
 * database cannot resolve yet, so lspci / systeminformation only report them
 * as a bare "Device <id>".
 *
 * Keys are lowercase 4-hex PCI device ids. Every entry carries:
 *   - family: canonical match key, so the different raw views of the SAME
 *             card produced by different detection sources collapse into one
 *             inventory entry;
 *   - type:   'dedicated' or 'integrated';
 *   - name:   the human-readable model name to report.
 *
 * Only the newest cards need a row here: ids missing from the table degrade
 * gracefully to a stable `pci:<id>` match key.
 *
 * NOTE(review): the table is keyed by device id only (no vendor id), and PCI
 * device ids are only unique within one vendor — keep that in mind when
 * adding rows or consuming the table.
 */
const PCI_GPU_MAP = Object.fromEntries(
  [
    // NVIDIA Blackwell (RTX 50 series, desktop)
    ['2f04', 'rtx5070', 'dedicated', 'NVIDIA GeForce RTX 5070'],
    ['2c02', 'rtx5080', 'dedicated', 'NVIDIA GeForce RTX 5080'],
    ['2b85', 'rtx5090', 'dedicated', 'NVIDIA GeForce RTX 5090'],
    // AMD Raphael / Granite Ridge desktop iGPU (Ryzen 7000/9000 non-G)
    ['13c0', 'amd-raphael-igpu', 'integrated', 'AMD Radeon Graphics (Raphael)']
  ].map(([deviceId, family, type, name]) => [deviceId, { family, type, name }])
);
30
+
16
31
  class UnifiedDetector {
17
32
  constructor() {
18
33
  this.backends = {
@@ -613,14 +628,19 @@ class UnifiedDetector {
613
628
 
614
629
  const normalized = controllers
615
630
  .map((controller) => {
616
- const name = String(controller?.model || controller?.name || '').replace(/\s+/g, ' ').trim();
631
+ let name = String(controller?.model || controller?.name || '').replace(/\s+/g, ' ').trim();
617
632
  if (!name || name.toLowerCase() === 'unknown') return null;
618
633
  if (this.isRemoteDisplayModel(name)) return null;
619
634
 
620
635
  const nameLower = name.toLowerCase();
621
636
  if (nameLower.includes('microsoft basic') || nameLower.includes('standard vga')) return null;
622
637
 
623
- const isIntegrated = this.isIntegratedGPUModel(name);
638
+ // Resolve recent cards that the runtime could only report as a bare
639
+ // "Device <id>" so they get a real name and correct integrated flag.
640
+ const mapped = this.resolveMappedGpu(name) || this.resolveMappedGpu(controller?.deviceId);
641
+ if (mapped) name = mapped.name;
642
+
643
+ const isIntegrated = mapped ? mapped.type === 'integrated' : this.isIntegratedGPUModel(name);
624
644
  let vram = isIntegrated
625
645
  ? this.estimateIntegratedFallbackMemory(controller, memoryInfo)
626
646
  : this.normalizeFallbackVRAM(controller?.vram || controller?.memoryTotal || controller?.memory || 0);
@@ -711,30 +731,55 @@ class UnifiedDetector {
711
731
 
712
732
  if (!isNvidia && !isAMD && !isIntel) continue;
713
733
 
714
- const genericName = line
715
- .replace(/^[0-9a-f:.]+\s+/i, '')
716
- .replace(/\(rev\s+[0-9a-f]+\)$/i, '')
717
- .trim();
734
+ const vendorLabel = isNvidia ? 'NVIDIA' : (isAMD ? 'AMD' : 'Intel');
735
+ const pciId = this.extractPciDeviceId(line);
736
+ const mapped = this.resolveMappedGpu(line);
718
737
 
738
+ // Prefer the resolved model name inside a trailing "[Model] [vvvv:dddd]"
739
+ // pair (e.g. "[GeForce RTX 4060]"). Otherwise clean the raw lspci line
740
+ // down to a readable device string instead of using the whole line.
719
741
  const bracketName = line.match(/\[(?![0-9a-f]{4}:[0-9a-f]{4}\])([^\]]+)\]\s*\[[0-9a-f]{4}:[0-9a-f]{4}\]/i);
720
- const name = (bracketName?.[1] || genericName || 'Unknown GPU').replace(/\s+/g, ' ').trim();
721
- if (!name || name.toLowerCase() === 'unknown gpu') continue;
742
+ let name = (bracketName?.[1] || '').replace(/\s+/g, ' ').trim();
743
+
744
+ if (!name) {
745
+ name = line
746
+ .replace(/^[0-9a-f]{2,4}:[0-9a-f]{2}\.[0-9a-f]\s+/i, '') // PCI address
747
+ .replace(/^(?:vga compatible|3d|display)\s+controller\s+\[[0-9a-f]{4}\]:\s*/i, '') // class prefix
748
+ .replace(/\s*\[[0-9a-f]{4}:[0-9a-f]{4}\]/i, '') // [vvvv:dddd]
749
+ .replace(/\s*\(rev\s+[0-9a-f]+\)\s*$/i, '') // (rev xx)
750
+ .replace(/\b(?:corporation|corp\.?|inc\.?|advanced micro devices,?)\b/gi, '')
751
+ .replace(/\[amd\/ati\]/gi, '')
752
+ .replace(/\s+/g, ' ')
753
+ .trim();
754
+ }
755
+
756
+ // If the card could not be resolved to a real model, give it a stable,
757
+ // readable name that carries the PCI id so it dedupes across sources.
758
+ const meaningful = name.replace(/\b(?:nvidia|amd|ati|intel|device|graphics|gpu|controller)\b/gi, '').replace(/[^a-z0-9]/gi, '').trim();
759
+ if (mapped) {
760
+ name = mapped.name;
761
+ } else if (!meaningful) {
762
+ name = pciId ? `${vendorLabel} Device ${pciId.toUpperCase()}` : `${vendorLabel} GPU`;
763
+ }
722
764
 
723
- const isIntegrated = this.isIntegratedGPUModel(name) || isIntel;
765
+ const isIntegrated = mapped
766
+ ? mapped.type === 'integrated'
767
+ : (this.isIntegratedGPUModel(name) || (isIntel && !/\barc\b/i.test(name)));
724
768
  let vram = this.estimateFallbackVRAM(name);
725
769
  if (isIntegrated) {
726
770
  vram = 0;
727
771
  }
728
772
 
729
- const dedupeKey = `${name.toLowerCase()}|${isIntegrated ? 'i' : 'd'}`;
773
+ const dedupeKey = `${this.getGpuMatchKey(name)}|${isIntegrated ? 'i' : 'd'}`;
730
774
  if (seen.has(dedupeKey)) continue;
731
775
  seen.add(dedupeKey);
732
776
 
733
777
  results.push({
734
778
  name,
735
- vendor: isNvidia ? 'NVIDIA' : (isAMD ? 'AMD' : 'Intel'),
779
+ vendor: vendorLabel,
736
780
  type: isIntegrated ? 'integrated' : 'dedicated',
737
781
  memory: { total: vram },
782
+ pciId: pciId || null,
738
783
  source: 'lspci'
739
784
  });
740
785
  }
@@ -746,22 +791,27 @@ class UnifiedDetector {
746
791
  const num = Number(value);
747
792
  if (!Number.isFinite(num) || num <= 0) return 0;
748
793
 
749
- // Bytes -> GB
750
- if (num > 1024 * 1024) {
751
- return Math.round(num / (1024 * 1024 * 1024));
794
+ // Unit inference by magnitude, kept consistent with
795
+ // HardwareDetector.normalizeVRAM so both detection paths agree:
796
+ //
797
+ // > 1e6 -> raw bytes.
798
+ // >= 1024 -> megabytes (systeminformation reporting range).
799
+ // 1 <= v <= 256 -> already gigabytes. The previous "1..80 means GB"
800
+ // band silently returned 0 for legitimate large GB
801
+ // values, so normalizeFallbackVRAM(192) was 0 — the
802
+ // 192 GB box in issue #88 collapsed to nothing. A
803
+ // single GPU realistically tops out around 192 GB.
804
+ // 257 <= v < 1024 -> sub-gigabyte framebuffer in MB -> rounds to 0/1 GB.
805
+ if (num > 1_000_000) {
806
+ return Math.max(0, Math.round(num / (1024 * 1024 * 1024))); // bytes -> GB
752
807
  }
753
-
754
- // MB -> GB
755
808
  if (num >= 1024) {
756
- return Math.round(num / 1024);
809
+ return Math.max(0, Math.round(num / 1024)); // MB -> GB
757
810
  }
758
-
759
- // Likely already GB
760
- if (num >= 1 && num <= 80) {
761
- return Math.round(num);
811
+ if (num <= 256) {
812
+ return Math.round(num); // already GB (plausible single-GPU range)
762
813
  }
763
-
764
- return 0;
814
+ return Math.max(0, Math.round(num / 1024)); // 257..1023 MB -> GB
765
815
  }
766
816
 
767
817
  isIntegratedGPUModel(model) {
@@ -799,6 +849,21 @@ class UnifiedDetector {
799
849
  if (lower.includes('rx 6900') || lower.includes('rx 6800')) return 16;
800
850
  if (lower.includes('rx 6700')) return 12;
801
851
 
852
+ // NVIDIA workstation / datacenter (Blackwell / Ada / Hopper / Ampere).
853
+ // Matched BEFORE the consumer RTX entries and the generic fallbacks so a
854
+ // high-VRAM professional card is not collapsed to a consumer-tier value or
855
+ // 0 (issue #88: dual "RTX PRO 6000" must reach ~192GB total, not ~16GB).
856
+ if (lower.includes('rtx pro 6000') || lower.includes('rtx 6000 blackwell')) return 96;
857
+ if (lower.includes('rtx 6000 ada') || lower.includes('rtx 5000 ada')) return 48;
858
+ if (lower.includes('rtx a6000') || lower.includes('a6000')) return 48;
859
+ if (lower.includes('rtx a5000') || lower.includes('a5000')) return 24;
860
+ if (lower.includes('l40s') || lower.includes('l40')) return 48;
861
+ if (lower.includes('h200')) return 141;
862
+ if (lower.includes('h100')) return 80;
863
+ if (lower.includes('a100') && (lower.includes('40gb') || /a100[\s-]?(?:pcie[\s-]?)?40\b/.test(lower))) return 40;
864
+ if (lower.includes('a100')) return 80; // A100 defaults to the 80GB SKU
865
+ if (lower.includes('a40')) return 48;
866
+
802
867
  if (lower.includes('rtx 5090')) return 32;
803
868
  if (lower.includes('rtx 4090') || lower.includes('rtx 3090')) return 24;
804
869
  if (lower.includes('rtx 5080') || lower.includes('rtx 4080')) return 16;
@@ -817,6 +882,15 @@ class UnifiedDetector {
817
882
  return `${familyMatch[1]}${familyMatch[2]}`;
818
883
  }
819
884
 
885
+ // Different detection sources describe an unresolved card in different
886
+ // ways for the SAME hardware, e.g. systeminformation "Device 2f04" and
887
+ // lspci "...Device [10de:2f04]". Key on the PCI device id (mapped to a
888
+ // canonical family when known) so those collapse to one inventory entry.
889
+ const pciId = this.extractPciDeviceId(name);
890
+ if (pciId) {
891
+ return (PCI_GPU_MAP[pciId] && PCI_GPU_MAP[pciId].family) || `pci:${pciId}`;
892
+ }
893
+
820
894
  const concise = lower
821
895
  .replace(/nvidia|amd|ati|intel|corporation|geforce|radeon|graphics/g, '')
822
896
  .replace(/\s+/g, ' ')
@@ -825,6 +899,26 @@ class UnifiedDetector {
825
899
  return concise || lower;
826
900
  }
827
901
 
902
+ /**
903
+ * Extract a 4-hex PCI device id from a GPU name/description, handling both the
904
+ * lspci "[vendor:device]" form and the bare "Device <id>" form that
905
+ * systeminformation emits for cards it cannot name. Returns null when none.
906
+ */
907
+ extractPciDeviceId(text) {
908
+ const value = String(text || '');
909
+ const bracket = value.match(/\[[0-9a-f]{4}:([0-9a-f]{4})\]/i);
910
+ if (bracket) return bracket[1].toLowerCase();
911
+ const bare = value.match(/\bdevice\s+([0-9a-f]{4})\b/i);
912
+ if (bare) return bare[1].toLowerCase();
913
+ return null;
914
+ }
915
+
916
+ /** Look up a curated mapping for a recent card by PCI device id (or null). */
917
+ resolveMappedGpu(text) {
918
+ const pciId = this.extractPciDeviceId(text);
919
+ return pciId && PCI_GPU_MAP[pciId] ? { pciId, ...PCI_GPU_MAP[pciId] } : null;
920
+ }
921
+
828
922
  /**
829
923
  * Generate hardware fingerprint for benchmarks
830
924
  */
@@ -879,9 +973,19 @@ class UnifiedDetector {
879
973
  summary.bestBackend === 'metal' ||
880
974
  (summary.hasIntegratedGPU && !summary.hasDedicatedGPU && summary.integratedSharedMemory > 0)
881
975
  ) {
882
- return sizeGB <= (summary.effectiveMemory - 2);
976
+ const effectiveMemory = Number(summary.effectiveMemory);
977
+ if (!Number.isFinite(effectiveMemory) || effectiveMemory <= 0) return false;
978
+ return sizeGB <= (effectiveMemory - 2);
883
979
  } else {
884
- const availableVRAM = useMultiGPU ? summary.totalVRAM : (summary.totalVRAM / summary.gpuCount);
980
+ const totalVRAM = Number(summary.totalVRAM);
981
+ if (!Number.isFinite(totalVRAM) || totalVRAM <= 0) return false;
982
+
983
+ // Guard the per-GPU divisor: gpuCount can be 0 when the summary was
984
+ // built without resolved GPU memory, which previously produced
985
+ // Infinity (totalVRAM / 0) and made any model "fit".
986
+ const gpuCount = Math.max(1, Number(summary.gpuCount) || 0);
987
+ const availableVRAM = useMultiGPU ? totalVRAM : (totalVRAM / gpuCount);
988
+ if (!Number.isFinite(availableVRAM) || availableVRAM <= 0) return false;
885
989
  return sizeGB <= (availableVRAM - 2);
886
990
  }
887
991
  }
@@ -62,6 +62,25 @@ Respond with JSON only, no additional text.`;
62
62
  /**
63
63
  * Main AI-Check function
64
64
  */
65
+ /** Normalize the --models option (array, or comma/space-separated string) to a list. */
66
+ parseModelFilter(models) {
67
+ if (!models) return [];
68
+ const list = Array.isArray(models) ? models : String(models).split(/[,\s]+/);
69
+ return list.map((m) => String(m).trim().toLowerCase()).filter(Boolean);
70
+ }
71
+
72
+ /** True when an Ollama DB model matches a user-supplied name fragment. */
73
+ modelMatchesFilter(model, needle) {
74
+ const identifier = String(model?.model_identifier || '').toLowerCase();
75
+ const name = String(model?.model_name || '').toLowerCase();
76
+ return (
77
+ identifier === needle ||
78
+ name === needle ||
79
+ identifier.includes(needle) ||
80
+ name.includes(needle)
81
+ );
82
+ }
83
+
65
84
  async aiCheck(options = {}) {
66
85
  const {
67
86
  category = 'general',
@@ -90,11 +109,23 @@ Respond with JSON only, no additional text.`;
90
109
  const budget = hardware.gpu.unified ? hardware.usableMemGB :
91
110
  (hardware.gpu.vramGB || hardware.usableMemGB);
92
111
 
93
- // Filter models by category first
94
- const categoryModels = this.filterOllamaModelsByCategory(allOllamaModels, category);
95
-
96
- if (!silent) {
97
- console.log(chalk.cyan('│') + ` ${categoryModels.length} models match ${category} category`);
112
+ // Optional explicit model filter (--models qwen2.5,llama3.1). When present
113
+ // it overrides the category filter: the user asked for specific models.
114
+ const modelFilter = this.parseModelFilter(options.models);
115
+ let categoryModels;
116
+ if (modelFilter.length > 0) {
117
+ categoryModels = allOllamaModels.filter((model) =>
118
+ modelFilter.some((needle) => this.modelMatchesFilter(model, needle))
119
+ );
120
+ if (!silent) {
121
+ console.log(chalk.cyan('│') + ` Restricted to ${categoryModels.length} model(s) matching --models`);
122
+ }
123
+ } else {
124
+ // Filter models by category first
125
+ categoryModels = this.filterOllamaModelsByCategory(allOllamaModels, category);
126
+ if (!silent) {
127
+ console.log(chalk.cyan('│') + ` ${categoryModels.length} models match ${category} category`);
128
+ }
98
129
  }
99
130
 
100
131
  // Evaluate each model using deterministic scoring
@@ -1556,10 +1556,21 @@ class DeterministicModelSelector {
1556
1556
  const S = speedEstimate.score;
1557
1557
  const F = this.calculateFitScore(requiredGB, budget);
1558
1558
  const C = this.calculateContextScore(model, targetCtx);
1559
+ const capacityAdjustment = this.calculateHighCapacitySizeAdjustment(
1560
+ hardware,
1561
+ model,
1562
+ budget,
1563
+ category,
1564
+ optimizeFor
1565
+ );
1559
1566
 
1560
1567
  // 4. Calculate final weighted score
1561
1568
  const weights = this.getScoringWeights(category, optimizeFor);
1562
- const score = Math.round((Q * weights[0] + S * weights[1] + F * weights[2] + C * weights[3]) * 10) / 10;
1569
+ const weightedScore = Q * weights[0] + S * weights[1] + F * weights[2] + C * weights[3];
1570
+ const score = Math.max(
1571
+ 0,
1572
+ Math.min(100, Math.round((weightedScore + capacityAdjustment.score) * 10) / 10)
1573
+ );
1563
1574
 
1564
1575
  // 5. Build rationale
1565
1576
  const rationale = this.buildRationale(
@@ -1572,7 +1583,8 @@ class DeterministicModelSelector {
1572
1583
  Q,
1573
1584
  S,
1574
1585
  memoryEstimate,
1575
- speedEstimate
1586
+ speedEstimate,
1587
+ capacityAdjustment
1576
1588
  );
1577
1589
 
1578
1590
  return {
@@ -1599,7 +1611,8 @@ class DeterministicModelSelector {
1599
1611
  runtime: speedEstimate.runtime,
1600
1612
  moe: speedEstimate.moe
1601
1613
  },
1602
- components: { Q, S, F, C }
1614
+ components: { Q, S, F, C, H: capacityAdjustment.score },
1615
+ optimizeFor
1603
1616
  };
1604
1617
  }
1605
1618
 
@@ -1858,6 +1871,9 @@ class DeterministicModelSelector {
1858
1871
  if (hardware.cpu.cores >= 8) base *= 1.1;
1859
1872
  if (hardware.acceleration.supports_metal || hardware.acceleration.supports_cuda) base *= 1.2;
1860
1873
 
1874
+ const acceleratorScale = this.calculateAcceleratorSpeedScale(hardware, backend);
1875
+ base *= acceleratorScale.multiplier;
1876
+
1861
1877
  const normalizedRuntime = normalizeMoERuntime(runtime);
1862
1878
  const moe = estimateMoESpeedMultiplier({
1863
1879
  model,
@@ -1880,7 +1896,46 @@ class DeterministicModelSelector {
1880
1896
  estimatedTPS,
1881
1897
  score,
1882
1898
  runtime: normalizedRuntime,
1883
- moe
1899
+ moe,
1900
+ acceleratorScale
1901
+ };
1902
+ }
1903
+
1904
+ calculateAcceleratorSpeedScale(hardware = {}, backend = 'cpu_x86') {
1905
+ if (backend !== 'cuda' && backend !== 'metal') {
1906
+ return { multiplier: 1, reason: null };
1907
+ }
1908
+
1909
+ const gpu = hardware.gpu || {};
1910
+ const memory = hardware.memory || {};
1911
+ const toFiniteNumber = (value, fallback = 0) => {
1912
+ const parsed = Number(value);
1913
+ return Number.isFinite(parsed) ? parsed : fallback;
1914
+ };
1915
+ const vramGB = toFiniteNumber(gpu.vramGB ?? gpu.vram ?? gpu.totalVRAM, 0);
1916
+ const ramGB = toFiniteNumber(memory.totalGB ?? memory.total, 0);
1917
+ const acceleratorMemoryGB = backend === 'metal' && Boolean(gpu.unified)
1918
+ ? Math.max(vramGB, ramGB)
1919
+ : vramGB;
1920
+ const gpuCount = Math.max(1, toFiniteNumber(gpu.gpuCount ?? gpu.count, 1));
1921
+
1922
+ let multiplier = 1;
1923
+ if (acceleratorMemoryGB >= 160) multiplier *= 3.2;
1924
+ else if (acceleratorMemoryGB >= 96) multiplier *= 2.6;
1925
+ else if (acceleratorMemoryGB >= 80) multiplier *= 2.2;
1926
+ else if (acceleratorMemoryGB >= 48) multiplier *= 1.7;
1927
+ else if (acceleratorMemoryGB >= 24) multiplier *= 1.15;
1928
+
1929
+ if (backend === 'cuda' && gpuCount > 1) {
1930
+ multiplier *= Math.min(1.8, 1 + ((gpuCount - 1) * 0.25));
1931
+ }
1932
+
1933
+ const rounded = Math.round(multiplier * 100) / 100;
1934
+ return {
1935
+ multiplier: rounded,
1936
+ reason: rounded > 1
1937
+ ? `${backend.toUpperCase()} capacity x${rounded}`
1938
+ : null
1884
1939
  };
1885
1940
  }
1886
1941
 
@@ -1888,13 +1943,79 @@ class DeterministicModelSelector {
1888
1943
  const ratio = requiredGB / budgetGB;
1889
1944
  if (ratio <= 0.9) return 100;
1890
1945
  if (ratio <= 1.0) return 70;
1891
- return 0; // Should be filtered out earlier
1946
+ return 0; // Unreachable in practice: evaluateModel drops requiredGB > budget.
1892
1947
  }
1893
1948
 
1894
1949
  calculateContextScore(model, targetCtx) {
1895
- if (model.ctxMax >= targetCtx) return 100;
1896
- if (model.ctxMax >= targetCtx * 0.5) return 70;
1897
- return 0; // Should be filtered out earlier
1950
+ const ctxMax = Number(model?.ctxMax) || 0;
1951
+ if (ctxMax >= targetCtx) return 100;
1952
+ if (ctxMax >= targetCtx * 0.5) return 70;
1953
+ // Context is NOT pre-filtered: a model that cannot serve the requested
1954
+ // context still scores here (0 for this component) and stays eligible,
1955
+ // weighted down rather than excluded.
1956
+ return 0;
1957
+ }
1958
+
1959
+ getHighCapacitySizeTarget(budgetGB, hardware = {}) {
1960
+ if (!Number.isFinite(budgetGB) || budgetGB < 32) return null;
1961
+
1962
+ const isMultiGPU = Boolean(hardware?.gpu?.isMultiGPU);
1963
+ if (budgetGB >= 128) return { minParamsB: 30, sweetSpotParamsB: 70 };
1964
+ if (budgetGB >= 80) return { minParamsB: 30, sweetSpotParamsB: 70 };
1965
+ if (budgetGB >= 48) return { minParamsB: 20, sweetSpotParamsB: 34 };
1966
+ if (budgetGB >= 32 && isMultiGPU) return { minParamsB: 30, sweetSpotParamsB: 30 };
1967
+ if (budgetGB >= 32) return { minParamsB: 13, sweetSpotParamsB: 30 };
1968
+ return null;
1969
+ }
1970
+
1971
+ calculateHighCapacitySizeAdjustment(hardware, model, budgetGB, category, optimizeFor = 'balanced') {
1972
+ const objective = this.normalizeOptimizationObjective(optimizeFor);
1973
+ if (objective === 'speed' || category === 'embeddings') {
1974
+ return { score: 0, reason: null };
1975
+ }
1976
+
1977
+ const normalizedHardware = this.normalizeHardwareProfile(hardware || {});
1978
+ const tier = this.mapHardwareTier(normalizedHardware);
1979
+ const highCapacityTiers = new Set(['very_high', 'ultra_high', 'extreme', 'flagship']);
1980
+ const target = this.getHighCapacitySizeTarget(budgetGB, normalizedHardware);
1981
+ const hasHighCapacitySignal =
1982
+ Boolean(target) ||
1983
+ highCapacityTiers.has(tier) ||
1984
+ Number(normalizedHardware?.gpu?.vramGB || 0) >= 48;
1985
+
1986
+ if (!hasHighCapacitySignal || !target) {
1987
+ return { score: 0, reason: null };
1988
+ }
1989
+
1990
+ const params = this.parseBillionsValue(model?.paramsB);
1991
+ if (!Number.isFinite(params) || params <= 0) {
1992
+ return { score: 0, reason: null };
1993
+ }
1994
+
1995
+ const categoryMultiplier = category === 'multimodal' ? 0.6 : 1;
1996
+ if (params < target.minParamsB) {
1997
+ const deficitRatio = (target.minParamsB - params) / target.minParamsB;
1998
+ const penalty = -Math.min(24, deficitRatio * 24) * categoryMultiplier;
1999
+ const roundedPenalty = Math.round(penalty * 10) / 10;
2000
+ return {
2001
+ score: roundedPenalty,
2002
+ reason: `below ${target.minParamsB}B high-capacity floor`
2003
+ };
2004
+ }
2005
+
2006
+ const distanceRatio = Math.min(
2007
+ 1,
2008
+ Math.abs(params - target.sweetSpotParamsB) / target.sweetSpotParamsB
2009
+ );
2010
+ const bonus = Math.max(0, 12 * (1 - distanceRatio)) * categoryMultiplier;
2011
+ const roundedBonus = Math.round(bonus * 10) / 10;
2012
+
2013
+ return {
2014
+ score: roundedBonus,
2015
+ reason: roundedBonus > 0
2016
+ ? `${target.sweetSpotParamsB}B high-capacity target`
2017
+ : null
2018
+ };
1898
2019
  }
1899
2020
 
1900
2021
  estimatePracticalMaxParamsForBudget(budgetGB) {
@@ -1994,7 +2115,19 @@ class DeterministicModelSelector {
1994
2115
  return highCapacityPromoted;
1995
2116
  }
1996
2117
 
1997
- buildRationale(hardware, model, quant, requiredGB, budget, category, Q, S, memoryEstimate = null, speedEstimate = null) {
2118
+ buildRationale(
2119
+ hardware,
2120
+ model,
2121
+ quant,
2122
+ requiredGB,
2123
+ budget,
2124
+ category,
2125
+ Q,
2126
+ S,
2127
+ memoryEstimate = null,
2128
+ speedEstimate = null,
2129
+ capacityAdjustment = null
2130
+ ) {
1998
2131
  const parts = [];
1999
2132
 
2000
2133
  // Memory fit
@@ -2027,6 +2160,14 @@ class DeterministicModelSelector {
2027
2160
  const multiplier = Number(speedEstimate.moe.multiplier || 1).toFixed(2);
2028
2161
  parts.push(`MoE speed x${multiplier} (${runtimeLabel})`);
2029
2162
  }
2163
+
2164
+ if (speedEstimate?.acceleratorScale?.multiplier > 1) {
2165
+ parts.push(speedEstimate.acceleratorScale.reason);
2166
+ }
2167
+
2168
+ if (capacityAdjustment?.reason) {
2169
+ parts.push(capacityAdjustment.reason);
2170
+ }
2030
2171
 
2031
2172
  // Size sweet spot
2032
2173
  if (model.paramsB >= 7 && model.paramsB <= 13) {
@@ -2114,14 +2255,21 @@ class DeterministicModelSelector {
2114
2255
 
2115
2256
  updateCandidateWithMeasuredSpeed(candidate, measuredTPS, category) {
2116
2257
  const normalizedS = this.normalizeTPSToScore(measuredTPS, category);
2117
-
2118
- // Recalculate final score with measured speed
2119
- const weights = this.categoryWeights[category];
2120
- const { Q, F, C } = candidate.components;
2121
-
2258
+
2259
+ // Re-score with the measured speed using the SAME weighting source as
2260
+ // evaluateModel: getScoringWeights honours the user's optimizeFor profile and
2261
+ // falls back to the general weights for categories (e.g. 'talking') that have
2262
+ // no entry in DETERMINISTIC_WEIGHTS — indexing this.categoryWeights[category]
2263
+ // directly threw a TypeError for those. We also re-add the stored capacity
2264
+ // adjustment (H) and clamp, so a probed score stays comparable to a
2265
+ // non-probed one instead of being silently lower.
2266
+ const weights = this.getScoringWeights(category, candidate.optimizeFor || 'balanced');
2267
+ const { Q, F, C, H = 0 } = candidate.components;
2268
+
2122
2269
  candidate.estTPS = measuredTPS;
2123
2270
  candidate.components.S = normalizedS;
2124
- candidate.score = Math.round((Q * weights[0] + normalizedS * weights[1] + F * weights[2] + C * weights[3]) * 10) / 10;
2271
+ const weighted = Q * weights[0] + normalizedS * weights[1] + F * weights[2] + C * weights[3];
2272
+ candidate.score = Math.max(0, Math.min(100, Math.round((weighted + H) * 10) / 10));
2125
2273
  }
2126
2274
 
2127
2275
  normalizeTPSToScore(tps, category) {
@@ -1007,18 +1007,22 @@ class ExpandedModelsDatabase {
1007
1007
  }
1008
1008
 
1009
1009
  estimateMemoryUsage(model) {
1010
- const sizeGB = parseFloat(model.size.replace(/[^\d.]/g, ''));
1010
+ // Derive footprint from parameter count, not by stripping the unit off the
1011
+ // size string and treating the bare number as gigabytes — that read a 774M
1012
+ // model ("774M") as ~774 GB and a 22M model as ~22 GB. ~0.7 GB per 1B params
1013
+ // is a reasonable quantized-runtime footprint baseline.
1014
+ const sizeGB = this.extractModelParams(model) * 0.7;
1011
1015
 
1012
1016
  // Rough estimates including model loading overhead
1013
1017
  return {
1014
- minimal: Math.round(sizeGB * 1.2), // With quantization
1015
- typical: Math.round(sizeGB * 1.5), // Standard loading
1016
- maximum: Math.round(sizeGB * 2.0) // With full context
1018
+ minimal: Math.max(1, Math.round(sizeGB * 1.2)), // With quantization
1019
+ typical: Math.max(1, Math.round(sizeGB * 1.5)), // Standard loading
1020
+ maximum: Math.max(1, Math.round(sizeGB * 2.0)) // With full context
1017
1021
  };
1018
1022
  }
1019
1023
 
1020
1024
  estimatePowerConsumption(model, hardware) {
1021
- const sizeGB = parseFloat(model.size.replace(/[^\d.]/g, ''));
1025
+ const sizeGB = this.extractModelParams(model) * 0.7;
1022
1026
  const tier = this.getHardwareTier(hardware);
1023
1027
 
1024
1028
  const basePower = {