llm-checker 3.5.15 → 3.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -8
- package/analyzer/compatibility.js +5 -0
- package/analyzer/performance.js +5 -4
- package/bin/cli.js +5 -39
- package/bin/enhanced_cli.js +449 -24
- package/bin/mcp-server.mjs +266 -101
- package/package.json +13 -8
- package/src/ai/multi-objective-selector.js +118 -11
- package/src/calibration/calibration-manager.js +4 -1
- package/src/data/model-database.js +489 -5
- package/src/data/registry-ingestors.js +751 -0
- package/src/data/registry-recommender.js +514 -0
- package/src/data/seed/README.md +11 -3
- package/src/data/seed/models.db +0 -0
- package/src/data/sync-manager.js +32 -18
- package/src/hardware/backends/apple-silicon.js +5 -1
- package/src/hardware/backends/cuda-detector.js +47 -19
- package/src/hardware/backends/intel-detector.js +6 -2
- package/src/hardware/backends/rocm-detector.js +6 -2
- package/src/hardware/detector.js +57 -30
- package/src/hardware/unified-detector.js +129 -25
- package/src/index.js +68 -4
- package/src/models/ai-check-selector.js +36 -5
- package/src/models/deterministic-selector.js +179 -18
- package/src/models/expanded_database.js +9 -5
- package/src/models/intelligent-selector.js +87 -1
- package/src/models/moe-assumptions.js +11 -0
- package/src/models/requirements.js +16 -11
- package/src/models/scoring-core.js +341 -0
- package/src/models/scoring-engine.js +9 -2
- package/src/ollama/capacity-planner.js +15 -2
- package/src/ollama/client.js +70 -30
- package/src/ollama/enhanced-client.js +20 -2
- package/src/ollama/manager.js +14 -2
- package/src/policy/cli-policy.js +8 -2
- package/src/policy/policy-engine.js +2 -1
- package/src/provenance/model-provenance.js +4 -1
- package/src/ui/cli-theme.js +47 -7
- package/src/ui/interactive-panel.js +162 -24
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
|
|
11
11
|
const { MULTI_OBJECTIVE_WEIGHTS } = require('../models/scoring-config');
|
|
12
12
|
const { normalizePlatform } = require('../utils/platform');
|
|
13
|
+
const { rankModels } = require('../models/scoring-core');
|
|
13
14
|
|
|
14
15
|
class MultiObjectiveSelector {
|
|
15
16
|
constructor() {
|
|
@@ -40,23 +41,124 @@ class MultiObjectiveSelector {
|
|
|
40
41
|
}
|
|
41
42
|
|
|
42
43
|
/**
|
|
43
|
-
* Select best models using
|
|
44
|
+
* Select best models using the UNIFIED canonical scoring core (issue #88).
|
|
45
|
+
*
|
|
46
|
+
* `check` used to rank through this selector's own multi-objective math,
|
|
47
|
+
* which diverged from `recommend`/`smart-recommend` and never received the
|
|
48
|
+
* PR #89 high-capacity right-sizing fix. It now routes the ranking through
|
|
49
|
+
* the shared DeterministicModelSelector core (via scoring-core.rankModels)
|
|
50
|
+
* so identical (model, hardware) inputs score identically across all three
|
|
51
|
+
* commands and the high-capacity floor applies here too.
|
|
52
|
+
*
|
|
53
|
+
* The output shape is preserved exactly: `{ compatible, marginal,
|
|
54
|
+
* incompatible }`, each entry being the ORIGINAL model object spread with
|
|
55
|
+
* `totalScore`, `components { quality, speed, ttfb, context, hardwareMatch }`
|
|
56
|
+
* and `reasoning`, so downstream `check` rendering and the regression test
|
|
57
|
+
* (which calls `estimateModelParams` on the returned object) keep working.
|
|
44
58
|
*/
|
|
45
59
|
async selectBestModels(hardware, models, category = 'general', topK = 10) {
|
|
46
|
-
|
|
60
|
+
const inputModels = Array.isArray(models) ? models.filter(Boolean) : [];
|
|
61
|
+
if (inputModels.length === 0) {
|
|
62
|
+
return { compatible: [], marginal: [], incompatible: [] };
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
let ranking;
|
|
66
|
+
try {
|
|
67
|
+
ranking = await rankModels(inputModels, hardware, { category, topN: inputModels.length });
|
|
68
|
+
} catch (error) {
|
|
69
|
+
ranking = null;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
// Defensive fallback: if the unified core is unavailable for any reason,
|
|
73
|
+
// fall back to the legacy multi-objective ranking so `check` still works.
|
|
74
|
+
if (!ranking || !Array.isArray(ranking.candidates)) {
|
|
75
|
+
return this.selectBestModelsLegacy(hardware, inputModels, category, topK);
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
const scoredModels = [];
|
|
79
|
+
const rankedSources = new Set();
|
|
80
|
+
for (const candidate of ranking.candidates) {
|
|
81
|
+
const source = candidate?.meta?.__source;
|
|
82
|
+
if (!source) continue;
|
|
83
|
+
rankedSources.add(source);
|
|
84
|
+
scoredModels.push(this.mapCoreCandidateToMultiObjective(candidate, source, hardware, category));
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
// Models the canonical core dropped (category filter / budget) are not
|
|
88
|
+
// viable on this hardware for this use case -> treat as incompatible,
|
|
89
|
+
// mirroring the previous hard-filter semantics.
|
|
90
|
+
const incompatibleExtras = inputModels
|
|
91
|
+
.filter((model) => !rankedSources.has(model))
|
|
92
|
+
.map((model) => ({
|
|
93
|
+
...model,
|
|
94
|
+
totalScore: 0,
|
|
95
|
+
components: { quality: 0, speed: 0, ttfb: 0, context: 0, hardwareMatch: 0 },
|
|
96
|
+
reasoning: 'Filtered out by unified scoring core (does not fit hardware/use-case)'
|
|
97
|
+
}));
|
|
98
|
+
|
|
99
|
+
scoredModels.sort((a, b) => b.totalScore - a.totalScore);
|
|
100
|
+
|
|
101
|
+
const classified = this.classifyResults(scoredModels, topK);
|
|
102
|
+
classified.incompatible = [...classified.incompatible, ...incompatibleExtras].slice(0, 5);
|
|
103
|
+
return classified;
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
/**
|
|
107
|
+
* Map a unified-core candidate back into this selector's multi-objective
|
|
108
|
+
* output shape. The 0-100 `score` from the deterministic core becomes
|
|
109
|
+
* `totalScore`; component sub-scores are normalized to 0-1 to match the
|
|
110
|
+
* historical `components` contract consumed by `check` rendering.
|
|
111
|
+
*/
|
|
112
|
+
mapCoreCandidateToMultiObjective(candidate, source, hardware, category) {
|
|
113
|
+
const components = candidate.components || {};
|
|
114
|
+
const to01 = (value) => {
|
|
115
|
+
const num = Number(value);
|
|
116
|
+
if (!Number.isFinite(num)) return 0;
|
|
117
|
+
return Math.max(0, Math.min(1, num / 100));
|
|
118
|
+
};
|
|
119
|
+
|
|
120
|
+
const quality = to01(components.Q);
|
|
121
|
+
const speed = to01(components.S);
|
|
122
|
+
const context = to01(components.C);
|
|
123
|
+
// The deterministic core folds hardware fitness into the `F` (fit) plus
|
|
124
|
+
// `H` (high-capacity right-sizing) components; surface that as the
|
|
125
|
+
// historical `hardwareMatch` signal so `check` insights stay meaningful.
|
|
126
|
+
const hardwareMatch = to01((Number(components.F) || 0) + (Number(components.H) || 0));
|
|
127
|
+
|
|
128
|
+
return {
|
|
129
|
+
...source,
|
|
130
|
+
totalScore: Math.round(candidate.score * 100) / 100,
|
|
131
|
+
score: Math.round(candidate.score * 100) / 100,
|
|
132
|
+
components: {
|
|
133
|
+
quality,
|
|
134
|
+
speed,
|
|
135
|
+
ttfb: speed, // ttfb tracks speed; legacy field retained for shape
|
|
136
|
+
context,
|
|
137
|
+
hardwareMatch
|
|
138
|
+
},
|
|
139
|
+
quant: candidate.quant || source.quant,
|
|
140
|
+
estimatedRAM: candidate.requiredGB,
|
|
141
|
+
estimatedTPS: candidate.estTPS,
|
|
142
|
+
reasoning: candidate.rationale ||
|
|
143
|
+
this.generateReasoning(source, hardware, quality, hardwareMatch)
|
|
144
|
+
};
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
/**
|
|
148
|
+
* Legacy multi-objective ranking, retained ONLY as a defensive fallback if
|
|
149
|
+
* the unified core throws. Not used on the normal path.
|
|
150
|
+
*/
|
|
151
|
+
selectBestModelsLegacy(hardware, models, category = 'general', topK = 10) {
|
|
47
152
|
const compatibleModels = this.applyHardFilters(hardware, models);
|
|
48
|
-
|
|
153
|
+
|
|
49
154
|
if (compatibleModels.length === 0) {
|
|
50
155
|
return { compatible: [], marginal: [], incompatible: models };
|
|
51
156
|
}
|
|
52
157
|
|
|
53
|
-
|
|
54
|
-
const scoredModels = compatibleModels.map(model =>
|
|
158
|
+
const scoredModels = compatibleModels.map(model =>
|
|
55
159
|
this.calculateMultiObjectiveScore(hardware, model, category)
|
|
56
160
|
).filter(Boolean);
|
|
57
|
-
|
|
58
161
|
|
|
59
|
-
// Step 3: Sort and classify
|
|
60
162
|
scoredModels.sort((a, b) => b.totalScore - a.totalScore);
|
|
61
163
|
|
|
62
164
|
return this.classifyResults(scoredModels, topK);
|
|
@@ -784,11 +886,16 @@ class MultiObjectiveSelector {
|
|
|
784
886
|
}
|
|
785
887
|
|
|
786
888
|
estimateKVCache(model, contextLength) {
|
|
787
|
-
//
|
|
889
|
+
// KV cache grows linearly with parameter count and context length. The old
|
|
890
|
+
// formula derived both "layers" and "hidden size" from params and multiplied
|
|
891
|
+
// them, making the estimate scale with params^2 — a 70B model at 8k came out
|
|
892
|
+
// at ~299 GB (real value ~11 GB), which made checkMemoryCompatibility reject
|
|
893
|
+
// every mid/large model. Use the same calibrated linear factor as the Ollama
|
|
894
|
+
// capacity planner (~0.08 GB per 1B params at 4k context).
|
|
788
895
|
const params = this.estimateModelParams(model);
|
|
789
|
-
const
|
|
790
|
-
const
|
|
791
|
-
return (
|
|
896
|
+
const ctx = Number.isFinite(contextLength) && contextLength > 0 ? contextLength : 4096;
|
|
897
|
+
const kvFactorPer4k = 0.08; // GB per 1B params at 4k context (fp16)
|
|
898
|
+
return Math.max(0, params * kvFactorPer4k * (ctx / 4096)); // GB
|
|
792
899
|
}
|
|
793
900
|
|
|
794
901
|
estimateTokensPerSecond(hardware, model) {
|
|
@@ -320,7 +320,10 @@ class CalibrationManager {
|
|
|
320
320
|
NO_COLOR: '1'
|
|
321
321
|
}
|
|
322
322
|
});
|
|
323
|
-
|
|
323
|
+
// Convert ns->ms in floating point: dividing the BigInt first floored away
|
|
324
|
+
// all sub-millisecond precision (a 0.5 ms call read as 0 ms, skewing p50/p95,
|
|
325
|
+
// ttft and tokens/sec). The ns diff is well within Number's safe range.
|
|
326
|
+
const latencyMs = Number(process.hrtime.bigint() - started) / 1_000_000;
|
|
324
327
|
|
|
325
328
|
if (result.error) {
|
|
326
329
|
const error = new Error(result.error.message || 'Failed to execute runtime prompt.');
|