llm-checker 3.2.0 → 3.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +106 -7
- package/analyzer/compatibility.js +20 -0
- package/bin/cli.js +14 -0
- package/bin/enhanced_cli.js +580 -36
- package/package.json +11 -3
- package/src/ai/multi-objective-selector.js +28 -4
- package/src/hardware/backends/cuda-detector.js +32 -11
- package/src/hardware/detector.js +107 -5
- package/src/hardware/specs.js +8 -1
- package/src/index.js +161 -31
- package/src/models/deterministic-selector.js +406 -22
- package/src/models/expanded_database.js +8 -2
- package/src/models/intelligent-selector.js +89 -4
- package/src/models/scoring-engine.js +4 -0
- package/src/models/speculative-decoding-estimator.js +245 -0
- package/src/policy/audit-reporter.js +420 -0
- package/src/policy/cli-policy.js +403 -0
- package/src/policy/policy-engine.js +497 -0
- package/src/policy/policy-manager.js +324 -0
- package/src/provenance/model-provenance.js +176 -0
- package/src/runtime/runtime-support.js +174 -0
- package/bin/CLAUDE.md +0 -27
- package/src/CLAUDE.md +0 -18
- package/src/data/CLAUDE.md +0 -17
- package/src/hardware/CLAUDE.md +0 -18
- package/src/hardware/backends/CLAUDE.md +0 -17
- package/src/models/CLAUDE.md +0 -23
- package/src/ollama/CLAUDE.md +0 -30
- package/src/plugins/CLAUDE.md +0 -17
- package/src/utils/CLAUDE.md +0 -17
|
@@ -7,13 +7,18 @@
|
|
|
7
7
|
|
|
8
8
|
const fs = require('fs');
|
|
9
9
|
const path = require('path');
|
|
10
|
+
const os = require('os');
|
|
10
11
|
const { spawn } = require('child_process');
|
|
11
12
|
const { DETERMINISTIC_WEIGHTS } = require('./scoring-config');
|
|
12
13
|
|
|
13
14
|
class DeterministicModelSelector {
|
|
14
15
|
constructor() {
|
|
15
16
|
this.catalogPath = path.join(__dirname, 'catalog.json');
|
|
16
|
-
this.benchCachePath = path.join(
|
|
17
|
+
this.benchCachePath = path.join(os.homedir(), '.llm-checker', 'bench.json');
|
|
18
|
+
this.ollamaCachePaths = [
|
|
19
|
+
path.join(os.homedir(), '.llm-checker', 'cache', 'ollama', 'ollama-detailed-models.json'),
|
|
20
|
+
path.join(__dirname, '../ollama/.cache/ollama-detailed-models.json')
|
|
21
|
+
];
|
|
17
22
|
|
|
18
23
|
// Quality priors table
|
|
19
24
|
this.baseQualityByParams = {
|
|
@@ -205,7 +210,20 @@ class DeterministicModelSelector {
|
|
|
205
210
|
parsed.push({
|
|
206
211
|
...details,
|
|
207
212
|
installed: true,
|
|
208
|
-
installedSize: size
|
|
213
|
+
installedSize: size,
|
|
214
|
+
source: 'ollama_local',
|
|
215
|
+
registry: details.registry || 'ollama.com',
|
|
216
|
+
version: details.version || modelName,
|
|
217
|
+
license: details.license || 'unknown',
|
|
218
|
+
digest: details.digest || 'unknown',
|
|
219
|
+
provenance: {
|
|
220
|
+
...(details.provenance || {}),
|
|
221
|
+
source: 'ollama_local',
|
|
222
|
+
registry: details.registry || 'ollama.com',
|
|
223
|
+
version: details.version || modelName,
|
|
224
|
+
license: details.license || 'unknown',
|
|
225
|
+
digest: details.digest || 'unknown'
|
|
226
|
+
}
|
|
209
227
|
});
|
|
210
228
|
} catch (error) {
|
|
211
229
|
console.warn(`Failed to get details for ${modelName}:`, error.message);
|
|
@@ -233,7 +251,19 @@ class DeterministicModelSelector {
|
|
|
233
251
|
sizeGB: this.extractSizeGB(details),
|
|
234
252
|
modalities: this.extractModalities(details),
|
|
235
253
|
tags: this.extractTags(details),
|
|
236
|
-
model_identifier: modelName
|
|
254
|
+
model_identifier: modelName,
|
|
255
|
+
source: 'ollama_local',
|
|
256
|
+
registry: 'ollama.com',
|
|
257
|
+
version: modelName,
|
|
258
|
+
license: this.extractLicense(details),
|
|
259
|
+
digest: this.extractDigest(details)
|
|
260
|
+
};
|
|
261
|
+
meta.provenance = {
|
|
262
|
+
source: meta.source,
|
|
263
|
+
registry: meta.registry,
|
|
264
|
+
version: meta.version,
|
|
265
|
+
license: meta.license,
|
|
266
|
+
digest: meta.digest
|
|
237
267
|
};
|
|
238
268
|
|
|
239
269
|
return meta;
|
|
@@ -249,6 +279,18 @@ class DeterministicModelSelector {
|
|
|
249
279
|
modalities: ['text'],
|
|
250
280
|
tags: [],
|
|
251
281
|
model_identifier: modelName,
|
|
282
|
+
source: 'ollama_local',
|
|
283
|
+
registry: 'ollama.com',
|
|
284
|
+
version: modelName,
|
|
285
|
+
license: 'unknown',
|
|
286
|
+
digest: 'unknown',
|
|
287
|
+
provenance: {
|
|
288
|
+
source: 'ollama_local',
|
|
289
|
+
registry: 'ollama.com',
|
|
290
|
+
version: modelName,
|
|
291
|
+
license: 'unknown',
|
|
292
|
+
digest: 'unknown'
|
|
293
|
+
},
|
|
252
294
|
error: error.message
|
|
253
295
|
};
|
|
254
296
|
}
|
|
@@ -269,7 +311,19 @@ class DeterministicModelSelector {
|
|
|
269
311
|
|
|
270
312
|
return catalog.models.map(model => ({
|
|
271
313
|
...model,
|
|
272
|
-
installed: false
|
|
314
|
+
installed: false,
|
|
315
|
+
source: model.source || 'static_catalog',
|
|
316
|
+
registry: model.registry || 'ollama.com',
|
|
317
|
+
version: model.version || model.model_identifier || model.name || 'unknown',
|
|
318
|
+
license: model.license || 'unknown',
|
|
319
|
+
digest: model.digest || 'unknown',
|
|
320
|
+
provenance: {
|
|
321
|
+
source: model.source || 'static_catalog',
|
|
322
|
+
registry: model.registry || 'ollama.com',
|
|
323
|
+
version: model.version || model.model_identifier || model.name || 'unknown',
|
|
324
|
+
license: model.license || 'unknown',
|
|
325
|
+
digest: model.digest || 'unknown'
|
|
326
|
+
}
|
|
273
327
|
}));
|
|
274
328
|
} catch (error) {
|
|
275
329
|
console.warn('Failed to load catalog:', error.message);
|
|
@@ -349,6 +403,273 @@ class DeterministicModelSelector {
|
|
|
349
403
|
fs.writeFileSync(this.catalogPath, JSON.stringify(defaultCatalog, null, 2));
|
|
350
404
|
}
|
|
351
405
|
|
|
406
|
+
/**
|
|
407
|
+
* Full model pool loader:
|
|
408
|
+
* 1) Prefer complete Ollama scraped cache (all families/sizes)
|
|
409
|
+
* 2) Fallback to static curated catalog
|
|
410
|
+
*/
|
|
411
|
+
async loadModelPool() {
|
|
412
|
+
const cacheModels = await this.loadOllamaCacheModels();
|
|
413
|
+
if (cacheModels.length > 0) {
|
|
414
|
+
return cacheModels;
|
|
415
|
+
}
|
|
416
|
+
return this.loadCatalog();
|
|
417
|
+
}
|
|
418
|
+
|
|
419
|
+
async loadOllamaCacheModels() {
|
|
420
|
+
for (const cachePath of this.ollamaCachePaths) {
|
|
421
|
+
try {
|
|
422
|
+
if (!fs.existsSync(cachePath)) continue;
|
|
423
|
+
const raw = JSON.parse(fs.readFileSync(cachePath, 'utf8'));
|
|
424
|
+
const sourceModels = Array.isArray(raw) ? raw : (raw.models || []);
|
|
425
|
+
const normalized = this.normalizeExternalModels(sourceModels);
|
|
426
|
+
if (normalized.length > 0) return normalized;
|
|
427
|
+
} catch (error) {
|
|
428
|
+
// Ignore broken cache files and keep trying fallbacks
|
|
429
|
+
}
|
|
430
|
+
}
|
|
431
|
+
return [];
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
normalizeExternalModels(models = []) {
|
|
435
|
+
const normalized = [];
|
|
436
|
+
|
|
437
|
+
for (const model of models) {
|
|
438
|
+
if (!model || typeof model !== 'object') continue;
|
|
439
|
+
|
|
440
|
+
const alreadyNormalized =
|
|
441
|
+
typeof model.paramsB === 'number' &&
|
|
442
|
+
typeof model.ctxMax === 'number' &&
|
|
443
|
+
model.model_identifier;
|
|
444
|
+
|
|
445
|
+
if (alreadyNormalized) {
|
|
446
|
+
normalized.push({
|
|
447
|
+
...model,
|
|
448
|
+
tags: Array.isArray(model.tags) ? model.tags : [],
|
|
449
|
+
modalities: Array.isArray(model.modalities) ? model.modalities : ['text'],
|
|
450
|
+
installed: Boolean(model.installed),
|
|
451
|
+
source: model.source || 'ollama_database',
|
|
452
|
+
registry: model.registry || 'ollama.com',
|
|
453
|
+
version: model.version || model.model_identifier,
|
|
454
|
+
license: model.license || 'unknown',
|
|
455
|
+
digest: model.digest || 'unknown',
|
|
456
|
+
provenance: model.provenance || {
|
|
457
|
+
source: model.source || 'ollama_database',
|
|
458
|
+
registry: model.registry || 'ollama.com',
|
|
459
|
+
version: model.version || model.model_identifier,
|
|
460
|
+
license: model.license || 'unknown',
|
|
461
|
+
digest: model.digest || 'unknown'
|
|
462
|
+
}
|
|
463
|
+
});
|
|
464
|
+
continue;
|
|
465
|
+
}
|
|
466
|
+
|
|
467
|
+
const converted = this.convertOllamaModelToDeterministicModels(model);
|
|
468
|
+
normalized.push(...converted);
|
|
469
|
+
}
|
|
470
|
+
|
|
471
|
+
const deduped = new Map();
|
|
472
|
+
for (const model of normalized) {
|
|
473
|
+
const key = model.model_identifier || model.name;
|
|
474
|
+
if (!key || deduped.has(key)) continue;
|
|
475
|
+
deduped.set(key, model);
|
|
476
|
+
}
|
|
477
|
+
|
|
478
|
+
return [...deduped.values()];
|
|
479
|
+
}
|
|
480
|
+
|
|
481
|
+
convertOllamaModelToDeterministicModels(ollamaModel) {
|
|
482
|
+
const baseIdentifier = ollamaModel.model_identifier || ollamaModel.model_name || 'unknown';
|
|
483
|
+
const fallbackTag = `${baseIdentifier}:latest`;
|
|
484
|
+
const variants = Array.isArray(ollamaModel.variants) && ollamaModel.variants.length > 0
|
|
485
|
+
? ollamaModel.variants
|
|
486
|
+
: [{ tag: ollamaModel.model_identifier || fallbackTag }];
|
|
487
|
+
|
|
488
|
+
const contextLength = this.parseContextLength(
|
|
489
|
+
ollamaModel.context_length ||
|
|
490
|
+
ollamaModel.contextLength ||
|
|
491
|
+
ollamaModel.ctxMax
|
|
492
|
+
);
|
|
493
|
+
|
|
494
|
+
const baseText = [
|
|
495
|
+
ollamaModel.model_identifier,
|
|
496
|
+
ollamaModel.model_name,
|
|
497
|
+
ollamaModel.description,
|
|
498
|
+
ollamaModel.detailed_description,
|
|
499
|
+
ollamaModel.primary_category,
|
|
500
|
+
...(Array.isArray(ollamaModel.use_cases) ? ollamaModel.use_cases : []),
|
|
501
|
+
...(Array.isArray(ollamaModel.categories) ? ollamaModel.categories : [])
|
|
502
|
+
].filter(Boolean).join(' ').toLowerCase();
|
|
503
|
+
|
|
504
|
+
const derivedTags = new Set();
|
|
505
|
+
if (baseText.includes('code') || baseText.includes('coder')) derivedTags.add('coder');
|
|
506
|
+
if (baseText.includes('instruct')) derivedTags.add('instruct');
|
|
507
|
+
if (baseText.includes('chat') || baseText.includes('assistant') || baseText.includes('conversation')) derivedTags.add('chat');
|
|
508
|
+
if (baseText.includes('embed')) derivedTags.add('embedding');
|
|
509
|
+
if (baseText.includes('vision') || baseText.includes('vl') || baseText.includes('multimodal') || baseText.includes('image')) derivedTags.add('vision');
|
|
510
|
+
if (baseText.includes('reason') || baseText.includes('math') || baseText.includes('logic')) derivedTags.add('reasoning');
|
|
511
|
+
if (baseText.includes('creative') || baseText.includes('story') || baseText.includes('roleplay')) derivedTags.add('creative');
|
|
512
|
+
|
|
513
|
+
if (ollamaModel.primary_category === 'coding') derivedTags.add('coder');
|
|
514
|
+
if (ollamaModel.primary_category === 'chat') derivedTags.add('chat');
|
|
515
|
+
if (ollamaModel.primary_category === 'embeddings') derivedTags.add('embedding');
|
|
516
|
+
if (ollamaModel.primary_category === 'multimodal') derivedTags.add('vision');
|
|
517
|
+
if (ollamaModel.primary_category === 'reasoning') derivedTags.add('reasoning');
|
|
518
|
+
if (ollamaModel.primary_category === 'creative') derivedTags.add('creative');
|
|
519
|
+
|
|
520
|
+
return variants.map((variant) => {
|
|
521
|
+
const variantTag = variant.tag || fallbackTag;
|
|
522
|
+
const paramsB = this.extractParamsFromString(
|
|
523
|
+
variant.size,
|
|
524
|
+
variantTag,
|
|
525
|
+
ollamaModel.main_size,
|
|
526
|
+
ollamaModel.model_identifier
|
|
527
|
+
);
|
|
528
|
+
const quant = this.normalizeQuantization(
|
|
529
|
+
variant.quantization ||
|
|
530
|
+
this.extractQuantizationFromTag(variantTag) ||
|
|
531
|
+
'Q4_K_M'
|
|
532
|
+
);
|
|
533
|
+
|
|
534
|
+
const variantSizeGB = this.extractVariantSizeGB(variant, paramsB);
|
|
535
|
+
const modalities = this.inferModalities(ollamaModel, variantTag);
|
|
536
|
+
const modelTags = this.inferTagsForVariant(derivedTags, variant, variantTag);
|
|
537
|
+
|
|
538
|
+
const source = ollamaModel.source || 'ollama_database';
|
|
539
|
+
const registry = ollamaModel.registry || 'ollama.com';
|
|
540
|
+
const version = ollamaModel.version || variantTag;
|
|
541
|
+
const license = ollamaModel.license || 'unknown';
|
|
542
|
+
const digest = ollamaModel.digest || 'unknown';
|
|
543
|
+
|
|
544
|
+
return {
|
|
545
|
+
name: variantTag,
|
|
546
|
+
family: this.extractFamily(baseIdentifier),
|
|
547
|
+
paramsB,
|
|
548
|
+
ctxMax: contextLength,
|
|
549
|
+
quant,
|
|
550
|
+
sizeGB: variantSizeGB,
|
|
551
|
+
modalities,
|
|
552
|
+
tags: modelTags,
|
|
553
|
+
model_identifier: variantTag,
|
|
554
|
+
installed: false,
|
|
555
|
+
pulls: ollamaModel.actual_pulls || ollamaModel.pulls || 0,
|
|
556
|
+
source,
|
|
557
|
+
registry,
|
|
558
|
+
version,
|
|
559
|
+
license,
|
|
560
|
+
digest,
|
|
561
|
+
provenance: {
|
|
562
|
+
source,
|
|
563
|
+
registry,
|
|
564
|
+
version,
|
|
565
|
+
license,
|
|
566
|
+
digest
|
|
567
|
+
}
|
|
568
|
+
};
|
|
569
|
+
});
|
|
570
|
+
}
|
|
571
|
+
|
|
572
|
+
parseContextLength(contextValue) {
|
|
573
|
+
if (typeof contextValue === 'number' && Number.isFinite(contextValue) && contextValue > 0) {
|
|
574
|
+
return Math.round(contextValue);
|
|
575
|
+
}
|
|
576
|
+
|
|
577
|
+
if (typeof contextValue === 'string') {
|
|
578
|
+
const match = contextValue.match(/(\d+\.?\d*)\s*([KkMm]?)/);
|
|
579
|
+
if (match) {
|
|
580
|
+
const value = parseFloat(match[1]);
|
|
581
|
+
const unit = (match[2] || '').toUpperCase();
|
|
582
|
+
if (unit === 'M') return Math.round(value * 1024 * 1024);
|
|
583
|
+
if (unit === 'K') return Math.round(value * 1024);
|
|
584
|
+
return Math.round(value);
|
|
585
|
+
}
|
|
586
|
+
}
|
|
587
|
+
|
|
588
|
+
return 4096;
|
|
589
|
+
}
|
|
590
|
+
|
|
591
|
+
extractParamsFromString(...values) {
|
|
592
|
+
for (const value of values) {
|
|
593
|
+
if (typeof value === 'number' && Number.isFinite(value) && value > 0) {
|
|
594
|
+
return value;
|
|
595
|
+
}
|
|
596
|
+
if (typeof value !== 'string') continue;
|
|
597
|
+
|
|
598
|
+
const match = value.match(/(\d+\.?\d*)\s*([BbMm])/);
|
|
599
|
+
if (!match) continue;
|
|
600
|
+
const n = parseFloat(match[1]);
|
|
601
|
+
const unit = match[2].toUpperCase();
|
|
602
|
+
return unit === 'M' ? n / 1000 : n;
|
|
603
|
+
}
|
|
604
|
+
|
|
605
|
+
return 7;
|
|
606
|
+
}
|
|
607
|
+
|
|
608
|
+
extractQuantizationFromTag(tag = '') {
|
|
609
|
+
const match = String(tag).match(/\b(q\d+[_\w]*)\b/i);
|
|
610
|
+
return match ? match[1].toUpperCase() : null;
|
|
611
|
+
}
|
|
612
|
+
|
|
613
|
+
normalizeQuantization(quant = 'Q4_K_M') {
|
|
614
|
+
const q = String(quant).toUpperCase();
|
|
615
|
+
if (q.startsWith('Q8')) return 'Q8_0';
|
|
616
|
+
if (q.startsWith('Q6')) return 'Q6_K';
|
|
617
|
+
if (q.startsWith('Q5')) return 'Q5_K_M';
|
|
618
|
+
if (q.startsWith('Q4')) return 'Q4_K_M';
|
|
619
|
+
if (q.startsWith('Q3')) return 'Q3_K';
|
|
620
|
+
if (q.startsWith('Q2')) return 'Q2_K';
|
|
621
|
+
return 'Q4_K_M';
|
|
622
|
+
}
|
|
623
|
+
|
|
624
|
+
extractVariantSizeGB(variant, paramsB) {
|
|
625
|
+
const candidate = Number(variant.real_size_gb ?? variant.estimated_size_gb ?? NaN);
|
|
626
|
+
if (Number.isFinite(candidate) && candidate > 0) return candidate;
|
|
627
|
+
return Math.max(0.5, Math.round((paramsB * 0.58 + 0.5) * 10) / 10);
|
|
628
|
+
}
|
|
629
|
+
|
|
630
|
+
inferModalities(model, variantTag = '') {
|
|
631
|
+
const inputTypes = Array.isArray(model.input_types) ? model.input_types.map((x) => String(x).toLowerCase()) : [];
|
|
632
|
+
const text = [
|
|
633
|
+
model.model_identifier,
|
|
634
|
+
model.model_name,
|
|
635
|
+
model.description,
|
|
636
|
+
model.detailed_description,
|
|
637
|
+
variantTag
|
|
638
|
+
].filter(Boolean).join(' ').toLowerCase();
|
|
639
|
+
|
|
640
|
+
const hasVision = inputTypes.includes('image') ||
|
|
641
|
+
inputTypes.includes('vision') ||
|
|
642
|
+
/vision|vl\b|llava|pixtral|moondream|image|multimodal/.test(text);
|
|
643
|
+
|
|
644
|
+
return hasVision ? ['text', 'vision'] : ['text'];
|
|
645
|
+
}
|
|
646
|
+
|
|
647
|
+
inferTagsForVariant(baseTags, variant, variantTag = '') {
|
|
648
|
+
const tags = new Set(baseTags);
|
|
649
|
+
|
|
650
|
+
if (Array.isArray(variant.categories)) {
|
|
651
|
+
for (const cat of variant.categories) {
|
|
652
|
+
const c = String(cat).toLowerCase();
|
|
653
|
+
if (c.includes('code')) tags.add('coder');
|
|
654
|
+
if (c.includes('chat')) tags.add('chat');
|
|
655
|
+
if (c.includes('embed')) tags.add('embedding');
|
|
656
|
+
if (c.includes('vision') || c.includes('multimodal')) tags.add('vision');
|
|
657
|
+
if (c.includes('reason')) tags.add('reasoning');
|
|
658
|
+
if (c.includes('creative')) tags.add('creative');
|
|
659
|
+
}
|
|
660
|
+
}
|
|
661
|
+
|
|
662
|
+
const lowerTag = String(variantTag).toLowerCase();
|
|
663
|
+
if (lowerTag.includes('code') || lowerTag.includes('coder')) tags.add('coder');
|
|
664
|
+
if (lowerTag.includes('instruct')) tags.add('instruct');
|
|
665
|
+
if (lowerTag.includes('chat')) tags.add('chat');
|
|
666
|
+
if (lowerTag.includes('embed')) tags.add('embedding');
|
|
667
|
+
if (lowerTag.includes('vision') || lowerTag.includes('vl')) tags.add('vision');
|
|
668
|
+
if (lowerTag.includes('reason') || lowerTag.includes('math')) tags.add('reasoning');
|
|
669
|
+
|
|
670
|
+
return [...tags];
|
|
671
|
+
}
|
|
672
|
+
|
|
352
673
|
// ============================================================================
|
|
353
674
|
// HELPER METHODS FOR PARSING OLLAMA OUTPUT
|
|
354
675
|
// ============================================================================
|
|
@@ -423,6 +744,16 @@ class DeterministicModelSelector {
|
|
|
423
744
|
return tags;
|
|
424
745
|
}
|
|
425
746
|
|
|
747
|
+
extractLicense(details) {
|
|
748
|
+
const match = details.match(/license\s+([^\n\r]+)/i);
|
|
749
|
+
return match ? match[1].trim().toLowerCase() : 'unknown';
|
|
750
|
+
}
|
|
751
|
+
|
|
752
|
+
extractDigest(details) {
|
|
753
|
+
const match = details.match(/digest\s+([a-f0-9:]+)/i);
|
|
754
|
+
return match ? match[1].trim().toLowerCase() : 'unknown';
|
|
755
|
+
}
|
|
756
|
+
|
|
426
757
|
async runOllamaCommand(args) {
|
|
427
758
|
return new Promise((resolve, reject) => {
|
|
428
759
|
try {
|
|
@@ -465,10 +796,13 @@ class DeterministicModelSelector {
|
|
|
465
796
|
*/
|
|
466
797
|
async selectModels(category = 'general', options = {}) {
|
|
467
798
|
const {
|
|
468
|
-
targetCtx = this.targetContexts[category],
|
|
799
|
+
targetCtx = this.targetContexts[category] || this.targetContexts.general,
|
|
469
800
|
topN = 5,
|
|
470
801
|
enableProbe = false,
|
|
471
|
-
silent = false
|
|
802
|
+
silent = false,
|
|
803
|
+
hardware: providedHardware = null,
|
|
804
|
+
installedModels = null,
|
|
805
|
+
modelPool = null
|
|
472
806
|
} = options;
|
|
473
807
|
|
|
474
808
|
if (!silent) {
|
|
@@ -476,17 +810,20 @@ class DeterministicModelSelector {
|
|
|
476
810
|
}
|
|
477
811
|
|
|
478
812
|
// Phase 0: Gather data
|
|
479
|
-
const hardware = await this.getHardware();
|
|
480
|
-
const installed = await this.getInstalledModels();
|
|
481
|
-
const
|
|
813
|
+
const hardware = providedHardware || await this.getHardware();
|
|
814
|
+
const installed = Array.isArray(installedModels) ? installedModels : await this.getInstalledModels();
|
|
815
|
+
const externalPool = Array.isArray(modelPool) && modelPool.length > 0
|
|
816
|
+
? this.normalizeExternalModels(modelPool)
|
|
817
|
+
: await this.loadModelPool();
|
|
482
818
|
|
|
483
819
|
if (!silent) {
|
|
484
|
-
|
|
485
|
-
console.log(`
|
|
820
|
+
const memoryGB = hardware?.memory?.totalGB ?? hardware?.memory?.total ?? 0;
|
|
821
|
+
console.log(`Found ${installed.length} installed, ${externalPool.length} available models`);
|
|
822
|
+
console.log(`Hardware: ${hardware.cpu.cores} cores, ${memoryGB}GB RAM, ${hardware.gpu.type}`);
|
|
486
823
|
}
|
|
487
824
|
|
|
488
825
|
// Combine and dedupe models (prefer installed versions)
|
|
489
|
-
const pool = this.combineModels(installed,
|
|
826
|
+
const pool = this.combineModels(installed, externalPool);
|
|
490
827
|
const filtered = this.filterByCategory(pool, category);
|
|
491
828
|
|
|
492
829
|
if (!silent) {
|
|
@@ -495,8 +832,13 @@ class DeterministicModelSelector {
|
|
|
495
832
|
|
|
496
833
|
// Phase 1: Estimation filter
|
|
497
834
|
const candidates = [];
|
|
498
|
-
const
|
|
499
|
-
|
|
835
|
+
const totalMem = hardware?.memory?.totalGB ?? hardware?.memory?.total ?? 8;
|
|
836
|
+
const usableMem = typeof hardware.usableMemGB === 'number'
|
|
837
|
+
? hardware.usableMemGB
|
|
838
|
+
: Math.max(1, Math.min(0.8 * totalMem, totalMem - 2));
|
|
839
|
+
const isUnified = Boolean(hardware?.gpu?.unified) || hardware?.gpu?.type === 'apple_silicon';
|
|
840
|
+
const vram = hardware?.gpu?.vramGB ?? hardware?.gpu?.vram ?? 0;
|
|
841
|
+
const budget = isUnified ? usableMem : (vram || usableMem);
|
|
500
842
|
|
|
501
843
|
for (const model of filtered) {
|
|
502
844
|
const result = this.evaluateModel(model, hardware, category, targetCtx, budget);
|
|
@@ -592,7 +934,7 @@ class DeterministicModelSelector {
|
|
|
592
934
|
const C = this.calculateContextScore(model, targetCtx);
|
|
593
935
|
|
|
594
936
|
// 4. Calculate final weighted score
|
|
595
|
-
const weights = this.categoryWeights[category];
|
|
937
|
+
const weights = this.categoryWeights[category] || this.categoryWeights.general;
|
|
596
938
|
const score = Math.round((Q * weights[0] + S * weights[1] + F * weights[2] + C * weights[3]) * 10) / 10;
|
|
597
939
|
|
|
598
940
|
// 5. Build rationale
|
|
@@ -743,7 +1085,7 @@ class DeterministicModelSelector {
|
|
|
743
1085
|
if (hardware.acceleration.supports_metal || hardware.acceleration.supports_cuda) base *= 1.2;
|
|
744
1086
|
|
|
745
1087
|
// Normalize to 0-100 score
|
|
746
|
-
const target = this.targetSpeeds[category];
|
|
1088
|
+
const target = this.targetSpeeds[category] || this.targetSpeeds.general;
|
|
747
1089
|
return Math.min(100, Math.round((100 * base / target) * 10) / 10);
|
|
748
1090
|
}
|
|
749
1091
|
|
|
@@ -880,7 +1222,7 @@ class DeterministicModelSelector {
|
|
|
880
1222
|
}
|
|
881
1223
|
|
|
882
1224
|
normalizeTPSToScore(tps, category) {
|
|
883
|
-
const target = this.targetSpeeds[category];
|
|
1225
|
+
const target = this.targetSpeeds[category] || this.targetSpeeds.general;
|
|
884
1226
|
return Math.min(100, Math.round((100 * tps / target) * 10) / 10);
|
|
885
1227
|
}
|
|
886
1228
|
|
|
@@ -924,6 +1266,14 @@ class DeterministicModelSelector {
|
|
|
924
1266
|
* Map a candidate to the legacy format expected by callers
|
|
925
1267
|
*/
|
|
926
1268
|
mapCandidateToLegacyFormat(candidate) {
|
|
1269
|
+
const provenance = candidate.meta.provenance || {
|
|
1270
|
+
source: candidate.meta.source || 'unknown',
|
|
1271
|
+
registry: candidate.meta.registry || 'unknown',
|
|
1272
|
+
version: candidate.meta.version || 'unknown',
|
|
1273
|
+
license: candidate.meta.license || 'unknown',
|
|
1274
|
+
digest: candidate.meta.digest || 'unknown'
|
|
1275
|
+
};
|
|
1276
|
+
|
|
927
1277
|
return {
|
|
928
1278
|
model_name: candidate.meta.name,
|
|
929
1279
|
model_identifier: candidate.meta.model_identifier,
|
|
@@ -939,7 +1289,13 @@ class DeterministicModelSelector {
|
|
|
939
1289
|
tags: candidate.meta.tags || [],
|
|
940
1290
|
quantization: candidate.quant,
|
|
941
1291
|
estimatedRAM: candidate.requiredGB,
|
|
942
|
-
reasoning: candidate.rationale
|
|
1292
|
+
reasoning: candidate.rationale,
|
|
1293
|
+
source: provenance.source,
|
|
1294
|
+
registry: provenance.registry,
|
|
1295
|
+
version: provenance.version,
|
|
1296
|
+
license: provenance.license,
|
|
1297
|
+
digest: provenance.digest,
|
|
1298
|
+
provenance
|
|
943
1299
|
};
|
|
944
1300
|
}
|
|
945
1301
|
|
|
@@ -1008,13 +1364,17 @@ class DeterministicModelSelector {
|
|
|
1008
1364
|
async getBestModelsForHardware(hardware, allModels) {
|
|
1009
1365
|
const categories = ['coding', 'reasoning', 'multimodal', 'creative', 'talking', 'reading', 'general'];
|
|
1010
1366
|
const recommendations = {};
|
|
1367
|
+
const normalizedPool = this.normalizeExternalModels(Array.isArray(allModels) ? allModels : []);
|
|
1368
|
+
const installedModels = await this.getInstalledModels();
|
|
1011
1369
|
|
|
1012
1370
|
for (const category of categories) {
|
|
1013
1371
|
try {
|
|
1014
1372
|
const result = await this.selectModels(category, {
|
|
1015
1373
|
topN: 3,
|
|
1016
1374
|
enableProbe: false,
|
|
1017
|
-
silent: true
|
|
1375
|
+
silent: true,
|
|
1376
|
+
installedModels,
|
|
1377
|
+
modelPool: normalizedPool
|
|
1018
1378
|
});
|
|
1019
1379
|
|
|
1020
1380
|
recommendations[category] = {
|
|
@@ -1061,7 +1421,19 @@ class DeterministicModelSelector {
|
|
|
1061
1421
|
score: Math.round(bestModel.categoryScore || bestModel.score),
|
|
1062
1422
|
command: `ollama pull ${bestModel.model_identifier}`,
|
|
1063
1423
|
size: this.formatModelSize(bestModel),
|
|
1064
|
-
pulls: bestModel.pulls || 0
|
|
1424
|
+
pulls: bestModel.pulls || 0,
|
|
1425
|
+
source: bestModel.source || bestModel.provenance?.source || 'unknown',
|
|
1426
|
+
registry: bestModel.registry || bestModel.provenance?.registry || 'unknown',
|
|
1427
|
+
version: bestModel.version || bestModel.provenance?.version || 'unknown',
|
|
1428
|
+
license: bestModel.license || bestModel.provenance?.license || 'unknown',
|
|
1429
|
+
digest: bestModel.digest || bestModel.provenance?.digest || 'unknown',
|
|
1430
|
+
provenance: bestModel.provenance || {
|
|
1431
|
+
source: bestModel.source || 'unknown',
|
|
1432
|
+
registry: bestModel.registry || 'unknown',
|
|
1433
|
+
version: bestModel.version || 'unknown',
|
|
1434
|
+
license: bestModel.license || 'unknown',
|
|
1435
|
+
digest: bestModel.digest || 'unknown'
|
|
1436
|
+
}
|
|
1065
1437
|
};
|
|
1066
1438
|
|
|
1067
1439
|
summary.quick_commands.push(`ollama pull ${bestModel.model_identifier}`);
|
|
@@ -1083,7 +1455,19 @@ class DeterministicModelSelector {
|
|
|
1083
1455
|
identifier: bestOverallModel.model_identifier,
|
|
1084
1456
|
category: bestOverallCategory,
|
|
1085
1457
|
score: Math.round(bestOverallScore),
|
|
1086
|
-
command: `ollama pull ${bestOverallModel.model_identifier}
|
|
1458
|
+
command: `ollama pull ${bestOverallModel.model_identifier}`,
|
|
1459
|
+
source: bestOverallModel.source || bestOverallModel.provenance?.source || 'unknown',
|
|
1460
|
+
registry: bestOverallModel.registry || bestOverallModel.provenance?.registry || 'unknown',
|
|
1461
|
+
version: bestOverallModel.version || bestOverallModel.provenance?.version || 'unknown',
|
|
1462
|
+
license: bestOverallModel.license || bestOverallModel.provenance?.license || 'unknown',
|
|
1463
|
+
digest: bestOverallModel.digest || bestOverallModel.provenance?.digest || 'unknown',
|
|
1464
|
+
provenance: bestOverallModel.provenance || {
|
|
1465
|
+
source: bestOverallModel.source || 'unknown',
|
|
1466
|
+
registry: bestOverallModel.registry || 'unknown',
|
|
1467
|
+
version: bestOverallModel.version || 'unknown',
|
|
1468
|
+
license: bestOverallModel.license || 'unknown',
|
|
1469
|
+
digest: bestOverallModel.digest || 'unknown'
|
|
1470
|
+
}
|
|
1087
1471
|
};
|
|
1088
1472
|
}
|
|
1089
1473
|
|
|
@@ -1142,4 +1526,4 @@ class DeterministicModelSelector {
|
|
|
1142
1526
|
}
|
|
1143
1527
|
}
|
|
1144
1528
|
|
|
1145
|
-
module.exports = DeterministicModelSelector;
|
|
1529
|
+
module.exports = DeterministicModelSelector;
|
|
@@ -999,10 +999,16 @@ class ExpandedModelsDatabase {
|
|
|
999
999
|
} else if (hasDedicatedGPU) {
|
|
1000
1000
|
// Dedicated GPU - much better performance
|
|
1001
1001
|
let gpuTPS = 30;
|
|
1002
|
-
if (gpuModel.toLowerCase().includes('
|
|
1002
|
+
if (gpuModel.toLowerCase().includes('gb10') ||
|
|
1003
|
+
gpuModel.toLowerCase().includes('grace blackwell') ||
|
|
1004
|
+
gpuModel.toLowerCase().includes('dgx spark')) gpuTPS = 90;
|
|
1005
|
+
else if (gpuModel.toLowerCase().includes('h100')) gpuTPS = 120;
|
|
1006
|
+
else if (gpuModel.toLowerCase().includes('a100')) gpuTPS = 95;
|
|
1007
|
+
else if (gpuModel.toLowerCase().includes('rtx 50')) gpuTPS = 65;
|
|
1003
1008
|
else if (gpuModel.toLowerCase().includes('rtx 40')) gpuTPS = 50;
|
|
1004
1009
|
else if (gpuModel.toLowerCase().includes('rtx 30')) gpuTPS = 40;
|
|
1005
1010
|
else if (gpuModel.toLowerCase().includes('rtx 20')) gpuTPS = 30;
|
|
1011
|
+
else if (gpuModel.toLowerCase().includes('p100')) gpuTPS = 32;
|
|
1006
1012
|
else if (vramGB >= 16) gpuTPS = 45;
|
|
1007
1013
|
else if (vramGB >= 8) gpuTPS = 35;
|
|
1008
1014
|
else if (vramGB >= 4) gpuTPS = 25;
|
|
@@ -1139,4 +1145,4 @@ class ExpandedModelsDatabase {
|
|
|
1139
1145
|
}
|
|
1140
1146
|
}
|
|
1141
1147
|
|
|
1142
|
-
module.exports = ExpandedModelsDatabase;
|
|
1148
|
+
module.exports = ExpandedModelsDatabase;
|