llm-checker 3.2.8 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +119 -17
- package/bin/enhanced_cli.js +516 -3
- package/package.json +1 -1
- package/src/calibration/calibration-manager.js +798 -0
- package/src/calibration/policy-routing.js +376 -0
- package/src/calibration/schemas.js +212 -0
- package/src/hardware/backends/cuda-detector.js +355 -5
- package/src/ollama/capacity-planner.js +399 -0
package/bin/enhanced_cli.js
CHANGED
|
@@ -23,6 +23,16 @@ const {
|
|
|
23
23
|
getRuntimeDisplayName,
|
|
24
24
|
getRuntimeCommandSet
|
|
25
25
|
} = require('../src/runtime/runtime-support');
|
|
26
|
+
const { CalibrationManager } = require('../src/calibration/calibration-manager');
|
|
27
|
+
const { SUPPORTED_CALIBRATION_OBJECTIVES } = require('../src/calibration/schemas');
|
|
28
|
+
const {
|
|
29
|
+
resolveRoutingPolicyPreference,
|
|
30
|
+
normalizeTaskName,
|
|
31
|
+
inferTaskFromPrompt,
|
|
32
|
+
resolveCalibrationRoute,
|
|
33
|
+
getRouteModelCandidates,
|
|
34
|
+
selectModelFromRoute
|
|
35
|
+
} = require('../src/calibration/policy-routing');
|
|
26
36
|
const SpeculativeDecodingEstimator = require('../src/models/speculative-decoding-estimator');
|
|
27
37
|
const PolicyManager = require('../src/policy/policy-manager');
|
|
28
38
|
const PolicyEngine = require('../src/policy/policy-engine');
|
|
@@ -38,6 +48,7 @@ const {
|
|
|
38
48
|
serializeComplianceReport
|
|
39
49
|
} = require('../src/policy/audit-reporter');
|
|
40
50
|
const policyManager = new PolicyManager();
|
|
51
|
+
const calibrationManager = new CalibrationManager();
|
|
41
52
|
|
|
42
53
|
// ASCII Art for each command - Large text banners
|
|
43
54
|
const ASCII_ART = {
|
|
@@ -580,6 +591,80 @@ async function checkOllamaAndExit() {
|
|
|
580
591
|
}
|
|
581
592
|
}
|
|
582
593
|
|
|
594
|
+
/**
 * Parse a CLI option value that must be a positive integer.
 *
 * The raw value is coerced with Number() and rounded to the nearest integer.
 * Values that are not finite, are not greater than zero, or that round down
 * to zero (for example "0.4") are rejected.
 *
 * @param {*} rawValue - Raw option value as received from the command line.
 * @param {string} optionName - Flag name used in the error message (e.g. "--ctx").
 * @returns {number} The parsed value, rounded, always >= 1.
 * @throws {Error} `Invalid <optionName>: <rawValue>` when the value is unusable.
 */
function parsePositiveIntegerOption(rawValue, optionName) {
    const parsed = Number(rawValue);
    const rounded = Math.round(parsed);

    // Validate the rounded result rather than the raw number: 0.4 is > 0 but
    // rounds to 0, which is not a positive integer.
    if (!Number.isFinite(parsed) || rounded < 1) {
        throw new Error(`Invalid ${optionName}: ${rawValue}`);
    }

    return rounded;
}
|
|
601
|
+
|
|
602
|
+
/**
 * Parse a CLI option value that must be a finite number >= 0.
 *
 * Fractional values are allowed and returned unchanged (no rounding).
 *
 * @param {*} rawValue - Raw option value as received from the command line.
 * @param {string} optionName - Flag name used in the error message (e.g. "--reserve-gb").
 * @returns {number} The parsed non-negative number.
 * @throws {Error} `Invalid <optionName>: <rawValue>` when the value is blank,
 *   missing, not numeric, not finite, or negative.
 */
function parseNonNegativeNumberOption(rawValue, optionName) {
    // Number('') / Number('   ') / Number(null) all coerce to 0, which would let
    // a blank or missing value slip through as a valid zero. Reject them first.
    const isBlank =
        rawValue == null ||
        (typeof rawValue === 'string' && rawValue.trim() === '');
    const parsed = isBlank ? Number.NaN : Number(rawValue);

    if (!Number.isFinite(parsed) || parsed < 0) {
        throw new Error(`Invalid ${optionName}: ${rawValue}`);
    }

    return parsed;
}
|
|
609
|
+
|
|
610
|
+
/**
 * Resolve user-supplied model filters against the list of installed models.
 *
 * Each filter is trimmed and compared case-insensitively. For a given filter
 * the first installed model that satisfies the earliest of these rules wins:
 *   1. model name equals the filter
 *   2. model name starts with "<filter>:"
 *   3. model family equals the filter
 *   4. model name contains the filter
 *
 * When no usable filter is supplied (empty list, blank entries only, or a
 * non-array value) a shallow copy of every installed model is returned.
 *
 * @param {Array<Object>} installedModels - Installed models; `name` and `family` are read.
 * @param {Array<*>} [requestedModels=[]] - Requested tags/families.
 * @returns {{selected: Array<Object>, missing: Array<string>}} Matched models
 *   (de-duplicated by `name`, in request order) and the filters that matched nothing.
 */
function selectModelsForPlan(installedModels, requestedModels = []) {
    const filters = [];
    if (Array.isArray(requestedModels)) {
        for (const entry of requestedModels) {
            const cleaned = String(entry || '').trim();
            if (cleaned) {
                filters.push(cleaned);
            }
        }
    }

    if (filters.length === 0) {
        return { selected: installedModels.slice(), missing: [] };
    }

    const lowerName = (model) => String(model.name || '').toLowerCase();
    const lowerFamily = (model) => String(model.family || '').toLowerCase();

    // Match rules in priority order; each one is tried against the whole
    // installed list before the next rule is considered.
    const matchRules = [
        (model, needle) => lowerName(model) === needle,
        (model, needle) => lowerName(model).startsWith(`${needle}:`),
        (model, needle) => lowerFamily(model) === needle,
        (model, needle) => lowerName(model).includes(needle)
    ];

    // Keyed by model name so repeated hits on the same model are kept once,
    // while preserving first-seen order.
    const pickedByName = new Map();
    const missing = [];

    for (const filter of filters) {
        const needle = filter.toLowerCase();

        let hit;
        for (const rule of matchRules) {
            hit = installedModels.find((model) => rule(model, needle));
            if (hit) {
                break;
            }
        }

        if (!hit) {
            missing.push(filter);
        } else if (!pickedByName.has(hit.name)) {
            pickedByName.set(hit.name, hit);
        }
    }

    return {
        selected: Array.from(pickedByName.values()),
        missing
    };
}
|
|
667
|
+
|
|
583
668
|
function getStatusIcon(model, ollamaModels) {
|
|
584
669
|
const ollamaModel = ollamaModels?.find(om => om.matchedModel?.name === model.name);
|
|
585
670
|
|
|
@@ -1073,6 +1158,119 @@ function displayIntelligentRecommendations(intelligentData) {
|
|
|
1073
1158
|
console.log(chalk.red('╰'));
|
|
1074
1159
|
}
|
|
1075
1160
|
|
|
1161
|
+
/**
 * Turn a calibration policy source value into the label shown to the user.
 *
 * @param {string} [source] - Source value attached to the loaded calibration policy.
 * @returns {string} The default discovery path pattern for 'default-discovery',
 *   the source itself when it is truthy, otherwise 'unknown'.
 */
function toCalibrationSourceLabel(source) {
    const isDefaultDiscovery = source === 'default-discovery';
    return isDefaultDiscovery
        ? '~/.llm-checker/calibration-policy.{yaml,yml,json}'
        : (source || 'unknown');
}
|
|
1167
|
+
|
|
1168
|
+
/**
 * Gather every distinct model identifier mentioned in a recommendation payload.
 *
 * Identifiers are read, in this order, from:
 *   - `summary.best_overall.identifier`
 *   - `summary.by_category[*].identifier`
 *   - `recommendations[*].bestModels[*].model_identifier`
 * Falsy identifiers are ignored and duplicates are dropped (first occurrence wins).
 *
 * @param {Object} [intelligentData] - Recommendation payload; may be null/undefined.
 * @returns {Array<*>} Unique identifiers in first-seen order.
 */
function collectRecommendationModelIdentifiers(intelligentData) {
    const collected = new Set();
    const remember = (identifier) => {
        if (identifier) {
            collected.add(identifier);
        }
    };

    const overview = intelligentData?.summary || {};
    remember(overview.best_overall?.identifier);

    const perCategory = overview.by_category;
    if (perCategory && typeof perCategory === 'object') {
        for (const entry of Object.values(perCategory)) {
            remember(entry?.identifier);
        }
    }

    for (const group of Object.values(intelligentData?.recommendations || {})) {
        // Groups without a bestModels array contribute nothing.
        if (!Array.isArray(group?.bestModels)) {
            continue;
        }
        for (const model of group.bestModels) {
            remember(model?.model_identifier);
        }
    }

    return [...collected];
}
|
|
1196
|
+
|
|
1197
|
+
/**
 * Build a routing decision for a task from a loaded calibration policy.
 *
 * Resolves the route for `requestedTask` via resolveCalibrationRoute(), then
 * asks selectModelFromRoute() to pick a model out of `availableModels`. When
 * no selection is returned, the first entry of getRouteModelCandidates() is
 * reported as the selected/matched model instead.
 *
 * @param {Object} calibratedPolicy - Loaded policy wrapper; its `policy` field is used.
 * @param {string} requestedTask - Task name to route.
 * @param {Array} [availableModels=[]] - Models passed to selectModelFromRoute().
 * @returns {Object|null} Decision object, or null when there is no policy or
 *   no route could be resolved.
 */
function resolveCalibratedRouteDecision(calibratedPolicy, requestedTask, availableModels = []) {
    const policy = calibratedPolicy?.policy;
    if (!policy) {
        return null;
    }

    const resolution = resolveCalibrationRoute(policy, requestedTask);
    const route = resolution?.route;
    if (!route) {
        return null;
    }

    const routeCandidates = getRouteModelCandidates(route);
    const routeSelection = selectModelFromRoute(route, availableModels);

    // Only consulted when the selection does not supply a value itself.
    const firstCandidate = () => routeCandidates[0] || null;

    const fallbacks = Array.isArray(route.fallbacks) ? route.fallbacks : [];

    return {
        requestedTask: resolution.requestedTask,
        resolvedTask: resolution.resolvedTask,
        usedTaskFallback: Boolean(resolution.usedTaskFallback),
        primary: route.primary,
        fallbacks,
        routeCandidates,
        selectedModel: routeSelection?.selectedModel || firstCandidate(),
        matchedRouteModel: routeSelection?.matchedRouteModel || firstCandidate(),
        // True whenever selectModelFromRoute() returned a truthy selection.
        matchedAvailableModel: Boolean(routeSelection),
        usedRouteFallbackModel: Boolean(routeSelection?.usedFallback)
    };
}
|
|
1221
|
+
|
|
1222
|
+
/**
 * Print the "CALIBRATED ROUTING" panel for a command.
 *
 * Nothing is printed when there is neither a calibrated policy nor any warning.
 * Otherwise the panel shows the command name, the policy path and source (or a
 * "not active" notice), the route decision details when one is supplied, and
 * every warning.
 *
 * @param {string} commandName - Name of the CLI command being reported.
 * @param {Object|null} calibratedPolicy - Loaded policy; `policyPath` and `source` are read.
 * @param {Object|null} routeDecision - Decision object with the fields read below
 *   (requestedTask, resolvedTask, primary, fallbacks, selectedModel, ...).
 * @param {Array<string>} [warnings=[]] - Warning messages to list.
 * @returns {void}
 */
function displayCalibratedRoutingDecision(commandName, calibratedPolicy, routeDecision, warnings = []) {
    const noWarnings = !warnings || warnings.length === 0;
    if (!calibratedPolicy && noWarnings) {
        return;
    }

    // Every body row of the panel starts with the same blue border character.
    const row = (text) => console.log(chalk.blue('│') + text);

    console.log('\n' + chalk.bgBlue.white.bold(' CALIBRATED ROUTING '));
    console.log(chalk.blue('╭' + '─'.repeat(78)));
    row(` Command: ${chalk.cyan(commandName)}`);

    if (!calibratedPolicy) {
        row(chalk.yellow(' Policy: not active (deterministic fallback)'));
    } else {
        row(` Policy: ${chalk.green(calibratedPolicy.policyPath)}`);
        row(` Source: ${chalk.magenta(toCalibrationSourceLabel(calibratedPolicy.source))}`);
    }

    if (routeDecision) {
        const requested = routeDecision.requestedTask || 'general';
        const resolved = routeDecision.resolvedTask || requested;

        let taskDisplay = requested;
        if (routeDecision.usedTaskFallback) {
            taskDisplay = `${requested} → ${resolved}`;
        }

        let selectedLabel = routeDecision.selectedModel || routeDecision.primary || 'N/A';
        if (routeDecision.usedRouteFallbackModel) {
            selectedLabel = `${selectedLabel} (fallback)`;
        }

        row(` Task: ${chalk.white(taskDisplay)}`);
        row(` Route primary: ${chalk.green(routeDecision.primary || 'N/A')}`);

        const routeFallbacks = routeDecision.fallbacks;
        if (routeFallbacks && routeFallbacks.length > 0) {
            row(` Route fallbacks: ${chalk.gray(routeFallbacks.join(', '))}`);
        }

        row(` Selected model: ${chalk.green.bold(selectedLabel)}`);

        if (!routeDecision.matchedAvailableModel) {
            row(chalk.yellow(' Route did not match local/recommended models; using route primary for visibility.'));
        }
    }

    if (warnings && warnings.length > 0) {
        for (const warning of warnings) {
            row(chalk.yellow(` Warning: ${warning}`));
        }
    }

    console.log(chalk.blue('╰'));
}
|
|
1273
|
+
|
|
1076
1274
|
function displayModelsStats(originalCount, filteredCount, options) {
|
|
1077
1275
|
console.log('\n' + chalk.bgGreen.white.bold(' DATABASE STATS '));
|
|
1078
1276
|
console.log(chalk.green('╭' + '─'.repeat(60)));
|
|
@@ -2441,6 +2639,122 @@ auditCommand.action(() => {
|
|
|
2441
2639
|
auditCommand.outputHelp();
|
|
2442
2640
|
});
|
|
2443
2641
|
|
|
2642
|
+
// `calibrate` command: reads a JSONL prompt suite plus a list of model
// identifiers, writes a calibration result artifact to --output and, when
// --policy-out is given, a calibration policy artifact derived from it.
// Any failure is reported on stderr and the process exits with code 1.
program
    .command('calibrate')
    .description('Generate calibration contract artifacts from a JSONL prompt suite')
    .requiredOption('--suite <file>', 'Prompt suite path in JSONL format')
    .requiredOption(
        '--models <identifiers...>',
        'Model identifiers to include (repeat flag and/or comma-separate values)'
    )
    .requiredOption(
        '--output <file>',
        'Calibration result output path (.json, .yaml, or .yml)'
    )
    .option(
        '--runtime <runtime>',
        `Inference runtime (${SUPPORTED_RUNTIMES.join('|')})`,
        'ollama'
    )
    .option(
        '--mode <mode>',
        'Execution mode (dry-run|contract-only|full). Default: contract-only'
    )
    .option(
        '--objective <objective>',
        `Calibration objective (${SUPPORTED_CALIBRATION_OBJECTIVES.join('|')})`,
        'balanced'
    )
    .option(
        '--policy-out <file>',
        'Optional calibration policy output path (.json, .yaml, or .yml)'
    )
    .option('--warmup <count>', 'Warmup runs per prompt in full mode', '1')
    .option('--iterations <count>', 'Measured iterations per prompt in full mode', '2')
    .option('--timeout-ms <ms>', 'Per-prompt timeout in full mode', '120000')
    .option('--dry-run', 'Produce draft artifacts without benchmark execution')
    .addHelpText(
        'after',
        `
Examples:
$ llm-checker calibrate --suite ./prompts.jsonl --models qwen2.5-coder:7b llama3.2:3b --output ./calibration.json
$ llm-checker calibrate --suite ./prompts.jsonl --models qwen2.5-coder:7b --mode full --iterations 3 --output ./calibration.json --policy-out ./routing.yaml
$ llm-checker calibrate --suite ./prompts.jsonl --models qwen2.5-coder:7b,llama3.2:3b --output ./calibration.yaml --policy-out ./routing.yaml --dry-run
`
    )
    .action((options) => {
        try {
            const runtime = calibrationManager.validateRuntime(options.runtime);
            const objective = calibrationManager.validateObjective(options.objective);
            const executionMode = calibrationManager.resolveExecutionMode({
                mode: options.mode,
                dryRun: Boolean(options.dryRun)
            });
            const models = calibrationManager.parseModelIdentifiers(options.models);
            const suite = calibrationManager.parsePromptSuite(options.suite);

            let calibrationResult = null;
            if (executionMode === 'full') {
                // The benchmark knobs are only consumed in full mode, so they are
                // validated here rather than up front. Using the file's shared
                // option parsers (as `ollama-plan` does) turns inputs such as
                // "abc" into a clear "Invalid --iterations: abc" error instead of
                // passing NaN to the benchmark. Zero warmup runs is allowed;
                // iterations and the timeout must be at least 1.
                const warmupRuns = Math.round(
                    parseNonNegativeNumberOption(options.warmup, '--warmup')
                );
                const measuredIterations = parsePositiveIntegerOption(
                    options.iterations,
                    '--iterations'
                );
                const timeoutMs = parsePositiveIntegerOption(options.timeoutMs, '--timeout-ms');

                calibrationResult = calibrationManager.runFullCalibration({
                    models,
                    suite,
                    runtime,
                    objective,
                    benchmarkConfig: {
                        warmupRuns,
                        measuredIterations,
                        timeoutMs
                    }
                });
            } else {
                calibrationResult = calibrationManager.buildDraftCalibrationResult({
                    models,
                    suiteMetadata: suite.metadata,
                    runtime,
                    objective,
                    executionMode
                });
            }

            const resultPath = calibrationManager.writeArtifact(options.output, calibrationResult);

            // The policy artifact is optional and is built from the result just written.
            let policyPath = null;
            if (options.policyOut) {
                const calibrationPolicy = calibrationManager.buildDraftCalibrationPolicy({
                    calibrationResult,
                    calibrationResultPath: resultPath
                });
                policyPath = calibrationManager.writeArtifact(options.policyOut, calibrationPolicy);
            }

            console.log('\n' + chalk.bgBlue.white.bold(' CALIBRATION ARTIFACTS GENERATED '));
            console.log(chalk.blue('╭' + '─'.repeat(72)));
            console.log(chalk.blue('│') + ` Suite: ${chalk.white(suite.path)}`);
            console.log(chalk.blue('│') + ` Runtime: ${chalk.cyan(runtime)} | Objective: ${chalk.cyan(objective)}`);
            console.log(chalk.blue('│') + ` Models: ${chalk.white(String(models.length))}`);
            console.log(chalk.blue('│') + ` Execution mode: ${chalk.yellow(executionMode)}`);
            if (executionMode === 'full') {
                // Success/failure counts are only printed for full runs.
                console.log(
                    chalk.blue('│') +
                    ` Successful: ${chalk.green(
                        String(calibrationResult.summary.successful_models)
                    )} | Failed: ${chalk.red(String(calibrationResult.summary.failed_models))}`
                );
            }
            console.log(chalk.blue('│') + ` Result: ${chalk.green(resultPath)}`);
            if (policyPath) {
                console.log(chalk.blue('│') + ` Policy: ${chalk.green(policyPath)}`);
            }
            console.log(chalk.blue('╰' + '─'.repeat(72)));
        } catch (error) {
            console.error(chalk.red(`Calibration failed: ${error.message}`));
            if (process.env.DEBUG) {
                console.error(error.stack);
            }
            process.exit(1);
        }
    });
|
|
2757
|
+
|
|
2444
2758
|
program
|
|
2445
2759
|
.command('check')
|
|
2446
2760
|
.description('Analyze your system and show compatible LLM models')
|
|
@@ -2802,6 +3116,145 @@ program
|
|
|
2802
3116
|
}
|
|
2803
3117
|
});
|
|
2804
3118
|
|
|
3119
|
+
// `ollama-plan` command: validates the numeric/objective flags, checks that
// Ollama is reachable and has local models, narrows them with --models, detects
// hardware, and prints the resulting capacity plan (as JSON with --json,
// otherwise as a human-readable report). Failures exit with code 1.
program
    .command('ollama-plan')
    .description('Plan safe Ollama runtime settings for selected local models')
    .option('--models <models...>', 'Model tags/families to include (default: all local models)')
    .option('--ctx <tokens>', 'Target context window in tokens', '8192')
    .option('--concurrency <n>', 'Target parallel request count', '2')
    .option('--objective <mode>', 'Optimization objective (latency|balanced|throughput)', 'balanced')
    .option('--reserve-gb <gb>', 'Memory reserve for OS and background workloads', '2')
    .option('--json', 'Output plan as JSON')
    .action(async (options) => {
        // No spinner is created when --json is set.
        let spinner = null;
        if (!options.json) {
            spinner = ora('Building Ollama capacity plan...').start();
        }

        try {
            const objective = String(options.objective || 'balanced').toLowerCase();
            const allowedObjectives = ['latency', 'balanced', 'throughput'];
            if (!allowedObjectives.includes(objective)) {
                throw new Error(`Invalid objective "${options.objective}". Use latency, balanced, or throughput.`);
            }

            const targetContext = parsePositiveIntegerOption(options.ctx, '--ctx');
            const targetConcurrency = parsePositiveIntegerOption(options.concurrency, '--concurrency');
            const reserveGB = parseNonNegativeNumberOption(options.reserveGb, '--reserve-gb');

            // Modules are loaded only after the flags have been validated.
            const OllamaClient = require('../src/ollama/client');
            const UnifiedDetector = require('../src/hardware/unified-detector');
            const OllamaCapacityPlanner = require('../src/ollama/capacity-planner');

            const client = new OllamaClient();
            const availability = await client.checkOllamaAvailability();
            if (!availability.available) {
                throw new Error(availability.error || 'Ollama is not available');
            }

            const installed = await client.getLocalModels();
            if (!installed || installed.length === 0) {
                throw new Error('No local Ollama models found. Install one with: ollama pull llama3.2:3b');
            }

            const requestedModels = options.models || [];
            const { selected, missing } = selectModelsForPlan(installed, requestedModels);
            if (selected.length === 0) {
                throw new Error(
                    `No matching local models found for: ${requestedModels.join(', ')}`
                );
            }

            const hardware = await new UnifiedDetector().detect();
            const planner = new OllamaCapacityPlanner();
            const plan = planner.plan({
                hardware,
                models: selected,
                targetContext,
                targetConcurrency,
                objective,
                reserveGB
            });

            if (options.json) {
                const payload = {
                    generated_at: new Date().toISOString(),
                    selection: {
                        requested: requestedModels,
                        selected: selected.map((model) => model.name),
                        missing
                    },
                    plan
                };
                console.log(JSON.stringify(payload, null, 2));
                return;
            }

            spinner?.succeed('Capacity plan generated');

            const { envelope, memory, fallback } = plan;

            console.log('\n' + chalk.bgBlue.white.bold(' OLLAMA CAPACITY PLAN '));
            console.log(
                chalk.blue('Hardware:'),
                `${plan.hardware.backendName} (${plan.hardware.backend})`
            );
            console.log(
                chalk.blue('Memory budget:'),
                `${memory.budgetGB}GB usable (reserve ${plan.hardware.reserveGB}GB)`
            );

            if (missing.length > 0) {
                console.log(chalk.yellow('Missing model filters:'), missing.join(', '));
            }

            console.log(chalk.blue.bold('\nSelected models:'));
            plan.models.forEach((model) => {
                console.log(
                    ` - ${model.name} (${model.size}, ~${model.estimatedBaseMemoryGB}GB base)`
                );
            });

            console.log(chalk.blue.bold('\nRecommended envelope:'));
            console.log(
                ` Context: ${envelope.context.recommended} (requested ${envelope.context.requested})`
            );
            console.log(
                ` Parallel: ${envelope.parallel.recommended} (requested ${envelope.parallel.requested})`
            );
            console.log(
                ` Loaded models: ${envelope.loaded_models.recommended} (requested ${envelope.loaded_models.requested})`
            );
            console.log(
                ` Estimated memory: ${memory.recommendedEstimatedGB}GB / ${memory.budgetGB}GB (${memory.utilizationPercent}%)`
            );
            console.log(` Risk: ${plan.risk.level.toUpperCase()} (${plan.risk.score}/100)`);

            if (plan.notes.length > 0) {
                console.log(chalk.blue.bold('\nNotes:'));
                plan.notes.forEach((note) => {
                    console.log(` - ${note}`);
                });
            }

            console.log(chalk.blue.bold('\nRecommended env vars:'));
            Object.entries(plan.shell.env).forEach(([key, value]) => {
                console.log(` export ${key}=${value}`);
            });

            console.log(chalk.blue.bold('\nFallback profile:'));
            console.log(
                ` OLLAMA_NUM_CTX=${fallback.num_ctx} OLLAMA_NUM_PARALLEL=${fallback.num_parallel} OLLAMA_MAX_LOADED_MODELS=${fallback.max_loaded_models}`
            );
            console.log('');
        } catch (error) {
            spinner?.fail('Failed to build capacity plan');
            console.error(chalk.red('Error:'), error.message);
            if (process.env.DEBUG) {
                console.error(error.stack);
            }
            process.exit(1);
        }
    });
|
|
3257
|
+
|
|
2805
3258
|
program
|
|
2806
3259
|
.command('recommend')
|
|
2807
3260
|
.description('Get intelligent model recommendations for your hardware')
|
|
@@ -2809,6 +3262,10 @@ program
|
|
|
2809
3262
|
.option('--optimize <profile>', 'Optimization profile (balanced|speed|quality|context|coding)', 'balanced')
|
|
2810
3263
|
.option('--no-verbose', 'Disable step-by-step progress display')
|
|
2811
3264
|
.option('--policy <file>', 'Evaluate recommendations against a policy file')
|
|
3265
|
+
.option(
|
|
3266
|
+
'--calibrated [file]',
|
|
3267
|
+
'Use calibrated routing policy (optional file path; defaults to ~/.llm-checker/calibration-policy.{yaml,yml,json})'
|
|
3268
|
+
)
|
|
2812
3269
|
.addHelpText(
|
|
2813
3270
|
'after',
|
|
2814
3271
|
`
|
|
@@ -2816,6 +3273,11 @@ Enterprise policy examples:
|
|
|
2816
3273
|
$ llm-checker recommend --policy ./policy.yaml
|
|
2817
3274
|
$ llm-checker recommend --policy ./policy.yaml --category coding
|
|
2818
3275
|
$ llm-checker recommend --policy ./policy.yaml --no-verbose
|
|
3276
|
+
|
|
3277
|
+
Calibrated routing examples:
|
|
3278
|
+
$ llm-checker recommend --calibrated --category coding
|
|
3279
|
+
$ llm-checker recommend --calibrated ./calibration-policy.yaml --category reasoning
|
|
3280
|
+
$ llm-checker recommend --policy ./calibration-policy.yaml --category coding
|
|
2819
3281
|
`
|
|
2820
3282
|
)
|
|
2821
3283
|
.action(async (options) => {
|
|
@@ -2823,7 +3285,13 @@ Enterprise policy examples:
|
|
|
2823
3285
|
try {
|
|
2824
3286
|
const verboseEnabled = options.verbose !== false;
|
|
2825
3287
|
const checker = new (getLLMChecker())({ verbose: verboseEnabled });
|
|
2826
|
-
const
|
|
3288
|
+
const routingPreference = resolveRoutingPolicyPreference({
|
|
3289
|
+
policyOption: options.policy,
|
|
3290
|
+
calibratedOption: options.calibrated,
|
|
3291
|
+
loadEnterprisePolicy: loadPolicyConfiguration
|
|
3292
|
+
});
|
|
3293
|
+
const policyConfig = routingPreference.enterprisePolicy;
|
|
3294
|
+
const calibratedPolicy = routingPreference.calibratedPolicy;
|
|
2827
3295
|
|
|
2828
3296
|
if (!verboseEnabled) {
|
|
2829
3297
|
process.stdout.write(chalk.gray('Generating recommendations...'));
|
|
@@ -2860,11 +3328,18 @@ Enterprise policy examples:
|
|
|
2860
3328
|
policyEnforcement = resolvePolicyEnforcement(policyConfig.policy, policyEvaluation);
|
|
2861
3329
|
}
|
|
2862
3330
|
|
|
3331
|
+
const routingTask = normalizeTaskName(options.category || 'general');
|
|
3332
|
+
const recommendationIdentifiers = collectRecommendationModelIdentifiers(intelligentRecommendations);
|
|
3333
|
+
const routeDecision = calibratedPolicy
|
|
3334
|
+
? resolveCalibratedRouteDecision(calibratedPolicy, routingTask, recommendationIdentifiers)
|
|
3335
|
+
: null;
|
|
3336
|
+
|
|
2863
3337
|
// Mostrar información del sistema
|
|
2864
3338
|
displaySystemInfo(hardware, { summary: { hardwareTier: intelligentRecommendations.summary.hardware_tier } });
|
|
2865
3339
|
|
|
2866
3340
|
// Mostrar recomendaciones
|
|
2867
3341
|
displayIntelligentRecommendations(intelligentRecommendations);
|
|
3342
|
+
displayCalibratedRoutingDecision('recommend', calibratedPolicy, routeDecision, routingPreference.warnings);
|
|
2868
3343
|
|
|
2869
3344
|
if (policyConfig && policyEvaluation && policyEnforcement) {
|
|
2870
3345
|
displayPolicySummary('recommend', policyConfig, policyEvaluation, policyEnforcement);
|
|
@@ -3124,7 +3599,13 @@ program
|
|
|
3124
3599
|
.command('ai-run')
|
|
3125
3600
|
.description('AI-powered model selection and execution')
|
|
3126
3601
|
.option('-m, --models <models...>', 'Specific models to choose from')
|
|
3602
|
+
.option('-c, --category <category>', 'Task category hint (coding, reasoning, multimodal, general, etc.)')
|
|
3127
3603
|
.option('--prompt <prompt>', 'Prompt to run with selected model')
|
|
3604
|
+
.option('--policy <file>', 'Explicit calibrated routing policy file (takes precedence over --calibrated)')
|
|
3605
|
+
.option(
|
|
3606
|
+
'--calibrated [file]',
|
|
3607
|
+
'Enable calibrated routing policy (optional file path; defaults to ~/.llm-checker/calibration-policy.{yaml,yml,json})'
|
|
3608
|
+
)
|
|
3128
3609
|
.action(async (options) => {
|
|
3129
3610
|
showAsciiArt('ai-run');
|
|
3130
3611
|
// Check if Ollama is installed first
|
|
@@ -3138,6 +3619,11 @@ program
|
|
|
3138
3619
|
const aiSelector = new AIModelSelector();
|
|
3139
3620
|
const checker = new (getLLMChecker())();
|
|
3140
3621
|
const systemInfo = await checker.getSystemInfo();
|
|
3622
|
+
const routingPreference = resolveRoutingPolicyPreference({
|
|
3623
|
+
policyOption: options.policy,
|
|
3624
|
+
calibratedOption: options.calibrated
|
|
3625
|
+
});
|
|
3626
|
+
const calibratedPolicy = routingPreference.calibratedPolicy;
|
|
3141
3627
|
|
|
3142
3628
|
// Get available models or use provided ones
|
|
3143
3629
|
let candidateModels = options.models;
|
|
@@ -3165,6 +3651,10 @@ program
|
|
|
3165
3651
|
return;
|
|
3166
3652
|
}
|
|
3167
3653
|
}
|
|
3654
|
+
|
|
3655
|
+
candidateModels = Array.isArray(candidateModels)
|
|
3656
|
+
? candidateModels.filter((model) => typeof model === 'string' && model.trim().length > 0)
|
|
3657
|
+
: [];
|
|
3168
3658
|
|
|
3169
3659
|
// AI selection
|
|
3170
3660
|
const systemSpecs = {
|
|
@@ -3175,10 +3665,33 @@ program
|
|
|
3175
3665
|
gpu_model_normalized: systemInfo.gpu?.model ||
|
|
3176
3666
|
(systemInfo.cpu?.manufacturer === 'Apple' ? 'apple_silicon' : 'cpu_only')
|
|
3177
3667
|
};
|
|
3178
|
-
|
|
3179
|
-
const
|
|
3668
|
+
|
|
3669
|
+
const taskHint = normalizeTaskName(options.category || inferTaskFromPrompt(options.prompt));
|
|
3670
|
+
const routeDecision = calibratedPolicy
|
|
3671
|
+
? resolveCalibratedRouteDecision(calibratedPolicy, taskHint, candidateModels)
|
|
3672
|
+
: null;
|
|
3673
|
+
|
|
3674
|
+
let result;
|
|
3675
|
+
if (routeDecision && routeDecision.matchedAvailableModel && routeDecision.selectedModel) {
|
|
3676
|
+
result = {
|
|
3677
|
+
bestModel: routeDecision.selectedModel,
|
|
3678
|
+
confidence: routeDecision.usedRouteFallbackModel ? 0.82 : 0.94,
|
|
3679
|
+
method: 'calibrated-policy-route',
|
|
3680
|
+
reasoning: `Selected from calibrated policy route for ${routeDecision.resolvedTask}`
|
|
3681
|
+
};
|
|
3682
|
+
} else {
|
|
3683
|
+
if (routeDecision && routeDecision.routeCandidates.length > 0) {
|
|
3684
|
+
routingPreference.warnings.push(
|
|
3685
|
+
`Calibrated route candidates (${routeDecision.routeCandidates.join(
|
|
3686
|
+
', '
|
|
3687
|
+
)}) are not installed locally. Falling back to AI selector.`
|
|
3688
|
+
);
|
|
3689
|
+
}
|
|
3690
|
+
result = await aiSelector.selectBestModel(candidateModels, systemSpecs, taskHint);
|
|
3691
|
+
}
|
|
3180
3692
|
|
|
3181
3693
|
spinner.succeed(`Selected ${chalk.green.bold(result.bestModel)} (${result.method}, ${Math.round(result.confidence * 100)}% confidence)`);
|
|
3694
|
+
displayCalibratedRoutingDecision('ai-run', calibratedPolicy, routeDecision, routingPreference.warnings);
|
|
3182
3695
|
|
|
3183
3696
|
// Execute the selected model
|
|
3184
3697
|
console.log(chalk.magenta.bold(`\nLaunching ${result.bestModel}...`));
|
package/package.json
CHANGED