llm-checker 3.2.8 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,6 +23,16 @@ const {
23
23
  getRuntimeDisplayName,
24
24
  getRuntimeCommandSet
25
25
  } = require('../src/runtime/runtime-support');
26
+ const { CalibrationManager } = require('../src/calibration/calibration-manager');
27
+ const { SUPPORTED_CALIBRATION_OBJECTIVES } = require('../src/calibration/schemas');
28
+ const {
29
+ resolveRoutingPolicyPreference,
30
+ normalizeTaskName,
31
+ inferTaskFromPrompt,
32
+ resolveCalibrationRoute,
33
+ getRouteModelCandidates,
34
+ selectModelFromRoute
35
+ } = require('../src/calibration/policy-routing');
26
36
  const SpeculativeDecodingEstimator = require('../src/models/speculative-decoding-estimator');
27
37
  const PolicyManager = require('../src/policy/policy-manager');
28
38
  const PolicyEngine = require('../src/policy/policy-engine');
@@ -38,6 +48,7 @@ const {
38
48
  serializeComplianceReport
39
49
  } = require('../src/policy/audit-reporter');
40
50
  const policyManager = new PolicyManager();
51
+ const calibrationManager = new CalibrationManager();
41
52
 
42
53
  // ASCII Art for each command - Large text banners
43
54
  const ASCII_ART = {
@@ -580,6 +591,80 @@ async function checkOllamaAndExit() {
580
591
  }
581
592
  }
582
593
 
594
/**
 * Parse a CLI option value that must resolve to a positive whole number.
 *
 * The raw value is coerced with Number() and rounded to the nearest integer.
 * The range check is applied to the ROUNDED value so that fractional inputs
 * such as "0.4" (greater than zero, but rounding to 0) are rejected instead
 * of silently yielding a non-positive result.
 *
 * @param {*} rawValue - Raw option value (typically a string from the CLI parser).
 * @param {string} optionName - Option label used in the error message (e.g. "--ctx").
 * @returns {number} Integer greater than or equal to 1.
 * @throws {Error} When the value is not a finite number or rounds to less than 1.
 */
function parsePositiveIntegerOption(rawValue, optionName) {
    const parsed = Number(rawValue);
    const rounded = Math.round(parsed);

    // Math.round(NaN) is NaN and NaN < 1 is false, so finiteness is checked explicitly.
    if (!Number.isFinite(parsed) || rounded < 1) {
        throw new Error(`Invalid ${optionName}: ${rawValue}`);
    }

    return rounded;
}
601
+
602
/**
 * Parse a CLI option value that must be a finite number greater than or equal to zero.
 *
 * Number('') and Number(null) both evaluate to 0, which would let a blank or
 * missing value masquerade as an explicit zero. Such inputs are rejected here
 * so the caller only receives 0 when the user actually asked for it.
 *
 * @param {*} rawValue - Raw option value (typically a string from the CLI parser).
 * @param {string} optionName - Option label used in the error message (e.g. "--reserve-gb").
 * @returns {number} The parsed, non-negative number (fractions are preserved).
 * @throws {Error} When the value is blank, missing, not finite, or negative.
 */
function parseNonNegativeNumberOption(rawValue, optionName) {
    const isBlank =
        rawValue == null || (typeof rawValue === 'string' && rawValue.trim() === '');
    const parsed = isBlank ? Number.NaN : Number(rawValue);

    if (!Number.isFinite(parsed) || parsed < 0) {
        throw new Error(`Invalid ${optionName}: ${rawValue}`);
    }

    return parsed;
}
609
+
610
/**
 * Resolve user-supplied model filters against the list of installed models.
 *
 * Each filter is matched case-insensitively, trying in order:
 *   1. exact model name,
 *   2. model name starting with "<filter>:" (filter given without a tag),
 *   3. exact model family,
 *   4. filter appearing anywhere in the model name.
 * Only the first installed model satisfying the earliest successful rule is
 * selected for a given filter. A model is never selected twice.
 *
 * Filters may be passed as separate array entries and/or comma-separated
 * within one entry (e.g. ["llama3.2,qwen2.5-coder"]).
 *
 * @param {Array<object>} installedModels - Installed model records; `name` and
 *   `family` are the only fields read. Non-array input and null/undefined
 *   entries are tolerated and treated as "no model".
 * @param {Array<string>} [requestedModels=[]] - Filters; a non-array value or
 *   an empty list selects every installed model.
 * @returns {{selected: Array<object>, missing: Array<string>}} Matched model
 *   records in filter order, plus the filters that matched nothing.
 */
function selectModelsForPlan(installedModels, requestedModels = []) {
    // Drop null/undefined entries so the property reads below cannot throw.
    const installed = Array.isArray(installedModels) ? installedModels.filter(Boolean) : [];

    const requested = Array.isArray(requestedModels)
        ? requestedModels
            .flatMap((entry) => String(entry || '').split(','))
            .map((entry) => entry.trim())
            .filter(Boolean)
        : [];

    if (requested.length === 0) {
        return {
            selected: installed.slice(),
            missing: []
        };
    }

    const lowered = (model, field) => String(model[field] || '').toLowerCase();

    const selected = [];
    const missing = [];
    const seen = new Set();

    for (const request of requested) {
        const needle = request.toLowerCase();

        // Ordered from most to least specific; the first rule that hits wins.
        const rules = [
            (model) => lowered(model, 'name') === needle,
            (model) => lowered(model, 'name').startsWith(`${needle}:`),
            (model) => lowered(model, 'family') === needle,
            (model) => lowered(model, 'name').includes(needle)
        ];

        let match;
        for (const rule of rules) {
            match = installed.find(rule);
            if (match) break;
        }

        if (!match) {
            missing.push(request);
            continue;
        }

        if (!seen.has(match.name)) {
            selected.push(match);
            seen.add(match.name);
        }
    }

    return {
        selected,
        missing
    };
}
667
+
583
668
  function getStatusIcon(model, ollamaModels) {
584
669
  const ollamaModel = ollamaModels?.find(om => om.matchedModel?.name === model.name);
585
670
 
@@ -1073,6 +1158,119 @@ function displayIntelligentRecommendations(intelligentData) {
1073
1158
  console.log(chalk.red('╰'));
1074
1159
  }
1075
1160
 
1161
/**
 * Turn a calibration policy source tag into the text shown to the user.
 *
 * @param {string} [source] - Source tag attached to a loaded calibration policy.
 * @returns {string} The default discovery path pattern for 'default-discovery',
 *   the tag itself for any other truthy value, or 'unknown' when it is empty.
 */
function toCalibrationSourceLabel(source) {
    const isDefaultDiscovery = source === 'default-discovery';
    return isDefaultDiscovery
        ? '~/.llm-checker/calibration-policy.{yaml,yml,json}'
        : (source || 'unknown');
}
1167
+
1168
/**
 * Gather the distinct model identifiers mentioned in a recommendation payload.
 *
 * Identifiers are read, in this order, from:
 *   - `summary.best_overall.identifier`
 *   - `summary.by_category[*].identifier`
 *   - `recommendations[*].bestModels[*].model_identifier`
 * Empty/falsy identifiers are ignored and duplicates are collapsed.
 *
 * @param {object} [intelligentData] - Recommendation payload; may be null/undefined.
 * @returns {Array} Unique identifiers in first-seen order.
 */
function collectRecommendationModelIdentifiers(intelligentData) {
    const collected = new Set();
    const remember = (identifier) => {
        if (identifier) {
            collected.add(identifier);
        }
    };

    const { best_overall: bestOverall, by_category: byCategory } = intelligentData?.summary || {};

    remember(bestOverall?.identifier);

    if (byCategory && typeof byCategory === 'object') {
        for (const entry of Object.values(byCategory)) {
            remember(entry?.identifier);
        }
    }

    for (const group of Object.values(intelligentData?.recommendations || {})) {
        // Groups without a bestModels array contribute nothing.
        if (!Array.isArray(group?.bestModels)) {
            continue;
        }
        for (const model of group.bestModels) {
            remember(model?.model_identifier);
        }
    }

    return [...collected];
}
1196
+
1197
/**
 * Resolve which model a calibrated routing policy points to for a task.
 *
 * Delegates to the policy-routing helpers: `resolveCalibrationRoute` picks the
 * route for the task, `getRouteModelCandidates` lists the route's models and
 * `selectModelFromRoute` tries to match one of them against `availableModels`.
 * When nothing matches, the first route candidate is reported instead.
 *
 * @param {object} calibratedPolicy - Loaded policy wrapper; only `.policy` is read.
 * @param {string} requestedTask - Task name to route.
 * @param {Array} [availableModels=[]] - Models the route may be matched against.
 * @returns {object|null} Route decision summary, or null when there is no
 *   policy or no route could be resolved for the task.
 */
function resolveCalibratedRouteDecision(calibratedPolicy, requestedTask, availableModels = []) {
    const policy = calibratedPolicy?.policy;
    if (!policy) {
        return null;
    }

    const resolution = resolveCalibrationRoute(policy, requestedTask);
    const route = resolution?.route;
    if (!route) {
        return null;
    }

    const routeCandidates = getRouteModelCandidates(route);
    const selection = selectModelFromRoute(route, availableModels);

    // Evaluated lazily: only consulted when the selection did not supply a value.
    const firstCandidate = () => routeCandidates[0] || null;

    const selectedModel = selection?.selectedModel || firstCandidate();
    const fallbacks = Array.isArray(route.fallbacks) ? route.fallbacks : [];

    return {
        requestedTask: resolution.requestedTask,
        resolvedTask: resolution.resolvedTask,
        usedTaskFallback: Boolean(resolution.usedTaskFallback),
        primary: route.primary,
        fallbacks,
        routeCandidates,
        selectedModel,
        matchedRouteModel: selection?.matchedRouteModel || firstCandidate(),
        matchedAvailableModel: Boolean(selection),
        usedRouteFallbackModel: Boolean(selection?.usedFallback)
    };
}
1221
+
1222
/**
 * Print the "CALIBRATED ROUTING" panel for a command.
 *
 * Nothing is printed when there is neither an active calibrated policy nor
 * any warning to show. Otherwise the panel lists the command, the policy path
 * and source (or a "not active" notice), the routing decision when one was
 * resolved, and finally every warning.
 *
 * @param {string} commandName - Name of the CLI command being reported.
 * @param {object|null} calibratedPolicy - Loaded policy; `policyPath` and `source` are read.
 * @param {object|null} routeDecision - Decision summary with the fields read below.
 * @param {Array<string>} [warnings=[]] - Warning messages to append.
 * @returns {void}
 */
function displayCalibratedRoutingDecision(commandName, calibratedPolicy, routeDecision, warnings = []) {
    const nothingToReport = !calibratedPolicy && (!warnings || warnings.length === 0);
    if (nothingToReport) {
        return;
    }

    // Every body row starts with the same left border glyph.
    const printRow = (text) => console.log(chalk.blue('│') + text);

    console.log('\n' + chalk.bgBlue.white.bold(' CALIBRATED ROUTING '));
    console.log(chalk.blue('╭' + '─'.repeat(78)));
    printRow(` Command: ${chalk.cyan(commandName)}`);

    if (!calibratedPolicy) {
        printRow(chalk.yellow(' Policy: not active (deterministic fallback)'));
    } else {
        printRow(` Policy: ${chalk.green(calibratedPolicy.policyPath)}`);
        printRow(` Source: ${chalk.magenta(toCalibrationSourceLabel(calibratedPolicy.source))}`);
    }

    if (routeDecision) {
        const { primary, fallbacks } = routeDecision;
        const requested = routeDecision.requestedTask || 'general';
        const resolved = routeDecision.resolvedTask || requested;

        // Show "requested → resolved" only when the task itself fell back.
        let taskLabel = requested;
        if (routeDecision.usedTaskFallback) {
            taskLabel = `${requested} → ${resolved}`;
        }

        const chosenModel = routeDecision.selectedModel || primary || 'N/A';
        let chosenLabel = chosenModel;
        if (routeDecision.usedRouteFallbackModel) {
            chosenLabel = `${chosenModel} (fallback)`;
        }

        printRow(` Task: ${chalk.white(taskLabel)}`);
        printRow(` Route primary: ${chalk.green(primary || 'N/A')}`);
        if (fallbacks && fallbacks.length > 0) {
            printRow(` Route fallbacks: ${chalk.gray(fallbacks.join(', '))}`);
        }
        printRow(` Selected model: ${chalk.green.bold(chosenLabel)}`);

        if (!routeDecision.matchedAvailableModel) {
            printRow(
                chalk.yellow(' Route did not match local/recommended models; using route primary for visibility.')
            );
        }
    }

    if (warnings && warnings.length > 0) {
        warnings.forEach((warning) => {
            printRow(chalk.yellow(` Warning: ${warning}`));
        });
    }

    console.log(chalk.blue('╰'));
}
1273
+
1076
1274
  function displayModelsStats(originalCount, filteredCount, options) {
1077
1275
  console.log('\n' + chalk.bgGreen.white.bold(' DATABASE STATS '));
1078
1276
  console.log(chalk.green('╭' + '─'.repeat(60)));
@@ -2441,6 +2639,122 @@ auditCommand.action(() => {
2441
2639
  auditCommand.outputHelp();
2442
2640
  });
2443
2641
 
2642
// `calibrate` command: builds a calibration result from a prompt suite (either a
// full benchmark run or a draft contract), writes it to --output and, when
// --policy-out is given, also writes a routing policy derived from that result.
program
    .command('calibrate')
    .description('Generate calibration contract artifacts from a JSONL prompt suite')
    .requiredOption('--suite <file>', 'Prompt suite path in JSONL format')
    .requiredOption(
        '--models <identifiers...>',
        'Model identifiers to include (repeat flag and/or comma-separate values)'
    )
    .requiredOption(
        '--output <file>',
        'Calibration result output path (.json, .yaml, or .yml)'
    )
    .option(
        '--runtime <runtime>',
        `Inference runtime (${SUPPORTED_RUNTIMES.join('|')})`,
        'ollama'
    )
    .option(
        '--mode <mode>',
        'Execution mode (dry-run|contract-only|full). Default: contract-only'
    )
    .option(
        '--objective <objective>',
        `Calibration objective (${SUPPORTED_CALIBRATION_OBJECTIVES.join('|')})`,
        'balanced'
    )
    .option(
        '--policy-out <file>',
        'Optional calibration policy output path (.json, .yaml, or .yml)'
    )
    .option('--warmup <count>', 'Warmup runs per prompt in full mode', '1')
    .option('--iterations <count>', 'Measured iterations per prompt in full mode', '2')
    .option('--timeout-ms <ms>', 'Per-prompt timeout in full mode', '120000')
    .option('--dry-run', 'Produce draft artifacts without benchmark execution')
    .addHelpText(
        'after',
        `
Examples:
$ llm-checker calibrate --suite ./prompts.jsonl --models qwen2.5-coder:7b llama3.2:3b --output ./calibration.json
$ llm-checker calibrate --suite ./prompts.jsonl --models qwen2.5-coder:7b --mode full --iterations 3 --output ./calibration.json --policy-out ./routing.yaml
$ llm-checker calibrate --suite ./prompts.jsonl --models qwen2.5-coder:7b,llama3.2:3b --output ./calibration.yaml --policy-out ./routing.yaml --dry-run
`
    )
    .action((options) => {
        try {
            const runtime = calibrationManager.validateRuntime(options.runtime);
            const objective = calibrationManager.validateObjective(options.objective);
            const executionMode = calibrationManager.resolveExecutionMode({
                mode: options.mode,
                dryRun: Boolean(options.dryRun)
            });
            const models = calibrationManager.parseModelIdentifiers(options.models);
            const suite = calibrationManager.parsePromptSuite(options.suite);

            let calibrationResult = null;
            if (executionMode === 'full') {
                // Reject unparsable or out-of-range counts here, so a typo such as
                // "--iterations abc" fails with a clear message instead of handing
                // NaN to the benchmark run.
                const parseCountOption = (rawValue, optionName, minimum) => {
                    const parsed = Number.parseInt(rawValue, 10);
                    if (Number.isNaN(parsed) || parsed < minimum) {
                        throw new Error(`Invalid ${optionName}: ${rawValue}`);
                    }
                    return parsed;
                };

                // NOTE(review): the result is consumed synchronously below, which
                // assumes runFullCalibration does not return a promise — confirm.
                calibrationResult = calibrationManager.runFullCalibration({
                    models,
                    suite,
                    runtime,
                    objective,
                    benchmarkConfig: {
                        warmupRuns: parseCountOption(options.warmup, '--warmup', 0),
                        measuredIterations: parseCountOption(options.iterations, '--iterations', 1),
                        timeoutMs: parseCountOption(options.timeoutMs, '--timeout-ms', 0)
                    }
                });
            } else {
                calibrationResult = calibrationManager.buildDraftCalibrationResult({
                    models,
                    suiteMetadata: suite.metadata,
                    runtime,
                    objective,
                    executionMode
                });
            }

            const resultPath = calibrationManager.writeArtifact(options.output, calibrationResult);

            // The policy artifact is optional and always derived from the result just written.
            let policyPath = null;
            if (options.policyOut) {
                const calibrationPolicy = calibrationManager.buildDraftCalibrationPolicy({
                    calibrationResult,
                    calibrationResultPath: resultPath
                });
                policyPath = calibrationManager.writeArtifact(options.policyOut, calibrationPolicy);
            }

            console.log('\n' + chalk.bgBlue.white.bold(' CALIBRATION ARTIFACTS GENERATED '));
            console.log(chalk.blue('╭' + '─'.repeat(72)));
            console.log(chalk.blue('│') + ` Suite: ${chalk.white(suite.path)}`);
            console.log(chalk.blue('│') + ` Runtime: ${chalk.cyan(runtime)} | Objective: ${chalk.cyan(objective)}`);
            console.log(chalk.blue('│') + ` Models: ${chalk.white(String(models.length))}`);
            console.log(chalk.blue('│') + ` Execution mode: ${chalk.yellow(executionMode)}`);
            if (executionMode === 'full') {
                // Success/failure counts are only printed for full benchmark runs.
                console.log(
                    chalk.blue('│') +
                        ` Successful: ${chalk.green(
                            String(calibrationResult.summary.successful_models)
                        )} | Failed: ${chalk.red(String(calibrationResult.summary.failed_models))}`
                );
            }
            console.log(chalk.blue('│') + ` Result: ${chalk.green(resultPath)}`);
            if (policyPath) {
                console.log(chalk.blue('│') + ` Policy: ${chalk.green(policyPath)}`);
            }
            console.log(chalk.blue('╰' + '─'.repeat(72)));
        } catch (error) {
            console.error(chalk.red(`Calibration failed: ${error.message}`));
            if (process.env.DEBUG) {
                console.error(error.stack);
            }
            process.exit(1);
        }
    });
2757
+
2444
2758
  program
2445
2759
  .command('check')
2446
2760
  .description('Analyze your system and show compatible LLM models')
@@ -2802,6 +3116,145 @@ program
2802
3116
  }
2803
3117
  });
2804
3118
 
3119
// `ollama-plan` command: checks that Ollama is reachable, picks the local models
// to plan for, detects hardware and prints (or emits as JSON) the capacity plan
// returned by OllamaCapacityPlanner.
program
    .command('ollama-plan')
    .description('Plan safe Ollama runtime settings for selected local models')
    .option('--models <models...>', 'Model tags/families to include (default: all local models)')
    .option('--ctx <tokens>', 'Target context window in tokens', '8192')
    .option('--concurrency <n>', 'Target parallel request count', '2')
    .option('--objective <mode>', 'Optimization objective (latency|balanced|throughput)', 'balanced')
    .option('--reserve-gb <gb>', 'Memory reserve for OS and background workloads', '2')
    .option('--json', 'Output plan as JSON')
    .action(async (options) => {
        // No spinner in JSON mode so stdout carries only the JSON document.
        const spinner = options.json ? null : ora('Building Ollama capacity plan...').start();

        try {
            const objective = String(options.objective || 'balanced').toLowerCase();
            const isSupportedObjective = ['latency', 'balanced', 'throughput'].includes(objective);
            if (!isSupportedObjective) {
                throw new Error(`Invalid objective "${options.objective}". Use latency, balanced, or throughput.`);
            }

            const targetContext = parsePositiveIntegerOption(options.ctx, '--ctx');
            const targetConcurrency = parsePositiveIntegerOption(options.concurrency, '--concurrency');
            const reserveGB = parseNonNegativeNumberOption(options.reserveGb, '--reserve-gb');

            // Loaded lazily, only when this command actually runs.
            const OllamaClient = require('../src/ollama/client');
            const UnifiedDetector = require('../src/hardware/unified-detector');
            const OllamaCapacityPlanner = require('../src/ollama/capacity-planner');

            const client = new OllamaClient();
            const availability = await client.checkOllamaAvailability();
            if (!availability.available) {
                throw new Error(availability.error || 'Ollama is not available');
            }

            const localModels = await client.getLocalModels();
            const hasLocalModels = Boolean(localModels) && localModels.length !== 0;
            if (!hasLocalModels) {
                throw new Error('No local Ollama models found. Install one with: ollama pull llama3.2:3b');
            }

            const requestedFilters = options.models || [];
            const { selected, missing } = selectModelsForPlan(localModels, requestedFilters);
            if (selected.length === 0) {
                throw new Error(
                    `No matching local models found for: ${requestedFilters.join(', ')}`
                );
            }

            const hardware = await new UnifiedDetector().detect();
            const plan = new OllamaCapacityPlanner().plan({
                hardware,
                models: selected,
                targetContext,
                targetConcurrency,
                objective,
                reserveGB
            });

            if (options.json) {
                const document = {
                    generated_at: new Date().toISOString(),
                    selection: {
                        requested: requestedFilters,
                        selected: selected.map((model) => model.name),
                        missing
                    },
                    plan
                };
                console.log(JSON.stringify(document, null, 2));
                return;
            }

            spinner?.succeed('Capacity plan generated');

            const printHeading = (title) => console.log(chalk.blue.bold(title));
            const { memory, envelope, risk, fallback } = plan;

            console.log('\n' + chalk.bgBlue.white.bold(' OLLAMA CAPACITY PLAN '));
            console.log(
                chalk.blue('Hardware:'),
                `${plan.hardware.backendName} (${plan.hardware.backend})`
            );
            console.log(
                chalk.blue('Memory budget:'),
                `${memory.budgetGB}GB usable (reserve ${plan.hardware.reserveGB}GB)`
            );

            // Filters that matched no local model are reported but do not abort the plan.
            if (missing.length > 0) {
                console.log(
                    chalk.yellow('Missing model filters:'),
                    missing.join(', ')
                );
            }

            printHeading('\nSelected models:');
            for (const model of plan.models) {
                console.log(
                    ` - ${model.name} (${model.size}, ~${model.estimatedBaseMemoryGB}GB base)`
                );
            }

            printHeading('\nRecommended envelope:');
            console.log(
                ` Context: ${envelope.context.recommended} (requested ${envelope.context.requested})`
            );
            console.log(
                ` Parallel: ${envelope.parallel.recommended} (requested ${envelope.parallel.requested})`
            );
            console.log(
                ` Loaded models: ${envelope.loaded_models.recommended} (requested ${envelope.loaded_models.requested})`
            );
            console.log(
                ` Estimated memory: ${memory.recommendedEstimatedGB}GB / ${memory.budgetGB}GB (${memory.utilizationPercent}%)`
            );
            console.log(` Risk: ${risk.level.toUpperCase()} (${risk.score}/100)`);

            if (plan.notes.length > 0) {
                printHeading('\nNotes:');
                for (const note of plan.notes) {
                    console.log(` - ${note}`);
                }
            }

            printHeading('\nRecommended env vars:');
            Object.entries(plan.shell.env).forEach(([key, value]) => {
                console.log(` export ${key}=${value}`);
            });

            printHeading('\nFallback profile:');
            console.log(
                ` OLLAMA_NUM_CTX=${fallback.num_ctx} OLLAMA_NUM_PARALLEL=${fallback.num_parallel} OLLAMA_MAX_LOADED_MODELS=${fallback.max_loaded_models}`
            );
            console.log('');
        } catch (error) {
            spinner?.fail('Failed to build capacity plan');
            console.error(chalk.red('Error:'), error.message);
            if (process.env.DEBUG) {
                console.error(error.stack);
            }
            process.exit(1);
        }
    });
3257
+
2805
3258
  program
2806
3259
  .command('recommend')
2807
3260
  .description('Get intelligent model recommendations for your hardware')
@@ -2809,6 +3262,10 @@ program
2809
3262
  .option('--optimize <profile>', 'Optimization profile (balanced|speed|quality|context|coding)', 'balanced')
2810
3263
  .option('--no-verbose', 'Disable step-by-step progress display')
2811
3264
  .option('--policy <file>', 'Evaluate recommendations against a policy file')
3265
+ .option(
3266
+ '--calibrated [file]',
3267
+ 'Use calibrated routing policy (optional file path; defaults to ~/.llm-checker/calibration-policy.{yaml,yml,json})'
3268
+ )
2812
3269
  .addHelpText(
2813
3270
  'after',
2814
3271
  `
@@ -2816,6 +3273,11 @@ Enterprise policy examples:
2816
3273
  $ llm-checker recommend --policy ./policy.yaml
2817
3274
  $ llm-checker recommend --policy ./policy.yaml --category coding
2818
3275
  $ llm-checker recommend --policy ./policy.yaml --no-verbose
3276
+
3277
+ Calibrated routing examples:
3278
+ $ llm-checker recommend --calibrated --category coding
3279
+ $ llm-checker recommend --calibrated ./calibration-policy.yaml --category reasoning
3280
+ $ llm-checker recommend --policy ./calibration-policy.yaml --category coding
2819
3281
  `
2820
3282
  )
2821
3283
  .action(async (options) => {
@@ -2823,7 +3285,13 @@ Enterprise policy examples:
2823
3285
  try {
2824
3286
  const verboseEnabled = options.verbose !== false;
2825
3287
  const checker = new (getLLMChecker())({ verbose: verboseEnabled });
2826
- const policyConfig = options.policy ? loadPolicyConfiguration(options.policy) : null;
3288
+ const routingPreference = resolveRoutingPolicyPreference({
3289
+ policyOption: options.policy,
3290
+ calibratedOption: options.calibrated,
3291
+ loadEnterprisePolicy: loadPolicyConfiguration
3292
+ });
3293
+ const policyConfig = routingPreference.enterprisePolicy;
3294
+ const calibratedPolicy = routingPreference.calibratedPolicy;
2827
3295
 
2828
3296
  if (!verboseEnabled) {
2829
3297
  process.stdout.write(chalk.gray('Generating recommendations...'));
@@ -2860,11 +3328,18 @@ Enterprise policy examples:
2860
3328
  policyEnforcement = resolvePolicyEnforcement(policyConfig.policy, policyEvaluation);
2861
3329
  }
2862
3330
 
3331
+ const routingTask = normalizeTaskName(options.category || 'general');
3332
+ const recommendationIdentifiers = collectRecommendationModelIdentifiers(intelligentRecommendations);
3333
+ const routeDecision = calibratedPolicy
3334
+ ? resolveCalibratedRouteDecision(calibratedPolicy, routingTask, recommendationIdentifiers)
3335
+ : null;
3336
+
2863
3337
  // Mostrar información del sistema
2864
3338
  displaySystemInfo(hardware, { summary: { hardwareTier: intelligentRecommendations.summary.hardware_tier } });
2865
3339
 
2866
3340
  // Mostrar recomendaciones
2867
3341
  displayIntelligentRecommendations(intelligentRecommendations);
3342
+ displayCalibratedRoutingDecision('recommend', calibratedPolicy, routeDecision, routingPreference.warnings);
2868
3343
 
2869
3344
  if (policyConfig && policyEvaluation && policyEnforcement) {
2870
3345
  displayPolicySummary('recommend', policyConfig, policyEvaluation, policyEnforcement);
@@ -3124,7 +3599,13 @@ program
3124
3599
  .command('ai-run')
3125
3600
  .description('AI-powered model selection and execution')
3126
3601
  .option('-m, --models <models...>', 'Specific models to choose from')
3602
+ .option('-c, --category <category>', 'Task category hint (coding, reasoning, multimodal, general, etc.)')
3127
3603
  .option('--prompt <prompt>', 'Prompt to run with selected model')
3604
+ .option('--policy <file>', 'Explicit calibrated routing policy file (takes precedence over --calibrated)')
3605
+ .option(
3606
+ '--calibrated [file]',
3607
+ 'Enable calibrated routing policy (optional file path; defaults to ~/.llm-checker/calibration-policy.{yaml,yml,json})'
3608
+ )
3128
3609
  .action(async (options) => {
3129
3610
  showAsciiArt('ai-run');
3130
3611
  // Check if Ollama is installed first
@@ -3138,6 +3619,11 @@ program
3138
3619
  const aiSelector = new AIModelSelector();
3139
3620
  const checker = new (getLLMChecker())();
3140
3621
  const systemInfo = await checker.getSystemInfo();
3622
+ const routingPreference = resolveRoutingPolicyPreference({
3623
+ policyOption: options.policy,
3624
+ calibratedOption: options.calibrated
3625
+ });
3626
+ const calibratedPolicy = routingPreference.calibratedPolicy;
3141
3627
 
3142
3628
  // Get available models or use provided ones
3143
3629
  let candidateModels = options.models;
@@ -3165,6 +3651,10 @@ program
3165
3651
  return;
3166
3652
  }
3167
3653
  }
3654
+
3655
+ candidateModels = Array.isArray(candidateModels)
3656
+ ? candidateModels.filter((model) => typeof model === 'string' && model.trim().length > 0)
3657
+ : [];
3168
3658
 
3169
3659
  // AI selection
3170
3660
  const systemSpecs = {
@@ -3175,10 +3665,33 @@ program
3175
3665
  gpu_model_normalized: systemInfo.gpu?.model ||
3176
3666
  (systemInfo.cpu?.manufacturer === 'Apple' ? 'apple_silicon' : 'cpu_only')
3177
3667
  };
3178
-
3179
- const result = await aiSelector.selectBestModel(candidateModels, systemSpecs);
3668
+
3669
+ const taskHint = normalizeTaskName(options.category || inferTaskFromPrompt(options.prompt));
3670
+ const routeDecision = calibratedPolicy
3671
+ ? resolveCalibratedRouteDecision(calibratedPolicy, taskHint, candidateModels)
3672
+ : null;
3673
+
3674
+ let result;
3675
+ if (routeDecision && routeDecision.matchedAvailableModel && routeDecision.selectedModel) {
3676
+ result = {
3677
+ bestModel: routeDecision.selectedModel,
3678
+ confidence: routeDecision.usedRouteFallbackModel ? 0.82 : 0.94,
3679
+ method: 'calibrated-policy-route',
3680
+ reasoning: `Selected from calibrated policy route for ${routeDecision.resolvedTask}`
3681
+ };
3682
+ } else {
3683
+ if (routeDecision && routeDecision.routeCandidates.length > 0) {
3684
+ routingPreference.warnings.push(
3685
+ `Calibrated route candidates (${routeDecision.routeCandidates.join(
3686
+ ', '
3687
+ )}) are not installed locally. Falling back to AI selector.`
3688
+ );
3689
+ }
3690
+ result = await aiSelector.selectBestModel(candidateModels, systemSpecs, taskHint);
3691
+ }
3180
3692
 
3181
3693
  spinner.succeed(`Selected ${chalk.green.bold(result.bestModel)} (${result.method}, ${Math.round(result.confidence * 100)}% confidence)`);
3694
+ displayCalibratedRoutingDecision('ai-run', calibratedPolicy, routeDecision, routingPreference.warnings);
3182
3695
 
3183
3696
  // Execute the selected model
3184
3697
  console.log(chalk.magenta.bold(`\nLaunching ${result.bestModel}...`));
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "llm-checker",
3
- "version": "3.2.8",
3
+ "version": "3.4.0",
4
4
  "description": "Intelligent CLI tool with AI-powered model selection that analyzes your hardware and recommends optimal LLM models for your system",
5
5
  "bin": {
6
6
  "llm-checker": "bin/cli.js",