omo-recommend-models 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3948 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * omo-recommend-models — Consolidated AI-powered model recommendation for
4
+ * oh-my-openagent.jsonc.
5
+ *
6
+ * Makes a SINGLE AI recommendation pass with FULL context: cloud model
7
+ * inventory AND local Ollama models AND GPU/hardware info. The AI sees
8
+ * everything at once, so it never makes blind placements.
9
+ *
10
+ * Usage:
11
+ * omo-recommend-models # AI-powered (cloud + local)
12
+ * omo-recommend-models -y # auto-accept all
13
+ * omo-recommend-models --rebalance # algorithmic tier-chain only
14
+ * omo-recommend-models --dry-run # preview without writing
15
+ * omo-recommend-models --cloud-only # skip local model discovery
16
+ * omo-recommend-models --local-only # skip cloud model discovery
17
+ */
18
+
19
+ import fs from "node:fs";
20
+ import path from "node:path";
21
+ import readline from "node:readline";
22
+ import { execFileSync, execSync, spawn } from "node:child_process";
23
+ import os from "node:os";
24
+ import { fileURLToPath } from "node:url";
25
+ import { dirname } from "node:path";
26
+ import mri from "mri";
27
+ import pc from "picocolors";
28
+ import {
29
+ CONFIG_PATH,
30
+ BACKUP_PATH,
31
+ confirm as plainConfirm,
32
+ promptUser as plainPromptUser,
33
+ pickFreeModel,
34
+ discoverFreeModels,
35
+ callOpencodeChat,
36
+ parseAiJson,
37
+ loadConfig,
38
+ loadProviderModels,
39
+ buildProviderAliases,
40
+ resolveProvider,
41
+ normalizeLocalModelName,
42
+ formatModelRef,
43
+ buildRichModelLookup,
44
+ } from "../lib/omo-shared.js";
45
+ import {
46
+ backupConfig,
47
+ writeConfigWithValidation,
48
+ } from "../lib/recommend/apply.js";
49
+
50
// ES modules have no built-in __filename/__dirname; derive them from the module URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const VERSION = "1.0.0";

// Child processes registered through registerChild() that have not yet exited.
const activeChildren = new Set();
// Lazily imported "@clack/prompts" module; stays null when unavailable or disabled.
let clack = null;
// Whether the clack-based terminal UI was requested (see configureTerminalUi).
let useClackPrompts = false;
let debugMode = false;
58
+
59
/**
 * Build the multi-line help text shown for -h/--help.
 *
 * @returns {string} Help text with lines joined by "\n" (no trailing newline).
 */
function usage() {
  const header = [
    "Usage: omo-recommend-models [options]",
    "",
    "Recommend OpenCode OMO model placements for oh-my-openagent.jsonc.",
    "",
    "Options:",
  ];
  const options = [
    " -y, --yes Apply recommendations without interactive confirmation",
    " --rebalance Perform algorithmic tier-chain restructuring",
    " --dry-run Preview recommendations without writing config",
    " --cloud-only Skip GPU, Ollama, and local model discovery",
    " --local-only Skip cloud model discovery and API checks",
    " --model <ref> Use an explicit AI panel model; may be repeated",
    " --interactive Force interactive prompts in non-TTY environments",
    " --debug Print stack traces for errors",
    " -h, --help Show this help",
    " -v, --version Show version",
  ];
  return [...header, ...options].join("\n");
}
78
+
79
/**
 * Track a child process in `activeChildren` until it exits or errors.
 *
 * @param {object} child - Event emitter for the child process.
 * @returns {object} The same `child`, for chaining.
 */
function registerChild(child) {
  const forget = () => activeChildren.delete(child);
  activeChildren.add(child);
  child.once("exit", forget);
  child.once("error", forget);
  return child;
}
85
+
86
/**
 * Spawn a child process and register it for signal-time cleanup.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} args - Arguments passed to `spawn`.
 * @param {object} options - Options passed to `spawn`.
 * @returns {object} The spawned, registered child process.
 */
function spawnTracked(command, args, options) {
  const child = spawn(command, args, options);
  return registerChild(child);
}
89
+
90
/**
 * Ask every still-running tracked child to stop, then escalate.
 *
 * Sends SIGTERM immediately. Two seconds later, any child from that snapshot
 * that is still running receives SIGKILL and the process exits with code 1.
 * The timer is unref'd so it does not by itself keep the event loop alive.
 */
function terminateActiveChildren() {
  const isRunning = (child) =>
    child.exitCode === null && child.signalCode === null;
  const sendSignal = (child, signal) => {
    try {
      child.kill(signal);
    } catch {}
  };

  const children = [...activeChildren].filter(isRunning);
  for (const child of children) {
    sendSignal(child, "SIGTERM");
  }

  const escalation = setTimeout(() => {
    for (const child of children) {
      if (isRunning(child)) sendSignal(child, "SIGKILL");
    }
    process.exit(1);
  }, 2000);
  escalation.unref();
}
110
+
111
/**
 * Install one-shot SIGINT/SIGTERM handlers that terminate tracked children.
 *
 * Both signals share one handler guarded by a flag, so only the first signal
 * received triggers the termination sequence.
 */
function installSignalHandlers() {
  let alreadyHandling = false;

  function onSignal(signal) {
    if (alreadyHandling) return;
    alreadyHandling = true;
    process.stderr.write(`\n${signal} received; terminating subprocesses...\n`);
    terminateActiveChildren();
  }

  for (const signal of ["SIGINT", "SIGTERM"]) {
    process.once(signal, onSignal);
  }
}
122
+
123
/**
 * Enable or disable the clack-based terminal UI.
 *
 * When enabled, tries to import "@clack/prompts" dynamically. If the import
 * fails, `clack` is reset to null and `useClackPrompts` stays true.
 *
 * @param {*} enabled - Truthy to request the clack UI.
 * @returns {Promise<void>}
 */
async function configureTerminalUi(enabled) {
  useClackPrompts = Boolean(enabled);
  if (useClackPrompts) {
    try {
      clack = await import("@clack/prompts");
    } catch {
      clack = null;
    }
  }
}
132
+
133
/**
 * Ask a yes/no question, using clack when active and the plain prompt otherwise.
 *
 * With clack, a trailing parenthesised hint such as "(y/N)" is stripped from
 * the message, the default is "no", and any answer other than `true` counts
 * as a refusal.
 *
 * @param {string} question - Prompt text.
 * @returns {Promise<*>} `true`/`false` from clack, or whatever `plainConfirm` resolves to.
 */
async function confirm(question) {
  const clackAvailable = useClackPrompts && clack?.confirm;
  if (!clackAvailable) {
    return plainConfirm(question);
  }
  const message = question.replace(/\s*\([^)]+\)\s*$/, "").trim();
  const answer = await clack.confirm({ message, initialValue: false });
  return answer === true;
}
143
+
144
/**
 * Ask for free-form text, using clack when active and the plain prompt otherwise.
 *
 * @param {string} question - Prompt text.
 * @returns {Promise<*>} With clack: the trimmed answer, or "" when the answer
 *   is not a string. Otherwise whatever `plainPromptUser` resolves to.
 */
async function promptUser(question) {
  const clackAvailable = useClackPrompts && clack?.text;
  if (!clackAvailable) {
    return plainPromptUser(question);
  }
  const answer = await clack.text({ message: question.trim() });
  if (typeof answer !== "string") return "";
  return answer.trim();
}
151
+
152
/**
 * Locate an executable by bare name on PATH.
 *
 * @param {string} binary - Command name; names containing a path separator are rejected.
 * @returns {string} Full path of the first regular, executable file found, or "".
 */
function commandExists(binary) {
  if (!binary || binary.includes(path.sep)) return "";

  const searchDirs = String(process.env.PATH || "")
    .split(path.delimiter)
    .filter(Boolean);

  for (const dir of searchDirs) {
    const candidate = path.join(dir, binary);
    try {
      if (fs.statSync(candidate).isFile()) {
        fs.accessSync(candidate, fs.constants.X_OK);
        return candidate;
      }
    } catch {}
  }
  return "";
}
166
+
167
+ // =========================================================================
168
+ // Model scoring constants (from omo-recommend-cloud)
169
+ // =========================================================================
170
+
171
// Score bonus per model family, used by scoreFromCache(). Key order is kept as
// declared because consumers iterate the entries in insertion order.
const FAMILY_TIERS = {
  "claude-opus": 35,
  "claude-sonnet": 28,
  "gemini-pro": 25,
  "gpt-pro": 25,
  "gpt": 22,
  "gpt-codex": 18,
  "gpt-codex-spark": 16,
  "gemini-flash": 12,
  "nemotron-free": 10,
  "deepseek-flash-free": 8,
  "claude-haiku": 8,
  "gpt-mini": 5,
  "mimo-v2.5-free": 3,
  "north-free": 0,
  "big-pickle": 0,
};

// Flat score bonus for selected providers (the part of a ref before "/").
const PROVIDER_PRESTIGE = {
  "openai": 5,
  "github-copilot": 5,
  "anthropic": 5,
  "opencode": 5,
};

// Score adjustment per model variant.
const VARIANT_BONUS = { xhigh: 10, max: 8, high: 5, medium: 0, low: -5 };

const LOCAL_PROVIDER = "local";
const FREE_PROVIDERS = ["opencode", LOCAL_PROVIDER];
const QUALITY_TIERS = ["reasoning", "balanced", "fast"];

// Providers flagged by markProviderCreditExhausted().
const quotaExceededProviders = new Set();
// provider -> { creditExhausted, rateLimitedUntil, reason }; see providerState().
const providerAvailability = new Map();
// When true, isProviderAvailable() rejects every provider except "opencode".
let opencodeOnlyMode = false;
// provider -> in-flight or settled probe promise; see probeModel().
const providerProbePromises = new Map();
203
+
204
+ // =========================================================================
205
+ // Local model constants (from omo-recommend-local)
206
+ // =========================================================================
207
+
208
// On-disk cache of the Ollama model catalog: ~/.cache/oh-my-opencode/ollama-models.json
const MODEL_CACHE_FILE = path.join(
  os.homedir(),
  ".cache",
  "oh-my-opencode",
  "ollama-models.json",
);

// On-disk cache of the last panel result: ~/.cache/oh-my-opencode/panel-cache.json
const PANEL_CACHE_FILE = path.join(
  os.homedir(),
  ".cache",
  "oh-my-opencode",
  "panel-cache.json",
);
221
+
222
/**
 * Read the cached panel result from PANEL_CACHE_FILE.
 *
 * @returns {object|null} The parsed cache object, or null when the file is
 *   missing, unreadable, not valid JSON, or lacks a truthy `timestamp`/`result`.
 */
function loadPanelCache() {
  try {
    if (!fs.existsSync(PANEL_CACHE_FILE)) return null;
    const cached = JSON.parse(fs.readFileSync(PANEL_CACHE_FILE, "utf-8"));
    const usable = cached && cached.timestamp && cached.result;
    return usable ? cached : null;
  } catch {
    return null;
  }
}
233
+
234
/**
 * Persist a panel result to PANEL_CACHE_FILE as pretty-printed JSON.
 *
 * Creates the cache directory if needed. Any failure is swallowed on purpose:
 * the cache is an optimisation and must never abort the run.
 *
 * @param {*} result - Panel result to store.
 * @param {*} panelModels - Models used for the panel; stored as null when falsy.
 * @param {object|null|undefined} gpuInfo - GPU details; only hasGpu, name,
 *   label and vramGb are stored, and a falsy value is stored as null.
 */
function savePanelCache(result, panelModels, gpuInfo) {
  try {
    const cacheDir = path.dirname(PANEL_CACHE_FILE);
    if (!fs.existsSync(cacheDir)) {
      fs.mkdirSync(cacheDir, { recursive: true });
    }

    let gpu = null;
    if (gpuInfo) {
      gpu = {
        hasGpu: !!gpuInfo.hasGpu,
        name: gpuInfo.name || "",
        label: gpuInfo.label || "",
        vramGb: gpuInfo.vramGb || 0,
      };
    }

    const payload = {
      timestamp: Date.now(),
      models: panelModels || null,
      gpu,
      result,
    };
    fs.writeFileSync(PANEL_CACHE_FILE, JSON.stringify(payload, null, 2), "utf-8");
  } catch {
    /* cache write failure is non-fatal */
  }
}
264
+
265
/**
 * Order-insensitive equality of two model lists.
 *
 * @param {Array|null|undefined} a
 * @param {Array|null|undefined} b
 * @returns {boolean} True when both are present, have equal length, and their
 *   default-sorted copies match element by element (strict equality).
 */
function modelListEquals(a, b) {
  if (!a || !b || a.length !== b.length) return false;
  const left = [...a].sort();
  const right = [...b].sort();
  for (let i = 0; i < left.length; i += 1) {
    if (left[i] !== right[i]) return false;
  }
  return true;
}
272
+
273
/**
 * Check whether every element of `subset` also appears in `superset`.
 *
 * @param {Iterable|null|undefined} superset
 * @param {Array|null|undefined} subset
 * @returns {boolean} False when either argument is falsy; true for an empty subset.
 */
function isSubsetList(superset, subset) {
  if (!superset || !subset) return false;
  const known = new Set(superset);
  return !subset.some((item) => !known.has(item));
}
278
+
279
// Known local model names, each with the tags recognised for it.
const KNOWN_MODELS = [
  { name: "llama3.2", tags: ["1b", "3b"] },
  { name: "llama3.1", tags: ["8b", "70b", "405b"] },
  { name: "llama3", tags: ["8b", "70b", "405b"] },
  { name: "qwen2.5-coder", tags: ["1.5b", "7b", "14b", "32b"] },
  { name: "qwen2.5", tags: ["7b", "14b", "32b", "72b"] },
  { name: "deepseek-coder-v2", tags: ["16b"] },
  { name: "codegemma", tags: ["2b", "7b"] },
  { name: "codellama", tags: ["7b", "13b", "34b", "70b"] },
  { name: "mistral", tags: ["7b"] },
  { name: "mixtral", tags: ["8x7b", "8x22b"] },
  { name: "phi3", tags: ["3.8b", "7b", "14b"] },
  { name: "gemma2", tags: ["2b", "9b", "27b"] },
  { name: "nomic-embed-text", tags: ["v1.5"] },
  { name: "mxbai-embed-large", tags: ["v1"] },
  { name: "llama3.2-vision", tags: ["11b", "90b"] },
  { name: "llava", tags: ["7b", "13b", "34b"] },
  { name: "bakllava", tags: ["7b"] },
  { name: "starcoder2", tags: ["3b", "7b", "15b"] },
  { name: "dolphin-mixtral", tags: ["8x7b"] },
  { name: "neural-chat", tags: ["7b"] },
  { name: "orca-mini", tags: ["3b", "7b"] },
  { name: "tinyllama", tags: ["1.1b"] },
  { name: "falcon3", tags: ["1b", "3b", "7b", "10b"] },
  { name: "command-r", tags: ["7b", "7b-08-2024", "v01", "35b-08-2024", "104b"] },
  { name: "llama-guard3", tags: ["1b", "8b"] },
  { name: "nemotron", tags: ["mini-4b-instruct"] },
  { name: "deepseek-r1", tags: ["1.5b", "7b", "8b", "14b", "32b", "70b", "671b"] },
  { name: "qwq", tags: ["32b"] },
];
315
+
316
+ const MODEL_SCORES = {
317
+ "deepseek-coder-v2": 95,
318
+ "qwen2.5-coder": 92,
319
+ "llama3.1": 88,
320
+ llama3: 86,
321
+ codellama: 82,
322
+ codegemma: 78,
323
+ mixtral: 76,
324
+ "deepseek-r1": 74,
325
+ qwq: 72,
326
+ "qwen2.5": 70,
327
+ mistral: 68,
328
+ starcoder2: 66,
329
+ falcon3: 64,
330
+ "llama3.2": 62,
331
+ gemma2: 60,
332
+ "command-r": 58,
333
+ phi3: 55,
334
+ "dolphin-mixtral": 52,
335
+ "neural-chat": 48,
336
+ "orca-mini": 42,
337
+ nemotron: 40,
338
+ "llama-guard3": 35,
339
+ tinyllama: 30,
340
+ "mxbai-embed-large": 20,
341
+ "nomic-embed-text": 18,
342
+ "llama3.2-vision": 70,
343
+ llava: 62,
344
+ bakllava: 55,
345
+ };
346
+
347
+ const BASE_VRAM = {
348
+ "deepseek-coder-v2": 16,
349
+ "qwen2.5-coder": 7,
350
+ "llama3.1": 8,
351
+ llama3: 8,
352
+ codellama: 7,
353
+ codegemma: 7,
354
+ mixtral: 47,
355
+ "deepseek-r1": 7,
356
+ qwq: 32,
357
+ "qwen2.5": 7,
358
+ mistral: 7,
359
+ starcoder2: 7,
360
+ falcon3: 7,
361
+ "llama3.2": 3,
362
+ gemma2: 9,
363
+ "command-r": 7,
364
+ phi3: 4,
365
+ "dolphin-mixtral": 47,
366
+ "neural-chat": 7,
367
+ "orca-mini": 3,
368
+ nemotron: 4,
369
+ "llama-guard3": 8,
370
+ tinyllama: 1,
371
+ "mxbai-embed-large": 1,
372
+ "nomic-embed-text": 1,
373
+ "llama3.2-vision": 11,
374
+ llava: 7,
375
+ bakllava: 7,
376
+ };
377
+
378
+ // =========================================================================
379
+ // Cloud model scoring
380
+ // =========================================================================
381
+
382
/**
 * Score a cloud model from its cached metadata entry.
 *
 * Adds up: recency (2 points per day after epoch day 20000), the family tier,
 * a reasoning-capability bonus, a context-window bonus, the variant bonus and
 * provider prestige, minus a capped cost penalty.
 *
 * The family tier uses the LONGEST key of FAMILY_TIERS contained in
 * `entry.family`. Taking the first containing key in declaration order would
 * let the generic "gpt" key shadow "gpt-codex", "gpt-codex-spark" and
 * "gpt-mini", which are declared after it.
 *
 * @param {string} model - Model ref of the form "provider/model".
 * @param {string|null|undefined} variant - Optional variant name (key of VARIANT_BONUS).
 * @param {object} entry - Cached metadata for the model.
 * @returns {number} The score, or 0 if the computation produced NaN.
 */
function scoreFromCache(model, variant, entry) {
  let score = 0;

  if (entry.release_date) {
    const released = new Date(entry.release_date);
    const epochDays = Math.floor(released.getTime() / 86400000);
    if (!Number.isNaN(epochDays)) {
      score += Math.max(0, epochDays - 20000) * 2;
    }
  }

  const family = String(entry.family || "");
  let bestFamilyKey = null;
  for (const key of Object.keys(FAMILY_TIERS)) {
    if (!family.includes(key)) continue;
    // An exact match is necessarily the longest containing key.
    if (bestFamilyKey === null || key.length > bestFamilyKey.length) {
      bestFamilyKey = key;
    }
  }
  if (bestFamilyKey !== null) score += FAMILY_TIERS[bestFamilyKey];

  const caps = entry.capabilities || [];
  if (
    Array.isArray(caps) &&
    // Skip non-string capability entries instead of throwing on them.
    caps.some((c) => typeof c === "string" && c.toLowerCase().includes("reasoning"))
  ) {
    score += 15;
  }

  const ctx = entry.context_length || entry.context_window || 0;
  if (ctx > 200000) score += 8;
  else if (ctx > 100000) score += 5;
  else if (ctx > 32000) score += 3;

  if (variant && VARIANT_BONUS[variant] !== undefined) {
    score += VARIANT_BONUS[variant];
  }

  // Non-numeric cost values are ignored; the penalty is capped at 5.
  const cost = entry.cost || entry.input_price || 0;
  if (typeof cost === "number" && !Number.isNaN(cost)) {
    score -= Math.min(cost * 10, 5);
  }

  const provider = model.split("/")[0];
  score += PROVIDER_PRESTIGE[provider] || 0;

  return Number.isNaN(score) ? 0 : score;
}
417
+
418
/**
 * Score a model from its name alone, for use when no metadata is cached.
 *
 * Signals are read from the last "/"-separated segment of the ref: a
 * "major.minor" version, tier keywords, a "reasoning" marker, size keywords, a
 * parameter count such as "70b", plus the variant bonus and provider prestige.
 *
 * The small-model keyword "mini" only counts when it is not preceded by a
 * letter, so that "gemini" is not scored as a "mini" model.
 *
 * @param {string} model - Model ref, usually "provider/model".
 * @param {string|null|undefined} variant - Optional variant name (key of VARIANT_BONUS).
 * @returns {number} Heuristic score.
 */
function scoreFromHeuristics(model, variant) {
  const parts = model.split("/");
  const modelPart = parts[parts.length - 1] || "";
  const lower = modelPart.toLowerCase();
  const hasAny = (...needles) => needles.some((needle) => lower.includes(needle));
  let score = 0;

  const verMatch = modelPart.match(/(\d+)\.(\d+)/);
  if (verMatch) {
    score += Number.parseInt(verMatch[1], 10) * 3 + Number.parseInt(verMatch[2], 10);
  }

  if (hasAny("opus", "pro", "ultra")) score += 14;
  if (hasAny("sonnet", "flash", "codex")) score += 8;
  // "mini" must start a word: plain substring matching would also hit "gemini".
  if (hasAny("haiku", "nano") || /(^|[^a-z])mini/.test(lower)) score += 3;
  if (hasAny("reasoning")) score += 10;
  if (hasAny("max", "large", "big")) score += 5;

  if (variant && VARIANT_BONUS[variant] !== undefined) {
    score += VARIANT_BONUS[variant];
  }

  const sizeMatch = modelPart.match(/(\d+)b/i);
  if (sizeMatch) {
    const size = Number.parseInt(sizeMatch[1], 10);
    if (size >= 70) score += 20;
    else if (size >= 30) score += 15;
    else if (size >= 13) score += 10;
    else if (size >= 7) score += 5;
  }

  const provider = parts[0];
  score += PROVIDER_PRESTIGE[provider] || 0;
  return score;
}
460
+
461
/**
 * Score a model, using cached metadata when present and name heuristics otherwise.
 *
 * @param {string} model - Model ref.
 * @param {string|null|undefined} variant - Optional variant name.
 * @param {object|null|undefined} cacheEntry - Cached metadata, if any.
 * @returns {number} The score.
 */
function scoreModel(model, variant, cacheEntry) {
  if (cacheEntry) {
    return scoreFromCache(model, variant, cacheEntry);
  }
  return scoreFromHeuristics(model, variant);
}
466
+
467
/**
 * Read the configured panel ordering strategy.
 *
 * @param {object|null|undefined} config - Loaded configuration.
 * @returns {string} Trimmed `config.omo.panel_model_order`, or "opencode-first" when unset.
 */
function panelModelOrder(config) {
  const configured = config?.omo?.panel_model_order;
  return String(configured || "opencode-first").trim();
}
470
+
471
/**
 * Order panel model refs for use.
 *
 * With order "score", refs are sorted by heuristic score, highest first. With
 * any other order, "opencode/..." refs come first and each group is sorted by
 * score. Ties keep the input order.
 *
 * @param {string[]} refs - Model refs of the form "provider/model".
 * @param {object|null|undefined} config - Loaded configuration.
 * @returns {string[]} A new array of refs in the chosen order.
 */
function sortPanelModelRefs(refs, config) {
  const byScoreThenInput = (a, b) => (b.score - a.score) || (a.index - b.index);
  const opencodeFirst = (a, b) => {
    const opencodeDiff =
      Number(b.provider === "opencode") - Number(a.provider === "opencode");
    return opencodeDiff !== 0 ? opencodeDiff : byScoreThenInput(a, b);
  };

  const comparator =
    panelModelOrder(config) === "score" ? byScoreThenInput : opencodeFirst;

  return refs
    .map((ref, index) => ({
      ref,
      index,
      provider: ref.split("/")[0],
      score: scoreModel(ref, null, null),
    }))
    .sort(comparator)
    .map((entry) => entry.ref);
}
492
+
493
/**
 * Map a model name to a coarse family label for grouping.
 *
 * Rules are checked in order and the first substring match wins, so
 * "gpt-...-codex" is labelled "gpt", not "codex".
 *
 * @param {*} modelName - Model name; coerced to a lowercase string.
 * @returns {string} The family label, or "" when nothing matches.
 */
function panelModelFamilyLabel(modelName) {
  const name = String(modelName || "").toLowerCase();
  const rules = [
    ["claude-opus", "claude-opus"],
    ["claude-sonnet", "claude-sonnet"],
    ["claude-haiku", "claude-haiku"],
    ["gemini", "gemini"],
    ["gpt", "gpt"],
    ["codex", "codex"],
    ["nemotron", "nemotron"],
    ["deepseek", "deepseek"],
    ["mimo", "mimo"],
    ["north", "north"],
    ["pickle", "big-pickle"],
  ];
  for (const [needle, label] of rules) {
    if (name.includes(needle)) return label;
  }
  return "";
}
508
+
509
/**
 * Group panel model refs under display labels, keeping first-seen order.
 *
 * Labels: "CLI agents" for provider "cli", "opencode" for provider "opencode",
 * "<provider>/<family>" when a family is recognised, else the provider alone.
 * Refs without "/" get provider "unknown". Blank entries are skipped.
 *
 * @param {Array|null|undefined} models - Model refs.
 * @returns {{label: string, models: {ref: string, model: string}[]}[]} Groups in first-seen order.
 */
function groupPanelModelRefs(models) {
  // A Map iterates in insertion order, which gives the first-seen group order.
  const groups = new Map();

  for (const raw of models || []) {
    const ref = String(raw || "").trim();
    if (!ref) continue;

    const slash = ref.indexOf("/");
    const hasProvider = slash !== -1;
    const provider = hasProvider ? ref.slice(0, slash) : "unknown";
    const model = hasProvider ? ref.slice(slash + 1) : ref;
    const family = provider === "cli" ? "agents" : panelModelFamilyLabel(model);

    let label;
    if (provider === "cli") label = "CLI agents";
    else if (provider === "opencode") label = "opencode";
    else if (family) label = `${provider}/${family}`;
    else label = provider;

    if (!groups.has(label)) groups.set(label, []);
    groups.get(label).push({ ref, model });
  }

  return Array.from(groups, ([label, members]) => ({ label, models: members }));
}
534
+
535
/**
 * Print panel models grouped by label as a numbered list ("1. label: model").
 *
 * The first model of a group shares the line with the label; later models are
 * aligned beneath it using spaces equal to the prefix length.
 *
 * @param {Array|null|undefined} models - Model refs.
 * @param {string} [indent=" "] - Leading text for every numbered line.
 * @returns {number} Number of groups printed.
 */
function printNumberedPanelModelGroups(models, indent = " ") {
  const groups = groupPanelModelRefs(models);
  const width = String(groups.length).length;

  let position = 0;
  for (const group of groups) {
    position += 1;
    const prefix = `${indent}${String(position).padStart(width, " ")}. ${group.label}: `;
    const continuation = " ".repeat(prefix.length);
    let isFirst = true;
    for (const entry of group.models) {
      console.log(`${isFirst ? prefix : continuation}${entry.model}`);
      isFirst = false;
    }
  }
  return groups.length;
}
550
+
551
/**
 * Print panel models grouped by label as a selectable list ("[1] label: model").
 *
 * The first model of a group shares the line with the label; later models are
 * aligned beneath it using spaces equal to the prefix length.
 *
 * @param {Array|null|undefined} models - Model refs.
 * @param {string} [indent=" "] - Leading text for every bracketed line.
 * @returns {Array} The groups that were printed, in display order.
 */
function printSelectablePanelModelGroups(models, indent = " ") {
  const groups = groupPanelModelRefs(models);
  const width = String(groups.length).length;

  let position = 0;
  for (const group of groups) {
    position += 1;
    const prefix = `${indent}[${String(position).padStart(width, " ")}] ${group.label}: `;
    const continuation = " ".repeat(prefix.length);
    let isFirst = true;
    for (const entry of group.models) {
      console.log(`${isFirst ? prefix : continuation}${entry.model}`);
      isFirst = false;
    }
  }
  return groups;
}
566
+
567
/**
 * Read the panel models listed in the configuration.
 *
 * @param {object|null|undefined} config - Loaded configuration.
 * @returns {string[]} Trimmed, non-empty entries of `config.omo.panel_models`,
 *   or [] when that value is not an array.
 */
function configuredPanelModels(config) {
  const listed = config?.omo?.panel_models;
  if (!Array.isArray(listed)) return [];

  const cleaned = [];
  for (const model of listed) {
    const name = String(model || "").trim();
    if (name) cleaned.push(name);
  }
  return cleaned;
}
574
+
575
/**
 * Decide whether a panel model list needs opencode.
 *
 * @param {Array|*} models - Model refs.
 * @returns {boolean} True when the list is missing or empty, or when at least
 *   one entry does not start with "cli/".
 */
function panelModelsRequireOpencode(models) {
  if (!Array.isArray(models) || models.length === 0) return true;
  const isCliRef = (model) => String(model || "").startsWith("cli/");
  return !models.every(isCliRef);
}
579
+
580
/**
 * Decide whether the panel that will actually run needs opencode.
 *
 * Explicit models take precedence over configured ones. With neither, the
 * default panel is assumed to need opencode.
 *
 * @param {object|null|undefined} config - Loaded configuration.
 * @param {string[]} explicitModels - Explicit panel model refs.
 * @returns {boolean}
 */
function selectedPanelRequiresOpencode(config, explicitModels) {
  let chosen = explicitModels;
  if (chosen.length === 0) {
    chosen = configuredPanelModels(config);
  }
  return chosen.length > 0 ? panelModelsRequireOpencode(chosen) : true;
}
586
+
587
/**
 * Build the default panel: the discovered free models, sorted, capped at five.
 *
 * @param {object|null|undefined} config - Loaded configuration (controls sort order).
 * @returns {string[]} At most five model refs.
 */
function defaultPanelModels(config) {
  const MAX_PANEL = 5;
  const sortedFree = sortPanelModelRefs(discoverFreeModels(), config);
  if (sortedFree.length > MAX_PANEL) {
    return sortedFree.slice(0, MAX_PANEL);
  }
  return sortedFree;
}
592
+
593
/**
 * Resolve the panel models to use: explicit, else configured, else default.
 *
 * @param {object|null|undefined} config - Loaded configuration.
 * @param {string[]|null|undefined} panelModels - Explicit panel models.
 * @returns {string[]} The first non-empty list in that precedence order.
 */
function plannedPanelModels(config, panelModels) {
  if (panelModels && panelModels.length > 0) return panelModels;
  const configured = configuredPanelModels(config);
  if (configured.length > 0) return configured;
  return defaultPanelModels(config);
}
598
+
599
/**
 * Start a progress indicator for one step and return its controls.
 *
 * Uses a clack spinner when the clack UI is active. Otherwise it writes a
 * single status line to stdout and rewrites it in place with "\r" plus the
 * erase-to-end-of-line escape.
 *
 * @param {string} label - Step name shown on every update.
 * @returns {{update: Function, done: Function, skip: Function}} Controls:
 *   `update(message)`, `done(message = "done")` and `skip(message)`.
 */
function createProgress(label) {
  if (useClackPrompts && clack?.spinner) {
    const spinner = clack.spinner();
    const finish = (message) => spinner.stop(`${label}: ${message}`);
    spinner.start(label);
    return {
      update: (message) => {
        spinner.message(`${label}: ${message}`);
      },
      done: (message = "done") => {
        finish(message);
      },
      skip: (message) => {
        finish(message);
      },
    };
  }

  const startedAt = Date.now();
  const emit = (text) => {
    process.stdout.write(text);
  };
  emit(` ⏳ ${label}...`);
  return {
    update: (message) => {
      emit(`\r ⏳ ${label}: ${message}\x1b[K`);
    },
    done: (message = "done") => {
      const elapsed = Math.max(0, Math.round((Date.now() - startedAt) / 1000));
      emit(`\r ✓ ${label}: ${message} (${elapsed}s)\x1b[K\n`);
    },
    skip: (message) => {
      emit(`\r • ${label}: ${message}\x1b[K\n`);
    },
  };
}
630
+
631
/**
 * Compute the VRAM budget for local models: reported VRAM minus 1.5, floored at 0.
 *
 * @param {object|null|undefined} gpu - GPU info with `hasGpu` and `vramGb`.
 * @returns {number} The budget, or 0 without a GPU or a finite VRAM figure.
 */
function usableLocalVramGb(gpu) {
  const reported = Number(gpu?.vramGb);
  const hasUsableGpu = Boolean(gpu?.hasGpu) && Number.isFinite(reported);
  return hasUsableGpu ? Math.max(0, reported - 1.5) : 0;
}
636
+
637
/**
 * Select the local models that fit in the usable VRAM budget.
 *
 * A model is kept when it has a normalizable name and a finite, non-negative
 * `vram` no larger than the budget. Kept models are shallow copies with `name`
 * replaced by its normalized form.
 *
 * @param {Array|null|undefined} allLocalModels - Candidate local models.
 * @param {object|null|undefined} gpu - GPU info used to derive the budget.
 * @returns {object[]} Fitting models, in input order.
 */
function buildFittingModels(allLocalModels, gpu) {
  const budgetGb = usableLocalVramGb(gpu);
  const fitting = [];

  for (const model of allLocalModels || []) {
    if (!model) continue;
    const name = normalizeLocalModelName(model.name);
    if (!name) continue;
    const needed = Number(model?.vram);
    if (!Number.isFinite(needed) || needed < 0 || needed > budgetGb) continue;
    fitting.push({ ...model, name });
  }
  return fitting;
}
652
+
653
/**
 * Index the fitting local models by normalized name; the first occurrence wins.
 *
 * @param {Array|null|undefined} allLocalModels - Candidate local models.
 * @param {object|null|undefined} gpu - GPU info used to derive the budget.
 * @returns {Map<string, object>} Map from name to fitting model.
 */
function buildFittingModelMap(allLocalModels, gpu) {
  return buildFittingModels(allLocalModels, gpu).reduce((index, model) => {
    if (!index.has(model.name)) index.set(model.name, model);
    return index;
  }, new Map());
}
660
+
661
/**
 * Normalize a local model name and confirm it is among the fitting models.
 *
 * @param {*} modelName - Raw model name.
 * @param {Map<string, object>} fittingByName - Fitting models keyed by name.
 * @returns {string} The normalized name, or "" when it cannot be normalized or does not fit.
 */
function resolveFittingLocalName(modelName, fittingByName) {
  const normalized = normalizeLocalModelName(modelName);
  if (normalized && fittingByName.has(normalized)) {
    return normalized;
  }
  return "";
}
665
+
666
/**
 * Normalize a recommendation and validate local models against what fits.
 *
 * - Non-local recommendations are returned normalized.
 * - Local recommendations are returned only when local models are allowed and
 *   the model fits; otherwise null.
 * - A recommendation that fails normalization and has no provider is treated
 *   as a bare local model name when local models are allowed.
 *
 * @param {object|null|undefined} rec - Raw recommendation.
 * @param {Map<string, object>} fittingByName - Fitting models keyed by name.
 * @param {*} allowLocal - Truthy when local placements are permitted.
 * @returns {object|null} The normalized recommendation, or null when rejected.
 */
function normalizeLocalRecommendation(rec, fittingByName, allowLocal) {
  const normalized = normalizeRecommendation(rec);

  if (normalized) {
    if (normalized.provider !== LOCAL_PROVIDER) return normalized;
    if (!allowLocal) return null;
    const fittingName = resolveFittingLocalName(normalized.model, fittingByName);
    if (!fittingName) return null;
    return { ...normalized, provider: LOCAL_PROVIDER, model: fittingName };
  }

  const provider = String(rec?.provider || "").trim();
  if (!allowLocal || provider) return null;
  const bareName = resolveFittingLocalName(rec?.model, fittingByName);
  if (!bareName) return null;
  return { ...rec, provider: LOCAL_PROVIDER, model: bareName };
}
685
+
686
/**
 * Detect whether an AI result references a local model that must be rejected.
 *
 * A reference is rejected when it names a local model (explicitly, or as a
 * bare model name without a provider) that does not fit, or that appears where
 * local models are not allowed. Routing entries never allow local models;
 * primary and fallback entries do. Every name in `localModels.decisions` and
 * `localModels.placements` must resolve to a fitting model.
 *
 * @param {object|null|undefined} aiResult - Parsed AI recommendation result.
 * @param {Map<string, object>} fittingByName - Fitting models keyed by name.
 * @returns {boolean} True as soon as one rejected reference is found.
 */
function resultHasRejectedLocal(aiResult, fittingByName) {
  const fits = (name) => Boolean(resolveFittingLocalName(name, fittingByName));

  const isRejected = (rec, allowLocal) => {
    const normalized = normalizeRecommendation(rec);
    if (normalized) {
      if (normalized.provider !== LOCAL_PROVIDER) return false;
      return !allowLocal || !fits(normalized.model);
    }
    // Not normalizable: only a bare model name without a provider can still be
    // an implicit local reference.
    const provider = String(rec?.provider || "").trim();
    const rawModel = String(rec?.model || "").trim();
    return Boolean(rawModel && !provider && (!allowLocal || !fits(rawModel)));
  };

  const cloudRecs = Array.isArray(aiResult?.cloudRecommendations)
    ? aiResult.cloudRecommendations
    : [];
  for (const rec of cloudRecs) {
    // Normalize a shallow copy so the caller's object is not mutated.
    const agentRec = normalizeAgentRec({ ...rec });
    if (isRejected(agentRec.model, true)) return true;
    if ((agentRec.routing || []).some((entry) => isRejected(entry, false))) {
      return true;
    }
    if ((agentRec.fallback_models || []).some((entry) => isRejected(entry, true))) {
      return true;
    }
  }

  const localModels = aiResult?.localModels;
  for (const decision of localModels?.decisions || []) {
    if (!fits(decision?.name)) return true;
  }
  for (const placement of localModels?.placements || []) {
    if (!fits(placement?.modelName)) return true;
  }
  return false;
}
721
+
722
/**
 * Normalize a per-agent recommendation from either the old format (flat
 * `recommendations` array) or the new format (separate `model`, `routing`,
 * `fallback_models` fields).
 *
 * The object is normalized IN PLACE and returned. For object input the result
 * always has `model` (possibly null), `routing` and `fallback_models`, including
 * when the input already used the new shape: such input previously came back
 * untouched, leaving `routing`/`fallback_models` undefined when omitted.
 *
 * @param {object|*} rec - Recommendation to normalize; non-objects are returned as-is.
 * @returns {object|*} The same `rec`, normalized.
 */
function normalizeAgentRec(rec) {
  if (!rec || typeof rec !== "object") return rec;

  // Old shape: first usable entry becomes the primary model, the rest fallbacks.
  if (rec.recommendations && Array.isArray(rec.recommendations)) {
    const usable = rec.recommendations.filter((x) => x && x.provider && x.model);
    if (usable.length > 0) {
      rec.model = usable[0];
      rec.routing = [];
      rec.fallback_models = usable.slice(1);
    }
    delete rec.recommendations;
  }

  // Both shapes: make sure every field of the new shape exists.
  if (!rec.model) rec.model = null;
  if (!rec.routing) rec.routing = [];
  if (!rec.fallback_models) rec.fallback_models = [];
  return rec;
}
747
+
748
/**
 * Normalize a single { provider, model } recommendation.
 *
 * - Local references (provider "ollama"/"local", or a model prefixed with
 *   "ollama/" or "local/") get provider LOCAL_PROVIDER and a normalized name.
 * - Other references need both a provider and a model; a redundant
 *   "<provider>/" prefix is stripped from the model.
 *
 * @param {object|*} rec - Raw recommendation.
 * @returns {object|null} A copy of `rec` with normalized `provider` and
 *   `model`, or null when it cannot be normalized.
 */
function normalizeRecommendation(rec) {
  if (!rec || typeof rec !== "object") return null;

  const provider = String(rec.provider || "").trim();
  const rawModel = String(rec.model || "").trim();
  if (!provider && !rawModel) return null;

  const localProviders = ["ollama", LOCAL_PROVIDER];
  const localPrefixes = ["ollama/", "local/"];
  const isLocal =
    localProviders.includes(provider) ||
    localPrefixes.some((prefix) => rawModel.startsWith(prefix));

  if (isLocal) {
    const localName = normalizeLocalModelName(rawModel);
    if (!localName) return null;
    return { ...rec, provider: LOCAL_PROVIDER, model: localName };
  }

  if (!provider || !rawModel) return null;
  const ownPrefix = `${provider}/`;
  const model = rawModel.startsWith(ownPrefix)
    ? rawModel.slice(ownPrefix.length)
    : rawModel;
  // A model that is nothing but the provider prefix has no usable name.
  if (!model) return null;
  return { ...rec, provider, model };
}
781
+
782
/**
 * Build the canonical model reference for a provider/model pair.
 *
 * Thin wrapper around `formatModelRef` from the shared library.
 *
 * @param {string} provider - Provider id.
 * @param {string} modelName - Model name.
 * @returns {*} Whatever `formatModelRef` returns for the pair.
 */
function modelRef(provider, modelName) {
  const ref = formatModelRef(provider, modelName);
  return ref;
}
785
+
786
/**
 * Get the mutable availability record for a provider, creating it on first use.
 *
 * @param {string} provider - Provider id.
 * @returns {{creditExhausted: boolean, rateLimitedUntil: number, reason: (string|null)}}
 *   The record stored in `providerAvailability`; mutate it in place.
 */
function providerState(provider) {
  let state = providerAvailability.get(provider);
  if (!providerAvailability.has(provider)) {
    state = {
      creditExhausted: false,
      rateLimitedUntil: 0,
      reason: null,
    };
    providerAvailability.set(provider, state);
  }
  return state;
}
796
+
797
/**
 * Report whether a provider may currently be used.
 *
 * Always available: an empty provider, the local provider, and CLI agents.
 * CLI agents are matched both as the bare provider id "cli" (the text before
 * the first "/" in a ref, as used by the grouping and probing code in this
 * file) and as a "cli/..." string. Checking only the "cli/" prefix never
 * matches a bare provider id, which left CLI agents subject to the opencode-only
 * and quota gates.
 *
 * Otherwise a provider is unavailable when opencode-only mode is on and it is
 * not "opencode", when its credit/quota is exhausted, or while its rate-limit
 * window extends past `now`.
 *
 * @param {string} provider - Provider id.
 * @param {number} [now=Date.now()] - Reference time in ms since the epoch.
 * @returns {boolean}
 */
function isProviderAvailable(provider, now = Date.now()) {
  if (
    !provider ||
    provider === LOCAL_PROVIDER ||
    provider === "cli" ||
    provider.startsWith("cli/")
  ) {
    return true;
  }
  if (opencodeOnlyMode && provider !== "opencode") return false;
  if (quotaExceededProviders.has(provider)) return false;

  const state = providerAvailability.get(provider);
  if (!state) return true;
  if (state.creditExhausted) return false;
  return !(state.rateLimitedUntil && state.rateLimitedUntil > now);
}
808
+
809
/**
 * Flag a provider as out of credit/quota.
 *
 * Adds it to `quotaExceededProviders` and updates its availability record.
 *
 * @param {string} provider - Provider id; falsy values are ignored.
 * @param {string} [reason] - Stored reason; defaults to "credit-exhausted".
 */
function markProviderCreditExhausted(provider, reason) {
  if (provider) {
    quotaExceededProviders.add(provider);
    Object.assign(providerState(provider), {
      creditExhausted: true,
      reason: reason || "credit-exhausted",
    });
  }
}
816
+
817
/**
 * Record that a provider is rate limited for a while.
 *
 * The window is at least 1 second and defaults to 15 seconds when
 * `retryAfterSeconds` is missing, zero or not numeric. An existing later
 * deadline is never shortened.
 *
 * @param {string} provider - Provider id; falsy values are ignored.
 * @param {*} retryAfterSeconds - Suggested wait in seconds.
 * @param {string} [reason] - Stored reason; defaults to "rate-limited".
 */
function markProviderRateLimited(provider, retryAfterSeconds, reason) {
  if (!provider) return;
  const state = providerState(provider);
  const waitSeconds = Math.max(1, Number(retryAfterSeconds) || 15);
  const deadline = Date.now() + waitSeconds * 1000;
  const previous = state.rateLimitedUntil || 0;
  state.rateLimitedUntil = Math.max(previous, deadline);
  state.reason = reason || "rate-limited";
}
827
/**
 * Check whether a cloud provider is usable by running one tiny opencode request.
 *
 * No probe is run for refs without a provider, or for the local, "opencode"
 * and "cli" providers: those resolve to `{ ok: true }` immediately. The
 * provider is the text before the first "/", so CLI refs ("cli/<agent>") are
 * matched on the bare id "cli". A "cli/" prefix test can never match here and
 * would send CLI refs through `opencode run`.
 *
 * Results are shared per provider: the first probe's promise is cached in
 * `providerProbePromises` and returned for every later ref of that provider.
 *
 * Failure classification, from the combined stderr and stdout:
 * - credit/auth markers: provider marked credit-exhausted, reason "quota-exceeded";
 * - rate-limit markers: provider marked rate-limited, reason "rate-limited";
 * - 30 s timeout: process killed, provider rate-limited for 30 s, reason "timeout".
 * The HTTP codes 402 and 429 are matched as standalone numbers, so digits
 * inside ids or timestamps in the JSON output do not count as errors.
 *
 * @param {string} modelRef - Model ref of the form "provider/model".
 * @returns {Promise<{ok: boolean, reason?: string, errorOutput?: string}>}
 *   Always resolves, never rejects.
 */
function probeModel(modelRef) {
  const slash = modelRef.indexOf("/");
  const provider = slash === -1 ? "" : modelRef.slice(0, slash);
  if (
    !provider ||
    provider === LOCAL_PROVIDER ||
    provider === "opencode" ||
    provider === "cli"
  ) {
    return Promise.resolve({ ok: true });
  }

  if (!isProviderAvailable(provider)) {
    return Promise.resolve({ ok: false, reason: "provider-unavailable" });
  }

  if (providerProbePromises.has(provider)) {
    return providerProbePromises.get(provider);
  }

  // Lower-case phrases that indicate exhausted credit or rejected credentials.
  const quotaPhrases = [
    "payment required",
    "payment_required",
    "quota exceeded",
    "quota_exceeded",
    "billing limit",
    "billing_limit",
    "credit limit",
    "credit_limit",
    "insufficient funds",
    "insufficient_funds",
    "usage limit",
    "budget exceeded",
    "budget exhausted",
    "quota restricted",
    "credit expired",
    "credits expired",
    "unauthorized",
    "forbidden",
    "invalid api key",
    "invalid_api_key",
    "key invalid",
    "access denied",
    "exhausted",
    "restricted",
    "limit exceeded",
    "limit_exceeded",
  ];
  // Lower-case phrases that indicate throttling.
  const rateLimitPhrases = [
    "rate limit",
    "rate_limit",
    "too many requests",
    "too_many_requests",
  ];
  const containsAny = (text, phrases) =>
    phrases.some((phrase) => text.includes(phrase));

  const promise = new Promise((resolve) => {
    const tempDir = os.tmpdir();
    const child = spawnTracked(
      "opencode",
      [
        "run",
        "--pure",
        "--agent",
        "summary",
        "--dir",
        tempDir,
        "--format",
        "json",
        "--model",
        modelRef,
        "--dangerously-skip-permissions",
        "say 1",
      ],
      {
        cwd: tempDir,
        env: {
          ...process.env,
          PWD: tempDir,
          INIT_CWD: tempDir,
          TERM: "dumb",
        },
        stdio: ["ignore", "pipe", "pipe"],
      },
    );

    let stdout = "";
    let stderr = "";
    let timedOut = false;
    const timer = setTimeout(() => {
      timedOut = true;
      try {
        child.kill("SIGKILL");
      } catch {}
    }, 30000);

    child.on("error", (err) => {
      clearTimeout(timer);
      resolve({ ok: false, reason: `spawn-error: ${err.message}` });
    });

    child.stdout.on("data", (d) => {
      stdout += d.toString();
    });
    child.stderr.on("data", (d) => {
      stderr += d.toString();
    });

    child.on("close", (code) => {
      clearTimeout(timer);
      if (timedOut) {
        markProviderRateLimited(provider, 30, "rate-limited");
        resolve({ ok: false, reason: "timeout", errorOutput: "Request timed out after 30s" });
        return;
      }

      const rawError = `${stderr}\n${stdout}`.trim();
      const lower = rawError.toLowerCase();

      if (code === 402 || /\b402\b/.test(lower) || containsAny(lower, quotaPhrases)) {
        markProviderCreditExhausted(provider, "quota-exceeded");
        resolve({ ok: false, reason: "quota-exceeded", errorOutput: rawError });
        return;
      }

      if (/\b429\b/.test(lower) || containsAny(lower, rateLimitPhrases)) {
        const delay = parseRetryAfterSeconds(rawError) || 15;
        markProviderRateLimited(provider, delay, "rate-limited");
        resolve({ ok: false, reason: "rate-limited", errorOutput: rawError });
        return;
      }

      if (code !== 0 && code !== null) {
        resolve({ ok: false, reason: `exit-code-${code}`, errorOutput: rawError });
        return;
      }

      // A null exit code means the process ended because of a signal.
      if (code === null) {
        resolve({ ok: false, reason: "terminated-by-signal", errorOutput: "Process terminated by system signal" });
        return;
      }

      resolve({ ok: true });
    });
  });

  providerProbePromises.set(provider, promise);
  return promise;
}
970
+
971
+
972
/**
 * Flatten the `agents` and `categories` maps of a config into one list.
 *
 * @param {object} config - Parsed config; either map may be absent.
 * @returns {Array<{name: string, type: string, section: *}>} Agents first,
 *   then categories, each tagged with `type` ("agent" or "category") and
 *   carrying the original section object (not a copy).
 */
function allConfigEntries(config) {
  const groups = [
    ["agent", config.agents],
    ["category", config.categories],
  ];
  const collected = [];
  for (const [type, group] of groups) {
    for (const [name, section] of Object.entries(group || {})) {
      collected.push({ name, type, section });
    }
  }
  return collected;
}
986
+
987
/**
 * Normalize a list of recommendations and drop duplicates.
 *
 * Each item is passed through `normalizeRecommendation`; falsy results are
 * discarded. Two items are duplicates when `modelRef(provider, model)`
 * yields the same key, and the first occurrence wins.
 *
 * @param {Iterable<*>} recommendations - Raw recommendation entries.
 * @returns {Array<object>} Normalized entries in first-seen order.
 */
function uniqueByModelRef(recommendations) {
  // A Map keeps insertion order, so values() replays first-seen order.
  const firstByRef = new Map();
  for (const candidate of recommendations) {
    const rec = normalizeRecommendation(candidate);
    if (rec) {
      const ref = modelRef(rec.provider, rec.model);
      if (!firstByRef.has(ref)) firstByRef.set(ref, rec);
    }
  }
  return [...firstByRef.values()];
}
1000
+
1001
/**
 * Produce the final fallback list for an entry: normalized, de-duplicated,
 * and never repeating the primary model.
 *
 * @param {?{provider: string, model: string}} primary - Chosen primary
 *   model; ignored when missing or incomplete.
 * @param {?Array<*>} fallbackModels - Raw fallback recommendations.
 * @returns {Array<object>} Fallbacks that differ from the primary.
 */
function finalizeFallbackModels(primary, fallbackModels) {
  const hasPrimary = Boolean(primary && primary.provider && primary.model);
  const primaryRef = hasPrimary
    ? modelRef(primary.provider, primary.model)
    : null;
  const deduped = uniqueByModelRef(fallbackModels || []);
  return deduped.filter(
    (candidate) => modelRef(candidate.provider, candidate.model) !== primaryRef,
  );
}
1010
+
1011
+ // =========================================================================
1012
+ // Local model discovery (Ollama registry catalog)
1013
+ // =========================================================================
1014
+
1015
/**
 * Fetch a URL with the system `curl` binary and return the response body.
 *
 * Best-effort by design: any failure (curl missing, non-zero exit, timeout)
 * yields an empty string instead of throwing.
 *
 * @param {string} url - URL to fetch.
 * @param {string} [accept] - Either a bare media type (sent as
 *   `Accept: <value>`) or a complete `Name: value` header line.
 * @returns {string} Response body, or "" on any failure.
 */
function execCurl(url, accept) {
  const args = ["-s", "--max-time", "8"];
  if (accept) {
    // curl's -H expects a full "Name: value" line. Callers pass a bare media
    // type, which previously reached curl without a header name.
    const isHeaderLine = /^[A-Za-z0-9-]+\s*:/.test(accept);
    args.push("-H", isHeaderLine ? accept : `Accept: ${accept}`);
  }
  args.push(url);
  try {
    return execFileSync("curl", args, {
      encoding: "utf-8",
      timeout: 15000,
      env: { ...process.env, TERM: "dumb" },
    });
  } catch (_) {
    return "";
  }
}
1029
+
1030
/**
 * Look up the size of a model's weights layer in the Ollama registry.
 *
 * Requests the manifest for `library/<name>` at `<tag>` and reads the `size`
 * of the first layer whose media type is `application/vnd.ollama.image.model`.
 *
 * @param {string} name - Model name in the registry's `library` namespace.
 * @param {string} tag - Model tag.
 * @returns {?number} Layer size divided by 1e9, or null when the manifest
 *   cannot be fetched or parsed, or has no model layer.
 */
function registryModelSizeGb(name, tag) {
  const MODEL_LAYER_TYPE = "application/vnd.ollama.image.model";
  const manifestUrl = `https://registry.ollama.ai/v2/library/${encodeURIComponent(name)}/manifests/${encodeURIComponent(tag)}`;
  const body = execCurl(
    manifestUrl,
    "application/vnd.docker.distribution.manifest.v2+json",
  );
  if (!body) return null;
  try {
    const manifest = JSON.parse(body);
    const modelLayer = (manifest.layers || []).find(
      (layer) => layer.mediaType === MODEL_LAYER_TYPE,
    );
    return modelLayer ? modelLayer.size / 1e9 : null;
  } catch (_) {
    // Malformed or unexpected manifest: treat as "size unknown".
    return null;
  }
}
1048
+
1049
/**
 * Read the model catalog cache from `MODEL_CACHE_FILE`.
 *
 * @returns {*} The parsed JSON content, or null when the file is missing,
 *   unreadable, or not valid JSON.
 */
function loadCachedModels() {
  try {
    if (!fs.existsSync(MODEL_CACHE_FILE)) return null;
    const contents = fs.readFileSync(MODEL_CACHE_FILE, "utf-8");
    return JSON.parse(contents);
  } catch (_) {
    // The cache is optional; any read or parse problem means "no cache".
    return null;
  }
}
1056
+
1057
/**
 * Persist the model catalog to `MODEL_CACHE_FILE` as pretty-printed JSON,
 * creating the parent directory when needed.
 *
 * Failures are ignored: the cache is only an optimisation.
 *
 * @param {*} models - JSON-serialisable catalog to store.
 * @returns {void}
 */
function saveCachedModels(models) {
  try {
    const cacheDir = path.dirname(MODEL_CACHE_FILE);
    const dirMissing = !fs.existsSync(cacheDir);
    if (dirMissing) fs.mkdirSync(cacheDir, { recursive: true });
    const serialized = JSON.stringify(models, null, 2);
    fs.writeFileSync(MODEL_CACHE_FILE, serialized, "utf-8");
  } catch (_) {
    // Best effort only.
  }
}
1068
+
1069
/**
 * Build the catalog of known local models, one entry per name:tag pair in
 * `KNOWN_MODELS`.
 *
 * Unless `forceRefresh` is set, a cached array from `loadCachedModels()` is
 * returned as-is. Otherwise every tag is looked up through
 * `registryModelSizeGb`, and the result is written to the cache only when
 * this was not a forced refresh.
 *
 * @param {boolean} [forceRefresh=false] - Skip the cache and rebuild.
 * @param {?{update: Function, done: Function}} [progress=null] - Optional
 *   progress reporter; `update` is called per model, `done` once at the end.
 * @returns {Array<{name: string, size: string, vram: number, score: number,
 *   baseModel: string, tag: string}>} Catalog entries.
 */
function discoverModels(forceRefresh = false, progress = null) {
  if (!forceRefresh) {
    const cached = loadCachedModels();
    if (Array.isArray(cached)) {
      if (progress) progress.done(`${cached.length} cached models`);
      return cached;
    }
  }

  let total = 0;
  for (const known of KNOWN_MODELS) total += known.tags.length;

  const models = [];
  for (const { name: baseModel, tags } of KNOWN_MODELS) {
    for (const tag of tags) {
      const name = `${baseModel}:${tag}`;
      if (progress) progress.update(`${models.length + 1}/${total} ${name}`);

      const sizeGb = registryModelSizeGb(baseModel, tag);

      // Tags such as "7b" or "1.5b" carry the parameter count in billions.
      const paramMatch = tag.match(/(\d+(?:\.\d+)?)b/i);
      const billions = paramMatch ? parseFloat(paramMatch[1]) : null;

      // BASE_VRAM is scaled as if it described a 7b variant.
      const baseVram = BASE_VRAM[baseModel] || 3;
      const vram = billions ? (baseVram / 7) * billions : baseVram;

      models.push({
        name,
        size: sizeGb != null ? `${sizeGb.toFixed(1)} GB` : "unknown",
        vram: Math.round(vram * 10) / 10,
        score: MODEL_SCORES[baseModel] || 30,
        baseModel,
        tag,
      });
    }
  }

  if (!forceRefresh) saveCachedModels(models);
  if (progress) progress.done(`${models.length} models cataloged`);
  return models;
}
1104
+
1105
+ // =========================================================================
1106
+ // GPU / Ollama detection
1107
+ // =========================================================================
1108
+
1109
/**
 * Detect the local GPU and its VRAM.
 *
 * Tries, in order:
 *  1. `nvidia-smi` — name and total memory of the first GPU it lists.
 *  2. `ollama list` — if it prints its table header, a GPU of unknown model
 *     is assumed with 8 GB of VRAM.
 * If both fail, reports that no GPU is present.
 *
 * @returns {{hasGpu: boolean, name: string, label: string, vramGb: number}}
 *   `vramGb` is always a finite number.
 */
function detectGPU() {
  const execOptions = {
    encoding: "utf-8",
    timeout: 5000,
    stdio: ["ignore", "pipe", "pipe"],
    env: { ...process.env, TERM: "dumb" },
  };

  try {
    const out = execFileSync(
      "nvidia-smi",
      ["--query-gpu=name,memory.total", "--format=csv,noheader,nounits"],
      execOptions,
    );
    // One CSV row per GPU: use the first row only, and split on the last
    // comma so the memory column is isolated from the name.
    const firstRow = out.trim().split(/\r?\n/)[0];
    const comma = firstRow.lastIndexOf(",");
    const name = firstRow.slice(0, comma).trim();
    const memMiB = Number.parseInt(firstRow.slice(comma + 1), 10);
    // memory.total can be non-numeric (e.g. "[N/A]"); in that case fall
    // through to the next heuristic instead of reporting NaN GB.
    if (comma > 0 && name && Number.isFinite(memMiB)) {
      const memGB = Math.round(memMiB / 1024);
      return {
        hasGpu: true,
        name,
        label: `${name} (${memGB} GB VRAM)`,
        vramGb: memGB,
      };
    }
  } catch (_) {
    // nvidia-smi missing or failed: try the next heuristic.
  }

  try {
    const out = execFileSync("ollama", ["list"], execOptions);
    if (out.includes("NAME")) {
      return {
        hasGpu: true,
        name: "unknown GPU",
        label: "GPU detected (no nvidia-smi)",
        vramGb: 8,
      };
    }
  } catch (_) {
    // ollama missing or not responding.
  }

  return { hasGpu: false, name: "", label: "No GPU detected", vramGb: 0 };
}
1151
+
1152
/**
 * Probe the local Ollama installation.
 *
 * @returns {{installed: boolean, running: boolean, version: ?string,
 *   models: Array<{name: string, size: string}>}}
 *   `installed` comes from `commandExists("ollama")`; `running` is true when
 *   `ollama list` prints its "NAME" header; `models` holds one entry per
 *   listed row, with ":latest" appended to names that have no tag.
 */
function detectOllama() {
  const env = { ...process.env, TERM: "dumb" };
  const report = {
    installed: commandExists("ollama").length > 0,
    running: false,
    version: null,
    models: [],
  };
  if (!report.installed) return report;

  const runOllama = (args, timeout) =>
    execFileSync("ollama", args, {
      encoding: "utf-8",
      timeout,
      stdio: ["ignore", "pipe", "pipe"],
      env,
    });

  try {
    report.version = (runOllama(["--version"], 3000) || "").trim();
  } catch (_) {
    // Version is informational only.
  }

  try {
    const listing = runOllama(["list"], 5000);
    report.running = listing.includes("NAME");
    if (report.running) {
      // First row is the table header.
      const rows = listing.trim().split("\n").slice(1);
      for (const row of rows) {
        const columns = row.trim().split(/\s+/);
        if (columns.length < 2) continue;
        const rawName = columns[0];
        report.models.push({
          name: rawName.includes(":") ? rawName : `${rawName}:latest`,
          size: columns[1] || "unknown",
        });
      }
    }
  } catch (_) {
    // `ollama list` failed: leave running=false and models empty.
  }

  return report;
}
1195
+
1196
+ // =========================================================================
1197
+ // Tier chain construction + rebalance (algorithmic path)
1198
+ // =========================================================================
1199
+
1200
/**
 * Build the per-tier model chains used by the algorithmic rebalance.
 *
 * For every provider in `modelCache.byId` (except "local"), models are
 * scored with `scoreModel` and the provider contributes its top-scored
 * model to `reasoning`, its middle one to `balanced` (the top one when it
 * has at most two models) and its lowest-scored one to `fast`. Each chain
 * is then sorted by descending score.
 *
 * @param {{byId?: Object<string, Map<string, *>>}} modelCache - Provider to
 *   (model id → metadata) map.
 * @param {*} providerAliases - Accepted for signature compatibility; unused.
 * @returns {{reasoning: Array, balanced: Array, fast: Array}} Chains of
 *   `{ model, score, variant }` entries.
 */
function buildTierChains(modelCache, providerAliases) {
  const byScoreDesc = (left, right) => right.score - left.score;
  const chains = { reasoning: [], balanced: [], fast: [] };

  for (const [provider, modelMap] of Object.entries(modelCache.byId || {})) {
    const skipProvider =
      !modelMap || modelMap.size === 0 || provider === "local";
    if (skipProvider) continue;

    const ranked = [...modelMap].map(([id, meta]) => {
      const model = `${provider}/${id}`;
      return { model, score: scoreModel(model, null, meta), variant: null };
    });
    if (ranked.length === 0) continue;
    ranked.sort(byScoreDesc);

    const lastIndex = ranked.length - 1;
    const middleIndex = ranked.length <= 2 ? 0 : Math.floor(lastIndex / 2);
    chains.reasoning.push(ranked[0]);
    chains.balanced.push(ranked[middleIndex]);
    chains.fast.push(ranked[lastIndex]);
  }

  for (const tier of QUALITY_TIERS) {
    chains[tier].sort(byScoreDesc);
  }
  return chains;
}
1224
+
1225
/**
 * Render a fallback entry as a comparable string.
 *
 * @param {string|{model: string, variant?: string}} fb - Fallback entry,
 *   either already a string or an object form.
 * @returns {string} The string itself, or `model` optionally followed by
 *   ":" and the variant.
 */
function fbToString(fb) {
  if (typeof fb === "string") return fb;
  const base = fb.model;
  const suffix = fb.variant ? ":" + fb.variant : "";
  return base + suffix;
}
1230
+
1231
/**
 * Point a config entry at a tier chain: the first chain element becomes the
 * primary model and the remaining ones become `fallback_models`.
 *
 * The entry is mutated in place, and only when the resulting model/variant
 * or fallback list differs from what it already holds.
 *
 * @param {object} entry - Agent or category section to update.
 * @param {?Array<{model: string, variant?: ?string}>} tierChain - Ordered
 *   candidates, best first.
 * @returns {{changed: boolean}} Whether the entry was modified.
 */
function applyTierChain(entry, tierChain) {
  if (!tierChain || tierChain.length === 0) return { changed: false };

  const primary = tierChain[0];
  const nextModel = primary.model;
  const nextVariant = primary.variant || null;
  const nextFallbacks = tierChain
    .slice(1)
    .map(({ model, variant }) => (variant ? { model, variant } : { model }));

  // Compare string signatures so object and string fallbacks are treated
  // alike.
  const withVariant = (model, variant) =>
    model + (variant ? ":" + variant : "");
  const listSignature = (list) => list.map(fbToString).join(",");

  const sameModel =
    withVariant(entry.model, entry.variant) ===
    withVariant(nextModel, nextVariant);
  const sameFallbacks =
    listSignature(entry.fallback_models || []) ===
    listSignature(nextFallbacks);
  if (sameModel && sameFallbacks) return { changed: false };

  entry.model = nextModel;
  if (nextVariant) {
    entry.variant = nextVariant;
  } else {
    delete entry.variant;
  }
  if (nextFallbacks.length > 0) {
    entry.fallback_models = nextFallbacks;
  } else {
    delete entry.fallback_models;
  }
  return { changed: true };
}
1253
+
1254
/**
 * Find a model's metadata in the provider lookup.
 *
 * The provider key is resolved through `resolveProvider`, then the model is
 * searched under three spellings: the bare id, `<providerKey>/<id>` and
 * `<resolvedProvider>/<id>`.
 *
 * @param {string} providerKey - Provider name as written in the config.
 * @param {string} modelID - Model id without the provider prefix.
 * @param {*} aliases - Alias table handed to `resolveProvider`.
 * @param {{byId: Object<string, Map<string, *>>}} lookup - Provider to
 *   model-metadata maps.
 * @returns {*} The first truthy metadata entry found, otherwise null.
 */
function findModelInCache(providerKey, modelID, aliases, lookup) {
  const realProvider = resolveProvider(providerKey, aliases);
  const modelMap = lookup.byId[realProvider];
  if (!modelMap) return null;

  const spellings = [
    modelID,
    `${providerKey}/${modelID}`,
    `${realProvider}/${modelID}`,
  ];
  for (const key of spellings) {
    const hit = modelMap.get(key);
    if (hit) return hit;
  }
  return null;
}
1265
+
1266
/**
 * Rebalance one agent/category section in place.
 *
 * Two strategies, tried in order:
 *  1. Tier chain: when `options.tierChains` has a non-empty chain for the
 *     entry's `model_quality` (default "balanced"), that chain is applied
 *     via `applyTierChain`.
 *  2. Re-rank: otherwise the entry's own model and fallbacks are scored with
 *     `scoreModel`, reduced to the best model per resolved provider, and
 *     written back best-first.
 *
 * @param {object} entry - Section to update; mutated in place.
 * @param {object} options
 * @param {?Object<string, Array>} options.tierChains - Chains per quality tier.
 * @param {boolean} [options.withoutFree] - Drop models from `FREE_PROVIDERS`.
 * @param {?Set<string>} [options.unavailableModels] - Model refs to exclude.
 * @param {*} options.providerAliases - Alias table for `resolveProvider`.
 * @param {object} options.modelCache - Lookup passed to `findModelInCache`.
 * @returns {{changed: boolean, reason?: string}} Whether the entry changed.
 */
function rebalanceEntry(entry, options) {
  if (!entry || typeof entry !== "object") return { changed: false };
  const {
    tierChains,
    withoutFree,
    unavailableModels,
    providerAliases,
    modelCache,
  } = options;

  if (tierChains) {
    const quality = entry.model_quality || "balanced";
    const tier = tierChains[quality];
    if (tier && tier.length > 0) {
      const chain = withoutFree
        ? tier.filter((c) => !FREE_PROVIDERS.includes(c.model.split("/")[0]))
        : tier;
      if (chain.length === 0) return { changed: false };
      const result = applyTierChain(entry, chain);
      return result.changed
        ? { changed: true, reason: `tier: ${quality}` }
        : result;
    }
  }

  // Collect the models the entry currently references. Fallbacks may be
  // plain strings or { model, variant } objects (fbToString accepts both),
  // so both shapes are handled here and malformed items are skipped.
  let refs = [];
  if (entry.model) {
    refs.push({ model: entry.model, variant: entry.variant || null });
  }
  for (const fb of entry.fallback_models || []) {
    if (typeof fb === "string") {
      if (fb) refs.push({ model: fb, variant: null });
    } else if (fb && typeof fb.model === "string") {
      refs.push({ model: fb.model, variant: fb.variant || null });
    }
  }
  if (refs.length === 0) return { changed: false };

  if (unavailableModels && unavailableModels.size > 0) {
    refs = refs.filter((ref) => !unavailableModels.has(ref.model));
    if (refs.length === 0) {
      delete entry.model;
      delete entry.variant;
      delete entry.fallback_models;
      return { changed: true, reason: "all models unavailable" };
    }
  }

  // Keep only the best-ranked model per resolved provider. A Map is used so
  // provider names can never collide with Object.prototype keys.
  const bestPerProvider = new Map();
  for (const ref of refs) {
    const providerKey = ref.model.split("/")[0];
    const modelPart = ref.model.slice(ref.model.indexOf("/") + 1);
    // The alias table arrives as `options.providerAliases`.
    const realProvider = resolveProvider(providerKey, providerAliases);
    const cacheEntry = findModelInCache(
      providerKey,
      modelPart,
      providerAliases,
      modelCache,
    );
    const rank = scoreModel(ref.model, ref.variant, cacheEntry);
    const current = bestPerProvider.get(realProvider);
    if (!current || rank > current.rank) {
      bestPerProvider.set(realProvider, { ...ref, provider: providerKey, rank });
    }
  }

  let candidates = [...bestPerProvider.values()];
  if (withoutFree) {
    candidates = candidates.filter((c) => !FREE_PROVIDERS.includes(c.provider));
  }
  if (candidates.length === 0) return { changed: false };
  candidates.sort((a, b) => b.rank - a.rank);

  // Writing "best first, rest as fallbacks" is exactly what applyTierChain
  // does, including the no-op check, so reuse it.
  return applyTierChain(entry, candidates);
}
1353
+
1354
/**
 * Rebalance every agent and category section that names a model.
 *
 * Sections without `model` and without `fallback_models` are left alone.
 *
 * @param {{agents?: object, categories?: object}} config - Config to update
 *   in place.
 * @param {object} options - Passed through to `rebalanceEntry`.
 * @returns {string[]} One line per changed section, such as
 *   "agents.<name>", with " — <reason>" appended when a reason was reported.
 */
function rebalanceConfig(config, options) {
  const changes = [];
  for (const prefix of ["agents", "categories"]) {
    for (const [name, section] of Object.entries(config[prefix] || {})) {
      const namesModel = section.model || section.fallback_models;
      if (!namesModel) continue;
      const outcome = rebalanceEntry(section, options);
      if (!outcome.changed) continue;
      const suffix = outcome.reason ? ` — ${outcome.reason}` : "";
      changes.push(`${prefix}.${name}${suffix}`);
    }
  }
  return changes;
}
1372
+
1373
+ // =========================================================================
1374
+ // Display functions
1375
+ // =========================================================================
1376
+
1377
/**
 * Print a preview of the AI's cloud recommendations and collect the config
 * sections they would change.
 *
 * @param {{cloudRecommendations?: Array<object>}} aiResult - AI output whose
 *   recommendations carry `name`, `type`, `model`, `fallback_models` and
 *   `routing`.
 * @param {{agents?: object, categories?: object}} config - Current config.
 * @returns {Array<{section: object, name: string}>} Sections for which the
 *   recommendation differs from the current config.
 */
function showCloudRecommendations(aiResult, config) {
  const recs = aiResult.cloudRecommendations;
  if (!recs || recs.length === 0) return [];

  const isComplete = (r) => r.provider && r.model;
  const toRef = (r) => `${r.provider}/${r.model}`;

  const changes = [];
  for (const rec of recs) {
    // Look in the map named by rec.type first, so an agent and a category
    // sharing a name cannot be confused. Fall back to the other map to stay
    // compatible with recommendations whose type is missing or wrong.
    const lookupOrder =
      rec.type === "category"
        ? ["categories", "agents"]
        : ["agents", "categories"];
    const pathPrefix = lookupOrder.find((key) => config[key]?.[rec.name]);
    if (!pathPrefix) continue;
    const section = config[pathPrefix][rec.name];

    // The current model is only worth keeping if its provider is usable.
    const prevModel = section.model;
    const prevModelAvailable = Boolean(
      prevModel && isProviderAvailable(prevModel.split("/")[0]),
    );

    // New recommendations (already normalized to rec.model / rec.fallback_models)
    let newModel = null;
    if (rec.model && rec.model.provider && rec.model.model) {
      newModel = toRef(rec.model);
    } else if (prevModelAvailable) {
      newModel = prevModel;
    }
    const newFallbacks = (rec.fallback_models || []).filter(isComplete).map(toRef);
    const newRouting = (rec.routing || []).filter(isComplete).map(toRef);

    const modelChanged = (prevModel || null) !== newModel;
    const hasPrevConfig =
      prevModel ||
      (section.fallback_models && section.fallback_models.length > 0);
    const anyChanged =
      modelChanged || newFallbacks.length > 0 || newRouting.length > 0;

    // Skip entries with no current config AND no recommendation — pure noise
    if (!anyChanged && !hasPrevConfig) continue;

    if (anyChanged) changes.push({ section, name: rec.name });

    // Bulleted preview block
    console.log(` \u2022 ${pathPrefix}.${rec.name}`);
    console.log(` model: ${newModel || ""}`);
    if (newRouting.length > 0) {
      console.log(` routing: ${newRouting.join(", ")}`);
    }
    if (newFallbacks.length > 0) {
      console.log(` fallback_models: ${newFallbacks.join(", ")}`);
    }
    console.log();
  }
  return changes;
}
1436
+
1437
/**
 * Print the AI's local-model plan: config placements first, then the
 * install / keep / uninstall / skip decisions, one section per action.
 *
 * Placements are printed whenever there are any, even if the AI returned no
 * decisions. Empty sections are omitted.
 *
 * @param {{localModels?: {decisions?: Array<object>, placements?: Array<object>}}} aiResult
 *   AI output; nothing is printed when `localModels` is absent.
 * @param {?Array<{name: string, size: string, vram: number, score: number}>} allLocalModels
 *   Catalog used to annotate decisions with size, VRAM and score.
 * @param {?{models?: Array<{name: string}>}} ollama - Installed models, used
 *   to flag uninstall decisions that have nothing to remove.
 * @returns {void}
 */
function showLocalDecisions(aiResult, allLocalModels, ollama) {
  if (!aiResult.localModels) return;
  const { decisions, placements } = aiResult.localModels;

  if (placements && placements.length > 0) {
    console.log(
      `\n── AI: Local config placements (${placements.length}) ────────`,
    );
    for (const p of placements) {
      console.log(` \u2022 ${p.modelName} \u2192 ${p.agentName} (${p.role})`);
      if (p.justification) console.log(` Justification: ${p.justification}`);
    }
  }

  if (!decisions || decisions.length === 0) return;

  // Index the catalog once; the first entry per name wins.
  const catalog = new Map();
  for (const model of allLocalModels || []) {
    if (!catalog.has(model.name)) catalog.set(model.name, model);
  }
  const installedNames = new Set(
    (ollama?.models || []).map((model) => model.name),
  );

  const sections = [
    {
      action: "install",
      header: (n) => `\n── AI: Install (${n}) ────────────────────────`,
    },
    {
      action: "keep",
      header: (n) => `\n── AI: Keep (${n}) ──────────────────────────`,
    },
    {
      action: "uninstall",
      header: (n) => `\n── AI: Uninstall (${n}) ────────────────────`,
    },
    {
      action: "skip",
      header: (n) => `\n── AI: Skip (${n}) ──────────────────────────`,
    },
  ];

  for (const { action, header } of sections) {
    const group = decisions.filter((d) => d.action === action);
    if (group.length === 0) continue;
    console.log(header(group.length));
    for (const d of group) {
      const m = catalog.get(d.name);
      const extra = m
        ? ` (${m.size}, ${m.vram} GB VRAM, score ${m.score})`
        : "";
      console.log(` \u2022 ${d.name}${extra}`);
      console.log(` ${d.rationale}`);
      if (action === "uninstall" && !installedNames.has(d.name)) {
        console.log(` (not installed, nothing to remove)`);
      }
    }
  }
}
1513
+
1514
+ // =========================================================================
1515
+ // CLI agent discovery — optional panel members
1516
+ // =========================================================================
1517
+
1518
/**
 * Discover AI CLI agents available on PATH that can be queried for
 * recommendations alongside panel models.
 *
 * Built-in adapters exist for `codex` and `agy`; more can be declared under
 * `config.omo.panel_cli_agents`, where each entry supplies a `command`
 * (argv array or shell string, optionally containing `{prompt}`) and may
 * supply `id` and `binary`.
 *
 * @param {object} [config={}] - Parsed config.
 * @returns {Array<{ref: string, call: function(string): Promise<?object>}>}
 *   One item per adapter whose binary `commandExists` reports. `ref` looks
 *   like "cli/codex"; `call(prompt)` resolves to the parsed JSON object
 *   found in the CLI's output, or null on any failure.
 */
function discoverCliModels(config = {}) {
  const agents = [];

  // Parse the outermost {...} span of the CLI output as JSON.
  function parseCliJson(raw) {
    const jsonMatch = String(raw || "").match(/\{[\s\S]*\}/);
    if (!jsonMatch) return null;
    return JSON.parse(jsonMatch[0]);
  }

  // Quote a value for POSIX sh (string commands run with shell "/bin/sh").
  // Single quotes disable every expansion, so text such as $(...) or
  // backticks inside the prompt cannot be executed by the shell.
  function shellQuote(value) {
    return `'${String(value).replaceAll("'", `'\\''`)}'`;
  }

  // Substitute the prompt into a configured command. Function replacers are
  // used so "$&", "$$" and similar sequences in the prompt stay literal.
  function renderCliCommand(command, prompt) {
    if (Array.isArray(command)) {
      const parts = command.map(String);
      const hasPlaceholder = parts.some((part) => part.includes("{prompt}"));
      const rendered = parts.map((part) =>
        part.replaceAll("{prompt}", () => prompt),
      );
      if (!hasPlaceholder) rendered.push(prompt);
      return rendered;
    }
    const raw = String(command || "").trim();
    if (!raw) return "";
    const promptArg = shellQuote(prompt);
    return raw.includes("{prompt}")
      ? raw.replaceAll("{prompt}", () => promptArg)
      : `${raw} ${promptArg}`;
  }

  // Pick the executable whose presence decides if the adapter is usable.
  function inferCliBinary(entry, id) {
    if (entry?.binary) return String(entry.binary).trim();
    const command = entry?.command;
    if (Array.isArray(command)) return String(command[0] || id).trim();
    return String(command || id).trim().split(/\s+/)[0];
  }

  const cliAdapters = [
    {
      binary: "codex",
      ref: "cli/codex",
      command: (prompt) => [
        "codex",
        "run",
        "--model",
        "opencode/big-pickle",
        "--preset",
        "json",
        "--dangerously-skip-permissions",
        prompt,
      ],
    },
    {
      binary: "agy",
      ref: "cli/agy",
      command: (prompt) => [
        "agy",
        "--print",
        "--dangerously-skip-permissions",
        prompt,
      ],
    },
  ];

  const configuredAgents = config?.omo?.panel_cli_agents;
  if (configuredAgents && typeof configuredAgents === "object") {
    for (const [name, entry] of Object.entries(configuredAgents)) {
      const id = String(entry?.id || name || "").trim();
      const command = entry?.command;
      if (!id || !command) continue;
      cliAdapters.push({
        binary: inferCliBinary(entry, id),
        ref: `cli/${id.replace(/^cli\//, "")}`,
        command: (prompt) => renderCliCommand(command, prompt),
      });
    }
  }

  // CLI agents on PATH can participate alongside provider-backed models.
  for (const adapter of cliAdapters) {
    try {
      const which = commandExists(adapter.binary);
      if (which) {
        agents.push({
          ref: adapter.ref,
          call: async (prompt) => {
            try {
              const cmd = adapter.command(prompt);
              if (!cmd) return null;
              const options = {
                encoding: "utf-8",
                timeout: 120000,
                maxBuffer: 1024 * 1024,
                stdio: ["ignore", "pipe", "pipe"],
                env: { ...process.env, TERM: "dumb" },
              };
              // argv arrays bypass the shell entirely; only string commands
              // go through /bin/sh.
              const raw = Array.isArray(cmd)
                ? execFileSync(cmd[0], cmd.slice(1), options)
                : execSync(cmd, { ...options, shell: "/bin/sh" });
              return parseCliJson(raw);
            } catch {
              // A failing or non-JSON CLI simply contributes no answer.
            }
            return null;
          },
        });
      }
    } catch {
      /* not found */
    }
  }

  // Try workshop discovery script if available
  try {
    const workshop = commandExists("tools/workshop-shell");
    if (workshop) {
      // Discovery would parse the script output and add more agents
    }
  } catch {
    /* not available */
  }

  return agents;
}
1638
+
1639
+ // =========================================================================
1640
+ // AI Panel — multi-model consensus & debate
1641
+ // =========================================================================
1642
// Overall time budget for one panel model call, in seconds. Taken from
// OMO_PANEL_MODEL_TIMEOUT_SECONDS (default 180) and never below 120.
const PANEL_MODEL_TIMEOUT_SECONDS = (() => {
  const configured = Number.parseInt(
    process.env.OMO_PANEL_MODEL_TIMEOUT_SECONDS || "180",
    10,
  );
  return Math.max(120, configured || 180);
})();

// Time allowed before the first stdout byte arrives, in seconds. Taken from
// OMO_PANEL_FIRST_BYTE_TIMEOUT_SECONDS, defaulting to the overall budget,
// and never below 60.
const PANEL_FIRST_BYTE_TIMEOUT_SECONDS = (() => {
  const configured = Number.parseInt(
    process.env.OMO_PANEL_FIRST_BYTE_TIMEOUT_SECONDS ||
      String(PANEL_MODEL_TIMEOUT_SECONDS),
    10,
  );
  return Math.max(60, configured || PANEL_MODEL_TIMEOUT_SECONDS);
})();
1656
+
1657
/**
 * Stop a child process: SIGTERM now, SIGKILL five seconds later if it has
 * still neither exited nor been signalled.
 *
 * @param {{exitCode: ?number, signalCode: ?string, kill: function(string)}} child
 *   Child process handle.
 * @returns {void}
 */
function killChild(child) {
  const stillRunning = () =>
    child.exitCode === null && child.signalCode === null;

  if (stillRunning()) child.kill("SIGTERM");

  setTimeout(() => {
    if (stillRunning()) child.kill("SIGKILL");
  }, 5000);
}
1665
+
1666
/**
 * Condense process output into a short one-line summary.
 *
 * Strips ANSI colour sequences, trims each line, drops blank lines and joins
 * the last three remaining lines with single spaces.
 *
 * @param {?string} text - Raw output; falsy values are treated as "".
 * @returns {string} The condensed text, possibly empty.
 */
function compactErrorText(text) {
  const withoutColors = (text || "").replace(/\x1b\[[0-9;]*m/g, "");
  const meaningful = [];
  for (const line of withoutColors.split("\n")) {
    const trimmed = line.trim();
    if (trimmed) meaningful.push(trimmed);
  }
  return meaningful.slice(-3).join(" ");
}
1675
+
1676
/**
 * Extract a retry delay, in seconds, from error text.
 *
 * Recognises `retry-after`, `retry_after`, `retryAfter`,
 * `x-ratelimit-reset` and `x-rate-limit-reset` followed by ":" or "=" and a
 * number, checked in that order. For the two "reset" keys a value above
 * 1,000,000,000 is read as a Unix timestamp and converted to a delay from
 * now. If none match, a `retry-after` value that `Date.parse` understands is
 * converted the same way.
 *
 * @param {*} text - Text to scan; falsy values are treated as "".
 * @returns {?number} Delay in whole seconds (at least 1), or null when no
 *   usable value is found.
 */
function parseRetryAfterSeconds(text) {
  const raw = String(text || "");
  const atLeastOne = (seconds) => Math.max(1, seconds);

  const rules = [
    { pattern: /retry-after["']?\s*[:=]\s*["']?(\d+)/i, mayBeEpoch: false },
    { pattern: /retry_after["']?\s*[:=]\s*["']?(\d+)/i, mayBeEpoch: false },
    { pattern: /retryAfter["']?\s*[:=]\s*["']?(\d+)/i, mayBeEpoch: false },
    { pattern: /x-ratelimit-reset["']?\s*[:=]\s*["']?(\d+)/i, mayBeEpoch: true },
    { pattern: /x-rate-limit-reset["']?\s*[:=]\s*["']?(\d+)/i, mayBeEpoch: true },
  ];

  for (const { pattern, mayBeEpoch } of rules) {
    const found = raw.match(pattern);
    if (!found) continue;
    const value = Number.parseInt(found[1], 10);
    if (!Number.isFinite(value)) continue;
    if (mayBeEpoch && value > 1000000000) {
      // Reset headers may carry an absolute Unix time rather than a delay.
      const nowSeconds = Math.floor(Date.now() / 1000);
      return atLeastOne(value - nowSeconds);
    }
    return atLeastOne(value);
  }

  // Retry-After may also be a date instead of a number of seconds.
  const dated = raw.match(/retry-after["']?\s*[:=]\s*["']?([^"'\r\n]+)/i);
  if (dated) {
    const when = Date.parse(dated[1].trim());
    if (Number.isFinite(when)) {
      return atLeastOne(Math.ceil((when - Date.now()) / 1000));
    }
  }
  return null;
}
1704
+
1705
/**
 * Pull the response text out of line-delimited JSON events.
 *
 * Every line that parses as JSON with `type === "text"` and a truthy
 * `part.text` contributes its text; all other lines are ignored. The pieces
 * are concatenated in order.
 *
 * @param {string} stdout - Raw stdout, one JSON event per line.
 * @returns {?string} The concatenated text, or null when there is none.
 */
function extractOpencodeText(stdout) {
  const fragments = stdout
    .trim()
    .split("\n")
    .flatMap((line) => {
      try {
        const event = JSON.parse(line);
        const isText = event.type === "text" && event.part && event.part.text;
        return isText ? [event.part.text] : [];
      } catch (_) {
        // Not JSON (or not an object): not an event we care about.
        return [];
      }
    });
  return fragments.join("") || null;
}
1717
+
1718
/**
 * Run one prompt against one model through `opencode run` and resolve with
 * the response text.
 *
 * The child runs in the OS temp directory with JSON output. Two timers guard
 * it: one for the first stdout byte and one for the whole call; either one
 * kills the child. The promise never rejects.
 *
 * Quota and rate-limit classification only happens for calls that failed
 * (non-zero exit or no response text), so status codes that merely appear
 * inside a successful response cannot flag the provider.
 *
 * @param {string} model - Model reference passed to `--model`.
 * @param {string} prompt - Prompt passed as the final argument.
 * @param {?AbortSignal} signal - Optional cancellation signal.
 * @param {?object} statusRef - Optional mutable status record. Fields
 *   written: `phase`, `bytes`, `stderr`, `failReason`, `quotaExceeded`,
 *   `creditExhausted`, `rateLimited`, `retryAfter`.
 * @returns {Promise<?string>} Response text, or null on any failure.
 */
function callPanelModelAsync(model, prompt, signal, statusRef) {
  // Numeric status codes are matched on word boundaries so longer numbers
  // (timestamps, ids, token counts) do not count as "402" or "429".
  const QUOTA_PATTERN =
    /\b402\b|payment[ _]required|quota[ _]exceeded|billing[ _]limit|credit[ _]limit|insufficient[ _]funds|usage limit|budget exceeded/;
  const RATE_LIMIT_PATTERN =
    /\b429\b|rate[ _]limit|too[ _]many[ _]requests/;

  return new Promise((resolve) => {
    const tempDir = os.tmpdir();
    const child = spawnTracked(
      "opencode",
      [
        "run",
        "--pure",
        "--agent",
        "summary",
        "--dir",
        tempDir,
        "--format",
        "json",
        "--model",
        model,
        "--dangerously-skip-permissions",
        prompt,
      ],
      {
        cwd: tempDir,
        env: {
          ...process.env,
          PWD: tempDir,
          INIT_CWD: tempDir,
          TERM: "dumb",
        },
        stdio: ["ignore", "pipe", "pipe"],
      },
    );

    let stdout = "";
    let stderr = "";
    let firstByteReceived = false;
    let settled = false;

    const firstByteTimer = setTimeout(() => {
      if (statusRef) statusRef.failReason = "first-byte-timeout";
      killChild(child);
    }, PANEL_FIRST_BYTE_TIMEOUT_SECONDS * 1000);

    const totalTimer = setTimeout(() => {
      if (statusRef) statusRef.failReason = "total-timeout";
      killChild(child);
    }, PANEL_MODEL_TIMEOUT_SECONDS * 1000);

    const clearTimers = () => {
      clearTimeout(firstByteTimer);
      clearTimeout(totalTimer);
    };

    const onAbort = () => {
      clearTimers();
      if (statusRef) statusRef.failReason = "aborted";
      killChild(child);
    };

    // Single exit point: release timers and the abort listener, then
    // resolve at most once ("error" may be followed by "close").
    const settle = (value) => {
      clearTimers();
      if (signal) signal.removeEventListener("abort", onAbort);
      if (settled) return;
      settled = true;
      resolve(value);
    };

    // Record quota/billing and rate-limit conditions on statusRef.
    const classifyFailure = () => {
      const rawError = `${stderr}\n${stdout}`;
      const lower = rawError.toLowerCase();
      if (QUOTA_PATTERN.test(lower)) {
        statusRef.quotaExceeded = true;
        statusRef.creditExhausted = true;
        if (!statusRef.failReason) statusRef.failReason = "quota-exceeded";
        statusRef.stderr = compactErrorText(stderr || stdout);
      }
      if (RATE_LIMIT_PATTERN.test(lower)) {
        statusRef.rateLimited = true;
        statusRef.retryAfter =
          parseRetryAfterSeconds(rawError) || statusRef.retryAfter || 15;
        if (!statusRef.failReason) statusRef.failReason = "rate-limited";
        statusRef.stderr = compactErrorText(stderr || stdout);
      }
    };

    child.stdout.on("data", (data) => {
      if (!firstByteReceived) {
        firstByteReceived = true;
        clearTimeout(firstByteTimer);
      }
      stdout += data.toString();
      if (statusRef) {
        statusRef.phase = "receiving";
        statusRef.bytes = stdout.length;
      }
    });

    child.stderr.on("data", (data) => {
      stderr += data.toString();
      if (statusRef) statusRef.stderr = compactErrorText(stderr);
    });

    child.on("error", (err) => {
      if (statusRef) {
        statusRef.failReason = "spawn-error";
        statusRef.stderr = err.message;
      }
      settle(null);
    });

    child.on("close", (code) => {
      if (signal && signal.aborted) {
        if (statusRef) statusRef.failReason = "aborted";
        return settle(null);
      }

      const text = stdout ? extractOpencodeText(stdout) : null;

      // Only a failed call is inspected for quota / rate-limit markers.
      if (statusRef && (code !== 0 || !text)) classifyFailure();

      if (!stdout) {
        if (statusRef) {
          if (!statusRef.failReason)
            statusRef.failReason = code ? "process-error" : "empty-response";
          if (!statusRef.stderr) statusRef.stderr = compactErrorText(stderr);
        }
        return settle(null);
      }
      if (!text && statusRef) {
        statusRef.failReason = "empty-text";
        statusRef.stderr = compactErrorText(stderr);
      }
      settle(text);
    });

    if (signal) {
      // An already-aborted signal never fires "abort" again, so honour it
      // immediately instead of letting the child run until a timeout.
      if (signal.aborted) onAbort();
      else signal.addEventListener("abort", onAbort, { once: true });
    }
  });
}
1876
+
1877
/**
 * When raw AI output isn't valid JSON, ask another model to clean it.
 *
 * The first model returned by `discoverFreeModels()` is prompted to extract
 * the (largest) JSON object from the text and return nothing else.
 *
 * @param {string} raw - The AI output to repair.
 * @returns {Promise<?string>} The model's reply, or null when no free model
 *   is available, the reply is empty, or the call fails.
 */
async function cleanAiResponse(raw) {
  const candidates = discoverFreeModels();
  if (candidates.length === 0) return null;
  const repairModel = candidates[0];

  const promptLines = [
    "Extract ONLY the JSON object from the text below.",
    "If there are multiple JSON objects return the LARGEST one.",
    "Return valid JSON and nothing else. No markdown fences. No explanation.",
    "",
    raw,
  ];
  const prompt = promptLines.join("\n");

  try {
    const reply = await callPanelModelAsync(repairModel, prompt, null, {});
    return reply || null;
  } catch (_) {
    return null;
  }
}
1904
+
1905
/**
 * Ask one panel model for a recommendation for a single agent/category.
 *
 * Refs starting with "cli/" are dispatched to the matching entry in
 * `cliModels`; every other ref goes through `callPanelModelAsync`. A reply
 * that does not parse or validate is passed once through `cleanAiResponse`
 * before giving up.
 *
 * Rate-limited calls are retried up to `maxRetries` times, waiting
 * `statusRef.retryAfter` seconds when the server supplied it, otherwise a
 * fixed 15 seconds (no exponential backoff). No wait is performed after the
 * final attempt, and in that case the failure details written to `statusRef`
 * by the last call are left intact for the caller.
 *
 * @param {string} model - Panel model ref ("provider/model" or "cli/...").
 * @param {string} prompt - Per-agent prompt text.
 * @param {AbortSignal|null} signal - Forwarded to `callPanelModelAsync`.
 * @param {object} [statusRef] - Mutable status object (failReason, stderr,
 *   rateLimited, retryAfter, quotaExceeded) shared with the call layer.
 * @param {Array<{ref: string, call: Function}>} [cliModels] - CLI agents.
 * @param {string} [agentName] - Used as `name` when the reply omits it.
 * @param {number} [maxRetries=3] - Maximum number of attempts.
 * @returns {Promise<object|null>} Normalized recommendation or null.
 */
async function callModelForAgent(
  model,
  prompt,
  signal,
  statusRef,
  cliModels,
  agentName,
  maxRetries = 3,
) {
  // A model reference is acceptable when absent (null/undefined) or complete.
  const hasUsableModel = (candidate) =>
    candidate.model == null ||
    Boolean(candidate.model.provider && candidate.model.model);

  // Default the name, normalize, and validate; shared by both dispatch paths.
  const finalize = (parsed) => {
    if (!parsed) return null;
    if (!parsed.name && agentName) {
      parsed.name = agentName;
    }
    const rec = normalizeAgentRec(parsed);
    if (!rec || !rec.name || !hasUsableModel(rec)) return null;
    return rec;
  };

  // CLI agents use a different dispatch path
  if (model.startsWith("cli/")) {
    const cliAgent = (cliModels || []).find((a) => a.ref === model);
    if (!cliAgent) return null;
    return finalize(await cliAgent.call(prompt));
  }

  const provider = model.split("/")[0];
  if (!isProviderAvailable(provider)) {
    if (statusRef) {
      const state = providerAvailability.get(provider);
      statusRef.failReason = state?.creditExhausted
        ? "quota-exceeded"
        : "rate-limited";
      statusRef.stderr = `Skipped: provider unavailable (${statusRef.failReason})`;
    }
    return null;
  }

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    const raw = await callPanelModelAsync(model, prompt, signal, statusRef);
    if (raw) {
      // Got a response — parse and return
      let parsed = null;
      try {
        parsed = parseAiJson(raw);
      } catch (_) {
        // Unparseable output is handled by the repair pass below.
      }
      if (parsed && !parsed.name && agentName) {
        parsed.name = agentName;
      }
      if (!parsed || !parsed.name || !hasUsableModel(parsed)) {
        const cleaned = await cleanAiResponse(raw);
        if (cleaned) {
          try {
            parsed = parseAiJson(cleaned);
          } catch (_) {
            // Keep the first parse result; finalize() validates it.
          }
        }
      }
      return finalize(parsed);
    }

    if (statusRef?.quotaExceeded) {
      markProviderCreditExhausted(provider, statusRef.failReason);
      return null;
    }

    // Non-rate-limit failure — don't retry
    if (!statusRef?.rateLimited) return null;

    // Rate-limited — use Retry-After if the server specified one, else a fixed interval
    const delay = statusRef.retryAfter || 15;
    markProviderRateLimited(provider, delay, statusRef.failReason);

    // Out of attempts: waiting would only stall the caller, and resetting the
    // flags would erase the reason for the failure.
    if (attempt >= maxRetries) break;

    statusRef.failReason = `rate-limited retry ${attempt}/${maxRetries} (${delay}s)`;

    await new Promise((resolve) => setTimeout(resolve, delay * 1000));

    // Reset rate-limit flags for next attempt (each retry is a fresh request)
    statusRef.rateLimited = false;
    statusRef.retryAfter = null;
    statusRef.failReason = null;
  }
  return null;
}
2002
+
2003
/**
 * Build the compact per-agent prompt sent to each panel model.
 *
 * The prompt contains the output schema, the agent line (name, type,
 * quality, current model, description), the hardware line, a CLOUD table and
 * a LOCAL table, followed by fixed rules, examples and validation notes.
 *
 * CLOUD table: for every available non-"local" provider in
 * `cloudLookup.byId`, the 10 best-scored models plus any model detected as
 * free are collected, sorted globally by score, then reduced to one model
 * per known family and one per model id across providers. Models whose
 * family cannot be identified are omitted.
 *
 * LOCAL table: the models returned by `buildFittingModels`, each tagged
 * "inst" when its normalized name is in the installed set.
 *
 * @param {{name: string, type: string, section: object}} entry - Config entry.
 * @param {{byId?: Object<string, Map<string, object>>}} cloudLookup
 * @param {Array} allLocalModels - Candidate local models.
 * @param {{label: string, vramGb: number}} gpu - Hardware description.
 * @param {object} ollama - Local runtime info passed to `installedLocalNameSet`.
 * @returns {string} The prompt text.
 */
function buildAgentPrompt(entry, cloudLookup, allLocalModels, gpu, ollama) {
  const vramAvail = usableLocalVramGb(gpu);
  const quality = entry.section.model_quality || "balanced";
  const desc = entry.section.description || "";
  const currentModel = entry.section.model || "";

  // Helper to check if a model is free (by ref string naming or metadata cost === 0)
  const isModelFree = (ref, meta) => {
    const lowerRef = ref.toLowerCase();
    if (
      lowerRef.includes("-free") ||
      lowerRef.endsWith(":free") ||
      lowerRef.includes("/free-") ||
      lowerRef.includes("-free-") ||
      lowerRef.endsWith("/free")
    ) {
      return true;
    }
    if (meta) {
      const cost =
        meta.cost !== undefined
          ? meta.cost
          : meta.input_price !== undefined
            ? meta.input_price
            : null;
      if (cost !== null && cost !== undefined && cost !== "" && Number(cost) === 0) {
        return true;
      }
    }
    return false;
  };

  // 1. Get all available providers (not credit exhausted, not rate limited)
  const availableProviders = Object.keys(cloudLookup.byId || {}).filter(
    (provider) => provider !== "local" && isProviderAvailable(provider),
  );

  // 2. Get the top 10 models for each available provider, plus any free models
  const candidateModels = [];
  for (const provider of availableProviders) {
    const modelMap = cloudLookup.byId[provider] || new Map();
    const scored = [];
    for (const [id, meta] of modelMap.entries()) {
      const ref = `${provider}/${id}`;
      scored.push({
        ref,
        provider,
        modelId: id,
        score: Math.round(scoreModel(ref, null, meta)),
        free: isModelFree(ref, meta),
      });
    }
    // Sort by score descending
    scored.sort((a, b) => b.score - a.score);

    // Top 10 first, then every remaining free model in score order
    const top10 = scored.slice(0, 10);
    const top10Refs = new Set(top10.map((m) => m.ref));
    candidateModels.push(
      ...top10,
      ...scored.filter((m) => m.free && !top10Refs.has(m.ref)),
    );
  }

  // Sort all candidate models globally by score descending
  candidateModels.sort((a, b) => b.score - a.score);

  const FAMILY_LIMITS = {
    "claude-opus": 1,
    "claude-sonnet": 1,
    "claude-haiku": 1,
    "gpt-pro": 1,
    "gpt-mini": 1,
    codex: 1,
    "gemini-pro": 1,
    "gemini-flash": 1,
    "google-gemma": 1,
    "groq-llama": 1,
    "groq-gpt-oss": 1,
    "groq-qwen": 1,
    "deepseek-chat": 1,
    "deepseek-reasoner": 1,
    "deepseek-pro": 1,
    "opencode-mimo": 1,
    "opencode-nemotron": 1,
    "opencode-deepseek": 1,
    "opencode-north": 1,
    "opencode-big-pickle": 1,
  };

  // Map a model ref to its family bucket, or null when the family is unknown.
  const familyKey = (ref) => {
    const lower = ref.toLowerCase();

    // Free opencode models
    if (lower.startsWith("opencode/")) {
      if (lower.includes("mimo")) return "opencode-mimo";
      if (lower.includes("nemotron")) return "opencode-nemotron";
      if (lower.includes("deepseek")) return "opencode-deepseek";
      if (lower.includes("north")) return "opencode-north";
      if (lower.includes("big-pickle")) return "opencode-big-pickle";
    }

    // Claude
    if (lower.includes("claude-opus")) return "claude-opus";
    if (lower.includes("claude-sonnet")) return "claude-sonnet";
    if (lower.includes("claude-haiku")) return "claude-haiku";

    // GPT — mini variants must be tested before the "pro" patterns because
    // "gpt-5.4-mini" also contains "gpt-5.4" and would otherwise be
    // classified as gpt-pro, making the mini branch unreachable.
    if (
      lower.includes("gpt-mini") ||
      lower.includes("gpt-5-mini") ||
      lower.includes("gpt-5.4-mini")
    ) {
      return "gpt-mini";
    }
    if (
      lower.includes("gpt-5.5-pro") ||
      lower.includes("gpt-5.5") ||
      lower.includes("gpt-5.4")
    ) {
      return "gpt-pro";
    }
    if (lower.includes("gpt-5.3-codex") || lower.includes("codex")) {
      return "codex";
    }

    // Gemini
    if (
      lower.includes("gemini-3.5-flash") ||
      lower.includes("gemini-3-flash") ||
      lower.includes("gemini-flash") ||
      lower.includes("gemini-2.5-flash")
    ) {
      return "gemini-flash";
    }
    if (
      lower.includes("gemini-2.5-pro") ||
      lower.includes("gemini-3.1-pro") ||
      lower.includes("gemini-3-pro") ||
      lower.includes("gemini-pro")
    ) {
      return "gemini-pro";
    }

    // Gemma
    if (lower.includes("gemma")) return "google-gemma";

    // Llama
    if (lower.includes("llama")) return "groq-llama";

    // GPT OSS
    if (lower.includes("gpt-oss")) return "groq-gpt-oss";

    // Qwen
    if (lower.includes("qwen")) return "groq-qwen";

    // Deepseek paid
    if (lower.startsWith("deepseek/")) {
      if (lower.includes("reasoner")) return "deepseek-reasoner";
      if (lower.includes("pro")) return "deepseek-pro";
      if (lower.includes("chat") || lower.includes("flash")) return "deepseek-chat";
    }

    return null;
  };

  const seenModelIds = new Set();
  const counts = {};
  const deduped = [];
  for (const m of candidateModels) {
    const fam = familyKey(m.ref);
    if (!fam) continue;

    // Deduplicate same models across multiple providers by model ID (e.g. gemini-2.5-pro)
    const normModelId = m.modelId.toLowerCase();
    if (seenModelIds.has(normModelId)) {
      continue;
    }

    const limit = FAMILY_LIMITS[fam] || 1;
    const n = counts[fam] || 0;
    if (n < limit) {
      counts[fam] = n + 1;
      seenModelIds.add(normModelId);
      deduped.push(m);
    }
  }
  const providersTable = deduped.map((m) => `${m.ref} ${m.score}`).join("\n");

  // Local models — fit VRAM, show install status. The installed set holds
  // normalized names, so the lookup key is normalized the same way.
  const installedNames = installedLocalNameSet(ollama);
  const fitting = buildFittingModels(allLocalModels, gpu);
  const localTable = fitting
    .map(
      (m) =>
        `${m.name} v=${m.vram}GB s=${m.score} ${installedNames.has(normalizeLocalModelName(m.name)) ? "inst" : ""}`,
    )
    .join("\n");

  return [
    "OUTPUT: valid JSON only. No markdown.",
    "",
    "SCHEMA:",
    "{",
    ' "name": str,',
    ' "type": "agent|category",',
    ' "profile": str,',
    ' "model": {"provider": str, "model": str, "reason": str},',
    ' "routing": [{"provider": str, "model": str, "reason": str}],',
    ' "fallback_models": [{"provider": str, "model": str, "reason": str}]',
    "}",
    "",
    `AGENT: ${entry.name} | ${entry.type} | ${quality} | cur=${currentModel || "-"} | ${desc}`,
    "",
    `HW: GPU=${gpu.label} VRAM=${gpu.vramGb}GB usable=${Math.round(vramAvail * 10) / 10}GB`,
    "",
    `CLOUD (${deduped.length}):`,
    providersTable || "-",
    "",
    `LOCAL (${fitting.length} fit VRAM):`,
    localTable || "-",
    "",
    "FIELDS: model=primary routing=delegation_pool fallback_models=retry_pool",
    "RULES:",
    "- Sort routing and fallback_models by score descending.",
    "- Paid/cloud as primary for reasoning/code agents.",
    "- Free model as fallback unless utility agent (explore/librarian/quick).",
    "- Prefer highest-scored cloud model for primary unless GPU requirements force local.",
    "- For utility agents (explore/librarian/quick), use highest-scored FREE cloud as primary.",
    "- For other agents, prioritize highest-scored paid/cloud model.",
    "- Place minimum 1 routing entry with highest-scored cloud (or local if only fit).",
    "- Fill routing pool with next highest-scored cloud models (paid > free).",
    "- The fallback_models pool must have 3 agents set, ordered by capability:",
    " * Slot 1 most closely matches the `model` key, in intelligence and token window.",
    " * Slot 2 is a highly available, fast mid-tier model.",
    " * Slot 3 is the cheapest, highest-rate-limit model.",
    "- Remove duplicate entries across model, routing, and fallback_models.",
    "",
    "SPECIFIC EXAMPLES:",
    "",
    "Case 1 - Reasoning agent (paid/cloud required as primary):",
    "{",
    ' "name": "metis",',
    ' "type": "agent",',
    ' "profile": "Pre-planning consultant for ambiguous requirements",',
    ' "model": {"provider": "github-copilot", "model": "claude-opus-4.8", "reason": "Paid cloud model as primary for reasoning/code agent per placement rules"},',
    ' "routing": [{"provider": "opencode", "model": "nemotron-3-ultra-free", "reason": "Highest-scored cloud free model for delegation pool"},',
    ' {"provider": "opencode", "model": "north-mini-code-free", "reason": "Second highest-scored cloud free model for delegation pool"}],',
    ' "fallback_models": [',
    ' {"provider": "opencode", "model": "gemini-2.5-flash", "reason": "Slot 1: Closely matches primary model in capability/context"},',
    ' {"provider": "opencode", "model": "mimo-v2.5-free", "reason": "Slot 2: Highly available, fast mid-tier model"},',
    ' {"provider": "opencode", "model": "north-mini-code-free", "reason": "Slot 3: Cheapest, highest-rate-limit model"}',
    ' ]',
    "}",
    "",
    "Case 2 - Utility agent (free cloud as primary):",
    "{",
    ' "name": "explore",',
    ' "type": "agent",',
    ' "profile": "Fast codebase exploration and pattern matching - very lightweight utility work",',
    ' "model": {"provider": "opencode", "model": "mimo-v2.5-free", "reason": "Free model suitable for lightweight exploration"},',
    ' "routing": [],',
    ' "fallback_models": []',
    "}",
    "",
    "Case 3 - Category (free cloud as primary):",
    "{",
    ' "name": "visual-engineering",',
    ' "type": "category",',
    ' "profile": "Frontend, UI/UX, design, styling, animation",',
    ' "model": {"provider": "opencode", "model": "nemotron-3-ultra-free", "reason": "Balanced performance for design and visual tasks"},',
    ' "routing": [],',
    ' "fallback_models": []',
    "}",
    "",
    "VALIDATION RULES:",
    "- All providers in routing and fallback_models must be 'opencode' (free) for free agents",
    "- No local models in routing arrays (only in fallback_models for local fallback)",
    'Check the "FIELDS" section carefully: model=primary, routing=delegation_pool, fallback_models=retry_pool',
  ].join("\n");
}
2271
+
2272
/**
 * Run async tasks with a concurrency limit.
 *
 * Tasks are started in order; each worker pulls the next unstarted task
 * until none remain, so results keep the order of `tasks` regardless of
 * completion order. If any task rejects, the returned promise rejects.
 *
 * @param {Array<() => Promise<any>>} tasks - Zero-argument async functions.
 * @param {number} concurrency - Maximum tasks in flight. Values below 1 or
 *   not numeric are treated as 1 so that every task still runs.
 * @returns {Promise<any[]>} Task results, indexed like `tasks`.
 */
async function runPool(tasks, concurrency) {
  const results = new Array(tasks.length);
  if (tasks.length === 0) return results;

  // Array.from({ length }) yields no workers for 0, negative or NaN lengths,
  // which would silently skip every task; clamp to at least one worker.
  const requested = Number(concurrency);
  const limit = Number.isNaN(requested) ? 1 : Math.max(1, Math.floor(requested));
  const workerCount = Math.min(limit, tasks.length);

  let next = 0;
  const worker = async () => {
    while (next < tasks.length) {
      const idx = next++;
      results[idx] = await tasks[idx]();
    }
  };

  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
2291
+
2292
/**
 * Interactive model picker: list the selectable panel model groups (paid
 * refs passed in plus discovered free models) and let the user choose
 * groups by number.
 *
 * @param {object} config - Passed to `sortPanelModelRefs` for ordering.
 * @param {string[]} [availablePaid=[]] - Paid model refs to offer.
 * @returns {Promise<string[]|null>} Unique refs of the chosen groups in
 *   selection order, or null when nothing is selectable, the user accepts
 *   the default ("a" or Enter), or no valid group number was entered.
 */
async function pickPanelModels(config, availablePaid = []) {
  const free = discoverFreeModels();
  const combined = [...new Set([...(availablePaid || []), ...free])];
  const all = sortPanelModelRefs(combined, config);
  if (all.length === 0) return null;

  console.log(`\nEvaluate these models for opencode OMO agent roles:`);
  const selectableGroups = printSelectablePanelModelGroups(all, " ");
  console.log(` [a] All (default)`);

  const answer = await promptUser(
    "Select model families by number (e.g. 1,3,5) or press Enter for all: ",
  );
  const trimmed = (answer || "").trim();
  if (trimmed === "" || trimmed.toLowerCase() === "a") return null;

  // A Set drops repeated group numbers ("1,1") so a model is never queued
  // twice for the panel.
  const indices = new Set(
    trimmed
      .split(",")
      .map((s) => Number.parseInt(s.trim(), 10))
      .filter((n) => !Number.isNaN(n) && n >= 1 && n <= selectableGroups.length),
  );
  if (indices.size === 0) return null;

  const refs = [...indices].flatMap((i) =>
    selectableGroups[i - 1].models.map((entry) => entry.ref),
  );
  // Also guard against one ref being listed under more than one group.
  return [...new Set(refs)];
}
2322
+
2323
+ /**
+ * Per-agent panel: every panel model is asked for a recommendation for
+ * every agent/category, and the answers are merged into one consensus
+ * recommendation per agent.
+ *
+ * Steps, in order:
+ * 1. Pick the panel: `panelModels` when non-empty, else defaultPanelModels().
+ * 2. Probe each model with probeModel(); if none is usable, set
+ * `opencodeOnlyMode`, print the probe failures, and probe
+ * defaultPanelModels() again as a fallback.
+ * 3. Queue one task per (agent, model) pair and run them through runPool()
+ * with one worker per logical CPU.
+ * 4. Tally votes per agent. The primary model is the one with the most
+ * votes (ties keep the first-seen key); routing and fallback entries
+ * are kept only when more than half of the valid results named them.
+ * Votes for providers that are currently unavailable are ignored.
+ *
+ * On a TTY, progress is drawn as a multi-line block (one line per panel
+ * model plus three summary lines) that is redrawn in place using ANSI
+ * cursor-up / erase-line sequences. Nothing is redrawn when stdout is not
+ * a TTY.
+ *
+ * NOTE(review): the `cloudOnly` parameter is not referenced in this body.
+ *
+ * Throws Error("No free models available"), Error("No available free
+ * models found") or Error("No agents or categories in config").
+ *
+ * Returns { selected, panel } where `selected` holds recommender, analysis,
+ * cloudRecommendations and an empty localModels skeleton, and `panel` holds
+ * the per-agent state plus the list of models actually used.
+ */
2329
+ async function runPanelAndSelect(
2330
+ config,
2331
+ cloudLookup,
2332
+ allLocalModels,
2333
+ gpu,
2334
+ ollama,
2335
+ cloudOnly,
2336
+ panelModels,
2337
+ ) {
2338
+ const cliAgents = discoverCliModels(config);
2339
+ let models;
2340
+ if (panelModels && panelModels.length > 0) {
2341
+ models = panelModels;
2342
+ } else {
2343
+ models = defaultPanelModels(config);
2344
+ if (models.length === 0) throw new Error("No free models available");
2345
+ }
2346
+
2347
+ // Pre-flight check the models we are going to use in the panel
2348
+ let availableModels = [];
2349
+ const progress = createProgress("Verifying panel models availability");
2350
+ let probeResults = [];
2351
+ try {
2352
+ probeResults = await Promise.all(models.map((m) => probeModel(m)));
2353
+ availableModels = models.filter((m, i) => probeResults[i].ok);
2354
+ progress.done(`${availableModels.length} of ${models.length} model(s) available`);
2355
+ } catch (err) {
2356
+ progress.done(`error verifying availability: ${err.message}`);
2357
+ availableModels = models;
2358
+ }
2359
+
2360
+ if (availableModels.length === 0) {
2361
+ console.log("\n\u26A0 No panel models are available (all are quota-restricted or rate-limited). Limiting analysis and recommendations to opencode AI models exclusively.");
2362
+ opencodeOnlyMode = true; // NOTE(review): not declared in this function — presumably a module-level flag; confirm
2363
+
2364
+ console.log("\nFailed model details / errors:");
2365
+ for (let i = 0; i < models.length; i++) {
2366
+ const m = models[i];
2367
+ const res = probeResults[i] || { ok: false, reason: "unknown error", errorOutput: "No output" };
2368
+ const errSnippet = res.errorOutput ? res.errorOutput.split("\n")[0] : "unknown error";
2369
+ console.log(` • ${m}: ${res.reason} (${errSnippet})`);
2370
+ }
2371
+
2372
+ console.log("\nFalling back to free opencode models...");
2373
+ const freeModels = defaultPanelModels(config);
2374
+ if (freeModels.length === 0) {
2375
+ throw new Error("No free models available");
2376
+ }
2377
+ const progress2 = createProgress("Verifying free models availability");
2378
+ try {
2379
+ const probeResults2 = await Promise.all(freeModels.map((m) => probeModel(m)));
2380
+ availableModels = freeModels.filter((m, i) => probeResults2[i].ok);
2381
+ progress2.done(`${availableModels.length} of ${freeModels.length} model(s) available`);
2382
+ } catch (err) {
2383
+ progress2.done(`error verifying availability: ${err.message}`);
2384
+ availableModels = freeModels;
2385
+ }
2386
+ if (availableModels.length === 0) {
2387
+ throw new Error("No available free models found");
2388
+ }
2389
+ }
2390
+
2391
+ models = availableModels;
2392
+
2393
+ const agents = allConfigEntries(config);
2394
+ if (agents.length === 0) throw new Error("No agents or categories in config");
2395
+
2396
+ // Per-agent state: one record per config entry, index-aligned with `agents`
2397
+ const state = agents.map((entry) => ({
2398
+ name: entry.name,
2399
+ type: entry.type,
2400
+ results: [],
2401
+ done: false,
2402
+ consensus: null,
2403
+ }));
2404
+
2405
+ // ── Header (printed once, scrolls naturally) ──
2406
+ console.log();
2407
+ console.log(
2408
+ `== AI Panel: ${agents.length} agents, ${models.length} panel models ==`,
2409
+ );
2410
+ console.log(" Models:");
2411
+
2412
+ // ── In-place status block: one line per model plus 3 summary lines, redrawn via ANSI cursor-up ──
2413
+ const totalTasks = agents.length * models.length;
2414
+ let tasksDone = 0;
2415
+ let agentsDone = 0;
2416
+ let currentAgent = "";
2417
+ const modelSuccessCounts = new Map(models.map((model) => [model, 0]));
2418
+ const countWidth = String(totalTasks).length;
2419
+ const formatCount = (value, total) =>
2420
+ `${String(value).padStart(countWidth, " ")}/${total}`;
2421
+
2422
+ const maxLabelWidth = Math.max(
2423
+ "tasks".length,
2424
+ "agents".length,
2425
+ ...models.map((m) => m.length),
2426
+ );
2427
+ const formatLinePrefix = (label) =>
2428
+ ` \u2022 ${(label + ":").padEnd(maxLabelWidth + 1)} `;
2429
+
2430
+ const statusLineCount = 3; // the "evaluating", "tasks" and "agents" lines below the per-model lines
2431
+ const updateStatus = () => {
2432
+ if (process.stdout.isTTY) {
2433
+ process.stdout.write(`\x1b[${models.length + statusLineCount}F`);
2434
+ for (const model of models) {
2435
+ const count = modelSuccessCounts.get(model) || 0;
2436
+ process.stdout.write(
2437
+ `\x1b[2K${formatLinePrefix(model)}${formatCount(count, agents.length)} successful responses\n`,
2438
+ );
2439
+ }
2440
+ process.stdout.write(`\x1b[2Kevaluating ${currentAgent || "-"}\n`);
2441
+ process.stdout.write(`\x1b[2K${formatLinePrefix("tasks")}${formatCount(tasksDone, totalTasks)}\n`);
2442
+ process.stdout.write(`\x1b[2K${formatLinePrefix("agents")}${formatCount(agentsDone, agents.length)}\n`);
2443
+ }
2444
+ };
2445
+ for (const model of models) {
2446
+ console.log(
2447
+ `${formatLinePrefix(model)}${formatCount(modelSuccessCounts.get(model), agents.length)} successful responses`,
2448
+ );
2449
+ }
2450
+ console.log("evaluating -");
2451
+ console.log(`${formatLinePrefix("tasks")}${formatCount(tasksDone, totalTasks)}`);
2452
+ console.log(`${formatLinePrefix("agents")}${formatCount(agentsDone, agents.length)}`);
2453
+ updateStatus();
2454
+
2455
+ // ── Build task functions: one per (agent, model) pair ──
2456
+ const taskFns = [];
2457
+ for (let ai = 0; ai < agents.length; ai++) {
2458
+ const entry = agents[ai];
2459
+ const st = state[ai];
2460
+
2461
+ for (const m of models) {
2462
+ taskFns.push(async () => {
2463
+ currentAgent = entry.name;
2464
+ const statusRef = {}; // handed to callModelForAgent; not read again in this function
2465
+ const prompt = buildAgentPrompt(
2466
+ entry,
2467
+ cloudLookup,
2468
+ allLocalModels,
2469
+ gpu,
2470
+ ollama,
2471
+ );
2472
+ const rec = await callModelForAgent(m, prompt, null, statusRef, cliAgents, entry.name);
2473
+ if (rec) modelSuccessCounts.set(m, (modelSuccessCounts.get(m) || 0) + 1);
2474
+ st.results.push(rec ? { model: m, recommendation: rec } : null);
2475
+ tasksDone++;
2476
+ if (st.results.length === models.length) agentsDone++;
2477
+ updateStatus();
2478
+ });
2479
+ }
2480
+ }
2481
+
2482
+ // ── Run with concurrency limit (one worker per logical CPU, at least one) ──
2483
+ const concurrency = Math.max(1, os.cpus().length);
2484
+ await runPool(taskFns, concurrency);
2485
+
2486
+ // ── Mark all agents done ──
2487
+ for (const st of state) st.done = true;
2488
+
2489
+ // Final status update
2490
+ currentAgent = "";
2491
+ updateStatus();
2492
+ process.stdout.write("\n"); // new line after the status block
2493
+
2494
+ // ── Tally votes for all agents ──
2495
+ const cloudRecommendations = [];
2496
+ for (let ai = 0; ai < agents.length; ai++) {
2497
+ const entry = agents[ai];
2498
+ const st = state[ai];
2499
+
2500
+ const validResults = st.results.filter(
2501
+ (r) =>
2502
+ r &&
2503
+ r.recommendation &&
2504
+ r.recommendation.model &&
2505
+ r.recommendation.model.provider,
2506
+ );
2507
+ if (validResults.length === 0) continue;
2508
+
2509
+ const modelVotes = {};
2510
+ const modelReasons = {};
2511
+ const routingVotes = {};
2512
+ const fbVotes = {};
2513
+ for (const { recommendation: rec } of validResults) {
2514
+ if (
2515
+ rec.model &&
2516
+ rec.model.provider &&
2517
+ rec.model.model &&
2518
+ isProviderAvailable(rec.model.provider)
2519
+ ) {
2520
+ const key = `${rec.model.provider}/${rec.model.model}`;
2521
+ modelVotes[key] = (modelVotes[key] || 0) + 1;
2522
+ if (rec.model.reason) modelReasons[key] = rec.model.reason;
2523
+ }
2524
+ for (const r of rec.routing || []) {
2525
+ if (r.provider && r.model && isProviderAvailable(r.provider)) {
2526
+ const key = `${r.provider}/${r.model}`;
2527
+ routingVotes[key] = (routingVotes[key] || 0) + 1;
2528
+ }
2529
+ }
2530
+ for (const r of rec.fallback_models || []) {
2531
+ if (r.provider && r.model && isProviderAvailable(r.provider)) {
2532
+ const key = `${r.provider}/${r.model}`;
2533
+ fbVotes[key] = (fbVotes[key] || 0) + 1;
2534
+ }
2535
+ }
2536
+ }
2537
+
2538
+ const total = validResults.length;
2539
+ const majority = total / 2; // strict-majority threshold: an entry needs count > total / 2
2540
+ const sortedModels = Object.entries(modelVotes).sort((a, b) => b[1] - a[1]);
2541
+
2542
+ let consensusRec = null;
2543
+ if (sortedModels.length > 0) {
2544
+ const [winnerKey, winnerCount] = sortedModels[0];
2545
+ const [provider, ...modelParts] = winnerKey.split("/");
2546
+ consensusRec = {
2547
+ name: entry.name,
2548
+ type: entry.type,
2549
+ profile: entry.section.description || entry.section.model_quality || "",
2550
+ model: {
2551
+ provider,
2552
+ model: modelParts.join("/"),
2553
+ reason: `${winnerCount}/${total} models${modelReasons[winnerKey] ? " \u2014 " + modelReasons[winnerKey] : ""}`,
2554
+ },
2555
+ routing: [],
2556
+ fallback_models: [],
2557
+ };
2558
+
2559
+ // Routing: majority support
2560
+ for (const [key, count] of Object.entries(routingVotes).sort(
2561
+ (a, b) => b[1] - a[1],
2562
+ )) {
2563
+ if (count > majority) {
2564
+ const [rp, ...rm] = key.split("/");
2565
+ consensusRec.routing.push({
2566
+ provider: rp,
2567
+ model: rm.join("/"),
2568
+ reason: `${count}/${total} models`,
2569
+ });
2570
+ }
2571
+ }
2572
+
2573
+ // Fallback: majority support
2574
+ for (const [key, count] of Object.entries(fbVotes).sort(
2575
+ (a, b) => b[1] - a[1],
2576
+ )) {
2577
+ if (count > majority) {
2578
+ const [fp, ...fm] = key.split("/");
2579
+ consensusRec.fallback_models.push({
2580
+ provider: fp,
2581
+ model: fm.join("/"),
2582
+ reason: `${count}/${total} models`,
2583
+ });
2584
+ }
2585
+ }
2586
+
2587
+ // Note if no majority (this replaces the reason text built above)
2588
+ if (winnerCount <= majority) {
2589
+ consensusRec.model.reason = `Plurality (${winnerCount}/${total} models) \u2014 no majority`;
2590
+ }
2591
+ }
2592
+
2593
+ st.consensus = consensusRec;
2594
+ if (consensusRec) cloudRecommendations.push(consensusRec);
2595
+ }
2596
+
2597
+ // Build final result (completeAiRecommendations will fill localModels)
2598
+ const result = {
2599
+ recommender: `panel(${models.map((m) => m.split("/").pop()).join("+")})`,
2600
+ analysis: `Per-agent consensus across ${models.length} panel models for ${agents.length} agent(s)`,
2601
+ cloudRecommendations,
2602
+ localModels: { decisions: [], placements: [] },
2603
+ };
2604
+
2605
+ return { selected: result, panel: { state, models } };
2606
+ }
2607
+
2608
/**
 * Collect the normalized names of the locally installed models.
 *
 * @param {{models?: Array<{name?: string}>}|null|undefined} ollama - Local
 *   runtime info; a missing object or a missing/non-array `models` field
 *   yields an empty set instead of throwing.
 * @returns {Set<string>} Normalized, non-empty model names.
 */
function installedLocalNameSet(ollama) {
  const models = Array.isArray(ollama?.models) ? ollama.models : [];
  return new Set(
    models.map((m) => normalizeLocalModelName(m?.name)).filter(Boolean),
  );
}
2615
+
2616
/**
 * Pick the highest-scoring model of one cloud provider and wrap it as a
 * fallback recommendation.
 *
 * @param {string} provider - Provider id.
 * @param {Iterable<[string, object]>|null|undefined} modelMap - Entries of
 *   model id to metadata; empty or missing yields null.
 * @returns {{provider: string, model: string, reason: string}|null}
 */
function bestCloudRecommendationForProvider(provider, modelMap) {
  let winnerId;
  let winnerScore;
  let found = false;

  for (const [modelId, metadata] of modelMap || []) {
    const candidateScore = scoreModel(modelRef(provider, modelId), null, metadata);
    // The first entry always wins; later ones must score strictly higher.
    if (!found || candidateScore > winnerScore) {
      found = true;
      winnerId = modelId;
      winnerScore = candidateScore;
    }
  }

  if (!found) {
    return null;
  }
  return {
    provider,
    model: winnerId,
    reason: `Best available ${provider} fallback`,
  };
}
2634
+
2635
/**
 * Choose the best local model among those returned by buildFittingModels():
 * highest score first, and among equal scores an installed model first.
 *
 * The list returned by buildFittingModels() is sorted in place.
 *
 * @param {Array} allLocalModels - Candidate local models.
 * @param {object} gpu - Hardware description passed to buildFittingModels().
 * @param {object} ollama - Local runtime info used to detect installed models.
 * @returns {object|null} The top-ranked model, or null when none fits.
 */
function bestLocalModel(allLocalModels, gpu, ollama) {
  const installedNames = installedLocalNameSet(ollama);
  const installRank = (model) =>
    Number(installedNames.has(normalizeLocalModelName(model.name)));

  const ranked = buildFittingModels(allLocalModels, gpu);
  ranked.sort((left, right) => {
    const byScore = (right.score || 0) - (left.score || 0);
    return byScore !== 0 ? byScore : installRank(right) - installRank(left);
  });

  const [top] = ranked;
  return top || null;
}
2648
+
2649
/**
 * Decide which local model (if any) accompanies one agent/category.
 *
 * Preference order:
 *   1. An AI placement for this entry whose model resolves to a fitting
 *      model (keeps the placement's role, defaulting to "fallback").
 *   2. The first "install"/"keep" decision whose normalized name resolves.
 *   3. bestLocalModel() over all fitting models.
 *
 * @param {object} aiResult - AI result; `localModels` may be absent.
 * @param {string} entryName - Agent or category name.
 * @param {Array} allLocalModels - Candidate local models.
 * @param {object} gpu - Hardware description.
 * @param {object} ollama - Local runtime info.
 * @returns {{name: string, role: string}|null}
 */
function localModelForEntry(aiResult, entryName, allLocalModels, gpu, ollama) {
  const fittingByName = buildFittingModelMap(allLocalModels, gpu);

  // 1. Explicit placement for this entry
  const allPlacements = aiResult.localModels?.placements || [];
  const ownPlacement = allPlacements.find(
    (candidate) => candidate.agentName === entryName && candidate.modelName,
  );
  if (ownPlacement) {
    const resolved = resolveFittingLocalName(ownPlacement.modelName, fittingByName);
    if (resolved) {
      return { name: resolved, role: ownPlacement.role || "fallback" };
    }
  }

  // 2. Any model the AI decided to install or keep
  const keptNames = [];
  for (const decision of aiResult.localModels?.decisions || []) {
    if (decision.action === "install" || decision.action === "keep") {
      keptNames.push(normalizeLocalModelName(decision.name));
    }
  }
  const usableNames = keptNames.filter((candidate) =>
    resolveFittingLocalName(candidate, fittingByName),
  );
  if (usableNames.length > 0) {
    return { name: usableNames[0], role: "fallback" };
  }

  // 3. Best fitting model overall
  const best = bestLocalModel(allLocalModels, gpu, ollama);
  if (!best) {
    return null;
  }
  return { name: normalizeLocalModelName(best.name), role: "fallback" };
}
2671
+
2672
/**
 * Make sure `aiResult.localModels.decisions` contains a usable decision for
 * the given local model. Does nothing when the model does not resolve to a
 * fitting model.
 *
 * An existing decision for the same model gets its name canonicalized, its
 * action replaced only when it is neither "keep" nor "install", and a
 * rationale filled in when missing. Otherwise a new decision is appended
 * with action "keep" (already installed) or "install".
 *
 * @param {object} aiResult - Mutated in place; `localModels.decisions` must exist.
 * @param {string} modelName - Local model name to ensure.
 * @param {Array} allLocalModels - Candidate local models.
 * @param {object} gpu - Hardware description.
 * @param {object} ollama - Local runtime info.
 * @returns {void}
 */
function ensureLocalDecision(aiResult, modelName, allLocalModels, gpu, ollama) {
  const DEFAULT_RATIONALE = "Required as the local fallback model.";

  const fittingByName = buildFittingModelMap(allLocalModels, gpu);
  const canonicalName = resolveFittingLocalName(modelName, fittingByName);
  if (!canonicalName) {
    return;
  }

  const defaultAction = installedLocalNameSet(ollama).has(canonicalName)
    ? "keep"
    : "install";
  const decisionList = aiResult.localModels.decisions;
  const current = decisionList.find(
    (decision) => normalizeLocalModelName(decision.name) === canonicalName,
  );

  if (!current) {
    decisionList.push({
      name: canonicalName,
      action: defaultAction,
      rationale: DEFAULT_RATIONALE,
    });
    return;
  }

  current.name = canonicalName;
  const alreadyUsable = current.action === "keep" || current.action === "install";
  if (!alreadyUsable) {
    current.action = defaultAction;
  }
  if (!current.rationale) {
    current.rationale = DEFAULT_RATIONALE;
  }
}
2698
+
2699
+ function completeAiRecommendations(
2700
+ aiResult,
2701
+ config,
2702
+ cloudLookup,
2703
+ allLocalModels,
2704
+ gpu,
2705
+ ollama,
2706
+ ) {
2707
+ const completed = aiResult || {};
2708
+ const fittingByName = buildFittingModelMap(allLocalModels, gpu);
2709
+ completed.localModels = completed.localModels || {
2710
+ decisions: [],
2711
+ placements: [],
2712
+ };
2713
+ completed.localModels.decisions = Array.isArray(
2714
+ completed.localModels.decisions,
2715
+ )
2716
+ ? completed.localModels.decisions
2717
+ .map((d) => ({
2718
+ ...d,
2719
+ name: resolveFittingLocalName(d.name, fittingByName),
2720
+ }))
2721
+ .filter((d) => d.name)
2722
+ : [];
2723
+ completed.localModels.placements = Array.isArray(
2724
+ completed.localModels.placements,
2725
+ )
2726
+ ? completed.localModels.placements
2727
+ .map((p) => ({
2728
+ ...p,
2729
+ modelName: resolveFittingLocalName(p.modelName, fittingByName),
2730
+ }))
2731
+ .filter((p) => p.modelName && p.agentName)
2732
+ : [];
2733
+
2734
+ // Normalize all cloud recommendations to the new shape
2735
+ const entries = allConfigEntries(config);
2736
+ const recByName = new Map();
2737
+ for (const rec of Array.isArray(completed.cloudRecommendations)
2738
+ ? completed.cloudRecommendations
2739
+ : []) {
2740
+ if (!rec || !rec.name) continue;
2741
+ const norm =
2742
+ rec.model || rec.recommendations ? normalizeAgentRec({ ...rec }) : rec;
2743
+ norm.model = normalizeLocalRecommendation(norm.model, fittingByName, true);
2744
+ if (norm.model && !isProviderAvailable(norm.model.provider)) {
2745
+ norm.model = null;
2746
+ }
2747
+ norm.routing = (norm.routing || [])
2748
+ .map((r) => normalizeLocalRecommendation(r, fittingByName, false))
2749
+ .filter((r) => r && isProviderAvailable(r.provider));
2750
+ norm.fallback_models = (norm.fallback_models || [])
2751
+ .map((r) => normalizeLocalRecommendation(r, fittingByName, true))
2752
+ .filter((r) => r && isProviderAvailable(r.provider));
2753
+ recByName.set(norm.name, norm);
2754
+ }
2755
+
2756
+ const cloudProviders = Object.entries(cloudLookup.byId || {}).filter(
2757
+ ([provider, modelMap]) =>
2758
+ provider !== LOCAL_PROVIDER &&
2759
+ modelMap &&
2760
+ modelMap.size > 0 &&
2761
+ isProviderAvailable(provider),
2762
+ );
2763
+
2764
+ for (const entry of entries) {
2765
+ let rec = recByName.get(entry.name);
2766
+ if (!rec) {
2767
+ rec = {
2768
+ name: entry.name,
2769
+ type: entry.type,
2770
+ profile: entry.section.description || entry.section.model_quality || "",
2771
+ model: null,
2772
+ routing: [],
2773
+ fallback_models: [],
2774
+ };
2775
+ }
2776
+ rec.type = rec.type || entry.type;
2777
+ rec.profile =
2778
+ rec.profile ||
2779
+ entry.section.description ||
2780
+ entry.section.model_quality ||
2781
+ "";
2782
+ rec.model = normalizeLocalRecommendation(rec.model, fittingByName, true);
2783
+ if (rec.model && !isProviderAvailable(rec.model.provider)) {
2784
+ rec.model = null;
2785
+ }
2786
+ rec.routing = (rec.routing || [])
2787
+ .map((r) => normalizeLocalRecommendation(r, fittingByName, false))
2788
+ .filter((r) => r && isProviderAvailable(r.provider));
2789
+ rec.fallback_models = (rec.fallback_models || [])
2790
+ .map((r) => normalizeLocalRecommendation(r, fittingByName, true))
2791
+ .filter((r) => r && isProviderAvailable(r.provider));
2792
+
2793
+ // Collect present providers across model + routing + fallback_models
2794
+ const presentProviders = new Set();
2795
+ if (rec.model && rec.model.provider)
2796
+ presentProviders.add(rec.model.provider);
2797
+ for (const r of rec.routing) {
2798
+ if (r.provider) presentProviders.add(r.provider);
2799
+ }
2800
+ for (const r of rec.fallback_models) {
2801
+ if (r.provider) presentProviders.add(r.provider);
2802
+ }
2803
+
2804
+ // Fill in missing cloud providers as fallback_models
2805
+ for (const [provider, modelMap] of cloudProviders) {
2806
+ if (presentProviders.has(provider)) continue;
2807
+ const fallback = bestCloudRecommendationForProvider(provider, modelMap);
2808
+ if (fallback) {
2809
+ rec.fallback_models.push(fallback);
2810
+ presentProviders.add(provider);
2811
+ }
2812
+ }
2813
+
2814
+ // Add local model if missing
2815
+ const localPick = localModelForEntry(
2816
+ completed,
2817
+ entry.name,
2818
+ allLocalModels,
2819
+ gpu,
2820
+ ollama,
2821
+ );
2822
+ if (localPick && !presentProviders.has(LOCAL_PROVIDER)) {
2823
+ const localRec = {
2824
+ provider: LOCAL_PROVIDER,
2825
+ model: localPick.name,
2826
+ reason:
2827
+ localPick.role === "primary"
2828
+ ? "Local primary selected for this role"
2829
+ : "Local fallback for offline or quota-limited operation",
2830
+ };
2831
+ if (localPick.role === "primary") {
2832
+ // Move current model to fallback, promote local to model
2833
+ if (rec.model) rec.fallback_models.unshift(rec.model);
2834
+ rec.model = localRec;
2835
+ } else {
2836
+ rec.fallback_models.push(localRec);
2837
+ }
2838
+ presentProviders.add(LOCAL_PROVIDER);
2839
+ }
2840
+
2841
+ if (localPick) {
2842
+ ensureLocalDecision(
2843
+ completed,
2844
+ localPick.name,
2845
+ allLocalModels,
2846
+ gpu,
2847
+ ollama,
2848
+ );
2849
+ if (
2850
+ !completed.localModels.placements.some(
2851
+ (p) => p.agentName === entry.name,
2852
+ )
2853
+ ) {
2854
+ completed.localModels.placements.push({
2855
+ modelName: localPick.name,
2856
+ agentName: entry.name,
2857
+ role: localPick.role || "fallback",
2858
+ justification: "Required as the local fallback model.",
2859
+ });
2860
+ }
2861
+ }
2862
+
2863
+ // Deduplicate and order runtime fallbacks after all sources have contributed.
2864
+ rec.fallback_models = finalizeFallbackModels(
2865
+ rec.model,
2866
+ rec.fallback_models,
2867
+ );
2868
+ if (!rec.model && rec.fallback_models.length > 0) {
2869
+ rec.model = rec.fallback_models.shift();
2870
+ }
2871
+ rec.routing = uniqueByModelRef(rec.routing);
2872
+ recByName.set(entry.name, rec);
2873
+ }
2874
+
2875
+ const seenDecisionNames = new Set();
2876
+ completed.localModels.decisions = completed.localModels.decisions.filter(
2877
+ (d) => {
2878
+ const name = resolveFittingLocalName(d.name, fittingByName);
2879
+ if (!name || seenDecisionNames.has(name)) return false;
2880
+ seenDecisionNames.add(name);
2881
+ d.name = name;
2882
+ return true;
2883
+ },
2884
+ );
2885
+ completed.cloudRecommendations = entries
2886
+ .map((entry) => recByName.get(entry.name))
2887
+ .filter(Boolean);
2888
+ return completed;
2889
+ }
2890
+
2891
+ // =========================================================================
2892
+ // Apply functions
2893
+ // =========================================================================
2894
+
2895
/**
 * Copy the AI's cloud recommendations into the in-memory config.
 *
 * Each recommendation is matched by name against `config.agents` first and
 * `config.categories` second. Recommendations without a matching section, or
 * without a primary `model`, leave the config untouched.
 *
 * @param {object} aiResult - Result object; only `cloudRecommendations` is read.
 * @param {object} config - Parsed config; matching sections are mutated in place.
 * @param {boolean} autoYes - Accepted for signature parity; not used here.
 * @returns {Promise<number>} Number of sections that received a new model.
 */
async function applyCloudChanges(aiResult, config, autoYes) {
  const recommendations = aiResult.cloudRecommendations;
  if (!recommendations || recommendations.length === 0) return 0;

  const toRef = (entry) => `${entry.provider}/${entry.model}`;

  // Write a non-empty list as "provider/model" strings; otherwise drop any
  // stale (truthy) value already stored under that key.
  const syncList = (target, key, list) => {
    if (list && list.length > 0) {
      target[key] = list.map((entry) => toRef(entry));
    } else if (target[key]) {
      delete target[key];
    }
  };

  let applied = 0;
  for (const rec of recommendations) {
    const target = config.agents?.[rec.name] || config.categories?.[rec.name];
    if (!target || !rec.model) continue;

    target.model = toRef(rec.model);
    syncList(target, "routing", rec.routing);
    syncList(target, "fallback_models", rec.fallback_models);
    applied += 1;
  }
  return applied;
}
2924
+
2925
/**
 * Write local-model placements into the in-memory config.
 *
 * Placements are grouped per agent/category; only the placement whose model
 * has the highest `score` in `allLocalModels` is kept for each group (unknown
 * models count as 0, ties keep the earliest placement). The winner is then
 * written as the primary model or appended as a fallback, depending on its
 * `role` and on whether the section already has a non-local primary.
 *
 * @param {Array<object>} placements - Items with `agentName`, `modelName`, `role`.
 * @param {object} config - Parsed config; matching sections are mutated in place.
 * @param {boolean} autoYes - Accepted for signature parity; not used here.
 * @param {Array<object>} allLocalModels - Catalog entries with `name` and `score`.
 * @returns {Promise<number>} Number of sections that were updated.
 */
async function applyLocalPlacements(
  placements,
  config,
  autoYes,
  allLocalModels,
) {
  if (!placements || placements.length === 0) return 0;

  const isLocalRef = (ref) =>
    ref.startsWith("local/") || ref.startsWith("ollama/");

  // Bucket the placements by the section they target.
  const grouped = {};
  for (const placement of placements) {
    const target =
      config.agents?.[placement.agentName] ||
      config.categories?.[placement.agentName];
    if (!target) {
      console.log(
        ` \u26A0 Agent/category "${placement.agentName}" not found in config \u2014 skipping`,
      );
      continue;
    }
    const candidate = {
      section: target,
      modelName: normalizeLocalModelName(placement.modelName),
      role: placement.role,
    };
    if (grouped[placement.agentName]) {
      grouped[placement.agentName].push(candidate);
    } else {
      grouped[placement.agentName] = [candidate];
    }
  }

  const agentNames = Object.keys(grouped);
  if (agentNames.length === 0) return 0;

  let changed = 0;
  for (const agentName of agentNames) {
    const candidates = grouped[agentName];
    const { section } = candidates[0];

    // Pick the best-scored candidate; strict ">" keeps the first on ties.
    let winner = candidates[0];
    let winnerScore = -1;
    candidates.forEach((candidate) => {
      const known = allLocalModels.find(
        (model) => model.name === candidate.modelName,
      );
      const score = known ? known.score : 0;
      if (score > winnerScore) {
        winnerScore = score;
        winner = candidate;
      }
    });

    const localRef = modelRef(LOCAL_PROVIDER, winner.modelName);

    // Existing fallbacks as plain refs, with any previous local refs removed.
    const priorFallbacks = Array.isArray(section.fallback_models)
      ? section.fallback_models
      : [];
    const cloudFallbacks = priorFallbacks
      .map((fb) => (typeof fb === "string" ? fb : fb.model))
      .filter(Boolean)
      .filter((ref) => !isLocalRef(ref));

    const hasCloudPrimary = Boolean(
      section.model && !isLocalRef(section.model),
    );

    if (winner.role === "primary") {
      // Local model takes over; the displaced cloud primary leads the fallbacks.
      const previousPrimary = hasCloudPrimary ? section.model : null;
      section.model = localRef;
      const chain = previousPrimary
        ? [previousPrimary, ...cloudFallbacks]
        : cloudFallbacks;
      section.fallback_models = [...new Set(chain)];
      if (section.fallback_models.length === 0) delete section.fallback_models;
      console.log(` \u2713 ${agentName}: local primary set to ${localRef}`);
    } else if (hasCloudPrimary) {
      // Cloud primary stays; the best local model becomes the last fallback.
      section.fallback_models = [...new Set([...cloudFallbacks, localRef])];
      console.log(` \u2713 ${agentName}: local fallback set to ${localRef}`);
    } else {
      // No cloud primary (or already local): the local model is the only entry.
      const placedRef = `local/${normalizeLocalModelName(winner.modelName)}`;
      section.model = placedRef;
      if (section.fallback_models) delete section.fallback_models;
      if (section.routing) delete section.routing;
      console.log(` \u2713 ${agentName}: placed ${placedRef}`);
    }
    changed += 1;
  }
  return changed;
}
3014
+
3015
/**
 * Pull an Ollama model, falling back to the HTTP API when the `ollama` CLI fails.
 *
 * 1. Runs `ollama pull <name>` with inherited stdio (native progress output).
 * 2. On failure, POSTs to http://localhost:11434/api/pull through `curl`
 *    (streamed JSON progress is shown as-is), then confirms the model is
 *    really present by querying /api/show.
 *
 * The confirmation step matters because a streamed pull can report its error
 * inside the response body, and `curl` without `--fail` exits 0 even on an
 * HTTP error status; either case used to be reported as a successful pull.
 *
 * @param {string} modelName - Model name/tag to pull.
 * @returns {boolean} true if the model was pulled, false otherwise.
 */
function pullModel(modelName) {
  // Primary: ollama CLI
  try {
    execFileSync("ollama", ["pull", modelName], {
      stdio: "inherit",
      timeout: 600000,
    });
    return true;
  } catch (e) {
    console.error(` \u2716 Failed to pull ${modelName}: ${e.message}`);
  }

  // Fallback: curl-based Ollama API pull
  const apiBase = "http://localhost:11434";
  try {
    console.log(` \u2192 Retrying via curl API at ${apiBase}/api/pull ...`);
    execFileSync(
      "curl",
      [
        "-N",
        // Make HTTP 4xx/5xx a non-zero exit so it lands in the catch below.
        "--fail",
        "-X",
        "POST",
        `${apiBase}/api/pull`,
        "-d",
        JSON.stringify({ name: modelName }),
        "--max-time",
        "600",
      ],
      { stdio: "inherit", timeout: 610000 },
    );

    // Verify the outcome: /api/show answers with an error status when the
    // model does not exist. Both `name` (legacy) and `model` keys are sent so
    // older and newer servers accept the request.
    execFileSync(
      "curl",
      [
        "-sS",
        "--fail",
        "-X",
        "POST",
        `${apiBase}/api/show`,
        "-d",
        JSON.stringify({ name: modelName, model: modelName }),
        "--max-time",
        "30",
      ],
      { stdio: ["ignore", "ignore", "inherit"], timeout: 35000 },
    );

    console.log(` \u2713 ${modelName} pulled via curl`);
    return true;
  } catch (e2) {
    console.error(` \u2716 Also failed via curl: ${e2.message}`);
    return false;
  }
}
3060
+
3061
/**
 * Carry out the install / uninstall decisions for local models.
 *
 * Decision names are normalised first and nameless decisions are dropped.
 * With `autoYes` every action runs without asking; otherwise each install and
 * uninstall is confirmed individually. Uninstalls are attempted only for
 * models reported as installed.
 *
 * @param {Array<object>|null|undefined} decisions - Items with `name` and
 *   `action` ("install" | "uninstall" | "keep").
 * @param {object} ollama - Ollama detection result, passed to `installedLocalNameSet`.
 * @param {boolean} autoYes - Skip the per-model confirmation prompts.
 * @returns {Promise<Set<string>>} Names confirmed present: installed "keep"
 *   models, already-installed "install" models, and successful pulls.
 */
async function installAndUninstallModels(decisions, ollama, autoYes) {
  if (!decisions) return new Set();

  const normalized = decisions
    .map((decision) => ({
      ...decision,
      name: normalizeLocalModelName(decision.name),
    }))
    .filter((decision) => decision.name);
  const installedNames = installedLocalNameSet(ollama);

  const withAction = (action) =>
    normalized.filter((decision) => decision.action === action);
  const installs = withAction("install");
  const removals = withAction("uninstall");
  const keeps = withAction("keep");

  // Seed the result with "keep" models that are actually installed.
  const confirmed = new Set();
  for (const { name } of keeps) {
    if (installedNames.has(name)) confirmed.add(name);
  }

  for (const { name } of installs) {
    if (installedNames.has(name)) {
      console.log(` \u2713 ${name} already installed`);
      confirmed.add(name);
      continue;
    }

    const approved = autoYes || (await confirm(` Install ${name}? [y/N] `));
    if (!approved) {
      console.log(` \u2192 Skipped`);
      continue;
    }

    console.log(` \u2192 Pulling ${name}...`);
    if (pullModel(name)) {
      console.log(` \u2713 ${name} pulled`);
      confirmed.add(name);
    } else {
      console.log(` \u2192 Config will NOT include placement for ${name}\n`);
    }
  }

  for (const { name } of removals) {
    if (!installedNames.has(name)) continue;

    const approved = autoYes || (await confirm(` Uninstall ${name}? [y/N] `));
    if (!approved) {
      console.log(` \u2192 Skipped`);
      continue;
    }

    try {
      execFileSync("ollama", ["rm", name], {
        stdio: "inherit",
        timeout: 60000,
      });
      console.log(` \u2713 ${name} removed`);
    } catch (e) {
      console.error(` \u2716 Failed to remove ${name}: ${e.message}`);
    }
  }

  return confirmed;
}
3146
+
3147
/**
 * Offer to remove installed Ollama models that no decision mentions.
 *
 * A model counts as "considered" when either its raw name or its normalised
 * name (via `normalizeLocalModelName`) matches a decision. Matching on both
 * keeps this destructive step in line with how decision names are normalised
 * before install/uninstall, so a spelling difference between a decision and
 * the installed listing does not get a wanted model deleted.
 *
 * With `autoYes` all orphans are removed without asking; otherwise one
 * confirmation covers the whole list.
 *
 * @param {Array<object>|null|undefined} decisions - Items with a `name`.
 * @param {object} ollama - Detection result; `models` items need `name` and `size`.
 * @param {boolean} autoYes - Remove without prompting.
 * @returns {Promise<void>}
 */
async function offerUninstallOrphans(decisions, ollama, autoYes) {
  // Models the AI evaluated (any decision = considered), raw and normalised.
  const considered = new Set();
  for (const decision of decisions || []) {
    if (!decision) continue;
    considered.add(decision.name);
    if (decision.name) {
      const normalized = normalizeLocalModelName(decision.name);
      if (normalized) considered.add(normalized);
    }
  }

  // Installed models the AI never mentioned. A missing model list is treated
  // as "nothing installed" rather than crashing.
  const installed = Array.isArray(ollama?.models) ? ollama.models : [];
  const orphans = installed.filter((m) => {
    if (considered.has(m.name)) return false;
    const normalized = m.name ? normalizeLocalModelName(m.name) : null;
    return !(normalized && considered.has(normalized));
  });

  if (orphans.length === 0) return;

  console.log(
    `\n\u2500\u2500 Unnecessary models (${orphans.length}) \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500`,
  );
  console.log(` Installed but AI never recommended keeping:`);
  for (const m of orphans) {
    console.log(` \u2022 ${m.name} (${m.size})`);
  }
  console.log("");

  if (!autoYes) {
    const ok = await confirm(
      `Remove these ${orphans.length} model(s) to free disk space? (y/N) `,
    );
    if (!ok) {
      console.log(" \u2192 Skipped\n");
      return;
    }
  }

  // Best effort: a failed removal is reported and the loop moves on.
  for (const m of orphans) {
    try {
      execFileSync("ollama", ["rm", m.name], {
        stdio: "inherit",
        timeout: 60000,
      });
      console.log(` \u2713 ${m.name} removed`);
    } catch (e) {
      console.error(` \u2716 Failed to remove ${m.name}: ${e.message}`);
    }
  }
}
3198
+
3199
+ // =========================================================================
3200
+ // Rebalance display (algorithmic)
3201
+ // =========================================================================
3202
+
3203
/**
 * Print the algorithmic tier chains and, for every config section that has a
 * model, the model the chain for its quality tier would pick.
 *
 * Output only; the config is not modified here.
 *
 * @param {object} config - Parsed config with optional `agents` / `categories`.
 * @param {object} richLookup - Model lookup handed to `buildTierChains`.
 * @param {object} aliases - Provider aliases handed to `buildTierChains`.
 * @param {string[]} localModelNames - Installed local model names to list.
 * @returns {void}
 */
function showRebalance(config, richLookup, aliases, localModelNames) {
  const tierChains = buildTierChains(richLookup, aliases);
  const hasLinks = (chain) => Boolean(chain) && chain.length > 0;

  console.log(`\n\uD83D\uDD2E Algorithmic tier chains for ${CONFIG_PATH}\n`);

  if (localModelNames.length > 0) {
    console.log(
      `\u2500\u2500 Local \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500`,
    );
    console.log(` Ollama models: ${localModelNames.join(", ")}\n`);
  }

  // One section per quality tier that has at least one chain link.
  for (const tier of QUALITY_TIERS) {
    const chain = tierChains[tier];
    if (!hasLinks(chain)) continue;

    console.log(
      `\u2500\u2500 ${tier} chain \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500`,
    );
    for (const [position, link] of chain.entries()) {
      const prefix = position === 0 ? "\u2192 primary" : ` fallback ${position}`;
      console.log(` ${prefix}: ${link.model} (${Math.round(link.score)})`);
    }
    console.log();
  }

  console.log(
    `\u2500\u2500 Agents / Categories \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500`,
  );

  // Sections that already carry a model, agents first, then categories.
  const collect = (group, type) =>
    Object.entries(group || {})
      .filter(([, section]) => section.model)
      .map(([name, section]) => ({ name, type, entry: section }));
  const sections = [
    ...collect(config.agents, "agent"),
    ...collect(config.categories, "category"),
  ];

  if (sections.length === 0) {
    console.log(" No model references found in config.");
    return;
  }

  console.log(` ${sections.length} section(s) with model references:\n`);
  for (const { name, type, entry } of sections) {
    const quality = entry.model_quality || "balanced";
    const current = entry.model || "(none)";
    const chain = tierChains[quality];
    const recommended = hasLinks(chain) ? chain[0].model : "(none)";
    const changed = current !== recommended ? " \u26A1 would change" : " \u2713";

    console.log(` ${name} (${type})`);
    console.log(` quality: ${quality}`);
    console.log(` current: ${current}`);
    console.log(` recommend: ${recommended}${changed}`);
    if (chain && chain.length > 1) {
      const rest = chain
        .slice(1)
        .map((link) => link.model)
        .join(", ");
      console.log(` fallbacks: ${rest}`);
    }
    console.log();
  }
}
3262
+
3263
+ // =========================================================================
3264
+ // Main
3265
+ // =========================================================================
3266
+
3267
+ async function main() {
3268
+ installSignalHandlers();
3269
+ const parsedArgs = mri(process.argv.slice(2), {
3270
+ boolean: [
3271
+ "yes",
3272
+ "rebalance",
3273
+ "dry-run",
3274
+ "cloud-only",
3275
+ "local-only",
3276
+ "debug",
3277
+ "interactive",
3278
+ "help",
3279
+ "version",
3280
+ ],
3281
+ string: ["model"],
3282
+ alias: { h: "help", v: "version", y: "yes" },
3283
+ default: {
3284
+ yes: false,
3285
+ rebalance: false,
3286
+ "dry-run": false,
3287
+ "cloud-only": false,
3288
+ "local-only": false,
3289
+ debug: false,
3290
+ interactive: false,
3291
+ help: false,
3292
+ version: false,
3293
+ },
3294
+ });
3295
+ if (parsedArgs.help) {
3296
+ console.log(usage());
3297
+ return;
3298
+ }
3299
+ if (parsedArgs.version) {
3300
+ console.log(VERSION);
3301
+ return;
3302
+ }
3303
+ const doRebalance = parsedArgs.rebalance;
3304
+ debugMode = parsedArgs.debug;
3305
+ const realTty =
3306
+ Boolean(process.stdout.isTTY) &&
3307
+ process.env.TERM !== "dumb" &&
3308
+ process.env.CI !== "true";
3309
+ const interactive = parsedArgs.interactive || realTty;
3310
+ const dryRunFallback = !interactive && !parsedArgs.yes;
3311
+ const dryRun = parsedArgs["dry-run"] || dryRunFallback;
3312
+ const autoYes = !dryRun && (parsedArgs.yes || (!parsedArgs.interactive && realTty));
3313
+ const cloudOnly = parsedArgs["cloud-only"];
3314
+ const localOnly = parsedArgs["local-only"];
3315
+ const explicitModels = [parsedArgs.model]
3316
+ .flat()
3317
+ .filter((model) => typeof model === "string" && model.trim())
3318
+ .map((model) => model.trim());
3319
+
3320
+ await configureTerminalUi(realTty);
3321
+ if (dryRunFallback) {
3322
+ console.log(
3323
+ " • Non-interactive environment detected; previewing changes only. Pass --yes to apply.",
3324
+ );
3325
+ }
3326
+
3327
+ const config = loadConfig();
3328
+
3329
+ if (
3330
+ !doRebalance &&
3331
+ selectedPanelRequiresOpencode(config, explicitModels) &&
3332
+ !commandExists("opencode")
3333
+ ) {
3334
+ console.error(
3335
+ [
3336
+ "Error: OpenCode CLI (`opencode`) is required to query AI panel models, but it was not found on PATH.",
3337
+ "Install OpenCode, add `opencode` to PATH, or configure `omo.panel_models` with only `cli/...` agents.",
3338
+ "No config changes were made.",
3339
+ ].join("\n"),
3340
+ );
3341
+ process.exitCode = 1;
3342
+ return;
3343
+ }
3344
+
3345
+ const needPrompt = !doRebalance && explicitModels.length === 0 && !autoYes && !dryRun && !localOnly;
3346
+
3347
+ // Start probing paid models in the background if we're not in local-only mode
3348
+ let paidProbesPromise = Promise.resolve([]);
3349
+ let paidProbesDetails = [];
3350
+ let initialCache = null;
3351
+ let initialAliases = {};
3352
+ let initialCloudLookup = { byId: {}, sets: {} };
3353
+
3354
+ let probesAwaited = false;
3355
+ async function ensureProbesAwaited() {
3356
+ if (probesAwaited) return;
3357
+ probesAwaited = true;
3358
+ if (!localOnly && paidProbesPromise) {
3359
+ const progress = createProgress("Verifying paid models availability");
3360
+ await paidProbesPromise;
3361
+ progress.done();
3362
+ }
3363
+ }
3364
+
3365
+ if (!localOnly) {
3366
+ initialCache = loadProviderModels();
3367
+ initialAliases = buildProviderAliases(config);
3368
+ initialCloudLookup = buildRichModelLookup(initialCache);
3369
+
3370
+ const paidRefs = [];
3371
+ for (const [provider, modelMap] of Object.entries(initialCloudLookup.byId || {})) {
3372
+ if (provider === LOCAL_PROVIDER || provider === "opencode") continue;
3373
+ for (const modelId of modelMap.keys()) {
3374
+ paidRefs.push(`${provider}/${modelId}`);
3375
+ }
3376
+ }
3377
+ const sortedPaid = sortPanelModelRefs(paidRefs, config);
3378
+
3379
+ // Group by provider to avoid being dominated by a single provider with exhausted quota
3380
+ const byProvider = {};
3381
+ for (const ref of sortedPaid) {
3382
+ const provider = ref.split("/")[0];
3383
+ if (!byProvider[provider]) byProvider[provider] = [];
3384
+ byProvider[provider].push(ref);
3385
+ }
3386
+
3387
+ const candidates = [];
3388
+ const providerKeys = Object.keys(byProvider);
3389
+ // Take top 1 from each provider to check provider health without redundant queries
3390
+ for (const prov of providerKeys) {
3391
+ if (byProvider[prov] && byProvider[prov][0]) {
3392
+ candidates.push(byProvider[prov][0]);
3393
+ }
3394
+ }
3395
+ const topPaid = candidates;
3396
+
3397
+ paidProbesPromise = Promise.all(
3398
+ topPaid.map(async (modelRef) => {
3399
+ const res = await probeModel(modelRef);
3400
+ return { modelRef, ok: res.ok, reason: res.reason, errorOutput: res.errorOutput };
3401
+ })
3402
+ ).then((results) => {
3403
+ paidProbesDetails = results;
3404
+ return sortedPaid.filter((m) => {
3405
+ const provider = m.split("/")[0];
3406
+ return isProviderAvailable(provider);
3407
+ });
3408
+ });
3409
+ }
3410
+
3411
+ // GPU / Ollama detection is local integration and is skipped in cloud-only mode.
3412
+ let gpu;
3413
+ let ollama;
3414
+ if (cloudOnly) {
3415
+ createProgress("Checking GPU").skip("skipped by --cloud-only");
3416
+ createProgress("Checking Ollama").skip("skipped by --cloud-only");
3417
+ gpu = {
3418
+ hasGpu: false,
3419
+ name: "",
3420
+ label: "Not checked (--cloud-only)",
3421
+ vramGb: 0,
3422
+ };
3423
+ ollama = { installed: false, running: false, version: null, models: [] };
3424
+ } else {
3425
+ const gpuProgress = createProgress("Checking GPU");
3426
+ gpu = detectGPU();
3427
+ gpuProgress.done(gpu.label);
3428
+
3429
+ const ollamaProgress = createProgress("Checking Ollama");
3430
+ ollama = detectOllama();
3431
+ ollamaProgress.done(
3432
+ ollama.running
3433
+ ? `${ollama.models.length} installed model(s)`
3434
+ : ollama.installed
3435
+ ? "installed, not running"
3436
+ : "not installed",
3437
+ );
3438
+ }
3439
+
3440
+ // Local model catalog (skip if cloud-only, or if Ollama not installed)
3441
+ let allLocalModels = [];
3442
+ let localModelNames = [];
3443
+ if (!cloudOnly && ollama.installed) {
3444
+ const localProgress = createProgress("Discovering local model catalog");
3445
+ allLocalModels = discoverModels(false, localProgress);
3446
+ localModelNames = ollama.models.map((m) => m.name);
3447
+ } else {
3448
+ createProgress("Discovering local model catalog").skip(
3449
+ cloudOnly ? "skipped by --cloud-only" : "Ollama not installed",
3450
+ );
3451
+ }
3452
+
3453
+ // ── Algorithmic --rebalance path ──
3454
+ if (doRebalance) {
3455
+ const cache = loadProviderModels();
3456
+ const aliases = buildProviderAliases(config);
3457
+ const richLookup = buildRichModelLookup(cache);
3458
+ // Include local models in lookup for scoring
3459
+ if (localModelNames.length > 0) {
3460
+ if (!richLookup.sets.local) richLookup.sets.local = new Set();
3461
+ localModelNames.forEach((m) => richLookup.sets.local.add(m));
3462
+ if (!richLookup.byId.local) richLookup.byId.local = new Map();
3463
+ localModelNames.forEach((m) => {
3464
+ if (!richLookup.byId.local.has(m)) richLookup.byId.local.set(m, null);
3465
+ });
3466
+ }
3467
+ showRebalance(config, richLookup, aliases, localModelNames);
3468
+
3469
+ const tierChains = buildTierChains(richLookup, aliases);
3470
+ const rebalanceOptions = {
3471
+ unavailableModels: new Set(),
3472
+ providerAliases: aliases,
3473
+ modelCache: richLookup,
3474
+ tierChains,
3475
+ };
3476
+ const rebalanceChanges = rebalanceConfig(config, rebalanceOptions);
3477
+
3478
+ if (rebalanceChanges.length === 0) {
3479
+ console.log("\n\u2705 No restructuring needed.\n");
3480
+ return;
3481
+ }
3482
+ console.log(
3483
+ `\n\uD83D\uDCCB ${rebalanceChanges.length} section(s) would be restructured:\n`,
3484
+ );
3485
+ for (const c of rebalanceChanges) console.log(` ${c}`);
3486
+ if (dryRun) {
3487
+ console.log(`\n \u2192 Apply: omo-recommend-models --rebalance\n`);
3488
+ return;
3489
+ }
3490
+ if (!autoYes) {
3491
+ const ok = await confirm(
3492
+ `Apply ${rebalanceChanges.length} changes? (y/N) `,
3493
+ );
3494
+ if (!ok) {
3495
+ console.log(" Skipped.\n");
3496
+ return;
3497
+ }
3498
+ }
3499
+ if (backupConfig(CONFIG_PATH, BACKUP_PATH)) {
3500
+ console.log(` \u2713 Backup saved to ${BACKUP_PATH}`);
3501
+ }
3502
+ console.log(` Backup: ${BACKUP_PATH}`);
3503
+ try {
3504
+ writeConfigWithValidation({
3505
+ config,
3506
+ configPath: CONFIG_PATH,
3507
+ backupPath: BACKUP_PATH,
3508
+ validatorPath: path.join(__dirname, "omo-validate-config"),
3509
+ validateStdio: "inherit",
3510
+ });
3511
+ console.log(`\u2705 ${rebalanceChanges.length} section(s) restructured.\n`);
3512
+ } catch (validationErr) {
3513
+ console.error(`\n\u2716 Validation FAILED.`);
3514
+ if (fs.existsSync(BACKUP_PATH)) {
3515
+ console.log(
3516
+ ` \u2713 Reverted to previous config (backup at ${BACKUP_PATH})`,
3517
+ );
3518
+ } else {
3519
+ console.log(
3520
+ ` \u26A0 No backup found at ${BACKUP_PATH} \u2014 config on disk may be invalid.`,
3521
+ );
3522
+ }
3523
+ throw validationErr;
3524
+ }
3525
+ return;
3526
+ }
3527
+
3528
+ const skipCloud = localOnly;
3529
+ let cache = null;
3530
+ if (skipCloud) {
3531
+ createProgress("Loading cloud provider cache").skip(
3532
+ "skipped by --local-only",
3533
+ );
3534
+ } else {
3535
+ const cloudProgress = createProgress("Loading cloud provider cache");
3536
+ cache = needPrompt ? initialCache : loadProviderModels();
3537
+ cloudProgress.done(
3538
+ cache?.models
3539
+ ? `${Object.keys(cache.models).length} provider(s)`
3540
+ : "not found",
3541
+ );
3542
+ }
3543
+ const aliases = skipCloud
3544
+ ? {}
3545
+ : needPrompt
3546
+ ? initialAliases
3547
+ : buildProviderAliases(config);
3548
+ const cloudLookup = skipCloud
3549
+ ? { byId: {}, sets: {} }
3550
+ : needPrompt
3551
+ ? initialCloudLookup
3552
+ : buildRichModelLookup(cache);
3553
+ // Inject local models into cloud lookup for scoring context
3554
+ if (!cloudOnly && localModelNames.length > 0) {
3555
+ if (!cloudLookup.sets.local) cloudLookup.sets.local = new Set();
3556
+ localModelNames.forEach((m) => cloudLookup.sets.local.add(m));
3557
+ if (!cloudLookup.byId.local) cloudLookup.byId.local = new Map();
3558
+ localModelNames.forEach((m) => {
3559
+ if (!cloudLookup.byId.local.has(m)) cloudLookup.byId.local.set(m, null);
3560
+ });
3561
+ }
3562
+
3563
+ const cloudProviderCount = Object.entries(cloudLookup.byId || {}).filter(
3564
+ ([provider, modelMap]) =>
3565
+ provider !== LOCAL_PROVIDER && modelMap && modelMap.size > 0,
3566
+ ).length;
3567
+ console.log(
3568
+ ` \u2713 Model picture: ${cloudProviderCount} cloud provider(s), ${localModelNames.length} installed local model(s)\n`,
3569
+ );
3570
+
3571
+ // Determine which panel models to use
3572
+ // Priority: 1) --model CLI flags, 2) interactive picker, 3) omo.panel_models,
3573
+ // 4) default top 5 ordered by `omo.panel_model_order` ("opencode-first"
3574
+ // unless set to "score").
3575
+ let aiResult;
3576
+ let panel = null;
3577
+ const localCtx = cloudOnly ? [] : allLocalModels;
3578
+ const gpuCtx = cloudOnly
3579
+ ? {
3580
+ hasGpu: false,
3581
+ name: "",
3582
+ label: "Not checked (--cloud-only)",
3583
+ vramGb: 0,
3584
+ }
3585
+ : gpu;
3586
+ const ollamaCtx = cloudOnly
3587
+ ? { installed: false, running: false, version: null, models: [] }
3588
+ : ollama;
3589
+
3590
+ // Check for cached panel result first (early cache prompt while background queries are running)
3591
+ if (!autoYes && !dryRun) {
3592
+ const cached = loadPanelCache();
3593
+ if (cached) {
3594
+ const age = Date.now() - cached.timestamp;
3595
+ const ageStr =
3596
+ age > 86400000
3597
+ ? `${(age / 86400000).toFixed(1)}d`
3598
+ : age > 3600000
3599
+ ? `${(age / 3600000).toFixed(1)}h`
3600
+ : `${Math.round(age / 60000)}m`;
3601
+ const ts = new Date(cached.timestamp)
3602
+ .toISOString()
3603
+ .replace("T", " ")
3604
+ .slice(0, 19);
3605
+
3606
+ const cachedModelList =
3607
+ cached.models || cached.result?.panel?.models || [];
3608
+
3609
+ console.log(`\nCached panel result:`);
3610
+ if (cachedModelList.length > 0) {
3611
+ console.log(` \u2022 Surveyed AI models:`);
3612
+ printNumberedPanelModelGroups(cachedModelList, " ");
3613
+ }
3614
+ if (cached.gpu && cached.gpu.label) {
3615
+ const gpuLine = ` \u2022 Hardware: ${cached.gpu.label}`;
3616
+ console.log(
3617
+ cached.gpu.vramGb
3618
+ ? `${gpuLine} (${cached.gpu.vramGb} GB VRAM)`
3619
+ : gpuLine,
3620
+ );
3621
+ }
3622
+ console.log(` \u2022 Recorded: ${ts} (${ageStr} ago)`);
3623
+
3624
+ const useCached = await confirm(`\nUse cached? (y/N) `);
3625
+ if (useCached) {
3626
+ await ensureProbesAwaited();
3627
+ const fittingByName = buildFittingModelMap(localCtx, gpuCtx);
3628
+ if (resultHasRejectedLocal(cached.result, fittingByName)) {
3629
+ console.log(
3630
+ ` \u2022 Cached result references unavailable local models; running fresh.\n`,
3631
+ );
3632
+ } else {
3633
+ aiResult = completeAiRecommendations(
3634
+ cached.result,
3635
+ config,
3636
+ cloudLookup,
3637
+ localCtx,
3638
+ gpuCtx,
3639
+ ollamaCtx,
3640
+ );
3641
+ panel = cached.result.panel || null;
3642
+ console.log(` \u2713 Loaded cached panel result.\n`);
3643
+ }
3644
+ }
3645
+ }
3646
+ }
3647
+
3648
+ // Determine which panel models to use
3649
+ // Priority: 1) --model CLI flags, 2) interactive picker, 3) omo.panel_models,
3650
+ // 4) default top 5 ordered by `omo.panel_model_order` ("opencode-first"
3651
+ // unless set to "score").
3652
+ let panelModels = null;
3653
+ if (!aiResult) {
3654
+ if (explicitModels.length > 0) {
3655
+ panelModels = explicitModels;
3656
+ console.log(
3657
+ ` \u2713 Using ${panelModels.length} explicitly selected model(s): ${panelModels.join(", ")}\n`,
3658
+ );
3659
+ } else if (!autoYes && !dryRun && !localOnly) {
3660
+ await ensureProbesAwaited();
3661
+ const availablePaid = await paidProbesPromise;
3662
+ const picked = await pickPanelModels(config, availablePaid);
3663
+ if (picked && picked.length > 0) {
3664
+ panelModels = picked;
3665
+ console.log(` \u2713 Using ${panelModels.length} selected model(s)\n`);
3666
+ }
3667
+ }
3668
+
3669
+ if (!panelModels) {
3670
+ const configuredModels = configuredPanelModels(config);
3671
+ if (configuredModels.length > 0) {
3672
+ panelModels = configuredModels;
3673
+ console.log(
3674
+ ` \u2713 Using ${panelModels.length} configured panel model(s): ${panelModels.join(", ")}\n`,
3675
+ );
3676
+ }
3677
+ }
3678
+
3679
+ // Ask Free vs Paid prompt if we still don't have models and prompting is allowed
3680
+ if (!panelModels && !autoYes && !dryRun && !localOnly) {
3681
+ await ensureProbesAwaited();
3682
+ const availablePaid = await paidProbesPromise;
3683
+ if (availablePaid.length > 0) {
3684
+ console.log(`\nAvailable paid models:`);
3685
+ printNumberedPanelModelGroups(availablePaid, " ");
3686
+ console.log();
3687
+
3688
+ const answer = await promptUser(
3689
+ `Use best paid models or free opencode models (slow) for the analysis? (P/f): `,
3690
+ );
3691
+ const usePaid = answer.trim().toLowerCase() !== "f";
3692
+ if (usePaid) {
3693
+ panelModels = availablePaid.slice(0, 5);
3694
+ } else {
3695
+ panelModels = defaultPanelModels(config);
3696
+ opencodeOnlyMode = true;
3697
+ }
3698
+ } else {
3699
+ console.log("\n\u26A0 No paid models are currently verified as available.");
3700
+ console.log(" Please check that your API keys are set and exported in your environment");
3701
+ console.log(" (e.g., GITHUB_TOKEN, OPENAI_API_KEY, ANTHROPIC_API_KEY, GEMINI_API_KEY).");
3702
+ if (paidProbesDetails && paidProbesDetails.length > 0) {
3703
+ console.log("\n Probe details / errors:");
3704
+ for (const res of paidProbesDetails) {
3705
+ if (!res.ok) {
3706
+ const errSnippet = res.errorOutput ? res.errorOutput.split("\n")[0] : "unknown error";
3707
+ console.log(` • ${res.modelRef}: ${res.reason} (${errSnippet})`);
3708
+ }
3709
+ }
3710
+ }
3711
+ console.log("\n Falling back to free opencode models.\n");
3712
+ panelModels = defaultPanelModels(config);
3713
+ opencodeOnlyMode = true;
3714
+ }
3715
+ }
3716
+
3717
+ // Fallback to default if still not set (e.g., if autoYes, dryRun, or localOnly skipped prompt)
3718
+ if (!panelModels) {
3719
+ await ensureProbesAwaited();
3720
+ const availablePaid = await paidProbesPromise;
3721
+ if (availablePaid.length > 0 && !localOnly) {
3722
+ panelModels = availablePaid.slice(0, 5);
3723
+ } else {
3724
+ panelModels = defaultPanelModels(config);
3725
+ opencodeOnlyMode = true;
3726
+ }
3727
+ }
3728
+ }
3729
+ // (Old cache checking block removed)
3730
+
3731
+ try {
3732
+ if (!aiResult) {
3733
+ console.log(`\nThis run would query:`);
3734
+ printNumberedPanelModelGroups(panelModels, " ");
3735
+ console.log();
3736
+
3737
+ // Ensure background probes of paid models have completed so quotaExceededProviders is fully populated
3738
+ await ensureProbesAwaited();
3739
+
3740
+ // Multi-model panel: all free models recommend in parallel, debate, user picks
3741
+ const panelResult = await runPanelAndSelect(
3742
+ config,
3743
+ cloudLookup,
3744
+ localCtx,
3745
+ gpuCtx,
3746
+ ollamaCtx,
3747
+ cloudOnly,
3748
+ panelModels,
3749
+ );
3750
+ aiResult = completeAiRecommendations(
3751
+ panelResult.selected,
3752
+ config,
3753
+ cloudLookup,
3754
+ localCtx,
3755
+ gpuCtx,
3756
+ ollamaCtx,
3757
+ );
3758
+ panel = panelResult.panel;
3759
+ // Save cache for future runs (include model list and GPU info for invalidation)
3760
+ savePanelCache(
3761
+ {
3762
+ ...panelResult.selected,
3763
+ panel: { models: panelResult.panel?.models },
3764
+ },
3765
+ panelModels,
3766
+ gpuCtx,
3767
+ );
3768
+ }
3769
+ } catch (e) {
3770
+ console.error(`\n\u2716 AI recommendation failed: ${e.message}`);
3771
+ if (fs.existsSync(CONFIG_PATH)) {
3772
+ console.log(" Config unchanged.\n");
3773
+ } else {
3774
+ console.log(
3775
+ "\n Writing a minimal valid config skeleton. Re-run when models are available.\n",
3776
+ );
3777
+ const rewritten = JSON.stringify(config, null, 2) + "\n";
3778
+ fs.writeFileSync(CONFIG_PATH, rewritten, "utf-8");
3779
+ console.log(` \u2713 Config written to ${CONFIG_PATH}`);
3780
+ console.log(` No backup (new config — no previous file to preserve)\n`);
3781
+ }
3782
+ return;
3783
+ }
3784
+
3785
+ console.log(`\n\uD83D\uDCCA AI Analysis (via ${aiResult.recommender}):`);
3786
+ console.log(` ${aiResult.analysis}\n`);
3787
+
3788
+ // Display cloud recommendations
3789
+ const cloudChanges = showCloudRecommendations(aiResult, config);
3790
+
3791
+ // Display local decisions (always show if they exist, regardless of placements)
3792
+ if (aiResult.localModels) {
3793
+ showLocalDecisions(aiResult, allLocalModels, ollama);
3794
+ }
3795
+
3796
+ const hasLocalChanges =
3797
+ aiResult.localModels &&
3798
+ ((aiResult.localModels.placements &&
3799
+ aiResult.localModels.placements.length > 0) ||
3800
+ (aiResult.localModels.decisions &&
3801
+ aiResult.localModels.decisions.some(
3802
+ (d) => d.action === "install" || d.action === "uninstall",
3803
+ )));
3804
+ const hasChanges = cloudChanges.length > 0 || hasLocalChanges;
3805
+
3806
+ if (!hasChanges) {
3807
+ console.log("\u2705 No changes needed.\n");
3808
+ return;
3809
+ }
3810
+
3811
+ if (dryRun) {
3812
+ console.log(`\n \u2192 Apply: omo-recommend-models\n`);
3813
+ return;
3814
+ }
3815
+
3816
+ if (!autoYes) {
3817
+ const ok = await confirm("Apply all recommendations above? (y/N) ");
3818
+ if (!ok) {
3819
+ console.log(" Skipped.\n");
3820
+ return;
3821
+ }
3822
+ }
3823
+
3824
+ // Backup
3825
+ if (backupConfig(CONFIG_PATH, BACKUP_PATH)) {
3826
+ console.log(` \u2713 Backup saved to ${BACKUP_PATH}`);
3827
+ }
3828
+
3829
+ // Local models must be installed/confirmed before config refs are written.
3830
+ const confirmedModels = await installAndUninstallModels(
3831
+ aiResult.localModels?.decisions,
3832
+ ollama,
3833
+ autoYes,
3834
+ );
3835
+
3836
+ // Apply model assignments after filtering out unconfirmed local refs.
3837
+ let totalCloud = 0;
3838
+ if (aiResult.cloudRecommendations) {
3839
+ for (const rec of aiResult.cloudRecommendations) {
3840
+ const section =
3841
+ config.agents?.[rec.name] || config.categories?.[rec.name];
3842
+ if (!section) continue;
3843
+
3844
+ // Filter unconfirmed local models from all three fields
3845
+ const localOk = (r) =>
3846
+ r.provider !== LOCAL_PROVIDER ||
3847
+ (confirmedModels &&
3848
+ confirmedModels.has(normalizeLocalModelName(r.model)));
3849
+
3850
+ const modelOk = rec.model && localOk(rec.model);
3851
+ const routingOk = (rec.routing || []).filter(localOk);
3852
+ const fbOk = (rec.fallback_models || []).filter(localOk);
3853
+
3854
+ if (!modelOk && routingOk.length === 0 && fbOk.length === 0) continue;
3855
+
3856
+ if (modelOk) section.model = `${rec.model.provider}/${rec.model.model}`;
3857
+ section.routing =
3858
+ routingOk.length > 0
3859
+ ? routingOk.map((r) => `${r.provider}/${r.model}`)
3860
+ : undefined;
3861
+ section.fallback_models =
3862
+ fbOk.length > 0
3863
+ ? fbOk.map((r) => `${r.provider}/${r.model}`)
3864
+ : undefined;
3865
+ // Clean up undefined keys
3866
+ if (!section.routing) delete section.routing;
3867
+ if (!section.fallback_models) delete section.fallback_models;
3868
+ totalCloud++;
3869
+ }
3870
+ }
3871
+
3872
+ let totalLocal = 0;
3873
+ if (aiResult.localModels && aiResult.localModels.placements) {
3874
+ const confirmedPlacements = aiResult.localModels.placements.filter((p) =>
3875
+ confirmedModels.has(normalizeLocalModelName(p.modelName)),
3876
+ );
3877
+ if (confirmedPlacements.length > 0) {
3878
+ totalLocal = await applyLocalPlacements(
3879
+ confirmedPlacements,
3880
+ config,
3881
+ autoYes,
3882
+ allLocalModels,
3883
+ );
3884
+ }
3885
+ }
3886
+
3887
+ console.log(` Backup: ${BACKUP_PATH}`);
3888
+ const totalChanges = totalCloud + totalLocal;
3889
+
3890
+ // Validate — roll back on failure
3891
+ console.log("\u2192 Validating changes...");
3892
+ try {
3893
+ writeConfigWithValidation({
3894
+ config,
3895
+ configPath: CONFIG_PATH,
3896
+ backupPath: BACKUP_PATH,
3897
+ validatorPath: path.join(__dirname, "omo-validate-config"),
3898
+ validateStdio: ["inherit", "inherit", "pipe"],
3899
+ });
3900
+ console.log(`\u2705 ${totalChanges} section(s) updated.\n`);
3901
+ } catch (validationErr) {
3902
+ const stderr = validationErr.stderr ? validationErr.stderr.toString() : "";
3903
+ console.error(`\n\u2716 Validation FAILED.`);
3904
+ if (stderr) {
3905
+ for (const line of stderr.trim().split("\n")) {
3906
+ console.error(` ${line}`);
3907
+ }
3908
+ }
3909
+ // Restore from the backup taken before applying
3910
+ if (fs.existsSync(BACKUP_PATH)) {
3911
+ console.log(
3912
+ ` \u2713 Reverted to previous config (backup at ${BACKUP_PATH})`,
3913
+ );
3914
+ } else {
3915
+ console.log(
3916
+ ` \u26A0 No backup found at ${BACKUP_PATH} \u2014 config on disk may be invalid.`,
3917
+ );
3918
+ }
3919
+ console.log(
3920
+ ` Recommendations were NOT applied. Fix the issues above and re-run.`,
3921
+ );
3922
+ throw new Error(
3923
+ `Validation failed after applying recommendations. Config was reverted to backup.`,
3924
+ );
3925
+ }
3926
+
3927
+ // Step 4: Offer to remove models the AI never considered
3928
+ await offerUninstallOrphans(aiResult.localModels?.decisions, ollama, autoYes);
3929
+
3930
+ console.log("\n\u2705 Done.");
3931
+ }
3932
+
3933
// Entry point: run main() and turn any rejection into a printed error plus a
// non-zero exit status. process.exitCode is set instead of calling
// process.exit(), so the process still ends on its own once pending work
// (including buffered console output) has finished.
main().catch((e) => {
  // A rejection value is not guaranteed to be an Error, or even an object:
  // `throw null` / `Promise.reject()` would make plain property access throw
  // inside this handler and surface as an unhandled rejection instead of the
  // message below. Optional chaining keeps the handler itself from failing.
  const message = e?.message || String(e);
  console.error(`\n${pc.red("\u2716 " + message)}`);

  // Stack traces are noisy; only show them when debug output was requested.
  if (debugMode && e?.stack) {
    console.error(e.stack);
  }

  // Show execSync stderr if available (validation failures, subprocess errors, etc.)
  // stderr may be a string or a Buffer; String() decodes either one.
  if (e?.stderr) {
    const stderrText = String(e.stderr).trim();
    // An empty Buffer is truthy, so check the decoded text before printing to
    // avoid emitting a lone blank line.
    if (stderrText) {
      for (const line of stderrText.split("\n")) {
        console.error(` ${line}`);
      }
    }
  }

  process.exitCode = 1;
});