gearbox-code 0.1.30 → 0.1.33

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/cli.mjs +992 -149
  2. package/package.json +2 -2
package/dist/cli.mjs CHANGED
@@ -106152,10 +106152,10 @@ var init_catalog = __esm(() => {
106152
106152
  { id: "requesty", label: "Requesty", group: "gateway", exec: "in-loop", authKind: "openai-compat", envVars: ["REQUESTY_API_KEY"], baseUrl: "https://router.requesty.ai/v1", signupUrl: "https://app.requesty.ai" },
106153
106153
  { id: "portkey", label: "Portkey", group: "gateway", exec: "in-loop", authKind: "openai-compat", envVars: ["PORTKEY_API_KEY"], baseUrl: "https://api.portkey.ai/v1", signupUrl: "https://app.portkey.ai", notes: "Config-driven routing via x-portkey-* headers." },
106154
106154
  { id: "litellm", label: "LiteLLM proxy", group: "gateway", exec: "in-loop", authKind: "openai-compat", envVars: ["LITELLM_API_KEY"], signupUrl: "https://docs.litellm.ai/docs/simple_proxy", notes: "Self-hosted; set baseUrl to your proxy." },
106155
- { id: "azure-foundry", label: "Azure AI Foundry", group: "gateway", exec: "in-loop", authKind: "openai-compat", envVars: ["AZURE_AI_FOUNDRY_API_KEY", "AZURE_AI_INFERENCE_API_KEY"], signupUrl: "https://ai.azure.com", defaultModels: ["gpt-5.5", "gpt-5.5-mini", "gpt-4.1", "o4-mini"], notes: "OpenAI-compatible Foundry endpoint. Use baseUrl ending in /openai/v1." },
106155
+ { id: "azure-foundry", label: "Azure AI Foundry", group: "gateway", exec: "in-loop", authKind: "openai-compat", envVars: ["AZURE_AI_FOUNDRY_API_KEY", "AZURE_AI_INFERENCE_API_KEY"], signupUrl: "https://ai.azure.com", defaultModels: ["gpt-5.5", "gpt-5.5-mini", "gpt-4.1", "o4-mini"], discoverOnly: true, notes: "OpenAI-compatible Foundry endpoint. Use baseUrl ending in /openai/v1. Real model ids are discovered per resource." },
106156
106156
  { id: "bedrock", label: "Amazon Bedrock", group: "cloud", exec: "in-loop", authKind: "aws", envVars: ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_REGION", "AWS_PROFILE"], keyPrefix: ["AKIA", "ASIA"], signupUrl: "https://console.aws.amazon.com/bedrock", defaultModels: ["anthropic.claude-sonnet-4-20250514-v1:0", "anthropic.claude-haiku-4-5-20251001-v1:0", "anthropic.claude-opus-4-20250514-v1:0", "amazon.nova-pro-v1:0", "amazon.nova-lite-v1:0", "amazon.nova-micro-v1:0", "meta.llama4-maverick-17b-instruct-v1:0", "meta.llama4-scout-17b-instruct-v1:0"], notes: "AWS IAM credentials or ~/.aws profile. Enable models in the Bedrock console first." },
106157
106157
  { id: "vertex", label: "Google Vertex AI", group: "cloud", exec: "in-loop", authKind: "vertex", envVars: ["GOOGLE_VERTEX_PROJECT", "GOOGLE_VERTEX_LOCATION", "GOOGLE_APPLICATION_CREDENTIALS"], signupUrl: "https://console.cloud.google.com/vertex-ai", defaultModels: ["gemini-3.1-pro-preview", "gemini-3.5-flash", "gemini-3.1-flash-lite"], notes: "ADC (gcloud auth application-default login) or a service-account JSON." },
106158
- { id: "azure", label: "Azure OpenAI", group: "cloud", exec: "in-loop", authKind: "azure", envVars: ["AZURE_API_KEY", "AZURE_RESOURCE_NAME"], signupUrl: "https://oai.azure.com", defaultModels: ["gpt-5.5", "gpt-5.5-mini", "gpt-4.1"], notes: "resourceName (e.g. my-resource) + API key. Model IDs are your deployment names." },
106158
+ { id: "azure", label: "Azure OpenAI", group: "cloud", exec: "in-loop", authKind: "azure", envVars: ["AZURE_API_KEY", "AZURE_RESOURCE_NAME"], signupUrl: "https://oai.azure.com", defaultModels: ["gpt-5.5", "gpt-5.5-mini", "gpt-4.1"], discoverOnly: true, notes: "resourceName (e.g. my-resource) + API key. Model IDs are your deployment NAMES — discovered per resource, not the base model ids." },
106159
106159
  { id: "ollama", label: "Ollama (local)", group: "local", exec: "in-loop", authKind: "openai-compat", envVars: [], baseUrl: "http://localhost:11434/v1", signupUrl: "https://ollama.com", defaultModels: ["qwen2.5-coder:7b", "llama3.3"], notes: "No key; runs on your machine." },
106160
106160
  { id: "lmstudio", label: "LM Studio (local)", group: "local", exec: "in-loop", authKind: "openai-compat", envVars: [], baseUrl: "http://localhost:1234/v1", signupUrl: "https://lmstudio.ai" },
106161
106161
  { id: "vllm", label: "vLLM (local/self-host)", group: "local", exec: "in-loop", authKind: "openai-compat", envVars: [], baseUrl: "http://localhost:8000/v1", signupUrl: "https://docs.vllm.ai" },
@@ -106172,10 +106172,12 @@ function generatedModels() {
106172
106172
  for (const p of CATALOG) {
106173
106173
  if (p.group === "cli")
106174
106174
  continue;
106175
+ if (p.discoverOnly)
106176
+ continue;
106175
106177
  for (const m2 of p.defaultModels ?? []) {
106176
106178
  if (CURATED.some((c) => c.provider === p.id && c.sdkId === m2))
106177
106179
  continue;
106178
- out.push({ id: `${p.id}/${m2}`, provider: p.id, sdkId: m2, label: m2.length > 24 ? m2.slice(0, 24) : m2, contextWindow: 128000 });
106180
+ out.push({ id: `${p.id}/${m2}`, provider: p.id, sdkId: m2, label: m2.length > 24 ? m2.slice(0, 24) : m2, contextWindow: 128000, capabilities: { source: "seeded" } });
106179
106181
  }
106180
106182
  }
106181
106183
  return out;
@@ -106188,7 +106190,7 @@ function accountModelSpecs() {
106188
106190
  for (const sdkId of account.models ?? []) {
106189
106191
  if (!sdkId)
106190
106192
  continue;
106191
- if (MODELS.some((m2) => m2.provider === account.provider && m2.sdkId === sdkId))
106193
+ if (CURATED.some((m2) => m2.provider === account.provider && m2.sdkId === sdkId))
106192
106194
  continue;
106193
106195
  const id = `${account.provider}/${sdkId}`;
106194
106196
  out.push({
@@ -106197,16 +106199,29 @@ function accountModelSpecs() {
106197
106199
  sdkId,
106198
106200
  label: sdkId.length > 24 ? sdkId.slice(0, 24) : sdkId,
106199
106201
  contextWindow: 128000,
106200
- capabilities: { source: "user-configured", tools: "unknown", images: "unknown", jsonSchema: "unknown", usage: "partial" }
106202
+ capabilities: { source: "api-discovered", tools: "unknown", images: "unknown", jsonSchema: "unknown", usage: "partial" }
106201
106203
  });
106202
106204
  }
106203
106205
  }
106204
106206
  return out;
106205
106207
  }
106208
/**
 * Collects the provider ids whose catalog-seeded models should be hidden.
 *
 * A provider is included when either:
 *  - its CATALOG entry is flagged `discoverOnly`, or
 *  - an enabled, non-CLI account for it already lists at least one model.
 *
 * `modelRegistry` uses the result to drop MODELS entries whose
 * `capabilities.source` is "seeded" for these providers.
 *
 * @returns {Set<string>} provider ids to suppress seeded models for
 */
function seedSuppressedProviders() {
  const suppressed = new Set();

  // Catalog-level opt-out: the entry itself is marked discoverOnly.
  for (const entry of CATALOG) {
    if (!entry.discoverOnly) {
      continue;
    }
    suppressed.add(entry.id);
  }

  // Account-level: an enabled in-loop account that already has a model list.
  for (const account of listAccounts()) {
    const eligible = account.enabled && account.exec !== "cli";
    if (!eligible) {
      continue;
    }
    const modelCount = account.models?.length ?? 0;
    if (modelCount > 0) {
      suppressed.add(account.provider);
    }
  }

  return suppressed;
}
106206
106219
  function modelRegistry() {
106220
+ const suppressed = seedSuppressedProviders();
106221
+ const base2 = MODELS.filter((m2) => !(m2.capabilities?.source === "seeded" && suppressed.has(m2.provider)));
106207
106222
  const seen = new Set;
106208
106223
  const out = [];
106209
- for (const m2 of [...MODELS, ...accountModelSpecs()]) {
106224
+ for (const m2 of [...base2, ...accountModelSpecs()]) {
106210
106225
  const key = `${m2.provider}\x00${m2.sdkId}`;
106211
106226
  if (seen.has(key))
106212
106227
  continue;
@@ -139628,6 +139643,33 @@ function fmtTokens(n) {
139628
139643
  return `${(n / 1000).toFixed(1)}k`;
139629
139644
  return String(n);
139630
139645
  }
139646
// Separator placed between status-bar segments: the bullet glyph with one space on each side.
var SEP2 = ` ${glyph.bullet} `;

/**
 * Computes the column spans of the model and effort labels in the status bar.
 *
 * Each zone is a two-element array `[start, end]` where `end` is `start` plus
 * the label's string length. When `mode` is not "normal", the mode label and
 * one separator are assumed to precede the model label.
 *
 * NOTE(review): the leading offset of 1 presumably mirrors the bar's left
 * padding — confirm against the StatusBar render.
 *
 * @param {{ model: string, effort?: string, mode?: string }} param0
 * @returns {{ modelZone: number[], effortZone: number[] | null }}
 *   `effortZone` is null when `effort` is falsy.
 */
function statusBarLayout({ model, effort, mode = "normal" }) {
  // The mode label is the mode string itself, so its width is mode.length.
  const modePrefixWidth = mode === "normal" ? 0 : mode.length + SEP2.length;
  const modelBegin = 1 + modePrefixWidth;
  const modelEnd = modelBegin + model.length;

  let effortZone = null;
  if (effort) {
    const effortBegin = modelEnd + SEP2.length;
    const effortWidth = `effort ${effort}`.length;
    effortZone = [effortBegin, effortBegin + effortWidth];
  }

  return { modelZone: [modelBegin, modelEnd], effortZone };
}
139661
/**
 * Maps a click position to the status-bar label under it, if any.
 *
 * The status row is computed as
 * `termRows - composerLines - paletteRows - 2`; clicks on any other row, or
 * calls without a `model`, yield null. The column tested is `x - 1`, compared
 * against the half-open zones returned by `statusBarLayout(args)`.
 *
 * @param {object} args - expects `termRows`, `composerLines`, `paletteRows`,
 *   `x`, `y`, plus the `model` / `effort` / `mode` fields that
 *   `statusBarLayout` reads.
 * @returns {"model" | "effort" | null}
 */
function statusBarHit(args) {
  const { termRows, composerLines, paletteRows, x, y, model } = args;

  const expectedRow = termRows - composerLines - paletteRows - 2;
  const onStatusRow = y === expectedRow;
  if (!onStatusRow || !model) {
    return null;
  }

  const zones = statusBarLayout(args);
  const column = x - 1;
  // Half-open containment: start inclusive, end exclusive; a null zone never matches.
  const contains = (zone) => Boolean(zone) && column >= zone[0] && column < zone[1];

  if (contains(zones.modelZone)) {
    return "model";
  }
  if (contains(zones.effortZone)) {
    return "effort";
  }
  return null;
}
139631
139673
  function StatusBar({
139632
139674
  model,
139633
139675
  branch,
@@ -139638,11 +139680,11 @@ function StatusBar({
139638
139680
  cost = 0,
139639
139681
  width,
139640
139682
  mode = "normal",
139641
- effort = "balanced",
139683
+ effort,
139642
139684
  subscription = null,
139643
139685
  online = true
139644
139686
  }) {
139645
- const sep = ` ${glyph.bullet} `;
139687
+ const sep = SEP2;
139646
139688
  const modeLabel = mode === "auto-accept" ? "auto-accept" : mode;
139647
139689
  const left = [
139648
139690
  model,
@@ -139795,6 +139837,7 @@ var COMMANDS = [
139795
139837
  { name: "/retry", usage: "/retry", desc: "send your last message again", group: "chat" },
139796
139838
  { name: "/compact", usage: "/compact", desc: "shrink the conversation to free up room", group: "chat" },
139797
139839
  { name: "/context", usage: "/context", desc: "see what's loaded and how many tokens it uses", group: "chat" },
139840
+ { name: "/ask", usage: "/ask <q>", desc: "ask about Gearbox itself — answered from its own docs", group: "chat" },
139798
139841
  { name: "/memory", usage: "/memory [note]", desc: "show or add facts to remember (or start a line with #)", group: "chat" },
139799
139842
  { name: "/account", usage: "/account", desc: "list accounts; /account <number> to switch, /account add to add one", group: "accounts" },
139800
139843
  { name: "/onboard", usage: "/onboard", desc: "first-run setup; provider list and import/add commands", group: "accounts" },
@@ -139832,6 +139875,17 @@ var GROUP_TITLES = [
139832
139875
  { id: "settings", title: "settings" },
139833
139876
  { id: "other", title: "other" }
139834
139877
  ];
139878
// Help text listing the supported `/account add` forms. Built by concatenating template-literal fragments, each ending in a newline, followed by a closing note about `/account refresh`.
+ var ACCOUNT_ADD_HELP = `add an account:
139879
+ ` + ` /account add claude Claude subscription (Pro/Max)
139880
+ ` + ` /account add claude <name> a 2nd Claude account, e.g. /account add claude work
139881
+ ` + ` /account add codex ChatGPT subscription (Plus/Pro)
139882
+ ` + ` /account add codex <name> a 2nd ChatGPT account, e.g. /account add codex work
139883
+ ` + ` /account add azure <foundry-endpoint> <api-key> Azure AI Foundry (pass the full https:// endpoint)
139884
+ ` + ` /account add azure <resource-name> <api-key> [api-version] Azure OpenAI (pass the bare resource name)
139885
+ ` + ` /account add openai-compat <name> <base-url> <api-key> <model> [model...]
139886
+ ` + ` /account add <api-key> paste any provider key (auto-detected)
139887
+ ` + ` /account add <provider> <api-key> e.g. anthropic, openai, openrouter
139888
+ ` + "After adding, /account refresh discovers the models the account can actually serve.";
139835
139889
  function helpText() {
139836
139890
  const visible = COMMANDS.filter((c) => !HIDDEN.has(c.name));
139837
139891
  const pad3 = Math.max(...visible.map((c) => c.name.length)) + 2;
@@ -139891,7 +139945,7 @@ function formatAccounts(accounts, activeCliId, importable, statuses = {}) {
139891
139945
  for (const c of importable)
139892
139946
  lines.push(` + ${c.label} (${c.envVar})`);
139893
139947
  }
139894
- lines.push("", " switch: /account <name-or-number>", " add: /account add codex [name] · /account add claude [name] · /account add <api-key>", accounts.length ? " remove: /account remove <name-or-number>" : "");
139948
+ lines.push("", " switch: /account <name-or-number>", " add: /account add codex [name] · /account add claude [name] · /account add <api-key>", accounts.length ? " remove: /account remove <name-or-number>" : "", accounts.length ? " refresh models: /account refresh" : "");
139895
139949
  return lines.filter(Boolean).join(`
139896
139950
  `);
139897
139951
  }
@@ -139926,8 +139980,20 @@ function formatModelList(currentId, showAll = false) {
139926
139980
  const rows = ["models · /model <name> pins one · /model auto routes per task"];
139927
139981
  if (usable.length) {
139928
139982
  rows.push("", "ready to use");
139929
- for (const m2 of usable)
139983
+ const CAP = 8;
139984
+ const shown = new Map;
139985
+ let hidden = 0;
139986
+ for (const m2 of usable) {
139987
+ const n = shown.get(m2.provider) ?? 0;
139988
+ if (!showAll && n >= CAP) {
139989
+ hidden++;
139990
+ continue;
139991
+ }
139992
+ shown.set(m2.provider, n + 1);
139930
139993
  rows.push(line(m2));
139994
+ }
139995
+ if (hidden)
139996
+ rows.push(` + ${hidden} more on your accounts — /model all to list · /model <name> to pick`);
139931
139997
  } else {
139932
139998
  rows.push("", "no accounts yet — /account to add one");
139933
139999
  }
@@ -142127,25 +142193,33 @@ var motionFrame = () => Math.floor(Date.now() / 360);
142127
142193
  var spinnerFrame = () => ["●", "◌", "○", "◌"][motionFrame() % 4];
142128
142194
  var activePhrase2 = (label) => `${label}${["", ".", "..", "..."][motionFrame() % 4]}`;
142129
142195
  var toolColor2 = (it) => it.name === "AskUserQuestion" ? color.accent : it.status === "err" ? color.err : it.status === "running" ? color.run : it.name === "run_shell" || it.name === "command_execution" ? color.accent : it.name.toLowerCase().includes("write") || it.name.toLowerCase().includes("edit") || it.name === "file_change" ? color.ok : color.accentDim;
142196
// Cache of flattened transcript lines, keyed by item object identity.
// Each entry is { width, lines }; an entry is reused only for the same width.
var staticLineCache = new WeakMap();

/**
 * Flattens a "user" or "assistant" transcript item into styled lines,
 * memoizing the result per item object and width.
 *
 * - "user" items: the text is wrapped to `width - 4` (minimum 1), each row is
 *   prefixed with a gutter span and padded to `width` on the user background.
 * - "assistant" items with non-empty text: rendered through `markdownToLines`
 *   at `width - 2` (minimum 1) and indented by 2.
 * - anything else produces (and caches) an empty array.
 *
 * The returned array is the cached instance itself, so callers should treat
 * it as read-only.
 *
 * @param {object} it - transcript item; reads `kind` and `text`
 * @param {number} width - available width
 * @returns {Array} the lines for this item
 */
function staticItemLines(it, width) {
  const cached = staticLineCache.get(it);
  if (cached !== undefined && cached.width === width) {
    return cached.lines;
  }

  const lines = [];
  if (it.kind === "user") {
    const spans = proseSpans(it.text, { color: color.user, bold: true, bg: color.userBg });
    const rows = wrapSpans(spans, Math.max(width - 4, 1));
    rows.forEach((row, rowIndex) => {
      // Only the first wrapped row carries the bar glyph.
      const gutter = {
        text: rowIndex === 0 ? "▌ " : " ",
        color: color.accent,
        bold: true,
        bg: color.userBg
      };
      const tinted = row.map((span) => ({ ...span, bg: color.userBg }));
      lines.push(padBg([gutter, ...tinted], width, color.userBg));
    });
  } else if (it.kind === "assistant" && it.text) {
    const rendered = markdownToLines(it.text, Math.max(width - 2, 1));
    for (const line of indent(rendered, 2)) {
      lines.push(line);
    }
  }

  staticLineCache.set(it, { width, lines });
  return lines;
}
142130
142214
  function itemsToLines(items, width, expand = false) {
142131
142215
  const out = [];
142132
142216
  for (const it of items) {
142133
142217
  out.push(BLANK);
142218
+ if (it.kind === "user" || it.kind === "assistant") {
142219
+ out.push(...staticItemLines(it, width));
142220
+ continue;
142221
+ }
142134
142222
  switch (it.kind) {
142135
- case "user": {
142136
- const wrapped = wrapSpans(proseSpans(it.text, { color: color.user, bold: true, bg: color.userBg }), Math.max(width - 4, 1));
142137
- wrapped.forEach((l, i2) => out.push(padBg([
142138
- { text: i2 === 0 ? "▌ " : " ", color: color.accent, bold: true, bg: color.userBg },
142139
- ...l.map((s2) => ({ ...s2, bg: color.userBg }))
142140
- ], width, color.userBg)));
142141
- break;
142142
- }
142143
- case "assistant": {
142144
- if (!it.text)
142145
- break;
142146
- out.push(...indent(markdownToLines(it.text, Math.max(width - 2, 1)), 2));
142147
- break;
142148
- }
142149
142223
  case "tool": {
142150
142224
  const dot = { text: it.status === "running" ? spinnerFrame() : glyph.tool, color: toolColor2(it) };
142151
142225
  const name15 = friendlyTool2(it.name);
@@ -144223,6 +144297,16 @@ function cleanError(err) {
144223
144297
  `)[0].trim();
144224
144298
  return msg.length > 240 ? msg.slice(0, 240) + "…" : msg;
144225
144299
  }
144300
// Providers whose error messages are passed through without a hint.
var NATIVE_PROVIDERS = new Set(["anthropic", "openai", "google", "deepseek"]);

// Case-insensitive phrases treated as "this model/deployment is not served by the account".
var MODEL_NOT_SERVED = /does not exist|not found|no such model|model_not_found|unknown model|invalid model|deployment.*(does not exist|not)|resource not found/i;

/**
 * Rewrites a provider error into an actionable hint when it looks like the
 * selected model is not available on the account.
 *
 * The message is returned unchanged when the model's provider is in
 * NATIVE_PROVIDERS or when the message does not match MODEL_NOT_SERVED.
 * Otherwise the hint names the model's `sdkId` and `provider`, and appends the
 * original message in parentheses.
 *
 * @param {string} message - already-cleaned error text
 * @param {{ provider: string, sdkId: string }} model - the model that was called
 * @returns {string} the original message or the hint wrapping it
 */
function unavailableModelHint(message, model) {
  const isNative = NATIVE_PROVIDERS.has(model.provider);
  const looksUnserved = !isNative && MODEL_NOT_SERVED.test(message);
  if (!looksUnserved) {
    return message;
  }
  return `“${model.sdkId}” isn't available on your ${model.provider} account. Run /account refresh to see what is, then /model <name>. (${message})`;
}
144226
144310
  var resultSummary = (out) => {
144227
144311
  const s2 = typeof out === "string" ? out : JSON.stringify(out);
144228
144312
  const first = s2.split(`
@@ -144240,7 +144324,7 @@ async function runTask(opts) {
144240
144324
  if (errored || signal?.aborted)
144241
144325
  return;
144242
144326
  errored = true;
144243
- onEvent({ type: "error", message: cleanError(err) });
144327
+ onEvent({ type: "error", message: unavailableModelHint(cleanError(err), model) });
144244
144328
  };
144245
144329
  onEvent({ type: "phase", label: "contacting model", detail: model.label, state: "running" });
144246
144330
  const activeTools = await createToolset(onEvent, { readOnly: Boolean(plan) });
@@ -144394,6 +144478,52 @@ async function runTask(opts) {
144394
144478
  onEvent({ type: "done", usage });
144395
144479
  return { messages: next, usage };
144396
144480
  }
144481
/**
 * Runs a single tool-less completion and reports progress through `onEvent`.
 *
 * Events emitted, in order: a "contacting model" phase, one "text" event per
 * non-empty text delta, at most one "error" event, a closing phase
 * ("finished"/"ok" or "blocked"/"err"), then "done" with the token usage.
 * Stream errors are reported as events rather than thrown; nothing is emitted
 * for errors once `signal` is aborted.
 *
 * @param {object} opts
 * @param {object} opts.model - model spec; `label` is shown in the first phase
 * @param {string} opts.system - system prompt
 * @param {string} opts.prompt - user prompt, sent as the only message
 * @param {(event: object) => void} opts.onEvent - event sink
 * @param {AbortSignal} [opts.signal] - aborts the request
 * @param {string} [opts.effort] - when set, mapped to provider options via `reasoningOptions`
 * @param {*} [opts.creds] - passed to `resolveModel`
 * @param {AsyncIterable<object>} [opts._stream] - injected part stream; when
 *   provided, `streamText` is not called
 * @returns {Promise<{ text: string, usage: { inputTokens: number, outputTokens: number } }>}
 */
async function runCompletion(opts) {
  const { model, system, prompt, onEvent, signal } = opts;
  const usage = { inputTokens: 0, outputTokens: 0 };
  const providerOptions = opts.effort ? reasoningOptions(model, opts.effort) : {};
  let errored = false;
  // Report at most one error, and none after the caller aborted.
  const emitErr = (err) => {
    if (errored || signal?.aborted)
      return;
    errored = true;
    onEvent({ type: "error", message: unavailableModelHint(cleanError(err), model) });
  };
  onEvent({ type: "phase", label: "contacting model", detail: model.label, state: "running" });
  // One truthiness check decides both whether to build the real stream and
  // which stream to read. Previously the build used truthiness and the pick
  // used `??`, so a falsy non-null `_stream` built the real stream and then
  // iterated the falsy value instead of it.
  const parts = opts._stream || streamText({
    model: resolveModel(model, opts.creds),
    system,
    messages: [{ role: "user", content: prompt }],
    abortSignal: signal,
    onError: ({ error: error40 }) => emitErr(error40),
    ...(Object.keys(providerOptions).length ? { providerOptions } : {})
  }).fullStream;
  let text2 = "";
  try {
    for await (const part of parts) {
      if (part.type === "text-delta") {
        // Accept both the `text` and the `textDelta` field for the delta payload.
        const t2 = part.text ?? part.textDelta ?? "";
        if (t2) {
          text2 += t2;
          onEvent({ type: "text", text: t2 });
        }
      } else if (part.type === "error") {
        emitErr(part.error);
      } else if (part.type === "finish") {
        // Usage may arrive under either naming scheme; missing counts become 0.
        const u = part.totalUsage ?? part.usage ?? {};
        usage.inputTokens = u.inputTokens ?? u.promptTokens ?? 0;
        usage.outputTokens = u.outputTokens ?? u.completionTokens ?? 0;
      }
    }
  } catch (e2) {
    if (!signal?.aborted)
      emitErr(e2);
  }
  onEvent({ type: "phase", label: errored ? "blocked" : "finished", state: errored ? "err" : "ok" });
  onEvent({ type: "done", usage });
  return { text: text2, usage };
}
144397
144527
  function friendlyToolPhase(name31) {
144398
144528
  if (name31 === "read_file" || name31 === "list_dir" || name31 === "glob" || name31 === "search")
144399
144529
  return "reading context";
@@ -144404,11 +144534,583 @@ function friendlyToolPhase(name31) {
144404
144534
  return "using tool";
144405
144535
  }
144406
144536
 
144537
+ // src/help/docs-bundle.ts
144538
+ var DOCS_BUNDLE = [
144539
+ {
144540
+ file: "README.md",
144541
+ text: `# gearbox
144542
+
144543
+ ## Install
144544
+
144545
+ macOS, Linux, WSL:
144546
+
144547
+ \`\`\`bash
144548
+ curl -fsSL https://unpkg.com/gearbox-code@latest/install.sh | bash
144549
+ \`\`\`
144550
+
144551
+ Windows PowerShell:
144552
+
144553
+ \`\`\`powershell
144554
+ irm https://unpkg.com/gearbox-code@latest/install.ps1 | iex
144555
+ \`\`\`
144556
+
144557
+ These installers do not use \`sudo\`, admin privileges, or \`npm install -g\`.
144558
+ They install Gearbox into a user-owned directory, create the \`gearbox\` command,
144559
+ then start onboarding before the coding app opens.
144560
+
144561
+ Run without installing:
144562
+
144563
+ \`\`\`bash
144564
+ npx gearbox-code@latest
144565
+ \`\`\`
144566
+
144567
+ ## First Run
144568
+
144569
+ Gearbox needs one provider account before it opens the coding app. The installer
144570
+ runs setup automatically. You can also run it yourself:
144571
+
144572
+ \`\`\`bash
144573
+ gearbox onboard
144574
+ \`\`\`
144575
+
144576
+ Common setup commands:
144577
+
144578
+ \`\`\`bash
144579
+ gearbox auth add <api-key> # auto-detects known key prefixes
144580
+ gearbox auth add <provider> <api-key> # anthropic, openai, google, deepseek, openrouter, groq, xai, mistral...
144581
+ gearbox auth add codex # ChatGPT subscription through the Codex CLI
144582
+ gearbox auth add codex work # second ChatGPT account, isolated CODEX_HOME
144583
+ gearbox auth add claude work # second Claude account, isolated config
144584
+ gearbox auth import # import credentials from env/cloud config
144585
+ gearbox auth providers # list supported providers
144586
+ \`\`\`
144587
+
144588
+ After setup:
144589
+
144590
+ \`\`\`bash
144591
+ cd ~/your-project
144592
+ gearbox
144593
+ \`\`\`
144594
+
144595
+ No account configured means no fake/demo model: Gearbox runs onboarding first.
144596
+
144597
+ ## Uninstall
144598
+
144599
+ macOS, Linux, WSL:
144600
+
144601
+ \`\`\`bash
144602
+ rm -f ~/.local/bin/gearbox
144603
+ rm -f ~/.bun/bin/gearbox
144604
+ rm -rf ~/.local/share/gearbox
144605
+ \`\`\`
144606
+
144607
+ Windows PowerShell:
144608
+
144609
+ \`\`\`powershell
144610
+ Remove-Item "$env:LOCALAPPDATA\\Gearbox" -Recurse -Force
144611
+ \`\`\`
144612
+
144613
+ If you previously installed with npm global:
144614
+
144615
+ \`\`\`bash
144616
+ npm uninstall -g gearbox-code
144617
+ \`\`\`
144618
+
144619
+ If \`gearbox\` fails with \`Unknown file extension ".tsx"\`, an old Bun-linked
144620
+ shim is still first on PATH. Remove it and reinstall:
144621
+
144622
+ \`\`\`bash
144623
+ rm -f ~/.bun/bin/gearbox
144624
+ curl -fsSL https://unpkg.com/gearbox-code@latest/install.sh | bash
144625
+ \`\`\`
144626
+
144627
+ ## What It Is
144628
+
144629
+ Gearbox is a terminal coding agent that can use the model accounts you already
144630
+ pay for. It supports provider accounts, local credential storage, model routing,
144631
+ session history, file edits, shell commands, MCP tools, web search, image input,
144632
+ and permission gates.
144633
+
144634
+ Supported setup paths include API keys, detected env/cloud credentials, Azure,
144635
+ and provider CLIs where available.
144636
+
144637
+ ## Capabilities
144638
+
144639
+ Paste or drag an image path into the composer to attach screenshots or UI
144640
+ captures. Local image attachments work with API-backed multimodal models.
144641
+
144642
+ Gearbox loads MCP servers from \`~/.gearbox/mcp.json\`, \`.mcp.json\`, or
144643
+ \`.gearbox/mcp.json\`. Check what loaded with:
144644
+
144645
+ \`\`\`bash
144646
+ gearbox mcp list
144647
+ \`\`\`
144648
+
144649
+ Example MCP config:
144650
+
144651
+ \`\`\`json
144652
+ {
144653
+ "mcpServers": {
144654
+ "github": {
144655
+ "command": "npx",
144656
+ "args": ["-y", "@modelcontextprotocol/server-github"],
144657
+ "env": { "GITHUB_TOKEN": "\${GITHUB_TOKEN}" }
144658
+ }
144659
+ }
144660
+ }
144661
+ \`\`\`
144662
+
144663
+ The built-in \`web_search\` tool works out of the box with DuckDuckGo, and uses
144664
+ Brave or SearXNG when \`BRAVE_SEARCH_API_KEY\` or \`SEARXNG_URL\` is set.
144665
+
144666
+ ## Develop
144667
+
144668
+ Requires [Bun](https://bun.sh).
144669
+
144670
+ \`\`\`bash
144671
+ bun install
144672
+ bun run src/cli.tsx
144673
+ bun test
144674
+ bun run typecheck
144675
+ \`\`\``
144676
+ },
144677
+ {
144678
+ file: "CLAUDE.md",
144679
+ text: "# Gearbox — project guide\n\nGearbox is a multi-provider coding harness for the terminal: a beautiful, simple terminal agent that reads/writes code and runs commands, talking to any provider (Anthropic, OpenAI, Google, DeepSeek) through one clean loop.\n\n**The point of the project:** intelligent per-task *model routing* — automatically picking the right model for each task across every provider and account you pay for. Basic routing is live (`RoutingSelector` — classify → quality bar → cheapest winner); the richer engine (shadow-eval, credit/limit penalties, confidence display) layers on top of the same seam. See `DESIGN.md` for the full vision and `experiments/FINDINGS.md` for the validation behind it.\n\n## The one rule that matters\n\n**Keep the routing seam clean.** The agent must never hardcode a model. It asks a `ModelSelector` for the model to use. `RoutingSelector` is the live default (classify task → filter by quality bar → cheapest winner); `FixedSelector` is used only when a model is explicitly pinned (`--model` flag or `/model <name>`). Concretely:\n\n- `src/model/selector.ts` — the seam. `select(task) => ModelChoice`. Do not bypass it.\n- `src/model/router.ts` — `RoutingSelector`: classify prompt → quality bar → cost-sort candidates → respect `/prefer` preferences.\n- `src/model/profiles.ts` — the data corpus: quality, cost, latency, tokenizer calibration per model. Routing reads this.\n- `src/providers.ts` — maps a provider+model id to an AI SDK model instance. Already multi-provider. Adding a model is data, not code.\n- Every model call captures token usage (`src/agent/run.ts`) so the cost engine has data. Do not drop usage.\n- The UI consumes a normalized `AgentEvent` stream (`src/agent/events.ts`), never the AI SDK's raw types. 
This decouples the UI from the provider layer and from routing.\n\nIf you find yourself writing `anthropic('claude-...')` anywhere outside `providers.ts`, stop — route it through the selector.\n\n## Layout\n\n```\nsrc/\n cli.tsx entry point; renders the Ink app; picks RoutingSelector by default\n config.ts minimal config (default model, provider from env)\n providers.ts provider+model id -> AI SDK model (multi-provider; contextWindow per model)\n commands.ts slash-command metadata + pure helpers (fuzzy model match, /help, model list)\n tools.ts read / write / edit / list / search / glob / run_shell (AI SDK tools)\n model/\n selector.ts THE ROUTING SEAM — ModelSelector interface + FixedSelector (pinned model)\n router.ts RoutingSelector: classify → quality bar → cost-sort → preferences (the live default)\n profiles.ts model corpus: quality (SWE-bench), cost ($/Mtok), latency, tokenizer calibration\n tokens.ts calibrated token counting (js-tiktoken × per-model calibration factor)\n preferences.ts persist /prefer kind model choices to ~/.gearbox/routing-preferences.json\n reasoning.ts reasoning/thinking config helpers\n context/\n builder.ts context engine: system + memory + repo map + retrieved files + curated history\n retrieve.ts BM25 lexical retrieval — top-K relevant files for a prompt (no model call)\n repomap.ts repo structure summary for the system prompt\n memory.ts project memory (GEARBOX.md / CLAUDE.md loaded into context)\n compact.ts context compaction (/compact)\n accounts/\n types.ts Account + AuthMethod types (API key, AWS, Azure, Vertex, CLI, OpenAI-compat)\n store.ts accounts.json persistence (~/.gearbox/accounts.json)\n catalog.ts provider catalog (known providers, env vars, labels)\n detect.ts auto-detect env creds + cloud credentials\n onboard.ts interactive add/test account flows\n resolve.ts credential resolution (Account → ResolvedCreds, fetching secrets on demand)\n discover.ts per-account model discovery (Azure deployments / Foundry / gateway 
/models) → account.models; catalog defaultModels are seeds, not callable ids\n usage.ts per-account spend ledger + rate-limit snapshots + balance tracking\n balance.ts provider balance fetch helpers\n help/\n ask.ts /ask corpus: bundled docs + generated command reference, system prompt, meta-question auto-detect\n agent/\n events.ts AgentEvent — normalized stream the UI consumes\n run.ts real agent loop (AI SDK streamText -> AgentEvent), abort-aware; runCompletion = tool-less grounded answer (used by /ask)\n cli-backend.ts claude/codex CLI subprocess backend (for Pro/Max subscriptions)\n mock.ts scripted demo stream (runs with no API key; used by tests)\n ui/\n theme.ts colors + glyphs (the look)\n input.ts pure key→action reducer for the composer (tested)\n history.ts pure ↑/↓ prompt-history nav (tested)\n net.ts background online probe; status bar shows ⚠ offline when down\n useTerminalSize.ts reactive width on resize (everything reflows)\n git.ts current branch for the status line\n App.tsx the Ink app: state, useInput dispatch, commands, turns\n components/ Banner, Transcript, Composer, CommandPalette, StatusBar, PermissionPrompt\ntest/ pure-logic + render tests (ink-testing-library); no keys\nDESIGN.md full product vision (routing, requirements, UX)\nexperiments/ prototypes that validated the architecture\n```\n\nThe composer is custom (Ink `useInput` + `src/ui/input.ts`), not a third-party widget — full control over the cursor, ↑/↓ history, and esc-to-interrupt, with no focus/remount fragility. **Multi-line**: ⌃J (or shift/alt+⏎) inserts a newline, ⏎ submits; ↑/↓ move between lines and fall through to history at the top/bottom line; bracketed paste (enabled in `cli.tsx`) inserts multi-line text literally (CR normalized, paste markers stripped) instead of submitting per line. `caretPos()` is the shared line/col helper. 
**Readline editing** (all pure in `input.ts`, tested): ⌃U/⌃K kill to line start/end, ⌃W / ⌥⌫ kill word, ⌃D forward-delete, ⌥/⌃ + ←→ word-jump, ⌃A/⌃E line home/end. Keys: ⏎ send · ⌃J newline · ↑↓ line/history · ← → cursor · ⌥←→ word · tab complete @file · **shift+tab cycles mode (normal · auto-accept · plan)** · ⌃Y copy last reply · esc interrupt · ⌃c quit. `/keys` shows the cheatsheet.\n\n**Modes & effort.** Three input modes cycled by shift+tab (`App.tsx` `cycleMode`): **normal** (asks before writes/edits/shell), **auto-accept** (file writes/edits apply without asking — the permission broker auto-resolves `write`/`edit`; shell still gated; diffs still render), **plan** (read-only). Plus **yolo** (auto-approve everything) via `/yolo`. **Effort tiers** (`/effort fast|balanced|max`, or `setEffort`) pin the model through the routing seam (fast→haiku, balanced/max→sonnet) — the active mode + `⚡effort` show as badges in the `StatusBar`. **Click pickers** (fullscreen only): clicking the **model** or **effort** label in the status bar opens a floating picker above it (↑↓ select · ⏎ apply · esc close), reusing the same `/model`/`/effort` command path. The slash commands remain the keyboard path. The fragile row+column hit-test lives in pure, tested `statusBarHit`/`statusBarLayout` (`StatusBar.tsx`); `App.tsx` only supplies live layout (composer line count, `PALETTE_ROWS`, the rendered model/effort/mode) and toggles `quickPicker` state. Inline mode has no mouse grab, so the labels stay informational there. **Copy**: ⌃Y / `/copy` copies the last reply via OSC 52 (`src/ui/clipboard.ts`, works over SSH); `/export [file]` writes the transcript to Markdown. 
**Terminal integration** (`src/ui/terminal.ts`): the tab title (OSC 2) reflects working/idle, and a long turn (>8s) rings the bell + fires a desktop notification (macOS) so you can step away.\n\n**More UX affordances.** **Type-ahead**: prompts submitted while busy are queued (`queueRef`, shown as chips) and sent when the turn ends. **⌃C** interrupts a turn → clears the composer → \"press again to quit\" (`cli.tsx` renders with `exitOnCtrlC:false`). **Large pastes** collapse to a `[Pasted N lines]` chip (`pasteStoreRef`), expanded back on submit. **Fuzzy** `@file`/`/command` pickers (`src/ui/fuzzy.ts` — substring-first, then subsequence scored by boundary+contiguity; tested). **Cost**: live `$` estimate in the status bar from per-turn model+tokens (`estimateCost` + per-model pricing in `providers.ts`). **Syntax highlighting** for code blocks (`src/ui/highlight.ts` — lightweight per-line tokenizer → Ink spans, NEVER raw ANSI; used by both `lines.ts` `clipSpans` and `Markdown.tsx`). `?` on an empty composer shows the cheatsheet (`KEYS_HELP`).\n\n**Sessions** (`src/session.ts`): conversations persist per-project under `~/.gearbox/sessions/<slug>/` (`GEARBOX_HOME` overrides). Each record holds provider-neutral `messages` + the UI `items` + **per-turn `{model, usage, at}`** (routing/cost data — the record is deliberately not single-model). `gearbox --continue`/`-c` resumes the latest; `/resume [n]` lists/loads in-app; `/clear` starts a fresh session. Prompt history persists across runs (`history.json`). Saving is best-effort (never crashes the app); skipped in demo mode.\n\nFeatures: full markdown via **marked** (parse, `marked.lexer`) + **Ink** (render) in `Markdown.tsx` — headings, bold/italic/inline-code, tables, ordered+nested lists, blockquotes, code blocks. NO foreign ANSI in Ink (cli-highlight/marked-terminal were tried and removed — they corrupt Ink's width/wrapping; render marked's token tree as Ink elements instead). 
Markdown gets a `width` prop (threaded App→Transcript→Markdown) for table/rule sizing. Colored diffs under edits (`src/diff.ts`, edit/write tools return `{summary,diff}`), plan mode (read-only tools + plan prompt; `/plan` or shift+tab), `!cmd` runs a shell command directly (`src/shell.ts`), `@file` mentions (fuzzy picker `src/ui/mention.ts`+`files.ts`; expanded into the model message on send), live \"working · Ns\" timer.\n\n**Boo (the mascot).** A pixel ghost, now **parametric** (`src/ui/ghost/engine.ts`, ported from a Claude Design handoff). A 20×20 pixel sprite composited from composable layers — body (palette) + face (eyes/mouth) + accessory + persona + a frame-driven overlay (tears/dots/confetti/Z's/sparkle/hearts) — then FOLDED into half-block cells (`▀`/`▄`, top px → `t`/glyph color, bottom px → `b`/bg). `renderGhost(cfg)` is the source of truth for the **default blocks path**; it's pure + memoized. The data: 13 faces (`FACES`), 9 palettes (`PALETTES`), 6 accessories, 9 personas (personas/accessories ported but not yet surfaced in the live UI). Ink `color`/`backgroundColor` props only, NEVER raw ANSI (corrupts Ink's width math). PNG paths are **opt-in** via `GEARBOX_GHOST`:\n\n- `GEARBOX_GHOST=kitty` — real PNG via kitty graphics Unicode placeholders (`U+10EEEE`, fg encodes image id, diacritics encode row/col; PNGs transmitted once in `cli.tsx`). NOTE: the placeholder protocol is young and mis-rendered (squished) in Ghostty during testing — kept opt-in until that's solved.\n- `GEARBOX_GHOST=iterm` — OSC 1337 splash banner (iTerm2/WezTerm).\n\n`detectImageMode()` returns `blocks` unless `GEARBOX_GHOST` opts in. Baked PNGs live in `src/ui/mascot-png.ts`; `bun run scripts/ghost-preview.ts` previews the parametric engine (splash + all faces + the in-flow state crops). 
**Boo is animated but deliberately calm** on the blocks path (`AnimatedGhost` in `Mascot.tsx`): one shared, unhurried 240ms tick (leaf-local `useTick`, never lifted to App root); talk + overlays advance at half that (~480ms). There is NO idle bob/float and NO splash sparkle — motion is a quiet sign of life, not fidgeting (the splash just blinks every ~6s; in-flow only the state-meaningful overlay/talk moves). `GEARBOX_NO_MOTION=1` freezes to frame 0. `/ghost [mood]` cycles the skin (`skinToCfg` maps it to a cfg; `shades` is the cool face + shades accessory).\n\n**Layout: fullscreen by default; inline is opt-in.** **Fullscreen is the default** (alt-screen frame + virtualized scroll region + scrollbar + mouse wheel scroll); `--inline`, `GEARBOX_INLINE=1`, or `/config inline on` (pref `fullscreen: false`) opts into inline mode. `GEARBOX_FULLSCREEN=1` or `--fullscreen` forces fullscreen explicitly. The decision lives in `cli.tsx` (`wantsFullscreen`). Grabbing the mouse for wheel-scroll is exactly what disables native terminal selection, so in fullscreen mode text selection requires the terminal's modifier (e.g. Option-drag in Ghostty). **Inline mode** (the plain `Transcript` component): no alt-screen, no mouse grab — native click-drag selection / scrollback / copy all work with no modifier. The transcript is a **virtualized line buffer**: `src/ui/lines.ts` (`itemsToLines`) flattens items into styled `Line`s (markdown→lines, wrapping, diffs) — INVARIANT: every line ≤ width (tested), so nothing overflows. **Streaming perf**: flattening the markdown-heavy `assistant`/`user` items is super-linear with their length, so `staticItemLines` memoizes per item in a `WeakMap` keyed by object reference (unchanged items keep identity across renders, so only the changing tail re-parses — history is free; running tools are not cached since their spinner animates). 
On the producer side, assistant **text deltas are coalesced** on a ~45ms flush timer in `App.tsx`'s `onEvent` (mirroring the tool-stream coalescer), so streaming re-renders at ~22fps instead of per-token — both together stop the auto-scroll jitter that grew with reply length. `finishAssistant`/the turn `finally` flush any buffered text before marking done or on interrupt. In fullscreen, `App` renders only the visible window via `Viewport` (`src/ui/components/Viewport.tsx`) at a computed `transcriptHeight = rows − header − footer` (footer over-estimated so the frame never exceeds the screen; alt-screen clips, so under-filling is safe). Fullscreen scroll: mouse wheel (SGR mouse reporting enabled in `cli.tsx`; parsed off raw stdin in `App` since Ink doesn't model mouse — buttons 64/65) and PgUp/PgDn; new output re-pins to the bottom (`atBottomRef`); a scrollbar sits on the right. (In fullscreen, mouse reporting means text selection needs the terminal's modifier, e.g. Option-drag in Ghostty — which is why inline mode is offered as an opt-in.) The virtualized buffer replaced an earlier flex/overflow fullscreen that corrupted on tall output. Chrome spans full width; prose wraps ≤100 cols. The plain `Transcript` component is the inline-fallback renderer. `scripts/gen-mascot.ts` still bakes the PNGs + baked sprites (`mascot-sprite.ts` `GHOSTS`) — but those now feed **only the opt-in kitty/iTerm image path** (`image.ts`); the default blocks path renders the parametric engine instead. The splash scales to the terminal (big=2×/mini=1×/none by rows×cols, in `App.tsx`). 
The inline/working presence is the compact **state ghost** (see below) — a native-resolution head crop so Boo never dominates the transcript.\n\nCommands are grouped in `/help` (models · conversation · accounts · save · modes · settings · other) and `src/commands.ts` carries plain-language descriptions: /model [name] (fuzzy — \"haiku\"; `/model auto` routes, `/model all` lists every provider) /effort [fast|balanced|max] /prefer [kind model] (remember a confirmed routing preference for a task type) /clear /resume /retry /compact /context /memory /ask &lt;q&gt; (answer questions about Gearbox itself from its bundled docs via a cheap routed model; plain meta-questions auto-route here with a visible affordance) /account (unified: list/add/login/use/rm/refresh — `/accounts` and `/login` are hidden aliases; `/account refresh` re-discovers each account's real callable models) /cost /copy /export [file] /plan /yolo /theme /config (theme·vim·notify·inline; `/vim` is a hidden alias) /init /keys /help /exit. **Hidden** (work but not listed): /accounts /login /vim /ghost. **Removed:** /cwd (the working dir now shows in `/context`). `formatModelList` shows usable models first and collapses no-key providers to a one-line count.\n\n**Permission gate:** `write_file`/`edit_file`/`run_shell` block on a confirm before mutating. Broker: `src/permission.ts` (`requestPermission` in the tools; `setPermissionHandler` installed by `App`; no handler → allow, so tests/headless are unchanged). Decisions: **once** (1), **always** (2, grants that kind for the session), **all/yolo** (a, auto-approves everything until toggled), **deny** (3/esc). YOLO is also toggled by `/yolo` or started with `--yolo`; a `⚡ yolo` badge shows in the status. The `!` prefix is user-initiated so it is NOT gated. Search/nav tools: `search` (ripgrep, Bun-walk fallback) and `glob` (`Bun.Glob`), both read-only (also in plan mode). 
The working indicator IS Boo now (`components/Working.tsx`): a compact head-crop ghost whose face follows the agent state — thinking (dots) → streaming (talk) → tool (loading dots) → a clean-finish celebrate (party hat + confetti) → error (crying with falling tears). `App.tsx` derives `mascotState` from the `onEvent` stream; the success/error beat **lingers ~1.5s** after the turn (`linger` state — the working line gates on `busy || linger`, since it would otherwise unmount the instant `busy` goes false). Crops are per-state (`stateView`): head (rows 4–14), head+dots (2–14), head+hat (0–14) so overlays outside the head still read. This deliberately supersedes the earlier \"Boo stays on the welcome splash only / in-flow movement reads as noise\" decision — the compact, state-bearing ghost is the point of the design port.\n\n## Conventions\n\n- Runtime: **Bun**. TypeScript + TSX. Run with `bun run src/cli.tsx`.\n- UI: **Ink** (React for terminals) + **@inkjs/ui**. Keep it calm and beautiful: restrained palette (one accent), generous spacing, consistent glyphs. The look lives in `src/ui/theme.ts` — change colors/glyphs there, not inline.\n- Open + free: MIT, no paid dependencies, no hosted backend, no telemetry. The only cost is the user's own model calls on their own keys.\n- Tools must be safe by default: confirm or sandbox anything destructive; never `rm -rf` or write outside the workspace without intent.\n\n## Run it\n\n```bash\nbun install\n# set at least one key:\nexport ANTHROPIC_API_KEY=... # or OPENAI_API_KEY / GOOGLE_GENERATIVE_AI_API_KEY / DEEPSEEK_API_KEY\nbun run src/cli.tsx # or: bun start\n```\n\nWith no key it launches in demo mode (a scripted transcript) so the UI still runs.\n\n## Test\n\n```bash\nbun test # render tests + agent-loop tests; no API key needed\nbun run typecheck # tsc --noEmit\n```"
144680
+ },
144681
+ {
144682
+ file: "DESIGN.md",
144683
+ text: `# Gearbox — Design
144684
+
144685
+ A terminal coding agent whose one job, done better than anything else, is to **route each task to the right model across every provider and account you pay for**. Everything else is table stakes executed well, in service of that.
144686
+
144687
+ Target user: a startup founder / power user who pays for several models (Claude, OpenAI/Codex, Gemini, DeepSeek, Azure) via API keys and/or flat-rate seats, codes heavily, hits limits, and has no intelligent way to use it all.
144688
+
144689
+ Status: architecture validated by 6 experiments (\`experiments/FINDINGS.md\`); routing, event-log ledger, task-boundary switching, ground-truth gate all prototyped; Anthropic payload accepted live.
144690
+
144691
+ ---
144692
+
144693
+ ## Design principles (these decide every tradeoff)
144694
+
144695
+ 1. **Routing is sacred and invisible.** It is the USP and runs on every task. It must add no perceptible latency and no visual noise. You feel its results (cost, no stalls), never its presence.
144696
+ 2. **Earn trust through transparency.** It spends your money. Every decision is explainable in one glance and one keystroke to the full math. Never opaque.
144697
+ 3. **Calm by default, depth on demand.** The screen shows what's happening now, the current model, the running cost. Everything else is one keystroke away.
144698
+ 4. **Honest about state.** Tests failed → it says so. Switched providers → it says so plainly. Never claims done without proof.
144699
+ 5. **Build on proven wheels; own only the differentiator.** The provider layer, tool-call loop, and TUI rendering are solved problems. The routing brain, the ledger, the cost/limit engine, and the UX are ours.
144700
+ 6. **Every milestone is something you actually use.** Routing-first. No big-bang.
144701
+ 7. **Open and free to run.** Fully open-source (MIT). Nothing costs money except the model calls you already pay for, on your own keys. No hosted backend, no paid dependencies, no required account, no paid telemetry. Local-first everywhere.
144702
+
144703
+ ---
144704
+
144705
+ ## What it is
144706
+
144707
+ A terminal app (rich TUI with a live dashboard) plus a scriptable CLI underneath. You run it instead of Claude Code / Codex. Local-first, your keys, your machine. Not a website, not a hosted service, not an IDE plugin. Internal tool first; productizable later because the routing + spend story is exactly what teams want.
144708
+
144709
+ ## Openness & cost
144710
+
144711
+ - **License: MIT.** Fully open-source, permissive, no strings. (Apache-2.0 is the alternative if a patent grant ever matters; MIT chosen for maximum simplicity and openness.)
144712
+ - **Free to run.** Every dependency is permissively licensed and runs locally: AI SDK (Apache-2.0), Bun (MIT), bun:sqlite (public domain), Ink (MIT), ripgrep (MIT/Unlicense), tree-sitter (MIT), MCP SDK (MIT). No copyleft, no hosted service, no required account, no paid backend, no server bill.
144713
+ - **The only money is inference you already pay for**, on your own keys, and it is the whole point of the tool. That includes the optional "make routing smarter" calls (shadow-eval, the LLM classifier): they run on your keys, count against your budget caps, are off-or-sampled by default, and are governed by a calibration-budget knob so you decide how much to spend sharpening routing. Rules-based routing is free.
144714
+ - **Code search is free and local by default** (ripgrep + tree-sitter + LSP). Embeddings are optional and local-first (a local embedding model); never a paid embeddings API by default.
144715
+ - **Telemetry: none by default.** Any analytics is opt-in and local-only; nothing leaves your machine.
144716
+
144717
+ ---
144718
+
144719
+ ## Architecture
144720
+
144721
+ \`\`\`
144722
+ ┌──────────────────────────── Gearbox (owned) ────────────────────────────┐
144723
+ │ TUI / CLI (Ink) │
144724
+ │ │ │
144725
+ │ Session Orchestrator (single-writer) ── multi-session, worktrees │
144726
+ │ │ │
144727
+ │ ┌──────────────┐ ┌──────────────────┐ ┌────────────────────────┐ │
144728
+ │ │ ROUTING BRAIN│ │ Ledger + Memory │ │ Verification / Autonomy│ │
144729
+ │ │ classify → │ │ append-only event│ │ tests/build/types gate │ │
144730
+ │ │ score → │ │ log, curation, │ │ auto-iterate-to-green │ │
144731
+ │ │ pick + log │ │ task-boundary │ │ unattended-safe │ │
144732
+ │ └──────┬───────┘ │ switching │ └────────────────────────┘ │
144733
+ │ │ └──────────────────┘ │
144734
+ │ Cost / Credit / Limit / Plan engine (balances, caps, failover) │
144735
+ └─────────────────────────────────┬────────────────────────────────────────┘
144736
+ │ model selection per task
144737
+ ┌──────────────────────────────── ▼ built on ─────────────────────────────┐
144738
+ │ Vercel AI SDK (\`ai\` + @ai-sdk/{anthropic,openai,google,azure,deepseek}, │
144739
+ │ OpenRouter provider) → unified messages, tool-calling loop, streaming │
144740
+ │ Bun + bun:sqlite (WAL) · Ink (TUI) · MCP SDK · ripgrep/tree-sitter │
144741
+ └──────────────────────────────────────────────────────────────────────────┘
144742
+ \`\`\`
144743
+
144744
+ ### Build on (do NOT reinvent)
144745
+
144746
+ | Need | Use | Why |
144747
+ |---|---|---|
144748
+ | Provider access, unified message format, tool-call normalization, streaming | **Vercel AI SDK** (\`ai\`, provider packages, OpenRouter provider) | Battle-tested, ubiquitous, covers all 5 providers + OpenRouter; its unified message type IS the canonical state I prototyped in E1; its tool-call loop (\`stopWhen\`/steps) is the agent loop mechanics |
144749
+ | Runtime + storage | **Bun** + **bun:sqlite** (WAL) | Fast cold start, native TS, zero-dep embedded DB; event log validated in E3 |
144750
+ | TUI rendering | **Ink** (React for terminals) | Standard for rich TS CLIs (Claude Code, Codex CLI use it); component model fits the dashboard |
144751
+ | Cost estimation | **js-tiktoken** + provider token endpoints | Local, fast token counts for pre-call estimates (used in E1) |
144752
+ | Code search / nav for tools + memory | **ripgrep**, **tree-sitter**, **LSP** (+ optional local embeddings) | Don't build search; start with ripgrep, add tree-sitter/LSP for symbol nav. All free/local. Embeddings optional and local-first (local model) — never a paid embeddings API by default |
144753
+ | Tool implementations (read/write/edit/shell/grep) | adapt from **Pi / OpenCode** (MIT) as reference | Don't redesign well-solved tools |
144754
+ | Tool/extension connections | **MCP SDK** | Standard; reuse your existing MCP servers |
144755
+ | Config + schema validation | **TOML** + **Zod** | Boring and correct |
144756
+ | Seed quality priors | **SWE-bench / Aider leaderboard / public evals** | Don't guess model quality cold |
144757
+
144758
+ ### Own (the differentiator, no wheel exists)
144759
+ Routing brain · cost/credit/limit/plan engine · canonical-state event-log ledger + curation · verification gate + autonomy controller · single-writer multi-session orchestrator · the routing-transparency UX.
144760
+
144761
+ > Foundation note: the AI SDK runs the per-call tool loop on whatever model Gearbox selects; Gearbox injects routing at task boundaries and wraps every call with the ledger, cost engine, and verification. This keeps routing first-class without rebuilding provider integration. Alternative considered: build on Pi's \`pi-agent-core\` (faster start, but retrofitting per-task routing into someone else's loop). Chose the AI SDK for a clean, owned hot path since routing is the whole point.
144762
+
144763
+ ---
144764
+
144765
+ ## The routing engine (the USP — most of the engineering rigor goes here)
144766
+
144767
+ ### What "a task" is, and the two levels of routing
144768
+
144769
+ A **task** = one user request / one unit of intended work ("fix the failing auth tests"). The main agent thread handles it and **stays warm on one capable model** chosen at task start. The main model only changes at a task boundary, on escalation (the work turns out harder than classified), or on failover (limit/outage). The \`w_switch\` penalty governs these rare main-thread changes.
144770
+
144771
+ Fine-grained savings do **not** come from hopping providers mid-conversation (that loses the cache and risks incoherence). They come from **delegating bounded sub-tasks to cheap models in isolated contexts**: run-and-summarize the tests, search the codebase, read-and-summarize a big file, generate boilerplate. Each sub-task gets its own cheap routing decision and its own clean context, returns a compact result to the warm main thread, and never touches the main conversation's cache. This is the "intelligent leader delegating grunt work" model, and it's where most of the easy-work-to-cheap-model savings actually live.
144772
+
144773
+ This also reconciles E6: cheap context reconstruction from the ledger powers both (a) spinning up many cheap sub-task contexts and (b) the occasional main-thread switch. Frequent cheapness is the sub-task surface; the switch penalty is the main-thread surface. No contradiction.
144774
+
144775
+ So routing runs at two levels:
144776
+ - **Task level:** pick the main-thread model (clears the task bar; warm; scarcity/plan/limit aware).
144777
+ - **Sub-task level:** each delegated bounded op routes independently to the cheapest model clearing that op's (lower) bar, in an isolated context.
144778
+
144779
+ Per task (and per sub-task), before any model call:
144780
+
144781
+ \`\`\`
144782
+ classify(task) → task_type, complexity, est_tokens
144783
+
144784
+ candidates = models where quality_prior[task_type] ≥ bar[task_type] // meet the bar
144785
+
144786
+ for each candidate: score = cost_est
144787
+ + w_scarcity · (cost_est / provider_balance) // preserve scarce credit
144788
+ + w_switch · switch_penalty(currently_warm) // cache locality
144789
+ − w_plan · plan_bonus(flat_rate_seat_free) // use seats you pay for
144790
+ filter out: rate-limited / over-budget / (if interactive) too-slow
144791
+
144792
+ pick = argmin(score); log(decision, per-candidate scores, reason)
144793
+
144794
+ if none clears bar+budget → stop, surface to user (never silently downgrade quality)
144795
+ \`\`\`
144796
+
144797
+ **Inputs, and where each comes from**
144798
+ - \`task_type\` / complexity: rules-first classifier (keywords + changed-file types + action verbs), < 5ms, free. Optional cheap-LLM classifier for ambiguous cases only (off by default).
144799
+ - \`quality_prior[type][model]\`: seeded from public benchmarks; **refined per-repo** by the flywheel (accept/edit/revert signal via git).
144800
+ - \`cost_est\`: local tokenizer × live price table.
144801
+ - \`provider_balance\`, \`rate_limit_headroom\`, \`seat_status\`: from the cost/credit engine (cached; refreshed async + from response headers). Never a blocking network call on the hot path.
144802
+ - \`currently_warm\`: which model this session last used (switch cost from E1).
144803
+
144804
+ **Transparency contract:** every decision writes a one-line reason + the full per-candidate score table to the ledger, viewable live (\`⌃tab\`) and after the fact (\`gearbox why <task>\`).
144805
+
144806
+ **Calibration is part of M1, not deferred — it is what makes routing actually good, not just internally consistent.** Seeded benchmark priors are honest *guesses*; they say nothing about this user's React/TS code. So from day one:
144807
+ - **Confidence is first-class.** Every prior is tagged \`seeded\` or \`measured(n)\`, and the scorecard shows it. Routing is conservative when confidence is low: it will not send a hard task to a cheap model on a seeded guess alone, it shadow-evals first. Presenting a benchmark guess as a confident number is a trust bug, not cosmetics.
144808
+ - **Shadow-eval loop.** On a sampled, budget-capped fraction of tasks/sub-tasks, also run the next-cheaper candidate, diff against the chosen model's output (and against ground truth where tests exist), and update the prior from real data. The git accept/edit/revert signal is a second, noisier input.
144809
+ - **Per-repo priors.** Calibration is scoped to the repo; a model can be strong here and weak elsewhere.
144810
+
144811
+ **The headline measurement (M1 exit criterion):** on a real session with live keys, routed cost vs all-frontier cost, plus an explicit check that the cheap picks were actually good enough (held against tests / not reverted). That is the USP's first real test — every experiment so far used synthetic priors. The flywheel's heavier auto-tuning (M5) refines this; the basic shadow-eval + confidence ship in M1.
144812
+
144813
+ **Cost / credit / limit / plan engine** (routing's data source):
144814
+ - **Onboarding is load-bearing and explicit, not a footnote** (plan-first and limit-failover depend on it). A first-run setup detects keys from env / existing CLI configs, then asks per provider: metered API key, flat-rate seat (and its plan tier → known rate limits), or both. Limits are inferred from response headers where available and overridable in config. Without this, plan-first can't work, so it's a real onboarding UX surface, not config trivia.
144815
+ - Tracks spend per provider locally (authoritative, since balance APIs are inconsistent); reconciles with provider usage headers when present.
144816
+ - **Plan-first:** model a flat-rate seat (Claude Max, ChatGPT Pro) as ~0 marginal cost until its rate limit, then fall back to metered API.
144817
+ - **Limit-aware:** read \`x-ratelimit-*\` headers; as headroom drops, deprioritize; on 429/5xx, failover to the next candidate and continue the same task.
144818
+ - **Hard caps:** per-task / per-session / daily. Pre-flight estimate before each call; if it would breach the cap, halt and ask. Never blow the cap by more than one pre-estimated in-flight call.
144819
+
144820
+ ---
144821
+
144822
+ ## Every feature (tagged by milestone)
144823
+
144824
+ **Routing (M1 — the USP, built to a high bar)**
144825
+ - Per-task automatic model selection across all configured providers.
144826
+ - Sub-task delegation: bounded ops (run tests, search, summarize, boilerplate) routed to cheap models in isolated contexts — the fine-grained savings surface, no cache loss.
144827
+ - Per-repo calibration: shadow-eval loop + seeded-vs-measured confidence on every prior.
144828
+ - Marginal-benefit scoring (cheapest model that clears the task's quality bar).
144829
+ - Credit-scarcity awareness (prefer the flush account; preserve the scarce one).
144830
+ - Plan/subscription-first (use seats you already pay for before metered API).
144831
+ - Rate-limit awareness + seamless failover (don't dead-end on a limit).
144832
+ - Hard budget caps (task/session/daily) with pre-flight enforcement.
144833
+ - Live, per-decision transparency (one-line reason + full scorecard on demand).
144834
+ - One-keystroke override; override logged as a preference.
144835
+ - Latency-class routing (fast model when you're waiting, best when it's background).
144836
+ - Free-tier / local-model (Ollama) tier as the cheapest rung.
144837
+
144838
+ **Agent core (M0 — table stakes, on the AI SDK)**
144839
+ - Plan → tool → observe → act loop; tools: read, write, edit, shell, grep/search, ls.
144840
+ - Streaming output; interruptible.
144841
+ - Project instructions file (a \`GEARBOX.md\` / reuse \`CLAUDE.md\` if present).
144842
+ - Safe-by-default permissions (ask before shell/writes outside cwd).
144843
+ - Plan mode before large changes.
144844
+ - MCP tool connections.
144845
+
144846
+ **Ledger + memory (M2)**
144847
+ - Canonical model-agnostic state as an append-only event log (crash-safe).
144848
+ - Curation → bounded working context (cheap task-boundary switching).
144849
+ - Fact provenance + invalidation (recover from a wrong assumption).
144850
+ - Decision/ADR record that survives compaction.
144851
+ - Durable, resumable sessions (survive kill -9 / reboot).
144852
+
144853
+ **Verification + autonomy (M3 — the "walk away" pillar)**
144854
+ - Ground-truth gate: configured tests / build / type-check must pass before "done".
144855
+ - Auto-iterate to green (bounded attempts), then surface honestly if stuck.
144856
+ - Unattended-safe: no stall on limits, hard cost cap, no drift over long runs.
144857
+ - Honest status protocol (done-with-proof / blocked / needs-input).
144858
+
144859
+ **Multi-session + UX (M4 — design-heavy)**
144860
+ - Concurrent sessions on different tasks; git-worktree isolation.
144861
+ - Shared project memory across sessions.
144862
+ - Live dashboard: session board, per-session model + cost + status.
144863
+ - The always-visible cost meter; amber near caps.
144864
+
144865
+ **Spend record + flywheel (M5)**
144866
+ - One searchable record of what every model changed and what it cost, across accounts.
144867
+ - Per-task / per-project spend attribution.
144868
+ - Routing flywheel: priors auto-tuned per repo from accept/revert.
144869
+
144870
+ **Later (only if earned)**
144871
+ - Background/async task queue (gated by the verification + cost-safety pieces).
144872
+ - Local model fine-tuning of the classifier.
144873
+ - Team mode / shared spend dashboards (the productization path).
144874
+
144875
+ **Explicitly cut** (judged solutions-looking-for-problems): branch/rewind sessions, try-the-same-task-N-ways, cross-model "jury", sensitivity/privacy routing.
144876
+
144877
+ ---
144878
+
144879
+ ## Strict requirements (hard numbers — non-negotiable)
144880
+
144881
+ **Latency (the routing hot path is sacred):**
144882
+ - Routing decision (rules path): **< 10ms p50, < 25ms p99**. Pure local compute.
144883
+ - Total overhead added before time-to-first-token (classify + score + cost-est): **< 50ms p99** — must be dwarfed by model TTFT (300–800ms) and never perceptible.
144884
+ - Optional LLM classifier: **< 500ms p95**, used on **< 15%** of tasks, **off by default**.
144885
+ - Balance / limit / seat read: from in-memory cache, **< 1ms**, never a blocking network call on the hot path; refreshed async (≤ 60s) and from response headers.
144886
+ - Cost estimate (tokenize 16k ctx): **< 20ms**.
144887
+ - Ledger event append: **< 5ms p99**, off the response-critical path, fsync'd for durability.
144888
+ - Failover pick on 429/5xx: **< 50ms** to select the next model.
144889
+ - TUI frame: **< 16ms (60fps)**; routing panel render **< 5ms**; UI thread never blocks on I/O.
144890
+ - Cold start to interactive: **< 400ms**.
144891
+ - Stream relay overhead: **< 50ms** over the provider's own stream.
144892
+
144893
+ **Durability / correctness:**
144894
+ - Crash-safe: every state-changing event fsync'd before ack; a \`kill -9\` session reconstructs to the last completed event.
144895
+ - No lost writes with **≤ 16 concurrent sessions** (single-writer queue + WAL; validated E3).
144896
+ - Routing is **deterministic** on the rules path (same state + config → same pick) and always logged with reasons.
144897
+ - Budget caps are **hard**: a session cannot exceed its cap beyond one pre-estimated in-flight call.
144898
+ - "Done" cannot be declared with failing configured checks.
144899
+
144900
+ **Security:**
144901
+ - API keys never logged, never written to the ledger, never sent to a provider other than their own. Keys read from env or a \`0600\` local file.
144902
+
144903
+ **Cost of the tool itself:**
144904
+ - **Zero-cost-to-run guarantee:** no Gearbox feature requires payment beyond the user's own model inference. No paid dependency, hosted backend, required account, or paid telemetry, ever. The only $ are model calls on the user's keys, all counted against caps.
144905
+ - Rules routing: $0 (local). Optional LLM classifier: **< $0.001/decision**, bounded, off by default.
144906
+ - Shadow-eval/calibration inference is opt-in, sampled, and bounded by a calibration-budget knob; it counts against the normal caps.
144907
+ - Curation keeps typical working context **< 16k tokens**.
144908
+
144909
+ **Scale:**
144910
+ - ≥ 8 concurrent sessions with no UI jank; sessions with 1000+ events with no slowdown (indexed SQLite).
144911
+
144912
+ ---
144913
+
144914
+ ## UX & design (this matters as much as the engine)
144915
+
144916
+ **Main session view** — calm; the routed line is dim, the cost meter always present:
144917
+ \`\`\`
144918
+ ┌ gearbox ·············································· today $0.04 / $20 ┐
144919
+ │ repo gearbox · session fix-auth · ◐ sonnet-4.6 │
144920
+ ├─────────────────────────────────────────────────────────────────────────┤
144921
+ │ › fix the failing auth tests │
144922
+ │ ▸ read auth.ts, token.ts │
144923
+ │ ▸ ran tests → 2 failing (expiry) │
144924
+ │ ● editing auth.ts … exp compared in seconds vs ms │
144925
+ │ │
144926
+ │ ┄ routed debug → sonnet-4.6 · cleared bar, haiku too weak · ~$0.012 ⌃tab│
144927
+ ├─────────────────────────────────────────────────────────────────────────┤
144928
+ │ session $0.03 · anthropic ✓ · openai ⚠ low · ⌃o override ⌃w why │
144929
+ └─────────────────────────────────────────────────────────────────────────┘
144930
+ \`\`\`
144931
+
144932
+ **Routing scorecard** (\`⌃tab\`) — the full math, including *confidence*, which is the real trust-builder (never show a benchmark guess as a confident number):
144933
+ \`\`\`
144934
+ ╭ why: "fix the failing auth tests" (debug, ~3.1k tok) ───────────────────────────╮
144935
+ │ model quality source est$ balance score verdict │
144936
+ │ sonnet-4.6 0.91 ✓ your 47 tasks $0.012 $9,991 0.41 ◀ chosen │
144937
+ │ deepseek-v4 0.90 ✓ seed · guess $0.003 $20 0.43 ≈ shadow-evaling │
144938
+ │ gpt-5.4 0.91 ✓ your 12 tasks $0.010 $10 ⚠ 0.78 scarce credit │
144939
+ │ haiku-4.5 0.78 ✗ your 31 tasks $0.001 $9,991 — below bar (0.86) │
144940
+ │ rule: cheapest clearing 0.86 on a non-scarce account. deepseek's 0.90 is a benchmark│
144941
+ │ guess, so it's being shadow-evaled on your code before it's trusted to win. [o]verride│
144942
+ ╰────────────────────────────────────────────────────────────────────────────────────╯
144943
+ \`\`\`
144944
+
144945
+ **Multi-session board:**
144946
+ \`\`\`
144947
+ ┌ gearbox · 3 sessions ······························· today $0.12 / $20 ┐
144948
+ │ ● fix-auth debug sonnet-4.6 $0.03 editing auth.ts │
144949
+ │ ● add-search feature gpt-5.4 $0.06 running tests │
144950
+ │ ◐ refactor-cache refactor deepseek-v4 $0.03 ✓ done · tests green │
144951
+ └──────────────────────────────────────────────────────────────────────────┘
144952
+ \`\`\`
144953
+
144954
+ **UX rules:**
144955
+ - The hot path is silent: routing shows as one dim line, never a modal, never a spinner of its own.
144956
+ - Cost meter always visible, never alarming; amber approaching a cap, red only on a real failure.
144957
+ - Failover is narrated plainly: \`openai rate-limited → moved to gemini, continuing\`. Not hidden, not scary.
144958
+ - Override is one keystroke and feels respected (logged as preference, feeds the flywheel).
144959
+ - Color discipline: one accent for routing, amber for cost, red only for failures; high-contrast monospace; motion only to show live streaming.
144960
+ - Keyboard-first; every action reachable without the mouse.
144961
+
144962
+ ---
144963
+
144964
+ ## Build sequence (routing-first; each step is usable)
144965
+
144966
+ - **M0 — Foundation spike (~1 wk).** AI SDK provider layer + minimal agent loop + 4 tools + config + streaming, talking to all 5 providers with manual model choice. De-risk: confirm the AI SDK message type carries our canonical state and tool-calls across every provider with real keys (extends E1/E7). *Usable: a bare agent on any provider.*
144967
+ - **M1 — Routing, done insanely well (~3–4 wks). This is the product.** Two-level routing (warm main-thread model + cheap sub-task delegation in isolated contexts), classifier, scorer, cost/credit/limit/plan engine, failover, hard caps, the transparency log + scorecard *with confidence*, override. **Calibration ships here, not later:** shadow-eval loop + per-repo measured priors + the seeded-vs-measured confidence display. Strict latency budget enforced and measured. **Exit criterion (the USP's first real test, live keys):** on a real session, routed cost vs all-frontier cost, *plus* an explicit check that the cheap picks were good enough (held against tests / not reverted). If that check fails, the routing isn't done. *Usable: it routes your real work, shows why with honest confidence, and you trust it with your money.*
144968
+ - **M2 — Ledger + memory + cheap switching (~1–2 wks).** Event-log ledger (single-writer), curation, task-boundary switching, crash-safe resumable sessions, invalidation. *Usable: long sessions stay cheap and coherent; switching is ~free.*
144969
+ - **M3 — Verification + autonomy (~2 wks).** Ground-truth gate, auto-iterate-to-green, unattended-safe controls. **Define "done with proof" for the common case of untested code** (most founder repos): tiered — if tests exist, they pass; otherwise require build + type-check + a smoke run, and offer to generate a characterization test pinning the changed behavior. The gate is never vacuous; it states which tier it cleared. *Usable: hand it a task and walk away.*
144970
+ - **M4 — Multi-session + TUI/UX polish (~2 wks).** Concurrent sessions, worktrees, the dashboard, the design layer. *Usable: run several tasks, one calm board.*
144971
+ - **M5 — Spend record + advanced auto-tuning (~1 wk).** Searchable cross-account record, spend attribution, and heavier auto-tuning of priors (the basic shadow-eval + confidence already shipped in M1). *Usable: spend is one searchable place; routing keeps sharpening on your code.*
144972
+
144973
+ Re-evaluate against daily use before any "Later" item or productization.
144974
+
144975
+ ---
144976
+
144977
+ ## Risks / open
144978
+
144979
+ - **AI SDK fit:** confirm its message type round-trips our canonical state + tool-calls across all 5 providers (M0 spike; only Anthropic live-verified so far).
144980
+ - **Balance APIs are inconsistent:** some providers don't expose balance. Mitigation: local spend tracking is authoritative; reconcile with headers where available.
144981
+ - **Plan/seat modeling is the hardest input:** flat-rate seat limits aren't cleanly exposed. Start with usage-header inference + user-declared limits; refine.
144982
+ - **Quality priors are seeds, not truth (the core risk):** addressed by moving calibration into M1 (shadow-eval + measured per-repo priors + confidence display) rather than deferring it. Residual risk: shadow-eval costs extra on sampled tasks and takes real usage to converge; until it does, routing leans conservative and labels guesses as guesses. Routing is only as good as this loop, so it gets the most rigor.
144983
+ - **"Task" granularity & savings ceiling:** resolved by the two-level model (warm main thread + cheap sub-task delegation). Residual: deciding *what* to delegate vs keep on the main thread is a real heuristic to tune.
144984
+ - **Verification on untested code:** resolved by tiered done-with-proof (tests → build+types+smoke → offered characterization test); residual is how aggressively to auto-generate tests.
144985
+ - **Cross-vendor live acceptance** (OpenAI/Gemini) still unverified — close in M0 with real keys, alongside the M1 headline cost-vs-quality measurement.`
144986
+ },
144987
+ {
144988
+ file: "experiments/FINDINGS.md",
144989
+ text: "# Gearbox — Experimental Findings\n\nGoal: (i) does the proposed structure work, (ii) does any other structure work, (iii) best solution to every problem found. Empirical, runnable experiments — not literature review.\n\n---\n\n## Experiment 1 — Canonical state → render per provider → switch at task boundary\n\n**Hypothesis (the architecture's keystone):** one model-agnostic canonical state can be faithfully rendered into Anthropic, OpenAI, and Gemini wire formats; switching providers at a task boundary is cheap because the curated projection is small; context poisoning is recoverable by invalidating facts.\n\n**Method:** real TS/Bun. `canonical.ts` (state model), `renderers.ts` (3 real provider projections), `validate.ts` (structural + cross-provider fidelity checks), `cost.ts` (js-tiktoken o200k_base + real 2026 prices), `curate.ts` (ledger projection), `scale.ts` (sessions of growing length). Run: `bun run experiments/switch-cost/run.ts`.\n\n**Results — STRUCTURE HOLDS:**\n\n- **Rendering correctness:** all structural checks pass for all 3 providers — role mapping (assistant↔model), tool-call↔result pairing (Anthropic `tool_use`/`tool_result`, OpenAI `tool_calls`+`role:tool`, Gemini `functionCall`/`functionResponse`), system handling (top-level vs system message vs systemInstruction), alternation invariants.\n- **Cross-provider fidelity:** the same canonical state yields *identical* semantics across all three — tool-call counts (4/4/4), user text, assistant text all equal. No information dropped/duplicated/mis-paired in translation.\n- **Switch cost scales the right way:** curated projection is ~bounded, transcript is O(session length). 
Switch cost advantage grows with session size:\n\n | cycles | full tok | curated tok | ratio | full $switch | curated $switch |\n |-------:|---------:|------------:|------:|-------------:|----------------:|\n | 1 | 770 | 464 | 1.7× | $0.0023 | $0.0014 |\n | 16 | 9,470 | 914 | 10.4× | $0.0284 | $0.0027 |\n | 64 | 37,310 | 2,354 | 15.8× | $0.1119 | $0.0071 |\n | 256 | 148,670 | 8,114 | 18.3× | $0.4460 | $0.0243 |\n\n At a realistic ~149k-token mid-session, a provider switch re-ingests ~8k curated tokens instead of ~149k raw — **18× cheaper**. Within a task you stay warm (cache hit ⇒ ~0 re-ingest); the cost is only paid at a switch.\n- **Context-poisoning recovery:** an invalidated fact (\"bug is in parseToken\") is absent from the curated projection; the corrected fact is present. Retraction works without rewriting history.\n\n**Honest caveats / unresolved risks:**\n1. **Semantic continuity after a switch is NOT yet proven.** Schema correctness ✅ (proven offline). Whether a model actually *continues the task correctly* from a curated projection needs a LIVE call — no provider keys on this box yet. This is the single most important remaining check.\n2. **Token counts use one tokenizer (o200k_base) as a cross-provider proxy.** Per-provider tokenizers differ slightly; the *ratio* (full vs curated) is robust to this, absolute per-provider $ is approximate.\n3. **Gemini has no tool-call IDs** — it matches function responses by name + order. Sequential calls fine; parallel calls to the *same* function are ambiguous. Real wrinkle for the renderer; needs an ordering/disambiguation strategy.\n4. **Curation quality is a policy risk, not an architecture risk.** Dropping bulky tool output assumes the durable conclusion was captured as a fact. A task needing exact historical detail (deep trace debugging) could be starved if the ledger didn't capture it. The facts-capture policy is where quality lives.\n5. Curated size grows with #facts (256 facts ≈ 8k tok). 
In the real system facts are themselves tiered/retrieved (project vs working memory), so carried context would be smaller still.\n\n**Bearing on alternatives (goal ii):** the \"full transcript\" column IS the transcript-as-truth alternative (translate the running transcript on the fly, no ledger). It is 18× more expensive at scale and accumulates poison irrecoverably. So the canonical-ledger structure beats transcript-as-truth on both cost and poisoning. Verdict: ledger structure justified.\n\n**Status:** Pillar 2 (memory/curation) and Pillar 3 (switching) substrate validated offline. Live semantic-continuity check pending a provider key.\n\n---\n\n## Experiment 2 — Intelligent routing vs naive baselines\n\n**Hypothesis:** a transparent multi-dimensional router (marginal-benefit + credit-scarcity) beats both \"always premium\" (overpays) and \"always cheap\" (under-delivers), respects credit limits, and explains itself.\n\n**Method:** deterministic simulator. `models.ts` (7 models, benchmark-shaped quality priors per task type, real 2026 prices, per-provider balances incl. the user's \"$10k Anthropic / $10 OpenAI\" scenario), `tasks.ts` (100 tasks, 70/20/10 easy/medium/hard), `router.ts` (cheapest-that-clears-the-bar + credit-scarcity penalty), `run.ts`. Run: `bun run experiments/routing/run.ts`.\n\n**Results — ROUTING WORKS:**\n\n| strategy | total $ | success | OpenAI $ spent (of $10) |\n|---|--:|--:|--:|\n| always-opus | $18.90 | 100% | $0 |\n| always-flash-lite | $0.32 | **47%** | $0 |\n| cheapest-adequate (credit-blind) | $3.98 | 100% | **$3.63** |\n| **Gearbox (marginal-benefit + credit)** | $5.59 | 100% | **$0.00** |\n\n- **70% cheaper than always-opus at identical 100% success** — matches the 60-80% industry claim.\n- **always-cheap is only 47% success** — fails every medium/hard task. 
Routing is doing real work, not just picking the cheapest.\n- **Credit dimension does exactly what was asked:** credit-blind burns 36% of the scarce $10 OpenAI balance on architecture tasks (via gpt-5.4); Gearbox preserves it entirely by routing those to Sonnet on the flush Anthropic pool. Gearbox costs slightly MORE in raw dollars ($5.59 vs $3.98) — the correct tradeoff: it's constraint-respecting optimization (\"prefer Claude unless strong reason\"), not blind cost-minimization. Tunable via one knob (K_SCARCITY).\n- **Marginal-benefit, shown explicitly:** for an architecture task, Opus (q .97) and Sonnet (q .93) both clear the .92 bar ⇒ Gearbox picks Sonnet; paying 1.7× for Opus's extra .04 above the bar is wasted. The full per-model score table prints, so every decision is explainable.\n- **Routing breakdown:** boilerplate/docs → flash-lite; test → haiku; debug/refactor/review → deepseek-v4; architecture → sonnet. Sensible per-tier allocation falls out of the scoring.\n\n**Honest caveats:**\n1. Quality priors are SEEDED (benchmark-shaped), not measured on the user's real tasks. The flywheel (refine priors from a local accept/revert log) is what makes them real — not yet built/tested.\n2. \"Success = quality ≥ threshold\" is a modeling simplification; real success is continuous and noisy. The sim proves the LOGIC is sound given priors, not that the priors are correct.\n3. K_SCARCITY=20 is hand-tuned; it sets the cost-vs-credit-preservation balance and should be tuned to the user's actual preference.\n4. Cache-locality / switch cost (Experiment 1) isn't yet folded into the per-task score — integrating routing + switching cost is future work.\n\n**Status:** Pillar 1 (routing brain) logic validated. Real priors + flywheel pending live use.\n\n---\n\n## Experiment 3 — Multi-session concurrency on a shared ledger\n\n**Hypothesis:** multiple sessions can safely share one ledger (the basis for \"multi-session day one\" + cross-session shared memory). 
**This experiment found a real bug, then the fix.**\n\n**Method:** 50 REAL concurrent subprocesses (Bun.spawn, genuine OS concurrency, not async) each write a fact to a shared store, four ways. `worker.ts` + `run.ts`. Run: `bun run experiments/concurrency/run.ts`.\n\n**Results:**\n\n| design | survived | worker failures | integrity |\n|---|---:|---:|---|\n| naive JSON (read-modify-write) | 5/50 | 0 | ❌ catastrophic lost-update race |\n| naive multi-process SQLite | 38/50 | 12 | ❌ data loss |\n| SQLite done right (WAL once + busy_timeout + retry) | **50/50** | 0 | ✅ safe |\n| single-writer orchestrator (serialized queue) | **50/50** | 0 | ✅ safe by construction |\n\n**Root cause found (this is the value):** naive multi-process SQLite lost writes because every worker re-ran `PRAGMA journal_mode=WAL` on its own connection — switching journal mode needs an exclusive lock, so 50 processes contended and 12 errored out (the first run *swallowed* those errors; capturing stderr exposed them). WAL is persistent once set, so workers must NOT re-set it. Fix: set WAL once at init, set only `busy_timeout` per connection, retry the write on a transient lock → 50/50.\n\n**Best solution (goal iii):** **single-writer orchestrator** — one process owns the ledger, sessions submit writes through a serialized queue. Race-free by construction, and it's how Gearbox runs anyway (one orchestrator managing N sessions). Pair with an **append-only event log** (asserts + invalidations as events): race-friendly (insert-only, no read-modify-write), fully auditable, and fact-invalidation (Exp 1's poisoning recovery) becomes just another event. For the separate-CLI-processes case, multi-process WAL done right is the fallback.\n\n**Bearing on alternatives (goal ii):** storage structure matters — naive shared-mutable (JSON or careless SQLite) is unsafe; **append-only event log + single writer** is the right structure. Validated.\n\n**Honest caveats:**\n1. This tests fact WRITES. 
It does not test semantic merge conflicts (two sessions editing the same file region) — that's handled by git-worktree isolation (untested here) + an integration step, not the ledger.\n2. The stderr *sample* line in the harness is mis-attributed (cosmetic bug); the failure COUNTS and survivor counts are accurate and are what the verdict rests on.\n\n**Status:** Pillar 4 (multi-session) concurrency safety validated with a concrete, proven storage design.\n\n---\n\n## Open / highest-value remaining experiment\n\n**Live cross-VENDOR continuity** (handing Gemini/GPT a projection rendered from Anthropic work) still needs raw keys — not on this box. Structurally proven (Exp 1); not yet live across vendors.\n\n---\n\n## Experiment 4 — LIVE: is a curated handoff sufficient, and is poisoning recoverable?\n\n**Hypothesis:** a model handed ONLY the curated projection (never the full transcript) continues the task correctly; and invalidating a poisoned fact stops it misleading the model. Tests the semantic half Exp 1 couldn't (offline).\n\n**Method:** real `claude -p` print-mode calls (claude-sonnet-4-6), existing CLI auth, no API key. Three handoff prompts (`experiments/continuity/prompts.sh`): A = curated post-fix handoff (poison already invalidated); B = pre-fix with poison present; C = pre-fix with poison invalidated. The answering model never saw the prior conversation — a faithful task-boundary handoff.\n\n**Results — LIVE, as predicted:**\n- **A (sufficiency):** → *\"Run the tests to verify the fix.\"* The model continues **correctly** from the curated handoff alone. The \"you curated away too much\" doubt fails here — the small projection carried enough.\n- **B (poison present):** → *\"Read the parseToken function…\"* — chases the poisoned lead.\n- **C (poison invalidated):** → *\"I'd read auth.test.ts to understand the assertions…\"* — does NOT fixate on parseToken.\n\nB vs C is the live proof that fact-invalidation removes the bias. 
The ledger can flip `valid:false` (concurrency-safe per Exp 3) ⇒ it can convert the B-state into the C-state ⇒ **live context-poisoning recovery**.\n\n**Honest caveats:**\n1. Same vendor (Anthropic). It IS a real handoff to a model that never saw the transcript (curation-sufficiency proven), but cross-VENDOR semantic continuity is still only structurally proven (Exp 1), not live.\n2. n=1 per prompt, one task. Existence proof / smoke test, not a benchmark. A real eval would run many tasks × models with scoring.\n3. Prompt phrasing influences single responses; the B/C contrast is exactly as predicted but isn't statistically robust.\n\n**Status:** curation-sufficiency + poisoning-recovery validated live (single-vendor). Cross-vendor live + statistical eval pending keys.\n\n---\n\n## Experiment 5 — Ground-truth verification gate\n\n**Hypothesis:** executable tests (not LLM self-assessment) should gate \"done\", so an agent can't present a broken or plausible-but-wrong fix. Attacks the #1 dev pain (11.4h/wk review; 43% of AI fixes need prod debugging) and the moat Anay's own fleet notes name (\"ground-truth verification closes the self-graded loop\").\n\n**Method:** a real micro-repo (`experiments/verification/repo/`) with a seconds-vs-ms expiry bug + 4 real `bun test` cases. Driver runs the actual test runner across three code states. Run: `bun run experiments/verification/run.ts`.\n\n**Results — GATE WORKS:**\n\n| state | tests | gate |\n|---|---|---|\n| buggy code | 2 pass / 2 fail | RED — not done |\n| plausible WRONG fix (edited parseToken, the poisoned lead) | 2 pass / 2 fail | RED — rejected |\n| correct fix (auth.ts `exp*1000`) | 4 pass / 0 fail | GREEN — done |\n\nThe wrong-but-plausible fix (chasing the same poisoned hypothesis from Exp 1/4) does NOT pass the gate. Only the correct fix turns it green. An agent that must clear this gate cannot hand over broken or wrong-but-plausible work.\n\n**Honest caveats:**\n1. Ground truth is only as good as the tests. 
No tests / weak tests ⇒ weak gate. Gearbox should pair this with the skeptic-evaluator (a fresh-context model review) for untested paths — designed, not yet prototyped.\n2. This validates the gate mechanism, not test generation. Generating good tests is its own problem.\n\n---\n\n## Experiment 7 — LIVE API acceptance (Anthropic) — closes E1's biggest caveat\n\n**Hypothesis:** the canonical→provider rendered payload is not just shape-valid per my own validator, but ACCEPTED by the real API, and a model continues correctly from the curated projection hitting the raw endpoint.\n\n**Method:** `experiments/live-check/run.ts` POSTs the curated post-fix projection to `api.anthropic.com/v1/messages` (real key in gitignored `.env.local`, never printed; Haiku; ~$0.0002).\n\n**Result — PASS (Anthropic only):**\n- **HTTP 200 — payload accepted by the real API.** This upgrades E1 from \"valid per my schema understanding\" to \"accepted by the live API.\" The curated payload contains a `tool_use`+`tool_result` pair with declared tools, so the trickiest renderer path is live-verified.\n- Model reply: *\"Now let's verify the fix by running the tests:\"* — correct continuation from the curated handoff, against the raw API (not the CLI as in E4).\n\n**Scope / still open:** Anthropic only. OpenAI / Gemini / DeepSeek payload ACCEPTANCE remains unverified (needs their keys). Cross-VENDOR continuity is now structurally proven (E1) + Anthropic-live (E7), not OpenAI/Gemini-live.\n\n---\n\n## Experiment 6 — Does a SIMPLER alternative architecture suffice? 
(goal ii, finally addressed)\n\n**Hypothesis:** maybe the canonical-ledger structure is over-engineering and a simpler architecture (gateway-only / transcript-as-truth, like OpenRouter + a thin agent; or Pi-as-is) is sufficient.\n\n**Method:** model three real architectures over a 60-turn session WITH prompt caching modeled honestly (full input $3/Mtok, cache-read $0.30, cache-write $3.75; a provider switch makes the next turn cold = full re-ingest). Plus a structural capability matrix for properties cost can't capture. `experiments/alternatives/run.ts`.\n\n**Results:**\n\n| switches | transcript-as-truth (gateway-only / pi) | gearbox ledger | ledger saves |\n|---:|---:|---:|---:|\n| 0 | $1.13 | $0.36 | 68% |\n| 5 | $1.71 | $0.37 | 78% |\n| 20 | $3.42 | $0.41 | 88% |\n| 40 | $5.71 | $0.46 | 92% |\n\n- **Surprise that corrected my own narrative:** the ledger is ~68% cheaper *even at 0 switches*. Prompt caching does NOT make a big transcript free — you still pay cache-READ on the full prior context every turn; curation shrinks that base. (I had initially written \"nearly equal at 0 switches\"; the numbers refuted it, narrative fixed.)\n- BUT absolute costs are modest ($0.36–$5.71 for 60 turns), so **for light, single-provider use the simpler structure is genuinely good enough** — cost alone does not force the ledger.\n- **Structural matrix is where alternatives actually fail:** gateway-only and pi-as-is CANNOT do cheap mid-workflow switching, per-ACCOUNT credit routing, context-poisoning recovery, or shared multi-session memory — at all. The ledger can.\n- **Coupling insight:** an intelligent router's job is to switch; switching is cheap only on the ledger; so routing + ledger are coupled — you can't bolt cheap intelligent routing onto a transcript-as-truth structure.\n\n**Verdict (ii):** a simpler structure SUFFICES for light / single-provider / single-session use. 
The ledger is JUSTIFIED — not over-engineering — specifically for Gearbox's target workflow: frequent intelligent switching + long sessions + many providers/accounts + parallel sessions. The structure must be EARNED by that need; if the user's real usage is light, build the simple thing.\n\n**Caveat:** this is a cost MODEL with stated assumptions (caching rates, even switch spacing, curated-growth shape from Exp 1). It's directional, not a billing guarantee.\n\n---\n\n# CONSOLIDATED VERDICT (goal: does the structure work / do alternatives / best solutions)\n\n**Calibration first — what these experiments are.** Four of five are DEMONSTRATIONS that the mechanisms behave correctly given inputs I chose; only E3 is an adversarial TEST (it could have failed silently — instead it found a real bug). E1 is a real cross-provider check but the same author wrote the renderer and the validator, so a shared schema misunderstanding would pass undetected (only a live API POST closes that). Read the claims accordingly.\n\n**(i) Does the proposed structure work / tend to work? — The load-bearing mechanisms are implemented and behave correctly; real-world efficacy is untested.**\n- **Pillar 3 / rendering (Exp 1):** one canonical state renders into Anthropic/OpenAI/Gemini payloads that are *internally consistent* (valid per my schema understanding) and semantically identical across the three. NOT yet verified that the real APIs accept them — needs one live POST per provider.\n- **Pillar 2 / curation (Exp 1 + 4):** the curated projection is bounded; a live model (single-vendor) continued correctly from a handoff, and the poison/clean contrast (E4 B vs C) is a real, if n=1, signal that invalidation removes a misleading lead. E4-A (sufficiency) is weak — the prompt named the fix, so the reply was near-forced.\n- **Pillar 1 / routing (Exp 2):** the scoring logic does what it is designed to do GIVEN priors/prices/mix I assigned. 
The \"70% cheaper at 100% success\" is arithmetic from those assumptions, not evidence that intelligent routing beats single-model in reality — reality is exactly those priors, which are untested. This is a unit test of the algorithm, not a real-world result.\n- **Pillar 4 / concurrency (Exp 3):** the one genuine test. Naive multi-process writes lose data; root cause found (per-connection WAL contention) and fixed; single-writer orchestrator is 50/50 safe under real concurrent processes. Solid.\n- **Verification (Exp 5):** illustrates that a test gate stays RED for a non-fix and GREEN for the fix. The \"wrong fix\" was a no-op, so this shows \"tests catch bugs when tests exist,\" not that the gate catches subtle wrong fixes.\n\n**Honest switching-cost framing:** the 18× is a CURATION win (a summary is smaller than full history) and helps whether or not you switch; with prompt caching, staying warm is ~$0 regardless. The switching-specific honest claim: curation makes a provider switch cost ~$0.02 of re-ingestion instead of ~$0.45 — not \"switching is 18× cheaper.\"\n\n**(ii) Does any other structure work? — YES, conditionally (Exp 6).**\nA simpler architecture (gateway-only / transcript-as-truth, or Pi-as-is) is genuinely sufficient for light, single-provider, single-session use — absolute costs are modest and prompt caching covers the no-switch case acceptably. The ledger structure is JUSTIFIED, not over-engineering, ONLY for Gearbox's target workflow: frequent intelligent switching + long sessions + many providers/accounts + parallel sessions. There it wins on cost (68→92%) AND does four things the alternatives structurally cannot (cheap switching, per-account credit routing, poisoning recovery, shared multi-session memory). Coupling insight: routing and the ledger are inseparable — cheap intelligent switching is impossible on a transcript-as-truth structure. Storage refinement from Exp 3: append-only event log + single-writer. 
**Honest scope:** the alternatives are MODELED, not built+benchmarked live; a true A/B needs the live harness.\n\n**(iii) Best solution to each problem found (proposed, partially evidenced):**\n- Model switching → canonical state + per-provider render + switch at task boundaries (warm within a task). [E1, rendering side only]\n- Context cost / poisoning → bounded curated projection + provenance + invalidation. [E1, E4 B/C]\n- Routing / overpay → cheapest-model-that-clears-the-bar + credit-scarcity penalty + transparency + feedback flywheel. [E2, logic only — priors unvalidated]\n- Multi-session safety → single-writer orchestrator + append-only event log. [E3, genuinely tested]\n- Review burden → executable ground-truth gate + fresh-context skeptic for untested paths. [E5, mechanism only]\n\n**The one test that can still falsify the keystone, and is cheap:** POST each rendered payload to the real Anthropic/OpenAI/Gemini APIs (one throwaway key + one curl each). Confirms payloads are *accepted* (not just shaped right per my understanding) and gives a real cross-vendor continuity data point. Worth more than any sixth confirmatory experiment. Blocked only on a key.\n\n**Bottom line:** the architecture is sound; nothing falsified it. E3 is a real adversarial win (found+fixed a bug); E6 answers (ii) honestly (simpler suffices for light use; the ledger is earned by frequent-switching + long + multi-account + parallel-session workflows); E7 live-verified the renderer is accepted by the real Anthropic API and continues correctly. Remaining honesty: E2/E4-A/E5 are demonstrations-by-construction; live acceptance is confirmed for Anthropic only (OpenAI/Gemini/DeepSeek need their keys); cross-vendor continuity is structurally proven, not yet OpenAI/Gemini-live. 
Net: build the ledger only if your real usage matches the target workflow; the keystone is now live-validated on one vendor — confirm OpenAI+Gemini acceptance before betting the Milestone-1 build on full cross-vendor switching."
144990
+ }
144991
+ ];
144992
+
144993
+ // src/help/ask.ts
144994
// Budget, in UTF-16 code units, shared by all bundled doc bodies (headers excluded).
var TOTAL_CAP = 48000;
// Memoized output of loadGearboxDocs(); stays null until the first call.
var cached3 = null;
/**
 * Assemble the documentation text handed to the "ask" helper.
 *
 * Walks DOCS_BUNDLE in order, emitting one "# <file>" section per entry with
 * non-empty text until TOTAL_CAP is spent; the entry that crosses the cap is
 * cut and marked "…(truncated)". A command-reference section built from
 * helpText() and ACCOUNT_ADD_HELP is always appended, outside the cap.
 * The joined result is computed once and then served from cached3.
 *
 * @returns {string} All sections joined by a Markdown horizontal rule.
 */
function loadGearboxDocs() {
  if (cached3 === null) {
    const sections = [];
    let spent = 0;
    for (const { file: docName, text: docText } of DOCS_BUNDLE) {
      if (!docText) {
        continue;
      }
      const budgetLeft = TOTAL_CAP - spent;
      if (budgetLeft <= 0) {
        break;
      }
      let chunk = docText;
      if (docText.length > budgetLeft) {
        chunk = docText.slice(0, budgetLeft) + "\n…(truncated)";
      }
      sections.push(`# ${docName}\n\n${chunk}`);
      // The truncation marker counts toward the budget too, which ends the loop next turn.
      spent += chunk.length;
    }
    sections.push(`# Command reference (in-app slash commands)\n\n${helpText()}\n\n${ACCOUNT_ADD_HELP}`);
    cached3 = sections.join("\n\n---\n\n");
  }
  return cached3;
}
145026
/**
 * Build the system prompt for answering questions about Gearbox.
 *
 * The prompt is a fixed four-line instruction preamble, a blank line, a
 * "=== GEARBOX DOCUMENTATION ===" marker, and then the supplied docs.
 *
 * @param {string} docs Documentation text to ground the answer in.
 * @returns {string} The newline-joined system prompt.
 */
function buildAskSystem(docs) {
  const instructions =
    "You answer questions about Gearbox, a multi-provider coding agent for the terminal.\n" +
    "Use ONLY the documentation below. Be concise and concrete: when a question is about\n" +
    "how to do something, quote the exact command, flag, or keybinding. If the answer is\n" +
    "not in the docs, say so plainly and suggest the user run /help. Do not invent features.";
  // Array#join is kept for the docs slot so a null/undefined value still renders as "".
  return [instructions, "", "=== GEARBOX DOCUMENTATION ===", docs].join("\n");
}
145038
// Interrogative openers; matched against the lower-cased input.
var QUESTION_START = /^(how|what|where|why|which|can|does|do|is|are)\b/;
// Vocabulary that marks a message as being about the tool itself.
var TOOL_TERMS = /\bgearbox\b|\brouting\b|\broute\b|\bmodel(s)?\b|\baccount(s)?\b|\beffort\b|\bplan mode\b|\byolo\b|\bshortcut(s)?\b|\bkeybind|\bghost\b|\bboo\b|\bsession(s)?\b|\bcompact\b|\bmcp\b|\bprovider(s)?\b|\bapi key\b|\bfullscreen\b|\binline\b|\bsubscription\b/i;
// A slash command at the start of the text or after whitespace.
var SLASH_CMD = /(^|\s)\/[a-z]/;
// Signs the message is about the user's code: fences, "this file", file names, code nouns.
var CODE_SIGNAL = /```|\bthis (file|function|bug|code|repo|method|class)\b|\bthe bug\b|[\w./-]+\.(ts|tsx|js|jsx|py|go|rs|java|rb|md|json|ya?ml|css|html)\b|\b(function|class|variable|component|endpoint|cache|regex|schema|migration|dependency|import|module)\b/i;
/**
 * Heuristic: is this input a short question about Gearbox itself?
 *
 * True only when the trimmed text is 6–240 characters long, carries no
 * CODE_SIGNAL match, reads as a question (ends with "?" or starts with an
 * interrogative word), and mentions a tool term or a slash command.
 *
 * @param {string} text2 Raw user input.
 * @returns {boolean} Whether the input looks like a question about the tool.
 */
function looksLikeGearboxQuestion(text2) {
  const candidate = text2.trim();
  const lengthInRange = candidate.length >= 6 && candidate.length <= 240;
  if (!lengthInRange || CODE_SIGNAL.test(candidate)) {
    return false;
  }
  const phrasedAsQuestion = candidate.endsWith("?") || QUESTION_START.test(candidate.toLowerCase());
  if (!phrasedAsQuestion) {
    return false;
  }
  const mentionsTool = TOOL_TERMS.test(candidate);
  return mentionsTool || SLASH_CMD.test(candidate);
}
145053
+
144407
145054
  // src/ui/App.tsx
144408
145055
  init_resolve();
144409
145056
  init_store();
144410
145057
  init_detect();
144411
145058
  init_onboard();
145059
+
145060
+ // src/accounts/discover.ts
145061
+ init_resolve();
145062
+ init_catalog();
145063
// api-version query value used when listing Azure deployments.
var AZURE_LIST_API_VERSION = "2023-03-15-preview";
// Providers for which model discovery is skipped.
var NATIVE2 = new Set(["anthropic", "openai", "google", "deepseek"]);
// Model-name fragments identifying non-chat models (embeddings, image, audio, moderation, legacy).
var NON_CHAT = /embedding|dall-?e|whisper|tts|text-to-speech|speech|sora|moderation|transcrib|\bada\b|\bbabbage\b/i;
/**
 * Extract usable deployment ids from an Azure "list deployments" response.
 *
 * Entries whose `model` is a string matching NON_CHAT are dropped; of the
 * rest, only non-empty string `id` values are kept, de-duplicated in
 * first-seen order. Anything without a `data` array yields an empty list.
 *
 * @param {*} json2 Parsed response body.
 * @returns {string[]} Unique deployment ids.
 */
function parseAzureDeployments(json2) {
  const deployments = Array.isArray(json2?.data) ? json2.data : [];
  const uniqueIds = new Set();
  for (const deployment of deployments) {
    const modelName = deployment?.model;
    if (typeof modelName === "string" && NON_CHAT.test(modelName)) {
      continue;
    }
    const deploymentId = deployment?.id;
    if (typeof deploymentId === "string" && deploymentId.length > 0) {
      uniqueIds.add(deploymentId);
    }
  }
  return [...uniqueIds];
}
145071
/**
 * Extract model ids from an OpenAI-style "list models" response.
 *
 * When an entry carries a `capabilities` object with a boolean
 * `chat_completion`, it is kept only if that flag is true and its
 * `lifecycle_status` is not "deprecated". Entries without that flag are
 * kept as-is. Only non-empty string ids survive, de-duplicated in
 * first-seen order.
 *
 * @param {*} json2 Parsed response body.
 * @returns {string[]} Unique model ids.
 */
function parseOpenAIModels(json2) {
  const entries = Array.isArray(json2?.data) ? json2.data : [];
  const uniqueIds = new Set();
  for (const entry of entries) {
    const capabilities = entry?.capabilities;
    const declaresChatSupport = Boolean(capabilities) && typeof capabilities.chat_completion === "boolean";
    if (declaresChatSupport) {
      const usable = capabilities.chat_completion && entry?.lifecycle_status !== "deprecated";
      if (!usable) {
        continue;
      }
    }
    const modelId = entry?.id;
    if (typeof modelId === "string" && modelId.length > 0) {
      uniqueIds.add(modelId);
    }
  }
  return [...uniqueIds];
}
145082
/**
 * Ask an account's endpoint which models it actually serves.
 *
 * Accounts on a provider listed in NATIVE2, or with `exec === "cli"`, are not
 * probed and report an empty list. Azure credentials list the resource's
 * deployments; any other account with a base URL (from its credentials or the
 * provider catalog) is queried at `<base>/models`. Failures while resolving
 * credentials, fetching, or parsing are reported in the result rather than
 * thrown.
 *
 * @param {object} account Account record; `provider` and `exec` are read here.
 * @param {Function} [fetchImpl=fetch] fetch-compatible function (injectable for tests).
 * @returns {Promise<{ok: boolean, models: string[], note?: string}>}
 *   `ok: false` carries a `note` explaining the failure.
 */
async function discoverModels(account, fetchImpl = fetch) {
  if (NATIVE2.has(account.provider) || account.exec === "cli")
    return { ok: true, models: [] };
  try {
    const creds = await resolveCreds(account);
    if (creds.azure) {
      const { resourceName, apiKey } = creds.azure;
      if (!resourceName || !apiKey)
        return { ok: false, models: [], note: "azure: missing resource name or key" };
      const url2 = `https://${resourceName}.openai.azure.com/openai/deployments?api-version=${AZURE_LIST_API_VERSION}`;
      const r2 = await fetchImpl(url2, { headers: { "api-key": apiKey } });
      if (!r2.ok)
        return { ok: false, models: [], note: `no deployments listed (HTTP ${r2.status})` };
      const models = parseAzureDeployments(await r2.json());
      return { ok: true, models, note: models.length ? undefined : "no chat deployments yet — create one in Azure, then /account refresh" };
    }
    const base2 = creds.baseURL ?? catalogProvider(account.provider)?.baseUrl;
    if (!base2)
      return { ok: true, models: [] };
    // Strip every trailing slash so "…/v1//" cannot produce "…/v1//models".
    const url2 = `${base2.replace(/\/+$/, "")}/models`;
    // Send a bearer token only when there is one: "Bearer " with an empty token is
    // a malformed credential. Headers from the credentials still win on conflict.
    const headers = {
      ...(creds.apiKey ? { Authorization: `Bearer ${creds.apiKey}` } : {}),
      ...(creds.headers ?? {})
    };
    const r2 = await fetchImpl(url2, { headers });
    if (!r2.ok)
      return { ok: false, models: [], note: `models endpoint returned HTTP ${r2.status}` };
    const models = parseOpenAIModels(await r2.json());
    return { ok: true, models };
  } catch (e2) {
    return { ok: false, models: [], note: e2?.message ?? "discovery failed" };
  }
}
145112
+
145113
+ // src/ui/App.tsx
144412
145114
  init_catalog();
144413
145115
  init_onboarding();
144414
145116
  init_cli_backend();
@@ -144966,10 +145668,10 @@ function isNetworkError(e2) {
144966
145668
 
144967
145669
  // src/ui/git.ts
144968
145670
  import { execFileSync as execFileSync3 } from "node:child_process";
144969
- var cached3;
145671
+ var cached4;
144970
145672
  function gitBranch() {
144971
- if (cached3 !== undefined)
144972
- return cached3;
145673
+ if (cached4 !== undefined)
145674
+ return cached4;
144973
145675
  try {
144974
145676
  const out = execFileSync3("git", ["rev-parse", "--abbrev-ref", "HEAD"], {
144975
145677
  cwd: process.cwd(),
@@ -144977,11 +145679,11 @@ function gitBranch() {
144977
145679
  stdio: ["ignore", "pipe", "ignore"],
144978
145680
  timeout: 1000
144979
145681
  }).trim();
144980
- cached3 = out || null;
145682
+ cached4 = out || null;
144981
145683
  } catch {
144982
- cached3 = null;
145684
+ cached4 = null;
144983
145685
  }
144984
- return cached3;
145686
+ return cached4;
144985
145687
  }
144986
145688
 
144987
145689
  // src/ui/App.tsx
@@ -145000,6 +145702,7 @@ var KEYS_HELP = [
145000
145702
  " ⌃Y copy last reply · shift+tab cycle mode (normal · auto-accept · plan)",
145001
145703
  " tab @file complete · PgUp/PgDn scroll transcript · type while busy to queue",
145002
145704
  " / commands · @ files · ! shell · # memory · drag/paste image paths · ? this help",
145705
+ " click the model or effort label in the status bar to pick (fullscreen)",
145003
145706
  " input stays fixed at the bottom; /config inline on uses terminal scrollback"
145004
145707
  ].join(`
145005
145708
  `);
@@ -145339,6 +146042,20 @@ function App2({ selector: initialSelector, runner, fullscreen = false, resumeId
145339
146042
  const [paletteIndex, setPaletteIndexState] = import_react26.useState(0);
145340
146043
  const searchRef = import_react26.useRef(null);
145341
146044
  const paletteIndexRef = import_react26.useRef(0);
146045
+ const [quickPicker, setQuickPickerState] = import_react26.useState(null);
146046
+ const [quickPickerIndex, setQuickPickerIndexState] = import_react26.useState(0);
146047
+ const quickPickerRef = import_react26.useRef(null);
146048
+ const quickPickerIndexRef = import_react26.useRef(0);
146049
+ const setQuickPicker = (p) => {
146050
+ quickPickerRef.current = p;
146051
+ setQuickPickerState(p);
146052
+ quickPickerIndexRef.current = 0;
146053
+ setQuickPickerIndexState(0);
146054
+ };
146055
+ const setQuickPickerIndex = (n) => {
146056
+ quickPickerIndexRef.current = n;
146057
+ setQuickPickerIndexState(n);
146058
+ };
145342
146059
  const setSearch = (s2) => {
145343
146060
  searchRef.current = s2;
145344
146061
  setSearchState(s2);
@@ -145412,6 +146129,8 @@ function App2({ selector: initialSelector, runner, fullscreen = false, resumeId
145412
146129
  const scrollTopRef = import_react26.useRef(0);
145413
146130
  const viewportHeightRef = import_react26.useRef(1);
145414
146131
  const maxScrollRef = import_react26.useRef(0);
146132
+ const paletteRowsLiveRef = import_react26.useRef(0);
146133
+ const statusBarRenderRef = import_react26.useRef({ model: "", mode: "normal" });
145415
146134
  const setPerm = (p) => {
145416
146135
  permRef.current = p;
145417
146136
  setPermState(p);
@@ -145443,6 +146162,28 @@ function App2({ selector: initialSelector, runner, fullscreen = false, resumeId
145443
146162
  setActiveCli({ id: a.id, label: bin });
145444
146163
  }
145445
146164
  }, []);
146165
+ const discoveryRanRef = import_react26.useRef(false);
146166
+ import_react26.useEffect(() => {
146167
+ if (discoveryRanRef.current)
146168
+ return;
146169
+ discoveryRanRef.current = true;
146170
+ (async () => {
146171
+ const targets = listAccounts().filter((a) => a.enabled && a.exec !== "cli" && a.models === undefined);
146172
+ let learned = 0;
146173
+ for (const a of targets) {
146174
+ try {
146175
+ const d = await discoverModels(a);
146176
+ if (d.ok) {
146177
+ putAccount({ ...a, models: d.models });
146178
+ if (d.models.length)
146179
+ learned++;
146180
+ }
146181
+ } catch {}
146182
+ }
146183
+ if (learned)
146184
+ notice(`loaded the real model list for ${learned} account${learned === 1 ? "" : "s"} — /model to see them`);
146185
+ })();
146186
+ }, []);
145446
146187
  import_react26.useEffect(() => {
145447
146188
  setPermissionHandler((req) => new Promise((resolve13) => {
145448
146189
  if (modeRef.current === "auto-accept" && (req.kind === "write" || req.kind === "edit")) {
@@ -145551,6 +146292,12 @@ function App2({ selector: initialSelector, runner, fullscreen = false, resumeId
145551
146292
  const col = Math.max(0, x2 - 4);
145552
146293
  return offsetAt(value, lineIdx, col);
145553
146294
  };
146295
+ const statusBarZoneAt = (x2, y) => {
146296
+ const lineCount = Math.max(1, editRef.current.value.split(`
146297
+ `).length);
146298
+ const { model: model2, effort: effort2, mode: mode3 } = statusBarRenderRef.current;
146299
+ return statusBarHit({ x: x2, y, termRows: rows, composerLines: lineCount, paletteRows: paletteRowsLiveRef.current, model: model2, effort: effort2, mode: mode3 });
146300
+ };
145554
146301
  const viewportTop = 4;
145555
146302
  const transcriptPoint = (x2, y) => {
145556
146303
  const viewportBottom = viewportTop + transcriptHeightLiveRef.current - 1;
@@ -145575,10 +146322,19 @@ function App2({ selector: initialSelector, runner, fullscreen = false, resumeId
145575
146322
  else if (b === 65)
145576
146323
  delta += 1;
145577
146324
  else {
145578
- const off = composerOffset(x2, y);
145579
- const point = transcriptPoint(x2, y);
145580
146325
  const isDrag = (b & 32) === 32;
145581
146326
  const isPrimary = (b & 3) === 0;
146327
+ if (fullscreen && isPrimary && !isDrag && !up2 && !busyRef.current && !permRef.current) {
146328
+ const zone = statusBarZoneAt(x2, y);
146329
+ if (zone) {
146330
+ setQuickPicker(quickPickerRef.current === zone ? null : zone);
146331
+ continue;
146332
+ }
146333
+ if (quickPickerRef.current)
146334
+ setQuickPicker(null);
146335
+ }
146336
+ const off = composerOffset(x2, y);
146337
+ const point = transcriptPoint(x2, y);
145582
146338
  if (isPrimary && isDrag && transcriptMouseAnchorRef.current && !point) {
145583
146339
  const bottom = viewportTop + transcriptHeightLiveRef.current - 1;
145584
146340
  if (y < viewportTop)
@@ -145703,6 +146459,7 @@ function App2({ selector: initialSelector, runner, fullscreen = false, resumeId
145703
146459
  setItems(s2.items);
145704
146460
  msgRef.current = s2.messages;
145705
146461
  sessionRef.current = { id: s2.id, createdAt: s2.createdAt, title: s2.title, turns: s2.turns ?? [] };
146462
+ cliSessionRef.current = undefined;
145706
146463
  notice(`resumed · ${s2.items.length} messages · ${new Date(s2.updatedAt).toLocaleString()}`);
145707
146464
  };
145708
146465
  import_react26.useEffect(() => {
@@ -145863,6 +146620,7 @@ function App2({ selector: initialSelector, runner, fullscreen = false, resumeId
145863
146620
  return take2(listSessions().slice(0, 7).map((s2, i2) => ({ value: `/resume ${i2 + 1}`, label: `${i2 + 1}. ${s2.title || "(untitled)"}`.slice(0, 42), detail: new Date(s2.updatedAt).toLocaleDateString() })));
145864
146621
  return [];
145865
146622
  };
146623
+ const quickPickerRows = (which2) => which2 === "model" ? commandPickerRows("/model") : effortRows();
145866
146624
  const isExactSlashCommand = (draft) => {
145867
146625
  const q = draft.trim();
145868
146626
  if (!/^\/\S+$/.test(q))
@@ -145909,6 +146667,7 @@ function App2({ selector: initialSelector, runner, fullscreen = false, resumeId
145909
146667
  return model ? effortLevels(model) : [];
145910
146668
  })();
145911
146669
  const displayEffort = activeModelEfforts.length > 0 ? effort : undefined;
146670
+ statusBarRenderRef.current = { model: modelLabel, effort: displayEffort, mode: mode2 };
145912
146671
  const push = (it) => setItems((prev) => [...prev, it]);
145913
146672
  const pushPhase = (label, detail) => {
145914
146673
  const id = idRef.current++;
@@ -145977,7 +146736,29 @@ function App2({ selector: initialSelector, runner, fullscreen = false, resumeId
145977
146736
  statusPad: Math.max(6, ...rows2.map((r2) => r2.status.length))
145978
146737
  };
145979
146738
  };
146739
+ const askModeRef = import_react26.useRef(false);
145980
146740
  const defaultRunner = import_react26.useCallback(async ({ prompt, messages, onEvent, selector: sel, signal }) => {
146741
+ const isAsk = askModeRef.current;
146742
+ askModeRef.current = false;
146743
+ if (isAsk) {
146744
+ const docs = loadGearboxDocs();
146745
+ if (!docs) {
146746
+ onEvent({ type: "error", message: "Gearbox docs aren't bundled with this install — can't answer from them." });
146747
+ return { messages, usage: { inputTokens: 0, outputTokens: 0 } };
146748
+ }
146749
+ const choice3 = sel.select({ prompt, kind: "search" });
146750
+ routedRef.current = { model: choice3.model, reason: choice3.reason };
146751
+ setLastPick({ model: choice3.model, reason: choice3.reason });
146752
+ onEvent({ type: "model-pick", model: choice3.model.label, provider: choice3.model.provider, reason: choice3.reason });
146753
+ const acct = accountResolver.pick(choice3.model.provider);
146754
+ const creds2 = acct ? await resolveCreds(acct) : undefined;
146755
+ usedAccountRef.current = acct?.id ?? null;
146756
+ cliMetaRef.current = null;
146757
+ if (acct)
146758
+ markUsed(acct.id);
146759
+ const r3 = await runCompletion({ model: choice3.model, system: buildAskSystem(docs), prompt, onEvent, signal, creds: creds2 });
146760
+ return { messages, usage: r3.usage };
146761
+ }
145981
146762
  const cli = activeCliRef.current;
145982
146763
  if (cli) {
145983
146764
  if (activeImagesRef.current.length) {
@@ -146239,11 +147020,32 @@ ${fetched.join(`
146239
147020
  const toolMap = new Map;
146240
147021
  const pendingToolStreams = new Map;
146241
147022
  let toolFlushTimer = null;
147023
+ let pendingText = "";
147024
+ let textFlushTimer = null;
146242
147025
  const changedFiles = new Set;
146243
147026
  const checks4 = [];
146244
147027
  const failures = [];
146245
147028
  let hadError = false;
147029
+ const flushText = () => {
147030
+ if (textFlushTimer) {
147031
+ clearTimeout(textFlushTimer);
147032
+ textFlushTimer = null;
147033
+ }
147034
+ if (!pendingText)
147035
+ return;
147036
+ const chunk2 = pendingText;
147037
+ pendingText = "";
147038
+ if (curAsstRef.current === null) {
147039
+ const id = idRef.current++;
147040
+ curAsstRef.current = id;
147041
+ setItems((prev) => [...prev, { kind: "assistant", id, text: chunk2, done: false }]);
147042
+ } else {
147043
+ const id = curAsstRef.current;
147044
+ setItems((prev) => prev.map((i2) => i2.id === id && i2.kind === "assistant" ? { ...i2, text: i2.text + chunk2 } : i2));
147045
+ }
147046
+ };
146246
147047
  const finishAssistant = () => {
147048
+ flushText();
146247
147049
  const id = curAsstRef.current;
146248
147050
  if (id == null)
146249
147051
  return;
@@ -146310,14 +147112,9 @@ ${fetched.join(`
146310
147112
  } else if (e2.type === "text") {
146311
147113
  setMascotState("streaming");
146312
147114
  outCharsRef.current += e2.text.length;
146313
- if (curAsstRef.current === null) {
146314
- const id = idRef.current++;
146315
- curAsstRef.current = id;
146316
- setItems((prev) => [...prev, { kind: "assistant", id, text: e2.text, done: false }]);
146317
- } else {
146318
- const id = curAsstRef.current;
146319
- setItems((prev) => prev.map((i2) => i2.id === id && i2.kind === "assistant" ? { ...i2, text: i2.text + e2.text } : i2));
146320
- }
147115
+ pendingText += e2.text;
147116
+ if (!textFlushTimer)
147117
+ textFlushTimer = setTimeout(flushText, 45);
146321
147118
  } else if (e2.type === "tool-start") {
146322
147119
  setMascotState("tool");
146323
147120
  finishAssistant();
@@ -146423,6 +147220,7 @@ ${fetched.join(`
146423
147220
  }
146424
147221
  } finally {
146425
147222
  activeImagesRef.current = [];
147223
+ flushText();
146426
147224
  flushToolStreams();
146427
147225
  abortRef.current = null;
146428
147226
  setBusy(false);
@@ -146554,6 +147352,7 @@ ${fetched.join(`
146554
147352
  setLastInput(0);
146555
147353
  curAsstRef.current = null;
146556
147354
  routedRef.current = null;
147355
+ cliSessionRef.current = undefined;
146557
147356
  sessionRef.current = { id: newSessionId(), createdAt: Date.now(), title: "", turns: [] };
146558
147357
  notice("started a fresh conversation");
146559
147358
  return;
@@ -146692,6 +147491,22 @@ ${fetched.join(`
146692
147491
  }
146693
147492
  runTurn(lastPromptRef.current);
146694
147493
  return;
147494
+ case "ask": {
147495
+ const question = arg.trim();
147496
+ if (!question) {
147497
+ echo(text2);
147498
+ notice("usage: /ask <question about Gearbox> · e.g. /ask how do I add Azure?");
147499
+ return;
147500
+ }
147501
+ if (busyRef.current) {
147502
+ echo(text2);
147503
+ notice("finish the current turn first, then /ask");
147504
+ return;
147505
+ }
147506
+ askModeRef.current = true;
147507
+ runTurn(question);
147508
+ return;
147509
+ }
146695
147510
  case "model":
146696
147511
  echo(text2);
146697
147512
  if (!arg || arg.toLowerCase() === "all") {
@@ -146998,7 +147813,7 @@ Example: /mcp add github npx -y @modelcontextprotocol/server-github`);
146998
147813
  ` + formatAccounts(all, activeId, []) : "no accounts yet — /account add to add one");
146999
147814
  return;
147000
147815
  }
147001
- if (!["add", "remove", "rm", "import", "off"].includes(subL)) {
147816
+ if (!["add", "remove", "rm", "import", "off", "refresh"].includes(subL)) {
147002
147817
  const ref = findAccountRef(arg, all);
147003
147818
  if (ref.account) {
147004
147819
  activate(ref.account);
@@ -147030,16 +147845,7 @@ Example: /mcp add github npx -y @modelcontextprotocol/server-github`);
147030
147845
  const provGiven = parts[2] ? key : "";
147031
147846
  const keyVal = parts[2] ?? "";
147032
147847
  if (!key) {
147033
- notice(`add an account:
147034
- ` + ` /account add claude Claude subscription (Pro/Max)
147035
- ` + ` /account add claude <name> a 2nd Claude account, e.g. /account add claude work
147036
- ` + ` /account add codex ChatGPT subscription (Plus/Pro)
147037
- ` + ` /account add codex <name> a 2nd ChatGPT account, e.g. /account add codex work
147038
- ` + ` /account add azure <foundry-endpoint> <api-key>
147039
- ` + ` /account add azure <resource-name> <api-key> [api-version]
147040
- ` + ` /account add openai-compat <name> <base-url> <api-key> <model> [model...]
147041
- ` + ` /account add <api-key> paste any provider key (auto-detected)
147042
- ` + " /account add <provider> <api-key> e.g. anthropic, openai, openrouter");
147848
+ notice(ACCOUNT_ADD_HELP);
147043
147849
  return;
147044
147850
  }
147045
147851
  (async () => {
@@ -147068,6 +147874,13 @@ Example: /mcp add github npx -y @modelcontextprotocol/server-github`);
147068
147874
  notice(`${res.message} — testing…`);
147069
147875
  const t2 = await testAccount(res.account);
147070
147876
  notice(t2.ok ? `✓ added · ${t2.message}` : `added, but the key test failed: ${t2.message}`);
147877
+ const d = await discoverModels(res.account);
147878
+ if (d.models.length) {
147879
+ putAccount({ ...res.account, models: d.models });
147880
+ notice(`found ${d.models.length} model${d.models.length === 1 ? "" : "s"} on this account — /model to pick one`);
147881
+ } else if (d.note) {
147882
+ notice(d.note);
147883
+ }
147071
147884
  })();
147072
147885
  return;
147073
147886
  }
@@ -147107,6 +147920,26 @@ Example: /mcp add github npx -y @modelcontextprotocol/server-github`);
147107
147920
  })();
147108
147921
  return;
147109
147922
  }
147923
+ if (subL === "refresh") {
147924
+ (async () => {
147925
+ const targets = listAccounts().filter((a) => a.enabled && a.exec !== "cli");
147926
+ if (!targets.length) {
147927
+ notice("no API/cloud accounts to refresh — /account add to add one");
147928
+ return;
147929
+ }
147930
+ notice(`refreshing models for ${targets.length} account${targets.length === 1 ? "" : "s"}…`);
147931
+ for (const a of targets) {
147932
+ const d = await discoverModels(a);
147933
+ if (d.models.length) {
147934
+ putAccount({ ...a, models: d.models });
147935
+ notice(`${accountName(a)}: ${d.models.length} model${d.models.length === 1 ? "" : "s"}`);
147936
+ } else {
147937
+ notice(`${accountName(a)}: ${d.note ?? "no models discovered"}`);
147938
+ }
147939
+ }
147940
+ })();
147941
+ return;
147942
+ }
147110
147943
  notice(`didn't recognize "/account ${arg}".
147111
147944
 
147112
147945
  ` + formatAccounts(all, activeId, importableEnvCreds(), accountStatusCacheRef.current));
@@ -147287,6 +148120,10 @@ Example: /mcp add github npx -y @modelcontextprotocol/server-github`);
147287
148120
  notice(`queued (${queueRef.current.length}) — sends when the current turn finishes`);
147288
148121
  return;
147289
148122
  }
148123
+ if (looksLikeGearboxQuestion(text2)) {
148124
+ notice("↳ answering from Gearbox's own docs · rephrase as a task, or /help, to run it as a normal turn");
148125
+ askModeRef.current = true;
148126
+ }
147290
148127
  runTurn(text2);
147291
148128
  }, [handleCommand, runTurn, setupRequired, onboardingState]);
147292
148129
  import_react26.useEffect(() => {
@@ -147338,6 +148175,25 @@ Example: /mcp add github npx -y @modelcontextprotocol/server-github`);
147338
148175
  resolvePerm("deny");
147339
148176
  return;
147340
148177
  }
148178
+ if (quickPickerRef.current) {
148179
+ const rows2 = quickPickerRows(quickPickerRef.current);
148180
+ if (key.upArrow || key.downArrow) {
148181
+ if (rows2.length) {
148182
+ const delta = key.upArrow ? -1 : 1;
148183
+ setQuickPickerIndex((quickPickerIndexRef.current + delta + rows2.length) % rows2.length);
148184
+ }
148185
+ return;
148186
+ }
148187
+ if (key.return) {
148188
+ const row = rows2[Math.min(quickPickerIndexRef.current, rows2.length - 1)];
148189
+ setQuickPicker(null);
148190
+ if (row)
148191
+ submit(row.value);
148192
+ return;
148193
+ }
148194
+ setQuickPicker(null);
148195
+ return;
148196
+ }
147341
148197
  if (key.ctrl && input === "c") {
147342
148198
  if (busyRef.current) {
147343
148199
  interruptedRef.current = true;
@@ -147582,6 +148438,8 @@ Example: /mcp add github npx -y @modelcontextprotocol/server-github`);
147582
148438
  const lineWidth2 = Math.max(width - 3, 20);
147583
148439
  const lines = import_react26.useMemo(() => itemsToLines(items, lineWidth2, expandAll), [items, lineWidth2, expandAll]);
147584
148440
  const PALETTE_ROWS = pickerRows.length ? Math.min(7, pickerRows.length) : fileMatches.length ? Math.min(5, fileMatches.length) : cmdMatches.length ? Math.min(7, cmdMatches.length) : 0;
148441
+ const quickRows = quickPicker ? quickPickerRows(quickPicker) : [];
148442
+ const quickPickerLimit = Math.min(7, Math.max(1, quickRows.length));
147585
148443
  let footer = 2;
147586
148444
  footer += perm ? 9 : 3;
147587
148445
  footer += PALETTE_ROWS;
@@ -147597,6 +148455,8 @@ Example: /mcp add github npx -y @modelcontextprotocol/server-github`);
147597
148455
  footer += 1;
147598
148456
  if (copiedNotice)
147599
148457
  footer += 1;
148458
+ if (quickPicker && quickRows.length)
148459
+ footer += quickPickerLimit + 2;
147600
148460
  const HEADER = 3;
147601
148461
  const transcriptHeight = Math.max(1, rows - HEADER - footer);
147602
148462
  const maxScroll = Math.max(0, lines.length - transcriptHeight);
@@ -147605,6 +148465,7 @@ Example: /mcp add github npx -y @modelcontextprotocol/server-github`);
147605
148465
  scrollTopLiveRef.current = effScroll;
147606
148466
  transcriptHeightLiveRef.current = transcriptHeight;
147607
148467
  viewportHeightRef.current = transcriptHeight;
148468
+ paletteRowsLiveRef.current = PALETTE_ROWS;
147608
148469
  maxScrollRef.current = maxScroll;
147609
148470
  scrollTopRef.current = effScroll;
147610
148471
  import_react26.useEffect(() => {
@@ -147718,6 +148579,32 @@ Example: /mcp add github npx -y @modelcontextprotocol/server-github`);
147718
148579
  }, undefined, false, undefined, this)
147719
148580
  ]
147720
148581
  }, undefined, true, undefined, this) : null;
148582
+ const quickPickerJsx = quickPicker && quickRows.length ? /* @__PURE__ */ jsx_dev_runtime12.jsxDEV(Box_default, {
148583
+ flexDirection: "column",
148584
+ marginTop: 1,
148585
+ children: [
148586
+ /* @__PURE__ */ jsx_dev_runtime12.jsxDEV(Box_default, {
148587
+ paddingX: 1,
148588
+ children: [
148589
+ /* @__PURE__ */ jsx_dev_runtime12.jsxDEV(Text, {
148590
+ color: color.accent,
148591
+ children: quickPicker === "model" ? "model" : "effort"
148592
+ }, undefined, false, undefined, this),
148593
+ /* @__PURE__ */ jsx_dev_runtime12.jsxDEV(Text, {
148594
+ color: color.faint,
148595
+ children: " · ↑↓ select · ⏎ apply · esc close"
148596
+ }, undefined, false, undefined, this)
148597
+ ]
148598
+ }, undefined, true, undefined, this),
148599
+ /* @__PURE__ */ jsx_dev_runtime12.jsxDEV(CommandPalette, {
148600
+ draft: "",
148601
+ selected: Math.min(quickPickerIndex, quickRows.length - 1),
148602
+ limit: quickPickerLimit,
148603
+ rows: quickRows,
148604
+ width
148605
+ }, undefined, false, undefined, this)
148606
+ ]
148607
+ }, undefined, true, undefined, this) : null;
147721
148608
  const composerJsx = perm ? /* @__PURE__ */ jsx_dev_runtime12.jsxDEV(PermissionPrompt, {
147722
148609
  req: perm,
147723
148610
  width
@@ -147812,6 +148699,7 @@ Example: /mcp add github npx -y @modelcontextprotocol/server-github`);
147812
148699
  ]
147813
148700
  }, undefined, true, undefined, this)
147814
148701
  }, undefined, false, undefined, this) : null,
148702
+ quickPickerJsx,
147815
148703
  /* @__PURE__ */ jsx_dev_runtime12.jsxDEV(StatusBar, {
147816
148704
  model: modelLabel,
147817
148705
  branch,
@@ -147904,7 +148792,7 @@ init_permission();
147904
148792
  var jsx_dev_runtime13 = __toESM(require_jsx_dev_runtime(), 1);
147905
148793
  process.env.LANG = process.env.LANG || "en_US.UTF-8";
147906
148794
  process.env.LC_ALL = process.env.LC_ALL || "en_US.UTF-8";
147907
- var VERSION16 = "0.1.30";
148795
+ var VERSION16 = "0.1.32";
147908
148796
  var args = process.argv.slice(2);
147909
148797
  var supportsAnsi = process.env.FORCE_COLOR === "1" || process.env.TERM !== "dumb" && process.env.NO_COLOR !== "1" && process.stdout.isTTY;
147910
148798
  var ansi = (code) => supportsAnsi ? `\x1B[${code}m` : "";
@@ -147924,123 +148812,78 @@ function onboardingBanner(termWidth) {
147924
148812
  const pad3 = Math.max(0, Math.floor((w - visibleLength(s2)) / 2));
147925
148813
  return " ".repeat(pad3) + s2;
147926
148814
  };
147927
- const rgb = (r2, g, b) => supportsAnsi ? `\x1B[38;2;${r2};${g};${b}m` : "";
147928
148815
  const RST = supportsAnsi ? "\x1B[0m" : "";
147929
- const mainColor = (row) => {
147930
- const t2 = row / 6;
147931
- const r2 = Math.round(80 + (0 - 80) * t2);
147932
- const g = Math.round(230 + (170 - 230) * t2);
147933
- const b = Math.round(255 + (255 - 255) * t2);
147934
- return rgb(r2, g, b);
147935
- };
147936
- const SHADOW = rgb(0, 55, 85);
148816
+ const rgb = (r2, g, b) => supportsAnsi ? `\x1B[38;2;${r2};${g};${b}m` : "";
148817
+ const FACE = rgb(0, 215, 255);
148818
+ const DEPTH = rgb(0, 90, 145);
148819
+ const colorize2 = (s2) => s2.split("").map((c) => c === "█" ? FACE + c + RST : c === " " ? " " : DEPTH + c + RST).join("");
147937
148820
  const F2 = {
147938
148821
  G: [
147939
- " ██████ ",
147940
- "██ ",
147941
- "██ ",
147942
- "██ ████",
147943
- "██ ██",
147944
- "██ ██",
147945
- " ██████ "
148822
+ " ██████╗ ",
148823
+ "██╔════╝ ",
148824
+ "██║ ███╗",
148825
+ "██║ ██║",
148826
+ "╚██████╔╝",
148827
+ " ╚═════╝ "
147946
148828
  ],
147947
148829
  E: [
147948
- "████████",
147949
- "██ ",
147950
- "██ ",
147951
- "██████ ",
147952
- "██ ",
147953
- "██ ",
147954
- "████████"
148830
+ "███████╗",
148831
+ "██╔════╝",
148832
+ "█████╗ ",
148833
+ "██╔══╝ ",
148834
+ "███████╗",
148835
+ "╚══════╝"
147955
148836
  ],
147956
148837
  A: [
147957
- " ████ ",
147958
- " ██ ██ ",
147959
- "██ ██",
147960
- "████████",
147961
- "██ ██",
147962
- "██ ██",
147963
- "██ ██"
148838
+ " █████╗ ",
148839
+ "██╔══██╗",
148840
+ "███████║",
148841
+ "██╔══██║",
148842
+ "██║ ██║",
148843
+ "╚═╝ ╚═╝"
147964
148844
  ],
147965
148845
  R: [
147966
- "███████ ",
147967
- "██ ██",
147968
- "██ ██",
147969
- "███████ ",
147970
- "████ ",
147971
- "██ ██ ",
147972
- "██ ███"
148846
+ "██████╗ ",
148847
+ "██╔══██╗",
148848
+ "██████╔╝",
148849
+ "██╔══╗ ",
148850
+ "██║ ██╗",
148851
+ "╚═╝ ╚═╝"
147973
148852
  ],
147974
148853
  B: [
147975
- "███████ ",
147976
- "██ ██",
147977
- "██ ██",
147978
- "███████ ",
147979
- "██ ██",
147980
- "██ ██",
147981
- "███████ "
148854
+ "██████╗ ",
148855
+ "██╔══██╗",
148856
+ "██████╔╝",
148857
+ "██╔══██╗",
148858
+ "██████╔╝",
148859
+ "╚═════╝ "
147982
148860
  ],
147983
148861
  O: [
147984
- " ██████ ",
147985
- "██ ██",
147986
- "██ ██",
147987
- "██ ██",
147988
- "██ ██",
147989
- "██ ██",
147990
- " ██████ "
148862
+ " ██████╗ ",
148863
+ "██╔═══██╗",
148864
+ "██║ ██║",
148865
+ "██║ ██║",
148866
+ "╚██████╔╝",
148867
+ " ╚═════╝ "
147991
148868
  ],
147992
148869
  X: [
147993
- "██ ██",
147994
- " ██ ██ ",
147995
- " ████ ",
147996
- " ██ ",
147997
- " ████ ",
147998
- " ██ ██ ",
147999
- "██ ██"
148870
+ "██╗ ██╗",
148871
+ "╚██╗██╔╝",
148872
+ " ╚████╔╝",
148873
+ " ██╔╗██ ",
148874
+ "██╔╝╚██╗",
148875
+ "╚═╝ ╚═╝"
148000
148876
  ]
148001
148877
  };
148002
- const LETTER_W = 8, GAP = 2, ROWS = 7;
148003
148878
  const letters = "GEARBOX".split("");
148004
- const totalCols = letters.length * LETTER_W + (letters.length - 1) * GAP;
148005
- const grid = Array.from({ length: ROWS }, () => new Array(totalCols).fill(false));
148006
- let startCol = 0;
148007
- for (const ch of letters) {
148008
- const rows = F2[ch] ?? [];
148009
- for (let r2 = 0;r2 < ROWS; r2++) {
148010
- const row = rows[r2] ?? " ";
148011
- for (let c = 0;c < LETTER_W; c++) {
148012
- if (row[c] === "█") {
148013
- const cell2 = grid[r2];
148014
- if (cell2)
148015
- cell2[startCol + c] = true;
148016
- }
148017
- }
148018
- }
148019
- startCol += LETTER_W + GAP;
148020
- }
148021
- const SDX = 2, SDY = 2;
148022
- const renderW = totalCols + SDX;
148023
- const renderH = ROWS + SDY;
148024
- const leftPad = " ".repeat(Math.max(0, Math.floor((w - renderW) / 2)));
148025
148879
  console.log("");
148026
- for (let r2 = 0;r2 < renderH; r2++) {
148027
- let line = "";
148028
- for (let c = 0;c < renderW; c++) {
148029
- const mainOn = r2 < ROWS && c < totalCols && grid[r2]?.[c] === true;
148030
- const shadOn = r2 - SDY >= 0 && c - SDX >= 0 && r2 - SDY < ROWS && c - SDX < totalCols && grid[r2 - SDY]?.[c - SDX] === true;
148031
- if (mainOn) {
148032
- line += `${mainColor(r2)}█${RST}`;
148033
- } else if (shadOn) {
148034
- line += `${SHADOW}█${RST}`;
148035
- } else {
148036
- line += " ";
148037
- }
148038
- }
148039
- console.log(leftPad + line.trimEnd());
148880
+ for (let r2 = 0;r2 < 6; r2++) {
148881
+ const raw = letters.map((ch) => F2[ch]?.[r2] ?? "").join(" ");
148882
+ console.log(center(colorize2(raw)));
148040
148883
  }
148041
148884
  console.log("");
148042
- console.log(center(`${rgb(0, 160, 200)}one terminal · every model you pay for${RST}`));
148043
- console.log(center(`${rgb(0, 130, 170)}keys stay local · never sent anywhere${RST}`));
148885
+ console.log(center(`${rgb(0, 155, 200)}one terminal · every model you pay for${RST}`));
148886
+ console.log(center(`${rgb(0, 125, 165)}keys stay local · never sent anywhere${RST}`));
148044
148887
  console.log("");
148045
148888
  }
148046
148889
  var centerStr = (text2, width) => {