promptpilot 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1417,6 +1417,18 @@ Mode: Ultra compression. Minimize tokens aggressively.` : getOptimizationSystemP
1417
1417
  };
1418
1418
  }
1419
1419
  if (!this.client.listModels) {
1420
+ if (isClaudeTiersOnlyTargetSet(availableTargets)) {
1421
+ const selected = selectClaudeTierHeuristic(options.input, options.routingPriority, availableTargets);
1422
+ if (selected) {
1423
+ return {
1424
+ selectedTarget: stripInternalTargetFields(selected),
1425
+ rankedTargets: [{ ...stripInternalTargetFields(selected), rank: 1, reason: "Selected by Claude tier heuristic (no local Qwen router available)." }],
1426
+ routingReason: "Selected by Claude tier heuristic (no local Qwen router available).",
1427
+ routingWarnings: [],
1428
+ routingProvider: "heuristic"
1429
+ };
1430
+ }
1431
+ }
1420
1432
  return {
1421
1433
  selectedTarget: null,
1422
1434
  rankedTargets: [],
@@ -1441,12 +1453,13 @@ Mode: Ultra compression. Minimize tokens aggressively.` : getOptimizationSystemP
1441
1453
  routingProvider: null
1442
1454
  };
1443
1455
  }
1456
+ const claudeTiersOnly = isClaudeTiersOnlyTargetSet(availableTargets);
1444
1457
  const response = await this.client.generateJson({
1445
1458
  model: routerModel,
1446
1459
  timeoutMs: options.input.timeoutMs ?? this.config.timeoutMs,
1447
1460
  temperature: 0,
1448
1461
  format: "json",
1449
- systemPrompt: buildDownstreamRoutingSystemPrompt(options.routingPriority, options.workloadBias),
1462
+ systemPrompt: buildDownstreamRoutingSystemPrompt(options.routingPriority, options.workloadBias, claudeTiersOnly),
1450
1463
  prompt: JSON.stringify(
1451
1464
  {
1452
1465
  objective: "Rank the caller-supplied downstream targets for this prompt and choose the best top target.",
@@ -1510,6 +1523,18 @@ Mode: Ultra compression. Minimize tokens aggressively.` : getOptimizationSystemP
1510
1523
  routingProvider: routerModel
1511
1524
  };
1512
1525
  } catch {
1526
+ if (isClaudeTiersOnlyTargetSet(availableTargets)) {
1527
+ const selected = selectClaudeTierHeuristic(options.input, options.routingPriority, availableTargets);
1528
+ if (selected) {
1529
+ return {
1530
+ selectedTarget: stripInternalTargetFields(selected),
1531
+ rankedTargets: [{ ...stripInternalTargetFields(selected), rank: 1, reason: "Selected by Claude tier heuristic (Qwen routing failed)." }],
1532
+ routingReason: "Selected by Claude tier heuristic (Qwen routing failed).",
1533
+ routingWarnings: ["Qwen downstream routing failed; fell back to Claude tier heuristic."],
1534
+ routingProvider: "heuristic"
1535
+ };
1536
+ }
1537
+ }
1513
1538
  return {
1514
1539
  selectedTarget: null,
1515
1540
  rankedTargets: [],
@@ -1610,14 +1635,6 @@ ${contextBlock}`);
1610
1635
  if (constraints.length > 0) {
1611
1636
  sections.push(`Constraints:
1612
1637
  - ${constraints.join("\n- ")}`);
1613
- }
1614
- if (isCodeFirstRequest(input.input)) {
1615
- sections.push(`Execution loop:
1616
- - Inspect the relevant files and current behavior.
1617
- - Plan the smallest safe next step.
1618
- - Act with minimal, reversible changes.
1619
- - Test or validate the result.
1620
- - Reflect on gaps or risks, then repeat.`);
1621
1638
  }
1622
1639
  const desiredOutput = [
1623
1640
  input.routingDecision.selectedTarget ? `Selected target: ${formatTargetLabel(input.routingDecision.selectedTarget)}` : input.input.targetModel ? `Target model: ${input.input.targetModel}` : "Target model: claude",
@@ -1666,8 +1683,8 @@ function stripInternalTargetFields(target) {
1666
1683
  latencyRank: target.latencyRank
1667
1684
  };
1668
1685
  }
1669
- function buildDownstreamRoutingSystemPrompt(priority, workloadBias) {
1670
- return [
1686
+ function buildDownstreamRoutingSystemPrompt(priority, workloadBias, claudeTiersOnly = false) {
1687
+ const lines = [
1671
1688
  "You are a downstream model router for PromptPilot.",
1672
1689
  "Return strict JSON only with this shape:",
1673
1690
  '{"selectedTargetId":"string","rankedTargetIds":["string"],"reason":"string"}',
@@ -1678,7 +1695,71 @@ function buildDownstreamRoutingSystemPrompt(priority, workloadBias) {
1678
1695
  "Code-first means ambiguous prompts should default toward coding-capable or agentic-capable targets.",
1679
1696
  "Explicit email, support, chat, and lightweight writing prompts may prefer cheaper lighter targets.",
1680
1697
  "Do not invent targets. Do not output prose outside JSON."
1681
- ].join("\n");
1698
+ ];
1699
+ if (claudeTiersOnly) {
1700
+ lines.push(
1701
+ "You are choosing between Claude model tiers (Haiku, Sonnet, Opus).",
1702
+ "Haiku: fastest and cheapest. Best for email, chat, support, summarization, and simple rewrites. Avoid for deep coding or multi-step reasoning.",
1703
+ "Sonnet: balanced cost and capability. Best for coding, debugging, refactoring, writing, and general-purpose tasks. The default for most prompts.",
1704
+ "Opus: most capable and most expensive. Reserve for complex architecture decisions, multi-constraint agentic planning, long-horizon reasoning, or prompts that clearly require the strongest model.",
1705
+ "When routing priority is cheapest_adequate: prefer Haiku for lightweight tasks, Sonnet for most code and writing tasks, and Opus only when clearly necessary.",
1706
+ "When routing priority is best_quality: prefer Opus for code and reasoning, Sonnet for writing and simple code.",
1707
+ "When routing priority is fastest_adequate: prefer Haiku unless the task clearly needs Sonnet-level capability."
1708
+ );
1709
+ }
1710
+ return lines.join("\n");
1711
+ }
// Built-in Claude tier catalogue, ordered cheapest/fastest first.
// Each row is [model id, capability tags]; a tier's position in the list is
// used as both its cost rank and its latency rank (1 = cheapest / fastest).
var CLAUDE_TIER_TARGETS = [
  ["claude-haiku-4-5", ["writing", "email", "support", "chat", "summarization"]],
  ["claude-sonnet-4-6", ["coding", "writing", "agentic", "tool_use", "refactor", "debugging"]],
  ["claude-opus-4-6", ["coding", "agentic", "tool_use", "refactor", "debugging", "architecture", "writing"]]
].map(([model, capabilities], position) => ({
  provider: "anthropic",
  model,
  label: `anthropic:${model}`,
  capabilities,
  costRank: position + 1,
  latencyRank: position + 1
}));
/**
 * Reports whether a target list consists solely of Anthropic Claude tiers.
 *
 * The answer is true only when there are at least two targets and every one
 * has provider "anthropic" and a model name containing "haiku", "sonnet" or
 * "opus" (case-insensitive).
 *
 * @param {Array<{provider: string, model: string}>} targets - Candidate targets.
 * @returns {boolean} True when the list is a Claude-tiers-only set.
 */
function isClaudeTiersOnlyTargetSet(targets) {
  // A missing or non-array target list is simply "not a Claude tier set";
  // it must not crash the routing fallback paths that call this check.
  if (!Array.isArray(targets) || targets.length < 2) {
    return false;
  }
  return targets.every(
    (target) => target?.provider === "anthropic" && typeof target.model === "string" && /haiku|sonnet|opus/i.test(target.model)
  );
}
/**
 * Chooses a Claude tier from `targets` using keyword and priority rules only
 * (no router model is consulted).
 *
 * Rules, in order:
 *  - "fastest_adequate": Haiku first, unless the request looks Opus-grade and
 *    is not lightweight, in which case Sonnet first.
 *  - Opus-grade request (architecture/planning keywords in the prompt, an
 *    "architecture" hint, or "best_quality" priority): Opus first.
 *  - Lightweight request with "cheapest_adequate": Haiku first.
 *  - Otherwise: Sonnet first.
 * Every rule falls back through the remaining tiers, so a target is returned
 * whenever at least one Haiku/Sonnet/Opus model is present.
 *
 * @param {{task?: string, preset?: string, targetHints?: string[], prompt?: string}} input
 *   Request fields read by the heuristic.
 * @param {string} priority - Routing priority ("cheapest_adequate",
 *   "best_quality", "fastest_adequate" are the values handled specially).
 * @param {Array<{model: string}>} targets - Available targets to pick from.
 * @returns {object|null} The chosen element of `targets`, or null when none
 *   of them is a Haiku, Sonnet or Opus model.
 */
function selectClaudeTierHeuristic(input, priority, targets) {
  const findTier = (pattern) => targets.find((t) => pattern.test(t.model)) ?? null;
  const haiku = findTier(/haiku/i);
  const sonnet = findTier(/sonnet/i);
  const opus = findTier(/opus/i);

  const lightweightKinds = ["email", "chat", "support", "summarization"];
  const task = (input.task ?? "").toLowerCase();
  const preset = (input.preset ?? "").toLowerCase();
  const hints = input.targetHints ?? [];
  // A missing prompt is treated as empty text rather than being coerced to
  // the string "undefined" by RegExp#test.
  const prompt = typeof input.prompt === "string" ? input.prompt : "";

  const isLightweight = lightweightKinds.includes(task) || lightweightKinds.includes(preset) || hints.some((h) => lightweightKinds.includes(h));
  const needsOpus = /\b(architect|architecture|design system|migration plan|multi.?step|complex.*refactor|long.?horizon|agentic.*plan)\b/i.test(prompt) || hints.includes("architecture") || priority === "best_quality";

  if (priority === "fastest_adequate") {
    if (isLightweight || !needsOpus) {
      // Opus is the last resort so a set without Haiku/Sonnet still routes.
      return haiku ?? sonnet ?? opus;
    }
    return sonnet ?? haiku ?? opus;
  }
  if (needsOpus) {
    return opus ?? sonnet ?? haiku;
  }
  if (isLightweight && priority === "cheapest_adequate") {
    return haiku ?? sonnet ?? opus;
  }
  return sonnet ?? haiku ?? opus;
}
1683
1764
  function inferCapabilities(target) {
1684
1765
  const lower = `${target.provider} ${target.model} ${target.label ?? ""}`.toLowerCase();
@@ -1827,6 +1908,109 @@ function createOptimizer(config = {}) {
1827
1908
  return new PromptOptimizer(config);
1828
1909
  }
1829
1910
 
// src/cliMenu.ts
// Key sequences as they arrive on a raw-mode stdin decoded as utf8.
var ARROW_UP = "\x1B[A";
var ARROW_DOWN = "\x1B[B";
var ENTER = "\r";
// Ctrl+C arrives as the ETX byte (0x03) once raw mode is on; an empty string
// here would never equal a data chunk and would leave Ctrl+C dead.
var CTRL_C = "\x03";
var ESCAPE = "\x1B";
// Menu entries: `key` is the value promptClaudeTierMenu resolves with.
var CLAUDE_TIER_OPTIONS = [
  {
    key: "auto",
    label: "Auto",
    badge: "recommended",
    description: "PromptPilot picks the best tier for your prompt"
  },
  {
    key: "haiku",
    label: "Haiku",
    badge: "fastest \xB7 cheapest",
    description: "email, chat, summarization, simple rewrites"
  },
  {
    key: "sonnet",
    label: "Sonnet",
    badge: "balanced",
    description: "coding, debugging, writing, general-purpose"
  },
  {
    key: "opus",
    label: "Opus",
    badge: "most capable",
    description: "architecture, complex reasoning, agentic planning"
  }
];

/**
 * Renders the tier menu as one ANSI-styled string (no trailing newline).
 *
 * Each option takes two lines: a cursor/label/badge line and a description
 * line. The selected option gets the cursor glyph and a bold label; badges and
 * descriptions are dimmed.
 *
 * @param {Array<{label: string, badge: string, description: string}>} options
 * @param {number} selected - Index of the highlighted option.
 * @returns {string} The menu text.
 */
function renderClaudeMenu(options, selected) {
  // Pad labels to the widest one (never narrower than 6 columns) so badges
  // line up. Padding is appended after the styled label so the invisible
  // escape codes do not count toward the width.
  const labelWidth = options.reduce((width, opt) => Math.max(width, opt.label.length), 6);
  const lines = ["Select Claude model tier:\n"];
  options.forEach((opt, index) => {
    const isSelected = index === selected;
    const cursor = isSelected ? "\u276F" : " ";
    const label = isSelected ? `\x1B[1m${opt.label}\x1B[0m` : opt.label;
    const padding = " ".repeat(labelWidth - opt.label.length);
    const badge = `\x1B[2m${opt.badge}\x1B[0m`;
    const desc = `\x1B[2m${opt.description}\x1B[0m`;
    lines.push(` ${cursor} ${label}${padding} ${badge}`);
    lines.push(` ${desc}`);
  });
  lines.push("\n \x1B[2m\u2191/\u2193 move Enter confirm q cancel\x1B[0m");
  return lines.join("\n");
}

/**
 * Shows an interactive tier menu on `stderr` and reads key presses from
 * `stdin` in raw mode.
 *
 * Keys: up/down arrows move the selection (wrapping), Enter confirms,
 * "q" or Escape cancels, Ctrl+C restores the terminal and exits the process.
 *
 * @param {{write: Function}} stderr - Stream the menu is drawn on.
 * @param {object} stdin - Input stream; needs setRawMode/resume/pause/
 *   setEncoding plus event-emitter methods for the interactive path.
 * @returns {Promise<string|null>} The chosen option key, null when cancelled,
 *   or "auto" when `stdin` cannot enter raw mode (e.g. piped input).
 */
async function promptClaudeTierMenu(stderr, stdin) {
  // Piped or redirected stdin has no setRawMode; calling it would throw.
  // Fall back to the automatic choice instead of crashing.
  if (typeof stdin?.setRawMode !== "function") {
    return "auto";
  }
  return new Promise((resolve) => {
    const options = CLAUDE_TIER_OPTIONS;
    let selected = 0;
    // Rows the cursor moved down during the previous draw; 0 before the first.
    let drawnLines = 0;
    const draw = () => {
      if (drawnLines > 0) {
        stderr.write(`\x1B[${drawnLines}A`);
      }
      const frame = `${renderClaudeMenu(options, selected)}\n`;
      stderr.write(`\x1B[?25l${frame}`);
      // Count the newlines actually written so the next redraw returns to the
      // first menu row exactly; a fixed count that is off by one makes the
      // menu creep down the screen on every key press.
      drawnLines = frame.split("\n").length - 1;
    };
    const cleanup = () => {
      stderr.write("\x1B[?25h");
      stdin.setRawMode(false);
      stdin.pause();
      stdin.removeListener("data", onData);
    };
    const onData = (chunk) => {
      if (chunk === CTRL_C) {
        cleanup();
        process.exit(0);
      }
      if (chunk === "q" || chunk === ESCAPE) {
        cleanup();
        stderr.write("\n");
        resolve(null);
        return;
      }
      if (chunk === ARROW_UP) {
        selected = (selected - 1 + options.length) % options.length;
        draw();
        return;
      }
      if (chunk === ARROW_DOWN) {
        selected = (selected + 1) % options.length;
        draw();
        return;
      }
      if (chunk === ENTER) {
        cleanup();
        stderr.write(`\nSelected: \x1B[1m${options[selected].label}\x1B[0m\n\n`);
        resolve(options[selected].key);
      }
    };
    stdin.setRawMode(true);
    stdin.resume();
    stdin.setEncoding("utf8");
    stdin.on("data", onData);
    draw();
  });
}
2013
+
1830
2014
  // src/cliWelcome.ts
1831
2015
  import { basename } from "path";
1832
2016
  var MIN_WIDE_COLUMNS = 76;
@@ -2087,6 +2271,19 @@ async function runCli(argv, io = { stdout: process.stdout, stderr: process.stder
2087
2271
  io.stderr.write("A prompt is required.\n");
2088
2272
  return 1;
2089
2273
  }
2274
+ let claudeTierChoice = null;
2275
+ if (parsed.autoClaudeTiers && io.stderr.isTTY && io.stdin) {
2276
+ claudeTierChoice = await promptClaudeTierMenu(io.stderr, io.stdin);
2277
+ if (claudeTierChoice === null) {
2278
+ return 0;
2279
+ }
2280
+ }
2281
+ const resolvedTargets = (() => {
2282
+ if (!parsed.autoClaudeTiers) return parsed.targets;
2283
+ if (!claudeTierChoice || claudeTierChoice === "auto") return [...CLAUDE_TIER_TARGETS, ...parsed.targets];
2284
+ const picked = CLAUDE_TIER_TARGETS.find((t) => t.model.toLowerCase().includes(claudeTierChoice));
2285
+ return picked ? [picked, ...parsed.targets] : [...CLAUDE_TIER_TARGETS, ...parsed.targets];
2286
+ })();
2090
2287
  const spinner = createSpinner(io.stderr, io.stderr.isTTY ?? false);
2091
2288
  try {
2092
2289
  spinner.start("optimizing");
@@ -2104,7 +2301,7 @@ async function runCli(argv, io = { stdout: process.stdout, stderr: process.stder
2104
2301
  maxLength: parsed.maxLength,
2105
2302
  tags: parsed.tags,
2106
2303
  pinnedConstraints: parsed.pinnedConstraints,
2107
- availableTargets: parsed.targets,
2304
+ availableTargets: resolvedTargets,
2108
2305
  routingEnabled: parsed.routingEnabled,
2109
2306
  routingPriority: parsed.routingPriority,
2110
2307
  routingTopK: parsed.routingTopK,
@@ -2176,6 +2373,7 @@ function parseOptimizeArgs(args) {
2176
2373
  clearSession: false,
2177
2374
  useContext: true,
2178
2375
  bypassOptimization: false,
2376
+ autoClaudeTiers: false,
2179
2377
  help: false,
2180
2378
  tags: [],
2181
2379
  pinnedConstraints: [],
@@ -2286,6 +2484,9 @@ function parseOptimizeArgs(args) {
2286
2484
  case "--bypass-optimization":
2287
2485
  parsed.bypassOptimization = true;
2288
2486
  break;
2487
+ case "--claude":
2488
+ parsed.autoClaudeTiers = true;
2489
+ break;
2289
2490
  case "--help":
2290
2491
  case "-h":
2291
2492
  parsed.help = true;
@@ -2334,7 +2535,8 @@ function getHelpText() {
2334
2535
  " --max-context-tokens <n>",
2335
2536
  " --max-input-tokens <n>",
2336
2537
  " --timeout <ms>",
2337
- " --bypass-optimization"
2538
+ " --bypass-optimization",
2539
+ " --claude Route between Haiku, Sonnet, and Opus automatically"
2338
2540
  ].join("\n");
2339
2541
  }
2340
2542
  function parseTargetCandidate(raw, index) {