npm - promptpilot - Versions diffs - 0.1.7 → 0.1.8 - Mend

promptpilot 0.1.7 → 0.1.8

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.

Files changed (6)

package/dist/cli.js CHANGED

@@ -1417,6 +1417,18 @@ Mode: Ultra compression. Minimize tokens aggressively.` : getOptimizationSystemP
       };
     }
     if (!this.client.listModels) {
+      if (isClaudeTiersOnlyTargetSet(availableTargets)) {
+        const selected = selectClaudeTierHeuristic(options.input, options.routingPriority, availableTargets);
+        if (selected) {
+          return {
+            selectedTarget: stripInternalTargetFields(selected),
+            rankedTargets: [{ ...stripInternalTargetFields(selected), rank: 1, reason: "Selected by Claude tier heuristic (no local Qwen router available)." }],
+            routingReason: "Selected by Claude tier heuristic (no local Qwen router available).",
+            routingWarnings: [],
+            routingProvider: "heuristic"
+          };
+        }
+      }
       return {
         selectedTarget: null,
         rankedTargets: [],
@@ -1441,12 +1453,13 @@ Mode: Ultra compression. Minimize tokens aggressively.` : getOptimizationSystemP
           routingProvider: null
         };
       }
+      const claudeTiersOnly = isClaudeTiersOnlyTargetSet(availableTargets);
       const response = await this.client.generateJson({
         model: routerModel,
         timeoutMs: options.input.timeoutMs ?? this.config.timeoutMs,
         temperature: 0,
         format: "json",
-        systemPrompt: buildDownstreamRoutingSystemPrompt(options.routingPriority, options.workloadBias),
+        systemPrompt: buildDownstreamRoutingSystemPrompt(options.routingPriority, options.workloadBias, claudeTiersOnly),
         prompt: JSON.stringify(
           {
             objective: "Rank the caller-supplied downstream targets for this prompt and choose the best top target.",
@@ -1510,6 +1523,18 @@ Mode: Ultra compression. Minimize tokens aggressively.` : getOptimizationSystemP
         routingProvider: routerModel
       };
     } catch {
+      if (isClaudeTiersOnlyTargetSet(availableTargets)) {
+        const selected = selectClaudeTierHeuristic(options.input, options.routingPriority, availableTargets);
+        if (selected) {
+          return {
+            selectedTarget: stripInternalTargetFields(selected),
+            rankedTargets: [{ ...stripInternalTargetFields(selected), rank: 1, reason: "Selected by Claude tier heuristic (Qwen routing failed)." }],
+            routingReason: "Selected by Claude tier heuristic (Qwen routing failed).",
+            routingWarnings: ["Qwen downstream routing failed; fell back to Claude tier heuristic."],
+            routingProvider: "heuristic"
+          };
+        }
+      }
       return {
         selectedTarget: null,
         rankedTargets: [],
@@ -1658,8 +1683,8 @@ function stripInternalTargetFields(target) {
     latencyRank: target.latencyRank
   };
 }
-function buildDownstreamRoutingSystemPrompt(priority, workloadBias) {
-  return [
+function buildDownstreamRoutingSystemPrompt(priority, workloadBias, claudeTiersOnly = false) {
+  const lines = [
     "You are a downstream model router for PromptPilot.",
     "Return strict JSON only with this shape:",
     '{"selectedTargetId":"string","rankedTargetIds":["string"],"reason":"string"}',
@@ -1670,7 +1695,71 @@ function buildDownstreamRoutingSystemPrompt(priority, workloadBias) {
     "Code-first means ambiguous prompts should default toward coding-capable or agentic-capable targets.",
     "Explicit email, support, chat, and lightweight writing prompts may prefer cheaper lighter targets.",
     "Do not invent targets. Do not output prose outside JSON."
-  ].join("\n");
+  ];
+  if (claudeTiersOnly) {
+    lines.push(
+      "You are choosing between Claude model tiers (Haiku, Sonnet, Opus).",
+      "Haiku: fastest and cheapest. Best for email, chat, support, summarization, and simple rewrites. Avoid for deep coding or multi-step reasoning.",
+      "Sonnet: balanced cost and capability. Best for coding, debugging, refactoring, writing, and general-purpose tasks. The default for most prompts.",
+      "Opus: most capable and most expensive. Reserve for complex architecture decisions, multi-constraint agentic planning, long-horizon reasoning, or prompts that clearly require the strongest model.",
+      "When routing priority is cheapest_adequate: prefer Haiku for lightweight tasks, Sonnet for most code and writing tasks, and Opus only when clearly necessary.",
+      "When routing priority is best_quality: prefer Opus for code and reasoning, Sonnet for writing and simple code.",
+      "When routing priority is fastest_adequate: prefer Haiku unless the task clearly needs Sonnet-level capability."
+    );
+  }
+  return lines.join("\n");
+}
+var CLAUDE_TIER_TARGETS = [
+  {
+    provider: "anthropic",
+    model: "claude-haiku-4-5",
+    label: "anthropic:claude-haiku-4-5",
+    capabilities: ["writing", "email", "support", "chat", "summarization"],
+    costRank: 1,
+    latencyRank: 1
+  },
+  {
+    provider: "anthropic",
+    model: "claude-sonnet-4-6",
+    label: "anthropic:claude-sonnet-4-6",
+    capabilities: ["coding", "writing", "agentic", "tool_use", "refactor", "debugging"],
+    costRank: 2,
+    latencyRank: 2
+  },
+  {
+    provider: "anthropic",
+    model: "claude-opus-4-6",
+    label: "anthropic:claude-opus-4-6",
+    capabilities: ["coding", "agentic", "tool_use", "refactor", "debugging", "architecture", "writing"],
+    costRank: 3,
+    latencyRank: 3
+  }
+];
+function isClaudeTiersOnlyTargetSet(targets) {
+  return targets.length >= 2 && targets.every(
+    (t) => t.provider === "anthropic" && /haiku|sonnet|opus/i.test(t.model)
+  );
+}
+function selectClaudeTierHeuristic(input, priority, targets) {
+  const haiku = targets.find((t) => /haiku/i.test(t.model)) ?? null;
+  const sonnet = targets.find((t) => /sonnet/i.test(t.model)) ?? null;
+  const opus = targets.find((t) => /opus/i.test(t.model)) ?? null;
+  const task = (input.task ?? "").toLowerCase();
+  const preset = (input.preset ?? "").toLowerCase();
+  const hints = input.targetHints ?? [];
+  const prompt = input.prompt;
+  const isLightweight = ["email", "chat", "support", "summarization"].includes(task) || ["email", "chat", "support", "summarization"].includes(preset) || hints.some((h) => ["email", "support", "chat", "summarization"].includes(h));
+  const needsOpus = /\b(architect|architecture|design system|migration plan|multi.?step|complex.*refactor|long.?horizon|agentic.*plan)\b/i.test(prompt) || hints.includes("architecture") || priority === "best_quality";
+  if (priority === "fastest_adequate") {
+    return isLightweight || !needsOpus ? haiku ?? sonnet : sonnet ?? haiku;
+  }
+  if (needsOpus) {
+    return opus ?? sonnet ?? haiku;
+  }
+  if (isLightweight && priority === "cheapest_adequate") {
+    return haiku ?? sonnet;
+  }
+  return sonnet ?? haiku ?? opus;
 }
 function inferCapabilities(target) {
   const lower = `${target.provider} ${target.model} ${target.label ?? ""}`.toLowerCase();
@@ -1819,6 +1908,109 @@ function createOptimizer(config = {}) {
   return new PromptOptimizer(config);
 }
+// src/cliMenu.ts
+var ARROW_UP = "\x1B[A";
+var ARROW_DOWN = "\x1B[B";
+var ENTER = "\r";
+var CTRL_C = "\x03";
+var ESCAPE = "\x1B";
+var CLAUDE_TIER_OPTIONS = [
+  {
+    key: "auto",
+    label: "Auto",
+    badge: "recommended",
+    description: "PromptPilot picks the best tier for your prompt"
+  },
+  {
+    key: "haiku",
+    label: "Haiku",
+    badge: "fastest \xB7 cheapest",
+    description: "email, chat, summarization, simple rewrites"
+  },
+  {
+    key: "sonnet",
+    label: "Sonnet",
+    badge: "balanced",
+    description: "coding, debugging, writing, general-purpose"
+  },
+  {
+    key: "opus",
+    label: "Opus",
+    badge: "most capable",
+    description: "architecture, complex reasoning, agentic planning"
+  }
+];
+function renderClaudeMenu(options, selected) {
+  const lines = ["Select Claude model tier:\n"];
+  for (let index = 0; index < options.length; index++) {
+    const opt = options[index];
+    const isSelected = index === selected;
+    const cursor = isSelected ? "\u276F" : " ";
+    const label = isSelected ? `\x1B[1m${opt.label}\x1B[0m` : opt.label;
+    const badge = `\x1B[2m${opt.badge}\x1B[0m`;
+    const desc = `\x1B[2m${opt.description}\x1B[0m`;
+    lines.push(`  ${cursor} ${label.padEnd(isSelected ? 14 : 6)}  ${badge}`);
+    lines.push(`      ${desc}`);
+  }
+  lines.push("\n  \x1B[2m\u2191/\u2193 move  Enter confirm  q cancel\x1B[0m");
+  return lines.join("\n");
+}
+async function promptClaudeTierMenu(stderr, stdin) {
+  return new Promise((resolve) => {
+    let selected = 0;
+    const options = CLAUDE_TIER_OPTIONS;
+    const lineCount = options.length * 2 + 3;
+    const draw = (first) => {
+      if (!first) {
+        stderr.write(`\x1B[${lineCount}A`);
+      }
+      stderr.write(`\x1B[?25l${renderClaudeMenu(options, selected)}
+`);
+    };
+    const cleanup = () => {
+      stderr.write("\x1B[?25h");
+      stdin.setRawMode(false);
+      stdin.pause();
+      stdin.removeListener("data", onData);
+    };
+    const onData = (chunk) => {
+      if (chunk === CTRL_C) {
+        cleanup();
+        process.exit(0);
+      }
+      if (chunk === "q" || chunk === ESCAPE) {
+        cleanup();
+        stderr.write("\n");
+        resolve(null);
+        return;
+      }
+      if (chunk === ARROW_UP) {
+        selected = (selected - 1 + options.length) % options.length;
+        draw(false);
+        return;
+      }
+      if (chunk === ARROW_DOWN) {
+        selected = (selected + 1) % options.length;
+        draw(false);
+        return;
+      }
+      if (chunk === ENTER) {
+        cleanup();
+        stderr.write(`
+  Selected: \x1B[1m${options[selected].label}\x1B[0m
+`);
+        resolve(options[selected].key);
+      }
+    };
+    stdin.setRawMode(true);
+    stdin.resume();
+    stdin.setEncoding("utf8");
+    stdin.on("data", onData);
+    draw(true);
+  });
+}
 // src/cliWelcome.ts
 import { basename } from "path";
 var MIN_WIDE_COLUMNS = 76;
@@ -2079,6 +2271,19 @@ async function runCli(argv, io = { stdout: process.stdout, stderr: process.stder
     io.stderr.write("A prompt is required.\n");
     return 1;
   }
+  let claudeTierChoice = null;
+  if (parsed.autoClaudeTiers && io.stderr.isTTY && io.stdin) {
+    claudeTierChoice = await promptClaudeTierMenu(io.stderr, io.stdin);
+    if (claudeTierChoice === null) {
+      return 0;
+    }
+  }
+  const resolvedTargets = (() => {
+    if (!parsed.autoClaudeTiers) return parsed.targets;
+    if (!claudeTierChoice || claudeTierChoice === "auto") return [...CLAUDE_TIER_TARGETS, ...parsed.targets];
+    const picked = CLAUDE_TIER_TARGETS.find((t) => t.model.toLowerCase().includes(claudeTierChoice));
+    return picked ? [picked, ...parsed.targets] : [...CLAUDE_TIER_TARGETS, ...parsed.targets];
+  })();
   const spinner = createSpinner(io.stderr, io.stderr.isTTY ?? false);
   try {
     spinner.start("optimizing");
@@ -2096,7 +2301,7 @@ async function runCli(argv, io = { stdout: process.stdout, stderr: process.stder
       maxLength: parsed.maxLength,
       tags: parsed.tags,
       pinnedConstraints: parsed.pinnedConstraints,
-      availableTargets: parsed.targets,
+      availableTargets: resolvedTargets,
       routingEnabled: parsed.routingEnabled,
       routingPriority: parsed.routingPriority,
       routingTopK: parsed.routingTopK,
@@ -2168,6 +2373,7 @@ function parseOptimizeArgs(args) {
     clearSession: false,
     useContext: true,
     bypassOptimization: false,
+    autoClaudeTiers: false,
     help: false,
     tags: [],
     pinnedConstraints: [],
@@ -2278,6 +2484,9 @@ function parseOptimizeArgs(args) {
       case "--bypass-optimization":
         parsed.bypassOptimization = true;
         break;
+      case "--claude":
+        parsed.autoClaudeTiers = true;
+        break;
       case "--help":
       case "-h":
         parsed.help = true;
@@ -2326,7 +2535,8 @@ function getHelpText() {
     "  --max-context-tokens <n>",
     "  --max-input-tokens <n>",
     "  --timeout <ms>",
-    "  --bypass-optimization"
+    "  --bypass-optimization",
+    "  --claude              Route between Haiku, Sonnet, and Opus automatically"
   ].join("\n");
 }
 function parseTargetCandidate(raw, index) {