npm - promptpilot - Versions diffs - 0.1.7 → 0.2.1 - Mend

promptpilot 0.1.7 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/index.d.ts CHANGED Viewed

@@ -199,6 +199,7 @@ declare class PromptOptimizer {
     private heuristicOptimize;
     private reduceToBudget;
 }
+/**
+ * Predefined list of downstream target candidates exported by the package.
+ * The runtime value in dist/index.js contains one Anthropic entry per Claude
+ * tier (Haiku, Sonnet, Opus).
+ */
+declare const CLAUDE_TIER_TARGETS: TargetModelCandidate[];
 declare class InvalidPromptError extends Error {
     constructor(message?: string);
@@ -328,4 +329,4 @@ declare class SQLiteSessionStore implements SessionStore {
 declare function createOptimizer(config?: OptimizerConfig): PromptOptimizer;
 declare function optimizePrompt(input: OptimizePromptInput, config?: OptimizerConfig): Promise<OptimizePromptResult>;
-export { ContextCompressor, type ContextEntry, ContextManager, ContextStoreError, type ContextSummary, FileSessionStore, InvalidPromptError, type Logger, type ModelRoutingStrategy, OllamaClient, type OllamaClientLike, type OllamaGenerateOptions, type OllamaModelInfo, OllamaUnavailableError, type OptimizationMode, type OptimizePromptInput, type OptimizePromptResult, type OptimizerConfig, PromptOptimizer, type PromptPreset, type ProviderType, type RankedTargetCandidate, type RelevantContextResult, type RoutingDecision, type RoutingPriority, SQLiteSessionStore, type SessionData, type SessionStore, type TargetCapability, type TargetModelCandidate, TokenBudgetExceededError, TokenEstimator, type TokenUsageEstimate, type WorkloadBias, createOptimizer, getDefaultPreferredModels, optimizePrompt, selectOllamaModel };
+export { CLAUDE_TIER_TARGETS, ContextCompressor, type ContextEntry, ContextManager, ContextStoreError, type ContextSummary, FileSessionStore, InvalidPromptError, type Logger, type ModelRoutingStrategy, OllamaClient, type OllamaClientLike, type OllamaGenerateOptions, type OllamaModelInfo, OllamaUnavailableError, type OptimizationMode, type OptimizePromptInput, type OptimizePromptResult, type OptimizerConfig, PromptOptimizer, type PromptPreset, type ProviderType, type RankedTargetCandidate, type RelevantContextResult, type RoutingDecision, type RoutingPriority, SQLiteSessionStore, type SessionData, type SessionStore, type TargetCapability, type TargetModelCandidate, TokenBudgetExceededError, TokenEstimator, type TokenUsageEstimate, type WorkloadBias, createOptimizer, getDefaultPreferredModels, optimizePrompt, selectOllamaModel };

package/dist/index.js CHANGED Viewed

@@ -1383,6 +1383,18 @@ Mode: Ultra compression. Minimize tokens aggressively.` : getOptimizationSystemP
       };
     }
     if (!this.client.listModels) {
+      if (isClaudeTiersOnlyTargetSet(availableTargets)) {
+        const selected = selectClaudeTierHeuristic(options.input, options.routingPriority, availableTargets);
+        if (selected) {
+          return {
+            selectedTarget: stripInternalTargetFields(selected),
+            rankedTargets: [{ ...stripInternalTargetFields(selected), rank: 1, reason: "Selected by Claude tier heuristic (no local Qwen router available)." }],
+            routingReason: "Selected by Claude tier heuristic (no local Qwen router available).",
+            routingWarnings: [],
+            routingProvider: "heuristic"
+          };
+        }
+      }
       return {
         selectedTarget: null,
         rankedTargets: [],
@@ -1407,12 +1419,23 @@ Mode: Ultra compression. Minimize tokens aggressively.` : getOptimizationSystemP
           routingProvider: null
         };
       }
+      const claudeTiersOnly = isClaudeTiersOnlyTargetSet(availableTargets);
+      const routingCandidates = claudeTiersOnly ? filterClaudeTierCandidates(availableTargets, options.input, options.routingPriority) : availableTargets;
+      if (routingCandidates.length === 1) {
+        return {
+          selectedTarget: stripInternalTargetFields(routingCandidates[0]),
+          rankedTargets: [{ ...stripInternalTargetFields(routingCandidates[0]), rank: 1, reason: "Selected by Claude tier pre-filter based on prompt signals." }],
+          routingReason: "Selected by Claude tier pre-filter based on prompt signals.",
+          routingWarnings: [],
+          routingProvider: "heuristic"
+        };
+      }
       const response = await this.client.generateJson({
         model: routerModel,
         timeoutMs: options.input.timeoutMs ?? this.config.timeoutMs,
         temperature: 0,
         format: "json",
-        systemPrompt: buildDownstreamRoutingSystemPrompt(options.routingPriority, options.workloadBias),
+        systemPrompt: buildDownstreamRoutingSystemPrompt(options.routingPriority, options.workloadBias, claudeTiersOnly),
         prompt: JSON.stringify(
           {
             objective: "Rank the caller-supplied downstream targets for this prompt and choose the best top target.",
@@ -1424,7 +1447,7 @@ Mode: Ultra compression. Minimize tokens aggressively.` : getOptimizationSystemP
             targetHints: options.input.targetHints ?? [],
             workloadBias: options.workloadBias,
             routingPriority: options.routingPriority,
-            candidateTargets: availableTargets.map((target) => ({
+            candidateTargets: routingCandidates.map((target) => ({
               id: target.id,
               provider: target.provider,
               model: target.model,
@@ -1443,7 +1466,7 @@ Mode: Ultra compression. Minimize tokens aggressively.` : getOptimizationSystemP
         new Set((response.rankedTargetIds ?? []).map((value) => value.trim()).filter(Boolean))
       ).slice(0, Math.max(1, options.routingTopK));
       const rankedTargets = rankedTargetIds.map((id, index) => {
-        const target = availableTargets.find((candidate) => candidate.id === id);
+        const target = routingCandidates.find((candidate) => candidate.id === id);
         if (!target) {
           return null;
         }
@@ -1454,7 +1477,7 @@ Mode: Ultra compression. Minimize tokens aggressively.` : getOptimizationSystemP
         };
       }).filter((value) => value !== null);
       const selectedTargetId = response.selectedTargetId?.trim();
-      const selectedTargetCandidate = (selectedTargetId && availableTargets.find((candidate) => candidate.id === selectedTargetId)) ?? (rankedTargets[0] ? availableTargets.find(
+      const selectedTargetCandidate = (selectedTargetId && routingCandidates.find((candidate) => candidate.id === selectedTargetId)) ?? (rankedTargets[0] ? routingCandidates.find(
         (candidate) => candidate.provider === rankedTargets[0].provider && candidate.model === rankedTargets[0].model && candidate.label === rankedTargets[0].label
       ) ?? null : null);
       if (!selectedTargetCandidate || rankedTargets.length === 0) {
@@ -1476,6 +1499,18 @@ Mode: Ultra compression. Minimize tokens aggressively.` : getOptimizationSystemP
         routingProvider: routerModel
       };
     } catch {
+      if (isClaudeTiersOnlyTargetSet(availableTargets)) {
+        const selected = selectClaudeTierHeuristic(options.input, options.routingPriority, availableTargets);
+        if (selected) {
+          return {
+            selectedTarget: stripInternalTargetFields(selected),
+            rankedTargets: [{ ...stripInternalTargetFields(selected), rank: 1, reason: "Selected by Claude tier heuristic (Qwen routing failed)." }],
+            routingReason: "Selected by Claude tier heuristic (Qwen routing failed).",
+            routingWarnings: ["Qwen downstream routing failed; fell back to Claude tier heuristic."],
+            routingProvider: "heuristic"
+          };
+        }
+      }
       return {
         selectedTarget: null,
         rankedTargets: [],
@@ -1624,8 +1659,8 @@ function stripInternalTargetFields(target) {
     latencyRank: target.latencyRank
   };
 }
-function buildDownstreamRoutingSystemPrompt(priority, workloadBias) {
-  return [
+function buildDownstreamRoutingSystemPrompt(priority, workloadBias, claudeTiersOnly = false) {
+  const lines = [
     "You are a downstream model router for PromptPilot.",
     "Return strict JSON only with this shape:",
     '{"selectedTargetId":"string","rankedTargetIds":["string"],"reason":"string"}',
@@ -1636,7 +1671,108 @@ function buildDownstreamRoutingSystemPrompt(priority, workloadBias) {
     "Code-first means ambiguous prompts should default toward coding-capable or agentic-capable targets.",
     "Explicit email, support, chat, and lightweight writing prompts may prefer cheaper lighter targets.",
     "Do not invent targets. Do not output prose outside JSON."
-  ].join("\n");
+  ];
+  if (claudeTiersOnly) {
+    lines.push(
+      "You are choosing between Claude model tiers (Haiku, Sonnet, Opus).",
+      "Haiku: fastest and cheapest. ONLY suitable for email, chat, support, summarization, and trivial one-sentence rewrites. Do NOT use Haiku for any coding, debugging, refactoring, or technical tasks.",
+      "Sonnet: balanced cost and capability. The DEFAULT for all coding, debugging, refactoring, writing, and general-purpose tasks. If the prompt mentions code, a file, a module, a bug, or any technical work, choose Sonnet at minimum.",
+      "Opus: most capable and most expensive. Use for complex architecture decisions, multi-constraint agentic planning, system design, long-horizon reasoning, or when the prompt explicitly requires the strongest model.",
+      "When routing priority is cheapest_adequate: Haiku for non-technical lightweight tasks only, Sonnet for anything involving code or technical content, Opus only when clearly necessary.",
+      "When routing priority is best_quality: Opus for all code and reasoning tasks, Sonnet for writing and non-technical tasks.",
+      "When routing priority is fastest_adequate: Haiku only for lightweight non-technical tasks, Sonnet otherwise.",
+      "IMPORTANT: refactor, debug, fix, auth, module, CI, test, and TypeScript are all coding signals \u2014 always choose Sonnet or Opus for these, never Haiku."
+    );
+  }
+  return lines.join("\n");
+}
+/**
+ * Predefined Anthropic target candidates, one per Claude tier, listed in the
+ * order Haiku (costRank/latencyRank 1), Sonnet (2), Opus (3).
+ * NOTE(review): rank 1 presumably means cheapest/fastest — confirm against the
+ * code that consumes costRank and latencyRank.
+ */
+var CLAUDE_TIER_TARGETS = [
+  {
+    provider: "anthropic",
+    model: "claude-haiku-4-5",
+    label: "anthropic:claude-haiku-4-5",
+    capabilities: ["writing", "email", "support", "chat", "summarization"],
+    costRank: 1,
+    latencyRank: 1
+  },
+  {
+    provider: "anthropic",
+    model: "claude-sonnet-4-6",
+    label: "anthropic:claude-sonnet-4-6",
+    capabilities: ["coding", "writing", "agentic", "tool_use", "refactor", "debugging"],
+    costRank: 2,
+    latencyRank: 2
+  },
+  {
+    provider: "anthropic",
+    model: "claude-opus-4-6",
+    label: "anthropic:claude-opus-4-6",
+    capabilities: ["coding", "agentic", "tool_use", "refactor", "debugging", "architecture", "writing"],
+    costRank: 3,
+    latencyRank: 3
+  }
+];
+/**
+ * Reports whether a target list is made up exclusively of Claude tier models.
+ *
+ * @param targets Candidate targets; each is read for `provider` and `model`.
+ * @returns `true` only when there are at least two targets and every one has
+ *   provider "anthropic" and a model name containing haiku, sonnet, or opus
+ *   (case-insensitive).
+ */
+function isClaudeTiersOnlyTargetSet(targets) {
+  if (targets.length < 2) {
+    return false;
+  }
+  const claudeTierPattern = /haiku|sonnet|opus/i;
+  // A single non-Anthropic or non-tier entry disqualifies the whole set.
+  const hasOutsider = targets.some(
+    (candidate) => candidate.provider !== "anthropic" || !claudeTierPattern.test(candidate.model)
+  );
+  return !hasOutsider;
+}
+/**
+ * Detects whether the optimizer input looks like a coding or technical request.
+ *
+ * @param input Object read for `task`, `preset`, `targetHints`, and `prompt`.
+ * @returns `true` when the task or preset equals "code" (case-insensitive),
+ *   when any target hint is a coding-related capability, or when the prompt
+ *   contains one of the technical keywords as a whole word.
+ */
+function isCodeSignal(input) {
+  const normalizedTask = (input.task ?? "").toLowerCase();
+  const normalizedPreset = (input.preset ?? "").toLowerCase();
+  const targetHints = input.targetHints ?? [];
+  if (normalizedTask === "code") {
+    return true;
+  }
+  if (normalizedPreset === "code") {
+    return true;
+  }
+  const codingHints = ["coding", "agentic", "tool_use", "refactor", "debugging", "architecture"];
+  if (targetHints.some((hint) => codingHints.includes(hint))) {
+    return true;
+  }
+  // Last resort: keyword scan of the raw prompt text.
+  const codeKeywordPattern = /\b(refactor|debug|fix|auth|module|ci|test|typescript|javascript|function|class|api|endpoint|build|deploy|lint|migration)\b/i;
+  return codeKeywordPattern.test(input.prompt);
+}
+/**
+ * Detects whether the optimizer input asks for architecture-level work.
+ *
+ * @param input Object read for `targetHints` and `prompt`.
+ * @returns `true` when the hints include "architecture" or the prompt matches
+ *   one of the architecture/planning phrases (case-insensitive).
+ */
+function isArchitectureSignal(input) {
+  const targetHints = input.targetHints ?? [];
+  if (targetHints.includes("architecture")) {
+    return true;
+  }
+  const architecturePattern = /\b(architect|architecture|design system|migration plan|multi.?step|long.?horizon|agentic.*plan|system design|microservice|monolith)\b/i;
+  return architecturePattern.test(input.prompt);
+}
+/**
+ * Narrows a Claude-tier target list before it is handed to the router, based
+ * on the routing priority and on signals found in the input.
+ *
+ * - "best_quality": keep Opus and Sonnet models.
+ * - "cheapest_adequate": architecture signal keeps Opus and Sonnet; code signal
+ *   keeps the first Sonnet alone (or, without a Sonnet, every non-Haiku model);
+ *   otherwise keep Haiku and Sonnet.
+ * - "fastest_adequate": drop Opus models.
+ * - any other priority: no narrowing.
+ *
+ * A filter that would remove every candidate is ignored and the original list
+ * is returned, so the result is only empty when `targets` itself is empty.
+ *
+ * @param targets Candidate targets; each is read for `model`.
+ * @param input Optimizer input passed to isArchitectureSignal / isCodeSignal.
+ * @param priority Routing priority string.
+ * @returns The narrowed candidates, or `targets` itself when nothing applies.
+ */
+function filterClaudeTierCandidates(targets, input, priority) {
+  // Shared "narrow, but never to nothing" rule used by every branch.
+  const narrowOrKeepAll = (keep) => {
+    const narrowed = targets.filter(keep);
+    return narrowed.length > 0 ? narrowed : targets;
+  };
+  if (priority === "best_quality") {
+    return narrowOrKeepAll((t) => /opus|sonnet/i.test(t.model));
+  }
+  if (priority === "cheapest_adequate") {
+    if (isArchitectureSignal(input)) {
+      return narrowOrKeepAll((t) => /opus|sonnet/i.test(t.model));
+    }
+    if (isCodeSignal(input)) {
+      const sonnet = targets.find((t) => /sonnet/i.test(t.model));
+      // Without a Sonnet, prefer any non-Haiku model. An all-Haiku set used to
+      // yield [] here, unlike every other branch; it now keeps the full set.
+      return sonnet ? [sonnet] : narrowOrKeepAll((t) => !/haiku/i.test(t.model));
+    }
+    return narrowOrKeepAll((t) => /haiku|sonnet/i.test(t.model));
+  }
+  if (priority === "fastest_adequate") {
+    return narrowOrKeepAll((t) => !/opus/i.test(t.model));
+  }
+  return targets;
+}
+/**
+ * Picks a single Claude tier without consulting the router model, using only
+ * the routing priority and keyword/hint signals from the input.
+ *
+ * The first Haiku, Sonnet, and Opus entries in `targets` are the only
+ * candidates. Each branch lists its preferred tier first and then falls back
+ * through the remaining tiers, so a target is returned whenever at least one
+ * tier is present.
+ *
+ * @param input Object read for `task`, `preset`, `targetHints`, and `prompt`.
+ * @param priority Routing priority string.
+ * @param targets Candidate targets; each is read for `model`.
+ * @returns The chosen target, or `null` when `targets` contains no Haiku,
+ *   Sonnet, or Opus model.
+ */
+function selectClaudeTierHeuristic(input, priority, targets) {
+  const haiku = targets.find((t) => /haiku/i.test(t.model)) ?? null;
+  const sonnet = targets.find((t) => /sonnet/i.test(t.model)) ?? null;
+  const opus = targets.find((t) => /opus/i.test(t.model)) ?? null;
+  const task = (input.task ?? "").toLowerCase();
+  const preset = (input.preset ?? "").toLowerCase();
+  const hints = input.targetHints ?? [];
+  const prompt = input.prompt;
+  const lightweightKinds = ["email", "chat", "support", "summarization"];
+  const isLightweight = lightweightKinds.includes(task) || lightweightKinds.includes(preset) || hints.some((h) => lightweightKinds.includes(h));
+  const needsOpus = /\b(architect|architecture|design system|migration plan|multi.?step|complex.*refactor|long.?horizon|agentic.*plan)\b/i.test(prompt) || hints.includes("architecture") || priority === "best_quality";
+  const isCodeTask = task === "code" || preset === "code" || hints.some((h) => ["coding", "agentic", "tool_use", "refactor", "debugging", "architecture"].includes(h)) || /\b(refactor|debug|fix|auth|module|ci|test|typescript|javascript|function|class|api|endpoint)\b/i.test(prompt);
+  if (priority === "fastest_adequate") {
+    // Fallbacks now run through all three tiers; previously these chains
+    // stopped after two and could return null with a usable target present.
+    if (needsOpus) return opus ?? sonnet ?? haiku;
+    if (isCodeTask) return sonnet ?? opus ?? haiku;
+    return haiku ?? sonnet ?? opus;
+  }
+  if (priority === "best_quality") {
+    return opus ?? sonnet ?? haiku;
+  }
+  if (needsOpus) return opus ?? sonnet ?? haiku;
+  if (isCodeTask) return sonnet ?? opus ?? haiku;
+  if (isLightweight) return haiku ?? sonnet ?? opus;
+  return sonnet ?? haiku ?? opus;
 }
 function inferCapabilities(target) {
   const lower = `${target.provider} ${target.model} ${target.label ?? ""}`.toLowerCase();
@@ -1789,6 +1925,7 @@ async function optimizePrompt(input, config = {}) {
   return optimizer.optimize(input);
 }
 export {
+  CLAUDE_TIER_TARGETS,
   ContextCompressor,
   ContextManager,
   ContextStoreError,