promptpilot 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  `promptpilot` is a lightweight TypeScript npm package that sits between your app or CLI workflow and a target LLM. It rewrites prompts locally through Ollama when available, stores reusable session context, compresses older turns, and emits a Claude-friendly final prompt for shell pipelines or application code.
4
4
 
5
- It is designed for local-first workflows on machines like an 18 GB MacBook. By default, `promptpilot` inspects your local Ollama installation and auto-selects a small optimization model, preferring `qwen2.5:3b`, `phi3:mini`, and `llama3.2:3b` in that order. The package still lets you override the model manually when needed.
5
+ It is designed for local-first workflows on machines like an 18 GB MacBook. By default, `promptpilot` inspects your local Ollama installation, uses a small local Qwen model as a router when available, and lets that router choose the best installed small optimization model for each prompt. It still lets you override the model manually when needed.
6
6
 
7
7
  ## Why local Ollama
8
8
 
@@ -10,6 +10,7 @@ It is designed for local-first workflows on machines like an 18 GB MacBook. By d
10
10
  - It reduces external API calls for prompt rewriting.
11
11
  - It lets you use a small, fast model for compression before sending the final prompt to a stronger remote model like Claude.
12
12
  - It automatically picks an installed local model that fits a low-memory workflow.
13
+ - It uses Qwen to route prompt optimization to the best available small local model when possible.
13
14
 
14
15
  ## What it does
15
16
 
@@ -50,7 +51,7 @@ Install directly from a local tarball:
50
51
 
51
52
  ```bash
52
53
  npm pack
53
- npm install -g ./promptpilot-0.1.1.tgz
54
+ npm install -g ./promptpilot-0.1.2.tgz
54
55
  ```
55
56
 
56
57
  ## Library usage
@@ -142,7 +143,7 @@ prompt.stdout.pipe(claude.stdin);
142
143
 
143
144
  By default, if you pass a `sessionId`, `promptpilot` stores optimized turns in a local session store. The default store is JSON files under `~/.promptpilot/sessions`. A SQLite store is also available when `node:sqlite` or `better-sqlite3` is present.
144
145
 
145
- If you do not pass `ollamaModel` or `--model`, `promptpilot` asks Ollama which models are installed and picks the best small model for the job. For most workflows it prefers `qwen2.5:3b`, then `phi3:mini`, then `llama3.2:3b`. For code-heavy prompts it will prefer `qwen2.5-coder:3b` when that model is installed. If only oversized local models are available, it warns and falls back to deterministic heuristic optimization instead of silently using a heavy model.
146
+ If you do not pass `ollamaModel` or `--model`, `promptpilot` asks Ollama which models are installed and lets a small local Qwen router choose the best small optimizer model for the current prompt. It no longer statically ranks multiple candidate models. If multiple small candidates exist but no suitable Qwen router model is available, it falls back to deterministic heuristic prompt optimization instead of making a static model-choice guess. If only oversized local models are available, it likewise falls back to deterministic heuristic optimization instead of silently using a heavy model.
146
147
 
147
148
  Each session stores:
148
149
 
package/dist/cli.js CHANGED
@@ -594,6 +594,11 @@ var ContextManager = class {
594
594
  }
595
595
  session.updatedAt = timestamp;
596
596
  await this.store.saveSession(session);
597
+ this.logger.debug("context saved", {
598
+ sessionId: options.sessionId,
599
+ entryCount: session.entries.length,
600
+ summaryCount: session.summaries.length
601
+ });
597
602
  }
598
603
  async summarizeContext(sessionId, prompt, task, budgetTokens, timeoutMs) {
599
604
  const session = await this.store.loadSession(sessionId);
@@ -728,116 +733,47 @@ var DEFAULT_SMALL_MODEL_PREFERENCES = [
728
733
  function getDefaultPreferredModels() {
729
734
  return [...DEFAULT_SMALL_MODEL_PREFERENCES];
730
735
  }
736
+ function getSuitableAutoModels(installedModels) {
737
+ return installedModels.filter((model) => isSuitableSmallModel(model));
738
+ }
739
+ function getQwenRouterModel(installedModels, explicitRouterModel) {
740
+ if (explicitRouterModel) {
741
+ const match = installedModels.find((model) => model.name === explicitRouterModel);
742
+ return match?.name ?? null;
743
+ }
744
+ const qwenRouters = getSuitableAutoModels(installedModels).filter((model) => /qwen/i.test(model.name)).sort((left, right) => scoreRouterModel(right) - scoreRouterModel(left));
745
+ return qwenRouters[0]?.name ?? null;
746
+ }
731
747
  function selectOllamaModel(input) {
732
- const preferred = buildPreferredOrder(input);
733
- const smallCandidates = input.installedModels.filter((model) => isSuitableSmallModel(model));
734
- const preferredMatch = findPreferredMatch(smallCandidates, preferred);
735
- if (preferredMatch) {
748
+ const smallCandidates = getSuitableAutoModels(input.installedModels);
749
+ if (smallCandidates.length === 1) {
736
750
  return {
737
- model: preferredMatch,
738
- reason: `Selected installed model "${preferredMatch}" from the preferred low-memory order.`,
751
+ model: smallCandidates[0].name,
752
+ reason: `Selected installed model "${smallCandidates[0].name}" because it is the only suitable small local model available.`,
739
753
  suitableForAutoUse: true
740
754
  };
741
755
  }
742
- const ranked = [...smallCandidates].filter((model) => isUsefulGenerationModel(model.name)).map((model) => ({ model, score: scoreModel(model.name, input.preset, input.mode, input.task) })).sort((left, right) => right.score - left.score);
743
- if (ranked[0]) {
756
+ if (smallCandidates.length > 1) {
744
757
  return {
745
- model: ranked[0].model.name,
746
- reason: `Selected installed model "${ranked[0].model.name}" using task-aware ranking.`,
747
- suitableForAutoUse: true
758
+ model: "",
759
+ reason: `Multiple suitable small local models are available (${smallCandidates.map((model) => model.name).join(", ")}), so a Qwen router must choose between them.`,
760
+ suitableForAutoUse: false
748
761
  };
749
762
  }
750
- const oversizedRanked = [...input.installedModels].filter((model) => isUsefulGenerationModel(model.name)).map((model) => ({ model, score: scoreModel(model.name, input.preset, input.mode, input.task) })).sort((left, right) => right.score - left.score);
763
+ const oversizedRanked = [...input.installedModels].filter((model) => isUsefulGenerationModel(model.name)).sort((left, right) => compareModelNames(left.name, right.name));
751
764
  if (oversizedRanked[0]) {
752
765
  return {
753
- model: oversizedRanked[0].model.name,
754
- reason: `Installed model "${oversizedRanked[0].model.name}" was detected, but it is larger than the preferred low-memory range for auto-use.`,
766
+ model: oversizedRanked[0].name,
767
+ reason: `Installed model "${oversizedRanked[0].name}" was detected, but it is larger than the preferred low-memory range for auto-use.`,
755
768
  suitableForAutoUse: false
756
769
  };
757
770
  }
758
771
  return {
759
- model: preferred[0] ?? "qwen2.5:3b",
760
- reason: "No installed Ollama models were discovered, so the default small-model preference was used.",
772
+ model: "",
773
+ reason: "No suitable local generation models were discovered for automatic routing.",
761
774
  suitableForAutoUse: false
762
775
  };
763
776
  }
764
- function buildPreferredOrder(input) {
765
- const taskContext = `${input.task ?? ""} ${input.preset} ${input.mode}`.toLowerCase();
766
- const configured = (input.preferredModels ?? []).map((model) => model.toLowerCase());
767
- if (taskContext.includes("code")) {
768
- return uniqueModels([
769
- ...configured,
770
- "qwen2.5-coder:3b",
771
- "qwen2.5:3b",
772
- "phi3:mini",
773
- "llama3.2:3b",
774
- "qwen2.5:1.5b"
775
- ]);
776
- }
777
- if (taskContext.includes("compress") || taskContext.includes("summar")) {
778
- return uniqueModels([
779
- ...configured,
780
- "qwen2.5:3b",
781
- "qwen2.5:1.5b",
782
- "phi3:mini",
783
- "llama3.2:3b"
784
- ]);
785
- }
786
- return uniqueModels([...configured, ...DEFAULT_SMALL_MODEL_PREFERENCES]);
787
- }
788
- function uniqueModels(models) {
789
- return Array.from(new Set(models));
790
- }
791
- function findPreferredMatch(installedModels, preferred) {
792
- const installedNames = installedModels.map((model) => model.name);
793
- for (const preferredName of preferred) {
794
- const direct = installedNames.find((name) => name.toLowerCase() === preferredName);
795
- if (direct) {
796
- return direct;
797
- }
798
- }
799
- return null;
800
- }
801
- function scoreModel(modelName, preset, mode, task) {
802
- const lower = modelName.toLowerCase();
803
- let score = 0;
804
- if (!isUsefulGenerationModel(lower)) {
805
- return -100;
806
- }
807
- if (lower.includes("qwen2.5")) {
808
- score += 4;
809
- } else if (lower.includes("phi3")) {
810
- score += 3.5;
811
- } else if (lower.includes("llama3.2")) {
812
- score += 3;
813
- } else if (lower.includes("mistral")) {
814
- score += 2;
815
- }
816
- const parameterSize = extractBillions(lower);
817
- if (parameterSize !== null) {
818
- if (parameterSize <= 4) {
819
- score += 4;
820
- } else if (parameterSize <= 8) {
821
- score += 1;
822
- } else {
823
- score -= 4;
824
- }
825
- }
826
- if (lower.includes("mini")) {
827
- score += 2;
828
- }
829
- if (lower.includes("instruct") || lower.includes("chat")) {
830
- score += 1;
831
- }
832
- const taskContext = `${task ?? ""} ${preset} ${mode}`.toLowerCase();
833
- if (taskContext.includes("code") && lower.includes("coder")) {
834
- score += 3;
835
- }
836
- if ((taskContext.includes("compress") || taskContext.includes("summar")) && lower.includes("qwen2.5")) {
837
- score += 1;
838
- }
839
- return score;
840
- }
841
777
  function extractBillions(modelName) {
842
778
  const match = modelName.match(/(\d+(?:\.\d+)?)b/);
843
779
  if (!match) {
@@ -871,6 +807,25 @@ function isSuitableSmallModel(model) {
871
807
  }
872
808
  return /mini|1\.5b|2b|3b|4b/i.test(model.name);
873
809
  }
810
+ function scoreRouterModel(model) {
811
+ const lower = model.name.toLowerCase();
812
+ let score = 0;
813
+ if (lower.includes("qwen2.5")) {
814
+ score += 3;
815
+ }
816
+ if (lower.includes("3b")) {
817
+ score += 2;
818
+ } else if (lower.includes("1.5b")) {
819
+ score += 1;
820
+ }
821
+ if (lower.includes("coder")) {
822
+ score -= 1;
823
+ }
824
+ return score;
825
+ }
826
+ function compareModelNames(left, right) {
827
+ return left.localeCompare(right);
828
+ }
874
829
 
875
830
  // src/core/optimizer.ts
876
831
  var DEFAULT_MODE = "claude_cli";
@@ -904,6 +859,7 @@ var PromptOptimizer = class {
904
859
  host: config.host ?? "http://localhost:11434",
905
860
  ollamaModel: config.ollamaModel,
906
861
  preferredModels: config.preferredModels ?? getDefaultPreferredModels(),
862
+ modelRoutingStrategy: "qwen",
907
863
  timeoutMs: config.timeoutMs ?? 3e4,
908
864
  temperature: config.temperature ?? 0.1
909
865
  };
@@ -949,6 +905,7 @@ var PromptOptimizer = class {
949
905
  let providerChanges = [];
950
906
  if (provider === "ollama") {
951
907
  const modelSelection = await this.resolveOllamaModel({
908
+ prompt: originalPrompt,
952
909
  mode,
953
910
  preset,
954
911
  task: input.task
@@ -1128,16 +1085,18 @@ var PromptOptimizer = class {
1128
1085
  };
1129
1086
  }
1130
1087
  if (!this.client.listModels) {
1131
- const fallback = this.config.preferredModels[0] ?? "qwen2.5:3b";
1132
1088
  return {
1133
- model: fallback,
1134
- warnings: [`Model auto-selection is unavailable in the current Ollama client, so "${fallback}" was assumed.`],
1135
- reason: `Assumed default model "${fallback}" because model discovery is unsupported.`,
1136
- forceHeuristic: false
1089
+ model: "heuristic",
1090
+ warnings: [
1091
+ "Model auto-selection is unavailable in the current Ollama client, so prompt optimization is falling back to deterministic heuristic formatting."
1092
+ ],
1093
+ reason: "Model discovery is unsupported, so Qwen-based model routing could not run.",
1094
+ forceHeuristic: true
1137
1095
  };
1138
1096
  }
1139
1097
  try {
1140
1098
  const installedModels = await this.client.listModels();
1099
+ const suitableModels = getSuitableAutoModels(installedModels);
1141
1100
  const selection = selectOllamaModel({
1142
1101
  installedModels,
1143
1102
  mode: options.mode,
@@ -1145,7 +1104,7 @@ var PromptOptimizer = class {
1145
1104
  task: options.task,
1146
1105
  preferredModels: this.config.preferredModels
1147
1106
  });
1148
- if (!selection.suitableForAutoUse) {
1107
+ if (suitableModels.length === 0) {
1149
1108
  return {
1150
1109
  model: selection.model,
1151
1110
  warnings: [
@@ -1156,19 +1115,131 @@ var PromptOptimizer = class {
1156
1115
  forceHeuristic: true
1157
1116
  };
1158
1117
  }
1118
+ if (suitableModels.length === 1) {
1119
+ return {
1120
+ model: selection.model,
1121
+ warnings: [],
1122
+ reason: selection.reason,
1123
+ forceHeuristic: false
1124
+ };
1125
+ }
1126
+ if (this.config.modelRoutingStrategy === "qwen") {
1127
+ const routed = await this.tryQwenModelRouting({
1128
+ prompt: options.prompt,
1129
+ task: options.task,
1130
+ mode: options.mode,
1131
+ preset: options.preset,
1132
+ installedModels,
1133
+ candidateModels: suitableModels.map((model) => model.name),
1134
+ fallbackModel: selection.model
1135
+ });
1136
+ return {
1137
+ model: routed.model,
1138
+ warnings: routed.warnings,
1139
+ reason: routed.reason,
1140
+ forceHeuristic: routed.model === "heuristic"
1141
+ };
1142
+ }
1159
1143
  return {
1160
- model: selection.model,
1161
- warnings: installedModels.length === 0 ? [`No installed Ollama models were reported, so "${selection.model}" was chosen as the default preference.`] : [],
1162
- reason: selection.reason,
1163
- forceHeuristic: false
1144
+ model: "heuristic",
1145
+ warnings: ["Qwen model routing is required but was disabled, so prompt optimization is falling back to deterministic heuristic formatting."],
1146
+ reason: "Qwen model routing is required but was disabled.",
1147
+ forceHeuristic: true
1164
1148
  };
1165
1149
  } catch {
1166
- const fallback = this.config.preferredModels[0] ?? "qwen2.5:3b";
1167
1150
  return {
1168
- model: fallback,
1169
- warnings: [`Failed to inspect local Ollama models, so "${fallback}" was chosen as the default preference.`],
1170
- reason: `Fell back to default model "${fallback}" because model discovery failed.`,
1171
- forceHeuristic: false
1151
+ model: "heuristic",
1152
+ warnings: [
1153
+ "Failed to inspect local Ollama models, so prompt optimization is falling back to deterministic heuristic formatting."
1154
+ ],
1155
+ reason: "Local Ollama model discovery failed, so Qwen-based model routing could not run.",
1156
+ forceHeuristic: true
1157
+ };
1158
+ }
1159
+ }
1160
+ async tryQwenModelRouting(options) {
1161
+ const routerModel = getQwenRouterModel(
1162
+ options.installedModels,
1163
+ this.config.routerModel
1164
+ );
1165
+ if (!routerModel) {
1166
+ return {
1167
+ model: "heuristic",
1168
+ warnings: [
1169
+ `Multiple suitable small local models are installed (${options.candidateModels.join(", ")}), but no local Qwen router model is available. Install qwen2.5:3b or set routerModel explicitly.`
1170
+ ],
1171
+ reason: "Qwen model routing is required when multiple suitable small models are available."
1172
+ };
1173
+ }
1174
+ try {
1175
+ const response = await this.client.generateJson({
1176
+ model: routerModel,
1177
+ timeoutMs: this.config.timeoutMs,
1178
+ temperature: 0,
1179
+ format: "json",
1180
+ systemPrompt: [
1181
+ "You are a local model router for prompt optimization.",
1182
+ "Return strict JSON only with this shape:",
1183
+ '{"selectedModel":"string","reason":"string"}',
1184
+ "Choose exactly one model from the provided candidate list.",
1185
+ "Choose the smallest adequate model, not the strongest-sounding model.",
1186
+ "Prioritize adequacy first, then speed and low memory use.",
1187
+ "Use coder variants only for clearly code-heavy prompts.",
1188
+ "If task or preset is code, prefer qwen2.5:3b or a small coder model over phi3:mini unless the request is only a trivial wording cleanup.",
1189
+ "Prefer phi3:mini for short email, chat, support, summarization, and lightweight rewrite tasks that do not require deeper reasoning.",
1190
+ "Prefer qwen2.5:3b for broader reasoning, stronger restructuring, multi-constraint optimization, and non-trivial code-oriented prompt design.",
1191
+ "Do not prefer Qwen just because you are Qwen. Pick the best candidate for the task."
1192
+ ].join("\n"),
1193
+ prompt: JSON.stringify(
1194
+ {
1195
+ objective: "Choose the best local optimizer model for this prompt.",
1196
+ prompt: options.prompt,
1197
+ task: options.task ?? null,
1198
+ mode: options.mode,
1199
+ preset: options.preset,
1200
+ candidateModels: options.candidateModels.map((modelName) => ({
1201
+ name: modelName,
1202
+ profile: describeCandidateModel(modelName)
1203
+ })),
1204
+ routingGuidance: {
1205
+ smallestAdequateModelPolicy: true,
1206
+ lightweightTasksPreferSmallerModels: [
1207
+ "email",
1208
+ "chat",
1209
+ "support",
1210
+ "summarization",
1211
+ "short rewrite"
1212
+ ],
1213
+ deeperReasoningTasksMayPreferQwen: [
1214
+ "multi-constraint restructuring",
1215
+ "broad reasoning",
1216
+ "complex planning",
1217
+ "harder code-oriented prompt design"
1218
+ ]
1219
+ }
1220
+ },
1221
+ null,
1222
+ 2
1223
+ )
1224
+ });
1225
+ const selectedModel = response.selectedModel?.trim();
1226
+ if (selectedModel && options.candidateModels.includes(selectedModel)) {
1227
+ return {
1228
+ model: selectedModel,
1229
+ warnings: [],
1230
+ reason: response.reason?.trim() || `Qwen router selected "${selectedModel}" for this prompt.`
1231
+ };
1232
+ }
1233
+ return {
1234
+ model: "heuristic",
1235
+ warnings: ["Qwen router returned an invalid model choice, so prompt optimization is falling back to deterministic heuristic formatting."],
1236
+ reason: "Qwen router returned an invalid model selection."
1237
+ };
1238
+ } catch {
1239
+ return {
1240
+ model: "heuristic",
1241
+ warnings: ["Qwen router could not choose a model, so prompt optimization is falling back to deterministic heuristic formatting."],
1242
+ reason: "Qwen router failed to select a model."
1172
1243
  };
1173
1244
  }
1174
1245
  }
@@ -1216,6 +1287,22 @@ var PromptOptimizer = class {
1216
1287
  };
1217
1288
  }
1218
1289
  };
1290
+ function describeCandidateModel(modelName) {
1291
+ const lower = modelName.toLowerCase();
1292
+ if (lower.includes("phi3:mini")) {
1293
+ return "Very small and fast. Good for short rewrites, lightweight email/chat tasks, and simple prompt cleanup.";
1294
+ }
1295
+ if (lower.includes("qwen2.5:3b")) {
1296
+ return "Small general-purpose model with stronger reasoning and restructuring than ultra-light models. Better for broader or more complex prompt optimization.";
1297
+ }
1298
+ if (lower.includes("coder")) {
1299
+ return "Code-specialized model. Use only when the prompt is clearly code-heavy or refactor-oriented.";
1300
+ }
1301
+ if (lower.includes("llama3.2:3b")) {
1302
+ return "Small general chat/rewrite model. Reasonable middle option for general tasks.";
1303
+ }
1304
+ return "Local candidate model for prompt optimization.";
1305
+ }
1219
1306
  function resolveSessionStore(config) {
1220
1307
  if (typeof config.contextStore === "object" && config.contextStore !== null) {
1221
1308
  return config.contextStore;