npm - promptpilot - Versions diffs - 0.1.1 → 0.1.2 - Mend

promptpilot 0.1.1 → 0.1.2

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (7)

package/dist/index.js CHANGED

@@ -561,6 +561,11 @@ var ContextManager = class {
     }
     session.updatedAt = timestamp;
     await this.store.saveSession(session);
+    this.logger.debug("context saved", {
+      sessionId: options.sessionId,
+      entryCount: session.entries.length,
+      summaryCount: session.summaries.length
+    });
   }
   async summarizeContext(sessionId, prompt, task, budgetTokens, timeoutMs) {
     const session = await this.store.loadSession(sessionId);
@@ -695,116 +700,47 @@ var DEFAULT_SMALL_MODEL_PREFERENCES = [
 function getDefaultPreferredModels() {
   return [...DEFAULT_SMALL_MODEL_PREFERENCES];
 }
+function getSuitableAutoModels(installedModels) {
+  return installedModels.filter((model) => isSuitableSmallModel(model));
+}
+function getQwenRouterModel(installedModels, explicitRouterModel) {
+  if (explicitRouterModel) {
+    const match = installedModels.find((model) => model.name === explicitRouterModel);
+    return match?.name ?? null;
+  }
+  const qwenRouters = getSuitableAutoModels(installedModels).filter((model) => /qwen/i.test(model.name)).sort((left, right) => scoreRouterModel(right) - scoreRouterModel(left));
+  return qwenRouters[0]?.name ?? null;
+}
 function selectOllamaModel(input) {
-  const preferred = buildPreferredOrder(input);
-  const smallCandidates = input.installedModels.filter((model) => isSuitableSmallModel(model));
-  const preferredMatch = findPreferredMatch(smallCandidates, preferred);
-  if (preferredMatch) {
+  const smallCandidates = getSuitableAutoModels(input.installedModels);
+  if (smallCandidates.length === 1) {
     return {
-      model: preferredMatch,
-      reason: `Selected installed model "${preferredMatch}" from the preferred low-memory order.`,
+      model: smallCandidates[0].name,
+      reason: `Selected installed model "${smallCandidates[0].name}" because it is the only suitable small local model available.`,
       suitableForAutoUse: true
     };
   }
-  const ranked = [...smallCandidates].filter((model) => isUsefulGenerationModel(model.name)).map((model) => ({ model, score: scoreModel(model.name, input.preset, input.mode, input.task) })).sort((left, right) => right.score - left.score);
-  if (ranked[0]) {
+  if (smallCandidates.length > 1) {
     return {
-      model: ranked[0].model.name,
-      reason: `Selected installed model "${ranked[0].model.name}" using task-aware ranking.`,
-      suitableForAutoUse: true
+      model: "",
+      reason: `Multiple suitable small local models are available (${smallCandidates.map((model) => model.name).join(", ")}), so a Qwen router must choose between them.`,
+      suitableForAutoUse: false
     };
   }
-  const oversizedRanked = [...input.installedModels].filter((model) => isUsefulGenerationModel(model.name)).map((model) => ({ model, score: scoreModel(model.name, input.preset, input.mode, input.task) })).sort((left, right) => right.score - left.score);
+  const oversizedRanked = [...input.installedModels].filter((model) => isUsefulGenerationModel(model.name)).sort((left, right) => compareModelNames(left.name, right.name));
   if (oversizedRanked[0]) {
     return {
-      model: oversizedRanked[0].model.name,
-      reason: `Installed model "${oversizedRanked[0].model.name}" was detected, but it is larger than the preferred low-memory range for auto-use.`,
+      model: oversizedRanked[0].name,
+      reason: `Installed model "${oversizedRanked[0].name}" was detected, but it is larger than the preferred low-memory range for auto-use.`,
       suitableForAutoUse: false
     };
   }
   return {
-    model: preferred[0] ?? "qwen2.5:3b",
-    reason: "No installed Ollama models were discovered, so the default small-model preference was used.",
+    model: "",
+    reason: "No suitable local generation models were discovered for automatic routing.",
     suitableForAutoUse: false
   };
 }
-function buildPreferredOrder(input) {
-  const taskContext = `${input.task ?? ""} ${input.preset} ${input.mode}`.toLowerCase();
-  const configured = (input.preferredModels ?? []).map((model) => model.toLowerCase());
-  if (taskContext.includes("code")) {
-    return uniqueModels([
-      ...configured,
-      "qwen2.5-coder:3b",
-      "qwen2.5:3b",
-      "phi3:mini",
-      "llama3.2:3b",
-      "qwen2.5:1.5b"
-    ]);
-  }
-  if (taskContext.includes("compress") || taskContext.includes("summar")) {
-    return uniqueModels([
-      ...configured,
-      "qwen2.5:3b",
-      "qwen2.5:1.5b",
-      "phi3:mini",
-      "llama3.2:3b"
-    ]);
-  }
-  return uniqueModels([...configured, ...DEFAULT_SMALL_MODEL_PREFERENCES]);
-}
-function uniqueModels(models) {
-  return Array.from(new Set(models));
-}
-function findPreferredMatch(installedModels, preferred) {
-  const installedNames = installedModels.map((model) => model.name);
-  for (const preferredName of preferred) {
-    const direct = installedNames.find((name) => name.toLowerCase() === preferredName);
-    if (direct) {
-      return direct;
-    }
-  }
-  return null;
-}
-function scoreModel(modelName, preset, mode, task) {
-  const lower = modelName.toLowerCase();
-  let score = 0;
-  if (!isUsefulGenerationModel(lower)) {
-    return -100;
-  }
-  if (lower.includes("qwen2.5")) {
-    score += 4;
-  } else if (lower.includes("phi3")) {
-    score += 3.5;
-  } else if (lower.includes("llama3.2")) {
-    score += 3;
-  } else if (lower.includes("mistral")) {
-    score += 2;
-  }
-  const parameterSize = extractBillions(lower);
-  if (parameterSize !== null) {
-    if (parameterSize <= 4) {
-      score += 4;
-    } else if (parameterSize <= 8) {
-      score += 1;
-    } else {
-      score -= 4;
-    }
-  }
-  if (lower.includes("mini")) {
-    score += 2;
-  }
-  if (lower.includes("instruct") || lower.includes("chat")) {
-    score += 1;
-  }
-  const taskContext = `${task ?? ""} ${preset} ${mode}`.toLowerCase();
-  if (taskContext.includes("code") && lower.includes("coder")) {
-    score += 3;
-  }
-  if ((taskContext.includes("compress") || taskContext.includes("summar")) && lower.includes("qwen2.5")) {
-    score += 1;
-  }
-  return score;
-}
 function extractBillions(modelName) {
   const match = modelName.match(/(\d+(?:\.\d+)?)b/);
   if (!match) {
@@ -838,6 +774,25 @@ function isSuitableSmallModel(model) {
   }
   return /mini|1\.5b|2b|3b|4b/i.test(model.name);
 }
+function scoreRouterModel(model) {
+  const lower = model.name.toLowerCase();
+  let score = 0;
+  if (lower.includes("qwen2.5")) {
+    score += 3;
+  }
+  if (lower.includes("3b")) {
+    score += 2;
+  } else if (lower.includes("1.5b")) {
+    score += 1;
+  }
+  if (lower.includes("coder")) {
+    score -= 1;
+  }
+  return score;
+}
+function compareModelNames(left, right) {
+  return left.localeCompare(right);
+}
 // src/core/optimizer.ts
 var DEFAULT_MODE = "claude_cli";
@@ -871,6 +826,7 @@ var PromptOptimizer = class {
       host: config.host ?? "http://localhost:11434",
       ollamaModel: config.ollamaModel,
       preferredModels: config.preferredModels ?? getDefaultPreferredModels(),
+      modelRoutingStrategy: "qwen",
       timeoutMs: config.timeoutMs ?? 3e4,
       temperature: config.temperature ?? 0.1
     };
@@ -916,6 +872,7 @@ var PromptOptimizer = class {
     let providerChanges = [];
     if (provider === "ollama") {
       const modelSelection = await this.resolveOllamaModel({
+        prompt: originalPrompt,
         mode,
         preset,
         task: input.task
@@ -1095,16 +1052,18 @@ var PromptOptimizer = class {
       };
     }
     if (!this.client.listModels) {
-      const fallback = this.config.preferredModels[0] ?? "qwen2.5:3b";
       return {
-        model: fallback,
-        warnings: [`Model auto-selection is unavailable in the current Ollama client, so "${fallback}" was assumed.`],
-        reason: `Assumed default model "${fallback}" because model discovery is unsupported.`,
-        forceHeuristic: false
+        model: "heuristic",
+        warnings: [
+          "Model auto-selection is unavailable in the current Ollama client, so prompt optimization is falling back to deterministic heuristic formatting."
+        ],
+        reason: "Model discovery is unsupported, so Qwen-based model routing could not run.",
+        forceHeuristic: true
       };
     }
     try {
       const installedModels = await this.client.listModels();
+      const suitableModels = getSuitableAutoModels(installedModels);
       const selection = selectOllamaModel({
         installedModels,
         mode: options.mode,
@@ -1112,7 +1071,7 @@ var PromptOptimizer = class {
         task: options.task,
         preferredModels: this.config.preferredModels
       });
-      if (!selection.suitableForAutoUse) {
+      if (suitableModels.length === 0) {
         return {
           model: selection.model,
           warnings: [
@@ -1123,19 +1082,131 @@ var PromptOptimizer = class {
           forceHeuristic: true
         };
       }
+      if (suitableModels.length === 1) {
+        return {
+          model: selection.model,
+          warnings: [],
+          reason: selection.reason,
+          forceHeuristic: false
+        };
+      }
+      if (this.config.modelRoutingStrategy === "qwen") {
+        const routed = await this.tryQwenModelRouting({
+          prompt: options.prompt,
+          task: options.task,
+          mode: options.mode,
+          preset: options.preset,
+          installedModels,
+          candidateModels: suitableModels.map((model) => model.name),
+          fallbackModel: selection.model
+        });
+        return {
+          model: routed.model,
+          warnings: routed.warnings,
+          reason: routed.reason,
+          forceHeuristic: routed.model === "heuristic"
+        };
+      }
       return {
-        model: selection.model,
-        warnings: installedModels.length === 0 ? [`No installed Ollama models were reported, so "${selection.model}" was chosen as the default preference.`] : [],
-        reason: selection.reason,
-        forceHeuristic: false
+        model: "heuristic",
+        warnings: ["Qwen model routing is required but was disabled, so prompt optimization is falling back to deterministic heuristic formatting."],
+        reason: "Qwen model routing is required but was disabled.",
+        forceHeuristic: true
       };
     } catch {
-      const fallback = this.config.preferredModels[0] ?? "qwen2.5:3b";
       return {
-        model: fallback,
-        warnings: [`Failed to inspect local Ollama models, so "${fallback}" was chosen as the default preference.`],
-        reason: `Fell back to default model "${fallback}" because model discovery failed.`,
-        forceHeuristic: false
+        model: "heuristic",
+        warnings: [
+          "Failed to inspect local Ollama models, so prompt optimization is falling back to deterministic heuristic formatting."
+        ],
+        reason: "Local Ollama model discovery failed, so Qwen-based model routing could not run.",
+        forceHeuristic: true
+      };
+    }
+  }
+  async tryQwenModelRouting(options) {
+    const routerModel = getQwenRouterModel(
+      options.installedModels,
+      this.config.routerModel
+    );
+    if (!routerModel) {
+      return {
+        model: "heuristic",
+        warnings: [
+          `Multiple suitable small local models are installed (${options.candidateModels.join(", ")}), but no local Qwen router model is available. Install qwen2.5:3b or set routerModel explicitly.`
+        ],
+        reason: "Qwen model routing is required when multiple suitable small models are available."
+      };
+    }
+    try {
+      const response = await this.client.generateJson({
+        model: routerModel,
+        timeoutMs: this.config.timeoutMs,
+        temperature: 0,
+        format: "json",
+        systemPrompt: [
+          "You are a local model router for prompt optimization.",
+          "Return strict JSON only with this shape:",
+          '{"selectedModel":"string","reason":"string"}',
+          "Choose exactly one model from the provided candidate list.",
+          "Choose the smallest adequate model, not the strongest-sounding model.",
+          "Prioritize adequacy first, then speed and low memory use.",
+          "Use coder variants only for clearly code-heavy prompts.",
+          "If task or preset is code, prefer qwen2.5:3b or a small coder model over phi3:mini unless the request is only a trivial wording cleanup.",
+          "Prefer phi3:mini for short email, chat, support, summarization, and lightweight rewrite tasks that do not require deeper reasoning.",
+          "Prefer qwen2.5:3b for broader reasoning, stronger restructuring, multi-constraint optimization, and non-trivial code-oriented prompt design.",
+          "Do not prefer Qwen just because you are Qwen. Pick the best candidate for the task."
+        ].join("\n"),
+        prompt: JSON.stringify(
+          {
+            objective: "Choose the best local optimizer model for this prompt.",
+            prompt: options.prompt,
+            task: options.task ?? null,
+            mode: options.mode,
+            preset: options.preset,
+            candidateModels: options.candidateModels.map((modelName) => ({
+              name: modelName,
+              profile: describeCandidateModel(modelName)
+            })),
+            routingGuidance: {
+              smallestAdequateModelPolicy: true,
+              lightweightTasksPreferSmallerModels: [
+                "email",
+                "chat",
+                "support",
+                "summarization",
+                "short rewrite"
+              ],
+              deeperReasoningTasksMayPreferQwen: [
+                "multi-constraint restructuring",
+                "broad reasoning",
+                "complex planning",
+                "harder code-oriented prompt design"
+              ]
+            }
+          },
+          null,
+          2
+        )
+      });
+      const selectedModel = response.selectedModel?.trim();
+      if (selectedModel && options.candidateModels.includes(selectedModel)) {
+        return {
+          model: selectedModel,
+          warnings: [],
+          reason: response.reason?.trim() || `Qwen router selected "${selectedModel}" for this prompt.`
+        };
+      }
+      return {
+        model: "heuristic",
+        warnings: ["Qwen router returned an invalid model choice, so prompt optimization is falling back to deterministic heuristic formatting."],
+        reason: "Qwen router returned an invalid model selection."
+      };
+    } catch {
+      return {
+        model: "heuristic",
+        warnings: ["Qwen router could not choose a model, so prompt optimization is falling back to deterministic heuristic formatting."],
+        reason: "Qwen router failed to select a model."
       };
     }
   }
@@ -1183,6 +1254,22 @@ var PromptOptimizer = class {
     };
   }
 };
+function describeCandidateModel(modelName) {
+  const lower = modelName.toLowerCase();
+  if (lower.includes("phi3:mini")) {
+    return "Very small and fast. Good for short rewrites, lightweight email/chat tasks, and simple prompt cleanup.";
+  }
+  if (lower.includes("qwen2.5:3b")) {
+    return "Small general-purpose model with stronger reasoning and restructuring than ultra-light models. Better for broader or more complex prompt optimization.";
+  }
+  if (lower.includes("coder")) {
+    return "Code-specialized model. Use only when the prompt is clearly code-heavy or refactor-oriented.";
+  }
+  if (lower.includes("llama3.2:3b")) {
+    return "Small general chat/rewrite model. Reasonable middle option for general tasks.";
+  }
+  return "Local candidate model for prompt optimization.";
+}
 function resolveSessionStore(config) {
   if (typeof config.contextStore === "object" && config.contextStore !== null) {
     return config.contextStore;