promptpilot 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -561,6 +561,11 @@ var ContextManager = class {
561
561
  }
562
562
  session.updatedAt = timestamp;
563
563
  await this.store.saveSession(session);
564
+ this.logger.debug("context saved", {
565
+ sessionId: options.sessionId,
566
+ entryCount: session.entries.length,
567
+ summaryCount: session.summaries.length
568
+ });
564
569
  }
565
570
  async summarizeContext(sessionId, prompt, task, budgetTokens, timeoutMs) {
566
571
  const session = await this.store.loadSession(sessionId);
@@ -695,116 +700,47 @@ var DEFAULT_SMALL_MODEL_PREFERENCES = [
695
700
  function getDefaultPreferredModels() {
696
701
  return [...DEFAULT_SMALL_MODEL_PREFERENCES];
697
702
  }
703
/**
 * Narrows the installed models down to those accepted by isSuitableSmallModel.
 *
 * @param {Array<{ name: string }>} installedModels - Installed model entries.
 * @returns {Array<{ name: string }>} New array with the accepted entries, in
 *   their original order.
 */
function getSuitableAutoModels(installedModels) {
  const isCandidate = (entry) => isSuitableSmallModel(entry);
  return installedModels.filter(isCandidate);
}
706
/**
 * Picks the local model that will act as the router between candidate models.
 *
 * When `explicitRouterModel` is given, only that model is considered: an exact
 * name match wins, otherwise a trimmed, case-insensitive match is accepted so
 * that a casing or whitespace difference in configuration does not silently
 * disable routing. The installed model's own spelling is returned.
 *
 * Without an explicit choice, the suitable small models whose name contains
 * "qwen" (case-insensitive) are ranked by scoreRouterModel and the best one
 * is returned.
 *
 * @param {Array<{ name: string }>} installedModels - Installed model entries.
 * @param {string} [explicitRouterModel] - Router model name from configuration.
 * @returns {string | null} Installed model name, or null when nothing matches.
 */
function getQwenRouterModel(installedModels, explicitRouterModel) {
  if (explicitRouterModel) {
    const exact = installedModels.find((model) => model.name === explicitRouterModel);
    if (exact) {
      return exact.name;
    }
    if (typeof explicitRouterModel !== "string") {
      return null;
    }
    const wanted = explicitRouterModel.trim().toLowerCase();
    if (wanted === "") {
      return null;
    }
    const loose = installedModels.find(
      (model) => typeof model.name === "string" && model.name.toLowerCase() === wanted
    );
    return loose?.name ?? null;
  }
  const qwenRouters = getSuitableAutoModels(installedModels)
    .filter((model) => /qwen/i.test(model.name))
    .sort((left, right) => scoreRouterModel(right) - scoreRouterModel(left));
  return qwenRouters[0]?.name ?? null;
}
698
714
  function selectOllamaModel(input) {
699
- const preferred = buildPreferredOrder(input);
700
- const smallCandidates = input.installedModels.filter((model) => isSuitableSmallModel(model));
701
- const preferredMatch = findPreferredMatch(smallCandidates, preferred);
702
- if (preferredMatch) {
715
+ const smallCandidates = getSuitableAutoModels(input.installedModels);
716
+ if (smallCandidates.length === 1) {
703
717
  return {
704
- model: preferredMatch,
705
- reason: `Selected installed model "${preferredMatch}" from the preferred low-memory order.`,
718
+ model: smallCandidates[0].name,
719
+ reason: `Selected installed model "${smallCandidates[0].name}" because it is the only suitable small local model available.`,
706
720
  suitableForAutoUse: true
707
721
  };
708
722
  }
709
- const ranked = [...smallCandidates].filter((model) => isUsefulGenerationModel(model.name)).map((model) => ({ model, score: scoreModel(model.name, input.preset, input.mode, input.task) })).sort((left, right) => right.score - left.score);
710
- if (ranked[0]) {
723
+ if (smallCandidates.length > 1) {
711
724
  return {
712
- model: ranked[0].model.name,
713
- reason: `Selected installed model "${ranked[0].model.name}" using task-aware ranking.`,
714
- suitableForAutoUse: true
725
+ model: "",
726
+ reason: `Multiple suitable small local models are available (${smallCandidates.map((model) => model.name).join(", ")}), so a Qwen router must choose between them.`,
727
+ suitableForAutoUse: false
715
728
  };
716
729
  }
717
- const oversizedRanked = [...input.installedModels].filter((model) => isUsefulGenerationModel(model.name)).map((model) => ({ model, score: scoreModel(model.name, input.preset, input.mode, input.task) })).sort((left, right) => right.score - left.score);
730
+ const oversizedRanked = [...input.installedModels].filter((model) => isUsefulGenerationModel(model.name)).sort((left, right) => compareModelNames(left.name, right.name));
718
731
  if (oversizedRanked[0]) {
719
732
  return {
720
- model: oversizedRanked[0].model.name,
721
- reason: `Installed model "${oversizedRanked[0].model.name}" was detected, but it is larger than the preferred low-memory range for auto-use.`,
733
+ model: oversizedRanked[0].name,
734
+ reason: `Installed model "${oversizedRanked[0].name}" was detected, but it is larger than the preferred low-memory range for auto-use.`,
722
735
  suitableForAutoUse: false
723
736
  };
724
737
  }
725
738
  return {
726
- model: preferred[0] ?? "qwen2.5:3b",
727
- reason: "No installed Ollama models were discovered, so the default small-model preference was used.",
739
+ model: "",
740
+ reason: "No suitable local generation models were discovered for automatic routing.",
728
741
  suitableForAutoUse: false
729
742
  };
730
743
  }
731
- function buildPreferredOrder(input) {
732
- const taskContext = `${input.task ?? ""} ${input.preset} ${input.mode}`.toLowerCase();
733
- const configured = (input.preferredModels ?? []).map((model) => model.toLowerCase());
734
- if (taskContext.includes("code")) {
735
- return uniqueModels([
736
- ...configured,
737
- "qwen2.5-coder:3b",
738
- "qwen2.5:3b",
739
- "phi3:mini",
740
- "llama3.2:3b",
741
- "qwen2.5:1.5b"
742
- ]);
743
- }
744
- if (taskContext.includes("compress") || taskContext.includes("summar")) {
745
- return uniqueModels([
746
- ...configured,
747
- "qwen2.5:3b",
748
- "qwen2.5:1.5b",
749
- "phi3:mini",
750
- "llama3.2:3b"
751
- ]);
752
- }
753
- return uniqueModels([...configured, ...DEFAULT_SMALL_MODEL_PREFERENCES]);
754
- }
755
- function uniqueModels(models) {
756
- return Array.from(new Set(models));
757
- }
758
- function findPreferredMatch(installedModels, preferred) {
759
- const installedNames = installedModels.map((model) => model.name);
760
- for (const preferredName of preferred) {
761
- const direct = installedNames.find((name) => name.toLowerCase() === preferredName);
762
- if (direct) {
763
- return direct;
764
- }
765
- }
766
- return null;
767
- }
768
- function scoreModel(modelName, preset, mode, task) {
769
- const lower = modelName.toLowerCase();
770
- let score = 0;
771
- if (!isUsefulGenerationModel(lower)) {
772
- return -100;
773
- }
774
- if (lower.includes("qwen2.5")) {
775
- score += 4;
776
- } else if (lower.includes("phi3")) {
777
- score += 3.5;
778
- } else if (lower.includes("llama3.2")) {
779
- score += 3;
780
- } else if (lower.includes("mistral")) {
781
- score += 2;
782
- }
783
- const parameterSize = extractBillions(lower);
784
- if (parameterSize !== null) {
785
- if (parameterSize <= 4) {
786
- score += 4;
787
- } else if (parameterSize <= 8) {
788
- score += 1;
789
- } else {
790
- score -= 4;
791
- }
792
- }
793
- if (lower.includes("mini")) {
794
- score += 2;
795
- }
796
- if (lower.includes("instruct") || lower.includes("chat")) {
797
- score += 1;
798
- }
799
- const taskContext = `${task ?? ""} ${preset} ${mode}`.toLowerCase();
800
- if (taskContext.includes("code") && lower.includes("coder")) {
801
- score += 3;
802
- }
803
- if ((taskContext.includes("compress") || taskContext.includes("summar")) && lower.includes("qwen2.5")) {
804
- score += 1;
805
- }
806
- return score;
807
- }
808
744
  function extractBillions(modelName) {
809
745
  const match = modelName.match(/(\d+(?:\.\d+)?)b/);
810
746
  if (!match) {
@@ -838,6 +774,25 @@ function isSuitableSmallModel(model) {
838
774
  }
839
775
  return /mini|1\.5b|2b|3b|4b/i.test(model.name);
840
776
  }
777
/**
 * Scores a model as a routing candidate; a higher score is preferred.
 *
 * Scoring rules:
 *  - +3 when the name contains "qwen2.5";
 *  - +2 for a "3b" size tag, otherwise +1 for a "1.5b" size tag;
 *  - -1 when the name contains "coder".
 *
 * @param {{ name: string }} model - Installed model entry.
 * @returns {number} Preference score for use as the router model.
 */
function scoreRouterModel(model) {
  const lower = model.name.toLowerCase();
  let score = 0;
  if (lower.includes("qwen2.5")) {
    score += 3;
  }
  // A size tag must not be the tail of a larger number: a plain substring
  // test would also reward "13b" or "11.5b" as if they were "3b" / "1.5b".
  if (/(?<![\d.])3b/.test(lower)) {
    score += 2;
  } else if (/(?<![\d.])1\.5b/.test(lower)) {
    score += 1;
  }
  if (lower.includes("coder")) {
    score -= 1;
  }
  return score;
}
793
/**
 * Sort comparator for model names, delegating to String.prototype.localeCompare.
 *
 * @param {string} left - First model name.
 * @param {string} right - Second model name.
 * @returns {number} Negative when `left` sorts first, positive when `right`
 *   sorts first, and 0 when they compare equal.
 */
function compareModelNames(left, right) {
  const ordering = left.localeCompare(right);
  return ordering;
}
841
796
 
842
797
  // src/core/optimizer.ts
843
798
  var DEFAULT_MODE = "claude_cli";
@@ -871,6 +826,7 @@ var PromptOptimizer = class {
871
826
  host: config.host ?? "http://localhost:11434",
872
827
  ollamaModel: config.ollamaModel,
873
828
  preferredModels: config.preferredModels ?? getDefaultPreferredModels(),
829
+ modelRoutingStrategy: "qwen",
874
830
  timeoutMs: config.timeoutMs ?? 3e4,
875
831
  temperature: config.temperature ?? 0.1
876
832
  };
@@ -916,6 +872,7 @@ var PromptOptimizer = class {
916
872
  let providerChanges = [];
917
873
  if (provider === "ollama") {
918
874
  const modelSelection = await this.resolveOllamaModel({
875
+ prompt: originalPrompt,
919
876
  mode,
920
877
  preset,
921
878
  task: input.task
@@ -1095,16 +1052,18 @@ var PromptOptimizer = class {
1095
1052
  };
1096
1053
  }
1097
1054
  if (!this.client.listModels) {
1098
- const fallback = this.config.preferredModels[0] ?? "qwen2.5:3b";
1099
1055
  return {
1100
- model: fallback,
1101
- warnings: [`Model auto-selection is unavailable in the current Ollama client, so "${fallback}" was assumed.`],
1102
- reason: `Assumed default model "${fallback}" because model discovery is unsupported.`,
1103
- forceHeuristic: false
1056
+ model: "heuristic",
1057
+ warnings: [
1058
+ "Model auto-selection is unavailable in the current Ollama client, so prompt optimization is falling back to deterministic heuristic formatting."
1059
+ ],
1060
+ reason: "Model discovery is unsupported, so Qwen-based model routing could not run.",
1061
+ forceHeuristic: true
1104
1062
  };
1105
1063
  }
1106
1064
  try {
1107
1065
  const installedModels = await this.client.listModels();
1066
+ const suitableModels = getSuitableAutoModels(installedModels);
1108
1067
  const selection = selectOllamaModel({
1109
1068
  installedModels,
1110
1069
  mode: options.mode,
@@ -1112,7 +1071,7 @@ var PromptOptimizer = class {
1112
1071
  task: options.task,
1113
1072
  preferredModels: this.config.preferredModels
1114
1073
  });
1115
- if (!selection.suitableForAutoUse) {
1074
+ if (suitableModels.length === 0) {
1116
1075
  return {
1117
1076
  model: selection.model,
1118
1077
  warnings: [
@@ -1123,19 +1082,131 @@ var PromptOptimizer = class {
1123
1082
  forceHeuristic: true
1124
1083
  };
1125
1084
  }
1085
+ if (suitableModels.length === 1) {
1086
+ return {
1087
+ model: selection.model,
1088
+ warnings: [],
1089
+ reason: selection.reason,
1090
+ forceHeuristic: false
1091
+ };
1092
+ }
1093
+ if (this.config.modelRoutingStrategy === "qwen") {
1094
+ const routed = await this.tryQwenModelRouting({
1095
+ prompt: options.prompt,
1096
+ task: options.task,
1097
+ mode: options.mode,
1098
+ preset: options.preset,
1099
+ installedModels,
1100
+ candidateModels: suitableModels.map((model) => model.name),
1101
+ fallbackModel: selection.model
1102
+ });
1103
+ return {
1104
+ model: routed.model,
1105
+ warnings: routed.warnings,
1106
+ reason: routed.reason,
1107
+ forceHeuristic: routed.model === "heuristic"
1108
+ };
1109
+ }
1126
1110
  return {
1127
- model: selection.model,
1128
- warnings: installedModels.length === 0 ? [`No installed Ollama models were reported, so "${selection.model}" was chosen as the default preference.`] : [],
1129
- reason: selection.reason,
1130
- forceHeuristic: false
1111
+ model: "heuristic",
1112
+ warnings: ["Qwen model routing is required but was disabled, so prompt optimization is falling back to deterministic heuristic formatting."],
1113
+ reason: "Qwen model routing is required but was disabled.",
1114
+ forceHeuristic: true
1131
1115
  };
1132
1116
  } catch {
1133
- const fallback = this.config.preferredModels[0] ?? "qwen2.5:3b";
1134
1117
  return {
1135
- model: fallback,
1136
- warnings: [`Failed to inspect local Ollama models, so "${fallback}" was chosen as the default preference.`],
1137
- reason: `Fell back to default model "${fallback}" because model discovery failed.`,
1138
- forceHeuristic: false
1118
+ model: "heuristic",
1119
+ warnings: [
1120
+ "Failed to inspect local Ollama models, so prompt optimization is falling back to deterministic heuristic formatting."
1121
+ ],
1122
+ reason: "Local Ollama model discovery failed, so Qwen-based model routing could not run.",
1123
+ forceHeuristic: true
1124
+ };
1125
+ }
1126
+ }
1127
+ async tryQwenModelRouting(options) {
1128
+ const routerModel = getQwenRouterModel(
1129
+ options.installedModels,
1130
+ this.config.routerModel
1131
+ );
1132
+ if (!routerModel) {
1133
+ return {
1134
+ model: "heuristic",
1135
+ warnings: [
1136
+ `Multiple suitable small local models are installed (${options.candidateModels.join(", ")}), but no local Qwen router model is available. Install qwen2.5:3b or set routerModel explicitly.`
1137
+ ],
1138
+ reason: "Qwen model routing is required when multiple suitable small models are available."
1139
+ };
1140
+ }
1141
+ try {
1142
+ const response = await this.client.generateJson({
1143
+ model: routerModel,
1144
+ timeoutMs: this.config.timeoutMs,
1145
+ temperature: 0,
1146
+ format: "json",
1147
+ systemPrompt: [
1148
+ "You are a local model router for prompt optimization.",
1149
+ "Return strict JSON only with this shape:",
1150
+ '{"selectedModel":"string","reason":"string"}',
1151
+ "Choose exactly one model from the provided candidate list.",
1152
+ "Choose the smallest adequate model, not the strongest-sounding model.",
1153
+ "Prioritize adequacy first, then speed and low memory use.",
1154
+ "Use coder variants only for clearly code-heavy prompts.",
1155
+ "If task or preset is code, prefer qwen2.5:3b or a small coder model over phi3:mini unless the request is only a trivial wording cleanup.",
1156
+ "Prefer phi3:mini for short email, chat, support, summarization, and lightweight rewrite tasks that do not require deeper reasoning.",
1157
+ "Prefer qwen2.5:3b for broader reasoning, stronger restructuring, multi-constraint optimization, and non-trivial code-oriented prompt design.",
1158
+ "Do not prefer Qwen just because you are Qwen. Pick the best candidate for the task."
1159
+ ].join("\n"),
1160
+ prompt: JSON.stringify(
1161
+ {
1162
+ objective: "Choose the best local optimizer model for this prompt.",
1163
+ prompt: options.prompt,
1164
+ task: options.task ?? null,
1165
+ mode: options.mode,
1166
+ preset: options.preset,
1167
+ candidateModels: options.candidateModels.map((modelName) => ({
1168
+ name: modelName,
1169
+ profile: describeCandidateModel(modelName)
1170
+ })),
1171
+ routingGuidance: {
1172
+ smallestAdequateModelPolicy: true,
1173
+ lightweightTasksPreferSmallerModels: [
1174
+ "email",
1175
+ "chat",
1176
+ "support",
1177
+ "summarization",
1178
+ "short rewrite"
1179
+ ],
1180
+ deeperReasoningTasksMayPreferQwen: [
1181
+ "multi-constraint restructuring",
1182
+ "broad reasoning",
1183
+ "complex planning",
1184
+ "harder code-oriented prompt design"
1185
+ ]
1186
+ }
1187
+ },
1188
+ null,
1189
+ 2
1190
+ )
1191
+ });
1192
+ const selectedModel = response.selectedModel?.trim();
1193
+ if (selectedModel && options.candidateModels.includes(selectedModel)) {
1194
+ return {
1195
+ model: selectedModel,
1196
+ warnings: [],
1197
+ reason: response.reason?.trim() || `Qwen router selected "${selectedModel}" for this prompt.`
1198
+ };
1199
+ }
1200
+ return {
1201
+ model: "heuristic",
1202
+ warnings: ["Qwen router returned an invalid model choice, so prompt optimization is falling back to deterministic heuristic formatting."],
1203
+ reason: "Qwen router returned an invalid model selection."
1204
+ };
1205
+ } catch {
1206
+ return {
1207
+ model: "heuristic",
1208
+ warnings: ["Qwen router could not choose a model, so prompt optimization is falling back to deterministic heuristic formatting."],
1209
+ reason: "Qwen router failed to select a model."
1139
1210
  };
1140
1211
  }
1141
1212
  }
@@ -1183,6 +1254,22 @@ var PromptOptimizer = class {
1183
1254
  };
1184
1255
  }
1185
1256
  };
1257
/**
 * Returns a short human-readable profile for a candidate model name.
 *
 * The name is lowercased and checked against known markers in a fixed order;
 * the first marker contained in the name decides the profile. Names that
 * match no marker get a generic description.
 *
 * @param {string} modelName - Candidate model name, e.g. "qwen2.5:3b".
 * @returns {string} Profile text for that model.
 */
function describeCandidateModel(modelName) {
  // Order matters: earlier markers take precedence over later ones.
  const profiles = [
    [
      "phi3:mini",
      "Very small and fast. Good for short rewrites, lightweight email/chat tasks, and simple prompt cleanup."
    ],
    [
      "qwen2.5:3b",
      "Small general-purpose model with stronger reasoning and restructuring than ultra-light models. Better for broader or more complex prompt optimization."
    ],
    [
      "coder",
      "Code-specialized model. Use only when the prompt is clearly code-heavy or refactor-oriented."
    ],
    [
      "llama3.2:3b",
      "Small general chat/rewrite model. Reasonable middle option for general tasks."
    ]
  ];
  const normalized = modelName.toLowerCase();
  const hit = profiles.find(([marker]) => normalized.includes(marker));
  return hit ? hit[1] : "Local candidate model for prompt optimization.";
}
1186
1273
  function resolveSessionStore(config) {
1187
1274
  if (typeof config.contextStore === "object" && config.contextStore !== null) {
1188
1275
  return config.contextStore;