promptpilot 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  `promptpilot` is a lightweight TypeScript npm package that sits between your app or CLI workflow and a target LLM. It rewrites prompts locally through Ollama when available, stores reusable session context, compresses older turns, and emits a Claude-friendly final prompt for shell pipelines or application code.
4
4
 
5
- It is designed for local-first workflows on machines like an 18 GB MacBook. By default, `promptpilot` inspects your local Ollama installation and auto-selects a small optimization model, preferring `qwen2.5:3b`, `phi3:mini`, and `llama3.2:3b` in that order. The package still lets you override the model manually when needed.
5
+ It is designed for local-first workflows on machines like an 18 GB MacBook. By default, `promptpilot` inspects your local Ollama installation, uses a small local Qwen model as a router when available, and lets that router choose the best installed small optimization model for each prompt. It still lets you override the model manually when needed.
6
6
 
7
7
  ## Why local Ollama
8
8
 
@@ -10,6 +10,7 @@ It is designed for local-first workflows on machines like an 18 GB MacBook. By d
10
10
  - It reduces external API calls for prompt rewriting.
11
11
  - It lets you use a small, fast model for compression before sending the final prompt to a stronger remote model like Claude.
12
12
  - It automatically picks an installed local model that fits a low-memory workflow.
13
+ - It uses Qwen to route prompt optimization to the best available small local model when possible.
13
14
 
14
15
  ## What it does
15
16
 
@@ -50,7 +51,7 @@ Install directly from a local tarball:
50
51
 
51
52
  ```bash
52
53
  npm pack
53
- npm install -g ./promptpilot-0.1.1.tgz
54
+ npm install -g ./promptpilot-0.1.2.tgz
54
55
  ```
55
56
 
56
57
  ## Library usage
@@ -142,7 +143,7 @@ prompt.stdout.pipe(claude.stdin);
142
143
 
143
144
  By default, if you pass a `sessionId`, `promptpilot` stores optimized turns in a local session store. The default store is JSON files under `~/.promptpilot/sessions`. A SQLite store is also available when `node:sqlite` or `better-sqlite3` is present.
144
145
 
145
- If you do not pass `ollamaModel` or `--model`, `promptpilot` asks Ollama which models are installed and picks the best small model for the job. For most workflows it prefers `qwen2.5:3b`, then `phi3:mini`, then `llama3.2:3b`. For code-heavy prompts it will prefer `qwen2.5-coder:3b` when that model is installed. If only oversized local models are available, it warns and falls back to deterministic heuristic optimization instead of silently using a heavy model.
146
+ If you do not pass `ollamaModel` or `--model`, `promptpilot` asks Ollama which models are installed and lets a small local Qwen router choose the best small optimizer model for the current prompt. It no longer statically ranks multiple candidate models. If multiple small candidates exist but no suitable Qwen router model is available, it falls back to deterministic heuristic prompt optimization instead of making a static model-choice guess. If only oversized local models are available, it likewise falls back to deterministic heuristic optimization instead of silently using a heavy model.
146
147
 
147
148
  Each session stores:
148
149
 
package/dist/cli.js CHANGED
@@ -594,6 +594,11 @@ var ContextManager = class {
594
594
  }
595
595
  session.updatedAt = timestamp;
596
596
  await this.store.saveSession(session);
597
+ this.logger.debug("context saved", {
598
+ sessionId: options.sessionId,
599
+ entryCount: session.entries.length,
600
+ summaryCount: session.summaries.length
601
+ });
597
602
  }
598
603
  async summarizeContext(sessionId, prompt, task, budgetTokens, timeoutMs) {
599
604
  const session = await this.store.loadSession(sessionId);
@@ -728,116 +733,47 @@ var DEFAULT_SMALL_MODEL_PREFERENCES = [
728
733
  function getDefaultPreferredModels() {
729
734
  return [...DEFAULT_SMALL_MODEL_PREFERENCES];
730
735
  }
736
+ function getSuitableAutoModels(installedModels) {
737
+ return installedModels.filter((model) => isSuitableSmallModel(model));
738
+ }
739
+ function getQwenRouterModel(installedModels, explicitRouterModel) {
740
+ if (explicitRouterModel) {
741
+ const match = installedModels.find((model) => model.name === explicitRouterModel);
742
+ return match?.name ?? null;
743
+ }
744
+ const qwenRouters = getSuitableAutoModels(installedModels).filter((model) => /qwen/i.test(model.name)).sort((left, right) => scoreRouterModel(right) - scoreRouterModel(left));
745
+ return qwenRouters[0]?.name ?? null;
746
+ }
731
747
  function selectOllamaModel(input) {
732
- const preferred = buildPreferredOrder(input);
733
- const smallCandidates = input.installedModels.filter((model) => isSuitableSmallModel(model));
734
- const preferredMatch = findPreferredMatch(smallCandidates, preferred);
735
- if (preferredMatch) {
748
+ const smallCandidates = getSuitableAutoModels(input.installedModels);
749
+ if (smallCandidates.length === 1) {
736
750
  return {
737
- model: preferredMatch,
738
- reason: `Selected installed model "${preferredMatch}" from the preferred low-memory order.`,
751
+ model: smallCandidates[0].name,
752
+ reason: `Selected installed model "${smallCandidates[0].name}" because it is the only suitable small local model available.`,
739
753
  suitableForAutoUse: true
740
754
  };
741
755
  }
742
- const ranked = [...smallCandidates].filter((model) => isUsefulGenerationModel(model.name)).map((model) => ({ model, score: scoreModel(model.name, input.preset, input.mode, input.task) })).sort((left, right) => right.score - left.score);
743
- if (ranked[0]) {
756
+ if (smallCandidates.length > 1) {
744
757
  return {
745
- model: ranked[0].model.name,
746
- reason: `Selected installed model "${ranked[0].model.name}" using task-aware ranking.`,
747
- suitableForAutoUse: true
758
+ model: "",
759
+ reason: `Multiple suitable small local models are available (${smallCandidates.map((model) => model.name).join(", ")}), so a Qwen router must choose between them.`,
760
+ suitableForAutoUse: false
748
761
  };
749
762
  }
750
- const oversizedRanked = [...input.installedModels].filter((model) => isUsefulGenerationModel(model.name)).map((model) => ({ model, score: scoreModel(model.name, input.preset, input.mode, input.task) })).sort((left, right) => right.score - left.score);
763
+ const oversizedRanked = [...input.installedModels].filter((model) => isUsefulGenerationModel(model.name)).sort((left, right) => compareModelNames(left.name, right.name));
751
764
  if (oversizedRanked[0]) {
752
765
  return {
753
- model: oversizedRanked[0].model.name,
754
- reason: `Installed model "${oversizedRanked[0].model.name}" was detected, but it is larger than the preferred low-memory range for auto-use.`,
766
+ model: oversizedRanked[0].name,
767
+ reason: `Installed model "${oversizedRanked[0].name}" was detected, but it is larger than the preferred low-memory range for auto-use.`,
755
768
  suitableForAutoUse: false
756
769
  };
757
770
  }
758
771
  return {
759
- model: preferred[0] ?? "qwen2.5:3b",
760
- reason: "No installed Ollama models were discovered, so the default small-model preference was used.",
772
+ model: "",
773
+ reason: "No suitable local generation models were discovered for automatic routing.",
761
774
  suitableForAutoUse: false
762
775
  };
763
776
  }
764
- function buildPreferredOrder(input) {
765
- const taskContext = `${input.task ?? ""} ${input.preset} ${input.mode}`.toLowerCase();
766
- const configured = (input.preferredModels ?? []).map((model) => model.toLowerCase());
767
- if (taskContext.includes("code")) {
768
- return uniqueModels([
769
- ...configured,
770
- "qwen2.5-coder:3b",
771
- "qwen2.5:3b",
772
- "phi3:mini",
773
- "llama3.2:3b",
774
- "qwen2.5:1.5b"
775
- ]);
776
- }
777
- if (taskContext.includes("compress") || taskContext.includes("summar")) {
778
- return uniqueModels([
779
- ...configured,
780
- "qwen2.5:3b",
781
- "qwen2.5:1.5b",
782
- "phi3:mini",
783
- "llama3.2:3b"
784
- ]);
785
- }
786
- return uniqueModels([...configured, ...DEFAULT_SMALL_MODEL_PREFERENCES]);
787
- }
788
- function uniqueModels(models) {
789
- return Array.from(new Set(models));
790
- }
791
- function findPreferredMatch(installedModels, preferred) {
792
- const installedNames = installedModels.map((model) => model.name);
793
- for (const preferredName of preferred) {
794
- const direct = installedNames.find((name) => name.toLowerCase() === preferredName);
795
- if (direct) {
796
- return direct;
797
- }
798
- }
799
- return null;
800
- }
801
- function scoreModel(modelName, preset, mode, task) {
802
- const lower = modelName.toLowerCase();
803
- let score = 0;
804
- if (!isUsefulGenerationModel(lower)) {
805
- return -100;
806
- }
807
- if (lower.includes("qwen2.5")) {
808
- score += 4;
809
- } else if (lower.includes("phi3")) {
810
- score += 3.5;
811
- } else if (lower.includes("llama3.2")) {
812
- score += 3;
813
- } else if (lower.includes("mistral")) {
814
- score += 2;
815
- }
816
- const parameterSize = extractBillions(lower);
817
- if (parameterSize !== null) {
818
- if (parameterSize <= 4) {
819
- score += 4;
820
- } else if (parameterSize <= 8) {
821
- score += 1;
822
- } else {
823
- score -= 4;
824
- }
825
- }
826
- if (lower.includes("mini")) {
827
- score += 2;
828
- }
829
- if (lower.includes("instruct") || lower.includes("chat")) {
830
- score += 1;
831
- }
832
- const taskContext = `${task ?? ""} ${preset} ${mode}`.toLowerCase();
833
- if (taskContext.includes("code") && lower.includes("coder")) {
834
- score += 3;
835
- }
836
- if ((taskContext.includes("compress") || taskContext.includes("summar")) && lower.includes("qwen2.5")) {
837
- score += 1;
838
- }
839
- return score;
840
- }
841
777
  function extractBillions(modelName) {
842
778
  const match = modelName.match(/(\d+(?:\.\d+)?)b/);
843
779
  if (!match) {
@@ -871,6 +807,25 @@ function isSuitableSmallModel(model) {
871
807
  }
872
808
  return /mini|1\.5b|2b|3b|4b/i.test(model.name);
873
809
  }
810
+ function scoreRouterModel(model) {
811
+ const lower = model.name.toLowerCase();
812
+ let score = 0;
813
+ if (lower.includes("qwen2.5")) {
814
+ score += 3;
815
+ }
816
+ if (lower.includes("3b")) {
817
+ score += 2;
818
+ } else if (lower.includes("1.5b")) {
819
+ score += 1;
820
+ }
821
+ if (lower.includes("coder")) {
822
+ score -= 1;
823
+ }
824
+ return score;
825
+ }
826
+ function compareModelNames(left, right) {
827
+ return left.localeCompare(right);
828
+ }
874
829
 
875
830
  // src/core/optimizer.ts
876
831
  var DEFAULT_MODE = "claude_cli";
@@ -904,6 +859,7 @@ var PromptOptimizer = class {
904
859
  host: config.host ?? "http://localhost:11434",
905
860
  ollamaModel: config.ollamaModel,
906
861
  preferredModels: config.preferredModels ?? getDefaultPreferredModels(),
862
+ modelRoutingStrategy: "qwen",
907
863
  timeoutMs: config.timeoutMs ?? 3e4,
908
864
  temperature: config.temperature ?? 0.1
909
865
  };
@@ -949,6 +905,7 @@ var PromptOptimizer = class {
949
905
  let providerChanges = [];
950
906
  if (provider === "ollama") {
951
907
  const modelSelection = await this.resolveOllamaModel({
908
+ prompt: originalPrompt,
952
909
  mode,
953
910
  preset,
954
911
  task: input.task
@@ -1128,16 +1085,18 @@ var PromptOptimizer = class {
1128
1085
  };
1129
1086
  }
1130
1087
  if (!this.client.listModels) {
1131
- const fallback = this.config.preferredModels[0] ?? "qwen2.5:3b";
1132
1088
  return {
1133
- model: fallback,
1134
- warnings: [`Model auto-selection is unavailable in the current Ollama client, so "${fallback}" was assumed.`],
1135
- reason: `Assumed default model "${fallback}" because model discovery is unsupported.`,
1136
- forceHeuristic: false
1089
+ model: "heuristic",
1090
+ warnings: [
1091
+ "Model auto-selection is unavailable in the current Ollama client, so prompt optimization is falling back to deterministic heuristic formatting."
1092
+ ],
1093
+ reason: "Model discovery is unsupported, so Qwen-based model routing could not run.",
1094
+ forceHeuristic: true
1137
1095
  };
1138
1096
  }
1139
1097
  try {
1140
1098
  const installedModels = await this.client.listModels();
1099
+ const suitableModels = getSuitableAutoModels(installedModels);
1141
1100
  const selection = selectOllamaModel({
1142
1101
  installedModels,
1143
1102
  mode: options.mode,
@@ -1145,7 +1104,7 @@ var PromptOptimizer = class {
1145
1104
  task: options.task,
1146
1105
  preferredModels: this.config.preferredModels
1147
1106
  });
1148
- if (!selection.suitableForAutoUse) {
1107
+ if (suitableModels.length === 0) {
1149
1108
  return {
1150
1109
  model: selection.model,
1151
1110
  warnings: [
@@ -1156,19 +1115,131 @@ var PromptOptimizer = class {
1156
1115
  forceHeuristic: true
1157
1116
  };
1158
1117
  }
1118
+ if (suitableModels.length === 1) {
1119
+ return {
1120
+ model: selection.model,
1121
+ warnings: [],
1122
+ reason: selection.reason,
1123
+ forceHeuristic: false
1124
+ };
1125
+ }
1126
+ if (this.config.modelRoutingStrategy === "qwen") {
1127
+ const routed = await this.tryQwenModelRouting({
1128
+ prompt: options.prompt,
1129
+ task: options.task,
1130
+ mode: options.mode,
1131
+ preset: options.preset,
1132
+ installedModels,
1133
+ candidateModels: suitableModels.map((model) => model.name),
1134
+ fallbackModel: selection.model
1135
+ });
1136
+ return {
1137
+ model: routed.model,
1138
+ warnings: routed.warnings,
1139
+ reason: routed.reason,
1140
+ forceHeuristic: routed.model === "heuristic"
1141
+ };
1142
+ }
1159
1143
  return {
1160
- model: selection.model,
1161
- warnings: installedModels.length === 0 ? [`No installed Ollama models were reported, so "${selection.model}" was chosen as the default preference.`] : [],
1162
- reason: selection.reason,
1163
- forceHeuristic: false
1144
+ model: "heuristic",
1145
+ warnings: ["Qwen model routing is required but was disabled, so prompt optimization is falling back to deterministic heuristic formatting."],
1146
+ reason: "Qwen model routing is required but was disabled.",
1147
+ forceHeuristic: true
1164
1148
  };
1165
1149
  } catch {
1166
- const fallback = this.config.preferredModels[0] ?? "qwen2.5:3b";
1167
1150
  return {
1168
- model: fallback,
1169
- warnings: [`Failed to inspect local Ollama models, so "${fallback}" was chosen as the default preference.`],
1170
- reason: `Fell back to default model "${fallback}" because model discovery failed.`,
1171
- forceHeuristic: false
1151
+ model: "heuristic",
1152
+ warnings: [
1153
+ "Failed to inspect local Ollama models, so prompt optimization is falling back to deterministic heuristic formatting."
1154
+ ],
1155
+ reason: "Local Ollama model discovery failed, so Qwen-based model routing could not run.",
1156
+ forceHeuristic: true
1157
+ };
1158
+ }
1159
+ }
1160
+ async tryQwenModelRouting(options) {
1161
+ const routerModel = getQwenRouterModel(
1162
+ options.installedModels,
1163
+ this.config.routerModel
1164
+ );
1165
+ if (!routerModel) {
1166
+ return {
1167
+ model: "heuristic",
1168
+ warnings: [
1169
+ `Multiple suitable small local models are installed (${options.candidateModels.join(", ")}), but no local Qwen router model is available. Install qwen2.5:3b or set routerModel explicitly.`
1170
+ ],
1171
+ reason: "Qwen model routing is required when multiple suitable small models are available."
1172
+ };
1173
+ }
1174
+ try {
1175
+ const response = await this.client.generateJson({
1176
+ model: routerModel,
1177
+ timeoutMs: this.config.timeoutMs,
1178
+ temperature: 0,
1179
+ format: "json",
1180
+ systemPrompt: [
1181
+ "You are a local model router for prompt optimization.",
1182
+ "Return strict JSON only with this shape:",
1183
+ '{"selectedModel":"string","reason":"string"}',
1184
+ "Choose exactly one model from the provided candidate list.",
1185
+ "Choose the smallest adequate model, not the strongest-sounding model.",
1186
+ "Prioritize adequacy first, then speed and low memory use.",
1187
+ "Use coder variants only for clearly code-heavy prompts.",
1188
+ "If task or preset is code, prefer qwen2.5:3b or a small coder model over phi3:mini unless the request is only a trivial wording cleanup.",
1189
+ "Prefer phi3:mini for short email, chat, support, summarization, and lightweight rewrite tasks that do not require deeper reasoning.",
1190
+ "Prefer qwen2.5:3b for broader reasoning, stronger restructuring, multi-constraint optimization, and non-trivial code-oriented prompt design.",
1191
+ "Do not prefer Qwen just because you are Qwen. Pick the best candidate for the task."
1192
+ ].join("\n"),
1193
+ prompt: JSON.stringify(
1194
+ {
1195
+ objective: "Choose the best local optimizer model for this prompt.",
1196
+ prompt: options.prompt,
1197
+ task: options.task ?? null,
1198
+ mode: options.mode,
1199
+ preset: options.preset,
1200
+ candidateModels: options.candidateModels.map((modelName) => ({
1201
+ name: modelName,
1202
+ profile: describeCandidateModel(modelName)
1203
+ })),
1204
+ routingGuidance: {
1205
+ smallestAdequateModelPolicy: true,
1206
+ lightweightTasksPreferSmallerModels: [
1207
+ "email",
1208
+ "chat",
1209
+ "support",
1210
+ "summarization",
1211
+ "short rewrite"
1212
+ ],
1213
+ deeperReasoningTasksMayPreferQwen: [
1214
+ "multi-constraint restructuring",
1215
+ "broad reasoning",
1216
+ "complex planning",
1217
+ "harder code-oriented prompt design"
1218
+ ]
1219
+ }
1220
+ },
1221
+ null,
1222
+ 2
1223
+ )
1224
+ });
1225
+ const selectedModel = response.selectedModel?.trim();
1226
+ if (selectedModel && options.candidateModels.includes(selectedModel)) {
1227
+ return {
1228
+ model: selectedModel,
1229
+ warnings: [],
1230
+ reason: response.reason?.trim() || `Qwen router selected "${selectedModel}" for this prompt.`
1231
+ };
1232
+ }
1233
+ return {
1234
+ model: "heuristic",
1235
+ warnings: ["Qwen router returned an invalid model choice, so prompt optimization is falling back to deterministic heuristic formatting."],
1236
+ reason: "Qwen router returned an invalid model selection."
1237
+ };
1238
+ } catch {
1239
+ return {
1240
+ model: "heuristic",
1241
+ warnings: ["Qwen router could not choose a model, so prompt optimization is falling back to deterministic heuristic formatting."],
1242
+ reason: "Qwen router failed to select a model."
1172
1243
  };
1173
1244
  }
1174
1245
  }
@@ -1216,6 +1287,22 @@ var PromptOptimizer = class {
1216
1287
  };
1217
1288
  }
1218
1289
  };
1290
+ function describeCandidateModel(modelName) {
1291
+ const lower = modelName.toLowerCase();
1292
+ if (lower.includes("phi3:mini")) {
1293
+ return "Very small and fast. Good for short rewrites, lightweight email/chat tasks, and simple prompt cleanup.";
1294
+ }
1295
+ if (lower.includes("qwen2.5:3b")) {
1296
+ return "Small general-purpose model with stronger reasoning and restructuring than ultra-light models. Better for broader or more complex prompt optimization.";
1297
+ }
1298
+ if (lower.includes("coder")) {
1299
+ return "Code-specialized model. Use only when the prompt is clearly code-heavy or refactor-oriented.";
1300
+ }
1301
+ if (lower.includes("llama3.2:3b")) {
1302
+ return "Small general chat/rewrite model. Reasonable middle option for general tasks.";
1303
+ }
1304
+ return "Local candidate model for prompt optimization.";
1305
+ }
1219
1306
  function resolveSessionStore(config) {
1220
1307
  if (typeof config.contextStore === "object" && config.contextStore !== null) {
1221
1308
  return config.contextStore;