promptpilot 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  `promptpilot` is a lightweight TypeScript npm package that sits between your app or CLI workflow and a target LLM. It rewrites prompts locally through Ollama when available, stores reusable session context, compresses older turns, and emits a Claude-friendly final prompt for shell pipelines or application code.
4
4
 
5
- It is designed for local-first workflows on machines like an 18 GB MacBook. By default, `promptpilot` inspects your local Ollama installation and auto-selects a small optimization model, preferring `qwen2.5:3b`, `phi3:mini`, and `llama3.2:3b` in that order. The package still lets you override the model manually when needed.
5
+ It is designed for local-first workflows on machines like an 18 GB MacBook. By default, `promptpilot` inspects your local Ollama installation, uses a small local Qwen model as a router when available, and lets that router choose the best installed small optimization model for each prompt. It still lets you override the model manually when needed.
6
6
 
7
7
  ## Why local Ollama
8
8
 
@@ -10,6 +10,7 @@ It is designed for local-first workflows on machines like an 18 GB MacBook. By d
10
10
  - It reduces external API calls for prompt rewriting.
11
11
  - It lets you use a small, fast model for compression before sending the final prompt to a stronger remote model like Claude.
12
12
  - It automatically picks an installed local model that fits a low-memory workflow.
13
+ - It uses Qwen to route prompt optimization to the best available small local model when possible.
13
14
 
14
15
  ## What it does
15
16
 
@@ -50,7 +51,7 @@ Install directly from a local tarball:
50
51
 
51
52
  ```bash
52
53
  npm pack
53
- npm install -g ./promptpilot-0.1.0.tgz
54
+ npm install -g ./promptpilot-0.1.2.tgz
54
55
  ```
55
56
 
56
57
  ## Library usage
@@ -142,7 +143,7 @@ prompt.stdout.pipe(claude.stdin);
142
143
 
143
144
  By default, if you pass a `sessionId`, `promptpilot` stores optimized turns in a local session store. The default store is JSON files under `~/.promptpilot/sessions`. A SQLite store is also available when `node:sqlite` or `better-sqlite3` is present.
144
145
 
145
- If you do not pass `ollamaModel` or `--model`, `promptpilot` asks Ollama which models are installed and picks the best small model for the job. For most workflows it prefers `qwen2.5:3b`, then `phi3:mini`, then `llama3.2:3b`. For code-heavy prompts it will prefer `qwen2.5-coder:3b` when that model is installed. If only oversized local models are available, it warns and falls back to deterministic heuristic optimization instead of silently using a heavy model.
146
+ If you do not pass `ollamaModel` or `--model`, `promptpilot` asks Ollama which models are installed and lets a small local Qwen router choose the best small optimizer model for the current prompt. It no longer statically ranks multiple candidate models. If a suitable Qwen router model is not available when multiple small candidates exist, it falls back to deterministic heuristic prompt optimization instead of making a static model-choice guess. If only oversized local models are available, it also falls back to deterministic heuristic optimization instead of silently using a heavy model.
146
147
 
147
148
  Each session stores:
148
149
 
package/dist/cli.js CHANGED
@@ -1,7 +1,8 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  // src/cli.ts
4
- import { pathToFileURL } from "url";
4
+ import { realpathSync } from "fs";
5
+ import { fileURLToPath } from "url";
5
6
 
6
7
  // src/errors.ts
7
8
  var InvalidPromptError = class extends Error {
@@ -593,6 +594,11 @@ var ContextManager = class {
593
594
  }
594
595
  session.updatedAt = timestamp;
595
596
  await this.store.saveSession(session);
597
+ this.logger.debug("context saved", {
598
+ sessionId: options.sessionId,
599
+ entryCount: session.entries.length,
600
+ summaryCount: session.summaries.length
601
+ });
596
602
  }
597
603
  async summarizeContext(sessionId, prompt, task, budgetTokens, timeoutMs) {
598
604
  const session = await this.store.loadSession(sessionId);
@@ -727,116 +733,47 @@ var DEFAULT_SMALL_MODEL_PREFERENCES = [
727
733
  function getDefaultPreferredModels() {
728
734
  return [...DEFAULT_SMALL_MODEL_PREFERENCES];
729
735
  }
736
+ function getSuitableAutoModels(installedModels) {
737
+ return installedModels.filter((model) => isSuitableSmallModel(model));
738
+ }
739
+ function getQwenRouterModel(installedModels, explicitRouterModel) {
740
+ if (explicitRouterModel) {
741
+ const match = installedModels.find((model) => model.name === explicitRouterModel);
742
+ return match?.name ?? null;
743
+ }
744
+ const qwenRouters = getSuitableAutoModels(installedModels).filter((model) => /qwen/i.test(model.name)).sort((left, right) => scoreRouterModel(right) - scoreRouterModel(left));
745
+ return qwenRouters[0]?.name ?? null;
746
+ }
730
747
  function selectOllamaModel(input) {
731
- const preferred = buildPreferredOrder(input);
732
- const smallCandidates = input.installedModels.filter((model) => isSuitableSmallModel(model));
733
- const preferredMatch = findPreferredMatch(smallCandidates, preferred);
734
- if (preferredMatch) {
748
+ const smallCandidates = getSuitableAutoModels(input.installedModels);
749
+ if (smallCandidates.length === 1) {
735
750
  return {
736
- model: preferredMatch,
737
- reason: `Selected installed model "${preferredMatch}" from the preferred low-memory order.`,
751
+ model: smallCandidates[0].name,
752
+ reason: `Selected installed model "${smallCandidates[0].name}" because it is the only suitable small local model available.`,
738
753
  suitableForAutoUse: true
739
754
  };
740
755
  }
741
- const ranked = [...smallCandidates].filter((model) => isUsefulGenerationModel(model.name)).map((model) => ({ model, score: scoreModel(model.name, input.preset, input.mode, input.task) })).sort((left, right) => right.score - left.score);
742
- if (ranked[0]) {
756
+ if (smallCandidates.length > 1) {
743
757
  return {
744
- model: ranked[0].model.name,
745
- reason: `Selected installed model "${ranked[0].model.name}" using task-aware ranking.`,
746
- suitableForAutoUse: true
758
+ model: "",
759
+ reason: `Multiple suitable small local models are available (${smallCandidates.map((model) => model.name).join(", ")}), so a Qwen router must choose between them.`,
760
+ suitableForAutoUse: false
747
761
  };
748
762
  }
749
- const oversizedRanked = [...input.installedModels].filter((model) => isUsefulGenerationModel(model.name)).map((model) => ({ model, score: scoreModel(model.name, input.preset, input.mode, input.task) })).sort((left, right) => right.score - left.score);
763
+ const oversizedRanked = [...input.installedModels].filter((model) => isUsefulGenerationModel(model.name)).sort((left, right) => compareModelNames(left.name, right.name));
750
764
  if (oversizedRanked[0]) {
751
765
  return {
752
- model: oversizedRanked[0].model.name,
753
- reason: `Installed model "${oversizedRanked[0].model.name}" was detected, but it is larger than the preferred low-memory range for auto-use.`,
766
+ model: oversizedRanked[0].name,
767
+ reason: `Installed model "${oversizedRanked[0].name}" was detected, but it is larger than the preferred low-memory range for auto-use.`,
754
768
  suitableForAutoUse: false
755
769
  };
756
770
  }
757
771
  return {
758
- model: preferred[0] ?? "qwen2.5:3b",
759
- reason: "No installed Ollama models were discovered, so the default small-model preference was used.",
772
+ model: "",
773
+ reason: "No suitable local generation models were discovered for automatic routing.",
760
774
  suitableForAutoUse: false
761
775
  };
762
776
  }
763
- function buildPreferredOrder(input) {
764
- const taskContext = `${input.task ?? ""} ${input.preset} ${input.mode}`.toLowerCase();
765
- const configured = (input.preferredModels ?? []).map((model) => model.toLowerCase());
766
- if (taskContext.includes("code")) {
767
- return uniqueModels([
768
- ...configured,
769
- "qwen2.5-coder:3b",
770
- "qwen2.5:3b",
771
- "phi3:mini",
772
- "llama3.2:3b",
773
- "qwen2.5:1.5b"
774
- ]);
775
- }
776
- if (taskContext.includes("compress") || taskContext.includes("summar")) {
777
- return uniqueModels([
778
- ...configured,
779
- "qwen2.5:3b",
780
- "qwen2.5:1.5b",
781
- "phi3:mini",
782
- "llama3.2:3b"
783
- ]);
784
- }
785
- return uniqueModels([...configured, ...DEFAULT_SMALL_MODEL_PREFERENCES]);
786
- }
787
- function uniqueModels(models) {
788
- return Array.from(new Set(models));
789
- }
790
- function findPreferredMatch(installedModels, preferred) {
791
- const installedNames = installedModels.map((model) => model.name);
792
- for (const preferredName of preferred) {
793
- const direct = installedNames.find((name) => name.toLowerCase() === preferredName);
794
- if (direct) {
795
- return direct;
796
- }
797
- }
798
- return null;
799
- }
800
- function scoreModel(modelName, preset, mode, task) {
801
- const lower = modelName.toLowerCase();
802
- let score = 0;
803
- if (!isUsefulGenerationModel(lower)) {
804
- return -100;
805
- }
806
- if (lower.includes("qwen2.5")) {
807
- score += 4;
808
- } else if (lower.includes("phi3")) {
809
- score += 3.5;
810
- } else if (lower.includes("llama3.2")) {
811
- score += 3;
812
- } else if (lower.includes("mistral")) {
813
- score += 2;
814
- }
815
- const parameterSize = extractBillions(lower);
816
- if (parameterSize !== null) {
817
- if (parameterSize <= 4) {
818
- score += 4;
819
- } else if (parameterSize <= 8) {
820
- score += 1;
821
- } else {
822
- score -= 4;
823
- }
824
- }
825
- if (lower.includes("mini")) {
826
- score += 2;
827
- }
828
- if (lower.includes("instruct") || lower.includes("chat")) {
829
- score += 1;
830
- }
831
- const taskContext = `${task ?? ""} ${preset} ${mode}`.toLowerCase();
832
- if (taskContext.includes("code") && lower.includes("coder")) {
833
- score += 3;
834
- }
835
- if ((taskContext.includes("compress") || taskContext.includes("summar")) && lower.includes("qwen2.5")) {
836
- score += 1;
837
- }
838
- return score;
839
- }
840
777
  function extractBillions(modelName) {
841
778
  const match = modelName.match(/(\d+(?:\.\d+)?)b/);
842
779
  if (!match) {
@@ -870,6 +807,25 @@ function isSuitableSmallModel(model) {
870
807
  }
871
808
  return /mini|1\.5b|2b|3b|4b/i.test(model.name);
872
809
  }
810
+ function scoreRouterModel(model) {
811
+ const lower = model.name.toLowerCase();
812
+ let score = 0;
813
+ if (lower.includes("qwen2.5")) {
814
+ score += 3;
815
+ }
816
+ if (lower.includes("3b")) {
817
+ score += 2;
818
+ } else if (lower.includes("1.5b")) {
819
+ score += 1;
820
+ }
821
+ if (lower.includes("coder")) {
822
+ score -= 1;
823
+ }
824
+ return score;
825
+ }
826
+ function compareModelNames(left, right) {
827
+ return left.localeCompare(right);
828
+ }
873
829
 
874
830
  // src/core/optimizer.ts
875
831
  var DEFAULT_MODE = "claude_cli";
@@ -903,6 +859,7 @@ var PromptOptimizer = class {
903
859
  host: config.host ?? "http://localhost:11434",
904
860
  ollamaModel: config.ollamaModel,
905
861
  preferredModels: config.preferredModels ?? getDefaultPreferredModels(),
862
+ modelRoutingStrategy: "qwen",
906
863
  timeoutMs: config.timeoutMs ?? 3e4,
907
864
  temperature: config.temperature ?? 0.1
908
865
  };
@@ -948,6 +905,7 @@ var PromptOptimizer = class {
948
905
  let providerChanges = [];
949
906
  if (provider === "ollama") {
950
907
  const modelSelection = await this.resolveOllamaModel({
908
+ prompt: originalPrompt,
951
909
  mode,
952
910
  preset,
953
911
  task: input.task
@@ -1127,16 +1085,18 @@ var PromptOptimizer = class {
1127
1085
  };
1128
1086
  }
1129
1087
  if (!this.client.listModels) {
1130
- const fallback = this.config.preferredModels[0] ?? "qwen2.5:3b";
1131
1088
  return {
1132
- model: fallback,
1133
- warnings: [`Model auto-selection is unavailable in the current Ollama client, so "${fallback}" was assumed.`],
1134
- reason: `Assumed default model "${fallback}" because model discovery is unsupported.`,
1135
- forceHeuristic: false
1089
+ model: "heuristic",
1090
+ warnings: [
1091
+ "Model auto-selection is unavailable in the current Ollama client, so prompt optimization is falling back to deterministic heuristic formatting."
1092
+ ],
1093
+ reason: "Model discovery is unsupported, so Qwen-based model routing could not run.",
1094
+ forceHeuristic: true
1136
1095
  };
1137
1096
  }
1138
1097
  try {
1139
1098
  const installedModels = await this.client.listModels();
1099
+ const suitableModels = getSuitableAutoModels(installedModels);
1140
1100
  const selection = selectOllamaModel({
1141
1101
  installedModels,
1142
1102
  mode: options.mode,
@@ -1144,7 +1104,7 @@ var PromptOptimizer = class {
1144
1104
  task: options.task,
1145
1105
  preferredModels: this.config.preferredModels
1146
1106
  });
1147
- if (!selection.suitableForAutoUse) {
1107
+ if (suitableModels.length === 0) {
1148
1108
  return {
1149
1109
  model: selection.model,
1150
1110
  warnings: [
@@ -1155,19 +1115,131 @@ var PromptOptimizer = class {
1155
1115
  forceHeuristic: true
1156
1116
  };
1157
1117
  }
1118
+ if (suitableModels.length === 1) {
1119
+ return {
1120
+ model: selection.model,
1121
+ warnings: [],
1122
+ reason: selection.reason,
1123
+ forceHeuristic: false
1124
+ };
1125
+ }
1126
+ if (this.config.modelRoutingStrategy === "qwen") {
1127
+ const routed = await this.tryQwenModelRouting({
1128
+ prompt: options.prompt,
1129
+ task: options.task,
1130
+ mode: options.mode,
1131
+ preset: options.preset,
1132
+ installedModels,
1133
+ candidateModels: suitableModels.map((model) => model.name),
1134
+ fallbackModel: selection.model
1135
+ });
1136
+ return {
1137
+ model: routed.model,
1138
+ warnings: routed.warnings,
1139
+ reason: routed.reason,
1140
+ forceHeuristic: routed.model === "heuristic"
1141
+ };
1142
+ }
1158
1143
  return {
1159
- model: selection.model,
1160
- warnings: installedModels.length === 0 ? [`No installed Ollama models were reported, so "${selection.model}" was chosen as the default preference.`] : [],
1161
- reason: selection.reason,
1162
- forceHeuristic: false
1144
+ model: "heuristic",
1145
+ warnings: ["Qwen model routing is required but was disabled, so prompt optimization is falling back to deterministic heuristic formatting."],
1146
+ reason: "Qwen model routing is required but was disabled.",
1147
+ forceHeuristic: true
1163
1148
  };
1164
1149
  } catch {
1165
- const fallback = this.config.preferredModels[0] ?? "qwen2.5:3b";
1166
1150
  return {
1167
- model: fallback,
1168
- warnings: [`Failed to inspect local Ollama models, so "${fallback}" was chosen as the default preference.`],
1169
- reason: `Fell back to default model "${fallback}" because model discovery failed.`,
1170
- forceHeuristic: false
1151
+ model: "heuristic",
1152
+ warnings: [
1153
+ "Failed to inspect local Ollama models, so prompt optimization is falling back to deterministic heuristic formatting."
1154
+ ],
1155
+ reason: "Local Ollama model discovery failed, so Qwen-based model routing could not run.",
1156
+ forceHeuristic: true
1157
+ };
1158
+ }
1159
+ }
1160
+ async tryQwenModelRouting(options) {
1161
+ const routerModel = getQwenRouterModel(
1162
+ options.installedModels,
1163
+ this.config.routerModel
1164
+ );
1165
+ if (!routerModel) {
1166
+ return {
1167
+ model: "heuristic",
1168
+ warnings: [
1169
+ `Multiple suitable small local models are installed (${options.candidateModels.join(", ")}), but no local Qwen router model is available. Install qwen2.5:3b or set routerModel explicitly.`
1170
+ ],
1171
+ reason: "Qwen model routing is required when multiple suitable small models are available."
1172
+ };
1173
+ }
1174
+ try {
1175
+ const response = await this.client.generateJson({
1176
+ model: routerModel,
1177
+ timeoutMs: this.config.timeoutMs,
1178
+ temperature: 0,
1179
+ format: "json",
1180
+ systemPrompt: [
1181
+ "You are a local model router for prompt optimization.",
1182
+ "Return strict JSON only with this shape:",
1183
+ '{"selectedModel":"string","reason":"string"}',
1184
+ "Choose exactly one model from the provided candidate list.",
1185
+ "Choose the smallest adequate model, not the strongest-sounding model.",
1186
+ "Prioritize adequacy first, then speed and low memory use.",
1187
+ "Use coder variants only for clearly code-heavy prompts.",
1188
+ "If task or preset is code, prefer qwen2.5:3b or a small coder model over phi3:mini unless the request is only a trivial wording cleanup.",
1189
+ "Prefer phi3:mini for short email, chat, support, summarization, and lightweight rewrite tasks that do not require deeper reasoning.",
1190
+ "Prefer qwen2.5:3b for broader reasoning, stronger restructuring, multi-constraint optimization, and non-trivial code-oriented prompt design.",
1191
+ "Do not prefer Qwen just because you are Qwen. Pick the best candidate for the task."
1192
+ ].join("\n"),
1193
+ prompt: JSON.stringify(
1194
+ {
1195
+ objective: "Choose the best local optimizer model for this prompt.",
1196
+ prompt: options.prompt,
1197
+ task: options.task ?? null,
1198
+ mode: options.mode,
1199
+ preset: options.preset,
1200
+ candidateModels: options.candidateModels.map((modelName) => ({
1201
+ name: modelName,
1202
+ profile: describeCandidateModel(modelName)
1203
+ })),
1204
+ routingGuidance: {
1205
+ smallestAdequateModelPolicy: true,
1206
+ lightweightTasksPreferSmallerModels: [
1207
+ "email",
1208
+ "chat",
1209
+ "support",
1210
+ "summarization",
1211
+ "short rewrite"
1212
+ ],
1213
+ deeperReasoningTasksMayPreferQwen: [
1214
+ "multi-constraint restructuring",
1215
+ "broad reasoning",
1216
+ "complex planning",
1217
+ "harder code-oriented prompt design"
1218
+ ]
1219
+ }
1220
+ },
1221
+ null,
1222
+ 2
1223
+ )
1224
+ });
1225
+ const selectedModel = response.selectedModel?.trim();
1226
+ if (selectedModel && options.candidateModels.includes(selectedModel)) {
1227
+ return {
1228
+ model: selectedModel,
1229
+ warnings: [],
1230
+ reason: response.reason?.trim() || `Qwen router selected "${selectedModel}" for this prompt.`
1231
+ };
1232
+ }
1233
+ return {
1234
+ model: "heuristic",
1235
+ warnings: ["Qwen router returned an invalid model choice, so prompt optimization is falling back to deterministic heuristic formatting."],
1236
+ reason: "Qwen router returned an invalid model selection."
1237
+ };
1238
+ } catch {
1239
+ return {
1240
+ model: "heuristic",
1241
+ warnings: ["Qwen router could not choose a model, so prompt optimization is falling back to deterministic heuristic formatting."],
1242
+ reason: "Qwen router failed to select a model."
1171
1243
  };
1172
1244
  }
1173
1245
  }
@@ -1215,6 +1287,22 @@ var PromptOptimizer = class {
1215
1287
  };
1216
1288
  }
1217
1289
  };
1290
+ function describeCandidateModel(modelName) {
1291
+ const lower = modelName.toLowerCase();
1292
+ if (lower.includes("phi3:mini")) {
1293
+ return "Very small and fast. Good for short rewrites, lightweight email/chat tasks, and simple prompt cleanup.";
1294
+ }
1295
+ if (lower.includes("qwen2.5:3b")) {
1296
+ return "Small general-purpose model with stronger reasoning and restructuring than ultra-light models. Better for broader or more complex prompt optimization.";
1297
+ }
1298
+ if (lower.includes("coder")) {
1299
+ return "Code-specialized model. Use only when the prompt is clearly code-heavy or refactor-oriented.";
1300
+ }
1301
+ if (lower.includes("llama3.2:3b")) {
1302
+ return "Small general chat/rewrite model. Reasonable middle option for general tasks.";
1303
+ }
1304
+ return "Local candidate model for prompt optimization.";
1305
+ }
1218
1306
  function resolveSessionStore(config) {
1219
1307
  if (typeof config.contextStore === "object" && config.contextStore !== null) {
1220
1308
  return config.contextStore;
@@ -1532,10 +1620,30 @@ async function readStdin(stdin = process.stdin) {
1532
1620
  stdin.on("error", reject);
1533
1621
  });
1534
1622
  }
1535
- if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
1536
- runCli(process.argv.slice(2)).then((code) => {
1537
- process.exit(code);
1538
- });
1623
+ if (isMainModule()) {
1624
+ runCli(process.argv.slice(2)).then(
1625
+ (code) => {
1626
+ process.exit(code);
1627
+ },
1628
+ (error) => {
1629
+ const message = error instanceof Error ? error.message : String(error);
1630
+ process.stderr.write(`${message}
1631
+ `);
1632
+ process.exit(1);
1633
+ }
1634
+ );
1635
+ }
1636
+ function isMainModule() {
1637
+ if (!process.argv[1]) {
1638
+ return false;
1639
+ }
1640
+ try {
1641
+ const entryPath = realpathSync(process.argv[1]);
1642
+ const thisPath = realpathSync(fileURLToPath(import.meta.url));
1643
+ return entryPath === thisPath;
1644
+ } catch {
1645
+ return false;
1646
+ }
1539
1647
  }
1540
1648
  export {
1541
1649
  runCli