promptpilot 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -75,6 +75,43 @@ ollama pull qwen2.5:3b
75
75
  ollama pull phi3:mini
76
76
  ```
77
77
 
78
+ ## Custom local compressor model
79
+
80
+ PromptPilot ships a `Modelfile` that defines `promptpilot-compressor`, a text-only compression model built on top of `qwen2.5:3b`. It is tuned to output only the compressed prompt with no reasoning, analysis, or commentary.
81
+
82
+ Build and verify it:
83
+
84
+ ```bash
85
+ ollama pull qwen2.5:3b
86
+ ollama create promptpilot-compressor -f ./Modelfile
87
+ ollama run promptpilot-compressor "explain recursion simply"
88
+ ```
89
+
90
+ Use it via the CLI after installing from npm:
91
+
92
+ ```bash
93
+ # Plain output — pipe directly into Claude
94
+ promptpilot optimize "help me refactor this auth middleware" \
95
+ --model promptpilot-compressor \
96
+ --preset code \
97
+ --plain
98
+
99
+ # JSON output with debug info
100
+ promptpilot optimize "help me refactor this auth middleware" \
101
+ --model promptpilot-compressor \
102
+ --preset code \
103
+ --json --debug
104
+
105
+ # With session memory, piped into Claude
106
+ promptpilot optimize "continue the refactor" \
107
+ --model promptpilot-compressor \
108
+ --session repo-refactor \
109
+ --save-context \
110
+ --plain | claude
111
+ ```
112
+
113
+ `promptpilot-compressor` outputs plain text rather than JSON. PromptPilot detects this automatically and falls back to text-only mode, stripping any reasoning leakage before using the output. Explicit `--model` always takes priority over automatic local model selection.
114
+
78
115
  ## Core behavior
79
116
 
80
117
  PromptPilot has two distinct routing layers.
package/dist/cli.js CHANGED
@@ -3,6 +3,7 @@
3
3
  // src/cli.ts
4
4
  import { readFileSync, realpathSync } from "fs";
5
5
  import { fileURLToPath } from "url";
6
+ import { execSync } from "child_process";
6
7
 
7
8
  // src/errors.ts
8
9
  var InvalidPromptError = class extends Error {
@@ -353,6 +354,17 @@ var OllamaClient = class {
353
354
  }
354
355
  throw new OllamaUnavailableError("Ollama returned JSON that could not be parsed.");
355
356
  }
357
+ async generateJsonWithTextFallback(options, textFallbackHandler) {
358
+ try {
359
+ return await this.generateJson(options);
360
+ } catch {
361
+ const raw = await this.generate({
362
+ ...options,
363
+ format: void 0
364
+ });
365
+ return textFallbackHandler(raw);
366
+ }
367
+ }
356
368
  };
357
369
 
358
370
  // src/core/systemPrompt.ts
@@ -1046,6 +1058,11 @@ var PromptOptimizer = class {
1046
1058
  contextSummary: relevantContext.summary
1047
1059
  });
1048
1060
  }
1061
+ const originalPromptTokens = this.estimator.estimateText(originalPrompt);
1062
+ const promptCompressionSavings = Math.max(0, originalPromptTokens - estimatedTokensAfter.prompt);
1063
+ const contextCompressionSavings = Math.max(0, estimatedTokensBefore.context - estimatedTokensAfter.context);
1064
+ const wrapperOverhead = Math.max(0, estimatedTokensAfter.total - (estimatedTokensAfter.prompt + estimatedTokensAfter.context));
1065
+ const tokenSavings = promptCompressionSavings + contextCompressionSavings;
1049
1066
  return {
1050
1067
  originalPrompt,
1051
1068
  optimizedPrompt,
@@ -1054,7 +1071,10 @@ var PromptOptimizer = class {
1054
1071
  contextSummary: relevantContext.summary,
1055
1072
  estimatedTokensBefore,
1056
1073
  estimatedTokensAfter,
1057
- tokenSavings: Math.max(0, estimatedTokensBefore.total - estimatedTokensAfter.total),
1074
+ tokenSavings,
1075
+ promptCompressionSavings,
1076
+ contextCompressionSavings,
1077
+ wrapperOverhead,
1058
1078
  mode,
1059
1079
  provider,
1060
1080
  model,
@@ -1121,29 +1141,52 @@ Mode: Ultra compression. Minimize tokens aggressively.` : getOptimizationSystemP
1121
1141
  let optimizedPrompt = "";
1122
1142
  let responseChanges = [];
1123
1143
  let responseWarnings = [];
1124
- try {
1125
- const response = await this.client.generateJson({
1126
- systemPrompt,
1127
- prompt: optimizationPrompt,
1128
- timeoutMs,
1129
- model: options.model,
1130
- temperature: this.config.temperature,
1131
- format: "json"
1132
- });
1133
- optimizedPrompt = normalizeWhitespace(response.optimizedPrompt ?? "");
1134
- responseChanges = response.changes ?? [];
1135
- responseWarnings = response.warnings ?? [];
1136
- } catch {
1137
- const raw = await this.client.generate({
1138
- systemPrompt,
1139
- prompt: optimizationPrompt,
1140
- timeoutMs,
1141
- model: options.model,
1142
- temperature: this.config.temperature
1143
- });
1144
- optimizedPrompt = sanitizeTextOptimizationOutput(raw);
1145
- responseChanges = [`Applied text-only Ollama optimization with ${options.model}.`];
1146
- }
1144
+ const generateWithFallback = this.client.generateJsonWithTextFallback ? async () => {
1145
+ const response2 = await this.client.generateJsonWithTextFallback(
1146
+ {
1147
+ systemPrompt,
1148
+ prompt: optimizationPrompt,
1149
+ timeoutMs,
1150
+ model: options.model,
1151
+ temperature: this.config.temperature,
1152
+ format: "json"
1153
+ },
1154
+ (text) => ({
1155
+ optimizedPrompt: sanitizeTextOptimizationOutput(text),
1156
+ changes: [`Applied text-only Ollama optimization with ${options.model}.`],
1157
+ warnings: []
1158
+ })
1159
+ );
1160
+ return response2;
1161
+ } : async () => {
1162
+ try {
1163
+ return await this.client.generateJson({
1164
+ systemPrompt,
1165
+ prompt: optimizationPrompt,
1166
+ timeoutMs,
1167
+ model: options.model,
1168
+ temperature: this.config.temperature,
1169
+ format: "json"
1170
+ });
1171
+ } catch {
1172
+ const raw = await this.client.generate({
1173
+ systemPrompt,
1174
+ prompt: optimizationPrompt,
1175
+ timeoutMs,
1176
+ model: options.model,
1177
+ temperature: this.config.temperature
1178
+ });
1179
+ return {
1180
+ optimizedPrompt: sanitizeTextOptimizationOutput(raw),
1181
+ changes: [`Applied text-only Ollama optimization with ${options.model}.`],
1182
+ warnings: []
1183
+ };
1184
+ }
1185
+ };
1186
+ const response = await generateWithFallback();
1187
+ optimizedPrompt = normalizeWhitespace(response.optimizedPrompt ?? "");
1188
+ responseChanges = response.changes ?? [];
1189
+ responseWarnings = response.warnings ?? [];
1147
1190
  if (!optimizedPrompt) {
1148
1191
  return {
1149
1192
  optimizedPrompt: preprocessedPrompt,
@@ -1567,14 +1610,6 @@ ${contextBlock}`);
1567
1610
  if (constraints.length > 0) {
1568
1611
  sections.push(`Constraints:
1569
1612
  - ${constraints.join("\n- ")}`);
1570
- }
1571
- if (isCodeFirstRequest(input.input)) {
1572
- sections.push(`Execution loop:
1573
- - Inspect the relevant files and current behavior.
1574
- - Plan the smallest safe next step.
1575
- - Act with minimal, reversible changes.
1576
- - Test or validate the result.
1577
- - Reflect on gaps or risks, then repeat.`);
1578
1613
  }
1579
1614
  const desiredOutput = [
1580
1615
  input.routingDecision.selectedTarget ? `Selected target: ${formatTargetLabel(input.routingDecision.selectedTarget)}` : input.input.targetModel ? `Target model: ${input.input.targetModel}` : "Target model: claude",
@@ -1685,14 +1720,16 @@ function sanitizeTextOptimizationOutput(raw) {
1685
1720
  if (!normalized) {
1686
1721
  return "";
1687
1722
  }
1688
- if (!containsReasoningLeak(normalized)) {
1689
- return stripWrappingQuotes(normalized);
1723
+ let cleaned = normalized.replace(/<think>[\s\S]*?<\/think>/gi, "").replace(/<reasoning>[\s\S]*?<\/reasoning>/gi, "").replace(/<analysis>[\s\S]*?<\/analysis>/gi, "").replace(/^(thinking|thinking process|analysis|critique|attempt|final decision|role|task|guidelines)[:=]?[\s\S]*?(?=\n\n|\n[A-Z]|$)/gim, "");
1724
+ if (!containsReasoningLeak(cleaned)) {
1725
+ return stripWrappingQuotes(cleaned);
1690
1726
  }
1691
- const candidates = raw.split(/\n{2,}/).map((chunk) => stripWrappingQuotes(normalizeWhitespace(chunk))).filter(Boolean).filter((chunk) => !containsReasoningLeak(chunk)).filter((chunk) => !/^(role|task|guidelines|thinking|thinking process|attempt|critique|final decision|analysis)\b/i.test(chunk)).filter((chunk) => !/^[-*]\s/.test(chunk)).filter((chunk) => !/^\d+\.\s/.test(chunk));
1692
- return candidates.at(-1) ?? stripWrappingQuotes(normalized);
1727
+ const candidates = cleaned.split(/\n{2,}/).map((chunk) => stripWrappingQuotes(normalizeWhitespace(chunk))).filter(Boolean).filter((chunk) => !containsReasoningLeak(chunk)).filter((chunk) => !/^(role|task|guidelines|thinking|thinking process|attempt|critique|final decision|analysis)\b/i.test(chunk)).filter((chunk) => chunk.length > 10);
1728
+ const selected = candidates.reduce((a, b) => a.length > b.length ? a : b, "");
1729
+ return selected || stripWrappingQuotes(normalized);
1693
1730
  }
1694
1731
/**
 * Heuristic check for chain-of-thought leakage in model output: matches
 * known reasoning phrases, explicit reasoning tags, or a leading
 * "thinking:"/"analysis:" label. The `^` anchors are start-of-string only
 * (no `m` flag) — callers pass already-normalized text or per-chunk slices.
 */
function containsReasoningLeak(text) {
  const leakPattern = /(thinking process|analyze the request|drafting the optimized prompt|critique \d|attempt \d|final decision|^thinking:|^analysis:|<think>|<reasoning>|<analysis>)/i;
  return leakPattern.test(text);
}
1697
1734
  function stripWrappingQuotes(text) {
1698
1735
  return text.replace(/^["'`]+|["'`]+$/g, "").trim();
@@ -1942,6 +1979,44 @@ function capitalize(value) {
1942
1979
  return value[0].toUpperCase() + value.slice(1);
1943
1980
  }
1944
1981
 
1982
// src/utils/spinner.ts
/**
 * Minimal terminal spinner rendered on an arbitrary writable stream.
 * All output is suppressed when the stream is not a TTY, so piped output
 * (e.g. `--plain | claude`) never receives control characters.
 */
var Spinner = class {
  message = "";
  frame = 0;
  interval = null;
  writer;
  isTTY;
  // Braille-dot animation frames.
  frames = ["\u280B", "\u2819", "\u2839", "\u2838", "\u283C", "\u2834", "\u2826", "\u2827", "\u2807", "\u280F"];
  constructor(writer, isTTY = false) {
    this.writer = writer;
    this.isTTY = isTTY;
  }
  /**
   * Begin animating with the given message. No-op for non-TTY streams.
   * FIX: calling start() while already running now clears the previous
   * timer first — the original overwrote `this.interval` without
   * clearing it, leaking an interval that kept repainting forever.
   */
  start(message) {
    if (!this.isTTY) {
      return;
    }
    if (this.interval) {
      clearInterval(this.interval);
    }
    this.message = message;
    this.frame = 0;
    this.interval = setInterval(() => {
      const spinner = this.frames[this.frame % this.frames.length];
      this.writer.write(`\r${spinner} ${this.message}`);
      this.frame += 1;
    }, 80);
  }
  /** Stop the animation (idempotent) and erase the spinner line. */
  stop() {
    if (this.interval) {
      clearInterval(this.interval);
      this.interval = null;
    }
    if (this.isTTY) {
      this.writer.write("\r\x1B[K");
    }
  }
};
/** Factory mirroring the class constructor. */
function createSpinner(writer, isTTY = false) {
  return new Spinner(writer, isTTY);
}
+
1945
2020
  // src/cli.ts
1946
2021
  async function runCli(argv, io = { stdout: process.stdout, stderr: process.stderr, stdin: process.stdin }, dependencies = { createOptimizer, readStdin, getCliInfo }) {
1947
2022
  const [command, ...rest] = argv;
@@ -2004,7 +2079,9 @@ async function runCli(argv, io = { stdout: process.stdout, stderr: process.stder
2004
2079
  io.stderr.write("A prompt is required.\n");
2005
2080
  return 1;
2006
2081
  }
2082
+ const spinner = createSpinner(io.stderr, io.stderr.isTTY ?? false);
2007
2083
  try {
2084
+ spinner.start("optimizing");
2008
2085
  const result = await optimizer.optimize({
2009
2086
  prompt: parsed.prompt,
2010
2087
  task: parsed.task,
@@ -2033,11 +2110,26 @@ async function runCli(argv, io = { stdout: process.stdout, stderr: process.stder
2033
2110
  timeoutMs: parsed.timeoutMs,
2034
2111
  bypassOptimization: parsed.bypassOptimization
2035
2112
  });
2113
+ spinner.stop();
2036
2114
  if (parsed.json) {
2037
2115
  io.stdout.write(`${toPrettyJson(result)}
2038
2116
  `);
2039
2117
  return 0;
2040
2118
  }
2119
+ if (parsed.clipboard) {
2120
+ const copied = copyToClipboard(result.finalPrompt);
2121
+ if (copied) {
2122
+ io.stderr.write(`\u2713 Copied optimized prompt to clipboard
2123
+ `);
2124
+ return 0;
2125
+ } else {
2126
+ io.stderr.write(`\u2717 Failed to copy to clipboard. Install xclip, xsel, or wl-copy.
2127
+ `);
2128
+ io.stdout.write(`${result.finalPrompt}
2129
+ `);
2130
+ return 1;
2131
+ }
2132
+ }
2041
2133
  if (parsed.plain) {
2042
2134
  io.stdout.write(`${result.finalPrompt}
2043
2135
  `);
@@ -2047,6 +2139,8 @@ async function runCli(argv, io = { stdout: process.stdout, stderr: process.stder
2047
2139
 
2048
2140
  `);
2049
2141
  io.stdout.write(`provider=${result.provider} model=${result.model} tokens=${result.estimatedTokensAfter.total} savings=${result.tokenSavings}
2142
+ `);
2143
+ io.stdout.write(` prompt_savings=${result.promptCompressionSavings} context_savings=${result.contextCompressionSavings} wrapper_overhead=${result.wrapperOverhead}
2050
2144
  `);
2051
2145
  if (result.selectedTarget) {
2052
2146
  io.stdout.write(`selected_target=${formatTarget(result.selectedTarget)}
@@ -2058,6 +2152,7 @@ async function runCli(argv, io = { stdout: process.stdout, stderr: process.stder
2058
2152
  }
2059
2153
  return 0;
2060
2154
  } catch (error) {
2155
+ spinner.stop();
2061
2156
  const message = error instanceof Error ? error.message : "Unknown CLI error.";
2062
2157
  io.stderr.write(`${message}
2063
2158
  `);
@@ -2069,6 +2164,7 @@ function parseOptimizeArgs(args) {
2069
2164
  plain: false,
2070
2165
  json: false,
2071
2166
  debug: false,
2167
+ clipboard: false,
2072
2168
  clearSession: false,
2073
2169
  useContext: true,
2074
2170
  bypassOptimization: false,
@@ -2152,6 +2248,9 @@ function parseOptimizeArgs(args) {
2152
2248
  case "--json":
2153
2249
  parsed.json = true;
2154
2250
  break;
2251
+ case "--clipboard":
2252
+ parsed.clipboard = true;
2253
+ break;
2155
2254
  case "--debug":
2156
2255
  parsed.debug = true;
2157
2256
  break;
@@ -2218,6 +2317,7 @@ function getHelpText() {
2218
2317
  " --sqlite-path <path>",
2219
2318
  " --plain",
2220
2319
  " --json",
2320
+ " --clipboard Copy optimized prompt to clipboard",
2221
2321
  " --debug",
2222
2322
  " --save-context",
2223
2323
  " --no-context",
@@ -2287,6 +2387,23 @@ function readPackageVersion() {
2287
2387
  return "dev";
2288
2388
  }
2289
2389
  }
2390
/**
 * Best-effort copy of `text` to the system clipboard using whichever
 * platform clipboard utility is installed. Returns true on success,
 * false when no tool is available or every attempt failed.
 *
 * FIX: the original declared per-tool arguments but destructured only
 * `{ cmd }` and invoked the bare command, so `xclip` wrote to the
 * PRIMARY selection instead of the clipboard (and `xsel` to its
 * default selection). Arguments are now passed through, candidates are
 * filtered to the current platform, and the POSIX `command -v` replaces
 * the non-standard `which` for existence checks.
 */
function copyToClipboard(text) {
  const commands = [
    { cmd: "xclip", args: ["-selection", "clipboard"], platform: "linux" },
    { cmd: "xsel", args: ["-b"], platform: "linux" },
    { cmd: "wl-copy", args: [], platform: "linux" },
    { cmd: "pbcopy", args: [], platform: "darwin" }
  ];
  for (const { cmd, args, platform } of commands) {
    if (platform !== process.platform) {
      continue;
    }
    try {
      // `command -v` exits non-zero when the tool is missing.
      execSync(`command -v ${cmd} > /dev/null 2>&1`);
      execSync([cmd, ...args].join(" "), { input: text, stdio: ["pipe", "ignore", "ignore"] });
      return true;
    } catch {
      // Tool missing or copy failed — fall through to the next candidate.
    }
  }
  return false;
}
2290
2407
  if (isMainModule()) {
2291
2408
  runCli(process.argv.slice(2)).then(
2292
2409
  (code) => {