promptpilot 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -75,6 +75,43 @@ ollama pull qwen2.5:3b
75
75
  ollama pull phi3:mini
76
76
  ```
77
77
 
78
+ ## Custom local compressor model
79
+
80
+ PromptPilot ships a `Modelfile` that defines `promptpilot-compressor`, a text-only compression model built on top of `qwen2.5:3b`. It is tuned to output only the compressed prompt with no reasoning, analysis, or commentary.
81
+
82
+ Build and verify it:
83
+
84
+ ```bash
85
+ ollama pull qwen2.5:3b
86
+ ollama create promptpilot-compressor -f ./Modelfile
87
+ ollama run promptpilot-compressor "explain recursion simply"
88
+ ```
89
+
90
+ Use it via the CLI after installing from npm:
91
+
92
+ ```bash
93
+ # Plain output — pipe directly into Claude
94
+ promptpilot optimize "help me refactor this auth middleware" \
95
+ --model promptpilot-compressor \
96
+ --preset code \
97
+ --plain
98
+
99
+ # JSON output with debug info
100
+ promptpilot optimize "help me refactor this auth middleware" \
101
+ --model promptpilot-compressor \
102
+ --preset code \
103
+ --json --debug
104
+
105
+ # With session memory, piped into Claude
106
+ promptpilot optimize "continue the refactor" \
107
+ --model promptpilot-compressor \
108
+ --session repo-refactor \
109
+ --save-context \
110
+ --plain | claude
111
+ ```
112
+
113
+ `promptpilot-compressor` outputs plain text rather than JSON. PromptPilot detects this automatically and falls back to text-only mode, stripping any reasoning leakage before using the output. Explicit `--model` always takes priority over automatic local model selection.
114
+
78
115
  ## Core behavior
79
116
 
80
117
  PromptPilot has two distinct routing layers.
package/dist/cli.js CHANGED
@@ -3,6 +3,7 @@
3
3
  // src/cli.ts
4
4
  import { readFileSync, realpathSync } from "fs";
5
5
  import { fileURLToPath } from "url";
6
+ import { execSync } from "child_process";
6
7
 
7
8
  // src/errors.ts
8
9
  var InvalidPromptError = class extends Error {
@@ -353,6 +354,17 @@ var OllamaClient = class {
353
354
  }
354
355
  throw new OllamaUnavailableError("Ollama returned JSON that could not be parsed.");
355
356
  }
357
+ async generateJsonWithTextFallback(options, textFallbackHandler) {
358
+ try {
359
+ return await this.generateJson(options);
360
+ } catch {
361
+ const raw = await this.generate({
362
+ ...options,
363
+ format: void 0
364
+ });
365
+ return textFallbackHandler(raw);
366
+ }
367
+ }
356
368
  };
357
369
 
358
370
  // src/core/systemPrompt.ts
@@ -1046,6 +1058,11 @@ var PromptOptimizer = class {
1046
1058
  contextSummary: relevantContext.summary
1047
1059
  });
1048
1060
  }
1061
+ const originalPromptTokens = this.estimator.estimateText(originalPrompt);
1062
+ const promptCompressionSavings = Math.max(0, originalPromptTokens - estimatedTokensAfter.prompt);
1063
+ const contextCompressionSavings = Math.max(0, estimatedTokensBefore.context - estimatedTokensAfter.context);
1064
+ const wrapperOverhead = Math.max(0, estimatedTokensAfter.total - (estimatedTokensAfter.prompt + estimatedTokensAfter.context));
1065
+ const tokenSavings = promptCompressionSavings + contextCompressionSavings;
1049
1066
  return {
1050
1067
  originalPrompt,
1051
1068
  optimizedPrompt,
@@ -1054,7 +1071,10 @@ var PromptOptimizer = class {
1054
1071
  contextSummary: relevantContext.summary,
1055
1072
  estimatedTokensBefore,
1056
1073
  estimatedTokensAfter,
1057
- tokenSavings: Math.max(0, estimatedTokensBefore.total - estimatedTokensAfter.total),
1074
+ tokenSavings,
1075
+ promptCompressionSavings,
1076
+ contextCompressionSavings,
1077
+ wrapperOverhead,
1058
1078
  mode,
1059
1079
  provider,
1060
1080
  model,
@@ -1121,29 +1141,52 @@ Mode: Ultra compression. Minimize tokens aggressively.` : getOptimizationSystemP
1121
1141
  let optimizedPrompt = "";
1122
1142
  let responseChanges = [];
1123
1143
  let responseWarnings = [];
1124
- try {
1125
- const response = await this.client.generateJson({
1126
- systemPrompt,
1127
- prompt: optimizationPrompt,
1128
- timeoutMs,
1129
- model: options.model,
1130
- temperature: this.config.temperature,
1131
- format: "json"
1132
- });
1133
- optimizedPrompt = normalizeWhitespace(response.optimizedPrompt ?? "");
1134
- responseChanges = response.changes ?? [];
1135
- responseWarnings = response.warnings ?? [];
1136
- } catch {
1137
- const raw = await this.client.generate({
1138
- systemPrompt,
1139
- prompt: optimizationPrompt,
1140
- timeoutMs,
1141
- model: options.model,
1142
- temperature: this.config.temperature
1143
- });
1144
- optimizedPrompt = sanitizeTextOptimizationOutput(raw);
1145
- responseChanges = [`Applied text-only Ollama optimization with ${options.model}.`];
1146
- }
1144
+ const generateWithFallback = this.client.generateJsonWithTextFallback ? async () => {
1145
+ const response2 = await this.client.generateJsonWithTextFallback(
1146
+ {
1147
+ systemPrompt,
1148
+ prompt: optimizationPrompt,
1149
+ timeoutMs,
1150
+ model: options.model,
1151
+ temperature: this.config.temperature,
1152
+ format: "json"
1153
+ },
1154
+ (text) => ({
1155
+ optimizedPrompt: sanitizeTextOptimizationOutput(text),
1156
+ changes: [`Applied text-only Ollama optimization with ${options.model}.`],
1157
+ warnings: []
1158
+ })
1159
+ );
1160
+ return response2;
1161
+ } : async () => {
1162
+ try {
1163
+ return await this.client.generateJson({
1164
+ systemPrompt,
1165
+ prompt: optimizationPrompt,
1166
+ timeoutMs,
1167
+ model: options.model,
1168
+ temperature: this.config.temperature,
1169
+ format: "json"
1170
+ });
1171
+ } catch {
1172
+ const raw = await this.client.generate({
1173
+ systemPrompt,
1174
+ prompt: optimizationPrompt,
1175
+ timeoutMs,
1176
+ model: options.model,
1177
+ temperature: this.config.temperature
1178
+ });
1179
+ return {
1180
+ optimizedPrompt: sanitizeTextOptimizationOutput(raw),
1181
+ changes: [`Applied text-only Ollama optimization with ${options.model}.`],
1182
+ warnings: []
1183
+ };
1184
+ }
1185
+ };
1186
+ const response = await generateWithFallback();
1187
+ optimizedPrompt = normalizeWhitespace(response.optimizedPrompt ?? "");
1188
+ responseChanges = response.changes ?? [];
1189
+ responseWarnings = response.warnings ?? [];
1147
1190
  if (!optimizedPrompt) {
1148
1191
  return {
1149
1192
  optimizedPrompt: preprocessedPrompt,
@@ -1685,14 +1728,16 @@ function sanitizeTextOptimizationOutput(raw) {
1685
1728
  if (!normalized) {
1686
1729
  return "";
1687
1730
  }
1688
- if (!containsReasoningLeak(normalized)) {
1689
- return stripWrappingQuotes(normalized);
1731
+ let cleaned = normalized.replace(/<think>[\s\S]*?<\/think>/gi, "").replace(/<reasoning>[\s\S]*?<\/reasoning>/gi, "").replace(/<analysis>[\s\S]*?<\/analysis>/gi, "").replace(/^(thinking|thinking process|analysis|critique|attempt|final decision|role|task|guidelines)[:=]?[\s\S]*?(?=\n\n|\n[A-Z]|$)/gim, "");
1732
+ if (!containsReasoningLeak(cleaned)) {
1733
+ return stripWrappingQuotes(cleaned);
1690
1734
  }
1691
- const candidates = raw.split(/\n{2,}/).map((chunk) => stripWrappingQuotes(normalizeWhitespace(chunk))).filter(Boolean).filter((chunk) => !containsReasoningLeak(chunk)).filter((chunk) => !/^(role|task|guidelines|thinking|thinking process|attempt|critique|final decision|analysis)\b/i.test(chunk)).filter((chunk) => !/^[-*]\s/.test(chunk)).filter((chunk) => !/^\d+\.\s/.test(chunk));
1692
- return candidates.at(-1) ?? stripWrappingQuotes(normalized);
1735
+ const candidates = cleaned.split(/\n{2,}/).map((chunk) => stripWrappingQuotes(normalizeWhitespace(chunk))).filter(Boolean).filter((chunk) => !containsReasoningLeak(chunk)).filter((chunk) => !/^(role|task|guidelines|thinking|thinking process|attempt|critique|final decision|analysis)\b/i.test(chunk)).filter((chunk) => chunk.length > 10);
1736
+ const selected = candidates.reduce((a, b) => a.length > b.length ? a : b, "");
1737
+ return selected || stripWrappingQuotes(normalized);
1693
1738
  }
1694
1739
  function containsReasoningLeak(text) {
1695
- return /(thinking process|analyze the request|drafting the optimized prompt|critique \d|attempt \d|final decision)/i.test(text);
1740
+ return /(thinking process|analyze the request|drafting the optimized prompt|critique \d|attempt \d|final decision|^thinking:|^analysis:|<think>|<reasoning>|<analysis>)/i.test(text);
1696
1741
  }
1697
1742
  function stripWrappingQuotes(text) {
1698
1743
  return text.replace(/^["'`]+|["'`]+$/g, "").trim();
@@ -1942,6 +1987,44 @@ function capitalize(value) {
1942
1987
  return value[0].toUpperCase() + value.slice(1);
1943
1988
  }
1944
1989
 
1990
+ // src/utils/spinner.ts
1991
+ var Spinner = class {
1992
+ message = "";
1993
+ frame = 0;
1994
+ interval = null;
1995
+ writer;
1996
+ isTTY;
1997
+ frames = ["\u280B", "\u2819", "\u2839", "\u2838", "\u283C", "\u2834", "\u2826", "\u2827", "\u2807", "\u280F"];
1998
+ constructor(writer, isTTY = false) {
1999
+ this.writer = writer;
2000
+ this.isTTY = isTTY;
2001
+ }
2002
+ start(message) {
2003
+ if (!this.isTTY) {
2004
+ return;
2005
+ }
2006
+ this.message = message;
2007
+ this.frame = 0;
2008
+ this.interval = setInterval(() => {
2009
+ const spinner = this.frames[this.frame % this.frames.length];
2010
+ this.writer.write(`\r${spinner} ${this.message}`);
2011
+ this.frame += 1;
2012
+ }, 80);
2013
+ }
2014
+ stop() {
2015
+ if (this.interval) {
2016
+ clearInterval(this.interval);
2017
+ this.interval = null;
2018
+ }
2019
+ if (this.isTTY) {
2020
+ this.writer.write("\r\x1B[K");
2021
+ }
2022
+ }
2023
+ };
2024
/**
 * Factory for `Spinner`.
 *
 * @param writer Object exposing `write(string)`; forwarded to the constructor.
 * @param isTTY Forwarded to the constructor. Defaults to false.
 * @returns A new `Spinner` instance.
 */
function createSpinner(writer, isTTY = false) {
  const instance = new Spinner(writer, isTTY);
  return instance;
}
2027
+
1945
2028
  // src/cli.ts
1946
2029
  async function runCli(argv, io = { stdout: process.stdout, stderr: process.stderr, stdin: process.stdin }, dependencies = { createOptimizer, readStdin, getCliInfo }) {
1947
2030
  const [command, ...rest] = argv;
@@ -2004,7 +2087,9 @@ async function runCli(argv, io = { stdout: process.stdout, stderr: process.stder
2004
2087
  io.stderr.write("A prompt is required.\n");
2005
2088
  return 1;
2006
2089
  }
2090
+ const spinner = createSpinner(io.stderr, io.stderr.isTTY ?? false);
2007
2091
  try {
2092
+ spinner.start("optimizing");
2008
2093
  const result = await optimizer.optimize({
2009
2094
  prompt: parsed.prompt,
2010
2095
  task: parsed.task,
@@ -2033,11 +2118,26 @@ async function runCli(argv, io = { stdout: process.stdout, stderr: process.stder
2033
2118
  timeoutMs: parsed.timeoutMs,
2034
2119
  bypassOptimization: parsed.bypassOptimization
2035
2120
  });
2121
+ spinner.stop();
2036
2122
  if (parsed.json) {
2037
2123
  io.stdout.write(`${toPrettyJson(result)}
2038
2124
  `);
2039
2125
  return 0;
2040
2126
  }
2127
+ if (parsed.clipboard) {
2128
+ const copied = copyToClipboard(result.finalPrompt);
2129
+ if (copied) {
2130
+ io.stderr.write(`\u2713 Copied optimized prompt to clipboard
2131
+ `);
2132
+ return 0;
2133
+ } else {
2134
+ io.stderr.write(`\u2717 Failed to copy to clipboard. Install xclip, xsel, or wl-copy.
2135
+ `);
2136
+ io.stdout.write(`${result.finalPrompt}
2137
+ `);
2138
+ return 1;
2139
+ }
2140
+ }
2041
2141
  if (parsed.plain) {
2042
2142
  io.stdout.write(`${result.finalPrompt}
2043
2143
  `);
@@ -2047,6 +2147,8 @@ async function runCli(argv, io = { stdout: process.stdout, stderr: process.stder
2047
2147
 
2048
2148
  `);
2049
2149
  io.stdout.write(`provider=${result.provider} model=${result.model} tokens=${result.estimatedTokensAfter.total} savings=${result.tokenSavings}
2150
+ `);
2151
+ io.stdout.write(` prompt_savings=${result.promptCompressionSavings} context_savings=${result.contextCompressionSavings} wrapper_overhead=${result.wrapperOverhead}
2050
2152
  `);
2051
2153
  if (result.selectedTarget) {
2052
2154
  io.stdout.write(`selected_target=${formatTarget(result.selectedTarget)}
@@ -2058,6 +2160,7 @@ async function runCli(argv, io = { stdout: process.stdout, stderr: process.stder
2058
2160
  }
2059
2161
  return 0;
2060
2162
  } catch (error) {
2163
+ spinner.stop();
2061
2164
  const message = error instanceof Error ? error.message : "Unknown CLI error.";
2062
2165
  io.stderr.write(`${message}
2063
2166
  `);
@@ -2069,6 +2172,7 @@ function parseOptimizeArgs(args) {
2069
2172
  plain: false,
2070
2173
  json: false,
2071
2174
  debug: false,
2175
+ clipboard: false,
2072
2176
  clearSession: false,
2073
2177
  useContext: true,
2074
2178
  bypassOptimization: false,
@@ -2152,6 +2256,9 @@ function parseOptimizeArgs(args) {
2152
2256
  case "--json":
2153
2257
  parsed.json = true;
2154
2258
  break;
2259
+ case "--clipboard":
2260
+ parsed.clipboard = true;
2261
+ break;
2155
2262
  case "--debug":
2156
2263
  parsed.debug = true;
2157
2264
  break;
@@ -2218,6 +2325,7 @@ function getHelpText() {
2218
2325
  " --sqlite-path <path>",
2219
2326
  " --plain",
2220
2327
  " --json",
2328
+ " --clipboard Copy optimized prompt to clipboard",
2221
2329
  " --debug",
2222
2330
  " --save-context",
2223
2331
  " --no-context",
@@ -2287,6 +2395,23 @@ function readPackageVersion() {
2287
2395
  return "dev";
2288
2396
  }
2289
2397
  }
2398
/**
 * Best-effort copy of `text` to the system clipboard by piping it into the
 * first clipboard utility that is installed and exits successfully.
 *
 * Candidates are tried in the listed order. The `platform` field is not
 * consulted, so every candidate is attempted on every platform.
 *
 * @param text Text to place on the clipboard; sent to the tool via stdin.
 * @returns `true` as soon as one tool succeeds, `false` if all of them fail.
 */
function copyToClipboard(text) {
  const commands = [
    { cmd: "xclip", args: ["-selection", "clipboard"], platform: "linux" },
    { cmd: "xsel", args: ["-b"], platform: "linux" },
    { cmd: "wl-copy", args: [], platform: "linux" },
    { cmd: "pbcopy", args: [], platform: "darwin" }
  ];
  for (const { cmd, args } of commands) {
    try {
      // Probe for the tool first; `which` exits non-zero when it is missing.
      execSync(`which ${cmd} > /dev/null 2>&1`);
      // The arguments must be passed along: without `-selection clipboard`
      // xclip writes to the PRIMARY selection, and without `-b` xsel does the
      // same, so the copy would "succeed" but leave the clipboard untouched.
      // `cmd` and `args` are fixed constants above, so joining them into a
      // shell string carries no injection risk.
      const commandLine = [cmd, ...args].join(" ");
      execSync(commandLine, { input: text, stdio: ["pipe", "ignore", "ignore"] });
      return true;
    } catch {
      // Tool missing or failed: fall through to the next candidate.
    }
  }
  return false;
}
2290
2415
  if (isMainModule()) {
2291
2416
  runCli(process.argv.slice(2)).then(
2292
2417
  (code) => {