promptpilot 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -75,6 +75,43 @@ ollama pull qwen2.5:3b
75
75
  ollama pull phi3:mini
76
76
  ```
77
77
 
78
+ ## Custom local compressor model
79
+
80
+ PromptPilot ships a `Modelfile` that defines `promptpilot-compressor`, a text-only compression model built on top of `qwen2.5:3b`. It is tuned to output only the compressed prompt with no reasoning, analysis, or commentary.
81
+
82
+ Build and verify it:
83
+
84
+ ```bash
85
+ ollama pull qwen2.5:3b
86
+ ollama create promptpilot-compressor -f ./Modelfile
87
+ ollama run promptpilot-compressor "explain recursion simply"
88
+ ```
89
+
90
+ Use it via the CLI after installing from npm:
91
+
92
+ ```bash
93
+ # Plain output — pipe directly into Claude
94
+ promptpilot optimize "help me refactor this auth middleware" \
95
+ --model promptpilot-compressor \
96
+ --preset code \
97
+ --plain
98
+
99
+ # JSON output with debug info
100
+ promptpilot optimize "help me refactor this auth middleware" \
101
+ --model promptpilot-compressor \
102
+ --preset code \
103
+ --json --debug
104
+
105
+ # With session memory, piped into Claude
106
+ promptpilot optimize "continue the refactor" \
107
+ --model promptpilot-compressor \
108
+ --session repo-refactor \
109
+ --save-context \
110
+ --plain | claude
111
+ ```
112
+
113
+ `promptpilot-compressor` outputs plain text rather than JSON. PromptPilot detects this automatically and falls back to text-only mode, stripping any reasoning leakage before using the output. Explicit `--model` always takes priority over automatic local model selection.
114
+
78
115
  ## Core behavior
79
116
 
80
117
  PromptPilot has two distinct routing layers.
package/dist/cli.js CHANGED
@@ -3,6 +3,7 @@
3
3
  // src/cli.ts
4
4
  import { readFileSync, realpathSync } from "fs";
5
5
  import { fileURLToPath } from "url";
6
+ import { execSync } from "child_process";
6
7
 
7
8
  // src/errors.ts
8
9
  var InvalidPromptError = class extends Error {
@@ -353,6 +354,17 @@ var OllamaClient = class {
353
354
  }
354
355
  throw new OllamaUnavailableError("Ollama returned JSON that could not be parsed.");
355
356
  }
357
+ async generateJsonWithTextFallback(options, textFallbackHandler) {
358
+ try {
359
+ return await this.generateJson(options);
360
+ } catch {
361
+ const raw = await this.generate({
362
+ ...options,
363
+ format: void 0
364
+ });
365
+ return textFallbackHandler(raw);
366
+ }
367
+ }
356
368
  };
357
369
 
358
370
  // src/core/systemPrompt.ts
@@ -1046,6 +1058,11 @@ var PromptOptimizer = class {
1046
1058
  contextSummary: relevantContext.summary
1047
1059
  });
1048
1060
  }
1061
+ const originalPromptTokens = this.estimator.estimateText(originalPrompt);
1062
+ const promptCompressionSavings = Math.max(0, originalPromptTokens - estimatedTokensAfter.prompt);
1063
+ const contextCompressionSavings = Math.max(0, estimatedTokensBefore.context - estimatedTokensAfter.context);
1064
+ const wrapperOverhead = Math.max(0, estimatedTokensAfter.total - (estimatedTokensAfter.prompt + estimatedTokensAfter.context));
1065
+ const tokenSavings = promptCompressionSavings + contextCompressionSavings;
1049
1066
  return {
1050
1067
  originalPrompt,
1051
1068
  optimizedPrompt,
@@ -1054,7 +1071,10 @@ var PromptOptimizer = class {
1054
1071
  contextSummary: relevantContext.summary,
1055
1072
  estimatedTokensBefore,
1056
1073
  estimatedTokensAfter,
1057
- tokenSavings: Math.max(0, estimatedTokensBefore.total - estimatedTokensAfter.total),
1074
+ tokenSavings,
1075
+ promptCompressionSavings,
1076
+ contextCompressionSavings,
1077
+ wrapperOverhead,
1058
1078
  mode,
1059
1079
  provider,
1060
1080
  model,
@@ -1121,29 +1141,52 @@ Mode: Ultra compression. Minimize tokens aggressively.` : getOptimizationSystemP
1121
1141
  let optimizedPrompt = "";
1122
1142
  let responseChanges = [];
1123
1143
  let responseWarnings = [];
1124
- try {
1125
- const response = await this.client.generateJson({
1126
- systemPrompt,
1127
- prompt: optimizationPrompt,
1128
- timeoutMs,
1129
- model: options.model,
1130
- temperature: this.config.temperature,
1131
- format: "json"
1132
- });
1133
- optimizedPrompt = normalizeWhitespace(response.optimizedPrompt ?? "");
1134
- responseChanges = response.changes ?? [];
1135
- responseWarnings = response.warnings ?? [];
1136
- } catch {
1137
- const raw = await this.client.generate({
1138
- systemPrompt,
1139
- prompt: optimizationPrompt,
1140
- timeoutMs,
1141
- model: options.model,
1142
- temperature: this.config.temperature
1143
- });
1144
- optimizedPrompt = sanitizeTextOptimizationOutput(raw);
1145
- responseChanges = [`Applied text-only Ollama optimization with ${options.model}.`];
1146
- }
1144
+ const generateWithFallback = this.client.generateJsonWithTextFallback ? async () => {
1145
+ const response2 = await this.client.generateJsonWithTextFallback(
1146
+ {
1147
+ systemPrompt,
1148
+ prompt: optimizationPrompt,
1149
+ timeoutMs,
1150
+ model: options.model,
1151
+ temperature: this.config.temperature,
1152
+ format: "json"
1153
+ },
1154
+ (text) => ({
1155
+ optimizedPrompt: sanitizeTextOptimizationOutput(text),
1156
+ changes: [`Applied text-only Ollama optimization with ${options.model}.`],
1157
+ warnings: []
1158
+ })
1159
+ );
1160
+ return response2;
1161
+ } : async () => {
1162
+ try {
1163
+ return await this.client.generateJson({
1164
+ systemPrompt,
1165
+ prompt: optimizationPrompt,
1166
+ timeoutMs,
1167
+ model: options.model,
1168
+ temperature: this.config.temperature,
1169
+ format: "json"
1170
+ });
1171
+ } catch {
1172
+ const raw = await this.client.generate({
1173
+ systemPrompt,
1174
+ prompt: optimizationPrompt,
1175
+ timeoutMs,
1176
+ model: options.model,
1177
+ temperature: this.config.temperature
1178
+ });
1179
+ return {
1180
+ optimizedPrompt: sanitizeTextOptimizationOutput(raw),
1181
+ changes: [`Applied text-only Ollama optimization with ${options.model}.`],
1182
+ warnings: []
1183
+ };
1184
+ }
1185
+ };
1186
+ const response = await generateWithFallback();
1187
+ optimizedPrompt = normalizeWhitespace(response.optimizedPrompt ?? "");
1188
+ responseChanges = response.changes ?? [];
1189
+ responseWarnings = response.warnings ?? [];
1147
1190
  if (!optimizedPrompt) {
1148
1191
  return {
1149
1192
  optimizedPrompt: preprocessedPrompt,
@@ -1567,14 +1610,6 @@ ${contextBlock}`);
1567
1610
  if (constraints.length > 0) {
1568
1611
  sections.push(`Constraints:
1569
1612
  - ${constraints.join("\n- ")}`);
1570
- }
1571
- if (isCodeFirstRequest(input.input)) {
1572
- sections.push(`Execution loop:
1573
- - Inspect the relevant files and current behavior.
1574
- - Plan the smallest safe next step.
1575
- - Act with minimal, reversible changes.
1576
- - Test or validate the result.
1577
- - Reflect on gaps or risks, then repeat.`);
1578
1613
  }
1579
1614
  const desiredOutput = [
1580
1615
  input.routingDecision.selectedTarget ? `Selected target: ${formatTargetLabel(input.routingDecision.selectedTarget)}` : input.input.targetModel ? `Target model: ${input.input.targetModel}` : "Target model: claude",
@@ -1685,14 +1720,16 @@ function sanitizeTextOptimizationOutput(raw) {
1685
1720
  if (!normalized) {
1686
1721
  return "";
1687
1722
  }
1688
- if (!containsReasoningLeak(normalized)) {
1689
- return stripWrappingQuotes(normalized);
1723
+ let cleaned = normalized.replace(/<think>[\s\S]*?<\/think>/gi, "").replace(/<reasoning>[\s\S]*?<\/reasoning>/gi, "").replace(/<analysis>[\s\S]*?<\/analysis>/gi, "").replace(/^(thinking|thinking process|analysis|critique|attempt|final decision|role|task|guidelines)[:=]?[\s\S]*?(?=\n\n|\n[A-Z]|$)/gim, "");
1724
+ if (!containsReasoningLeak(cleaned)) {
1725
+ return stripWrappingQuotes(cleaned);
1690
1726
  }
1691
- const candidates = raw.split(/\n{2,}/).map((chunk) => stripWrappingQuotes(normalizeWhitespace(chunk))).filter(Boolean).filter((chunk) => !containsReasoningLeak(chunk)).filter((chunk) => !/^(role|task|guidelines|thinking|thinking process|attempt|critique|final decision|analysis)\b/i.test(chunk)).filter((chunk) => !/^[-*]\s/.test(chunk)).filter((chunk) => !/^\d+\.\s/.test(chunk));
1692
- return candidates.at(-1) ?? stripWrappingQuotes(normalized);
1727
+ const candidates = cleaned.split(/\n{2,}/).map((chunk) => stripWrappingQuotes(normalizeWhitespace(chunk))).filter(Boolean).filter((chunk) => !containsReasoningLeak(chunk)).filter((chunk) => !/^(role|task|guidelines|thinking|thinking process|attempt|critique|final decision|analysis)\b/i.test(chunk)).filter((chunk) => chunk.length > 10);
1728
+ const selected = candidates.reduce((a, b) => a.length > b.length ? a : b, "");
1729
+ return selected || stripWrappingQuotes(normalized);
1693
1730
  }
1694
1731
/**
 * Heuristic check for chain-of-thought leakage in model output: matches
 * known reasoning phrases, explicit reasoning tags, or a leading
 * "thinking:"/"analysis:" label. The `^` anchors are start-of-string only
 * (no `m` flag) — callers pass already-normalized text or per-chunk slices.
 */
function containsReasoningLeak(text) {
  const leakPattern = /(thinking process|analyze the request|drafting the optimized prompt|critique \d|attempt \d|final decision|^thinking:|^analysis:|<think>|<reasoning>|<analysis>)/i;
  return leakPattern.test(text);
}
1697
1734
  function stripWrappingQuotes(text) {
1698
1735
  return text.replace(/^["'`]+|["'`]+$/g, "").trim();
@@ -1942,6 +1979,44 @@ function capitalize(value) {
1942
1979
  return value[0].toUpperCase() + value.slice(1);
1943
1980
  }
1944
1981
 
1982
// src/utils/spinner.ts
/**
 * Minimal terminal spinner rendered on an arbitrary writable stream.
 * All output is suppressed when the stream is not a TTY, so piped output
 * (e.g. `--plain | claude`) never receives control characters.
 */
var Spinner = class {
  message = "";
  frame = 0;
  interval = null;
  writer;
  isTTY;
  // Braille-dot animation frames.
  frames = ["\u280B", "\u2819", "\u2839", "\u2838", "\u283C", "\u2834", "\u2826", "\u2827", "\u2807", "\u280F"];
  constructor(writer, isTTY = false) {
    this.writer = writer;
    this.isTTY = isTTY;
  }
  /**
   * Begin animating with the given message. No-op for non-TTY streams.
   * FIX: calling start() while already running now clears the previous
   * timer first — the original overwrote `this.interval` without
   * clearing it, leaking an interval that kept repainting forever.
   */
  start(message) {
    if (!this.isTTY) {
      return;
    }
    if (this.interval) {
      clearInterval(this.interval);
    }
    this.message = message;
    this.frame = 0;
    this.interval = setInterval(() => {
      const spinner = this.frames[this.frame % this.frames.length];
      this.writer.write(`\r${spinner} ${this.message}`);
      this.frame += 1;
    }, 80);
  }
  /** Stop the animation (idempotent) and erase the spinner line. */
  stop() {
    if (this.interval) {
      clearInterval(this.interval);
      this.interval = null;
    }
    if (this.isTTY) {
      this.writer.write("\r\x1B[K");
    }
  }
};
/** Factory mirroring the class constructor. */
function createSpinner(writer, isTTY = false) {
  return new Spinner(writer, isTTY);
}
+
1945
2020
  // src/cli.ts
1946
2021
  async function runCli(argv, io = { stdout: process.stdout, stderr: process.stderr, stdin: process.stdin }, dependencies = { createOptimizer, readStdin, getCliInfo }) {
1947
2022
  const [command, ...rest] = argv;
@@ -2004,7 +2079,9 @@ async function runCli(argv, io = { stdout: process.stdout, stderr: process.stder
2004
2079
  io.stderr.write("A prompt is required.\n");
2005
2080
  return 1;
2006
2081
  }
2082
+ const spinner = createSpinner(io.stderr, io.stderr.isTTY ?? false);
2007
2083
  try {
2084
+ spinner.start("optimizing");
2008
2085
  const result = await optimizer.optimize({
2009
2086
  prompt: parsed.prompt,
2010
2087
  task: parsed.task,
@@ -2033,11 +2110,26 @@ async function runCli(argv, io = { stdout: process.stdout, stderr: process.stder
2033
2110
  timeoutMs: parsed.timeoutMs,
2034
2111
  bypassOptimization: parsed.bypassOptimization
2035
2112
  });
2113
+ spinner.stop();
2036
2114
  if (parsed.json) {
2037
2115
  io.stdout.write(`${toPrettyJson(result)}
2038
2116
  `);
2039
2117
  return 0;
2040
2118
  }
2119
+ if (parsed.clipboard) {
2120
+ const copied = copyToClipboard(result.finalPrompt);
2121
+ if (copied) {
2122
+ io.stderr.write(`\u2713 Copied optimized prompt to clipboard
2123
+ `);
2124
+ return 0;
2125
+ } else {
2126
+ io.stderr.write(`\u2717 Failed to copy to clipboard. Install xclip, xsel, or wl-copy.
2127
+ `);
2128
+ io.stdout.write(`${result.finalPrompt}
2129
+ `);
2130
+ return 1;
2131
+ }
2132
+ }
2041
2133
  if (parsed.plain) {
2042
2134
  io.stdout.write(`${result.finalPrompt}
2043
2135
  `);
@@ -2047,6 +2139,8 @@ async function runCli(argv, io = { stdout: process.stdout, stderr: process.stder
2047
2139
 
2048
2140
  `);
2049
2141
  io.stdout.write(`provider=${result.provider} model=${result.model} tokens=${result.estimatedTokensAfter.total} savings=${result.tokenSavings}
2142
+ `);
2143
+ io.stdout.write(` prompt_savings=${result.promptCompressionSavings} context_savings=${result.contextCompressionSavings} wrapper_overhead=${result.wrapperOverhead}
2050
2144
  `);
2051
2145
  if (result.selectedTarget) {
2052
2146
  io.stdout.write(`selected_target=${formatTarget(result.selectedTarget)}
@@ -2058,6 +2152,7 @@ async function runCli(argv, io = { stdout: process.stdout, stderr: process.stder
2058
2152
  }
2059
2153
  return 0;
2060
2154
  } catch (error) {
2155
+ spinner.stop();
2061
2156
  const message = error instanceof Error ? error.message : "Unknown CLI error.";
2062
2157
  io.stderr.write(`${message}
2063
2158
  `);
@@ -2069,6 +2164,7 @@ function parseOptimizeArgs(args) {
2069
2164
  plain: false,
2070
2165
  json: false,
2071
2166
  debug: false,
2167
+ clipboard: false,
2072
2168
  clearSession: false,
2073
2169
  useContext: true,
2074
2170
  bypassOptimization: false,
@@ -2152,6 +2248,9 @@ function parseOptimizeArgs(args) {
2152
2248
  case "--json":
2153
2249
  parsed.json = true;
2154
2250
  break;
2251
+ case "--clipboard":
2252
+ parsed.clipboard = true;
2253
+ break;
2155
2254
  case "--debug":
2156
2255
  parsed.debug = true;
2157
2256
  break;
@@ -2218,6 +2317,7 @@ function getHelpText() {
2218
2317
  " --sqlite-path <path>",
2219
2318
  " --plain",
2220
2319
  " --json",
2320
+ " --clipboard Copy optimized prompt to clipboard",
2221
2321
  " --debug",
2222
2322
  " --save-context",
2223
2323
  " --no-context",
@@ -2287,6 +2387,23 @@ function readPackageVersion() {
2287
2387
  return "dev";
2288
2388
  }
2289
2389
  }
2390
/**
 * Best-effort copy of `text` to the system clipboard using whichever
 * platform clipboard utility is installed. Returns true on success,
 * false when no tool is available or every attempt failed.
 *
 * FIX: the original declared per-tool arguments but destructured only
 * `{ cmd }` and invoked the bare command, so `xclip` wrote to the
 * PRIMARY selection instead of the clipboard (and `xsel` to its
 * default selection). Arguments are now passed through, candidates are
 * filtered to the current platform, and the POSIX `command -v` replaces
 * the non-standard `which` for existence checks.
 */
function copyToClipboard(text) {
  const commands = [
    { cmd: "xclip", args: ["-selection", "clipboard"], platform: "linux" },
    { cmd: "xsel", args: ["-b"], platform: "linux" },
    { cmd: "wl-copy", args: [], platform: "linux" },
    { cmd: "pbcopy", args: [], platform: "darwin" }
  ];
  for (const { cmd, args, platform } of commands) {
    if (platform !== process.platform) {
      continue;
    }
    try {
      // `command -v` exits non-zero when the tool is missing.
      execSync(`command -v ${cmd} > /dev/null 2>&1`);
      execSync([cmd, ...args].join(" "), { input: text, stdio: ["pipe", "ignore", "ignore"] });
      return true;
    } catch {
      // Tool missing or copy failed — fall through to the next candidate.
    }
  }
  return false;
}
2290
2407
  if (isMainModule()) {
2291
2408
  runCli(process.argv.slice(2)).then(
2292
2409
  (code) => {