npm - opencode-interrupt-plugin - Versions diffs - 0.4.33 → 0.4.35 - Mend

opencode-interrupt-plugin 0.4.33 → 0.4.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/dist/clean-text.d.ts ADDED Viewed

@@ -0,0 +1,6 @@
+export declare function cleanText(raw: string): string; // Synchronous, regex-only cleanup of a raw transcript.
+export declare function processTranscription(raw: string): Promise<{ // Regex cleanup followed by an optional LLM polish pass.
+    text: string; // Transcript text to use: the polished text if available, otherwise the regex-cleaned text.
+    cleaned: boolean; // False only when `raw` was empty and returned untouched.
+    polished: boolean; // True when the LLM result replaced the regex-cleaned text.
+}>;

package/dist/clean-text.js ADDED Viewed

@@ -0,0 +1,112 @@
+/* ------------------------------------------------------------------ */
+/*  Layer 1: Regex text cleaning — always-on, no API needed           */
+/* ------------------------------------------------------------------ */
+const FILLER_PATTERNS = [ // Each pattern is replaced with '' by cleanText, in list order.
+    /\bum+\b/gi, // "um", "umm", "ummm", ...
+    /\buh+\b/gi, // "uh", "uhh", ...
+    /\blike\b/gi, // NOTE(review): unconditional — also strips the verb/preposition ("looks like X").
+    /\byou know\b/gi,
+    /\bi mean\b/gi,
+    /\bsort of\b/gi,
+    /\bkind of\b/gi,
+    /\byeah\b/gi, // NOTE(review): unconditional — a bare "yeah" answer is reduced to nothing.
+    /\bso basically\b/gi,
+    /\bright\b/gi, // NOTE(review): unconditional — also strips the direction/adjective ("turn right").
+    /\bokay\b/gi, // NOTE(review): unconditional — a bare "okay" answer is reduced to nothing.
+    /\balright\b/gi,
+    /\banyways?\b/gi, // "anyway" or "anyways"
+    /\bactually\b(?=\s+(?:the|a|an|it|i|we|you|they|he|she)\b)/gi, // Only when followed by an article or pronoun.
+];
+const STUTTER_PATTERN = /\b(\w+)(?: \1\b)+/gi; // A word followed by one or more single-space-separated repeats of itself.
+const LEADING_FILLER = /^(?:and |so |but |or |then |well |oh )+/i; // One or more connective words at the very start of the text.
+const CONSECUTIVE_SPACES = /\s{2,}/g; // Any run of two or more whitespace characters.
+export function cleanText(raw) {
+    let t = raw.trim();
+    if (!t)
+        return t;
+    // Remove filler words
+    for (const pat of FILLER_PATTERNS) {
+        t = t.replace(pat, '');
+    }
+    // Remove stutters / repeated words
+    t = t.replace(STUTTER_PATTERN, '$1');
+    // Remove leading fillers (false starts at beginning)
+    t = t.replace(LEADING_FILLER, '');
+    // Collapse whitespace
+    t = t.replace(CONSECUTIVE_SPACES, ' ');
+    // Capitalize first letter
+    if (t.length > 0) {
+        t = t[0].toUpperCase() + t.slice(1);
+    }
+    // Ensure ending punctuation
+    if (t.length > 0 && !/[.!?]/.test(t[t.length - 1])) {
+        t += '.';
+    }
+    return t.trim();
+}
+/* ------------------------------------------------------------------ */
+/*  Layers 2+3: LLM polish — uses OPENAI_API_KEY when set              */
+/* ------------------------------------------------------------------ */
+const POLISH_SYSTEM_PROMPT = `You are a voice transcription cleaner. Your job is to take raw voice-to-text output and produce clean, concise text.
+Rules:
+1. Remove all filler words (um, uh, like, you know, etc.)
+2. If the speaker corrected themselves mid-sentence, keep ONLY the final version
+3. Remove false starts and abandoned sentences
+4. Fix capitalization and punctuation
+5. Remove repeated words
+6. If the text is a command or request, make it direct and clear
+7. Output ONLY the cleaned text — no explanations, no quotes, no prefixes`;
+async function polishViaOpenAI(raw) {
+    const apiKey = process.env.OPENAI_API_KEY;
+    if (!apiKey)
+        return null;
+    const controller = new AbortController();
+    const timeout = setTimeout(() => controller.abort(), 10000);
+    try {
+        const resp = await fetch('https://api.openai.com/v1/chat/completions', {
+            method: 'POST',
+            headers: {
+                'Content-Type': 'application/json',
+                Authorization: `Bearer ${apiKey}`,
+            },
+            body: JSON.stringify({
+                model: 'gpt-4o-mini',
+                messages: [
+                    { role: 'system', content: POLISH_SYSTEM_PROMPT },
+                    { role: 'user', content: raw },
+                ],
+                max_tokens: 500,
+                temperature: 0.1,
+            }),
+            signal: controller.signal,
+        });
+        clearTimeout(timeout);
+        if (!resp.ok)
+            return null;
+        const data = await resp.json();
+        const cleaned = data.choices?.[0]?.message?.content?.trim();
+        return cleaned || null;
+    }
+    catch {
+        clearTimeout(timeout);
+        return null;
+    }
+}
+/* ------------------------------------------------------------------ */
+/*  Public pipeline                                                    */
+/* ------------------------------------------------------------------ */
+export async function processTranscription(raw) {
+    if (!raw)
+        return { text: raw, cleaned: false, polished: false };
+    // Layer 1: always on
+    const layer1 = cleanText(raw);
+    let polished = false;
+    // Layers 2+3: LLM polish when API key is set
+    const llmResult = await polishViaOpenAI(layer1);
+    if (llmResult && llmResult !== layer1) {
+        polished = true;
+        return { text: llmResult, cleaned: true, polished: true };
+    }
+    return { text: layer1, cleaned: true, polished: false };
+}

package/dist/config.d.ts CHANGED Viewed

@@ -10,6 +10,7 @@ export interface PluginConfig {
     tts?: boolean;
     ttsVoice?: string;
     ttsRate?: string;
+    whisperModel?: string;
 }
 export interface ResolvedConfig {
     licenseKey?: string;
@@ -25,6 +26,7 @@ export interface ResolvedConfig {
     tts: boolean;
     ttsVoice: string;
     ttsRate: string;
+    whisperModel: string;
 }
 export declare const FREE_DEFAULTS: ResolvedConfig;
 export declare const SENSITIVITY_PRESETS: Record<string, Partial<ResolvedConfig>>;

package/dist/config.js CHANGED Viewed

@@ -1,5 +1,21 @@
 const DEFAULT_TTS_VOICE = "en-US-AvaNeural";
 const DEFAULT_TTS_RATE = "+25%";
+const MODEL_NAMES = { // Short model aliases mapped to the model file name looked up by resolveModelPath.
+    base: "ggml-base.bin",
+    small: "ggml-small.bin",
+    medium: "ggml-medium.bin",
+    large: "ggml-large-v3.bin",
+};
+function resolveModelPath(model) {
+    const home = process.env.HOME || "/tmp";
+    const envModel = process.env.WHISPER_MODEL;
+    if (envModel)
+        return envModel;
+    const filename = MODEL_NAMES[model];
+    if (filename)
+        return `${home}/.local/bin/${filename}`;
+    return model;
+}
 export const FREE_DEFAULTS = {
     isLicensed: false,
     micThreshold: 0.008,
@@ -13,6 +29,7 @@ export const FREE_DEFAULTS = {
     tts: true,
     ttsVoice: DEFAULT_TTS_VOICE,
     ttsRate: DEFAULT_TTS_RATE,
+    whisperModel: resolveModelPath("base"),
 };
 export const SENSITIVITY_PRESETS = {
     low: {
@@ -49,6 +66,9 @@ export function resolveConfig(userConfig, isLicensed) {
         tts: userConfig.tts ?? FREE_DEFAULTS.tts,
         ttsVoice: userConfig.ttsVoice ?? FREE_DEFAULTS.ttsVoice,
         ttsRate: userConfig.ttsRate ?? FREE_DEFAULTS.ttsRate,
+        whisperModel: userConfig.whisperModel
+            ? resolveModelPath(userConfig.whisperModel)
+            : FREE_DEFAULTS.whisperModel,
     };
     const preset = SENSITIVITY_PRESETS[base.sensitivity];
     if (!userConfig.timingWindowMs)

package/dist/index.js CHANGED Viewed

@@ -1,6 +1,7 @@
 import { resolveConfig } from './config.js';
 import { checkLicense } from './license/guard.js';
 import { debug } from './log.js';
+import { processTranscription } from './clean-text.js';
 import { getSessionState, updateSessionState, clearSessionState, } from './store.js';
 import { prepareInjection } from './injector.js';
 import { onTTSStart, onTTSEnd, isTTSTool } from './audio/tts-tracker.js';
@@ -12,7 +13,6 @@ import { readFileSync, existsSync, unlinkSync } from "node:fs";
 let activeSessionId = null;
 let pendingInterrupt = null;
 const RECORDING_FILE = "/tmp/interrupt-ptt.wav";
-const WHISPER_MODEL = process.env.WHISPER_MODEL || `${process.env.HOME}/.local/bin/ggml-base.bin`;
 let recordingProcess = null;
 let pttActive = false;
 function pttStartRecording() {
@@ -33,7 +33,7 @@ function pttStopRecording() {
     recordingProcess = null;
 }
 const TXT_FILE = "/tmp/interrupt-ptt.txt";
-async function transcribeLocal() {
+async function transcribeLocal(modelPath) {
     try {
         execSync("whisper --help", { stdio: "ignore", timeout: 3000 });
     }
@@ -45,7 +45,7 @@ async function transcribeLocal() {
     }
     catch { /* ignore */ }
     try {
-        execSync(`whisper -f "${RECORDING_FILE}" -m "${WHISPER_MODEL}" -otxt -of /tmp/interrupt-ptt`, { stdio: "ignore", timeout: 30000 });
+        execSync(`whisper -f "${RECORDING_FILE}" -m "${modelPath}" -otxt -of /tmp/interrupt-ptt`, { stdio: "ignore", timeout: 30000 });
     }
     catch {
         return null;
@@ -83,17 +83,25 @@ async function transcribeAPI() {
         return null;
     }
 }
-async function transcribeAndSend(sessionID, directory, api) {
+async function transcribeAndSend(sessionID, directory, api, modelPath) {
     pttStopRecording();
     if (!existsSync(RECORDING_FILE)) {
         api.ui.toast({ variant: "warning", title: "PTT", message: "No audio captured" });
         return;
     }
-    api.ui.toast({ variant: "info", title: "PTT", message: "⏳ Transcribing with Whisper..." });
+    const hasApiKey = !!process.env.OPENAI_API_KEY;
+    api.ui.toast({ variant: "info", title: "PTT", message: hasApiKey ? "⏳ Transcribing via OpenAI API..." : "⏳ Transcribing with Whisper..." });
     let text = null;
-    text = await transcribeLocal();
-    if (!text)
+    if (hasApiKey) {
         text = await transcribeAPI();
+        if (!text)
+            text = await transcribeLocal(modelPath);
+    }
+    else {
+        text = await transcribeLocal(modelPath);
+        if (!text)
+            text = await transcribeAPI();
+    }
     if (!text) {
         api.ui.toast({ variant: "error", title: "PTT", message: "❌ Install whisper: run scripts/install-whisper.sh, or set OPENAI_API_KEY" });
         try {
@@ -102,6 +110,15 @@ async function transcribeAndSend(sessionID, directory, api) {
         catch { /* ignore */ }
         return;
     }
+    const { text: clean, polished } = await processTranscription(text);
+    if (!clean) {
+        api.ui.toast({ variant: "warning", title: "PTT", message: "⚠️ No meaningful text in recording" });
+        try {
+            unlinkSync(RECORDING_FILE);
+        }
+        catch { /* ignore */ }
+        return;
+    }
     try {
         unlinkSync(RECORDING_FILE);
     }
@@ -110,22 +127,30 @@ async function transcribeAndSend(sessionID, directory, api) {
         api.ui.toast({ variant: "warning", title: "PTT", message: "⚠️ Open a session first, then type /ptt" });
         return;
     }
-    api.ui.toast({ variant: "info", title: "PTT", message: "✉️ Sending transcript..." });
-    await api.client.session.prompt({ sessionID, directory, parts: [{ type: "text", text }] });
-    const preview = text.length > 80 ? text.slice(0, 77) + "..." : text;
+    api.ui.toast({ variant: "info", title: "PTT", message: polished ? "✨ Sending polished transcript..." : "✉️ Sending transcript..." });
+    await api.client.session.prompt({ sessionID, directory, parts: [{ type: "text", text: clean }] });
+    const preview = clean.length > 80 ? clean.slice(0, 77) + "..." : clean;
     api.ui.toast({ variant: "success", title: "PTT", message: `✅ Sent: "${preview}"` });
 }
-async function transcribeAndSendV1(sessionID, client) {
+async function transcribeAndSendV1(sessionID, client, modelPath) {
     pttStopRecording();
     if (!existsSync(RECORDING_FILE)) {
         await client.tui.showToast({ body: { title: "PTT", message: "⚠️ No audio captured — try again", variant: "warning" } });
         return;
     }
-    await client.tui.showToast({ body: { title: "PTT", message: "⏳ Transcribing with Whisper...", variant: "info" } });
+    const hasApiKey = !!process.env.OPENAI_API_KEY;
+    await client.tui.showToast({ body: { title: "PTT", message: hasApiKey ? "⏳ Transcribing via OpenAI API..." : "⏳ Transcribing with Whisper...", variant: "info" } });
     let text = null;
-    text = await transcribeLocal();
-    if (!text)
+    if (hasApiKey) {
         text = await transcribeAPI();
+        if (!text)
+            text = await transcribeLocal(modelPath);
+    }
+    else {
+        text = await transcribeLocal(modelPath);
+        if (!text)
+            text = await transcribeAPI();
+    }
     if (!text) {
         await client.tui.showToast({ body: { title: "PTT", message: "❌ Install whisper: run scripts/install-whisper.sh, or set OPENAI_API_KEY", variant: "error", duration: 8000 } });
         try {
@@ -134,6 +159,15 @@ async function transcribeAndSendV1(sessionID, client) {
         catch { /* ignore */ }
         return;
     }
+    const { text: clean, polished } = await processTranscription(text);
+    if (!clean) {
+        await client.tui.showToast({ body: { title: "PTT", message: "⚠️ No meaningful text in recording", variant: "warning", duration: 4000 } });
+        try {
+            unlinkSync(RECORDING_FILE);
+        }
+        catch { /* ignore */ }
+        return;
+    }
     try {
         unlinkSync(RECORDING_FILE);
     }
@@ -142,9 +176,9 @@ async function transcribeAndSendV1(sessionID, client) {
         await client.tui.showToast({ body: { title: "PTT", message: "⚠️ Open a session first, then type /ptt", variant: "warning", duration: 5000 } });
         return;
     }
-    await client.tui.showToast({ body: { title: "PTT", message: "✉️ Sending transcript...", variant: "info" } });
-    await client.session.prompt({ path: { id: sessionID }, body: { parts: [{ type: "text", text }] } });
-    const preview = text.length > 80 ? text.slice(0, 77) + "..." : text;
+    await client.tui.showToast({ body: { title: "PTT", message: polished ? "✨ Sending polished transcript..." : "✉️ Sending transcript...", variant: "info" } });
+    await client.session.prompt({ path: { id: sessionID }, body: { parts: [{ type: "text", text: clean }] } });
+    const preview = clean.length > 80 ? clean.slice(0, 77) + "..." : clean;
     await client.tui.showToast({ body: { title: "PTT", message: `✅ Sent: "${preview}"`, variant: "success", duration: 5000 } });
 }
 const TTS_COMMANDS = [
@@ -330,7 +364,7 @@ export const InterruptPlugin = (userConfig = {}) => {
                     const sessionID = cmdInput.sessionID;
                     if (pttActive) {
                         pttActive = false;
-                        await transcribeAndSendV1(sessionID, client);
+                        await transcribeAndSendV1(sessionID, client, config.whisperModel);
                     }
                     else {
                         pttActive = true;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "opencode-interrupt-plugin",
-  "version": "0.4.33",
+  "version": "0.4.35",
   "description": "Streaming TTS + voice interruption for OpenCode. Speaks responses as they arrive and detects when you talk over it.",
   "type": "module",
   "main": "./dist/index.js",