opencode-interrupt-plugin 0.4.33 → 0.4.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
+ export declare function cleanText(raw: string): string;
2
+ export declare function processTranscription(raw: string): Promise<{
3
+ text: string;
4
+ cleaned: boolean;
5
+ polished: boolean;
6
+ }>;
@@ -0,0 +1,112 @@
1
+ /* ------------------------------------------------------------------ */
2
+ /* Layer 1: Regex text cleaning — always-on, no API needed */
3
+ /* ------------------------------------------------------------------ */
4
+ const FILLER_PATTERNS = [
5
+ /\bum+\b/gi,
6
+ /\buh+\b/gi,
7
+ /\blike\b/gi,
8
+ /\byou know\b/gi,
9
+ /\bi mean\b/gi,
10
+ /\bsort of\b/gi,
11
+ /\bkind of\b/gi,
12
+ /\byeah\b/gi,
13
+ /\bso basically\b/gi,
14
+ /\bright\b/gi,
15
+ /\bokay\b/gi,
16
+ /\balright\b/gi,
17
+ /\banyways?\b/gi,
18
+ /\bactually\b(?=\s+(?:the|a|an|it|i|we|you|they|he|she)\b)/gi,
19
+ ];
20
+ const STUTTER_PATTERN = /\b(\w+)(?: \1\b)+/gi;
21
+ const LEADING_FILLER = /^(?:and |so |but |or |then |well |oh )+/i;
22
+ const CONSECUTIVE_SPACES = /\s{2,}/g;
23
+ export function cleanText(raw) {
24
+ let t = raw.trim();
25
+ if (!t)
26
+ return t;
27
+ // Remove filler words
28
+ for (const pat of FILLER_PATTERNS) {
29
+ t = t.replace(pat, '');
30
+ }
31
+ // Remove stutters / repeated words
32
+ t = t.replace(STUTTER_PATTERN, '$1');
33
+ // Remove leading fillers (false starts at beginning)
34
+ t = t.replace(LEADING_FILLER, '');
35
+ // Collapse whitespace
36
+ t = t.replace(CONSECUTIVE_SPACES, ' ');
37
+ // Capitalize first letter
38
+ if (t.length > 0) {
39
+ t = t[0].toUpperCase() + t.slice(1);
40
+ }
41
+ // Ensure ending punctuation
42
+ if (t.length > 0 && !/[.!?]/.test(t[t.length - 1])) {
43
+ t += '.';
44
+ }
45
+ return t.trim();
46
+ }
47
+ /* ------------------------------------------------------------------ */
48
+ /* Layers 2+3: LLM polish — uses OPENAI_API_KEY when set */
49
+ /* ------------------------------------------------------------------ */
50
+ const POLISH_SYSTEM_PROMPT = `You are a voice transcription cleaner. Your job is to take raw voice-to-text output and produce clean, concise text.
51
+
52
+ Rules:
53
+ 1. Remove all filler words (um, uh, like, you know, etc.)
54
+ 2. If the speaker corrected themselves mid-sentence, keep ONLY the final version
55
+ 3. Remove false starts and abandoned sentences
56
+ 4. Fix capitalization and punctuation
57
+ 5. Remove repeated words
58
+ 6. If the text is a command or request, make it direct and clear
59
+ 7. Output ONLY the cleaned text — no explanations, no quotes, no prefixes`;
60
+ async function polishViaOpenAI(raw) {
61
+ const apiKey = process.env.OPENAI_API_KEY;
62
+ if (!apiKey)
63
+ return null;
64
+ const controller = new AbortController();
65
+ const timeout = setTimeout(() => controller.abort(), 10000);
66
+ try {
67
+ const resp = await fetch('https://api.openai.com/v1/chat/completions', {
68
+ method: 'POST',
69
+ headers: {
70
+ 'Content-Type': 'application/json',
71
+ Authorization: `Bearer ${apiKey}`,
72
+ },
73
+ body: JSON.stringify({
74
+ model: 'gpt-4o-mini',
75
+ messages: [
76
+ { role: 'system', content: POLISH_SYSTEM_PROMPT },
77
+ { role: 'user', content: raw },
78
+ ],
79
+ max_tokens: 500,
80
+ temperature: 0.1,
81
+ }),
82
+ signal: controller.signal,
83
+ });
84
+ clearTimeout(timeout);
85
+ if (!resp.ok)
86
+ return null;
87
+ const data = await resp.json();
88
+ const cleaned = data.choices?.[0]?.message?.content?.trim();
89
+ return cleaned || null;
90
+ }
91
+ catch {
92
+ clearTimeout(timeout);
93
+ return null;
94
+ }
95
+ }
96
+ /* ------------------------------------------------------------------ */
97
+ /* Public pipeline */
98
+ /* ------------------------------------------------------------------ */
99
+ export async function processTranscription(raw) {
100
+ if (!raw)
101
+ return { text: raw, cleaned: false, polished: false };
102
+ // Layer 1: always on
103
+ const layer1 = cleanText(raw);
104
+ let polished = false;
105
+ // Layers 2+3: LLM polish when API key is set
106
+ const llmResult = await polishViaOpenAI(layer1);
107
+ if (llmResult && llmResult !== layer1) {
108
+ polished = true;
109
+ return { text: llmResult, cleaned: true, polished: true };
110
+ }
111
+ return { text: layer1, cleaned: true, polished: false };
112
+ }
package/dist/config.d.ts CHANGED
@@ -10,6 +10,7 @@ export interface PluginConfig {
10
10
  tts?: boolean;
11
11
  ttsVoice?: string;
12
12
  ttsRate?: string;
13
+ whisperModel?: string;
13
14
  }
14
15
  export interface ResolvedConfig {
15
16
  licenseKey?: string;
@@ -25,6 +26,7 @@ export interface ResolvedConfig {
25
26
  tts: boolean;
26
27
  ttsVoice: string;
27
28
  ttsRate: string;
29
+ whisperModel: string;
28
30
  }
29
31
  export declare const FREE_DEFAULTS: ResolvedConfig;
30
32
  export declare const SENSITIVITY_PRESETS: Record<string, Partial<ResolvedConfig>>;
package/dist/config.js CHANGED
@@ -1,5 +1,21 @@
1
1
  const DEFAULT_TTS_VOICE = "en-US-AvaNeural";
2
2
  const DEFAULT_TTS_RATE = "+25%";
3
+ const MODEL_NAMES = {
4
+ base: "ggml-base.bin",
5
+ small: "ggml-small.bin",
6
+ medium: "ggml-medium.bin",
7
+ large: "ggml-large-v3.bin",
8
+ };
9
+ function resolveModelPath(model) {
10
+ const home = process.env.HOME || "/tmp";
11
+ const envModel = process.env.WHISPER_MODEL;
12
+ if (envModel)
13
+ return envModel;
14
+ const filename = MODEL_NAMES[model];
15
+ if (filename)
16
+ return `${home}/.local/bin/${filename}`;
17
+ return model;
18
+ }
3
19
  export const FREE_DEFAULTS = {
4
20
  isLicensed: false,
5
21
  micThreshold: 0.008,
@@ -13,6 +29,7 @@ export const FREE_DEFAULTS = {
13
29
  tts: true,
14
30
  ttsVoice: DEFAULT_TTS_VOICE,
15
31
  ttsRate: DEFAULT_TTS_RATE,
32
+ whisperModel: resolveModelPath("base"),
16
33
  };
17
34
  export const SENSITIVITY_PRESETS = {
18
35
  low: {
@@ -49,6 +66,9 @@ export function resolveConfig(userConfig, isLicensed) {
49
66
  tts: userConfig.tts ?? FREE_DEFAULTS.tts,
50
67
  ttsVoice: userConfig.ttsVoice ?? FREE_DEFAULTS.ttsVoice,
51
68
  ttsRate: userConfig.ttsRate ?? FREE_DEFAULTS.ttsRate,
69
+ whisperModel: userConfig.whisperModel
70
+ ? resolveModelPath(userConfig.whisperModel)
71
+ : FREE_DEFAULTS.whisperModel,
52
72
  };
53
73
  const preset = SENSITIVITY_PRESETS[base.sensitivity];
54
74
  if (!userConfig.timingWindowMs)
package/dist/index.js CHANGED
@@ -1,6 +1,7 @@
1
1
  import { resolveConfig } from './config.js';
2
2
  import { checkLicense } from './license/guard.js';
3
3
  import { debug } from './log.js';
4
+ import { processTranscription } from './clean-text.js';
4
5
  import { getSessionState, updateSessionState, clearSessionState, } from './store.js';
5
6
  import { prepareInjection } from './injector.js';
6
7
  import { onTTSStart, onTTSEnd, isTTSTool } from './audio/tts-tracker.js';
@@ -12,7 +13,6 @@ import { readFileSync, existsSync, unlinkSync } from "node:fs";
12
13
  let activeSessionId = null;
13
14
  let pendingInterrupt = null;
14
15
  const RECORDING_FILE = "/tmp/interrupt-ptt.wav";
15
- const WHISPER_MODEL = process.env.WHISPER_MODEL || `${process.env.HOME}/.local/bin/ggml-base.bin`;
16
16
  let recordingProcess = null;
17
17
  let pttActive = false;
18
18
  function pttStartRecording() {
@@ -33,7 +33,7 @@ function pttStopRecording() {
33
33
  recordingProcess = null;
34
34
  }
35
35
  const TXT_FILE = "/tmp/interrupt-ptt.txt";
36
- async function transcribeLocal() {
36
+ async function transcribeLocal(modelPath) {
37
37
  try {
38
38
  execSync("whisper --help", { stdio: "ignore", timeout: 3000 });
39
39
  }
@@ -45,7 +45,7 @@ async function transcribeLocal() {
45
45
  }
46
46
  catch { /* ignore */ }
47
47
  try {
48
- execSync(`whisper -f "${RECORDING_FILE}" -m "${WHISPER_MODEL}" -otxt -of /tmp/interrupt-ptt`, { stdio: "ignore", timeout: 30000 });
48
+ execSync(`whisper -f "${RECORDING_FILE}" -m "${modelPath}" -otxt -of /tmp/interrupt-ptt`, { stdio: "ignore", timeout: 30000 });
49
49
  }
50
50
  catch {
51
51
  return null;
@@ -83,17 +83,25 @@ async function transcribeAPI() {
83
83
  return null;
84
84
  }
85
85
  }
86
- async function transcribeAndSend(sessionID, directory, api) {
86
+ async function transcribeAndSend(sessionID, directory, api, modelPath) {
87
87
  pttStopRecording();
88
88
  if (!existsSync(RECORDING_FILE)) {
89
89
  api.ui.toast({ variant: "warning", title: "PTT", message: "No audio captured" });
90
90
  return;
91
91
  }
92
- api.ui.toast({ variant: "info", title: "PTT", message: "⏳ Transcribing with Whisper..." });
92
+ const hasApiKey = !!process.env.OPENAI_API_KEY;
93
+ api.ui.toast({ variant: "info", title: "PTT", message: hasApiKey ? "⏳ Transcribing via OpenAI API..." : "⏳ Transcribing with Whisper..." });
93
94
  let text = null;
94
- text = await transcribeLocal();
95
- if (!text)
95
+ if (hasApiKey) {
96
96
  text = await transcribeAPI();
97
+ if (!text)
98
+ text = await transcribeLocal(modelPath);
99
+ }
100
+ else {
101
+ text = await transcribeLocal(modelPath);
102
+ if (!text)
103
+ text = await transcribeAPI();
104
+ }
97
105
  if (!text) {
98
106
  api.ui.toast({ variant: "error", title: "PTT", message: "❌ Install whisper: run scripts/install-whisper.sh, or set OPENAI_API_KEY" });
99
107
  try {
@@ -102,6 +110,15 @@ async function transcribeAndSend(sessionID, directory, api) {
102
110
  catch { /* ignore */ }
103
111
  return;
104
112
  }
113
+ const { text: clean, polished } = await processTranscription(text);
114
+ if (!clean) {
115
+ api.ui.toast({ variant: "warning", title: "PTT", message: "⚠️ No meaningful text in recording" });
116
+ try {
117
+ unlinkSync(RECORDING_FILE);
118
+ }
119
+ catch { /* ignore */ }
120
+ return;
121
+ }
105
122
  try {
106
123
  unlinkSync(RECORDING_FILE);
107
124
  }
@@ -110,22 +127,30 @@ async function transcribeAndSend(sessionID, directory, api) {
110
127
  api.ui.toast({ variant: "warning", title: "PTT", message: "⚠️ Open a session first, then type /ptt" });
111
128
  return;
112
129
  }
113
- api.ui.toast({ variant: "info", title: "PTT", message: "✉️ Sending transcript..." });
114
- await api.client.session.prompt({ sessionID, directory, parts: [{ type: "text", text }] });
115
- const preview = text.length > 80 ? text.slice(0, 77) + "..." : text;
130
+ api.ui.toast({ variant: "info", title: "PTT", message: polished ? "✨ Sending polished transcript..." : "✉️ Sending transcript..." });
131
+ await api.client.session.prompt({ sessionID, directory, parts: [{ type: "text", text: clean }] });
132
+ const preview = clean.length > 80 ? clean.slice(0, 77) + "..." : clean;
116
133
  api.ui.toast({ variant: "success", title: "PTT", message: `✅ Sent: "${preview}"` });
117
134
  }
118
- async function transcribeAndSendV1(sessionID, client) {
135
+ async function transcribeAndSendV1(sessionID, client, modelPath) {
119
136
  pttStopRecording();
120
137
  if (!existsSync(RECORDING_FILE)) {
121
138
  await client.tui.showToast({ body: { title: "PTT", message: "⚠️ No audio captured — try again", variant: "warning" } });
122
139
  return;
123
140
  }
124
- await client.tui.showToast({ body: { title: "PTT", message: "⏳ Transcribing with Whisper...", variant: "info" } });
141
+ const hasApiKey = !!process.env.OPENAI_API_KEY;
142
+ await client.tui.showToast({ body: { title: "PTT", message: hasApiKey ? "⏳ Transcribing via OpenAI API..." : "⏳ Transcribing with Whisper...", variant: "info" } });
125
143
  let text = null;
126
- text = await transcribeLocal();
127
- if (!text)
144
+ if (hasApiKey) {
128
145
  text = await transcribeAPI();
146
+ if (!text)
147
+ text = await transcribeLocal(modelPath);
148
+ }
149
+ else {
150
+ text = await transcribeLocal(modelPath);
151
+ if (!text)
152
+ text = await transcribeAPI();
153
+ }
129
154
  if (!text) {
130
155
  await client.tui.showToast({ body: { title: "PTT", message: "❌ Install whisper: run scripts/install-whisper.sh, or set OPENAI_API_KEY", variant: "error", duration: 8000 } });
131
156
  try {
@@ -134,6 +159,15 @@ async function transcribeAndSendV1(sessionID, client) {
134
159
  catch { /* ignore */ }
135
160
  return;
136
161
  }
162
+ const { text: clean, polished } = await processTranscription(text);
163
+ if (!clean) {
164
+ await client.tui.showToast({ body: { title: "PTT", message: "⚠️ No meaningful text in recording", variant: "warning", duration: 4000 } });
165
+ try {
166
+ unlinkSync(RECORDING_FILE);
167
+ }
168
+ catch { /* ignore */ }
169
+ return;
170
+ }
137
171
  try {
138
172
  unlinkSync(RECORDING_FILE);
139
173
  }
@@ -142,9 +176,9 @@ async function transcribeAndSendV1(sessionID, client) {
142
176
  await client.tui.showToast({ body: { title: "PTT", message: "⚠️ Open a session first, then type /ptt", variant: "warning", duration: 5000 } });
143
177
  return;
144
178
  }
145
- await client.tui.showToast({ body: { title: "PTT", message: "✉️ Sending transcript...", variant: "info" } });
146
- await client.session.prompt({ path: { id: sessionID }, body: { parts: [{ type: "text", text }] } });
147
- const preview = text.length > 80 ? text.slice(0, 77) + "..." : text;
179
+ await client.tui.showToast({ body: { title: "PTT", message: polished ? "✨ Sending polished transcript..." : "✉️ Sending transcript...", variant: "info" } });
180
+ await client.session.prompt({ path: { id: sessionID }, body: { parts: [{ type: "text", text: clean }] } });
181
+ const preview = clean.length > 80 ? clean.slice(0, 77) + "..." : clean;
148
182
  await client.tui.showToast({ body: { title: "PTT", message: `✅ Sent: "${preview}"`, variant: "success", duration: 5000 } });
149
183
  }
150
184
  const TTS_COMMANDS = [
@@ -330,7 +364,7 @@ export const InterruptPlugin = (userConfig = {}) => {
330
364
  const sessionID = cmdInput.sessionID;
331
365
  if (pttActive) {
332
366
  pttActive = false;
333
- await transcribeAndSendV1(sessionID, client);
367
+ await transcribeAndSendV1(sessionID, client, config.whisperModel);
334
368
  }
335
369
  else {
336
370
  pttActive = true;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "opencode-interrupt-plugin",
3
- "version": "0.4.33",
3
+ "version": "0.4.35",
4
4
  "description": "Streaming TTS + voice interruption for OpenCode. Speaks responses as they arrive and detects when you talk over it.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",