opencode-interrupt-plugin 0.4.34 → 0.4.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
/**
 * Layer-1 cleanup of a raw voice transcription using regular expressions only
 * (no network access): strips filler words, collapses immediately repeated
 * words, drops leading conjunction-style false starts, collapses whitespace,
 * capitalises the first character and appends a period when the text does not
 * already end in `.`, `!` or `?`.
 *
 * @param raw - Transcribed text as produced by the speech-to-text step.
 * @returns The cleaned text; an empty string when `raw` is blank.
 */
+ export declare function cleanText(raw: string): string;
2
/**
 * Full transcription pipeline: always applies `cleanText`, then attempts an
 * LLM polish pass through the OpenAI API when `OPENAI_API_KEY` is set.
 *
 * @param raw - Transcribed text as produced by the speech-to-text step.
 * @returns The text to send plus flags describing which layers were applied.
 */
+ export declare function processTranscription(raw: string): Promise<{
3
/** Final text: the LLM output when polishing succeeded, otherwise the regex-cleaned text. */
+ text: string;
4
/** `false` only when `raw` was empty and returned untouched. */
+ cleaned: boolean;
5
/** `true` only when the LLM returned a non-empty result that differs from the regex-cleaned text. */
+ polished: boolean;
6
+ }>;
@@ -0,0 +1,112 @@
1
/* ------------------------------------------------------------------ */
/*  Layer 1: Regex text cleaning — always-on, no API needed            */
/* ------------------------------------------------------------------ */

/** Spoken filler words/phrases that are removed wherever they occur. */
const FILLER_PATTERNS = [
  /\bum+\b/gi,
  /\buh+\b/gi,
  /\blike\b/gi,
  /\byou know\b/gi,
  /\bi mean\b/gi,
  /\bsort of\b/gi,
  /\bkind of\b/gi,
  /\byeah\b/gi,
  /\bso basically\b/gi,
  /\bright\b/gi,
  /\bokay\b/gi,
  /\balright\b/gi,
  /\banyways?\b/gi,
  // "actually" is only treated as filler when it introduces a clause.
  /\bactually\b(?=\s+(?:the|a|an|it|i|we|you|they|he|she)\b)/gi,
];
/** A word immediately repeated one or more times, separated by single spaces. */
const STUTTER_PATTERN = /\b(\w+)(?: \1\b)+/gi;
/** Conjunction-style false starts at the very beginning of the text. */
const LEADING_FILLER = /^(?:and |so |but |or |then |well |oh )+/i;
const CONSECUTIVE_SPACES = /\s{2,}/g;

/**
 * Repair the holes left behind when words are deleted: orphaned commas,
 * leading/trailing separators and runs of whitespace.
 *
 * A leading `.`, `!` or `?` is only dropped when whitespace follows it, so
 * tokens such as ".env" at the start of the text are left intact.
 */
function tidyGaps(text) {
  return text
    .replace(/\s+,/g, ',')
    .replace(/,{2,}/g, ',')
    .replace(/^(?:[\s,;:]|[.!?]+(?=\s))+/, '')
    .replace(/[\s,;:]+$/, '')
    .replace(CONSECUTIVE_SPACES, ' ');
}

/**
 * Clean a raw voice transcription with regular expressions only.
 *
 * Steps, in order: remove filler words, tidy the resulting gaps, collapse
 * repeated words, drop leading false starts, capitalise the first character
 * and make sure the text ends with `.`, `!` or `?`.
 *
 * @param {string} raw Transcribed text.
 * @returns {string} The cleaned text, or `''` when nothing but filler,
 *   whitespace or punctuation remains (so callers can test for emptiness).
 */
export function cleanText(raw) {
  let t = raw.trim();
  if (!t) {
    return t;
  }
  // Remove filler words
  for (const pat of FILLER_PATTERNS) {
    t = t.replace(pat, '');
  }
  // Gaps must be normalised BEFORE the position-sensitive steps below:
  // the stutter pattern expects single spaces and the leading-filler
  // pattern is anchored at the first character.
  t = tidyGaps(t);
  // Remove stutters / repeated words
  t = t.replace(STUTTER_PATTERN, '$1');
  // Remove leading fillers (false starts at beginning)
  t = tidyGaps(t.replace(LEADING_FILLER, ''));
  // Only filler/punctuation was spoken: report "no text" instead of ".".
  if (!/[\p{L}\p{N}]/u.test(t)) {
    return '';
  }
  // Capitalize first letter
  t = t.charAt(0).toUpperCase() + t.slice(1);
  // Ensure ending punctuation
  if (!/[.!?]$/.test(t)) {
    t += '.';
  }
  return t;
}
47
+ /* ------------------------------------------------------------------ */
48
+ /* Layers 2+3: LLM polish — uses OPENAI_API_KEY when set */
49
+ /* ------------------------------------------------------------------ */
50
const POLISH_SYSTEM_PROMPT = `You are a voice transcription cleaner. Your job is to take raw voice-to-text output and produce clean, concise text.

Rules:
1. Remove all filler words (um, uh, like, you know, etc.)
2. If the speaker corrected themselves mid-sentence, keep ONLY the final version
3. Remove false starts and abandoned sentences
4. Fix capitalization and punctuation
5. Remove repeated words
6. If the text is a command or request, make it direct and clear
7. Output ONLY the cleaned text — no explanations, no quotes, no prefixes`;

/** Upper bound, in milliseconds, for the whole polish round-trip. */
const POLISH_TIMEOUT_MS = 10000;

/**
 * Ask the OpenAI chat-completions endpoint to polish a transcription.
 *
 * This is a best-effort step: every failure mode (no `OPENAI_API_KEY`,
 * blank input, network error, timeout, non-2xx status, unexpected payload)
 * resolves to `null` so the caller can fall back to the regex-cleaned text.
 *
 * @param {string} raw Text to polish.
 * @returns {Promise<string | null>} The trimmed model output, or `null`.
 */
async function polishViaOpenAI(raw) {
  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) {
    return null;
  }
  // Nothing to polish — avoid a pointless (and billable) request.
  if (typeof raw !== 'string' || !raw.trim()) {
    return null;
  }
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), POLISH_TIMEOUT_MS);
  try {
    const resp = await fetch('https://api.openai.com/v1/chat/completions', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify({
        model: 'gpt-4o-mini',
        messages: [
          { role: 'system', content: POLISH_SYSTEM_PROMPT },
          { role: 'user', content: raw },
        ],
        max_tokens: 500,
        temperature: 0.1,
      }),
      signal: controller.signal,
    });
    if (!resp.ok) {
      return null;
    }
    // The timer is still armed here on purpose: aborting the signal also
    // fails a stalled body read, so the timeout bounds the entire exchange.
    const data = await resp.json();
    const content = data?.choices?.[0]?.message?.content;
    if (typeof content !== 'string') {
      return null;
    }
    return content.trim() || null;
  }
  catch {
    // Deliberate best-effort: callers fall back to the unpolished text.
    return null;
  }
  finally {
    clearTimeout(timeout);
  }
}
96
+ /* ------------------------------------------------------------------ */
97
+ /* Public pipeline */
98
+ /* ------------------------------------------------------------------ */
99
/**
 * Run a raw transcription through the cleaning pipeline.
 *
 * Layer 1 (`cleanText`) always runs. The LLM polish pass is attempted only
 * when layer 1 leaves at least one letter or digit; otherwise there is
 * nothing to polish and a model reply would be invented text that ends up
 * being sent as the user's prompt.
 *
 * @param {string} raw Transcribed text.
 * @returns {Promise<{ text: string, cleaned: boolean, polished: boolean }>}
 *   `cleaned` is `false` only for empty input, which is returned untouched.
 *   `polished` is `true` only when the LLM produced a non-empty result that
 *   differs from the layer-1 text.
 */
export async function processTranscription(raw) {
  if (!raw) {
    return { text: raw, cleaned: false, polished: false };
  }
  // Layer 1: always on
  const layer1 = cleanText(raw);
  // No speakable content survived cleaning — skip the LLM entirely.
  if (!/[\p{L}\p{N}]/u.test(layer1)) {
    return { text: layer1, cleaned: true, polished: false };
  }
  // Layers 2+3: LLM polish when API key is set
  const llmResult = await polishViaOpenAI(layer1);
  if (llmResult && llmResult !== layer1) {
    return { text: llmResult, cleaned: true, polished: true };
  }
  return { text: layer1, cleaned: true, polished: false };
}
package/dist/index.js CHANGED
@@ -1,6 +1,7 @@
1
1
  import { resolveConfig } from './config.js';
2
2
  import { checkLicense } from './license/guard.js';
3
3
  import { debug } from './log.js';
4
+ import { processTranscription } from './clean-text.js';
4
5
  import { getSessionState, updateSessionState, clearSessionState, } from './store.js';
5
6
  import { prepareInjection } from './injector.js';
6
7
  import { onTTSStart, onTTSEnd, isTTSTool } from './audio/tts-tracker.js';
@@ -109,6 +110,15 @@ async function transcribeAndSend(sessionID, directory, api, modelPath) {
109
110
  catch { /* ignore */ }
110
111
  return;
111
112
  }
113
+ const { text: clean, polished } = await processTranscription(text);
114
+ if (!clean) {
115
+ api.ui.toast({ variant: "warning", title: "PTT", message: "⚠️ No meaningful text in recording" });
116
+ try {
117
+ unlinkSync(RECORDING_FILE);
118
+ }
119
+ catch { /* ignore */ }
120
+ return;
121
+ }
112
122
  try {
113
123
  unlinkSync(RECORDING_FILE);
114
124
  }
@@ -117,9 +127,9 @@ async function transcribeAndSend(sessionID, directory, api, modelPath) {
117
127
  api.ui.toast({ variant: "warning", title: "PTT", message: "⚠️ Open a session first, then type /ptt" });
118
128
  return;
119
129
  }
120
- api.ui.toast({ variant: "info", title: "PTT", message: "✉️ Sending transcript..." });
121
- await api.client.session.prompt({ sessionID, directory, parts: [{ type: "text", text }] });
122
- const preview = text.length > 80 ? text.slice(0, 77) + "..." : text;
130
+ api.ui.toast({ variant: "info", title: "PTT", message: polished ? "✨ Sending polished transcript..." : "✉️ Sending transcript..." });
131
+ await api.client.session.prompt({ sessionID, directory, parts: [{ type: "text", text: clean }] });
132
+ const preview = clean.length > 80 ? clean.slice(0, 77) + "..." : clean;
123
133
  api.ui.toast({ variant: "success", title: "PTT", message: `✅ Sent: "${preview}"` });
124
134
  }
125
135
  async function transcribeAndSendV1(sessionID, client, modelPath) {
@@ -149,6 +159,15 @@ async function transcribeAndSendV1(sessionID, client, modelPath) {
149
159
  catch { /* ignore */ }
150
160
  return;
151
161
  }
162
+ const { text: clean, polished } = await processTranscription(text);
163
+ if (!clean) {
164
+ await client.tui.showToast({ body: { title: "PTT", message: "⚠️ No meaningful text in recording", variant: "warning", duration: 4000 } });
165
+ try {
166
+ unlinkSync(RECORDING_FILE);
167
+ }
168
+ catch { /* ignore */ }
169
+ return;
170
+ }
152
171
  try {
153
172
  unlinkSync(RECORDING_FILE);
154
173
  }
@@ -157,9 +176,9 @@ async function transcribeAndSendV1(sessionID, client, modelPath) {
157
176
  await client.tui.showToast({ body: { title: "PTT", message: "⚠️ Open a session first, then type /ptt", variant: "warning", duration: 5000 } });
158
177
  return;
159
178
  }
160
- await client.tui.showToast({ body: { title: "PTT", message: "✉️ Sending transcript...", variant: "info" } });
161
- await client.session.prompt({ path: { id: sessionID }, body: { parts: [{ type: "text", text }] } });
162
- const preview = text.length > 80 ? text.slice(0, 77) + "..." : text;
179
+ await client.tui.showToast({ body: { title: "PTT", message: polished ? "✨ Sending polished transcript..." : "✉️ Sending transcript...", variant: "info" } });
180
+ await client.session.prompt({ path: { id: sessionID }, body: { parts: [{ type: "text", text: clean }] } });
181
+ const preview = clean.length > 80 ? clean.slice(0, 77) + "..." : clean;
163
182
  await client.tui.showToast({ body: { title: "PTT", message: `✅ Sent: "${preview}"`, variant: "success", duration: 5000 } });
164
183
  }
165
184
  const TTS_COMMANDS = [
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "opencode-interrupt-plugin",
3
- "version": "0.4.34",
3
+ "version": "0.4.35",
4
4
  "description": "Streaming TTS + voice interruption for OpenCode. Speaks responses as they arrive and detects when you talk over it.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",