talktocursor 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,394 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Auto-Submit and Wispr Voice Loop for Cursor TTS MCP
4
+
5
+ Combines two features:
6
+ 1. Auto-submit: Detects when text appears in the focused field and auto-presses Enter
7
+ 2. Wispr voice loop: Watches for listen signals, triggers Wispr, detects silence, and pastes
8
+
9
+ How it works:
10
+ - Monitors the focused text field via Accessibility API for auto-submit
11
+ - Watches for listen-signal.json from the MCP server
12
+ - When signal detected, starts Wispr and monitors mic for silence
13
+ - Registers a manual trigger hotkey to start Wispr anytime
14
+
15
+ Requires macOS Accessibility permissions:
16
+ System Settings > Privacy & Security > Accessibility
17
+ """
18
+
19
+ import time
20
+ import json
21
+ import os
22
+ import sys
23
+ import threading
24
+ import subprocess
25
+ from pathlib import Path
26
+ from ApplicationServices import (
27
+ AXUIElementCreateSystemWide,
28
+ AXUIElementCopyAttributeValue,
29
+ AXIsProcessTrusted,
30
+ )
31
+ from pynput import keyboard
32
+ from pynput.keyboard import Key, KeyCode, Controller, HotKey, GlobalHotKeys
33
+
34
+ # Import our silence detector
35
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
36
+ from silence_detector import wait_for_silence
37
+
38
+ # ─── Configuration ───────────────────────────────────────────────────────────
39
+
40
# All runtime files (config and signal files) sit one directory above this script.
_PARENT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..')

CONFIG_PATH = os.path.join(_PARENT_DIR, 'config.json')
SIGNAL_PATH = os.path.join(_PARENT_DIR, 'listen-signal.json')
TTS_COMPLETE_PATH = os.path.join(_PARENT_DIR, 'tts-complete.json')
43
+
44
def load_config(path=None):
    """Return the effective settings: built-in defaults overlaid with the config file.

    Only keys that already exist in the defaults are taken from the file;
    unknown sections and options are ignored. A missing or malformed file, a
    JSON root that is not an object, or a section that is not an object all
    leave the corresponding defaults untouched.

    Args:
        path: Config file to read. Defaults to CONFIG_PATH when None.

    Returns:
        dict: Two sections, 'autoSubmit' and 'wisprLoop', each a dict of options.
    """
    defaults = {
        'autoSubmit': {
            'enabled': True,
            'silenceDelay': 3.0,
            'minTextLength': 15,
            'targetApp': 'Cursor',
        },
        'wisprLoop': {
            'enabled': False,
            'ttsDelay': 4.0,
            'silenceThreshold': 0.02,
            'silenceDuration': 2.0,
            'wisprHotkey': 'shift+ctrl',
            'manualTriggerHotkey': 'ctrl+shift+l',
        },
    }

    config_path = CONFIG_PATH if path is None else path
    try:
        with open(config_path, 'r') as f:
            loaded = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        # Running without (or with a broken) config file is supported.
        return defaults

    # Valid JSON is not necessarily an object (e.g. a list or a number).
    if not isinstance(loaded, dict):
        return defaults

    for section, section_defaults in defaults.items():
        overrides = loaded.get(section)
        # A section such as `"autoSubmit": true` cannot carry options.
        if not isinstance(overrides, dict):
            continue
        for option in section_defaults:
            if option in overrides:
                section_defaults[option] = overrides[option]

    return defaults
72
+
73
config = load_config()

# Auto-submit settings, unpacked in the order of the option names below.
(
    AUTO_SUBMIT_ENABLED,
    SILENCE_DELAY,
    MIN_TEXT_LENGTH,
    TARGET_APP,
) = (
    config['autoSubmit'][option]
    for option in ('enabled', 'silenceDelay', 'minTextLength', 'targetApp')
)

# Wispr voice-loop settings, unpacked in the order of the option names below.
(
    WISPR_LOOP_ENABLED,
    TTS_DELAY,
    SILENCE_THRESHOLD,
    SILENCE_DURATION,
    WISPR_HOTKEY,
    MANUAL_TRIGGER_HOTKEY,
) = (
    config['wisprLoop'][option]
    for option in (
        'enabled',
        'ttsDelay',
        'silenceThreshold',
        'silenceDuration',
        'wisprHotkey',
        'manualTriggerHotkey',
    )
)

# ─── State ───────────────────────────────────────────────────────────────────

# Auto-submit state (shared between the monitor thread and the submit timer)
last_text = text_at_change_start = submit_timer = None
last_change_time = 0.0
monitoring = True

# Keyboard controller used to inject key presses
ctrl = Controller()
98
+
99
+ # ─── Helpers ─────────────────────────────────────────────────────────────────
100
+
101
def get_frontmost_app():
    """Ask System Events (via osascript) for the frontmost process name.

    Returns:
        str: The stripped osascript output, or "" if running it raised
        (including the 2-second timeout).
    """
    script = (
        'tell application "System Events" to get name of first application process whose frontmost is true'
    )
    try:
        completed = subprocess.run(
            ['osascript', '-e', script],
            capture_output=True,
            text=True,
            timeout=2,
        )
        app_name = completed.stdout.strip()
    except Exception:
        app_name = ""
    return app_name
112
+
113
def get_focused_text():
    """Read the AXValue of the system-wide focused UI element.

    Returns:
        str | None: The value converted with str(), or None when either
        Accessibility lookup reports a non-zero error, yields no object, or
        raises.
    """
    try:
        status, element = AXUIElementCopyAttributeValue(
            AXUIElementCreateSystemWide(), "AXFocusedUIElement", None
        )
        if status == 0 and element is not None:
            status, content = AXUIElementCopyAttributeValue(element, "AXValue", None)
            if status == 0 and content is not None:
                return str(content)
    except Exception:
        pass
    return None
130
+
131
def parse_hotkey(hotkey_str):
    """Parse a hotkey string like 'shift+ctrl' into pynput key objects.

    Tokens are separated by '+', case-insensitive, and stripped of whitespace.
    'control', 'option' and 'command' are accepted as aliases for 'ctrl',
    'alt' and 'cmd'. Any other member name of pynput's ``Key`` enum (for
    example 'space', 'enter', 'f5') is resolved to that member, and a single
    character becomes a ``KeyCode``.

    Args:
        hotkey_str: The hotkey description, e.g. 'ctrl+shift+l'.

    Returns:
        list: Key / KeyCode objects in the order they appear in the string.
        Empty and unrecognised tokens are skipped; unrecognised ones are
        reported on stdout.
    """
    aliases = {'control': 'ctrl', 'option': 'alt', 'command': 'cmd'}

    keys = []
    for raw in hotkey_str.lower().split('+'):
        token = raw.strip()
        if not token:
            continue

        # __members__ is used rather than getattr so that only real enum
        # members (including aliases) can match.
        special = Key.__members__.get(aliases.get(token, token))
        if special is not None:
            keys.append(special)
        elif len(token) == 1:
            keys.append(KeyCode.from_char(token))
        else:
            # Dropping a key silently would make a shorter, different
            # combination get pressed; at least tell the user.
            print(f"[hotkey] Ignoring unrecognised key '{token}' in hotkey '{hotkey_str}'")
    return keys
148
+
149
def press_hotkey(keys, controller=None):
    """Press a key combination, hold it briefly, then release it.

    Keys are pressed in the given order and released in reverse order. If
    pressing fails part-way, the keys that were already pressed are still
    released so that no modifier is left held down.

    Args:
        keys: Sequence of key objects accepted by the controller.
        controller: Object providing ``press(key)`` and ``release(key)``.
            Defaults to the module-level ``ctrl`` controller when None.
    """
    if controller is None:
        controller = ctrl

    held = []
    try:
        for key in keys:
            controller.press(key)
            held.append(key)
        # Hold the combination for 50 ms before releasing it.
        time.sleep(0.05)
    finally:
        for key in reversed(held):
            controller.release(key)
158
+
159
def _consume_signal_file(path):
    """Delete *path* and report whether the file was present."""
    try:
        os.remove(path)
    except FileNotFoundError:
        return False
    except OSError:
        # Could not delete it (permissions, ...); it still counts as a signal
        # if it is actually there.
        return os.path.exists(path)
    return True


def wait_for_tts_completion(timeout=15.0, signal_path=None):
    """Wait for the TTS completion signal file to appear.

    Any signal file that already exists when the call starts is treated as
    stale and deleted first. The file is then polled every 100 ms and deleted
    once seen.

    Args:
        timeout: Maximum number of seconds to wait.
        signal_path: File to watch. Defaults to TTS_COMPLETE_PATH when None.

    Returns:
        bool: True if the signal appeared within the timeout, False otherwise.
    """
    if signal_path is None:
        signal_path = TTS_COMPLETE_PATH

    print("[wispr-loop] Waiting for TTS to complete...")

    # Clear any stale completion signal first
    _consume_signal_file(signal_path)

    # A monotonic clock keeps the timeout correct across wall-clock changes.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if _consume_signal_file(signal_path):
            print("[wispr-loop] TTS completion signal received!")
            return True
        time.sleep(0.1)  # Poll every 100ms

    # Timeout - proceed anyway with a warning
    print(f"[wispr-loop] Warning: TTS completion timeout after {timeout}s, proceeding anyway...")
    return False
185
+
186
def trigger_wispr_loop():
    """Run one voice cycle: press the Wispr hotkey, wait for silence, press it again.

    The second press only happens when wait_for_silence() reports that speech
    was detected; otherwise the function just logs and returns.
    """
    print("[wispr-loop] Starting Wispr voice loop...")

    # The same key combination is used for both the start and the stop press.
    toggle_keys = parse_hotkey(WISPR_HOTKEY)

    print(f"[wispr-loop] Pressing {WISPR_HOTKEY} to start Wispr recording...")
    press_hotkey(toggle_keys)

    print(f"[wispr-loop] Monitoring mic for silence (threshold: {SILENCE_THRESHOLD}, duration: {SILENCE_DURATION}s)...")
    heard_speech = wait_for_silence(
        silence_threshold=SILENCE_THRESHOLD,
        silence_duration=SILENCE_DURATION,
        verbose=True,
    )

    if not heard_speech:
        print("[wispr-loop] No speech detected, cancelling")
        return

    # Speech was heard: the second press is what stops the recording.
    print(f"[wispr-loop] Pressing {WISPR_HOTKEY} to stop Wispr and paste...")
    press_hotkey(toggle_keys)
    print("[wispr-loop] Wispr should paste text now, auto-submit will handle pressing Enter")
212
+
213
+ # ─── Auto-Submit Monitor ─────────────────────────────────────────────────────
214
+
215
def do_submit(new_text_length):
    """Press Enter if the new text is long enough and the target app is frontmost.

    Clears the pending ``submit_timer`` reference, then submits only when
    ``new_text_length`` reaches MIN_TEXT_LENGTH and get_frontmost_app()
    equals TARGET_APP. Text monitoring is paused around the key press and is
    always re-enabled afterwards, even if injecting the key raises.

    Args:
        new_text_length: Number of characters added in the burst that
            scheduled this submit.
    """
    global submit_timer, monitoring
    submit_timer = None

    if new_text_length < MIN_TEXT_LENGTH:
        return

    if get_frontmost_app() != TARGET_APP:
        return

    # Briefly pause monitoring to avoid detecting our own Enter keypress
    monitoring = False
    try:
        print(f"[auto-submit] Dictation detected ({new_text_length} new chars), submitting...")
        time.sleep(0.15)
        ctrl.press(Key.enter)
        ctrl.release(Key.enter)
        time.sleep(0.5)
    finally:
        # Without this, one failed key injection would leave the monitor
        # loop paused for the rest of the process lifetime.
        monitoring = True
235
+
236
def monitor_text_field():
    """Poll the focused text field and schedule an auto-submit after each burst of new text.

    Runs forever, sampling get_focused_text() about 7 times per second while
    auto-submit is enabled and monitoring is not paused. Consecutive changes
    less than SILENCE_DELAY seconds apart form one burst. The text present
    when a burst starts is its baseline, and only the characters added beyond
    that baseline count towards MIN_TEXT_LENGTH. Every change cancels the
    pending submit timer; a new timer calling do_submit() after SILENCE_DELAY
    is started only when enough characters have been added.
    """
    global last_text, last_change_time, text_at_change_start, submit_timer, monitoring

    while True:
        if not AUTO_SUBMIT_ENABLED or not monitoring:
            time.sleep(0.2)
            continue

        try:
            current_text = get_focused_text()

            # Detect text change (None means nothing readable is focused)
            if current_text is not None and current_text != last_text:
                now = time.monotonic()

                # A change after a quiet period starts a new burst, so the
                # baseline must be re-taken; otherwise a one-character edit
                # to long existing text would look like a long dictation.
                burst_ended = (now - last_change_time) >= SILENCE_DELAY
                if text_at_change_start is None or burst_ended:
                    text_at_change_start = last_text or ""

                # The field shrank below the baseline (e.g. it was cleared
                # after a submit): measure further additions from here.
                if len(current_text) < len(text_at_change_start):
                    text_at_change_start = current_text

                new_chars = len(current_text) - len(text_at_change_start)

                last_text = current_text
                last_change_time = now

                # Cancel any pending submit
                if submit_timer is not None:
                    submit_timer.cancel()
                    submit_timer = None

                # Only schedule submit if meaningful text was added
                if new_chars >= MIN_TEXT_LENGTH:
                    submit_timer = threading.Timer(SILENCE_DELAY, do_submit, args=[new_chars])
                    submit_timer.daemon = True
                    submit_timer.start()

        except Exception as e:
            # Keep the monitor alive, but do not hide the failure.
            print(f"[auto-submit] Monitor error: {e}")

        time.sleep(0.15)  # Poll ~7 times per second
279
+
280
+ # ─── Wispr Loop Signal Watcher ──────────────────────────────────────────────
281
+
282
def watch_for_signals():
    """Poll for the listen signal file and start a Wispr loop each time it appears.

    Runs forever. While the Wispr loop is disabled it idles in 0.5 s sleeps;
    otherwise it checks SIGNAL_PATH every 0.3 s. When the file exists it is
    deleted, wait_for_tts_completion() is called, and trigger_wispr_loop()
    is started on a daemon thread. Any exception is printed and polling
    continues.
    """
    while True:
        if WISPR_LOOP_ENABLED:
            try:
                signal_present = os.path.exists(SIGNAL_PATH)
                if signal_present:
                    print("[wispr-loop] Listen signal detected!")

                    # Consume the signal so it is handled once.
                    os.remove(SIGNAL_PATH)

                    # Block here until TTS reports completion (or times out).
                    wait_for_tts_completion()

                    # Hand the voice loop to its own thread so polling resumes.
                    worker = threading.Thread(target=trigger_wispr_loop, daemon=True)
                    worker.start()
            except Exception as err:
                print(f"[wispr-loop] Error: {err}")

            time.sleep(0.3)  # Poll for signal file every 300ms
        else:
            time.sleep(0.5)
306
+
307
+ # ─── Manual Trigger Hotkey ──────────────────────────────────────────────────
308
+
309
def setup_manual_trigger():
    """Register a global hotkey that manually starts the Wispr loop.

    MANUAL_TRIGGER_HOTKEY is converted to the string syntax used by pynput's
    GlobalHotKeys: single characters stay bare and every named key is wrapped
    in angle brackets, e.g. "ctrl+shift+l" -> "<ctrl>+<shift>+l". The aliases
    'control', 'option' and 'command' are mapped to 'ctrl', 'alt' and 'cmd',
    the same aliases parse_hotkey() accepts.

    Returns:
        The started GlobalHotKeys listener, or None when the Wispr loop is
        disabled or registration failed (the failure is printed).
    """
    if not WISPR_LOOP_ENABLED:
        return None

    # pynput resolves "<name>" against its Key enum, which has no members
    # called 'control', 'option' or 'command'.
    aliases = {'control': 'ctrl', 'option': 'alt', 'command': 'cmd'}

    formatted_parts = []
    for raw in MANUAL_TRIGGER_HOTKEY.lower().split('+'):
        token = raw.strip()
        if not token:
            continue
        token = aliases.get(token, token)
        # Only single characters may appear without brackets; named keys
        # such as 'space' or 'f5' need them just like modifiers do.
        formatted_parts.append(token if len(token) == 1 else f'<{token}>')
    formatted_hotkey = '+'.join(formatted_parts)

    def on_manual_trigger():
        print("[wispr-loop] Manual trigger activated!")
        threading.Thread(target=trigger_wispr_loop, daemon=True).start()

    try:
        hotkeys = GlobalHotKeys({
            formatted_hotkey: on_manual_trigger
        })
        hotkeys.start()
        return hotkeys
    except Exception as e:
        print(f"[wispr-loop] Failed to register manual trigger hotkey: {e}")
        return None
339
+
340
+ # ─── Main ────────────────────────────────────────────────────────────────────
341
+
342
def main():
    """Print the active settings, start the enabled background workers, and idle.

    The text-field monitor and the signal watcher each run on a daemon
    thread; the main thread only sleeps until Ctrl+C is pressed.
    """
    # Check accessibility permissions (a failed check only warns; startup continues)
    if not AXIsProcessTrusted():
        print(" ERROR: Accessibility permissions not granted!")
        print(" Go to: System Settings > Privacy & Security > Accessibility")
        print(" Add your terminal app (Terminal, iTerm, Cursor, etc.)")
        print()
        print(" The script will continue but may not work correctly.")
        print()

    # Startup banner showing the settings loaded at import time
    print(f"""
Cursor Auto-Submit & Wispr Voice Loop
──────────────────────────────────────

Auto-Submit: {'Enabled' if AUTO_SUBMIT_ENABLED else 'Disabled'}
Submit delay: {SILENCE_DELAY}s
Min text length: {MIN_TEXT_LENGTH} chars
Target app: {TARGET_APP}

Wispr Loop: {'Enabled' if WISPR_LOOP_ENABLED else 'Disabled'}
TTS delay: {TTS_DELAY}s
Silence thresh: {SILENCE_THRESHOLD}
Silence duration: {SILENCE_DURATION}s
Wispr hotkey: {WISPR_HOTKEY}
Manual trigger: {MANUAL_TRIGGER_HOTKEY}

Press Ctrl+C to stop.
""")

    # Start monitors in separate threads
    if AUTO_SUBMIT_ENABLED:
        text_monitor = threading.Thread(target=monitor_text_field, daemon=True)
        text_monitor.start()
        print("[auto-submit] Text field monitor started")

    if WISPR_LOOP_ENABLED:
        signal_watcher = threading.Thread(target=watch_for_signals, daemon=True)
        signal_watcher.start()
        print("[wispr-loop] Signal watcher started")

        # setup_manual_trigger() returns None when registration fails
        manual_hotkey = setup_manual_trigger()
        if manual_hotkey:
            print(f"[wispr-loop] Manual trigger registered: {MANUAL_TRIGGER_HOTKEY}")

    try:
        # Keep main thread alive (the workers are daemon threads)
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        print("\n[main] Stopped.")


if __name__ == '__main__':
    main()
@@ -0,0 +1,146 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Silence Detector Module
4
+
5
+ Monitors microphone input and detects when the user has stopped speaking
6
+ based on RMS (root mean square) amplitude analysis.
7
+
8
+ State machine:
9
+ IDLE -> SPEECH (RMS exceeds threshold)
10
+ SPEECH -> TRAILING_SILENCE (RMS drops below threshold)
11
+ TRAILING_SILENCE -> SPEECH (RMS exceeds threshold again)
12
+ TRAILING_SILENCE -> DONE (silence duration exceeded)
13
+ """
14
+
15
+ import numpy as np
16
+ import sounddevice as sd
17
+ import time
18
+ from enum import Enum
19
+
20
class State(Enum):
    """Phases of the silence-detection state machine."""

    IDLE = "idle"                          # no speech heard yet
    SPEECH = "speech"                      # RMS currently above the threshold
    TRAILING_SILENCE = "trailing_silence"  # below threshold, timing the pause
    DONE = "done"                          # pause lasted silence_duration


class SilenceDetector:
    """Detect the end of an utterance from microphone RMS amplitude."""

    def __init__(self,
                 silence_threshold=0.02,
                 silence_duration=2.0,
                 sample_rate=16000,
                 chunk_size=1024):
        """
        Initialize the silence detector.

        Args:
            silence_threshold: RMS amplitude threshold for speech detection
            silence_duration: Seconds of silence needed to confirm user stopped
            sample_rate: Audio sample rate in Hz
            chunk_size: Number of samples per chunk
        """
        self.silence_threshold = silence_threshold
        self.silence_duration = silence_duration
        self.sample_rate = sample_rate
        self.chunk_size = chunk_size
        self.reset()

    def reset(self):
        """Return the state machine to IDLE so the detector can be reused."""
        self.state = State.IDLE
        self.silence_start_time = None
        self.speech_detected = False

    def compute_rms(self, audio_chunk):
        """Compute the RMS (root mean square) of an audio chunk.

        The samples are converted to float64 first so that integer input
        cannot overflow when squared. An empty chunk has an RMS of 0.0.

        Returns:
            float: The RMS amplitude.
        """
        samples = np.asarray(audio_chunk, dtype=np.float64)
        if samples.size == 0:
            return 0.0
        return float(np.sqrt(np.mean(samples ** 2)))

    def _step(self, rms, now, verbose=False):
        """Advance the state machine by one RMS reading taken at time *now*."""
        if self.state == State.IDLE:
            if rms > self.silence_threshold:
                self.state = State.SPEECH
                self.speech_detected = True
                if verbose:
                    print(f"[silence-detector] Speech detected (RMS: {rms:.4f})")

        elif self.state == State.SPEECH:
            if rms < self.silence_threshold:
                self.state = State.TRAILING_SILENCE
                self.silence_start_time = now
                if verbose:
                    print("[silence-detector] Trailing silence started...")
            # else: still speaking, remain in SPEECH state

        elif self.state == State.TRAILING_SILENCE:
            if rms > self.silence_threshold:
                # User started speaking again
                self.state = State.SPEECH
                self.silence_start_time = None
                if verbose:
                    print(f"[silence-detector] Speech resumed (RMS: {rms:.4f})")
            else:
                # Check if silence duration exceeded
                elapsed = now - self.silence_start_time
                if elapsed >= self.silence_duration:
                    self.state = State.DONE
                    if verbose:
                        print(f"[silence-detector] Silence confirmed ({elapsed:.1f}s)")

    def wait_for_silence(self, verbose=True, max_wait=None):
        """
        Monitor the microphone and wait for silence after detecting speech.

        The state machine is reset on every call, so one detector can be used
        for several utterances.

        Args:
            verbose: Print state transitions while listening.
            max_wait: Optional number of seconds to wait for speech to START.
                When it elapses with no speech heard, the call gives up.
                None (the default) waits indefinitely.

        Returns:
            bool: True if speech was detected and followed by
            silence_duration seconds of silence; False if max_wait expired
            before any speech was heard.
        """
        self.reset()

        if verbose:
            print("[silence-detector] Listening for speech...")
            print(f"[silence-detector] Threshold: {self.silence_threshold:.4f}, Silence duration: {self.silence_duration}s")

        # Monotonic time keeps the measured durations immune to clock changes.
        started = time.monotonic()

        # Open the microphone stream
        with sd.InputStream(samplerate=self.sample_rate,
                            channels=1,
                            blocksize=self.chunk_size) as stream:

            while self.state != State.DONE:
                # Read audio chunk
                audio_data, overflowed = stream.read(self.chunk_size)

                if overflowed:
                    print("[silence-detector] Warning: Audio buffer overflow")

                now = time.monotonic()
                self._step(self.compute_rms(audio_data), now, verbose)

                if (max_wait is not None
                        and self.state == State.IDLE
                        and now - started >= max_wait):
                    if verbose:
                        print(f"[silence-detector] No speech within {max_wait}s")
                    break

        if verbose:
            print("[silence-detector] Done")

        return self.speech_detected
114
+
115
def wait_for_silence(silence_threshold=0.02,
                     silence_duration=2.0,
                     verbose=True):
    """
    One-shot helper: build a SilenceDetector and run it once.

    Args:
        silence_threshold: Passed to SilenceDetector as silence_threshold.
        silence_duration: Passed to SilenceDetector as silence_duration.
        verbose: Passed to SilenceDetector.wait_for_silence().

    Returns:
        bool: The value returned by SilenceDetector.wait_for_silence()
        (True if speech was detected, False if no speech).
    """
    return SilenceDetector(
        silence_threshold=silence_threshold,
        silence_duration=silence_duration,
    ).wait_for_silence(verbose=verbose)
129
+
130
if __name__ == "__main__":
    # Manual smoke test: run one detection cycle against the microphone.
    for heading in (
        "Silence Detector Test",
        "=====================",
        "Start speaking into your microphone...",
    ):
        print(heading)
    print()

    speech_detected = wait_for_silence(
        silence_threshold=0.02,
        silence_duration=2.0,
        verbose=True,
    )

    print(
        "\n✓ Speech was detected and silence confirmed"
        if speech_detected
        else "\n✗ No speech detected"
    )