python-voiceio 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
voiceio/app.py ADDED
@@ -0,0 +1,415 @@
1
+ """Main VoiceIO engine: state machine, backend wiring, and self-healing."""
2
+ from __future__ import annotations
3
+
4
+ import fcntl
5
+ import logging
6
+ import os
7
+ import signal
8
+ import subprocess
9
+ import threading
10
+ import time
11
+
12
+ import numpy as np
13
+
14
+ from voiceio import config, platform as plat
15
+ from voiceio.hotkeys import chain as hotkey_chain
16
+ from voiceio.hotkeys.socket_backend import SocketHotkey
17
+ from voiceio.recorder import AudioRecorder
18
+ from voiceio.streaming import StreamingSession
19
+ from voiceio.transcriber import Transcriber
20
+ from voiceio.typers import chain as typer_chain
21
+ from voiceio.typers.base import StreamingTyper
22
+ log = logging.getLogger("voiceio")
23
+
24
+
25
+
26
class VoiceIO:
    """Hotkey-driven engine that records audio, transcribes it and types it.

    The instance wires together a hotkey backend, a typer backend, the audio
    recorder and the transcriber.  ``on_hotkey`` toggles recording, and
    ``run`` starts every backend and blocks until shutdown is requested.
    """

    def __init__(self, cfg: config.Config):
        """Select backends, load the transcription model and reset state.

        Args:
            cfg: Loaded configuration; the ``hotkey``, ``output``, ``health``,
                ``model``, ``audio`` and ``feedback`` sections are read.
        """
        self.cfg = cfg
        self.platform = plat.detect()

        # Select backends
        self._hotkey = hotkey_chain.select(self.platform, cfg.hotkey.backend)
        self._typer = typer_chain.select(self.platform, cfg.output.method)
        self._auto_fallback = cfg.health.auto_fallback

        # Always start socket backend alongside native hotkey
        self._socket: SocketHotkey | None = None
        if self._hotkey.name != "socket":
            self._socket = SocketHotkey()

        print(f"Loading model '{cfg.model.name}'...", end="", flush=True)
        t0 = time.monotonic()
        self.transcriber = Transcriber(cfg.model)
        print(f" ready ({time.monotonic() - t0:.1f}s)")
        self.recorder = AudioRecorder(cfg.audio)
        self._streaming = cfg.output.streaming
        self._session: StreamingSession | None = None
        self._processing = False
        self._record_start: float = 0
        self._prev_ibus_engine: str | None = None
        self._engine_proc: subprocess.Popen | None = None
        self._shutdown = threading.Event()
        # Locked PID file handle; populated by run() once the lock is held.
        self._pid_fd = None

    def request_shutdown(self) -> None:
        """Request graceful shutdown from an external signal handler."""
        self._shutdown.set()

    def on_hotkey(self) -> None:
        """Toggle recording in response to a hotkey press.

        While recording, a press inside ``cancel_window_secs`` cancels the
        recording without typing, a press before ``min_recording_secs`` is
        ignored, and any later press stops the recording and delivers the
        text.  While idle (and not processing) a press starts a recording.
        """
        if self.recorder.is_recording:
            elapsed = time.monotonic() - self._record_start
            if elapsed < self.cfg.output.cancel_window_secs:
                # Quick double-press = cancel recording without typing
                if self._streaming and self._session is not None:
                    self._session.stop()
                    self._session = None
                self.recorder.stop()
                if isinstance(self._typer, StreamingTyper):
                    self._typer.clear_preedit()
                self._deactivate_ibus()
                log.info("Recording cancelled (double-press)")
                return
            if elapsed < self.cfg.output.min_recording_secs:
                log.debug(
                    "Ignoring stop, only %.1fs into recording (min %.1fs)",
                    elapsed, self.cfg.output.min_recording_secs,
                )
                return

            self._play_record_cue(start=False)
            handed_off = False
            if self._streaming and self._session is not None:
                final_text = self._session.stop()
                self.recorder.stop()
                self._session = None
                if final_text:
                    self._play_feedback(final_text)
                log.info("Streaming done (%.1fs): '%s'", elapsed, final_text)
            else:
                audio = self.recorder.stop()
                log.info("Stopped recording (%.1fs)", elapsed)
                if audio is not None and not self._processing:
                    threading.Thread(
                        target=self._process, args=(audio,), daemon=True,
                    ).start()
                    handed_off = True
            # Return the keyboard to normal now, unless a worker still has to
            # type the text: the engine must stay active until the commit, and
            # _process() deactivates it in its own ``finally``.
            if not handed_off:
                self._deactivate_ibus()
        elif not self._processing:
            # Activate IBus engine so preedit/commit can reach the focused app
            self._activate_ibus()
            self._record_start = time.monotonic()
            self.recorder.start()
            self._play_record_cue(start=True)
            if self._streaming:
                self._session = StreamingSession(
                    self.transcriber, self._typer, self.recorder,
                )
                self._session.start()
            log.info("Recording... press [%s] again to stop", self.cfg.hotkey.key)

    def _process(self, audio: np.ndarray) -> None:
        """Transcribe ``audio`` and type the result (runs on a worker thread).

        Failures are logged instead of raised.  Feedback is given only when
        the text was actually typed.  The IBus engine is always deactivated
        at the end.
        """
        self._processing = True
        try:
            text = self.transcriber.transcribe(audio)
            if text and self._type_with_fallback(text):
                self._play_feedback(text)
                log.info("Typed: '%s'", text)
        except Exception:
            log.exception("Processing failed")
        finally:
            self._processing = False
            self._deactivate_ibus()

    def _activate_ibus(self) -> None:
        """Switch GNOME input source to voiceio engine for text injection.

        Done in a thread to avoid blocking the hotkey handler. The 0.5s
        GNOME activation delay is fine since transcription takes ~1s anyway.
        """
        if self._typer.name != "ibus":
            return
        threading.Thread(
            target=self._switch_gnome_input_source,
            args=("voiceio",), daemon=True,
        ).start()

    def _deactivate_ibus(self) -> None:
        """Switch GNOME input source back to normal keyboard."""
        if self._typer.name != "ibus":
            return
        self._set_gnome_input_source_index(0)
        log.debug("IBus engine deactivated, keyboard restored")

    def _play_record_cue(self, start: bool) -> None:
        """Play a subtle click on record start/stop."""
        if not self.cfg.feedback.sound_enabled:
            return
        # Imported lazily so the feedback module is only loaded when used.
        if start:
            from voiceio.feedback import play_record_start
            play_record_start()
        else:
            from voiceio.feedback import play_record_stop
            play_record_stop()

    def _play_feedback(self, text: str) -> None:
        """Play sound and/or notification after committing text."""
        if self.cfg.feedback.sound_enabled:
            from voiceio.feedback import play_commit_sound
            play_commit_sound()
        if self.cfg.feedback.notify_clipboard:
            from voiceio.feedback import notify_clipboard
            notify_clipboard(text)

    def _type_with_fallback(self, text: str) -> bool:
        """Type text, falling back to next backend on failure.

        Returns:
            True when the text was typed, False when no working typer
            backend could be found.

        Raises:
            Exception: The original typer error when auto-fallback is
                disabled, or any non-``RuntimeError`` raised by the fallback.
        """
        try:
            self._typer.type_text(text)
            return True
        except Exception as e:
            if not self._auto_fallback:
                raise
            log.warning("Typer '%s' failed: %s, trying fallback", self._typer.name, e)

        probe = self._typer.probe()
        if not probe.ok:
            log.warning("Typer '%s' no longer works: %s", self._typer.name, probe.reason)
        try:
            self._typer = typer_chain.select(self.platform)
            log.info("Switched to typer: %s", self._typer.name)
            self._typer.type_text(text)
        except RuntimeError:
            log.error("No working typer backend available")
            return False
        return True

    def _ensure_ibus_engine(self) -> None:
        """Start the VoiceIO IBus engine and activate it.

        We spawn the engine process directly (bypassing `ibus engine` which
        is unreliable), then switch the GNOME input source to voiceio.
        """
        from voiceio.ibus import READY_PATH, SOCKET_PATH
        from voiceio.typers.ibus import LAUNCHER_PATH, _ibus_env

        ibus_env = _ibus_env()

        # Save current engine for restore on exit
        try:
            result = subprocess.run(
                ["ibus", "engine"], capture_output=True, text=True,
                timeout=3, env=ibus_env,
            )
            if result.returncode == 0:
                prev = result.stdout.strip()
                if prev != "voiceio":
                    self._prev_ibus_engine = prev
                    log.debug("Previous IBus engine: %s", prev)
        except (FileNotFoundError, subprocess.TimeoutExpired):
            pass

        # Kill any stale engine process from a previous session
        self._kill_stale_engine(SOCKET_PATH)
        READY_PATH.unlink(missing_ok=True)

        # Spawn the engine process directly
        # NOTE(review): stderr is a pipe that is only read after a crash; an
        # engine that writes a lot to stderr could block on a full pipe.
        log.info("Starting VoiceIO IBus engine...")
        try:
            self._engine_proc = subprocess.Popen(
                [str(LAUNCHER_PATH)],
                stdout=subprocess.DEVNULL, stderr=subprocess.PIPE,
                env=ibus_env,
            )
        except OSError as e:
            log.warning("Could not start IBus engine: %s", e)
            return

        # Phase 1: wait for socket (engine process started, accepting commands)
        for i in range(40):
            if SOCKET_PATH.exists():
                log.info("VoiceIO IBus engine socket ready (%.1fs)", i * 0.1)
                break
            time.sleep(0.1)
        else:
            if self._engine_proc.poll() is not None:
                stderr = (
                    self._engine_proc.stderr.read().decode(errors="replace")
                    if self._engine_proc.stderr else ""
                )
                log.error(
                    "IBus engine crashed (rc=%d): %s",
                    self._engine_proc.returncode, stderr.strip()[-500:],
                )
            else:
                log.warning("IBus engine started but socket not found, commands may fail")
            return

        # Phase 2: activate via `ibus engine voiceio` to create engine instance.
        # This triggers do_create_engine. We do NOT switch GNOME input source
        # here. That only happens during active recording to avoid blocking
        # keyboard input when voiceio is idle.
        log.info("Activating VoiceIO IBus engine...")
        try:
            activate_proc = subprocess.Popen(
                ["ibus", "engine", "voiceio"],
                stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
                env=ibus_env,
            )
        except OSError as e:
            # Same tolerance as the `ibus engine` query above: a missing
            # binary must not abort startup.
            log.warning("Could not run 'ibus engine voiceio': %s", e)
            activate_proc = None

        # Phase 3: wait for engine instance (created by IBus via factory)
        for i in range(200):  # up to 20s
            if READY_PATH.exists():
                log.info("VoiceIO IBus engine instance ready (%.1fs)", i * 0.1)
                break
            time.sleep(0.1)
        else:
            log.warning("IBus engine instance not created, preedit may not work")

        # Clean up the activation process (don't leave it dangling)
        if activate_proc is not None:
            try:
                activate_proc.wait(timeout=1)
            except subprocess.TimeoutExpired:
                activate_proc.kill()

        # Switch back to normal keyboard. Engine is ready but stays dormant
        # until recording starts
        self._set_gnome_input_source_index(0)

        log.info("VoiceIO IBus engine ready (dormant until recording)")

    @staticmethod
    def _kill_stale_engine(socket_path) -> None:
        """Kill any orphaned voiceio-ibus-engine process and remove stale socket."""
        socket_path.unlink(missing_ok=True)
        try:
            result = subprocess.run(
                ["pgrep", "-f", "voiceio.ibus.engine"],
                capture_output=True, text=True, timeout=3,
            )
        except (FileNotFoundError, subprocess.TimeoutExpired):
            return
        if result.returncode != 0:
            return
        for token in result.stdout.split():
            try:
                pid = int(token)
            except ValueError:
                continue
            log.debug("Killing stale engine process %s", pid)
            try:
                os.kill(pid, signal.SIGTERM)
            except (ProcessLookupError, PermissionError):
                # Already gone, or not ours to signal: best effort only.
                pass
        time.sleep(0.3)  # let it die

    def _switch_gnome_input_source(self, engine_name: str) -> None:
        """Switch GNOME input source to the given IBus engine.

        Does nothing outside GNOME, when ``gsettings`` is unavailable, or
        when the engine is not among the configured input sources.
        """
        if not self.platform.is_gnome:
            return
        try:
            result = subprocess.run(
                ["gsettings", "get", "org.gnome.desktop.input-sources", "sources"],
                capture_output=True, text=True, timeout=3,
            )
            if result.returncode != 0:
                return
            sources = result.stdout.strip()
            # Cheap pre-check before parsing: is the engine listed at all?
            if f"('ibus', '{engine_name}')" not in sources:
                return
            # Parse to find index. Sources format: [('xkb', 'us'), ('ibus', 'voiceio')]
            import ast
            try:
                source_list = ast.literal_eval(sources)
            except (ValueError, SyntaxError):
                return
            for i, (kind, name) in enumerate(source_list):
                if kind == "ibus" and name == engine_name:
                    subprocess.run(
                        ["gsettings", "set", "org.gnome.desktop.input-sources",
                         "current", str(i)],
                        capture_output=True, timeout=3,
                    )
                    log.info("Switched GNOME input source to index %d (%s)", i, engine_name)
                    # Give GNOME a moment to activate
                    time.sleep(0.5)
                    return
        except (FileNotFoundError, subprocess.TimeoutExpired):
            pass

    def _stop_ibus_engine(self) -> None:
        """Stop the IBus engine process and restore previous input method."""
        # Always restore normal keyboard first, the most critical step
        self._set_gnome_input_source_index(0)

        # Terminate engine process we spawned
        if self._engine_proc is not None:
            self._engine_proc.terminate()
            try:
                self._engine_proc.wait(timeout=3)
            except subprocess.TimeoutExpired:
                self._engine_proc.kill()
                # Reap the killed child so it does not linger as a zombie.
                self._engine_proc.wait()
            self._engine_proc = None
            log.debug("Stopped IBus engine process")

        # Clean up socket
        from voiceio.ibus import SOCKET_PATH
        SOCKET_PATH.unlink(missing_ok=True)

        # Restore previous IBus engine
        if self._prev_ibus_engine:
            from voiceio.typers.ibus import _ibus_env
            try:
                subprocess.run(
                    ["ibus", "engine", self._prev_ibus_engine],
                    capture_output=True, timeout=3, env=_ibus_env(),
                )
                log.debug("Restored IBus engine: %s", self._prev_ibus_engine)
            except (FileNotFoundError, subprocess.TimeoutExpired):
                pass
            self._prev_ibus_engine = None

    def _set_gnome_input_source_index(self, index: int) -> None:
        """Set GNOME input source by index."""
        if not self.platform.is_gnome:
            return
        try:
            subprocess.run(
                ["gsettings", "set", "org.gnome.desktop.input-sources",
                 "current", str(index)],
                capture_output=True, timeout=3,
            )
        except (FileNotFoundError, subprocess.TimeoutExpired):
            pass

    @staticmethod
    def _acquire_instance_lock(pid_path):
        """Take the single-instance lock on ``pid_path`` and record our PID.

        The file is opened without truncation, so a failed attempt leaves the
        running instance's PID intact; it is rewritten only once the
        exclusive lock is held.

        Returns:
            The open, locked file object, or ``None`` when another process
            already holds the lock.
        """
        # "a" creates the file when missing but never truncates it.
        handle = open(pid_path, "a")
        try:
            fcntl.flock(handle, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except OSError as exc:
            handle.close()
            if isinstance(exc, BlockingIOError):
                return None
            raise
        handle.truncate(0)
        handle.write(str(os.getpid()))
        handle.flush()
        return handle

    def run(self) -> None:
        """Start all backends and block until shutdown is requested.

        Returns immediately, after printing a hint, when another instance
        already holds the PID-file lock.
        """
        from voiceio.config import PID_PATH, LOG_DIR

        # Single-instance guard via file lock (atomic, no TOCTOU race)
        LOG_DIR.mkdir(parents=True, exist_ok=True)
        self._pid_fd = self._acquire_instance_lock(PID_PATH)
        if self._pid_fd is None:
            log.error("Another voiceio instance is already running")
            print("voiceio is already running. Stop it first: voiceio service stop")
            return

        # Start IBus engine if needed
        if self._typer.name == "ibus":
            self._ensure_ibus_engine()

        # Open always-on audio stream for pre-buffering
        self.recorder.open_stream()

        # Start hotkey backends
        self._hotkey.start(self.cfg.hotkey.key, self.on_hotkey)
        if self._socket is not None:
            self._socket.start(self.cfg.hotkey.key, self.on_hotkey)

        from voiceio import __version__
        log.info(
            "voiceio v%s ready. Press [%s] to toggle recording (hotkey=%s, typer=%s)",
            __version__, self.cfg.hotkey.key, self._hotkey.name, self._typer.name,
        )
        print(
            f"voiceio v{__version__} ready. Press [{self.cfg.hotkey.key}] to record "
            f"(model={self.cfg.model.name}, typer={self._typer.name})",
        )

        signal.signal(signal.SIGINT, lambda *_: self._shutdown.set())
        try:
            self._shutdown.wait()
        except KeyboardInterrupt:
            pass
        finally:
            # Nested so that a failing backend stop can never prevent the
            # keyboard from being restored or the PID file from being removed.
            try:
                self._hotkey.stop()
                if self._socket is not None:
                    self._socket.stop()
                self.recorder.close_stream()
                self.transcriber.shutdown()
            finally:
                try:
                    self._stop_ibus_engine()
                finally:
                    self._pid_fd.close()
                    self._pid_fd = None
                    PID_PATH.unlink(missing_ok=True)
                    log.info("voiceio stopped")
voiceio/backends.py ADDED
@@ -0,0 +1,13 @@
1
+ """Shared types for all backends (hotkey + typer)."""
2
+ from __future__ import annotations
3
+
4
+ from dataclasses import dataclass, field
5
+
6
+
7
@dataclass
class ProbeResult:
    """Outcome of checking whether a backend is usable on this system.

    Only ``ok`` is required; the remaining fields default to empty values.
    """

    # True when the probed backend can work here.
    ok: bool
    # Explanation accompanying the result (logged by callers on failure).
    reason: str = ""
    # Hint for fixing the problem -- presumably shown to the user; confirm
    # against the callers.
    fix_hint: str = ""
    # auto-fixable command, as an argv-style list; a fresh list per instance.
    fix_cmd: list[str] = field(default_factory=list)