python-voiceio 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- python_voiceio-0.2.0.dist-info/METADATA +260 -0
- python_voiceio-0.2.0.dist-info/RECORD +43 -0
- python_voiceio-0.2.0.dist-info/WHEEL +5 -0
- python_voiceio-0.2.0.dist-info/entry_points.txt +6 -0
- python_voiceio-0.2.0.dist-info/licenses/LICENSE +21 -0
- python_voiceio-0.2.0.dist-info/top_level.txt +1 -0
- voiceio/__init__.py +1 -0
- voiceio/__main__.py +3 -0
- voiceio/app.py +415 -0
- voiceio/backends.py +13 -0
- voiceio/cli.py +475 -0
- voiceio/config.py +136 -0
- voiceio/feedback.py +78 -0
- voiceio/health.py +194 -0
- voiceio/hotkeys/__init__.py +22 -0
- voiceio/hotkeys/base.py +27 -0
- voiceio/hotkeys/chain.py +83 -0
- voiceio/hotkeys/evdev.py +134 -0
- voiceio/hotkeys/pynput_backend.py +80 -0
- voiceio/hotkeys/socket_backend.py +77 -0
- voiceio/ibus/__init__.py +8 -0
- voiceio/ibus/engine.py +268 -0
- voiceio/platform.py +139 -0
- voiceio/recorder.py +208 -0
- voiceio/service.py +234 -0
- voiceio/sounds/__init__.py +0 -0
- voiceio/sounds/commit.wav +0 -0
- voiceio/sounds/start.wav +0 -0
- voiceio/sounds/stop.wav +0 -0
- voiceio/streaming.py +202 -0
- voiceio/transcriber.py +165 -0
- voiceio/tray.py +54 -0
- voiceio/typers/__init__.py +31 -0
- voiceio/typers/base.py +44 -0
- voiceio/typers/chain.py +79 -0
- voiceio/typers/clipboard.py +110 -0
- voiceio/typers/ibus.py +389 -0
- voiceio/typers/pynput_type.py +51 -0
- voiceio/typers/wtype.py +57 -0
- voiceio/typers/xdotool.py +45 -0
- voiceio/typers/ydotool.py +115 -0
- voiceio/wizard.py +882 -0
- voiceio/worker.py +39 -0
voiceio/service.py
ADDED
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
"""Systemd user service installation and management."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import logging
|
|
5
|
+
import os
|
|
6
|
+
import shutil
|
|
7
|
+
import subprocess
|
|
8
|
+
import sys
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
# Module-level logger for service and symlink management.
log = logging.getLogger(__name__)

# Per-user directory that receives the command symlinks.
LOCAL_BIN = Path.home().joinpath(".local", "bin")
# Command names looked up in the environment's bin directory.
SCRIPT_NAMES = [
    "voiceio",
    "voiceio-toggle",
    "voiceio-doctor",
    "voiceio-setup",
    "voiceio-test",
]
# Flipped to True once a shell profile has been edited to extend PATH.
_PATH_HINT_ADDED = False

# Name of the systemd user unit and the location of its unit file.
SERVICE_NAME = "voiceio.service"
SERVICE_DIR = Path.home().joinpath(".config", "systemd", "user")
SERVICE_PATH = SERVICE_DIR.joinpath(SERVICE_NAME)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def has_systemd() -> bool:
    """Report whether a ``systemctl`` executable can be found on PATH."""
    systemctl = shutil.which("systemctl")
    return systemctl is not None
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _find_voiceio_bin() -> str:
    """Locate the ``voiceio`` executable.

    Looks at ``<sys.prefix>/bin/voiceio`` first, then at whatever
    ``shutil.which`` finds on PATH. Paths that are found are returned fully
    resolved; if neither lookup succeeds the bare command name ``"voiceio"``
    is returned.
    """
    in_prefix = Path(sys.prefix).joinpath("bin", "voiceio")
    if in_prefix.exists():
        return str(in_prefix.resolve())

    on_path = shutil.which("voiceio")
    return str(Path(on_path).resolve()) if on_path else "voiceio"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _service_unit(bin_path: str) -> str:
    """Render the text of the systemd user unit that runs *bin_path*.

    *bin_path* is inserted verbatim as the ``ExecStart=`` value.
    """
    unit_lines = [
        "[Unit]",
        "Description=VoiceIO - voice-to-text input",
        "Documentation=https://github.com/Hugo0/voiceio",
        "After=graphical-session.target",
        "PartOf=graphical-session.target",
        "",
        "[Service]",
        "Type=simple",
        f"ExecStart={bin_path}",
        "Restart=on-failure",
        "RestartSec=3",
        "",
        "[Install]",
        "WantedBy=default.target",
    ]
    # The unit text ends with a single trailing newline.
    return "\n".join(unit_lines) + "\n"
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def install_service() -> bool:
    """Install and enable the voiceio systemd user service.

    Writes the unit file to SERVICE_PATH, reloads the user systemd manager
    and enables the unit.

    Returns:
        True only if ``systemctl`` is available and both the ``daemon-reload``
        and ``enable`` commands exited with status 0; False otherwise.
    """
    if not has_systemd():
        log.warning("systemctl not found: cannot install service")
        return False

    bin_path = _find_voiceio_bin()

    SERVICE_DIR.mkdir(parents=True, exist_ok=True)
    SERVICE_PATH.write_text(_service_unit(bin_path))
    log.info("Installed systemd service to %s", SERVICE_PATH)

    # Reload systemd and enable the service. The exit status of each command
    # is checked so a failure is not reported as a successful installation.
    for action in (["daemon-reload"], ["enable", SERVICE_NAME]):
        cmd = ["systemctl", "--user", *action]
        try:
            result = subprocess.run(
                cmd, capture_output=True, text=True, timeout=5,
            )
        except (FileNotFoundError, subprocess.TimeoutExpired) as e:
            log.warning("Could not enable service: %s", e)
            return False
        if result.returncode != 0:
            log.warning(
                "Could not enable service: `%s` exited with %d: %s",
                " ".join(cmd), result.returncode, result.stderr.strip(),
            )
            return False

    log.info("Enabled %s", SERVICE_NAME)
    return True
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def uninstall_service() -> bool:
    """Disable, stop and remove the voiceio systemd user service.

    Every ``systemctl`` call is best-effort and independent of the others:
    a missing ``systemctl`` or a timeout in one step is logged at debug level
    and does not prevent the remaining steps from running.

    Returns:
        Always True.
    """

    def _systemctl(*args: str) -> None:
        # Run one `systemctl --user` command, tolerating absence and timeouts.
        try:
            subprocess.run(
                ["systemctl", "--user", *args],
                capture_output=True, timeout=5,
            )
        except (FileNotFoundError, subprocess.TimeoutExpired) as e:
            log.debug("systemctl --user %s failed: %s", " ".join(args), e)

    _systemctl("disable", SERVICE_NAME)
    _systemctl("stop", SERVICE_NAME)

    if SERVICE_PATH.exists():
        SERVICE_PATH.unlink()
        log.info("Removed %s", SERVICE_PATH)

    # Reload after the unit file has been removed.
    _systemctl("daemon-reload")

    return True
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def is_installed() -> bool:
    """Report whether the unit file is present at SERVICE_PATH."""
    unit_file = SERVICE_PATH
    return unit_file.exists()
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def is_running() -> bool:
    """Ask ``systemctl --user is-active`` whether the service is active.

    Returns False when ``systemctl`` is missing or does not answer within
    three seconds.
    """
    cmd = ["systemctl", "--user", "is-active", SERVICE_NAME]
    try:
        probe = subprocess.run(cmd, capture_output=True, text=True, timeout=3)
    except (FileNotFoundError, subprocess.TimeoutExpired):
        return False
    state = probe.stdout.strip()
    return state == "active"
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def start_service() -> bool:
    """Run ``systemctl --user start`` for the service.

    Returns True when the command exits with status 0, and False when it
    fails, times out after five seconds, or ``systemctl`` is missing.
    """
    cmd = ["systemctl", "--user", "start", SERVICE_NAME]
    try:
        outcome = subprocess.run(cmd, capture_output=True, timeout=5)
    except (FileNotFoundError, subprocess.TimeoutExpired):
        return False
    return outcome.returncode == 0
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _is_pipx_install() -> bool:
    """Report whether ``sys.prefix`` contains the ``pipx/venvs`` path fragment."""
    return sys.prefix.find("pipx/venvs") != -1
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _local_bin_on_path() -> bool:
    """Report whether LOCAL_BIN (as written or resolved) is an entry of PATH."""
    path_entries = os.environ.get("PATH", "").split(os.pathsep)
    if str(LOCAL_BIN) in path_entries:
        return True
    # Fall back to the resolved form in case PATH lists the real location.
    return str(LOCAL_BIN.resolve()) in path_entries
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def install_symlinks() -> list[str]:
    """Link the environment's scripts into ``~/.local/bin/``.

    For a pipx install no links are created (the scripts are expected to be
    in ``~/.local/bin/`` already); the names found there are returned.

    Otherwise each name in SCRIPT_NAMES that exists in ``<sys.prefix>/bin``
    is symlinked into LOCAL_BIN. Stale symlinks are replaced, regular files
    are left alone with a warning. If anything was linked and LOCAL_BIN is
    not on PATH, a shell profile is updated.

    Returns the list of names that are linked.
    """
    if _is_pipx_install():
        # pipx already placed scripts in ~/.local/bin/, nothing to do
        present = []
        for script in SCRIPT_NAMES:
            if (LOCAL_BIN / script).exists():
                present.append(script)
        return present

    scripts_dir = Path(sys.prefix) / "bin"
    LOCAL_BIN.mkdir(parents=True, exist_ok=True)
    linked = []
    for name in SCRIPT_NAMES:
        target = scripts_dir / name
        link = LOCAL_BIN / name
        if not target.exists():
            continue

        if link.is_symlink():
            if link.resolve() == target.resolve():
                # Already points at the right script.
                linked.append(name)
                continue
            # Stale symlink: drop it so it can be recreated below.
            link.unlink()
        elif link.exists():
            # Never overwrite something that is not a symlink.
            log.warning("Skipping %s: regular file exists at %s", name, link)
            continue

        link.symlink_to(target.resolve())
        log.info("Linked %s → %s", link, target.resolve())
        linked.append(name)

    # On macOS (or any system where ~/.local/bin isn't on PATH), add it to shell profile
    if linked and not _local_bin_on_path():
        _add_local_bin_to_path()

    return linked
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _add_local_bin_to_path() -> None:
    """Add ``~/.local/bin`` to PATH via a shell profile (for macOS etc.).

    The first of ``.zshrc``, ``.bashrc``, ``.profile`` that exists in the
    home directory is used. If it already mentions ``.local/bin`` nothing is
    changed. If none of them exists, ``.profile`` is created.

    The export line is appended rather than rewriting the whole file, so an
    interrupted write cannot truncate the user's existing shell
    configuration. Sets the module flag ``_PATH_HINT_ADDED`` whenever a file
    was modified or created.
    """
    global _PATH_HINT_ADDED
    line = '\nexport PATH="$HOME/.local/bin:$PATH"\n'
    # Try .zshrc first (macOS default), then .bashrc, then .profile
    for rc_name in (".zshrc", ".bashrc", ".profile"):
        rc = Path.home() / rc_name
        if not rc.exists():
            continue
        # The content is only inspected, never written back, so undecodable
        # bytes are replaced instead of raising UnicodeDecodeError.
        if ".local/bin" in rc.read_text(errors="replace"):
            return  # already there
        with rc.open("a") as fh:
            fh.write(line)
        log.info("Added ~/.local/bin to PATH in %s", rc)
        _PATH_HINT_ADDED = True
        return
    # No shell rc found, create .profile
    rc = Path.home() / ".profile"
    with rc.open("a") as fh:
        fh.write(line)
    log.info("Created %s with PATH entry", rc)
    _PATH_HINT_ADDED = True
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def symlinks_installed() -> bool:
    """Report whether a ``voiceio`` command is reachable.

    True if ``voiceio`` is found on PATH, or if an entry named ``voiceio``
    exists in LOCAL_BIN (a new shell may pick it up even if the current PATH
    does not include that directory).
    """
    return bool(shutil.which("voiceio")) or (LOCAL_BIN / "voiceio").exists()
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def path_hint_needed() -> bool:
    """Tell whether a shell profile was modified, so the user must restart the shell."""
    profile_changed = _PATH_HINT_ADDED
    return profile_changed
|
|
File without changes
|
|
Binary file
|
voiceio/sounds/start.wav
ADDED
|
Binary file
|
voiceio/sounds/stop.wav
ADDED
|
Binary file
|
voiceio/streaming.py
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
"""Streaming transcription with word-level append and final correction."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import logging
|
|
5
|
+
import re
|
|
6
|
+
import threading
|
|
7
|
+
import time
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
9
|
+
|
|
10
|
+
from voiceio.transcriber import TRANSCRIBE_TIMEOUT
|
|
11
|
+
from voiceio.typers.base import StreamingTyper
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from voiceio.recorder import AudioRecorder
|
|
15
|
+
from voiceio.transcriber import Transcriber
|
|
16
|
+
from voiceio.typers.base import TyperBackend
|
|
17
|
+
|
|
18
|
+
# Module-level logger for the streaming session.
log = logging.getLogger(__name__)
# Pause, in seconds, slept after deleting characters and before typing again.
DELETE_SETTLE_SECS = 0.05  # delay between delete and type for ydotool reliability
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _common_prefix_len(a: str, b: str) -> int:
    """Return how many leading characters *a* and *b* have in common."""
    shared = 0
    for ch_a, ch_b in zip(a, b):
        if ch_a != ch_b:
            break
        shared += 1
    return shared
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _clean_word(w: str) -> str:
    """Normalise a word for fuzzy matching: drop non-word characters, lowercase."""
    letters_only = re.sub(r'[^\w]', '', w)
    return letters_only.lower()
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _word_match_len(old_words: list[str], new_words: list[str]) -> int:
    """Count how many leading words agree once punctuation and case are ignored."""
    matched = 0
    for previous, current in zip(old_words, new_words):
        if _clean_word(previous) != _clean_word(current):
            return matched
        matched += 1
    return matched
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class StreamingSession:
    """Drive a single streaming transcription cycle.

    While streaming with a plain typer, text is append-only: successive
    transcriptions are compared word by word, ignoring punctuation and case,
    because Whisper changes punctuation/capitalization between passes. That
    way the typed text keeps growing even when Whisper flip-flops on commas
    vs periods.

    When the session stops, one character-level diff against the final
    transcription fixes whatever drift has accumulated.
    """

    def __init__(
        self,
        transcriber: Transcriber,
        typer: TyperBackend,
        recorder: AudioRecorder,
    ):
        self._recorder = recorder
        self._transcriber = transcriber
        self._typer = typer
        # Text currently typed (or shown as preedit) for this session.
        self._typed_text = ""
        # _pending wakes the worker; _stop asks it to finish.
        self._pending = threading.Event()
        self._stop = threading.Event()
        self._worker_thread: threading.Thread | None = None

    def start(self) -> None:
        """Begin streaming. Recorder must already be started by caller."""
        self._recorder.set_on_speech_pause(self._on_vad_pause)
        worker = threading.Thread(target=self._worker_loop, daemon=True)
        self._worker_thread = worker
        worker.start()
        log.debug("Streaming session started")

    def stop(self) -> str:
        """Stop streaming, let the worker run its final pass, return the text."""
        self._stop.set()
        self._pending.set()  # wake worker for final pass
        worker = self._worker_thread
        if worker is not None:
            worker.join(timeout=TRANSCRIBE_TIMEOUT + 2)
        self._recorder.set_on_speech_pause(None)
        log.debug("Streaming session stopped, typed: '%s'", self._typed_text)
        return self._typed_text

    def _on_vad_pause(self) -> None:
        """Speech-pause callback registered on the recorder; wakes the worker."""
        self._pending.set()

    def _worker_loop(self) -> None:
        """Worker thread body: wait for a wake-up, transcribe, apply the result."""
        while not self._stop.is_set():
            # Wakes on the event or after one second, whichever comes first.
            self._pending.wait(timeout=1.0)
            self._pending.clear()
            if not self._stop.is_set():
                self._transcribe_and_apply()

        # Final transcription on stop: allow full correction
        self._transcribe_and_apply(min_seconds=0.5, final=True)

    def _transcribe_and_apply(
        self, min_seconds: float = 1.0, final: bool = False,
    ) -> None:
        """Transcribe all audio recorded so far and apply the result.

        Does nothing when no audio is available, when fewer than
        *min_seconds* of samples have been recorded, when transcription
        raises, or when the transcription is empty.
        """
        audio = self._recorder.get_audio_so_far()
        if audio is None or len(audio) < self._recorder.sample_rate * min_seconds:
            return

        try:
            text = self._transcriber.transcribe(audio)
        except Exception:
            log.exception("Streaming transcription failed")
            return

        if not text:
            return
        self._apply_correction(text, final=final)

    def _apply_correction(self, new_text: str, final: bool = False) -> None:
        """Bring the typed text in line with *new_text*.

        With StreamingTyper (IBus): use preedit during streaming, commit on final.
        Without: append-only via word-level matching, char-level diff on final.
        """
        if isinstance(self._typer, StreamingTyper):
            self._apply_preedit(new_text, final)
            return

        old = self._typed_text
        if new_text == old:
            return

        if final:
            self._apply_final_correction(new_text)
            return

        if not old:
            # First transcription, just type it
            self._typer.type_text(new_text)
            self._typed_text = new_text
            log.debug("Initial: '%s'", new_text)
            return

        old_words = old.split()

        # Short text: allow full replacement (Whisper still warming up)
        if len(old_words) <= 2:
            self._typer.delete_chars(len(old))
            time.sleep(DELETE_SETTLE_SECS)
            self._typer.type_text(new_text)
            self._typed_text = new_text
            log.debug("Replaced short text: '%s'", new_text)
            return

        # Word-level append-only
        new_words = new_text.split()
        matched = _word_match_len(old_words, new_words)

        if matched < len(old_words) or len(new_words) <= matched:
            # Either the typed words no longer match or nothing new arrived.
            log.debug(
                "Skipping (matched %d/%d words)",
                matched, len(old_words),
            )
            return

        # All our typed words match, so append the new ones
        to_type = " " + " ".join(new_words[matched:])
        self._typer.type_text(to_type)
        self._typed_text = old + to_type
        log.debug("Appended: '%s'", to_type.strip())

    def _apply_preedit(self, new_text: str, final: bool) -> None:
        """Preedit path: replace the preview text, or commit it on the final pass."""
        if final:
            # Always commit: preedit is just preview, clipboard paste is delivery
            self._typer.commit_text(new_text)
            self._typed_text = new_text
            log.debug("Preedit commit: '%s'", new_text[:60])
            return

        if new_text == self._typed_text:
            return
        self._typer.update_preedit(new_text)
        self._typed_text = new_text
        log.debug("Preedit update: '%s'", new_text[:60])

    def _apply_final_correction(self, new_text: str) -> None:
        """Final pass: char-level diff to fix accumulated drift."""
        old = self._typed_text
        keep = _common_prefix_len(old, new_text)
        surplus = len(old) - keep
        suffix = new_text[keep:]

        if surplus > 0:
            log.debug("Final correction: delete %d, type '%s'", surplus, suffix)
            self._typer.delete_chars(surplus)
            time.sleep(DELETE_SETTLE_SECS)  # let deletions settle
        if suffix:
            self._typer.type_text(suffix)

        self._typed_text = new_text
|
voiceio/transcriber.py
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"""Subprocess-isolated faster-whisper transcriber with crash recovery."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import base64
|
|
5
|
+
import json
|
|
6
|
+
import logging
|
|
7
|
+
import subprocess
|
|
8
|
+
import sys
|
|
9
|
+
import threading
|
|
10
|
+
import time
|
|
11
|
+
from typing import TYPE_CHECKING
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from voiceio.config import ModelConfig
|
|
17
|
+
|
|
18
|
+
# Module-level logger for the transcriber.
log = logging.getLogger(__name__)

# How long to wait for the worker's reply to one transcription request.
TRANSCRIBE_TIMEOUT = 30  # seconds
# Number of worker restarts allowed before the transcriber gives up.
MAX_RESTARTS = 3
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Transcriber:
    """Run transcription in a ``voiceio.worker`` subprocess and restart it on failure.

    Requests and replies are exchanged as one JSON document per line over the
    worker's stdin/stdout. The worker's stderr is redirected to ``worker.log``
    under ``LOG_DIR``.
    """

    def __init__(self, cfg: ModelConfig):
        self._cfg = cfg
        self._proc: subprocess.Popen | None = None
        # Location of, and open handle to, the worker's stderr log.
        self._stderr_path = None
        self._stderr_file = None
        self._lock = threading.Lock()
        # Restarts since the last successful transcription.
        self._restarts = 0
        self._start_worker()

    def _start_worker(self) -> None:
        """Spawn the worker process and block until it reports ``READY``.

        Raises:
            RuntimeError: if the first line printed by the worker is not
                ``READY``. The worker and its log handle are cleaned up first.
        """
        log.info(
            "Loading model '%s' (device=%s, compute_type=%s)...",
            self._cfg.name, self._cfg.device, self._cfg.compute_type,
        )
        language = self._cfg.language if self._cfg.language != "auto" else None

        args = json.dumps({
            "model": self._cfg.name,
            "device": self._cfg.device,
            "compute_type": self._cfg.compute_type,
            "language": language,
        })
        from voiceio.config import LOG_DIR
        LOG_DIR.mkdir(parents=True, exist_ok=True)
        # A worker that died on its own leaves its log handle open; close it
        # before opening a new one so restarts do not leak file descriptors.
        self._close_stderr_file()
        self._stderr_path = LOG_DIR / "worker.log"
        self._stderr_file = open(self._stderr_path, "w")
        try:
            self._proc = subprocess.Popen(
                [sys.executable, "-m", "voiceio.worker", args],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=self._stderr_file,
                text=True,
            )
        except OSError:
            self._close_stderr_file()
            raise

        t0 = time.monotonic()
        ready = self._proc.stdout.readline().strip()
        if ready != "READY":
            # Do not leave a half-started worker or an open log handle behind.
            self._kill_worker()
            raise RuntimeError(f"Worker failed to start: {ready}")
        elapsed = time.monotonic() - t0
        log.info("Model ready (%.1fs)", elapsed)

    def _ensure_worker(self) -> None:
        """Restart the worker if it is not running.

        Raises:
            RuntimeError: if MAX_RESTARTS restarts have already been used.
        """
        if self._proc is not None and self._proc.poll() is None:
            return
        if self._stderr_path is not None and self._stderr_path.exists():
            try:
                stderr = self._stderr_path.read_text().strip()
                if stderr:
                    # Only the tail of the log is reported.
                    log.error("Worker stderr: %s", stderr[-500:])
            except OSError:
                pass
        if self._restarts >= MAX_RESTARTS:
            raise RuntimeError(f"Transcriber worker crashed {MAX_RESTARTS} times, giving up")
        self._restarts += 1
        log.warning("Worker died, restarting (attempt %d/%d)", self._restarts, MAX_RESTARTS)
        self._start_worker()

    def _send(self, line: str) -> None:
        """Write one request line to the worker's stdin and flush it."""
        self._proc.stdin.write(line)
        self._proc.stdin.flush()

    def transcribe(self, audio: np.ndarray) -> str:
        """Send *audio* to the worker and return the transcribed text.

        Returns an empty string if the worker times out or sends an invalid
        reply. One restart-and-retry is attempted if the request cannot be
        written. Calls are serialised by an internal lock.
        """
        with self._lock:
            self._ensure_worker()

            # NOTE(review): assumes 16 kHz audio; the value is only used in
            # the log message below.
            duration = len(audio) / 16000
            t0 = time.monotonic()

            audio_b64 = base64.b64encode(audio.tobytes()).decode("ascii")
            request = json.dumps({"audio_b64": audio_b64}) + "\n"
            try:
                self._send(request)
            except (BrokenPipeError, OSError):
                log.warning("Worker pipe broken, restarting")
                self._kill_worker()
                self._ensure_worker()
                self._send(request)

            # Read with timeout
            result_line = self._read_with_timeout(TRANSCRIBE_TIMEOUT)
            if result_line is None:
                log.warning("Transcription timed out after %ds, restarting worker", TRANSCRIBE_TIMEOUT)
                self._kill_worker()
                self._ensure_worker()
                return ""

            try:
                result = json.loads(result_line)
            except (json.JSONDecodeError, TypeError):
                log.warning("Invalid response from worker: %s", repr(result_line)[:100])
                return ""
            if not isinstance(result, dict):
                # Valid JSON, but not the expected object.
                log.warning("Invalid response from worker: %s", repr(result_line)[:100])
                return ""
            text = result.get("text", "")

            elapsed = time.monotonic() - t0
            ratio = duration / elapsed if elapsed > 0 else 999
            log.info(
                "Transcribed %.1fs audio in %.1fs (%.1fx realtime): %s",
                duration, elapsed, ratio, text or "(silence)",
            )
            # Reset restart counter on success
            self._restarts = 0
            return text

    def _read_with_timeout(self, timeout: float) -> str | None:
        """Read one line from the worker's stdout.

        Returns the line, or None if nothing arrived within *timeout* seconds
        or reading failed.
        """
        result = [None]

        def read():
            try:
                result[0] = self._proc.stdout.readline()
            except (OSError, ValueError):
                pass

        # readline() cannot be interrupted, so it runs on a helper thread.
        t = threading.Thread(target=read, daemon=True)
        t.start()
        t.join(timeout)
        if t.is_alive():
            return None
        return result[0]

    def _close_stderr_file(self) -> None:
        """Close the worker's stderr log handle, if one is open."""
        fh = getattr(self, "_stderr_file", None)
        if fh is not None:
            try:
                fh.close()
            except OSError:
                pass
            self._stderr_file = None

    def _kill_worker(self) -> None:
        """Terminate the worker (escalating to kill), reap it, close its log."""
        # getattr: this also runs from __del__, possibly on an instance whose
        # __init__ did not complete.
        proc = getattr(self, "_proc", None)
        if proc is not None:
            try:
                proc.terminate()
                proc.wait(timeout=2)
            except (subprocess.TimeoutExpired, OSError):
                try:
                    proc.kill()
                    # Reap the killed process so it does not linger as a zombie.
                    proc.wait(timeout=2)
                except (subprocess.TimeoutExpired, OSError):
                    pass
            self._proc = None
        self._close_stderr_file()

    def shutdown(self) -> None:
        """Graceful shutdown: ask the worker to quit, then clean up."""
        proc = self._proc
        if proc is not None and proc.poll() is None:
            try:
                proc.stdin.write("QUIT\n")
                proc.stdin.flush()
                proc.wait(timeout=2)
            except (BrokenPipeError, OSError, subprocess.TimeoutExpired):
                # Handled by the forced cleanup below.
                pass
        # Terminates the worker if it is still alive and closes the log handle.
        self._kill_worker()

    def __del__(self):
        self._kill_worker()
|
voiceio/tray.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import threading
|
|
5
|
+
|
|
6
|
+
# Module-level logger for the tray icon.
log = logging.getLogger(__name__)

# The pystray icon while the tray is active; None before start() and after stop().
_icon = None
# Thread running the icon's event loop; set by start().
_thread = None
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _make_icon(color: str):
    """Draw the tray image: a filled circle on a transparent 64x64 canvas.

    *color* is a state name, ``"idle"`` or ``"recording"``; any other value
    raises KeyError.
    """
    from PIL import Image, ImageDraw

    palette = {
        "idle": (120, 120, 120, 255),
        "recording": (220, 40, 40, 255),
    }
    canvas = Image.new("RGBA", (64, 64), (0, 0, 0, 0))
    ImageDraw.Draw(canvas).ellipse([8, 8, 56, 56], fill=palette[color])
    return canvas
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def start(quit_callback) -> None:
    """Create the tray icon and run it on a daemon thread.

    The icon's menu has a single "Quit" entry that invokes *quit_callback*
    with no arguments. If ``pystray`` cannot be imported, a warning is
    logged and no icon is created.
    """
    global _icon, _thread

    try:
        import pystray
    except ImportError:
        log.warning("pystray not installed, tray icon disabled. Install with: pip install voiceio[tray]")
        return

    idle_image = _make_icon("idle")
    quit_item = pystray.MenuItem("Quit", lambda: quit_callback())
    _icon = pystray.Icon(
        "voiceio",
        icon=idle_image,
        title="voiceio - idle",
        menu=pystray.Menu(quit_item),
    )

    runner = threading.Thread(target=_icon.run, daemon=True)
    _thread = runner
    runner.start()
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def set_recording(recording: bool) -> None:
    """Switch the tray icon image and title between recording and idle.

    Does nothing when no tray icon exists.
    """
    if _icon is None:
        return
    if recording:
        state = "recording"
    else:
        state = "idle"
    _icon.icon = _make_icon(state)
    _icon.title = f"voiceio - {state}"
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def stop() -> None:
    """Stop the tray icon, if there is one, and forget it."""
    global _icon
    if not _icon:
        return
    _icon.stop()
    _icon = None
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Text injection backends."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
if TYPE_CHECKING:
|
|
7
|
+
from voiceio.platform import Platform
|
|
8
|
+
from voiceio.typers.base import TyperBackend
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def create_typer_backend(name: str, platform: Platform, **kwargs) -> TyperBackend:
    """Instantiate the typer backend called *name*.

    Backend modules are imported only when selected. *platform* and
    *kwargs* are forwarded to the ``ibus`` backend only; every other backend
    is constructed without arguments.

    Raises:
        ValueError: if *name* is not a known backend.
    """
    if name not in ("xdotool", "ydotool", "wtype", "clipboard", "pynput", "ibus"):
        raise ValueError(f"Unknown typer backend: {name}")

    if name == "ibus":
        from voiceio.typers.ibus import IBusTyper
        return IBusTyper(platform, **kwargs)

    if name == "xdotool":
        from voiceio.typers.xdotool import XdotoolTyper as typer_cls
    elif name == "ydotool":
        from voiceio.typers.ydotool import YdotoolTyper as typer_cls
    elif name == "wtype":
        from voiceio.typers.wtype import WtypeTyper as typer_cls
    elif name == "clipboard":
        from voiceio.typers.clipboard import ClipboardTyper as typer_cls
    else:  # "pynput"
        from voiceio.typers.pynput_type import PynputTyper as typer_cls
    return typer_cls()
|