python-voiceio 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
voiceio/wizard.py ADDED
@@ -0,0 +1,882 @@
1
+ """Interactive setup wizard for voiceio."""
2
+ from __future__ import annotations
3
+
4
+ import os
5
+ import shutil
6
+ import subprocess
7
+ import sys
8
+ import threading
9
+ import time
10
+ from pathlib import Path
11
+
12
+ from voiceio.config import CONFIG_DIR, CONFIG_PATH
13
+
14
+ # ── Colors ──────────────────────────────────────────────────────────────────
15
+
16
# ANSI SGR escape sequences used to style all wizard output.
# Each one is emitted verbatim and stays in effect until RESET is written.
BOLD = "\033[1m"
DIM = "\033[2m"
RED = "\033[31m"
GREEN = "\033[32m"
YELLOW = "\033[33m"
CYAN = "\033[36m"
MAGENTA = "\033[35m"
RESET = "\033[0m"  # clears every attribute set above
24
+
25
# Banner printed once at the start of run_wizard(): the art is rendered in
# bold cyan, followed by a dimmed tagline.
LOGO = f"""{CYAN}{BOLD}
██╗ ██╗ ██████╗ ██╗ ██████╗███████╗██╗ ██████╗
██║ ██║██╔═══██╗██║██╔════╝██╔════╝██║██╔═══██╗
██║ ██║██║ ██║██║██║ █████╗ ██║██║ ██║
╚██╗ ██╔╝██║ ██║██║██║ ██╔══╝ ██║██║ ██║
╚████╔╝ ╚██████╔╝██║╚██████╗███████╗██║╚██████╔╝
╚═══╝ ╚═════╝ ╚═╝ ╚═════╝╚══════╝╚═╝ ╚═════╝
{RESET}{DIM} speak → text, locally, instantly{RESET}
"""
34
+
35
# Menu entries for the model step: (model name, size shown to the user,
# description). The first element is what gets written to the config.
MODELS = [
    ("tiny", "75 MB", "Fastest, basic accuracy"),
    ("base", "150 MB", "Fast, good accuracy (recommended)"),
    ("small", "500 MB", "Moderate speed, better accuracy"),
    ("medium", "1.5 GB", "Slower, great accuracy"),
    ("large-v3", "3 GB", "Slowest, best accuracy"),
]

# Menu entries for the language step: (language code, display name).
# "auto" is mapped to ``None`` before transcription in _streaming_test().
LANGUAGES = [
    ("en", "English"),
    ("es", "Spanish"),
    ("fr", "French"),
    ("de", "German"),
    ("pt", "Portuguese"),
    ("zh", "Chinese"),
    ("ja", "Japanese"),
    ("auto", "Auto-detect (slower)"),
]
53
+
54
+
55
def _print_step(n: int, total: int, title: str) -> None:
    """Print the header for wizard step *n* of *total*.

    Emits a blank line, a magenta ``[n/total]`` counter followed by the bold
    *title*, and then a dimmed 50-character horizontal rule.
    """
    rule = "─" * 50
    counter = f"{MAGENTA}[{n}/{total}]{RESET}"
    print(f"\n{counter} {BOLD}{title}{RESET}")
    print(f"{DIM}{rule}{RESET}")
59
+
60
+
61
def _ask(prompt: str, default: str = "") -> str:
    """Prompt for a line of text and return it, falling back to *default*.

    The default (when non-empty) is shown dimmed in square brackets after the
    prompt. End-of-input or Ctrl-C prints a newline and exits the process
    with status 0.

    Returns:
        The stripped answer, or *default* when the answer is empty.
    """
    hint = ""
    if default:
        hint = f" {DIM}[{default}]{RESET}"

    try:
        reply = input(f" {CYAN}›{RESET} {prompt}{hint}: ")
    except (EOFError, KeyboardInterrupt):
        print()
        sys.exit(0)

    reply = reply.strip()
    return reply if reply else default
69
+
70
+
71
def _ask_choice(options: list[tuple[str, ...]], default: int = 0) -> int:
    """Show an interactive single-choice menu and return the chosen index.

    Arrow keys, ``w``/``s`` and ``k``/``j`` move the selection (wrapping
    around), a digit jumps to that 1-based option, and Enter confirms.
    Ctrl-C restores the terminal settings and cursor, then exits the process
    with status 0. Stdin is put into raw mode while the menu is active and
    restored afterwards.

    Args:
        options: One tuple per entry; element 0 is the label, any further
            elements are shown in parentheses as detail text.
        default: Index highlighted when the menu is first drawn.

    Returns:
        The index into *options* that was selected when Enter was pressed.
    """
    import atexit
    import re
    import select
    import termios
    import tty

    # Ensure cursor is restored if process crashes/exits unexpectedly
    atexit.register(lambda: sys.stdout.write("\033[?25h"))

    selected = default
    n = len(options)

    try:
        cols = os.get_terminal_size().columns
    except OSError:
        cols = 80

    _ansi_re = re.compile(r"\033\[[0-9;]*m")
    # Same pattern with a capture group: re.split() only keeps the separators
    # in its result when the pattern captures them.
    _ansi_split_re = re.compile(r"(\033\[[0-9;]*m)")

    def _visible_len(s: str) -> int:
        """Length of *s* as displayed, i.e. without ANSI colour sequences."""
        return len(_ansi_re.sub("", s))

    def _truncate(s: str, max_width: int) -> str:
        """Cut *s* to *max_width* visible characters, keeping its colours."""
        max_width = max(max_width, 0)  # very narrow terminals: never slice from the end
        vis = 0
        result = []
        # With a capturing split, even indices are text and odd indices are
        # the escape sequences that separated them.
        for idx, part in enumerate(_ansi_split_re.split(s)):
            if idx % 2:
                result.append(part)
                continue
            remaining = max_width - vis
            result.append(part[:remaining])
            vis += min(len(part), remaining)
            if vis >= max_width:
                break
        # Always close with RESET so a cut-off sequence cannot bleed styling.
        return "".join(result) + RESET

    def _format_line(i: int) -> str:
        """Render option *i*, marking it if it is the current selection."""
        marker = f"{GREEN}●{RESET}" if i == selected else f"{DIM}○{RESET}"
        label = options[i][0]
        detail = f" {DIM}({', '.join(options[i][1:])}){RESET}" if len(options[i]) > 1 else ""
        line = f" {marker} {BOLD}{i + 1}{RESET}. {label}{detail}"
        if _visible_len(line) >= cols:
            # A wrapped line would break the cursor-up arithmetic in _redraw().
            line = _truncate(line, cols - 2)
        return line

    # Hint text shown below options
    hint = f" {DIM}\u2191\u2193 navigate, enter to confirm{RESET}"

    def _draw() -> None:
        """Draw menu from current cursor position.

        After this call, cursor is at column 0 of the hint line.
        """
        sys.stdout.write("\033[J")  # clear from cursor to end of screen
        for i in range(n):
            # Explicit \r\n: in raw mode a bare \n does not return the carriage.
            sys.stdout.write(f"{_format_line(i)}\r\n")
        sys.stdout.write(f"{hint}\r")
        sys.stdout.flush()

    def _redraw() -> None:
        """Move cursor from hint line back to first option, then redraw."""
        sys.stdout.write(f"\033[{n}A")
        _draw()

    def _read_key(fd: int) -> str:
        """Read one key press from *fd* and return a symbolic name or the char."""
        ch = os.read(fd, 1)
        if ch == b"\x1b":
            # Arrow keys arrive as ESC [ A / ESC [ B; poll briefly for the rest.
            if select.select([fd], [], [], 0.05)[0]:
                ch2 = os.read(fd, 1)
                if ch2 == b"[" and select.select([fd], [], [], 0.05)[0]:
                    ch3 = os.read(fd, 1)
                    if ch3 == b"A":
                        return "up"
                    if ch3 == b"B":
                        return "down"
            return "esc"
        if ch in (b"\r", b"\n"):
            return "enter"
        if ch == b"\x03":
            return "ctrl-c"
        return ch.decode("utf-8", errors="replace")

    sys.stdout.write("\033[?25l")  # hide cursor
    _draw()

    fd = sys.stdin.fileno()
    old = termios.tcgetattr(fd)
    try:
        tty.setraw(fd)
        while True:
            key = _read_key(fd)
            if key in ("up", "w", "k"):
                selected = (selected - 1) % n
            elif key in ("down", "s", "j"):
                selected = (selected + 1) % n
            elif key == "enter":
                break
            elif key == "ctrl-c":
                termios.tcsetattr(fd, termios.TCSADRAIN, old)
                sys.stdout.write("\033[?25h\r\n")
                sys.exit(0)
            elif key.isdigit():
                idx = int(key) - 1
                if 0 <= idx < n:
                    selected = idx
            else:
                continue
            _redraw()
    finally:
        termios.tcsetattr(fd, termios.TCSADRAIN, old)
        termios.tcflush(fd, termios.TCIFLUSH)  # flush stale input from raw mode

    # Final redraw showing confirmed selection, then move past menu
    _redraw()
    sys.stdout.write("\033[?25h\r\n\n")  # show cursor, past hint, blank line
    sys.stdout.flush()
    return selected
190
+
191
+
192
+ def _check_binary(name: str) -> bool:
193
+ return shutil.which(name) is not None
194
+
195
+
196
def _check_system() -> dict:
    """Check system dependencies and capabilities.

    Every probe is best-effort: a failing probe records ``False`` (or an
    empty list) instead of raising.

    Returns:
        A dict with the keys ``display`` (value of ``XDG_SESSION_TYPE`` or
        ``"unknown"``), ``xdotool``, ``xclip``, ``ydotool``, ``wtype``,
        ``ibus`` (bool: binary found on PATH), ``ibus_gi`` (bool),
        ``audio`` (bool: at least one input device), ``audio_devices``
        (list of input devices), ``cuda`` (bool) and ``input_group`` (bool:
        current process is in the ``input`` group).
    """
    checks: dict = {}

    # Display server
    checks["display"] = os.environ.get("XDG_SESSION_TYPE", "unknown")

    # Typer binaries
    for binary in ("xdotool", "xclip", "ydotool", "wtype", "ibus"):
        checks[binary] = _check_binary(binary)

    # IBus Python bindings (check system Python, not venv)
    checks["ibus_gi"] = False
    if checks["ibus"]:
        from voiceio.typers.ibus import _has_ibus_gi
        checks["ibus_gi"] = _has_ibus_gi()

    # Audio
    try:
        import sounddevice as sd
        devices = sd.query_devices()
        input_devs = [d for d in devices if d["max_input_channels"] > 0]
        checks["audio"] = len(input_devs) > 0
        checks["audio_devices"] = input_devs
    except Exception:
        checks["audio"] = False
        checks["audio_devices"] = []

    # GPU
    try:
        import ctranslate2
        # Ask for the device count directly. get_supported_compute_types()
        # returns compute-type names such as "int8", so testing it for the
        # literal "cuda" could never succeed.
        checks["cuda"] = ctranslate2.get_cuda_device_count() > 0
    except Exception:
        checks["cuda"] = False

    # Input group (for evdev)
    groups = os.getgroups()
    try:
        import grp
        input_gid = grp.getgrnam("input").gr_gid
        checks["input_group"] = input_gid in groups
    except (KeyError, ImportError):
        # No "input" group on this system, or no grp module on this platform.
        checks["input_group"] = False

    return checks
245
+
246
+
247
def _print_check(label: str, ok: bool, detail: str = "", optional: bool = False) -> None:
    """Print one line of the system-check report.

    The icon is a green tick when *ok*, a yellow circle when the check failed
    but is *optional*, and a red cross otherwise. *detail*, when given, is
    appended dimmed after the label.
    """
    if ok:
        colour, glyph = GREEN, "✓"
    else:
        colour, glyph = (YELLOW, "○") if optional else (RED, "✗")

    suffix = ""
    if detail:
        suffix = f" {DIM}{detail}{RESET}"

    print(f" {colour}{glyph}{RESET} {label}{suffix}")
256
+
257
+
258
# Single-slot cache used by _get_or_load_model(): the most recently loaded
# model object and the name it was loaded under. Both stay None until the
# first successful load.
_cached_model = None
_cached_model_name: str | None = None
260
+
261
+
262
def _get_or_load_model(model_name: str | None = None):
    """Return a ``WhisperModel`` for *model_name*, reusing the cached one.

    When *model_name* is None the name is read from the saved voiceio config.
    The model is created on the CPU with ``int8`` compute type and stored in
    the module-level cache, replacing any model cached under another name.
    """
    global _cached_model, _cached_model_name

    if model_name is None:
        from voiceio.config import load
        model_name = load().model.name

    cache_hit = _cached_model is not None and _cached_model_name == model_name
    if not cache_hit:
        from faster_whisper import WhisperModel
        _cached_model = WhisperModel(model_name, device="cpu", compute_type="int8")
        _cached_model_name = model_name

    return _cached_model
278
+
279
+
280
def _download_model(model_name: str) -> bool:
    """Load (and thereby fetch) the model *model_name*, reporting the result.

    The ``huggingface_hub`` logger is raised to ERROR for the duration of the
    load and restored afterwards, whether or not loading succeeds.

    Returns:
        True when the model loaded; False after printing the failure reason.
    """
    print(f"\n {CYAN}Downloading model '{model_name}'...{RESET}")
    print(f" {DIM}This only happens once. The model is cached locally.{RESET}\n")

    try:
        # Suppress HuggingFace "unauthenticated requests" warning during download
        import logging
        hub_log = logging.getLogger("huggingface_hub")
        saved_level = hub_log.level
        hub_log.setLevel(logging.ERROR)
        try:
            _get_or_load_model(model_name)
        finally:
            hub_log.setLevel(saved_level)
        print(f"\n {GREEN}✓{RESET} Model '{model_name}' ready!")
    except Exception as e:
        print(f"\n {RED}✗{RESET} Download failed: {e}")
        return False
    return True
300
+
301
+
302
def _write_config(
    model: str, language: str, hotkey: str, method: str, streaming: bool, backend: str,
    sound_enabled: bool = True, notify_clipboard: bool = False,
) -> None:
    """Write the voiceio TOML config file and print where it was saved.

    Creates ``CONFIG_DIR`` if needed and overwrites ``CONFIG_PATH``.

    Args:
        model: Whisper model name stored under ``[model] name``.
        language: Language code stored under ``[model] language``.
        hotkey: Key combo stored under ``[hotkey] key``.
        method: Output method stored under ``[output] method``.
        streaming: Value of ``[output] streaming``.
        backend: Hotkey backend stored under ``[hotkey] backend``.
        sound_enabled: Value of ``[feedback] sound_enabled``.
        notify_clipboard: Value of ``[feedback] notify_clipboard``.
    """
    def _toml_str(value: str) -> str:
        # Escape backslash first, then the quote, so free-form input (e.g. a
        # custom hotkey) cannot terminate the TOML basic string early.
        escaped = value.replace("\\", "\\\\").replace('"', '\\"')
        return f'"{escaped}"'

    def _toml_bool(flag: bool) -> str:
        return "true" if flag else "false"

    CONFIG_DIR.mkdir(parents=True, exist_ok=True)
    config_text = f"""# voiceio configuration, generated by setup wizard

[hotkey]
key = {_toml_str(hotkey)}
backend = {_toml_str(backend)}

[model]
name = {_toml_str(model)}
language = {_toml_str(language)}
device = "auto"
compute_type = "int8"

[audio]
sample_rate = 16000
device = "default"

[output]
method = {_toml_str(method)}
streaming = {_toml_bool(streaming)}

[feedback]
sound_enabled = {_toml_bool(sound_enabled)}
notify_clipboard = {_toml_bool(notify_clipboard)}

[tray]
enabled = false

[daemon]
log_level = "INFO"
"""
    # TOML files are UTF-8 by definition; do not depend on the locale encoding.
    CONFIG_PATH.write_text(config_text, encoding="utf-8")
    print(f"\n {GREEN}✓{RESET} Config saved to {DIM}{CONFIG_PATH}{RESET}")
339
+
340
+
341
def _setup_gnome_shortcut(hotkey: str) -> bool:
    """Register *hotkey* as a GNOME custom shortcut that runs voiceio-toggle.

    Sets the name, command and binding of a ``voiceio`` custom keybinding via
    ``gsettings`` and adds its path to the ``custom-keybindings`` list when it
    is not already listed.

    Returns:
        True on success; False (after printing the reason) when
        voiceio-toggle cannot be located or any ``gsettings`` call fails.
    """
    # Always use absolute path because GNOME doesn't know about venvs.
    # The venv location is tried first, then PATH.
    venv_candidate = Path(sys.prefix) / "bin" / "voiceio-toggle"
    if venv_candidate.exists():
        toggle_path = str(venv_candidate.resolve())
    else:
        on_path = shutil.which("voiceio-toggle")
        toggle_path = str(Path(on_path).resolve()) if on_path else None

    if not toggle_path:
        print(f" {RED}✗{RESET} voiceio-toggle not found")
        return False

    print(f" {DIM}Command: {toggle_path}{RESET}")

    schema = "org.gnome.settings-daemon.plugins.media-keys"
    path = "/org/gnome/settings-daemon/plugins/media-keys/custom-keybindings/voiceio/"
    binding_schema = f"{schema}.custom-keybinding:{path}"

    # Convert "super+v" -> "<Super>v"
    *modifiers, final_key = hotkey.split("+")
    gnome_combo = "".join(f"<{mod.capitalize()}>" for mod in modifiers) + final_key

    properties = (
        ("name", "voiceio toggle"),
        ("command", toggle_path),
        ("binding", gnome_combo),
    )

    try:
        for prop, value in properties:
            subprocess.run(
                ["gsettings", "set", binding_schema, prop, value],
                check=True, capture_output=True,
            )

        listing = subprocess.run(
            ["gsettings", "get", schema, "custom-keybindings"],
            capture_output=True, text=True, check=True,
        )
        current = listing.stdout.strip()
        if path in current:
            return True

        # "@as []" is how gsettings prints an empty string array.
        if current == "@as []":
            updated = f"['{path}']"
        else:
            updated = current.rstrip("]") + f", '{path}']"
        subprocess.run(
            ["gsettings", "set", schema, "custom-keybindings", updated],
            check=True, capture_output=True,
        )
        return True
    except Exception as e:
        print(f" {RED}✗{RESET} Failed: {e}")
        return False
398
+
399
+
400
def _streaming_test(model=None) -> None:
    """Record audio and stream transcription results in real-time.

    Opens a 16 kHz mono ``sounddevice`` input stream, re-transcribes all
    audio captured so far roughly every 0.8 s and rewrites the text on the
    current terminal line. Recording stops after 10 s, after 1.5 s of
    accumulated silence once more than three chunks were captured, or on
    Ctrl-C. A final transcription of the whole recording is then shown.

    Args:
        model: Object with a ``transcribe`` method; when None the configured
            model is obtained through ``_get_or_load_model()``.
    """
    import numpy as np
    import sounddevice as sd

    sample_rate = 16000
    chunk_secs = 0.5
    chunk_size = int(sample_rate * chunk_secs)
    silence_threshold = 0.01
    max_duration = 10

    if model is None:
        print(f"\n {CYAN}Loading model...{RESET}", end="", flush=True)
        model = _get_or_load_model()
        print(f"\r {GREEN}✓{RESET} Model loaded ")

    from voiceio.config import load
    cfg = load()
    lang = None if cfg.model.language == "auto" else cfg.model.language

    print(f"\n {YELLOW}Speak now!{RESET} (up to {max_duration}s, stops on 1.5s silence)")
    print(f" {DIM}{'─' * 40}{RESET}")
    sys.stdout.write(f" {BOLD}")
    sys.stdout.flush()

    captured: list[np.ndarray] = []
    silent_time = 0.0
    shown_text = ""
    shown_len = 0  # characters printed on screen
    recording = True

    def callback(indata, frames, time_info, status):
        # Called by sounddevice once per block of chunk_size frames.
        nonlocal silent_time
        if not recording:
            return
        captured.append(indata.copy())
        level = np.sqrt(np.mean(indata ** 2))
        if level < silence_threshold:
            silent_time += chunk_secs
        else:
            silent_time = 0.0

    def _transcribe(samples) -> str:
        # Join the stripped text of every returned segment with single spaces.
        segments, _ = model.transcribe(samples, language=lang, beam_size=5, vad_filter=True)
        return " ".join(seg.text.strip() for seg in segments).strip()

    def _wipe_shown() -> None:
        # Back up over the previous text, blank it out, and back up again.
        if shown_len > 0:
            sys.stdout.write("\b" * shown_len + " " * shown_len + "\b" * shown_len)

    stream = sd.InputStream(
        samplerate=sample_rate, channels=1, dtype="float32",
        blocksize=chunk_size, callback=callback,
    )
    stream.start()
    start_time = time.time()

    try:
        while recording:
            time.sleep(0.8)

            # Stop on the time limit, or on sustained silence once more than
            # three chunks have been captured.
            if (time.time() - start_time > max_duration
                    or (silent_time >= 1.5 and len(captured) > 3)):
                recording = False
                break

            if not captured:
                continue

            samples = np.concatenate(captured, axis=0).flatten()
            if len(samples) < sample_rate * 0.5:
                continue

            # Transcribe everything so far
            text = _transcribe(samples)
            if not text or text == shown_text:
                continue

            # Clear previous text and rewrite
            _wipe_shown()
            sys.stdout.write(text)
            sys.stdout.flush()
            shown_len = len(text)
            shown_text = text
    except KeyboardInterrupt:
        pass
    finally:
        stream.stop()
        stream.close()

    # Final transcription of complete audio
    if captured:
        samples = np.concatenate(captured, axis=0).flatten()
        if len(samples) >= sample_rate * 0.3:
            final_text = _transcribe(samples)
            if final_text and final_text != shown_text:
                _wipe_shown()
                sys.stdout.write(final_text)
                shown_text = final_text

    sys.stdout.write(f"{RESET}\n")
    print(f" {DIM}{'─' * 40}{RESET}")

    if shown_text:
        print(f" {GREEN}✓{RESET} Transcribed successfully!")
    else:
        print(f" {YELLOW}⚠{RESET} No speech detected. Check your microphone.")
507
+
508
+
509
def _test_hotkey(hotkey: str, backend: str) -> bool:
    """Check interactively that pressing *hotkey* reaches voiceio.

    For the ``socket`` backend a temporary datagram socket is bound at
    ``SOCKET_PATH`` and the user has 10 s to press the shortcut; a
    ``b"toggle"`` datagram counts as success. On failure voiceio-toggle is
    run directly against a fresh socket to tell a broken command apart from
    a shortcut that never fired. For any other backend the native hotkey
    listener is started and given 10 s to trigger.

    Args:
        hotkey: Key combo being tested (e.g. ``"ctrl+alt+v"``).
        backend: Hotkey backend name; ``"socket"`` selects the socket test.

    Returns:
        True when the hotkey was detected, otherwise False.
    """
    from voiceio.hotkeys.socket_backend import SOCKET_PATH

    print(f"\n {CYAN}Testing hotkey: {BOLD}{hotkey}{RESET}")
    print(f" {DIM}Backend: {backend}{RESET}")

    if backend == "socket":
        # For socket backend, we test if the GNOME shortcut triggers voiceio-toggle
        import socket as sock

        def _listen(timeout: float):
            """Bind a fresh datagram socket at SOCKET_PATH with *timeout*."""
            SOCKET_PATH.unlink(missing_ok=True)
            listener = sock.socket(sock.AF_UNIX, sock.SOCK_DGRAM)
            try:
                listener.bind(str(SOCKET_PATH))
                listener.settimeout(timeout)
            except Exception:
                listener.close()
                raise
            return listener

        def _release(listener) -> None:
            """Close *listener* and remove the socket file it was bound to."""
            listener.close()
            SOCKET_PATH.unlink(missing_ok=True)

        # Start a temporary socket listener
        listener = _listen(10.0)
        print(f"\n {YELLOW}Press {BOLD}{hotkey}{RESET}{YELLOW} now (in any window)...{RESET}", end="", flush=True)
        try:
            data = listener.recv(64)
        except sock.timeout:
            data = None
        finally:
            # Always release: a lingering socket file makes the re-bind below fail.
            _release(listener)

        if data == b"toggle":
            print(f"\r {GREEN}✓{RESET} Hotkey works! ")
            return True
        print(f"\r {RED}✗{RESET} No hotkey detected after 10s ")

        # Diagnostic: test if voiceio-toggle itself works
        print(f"\n {DIM}Diagnosing...{RESET}")
        venv_toggle = Path(sys.prefix) / "bin" / "voiceio-toggle"
        # Same lookup order as _setup_gnome_shortcut: venv first, then PATH.
        toggle_cmd = str(venv_toggle) if venv_toggle.exists() else shutil.which("voiceio-toggle")
        print(f" {DIM}Running voiceio-toggle directly...{RESET}", end="", flush=True)

        # Re-bind socket for the direct test
        listener = _listen(3.0)
        toggle_ok = False
        try:
            if toggle_cmd is not None:
                subprocess.run([toggle_cmd], capture_output=True, timeout=3)
                toggle_ok = listener.recv(64) == b"toggle"
        except Exception:
            toggle_ok = False
        finally:
            _release(listener)

        if toggle_ok:
            print(f" {GREEN}OK{RESET}")
            print(f"\n {YELLOW}ℹ{RESET} voiceio-toggle works, but the DE shortcut didn't fire.")
            print(" The GNOME shortcut may need a moment to register, or")
            print(f" {BOLD}{hotkey}{RESET} may conflict with an existing shortcut.")
        else:
            print(f" {RED}FAIL{RESET}")
            print(f"\n {YELLOW}ℹ{RESET} voiceio-toggle itself failed. This is unexpected.")
        return False

    # For evdev/x11 backends, use the native listener
    triggered = threading.Event()

    def on_trigger():
        triggered.set()

    try:
        from voiceio.hotkeys import chain as hotkey_chain
        from voiceio import platform as plat
        platform = plat.detect()
        hk = hotkey_chain.select(platform, override=backend)
        hk.start(hotkey, on_trigger)
        stop = hk.stop
    except Exception as e:
        print(f"\n {RED}✗{RESET} Backend failed: {e}")
        return False

    print(f"\n {YELLOW}Press {BOLD}{hotkey}{RESET}{YELLOW} now...{RESET}", end="", flush=True)

    ok = triggered.wait(timeout=10.0)
    stop()

    if ok:
        print(f"\r {GREEN}✓{RESET} Hotkey works! ")
        return True
    print(f"\r {RED}✗{RESET} No hotkey detected after 10s ")
    return False
595
+
596
+
597
def run_test() -> None:
    """Standalone test command: voiceio-test.

    Offers a menu with a microphone/streaming test, a hotkey test, or both.
    The hotkey test uses the key and backend from the saved config and prints
    troubleshooting hints when it fails.
    """
    print(f"{CYAN}{BOLD}voiceio test{RESET}\n")

    menu = [
        ("Mic + streaming transcription", "Test your microphone and see real-time transcription"),
        ("Hotkey test", "Verify your keyboard shortcut works"),
        ("Full test", "Both of the above"),
    ]
    choice = _ask_choice(menu, default=0)
    rule = f"{DIM}{'─' * 40}{RESET}"

    if choice in (0, 2):
        print(f"\n{BOLD}Mic test{RESET}")
        print(rule)
        _streaming_test()

    if choice in (1, 2):
        print(f"\n{BOLD}Hotkey test{RESET}")
        print(rule)
        from voiceio.config import load
        cfg = load()
        if not _test_hotkey(cfg.hotkey.key, cfg.hotkey.backend):
            for hint in (
                f"\n {YELLOW}Troubleshooting:{RESET}",
                f" {DIM}• On Wayland/GNOME: run {BOLD}voiceio --setup-shortcut{RESET}",
                f" {DIM}• Or add shortcut manually: Settings → Keyboard → Custom Shortcuts{RESET}",
                f" {DIM} Command: voiceio-toggle{RESET}",
            ):
                print(hint)

    print()
627
+
628
+
629
def run_wizard() -> None:
    """Run the interactive nine-step setup wizard.

    Steps: system check, model choice, language, hotkey, feedback, model
    download, config and shortcut, autostart, and tests. Exits the process
    with status 1 when no microphone or text-injection backend is available
    or when the model download fails. If the voiceio service is already
    running it is restarted at the end, and a summary is printed.
    """
    print(LOGO)

    total_steps = 9

    # ── Step 1: System check ────────────────────────────────────────────
    _print_step(1, total_steps, "System check")
    checks = _check_system()

    _print_check("Display server", True, checks["display"])
    _print_check("Audio input", checks["audio"],
                 f"{len(checks['audio_devices'])} device(s)" if checks["audio"] else "no devices found")

    # IBus (preferred typer on Linux)
    if checks["ibus"] and checks["ibus_gi"]:
        _print_check("IBus", True, "recommended, atomic text insertion")
    elif checks["ibus"]:
        _print_check("IBus", False, "install bindings: sudo apt install gir1.2-ibus-1.0")
    else:
        _print_check("IBus", False, "install: sudo apt install ibus gir1.2-ibus-1.0")

    # Fallback typers (optional)
    if checks["display"] == "wayland":
        _print_check("ydotool", checks["ydotool"],
                     "fallback" if checks["ydotool"] else "optional: sudo apt install ydotool",
                     optional=True)
        _print_check("wtype", checks["wtype"],
                     "fallback" if checks["wtype"] else "optional: sudo apt install wtype",
                     optional=True)
    else:
        _print_check("xdotool", checks["xdotool"],
                     "fallback" if checks["xdotool"] else "optional: sudo apt install xdotool",
                     optional=True)

    _print_check("CUDA GPU", checks["cuda"],
                 "will use GPU" if checks["cuda"] else "will use CPU (still fast)",
                 optional=True)

    if checks["display"] == "wayland":
        _print_check("Input group (evdev)", checks["input_group"],
                     "" if checks["input_group"] else "optional: sudo usermod -aG input $USER",
                     optional=True)

    # Install CLI symlinks to ~/.local/bin/
    from voiceio.service import install_symlinks, symlinks_installed, path_hint_needed, _is_pipx_install
    if _is_pipx_install():
        _print_check("CLI commands", True, "installed via pipx (already on PATH)")
    elif not symlinks_installed():
        linked = install_symlinks()
        if linked:
            _print_check("CLI commands", True, f"linked {len(linked)} commands to ~/.local/bin/")
            if path_hint_needed():
                print(f" {YELLOW}ℹ{RESET} {DIM}Restart your terminal for 'voiceio' to be on PATH{RESET}")
        else:
            _print_check("CLI commands", False, "could not create symlinks in ~/.local/bin/")
    else:
        _print_check("CLI commands", True, "voiceio in PATH")

    if not checks["audio"]:
        print(f"\n {RED}No microphone found. Connect one and try again.{RESET}")
        sys.exit(1)

    # Need at least one typer
    has_typer = checks["ibus"] and checks["ibus_gi"]
    has_typer = has_typer or checks["xdotool"] or checks["ydotool"] or checks["wtype"]
    if not has_typer:
        print(f"\n {RED}No text injection backend available.{RESET}")
        print(f" {DIM}Install one: sudo apt install ibus gir1.2-ibus-1.0{RESET}")
        sys.exit(1)

    # ── Step 2: Choose model ────────────────────────────────────────────
    _print_step(2, total_steps, "Choose a Whisper model")
    print(f" {DIM}Larger models are more accurate but slower and use more RAM.{RESET}\n")
    model_idx = _ask_choice(MODELS, default=1)
    model_name = MODELS[model_idx][0]

    # ── Step 3: Language ────────────────────────────────────────────────
    _print_step(3, total_steps, "Language")
    print(f" {DIM}Pick your primary language, or auto-detect.{RESET}\n")
    lang_idx = _ask_choice(LANGUAGES, default=0)
    language = LANGUAGES[lang_idx][0]

    # ── Step 4: Hotkey ──────────────────────────────────────────────────
    _print_step(4, total_steps, "Keyboard shortcut")
    print(f" {DIM}This combo toggles recording on/off.{RESET}\n")
    hotkey_options = [
        ("ctrl+alt+v", "Ctrl + Alt + V (recommended)"),
        ("alt+v", "Alt + V"),
        ("ctrl+shift+v", "Ctrl + Shift + V"),
        ("super+v", "Super + V (may not work on Wayland/GNOME)"),
        ("Custom",),
    ]

    def _pick_hotkey(default_idx: int, custom_default: str) -> str:
        """Show the hotkey menu; its last entry prompts for a free-form combo."""
        picked = _ask_choice(hotkey_options, default=default_idx)
        if picked == len(hotkey_options) - 1:
            return _ask("Enter combo (e.g. ctrl+shift+r)", custom_default)
        return hotkey_options[picked][0]

    hotkey = _pick_hotkey(0, "super+v")

    # Output method: auto selects best available (IBus preferred)
    method = "auto"
    if checks["ibus"] and checks["ibus_gi"]:
        print(f"\n {GREEN}✓{RESET} {DIM}Text injection: IBus (best quality, auto-selected){RESET}")
        # Install IBus component and add GNOME input source
        from voiceio.typers.ibus import install_component, _ensure_gnome_input_source
        if install_component():
            print(f" {GREEN}✓{RESET} {DIM}IBus engine component installed{RESET}")
            _ensure_gnome_input_source()
            print(f" {GREEN}✓{RESET} {DIM}Added VoiceIO to GNOME input sources{RESET}")
        else:
            print(f" {YELLOW}⚠{RESET} {DIM}Could not install IBus component, will use fallback{RESET}")

    # Backend: on Wayland use evdev when the user is in the input group,
    # otherwise fall back to the socket backend driven by a DE shortcut.
    if checks["display"] == "wayland":
        backend = "evdev" if checks["input_group"] else "socket"
    else:
        backend = "auto"

    # ── Step 5: Feedback ───────────────────────────────────────────────
    _print_step(5, total_steps, "Feedback")
    print(f" {DIM}Sound plays when text is committed. Notifications show clipboard status.{RESET}\n")
    feedback_options = [
        ("Sound only", "short chime on commit"),
        ("Sound + notification", "also shows a desktop notification"),
        ("None", "silent"),
    ]
    fb_idx = _ask_choice(feedback_options, default=0)
    sound_enabled = fb_idx in (0, 1)
    notify_clipboard = fb_idx == 1

    # ── Step 6: Download model ──────────────────────────────────────────
    _print_step(6, total_steps, "Download model")
    if not _download_model(model_name):
        sys.exit(1)

    # ── Step 7: Save config & set up shortcut ───────────────────────────
    _print_step(7, total_steps, "Save config & shortcut")

    _write_config(model_name, language, hotkey, method, streaming=True, backend=backend,
                  sound_enabled=sound_enabled, notify_clipboard=notify_clipboard)

    # Set up DE shortcut if on GNOME + socket backend
    desktop = os.environ.get("XDG_CURRENT_DESKTOP", "")
    if "GNOME" in desktop and backend == "socket":
        print(f"\n {CYAN}Setting up GNOME keyboard shortcut...{RESET}")
        if _setup_gnome_shortcut(hotkey):
            print(f" {GREEN}✓{RESET} Shortcut {BOLD}{hotkey}{RESET} → voiceio-toggle configured!")
        else:
            print(f" {YELLOW}⚠{RESET} Auto-setup failed. Add manually in Settings → Keyboard → Shortcuts:")
            print(" Command: voiceio-toggle")
    elif backend == "socket":
        print(f"\n {YELLOW}ℹ{RESET} Add a keyboard shortcut manually in your DE settings:")
        print(f" Shortcut: {BOLD}{hotkey}{RESET}")
        print(f" Command: {BOLD}voiceio-toggle{RESET}")

    # ── Step 8: Autostart ─────────────────────────────────────────────────
    _print_step(8, total_steps, "Autostart")
    from voiceio.service import has_systemd
    autostart_idx = 1  # default: no autostart
    if has_systemd():
        print(f" {DIM}Install a systemd user service so voiceio starts on login{RESET}")
        print(f" {DIM}and restarts automatically if it crashes.{RESET}\n")
        autostart_options = [
            ("Yes", "install & enable systemd service"),
            ("No", "I'll start it manually"),
        ]
        autostart_idx = _ask_choice(autostart_options, default=0)
        if autostart_idx == 0:
            from voiceio.service import install_service
            if install_service():
                print(f" {GREEN}✓{RESET} Systemd service installed and enabled")
                print(f" {DIM}voiceio will start automatically on next login{RESET}")
            else:
                print(f" {YELLOW}⚠{RESET} Could not install systemd service")
                print(f" {DIM}Start manually with: voiceio{RESET}")
    else:
        print(f" {DIM}systemd not available, skipping autostart setup{RESET}")
        print(f" {DIM}Start manually with: voiceio{RESET}")

    # ── Step 9: Test ────────────────────────────────────────────────────
    _print_step(9, total_steps, "Test")

    # Hotkey test
    print(f" {DIM}Let's verify your shortcut works.{RESET}")
    hotkey_ok = _test_hotkey(hotkey, backend)

    if not hotkey_ok:
        print(f"\n {YELLOW}Troubleshooting:{RESET}")
        if backend == "socket" and "GNOME" in desktop:
            print(f" {DIM}• The GNOME shortcut may need a moment to register.{RESET}")
            print(f" {DIM}• Try: Settings → Keyboard → Custom Shortcuts to verify.{RESET}")
            print(f" {DIM}• Shortcut command should be: {BOLD}voiceio-toggle{RESET}")
        retry = _ask("Try a different shortcut? (y/n)", "y")
        if retry.lower() in ("y", "yes"):
            print()
            hotkey = _pick_hotkey(1, "ctrl+shift+v")

            # Re-save config and shortcut with new hotkey
            _write_config(model_name, language, hotkey, method, streaming=True, backend=backend,
                          sound_enabled=sound_enabled, notify_clipboard=notify_clipboard)
            if "GNOME" in desktop and backend == "socket":
                _setup_gnome_shortcut(hotkey)

            # The result is informational only; _test_hotkey prints the outcome.
            _test_hotkey(hotkey, backend)

    # Mic + streaming test
    print(f"\n{'─' * 50}")
    test = _ask("Run a streaming mic test? (y/n)", "y")
    if test.lower() in ("y", "yes", ""):
        _streaming_test(model=_get_or_load_model())

    # ── Done ────────────────────────────────────────────────────────────
    # Restart service if it was already running (setup may have killed the
    # IBus engine via `ibus restart`)
    from voiceio.service import is_running
    if is_running():
        try:
            restart = subprocess.run(
                ["systemctl", "--user", "restart", "voiceio.service"],
                capture_output=True, timeout=5,
            )
            restarted = restart.returncode == 0
        except (OSError, subprocess.SubprocessError):
            # Missing systemctl or a timeout must not abort a finished setup.
            restarted = False
        if restarted:
            print(f" {GREEN}✓{RESET} {DIM}Restarted voiceio service{RESET}")
        else:
            print(f" {YELLOW}⚠{RESET} {DIM}Could not restart voiceio service; "
                  f"run: systemctl --user restart voiceio{RESET}")

    from voiceio.config import LOG_PATH
    log_path = LOG_PATH
    if autostart_idx == 0:
        start_hint = (
            " It will start automatically on next login.\n"
            " Or start now:\n"
            f" {CYAN}systemctl --user start voiceio{RESET}"
        )
    else:
        start_hint = f" Start voiceio:\n {CYAN}voiceio{RESET}"
    print(f"""
{GREEN}{'━' * 50}{RESET}
{BOLD} Setup complete!{RESET}

{start_hint}

Press {BOLD}{hotkey}{RESET} to toggle recording.
Speak naturally, and text streams at your cursor.

Useful commands:
{CYAN}voiceio doctor{RESET} check system health
{CYAN}voiceio test{RESET} test mic + hotkey

Config: {DIM}{CONFIG_PATH}{RESET}
Logs: {DIM}{log_path}{RESET}
{GREEN}{'━' * 50}{RESET}
""")