python-voiceio 0.2.3__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. {python_voiceio-0.2.3/python_voiceio.egg-info → python_voiceio-0.3.0}/PKG-INFO +93 -79
  2. python_voiceio-0.3.0/README.md +219 -0
  3. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/pyproject.toml +16 -2
  4. {python_voiceio-0.2.3 → python_voiceio-0.3.0/python_voiceio.egg-info}/PKG-INFO +93 -79
  5. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/python_voiceio.egg-info/SOURCES.txt +44 -1
  6. python_voiceio-0.3.0/python_voiceio.egg-info/requires.txt +41 -0
  7. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/tests/test_app_wiring.py +92 -25
  8. python_voiceio-0.3.0/tests/test_autocorrect.py +264 -0
  9. python_voiceio-0.3.0/tests/test_clipboard_read.py +91 -0
  10. python_voiceio-0.3.0/tests/test_commands.py +173 -0
  11. python_voiceio-0.3.0/tests/test_corrections.py +148 -0
  12. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/tests/test_fallback.py +10 -0
  13. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/tests/test_health.py +6 -6
  14. python_voiceio-0.3.0/tests/test_hints.py +74 -0
  15. python_voiceio-0.3.0/tests/test_history.py +90 -0
  16. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/tests/test_ibus_typer.py +7 -0
  17. python_voiceio-0.3.0/tests/test_llm.py +217 -0
  18. python_voiceio-0.3.0/tests/test_llm_api.py +118 -0
  19. python_voiceio-0.3.0/tests/test_numbers.py +101 -0
  20. python_voiceio-0.3.0/tests/test_postprocess.py +107 -0
  21. python_voiceio-0.3.0/tests/test_prompt.py +111 -0
  22. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/tests/test_streaming.py +2 -2
  23. python_voiceio-0.3.0/tests/test_tts.py +158 -0
  24. python_voiceio-0.3.0/tests/test_vad.py +118 -0
  25. python_voiceio-0.3.0/tests/test_vocabulary.py +71 -0
  26. python_voiceio-0.3.0/tests/test_wordfreq.py +80 -0
  27. python_voiceio-0.3.0/voiceio/__init__.py +1 -0
  28. python_voiceio-0.3.0/voiceio/app.py +655 -0
  29. python_voiceio-0.3.0/voiceio/autocorrect.py +284 -0
  30. python_voiceio-0.3.0/voiceio/cli.py +1094 -0
  31. python_voiceio-0.3.0/voiceio/clipboard_read.py +69 -0
  32. python_voiceio-0.3.0/voiceio/commands.py +130 -0
  33. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/config.py +58 -2
  34. python_voiceio-0.3.0/voiceio/corrections.py +160 -0
  35. python_voiceio-0.3.0/voiceio/demo.py +199 -0
  36. python_voiceio-0.3.0/voiceio/feedback.py +162 -0
  37. python_voiceio-0.3.0/voiceio/health.py +408 -0
  38. python_voiceio-0.3.0/voiceio/hints.py +58 -0
  39. python_voiceio-0.3.0/voiceio/history.py +64 -0
  40. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/hotkeys/chain.py +1 -0
  41. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/hotkeys/pynput_backend.py +23 -0
  42. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/hotkeys/socket_backend.py +35 -12
  43. python_voiceio-0.3.0/voiceio/llm.py +258 -0
  44. python_voiceio-0.3.0/voiceio/llm_api.py +130 -0
  45. python_voiceio-0.3.0/voiceio/models/silero_vad.onnx +0 -0
  46. python_voiceio-0.3.0/voiceio/numbers.py +228 -0
  47. python_voiceio-0.3.0/voiceio/pidlock.py +22 -0
  48. python_voiceio-0.3.0/voiceio/platform.py +272 -0
  49. python_voiceio-0.3.0/voiceio/postprocess.py +84 -0
  50. python_voiceio-0.3.0/voiceio/prompt.py +73 -0
  51. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/recorder.py +53 -13
  52. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/service.py +73 -9
  53. python_voiceio-0.3.0/voiceio/sounds/__init__.py +0 -0
  54. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/sounds/commit.wav +0 -0
  55. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/sounds/start.wav +0 -0
  56. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/sounds/stop.wav +0 -0
  57. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/streaming.py +84 -19
  58. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/transcriber.py +14 -2
  59. python_voiceio-0.3.0/voiceio/tray/__init__.py +277 -0
  60. python_voiceio-0.3.0/voiceio/tray/_icons.py +125 -0
  61. python_voiceio-0.3.0/voiceio/tray/_indicator.py +181 -0
  62. python_voiceio-0.3.0/voiceio/tray/_pystray.py +123 -0
  63. python_voiceio-0.3.0/voiceio/tts/__init__.py +11 -0
  64. python_voiceio-0.3.0/voiceio/tts/base.py +29 -0
  65. python_voiceio-0.3.0/voiceio/tts/chain.py +79 -0
  66. python_voiceio-0.3.0/voiceio/tts/edge_engine.py +74 -0
  67. python_voiceio-0.3.0/voiceio/tts/espeak.py +47 -0
  68. python_voiceio-0.3.0/voiceio/tts/piper_engine.py +90 -0
  69. python_voiceio-0.3.0/voiceio/tts/player.py +62 -0
  70. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/typers/chain.py +1 -0
  71. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/typers/clipboard.py +49 -6
  72. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/typers/ibus.py +3 -2
  73. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/typers/pynput_type.py +9 -0
  74. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/typers/wtype.py +2 -1
  75. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/typers/xdotool.py +2 -1
  76. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/typers/ydotool.py +2 -1
  77. python_voiceio-0.3.0/voiceio/vad.py +122 -0
  78. python_voiceio-0.3.0/voiceio/vocabulary.py +59 -0
  79. python_voiceio-0.3.0/voiceio/wizard.py +1463 -0
  80. python_voiceio-0.3.0/voiceio/wordfreq.py +69 -0
  81. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/worker.py +16 -1
  82. python_voiceio-0.2.3/README.md +0 -223
  83. python_voiceio-0.2.3/python_voiceio.egg-info/requires.txt +0 -20
  84. python_voiceio-0.2.3/voiceio/__init__.py +0 -1
  85. python_voiceio-0.2.3/voiceio/app.py +0 -415
  86. python_voiceio-0.2.3/voiceio/cli.py +0 -476
  87. python_voiceio-0.2.3/voiceio/feedback.py +0 -78
  88. python_voiceio-0.2.3/voiceio/health.py +0 -194
  89. python_voiceio-0.2.3/voiceio/platform.py +0 -139
  90. python_voiceio-0.2.3/voiceio/tray.py +0 -54
  91. python_voiceio-0.2.3/voiceio/wizard.py +0 -883
  92. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/LICENSE +0 -0
  93. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/python_voiceio.egg-info/dependency_links.txt +0 -0
  94. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/python_voiceio.egg-info/entry_points.txt +0 -0
  95. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/python_voiceio.egg-info/top_level.txt +0 -0
  96. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/setup.cfg +0 -0
  97. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/tests/test_backend_probes.py +0 -0
  98. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/tests/test_config.py +0 -0
  99. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/tests/test_platform.py +0 -0
  100. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/tests/test_prebuffer.py +0 -0
  101. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/tests/test_recorder_integration.py +0 -0
  102. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/tests/test_transcriber.py +0 -0
  103. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/__main__.py +0 -0
  104. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/backends.py +0 -0
  105. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/hotkeys/__init__.py +0 -0
  106. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/hotkeys/base.py +0 -0
  107. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/hotkeys/evdev.py +0 -0
  108. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/ibus/__init__.py +0 -0
  109. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/ibus/engine.py +0 -0
  110. {python_voiceio-0.2.3/voiceio/sounds → python_voiceio-0.3.0/voiceio/models}/__init__.py +0 -0
  111. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/typers/__init__.py +0 -0
  112. {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/typers/base.py +0 -0
@@ -1,16 +1,20 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-voiceio
3
- Version: 0.2.3
3
+ Version: 0.3.0
4
4
  Summary: Speak → text, locally, instantly.
5
5
  Author: Hugo Montenegro
6
6
  License-Expression: MIT
7
7
  Project-URL: Homepage, https://github.com/Hugo0/voiceio
8
+ Project-URL: Repository, https://github.com/Hugo0/voiceio
8
9
  Project-URL: Issues, https://github.com/Hugo0/voiceio/issues
9
- Keywords: voice,speech-to-text,whisper,linux,dictation,wayland,ibus
10
+ Project-URL: Changelog, https://github.com/Hugo0/voiceio/releases
11
+ Keywords: voice,speech-to-text,whisper,linux,windows,dictation,wayland,ibus
10
12
  Classifier: Development Status :: 4 - Beta
11
13
  Classifier: Environment :: X11 Applications
12
14
  Classifier: Intended Audience :: End Users/Desktop
13
15
  Classifier: Operating System :: POSIX :: Linux
16
+ Classifier: Operating System :: Microsoft :: Windows
17
+ Classifier: Operating System :: MacOS
14
18
  Classifier: Programming Language :: Python :: 3
15
19
  Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
16
20
  Requires-Python: >=3.11
@@ -19,14 +23,28 @@ License-File: LICENSE
19
23
  Requires-Dist: faster-whisper>=1.0.0
20
24
  Requires-Dist: sounddevice>=0.4.6
21
25
  Requires-Dist: numpy>=1.24.0
26
+ Requires-Dist: onnxruntime>=1.16.0
27
+ Requires-Dist: wordfreq>=3.0
22
28
  Requires-Dist: evdev>=1.6.0; sys_platform == "linux"
29
+ Requires-Dist: pynput>=1.7.6; sys_platform == "win32"
30
+ Requires-Dist: pynput>=1.7.6; sys_platform == "darwin"
31
+ Requires-Dist: pyperclip>=1.8.0; sys_platform == "win32"
32
+ Requires-Dist: win11toast>=0.36; sys_platform == "win32"
23
33
  Provides-Extra: x11
24
34
  Requires-Dist: pynput>=1.7.6; extra == "x11"
25
35
  Provides-Extra: mac
26
36
  Requires-Dist: pynput>=1.7.6; extra == "mac"
37
+ Provides-Extra: win
38
+ Requires-Dist: pynput>=1.7.6; extra == "win"
39
+ Requires-Dist: pyperclip>=1.8.0; extra == "win"
40
+ Requires-Dist: win11toast>=0.36; extra == "win"
27
41
  Provides-Extra: tray
28
42
  Requires-Dist: pystray>=0.19; extra == "tray"
29
43
  Requires-Dist: Pillow>=10.0; extra == "tray"
44
+ Provides-Extra: tts
45
+ Requires-Dist: piper-tts>=1.2.0; extra == "tts"
46
+ Provides-Extra: tts-cloud
47
+ Requires-Dist: edge-tts>=6.1.0; extra == "tts-cloud"
30
48
  Provides-Extra: dev
31
49
  Requires-Dist: pytest>=7.0; extra == "dev"
32
50
  Requires-Dist: pytest-mock; extra == "dev"
@@ -41,15 +59,6 @@ Dynamic: license-file
41
59
 
42
60
  Speak → text, locally, instantly.
43
61
 
44
- <!-- demo video -->
45
- <p align="center">
46
- <a href="https://www.tella.tv/video/YOUR_VIDEO_ID">
47
- <img src="https://github.com/Hugo0/voiceio/raw/main/assets/demo-thumbnail.png" alt="voiceio demo" width="600">
48
- </a>
49
- <br>
50
- <em>Click to watch the demo</em>
51
- </p>
52
-
53
62
  ## Quick start
54
63
 
55
64
  ```bash
@@ -85,6 +94,31 @@ voiceio setup
85
94
  ```
86
95
  </details>
87
96
 
97
+ <details>
98
+ <summary><strong>Windows</strong></summary>
99
+
100
+ ```powershell
101
+ # Option A: Install with pip (requires Python 3.11+)
102
+ pip install python-voiceio
103
+ voiceio setup
104
+
105
+ # Option B: Download the installer from GitHub Releases (no Python needed)
106
+ # https://github.com/Hugo0/voiceio/releases
107
+ # Also available as a portable .zip if you prefer no installation.
108
+ ```
109
+
110
+ Windows uses pynput for hotkeys and text injection. No extra system dependencies required.
111
+ </details>
112
+
113
+ <details>
114
+ <summary><strong>macOS</strong></summary>
115
+
116
+ ```bash
117
+ pipx install python-voiceio
118
+ voiceio setup
119
+ ```
120
+ </details>
121
+
88
122
  <details>
89
123
  <summary><strong>Build from source</strong></summary>
90
124
 
@@ -93,12 +127,16 @@ If you want the source code locally to hack on or customize for personal use. PR
93
127
  ```bash
94
128
  git clone https://github.com/Hugo0/voiceio
95
129
  cd voiceio
96
- pip install -e ".[linux,dev]"
97
- voiceio setup
130
+ uv pip install -e ".[linux,dev]"
131
+
132
+ # Bootstrap CLI commands onto PATH (creates ~/.local/bin/voiceio)
133
+ uv run voiceio setup
98
134
  ```
135
+
136
+ > **Note:** Source installs live inside a virtualenv, so `voiceio` isn't on PATH until setup creates symlinks in `~/.local/bin/`. If `voiceio` isn't found after setup, restart your terminal or run `export PATH="$HOME/.local/bin:$PATH"`.
99
137
  </details>
100
138
 
101
- > You can also install with `uv tool install voiceio` or `pip install voiceio`.
139
+ > You can also install with `uv tool install python-voiceio` or `pip install python-voiceio`.
102
140
 
103
141
  ## How it works
104
142
 
@@ -107,11 +145,7 @@ hotkey → mic capture → whisper (local) → text at cursor
107
145
  pre-buffered streaming IBus / clipboard
108
146
  ```
109
147
 
110
- 1. Press your hotkey: voiceio starts recording (with a 1-second pre-buffer, so it catches the beginning even if you start speaking before pressing)
111
- 2. Speak naturally: text streams into the focused app in real-time as an underlined preview
112
- 3. Press the hotkey again: the final transcription replaces the preview and is committed
113
-
114
- Transcription runs locally via [faster-whisper](https://github.com/SYSTRAN/faster-whisper). Text is injected through IBus (works in any GTK/Qt app: browsers, Telegram, editors) with an automatic clipboard fallback for terminals.
148
+ Press your hotkey to start recording (1s pre-buffer catches the first syllable). Text streams into the focused app as an underlined preview. Press again to commit. Transcription runs locally via [faster-whisper](https://github.com/SYSTRAN/faster-whisper), text is injected through IBus (any GTK/Qt app) with clipboard fallback for terminals.
115
149
 
116
150
  ## Features
117
151
 
@@ -143,14 +177,18 @@ voiceio setup Interactive setup wizard
143
177
  voiceio doctor Health check (--fix to auto-repair)
144
178
  voiceio test Test microphone + live transcription
145
179
  voiceio toggle Toggle recording on a running daemon
146
- voiceio service install Autostart on login via systemd
180
+ voiceio update Update to latest version
181
+ voiceio service install Autostart on login (systemd / Windows Startup)
147
182
  voiceio logs View recent logs
148
183
  voiceio uninstall Remove all system integrations
149
184
  ```
150
185
 
151
186
  ## Configuration
152
187
 
153
- `voiceio setup` handles everything interactively. To tweak later, edit `~/.config/voiceio/config.toml` or override at runtime:
188
+ `voiceio setup` handles everything interactively. To tweak later, edit the config file or override at runtime:
189
+
190
+ - Linux/macOS: `~/.config/voiceio/config.toml`
191
+ - Windows: `%LOCALAPPDATA%\voiceio\config\config.toml`
154
192
 
155
193
  ```bash
156
194
  voiceio --model large-v3 --language auto -v
@@ -172,7 +210,9 @@ voiceio logs # check debug output
172
210
  | Hotkey doesn't work on Wayland | `sudo usermod -aG input $USER` then log out and back in |
173
211
  | Transcription too slow | Use a smaller model: `voiceio --model tiny` |
174
212
  | Want to start fresh | `voiceio uninstall` then `voiceio setup` |
175
- | Doesn't work on MacOS | I haven't added proper support for apple yet. either use https://aquavoice.com/ or make a PR |
213
+ | Windows: antivirus blocks hotkeys | pynput uses global keyboard hooks — add an exception for voiceio |
214
+ | Windows: no sound feedback | Check `voiceio logs` for audio device info |
215
+ | macOS issues | Experimental — consider [aquavoice.com](https://aquavoice.com/) or contribute a PR |
176
216
 
177
217
  ## Platform support
178
218
 
@@ -183,6 +223,7 @@ voiceio logs # check debug output
183
223
  | Fedora (GNOME) | Supported | IBus | evdev / GNOME shortcut | Yes |
184
224
  | Arch Linux | Supported | IBus | evdev | Yes |
185
225
  | KDE / Sway / Hyprland | Should work | IBus / ydotool / wtype | evdev | Yes |
226
+ | Windows 10/11 | Experimental | pynput / clipboard | pynput | Type-and-correct (no preedit) |
186
227
  | macOS | Experimental | pynput / clipboard | pynput | Type-and-correct (no preedit) |
187
228
 
188
229
  voiceio auto-detects your platform and picks the best available backends. Run `voiceio doctor` to see what's working on your system.
@@ -194,63 +235,36 @@ voiceio uninstall # removes service, IBus, shortcuts, symlinks
194
235
  pipx uninstall python-voiceio # removes the package
195
236
  ```
196
237
 
197
- ## TODO
198
-
199
- **Launch**
200
- - [ ] Publish to PyPI
201
- - [ ] Record demo video + thumbnail
202
- - [ ] Test clean install on a fresh VM/container
203
- - [ ] GitHub repo: description, topics, social preview image
204
- - [ ] Bump version to 0.2.0
205
-
206
- **Code quality**
207
- - [ ] IBus activation on non-GNOME desktops (KDE, Sway, Hyprland), currently GNOME-only via gsettings
208
- - [ ] `voiceio doctor --json` for machine-readable output
209
- - [ ] Shell completions (`voiceio completion bash/zsh/fish`)
210
- - [ ] Refactor wizard.py (882 lines) into smaller, testable modules
211
- - [ ] Socket protocol versioning (e.g. `v1:preedit:text`)
212
- - [ ] Configurable log file path
213
-
214
- ## Wishlist
215
-
216
- Contributions welcome! Open an issue to discuss before starting.
217
-
218
- **High impact**
219
- - [ ] **Text-to-speech (voice output)**: select text, press a hotkey, hear it spoken aloud. Completes the "io" in voiceio. Use a local TTS engine (Piper, Coqui, espeak-ng), same philosophy: no cloud, no API keys
220
- - [ ] **Wake word**: "Hey voiceio" hands-free activation (no hotkey needed). Use a small always-on keyword model (e.g. openWakeWord, Porcupine)
221
- - [ ] **Custom vocabulary / hot words**: user-defined word list for names, jargon, technical terms that Whisper gets wrong. Boost via `initial_prompt` or fine-tuned logit bias
222
- - [ ] **Per-app profiles**: different language/model/output settings per application (e.g. formal writing in docs, casual in chat)
223
- - [ ] **Voice commands**: "select all", "new line", "undo that", "delete last sentence". Parse transcribed text for command patterns before injecting
224
- - [ ] **Punctuation & formatting commands**: "period", "comma", "new paragraph", "capitalize that"
225
- - [ ] **Auto-punctuation model**: post-process Whisper output with a small punctuation/capitalization model for cleaner text
226
-
227
- **Platform expansion**
228
- - [ ] **macOS Input Method (IMKit)**: native streaming preedit on macOS, matching IBus quality on Linux
229
- - [ ] **Windows support**: Text Services Framework (TSF) for text injection, global hotkeys via win32api
230
- - [ ] **Flatpak / Snap packaging**: sandboxed distribution for Linux
231
- - [ ] **AUR package**: community package for Arch Linux
232
-
233
- **UX polish**
234
- - [ ] **System tray icon with recording animation**: pulsing/colored icon showing recording state, quick menu for model/language switching
235
- - [ ] **Desktop notifications with transcribed text**: show what was typed, with an undo button
236
- - [ ] **Confidence indicator**: visual hint when Whisper is uncertain (maybe highlight low-confidence words)
237
- - [ ] **Recording timeout**: auto-stop after N seconds of silence or max duration, preventing forgotten recordings
238
- - [ ] **Sound themes**: bundled sound packs (subtle, mechanical, sci-fi, none)
239
- - [ ] **First-run onboarding overlay**: lightweight "press Ctrl+Alt+V to start" hint on first launch
240
-
241
- **Power features**
242
- - [ ] **Multi-language in one session**: auto-detect language switches mid-dictation (Whisper supports this but needs tuning)
243
- - [ ] **Speaker diarization**: "Person 1: ... Person 2: ..." for meeting notes (via pyannote or whisperX)
244
- - [ ] **LLM post-processing**: pipe transcription through a local LLM (Ollama) for grammar correction, summarization, or reformatting
245
- - [ ] **Clipboard history**: keep last N transcriptions, quick-paste from history
246
- - [ ] **Transcription log / journal**: searchable history of everything you've dictated, with timestamps
247
- - [ ] **API / webhook**: expose a local API so other tools can trigger recording or receive transcriptions
248
- - [ ] **Browser extension**: inject text into web apps that don't work with IBus (e.g. some Electron apps)
249
-
250
- **Developer experience**
251
- - [ ] **Plugin system**: hooks for pre/post processing (e.g. custom formatters, translators, text transforms)
252
- - [ ] **Alternative STT backends**: support Whisper.cpp, Deepgram, AssemblyAI, OpenAI Whisper API as optional backends
253
- - [ ] **GPU acceleration docs**: CUDA/ROCm setup guide for faster transcription on large models
238
+ ## Roadmap
239
+
240
+ Contributions welcome! See [CONTRIBUTING.md](CONTRIBUTING.md) and [open issues](https://github.com/Hugo0/voiceio/issues).
241
+
242
+ **Now**
243
+ - [ ] macOS polish (IMKit for native preedit, Accessibility API for text injection)
244
+
245
+ **Soon**
246
+ - [ ] Per-app context awareness (detect focused app, adapt formatting/behavior)
247
+ - [ ] File/audio transcription mode (`voiceio transcribe recording.mp3`)
248
+
249
+ **Backlog**
250
+ - [ ] Multiple engine backends (whisper.cpp for Vulkan/AMD, VOSK for low-end hardware)
251
+ - [ ] Echo cancellation (filter system audio for meeting use)
252
+ - [ ] Wake word activation ("Hey voiceio")
253
+ - [ ] Text-to-speech output (Piper/espeak-ng — completes the "io")
254
+
255
+ **Done**
256
+ - [x] LLM auto-audit dictionary (`voiceio correct --auto` — scan history with LLM, interactive correction)
257
+ - [x] LLM post-processing via Ollama (grammar cleanup, spelling fixes on final pass)
258
+ - [x] Corrections dictionary — auto-replace misheard words, "correct that" voice command
259
+ - [x] Transcription history — searchable log of everything you've dictated
260
+ - [x] Number-to-digit conversion ("three hundred forty two" → "342")
261
+ - [x] VAD-based silence filtering (Silero VAD, prevents Whisper hallucinations)
262
+ - [x] Voice commands — "new line", "new paragraph", "scratch that", punctuation by name
263
+ - [x] Custom vocabulary / personal dictionary (bias Whisper via `initial_prompt`)
264
+ - [x] Smart punctuation & capitalization post-processing
265
+ - [x] Windows support
266
+ - [x] System tray icon with animated states
267
+ - [x] Auto-stop on silence
254
268
 
255
269
  ## License
256
270
 
@@ -0,0 +1,219 @@
1
+ # voiceio
2
+
3
+ [![CI](https://github.com/Hugo0/voiceio/actions/workflows/ci.yml/badge.svg)](https://github.com/Hugo0/voiceio/actions/workflows/ci.yml)
4
+ [![PyPI](https://img.shields.io/pypi/v/python-voiceio)](https://pypi.org/project/python-voiceio/)
5
+ [![Python](https://img.shields.io/pypi/pyversions/python-voiceio)](https://pypi.org/project/python-voiceio/)
6
+ [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
7
+
8
+ Speak → text, locally, instantly.
9
+
10
+ ## Quick start
11
+
12
+ ```bash
13
+ # 1. Install system dependencies (Ubuntu/Debian)
14
+ sudo apt install pipx ibus gir1.2-ibus-1.0 python3-gi portaudio19-dev
15
+
16
+ # 2. Install voiceio
17
+ pipx install python-voiceio
18
+
19
+ # 3. Run the setup wizard
20
+ voiceio setup
21
+ ```
22
+
23
+ That's it. Press **Ctrl+Alt+V** (or your chosen hotkey) to start dictating.
24
+
25
+ <details>
26
+ <summary><strong>Fedora</strong></summary>
27
+
28
+ ```bash
29
+ sudo dnf install pipx ibus python3-gobject portaudio-devel
30
+ pipx install python-voiceio
31
+ voiceio setup
32
+ ```
33
+ </details>
34
+
35
+ <details>
36
+ <summary><strong>Arch Linux</strong></summary>
37
+
38
+ ```bash
39
+ sudo pacman -S python-pipx ibus python-gobject portaudio
40
+ pipx install python-voiceio
41
+ voiceio setup
42
+ ```
43
+ </details>
44
+
45
+ <details>
46
+ <summary><strong>Windows</strong></summary>
47
+
48
+ ```powershell
49
+ # Option A: Install with pip (requires Python 3.11+)
50
+ pip install python-voiceio
51
+ voiceio setup
52
+
53
+ # Option B: Download the installer from GitHub Releases (no Python needed)
54
+ # https://github.com/Hugo0/voiceio/releases
55
+ # Also available as a portable .zip if you prefer no installation.
56
+ ```
57
+
58
+ Windows uses pynput for hotkeys and text injection. No extra system dependencies required.
59
+ </details>
60
+
61
+ <details>
62
+ <summary><strong>macOS</strong></summary>
63
+
64
+ ```bash
65
+ pipx install python-voiceio
66
+ voiceio setup
67
+ ```
68
+ </details>
69
+
70
+ <details>
71
+ <summary><strong>Build from source</strong></summary>
72
+
73
+ If you want the source code locally to hack on or customize for personal use. PRs are welcome!
74
+
75
+ ```bash
76
+ git clone https://github.com/Hugo0/voiceio
77
+ cd voiceio
78
+ uv pip install -e ".[linux,dev]"
79
+
80
+ # Bootstrap CLI commands onto PATH (creates ~/.local/bin/voiceio)
81
+ uv run voiceio setup
82
+ ```
83
+
84
+ > **Note:** Source installs live inside a virtualenv, so `voiceio` isn't on PATH until setup creates symlinks in `~/.local/bin/`. If `voiceio` isn't found after setup, restart your terminal or run `export PATH="$HOME/.local/bin:$PATH"`.
85
+ </details>
86
+
87
+ > You can also install with `uv tool install python-voiceio` or `pip install python-voiceio`.
88
+
89
+ ## How it works
90
+
91
+ ```
92
+ hotkey → mic capture → whisper (local) → text at cursor
93
+ pre-buffered streaming IBus / clipboard
94
+ ```
95
+
96
+ Press your hotkey to start recording (1s pre-buffer catches the first syllable). Text streams into the focused app as an underlined preview. Press again to commit. Transcription runs locally via [faster-whisper](https://github.com/SYSTRAN/faster-whisper), text is injected through IBus (any GTK/Qt app) with clipboard fallback for terminals.
97
+
98
+ ## Features
99
+
100
+ - **Streaming**: text appears as you speak, not after you stop
101
+ - **Works everywhere**: IBus input method for GUI apps, clipboard for terminals
102
+ - **Wayland + X11**: evdev hotkeys work on both, no root required
103
+ - **Pre-buffer**: never miss the first syllable
104
+ - **Auto-healing**: falls back to the next working backend if one fails
105
+ - **Autostart**: optional systemd service, restarts on crash
106
+ - **Self-diagnosing**: `voiceio doctor` checks everything, `--fix` repairs it
107
+
108
+ ## Models
109
+
110
+ | Model | Size | Speed | Accuracy | Good for |
111
+ |-------|------|-------|----------|----------|
112
+ | `tiny` | 75 MB | ~10x realtime | Basic | Quick notes, low-end hardware |
113
+ | `base` | 150 MB | ~7x realtime | Good | Daily use (default) |
114
+ | `small` | 500 MB | ~4x realtime | Better | Longer dictation |
115
+ | `medium` | 1.5 GB | ~2x realtime | Great | Accuracy-sensitive work |
116
+ | `large-v3` | 3 GB | ~1x realtime | Best | Maximum quality, GPU recommended |
117
+
118
+ Models download automatically on first use. Switch anytime: `voiceio --model small`.
119
+
120
+ ## Commands
121
+
122
+ ```
123
+ voiceio Start the daemon
124
+ voiceio setup Interactive setup wizard
125
+ voiceio doctor Health check (--fix to auto-repair)
126
+ voiceio test Test microphone + live transcription
127
+ voiceio toggle Toggle recording on a running daemon
128
+ voiceio update Update to latest version
129
+ voiceio service install Autostart on login (systemd / Windows Startup)
130
+ voiceio logs View recent logs
131
+ voiceio uninstall Remove all system integrations
132
+ ```
133
+
134
+ ## Configuration
135
+
136
+ `voiceio setup` handles everything interactively. To tweak later, edit the config file or override at runtime:
137
+
138
+ - Linux/macOS: `~/.config/voiceio/config.toml`
139
+ - Windows: `%LOCALAPPDATA%\voiceio\config\config.toml`
140
+
141
+ ```bash
142
+ voiceio --model large-v3 --language auto -v
143
+ ```
144
+
145
+ See [config.example.toml](config.example.toml) for all options.
146
+
147
+ ## Troubleshooting
148
+
149
+ ```bash
150
+ voiceio doctor # see what's working
151
+ voiceio doctor --fix # auto-fix issues
152
+ voiceio logs # check debug output
153
+ ```
154
+
155
+ | Problem | Fix |
156
+ |---------|-----|
157
+ | No text appears | `voiceio doctor --fix` - usually a missing IBus component or GNOME input source |
158
+ | Hotkey doesn't work on Wayland | `sudo usermod -aG input $USER` then log out and back in |
159
+ | Transcription too slow | Use a smaller model: `voiceio --model tiny` |
160
+ | Want to start fresh | `voiceio uninstall` then `voiceio setup` |
161
+ | Windows: antivirus blocks hotkeys | pynput uses global keyboard hooks — add an exception for voiceio |
162
+ | Windows: no sound feedback | Check `voiceio logs` for audio device info |
163
+ | macOS issues | Experimental — consider [aquavoice.com](https://aquavoice.com/) or contribute a PR |
164
+
165
+ ## Platform support
166
+
167
+ | Platform | Status | Text injection | Hotkeys | Streaming preview |
168
+ |----------|--------|---------------|---------|-------------------|
169
+ | Ubuntu / Debian (GNOME, Wayland) | **Tested daily** | IBus | evdev / GNOME shortcut | Yes |
170
+ | Ubuntu / Debian (GNOME, X11) | Supported | IBus | evdev / pynput | Yes |
171
+ | Fedora (GNOME) | Supported | IBus | evdev / GNOME shortcut | Yes |
172
+ | Arch Linux | Supported | IBus | evdev | Yes |
173
+ | KDE / Sway / Hyprland | Should work | IBus / ydotool / wtype | evdev | Yes |
174
+ | Windows 10/11 | Experimental | pynput / clipboard | pynput | Type-and-correct (no preedit) |
175
+ | macOS | Experimental | pynput / clipboard | pynput | Type-and-correct (no preedit) |
176
+
177
+ voiceio auto-detects your platform and picks the best available backends. Run `voiceio doctor` to see what's working on your system.
178
+
179
+ ## Uninstall
180
+
181
+ ```bash
182
+ voiceio uninstall # removes service, IBus, shortcuts, symlinks
183
+ pipx uninstall python-voiceio # removes the package
184
+ ```
185
+
186
+ ## Roadmap
187
+
188
+ Contributions welcome! See [CONTRIBUTING.md](CONTRIBUTING.md) and [open issues](https://github.com/Hugo0/voiceio/issues).
189
+
190
+ **Now**
191
+ - [ ] macOS polish (IMKit for native preedit, Accessibility API for text injection)
192
+
193
+ **Soon**
194
+ - [ ] Per-app context awareness (detect focused app, adapt formatting/behavior)
195
+ - [ ] File/audio transcription mode (`voiceio transcribe recording.mp3`)
196
+
197
+ **Backlog**
198
+ - [ ] Multiple engine backends (whisper.cpp for Vulkan/AMD, VOSK for low-end hardware)
199
+ - [ ] Echo cancellation (filter system audio for meeting use)
200
+ - [ ] Wake word activation ("Hey voiceio")
201
+ - [ ] Text-to-speech output (Piper/espeak-ng — completes the "io")
202
+
203
+ **Done**
204
+ - [x] LLM auto-audit dictionary (`voiceio correct --auto` — scan history with LLM, interactive correction)
205
+ - [x] LLM post-processing via Ollama (grammar cleanup, spelling fixes on final pass)
206
+ - [x] Corrections dictionary — auto-replace misheard words, "correct that" voice command
207
+ - [x] Transcription history — searchable log of everything you've dictated
208
+ - [x] Number-to-digit conversion ("three hundred forty two" → "342")
209
+ - [x] VAD-based silence filtering (Silero VAD, prevents Whisper hallucinations)
210
+ - [x] Voice commands — "new line", "new paragraph", "scratch that", punctuation by name
211
+ - [x] Custom vocabulary / personal dictionary (bias Whisper via `initial_prompt`)
212
+ - [x] Smart punctuation & capitalization post-processing
213
+ - [x] Windows support
214
+ - [x] System tray icon with animated states
215
+ - [x] Auto-stop on silence
216
+
217
+ ## License
218
+
219
+ MIT
@@ -4,18 +4,20 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "python-voiceio"
7
- version = "0.2.3"
7
+ version = "0.3.0"
8
8
  description = "Speak → text, locally, instantly."
9
9
  readme = "README.md"
10
10
  license = "MIT"
11
11
  requires-python = ">=3.11"
12
12
  authors = [{ name = "Hugo Montenegro" }]
13
- keywords = ["voice", "speech-to-text", "whisper", "linux", "dictation", "wayland", "ibus"]
13
+ keywords = ["voice", "speech-to-text", "whisper", "linux", "windows", "dictation", "wayland", "ibus"]
14
14
  classifiers = [
15
15
  "Development Status :: 4 - Beta",
16
16
  "Environment :: X11 Applications",
17
17
  "Intended Audience :: End Users/Desktop",
18
18
  "Operating System :: POSIX :: Linux",
19
+ "Operating System :: Microsoft :: Windows",
20
+ "Operating System :: MacOS",
19
21
  "Programming Language :: Python :: 3",
20
22
  "Topic :: Multimedia :: Sound/Audio :: Speech",
21
23
  ]
@@ -23,18 +25,29 @@ dependencies = [
23
25
  "faster-whisper>=1.0.0",
24
26
  "sounddevice>=0.4.6",
25
27
  "numpy>=1.24.0",
28
+ "onnxruntime>=1.16.0",
29
+ "wordfreq>=3.0",
26
30
  "evdev>=1.6.0; sys_platform == 'linux'",
31
+ "pynput>=1.7.6; sys_platform == 'win32'",
32
+ "pynput>=1.7.6; sys_platform == 'darwin'",
33
+ "pyperclip>=1.8.0; sys_platform == 'win32'",
34
+ "win11toast>=0.36; sys_platform == 'win32'",
27
35
  ]
28
36
 
29
37
  [project.optional-dependencies]
30
38
  x11 = ["pynput>=1.7.6"]
31
39
  mac = ["pynput>=1.7.6"]
40
+ win = ["pynput>=1.7.6", "pyperclip>=1.8.0", "win11toast>=0.36"]
32
41
  tray = ["pystray>=0.19", "Pillow>=10.0"]
42
+ tts = ["piper-tts>=1.2.0"]
43
+ tts-cloud = ["edge-tts>=6.1.0"]
33
44
  dev = ["pytest>=7.0", "pytest-mock"]
34
45
 
35
46
  [project.urls]
36
47
  Homepage = "https://github.com/Hugo0/voiceio"
48
+ Repository = "https://github.com/Hugo0/voiceio"
37
49
  Issues = "https://github.com/Hugo0/voiceio/issues"
50
+ Changelog = "https://github.com/Hugo0/voiceio/releases"
38
51
 
39
52
  [project.scripts]
40
53
  voiceio = "voiceio.cli:main"
@@ -52,3 +65,4 @@ include = ["voiceio*"]
52
65
 
53
66
  [tool.setuptools.package-data]
54
67
  "voiceio.sounds" = ["*.wav"]
68
+ "voiceio.models" = ["*.onnx"]