python-voiceio 0.2.3__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_voiceio-0.2.3/python_voiceio.egg-info → python_voiceio-0.3.0}/PKG-INFO +93 -79
- python_voiceio-0.3.0/README.md +219 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/pyproject.toml +16 -2
- {python_voiceio-0.2.3 → python_voiceio-0.3.0/python_voiceio.egg-info}/PKG-INFO +93 -79
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/python_voiceio.egg-info/SOURCES.txt +44 -1
- python_voiceio-0.3.0/python_voiceio.egg-info/requires.txt +41 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/tests/test_app_wiring.py +92 -25
- python_voiceio-0.3.0/tests/test_autocorrect.py +264 -0
- python_voiceio-0.3.0/tests/test_clipboard_read.py +91 -0
- python_voiceio-0.3.0/tests/test_commands.py +173 -0
- python_voiceio-0.3.0/tests/test_corrections.py +148 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/tests/test_fallback.py +10 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/tests/test_health.py +6 -6
- python_voiceio-0.3.0/tests/test_hints.py +74 -0
- python_voiceio-0.3.0/tests/test_history.py +90 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/tests/test_ibus_typer.py +7 -0
- python_voiceio-0.3.0/tests/test_llm.py +217 -0
- python_voiceio-0.3.0/tests/test_llm_api.py +118 -0
- python_voiceio-0.3.0/tests/test_numbers.py +101 -0
- python_voiceio-0.3.0/tests/test_postprocess.py +107 -0
- python_voiceio-0.3.0/tests/test_prompt.py +111 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/tests/test_streaming.py +2 -2
- python_voiceio-0.3.0/tests/test_tts.py +158 -0
- python_voiceio-0.3.0/tests/test_vad.py +118 -0
- python_voiceio-0.3.0/tests/test_vocabulary.py +71 -0
- python_voiceio-0.3.0/tests/test_wordfreq.py +80 -0
- python_voiceio-0.3.0/voiceio/__init__.py +1 -0
- python_voiceio-0.3.0/voiceio/app.py +655 -0
- python_voiceio-0.3.0/voiceio/autocorrect.py +284 -0
- python_voiceio-0.3.0/voiceio/cli.py +1094 -0
- python_voiceio-0.3.0/voiceio/clipboard_read.py +69 -0
- python_voiceio-0.3.0/voiceio/commands.py +130 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/config.py +58 -2
- python_voiceio-0.3.0/voiceio/corrections.py +160 -0
- python_voiceio-0.3.0/voiceio/demo.py +199 -0
- python_voiceio-0.3.0/voiceio/feedback.py +162 -0
- python_voiceio-0.3.0/voiceio/health.py +408 -0
- python_voiceio-0.3.0/voiceio/hints.py +58 -0
- python_voiceio-0.3.0/voiceio/history.py +64 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/hotkeys/chain.py +1 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/hotkeys/pynput_backend.py +23 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/hotkeys/socket_backend.py +35 -12
- python_voiceio-0.3.0/voiceio/llm.py +258 -0
- python_voiceio-0.3.0/voiceio/llm_api.py +130 -0
- python_voiceio-0.3.0/voiceio/models/silero_vad.onnx +0 -0
- python_voiceio-0.3.0/voiceio/numbers.py +228 -0
- python_voiceio-0.3.0/voiceio/pidlock.py +22 -0
- python_voiceio-0.3.0/voiceio/platform.py +272 -0
- python_voiceio-0.3.0/voiceio/postprocess.py +84 -0
- python_voiceio-0.3.0/voiceio/prompt.py +73 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/recorder.py +53 -13
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/service.py +73 -9
- python_voiceio-0.3.0/voiceio/sounds/__init__.py +0 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/sounds/commit.wav +0 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/sounds/start.wav +0 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/sounds/stop.wav +0 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/streaming.py +84 -19
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/transcriber.py +14 -2
- python_voiceio-0.3.0/voiceio/tray/__init__.py +277 -0
- python_voiceio-0.3.0/voiceio/tray/_icons.py +125 -0
- python_voiceio-0.3.0/voiceio/tray/_indicator.py +181 -0
- python_voiceio-0.3.0/voiceio/tray/_pystray.py +123 -0
- python_voiceio-0.3.0/voiceio/tts/__init__.py +11 -0
- python_voiceio-0.3.0/voiceio/tts/base.py +29 -0
- python_voiceio-0.3.0/voiceio/tts/chain.py +79 -0
- python_voiceio-0.3.0/voiceio/tts/edge_engine.py +74 -0
- python_voiceio-0.3.0/voiceio/tts/espeak.py +47 -0
- python_voiceio-0.3.0/voiceio/tts/piper_engine.py +90 -0
- python_voiceio-0.3.0/voiceio/tts/player.py +62 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/typers/chain.py +1 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/typers/clipboard.py +49 -6
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/typers/ibus.py +3 -2
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/typers/pynput_type.py +9 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/typers/wtype.py +2 -1
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/typers/xdotool.py +2 -1
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/typers/ydotool.py +2 -1
- python_voiceio-0.3.0/voiceio/vad.py +122 -0
- python_voiceio-0.3.0/voiceio/vocabulary.py +59 -0
- python_voiceio-0.3.0/voiceio/wizard.py +1463 -0
- python_voiceio-0.3.0/voiceio/wordfreq.py +69 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/worker.py +16 -1
- python_voiceio-0.2.3/README.md +0 -223
- python_voiceio-0.2.3/python_voiceio.egg-info/requires.txt +0 -20
- python_voiceio-0.2.3/voiceio/__init__.py +0 -1
- python_voiceio-0.2.3/voiceio/app.py +0 -415
- python_voiceio-0.2.3/voiceio/cli.py +0 -476
- python_voiceio-0.2.3/voiceio/feedback.py +0 -78
- python_voiceio-0.2.3/voiceio/health.py +0 -194
- python_voiceio-0.2.3/voiceio/platform.py +0 -139
- python_voiceio-0.2.3/voiceio/tray.py +0 -54
- python_voiceio-0.2.3/voiceio/wizard.py +0 -883
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/LICENSE +0 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/python_voiceio.egg-info/dependency_links.txt +0 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/python_voiceio.egg-info/entry_points.txt +0 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/python_voiceio.egg-info/top_level.txt +0 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/setup.cfg +0 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/tests/test_backend_probes.py +0 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/tests/test_config.py +0 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/tests/test_platform.py +0 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/tests/test_prebuffer.py +0 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/tests/test_recorder_integration.py +0 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/tests/test_transcriber.py +0 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/__main__.py +0 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/backends.py +0 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/hotkeys/__init__.py +0 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/hotkeys/base.py +0 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/hotkeys/evdev.py +0 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/ibus/__init__.py +0 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/ibus/engine.py +0 -0
- {python_voiceio-0.2.3/voiceio/sounds → python_voiceio-0.3.0/voiceio/models}/__init__.py +0 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/typers/__init__.py +0 -0
- {python_voiceio-0.2.3 → python_voiceio-0.3.0}/voiceio/typers/base.py +0 -0
|
@@ -1,16 +1,20 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: python-voiceio
|
|
3
|
-
Version: 0.2.3
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Speak → text, locally, instantly.
|
|
5
5
|
Author: Hugo Montenegro
|
|
6
6
|
License-Expression: MIT
|
|
7
7
|
Project-URL: Homepage, https://github.com/Hugo0/voiceio
|
|
8
|
+
Project-URL: Repository, https://github.com/Hugo0/voiceio
|
|
8
9
|
Project-URL: Issues, https://github.com/Hugo0/voiceio/issues
|
|
9
|
-
|
|
10
|
+
Project-URL: Changelog, https://github.com/Hugo0/voiceio/releases
|
|
11
|
+
Keywords: voice,speech-to-text,whisper,linux,windows,dictation,wayland,ibus
|
|
10
12
|
Classifier: Development Status :: 4 - Beta
|
|
11
13
|
Classifier: Environment :: X11 Applications
|
|
12
14
|
Classifier: Intended Audience :: End Users/Desktop
|
|
13
15
|
Classifier: Operating System :: POSIX :: Linux
|
|
16
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
17
|
+
Classifier: Operating System :: MacOS
|
|
14
18
|
Classifier: Programming Language :: Python :: 3
|
|
15
19
|
Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
|
|
16
20
|
Requires-Python: >=3.11
|
|
@@ -19,14 +23,28 @@ License-File: LICENSE
|
|
|
19
23
|
Requires-Dist: faster-whisper>=1.0.0
|
|
20
24
|
Requires-Dist: sounddevice>=0.4.6
|
|
21
25
|
Requires-Dist: numpy>=1.24.0
|
|
26
|
+
Requires-Dist: onnxruntime>=1.16.0
|
|
27
|
+
Requires-Dist: wordfreq>=3.0
|
|
22
28
|
Requires-Dist: evdev>=1.6.0; sys_platform == "linux"
|
|
29
|
+
Requires-Dist: pynput>=1.7.6; sys_platform == "win32"
|
|
30
|
+
Requires-Dist: pynput>=1.7.6; sys_platform == "darwin"
|
|
31
|
+
Requires-Dist: pyperclip>=1.8.0; sys_platform == "win32"
|
|
32
|
+
Requires-Dist: win11toast>=0.36; sys_platform == "win32"
|
|
23
33
|
Provides-Extra: x11
|
|
24
34
|
Requires-Dist: pynput>=1.7.6; extra == "x11"
|
|
25
35
|
Provides-Extra: mac
|
|
26
36
|
Requires-Dist: pynput>=1.7.6; extra == "mac"
|
|
37
|
+
Provides-Extra: win
|
|
38
|
+
Requires-Dist: pynput>=1.7.6; extra == "win"
|
|
39
|
+
Requires-Dist: pyperclip>=1.8.0; extra == "win"
|
|
40
|
+
Requires-Dist: win11toast>=0.36; extra == "win"
|
|
27
41
|
Provides-Extra: tray
|
|
28
42
|
Requires-Dist: pystray>=0.19; extra == "tray"
|
|
29
43
|
Requires-Dist: Pillow>=10.0; extra == "tray"
|
|
44
|
+
Provides-Extra: tts
|
|
45
|
+
Requires-Dist: piper-tts>=1.2.0; extra == "tts"
|
|
46
|
+
Provides-Extra: tts-cloud
|
|
47
|
+
Requires-Dist: edge-tts>=6.1.0; extra == "tts-cloud"
|
|
30
48
|
Provides-Extra: dev
|
|
31
49
|
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
32
50
|
Requires-Dist: pytest-mock; extra == "dev"
|
|
@@ -41,15 +59,6 @@ Dynamic: license-file
|
|
|
41
59
|
|
|
42
60
|
Speak → text, locally, instantly.
|
|
43
61
|
|
|
44
|
-
<!-- demo video -->
|
|
45
|
-
<p align="center">
|
|
46
|
-
<a href="https://www.tella.tv/video/YOUR_VIDEO_ID">
|
|
47
|
-
<img src="https://github.com/Hugo0/voiceio/raw/main/assets/demo-thumbnail.png" alt="voiceio demo" width="600">
|
|
48
|
-
</a>
|
|
49
|
-
<br>
|
|
50
|
-
<em>Click to watch the demo</em>
|
|
51
|
-
</p>
|
|
52
|
-
|
|
53
62
|
## Quick start
|
|
54
63
|
|
|
55
64
|
```bash
|
|
@@ -85,6 +94,31 @@ voiceio setup
|
|
|
85
94
|
```
|
|
86
95
|
</details>
|
|
87
96
|
|
|
97
|
+
<details>
|
|
98
|
+
<summary><strong>Windows</strong></summary>
|
|
99
|
+
|
|
100
|
+
```powershell
|
|
101
|
+
# Option A: Install with pip (requires Python 3.11+)
|
|
102
|
+
pip install python-voiceio
|
|
103
|
+
voiceio setup
|
|
104
|
+
|
|
105
|
+
# Option B: Download the installer from GitHub Releases (no Python needed)
|
|
106
|
+
# https://github.com/Hugo0/voiceio/releases
|
|
107
|
+
# Also available as a portable .zip if you prefer no installation.
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Windows uses pynput for hotkeys and text injection. No extra system dependencies required.
|
|
111
|
+
</details>
|
|
112
|
+
|
|
113
|
+
<details>
|
|
114
|
+
<summary><strong>macOS</strong></summary>
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
pipx install python-voiceio
|
|
118
|
+
voiceio setup
|
|
119
|
+
```
|
|
120
|
+
</details>
|
|
121
|
+
|
|
88
122
|
<details>
|
|
89
123
|
<summary><strong>Build from source</strong></summary>
|
|
90
124
|
|
|
@@ -93,12 +127,16 @@ If you want the source code locally to hack on or customize for personal use. PR
|
|
|
93
127
|
```bash
|
|
94
128
|
git clone https://github.com/Hugo0/voiceio
|
|
95
129
|
cd voiceio
|
|
96
|
-
pip install -e ".[linux,dev]"
|
|
97
|
-
|
|
130
|
+
uv pip install -e ".[linux,dev]"
|
|
131
|
+
|
|
132
|
+
# Bootstrap CLI commands onto PATH (creates ~/.local/bin/voiceio)
|
|
133
|
+
uv run voiceio setup
|
|
98
134
|
```
|
|
135
|
+
|
|
136
|
+
> **Note:** Source installs live inside a virtualenv, so `voiceio` isn't on PATH until setup creates symlinks in `~/.local/bin/`. If `voiceio` isn't found after setup, restart your terminal or run `export PATH="$HOME/.local/bin:$PATH"`.
|
|
99
137
|
</details>
|
|
100
138
|
|
|
101
|
-
> You can also install with `uv tool install voiceio` or `pip install voiceio`.
|
|
139
|
+
> You can also install with `uv tool install python-voiceio` or `pip install python-voiceio`.
|
|
102
140
|
|
|
103
141
|
## How it works
|
|
104
142
|
|
|
@@ -107,11 +145,7 @@ hotkey → mic capture → whisper (local) → text at cursor
|
|
|
107
145
|
pre-buffered streaming IBus / clipboard
|
|
108
146
|
```
|
|
109
147
|
|
|
110
|
-
|
|
111
|
-
2. Speak naturally: text streams into the focused app in real-time as an underlined preview
|
|
112
|
-
3. Press the hotkey again: the final transcription replaces the preview and is committed
|
|
113
|
-
|
|
114
|
-
Transcription runs locally via [faster-whisper](https://github.com/SYSTRAN/faster-whisper). Text is injected through IBus (works in any GTK/Qt app: browsers, Telegram, editors) with an automatic clipboard fallback for terminals.
|
|
148
|
+
Press your hotkey to start recording (1s pre-buffer catches the first syllable). Text streams into the focused app as an underlined preview. Press again to commit. Transcription runs locally via [faster-whisper](https://github.com/SYSTRAN/faster-whisper), text is injected through IBus (any GTK/Qt app) with clipboard fallback for terminals.
|
|
115
149
|
|
|
116
150
|
## Features
|
|
117
151
|
|
|
@@ -143,14 +177,18 @@ voiceio setup Interactive setup wizard
|
|
|
143
177
|
voiceio doctor Health check (--fix to auto-repair)
|
|
144
178
|
voiceio test Test microphone + live transcription
|
|
145
179
|
voiceio toggle Toggle recording on a running daemon
|
|
146
|
-
voiceio
|
|
180
|
+
voiceio update Update to latest version
|
|
181
|
+
voiceio service install Autostart on login (systemd / Windows Startup)
|
|
147
182
|
voiceio logs View recent logs
|
|
148
183
|
voiceio uninstall Remove all system integrations
|
|
149
184
|
```
|
|
150
185
|
|
|
151
186
|
## Configuration
|
|
152
187
|
|
|
153
|
-
`voiceio setup` handles everything interactively. To tweak later, edit
|
|
188
|
+
`voiceio setup` handles everything interactively. To tweak later, edit the config file or override at runtime:
|
|
189
|
+
|
|
190
|
+
- Linux/macOS: `~/.config/voiceio/config.toml`
|
|
191
|
+
- Windows: `%LOCALAPPDATA%\voiceio\config\config.toml`
|
|
154
192
|
|
|
155
193
|
```bash
|
|
156
194
|
voiceio --model large-v3 --language auto -v
|
|
@@ -172,7 +210,9 @@ voiceio logs # check debug output
|
|
|
172
210
|
| Hotkey doesn't work on Wayland | `sudo usermod -aG input $USER` then log out and back in |
|
|
173
211
|
| Transcription too slow | Use a smaller model: `voiceio --model tiny` |
|
|
174
212
|
| Want to start fresh | `voiceio uninstall` then `voiceio setup` |
|
|
175
|
-
|
|
|
213
|
+
| Windows: antivirus blocks hotkeys | pynput uses global keyboard hooks — add an exception for voiceio |
|
|
214
|
+
| Windows: no sound feedback | Check `voiceio logs` for audio device info |
|
|
215
|
+
| macOS issues | Experimental — consider [aquavoice.com](https://aquavoice.com/) or contribute a PR |
|
|
176
216
|
|
|
177
217
|
## Platform support
|
|
178
218
|
|
|
@@ -183,6 +223,7 @@ voiceio logs # check debug output
|
|
|
183
223
|
| Fedora (GNOME) | Supported | IBus | evdev / GNOME shortcut | Yes |
|
|
184
224
|
| Arch Linux | Supported | IBus | evdev | Yes |
|
|
185
225
|
| KDE / Sway / Hyprland | Should work | IBus / ydotool / wtype | evdev | Yes |
|
|
226
|
+
| Windows 10/11 | Experimental | pynput / clipboard | pynput | Type-and-correct (no preedit) |
|
|
186
227
|
| macOS | Experimental | pynput / clipboard | pynput | Type-and-correct (no preedit) |
|
|
187
228
|
|
|
188
229
|
voiceio auto-detects your platform and picks the best available backends. Run `voiceio doctor` to see what's working on your system.
|
|
@@ -194,63 +235,36 @@ voiceio uninstall # removes service, IBus, shortcuts, symlinks
|
|
|
194
235
|
pipx uninstall python-voiceio # removes the package
|
|
195
236
|
```
|
|
196
237
|
|
|
197
|
-
##
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
- [ ]
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
- [ ]
|
|
210
|
-
- [ ]
|
|
211
|
-
- [ ]
|
|
212
|
-
- [ ]
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
- [
|
|
220
|
-
- [
|
|
221
|
-
- [
|
|
222
|
-
- [
|
|
223
|
-
- [
|
|
224
|
-
- [
|
|
225
|
-
- [
|
|
226
|
-
|
|
227
|
-
**Platform expansion**
|
|
228
|
-
- [ ] **macOS Input Method (IMKit)**: native streaming preedit on macOS, matching IBus quality on Linux
|
|
229
|
-
- [ ] **Windows support**: Text Services Framework (TSF) for text injection, global hotkeys via win32api
|
|
230
|
-
- [ ] **Flatpak / Snap packaging**: sandboxed distribution for Linux
|
|
231
|
-
- [ ] **AUR package**: community package for Arch Linux
|
|
232
|
-
|
|
233
|
-
**UX polish**
|
|
234
|
-
- [ ] **System tray icon with recording animation**: pulsing/colored icon showing recording state, quick menu for model/language switching
|
|
235
|
-
- [ ] **Desktop notifications with transcribed text**: show what was typed, with an undo button
|
|
236
|
-
- [ ] **Confidence indicator**: visual hint when Whisper is uncertain (maybe highlight low-confidence words)
|
|
237
|
-
- [ ] **Recording timeout**: auto-stop after N seconds of silence or max duration, preventing forgotten recordings
|
|
238
|
-
- [ ] **Sound themes**: bundled sound packs (subtle, mechanical, sci-fi, none)
|
|
239
|
-
- [ ] **First-run onboarding overlay**: lightweight "press Ctrl+Alt+V to start" hint on first launch
|
|
240
|
-
|
|
241
|
-
**Power features**
|
|
242
|
-
- [ ] **Multi-language in one session**: auto-detect language switches mid-dictation (Whisper supports this but needs tuning)
|
|
243
|
-
- [ ] **Speaker diarization**: "Person 1: ... Person 2: ..." for meeting notes (via pyannote or whisperX)
|
|
244
|
-
- [ ] **LLM post-processing**: pipe transcription through a local LLM (Ollama) for grammar correction, summarization, or reformatting
|
|
245
|
-
- [ ] **Clipboard history**: keep last N transcriptions, quick-paste from history
|
|
246
|
-
- [ ] **Transcription log / journal**: searchable history of everything you've dictated, with timestamps
|
|
247
|
-
- [ ] **API / webhook**: expose a local API so other tools can trigger recording or receive transcriptions
|
|
248
|
-
- [ ] **Browser extension**: inject text into web apps that don't work with IBus (e.g. some Electron apps)
|
|
249
|
-
|
|
250
|
-
**Developer experience**
|
|
251
|
-
- [ ] **Plugin system**: hooks for pre/post processing (e.g. custom formatters, translators, text transforms)
|
|
252
|
-
- [ ] **Alternative STT backends**: support Whisper.cpp, Deepgram, AssemblyAI, OpenAI Whisper API as optional backends
|
|
253
|
-
- [ ] **GPU acceleration docs**: CUDA/ROCm setup guide for faster transcription on large models
|
|
238
|
+
## Roadmap
|
|
239
|
+
|
|
240
|
+
Contributions welcome! See [CONTRIBUTING.md](CONTRIBUTING.md) and [open issues](https://github.com/Hugo0/voiceio/issues).
|
|
241
|
+
|
|
242
|
+
**Now**
|
|
243
|
+
- [ ] macOS polish (IMKit for native preedit, Accessibility API for text injection)
|
|
244
|
+
|
|
245
|
+
**Soon**
|
|
246
|
+
- [ ] Per-app context awareness (detect focused app, adapt formatting/behavior)
|
|
247
|
+
- [ ] File/audio transcription mode (`voiceio transcribe recording.mp3`)
|
|
248
|
+
|
|
249
|
+
**Backlog**
|
|
250
|
+
- [ ] Multiple engine backends (whisper.cpp for Vulkan/AMD, VOSK for low-end hardware)
|
|
251
|
+
- [ ] Echo cancellation (filter system audio for meeting use)
|
|
252
|
+
- [ ] Wake word activation ("Hey voiceio")
|
|
253
|
+
- [ ] Text-to-speech output (Piper/espeak-ng — completes the "io")
|
|
254
|
+
|
|
255
|
+
**Done**
|
|
256
|
+
- [x] LLM auto-audit dictionary (`voiceio correct --auto` — scan history with LLM, interactive correction)
|
|
257
|
+
- [x] LLM post-processing via Ollama (grammar cleanup, spelling fixes on final pass)
|
|
258
|
+
- [x] Corrections dictionary — auto-replace misheard words, "correct that" voice command
|
|
259
|
+
- [x] Transcription history — searchable log of everything you've dictated
|
|
260
|
+
- [x] Number-to-digit conversion ("three hundred forty two" → "342")
|
|
261
|
+
- [x] VAD-based silence filtering (Silero VAD, prevents Whisper hallucinations)
|
|
262
|
+
- [x] Voice commands — "new line", "new paragraph", "scratch that", punctuation by name
|
|
263
|
+
- [x] Custom vocabulary / personal dictionary (bias Whisper via `initial_prompt`)
|
|
264
|
+
- [x] Smart punctuation & capitalization post-processing
|
|
265
|
+
- [x] Windows support
|
|
266
|
+
- [x] System tray icon with animated states
|
|
267
|
+
- [x] Auto-stop on silence
|
|
254
268
|
|
|
255
269
|
## License
|
|
256
270
|
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
# voiceio
|
|
2
|
+
|
|
3
|
+
[CI](https://github.com/Hugo0/voiceio/actions/workflows/ci.yml)
|
|
4
|
+
[PyPI version](https://pypi.org/project/python-voiceio/)
|
|
5
|
+
[Python versions](https://pypi.org/project/python-voiceio/)
|
|
6
|
+
[License: MIT](LICENSE)
|
|
7
|
+
|
|
8
|
+
Speak → text, locally, instantly.
|
|
9
|
+
|
|
10
|
+
## Quick start
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
# 1. Install system dependencies (Ubuntu/Debian)
|
|
14
|
+
sudo apt install pipx ibus gir1.2-ibus-1.0 python3-gi portaudio19-dev
|
|
15
|
+
|
|
16
|
+
# 2. Install voiceio
|
|
17
|
+
pipx install python-voiceio
|
|
18
|
+
|
|
19
|
+
# 3. Run the setup wizard
|
|
20
|
+
voiceio setup
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
That's it. Press **Ctrl+Alt+V** (or your chosen hotkey) to start dictating.
|
|
24
|
+
|
|
25
|
+
<details>
|
|
26
|
+
<summary><strong>Fedora</strong></summary>
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
sudo dnf install pipx ibus python3-gobject portaudio-devel
|
|
30
|
+
pipx install python-voiceio
|
|
31
|
+
voiceio setup
|
|
32
|
+
```
|
|
33
|
+
</details>
|
|
34
|
+
|
|
35
|
+
<details>
|
|
36
|
+
<summary><strong>Arch Linux</strong></summary>
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
sudo pacman -S python-pipx ibus python-gobject portaudio
|
|
40
|
+
pipx install python-voiceio
|
|
41
|
+
voiceio setup
|
|
42
|
+
```
|
|
43
|
+
</details>
|
|
44
|
+
|
|
45
|
+
<details>
|
|
46
|
+
<summary><strong>Windows</strong></summary>
|
|
47
|
+
|
|
48
|
+
```powershell
|
|
49
|
+
# Option A: Install with pip (requires Python 3.11+)
|
|
50
|
+
pip install python-voiceio
|
|
51
|
+
voiceio setup
|
|
52
|
+
|
|
53
|
+
# Option B: Download the installer from GitHub Releases (no Python needed)
|
|
54
|
+
# https://github.com/Hugo0/voiceio/releases
|
|
55
|
+
# Also available as a portable .zip if you prefer no installation.
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Windows uses pynput for hotkeys and text injection. No extra system dependencies required.
|
|
59
|
+
</details>
|
|
60
|
+
|
|
61
|
+
<details>
|
|
62
|
+
<summary><strong>macOS</strong></summary>
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
pipx install python-voiceio
|
|
66
|
+
voiceio setup
|
|
67
|
+
```
|
|
68
|
+
</details>
|
|
69
|
+
|
|
70
|
+
<details>
|
|
71
|
+
<summary><strong>Build from source</strong></summary>
|
|
72
|
+
|
|
73
|
+
If you want the source code locally to hack on or customize for personal use. PRs are welcome!
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
git clone https://github.com/Hugo0/voiceio
|
|
77
|
+
cd voiceio
|
|
78
|
+
uv pip install -e ".[linux,dev]"
|
|
79
|
+
|
|
80
|
+
# Bootstrap CLI commands onto PATH (creates ~/.local/bin/voiceio)
|
|
81
|
+
uv run voiceio setup
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
> **Note:** Source installs live inside a virtualenv, so `voiceio` isn't on PATH until setup creates symlinks in `~/.local/bin/`. If `voiceio` isn't found after setup, restart your terminal or run `export PATH="$HOME/.local/bin:$PATH"`.
|
|
85
|
+
</details>
|
|
86
|
+
|
|
87
|
+
> You can also install with `uv tool install python-voiceio` or `pip install python-voiceio`.
|
|
88
|
+
|
|
89
|
+
## How it works
|
|
90
|
+
|
|
91
|
+
```
|
|
92
|
+
hotkey → mic capture → whisper (local) → text at cursor
|
|
93
|
+
pre-buffered streaming IBus / clipboard
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Press your hotkey to start recording (1s pre-buffer catches the first syllable). Text streams into the focused app as an underlined preview. Press again to commit. Transcription runs locally via [faster-whisper](https://github.com/SYSTRAN/faster-whisper), text is injected through IBus (any GTK/Qt app) with clipboard fallback for terminals.
|
|
97
|
+
|
|
98
|
+
## Features
|
|
99
|
+
|
|
100
|
+
- **Streaming**: text appears as you speak, not after you stop
|
|
101
|
+
- **Works everywhere**: IBus input method for GUI apps, clipboard for terminals
|
|
102
|
+
- **Wayland + X11**: evdev hotkeys work on both, no root required
|
|
103
|
+
- **Pre-buffer**: never miss the first syllable
|
|
104
|
+
- **Auto-healing**: falls back to the next working backend if one fails
|
|
105
|
+
- **Autostart**: optional systemd service, restarts on crash
|
|
106
|
+
- **Self-diagnosing**: `voiceio doctor` checks everything, `--fix` repairs it
|
|
107
|
+
|
|
108
|
+
## Models
|
|
109
|
+
|
|
110
|
+
| Model | Size | Speed | Accuracy | Good for |
|
|
111
|
+
|-------|------|-------|----------|----------|
|
|
112
|
+
| `tiny` | 75 MB | ~10x realtime | Basic | Quick notes, low-end hardware |
|
|
113
|
+
| `base` | 150 MB | ~7x realtime | Good | Daily use (default) |
|
|
114
|
+
| `small` | 500 MB | ~4x realtime | Better | Longer dictation |
|
|
115
|
+
| `medium` | 1.5 GB | ~2x realtime | Great | Accuracy-sensitive work |
|
|
116
|
+
| `large-v3` | 3 GB | ~1x realtime | Best | Maximum quality, GPU recommended |
|
|
117
|
+
|
|
118
|
+
Models download automatically on first use. Switch anytime: `voiceio --model small`.
|
|
119
|
+
|
|
120
|
+
## Commands
|
|
121
|
+
|
|
122
|
+
```
|
|
123
|
+
voiceio Start the daemon
|
|
124
|
+
voiceio setup Interactive setup wizard
|
|
125
|
+
voiceio doctor Health check (--fix to auto-repair)
|
|
126
|
+
voiceio test Test microphone + live transcription
|
|
127
|
+
voiceio toggle Toggle recording on a running daemon
|
|
128
|
+
voiceio update Update to latest version
|
|
129
|
+
voiceio service install Autostart on login (systemd / Windows Startup)
|
|
130
|
+
voiceio logs View recent logs
|
|
131
|
+
voiceio uninstall Remove all system integrations
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
## Configuration
|
|
135
|
+
|
|
136
|
+
`voiceio setup` handles everything interactively. To tweak later, edit the config file or override at runtime:
|
|
137
|
+
|
|
138
|
+
- Linux/macOS: `~/.config/voiceio/config.toml`
|
|
139
|
+
- Windows: `%LOCALAPPDATA%\voiceio\config\config.toml`
|
|
140
|
+
|
|
141
|
+
```bash
|
|
142
|
+
voiceio --model large-v3 --language auto -v
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
See [config.example.toml](config.example.toml) for all options.
|
|
146
|
+
|
|
147
|
+
## Troubleshooting
|
|
148
|
+
|
|
149
|
+
```bash
|
|
150
|
+
voiceio doctor # see what's working
|
|
151
|
+
voiceio doctor --fix # auto-fix issues
|
|
152
|
+
voiceio logs # check debug output
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
| Problem | Fix |
|
|
156
|
+
|---------|-----|
|
|
157
|
+
| No text appears | `voiceio doctor --fix` - usually a missing IBus component or GNOME input source |
|
|
158
|
+
| Hotkey doesn't work on Wayland | `sudo usermod -aG input $USER` then log out and back in |
|
|
159
|
+
| Transcription too slow | Use a smaller model: `voiceio --model tiny` |
|
|
160
|
+
| Want to start fresh | `voiceio uninstall` then `voiceio setup` |
|
|
161
|
+
| Windows: antivirus blocks hotkeys | pynput uses global keyboard hooks — add an exception for voiceio |
|
|
162
|
+
| Windows: no sound feedback | Check `voiceio logs` for audio device info |
|
|
163
|
+
| macOS issues | Experimental — consider [aquavoice.com](https://aquavoice.com/) or contribute a PR |
|
|
164
|
+
|
|
165
|
+
## Platform support
|
|
166
|
+
|
|
167
|
+
| Platform | Status | Text injection | Hotkeys | Streaming preview |
|
|
168
|
+
|----------|--------|---------------|---------|-------------------|
|
|
169
|
+
| Ubuntu / Debian (GNOME, Wayland) | **Tested daily** | IBus | evdev / GNOME shortcut | Yes |
|
|
170
|
+
| Ubuntu / Debian (GNOME, X11) | Supported | IBus | evdev / pynput | Yes |
|
|
171
|
+
| Fedora (GNOME) | Supported | IBus | evdev / GNOME shortcut | Yes |
|
|
172
|
+
| Arch Linux | Supported | IBus | evdev | Yes |
|
|
173
|
+
| KDE / Sway / Hyprland | Should work | IBus / ydotool / wtype | evdev | Yes |
|
|
174
|
+
| Windows 10/11 | Experimental | pynput / clipboard | pynput | Type-and-correct (no preedit) |
|
|
175
|
+
| macOS | Experimental | pynput / clipboard | pynput | Type-and-correct (no preedit) |
|
|
176
|
+
|
|
177
|
+
voiceio auto-detects your platform and picks the best available backends. Run `voiceio doctor` to see what's working on your system.
|
|
178
|
+
|
|
179
|
+
## Uninstall
|
|
180
|
+
|
|
181
|
+
```bash
|
|
182
|
+
voiceio uninstall # removes service, IBus, shortcuts, symlinks
|
|
183
|
+
pipx uninstall python-voiceio # removes the package
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
## Roadmap
|
|
187
|
+
|
|
188
|
+
Contributions welcome! See [CONTRIBUTING.md](CONTRIBUTING.md) and [open issues](https://github.com/Hugo0/voiceio/issues).
|
|
189
|
+
|
|
190
|
+
**Now**
|
|
191
|
+
- [ ] macOS polish (IMKit for native preedit, Accessibility API for text injection)
|
|
192
|
+
|
|
193
|
+
**Soon**
|
|
194
|
+
- [ ] Per-app context awareness (detect focused app, adapt formatting/behavior)
|
|
195
|
+
- [ ] File/audio transcription mode (`voiceio transcribe recording.mp3`)
|
|
196
|
+
|
|
197
|
+
**Backlog**
|
|
198
|
+
- [ ] Multiple engine backends (whisper.cpp for Vulkan/AMD, VOSK for low-end hardware)
|
|
199
|
+
- [ ] Echo cancellation (filter system audio for meeting use)
|
|
200
|
+
- [ ] Wake word activation ("Hey voiceio")
|
|
201
|
+
- [ ] Text-to-speech output (Piper/espeak-ng — completes the "io")
|
|
202
|
+
|
|
203
|
+
**Done**
|
|
204
|
+
- [x] LLM auto-audit dictionary (`voiceio correct --auto` — scan history with LLM, interactive correction)
|
|
205
|
+
- [x] LLM post-processing via Ollama (grammar cleanup, spelling fixes on final pass)
|
|
206
|
+
- [x] Corrections dictionary — auto-replace misheard words, "correct that" voice command
|
|
207
|
+
- [x] Transcription history — searchable log of everything you've dictated
|
|
208
|
+
- [x] Number-to-digit conversion ("three hundred forty two" → "342")
|
|
209
|
+
- [x] VAD-based silence filtering (Silero VAD, prevents Whisper hallucinations)
|
|
210
|
+
- [x] Voice commands — "new line", "new paragraph", "scratch that", punctuation by name
|
|
211
|
+
- [x] Custom vocabulary / personal dictionary (bias Whisper via `initial_prompt`)
|
|
212
|
+
- [x] Smart punctuation & capitalization post-processing
|
|
213
|
+
- [x] Windows support
|
|
214
|
+
- [x] System tray icon with animated states
|
|
215
|
+
- [x] Auto-stop on silence
|
|
216
|
+
|
|
217
|
+
## License
|
|
218
|
+
|
|
219
|
+
MIT
|
|
@@ -4,18 +4,20 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "python-voiceio"
|
|
7
|
-
version = "0.2.3"
|
|
7
|
+
version = "0.3.0"
|
|
8
8
|
description = "Speak → text, locally, instantly."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "MIT"
|
|
11
11
|
requires-python = ">=3.11"
|
|
12
12
|
authors = [{ name = "Hugo Montenegro" }]
|
|
13
|
-
keywords = ["voice", "speech-to-text", "whisper", "linux", "dictation", "wayland", "ibus"]
|
|
13
|
+
keywords = ["voice", "speech-to-text", "whisper", "linux", "windows", "dictation", "wayland", "ibus"]
|
|
14
14
|
classifiers = [
|
|
15
15
|
"Development Status :: 4 - Beta",
|
|
16
16
|
"Environment :: X11 Applications",
|
|
17
17
|
"Intended Audience :: End Users/Desktop",
|
|
18
18
|
"Operating System :: POSIX :: Linux",
|
|
19
|
+
"Operating System :: Microsoft :: Windows",
|
|
20
|
+
"Operating System :: MacOS",
|
|
19
21
|
"Programming Language :: Python :: 3",
|
|
20
22
|
"Topic :: Multimedia :: Sound/Audio :: Speech",
|
|
21
23
|
]
|
|
@@ -23,18 +25,29 @@ dependencies = [
|
|
|
23
25
|
"faster-whisper>=1.0.0",
|
|
24
26
|
"sounddevice>=0.4.6",
|
|
25
27
|
"numpy>=1.24.0",
|
|
28
|
+
"onnxruntime>=1.16.0",
|
|
29
|
+
"wordfreq>=3.0",
|
|
26
30
|
"evdev>=1.6.0; sys_platform == 'linux'",
|
|
31
|
+
"pynput>=1.7.6; sys_platform == 'win32'",
|
|
32
|
+
"pynput>=1.7.6; sys_platform == 'darwin'",
|
|
33
|
+
"pyperclip>=1.8.0; sys_platform == 'win32'",
|
|
34
|
+
"win11toast>=0.36; sys_platform == 'win32'",
|
|
27
35
|
]
|
|
28
36
|
|
|
29
37
|
[project.optional-dependencies]
|
|
30
38
|
x11 = ["pynput>=1.7.6"]
|
|
31
39
|
mac = ["pynput>=1.7.6"]
|
|
40
|
+
win = ["pynput>=1.7.6", "pyperclip>=1.8.0", "win11toast>=0.36"]
|
|
32
41
|
tray = ["pystray>=0.19", "Pillow>=10.0"]
|
|
42
|
+
tts = ["piper-tts>=1.2.0"]
|
|
43
|
+
tts-cloud = ["edge-tts>=6.1.0"]
|
|
33
44
|
dev = ["pytest>=7.0", "pytest-mock"]
|
|
34
45
|
|
|
35
46
|
[project.urls]
|
|
36
47
|
Homepage = "https://github.com/Hugo0/voiceio"
|
|
48
|
+
Repository = "https://github.com/Hugo0/voiceio"
|
|
37
49
|
Issues = "https://github.com/Hugo0/voiceio/issues"
|
|
50
|
+
Changelog = "https://github.com/Hugo0/voiceio/releases"
|
|
38
51
|
|
|
39
52
|
[project.scripts]
|
|
40
53
|
voiceio = "voiceio.cli:main"
|
|
@@ -52,3 +65,4 @@ include = ["voiceio*"]
|
|
|
52
65
|
|
|
53
66
|
[tool.setuptools.package-data]
|
|
54
67
|
"voiceio.sounds" = ["*.wav"]
|
|
68
|
+
"voiceio.models" = ["*.onnx"]
|