python-voiceio 0.2.4__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_voiceio-0.2.4/python_voiceio.egg-info → python_voiceio-0.3.1}/PKG-INFO +89 -56
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/README.md +72 -55
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/pyproject.toml +14 -2
- {python_voiceio-0.2.4 → python_voiceio-0.3.1/python_voiceio.egg-info}/PKG-INFO +89 -56
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/python_voiceio.egg-info/SOURCES.txt +44 -1
- python_voiceio-0.3.1/python_voiceio.egg-info/requires.txt +41 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/tests/test_app_wiring.py +92 -25
- python_voiceio-0.3.1/tests/test_autocorrect.py +264 -0
- python_voiceio-0.3.1/tests/test_clipboard_read.py +91 -0
- python_voiceio-0.3.1/tests/test_commands.py +173 -0
- python_voiceio-0.3.1/tests/test_corrections.py +148 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/tests/test_fallback.py +10 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/tests/test_health.py +6 -6
- python_voiceio-0.3.1/tests/test_hints.py +74 -0
- python_voiceio-0.3.1/tests/test_history.py +90 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/tests/test_ibus_typer.py +7 -0
- python_voiceio-0.3.1/tests/test_llm.py +217 -0
- python_voiceio-0.3.1/tests/test_llm_api.py +176 -0
- python_voiceio-0.3.1/tests/test_numbers.py +101 -0
- python_voiceio-0.3.1/tests/test_postprocess.py +107 -0
- python_voiceio-0.3.1/tests/test_prompt.py +111 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/tests/test_streaming.py +2 -2
- python_voiceio-0.3.1/tests/test_tts.py +158 -0
- python_voiceio-0.3.1/tests/test_vad.py +118 -0
- python_voiceio-0.3.1/tests/test_vocabulary.py +71 -0
- python_voiceio-0.3.1/tests/test_wordfreq.py +80 -0
- python_voiceio-0.3.1/voiceio/__init__.py +1 -0
- python_voiceio-0.3.1/voiceio/app.py +655 -0
- python_voiceio-0.3.1/voiceio/autocorrect.py +284 -0
- python_voiceio-0.3.1/voiceio/cli.py +1094 -0
- python_voiceio-0.3.1/voiceio/clipboard_read.py +69 -0
- python_voiceio-0.3.1/voiceio/commands.py +130 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/voiceio/config.py +58 -2
- python_voiceio-0.3.1/voiceio/corrections.py +160 -0
- python_voiceio-0.3.1/voiceio/demo.py +199 -0
- python_voiceio-0.3.1/voiceio/feedback.py +162 -0
- python_voiceio-0.3.1/voiceio/health.py +408 -0
- python_voiceio-0.3.1/voiceio/hints.py +58 -0
- python_voiceio-0.3.1/voiceio/history.py +64 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/voiceio/hotkeys/chain.py +1 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/voiceio/hotkeys/pynput_backend.py +23 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/voiceio/hotkeys/socket_backend.py +35 -12
- python_voiceio-0.3.1/voiceio/llm.py +258 -0
- python_voiceio-0.3.1/voiceio/llm_api.py +183 -0
- python_voiceio-0.3.1/voiceio/models/silero_vad.onnx +0 -0
- python_voiceio-0.3.1/voiceio/numbers.py +228 -0
- python_voiceio-0.3.1/voiceio/pidlock.py +22 -0
- python_voiceio-0.3.1/voiceio/platform.py +272 -0
- python_voiceio-0.3.1/voiceio/postprocess.py +84 -0
- python_voiceio-0.3.1/voiceio/prompt.py +73 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/voiceio/recorder.py +53 -13
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/voiceio/service.py +73 -9
- python_voiceio-0.3.1/voiceio/sounds/__init__.py +0 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/voiceio/sounds/commit.wav +0 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/voiceio/sounds/start.wav +0 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/voiceio/sounds/stop.wav +0 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/voiceio/streaming.py +84 -19
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/voiceio/transcriber.py +14 -2
- python_voiceio-0.3.1/voiceio/tray/__init__.py +277 -0
- python_voiceio-0.3.1/voiceio/tray/_icons.py +125 -0
- python_voiceio-0.3.1/voiceio/tray/_indicator.py +181 -0
- python_voiceio-0.3.1/voiceio/tray/_pystray.py +123 -0
- python_voiceio-0.3.1/voiceio/tts/__init__.py +11 -0
- python_voiceio-0.3.1/voiceio/tts/base.py +29 -0
- python_voiceio-0.3.1/voiceio/tts/chain.py +79 -0
- python_voiceio-0.3.1/voiceio/tts/edge_engine.py +74 -0
- python_voiceio-0.3.1/voiceio/tts/espeak.py +47 -0
- python_voiceio-0.3.1/voiceio/tts/piper_engine.py +90 -0
- python_voiceio-0.3.1/voiceio/tts/player.py +62 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/voiceio/typers/chain.py +1 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/voiceio/typers/clipboard.py +49 -6
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/voiceio/typers/ibus.py +3 -2
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/voiceio/typers/pynput_type.py +9 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/voiceio/typers/wtype.py +2 -1
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/voiceio/typers/xdotool.py +2 -1
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/voiceio/typers/ydotool.py +2 -1
- python_voiceio-0.3.1/voiceio/vad.py +122 -0
- python_voiceio-0.3.1/voiceio/vocabulary.py +59 -0
- python_voiceio-0.3.1/voiceio/wizard.py +1463 -0
- python_voiceio-0.3.1/voiceio/wordfreq.py +69 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/voiceio/worker.py +16 -1
- python_voiceio-0.2.4/python_voiceio.egg-info/requires.txt +0 -20
- python_voiceio-0.2.4/voiceio/__init__.py +0 -1
- python_voiceio-0.2.4/voiceio/app.py +0 -414
- python_voiceio-0.2.4/voiceio/cli.py +0 -512
- python_voiceio-0.2.4/voiceio/feedback.py +0 -78
- python_voiceio-0.2.4/voiceio/health.py +0 -194
- python_voiceio-0.2.4/voiceio/platform.py +0 -139
- python_voiceio-0.2.4/voiceio/tray.py +0 -54
- python_voiceio-0.2.4/voiceio/wizard.py +0 -883
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/LICENSE +0 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/python_voiceio.egg-info/dependency_links.txt +0 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/python_voiceio.egg-info/entry_points.txt +0 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/python_voiceio.egg-info/top_level.txt +0 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/setup.cfg +0 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/tests/test_backend_probes.py +0 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/tests/test_config.py +0 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/tests/test_platform.py +0 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/tests/test_prebuffer.py +0 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/tests/test_recorder_integration.py +0 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/tests/test_transcriber.py +0 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/voiceio/__main__.py +0 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/voiceio/backends.py +0 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/voiceio/hotkeys/__init__.py +0 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/voiceio/hotkeys/base.py +0 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/voiceio/hotkeys/evdev.py +0 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/voiceio/ibus/__init__.py +0 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/voiceio/ibus/engine.py +0 -0
- {python_voiceio-0.2.4/voiceio/sounds → python_voiceio-0.3.1/voiceio/models}/__init__.py +0 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/voiceio/typers/__init__.py +0 -0
- {python_voiceio-0.2.4 → python_voiceio-0.3.1}/voiceio/typers/base.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: python-voiceio
|
|
3
|
-
Version: 0.2.4
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: Speak → text, locally, instantly.
|
|
5
5
|
Author: Hugo Montenegro
|
|
6
6
|
License-Expression: MIT
|
|
@@ -8,11 +8,13 @@ Project-URL: Homepage, https://github.com/Hugo0/voiceio
|
|
|
8
8
|
Project-URL: Repository, https://github.com/Hugo0/voiceio
|
|
9
9
|
Project-URL: Issues, https://github.com/Hugo0/voiceio/issues
|
|
10
10
|
Project-URL: Changelog, https://github.com/Hugo0/voiceio/releases
|
|
11
|
-
Keywords: voice,speech-to-text,whisper,linux,dictation,wayland,ibus
|
|
11
|
+
Keywords: voice,speech-to-text,whisper,linux,windows,dictation,wayland,ibus
|
|
12
12
|
Classifier: Development Status :: 4 - Beta
|
|
13
13
|
Classifier: Environment :: X11 Applications
|
|
14
14
|
Classifier: Intended Audience :: End Users/Desktop
|
|
15
15
|
Classifier: Operating System :: POSIX :: Linux
|
|
16
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
17
|
+
Classifier: Operating System :: MacOS
|
|
16
18
|
Classifier: Programming Language :: Python :: 3
|
|
17
19
|
Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
|
|
18
20
|
Requires-Python: >=3.11
|
|
@@ -21,14 +23,28 @@ License-File: LICENSE
|
|
|
21
23
|
Requires-Dist: faster-whisper>=1.0.0
|
|
22
24
|
Requires-Dist: sounddevice>=0.4.6
|
|
23
25
|
Requires-Dist: numpy>=1.24.0
|
|
26
|
+
Requires-Dist: onnxruntime>=1.16.0
|
|
27
|
+
Requires-Dist: wordfreq>=3.0
|
|
24
28
|
Requires-Dist: evdev>=1.6.0; sys_platform == "linux"
|
|
29
|
+
Requires-Dist: pynput>=1.7.6; sys_platform == "win32"
|
|
30
|
+
Requires-Dist: pynput>=1.7.6; sys_platform == "darwin"
|
|
31
|
+
Requires-Dist: pyperclip>=1.8.0; sys_platform == "win32"
|
|
32
|
+
Requires-Dist: win11toast>=0.36; sys_platform == "win32"
|
|
25
33
|
Provides-Extra: x11
|
|
26
34
|
Requires-Dist: pynput>=1.7.6; extra == "x11"
|
|
27
35
|
Provides-Extra: mac
|
|
28
36
|
Requires-Dist: pynput>=1.7.6; extra == "mac"
|
|
37
|
+
Provides-Extra: win
|
|
38
|
+
Requires-Dist: pynput>=1.7.6; extra == "win"
|
|
39
|
+
Requires-Dist: pyperclip>=1.8.0; extra == "win"
|
|
40
|
+
Requires-Dist: win11toast>=0.36; extra == "win"
|
|
29
41
|
Provides-Extra: tray
|
|
30
42
|
Requires-Dist: pystray>=0.19; extra == "tray"
|
|
31
43
|
Requires-Dist: Pillow>=10.0; extra == "tray"
|
|
44
|
+
Provides-Extra: tts
|
|
45
|
+
Requires-Dist: piper-tts>=1.2.0; extra == "tts"
|
|
46
|
+
Provides-Extra: tts-cloud
|
|
47
|
+
Requires-Dist: edge-tts>=6.1.0; extra == "tts-cloud"
|
|
32
48
|
Provides-Extra: dev
|
|
33
49
|
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
34
50
|
Requires-Dist: pytest-mock; extra == "dev"
|
|
@@ -78,6 +94,31 @@ voiceio setup
|
|
|
78
94
|
```
|
|
79
95
|
</details>
|
|
80
96
|
|
|
97
|
+
<details>
|
|
98
|
+
<summary><strong>Windows</strong></summary>
|
|
99
|
+
|
|
100
|
+
```powershell
|
|
101
|
+
# Option A: Install with pip (requires Python 3.11+)
|
|
102
|
+
pip install python-voiceio
|
|
103
|
+
voiceio setup
|
|
104
|
+
|
|
105
|
+
# Option B: Download the installer from GitHub Releases (no Python needed)
|
|
106
|
+
# https://github.com/Hugo0/voiceio/releases
|
|
107
|
+
# Also available as a portable .zip if you prefer no installation.
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Windows uses pynput for hotkeys and text injection. No extra system dependencies required.
|
|
111
|
+
</details>
|
|
112
|
+
|
|
113
|
+
<details>
|
|
114
|
+
<summary><strong>macOS</strong></summary>
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
pipx install python-voiceio
|
|
118
|
+
voiceio setup
|
|
119
|
+
```
|
|
120
|
+
</details>
|
|
121
|
+
|
|
81
122
|
<details>
|
|
82
123
|
<summary><strong>Build from source</strong></summary>
|
|
83
124
|
|
|
@@ -86,9 +127,13 @@ If you want the source code locally to hack on or customize for personal use. PR
|
|
|
86
127
|
```bash
|
|
87
128
|
git clone https://github.com/Hugo0/voiceio
|
|
88
129
|
cd voiceio
|
|
89
|
-
pip install -e ".[linux,dev]"
|
|
90
|
-
|
|
130
|
+
uv pip install -e ".[linux,dev]"
|
|
131
|
+
|
|
132
|
+
# Bootstrap CLI commands onto PATH (creates ~/.local/bin/voiceio)
|
|
133
|
+
uv run voiceio setup
|
|
91
134
|
```
|
|
135
|
+
|
|
136
|
+
> **Note:** Source installs live inside a virtualenv, so `voiceio` isn't on PATH until setup creates symlinks in `~/.local/bin/`. If `voiceio` isn't found after setup, restart your terminal or run `export PATH="$HOME/.local/bin:$PATH"`.
|
|
92
137
|
</details>
|
|
93
138
|
|
|
94
139
|
> You can also install with `uv tool install python-voiceio` or `pip install python-voiceio`.
|
|
@@ -100,11 +145,7 @@ hotkey → mic capture → whisper (local) → text at cursor
|
|
|
100
145
|
pre-buffered streaming IBus / clipboard
|
|
101
146
|
```
|
|
102
147
|
|
|
103
|
-
|
|
104
|
-
2. Speak naturally: text streams into the focused app in real-time as an underlined preview
|
|
105
|
-
3. Press the hotkey again: the final transcription replaces the preview and is committed
|
|
106
|
-
|
|
107
|
-
Transcription runs locally via [faster-whisper](https://github.com/SYSTRAN/faster-whisper). Text is injected through IBus (works in any GTK/Qt app: browsers, Telegram, editors) with an automatic clipboard fallback for terminals.
|
|
148
|
+
Press your hotkey to start recording (1s pre-buffer catches the first syllable). Text streams into the focused app as an underlined preview. Press again to commit. Transcription runs locally via [faster-whisper](https://github.com/SYSTRAN/faster-whisper), text is injected through IBus (any GTK/Qt app) with clipboard fallback for terminals.
|
|
108
149
|
|
|
109
150
|
## Features
|
|
110
151
|
|
|
@@ -137,14 +178,17 @@ voiceio doctor Health check (--fix to auto-repair)
|
|
|
137
178
|
voiceio test Test microphone + live transcription
|
|
138
179
|
voiceio toggle Toggle recording on a running daemon
|
|
139
180
|
voiceio update Update to latest version
|
|
140
|
-
voiceio service install Autostart on login
|
|
181
|
+
voiceio service install Autostart on login (systemd / Windows Startup)
|
|
141
182
|
voiceio logs View recent logs
|
|
142
183
|
voiceio uninstall Remove all system integrations
|
|
143
184
|
```
|
|
144
185
|
|
|
145
186
|
## Configuration
|
|
146
187
|
|
|
147
|
-
`voiceio setup` handles everything interactively. To tweak later, edit
|
|
188
|
+
`voiceio setup` handles everything interactively. To tweak later, edit the config file or override at runtime:
|
|
189
|
+
|
|
190
|
+
- Linux/macOS: `~/.config/voiceio/config.toml`
|
|
191
|
+
- Windows: `%LOCALAPPDATA%\voiceio\config\config.toml`
|
|
148
192
|
|
|
149
193
|
```bash
|
|
150
194
|
voiceio --model large-v3 --language auto -v
|
|
@@ -166,7 +210,9 @@ voiceio logs # check debug output
|
|
|
166
210
|
| Hotkey doesn't work on Wayland | `sudo usermod -aG input $USER` then log out and back in |
|
|
167
211
|
| Transcription too slow | Use a smaller model: `voiceio --model tiny` |
|
|
168
212
|
| Want to start fresh | `voiceio uninstall` then `voiceio setup` |
|
|
169
|
-
|
|
|
213
|
+
| Windows: antivirus blocks hotkeys | pynput uses global keyboard hooks — add an exception for voiceio |
|
|
214
|
+
| Windows: no sound feedback | Check `voiceio logs` for audio device info |
|
|
215
|
+
| macOS issues | Experimental — consider [aquavoice.com](https://aquavoice.com/) or contribute a PR |
|
|
170
216
|
|
|
171
217
|
## Platform support
|
|
172
218
|
|
|
@@ -177,6 +223,7 @@ voiceio logs # check debug output
|
|
|
177
223
|
| Fedora (GNOME) | Supported | IBus | evdev / GNOME shortcut | Yes |
|
|
178
224
|
| Arch Linux | Supported | IBus | evdev | Yes |
|
|
179
225
|
| KDE / Sway / Hyprland | Should work | IBus / ydotool / wtype | evdev | Yes |
|
|
226
|
+
| Windows 10/11 | Experimental | pynput / clipboard | pynput | Type-and-correct (no preedit) |
|
|
180
227
|
| macOS | Experimental | pynput / clipboard | pynput | Type-and-correct (no preedit) |
|
|
181
228
|
|
|
182
229
|
voiceio auto-detects your platform and picks the best available backends. Run `voiceio doctor` to see what's working on your system.
|
|
@@ -188,50 +235,36 @@ voiceio uninstall # removes service, IBus, shortcuts, symlinks
|
|
|
188
235
|
pipx uninstall python-voiceio # removes the package
|
|
189
236
|
```
|
|
190
237
|
|
|
191
|
-
##
|
|
192
|
-
|
|
193
|
-
Contributions welcome! See [CONTRIBUTING.md](CONTRIBUTING.md)
|
|
194
|
-
|
|
195
|
-
**
|
|
196
|
-
- [ ]
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
- [ ]
|
|
200
|
-
- [ ]
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
- [ ]
|
|
204
|
-
- [ ]
|
|
205
|
-
- [ ]
|
|
206
|
-
- [ ]
|
|
207
|
-
|
|
208
|
-
**
|
|
209
|
-
- [
|
|
210
|
-
- [
|
|
211
|
-
- [
|
|
212
|
-
- [
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
- [
|
|
216
|
-
- [
|
|
217
|
-
- [
|
|
218
|
-
- [
|
|
219
|
-
- [
|
|
220
|
-
- [
|
|
221
|
-
|
|
222
|
-
**Power features**
|
|
223
|
-
- [ ] **Multi-language in one session**: auto-detect language switches mid-dictation (Whisper supports this but needs tuning)
|
|
224
|
-
- [ ] **Speaker diarization**: "Person 1: ... Person 2: ..." for meeting notes (via pyannote or whisperX)
|
|
225
|
-
- [ ] **LLM post-processing**: pipe transcription through a local LLM (Ollama) for grammar correction, summarization, or reformatting
|
|
226
|
-
- [ ] **Clipboard history**: keep last N transcriptions, quick-paste from history
|
|
227
|
-
- [ ] **Transcription log / journal**: searchable history of everything you've dictated, with timestamps
|
|
228
|
-
- [ ] **API / webhook**: expose a local API so other tools can trigger recording or receive transcriptions
|
|
229
|
-
- [ ] **Browser extension**: inject text into web apps that don't work with IBus (e.g. some Electron apps)
|
|
230
|
-
|
|
231
|
-
**Developer experience**
|
|
232
|
-
- [ ] **Plugin system**: hooks for pre/post processing (e.g. custom formatters, translators, text transforms)
|
|
233
|
-
- [ ] **Alternative STT backends**: support Whisper.cpp, Deepgram, AssemblyAI, OpenAI Whisper API as optional backends
|
|
234
|
-
- [ ] **GPU acceleration docs**: CUDA/ROCm setup guide for faster transcription on large models
|
|
238
|
+
## Roadmap
|
|
239
|
+
|
|
240
|
+
Contributions welcome! See [CONTRIBUTING.md](CONTRIBUTING.md) and [open issues](https://github.com/Hugo0/voiceio/issues).
|
|
241
|
+
|
|
242
|
+
**Now**
|
|
243
|
+
- [ ] macOS polish (IMKit for native preedit, Accessibility API for text injection)
|
|
244
|
+
|
|
245
|
+
**Soon**
|
|
246
|
+
- [ ] Per-app context awareness (detect focused app, adapt formatting/behavior)
|
|
247
|
+
- [ ] File/audio transcription mode (`voiceio transcribe recording.mp3`)
|
|
248
|
+
|
|
249
|
+
**Backlog**
|
|
250
|
+
- [ ] Multiple engine backends (whisper.cpp for Vulkan/AMD, VOSK for low-end hardware)
|
|
251
|
+
- [ ] Echo cancellation (filter system audio for meeting use)
|
|
252
|
+
- [ ] Wake word activation ("Hey voiceio")
|
|
253
|
+
- [ ] Text-to-speech output (Piper/espeak-ng — completes the "io")
|
|
254
|
+
|
|
255
|
+
**Done**
|
|
256
|
+
- [x] LLM auto-audit dictionary (`voiceio correct --auto` — scan history with LLM, interactive correction)
|
|
257
|
+
- [x] LLM post-processing via Ollama (grammar cleanup, spelling fixes on final pass)
|
|
258
|
+
- [x] Corrections dictionary — auto-replace misheard words, "correct that" voice command
|
|
259
|
+
- [x] Transcription history — searchable log of everything you've dictated
|
|
260
|
+
- [x] Number-to-digit conversion ("three hundred forty two" → "342")
|
|
261
|
+
- [x] VAD-based silence filtering (Silero VAD, prevents Whisper hallucinations)
|
|
262
|
+
- [x] Voice commands — "new line", "new paragraph", "scratch that", punctuation by name
|
|
263
|
+
- [x] Custom vocabulary / personal dictionary (bias Whisper via `initial_prompt`)
|
|
264
|
+
- [x] Smart punctuation & capitalization post-processing
|
|
265
|
+
- [x] Windows support
|
|
266
|
+
- [x] System tray icon with animated states
|
|
267
|
+
- [x] Auto-stop on silence
|
|
235
268
|
|
|
236
269
|
## License
|
|
237
270
|
|
|
@@ -42,6 +42,31 @@ voiceio setup
|
|
|
42
42
|
```
|
|
43
43
|
</details>
|
|
44
44
|
|
|
45
|
+
<details>
|
|
46
|
+
<summary><strong>Windows</strong></summary>
|
|
47
|
+
|
|
48
|
+
```powershell
|
|
49
|
+
# Option A: Install with pip (requires Python 3.11+)
|
|
50
|
+
pip install python-voiceio
|
|
51
|
+
voiceio setup
|
|
52
|
+
|
|
53
|
+
# Option B: Download the installer from GitHub Releases (no Python needed)
|
|
54
|
+
# https://github.com/Hugo0/voiceio/releases
|
|
55
|
+
# Also available as a portable .zip if you prefer no installation.
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Windows uses pynput for hotkeys and text injection. No extra system dependencies required.
|
|
59
|
+
</details>
|
|
60
|
+
|
|
61
|
+
<details>
|
|
62
|
+
<summary><strong>macOS</strong></summary>
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
pipx install python-voiceio
|
|
66
|
+
voiceio setup
|
|
67
|
+
```
|
|
68
|
+
</details>
|
|
69
|
+
|
|
45
70
|
<details>
|
|
46
71
|
<summary><strong>Build from source</strong></summary>
|
|
47
72
|
|
|
@@ -50,9 +75,13 @@ If you want the source code locally to hack on or customize for personal use. PR
|
|
|
50
75
|
```bash
|
|
51
76
|
git clone https://github.com/Hugo0/voiceio
|
|
52
77
|
cd voiceio
|
|
53
|
-
pip install -e ".[linux,dev]"
|
|
54
|
-
|
|
78
|
+
uv pip install -e ".[linux,dev]"
|
|
79
|
+
|
|
80
|
+
# Bootstrap CLI commands onto PATH (creates ~/.local/bin/voiceio)
|
|
81
|
+
uv run voiceio setup
|
|
55
82
|
```
|
|
83
|
+
|
|
84
|
+
> **Note:** Source installs live inside a virtualenv, so `voiceio` isn't on PATH until setup creates symlinks in `~/.local/bin/`. If `voiceio` isn't found after setup, restart your terminal or run `export PATH="$HOME/.local/bin:$PATH"`.
|
|
56
85
|
</details>
|
|
57
86
|
|
|
58
87
|
> You can also install with `uv tool install python-voiceio` or `pip install python-voiceio`.
|
|
@@ -64,11 +93,7 @@ hotkey → mic capture → whisper (local) → text at cursor
|
|
|
64
93
|
pre-buffered streaming IBus / clipboard
|
|
65
94
|
```
|
|
66
95
|
|
|
67
|
-
|
|
68
|
-
2. Speak naturally: text streams into the focused app in real-time as an underlined preview
|
|
69
|
-
3. Press the hotkey again: the final transcription replaces the preview and is committed
|
|
70
|
-
|
|
71
|
-
Transcription runs locally via [faster-whisper](https://github.com/SYSTRAN/faster-whisper). Text is injected through IBus (works in any GTK/Qt app: browsers, Telegram, editors) with an automatic clipboard fallback for terminals.
|
|
96
|
+
Press your hotkey to start recording (1s pre-buffer catches the first syllable). Text streams into the focused app as an underlined preview. Press again to commit. Transcription runs locally via [faster-whisper](https://github.com/SYSTRAN/faster-whisper), text is injected through IBus (any GTK/Qt app) with clipboard fallback for terminals.
|
|
72
97
|
|
|
73
98
|
## Features
|
|
74
99
|
|
|
@@ -101,14 +126,17 @@ voiceio doctor Health check (--fix to auto-repair)
|
|
|
101
126
|
voiceio test Test microphone + live transcription
|
|
102
127
|
voiceio toggle Toggle recording on a running daemon
|
|
103
128
|
voiceio update Update to latest version
|
|
104
|
-
voiceio service install Autostart on login
|
|
129
|
+
voiceio service install Autostart on login (systemd / Windows Startup)
|
|
105
130
|
voiceio logs View recent logs
|
|
106
131
|
voiceio uninstall Remove all system integrations
|
|
107
132
|
```
|
|
108
133
|
|
|
109
134
|
## Configuration
|
|
110
135
|
|
|
111
|
-
`voiceio setup` handles everything interactively. To tweak later, edit
|
|
136
|
+
`voiceio setup` handles everything interactively. To tweak later, edit the config file or override at runtime:
|
|
137
|
+
|
|
138
|
+
- Linux/macOS: `~/.config/voiceio/config.toml`
|
|
139
|
+
- Windows: `%LOCALAPPDATA%\voiceio\config\config.toml`
|
|
112
140
|
|
|
113
141
|
```bash
|
|
114
142
|
voiceio --model large-v3 --language auto -v
|
|
@@ -130,7 +158,9 @@ voiceio logs # check debug output
|
|
|
130
158
|
| Hotkey doesn't work on Wayland | `sudo usermod -aG input $USER` then log out and back in |
|
|
131
159
|
| Transcription too slow | Use a smaller model: `voiceio --model tiny` |
|
|
132
160
|
| Want to start fresh | `voiceio uninstall` then `voiceio setup` |
|
|
133
|
-
|
|
|
161
|
+
| Windows: antivirus blocks hotkeys | pynput uses global keyboard hooks — add an exception for voiceio |
|
|
162
|
+
| Windows: no sound feedback | Check `voiceio logs` for audio device info |
|
|
163
|
+
| macOS issues | Experimental — consider [aquavoice.com](https://aquavoice.com/) or contribute a PR |
|
|
134
164
|
|
|
135
165
|
## Platform support
|
|
136
166
|
|
|
@@ -141,6 +171,7 @@ voiceio logs # check debug output
|
|
|
141
171
|
| Fedora (GNOME) | Supported | IBus | evdev / GNOME shortcut | Yes |
|
|
142
172
|
| Arch Linux | Supported | IBus | evdev | Yes |
|
|
143
173
|
| KDE / Sway / Hyprland | Should work | IBus / ydotool / wtype | evdev | Yes |
|
|
174
|
+
| Windows 10/11 | Experimental | pynput / clipboard | pynput | Type-and-correct (no preedit) |
|
|
144
175
|
| macOS | Experimental | pynput / clipboard | pynput | Type-and-correct (no preedit) |
|
|
145
176
|
|
|
146
177
|
voiceio auto-detects your platform and picks the best available backends. Run `voiceio doctor` to see what's working on your system.
|
|
@@ -152,51 +183,37 @@ voiceio uninstall # removes service, IBus, shortcuts, symlinks
|
|
|
152
183
|
pipx uninstall python-voiceio # removes the package
|
|
153
184
|
```
|
|
154
185
|
|
|
155
|
-
##
|
|
156
|
-
|
|
157
|
-
Contributions welcome! See [CONTRIBUTING.md](CONTRIBUTING.md)
|
|
158
|
-
|
|
159
|
-
**
|
|
160
|
-
- [ ]
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
- [ ]
|
|
164
|
-
- [ ]
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
- [ ]
|
|
168
|
-
- [ ]
|
|
169
|
-
- [ ]
|
|
170
|
-
- [ ]
|
|
171
|
-
|
|
172
|
-
**
|
|
173
|
-
- [
|
|
174
|
-
- [
|
|
175
|
-
- [
|
|
176
|
-
- [
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
- [
|
|
180
|
-
- [
|
|
181
|
-
- [
|
|
182
|
-
- [
|
|
183
|
-
- [
|
|
184
|
-
- [
|
|
185
|
-
|
|
186
|
-
**Power features**
|
|
187
|
-
- [ ] **Multi-language in one session**: auto-detect language switches mid-dictation (Whisper supports this but needs tuning)
|
|
188
|
-
- [ ] **Speaker diarization**: "Person 1: ... Person 2: ..." for meeting notes (via pyannote or whisperX)
|
|
189
|
-
- [ ] **LLM post-processing**: pipe transcription through a local LLM (Ollama) for grammar correction, summarization, or reformatting
|
|
190
|
-
- [ ] **Clipboard history**: keep last N transcriptions, quick-paste from history
|
|
191
|
-
- [ ] **Transcription log / journal**: searchable history of everything you've dictated, with timestamps
|
|
192
|
-
- [ ] **API / webhook**: expose a local API so other tools can trigger recording or receive transcriptions
|
|
193
|
-
- [ ] **Browser extension**: inject text into web apps that don't work with IBus (e.g. some Electron apps)
|
|
194
|
-
|
|
195
|
-
**Developer experience**
|
|
196
|
-
- [ ] **Plugin system**: hooks for pre/post processing (e.g. custom formatters, translators, text transforms)
|
|
197
|
-
- [ ] **Alternative STT backends**: support Whisper.cpp, Deepgram, AssemblyAI, OpenAI Whisper API as optional backends
|
|
198
|
-
- [ ] **GPU acceleration docs**: CUDA/ROCm setup guide for faster transcription on large models
|
|
186
|
+
## Roadmap
|
|
187
|
+
|
|
188
|
+
Contributions welcome! See [CONTRIBUTING.md](CONTRIBUTING.md) and [open issues](https://github.com/Hugo0/voiceio/issues).
|
|
189
|
+
|
|
190
|
+
**Now**
|
|
191
|
+
- [ ] macOS polish (IMKit for native preedit, Accessibility API for text injection)
|
|
192
|
+
|
|
193
|
+
**Soon**
|
|
194
|
+
- [ ] Per-app context awareness (detect focused app, adapt formatting/behavior)
|
|
195
|
+
- [ ] File/audio transcription mode (`voiceio transcribe recording.mp3`)
|
|
196
|
+
|
|
197
|
+
**Backlog**
|
|
198
|
+
- [ ] Multiple engine backends (whisper.cpp for Vulkan/AMD, VOSK for low-end hardware)
|
|
199
|
+
- [ ] Echo cancellation (filter system audio for meeting use)
|
|
200
|
+
- [ ] Wake word activation ("Hey voiceio")
|
|
201
|
+
- [ ] Text-to-speech output (Piper/espeak-ng — completes the "io")
|
|
202
|
+
|
|
203
|
+
**Done**
|
|
204
|
+
- [x] LLM auto-audit dictionary (`voiceio correct --auto` — scan history with LLM, interactive correction)
|
|
205
|
+
- [x] LLM post-processing via Ollama (grammar cleanup, spelling fixes on final pass)
|
|
206
|
+
- [x] Corrections dictionary — auto-replace misheard words, "correct that" voice command
|
|
207
|
+
- [x] Transcription history — searchable log of everything you've dictated
|
|
208
|
+
- [x] Number-to-digit conversion ("three hundred forty two" → "342")
|
|
209
|
+
- [x] VAD-based silence filtering (Silero VAD, prevents Whisper hallucinations)
|
|
210
|
+
- [x] Voice commands — "new line", "new paragraph", "scratch that", punctuation by name
|
|
211
|
+
- [x] Custom vocabulary / personal dictionary (bias Whisper via `initial_prompt`)
|
|
212
|
+
- [x] Smart punctuation & capitalization post-processing
|
|
213
|
+
- [x] Windows support
|
|
214
|
+
- [x] System tray icon with animated states
|
|
215
|
+
- [x] Auto-stop on silence
|
|
199
216
|
|
|
200
217
|
## License
|
|
201
218
|
|
|
202
|
-
MIT
|
|
219
|
+
MIT
|
|
@@ -4,18 +4,20 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "python-voiceio"
|
|
7
|
-
version = "0.2.4"
|
|
7
|
+
version = "0.3.1"
|
|
8
8
|
description = "Speak → text, locally, instantly."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "MIT"
|
|
11
11
|
requires-python = ">=3.11"
|
|
12
12
|
authors = [{ name = "Hugo Montenegro" }]
|
|
13
|
-
keywords = ["voice", "speech-to-text", "whisper", "linux", "dictation", "wayland", "ibus"]
|
|
13
|
+
keywords = ["voice", "speech-to-text", "whisper", "linux", "windows", "dictation", "wayland", "ibus"]
|
|
14
14
|
classifiers = [
|
|
15
15
|
"Development Status :: 4 - Beta",
|
|
16
16
|
"Environment :: X11 Applications",
|
|
17
17
|
"Intended Audience :: End Users/Desktop",
|
|
18
18
|
"Operating System :: POSIX :: Linux",
|
|
19
|
+
"Operating System :: Microsoft :: Windows",
|
|
20
|
+
"Operating System :: MacOS",
|
|
19
21
|
"Programming Language :: Python :: 3",
|
|
20
22
|
"Topic :: Multimedia :: Sound/Audio :: Speech",
|
|
21
23
|
]
|
|
@@ -23,13 +25,22 @@ dependencies = [
|
|
|
23
25
|
"faster-whisper>=1.0.0",
|
|
24
26
|
"sounddevice>=0.4.6",
|
|
25
27
|
"numpy>=1.24.0",
|
|
28
|
+
"onnxruntime>=1.16.0",
|
|
29
|
+
"wordfreq>=3.0",
|
|
26
30
|
"evdev>=1.6.0; sys_platform == 'linux'",
|
|
31
|
+
"pynput>=1.7.6; sys_platform == 'win32'",
|
|
32
|
+
"pynput>=1.7.6; sys_platform == 'darwin'",
|
|
33
|
+
"pyperclip>=1.8.0; sys_platform == 'win32'",
|
|
34
|
+
"win11toast>=0.36; sys_platform == 'win32'",
|
|
27
35
|
]
|
|
28
36
|
|
|
29
37
|
[project.optional-dependencies]
|
|
30
38
|
x11 = ["pynput>=1.7.6"]
|
|
31
39
|
mac = ["pynput>=1.7.6"]
|
|
40
|
+
win = ["pynput>=1.7.6", "pyperclip>=1.8.0", "win11toast>=0.36"]
|
|
32
41
|
tray = ["pystray>=0.19", "Pillow>=10.0"]
|
|
42
|
+
tts = ["piper-tts>=1.2.0"]
|
|
43
|
+
tts-cloud = ["edge-tts>=6.1.0"]
|
|
33
44
|
dev = ["pytest>=7.0", "pytest-mock"]
|
|
34
45
|
|
|
35
46
|
[project.urls]
|
|
@@ -54,3 +65,4 @@ include = ["voiceio*"]
|
|
|
54
65
|
|
|
55
66
|
[tool.setuptools.package-data]
|
|
56
67
|
"voiceio.sounds" = ["*.wav"]
|
|
68
|
+
"voiceio.models" = ["*.onnx"]
|