morvox 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- morvox-1.0.0/LICENSE +21 -0
- morvox-1.0.0/PKG-INFO +236 -0
- morvox-1.0.0/README.md +207 -0
- morvox-1.0.0/pyproject.toml +43 -0
- morvox-1.0.0/setup.cfg +4 -0
- morvox-1.0.0/src/morvox/__init__.py +0 -0
- morvox-1.0.0/src/morvox/__main__.py +83 -0
- morvox-1.0.0/src/morvox/backends/__init__.py +26 -0
- morvox-1.0.0/src/morvox/backends/linux.py +281 -0
- morvox-1.0.0/src/morvox/backends/macos.py +236 -0
- morvox-1.0.0/src/morvox/backends/windows.py +870 -0
- morvox-1.0.0/src/morvox/commands.py +31 -0
- morvox-1.0.0/src/morvox/constants.py +125 -0
- morvox-1.0.0/src/morvox/recording.py +489 -0
- morvox-1.0.0/src/morvox/state.py +250 -0
- morvox-1.0.0/src/morvox/widget.py +873 -0
- morvox-1.0.0/src/morvox.egg-info/PKG-INFO +236 -0
- morvox-1.0.0/src/morvox.egg-info/SOURCES.txt +19 -0
- morvox-1.0.0/src/morvox.egg-info/dependency_links.txt +1 -0
- morvox-1.0.0/src/morvox.egg-info/entry_points.txt +2 -0
- morvox-1.0.0/src/morvox.egg-info/top_level.txt +1 -0
morvox-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 morhook
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
morvox-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: morvox
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Offline push-to-talk voice-to-text CLI powered by whisper.cpp
|
|
5
|
+
Author-email: morhook <cruz.fernandez@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/morhook/morvox
|
|
8
|
+
Project-URL: Repository, https://github.com/morhook/morvox
|
|
9
|
+
Project-URL: Issues, https://github.com/morhook/morvox/issues
|
|
10
|
+
Keywords: cli,dictation,speech-to-text,voice,whisper.cpp
|
|
11
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Intended Audience :: End Users/Desktop
|
|
14
|
+
Classifier: Natural Language :: English
|
|
15
|
+
Classifier: Operating System :: MacOS
|
|
16
|
+
Classifier: Operating System :: Microsoft :: Windows :: Windows 11
|
|
17
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
23
|
+
Classifier: Topic :: Multimedia :: Sound/Audio :: Capture/Recording
|
|
24
|
+
Classifier: Topic :: Utilities
|
|
25
|
+
Requires-Python: >=3.10
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
License-File: LICENSE
|
|
28
|
+
Dynamic: license-file
|
|
29
|
+
|
|
30
|
+
# morvox
|
|
31
|
+
|
|
32
|
+
An awesome push-to-talk-style voice-to-text widget for everyone.
|
|
33
|
+
|
|
34
|
+
One command (`morvox`) that toggles:
|
|
35
|
+
|
|
36
|
+
1. **First press** → starts recording from the default mic, remembers the
|
|
37
|
+
currently focused window/app, and shows a "Recording…" widget.
|
|
38
|
+
2. **Second press** → stops the recorder, transcribes the clip with
|
|
39
|
+
`whisper-cli` (whisper.cpp), and types the transcription into your
|
|
40
|
+
target app.
|
|
41
|
+
|
|
42
|
+
> **Note:** Windows 11 has a built-in dictation tool — press `Win+H` to open it. macOS has System Dictation built in too, accessible via **System Settings → Keyboard → Dictation** (typically triggered by double-pressing `Fn`). morvox is an alternative: it runs a local [whisper.cpp](https://github.com/ggerganov/whisper.cpp) model entirely offline, gives you a visual VU-meter widget, and wires into any hotkey manager you already use.
|
|
43
|
+
|
|
44
|
+
morvox auto-selects a platform backend:
|
|
45
|
+
|
|
46
|
+
- **Linux** — uses `parecord` for capture and `xdotool` for window
|
|
47
|
+
control + keystroke injection. Wayland sessions are also supported.
|
|
48
|
+
- **macOS** — uses `ffmpeg` (avfoundation) for capture and `osascript`
|
|
49
|
+
(System Events) for window focus + keystrokes.
|
|
50
|
+
- **Windows 11** — uses `ffmpeg` (WASAPI) for capture and Win32 APIs for
|
|
51
|
+
keystroke injection. On Windows, morvox inserts into the window that is
|
|
52
|
+
focused when transcription finishes: it tries several automatic clipboard
|
|
53
|
+
paste methods first, then direct Unicode typing, and only leaves the
|
|
54
|
+
transcript on the clipboard if all insertion methods are blocked.
|
|
55
|
+
|
|
56
|
+
You can force a backend with `MORVOX_BACKEND=x11`, `MORVOX_BACKEND=macos`,
|
|
57
|
+
or `MORVOX_BACKEND=windows`.
|
|
58
|
+
|
|
59
|
+
## Table of Contents
|
|
60
|
+
|
|
61
|
+
- [Epistemology](#epistemology)
|
|
62
|
+
- [Screenshots](#screenshots)
|
|
63
|
+
- [What it does](#what-it-does)
|
|
64
|
+
- [Setup & installation](https://github.com/morhook/morvox/blob/main/INSTALLATION.md)
|
|
65
|
+
- [Dependencies](https://github.com/morhook/morvox/blob/main/INSTALLATION.md#dependencies)
|
|
66
|
+
- [Installation](https://github.com/morhook/morvox/blob/main/INSTALLATION.md#installation)
|
|
67
|
+
- [Hotkey configuration](https://github.com/morhook/morvox/blob/main/INSTALLATION.md#hotkey-configuration)
|
|
68
|
+
- [Linux hotkey (i3)](https://github.com/morhook/morvox/blob/main/INSTALLATION.md#linux-hotkey-i3)
|
|
69
|
+
- [macOS hotkey](https://github.com/morhook/morvox/blob/main/INSTALLATION.md#macos-hotkey)
|
|
70
|
+
- [skhd](https://github.com/morhook/morvox/blob/main/INSTALLATION.md#skhd)
|
|
71
|
+
- [Hammerspoon](https://github.com/morhook/morvox/blob/main/INSTALLATION.md#hammerspoon)
|
|
72
|
+
- [Windows hotkey](https://github.com/morhook/morvox/blob/main/INSTALLATION.md#windows-hotkey)
|
|
73
|
+
- [Usage](#usage)
|
|
74
|
+
- [The widget](#the-widget)
|
|
75
|
+
- [Troubleshooting](#troubleshooting)
|
|
76
|
+
- [License](#license)
|
|
77
|
+
|
|
78
|
+
## Epistemology
|
|
79
|
+
|
|
80
|
+
The name combines morhook (the author's handle) and "vox", Latin for "voice": mor-vox. I know, if I explain the joke, it's not funny. Don't judge me.
|
|
81
|
+
|
|
82
|
+
## Screenshots
|
|
83
|
+
|
|
84
|
+

|
|
85
|
+

|
|
86
|
+

|
|
87
|
+

|
|
88
|
+
|
|
89
|
+
## What it does
|
|
90
|
+
|
|
91
|
+
- It wraps `whisper-cli` and shows a VU meter in a small on-screen widget. You need to bind `morvox` to a hotkey yourself in your OS or desktop environment.
|
|
92
|
+
|
|
93
|
+
## Setup & installation
|
|
94
|
+
|
|
95
|
+
Setup, dependencies, install steps, and hotkey configuration are in
|
|
96
|
+
[`INSTALLATION.md`](https://github.com/morhook/morvox/blob/main/INSTALLATION.md).
|
|
97
|
+
|
|
98
|
+
## Usage
|
|
99
|
+
|
|
100
|
+
```sh
|
|
101
|
+
# toggle (start, then stop+transcribe+type)
|
|
102
|
+
morvox
|
|
103
|
+
|
|
104
|
+
# fallback if you prefer module execution
|
|
105
|
+
python -m morvox
|
|
106
|
+
|
|
107
|
+
# status (for i3blocks / polybar)
|
|
108
|
+
morvox --status # prints "recording" or "idle"
|
|
109
|
+
|
|
110
|
+
# abort an in-flight recording without transcribing
|
|
111
|
+
morvox --cancel
|
|
112
|
+
|
|
113
|
+
# keep the wav/txt around for debugging
|
|
114
|
+
morvox --keep-temp
|
|
115
|
+
|
|
116
|
+
# use a different model / source / typing speed
|
|
117
|
+
morvox --model /path/to/ggml-tiny.en.bin
|
|
118
|
+
morvox --source alsa_input.usb-Maono_Maonocaster…
|
|
119
|
+
morvox --threads 8
|
|
120
|
+
morvox --type-delay 5
|
|
121
|
+
|
|
122
|
+
# disable the floating widget (headless / SSH / debugging)
|
|
123
|
+
morvox --no-widget
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
From a source checkout, you can still run `./morvox` before installing.
|
|
127
|
+
|
|
128
|
+
State files live in `$XDG_RUNTIME_DIR/morvox/` on Linux, falling back to
|
|
129
|
+
`/tmp/morvox-$UID/` when `$XDG_RUNTIME_DIR` is unset;
|
|
130
|
+
`~/Library/Caches/morvox/` on macOS; and `%LOCALAPPDATA%\morvox\` on
|
|
131
|
+
Windows. Override with the `MORVOX_STATE_DIR` env var. The directory holds:
|
|
132
|
+
|
|
133
|
+
- `rec.pid` — recorder PID
|
|
134
|
+
- `target_window` — saved focused window id
|
|
135
|
+
- `rec.wav` / `rec.txt` — audio + transcript
|
|
136
|
+
- `parecord.log` / `whisper.log` — diagnostic logs
|
|
137
|
+
|
|
138
|
+
By default these are deleted after a successful type. Pass `--keep-temp`
|
|
139
|
+
to keep them.
|
|
140
|
+
|
|
141
|
+
## The widget
|
|
142
|
+
|
|
143
|
+
While recording, morvox shows a small borderless window centred near the
|
|
144
|
+
bottom of the screen. It contains:
|
|
145
|
+
|
|
146
|
+
- a pulsing red dot (recording indicator),
|
|
147
|
+
- a live VU meter that reacts to your microphone level,
|
|
148
|
+
- an elapsed-time counter.
|
|
149
|
+
|
|
150
|
+
When you stop recording, the meter is replaced by a "Transcribing…"
|
|
151
|
+
spinner that stays visible until whisper finishes and the transcript has
|
|
152
|
+
been typed. If whisper produced only silence the widget briefly shows
|
|
153
|
+
"No speech detected" instead.
|
|
154
|
+
|
|
155
|
+
The widget is a self-spawned subprocess of `morvox` (uses Python's
|
|
156
|
+
stdlib `tkinter`). Its stderr is written to the platform state dir's
|
|
157
|
+
`widget.log` for debugging. On Linux/X11 it uses
|
|
158
|
+
`_NET_WM_WINDOW_TYPE_DOCK` so i3 won't try to tile it. On Wayland-only
|
|
159
|
+
sessions without XWayland, or on hosts without `$DISPLAY`, the widget is
|
|
160
|
+
skipped silently.
|
|
161
|
+
|
|
162
|
+
To disable the widget entirely (e.g. on a headless machine or over SSH),
|
|
163
|
+
pass `--no-widget`.
|
|
164
|
+
|
|
165
|
+
## Troubleshooting
|
|
166
|
+
|
|
167
|
+
- **No audio recorded / empty wav (Linux)**
|
|
168
|
+
Check the active sources: `pactl list short sources`. Pass an explicit
|
|
169
|
+
source with `--source <NAME>`. Inspect
|
|
170
|
+
`$XDG_RUNTIME_DIR/morvox/parecord.log` or `/tmp/morvox-$UID/parecord.log`.
|
|
171
|
+
|
|
172
|
+
- **No audio recorded / empty wav (macOS)**
|
|
173
|
+
List devices with `ffmpeg -f avfoundation -list_devices true -i ""`
|
|
174
|
+
and pass an explicit `--source :<idx>`. Inspect
|
|
175
|
+
`~/Library/Caches/morvox/parecord.log`. If ffmpeg complains about
|
|
176
|
+
permissions, grant the terminal Microphone access.
|
|
177
|
+
|
|
178
|
+
- **No audio recorded / empty wav (Windows)**
|
|
179
|
+
List audio devices with `ffmpeg -list_devices true -f wasapi -i dummy`
|
|
180
|
+
(or `ffmpeg -list_devices true -f dshow -i dummy` if your ffmpeg build
|
|
181
|
+
lacks WASAPI) and pass an explicit `--source "<device name>"`. Inspect
|
|
182
|
+
`%LOCALAPPDATA%\morvox\parecord.log`. If ffmpeg cannot access the
|
|
183
|
+
microphone, check **Settings -> Privacy & security -> Microphone**.
|
|
184
|
+
|
|
185
|
+
- **Text typed into wrong window**
|
|
186
|
+
On Linux and macOS, the originally focused window/app may have been
|
|
187
|
+
destroyed before you stopped recording. morvox falls back to typing
|
|
188
|
+
into whatever is currently focused and prints a warning to stderr. On
|
|
189
|
+
Windows, morvox intentionally types into the window that is focused
|
|
190
|
+
when transcription finishes.
|
|
191
|
+
|
|
192
|
+
- **Linux Wayland: nothing is typed (GNOME/Ubuntu)**
|
|
193
|
+
GNOME/Mutter doesn't implement the `wtype` keyboard protocol and
|
|
194
|
+
`xdotool` is a no-op against native Wayland windows. Either set up
|
|
195
|
+
`ydotoold` (`sudo systemctl enable --now ydotoold` and add your user
|
|
196
|
+
to the `input` group), or install `wl-clipboard` so the transcript
|
|
197
|
+
lands on your clipboard for manual Ctrl+Shift+V. See
|
|
198
|
+
[`INSTALLATION.md` (Linux / Wayland)](https://github.com/morhook/morvox/blob/main/INSTALLATION.md#linux--wayland).
|
|
199
|
+
|
|
200
|
+
- **Linux: widget never appears (asdf/pyenv/conda Python)**
|
|
201
|
+
The widget runs as a Python subprocess and needs `tkinter`. Many
|
|
202
|
+
third-party Python builds ship without it. Check
|
|
203
|
+
`$XDG_RUNTIME_DIR/morvox/widget.log` or `/tmp/morvox-$UID/widget.log` for
|
|
204
|
+
`No module named 'tkinter'`. Install `python3-tk` and run morvox under the
|
|
205
|
+
system Python, rebuild your managed Python with Tk support, or use
|
|
206
|
+
`--no-widget` to silence the warning.
|
|
207
|
+
|
|
208
|
+
- **macOS: keystrokes silently do nothing**
|
|
209
|
+
Accessibility permission isn't granted. **System Settings → Privacy &
|
|
210
|
+
Security → Accessibility** → enable your terminal app.
|
|
211
|
+
|
|
212
|
+
- **Windows: text does not type into an elevated app**
|
|
213
|
+
Windows blocks lower-integrity processes from injecting keystrokes into
|
|
214
|
+
elevated/admin windows. Run morvox from an elevated terminal too, or type
|
|
215
|
+
into a non-elevated app.
|
|
216
|
+
|
|
217
|
+
- **Windows: transcript only appears on the clipboard**
|
|
218
|
+
On Windows 11, morvox first tries several automatic paste methods into the
|
|
219
|
+
currently focused window and then falls back to direct typing. If all of
|
|
220
|
+
those are blocked by the app or OS policy, morvox leaves the transcript on
|
|
221
|
+
the clipboard so you can paste it manually. Inspect
|
|
222
|
+
`%LOCALAPPDATA%\morvox\whisper.log` for a `windows-insert:` trace showing
|
|
223
|
+
which insertion path ran and what failed.
|
|
224
|
+
|
|
225
|
+
- **Whisper too slow**
|
|
226
|
+
Use a smaller model — `ggml-tiny.en.bin` is roughly 5× faster than
|
|
227
|
+
`base.en` with a small accuracy hit. Increase `--threads` up to your
|
|
228
|
+
physical core count.
|
|
229
|
+
|
|
230
|
+
- **Nothing is typed and notification says "Empty recording"**
|
|
231
|
+
Whisper produced only a noise token (e.g. `[BLANK_AUDIO]`). Speak
|
|
232
|
+
closer to the mic or check input gain.
|
|
233
|
+
|
|
234
|
+
## License
|
|
235
|
+
|
|
236
|
+
MIT — see [LICENSE](https://github.com/morhook/morvox/blob/main/LICENSE).
|
morvox-1.0.0/README.md
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
# morvox
|
|
2
|
+
|
|
3
|
+
An awesome push-to-talk-style voice-to-text widget for everyone.
|
|
4
|
+
|
|
5
|
+
One command (`morvox`) that toggles:
|
|
6
|
+
|
|
7
|
+
1. **First press** → starts recording from the default mic, remembers the
|
|
8
|
+
currently focused window/app, and shows a "Recording…" widget.
|
|
9
|
+
2. **Second press** → stops the recorder, transcribes the clip with
|
|
10
|
+
`whisper-cli` (whisper.cpp), and types the transcription into your
|
|
11
|
+
target app.
|
|
12
|
+
|
|
13
|
+
> **Note:** Windows 11 has a built-in dictation tool — press `Win+H` to open it. macOS has System Dictation built in too, accessible via **System Settings → Keyboard → Dictation** (typically triggered by double-pressing `Fn`). morvox is an alternative: it runs a local [whisper.cpp](https://github.com/ggerganov/whisper.cpp) model entirely offline, gives you a visual VU-meter widget, and wires into any hotkey manager you already use.
|
|
14
|
+
|
|
15
|
+
morvox auto-selects a platform backend:
|
|
16
|
+
|
|
17
|
+
- **Linux** — uses `parecord` for capture and `xdotool` for window
|
|
18
|
+
control + keystroke injection. Wayland sessions are also supported.
|
|
19
|
+
- **macOS** — uses `ffmpeg` (avfoundation) for capture and `osascript`
|
|
20
|
+
(System Events) for window focus + keystrokes.
|
|
21
|
+
- **Windows 11** — uses `ffmpeg` (WASAPI) for capture and Win32 APIs for
|
|
22
|
+
keystroke injection. On Windows, morvox inserts into the window that is
|
|
23
|
+
focused when transcription finishes: it tries several automatic clipboard
|
|
24
|
+
paste methods first, then direct Unicode typing, and only leaves the
|
|
25
|
+
transcript on the clipboard if all insertion methods are blocked.
|
|
26
|
+
|
|
27
|
+
You can force a backend with `MORVOX_BACKEND=x11`, `MORVOX_BACKEND=macos`,
|
|
28
|
+
or `MORVOX_BACKEND=windows`.
|
|
29
|
+
|
|
30
|
+
## Table of Contents
|
|
31
|
+
|
|
32
|
+
- [Epistemology](#epistemology)
|
|
33
|
+
- [Screenshots](#screenshots)
|
|
34
|
+
- [What it does](#what-it-does)
|
|
35
|
+
- [Setup & installation](https://github.com/morhook/morvox/blob/main/INSTALLATION.md)
|
|
36
|
+
- [Dependencies](https://github.com/morhook/morvox/blob/main/INSTALLATION.md#dependencies)
|
|
37
|
+
- [Installation](https://github.com/morhook/morvox/blob/main/INSTALLATION.md#installation)
|
|
38
|
+
- [Hotkey configuration](https://github.com/morhook/morvox/blob/main/INSTALLATION.md#hotkey-configuration)
|
|
39
|
+
- [Linux hotkey (i3)](https://github.com/morhook/morvox/blob/main/INSTALLATION.md#linux-hotkey-i3)
|
|
40
|
+
- [macOS hotkey](https://github.com/morhook/morvox/blob/main/INSTALLATION.md#macos-hotkey)
|
|
41
|
+
- [skhd](https://github.com/morhook/morvox/blob/main/INSTALLATION.md#skhd)
|
|
42
|
+
- [Hammerspoon](https://github.com/morhook/morvox/blob/main/INSTALLATION.md#hammerspoon)
|
|
43
|
+
- [Windows hotkey](https://github.com/morhook/morvox/blob/main/INSTALLATION.md#windows-hotkey)
|
|
44
|
+
- [Usage](#usage)
|
|
45
|
+
- [The widget](#the-widget)
|
|
46
|
+
- [Troubleshooting](#troubleshooting)
|
|
47
|
+
- [License](#license)
|
|
48
|
+
|
|
49
|
+
## Epistemology
|
|
50
|
+
|
|
51
|
+
The name combines morhook (the author's handle) and "vox", Latin for "voice": mor-vox. I know, if I explain the joke, it's not funny. Don't judge me.
|
|
52
|
+
|
|
53
|
+
## Screenshots
|
|
54
|
+
|
|
55
|
+

|
|
56
|
+

|
|
57
|
+

|
|
58
|
+

|
|
59
|
+
|
|
60
|
+
## What it does
|
|
61
|
+
|
|
62
|
+
- It wraps `whisper-cli` and shows a VU meter in a small on-screen widget. You need to bind `morvox` to a hotkey yourself in your OS or desktop environment.
|
|
63
|
+
|
|
64
|
+
## Setup & installation
|
|
65
|
+
|
|
66
|
+
Setup, dependencies, install steps, and hotkey configuration are in
|
|
67
|
+
[`INSTALLATION.md`](https://github.com/morhook/morvox/blob/main/INSTALLATION.md).
|
|
68
|
+
|
|
69
|
+
## Usage
|
|
70
|
+
|
|
71
|
+
```sh
|
|
72
|
+
# toggle (start, then stop+transcribe+type)
|
|
73
|
+
morvox
|
|
74
|
+
|
|
75
|
+
# fallback if you prefer module execution
|
|
76
|
+
python -m morvox
|
|
77
|
+
|
|
78
|
+
# status (for i3blocks / polybar)
|
|
79
|
+
morvox --status # prints "recording" or "idle"
|
|
80
|
+
|
|
81
|
+
# abort an in-flight recording without transcribing
|
|
82
|
+
morvox --cancel
|
|
83
|
+
|
|
84
|
+
# keep the wav/txt around for debugging
|
|
85
|
+
morvox --keep-temp
|
|
86
|
+
|
|
87
|
+
# use a different model / source / typing speed
|
|
88
|
+
morvox --model /path/to/ggml-tiny.en.bin
|
|
89
|
+
morvox --source alsa_input.usb-Maono_Maonocaster…
|
|
90
|
+
morvox --threads 8
|
|
91
|
+
morvox --type-delay 5
|
|
92
|
+
|
|
93
|
+
# disable the floating widget (headless / SSH / debugging)
|
|
94
|
+
morvox --no-widget
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
From a source checkout, you can still run `./morvox` before installing.
|
|
98
|
+
|
|
99
|
+
State files live in `$XDG_RUNTIME_DIR/morvox/` on Linux, falling back to
|
|
100
|
+
`/tmp/morvox-$UID/` when `$XDG_RUNTIME_DIR` is unset;
|
|
101
|
+
`~/Library/Caches/morvox/` on macOS; and `%LOCALAPPDATA%\morvox\` on
|
|
102
|
+
Windows. Override with the `MORVOX_STATE_DIR` env var. The directory holds:
|
|
103
|
+
|
|
104
|
+
- `rec.pid` — recorder PID
|
|
105
|
+
- `target_window` — saved focused window id
|
|
106
|
+
- `rec.wav` / `rec.txt` — audio + transcript
|
|
107
|
+
- `parecord.log` / `whisper.log` — diagnostic logs
|
|
108
|
+
|
|
109
|
+
By default these are deleted after a successful type. Pass `--keep-temp`
|
|
110
|
+
to keep them.
|
|
111
|
+
|
|
112
|
+
## The widget
|
|
113
|
+
|
|
114
|
+
While recording, morvox shows a small borderless window centred near the
|
|
115
|
+
bottom of the screen. It contains:
|
|
116
|
+
|
|
117
|
+
- a pulsing red dot (recording indicator),
|
|
118
|
+
- a live VU meter that reacts to your microphone level,
|
|
119
|
+
- an elapsed-time counter.
|
|
120
|
+
|
|
121
|
+
When you stop recording, the meter is replaced by a "Transcribing…"
|
|
122
|
+
spinner that stays visible until whisper finishes and the transcript has
|
|
123
|
+
been typed. If whisper produced only silence the widget briefly shows
|
|
124
|
+
"No speech detected" instead.
|
|
125
|
+
|
|
126
|
+
The widget is a self-spawned subprocess of `morvox` (uses Python's
|
|
127
|
+
stdlib `tkinter`). Its stderr is written to the platform state dir's
|
|
128
|
+
`widget.log` for debugging. On Linux/X11 it uses
|
|
129
|
+
`_NET_WM_WINDOW_TYPE_DOCK` so i3 won't try to tile it. On Wayland-only
|
|
130
|
+
sessions without XWayland, or on hosts without `$DISPLAY`, the widget is
|
|
131
|
+
skipped silently.
|
|
132
|
+
|
|
133
|
+
To disable the widget entirely (e.g. on a headless machine or over SSH),
|
|
134
|
+
pass `--no-widget`.
|
|
135
|
+
|
|
136
|
+
## Troubleshooting
|
|
137
|
+
|
|
138
|
+
- **No audio recorded / empty wav (Linux)**
|
|
139
|
+
Check the active sources: `pactl list short sources`. Pass an explicit
|
|
140
|
+
source with `--source <NAME>`. Inspect
|
|
141
|
+
`$XDG_RUNTIME_DIR/morvox/parecord.log` or `/tmp/morvox-$UID/parecord.log`.
|
|
142
|
+
|
|
143
|
+
- **No audio recorded / empty wav (macOS)**
|
|
144
|
+
List devices with `ffmpeg -f avfoundation -list_devices true -i ""`
|
|
145
|
+
and pass an explicit `--source :<idx>`. Inspect
|
|
146
|
+
`~/Library/Caches/morvox/parecord.log`. If ffmpeg complains about
|
|
147
|
+
permissions, grant the terminal Microphone access.
|
|
148
|
+
|
|
149
|
+
- **No audio recorded / empty wav (Windows)**
|
|
150
|
+
List audio devices with `ffmpeg -list_devices true -f wasapi -i dummy`
|
|
151
|
+
(or `ffmpeg -list_devices true -f dshow -i dummy` if your ffmpeg build
|
|
152
|
+
lacks WASAPI) and pass an explicit `--source "<device name>"`. Inspect
|
|
153
|
+
`%LOCALAPPDATA%\morvox\parecord.log`. If ffmpeg cannot access the
|
|
154
|
+
microphone, check **Settings -> Privacy & security -> Microphone**.
|
|
155
|
+
|
|
156
|
+
- **Text typed into wrong window**
|
|
157
|
+
On Linux and macOS, the originally focused window/app may have been
|
|
158
|
+
destroyed before you stopped recording. morvox falls back to typing
|
|
159
|
+
into whatever is currently focused and prints a warning to stderr. On
|
|
160
|
+
Windows, morvox intentionally types into the window that is focused
|
|
161
|
+
when transcription finishes.
|
|
162
|
+
|
|
163
|
+
- **Linux Wayland: nothing is typed (GNOME/Ubuntu)**
|
|
164
|
+
GNOME/Mutter doesn't implement the `wtype` keyboard protocol and
|
|
165
|
+
`xdotool` is a no-op against native Wayland windows. Either set up
|
|
166
|
+
`ydotoold` (`sudo systemctl enable --now ydotoold` and add your user
|
|
167
|
+
to the `input` group), or install `wl-clipboard` so the transcript
|
|
168
|
+
lands on your clipboard for manual Ctrl+Shift+V. See
|
|
169
|
+
[`INSTALLATION.md` (Linux / Wayland)](https://github.com/morhook/morvox/blob/main/INSTALLATION.md#linux--wayland).
|
|
170
|
+
|
|
171
|
+
- **Linux: widget never appears (asdf/pyenv/conda Python)**
|
|
172
|
+
The widget runs as a Python subprocess and needs `tkinter`. Many
|
|
173
|
+
third-party Python builds ship without it. Check
|
|
174
|
+
`$XDG_RUNTIME_DIR/morvox/widget.log` or `/tmp/morvox-$UID/widget.log` for
|
|
175
|
+
`No module named 'tkinter'`. Install `python3-tk` and run morvox under the
|
|
176
|
+
system Python, rebuild your managed Python with Tk support, or use
|
|
177
|
+
`--no-widget` to silence the warning.
|
|
178
|
+
|
|
179
|
+
- **macOS: keystrokes silently do nothing**
|
|
180
|
+
Accessibility permission isn't granted. **System Settings → Privacy &
|
|
181
|
+
Security → Accessibility** → enable your terminal app.
|
|
182
|
+
|
|
183
|
+
- **Windows: text does not type into an elevated app**
|
|
184
|
+
Windows blocks lower-integrity processes from injecting keystrokes into
|
|
185
|
+
elevated/admin windows. Run morvox from an elevated terminal too, or type
|
|
186
|
+
into a non-elevated app.
|
|
187
|
+
|
|
188
|
+
- **Windows: transcript only appears on the clipboard**
|
|
189
|
+
On Windows 11, morvox first tries several automatic paste methods into the
|
|
190
|
+
currently focused window and then falls back to direct typing. If all of
|
|
191
|
+
those are blocked by the app or OS policy, morvox leaves the transcript on
|
|
192
|
+
the clipboard so you can paste it manually. Inspect
|
|
193
|
+
`%LOCALAPPDATA%\morvox\whisper.log` for a `windows-insert:` trace showing
|
|
194
|
+
which insertion path ran and what failed.
|
|
195
|
+
|
|
196
|
+
- **Whisper too slow**
|
|
197
|
+
Use a smaller model — `ggml-tiny.en.bin` is roughly 5× faster than
|
|
198
|
+
`base.en` with a small accuracy hit. Increase `--threads` up to your
|
|
199
|
+
physical core count.
|
|
200
|
+
|
|
201
|
+
- **Nothing is typed and notification says "Empty recording"**
|
|
202
|
+
Whisper produced only a noise token (e.g. `[BLANK_AUDIO]`). Speak
|
|
203
|
+
closer to the mic or check input gain.
|
|
204
|
+
|
|
205
|
+
## License
|
|
206
|
+
|
|
207
|
+
MIT — see [LICENSE](https://github.com/morhook/morvox/blob/main/LICENSE).
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=77"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "morvox"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "Offline push-to-talk voice-to-text CLI powered by whisper.cpp"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
license-files = ["LICENSE"]
|
|
13
|
+
authors = [
|
|
14
|
+
{ name = "morhook", email = "cruz.fernandez@gmail.com" },
|
|
15
|
+
]
|
|
16
|
+
keywords = ["cli", "dictation", "speech-to-text", "voice", "whisper.cpp"]
|
|
17
|
+
classifiers = [
|
|
18
|
+
"Development Status :: 5 - Production/Stable",
|
|
19
|
+
"Environment :: Console",
|
|
20
|
+
"Intended Audience :: End Users/Desktop",
|
|
21
|
+
"Natural Language :: English",
|
|
22
|
+
"Operating System :: MacOS",
|
|
23
|
+
"Operating System :: Microsoft :: Windows :: Windows 11",
|
|
24
|
+
"Operating System :: POSIX :: Linux",
|
|
25
|
+
"Programming Language :: Python :: 3",
|
|
26
|
+
"Programming Language :: Python :: 3.10",
|
|
27
|
+
"Programming Language :: Python :: 3.11",
|
|
28
|
+
"Programming Language :: Python :: 3.12",
|
|
29
|
+
"Programming Language :: Python :: 3.13",
|
|
30
|
+
"Topic :: Multimedia :: Sound/Audio :: Capture/Recording",
|
|
31
|
+
"Topic :: Utilities",
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
[project.urls]
|
|
35
|
+
Homepage = "https://github.com/morhook/morvox"
|
|
36
|
+
Repository = "https://github.com/morhook/morvox"
|
|
37
|
+
Issues = "https://github.com/morhook/morvox/issues"
|
|
38
|
+
|
|
39
|
+
[project.scripts]
|
|
40
|
+
morvox = "morvox.__main__:main"
|
|
41
|
+
|
|
42
|
+
[tool.setuptools.packages.find]
|
|
43
|
+
where = ["src"]
|
morvox-1.0.0/setup.cfg
ADDED
|
File without changes
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""morvox.__main__ — argparse wiring, CLI dispatch, and entry-point main()."""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
|
|
7
|
+
from .constants import DEFAULT_MODEL
|
|
8
|
+
from .state import _state, is_recording, read_pid, _pid_file
|
|
9
|
+
from .commands import cmd_cancel, cmd_status
|
|
10
|
+
from .recording import cmd_recorder, cmd_start, cmd_stop
|
|
11
|
+
from .widget import cmd_widget
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def build_parser() -> argparse.ArgumentParser:
    """Build the ``morvox`` command-line parser.

    Returns:
        A parser carrying the general options (model, language, threads,
        source, typing delay, temp-file and widget switches, target window)
        plus one mutually exclusive group of mode flags: ``--status``,
        ``--cancel`` and the hidden ``--widget`` / ``--recorder``.
    """
    # Default to half of the reported CPUs, but never fewer than one thread;
    # an unknown CPU count is treated as two CPUs.
    cpu_total = os.cpu_count() or 2
    default_threads = max(1, cpu_total // 2)

    parser = argparse.ArgumentParser(
        prog="morvox",
        description=(
            "Toggle audio capture and transcribe with whisper.cpp, then type the "
            "transcription into a backend-selected target window."
        ),
    )

    # General options, registered in the order they appear in --help.
    parser.add_argument(
        "--model",
        default=DEFAULT_MODEL,
        help=f"Path to whisper.cpp ggml model (default: {DEFAULT_MODEL})",
    )
    parser.add_argument(
        "--language",
        default="en",
        help="Whisper language code (default: en)",
    )
    parser.add_argument(
        "--threads",
        type=int,
        default=default_threads,
        help=f"Whisper thread count (default: {default_threads})",
    )
    parser.add_argument(
        "--source",
        default=None,
        help="Audio source/device name (default: system default)",
    )
    parser.add_argument(
        "--type-delay",
        type=int,
        default=1,
        help="Delay between typed characters in ms (default: 1)",
    )
    parser.add_argument(
        "--keep-temp",
        action="store_true",
        help="Keep temporary files after typing (default: delete)",
    )
    parser.add_argument(
        "--no-widget",
        action="store_true",
        help="Disable the live recording widget (headless mode)",
    )
    parser.add_argument(
        "--target-window",
        default=None,
        metavar="HANDLE",
        help=(
            "Use a pre-captured target window handle "
            "(useful for hotkey launchers that steal focus)"
        ),
    )

    # At most one mode flag may be given per invocation.
    mode_flags = parser.add_mutually_exclusive_group()
    mode_flags.add_argument(
        "--status",
        action="store_true",
        help="Print 'recording' or 'idle' and exit",
    )
    mode_flags.add_argument(
        "--cancel",
        action="store_true",
        help="Cancel any active recording without transcribing/typing",
    )
    # Internal entry points, hidden from --help. --widget is used when morvox
    # spawns its own GUI subprocess; --recorder is presumably the matching
    # flag for the recorder subprocess (confirm against recording.py).
    for hidden_flag in ("--widget", "--recorder"):
        mode_flags.add_argument(
            hidden_flag,
            action="store_true",
            help=argparse.SUPPRESS,
        )

    return parser
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def main(argv: list[str] | None = None) -> int:
    """Parse the command line and run the requested morvox action.

    Args:
        argv: Arguments to parse. ``None`` lets argparse fall back to the
            process arguments.

    Returns:
        Whatever the selected command handler returns; the entry points
        pass it to ``sys.exit`` as the process exit status.
    """
    args = build_parser().parse_args(argv)

    # Ensure state dir exists for any sub-action.
    _state()

    # Explicit mode flags take precedence over the toggle and are checked in
    # this fixed order. The parser makes them mutually exclusive.
    mode_handlers = (
        ("widget", cmd_widget),
        ("recorder", lambda: cmd_recorder(args)),
        ("status", cmd_status),
        ("cancel", lambda: cmd_cancel(args)),
    )
    for flag_name, handler in mode_handlers:
        if getattr(args, flag_name):
            return handler()

    # Toggle behavior: an active recording is stopped, otherwise one starts.
    if is_recording():
        return cmd_stop(args)

    # Clean any stale leftovers from a previous failed run before starting:
    # a PID is on record but nothing is recording.
    if read_pid() is not None and not is_recording():
        _pid_file().unlink(missing_ok=True)
    return cmd_start(args)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
if __name__ == "__main__":
    # Run the CLI when executed as ``python -m morvox``. A Ctrl+C during
    # main() is turned into exit status 130 instead of a traceback.
    try:
        exit_code = main()
    except KeyboardInterrupt:
        exit_code = 130
    sys.exit(exit_code)
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""morvox.backends — backend selection and singleton."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
from .linux import LinuxX11Backend
|
|
7
|
+
from .macos import MacOSBackend
|
|
8
|
+
from .windows import WindowsBackend
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _make_backend():
    """Instantiate the platform backend for this process.

    The ``MORVOX_BACKEND`` environment variable wins when it holds one of
    the recognised values (``x11``, ``macos``, ``windows``, ``win32``).
    Any other value, or no value, falls back to detection via
    ``sys.platform``, with the Linux/X11 backend as the default.

    Returns:
        A new ``LinuxX11Backend``, ``MacOSBackend`` or ``WindowsBackend``.
    """
    requested = os.environ.get("MORVOX_BACKEND")
    forced_backends = {
        "x11": LinuxX11Backend,
        "macos": MacOSBackend,
        "windows": WindowsBackend,
        "win32": WindowsBackend,
    }
    forced_cls = forced_backends.get(requested)
    if forced_cls is not None:
        return forced_cls()

    # No usable override: choose by platform; anything that is neither
    # macOS nor Windows gets the Linux/X11 backend.
    platform_backends = {
        "darwin": MacOSBackend,
        "win32": WindowsBackend,
    }
    detected_cls = platform_backends.get(sys.platform, LinuxX11Backend)
    return detected_cls()
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Module-level singleton: the backend is selected once, when this package is
# first imported.
BACKEND = _make_backend()
|