blurt 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
blurt-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Satya Borgohain
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,2 @@
1
+ include LICENSE
2
+ include README.md
blurt-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,47 @@
1
+ Metadata-Version: 2.4
2
+ Name: blurt
3
+ Version: 0.1.0
4
+ Summary: Local speech-to-text for macOS — hold a hotkey, release to transcribe and auto-paste
5
+ Author: Satya Borgohain
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/satyaborg/blurt
8
+ Keywords: speech-to-text,whisper,mlx,apple-silicon,macos,voice,transcription
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Environment :: MacOS X
11
+ Classifier: Intended Audience :: End Users/Desktop
12
+ Classifier: Operating System :: MacOS
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
15
+ Requires-Python: >=3.10
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Requires-Dist: mlx-whisper
19
+ Requires-Dist: sounddevice
20
+ Requires-Dist: pynput
21
+ Requires-Dist: numpy
22
+ Requires-Dist: rich
23
+ Dynamic: license-file
24
+
25
+ # Blurt
26
+
27
+ > [!NOTE]
28
+ > Only runs on macOS with Apple Silicon.
29
+
30
+ Hold right cmd ⌘, speak, release - text appears wherever your cursor is. Runs on-device via [MLX Whisper](https://github.com/ml-explore/mlx-examples/tree/main/whisper). No cloud, no API keys and forever free.
31
+
32
+ ## Setup
33
+
34
+ ```bash
35
+ pip install blurt
36
+ blurt
37
+ ```
38
+
39
+ First run downloads ~1.6 GB model.
40
+
41
+ macOS requires:
42
+ - **Microphone** access for your terminal
43
+ - **Accessibility** access for your terminal (System Settings → Privacy & Security)
44
+
45
+ ## License
46
+
47
+ MIT
blurt-0.1.0/README.md ADDED
@@ -0,0 +1,23 @@
1
+ # Blurt
2
+
3
+ > [!NOTE]
4
+ > Only runs on macOS with Apple Silicon.
5
+
6
+ Hold right cmd ⌘, speak, release - text appears wherever your cursor is. Runs on-device via [MLX Whisper](https://github.com/ml-explore/mlx-examples/tree/main/whisper). No cloud, no API keys and forever free.
7
+
8
+ ## Setup
9
+
10
+ ```bash
11
+ pip install blurt
12
+ blurt
13
+ ```
14
+
15
+ First run downloads ~1.6 GB model.
16
+
17
+ macOS requires:
18
+ - **Microphone** access for your terminal
19
+ - **Accessibility** access for your terminal (System Settings → Privacy & Security)
20
+
21
+ ## License
22
+
23
+ MIT
@@ -0,0 +1,47 @@
1
+ Metadata-Version: 2.4
2
+ Name: blurt
3
+ Version: 0.1.0
4
+ Summary: Local speech-to-text for macOS — hold a hotkey, release to transcribe and auto-paste
5
+ Author: Satya Borgohain
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/satyaborg/blurt
8
+ Keywords: speech-to-text,whisper,mlx,apple-silicon,macos,voice,transcription
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Environment :: MacOS X
11
+ Classifier: Intended Audience :: End Users/Desktop
12
+ Classifier: Operating System :: MacOS
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
15
+ Requires-Python: >=3.10
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Requires-Dist: mlx-whisper
19
+ Requires-Dist: sounddevice
20
+ Requires-Dist: pynput
21
+ Requires-Dist: numpy
22
+ Requires-Dist: rich
23
+ Dynamic: license-file
24
+
25
+ # Blurt
26
+
27
+ > [!NOTE]
28
+ > Only runs on macOS with Apple Silicon.
29
+
30
+ Hold right cmd ⌘, speak, release - text appears wherever your cursor is. Runs on-device via [MLX Whisper](https://github.com/ml-explore/mlx-examples/tree/main/whisper). No cloud, no API keys and forever free.
31
+
32
+ ## Setup
33
+
34
+ ```bash
35
+ pip install blurt
36
+ blurt
37
+ ```
38
+
39
+ First run downloads ~1.6 GB model.
40
+
41
+ macOS requires:
42
+ - **Microphone** access for your terminal
43
+ - **Accessibility** access for your terminal (System Settings → Privacy & Security)
44
+
45
+ ## License
46
+
47
+ MIT
@@ -0,0 +1,11 @@
1
+ LICENSE
2
+ MANIFEST.in
3
+ README.md
4
+ blurt.py
5
+ pyproject.toml
6
+ blurt.egg-info/PKG-INFO
7
+ blurt.egg-info/SOURCES.txt
8
+ blurt.egg-info/dependency_links.txt
9
+ blurt.egg-info/entry_points.txt
10
+ blurt.egg-info/requires.txt
11
+ blurt.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ blurt = blurt:main
@@ -0,0 +1,5 @@
1
+ mlx-whisper
2
+ sounddevice
3
+ pynput
4
+ numpy
5
+ rich
@@ -0,0 +1 @@
1
+ blurt
blurt-0.1.0/blurt.py ADDED
@@ -0,0 +1,366 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Local speech-to-text blurt with MLX Whisper.
4
+ - Global hotkey to start/stop recording
5
+ - Transcribes locally on Apple Silicon via mlx-whisper
6
+ - Pastes into active input field
7
+ - Copies to clipboard
8
+ - Saves timestamped transcripts as JSONL
9
+ """
10
+
11
+ import sys
12
+ import json
13
+ import time
14
+ import wave
15
+ import subprocess
16
+ import threading
17
+ from datetime import datetime, timezone
18
+ from pathlib import Path
19
+
20
+ import numpy as np
21
+ import sounddevice as sd
22
+ from pynput import keyboard
23
+ from rich.console import Console
24
+ from rich.panel import Panel
25
+ from rich.table import Table
26
+
27
+ console = Console()
28
+ __version__ = "0.1.0"
29
+
30
+ # --- Themes ---
31
+ THEMES = ["ocean", "vapor"]
32
+ THEME_COLORS = {
33
+ "ocean": {
34
+ "accent": "dodger_blue2",
35
+ "rec": "orange1",
36
+ "ok": "spring_green3",
37
+ "dim": "grey58",
38
+ "border": "dodger_blue2",
39
+ },
40
+ "vapor": {
41
+ "accent": "medium_purple1",
42
+ "rec": "hot_pink",
43
+ "ok": "orchid1",
44
+ "dim": "grey50",
45
+ "border": "medium_purple1",
46
+ },
47
+ }
48
+
49
+ THEME = "ocean"
50
+ C_ACCENT = C_REC = C_OK = C_DIM = C_BORDER = ""
51
+
52
+
53
def _apply_theme(name=None):
    """Activate a theme by refreshing the module-level color shortcuts.

    Args:
        name: Optional key into THEME_COLORS; when omitted, the current
            THEME is re-applied.
    """
    global THEME, C_ACCENT, C_REC, C_OK, C_DIM, C_BORDER
    if name:
        THEME = name
    palette = THEME_COLORS[THEME]
    C_ACCENT, C_REC, C_OK, C_DIM, C_BORDER = (
        palette["accent"],
        palette["rec"],
        palette["ok"],
        palette["dim"],
        palette["border"],
    )


# Initialize the color shortcuts for the default theme at import time.
_apply_theme()
66
+
67
+ # --- Config ---
68
+ MODEL = "mlx-community/whisper-large-v3-turbo" # Best accuracy. Alt: "mlx-community/whisper-base-mlx" for speed
69
+ HOTKEY = {keyboard.Key.cmd_r} # Right Cmd only. Alt: {keyboard.Key.cmd, keyboard.Key.shift}
70
+ SAMPLE_RATE = 16000
71
+ CHANNELS = 1
72
+ BLURT_DIR = Path.home() / ".blurt"
73
+ JSONL_PATH = BLURT_DIR / "blurts.jsonl"
74
+ AUDIO_DIR = BLURT_DIR / "audio"
75
+
76
+ # --- State ---
77
+ recording = False
78
+ audio_buffer = []
79
+ pressed_keys = set()
80
+ stream = None
81
+ lock = threading.Lock()
82
+ model_lock = threading.Lock()
83
+ whisper_pipe = None
84
+ rec_status = None
85
+ total_words = 0
86
+
87
+
88
def load_stats():
    """Aggregate historical stats from the JSONL transcript log.

    Returns:
        Tuple of (total_words, avg_wpm, entry_count). All zeros when the
        log file is missing or contains no parseable entries.
    """
    words = 0
    seconds = 0.0
    entries = 0
    if JSONL_PATH.exists():
        with open(JSONL_PATH) as fh:
            for raw in fh:
                try:
                    rec = json.loads(raw)
                except json.JSONDecodeError:
                    # Skip blank or corrupted lines rather than losing all stats.
                    continue
                words += rec.get("words", 0)
                seconds += rec.get("duration_s", 0)
                entries += 1
    wpm = words / (seconds / 60) if seconds > 0 else 0
    return words, wpm, entries
105
+
106
+
107
def ensure_dirs():
    """Create the blurt data directories if they do not already exist.

    Uses parents=True so each call is self-sufficient: creating AUDIO_DIR
    no longer depends on BLURT_DIR having been created on the line before,
    and a missing intermediate directory cannot raise FileNotFoundError.
    """
    BLURT_DIR.mkdir(parents=True, exist_ok=True)
    AUDIO_DIR.mkdir(parents=True, exist_ok=True)
110
+
111
+
112
+ def _model_is_cached(repo_id: str) -> bool:
113
+ """Check if a HuggingFace model is already downloaded."""
114
+ try:
115
+ from huggingface_hub import scan_cache_dir
116
+ cache_info = scan_cache_dir()
117
+ return any(r.repo_id == repo_id for r in cache_info.repos)
118
+ except Exception:
119
+ return False
120
+
121
+
122
def load_model():
    """Lazy-load mlx-whisper on first use.

    Idempotent and thread-safe: model_lock serializes concurrent callers
    (the background pre-loader in main() and stop_recording may race here),
    and the whisper_pipe check makes later calls cheap no-ops.
    """
    global whisper_pipe
    with model_lock:
        if whisper_pipe is None:
            cached = _model_is_cached(MODEL)
            if cached:
                # Nothing will download, so suppress HF progress bars —
                # they would only flicker under the rich status spinner.
                # On a first run (not cached) bars stay enabled so the
                # ~1.6GB download shows progress.
                import huggingface_hub
                huggingface_hub.utils.disable_progress_bars()
            with console.status(f"Loading model: {MODEL}{'' if cached else ' (first run downloads ~1.6GB)'}..."):
                # Import deferred so `blurt --version` etc. stay fast.
                import mlx_whisper
                # Warm up with empty audio to trigger download/compile
                dummy = np.zeros(SAMPLE_RATE, dtype=np.float32)
                mlx_whisper.transcribe(dummy, path_or_hf_repo=MODEL, language="en")
                whisper_pipe = mlx_whisper
            if cached:
                # Restore the global progress-bar setting we disabled above.
                import huggingface_hub
                huggingface_hub.utils.enable_progress_bars()
            console.print(" [bold green]Model ready.[/bold green]")
141
+
142
+
143
def audio_callback(indata, frames, time_info, status):
    """sounddevice stream callback: buffer each captured chunk.

    Copies the chunk because sounddevice reuses its buffer between calls.
    A non-fatal stream status (overflow etc.) is surfaced as a warning.
    """
    audio_buffer.append(indata.copy())
    if status:
        console.print(f"Audio: {status}", style="yellow")
147
+
148
+
149
def start_recording():
    """Open the microphone stream and begin buffering audio.

    Runs on a worker thread (spawned by on_press). No-op if a recording
    is already in progress; `lock` guards the recording/stream/rec_status
    globals against a concurrent stop_recording.
    """
    global recording, stream, audio_buffer, rec_status
    with lock:
        if recording:
            return
        recording = True
        audio_buffer = []  # fresh buffer; audio_callback appends chunks here
        stream = sd.InputStream(
            samplerate=SAMPLE_RATE,
            channels=CHANNELS,
            dtype="float32",
            callback=audio_callback,
        )
        stream.start()
        # Spinner stays up until stop_recording stops it.
        rec_status = console.status(f" [{C_REC}]Recording...[/{C_REC}]")
        rec_status.start()
165
+
166
+
167
+ def _is_hallucination(segments):
168
+ """Detect Whisper hallucinations from segment-level signals."""
169
+ if not segments:
170
+ return False
171
+ # All segments are likely silence
172
+ if all(s.get("no_speech_prob", 0) > 0.6 for s in segments):
173
+ return True
174
+ # Low confidence + high compression = repetitive hallucination
175
+ for s in segments:
176
+ if s.get("avg_logprob", 0) < -1.0 and s.get("compression_ratio", 0) > 2.4:
177
+ return True
178
+ return False
179
+
180
+
181
def stop_recording():
    """Tear down the stream, transcribe the buffered audio, and deliver it.

    Runs on a worker thread (spawned by on_release). The full pipeline:
    stop/close stream -> save WAV -> transcribe -> filter hallucinations ->
    clipboard + paste -> append JSONL log entry -> print confirmation.
    Bails out early for empty buffers or clips shorter than 0.5 s.
    """
    global recording, stream, rec_status
    with lock:
        if not recording:
            return
        recording = False
        if rec_status:
            rec_status.stop()
            rec_status = None
        if stream:
            stream.stop()
            stream.close()
            stream = None

    # Nothing was captured (e.g. the stream never produced a callback).
    if not audio_buffer:
        return

    audio_data = np.concatenate(audio_buffer, axis=0).flatten()
    duration_s = round(len(audio_data) / SAMPLE_RATE, 2)

    # Accidental taps of the hotkey produce sub-half-second clips; skip them.
    if duration_s < 0.5:
        return

    t0 = time.monotonic()

    ts = datetime.now(timezone.utc)
    wav_path = AUDIO_DIR / f"{ts.strftime('%Y%m%d_%H%M%S')}.wav"
    save_wav(wav_path, audio_data)

    with console.status(f" [{C_ACCENT}]Transcribing...[/{C_ACCENT}]"):
        # Ensure the model is ready (no-op after first load), then hold
        # model_lock so transcription never overlaps a concurrent load.
        load_model()
        with model_lock:
            result = whisper_pipe.transcribe(
                audio_data,
                path_or_hf_repo=MODEL,
                language="en",
                condition_on_previous_text=False,
            )

    latency_ms = round((time.monotonic() - t0) * 1000)

    text = result["text"].strip()
    segments = result.get("segments", [])

    # Drop empty results and likely hallucinations (silence, loop text).
    if not text or _is_hallucination(segments):
        return

    global total_words
    word_count = len(text.split())
    total_words += word_count
    copy_to_clipboard(text)
    paste_to_active()

    # Append the transcript to the JSONL log for stats and history.
    entry = {
        "ts": ts.isoformat(),
        "text": text,
        "audio": str(wav_path),
        "duration_s": duration_s,
        "words": word_count,
    }
    with open(JSONL_PATH, "a") as f:
        f.write(json.dumps(entry) + "\n")
    preview = text[:60] + ("..." if len(text) > 60 else "")
    console.print(
        f" [{C_OK}]\u2713[/{C_OK}] \"{preview}\" "
        f"[{C_DIM}]{latency_ms}ms[/{C_DIM}]"
    )
248
+
249
+
250
def save_wav(path: Path, audio: np.ndarray):
    """Write mono float audio in [-1, 1] to *path* as 16-bit PCM WAV.

    Samples are clipped before scaling: a float sample outside [-1, 1]
    (possible on a hot input signal) would otherwise wrap around when cast
    to int16 and produce a loud pop in the saved file.
    """
    clipped = np.clip(audio, -1.0, 1.0)
    audio_int16 = (clipped * 32767).astype(np.int16)
    # "wb" — WAV is a binary format.
    with wave.open(str(path), "wb") as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(2)  # 2 bytes per sample = 16-bit PCM
        wf.setframerate(SAMPLE_RATE)
        wf.writeframes(audio_int16.tobytes())
257
+
258
+
259
def copy_to_clipboard(text: str):
    """Place *text* on the macOS clipboard via pbcopy (blocks until done)."""
    subprocess.run(["pbcopy"], input=text.encode("utf-8"))
262
+
263
+
264
def paste_to_active():
    """Simulate Cmd+V to paste into whatever input is focused."""
    # Brief pause so the user's hotkey modifiers are released before the
    # synthetic keystroke fires.
    time.sleep(0.15)
    script = 'tell application "System Events" to keystroke "v" using command down'
    subprocess.run(["osascript", "-e", script])
271
+
272
+
273
# --- Hotkey handling ---
# pynput reports cmd_l/cmd_r/shift_l/shift_r specifically; normalize to generic keys
# NOTE: cmd_r deliberately has no entry here — HOTKEY matches the right Cmd
# key specifically, so collapsing it into the generic cmd would break the
# hotkey chord check in on_press/on_release.
_KEY_NORMALIZE = {
    keyboard.Key.cmd_l: keyboard.Key.cmd,
    keyboard.Key.shift_l: keyboard.Key.shift,
    keyboard.Key.shift_r: keyboard.Key.shift,
    keyboard.Key.ctrl_l: keyboard.Key.ctrl,
    keyboard.Key.ctrl_r: keyboard.Key.ctrl,
    keyboard.Key.alt_l: keyboard.Key.alt,
    keyboard.Key.alt_r: keyboard.Key.alt,
}


def _normalize(key):
    # Map a side-specific modifier to its generic form; other keys pass through.
    return _KEY_NORMALIZE.get(key, key)
288
+
289
+
290
def on_press(key):
    """Key-down handler: Esc quits; a complete hotkey chord starts recording."""
    if key == keyboard.Key.esc:
        console.print(f"\n [{C_DIM}]bye[/{C_DIM}]")
        return False  # returning False stops the pynput listener
    pressed_keys.add(_normalize(key))
    # Record on a worker thread so the listener callback stays responsive.
    if HOTKEY.issubset(pressed_keys) and not recording:
        threading.Thread(target=start_recording, daemon=True).start()
298
+
299
+
300
def on_release(key):
    """Key-up handler: finish the recording once the hotkey chord is broken."""
    pressed_keys.discard(_normalize(key))
    chord_held = HOTKEY.issubset(pressed_keys)
    # Transcription is slow; hand it to a worker thread off the listener.
    if recording and not chord_held:
        threading.Thread(target=stop_recording, daemon=True).start()
304
+
305
+
306
def main():
    """CLI entry point: print the banner, pre-load the model, run the listener.

    Exits with status 1 on non-macOS platforms (pbcopy/osascript/MLX are
    macOS-only). Blocks in the pynput listener until Esc is pressed.
    """
    if "--version" in sys.argv:
        print(f"blurt {__version__}")
        return

    if sys.platform != "darwin":
        print("blurt requires macOS (uses pbcopy, osascript, and MLX for Apple Silicon)")
        sys.exit(1)

    global total_words
    ensure_dirs()

    # Seed the running word counter from the historical log.
    hist_words, hist_wpm, hist_count = load_stats()
    total_words = hist_words

    # Pretty symbols for modifier keys in the banner (\u2318 = Cmd, etc.).
    _KEY_NAMES = {
        "cmd": "\u2318", "cmd_l": "Left \u2318", "cmd_r": "Right \u2318",
        "ctrl": "\u2303", "ctrl_l": "Left \u2303", "ctrl_r": "Right \u2303",
        "alt": "\u2325", "alt_l": "Left \u2325", "alt_r": "Right \u2325",
        "shift": "\u21e7", "shift_l": "Left \u21e7", "shift_r": "Right \u21e7",
    }
    # Non-modifier keys (KeyCode) have no .name attribute; fall back to str().
    hotkey_str = "+".join(
        _KEY_NAMES.get(k.name, k.name) if hasattr(k, "name") else str(k)
        for k in HOTKEY
    )
    logo_art = (
        "░█▀▄░█░░░█░█░█▀▄░▀█▀\n"
        "░█▀▄░█░░░█░█░█▀▄░░█░\n"
        "░▀▀░░▀▀▀░▀▀▀░▀░▀░░▀░"
    )
    logo = f"[{C_ACCENT}]{logo_art}[/{C_ACCENT}]\n[{C_DIM}]v{__version__}[/{C_DIM}]"

    # Two-column key/value table with the runtime configuration.
    info = Table.grid(padding=(0, 2))
    info.add_column(style=f"bold {C_ACCENT}", justify="right")
    info.add_column()
    info.add_row("hotkey", hotkey_str)
    info.add_row("model", MODEL.split("/")[-1])
    info.add_row("log", str(JSONL_PATH))
    info.add_row("audio", str(AUDIO_DIR))

    console.print()
    console.print(Panel(logo, border_style=C_BORDER, padding=(1, 3)))
    console.print(info)

    if hist_count > 0:
        console.print(
            f"\n [{C_ACCENT}]stats[/{C_ACCENT}] "
            f"{hist_words} words \u2022 {hist_wpm:.0f} avg wpm \u2022 {hist_count} blurts"
        )

    console.print(f"\n [{C_DIM}]esc quit \u2022 hold hotkey to record[/{C_DIM}]\n")

    # Pre-load model in background so the first blurt isn't delayed.
    threading.Thread(target=load_model, daemon=True).start()

    # Blocks until on_press returns False (Esc).
    with keyboard.Listener(on_press=on_press, on_release=on_release) as listener:
        listener.join()


if __name__ == "__main__":
    main()
@@ -0,0 +1,40 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "blurt"
7
+ dynamic = ["version"]
8
+ description = "Local speech-to-text for macOS — hold a hotkey, release to transcribe and, auto-paste"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.10"
12
+ authors = [{ name = "Satya Borgohain" }]
13
+ keywords = ["speech-to-text", "whisper", "mlx", "apple-silicon", "macos", "voice", "transcription"]
14
+ classifiers = [
15
+ "Development Status :: 4 - Beta",
16
+ "Environment :: MacOS X",
17
+ "Intended Audience :: End Users/Desktop",
18
+ "Operating System :: MacOS",
19
+ "Programming Language :: Python :: 3",
20
+ "Topic :: Multimedia :: Sound/Audio :: Speech",
21
+ ]
22
+ dependencies = [
23
+ "mlx-whisper",
24
+ "sounddevice",
25
+ "pynput",
26
+ "numpy",
27
+ "rich",
28
+ ]
29
+
30
+ [project.urls]
31
+ Homepage = "https://github.com/satyaborg/blurt"
32
+
33
+ [project.scripts]
34
+ blurt = "blurt:main"
35
+
36
+ [tool.setuptools]
37
+ py-modules = ["blurt"]
38
+
39
+ [tool.setuptools.dynamic]
40
+ version = {attr = "blurt.__version__"}
blurt-0.1.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+