birdword 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,30 @@
1
+ name: CI / Publish
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ tags: ["v*"]
7
+ pull_request:
8
+
9
+ jobs:
10
+ check:
11
+ runs-on: macos-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+ - uses: astral-sh/setup-uv@v5
15
+ - run: uv sync
16
+ - run: uv run python -c "from birdword.cli import main; print('OK')"
17
+
18
+ publish:
19
+ if: startsWith(github.ref, 'refs/tags/v')
20
+ needs: check
21
+ runs-on: ubuntu-latest
22
+ environment: pypi
23
+ permissions:
24
+ contents: read
25
+ id-token: write
26
+ steps:
27
+ - uses: actions/checkout@v4
28
+ - uses: astral-sh/setup-uv@v5
29
+ - run: uv build
30
+ - uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,10 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
@@ -0,0 +1 @@
1
+ 3.13
birdword-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Till Hoffmann
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,99 @@
1
+ Metadata-Version: 2.4
2
+ Name: birdword
3
+ Version: 0.1.0
4
+ Summary: Voice dictation daemon using NVIDIA Parakeet on Apple Silicon
5
+ License-Expression: MIT
6
+ License-File: LICENSE
7
+ Requires-Python: >=3.10
8
+ Requires-Dist: mlx-audio
9
+ Requires-Dist: numpy
10
+ Requires-Dist: pyobjc-core
11
+ Requires-Dist: pyobjc-framework-cocoa
12
+ Requires-Dist: pyobjc-framework-quartz
13
+ Requires-Dist: sounddevice
14
+ Description-Content-Type: text/markdown
15
+
16
+ # 🐦 Birdword
17
+
18
+ Contextual voice dictation for macOS. Powered by [NVIDIA Parakeet](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v2) running locally on Apple Silicon via [MLX](https://github.com/ml-explore/mlx).
19
+
20
+ Press a hotkey, speak, and your words are transcribed and pasted into whatever app is focused. A small LLM (Qwen2.5-0.5B) post-processes the transcription to fix errors, optionally using project-specific context from a `BIRDWORD.md` file.
21
+
22
+ ## Install
23
+
24
+ ```
25
+ pip install birdword
26
+ ```
27
+
28
+ Or run without installing:
29
+
30
+ ```
31
+ uvx birdword
32
+ ```
33
+
34
+ Requires macOS on Apple Silicon (M1+) and Python 3.10+.
35
+
36
+ ## Usage
37
+
38
+ ```bash
39
+ # Run in the foreground
40
+ birdword
41
+
42
+ # Run in the background
43
+ birdword start
44
+ birdword stop
45
+ birdword status
46
+ ```
47
+
48
+ ### Hotkeys
49
+
50
+ | Action | Default |
51
+ |---|---|
52
+ | Toggle recording | Right ⌘ + Space |
53
+ | Hold to record | Hold Right ⌘ for >1s, release to transcribe |
54
+
55
+ ### Options
56
+
57
+ ```
58
+ --model MODEL Transcription model (default: mlx-community/parakeet-tdt-0.6b-v2)
59
+ --fix-model MODEL Post-processor model (default: mlx-community/Qwen2.5-0.5B-Instruct-4bit)
60
+ --no-fix Disable LLM post-processing
61
+ --hold-key KEY Hold key (default: rcmd). Options: rcmd, lcmd, ralt, lalt, rshift, lshift, rctrl, lctrl
62
+ --toggle-key KEY Toggle key (default: space). Options: space, return, tab, escape
63
+ ```
64
+
65
+ ## Permissions
66
+
67
+ Birdword needs three macOS permissions, granted to your terminal app:
68
+
69
+ - **Microphone** — to record your voice
70
+ - **Accessibility** — to paste text and intercept the hotkey
71
+ - **Input Monitoring** — to detect the global hotkey
72
+
73
+ Birdword checks these on startup and tells you what's missing.
74
+
75
+ ## Context-aware correction
76
+
77
+ Drop a `BIRDWORD.md` file in your project directory with domain-specific terms, names, and jargon:
78
+
79
+ ```markdown
80
+ This is a Rust networking project using tokio and hyper.
81
+
82
+ Key terms: epoll, mio, AsyncRead, TcpListener
83
+ Names: Till
84
+ ```
85
+
86
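+ When you dictate into a Terminal.app tab whose shell is in that directory (or a subdirectory), birdword feeds this context to the post-processor so it knows not to "correct" your domain terms. This requires the macOS Automation permission for Terminal.app, which is requested on first use.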
87
+
88
+ ## Menu bar
89
+
90
+ Birdword shows a bird icon in the menu bar:
91
+
92
+ - **White** — idle
93
+ - **Yellow** — connecting mic
94
+ - **Red** — listening
95
+ - **✨ Sparkles** — transcribing
96
+
97
+ ## License
98
+
99
+ MIT
@@ -0,0 +1,84 @@
1
+ # 🐦 Birdword
2
+
3
+ Contextual voice dictation for macOS. Powered by [NVIDIA Parakeet](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v2) running locally on Apple Silicon via [MLX](https://github.com/ml-explore/mlx).
4
+
5
+ Press a hotkey, speak, and your words are transcribed and pasted into whatever app is focused. A small LLM (Qwen2.5-0.5B) post-processes the transcription to fix errors, optionally using project-specific context from a `BIRDWORD.md` file.
6
+
7
+ ## Install
8
+
9
+ ```
10
+ pip install birdword
11
+ ```
12
+
13
+ Or run without installing:
14
+
15
+ ```
16
+ uvx birdword
17
+ ```
18
+
19
+ Requires macOS on Apple Silicon (M1+) and Python 3.10+.
20
+
21
+ ## Usage
22
+
23
+ ```bash
24
+ # Run in the foreground
25
+ birdword
26
+
27
+ # Run in the background
28
+ birdword start
29
+ birdword stop
30
+ birdword status
31
+ ```
32
+
33
+ ### Hotkeys
34
+
35
+ | Action | Default |
36
+ |---|---|
37
+ | Toggle recording | Right ⌘ + Space |
38
+ | Hold to record | Hold Right ⌘ for >1s, release to transcribe |
39
+
40
+ ### Options
41
+
42
+ ```
43
+ --model MODEL Transcription model (default: mlx-community/parakeet-tdt-0.6b-v2)
44
+ --fix-model MODEL Post-processor model (default: mlx-community/Qwen2.5-0.5B-Instruct-4bit)
45
+ --no-fix Disable LLM post-processing
46
+ --hold-key KEY Hold key (default: rcmd). Options: rcmd, lcmd, ralt, lalt, rshift, lshift, rctrl, lctrl
47
+ --toggle-key KEY Toggle key (default: space). Options: space, return, tab, escape
48
+ ```
49
+
50
+ ## Permissions
51
+
52
+ Birdword needs three macOS permissions, granted to your terminal app:
53
+
54
+ - **Microphone** — to record your voice
55
+ - **Accessibility** — to paste text and intercept the hotkey
56
+ - **Input Monitoring** — to detect the global hotkey
57
+
58
+ Birdword checks these on startup and tells you what's missing.
59
+
60
+ ## Context-aware correction
61
+
62
+ Drop a `BIRDWORD.md` file in your project directory with domain-specific terms, names, and jargon:
63
+
64
+ ```markdown
65
+ This is a Rust networking project using tokio and hyper.
66
+
67
+ Key terms: epoll, mio, AsyncRead, TcpListener
68
+ Names: Till
69
+ ```
70
+
71
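+ When you dictate into a Terminal.app tab whose shell is in that directory (or a subdirectory), birdword feeds this context to the post-processor so it knows not to "correct" your domain terms. This requires the macOS Automation permission for Terminal.app, which is requested on first use.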
72
+
73
+ ## Menu bar
74
+
75
+ Birdword shows a bird icon in the menu bar:
76
+
77
+ - **White** — idle
78
+ - **Yellow** — connecting mic
79
+ - **Red** — listening
80
+ - **✨ Sparkles** — transcribing
81
+
82
+ ## License
83
+
84
+ MIT
@@ -0,0 +1,28 @@
1
+ [project]
2
+ name = "birdword"
3
+ version = "0.1.0"
4
+ description = "Voice dictation daemon using NVIDIA Parakeet on Apple Silicon"
5
+ readme = "README.md"
6
+ license = "MIT"
7
+ requires-python = ">=3.10"
8
+ dependencies = [
9
+ "mlx-audio",
10
+ "sounddevice",
11
+ "numpy",
12
+ "pyobjc-core",
13
+ "pyobjc-framework-Cocoa",
14
+ "pyobjc-framework-Quartz",
15
+ ]
16
+
17
+ [tool.uv]
18
+ package = true
19
+
20
+ [build-system]
21
+ requires = ["hatchling"]
22
+ build-backend = "hatchling.build"
23
+
24
+ [tool.hatch.build.targets.wheel]
25
+ packages = ["src/birdword"]
26
+
27
+ [project.scripts]
28
+ birdword = "birdword.cli:main"
File without changes
@@ -0,0 +1,5 @@
1
+ """Allow running as python -m birdword."""
2
+
3
from birdword.cli import main

# Only start the CLI when this file is executed (``python -m birdword``).
# Importing the module -- e.g. by tooling, or by multiprocessing's "spawn"
# start method re-importing the main module -- must not run main() again.
if __name__ == "__main__":
    main()
@@ -0,0 +1,205 @@
1
+ """CLI entry point for birdword."""
2
+
3
+ import argparse
4
+ import os
5
+ import signal
6
+ import subprocess
7
+ import sys
8
+
9
# Path of the pidfile in the user's home directory; holds the daemon's PID.
PIDFILE = os.path.join(os.path.expanduser("~"), ".birdword.pid")
10
+
11
+
12
+ def _read_pid() -> int | None:
13
+ """Read PID from pidfile, return None if stale or missing."""
14
+ try:
15
+ with open(PIDFILE) as f:
16
+ pid = int(f.read().strip())
17
+ # Check if process is actually running
18
+ os.kill(pid, 0)
19
+ return pid
20
+ except (FileNotFoundError, ValueError, ProcessLookupError, PermissionError):
21
+ # Clean up stale pidfile
22
+ try:
23
+ os.unlink(PIDFILE)
24
+ except FileNotFoundError:
25
+ pass
26
+ return None
27
+
28
+
29
def _write_pid():
    """Store this process's PID in the pidfile, overwriting any content."""
    pid_text = str(os.getpid())
    with open(PIDFILE, "w") as pidfile:
        pidfile.write(pid_text)
33
+
34
+
35
def _remove_pid():
    """Delete the pidfile; an already-missing file is not an error."""
    try:
        os.remove(PIDFILE)
    except FileNotFoundError:
        return
41
+
42
+
43
def _check_permissions() -> bool:
    """Report whether ``verify_permissions()`` succeeded.

    Returns:
        ``True`` when ``verify_permissions()`` returns a truthy value.
        Otherwise prints a hint asking the user to fix the permissions and
        returns ``False``; this function itself does not retry.
    """
    # Imported at call time rather than at module import time.
    from birdword.permissions import verify_permissions

    if verify_permissions():
        return True

    print(" Fix permissions above, then try again.")
    return False
51
+
52
+
53
def _run_daemon(args):
    """Run the daemon in the foreground, refusing to start a second instance.

    Exits with status 1 when the pidfile points at a live process or when the
    permission check fails. The pidfile is written before the daemon is
    constructed and removed again when ``run()`` returns or raises.

    Args:
        args: Parsed command-line namespace providing ``model``,
            ``fix_model``, ``no_fix``, ``hold_key`` and ``toggle_key``.
    """
    running_pid = _read_pid()
    if running_pid is not None:
        print(f"🐦 Birdword is already running (pid {running_pid}).")
        sys.exit(1)

    if not _check_permissions():
        sys.exit(1)

    _write_pid()
    try:
        from birdword.daemon import Daemon

        options = {
            "model_id": args.model,
            "fix_model_id": args.fix_model,
            "no_fix": args.no_fix,
            "hold_key": args.hold_key,
            "toggle_key": args.toggle_key,
        }
        Daemon(**options).run()
    finally:
        # Always drop the pidfile, including when the daemon raises.
        _remove_pid()
77
+
78
+
79
def _cmd_start(args):
    """Start birdword as a detached background process.

    Does nothing (beyond a message) when a live instance is already recorded
    in the pidfile. Permissions are checked in the foreground first; on
    failure the process exits with status 1. The child re-runs this package
    via ``python -m birdword`` with the same options, with stdout and stderr
    appended to ``~/.birdword.log``. If the child exits within two seconds
    the start is reported as failed and the process exits with status 1.

    Args:
        args: Parsed command-line namespace providing ``model``,
            ``fix_model``, ``no_fix``, ``hold_key`` and ``toggle_key``.
    """
    existing = _read_pid()
    if existing is not None:
        print(f"🐦 Birdword is already running (pid {existing}).")
        return

    # Check permissions in the foreground first
    if not _check_permissions():
        sys.exit(1)

    print("🐦 Starting birdword in the background...")

    # Build the command to run ourselves in blocking mode
    cmd = [sys.executable, "-m", "birdword"]
    if args.model:
        cmd += ["--model", args.model]
    if args.fix_model:
        cmd += ["--fix-model", args.fix_model]
    if args.no_fix:
        cmd.append("--no-fix")
    # Only forward the key options when they differ from the parser defaults.
    if args.hold_key != "rcmd":
        cmd += ["--hold-key", args.hold_key]
    if args.toggle_key != "space":
        cmd += ["--toggle-key", args.toggle_key]

    # Launch detached subprocess
    log_path = os.path.expanduser("~/.birdword.log")
    # The child receives its own copies of the log descriptor at spawn time,
    # so the parent's handle is closed as soon as Popen returns instead of
    # being leaked.
    with open(log_path, "a") as log_file:
        proc = subprocess.Popen(
            cmd,
            stdout=log_file,
            stderr=log_file,
            stdin=subprocess.DEVNULL,
            start_new_session=True,
        )

    # Wait briefly to make sure it didn't crash immediately
    try:
        proc.wait(timeout=2)
    except subprocess.TimeoutExpired:
        pass  # Still running — good
    else:
        # wait() returned, so the process already exited
        print(f" ❌ Failed to start. Check {log_path}")
        sys.exit(1)

    print(f" ✅ Started (pid {proc.pid}).")
    print(f" 📄 Logs: {log_path}")
128
+
129
+
130
def _cmd_stop(args):
    """Send SIGTERM to the recorded daemon process and remove the pidfile.

    Prints a notice and returns when no live instance is recorded. ``args``
    is accepted for a uniform command signature and is not used.
    """
    target = _read_pid()
    if target is None:
        print("🐦 Birdword is not running.")
        return

    print(f"🐦 Stopping birdword (pid {target})...")
    try:
        os.kill(target, signal.SIGTERM)
    except ProcessLookupError:
        # The process went away between the liveness probe and the signal.
        print(" ⚠️ Process already gone.")
    else:
        print(" ✅ Stopped.")
    _remove_pid()
144
+
145
+
146
def _cmd_status(args):
    """Print whether a live birdword instance is recorded in the pidfile.

    ``args`` is accepted for a uniform command signature and is not used.
    """
    running_pid = _read_pid()
    if running_pid is None:
        print("🐦 Birdword is not running.")
        return
    print(f"🐦 Birdword is running (pid {running_pid}).")
153
+
154
+
155
def main():
    """Parse the command line and dispatch to the selected command.

    Global options configure the models and hotkeys. The optional
    subcommands ``start``, ``stop`` and ``status`` manage a background
    instance; without a subcommand the daemon runs in the foreground.
    """
    parser = argparse.ArgumentParser(
        description="Voice dictation using Parakeet on Apple Silicon"
    )

    # Model selection
    parser.add_argument(
        "--model",
        default=None,
        help="Transcription model (default: mlx-community/parakeet-tdt-0.6b-v2)",
    )
    parser.add_argument(
        "--fix-model",
        default=None,
        help="Post-processor model (default: mlx-community/Qwen2.5-0.5B-Instruct-4bit)",
    )
    parser.add_argument(
        "--no-fix",
        action="store_true",
        help="Disable LLM post-processing of transcription",
    )

    # Hotkey configuration
    parser.add_argument(
        "--hold-key",
        default="rcmd",
        help="Hold key for record (default: rcmd). Options: rcmd, lcmd, ralt, lalt",
    )
    parser.add_argument(
        "--toggle-key",
        default="space",
        help="Toggle key pressed with hold key (default: space)",
    )

    subcommands = parser.add_subparsers(dest="command")
    for name, summary in (
        ("start", "Start birdword in the background"),
        ("stop", "Stop birdword"),
        ("status", "Check if birdword is running"),
    ):
        subcommands.add_parser(name, help=summary)

    args = parser.parse_args()

    handlers = {
        "start": _cmd_start,
        "stop": _cmd_stop,
        "status": _cmd_status,
    }
    # No subcommand — run blocking (default)
    handler = handlers.get(args.command, _run_daemon)
    handler(args)


if __name__ == "__main__":
    main()
@@ -0,0 +1,114 @@
1
+ """Detect the focused app and resolve project context."""
2
+
3
+ import os
4
+ import subprocess
5
+
6
+ import AppKit
7
+
8
+
9
def get_frontmost_app() -> tuple[str, str]:
    """Return (bundle_id, app_name) of the frontmost application.

    Returns:
        A pair of strings. Either element is ``""`` when the corresponding
        value is unavailable, and both are ``""`` when the workspace reports
        no frontmost application at all.
    """
    workspace = AppKit.NSWorkspace.sharedWorkspace()
    app = workspace.frontmostApplication()
    # frontmostApplication is nullable; without this guard the attribute
    # lookups below would raise AttributeError on None.
    if app is None:
        return ("", "")
    return (app.bundleIdentifier() or "", app.localizedName() or "")
14
+
15
+
16
+ def get_terminal_cwd() -> str | None:
17
+ """Get the cwd of the shell in the frontmost Terminal.app tab.
18
+
19
+ Only called when Terminal.app is the focused app.
20
+ Requires Automation permission for Terminal.app (prompts once).
21
+ """
22
+ try:
23
+ tty = subprocess.run(
24
+ [
25
+ "osascript",
26
+ "-e",
27
+ 'tell application "Terminal" to tty of selected tab of front window',
28
+ ],
29
+ capture_output=True,
30
+ text=True,
31
+ timeout=5,
32
+ )
33
+ if tty.returncode != 0 or not tty.stdout.strip():
34
+ return None
35
+
36
+ tty_name = tty.stdout.strip()
37
+ # Strip /dev/ prefix for ps
38
+ tty_short = tty_name.replace("/dev/", "")
39
+
40
+ ps = subprocess.run(
41
+ ["ps", "-t", tty_short, "-o", "pid=,comm="],
42
+ capture_output=True,
43
+ text=True,
44
+ timeout=5,
45
+ )
46
+ if ps.returncode != 0:
47
+ return None
48
+
49
+ # Find the shell process
50
+ pid = None
51
+ for line in ps.stdout.strip().splitlines():
52
+ parts = line.split()
53
+ if len(parts) >= 2 and any(
54
+ sh in parts[-1] for sh in ("zsh", "bash", "fish")
55
+ ):
56
+ pid = parts[0]
57
+ break
58
+
59
+ if pid is None:
60
+ return None
61
+
62
+ # Get the cwd of that process
63
+ lsof = subprocess.run(
64
+ ["lsof", "-a", "-p", pid, "-d", "cwd", "-Fn"],
65
+ capture_output=True,
66
+ text=True,
67
+ timeout=5,
68
+ )
69
+ for line in lsof.stdout.strip().splitlines():
70
+ if line.startswith("n/"):
71
+ return line[1:]
72
+
73
+ except Exception:
74
+ pass
75
+
76
+ return None
77
+
78
+
79
+ def find_context_file(start_dir: str) -> str | None:
80
+ """Walk up from start_dir looking for a BIRDWORD.md file."""
81
+ current = os.path.abspath(start_dir)
82
+ while True:
83
+ candidate = os.path.join(current, "BIRDWORD.md")
84
+ if os.path.isfile(candidate):
85
+ return candidate
86
+ parent = os.path.dirname(current)
87
+ if parent == current:
88
+ break
89
+ current = parent
90
+ return None
91
+
92
+
93
def get_context() -> tuple[str, str | None]:
    """Get current context: (app_name, BIRDWORD.md contents or None).

    Only queries Terminal.app for cwd when Terminal is the focused app.

    Returns:
        The frontmost app's name, paired with the text of the nearest
        BIRDWORD.md at or above the Terminal tab's working directory. The
        second element is ``None`` when another app is focused, the cwd is
        unknown, no context file exists, or the file cannot be read.
    """
    bundle_id, app_name = get_frontmost_app()

    # Only probe Terminal cwd when Terminal is actually focused
    if bundle_id != "com.apple.Terminal":
        return app_name, None

    cwd = get_terminal_cwd()
    if not cwd:
        return app_name, None

    context_file = find_context_file(cwd)
    if not context_file:
        return app_name, None

    try:
        # Read as UTF-8 explicitly so the result does not depend on the
        # locale the daemon was started under.
        with open(context_file, encoding="utf-8") as f:
            return app_name, f.read()
    except (OSError, UnicodeDecodeError):
        # Best effort: an unreadable or undecodable file means no context.
        return app_name, None