python-voiceio 0.3.0__tar.gz → 0.3.2__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (104)
  1. {python_voiceio-0.3.0/python_voiceio.egg-info → python_voiceio-0.3.2}/PKG-INFO +10 -3
  2. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/README.md +9 -2
  3. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/pyproject.toml +1 -1
  4. {python_voiceio-0.3.0 → python_voiceio-0.3.2/python_voiceio.egg-info}/PKG-INFO +10 -3
  5. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/tests/test_llm_api.py +59 -1
  6. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/tests/test_tts.py +2 -2
  7. python_voiceio-0.3.2/voiceio/__init__.py +1 -0
  8. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/config.py +1 -1
  9. python_voiceio-0.3.2/voiceio/llm_api.py +183 -0
  10. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/numbers.py +9 -0
  11. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/service.py +1 -0
  12. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/tts/edge_engine.py +14 -1
  13. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/tts/piper_engine.py +3 -2
  14. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/wizard.py +310 -167
  15. python_voiceio-0.3.0/voiceio/__init__.py +0 -1
  16. python_voiceio-0.3.0/voiceio/llm_api.py +0 -130
  17. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/LICENSE +0 -0
  18. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/python_voiceio.egg-info/SOURCES.txt +0 -0
  19. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/python_voiceio.egg-info/dependency_links.txt +0 -0
  20. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/python_voiceio.egg-info/entry_points.txt +0 -0
  21. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/python_voiceio.egg-info/requires.txt +0 -0
  22. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/python_voiceio.egg-info/top_level.txt +0 -0
  23. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/setup.cfg +0 -0
  24. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/tests/test_app_wiring.py +0 -0
  25. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/tests/test_autocorrect.py +0 -0
  26. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/tests/test_backend_probes.py +0 -0
  27. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/tests/test_clipboard_read.py +0 -0
  28. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/tests/test_commands.py +0 -0
  29. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/tests/test_config.py +0 -0
  30. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/tests/test_corrections.py +0 -0
  31. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/tests/test_fallback.py +0 -0
  32. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/tests/test_health.py +0 -0
  33. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/tests/test_hints.py +0 -0
  34. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/tests/test_history.py +0 -0
  35. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/tests/test_ibus_typer.py +0 -0
  36. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/tests/test_llm.py +0 -0
  37. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/tests/test_numbers.py +0 -0
  38. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/tests/test_platform.py +0 -0
  39. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/tests/test_postprocess.py +0 -0
  40. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/tests/test_prebuffer.py +0 -0
  41. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/tests/test_prompt.py +0 -0
  42. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/tests/test_recorder_integration.py +0 -0
  43. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/tests/test_streaming.py +0 -0
  44. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/tests/test_transcriber.py +0 -0
  45. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/tests/test_vad.py +0 -0
  46. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/tests/test_vocabulary.py +0 -0
  47. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/tests/test_wordfreq.py +0 -0
  48. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/__main__.py +0 -0
  49. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/app.py +0 -0
  50. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/autocorrect.py +0 -0
  51. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/backends.py +0 -0
  52. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/cli.py +0 -0
  53. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/clipboard_read.py +0 -0
  54. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/commands.py +0 -0
  55. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/corrections.py +0 -0
  56. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/demo.py +0 -0
  57. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/feedback.py +0 -0
  58. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/health.py +0 -0
  59. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/hints.py +0 -0
  60. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/history.py +0 -0
  61. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/hotkeys/__init__.py +0 -0
  62. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/hotkeys/base.py +0 -0
  63. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/hotkeys/chain.py +0 -0
  64. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/hotkeys/evdev.py +0 -0
  65. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/hotkeys/pynput_backend.py +0 -0
  66. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/hotkeys/socket_backend.py +0 -0
  67. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/ibus/__init__.py +0 -0
  68. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/ibus/engine.py +0 -0
  69. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/llm.py +0 -0
  70. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/models/__init__.py +0 -0
  71. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/models/silero_vad.onnx +0 -0
  72. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/pidlock.py +0 -0
  73. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/platform.py +0 -0
  74. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/postprocess.py +0 -0
  75. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/prompt.py +0 -0
  76. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/recorder.py +0 -0
  77. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/sounds/__init__.py +0 -0
  78. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/sounds/commit.wav +0 -0
  79. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/sounds/start.wav +0 -0
  80. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/sounds/stop.wav +0 -0
  81. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/streaming.py +0 -0
  82. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/transcriber.py +0 -0
  83. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/tray/__init__.py +0 -0
  84. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/tray/_icons.py +0 -0
  85. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/tray/_indicator.py +0 -0
  86. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/tray/_pystray.py +0 -0
  87. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/tts/__init__.py +0 -0
  88. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/tts/base.py +0 -0
  89. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/tts/chain.py +0 -0
  90. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/tts/espeak.py +0 -0
  91. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/tts/player.py +0 -0
  92. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/typers/__init__.py +0 -0
  93. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/typers/base.py +0 -0
  94. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/typers/chain.py +0 -0
  95. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/typers/clipboard.py +0 -0
  96. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/typers/ibus.py +0 -0
  97. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/typers/pynput_type.py +0 -0
  98. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/typers/wtype.py +0 -0
  99. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/typers/xdotool.py +0 -0
  100. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/typers/ydotool.py +0 -0
  101. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/vad.py +0 -0
  102. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/vocabulary.py +0 -0
  103. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/wordfreq.py +0 -0
  104. {python_voiceio-0.3.0 → python_voiceio-0.3.2}/voiceio/worker.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-voiceio
3
- Version: 0.3.0
3
+ Version: 0.3.2
4
4
  Summary: Speak → text, locally, instantly.
5
5
  Author: Hugo Montenegro
6
6
  License-Expression: MIT
@@ -56,6 +56,7 @@ Dynamic: license-file
56
56
  [![PyPI](https://img.shields.io/pypi/v/python-voiceio)](https://pypi.org/project/python-voiceio/)
57
57
  [![Python](https://img.shields.io/pypi/pyversions/python-voiceio)](https://pypi.org/project/python-voiceio/)
58
58
  [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
59
+ [![Downloads](https://img.shields.io/pepy/dt/python-voiceio)](https://pepy.tech/projects/python-voiceio)
59
60
 
60
61
  Speak → text, locally, instantly.
61
62
 
@@ -153,6 +154,10 @@ Press your hotkey to start recording (1s pre-buffer catches the first syllable).
153
154
  - **Works everywhere**: IBus input method for GUI apps, clipboard for terminals
154
155
  - **Wayland + X11**: evdev hotkeys work on both, no root required
155
156
  - **Pre-buffer**: never miss the first syllable
157
+ - **Voice commands**: "new line", "comma", "scratch that", punctuation by name
158
+ - **Autocorrect**: LLM-powered review of recurring Whisper mistakes (`voiceio correct`)
159
+ - **Text-to-speech**: hear selected text spoken back (Piper, eSpeak, Edge TTS)
160
+ - **Smart post-processing**: numbers ("twenty five" → "25"), punctuation, capitalization
156
161
  - **Auto-healing**: falls back to the next working backend if one fails
157
162
  - **Autostart**: optional systemd service, restarts on crash
158
163
  - **Self-diagnosing**: `voiceio doctor` checks everything, `--fix` repairs it
@@ -176,7 +181,10 @@ voiceio Start the daemon
176
181
  voiceio setup Interactive setup wizard
177
182
  voiceio doctor Health check (--fix to auto-repair)
178
183
  voiceio test Test microphone + live transcription
184
+ voiceio demo Interactive guided tour of all features
179
185
  voiceio toggle Toggle recording on a running daemon
186
+ voiceio correct Review and fix recurring transcription errors
187
+ voiceio history View transcription history
180
188
  voiceio update Update to latest version
181
189
  voiceio service install Autostart on login (systemd / Windows Startup)
182
190
  voiceio logs View recent logs
@@ -250,9 +258,8 @@ Contributions welcome! See [CONTRIBUTING.md](CONTRIBUTING.md) and [open issues](
250
258
  - [ ] Multiple engine backends (whisper.cpp for Vulkan/AMD, VOSK for low-end hardware)
251
259
  - [ ] Echo cancellation (filter system audio for meeting use)
252
260
  - [ ] Wake word activation ("Hey voiceio")
253
- - [ ] Text-to-speech output (Piper/espeak-ng — completes the "io")
254
-
255
261
  **Done**
262
+ - [x] Text-to-speech output (Piper/eSpeak/Edge TTS — completes the "io")
256
263
  - [x] LLM auto-audit dictionary (`voiceio correct --auto` — scan history with LLM, interactive correction)
257
264
  - [x] LLM post-processing via Ollama (grammar cleanup, spelling fixes on final pass)
258
265
  - [x] Corrections dictionary — auto-replace misheard words, "correct that" voice command
@@ -4,6 +4,7 @@
4
4
  [![PyPI](https://img.shields.io/pypi/v/python-voiceio)](https://pypi.org/project/python-voiceio/)
5
5
  [![Python](https://img.shields.io/pypi/pyversions/python-voiceio)](https://pypi.org/project/python-voiceio/)
6
6
  [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
7
+ [![Downloads](https://img.shields.io/pepy/dt/python-voiceio)](https://pepy.tech/projects/python-voiceio)
7
8
 
8
9
  Speak → text, locally, instantly.
9
10
 
@@ -101,6 +102,10 @@ Press your hotkey to start recording (1s pre-buffer catches the first syllable).
101
102
  - **Works everywhere**: IBus input method for GUI apps, clipboard for terminals
102
103
  - **Wayland + X11**: evdev hotkeys work on both, no root required
103
104
  - **Pre-buffer**: never miss the first syllable
105
+ - **Voice commands**: "new line", "comma", "scratch that", punctuation by name
106
+ - **Autocorrect**: LLM-powered review of recurring Whisper mistakes (`voiceio correct`)
107
+ - **Text-to-speech**: hear selected text spoken back (Piper, eSpeak, Edge TTS)
108
+ - **Smart post-processing**: numbers ("twenty five" → "25"), punctuation, capitalization
104
109
  - **Auto-healing**: falls back to the next working backend if one fails
105
110
  - **Autostart**: optional systemd service, restarts on crash
106
111
  - **Self-diagnosing**: `voiceio doctor` checks everything, `--fix` repairs it
@@ -124,7 +129,10 @@ voiceio Start the daemon
124
129
  voiceio setup Interactive setup wizard
125
130
  voiceio doctor Health check (--fix to auto-repair)
126
131
  voiceio test Test microphone + live transcription
132
+ voiceio demo Interactive guided tour of all features
127
133
  voiceio toggle Toggle recording on a running daemon
134
+ voiceio correct Review and fix recurring transcription errors
135
+ voiceio history View transcription history
128
136
  voiceio update Update to latest version
129
137
  voiceio service install Autostart on login (systemd / Windows Startup)
130
138
  voiceio logs View recent logs
@@ -198,9 +206,8 @@ Contributions welcome! See [CONTRIBUTING.md](CONTRIBUTING.md) and [open issues](
198
206
  - [ ] Multiple engine backends (whisper.cpp for Vulkan/AMD, VOSK for low-end hardware)
199
207
  - [ ] Echo cancellation (filter system audio for meeting use)
200
208
  - [ ] Wake word activation ("Hey voiceio")
201
- - [ ] Text-to-speech output (Piper/espeak-ng — completes the "io")
202
-
203
209
  **Done**
210
+ - [x] Text-to-speech output (Piper/eSpeak/Edge TTS — completes the "io")
204
211
  - [x] LLM auto-audit dictionary (`voiceio correct --auto` — scan history with LLM, interactive correction)
205
212
  - [x] LLM post-processing via Ollama (grammar cleanup, spelling fixes on final pass)
206
213
  - [x] Corrections dictionary — auto-replace misheard words, "correct that" voice command
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "python-voiceio"
7
- version = "0.3.0"
7
+ version = "0.3.2"
8
8
  description = "Speak → text, locally, instantly."
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-voiceio
3
- Version: 0.3.0
3
+ Version: 0.3.2
4
4
  Summary: Speak → text, locally, instantly.
5
5
  Author: Hugo Montenegro
6
6
  License-Expression: MIT
@@ -56,6 +56,7 @@ Dynamic: license-file
56
56
  [![PyPI](https://img.shields.io/pypi/v/python-voiceio)](https://pypi.org/project/python-voiceio/)
57
57
  [![Python](https://img.shields.io/pypi/pyversions/python-voiceio)](https://pypi.org/project/python-voiceio/)
58
58
  [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
59
+ [![Downloads](https://img.shields.io/pepy/dt/python-voiceio)](https://pepy.tech/projects/python-voiceio)
59
60
 
60
61
  Speak → text, locally, instantly.
61
62
 
@@ -153,6 +154,10 @@ Press your hotkey to start recording (1s pre-buffer catches the first syllable).
153
154
  - **Works everywhere**: IBus input method for GUI apps, clipboard for terminals
154
155
  - **Wayland + X11**: evdev hotkeys work on both, no root required
155
156
  - **Pre-buffer**: never miss the first syllable
157
+ - **Voice commands**: "new line", "comma", "scratch that", punctuation by name
158
+ - **Autocorrect**: LLM-powered review of recurring Whisper mistakes (`voiceio correct`)
159
+ - **Text-to-speech**: hear selected text spoken back (Piper, eSpeak, Edge TTS)
160
+ - **Smart post-processing**: numbers ("twenty five" → "25"), punctuation, capitalization
156
161
  - **Auto-healing**: falls back to the next working backend if one fails
157
162
  - **Autostart**: optional systemd service, restarts on crash
158
163
  - **Self-diagnosing**: `voiceio doctor` checks everything, `--fix` repairs it
@@ -176,7 +181,10 @@ voiceio Start the daemon
176
181
  voiceio setup Interactive setup wizard
177
182
  voiceio doctor Health check (--fix to auto-repair)
178
183
  voiceio test Test microphone + live transcription
184
+ voiceio demo Interactive guided tour of all features
179
185
  voiceio toggle Toggle recording on a running daemon
186
+ voiceio correct Review and fix recurring transcription errors
187
+ voiceio history View transcription history
180
188
  voiceio update Update to latest version
181
189
  voiceio service install Autostart on login (systemd / Windows Startup)
182
190
  voiceio logs View recent logs
@@ -250,9 +258,8 @@ Contributions welcome! See [CONTRIBUTING.md](CONTRIBUTING.md) and [open issues](
250
258
  - [ ] Multiple engine backends (whisper.cpp for Vulkan/AMD, VOSK for low-end hardware)
251
259
  - [ ] Echo cancellation (filter system audio for meeting use)
252
260
  - [ ] Wake word activation ("Hey voiceio")
253
- - [ ] Text-to-speech output (Piper/espeak-ng — completes the "io")
254
-
255
261
  **Done**
262
+ - [x] Text-to-speech output (Piper/eSpeak/Edge TTS — completes the "io")
256
263
  - [x] LLM auto-audit dictionary (`voiceio correct --auto` — scan history with LLM, interactive correction)
257
264
  - [x] LLM post-processing via Ollama (grammar cleanup, spelling fixes on final pass)
258
265
  - [x] Corrections dictionary — auto-replace misheard words, "correct that" voice command
@@ -6,7 +6,7 @@ import urllib.error
6
6
  from unittest.mock import MagicMock, patch
7
7
 
8
8
  from voiceio.config import AutocorrectConfig
9
- from voiceio.llm_api import chat, check_api_key, resolve_api_key
9
+ from voiceio.llm_api import chat, check_api_key, detect_provider, resolve_api_key
10
10
 
11
11
 
12
12
  def _mock_response(data: dict) -> MagicMock:
@@ -116,3 +116,61 @@ def test_check_empty_key():
116
116
  cfg = _cfg(api_key="")
117
117
  with patch.dict("os.environ", {}, clear=True):
118
118
  assert check_api_key(cfg) is False
119
+
120
+
121
+ # ── Anthropic native API ────────────────────────────────────────────────
122
+
123
+
124
+ @patch("urllib.request.urlopen")
125
+ def test_chat_anthropic_native(mock_urlopen):
126
+ mock_urlopen.return_value = _mock_response({
127
+ "content": [{"type": "text", "text": "Fixed text."}]
128
+ })
129
+ cfg = _cfg(base_url="https://api.anthropic.com/v1")
130
+ result = chat(cfg, "system prompt", "user message")
131
+ assert result == "Fixed text."
132
+
133
+ req = mock_urlopen.call_args[0][0]
134
+ assert req.get_header("X-api-key") == "test-key"
135
+ assert req.get_header("Anthropic-version") == "2023-06-01"
136
+ assert "Authorization" not in dict(req.header_items())
137
+ body = json.loads(req.data)
138
+ assert body["system"] == "system prompt"
139
+ assert body["messages"] == [{"role": "user", "content": "user message"}]
140
+ assert "/messages" in req.full_url
141
+
142
+
143
+ @patch("urllib.request.urlopen")
144
+ def test_check_api_key_anthropic(mock_urlopen):
145
+ mock_urlopen.return_value = _mock_response({
146
+ "content": [{"type": "text", "text": ""}]
147
+ })
148
+ cfg = _cfg(base_url="https://api.anthropic.com/v1")
149
+ assert check_api_key(cfg, "sk-ant-test") is True
150
+ req = mock_urlopen.call_args[0][0]
151
+ assert "/messages" in req.full_url
152
+
153
+
154
+ # ── detect_provider ─────────────────────────────────────────────────────
155
+
156
+
157
+ def test_detect_openrouter():
158
+ base_url, model = detect_provider("sk-or-abc123")
159
+ assert "openrouter" in base_url
160
+ assert "claude" in model
161
+
162
+
163
+ def test_detect_anthropic():
164
+ base_url, model = detect_provider("sk-ant-abc123")
165
+ assert "anthropic.com" in base_url
166
+ assert "claude" in model
167
+
168
+
169
+ def test_detect_openai():
170
+ base_url, model = detect_provider("sk-proj-abc123")
171
+ assert "openai.com" in base_url
172
+
173
+
174
+ def test_detect_unknown_defaults_openrouter():
175
+ base_url, _ = detect_provider("unknown-key-format")
176
+ assert "openrouter" in base_url
@@ -142,7 +142,7 @@ def test_player_empty_audio():
142
142
 
143
143
  def test_tts_config_defaults():
144
144
  cfg = TTSConfig()
145
- assert cfg.enabled is False
145
+ assert cfg.enabled is True
146
146
  assert cfg.engine == "auto"
147
147
  assert cfg.hotkey == "ctrl+alt+s"
148
148
  assert cfg.voice == ""
@@ -155,4 +155,4 @@ def test_tts_config_in_main_config():
155
155
  cfg = Config()
156
156
  assert hasattr(cfg, "tts")
157
157
  assert isinstance(cfg.tts, TTSConfig)
158
- assert cfg.tts.enabled is False
158
+ assert cfg.tts.enabled is True
@@ -0,0 +1 @@
1
+ __version__ = "0.3.2"
@@ -105,7 +105,7 @@ class AutocorrectConfig:
105
105
 
106
106
  @dataclass
107
107
  class TTSConfig:
108
- enabled: bool = False
108
+ enabled: bool = True
109
109
  engine: str = "auto" # "auto" | "piper" | "espeak" | "edge-tts"
110
110
  hotkey: str = "ctrl+alt+s" # "s" for speak
111
111
  voice: str = "" # empty = engine default
@@ -0,0 +1,183 @@
1
+ """Multi-provider chat completions API client.
2
+
3
+ Supports OpenRouter, OpenAI, Anthropic (native Messages API), Together, Groq,
4
+ local Ollama (via /v1/chat/completions), etc. Zero dependencies beyond stdlib.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import json
9
+ import logging
10
+ import os
11
+ import urllib.error
12
+ import urllib.request
13
+
14
+ from voiceio.config import AutocorrectConfig
15
+
16
+ log = logging.getLogger(__name__)
17
+
18
+
19
+ def _is_anthropic(base_url: str) -> bool:
20
+ """Check if the base URL points to Anthropic's native API."""
21
+ return "api.anthropic.com" in base_url
22
+
23
+
24
+ def resolve_api_key(cfg: AutocorrectConfig) -> str:
25
+ """Resolve API key from config or environment variables."""
26
+ if cfg.api_key:
27
+ return cfg.api_key
28
+ # Check common env vars in priority order
29
+ for var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY"):
30
+ val = os.environ.get(var, "")
31
+ if val:
32
+ return val
33
+ return ""
34
+
35
+
36
+ def _anthropic_request(
37
+ base_url: str,
38
+ model: str,
39
+ system: str,
40
+ messages: list[dict],
41
+ api_key: str,
42
+ max_tokens: int,
43
+ timeout: float,
44
+ ) -> str | None:
45
+ """Send a request using Anthropic's native Messages API."""
46
+ url = f"{base_url}/messages"
47
+
48
+ body: dict = {
49
+ "model": model,
50
+ "max_tokens": max_tokens,
51
+ "messages": messages,
52
+ }
53
+ if system:
54
+ body["system"] = system
55
+
56
+ headers = {
57
+ "Content-Type": "application/json",
58
+ "x-api-key": api_key,
59
+ "anthropic-version": "2023-06-01",
60
+ }
61
+
62
+ req = urllib.request.Request(
63
+ url, data=json.dumps(body).encode(), headers=headers, method="POST",
64
+ )
65
+ with urllib.request.urlopen(req, timeout=timeout) as resp:
66
+ data = json.loads(resp.read())
67
+ # Anthropic returns content as a list of blocks
68
+ blocks = data.get("content", [])
69
+ text = "".join(b.get("text", "") for b in blocks if b.get("type") == "text")
70
+ return text.strip() or None
71
+
72
+
73
+ def _openai_request(
74
+ base_url: str,
75
+ model: str,
76
+ system: str,
77
+ messages: list[dict],
78
+ api_key: str,
79
+ max_tokens: int,
80
+ timeout: float,
81
+ ) -> str | None:
82
+ """Send a request using the OpenAI chat completions format."""
83
+ url = f"{base_url}/chat/completions"
84
+
85
+ all_messages = []
86
+ if system:
87
+ all_messages.append({"role": "system", "content": system})
88
+ all_messages.extend(messages)
89
+
90
+ body = {
91
+ "model": model,
92
+ "max_tokens": max_tokens,
93
+ "messages": all_messages,
94
+ }
95
+
96
+ headers = {
97
+ "Content-Type": "application/json",
98
+ "Authorization": f"Bearer {api_key}",
99
+ }
100
+
101
+ req = urllib.request.Request(
102
+ url, data=json.dumps(body).encode(), headers=headers, method="POST",
103
+ )
104
+ with urllib.request.urlopen(req, timeout=timeout) as resp:
105
+ data = json.loads(resp.read())
106
+ return data["choices"][0]["message"]["content"].strip()
107
+
108
+
109
+ def chat(
110
+ cfg: AutocorrectConfig,
111
+ system: str,
112
+ user_message: str,
113
+ *,
114
+ api_key: str = "",
115
+ max_tokens: int = 2048,
116
+ ) -> str | None:
117
+ """Send a chat completion request. Returns response text or None on failure.
118
+
119
+ Automatically detects Anthropic's native API vs OpenAI-compatible format
120
+ based on the configured base_url.
121
+ """
122
+ key = api_key or resolve_api_key(cfg)
123
+ if not key:
124
+ return None
125
+
126
+ base_url = cfg.base_url.rstrip("/")
127
+ messages = [{"role": "user", "content": user_message}]
128
+
129
+ try:
130
+ if _is_anthropic(base_url):
131
+ return _anthropic_request(
132
+ base_url, cfg.model, system, messages, key, max_tokens, cfg.timeout_secs,
133
+ )
134
+ return _openai_request(
135
+ base_url, cfg.model, system, messages, key, max_tokens, cfg.timeout_secs,
136
+ )
137
+ except urllib.error.HTTPError as e:
138
+ body_text = ""
139
+ try:
140
+ body_text = e.read().decode()[:200]
141
+ except Exception:
142
+ pass
143
+ log.warning("API request failed (HTTP %d): %s", e.code, body_text)
144
+ return None
145
+ except Exception as e:
146
+ log.warning("API request failed: %s", e)
147
+ return None
148
+
149
+
150
+ def detect_provider(api_key: str) -> tuple[str, str]:
151
+ """Detect provider from API key prefix. Returns (base_url, model)."""
152
+ if api_key.startswith("sk-or-"):
153
+ return "https://openrouter.ai/api/v1", "anthropic/claude-sonnet-4"
154
+ if api_key.startswith("sk-ant-"):
155
+ return "https://api.anthropic.com/v1", "claude-sonnet-4-20250514"
156
+ if api_key.startswith(("sk-proj-", "sk-")):
157
+ return "https://api.openai.com/v1", "gpt-4o-mini"
158
+ # Default to OpenRouter (works with most keys)
159
+ return "https://openrouter.ai/api/v1", "anthropic/claude-sonnet-4"
160
+
161
+
162
+ def check_api_key(cfg: AutocorrectConfig, api_key: str = "") -> bool:
163
+ """Validate an API key with a minimal request."""
164
+ key = api_key or resolve_api_key(cfg)
165
+ if not key:
166
+ return False
167
+
168
+ base_url = cfg.base_url.rstrip("/")
169
+ messages = [{"role": "user", "content": "hi"}]
170
+
171
+ try:
172
+ if _is_anthropic(base_url):
173
+ _anthropic_request(base_url, cfg.model, "", messages, key, 1, 10)
174
+ else:
175
+ _openai_request(base_url, cfg.model, "", messages, key, 1, 10)
176
+ return True
177
+ except urllib.error.HTTPError as e:
178
+ if e.code == 401:
179
+ return False
180
+ # Other errors (rate limit, etc.) mean the key itself is valid
181
+ return e.code != 403
182
+ except Exception:
183
+ return False
@@ -142,6 +142,7 @@ def convert_numbers(text: str, language: str = "en") -> str:
142
142
  # Collect consecutive number words
143
143
  if _is_number_word(low) and low != "a" and low != "and":
144
144
  num_words = []
145
+ last_category = None # "ones", "tens", "scale"
145
146
  j = i
146
147
  while j < len(words):
147
148
  w = words[j].lower().rstrip(".,;:?!")
@@ -153,6 +154,7 @@ def convert_numbers(text: str, language: str = "en") -> str:
153
154
  # "a" at start: only if followed by scale word
154
155
  if j + 1 < len(words) and words[j + 1].lower().rstrip(".,;:?!") in _SCALES:
155
156
  num_words.append(w)
157
+ last_category = "ones"
156
158
  j += 1
157
159
  continue
158
160
  break
@@ -163,7 +165,14 @@ def convert_numbers(text: str, language: str = "en") -> str:
163
165
  j += 1
164
166
  continue
165
167
  break
168
+ # Two consecutive ones-words = separate numbers
169
+ # e.g. "one two three" should NOT become 6
170
+ # But "twenty three", "one hundred", "thirteen thousand" are valid
171
+ cat = "scale" if w in _SCALES else ("tens" if w in _TENS else "ones")
172
+ if cat == "ones" and last_category == "ones":
173
+ break
166
174
  num_words.append(w)
175
+ last_category = cat
167
176
  j += 1
168
177
  else:
169
178
  break
@@ -57,6 +57,7 @@ Type=simple
57
57
  ExecStart={bin_path}
58
58
  Restart=on-failure
59
59
  RestartSec=3
60
+ PassEnvironment=DISPLAY WAYLAND_DISPLAY XDG_SESSION_TYPE XDG_RUNTIME_DIR
60
61
 
61
62
  [Install]
62
63
  WantedBy=default.target
@@ -19,12 +19,25 @@ class EdgeEngine:
19
19
  def probe(self) -> ProbeResult:
20
20
  try:
21
21
  import edge_tts # noqa: F401
22
- return ProbeResult(ok=True)
23
22
  except ImportError:
24
23
  return ProbeResult(
25
24
  ok=False, reason="edge-tts not installed",
26
25
  fix_hint="pip install edge-tts",
27
26
  )
27
+ try:
28
+ import soundfile # noqa: F401
29
+ return ProbeResult(ok=True)
30
+ except ImportError:
31
+ pass
32
+ try:
33
+ import pydub # noqa: F401
34
+ return ProbeResult(ok=True)
35
+ except ImportError:
36
+ return ProbeResult(
37
+ ok=False,
38
+ reason="edge-tts needs soundfile or pydub to decode audio",
39
+ fix_hint="pip install soundfile",
40
+ )
28
41
 
29
42
  def synthesize(self, text: str, voice: str, speed: float) -> tuple[np.ndarray, int]:
30
43
  import asyncio
@@ -22,10 +22,11 @@ class PiperEngine:
22
22
  def probe(self) -> ProbeResult:
23
23
  try:
24
24
  import piper # noqa: F401
25
+ from piper.download import ensure_voice_exists, get_voices # noqa: F401
25
26
  return ProbeResult(ok=True)
26
- except ImportError:
27
+ except ImportError as e:
27
28
  return ProbeResult(
28
- ok=False, reason="piper-tts not installed",
29
+ ok=False, reason=f"piper-tts not fully installed: {e}",
29
30
  fix_hint="pip install piper-tts",
30
31
  )
31
32