agent-cli 0.73.0__py3-none-any.whl → 0.75.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_cli/_extras.json +23 -14
- agent_cli/_requirements/whisper-transformers.txt +256 -0
- agent_cli/example-config.toml +4 -1
- agent_cli/install/extras.py +6 -2
- agent_cli/scripts/sync_extras.py +10 -1
- agent_cli/server/cli.py +45 -25
- agent_cli/server/whisper/backends/__init__.py +8 -1
- agent_cli/server/whisper/backends/mlx.py +10 -0
- agent_cli/server/whisper/backends/transformers.py +315 -0
- {agent_cli-0.73.0.dist-info → agent_cli-0.75.0.dist-info}/METADATA +12 -8
- {agent_cli-0.73.0.dist-info → agent_cli-0.75.0.dist-info}/RECORD +14 -12
- {agent_cli-0.73.0.dist-info → agent_cli-0.75.0.dist-info}/WHEEL +0 -0
- {agent_cli-0.73.0.dist-info → agent_cli-0.75.0.dist-info}/entry_points.txt +0 -0
- {agent_cli-0.73.0.dist-info → agent_cli-0.75.0.dist-info}/licenses/LICENSE +0 -0
agent_cli/_extras.json
CHANGED
@@ -6,21 +6,19 @@
         ]
     ],
    "faster-whisper": [
-        "
-        [
+        "Whisper ASR via CTranslate2",
+        [
+            "faster_whisper"
+        ]
    ],
    "kokoro": [
-        "
-        []
-    ],
-    "llm": [
-        "LLM framework (pydantic-ai)",
+        "Kokoro neural TTS (GPU)",
         [
-            "
+            "kokoro"
         ]
    ],
-    "llm
-        "
+    "llm": [
+        "LLM framework (pydantic-ai)",
         [
             "pydantic_ai"
         ]
@@ -34,12 +32,16 @@
         ]
    ],
    "mlx-whisper": [
-        "
-        [
+        "Whisper ASR for Apple Silicon",
+        [
+            "mlx_whisper"
+        ]
    ],
    "piper": [
-        "
-        [
+        "Piper TTS (CPU)",
+        [
+            "piper"
+        ]
    ],
    "rag": [
        "RAG proxy (ChromaDB, embeddings)",
@@ -66,6 +68,13 @@
             "silero_vad"
         ]
    ],
+    "whisper-transformers": [
+        "Whisper ASR via HuggingFace transformers",
+        [
+            "transformers",
+            "torch"
+        ]
+    ],
    "wyoming": [
        "Wyoming protocol support",
        [
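The rewritten manifest gives every extra the same shape: a description string followed by the list of module names whose importability confirms the extra is installed. A minimal sketch of how a manifest with this shape can be consumed (the real consumer inside agent-cli is not part of this diff; the importlib-based check here is an illustration):

import importlib.util
import json
from pathlib import Path

def _importable(name: str) -> bool:
    try:
        return importlib.util.find_spec(name) is not None
    except ModuleNotFoundError:  # parent package of a dotted name missing
        return False

# Load the manifest: {extra_name: [description, [module, ...]]}
manifest = json.loads(Path("agent_cli/_extras.json").read_text())
for extra, (description, modules) in manifest.items():
    # An extra counts as installed when all of its marker modules import.
    missing = [m for m in modules if not _importable(m)]
    status = "installed" if not missing else "missing: " + ", ".join(missing)
    print(f"{extra:22} {description:45} {status}")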
agent_cli/_requirements/whisper-transformers.txt
ADDED

@@ -0,0 +1,256 @@
+# This file was autogenerated by uv via the following command:
+#    uv export --extra whisper-transformers --no-dev --no-emit-project --no-hashes
+annotated-doc==0.0.4
+    # via fastapi
+annotated-types==0.7.0
+    # via pydantic
+anyio==4.12.1
+    # via
+    #   httpx
+    #   starlette
+    #   watchfiles
+certifi==2026.1.4
+    # via
+    #   httpcore
+    #   httpx
+    #   requests
+    #   sentry-sdk
+charset-normalizer==3.4.4
+    # via requests
+click==8.3.1
+    # via
+    #   rich-toolkit
+    #   typer
+    #   typer-slim
+    #   uvicorn
+colorama==0.4.6 ; sys_platform == 'win32'
+    # via
+    #   click
+    #   tqdm
+    #   uvicorn
+dnspython==2.8.0
+    # via email-validator
+dotenv==0.9.9
+    # via agent-cli
+email-validator==2.3.0
+    # via
+    #   fastapi
+    #   pydantic
+fastapi==0.128.0
+    # via agent-cli
+fastapi-cli==0.0.20
+    # via fastapi
+fastapi-cloud-cli==0.10.1
+    # via fastapi-cli
+fastar==0.8.0
+    # via fastapi-cloud-cli
+filelock==3.20.3
+    # via
+    #   huggingface-hub
+    #   torch
+    #   transformers
+fsspec==2026.1.0
+    # via
+    #   huggingface-hub
+    #   torch
+h11==0.16.0
+    # via
+    #   httpcore
+    #   uvicorn
+hf-xet==1.2.0 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
+    # via huggingface-hub
+httpcore==1.0.9
+    # via httpx
+httptools==0.7.1
+    # via uvicorn
+httpx==0.28.1
+    # via
+    #   agent-cli
+    #   fastapi
+    #   fastapi-cloud-cli
+huggingface-hub==0.36.0
+    # via
+    #   tokenizers
+    #   transformers
+idna==3.11
+    # via
+    #   anyio
+    #   email-validator
+    #   httpx
+    #   requests
+jinja2==3.1.6
+    # via
+    #   fastapi
+    #   torch
+markdown-it-py==4.0.0
+    # via rich
+markupsafe==3.0.3
+    # via jinja2
+mdurl==0.1.2
+    # via markdown-it-py
+mpmath==1.3.0
+    # via sympy
+networkx==3.6.1
+    # via torch
+numpy==2.3.5
+    # via transformers
+nvidia-cublas-cu12==12.8.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via
+    #   nvidia-cudnn-cu12
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cuda-cupti-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cuda-nvrtc-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cuda-runtime-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cudnn-cu12==9.10.2.21 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cufft-cu12==11.3.3.83 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cufile-cu12==1.13.1.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-curand-cu12==10.3.9.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cusolver-cu12==11.7.3.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cusparse-cu12==12.5.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cusparselt-cu12==0.7.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-nccl-cu12==2.27.5 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-nvjitlink-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via
+    #   nvidia-cufft-cu12
+    #   nvidia-cusolver-cu12
+    #   nvidia-cusparse-cu12
+    #   torch
+nvidia-nvshmem-cu12==3.3.20 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-nvtx-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+packaging==25.0
+    # via
+    #   huggingface-hub
+    #   transformers
+psutil==7.2.1 ; sys_platform == 'win32'
+    # via agent-cli
+pydantic==2.12.5
+    # via
+    #   agent-cli
+    #   fastapi
+    #   fastapi-cloud-cli
+    #   pydantic-extra-types
+    #   pydantic-settings
+pydantic-core==2.41.5
+    # via pydantic
+pydantic-extra-types==2.11.0
+    # via fastapi
+pydantic-settings==2.12.0
+    # via fastapi
+pygments==2.19.2
+    # via rich
+pyperclip==1.11.0
+    # via agent-cli
+python-dotenv==1.2.1
+    # via
+    #   dotenv
+    #   pydantic-settings
+    #   uvicorn
+python-multipart==0.0.21
+    # via fastapi
+pyyaml==6.0.3
+    # via
+    #   huggingface-hub
+    #   transformers
+    #   uvicorn
+regex==2026.1.15
+    # via transformers
+requests==2.32.5
+    # via
+    #   huggingface-hub
+    #   transformers
+rich==14.2.0
+    # via
+    #   agent-cli
+    #   rich-toolkit
+    #   typer
+    #   typer-slim
+rich-toolkit==0.17.1
+    # via
+    #   fastapi-cli
+    #   fastapi-cloud-cli
+rignore==0.7.6
+    # via fastapi-cloud-cli
+safetensors==0.7.0
+    # via transformers
+sentry-sdk==2.49.0
+    # via fastapi-cloud-cli
+setproctitle==1.3.7
+    # via agent-cli
+setuptools==80.9.0 ; python_full_version >= '3.12'
+    # via torch
+shellingham==1.5.4
+    # via
+    #   typer
+    #   typer-slim
+starlette==0.50.0
+    # via fastapi
+sympy==1.14.0
+    # via torch
+tokenizers==0.22.2
+    # via transformers
+torch==2.9.1
+    # via agent-cli
+tqdm==4.67.1
+    # via
+    #   huggingface-hub
+    #   transformers
+transformers==4.57.5
+    # via agent-cli
+triton==3.5.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+typer==0.21.1
+    # via
+    #   agent-cli
+    #   fastapi-cli
+    #   fastapi-cloud-cli
+typer-slim==0.21.1
+    # via agent-cli
+typing-extensions==4.15.0
+    # via
+    #   anyio
+    #   fastapi
+    #   huggingface-hub
+    #   pydantic
+    #   pydantic-core
+    #   pydantic-extra-types
+    #   rich-toolkit
+    #   starlette
+    #   torch
+    #   typer
+    #   typer-slim
+    #   typing-inspection
+typing-inspection==0.4.2
+    # via
+    #   pydantic
+    #   pydantic-settings
+urllib3==2.3.0
+    # via
+    #   requests
+    #   sentry-sdk
+uvicorn==0.40.0
+    # via
+    #   fastapi
+    #   fastapi-cli
+    #   fastapi-cloud-cli
+uvloop==0.22.1 ; platform_python_implementation != 'PyPy' and sys_platform != 'cygwin' and sys_platform != 'win32'
+    # via uvicorn
+watchfiles==1.1.1
+    # via uvicorn
+websockets==15.0.1
+    # via uvicorn
agent_cli/example-config.toml
CHANGED
@@ -13,7 +13,10 @@
 [defaults]
 
 # --- Provider Selection ---
-# Select the default provider for each service
+# Select the default provider for each service.
+# LLM: "ollama", "openai", or "gemini"
+# ASR: "wyoming", "openai", or "gemini"
+# TTS: "wyoming", "openai", "kokoro", or "gemini"
 llm-provider = "ollama"  # "local" still works as a deprecated alias
 tts-provider = "wyoming"
 
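The new comments enumerate the accepted provider names per service. A hypothetical validation sketch built only from those documented sets (agent-cli's real option parsing is not shown in this diff; note the example file uses dashed keys such as `llm-provider`):

import tomllib

# Allowed values as documented in the comments added above.
ALLOWED = {
    "llm-provider": {"ollama", "openai", "gemini"},
    "asr-provider": {"wyoming", "openai", "gemini"},
    "tts-provider": {"wyoming", "openai", "kokoro", "gemini"},
}

with open("example-config.toml", "rb") as f:
    defaults = tomllib.load(f).get("defaults", {})

for key, allowed in ALLOWED.items():
    value = defaults.get(key)
    if value is not None and value not in allowed:
        raise ValueError(f"{key} must be one of {sorted(allowed)}, got {value!r}")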
agent_cli/install/extras.py
CHANGED
@@ -63,7 +63,9 @@ def _install_via_uv_tool(extras: list[str], *, quiet: bool = False) -> bool:
     """Reinstall agent-cli via uv tool with the specified extras."""
     extras_str = ",".join(extras)
     package_spec = f"agent-cli[{extras_str}]"
-
+    # Cap at Python 3.13 for compatibility - some deps (e.g., silero-vad-lite) don't support 3.14 yet
+    major, minor = sys.version_info[:2]
+    python_version = f"{major}.{min(minor, 13)}"
     cmd = ["uv", "tool", "install", package_spec, "--force", "--python", python_version]
     if quiet:
         cmd.append("-q")
@@ -130,7 +132,8 @@ def install_extras(
        list[str] | None,
        typer.Argument(
            help="Extras to install: `rag`, `memory`, `vad`, `audio`, `piper`, `kokoro`, "
-            "`faster-whisper`, `mlx-whisper`, `
+            "`faster-whisper`, `mlx-whisper`, `whisper-transformers`, `wyoming`, `server`, "
+            "`speed`, `llm`",
        ),
    ] = None,
    list_extras: Annotated[
@@ -161,6 +164,7 @@ def install_extras(
     - `kokoro` - Kokoro neural TTS engine
     - `faster-whisper` - Whisper ASR for CUDA/CPU
     - `mlx-whisper` - Whisper ASR for Apple Silicon
+    - `whisper-transformers` - Whisper ASR via HuggingFace transformers (safetensors)
     - `wyoming` - Wyoming protocol for ASR/TTS servers
     - `server` - FastAPI server components
     - `speed` - Audio speed adjustment
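The new capping logic in `_install_via_uv_tool` is small enough to show standalone; this sketch restates the three added lines and prints the resulting `uv` command (the `faster-whisper` extra is an arbitrary example):

import sys

# Mirror of the three lines added above.
major, minor = sys.version_info[:2]
python_version = f"{major}.{min(minor, 13)}"  # e.g. 3.14 -> "3.13", 3.12 -> "3.12"

cmd = ["uv", "tool", "install", "agent-cli[faster-whisper]", "--force", "--python", python_version]
print(" ".join(cmd))
# On Python 3.14 this prints:
#   uv tool install agent-cli[faster-whisper] --force --python 3.13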
agent_cli/scripts/sync_extras.py
CHANGED
@@ -33,7 +33,6 @@ EXTRA_METADATA: dict[str, tuple[str, list[str]]] = {
     "openai": ("OpenAI API provider", ["openai"]),
     "gemini": ("Google Gemini provider", ["google.genai"]),
     "llm": ("LLM framework (pydantic-ai)", ["pydantic_ai"]),
-    "llm-core": ("Minimal LLM support for Docker images", ["pydantic_ai"]),
     # Feature extras
     "rag": ("RAG proxy (ChromaDB, embeddings)", ["chromadb", "pydantic_ai"]),
     "memory": ("Long-term memory proxy", ["chromadb", "yaml", "pydantic_ai"]),
@@ -44,6 +43,16 @@ EXTRA_METADATA: dict[str, tuple[str, list[str]]] = {
     "tts-kokoro": ("Kokoro neural TTS", ["kokoro"]),
     "server": ("FastAPI server components", ["fastapi"]),
     "speed": ("Audio speed adjustment (audiostretchy)", ["audiostretchy"]),
+    # Whisper backends
+    "faster-whisper": ("Whisper ASR via CTranslate2", ["faster_whisper"]),
+    "mlx-whisper": ("Whisper ASR for Apple Silicon", ["mlx_whisper"]),
+    "whisper-transformers": (
+        "Whisper ASR via HuggingFace transformers",
+        ["transformers", "torch"],
+    ),
+    # TTS backends
+    "piper": ("Piper TTS (CPU)", ["piper"]),
+    "kokoro": ("Kokoro neural TTS (GPU)", ["kokoro"]),
 }
 
 
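sync_extras.py evidently keeps `EXTRA_METADATA` as the single source of truth for `_extras.json` (the writing step itself is outside this diff). A plausible sketch of that generation step, reduced to the new `whisper-transformers` entry:

import json
from pathlib import Path

# Same (description, modules) tuple shape as EXTRA_METADATA above.
EXTRA_METADATA: dict[str, tuple[str, list[str]]] = {
    "whisper-transformers": (
        "Whisper ASR via HuggingFace transformers",
        ["transformers", "torch"],
    ),
}

# json.dumps serializes the tuples as 2-element arrays, matching _extras.json.
Path("agent_cli/_extras.json").write_text(json.dumps(EXTRA_METADATA, indent=4) + "\n")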
agent_cli/server/cli.py
CHANGED
@@ -157,36 +157,43 @@ def _download_tts_models(
 def _check_whisper_deps(backend: str, *, download_only: bool = False) -> None:
     """Check that Whisper dependencies are available."""
     _check_server_deps()
-    if
-    if not _has("
+    if backend == "mlx":
+        if not _has("mlx_whisper"):
             err_console.print(
-                "[bold red]Error:[/bold red]
-                "Run: [cyan]pip install
-                "or [cyan]uv sync --extra whisper[/cyan]",
+                "[bold red]Error:[/bold red] MLX Whisper backend requires mlx-whisper. "
+                "Run: [cyan]pip install mlx-whisper[/cyan]",
             )
             raise typer.Exit(1)
         return
 
-    if backend == "
-    if not _has("
+    if backend == "transformers":
+        if not _has("transformers") or not _has("torch"):
             err_console.print(
-                "[bold red]Error:[/bold red]
-                "Run: [cyan]pip install
+                "[bold red]Error:[/bold red] Transformers backend requires transformers and torch. "
+                "Run: [cyan]pip install agent-cli\\[whisper-transformers][/cyan] "
+                "or [cyan]uv sync --extra whisper-transformers[/cyan]",
             )
             raise typer.Exit(1)
         return
 
     if not _has("faster_whisper"):
-
-
-
-
-
+        if download_only:
+            err_console.print(
+                "[bold red]Error:[/bold red] faster-whisper is required for --download-only. "
+                "Run: [cyan]pip install agent-cli\\[faster-whisper][/cyan] "
+                "or [cyan]uv sync --extra faster-whisper[/cyan]",
+            )
+        else:
+            err_console.print(
+                "[bold red]Error:[/bold red] Whisper dependencies not installed. "
+                "Run: [cyan]pip install agent-cli\\[faster-whisper][/cyan] "
+                "or [cyan]uv sync --extra faster-whisper[/cyan]",
+            )
         raise typer.Exit(1)
 
 
 @app.command("whisper")
-@requires_extras("server", "faster-whisper|mlx-whisper", "wyoming")
+@requires_extras("server", "faster-whisper|mlx-whisper|whisper-transformers", "wyoming")
 def whisper_cmd(  # noqa: PLR0912, PLR0915
     model: Annotated[
         list[str] | None,
@@ -299,7 +306,7 @@ def whisper_cmd(  # noqa: PLR0912, PLR0915
            "-b",
            help=(
                "Inference backend: `auto` (faster-whisper on CUDA/CPU, MLX on Apple Silicon), "
-                "`faster-whisper`, `mlx`"
+                "`faster-whisper`, `mlx`, `transformers` (HuggingFace, supports safetensors)"
            ),
        ),
    ] = "auto",
@@ -331,7 +338,7 @@ def whisper_cmd(  # noqa: PLR0912, PLR0915
     # Setup Rich logging for consistent output
     setup_rich_logging(log_level)
 
-    valid_backends = ("auto", "faster-whisper", "mlx")
+    valid_backends = ("auto", "faster-whisper", "mlx", "transformers")
     if backend not in valid_backends:
         err_console.print(
             f"[bold red]Error:[/bold red] --backend must be one of: {', '.join(valid_backends)}",
@@ -339,7 +346,7 @@ def whisper_cmd(  # noqa: PLR0912, PLR0915
         raise typer.Exit(1)
 
     resolved_backend = backend
-    if backend == "auto"
+    if backend == "auto":
         from agent_cli.server.whisper.backends import detect_backend  # noqa: PLC0415
 
         resolved_backend = detect_backend()
@@ -370,13 +377,26 @@ def whisper_cmd(  # noqa: PLR0912, PLR0915
     for model_name in model:
         console.print(f"  Downloading [cyan]{model_name}[/cyan]...")
         try:
-
-
-
-
-
-
-
+            if resolved_backend == "transformers":
+                from agent_cli.server.whisper.backends.transformers import (  # noqa: PLC0415
+                    download_model as download_transformers_model,
+                )
+
+                download_transformers_model(model_name, cache_dir=cache_dir)
+            elif resolved_backend == "mlx":
+                from agent_cli.server.whisper.backends.mlx import (  # noqa: PLC0415
+                    download_model as download_mlx_model,
+                )
+
+                download_mlx_model(model_name)
+            else:
+                from faster_whisper import WhisperModel  # noqa: PLC0415
+
+                _ = WhisperModel(
+                    model_name,
+                    device="cpu",  # Don't need GPU for download
+                    download_root=str(cache_dir) if cache_dir else None,
+                )
             console.print(f"  [green]✓[/green] Downloaded {model_name}")
         except Exception as e:
             err_console.print(f"  [red]✗[/red] Failed to download {model_name}: {e}")
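`_check_whisper_deps` leans on a `_has()` helper that this diff never defines. A typical implementation (an assumption, not necessarily agent-cli's) tests importability without actually importing the module:

import importlib.util

def _has(module_name: str) -> bool:
    """Return True when `module_name` is importable in this environment."""
    return importlib.util.find_spec(module_name) is not None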
agent_cli/server/whisper/backends/__init__.py
CHANGED

@@ -17,7 +17,7 @@ from agent_cli.server.whisper.backends.base import (
 
 logger = logging.getLogger(__name__)
 
-BackendType = Literal["faster-whisper", "mlx", "auto"]
+BackendType = Literal["faster-whisper", "mlx", "transformers", "auto"]
 
 
 def detect_backend() -> Literal["faster-whisper", "mlx"]:
@@ -76,6 +76,13 @@ def create_backend(
 
         return FasterWhisperBackend(config)
 
+    if backend_type == "transformers":
+        from agent_cli.server.whisper.backends.transformers import (  # noqa: PLC0415
+            TransformersWhisperBackend,
+        )
+
+        return TransformersWhisperBackend(config)
+
     msg = f"Unknown backend type: {backend_type}"
     raise ValueError(msg)
 
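Hypothetical use of the extended factory; `BackendConfig` is defined in `backends/base.py` (not shown here), the field names below are taken from their use elsewhere in this diff, and the argument order for `create_backend` is an assumption:

from agent_cli.server.whisper.backends import create_backend
from agent_cli.server.whisper.backends.base import BackendConfig

# Any additional required BackendConfig fields would need to be supplied too.
config = BackendConfig(model_name="large-v3-turbo", device="auto", cache_dir=None)
backend = create_backend("transformers", config)  # returns TransformersWhisperBackend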
agent_cli/server/whisper/backends/mlx.py
CHANGED

@@ -55,6 +55,16 @@ def _resolve_mlx_model_name(model_name: str) -> str:
     return model_name
 
 
+def download_model(model_name: str) -> str:
+    """Download an MLX Whisper model and return the resolved repo name."""
+    import mlx.core as mx  # noqa: PLC0415
+    from mlx_whisper.transcribe import ModelHolder  # noqa: PLC0415
+
+    resolved_model = _resolve_mlx_model_name(model_name)
+    ModelHolder.get_model(resolved_model, mx.float16)
+    return resolved_model
+
+
 def _pcm_to_float(audio_bytes: bytes) -> NDArray[np.float32]:
     """Convert 16-bit PCM audio bytes to float32 array normalized to [-1, 1]."""
     import numpy as np  # noqa: PLC0415
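A one-liner exercising the new MLX helper (illustrative; `_resolve_mlx_model_name`'s short-name-to-repo mapping is not shown in this diff):

from agent_cli.server.whisper.backends.mlx import download_model

repo = download_model("large-v3")  # warms the cache via ModelHolder.get_model
print(repo)  # the resolved HuggingFace repo id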
agent_cli/server/whisper/backends/transformers.py
ADDED

@@ -0,0 +1,315 @@
+"""Transformers Whisper backend for HuggingFace models with safetensors support."""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import tempfile
+import time
+import wave
+from concurrent.futures import ProcessPoolExecutor
+from dataclasses import dataclass
+from multiprocessing import get_context
+from pathlib import Path
+from typing import Any, Literal
+
+from agent_cli.core.process import set_process_title
+from agent_cli.server.whisper.backends.base import (
+    BackendConfig,
+    TranscriptionResult,
+)
+
+logger = logging.getLogger(__name__)
+
+# Model name mapping: canonical name -> HuggingFace repo
+_MODEL_MAP: dict[str, str] = {
+    "tiny": "openai/whisper-tiny",
+    "tiny.en": "openai/whisper-tiny.en",
+    "base": "openai/whisper-base",
+    "base.en": "openai/whisper-base.en",
+    "small": "openai/whisper-small",
+    "small.en": "openai/whisper-small.en",
+    "medium": "openai/whisper-medium",
+    "medium.en": "openai/whisper-medium.en",
+    "large": "openai/whisper-large",
+    "large-v2": "openai/whisper-large-v2",
+    "large-v3": "openai/whisper-large-v3",
+    "large-v3-turbo": "openai/whisper-large-v3-turbo",
+    "turbo": "openai/whisper-large-v3-turbo",
+    # Distil variants (smaller, faster)
+    "distil-large-v2": "distil-whisper/distil-large-v2",
+    "distil-large-v3": "distil-whisper/distil-large-v3",
+    "distil-medium.en": "distil-whisper/distil-medium.en",
+    "distil-small.en": "distil-whisper/distil-small.en",
+}
+
+
+def _resolve_model_name(model_name: str) -> str:
+    """Resolve a model name to a HuggingFace repo."""
+    if "/" in model_name:
+        return model_name
+    return _MODEL_MAP.get(model_name, f"openai/whisper-{model_name}")
+
+
+def download_model(model_name: str, cache_dir: Path | None = None) -> str:
+    """Download a Whisper model from the HuggingFace Hub.
+
+    Returns the resolved repo name.
+    """
+    from huggingface_hub import snapshot_download  # noqa: PLC0415
+
+    resolved_model = _resolve_model_name(model_name)
+    snapshot_download(
+        repo_id=resolved_model,
+        cache_dir=str(cache_dir) if cache_dir else None,
+    )
+    return resolved_model
+
+
+# --- Subprocess state (only used within subprocess worker) ---
+
+
+@dataclass
+class _SubprocessState:
+    """Container for subprocess-local state. Not shared with main process."""
+
+    model: Any = None
+    processor: Any = None
+    device: str | None = None
+
+
+_state = _SubprocessState()
+
+
+# --- Subprocess worker functions (run in isolated process) ---
+
+
+def _load_model_in_subprocess(
+    model_name: str,
+    device: str,
+    download_root: str | None,
+) -> str:
+    """Load model in subprocess. Returns actual device string."""
+    import torch  # noqa: PLC0415
+    from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor  # noqa: PLC0415
+
+    set_process_title("whisper-transformers")
+
+    # Resolve device
+    if device == "auto":
+        if torch.cuda.is_available():
+            device = "cuda"
+        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
+            device = "mps"
+        else:
+            device = "cpu"
+
+    _state.processor = AutoProcessor.from_pretrained(
+        model_name,
+        cache_dir=download_root,
+    )
+    dtype = torch.float16 if device != "cpu" else torch.float32
+    _state.model = AutoModelForSpeechSeq2Seq.from_pretrained(
+        model_name,
+        cache_dir=download_root,
+        torch_dtype=dtype,
+        low_cpu_mem_usage=True,
+    )
+    _state.model.to(device)
+    _state.model.eval()
+    _state.device = device
+
+    return device
+
+
+def _transcribe_in_subprocess(kwargs: dict[str, Any]) -> dict[str, Any]:
+    """Run transcription in subprocess. Reuses model from _state."""
+    import torch  # noqa: PLC0415
+
+    if _state.model is None or _state.processor is None:
+        msg = "Model not loaded in subprocess. Call _load_model_in_subprocess first."
+        raise RuntimeError(msg)
+
+    # Parse WAV and extract audio
+    with wave.open(kwargs.pop("wav_path"), "rb") as wav_file:
+        sample_rate = wav_file.getframerate()
+        audio_bytes = wav_file.readframes(wav_file.getnframes())
+        duration = wav_file.getnframes() / sample_rate
+
+    # Convert to float tensor (copy buffer to avoid non-writable tensor warning)
+    import numpy as np  # noqa: PLC0415
+
+    audio_array = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0
+    audio_tensor = torch.from_numpy(audio_array)
+
+    # Process audio
+    inputs = _state.processor(
+        audio_tensor,
+        sampling_rate=sample_rate,
+        return_tensors="pt",
+    )
+    inputs = {k: v.to(_state.device) for k, v in inputs.items()}
+
+    language = kwargs.get("language")
+    task = kwargs.get("task", "transcribe")
+    initial_prompt = kwargs.get("initial_prompt")
+
+    # Build generate arguments - use language/task directly instead of deprecated forced_decoder_ids
+    generate_args: dict[str, Any] = {
+        **inputs,
+        "num_beams": kwargs.get("beam_size", 5),
+        "task": task,
+        "return_timestamps": False,
+    }
+
+    # Add attention_mask if available (avoids warning about pad token)
+    if "attention_mask" not in generate_args:
+        generate_args["attention_mask"] = inputs.get(
+            "attention_mask",
+            torch.ones_like(inputs["input_features"][:, 0, :]),
+        )
+
+    if language:
+        generate_args["language"] = language
+
+    if initial_prompt:
+        prompt_ids = (
+            _state.processor.tokenizer(
+                initial_prompt,
+                return_tensors="pt",
+                add_special_tokens=False,
+            )
+            .input_ids[0]
+            .to(_state.device)
+        )
+        generate_args["prompt_ids"] = prompt_ids
+
+    with torch.no_grad():
+        generated_ids = _state.model.generate(**generate_args)
+    text = _state.processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+    return {
+        "text": text.strip(),
+        "language": language or "en",
+        "language_probability": 1.0 if language else 0.95,
+        "duration": duration,
+        "segments": [],
+    }
+
+
+class TransformersWhisperBackend:
+    """Whisper backend using HuggingFace transformers.
+
+    Supports loading models from safetensors format.
+    Uses subprocess isolation for memory management.
+    """
+
+    def __init__(self, config: BackendConfig) -> None:
+        """Initialize the backend."""
+        self._config = config
+        self._resolved_model = _resolve_model_name(config.model_name)
+        self._executor: ProcessPoolExecutor | None = None
+        self._device: str | None = None
+
+    @property
+    def is_loaded(self) -> bool:
+        """Check if the model is loaded."""
+        return self._executor is not None
+
+    @property
+    def device(self) -> str | None:
+        """Get the device the model is on."""
+        return self._device
+
+    async def load(self) -> float:
+        """Start subprocess and load model."""
+        logger.debug(
+            "Starting transformers subprocess for model %s (resolved: %s, device=%s)",
+            self._config.model_name,
+            self._resolved_model,
+            self._config.device,
+        )
+
+        start_time = time.time()
+
+        ctx = get_context("spawn")
+        self._executor = ProcessPoolExecutor(max_workers=1, mp_context=ctx)
+
+        download_root = str(self._config.cache_dir) if self._config.cache_dir else None
+        loop = asyncio.get_running_loop()
+        self._device = await loop.run_in_executor(
+            self._executor,
+            _load_model_in_subprocess,
+            self._resolved_model,
+            self._config.device,
+            download_root,
+        )
+
+        load_duration = time.time() - start_time
+        logger.info(
+            "Model %s loaded on %s in %.2fs",
+            self._config.model_name,
+            self._device,
+            load_duration,
+        )
+        return load_duration
+
+    async def unload(self) -> None:
+        """Shutdown subprocess, releasing ALL memory."""
+        if self._executor is None:
+            return
+        logger.debug(
+            "Shutting down transformers subprocess for model %s",
+            self._config.model_name,
+        )
+        self._executor.shutdown(wait=False, cancel_futures=True)
+        self._executor = None
+        self._device = None
+        logger.info("Model %s unloaded (subprocess terminated)", self._config.model_name)
+
+    async def transcribe(
+        self,
+        audio: bytes,
+        *,
+        source_filename: str | None = None,  # noqa: ARG002
+        language: str | None = None,
+        task: Literal["transcribe", "translate"] = "transcribe",
+        initial_prompt: str | None = None,
+        temperature: float = 0.0,  # noqa: ARG002 - not used by transformers
+        vad_filter: bool = True,  # noqa: ARG002 - not supported
+        word_timestamps: bool = False,  # noqa: ARG002 - not supported
+    ) -> TranscriptionResult:
+        """Transcribe audio using transformers in subprocess."""
+        if self._executor is None:
+            msg = "Model not loaded. Call load() first."
+            raise RuntimeError(msg)
+
+        # Write audio to temp file for wave parsing in subprocess
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
+            tmp.write(audio)
+            tmp_path = tmp.name
+
+        kwargs: dict[str, Any] = {
+            "wav_path": tmp_path,
+            "language": language,
+            "task": task,
+            "initial_prompt": initial_prompt,
+        }
+
+        try:
+            loop = asyncio.get_running_loop()
+            result = await loop.run_in_executor(
+                self._executor,
+                _transcribe_in_subprocess,
+                kwargs,
+            )
+        finally:
+            Path(tmp_path).unlink(missing_ok=True)
+
+        return TranscriptionResult(
+            text=result["text"],
+            language=result["language"],
+            language_probability=result["language_probability"],
+            duration=result["duration"],
+            segments=result["segments"],
+        )
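An end-to-end sketch of the new backend's lifecycle, assuming `BackendConfig` accepts the fields used above (`model_name`, `device`, `cache_dir`) and that the input is a complete WAV file as bytes (`sample.wav` is a placeholder):

import asyncio
from pathlib import Path

from agent_cli.server.whisper.backends.base import BackendConfig
from agent_cli.server.whisper.backends.transformers import TransformersWhisperBackend

async def main() -> None:
    config = BackendConfig(model_name="distil-small.en", device="auto", cache_dir=None)
    backend = TransformersWhisperBackend(config)
    await backend.load()  # spawns the worker process and loads the weights there
    wav_bytes = Path("sample.wav").read_bytes()  # placeholder input file
    result = await backend.transcribe(wav_bytes, language="en")
    print(result.text)
    await backend.unload()  # terminates the subprocess, releasing all model memory

asyncio.run(main())

The `unload()` path is the point of the subprocess design: dropping the worker process is the only reliable way to return all of torch's allocated memory to the OS.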
{agent_cli-0.73.0.dist-info → agent_cli-0.75.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: agent-cli
-Version: 0.73.0
+Version: 0.75.0
 Summary: A suite of AI-powered command-line tools for text correction, audio transcription, and voice assistance.
 Project-URL: Homepage, https://github.com/basnijholt/agent-cli
 Author-email: Bas Nijholt <bas@nijho.lt>
@@ -46,8 +46,6 @@ Requires-Dist: soundfile>=0.12.0; extra == 'kokoro'
 Requires-Dist: transformers>=4.40.0; extra == 'kokoro'
 Provides-Extra: llm
 Requires-Dist: pydantic-ai-slim[duckduckgo,google,openai,vertexai]>=0.1.1; extra == 'llm'
-Provides-Extra: llm-core
-Requires-Dist: pydantic-ai-slim[google,openai]>=0.1.1; extra == 'llm-core'
 Provides-Extra: memory
 Requires-Dist: chromadb>=0.4.22; extra == 'memory'
 Requires-Dist: fastapi[standard]; extra == 'memory'
@@ -86,6 +84,10 @@ Requires-Dist: pytest-timeout; extra == 'test'
 Requires-Dist: pytest>=7.0.0; extra == 'test'
 Provides-Extra: vad
 Requires-Dist: silero-vad-lite>=0.2.1; extra == 'vad'
+Provides-Extra: whisper-transformers
+Requires-Dist: fastapi[standard]; extra == 'whisper-transformers'
+Requires-Dist: torch>=2.0.0; extra == 'whisper-transformers'
+Requires-Dist: transformers>=4.30.0; extra == 'whisper-transformers'
 Provides-Extra: wyoming
 Requires-Dist: wyoming>=1.5.2; extra == 'wyoming'
 Description-Content-Type: text/markdown
@@ -426,7 +428,7 @@ Our installation scripts automatically handle all dependencies:
 |---------|---------|-----------------|
 | **[Ollama](https://ollama.ai/)** | Local LLM for text processing | ✅ Yes, with default model |
 | **[Wyoming Faster Whisper](https://github.com/rhasspy/wyoming-faster-whisper)** | Speech-to-text | ✅ Yes, via `uvx` |
-| **[`agent-cli server whisper`](docs/commands/server/whisper.md)** | Speech-to-text (alternative) | ✅ Built-in, `pip install "agent-cli[whisper]"` |
+| **[`agent-cli server whisper`](docs/commands/server/whisper.md)** | Speech-to-text (alternative) | ✅ Built-in, `pip install "agent-cli[faster-whisper]"` |
 | **[Wyoming Piper](https://github.com/rhasspy/wyoming-piper)** | Text-to-speech | ✅ Yes, via `uvx` |
 | **[Kokoro-FastAPI](https://github.com/remsky/Kokoro-FastAPI)** | Premium TTS (optional) | ⚙️ Can be added later |
 | **[Wyoming openWakeWord](https://github.com/rhasspy/wyoming-openwakeword)** | Wake word detection | ✅ Yes, for `assistant` |
@@ -520,6 +522,7 @@ agent-cli install-extras rag memory vad
 • kokoro - Kokoro neural TTS engine
 • faster-whisper - Whisper ASR for CUDA/CPU
 • mlx-whisper - Whisper ASR for Apple Silicon
+• whisper-transformers - Whisper ASR via HuggingFace transformers (safetensors)
 • wyoming - Wyoming protocol for ASR/TTS servers
 • server - FastAPI server components
 • speed - Audio speed adjustment
@@ -536,7 +539,8 @@ agent-cli install-extras rag memory vad
 
 ╭─ Arguments ────────────────────────────────────────────────────────────────────────────╮
 │   extras      [EXTRAS]...  Extras to install: rag, memory, vad, audio, piper, kokoro,  │
-│                            faster-whisper, mlx-whisper,
+│                            faster-whisper, mlx-whisper, whisper-transformers, wyoming, │
+│                            server, speed, llm                                          │
 ╰────────────────────────────────────────────────────────────────────────────────────────╯
 ╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
 │ --list  -l        Show available extras with descriptions (what each one enables)      │
@@ -632,8 +636,8 @@ the `[defaults]` section of your configuration file.
 ```toml
 [defaults]
 # llm_provider = "ollama"  # 'ollama', 'openai', or 'gemini'
-# asr_provider = "wyoming"  # 'wyoming' or '
-# tts_provider = "wyoming"  # 'wyoming', 'openai', or '
+# asr_provider = "wyoming"  # 'wyoming', 'openai', or 'gemini'
+# tts_provider = "wyoming"  # 'wyoming', 'openai', 'kokoro', or 'gemini'
 # openai_api_key = "sk-..."
 # gemini_api_key = "..."
 ```
@@ -645,7 +649,7 @@ the `[defaults]` section of your configuration file.
 **Workflow:** This is a simple, one-shot command.
 
 1. It reads text from your system clipboard (or from a direct argument).
-2. It sends the text to
+2. It sends the text to your configured LLM provider (default: Ollama) with a prompt to perform only technical corrections.
 3. The corrected text is copied back to your clipboard, replacing the original.
 
 **How to Use It:** This tool is ideal for integrating with a system-wide hotkey.
{agent_cli-0.73.0.dist-info → agent_cli-0.75.0.dist-info}/RECORD
CHANGED

@@ -1,6 +1,6 @@
 agent_cli/__init__.py,sha256=-bo57j_5TsCug2tGHh7wClAGDhzN239639K40pgVh4g,187
 agent_cli/__main__.py,sha256=2wx_SxA8KRdejM-hBFLN8JTR2rIgtwnDH03MPAbJH5U,106
-agent_cli/_extras.json,sha256=
+agent_cli/_extras.json,sha256=bLtE8Xit-AFL4drPlcaA5jTg5oxGbUV9HAuCd9s-2JE,1200
 agent_cli/_tools.py,sha256=u9Ww-k-sbwFnMTW8sreFGd71nJP6o5hKcM0Zd_D9GZk,12136
 agent_cli/api.py,sha256=FQ_HATc7DaedbEFQ275Z18wV90tkDByD_9x_K0wdSLQ,456
 agent_cli/cli.py,sha256=O3b5Bgv6mjzSIMKikRfeUEg1SSVXhCskLatltbx0ERg,3923
@@ -8,7 +8,7 @@ agent_cli/config.py,sha256=dgwDV6chrQzGnVZIJ0OOg26jFKLCGIInC4Q9oXcj3rM,15413
 agent_cli/config_cmd.py,sha256=PkFY-U09LRIFYrHL_kG4_Ge6DjCWFe3GkO_uiIBMTgI,10359
 agent_cli/constants.py,sha256=-Q17N6qKIGqPDsu3FxpIKP33G0Cs0RUJlHwYNHxVxek,843
 agent_cli/docs_gen.py,sha256=ZX2GYHNumpLhdAEc_4Hy6xeAahAzsEVVnsXUojMYVNY,13885
-agent_cli/example-config.toml,sha256=
+agent_cli/example-config.toml,sha256=upxiTAi8FV5rjrm5IBrnz4YDDC5nXA-DUzYBvWoPHM4,7616
 agent_cli/opts.py,sha256=XgVFv-ip5lkFJNyBGHewCBQc4YaLJUSijIsP1qiqcts,13405
 agent_cli/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 agent_cli/_requirements/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -23,6 +23,7 @@ agent_cli/_requirements/rag.txt,sha256=_MPH-PuDSU90J9EXHTJzN9M34ogmkHhsJ2e-Cimir
 agent_cli/_requirements/server.txt,sha256=v29ib07fYE5_lbL00ULOgS13XA5NAOnLq-lExJZ0zbw,3004
 agent_cli/_requirements/speed.txt,sha256=KwBTrZFXWtgwJ5zrcNtm45zfqvNK0trcR1SbV-wDFp4,1464
 agent_cli/_requirements/vad.txt,sha256=Jqw49D5xleqrFyv348GjqRmflewOOEYJklx7b9GbNpY,1359
+agent_cli/_requirements/whisper-transformers.txt,sha256=akxlIsNmHlldeyYJL3_ixCI35oSwWPURv9shiVZlNWs,6097
 agent_cli/_requirements/wyoming.txt,sha256=qsse6lSGYKxnzOYXpS6zIkZ7OG4-E4GH13gfBPuvoY0,1355
 agent_cli/agents/__init__.py,sha256=c1rnncDW5pBvP6BiLzFVpLWDNZzFRaUA7-a97avFVAs,321
 agent_cli/agents/_voice_agent_common.py,sha256=PUAztW84Xf9U7d0C_K5cL7I8OANIE1H6M8dFD_cRqps,4360
@@ -92,7 +93,7 @@ agent_cli/dev/terminals/warp.py,sha256=j-Jvz_BbWYC3QfLrvl4CbDh03c9OGRFmuCzjyB2ud
 agent_cli/dev/terminals/zellij.py,sha256=GnQnopimb9XH67CZGHjnbVWpVSWhaLCATGJizCT5TkY,2321
 agent_cli/install/__init__.py,sha256=JQPrOrtdNd1Y1NmQDkb3Nmm1qdyn3kPjhQwy9D8ryjI,124
 agent_cli/install/common.py,sha256=WvnmcjnFTW0d1HZrKVGzj5Tg3q8Txk_ZOdc4a1MBFWI,3121
-agent_cli/install/extras.py,sha256=
+agent_cli/install/extras.py,sha256=xQ-0A-8X1n6X9ufLG2wPg6UA0Y34M_Zwc27Btc_QKvE,7438
 agent_cli/install/hotkeys.py,sha256=Y7jjtbyjVzIXL1_aczJmOyjL0ud2epbrFbzuWlObqZY,2324
 agent_cli/install/services.py,sha256=mgFjNqvvZ9U2dJ_PcEVWcenlaOtdIZ5a-RyDofDqooY,4209
 agent_cli/memory/__init__.py,sha256=8XNpVzP-qjF8o49A_eXsH_Rbp_FmxTIcknnvxq7vHms,162
@@ -140,7 +141,7 @@ agent_cli/scripts/setup-macos.sh,sha256=iKWhhONLGDTqYawSDqutnl0mfQomSVPPAsx09-0E
 agent_cli/scripts/setup-windows.ps1,sha256=NhyxOuwCjjSw24q2QOqggATos_n06DDbfvMQWuAB3tM,2938
 agent_cli/scripts/start-all-services-windows.ps1,sha256=uOODaPFzniEU7asDgMyf5MEOWcEFsGg_mCLLlDgKoa8,2643
 agent_cli/scripts/start-all-services.sh,sha256=c6pjXoyoQkeF-cYpldeMMo38XxRMmS43FHG5w3ElLxg,7756
-agent_cli/scripts/sync_extras.py,sha256=
+agent_cli/scripts/sync_extras.py,sha256=bxfj88pRN2Uojyd8ubhtlzFCMrXvXmaXlFjzdNLiayw,5179
 agent_cli/scripts/.runtime/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 agent_cli/scripts/linux-hotkeys/README.md,sha256=OW48Xyv096XkUosSJkzED_nnEEncSzhl87FNgEfq8wg,2037
 agent_cli/scripts/linux-hotkeys/toggle-autocorrect.sh,sha256=sme-dil3EU4nkdRwxSvARr-hBN9UjrU1IFabLCrvwl8,1251
@@ -157,7 +158,7 @@ agent_cli/scripts/nvidia-asr-server/server.py,sha256=kPNQIVF3exblvqMtIVk38Y6sZy2
 agent_cli/scripts/nvidia-asr-server/shell.nix,sha256=IT20j5YNj_wc7MdXi7ndogGodDNSGwyq8G0bNoZEpmg,1003
 agent_cli/scripts/nvidia-asr-server/uv.lock,sha256=5WWaqWOuV_moMPC-LIZK-A-Y5oaHr1tUn_vbR-IupzY,728608
 agent_cli/server/__init__.py,sha256=NZuJHlLHck9KWrepNZHrJONptYCQI9P-uTqknSFI5Ds,71
-agent_cli/server/cli.py,sha256=
+agent_cli/server/cli.py,sha256=dBg9Iy8BGthxvW_ImYweauQJVKdnqwUkl0EbFvOR-K4,27417
 agent_cli/server/common.py,sha256=hBBp6i-2-yhDY260ffwmFBg_ndcoT5SNcfa6uFyP7Vc,6391
 agent_cli/server/model_manager.py,sha256=93l_eeZeqnPALyDIK24or61tvded9TbM8tnde0okVjY,9225
 agent_cli/server/model_registry.py,sha256=KrRV1XxbFYuXu5rJlHFh6PTl_2BKiWnWsaNrf-0c6wQ,6988
@@ -179,18 +180,19 @@ agent_cli/server/whisper/languages.py,sha256=Tv3qsIOSQQLxw-v5Wy41jSS6uHG_YBiG-T2
 agent_cli/server/whisper/model_manager.py,sha256=LI92mkueu8o8m6AhzlUaaIWygnZucJa295-j7ymx7Ss,4925
 agent_cli/server/whisper/model_registry.py,sha256=qoRkB0ex6aRtUlsUN5BGik-oIZlwJbVHGQKaCbf_yVg,789
 agent_cli/server/whisper/wyoming_handler.py,sha256=HjN565YfDHeVfaGjQfoy9xjCZPx_TvYvjRYgbKn3aOI,6634
-agent_cli/server/whisper/backends/__init__.py,sha256=
+agent_cli/server/whisper/backends/__init__.py,sha256=YzS5g1PAlKi6k00u0iEH_0uRclvO5iVJVw8CkxSk9wk,2581
 agent_cli/server/whisper/backends/base.py,sha256=gQi5EyMCFS464mKXGIKbh1vgtBm99eNkf93SCIYRYg0,2597
 agent_cli/server/whisper/backends/faster_whisper.py,sha256=GN51L-qBjH-YU8ASiu317NrkMKMsK_znXDOTxi90EzU,6966
-agent_cli/server/whisper/backends/mlx.py,sha256=
+agent_cli/server/whisper/backends/mlx.py,sha256=5wHIvGDanA4-D_HUQSDJQS2DeuTg2x59z4hNAx-P0pg,9698
+agent_cli/server/whisper/backends/transformers.py,sha256=4t3gj2AcEqQY3w3ZlzVke10XCVbJk2vPeaGxxNP8tv0,10067
 agent_cli/services/__init__.py,sha256=5FyGCOS2Zpx4e2QWi1ppg9zm0hl8UU4J_id_g8LqGh4,11305
 agent_cli/services/_wyoming_utils.py,sha256=pKPa4fOSdqcG3-kNHJOHHsMnZ1yZJZi6XohVwjAwabo,1971
 agent_cli/services/asr.py,sha256=aRaCLVCygsJ15qyQEPECOZsdSrnlLPbyY4RwAqY0qIw,17258
 agent_cli/services/llm.py,sha256=i01utl1eYWlM13gvW2eR6ErL_ndH-g0d-BSleZra_7k,7229
 agent_cli/services/tts.py,sha256=NX5Qnq7ddLI3mwm3nzhbR3zB1Os4Ip4sSVSjDZDTBcI,14855
 agent_cli/services/wake_word.py,sha256=JFJ1SA22H4yko9DXiQ1t7fcoxeALLAe3iBrLs0z8rX4,4664
-agent_cli-0.
-agent_cli-0.
-agent_cli-0.
-agent_cli-0.
-agent_cli-0.
+agent_cli-0.75.0.dist-info/METADATA,sha256=3xYOYaJno_r35V1RGawNfVhMrgwkxj94LckVUjbJMV0,181041
+agent_cli-0.75.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+agent_cli-0.75.0.dist-info/entry_points.txt,sha256=FUv-fB2atLsPUk_RT4zqnZl1coz4_XHFwRALOKOF38s,97
+agent_cli-0.75.0.dist-info/licenses/LICENSE,sha256=majJU6S9kC8R8bW39NVBHyv32Dq50FL6TDxECG2WVts,1068
+agent_cli-0.75.0.dist-info/RECORD,,
{agent_cli-0.73.0.dist-info → agent_cli-0.75.0.dist-info}/WHEEL
File without changes

{agent_cli-0.73.0.dist-info → agent_cli-0.75.0.dist-info}/entry_points.txt
File without changes

{agent_cli-0.73.0.dist-info → agent_cli-0.75.0.dist-info}/licenses/LICENSE
File without changes