agent-cli 0.74.0__py3-none-any.whl → 0.75.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_cli/_extras.json +23 -8
- agent_cli/_requirements/whisper-transformers.txt +256 -0
- agent_cli/example-config.toml +4 -1
- agent_cli/install/extras.py +3 -1
- agent_cli/scripts/sync_extras.py +10 -0
- agent_cli/server/cli.py +45 -25
- agent_cli/server/whisper/backends/__init__.py +8 -1
- agent_cli/server/whisper/backends/mlx.py +10 -0
- agent_cli/server/whisper/backends/transformers.py +315 -0
- {agent_cli-0.74.0.dist-info → agent_cli-0.75.0.dist-info}/METADATA +12 -6
- {agent_cli-0.74.0.dist-info → agent_cli-0.75.0.dist-info}/RECORD +14 -12
- {agent_cli-0.74.0.dist-info → agent_cli-0.75.0.dist-info}/WHEEL +0 -0
- {agent_cli-0.74.0.dist-info → agent_cli-0.75.0.dist-info}/entry_points.txt +0 -0
- {agent_cli-0.74.0.dist-info → agent_cli-0.75.0.dist-info}/licenses/LICENSE +0 -0
agent_cli/_extras.json
CHANGED
@@ -6,12 +6,16 @@
     ]
   ],
   "faster-whisper": [
-    "
-    [
+    "Whisper ASR via CTranslate2",
+    [
+      "faster_whisper"
+    ]
   ],
   "kokoro": [
-    "
-    [
+    "Kokoro neural TTS (GPU)",
+    [
+      "kokoro"
+    ]
   ],
   "llm": [
     "LLM framework (pydantic-ai)",
@@ -28,12 +32,16 @@
     ]
   ],
   "mlx-whisper": [
-    "
-    [
+    "Whisper ASR for Apple Silicon",
+    [
+      "mlx_whisper"
+    ]
  ],
  "piper": [
-    "
-    [
+    "Piper TTS (CPU)",
+    [
+      "piper"
+    ]
  ],
  "rag": [
    "RAG proxy (ChromaDB, embeddings)",
@@ -60,6 +68,13 @@
      "silero_vad"
    ]
  ],
+  "whisper-transformers": [
+    "Whisper ASR via HuggingFace transformers",
+    [
+      "transformers",
+      "torch"
+    ]
+  ],
  "wyoming": [
    "Wyoming protocol support",
    [
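The registry above maps each extra to a `[description, [import_names]]` pair. A minimal sketch of how such a file can be consumed to probe whether an extra's modules are importable; the helper name and the path in the usage comment are illustrative, not part of the package:

```python
import importlib.util
import json
from pathlib import Path


def extra_is_installed(extras_file: Path, extra: str) -> bool:
    """Check whether every import name listed for `extra` resolves.

    Assumes the [description, [import_names]] layout shown in the diff above.
    """
    registry = json.loads(extras_file.read_text())
    _description, import_names = registry[extra]
    return all(importlib.util.find_spec(name) is not None for name in import_names)


# Hypothetical usage:
# extra_is_installed(Path("agent_cli/_extras.json"), "whisper-transformers")
```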
agent_cli/_requirements/whisper-transformers.txt
ADDED
@@ -0,0 +1,256 @@
+# This file was autogenerated by uv via the following command:
+# uv export --extra whisper-transformers --no-dev --no-emit-project --no-hashes
+annotated-doc==0.0.4
+    # via fastapi
+annotated-types==0.7.0
+    # via pydantic
+anyio==4.12.1
+    # via
+    #   httpx
+    #   starlette
+    #   watchfiles
+certifi==2026.1.4
+    # via
+    #   httpcore
+    #   httpx
+    #   requests
+    #   sentry-sdk
+charset-normalizer==3.4.4
+    # via requests
+click==8.3.1
+    # via
+    #   rich-toolkit
+    #   typer
+    #   typer-slim
+    #   uvicorn
+colorama==0.4.6 ; sys_platform == 'win32'
+    # via
+    #   click
+    #   tqdm
+    #   uvicorn
+dnspython==2.8.0
+    # via email-validator
+dotenv==0.9.9
+    # via agent-cli
+email-validator==2.3.0
+    # via
+    #   fastapi
+    #   pydantic
+fastapi==0.128.0
+    # via agent-cli
+fastapi-cli==0.0.20
+    # via fastapi
+fastapi-cloud-cli==0.10.1
+    # via fastapi-cli
+fastar==0.8.0
+    # via fastapi-cloud-cli
+filelock==3.20.3
+    # via
+    #   huggingface-hub
+    #   torch
+    #   transformers
+fsspec==2026.1.0
+    # via
+    #   huggingface-hub
+    #   torch
+h11==0.16.0
+    # via
+    #   httpcore
+    #   uvicorn
+hf-xet==1.2.0 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
+    # via huggingface-hub
+httpcore==1.0.9
+    # via httpx
+httptools==0.7.1
+    # via uvicorn
+httpx==0.28.1
+    # via
+    #   agent-cli
+    #   fastapi
+    #   fastapi-cloud-cli
+huggingface-hub==0.36.0
+    # via
+    #   tokenizers
+    #   transformers
+idna==3.11
+    # via
+    #   anyio
+    #   email-validator
+    #   httpx
+    #   requests
+jinja2==3.1.6
+    # via
+    #   fastapi
+    #   torch
+markdown-it-py==4.0.0
+    # via rich
+markupsafe==3.0.3
+    # via jinja2
+mdurl==0.1.2
+    # via markdown-it-py
+mpmath==1.3.0
+    # via sympy
+networkx==3.6.1
+    # via torch
+numpy==2.3.5
+    # via transformers
+nvidia-cublas-cu12==12.8.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via
+    #   nvidia-cudnn-cu12
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cuda-cupti-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cuda-nvrtc-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cuda-runtime-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cudnn-cu12==9.10.2.21 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cufft-cu12==11.3.3.83 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cufile-cu12==1.13.1.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-curand-cu12==10.3.9.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cusolver-cu12==11.7.3.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cusparse-cu12==12.5.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cusparselt-cu12==0.7.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-nccl-cu12==2.27.5 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-nvjitlink-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via
+    #   nvidia-cufft-cu12
+    #   nvidia-cusolver-cu12
+    #   nvidia-cusparse-cu12
+    #   torch
+nvidia-nvshmem-cu12==3.3.20 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-nvtx-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+packaging==25.0
+    # via
+    #   huggingface-hub
+    #   transformers
+psutil==7.2.1 ; sys_platform == 'win32'
+    # via agent-cli
+pydantic==2.12.5
+    # via
+    #   agent-cli
+    #   fastapi
+    #   fastapi-cloud-cli
+    #   pydantic-extra-types
+    #   pydantic-settings
+pydantic-core==2.41.5
+    # via pydantic
+pydantic-extra-types==2.11.0
+    # via fastapi
+pydantic-settings==2.12.0
+    # via fastapi
+pygments==2.19.2
+    # via rich
+pyperclip==1.11.0
+    # via agent-cli
+python-dotenv==1.2.1
+    # via
+    #   dotenv
+    #   pydantic-settings
+    #   uvicorn
+python-multipart==0.0.21
+    # via fastapi
+pyyaml==6.0.3
+    # via
+    #   huggingface-hub
+    #   transformers
+    #   uvicorn
+regex==2026.1.15
+    # via transformers
+requests==2.32.5
+    # via
+    #   huggingface-hub
+    #   transformers
+rich==14.2.0
+    # via
+    #   agent-cli
+    #   rich-toolkit
+    #   typer
+    #   typer-slim
+rich-toolkit==0.17.1
+    # via
+    #   fastapi-cli
+    #   fastapi-cloud-cli
+rignore==0.7.6
+    # via fastapi-cloud-cli
+safetensors==0.7.0
+    # via transformers
+sentry-sdk==2.49.0
+    # via fastapi-cloud-cli
+setproctitle==1.3.7
+    # via agent-cli
+setuptools==80.9.0 ; python_full_version >= '3.12'
+    # via torch
+shellingham==1.5.4
+    # via
+    #   typer
+    #   typer-slim
+starlette==0.50.0
+    # via fastapi
+sympy==1.14.0
+    # via torch
+tokenizers==0.22.2
+    # via transformers
+torch==2.9.1
+    # via agent-cli
+tqdm==4.67.1
+    # via
+    #   huggingface-hub
+    #   transformers
+transformers==4.57.5
+    # via agent-cli
+triton==3.5.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+typer==0.21.1
+    # via
+    #   agent-cli
+    #   fastapi-cli
+    #   fastapi-cloud-cli
+typer-slim==0.21.1
+    # via agent-cli
+typing-extensions==4.15.0
+    # via
+    #   anyio
+    #   fastapi
+    #   huggingface-hub
+    #   pydantic
+    #   pydantic-core
+    #   pydantic-extra-types
+    #   rich-toolkit
+    #   starlette
+    #   torch
+    #   typer
+    #   typer-slim
+    #   typing-inspection
+typing-inspection==0.4.2
+    # via
+    #   pydantic
+    #   pydantic-settings
+urllib3==2.3.0
+    # via
+    #   requests
+    #   sentry-sdk
+uvicorn==0.40.0
+    # via
+    #   fastapi
+    #   fastapi-cli
+    #   fastapi-cloud-cli
+uvloop==0.22.1 ; platform_python_implementation != 'PyPy' and sys_platform != 'cygwin' and sys_platform != 'win32'
+    # via uvicorn
+watchfiles==1.1.1
+    # via uvicorn
+websockets==15.0.1
+    # via uvicorn
agent_cli/example-config.toml
CHANGED
@@ -13,7 +13,10 @@
 [defaults]
 
 # --- Provider Selection ---
-# Select the default provider for each service
+# Select the default provider for each service.
+# LLM: "ollama", "openai", or "gemini"
+# ASR: "wyoming", "openai", or "gemini"
+# TTS: "wyoming", "openai", "kokoro", or "gemini"
 llm-provider = "ollama" # "local" still works as a deprecated alias
 tts-provider = "wyoming"
 
agent_cli/install/extras.py
CHANGED
@@ -132,7 +132,8 @@ def install_extras(
         list[str] | None,
         typer.Argument(
             help="Extras to install: `rag`, `memory`, `vad`, `audio`, `piper`, `kokoro`, "
-            "`faster-whisper`, `mlx-whisper`, `
+            "`faster-whisper`, `mlx-whisper`, `whisper-transformers`, `wyoming`, `server`, "
+            "`speed`, `llm`",
         ),
     ] = None,
     list_extras: Annotated[
@@ -163,6 +164,7 @@ def install_extras(
     - `kokoro` - Kokoro neural TTS engine
     - `faster-whisper` - Whisper ASR for CUDA/CPU
     - `mlx-whisper` - Whisper ASR for Apple Silicon
+    - `whisper-transformers` - Whisper ASR via HuggingFace transformers (safetensors)
     - `wyoming` - Wyoming protocol for ASR/TTS servers
     - `server` - FastAPI server components
     - `speed` - Audio speed adjustment
agent_cli/scripts/sync_extras.py
CHANGED
@@ -43,6 +43,16 @@ EXTRA_METADATA: dict[str, tuple[str, list[str]]] = {
     "tts-kokoro": ("Kokoro neural TTS", ["kokoro"]),
     "server": ("FastAPI server components", ["fastapi"]),
     "speed": ("Audio speed adjustment (audiostretchy)", ["audiostretchy"]),
+    # Whisper backends
+    "faster-whisper": ("Whisper ASR via CTranslate2", ["faster_whisper"]),
+    "mlx-whisper": ("Whisper ASR for Apple Silicon", ["mlx_whisper"]),
+    "whisper-transformers": (
+        "Whisper ASR via HuggingFace transformers",
+        ["transformers", "torch"],
+    ),
+    # TTS backends
+    "piper": ("Piper TTS (CPU)", ["piper"]),
+    "kokoro": ("Kokoro neural TTS (GPU)", ["kokoro"]),
 }
 
 
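`EXTRA_METADATA` mirrors the `[description, [import_names]]` entries that landed in `_extras.json` above. A plausible sketch of the serialization step; the writer function is hypothetical, since the rest of `sync_extras.py` is not shown in this diff:

```python
import json
from pathlib import Path

# Subset of the mapping added in the hunk above.
EXTRA_METADATA: dict[str, tuple[str, list[str]]] = {
    "whisper-transformers": (
        "Whisper ASR via HuggingFace transformers",
        ["transformers", "torch"],
    ),
}


def write_extras_json(path: Path) -> None:
    """Serialize EXTRA_METADATA into the [description, [import_names]] JSON layout."""
    payload = {name: [desc, mods] for name, (desc, mods) in EXTRA_METADATA.items()}
    path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n")
```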
agent_cli/server/cli.py
CHANGED
@@ -157,36 +157,43 @@ def _download_tts_models(
 def _check_whisper_deps(backend: str, *, download_only: bool = False) -> None:
     """Check that Whisper dependencies are available."""
     _check_server_deps()
-    if
-    if not _has("
+    if backend == "mlx":
+        if not _has("mlx_whisper"):
             err_console.print(
-                "[bold red]Error:[/bold red]
-                "Run: [cyan]pip install
-                "or [cyan]uv sync --extra whisper[/cyan]",
+                "[bold red]Error:[/bold red] MLX Whisper backend requires mlx-whisper. "
+                "Run: [cyan]pip install mlx-whisper[/cyan]",
             )
             raise typer.Exit(1)
         return
 
-    if backend == "
-        if not _has("
+    if backend == "transformers":
+        if not _has("transformers") or not _has("torch"):
             err_console.print(
-                "[bold red]Error:[/bold red]
-                "Run: [cyan]pip install
+                "[bold red]Error:[/bold red] Transformers backend requires transformers and torch. "
+                "Run: [cyan]pip install agent-cli\\[whisper-transformers][/cyan] "
+                "or [cyan]uv sync --extra whisper-transformers[/cyan]",
             )
             raise typer.Exit(1)
         return
 
     if not _has("faster_whisper"):
-
-
-
-
-
+        if download_only:
+            err_console.print(
+                "[bold red]Error:[/bold red] faster-whisper is required for --download-only. "
+                "Run: [cyan]pip install agent-cli\\[faster-whisper][/cyan] "
+                "or [cyan]uv sync --extra faster-whisper[/cyan]",
+            )
+        else:
+            err_console.print(
+                "[bold red]Error:[/bold red] Whisper dependencies not installed. "
+                "Run: [cyan]pip install agent-cli\\[faster-whisper][/cyan] "
+                "or [cyan]uv sync --extra faster-whisper[/cyan]",
+            )
         raise typer.Exit(1)
 
 
 @app.command("whisper")
-@requires_extras("server", "faster-whisper|mlx-whisper", "wyoming")
+@requires_extras("server", "faster-whisper|mlx-whisper|whisper-transformers", "wyoming")
 def whisper_cmd(  # noqa: PLR0912, PLR0915
     model: Annotated[
         list[str] | None,
@@ -299,7 +306,7 @@ def whisper_cmd(  # noqa: PLR0912, PLR0915
         "-b",
         help=(
             "Inference backend: `auto` (faster-whisper on CUDA/CPU, MLX on Apple Silicon), "
-            "`faster-whisper`, `mlx`"
+            "`faster-whisper`, `mlx`, `transformers` (HuggingFace, supports safetensors)"
         ),
     ),
 ] = "auto",
@@ -331,7 +338,7 @@ def whisper_cmd(  # noqa: PLR0912, PLR0915
     # Setup Rich logging for consistent output
     setup_rich_logging(log_level)
 
-    valid_backends = ("auto", "faster-whisper", "mlx")
+    valid_backends = ("auto", "faster-whisper", "mlx", "transformers")
     if backend not in valid_backends:
         err_console.print(
             f"[bold red]Error:[/bold red] --backend must be one of: {', '.join(valid_backends)}",
@@ -339,7 +346,7 @@ def whisper_cmd(  # noqa: PLR0912, PLR0915
         raise typer.Exit(1)
 
     resolved_backend = backend
-    if backend == "auto"
+    if backend == "auto":
         from agent_cli.server.whisper.backends import detect_backend  # noqa: PLC0415
 
         resolved_backend = detect_backend()
@@ -370,13 +377,26 @@ def whisper_cmd(  # noqa: PLR0912, PLR0915
     for model_name in model:
         console.print(f" Downloading [cyan]{model_name}[/cyan]...")
         try:
-
-
-
-
-
-
-
+            if resolved_backend == "transformers":
+                from agent_cli.server.whisper.backends.transformers import (  # noqa: PLC0415
+                    download_model as download_transformers_model,
+                )
+
+                download_transformers_model(model_name, cache_dir=cache_dir)
+            elif resolved_backend == "mlx":
+                from agent_cli.server.whisper.backends.mlx import (  # noqa: PLC0415
+                    download_model as download_mlx_model,
+                )
+
+                download_mlx_model(model_name)
+            else:
+                from faster_whisper import WhisperModel  # noqa: PLC0415
+
+                _ = WhisperModel(
+                    model_name,
+                    device="cpu",  # Don't need GPU for download
+                    download_root=str(cache_dir) if cache_dir else None,
+                )
             console.print(f" [green]✓[/green] Downloaded {model_name}")
         except Exception as e:
             err_console.print(f" [red]✗[/red] Failed to download {model_name}: {e}")
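With `--download-only`, the command now dispatches on the resolved backend, and the transformers branch funnels into the new `download_model` helper. A sketch using the signature visible in this diff; the cache path is only an example:

```python
from pathlib import Path

from agent_cli.server.whisper.backends.transformers import download_model

# Resolves a short name like "turbo" to "openai/whisper-large-v3-turbo"
# and snapshots the repo into the given cache directory.
repo = download_model("turbo", cache_dir=Path.home() / ".cache" / "whisper")
print(repo)  # -> openai/whisper-large-v3-turbo
```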
agent_cli/server/whisper/backends/__init__.py
CHANGED
@@ -17,7 +17,7 @@ from agent_cli.server.whisper.backends.base import (
 
 logger = logging.getLogger(__name__)
 
-BackendType = Literal["faster-whisper", "mlx", "auto"]
+BackendType = Literal["faster-whisper", "mlx", "transformers", "auto"]
 
 
 def detect_backend() -> Literal["faster-whisper", "mlx"]:
@@ -76,6 +76,13 @@ def create_backend(
 
         return FasterWhisperBackend(config)
 
+    if backend_type == "transformers":
+        from agent_cli.server.whisper.backends.transformers import (  # noqa: PLC0415
+            TransformersWhisperBackend,
+        )
+
+        return TransformersWhisperBackend(config)
+
     msg = f"Unknown backend type: {backend_type}"
     raise ValueError(msg)
 
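Callers select the new backend through `create_backend`. A hedged usage sketch: the argument order follows the `backend_type`/`config` names in the hunk above, and `BackendConfig`'s field names are inferred from the attribute accesses in `transformers.py` (`model_name`, `device`, `cache_dir`), since its definition is not part of this diff:

```python
from agent_cli.server.whisper.backends import create_backend
from agent_cli.server.whisper.backends.base import BackendConfig

# Field names and argument order are assumptions based on usage elsewhere in this diff.
config = BackendConfig(model_name="large-v3-turbo", device="auto", cache_dir=None)
backend = create_backend("transformers", config)  # -> TransformersWhisperBackend
```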
agent_cli/server/whisper/backends/mlx.py
CHANGED
@@ -55,6 +55,16 @@ def _resolve_mlx_model_name(model_name: str) -> str:
     return model_name
 
 
+def download_model(model_name: str) -> str:
+    """Download an MLX Whisper model and return the resolved repo name."""
+    import mlx.core as mx  # noqa: PLC0415
+    from mlx_whisper.transcribe import ModelHolder  # noqa: PLC0415
+
+    resolved_model = _resolve_mlx_model_name(model_name)
+    ModelHolder.get_model(resolved_model, mx.float16)
+    return resolved_model
+
+
 def _pcm_to_float(audio_bytes: bytes) -> NDArray[np.float32]:
     """Convert 16-bit PCM audio bytes to float32 array normalized to [-1, 1]."""
     import numpy as np  # noqa: PLC0415
agent_cli/server/whisper/backends/transformers.py
ADDED
@@ -0,0 +1,315 @@
+"""Transformers Whisper backend for HuggingFace models with safetensors support."""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import tempfile
+import time
+import wave
+from concurrent.futures import ProcessPoolExecutor
+from dataclasses import dataclass
+from multiprocessing import get_context
+from pathlib import Path
+from typing import Any, Literal
+
+from agent_cli.core.process import set_process_title
+from agent_cli.server.whisper.backends.base import (
+    BackendConfig,
+    TranscriptionResult,
+)
+
+logger = logging.getLogger(__name__)
+
+# Model name mapping: canonical name -> HuggingFace repo
+_MODEL_MAP: dict[str, str] = {
+    "tiny": "openai/whisper-tiny",
+    "tiny.en": "openai/whisper-tiny.en",
+    "base": "openai/whisper-base",
+    "base.en": "openai/whisper-base.en",
+    "small": "openai/whisper-small",
+    "small.en": "openai/whisper-small.en",
+    "medium": "openai/whisper-medium",
+    "medium.en": "openai/whisper-medium.en",
+    "large": "openai/whisper-large",
+    "large-v2": "openai/whisper-large-v2",
+    "large-v3": "openai/whisper-large-v3",
+    "large-v3-turbo": "openai/whisper-large-v3-turbo",
+    "turbo": "openai/whisper-large-v3-turbo",
+    # Distil variants (smaller, faster)
+    "distil-large-v2": "distil-whisper/distil-large-v2",
+    "distil-large-v3": "distil-whisper/distil-large-v3",
+    "distil-medium.en": "distil-whisper/distil-medium.en",
+    "distil-small.en": "distil-whisper/distil-small.en",
+}
+
+
+def _resolve_model_name(model_name: str) -> str:
+    """Resolve a model name to a HuggingFace repo."""
+    if "/" in model_name:
+        return model_name
+    return _MODEL_MAP.get(model_name, f"openai/whisper-{model_name}")
+
+
+def download_model(model_name: str, cache_dir: Path | None = None) -> str:
+    """Download a Whisper model from the HuggingFace Hub.
+
+    Returns the resolved repo name.
+    """
+    from huggingface_hub import snapshot_download  # noqa: PLC0415
+
+    resolved_model = _resolve_model_name(model_name)
+    snapshot_download(
+        repo_id=resolved_model,
+        cache_dir=str(cache_dir) if cache_dir else None,
+    )
+    return resolved_model
+
+
+# --- Subprocess state (only used within subprocess worker) ---
+
+
+@dataclass
+class _SubprocessState:
+    """Container for subprocess-local state. Not shared with main process."""
+
+    model: Any = None
+    processor: Any = None
+    device: str | None = None
+
+
+_state = _SubprocessState()
+
+
+# --- Subprocess worker functions (run in isolated process) ---
+
+
+def _load_model_in_subprocess(
+    model_name: str,
+    device: str,
+    download_root: str | None,
+) -> str:
+    """Load model in subprocess. Returns actual device string."""
+    import torch  # noqa: PLC0415
+    from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor  # noqa: PLC0415
+
+    set_process_title("whisper-transformers")
+
+    # Resolve device
+    if device == "auto":
+        if torch.cuda.is_available():
+            device = "cuda"
+        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
+            device = "mps"
+        else:
+            device = "cpu"
+
+    _state.processor = AutoProcessor.from_pretrained(
+        model_name,
+        cache_dir=download_root,
+    )
+    dtype = torch.float16 if device != "cpu" else torch.float32
+    _state.model = AutoModelForSpeechSeq2Seq.from_pretrained(
+        model_name,
+        cache_dir=download_root,
+        torch_dtype=dtype,
+        low_cpu_mem_usage=True,
+    )
+    _state.model.to(device)
+    _state.model.eval()
+    _state.device = device
+
+    return device
+
+
+def _transcribe_in_subprocess(kwargs: dict[str, Any]) -> dict[str, Any]:
+    """Run transcription in subprocess. Reuses model from _state."""
+    import torch  # noqa: PLC0415
+
+    if _state.model is None or _state.processor is None:
+        msg = "Model not loaded in subprocess. Call _load_model_in_subprocess first."
+        raise RuntimeError(msg)
+
+    # Parse WAV and extract audio
+    with wave.open(kwargs.pop("wav_path"), "rb") as wav_file:
+        sample_rate = wav_file.getframerate()
+        audio_bytes = wav_file.readframes(wav_file.getnframes())
+        duration = wav_file.getnframes() / sample_rate
+
+    # Convert to float tensor (copy buffer to avoid non-writable tensor warning)
+    import numpy as np  # noqa: PLC0415
+
+    audio_array = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0
+    audio_tensor = torch.from_numpy(audio_array)
+
+    # Process audio
+    inputs = _state.processor(
+        audio_tensor,
+        sampling_rate=sample_rate,
+        return_tensors="pt",
+    )
+    inputs = {k: v.to(_state.device) for k, v in inputs.items()}
+
+    language = kwargs.get("language")
+    task = kwargs.get("task", "transcribe")
+    initial_prompt = kwargs.get("initial_prompt")
+
+    # Build generate arguments - use language/task directly instead of deprecated forced_decoder_ids
+    generate_args: dict[str, Any] = {
+        **inputs,
+        "num_beams": kwargs.get("beam_size", 5),
+        "task": task,
+        "return_timestamps": False,
+    }
+
+    # Add attention_mask if available (avoids warning about pad token)
+    if "attention_mask" not in generate_args:
+        generate_args["attention_mask"] = inputs.get(
+            "attention_mask",
+            torch.ones_like(inputs["input_features"][:, 0, :]),
+        )
+
+    if language:
+        generate_args["language"] = language
+
+    if initial_prompt:
+        prompt_ids = (
+            _state.processor.tokenizer(
+                initial_prompt,
+                return_tensors="pt",
+                add_special_tokens=False,
+            )
+            .input_ids[0]
+            .to(_state.device)
+        )
+        generate_args["prompt_ids"] = prompt_ids
+
+    with torch.no_grad():
+        generated_ids = _state.model.generate(**generate_args)
+    text = _state.processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+    return {
+        "text": text.strip(),
+        "language": language or "en",
+        "language_probability": 1.0 if language else 0.95,
+        "duration": duration,
+        "segments": [],
+    }
+
+
+class TransformersWhisperBackend:
+    """Whisper backend using HuggingFace transformers.
+
+    Supports loading models from safetensors format.
+    Uses subprocess isolation for memory management.
+    """
+
+    def __init__(self, config: BackendConfig) -> None:
+        """Initialize the backend."""
+        self._config = config
+        self._resolved_model = _resolve_model_name(config.model_name)
+        self._executor: ProcessPoolExecutor | None = None
+        self._device: str | None = None
+
+    @property
+    def is_loaded(self) -> bool:
+        """Check if the model is loaded."""
+        return self._executor is not None
+
+    @property
+    def device(self) -> str | None:
+        """Get the device the model is on."""
+        return self._device
+
+    async def load(self) -> float:
+        """Start subprocess and load model."""
+        logger.debug(
+            "Starting transformers subprocess for model %s (resolved: %s, device=%s)",
+            self._config.model_name,
+            self._resolved_model,
+            self._config.device,
+        )
+
+        start_time = time.time()
+
+        ctx = get_context("spawn")
+        self._executor = ProcessPoolExecutor(max_workers=1, mp_context=ctx)
+
+        download_root = str(self._config.cache_dir) if self._config.cache_dir else None
+        loop = asyncio.get_running_loop()
+        self._device = await loop.run_in_executor(
+            self._executor,
+            _load_model_in_subprocess,
+            self._resolved_model,
+            self._config.device,
+            download_root,
+        )
+
+        load_duration = time.time() - start_time
+        logger.info(
+            "Model %s loaded on %s in %.2fs",
+            self._config.model_name,
+            self._device,
+            load_duration,
+        )
+        return load_duration
+
+    async def unload(self) -> None:
+        """Shutdown subprocess, releasing ALL memory."""
+        if self._executor is None:
+            return
+        logger.debug(
+            "Shutting down transformers subprocess for model %s",
+            self._config.model_name,
+        )
+        self._executor.shutdown(wait=False, cancel_futures=True)
+        self._executor = None
+        self._device = None
+        logger.info("Model %s unloaded (subprocess terminated)", self._config.model_name)
+
+    async def transcribe(
+        self,
+        audio: bytes,
+        *,
+        source_filename: str | None = None,  # noqa: ARG002
+        language: str | None = None,
+        task: Literal["transcribe", "translate"] = "transcribe",
+        initial_prompt: str | None = None,
+        temperature: float = 0.0,  # noqa: ARG002 - not used by transformers
+        vad_filter: bool = True,  # noqa: ARG002 - not supported
+        word_timestamps: bool = False,  # noqa: ARG002 - not supported
+    ) -> TranscriptionResult:
+        """Transcribe audio using transformers in subprocess."""
+        if self._executor is None:
+            msg = "Model not loaded. Call load() first."
+            raise RuntimeError(msg)
+
+        # Write audio to temp file for wave parsing in subprocess
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
+            tmp.write(audio)
+            tmp_path = tmp.name
+
+        kwargs: dict[str, Any] = {
+            "wav_path": tmp_path,
+            "language": language,
+            "task": task,
+            "initial_prompt": initial_prompt,
+        }
+
+        try:
+            loop = asyncio.get_running_loop()
+            result = await loop.run_in_executor(
+                self._executor,
+                _transcribe_in_subprocess,
+                kwargs,
+            )
+        finally:
+            Path(tmp_path).unlink(missing_ok=True)
+
+        return TranscriptionResult(
+            text=result["text"],
+            language=result["language"],
+            language_probability=result["language_probability"],
+            duration=result["duration"],
+            segments=result["segments"],
+        )
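Because `load()` and `transcribe()` are coroutines that hand work to a single-worker, spawn-context `ProcessPoolExecutor`, a caller drives the backend from an event loop. A hedged end-to-end sketch, again assuming the `BackendConfig` fields used above and a 16-bit PCM WAV input as `_transcribe_in_subprocess` expects:

```python
import asyncio
from pathlib import Path

from agent_cli.server.whisper.backends.base import BackendConfig
from agent_cli.server.whisper.backends.transformers import TransformersWhisperBackend


async def main() -> None:
    backend = TransformersWhisperBackend(
        BackendConfig(model_name="distil-small.en", device="auto", cache_dir=None),
    )
    await backend.load()  # spawns the worker process and loads weights there
    wav_bytes = Path("sample.wav").read_bytes()  # 16-bit PCM WAV
    result = await backend.transcribe(wav_bytes, language="en")
    print(result.text)
    await backend.unload()  # terminates the subprocess, releasing all memory


asyncio.run(main())
```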
{agent_cli-0.74.0.dist-info → agent_cli-0.75.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: agent-cli
-Version: 0.
+Version: 0.75.0
 Summary: A suite of AI-powered command-line tools for text correction, audio transcription, and voice assistance.
 Project-URL: Homepage, https://github.com/basnijholt/agent-cli
 Author-email: Bas Nijholt <bas@nijho.lt>
@@ -84,6 +84,10 @@ Requires-Dist: pytest-timeout; extra == 'test'
 Requires-Dist: pytest>=7.0.0; extra == 'test'
 Provides-Extra: vad
 Requires-Dist: silero-vad-lite>=0.2.1; extra == 'vad'
+Provides-Extra: whisper-transformers
+Requires-Dist: fastapi[standard]; extra == 'whisper-transformers'
+Requires-Dist: torch>=2.0.0; extra == 'whisper-transformers'
+Requires-Dist: transformers>=4.30.0; extra == 'whisper-transformers'
 Provides-Extra: wyoming
 Requires-Dist: wyoming>=1.5.2; extra == 'wyoming'
 Description-Content-Type: text/markdown
@@ -424,7 +428,7 @@ Our installation scripts automatically handle all dependencies:
 |---------|---------|-----------------|
 | **[Ollama](https://ollama.ai/)** | Local LLM for text processing | ✅ Yes, with default model |
 | **[Wyoming Faster Whisper](https://github.com/rhasspy/wyoming-faster-whisper)** | Speech-to-text | ✅ Yes, via `uvx` |
-| **[`agent-cli server whisper`](docs/commands/server/whisper.md)** | Speech-to-text (alternative) | ✅ Built-in, `pip install "agent-cli[whisper]"` |
+| **[`agent-cli server whisper`](docs/commands/server/whisper.md)** | Speech-to-text (alternative) | ✅ Built-in, `pip install "agent-cli[faster-whisper]"` |
 | **[Wyoming Piper](https://github.com/rhasspy/wyoming-piper)** | Text-to-speech | ✅ Yes, via `uvx` |
 | **[Kokoro-FastAPI](https://github.com/remsky/Kokoro-FastAPI)** | Premium TTS (optional) | ⚙️ Can be added later |
 | **[Wyoming openWakeWord](https://github.com/rhasspy/wyoming-openwakeword)** | Wake word detection | ✅ Yes, for `assistant` |
@@ -518,6 +522,7 @@ agent-cli install-extras rag memory vad
 • kokoro - Kokoro neural TTS engine
 • faster-whisper - Whisper ASR for CUDA/CPU
 • mlx-whisper - Whisper ASR for Apple Silicon
+• whisper-transformers - Whisper ASR via HuggingFace transformers (safetensors)
 • wyoming - Wyoming protocol for ASR/TTS servers
 • server - FastAPI server components
 • speed - Audio speed adjustment
@@ -534,7 +539,8 @@ agent-cli install-extras rag memory vad
 
 ╭─ Arguments ────────────────────────────────────────────────────────────────────────────╮
 │ extras [EXTRAS]... Extras to install: rag, memory, vad, audio, piper, kokoro, │
-│ faster-whisper, mlx-whisper,
+│ faster-whisper, mlx-whisper, whisper-transformers, wyoming, │
+│ server, speed, llm │
 ╰────────────────────────────────────────────────────────────────────────────────────────╯
 ╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
 │ --list -l Show available extras with descriptions (what each one enables) │
@@ -630,8 +636,8 @@ the `[defaults]` section of your configuration file.
 ```toml
 [defaults]
 # llm_provider = "ollama" # 'ollama', 'openai', or 'gemini'
-# asr_provider = "wyoming" # 'wyoming' or '
-# tts_provider = "wyoming" # 'wyoming', 'openai', or '
+# asr_provider = "wyoming" # 'wyoming', 'openai', or 'gemini'
+# tts_provider = "wyoming" # 'wyoming', 'openai', 'kokoro', or 'gemini'
 # openai_api_key = "sk-..."
 # gemini_api_key = "..."
 ```
@@ -643,7 +649,7 @@ the `[defaults]` section of your configuration file.
 **Workflow:** This is a simple, one-shot command.
 
 1. It reads text from your system clipboard (or from a direct argument).
-2. It sends the text to
+2. It sends the text to your configured LLM provider (default: Ollama) with a prompt to perform only technical corrections.
 3. The corrected text is copied back to your clipboard, replacing the original.
 
 **How to Use It:** This tool is ideal for integrating with a system-wide hotkey.
{agent_cli-0.74.0.dist-info → agent_cli-0.75.0.dist-info}/RECORD
CHANGED
@@ -1,6 +1,6 @@
 agent_cli/__init__.py,sha256=-bo57j_5TsCug2tGHh7wClAGDhzN239639K40pgVh4g,187
 agent_cli/__main__.py,sha256=2wx_SxA8KRdejM-hBFLN8JTR2rIgtwnDH03MPAbJH5U,106
-agent_cli/_extras.json,sha256=
+agent_cli/_extras.json,sha256=bLtE8Xit-AFL4drPlcaA5jTg5oxGbUV9HAuCd9s-2JE,1200
 agent_cli/_tools.py,sha256=u9Ww-k-sbwFnMTW8sreFGd71nJP6o5hKcM0Zd_D9GZk,12136
 agent_cli/api.py,sha256=FQ_HATc7DaedbEFQ275Z18wV90tkDByD_9x_K0wdSLQ,456
 agent_cli/cli.py,sha256=O3b5Bgv6mjzSIMKikRfeUEg1SSVXhCskLatltbx0ERg,3923
@@ -8,7 +8,7 @@ agent_cli/config.py,sha256=dgwDV6chrQzGnVZIJ0OOg26jFKLCGIInC4Q9oXcj3rM,15413
 agent_cli/config_cmd.py,sha256=PkFY-U09LRIFYrHL_kG4_Ge6DjCWFe3GkO_uiIBMTgI,10359
 agent_cli/constants.py,sha256=-Q17N6qKIGqPDsu3FxpIKP33G0Cs0RUJlHwYNHxVxek,843
 agent_cli/docs_gen.py,sha256=ZX2GYHNumpLhdAEc_4Hy6xeAahAzsEVVnsXUojMYVNY,13885
-agent_cli/example-config.toml,sha256=
+agent_cli/example-config.toml,sha256=upxiTAi8FV5rjrm5IBrnz4YDDC5nXA-DUzYBvWoPHM4,7616
 agent_cli/opts.py,sha256=XgVFv-ip5lkFJNyBGHewCBQc4YaLJUSijIsP1qiqcts,13405
 agent_cli/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 agent_cli/_requirements/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -23,6 +23,7 @@ agent_cli/_requirements/rag.txt,sha256=_MPH-PuDSU90J9EXHTJzN9M34ogmkHhsJ2e-Cimir
 agent_cli/_requirements/server.txt,sha256=v29ib07fYE5_lbL00ULOgS13XA5NAOnLq-lExJZ0zbw,3004
 agent_cli/_requirements/speed.txt,sha256=KwBTrZFXWtgwJ5zrcNtm45zfqvNK0trcR1SbV-wDFp4,1464
 agent_cli/_requirements/vad.txt,sha256=Jqw49D5xleqrFyv348GjqRmflewOOEYJklx7b9GbNpY,1359
+agent_cli/_requirements/whisper-transformers.txt,sha256=akxlIsNmHlldeyYJL3_ixCI35oSwWPURv9shiVZlNWs,6097
 agent_cli/_requirements/wyoming.txt,sha256=qsse6lSGYKxnzOYXpS6zIkZ7OG4-E4GH13gfBPuvoY0,1355
 agent_cli/agents/__init__.py,sha256=c1rnncDW5pBvP6BiLzFVpLWDNZzFRaUA7-a97avFVAs,321
 agent_cli/agents/_voice_agent_common.py,sha256=PUAztW84Xf9U7d0C_K5cL7I8OANIE1H6M8dFD_cRqps,4360
@@ -92,7 +93,7 @@ agent_cli/dev/terminals/warp.py,sha256=j-Jvz_BbWYC3QfLrvl4CbDh03c9OGRFmuCzjyB2ud
 agent_cli/dev/terminals/zellij.py,sha256=GnQnopimb9XH67CZGHjnbVWpVSWhaLCATGJizCT5TkY,2321
 agent_cli/install/__init__.py,sha256=JQPrOrtdNd1Y1NmQDkb3Nmm1qdyn3kPjhQwy9D8ryjI,124
 agent_cli/install/common.py,sha256=WvnmcjnFTW0d1HZrKVGzj5Tg3q8Txk_ZOdc4a1MBFWI,3121
-agent_cli/install/extras.py,sha256=
+agent_cli/install/extras.py,sha256=xQ-0A-8X1n6X9ufLG2wPg6UA0Y34M_Zwc27Btc_QKvE,7438
 agent_cli/install/hotkeys.py,sha256=Y7jjtbyjVzIXL1_aczJmOyjL0ud2epbrFbzuWlObqZY,2324
 agent_cli/install/services.py,sha256=mgFjNqvvZ9U2dJ_PcEVWcenlaOtdIZ5a-RyDofDqooY,4209
 agent_cli/memory/__init__.py,sha256=8XNpVzP-qjF8o49A_eXsH_Rbp_FmxTIcknnvxq7vHms,162
@@ -140,7 +141,7 @@ agent_cli/scripts/setup-macos.sh,sha256=iKWhhONLGDTqYawSDqutnl0mfQomSVPPAsx09-0E
 agent_cli/scripts/setup-windows.ps1,sha256=NhyxOuwCjjSw24q2QOqggATos_n06DDbfvMQWuAB3tM,2938
 agent_cli/scripts/start-all-services-windows.ps1,sha256=uOODaPFzniEU7asDgMyf5MEOWcEFsGg_mCLLlDgKoa8,2643
 agent_cli/scripts/start-all-services.sh,sha256=c6pjXoyoQkeF-cYpldeMMo38XxRMmS43FHG5w3ElLxg,7756
-agent_cli/scripts/sync_extras.py,sha256=
+agent_cli/scripts/sync_extras.py,sha256=bxfj88pRN2Uojyd8ubhtlzFCMrXvXmaXlFjzdNLiayw,5179
 agent_cli/scripts/.runtime/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 agent_cli/scripts/linux-hotkeys/README.md,sha256=OW48Xyv096XkUosSJkzED_nnEEncSzhl87FNgEfq8wg,2037
 agent_cli/scripts/linux-hotkeys/toggle-autocorrect.sh,sha256=sme-dil3EU4nkdRwxSvARr-hBN9UjrU1IFabLCrvwl8,1251
@@ -157,7 +158,7 @@ agent_cli/scripts/nvidia-asr-server/server.py,sha256=kPNQIVF3exblvqMtIVk38Y6sZy2
 agent_cli/scripts/nvidia-asr-server/shell.nix,sha256=IT20j5YNj_wc7MdXi7ndogGodDNSGwyq8G0bNoZEpmg,1003
 agent_cli/scripts/nvidia-asr-server/uv.lock,sha256=5WWaqWOuV_moMPC-LIZK-A-Y5oaHr1tUn_vbR-IupzY,728608
 agent_cli/server/__init__.py,sha256=NZuJHlLHck9KWrepNZHrJONptYCQI9P-uTqknSFI5Ds,71
-agent_cli/server/cli.py,sha256=
+agent_cli/server/cli.py,sha256=dBg9Iy8BGthxvW_ImYweauQJVKdnqwUkl0EbFvOR-K4,27417
 agent_cli/server/common.py,sha256=hBBp6i-2-yhDY260ffwmFBg_ndcoT5SNcfa6uFyP7Vc,6391
 agent_cli/server/model_manager.py,sha256=93l_eeZeqnPALyDIK24or61tvded9TbM8tnde0okVjY,9225
 agent_cli/server/model_registry.py,sha256=KrRV1XxbFYuXu5rJlHFh6PTl_2BKiWnWsaNrf-0c6wQ,6988
@@ -179,18 +180,19 @@ agent_cli/server/whisper/languages.py,sha256=Tv3qsIOSQQLxw-v5Wy41jSS6uHG_YBiG-T2
 agent_cli/server/whisper/model_manager.py,sha256=LI92mkueu8o8m6AhzlUaaIWygnZucJa295-j7ymx7Ss,4925
 agent_cli/server/whisper/model_registry.py,sha256=qoRkB0ex6aRtUlsUN5BGik-oIZlwJbVHGQKaCbf_yVg,789
 agent_cli/server/whisper/wyoming_handler.py,sha256=HjN565YfDHeVfaGjQfoy9xjCZPx_TvYvjRYgbKn3aOI,6634
-agent_cli/server/whisper/backends/__init__.py,sha256=
+agent_cli/server/whisper/backends/__init__.py,sha256=YzS5g1PAlKi6k00u0iEH_0uRclvO5iVJVw8CkxSk9wk,2581
 agent_cli/server/whisper/backends/base.py,sha256=gQi5EyMCFS464mKXGIKbh1vgtBm99eNkf93SCIYRYg0,2597
 agent_cli/server/whisper/backends/faster_whisper.py,sha256=GN51L-qBjH-YU8ASiu317NrkMKMsK_znXDOTxi90EzU,6966
-agent_cli/server/whisper/backends/mlx.py,sha256=
+agent_cli/server/whisper/backends/mlx.py,sha256=5wHIvGDanA4-D_HUQSDJQS2DeuTg2x59z4hNAx-P0pg,9698
+agent_cli/server/whisper/backends/transformers.py,sha256=4t3gj2AcEqQY3w3ZlzVke10XCVbJk2vPeaGxxNP8tv0,10067
 agent_cli/services/__init__.py,sha256=5FyGCOS2Zpx4e2QWi1ppg9zm0hl8UU4J_id_g8LqGh4,11305
 agent_cli/services/_wyoming_utils.py,sha256=pKPa4fOSdqcG3-kNHJOHHsMnZ1yZJZi6XohVwjAwabo,1971
 agent_cli/services/asr.py,sha256=aRaCLVCygsJ15qyQEPECOZsdSrnlLPbyY4RwAqY0qIw,17258
 agent_cli/services/llm.py,sha256=i01utl1eYWlM13gvW2eR6ErL_ndH-g0d-BSleZra_7k,7229
 agent_cli/services/tts.py,sha256=NX5Qnq7ddLI3mwm3nzhbR3zB1Os4Ip4sSVSjDZDTBcI,14855
 agent_cli/services/wake_word.py,sha256=JFJ1SA22H4yko9DXiQ1t7fcoxeALLAe3iBrLs0z8rX4,4664
-agent_cli-0.
-agent_cli-0.
-agent_cli-0.
-agent_cli-0.
-agent_cli-0.
+agent_cli-0.75.0.dist-info/METADATA,sha256=3xYOYaJno_r35V1RGawNfVhMrgwkxj94LckVUjbJMV0,181041
+agent_cli-0.75.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+agent_cli-0.75.0.dist-info/entry_points.txt,sha256=FUv-fB2atLsPUk_RT4zqnZl1coz4_XHFwRALOKOF38s,97
+agent_cli-0.75.0.dist-info/licenses/LICENSE,sha256=majJU6S9kC8R8bW39NVBHyv32Dq50FL6TDxECG2WVts,1068
+agent_cli-0.75.0.dist-info/RECORD,,
{agent_cli-0.74.0.dist-info → agent_cli-0.75.0.dist-info}/WHEEL
File without changes
{agent_cli-0.74.0.dist-info → agent_cli-0.75.0.dist-info}/entry_points.txt
File without changes
{agent_cli-0.74.0.dist-info → agent_cli-0.75.0.dist-info}/licenses/LICENSE
File without changes