agent-cli 0.70.5__py3-none-any.whl → 0.72.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_cli/_extras.json +2 -2
- agent_cli/_requirements/memory.txt +14 -1
- agent_cli/_requirements/rag.txt +14 -1
- agent_cli/_requirements/vad.txt +1 -85
- agent_cli/agents/assistant.py +23 -27
- agent_cli/agents/autocorrect.py +29 -3
- agent_cli/agents/chat.py +44 -14
- agent_cli/agents/memory/__init__.py +19 -1
- agent_cli/agents/memory/add.py +3 -3
- agent_cli/agents/memory/proxy.py +20 -11
- agent_cli/agents/rag_proxy.py +42 -10
- agent_cli/agents/speak.py +22 -2
- agent_cli/agents/transcribe.py +20 -2
- agent_cli/agents/transcribe_daemon.py +33 -21
- agent_cli/agents/voice_edit.py +17 -9
- agent_cli/cli.py +25 -2
- agent_cli/config_cmd.py +30 -11
- agent_cli/core/deps.py +6 -3
- agent_cli/core/vad.py +6 -24
- agent_cli/dev/cli.py +295 -65
- agent_cli/docs_gen.py +18 -8
- agent_cli/install/extras.py +44 -13
- agent_cli/install/hotkeys.py +22 -11
- agent_cli/install/services.py +54 -14
- agent_cli/opts.py +25 -21
- agent_cli/server/cli.py +121 -47
- {agent_cli-0.70.5.dist-info → agent_cli-0.72.1.dist-info}/METADATA +466 -195
- {agent_cli-0.70.5.dist-info → agent_cli-0.72.1.dist-info}/RECORD +31 -31
- {agent_cli-0.70.5.dist-info → agent_cli-0.72.1.dist-info}/WHEEL +0 -0
- {agent_cli-0.70.5.dist-info → agent_cli-0.72.1.dist-info}/entry_points.txt +0 -0
- {agent_cli-0.70.5.dist-info → agent_cli-0.72.1.dist-info}/licenses/LICENSE +0 -0
agent_cli/server/cli.py
CHANGED
````diff
@@ -29,7 +29,30 @@ def _has(package: str) -> bool:
 
 app = typer.Typer(
     name="server",
-    help="Run ASR/TTS servers
+    help="""Run local ASR/TTS servers with OpenAI-compatible APIs.
+
+    **Available servers:**
+
+    - `whisper` - Local speech-to-text using Whisper models (faster-whisper or MLX)
+    - `tts` - Local text-to-speech using Piper (CPU) or Kokoro (GPU)
+    - `transcribe-proxy` - Proxy to external ASR providers (OpenAI, Gemini, Wyoming)
+
+    **Common workflows:**
+
+    ```bash
+    # Run local Whisper server (lazy loads large-v3 by default)
+    agent-cli server whisper
+
+    # Run local TTS with Kokoro backend (GPU-accelerated)
+    agent-cli server tts --backend kokoro
+
+    # Run transcription proxy using your configured ASR provider
+    agent-cli server transcribe-proxy
+    ```
+
+    All servers support Home Assistant via Wyoming protocol and can be used as
+    drop-in replacements for OpenAI's audio APIs.
+    """,
     add_completion=True,
     rich_markup_mode="markdown",
     no_args_is_help=True,
````
```diff
@@ -170,14 +193,18 @@ def whisper_cmd(  # noqa: PLR0912, PLR0915
         typer.Option(
             "--model",
             "-m",
-            help=
+            help=(
+                "Whisper model(s) to load. Common models: `tiny`, `base`, `small`, "
+                "`medium`, `large-v3`, `distil-large-v3`. Can specify multiple for "
+                "different accuracy/speed tradeoffs. Default: `large-v3`"
+            ),
         ),
     ] = None,
     default_model: Annotated[
         str | None,
         typer.Option(
             "--default-model",
-            help="
+            help=("Model to use when client doesn't specify one. Must be in the `--model` list"),
         ),
     ] = None,
     device: Annotated[
@@ -185,42 +212,54 @@ def whisper_cmd(  # noqa: PLR0912, PLR0915
         typer.Option(
             "--device",
             "-d",
-            help=
+            help=(
+                "Compute device: `auto` (detect GPU), `cuda`, `cuda:0`, `cpu`. "
+                "MLX backend always uses Apple Silicon"
+            ),
         ),
     ] = "auto",
     compute_type: Annotated[
         str,
         typer.Option(
             "--compute-type",
-            help=
+            help=(
+                "Precision for faster-whisper: `auto`, `float16`, `int8`, `int8_float16`. "
+                "Lower precision = faster + less VRAM"
+            ),
         ),
     ] = "auto",
     cache_dir: Annotated[
         Path | None,
         typer.Option(
             "--cache-dir",
-            help="
+            help="Custom directory for downloaded models (default: HuggingFace cache)",
         ),
     ] = None,
     ttl: Annotated[
         int,
         typer.Option(
             "--ttl",
-            help=
+            help=(
+                "Seconds of inactivity before unloading model from memory. "
+                "Set to 0 to keep loaded indefinitely"
+            ),
         ),
     ] = 300,
     preload: Annotated[
         bool,
         typer.Option(
             "--preload",
-            help=
+            help=(
+                "Load model(s) immediately at startup instead of on first request. "
+                "Useful for reducing first-request latency"
+            ),
         ),
     ] = False,
     host: Annotated[
         str,
         typer.Option(
             "--host",
-            help="
+            help="Network interface to bind. Use `0.0.0.0` for all interfaces",
         ),
     ] = "0.0.0.0",  # noqa: S104
     port: Annotated[
@@ -228,37 +267,40 @@ def whisper_cmd(  # noqa: PLR0912, PLR0915
         typer.Option(
             "--port",
             "-p",
-            help="HTTP API
+            help="Port for OpenAI-compatible HTTP API (`/v1/audio/transcriptions`)",
         ),
     ] = 10301,
     wyoming_port: Annotated[
         int,
         typer.Option(
             "--wyoming-port",
-            help="Wyoming protocol
+            help="Port for Wyoming protocol (Home Assistant integration)",
         ),
     ] = 10300,
     no_wyoming: Annotated[
         bool,
         typer.Option(
             "--no-wyoming",
-            help="Disable Wyoming server",
+            help="Disable Wyoming protocol server (only run HTTP API)",
         ),
     ] = False,
     download_only: Annotated[
         bool,
         typer.Option(
             "--download-only",
-            help="Download model(s) and exit
+            help="Download model(s) to cache and exit. Useful for Docker builds",
         ),
     ] = False,
-    log_level: opts.LogLevel = opts.
+    log_level: opts.LogLevel = opts.SERVER_LOG_LEVEL,
     backend: Annotated[
         str,
         typer.Option(
             "--backend",
             "-b",
-            help=
+            help=(
+                "Inference backend: `auto` (faster-whisper on CUDA/CPU, MLX on Apple Silicon), "
+                "`faster-whisper`, `mlx`"
+            ),
         ),
     ] = "auto",
 ) -> None:
```
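The updated `--port` help documents an OpenAI-compatible `/v1/audio/transcriptions` endpoint on port 10301. A minimal sketch of exercising it with the official `openai` Python client; the model choice and file name are illustrative, and it's an assumption (not stated in this diff) that the local server ignores the API key:

```python
from openai import OpenAI

# The local server presumably ignores the key, but the client requires one.
client = OpenAI(base_url="http://localhost:10301/v1", api_key="unused")

with open("recording.wav", "rb") as audio:  # illustrative input file
    result = client.audio.transcriptions.create(
        model="large-v3",  # should be one of the models loaded via --model
        file=audio,
    )
print(result.text)
```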
```diff
@@ -272,7 +314,8 @@ def whisper_cmd(  # noqa: PLR0912, PLR0915
     Models are loaded lazily on first request and unloaded after being
     idle for the TTL duration, freeing VRAM for other applications.
 
-    Examples
+    **Examples:**
+
     # Run with default large-v3 model
     agent-cli server whisper
 
@@ -284,7 +327,6 @@ def whisper_cmd(  # noqa: PLR0912, PLR0915
 
     # Download model without starting server
     agent-cli server whisper --model large-v3 --download-only
-
     """
     # Setup Rich logging for consistent output
     setup_rich_logging(log_level)
```
```diff
@@ -421,36 +463,52 @@
 def transcribe_proxy_cmd(
     host: Annotated[
         str,
-        typer.Option("--host", help="
+        typer.Option("--host", help="Network interface to bind. Use `0.0.0.0` for all interfaces"),
     ] = "0.0.0.0",  # noqa: S104
     port: Annotated[
         int,
-        typer.Option("--port", "-p", help="Port
+        typer.Option("--port", "-p", help="Port for the HTTP API"),
     ] = 61337,
     reload: Annotated[
         bool,
-        typer.Option("--reload", help="
+        typer.Option("--reload", help="Auto-reload on code changes (development only)"),
     ] = False,
-    log_level: opts.LogLevel = opts.
+    log_level: opts.LogLevel = opts.SERVER_LOG_LEVEL,
 ) -> None:
-    """Run transcription proxy
+    r"""Run transcription proxy that forwards to your configured ASR provider.
+
+    Unlike `server whisper` which runs a local Whisper model, this proxy
+    forwards audio to external ASR providers configured in your agent-cli
+    config file or environment variables.
+
+    **Supported ASR providers:** `wyoming`, `openai`, `gemini`
+    **Supported LLM providers for cleanup:** `ollama`, `openai`, `gemini`
+
+    The server exposes:
 
-
-
+    - `POST /transcribe` - Accepts audio files, returns `{raw_transcript, cleaned_transcript}`
+    - `GET /health` - Health check endpoint
 
-
-    - /transcribe endpoint for audio transcription
-    - /health endpoint for health checks
+    **When to use this vs `server whisper`:**
 
-
+    - Use `transcribe-proxy` when you want to use cloud ASR (OpenAI/Gemini)
+      or connect to a remote Wyoming server
+    - Use `server whisper` when you want to run a local Whisper model
 
-
-
+    Configuration is read from `~/.config/agent-cli/config.yaml` or env vars
+    like `ASR_PROVIDER`, `LLM_PROVIDER`, `OPENAI_API_KEY`, etc.
+
+    **Examples:**
+
+    # Run with providers from config file
     agent-cli server transcribe-proxy
 
-    # Run
-    agent-cli server transcribe-proxy
+    # Run with OpenAI ASR via env vars
+    ASR_PROVIDER=openai OPENAI_API_KEY=sk-... agent-cli server transcribe-proxy
 
+    # Test with curl
+    curl -X POST http://localhost:61337/transcribe \\
+        -F "audio=@recording.wav" -F "cleanup=true"
     """
     _check_server_deps()
     setup_rich_logging(log_level)
```
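A Python equivalent of the curl example added to the docstring above, assuming the `audio`/`cleanup` form fields and the `{raw_transcript, cleaned_transcript}` response shape documented in this hunk, and that the response body is JSON (the file name is illustrative):

```python
import requests

with open("recording.wav", "rb") as audio:  # illustrative input file
    resp = requests.post(
        "http://localhost:61337/transcribe",
        files={"audio": audio},    # same field name as the curl example
        data={"cleanup": "true"},  # request LLM cleanup of the transcript
        timeout=120,
    )
resp.raise_for_status()
payload = resp.json()
print(payload["raw_transcript"])
print(payload["cleaned_transcript"])
```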
```diff
@@ -481,14 +539,18 @@ def tts_cmd(  # noqa: PLR0915
         typer.Option(
             "--model",
             "-m",
-            help=
+            help=(
+                "Model/voice(s) to load. Piper: `en_US-lessac-medium`, `en_GB-alan-medium`. "
+                "Kokoro: `af_heart`, `af_bella`, `am_adam`. "
+                "Auto-downloads on first use"
+            ),
         ),
     ] = None,
     default_model: Annotated[
         str | None,
         typer.Option(
             "--default-model",
-            help="
+            help=("Voice to use when client doesn't specify one. Must be in the `--model` list"),
         ),
     ] = None,
     device: Annotated[
@@ -496,35 +558,44 @@ def tts_cmd(  # noqa: PLR0915
         typer.Option(
             "--device",
             "-d",
-            help=
+            help=(
+                "Compute device: `auto`, `cpu`, `cuda`, `mps`. "
+                "Piper is CPU-only; Kokoro supports GPU acceleration"
+            ),
         ),
     ] = "auto",
     cache_dir: Annotated[
         Path | None,
         typer.Option(
             "--cache-dir",
-            help="
+            help="Custom directory for downloaded models (default: ~/.cache/agent-cli/tts/)",
         ),
     ] = None,
     ttl: Annotated[
         int,
         typer.Option(
             "--ttl",
-            help=
+            help=(
+                "Seconds of inactivity before unloading model from memory. "
+                "Set to 0 to keep loaded indefinitely"
+            ),
         ),
     ] = 300,
     preload: Annotated[
         bool,
         typer.Option(
             "--preload",
-            help=
+            help=(
+                "Load model(s) immediately at startup instead of on first request. "
+                "Useful for reducing first-request latency"
+            ),
         ),
     ] = False,
     host: Annotated[
         str,
         typer.Option(
             "--host",
-            help="
+            help="Network interface to bind. Use `0.0.0.0` for all interfaces",
         ),
     ] = "0.0.0.0",  # noqa: S104
     port: Annotated[
@@ -532,37 +603,40 @@ def tts_cmd(  # noqa: PLR0915
         typer.Option(
             "--port",
             "-p",
-            help="HTTP API
+            help="Port for OpenAI-compatible HTTP API (`/v1/audio/speech`)",
         ),
     ] = 10201,
     wyoming_port: Annotated[
         int,
         typer.Option(
             "--wyoming-port",
-            help="Wyoming protocol
+            help="Port for Wyoming protocol (Home Assistant integration)",
         ),
     ] = 10200,
     no_wyoming: Annotated[
         bool,
         typer.Option(
             "--no-wyoming",
-            help="Disable Wyoming server",
+            help="Disable Wyoming protocol server (only run HTTP API)",
         ),
     ] = False,
     download_only: Annotated[
         bool,
         typer.Option(
             "--download-only",
-            help="Download model(s) and exit
+            help="Download model(s)/voice(s) to cache and exit. Useful for Docker builds",
         ),
     ] = False,
-    log_level: opts.LogLevel = opts.
+    log_level: opts.LogLevel = opts.SERVER_LOG_LEVEL,
     backend: Annotated[
         str,
         typer.Option(
             "--backend",
             "-b",
-            help=
+            help=(
+                "TTS engine: `auto` (prefer Kokoro if available), "
+                "`piper` (CPU, many languages), `kokoro` (GPU, high quality)"
+            ),
         ),
     ] = "auto",
 ) -> None:
```
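The updated `--port` help documents an OpenAI-compatible `/v1/audio/speech` endpoint on port 10201. A hedged sketch using the `openai` client's streaming helper; passing the voice as both `model` and `voice` is an assumption about how the local server maps OpenAI's parameters, and the voice name comes from the Kokoro list in this diff:

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:10201/v1", api_key="unused")

# Stream the synthesized audio straight to a file.
with client.audio.speech.with_streaming_response.create(
    model="af_heart",  # assumption: server may treat model and voice the same
    voice="af_heart",  # one of the Kokoro voices listed in the help text
    input="Hello from the local TTS server.",
) as response:
    response.stream_to_file("hello.mp3")
```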
```diff
@@ -585,7 +659,8 @@ def tts_cmd(  # noqa: PLR0915
     Voices: af_heart, af_bella, am_adam, bf_emma, bm_george, etc.
     See https://huggingface.co/hexgrad/Kokoro-82M for all voices.
 
-    Examples
+    **Examples:**
+
     # Run with Kokoro (auto-downloads model and voices)
     agent-cli server tts --backend kokoro
 
@@ -600,7 +675,6 @@ def tts_cmd(  # noqa: PLR0915
 
     # Download Piper model without starting server
     agent-cli server tts --backend piper --model en_US-lessac-medium --download-only
-
     """
     # Setup Rich logging for consistent output
     setup_rich_logging(log_level)
```