agent-cli 0.70.5__py3-none-any.whl → 0.71.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
agent_cli/server/cli.py CHANGED
@@ -29,7 +29,30 @@ def _has(package: str) -> bool:
 
 app = typer.Typer(
     name="server",
-    help="Run ASR/TTS servers (Whisper, TTS, or proxy mode).",
+    help="""Run local ASR/TTS servers with OpenAI-compatible APIs.
+
+    **Available servers:**
+
+    - `whisper` - Local speech-to-text using Whisper models (faster-whisper or MLX)
+    - `tts` - Local text-to-speech using Piper (CPU) or Kokoro (GPU)
+    - `transcribe-proxy` - Proxy to external ASR providers (OpenAI, Gemini, Wyoming)
+
+    **Common workflows:**
+
+    ```bash
+    # Run local Whisper server (lazy loads large-v3 by default)
+    agent-cli server whisper
+
+    # Run local TTS with Kokoro backend (GPU-accelerated)
+    agent-cli server tts --backend kokoro
+
+    # Run transcription proxy using your configured ASR provider
+    agent-cli server transcribe-proxy
+    ```
+
+    All servers support Home Assistant via Wyoming protocol and can be used as
+    drop-in replacements for OpenAI's audio APIs.
+    """,
     add_completion=True,
     rich_markup_mode="markdown",
     no_args_is_help=True,
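The new help text advertises these servers as drop-in replacements for OpenAI's audio APIs. As a rough illustration (not code from the package), a client could point the official `openai` Python SDK at the local Whisper server's HTTP port (10301 by default, per the `--port` help further down); the assumption that the server ignores the API key is mine:

```python
# Sketch only: assumes the local server accepts OpenAI-style requests on
# /v1/audio/transcriptions and does not validate the API key.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:10301/v1", api_key="unused")

with open("recording.wav", "rb") as audio_file:
    result = client.audio.transcriptions.create(
        model="large-v3",  # default model per this diff
        file=audio_file,
    )
print(result.text)
```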
@@ -170,14 +193,18 @@ def whisper_cmd( # noqa: PLR0912, PLR0915
         typer.Option(
             "--model",
             "-m",
-            help="Model name(s) to load (can specify multiple)",
+            help=(
+                "Whisper model(s) to load. Common models: `tiny`, `base`, `small`, "
+                "`medium`, `large-v3`, `distil-large-v3`. Can specify multiple for "
+                "different accuracy/speed tradeoffs. Default: `large-v3`"
+            ),
         ),
     ] = None,
     default_model: Annotated[
         str | None,
         typer.Option(
             "--default-model",
-            help="Default model when not specified in request",
+            help=("Model to use when client doesn't specify one. Must be in the `--model` list"),
         ),
     ] = None,
     device: Annotated[
@@ -185,42 +212,54 @@ def whisper_cmd( # noqa: PLR0912, PLR0915
         typer.Option(
             "--device",
             "-d",
-            help="Device: auto, cuda, cuda:0, cpu",
+            help=(
+                "Compute device: `auto` (detect GPU), `cuda`, `cuda:0`, `cpu`. "
+                "MLX backend always uses Apple Silicon"
+            ),
         ),
     ] = "auto",
     compute_type: Annotated[
         str,
         typer.Option(
             "--compute-type",
-            help="Compute type: auto, float16, int8, int8_float16",
+            help=(
+                "Precision for faster-whisper: `auto`, `float16`, `int8`, `int8_float16`. "
+                "Lower precision = faster + less VRAM"
+            ),
         ),
     ] = "auto",
     cache_dir: Annotated[
         Path | None,
         typer.Option(
             "--cache-dir",
-            help="Model cache directory",
+            help="Custom directory for downloaded models (default: HuggingFace cache)",
        ),
    ] = None,
    ttl: Annotated[
        int,
        typer.Option(
            "--ttl",
-            help="Seconds before unloading idle model",
+            help=(
+                "Seconds of inactivity before unloading model from memory. "
+                "Set to 0 to keep loaded indefinitely"
+            ),
        ),
    ] = 300,
    preload: Annotated[
        bool,
        typer.Option(
            "--preload",
-            help="Load model(s) at startup and wait for completion",
+            help=(
+                "Load model(s) immediately at startup instead of on first request. "
+                "Useful for reducing first-request latency"
+            ),
        ),
    ] = False,
    host: Annotated[
        str,
        typer.Option(
            "--host",
-            help="Host to bind the server to",
+            help="Network interface to bind. Use `0.0.0.0` for all interfaces",
        ),
    ] = "0.0.0.0", # noqa: S104
    port: Annotated[
@@ -228,28 +267,28 @@ def whisper_cmd( # noqa: PLR0912, PLR0915
         typer.Option(
             "--port",
             "-p",
-            help="HTTP API port",
+            help="Port for OpenAI-compatible HTTP API (`/v1/audio/transcriptions`)",
         ),
     ] = 10301,
     wyoming_port: Annotated[
         int,
         typer.Option(
             "--wyoming-port",
-            help="Wyoming protocol port",
+            help="Port for Wyoming protocol (Home Assistant integration)",
         ),
     ] = 10300,
     no_wyoming: Annotated[
         bool,
         typer.Option(
             "--no-wyoming",
-            help="Disable Wyoming server",
+            help="Disable Wyoming protocol server (only run HTTP API)",
         ),
     ] = False,
     download_only: Annotated[
         bool,
         typer.Option(
             "--download-only",
-            help="Download model(s) and exit without starting server",
+            help="Download model(s) to cache and exit. Useful for Docker builds",
         ),
     ] = False,
     log_level: opts.LogLevel = opts.LOG_LEVEL,
@@ -258,7 +297,10 @@ def whisper_cmd( # noqa: PLR0912, PLR0915
         typer.Option(
             "--backend",
             "-b",
-            help="Backend: auto (platform detection), faster-whisper, mlx",
+            help=(
+                "Inference backend: `auto` (faster-whisper on CUDA/CPU, MLX on Apple Silicon), "
+                "`faster-whisper`, `mlx`"
+            ),
         ),
     ] = "auto",
 ) -> None:
@@ -272,7 +314,8 @@ def whisper_cmd( # noqa: PLR0912, PLR0915
     Models are loaded lazily on first request and unloaded after being
     idle for the TTL duration, freeing VRAM for other applications.
 
-    Examples:
+    **Examples:**
+
         # Run with default large-v3 model
         agent-cli server whisper
 
@@ -284,7 +327,6 @@ def whisper_cmd( # noqa: PLR0912, PLR0915
 
         # Download model without starting server
         agent-cli server whisper --model large-v3 --download-only
-
     """
     # Setup Rich logging for consistent output
     setup_rich_logging(log_level)
@@ -421,36 +463,52 @@ def whisper_cmd( # noqa: PLR0912, PLR0915
 def transcribe_proxy_cmd(
     host: Annotated[
         str,
-        typer.Option("--host", help="Host to bind the server to"),
+        typer.Option("--host", help="Network interface to bind. Use `0.0.0.0` for all interfaces"),
     ] = "0.0.0.0", # noqa: S104
     port: Annotated[
         int,
-        typer.Option("--port", "-p", help="Port to bind the server to"),
+        typer.Option("--port", "-p", help="Port for the HTTP API"),
     ] = 61337,
     reload: Annotated[
         bool,
-        typer.Option("--reload", help="Enable auto-reload for development"),
+        typer.Option("--reload", help="Auto-reload on code changes (development only)"),
     ] = False,
     log_level: opts.LogLevel = opts.LOG_LEVEL,
 ) -> None:
-    """Run transcription proxy server.
+    r"""Run transcription proxy that forwards to your configured ASR provider.
+
+    Unlike `server whisper` which runs a local Whisper model, this proxy
+    forwards audio to external ASR providers configured in your agent-cli
+    config file or environment variables.
+
+    **Supported ASR providers:** `wyoming`, `openai`, `gemini`
+    **Supported LLM providers for cleanup:** `ollama`, `openai`, `gemini`
+
+    The server exposes:
 
-    This server proxies transcription requests to configured ASR providers
-    (Wyoming, OpenAI, or Gemini) based on your agent-cli configuration.
+    - `POST /transcribe` - Accepts audio files, returns `{raw_transcript, cleaned_transcript}`
+    - `GET /health` - Health check endpoint
 
-    It exposes:
-    - /transcribe endpoint for audio transcription
-    - /health endpoint for health checks
+    **When to use this vs `server whisper`:**
 
-    This is the original server command functionality.
+    - Use `transcribe-proxy` when you want to use cloud ASR (OpenAI/Gemini)
+      or connect to a remote Wyoming server
+    - Use `server whisper` when you want to run a local Whisper model
 
-    Examples:
-        # Run on default port
+    Configuration is read from `~/.config/agent-cli/config.yaml` or env vars
+    like `ASR_PROVIDER`, `LLM_PROVIDER`, `OPENAI_API_KEY`, etc.
+
+    **Examples:**
+
+        # Run with providers from config file
         agent-cli server transcribe-proxy
 
-        # Run on custom port
-        agent-cli server transcribe-proxy --port 8080
+        # Run with OpenAI ASR via env vars
+        ASR_PROVIDER=openai OPENAI_API_KEY=sk-... agent-cli server transcribe-proxy
 
+        # Test with curl
+        curl -X POST http://localhost:61337/transcribe \\
+            -F "audio=@recording.wav" -F "cleanup=true"
     """
     _check_server_deps()
     setup_rich_logging(log_level)
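The curl example in the new docstring translates directly to Python. The sketch below mirrors the documented `POST /transcribe` multipart fields and the `{raw_transcript, cleaned_transcript}` response shape; everything beyond what the docstring states (e.g. that `cleanup` is passed as a form field string) is an illustrative assumption:

```python
# Mirrors the docstring's curl example against the default proxy port (61337).
# The response keys come from the docstring above; the rest is an assumption.
import requests

with open("recording.wav", "rb") as audio_file:
    response = requests.post(
        "http://localhost:61337/transcribe",
        files={"audio": audio_file},
        data={"cleanup": "true"},  # ask the configured LLM provider to clean the text
    )
response.raise_for_status()
payload = response.json()
print(payload["raw_transcript"])
print(payload["cleaned_transcript"])
```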
@@ -481,14 +539,18 @@ def tts_cmd( # noqa: PLR0915
         typer.Option(
             "--model",
             "-m",
-            help="Model name(s) to load. Piper: 'en_US-lessac-medium'. Kokoro: 'kokoro' (auto-downloads)",
+            help=(
+                "Model/voice(s) to load. Piper: `en_US-lessac-medium`, `en_GB-alan-medium`. "
+                "Kokoro: `af_heart`, `af_bella`, `am_adam`. "
+                "Auto-downloads on first use"
+            ),
         ),
     ] = None,
     default_model: Annotated[
         str | None,
         typer.Option(
             "--default-model",
-            help="Default model when not specified in request",
+            help=("Voice to use when client doesn't specify one. Must be in the `--model` list"),
         ),
     ] = None,
     device: Annotated[
@@ -496,35 +558,44 @@ def tts_cmd( # noqa: PLR0915
         typer.Option(
             "--device",
             "-d",
-            help="Device: auto, cpu, cuda, mps (Piper is CPU-only, Kokoro supports GPU)",
+            help=(
+                "Compute device: `auto`, `cpu`, `cuda`, `mps`. "
+                "Piper is CPU-only; Kokoro supports GPU acceleration"
+            ),
         ),
     ] = "auto",
     cache_dir: Annotated[
         Path | None,
         typer.Option(
             "--cache-dir",
-            help="Model cache directory",
+            help="Custom directory for downloaded models (default: ~/.cache/agent-cli/tts/)",
         ),
     ] = None,
     ttl: Annotated[
         int,
         typer.Option(
             "--ttl",
-            help="Seconds before unloading idle model",
+            help=(
+                "Seconds of inactivity before unloading model from memory. "
+                "Set to 0 to keep loaded indefinitely"
+            ),
         ),
     ] = 300,
     preload: Annotated[
         bool,
         typer.Option(
             "--preload",
-            help="Load model(s) at startup and wait for completion",
+            help=(
+                "Load model(s) immediately at startup instead of on first request. "
+                "Useful for reducing first-request latency"
+            ),
         ),
     ] = False,
     host: Annotated[
         str,
         typer.Option(
             "--host",
-            help="Host to bind the server to",
+            help="Network interface to bind. Use `0.0.0.0` for all interfaces",
         ),
     ] = "0.0.0.0", # noqa: S104
     port: Annotated[
@@ -532,28 +603,28 @@ def tts_cmd( # noqa: PLR0915
         typer.Option(
             "--port",
             "-p",
-            help="HTTP API port",
+            help="Port for OpenAI-compatible HTTP API (`/v1/audio/speech`)",
         ),
     ] = 10201,
     wyoming_port: Annotated[
         int,
         typer.Option(
             "--wyoming-port",
-            help="Wyoming protocol port",
+            help="Port for Wyoming protocol (Home Assistant integration)",
         ),
     ] = 10200,
     no_wyoming: Annotated[
         bool,
         typer.Option(
             "--no-wyoming",
-            help="Disable Wyoming server",
+            help="Disable Wyoming protocol server (only run HTTP API)",
         ),
     ] = False,
     download_only: Annotated[
         bool,
         typer.Option(
             "--download-only",
-            help="Download model(s) and exit without starting server",
+            help="Download model(s)/voice(s) to cache and exit. Useful for Docker builds",
         ),
     ] = False,
     log_level: opts.LogLevel = opts.LOG_LEVEL,
@@ -562,7 +633,10 @@ def tts_cmd( # noqa: PLR0915
         typer.Option(
             "--backend",
             "-b",
-            help="Backend: auto, piper, kokoro",
+            help=(
+                "TTS engine: `auto` (prefer Kokoro if available), "
+                "`piper` (CPU, many languages), `kokoro` (GPU, high quality)"
+            ),
         ),
     ] = "auto",
 ) -> None:
@@ -585,7 +659,8 @@ def tts_cmd( # noqa: PLR0915
     Voices: af_heart, af_bella, am_adam, bf_emma, bm_george, etc.
     See https://huggingface.co/hexgrad/Kokoro-82M for all voices.
 
-    Examples:
+    **Examples:**
+
         # Run with Kokoro (auto-downloads model and voices)
         agent-cli server tts --backend kokoro
 
@@ -600,7 +675,6 @@ def tts_cmd( # noqa: PLR0915
 
         # Download Piper model without starting server
         agent-cli server tts --backend piper --model en_US-lessac-medium --download-only
-
     """
     # Setup Rich logging for consistent output
     setup_rich_logging(log_level)
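For completeness: the TTS server's `--port` help above points at an OpenAI-compatible `/v1/audio/speech` endpoint on port 10201. A hypothetical request might look like the sketch below; the endpoint, port, and voice names come from this diff, while the exact `model`/`voice` field mapping and the returned audio format are assumptions, not something the diff states:

```python
# Sketch only: JSON body follows OpenAI's speech API shape; the "model" and
# "voice" values and the output format are assumptions for illustration.
import requests

response = requests.post(
    "http://localhost:10201/v1/audio/speech",
    json={
        "model": "kokoro",      # assumed backend/model identifier
        "voice": "af_heart",    # one of the Kokoro voices listed in the help
        "input": "Hello from the local TTS server.",
    },
)
response.raise_for_status()
with open("speech.wav", "wb") as out:
    out.write(response.content)  # raw audio bytes; actual format depends on the server
```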