agent-cli 0.70.5__py3-none-any.whl → 0.72.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_cli/_extras.json +2 -2
- agent_cli/_requirements/memory.txt +14 -1
- agent_cli/_requirements/rag.txt +14 -1
- agent_cli/_requirements/vad.txt +1 -85
- agent_cli/agents/assistant.py +23 -27
- agent_cli/agents/autocorrect.py +29 -3
- agent_cli/agents/chat.py +44 -14
- agent_cli/agents/memory/__init__.py +19 -1
- agent_cli/agents/memory/add.py +3 -3
- agent_cli/agents/memory/proxy.py +20 -11
- agent_cli/agents/rag_proxy.py +42 -10
- agent_cli/agents/speak.py +22 -2
- agent_cli/agents/transcribe.py +20 -2
- agent_cli/agents/transcribe_daemon.py +33 -21
- agent_cli/agents/voice_edit.py +17 -9
- agent_cli/cli.py +25 -2
- agent_cli/config_cmd.py +30 -11
- agent_cli/core/deps.py +6 -3
- agent_cli/core/vad.py +6 -24
- agent_cli/dev/cli.py +295 -65
- agent_cli/docs_gen.py +18 -8
- agent_cli/install/extras.py +44 -13
- agent_cli/install/hotkeys.py +22 -11
- agent_cli/install/services.py +54 -14
- agent_cli/opts.py +25 -21
- agent_cli/server/cli.py +121 -47
- {agent_cli-0.70.5.dist-info → agent_cli-0.72.1.dist-info}/METADATA +466 -195
- {agent_cli-0.70.5.dist-info → agent_cli-0.72.1.dist-info}/RECORD +31 -31
- {agent_cli-0.70.5.dist-info → agent_cli-0.72.1.dist-info}/WHEEL +0 -0
- {agent_cli-0.70.5.dist-info → agent_cli-0.72.1.dist-info}/entry_points.txt +0 -0
- {agent_cli-0.70.5.dist-info → agent_cli-0.72.1.dist-info}/licenses/LICENSE +0 -0
agent_cli/agents/rag_proxy.py
CHANGED
|
@@ -23,12 +23,12 @@ from agent_cli.core.utils import (
|
|
|
23
23
|
def rag_proxy(
|
|
24
24
|
docs_folder: Path = typer.Option( # noqa: B008
|
|
25
25
|
"./rag_docs",
|
|
26
|
-
help="Folder to watch for documents",
|
|
26
|
+
help="Folder to watch for documents. Files are auto-indexed on startup and when changed. Must not overlap with `--chroma-path`.",
|
|
27
27
|
rich_help_panel="RAG Configuration",
|
|
28
28
|
),
|
|
29
29
|
chroma_path: Path = typer.Option( # noqa: B008
|
|
30
30
|
"./rag_db",
|
|
31
|
-
help="
|
|
31
|
+
help="ChromaDB storage directory for vector embeddings. Must be separate from `--docs-folder` to avoid indexing database files.",
|
|
32
32
|
rich_help_panel="RAG Configuration",
|
|
33
33
|
),
|
|
34
34
|
openai_base_url: str | None = opts.OPENAI_BASE_URL,
|
|
@@ -36,30 +36,62 @@ def rag_proxy(
|
|
|
36
36
|
openai_api_key: str | None = opts.OPENAI_API_KEY,
|
|
37
37
|
limit: int = typer.Option(
|
|
38
38
|
3,
|
|
39
|
-
help="Number of document chunks to retrieve per query.",
|
|
39
|
+
help="Number of document chunks to retrieve per query. Higher values provide more context but use more tokens. Can be overridden per-request via `rag_top_k` in the JSON body.",
|
|
40
40
|
rich_help_panel="RAG Configuration",
|
|
41
41
|
),
|
|
42
42
|
host: str = opts.SERVER_HOST,
|
|
43
43
|
port: int = typer.Option(
|
|
44
44
|
8000,
|
|
45
|
-
help="Port
|
|
45
|
+
help="Port for the RAG proxy API (e.g., `http://localhost:8000/v1/chat/completions`).",
|
|
46
46
|
rich_help_panel="Server Configuration",
|
|
47
47
|
),
|
|
48
|
-
log_level: opts.LogLevel = opts.
|
|
48
|
+
log_level: opts.LogLevel = opts.SERVER_LOG_LEVEL,
|
|
49
49
|
config_file: str | None = opts.CONFIG_FILE,
|
|
50
50
|
print_args: bool = opts.PRINT_ARGS,
|
|
51
51
|
enable_rag_tools: bool = typer.Option(
|
|
52
52
|
True, # noqa: FBT003
|
|
53
53
|
"--rag-tools/--no-rag-tools",
|
|
54
|
-
help="
|
|
54
|
+
help="Enable `read_full_document()` tool so the LLM can request full document content when retrieved snippets are insufficient. Can be overridden per-request via `rag_enable_tools` in the JSON body.",
|
|
55
55
|
rich_help_panel="RAG Configuration",
|
|
56
56
|
),
|
|
57
57
|
) -> None:
|
|
58
|
-
"""Start
|
|
58
|
+
"""Start a RAG proxy server that enables "chat with your documents".
|
|
59
59
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
60
|
+
Watches a folder for documents, indexes them into a vector store, and provides an
|
|
61
|
+
OpenAI-compatible API at `/v1/chat/completions`. When you send a chat request,
|
|
62
|
+
the server retrieves relevant document chunks and injects them as context before
|
|
63
|
+
forwarding to your LLM backend.
|
|
64
|
+
|
|
65
|
+
**Quick start:**
|
|
66
|
+
|
|
67
|
+
- `agent-cli rag-proxy` — Start with defaults (./rag_docs, OpenAI-compatible API)
|
|
68
|
+
- `agent-cli rag-proxy --docs-folder ~/notes` — Index your notes folder
|
|
69
|
+
|
|
70
|
+
**How it works:**
|
|
71
|
+
|
|
72
|
+
1. Documents in `--docs-folder` are chunked, embedded, and stored in ChromaDB
|
|
73
|
+
2. A file watcher auto-reindexes when files change
|
|
74
|
+
3. Chat requests trigger a semantic search for relevant chunks
|
|
75
|
+
4. Retrieved context is injected into the prompt before forwarding to the LLM
|
|
76
|
+
5. Responses include a `rag_sources` field listing which documents were used
|
|
77
|
+
|
|
78
|
+
**Supported file formats:**
|
|
79
|
+
|
|
80
|
+
Text: `.txt`, `.md`, `.json`, `.py`, `.js`, `.ts`, `.yaml`, `.toml`, `.rst`, etc.
|
|
81
|
+
Rich documents (via MarkItDown): `.pdf`, `.docx`, `.pptx`, `.xlsx`, `.html`, `.csv`
|
|
82
|
+
|
|
83
|
+
**API endpoints:**
|
|
84
|
+
|
|
85
|
+
- `POST /v1/chat/completions` — Main chat endpoint (OpenAI-compatible)
|
|
86
|
+
- `GET /health` — Health check with configuration info
|
|
87
|
+
- `GET /files` — List indexed files with chunk counts
|
|
88
|
+
- `POST /reindex` — Trigger manual reindex
|
|
89
|
+
- All other paths are proxied to the LLM backend
|
|
90
|
+
|
|
91
|
+
**Per-request overrides (in JSON body):**
|
|
92
|
+
|
|
93
|
+
- `rag_top_k`: Override `--limit` for this request
|
|
94
|
+
- `rag_enable_tools`: Override `--rag-tools` for this request
|
|
63
95
|
"""
|
|
64
96
|
if print_args:
|
|
65
97
|
print_command_line_args(locals())
|
agent_cli/agents/speak.py
CHANGED
|
@@ -86,7 +86,7 @@ def speak(
|
|
|
86
86
|
*,
|
|
87
87
|
text: str | None = typer.Argument(
|
|
88
88
|
None,
|
|
89
|
-
help="Text to
|
|
89
|
+
help="Text to synthesize. If not provided, reads from clipboard.",
|
|
90
90
|
rich_help_panel="General Options",
|
|
91
91
|
),
|
|
92
92
|
# --- Provider Selection ---
|
|
@@ -127,7 +127,27 @@ def speak(
|
|
|
127
127
|
config_file: str | None = opts.CONFIG_FILE,
|
|
128
128
|
print_args: bool = opts.PRINT_ARGS,
|
|
129
129
|
) -> None:
|
|
130
|
-
"""Convert text to speech
|
|
130
|
+
"""Convert text to speech and play audio through speakers.
|
|
131
|
+
|
|
132
|
+
By default, synthesized audio plays immediately. Use `--save-file` to save
|
|
133
|
+
to a WAV file instead (skips playback).
|
|
134
|
+
|
|
135
|
+
Text can be provided as an argument or read from clipboard automatically.
|
|
136
|
+
|
|
137
|
+
**Examples:**
|
|
138
|
+
|
|
139
|
+
Speak text directly:
|
|
140
|
+
`agent-cli speak "Hello, world!"`
|
|
141
|
+
|
|
142
|
+
Speak clipboard contents:
|
|
143
|
+
`agent-cli speak`
|
|
144
|
+
|
|
145
|
+
Save to file instead of playing:
|
|
146
|
+
`agent-cli speak "Hello" --save-file greeting.wav`
|
|
147
|
+
|
|
148
|
+
Use OpenAI-compatible TTS:
|
|
149
|
+
`agent-cli speak "Hello" --tts-provider openai`
|
|
150
|
+
"""
|
|
131
151
|
if print_args:
|
|
132
152
|
print_command_line_args(locals())
|
|
133
153
|
|
agent_cli/agents/transcribe.py
CHANGED
|
@@ -471,7 +471,7 @@ def transcribe( # noqa: PLR0912
|
|
|
471
471
|
extra_instructions: str | None = typer.Option(
|
|
472
472
|
None,
|
|
473
473
|
"--extra-instructions",
|
|
474
|
-
help="
|
|
474
|
+
help="Extra instructions appended to the LLM cleanup prompt (requires `--llm`).",
|
|
475
475
|
rich_help_panel="LLM Configuration",
|
|
476
476
|
),
|
|
477
477
|
from_file: Path | None = opts.FROM_FILE,
|
|
@@ -513,7 +513,25 @@ def transcribe( # noqa: PLR0912
|
|
|
513
513
|
print_args: bool = opts.PRINT_ARGS,
|
|
514
514
|
transcription_log: Path | None = opts.TRANSCRIPTION_LOG,
|
|
515
515
|
) -> None:
|
|
516
|
-
"""
|
|
516
|
+
"""Record audio from microphone and transcribe to text.
|
|
517
|
+
|
|
518
|
+
Records until you press Ctrl+C (or send SIGINT), then transcribes using your
|
|
519
|
+
configured ASR provider. The transcript is copied to the clipboard by default.
|
|
520
|
+
|
|
521
|
+
**With `--llm`**: Passes the raw transcript through an LLM to clean up speech
|
|
522
|
+
recognition errors, add punctuation, remove filler words, and improve readability.
|
|
523
|
+
|
|
524
|
+
**With `--toggle`**: Bind to a hotkey for toggle-style recording. First call starts recording,
|
|
525
|
+
second call stops and transcribes.
|
|
526
|
+
|
|
527
|
+
**Examples:**
|
|
528
|
+
|
|
529
|
+
- Record and transcribe: `agent-cli transcribe`
|
|
530
|
+
|
|
531
|
+
- With LLM cleanup: `agent-cli transcribe --llm`
|
|
532
|
+
|
|
533
|
+
- Re-transcribe last recording: `agent-cli transcribe --last-recording 1`
|
|
534
|
+
"""
|
|
517
535
|
if print_args:
|
|
518
536
|
print_command_line_args(locals())
|
|
519
537
|
|
|
@@ -296,45 +296,45 @@ def transcribe_daemon( # noqa: PLR0912
|
|
|
296
296
|
"user",
|
|
297
297
|
"--role",
|
|
298
298
|
"-r",
|
|
299
|
-
help="
|
|
299
|
+
help="Label for log entries. Use to distinguish speakers or contexts in logs.",
|
|
300
300
|
),
|
|
301
301
|
silence_threshold: float = typer.Option(
|
|
302
302
|
1.0,
|
|
303
303
|
"--silence-threshold",
|
|
304
304
|
"-s",
|
|
305
|
-
help="Seconds of silence to
|
|
305
|
+
help="Seconds of silence after speech to finalize a segment. Increase for slower speakers.",
|
|
306
306
|
),
|
|
307
307
|
min_segment: float = typer.Option(
|
|
308
308
|
0.25,
|
|
309
309
|
"--min-segment",
|
|
310
310
|
"-m",
|
|
311
|
-
help="Minimum
|
|
311
|
+
help="Minimum seconds of speech required before a segment is processed. Filters brief sounds.",
|
|
312
312
|
),
|
|
313
313
|
vad_threshold: float = typer.Option(
|
|
314
314
|
0.3,
|
|
315
315
|
"--vad-threshold",
|
|
316
|
-
help="VAD
|
|
316
|
+
help="Silero VAD confidence threshold (0.0-1.0). Higher values require clearer speech; lower values are more sensitive to quiet/distant voices.",
|
|
317
317
|
),
|
|
318
318
|
save_audio: bool = typer.Option(
|
|
319
319
|
True, # noqa: FBT003
|
|
320
320
|
"--save-audio/--no-save-audio",
|
|
321
|
-
help="Save
|
|
321
|
+
help="Save each speech segment as MP3. Requires `ffmpeg` to be installed.",
|
|
322
322
|
),
|
|
323
323
|
audio_dir: Path | None = typer.Option( # noqa: B008
|
|
324
324
|
None,
|
|
325
325
|
"--audio-dir",
|
|
326
|
-
help="
|
|
326
|
+
help="Base directory for MP3 files. Files are organized by date: `YYYY/MM/DD/HHMMSS_mmm.mp3`. Default: `~/.config/agent-cli/audio`.",
|
|
327
327
|
),
|
|
328
328
|
transcription_log: Path | None = typer.Option( # noqa: B008
|
|
329
329
|
None,
|
|
330
330
|
"--transcription-log",
|
|
331
331
|
"-t",
|
|
332
|
-
help="JSON
|
|
332
|
+
help="JSONL file for transcript logging (one JSON object per line with timestamp, role, raw/processed text, audio path). Default: `~/.config/agent-cli/transcriptions.jsonl`.",
|
|
333
333
|
),
|
|
334
334
|
clipboard: bool = typer.Option(
|
|
335
335
|
False, # noqa: FBT003
|
|
336
336
|
"--clipboard/--no-clipboard",
|
|
337
|
-
help="Copy each transcription to clipboard.",
|
|
337
|
+
help="Copy each completed transcription to clipboard (overwrites previous). Useful with `--llm` to get cleaned text.",
|
|
338
338
|
),
|
|
339
339
|
# --- Provider Selection ---
|
|
340
340
|
asr_provider: str = opts.ASR_PROVIDER,
|
|
@@ -368,25 +368,37 @@ def transcribe_daemon( # noqa: PLR0912
|
|
|
368
368
|
config_file: str | None = opts.CONFIG_FILE,
|
|
369
369
|
print_args: bool = opts.PRINT_ARGS,
|
|
370
370
|
) -> None:
|
|
371
|
-
"""
|
|
371
|
+
"""Continuous transcription daemon using Silero VAD for speech detection.
|
|
372
372
|
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
373
|
+
Unlike `transcribe` (single recording session), this daemon runs indefinitely
|
|
374
|
+
and automatically detects speech segments using Voice Activity Detection (VAD).
|
|
375
|
+
Each detected segment is transcribed and logged with timestamps.
|
|
376
376
|
|
|
377
|
-
|
|
378
|
-
# Basic daemon
|
|
379
|
-
agent-cli transcribe-daemon
|
|
377
|
+
**How it works:**
|
|
380
378
|
|
|
381
|
-
|
|
382
|
-
|
|
379
|
+
1. Listens continuously to microphone input
|
|
380
|
+
2. Silero VAD detects when you start/stop speaking
|
|
381
|
+
3. After `--silence-threshold` seconds of silence, the segment is finalized
|
|
382
|
+
4. Segment is transcribed (and optionally cleaned by LLM with `--llm`)
|
|
383
|
+
5. Results are appended to the JSONL log file
|
|
384
|
+
6. Audio is saved as MP3 if `--save-audio` is enabled (requires `ffmpeg`)
|
|
385
|
+
|
|
386
|
+
**Use cases:** Meeting transcription, note-taking, voice journaling, accessibility.
|
|
383
387
|
|
|
384
|
-
|
|
385
|
-
|
|
388
|
+
**Examples:**
|
|
389
|
+
|
|
390
|
+
agent-cli transcribe-daemon
|
|
391
|
+
agent-cli transcribe-daemon --role meeting --silence-threshold 1.5
|
|
392
|
+
agent-cli transcribe-daemon --llm --clipboard --role notes
|
|
393
|
+
agent-cli transcribe-daemon --transcription-log ~/meeting.jsonl --no-save-audio
|
|
394
|
+
agent-cli transcribe-daemon --asr-provider openai --llm-provider gemini --llm
|
|
386
395
|
|
|
387
|
-
|
|
388
|
-
agent-cli transcribe-daemon --transcription-log ~/meeting.jsonl --audio-dir ~/audio
|
|
396
|
+
**Tips:**
|
|
389
397
|
|
|
398
|
+
- Use `--role` to tag entries (e.g., `speaker1`, `meeting`, `personal`)
|
|
399
|
+
- Adjust `--vad-threshold` if detection is too sensitive (increase) or missing speech (decrease)
|
|
400
|
+
- Use `--stop` to cleanly terminate a running daemon
|
|
401
|
+
- With `--llm`, transcripts are cleaned up (punctuation, filler words removed)
|
|
390
402
|
"""
|
|
391
403
|
if print_args:
|
|
392
404
|
print_command_line_args(locals())
|
agent_cli/agents/voice_edit.py
CHANGED
|
@@ -229,15 +229,23 @@ def voice_edit(
|
|
|
229
229
|
config_file: str | None = opts.CONFIG_FILE,
|
|
230
230
|
print_args: bool = opts.PRINT_ARGS,
|
|
231
231
|
) -> None:
|
|
232
|
-
"""
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
232
|
+
"""Edit or query clipboard text using voice commands.
|
|
233
|
+
|
|
234
|
+
**Workflow:** Captures clipboard text → records your voice command → transcribes
|
|
235
|
+
it → sends both to an LLM → copies result back to clipboard.
|
|
236
|
+
|
|
237
|
+
Use this for hands-free text editing (e.g., "make this more formal") or
|
|
238
|
+
asking questions about clipboard content (e.g., "summarize this").
|
|
239
|
+
|
|
240
|
+
**Typical hotkey integration:** Run `agent-cli voice-edit &` on keypress to start
|
|
241
|
+
recording, then send SIGINT (via `--stop`) on second keypress to process.
|
|
242
|
+
|
|
243
|
+
**Examples:**
|
|
244
|
+
|
|
245
|
+
- Basic usage: `agent-cli voice-edit`
|
|
246
|
+
- With TTS response: `agent-cli voice-edit --tts`
|
|
247
|
+
- Toggle on/off: `agent-cli voice-edit --toggle`
|
|
248
|
+
- List audio devices: `agent-cli voice-edit --list-devices`
|
|
241
249
|
"""
|
|
242
250
|
if print_args:
|
|
243
251
|
print_command_line_args(locals())
|
agent_cli/cli.py
CHANGED
|
@@ -14,9 +14,32 @@ from .config import load_config, normalize_provider_defaults
|
|
|
14
14
|
from .core.process import set_process_title
|
|
15
15
|
from .core.utils import console
|
|
16
16
|
|
|
17
|
+
_HELP = """\
|
|
18
|
+
AI-powered voice, text, and development tools.
|
|
19
|
+
|
|
20
|
+
**Voice & Text:**
|
|
21
|
+
|
|
22
|
+
- **Voice-to-text** - Transcribe speech with optional LLM cleanup
|
|
23
|
+
- **Text-to-speech** - Convert text to natural-sounding audio
|
|
24
|
+
- **Voice chat** - Conversational AI with memory and tool use
|
|
25
|
+
- **Text correction** - Fix grammar, spelling, and punctuation
|
|
26
|
+
|
|
27
|
+
**Development:**
|
|
28
|
+
|
|
29
|
+
- **Parallel development** - Git worktrees with integrated coding agents
|
|
30
|
+
- **Local servers** - ASR/TTS with Wyoming + OpenAI-compatible APIs,
|
|
31
|
+
MLX on macOS ARM, CUDA/CPU Whisper, and automatic model TTL
|
|
32
|
+
|
|
33
|
+
**Provider Flexibility:**
|
|
34
|
+
|
|
35
|
+
Mix local (Ollama, Wyoming) and cloud (OpenAI, Gemini) backends freely.
|
|
36
|
+
|
|
37
|
+
Run `agent-cli <command> --help` for detailed command documentation.
|
|
38
|
+
"""
|
|
39
|
+
|
|
17
40
|
app = typer.Typer(
|
|
18
41
|
name="agent-cli",
|
|
19
|
-
help=
|
|
42
|
+
help=_HELP,
|
|
20
43
|
context_settings={"help_option_names": ["-h", "--help"]},
|
|
21
44
|
add_completion=True,
|
|
22
45
|
rich_markup_mode="markdown",
|
|
@@ -56,7 +79,7 @@ def main(
|
|
|
56
79
|
),
|
|
57
80
|
] = False,
|
|
58
81
|
) -> None:
|
|
59
|
-
"""
|
|
82
|
+
"""AI-powered voice, text, and development tools."""
|
|
60
83
|
if ctx.invoked_subcommand is None:
|
|
61
84
|
console.print("[bold red]No command specified.[/bold red]")
|
|
62
85
|
console.print("[bold yellow]Running --help for your convenience.[/bold yellow]")
|
agent_cli/config_cmd.py
CHANGED
|
@@ -20,7 +20,17 @@ from agent_cli.core.utils import console
|
|
|
20
20
|
|
|
21
21
|
config_app = typer.Typer(
|
|
22
22
|
name="config",
|
|
23
|
-
help="Manage agent-cli configuration files.
|
|
23
|
+
help="""Manage agent-cli configuration files.
|
|
24
|
+
|
|
25
|
+
Config files are TOML format and searched in order:
|
|
26
|
+
|
|
27
|
+
1. `./agent-cli-config.toml` (project-local)
|
|
28
|
+
2. `~/.config/agent-cli/config.toml` (user default)
|
|
29
|
+
|
|
30
|
+
Settings in `[defaults]` apply to all commands. Override per-command
|
|
31
|
+
with sections like `[chat]` or `[transcribe]`. CLI arguments override
|
|
32
|
+
config file settings.
|
|
33
|
+
""",
|
|
24
34
|
add_completion=True,
|
|
25
35
|
rich_markup_mode="markdown",
|
|
26
36
|
no_args_is_help=True,
|
|
@@ -40,30 +50,30 @@ CONFIG_PATH_OPTION: Path | None = typer.Option(
|
|
|
40
50
|
None,
|
|
41
51
|
"--path",
|
|
42
52
|
"-p",
|
|
43
|
-
help="
|
|
53
|
+
help="Override auto-detection and use this config file path.",
|
|
44
54
|
)
|
|
45
55
|
CONFIG_PATH_INIT_OPTION: Path | None = typer.Option(
|
|
46
56
|
None,
|
|
47
57
|
"--path",
|
|
48
58
|
"-p",
|
|
49
|
-
help="
|
|
59
|
+
help="Where to create the config file (default: `~/.config/agent-cli/config.toml`).",
|
|
50
60
|
)
|
|
51
61
|
FORCE_OPTION: bool = typer.Option(
|
|
52
62
|
False, # noqa: FBT003
|
|
53
63
|
"--force",
|
|
54
64
|
"-f",
|
|
55
|
-
help="Overwrite existing config without confirmation.",
|
|
65
|
+
help="Overwrite existing config without prompting for confirmation.",
|
|
56
66
|
)
|
|
57
67
|
RAW_OPTION: bool = typer.Option(
|
|
58
68
|
False, # noqa: FBT003
|
|
59
69
|
"--raw",
|
|
60
70
|
"-r",
|
|
61
|
-
help="
|
|
71
|
+
help="Print plain file contents without syntax highlighting or line numbers.",
|
|
62
72
|
)
|
|
63
73
|
JSON_OPTION: bool = typer.Option(
|
|
64
74
|
False, # noqa: FBT003
|
|
65
75
|
"--json",
|
|
66
|
-
help="Output as JSON
|
|
76
|
+
help="Output as JSON with `path`, `exists`, and `content` fields.",
|
|
67
77
|
)
|
|
68
78
|
|
|
69
79
|
|
|
@@ -149,10 +159,13 @@ def config_init(
|
|
|
149
159
|
path: Path | None = CONFIG_PATH_INIT_OPTION,
|
|
150
160
|
force: bool = FORCE_OPTION,
|
|
151
161
|
) -> None:
|
|
152
|
-
"""Create a new config file with all options commented
|
|
162
|
+
"""Create a new config file with all options as commented-out examples.
|
|
153
163
|
|
|
154
|
-
|
|
155
|
-
|
|
164
|
+
Generates a TOML template with `[defaults]` for global settings and
|
|
165
|
+
command-specific sections like `[chat]`, `[transcribe]`, etc. Uncomment
|
|
166
|
+
and edit the options you want to customize.
|
|
167
|
+
|
|
168
|
+
Example: `agent-cli config init && agent-cli config edit`
|
|
156
169
|
"""
|
|
157
170
|
target_path = _get_config_file(path) or USER_CONFIG_PATH
|
|
158
171
|
|
|
@@ -182,7 +195,9 @@ def config_edit(
|
|
|
182
195
|
) -> None:
|
|
183
196
|
"""Open the config file in your default editor.
|
|
184
197
|
|
|
185
|
-
|
|
198
|
+
Editor preference: `$EDITOR` → `$VISUAL` → `nano`/`vim` → `vi` (or
|
|
199
|
+
`notepad` on Windows). If no config exists, run `agent-cli config init`
|
|
200
|
+
first.
|
|
186
201
|
"""
|
|
187
202
|
config_file = _get_config_file(path)
|
|
188
203
|
|
|
@@ -234,7 +249,11 @@ def config_show(
|
|
|
234
249
|
raw: bool = RAW_OPTION,
|
|
235
250
|
json_output: bool = JSON_OPTION,
|
|
236
251
|
) -> None:
|
|
237
|
-
"""Display the config file
|
|
252
|
+
"""Display the active config file path and contents.
|
|
253
|
+
|
|
254
|
+
By default, shows syntax-highlighted TOML with line numbers. Use `--raw`
|
|
255
|
+
for plain output (useful for piping), or `--json` for programmatic access.
|
|
256
|
+
"""
|
|
238
257
|
config_file = _get_config_file(path)
|
|
239
258
|
|
|
240
259
|
if config_file is None:
|
agent_cli/core/deps.py
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import functools
|
|
6
|
+
import importlib
|
|
6
7
|
import json
|
|
7
8
|
import os
|
|
8
9
|
from importlib.util import find_spec
|
|
@@ -12,7 +13,7 @@ from typing import TYPE_CHECKING, TypeVar
|
|
|
12
13
|
import typer
|
|
13
14
|
|
|
14
15
|
from agent_cli.config import load_config
|
|
15
|
-
from agent_cli.core.utils import
|
|
16
|
+
from agent_cli.core.utils import err_console, print_error_message
|
|
16
17
|
|
|
17
18
|
if TYPE_CHECKING:
|
|
18
19
|
from collections.abc import Callable
|
|
@@ -139,7 +140,7 @@ def _try_auto_install(missing: list[str]) -> bool:
|
|
|
139
140
|
else:
|
|
140
141
|
extras_to_install.append(extra)
|
|
141
142
|
|
|
142
|
-
|
|
143
|
+
err_console.print(
|
|
143
144
|
f"[yellow]Auto-installing missing extras: {', '.join(extras_to_install)}[/]",
|
|
144
145
|
)
|
|
145
146
|
return install_extras_programmatic(extras_to_install, quiet=True)
|
|
@@ -159,7 +160,9 @@ def _check_and_install_extras(extras: tuple[str, ...]) -> list[str]:
|
|
|
159
160
|
print_error_message("Auto-install failed.\n" + get_combined_install_hint(missing))
|
|
160
161
|
return missing
|
|
161
162
|
|
|
162
|
-
|
|
163
|
+
err_console.print("[green]Installation complete![/]")
|
|
164
|
+
# Invalidate import caches so find_spec() can see newly installed packages
|
|
165
|
+
importlib.invalidate_caches()
|
|
163
166
|
still_missing = [e for e in extras if not check_extra_installed(e)]
|
|
164
167
|
if still_missing:
|
|
165
168
|
print_error_message(
|
agent_cli/core/vad.py
CHANGED
|
@@ -3,38 +3,22 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import logging
|
|
6
|
-
import urllib.request
|
|
7
6
|
from collections import deque
|
|
8
|
-
from pathlib import Path
|
|
9
7
|
|
|
10
8
|
from agent_cli import constants
|
|
11
9
|
|
|
12
10
|
try:
|
|
13
11
|
import numpy as np
|
|
14
|
-
import
|
|
12
|
+
from silero_vad_lite import SileroVAD
|
|
15
13
|
except ImportError as e:
|
|
16
14
|
msg = (
|
|
17
|
-
"silero-vad is required for the transcribe-daemon command. "
|
|
15
|
+
"silero-vad-lite is required for the transcribe-daemon command. "
|
|
18
16
|
"Install it with: `pip install agent-cli[vad]` or `uv sync --extra vad`."
|
|
19
17
|
)
|
|
20
18
|
raise ImportError(msg) from e
|
|
21
19
|
|
|
22
20
|
LOGGER = logging.getLogger(__name__)
|
|
23
21
|
|
|
24
|
-
_SILERO_VAD_ONNX_URL = (
|
|
25
|
-
"https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx"
|
|
26
|
-
)
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def _get_model_path() -> Path:
|
|
30
|
-
"""Get the path to the Silero VAD ONNX model, downloading if needed."""
|
|
31
|
-
cache_dir = Path.home() / ".cache" / "silero-vad"
|
|
32
|
-
cache_dir.mkdir(parents=True, exist_ok=True)
|
|
33
|
-
model_path = cache_dir / "silero_vad.onnx"
|
|
34
|
-
if not model_path.exists():
|
|
35
|
-
urllib.request.urlretrieve(_SILERO_VAD_ONNX_URL, model_path) # noqa: S310
|
|
36
|
-
return model_path
|
|
37
|
-
|
|
38
22
|
|
|
39
23
|
class VoiceActivityDetector:
|
|
40
24
|
"""Silero VAD-based voice activity detection for audio segmentation.
|
|
@@ -56,8 +40,6 @@ class VoiceActivityDetector:
|
|
|
56
40
|
msg = f"Sample rate must be 8000 or 16000, got {sample_rate}"
|
|
57
41
|
raise ValueError(msg)
|
|
58
42
|
|
|
59
|
-
from silero_vad.utils_vad import OnnxWrapper # noqa: PLC0415
|
|
60
|
-
|
|
61
43
|
self.sample_rate = sample_rate
|
|
62
44
|
self.threshold = threshold
|
|
63
45
|
self.silence_threshold_ms = silence_threshold_ms
|
|
@@ -74,7 +56,7 @@ class VoiceActivityDetector:
|
|
|
74
56
|
)
|
|
75
57
|
|
|
76
58
|
# Model and state
|
|
77
|
-
self._model =
|
|
59
|
+
self._model = SileroVAD(sample_rate=sample_rate)
|
|
78
60
|
self._pre_speech_buffer: deque[bytes] = deque(maxlen=pre_speech_windows)
|
|
79
61
|
self._pending = bytearray()
|
|
80
62
|
self._audio_buffer = bytearray()
|
|
@@ -92,7 +74,7 @@ class VoiceActivityDetector:
|
|
|
92
74
|
|
|
93
75
|
def reset(self) -> None:
|
|
94
76
|
"""Reset VAD state for a new recording session."""
|
|
95
|
-
self._model.
|
|
77
|
+
self._model = SileroVAD(sample_rate=self.sample_rate)
|
|
96
78
|
self._pre_speech_buffer.clear()
|
|
97
79
|
self._pending.clear()
|
|
98
80
|
self._audio_buffer.clear()
|
|
@@ -103,7 +85,7 @@ class VoiceActivityDetector:
|
|
|
103
85
|
def _is_speech(self, window: bytes) -> bool:
|
|
104
86
|
"""Check if audio window contains speech."""
|
|
105
87
|
audio = np.frombuffer(window, dtype=np.int16).astype(np.float32) / 32768.0
|
|
106
|
-
prob =
|
|
88
|
+
prob = self._model.process(audio)
|
|
107
89
|
LOGGER.debug("Speech prob: %.3f, threshold: %.2f", prob, self.threshold)
|
|
108
90
|
return prob >= self.threshold
|
|
109
91
|
|
|
@@ -154,7 +136,7 @@ class VoiceActivityDetector:
|
|
|
154
136
|
self._silence_samples = 0
|
|
155
137
|
self._speech_samples = 0
|
|
156
138
|
self._audio_buffer.clear()
|
|
157
|
-
self._model.
|
|
139
|
+
self._model = SileroVAD(sample_rate=self.sample_rate)
|
|
158
140
|
else:
|
|
159
141
|
# Not speaking - maintain rolling pre-speech buffer (auto-limited by deque maxlen)
|
|
160
142
|
self._pre_speech_buffer.append(window)
|