agent-cli 0.61.2__py3-none-any.whl → 0.70.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_cli/_extras.json +13 -0
- agent_cli/_requirements/.gitkeep +0 -0
- agent_cli/_requirements/audio.txt +79 -0
- agent_cli/_requirements/faster-whisper.txt +215 -0
- agent_cli/_requirements/kokoro.txt +425 -0
- agent_cli/_requirements/llm.txt +183 -0
- agent_cli/_requirements/memory.txt +355 -0
- agent_cli/_requirements/mlx-whisper.txt +222 -0
- agent_cli/_requirements/piper.txt +176 -0
- agent_cli/_requirements/rag.txt +402 -0
- agent_cli/_requirements/server.txt +154 -0
- agent_cli/_requirements/speed.txt +77 -0
- agent_cli/_requirements/vad.txt +155 -0
- agent_cli/agents/assistant.py +3 -1
- agent_cli/agents/autocorrect.py +5 -2
- agent_cli/agents/chat.py +3 -1
- agent_cli/agents/memory/__init__.py +2 -1
- agent_cli/agents/memory/add.py +2 -0
- agent_cli/agents/memory/proxy.py +7 -12
- agent_cli/agents/rag_proxy.py +5 -10
- agent_cli/agents/speak.py +3 -1
- agent_cli/agents/transcribe.py +7 -2
- agent_cli/agents/transcribe_daemon.py +3 -1
- agent_cli/agents/voice_edit.py +3 -1
- agent_cli/cli.py +19 -3
- agent_cli/config_cmd.py +1 -0
- agent_cli/core/chroma.py +4 -4
- agent_cli/core/deps.py +177 -25
- agent_cli/core/openai_proxy.py +9 -4
- agent_cli/core/process.py +2 -2
- agent_cli/core/reranker.py +5 -4
- agent_cli/core/utils.py +5 -3
- agent_cli/core/vad.py +2 -1
- agent_cli/core/watch.py +8 -6
- agent_cli/dev/cli.py +31 -34
- agent_cli/dev/coding_agents/base.py +1 -2
- agent_cli/dev/skill/SKILL.md +141 -0
- agent_cli/dev/skill/examples.md +571 -0
- agent_cli/dev/worktree.py +53 -5
- agent_cli/docs_gen.py +12 -42
- agent_cli/install/__init__.py +1 -1
- agent_cli/install/extras.py +174 -0
- agent_cli/memory/__init__.py +1 -18
- agent_cli/memory/_files.py +4 -1
- agent_cli/memory/_indexer.py +3 -2
- agent_cli/memory/_ingest.py +6 -5
- agent_cli/memory/_retrieval.py +18 -8
- agent_cli/memory/_streaming.py +2 -2
- agent_cli/memory/api.py +1 -1
- agent_cli/memory/client.py +1 -1
- agent_cli/memory/engine.py +1 -1
- agent_cli/rag/__init__.py +0 -19
- agent_cli/rag/_indexer.py +3 -2
- agent_cli/rag/api.py +1 -0
- agent_cli/scripts/.runtime/.gitkeep +0 -0
- agent_cli/scripts/check_plugin_skill_sync.py +50 -0
- agent_cli/scripts/sync_extras.py +138 -0
- agent_cli/server/cli.py +26 -24
- agent_cli/server/common.py +3 -4
- agent_cli/server/tts/api.py +1 -1
- agent_cli/server/whisper/backends/faster_whisper.py +30 -23
- agent_cli/server/whisper/wyoming_handler.py +22 -27
- agent_cli/services/_wyoming_utils.py +4 -2
- agent_cli/services/asr.py +13 -3
- agent_cli/services/llm.py +2 -1
- agent_cli/services/tts.py +5 -2
- agent_cli/services/wake_word.py +6 -3
- {agent_cli-0.61.2.dist-info → agent_cli-0.70.2.dist-info}/METADATA +168 -73
- {agent_cli-0.61.2.dist-info → agent_cli-0.70.2.dist-info}/RECORD +72 -54
- {agent_cli-0.61.2.dist-info → agent_cli-0.70.2.dist-info}/WHEEL +1 -2
- agent_cli-0.61.2.dist-info/top_level.txt +0 -1
- {agent_cli-0.61.2.dist-info → agent_cli-0.70.2.dist-info}/entry_points.txt +0 -0
- {agent_cli-0.61.2.dist-info → agent_cli-0.70.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Check that plugin skill files are in sync with source files."""
|
|
3
|
+
|
|
4
|
+
import sys
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
SYNC_PAIRS = [
|
|
8
|
+
# Plugin marketplace distribution
|
|
9
|
+
("agent_cli/dev/skill/SKILL.md", ".claude-plugin/skills/agent-cli-dev/SKILL.md"),
|
|
10
|
+
("agent_cli/dev/skill/examples.md", ".claude-plugin/skills/agent-cli-dev/examples.md"),
|
|
11
|
+
# Project-local skill (for Claude Code working on this repo)
|
|
12
|
+
("agent_cli/dev/skill/SKILL.md", ".claude/skills/agent-cli-dev/SKILL.md"),
|
|
13
|
+
("agent_cli/dev/skill/examples.md", ".claude/skills/agent-cli-dev/examples.md"),
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def main() -> int:
    """Check that plugin skill files match source files.

    Every ``(source, target)`` pair in ``SYNC_PAIRS`` is resolved against the
    parent of the directory containing this script, and the two files are
    compared as UTF-8 text.

    Returns:
        ``0`` when nothing is out of sync, ``1`` when at least one target is
        missing or its content differs from the source.

    """
    root = Path(__file__).parent.parent
    out_of_sync = []

    for source, target in SYNC_PAIRS:
        source_path = root / source
        target_path = root / target

        if not source_path.exists():
            # Reported only; a missing source does not change the exit code.
            print(f"Source not found: {source}")
            continue

        if not target_path.exists():
            out_of_sync.append((source, target, "target missing"))
            continue

        # Decode explicitly as UTF-8 so the check does not depend on the
        # platform's default locale encoding.
        source_text = source_path.read_text(encoding="utf-8")
        target_text = target_path.read_text(encoding="utf-8")
        if source_text != target_text:
            out_of_sync.append((source, target, "content differs"))

    if out_of_sync:
        print("Plugin skill files are out of sync:")
        for source, target, reason in out_of_sync:
            print(f" {source} -> {target} ({reason})")
        print("\nRun:")
        print(" cp agent_cli/dev/skill/*.md .claude-plugin/skills/agent-cli-dev/")
        print(" cp agent_cli/dev/skill/*.md .claude/skills/agent-cli-dev/")
        return 1

    return 0
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
if __name__ == "__main__":
|
|
50
|
+
sys.exit(main())
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Generate _extras.json from pyproject.toml.
|
|
3
|
+
|
|
4
|
+
This script parses the optional-dependencies in pyproject.toml and generates
|
|
5
|
+
the agent_cli/_extras.json file with package-to-import mappings.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
python scripts/sync_extras.py
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
import re
|
|
15
|
+
import sys
|
|
16
|
+
import tomllib
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
REPO_ROOT = Path(__file__).parent.parent
|
|
20
|
+
PYPROJECT = REPO_ROOT / "pyproject.toml"
|
|
21
|
+
EXTRAS_FILE = REPO_ROOT / "agent_cli" / "_extras.json"
|
|
22
|
+
|
|
23
|
+
# Extras to skip (dev/test dependencies, not runtime installable)
|
|
24
|
+
SKIP_EXTRAS = {"dev", "test"}
|
|
25
|
+
|
|
26
|
+
# Manual mapping of extra name -> (description, list of import names)
|
|
27
|
+
# Import names should be the Python module name (how you import it)
|
|
28
|
+
# Bundle extras (voice, cloud, full) have empty import lists since they just install other extras
|
|
29
|
+
EXTRA_METADATA: dict[str, tuple[str, list[str]]] = {
|
|
30
|
+
# Provider extras (base dependencies now optional)
|
|
31
|
+
"audio": ("Audio recording/playback", ["sounddevice"]),
|
|
32
|
+
"wyoming": ("Wyoming protocol support", ["wyoming"]),
|
|
33
|
+
"openai": ("OpenAI API provider", ["openai"]),
|
|
34
|
+
"gemini": ("Google Gemini provider", ["google.genai"]),
|
|
35
|
+
"llm": ("LLM framework (pydantic-ai)", ["pydantic_ai"]),
|
|
36
|
+
# Feature extras
|
|
37
|
+
"rag": ("RAG proxy (ChromaDB, embeddings)", ["chromadb"]),
|
|
38
|
+
"memory": ("Long-term memory proxy", ["chromadb", "yaml"]),
|
|
39
|
+
"vad": ("Voice Activity Detection (silero-vad)", ["silero_vad"]),
|
|
40
|
+
"whisper": ("Local Whisper ASR (faster-whisper)", ["faster_whisper"]),
|
|
41
|
+
"whisper-mlx": ("MLX Whisper for Apple Silicon", ["mlx_whisper"]),
|
|
42
|
+
"tts": ("Local Piper TTS", ["piper"]),
|
|
43
|
+
"tts-kokoro": ("Kokoro neural TTS", ["kokoro"]),
|
|
44
|
+
"server": ("FastAPI server components", ["fastapi"]),
|
|
45
|
+
"speed": ("Audio speed adjustment (audiostretchy)", ["audiostretchy"]),
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def get_extras_from_pyproject() -> set[str]:
    """Return the extra names declared in pyproject.toml, minus skipped ones.

    Reads the ``optional-dependencies`` table under ``project`` from
    ``PYPROJECT`` and drops every name listed in ``SKIP_EXTRAS``. A missing
    ``project`` or ``optional-dependencies`` table yields an empty set.
    """
    with PYPROJECT.open("rb") as handle:
        pyproject = tomllib.load(handle)

    project_table = pyproject.get("project", {})
    optional_deps = project_table.get("optional-dependencies", {})
    return {name for name in optional_deps if name not in SKIP_EXTRAS}
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def extract_package_name(dep: str) -> str:
    """Extract the package name from a dependency specification.

    The name is everything before the first character that can start another
    part of a requirement: an environment marker (``;``), extras (``[``), a
    version operator (``<``, ``>``, ``=``, ``!``, ``~``), a direct-reference
    URL (``@``) or a parenthesised version constraint (``(``).

    Examples:
        "chromadb>=0.4.22" -> "chromadb"
        "pydantic-ai-slim[openai,duckduckgo]" -> "pydantic-ai-slim"
        'mlx-whisper>=0.4.0; sys_platform == "darwin"' -> "mlx-whisper"
        "requests @ https://example.com/requests.whl" -> "requests"
        "foo (>=1.0)" -> "foo"

    Args:
        dep: A single requirement string as written in pyproject.toml.

    Returns:
        The leading package name with surrounding whitespace removed; an
        empty string if ``dep`` has no name part.

    """
    # One split at the earliest delimiter; none of these characters can
    # appear inside a distribution name.
    name = re.split(r"[;\[<>=!~@(]", dep, maxsplit=1)[0]
    return name.strip()
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def package_to_import_name(package: str) -> str:
    """Convert a package name to its Python import name.

    Known packages are looked up in a table keyed by normalized distribution
    name (lower-case, with runs of ``-``, ``_`` and ``.`` collapsed to a
    single ``-``), so equivalent spellings such as ``Google_GenAI`` resolve
    the same way as ``google-genai``. Any other name falls back to replacing
    hyphens with underscores.

    Examples:
        "google-genai" -> "google.genai"
        "pydantic-ai-slim" -> "pydantic_ai"
        "silero-vad" -> "silero_vad"
        "faster-whisper" -> "faster_whisper"

    Args:
        package: Distribution name as it appears in a requirement.

    Returns:
        The module name to use in an ``import`` statement.

    """
    # Special cases where the import name differs from the package name
    special_cases = {
        "google-genai": "google.genai",
        "pydantic-ai-slim": "pydantic_ai",
        "silero-vad": "silero_vad",
        "faster-whisper": "faster_whisper",
        "mlx-whisper": "mlx_whisper",
        "piper-tts": "piper",
        "huggingface-hub": "huggingface_hub",
        "fastapi": "fastapi",
        "audiostretchy": "audiostretchy",
    }
    # Normalize only the lookup key; the fallback below keeps the caller's
    # original spelling.
    lookup_key = re.sub(r"[-_.]+", "-", package).lower()
    if lookup_key in special_cases:
        return special_cases[lookup_key]
    # Default: replace hyphens with underscores
    return package.replace("-", "_")
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def generate_extras_json(extras: set[str]) -> dict[str, list]:
    """Build the mapping that is serialized into _extras.json.

    Extras are processed in sorted order. Each one maps to a two-item list
    ``[description, import_names]`` taken from ``EXTRA_METADATA``; an extra
    without metadata gets a placeholder description and an empty import list.
    """
    payload = {}
    for name in sorted(extras):
        entry = EXTRA_METADATA.get(name)
        if entry is None:
            # Unknown extra - add a placeholder
            payload[name] = ["TODO: add description", []]
            continue
        description, import_names = entry
        payload[name] = [description, import_names]
    return payload
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def check_missing_metadata(extras: set[str]) -> list[str]:
    """Check for extras that don't have metadata defined.

    Args:
        extras: Extra names to look up in ``EXTRA_METADATA``.

    Returns:
        The names missing from ``EXTRA_METADATA``, sorted so the result (and
        any message built from it) is the same on every run instead of
        following set iteration order.

    """
    return sorted(e for e in extras if e not in EXTRA_METADATA)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def main() -> int:
    """Regenerate _extras.json from the extras declared in pyproject.toml.

    Prints a warning for extras that have no entry in ``EXTRA_METADATA``,
    then writes the generated mapping to ``EXTRAS_FILE`` as indented JSON
    followed by a newline. Always returns ``0``.
    """
    extras = get_extras_from_pyproject()

    # Warn about extras that will be written with placeholder metadata
    undocumented = check_missing_metadata(extras)
    if undocumented:
        print(f"Warning: The following extras need metadata in EXTRA_METADATA: {undocumented}")
        print("Please update EXTRA_METADATA in scripts/sync_extras.py")

    # Serialize and write the file
    serialized = json.dumps(generate_extras_json(extras), indent=2)
    EXTRAS_FILE.write_text(serialized + "\n")
    print(f"Generated {EXTRAS_FILE}")

    return 0
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
if __name__ == "__main__":
|
|
138
|
+
sys.exit(main())
|
agent_cli/server/cli.py
CHANGED
|
@@ -9,23 +9,22 @@ from pathlib import Path # noqa: TC003 - Typer needs this at runtime
|
|
|
9
9
|
from typing import Annotated
|
|
10
10
|
|
|
11
11
|
import typer
|
|
12
|
-
from rich.console import Console
|
|
13
12
|
|
|
14
13
|
from agent_cli.cli import app as main_app
|
|
14
|
+
from agent_cli.core.deps import requires_extras
|
|
15
15
|
from agent_cli.core.process import set_process_title
|
|
16
|
+
from agent_cli.core.utils import console, err_console
|
|
16
17
|
from agent_cli.server.common import setup_rich_logging
|
|
17
18
|
|
|
18
|
-
console = Console()
|
|
19
|
-
err_console = Console(stderr=True)
|
|
20
19
|
logger = logging.getLogger(__name__)
|
|
21
20
|
|
|
22
|
-
# Check for optional dependencies
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
21
|
+
# Check for optional dependencies at call time (not module load time)
|
|
22
|
+
# This is important because auto-install may install packages after the module is loaded
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _has(package: str) -> bool:
|
|
26
|
+
return find_spec(package) is not None
|
|
27
|
+
|
|
29
28
|
|
|
30
29
|
app = typer.Typer(
|
|
31
30
|
name="server",
|
|
@@ -34,7 +33,7 @@ app = typer.Typer(
|
|
|
34
33
|
rich_markup_mode="markdown",
|
|
35
34
|
no_args_is_help=True,
|
|
36
35
|
)
|
|
37
|
-
main_app.add_typer(app, name="server")
|
|
36
|
+
main_app.add_typer(app, name="server", rich_help_panel="Servers")
|
|
38
37
|
|
|
39
38
|
|
|
40
39
|
@app.callback()
|
|
@@ -47,7 +46,7 @@ def server_callback(ctx: typer.Context) -> None:
|
|
|
47
46
|
|
|
48
47
|
def _check_server_deps() -> None:
|
|
49
48
|
"""Check that server dependencies are available."""
|
|
50
|
-
if not
|
|
49
|
+
if not _has("uvicorn") or not _has("fastapi"):
|
|
51
50
|
err_console.print(
|
|
52
51
|
"[bold red]Error:[/bold red] Server dependencies not installed. "
|
|
53
52
|
"Run: [cyan]pip install agent-cli\\[server][/cyan] "
|
|
@@ -61,7 +60,7 @@ def _check_tts_deps(backend: str = "auto") -> None:
|
|
|
61
60
|
_check_server_deps()
|
|
62
61
|
|
|
63
62
|
if backend == "kokoro":
|
|
64
|
-
if not
|
|
63
|
+
if not _has("kokoro"):
|
|
65
64
|
err_console.print(
|
|
66
65
|
"[bold red]Error:[/bold red] Kokoro backend requires kokoro. "
|
|
67
66
|
"Run: [cyan]pip install agent-cli\\[tts-kokoro][/cyan] "
|
|
@@ -71,7 +70,7 @@ def _check_tts_deps(backend: str = "auto") -> None:
|
|
|
71
70
|
return
|
|
72
71
|
|
|
73
72
|
if backend == "piper":
|
|
74
|
-
if not
|
|
73
|
+
if not _has("piper"):
|
|
75
74
|
err_console.print(
|
|
76
75
|
"[bold red]Error:[/bold red] Piper backend requires piper-tts. "
|
|
77
76
|
"Run: [cyan]pip install agent-cli\\[tts][/cyan] "
|
|
@@ -81,7 +80,7 @@ def _check_tts_deps(backend: str = "auto") -> None:
|
|
|
81
80
|
return
|
|
82
81
|
|
|
83
82
|
# For auto, check if either is available
|
|
84
|
-
if not
|
|
83
|
+
if not _has("piper") and not _has("kokoro"):
|
|
85
84
|
err_console.print(
|
|
86
85
|
"[bold red]Error:[/bold red] No TTS backend available. "
|
|
87
86
|
"Run: [cyan]pip install agent-cli\\[tts][/cyan] for Piper "
|
|
@@ -135,7 +134,7 @@ def _check_whisper_deps(backend: str, *, download_only: bool = False) -> None:
|
|
|
135
134
|
"""Check that Whisper dependencies are available."""
|
|
136
135
|
_check_server_deps()
|
|
137
136
|
if download_only:
|
|
138
|
-
if not
|
|
137
|
+
if not _has("faster_whisper"):
|
|
139
138
|
err_console.print(
|
|
140
139
|
"[bold red]Error:[/bold red] faster-whisper is required for --download-only. "
|
|
141
140
|
"Run: [cyan]pip install agent-cli\\[whisper][/cyan] "
|
|
@@ -145,7 +144,7 @@ def _check_whisper_deps(backend: str, *, download_only: bool = False) -> None:
|
|
|
145
144
|
return
|
|
146
145
|
|
|
147
146
|
if backend == "mlx":
|
|
148
|
-
if not
|
|
147
|
+
if not _has("mlx_whisper"):
|
|
149
148
|
err_console.print(
|
|
150
149
|
"[bold red]Error:[/bold red] MLX Whisper backend requires mlx-whisper. "
|
|
151
150
|
"Run: [cyan]pip install mlx-whisper[/cyan]",
|
|
@@ -153,7 +152,7 @@ def _check_whisper_deps(backend: str, *, download_only: bool = False) -> None:
|
|
|
153
152
|
raise typer.Exit(1)
|
|
154
153
|
return
|
|
155
154
|
|
|
156
|
-
if not
|
|
155
|
+
if not _has("faster_whisper"):
|
|
157
156
|
err_console.print(
|
|
158
157
|
"[bold red]Error:[/bold red] Whisper dependencies not installed. "
|
|
159
158
|
"Run: [cyan]pip install agent-cli\\[whisper][/cyan] "
|
|
@@ -163,6 +162,7 @@ def _check_whisper_deps(backend: str, *, download_only: bool = False) -> None:
|
|
|
163
162
|
|
|
164
163
|
|
|
165
164
|
@app.command("whisper")
|
|
165
|
+
@requires_extras("server", "faster-whisper|mlx-whisper")
|
|
166
166
|
def whisper_cmd( # noqa: PLR0912, PLR0915
|
|
167
167
|
model: Annotated[
|
|
168
168
|
list[str] | None,
|
|
@@ -293,7 +293,7 @@ def whisper_cmd( # noqa: PLR0912, PLR0915
|
|
|
293
293
|
|
|
294
294
|
"""
|
|
295
295
|
# Setup Rich logging for consistent output
|
|
296
|
-
setup_rich_logging(log_level
|
|
296
|
+
setup_rich_logging(log_level)
|
|
297
297
|
|
|
298
298
|
valid_backends = ("auto", "faster-whisper", "mlx")
|
|
299
299
|
if backend not in valid_backends:
|
|
@@ -421,8 +421,9 @@ def whisper_cmd( # noqa: PLR0912, PLR0915
|
|
|
421
421
|
)
|
|
422
422
|
|
|
423
423
|
|
|
424
|
-
@app.command("
|
|
425
|
-
|
|
424
|
+
@app.command("transcribe-proxy")
|
|
425
|
+
@requires_extras("server", "audio", "llm")
|
|
426
|
+
def transcribe_proxy_cmd(
|
|
426
427
|
host: Annotated[
|
|
427
428
|
str,
|
|
428
429
|
typer.Option("--host", help="Host to bind the server to"),
|
|
@@ -449,10 +450,10 @@ def transcription_proxy_cmd(
|
|
|
449
450
|
|
|
450
451
|
Examples:
|
|
451
452
|
# Run on default port
|
|
452
|
-
agent-cli server
|
|
453
|
+
agent-cli server transcribe-proxy
|
|
453
454
|
|
|
454
455
|
# Run on custom port
|
|
455
|
-
agent-cli server
|
|
456
|
+
agent-cli server transcribe-proxy --port 8080
|
|
456
457
|
|
|
457
458
|
"""
|
|
458
459
|
_check_server_deps()
|
|
@@ -475,6 +476,7 @@ def transcription_proxy_cmd(
|
|
|
475
476
|
|
|
476
477
|
|
|
477
478
|
@app.command("tts")
|
|
479
|
+
@requires_extras("server", "piper|kokoro")
|
|
478
480
|
def tts_cmd( # noqa: PLR0915
|
|
479
481
|
model: Annotated[
|
|
480
482
|
list[str] | None,
|
|
@@ -610,7 +612,7 @@ def tts_cmd( # noqa: PLR0915
|
|
|
610
612
|
|
|
611
613
|
"""
|
|
612
614
|
# Setup Rich logging for consistent output
|
|
613
|
-
setup_rich_logging(log_level
|
|
615
|
+
setup_rich_logging(log_level)
|
|
614
616
|
|
|
615
617
|
valid_backends = ("auto", "piper", "kokoro")
|
|
616
618
|
if backend not in valid_backends:
|
agent_cli/server/common.py
CHANGED
|
@@ -9,10 +9,10 @@ import logging
|
|
|
9
9
|
from contextlib import asynccontextmanager
|
|
10
10
|
from typing import TYPE_CHECKING, Any, Protocol
|
|
11
11
|
|
|
12
|
-
from rich.console import Console
|
|
13
12
|
from rich.logging import RichHandler
|
|
14
13
|
|
|
15
14
|
from agent_cli import constants
|
|
15
|
+
from agent_cli.core.utils import console
|
|
16
16
|
|
|
17
17
|
if TYPE_CHECKING:
|
|
18
18
|
import wave
|
|
@@ -128,7 +128,7 @@ def configure_app(app: FastAPI) -> None:
|
|
|
128
128
|
return await log_requests_middleware(request, call_next)
|
|
129
129
|
|
|
130
130
|
|
|
131
|
-
def setup_rich_logging(log_level: str = "info"
|
|
131
|
+
def setup_rich_logging(log_level: str = "info") -> None:
|
|
132
132
|
"""Configure logging to use Rich for consistent, pretty output.
|
|
133
133
|
|
|
134
134
|
This configures:
|
|
@@ -141,11 +141,10 @@ def setup_rich_logging(log_level: str = "info", *, console: Console | None = Non
|
|
|
141
141
|
|
|
142
142
|
"""
|
|
143
143
|
level = getattr(logging, log_level.upper(), logging.INFO)
|
|
144
|
-
rich_console = console or Console()
|
|
145
144
|
|
|
146
145
|
# Create Rich handler with clean format
|
|
147
146
|
handler = RichHandler(
|
|
148
|
-
console=
|
|
147
|
+
console=console,
|
|
149
148
|
show_time=True,
|
|
150
149
|
show_level=True,
|
|
151
150
|
show_path=False, # Don't show file:line - too verbose
|
agent_cli/server/tts/api.py
CHANGED
|
@@ -109,7 +109,7 @@ class SpeechRequest(BaseModel):
|
|
|
109
109
|
input: str
|
|
110
110
|
model: str = "tts-1"
|
|
111
111
|
voice: str = "alloy"
|
|
112
|
-
response_format: Literal["
|
|
112
|
+
response_format: Literal["mp3", "wav", "pcm"] = "mp3"
|
|
113
113
|
speed: float = 1.0
|
|
114
114
|
stream_format: Literal["audio"] | None = None
|
|
115
115
|
|
|
@@ -6,6 +6,7 @@ import asyncio
|
|
|
6
6
|
import logging
|
|
7
7
|
import tempfile
|
|
8
8
|
from concurrent.futures import ProcessPoolExecutor
|
|
9
|
+
from dataclasses import dataclass
|
|
9
10
|
from multiprocessing import get_context
|
|
10
11
|
from pathlib import Path
|
|
11
12
|
from typing import Any, Literal
|
|
@@ -19,6 +20,24 @@ from agent_cli.server.whisper.backends.base import (
|
|
|
19
20
|
logger = logging.getLogger(__name__)
|
|
20
21
|
|
|
21
22
|
|
|
23
|
+
# --- Subprocess state (only used within subprocess worker) ---
|
|
24
|
+
# This state persists across function calls within the subprocess because:
|
|
25
|
+
# 1. Model loading is expensive and must be reused across transcription calls
|
|
26
|
+
# 2. CTranslate2 models cannot be pickled/passed through IPC queues
|
|
27
|
+
# 3. The subprocess is long-lived (ProcessPoolExecutor reuses workers)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
|
|
31
|
+
class _SubprocessState:
|
|
32
|
+
"""Container for subprocess-local state. Not shared with main process."""
|
|
33
|
+
|
|
34
|
+
model: Any = None
|
|
35
|
+
device: str | None = None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
_state = _SubprocessState()
|
|
39
|
+
|
|
40
|
+
|
|
22
41
|
# --- Subprocess worker functions (run in isolated process) ---
|
|
23
42
|
|
|
24
43
|
|
|
@@ -40,28 +59,22 @@ def _load_model_in_subprocess(
|
|
|
40
59
|
cpu_threads=cpu_threads,
|
|
41
60
|
download_root=download_root,
|
|
42
61
|
)
|
|
43
|
-
|
|
62
|
+
|
|
63
|
+
# Store in subprocess state for reuse across transcription calls
|
|
64
|
+
_state.model = model
|
|
65
|
+
_state.device = str(model.model.device)
|
|
66
|
+
|
|
67
|
+
return _state.device
|
|
44
68
|
|
|
45
69
|
|
|
46
70
|
def _transcribe_in_subprocess(
|
|
47
|
-
model_name: str,
|
|
48
|
-
device: str,
|
|
49
|
-
compute_type: str,
|
|
50
|
-
cpu_threads: int,
|
|
51
|
-
download_root: str | None,
|
|
52
71
|
audio_bytes: bytes,
|
|
53
72
|
kwargs: dict[str, Any],
|
|
54
73
|
) -> dict[str, Any]:
|
|
55
|
-
"""Run transcription in subprocess.
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
model_name,
|
|
60
|
-
device=device,
|
|
61
|
-
compute_type=compute_type,
|
|
62
|
-
cpu_threads=cpu_threads,
|
|
63
|
-
download_root=download_root,
|
|
64
|
-
)
|
|
74
|
+
"""Run transcription in subprocess. Reuses model from _state."""
|
|
75
|
+
if _state.model is None:
|
|
76
|
+
msg = "Model not loaded in subprocess. Call _load_model_in_subprocess first."
|
|
77
|
+
raise RuntimeError(msg)
|
|
65
78
|
|
|
66
79
|
# Write audio to temp file - faster-whisper needs a file path
|
|
67
80
|
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
|
|
@@ -69,7 +82,7 @@ def _transcribe_in_subprocess(
|
|
|
69
82
|
tmp_path = tmp.name
|
|
70
83
|
|
|
71
84
|
try:
|
|
72
|
-
segments, info = model.transcribe(tmp_path, **kwargs)
|
|
85
|
+
segments, info = _state.model.transcribe(tmp_path, **kwargs)
|
|
73
86
|
segment_list = list(segments) # Consume lazy generator
|
|
74
87
|
finally:
|
|
75
88
|
Path(tmp_path).unlink(missing_ok=True)
|
|
@@ -195,16 +208,10 @@ class FasterWhisperBackend:
|
|
|
195
208
|
"word_timestamps": word_timestamps,
|
|
196
209
|
}
|
|
197
210
|
|
|
198
|
-
download_root = str(self._config.cache_dir) if self._config.cache_dir else None
|
|
199
211
|
loop = asyncio.get_running_loop()
|
|
200
212
|
result = await loop.run_in_executor(
|
|
201
213
|
self._executor,
|
|
202
214
|
_transcribe_in_subprocess,
|
|
203
|
-
self._config.model_name,
|
|
204
|
-
self._config.device,
|
|
205
|
-
self._config.compute_type,
|
|
206
|
-
self._config.cpu_threads,
|
|
207
|
-
download_root,
|
|
208
215
|
audio,
|
|
209
216
|
kwargs,
|
|
210
217
|
)
|
|
@@ -2,19 +2,18 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
import io
|
|
6
5
|
import logging
|
|
7
|
-
import wave
|
|
8
6
|
from functools import partial
|
|
9
7
|
from typing import TYPE_CHECKING
|
|
10
8
|
|
|
11
9
|
from wyoming.asr import Transcribe, Transcript
|
|
12
|
-
from wyoming.audio import AudioChunk, AudioStop
|
|
10
|
+
from wyoming.audio import AudioChunk, AudioChunkConverter, AudioStop
|
|
13
11
|
from wyoming.info import AsrModel, AsrProgram, Attribution, Describe, Info
|
|
14
12
|
from wyoming.server import AsyncEventHandler, AsyncServer
|
|
15
13
|
|
|
16
|
-
from agent_cli
|
|
14
|
+
from agent_cli import constants
|
|
17
15
|
from agent_cli.server.whisper.languages import WHISPER_LANGUAGE_CODES
|
|
16
|
+
from agent_cli.services import pcm_to_wav
|
|
18
17
|
|
|
19
18
|
if TYPE_CHECKING:
|
|
20
19
|
from wyoming.event import Event
|
|
@@ -49,8 +48,12 @@ class WyomingWhisperHandler(AsyncEventHandler):
|
|
|
49
48
|
"""
|
|
50
49
|
super().__init__(*args, **kwargs)
|
|
51
50
|
self._registry = registry
|
|
52
|
-
self.
|
|
53
|
-
self.
|
|
51
|
+
self._audio_bytes: bytes = b""
|
|
52
|
+
self._audio_converter = AudioChunkConverter(
|
|
53
|
+
rate=constants.AUDIO_RATE,
|
|
54
|
+
width=constants.AUDIO_FORMAT_WIDTH,
|
|
55
|
+
channels=constants.AUDIO_CHANNELS,
|
|
56
|
+
)
|
|
54
57
|
self._language: str | None = None
|
|
55
58
|
self._initial_prompt: str | None = None
|
|
56
59
|
|
|
@@ -80,39 +83,31 @@ class WyomingWhisperHandler(AsyncEventHandler):
|
|
|
80
83
|
|
|
81
84
|
async def _handle_audio_chunk(self, event: Event) -> bool:
|
|
82
85
|
"""Handle an audio chunk event."""
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
if self._wav_file is None:
|
|
86
|
+
if not self._audio_bytes:
|
|
86
87
|
logger.debug("AudioChunk begin")
|
|
87
|
-
self._audio_buffer = io.BytesIO()
|
|
88
|
-
self._wav_file = wave.open(self._audio_buffer, "wb") # noqa: SIM115
|
|
89
|
-
setup_wav_file(
|
|
90
|
-
self._wav_file,
|
|
91
|
-
rate=chunk.rate,
|
|
92
|
-
channels=chunk.channels,
|
|
93
|
-
sample_width=chunk.width,
|
|
94
|
-
)
|
|
95
88
|
|
|
96
|
-
|
|
89
|
+
chunk = AudioChunk.from_event(event)
|
|
90
|
+
chunk = self._audio_converter.convert(chunk)
|
|
91
|
+
self._audio_bytes += chunk.audio
|
|
97
92
|
return True
|
|
98
93
|
|
|
99
94
|
async def _handle_audio_stop(self) -> bool:
|
|
100
95
|
"""Handle audio stop event - transcribe the collected audio."""
|
|
101
96
|
logger.debug("AudioStop")
|
|
102
97
|
|
|
103
|
-
if
|
|
98
|
+
if not self._audio_bytes:
|
|
104
99
|
logger.warning("AudioStop received but no audio data")
|
|
105
100
|
await self.write_event(Transcript(text="").event())
|
|
106
101
|
return False
|
|
107
102
|
|
|
108
|
-
#
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
self.
|
|
103
|
+
# Wrap PCM in WAV format for the backend
|
|
104
|
+
audio_data = pcm_to_wav(
|
|
105
|
+
self._audio_bytes,
|
|
106
|
+
sample_rate=constants.AUDIO_RATE,
|
|
107
|
+
sample_width=constants.AUDIO_FORMAT_WIDTH,
|
|
108
|
+
channels=constants.AUDIO_CHANNELS,
|
|
109
|
+
)
|
|
110
|
+
self._audio_bytes = b""
|
|
116
111
|
|
|
117
112
|
# Transcribe
|
|
118
113
|
try:
|
|
@@ -5,14 +5,14 @@ from __future__ import annotations
|
|
|
5
5
|
from contextlib import asynccontextmanager
|
|
6
6
|
from typing import TYPE_CHECKING
|
|
7
7
|
|
|
8
|
-
from wyoming.client import AsyncClient
|
|
9
|
-
|
|
10
8
|
from agent_cli.core.utils import print_error_message
|
|
11
9
|
|
|
12
10
|
if TYPE_CHECKING:
|
|
13
11
|
import logging
|
|
14
12
|
from collections.abc import AsyncGenerator
|
|
15
13
|
|
|
14
|
+
from wyoming.client import AsyncClient
|
|
15
|
+
|
|
16
16
|
|
|
17
17
|
@asynccontextmanager
|
|
18
18
|
async def wyoming_client_context(
|
|
@@ -40,6 +40,8 @@ async def wyoming_client_context(
|
|
|
40
40
|
Exception: For other connection errors
|
|
41
41
|
|
|
42
42
|
"""
|
|
43
|
+
from wyoming.client import AsyncClient # noqa: PLC0415
|
|
44
|
+
|
|
43
45
|
uri = f"tcp://{server_ip}:{server_port}"
|
|
44
46
|
logger.info("Connecting to Wyoming %s server at %s", server_type, uri)
|
|
45
47
|
|
agent_cli/services/asr.py
CHANGED
|
@@ -10,9 +10,6 @@ from functools import partial
|
|
|
10
10
|
from pathlib import Path
|
|
11
11
|
from typing import TYPE_CHECKING
|
|
12
12
|
|
|
13
|
-
from wyoming.asr import Transcribe, Transcript, TranscriptChunk, TranscriptStart, TranscriptStop
|
|
14
|
-
from wyoming.audio import AudioChunk, AudioStart, AudioStop
|
|
15
|
-
|
|
16
13
|
from agent_cli import constants
|
|
17
14
|
from agent_cli.core.audio import (
|
|
18
15
|
open_audio_stream,
|
|
@@ -225,6 +222,9 @@ async def _send_audio(
|
|
|
225
222
|
initial_prompt: str | None = None,
|
|
226
223
|
) -> None:
|
|
227
224
|
"""Read from mic and send to Wyoming server."""
|
|
225
|
+
from wyoming.asr import Transcribe # noqa: PLC0415
|
|
226
|
+
from wyoming.audio import AudioChunk, AudioStart, AudioStop # noqa: PLC0415
|
|
227
|
+
|
|
228
228
|
# Build context with initial_prompt if provided
|
|
229
229
|
context = {"initial_prompt": initial_prompt} if initial_prompt else None
|
|
230
230
|
await client.write_event(Transcribe(context=context).event())
|
|
@@ -282,6 +282,13 @@ async def _receive_transcript(
|
|
|
282
282
|
final_callback: Callable[[str], None] | None = None,
|
|
283
283
|
) -> str:
|
|
284
284
|
"""Receive transcription events and return the final transcript."""
|
|
285
|
+
from wyoming.asr import ( # noqa: PLC0415
|
|
286
|
+
Transcript,
|
|
287
|
+
TranscriptChunk,
|
|
288
|
+
TranscriptStart,
|
|
289
|
+
TranscriptStop,
|
|
290
|
+
)
|
|
291
|
+
|
|
285
292
|
transcript_text = ""
|
|
286
293
|
while True:
|
|
287
294
|
event = await client.read_event()
|
|
@@ -370,6 +377,9 @@ async def _transcribe_recorded_audio_wyoming(
|
|
|
370
377
|
**_kwargs: object,
|
|
371
378
|
) -> str:
|
|
372
379
|
"""Process pre-recorded audio data with Wyoming ASR server."""
|
|
380
|
+
from wyoming.asr import Transcribe # noqa: PLC0415
|
|
381
|
+
from wyoming.audio import AudioChunk, AudioStart, AudioStop # noqa: PLC0415
|
|
382
|
+
|
|
373
383
|
try:
|
|
374
384
|
async with wyoming_client_context(
|
|
375
385
|
wyoming_asr_cfg.asr_wyoming_ip,
|
agent_cli/services/llm.py
CHANGED
|
@@ -6,7 +6,6 @@ import sys
|
|
|
6
6
|
import time
|
|
7
7
|
from typing import TYPE_CHECKING
|
|
8
8
|
|
|
9
|
-
import pyperclip
|
|
10
9
|
from rich.live import Live
|
|
11
10
|
|
|
12
11
|
from agent_cli.core.utils import console, live_timer, print_error_message, print_output_panel
|
|
@@ -156,6 +155,8 @@ async def get_llm_response(
|
|
|
156
155
|
result_text = result.output
|
|
157
156
|
|
|
158
157
|
if clipboard:
|
|
158
|
+
import pyperclip # noqa: PLC0415
|
|
159
|
+
|
|
159
160
|
pyperclip.copy(result_text)
|
|
160
161
|
logger.info("Copied result to clipboard.")
|
|
161
162
|
|