agent-cli 0.70.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_cli/__init__.py +5 -0
- agent_cli/__main__.py +6 -0
- agent_cli/_extras.json +14 -0
- agent_cli/_requirements/.gitkeep +0 -0
- agent_cli/_requirements/audio.txt +79 -0
- agent_cli/_requirements/faster-whisper.txt +215 -0
- agent_cli/_requirements/kokoro.txt +425 -0
- agent_cli/_requirements/llm.txt +183 -0
- agent_cli/_requirements/memory.txt +355 -0
- agent_cli/_requirements/mlx-whisper.txt +222 -0
- agent_cli/_requirements/piper.txt +176 -0
- agent_cli/_requirements/rag.txt +402 -0
- agent_cli/_requirements/server.txt +154 -0
- agent_cli/_requirements/speed.txt +77 -0
- agent_cli/_requirements/vad.txt +155 -0
- agent_cli/_requirements/wyoming.txt +71 -0
- agent_cli/_tools.py +368 -0
- agent_cli/agents/__init__.py +23 -0
- agent_cli/agents/_voice_agent_common.py +136 -0
- agent_cli/agents/assistant.py +383 -0
- agent_cli/agents/autocorrect.py +284 -0
- agent_cli/agents/chat.py +496 -0
- agent_cli/agents/memory/__init__.py +31 -0
- agent_cli/agents/memory/add.py +190 -0
- agent_cli/agents/memory/proxy.py +160 -0
- agent_cli/agents/rag_proxy.py +128 -0
- agent_cli/agents/speak.py +209 -0
- agent_cli/agents/transcribe.py +671 -0
- agent_cli/agents/transcribe_daemon.py +499 -0
- agent_cli/agents/voice_edit.py +291 -0
- agent_cli/api.py +22 -0
- agent_cli/cli.py +106 -0
- agent_cli/config.py +503 -0
- agent_cli/config_cmd.py +307 -0
- agent_cli/constants.py +27 -0
- agent_cli/core/__init__.py +1 -0
- agent_cli/core/audio.py +461 -0
- agent_cli/core/audio_format.py +299 -0
- agent_cli/core/chroma.py +88 -0
- agent_cli/core/deps.py +191 -0
- agent_cli/core/openai_proxy.py +139 -0
- agent_cli/core/process.py +195 -0
- agent_cli/core/reranker.py +120 -0
- agent_cli/core/sse.py +87 -0
- agent_cli/core/transcription_logger.py +70 -0
- agent_cli/core/utils.py +526 -0
- agent_cli/core/vad.py +175 -0
- agent_cli/core/watch.py +65 -0
- agent_cli/dev/__init__.py +14 -0
- agent_cli/dev/cli.py +1588 -0
- agent_cli/dev/coding_agents/__init__.py +19 -0
- agent_cli/dev/coding_agents/aider.py +24 -0
- agent_cli/dev/coding_agents/base.py +167 -0
- agent_cli/dev/coding_agents/claude.py +39 -0
- agent_cli/dev/coding_agents/codex.py +24 -0
- agent_cli/dev/coding_agents/continue_dev.py +15 -0
- agent_cli/dev/coding_agents/copilot.py +24 -0
- agent_cli/dev/coding_agents/cursor_agent.py +48 -0
- agent_cli/dev/coding_agents/gemini.py +28 -0
- agent_cli/dev/coding_agents/opencode.py +15 -0
- agent_cli/dev/coding_agents/registry.py +49 -0
- agent_cli/dev/editors/__init__.py +19 -0
- agent_cli/dev/editors/base.py +89 -0
- agent_cli/dev/editors/cursor.py +15 -0
- agent_cli/dev/editors/emacs.py +46 -0
- agent_cli/dev/editors/jetbrains.py +56 -0
- agent_cli/dev/editors/nano.py +31 -0
- agent_cli/dev/editors/neovim.py +33 -0
- agent_cli/dev/editors/registry.py +59 -0
- agent_cli/dev/editors/sublime.py +20 -0
- agent_cli/dev/editors/vim.py +42 -0
- agent_cli/dev/editors/vscode.py +15 -0
- agent_cli/dev/editors/zed.py +20 -0
- agent_cli/dev/project.py +568 -0
- agent_cli/dev/registry.py +52 -0
- agent_cli/dev/skill/SKILL.md +141 -0
- agent_cli/dev/skill/examples.md +571 -0
- agent_cli/dev/terminals/__init__.py +19 -0
- agent_cli/dev/terminals/apple_terminal.py +82 -0
- agent_cli/dev/terminals/base.py +56 -0
- agent_cli/dev/terminals/gnome.py +51 -0
- agent_cli/dev/terminals/iterm2.py +84 -0
- agent_cli/dev/terminals/kitty.py +77 -0
- agent_cli/dev/terminals/registry.py +48 -0
- agent_cli/dev/terminals/tmux.py +58 -0
- agent_cli/dev/terminals/warp.py +132 -0
- agent_cli/dev/terminals/zellij.py +78 -0
- agent_cli/dev/worktree.py +856 -0
- agent_cli/docs_gen.py +417 -0
- agent_cli/example-config.toml +185 -0
- agent_cli/install/__init__.py +5 -0
- agent_cli/install/common.py +89 -0
- agent_cli/install/extras.py +174 -0
- agent_cli/install/hotkeys.py +48 -0
- agent_cli/install/services.py +87 -0
- agent_cli/memory/__init__.py +7 -0
- agent_cli/memory/_files.py +250 -0
- agent_cli/memory/_filters.py +63 -0
- agent_cli/memory/_git.py +157 -0
- agent_cli/memory/_indexer.py +142 -0
- agent_cli/memory/_ingest.py +408 -0
- agent_cli/memory/_persistence.py +182 -0
- agent_cli/memory/_prompt.py +91 -0
- agent_cli/memory/_retrieval.py +294 -0
- agent_cli/memory/_store.py +169 -0
- agent_cli/memory/_streaming.py +44 -0
- agent_cli/memory/_tasks.py +48 -0
- agent_cli/memory/api.py +113 -0
- agent_cli/memory/client.py +272 -0
- agent_cli/memory/engine.py +361 -0
- agent_cli/memory/entities.py +43 -0
- agent_cli/memory/models.py +112 -0
- agent_cli/opts.py +433 -0
- agent_cli/py.typed +0 -0
- agent_cli/rag/__init__.py +3 -0
- agent_cli/rag/_indexer.py +67 -0
- agent_cli/rag/_indexing.py +226 -0
- agent_cli/rag/_prompt.py +30 -0
- agent_cli/rag/_retriever.py +156 -0
- agent_cli/rag/_store.py +48 -0
- agent_cli/rag/_utils.py +218 -0
- agent_cli/rag/api.py +175 -0
- agent_cli/rag/client.py +299 -0
- agent_cli/rag/engine.py +302 -0
- agent_cli/rag/models.py +55 -0
- agent_cli/scripts/.runtime/.gitkeep +0 -0
- agent_cli/scripts/__init__.py +1 -0
- agent_cli/scripts/check_plugin_skill_sync.py +50 -0
- agent_cli/scripts/linux-hotkeys/README.md +63 -0
- agent_cli/scripts/linux-hotkeys/toggle-autocorrect.sh +45 -0
- agent_cli/scripts/linux-hotkeys/toggle-transcription.sh +58 -0
- agent_cli/scripts/linux-hotkeys/toggle-voice-edit.sh +58 -0
- agent_cli/scripts/macos-hotkeys/README.md +45 -0
- agent_cli/scripts/macos-hotkeys/skhd-config-example +5 -0
- agent_cli/scripts/macos-hotkeys/toggle-autocorrect.sh +12 -0
- agent_cli/scripts/macos-hotkeys/toggle-transcription.sh +37 -0
- agent_cli/scripts/macos-hotkeys/toggle-voice-edit.sh +37 -0
- agent_cli/scripts/nvidia-asr-server/README.md +99 -0
- agent_cli/scripts/nvidia-asr-server/pyproject.toml +27 -0
- agent_cli/scripts/nvidia-asr-server/server.py +255 -0
- agent_cli/scripts/nvidia-asr-server/shell.nix +32 -0
- agent_cli/scripts/nvidia-asr-server/uv.lock +4654 -0
- agent_cli/scripts/run-openwakeword.sh +11 -0
- agent_cli/scripts/run-piper-windows.ps1 +30 -0
- agent_cli/scripts/run-piper.sh +24 -0
- agent_cli/scripts/run-whisper-linux.sh +40 -0
- agent_cli/scripts/run-whisper-macos.sh +6 -0
- agent_cli/scripts/run-whisper-windows.ps1 +51 -0
- agent_cli/scripts/run-whisper.sh +9 -0
- agent_cli/scripts/run_faster_whisper_server.py +136 -0
- agent_cli/scripts/setup-linux-hotkeys.sh +72 -0
- agent_cli/scripts/setup-linux.sh +108 -0
- agent_cli/scripts/setup-macos-hotkeys.sh +61 -0
- agent_cli/scripts/setup-macos.sh +76 -0
- agent_cli/scripts/setup-windows.ps1 +63 -0
- agent_cli/scripts/start-all-services-windows.ps1 +53 -0
- agent_cli/scripts/start-all-services.sh +178 -0
- agent_cli/scripts/sync_extras.py +138 -0
- agent_cli/server/__init__.py +3 -0
- agent_cli/server/cli.py +721 -0
- agent_cli/server/common.py +222 -0
- agent_cli/server/model_manager.py +288 -0
- agent_cli/server/model_registry.py +225 -0
- agent_cli/server/proxy/__init__.py +3 -0
- agent_cli/server/proxy/api.py +444 -0
- agent_cli/server/streaming.py +67 -0
- agent_cli/server/tts/__init__.py +3 -0
- agent_cli/server/tts/api.py +335 -0
- agent_cli/server/tts/backends/__init__.py +82 -0
- agent_cli/server/tts/backends/base.py +139 -0
- agent_cli/server/tts/backends/kokoro.py +403 -0
- agent_cli/server/tts/backends/piper.py +253 -0
- agent_cli/server/tts/model_manager.py +201 -0
- agent_cli/server/tts/model_registry.py +28 -0
- agent_cli/server/tts/wyoming_handler.py +249 -0
- agent_cli/server/whisper/__init__.py +3 -0
- agent_cli/server/whisper/api.py +413 -0
- agent_cli/server/whisper/backends/__init__.py +89 -0
- agent_cli/server/whisper/backends/base.py +97 -0
- agent_cli/server/whisper/backends/faster_whisper.py +225 -0
- agent_cli/server/whisper/backends/mlx.py +270 -0
- agent_cli/server/whisper/languages.py +116 -0
- agent_cli/server/whisper/model_manager.py +157 -0
- agent_cli/server/whisper/model_registry.py +28 -0
- agent_cli/server/whisper/wyoming_handler.py +203 -0
- agent_cli/services/__init__.py +343 -0
- agent_cli/services/_wyoming_utils.py +64 -0
- agent_cli/services/asr.py +506 -0
- agent_cli/services/llm.py +228 -0
- agent_cli/services/tts.py +450 -0
- agent_cli/services/wake_word.py +142 -0
- agent_cli-0.70.5.dist-info/METADATA +2118 -0
- agent_cli-0.70.5.dist-info/RECORD +196 -0
- agent_cli-0.70.5.dist-info/WHEEL +4 -0
- agent_cli-0.70.5.dist-info/entry_points.txt +4 -0
- agent_cli-0.70.5.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
"""Whisper model manager with TTL-based unloading."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import time
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from typing import TYPE_CHECKING, Literal
|
|
9
|
+
|
|
10
|
+
from agent_cli.server.model_manager import ModelConfig, ModelManager, ModelStats
|
|
11
|
+
from agent_cli.server.whisper.backends import (
|
|
12
|
+
BackendConfig,
|
|
13
|
+
BackendType,
|
|
14
|
+
TranscriptionResult,
|
|
15
|
+
create_backend,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from agent_cli.server.whisper.backends.base import WhisperBackend
|
|
20
|
+
|
|
21
|
+
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class WhisperModelConfig(ModelConfig):
    """Whisper-specific settings layered on top of ``ModelConfig``.

    Every field has a default, so only values that differ from the defaults
    need to be supplied.
    """

    # Forwarded to the backend as ``BackendConfig.compute_type``.
    compute_type: str = "auto"
    # Forwarded to the backend as ``BackendConfig.cpu_threads``.
    cpu_threads: int = 4
    # Passed to ``create_backend`` as ``backend_type`` to pick the backend.
    backend_type: BackendType = "auto"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class WhisperModelManager:
    """Manages a Whisper model with TTL-based unloading.

    Thin wrapper around ModelManager: the properties and lifecycle methods
    delegate to the wrapped manager, and this class adds the transcribe()
    method together with its statistics bookkeeping.
    """

    def __init__(self, config: WhisperModelConfig) -> None:
        """Initialize the Whisper model manager.

        Args:
            config: Model settings. Its fields populate the BackendConfig
                handed to create_backend(), and the config itself is also
                passed to the wrapped ModelManager.

        """
        self.config = config
        backend = create_backend(
            BackendConfig(
                model_name=config.model_name,
                device=config.device,
                compute_type=config.compute_type,
                cpu_threads=config.cpu_threads,
                cache_dir=config.cache_dir,
            ),
            backend_type=config.backend_type,
        )
        self._manager = ModelManager(backend, config)

    @property
    def stats(self) -> ModelStats:
        """Get the model statistics (the wrapped manager's stats object)."""
        return self._manager.stats

    @property
    def is_loaded(self) -> bool:
        """Check if the model is currently loaded."""
        return self._manager.is_loaded

    @property
    def device(self) -> str | None:
        """Get the device the model is loaded on."""
        return self._manager.device

    @property
    def active_requests(self) -> int:
        """Get the number of active requests."""
        return self._manager.active_requests

    @property
    def ttl_remaining(self) -> float | None:
        """Get seconds remaining before model unloads."""
        return self._manager.ttl_remaining

    async def start(self) -> None:
        """Start the TTL unload watcher."""
        await self._manager.start()

    async def stop(self) -> None:
        """Stop the manager and unload the model."""
        await self._manager.stop()

    async def get_model(self) -> WhisperBackend:
        """Get the backend, loading it if necessary."""
        return await self._manager.get_model()

    async def unload(self) -> bool:
        """Unload the model from memory."""
        return await self._manager.unload()

    async def transcribe(
        self,
        audio: bytes,
        *,
        source_filename: str | None = None,
        language: str | None = None,
        task: Literal["transcribe", "translate"] = "transcribe",
        initial_prompt: str | None = None,
        temperature: float = 0.0,
        vad_filter: bool = True,
        word_timestamps: bool = False,
    ) -> TranscriptionResult:
        """Transcribe audio data.

        The backend call runs inside the wrapped manager's request() context.
        Statistics are updated only after the backend call returns; if it
        raises, the exception propagates and no stats are recorded.

        Args:
            audio: Audio data as bytes (WAV format preferred)
            source_filename: Optional filename to help detect audio format.
            language: Language code (e.g., "en") or None for auto-detection
            task: "transcribe" or "translate"
            initial_prompt: Optional prompt to guide transcription
            temperature: Sampling temperature
            vad_filter: Whether to use VAD filtering
            word_timestamps: Whether to include word-level timestamps

        Returns:
            TranscriptionResult with text and metadata

        """
        # perf_counter() is monotonic, so the measured interval cannot be
        # skewed (or go negative) when the system wall clock is adjusted.
        start_time = time.perf_counter()

        async with self._manager.request():
            backend: WhisperBackend = self._manager.backend  # type: ignore[assignment]
            result = await backend.transcribe(
                audio,
                source_filename=source_filename,
                language=language,
                task=task,
                initial_prompt=initial_prompt,
                temperature=temperature,
                vad_filter=vad_filter,
                word_timestamps=word_timestamps,
            )

        # Covers everything since start_time, including entering and leaving
        # the request() context, not just the backend call itself.
        transcription_duration = time.perf_counter() - start_time

        # Update stats
        stats = self._manager.stats
        stats.total_requests += 1
        stats.total_audio_seconds += result.duration
        stats.total_processing_seconds += transcription_duration
        stats.extra["total_transcription_seconds"] = (
            stats.extra.get("total_transcription_seconds", 0.0) + transcription_duration
        )

        logger.debug(
            "Transcribed %.1fs audio in %.2fs (model=%s, lang=%s)",
            result.duration,
            transcription_duration,
            self.config.model_name,
            result.language,
        )

        return result
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""Registry for managing multiple Whisper models."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from agent_cli.server.model_registry import ModelRegistry
|
|
6
|
+
from agent_cli.server.whisper.model_manager import WhisperModelConfig, WhisperModelManager
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def create_whisper_registry(
    default_model: str | None = None,
) -> ModelRegistry[WhisperModelManager, WhisperModelConfig]:
    """Build a model registry whose managers are ``WhisperModelManager`` objects.

    Args:
        default_model: Name of the model to fall back to when none is
            specified; forwarded unchanged to ``ModelRegistry``.

    Returns:
        A new ``ModelRegistry`` that uses ``WhisperModelManager`` as its
        manager factory.

    """
    registry = ModelRegistry(
        default_model=default_model,
        manager_factory=WhisperModelManager,
    )
    return registry
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Alias for type hints: lets callers annotate a Whisper registry without
# repeating the ModelRegistry type parameters.
WhisperModelRegistry = ModelRegistry[WhisperModelManager, WhisperModelConfig]
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
"""Wyoming protocol handler for Whisper ASR server."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from functools import partial
|
|
7
|
+
from typing import TYPE_CHECKING
|
|
8
|
+
|
|
9
|
+
from wyoming.asr import Transcribe, Transcript
|
|
10
|
+
from wyoming.audio import AudioChunk, AudioChunkConverter, AudioStop
|
|
11
|
+
from wyoming.info import AsrModel, AsrProgram, Attribution, Describe, Info
|
|
12
|
+
from wyoming.server import AsyncEventHandler, AsyncServer
|
|
13
|
+
|
|
14
|
+
from agent_cli import constants
|
|
15
|
+
from agent_cli.server.whisper.languages import WHISPER_LANGUAGE_CODES
|
|
16
|
+
from agent_cli.services import pcm_to_wav
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from wyoming.event import Event
|
|
20
|
+
|
|
21
|
+
from agent_cli.server.whisper.model_registry import WhisperModelRegistry
|
|
22
|
+
|
|
23
|
+
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class WyomingWhisperHandler(AsyncEventHandler):
    """Wyoming event handler for Whisper ASR.

    Handles the Wyoming protocol for ASR (Automatic Speech Recognition):
    - Receives audio chunks and buffers them in the project's PCM format
    - Transcribes the buffered audio when AudioStop is received
    - Returns the transcript as a Transcript event
    - Answers Describe events with the models known to the registry
    """

    def __init__(
        self,
        registry: WhisperModelRegistry,
        *args: object,
        **kwargs: object,
    ) -> None:
        """Initialize the handler.

        Args:
            registry: Model registry for getting transcription models.
            *args: Passed to parent class.
            **kwargs: Passed to parent class.

        """
        super().__init__(*args, **kwargs)
        self._registry = registry
        # Mutable buffer: extending a bytearray in place is amortised O(1),
        # whereas ``bytes +=`` would re-copy all audio received so far on
        # every chunk (quadratic in the length of the stream).
        self._audio_bytes = bytearray()
        # Normalises every incoming chunk to the rate/width/channels that the
        # audio is later wrapped with in _handle_audio_stop().
        self._audio_converter = AudioChunkConverter(
            rate=constants.AUDIO_RATE,
            width=constants.AUDIO_FORMAT_WIDTH,
            channels=constants.AUDIO_CHANNELS,
        )
        # Per-request preferences, set by a Transcribe event and cleared after
        # a transcription attempt.
        self._language: str | None = None
        self._initial_prompt: str | None = None

    async def handle_event(self, event: Event) -> bool:
        """Handle a Wyoming event.

        Dispatches AudioChunk, AudioStop, Transcribe and Describe events to
        their handlers; any other event type is ignored.

        Args:
            event: The event to handle.

        Returns:
            True to continue processing events, False to stop.

        """
        if AudioChunk.is_type(event.type):
            return await self._handle_audio_chunk(event)

        if AudioStop.is_type(event.type):
            return await self._handle_audio_stop()

        if Transcribe.is_type(event.type):
            return self._handle_transcribe(event)

        if Describe.is_type(event.type):
            return await self._handle_describe()

        return True

    async def _handle_audio_chunk(self, event: Event) -> bool:
        """Handle an audio chunk event by appending it to the buffer.

        Returns:
            Always True, so that further events keep being processed.

        """
        if not self._audio_bytes:
            # Empty buffer means this is the first chunk of a new utterance.
            logger.debug("AudioChunk begin")

        chunk = AudioChunk.from_event(event)
        chunk = self._audio_converter.convert(chunk)
        self._audio_bytes += chunk.audio
        return True

    async def _handle_audio_stop(self) -> bool:
        """Handle audio stop event - transcribe the collected audio.

        Always writes exactly one Transcript event on the normal paths: the
        recognised text on success, or an empty text when no audio was
        buffered or transcription raised.

        Returns:
            Always False (see handle_event: False stops event processing).

        """
        logger.debug("AudioStop")

        if not self._audio_bytes:
            logger.warning("AudioStop received but no audio data")
            await self.write_event(Transcript(text="").event())
            return False

        # Wrap PCM in WAV format for the backend. The buffer is snapshotted
        # into immutable bytes so the helper receives the same type as before.
        audio_data = pcm_to_wav(
            bytes(self._audio_bytes),
            sample_rate=constants.AUDIO_RATE,
            sample_width=constants.AUDIO_FORMAT_WIDTH,
            channels=constants.AUDIO_CHANNELS,
        )
        # Start a fresh buffer before the (potentially slow) transcription.
        self._audio_bytes = bytearray()

        # Transcribe
        try:
            manager = self._registry.get_manager()
            result = await manager.transcribe(
                audio_data,
                language=self._language,
                task="transcribe",
                initial_prompt=self._initial_prompt,
            )

            logger.info("Wyoming transcription: %s", result.text[:100] if result.text else "")
            await self.write_event(Transcript(text=result.text).event())

        except Exception:
            # Deliberate best-effort boundary: log the failure with traceback
            # and still answer the client with an empty transcript.
            logger.exception("Wyoming transcription failed")
            await self.write_event(Transcript(text="").event())

        # Reset state for next request
        self._language = None
        self._initial_prompt = None
        return False

    def _handle_transcribe(self, event: Event) -> bool:
        """Handle transcribe event - sets language and prompt preferences.

        A falsy language or a context without "initial_prompt" leaves the
        corresponding stored preference untouched.

        Returns:
            Always True, so that further events keep being processed.

        """
        logger.debug("Transcribe event")
        transcribe = Transcribe.from_event(event)
        if transcribe.language:
            self._language = transcribe.language
        # Extract initial_prompt from context if provided
        if transcribe.context and "initial_prompt" in transcribe.context:
            self._initial_prompt = transcribe.context["initial_prompt"]
            logger.debug("Using initial_prompt from context")
        return True

    async def _handle_describe(self) -> bool:
        """Handle describe event - return server capabilities.

        Writes an Info event containing one AsrProgram whose models are built
        from the registry's list_status() entries.

        Returns:
            Always True, so that further events keep being processed.

        """
        logger.debug("Describe event")

        # Get list of available models
        models = [
            AsrModel(
                name=status.name,
                description=f"Whisper {status.name}",
                attribution=Attribution(
                    name="OpenAI",
                    url="https://github.com/openai/whisper",
                ),
                installed=True,
                languages=WHISPER_LANGUAGE_CODES,
                version="1.0",
            )
            for status in self._registry.list_status()
        ]

        await self.write_event(
            Info(
                asr=[
                    AsrProgram(
                        name="agent-cli-whisper",
                        description="Agent CLI Whisper ASR Server with TTL-based model unloading",
                        attribution=Attribution(
                            name="agent-cli",
                            url="https://github.com/basnijholt/agent-cli",
                        ),
                        installed=True,
                        version="1.0",
                        models=models,
                    ),
                ],
            ).event(),
        )
        return True
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
async def start_wyoming_server(
    registry: WhisperModelRegistry,
    uri: str = "tcp://0.0.0.0:10300",
) -> None:
    """Create a Wyoming ASR server for ``uri`` and run it.

    Args:
        registry: Model registry handed to every ``WyomingWhisperHandler``.
        uri: URI to bind the server to (e.g., "tcp://0.0.0.0:10300").

    """
    wyoming_server = AsyncServer.from_uri(uri)
    logger.debug("Wyoming server listening on %s", uri)

    # The handler factory is WyomingWhisperHandler with the registry pre-bound
    # as its first positional argument; the server supplies the rest.
    await wyoming_server.run(partial(WyomingWhisperHandler, registry))
|