agent-cli 0.70.2__py3-none-any.whl → 0.72.1__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- agent_cli/_extras.json +4 -3
- agent_cli/_requirements/memory.txt +14 -1
- agent_cli/_requirements/rag.txt +14 -1
- agent_cli/_requirements/vad.txt +1 -85
- agent_cli/_requirements/wyoming.txt +71 -0
- agent_cli/agents/assistant.py +24 -28
- agent_cli/agents/autocorrect.py +30 -4
- agent_cli/agents/chat.py +45 -15
- agent_cli/agents/memory/__init__.py +19 -1
- agent_cli/agents/memory/add.py +3 -3
- agent_cli/agents/memory/proxy.py +20 -11
- agent_cli/agents/rag_proxy.py +42 -10
- agent_cli/agents/speak.py +23 -3
- agent_cli/agents/transcribe.py +21 -3
- agent_cli/agents/transcribe_daemon.py +34 -22
- agent_cli/agents/voice_edit.py +18 -10
- agent_cli/cli.py +25 -2
- agent_cli/config_cmd.py +30 -11
- agent_cli/core/deps.py +6 -3
- agent_cli/core/transcription_logger.py +1 -1
- agent_cli/core/vad.py +6 -24
- agent_cli/dev/cli.py +295 -65
- agent_cli/docs_gen.py +18 -8
- agent_cli/install/extras.py +44 -13
- agent_cli/install/hotkeys.py +22 -11
- agent_cli/install/services.py +54 -14
- agent_cli/opts.py +43 -22
- agent_cli/server/cli.py +128 -62
- agent_cli/server/proxy/api.py +77 -19
- agent_cli/services/__init__.py +46 -5
- {agent_cli-0.70.2.dist-info → agent_cli-0.72.1.dist-info}/METADATA +627 -246
- {agent_cli-0.70.2.dist-info → agent_cli-0.72.1.dist-info}/RECORD +35 -34
- {agent_cli-0.70.2.dist-info → agent_cli-0.72.1.dist-info}/WHEEL +0 -0
- {agent_cli-0.70.2.dist-info → agent_cli-0.72.1.dist-info}/entry_points.txt +0 -0
- {agent_cli-0.70.2.dist-info → agent_cli-0.72.1.dist-info}/licenses/LICENSE +0 -0
agent_cli/core/vad.py
CHANGED
|
@@ -3,38 +3,22 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import logging
|
|
6
|
-
import urllib.request
|
|
7
6
|
from collections import deque
|
|
8
|
-
from pathlib import Path
|
|
9
7
|
|
|
10
8
|
from agent_cli import constants
|
|
11
9
|
|
|
12
10
|
try:
|
|
13
11
|
import numpy as np
|
|
14
|
-
import
|
|
12
|
+
from silero_vad_lite import SileroVAD
|
|
15
13
|
except ImportError as e:
|
|
16
14
|
msg = (
|
|
17
|
-
"silero-vad is required for the transcribe-daemon command. "
|
|
15
|
+
"silero-vad-lite is required for the transcribe-daemon command. "
|
|
18
16
|
"Install it with: `pip install agent-cli[vad]` or `uv sync --extra vad`."
|
|
19
17
|
)
|
|
20
18
|
raise ImportError(msg) from e
|
|
21
19
|
|
|
22
20
|
LOGGER = logging.getLogger(__name__)
|
|
23
21
|
|
|
24
|
-
_SILERO_VAD_ONNX_URL = (
|
|
25
|
-
"https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx"
|
|
26
|
-
)
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def _get_model_path() -> Path:
|
|
30
|
-
"""Get the path to the Silero VAD ONNX model, downloading if needed."""
|
|
31
|
-
cache_dir = Path.home() / ".cache" / "silero-vad"
|
|
32
|
-
cache_dir.mkdir(parents=True, exist_ok=True)
|
|
33
|
-
model_path = cache_dir / "silero_vad.onnx"
|
|
34
|
-
if not model_path.exists():
|
|
35
|
-
urllib.request.urlretrieve(_SILERO_VAD_ONNX_URL, model_path) # noqa: S310
|
|
36
|
-
return model_path
|
|
37
|
-
|
|
38
22
|
|
|
39
23
|
class VoiceActivityDetector:
|
|
40
24
|
"""Silero VAD-based voice activity detection for audio segmentation.
|
|
@@ -56,8 +40,6 @@ class VoiceActivityDetector:
|
|
|
56
40
|
msg = f"Sample rate must be 8000 or 16000, got {sample_rate}"
|
|
57
41
|
raise ValueError(msg)
|
|
58
42
|
|
|
59
|
-
from silero_vad.utils_vad import OnnxWrapper # noqa: PLC0415
|
|
60
|
-
|
|
61
43
|
self.sample_rate = sample_rate
|
|
62
44
|
self.threshold = threshold
|
|
63
45
|
self.silence_threshold_ms = silence_threshold_ms
|
|
@@ -74,7 +56,7 @@ class VoiceActivityDetector:
|
|
|
74
56
|
)
|
|
75
57
|
|
|
76
58
|
# Model and state
|
|
77
|
-
self._model =
|
|
59
|
+
self._model = SileroVAD(sample_rate=sample_rate)
|
|
78
60
|
self._pre_speech_buffer: deque[bytes] = deque(maxlen=pre_speech_windows)
|
|
79
61
|
self._pending = bytearray()
|
|
80
62
|
self._audio_buffer = bytearray()
|
|
@@ -92,7 +74,7 @@ class VoiceActivityDetector:
|
|
|
92
74
|
|
|
93
75
|
def reset(self) -> None:
|
|
94
76
|
"""Reset VAD state for a new recording session."""
|
|
95
|
-
self._model.
|
|
77
|
+
self._model = SileroVAD(sample_rate=self.sample_rate)
|
|
96
78
|
self._pre_speech_buffer.clear()
|
|
97
79
|
self._pending.clear()
|
|
98
80
|
self._audio_buffer.clear()
|
|
@@ -103,7 +85,7 @@ class VoiceActivityDetector:
|
|
|
103
85
|
def _is_speech(self, window: bytes) -> bool:
|
|
104
86
|
"""Check if audio window contains speech."""
|
|
105
87
|
audio = np.frombuffer(window, dtype=np.int16).astype(np.float32) / 32768.0
|
|
106
|
-
prob =
|
|
88
|
+
prob = self._model.process(audio)
|
|
107
89
|
LOGGER.debug("Speech prob: %.3f, threshold: %.2f", prob, self.threshold)
|
|
108
90
|
return prob >= self.threshold
|
|
109
91
|
|
|
@@ -154,7 +136,7 @@ class VoiceActivityDetector:
|
|
|
154
136
|
self._silence_samples = 0
|
|
155
137
|
self._speech_samples = 0
|
|
156
138
|
self._audio_buffer.clear()
|
|
157
|
-
self._model.
|
|
139
|
+
self._model = SileroVAD(sample_rate=self.sample_rate)
|
|
158
140
|
else:
|
|
159
141
|
# Not speaking - maintain rolling pre-speech buffer (auto-limited by deque maxlen)
|
|
160
142
|
self._pre_speech_buffer.append(window)
|