agent-cli 0.70.2__py3-none-any.whl → 0.72.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35):
  1. agent_cli/_extras.json +4 -3
  2. agent_cli/_requirements/memory.txt +14 -1
  3. agent_cli/_requirements/rag.txt +14 -1
  4. agent_cli/_requirements/vad.txt +1 -85
  5. agent_cli/_requirements/wyoming.txt +71 -0
  6. agent_cli/agents/assistant.py +24 -28
  7. agent_cli/agents/autocorrect.py +30 -4
  8. agent_cli/agents/chat.py +45 -15
  9. agent_cli/agents/memory/__init__.py +19 -1
  10. agent_cli/agents/memory/add.py +3 -3
  11. agent_cli/agents/memory/proxy.py +20 -11
  12. agent_cli/agents/rag_proxy.py +42 -10
  13. agent_cli/agents/speak.py +23 -3
  14. agent_cli/agents/transcribe.py +21 -3
  15. agent_cli/agents/transcribe_daemon.py +34 -22
  16. agent_cli/agents/voice_edit.py +18 -10
  17. agent_cli/cli.py +25 -2
  18. agent_cli/config_cmd.py +30 -11
  19. agent_cli/core/deps.py +6 -3
  20. agent_cli/core/transcription_logger.py +1 -1
  21. agent_cli/core/vad.py +6 -24
  22. agent_cli/dev/cli.py +295 -65
  23. agent_cli/docs_gen.py +18 -8
  24. agent_cli/install/extras.py +44 -13
  25. agent_cli/install/hotkeys.py +22 -11
  26. agent_cli/install/services.py +54 -14
  27. agent_cli/opts.py +43 -22
  28. agent_cli/server/cli.py +128 -62
  29. agent_cli/server/proxy/api.py +77 -19
  30. agent_cli/services/__init__.py +46 -5
  31. {agent_cli-0.70.2.dist-info → agent_cli-0.72.1.dist-info}/METADATA +627 -246
  32. {agent_cli-0.70.2.dist-info → agent_cli-0.72.1.dist-info}/RECORD +35 -34
  33. {agent_cli-0.70.2.dist-info → agent_cli-0.72.1.dist-info}/WHEEL +0 -0
  34. {agent_cli-0.70.2.dist-info → agent_cli-0.72.1.dist-info}/entry_points.txt +0 -0
  35. {agent_cli-0.70.2.dist-info → agent_cli-0.72.1.dist-info}/licenses/LICENSE +0 -0
agent_cli/core/vad.py CHANGED
@@ -3,38 +3,22 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import logging
6
- import urllib.request
7
6
  from collections import deque
8
- from pathlib import Path
9
7
 
10
8
  from agent_cli import constants
11
9
 
12
10
  try:
13
11
  import numpy as np
14
- import torch
12
+ from silero_vad_lite import SileroVAD
15
13
  except ImportError as e:
16
14
  msg = (
17
- "silero-vad is required for the transcribe-daemon command. "
15
+ "silero-vad-lite is required for the transcribe-daemon command. "
18
16
  "Install it with: `pip install agent-cli[vad]` or `uv sync --extra vad`."
19
17
  )
20
18
  raise ImportError(msg) from e
21
19
 
22
20
  LOGGER = logging.getLogger(__name__)
23
21
 
24
- _SILERO_VAD_ONNX_URL = (
25
- "https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx"
26
- )
27
-
28
-
29
- def _get_model_path() -> Path:
30
- """Get the path to the Silero VAD ONNX model, downloading if needed."""
31
- cache_dir = Path.home() / ".cache" / "silero-vad"
32
- cache_dir.mkdir(parents=True, exist_ok=True)
33
- model_path = cache_dir / "silero_vad.onnx"
34
- if not model_path.exists():
35
- urllib.request.urlretrieve(_SILERO_VAD_ONNX_URL, model_path) # noqa: S310
36
- return model_path
37
-
38
22
 
39
23
  class VoiceActivityDetector:
40
24
  """Silero VAD-based voice activity detection for audio segmentation.
@@ -56,8 +40,6 @@ class VoiceActivityDetector:
56
40
  msg = f"Sample rate must be 8000 or 16000, got {sample_rate}"
57
41
  raise ValueError(msg)
58
42
 
59
- from silero_vad.utils_vad import OnnxWrapper # noqa: PLC0415
60
-
61
43
  self.sample_rate = sample_rate
62
44
  self.threshold = threshold
63
45
  self.silence_threshold_ms = silence_threshold_ms
@@ -74,7 +56,7 @@ class VoiceActivityDetector:
74
56
  )
75
57
 
76
58
  # Model and state
77
- self._model = OnnxWrapper(str(_get_model_path()))
59
+ self._model = SileroVAD(sample_rate=sample_rate)
78
60
  self._pre_speech_buffer: deque[bytes] = deque(maxlen=pre_speech_windows)
79
61
  self._pending = bytearray()
80
62
  self._audio_buffer = bytearray()
@@ -92,7 +74,7 @@ class VoiceActivityDetector:
92
74
 
93
75
  def reset(self) -> None:
94
76
  """Reset VAD state for a new recording session."""
95
- self._model.reset_states()
77
+ self._model = SileroVAD(sample_rate=self.sample_rate)
96
78
  self._pre_speech_buffer.clear()
97
79
  self._pending.clear()
98
80
  self._audio_buffer.clear()
@@ -103,7 +85,7 @@ class VoiceActivityDetector:
103
85
  def _is_speech(self, window: bytes) -> bool:
104
86
  """Check if audio window contains speech."""
105
87
  audio = np.frombuffer(window, dtype=np.int16).astype(np.float32) / 32768.0
106
- prob = float(self._model(torch.from_numpy(audio), self.sample_rate).item())
88
+ prob = self._model.process(audio)
107
89
  LOGGER.debug("Speech prob: %.3f, threshold: %.2f", prob, self.threshold)
108
90
  return prob >= self.threshold
109
91
 
@@ -154,7 +136,7 @@ class VoiceActivityDetector:
154
136
  self._silence_samples = 0
155
137
  self._speech_samples = 0
156
138
  self._audio_buffer.clear()
157
- self._model.reset_states()
139
+ self._model = SileroVAD(sample_rate=self.sample_rate)
158
140
  else:
159
141
  # Not speaking - maintain rolling pre-speech buffer (auto-limited by deque maxlen)
160
142
  self._pre_speech_buffer.append(window)