agent-cli 0.70.2__py3-none-any.whl → 0.72.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35):
  1. agent_cli/_extras.json +4 -3
  2. agent_cli/_requirements/memory.txt +14 -1
  3. agent_cli/_requirements/rag.txt +14 -1
  4. agent_cli/_requirements/vad.txt +1 -85
  5. agent_cli/_requirements/wyoming.txt +71 -0
  6. agent_cli/agents/assistant.py +24 -28
  7. agent_cli/agents/autocorrect.py +30 -4
  8. agent_cli/agents/chat.py +45 -15
  9. agent_cli/agents/memory/__init__.py +19 -1
  10. agent_cli/agents/memory/add.py +3 -3
  11. agent_cli/agents/memory/proxy.py +20 -11
  12. agent_cli/agents/rag_proxy.py +42 -10
  13. agent_cli/agents/speak.py +23 -3
  14. agent_cli/agents/transcribe.py +21 -3
  15. agent_cli/agents/transcribe_daemon.py +34 -22
  16. agent_cli/agents/voice_edit.py +18 -10
  17. agent_cli/cli.py +25 -2
  18. agent_cli/config_cmd.py +30 -11
  19. agent_cli/core/deps.py +6 -3
  20. agent_cli/core/transcription_logger.py +1 -1
  21. agent_cli/core/vad.py +6 -24
  22. agent_cli/dev/cli.py +295 -65
  23. agent_cli/docs_gen.py +18 -8
  24. agent_cli/install/extras.py +44 -13
  25. agent_cli/install/hotkeys.py +22 -11
  26. agent_cli/install/services.py +54 -14
  27. agent_cli/opts.py +43 -22
  28. agent_cli/server/cli.py +128 -62
  29. agent_cli/server/proxy/api.py +77 -19
  30. agent_cli/services/__init__.py +46 -5
  31. {agent_cli-0.70.2.dist-info → agent_cli-0.72.1.dist-info}/METADATA +627 -246
  32. {agent_cli-0.70.2.dist-info → agent_cli-0.72.1.dist-info}/RECORD +35 -34
  33. {agent_cli-0.70.2.dist-info → agent_cli-0.72.1.dist-info}/WHEEL +0 -0
  34. {agent_cli-0.70.2.dist-info → agent_cli-0.72.1.dist-info}/entry_points.txt +0 -0
  35. {agent_cli-0.70.2.dist-info → agent_cli-0.72.1.dist-info}/licenses/LICENSE +0 -0
@@ -17,6 +17,7 @@ if TYPE_CHECKING:
17
17
 
18
18
 
19
19
  _RIFF_HEADER = b"RIFF"
20
+ _LOG_TRUNCATE_LENGTH = 100
20
21
 
21
22
 
22
23
  def _is_wav_file(data: bytes) -> bool:
@@ -112,9 +113,17 @@ async def transcribe_audio_gemini(
112
113
  # Determine MIME type from file suffix
113
114
  mime_type = _GEMINI_MIME_TYPES.get(file_suffix.lower(), "audio/wav")
114
115
 
116
+ logger.debug(
117
+ "Received audio: size=%d bytes, file_suffix=%s, is_wav=%s",
118
+ len(audio_data),
119
+ file_suffix,
120
+ _is_wav_file(audio_data),
121
+ )
122
+
115
123
  # If raw PCM (no recognized format header), convert to WAV
124
+ # Only do this if file_suffix is .wav but data doesn't have WAV header (indicating raw PCM)
116
125
  if not _is_wav_file(audio_data) and file_suffix.lower() == ".wav":
117
- logger.debug("Converting raw PCM to WAV format for Gemini")
126
+ logger.debug("Wrapping raw PCM data with WAV header (16kHz, 16-bit, mono)")
118
127
  audio_data = pcm_to_wav(
119
128
  audio_data,
120
129
  sample_rate=constants.AUDIO_RATE,
@@ -141,7 +150,19 @@ async def transcribe_audio_gemini(
141
150
  types.Part.from_bytes(data=audio_data, mime_type=mime_type),
142
151
  ],
143
152
  )
144
- return response.text.strip()
153
+ text = response.text.strip()
154
+
155
+ if text:
156
+ logger.info(
157
+ "Transcription result: %s",
158
+ text[:_LOG_TRUNCATE_LENGTH] + "..." if len(text) > _LOG_TRUNCATE_LENGTH else text,
159
+ )
160
+ else:
161
+ logger.warning(
162
+ "Empty transcription returned - audio may be silent, corrupted, or in wrong format",
163
+ )
164
+
165
+ return text
145
166
 
146
167
 
147
168
  def _get_openai_client(api_key: str | None, base_url: str | None = None) -> AsyncOpenAI:
@@ -197,9 +218,17 @@ async def transcribe_audio_openai(
197
218
  base_url=openai_asr_cfg.openai_base_url,
198
219
  )
199
220
 
221
+ logger.debug(
222
+ "Received audio: size=%d bytes, file_suffix=%s, is_wav=%s",
223
+ len(audio_data),
224
+ file_suffix,
225
+ _is_wav_file(audio_data),
226
+ )
227
+
200
228
  # Convert raw PCM to WAV if needed (custom endpoints like faster-whisper require proper format)
229
+ # Only do this if file_suffix is .wav but data doesn't have WAV header (indicating raw PCM)
201
230
  if not _is_wav_file(audio_data) and file_suffix.lower() == ".wav":
202
- logger.debug("Converting raw PCM to WAV format")
231
+ logger.debug("Wrapping raw PCM data with WAV header (16kHz, 16-bit, mono)")
203
232
  audio_data = pcm_to_wav(
204
233
  audio_data,
205
234
  sample_rate=constants.AUDIO_RATE,
@@ -211,7 +240,7 @@ async def transcribe_audio_openai(
211
240
  # Use the correct file extension so OpenAI knows the format
212
241
  audio_file.name = f"audio{file_suffix}"
213
242
 
214
- logger.debug("Using filename: %s", audio_file.name)
243
+ logger.debug("Sending to OpenAI with filename: %s", audio_file.name)
215
244
 
216
245
  transcription_params: dict[str, object] = {
217
246
  "model": openai_asr_cfg.asr_openai_model,
@@ -225,7 +254,19 @@ async def transcribe_audio_openai(
225
254
  logger.debug("Using OpenAI ASR with prompt")
226
255
 
227
256
  response = await client.audio.transcriptions.create(**transcription_params)
228
- return response.text
257
+ text = response.text
258
+
259
+ if text:
260
+ logger.info(
261
+ "Transcription result: %s",
262
+ text[:_LOG_TRUNCATE_LENGTH] + "..." if len(text) > _LOG_TRUNCATE_LENGTH else text,
263
+ )
264
+ else:
265
+ logger.warning(
266
+ "Empty transcription returned - audio may be silent, corrupted, or in wrong format",
267
+ )
268
+
269
+ return text
229
270
 
230
271
 
231
272
  async def synthesize_speech_openai(