PyPI - agent-cli - Versions diffs - 0.70.2__py3-none-any.whl → 0.72.1__py3-none-any.whl - Mend

agent-cli 0.70.2__py3-none-any.whl → 0.72.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

agent_cli/_extras.json +4 -3
agent_cli/_requirements/memory.txt +14 -1
agent_cli/_requirements/rag.txt +14 -1
agent_cli/_requirements/vad.txt +1 -85
agent_cli/_requirements/wyoming.txt +71 -0
agent_cli/agents/assistant.py +24 -28
agent_cli/agents/autocorrect.py +30 -4
agent_cli/agents/chat.py +45 -15
agent_cli/agents/memory/__init__.py +19 -1
agent_cli/agents/memory/add.py +3 -3
agent_cli/agents/memory/proxy.py +20 -11
agent_cli/agents/rag_proxy.py +42 -10
agent_cli/agents/speak.py +23 -3
agent_cli/agents/transcribe.py +21 -3
agent_cli/agents/transcribe_daemon.py +34 -22
agent_cli/agents/voice_edit.py +18 -10
agent_cli/cli.py +25 -2
agent_cli/config_cmd.py +30 -11
agent_cli/core/deps.py +6 -3
agent_cli/core/transcription_logger.py +1 -1
agent_cli/core/vad.py +6 -24
agent_cli/dev/cli.py +295 -65
agent_cli/docs_gen.py +18 -8
agent_cli/install/extras.py +44 -13
agent_cli/install/hotkeys.py +22 -11
agent_cli/install/services.py +54 -14
agent_cli/opts.py +43 -22
agent_cli/server/cli.py +128 -62
agent_cli/server/proxy/api.py +77 -19
agent_cli/services/__init__.py +46 -5
{agent_cli-0.70.2.dist-info → agent_cli-0.72.1.dist-info}/METADATA +627 -246
{agent_cli-0.70.2.dist-info → agent_cli-0.72.1.dist-info}/RECORD +35 -34
{agent_cli-0.70.2.dist-info → agent_cli-0.72.1.dist-info}/WHEEL +0 -0
{agent_cli-0.70.2.dist-info → agent_cli-0.72.1.dist-info}/entry_points.txt +0 -0
{agent_cli-0.70.2.dist-info → agent_cli-0.72.1.dist-info}/licenses/LICENSE +0 -0

agent_cli/services/__init__.py CHANGED Viewed

@@ -17,6 +17,7 @@ if TYPE_CHECKING:
 _RIFF_HEADER = b"RIFF"
+_LOG_TRUNCATE_LENGTH = 100
 def _is_wav_file(data: bytes) -> bool:
@@ -112,9 +113,17 @@ async def transcribe_audio_gemini(
     # Determine MIME type from file suffix
     mime_type = _GEMINI_MIME_TYPES.get(file_suffix.lower(), "audio/wav")
+    logger.debug(
+        "Received audio: size=%d bytes, file_suffix=%s, is_wav=%s",
+        len(audio_data),
+        file_suffix,
+        _is_wav_file(audio_data),
+    )
     # If raw PCM (no recognized format header), convert to WAV
+    # Only do this if file_suffix is .wav but data doesn't have WAV header (indicating raw PCM)
     if not _is_wav_file(audio_data) and file_suffix.lower() == ".wav":
-        logger.debug("Converting raw PCM to WAV format for Gemini")
+        logger.debug("Wrapping raw PCM data with WAV header (16kHz, 16-bit, mono)")
         audio_data = pcm_to_wav(
             audio_data,
             sample_rate=constants.AUDIO_RATE,
@@ -141,7 +150,19 @@ async def transcribe_audio_gemini(
             types.Part.from_bytes(data=audio_data, mime_type=mime_type),
         ],
     )
-    return response.text.strip()
+    text = response.text.strip()
+    if text:
+        logger.info(
+            "Transcription result: %s",
+            text[:_LOG_TRUNCATE_LENGTH] + "..." if len(text) > _LOG_TRUNCATE_LENGTH else text,
+        )
+    else:
+        logger.warning(
+            "Empty transcription returned - audio may be silent, corrupted, or in wrong format",
+        )
+    return text
 def _get_openai_client(api_key: str | None, base_url: str | None = None) -> AsyncOpenAI:
@@ -197,9 +218,17 @@ async def transcribe_audio_openai(
         base_url=openai_asr_cfg.openai_base_url,
     )
+    logger.debug(
+        "Received audio: size=%d bytes, file_suffix=%s, is_wav=%s",
+        len(audio_data),
+        file_suffix,
+        _is_wav_file(audio_data),
+    )
     # Convert raw PCM to WAV if needed (custom endpoints like faster-whisper require proper format)
+    # Only do this if file_suffix is .wav but data doesn't have WAV header (indicating raw PCM)
     if not _is_wav_file(audio_data) and file_suffix.lower() == ".wav":
-        logger.debug("Converting raw PCM to WAV format")
+        logger.debug("Wrapping raw PCM data with WAV header (16kHz, 16-bit, mono)")
         audio_data = pcm_to_wav(
             audio_data,
             sample_rate=constants.AUDIO_RATE,
@@ -211,7 +240,7 @@ async def transcribe_audio_openai(
     # Use the correct file extension so OpenAI knows the format
     audio_file.name = f"audio{file_suffix}"
-    logger.debug("Using filename: %s", audio_file.name)
+    logger.debug("Sending to OpenAI with filename: %s", audio_file.name)
     transcription_params: dict[str, object] = {
         "model": openai_asr_cfg.asr_openai_model,
@@ -225,7 +254,19 @@ async def transcribe_audio_openai(
         logger.debug("Using OpenAI ASR with prompt")
     response = await client.audio.transcriptions.create(**transcription_params)
-    return response.text
+    text = response.text
+    if text:
+        logger.info(
+            "Transcription result: %s",
+            text[:_LOG_TRUNCATE_LENGTH] + "..." if len(text) > _LOG_TRUNCATE_LENGTH else text,
+        )
+    else:
+        logger.warning(
+            "Empty transcription returned - audio may be silent, corrupted, or in wrong format",
+        )
+    return text
 async def synthesize_speech_openai(

agent-cli 0.70.2__py3-none-any.whl → 0.72.1__py3-none-any.whl

agent-cli 0.70.2__py3-none-any.whl → 0.72.1__py3-none-any.whl