agent-cli 0.70.2__py3-none-any.whl → 0.72.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_cli/_extras.json +4 -3
- agent_cli/_requirements/memory.txt +14 -1
- agent_cli/_requirements/rag.txt +14 -1
- agent_cli/_requirements/vad.txt +1 -85
- agent_cli/_requirements/wyoming.txt +71 -0
- agent_cli/agents/assistant.py +24 -28
- agent_cli/agents/autocorrect.py +30 -4
- agent_cli/agents/chat.py +45 -15
- agent_cli/agents/memory/__init__.py +19 -1
- agent_cli/agents/memory/add.py +3 -3
- agent_cli/agents/memory/proxy.py +20 -11
- agent_cli/agents/rag_proxy.py +42 -10
- agent_cli/agents/speak.py +23 -3
- agent_cli/agents/transcribe.py +21 -3
- agent_cli/agents/transcribe_daemon.py +34 -22
- agent_cli/agents/voice_edit.py +18 -10
- agent_cli/cli.py +25 -2
- agent_cli/config_cmd.py +30 -11
- agent_cli/core/deps.py +6 -3
- agent_cli/core/transcription_logger.py +1 -1
- agent_cli/core/vad.py +6 -24
- agent_cli/dev/cli.py +295 -65
- agent_cli/docs_gen.py +18 -8
- agent_cli/install/extras.py +44 -13
- agent_cli/install/hotkeys.py +22 -11
- agent_cli/install/services.py +54 -14
- agent_cli/opts.py +43 -22
- agent_cli/server/cli.py +128 -62
- agent_cli/server/proxy/api.py +77 -19
- agent_cli/services/__init__.py +46 -5
- {agent_cli-0.70.2.dist-info → agent_cli-0.72.1.dist-info}/METADATA +627 -246
- {agent_cli-0.70.2.dist-info → agent_cli-0.72.1.dist-info}/RECORD +35 -34
- {agent_cli-0.70.2.dist-info → agent_cli-0.72.1.dist-info}/WHEEL +0 -0
- {agent_cli-0.70.2.dist-info → agent_cli-0.72.1.dist-info}/entry_points.txt +0 -0
- {agent_cli-0.70.2.dist-info → agent_cli-0.72.1.dist-info}/licenses/LICENSE +0 -0
agent_cli/services/__init__.py
CHANGED
|
@@ -17,6 +17,7 @@ if TYPE_CHECKING:
|
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
_RIFF_HEADER = b"RIFF"
|
|
20
|
+
_LOG_TRUNCATE_LENGTH = 100
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
def _is_wav_file(data: bytes) -> bool:
|
|
@@ -112,9 +113,17 @@ async def transcribe_audio_gemini(
|
|
|
112
113
|
# Determine MIME type from file suffix
|
|
113
114
|
mime_type = _GEMINI_MIME_TYPES.get(file_suffix.lower(), "audio/wav")
|
|
114
115
|
|
|
116
|
+
logger.debug(
|
|
117
|
+
"Received audio: size=%d bytes, file_suffix=%s, is_wav=%s",
|
|
118
|
+
len(audio_data),
|
|
119
|
+
file_suffix,
|
|
120
|
+
_is_wav_file(audio_data),
|
|
121
|
+
)
|
|
122
|
+
|
|
115
123
|
# If raw PCM (no recognized format header), convert to WAV
|
|
124
|
+
# Only do this if file_suffix is .wav but data doesn't have WAV header (indicating raw PCM)
|
|
116
125
|
if not _is_wav_file(audio_data) and file_suffix.lower() == ".wav":
|
|
117
|
-
logger.debug("
|
|
126
|
+
logger.debug("Wrapping raw PCM data with WAV header (16kHz, 16-bit, mono)")
|
|
118
127
|
audio_data = pcm_to_wav(
|
|
119
128
|
audio_data,
|
|
120
129
|
sample_rate=constants.AUDIO_RATE,
|
|
@@ -141,7 +150,19 @@ async def transcribe_audio_gemini(
|
|
|
141
150
|
types.Part.from_bytes(data=audio_data, mime_type=mime_type),
|
|
142
151
|
],
|
|
143
152
|
)
|
|
144
|
-
|
|
153
|
+
text = response.text.strip()
|
|
154
|
+
|
|
155
|
+
if text:
|
|
156
|
+
logger.info(
|
|
157
|
+
"Transcription result: %s",
|
|
158
|
+
text[:_LOG_TRUNCATE_LENGTH] + "..." if len(text) > _LOG_TRUNCATE_LENGTH else text,
|
|
159
|
+
)
|
|
160
|
+
else:
|
|
161
|
+
logger.warning(
|
|
162
|
+
"Empty transcription returned - audio may be silent, corrupted, or in wrong format",
|
|
163
|
+
)
|
|
164
|
+
|
|
165
|
+
return text
|
|
145
166
|
|
|
146
167
|
|
|
147
168
|
def _get_openai_client(api_key: str | None, base_url: str | None = None) -> AsyncOpenAI:
|
|
@@ -197,9 +218,17 @@ async def transcribe_audio_openai(
|
|
|
197
218
|
base_url=openai_asr_cfg.openai_base_url,
|
|
198
219
|
)
|
|
199
220
|
|
|
221
|
+
logger.debug(
|
|
222
|
+
"Received audio: size=%d bytes, file_suffix=%s, is_wav=%s",
|
|
223
|
+
len(audio_data),
|
|
224
|
+
file_suffix,
|
|
225
|
+
_is_wav_file(audio_data),
|
|
226
|
+
)
|
|
227
|
+
|
|
200
228
|
# Convert raw PCM to WAV if needed (custom endpoints like faster-whisper require proper format)
|
|
229
|
+
# Only do this if file_suffix is .wav but data doesn't have WAV header (indicating raw PCM)
|
|
201
230
|
if not _is_wav_file(audio_data) and file_suffix.lower() == ".wav":
|
|
202
|
-
logger.debug("
|
|
231
|
+
logger.debug("Wrapping raw PCM data with WAV header (16kHz, 16-bit, mono)")
|
|
203
232
|
audio_data = pcm_to_wav(
|
|
204
233
|
audio_data,
|
|
205
234
|
sample_rate=constants.AUDIO_RATE,
|
|
@@ -211,7 +240,7 @@ async def transcribe_audio_openai(
|
|
|
211
240
|
# Use the correct file extension so OpenAI knows the format
|
|
212
241
|
audio_file.name = f"audio{file_suffix}"
|
|
213
242
|
|
|
214
|
-
logger.debug("
|
|
243
|
+
logger.debug("Sending to OpenAI with filename: %s", audio_file.name)
|
|
215
244
|
|
|
216
245
|
transcription_params: dict[str, object] = {
|
|
217
246
|
"model": openai_asr_cfg.asr_openai_model,
|
|
@@ -225,7 +254,19 @@ async def transcribe_audio_openai(
|
|
|
225
254
|
logger.debug("Using OpenAI ASR with prompt")
|
|
226
255
|
|
|
227
256
|
response = await client.audio.transcriptions.create(**transcription_params)
|
|
228
|
-
|
|
257
|
+
text = response.text
|
|
258
|
+
|
|
259
|
+
if text:
|
|
260
|
+
logger.info(
|
|
261
|
+
"Transcription result: %s",
|
|
262
|
+
text[:_LOG_TRUNCATE_LENGTH] + "..." if len(text) > _LOG_TRUNCATE_LENGTH else text,
|
|
263
|
+
)
|
|
264
|
+
else:
|
|
265
|
+
logger.warning(
|
|
266
|
+
"Empty transcription returned - audio may be silent, corrupted, or in wrong format",
|
|
267
|
+
)
|
|
268
|
+
|
|
269
|
+
return text
|
|
229
270
|
|
|
230
271
|
|
|
231
272
|
async def synthesize_speech_openai(
|