npm - myagent-ai - Versions diffs - 1.15.65 → 1.15.66 - Mend

myagent-ai 1.15.65 → 1.15.66

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.

Files changed (3)

package/core/deps_checker.py CHANGED Viewed

@@ -76,7 +76,9 @@ DEPENDENCIES: List[DepInfo] = [
     # ── 语音识别 (STT) ──
     DepInfo("faster_whisper", "faster-whisper", "1.0.0", "stt", "all",
-            note="本地语音识别引擎 (首次使用自动下载模型)"),
+            note="本地语音识别引擎 (需要 C++ 编译)"),
+    DepInfo("speech_recognition", "SpeechRecognition", "3.10.0", "stt", "all",
+            note="在线语音识别 (Google API，纯 Python 无需编译，Termux 兼容)"),
     # ── 浏览器自动化 (ChromeDev MCP) ──
     # Playwright 已移除，浏览器自动化统一使用 ChromeDevTools Protocol (MCP)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "myagent-ai",
-  "version": "1.15.65",
+  "version": "1.15.66",
   "description": "本地桌面端执行型AI助手 - Open Interpreter 风格 | Local Desktop Execution-Oriented AI Assistant",
   "main": "main.py",
   "bin": {

package/web/api_server.py CHANGED Viewed

@@ -1219,11 +1219,41 @@ class ApiServer:
             except Exception as e:
                 logger.warning(f"vosk 转录失败: {e}")
+            # ── 尝试 SpeechRecognition (Google Web Speech API, 纯 Python 无需编译) ──
+            try:
+                import speech_recognition as sr
+                wav_buf = io.BytesIO(audio_data)
+                try:
+                    audio_buf = io.BytesIO(audio_data)
+                    from pydub import AudioSegment
+                    seg = AudioSegment.from_file(audio_buf, format=audio_format or "webm")
+                    seg = seg.set_channels(1).set_frame_rate(16000).set_sample_width(2)
+                    seg.export(wav_buf, format="wav")
+                except Exception:
+                    wav_buf = io.BytesIO(audio_data)
+                wav_buf.seek(0)
+                recognizer = sr.Recognizer()
+                with sr.AudioFile(wav_buf) as source:
+                    audio = recognizer.record(source)
+                text = recognizer.recognize_google(audio, language="zh-CN")
+                if text:
+                    logger.info("SpeechRecognition (Google API) 转录成功")
+                    return web.json_response({"text": text, "engine": "speech_recognition"})
+            except ImportError:
+                logger.debug("SpeechRecognition 未安装，跳过")
+            except sr.UnknownValueError:
+                logger.debug("SpeechRecognition 无法识别音频内容")
+            except sr.RequestError as e:
+                logger.warning(f"SpeechRecognition API 请求失败: {e}")
+            except Exception as e:
+                logger.warning(f"SpeechRecognition 转录失败: {e}")
             # ── 没有可用的 STT 引擎 ──
             return web.json_response({
-                "error": "未检测到本地 STT 引擎。请安装 faster-whisper（推荐）或 vosk：\n"
-                         "  pip install faster-whisper  (首次使用会自动下载 tiny 模型 ~39MB)\n"
-                         "  或 pip install vosk",
+                "error": "未检测到本地 STT 引擎。请安装以下任一引擎：\n"
+                         "  pip install faster-whisper  (推荐，离线，首次下载模型 ~39MB)\n"
+                         "  pip install vosk             (离线，需下载模型)\n"
+                         "  pip install SpeechRecognition (在线，使用 Google API，无需编译)",
                 "available": False,
             }, status=503)