myagent-ai 1.19.9 → 1.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/core/deps_checker.py +1 -2
- package/package.json +1 -1
- package/requirements-optional.txt +1 -1
- package/setup.py +1 -3
- package/web/api_server.py +11 -138
package/core/deps_checker.py
CHANGED
|
@@ -107,8 +107,7 @@ DEPENDENCIES: List[DepInfo] = [
|
|
|
107
107
|
note="PyTorch 音频处理库 (SenseVoice 必需)"),
|
|
108
108
|
DepInfo("funasr", "funasr", "1.1.0", "stt", "all",
|
|
109
109
|
note="[v1.18.8] SenseVoice 中文语音识别(首选,阿里达摩院)"),
|
|
110
|
-
|
|
111
|
-
note="Whisper 本地语音识别引擎 (备选,需 C++ 编译)"),
|
|
110
|
+
|
|
112
111
|
DepInfo("speech_recognition", "SpeechRecognition", "3.10.0", "stt", "all",
|
|
113
112
|
note="在线语音识别 (Google API,纯 Python 无需编译,Termux 兼容)"),
|
|
114
113
|
|
package/package.json
CHANGED
package/setup.py
CHANGED
|
@@ -41,7 +41,6 @@ setup(
|
|
|
41
41
|
"funasr>=1.1.0",
|
|
42
42
|
"torch>=2.0.0",
|
|
43
43
|
"torchaudio>=2.0.0",
|
|
44
|
-
"faster-whisper>=1.0.0",
|
|
45
44
|
# 浏览器自动化 (ChromeDev MCP, 无需 Playwright)
|
|
46
45
|
# 桌面 GUI 自动化 (内置技能)
|
|
47
46
|
"pynput>=1.7.6",
|
|
@@ -53,7 +52,7 @@ setup(
|
|
|
53
52
|
"discord": ["discord.py>=2.3.0"],
|
|
54
53
|
"anthropic": ["anthropic>=0.18.0"],
|
|
55
54
|
"communication": ["cryptography>=41.0.0", "websockets>=12.0"],
|
|
56
|
-
"voice": ["funasr>=1.1.0", "torch>=2.0.0", "torchaudio>=2.0.0", "faster-whisper>=1.0.0"],
|
|
55
|
+
"voice": ["funasr>=1.1.0", "torch>=2.0.0", "torchaudio>=2.0.0"],
|
|
57
56
|
"all": [
|
|
58
57
|
"python-telegram-bot>=21.0",
|
|
59
58
|
"discord.py>=2.3.0",
|
|
@@ -63,7 +62,6 @@ setup(
|
|
|
63
62
|
"funasr>=1.1.0",
|
|
64
63
|
"torch>=2.0.0",
|
|
65
64
|
"torchaudio>=2.0.0",
|
|
66
|
-
"faster-whisper>=1.0.0",
|
|
67
65
|
],
|
|
68
66
|
},
|
|
69
67
|
entry_points={
|
package/web/api_server.py
CHANGED
|
@@ -1549,11 +1549,10 @@ window.toggleFullscreen = function() {{
|
|
|
1549
1549
|
|
|
1550
1550
|
接受音频文件(WAV/WEBM/OGG),使用本地 STT 引擎转录。
|
|
1551
1551
|
支持的引擎(按优先级):
|
|
1552
|
-
1.
|
|
1553
|
-
2.
|
|
1554
|
-
3.
|
|
1555
|
-
4.
|
|
1556
|
-
5. SpeechRecognition(Google,需外网)
|
|
1552
|
+
1. SenseVoice(推荐,中文识别最佳,需:pip install funasr torch torchaudio)
|
|
1553
|
+
2. vosk(备选,需安装:pip install vosk)
|
|
1554
|
+
3. LLM API Whisper 兼容端点
|
|
1555
|
+
4. SpeechRecognition(Google,需外网)
|
|
1557
1556
|
"""
|
|
1558
1557
|
try:
|
|
1559
1558
|
reader = await request.multipart()
|
|
@@ -1639,117 +1638,6 @@ window.toggleFullscreen = function() {{
|
|
|
1639
1638
|
except Exception as e:
|
|
1640
1639
|
logger.warning(f"SenseVoice 转录失败: {e}")
|
|
1641
1640
|
|
|
1642
|
-
# ── 尝试 faster-whisper ──
|
|
1643
|
-
try:
|
|
1644
|
-
whisper_model = self._whisper_model
|
|
1645
|
-
if whisper_model is None:
|
|
1646
|
-
# 预加载未完成或未安装,尝试懒加载
|
|
1647
|
-
import warnings as _w
|
|
1648
|
-
_w.filterwarnings("ignore", message=".*HF_TOKEN.*", category=UserWarning)
|
|
1649
|
-
_w.filterwarnings("ignore", message=".*huggingface_hub.*token.*", category=UserWarning)
|
|
1650
|
-
_w.filterwarnings("ignore", message=".*ffmpeg or avconv.*", category=RuntimeWarning)
|
|
1651
|
-
os.environ.setdefault("HF_HUB_DISABLE_TELEMETRY", "1")
|
|
1652
|
-
os.environ.setdefault("HF_HUB_DISABLE_PROGRESS_BARS", "1")
|
|
1653
|
-
os.environ.setdefault("TRANSFORMERS_VERBOSITY", "error")
|
|
1654
|
-
from faster_whisper import WhisperModel
|
|
1655
|
-
model_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'models', 'whisper')
|
|
1656
|
-
self._whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8",
|
|
1657
|
-
download_root=model_dir)
|
|
1658
|
-
whisper_model = self._whisper_model
|
|
1659
|
-
logger.info("faster-whisper tiny 模型已加载 (CPU int8)")
|
|
1660
|
-
|
|
1661
|
-
# faster-whisper 需要 16kHz WAV
|
|
1662
|
-
# [v1.15.8] 使用 pydub+ffmpeg 正确转换 WebM/Opus/OGG 等格式
|
|
1663
|
-
wav_buf = io.BytesIO()
|
|
1664
|
-
try:
|
|
1665
|
-
from pydub import AudioSegment
|
|
1666
|
-
audio_buf = io.BytesIO(audio_data)
|
|
1667
|
-
seg = AudioSegment.from_file(audio_buf, format=audio_format or "webm")
|
|
1668
|
-
seg = seg.set_channels(1).set_frame_rate(16000).set_sample_width(2)
|
|
1669
|
-
seg.export(wav_buf, format="wav")
|
|
1670
|
-
wav_buf.seek(0)
|
|
1671
|
-
except Exception as _pydub_err:
|
|
1672
|
-
# pydub 不可用时 fallback:仅处理已是 WAV 的情况
|
|
1673
|
-
import wave
|
|
1674
|
-
audio_buf = io.BytesIO(audio_data)
|
|
1675
|
-
try:
|
|
1676
|
-
with wave.open(audio_buf, 'rb') as rf:
|
|
1677
|
-
wav_buf = io.BytesIO()
|
|
1678
|
-
with wave.open(wav_buf, 'wb') as wf:
|
|
1679
|
-
wf.setnchannels(1)
|
|
1680
|
-
wf.setsampwidth(2)
|
|
1681
|
-
wf.setframerate(16000)
|
|
1682
|
-
frames = rf.readframes(rf.getnframes())
|
|
1683
|
-
wf.writeframes(frames)
|
|
1684
|
-
wav_buf.seek(0)
|
|
1685
|
-
except Exception:
|
|
1686
|
-
logger.warning(f"音频格式转换失败(pydub: {_pydub_err})")
|
|
1687
|
-
return web.json_response({"error": "音频格式不支持,需要 WAV 或安装 pydub+ffmpeg"}, status=400)
|
|
1688
|
-
|
|
1689
|
-
wav_buf.seek(0)
|
|
1690
|
-
segments, info = whisper_model.transcribe(wav_buf, beam_size=1,
|
|
1691
|
-
language="zh",
|
|
1692
|
-
initial_prompt="以下是普通话的句子",
|
|
1693
|
-
vad_filter=True, vad_parameters=dict(
|
|
1694
|
-
min_silence_duration_ms=300))
|
|
1695
|
-
text = "".join(seg.text for seg in segments).strip()
|
|
1696
|
-
|
|
1697
|
-
if text:
|
|
1698
|
-
return web.json_response({"text": text, "engine": "faster-whisper"})
|
|
1699
|
-
except ImportError:
|
|
1700
|
-
logger.debug("faster-whisper 未安装,尝试自动安装...")
|
|
1701
|
-
try:
|
|
1702
|
-
from core.deps_checker import ensure_skill_deps
|
|
1703
|
-
installed = ensure_skill_deps("stt")
|
|
1704
|
-
if installed:
|
|
1705
|
-
logger.info("faster-whisper 自动安装成功,重新尝试转录")
|
|
1706
|
-
from faster_whisper import WhisperModel
|
|
1707
|
-
import os
|
|
1708
|
-
model_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'models', 'whisper')
|
|
1709
|
-
self._whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8",
|
|
1710
|
-
download_root=model_dir)
|
|
1711
|
-
whisper_model = self._whisper_model
|
|
1712
|
-
# 重新执行转录(跳过上面的 try 已完成的逻辑,直接进入转录流程)
|
|
1713
|
-
import io
|
|
1714
|
-
# [v1.15.8] 使用 pydub+ffmpeg 正确转换音频格式
|
|
1715
|
-
wav_buf = io.BytesIO()
|
|
1716
|
-
try:
|
|
1717
|
-
from pydub import AudioSegment
|
|
1718
|
-
audio_buf = io.BytesIO(audio_data)
|
|
1719
|
-
seg = AudioSegment.from_file(audio_buf, format=audio_format or "webm")
|
|
1720
|
-
seg = seg.set_channels(1).set_frame_rate(16000).set_sample_width(2)
|
|
1721
|
-
seg.export(wav_buf, format="wav")
|
|
1722
|
-
wav_buf.seek(0)
|
|
1723
|
-
except Exception as _pydub_err2:
|
|
1724
|
-
import wave
|
|
1725
|
-
audio_buf = io.BytesIO(audio_data)
|
|
1726
|
-
try:
|
|
1727
|
-
with wave.open(audio_buf, 'rb') as rf:
|
|
1728
|
-
wav_buf = io.BytesIO()
|
|
1729
|
-
with wave.open(wav_buf, 'wb') as wf:
|
|
1730
|
-
wf.setnchannels(1)
|
|
1731
|
-
wf.setsampwidth(2)
|
|
1732
|
-
wf.setframerate(16000)
|
|
1733
|
-
frames = rf.readframes(rf.getnframes())
|
|
1734
|
-
wf.writeframes(frames)
|
|
1735
|
-
wav_buf.seek(0)
|
|
1736
|
-
except Exception:
|
|
1737
|
-
logger.warning(f"音频格式转换失败(pydub: {_pydub_err2})")
|
|
1738
|
-
return web.json_response({"error": "音频格式不支持"}, status=400)
|
|
1739
|
-
wav_buf.seek(0)
|
|
1740
|
-
segments, info = whisper_model.transcribe(wav_buf, beam_size=1,
|
|
1741
|
-
language="zh",
|
|
1742
|
-
initial_prompt="以下是普通话的句子",
|
|
1743
|
-
vad_filter=True, vad_parameters=dict(
|
|
1744
|
-
min_silence_duration_ms=300))
|
|
1745
|
-
text = "".join(seg.text for seg in segments).strip()
|
|
1746
|
-
if text:
|
|
1747
|
-
return web.json_response({"text": text, "engine": "faster-whisper"})
|
|
1748
|
-
except Exception as inst_err:
|
|
1749
|
-
logger.warning(f"faster-whisper 自动安装/转录失败: {inst_err}")
|
|
1750
|
-
except Exception as e:
|
|
1751
|
-
logger.warning(f"faster-whisper 转录失败: {e}")
|
|
1752
|
-
|
|
1753
1641
|
# ── 尝试 vosk ──
|
|
1754
1642
|
try:
|
|
1755
1643
|
import vosk
|
|
@@ -1872,9 +1760,8 @@ window.toggleFullscreen = function() {{
|
|
|
1872
1760
|
"error": "未检测到可用的 STT 引擎。请尝试以下方案:\n"
|
|
1873
1761
|
" 1. pip install funasr torch torchaudio (SenseVoice,中文最佳,推荐)\n"
|
|
1874
1762
|
" 2. 配置支持 Whisper 的 LLM API(自动使用,无需安装)\n"
|
|
1875
|
-
" 3. pip install
|
|
1876
|
-
" 4. pip install
|
|
1877
|
-
" 5. pip install SpeechRecognition (需外网,国内不可用)",
|
|
1763
|
+
" 3. pip install vosk (离线本地,需下载模型)\n"
|
|
1764
|
+
" 4. pip install SpeechRecognition (需外网,国内不可用)",
|
|
1878
1765
|
"available": False,
|
|
1879
1766
|
}, status=503)
|
|
1880
1767
|
|
|
@@ -6772,8 +6659,8 @@ window.toggleFullscreen = function() {{
|
|
|
6772
6659
|
except Exception:
|
|
6773
6660
|
pass
|
|
6774
6661
|
|
|
6775
|
-
#
|
|
6776
|
-
#
|
|
6662
|
+
# 后台预加载 STT 模型,避免首次语音识别时等待数秒
|
|
6663
|
+
# 仅加载 SenseVoice(中文识别最佳)
|
|
6777
6664
|
try:
|
|
6778
6665
|
import threading
|
|
6779
6666
|
def _preload_stt():
|
|
@@ -6784,30 +6671,16 @@ window.toggleFullscreen = function() {{
|
|
|
6784
6671
|
os.environ.setdefault("HF_HUB_DISABLE_PROGRESS_BARS", "1")
|
|
6785
6672
|
os.environ.setdefault("TRANSFORMERS_VERBOSITY", "error")
|
|
6786
6673
|
|
|
6787
|
-
# 首选: SenseVoice (funasr)
|
|
6788
6674
|
try:
|
|
6789
6675
|
from funasr import AutoModel
|
|
6790
6676
|
model_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'models', 'sensevoice')
|
|
6791
6677
|
self._sensevoice_model = AutoModel(model="iic/SenseVoiceSmall", model_dir=model_dir,
|
|
6792
6678
|
device="cpu", disable_pbar=True, disable_update=True)
|
|
6793
|
-
logger.info("STT SenseVoice 模型预加载完成
|
|
6794
|
-
return # 成功则不加载 whisper
|
|
6795
|
-
except ImportError:
|
|
6796
|
-
logger.debug("SenseVoice (funasr) 未安装,尝试 faster-whisper")
|
|
6797
|
-
except Exception as e:
|
|
6798
|
-
logger.debug(f"SenseVoice 预加载失败: {e},尝试 faster-whisper")
|
|
6799
|
-
|
|
6800
|
-
# 备选: faster-whisper
|
|
6801
|
-
try:
|
|
6802
|
-
from faster_whisper import WhisperModel
|
|
6803
|
-
model_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'models', 'whisper')
|
|
6804
|
-
self._whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8",
|
|
6805
|
-
download_root=model_dir)
|
|
6806
|
-
logger.info("STT faster-whisper 模型预加载完成 (备选引擎)")
|
|
6679
|
+
logger.info("STT SenseVoice 模型预加载完成")
|
|
6807
6680
|
except ImportError:
|
|
6808
|
-
logger.debug("
|
|
6681
|
+
logger.debug("SenseVoice (funasr) 未安装,跳过 STT 预加载")
|
|
6809
6682
|
except Exception as e:
|
|
6810
|
-
logger.debug(f"
|
|
6683
|
+
logger.debug(f"SenseVoice 预加载失败(不影响使用): {e}")
|
|
6811
6684
|
except Exception as e:
|
|
6812
6685
|
logger.debug(f"STT 预加载异常(不影响使用): {e}")
|
|
6813
6686
|
threading.Thread(target=_preload_stt, daemon=True).start()
|