myagent-ai 1.23.2 → 1.23.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/core/stt.py +16 -2
- package/package.json +1 -1
- package/scripts/cli.py +18 -18
- package/web/api_server.py +43 -22
package/core/stt.py
CHANGED
|
@@ -37,12 +37,21 @@ def _convert_to_wav(audio_data: bytes, audio_format: Optional[str] = None) -> by
|
|
|
37
37
|
from pydub import AudioSegment
|
|
38
38
|
audio_buf = io.BytesIO(audio_data)
|
|
39
39
|
seg = AudioSegment.from_file(audio_buf, format=audio_format or "webm")
|
|
40
|
+
# [v1.23.2] 检查音频时长,过短直接返回原始数据
|
|
41
|
+
if seg.duration_seconds < 0.1:
|
|
42
|
+
logger.debug(f"音频过短 ({seg.duration_seconds:.2f}s),跳过转换")
|
|
43
|
+
return audio_data
|
|
40
44
|
seg = seg.set_channels(1).set_frame_rate(16000).set_sample_width(2)
|
|
41
45
|
wav_buf = io.BytesIO()
|
|
42
46
|
seg.export(wav_buf, format="wav")
|
|
43
47
|
wav_buf.seek(0)
|
|
44
48
|
return wav_buf.read()
|
|
45
|
-
except Exception:
|
|
49
|
+
except Exception as e:
|
|
50
|
+
import shutil
|
|
51
|
+
if not shutil.which("ffmpeg"):
|
|
52
|
+
logger.warning(f"pydub 转换失败且缺少 ffmpeg: {e}")
|
|
53
|
+
else:
|
|
54
|
+
logger.warning(f"pydub 音频转换失败: {e}")
|
|
46
55
|
return audio_data
|
|
47
56
|
|
|
48
57
|
|
|
@@ -68,10 +77,15 @@ async def _stt_sensevoice(audio_data: bytes, audio_format: Optional[str] = None)
|
|
|
68
77
|
)
|
|
69
78
|
logger.info("SenseVoice 模型已加载 (iic/SenseVoiceSmall, CPU)")
|
|
70
79
|
|
|
71
|
-
#
|
|
80
|
+
# [v1.23.2] 增强: pydub 转换失败记录警告、WAV 头验证、音频长度检查
|
|
72
81
|
wav_data = _convert_to_wav(audio_data, audio_format)
|
|
73
82
|
wav_path = f"/tmp/myagent_stt_{id(audio_data) % 100000}.wav"
|
|
74
83
|
try:
|
|
84
|
+
# 验证 WAV 文件头 (RIFF....WAVE)
|
|
85
|
+
if len(wav_data) < 44 or wav_data[:4] != b'RIFF' or wav_data[8:12] != b'WAVE':
|
|
86
|
+
logger.warning(f"SenseVoice 跳过: 无效 WAV 数据 (size={len(wav_data)}, header={wav_data[:12].hex()})")
|
|
87
|
+
return None
|
|
88
|
+
|
|
75
89
|
with open(wav_path, 'wb') as f:
|
|
76
90
|
f.write(wav_data)
|
|
77
91
|
|
package/package.json
CHANGED
package/scripts/cli.py
CHANGED
|
@@ -402,8 +402,8 @@ async def cmd_docx_create(args):
|
|
|
402
402
|
a = p.parse_args(args)
|
|
403
403
|
|
|
404
404
|
content = _parse_json_arg(a.content)
|
|
405
|
-
from skills.docx_skill import
|
|
406
|
-
skill =
|
|
405
|
+
from skills.docx_skill import DOCXCreateSkill
|
|
406
|
+
skill = DOCXCreateSkill()
|
|
407
407
|
result = await skill.execute(content=content, title=a.title, output_path=a.output)
|
|
408
408
|
_print_result({"success": result.success, "message": result.message,
|
|
409
409
|
"data": result.data, "error": result.error})
|
|
@@ -418,8 +418,8 @@ async def cmd_docx_read(args):
|
|
|
418
418
|
p.add_argument("path", help="Word 文件路径 (.docx)")
|
|
419
419
|
a = p.parse_args(args)
|
|
420
420
|
|
|
421
|
-
from skills.docx_skill import
|
|
422
|
-
skill =
|
|
421
|
+
from skills.docx_skill import DOCXReadSkill
|
|
422
|
+
skill = DOCXReadSkill()
|
|
423
423
|
result = await skill.execute(path=a.path)
|
|
424
424
|
_print_result({"success": result.success, "message": result.message,
|
|
425
425
|
"data": result.data, "error": result.error})
|
|
@@ -435,8 +435,8 @@ async def cmd_xlsx_create(args):
|
|
|
435
435
|
a = p.parse_args(args)
|
|
436
436
|
|
|
437
437
|
sheets = _parse_json_arg(a.sheets)
|
|
438
|
-
from skills.xlsx_skill import
|
|
439
|
-
skill =
|
|
438
|
+
from skills.xlsx_skill import XLSXCreateSkill
|
|
439
|
+
skill = XLSXCreateSkill()
|
|
440
440
|
result = await skill.execute(sheets=sheets, title=a.title, output_path=a.output)
|
|
441
441
|
_print_result({"success": result.success, "message": result.message,
|
|
442
442
|
"data": result.data, "error": result.error})
|
|
@@ -452,8 +452,8 @@ async def cmd_xlsx_read(args):
|
|
|
452
452
|
p.add_argument("--sheet", default="", help="工作表名称 (默认全部)")
|
|
453
453
|
a = p.parse_args(args)
|
|
454
454
|
|
|
455
|
-
from skills.xlsx_skill import
|
|
456
|
-
skill =
|
|
455
|
+
from skills.xlsx_skill import XLSXReadSkill
|
|
456
|
+
skill = XLSXReadSkill()
|
|
457
457
|
result = await skill.execute(path=a.path, sheet_name=a.sheet)
|
|
458
458
|
_print_result({"success": result.success, "message": result.message,
|
|
459
459
|
"data": result.data, "error": result.error})
|
|
@@ -471,8 +471,8 @@ async def cmd_xlsx_edit(args):
|
|
|
471
471
|
a = p.parse_args(args)
|
|
472
472
|
|
|
473
473
|
data = _parse_json_arg(a.data)
|
|
474
|
-
from skills.xlsx_skill import
|
|
475
|
-
skill =
|
|
474
|
+
from skills.xlsx_skill import XLSXEditSkill
|
|
475
|
+
skill = XLSXEditSkill()
|
|
476
476
|
result = await skill.execute(path=a.path, action=a.action, data=data, sheet_name=a.sheet)
|
|
477
477
|
_print_result({"success": result.success, "message": result.message,
|
|
478
478
|
"data": result.data, "error": result.error})
|
|
@@ -488,8 +488,8 @@ async def cmd_ppt_create(args):
|
|
|
488
488
|
a = p.parse_args(args)
|
|
489
489
|
|
|
490
490
|
slides = _parse_json_arg(a.slides)
|
|
491
|
-
from skills.ppt_skill import
|
|
492
|
-
skill =
|
|
491
|
+
from skills.ppt_skill import PPTCreateSkill
|
|
492
|
+
skill = PPTCreateSkill()
|
|
493
493
|
result = await skill.execute(slides=slides, theme=a.theme, output_path=a.output)
|
|
494
494
|
_print_result({"success": result.success, "message": result.message,
|
|
495
495
|
"data": result.data, "error": result.error})
|
|
@@ -504,8 +504,8 @@ async def cmd_ppt_read(args):
|
|
|
504
504
|
p.add_argument("path", help="PPT 文件路径 (.pptx)")
|
|
505
505
|
a = p.parse_args(args)
|
|
506
506
|
|
|
507
|
-
from skills.ppt_skill import
|
|
508
|
-
skill =
|
|
507
|
+
from skills.ppt_skill import PPTReadSkill
|
|
508
|
+
skill = PPTReadSkill()
|
|
509
509
|
result = await skill.execute(path=a.path)
|
|
510
510
|
_print_result({"success": result.success, "message": result.message,
|
|
511
511
|
"data": result.data, "error": result.error})
|
|
@@ -521,8 +521,8 @@ async def cmd_pdf_create(args):
|
|
|
521
521
|
a = p.parse_args(args)
|
|
522
522
|
|
|
523
523
|
content = _parse_json_arg(a.content)
|
|
524
|
-
from skills.pdf_skill import
|
|
525
|
-
skill =
|
|
524
|
+
from skills.pdf_skill import PDFCreateSkill
|
|
525
|
+
skill = PDFCreateSkill()
|
|
526
526
|
result = await skill.execute(content=content, palette=a.palette, output_path=a.output)
|
|
527
527
|
_print_result({"success": result.success, "message": result.message,
|
|
528
528
|
"data": result.data, "error": result.error})
|
|
@@ -539,8 +539,8 @@ async def cmd_pdf_read(args):
|
|
|
539
539
|
p.add_argument("--end", type=int, default=0, help="结束页 (默认全部)")
|
|
540
540
|
a = p.parse_args(args)
|
|
541
541
|
|
|
542
|
-
from skills.pdf_skill import
|
|
543
|
-
skill =
|
|
542
|
+
from skills.pdf_skill import PDFReadSkill
|
|
543
|
+
skill = PDFReadSkill()
|
|
544
544
|
result = await skill.execute(path=a.path, start_page=a.start, end_page=a.end)
|
|
545
545
|
_print_result({"success": result.success, "message": result.message,
|
|
546
546
|
"data": result.data, "error": result.error})
|
package/web/api_server.py
CHANGED
|
@@ -2497,36 +2497,57 @@ window.addEventListener('beforeunload', function() {{
|
|
|
2497
2497
|
logger.info("SenseVoice 模型已加载 (iic/SenseVoiceSmall, CPU)")
|
|
2498
2498
|
|
|
2499
2499
|
# SenseVoice 接受 16kHz WAV
|
|
2500
|
+
# [v1.23.2] 增强: pydub 转换失败时记录警告、验证 WAV 头、检查音频长度
|
|
2500
2501
|
wav_path = f"/tmp/myagent_stt_{id(audio_data) % 100000}.wav"
|
|
2501
2502
|
wav_buf = io.BytesIO()
|
|
2503
|
+
pydub_ok = False
|
|
2502
2504
|
try:
|
|
2503
2505
|
from pydub import AudioSegment
|
|
2504
2506
|
audio_buf = io.BytesIO(audio_data)
|
|
2505
2507
|
seg = AudioSegment.from_file(audio_buf, format=audio_format or "webm")
|
|
2506
|
-
|
|
2507
|
-
seg.
|
|
2508
|
-
|
|
2508
|
+
# 检查音频时长,过短直接跳过
|
|
2509
|
+
if seg.duration_seconds < 0.1:
|
|
2510
|
+
logger.debug(f"SenseVoice 跳过: 音频过短 ({seg.duration_seconds:.2f}s)")
|
|
2511
|
+
else:
|
|
2512
|
+
seg = seg.set_channels(1).set_frame_rate(16000).set_sample_width(2)
|
|
2513
|
+
seg.export(wav_buf, format="wav")
|
|
2514
|
+
pydub_ok = True
|
|
2515
|
+
except Exception as conv_err:
|
|
2516
|
+
import shutil
|
|
2517
|
+
if not shutil.which("ffmpeg"):
|
|
2518
|
+
logger.warning(f"pydub 转换失败且缺少 ffmpeg: {conv_err}. 安装: sudo apt install ffmpeg")
|
|
2519
|
+
else:
|
|
2520
|
+
logger.warning(f"pydub 音频转换失败: {conv_err}")
|
|
2521
|
+
|
|
2522
|
+
if not pydub_ok:
|
|
2509
2523
|
wav_buf = io.BytesIO(audio_data)
|
|
2524
|
+
|
|
2510
2525
|
wav_buf.seek(0)
|
|
2511
|
-
|
|
2512
|
-
|
|
2513
|
-
|
|
2514
|
-
|
|
2515
|
-
|
|
2516
|
-
|
|
2517
|
-
|
|
2518
|
-
|
|
2519
|
-
|
|
2520
|
-
|
|
2521
|
-
|
|
2522
|
-
|
|
2523
|
-
|
|
2524
|
-
|
|
2525
|
-
|
|
2526
|
-
|
|
2527
|
-
|
|
2528
|
-
|
|
2529
|
-
|
|
2526
|
+
wav_bytes = wav_buf.read()
|
|
2527
|
+
|
|
2528
|
+
# 验证 WAV 文件头 (RIFF....WAVE)
|
|
2529
|
+
if len(wav_bytes) < 44 or wav_bytes[:4] != b'RIFF' or wav_bytes[8:12] != b'WAVE':
|
|
2530
|
+
logger.warning(f"SenseVoice 跳过: 无效 WAV 数据 (size={len(wav_bytes)}, header={wav_bytes[:12].hex()})")
|
|
2531
|
+
else:
|
|
2532
|
+
with open(wav_path, 'wb') as f:
|
|
2533
|
+
f.write(wav_bytes)
|
|
2534
|
+
|
|
2535
|
+
# SenseVoice 推理
|
|
2536
|
+
res = sv_model.generate(input=wav_path, cache={},
|
|
2537
|
+
language="auto", # 自动检测语言
|
|
2538
|
+
use_itn=True, # 逆文本标准化(数字/日期等)
|
|
2539
|
+
batch_size_s=300)
|
|
2540
|
+
if res and len(res) > 0 and len(res[0]) > 0:
|
|
2541
|
+
text = res[0][0]["text"] if isinstance(res[0][0], dict) else str(res[0][0])
|
|
2542
|
+
# SenseVoice 可能输出带 <|zh|><|en|><|EMO|> 等特殊 token,清理掉
|
|
2543
|
+
import re
|
|
2544
|
+
text = re.sub(r'<\|[^|]+\|>', '', text).strip()
|
|
2545
|
+
if text:
|
|
2546
|
+
try:
|
|
2547
|
+
os.remove(wav_path)
|
|
2548
|
+
except Exception:
|
|
2549
|
+
pass
|
|
2550
|
+
return web.json_response({"text": text, "engine": "sensevoice"})
|
|
2530
2551
|
try:
|
|
2531
2552
|
os.remove(wav_path)
|
|
2532
2553
|
except Exception:
|