myagent-ai 1.23.2 → 1.23.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/core/stt.py CHANGED
@@ -37,12 +37,21 @@ def _convert_to_wav(audio_data: bytes, audio_format: Optional[str] = None) -> by
37
37
  from pydub import AudioSegment
38
38
  audio_buf = io.BytesIO(audio_data)
39
39
  seg = AudioSegment.from_file(audio_buf, format=audio_format or "webm")
40
+ # [v1.23.2] 检查音频时长,过短直接返回原始数据
41
+ if seg.duration_seconds < 0.1:
42
+ logger.debug(f"音频过短 ({seg.duration_seconds:.2f}s),跳过转换")
43
+ return audio_data
40
44
  seg = seg.set_channels(1).set_frame_rate(16000).set_sample_width(2)
41
45
  wav_buf = io.BytesIO()
42
46
  seg.export(wav_buf, format="wav")
43
47
  wav_buf.seek(0)
44
48
  return wav_buf.read()
45
- except Exception:
49
+ except Exception as e:
50
+ import shutil
51
+ if not shutil.which("ffmpeg"):
52
+ logger.warning(f"pydub 转换失败且缺少 ffmpeg: {e}")
53
+ else:
54
+ logger.warning(f"pydub 音频转换失败: {e}")
46
55
  return audio_data
47
56
 
48
57
 
@@ -68,10 +77,15 @@ async def _stt_sensevoice(audio_data: bytes, audio_format: Optional[str] = None)
68
77
  )
69
78
  logger.info("SenseVoice 模型已加载 (iic/SenseVoiceSmall, CPU)")
70
79
 
71
- # 转换为 16kHz WAV
80
+ # [v1.23.2] 增强: pydub 转换失败记录警告、WAV 头验证、音频长度检查
72
81
  wav_data = _convert_to_wav(audio_data, audio_format)
73
82
  wav_path = f"/tmp/myagent_stt_{id(audio_data) % 100000}.wav"
74
83
  try:
84
+ # 验证 WAV 文件头 (RIFF....WAVE)
85
+ if len(wav_data) < 44 or wav_data[:4] != b'RIFF' or wav_data[8:12] != b'WAVE':
86
+ logger.warning(f"SenseVoice 跳过: 无效 WAV 数据 (size={len(wav_data)}, header={wav_data[:12].hex()})")
87
+ return None
88
+
75
89
  with open(wav_path, 'wb') as f:
76
90
  f.write(wav_data)
77
91
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "myagent-ai",
3
- "version": "1.23.2",
3
+ "version": "1.23.4",
4
4
  "description": "本地桌面端执行型AI助手 - Open Interpreter 风格 | Local Desktop Execution-Oriented AI Assistant",
5
5
  "main": "main.py",
6
6
  "bin": {
package/scripts/cli.py CHANGED
@@ -402,8 +402,8 @@ async def cmd_docx_create(args):
402
402
  a = p.parse_args(args)
403
403
 
404
404
  content = _parse_json_arg(a.content)
405
- from skills.docx_skill import DocxCreateSkill
406
- skill = DocxCreateSkill()
405
+ from skills.docx_skill import DOCXCreateSkill
406
+ skill = DOCXCreateSkill()
407
407
  result = await skill.execute(content=content, title=a.title, output_path=a.output)
408
408
  _print_result({"success": result.success, "message": result.message,
409
409
  "data": result.data, "error": result.error})
@@ -418,8 +418,8 @@ async def cmd_docx_read(args):
418
418
  p.add_argument("path", help="Word 文件路径 (.docx)")
419
419
  a = p.parse_args(args)
420
420
 
421
- from skills.docx_skill import DocxReadSkill
422
- skill = DocxReadSkill()
421
+ from skills.docx_skill import DOCXReadSkill
422
+ skill = DOCXReadSkill()
423
423
  result = await skill.execute(path=a.path)
424
424
  _print_result({"success": result.success, "message": result.message,
425
425
  "data": result.data, "error": result.error})
@@ -435,8 +435,8 @@ async def cmd_xlsx_create(args):
435
435
  a = p.parse_args(args)
436
436
 
437
437
  sheets = _parse_json_arg(a.sheets)
438
- from skills.xlsx_skill import XlsxCreateSkill
439
- skill = XlsxCreateSkill()
438
+ from skills.xlsx_skill import XLSXCreateSkill
439
+ skill = XLSXCreateSkill()
440
440
  result = await skill.execute(sheets=sheets, title=a.title, output_path=a.output)
441
441
  _print_result({"success": result.success, "message": result.message,
442
442
  "data": result.data, "error": result.error})
@@ -452,8 +452,8 @@ async def cmd_xlsx_read(args):
452
452
  p.add_argument("--sheet", default="", help="工作表名称 (默认全部)")
453
453
  a = p.parse_args(args)
454
454
 
455
- from skills.xlsx_skill import XlsxReadSkill
456
- skill = XlsxReadSkill()
455
+ from skills.xlsx_skill import XLSXReadSkill
456
+ skill = XLSXReadSkill()
457
457
  result = await skill.execute(path=a.path, sheet_name=a.sheet)
458
458
  _print_result({"success": result.success, "message": result.message,
459
459
  "data": result.data, "error": result.error})
@@ -471,8 +471,8 @@ async def cmd_xlsx_edit(args):
471
471
  a = p.parse_args(args)
472
472
 
473
473
  data = _parse_json_arg(a.data)
474
- from skills.xlsx_skill import XlsxEditSkill
475
- skill = XlsxEditSkill()
474
+ from skills.xlsx_skill import XLSXEditSkill
475
+ skill = XLSXEditSkill()
476
476
  result = await skill.execute(path=a.path, action=a.action, data=data, sheet_name=a.sheet)
477
477
  _print_result({"success": result.success, "message": result.message,
478
478
  "data": result.data, "error": result.error})
@@ -488,8 +488,8 @@ async def cmd_ppt_create(args):
488
488
  a = p.parse_args(args)
489
489
 
490
490
  slides = _parse_json_arg(a.slides)
491
- from skills.ppt_skill import PptCreateSkill
492
- skill = PptCreateSkill()
491
+ from skills.ppt_skill import PPTCreateSkill
492
+ skill = PPTCreateSkill()
493
493
  result = await skill.execute(slides=slides, theme=a.theme, output_path=a.output)
494
494
  _print_result({"success": result.success, "message": result.message,
495
495
  "data": result.data, "error": result.error})
@@ -504,8 +504,8 @@ async def cmd_ppt_read(args):
504
504
  p.add_argument("path", help="PPT 文件路径 (.pptx)")
505
505
  a = p.parse_args(args)
506
506
 
507
- from skills.ppt_skill import PptReadSkill
508
- skill = PptReadSkill()
507
+ from skills.ppt_skill import PPTReadSkill
508
+ skill = PPTReadSkill()
509
509
  result = await skill.execute(path=a.path)
510
510
  _print_result({"success": result.success, "message": result.message,
511
511
  "data": result.data, "error": result.error})
@@ -521,8 +521,8 @@ async def cmd_pdf_create(args):
521
521
  a = p.parse_args(args)
522
522
 
523
523
  content = _parse_json_arg(a.content)
524
- from skills.pdf_skill import PdfCreateSkill
525
- skill = PdfCreateSkill()
524
+ from skills.pdf_skill import PDFCreateSkill
525
+ skill = PDFCreateSkill()
526
526
  result = await skill.execute(content=content, palette=a.palette, output_path=a.output)
527
527
  _print_result({"success": result.success, "message": result.message,
528
528
  "data": result.data, "error": result.error})
@@ -539,8 +539,8 @@ async def cmd_pdf_read(args):
539
539
  p.add_argument("--end", type=int, default=0, help="结束页 (默认全部)")
540
540
  a = p.parse_args(args)
541
541
 
542
- from skills.pdf_skill import PdfReadSkill
543
- skill = PdfReadSkill()
542
+ from skills.pdf_skill import PDFReadSkill
543
+ skill = PDFReadSkill()
544
544
  result = await skill.execute(path=a.path, start_page=a.start, end_page=a.end)
545
545
  _print_result({"success": result.success, "message": result.message,
546
546
  "data": result.data, "error": result.error})
package/web/api_server.py CHANGED
@@ -2497,36 +2497,57 @@ window.addEventListener('beforeunload', function() {{
2497
2497
  logger.info("SenseVoice 模型已加载 (iic/SenseVoiceSmall, CPU)")
2498
2498
 
2499
2499
  # SenseVoice 接受 16kHz WAV
2500
+ # [v1.23.2] 增强: pydub 转换失败时记录警告、验证 WAV 头、检查音频长度
2500
2501
  wav_path = f"/tmp/myagent_stt_{id(audio_data) % 100000}.wav"
2501
2502
  wav_buf = io.BytesIO()
2503
+ pydub_ok = False
2502
2504
  try:
2503
2505
  from pydub import AudioSegment
2504
2506
  audio_buf = io.BytesIO(audio_data)
2505
2507
  seg = AudioSegment.from_file(audio_buf, format=audio_format or "webm")
2506
- seg = seg.set_channels(1).set_frame_rate(16000).set_sample_width(2)
2507
- seg.export(wav_buf, format="wav")
2508
- except Exception:
2508
+ # 检查音频时长,过短直接跳过
2509
+ if seg.duration_seconds < 0.1:
2510
+ logger.debug(f"SenseVoice 跳过: 音频过短 ({seg.duration_seconds:.2f}s)")
2511
+ else:
2512
+ seg = seg.set_channels(1).set_frame_rate(16000).set_sample_width(2)
2513
+ seg.export(wav_buf, format="wav")
2514
+ pydub_ok = True
2515
+ except Exception as conv_err:
2516
+ import shutil
2517
+ if not shutil.which("ffmpeg"):
2518
+ logger.warning(f"pydub 转换失败且缺少 ffmpeg: {conv_err}. 安装: sudo apt install ffmpeg")
2519
+ else:
2520
+ logger.warning(f"pydub 音频转换失败: {conv_err}")
2521
+
2522
+ if not pydub_ok:
2509
2523
  wav_buf = io.BytesIO(audio_data)
2524
+
2510
2525
  wav_buf.seek(0)
2511
- with open(wav_path, 'wb') as f:
2512
- f.write(wav_buf.read())
2513
-
2514
- # SenseVoice 推理
2515
- res = sv_model.generate(input=wav_path, cache={},
2516
- language="auto", # 自动检测语言
2517
- use_itn=True, # 逆文本标准化(数字/日期等)
2518
- batch_size_s=300)
2519
- if res and len(res) > 0 and len(res[0]) > 0:
2520
- text = res[0][0]["text"] if isinstance(res[0][0], dict) else str(res[0][0])
2521
- # SenseVoice 可能输出带 <|zh|><|en|><|EMO|> 等特殊 token,清理掉
2522
- import re
2523
- text = re.sub(r'<\|[^|]+\|>', '', text).strip()
2524
- if text:
2525
- try:
2526
- os.remove(wav_path)
2527
- except Exception:
2528
- pass
2529
- return web.json_response({"text": text, "engine": "sensevoice"})
2526
+ wav_bytes = wav_buf.read()
2527
+
2528
+ # 验证 WAV 文件头 (RIFF....WAVE)
2529
+ if len(wav_bytes) < 44 or wav_bytes[:4] != b'RIFF' or wav_bytes[8:12] != b'WAVE':
2530
+ logger.warning(f"SenseVoice 跳过: 无效 WAV 数据 (size={len(wav_bytes)}, header={wav_bytes[:12].hex()})")
2531
+ else:
2532
+ with open(wav_path, 'wb') as f:
2533
+ f.write(wav_bytes)
2534
+
2535
+ # SenseVoice 推理
2536
+ res = sv_model.generate(input=wav_path, cache={},
2537
+ language="auto", # 自动检测语言
2538
+ use_itn=True, # 逆文本标准化(数字/日期等)
2539
+ batch_size_s=300)
2540
+ if res and len(res) > 0 and len(res[0]) > 0:
2541
+ text = res[0][0]["text"] if isinstance(res[0][0], dict) else str(res[0][0])
2542
+ # SenseVoice 可能输出带 <|zh|><|en|><|EMO|> 等特殊 token,清理掉
2543
+ import re
2544
+ text = re.sub(r'<\|[^|]+\|>', '', text).strip()
2545
+ if text:
2546
+ try:
2547
+ os.remove(wav_path)
2548
+ except Exception:
2549
+ pass
2550
+ return web.json_response({"text": text, "engine": "sensevoice"})
2530
2551
  try:
2531
2552
  os.remove(wav_path)
2532
2553
  except Exception: