myagent-ai 1.17.3 → 1.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -706,46 +706,35 @@ class MainAgent(BaseAgent):
706
706
  )
707
707
  break
708
708
 
709
- # [v1.16.13→17.0] 特殊处理模型不支持图片输入 — 去掉图片用纯文本重试
710
- # 支持中英文错误关键词匹配(ModelScope 等国产 API 可能返回中文错误)
709
+ # [v1.18.0] 特殊处理模型不支持图片输入
710
+ # 策略: 保留图片数据,break 后让 _stream_process_message 返回 ⚠️ 标记
711
+ # model chain 的 _try_model_chain_stream_inner 检测到 ⚠️ 后会继续尝试下一个模型
711
712
  _vision_keywords = [
712
713
  "doesn't support image", "does not support image", "model_incompatible",
713
714
  "image input", "not support vision", "unsupported multimodal", "image capability",
714
715
  "不支持图片", "不支持图像", "图片输入", "图像输入", "不支持多模态",
715
716
  "视觉", "image_url", "multimodal", "vision",
716
717
  ]
717
- if any(kw.lower() in _llm_error.lower() for kw in _vision_keywords) and context.metadata.get("user_images"):
718
- logger.warning(f"[{task_id}] 模型不支持图片输入,去掉图片用纯文本重试")
719
- context.metadata["user_images"] = []
720
- # 用纯文本消息替换最后一条多模态消息
721
- _text_only_msg = context.user_message or "请处理上述上下文。"
722
- if len(messages) > 0 and isinstance(messages[-1].content, list):
723
- messages[-1] = Message(role="user", content=_text_only_msg)
724
- # 重试 LLM 调用
725
- if stream_response and self.llm:
726
- response = await self._call_llm_stream(
727
- messages, text_delta_callback=text_delta_callback,
728
- stream_response=stream_response,
718
+ _is_vision_error = (
719
+ any(kw.lower() in _llm_error.lower() for kw in _vision_keywords)
720
+ and context.metadata.get("user_images")
721
+ )
722
+
723
+ if _is_vision_error:
724
+ logger.warning(f"[{task_id}] 模型 {self.llm.model} 不支持图片输入,"
725
+ f"将切换到支持图片的模型重试")
726
+ # 返回 ⚠️ 标记的错误,让 model chain 继续尝试下一个模型
727
+ # 注意: 不清除 context.metadata["user_images"],下一个模型仍可使用
728
+ _vision_skip_msg = f"⚠️ 模型 {self.llm.model} 不支持图片,正在切换..."
729
+ context.working_memory["final_response"] = _vision_skip_msg
730
+ await self._emit_v2_event("v2_reasoning", {"content": _vision_skip_msg}, stream_callback)
731
+ if self.memory:
732
+ self.memory.add_session(
733
+ session_id=context.session_id,
734
+ role="assistant",
735
+ content=_vision_skip_msg,
729
736
  )
730
- else:
731
- response = await self._call_llm(messages)
732
- if response.success:
733
- # 纯文本重试成功,给回复加上提示前缀
734
- _vision_prefix = "⚠️ 当前模型不支持图片识别,已自动使用纯文本模式处理(图片未发送给模型)。\n\n"
735
- llm_raw = _vision_prefix + response.content
736
- context.working_memory["final_response"] = llm_raw
737
- await self._emit_v2_event("v2_reasoning", {"content": llm_raw}, stream_callback)
738
- if self.memory:
739
- self.memory.add_session(
740
- session_id=context.session_id,
741
- role="assistant",
742
- content=llm_raw,
743
- )
744
- break
745
- else:
746
- # 纯文本也失败了,走下面的通用错误处理
747
- _llm_error = response.error or ""
748
- logger.error(f"[{task_id}] 纯文本重试也失败: {_llm_error}")
737
+ break # 退出 agent 循环,让 model chain 尝试下一个模型
749
738
 
750
739
  # 其他 LLM 错误
751
740
  error_msg = f"LLM 调用失败: {response.error}"
@@ -68,14 +68,20 @@ DEPENDENCIES: List[DepInfo] = [
68
68
  # ── PDF 处理 ──
69
69
  DepInfo("PyPDF2", "PyPDF2", "3.0.0", "pdf", "all",
70
70
  note="纯 Python PDF 文本提取(pdftotext 失败时的备用方案)"),
71
+ DepInfo("pypdf", "pypdf", "4.0.0", "pdf", "all",
72
+ note="PDF 文本提取 (PyPDF2 新版, 优先使用)"),
73
+ DepInfo("reportlab", "reportlab", "4.0.0", "pdf", "all",
74
+ note="PDF 文档生成引擎"),
75
+ DepInfo("PIL", "Pillow", "10.0.0", "pdf", "all",
76
+ note="图片处理 (PDF 插图, 报告图表)"),
71
77
 
72
78
  # ── 文档处理 (Excel/Word/PPT) ──
73
79
  DepInfo("openpyxl", "openpyxl", "3.1.0", "doc", "all",
74
- note="Excel (.xlsx) 文件读取"),
80
+ note="Excel (.xlsx) 文件读取和生成"),
75
81
  DepInfo("docx", "python-docx", "1.1.0", "doc", "all",
76
- note="Word (.docx) 文件读取"),
82
+ note="Word (.docx) 文件读取和生成"),
77
83
  DepInfo("pptx", "python-pptx", "0.6.21", "doc", "all",
78
- note="PowerPoint (.pptx) 文件读取"),
84
+ note="PowerPoint (.pptx) 文件读取和生成"),
79
85
  DepInfo("xlrd", "xlrd", "2.0.0", "doc", "all",
80
86
  note="旧版 Excel (.xls) 文件读取"),
81
87
 
@@ -597,6 +597,12 @@ class VNCManager:
597
597
 
598
598
  env = {**os.environ, "DISPLAY": self.display}
599
599
 
600
+ # [v1.18.0] proot/Termux 兼容: 可能需要额外的安全参数
601
+ cmd.append("-nobell")
602
+ cmd.append("-noxdamage")
603
+ # 跳过 Xinerama 检查(proot 环境下可能失败)
604
+ env["X11VNC_NO_UNIXPW"] = "1"
605
+
600
606
  logger.info(f"启动 x11vnc: {' '.join(cmd)}")
601
607
  self._x11vnc_process = subprocess.Popen(
602
608
  cmd,
@@ -612,7 +618,7 @@ class VNCManager:
612
618
  if self._x11vnc_process.poll() is not None:
613
619
  stderr = ""
614
620
  try:
615
- stderr = self._x11vnc_process.stderr.read().decode("utf-8", errors="replace")[:500]
621
+ stderr = self._x11vnc_process.stderr.read().decode("utf-8", errors="replace")[:2000]
616
622
  except Exception:
617
623
  pass
618
624
  logger.error(f"x11vnc 启动失败: {stderr}")
package/main.py CHANGED
@@ -390,6 +390,19 @@ class MyAgentApp:
390
390
  ]:
391
391
  self.skill_registry.register(skill_cls())
392
392
 
393
+ # ── 文档生成技能 (v1.17.3, 融合 MiniMax Skills 设计理念) ──
394
+ from skills.pdf_skill import PDFCreateSkill, PDFReadSkill
395
+ from skills.docx_skill import DOCXCreateSkill, DOCXReadSkill
396
+ from skills.xlsx_skill import XLSXCreateSkill, XLSXReadSkill, XLSXEditSkill
397
+ from skills.ppt_skill import PPTCreateSkill, PPTReadSkill
398
+ for skill_cls in [
399
+ PDFCreateSkill, PDFReadSkill,
400
+ DOCXCreateSkill, DOCXReadSkill,
401
+ XLSXCreateSkill, XLSXReadSkill, XLSXEditSkill,
402
+ PPTCreateSkill, PPTReadSkill,
403
+ ]:
404
+ self.skill_registry.register(skill_cls())
405
+
393
406
  async def process_message(
394
407
  self,
395
408
  user_message: str,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "myagent-ai",
3
- "version": "1.17.3",
3
+ "version": "1.18.1",
4
4
  "description": "本地桌面端执行型AI助手 - Open Interpreter 风格 | Local Desktop Execution-Oriented AI Assistant",
5
5
  "main": "main.py",
6
6
  "bin": {
File without changes
package/skills/base.py CHANGED
File without changes
File without changes
File without changes
@@ -0,0 +1,199 @@
1
+ """
2
+ skills/docx_skill.py - DOCX Word 文档生成/编辑技能
3
+ ===============================================
4
+ 基于 python-docx 生成和编辑 Word 文档,融入 MiniMax DOCX Skill 设计理念。
5
+ 支持标题、段落、列表、表格、图片、页眉页脚、样式等。
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import os
11
+ from pathlib import Path
12
+ from typing import Any, Dict, List, Optional
13
+
14
+ from skills.base import Skill, SkillParameter, SkillResult
15
+
16
+
17
class DOCXCreateSkill(Skill):
    """Skill that renders a JSON list of content blocks into a .docx file.

    Each block is a dict with a ``type`` key (``h1``/``h2``/``h3``, ``body``,
    ``bullet``, ``numbered``, ``table``, ``image``, ``pagebreak``, ``spacer``)
    plus the type-specific keys read in :meth:`execute`. Blocks that are not
    dicts, or whose type is unknown, are ignored.
    """
    name = "docx_create"
    description = (
        "生成 Word (DOCX) 文档。支持多级标题、段落、列表、表格、图片等。"
        "content 为 JSON 数组,每项: {type, text/items/headers/rows/path...}。"
        "type 可选: h1/h2/h3/body/bullet/numbered/table/image/pagebreak/spacer。"
    )
    category = "doc"
    dangerous = True
    parameters = [
        SkillParameter("content", "string",
                       "内容块 JSON 数组。示例: [{\"type\":\"h1\",\"text\":\"报告\"},{\"type\":\"body\",\"text\":\"正文\"}]",
                       required=True),
        SkillParameter("output_path", "string", "输出 DOCX 文件路径", required=True),
        SkillParameter("title", "string", "文档标题", required=False, default=""),
        SkillParameter("author", "string", "作者", required=False, default=""),
        SkillParameter("font", "string", "中文字体", required=False, default="SimHei"),
    ]

    async def execute(self, content: str = "", output_path: str = "",
                      title: str = "", author: str = "",
                      font: str = "SimHei", **kwargs) -> SkillResult:
        """Build the document described by ``content`` and save it.

        Args:
            content: JSON array of block dicts (an already-parsed list is also
                accepted). A non-list JSON value becomes a single body block.
            output_path: Destination file; parent directories are created.
            title: Stored in the document core properties when non-empty.
            author: Stored in the document core properties when non-empty.
            font: Font name applied to the Normal and Heading 1-3 styles, for
                both Latin and East Asian text.
            **kwargs: Ignored.

        Returns:
            A ``SkillResult``. On success ``data`` holds the resolved path,
            the block count and the list of image paths that were skipped
            because the file does not exist. Failures are reported through
            ``success=False`` rather than raised.
        """
        try:
            from docx import Document
            from docx.enum.table import WD_TABLE_ALIGNMENT
            from docx.oxml.ns import qn
            from docx.shared import Cm, Emu, Inches, Pt, RGBColor
        except ImportError:
            return SkillResult(success=False, error="python-docx 未安装: pip install python-docx")

        # An empty path would resolve to the current directory and only fail
        # later inside doc.save() with a confusing error; reject it up front.
        if not output_path or not str(output_path).strip():
            return SkillResult(success=False, error="DOCX 生成失败: output_path 不能为空")

        try:
            blocks = json.loads(content) if isinstance(content, str) else content
            if not isinstance(blocks, list):
                blocks = [{"type": "body", "text": str(blocks)}]
        except json.JSONDecodeError as e:
            return SkillResult(success=False, error=f"content JSON 解析失败: {e}")

        def _as_text(value: Any) -> str:
            # JSON may carry numbers or null where text is expected.
            return "" if value is None else str(value)

        def _as_items(value: Any) -> List[Any]:
            # A bare string must stay one item instead of being split into
            # characters by iteration.
            if value is None:
                return []
            if isinstance(value, (list, tuple)):
                return list(value)
            return [value]

        def _apply_font(style, size_pt: int) -> None:
            style.font.name = font
            style.font.size = Pt(size_pt)
            # font.name only writes the w:ascii / w:hAnsi attributes; CJK
            # glyphs are selected through w:eastAsia, so set it explicitly.
            style.element.rPr.rFonts.set(qn("w:eastAsia"), font)

        try:
            out = Path(output_path).expanduser().resolve()
            out.parent.mkdir(parents=True, exist_ok=True)

            doc = Document()
            if title:
                doc.core_properties.title = title
            if author:
                doc.core_properties.author = author

            _apply_font(doc.styles["Normal"], 11)
            for style_name, size in (("Heading 1", 22), ("Heading 2", 16), ("Heading 3", 13)):
                if style_name in doc.styles:
                    heading_style = doc.styles[style_name]
                    _apply_font(heading_style, size)
                    heading_style.font.color.rgb = RGBColor(0x1a, 0x36, 0x5d)

            skipped_images: List[str] = []

            for block in blocks:
                if not isinstance(block, dict):
                    continue
                bt = block.get("type", "body")

                if bt in ("h1", "h2", "h3"):
                    # "h1" -> level 1, "h2" -> level 2, "h3" -> level 3
                    doc.add_heading(_as_text(block.get("text", "")), level=int(bt[1]))
                elif bt == "body":
                    p = doc.add_paragraph(_as_text(block.get("text", "")))
                    p.paragraph_format.first_line_indent = Cm(0.74)
                    p.paragraph_format.line_spacing = 1.5
                elif bt == "bullet":
                    for item in _as_items(block.get("items")):
                        doc.add_paragraph(_as_text(item), style="List Bullet")
                elif bt == "numbered":
                    for item in _as_items(block.get("items")):
                        doc.add_paragraph(_as_text(item), style="List Number")
                elif bt == "table":
                    headers = list(block.get("headers") or [])
                    rows = [_as_items(r) for r in (block.get("rows") or [])]
                    # Without headers, size the table from the widest row so
                    # the row data is not silently discarded.
                    n_cols = len(headers) if headers else max((len(r) for r in rows), default=0)
                    if n_cols == 0:
                        continue
                    first_data_row = 1 if headers else 0
                    table = doc.add_table(rows=first_data_row + len(rows), cols=n_cols)
                    table.style = "Light Grid Accent 1"
                    table.alignment = WD_TABLE_ALIGNMENT.CENTER
                    if headers:
                        header_cells = table.rows[0].cells
                        for j, h in enumerate(headers):
                            cell = header_cells[j]
                            cell.text = str(h)
                            for run in cell.paragraphs[0].runs:
                                run.bold = True
                    for i, row in enumerate(rows):
                        # Fetch the row's cells once; values beyond the
                        # column count are dropped.
                        row_cells = table.rows[first_data_row + i].cells
                        for j, val in enumerate(row[:n_cols]):
                            row_cells[j].text = str(val)
                elif bt == "image":
                    img_path = block.get("path", "")
                    img_w = block.get("width") or 0
                    if img_path and os.path.isfile(img_path):
                        try:
                            # Width is given in EMU; accept numeric strings.
                            if isinstance(img_w, str):
                                img_w = float(img_w)
                            if img_w > 0:
                                doc.add_picture(img_path, width=Emu(int(img_w)))
                            else:
                                doc.add_picture(img_path, width=Inches(5.5))
                        except Exception as e:
                            doc.add_paragraph(f"[图片加载失败: {e}]")
                    elif img_path:
                        # Document output is unchanged for a missing file, but
                        # the caller can see what was left out.
                        skipped_images.append(str(img_path))
                elif bt == "pagebreak":
                    doc.add_page_break()
                elif bt == "spacer":
                    doc.add_paragraph("")

            doc.save(str(out))
            return SkillResult(
                success=True,
                message=f"Word 文档已生成: {out}",
                files=[str(out)],
                data={"path": str(out), "blocks": len(blocks),
                      "skipped_images": skipped_images},
            )
        except Exception as e:
            # Skill boundary: report any failure as a result, do not raise.
            return SkillResult(success=False, error=f"DOCX 生成失败: {e}")
136
+
137
+
138
class DOCXReadSkill(Skill):
    """Skill that extracts paragraph text and table contents from a .docx file.

    Paragraphs are emitted first, in document order, with a marker prefix for
    heading and list styles; every table is appended afterwards as
    pipe-separated rows.
    """
    name = "docx_read"
    description = "读取 Word (DOCX) 文件,提取文本内容。支持提取表格数据。"
    category = "doc"
    parameters = [
        SkillParameter("path", "string", "DOCX 文件路径", required=True),
        SkillParameter("max_chars", "integer", "最大字符数", required=False, default=50000),
    ]

    async def execute(self, path: str = "", max_chars: int = 50000, **kwargs) -> SkillResult:
        """Read ``path`` and return its text, truncated to ``max_chars``.

        Args:
            path: Location of the .docx file (``~`` is expanded).
            max_chars: Maximum number of characters of extracted text to
                return. Values that cannot be converted to an integer fall
                back to 50000; negative values are treated as 0.
            **kwargs: Ignored.

        Returns:
            A ``SkillResult`` whose ``output`` is the extracted text and whose
            ``data`` holds the resolved path and the paragraph/table counts.
            Failures are reported through ``success=False`` rather than raised.
        """
        try:
            from docx import Document
        except ImportError:
            return SkillResult(success=False, error="python-docx 未安装: pip install python-docx")

        # An empty path would resolve to the current directory, which exists,
        # and then fail later as a misleading parse error.
        if not path or not str(path).strip():
            return SkillResult(success=False, error=f"文件不存在: {path}")

        fp = Path(path).expanduser().resolve()
        if not fp.exists():
            return SkillResult(success=False, error=f"文件不存在: {path}")
        if not fp.is_file():
            return SkillResult(success=False, error=f"DOCX 读取失败: 不是文件: {path}")

        # The value may arrive as a string; a negative limit would slice from
        # the end of the text instead of limiting it.
        try:
            limit = max(0, int(max_chars))
        except (TypeError, ValueError):
            limit = 50000

        try:
            doc = Document(str(fp))
            parts = []
            for para in doc.paragraphs:
                text = para.text.strip()
                if not text:
                    continue
                style_name = para.style.name if para.style else ""
                prefix = ""
                if "Heading" in style_name:
                    # "Heading 2" -> "[##] "; no numeric level -> "[#] "
                    level = style_name.replace("Heading", "").strip()
                    prefix = f"[{'#' * int(level) if level.isdigit() else '#'}] "
                elif "Bullet" in style_name:
                    prefix = "  * "
                elif "Number" in style_name or "List" in style_name:
                    prefix = "  - "
                parts.append(f"{prefix}{text}")

            for i, table in enumerate(doc.tables):
                rows = [
                    " | ".join(cell.text.strip() for cell in row.cells)
                    for row in table.rows
                ]
                if rows:
                    parts.append(f"\n[表格 {i+1}]")
                    parts.append(rows[0])
                    if len(rows) > 1:
                        # Separator under the first row, as wide as that row.
                        parts.append("-" * len(rows[0]))
                        parts.extend(rows[1:])

            full = "\n".join(parts)
            total = len(full)
            if total > limit:
                full = full[:limit] + f"\n\n... (截断,共 {total} 字符)"

            return SkillResult(
                success=True,
                message=f"已读取 Word 文档: {fp.name}",
                data={"path": str(fp), "paragraphs": len(doc.paragraphs),
                      "tables": len(doc.tables)},
                output=full,
            )
        except Exception as e:
            # Skill boundary: report any failure as a result, do not raise.
            return SkillResult(success=False, error=f"DOCX 读取失败: {e}")
File without changes
File without changes