npm - myagent-ai - Versions diffs - 1.17.3 → 1.18.1 - Mend

myagent-ai 1.17.3 → 1.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

package/agents/main_agent.py CHANGED Viewed

@@ -706,46 +706,35 @@ class MainAgent(BaseAgent):
                         )
                     break
-                # [v1.16.13→17.0] 特殊处理模型不支持图片输入 — 去掉图片用纯文本重试
-                # 支持中英文错误关键词匹配（ModelScope 等国产 API 可能返回中文错误）
+                # [v1.18.0] 特殊处理模型不支持图片输入
+                # 策略: 保留图片数据，break 后让 _stream_process_message 返回 ⚠️ 标记
+                # model chain 的 _try_model_chain_stream_inner 检测到 ⚠️ 后会继续尝试下一个模型
                 _vision_keywords = [
                     "doesn't support image", "does not support image", "model_incompatible",
                     "image input", "not support vision", "unsupported multimodal", "image capability",
                     "不支持图片", "不支持图像", "图片输入", "图像输入", "不支持多模态",
                     "视觉", "image_url", "multimodal", "vision",
                 ]
-                if any(kw.lower() in _llm_error.lower() for kw in _vision_keywords) and context.metadata.get("user_images"):
-                    logger.warning(f"[{task_id}] 模型不支持图片输入，去掉图片用纯文本重试")
-                    context.metadata["user_images"] = []
-                    # 用纯文本消息替换最后一条多模态消息
-                    _text_only_msg = context.user_message or "请处理上述上下文。"
-                    if len(messages) > 0 and isinstance(messages[-1].content, list):
-                        messages[-1] = Message(role="user", content=_text_only_msg)
-                    # 重试 LLM 调用
-                    if stream_response and self.llm:
-                        response = await self._call_llm_stream(
-                            messages, text_delta_callback=text_delta_callback,
-                            stream_response=stream_response,
+                _is_vision_error = (
+                    any(kw.lower() in _llm_error.lower() for kw in _vision_keywords)
+                    and context.metadata.get("user_images")
+                )
+                if _is_vision_error:
+                    logger.warning(f"[{task_id}] 模型 {self.llm.model} 不支持图片输入，"
+                                   f"将切换到支持图片的模型重试")
+                    # 返回 ⚠️ 标记的错误，让 model chain 继续尝试下一个模型
+                    # 注意: 不清除 context.metadata["user_images"]，下一个模型仍可使用
+                    _vision_skip_msg = f"⚠️ 模型 {self.llm.model} 不支持图片，正在切换..."
+                    context.working_memory["final_response"] = _vision_skip_msg
+                    await self._emit_v2_event("v2_reasoning", {"content": _vision_skip_msg}, stream_callback)
+                    if self.memory:
+                        self.memory.add_session(
+                            session_id=context.session_id,
+                            role="assistant",
+                            content=_vision_skip_msg,
                         )
-                    else:
-                        response = await self._call_llm(messages)
-                    if response.success:
-                        # 纯文本重试成功，给回复加上提示前缀
-                        _vision_prefix = "⚠️ 当前模型不支持图片识别，已自动使用纯文本模式处理（图片未发送给模型）。\n\n"
-                        llm_raw = _vision_prefix + response.content
-                        context.working_memory["final_response"] = llm_raw
-                        await self._emit_v2_event("v2_reasoning", {"content": llm_raw}, stream_callback)
-                        if self.memory:
-                            self.memory.add_session(
-                                session_id=context.session_id,
-                                role="assistant",
-                                content=llm_raw,
-                            )
-                        break
-                    else:
-                        # 纯文本也失败了，走下面的通用错误处理
-                        _llm_error = response.error or ""
-                        logger.error(f"[{task_id}] 纯文本重试也失败: {_llm_error}")
+                    break  # 退出 agent 循环，让 model chain 尝试下一个模型
                 # 其他 LLM 错误
                 error_msg = f"LLM 调用失败: {response.error}"

package/core/deps_checker.py CHANGED Viewed

@@ -68,14 +68,20 @@ DEPENDENCIES: List[DepInfo] = [
     # ── PDF 处理 ──
     DepInfo("PyPDF2", "PyPDF2", "3.0.0", "pdf", "all",
             note="纯 Python PDF 文本提取（pdftotext 失败时的备用方案）"),
+    DepInfo("pypdf", "pypdf", "4.0.0", "pdf", "all",
+            note="PDF 文本提取 (PyPDF2 新版, 优先使用)"),
+    DepInfo("reportlab", "reportlab", "4.0.0", "pdf", "all",
+            note="PDF 文档生成引擎"),
+    DepInfo("PIL", "Pillow", "10.0.0", "pdf", "all",
+            note="图片处理 (PDF 插图, 报告图表)"),
     # ── 文档处理 (Excel/Word/PPT) ──
     DepInfo("openpyxl", "openpyxl", "3.1.0", "doc", "all",
-            note="Excel (.xlsx) 文件读取"),
+            note="Excel (.xlsx) 文件读取和生成"),
     DepInfo("docx", "python-docx", "1.1.0", "doc", "all",
-            note="Word (.docx) 文件读取"),
+            note="Word (.docx) 文件读取和生成"),
     DepInfo("pptx", "python-pptx", "0.6.21", "doc", "all",
-            note="PowerPoint (.pptx) 文件读取"),
+            note="PowerPoint (.pptx) 文件读取和生成"),
     DepInfo("xlrd", "xlrd", "2.0.0", "doc", "all",
             note="旧版 Excel (.xls) 文件读取"),

package/core/vnc_manager.py CHANGED Viewed

@@ -597,6 +597,12 @@ class VNCManager:
             env = {**os.environ, "DISPLAY": self.display}
+            # [v1.18.0] proot/Termux 兼容: 可能需要额外的安全参数
+            cmd.append("-nobell")
+            cmd.append("-noxdamage")
+            # 跳过 Xinerama 检查（proot 环境下可能失败）
+            env["X11VNC_NO_UNIXPW"] = "1"
             logger.info(f"启动 x11vnc: {' '.join(cmd)}")
             self._x11vnc_process = subprocess.Popen(
                 cmd,
@@ -612,7 +618,7 @@ class VNCManager:
             if self._x11vnc_process.poll() is not None:
                 stderr = ""
                 try:
-                    stderr = self._x11vnc_process.stderr.read().decode("utf-8", errors="replace")[:500]
+                    stderr = self._x11vnc_process.stderr.read().decode("utf-8", errors="replace")[:2000]
                 except Exception:
                     pass
                 logger.error(f"x11vnc 启动失败: {stderr}")

package/main.py CHANGED Viewed

@@ -390,6 +390,19 @@ class MyAgentApp:
         ]:
             self.skill_registry.register(skill_cls())
+        # ── 文档生成技能 (v1.17.3, 融合 MiniMax Skills 设计理念) ──
+        from skills.pdf_skill import PDFCreateSkill, PDFReadSkill
+        from skills.docx_skill import DOCXCreateSkill, DOCXReadSkill
+        from skills.xlsx_skill import XLSXCreateSkill, XLSXReadSkill, XLSXEditSkill
+        from skills.ppt_skill import PPTCreateSkill, PPTReadSkill
+        for skill_cls in [
+            PDFCreateSkill, PDFReadSkill,
+            DOCXCreateSkill, DOCXReadSkill,
+            XLSXCreateSkill, XLSXReadSkill, XLSXEditSkill,
+            PPTCreateSkill, PPTReadSkill,
+        ]:
+            self.skill_registry.register(skill_cls())
     async def process_message(
         self,
         user_message: str,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "myagent-ai",
-  "version": "1.17.3",
+  "version": "1.18.1",
   "description": "本地桌面端执行型AI助手 - Open Interpreter 风格 | Local Desktop Execution-Oriented AI Assistant",
   "main": "main.py",
   "bin": {

package/skills/__init__.py CHANGED Viewed

File without changes

package/skills/base.py CHANGED Viewed

File without changes

package/skills/browser_skill.py CHANGED Viewed

File without changes

package/skills/chromedev_mcp.py CHANGED Viewed

File without changes

package/skills/docx_skill.py ADDED Viewed

@@ -0,0 +1,199 @@
+"""
+skills/docx_skill.py - DOCX Word 文档生成/编辑技能
+===============================================
+基于 python-docx 生成和编辑 Word 文档，融入 MiniMax DOCX Skill 设计理念。
+支持标题、段落、列表、表格、图片、页眉页脚、样式等。
+"""
+from __future__ import annotations
+import json
+import os
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+from skills.base import Skill, SkillParameter, SkillResult
+class DOCXCreateSkill(Skill):
+    """生成 Word 文档"""
+    name = "docx_create"
+    description = (
+        "生成 Word (DOCX) 文档。支持多级标题、段落、列表、表格、图片等。"
+        "content 为 JSON 数组，每项: {type, text/items/headers/rows/path...}。"
+        "type 可选: h1/h2/h3/body/bullet/numbered/table/image/pagebreak/spacer。"
+    )
+    category = "doc"
+    dangerous = True
+    parameters = [
+        SkillParameter("content", "string",
+            "内容块 JSON 数组。示例: [{\"type\":\"h1\",\"text\":\"报告\"},{\"type\":\"body\",\"text\":\"正文\"}]",
+            required=True),
+        SkillParameter("output_path", "string", "输出 DOCX 文件路径", required=True),
+        SkillParameter("title", "string", "文档标题", required=False, default=""),
+        SkillParameter("author", "string", "作者", required=False, default=""),
+        SkillParameter("font", "string", "中文字体", required=False, default="SimHei"),
+    ]
+    async def execute(self, content: str = "", output_path: str = "",
+                      title: str = "", author: str = "",
+                      font: str = "SimHei", **kwargs) -> SkillResult:
+        try:
+            from docx import Document
+            from docx.shared import Inches, Cm, Pt, Emu, RGBColor
+            from docx.enum.text import WD_ALIGN_PARAGRAPH
+            from docx.enum.table import WD_TABLE_ALIGNMENT
+        except ImportError:
+            return SkillResult(success=False, error="python-docx 未安装: pip install python-docx")
+        try:
+            blocks = json.loads(content) if isinstance(content, str) else content
+            if not isinstance(blocks, list):
+                blocks = [{"type": "body", "text": str(blocks)}]
+        except json.JSONDecodeError as e:
+            return SkillResult(success=False, error=f"content JSON 解析失败: {e}")
+        try:
+            out = Path(output_path).expanduser().resolve()
+            out.parent.mkdir(parents=True, exist_ok=True)
+            doc = Document()
+            if title:
+                doc.core_properties.title = title
+            if author:
+                doc.core_properties.author = author
+            style = doc.styles["Normal"]
+            rpr = style.font
+            rpr.name = font
+            rpr.size = Pt(11)
+            for level, size in [("Heading 1", 22), ("Heading 2", 16), ("Heading 3", 13)]:
+                if level in doc.styles:
+                    hs = doc.styles[level]
+                    hs.font.name = font
+                    hs.font.size = Pt(size)
+                    hs.font.color.rgb = RGBColor(0x1a, 0x36, 0x5d)
+            for block in blocks:
+                if not isinstance(block, dict):
+                    continue
+                bt = block.get("type", "body")
+                if bt in ("h1", "h2", "h3"):
+                    heading_map = {"h1": 0, "h2": 1, "h3": 2}
+                    doc.add_heading(block.get("text", ""), level=heading_map[bt] + 1)
+                elif bt == "body":
+                    p = doc.add_paragraph(block.get("text", ""))
+                    p.paragraph_format.first_line_indent = Cm(0.74)
+                    p.paragraph_format.line_spacing = 1.5
+                elif bt == "bullet":
+                    for item in block.get("items", []):
+                        doc.add_paragraph(item, style="List Bullet")
+                elif bt == "numbered":
+                    for item in block.get("items", []):
+                        doc.add_paragraph(item, style="List Number")
+                elif bt == "table":
+                    headers = block.get("headers", [])
+                    rows = block.get("rows", [])
+                    if headers:
+                        table = doc.add_table(rows=1 + len(rows), cols=len(headers))
+                        table.style = "Light Grid Accent 1"
+                        table.alignment = WD_TABLE_ALIGNMENT.CENTER
+                        for j, h in enumerate(headers):
+                            cell = table.rows[0].cells[j]
+                            cell.text = str(h)
+                            for run in cell.paragraphs[0].runs:
+                                run.bold = True
+                        for i, row in enumerate(rows):
+                            for j, val in enumerate(row):
+                                if j < len(table.columns):
+                                    table.rows[i + 1].cells[j].text = str(val)
+                elif bt == "image":
+                    img_path = block.get("path", "")
+                    img_w = block.get("width", 0)
+                    if img_path and os.path.isfile(img_path):
+                        try:
+                            if img_w > 0:
+                                doc.add_picture(img_path, width=Emu(int(img_w)))
+                            else:
+                                doc.add_picture(img_path, width=Inches(5.5))
+                        except Exception as e:
+                            doc.add_paragraph(f"[图片加载失败: {e}]")
+                elif bt == "pagebreak":
+                    doc.add_page_break()
+                elif bt == "spacer":
+                    doc.add_paragraph("")
+            doc.save(str(out))
+            return SkillResult(
+                success=True,
+                message=f"Word 文档已生成: {out}",
+                files=[str(out)],
+                data={"path": str(out), "blocks": len(blocks)},
+            )
+        except Exception as e:
+            return SkillResult(success=False, error=f"DOCX 生成失败: {e}")
+class DOCXReadSkill(Skill):
+    """读取 Word 文档内容"""
+    name = "docx_read"
+    description = "读取 Word (DOCX) 文件，提取文本内容。支持提取表格数据。"
+    category = "doc"
+    parameters = [
+        SkillParameter("path", "string", "DOCX 文件路径", required=True),
+        SkillParameter("max_chars", "integer", "最大字符数", required=False, default=50000),
+    ]
+    async def execute(self, path: str = "", max_chars: int = 50000, **kwargs) -> SkillResult:
+        try:
+            from docx import Document
+        except ImportError:
+            return SkillResult(success=False, error="python-docx 未安装: pip install python-docx")
+        fp = Path(path).expanduser().resolve()
+        if not fp.exists():
+            return SkillResult(success=False, error=f"文件不存在: {path}")
+        try:
+            doc = Document(str(fp))
+            parts = []
+            for para in doc.paragraphs:
+                style_name = para.style.name if para.style else ""
+                prefix = ""
+                if "Heading" in style_name:
+                    level = style_name.replace("Heading", "").strip()
+                    prefix = f"[{'#' * int(level) if level.isdigit() else '#'}] "
+                elif "Bullet" in style_name:
+                    prefix = "  * "
+                elif "Number" in style_name or "List" in style_name:
+                    prefix = "  - "
+                text = para.text.strip()
+                if text:
+                    parts.append(f"{prefix}{text}")
+            for i, table in enumerate(doc.tables):
+                rows = []
+                for row in table.rows:
+                    cells = [cell.text.strip() for cell in row.cells]
+                    rows.append(" | ".join(cells))
+                if rows:
+                    parts.append(f"\n[表格 {i+1}]")
+                    parts.append(rows[0])
+                    if len(rows) > 1:
+                        parts.append("-" * len(rows[0]))
+                        parts.extend(rows[1:])
+            full = "\n".join(parts)
+            if len(full) > max_chars:
+                full = full[:max_chars] + f"\n\n... (截断，共 {len(full)} 字符)"
+            return SkillResult(
+                success=True,
+                message=f"已读取 Word 文档: {fp.name}",
+                data={"path": str(fp), "paragraphs": len(doc.paragraphs),
+                      "tables": len(doc.tables)},
+                output=full,
+            )
+        except Exception as e:
+            return SkillResult(success=False, error=f"DOCX 读取失败: {e}")

package/skills/file_skill.py CHANGED Viewed

File without changes

package/skills/gui_skill.py CHANGED Viewed

File without changes