PyPI - Undefined-bot - Versions diffs - 2.1.0__py3-none-any.whl - Mend

Undefined-bot 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (211) hide show

Undefined/skills/agents/file_analysis_agent/tools/extract_archive/handler.py ADDED Viewed

@@ -0,0 +1,190 @@
+from pathlib import Path
+from typing import Any, Dict
+import logging
+logger = logging.getLogger(__name__)
+async def execute(args: Dict[str, Any], context: Dict[str, Any]) -> str:
+    """List or extract a local archive, dispatching on its file extension.
+
+    Args:
+        args: Tool arguments. ``file_path`` is the archive to open,
+            ``action`` defaults to ``"list"`` and ``extract_path`` is an
+            optional target directory; the last two are handed unchanged to
+            the format-specific helper.
+        context: Not used by this handler.
+
+    Returns:
+        The helper's report, or an error message when the path is missing,
+        is not a regular file, has an unsupported extension, or the helper
+        raised (the exception is logged and its text is returned).
+    """
+    file_path: str = args.get("file_path", "")
+    action: str = args.get("action", "list")
+    extract_path: str | None = args.get("extract_path")
+    path = Path(file_path)
+    if not path.exists():
+        return f"错误：文件不存在 {file_path}"
+    if not path.is_file():
+        return f"错误：{file_path} 不是文件"
+    # Only the last extension is inspected, so "x.tar.gz" arrives here as ".gz".
+    suffix = path.suffix.lower()
+    try:
+        if suffix == ".zip":
+            return await _extract_zip(path, action, extract_path)
+        # .tgz/.tbz/.tbz2/.txz are single-suffix spellings of compressed
+        # tarballs; the tar helper opens with "r:*" and detects compression.
+        if suffix in (".tar", ".gz", ".bz2", ".xz", ".tgz", ".tbz", ".tbz2", ".txz"):
+            return await _extract_tar(path, action, extract_path)
+        if suffix == ".7z":
+            return await _extract_7z(path, action, extract_path)
+        if suffix == ".rar":
+            return await _extract_rar(path, action, extract_path)
+        return f"不支持的压缩格式: {suffix}"
+    except Exception as e:
+        # Tool boundary: report the failure as text instead of raising.
+        logger.exception("解析压缩包失败: %s", e)
+        return f"解析压缩包失败: {e}"
+async def _extract_zip(path: Path, action: str, extract_path: str | None) -> str:
+    """Report on a ZIP archive and, unless ``action`` is ``"list"``, unpack it.
+
+    Args:
+        path: Location of the ZIP file.
+        action: ``"list"`` only lists entries; any other value extracts.
+        extract_path: Target directory for extraction. When empty or None the
+            archive is unpacked into ``extracted_<stem>`` beside the archive.
+
+    Returns:
+        A newline-joined report: archive size, entry count, total
+        uncompressed size, and a truncated entry listing (100 names when
+        listing, 50 after extracting).
+    """
+    import zipfile
+    # First pass only gathers statistics; the archive is reopened to extract.
+    with zipfile.ZipFile(path, "r") as archive:
+        names = archive.namelist()
+        unpacked_bytes = sum(entry.file_size for entry in archive.infolist())
+    report: list[str] = [
+        f"文件大小：{path.stat().st_size} 字节",
+        f"压缩包内文件数：{len(names)}",
+        f"解压后总大小：{unpacked_bytes} 字节",
+    ]
+    if action == "list":
+        report.append("\n文件列表（前 100 个）：")
+        report.extend(f"  {n}. {name}" for n, name in enumerate(names[:100], 1))
+        if len(names) > 100:
+            report.append(f"  ... (共 {len(names)} 个文件)")
+        return "\n".join(report)
+    if extract_path:
+        destination = Path(extract_path)
+    else:
+        # Default target sits next to the archive and is created up front.
+        destination = path.parent / f"extracted_{path.stem}"
+        destination.mkdir(exist_ok=True)
+    with zipfile.ZipFile(path, "r") as archive:
+        archive.extractall(destination)
+    report.append(f"\n已解压到：{destination}")
+    report.append(f"解压文件数：{len(names)}")
+    report.append("\n文件列表（前 50 个）：")
+    report.extend(f"  {n}. {name}" for n, name in enumerate(names[:50], 1))
+    if len(names) > 50:
+        report.append(f"  ... (共 {len(names)} 个文件)")
+    return "\n".join(report)
+async def _extract_tar(path: Path, action: str, extract_path: str | None) -> str:
+    """Report on a tar archive and, unless ``action`` is ``"list"``, unpack it.
+
+    The archive is opened with mode ``"r:*"`` so plain and compressed
+    tarballs are both accepted.
+
+    Args:
+        path: Location of the tar file.
+        action: ``"list"`` only lists entries; any other value extracts.
+        extract_path: Target directory for extraction. When empty or None the
+            archive is unpacked into ``extracted_<stem>`` beside the archive.
+
+    Returns:
+        A newline-joined report: archive size, member count, total member
+        size, then either the first 100 member names or the extraction
+        target and member count.
+
+    Raises:
+        tarfile.TarError, ValueError: When a member would be written outside
+            the target directory (or is otherwise rejected by the filter).
+    """
+    import tarfile
+    with tarfile.open(path, "r:*") as tar_ref:
+        members = tar_ref.getmembers()
+        file_list = [m.name for m in members]
+        total_size = sum(m.size for m in members)
+    info: list[str] = [
+        f"文件大小：{path.stat().st_size} 字节",
+        f"压缩包内文件数：{len(file_list)}",
+        f"解压后总大小：{total_size} 字节",
+    ]
+    if action == "list":
+        info.append("\n文件列表（前 100 个）：")
+        info.extend(f"  {i}. {name}" for i, name in enumerate(file_list[:100], 1))
+        if len(file_list) > 100:
+            info.append(f"  ... (共 {len(file_list)} 个文件)")
+        return "\n".join(info)
+    if extract_path:
+        target_dir = Path(extract_path)
+    else:
+        target_dir = path.parent / f"extracted_{path.stem}"
+        target_dir.mkdir(exist_ok=True)
+    with tarfile.open(path, "r:*") as tar_ref:
+        _extract_tar_members_safely(tar_ref, target_dir)
+    info.append(f"\n已解压到：{target_dir}")
+    info.append(f"解压文件数：{len(file_list)}")
+    return "\n".join(info)
+
+
+def _extract_tar_members_safely(tar_ref: Any, target_dir: Path) -> None:
+    """Extract all members of an open tar archive without leaving ``target_dir``."""
+    import tarfile
+    if hasattr(tarfile, "data_filter"):
+        # The "data" extraction filter refuses absolute paths, ".." escapes,
+        # links that point outside the destination and device nodes.
+        tar_ref.extractall(target_dir, filter="data")
+        return
+    # Interpreters without extraction filters: vet every member by hand
+    # before anything is written.
+    root = target_dir.resolve()
+
+    def _inside(candidate: Path) -> bool:
+        return candidate == root or root in candidate.parents
+
+    for member in tar_ref.getmembers():
+        dest = (root / member.name).resolve()
+        safe = _inside(dest)
+        if safe and member.issym():
+            # Symlink targets are relative to the link's own directory.
+            safe = _inside((dest.parent / member.linkname).resolve())
+        elif safe and member.islnk():
+            # Hard-link targets are relative to the archive root.
+            safe = _inside((root / member.linkname).resolve())
+        if not safe:
+            raise ValueError(f"压缩包成员路径不安全: {member.name}")
+    tar_ref.extractall(target_dir)
+async def _extract_7z(path: Path, action: str, extract_path: str | None) -> str:
+    """Report on a 7z archive and, unless ``action`` is ``"list"``, unpack it.
+
+    Args:
+        path: Location of the 7z file.
+        action: ``"list"`` only lists entries; any other value extracts.
+        extract_path: Target directory for extraction. When empty or None the
+            archive is unpacked into ``extracted_<stem>`` beside the archive.
+
+    Returns:
+        A newline-joined report: archive size, entry count, summed
+        ``uncompressed`` sizes, then either the first 100 entry names or the
+        extraction target and entry count.
+    """
+    import py7zr
+    # First pass only gathers statistics; the archive is reopened to extract.
+    with py7zr.SevenZipFile(path, "r") as sevenzip:
+        names: list[str] = []
+        unpacked_bytes = 0
+        for entry in sevenzip.files:
+            names.append(entry.filename)
+            unpacked_bytes += entry.uncompressed
+    report: list[str] = [
+        f"文件大小：{path.stat().st_size} 字节",
+        f"压缩包内文件数：{len(names)}",
+        f"解压后总大小：{unpacked_bytes} 字节",
+    ]
+    if action == "list":
+        report.append("\n文件列表（前 100 个）：")
+        report.extend(f"  {n}. {name}" for n, name in enumerate(names[:100], 1))
+        if len(names) > 100:
+            report.append(f"  ... (共 {len(names)} 个文件)")
+        return "\n".join(report)
+    if extract_path:
+        destination = Path(extract_path)
+    else:
+        # Default target sits next to the archive and is created up front.
+        destination = path.parent / f"extracted_{path.stem}"
+        destination.mkdir(exist_ok=True)
+    with py7zr.SevenZipFile(path, "r") as sevenzip:
+        sevenzip.extractall(destination)
+    report.append(f"\n已解压到：{destination}")
+    report.append(f"解压文件数：{len(names)}")
+    return "\n".join(report)
+async def _extract_rar(path: Path, action: str, extract_path: str | None) -> str:
+    """Report on a RAR archive and, unless ``action`` is ``"list"``, unpack it.
+
+    Args:
+        path: Location of the RAR file.
+        action: ``"list"`` only lists entries; any other value extracts.
+        extract_path: Target directory for extraction. When empty or None the
+            archive is unpacked into ``extracted_<stem>`` beside the archive.
+
+    Returns:
+        A newline-joined report: archive size, entry count, summed
+        ``file_size`` values, then either the first 100 entry names or the
+        extraction target and entry count.
+    """
+    import rarfile
+    # First pass only gathers statistics; the archive is reopened to extract.
+    with rarfile.RarFile(path, "r") as archive:
+        names = archive.namelist()
+        unpacked_bytes = sum(entry.file_size for entry in archive.infolist())
+    report: list[str] = [
+        f"文件大小：{path.stat().st_size} 字节",
+        f"压缩包内文件数：{len(names)}",
+        f"解压后总大小：{unpacked_bytes} 字节",
+    ]
+    if action == "list":
+        report.append("\n文件列表（前 100 个）：")
+        report.extend(f"  {n}. {name}" for n, name in enumerate(names[:100], 1))
+        if len(names) > 100:
+            report.append(f"  ... (共 {len(names)} 个文件)")
+        return "\n".join(report)
+    if extract_path:
+        destination = Path(extract_path)
+    else:
+        # Default target sits next to the archive and is created up front.
+        destination = path.parent / f"extracted_{path.stem}"
+        destination.mkdir(exist_ok=True)
+    with rarfile.RarFile(path, "r") as archive:
+        archive.extractall(destination)
+    report.append(f"\n已解压到：{destination}")
+    report.append(f"解压文件数：{len(names)}")
+    return "\n".join(report)

Undefined/skills/agents/file_analysis_agent/tools/extract_docx/config.json ADDED Viewed

@@ -0,0 +1,17 @@
+{
+    "type": "function",
+    "function": {
+        "name": "extract_docx",
+        "description": "提取 Microsoft Word 文档 (.docx) 的文本内容和基本元数据。",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "file_path": {
+                    "type": "string",
+                    "description": "本地 Word 文档路径"
+                }
+            },
+            "required": ["file_path"]
+        }
+    }
+}

Undefined/skills/agents/file_analysis_agent/tools/extract_docx/handler.py ADDED Viewed

@@ -0,0 +1,78 @@
+from docx import Document
+from pathlib import Path
+from typing import Any, Dict
+import logging
+logger = logging.getLogger(__name__)
+async def execute(args: Dict[str, Any], context: Dict[str, Any]) -> str:
+    """Summarise a .docx file: size, core properties, paragraph text, tables.
+
+    Args:
+        args: Tool arguments; ``file_path`` is the document to open.
+        context: Not used by this handler.
+
+    Returns:
+        A newline-joined report with a text preview capped at 5000
+        characters and up to 5 rows from each of the first 3 tables, or an
+        error message if the path is invalid or parsing raised.
+    """
+    file_path: str = args.get("file_path", "")
+    docx_path = Path(file_path)
+    if not docx_path.exists():
+        return f"错误：文件不存在 {file_path}"
+    if not docx_path.is_file():
+        return f"错误：{file_path} 不是文件"
+    try:
+        document = Document(str(docx_path))
+        report: list[str] = [f"文件大小：{docx_path.stat().st_size} 字节"]
+        props = document.core_properties
+        if props:
+            report.append("\n文档属性：")
+            labelled = (
+                ("标题", props.title),
+                ("作者", props.author),
+                ("最后修改者", props.last_modified_by),
+                ("创建时间", props.created),
+                ("修改时间", props.modified),
+                ("备注", props.comments),
+            )
+            # Empty or missing properties are left out of the report.
+            report.extend(f"  {label}: {value}" for label, value in labelled if value)
+        paragraphs = list(document.paragraphs)
+        tables = document.tables
+        report.append(f"\n段落数：{len(paragraphs)}")
+        report.append(f"表格数：{len(tables)}")
+        stripped = (para.text.strip() for para in paragraphs)
+        body = "".join(f"{line}\n" for line in stripped if line)
+        if not body.strip():
+            body = "(文档未检测到文本内容)"
+        report.append(f"\n文本内容预览（前 5000 字符）：\n{body[:5000]}")
+        if len(body) > 5000:
+            report.append(f"\n... (共 {len(body)} 字符)")
+        if tables:
+            report.append("\n表格内容：")
+            for number, table in enumerate(tables[:3], 1):
+                report.append(f"\n表格 {number}:")
+                for row in table.rows[:5]:
+                    joined = " | ".join(cell.text.strip() for cell in row.cells)
+                    report.append(f"  {joined}")
+                if len(table.rows) > 5:
+                    report.append(f"  ... (共 {len(table.rows)} 行)")
+            if len(tables) > 3:
+                report.append(f"\n... (还有 {len(tables) - 3} 个表格)")
+        return "\n".join(report)
+    except Exception as e:
+        logger.exception(f"解析 Word 文档失败: {e}")
+        return f"解析 Word 文档失败: {e}"

Undefined/skills/agents/file_analysis_agent/tools/extract_pdf/config.json ADDED Viewed

@@ -0,0 +1,21 @@
+{
+    "type": "function",
+    "function": {
+        "name": "extract_pdf",
+        "description": "提取 PDF 文档的文本内容和元数据。使用 PyMuPDF (fitz) 解析，支持获取页数、作者、标题等信息。",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "file_path": {
+                    "type": "string",
+                    "description": "本地 PDF 文件路径"
+                },
+                "extract_images": {
+                    "type": "boolean",
+                    "description": "是否统计图片（仅返回图片数量），默认为 false"
+                }
+            },
+            "required": ["file_path"]
+        }
+    }
+}

Undefined/skills/agents/file_analysis_agent/tools/extract_pdf/handler.py ADDED Viewed

@@ -0,0 +1,67 @@
+import fitz
+from pathlib import Path
+from typing import Any, Dict
+import logging
+logger = logging.getLogger(__name__)
+async def execute(args: Dict[str, Any], context: Dict[str, Any]) -> str:
+    """Summarise a PDF: size, page count, metadata, text preview, image count.
+
+    Args:
+        args: Tool arguments. ``file_path`` is the PDF to open;
+            ``extract_images`` (default False) adds a count of the images
+            found on all pages.
+        context: Not used by this handler.
+
+    Returns:
+        A newline-joined report whose text preview is capped at 5000
+        characters, or an error message if the path is invalid or parsing
+        raised (the exception is logged and its text is returned).
+    """
+    file_path: str = args.get("file_path", "")
+    extract_images: bool = args.get("extract_images", False)
+    path = Path(file_path)
+    if not path.exists():
+        return f"错误：文件不存在 {file_path}"
+    if not path.is_file():
+        return f"错误：{file_path} 不是文件"
+    try:
+        doc = fitz.open(str(path))
+        try:
+            page_count = len(doc)
+            info: list[str] = [
+                f"文件大小：{path.stat().st_size} 字节",
+                f"页数：{page_count}",
+            ]
+            metadata = doc.metadata
+            if metadata:
+                info.append("\n文档元数据：")
+                info.extend(
+                    f"  {key}: {value}" for key, value in metadata.items() if value
+                )
+            page_chunks: list[str] = []
+            has_text = False
+            image_count = 0
+            for page_num in range(page_count):
+                page = doc.load_page(page_num)
+                text_raw = page.get_text()
+                text = str(text_raw) if text_raw else ""
+                # Track real text separately: the page banners alone must not
+                # count, or the "no text" notice below could never trigger.
+                has_text = has_text or bool(text.strip())
+                page_chunks.append(f"\n--- 第 {page_num + 1} 页 ---\n{text}")
+                if extract_images:
+                    image_count += len(page.get_images())
+            if extract_images:
+                info.append(f"\n图片数量：{image_count}")
+            if has_text:
+                text_content = "".join(page_chunks)
+            else:
+                text_content = "(文档未检测到文本内容，可能是扫描版 PDF 或图片)"
+            info.append(f"\n文本内容预览（前 5000 字符）：\n{text_content[:5000]}")
+            if len(text_content) > 5000:
+                info.append(f"\n... (共 {len(text_content)} 字符)")
+            return "\n".join(info)
+        finally:
+            # Release the document handle even when a page fails to parse.
+            doc.close()
+    except Exception as e:
+        logger.exception("解析 PDF 失败: %s", e)
+        return f"解析 PDF 失败: {e}"

Undefined/skills/agents/file_analysis_agent/tools/extract_pptx/config.json ADDED Viewed

@@ -0,0 +1,17 @@
+{
+    "type": "function",
+    "function": {
+        "name": "extract_pptx",
+        "description": "提取 Microsoft PowerPoint 演示文稿 (.pptx) 的文本内容，包括幻灯片文本和基本结构。",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "file_path": {
+                    "type": "string",
+                    "description": "本地 PowerPoint 文件路径"
+                }
+            },
+            "required": ["file_path"]
+        }
+    }
+}

Undefined/skills/agents/file_analysis_agent/tools/extract_pptx/handler.py ADDED Viewed

@@ -0,0 +1,73 @@
+from pptx import Presentation
+from pathlib import Path
+from typing import Any, Dict
+import logging
+logger = logging.getLogger(__name__)
+async def execute(args: Dict[str, Any], context: Dict[str, Any]) -> str:
+    """Summarise a .pptx file: size, slide count, core properties, slide text.
+
+    Args:
+        args: Tool arguments; ``file_path`` is the presentation to open.
+        context: Not used by this handler.
+
+    Returns:
+        A newline-joined report whose per-slide text preview is capped at
+        5000 characters, or an error message if the path is invalid or
+        parsing raised.
+    """
+    file_path: str = args.get("file_path", "")
+    deck_path = Path(file_path)
+    if not deck_path.exists():
+        return f"错误：文件不存在 {file_path}"
+    if not deck_path.is_file():
+        return f"错误：{file_path} 不是文件"
+    try:
+        prs = Presentation(str(deck_path))
+        report: list[str] = [f"文件大小：{deck_path.stat().st_size} 字节"]
+        report.append(f"幻灯片数量：{len(prs.slides)}")
+        props = prs.core_properties
+        if props.title:
+            report.append(f"标题: {props.title}")
+        if props.author:
+            report.append(f"作者: {props.author}")
+        if props.last_modified_by:
+            report.append(f"最后修改者: {props.last_modified_by}")
+        slide_blocks: list[str] = []
+        for number, slide in enumerate(prs.slides, 1):
+            pieces = [f"\n--- 幻灯片 {number} ---\n"]
+            # Look the title shape up once per slide, not once per shape.
+            title_shape = slide.shapes.title
+            heading = title_shape.text.strip() if title_shape else ""
+            if heading:
+                pieces.append(f"标题: {heading}\n")
+            body_count = 0
+            for shape in slide.shapes:
+                if shape != title_shape:
+                    # Not every shape exposes text; those are skipped.
+                    raw = getattr(shape, "text", None)
+                    if raw:
+                        cleaned = raw.strip()
+                        if cleaned:
+                            body_count += 1
+                            pieces.append(f"{cleaned}\n")
+            if not heading and body_count == 0:
+                pieces.append("(空白幻灯片)")
+            slide_blocks.append("".join(pieces))
+        preview = "".join(slide_blocks)
+        if not preview.strip():
+            preview = "(演示文稿未检测到文本内容)"
+        report.append(f"\n内容预览（前 5000 字符）：\n{preview[:5000]}")
+        if len(preview) > 5000:
+            report.append(f"\n... (共 {len(preview)} 字符)")
+        return "\n".join(report)
+    except Exception as e:
+        logger.exception(f"解析 PowerPoint 失败: {e}")
+        return f"解析 PowerPoint 失败: {e}"

Undefined/skills/agents/file_analysis_agent/tools/extract_xlsx/config.json ADDED Viewed

@@ -0,0 +1,17 @@
+{
+    "type": "function",
+    "function": {
+        "name": "extract_xlsx",
+        "description": "提取 Microsoft Excel 工作簿 (.xlsx) 或 CSV 文件 (.csv) 的表格数据，包括工作表列表和内容预览。不支持旧版 .xls 格式。",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "file_path": {
+                    "type": "string",
+                    "description": "本地 Excel 或 CSV 文件路径"
+                }
+            },
+            "required": ["file_path"]
+        }
+    }
+}

Undefined/skills/agents/file_analysis_agent/tools/extract_xlsx/handler.py ADDED Viewed

@@ -0,0 +1,101 @@
+from pathlib import Path
+from typing import Any, Dict
+import logging
+import csv
+logger = logging.getLogger(__name__)
+async def execute(args: Dict[str, Any], context: Dict[str, Any]) -> str:
+    """Summarise a spreadsheet, routing ``.csv`` files to the CSV reader.
+
+    Args:
+        args: Tool arguments; ``file_path`` is the file to open.
+        context: Not used by this handler.
+
+    Returns:
+        The report from the CSV or Excel helper (any extension other than
+        ``.csv`` goes to the Excel helper), or an error message if the path
+        is invalid or the helper raised.
+    """
+    file_path: str = args.get("file_path", "")
+    sheet_path = Path(file_path)
+    if not sheet_path.exists():
+        return f"错误：文件不存在 {file_path}"
+    if not sheet_path.is_file():
+        return f"错误：{file_path} 不是文件"
+    try:
+        # The helpers append to this list and render it at the top of the report.
+        header: list[str] = [f"文件大小：{sheet_path.stat().st_size} 字节"]
+        is_csv = sheet_path.suffix.lower() == ".csv"
+        if is_csv:
+            return await _extract_csv(sheet_path, header)
+        return await _extract_excel(sheet_path, header)
+    except Exception as e:
+        logger.exception(f"解析 Excel/CSV 失败: {e}")
+        return f"解析 Excel/CSV 失败: {e}"
+async def _extract_csv(path: Path, info: list[str]) -> str:
+    """Append CSV statistics to ``info`` and return them with a 20-row preview.
+
+    Args:
+        path: CSV file, decoded as UTF-8 with an optional BOM.
+        info: Report lines collected so far; this function appends the sheet
+            label, row count and column count (taken from the first row).
+
+    Returns:
+        The joined ``info`` lines followed by the preview, ``"CSV 文件为空"``
+        when the file has no rows, or an error message if reading raised.
+    """
+    from itertools import islice
+    try:
+        # newline="" leaves line splitting to the csv module, so line breaks
+        # inside quoted fields are kept exactly as written.
+        with open(path, "r", encoding="utf-8-sig", newline="") as f:
+            reader = csv.reader(f)
+            # Keep only the previewed rows in memory; just count the rest.
+            head = list(islice(reader, 20))
+            total_rows = len(head) + sum(1 for _ in reader)
+        if not head:
+            return "CSV 文件为空"
+        info.append("工作表：CSV (单个)")
+        info.append(f"总行数：{total_rows}")
+        info.append(f"列数：{len(head[0])}")
+        preview = ["\n--- 前 20 行预览 ---\n"]
+        preview.extend(
+            f"行 {i}: {' | '.join(row)}" for i, row in enumerate(head, 1)
+        )
+        if total_rows > 20:
+            preview.append(f"\n... (共 {total_rows} 行)")
+        return "\n".join(info) + "\n" + "\n".join(preview)
+    except Exception as e:
+        return f"解析 CSV 失败: {e}"
+async def _extract_excel(path: Path, info: list[str]) -> str:
+    """Append workbook statistics to ``info`` and return them with previews.
+
+    Only the first 5 worksheets are inspected, at most 100 rows are read
+    from each, and 15 of those rows are previewed.
+
+    Args:
+        path: Workbook to open with openpyxl in read-only, values-only mode.
+        info: Report lines collected so far; sheet names and per-sheet
+            row/column counts are appended to it.
+
+    Returns:
+        The joined ``info`` lines followed by the per-sheet previews, or an
+        error message if loading or reading raised.
+    """
+    try:
+        from openpyxl import load_workbook
+        wb = load_workbook(str(path), read_only=True, data_only=True)
+        try:
+            sheet_names = wb.sheetnames
+            info.append(f"工作表数量：{len(sheet_names)}")
+            info.append(f"工作表列表：{', '.join(sheet_names)}")
+            all_content = []
+            for sheet_name in sheet_names[:5]:
+                ws = wb[sheet_name]
+                rows = list(ws.iter_rows(max_row=100, values_only=True))
+                if not rows:
+                    continue
+                # default=0 covers sheets whose rows are all empty tuples,
+                # where a bare max() would raise ValueError.
+                max_cols = max((len(row) for row in rows if row), default=0)
+                info.append(f"\n工作表 '{sheet_name}':")
+                info.append(f"  行数（前 100 行）: {len(rows)}")
+                info.append(f"  列数: {max_cols}")
+                preview = [f"  --- {sheet_name} 前 15 行预览 ---\n"]
+                for i, row in enumerate(rows[:15], 1):
+                    row_values = [str(cell) if cell is not None else "" for cell in row]
+                    preview.append(f"    行 {i}: {' | '.join(row_values)}")
+                if len(rows) > 15:
+                    preview.append(f"    ... (共 {len(rows)} 行)")
+                all_content.append("\n".join(preview))
+            if len(sheet_names) > 5:
+                info.append(f"\n... (还有 {len(sheet_names) - 5} 个工作表)")
+            return "\n".join(info) + "\n" + "\n".join(all_content)
+        finally:
+            # Read-only workbooks keep the file open until closed explicitly.
+            wb.close()
+    except Exception as e:
+        logger.exception("解析 Excel 失败: %s", e)
+        return f"解析 Excel 失败: {e}"

Undefined/skills/agents/file_analysis_agent/tools/get_current_time/config.json ADDED Viewed

@@ -0,0 +1,12 @@
+{
+    "type": "function",
+    "function": {
+        "name": "get_current_time",
+        "description": "获取当前系统时间。",
+        "parameters": {
+            "type": "object",
+            "properties": {},
+            "required": []
+        }
+    }
+}

Undefined/skills/agents/file_analysis_agent/tools/get_current_time/handler.py ADDED Viewed

@@ -0,0 +1,5 @@
+from typing import Any, Dict
+from datetime import datetime
+async def execute(args: Dict[str, Any], context: Dict[str, Any]) -> str:
+    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``.
+
+    Both ``args`` and ``context`` are ignored.
+    """
+    now = datetime.now()
+    return f"{now:%Y-%m-%d %H:%M:%S}"

Undefined/skills/agents/file_analysis_agent/tools/read_text_file/config.json ADDED Viewed

@@ -0,0 +1,21 @@
+{
+    "type": "function",
+    "function": {
+        "name": "read_text_file",
+        "description": "读取文本文件内容，自动检测文件编码。支持 txt, md, log, rst, json, yaml, xml, html, css, js, py 等纯文本格式。大文件可限制读取行数。",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "file_path": {
+                    "type": "string",
+                    "description": "本地文件路径"
+                },
+                "max_lines": {
+                    "type": "number",
+                    "description": "最大读取行数，默认读取全部"
+                }
+            },
+            "required": ["file_path"]
+        }
+    }
+}