npm - @birthday8/doc-mcp - Versions diffs - 1.0.1 → 1.0.2 - Mend

@birthday8/doc-mcp 1.0.1 → 1.0.2

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (8) hide show

package/index.js +2 -11
package/package.json +2 -4
package/python/docx_converter.py +1220 -428
package/python/html_fixer.py +125 -0
package/python/html_validator.py +389 -0
package/python/sample/example.html +407 -0
package/python/server.py +193 -120
package/python/test_error_detection.py +84 -0

package/python/server.py CHANGED Viewed

@@ -15,6 +15,8 @@ from mcp.types import TextContent, Tool
 # Import the conversion module
 sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
 from docx_converter import convert_html_to_docx as docx_convert
+from html_validator import validator, template_generator
+from html_fixer import HTMLFixer
 # Create MCP server
 app = Server("doc-creator")
@@ -32,100 +34,186 @@ async def list_tools() -> list[Tool]:
                 "properties": {
                     "html_path": {
                         "type": "string",
-                        "description": "HTML文件的完整路径"
+                        "description": "HTML文件的完整路径",
                     },
                     "output_path": {
                         "type": "string",
-                        "description": "输出DOCX文件的路径（可选，默认与HTML同目录）"
-                    }
+                        "description": "输出DOCX文件的路径（必填）",
+                    },
                 },
-                "required": ["html_path"]
-            }
+                "required": ["html_path", "output_path"],
+            },
         ),
         Tool(
             name="generate_document",
-            description="根据HTML内容生成Word文档",
+            description="""
+            ⚠️ 重要：生成Word文档 - 必须严格遵循以下步骤：
+            【步骤1：获取模板】
+            必须先调用 get_html_template() 获取标准模板
+            示例：get_html_template({ title: "文档标题" })
+            【步骤2：验证内容】
+            在生成文档前，必须调用 validate_html() 验证格式
+            示例：validate_html({ html_content: "你的HTML内容" })
+            【步骤3：生成文档】
+            验证通过后，调用此工具生成Word文档
+            ⚠️ 格式约束（必须遵守）：
+            1. 颜色必须用十六进制格式 #RRGGBB（禁止用 red, blue, rgb() 等）
+            2. 空标签必须自闭合：<img /> <br /> <hr />
+            3. 只支持标签：h1-h6, p, strong, em, u, s, sup, sub, img, table, ul, ol, div
+            4. 禁止属性：onclick, onload, loading, float, text-shadow 等
+            5. 嵌套规则：p不能包含div，strong不能包含p等
+            6. 颜色示例：
+               ✅ 正确：<span style="color: #FF0000;">红色</span>
+               ❌ 错误：<span style="color: red;">红色</span>
+            系统会自动修复常见错误，但仍建议生成时遵守规范。
+            """,
             inputSchema={
                 "type": "object",
                 "properties": {
                     "content": {
                         "type": "string",
-                        "description": "HTML内容（不需要完整的HTML结构，只需要body内的内容）"
+                        "description": "HTML内容（不需要完整的HTML结构，只需要body内的内容）",
                     },
                     "title": {
                         "type": "string",
                         "description": "文档标题",
-                        "default": "文档"
+                        "default": "文档",
                     },
-                    "output_dir": {
-                        "type": "string",
-                        "description": "输出目录（可选，默认使用workspace/documents/）"
-                    }
+                    "output_dir": {"type": "string", "description": "输出目录（必填）"},
                 },
-                "required": ["content"]
-            }
+                "required": ["content", "output_dir"],
+            },
         ),
         Tool(
             name="get_html_template",
-            description="获取完整的HTML模板，包含CSS样式",
+            description="""
+            🔑 获取HTML模板 - 生成文档前的第一步（必须调用）
+            📋 用法：
+            1. 在生成任何文档前，必须先调用此工具获取标准模板
+            2. 基于模板生成内容，确保格式正确
+            3. 模板已包含所有必要的样式和结构
+            ⚠️ 重要：
+            - 必须使用此工具获取的模板作为基础
+            - 不要自己创建HTML结构
+            - 模板已预设所有支持的格式
+            """,
+            inputSchema={
+                "type": "object",
+                "properties": {
+                    "title": {
+                        "type": "string",
+                        "description": "文档标题",
+                        "default": "文档",
+                    },
+                    "content": {"type": "string", "description": "初始内容（可选）"},
+                },
+            },
+        ),
+        Tool(
+            name="validate_html",
+            description="""
+            ✅ 验证HTML格式 - 生成文档前的必须步骤
+            📋 用法：
+            1. 在调用 generate_document 前，必须先验证HTML格式
+            2. 如果验证失败，必须修复错误后再生成
+            3. 系统会自动修复常见错误，但复杂错误需要手动修复
+            🔍 检查项：
+            • HTML基本结构
+            • 支持的标签和属性
+            • 颜色格式（必须是 #RRGGBB）
+            • 标签闭合和嵌套
+            • 自闭合标签格式
+            ⚠️ 如果验证失败：
+            - 查看错误报告
+            - 修复所有错误
+            - 重新验证
+            - 验证通过后再生成文档
+            """,
+            inputSchema={
+                "type": "object",
+                "properties": {
+                    "html_content": {"type": "string", "description": "HTML内容"}
+                },
+                "required": ["html_content"],
+            },
+        ),
+        Tool(
+            name="get_html_spec",
+            description="获取HTML格式规范文档",
+            inputSchema={"type": "object", "properties": {}},
+        ),
+        Tool(
+            name="get_element_examples",
+            description="获取各种HTML元素的示例代码",
             inputSchema={
                 "type": "object",
-                "properties": {}
-            }
-        )
+                "properties": {
+                    "category": {
+                        "type": "string",
+                        "description": "元素类别: headings, text, colors, lists, table, image, formula",
+                        "default": "all",
+                    }
+                },
+            },
+        ),
     ]
 @app.call_tool()
 async def call_tool(name: str, arguments: dict) -> list[TextContent]:
     """调用工具"""
     if name == "convert_document":
         html_path = arguments.get("html_path")
         output_path = arguments.get("output_path")
         if not html_path:
             return [TextContent(type="text", text="错误：必须提供html_path参数")]
-        if not os.path.exists(html_path):
-            return [TextContent(type="text", text=f"错误：HTML文件不存在: {html_path}")]
-        # 如果未指定输出路径，使用相同目录和文件名
         if not output_path:
-            base_name = os.path.splitext(html_path)[0]
-            output_path = base_name + ".docx"
+            return [TextContent(type="text", text="错误：必须提供output_path参数")]
+        if not os.path.exists(html_path):
+            return [TextContent(type="text", text="错误：HTML文件不存在")]
         try:
             docx_convert(html_path, output_path)
-            return [TextContent(
-                type="text",
-                text=f"✅ 转换成功！\n📄 HTML文件: {html_path}\n📝 Word文件: {output_path}"
-            )]
+            return [TextContent(type="text", text=f"✅ 转换成功！\n📝 Word文件已生成")]
         except Exception as e:
             return [TextContent(type="text", text=f"❌ 转换失败: {str(e)}")]
     elif name == "generate_document":
         content = arguments.get("content", "")
         title = arguments.get("title", "文档")
         output_dir = arguments.get("output_dir")
         if not content:
             return [TextContent(type="text", text="错误：必须提供content参数")]
-        # 构建输出目录
         if not output_dir:
-            workspace_dir = Path(__file__).parent.parent / "workspace" / "documents"
-            from datetime import datetime
-            timestamp = datetime.now().strftime("%Y%m%d")
-            output_dir = workspace_dir / f"{timestamp}_{title.replace(' ', '_')}"
+            return [TextContent(type="text", text="错误：必须提供output_dir参数")]
         output_dir = Path(output_dir)
         output_dir.mkdir(parents=True, exist_ok=True)
         # 生成HTML文件
         html_path = output_dir / "document.html"
         docx_path = output_dir / "document.docx"
+        # 🔧 自动修复HTML内容
+        fixer = HTMLFixer()
+        fixed_content = fixer.fix(content)
         # 构建完整HTML
         html_template = f"""<!DOCTYPE html>
 <html lang="zh-CN">
@@ -217,104 +305,89 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
 </head>
 <body>
     <h1>{title}</h1>
-    {content}
+    {fixed_content}
 </body>
 </html>"""
         # 保存HTML
-        with open(html_path, 'w', encoding='utf-8') as f:
+        with open(html_path, "w", encoding="utf-8") as f:
             f.write(html_template)
         # 转换为DOCX
         try:
             docx_convert(str(html_path), str(docx_path))
-            return [TextContent(
-                type="text",
-                text=f"✅ 文档生成成功！\n📁 输出目录: {output_dir}\n📄 HTML文件: {html_path}\n📝 Word文件: {docx_path}"
-            )]
+            # 返回结果（包含修复报告）
+            result = f"✅ 文档生成成功！\n📝 Word文件已生成\n\n"
+            if fixer.fixes:
+                result += f"🔧 自动修复报告：\n{fixer.get_fixes_report()}\n\n"
+                result += "⚠️ 注意：部分内容已自动修复，建议下次生成时直接使用正确格式"
+            else:
+                result += "✓ 格式完美，无需修复"
+            return [TextContent(type="text", text=result)]
         except Exception as e:
             return [TextContent(type="text", text=f"❌ 转换失败: {str(e)}")]
     elif name == "get_html_template":
-        template = """<!DOCTYPE html>
-<html lang="zh-CN">
-<head>
-    <meta charset="UTF-8">
-    <title>文档标题</title>
-    <style>
-        /* 全局配置 */
-        :root {
-            --default-font: '微软雅黑';
-            --default-size: 12pt;
-            --default-color: #333;
-            --line-height: 1.8;
-        }
-        body {
-            font-family: var(--default-font);
-            font-size: var(--default-size);
-            color: var(--default-color);
-            line-height: var(--line-height);
-            padding: 20px;
-            max-width: 800px;
-            margin: 0 auto;
-        }
-        /* 标题样式 */
-        h1 { font-size: 18pt; color: #4a3f6b; text-align: center; }
-        h2 { font-size: 16pt; color: #5b4e8c; border-bottom: 2px solid #667eea; }
-        h3 { font-size: 14pt; color: #6b5b7a; }
-        /* 段落 */
-        p { text-indent: 2em; margin-bottom: 10pt; }
-        /* 文本格式 */
-        .red { color: red; }
-        .blue { color: blue; }
-        .green { color: green; }
-        .highlight { background-color: yellow; }
-        /* 提示框 */
-        .info { background-color: #e3f2fd; padding: 10px; border-left: 4px solid #2196F3; }
-        .warning { background-color: #fff3cd; padding: 10px; border-left: 4px solid #ffc107; }
-        .success { background-color: #d4edda; padding: 10px; border-left: 4px solid #28a745; }
-        /* 表格 */
-        table { width: 100%; border-collapse: collapse; margin: 20px 0; }
-        th, td { border: 1px solid #ddd; padding: 12px; text-align: center; }
-        th { background-color: #667eea; color: white; }
-        tr:nth-child(even) { background-color: #f9f9f9; }
-    </style>
-</head>
-<body>
-    <h1>文档标题</h1>
-    <h2>一、章节标题</h2>
-    <p>这是正文段落，<strong>支持加粗</strong>、<em>斜体</em>、<span class="red">彩色文字</span>等格式。</p>
-    <div class="info">
-        <strong>提示：</strong> 这是信息提示框
-    </div>
-</body>
-</html>"""
+        title = arguments.get("title", "文档")
+        content = arguments.get("content", "")
+        template = template_generator.generate_template(title, content)
         return [TextContent(type="text", text=template)]
+    elif name == "validate_html":
+        html_content = arguments.get("html_content", "")
+        if not html_content:
+            return [TextContent(type="text", text="错误：必须提供html_content参数")]
+        result = validator.validate(html_content)
+        report = validator.get_validation_report(result)
+        return [TextContent(type="text", text=report)]
+    elif name == "get_html_spec":
+        spec_path = os.path.join(os.path.dirname(__file__), "..", "HTML_FORMAT_SPEC.md")
+        if os.path.exists(spec_path):
+            with open(spec_path, "r", encoding="utf-8") as f:
+                spec_content = f.read()
+            return [TextContent(type="text", text=spec_content)]
+        else:
+            return [TextContent(type="text", text="规范文档未找到")]
+    elif name == "get_element_examples":
+        category = arguments.get("category", "all")
+        examples = template_generator.get_element_examples()
+        if category == "all":
+            result = "=== HTML 元素示例 ===\n\n"
+            for cat, elems in examples.items():
+                result += f"## {cat.upper()}\n\n"
+                for name, code in elems.items():
+                    result += f"### {name}\n```html\n{code}\n```\n\n"
+        elif category in examples:
+            result = f"## {category.upper()}\n\n"
+            for name, code in examples[category].items():
+                result += f"### {name}\n```html\n{code}\n```\n\n"
+        else:
+            result = f"未知类别: {category}\n可用类别: {', '.join(examples.keys())}"
+        return [TextContent(type="text", text=result)]
     else:
         return [TextContent(type="text", text=f"未知工具: {name}")]
 async def main():
     """主函数"""
     from mcp.server.stdio import stdio_server
     async with stdio_server() as (read_stream, write_stream):
-        await app.run(
-            read_stream,
-            write_stream,
-            app.create_initialization_options()
-        )
+        await app.run(read_stream, write_stream, app.create_initialization_options())
 if __name__ == "__main__":
     import asyncio
     asyncio.run(main())

package/python/test_error_detection.py ADDED Viewed

@@ -0,0 +1,84 @@
+# -*- coding: utf-8 -*-
+"""测试错误检测和修复机制"""
+from html_validator import validator
+from html_fixer import HTMLFixer
+print("=" * 50)
+print("错误检测和修复机制测试")
+print("=" * 50)
+# 测试1: 包含错误的HTML
+print("\n【测试1】包含错误的HTML")
+html_with_errors = """
+<p style="color: red;">红色文本</p>
+<img src="test.jpg" loading="lazy">
+<p style="background-color: blue;">蓝色背景</p>
+<p style="color: rgb(255,0,0);">RGB红色</p>
+<br>
+<hr>
+"""
+print("原始HTML:")
+print(html_with_errors)
+# 验证
+print("\n【验证结果】")
+result = validator.validate(html_with_errors)
+print(f"✓ 验证通过: {result['valid']}")
+print(f"✗ 错误数量: {len(result['errors'])}")
+print(f"⚠ 警告数量: {len(result['warnings'])}")
+if result["errors"]:
+    print("\n错误详情:")
+    for error in result["errors"]:
+        print(f"  - {error}")
+if result["warnings"]:
+    print("\n警告详情:")
+    for warning in result["warnings"]:
+        print(f"  - {warning}")
+# 修复
+print("\n【自动修复】")
+fixer = HTMLFixer()
+fixed_html = fixer.fix(html_with_errors)
+print(f"修复报告:\n{fixer.get_fixes_report()}")
+print("\n修复后的HTML:")
+print(fixed_html)
+# 验证修复后的结果
+print("\n【修复后验证】")
+result_after_fix = validator.validate(fixed_html)
+print(f"✓ 验证通过: {result_after_fix['valid']}")
+print(f"✗ 错误数量: {len(result_after_fix['errors'])}")
+# 测试2: 正确的HTML
+print("\n" + "=" * 50)
+print("\n【测试2】正确的HTML")
+correct_html = """
+<p style="color: #FF0000;">红色文本</p>
+<img src="test.png" alt="图片" />
+<p style="background-color: #0000FF;">蓝色背景</p>
+<p style="color: #FF0000;">十六进制红色</p>
+<br />
+<hr />
+"""
+print("HTML内容:")
+print(correct_html)
+result2 = validator.validate(correct_html)
+print(f"\n验证结果:")
+print(f"✓ 验证通过: {result2['valid']}")
+print(f"✗ 错误数量: {len(result2['errors'])}")
+print(f"⚠ 警告数量: {len(result2['warnings'])}")
+fixer2 = HTMLFixer()
+fixed_html2 = fixer2.fix(correct_html)
+print(f"\n修复报告:\n{fixer2.get_fixes_report()}")
+print("\n" + "=" * 50)
+print("测试完成！")
+print("=" * 50)