npm - @birthday8/doc-mcp - Versions diffs - 1.0.1 → 1.0.3 - Mend

@birthday8/doc-mcp 1.0.1 → 1.0.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (14)

package/README.md +2 -2
package/index.js +61 -65
package/install.js +45 -35
package/package.json +2 -4
package/python/docx_converter.py +1152 -428
package/python/html_fixer.py +125 -0
package/python/html_rules.py +570 -0
package/python/html_validator.py +174 -0
package/python/html_validator_strict.py +428 -0
package/python/sample/example.html +407 -0
package/python/sample/html_schema.py +283 -0
package/python/server.py +233 -123
package/python/test_error_detection.py +84 -0
package/python/test_strict_validation.py +118 -0

package/python/server.py CHANGED

@@ -15,6 +15,9 @@ from mcp.types import TextContent, Tool
 # Import the conversion module
 sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
 from docx_converter import convert_html_to_docx as docx_convert
+from html_validator import validator, template_generator
+from html_validator_strict import StrictHTMLValidator, ValidationError, strict_validator
+from html_fixer import HTMLFixer
 # Create MCP server
 app = Server("doc-creator")
@@ -32,100 +35,214 @@ async def list_tools() -> list[Tool]:
                 "properties": {
                     "html_path": {
                         "type": "string",
-                        "description": "HTML文件的完整路径"
+                        "description": "HTML文件的完整路径",
                     },
                     "output_path": {
                         "type": "string",
-                        "description": "输出DOCX文件的路径（可选，默认与HTML同目录）"
-                    }
+                        "description": "输出DOCX文件的路径",
+                    },
                 },
-                "required": ["html_path"]
-            }
+                "required": ["html_path", "output_path"],
+            },
         ),
         Tool(
             name="generate_document",
-            description="根据HTML内容生成Word文档",
+            description="""
+            ⚠️ 生成Word文档 - 步骤6（必须先完成步骤1-5）
+            📋 必须按顺序执行的完整流程：
+            1. get_html_schema() - 获取结构化格式约束
+            2. get_html_constraints() - 获取HTML约束示例
+            3. 严格根据约束生成HTML内容
+            4. validate_html() - 验证HTML格式
+            5. 修复错误（重复步骤4直到无错误）
+            6. generate_document() - 生成文档（若有错误回到步骤5）
+            ⚠️ 重要：必须先完成步骤1-5，且validate_html()验证通过后才能调用此工具
+            🔧 功能：
+            - 如果验证失败，返回详细错误报告
+            - 根据报错修复HTML后重新调用
+            """,
             inputSchema={
                 "type": "object",
                 "properties": {
                     "content": {
                         "type": "string",
-                        "description": "HTML内容（不需要完整的HTML结构，只需要body内的内容）"
+                        "description": "HTML内容（不需要完整的HTML结构，只需要body内的内容）",
                     },
                     "title": {
                         "type": "string",
                         "description": "文档标题",
-                        "default": "文档"
+                        "default": "文档",
                     },
-                    "output_dir": {
-                        "type": "string",
-                        "description": "输出目录（可选，默认使用workspace/documents/）"
-                    }
+                    "output_dir": {"type": "string", "description": "输出目录（必填）"},
                 },
-                "required": ["content"]
-            }
+                "required": ["content", "output_dir"],
+            },
         ),
         Tool(
-            name="get_html_template",
-            description="获取完整的HTML模板，包含CSS样式",
+            name="get_html_constraints",
+            description="""
+            🔑 获取HTML格式约束示例 - 步骤2（必须在步骤3之前执行）
+            📋 说明：
+            - 这是步骤2，必须在步骤3之前执行
+            - 返回预定义的HTML约束示例，包含所有支持的格式
+            - 示例包含：字体、颜色、大小、段距、嵌套结构、表格、图片、公式等
+            - 参考示例格式生成自己的HTML内容
+            ⚠️ 重要：
+            - 必须在步骤1之后执行
+            - 参考此示例确保格式正确
+            - 不要直接复制示例内容，而是学习格式用法
+            """,
             inputSchema={
                 "type": "object",
-                "properties": {}
-            }
-        )
+                "properties": {},
+            },
+        ),
+        Tool(
+            name="validate_html",
+            description="""
+            ✅ 验证HTML格式 - 步骤4（必须在步骤6之前执行）
+            📋 用法：
+            - 验证HTML格式是否正确
+            - 查看详细的错误和警告信息
+            - 获取格式建议
+            ⚠️ 重要：这是步骤4，必须在步骤6之前执行
+            - 验证通过后才能进入步骤6
+            - 验证失败必须修复并重新验证，直到无错误
+            🔍 检查项：
+            • HTML基本结构
+            • 支持的标签和属性
+            • 颜色格式（必须是 #RRGGBB）
+            • 标签闭合和嵌套
+            • 自闭合标签格式
+            """,
+            inputSchema={
+                "type": "object",
+                "properties": {
+                    "html_content": {"type": "string", "description": "HTML内容"}
+                },
+                "required": ["html_content"],
+            },
+        ),
+        Tool(
+            name="get_element_examples",
+            description="获取各种HTML元素的示例代码",
+            inputSchema={
+                "type": "object",
+                "properties": {
+                    "category": {
+                        "type": "string",
+                        "description": "元素类别: headings, text, colors, lists, table, image, formula",
+                        "default": "all",
+                    }
+                },
+            },
+        ),
+        Tool(
+            name="get_html_schema",
+            description="""
+            🔑 获取HTML格式约束Schema - 步骤1（必须首先执行）
+            📋 说明：
+            - 这是步骤1，必须首先执行
+            - 获取完整的JSON格式约束
+            - 了解所有支持的标签、属性、样式
+            - 查看颜色格式、嵌套规则等约束
+            ⚠️ 重要：
+            - 必须在生成HTML之前调用
+            - 完整了解所有约束后再进入步骤2
+            """,
+            inputSchema={"type": "object", "properties": {}},
+        ),
     ]
 @app.call_tool()
 async def call_tool(name: str, arguments: dict) -> list[TextContent]:
     """调用工具"""
     if name == "convert_document":
         html_path = arguments.get("html_path")
         output_path = arguments.get("output_path")
         if not html_path:
             return [TextContent(type="text", text="错误：必须提供html_path参数")]
-        if not os.path.exists(html_path):
-            return [TextContent(type="text", text=f"错误：HTML文件不存在: {html_path}")]
-        # 如果未指定输出路径，使用相同目录和文件名
         if not output_path:
-            base_name = os.path.splitext(html_path)[0]
-            output_path = base_name + ".docx"
+            return [TextContent(type="text", text="错误：必须提供output_path参数")]
+        if not os.path.exists(html_path):
+            return [TextContent(type="text", text="错误：HTML文件不存在")]
         try:
             docx_convert(html_path, output_path)
-            return [TextContent(
-                type="text",
-                text=f"✅ 转换成功！\n📄 HTML文件: {html_path}\n📝 Word文件: {output_path}"
-            )]
+            return [TextContent(type="text", text=f"✅ 转换成功！\n📝 Word文件已生成")]
         except Exception as e:
             return [TextContent(type="text", text=f"❌ 转换失败: {str(e)}")]
     elif name == "generate_document":
         content = arguments.get("content", "")
         title = arguments.get("title", "文档")
         output_dir = arguments.get("output_dir")
         if not content:
             return [TextContent(type="text", text="错误：必须提供content参数")]
-        # 构建输出目录
         if not output_dir:
-            workspace_dir = Path(__file__).parent.parent / "workspace" / "documents"
-            from datetime import datetime
-            timestamp = datetime.now().strftime("%Y%m%d")
-            output_dir = workspace_dir / f"{timestamp}_{title.replace(' ', '_')}"
+            return [TextContent(type="text", text="错误：必须提供output_dir参数")]
         output_dir = Path(output_dir)
         output_dir.mkdir(parents=True, exist_ok=True)
+        # 🔧 自动修复HTML内容
+        fixer = HTMLFixer()
+        fixed_content = fixer.fix(content)
+        # 🔒 严格验证HTML内容
+        validation_result = strict_validator.validate(fixed_content, strict_mode=False)
+        # 检查是否有错误
+        if not validation_result.is_valid:
+            # 抛出验证错误异常，返回结构化JSON
+            error_result = validation_result.to_json()
+            error_message = f"❌ HTML验证失败！\n\n"
+            error_message += f"发现 {len(validation_result.errors)} 个错误和 {len(validation_result.warnings)} 个警告\n\n"
+            # 列出所有错误
+            if validation_result.errors:
+                error_message += "【错误详情】\n"
+                for i, error in enumerate(validation_result.errors, 1):
+                    error_message += f"{i}. [{error['code']}] {error['message']}\n"
+                    if error.get("details"):
+                        error_message += f"   详情: {error['details']}\n"
+                error_message += "\n"
+            # 列出所有警告
+            if validation_result.warnings:
+                error_message += "【警告详情】\n"
+                for i, warning in enumerate(validation_result.warnings, 1):
+                    error_message += f"{i}. [{warning['code']}] {warning['message']}\n"
+                    if warning.get("details"):
+                        error_message += f"   详情: {warning['details']}\n"
+                error_message += "\n"
+            error_message += "请修复以上错误后重新生成文档。\n"
+            error_message += "💡 提示：调用 get_html_schema() 查看完整的格式约束规则。"
+            return [TextContent(type="text", text=error_message)]
         # 生成HTML文件
         html_path = output_dir / "document.html"
         docx_path = output_dir / "document.docx"
         # 构建完整HTML
         html_template = f"""<!DOCTYPE html>
 <html lang="zh-CN">
@@ -217,88 +334,84 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
 </head>
 <body>
     <h1>{title}</h1>
-    {content}
+    {fixed_content}
 </body>
 </html>"""
         # 保存HTML
-        with open(html_path, 'w', encoding='utf-8') as f:
+        with open(html_path, "w", encoding="utf-8") as f:
             f.write(html_template)
         # 转换为DOCX
         try:
             docx_convert(str(html_path), str(docx_path))
-            return [TextContent(
-                type="text",
-                text=f"✅ 文档生成成功！\n📁 输出目录: {output_dir}\n📄 HTML文件: {html_path}\n📝 Word文件: {docx_path}"
-            )]
+            # 返回结果（包含修复报告）
+            result = f"✅ 文档生成成功！\n📝 Word文件已生成\n\n"
+            if fixer.fixes:
+                result += f"🔧 自动修复报告：\n{fixer.get_fixes_report()}\n\n"
+                result += "⚠️ 注意：部分内容已自动修复，建议下次生成时直接使用正确格式"
+            else:
+                result += "✓ 格式完美，无需修复"
+            return [TextContent(type="text", text=result)]
         except Exception as e:
             return [TextContent(type="text", text=f"❌ 转换失败: {str(e)}")]
-    elif name == "get_html_template":
-        template = """<!DOCTYPE html>
-<html lang="zh-CN">
-<head>
-    <meta charset="UTF-8">
-    <title>文档标题</title>
-    <style>
-        /* 全局配置 */
-        :root {
-            --default-font: '微软雅黑';
-            --default-size: 12pt;
-            --default-color: #333;
-            --line-height: 1.8;
-        }
-        body {
-            font-family: var(--default-font);
-            font-size: var(--default-size);
-            color: var(--default-color);
-            line-height: var(--line-height);
-            padding: 20px;
-            max-width: 800px;
-            margin: 0 auto;
-        }
-        /* 标题样式 */
-        h1 { font-size: 18pt; color: #4a3f6b; text-align: center; }
-        h2 { font-size: 16pt; color: #5b4e8c; border-bottom: 2px solid #667eea; }
-        h3 { font-size: 14pt; color: #6b5b7a; }
-        /* 段落 */
-        p { text-indent: 2em; margin-bottom: 10pt; }
-        /* 文本格式 */
-        .red { color: red; }
-        .blue { color: blue; }
-        .green { color: green; }
-        .highlight { background-color: yellow; }
-        /* 提示框 */
-        .info { background-color: #e3f2fd; padding: 10px; border-left: 4px solid #2196F3; }
-        .warning { background-color: #fff3cd; padding: 10px; border-left: 4px solid #ffc107; }
-        .success { background-color: #d4edda; padding: 10px; border-left: 4px solid #28a745; }
-        /* 表格 */
-        table { width: 100%; border-collapse: collapse; margin: 20px 0; }
-        th, td { border: 1px solid #ddd; padding: 12px; text-align: center; }
-        th { background-color: #667eea; color: white; }
-        tr:nth-child(even) { background-color: #f9f9f9; }
-    </style>
-</head>
-<body>
-    <h1>文档标题</h1>
-    <h2>一、章节标题</h2>
-    <p>这是正文段落，<strong>支持加粗</strong>、<em>斜体</em>、<span class="red">彩色文字</span>等格式。</p>
-    <div class="info">
-        <strong>提示：</strong> 这是信息提示框
-    </div>
-</body>
-</html>"""
-        return [TextContent(type="text", text=template)]
+    elif name == "get_html_constraints":
+        constraint_example = template_generator.get_constraint_example()
+        return [TextContent(type="text", text=constraint_example)]
+    elif name == "validate_html":
+        html_content = arguments.get("html_content", "")
+        if not html_content:
+            return [TextContent(type="text", text="错误：必须提供html_content参数")]
+        # 使用严格验证器
+        result = strict_validator.validate(html_content, strict_mode=False)
+        json_result = result.to_json()
+        # 生成可读报告
+        report = strict_validator.get_validation_report(result)
+        # 返回JSON格式结果
+        import json
+        return [
+            TextContent(
+                type="text",
+                text=f"{report}\n\n【JSON格式结果】\n```json\n{json.dumps(json_result, ensure_ascii=False, indent=2)}\n```",
+            )
+        ]
+    elif name == "get_element_examples":
+        category = arguments.get("category", "all")
+        examples = template_generator.get_element_examples()
+        if category == "all":
+            result = "=== HTML 元素示例 ===\n\n"
+            for cat, elems in examples.items():
+                result += f"## {cat.upper()}\n\n"
+                for name, code in elems.items():
+                    result += f"### {name}\n```html\n{code}\n```\n\n"
+        elif category in examples:
+            result = f"## {category.upper()}\n\n"
+            for name, code in examples[category].items():
+                result += f"### {name}\n```html\n{code}\n```\n\n"
+        else:
+            result = f"未知类别: {category}\n可用类别: {', '.join(examples.keys())}"
+        return [TextContent(type="text", text=result)]
+    elif name == "get_html_schema":
+        import json
+        schema = template_generator.get_schema()
+        schema_json = json.dumps(schema, ensure_ascii=False, indent=2)
+        return [TextContent(type="text", text=schema_json)]
     else:
         return [TextContent(type="text", text=f"未知工具: {name}")]
@@ -306,15 +419,12 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
 async def main():
     """主函数"""
     from mcp.server.stdio import stdio_server
     async with stdio_server() as (read_stream, write_stream):
-        await app.run(
-            read_stream,
-            write_stream,
-            app.create_initialization_options()
-        )
+        await app.run(read_stream, write_stream, app.create_initialization_options())
 if __name__ == "__main__":
     import asyncio
     asyncio.run(main())

package/python/test_error_detection.py ADDED

@@ -0,0 +1,84 @@
+# -*- coding: utf-8 -*-
+"""测试错误检测和修复机制"""
+from html_validator import validator
+from html_fixer import HTMLFixer
+print("=" * 50)
+print("错误检测和修复机制测试")
+print("=" * 50)
+# 测试1: 包含错误的HTML
+print("\n【测试1】包含错误的HTML")
+html_with_errors = """
+<p style="color: red;">红色文本</p>
+<img src="test.jpg" loading="lazy">
+<p style="background-color: blue;">蓝色背景</p>
+<p style="color: rgb(255,0,0);">RGB红色</p>
+<br>
+<hr>
+"""
+print("原始HTML:")
+print(html_with_errors)
+# 验证
+print("\n【验证结果】")
+result = validator.validate(html_with_errors)
+print(f"✓ 验证通过: {result['valid']}")
+print(f"✗ 错误数量: {len(result['errors'])}")
+print(f"⚠ 警告数量: {len(result['warnings'])}")
+if result["errors"]:
+    print("\n错误详情:")
+    for error in result["errors"]:
+        print(f"  - {error}")
+if result["warnings"]:
+    print("\n警告详情:")
+    for warning in result["warnings"]:
+        print(f"  - {warning}")
+# 修复
+print("\n【自动修复】")
+fixer = HTMLFixer()
+fixed_html = fixer.fix(html_with_errors)
+print(f"修复报告:\n{fixer.get_fixes_report()}")
+print("\n修复后的HTML:")
+print(fixed_html)
+# 验证修复后的结果
+print("\n【修复后验证】")
+result_after_fix = validator.validate(fixed_html)
+print(f"✓ 验证通过: {result_after_fix['valid']}")
+print(f"✗ 错误数量: {len(result_after_fix['errors'])}")
+# 测试2: 正确的HTML
+print("\n" + "=" * 50)
+print("\n【测试2】正确的HTML")
+correct_html = """
+<p style="color: #FF0000;">红色文本</p>
+<img src="test.png" alt="图片" />
+<p style="background-color: #0000FF;">蓝色背景</p>
+<p style="color: #FF0000;">十六进制红色</p>
+<br />
+<hr />
+"""
+print("HTML内容:")
+print(correct_html)
+result2 = validator.validate(correct_html)
+print(f"\n验证结果:")
+print(f"✓ 验证通过: {result2['valid']}")
+print(f"✗ 错误数量: {len(result2['errors'])}")
+print(f"⚠ 警告数量: {len(result2['warnings'])}")
+fixer2 = HTMLFixer()
+fixed_html2 = fixer2.fix(correct_html)
+print(f"\n修复报告:\n{fixer2.get_fixes_report()}")
+print("\n" + "=" * 50)
+print("测试完成！")
+print("=" * 50)

package/python/test_strict_validation.py ADDED

@@ -0,0 +1,118 @@
+"""
+测试严格验证器
+"""
+from html_validator_strict import StrictHTMLValidator, ValidationError, strict_validator
+# 测试1: 不允许的标签
+print("=== 测试1: 不允许的标签 ===")
+html1 = """<!DOCTYPE html>
+<html>
+<body>
+    <p>正常段落</p>
+    <script>alert('hello')</script>
+    <p>另一个段落</p>
+</body>
+</html>"""
+result1 = strict_validator.validate(html1, strict_mode=False)
+print(f"验证通过: {result1.is_valid}")
+print(f"错误数: {len(result1.errors)}")
+for error in result1.errors:
+    print(f"  - {error['code']}: {error['message']}")
+print()
+# 测试2: 不允许的属性
+print("=== 测试2: 不允许的属性 ===")
+html2 = """<!DOCTYPE html>
+<html>
+<body>
+    <img src="test.jpg" alt="测试" onclick="alert('click')" />
+</body>
+</html>"""
+result2 = strict_validator.validate(html2, strict_mode=False)
+print(f"验证通过: {result2.is_valid}")
+print(f"错误数: {len(result2.errors)}")
+for error in result2.errors:
+    print(f"  - {error['code']}: {error['message']}")
+print()
+# 测试3: 错误的颜色格式
+print("=== 测试3: 错误的颜色格式 ===")
+html3 = """<!DOCTYPE html>
+<html>
+<body>
+    <p style="color: red;">红色文本</p>
+    <p style="color: rgb(255,0,0);">RGB颜色</p>
+    <p style="color: #FF0000;">正确的颜色</p>
+</body>
+</html>"""
+result3 = strict_validator.validate(html3, strict_mode=False)
+print(f"验证通过: {result3.is_valid}")
+print(f"错误数: {len(result3.errors)}")
+for error in result3.errors:
+    print(f"  - {error['code']}: {error['message']}")
+print()
+# 测试4: 正确的HTML
+print("=== 测试4: 正确的HTML ===")
+html4 = """<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+    <meta charset="UTF-8">
+</head>
+<body>
+    <h1>标题</h1>
+    <p><strong>加粗</strong>文本</p>
+    <p style="color: #FF0000;">红色文本</p>
+    <img src="test.jpg" alt="测试" />
+</body>
+</html>"""
+result4 = strict_validator.validate(html4, strict_mode=False)
+print(f"验证通过: {result4.is_valid}")
+print(f"错误数: {len(result4.errors)}")
+print(f"警告数: {len(result4.warnings)}")
+if result4.warnings:
+    for warning in result4.warnings:
+        print(f"  - {warning['code']}: {warning['message']}")
+print()
+# 测试5: 错误的嵌套
+print("=== 测试5: 错误的嵌套 ===")
+html5 = """<!DOCTYPE html>
+<html>
+<body>
+    <p>段落包含<div>块级元素</div></p>
+</body>
+</html>"""
+result5 = strict_validator.validate(html5, strict_mode=False)
+print(f"验证通过: {result5.is_valid}")
+print(f"错误数: {len(result5.errors)}")
+for error in result5.errors:
+    print(f"  - {error['code']}: {error['message']}")
+print()
+# 测试6: 测试example.html
+print("=== 测试6: 测试example.html ===")
+import os
+example_path = os.path.join("sample", "example.html")
+if os.path.exists(example_path):
+    with open(example_path, "r", encoding="utf-8") as f:
+        html6 = f.read()
+    result6 = strict_validator.validate(html6, strict_mode=False)
+    print(f"验证通过: {result6.is_valid}")
+    print(f"错误数: {len(result6.errors)}")
+    print(f"警告数: {len(result6.warnings)}")
+    if result6.errors:
+        print("错误详情:")
+        for error in result6.errors:
+            print(f"  - {error['code']}: {error['message']}")
+    if result6.warnings:
+        print("警告详情:")
+        for warning in result6.warnings:
+            print(f"  - {warning['code']}: {warning['message']}")
+else:
+    print("example.html 不存在")
+print()
+print("✅ 所有测试完成！")