@birthday8/doc-mcp 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/python/server.py CHANGED
@@ -15,6 +15,8 @@ from mcp.types import TextContent, Tool
15
15
  # Import the conversion module
16
16
  sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
17
17
  from docx_converter import convert_html_to_docx as docx_convert
18
+ from html_validator import validator, template_generator
19
+ from html_fixer import HTMLFixer
18
20
 
19
21
  # Create MCP server
20
22
  app = Server("doc-creator")
@@ -32,100 +34,186 @@ async def list_tools() -> list[Tool]:
32
34
  "properties": {
33
35
  "html_path": {
34
36
  "type": "string",
35
- "description": "HTML文件的完整路径"
37
+ "description": "HTML文件的完整路径",
36
38
  },
37
39
  "output_path": {
38
40
  "type": "string",
39
- "description": "输出DOCX文件的路径(可选,默认与HTML同目录)"
40
- }
41
+ "description": "输出DOCX文件的路径(必填)",
42
+ },
41
43
  },
42
- "required": ["html_path"]
43
- }
44
+ "required": ["html_path", "output_path"],
45
+ },
44
46
  ),
45
47
  Tool(
46
48
  name="generate_document",
47
- description="根据HTML内容生成Word文档",
49
+ description="""
50
+ ⚠️ 重要:生成Word文档 - 必须严格遵循以下步骤:
51
+
52
+ 【步骤1:获取模板】
53
+ 必须先调用 get_html_template() 获取标准模板
54
+ 示例:get_html_template({ title: "文档标题" })
55
+
56
+ 【步骤2:验证内容】
57
+ 在生成文档前,必须调用 validate_html() 验证格式
58
+ 示例:validate_html({ html_content: "你的HTML内容" })
59
+
60
+ 【步骤3:生成文档】
61
+ 验证通过后,调用此工具生成Word文档
62
+
63
+ ⚠️ 格式约束(必须遵守):
64
+ 1. 颜色必须用十六进制格式 #RRGGBB(禁止用 red, blue, rgb() 等)
65
+ 2. 空标签必须自闭合:<img /> <br /> <hr />
66
+ 3. 只支持标签:h1-h6, p, strong, em, u, s, sup, sub, img, table, ul, ol, div
67
+ 4. 禁止属性:onclick, onload, loading, float, text-shadow 等
68
+ 5. 嵌套规则:p不能包含div,strong不能包含p等
69
+ 6. 颜色示例:
70
+ ✅ 正确:<span style="color: #FF0000;">红色</span>
71
+ ❌ 错误:<span style="color: red;">红色</span>
72
+
73
+ 系统会自动修复常见错误,但仍建议生成时遵守规范。
74
+ """,
48
75
  inputSchema={
49
76
  "type": "object",
50
77
  "properties": {
51
78
  "content": {
52
79
  "type": "string",
53
- "description": "HTML内容(不需要完整的HTML结构,只需要body内的内容)"
80
+ "description": "HTML内容(不需要完整的HTML结构,只需要body内的内容)",
54
81
  },
55
82
  "title": {
56
83
  "type": "string",
57
84
  "description": "文档标题",
58
- "default": "文档"
85
+ "default": "文档",
59
86
  },
60
- "output_dir": {
61
- "type": "string",
62
- "description": "输出目录(可选,默认使用workspace/documents/)"
63
- }
87
+ "output_dir": {"type": "string", "description": "输出目录(必填)"},
64
88
  },
65
- "required": ["content"]
66
- }
89
+ "required": ["content", "output_dir"],
90
+ },
67
91
  ),
68
92
  Tool(
69
93
  name="get_html_template",
70
- description="获取完整的HTML模板,包含CSS样式",
94
+ description="""
95
+ 🔑 获取HTML模板 - 生成文档前的第一步(必须调用)
96
+
97
+ 📋 用法:
98
+ 1. 在生成任何文档前,必须先调用此工具获取标准模板
99
+ 2. 基于模板生成内容,确保格式正确
100
+ 3. 模板已包含所有必要的样式和结构
101
+
102
+ ⚠️ 重要:
103
+ - 必须使用此工具获取的模板作为基础
104
+ - 不要自己创建HTML结构
105
+ - 模板已预设所有支持的格式
106
+ """,
107
+ inputSchema={
108
+ "type": "object",
109
+ "properties": {
110
+ "title": {
111
+ "type": "string",
112
+ "description": "文档标题",
113
+ "default": "文档",
114
+ },
115
+ "content": {"type": "string", "description": "初始内容(可选)"},
116
+ },
117
+ },
118
+ ),
119
+ Tool(
120
+ name="validate_html",
121
+ description="""
122
+ ✅ 验证HTML格式 - 生成文档前的必须步骤
123
+
124
+ 📋 用法:
125
+ 1. 在调用 generate_document 前,必须先验证HTML格式
126
+ 2. 如果验证失败,必须修复错误后再生成
127
+ 3. 系统会自动修复常见错误,但复杂错误需要手动修复
128
+
129
+ 🔍 检查项:
130
+ • HTML基本结构
131
+ • 支持的标签和属性
132
+ • 颜色格式(必须是 #RRGGBB)
133
+ • 标签闭合和嵌套
134
+ • 自闭合标签格式
135
+
136
+ ⚠️ 如果验证失败:
137
+ - 查看错误报告
138
+ - 修复所有错误
139
+ - 重新验证
140
+ - 验证通过后再生成文档
141
+ """,
142
+ inputSchema={
143
+ "type": "object",
144
+ "properties": {
145
+ "html_content": {"type": "string", "description": "HTML内容"}
146
+ },
147
+ "required": ["html_content"],
148
+ },
149
+ ),
150
+ Tool(
151
+ name="get_html_spec",
152
+ description="获取HTML格式规范文档",
153
+ inputSchema={"type": "object", "properties": {}},
154
+ ),
155
+ Tool(
156
+ name="get_element_examples",
157
+ description="获取各种HTML元素的示例代码",
71
158
  inputSchema={
72
159
  "type": "object",
73
- "properties": {}
74
- }
75
- )
160
+ "properties": {
161
+ "category": {
162
+ "type": "string",
163
+ "description": "元素类别: headings, text, colors, lists, table, image, formula",
164
+ "default": "all",
165
+ }
166
+ },
167
+ },
168
+ ),
76
169
  ]
77
170
 
78
171
 
79
172
  @app.call_tool()
80
173
  async def call_tool(name: str, arguments: dict) -> list[TextContent]:
81
174
  """调用工具"""
82
-
175
+
83
176
  if name == "convert_document":
84
177
  html_path = arguments.get("html_path")
85
178
  output_path = arguments.get("output_path")
86
-
179
+
87
180
  if not html_path:
88
181
  return [TextContent(type="text", text="错误:必须提供html_path参数")]
89
-
90
- if not os.path.exists(html_path):
91
- return [TextContent(type="text", text=f"错误:HTML文件不存在: {html_path}")]
92
-
93
- # 如果未指定输出路径,使用相同目录和文件名
182
+
94
183
  if not output_path:
95
- base_name = os.path.splitext(html_path)[0]
96
- output_path = base_name + ".docx"
97
-
184
+ return [TextContent(type="text", text="错误:必须提供output_path参数")]
185
+
186
+ if not os.path.exists(html_path):
187
+ return [TextContent(type="text", text="错误:HTML文件不存在")]
188
+
98
189
  try:
99
190
  docx_convert(html_path, output_path)
100
- return [TextContent(
101
- type="text",
102
- text=f"✅ 转换成功!\n📄 HTML文件: {html_path}\n📝 Word文件: {output_path}"
103
- )]
191
+ return [TextContent(type="text", text=f"✅ 转换成功!\n📝 Word文件已生成")]
104
192
  except Exception as e:
105
193
  return [TextContent(type="text", text=f"❌ 转换失败: {str(e)}")]
106
-
194
+
107
195
  elif name == "generate_document":
108
196
  content = arguments.get("content", "")
109
197
  title = arguments.get("title", "文档")
110
198
  output_dir = arguments.get("output_dir")
111
-
199
+
112
200
  if not content:
113
201
  return [TextContent(type="text", text="错误:必须提供content参数")]
114
-
115
- # 构建输出目录
202
+
116
203
  if not output_dir:
117
- workspace_dir = Path(__file__).parent.parent / "workspace" / "documents"
118
- from datetime import datetime
119
- timestamp = datetime.now().strftime("%Y%m%d")
120
- output_dir = workspace_dir / f"{timestamp}_{title.replace(' ', '_')}"
121
-
204
+ return [TextContent(type="text", text="错误:必须提供output_dir参数")]
205
+
122
206
  output_dir = Path(output_dir)
123
207
  output_dir.mkdir(parents=True, exist_ok=True)
124
-
208
+
125
209
  # 生成HTML文件
126
210
  html_path = output_dir / "document.html"
127
211
  docx_path = output_dir / "document.docx"
128
-
212
+
213
+ # 🔧 自动修复HTML内容
214
+ fixer = HTMLFixer()
215
+ fixed_content = fixer.fix(content)
216
+
129
217
  # 构建完整HTML
130
218
  html_template = f"""<!DOCTYPE html>
131
219
  <html lang="zh-CN">
@@ -217,104 +305,89 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
217
305
  </head>
218
306
  <body>
219
307
  <h1>{title}</h1>
220
- {content}
308
+ {fixed_content}
221
309
  </body>
222
310
  </html>"""
223
-
311
+
224
312
  # 保存HTML
225
- with open(html_path, 'w', encoding='utf-8') as f:
313
+ with open(html_path, "w", encoding="utf-8") as f:
226
314
  f.write(html_template)
227
-
315
+
228
316
  # 转换为DOCX
229
317
  try:
230
318
  docx_convert(str(html_path), str(docx_path))
231
- return [TextContent(
232
- type="text",
233
- text=f"✅ 文档生成成功!\n📁 输出目录: {output_dir}\n📄 HTML文件: {html_path}\n📝 Word文件: {docx_path}"
234
- )]
319
+
320
+ # 返回结果(包含修复报告)
321
+ result = f"✅ 文档生成成功!\n📝 Word文件已生成\n\n"
322
+
323
+ if fixer.fixes:
324
+ result += f"🔧 自动修复报告:\n{fixer.get_fixes_report()}\n\n"
325
+ result += "⚠️ 注意:部分内容已自动修复,建议下次生成时直接使用正确格式"
326
+ else:
327
+ result += "✓ 格式完美,无需修复"
328
+
329
+ return [TextContent(type="text", text=result)]
330
+
235
331
  except Exception as e:
236
332
  return [TextContent(type="text", text=f"❌ 转换失败: {str(e)}")]
237
-
333
+
238
334
  elif name == "get_html_template":
239
- template = """<!DOCTYPE html>
240
- <html lang="zh-CN">
241
- <head>
242
- <meta charset="UTF-8">
243
- <title>文档标题</title>
244
- <style>
245
- /* 全局配置 */
246
- :root {
247
- --default-font: '微软雅黑';
248
- --default-size: 12pt;
249
- --default-color: #333;
250
- --line-height: 1.8;
251
- }
252
-
253
- body {
254
- font-family: var(--default-font);
255
- font-size: var(--default-size);
256
- color: var(--default-color);
257
- line-height: var(--line-height);
258
- padding: 20px;
259
- max-width: 800px;
260
- margin: 0 auto;
261
- }
262
-
263
- /* 标题样式 */
264
- h1 { font-size: 18pt; color: #4a3f6b; text-align: center; }
265
- h2 { font-size: 16pt; color: #5b4e8c; border-bottom: 2px solid #667eea; }
266
- h3 { font-size: 14pt; color: #6b5b7a; }
267
-
268
- /* 段落 */
269
- p { text-indent: 2em; margin-bottom: 10pt; }
270
-
271
- /* 文本格式 */
272
- .red { color: red; }
273
- .blue { color: blue; }
274
- .green { color: green; }
275
- .highlight { background-color: yellow; }
276
-
277
- /* 提示框 */
278
- .info { background-color: #e3f2fd; padding: 10px; border-left: 4px solid #2196F3; }
279
- .warning { background-color: #fff3cd; padding: 10px; border-left: 4px solid #ffc107; }
280
- .success { background-color: #d4edda; padding: 10px; border-left: 4px solid #28a745; }
281
-
282
- /* 表格 */
283
- table { width: 100%; border-collapse: collapse; margin: 20px 0; }
284
- th, td { border: 1px solid #ddd; padding: 12px; text-align: center; }
285
- th { background-color: #667eea; color: white; }
286
- tr:nth-child(even) { background-color: #f9f9f9; }
287
- </style>
288
- </head>
289
- <body>
290
- <h1>文档标题</h1>
291
-
292
- <h2>一、章节标题</h2>
293
- <p>这是正文段落,<strong>支持加粗</strong>、<em>斜体</em>、<span class="red">彩色文字</span>等格式。</p>
294
-
295
- <div class="info">
296
- <strong>提示:</strong> 这是信息提示框
297
- </div>
298
- </body>
299
- </html>"""
335
+ title = arguments.get("title", "文档")
336
+ content = arguments.get("content", "")
337
+ template = template_generator.generate_template(title, content)
300
338
  return [TextContent(type="text", text=template)]
301
-
339
+
340
+ elif name == "validate_html":
341
+ html_content = arguments.get("html_content", "")
342
+ if not html_content:
343
+ return [TextContent(type="text", text="错误:必须提供html_content参数")]
344
+
345
+ result = validator.validate(html_content)
346
+ report = validator.get_validation_report(result)
347
+ return [TextContent(type="text", text=report)]
348
+
349
+ elif name == "get_html_spec":
350
+ spec_path = os.path.join(os.path.dirname(__file__), "..", "HTML_FORMAT_SPEC.md")
351
+ if os.path.exists(spec_path):
352
+ with open(spec_path, "r", encoding="utf-8") as f:
353
+ spec_content = f.read()
354
+ return [TextContent(type="text", text=spec_content)]
355
+ else:
356
+ return [TextContent(type="text", text="规范文档未找到")]
357
+
358
+ elif name == "get_element_examples":
359
+ category = arguments.get("category", "all")
360
+ examples = template_generator.get_element_examples()
361
+
362
+ if category == "all":
363
+ result = "=== HTML 元素示例 ===\n\n"
364
+ for cat, elems in examples.items():
365
+ result += f"## {cat.upper()}\n\n"
366
+ for name, code in elems.items():
367
+ result += f"### {name}\n```html\n{code}\n```\n\n"
368
+ elif category in examples:
369
+ result = f"## {category.upper()}\n\n"
370
+ for name, code in examples[category].items():
371
+ result += f"### {name}\n```html\n{code}\n```\n\n"
372
+ else:
373
+ result = f"未知类别: {category}\n可用类别: {', '.join(examples.keys())}"
374
+
375
+ return [TextContent(type="text", text=result)]
376
+
302
377
  else:
378
+
303
379
  return [TextContent(type="text", text=f"未知工具: {name}")]
304
380
 
305
381
 
async def main():
    """Main entry point: run ``app`` on the streams yielded by ``stdio_server()``."""
    # Kept as a function-local import, exactly as in the original.
    from mcp.server.stdio import stdio_server

    async with stdio_server() as streams:
        incoming, outgoing = streams
        init_options = app.create_initialization_options()
        await app.run(incoming, outgoing, init_options)


if __name__ == "__main__":
    import asyncio

    asyncio.run(main())
@@ -0,0 +1,84 @@
# -*- coding: utf-8 -*-
"""Smoke test for the HTML error-detection and auto-fix mechanism.

Two HTML samples are passed through ``validator.validate`` and
``HTMLFixer.fix`` and the outcome of every step is printed: a sample
that contains formatting errors, and a sample labelled as correct.
"""

from html_validator import validator
from html_fixer import HTMLFixer

SEPARATOR = "=" * 50


def _print_summary(outcome, *, with_warnings=True):
    """Print the pass flag and error count (and optionally the warning count)."""
    print(f"✓ 验证通过: {outcome['valid']}")
    print(f"✗ 错误数量: {len(outcome['errors'])}")
    if with_warnings:
        print(f"⚠ 警告数量: {len(outcome['warnings'])}")


def _print_items(heading, items):
    """Print *heading* and one bullet line per item; print nothing when empty."""
    if not items:
        return
    print(heading)
    for item in items:
        print(f" - {item}")


print(SEPARATOR)
print("错误检测和修复机制测试")
print(SEPARATOR)

# Test 1: HTML that contains errors
print("\n【测试1】包含错误的HTML")
html_with_errors = """
<p style="color: red;">红色文本</p>
<img src="test.jpg" loading="lazy">
<p style="background-color: blue;">蓝色背景</p>
<p style="color: rgb(255,0,0);">RGB红色</p>
<br>
<hr>
"""

print("原始HTML:")
print(html_with_errors)

# Validate the raw sample
print("\n【验证结果】")
result = validator.validate(html_with_errors)
_print_summary(result)
_print_items("\n错误详情:", result["errors"])
_print_items("\n警告详情:", result["warnings"])

# Run the automatic fixer
print("\n【自动修复】")
fixer = HTMLFixer()
fixed_html = fixer.fix(html_with_errors)
print(f"修复报告:\n{fixer.get_fixes_report()}")

print("\n修复后的HTML:")
print(fixed_html)

# Validate again after fixing (the warning count is not printed here)
print("\n【修复后验证】")
result_after_fix = validator.validate(fixed_html)
_print_summary(result_after_fix, with_warnings=False)

# Test 2: HTML labelled as correct
print("\n" + SEPARATOR)
print("\n【测试2】正确的HTML")
correct_html = """
<p style="color: #FF0000;">红色文本</p>
<img src="test.png" alt="图片" />
<p style="background-color: #0000FF;">蓝色背景</p>
<p style="color: #FF0000;">十六进制红色</p>
<br />
<hr />
"""

print("HTML内容:")
print(correct_html)

result2 = validator.validate(correct_html)
print("\n验证结果:")
_print_summary(result2)

fixer2 = HTMLFixer()
fixed_html2 = fixer2.fix(correct_html)
print(f"\n修复报告:\n{fixer2.get_fixes_report()}")

print("\n" + SEPARATOR)
print("测试完成!")
print(SEPARATOR)