@birthday8/doc-mcp 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/index.js +61 -65
- package/install.js +45 -35
- package/package.json +2 -4
- package/python/docx_converter.py +1152 -428
- package/python/html_fixer.py +125 -0
- package/python/html_rules.py +570 -0
- package/python/html_validator.py +174 -0
- package/python/html_validator_strict.py +428 -0
- package/python/sample/example.html +407 -0
- package/python/sample/html_schema.py +283 -0
- package/python/server.py +233 -123
- package/python/test_error_detection.py +84 -0
- package/python/test_strict_validation.py +118 -0
package/python/server.py
CHANGED
|
@@ -15,6 +15,9 @@ from mcp.types import TextContent, Tool
|
|
|
15
15
|
# Import the conversion module
|
|
16
16
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
17
17
|
from docx_converter import convert_html_to_docx as docx_convert
|
|
18
|
+
from html_validator import validator, template_generator
|
|
19
|
+
from html_validator_strict import StrictHTMLValidator, ValidationError, strict_validator
|
|
20
|
+
from html_fixer import HTMLFixer
|
|
18
21
|
|
|
19
22
|
# Create MCP server
|
|
20
23
|
app = Server("doc-creator")
|
|
@@ -32,100 +35,214 @@ async def list_tools() -> list[Tool]:
|
|
|
32
35
|
"properties": {
|
|
33
36
|
"html_path": {
|
|
34
37
|
"type": "string",
|
|
35
|
-
"description": "HTML文件的完整路径"
|
|
38
|
+
"description": "HTML文件的完整路径",
|
|
36
39
|
},
|
|
37
40
|
"output_path": {
|
|
38
41
|
"type": "string",
|
|
39
|
-
"description": "输出DOCX
|
|
40
|
-
}
|
|
42
|
+
"description": "输出DOCX文件的路径",
|
|
43
|
+
},
|
|
41
44
|
},
|
|
42
|
-
"required": ["html_path"]
|
|
43
|
-
}
|
|
45
|
+
"required": ["html_path", "output_path"],
|
|
46
|
+
},
|
|
44
47
|
),
|
|
45
48
|
Tool(
|
|
46
49
|
name="generate_document",
|
|
47
|
-
description="
|
|
50
|
+
description="""
|
|
51
|
+
⚠️ 生成Word文档 - 步骤6(必须先完成步骤1-5)
|
|
52
|
+
|
|
53
|
+
📋 必须按顺序执行的完整流程:
|
|
54
|
+
1. get_html_schema() - 获取结构化格式约束
|
|
55
|
+
2. get_html_constraints() - 获取HTML约束示例
|
|
56
|
+
3. 严格根据约束生成HTML内容
|
|
57
|
+
4. validate_html() - 验证HTML格式
|
|
58
|
+
5. 修复错误(重复步骤4直到无错误)
|
|
59
|
+
6. generate_document() - 生成文档(若有错误回到步骤5)
|
|
60
|
+
|
|
61
|
+
⚠️ 重要:必须先完成步骤1-5,且validate_html()验证通过后才能调用此工具
|
|
62
|
+
|
|
63
|
+
🔧 功能:
|
|
64
|
+
- 如果验证失败,返回详细错误报告
|
|
65
|
+
- 根据报错修复HTML后重新调用
|
|
66
|
+
""",
|
|
48
67
|
inputSchema={
|
|
49
68
|
"type": "object",
|
|
50
69
|
"properties": {
|
|
51
70
|
"content": {
|
|
52
71
|
"type": "string",
|
|
53
|
-
"description": "HTML内容(不需要完整的HTML结构,只需要body内的内容)"
|
|
72
|
+
"description": "HTML内容(不需要完整的HTML结构,只需要body内的内容)",
|
|
54
73
|
},
|
|
55
74
|
"title": {
|
|
56
75
|
"type": "string",
|
|
57
76
|
"description": "文档标题",
|
|
58
|
-
"default": "文档"
|
|
77
|
+
"default": "文档",
|
|
59
78
|
},
|
|
60
|
-
"output_dir": {
|
|
61
|
-
"type": "string",
|
|
62
|
-
"description": "输出目录(可选,默认使用workspace/documents/)"
|
|
63
|
-
}
|
|
79
|
+
"output_dir": {"type": "string", "description": "输出目录(必填)"},
|
|
64
80
|
},
|
|
65
|
-
"required": ["content"]
|
|
66
|
-
}
|
|
81
|
+
"required": ["content", "output_dir"],
|
|
82
|
+
},
|
|
67
83
|
),
|
|
68
84
|
Tool(
|
|
69
|
-
name="
|
|
70
|
-
description="
|
|
85
|
+
name="get_html_constraints",
|
|
86
|
+
description="""
|
|
87
|
+
🔑 获取HTML格式约束示例 - 步骤2(必须在步骤3之前执行)
|
|
88
|
+
|
|
89
|
+
📋 说明:
|
|
90
|
+
- 这是步骤2,必须在步骤3之前执行
|
|
91
|
+
- 返回预定义的HTML约束示例,包含所有支持的格式
|
|
92
|
+
- 示例包含:字体、颜色、大小、段距、嵌套结构、表格、图片、公式等
|
|
93
|
+
- 参考示例格式生成自己的HTML内容
|
|
94
|
+
|
|
95
|
+
⚠️ 重要:
|
|
96
|
+
- 必须在步骤1之后执行
|
|
97
|
+
- 参考此示例确保格式正确
|
|
98
|
+
- 不要直接复制示例内容,而是学习格式用法
|
|
99
|
+
""",
|
|
71
100
|
inputSchema={
|
|
72
101
|
"type": "object",
|
|
73
|
-
"properties": {}
|
|
74
|
-
}
|
|
75
|
-
)
|
|
102
|
+
"properties": {},
|
|
103
|
+
},
|
|
104
|
+
),
|
|
105
|
+
Tool(
|
|
106
|
+
name="validate_html",
|
|
107
|
+
description="""
|
|
108
|
+
✅ 验证HTML格式 - 步骤4(必须在步骤6之前执行)
|
|
109
|
+
|
|
110
|
+
📋 用法:
|
|
111
|
+
- 验证HTML格式是否正确
|
|
112
|
+
- 查看详细的错误和警告信息
|
|
113
|
+
- 获取格式建议
|
|
114
|
+
|
|
115
|
+
⚠️ 重要:这是步骤4,必须在步骤6之前执行
|
|
116
|
+
- 验证通过后才能进入步骤6
|
|
117
|
+
- 验证失败必须修复并重新验证,直到无错误
|
|
118
|
+
|
|
119
|
+
🔍 检查项:
|
|
120
|
+
• HTML基本结构
|
|
121
|
+
• 支持的标签和属性
|
|
122
|
+
• 颜色格式(必须是 #RRGGBB)
|
|
123
|
+
• 标签闭合和嵌套
|
|
124
|
+
• 自闭合标签格式
|
|
125
|
+
""",
|
|
126
|
+
inputSchema={
|
|
127
|
+
"type": "object",
|
|
128
|
+
"properties": {
|
|
129
|
+
"html_content": {"type": "string", "description": "HTML内容"}
|
|
130
|
+
},
|
|
131
|
+
"required": ["html_content"],
|
|
132
|
+
},
|
|
133
|
+
),
|
|
134
|
+
Tool(
|
|
135
|
+
name="get_element_examples",
|
|
136
|
+
description="获取各种HTML元素的示例代码",
|
|
137
|
+
inputSchema={
|
|
138
|
+
"type": "object",
|
|
139
|
+
"properties": {
|
|
140
|
+
"category": {
|
|
141
|
+
"type": "string",
|
|
142
|
+
"description": "元素类别: headings, text, colors, lists, table, image, formula",
|
|
143
|
+
"default": "all",
|
|
144
|
+
}
|
|
145
|
+
},
|
|
146
|
+
},
|
|
147
|
+
),
|
|
148
|
+
Tool(
|
|
149
|
+
name="get_html_schema",
|
|
150
|
+
description="""
|
|
151
|
+
🔑 获取HTML格式约束Schema - 步骤1(必须首先执行)
|
|
152
|
+
|
|
153
|
+
📋 说明:
|
|
154
|
+
- 这是步骤1,必须首先执行
|
|
155
|
+
- 获取完整的JSON格式约束
|
|
156
|
+
- 了解所有支持的标签、属性、样式
|
|
157
|
+
- 查看颜色格式、嵌套规则等约束
|
|
158
|
+
|
|
159
|
+
⚠️ 重要:
|
|
160
|
+
- 必须在生成HTML之前调用
|
|
161
|
+
- 完整了解所有约束后再进入步骤2
|
|
162
|
+
""",
|
|
163
|
+
inputSchema={"type": "object", "properties": {}},
|
|
164
|
+
),
|
|
76
165
|
]
|
|
77
166
|
|
|
78
167
|
|
|
79
168
|
@app.call_tool()
|
|
80
169
|
async def call_tool(name: str, arguments: dict) -> list[TextContent]:
|
|
81
170
|
"""调用工具"""
|
|
82
|
-
|
|
171
|
+
|
|
83
172
|
if name == "convert_document":
|
|
84
173
|
html_path = arguments.get("html_path")
|
|
85
174
|
output_path = arguments.get("output_path")
|
|
86
|
-
|
|
175
|
+
|
|
87
176
|
if not html_path:
|
|
88
177
|
return [TextContent(type="text", text="错误:必须提供html_path参数")]
|
|
89
|
-
|
|
90
|
-
if not os.path.exists(html_path):
|
|
91
|
-
return [TextContent(type="text", text=f"错误:HTML文件不存在: {html_path}")]
|
|
92
|
-
|
|
93
|
-
# 如果未指定输出路径,使用相同目录和文件名
|
|
178
|
+
|
|
94
179
|
if not output_path:
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
180
|
+
return [TextContent(type="text", text="错误:必须提供output_path参数")]
|
|
181
|
+
|
|
182
|
+
if not os.path.exists(html_path):
|
|
183
|
+
return [TextContent(type="text", text="错误:HTML文件不存在")]
|
|
184
|
+
|
|
98
185
|
try:
|
|
99
186
|
docx_convert(html_path, output_path)
|
|
100
|
-
return [TextContent(
|
|
101
|
-
type="text",
|
|
102
|
-
text=f"✅ 转换成功!\n📄 HTML文件: {html_path}\n📝 Word文件: {output_path}"
|
|
103
|
-
)]
|
|
187
|
+
return [TextContent(type="text", text=f"✅ 转换成功!\n📝 Word文件已生成")]
|
|
104
188
|
except Exception as e:
|
|
105
189
|
return [TextContent(type="text", text=f"❌ 转换失败: {str(e)}")]
|
|
106
|
-
|
|
190
|
+
|
|
107
191
|
elif name == "generate_document":
|
|
108
192
|
content = arguments.get("content", "")
|
|
109
193
|
title = arguments.get("title", "文档")
|
|
110
194
|
output_dir = arguments.get("output_dir")
|
|
111
|
-
|
|
195
|
+
|
|
112
196
|
if not content:
|
|
113
197
|
return [TextContent(type="text", text="错误:必须提供content参数")]
|
|
114
|
-
|
|
115
|
-
# 构建输出目录
|
|
198
|
+
|
|
116
199
|
if not output_dir:
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
timestamp = datetime.now().strftime("%Y%m%d")
|
|
120
|
-
output_dir = workspace_dir / f"{timestamp}_{title.replace(' ', '_')}"
|
|
121
|
-
|
|
200
|
+
return [TextContent(type="text", text="错误:必须提供output_dir参数")]
|
|
201
|
+
|
|
122
202
|
output_dir = Path(output_dir)
|
|
123
203
|
output_dir.mkdir(parents=True, exist_ok=True)
|
|
124
|
-
|
|
204
|
+
|
|
205
|
+
# 🔧 自动修复HTML内容
|
|
206
|
+
fixer = HTMLFixer()
|
|
207
|
+
fixed_content = fixer.fix(content)
|
|
208
|
+
|
|
209
|
+
# 🔒 严格验证HTML内容
|
|
210
|
+
validation_result = strict_validator.validate(fixed_content, strict_mode=False)
|
|
211
|
+
|
|
212
|
+
# 检查是否有错误
|
|
213
|
+
if not validation_result.is_valid:
|
|
214
|
+
# 抛出验证错误异常,返回结构化JSON
|
|
215
|
+
error_result = validation_result.to_json()
|
|
216
|
+
error_message = f"❌ HTML验证失败!\n\n"
|
|
217
|
+
error_message += f"发现 {len(validation_result.errors)} 个错误和 {len(validation_result.warnings)} 个警告\n\n"
|
|
218
|
+
|
|
219
|
+
# 列出所有错误
|
|
220
|
+
if validation_result.errors:
|
|
221
|
+
error_message += "【错误详情】\n"
|
|
222
|
+
for i, error in enumerate(validation_result.errors, 1):
|
|
223
|
+
error_message += f"{i}. [{error['code']}] {error['message']}\n"
|
|
224
|
+
if error.get("details"):
|
|
225
|
+
error_message += f" 详情: {error['details']}\n"
|
|
226
|
+
error_message += "\n"
|
|
227
|
+
|
|
228
|
+
# 列出所有警告
|
|
229
|
+
if validation_result.warnings:
|
|
230
|
+
error_message += "【警告详情】\n"
|
|
231
|
+
for i, warning in enumerate(validation_result.warnings, 1):
|
|
232
|
+
error_message += f"{i}. [{warning['code']}] {warning['message']}\n"
|
|
233
|
+
if warning.get("details"):
|
|
234
|
+
error_message += f" 详情: {warning['details']}\n"
|
|
235
|
+
error_message += "\n"
|
|
236
|
+
|
|
237
|
+
error_message += "请修复以上错误后重新生成文档。\n"
|
|
238
|
+
error_message += "💡 提示:调用 get_html_schema() 查看完整的格式约束规则。"
|
|
239
|
+
|
|
240
|
+
return [TextContent(type="text", text=error_message)]
|
|
241
|
+
|
|
125
242
|
# 生成HTML文件
|
|
126
243
|
html_path = output_dir / "document.html"
|
|
127
244
|
docx_path = output_dir / "document.docx"
|
|
128
|
-
|
|
245
|
+
|
|
129
246
|
# 构建完整HTML
|
|
130
247
|
html_template = f"""<!DOCTYPE html>
|
|
131
248
|
<html lang="zh-CN">
|
|
@@ -217,88 +334,84 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
|
|
|
217
334
|
</head>
|
|
218
335
|
<body>
|
|
219
336
|
<h1>{title}</h1>
|
|
220
|
-
{
|
|
337
|
+
{fixed_content}
|
|
221
338
|
</body>
|
|
222
339
|
</html>"""
|
|
223
|
-
|
|
340
|
+
|
|
224
341
|
# 保存HTML
|
|
225
|
-
with open(html_path,
|
|
342
|
+
with open(html_path, "w", encoding="utf-8") as f:
|
|
226
343
|
f.write(html_template)
|
|
227
|
-
|
|
344
|
+
|
|
228
345
|
# 转换为DOCX
|
|
229
346
|
try:
|
|
230
347
|
docx_convert(str(html_path), str(docx_path))
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
348
|
+
|
|
349
|
+
# 返回结果(包含修复报告)
|
|
350
|
+
result = f"✅ 文档生成成功!\n📝 Word文件已生成\n\n"
|
|
351
|
+
|
|
352
|
+
if fixer.fixes:
|
|
353
|
+
result += f"🔧 自动修复报告:\n{fixer.get_fixes_report()}\n\n"
|
|
354
|
+
result += "⚠️ 注意:部分内容已自动修复,建议下次生成时直接使用正确格式"
|
|
355
|
+
else:
|
|
356
|
+
result += "✓ 格式完美,无需修复"
|
|
357
|
+
|
|
358
|
+
return [TextContent(type="text", text=result)]
|
|
359
|
+
|
|
235
360
|
except Exception as e:
|
|
236
361
|
return [TextContent(type="text", text=f"❌ 转换失败: {str(e)}")]
|
|
237
|
-
|
|
238
|
-
elif name == "
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
<h1>文档标题</h1>
|
|
291
|
-
|
|
292
|
-
<h2>一、章节标题</h2>
|
|
293
|
-
<p>这是正文段落,<strong>支持加粗</strong>、<em>斜体</em>、<span class="red">彩色文字</span>等格式。</p>
|
|
294
|
-
|
|
295
|
-
<div class="info">
|
|
296
|
-
<strong>提示:</strong> 这是信息提示框
|
|
297
|
-
</div>
|
|
298
|
-
</body>
|
|
299
|
-
</html>"""
|
|
300
|
-
return [TextContent(type="text", text=template)]
|
|
301
|
-
|
|
362
|
+
|
|
363
|
+
elif name == "get_html_constraints":
|
|
364
|
+
constraint_example = template_generator.get_constraint_example()
|
|
365
|
+
return [TextContent(type="text", text=constraint_example)]
|
|
366
|
+
|
|
367
|
+
elif name == "validate_html":
|
|
368
|
+
html_content = arguments.get("html_content", "")
|
|
369
|
+
if not html_content:
|
|
370
|
+
return [TextContent(type="text", text="错误:必须提供html_content参数")]
|
|
371
|
+
|
|
372
|
+
# 使用严格验证器
|
|
373
|
+
result = strict_validator.validate(html_content, strict_mode=False)
|
|
374
|
+
json_result = result.to_json()
|
|
375
|
+
|
|
376
|
+
# 生成可读报告
|
|
377
|
+
report = strict_validator.get_validation_report(result)
|
|
378
|
+
|
|
379
|
+
# 返回JSON格式结果
|
|
380
|
+
import json
|
|
381
|
+
|
|
382
|
+
return [
|
|
383
|
+
TextContent(
|
|
384
|
+
type="text",
|
|
385
|
+
text=f"{report}\n\n【JSON格式结果】\n```json\n{json.dumps(json_result, ensure_ascii=False, indent=2)}\n```",
|
|
386
|
+
)
|
|
387
|
+
]
|
|
388
|
+
|
|
389
|
+
elif name == "get_element_examples":
|
|
390
|
+
category = arguments.get("category", "all")
|
|
391
|
+
examples = template_generator.get_element_examples()
|
|
392
|
+
|
|
393
|
+
if category == "all":
|
|
394
|
+
result = "=== HTML 元素示例 ===\n\n"
|
|
395
|
+
for cat, elems in examples.items():
|
|
396
|
+
result += f"## {cat.upper()}\n\n"
|
|
397
|
+
for name, code in elems.items():
|
|
398
|
+
result += f"### {name}\n```html\n{code}\n```\n\n"
|
|
399
|
+
elif category in examples:
|
|
400
|
+
result = f"## {category.upper()}\n\n"
|
|
401
|
+
for name, code in examples[category].items():
|
|
402
|
+
result += f"### {name}\n```html\n{code}\n```\n\n"
|
|
403
|
+
else:
|
|
404
|
+
result = f"未知类别: {category}\n可用类别: {', '.join(examples.keys())}"
|
|
405
|
+
|
|
406
|
+
return [TextContent(type="text", text=result)]
|
|
407
|
+
|
|
408
|
+
elif name == "get_html_schema":
|
|
409
|
+
import json
|
|
410
|
+
|
|
411
|
+
schema = template_generator.get_schema()
|
|
412
|
+
schema_json = json.dumps(schema, ensure_ascii=False, indent=2)
|
|
413
|
+
return [TextContent(type="text", text=schema_json)]
|
|
414
|
+
|
|
302
415
|
else:
|
|
303
416
|
return [TextContent(type="text", text=f"未知工具: {name}")]
|
|
304
417
|
|
|
@@ -306,15 +419,12 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
|
|
|
306
419
|
async def main():
|
|
307
420
|
"""主函数"""
|
|
308
421
|
from mcp.server.stdio import stdio_server
|
|
309
|
-
|
|
422
|
+
|
|
310
423
|
async with stdio_server() as (read_stream, write_stream):
|
|
311
|
-
await app.run(
|
|
312
|
-
read_stream,
|
|
313
|
-
write_stream,
|
|
314
|
-
app.create_initialization_options()
|
|
315
|
-
)
|
|
424
|
+
await app.run(read_stream, write_stream, app.create_initialization_options())
|
|
316
425
|
|
|
317
426
|
|
|
318
427
|
if __name__ == "__main__":
|
|
319
428
|
import asyncio
|
|
429
|
+
|
|
320
430
|
asyncio.run(main())
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""测试错误检测和修复机制"""
|
|
3
|
+
|
|
4
|
+
from html_validator import validator
|
|
5
|
+
from html_fixer import HTMLFixer
|
|
6
|
+
|
|
7
|
+
print("=" * 50)
|
|
8
|
+
print("错误检测和修复机制测试")
|
|
9
|
+
print("=" * 50)
|
|
10
|
+
|
|
11
|
+
# 测试1: 包含错误的HTML
|
|
12
|
+
print("\n【测试1】包含错误的HTML")
|
|
13
|
+
html_with_errors = """
|
|
14
|
+
<p style="color: red;">红色文本</p>
|
|
15
|
+
<img src="test.jpg" loading="lazy">
|
|
16
|
+
<p style="background-color: blue;">蓝色背景</p>
|
|
17
|
+
<p style="color: rgb(255,0,0);">RGB红色</p>
|
|
18
|
+
<br>
|
|
19
|
+
<hr>
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
print("原始HTML:")
|
|
23
|
+
print(html_with_errors)
|
|
24
|
+
|
|
25
|
+
# 验证
|
|
26
|
+
print("\n【验证结果】")
|
|
27
|
+
result = validator.validate(html_with_errors)
|
|
28
|
+
print(f"✓ 验证通过: {result['valid']}")
|
|
29
|
+
print(f"✗ 错误数量: {len(result['errors'])}")
|
|
30
|
+
print(f"⚠ 警告数量: {len(result['warnings'])}")
|
|
31
|
+
|
|
32
|
+
if result["errors"]:
|
|
33
|
+
print("\n错误详情:")
|
|
34
|
+
for error in result["errors"]:
|
|
35
|
+
print(f" - {error}")
|
|
36
|
+
|
|
37
|
+
if result["warnings"]:
|
|
38
|
+
print("\n警告详情:")
|
|
39
|
+
for warning in result["warnings"]:
|
|
40
|
+
print(f" - {warning}")
|
|
41
|
+
|
|
42
|
+
# 修复
|
|
43
|
+
print("\n【自动修复】")
|
|
44
|
+
fixer = HTMLFixer()
|
|
45
|
+
fixed_html = fixer.fix(html_with_errors)
|
|
46
|
+
print(f"修复报告:\n{fixer.get_fixes_report()}")
|
|
47
|
+
|
|
48
|
+
print("\n修复后的HTML:")
|
|
49
|
+
print(fixed_html)
|
|
50
|
+
|
|
51
|
+
# 验证修复后的结果
|
|
52
|
+
print("\n【修复后验证】")
|
|
53
|
+
result_after_fix = validator.validate(fixed_html)
|
|
54
|
+
print(f"✓ 验证通过: {result_after_fix['valid']}")
|
|
55
|
+
print(f"✗ 错误数量: {len(result_after_fix['errors'])}")
|
|
56
|
+
|
|
57
|
+
# 测试2: 正确的HTML
|
|
58
|
+
print("\n" + "=" * 50)
|
|
59
|
+
print("\n【测试2】正确的HTML")
|
|
60
|
+
correct_html = """
|
|
61
|
+
<p style="color: #FF0000;">红色文本</p>
|
|
62
|
+
<img src="test.png" alt="图片" />
|
|
63
|
+
<p style="background-color: #0000FF;">蓝色背景</p>
|
|
64
|
+
<p style="color: #FF0000;">十六进制红色</p>
|
|
65
|
+
<br />
|
|
66
|
+
<hr />
|
|
67
|
+
"""
|
|
68
|
+
|
|
69
|
+
print("HTML内容:")
|
|
70
|
+
print(correct_html)
|
|
71
|
+
|
|
72
|
+
result2 = validator.validate(correct_html)
|
|
73
|
+
print(f"\n验证结果:")
|
|
74
|
+
print(f"✓ 验证通过: {result2['valid']}")
|
|
75
|
+
print(f"✗ 错误数量: {len(result2['errors'])}")
|
|
76
|
+
print(f"⚠ 警告数量: {len(result2['warnings'])}")
|
|
77
|
+
|
|
78
|
+
fixer2 = HTMLFixer()
|
|
79
|
+
fixed_html2 = fixer2.fix(correct_html)
|
|
80
|
+
print(f"\n修复报告:\n{fixer2.get_fixes_report()}")
|
|
81
|
+
|
|
82
|
+
print("\n" + "=" * 50)
|
|
83
|
+
print("测试完成!")
|
|
84
|
+
print("=" * 50)
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"""
|
|
2
|
+
测试严格验证器
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from html_validator_strict import StrictHTMLValidator, ValidationError, strict_validator
|
|
6
|
+
|
|
7
|
+
# 测试1: 不允许的标签
|
|
8
|
+
print("=== 测试1: 不允许的标签 ===")
|
|
9
|
+
html1 = """<!DOCTYPE html>
|
|
10
|
+
<html>
|
|
11
|
+
<body>
|
|
12
|
+
<p>正常段落</p>
|
|
13
|
+
<script>alert('hello')</script>
|
|
14
|
+
<p>另一个段落</p>
|
|
15
|
+
</body>
|
|
16
|
+
</html>"""
|
|
17
|
+
result1 = strict_validator.validate(html1, strict_mode=False)
|
|
18
|
+
print(f"验证通过: {result1.is_valid}")
|
|
19
|
+
print(f"错误数: {len(result1.errors)}")
|
|
20
|
+
for error in result1.errors:
|
|
21
|
+
print(f" - {error['code']}: {error['message']}")
|
|
22
|
+
print()
|
|
23
|
+
|
|
24
|
+
# 测试2: 不允许的属性
|
|
25
|
+
print("=== 测试2: 不允许的属性 ===")
|
|
26
|
+
html2 = """<!DOCTYPE html>
|
|
27
|
+
<html>
|
|
28
|
+
<body>
|
|
29
|
+
<img src="test.jpg" alt="测试" onclick="alert('click')" />
|
|
30
|
+
</body>
|
|
31
|
+
</html>"""
|
|
32
|
+
result2 = strict_validator.validate(html2, strict_mode=False)
|
|
33
|
+
print(f"验证通过: {result2.is_valid}")
|
|
34
|
+
print(f"错误数: {len(result2.errors)}")
|
|
35
|
+
for error in result2.errors:
|
|
36
|
+
print(f" - {error['code']}: {error['message']}")
|
|
37
|
+
print()
|
|
38
|
+
|
|
39
|
+
# 测试3: 错误的颜色格式
|
|
40
|
+
print("=== 测试3: 错误的颜色格式 ===")
|
|
41
|
+
html3 = """<!DOCTYPE html>
|
|
42
|
+
<html>
|
|
43
|
+
<body>
|
|
44
|
+
<p style="color: red;">红色文本</p>
|
|
45
|
+
<p style="color: rgb(255,0,0);">RGB颜色</p>
|
|
46
|
+
<p style="color: #FF0000;">正确的颜色</p>
|
|
47
|
+
</body>
|
|
48
|
+
</html>"""
|
|
49
|
+
result3 = strict_validator.validate(html3, strict_mode=False)
|
|
50
|
+
print(f"验证通过: {result3.is_valid}")
|
|
51
|
+
print(f"错误数: {len(result3.errors)}")
|
|
52
|
+
for error in result3.errors:
|
|
53
|
+
print(f" - {error['code']}: {error['message']}")
|
|
54
|
+
print()
|
|
55
|
+
|
|
56
|
+
# 测试4: 正确的HTML
|
|
57
|
+
print("=== 测试4: 正确的HTML ===")
|
|
58
|
+
html4 = """<!DOCTYPE html>
|
|
59
|
+
<html lang="zh-CN">
|
|
60
|
+
<head>
|
|
61
|
+
<meta charset="UTF-8">
|
|
62
|
+
</head>
|
|
63
|
+
<body>
|
|
64
|
+
<h1>标题</h1>
|
|
65
|
+
<p><strong>加粗</strong>文本</p>
|
|
66
|
+
<p style="color: #FF0000;">红色文本</p>
|
|
67
|
+
<img src="test.jpg" alt="测试" />
|
|
68
|
+
</body>
|
|
69
|
+
</html>"""
|
|
70
|
+
result4 = strict_validator.validate(html4, strict_mode=False)
|
|
71
|
+
print(f"验证通过: {result4.is_valid}")
|
|
72
|
+
print(f"错误数: {len(result4.errors)}")
|
|
73
|
+
print(f"警告数: {len(result4.warnings)}")
|
|
74
|
+
if result4.warnings:
|
|
75
|
+
for warning in result4.warnings:
|
|
76
|
+
print(f" - {warning['code']}: {warning['message']}")
|
|
77
|
+
print()
|
|
78
|
+
|
|
79
|
+
# 测试5: 错误的嵌套
|
|
80
|
+
print("=== 测试5: 错误的嵌套 ===")
|
|
81
|
+
html5 = """<!DOCTYPE html>
|
|
82
|
+
<html>
|
|
83
|
+
<body>
|
|
84
|
+
<p>段落包含<div>块级元素</div></p>
|
|
85
|
+
</body>
|
|
86
|
+
</html>"""
|
|
87
|
+
result5 = strict_validator.validate(html5, strict_mode=False)
|
|
88
|
+
print(f"验证通过: {result5.is_valid}")
|
|
89
|
+
print(f"错误数: {len(result5.errors)}")
|
|
90
|
+
for error in result5.errors:
|
|
91
|
+
print(f" - {error['code']}: {error['message']}")
|
|
92
|
+
print()
|
|
93
|
+
|
|
94
|
+
# 测试6: 测试example.html
|
|
95
|
+
print("=== 测试6: 测试example.html ===")
|
|
96
|
+
import os
|
|
97
|
+
|
|
98
|
+
example_path = os.path.join("sample", "example.html")
|
|
99
|
+
if os.path.exists(example_path):
|
|
100
|
+
with open(example_path, "r", encoding="utf-8") as f:
|
|
101
|
+
html6 = f.read()
|
|
102
|
+
result6 = strict_validator.validate(html6, strict_mode=False)
|
|
103
|
+
print(f"验证通过: {result6.is_valid}")
|
|
104
|
+
print(f"错误数: {len(result6.errors)}")
|
|
105
|
+
print(f"警告数: {len(result6.warnings)}")
|
|
106
|
+
if result6.errors:
|
|
107
|
+
print("错误详情:")
|
|
108
|
+
for error in result6.errors:
|
|
109
|
+
print(f" - {error['code']}: {error['message']}")
|
|
110
|
+
if result6.warnings:
|
|
111
|
+
print("警告详情:")
|
|
112
|
+
for warning in result6.warnings:
|
|
113
|
+
print(f" - {warning['code']}: {warning['message']}")
|
|
114
|
+
else:
|
|
115
|
+
print("example.html 不存在")
|
|
116
|
+
print()
|
|
117
|
+
|
|
118
|
+
print("✅ 所有测试完成!")
|