@birthday8/doc-mcp 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +61 -21
- package/index.js +61 -56
- package/install.js +45 -35
- package/package.json +1 -1
- package/python/docx_converter.py +15 -83
- package/python/html_rules.py +652 -0
- package/python/html_validator.py +59 -274
- package/python/html_validator_strict.py +430 -0
- package/python/sample/example.html +0 -14
- package/python/sample/html_schema.py +352 -0
- package/python/server.py +112 -75
- package/python/test_strict_validation.py +118 -0
|
@@ -0,0 +1,352 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""HTML格式约束Schema - 基于实际代码支持"""
|
|
3
|
+
|
|
4
|
+
# Structured description of the HTML subset accepted for HTML -> Word conversion.
# The dict is plain data (JSON-serializable): it lists allowed tags, inline
# styles, attributes and class names, plus nesting rules, common mistakes and
# human-readable guidance. All user-facing text values are kept in Chinese
# because they are emitted verbatim to the caller.
HTML_SCHEMA = {
    "version": "2.0",
    "description": "HTML转Word文档的结构化格式约束(基于实际代码支持)",

    # Document-wide defaults declared for the <body> element.
    "globalStyle": {
        "body": {
            "font-family": {
                "type": "string",
                "description": "全局默认字体",
                "default": "微软雅黑",
                "note": "支持任意中英文字体名称,如:微软雅黑、宋体、黑体、Arial、Times New Roman等"
            },
            "font-size": {
                "type": "string",
                "description": "全局默认字号",
                "default": "12pt",
                "pattern": "\\d+pt",
                "note": "单位必须是pt,支持任意正整数"
            },
            "color": {
                "type": "string",
                "description": "文本颜色",
                "default": "#333333",
                "pattern": "#[0-9A-Fa-f]{6}",
                "note": "必须使用6位十六进制格式"
            }
        }
    },

    # Whitelist of tag names.
    "allowedTags": [
        "h1", "h2", "h3", "h4", "h5", "h6",
        "p", "strong", "em", "u", "s", "sup", "sub", "code",
        "span", "div", "ul", "ol", "li",
        "table", "tr", "td", "th",
        "img", "br", "hr",
        "math", "latex"
    ],

    # Whitelist of inline CSS properties, each with an optional regex
    # ("pattern") or enumeration ("allowed") describing legal values.
    "allowedStyles": {
        "color": {
            "type": "color",
            "pattern": "#[0-9A-Fa-f]{6}",
            "description": "文本颜色,必须使用十六进制格式",
            "note": "例如:#FF0000(红色)、#0000FF(蓝色)、#00FF00(绿色)"
        },
        "background-color": {
            "type": "color",
            "pattern": "#[0-9A-Fa-f]{6}",
            "description": "背景颜色,必须使用十六进制格式",
            "note": "例如:#E3F2FD(浅蓝)、#D4EDDA(浅绿)"
        },
        "font-family": {
            "type": "string",
            "description": "字体名称(支持任意字体)",
            "note": "可以使用系统中已安装的任何字体,如:微软雅黑、宋体、黑体、楷体、Arial、Times New Roman、Consolas等"
        },
        "font-size": {
            "type": "string",
            "pattern": "\\d+pt",
            "description": "字号(单位必须是pt)",
            "note": "支持任意正整数,如:12pt、14pt、16pt、18pt、20pt等"
        },
        "text-align": {
            "type": "string",
            "allowed": ["left", "center", "right", "justify"],
            "description": "文本对齐方式",
            "note": "left(左对齐)、center(居中)、right(右对齐)、justify(两端对齐)"
        },
        "line-height": {
            "type": "string",
            "pattern": "\\d+(\\.\\d+)?",
            "description": "行距(数字或小数)",
            "note": "如:1.5、1.8、2.0等"
        },
        "margin-top": {
            "type": "string",
            "pattern": "\\d+pt",
            "description": "上边距(单位必须是pt)",
            "note": "如:10pt、12pt、15pt等"
        },
        "margin-bottom": {
            "type": "string",
            "pattern": "\\d+pt",
            "description": "下边距(单位必须是pt)",
            "note": "如:10pt、12pt、15pt等"
        }
    },

    # Whitelist of attributes; "tags" names the elements each attribute
    # applies to.
    "allowedAttributes": {
        "src": {
            "tags": ["img"],
            "required": True,
            "description": "资源路径",
            "note": "图片文件路径,可以是相对路径或绝对路径"
        },
        "alt": {
            "tags": ["img"],
            "description": "替代文本",
            "note": "图片的描述文本,提高可访问性"
        },
        "width": {
            "tags": ["img"],
            "type": "number",
            "description": "宽度(像素)",
            "note": "如:384、512、768等"
        },
        "height": {
            "tags": ["img"],
            "type": "number",
            "description": "高度(像素)",
            "note": "如:288、384、512等"
        },
        "align": {
            "tags": ["img"],
            "allowed": ["left", "center", "right"],
            "description": "对齐方式",
            "note": "图片在段落中的对齐方式"
        },
        "data-indent": {
            "tags": ["p"],
            "type": "string",
            "description": "首行缩进(em单位)",
            "note": "如:0.5em、1em、2em等"
        },
        "data-cols": {
            "tags": ["div"],
            "type": "number",
            "description": "栏数",
            "note": "支持任意正整数,常用:1(单栏)、2(双栏)、3(三栏)、4(四栏)等。多栏结束后必须用 data-cols=\"1\" 恢复单栏"
        },
        "colspan": {
            "tags": ["td", "th"],
            "type": "number",
            "description": "跨列数",
            "note": "单元格跨越的列数"
        },
        "rowspan": {
            "tags": ["td", "th"],
            "type": "number",
            "description": "跨行数",
            "note": "单元格跨越的行数"
        }
    },

    # The only class names listed as supported.
    "supportedClasses": [
        {
            "name": "center",
            "description": "居中对齐段落",
            "note": "用于<p class=\"center\">"
        },
        {
            "name": "right",
            "description": "右对齐段落",
            "note": "用于<p class=\"right\">"
        },
        {
            "name": "left",
            "description": "左对齐段落",
            "note": "用于<p class=\"left\">"
        },
        {
            "name": "info",
            "description": "信息提示框",
            "note": "用于<div class=\"info\">"
        },
        {
            "name": "warning",
            "description": "警告提示框",
            "note": "用于<div class=\"warning\">"
        },
        {
            "name": "success",
            "description": "成功提示框",
            "note": "用于<div class=\"success\">"
        },
        {
            "name": "columns",
            "description": "多栏布局",
            "note": "用于<div class=\"columns\" data-cols=\"2\">"
        }
    ],

    # Colors must be six-digit hex; examples of accepted and rejected forms.
    "colorFormatRules": {
        "required": "hex",
        "pattern": "#[0-9A-Fa-f]{6}",
        "examples": {
            "correct": ["#FF0000", "#00FF00", "#0000FF", "#FFFFFF", "#000000", "#333333", "#666666", "#999999"],
            "incorrect": ["red", "blue", "green", "yellow", "rgb(255,0,0)", "rgba(255,0,0,0.5)", "hsl(0,100%,50%)"]
        }
    },

    "selfClosingTags": ["img", "br", "hr"],

    # Parent/child combinations that are forbidden or recommended.
    "nestingRules": {
        "forbidden": [
            {
                "parent": "p",
                "children": ["p", "h1", "h2", "h3", "h4", "h5", "h6", "div", "table", "ul", "ol"],
                "reason": "段落不能包含块级元素(p只能包含行内元素)",
                "example": "<p><div>内容</div></p> ❌",
                "correct": "<div><p>内容</p></div> ✓"
            },
            {
                "parent": "strong",
                "children": ["p", "div", "table", "ul", "ol"],
                "reason": "行内元素不能包含块级元素",
                "example": "<strong><p>段落</p></strong> ❌",
                "correct": "<p><strong>段落</strong></p> ✓"
            },
            {
                "parent": "em",
                "children": ["p", "div", "table", "ul", "ol"],
                "reason": "行内元素不能包含块级元素"
            },
            {
                "parent": "table",
                # All six heading levels are listed, matching the "p" rule
                # above: the stated reason is that a table may only hold rows.
                "children": ["p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "ul", "ol"],
                "reason": "表格只能包含行元素(tr)",
                "example": "<table><p>内容</p></table> ❌",
                "correct": "<table><tr><td>内容</td></tr></table> ✓"
            }
        ],
        "recommended": [
            {
                "parent": "p",
                "children": ["strong", "em", "u", "s", "sup", "sub", "code", "span"],
                "reason": "段落可以包含行内格式化元素",
                "example": "<p><strong>加粗</strong><em>斜体</em></p> ✓"
            },
            {
                "parent": "div",
                "children": ["h1", "h2", "h3", "h4", "h5", "h6", "p", "table", "ul", "ol"],
                "reason": "div可以包含块级元素",
                "example": "<div><h2>标题</h2><p>段落</p></div> ✓"
            }
        ]
    },

    # Frequent mistakes, each with a wrong example and its corrected form.
    # Every "correct" snippet must itself satisfy the rules in this schema.
    "commonErrors": [
        {
            "error": "颜色格式错误",
            "example": "<span style=\"color: red;\">红色</span>",
            "correct": "<span style=\"color: #FF0000;\">红色</span>",
            "note": "必须使用6位十六进制格式 #RRGGBB"
        },
        {
            "error": "未自闭合标签",
            "example": "<img src=\"image.jpg\">",
            "correct": "<img src=\"image.jpg\" alt=\"描述\" />",
            "note": "空标签必须自闭合,包含斜杠"
        },
        {
            "error": "不支持的样式",
            "example": "<p style=\"float: right;\">浮动</p>",
            "correct": "<p class=\"right\">右对齐</p>",
            "note": "不支持float、text-shadow、display、position等CSS属性"
        },
        {
            "error": "错误的嵌套",
            "example": "<strong><p>段落</p></strong>",
            "correct": "<p><strong>段落</strong></p>",
            "note": "块级元素不能嵌套在行内元素内"
        },
        {
            "error": "忘记恢复单栏",
            "example": "<div class=\"columns\" data-cols=\"2\">双栏</div>",
            "correct": "<div class=\"columns\" data-cols=\"2\">双栏</div><div class=\"columns\" data-cols=\"1\"></div>",
            "note": "多栏布局后必须使用 data-cols=\"1\" 恢复单栏"
        },
        {
            "error": "RGB颜色格式",
            "example": "<span style=\"color: rgb(255,0,0);\">红色</span>",
            "correct": "<span style=\"color: #FF0000;\">红色</span>",
            "note": "不支持rgb()和rgba()格式,必须使用十六进制"
        },
        {
            "error": "使用style标签",
            "example": "<style>h1 { font-size: 18pt; }</style>",
            "correct": "<h1 style=\"font-size: 18pt;\">标题</h1>",
            "note": "不支持<style>标签中的CSS规则,只支持内联style属性"
        },
        {
            "error": "不支持的类名",
            "example": "<span class=\"abstract-title\">摘要</span>",
            # font-weight is not listed in allowedStyles, so bold is
            # expressed with the allowed <strong> tag instead.
            "correct": "<span style=\"font-family: 黑体; font-size: 12pt;\"><strong>摘要</strong></span>",
            "note": "只支持:center, right, left, info, warning, success, columns"
        },
        {
            "error": "字号格式错误",
            "example": "<span style=\"font-size: 14px;\">14号字</span>",
            "correct": "<span style=\"font-size: 14pt;\">14号字</span>",
            "note": "字号单位必须是pt(磅),不支持px、em、rem等单位"
        },
        {
            "error": "边距格式错误",
            "example": "<p style=\"margin-bottom: 10px;\">段落</p>",
            "correct": "<p style=\"margin-bottom: 10pt;\">段落</p>",
            "note": "边距单位必须是pt(磅),不支持px、em、rem等单位"
        }
    ],

    # Free-text authoring guidance.
    "bestPractices": [
        "颜色必须使用十六进制格式 #RRGGBB(如 #FF0000)",
        "空标签必须自闭合(如 <img /> <br /> <hr />)",
        "字体可以使用任意系统已安装的字体(如 微软雅黑、宋体、Arial、Times New Roman)",
        "字号单位必须是pt(如 12pt、14pt、16pt、18pt)",
        "栏数支持任意正整数(1=单栏、2=双栏、3=三栏、4=四栏等)",
        "多栏布局后必须恢复单栏(data-cols=\"1\")",
        "图片必须包含 alt 属性以提高可访问性",
        "优先使用语义化标签(h1-h6)而非仅用 font-size",
        "表格单元格使用 th(表头)和 td(数据)区分",
        "行内元素(strong、em、u、s、sup、sub、code、span)可以嵌套在块级元素(p、div)内",
        "块级元素(p、div、table、ul、ol)不能嵌套在行内元素内",
        "行距可以是数字或小数(如 1.5、1.8、2.0)",
        "不要使用<style>标签中的CSS,只使用内联style属性(如 style=\"font-size: 14pt;\")",
        "只支持特定类名:center, right, left, info, warning, success, columns,其他样式请用内联style"
    ],

    # Ordered checklist describing the validation steps.
    "validationWorkflow": [
        "1. 检查HTML基本结构(DOCTYPE, html, head, body)",
        "2. 验证所有标签是否在允许列表中",
        "3. 检查所有属性是否被支持",
        "4. 验证类名是否在支持列表中(只支持:center, right, left, info, warning, success, columns)",
        "5. 验证所有样式是否符合规范(特别是颜色格式)",
        "6. 检查颜色格式是否为十六进制 #RRGGBB",
        "7. 检查字号格式是否为pt单位",
        "8. 检查行距格式是否为数字或小数",
        "9. 检查边距格式是否为pt单位",
        "10. 验证标签嵌套是否正确",
        "11. 确认自闭合标签格式正确",
        "12. 检查多栏布局是否恢复单栏",
        "13. 检查是否使用了<style>标签(警告)"
    ]
}
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def get_schema():
    """Return the HTML format-constraint schema.

    The module-level ``HTML_SCHEMA`` dict itself is returned (not a copy).
    """
    schema = HTML_SCHEMA
    return schema
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
def get_schema_json():
    """Return the schema serialized as a JSON string.

    Non-ASCII characters are written as-is (not escaped) and the output is
    indented by two spaces.
    """
    from json import dumps

    serialized = dumps(HTML_SCHEMA, ensure_ascii=False, indent=2)
    return serialized
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
if __name__ == "__main__":
    # Script entry point: print the JSON form of the schema to stdout.
    schema_text = get_schema_json()
    print(schema_text)
|
package/python/server.py
CHANGED
|
@@ -16,6 +16,7 @@ from mcp.types import TextContent, Tool
|
|
|
16
16
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
17
17
|
from docx_converter import convert_html_to_docx as docx_convert
|
|
18
18
|
from html_validator import validator, template_generator
|
|
19
|
+
from html_validator_strict import StrictHTMLValidator, ValidationError, strict_validator
|
|
19
20
|
from html_fixer import HTMLFixer
|
|
20
21
|
|
|
21
22
|
# Create MCP server
|
|
@@ -38,7 +39,7 @@ async def list_tools() -> list[Tool]:
|
|
|
38
39
|
},
|
|
39
40
|
"output_path": {
|
|
40
41
|
"type": "string",
|
|
41
|
-
"description": "输出DOCX
|
|
42
|
+
"description": "输出DOCX文件的路径",
|
|
42
43
|
},
|
|
43
44
|
},
|
|
44
45
|
"required": ["html_path", "output_path"],
|
|
@@ -47,30 +48,21 @@ async def list_tools() -> list[Tool]:
|
|
|
47
48
|
Tool(
|
|
48
49
|
name="generate_document",
|
|
49
50
|
description="""
|
|
50
|
-
⚠️
|
|
51
|
+
⚠️ 生成Word文档 - 步骤6(必须先完成步骤1-5)
|
|
51
52
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
53
|
+
📋 必须按顺序执行的完整流程:
|
|
54
|
+
1. get_html_schema() - 获取结构化格式约束
|
|
55
|
+
2. get_html_constraints() - 获取HTML约束示例
|
|
56
|
+
3. 严格根据约束生成HTML内容
|
|
57
|
+
4. validate_html() - 验证HTML格式
|
|
58
|
+
5. 修复错误(重复步骤4直到无错误)
|
|
59
|
+
6. generate_document() - 生成文档(若有错误回到步骤5)
|
|
55
60
|
|
|
56
|
-
|
|
57
|
-
在生成文档前,必须调用 validate_html() 验证格式
|
|
58
|
-
示例:validate_html({ html_content: "你的HTML内容" })
|
|
61
|
+
⚠️ 重要:必须先完成步骤1-5,且validate_html()验证通过后才能调用此工具
|
|
59
62
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
⚠️ 格式约束(必须遵守):
|
|
64
|
-
1. 颜色必须用十六进制格式 #RRGGBB(禁止用 red, blue, rgb() 等)
|
|
65
|
-
2. 空标签必须自闭合:<img /> <br /> <hr />
|
|
66
|
-
3. 只支持标签:h1-h6, p, strong, em, u, s, sup, sub, img, table, ul, ol, div
|
|
67
|
-
4. 禁止属性:onclick, onload, loading, float, text-shadow 等
|
|
68
|
-
5. 嵌套规则:p不能包含div,strong不能包含p等
|
|
69
|
-
6. 颜色示例:
|
|
70
|
-
✅ 正确:<span style="color: #FF0000;">红色</span>
|
|
71
|
-
❌ 错误:<span style="color: red;">红色</span>
|
|
72
|
-
|
|
73
|
-
系统会自动修复常见错误,但仍建议生成时遵守规范。
|
|
63
|
+
🔧 功能:
|
|
64
|
+
- 如果验证失败,返回详细错误报告
|
|
65
|
+
- 根据报错修复HTML后重新调用
|
|
74
66
|
""",
|
|
75
67
|
inputSchema={
|
|
76
68
|
"type": "object",
|
|
@@ -90,41 +82,39 @@ async def list_tools() -> list[Tool]:
|
|
|
90
82
|
},
|
|
91
83
|
),
|
|
92
84
|
Tool(
|
|
93
|
-
name="
|
|
85
|
+
name="get_html_constraints",
|
|
94
86
|
description="""
|
|
95
|
-
🔑 获取HTML
|
|
87
|
+
🔑 获取HTML格式约束示例 - 步骤2(必须在步骤3之前执行)
|
|
96
88
|
|
|
97
|
-
📋
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
89
|
+
📋 说明:
|
|
90
|
+
- 这是步骤2,必须在步骤3之前执行
|
|
91
|
+
- 返回预定义的HTML约束示例,包含所有支持的格式
|
|
92
|
+
- 示例包含:字体、颜色、大小、段距、嵌套结构、表格、图片、公式等
|
|
93
|
+
- 参考示例格式生成自己的HTML内容
|
|
101
94
|
|
|
102
95
|
⚠️ 重要:
|
|
103
|
-
-
|
|
104
|
-
-
|
|
105
|
-
-
|
|
96
|
+
- 必须在步骤1之后执行
|
|
97
|
+
- 参考此示例确保格式正确
|
|
98
|
+
- 不要直接复制示例内容,而是学习格式用法
|
|
106
99
|
""",
|
|
107
100
|
inputSchema={
|
|
108
101
|
"type": "object",
|
|
109
|
-
"properties": {
|
|
110
|
-
"title": {
|
|
111
|
-
"type": "string",
|
|
112
|
-
"description": "文档标题",
|
|
113
|
-
"default": "文档",
|
|
114
|
-
},
|
|
115
|
-
"content": {"type": "string", "description": "初始内容(可选)"},
|
|
116
|
-
},
|
|
102
|
+
"properties": {},
|
|
117
103
|
},
|
|
118
104
|
),
|
|
119
105
|
Tool(
|
|
120
106
|
name="validate_html",
|
|
121
107
|
description="""
|
|
122
|
-
✅ 验证HTML格式 -
|
|
108
|
+
✅ 验证HTML格式 - 步骤4(必须在步骤6之前执行)
|
|
123
109
|
|
|
124
110
|
📋 用法:
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
111
|
+
- 验证HTML格式是否正确
|
|
112
|
+
- 查看详细的错误和警告信息
|
|
113
|
+
- 获取格式建议
|
|
114
|
+
|
|
115
|
+
⚠️ 重要:这是步骤4,必须在步骤6之前执行
|
|
116
|
+
- 验证通过后才能进入步骤6
|
|
117
|
+
- 验证失败必须修复并重新验证,直到无错误
|
|
128
118
|
|
|
129
119
|
🔍 检查项:
|
|
130
120
|
• HTML基本结构
|
|
@@ -132,12 +122,6 @@ async def list_tools() -> list[Tool]:
|
|
|
132
122
|
• 颜色格式(必须是 #RRGGBB)
|
|
133
123
|
• 标签闭合和嵌套
|
|
134
124
|
• 自闭合标签格式
|
|
135
|
-
|
|
136
|
-
⚠️ 如果验证失败:
|
|
137
|
-
- 查看错误报告
|
|
138
|
-
- 修复所有错误
|
|
139
|
-
- 重新验证
|
|
140
|
-
- 验证通过后再生成文档
|
|
141
125
|
""",
|
|
142
126
|
inputSchema={
|
|
143
127
|
"type": "object",
|
|
@@ -147,11 +131,6 @@ async def list_tools() -> list[Tool]:
|
|
|
147
131
|
"required": ["html_content"],
|
|
148
132
|
},
|
|
149
133
|
),
|
|
150
|
-
Tool(
|
|
151
|
-
name="get_html_spec",
|
|
152
|
-
description="获取HTML格式规范文档",
|
|
153
|
-
inputSchema={"type": "object", "properties": {}},
|
|
154
|
-
),
|
|
155
134
|
Tool(
|
|
156
135
|
name="get_element_examples",
|
|
157
136
|
description="获取各种HTML元素的示例代码",
|
|
@@ -166,6 +145,23 @@ async def list_tools() -> list[Tool]:
|
|
|
166
145
|
},
|
|
167
146
|
},
|
|
168
147
|
),
|
|
148
|
+
Tool(
|
|
149
|
+
name="get_html_schema",
|
|
150
|
+
description="""
|
|
151
|
+
🔑 获取HTML格式约束Schema - 步骤1(必须首先执行)
|
|
152
|
+
|
|
153
|
+
📋 说明:
|
|
154
|
+
- 这是步骤1,必须首先执行
|
|
155
|
+
- 获取完整的JSON格式约束
|
|
156
|
+
- 了解所有支持的标签、属性、样式
|
|
157
|
+
- 查看颜色格式、嵌套规则等约束
|
|
158
|
+
|
|
159
|
+
⚠️ 重要:
|
|
160
|
+
- 必须在生成HTML之前调用
|
|
161
|
+
- 完整了解所有约束后再进入步骤2
|
|
162
|
+
""",
|
|
163
|
+
inputSchema={"type": "object", "properties": {}},
|
|
164
|
+
),
|
|
169
165
|
]
|
|
170
166
|
|
|
171
167
|
|
|
@@ -206,14 +202,47 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
|
|
|
206
202
|
output_dir = Path(output_dir)
|
|
207
203
|
output_dir.mkdir(parents=True, exist_ok=True)
|
|
208
204
|
|
|
209
|
-
# 生成HTML文件
|
|
210
|
-
html_path = output_dir / "document.html"
|
|
211
|
-
docx_path = output_dir / "document.docx"
|
|
212
|
-
|
|
213
205
|
# 🔧 自动修复HTML内容
|
|
214
206
|
fixer = HTMLFixer()
|
|
215
207
|
fixed_content = fixer.fix(content)
|
|
216
208
|
|
|
209
|
+
# 🔒 严格验证HTML内容
|
|
210
|
+
validation_result = strict_validator.validate(fixed_content, strict_mode=False)
|
|
211
|
+
|
|
212
|
+
# 检查是否有错误
|
|
213
|
+
if not validation_result.is_valid:
|
|
214
|
+
# 抛出验证错误异常,返回结构化JSON
|
|
215
|
+
error_result = validation_result.to_json()
|
|
216
|
+
error_message = f"❌ HTML验证失败!\n\n"
|
|
217
|
+
error_message += f"发现 {len(validation_result.errors)} 个错误和 {len(validation_result.warnings)} 个警告\n\n"
|
|
218
|
+
|
|
219
|
+
# 列出所有错误
|
|
220
|
+
if validation_result.errors:
|
|
221
|
+
error_message += "【错误详情】\n"
|
|
222
|
+
for i, error in enumerate(validation_result.errors, 1):
|
|
223
|
+
error_message += f"{i}. [{error['code']}] {error['message']}\n"
|
|
224
|
+
if error.get("details"):
|
|
225
|
+
error_message += f" 详情: {error['details']}\n"
|
|
226
|
+
error_message += "\n"
|
|
227
|
+
|
|
228
|
+
# 列出所有警告
|
|
229
|
+
if validation_result.warnings:
|
|
230
|
+
error_message += "【警告详情】\n"
|
|
231
|
+
for i, warning in enumerate(validation_result.warnings, 1):
|
|
232
|
+
error_message += f"{i}. [{warning['code']}] {warning['message']}\n"
|
|
233
|
+
if warning.get("details"):
|
|
234
|
+
error_message += f" 详情: {warning['details']}\n"
|
|
235
|
+
error_message += "\n"
|
|
236
|
+
|
|
237
|
+
error_message += "请修复以上错误后重新生成文档。\n"
|
|
238
|
+
error_message += "💡 提示:调用 get_html_schema() 查看完整的格式约束规则。"
|
|
239
|
+
|
|
240
|
+
return [TextContent(type="text", text=error_message)]
|
|
241
|
+
|
|
242
|
+
# 生成HTML文件
|
|
243
|
+
html_path = output_dir / "document.html"
|
|
244
|
+
docx_path = output_dir / "document.docx"
|
|
245
|
+
|
|
217
246
|
# 构建完整HTML
|
|
218
247
|
html_template = f"""<!DOCTYPE html>
|
|
219
248
|
<html lang="zh-CN">
|
|
@@ -331,29 +360,31 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
|
|
|
331
360
|
except Exception as e:
|
|
332
361
|
return [TextContent(type="text", text=f"❌ 转换失败: {str(e)}")]
|
|
333
362
|
|
|
334
|
-
elif name == "
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
template = template_generator.generate_template(title, content)
|
|
338
|
-
return [TextContent(type="text", text=template)]
|
|
363
|
+
elif name == "get_html_constraints":
|
|
364
|
+
constraint_example = template_generator.get_constraint_example()
|
|
365
|
+
return [TextContent(type="text", text=constraint_example)]
|
|
339
366
|
|
|
340
367
|
elif name == "validate_html":
|
|
341
368
|
html_content = arguments.get("html_content", "")
|
|
342
369
|
if not html_content:
|
|
343
370
|
return [TextContent(type="text", text="错误:必须提供html_content参数")]
|
|
344
371
|
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
372
|
+
# 使用严格验证器
|
|
373
|
+
result = strict_validator.validate(html_content, strict_mode=False)
|
|
374
|
+
json_result = result.to_json()
|
|
348
375
|
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
376
|
+
# 生成可读报告
|
|
377
|
+
report = strict_validator.get_validation_report(result)
|
|
378
|
+
|
|
379
|
+
# 返回JSON格式结果
|
|
380
|
+
import json
|
|
381
|
+
|
|
382
|
+
return [
|
|
383
|
+
TextContent(
|
|
384
|
+
type="text",
|
|
385
|
+
text=f"{report}\n\n【JSON格式结果】\n```json\n{json.dumps(json_result, ensure_ascii=False, indent=2)}\n```",
|
|
386
|
+
)
|
|
387
|
+
]
|
|
357
388
|
|
|
358
389
|
elif name == "get_element_examples":
|
|
359
390
|
category = arguments.get("category", "all")
|
|
@@ -374,8 +405,14 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
|
|
|
374
405
|
|
|
375
406
|
return [TextContent(type="text", text=result)]
|
|
376
407
|
|
|
377
|
-
|
|
408
|
+
elif name == "get_html_schema":
|
|
409
|
+
import json
|
|
378
410
|
|
|
411
|
+
schema = template_generator.get_schema()
|
|
412
|
+
schema_json = json.dumps(schema, ensure_ascii=False, indent=2)
|
|
413
|
+
return [TextContent(type="text", text=schema_json)]
|
|
414
|
+
|
|
415
|
+
else:
|
|
379
416
|
return [TextContent(type="text", text=f"未知工具: {name}")]
|
|
380
417
|
|
|
381
418
|
|