@birthday8/doc-mcp 1.0.3 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -12,7 +12,6 @@ Doc Creator MCP Server - Generate Word documents from HTML with rich formatting
12
12
  - ✅ Tables with styles
13
13
  - ✅ Info/Warning/Success boxes
14
14
  - ✅ Code blocks
15
- - ✅ Blockquotes
16
15
  - ✅ Multi-column layout
17
16
  - ✅ Page breaks
18
17
 
@@ -66,6 +65,18 @@ Generate a Word document from HTML content.
66
65
  ### 3. get_html_constraints
67
66
  Get HTML format constraints example with all supported formats.
68
67
 
68
+ ### 4. get_html_schema
69
+ Get structured HTML format constraints as JSON.
70
+
71
+ ### 5. validate_html
72
+ Validate HTML content against format constraints.
73
+
74
+ ```json
75
+ {
76
+ "html_content": "<h1>Title</h1><p>Content...</p>"
77
+ }
78
+ ```
79
+
69
80
  ## HTML Conventions
70
81
 
71
82
  ### Basic Structure
@@ -75,12 +86,9 @@ Get HTML format constraints example with all supported formats.
75
86
  <head>
76
87
  <meta charset="UTF-8">
77
88
  <title>Document Title</title>
78
- <style>
79
- /* CSS styles */
80
- </style>
81
89
  </head>
82
90
  <body>
83
- <h1>Title</h1>
91
+ <h1 style="font-family: 黑体; font-size: 18pt; text-align: center;">Title</h1>
84
92
  <p>Content with <strong>bold</strong> and <em>italic</em>.</p>
85
93
  </body>
86
94
  </html>
@@ -92,8 +100,8 @@ Get HTML format constraints example with all supported formats.
92
100
  <em>Italic</em>
93
101
  <u>Underline</u>
94
102
  <s>Strikethrough</s>
95
- <span class="red">Red text</span>
96
- <span class="highlight">Highlighted</span>
103
+ <span style="color: #FF0000;">Red text</span>
104
+ <span style="background-color: #FFFF00;">Highlighted</span>
97
105
  ```
98
106
 
99
107
  ### Paragraph Indentation
@@ -110,17 +118,13 @@ Get HTML format constraints example with all supported formats.
110
118
 
111
119
  ### Tables
112
120
  ```html
113
- <table style="width: 100%; border-collapse: collapse;">
114
- <thead>
115
- <tr>
116
- <th style="border: 1px solid #ddd; padding: 12px;">Header</th>
117
- </tr>
118
- </thead>
119
- <tbody>
120
- <tr>
121
- <td style="border: 1px solid #ddd; padding: 12px;">Cell</td>
122
- </tr>
123
- </tbody>
121
+ <table>
122
+ <tr>
123
+ <th style="background-color: #E3F2FD;">Header</th>
124
+ </tr>
125
+ <tr>
126
+ <td>Cell</td>
127
+ </tr>
124
128
  </table>
125
129
  ```
126
130
 
@@ -130,8 +134,44 @@ Get HTML format constraints example with all supported formats.
130
134
  <p>Column 1 content...</p>
131
135
  <p>Column 2 content...</p>
132
136
  </div>
137
+ <!-- 必须恢复单栏 (required: switch back to a single column after the multi-column block) -->
138
+ <div class="columns" data-cols="1"></div>
139
+ ```
140
+
141
+ ## Important Notes
142
+
143
+ ### Style Usage
144
+ - **Inline styles only**: Use `style="..."` attributes on elements
145
+ - **No `<style>` tags**: CSS in `<style>` tags is not supported
146
+ - **Supported styles**: color, background-color, font-family, font-size, text-align, line-height, margin-top, margin-bottom
147
+ - **Color format**: Must use hex format `#RRGGBB` (e.g., `#FF0000`), NOT `red`, `rgb(255,0,0)`, etc.
148
+ - **Font size format**: Must use `pt` units (e.g., `14pt`), NOT `px`, `em`, `rem`
149
+ - **Margin format**: Must use `pt` units (e.g., `10pt`), NOT `px`, `em`, `rem`
150
+ - **Line height format**: Must be numeric (e.g., `1.5`, `1.8`, `2.0`)
151
+
152
+ ### Class Names
153
+ **Only 7 class names are supported**:
154
+ - `center` - Center aligned paragraph
155
+ - `right` - Right aligned paragraph
156
+ - `left` - Left aligned paragraph
157
+ - `info` - Info message box
158
+ - `warning` - Warning message box
159
+ - `success` - Success message box
160
+ - `columns` - Multi-column layout
161
+
162
+ All other styles must use inline `style` attributes.
163
+
164
+ ### Example with inline styles
165
+ ```html
166
+ <h1 style="font-family: 黑体; font-size: 18pt; text-align: center;">Title</h1>
167
+ <p style="color: #FF0000;">Red text</p>
168
+ <span style="background-color: #FFFF00;">Highlighted</span>
169
+
170
+ <!-- 不支持的类名示例 (example of an unsupported class name) -->
171
+ <!-- ❌ <span class="abstract-title">摘要</span> -->
172
+ <!-- ✅ <span style="font-family: 黑体; font-size: 12pt; font-weight: bold;">摘要</span> -->
133
173
  ```
134
174
 
135
175
  ## License
136
176
 
137
- MIT
177
+ MIT
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@birthday8/doc-mcp",
3
- "version": "1.0.3",
3
+ "version": "1.0.5",
4
4
  "description": "Doc Creator MCP Server - Generate Word documents from HTML",
5
5
  "main": "index.js",
6
6
  "bin": {
@@ -339,6 +339,63 @@ class RuleEngine:
339
339
  description="元数据内容",
340
340
  )
341
341
 
342
+ # 通用属性
343
+ self.attr_rules["class"] = AttributeRule(
344
+ attr_name="class",
345
+ allowed=True,
346
+ applicable_tags={
347
+ "p",
348
+ "div",
349
+ "span",
350
+ "table",
351
+ "td",
352
+ "th",
353
+ "ul",
354
+ "ol",
355
+ "li",
356
+ "h1",
357
+ "h2",
358
+ "h3",
359
+ "h4",
360
+ "h5",
361
+ "h6",
362
+ },
363
+ description="CSS类名",
364
+ )
365
+
366
+ self.attr_rules["style"] = AttributeRule(
367
+ attr_name="style",
368
+ allowed=True,
369
+ applicable_tags={
370
+ "h1",
371
+ "h2",
372
+ "h3",
373
+ "h4",
374
+ "h5",
375
+ "h6",
376
+ "p",
377
+ "div",
378
+ "span",
379
+ "strong",
380
+ "em",
381
+ "u",
382
+ "s",
383
+ "sup",
384
+ "sub",
385
+ "code",
386
+ "table",
387
+ "tr",
388
+ "td",
389
+ "th",
390
+ "ul",
391
+ "ol",
392
+ "li",
393
+ "blockquote",
394
+ "pre",
395
+ },
396
+ description="内联样式",
397
+ )
398
+
342
399
  def _init_style_rules(self):
343
400
  """初始化样式规则"""
344
401
 
@@ -497,6 +554,31 @@ class RuleEngine:
497
554
  return False
498
555
  return rule.is_applicable_to(tag_name)
499
556
 
557
+ def is_nesting_allowed(self, parent_tag: str, child_tag: str) -> bool:
558
+ """检查嵌套是否允许"""
559
+ parent_rule = self.get_tag_rule(parent_tag)
560
+ child_rule = self.get_tag_rule(child_tag)
561
+
562
+ if not parent_rule or not child_rule:
563
+ return False
564
+
565
+ # 检查父标签是否允许包含子标签
566
+ if not parent_rule.can_contain(child_tag):
567
+ return False
568
+
569
+ # 检查子标签是否可以是父标签的子元素
570
+ if not child_rule.can_be_child_of(parent_tag):
571
+ return False
572
+
573
+ return True
574
+
575
+ def is_self_closing_tag(self, tag_name: str) -> bool:
576
+ """检查标签是否是自闭合标签"""
577
+ rule = self.get_tag_rule(tag_name)
578
+ if not rule:
579
+ return False
580
+ return rule.must_be_self_closing
581
+
500
582
  def get_disallowed_tags(self) -> List[Dict[str, str]]:
501
583
  """获取所有不允许的标签(用于文档说明)"""
502
584
  return [
@@ -11,6 +11,17 @@ from html_rules import RuleEngine, ValidationResult
11
11
  class StrictHTMLValidator:
12
12
  """严格HTML验证器"""
13
13
 
14
+ # 支持的类名列表
15
+ SUPPORTED_CLASSES = {
16
+ "center",
17
+ "right",
18
+ "left",
19
+ "info",
20
+ "warning",
21
+ "success",
22
+ "columns",
23
+ }
24
+
14
25
  def __init__(self, rule_engine: RuleEngine = None):
15
26
  self.rule_engine = rule_engine or RuleEngine()
16
27
 
@@ -39,6 +50,9 @@ class StrictHTMLValidator:
39
50
  # 检查属性
40
51
  self._validate_attributes(soup, result, strict_mode)
41
52
 
53
+ # 检查类名
54
+ self._validate_classes(soup, result, strict_mode)
55
+
42
56
  # 检查样式
43
57
  self._validate_styles(soup, result, strict_mode)
44
58
 
@@ -48,14 +62,14 @@ class StrictHTMLValidator:
48
62
  # 检查自闭合标签
49
63
  self._validate_self_closing(soup, result, strict_mode)
50
64
 
51
- # 收集统计信息
52
- self._collect_stats(soup, result)
65
+ # 检查style标签
66
+ self._validate_style_tags(soup, result, strict_mode)
53
67
 
54
68
  except Exception as e:
55
69
  result.add_error(
56
- code="PARSE_ERROR",
70
+ code="PARSING_ERROR",
57
71
  message=f"HTML解析错误: {str(e)}",
58
- details={"error": str(e)},
72
+ details={"exception": str(e)},
59
73
  )
60
74
 
61
75
  return result
@@ -154,85 +168,142 @@ class StrictHTMLValidator:
154
168
 
155
169
  tag_name = element.name
156
170
 
157
- for attr_name, attr_value in element.attrs.items():
158
- # 跳过特殊属性
159
- if attr_name in ["class", "style"]:
160
- continue
171
+ # 检查属性是否允许
172
+ tag_rule = self.rule_engine.get_tag_rule(tag_name)
173
+ if not tag_rule:
174
+ continue
161
175
 
162
- # 检查属性是否允许
176
+ for attr_name, attr_value in element.attrs.items():
163
177
  if not self.rule_engine.is_attribute_allowed(attr_name, tag_name):
164
178
  result.add_error(
165
179
  code="DISALLOWED_ATTR",
166
180
  message=f"标签 <{tag_name}> 的属性 '{attr_name}' 不在允许列表中",
167
181
  details={
168
182
  "tag": tag_name,
169
- "attr": attr_name,
183
+ "attribute": attr_name,
170
184
  "value": str(attr_value)[:100],
171
- "allowed_attrs_for_tag": self._get_allowed_attrs_for_tag(
172
- tag_name
173
- ),
174
185
  },
175
186
  )
176
187
  if strict_mode:
177
188
  continue
178
189
 
179
- # 验证属性值
180
- attr_rule = self.rule_engine.get_attribute_rule(attr_name)
181
- if attr_rule and attr_rule.validator and attr_value:
182
- if not attr_rule.validator(str(attr_value)):
183
- result.add_error(
184
- code="INVALID_ATTR_VALUE",
185
- message=f"标签 <{tag_name}> 的属性 '{attr_name}' 值 '{attr_value}' 无效",
186
- details={
187
- "tag": tag_name,
188
- "attr": attr_name,
189
- "value": str(attr_value),
190
- },
191
- )
190
+ def _validate_classes(
191
+ self, soup: BeautifulSoup, result: ValidationResult, strict_mode: bool
192
+ ):
193
+ """验证类名是否支持"""
194
+ for element in soup.find_all(True):
195
+ if not isinstance(element, Tag):
196
+ continue
197
+
198
+ class_attr = element.get("class")
199
+ if not class_attr:
200
+ continue
201
+
202
+ # 处理类名列表
203
+ if isinstance(class_attr, list):
204
+ classes = class_attr
205
+ else:
206
+ classes = class_attr.split()
207
+
208
+ unsupported_classes = []
209
+ for cls in classes:
210
+ if cls not in self.SUPPORTED_CLASSES:
211
+ unsupported_classes.append(cls)
212
+
213
+ if unsupported_classes:
214
+ result.add_error(
215
+ code="UNSUPPORTED_CLASS",
216
+ message=f"类名 '{', '.join(unsupported_classes)}' 不在支持列表中,请使用内联 style 属性代替",
217
+ details={
218
+ "unsupported_classes": unsupported_classes,
219
+ "supported_classes": list(self.SUPPORTED_CLASSES),
220
+ "element": element.name,
221
+ "note": "只支持:center, right, left, info, warning, success, columns",
222
+ },
223
+ )
224
+ if strict_mode:
225
+ continue
192
226
 
193
227
  def _validate_styles(
194
228
  self, soup: BeautifulSoup, result: ValidationResult, strict_mode: bool
195
229
  ):
196
230
  """验证样式"""
197
- for element in soup.find_all(style=True):
231
+ for element in soup.find_all(True):
198
232
  if not isinstance(element, Tag):
199
233
  continue
200
234
 
201
- tag_name = element.name
202
- style_str = element.get("style", "")
203
- styles = self._parse_style(style_str)
235
+ style_attr = element.get("style")
236
+ if not style_attr:
237
+ continue
204
238
 
205
- for style_name, style_value in styles.items():
206
- # 检查样式是否允许
207
- if not self.rule_engine.is_style_allowed(style_name, tag_name):
239
+ # 解析样式
240
+ styles = self._parse_style_string(style_attr)
241
+
242
+ for prop_name, prop_value in styles.items():
243
+ if not self.rule_engine.is_style_allowed(prop_name, element.name):
208
244
  result.add_error(
209
245
  code="DISALLOWED_STYLE",
210
- message=f"标签 <{tag_name}> 的样式 '{style_name}' 不在允许列表中",
246
+ message=f"样式属性 '{prop_name}' 不在允许列表中",
211
247
  details={
212
- "tag": tag_name,
213
- "style": style_name,
214
- "value": style_value,
215
- "allowed_styles_for_tag": self._get_allowed_styles_for_tag(
216
- tag_name
217
- ),
248
+ "style": prop_name,
249
+ "value": prop_value,
250
+ "element": element.name,
218
251
  },
219
252
  )
220
253
  if strict_mode:
221
254
  continue
222
255
 
223
- # 验证样式值
224
- style_rule = self.rule_engine.get_style_rule(style_name)
225
- if style_rule and style_value:
226
- is_valid, error_msg = style_rule.validate_value(style_value)
227
- if not is_valid:
256
+ # 验证颜色格式
257
+ if prop_name in ["color", "background-color"]:
258
+ if not self.rule_engine.is_valid_color(prop_value):
228
259
  result.add_error(
229
- code="INVALID_STYLE_VALUE",
230
- message=f"标签 <{tag_name}> 的样式 '{style_name}' 值 '{style_value}' 无效: {error_msg}",
260
+ code="INVALID_COLOR_FORMAT",
261
+ message="颜色格式错误,必须使用十六进制格式 #RRGGBB",
231
262
  details={
232
- "tag": tag_name,
233
- "style": style_name,
234
- "value": style_value,
235
- "error": error_msg,
263
+ "style": prop_name,
264
+ "value": prop_value,
265
+ "correct_format": "#RRGGBB",
266
+ },
267
+ )
268
+
269
+ # 验证字号格式
270
+ if prop_name == "font-size":
271
+ if not prop_value.endswith("pt"):
272
+ result.add_error(
273
+ code="INVALID_FONT_SIZE_FORMAT",
274
+ message="字号格式错误,必须使用 pt 单位",
275
+ details={
276
+ "style": prop_name,
277
+ "value": prop_value,
278
+ "correct_format": "14pt, 16pt, 18pt",
279
+ },
280
+ )
281
+
282
+ # 验证行距格式
283
+ if prop_name == "line-height":
284
+ try:
285
+ float(prop_value)
286
+ except ValueError:
287
+ result.add_error(
288
+ code="INVALID_LINE_HEIGHT_FORMAT",
289
+ message="行距格式错误,必须是数字或小数",
290
+ details={
291
+ "style": prop_name,
292
+ "value": prop_value,
293
+ "correct_format": "1.5, 1.8, 2.0",
294
+ },
295
+ )
296
+
297
+ # 验证边距格式
298
+ if prop_name in ["margin-top", "margin-bottom"]:
299
+ if not prop_value.endswith("pt"):
300
+ result.add_error(
301
+ code="INVALID_MARGIN_FORMAT",
302
+ message="边距格式错误,必须使用 pt 单位",
303
+ details={
304
+ "style": prop_name,
305
+ "value": prop_value,
306
+ "correct_format": "10pt, 12pt, 15pt",
236
307
  },
237
308
  )
238
309
 
@@ -240,53 +311,27 @@ class StrictHTMLValidator:
240
311
  self, soup: BeautifulSoup, result: ValidationResult, strict_mode: bool
241
312
  ):
242
313
  """验证嵌套规则"""
243
- for element in soup.find_all(True):
244
- if not isinstance(element, Tag):
314
+ for parent in soup.find_all(True):
315
+ if not isinstance(parent, Tag):
245
316
  continue
246
317
 
247
- tag_name = element.name
248
- parent = element.parent
249
-
250
- if parent and isinstance(parent, Tag):
251
- parent_name = parent.name
252
-
253
- # 检查是否可以是父元素的子元素
254
- tag_rule = self.rule_engine.get_tag_rule(tag_name)
255
- parent_rule = self.rule_engine.get_tag_rule(parent_name)
318
+ for child in parent.find_all(True, recursive=False):
319
+ if not isinstance(child, Tag):
320
+ continue
256
321
 
257
- if tag_rule and not tag_rule.can_be_child_of(parent_name):
322
+ if not self.rule_engine.is_nesting_allowed(parent.name, child.name):
258
323
  result.add_error(
259
324
  code="INVALID_NESTING",
260
- message=f"标签 <{tag_name}> 不能作为 <{parent_name}> 的子元素",
325
+ message=f"标签 <{child.name}> 不能嵌套在 <{parent.name}> ",
261
326
  details={
262
- "child_tag": tag_name,
263
- "parent_tag": parent_name,
264
- "allowed_parents": (
265
- list(tag_rule.allowed_parents)
266
- if tag_rule.allowed_parents
267
- else ["任意"]
268
- ),
327
+ "parent": parent.name,
328
+ "child": child.name,
329
+ "context": str(parent)[:100],
269
330
  },
270
331
  )
271
332
  if strict_mode:
272
333
  continue
273
334
 
274
- # 检查父元素是否可以包含此元素
275
- if parent_rule and not parent_rule.can_contain(tag_name):
276
- result.add_error(
277
- code="INVALID_CONTAINMENT",
278
- message=f"标签 <{parent_name}> 不能包含 <{tag_name}>",
279
- details={
280
- "parent_tag": parent_name,
281
- "child_tag": tag_name,
282
- "allowed_children": (
283
- list(parent_rule.allowed_children)
284
- if parent_rule.allowed_children
285
- else ["任意"]
286
- ),
287
- },
288
- )
289
-
290
335
  def _validate_self_closing(
291
336
  self, soup: BeautifulSoup, result: ValidationResult, strict_mode: bool
292
337
  ):
@@ -296,64 +341,51 @@ class StrictHTMLValidator:
296
341
  continue
297
342
 
298
343
  tag_name = element.name
299
- tag_rule = self.rule_engine.get_tag_rule(tag_name)
300
344
 
301
- if tag_rule and tag_rule.must_be_self_closing:
345
+ # 检查空标签是否自闭合
346
+ if self.rule_engine.is_self_closing_tag(tag_name):
302
347
  # 检查是否有内容
303
348
  if element.contents and not all(
304
- isinstance(c, NavigableString) and not c.strip()
349
+ isinstance(c, NavigableString) and not str(c).strip()
305
350
  for c in element.contents
306
351
  ):
307
352
  result.add_warning(
308
- code="SELF_CLOSING_WARNING",
309
- message=f"标签 <{tag_name}> 应该是自闭合标签,但包含内容",
310
- details={"tag": tag_name, "content": str(element)[:100]},
353
+ code="NON_EMPTY_SELF_CLOSING_TAG",
354
+ message=f"标签 <{tag_name}> 应该是自闭合的,但包含内容",
355
+ details={"tag": tag_name},
311
356
  )
312
357
 
313
- def _collect_stats(self, soup: BeautifulSoup, result: ValidationResult):
314
- """收集统计信息"""
315
- stats = {}
358
+ def _validate_style_tags(
359
+ self, soup: BeautifulSoup, result: ValidationResult, strict_mode: bool
360
+ ):
361
+ """验证style标签(检测并警告)"""
362
+ style_tags = soup.find_all("style")
363
+ if style_tags:
364
+ result.add_warning(
365
+ code="STYLE_TAG_DETECTED",
366
+ message=f"检测到 <style> 标签,其中的 CSS 规则不会被转换。请使用内联 style 属性代替。",
367
+ details={
368
+ "count": len(style_tags),
369
+ "note": '例如:将 <style>h1 { font-size: 18pt; }</style> 改为 <h1 style="font-size: 18pt;">标题</h1>',
370
+ },
371
+ )
316
372
 
317
- # 标签统计
318
- tag_counts = {}
319
- for element in soup.find_all(True):
320
- if isinstance(element, Tag):
321
- tag_counts[element.name] = tag_counts.get(element.name, 0) + 1
373
+ def _parse_style_string(self, style_str: str) -> Dict[str, str]:
374
+ """解析style字符串为字典"""
375
+ styles = {}
376
+ if not style_str:
377
+ return styles
322
378
 
323
- stats["tag_counts"] = tag_counts
324
- stats["total_tags"] = sum(tag_counts.values())
325
- stats["text_length"] = len(soup.get_text())
326
- stats["total_errors"] = len(result.errors)
327
- stats["total_warnings"] = len(result.warnings)
379
+ for item in style_str.split(";"):
380
+ item = item.strip()
381
+ if not item or ":" not in item:
382
+ continue
328
383
 
329
- # 添加到结果中
330
- result.details = stats
384
+ key, value = item.split(":", 1)
385
+ styles[key.strip()] = value.strip()
331
386
 
332
- def _parse_style(self, style_str: str) -> Dict[str, str]:
333
- """解析样式字符串"""
334
- styles = {}
335
- for item in style_str.split(";"):
336
- if ":" in item:
337
- key, value = item.split(":", 1)
338
- styles[key.strip()] = value.strip()
339
387
  return styles
340
388
 
341
- def _get_allowed_attrs_for_tag(self, tag_name: str) -> List[str]:
342
- """获取标签允许的属性列表"""
343
- allowed = []
344
- for attr_name, attr_rule in self.rule_engine.attr_rules.items():
345
- if attr_rule.allowed and attr_rule.is_applicable_to(tag_name):
346
- allowed.append(attr_name)
347
- return allowed
348
-
349
- def _get_allowed_styles_for_tag(self, tag_name: str) -> List[str]:
350
- """获取标签允许的样式列表"""
351
- allowed = []
352
- for style_name, style_rule in self.rule_engine.style_rules.items():
353
- if style_rule.allowed and style_rule.is_applicable_to(tag_name):
354
- allowed.append(style_name)
355
- return allowed
356
-
357
389
  def get_validation_report(self, result: ValidationResult) -> str:
358
390
  """生成验证报告"""
359
391
  report = []
@@ -362,33 +394,30 @@ class StrictHTMLValidator:
362
394
 
363
395
  # 验证结果
364
396
  status = "✅ 通过" if result.is_valid else "❌ 失败"
365
- report.append(f"验证状态: {status}")
366
- report.append(f"错误数: {len(result.errors)}")
367
- report.append(f"警告数: {len(result.warnings)}\n")
397
+ report.append(f"验证状态: {status}\n")
398
+
399
+ # 统计信息
400
+ report.append("📊 统计信息:")
401
+ report.append(f" - 错误数量: {len(result.errors)}")
402
+ report.append(f" - 警告数量: {len(result.warnings)}")
403
+ report.append("")
368
404
 
369
405
  # 错误
370
406
  if result.errors:
371
407
  report.append("❌ 错误:")
372
408
  for i, error in enumerate(result.errors, 1):
373
- report.append(f"\n{i}. [{error['code']}] {error['message']}")
409
+ report.append(f" {i}. [{error['code']}] {error['message']}")
374
410
  if error.get("details"):
375
- report.append(f" 详情: {error['details']}")
411
+ report.append(f" 详情: {error['details']}")
376
412
  report.append("")
377
413
 
378
414
  # 警告
379
415
  if result.warnings:
380
416
  report.append("⚠️ 警告:")
381
417
  for i, warning in enumerate(result.warnings, 1):
382
- report.append(f"\n{i}. [{warning['code']}] {warning['message']}")
418
+ report.append(f" {i}. [{warning['code']}] {warning['message']}")
383
419
  if warning.get("details"):
384
- report.append(f" 详情: {warning['details']}")
385
- report.append("")
386
-
387
- # 统计信息
388
- if hasattr(result, "details") and result.details:
389
- report.append("📊 统计信息:")
390
- for key, value in result.details.items():
391
- report.append(f" - {key}: {value}")
420
+ report.append(f" 详情: {warning['details']}")
392
421
  report.append("")
393
422
 
394
423
  if not result.errors and not result.warnings:
@@ -397,32 +426,5 @@ class StrictHTMLValidator:
397
426
  return "\n".join(report)
398
427
 
399
428
 
400
- class ValidationError(Exception):
401
- """验证错误异常"""
402
-
403
- def __init__(self, result: ValidationResult):
404
- self.result = result
405
- message = (
406
- f"HTML验证失败: {len(result.errors)} 个错误, {len(result.warnings)} 个警告"
407
- )
408
- super().__init__(message)
409
-
410
- def to_dict(self) -> Dict[str, Any]:
411
- """转换为字典"""
412
- return self.result.to_json()
413
-
414
-
415
- # 延迟创建全局实例
416
- _strict_validator_instance = None
417
-
418
-
419
- def get_strict_validator():
420
- """获取严格验证器单例"""
421
- global _strict_validator_instance
422
- if _strict_validator_instance is None:
423
- _strict_validator_instance = StrictHTMLValidator()
424
- return _strict_validator_instance
425
-
426
-
427
- # 向后兼容的全局实例
428
- strict_validator = get_strict_validator()
429
+ # 全局实例
430
+ strict_validator = StrictHTMLValidator()
@@ -2,21 +2,7 @@
2
2
  <html lang="zh-CN">
3
3
  <head>
4
4
  <meta charset="UTF-8">
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
5
  <title>MCP复杂格式测试</title>
7
- <style>
8
- body {
9
- font-family: '微软雅黑';
10
- font-size: 12pt;
11
- line-height: 1.8;
12
- padding: 20px;
13
- max-width: 800px;
14
- margin: 0 auto;
15
- }
16
- h1 { font-family: '微软雅黑'; font-size: 18pt; text-align: center; color: #333; }
17
- h2 { font-family: '微软雅黑'; font-size: 16pt; color: #4a3f6b; border-bottom: 2px solid #667eea; padding-bottom: 10px; }
18
- h3 { font-family: '微软雅黑'; font-size: 14pt; color: #5b4e8c; }
19
- </style>
20
6
  </head>
21
7
  <body>
22
8
  <h1>MCP复杂格式测试文档</h1>
@@ -145,6 +145,44 @@ HTML_SCHEMA = {
145
145
  }
146
146
  },
147
147
 
148
+ "supportedClasses": [
149
+ {
150
+ "name": "center",
151
+ "description": "居中对齐段落",
152
+ "note": "用于<p class=\"center\">"
153
+ },
154
+ {
155
+ "name": "right",
156
+ "description": "右对齐段落",
157
+ "note": "用于<p class=\"right\">"
158
+ },
159
+ {
160
+ "name": "left",
161
+ "description": "左对齐段落",
162
+ "note": "用于<p class=\"left\">"
163
+ },
164
+ {
165
+ "name": "info",
166
+ "description": "信息提示框",
167
+ "note": "用于<div class=\"info\">"
168
+ },
169
+ {
170
+ "name": "warning",
171
+ "description": "警告提示框",
172
+ "note": "用于<div class=\"warning\">"
173
+ },
174
+ {
175
+ "name": "success",
176
+ "description": "成功提示框",
177
+ "note": "用于<div class=\"success\">"
178
+ },
179
+ {
180
+ "name": "columns",
181
+ "description": "多栏布局",
182
+ "note": "用于<div class=\"columns\" data-cols=\"2\">"
183
+ }
184
+ ],
185
+
148
186
  "colorFormatRules": {
149
187
  "required": "hex",
150
188
  "pattern": "#[0-9A-Fa-f]{6}",
@@ -237,6 +275,30 @@ HTML_SCHEMA = {
237
275
  "example": "<span style=\"color: rgb(255,0,0);\">红色</span>",
238
276
  "correct": "<span style=\"color: #FF0000;\">红色</span>",
239
277
  "note": "不支持rgb()和rgba()格式,必须使用十六进制"
278
+ },
279
+ {
280
+ "error": "使用style标签",
281
+ "example": "<style>h1 { font-size: 18pt; }</style>",
282
+ "correct": "<h1 style=\"font-size: 18pt;\">标题</h1>",
283
+ "note": "不支持<style>标签中的CSS规则,只支持内联style属性"
284
+ },
285
+ {
286
+ "error": "不支持的类名",
287
+ "example": "<span class=\"abstract-title\">摘要</span>",
288
+ "correct": "<span style=\"font-family: 黑体; font-size: 12pt; font-weight: bold;\">摘要</span>",
289
+ "note": "只支持:center, right, left, info, warning, success, columns"
290
+ },
291
+ {
292
+ "error": "字号格式错误",
293
+ "example": "<span style=\"font-size: 14px;\">14号字</span>",
294
+ "correct": "<span style=\"font-size: 14pt;\">14号字</span>",
295
+ "note": "字号单位必须是pt(磅),不支持px、em、rem等单位"
296
+ },
297
+ {
298
+ "error": "边距格式错误",
299
+ "example": "<p style=\"margin-bottom: 10px;\">段落</p>",
300
+ "correct": "<p style=\"margin-bottom: 10pt;\">段落</p>",
301
+ "note": "边距单位必须是pt(磅),不支持px、em、rem等单位"
240
302
  }
241
303
  ],
242
304
 
@@ -252,18 +314,25 @@ HTML_SCHEMA = {
252
314
  "表格单元格使用 th(表头)和 td(数据)区分",
253
315
  "行内元素(strong、em、u、s、sup、sub、code、span)可以嵌套在块级元素(p、div)内",
254
316
  "块级元素(p、div、table、ul、ol)不能嵌套在行内元素内",
255
- "行距可以是数字或小数(如 1.5、1.8、2.0)"
317
+ "行距可以是数字或小数(如 1.5、1.8、2.0)",
318
+ "不要使用<style>标签中的CSS,只使用内联style属性(如 style=\"font-size: 14pt;\")",
319
+ "只支持特定类名:center, right, left, info, warning, success, columns,其他样式请用内联style"
256
320
  ],
257
321
 
258
322
  "validationWorkflow": [
259
323
  "1. 检查HTML基本结构(DOCTYPE, html, head, body)",
260
324
  "2. 验证所有标签是否在允许列表中",
261
325
  "3. 检查所有属性是否被支持",
262
- "4. 验证所有样式是否符合规范(特别是颜色格式)",
263
- "5. 检查颜色格式是否为十六进制 #RRGGBB",
264
- "6. 验证标签嵌套是否正确",
265
- "7. 确认自闭合标签格式正确",
266
- "8. 检查多栏布局是否恢复单栏"
326
+ "4. 验证类名是否在支持列表中(只支持:center, right, left, info, warning, success, columns)",
327
+ "5. 验证所有样式是否符合规范(特别是颜色格式)",
328
+ "6. 检查颜色格式是否为十六进制 #RRGGBB",
329
+ "7. 检查字号格式是否为pt单位",
330
+ "8. 检查行距格式是否为数字或小数",
331
+ "9. 检查边距格式是否为pt单位",
332
+ "10. 验证标签嵌套是否正确",
333
+ "11. 确认自闭合标签格式正确",
334
+ "12. 检查多栏布局是否恢复单栏",
335
+ "13. 检查是否使用了<style>标签(警告)"
267
336
  ]
268
337
  }
269
338
 
@@ -280,4 +349,4 @@ def get_schema_json():
280
349
 
281
350
 
282
351
  if __name__ == "__main__":
283
- print(get_schema_json())
352
+ print(get_schema_json())
package/python/server.py CHANGED
@@ -16,7 +16,7 @@ from mcp.types import TextContent, Tool
16
16
  sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
17
17
  from docx_converter import convert_html_to_docx as docx_convert
18
18
  from html_validator import validator, template_generator
19
- from html_validator_strict import StrictHTMLValidator, ValidationError, strict_validator
19
+ from html_validator_strict import StrictHTMLValidator, strict_validator
20
20
  from html_fixer import HTMLFixer
21
21
 
22
22
  # Create MCP server