@birthday8/doc-mcp 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +61 -21
- package/index.js +61 -56
- package/install.js +45 -35
- package/package.json +1 -1
- package/python/docx_converter.py +15 -83
- package/python/html_rules.py +652 -0
- package/python/html_validator.py +59 -274
- package/python/html_validator_strict.py +430 -0
- package/python/sample/example.html +0 -14
- package/python/sample/html_schema.py +352 -0
- package/python/server.py +112 -75
- package/python/test_strict_validation.py +118 -0
|
@@ -0,0 +1,652 @@
|
|
|
1
|
+
"""
|
|
2
|
+
HTML验证规则引擎
|
|
3
|
+
面向对象的规则定义和验证系统
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from typing import Dict, List, Set, Optional, Any, Callable
|
|
8
|
+
from enum import Enum
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Severity(Enum):
    """Severity level of a validation finding."""

    # Member values are the lowercase strings used when a level is serialized.
    ERROR = "error"
    WARNING = "warning"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class ValidationResult:
    """Outcome of a validation run: an overall flag plus the collected findings.

    Every finding (error or warning) is stored as a dict with the keys
    ``code``, ``message`` and ``details``.
    """

    is_valid: bool
    errors: List[Dict[str, Any]] = field(default_factory=list)
    warnings: List[Dict[str, Any]] = field(default_factory=list)

    @staticmethod
    def _make_finding(code, message, details) -> Dict[str, Any]:
        """Build one finding dict; a missing or empty ``details`` becomes ``{}``."""
        payload = details if details else {}
        return dict(code=code, message=message, details=payload)

    def add_error(self, code: str, message: str, details: Optional[Dict] = None):
        """Record an error and mark the whole result as invalid."""
        self.is_valid = False
        self.errors.append(self._make_finding(code, message, details))

    def add_warning(self, code: str, message: str, details: Optional[Dict] = None):
        """Record a warning; the validity flag is left untouched."""
        self.warnings.append(self._make_finding(code, message, details))

    def to_json(self) -> Dict[str, Any]:
        """Return a JSON-serialisable dict view of this result.

        The ``errors`` and ``warnings`` entries are the live lists held by this
        object, not copies.
        """
        summary = {
            "error_count": len(self.errors),
            "warning_count": len(self.warnings),
        }
        report: Dict[str, Any] = {"valid": self.is_valid}
        report["errors"] = self.errors
        report["warnings"] = self.warnings
        report["summary"] = summary
        return report
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass
class TagRule:
    """Rule describing a single HTML tag and its nesting constraints.

    For both the parent and the child constraints, an empty "allowed" set
    means "no whitelist" (anything not explicitly disallowed is accepted).
    """

    tag_name: str
    allowed: bool
    description: str = ""
    required_attrs: Set[str] = field(default_factory=set)
    optional_attrs: Set[str] = field(default_factory=set)
    allowed_classes: Set[str] = field(default_factory=set)
    must_be_self_closing: bool = False
    allowed_parents: Set[str] = field(default_factory=set)
    disallowed_parents: Set[str] = field(default_factory=set)
    allowed_children: Set[str] = field(default_factory=set)
    disallowed_children: Set[str] = field(default_factory=set)
    is_block_level: bool = False

    @staticmethod
    def _passes(tag: str, whitelist, blacklist) -> bool:
        """Apply a deny-list first, then an optional allow-list, to ``tag``."""
        if blacklist and tag in blacklist:
            return False
        # An empty whitelist places no restriction on the tag.
        return not whitelist or tag in whitelist

    def can_be_child_of(self, parent_tag: str) -> bool:
        """Return whether this tag may appear directly inside ``parent_tag``."""
        return self._passes(parent_tag, self.allowed_parents, self.disallowed_parents)

    def can_contain(self, child_tag: str) -> bool:
        """Return whether this tag may directly contain ``child_tag``."""
        return self._passes(child_tag, self.allowed_children, self.disallowed_children)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@dataclass
class AttributeRule:
    """Rule describing a single HTML attribute.

    ``validator`` and ``enum_values`` are stored here as data only; this class
    does not apply them itself.
    """

    attr_name: str
    allowed: bool
    applicable_tags: Set[str] = field(default_factory=set)
    description: str = ""
    required_for: Set[str] = field(default_factory=set)
    validator: Optional[Callable[[str], bool]] = None
    default_value: Optional[str] = None
    enum_values: Optional[List[str]] = None

    def is_applicable_to(self, tag: str) -> bool:
        """Return whether the attribute may be used on ``tag``.

        An empty ``applicable_tags`` set means the attribute applies to every tag.
        """
        if not self.applicable_tags:
            return True
        return tag in self.applicable_tags
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
@dataclass
class StyleRule:
    """Rule describing a single inline CSS property and how to validate its value."""

    style_name: str
    allowed: bool
    applicable_tags: Set[str] = field(default_factory=set)
    description: str = ""
    # A validator may return either a bool or a ``(bool, message)`` tuple;
    # both shapes are handled by validate_value().
    validator: Optional[Callable[[str], Any]] = None
    unit_required: Optional[str] = None
    enum_values: Optional[List[str]] = None

    def is_applicable_to(self, tag: str) -> bool:
        """Return whether the style may be used on ``tag``.

        An empty ``applicable_tags`` set means the style applies to every tag.
        """
        return not self.applicable_tags or tag in self.applicable_tags

    def _constraint_error(self, value: str) -> Optional[str]:
        """Return the message for the first declared constraint ``value`` violates.

        ``enum_values`` is checked before ``unit_required``; ``None`` means both
        declared constraints are satisfied (or absent).
        """
        if self.enum_values and value not in self.enum_values:
            return f"必须是以下值之一: {', '.join(self.enum_values)}"
        if self.unit_required and not value.endswith(self.unit_required):
            return f"单位必须是 {self.unit_required}"
        return None

    def validate_value(self, value: str) -> tuple[bool, Optional[str]]:
        """Validate a style value.

        When a ``validator`` is set it is authoritative: its verdict is returned
        and ``enum_values`` / ``unit_required`` are not enforced on top of it.
        Without a validator, the declared enum and unit constraints are checked.

        Args:
            value: The raw CSS value text.

        Returns:
            ``(True, None)`` on success, otherwise ``(False, message)``. A
            validator that returns a tuple has that tuple passed through as-is.
            A validator that returns a bare ``False`` gets a message derived from
            the declared constraints, or a generic one, so a rejection always
            carries a reason.
        """
        if self.validator:
            try:
                outcome = self.validator(value)
            except Exception as e:
                # A crashing validator is reported as a failed validation
                # rather than propagated to the caller.
                return False, f"验证错误: {str(e)}"

            if isinstance(outcome, tuple):
                return outcome
            if isinstance(outcome, bool):
                if outcome:
                    return True, None
                # Bool validators carry no explanation of their own.
                reason = self._constraint_error(value) or f"无效的值: {value}"
                return False, reason
            return False, f"验证失败: {outcome}"

        problem = self._constraint_error(value)
        return problem is None, problem
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class RuleEngine:
    """Registry of the tag, attribute and inline-style rules used for HTML validation.

    All rules are built once in ``__init__`` and stored in three dicts keyed by
    tag name, attribute name and CSS property name respectively.
    """

    def __init__(self):
        self.tag_rules: Dict[str, TagRule] = {}
        self.attr_rules: Dict[str, AttributeRule] = {}
        self.style_rules: Dict[str, StyleRule] = {}
        self._initialize_rules()

    def _initialize_rules(self):
        """Populate the tag, attribute and style rule tables."""
        self._init_tag_rules()
        self._init_attribute_rules()
        self._init_style_rules()

    @staticmethod
    def _is_integer(text: str) -> bool:
        """Return True for an optionally negative run of ASCII digits.

        Stricter than ``text.lstrip("-").isdigit()``: only a single leading
        minus sign is accepted, and non-ASCII digit characters (for which
        ``str.isdigit`` is also true) are rejected.
        """
        digits = text[1:] if text.startswith("-") else text
        return digits.isascii() and digits.isdigit()

    @staticmethod
    def _is_integer_length(text: str, units) -> bool:
        """Return True when ``text`` is an integer directly followed by one of ``units``."""
        for unit in units:
            if text.endswith(unit):
                return RuleEngine._is_integer(text[: -len(unit)])
        return False

    def _init_tag_rules(self):
        """Register a TagRule for every allowed tag, then apply nesting constraints."""
        # Document-structure tags.
        structure_tags = {"html", "head", "body", "meta", "title", "style"}

        # Block-level elements.
        block_tags = {
            "h1",
            "h2",
            "h3",
            "h4",
            "h5",
            "h6",
            "p",
            "div",
            "ul",
            "ol",
            "table",
            "blockquote",
            "pre",
        }

        # Table section wrappers.
        table_tags = {"thead", "tbody", "tfoot"}

        # Inline elements.
        inline_tags = {"strong", "em", "u", "s", "sup", "sub", "code", "span"}

        # Tags that must be written self-closing.
        self_closing_tags = {"img", "br", "hr", "meta"}

        # Everything the engine accepts.
        allowed_tags = (
            structure_tags
            | block_tags
            | table_tags
            | inline_tags
            | self_closing_tags
            | {"li", "tr", "td", "th", "math", "latex"}
        )

        for tag in allowed_tags:
            self.tag_rules[tag] = TagRule(
                tag_name=tag,
                allowed=True,
                description=f"{tag} 标签",
                must_be_self_closing=tag in self_closing_tags,
                is_block_level=tag in block_tags,
            )

        # A paragraph may not contain block-level elements. The rule gets its
        # own copy so later edits to the rule cannot leak into block_tags.
        self.tag_rules["p"].disallowed_children = set(block_tags)

        # A table may contain section wrappers or rows directly.
        self.tag_rules["table"].allowed_children = table_tags | {"tr"}

        # Section wrappers may only contain rows.
        for tag in table_tags:
            self.tag_rules[tag].allowed_children = {"tr"}

        # A row may only contain cells.
        self.tag_rules["tr"].allowed_children = {"td", "th"}

        # A list item may contain nested lists and inline formatting. A
        # non-empty allowed_children set is an exclusive whitelist, so listing
        # only ul/ol here would reject e.g. <li><strong>...</strong></li>.
        self.tag_rules["li"].allowed_children = {"ul", "ol"} | inline_tags

        # Blockquotes may nest and hold paragraphs, divs and inline content.
        self.tag_rules["blockquote"].allowed_children = {
            "blockquote",
            "p",
            "div",
        } | inline_tags

    def _init_attribute_rules(self):
        """Register the rules for every accepted HTML attribute."""
        is_integer = self._is_integer

        # Image attributes.
        self.attr_rules["src"] = AttributeRule(
            attr_name="src",
            allowed=True,
            applicable_tags={"img"},
            description="图片源路径",
            required_for={"img"},
        )

        self.attr_rules["alt"] = AttributeRule(
            attr_name="alt",
            allowed=True,
            applicable_tags={"img"},
            description="图片替代文本",
        )

        self.attr_rules["width"] = AttributeRule(
            attr_name="width",
            allowed=True,
            applicable_tags={"img", "td", "th"},
            description="宽度(像素)",
            validator=is_integer,
        )

        self.attr_rules["height"] = AttributeRule(
            attr_name="height",
            allowed=True,
            applicable_tags={"img", "td", "th"},
            description="高度(像素)",
            validator=is_integer,
        )

        self.attr_rules["align"] = AttributeRule(
            attr_name="align",
            allowed=True,
            applicable_tags={"img"},
            description="对齐方式",
            enum_values=["left", "center", "right"],
        )

        # Table cell attributes.
        self.attr_rules["colspan"] = AttributeRule(
            attr_name="colspan",
            allowed=True,
            applicable_tags={"td", "th"},
            description="跨列数",
            validator=is_integer,
        )

        self.attr_rules["rowspan"] = AttributeRule(
            attr_name="rowspan",
            allowed=True,
            applicable_tags={"td", "th"},
            description="跨行数",
            validator=is_integer,
        )

        # Multi-column layout.
        self.attr_rules["data-cols"] = AttributeRule(
            attr_name="data-cols",
            allowed=True,
            applicable_tags={"div"},
            description="栏数",
            validator=is_integer,
        )

        # Indentation.
        self.attr_rules["data-indent"] = AttributeRule(
            attr_name="data-indent",
            allowed=True,
            applicable_tags={"p", "div"},
            description="缩进级别",
            validator=is_integer,
        )

        # Document-level attributes.
        self.attr_rules["lang"] = AttributeRule(
            attr_name="lang",
            allowed=True,
            applicable_tags={"html"},
            description="语言",
            enum_values=["zh-CN", "en", "en-US"],
        )

        self.attr_rules["charset"] = AttributeRule(
            attr_name="charset",
            allowed=True,
            applicable_tags={"meta"},
            description="字符编码",
            enum_values=["UTF-8", "utf-8"],
        )

        self.attr_rules["name"] = AttributeRule(
            attr_name="name",
            allowed=True,
            applicable_tags={"meta"},
            description="元数据名称",
        )

        self.attr_rules["content"] = AttributeRule(
            attr_name="content",
            allowed=True,
            applicable_tags={"meta"},
            description="元数据内容",
        )

        # General-purpose attributes.
        self.attr_rules["class"] = AttributeRule(
            attr_name="class",
            allowed=True,
            applicable_tags={
                "p",
                "div",
                "span",
                "table",
                "td",
                "th",
                "ul",
                "ol",
                "li",
                "h1",
                "h2",
                "h3",
                "h4",
                "h5",
                "h6",
            },
            description="CSS类名",
        )

        self.attr_rules["style"] = AttributeRule(
            attr_name="style",
            allowed=True,
            applicable_tags={
                "h1",
                "h2",
                "h3",
                "h4",
                "h5",
                "h6",
                "p",
                "div",
                "span",
                "strong",
                "em",
                "u",
                "s",
                "sup",
                "sub",
                "code",
                "table",
                "tr",
                "td",
                "th",
                "ul",
                "ol",
                "li",
                "blockquote",
                "pre",
            },
            description="内联样式",
        )

    def _init_style_rules(self):
        """Register the rules for every accepted inline CSS property."""
        hex_digits = "0123456789abcdefABCDEF"

        def validate_color(value: str) -> tuple[bool, Optional[str]]:
            """Accept only ``#RRGGBB`` with six hexadecimal digits."""
            if not value.startswith("#"):
                return False, "颜色必须使用十六进制格式 #RRGGBB"
            if len(value) != 7:
                return False, "颜色必须是6位十六进制 #RRGGBB"
            # int(text, 16) also accepts a sign, surrounding whitespace and
            # underscores (e.g. "#-12345"), so check each digit explicitly.
            if all(ch in hex_digits for ch in value[1:]):
                return True, None
            return False, "无效的十六进制颜色"

        # Colours.
        self.style_rules["color"] = StyleRule(
            style_name="color",
            allowed=True,
            description="文本颜色",
            validator=validate_color,
        )

        self.style_rules["background-color"] = StyleRule(
            style_name="background-color",
            allowed=True,
            description="背景颜色",
            validator=validate_color,
        )

        # Fonts.
        self.style_rules["font-family"] = StyleRule(
            style_name="font-family",
            allowed=True,
            description="支持任意系统已安装的字体",
        )

        self.style_rules["font-size"] = StyleRule(
            style_name="font-size",
            allowed=True,
            description="字体大小",
            unit_required="pt",
            validator=lambda x: self._is_integer_length(x, ("pt",)),
        )

        self.style_rules["font-weight"] = StyleRule(
            style_name="font-weight",
            allowed=True,
            description="字体粗细",
            enum_values=[
                "normal",
                "bold",
                "bolder",
                "lighter",
                "100",
                "200",
                "300",
                "400",
                "500",
                "600",
                "700",
                "800",
                "900",
            ],
        )

        self.style_rules["font-style"] = StyleRule(
            style_name="font-style",
            allowed=True,
            description="字体样式",
            enum_values=["normal", "italic", "oblique"],
        )

        # Text decoration.
        self.style_rules["text-decoration"] = StyleRule(
            style_name="text-decoration",
            allowed=True,
            description="文本装饰",
            enum_values=["none", "underline", "overline", "line-through", "blink"],
        )

        # Paragraph styles.
        self.style_rules["text-align"] = StyleRule(
            style_name="text-align",
            allowed=True,
            description="文本对齐",
            enum_values=["left", "center", "right", "justify"],
        )

        # Unitless number with at most one decimal point, ASCII digits only.
        self.style_rules["line-height"] = StyleRule(
            style_name="line-height",
            allowed=True,
            description="行高",
            validator=lambda x: x.isascii() and x.replace(".", "", 1).isdigit(),
        )

        # Margins and paddings.
        margin_padding_styles = [
            "margin-top",
            "margin-bottom",
            "margin-left",
            "margin-right",
            "padding-top",
            "padding-bottom",
            "padding-left",
            "padding-right",
        ]

        for style_name in margin_padding_styles:
            # The validator accepts both pt and px even though unit_required
            # says "pt"; StyleRule.validate_value returns the validator's
            # verdict without enforcing unit_required afterwards.
            self.style_rules[style_name] = StyleRule(
                style_name=style_name,
                allowed=True,
                description=f"{style_name.replace('-', ' ')}",
                unit_required="pt",
                validator=lambda x: self._is_integer_length(x, ("pt", "px")),
            )

        # Table styles (values are not validated).
        table_styles = ["width", "border-collapse", "margin", "border", "padding"]
        for style_name in table_styles:
            self.style_rules[style_name] = StyleRule(
                style_name=style_name,
                allowed=True,
                description=f"表格样式: {style_name}",
            )

    def get_tag_rule(self, tag_name: str) -> Optional[TagRule]:
        """Return the rule registered for ``tag_name``, or ``None``."""
        return self.tag_rules.get(tag_name)

    def get_attribute_rule(self, attr_name: str) -> Optional[AttributeRule]:
        """Return the rule registered for ``attr_name``, or ``None``."""
        return self.attr_rules.get(attr_name)

    def get_style_rule(self, style_name: str) -> Optional[StyleRule]:
        """Return the rule registered for ``style_name``, or ``None``."""
        return self.style_rules.get(style_name)

    def is_tag_allowed(self, tag_name: str) -> bool:
        """Return whether ``tag_name`` has a rule and that rule allows it."""
        rule = self.get_tag_rule(tag_name)
        return rule is not None and rule.allowed

    def is_attribute_allowed(self, attr_name: str, tag_name: str) -> bool:
        """Return whether ``attr_name`` is allowed and applicable to ``tag_name``."""
        rule = self.get_attribute_rule(attr_name)
        if not rule or not rule.allowed:
            return False
        return rule.is_applicable_to(tag_name)

    def is_style_allowed(self, style_name: str, tag_name: str) -> bool:
        """Return whether ``style_name`` is allowed and applicable to ``tag_name``."""
        rule = self.get_style_rule(style_name)
        if not rule or not rule.allowed:
            return False
        return rule.is_applicable_to(tag_name)

    def is_nesting_allowed(self, parent_tag: str, child_tag: str) -> bool:
        """Return whether ``child_tag`` may appear directly inside ``parent_tag``.

        Both tags must have a registered rule; the parent's child constraints
        and the child's parent constraints must both accept the pairing.
        """
        parent_rule = self.get_tag_rule(parent_tag)
        child_rule = self.get_tag_rule(child_tag)

        if not parent_rule or not child_rule:
            return False

        # The parent must accept the child ...
        if not parent_rule.can_contain(child_tag):
            return False

        # ... and the child must accept the parent.
        return child_rule.can_be_child_of(parent_tag)

    def is_self_closing_tag(self, tag_name: str) -> bool:
        """Return whether ``tag_name`` is registered as a self-closing tag."""
        rule = self.get_tag_rule(tag_name)
        if not rule:
            return False
        return rule.must_be_self_closing

    def get_disallowed_tags(self) -> List[Dict[str, str]]:
        """Return commonly used tags that this engine rejects (for documentation).

        Candidates that the engine actually allows (e.g. ``style``, which is
        registered as a structure tag) are filtered out so the list never
        contradicts ``is_tag_allowed``.
        """
        candidates = [
            "script",
            "style",
            "iframe",
            "form",
            "input",
            "button",
            "select",
            "textarea",
            "a",
            "canvas",
            "video",
            "audio",
        ]
        return [
            {"tag": tag, "reason": "不在允许列表中"}
            for tag in candidates
            if not self.is_tag_allowed(tag)
        ]

    def get_disallowed_attributes(self) -> List[Dict[str, str]]:
        """Return commonly used attributes that this engine rejects (for documentation).

        Candidates that have an allowing rule (e.g. ``class`` and ``style``)
        are filtered out so the list never contradicts the attribute rules.
        """
        candidates = [
            "onclick",
            "onload",
            "onerror",
            "class",
            "id",
            "style",
            "loading",
            "float",
            "position",
            "display",
            "transform",
        ]
        rejected = []
        for attr in candidates:
            rule = self.get_attribute_rule(attr)
            if rule is None or not rule.allowed:
                rejected.append({"attr": attr, "reason": "不在允许列表中"})
        return rejected

    def get_disallowed_styles(self) -> List[Dict[str, str]]:
        """Return commonly used CSS properties that this engine rejects (for documentation).

        Candidates that have an allowing style rule are filtered out.
        """
        candidates = [
            "float",
            "text-shadow",
            "display",
            "transform",
            "position",
            "opacity",
            "overflow",
            "visibility",
            "z-index",
        ]
        rejected = []
        for style in candidates:
            rule = self.get_style_rule(style)
            if rule is None or not rule.allowed:
                rejected.append({"style": style, "reason": "不在允许列表中"})
        return rejected
|
|
637
|
+
|
|
638
|
+
|
|
639
|
+
# Slot for the shared engine; stays None until get_rule_engine() first runs.
_rule_engine_instance = None


def get_rule_engine():
    """Return the shared RuleEngine, constructing it on the first call."""
    global _rule_engine_instance
    engine = _rule_engine_instance
    if engine is None:
        engine = RuleEngine()
        _rule_engine_instance = engine
    return engine


# Backward-compatible module-level instance (this builds the engine at import time).
rule_engine = get_rule_engine()
|