patent-xml-generator 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +167 -0
- package/dist/generator.d.ts +46 -0
- package/dist/generator.js +246 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +5 -0
- package/dist/types.d.ts +95 -0
- package/dist/types.js +2 -0
- package/dist/xml-utils.d.ts +31 -0
- package/dist/xml-utils.js +95 -0
- package/dtd/cn-application-body.dtd +1778 -0
- package/package.json +36 -0
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"use strict";
/**
 * XML utility functions.
 */
// CommonJS export table: mark the module as transpiled from ES-module syntax
// and expose each helper declared below (function declarations are hoisted,
// so the assignments may precede the definitions).
Object.defineProperty(exports, "__esModule", { value: true });
exports.escapeXml = escapeXml;
exports.escapeXmlPreservingInlineTags = escapeXmlPreservingInlineTags;
exports.indent = indent;
exports.newline = newline;
exports.formatParagraphNum = formatParagraphNum;
exports.checkGB18030 = checkGB18030;
|
|
12
|
+
/**
 * Escape the five XML special characters in a string.
 *
 * `&` is replaced first so that the ampersands introduced by the later
 * replacements are not escaped a second time.
 *
 * @param {string} text - Raw text to embed in XML character data or an attribute value.
 * @returns {string} The text with `&`, `<`, `>`, `"` and `'` replaced by
 *   `&amp;`, `&lt;`, `&gt;`, `&quot;` and `&apos;`.
 */
function escapeXml(text) {
    return text
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&apos;');
}
|
|
21
|
+
/** Element names that are kept as real markup instead of being escaped. */
const INLINE_TAGS = ['sup', 'sub', 'b', 'i', 'u', 'img', 'br', 'smallcaps', 'overscore'];
/**
 * Matches one preserved inline construct. Alternatives, in order:
 *   1. self-closing tag   - group 1 = name, group 2 = attributes   (`<br/>`, `<img file="x"/>`)
 *   2. open/close pair    - group 3 = name, group 4 = attributes, group 5 = inner text;
 *                           the closing tag must repeat the opening name (`\3`)
 *   3. bare `<br>`        - no capture group
 * Attributes may not contain `<` or `>`; inner text may span several lines.
 */
const INLINE_TAG_PATTERN = new RegExp(
    `<(${INLINE_TAGS.join('|')})(\\s[^<>]*?)?\\s*/>` +
    `|<(${INLINE_TAGS.join('|')})(\\s[^<>]*)?>([\\s\\S]*?)</\\3>` +
    `|<br\\s*>`, 'g');
/**
 * Escape text for XML while keeping the inline markup listed in INLINE_TAGS.
 *
 * - Text outside recognised tags is passed through escapeXml.
 * - For a matched pair such as `<sub>2</sub>` the tags are kept and the inner
 *   text is processed recursively, so nested inline tags survive and stray
 *   `&` / `<` inside the element are still escaped.
 * - A pair is only recognised when the closing name equals the opening name;
 *   anything else (e.g. `<b>x</i>`) is escaped as plain text.
 * - Self-closing tags are kept verbatim; a bare `<br>` is emitted as `<br/>`
 *   so the output stays well-formed.
 *
 * Limitations: attribute text is copied through unchanged, and directly
 * nested tags of the same name (`<b><b>x</b></b>`) are paired with the first
 * closing tag, leaving the remainder escaped.
 *
 * @param {string} text - Source text that may contain inline markup.
 * @returns {string} XML-safe text with the allowed inline markup preserved.
 */
function escapeXmlPreservingInlineTags(text) {
    // A fresh RegExp per call keeps `lastIndex` private to this invocation,
    // which matters because the function recurses into matched elements.
    const pattern = new RegExp(INLINE_TAG_PATTERN.source, 'g');
    let result = '';
    let cursor = 0;
    let match;
    while ((match = pattern.exec(text)) !== null) {
        // Plain text between the previous match and this one.
        result += escapeXml(text.slice(cursor, match.index));
        if (match[1] !== undefined) {
            // Self-closing tag: already well-formed, keep as written.
            result += match[0];
        }
        else if (match[3] !== undefined) {
            const name = match[3];
            const attrs = match[4] || '';
            result += `<${name}${attrs}>${escapeXmlPreservingInlineTags(match[5])}</${name}>`;
        }
        else {
            // Bare <br>: close it so the result is well-formed XML.
            result += '<br/>';
        }
        cursor = match.index + match[0].length;
    }
    // Trailing text after the last match (or the whole input when nothing matched).
    return result + escapeXml(text.slice(cursor));
}
|
|
58
|
+
/**
 * Build the indentation prefix for a nesting level.
 *
 * @param {number} level - Nesting depth; the indent unit is repeated this many times.
 * @param {boolean} useIndent - When falsy, indentation is disabled and '' is returned.
 * @returns {string} The indentation string, or '' when indentation is disabled.
 */
function indent(level, useIndent) {
    return useIndent ? ' '.repeat(level) : '';
}
|
|
64
|
+
/**
 * Return the line separator used between emitted elements.
 *
 * @param {boolean} useIndent - Whether pretty-printing is enabled.
 * @returns {string} '\n' when pretty-printing is enabled, otherwise ''.
 */
function newline(useIndent) {
    return useIndent ? '\n' : '';
}
|
|
70
|
+
/**
 * Format a paragraph number as a zero-padded string of at least four digits.
 * Examples: 1 -> "0001", 12 -> "0012". Values with more than four digits are
 * returned unpadded.
 *
 * @param {number} num - Paragraph number.
 * @returns {string} The number left-padded with '0' to a minimum width of 4.
 */
function formatParagraphNum(num) {
    return num.toString().padStart(4, '0');
}
|
|
77
|
+
/**
 * Heuristic check that a text stays within the character range this
 * generator accepts as GB18030-safe. Per the original author's note,
 * characters outside that range need to be converted to images.
 *
 * The check is deliberately coarse: every code point in the Basic
 * Multilingual Plane is accepted, and every code point above U+FFFF
 * (emoji, rare-character extension blocks, ...) is reported.
 *
 * @param {string} text - Text to inspect.
 * @returns {{valid: boolean, invalidChars: string[]}} `valid` is true when no
 *   character was reported; `invalidChars` lists each reported character in
 *   order of appearance (duplicates included).
 */
function checkGB18030(text) {
    const invalidChars = [];
    // for...of iterates by code point, so a surrogate pair arrives as one character.
    for (const char of text) {
        const code = char.codePointAt(0);
        if (code > 0xFFFF) {
            invalidChars.push(char);
        }
    }
    return {
        valid: invalidChars.length === 0,
        invalidChars
    };
}
|