@birthday8/doc-mcp 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +61 -21
- package/index.js +61 -56
- package/install.js +45 -35
- package/package.json +1 -1
- package/python/docx_converter.py +15 -83
- package/python/html_rules.py +652 -0
- package/python/html_validator.py +59 -274
- package/python/html_validator_strict.py +430 -0
- package/python/sample/example.html +0 -14
- package/python/sample/html_schema.py +352 -0
- package/python/server.py +112 -75
- package/python/test_strict_validation.py +118 -0
package/README.md
CHANGED
|
@@ -12,7 +12,6 @@ Doc Creator MCP Server - Generate Word documents from HTML with rich formatting
|
|
|
12
12
|
- ✅ Tables with styles
|
|
13
13
|
- ✅ Info/Warning/Success boxes
|
|
14
14
|
- ✅ Code blocks
|
|
15
|
-
- ✅ Blockquotes
|
|
16
15
|
- ✅ Multi-column layout
|
|
17
16
|
- ✅ Page breaks
|
|
18
17
|
|
|
@@ -63,8 +62,20 @@ Generate a Word document from HTML content.
|
|
|
63
62
|
}
|
|
64
63
|
```
|
|
65
64
|
|
|
66
|
-
### 3.
|
|
67
|
-
Get
|
|
65
|
+
### 3. get_html_constraints
|
|
66
|
+
Get HTML format constraints example with all supported formats.
|
|
67
|
+
|
|
68
|
+
### 4. get_html_schema
|
|
69
|
+
Get structured HTML format constraints as JSON.
|
|
70
|
+
|
|
71
|
+
### 5. validate_html
|
|
72
|
+
Validate HTML content against format constraints.
|
|
73
|
+
|
|
74
|
+
```json
|
|
75
|
+
{
|
|
76
|
+
"html_content": "<h1>Title</h1><p>Content...</p>"
|
|
77
|
+
}
|
|
78
|
+
```
|
|
68
79
|
|
|
69
80
|
## HTML Conventions
|
|
70
81
|
|
|
@@ -75,12 +86,9 @@ Get the HTML template with CSS styles.
|
|
|
75
86
|
<head>
|
|
76
87
|
<meta charset="UTF-8">
|
|
77
88
|
<title>Document Title</title>
|
|
78
|
-
<style>
|
|
79
|
-
/* CSS styles */
|
|
80
|
-
</style>
|
|
81
89
|
</head>
|
|
82
90
|
<body>
|
|
83
|
-
<h1>Title</h1>
|
|
91
|
+
<h1 style="font-family: 黑体; font-size: 18pt; text-align: center;">Title</h1>
|
|
84
92
|
<p>Content with <strong>bold</strong> and <em>italic</em>.</p>
|
|
85
93
|
</body>
|
|
86
94
|
</html>
|
|
@@ -92,8 +100,8 @@ Get the HTML template with CSS styles.
|
|
|
92
100
|
<em>Italic</em>
|
|
93
101
|
<u>Underline</u>
|
|
94
102
|
<s>Strikethrough</s>
|
|
95
|
-
<span
|
|
96
|
-
<span
|
|
103
|
+
<span style="color: #FF0000;">Red text</span>
|
|
104
|
+
<span style="background-color: #FFFF00;">Highlighted</span>
|
|
97
105
|
```
|
|
98
106
|
|
|
99
107
|
### Paragraph Indentation
|
|
@@ -110,17 +118,13 @@ Get the HTML template with CSS styles.
|
|
|
110
118
|
|
|
111
119
|
### Tables
|
|
112
120
|
```html
|
|
113
|
-
<table
|
|
114
|
-
<
|
|
115
|
-
<
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
<tr>
|
|
121
|
-
<td style="border: 1px solid #ddd; padding: 12px;">Cell</td>
|
|
122
|
-
</tr>
|
|
123
|
-
</tbody>
|
|
121
|
+
<table>
|
|
122
|
+
<tr>
|
|
123
|
+
<th style="background-color: #E3F2FD;">Header</th>
|
|
124
|
+
</tr>
|
|
125
|
+
<tr>
|
|
126
|
+
<td>Cell</td>
|
|
127
|
+
</tr>
|
|
124
128
|
</table>
|
|
125
129
|
```
|
|
126
130
|
|
|
@@ -130,8 +134,44 @@ Get the HTML template with CSS styles.
|
|
|
130
134
|
<p>Column 1 content...</p>
|
|
131
135
|
<p>Column 2 content...</p>
|
|
132
136
|
</div>
|
|
137
|
+
<!-- 必须恢复单栏 -->
|
|
138
|
+
<div class="columns" data-cols="1"></div>
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## Important Notes
|
|
142
|
+
|
|
143
|
+
### Style Usage
|
|
144
|
+
- **Inline styles only**: Use `style="..."` attributes on elements
|
|
145
|
+
- **No `<style>` tags**: CSS in `<style>` tags is not supported
|
|
146
|
+
- **Supported styles**: color, background-color, font-family, font-size, text-align, line-height, margin-top, margin-bottom
|
|
147
|
+
- **Color format**: Must use hex format `#RRGGBB` (e.g., `#FF0000`), NOT `red`, `rgb(255,0,0)`, etc.
|
|
148
|
+
- **Font size format**: Must use `pt` units (e.g., `14pt`), NOT `px`, `em`, `rem`
|
|
149
|
+
- **Margin format**: Must use `pt` units (e.g., `10pt`), NOT `px`, `em`, `rem`
|
|
150
|
+
- **Line height format**: Must be numeric (e.g., `1.5`, `1.8`, `2.0`)
|
|
151
|
+
|
|
152
|
+
### Class Names
|
|
153
|
+
**Only 7 class names are supported**:
|
|
154
|
+
- `center` - Center aligned paragraph
|
|
155
|
+
- `right` - Right aligned paragraph
|
|
156
|
+
- `left` - Left aligned paragraph
|
|
157
|
+
- `info` - Info message box
|
|
158
|
+
- `warning` - Warning message box
|
|
159
|
+
- `success` - Success message box
|
|
160
|
+
- `columns` - Multi-column layout
|
|
161
|
+
|
|
162
|
+
All other styles must use inline `style` attributes.
|
|
163
|
+
|
|
164
|
+
### Example with inline styles
|
|
165
|
+
```html
|
|
166
|
+
<h1 style="font-family: 黑体; font-size: 18pt; text-align: center;">Title</h1>
|
|
167
|
+
<p style="color: #FF0000;">Red text</p>
|
|
168
|
+
<span style="background-color: #FFFF00;">Highlighted</span>
|
|
169
|
+
|
|
170
|
+
<!-- 不支持的类名示例 -->
|
|
171
|
+
<!-- ❌ <span class="abstract-title">摘要</span> -->
|
|
172
|
+
<!-- ✅ <span style="font-family: 黑体; font-size: 12pt; font-weight: bold;">摘要</span> -->
|
|
133
173
|
```
|
|
134
174
|
|
|
135
175
|
## License
|
|
136
176
|
|
|
137
|
-
MIT
|
|
177
|
+
MIT
|
package/index.js
CHANGED
|
@@ -5,48 +5,50 @@
|
|
|
5
5
|
* 通过npx运行Python MCP Server
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
const { spawn } = require(
|
|
9
|
-
const path = require(
|
|
10
|
-
const fs = require(
|
|
8
|
+
const { spawn } = require("child_process");
|
|
9
|
+
const path = require("path");
|
|
10
|
+
const fs = require("fs");
|
|
11
11
|
|
|
12
12
|
// Python脚本路径
|
|
13
|
-
const pythonDir = path.join(__dirname,
|
|
14
|
-
const serverScript = path.join(pythonDir,
|
|
15
|
-
const requirementsFile = path.join(pythonDir,
|
|
13
|
+
const pythonDir = path.join(__dirname, "python");
|
|
14
|
+
const serverScript = path.join(pythonDir, "server.py");
|
|
15
|
+
const requirementsFile = path.join(pythonDir, "requirements.txt");
|
|
16
16
|
|
|
17
17
|
// 检查Python是否安装
|
|
18
18
|
function checkPython() {
|
|
19
19
|
return new Promise((resolve, reject) => {
|
|
20
|
-
const pythonCommands = [
|
|
20
|
+
const pythonCommands = ["python3", "python", "py"];
|
|
21
21
|
let found = false;
|
|
22
|
-
|
|
22
|
+
|
|
23
23
|
function tryNext(index) {
|
|
24
24
|
if (index >= pythonCommands.length) {
|
|
25
|
-
reject(
|
|
25
|
+
reject(
|
|
26
|
+
new Error("Python not found. Please install Python 3.8 or higher."),
|
|
27
|
+
);
|
|
26
28
|
return;
|
|
27
29
|
}
|
|
28
|
-
|
|
30
|
+
|
|
29
31
|
const cmd = pythonCommands[index];
|
|
30
|
-
const check = spawn(cmd, [
|
|
31
|
-
|
|
32
|
-
check.on(
|
|
32
|
+
const check = spawn(cmd, ["--version"], { shell: true });
|
|
33
|
+
|
|
34
|
+
check.on("error", () => {
|
|
33
35
|
tryNext(index + 1);
|
|
34
36
|
});
|
|
35
|
-
|
|
36
|
-
check.stdout.on(
|
|
37
|
+
|
|
38
|
+
check.stdout.on("data", (data) => {
|
|
37
39
|
const version = data.toString().trim();
|
|
38
40
|
console.error(`[Doc Creator] Found Python: ${version}`);
|
|
39
41
|
found = true;
|
|
40
42
|
resolve(cmd);
|
|
41
43
|
});
|
|
42
|
-
|
|
43
|
-
check.on(
|
|
44
|
+
|
|
45
|
+
check.on("close", (code) => {
|
|
44
46
|
if (!found) {
|
|
45
47
|
tryNext(index + 1);
|
|
46
48
|
}
|
|
47
49
|
});
|
|
48
50
|
}
|
|
49
|
-
|
|
51
|
+
|
|
50
52
|
tryNext(0);
|
|
51
53
|
});
|
|
52
54
|
}
|
|
@@ -54,16 +56,16 @@ function checkPython() {
|
|
|
54
56
|
// 检查依赖是否已安装
|
|
55
57
|
function checkDependencies(pythonCmd) {
|
|
56
58
|
return new Promise((resolve, reject) => {
|
|
57
|
-
const check = spawn(pythonCmd, [
|
|
59
|
+
const check = spawn(pythonCmd, ["-c", "import mcp, docx, bs4"], {
|
|
58
60
|
cwd: pythonDir,
|
|
59
|
-
shell: true
|
|
61
|
+
shell: true,
|
|
60
62
|
});
|
|
61
|
-
|
|
62
|
-
check.on(
|
|
63
|
+
|
|
64
|
+
check.on("close", (code) => {
|
|
63
65
|
resolve(code === 0);
|
|
64
66
|
});
|
|
65
|
-
|
|
66
|
-
check.on(
|
|
67
|
+
|
|
68
|
+
check.on("error", () => {
|
|
67
69
|
resolve(false);
|
|
68
70
|
});
|
|
69
71
|
});
|
|
@@ -72,25 +74,29 @@ function checkDependencies(pythonCmd) {
|
|
|
72
74
|
// 安装Python依赖
|
|
73
75
|
function installDependencies(pythonCmd) {
|
|
74
76
|
return new Promise((resolve, reject) => {
|
|
75
|
-
console.error(
|
|
76
|
-
|
|
77
|
-
const install = spawn(
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
77
|
+
console.error("[Doc Creator] Installing Python dependencies...");
|
|
78
|
+
|
|
79
|
+
const install = spawn(
|
|
80
|
+
pythonCmd,
|
|
81
|
+
["-m", "pip", "install", "-r", requirementsFile],
|
|
82
|
+
{
|
|
83
|
+
cwd: pythonDir,
|
|
84
|
+
shell: true,
|
|
85
|
+
stdio: "pipe",
|
|
86
|
+
},
|
|
87
|
+
);
|
|
88
|
+
|
|
89
|
+
install.on("close", (code) => {
|
|
84
90
|
if (code === 0) {
|
|
85
|
-
console.error(
|
|
91
|
+
console.error("[Doc Creator] Dependencies installed successfully.");
|
|
86
92
|
resolve();
|
|
87
93
|
} else {
|
|
88
|
-
reject(new Error(
|
|
94
|
+
reject(new Error("Failed to install dependencies"));
|
|
89
95
|
}
|
|
90
96
|
});
|
|
91
|
-
|
|
92
|
-
install.on(
|
|
93
|
-
reject(new Error(
|
|
97
|
+
|
|
98
|
+
install.on("error", (err) => {
|
|
99
|
+
reject(new Error("Failed to run pip"));
|
|
94
100
|
});
|
|
95
101
|
});
|
|
96
102
|
}
|
|
@@ -100,46 +106,45 @@ async function startServer() {
|
|
|
100
106
|
try {
|
|
101
107
|
// 检查Python
|
|
102
108
|
const pythonCmd = await checkPython();
|
|
103
|
-
|
|
109
|
+
|
|
104
110
|
// 检查依赖
|
|
105
111
|
const depsInstalled = await checkDependencies(pythonCmd);
|
|
106
|
-
|
|
112
|
+
|
|
107
113
|
// 如果依赖未安装,尝试安装
|
|
108
114
|
if (!depsInstalled) {
|
|
109
115
|
try {
|
|
110
116
|
await installDependencies(pythonCmd);
|
|
111
117
|
} catch (err) {
|
|
112
118
|
console.error(`[Doc Creator] Warning: ${err.message}`);
|
|
113
|
-
console.error(
|
|
119
|
+
console.error("[Doc Creator] Trying to continue anyway...");
|
|
114
120
|
}
|
|
115
121
|
}
|
|
116
|
-
|
|
122
|
+
|
|
117
123
|
// 启动Python MCP Server
|
|
118
|
-
console.error(
|
|
119
|
-
|
|
124
|
+
console.error("[Doc Creator] Starting MCP Server...");
|
|
125
|
+
|
|
120
126
|
const server = spawn(pythonCmd, [serverScript], {
|
|
121
127
|
cwd: pythonDir,
|
|
122
|
-
stdio: [
|
|
128
|
+
stdio: ["inherit", "inherit", "inherit"],
|
|
123
129
|
});
|
|
124
|
-
|
|
125
|
-
server.on(
|
|
130
|
+
|
|
131
|
+
server.on("error", (err) => {
|
|
126
132
|
console.error(`[Doc Creator] Failed to start server: ${err.message}`);
|
|
127
133
|
process.exit(1);
|
|
128
134
|
});
|
|
129
|
-
|
|
130
|
-
server.on(
|
|
135
|
+
|
|
136
|
+
server.on("close", (code) => {
|
|
131
137
|
process.exit(code);
|
|
132
138
|
});
|
|
133
|
-
|
|
139
|
+
|
|
134
140
|
// 处理进程信号
|
|
135
|
-
process.on(
|
|
136
|
-
server.kill(
|
|
141
|
+
process.on("SIGINT", () => {
|
|
142
|
+
server.kill("SIGINT");
|
|
137
143
|
});
|
|
138
|
-
|
|
139
|
-
process.on(
|
|
140
|
-
server.kill(
|
|
144
|
+
|
|
145
|
+
process.on("SIGTERM", () => {
|
|
146
|
+
server.kill("SIGTERM");
|
|
141
147
|
});
|
|
142
|
-
|
|
143
148
|
} catch (err) {
|
|
144
149
|
console.error(`[Doc Creator] Error: ${err.message}`);
|
|
145
150
|
process.exit(1);
|
package/install.js
CHANGED
|
@@ -5,46 +5,48 @@
|
|
|
5
5
|
* 在安装npm包时自动安装Python依赖
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
const { spawn } = require(
|
|
9
|
-
const path = require(
|
|
8
|
+
const { spawn } = require("child_process");
|
|
9
|
+
const path = require("path");
|
|
10
10
|
|
|
11
|
-
const pythonDir = path.join(__dirname,
|
|
12
|
-
const requirementsFile = path.join(pythonDir,
|
|
11
|
+
const pythonDir = path.join(__dirname, "python");
|
|
12
|
+
const requirementsFile = path.join(pythonDir, "requirements.txt");
|
|
13
13
|
|
|
14
14
|
// 检查Python是否安装
|
|
15
15
|
function checkPython() {
|
|
16
16
|
return new Promise((resolve, reject) => {
|
|
17
|
-
const pythonCommands = [
|
|
17
|
+
const pythonCommands = ["python3", "python", "py"];
|
|
18
18
|
let found = false;
|
|
19
|
-
|
|
19
|
+
|
|
20
20
|
function tryNext(index) {
|
|
21
21
|
if (index >= pythonCommands.length) {
|
|
22
|
-
console.log(
|
|
22
|
+
console.log(
|
|
23
|
+
"[Doc Creator] Python not found. Will check again at runtime.",
|
|
24
|
+
);
|
|
23
25
|
resolve(null);
|
|
24
26
|
return;
|
|
25
27
|
}
|
|
26
|
-
|
|
28
|
+
|
|
27
29
|
const cmd = pythonCommands[index];
|
|
28
|
-
const check = spawn(cmd, [
|
|
29
|
-
|
|
30
|
-
check.on(
|
|
30
|
+
const check = spawn(cmd, ["--version"], { shell: true });
|
|
31
|
+
|
|
32
|
+
check.on("error", () => {
|
|
31
33
|
tryNext(index + 1);
|
|
32
34
|
});
|
|
33
|
-
|
|
34
|
-
check.stdout.on(
|
|
35
|
+
|
|
36
|
+
check.stdout.on("data", () => {
|
|
35
37
|
if (!found) {
|
|
36
38
|
found = true;
|
|
37
39
|
resolve(cmd);
|
|
38
40
|
}
|
|
39
41
|
});
|
|
40
|
-
|
|
41
|
-
check.on(
|
|
42
|
+
|
|
43
|
+
check.on("close", () => {
|
|
42
44
|
if (!found) {
|
|
43
45
|
tryNext(index + 1);
|
|
44
46
|
}
|
|
45
47
|
});
|
|
46
48
|
}
|
|
47
|
-
|
|
49
|
+
|
|
48
50
|
tryNext(0);
|
|
49
51
|
});
|
|
50
52
|
}
|
|
@@ -52,26 +54,34 @@ function checkPython() {
|
|
|
52
54
|
// 安装Python依赖
|
|
53
55
|
function installDependencies(pythonCmd) {
|
|
54
56
|
return new Promise((resolve, reject) => {
|
|
55
|
-
console.log(
|
|
56
|
-
|
|
57
|
-
const install = spawn(
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
57
|
+
console.log("[Doc Creator] Installing Python dependencies...");
|
|
58
|
+
|
|
59
|
+
const install = spawn(
|
|
60
|
+
pythonCmd,
|
|
61
|
+
["-m", "pip", "install", "-r", requirementsFile],
|
|
62
|
+
{
|
|
63
|
+
cwd: pythonDir,
|
|
64
|
+
shell: true,
|
|
65
|
+
stdio: "inherit",
|
|
66
|
+
},
|
|
67
|
+
);
|
|
68
|
+
|
|
69
|
+
install.on("close", (code) => {
|
|
64
70
|
if (code === 0) {
|
|
65
|
-
console.log(
|
|
71
|
+
console.log(
|
|
72
|
+
"[Doc Creator] Python dependencies installed successfully.",
|
|
73
|
+
);
|
|
66
74
|
resolve();
|
|
67
75
|
} else {
|
|
68
|
-
console.log(
|
|
76
|
+
console.log(
|
|
77
|
+
"[Doc Creator] Failed to install dependencies. Will retry at runtime.",
|
|
78
|
+
);
|
|
69
79
|
resolve(); // 不中断安装流程
|
|
70
80
|
}
|
|
71
81
|
});
|
|
72
|
-
|
|
73
|
-
install.on(
|
|
74
|
-
console.log(
|
|
82
|
+
|
|
83
|
+
install.on("error", () => {
|
|
84
|
+
console.log("[Doc Creator] Failed to run pip. Will retry at runtime.");
|
|
75
85
|
resolve(); // 不中断安装流程
|
|
76
86
|
});
|
|
77
87
|
});
|
|
@@ -79,15 +89,15 @@ function installDependencies(pythonCmd) {
|
|
|
79
89
|
|
|
80
90
|
// 主函数
|
|
81
91
|
async function main() {
|
|
82
|
-
console.log(
|
|
83
|
-
|
|
92
|
+
console.log("[Doc Creator] Running post-install script...");
|
|
93
|
+
|
|
84
94
|
const pythonCmd = await checkPython();
|
|
85
|
-
|
|
95
|
+
|
|
86
96
|
if (pythonCmd) {
|
|
87
97
|
await installDependencies(pythonCmd);
|
|
88
98
|
}
|
|
89
|
-
|
|
90
|
-
console.log(
|
|
99
|
+
|
|
100
|
+
console.log("[Doc Creator] Post-install complete.");
|
|
91
101
|
}
|
|
92
102
|
|
|
93
103
|
main().catch(() => {
|
package/package.json
CHANGED
package/python/docx_converter.py
CHANGED
|
@@ -204,11 +204,6 @@ def latex_to_unicode_formula(latex_formula):
|
|
|
204
204
|
def add_native_formula(
|
|
205
205
|
para,
|
|
206
206
|
latex_formula,
|
|
207
|
-
color=None,
|
|
208
|
-
font_name=None,
|
|
209
|
-
font_size=None,
|
|
210
|
-
bold=False,
|
|
211
|
-
italic=False,
|
|
212
207
|
):
|
|
213
208
|
"""添加 Word 原生公式"""
|
|
214
209
|
if HAS_MATH2DOCX:
|
|
@@ -236,6 +231,7 @@ def parse_color(color_str):
|
|
|
236
231
|
return RGBColor(r, g, b)
|
|
237
232
|
except:
|
|
238
233
|
return None
|
|
234
|
+
raise
|
|
239
235
|
|
|
240
236
|
|
|
241
237
|
def set_font(
|
|
@@ -399,11 +395,8 @@ def process_paragraph(
|
|
|
399
395
|
para.paragraph_format.line_spacing = para_size * float(line_height_str[:-2])
|
|
400
396
|
else:
|
|
401
397
|
# 尝试作为倍数处理
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
para.paragraph_format.line_spacing = line_spacing_value
|
|
405
|
-
except:
|
|
406
|
-
pass
|
|
398
|
+
line_spacing_value = float(line_height_str)
|
|
399
|
+
para.paragraph_format.line_spacing = line_spacing_value
|
|
407
400
|
|
|
408
401
|
# 解析段后距
|
|
409
402
|
margin_bottom_match = MARGIN_RE.search(style)
|
|
@@ -421,10 +414,7 @@ def process_paragraph(
|
|
|
421
414
|
)
|
|
422
415
|
else:
|
|
423
416
|
# 尝试作为pt处理
|
|
424
|
-
|
|
425
|
-
para.paragraph_format.space_after = Pt(float(margin_bottom_str))
|
|
426
|
-
except:
|
|
427
|
-
pass
|
|
417
|
+
para.paragraph_format.space_after = Pt(float(margin_bottom_str))
|
|
428
418
|
|
|
429
419
|
# 解析段前距
|
|
430
420
|
margin_top_match = MARGIN_RE.search(style)
|
|
@@ -442,10 +432,7 @@ def process_paragraph(
|
|
|
442
432
|
)
|
|
443
433
|
else:
|
|
444
434
|
# 尝试作为pt处理
|
|
445
|
-
|
|
446
|
-
para.paragraph_format.space_before = Pt(float(margin_top_str))
|
|
447
|
-
except:
|
|
448
|
-
pass
|
|
435
|
+
para.paragraph_format.space_before = Pt(float(margin_top_str))
|
|
449
436
|
|
|
450
437
|
# 解析段落的字号
|
|
451
438
|
para_size = default_size
|
|
@@ -464,10 +451,7 @@ def process_paragraph(
|
|
|
464
451
|
para_size = float(size_str[:-2]) * 12
|
|
465
452
|
else:
|
|
466
453
|
# 尝试直接解析为数字
|
|
467
|
-
|
|
468
|
-
para_size = float(size_str)
|
|
469
|
-
except:
|
|
470
|
-
pass
|
|
454
|
+
para_size = float(size_str)
|
|
471
455
|
|
|
472
456
|
# 处理段落内容 - 递归处理所有子元素
|
|
473
457
|
_process_element_to_runs(paragraph, para, default_font, para_size)
|
|
@@ -713,10 +697,7 @@ def _process_element_to_runs(
|
|
|
713
697
|
span_size = float(size_str[:-2]) * 12
|
|
714
698
|
else:
|
|
715
699
|
# 尝试直接解析为数字
|
|
716
|
-
|
|
717
|
-
span_size = float(size_str)
|
|
718
|
-
except:
|
|
719
|
-
pass
|
|
700
|
+
span_size = float(size_str)
|
|
720
701
|
|
|
721
702
|
# 解析class中的颜色
|
|
722
703
|
class_set = set(classes) # 转换为集合提高查找性能
|
|
@@ -1067,7 +1048,7 @@ def _process_heading(element, doc, default_font):
|
|
|
1067
1048
|
heading.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
|
|
1068
1049
|
|
|
1069
1050
|
|
|
1070
|
-
def _process_paragraph_element(element, doc
|
|
1051
|
+
def _process_paragraph_element(element, doc):
|
|
1071
1052
|
"""处理段落元素"""
|
|
1072
1053
|
classes = element.get("class", [])
|
|
1073
1054
|
class_set = set(classes)
|
|
@@ -1124,17 +1105,12 @@ def _process_image_element(element, doc, html_file):
|
|
|
1124
1105
|
width_inch = None
|
|
1125
1106
|
height_inch = None
|
|
1126
1107
|
if width:
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
except:
|
|
1131
|
-
pass
|
|
1108
|
+
width_px = float(width)
|
|
1109
|
+
width_inch = width_px / 96 # 假设96 DPI
|
|
1110
|
+
|
|
1132
1111
|
if height:
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
height_inch = height_px / 96
|
|
1136
|
-
except:
|
|
1137
|
-
pass
|
|
1112
|
+
height_px = float(height)
|
|
1113
|
+
height_inch = height_px / 96
|
|
1138
1114
|
|
|
1139
1115
|
# 处理相对路径(相对于HTML文件)
|
|
1140
1116
|
html_dir = os.path.dirname(html_file)
|
|
@@ -1251,7 +1227,7 @@ def _process_elements(soup, doc, html_file, default_font, default_size):
|
|
|
1251
1227
|
if element.name in ["h1", "h2", "h3", "h4", "h5", "h6"]:
|
|
1252
1228
|
_process_heading(element, doc, default_font)
|
|
1253
1229
|
elif element.name == "p":
|
|
1254
|
-
_process_paragraph_element(element, doc
|
|
1230
|
+
_process_paragraph_element(element, doc)
|
|
1255
1231
|
elif element.name == "ul":
|
|
1256
1232
|
_process_list_element(element, doc, ordered=False)
|
|
1257
1233
|
elif element.name == "ol":
|
|
@@ -1384,51 +1360,6 @@ def convert_html_to_docx(
|
|
|
1384
1360
|
elif element.name == "table":
|
|
1385
1361
|
process_table(element, doc)
|
|
1386
1362
|
|
|
1387
|
-
elif element.name == "img":
|
|
1388
|
-
# 处理图片
|
|
1389
|
-
src = element.get("src", "")
|
|
1390
|
-
alt = element.get("alt", "")
|
|
1391
|
-
|
|
1392
|
-
if src:
|
|
1393
|
-
# 解析宽度、高度和对齐方式
|
|
1394
|
-
width = element.get("width")
|
|
1395
|
-
height = element.get("height")
|
|
1396
|
-
style = element.get("style", "")
|
|
1397
|
-
align = element.get("align", "center")
|
|
1398
|
-
|
|
1399
|
-
# 从 style 中提取对齐方式
|
|
1400
|
-
if "text-align: right" in style or "float: right" in style:
|
|
1401
|
-
align = "right"
|
|
1402
|
-
elif "text-align: left" in style or "float: left" in style:
|
|
1403
|
-
align = "left"
|
|
1404
|
-
elif "text-align: center" in style:
|
|
1405
|
-
align = "center"
|
|
1406
|
-
|
|
1407
|
-
# 处理宽度高度(支持像素转英寸)
|
|
1408
|
-
width_inch = None
|
|
1409
|
-
height_inch = None
|
|
1410
|
-
if width:
|
|
1411
|
-
try:
|
|
1412
|
-
width_px = float(width)
|
|
1413
|
-
width_inch = width_px / 96 # 假设96 DPI
|
|
1414
|
-
except:
|
|
1415
|
-
pass
|
|
1416
|
-
if height:
|
|
1417
|
-
try:
|
|
1418
|
-
height_px = float(height)
|
|
1419
|
-
height_inch = height_px / 96
|
|
1420
|
-
except:
|
|
1421
|
-
pass
|
|
1422
|
-
|
|
1423
|
-
# 处理相对路径(相对于HTML文件)
|
|
1424
|
-
html_dir = os.path.dirname(html_file)
|
|
1425
|
-
image_path = (
|
|
1426
|
-
os.path.join(html_dir, src) if not os.path.isabs(src) else src
|
|
1427
|
-
)
|
|
1428
|
-
|
|
1429
|
-
# 添加图片
|
|
1430
|
-
add_image(doc, image_path, width_inch, height_inch, align)
|
|
1431
|
-
|
|
1432
1363
|
elif element.name == "div":
|
|
1433
1364
|
# 检查是否是特殊div
|
|
1434
1365
|
classes = element.get("class", [])
|
|
@@ -1578,6 +1509,7 @@ def convert_html_to_docx(
|
|
|
1578
1509
|
except:
|
|
1579
1510
|
para = doc.add_paragraph(f"[图片: {alt}]")
|
|
1580
1511
|
para.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
|
|
1512
|
+
raise
|
|
1581
1513
|
else:
|
|
1582
1514
|
para = doc.add_paragraph(f"[图片: {alt} - 路径: {src}]")
|
|
1583
1515
|
para.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
|