@heylemon/lemonade 0.1.8 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/build-info.json
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
b3ce500e7c71ed0a3415b4d2d8e472926cb0135cdb8a2f66ab05abb127c334ac
|
package/package.json
CHANGED
package/skills/docx/SKILL.md
CHANGED
|
@@ -7,14 +7,30 @@ description: "Create professional, polished documents — reports, memos, propos
|
|
|
7
7
|
|
|
8
8
|
Create polished, production-ready documents using docx-js, the clean standards-compliant Node.js library for producing .docx files. This skill writes bespoke code for each request rather than using JSON templates — you have full control over the document API.
|
|
9
9
|
|
|
10
|
+
## ⚠️ MANDATORY: Post-Processing After Every Build
|
|
11
|
+
|
|
12
|
+
docx-js has TWO bugs that cause documents to look wrong in Apple Pages and Google Docs. **After generating ANY document, you MUST run BOTH scripts:**
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
# Step 1: Fix table column grids (tables break in Pages without this)
|
|
16
|
+
python scripts/fix_tables.py output.docx
|
|
17
|
+
|
|
18
|
+
# Step 2: Fix heading styles (headings show wrong colors/fonts in Pages without this)
|
|
19
|
+
python scripts/fix_styles.py output.docx
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
These are not optional. Without fix_tables.py, tables render as 1-character-wide columns in Pages. Without fix_styles.py, headings appear in default blue instead of your custom colors. Run both immediately after `node create-doc.js`, before delivering the file. The scripts are located in the same skill folder as this SKILL.md.
|
|
23
|
+
|
|
10
24
|
## Quick Reference
|
|
11
25
|
|
|
12
26
|
| Task | Approach | Tool |
|
|
13
27
|
|------|----------|------|
|
|
14
|
-
| **Create new document** | Write docx-js code
|
|
28
|
+
| **Create new document** | Write docx-js code, then run both fix scripts | Node.js + docx-js |
|
|
15
29
|
| **Read/analyze existing** | Unzip → read document.xml or use pandoc for content extraction | unzip or pandoc |
|
|
16
30
|
| **Edit existing file** | Unzip → modify XML → repack into .docx | unzip + JSZip |
|
|
17
31
|
| **Validate output** | Check XML, fonts, structure, paragraphs | `python scripts/validate.py` |
|
|
32
|
+
| **Fix tables (MANDATORY)** | Patch tblGrid for cross-platform | `python scripts/fix_tables.py output.docx` |
|
|
33
|
+
| **Fix styles (MANDATORY)** | Patch heading styles for cross-platform | `python scripts/fix_styles.py output.docx` |
|
|
18
34
|
| **Visual QA** | Convert to PDF then to images | soffice + pdftoppm |
|
|
19
35
|
|
|
20
36
|
---
|
|
@@ -102,33 +118,37 @@ const COLORS = {
|
|
|
102
118
|
|
|
103
119
|
### Creating Headings with Styles
|
|
104
120
|
|
|
105
|
-
|
|
121
|
+
**CRITICAL: Use `children` with TextRun, NOT the `run` property.** The `run` property puts formatting in `pPr/rPr` (paragraph default run properties) but the actual `<w:r>` gets no `<w:rPr>`. Word inherits from `pPr/rPr` just fine, but Apple Pages reads from `styles.xml` instead and ignores inline paragraph-level overrides. By using `children` with an explicit `TextRun`, the formatting goes directly on the run itself, which Pages always respects.
|
|
106
122
|
|
|
107
123
|
```javascript
|
|
124
|
+
// ✅ CORRECT — formatting on the TextRun (works in Pages)
|
|
108
125
|
new Paragraph({
|
|
109
|
-
text: "Section Title",
|
|
110
126
|
heading: HeadingLevel.HEADING_1,
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
127
|
+
spacing: { before: 480, after: 160 },
|
|
128
|
+
children: [
|
|
129
|
+
new TextRun({
|
|
130
|
+
text: "Section Title",
|
|
131
|
+
bold: true,
|
|
132
|
+
size: TYPOGRAPHY.h1,
|
|
133
|
+
color: COLORS.primary,
|
|
134
|
+
font: "Arial"
|
|
135
|
+
})
|
|
136
|
+
]
|
|
120
137
|
});
|
|
121
138
|
|
|
139
|
+
// ❌ WRONG — formatting on `run` property (breaks in Pages)
|
|
122
140
|
new Paragraph({
|
|
123
141
|
text: "Subsection",
|
|
124
|
-
heading: HeadingLevel.
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
spacing: { before: 360, after: 120 }
|
|
142
|
+
heading: HeadingLevel.HEADING_1,
|
|
143
|
+
style: "Heading1",
|
|
144
|
+
run: { bold: true, size: TYPOGRAPHY.h1, color: COLORS.primary, font: "Arial" },
|
|
145
|
+
spacing: { before: 480, after: 160 }
|
|
129
146
|
});
|
|
130
147
|
```
|
|
131
148
|
|
|
149
|
+
Also note: use `italics: true` (with 's'), not `italic: true`. The latter silently does nothing in docx-js.
|
|
150
|
+

|
|
151
|
+
|
|
132
152
|
### Lists: Never Use Unicode Bullets
|
|
133
153
|
|
|
134
154
|
CRITICAL: Never manually insert bullet characters like •, ◦, ▪. Use the list APIs:
|
|
@@ -324,7 +344,8 @@ Or use a more sophisticated approach with field codes (requires XML manipulation
|
|
|
324
344
|
- Set both table width AND individual cell widths in DXA
|
|
325
345
|
- Use `ShadingType.CLEAR` for cell backgrounds
|
|
326
346
|
- Use `HeadingLevel.HEADING_1`, etc., with `outlineLevel` for TOC
|
|
327
|
-
-
|
|
347
|
+
- Use `children` with TextRun for headings, NOT the `run` property (Pages ignores `run`)
|
|
348
|
+
- Use `italics: true` (with 's'), not `italic: true`
|
|
328
349
|
- Set `spacing.before` and `spacing.after` explicitly on headings
|
|
329
350
|
|
|
330
351
|
**DON'T:**
|
|
@@ -336,6 +357,8 @@ Or use a more sophisticated approach with field codes (requires XML manipulation
|
|
|
336
357
|
- Forget cell width specifications (columns collapse)
|
|
337
358
|
- Use `ShadingType.SOLID` (use CLEAR)
|
|
338
359
|
- Assume A4 page size (always set explicitly to US Letter)
|
|
360
|
+
- Use `run` property on headings (use `children` with TextRun instead — Pages ignores `run`)
|
|
361
|
+
- Use `italic: true` without the 's' (use `italics: true`)
|
|
339
362
|
- Mix font styles — stick to 2 fonts (heading + body)
|
|
340
363
|
|
|
341
364
|
---
|
|
@@ -444,7 +467,23 @@ This script:
|
|
|
444
467
|
|
|
445
468
|
**Always run this after generating any document with tables.** Without it, tables will look correct in Word but break in Pages and Google Docs.
|
|
446
469
|
|
|
447
|
-
### 3.
|
|
470
|
+
### 3. Fix Heading Styles for Cross-Platform (MANDATORY)
|
|
471
|
+
|
|
472
|
+
docx-js generates default blue heading styles in `styles.xml` regardless of your inline formatting. Word uses inline run properties, but Pages reads `styles.xml` first, causing headings to appear in the wrong color/font/size.
|
|
473
|
+
|
|
474
|
+
```bash
|
|
475
|
+
python scripts/fix_styles.py report.docx
|
|
476
|
+
```
|
|
477
|
+
|
|
478
|
+
This script:
|
|
479
|
+
- Reads each heading's actual inline formatting (color, font, size, bold) from the document
|
|
480
|
+
- Patches `styles.xml` heading style definitions to match
|
|
481
|
+
- Adds paragraph spacing to style definitions, fills in empty `docDefaults`, and adds a default `Normal` style if one is missing
|
|
482
|
+
- Overwrites the file in place (or pass a second arg for a new file)
|
|
483
|
+
|
|
484
|
+
**Always run this after generating any document with headings.** Without it, headings will look correct in Word but show default blue colors in Pages.
|
|
485
|
+
|
|
486
|
+
### 4. Visual Check (PDF Conversion)
|
|
448
487
|
|
|
449
488
|
```bash
|
|
450
489
|
# Convert to PDF
|
|
@@ -456,14 +495,15 @@ pdftoppm -jpeg -r 150 report.pdf page
|
|
|
456
495
|
# View: page-1.jpg, page-2.jpg, etc.
|
|
457
496
|
```
|
|
458
497
|
|
|
459
|
-
###
|
|
498
|
+
### 5. Fix & Re-validate
|
|
460
499
|
|
|
461
500
|
If the validator reports issues or the PDF looks off:
|
|
462
501
|
1. Fix the docx-js code
|
|
463
502
|
2. Re-run to regenerate .docx
|
|
464
503
|
3. Run `fix_tables.py` again (step 2)
|
|
465
|
-
4.
|
|
466
|
-
5. Re-
|
|
504
|
+
4. Run `fix_styles.py` again (step 3)
|
|
505
|
+
5. Re-validate with `validate.py`
|
|
506
|
+
6. Re-convert to PDF if visual issues
|
|
467
507
|
|
|
468
508
|
Iterate until validation passes and the visual output looks polished.
|
|
469
509
|
|
|
@@ -0,0 +1,394 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
fix_styles.py — Patch styles.xml in a .docx for Apple Pages compatibility.
|
|
4
|
+
|
|
5
|
+
docx-js generates styles.xml with THREE problems that break Pages rendering:
|
|
6
|
+
|
|
7
|
+
1. Empty docDefaults — <w:rPrDefault/> and <w:pPrDefault/> with no content.
|
|
8
|
+
Pages needs proper defaults (font, size, language, paragraph spacing) to
|
|
9
|
+
correctly interpret inline paragraph properties like w:jc (alignment).
|
|
10
|
+
|
|
11
|
+
2. Missing Normal style — No default paragraph style is defined. Pages needs
|
|
12
|
+
a Normal style with w:default="1" as the base for all paragraph rendering.
|
|
13
|
+
Without it, Pages ignores inline paragraph formatting like center alignment.
|
|
14
|
+
|
|
15
|
+
3. Wrong heading colors — Heading styles have default blue (#2E74B5) colors
|
|
16
|
+
regardless of what colors you use in the document. Pages reads styles.xml
|
|
17
|
+
instead of inline run properties.
|
|
18
|
+
|
|
19
|
+
This script fixes all three issues by:
|
|
20
|
+
- Adding proper docDefaults with font, size, language, and paragraph spacing
|
|
21
|
+
- Adding a Normal style with w:default="1" if it doesn't exist
|
|
22
|
+
- Reading actual heading formatting from the document and patching styles.xml
|
|
23
|
+
|
|
24
|
+
Usage:
|
|
25
|
+
python fix_styles.py input.docx [output.docx]
|
|
26
|
+
|
|
27
|
+
If output is omitted, the file is modified in place.
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
import os
|
|
31
|
+
import shutil
|
|
32
|
+
import sys
|
|
33
|
+
import tempfile
|
|
34
|
+
import zipfile
|
|
35
|
+
from xml.etree import ElementTree as ET
|
|
36
|
+
|
|
37
|
+
# Namespaces this script looks elements up in (only "w" is used for lookups).
NS = {
    "w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main",
    "mc": "http://schemas.openxmlformats.org/markup-compatibility/2006",
    "r": "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
}

# Additional namespaces commonly present in .docx parts.
EXTRA_NS = {
    "wpc": "http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas",
    "o": "urn:schemas-microsoft-com:office:office",
    "m": "http://schemas.openxmlformats.org/officeDocument/2006/math",
    "v": "urn:schemas-microsoft-com:vml",
    "wp14": "http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing",
    "wp": "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing",
    "w10": "urn:schemas-microsoft-com:office:word",
    "w14": "http://schemas.microsoft.com/office/word/2010/wordml",
    "w15": "http://schemas.microsoft.com/office/word/2012/wordml",
    "wpg": "http://schemas.microsoft.com/office/word/2010/wordprocessingGroup",
    "wpi": "http://schemas.microsoft.com/office/word/2010/wordprocessingInk",
    "wne": "http://schemas.microsoft.com/office/word/2006/wordml",
    "wps": "http://schemas.microsoft.com/office/word/2010/wordprocessingShape",
}

# Register every prefix so ElementTree writes the familiar prefixes
# (w:, mc:, ...) instead of generated ns0:, ns1:, ... on output.
for prefix, uri in (*NS.items(), *EXTRA_NS.items()):
    ET.register_namespace(prefix, uri)

# Main WordprocessingML namespace URI, and the same in "{uri}" form for
# building qualified tag/attribute names such as f"{WNS}style".
W = NS["w"]
WNS = "{" + W + "}"
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def fix_doc_defaults(root):
    """
    Ensure docDefaults has populated rPrDefault and pPrDefault children.

    Pages needs these to correctly resolve inline paragraph properties.

    Args:
        root: The root element of styles.xml (w:styles). Modified in place.

    Returns:
        True if run or paragraph defaults were added, False if both were
        already populated.
    """
    doc_defaults = root.find(f"{WNS}docDefaults")
    if doc_defaults is None:
        # Build the element detached and attach it exactly once as the first
        # child. ET.SubElement() followed by root.insert() would attach the
        # same element twice and serialize docDefaults twice.
        doc_defaults = ET.Element(f"{WNS}docDefaults")
        root.insert(0, doc_defaults)

    fixed = False

    # --- Run defaults (rPrDefault) ---
    rpr_default = doc_defaults.find(f"{WNS}rPrDefault")
    if rpr_default is None:
        # rPrDefault comes before pPrDefault, so insert at the front rather
        # than appending after a pPrDefault that may already exist.
        rpr_default = ET.Element(f"{WNS}rPrDefault")
        doc_defaults.insert(0, rpr_default)

    rpr = rpr_default.find(f"{WNS}rPr")
    if rpr is None:
        rpr = ET.SubElement(rpr_default, f"{WNS}rPr")

    if len(rpr) == 0:
        # Empty rPr: fill it in place so rPrDefault keeps its position.

        # Default fonts
        fonts = ET.SubElement(rpr, f"{WNS}rFonts")
        fonts.set(f"{WNS}asciiTheme", "minorHAnsi")
        fonts.set(f"{WNS}eastAsiaTheme", "minorEastAsia")
        fonts.set(f"{WNS}hAnsiTheme", "minorHAnsi")
        fonts.set(f"{WNS}cstheme", "minorBidi")

        # Default size (11pt = 22 half-points)
        sz = ET.SubElement(rpr, f"{WNS}sz")
        sz.set(f"{WNS}val", "22")
        sz_cs = ET.SubElement(rpr, f"{WNS}szCs")
        sz_cs.set(f"{WNS}val", "22")

        # Language
        lang = ET.SubElement(rpr, f"{WNS}lang")
        lang.set(f"{WNS}val", "en-US")
        lang.set(f"{WNS}eastAsia", "en-US")
        lang.set(f"{WNS}bidi", "ar-SA")

        fixed = True

    # --- Paragraph defaults (pPrDefault) ---
    ppr_default = doc_defaults.find(f"{WNS}pPrDefault")
    if ppr_default is None:
        # pPrDefault is the last child of docDefaults, so appending is correct.
        ppr_default = ET.SubElement(doc_defaults, f"{WNS}pPrDefault")

    ppr = ppr_default.find(f"{WNS}pPr")
    if ppr is None:
        ppr = ET.SubElement(ppr_default, f"{WNS}pPr")

    if len(ppr) == 0:
        # Default paragraph spacing (after=200, line=276, lineRule=auto)
        spacing = ET.SubElement(ppr, f"{WNS}spacing")
        spacing.set(f"{WNS}after", "200")
        spacing.set(f"{WNS}line", "276")
        spacing.set(f"{WNS}lineRule", "auto")

        fixed = True

    return fixed
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def fix_normal_style(root):
    """
    Make sure styles.xml defines a Normal style flagged w:default="1".

    Pages uses this as the base paragraph style when resolving inline
    formatting such as center alignment and spacing.

    Args:
        root: The root element of styles.xml (w:styles). Modified in place.

    Returns:
        True if an existing Normal style was flagged as default or a new one
        was created; False if Normal already had w:default="1".
    """
    existing = next(
        (
            candidate
            for candidate in root.findall(f"{WNS}style")
            if candidate.get(f"{WNS}styleId", "") == "Normal"
        ),
        None,
    )
    if existing is not None:
        if existing.get(f"{WNS}default") == "1":
            return False
        existing.set(f"{WNS}default", "1")
        return True

    # No Normal style yet: build one detached from the tree.
    normal = ET.Element(f"{WNS}style")
    normal.set(f"{WNS}type", "paragraph")
    normal.set(f"{WNS}default", "1")
    normal.set(f"{WNS}styleId", "Normal")
    ET.SubElement(normal, f"{WNS}name").set(f"{WNS}val", "Normal")
    ET.SubElement(normal, f"{WNS}qFormat")

    # Place it as the first style, right after the last docDefaults or
    # latentStyles child (or at the very front when neither is present).
    insert_pos = 0
    for index, child in enumerate(root):
        local_name = child.tag.split("}")[-1] if "}" in child.tag else child.tag
        if local_name in ("docDefaults", "latentStyles"):
            insert_pos = index + 1
    root.insert(insert_pos, normal)

    return True
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def extract_heading_styles_from_document(doc_tree):
    """
    Collect the inline formatting used by heading paragraphs in document.xml.

    For each distinct pStyle whose id starts with "Heading", the first
    paragraph that yields any formatting wins. Run properties are taken from
    the paragraph's first direct w:r child; only when that gives nothing is
    the paragraph-level pPr/rPr consulted. Spacing before/after is read from
    pPr/spacing.

    Args:
        doc_tree: Parsed ElementTree of word/document.xml.

    Returns:
        dict such as { 'Heading1': { 'color': '6B2D2D', 'sz': '36', ... }, ... };
        empty when the document has no body.
    """
    body = doc_tree.getroot().find(f".//{WNS}body")
    if body is None:
        return {}

    collected = {}

    for paragraph in body.findall(f".//{WNS}p"):
        para_props = paragraph.find(f"{WNS}pPr")
        if para_props is None:
            continue

        style_ref = para_props.find(f"{WNS}pStyle")
        if style_ref is None:
            continue

        heading_id = style_ref.get(f"{WNS}val", "")
        # Skip non-heading styles and headings we already captured.
        if not heading_id.startswith("Heading") or heading_id in collected:
            continue

        details = {}

        # Preferred source: properties on the first run (w:r/w:rPr).
        first_run = paragraph.find(f"{WNS}r")
        run_props = None if first_run is None else first_run.find(f"{WNS}rPr")
        if run_props is not None:
            _extract_rpr(run_props, details)

        # Fallback: paragraph-level run properties (w:pPr/w:rPr).
        if not details:
            para_run_props = para_props.find(f"{WNS}rPr")
            if para_run_props is not None:
                _extract_rpr(para_run_props, details)

        # Paragraph spacing, recorded only for non-empty attribute values.
        spacing_el = para_props.find(f"{WNS}spacing")
        if spacing_el is not None:
            for key, attr in (("spacing_before", "before"), ("spacing_after", "after")):
                value = spacing_el.get(f"{WNS}{attr}")
                if value:
                    details[key] = value

        if details:
            collected[heading_id] = details

    return collected
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _extract_rpr(r_pr, fmt):
    """
    Extract formatting properties from a w:rPr element into ``fmt``.

    Keys written (only when the property is present and meaningful):
    "color", "sz", "font" (from rFonts/@w:ascii), "bold", "italics".

    Args:
        r_pr: A w:rPr element.
        fmt: Dict to update in place.
    """
    # Value-carrying properties: skip missing or empty attributes so the
    # caller never writes an empty w:val / font name into styles.xml
    # (e.g. rFonts that only carries theme attributes has no w:ascii).
    for tag_name, attr_name, key in (
        ("color", "val", "color"),
        ("sz", "val", "sz"),
        ("rFonts", "ascii", "font"),
    ):
        el = r_pr.find(f"{WNS}{tag_name}")
        if el is None:
            continue
        value = el.get(f"{WNS}{attr_name}", "")
        if value:
            fmt[key] = value

    # Toggle properties: a bare <w:b/> means "on", but an explicit
    # w:val of "0" / "false" / "off" switches the property off and must
    # not be reported as bold/italic.
    for tag_name, key in (("b", "bold"), ("i", "italics")):
        toggle = r_pr.find(f"{WNS}{tag_name}")
        if toggle is None:
            continue
        if toggle.get(f"{WNS}val", "true").lower() not in ("0", "false", "off"):
            fmt[key] = True
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def patch_heading_styles(root, heading_styles):
    """
    Update heading style definitions with actual formatting from the document.

    Args:
        root: The root element of styles.xml (w:styles). Modified in place.
        heading_styles: Mapping of styleId to a formatting dict with optional
            keys "color", "sz", "font", "bold", "italics", "spacing_before"
            and "spacing_after".

    Returns:
        Number of w:style elements whose styleId appeared in
        ``heading_styles`` and were therefore patched.
    """
    patched = 0

    for style_el in root.findall(f"{WNS}style"):
        style_id = style_el.get(f"{WNS}styleId", "")
        if style_id not in heading_styles:
            continue

        fmt = heading_styles[style_id]

        # Patch run properties
        r_pr = style_el.find(f"{WNS}rPr")
        if r_pr is None:
            r_pr = ET.SubElement(style_el, f"{WNS}rPr")

        if "color" in fmt:
            color = r_pr.find(f"{WNS}color")
            if color is None:
                color = ET.SubElement(r_pr, f"{WNS}color")
            color.set(f"{WNS}val", fmt["color"])

        if "sz" in fmt:
            # Keep the complex-script size in sync with the regular size.
            for tag_name in ("sz", "szCs"):
                el = r_pr.find(f"{WNS}{tag_name}")
                if el is None:
                    el = ET.SubElement(r_pr, f"{WNS}{tag_name}")
                el.set(f"{WNS}val", fmt["sz"])

        if "font" in fmt:
            fonts = r_pr.find(f"{WNS}rFonts")
            if fonts is None:
                fonts = ET.SubElement(r_pr, f"{WNS}rFonts")
            for attr in ("ascii", "hAnsi", "eastAsia", "cs"):
                fonts.set(f"{WNS}{attr}", fmt["font"])

        # Toggle properties. An existing element may carry an explicit
        # w:val="false"/"0"; drop it so the toggle is really switched on.
        toggles = []
        if fmt.get("bold"):
            toggles += ["b", "bCs"]
        if fmt.get("italics"):
            toggles.append("i")
        for tag_name in toggles:
            el = r_pr.find(f"{WNS}{tag_name}")
            if el is None:
                el = ET.SubElement(r_pr, f"{WNS}{tag_name}")
            el.attrib.pop(f"{WNS}val", None)

        # Patch paragraph spacing
        if "spacing_before" in fmt or "spacing_after" in fmt:
            p_pr = style_el.find(f"{WNS}pPr")
            if p_pr is None:
                # Within w:style, pPr must come before rPr; insert it
                # directly ahead of rPr instead of appending after it.
                p_pr = ET.Element(f"{WNS}pPr")
                style_el.insert(list(style_el).index(r_pr), p_pr)
            spacing = p_pr.find(f"{WNS}spacing")
            if spacing is None:
                spacing = ET.SubElement(p_pr, f"{WNS}spacing")
            if "spacing_before" in fmt:
                spacing.set(f"{WNS}before", fmt["spacing_before"])
            if "spacing_after" in fmt:
                spacing.set(f"{WNS}after", fmt["spacing_after"])

        patched += 1

    return patched
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
def fix_styles(input_path, output_path=None):
    """
    Patch word/styles.xml inside a .docx and write the result.

    The archive is extracted to a temporary directory, styles.xml is patched
    (docDefaults, Normal style, heading styles read from document.xml), and
    the archive is repacked.

    Args:
        input_path: Path to the source .docx.
        output_path: Destination path. When None, ``input_path`` is
            overwritten in place.

    Returns:
        None. Progress and errors are reported via print().
    """
    if output_path is None:
        output_path = input_path

    tmp_dir = tempfile.mkdtemp(prefix="fix_styles_")

    try:
        extract_dir = os.path.join(tmp_dir, "extracted")
        with zipfile.ZipFile(input_path, "r") as zf:
            zf.extractall(extract_dir)

        doc_path = os.path.join(extract_dir, "word", "document.xml")
        styles_path = os.path.join(extract_dir, "word", "styles.xml")

        if not os.path.exists(doc_path) or not os.path.exists(styles_path):
            print("Error: Missing document.xml or styles.xml")
            return

        doc_tree = ET.parse(doc_path)
        styles_tree = ET.parse(styles_path)
        styles_root = styles_tree.getroot()

        fixes = []

        # Fix 1: docDefaults
        if fix_doc_defaults(styles_root):
            fixes.append("docDefaults")

        # Fix 2: Normal style
        if fix_normal_style(styles_root):
            fixes.append("Normal style")

        # Fix 3: Heading styles
        heading_styles = extract_heading_styles_from_document(doc_tree)
        if heading_styles:
            patched = patch_heading_styles(styles_root, heading_styles)
            if patched:
                fixes.append(f"{patched} heading style(s)")

        if not fixes:
            # Nothing to patch, but a caller that asked for a separate output
            # file still expects it to exist: deliver an unmodified copy.
            try:
                shutil.copy2(input_path, output_path)
            except shutil.SameFileError:
                pass  # in-place run; the file is already where it belongs
            print(f"No fixes needed in {output_path}")
            return

        # Write back
        # NOTE(review): ElementTree only re-declares namespaces that are used
        # by serialized elements/attributes. If the styles root carries an
        # mc:Ignorable attribute naming otherwise-unused prefixes, confirm
        # the rewritten file still opens cleanly in Word.
        styles_tree.write(styles_path, xml_declaration=True, encoding="UTF-8")

        # Repack into a temporary archive first so a failure mid-write
        # cannot leave a truncated file at output_path.
        tmp_docx = os.path.join(tmp_dir, "output.docx")
        with zipfile.ZipFile(tmp_docx, "w", zipfile.ZIP_DEFLATED) as zf:
            for root_dir, _, files in os.walk(extract_dir):
                for f in files:
                    full_path = os.path.join(root_dir, f)
                    arc_name = os.path.relpath(full_path, extract_dir)
                    zf.write(full_path, arc_name)

        shutil.copy2(tmp_docx, output_path)
        print(f"Fixed {', '.join(fixes)} in {output_path}")

    finally:
        # Always remove the scratch directory, even on error.
        shutil.rmtree(tmp_dir, ignore_errors=True)
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
if __name__ == "__main__":
    # CLI: fix_styles.py input.docx [output.docx]
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python fix_styles.py input.docx [output.docx]")
        sys.exit(1)

    # Without a second argument the input file is patched in place.
    fix_styles(cli_args[0], cli_args[1] if len(cli_args) > 1 else None)
|
|
@@ -76,8 +76,8 @@ def fix_docx(input_path, output_path=None):
|
|
|
76
76
|
if output_path is None:
|
|
77
77
|
output_path = input_path
|
|
78
78
|
|
|
79
|
-
|
|
80
|
-
|
|
79
|
+
import tempfile
|
|
80
|
+
tmp_dir = tempfile.mkdtemp(prefix='fix_tables_')
|
|
81
81
|
|
|
82
82
|
try:
|
|
83
83
|
# Extract
|