docgen-utils 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +118 -0
- package/dist/bundle.js +36086 -0
- package/dist/bundle.min.js +197 -0
- package/dist/cli.js +47432 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +9 -0
- package/dist/index.js.map +1 -0
- package/dist/packages/cli/commands/export-docs.d.ts +5 -0
- package/dist/packages/cli/commands/export-docs.d.ts.map +1 -0
- package/dist/packages/cli/commands/export-docs.js +24 -0
- package/dist/packages/cli/commands/export-docs.js.map +1 -0
- package/dist/packages/cli/commands/export-slides.d.ts +5 -0
- package/dist/packages/cli/commands/export-slides.d.ts.map +1 -0
- package/dist/packages/cli/commands/export-slides.js +86 -0
- package/dist/packages/cli/commands/export-slides.js.map +1 -0
- package/dist/packages/cli/commands/import-docx.d.ts +5 -0
- package/dist/packages/cli/commands/import-docx.d.ts.map +1 -0
- package/dist/packages/cli/commands/import-docx.js +27 -0
- package/dist/packages/cli/commands/import-docx.js.map +1 -0
- package/dist/packages/cli/commands/import-pptx.d.ts +5 -0
- package/dist/packages/cli/commands/import-pptx.d.ts.map +1 -0
- package/dist/packages/cli/commands/import-pptx.js +44 -0
- package/dist/packages/cli/commands/import-pptx.js.map +1 -0
- package/dist/packages/cli/index.d.ts +11 -0
- package/dist/packages/cli/index.d.ts.map +1 -0
- package/dist/packages/cli/index.js +103 -0
- package/dist/packages/cli/index.js.map +1 -0
- package/dist/packages/docs/common.d.ts +183 -0
- package/dist/packages/docs/common.d.ts.map +1 -0
- package/dist/packages/docs/common.js +27 -0
- package/dist/packages/docs/common.js.map +1 -0
- package/dist/packages/docs/convert.d.ts +7 -0
- package/dist/packages/docs/convert.d.ts.map +1 -0
- package/dist/packages/docs/convert.js +1399 -0
- package/dist/packages/docs/convert.js.map +1 -0
- package/dist/packages/docs/create-document.d.ts +30 -0
- package/dist/packages/docs/create-document.d.ts.map +1 -0
- package/dist/packages/docs/create-document.js +170 -0
- package/dist/packages/docs/create-document.js.map +1 -0
- package/dist/packages/docs/export.d.ts +57 -0
- package/dist/packages/docs/export.d.ts.map +1 -0
- package/dist/packages/docs/export.js +430 -0
- package/dist/packages/docs/export.js.map +1 -0
- package/dist/packages/docs/import-docx.d.ts +13 -0
- package/dist/packages/docs/import-docx.d.ts.map +1 -0
- package/dist/packages/docs/import-docx.js +2299 -0
- package/dist/packages/docs/import-docx.js.map +1 -0
- package/dist/packages/docs/parse.d.ts +6 -0
- package/dist/packages/docs/parse.d.ts.map +1 -0
- package/dist/packages/docs/parse.js +4253 -0
- package/dist/packages/docs/parse.js.map +1 -0
- package/dist/packages/shared/dom-parser-shim.d.ts +30 -0
- package/dist/packages/shared/dom-parser-shim.d.ts.map +1 -0
- package/dist/packages/shared/dom-parser-shim.js +152 -0
- package/dist/packages/shared/dom-parser-shim.js.map +1 -0
- package/dist/packages/slides/common.d.ts +325 -0
- package/dist/packages/slides/common.d.ts.map +1 -0
- package/dist/packages/slides/common.js +12 -0
- package/dist/packages/slides/common.js.map +1 -0
- package/dist/packages/slides/convert.d.ts +35 -0
- package/dist/packages/slides/convert.d.ts.map +1 -0
- package/dist/packages/slides/convert.js +308 -0
- package/dist/packages/slides/convert.js.map +1 -0
- package/dist/packages/slides/createPresentation.d.ts +51 -0
- package/dist/packages/slides/createPresentation.d.ts.map +1 -0
- package/dist/packages/slides/createPresentation.js +265 -0
- package/dist/packages/slides/createPresentation.js.map +1 -0
- package/dist/packages/slides/export.d.ts +24 -0
- package/dist/packages/slides/export.d.ts.map +1 -0
- package/dist/packages/slides/export.js +52 -0
- package/dist/packages/slides/export.js.map +1 -0
- package/dist/packages/slides/import-pptx.d.ts +13 -0
- package/dist/packages/slides/import-pptx.d.ts.map +1 -0
- package/dist/packages/slides/import-pptx.js +619 -0
- package/dist/packages/slides/import-pptx.js.map +1 -0
- package/dist/packages/slides/parse.d.ts +45 -0
- package/dist/packages/slides/parse.d.ts.map +1 -0
- package/dist/packages/slides/parse.js +1185 -0
- package/dist/packages/slides/parse.js.map +1 -0
- package/dist/packages/slides/transform.d.ts +37 -0
- package/dist/packages/slides/transform.d.ts.map +1 -0
- package/dist/packages/slides/transform.js +140 -0
- package/dist/packages/slides/transform.js.map +1 -0
- package/dist/packages/slides/vendor/VENDORING.md +58 -0
- package/dist/packages/slides/vendor/pptxgen.d.ts +805 -0
- package/dist/packages/slides/vendor/pptxgen.js +7442 -0
- package/package.json +57 -0
|
@@ -0,0 +1,2299 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOCX Import: Parses a DOCX file (ArrayBuffer) into HTML string.
|
|
3
|
+
* Modeled after slides/import-pptx.ts
|
|
4
|
+
*
|
|
5
|
+
* Usage: const html = await importDocx(arrayBuffer);
|
|
6
|
+
*/
|
|
7
|
+
import JSZip from "jszip";
|
|
8
|
+
// ============================================================================
|
|
9
|
+
// Constants
|
|
10
|
+
// ============================================================================
|
|
11
|
+
// Twips per pixel: 1 inch = 1440 twips and 1 inch = 96 px, so 15 twips per pixel.
const TWIPS_PER_PX = 1440 / 96;
// EMU (English Metric Units) per pixel: 1 inch = 914400 EMU and 1 inch = 96 px,
// so 9525 EMU per pixel.
const EMU_PER_PX = 914400 / 96;
// Multiplier that turns a half-point count into points.
const HALF_POINTS_TO_PT = 0.5;
|
|
17
|
+
// ============================================================================
|
|
18
|
+
// Utility Functions
|
|
19
|
+
// ============================================================================
|
|
20
|
+
/**
 * Converts a length in twips to pixels by dividing by TWIPS_PER_PX.
 *
 * @param {number} twips - Length in twips.
 * @returns {number} The same length in pixels (not rounded).
 */
function twipsToPx(twips) {
    return twips / TWIPS_PER_PX;
}
|
|
23
|
+
/**
 * Converts a length in EMU (English Metric Units) to pixels by dividing by
 * EMU_PER_PX.
 *
 * @param {number} emu - Length in EMU.
 * @returns {number} The same length in pixels (not rounded).
 */
function emuToPx(emu) {
    return emu / EMU_PER_PX;
}
|
|
26
|
+
/**
 * Converts a size in half-points to points by multiplying by
 * HALF_POINTS_TO_PT.
 *
 * @param {number} halfPts - Size in half-points.
 * @returns {number} The size in points.
 */
function halfPointsToPt(halfPts) {
    return halfPts * HALF_POINTS_TO_PT;
}
|
|
29
|
+
/**
 * Returns the first direct child of `parent` whose `localName` equals
 * `localName` (i.e. the tag name without its namespace prefix).
 *
 * @param {{children: ArrayLike<object>}} parent - Element to search; only its
 *   immediate children are inspected.
 * @param {string} localName - Local name to match exactly.
 * @returns {object|null} The first matching child, or null when none matches.
 */
function findChild(parent, localName) {
    // Index loop on purpose: `children` only needs `length` and numeric
    // indexing, so array-likes work as well as real arrays.
    const kids = parent.children;
    for (let i = 0; i < kids.length; i++) {
        const kid = kids[i];
        if (kid.localName === localName) {
            return kid;
        }
    }
    return null;
}
|
|
37
|
+
/**
 * Collects every direct child of `parent` whose `localName` equals
 * `localName`, in document order.
 *
 * @param {{children: ArrayLike<object>}} parent - Element to search; only its
 *   immediate children are inspected.
 * @param {string} localName - Local name to match exactly.
 * @returns {object[]} A new array of matching children (empty when none match).
 */
function findChildren(parent, localName) {
    const matches = [];
    // Index loop on purpose: `children` only needs `length` and numeric
    // indexing, so array-likes work as well as real arrays.
    const kids = parent.children;
    for (let i = 0; i < kids.length; i++) {
        const kid = kids[i];
        if (kid.localName === localName) {
            matches.push(kid);
        }
    }
    return matches;
}
|
|
46
|
+
/**
 * Depth-first (pre-order) search for the first descendant of `parent` whose
 * `localName` equals `localName`. `parent` itself is never returned.
 *
 * @param {{children: ArrayLike<object>}} parent - Root of the subtree to search.
 * @param {string} localName - Local name to match exactly.
 * @returns {object|null} The first matching descendant, or null when the
 *   subtree contains no match.
 */
function findDescendant(parent, localName) {
    const kids = parent.children;
    for (let i = 0; i < kids.length; i++) {
        const kid = kids[i];
        // A child is tested before its own subtree, so a shallow match in an
        // earlier branch wins over a deeper match in the same branch.
        if (kid.localName === localName) {
            return kid;
        }
        const nested = findDescendant(kid, localName);
        if (nested) {
            return nested;
        }
    }
    return null;
}
|
|
58
|
+
/**
 * Escapes the five HTML-significant characters (& < > " ') so that `text`
 * can be placed in HTML element content or a quoted attribute value.
 *
 * @param {string} text - Raw text to escape.
 * @returns {string} The text with each special character replaced by its
 *   HTML entity.
 */
function escapeHtml(text) {
    return text
        // "&" must be handled first; otherwise the ampersands introduced by
        // the later replacements would be escaped a second time.
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&#39;");
}
|
|
66
|
+
/**
 * Looks up a theme color reference in the parsed theme color table.
 *
 * The reference is first normalised through an alias table so that the
 * long (`text1`, `background1`, ...) and short (`tx1`, `bg1`, ...) spellings
 * resolve to the `dk*` / `lt*` keys; any reference that is not listed is
 * used as the lookup key unchanged.
 *
 * @param {string} colorRef - Theme color name as it appears in the document.
 * @param {Map<string, string>} themeColors - Theme colors keyed by name.
 * @returns {string|undefined} Whatever `themeColors` holds for the resolved
 *   key, or undefined when there is no entry.
 */
function resolveThemeColor(colorRef, themeColors) {
    // Map theme color names to our stored keys
    const aliases = {
        dk1: "dk1",
        dk2: "dk2",
        lt1: "lt1",
        lt2: "lt2",
        accent1: "accent1",
        accent2: "accent2",
        accent3: "accent3",
        accent4: "accent4",
        accent5: "accent5",
        accent6: "accent6",
        hlink: "hlink",
        folHlink: "folHlink",
        // Additional common mappings
        text1: "dk1",
        text2: "dk2",
        background1: "lt1",
        background2: "lt2",
        // Short form mappings (bg1/bg2 = background1/background2, tx1/tx2 = text1/text2)
        bg1: "lt1",
        bg2: "lt2",
        tx1: "dk1",
        tx2: "dk2",
    };
    const storedKey = aliases[colorRef] ?? colorRef;
    return themeColors.get(storedKey);
}
|
|
95
|
+
/**
 * Resolves the color carried by a WordprocessingML element to a CSS color.
 *
 * A literal six-digit hex `w:val` wins; otherwise the `w:themeColor`
 * attribute is resolved through `resolveThemeColor`.
 *
 * @param {{getAttribute: (name: string) => (string|null)}} parent - Element
 *   carrying the `w:val` / `w:themeColor` attributes.
 * @param {Map<string, string>} themeColors - Theme colors keyed by name.
 * @returns {string|undefined} `"#RRGGBB"` for a literal value, the theme
 *   table entry for a theme reference, or undefined when neither resolves.
 */
function resolveColor(parent, themeColors) {
    // Direct color value. The hex test also rejects "auto" and any other
    // non-hex keyword, so no separate keyword check is needed.
    const literal = parent.getAttribute("w:val");
    if (literal && /^[0-9A-Fa-f]{6}$/.test(literal)) {
        return "#" + literal;
    }
    // Theme color reference.
    const themeRef = parent.getAttribute("w:themeColor");
    if (themeRef) {
        const resolved = resolveThemeColor(themeRef, themeColors);
        if (resolved) {
            return resolved;
        }
    }
    return undefined;
}
|
|
110
|
+
/**
 * Maps a theme font reference to the concrete typeface stored for the theme.
 *
 * Only the `HAnsi` and `Ascii` variants of the major/minor references are
 * recognised; every other reference yields undefined.
 *
 * @param {string} fontRef - Theme font reference, e.g. `"majorHAnsi"`.
 * @param {{major: string, minor: string}} themeFonts - Theme typefaces.
 * @returns {string|undefined} `themeFonts.major` or `themeFonts.minor`, or
 *   undefined for an unrecognised reference.
 */
function resolveThemeFont(fontRef, themeFonts) {
    switch (fontRef) {
        case "majorHAnsi":
        case "majorAscii":
            return themeFonts.major;
        case "minorHAnsi":
        case "minorAscii":
            return themeFonts.minor;
        default:
            return undefined;
    }
}
|
|
119
|
+
/**
 * Translates a WordprocessingML border style name into the closest CSS
 * `border-style` keyword.
 *
 * @param {string} style - Border style name, e.g. `"single"` or `"dotDash"`.
 * @returns {string} The mapped CSS keyword; `"solid"` for any style that is
 *   not in the table (including null/undefined).
 */
function getBorderStyleCss(style) {
    // CSS has far fewer border styles than Word, so several Word styles share
    // one CSS keyword.
    const cssByWordStyle = {
        single: "solid",
        double: "double",
        dotted: "dotted",
        dashed: "dashed",
        dashSmallGap: "dashed",
        dotDash: "dashed",
        dotDotDash: "dotted",
        triple: "double",
        thick: "solid",
        nil: "none",
        none: "none",
    };
    return cssByWordStyle[style] ?? "solid";
}
|
|
135
|
+
// ============================================================================
|
|
136
|
+
// Parsing Functions
|
|
137
|
+
// ============================================================================
|
|
138
|
+
/**
 * Extracts the twelve named colors of a theme's color scheme.
 *
 * For each known slot the first `srgbClr` child supplies the color via its
 * `val` attribute; if the slot has no `srgbClr` child, a `sysClr` child
 * supplies it via `lastClr`. Slots that are absent, or whose color element
 * lacks the relevant attribute, are left out of the result.
 *
 * @param {{getElementsByTagName: (name: string) => ArrayLike<object>}} themeDoc -
 *   Parsed theme XML document.
 * @returns {Map<string, string>} Slot name -> `"#"`-prefixed color value;
 *   empty when the document has no `a:clrScheme` element.
 */
function parseThemeColors(themeDoc) {
    const colors = new Map();
    const clrScheme = themeDoc.getElementsByTagName("a:clrScheme")[0];
    if (!clrScheme) {
        return colors;
    }
    const slotNames = [
        "dk1", "dk2", "lt1", "lt2",
        "accent1", "accent2", "accent3", "accent4", "accent5", "accent6",
        "hlink", "folHlink",
    ];
    for (const slot of slotNames) {
        const slotEl = findChild(clrScheme, slot);
        if (!slotEl) {
            continue;
        }
        const srgbClr = findChild(slotEl, "srgbClr");
        if (srgbClr) {
            const val = srgbClr.getAttribute("val");
            if (val) {
                colors.set(slot, "#" + val);
            }
            // An srgbClr without a value does not fall back to sysClr.
            continue;
        }
        const sysClr = findChild(slotEl, "sysClr");
        if (sysClr) {
            const lastClr = sysClr.getAttribute("lastClr");
            if (lastClr) {
                colors.set(slot, "#" + lastClr);
            }
        }
    }
    return colors;
}
|
|
168
|
+
/**
 * Extracts the major and minor Latin typefaces from a theme's font scheme.
 *
 * Each slot defaults to `"Calibri"` and is only overwritten when the
 * corresponding `majorFont` / `minorFont` element has a `latin` child with a
 * non-empty `typeface` attribute.
 *
 * @param {{getElementsByTagName: (name: string) => ArrayLike<object>}} themeDoc -
 *   Parsed theme XML document.
 * @returns {{major: string, minor: string}} The resolved typefaces.
 */
function parseThemeFonts(themeDoc) {
    const fonts = { major: "Calibri", minor: "Calibri" };
    const fontScheme = themeDoc.getElementsByTagName("a:fontScheme")[0];
    if (!fontScheme) {
        return fonts;
    }
    // Both slots are read the same way; only the element name differs.
    const slots = [
        ["major", "majorFont"],
        ["minor", "minorFont"],
    ];
    for (const [slot, elementName] of slots) {
        const fontEl = findChild(fontScheme, elementName);
        const latin = fontEl ? findChild(fontEl, "latin") : null;
        const typeface = latin ? latin.getAttribute("typeface") : null;
        if (typeface) {
            fonts[slot] = typeface;
        }
    }
    return fonts;
}
|
|
193
|
+
/**
 * Parses a styles document into a style table plus document-wide defaults.
 *
 * @param {{getElementsByTagName: (name: string) => ArrayLike<object>}} stylesDoc -
 *   Parsed styles XML document.
 * @param {Map<string, string>} themeColors - Theme colors, forwarded to
 *   `parseRunProps`.
 * @param {{major: string, minor: string}} themeFonts - Theme fonts, forwarded
 *   to `parseRunProps`.
 * @returns {{styles: Map<string, {basedOn: (string|undefined), pPr: (object|undefined), rPr: (object|undefined), name: (string|undefined)}>, defaults: {rPr?: object, pPr?: object}}}
 *   `styles` is keyed by `w:styleId`; `defaults` holds the properties found
 *   under `w:docDefaults`, if any.
 */
function parseStyles(stylesDoc, themeColors, themeFonts) {
    const styles = new Map();
    const defaults = {};
    // Document defaults (w:docDefaults): default run and paragraph properties.
    const docDefaults = stylesDoc.getElementsByTagName("w:docDefaults")[0];
    if (docDefaults) {
        const rPrDefault = findChild(docDefaults, "rPrDefault");
        const defaultRPr = rPrDefault ? findChild(rPrDefault, "rPr") : null;
        if (defaultRPr) {
            defaults.rPr = parseRunProps(defaultRPr, themeColors, themeFonts);
        }
        const pPrDefault = findChild(docDefaults, "pPrDefault");
        const defaultPPr = pPrDefault ? findChild(pPrDefault, "pPr") : null;
        if (defaultPPr) {
            defaults.pPr = parseParagraphProps(defaultPPr);
        }
    }
    const styleEls = stylesDoc.getElementsByTagName("w:style");
    for (let i = 0; i < styleEls.length; i++) {
        const styleEl = styleEls[i];
        const styleId = styleEl.getAttribute("w:styleId");
        // A style without an id cannot be referenced, so it is skipped.
        if (!styleId) {
            continue;
        }
        const basedOn = findChild(styleEl, "basedOn")?.getAttribute("w:val") ?? undefined;
        const name = findChild(styleEl, "name")?.getAttribute("w:val") ?? undefined;
        const pPrEl = findChild(styleEl, "pPr");
        const rPrEl = findChild(styleEl, "rPr");
        const pPr = pPrEl ? parseParagraphProps(pPrEl) : undefined;
        const rPr = rPrEl ? parseRunProps(rPrEl, themeColors, themeFonts) : undefined;
        styles.set(styleId, { basedOn, pPr, rPr, name });
    }
    return { styles, defaults };
}
|
|
234
|
+
/**
 * Parse section properties from a sectPr element.
 *
 * Reads page size (`pgSz`), page margins (`pgMar`) and column settings
 * (`cols`). Each attribute is parsed with `parseInt(..., 10)` and stored as
 * found; no unit conversion is applied here. Attributes that are missing or
 * empty leave the corresponding property unset.
 *
 * @param {object|null|undefined} sectPr - The sectPr element, if any.
 * @returns {{pageWidth?: number, pageHeight?: number, marginTop?: number, marginRight?: number, marginBottom?: number, marginLeft?: number, columns?: number, columnSpace?: number}}
 *   The properties that were present; an empty object when `sectPr` is falsy.
 */
function parseSectionProps(sectPr) {
    const props = {};
    if (!sectPr) {
        return props;
    }
    // Copies one integer attribute of `el` into `props[propName]` when the
    // attribute is present and non-empty.
    const copyInt = (el, attrName, propName) => {
        const raw = el.getAttribute(attrName);
        if (raw) {
            props[propName] = parseInt(raw, 10);
        }
    };
    // Page size
    const pgSz = findChild(sectPr, "pgSz");
    if (pgSz) {
        copyInt(pgSz, "w:w", "pageWidth");
        copyInt(pgSz, "w:h", "pageHeight");
    }
    // Page margins
    const pgMar = findChild(sectPr, "pgMar");
    if (pgMar) {
        copyInt(pgMar, "w:top", "marginTop");
        copyInt(pgMar, "w:right", "marginRight");
        copyInt(pgMar, "w:bottom", "marginBottom");
        copyInt(pgMar, "w:left", "marginLeft");
    }
    // Columns
    const cols = findChild(sectPr, "cols");
    if (cols) {
        copyInt(cols, "w:num", "columns");
        copyInt(cols, "w:space", "columnSpace");
    }
    return props;
}
|
|
280
|
+
/**
 * Reads one axis (`positionH` or `positionV`) of an anchored drawing.
 *
 * @param {object|null} positionEl - The position element, or null if absent.
 * @param {string[]} allowedAligns - Alignment keywords accepted for this axis.
 * @returns {{position: string, offset: number}} `position` is an accepted
 *   alignment keyword or `"absolute"`; `offset` is the parsed `posOffset`
 *   (0 when absent, unparsable, or when an `align` child is present).
 */
function readAnchorAxis(positionEl, allowedAligns) {
    const axis = { position: "absolute", offset: 0 };
    if (!positionEl) {
        return axis;
    }
    const align = findChild(positionEl, "align");
    if (align) {
        // An align child takes precedence: posOffset is ignored even when the
        // alignment keyword itself is not one we accept.
        const alignVal = align.textContent?.trim();
        if (allowedAligns.includes(alignVal)) {
            axis.position = alignVal;
        }
        return axis;
    }
    const posOffset = findChild(positionEl, "posOffset");
    if (posOffset) {
        const parsed = parseInt(posOffset.textContent ?? "0", 10);
        // Empty or non-numeric text would otherwise leak NaN to callers.
        axis.offset = Number.isNaN(parsed) ? 0 : parsed;
    }
    return axis;
}
/**
 * Reads the preset geometry name and its first adjustment value from a shape's
 * spPr element.
 *
 * @param {object} spPr - Shape properties element.
 * @returns {{shapeType: (string|undefined), shapeAdjust: (number|undefined)}}
 */
function readPresetGeometry(spPr) {
    const prstGeom = findChild(spPr, "prstGeom");
    if (!prstGeom) {
        return { shapeType: undefined, shapeAdjust: undefined };
    }
    const shapeType = prstGeom.getAttribute("prst") ?? undefined;
    let shapeAdjust;
    const avLst = findChild(prstGeom, "avLst");
    const gd = avLst ? findChild(avLst, "gd") : null;
    const fmla = gd ? gd.getAttribute("fmla") : null;
    if (fmla) {
        // Only the "val XXXX" formula form is understood.
        const match = fmla.match(/val\s+(\d+)/);
        if (match) {
            // Value is in 1/100000ths (so 2604 = 2.604%)
            shapeAdjust = parseInt(match[1], 10) / 100000;
        }
    }
    return { shapeType, shapeAdjust };
}
/**
 * Builds a linear gradient description from a gradFill element.
 *
 * @param {object} gradFill - The gradFill element.
 * @param {Map<string, string>} themeColors - Theme colors keyed by name.
 * @returns {{type: string, angle: number, stops: {position: number, color: string}[]}|undefined}
 *   The gradient, or undefined when there is no `gsLst` or fewer than two stops.
 */
function readLinearGradient(gradFill, themeColors) {
    const gsLst = findChild(gradFill, "gsLst");
    if (!gsLst) {
        return undefined;
    }
    const stops = findChildren(gsLst, "gs").map((gs) => {
        // pos is in 1000ths of a percent, so dividing by 1000 gives a percentage.
        const position = parseInt(gs.getAttribute("pos") ?? "0", 10) / 1000;
        let color = "#000000";
        const srgbClr = findChild(gs, "srgbClr");
        const schemeClr = findChild(gs, "schemeClr");
        if (srgbClr) {
            color = "#" + (srgbClr.getAttribute("val") ?? "000000");
        }
        else if (schemeClr) {
            const colorName = schemeClr.getAttribute("val");
            if (colorName) {
                color = resolveThemeColor(colorName, themeColors) ?? "#000000";
            }
        }
        return { position, color };
    });
    // Gradient direction; defaults to vertical (top to bottom).
    let angle = 180;
    const lin = findChild(gradFill, "lin");
    if (lin) {
        // Angle is in 60000ths of a degree
        angle = Math.round(parseInt(lin.getAttribute("ang") ?? "0", 10) / 60000);
    }
    return stops.length >= 2 ? { type: "linear", angle, stops } : undefined;
}
/**
 * Reads the solid fill color of a shape's spPr element.
 *
 * @param {object} spPr - Shape properties element.
 * @param {Map<string, string>} themeColors - Theme colors keyed by name.
 * @returns {string|undefined} `"#RRGGBB"` for an srgbClr, the theme table entry
 *   for a schemeClr, or undefined when no solid fill color is found.
 */
function readSolidFillColor(spPr, themeColors) {
    const solidFill = findChild(spPr, "solidFill");
    if (!solidFill) {
        return undefined;
    }
    const srgbClr = findChild(solidFill, "srgbClr");
    if (srgbClr) {
        return "#" + (srgbClr.getAttribute("val") ?? "000000");
    }
    const schemeClr = findChild(solidFill, "schemeClr");
    const colorName = schemeClr ? schemeClr.getAttribute("val") : null;
    return colorName ? resolveThemeColor(colorName, themeColors) : undefined;
}
/**
 * Parses the paragraphs inside a textbox (txbx > txbxContent).
 *
 * @param {object} txbx - The txbx element.
 * @param {Map<string, string>} themeColors - Forwarded to `parseParagraph`.
 * @param {object} themeFonts - Forwarded to `parseParagraph`.
 * @returns {object[]|undefined} Parsed paragraphs that have runs or a style id,
 *   or undefined when the textbox has no txbxContent.
 */
function readTextboxParagraphs(txbx, themeColors, themeFonts) {
    const txbxContent = findDescendant(txbx, "txbxContent");
    if (!txbxContent) {
        return undefined;
    }
    const textElements = [];
    const paragraphs = txbxContent.getElementsByTagName("w:p");
    for (let i = 0; i < paragraphs.length; i++) {
        const para = parseParagraph(paragraphs[i], themeColors, themeFonts);
        if (para.runs.length > 0 || para.props.styleId) {
            textElements.push(para);
        }
    }
    return textElements;
}
/**
 * Reads the `alpha` child of a color element as a whole percentage.
 *
 * @param {object} colorEl - A prstClr or srgbClr element.
 * @returns {number|undefined} The alpha in percent, or undefined when the
 *   element has no alpha child.
 */
function readAlphaPercent(colorEl) {
    const alphaEl = findChild(colorEl, "alpha");
    if (!alphaEl) {
        return undefined;
    }
    // val is in thousandths of a percent (40000 = 40%)
    return Math.round(parseInt(alphaEl.getAttribute("val") ?? "100000", 10) / 1000);
}
/**
 * Reads rotation and flip flags from a picture's spPr > xfrm element.
 *
 * @param {object} picSpPr - The picture's spPr element.
 * @returns {{rotation: (number|undefined), flipH: (true|undefined), flipV: (true|undefined)}}
 */
function readPictureTransform(picSpPr) {
    const xfrm = findChild(picSpPr, "xfrm");
    if (!xfrm) {
        return { rotation: undefined, flipH: undefined, flipV: undefined };
    }
    let rotation;
    const rotAttr = xfrm.getAttribute("rot");
    if (rotAttr) {
        // rot is in 60000ths of a degree.
        rotation = parseInt(rotAttr, 10) / 60000;
        // Express large clockwise angles as small negative ones
        // (values like 21253759 = 354.2 degrees = -5.8 degrees).
        if (rotation > 180) {
            rotation = rotation - 360;
        }
    }
    return {
        rotation,
        flipH: xfrm.getAttribute("flipH") === "1" || undefined,
        flipV: xfrm.getAttribute("flipV") === "1" || undefined,
    };
}
/**
 * Reads an outer shadow (effectLst > outerShdw) from a picture's spPr element.
 *
 * @param {object} picSpPr - The picture's spPr element.
 * @returns {{blurRadius: number, color: string, alpha: number, offsetX: number, offsetY: number}|undefined}
 *   The shadow with `color` as bare hex (no "#"), `alpha` in percent and the
 *   offsets in the same unit as the `dist` attribute; undefined when there is
 *   no outer shadow.
 */
function readOuterShadow(picSpPr) {
    const effectLst = findDescendant(picSpPr, "effectLst");
    const outerShdw = effectLst ? findChild(effectLst, "outerShdw") : null;
    if (!outerShdw) {
        return undefined;
    }
    const blurRadius = parseInt(outerShdw.getAttribute("blurRad") ?? "0", 10);
    const distAttr = outerShdw.getAttribute("dist");
    const dirAttr = outerShdw.getAttribute("dir");
    let color = "000000"; // default black
    let alpha = 100;
    // Preset color (only "black" and "white" are recognised).
    const prstClr = findChild(outerShdw, "prstClr");
    if (prstClr) {
        const preset = prstClr.getAttribute("val");
        if (preset === "black") {
            color = "000000";
        }
        else if (preset === "white") {
            color = "FFFFFF";
        }
        alpha = readAlphaPercent(prstClr) ?? alpha;
    }
    // Explicit RGB color; when both are present this overrides the preset.
    const srgbClr = findChild(outerShdw, "srgbClr");
    if (srgbClr) {
        color = srgbClr.getAttribute("val") ?? "000000";
        alpha = readAlphaPercent(srgbClr) ?? alpha;
    }
    // Decompose distance + direction into x/y offsets.
    let offsetX = 0;
    let offsetY = 0;
    if (distAttr && dirAttr) {
        const dist = parseInt(distAttr, 10);
        const dirDeg = parseInt(dirAttr, 10) / 60000; // dir is in 60000ths of a degree
        const dirRad = (dirDeg * Math.PI) / 180;
        offsetX = Math.round(dist * Math.cos(dirRad));
        offsetY = Math.round(dist * Math.sin(dirRad));
    }
    return { blurRadius, color, alpha, offsetX, offsetY };
}
/**
 * Reads the outline (a:ln) of a picture as a border description.
 *
 * @param {object} picSpPr - The picture's spPr element.
 * @param {Map<string, string>} themeColors - Theme colors keyed by name.
 * @returns {{width: number, color: string}|undefined} The border with `width`
 *   as found in the `w` attribute and `color` as bare hex (no "#"); undefined
 *   unless both a positive width and a color were found.
 */
function readPictureOutline(picSpPr, themeColors) {
    const ln = findChild(picSpPr, "ln");
    if (!ln) {
        return undefined;
    }
    const widthAttr = ln.getAttribute("w");
    const width = widthAttr ? parseInt(widthAttr, 10) : 0;
    if (!(width > 0)) {
        return undefined;
    }
    let color;
    const solidFill = findChild(ln, "solidFill");
    if (solidFill) {
        // The three color forms are checked in turn; a later one overrides an
        // earlier one if several are present.
        const srgbClr = findChild(solidFill, "srgbClr");
        if (srgbClr) {
            color = srgbClr.getAttribute("val") ?? undefined;
        }
        // System color like "window" = white
        const sysClr = findChild(solidFill, "sysClr");
        if (sysClr) {
            const lastClr = sysClr.getAttribute("lastClr");
            if (lastClr) {
                color = lastClr;
            }
            else {
                const sysName = sysClr.getAttribute("val");
                if (sysName === "window") {
                    color = "FFFFFF";
                }
                else if (sysName === "windowText") {
                    color = "000000";
                }
            }
        }
        // Theme color reference like "bg1"
        const schemeClr = findChild(solidFill, "schemeClr");
        const colorName = schemeClr ? schemeClr.getAttribute("val") : null;
        if (colorName) {
            const resolved = resolveThemeColor(colorName, themeColors);
            if (resolved) {
                // Remove # prefix if present
                color = resolved.startsWith("#") ? resolved.substring(1) : resolved;
            }
        }
    }
    // Only report a border if we found a valid color
    return color ? { width, color } : undefined;
}
/**
 * Parse positioned (anchored) elements from a drawing element.
 * These are shapes, images, or textboxes with absolute/relative positioning.
 *
 * A drawing containing a `wsp` element is reported as a `"shape"` (or a
 * `"textbox"` when the shape contains a `txbx`); otherwise a drawing
 * containing a `blip` is reported as an `"image"`. Position offsets and the
 * width/height are returned as the integers found in the XML, without unit
 * conversion.
 *
 * @param {object} drawing - The w:drawing element.
 * @param {Map<string, string>} themeColors - Theme colors keyed by name.
 * @param {object} themeFonts - Theme fonts, forwarded to `parseParagraph`.
 * @returns {object|null} The positioned element description, or null when the
 *   drawing has no `anchor` descendant (i.e. it is not positioned).
 */
function parsePositionedElement(drawing, themeColors, themeFonts) {
    const anchor = findDescendant(drawing, "anchor");
    if (!anchor) {
        return null;
    }
    // Positioning: alignment keyword or numeric offset per axis.
    const positionH = findChild(anchor, "positionH");
    const positionV = findChild(anchor, "positionV");
    const relativeFromH = positionH?.getAttribute("relativeFrom");
    const relativeFromV = positionV?.getAttribute("relativeFrom");
    const horizontal = readAnchorAxis(positionH, ["left", "center", "right"]);
    const vertical = readAnchorAxis(positionV, ["top", "center", "bottom"]);
    // Extent (size)
    const extent = findChild(anchor, "extent");
    const cx = parseInt(extent?.getAttribute("cx") ?? "0", 10);
    const cy = parseInt(extent?.getAttribute("cy") ?? "0", 10);
    const behindDoc = anchor.getAttribute("behindDoc") === "1";
    // Wrap type
    let wrapType = "none";
    if (findChild(anchor, "wrapTopAndBottom")) {
        wrapType = "topAndBottom";
    }
    else if (findChild(anchor, "wrapSquare")) {
        wrapType = "square";
    }
    else if (findChild(anchor, "wrapTight")) {
        wrapType = "tight";
    }
    let elementType = "shape";
    let shapeType;
    let shapeAdjust;
    let fillColor;
    let gradientFill;
    let textElements;
    let imageRId;
    let imageRotation;
    let imageFlipH;
    let imageFlipV;
    let imageShadow;
    let imageBorder;
    const wsp = findDescendant(drawing, "wsp");
    if (wsp) {
        const spPr = findChild(wsp, "spPr");
        if (spPr) {
            ({ shapeType, shapeAdjust } = readPresetGeometry(spPr));
            // A gradFill element suppresses the solid-fill lookup even when no
            // usable gradient can be built from it.
            const gradFill = findChild(spPr, "gradFill");
            if (gradFill) {
                gradientFill = readLinearGradient(gradFill, themeColors);
            }
            else {
                fillColor = readSolidFillColor(spPr, themeColors);
            }
        }
        const txbx = findDescendant(wsp, "txbx");
        if (txbx) {
            elementType = "textbox";
            textElements = readTextboxParagraphs(txbx, themeColors, themeFonts);
        }
    }
    else {
        // No shape: treat the drawing as an image if it contains a blip.
        const blip = findDescendant(drawing, "blip");
        if (blip) {
            elementType = "image";
            // r:embed is the relationship ID for the image. getAttributeNS is
            // called optionally in case the DOM implementation in use does not
            // provide it (NOTE(review): confirm against the DOM parser shim).
            imageRId = blip.getAttribute("r:embed")
                ?? blip.getAttributeNS?.("http://schemas.openxmlformats.org/officeDocument/2006/relationships", "embed")
                ?? undefined;
            if (imageRId) {
                // Transform, shadow and border all live under pic > spPr.
                const pic = findDescendant(drawing, "pic");
                const picSpPr = pic ? findChild(pic, "spPr") : null;
                if (picSpPr) {
                    const transform = readPictureTransform(picSpPr);
                    imageRotation = transform.rotation;
                    imageFlipH = transform.flipH;
                    imageFlipV = transform.flipV;
                    imageShadow = readOuterShadow(picSpPr);
                    imageBorder = readPictureOutline(picSpPr, themeColors);
                }
            }
        }
    }
    return {
        type: elementType,
        shapeType,
        shapeAdjust,
        positionH: horizontal.position,
        positionV: vertical.position,
        offsetX: horizontal.offset,
        offsetY: vertical.offset,
        width: cx,
        height: cy,
        relativeFromH,
        relativeFromV,
        behindDoc,
        fillColor,
        gradientFill,
        textElements,
        wrapType,
        imageRId,
        imageRotation,
        imageFlipH,
        imageFlipV,
        imageShadow,
        imageBorder,
    };
}
|
|
626
|
+
/**
 * Parse header or footer XML content.
 *
 * Collects every anchored drawing anywhere under the root element as a
 * positioned element, and every paragraph that is a direct child of the root
 * and has at least one run as a regular element.
 *
 * @param {{getElementsByTagName: (name: string) => ArrayLike<object>}} doc -
 *   Parsed header/footer XML document.
 * @param {string} type - `"header"` selects the `w:hdr` root; any other value
 *   selects `w:ftr`.
 * @param {Map<string, string>} themeColors - Theme colors keyed by name.
 * @param {object} themeFonts - Theme fonts.
 * @returns {{type: string, elements: {kind: string, data: object}[], positionedElements: object[]}}
 *   The parsed content; both lists are empty when the root element is missing.
 */
function parseHeaderFooter(doc, type, themeColors, themeFonts) {
    const result = {
        type,
        elements: [],
        positionedElements: [],
    };
    const rootTag = type === "header" ? "w:hdr" : "w:ftr";
    const root = doc.getElementsByTagName(rootTag)[0];
    if (!root) {
        return result;
    }
    // Positioned elements (anchored drawings). Drawings without an anchor
    // come back as null and are skipped.
    const drawings = root.getElementsByTagName("w:drawing");
    for (let i = 0; i < drawings.length; i++) {
        const positioned = parsePositionedElement(drawings[i], themeColors, themeFonts);
        if (positioned) {
            result.positionedElements.push(positioned);
        }
    }
    // Regular paragraphs; those without runs are dropped.
    for (const p of findChildren(root, "p")) {
        const para = parseParagraph(p, themeColors, themeFonts);
        if (para.runs.length > 0) {
            result.elements.push({ kind: "paragraph", data: para });
        }
    }
    return result;
}
|
|
656
|
+
/**
 * Parses a numbering document into a lookup from numbering id to its level
 * definitions.
 *
 * Abstract numbering definitions (`w:abstractNum`) are read first; each
 * `w:num` element is then mapped to the levels of the abstract definition it
 * references. A `w:num` that references an unknown abstract definition is
 * omitted. The same levels Map instance is shared by every `w:num` that
 * points at the same abstract definition.
 *
 * @param {{getElementsByTagName: (name: string) => ArrayLike<object>}|null|undefined} numberingDoc -
 *   Parsed numbering XML document, if the package has one.
 * @returns {Map<number, Map<number, {numFmt: string, lvlText: string}>>}
 *   numId -> (ilvl -> level definition); empty when `numberingDoc` is falsy.
 */
function parseNumbering(numberingDoc) {
    const numbering = new Map();
    if (!numberingDoc) {
        return numbering;
    }
    // Abstract numbering definitions, keyed by abstractNumId.
    const abstractNums = new Map();
    const abstractNumEls = numberingDoc.getElementsByTagName("w:abstractNum");
    for (let i = 0; i < abstractNumEls.length; i++) {
        const abstractNumEl = abstractNumEls[i];
        const abstractNumId = parseInt(abstractNumEl.getAttribute("w:abstractNumId") ?? "0", 10);
        const levels = new Map();
        for (const lvlEl of findChildren(abstractNumEl, "lvl")) {
            const ilvl = parseInt(lvlEl.getAttribute("w:ilvl") ?? "0", 10);
            // A level without an explicit format/text is treated as a bullet.
            const numFmt = findChild(lvlEl, "numFmt")?.getAttribute("w:val") ?? "bullet";
            const lvlText = findChild(lvlEl, "lvlText")?.getAttribute("w:val") ?? "•";
            levels.set(ilvl, { numFmt, lvlText });
        }
        abstractNums.set(abstractNumId, levels);
    }
    // Concrete num definitions that reference abstract nums.
    const numEls = numberingDoc.getElementsByTagName("w:num");
    for (let i = 0; i < numEls.length; i++) {
        const numEl = numEls[i];
        const numId = parseInt(numEl.getAttribute("w:numId") ?? "0", 10);
        const abstractNumIdEl = findChild(numEl, "abstractNumId");
        const abstractNumId = parseInt(abstractNumIdEl?.getAttribute("w:val") ?? "0", 10);
        const levels = abstractNums.get(abstractNumId);
        if (levels) {
            numbering.set(numId, levels);
        }
    }
    return numbering;
}
|
|
692
|
+
/**
 * Read the top/bottom/left/right borders of a paragraph from its `pBdr` element.
 *
 * Unlike table borders, colors are taken from `w:color` only; theme colors are
 * not resolved here.
 *
 * @param {Element} pBdr - The paragraph border container element.
 * @returns {{top: object|undefined, bottom: object|undefined, left: object|undefined, right: object|undefined}}
 *   Each side is `{ style, size, color }` (size in points, color as a hex
 *   string without "#"), or `undefined` when the side is absent or its style
 *   is missing, "nil" or "none".
 */
function parseParagraphBorders(pBdr) {
    const readSide = (sideName) => {
        const sideEl = findChild(pBdr, sideName);
        if (!sideEl) {
            return undefined;
        }
        const style = sideEl.getAttribute("w:val");
        if (!style || style === "nil" || style === "none") {
            return undefined;
        }
        // w:sz is expressed in eighths of a point.
        const eighths = parseInt(sideEl.getAttribute("w:sz") ?? "4", 10);
        const rawColor = sideEl.getAttribute("w:color") ?? "000000";
        return {
            style,
            size: eighths / 8,
            // "auto" is rendered as black.
            color: rawColor === "auto" ? "000000" : rawColor,
        };
    };
    return {
        top: readSide("top"),
        bottom: readSide("bottom"),
        left: readSide("left"),
        right: readSide("right"),
    };
}
|
|
720
|
+
/**
 * Extract paragraph-level formatting from a `pPr` element.
 *
 * Only properties actually present in the XML are set on the result:
 * `styleId`, `alignment`, `indentLeft`, `indentRight`, `indentFirstLine`
 * (negative for a hanging indent), `spacingBefore`, `spacingAfter`,
 * `lineSpacing`, `numId`, `ilvl`, `borders` and `shading`. Numeric values are
 * stored exactly as parsed from the attributes; no unit conversion happens here.
 *
 * @param {Element} pPr - The paragraph properties element.
 * @returns {object} The collected paragraph properties.
 */
function parseParagraphProps(pPr) {
    const out = {};
    // Copy an integer attribute onto `out[key]` when the attribute is non-empty.
    const copyInt = (el, attr, key) => {
        const raw = el.getAttribute(attr);
        if (raw) {
            out[key] = parseInt(raw, 10);
        }
    };

    const styleEl = findChild(pPr, "pStyle");
    if (styleEl) {
        out.styleId = styleEl.getAttribute("w:val") ?? undefined;
    }

    const jcEl = findChild(pPr, "jc");
    if (jcEl) {
        switch (jcEl.getAttribute("w:val")) {
            case "center":
                out.alignment = "center";
                break;
            case "right":
            case "end":
                out.alignment = "right";
                break;
            case "both":
            case "distribute":
                out.alignment = "justify";
                break;
            default:
                // Any other (or missing) value is treated as left-aligned.
                out.alignment = "left";
        }
    }

    const indEl = findChild(pPr, "ind");
    if (indEl) {
        copyInt(indEl, "w:left", "indentLeft");
        copyInt(indEl, "w:right", "indentRight");
        copyInt(indEl, "w:firstLine", "indentFirstLine");
        // A hanging indent is stored as a negative first-line indent and
        // overrides w:firstLine when both are given.
        const hangingRaw = indEl.getAttribute("w:hanging");
        if (hangingRaw) {
            out.indentFirstLine = -parseInt(hangingRaw, 10);
        }
    }

    const spacingEl = findChild(pPr, "spacing");
    if (spacingEl) {
        copyInt(spacingEl, "w:before", "spacingBefore");
        copyInt(spacingEl, "w:after", "spacingAfter");
        copyInt(spacingEl, "w:line", "lineSpacing");
    }

    const numPrEl = findChild(pPr, "numPr");
    if (numPrEl) {
        const numIdEl = findChild(numPrEl, "numId");
        const ilvlEl = findChild(numPrEl, "ilvl");
        if (numIdEl) {
            out.numId = parseInt(numIdEl.getAttribute("w:val") ?? "0", 10);
        }
        // The level defaults to 0 whenever numPr is present.
        out.ilvl = parseInt(ilvlEl?.getAttribute("w:val") ?? "0", 10);
    }

    const bordersEl = findChild(pPr, "pBdr");
    if (bordersEl) {
        out.borders = parseParagraphBorders(bordersEl);
    }

    // Paragraph shading becomes a "#"-prefixed background color.
    const shadingEl = findChild(pPr, "shd");
    if (shadingEl) {
        const fill = shadingEl.getAttribute("w:fill");
        if (fill && fill !== "auto") {
            out.shading = `#${fill}`;
        }
    }
    return out;
}
|
|
789
|
+
/**
 * Extract character (run) formatting from an `rPr` element.
 *
 * Only properties present in the XML are set on the result: `bold`, `italic`,
 * `underline`, `strike`, `color`, `fontSize` (points), `fontFamily`,
 * `highlight`, `vertAlign`, `caps` and `smallCaps`.
 *
 * Background handling: an explicit `w:highlight` takes precedence over run
 * shading (`w:shd`), because highlighting supersedes shading when the run is
 * displayed. Shading is used as the background only when there is no
 * highlight or the highlight is "none".
 *
 * @param {Element} rPr - The run properties element.
 * @param {*} themeColors - Theme color table, passed through to `resolveColor`.
 * @param {*} themeFonts - Theme font table, passed through to `resolveThemeFont`.
 * @returns {object} The collected run properties.
 */
function parseRunProps(rPr, themeColors, themeFonts) {
    const props = {};
    // On/off toggle: the element being present without w:val means "on";
    // "0", "false" and "off" switch it off. Returns undefined when absent.
    const readToggle = (name) => {
        const el = findChild(rPr, name);
        if (!el) {
            return undefined;
        }
        const val = el.getAttribute("w:val");
        return val !== "0" && val !== "false" && val !== "off";
    };
    const bold = readToggle("b");
    if (bold !== undefined) {
        props.bold = bold;
    }
    const italic = readToggle("i");
    if (italic !== undefined) {
        props.italic = italic;
    }
    // Underline
    const u = findChild(rPr, "u");
    if (u) {
        const val = u.getAttribute("w:val");
        // NOTE(review): getAttribute normally yields null (not undefined) for a
        // missing attribute, so a bare <w:u/> counts as underlined here.
        props.underline = val !== "none" && val !== undefined;
    }
    const strike = readToggle("strike");
    if (strike !== undefined) {
        props.strike = strike;
    }
    // Color
    const color = findChild(rPr, "color");
    if (color) {
        props.color = resolveColor(color, themeColors);
    }
    // Font size (w:sz is in half-points; stored in points)
    const sz = findChild(rPr, "sz");
    if (sz) {
        const val = sz.getAttribute("w:val");
        if (val) {
            props.fontSize = halfPointsToPt(parseInt(val, 10));
        }
    }
    // Font family: a directly named font wins over a theme font reference.
    const rFonts = findChild(rPr, "rFonts");
    if (rFonts) {
        const ascii = rFonts.getAttribute("w:ascii") ?? rFonts.getAttribute("w:hAnsi");
        if (ascii) {
            props.fontFamily = ascii;
        }
        else {
            const asciiTheme = rFonts.getAttribute("w:asciiTheme") ?? rFonts.getAttribute("w:hAnsiTheme");
            if (asciiTheme) {
                const resolved = resolveThemeFont(asciiTheme, themeFonts);
                if (resolved) {
                    props.fontFamily = resolved;
                }
            }
        }
    }
    // Highlight (named color, stored verbatim - including "none")
    const highlight = findChild(rPr, "highlight");
    if (highlight) {
        const val = highlight.getAttribute("w:val");
        if (val) {
            props.highlight = val;
        }
    }
    // Shading (background color) - only when no real highlight is in effect.
    const shd = findChild(rPr, "shd");
    if (shd) {
        const fill = shd.getAttribute("w:fill");
        const hasHighlight = props.highlight !== undefined && props.highlight !== "none";
        if (fill && fill !== "auto" && !hasHighlight) {
            props.highlight = "#" + fill;
        }
    }
    // Vertical alignment (superscript/subscript)
    const vertAlign = findChild(rPr, "vertAlign");
    if (vertAlign) {
        const val = vertAlign.getAttribute("w:val");
        if (val === "superscript" || val === "subscript") {
            props.vertAlign = val;
        }
    }
    const caps = readToggle("caps");
    if (caps !== undefined) {
        props.caps = caps;
    }
    const smallCaps = readToggle("smallCaps");
    if (smallCaps !== undefined) {
        props.smallCaps = smallCaps;
    }
    return props;
}
|
|
883
|
+
/**
 * Convert a run (`r`) element into `{ text, props, image }`.
 *
 * Text is assembled from the run's direct children in document order:
 * `t` contributes its text content, `tab` a tab character, and `br`/`cr` a
 * newline. A `drawing` child is parsed in inline-only mode; if it yields an
 * image, that image is attached to the run (a later image replaces an earlier
 * one).
 *
 * @param {Element} r - The run element.
 * @param {*} themeColors - Theme color table, passed through to the helpers.
 * @param {*} themeFonts - Theme font table, passed through to `parseRunProps`.
 * @returns {{text: string, props: object, image: (object|undefined)}|null}
 *   `null` when the run has neither text nor an image.
 */
function parseTextRun(r, themeColors, themeFonts) {
    const rPr = findChild(r, "rPr");
    const props = rPr ? parseRunProps(rPr, themeColors, themeFonts) : {};
    let text = "";
    let image;
    const kids = r.children;
    for (let idx = 0; idx < kids.length; idx++) {
        const node = kids[idx];
        switch (node.localName) {
            case "t":
                text += node.textContent ?? "";
                break;
            case "tab":
                text += "\t";
                break;
            case "br":
            case "cr":
                text += "\n";
                break;
            case "drawing": {
                // Anchored drawings are handled elsewhere as positioned elements.
                const parsed = parseDrawing(node, themeColors, true);
                if (parsed?.type === "image") {
                    image = parsed.data;
                }
                break;
            }
            default:
                break;
        }
    }
    // Runs that carry nothing renderable are dropped.
    if (!text && !image) {
        return null;
    }
    return { text, props, image };
}
|
|
917
|
+
/**
 * Parse a paragraph (`p`) element into its properties and runs.
 *
 * Runs are collected in document order from the paragraph itself, from the
 * `sdtContent` of any structured document tag, and from hyperlinks. All other
 * children (bookmarks included) contribute nothing.
 *
 * @param {Element} p - The paragraph element.
 * @param {*} themeColors - Theme color table, passed through to `parseTextRun`.
 * @param {*} themeFonts - Theme font table, passed through to `parseTextRun`.
 * @returns {{runs: object[], props: object}} The parsed runs and paragraph properties.
 */
function parseParagraph(p, themeColors, themeFonts) {
    const pPr = findChild(p, "pPr");
    const props = pPr ? parseParagraphProps(pPr) : {};
    const runs = [];

    // Depth-first walk that appends every non-empty run to `runs`.
    function gatherRuns(container) {
        const kids = container.children;
        for (let idx = 0; idx < kids.length; idx++) {
            const node = kids[idx];
            switch (node.localName) {
                case "r": {
                    const run = parseTextRun(node, themeColors, themeFonts);
                    if (run) {
                        runs.push(run);
                    }
                    break;
                }
                case "sdt": {
                    const inner = findChild(node, "sdtContent");
                    if (inner) {
                        gatherRuns(inner);
                    }
                    break;
                }
                case "hyperlink":
                    gatherRuns(node);
                    break;
                default:
                    // bookmarkStart / bookmarkEnd and anything else: skipped.
                    break;
            }
        }
    }

    gatherRuns(p);
    return { runs, props };
}
|
|
949
|
+
/**
 * Parse a table cell (`tc`) element.
 *
 * From `tcPr` (when present) this reads `gridSpan`, `vMerge`
 * ("restart" or "continue"), `width`, `shading` ("#"-prefixed fill),
 * `borders` and `vAlign` ("top" | "center" | "bottom"). The cell's paragraphs
 * and nested tables are gathered from its direct children and from the
 * `sdtContent` of any structured document tags, in document order.
 *
 * @param {Element} tc - The table cell element.
 * @param {*} themeColors - Theme color table, passed through to the helpers.
 * @param {*} themeFonts - Theme font table, passed through to the helpers.
 * @returns {object} Cell with `paragraphs` plus whichever optional fields were found.
 */
function parseTableCell(tc, themeColors, themeFonts) {
    const cell = { paragraphs: [] };
    const tcPr = findChild(tc, "tcPr");
    if (tcPr) {
        const spanEl = findChild(tcPr, "gridSpan");
        if (spanEl) {
            cell.gridSpan = parseInt(spanEl.getAttribute("w:val") ?? "1", 10);
        }
        const mergeEl = findChild(tcPr, "vMerge");
        if (mergeEl) {
            // Anything other than an explicit "restart" continues the merge.
            cell.vMerge = mergeEl.getAttribute("w:val") === "restart" ? "restart" : "continue";
        }
        const widthRaw = findChild(tcPr, "tcW")?.getAttribute("w:w");
        if (widthRaw) {
            cell.width = parseInt(widthRaw, 10);
        }
        const fill = findChild(tcPr, "shd")?.getAttribute("w:fill");
        if (fill && fill !== "auto") {
            cell.shading = `#${fill}`;
        }
        const bordersEl = findChild(tcPr, "tcBorders");
        if (bordersEl) {
            cell.borders = parseBorders(bordersEl, themeColors);
        }
        const vAlignVal = findChild(tcPr, "vAlign")?.getAttribute("w:val");
        if (vAlignVal === "center" || vAlignVal === "bottom" || vAlignVal === "top") {
            cell.vAlign = vAlignVal;
        }
    }

    // Collect children named `wanted`, descending only through sdt > sdtContent.
    const collect = (parent, wanted, found = []) => {
        const kids = parent.children;
        for (let idx = 0; idx < kids.length; idx++) {
            const node = kids[idx];
            if (node.localName === wanted) {
                found.push(node);
            }
            else if (node.localName === "sdt") {
                const inner = findChild(node, "sdtContent");
                if (inner) {
                    collect(inner, wanted, found);
                }
            }
        }
        return found;
    };

    for (const paragraphEl of collect(tc, "p")) {
        cell.paragraphs.push(parseParagraph(paragraphEl, themeColors, themeFonts));
    }
    const tableEls = collect(tc, "tbl");
    if (tableEls.length > 0) {
        cell.nestedTables = tableEls.map((tableEl) => parseTable(tableEl, themeColors, themeFonts));
    }
    return cell;
}
|
|
1039
|
+
/**
 * Read table or cell borders from a borders container element.
 *
 * Handles the `top`, `bottom`, `left`, `right`, `insideH` and `insideV`
 * edges. A `w:themeColor` that resolves through `resolveThemeColor` replaces
 * the literal `w:color`.
 *
 * @param {Element} bordersEl - The borders container (e.g. tblBorders / tcBorders).
 * @param {*} themeColors - Theme color table, passed through to `resolveThemeColor`.
 * @returns {object} One entry per edge: `{ style, size, color }` (size in
 *   points, color as hex without "#"), or `undefined` when the edge is absent
 *   or its style is missing, "nil" or "none".
 */
function parseBorders(bordersEl, themeColors) {
    const readEdge = (edgeName) => {
        const edgeEl = findChild(bordersEl, edgeName);
        if (!edgeEl) {
            return undefined;
        }
        const style = edgeEl.getAttribute("w:val");
        if (!style || style === "nil" || style === "none") {
            return undefined;
        }
        // w:sz is expressed in eighths of a point.
        const eighths = parseInt(edgeEl.getAttribute("w:sz") ?? "4", 10);
        let color = edgeEl.getAttribute("w:color") ?? "000000";
        const themeName = edgeEl.getAttribute("w:themeColor");
        if (themeName) {
            const resolved = resolveThemeColor(themeName, themeColors);
            if (resolved) {
                color = resolved.replace("#", "");
            }
        }
        if (color === "auto") {
            // "auto" is rendered as black.
            color = "000000";
        }
        return { style, size: eighths / 8, color };
    };
    return {
        top: readEdge("top"),
        bottom: readEdge("bottom"),
        left: readEdge("left"),
        right: readEdge("right"),
        insideH: readEdge("insideH"),
        insideV: readEdge("insideV"),
    };
}
|
|
1070
|
+
/**
 * Parse a table (`tbl`) element into rows of cells plus table-level settings.
 *
 * Optional fields set when present: `borders` (from tblBorders), `tableWidth`
 * and `tableIndent` (only when their `w:type` is "dxa"), and `columnWidths`
 * (one entry per `gridCol`, 0 when the column has no width). Each row carries
 * its `cells` and, when specified, a `height`.
 *
 * @param {Element} tbl - The table element.
 * @param {*} themeColors - Theme color table, passed through to the helpers.
 * @param {*} themeFonts - Theme font table, passed through to `parseTableCell`.
 * @returns {object} The parsed table.
 */
function parseTable(tbl, themeColors, themeFonts) {
    const table = { rows: [] };
    // Integer w:w of a measurement element, or undefined unless w:type is "dxa".
    const dxaValue = (el) => {
        if (!el) {
            return undefined;
        }
        const w = el.getAttribute("w:w");
        const type = el.getAttribute("w:type");
        return w && type === "dxa" ? parseInt(w, 10) : undefined;
    };

    const tblPr = findChild(tbl, "tblPr");
    if (tblPr) {
        const bordersEl = findChild(tblPr, "tblBorders");
        if (bordersEl) {
            table.borders = parseBorders(bordersEl, themeColors);
        }
        const width = dxaValue(findChild(tblPr, "tblW"));
        if (width !== undefined) {
            table.tableWidth = width;
        }
        // The indent may be negative (table extending into the margin).
        const indent = dxaValue(findChild(tblPr, "tblInd"));
        if (indent !== undefined) {
            table.tableIndent = indent;
        }
    }

    const gridEl = findChild(tbl, "tblGrid");
    if (gridEl) {
        table.columnWidths = findChildren(gridEl, "gridCol").map((colEl) => {
            const raw = colEl.getAttribute("w:w");
            return raw ? parseInt(raw, 10) : 0;
        });
    }

    for (const rowEl of findChildren(tbl, "tr")) {
        const row = { cells: [] };
        const trPr = findChild(rowEl, "trPr");
        const heightRaw = trPr ? findChild(trPr, "trHeight")?.getAttribute("w:val") : undefined;
        if (heightRaw) {
            row.height = parseInt(heightRaw, 10);
        }
        for (const cellEl of findChildren(rowEl, "tc")) {
            row.cells.push(parseTableCell(cellEl, themeColors, themeFonts));
        }
        table.rows.push(row);
    }
    return table;
}
|
|
1126
|
+
/**
 * Parse a `drawing` element that embeds a picture.
 *
 * The picture is located through the drawing's `inline` container (preferred)
 * or its `anchor` container. Returns `null` when no container, extent, blip or
 * relationship id can be found, or when `inlineOnly` is set and the drawing is
 * anchored rather than inline.
 *
 * @param {Element} drawing - The drawing element.
 * @param {*} themeColors - Theme color table used to resolve scheme-colored
 *   picture borders; may be falsy.
 * @param {boolean} [inlineOnly=false] - Skip drawings that are only anchored
 *   (those are handled separately as positioned elements).
 * @returns {{type: "image", data: object}|null} `data` holds `rId`, `width`
 *   and `height` (the raw `cx`/`cy` extent values), `alt`, `rotation`
 *   (degrees, normalized to at most 180), `flipH`/`flipV` (true or undefined),
 *   `shadow` and `border`.
 */
function parseDrawing(drawing, themeColors, inlineOnly = false) {
    // Look for inline or anchor images
    const inline = findDescendant(drawing, "inline");
    const anchor = findDescendant(drawing, "anchor");
    if (inlineOnly && anchor && !inline) {
        return null;
    }
    const container = inline ?? anchor;
    if (!container) {
        return null;
    }
    const extent = findChild(container, "extent");
    if (!extent) {
        return null;
    }
    const cx = parseInt(extent.getAttribute("cx") ?? "0", 10);
    const cy = parseInt(extent.getAttribute("cy") ?? "0", 10);
    // The blip element carries the relationship id of the image part.
    const blip = findDescendant(container, "blip");
    if (!blip) {
        return null;
    }
    const rEmbed = blip.getAttribute("r:embed") ??
        blip.getAttributeNS("http://schemas.openxmlformats.org/officeDocument/2006/relationships", "embed");
    if (!rEmbed) {
        return null;
    }
    // Alt text: title first, then description.
    const docPr = findDescendant(container, "docPr");
    const alt = docPr?.getAttribute("title") ?? docPr?.getAttribute("descr") ?? undefined;
    // Transform properties (rotation, flip) live in spPr > xfrm.
    const spPr = findDescendant(container, "spPr");
    const xfrm = spPr ? findChild(spPr, "xfrm") : null;
    // Boolean attributes may be written as "1" or "true".
    const isXmlTrue = (value) => value === "1" || value === "true";
    let rotation;
    let flipH;
    let flipV;
    if (xfrm) {
        // rot is expressed in 60000ths of a degree.
        const rotAttr = xfrm.getAttribute("rot");
        if (rotAttr) {
            rotation = parseInt(rotAttr, 10) / 60000;
            // Express large clockwise angles as small negative ones
            // (e.g. 21253759 = 354.2 degrees = -5.8 degrees).
            if (rotation > 180) {
                rotation = rotation - 360;
            }
        }
        flipH = isXmlTrue(xfrm.getAttribute("flipH"));
        flipV = isXmlTrue(xfrm.getAttribute("flipV"));
    }
    // Shadow from effectLst > outerShdw
    let shadow;
    const effectLst = spPr ? findDescendant(spPr, "effectLst") : null;
    const outerShdw = effectLst ? findChild(effectLst, "outerShdw") : null;
    if (outerShdw) {
        const blurRad = parseInt(outerShdw.getAttribute("blurRad") ?? "0", 10);
        const distAttr = outerShdw.getAttribute("dist");
        const dirAttr = outerShdw.getAttribute("dir");
        let shadowColor = "000000"; // default black
        let shadowAlpha = 100;
        // alpha@val is in thousandths of a percent (40000 = 40%).
        const readAlpha = (colorEl) => {
            const alphaEl = findChild(colorEl, "alpha");
            if (alphaEl) {
                const alphaVal = parseInt(alphaEl.getAttribute("val") ?? "100000", 10);
                shadowAlpha = Math.round(alphaVal / 1000);
            }
        };
        // Preset color: only "black" and "white" are recognized.
        const prstClr = findChild(outerShdw, "prstClr");
        if (prstClr) {
            const preset = prstClr.getAttribute("val");
            if (preset === "black") {
                shadowColor = "000000";
            }
            else if (preset === "white") {
                shadowColor = "FFFFFF";
            }
            readAlpha(prstClr);
        }
        // Explicit RGB color (applied after, so it wins over a preset).
        const srgbClr = findChild(outerShdw, "srgbClr");
        if (srgbClr) {
            shadowColor = srgbClr.getAttribute("val") ?? "000000";
            readAlpha(srgbClr);
        }
        // Offset from distance and direction. A missing dir means 0 degrees,
        // so the shadow is still displaced by dist along the x axis.
        let offsetX = 0;
        let offsetY = 0;
        if (distAttr) {
            const dist = parseInt(distAttr, 10);
            const dir = parseInt(dirAttr || "0", 10) / 60000; // 60000ths of a degree -> degrees
            const dirRad = (dir * Math.PI) / 180;
            offsetX = Math.round(dist * Math.cos(dirRad));
            offsetY = Math.round(dist * Math.sin(dirRad));
        }
        shadow = {
            blurRadius: blurRad,
            color: shadowColor,
            alpha: shadowAlpha,
            offsetX,
            offsetY,
        };
    }
    // Border from spPr > ln
    let border;
    const ln = spPr ? findChild(spPr, "ln") : null;
    if (ln) {
        const widthAttr = ln.getAttribute("w");
        const width = widthAttr ? parseInt(widthAttr, 10) : 0;
        if (width > 0) {
            let borderColor;
            const solidFill = findChild(ln, "solidFill");
            if (solidFill) {
                const srgbClr = findChild(solidFill, "srgbClr");
                if (srgbClr) {
                    borderColor = srgbClr.getAttribute("val") ?? undefined;
                }
                // System color: prefer the cached lastClr, else map known names.
                const sysClr = findChild(solidFill, "sysClr");
                if (sysClr) {
                    const lastClr = sysClr.getAttribute("lastClr");
                    if (lastClr) {
                        borderColor = lastClr;
                    }
                    else {
                        const sysName = sysClr.getAttribute("val");
                        if (sysName === "window") {
                            borderColor = "FFFFFF";
                        }
                        else if (sysName === "windowText") {
                            borderColor = "000000";
                        }
                    }
                }
                // Theme color reference (e.g. "bg1")
                const schemeClr = findChild(solidFill, "schemeClr");
                if (schemeClr && themeColors) {
                    const colorName = schemeClr.getAttribute("val");
                    if (colorName) {
                        const resolved = resolveThemeColor(colorName, themeColors);
                        if (resolved) {
                            // Stored without the leading "#".
                            borderColor = resolved.startsWith("#") ? resolved.substring(1) : resolved;
                        }
                    }
                }
            }
            // A border is only reported when a color could be determined.
            if (borderColor) {
                border = {
                    width,
                    color: borderColor,
                };
            }
        }
    }
    return {
        type: "image",
        data: {
            rId: rEmbed,
            width: cx,
            height: cy,
            alt,
            rotation,
            flipH: flipH || undefined,
            flipV: flipV || undefined,
            shadow,
            border,
        },
    };
}
|
|
1302
|
+
/**
 * Parse the main document XML into flow elements and positioned elements.
 *
 * Positioned elements come from every `w:drawing` anywhere under the body that
 * contains an `anchor`. Flow elements come from the body's direct children:
 * paragraphs (kept when they have runs or a style id), tables, and the
 * contents of structured document tags.
 *
 * @param {Document} docDoc - Parsed main document XML.
 * @param {*} themeColors - Theme color table, passed through to the helpers.
 * @param {*} themeFonts - Theme font table, passed through to the helpers.
 * @returns {{elements: object[], positionedElements: object[]}} Both lists
 *   are empty when the document has no `w:body`.
 */
function parseDocument(docDoc, themeColors, themeFonts) {
    const elements = [];
    const positionedElements = [];
    const body = docDoc.getElementsByTagName("w:body")[0];
    if (!body) {
        return { elements, positionedElements };
    }

    // Anchored drawings (header banners, shapes with text, ...) are gathered
    // from the whole body; inline drawings stay with their runs.
    const allDrawings = body.getElementsByTagName("w:drawing");
    for (let d = 0; d < allDrawings.length; d++) {
        const candidate = allDrawings[d];
        if (!findDescendant(candidate, "anchor")) {
            continue;
        }
        const positioned = parsePositionedElement(candidate, themeColors, themeFonts);
        if (positioned) {
            positionedElements.push(positioned);
        }
    }

    // Appends the flow element(s) represented by one block-level node.
    function visit(node) {
        switch (node.localName) {
            case "p": {
                const para = parseParagraph(node, themeColors, themeFonts);
                if (para.runs.length > 0 || para.props.styleId) {
                    elements.push({ kind: "paragraph", data: para });
                }
                break;
            }
            case "tbl":
                elements.push({ kind: "table", data: parseTable(node, themeColors, themeFonts) });
                break;
            case "sdt": {
                const inner = findChild(node, "sdtContent");
                if (inner) {
                    for (let k = 0; k < inner.children.length; k++) {
                        visit(inner.children[k]);
                    }
                }
                break;
            }
            default:
                break;
        }
    }

    for (let b = 0; b < body.children.length; b++) {
        visit(body.children[b]);
    }
    return { elements, positionedElements };
}
|
|
1347
|
+
// ============================================================================
|
|
1348
|
+
// HTML Rendering Functions
|
|
1349
|
+
// ============================================================================
|
|
1350
|
+
/**
 * Translate a highlight value into a CSS color.
 *
 * Values starting with "#" are returned unchanged. Known highlight names map
 * to their hex color; any other value is returned as given. The lookup uses a
 * Map so that names such as "constructor" or "toString" cannot pick up members
 * inherited from Object.prototype.
 *
 * @param {string} highlight - Highlight name or "#"-prefixed color.
 * @returns {string} The CSS color, or the input when it is not a known name.
 */
function getHighlightColor(highlight) {
    if (highlight.startsWith("#")) {
        return highlight;
    }
    // Named highlight colors
    const highlightColors = new Map([
        ["yellow", "#ffff00"],
        ["green", "#00ff00"],
        ["cyan", "#00ffff"],
        ["magenta", "#ff00ff"],
        ["blue", "#0000ff"],
        ["red", "#ff0000"],
        ["darkBlue", "#00008b"],
        ["darkCyan", "#008b8b"],
        ["darkGreen", "#006400"],
        ["darkMagenta", "#8b008b"],
        ["darkRed", "#8b0000"],
        ["darkYellow", "#808000"],
        ["darkGray", "#a9a9a9"],
        ["lightGray", "#d3d3d3"],
        ["black", "#000000"],
        ["white", "#ffffff"],
    ]);
    return highlightColors.get(highlight) ?? highlight;
}
|
|
1374
|
+
/**
 * Render one parsed run as an HTML fragment.
 *
 * An inline image (if any) is rendered first, followed by the run text. The
 * text is HTML-escaped, newlines become `<br>`, and superscript/subscript runs
 * are wrapped in `<sup>`/`<sub>`. When the run has any styling, the text is
 * wrapped in a `<span style="...">`.
 *
 * Underline and strikethrough are emitted as a single `text-decoration`
 * declaration; as two separate declarations the later one would override the
 * earlier and the underline would be lost.
 *
 * @param {{text: string, props: object, image: (object|undefined)}} run - The parsed run.
 * @param {*} imageMap - Image lookup, passed through to `renderImageToHtml`.
 * @returns {string} The HTML for this run; empty when it has no image and no text.
 */
function renderRunToHtml(run, imageMap) {
    const { text, props, image } = run;
    let result = "";
    // Handle inline image
    if (image) {
        result += renderImageToHtml(image, imageMap);
    }
    // Handle text
    if (text) {
        const styles = [];
        if (props.bold) {
            styles.push("font-weight:bold");
        }
        if (props.italic) {
            styles.push("font-style:italic");
        }
        const decorations = [];
        if (props.underline) {
            decorations.push("underline");
        }
        if (props.strike) {
            decorations.push("line-through");
        }
        if (decorations.length > 0) {
            styles.push(`text-decoration:${decorations.join(" ")}`);
        }
        // NOTE(review): color, font family and highlight are interpolated into
        // the style attribute as-is; confirm upstream values cannot contain
        // quote characters.
        if (props.color) {
            styles.push(`color:${props.color}`);
        }
        if (props.fontSize) {
            styles.push(`font-size:${props.fontSize}pt`);
        }
        if (props.fontFamily) {
            styles.push(`font-family:'${props.fontFamily}',sans-serif`);
        }
        if (props.highlight) {
            styles.push(`background-color:${getHighlightColor(props.highlight)}`);
        }
        if (props.caps) {
            styles.push("text-transform:uppercase");
        }
        if (props.smallCaps) {
            styles.push("font-variant:small-caps");
        }
        let escapedText = escapeHtml(text);
        // Handle line breaks
        escapedText = escapedText.replace(/\n/g, "<br>");
        // Handle tabs
        // NOTE(review): replacement text reproduced as seen; confirm the intended tab width.
        escapedText = escapedText.replace(/\t/g, " ");
        if (props.vertAlign === "superscript") {
            escapedText = `<sup>${escapedText}</sup>`;
        }
        else if (props.vertAlign === "subscript") {
            escapedText = `<sub>${escapedText}</sub>`;
        }
        if (styles.length > 0) {
            result += `<span style="${styles.join(";")}">${escapedText}</span>`;
        }
        else {
            result += escapedText;
        }
    }
    return result;
}
|
|
1424
|
+
/**
 * Render one parsed paragraph as an HTML element string.
 *
 * Tag selection is driven by the paragraph style's name (falling back to the
 * style id when the style has no name): "heading N" with N in 1..6 becomes
 * `<hN>`, names starting with "subtitle" become `<h2>`, other names containing
 * "title" become `<h1>`, everything else is `<p>`. Paragraph properties and
 * inherited run properties are emitted as an inline `style` attribute.
 *
 * @param {{runs: Array, props: object}} para - Parsed paragraph (runs plus direct paragraph properties).
 * @param {Map} styleMap - Style id -> style definition; `name`, `pPr` and `rPr` are read.
 * @param {Map} numberingMap - numId -> Map of level index -> level definition; `numFmt` is read.
 * @param {Map} [imageMap] - Forwarded unchanged to renderRunToHtml.
 * @param {object} [docDefaults] - Document defaults; only `rPr` is read here.
 * @returns {string} HTML for the paragraph.
 */
function renderParagraphToHtml(para, styleMap, numberingMap, imageMap = new Map(), docDefaults = {}) {
    const { runs, props } = para;
    const styles = [];
    // A spacing value counts as "set" when it is truthy (the historical rule) or an
    // explicit 0. Without the zero case, a paragraph that asks for no spacing would
    // silently inherit the stylesheet's default margin instead.
    const isSpacingSet = (value) => Boolean(value) || value === 0;

    // Resolve style hierarchy: document defaults -> named style -> paragraph direct props.
    const namedStyle = props.styleId ? styleMap.get(props.styleId) : undefined;
    const resolvedProps = { ...namedStyle?.pPr, ...props };
    const resolvedRunProps = { ...docDefaults.rPr, ...namedStyle?.rPr };

    // Determine HTML tag based on style name.
    let tag = "p";
    let isTitleStyle = false;
    let isSubtitleStyle = false;
    if (props.styleId) {
        const styleName = namedStyle?.name?.toLowerCase() ?? props.styleId.toLowerCase();
        const headingMatch = styleName.match(/heading\s*(\d)/i);
        if (headingMatch) {
            const headingLevel = Number.parseInt(headingMatch[1], 10);
            // Levels outside 1..6 have no HTML heading element and stay <p>.
            if (headingLevel >= 1 && headingLevel <= 6) {
                tag = `h${headingLevel}`;
            }
        }
        else if (styleName.startsWith("subtitle")) {
            // Checked before "title" because "subtitle" contains "title".
            tag = "h2";
            isSubtitleStyle = true;
        }
        else if (styleName.includes("title")) {
            tag = "h1";
            isTitleStyle = true;
        }
    }
    const isTitleLike = isTitleStyle || isSubtitleStyle;
    const isListItem = props.numId !== undefined && props.numId > 0;

    // Paragraph-level properties.
    if (resolvedProps.alignment) {
        styles.push(`text-align:${resolvedProps.alignment}`);
    }
    if (resolvedProps.indentLeft) {
        styles.push(`margin-left:${twipsToPx(resolvedProps.indentLeft)}px`);
    }
    if (resolvedProps.indentRight) {
        styles.push(`margin-right:${twipsToPx(resolvedProps.indentRight)}px`);
    }
    if (resolvedProps.indentFirstLine) {
        styles.push(`text-indent:${twipsToPx(resolvedProps.indentFirstLine)}px`);
    }
    if (isSpacingSet(resolvedProps.spacingBefore)) {
        styles.push(`margin-top:${twipsToPx(resolvedProps.spacingBefore)}px`);
    }
    else if (isTitleLike) {
        // Reset margin-top for Title/Subtitle since global h1/h2 CSS has default margins.
        styles.push("margin-top:0");
    }
    if (isSpacingSet(resolvedProps.spacingAfter)) {
        styles.push(`margin-bottom:${twipsToPx(resolvedProps.spacingAfter)}px`);
    }
    else if (isTitleLike) {
        styles.push("margin-bottom:0");
    }

    // Inherited run properties are applied on the paragraph element itself.
    if (resolvedRunProps.fontSize) {
        styles.push(`font-size:${resolvedRunProps.fontSize}pt`);
    }
    if (resolvedRunProps.fontFamily) {
        styles.push(`font-family:'${resolvedRunProps.fontFamily}',sans-serif`);
    }
    if (resolvedRunProps.color) {
        styles.push(`color:${resolvedRunProps.color}`);
    }
    if (resolvedRunProps.bold) {
        styles.push("font-weight:bold");
    }
    if (resolvedRunProps.italic) {
        styles.push("font-style:italic");
    }
    if (resolvedRunProps.caps) {
        styles.push("text-transform:uppercase");
    }
    if (resolvedRunProps.smallCaps) {
        styles.push("font-variant:small-caps");
    }

    // Paragraph borders.
    if (resolvedProps.borders) {
        for (const side of ["top", "bottom", "left", "right"]) {
            const b = resolvedProps.borders[side];
            if (b) {
                styles.push(`border-${side}:${b.size}pt ${getBorderStyleCss(b.style)} #${b.color}`);
            }
        }
    }
    else if (isTitleLike) {
        // Global h1/h2 CSS may carry borders that do not apply to Title/Subtitle.
        styles.push("border:none");
    }

    // Paragraph shading (background color).
    if (resolvedProps.shading) {
        styles.push(`background-color:${resolvedProps.shading}`);
    }

    let content = runs.map((run) => renderRunToHtml(run, imageMap)).join("");

    // List items are rendered as individual paragraphs with an inline marker;
    // proper <ul>/<ol> grouping would have to happen at the parent rendering level.
    if (isListItem) {
        const level = props.ilvl ?? 0;
        const lvlDef = numberingMap.get(props.numId)?.get(level);
        if (lvlDef) {
            // NOTE(review): for non-bullet formats the marker is derived from the
            // nesting level, not from a running item counter, so consecutive items
            // on the same level all show the same number.
            const marker = lvlDef.numFmt === "bullet" ? "• " : `${level + 1}. `;
            styles.push(`padding-left:${(level + 1) * 20}px`);
            styles.push("display:block");
            content = `<span style="margin-left:-20px;display:inline-block;width:20px">${marker}</span>${content}`;
        }
    }

    const styleAttr = styles.length > 0 ? ` style="${styles.join(";")}"` : "";
    if (!content) {
        // Empty paragraph: a plain space would collapse and give the element no line
        // box, so emit a non-breaking space to keep the blank line visible.
        return `<${tag}${styleAttr}>&nbsp;</${tag}>`;
    }
    return `<${tag}${styleAttr}>${content}</${tag}>`;
}
|
|
1575
|
+
/**
 * Render a parsed table (including nested tables) as an HTML `<table>` string.
 *
 * Cells flagged `vMerge === "continue"` are not emitted; the cell they continue
 * receives a matching `rowspan` so the remaining cells of those rows stay in
 * their grid columns.
 *
 * @param {object} table - Parsed table; reads `rows`, `tableWidth`, `tableIndent`, `columnWidths`, `borders`.
 * @param {Map} styleMap - Forwarded to renderParagraphToHtml.
 * @param {Map} numberingMap - Forwarded to renderParagraphToHtml.
 * @param {Map} themeColors - Not read here; forwarded to nested table rendering.
 * @param {Map} imageMap - Forwarded to renderParagraphToHtml.
 * @param {object} [docDefaults] - Forwarded to renderParagraphToHtml.
 * @returns {string} HTML for the table.
 */
function renderTableToHtml(table, styleMap, numberingMap, themeColors, imageMap, docDefaults = {}) {
    const tableStyles = ["border-collapse:collapse"];
    // Use the parsed table width when present, otherwise fill the container.
    tableStyles.push(table.tableWidth ? `width:${twipsToPx(table.tableWidth)}px` : "width:100%");
    // Table indent may be negative (table extends into the margin).
    if (table.tableIndent) {
        tableStyles.push(`margin-left:${twipsToPx(table.tableIndent)}px`);
    }
    let html = `<table style="${tableStyles.join(";")}">`;
    for (const [rowIndex, row] of table.rows.entries()) {
        const rowStyleAttr = row.height ? ` style="height:${twipsToPx(row.height)}px"` : "";
        html += `<tr${rowStyleAttr}>`;
        // Grid column at which the current cell starts.
        let colIndex = 0;
        for (const cell of row.cells) {
            const span = cell.gridSpan ?? 1;
            if (cell.vMerge === "continue") {
                // Covered by the rowspan of the cell above; only advance the grid position.
                colIndex += span;
                continue;
            }
            const cellStyles = ["padding:4px 8px", `vertical-align:${cell.vAlign ?? "top"}`];
            const cellAttrs = [];
            // Width: explicit cell width wins, otherwise sum the spanned grid columns.
            if (cell.width) {
                cellStyles.push(`width:${twipsToPx(cell.width)}px`);
            }
            else if (table.columnWidths && table.columnWidths[colIndex]) {
                let totalWidth = 0;
                for (let i = 0; i < span && colIndex + i < table.columnWidths.length; i++) {
                    totalWidth += table.columnWidths[colIndex + i];
                }
                if (totalWidth > 0) {
                    cellStyles.push(`width:${twipsToPx(totalWidth)}px`);
                }
            }
            if (cell.gridSpan && cell.gridSpan > 1) {
                cellAttrs.push(`colspan="${cell.gridSpan}"`);
            }
            const rowSpan = countVerticalMergeRows(table.rows, rowIndex, colIndex);
            if (rowSpan > 1) {
                cellAttrs.push(`rowspan="${rowSpan}"`);
            }
            if (cell.shading) {
                cellStyles.push(`background-color:${cell.shading}`);
            }
            if (cell.borders) {
                for (const side of ["top", "bottom", "left", "right"]) {
                    if (cell.borders[side]) {
                        cellStyles.push(tableEdgeBorderCss(side, cell.borders[side]));
                    }
                }
            }
            else if (table.borders) {
                // Fall back to table-level borders.
                // NOTE(review): insideH/insideV are pushed after the outer edges, so
                // when both are defined the later (inside) declaration is the one the
                // browser applies to every cell, including cells on the table's edge.
                for (const side of ["top", "bottom", "left", "right"]) {
                    if (table.borders[side]) {
                        cellStyles.push(tableEdgeBorderCss(side, table.borders[side]));
                    }
                }
                if (table.borders.insideH) {
                    cellStyles.push(tableEdgeBorderCss("top", table.borders.insideH));
                    cellStyles.push(tableEdgeBorderCss("bottom", table.borders.insideH));
                }
                if (table.borders.insideV) {
                    cellStyles.push(tableEdgeBorderCss("left", table.borders.insideV));
                    cellStyles.push(tableEdgeBorderCss("right", table.borders.insideV));
                }
            }
            const attrsStr = cellAttrs.length > 0 ? ` ${cellAttrs.join(" ")}` : "";
            html += `<td${attrsStr} style="${cellStyles.join(";")}">`;
            for (const para of cell.paragraphs) {
                html += renderParagraphToHtml(para, styleMap, numberingMap, imageMap, docDefaults);
            }
            if (cell.nestedTables) {
                for (const nestedTable of cell.nestedTables) {
                    html += renderTableToHtml(nestedTable, styleMap, numberingMap, themeColors, imageMap, docDefaults);
                }
            }
            html += "</td>";
            colIndex += span;
        }
        html += "</tr>";
    }
    html += "</table>";
    return html;
}

/** Build one `border-<side>` declaration from a parsed border (`size`, `style`, `color`). */
function tableEdgeBorderCss(side, border) {
    return `border-${side}:${border.size}pt ${getBorderStyleCss(border.style)} #${border.color}`;
}

/**
 * Count how many rows the cell starting at `gridCol` in row `rowIndex` covers:
 * 1 plus the number of directly following rows that have a `vMerge === "continue"`
 * cell starting at the same grid column.
 */
function countVerticalMergeRows(rows, rowIndex, gridCol) {
    let covered = 1;
    for (let r = rowIndex + 1; r < rows.length; r++) {
        let col = 0;
        let continues = false;
        for (const cell of rows[r].cells) {
            if (col >= gridCol) {
                continues = col === gridCol && cell.vMerge === "continue";
                break;
            }
            col += cell.gridSpan ?? 1;
        }
        if (!continues) {
            break;
        }
        covered++;
    }
    return covered;
}
|
|
1689
|
+
/**
 * Render an inline image as an `<img>` tag.
 *
 * @param {object} img - Parsed image; reads `rId`, `width`, `height`, `alt`,
 *   `rotation`, `flipH`, `flipV`, `shadow` and `border`.
 * @param {Map} imageMap - Relationship id -> value used verbatim as the `src` attribute.
 * @returns {string} The `<img>` markup, or "" when `imageMap` has no entry for `img.rId`.
 */
function renderImageToHtml(img, imageMap) {
    const src = imageMap.get(img.rId);
    if (!src) {
        return "";
    }
    // Every size below goes through emuToPx and is rounded to a whole pixel.
    const toPx = (emu) => Math.round(emuToPx(emu));
    const css = [
        "max-width:100%",
        `width:${toPx(img.width)}px`,
        `height:${toPx(img.height)}px`,
        "display:block",
        "margin:8px 0",
    ];

    // Rotation and flips are combined into a single transform declaration.
    const transformParts = [
        img.rotation ? `rotate(${img.rotation}deg)` : "",
        img.flipH ? "scaleX(-1)" : "",
        img.flipV ? "scaleY(-1)" : "",
    ].filter((part) => part !== "");
    if (transformParts.length > 0) {
        css.push(`transform:${transformParts.join(" ")}`);
    }

    const { shadow, border } = img;
    if (shadow) {
        const blur = toPx(shadow.blurRadius);
        const dx = shadow.offsetX ? toPx(shadow.offsetX) : 0;
        const dy = shadow.offsetY ? toPx(shadow.offsetY) : 0;
        // shadow.color is read as three two-character hex pairs (RR, GG, BB).
        const [red, green, blue] = [0, 2, 4].map((start) => parseInt(shadow.color.substring(start, start + 2), 16));
        css.push(`box-shadow:${dx}px ${dy}px ${blur}px rgba(${red},${green},${blue},${shadow.alpha / 100})`);
    }
    if (border) {
        css.push(`border:${toPx(border.width)}px solid #${border.color}`);
    }

    const altAttr = img.alt ? ` alt="${escapeHtml(img.alt)}"` : "";
    return `<img src="${src}"${altAttr} style="${css.join(";")}">`;
}
|
|
1735
|
+
/**
 * Render positioned elements (shapes, decorative frames, textboxes, images) as
 * one styled `<div>` per element, concatenated into a single HTML string.
 *
 * Elements with `wrapType === "topAndBottom"` are laid out with
 * `position:relative` and margins; every other element is `position:absolute`
 * and receives a z-index (-1 when `behindDoc` is set, otherwise 1).
 *
 * @param {Array<object>} positionedElements - Elements to render.
 * @param {object} sectionProps - Not read by this function.
 * @param {Map} styleMap - Forwarded to renderParagraphToHtml for textbox paragraphs.
 * @param {Map} themeColors - Not read by this function.
 * @param {Map} imageMap - Relationship id -> value used as the `src` of positioned images.
 * @param {object} [docDefaults] - Forwarded to renderParagraphToHtml.
 * @returns {string} Concatenated markup; "" for an empty list.
 */
function renderPositionedElements(positionedElements, sectionProps, styleMap, themeColors, imageMap, docDefaults = {}) {
    if (positionedElements.length === 0) {
        return "";
    }
    const toPx = (emu) => Math.round(emuToPx(emu));
    const fragments = [];
    for (const el of positionedElements) {
        const css = [];
        let inner = "";
        const inFlow = el.wrapType === "topAndBottom";
        const isImage = el.type === "image";

        // Box size.
        const boxWidth = toPx(el.width);
        const boxHeight = toPx(el.height);
        css.push(`width:${boxWidth}px`, `height:${boxHeight}px`);

        if (inFlow) {
            // Stays in normal flow; centered unless positionH says otherwise.
            css.push("position:relative", "margin:0 auto");
            switch (el.positionH) {
                case "center":
                    css.push("margin-left:auto", "margin-right:auto");
                    break;
                case "left":
                    css.push("margin-left:0", "margin-right:auto");
                    break;
                case "right":
                    css.push("margin-left:auto", "margin-right:0");
                    break;
                default:
                    break;
            }
        }
        else {
            css.push("position:absolute");
            // Horizontal placement: named alignment first, explicit offset as fallback.
            switch (el.positionH) {
                case "center":
                    css.push("left:50%", `margin-left:${-boxWidth / 2}px`);
                    break;
                case "right":
                    css.push("right:0");
                    break;
                case "left":
                    css.push("left:0");
                    break;
                default:
                    if (el.offsetX !== undefined) {
                        css.push(`left:${toPx(el.offsetX)}px`);
                    }
            }
            // Vertical placement follows the same rule.
            switch (el.positionV) {
                case "center":
                    css.push("top:50%", `margin-top:${-boxHeight / 2}px`);
                    break;
                case "bottom":
                    css.push("bottom:0");
                    break;
                case "top":
                    css.push("top:0");
                    break;
                default:
                    if (el.offsetY !== undefined) {
                        css.push(`top:${toPx(el.offsetY)}px`);
                    }
            }
            // Stacking only applies to absolutely positioned elements.
            css.push(el.behindDoc ? "z-index:-1" : "z-index:1");
        }

        if (isImage && el.imageRId) {
            const src = imageMap.get(el.imageRId);
            if (src) {
                const transformParts = [];
                if (el.imageRotation) {
                    transformParts.push(`rotate(${el.imageRotation}deg)`);
                }
                if (el.imageFlipH) {
                    transformParts.push("scaleX(-1)");
                }
                if (el.imageFlipV) {
                    transformParts.push("scaleY(-1)");
                }
                if (transformParts.length > 0) {
                    css.push(`transform:${transformParts.join(" ")}`);
                }
                const shadow = el.imageShadow;
                if (shadow) {
                    const blur = toPx(shadow.blurRadius);
                    const dx = shadow.offsetX ? toPx(shadow.offsetX) : 0;
                    const dy = shadow.offsetY ? toPx(shadow.offsetY) : 0;
                    const red = parseInt(shadow.color.substring(0, 2), 16);
                    const green = parseInt(shadow.color.substring(2, 4), 16);
                    const blue = parseInt(shadow.color.substring(4, 6), 16);
                    css.push(`box-shadow:${dx}px ${dy}px ${blur}px rgba(${red},${green},${blue},${shadow.alpha / 100})`);
                }
                if (el.imageBorder) {
                    css.push(`border:${toPx(el.imageBorder.width)}px solid #${el.imageBorder.color}`);
                }
                inner = `<img src="${src}" style="width:100%;height:100%;object-fit:cover">`;
            }
        }
        else if (el.shapeType === "frame" && el.fillColor) {
            // A frame is drawn as a border around a transparent center; the border
            // thickness is a fraction (shapeAdjust, default 0.025) of the shorter side.
            const fraction = el.shapeAdjust ?? 0.025;
            const thickness = Math.round(Math.min(boxWidth, boxHeight) * fraction);
            css.push(`border:${thickness}px solid ${el.fillColor}`, "box-sizing:border-box");
        }
        else if (el.gradientFill) {
            const { angle, stops } = el.gradientFill;
            // DrawingML 0° = right, 90° = top; CSS 0° = top, 90° = right.
            const cssAngle = (90 - (angle ?? 90) + 360) % 360;
            const stopList = stops.map((stop) => `${stop.color} ${stop.position}%`).join(", ");
            css.push(`background:linear-gradient(${cssAngle}deg, ${stopList})`, "box-sizing:border-box");
        }
        else if (el.fillColor) {
            css.push(`background-color:${el.fillColor}`, "box-sizing:border-box");
        }

        const hasText = el.type === "textbox" && el.textElements && el.textElements.length > 0;
        if (hasText) {
            css.push("overflow:hidden", "padding:8px 12px", "pointer-events:auto");
            for (const para of el.textElements) {
                // Textbox paragraphs are rendered with empty numbering and image maps.
                inner += renderParagraphToHtml(para, styleMap, new Map(), new Map(), docDefaults);
            }
        }
        else if (!isImage && !inFlow) {
            // Absolutely positioned shapes without text should not intercept pointer events.
            css.push("pointer-events:none");
        }

        fragments.push(`<div style="${css.join(";")}">${inner}</div>`);
    }
    return fragments.join("");
}
|
|
1895
|
+
/**
 * Generate CSS for a multi-column layout if needed.
 *
 * @param {object} sectionProps - Section properties; reads `columns` and `columnSpace`.
 * @returns {string} `column-count`/`column-gap` declarations, or "" when the
 *   section has no more than one column.
 */
function generateColumnLayoutCss(sectionProps) {
    if (!sectionProps.columns || sectionProps.columns <= 1) {
        return "";
    }
    const { columnSpace } = sectionProps;
    // An explicit 0 is a valid gap; only a missing value falls back to the 36px default.
    const hasColumnSpace = Boolean(columnSpace) || columnSpace === 0;
    const gap = hasColumnSpace ? twipsToPx(columnSpace) : 36;
    return `column-count:${sectionProps.columns};column-gap:${gap}px;`;
}
|
|
1904
|
+
/**
 * Build the base stylesheet text for an imported document.
 *
 * Emits fixed rules for `p`, `table`, `img`, `ul`/`ol` and `li`, followed by one
 * rule for each of `h1`..`h6`. A heading rule is derived from the first style in
 * `styleMap` whose name (or id, when it has no name) matches "heading N"; levels
 * without such a style get a built-in fallback rule.
 *
 * @param {Map} styleMap - Style id -> style definition; `name`, `rPr` and `pPr` are read.
 * @param {{major: string}} themeFonts - `major` is the font used when a heading style sets none.
 * @returns {string} CSS rules joined with newlines.
 */
function generateStylesCss(styleMap, themeFonts) {
    // Point sizes used for h1..h6 when the style does not define one.
    const fallbackSizesPt = [24, 18, 14, 12, 11, 10];
    const themeFontDecl = () => `font-family: '${themeFonts.major}', 'Segoe UI', Arial, sans-serif`;
    const borderDecl = (side, b) => `border-${side}: ${b.size}pt ${getBorderStyleCss(b.style)} #${b.color}`;

    const rulesByLevel = new Map();
    for (const [styleId, style] of styleMap) {
        const label = style.name?.toLowerCase() ?? styleId.toLowerCase();
        const found = label.match(/heading\s*(\d)/i);
        if (!found) {
            continue;
        }
        const level = parseInt(found[1], 10);
        // Only levels 1..6 map to HTML headings, and the first definition per level wins.
        if (level < 1 || level > 6 || rulesByLevel.has(level)) {
            continue;
        }
        const { rPr, pPr } = style;
        const decls = [
            rPr?.fontFamily ? `font-family: '${rPr.fontFamily}', sans-serif` : themeFontDecl(),
            `font-size: ${rPr?.fontSize ? rPr.fontSize : fallbackSizesPt[level - 1]}pt`,
        ];
        // Headings are bold unless the style explicitly sets bold to false.
        if (rPr?.bold !== false) {
            decls.push(`font-weight: bold`);
        }
        if (rPr?.color) {
            decls.push(`color: ${rPr.color}`);
        }
        if (rPr?.caps) {
            decls.push(`text-transform: uppercase`);
        }
        if (rPr?.smallCaps) {
            decls.push(`font-variant: small-caps`);
        }
        decls.push(pPr?.spacingBefore !== undefined
            ? `margin-top: ${twipsToPx(pPr.spacingBefore)}px`
            : `margin-top: 1em`);
        decls.push(pPr?.spacingAfter !== undefined
            ? `margin-bottom: ${twipsToPx(pPr.spacingAfter)}px`
            : `margin-bottom: 0.5em`);
        decls.push(`line-height: 1.2`);
        // Only top and bottom borders are carried into the heading rule.
        const edges = pPr?.borders;
        if (edges) {
            if (edges.top) {
                decls.push(borderDecl("top", edges.top));
            }
            if (edges.bottom) {
                decls.push(borderDecl("bottom", edges.bottom));
            }
        }
        rulesByLevel.set(level, `h${level} { ${decls.join("; ")}; }`);
    }

    const rules = [
        `p { margin: 0 0 10pt 0; }`,
        `table { margin: 10pt 0; border-collapse: collapse; }`,
        `img { max-width: 100%; }`,
        `ul, ol { margin: 0 0 10pt 0; padding-left: 24px; }`,
        `li { margin: 0 0 4pt 0; }`,
    ];
    // Heading rules are appended in level order, with a fallback for undefined levels.
    for (let level = 1; level <= 6; level++) {
        if (rulesByLevel.has(level)) {
            rules.push(rulesByLevel.get(level));
            continue;
        }
        rules.push(`h${level} { ${themeFontDecl()}; font-size: ${fallbackSizesPt[level - 1]}pt; font-weight: bold; margin: 1em 0 0.5em 0; line-height: 1.2; }`);
    }
    return rules.join("\n");
}
|
|
2002
|
+
// ============================================================================
|
|
2003
|
+
// Main Export Function
|
|
2004
|
+
// ============================================================================
|
|
2005
|
+
/**
|
|
2006
|
+
* Imports a DOCX file and returns an HTML string.
|
|
2007
|
+
* @param arrayBuffer - The DOCX file as an ArrayBuffer
|
|
2008
|
+
* @returns HTML string representing the document
|
|
2009
|
+
*/
|
|
2010
|
+
export default async function importDocx(arrayBuffer) {
|
|
2011
|
+
const zip = await JSZip.loadAsync(arrayBuffer);
|
|
2012
|
+
const parser = new DOMParser();
|
|
2013
|
+
// Parse document.xml
|
|
2014
|
+
const docXml = await zip.file("word/document.xml")?.async("text");
|
|
2015
|
+
if (!docXml)
|
|
2016
|
+
throw new Error("Invalid DOCX: no document.xml");
|
|
2017
|
+
const docDoc = parser.parseFromString(docXml, "application/xml");
|
|
2018
|
+
// Parse theme for colors and fonts
|
|
2019
|
+
let themeColors = new Map();
|
|
2020
|
+
let themeFonts = { major: "Calibri", minor: "Calibri" };
|
|
2021
|
+
const themeXml = await zip.file("word/theme/theme1.xml")?.async("text");
|
|
2022
|
+
if (themeXml) {
|
|
2023
|
+
const themeDoc = parser.parseFromString(themeXml, "application/xml");
|
|
2024
|
+
themeColors = parseThemeColors(themeDoc);
|
|
2025
|
+
themeFonts = parseThemeFonts(themeDoc);
|
|
2026
|
+
}
|
|
2027
|
+
// Parse styles - try multiple locations
|
|
2028
|
+
let styleMap = new Map();
|
|
2029
|
+
let docDefaults = {};
|
|
2030
|
+
// Try word/styles.xml first (standard location)
|
|
2031
|
+
let stylesXml = await zip.file("word/styles.xml")?.async("text");
|
|
2032
|
+
// If not found, try glossary/styles.xml (used in some templates)
|
|
2033
|
+
if (!stylesXml) {
|
|
2034
|
+
stylesXml = await zip.file("word/glossary/styles.xml")?.async("text");
|
|
2035
|
+
}
|
|
2036
|
+
if (stylesXml) {
|
|
2037
|
+
const stylesDoc = parser.parseFromString(stylesXml, "application/xml");
|
|
2038
|
+
const parsed = parseStyles(stylesDoc, themeColors, themeFonts);
|
|
2039
|
+
styleMap = parsed.styles;
|
|
2040
|
+
docDefaults = parsed.defaults;
|
|
2041
|
+
}
|
|
2042
|
+
// Parse numbering (for lists) - try multiple locations
|
|
2043
|
+
let numberingMap = new Map();
|
|
2044
|
+
let numberingXml = await zip.file("word/numbering.xml")?.async("text");
|
|
2045
|
+
// If not found, try glossary/numbering.xml
|
|
2046
|
+
if (!numberingXml) {
|
|
2047
|
+
numberingXml = await zip.file("word/glossary/numbering.xml")?.async("text");
|
|
2048
|
+
}
|
|
2049
|
+
if (numberingXml) {
|
|
2050
|
+
const numberingDoc = parser.parseFromString(numberingXml, "application/xml");
|
|
2051
|
+
numberingMap = parseNumbering(numberingDoc);
|
|
2052
|
+
}
|
|
2053
|
+
// Build image map from relationships
|
|
2054
|
+
const imageMap = new Map();
|
|
2055
|
+
const headerFooterRels = new Map(); // rId -> file path
|
|
2056
|
+
const relsXml = await zip.file("word/_rels/document.xml.rels")?.async("text");
|
|
2057
|
+
if (relsXml) {
|
|
2058
|
+
const relsDoc = parser.parseFromString(relsXml, "application/xml");
|
|
2059
|
+
const rels = relsDoc.getElementsByTagName("Relationship");
|
|
2060
|
+
for (let i = 0; i < rels.length; i++) {
|
|
2061
|
+
const rel = rels[i];
|
|
2062
|
+
const type = rel.getAttribute("Type") ?? "";
|
|
2063
|
+
const rId = rel.getAttribute("Id");
|
|
2064
|
+
const target = rel.getAttribute("Target");
|
|
2065
|
+
if (!rId || !target)
|
|
2066
|
+
continue;
|
|
2067
|
+
// Track header/footer relationships
|
|
2068
|
+
if (type.includes("/header") || type.includes("/footer")) {
|
|
2069
|
+
const filePath = target.startsWith("/") ? target.slice(1) : "word/" + target;
|
|
2070
|
+
headerFooterRels.set(rId, filePath);
|
|
2071
|
+
continue;
|
|
2072
|
+
}
|
|
2073
|
+
if (!type.includes("/image"))
|
|
2074
|
+
continue;
|
|
2075
|
+
const mediaPath = target.startsWith("../")
|
|
2076
|
+
? "word/" + target.slice(3)
|
|
2077
|
+
: target.startsWith("/")
|
|
2078
|
+
? target.slice(1)
|
|
2079
|
+
: "word/" + target;
|
|
2080
|
+
const imgFile = zip.file(mediaPath);
|
|
2081
|
+
if (!imgFile)
|
|
2082
|
+
continue;
|
|
2083
|
+
const imgData = await imgFile.async("base64");
|
|
2084
|
+
const ext = mediaPath.split(".").pop()?.toLowerCase() ?? "png";
|
|
2085
|
+
const mime = ext === "jpg" || ext === "jpeg"
|
|
2086
|
+
? "image/jpeg"
|
|
2087
|
+
: ext === "gif"
|
|
2088
|
+
? "image/gif"
|
|
2089
|
+
: ext === "svg"
|
|
2090
|
+
? "image/svg+xml"
|
|
2091
|
+
: `image/${ext}`;
|
|
2092
|
+
imageMap.set(rId, `data:${mime};base64,${imgData}`);
|
|
2093
|
+
}
|
|
2094
|
+
}
|
|
2095
|
+
// Parse section properties from document
|
|
2096
|
+
const body = docDoc.getElementsByTagName("w:body")[0];
|
|
2097
|
+
const sectPr = body ? findChild(body, "sectPr") : null;
|
|
2098
|
+
const sectionProps = parseSectionProps(sectPr);
|
|
2099
|
+
// Parse headers and footers for positioned elements (decorative frames, etc.)
|
|
2100
|
+
const headers = [];
|
|
2101
|
+
const footers = [];
|
|
2102
|
+
for (const [rId, filePath] of headerFooterRels) {
|
|
2103
|
+
const hfXml = await zip.file(filePath)?.async("text");
|
|
2104
|
+
if (!hfXml)
|
|
2105
|
+
continue;
|
|
2106
|
+
const hfDoc = parser.parseFromString(hfXml, "application/xml");
|
|
2107
|
+
const isHeader = filePath.includes("header");
|
|
2108
|
+
const hf = parseHeaderFooter(hfDoc, isHeader ? "header" : "footer", themeColors, themeFonts);
|
|
2109
|
+
if (isHeader) {
|
|
2110
|
+
headers.push(hf);
|
|
2111
|
+
}
|
|
2112
|
+
else {
|
|
2113
|
+
footers.push(hf);
|
|
2114
|
+
}
|
|
2115
|
+
}
|
|
2116
|
+
// Collect all positioned elements from headers
|
|
2117
|
+
const positionedElements = [];
|
|
2118
|
+
for (const header of headers) {
|
|
2119
|
+
positionedElements.push(...header.positionedElements);
|
|
2120
|
+
}
|
|
2121
|
+
// Parse document elements and body positioned elements
|
|
2122
|
+
const parsedDoc = parseDocument(docDoc, themeColors, themeFonts);
|
|
2123
|
+
const elements = parsedDoc.elements;
|
|
2124
|
+
// Add positioned elements from document body
|
|
2125
|
+
positionedElements.push(...parsedDoc.positionedElements);
|
|
2126
|
+
// Separate inline elements (topAndBottom wrap) from absolutely positioned elements
|
|
2127
|
+
const inlinePositionedElements = positionedElements.filter(el => el.wrapType === "topAndBottom");
|
|
2128
|
+
const absolutePositionedElements = positionedElements.filter(el => el.wrapType !== "topAndBottom");
|
|
2129
|
+
// Calculate page dimensions for CSS
|
|
2130
|
+
const pageWidthPx = sectionProps.pageWidth ? twipsToPx(sectionProps.pageWidth) : 816;
|
|
2131
|
+
const marginLeftPx = sectionProps.marginLeft ? twipsToPx(sectionProps.marginLeft) : 72;
|
|
2132
|
+
const marginRightPx = sectionProps.marginRight ? twipsToPx(sectionProps.marginRight) : 72;
|
|
2133
|
+
const marginTopPx = sectionProps.marginTop ? twipsToPx(sectionProps.marginTop) : 72;
|
|
2134
|
+
const marginBottomPx = sectionProps.marginBottom ? twipsToPx(sectionProps.marginBottom) : 72;
|
|
2135
|
+
const contentWidth = pageWidthPx - marginLeftPx - marginRightPx;
|
|
2136
|
+
// Generate column layout CSS if multi-column
|
|
2137
|
+
const columnLayoutCss = generateColumnLayoutCss(sectionProps);
|
|
2138
|
+
// Check if we have absolutely positioned elements that need a relative container
|
|
2139
|
+
const hasAbsolutePositionedElements = absolutePositionedElements.length > 0;
|
|
2140
|
+
// Render to HTML - extract actual styles from document
|
|
2141
|
+
// Use computed page dimensions and margins from sectPr
|
|
2142
|
+
// Use document default color if available
|
|
2143
|
+
const defaultColor = docDefaults.rPr?.color ?? "#333";
|
|
2144
|
+
let html = `<!DOCTYPE html>
|
|
2145
|
+
<html>
|
|
2146
|
+
<head>
|
|
2147
|
+
<meta charset="UTF-8">
|
|
2148
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
2149
|
+
<style>
|
|
2150
|
+
body {
|
|
2151
|
+
font-family: '${themeFonts.minor}', 'Segoe UI', Arial, sans-serif;
|
|
2152
|
+
font-size: 11pt;
|
|
2153
|
+
line-height: 1.5;
|
|
2154
|
+
color: ${defaultColor};
|
|
2155
|
+
max-width: ${Math.round(pageWidthPx)}px;
|
|
2156
|
+
margin: 0 auto;
|
|
2157
|
+
padding: ${Math.round(marginTopPx)}px ${Math.round(marginRightPx)}px ${Math.round(marginBottomPx)}px ${Math.round(marginLeftPx)}px;
|
|
2158
|
+
background: #fff;
|
|
2159
|
+
${columnLayoutCss}
|
|
2160
|
+
${hasAbsolutePositionedElements ? "position:relative;" : ""}
|
|
2161
|
+
}
|
|
2162
|
+
${generateStylesCss(styleMap, themeFonts)}
|
|
2163
|
+
</style>
|
|
2164
|
+
</head>
|
|
2165
|
+
<body>
|
|
2166
|
+
`;
|
|
2167
|
+
// Render inline positioned elements (topAndBottom wrap) first - they appear before document content
|
|
2168
|
+
if (inlinePositionedElements.length > 0) {
|
|
2169
|
+
html += renderPositionedElements(inlinePositionedElements, sectionProps, styleMap, themeColors, imageMap, docDefaults);
|
|
2170
|
+
}
|
|
2171
|
+
// Render elements with list grouping
|
|
2172
|
+
html += renderElementsWithListGrouping(elements, styleMap, numberingMap, themeColors, imageMap, docDefaults);
|
|
2173
|
+
// Render absolutely positioned elements (they overlay content)
|
|
2174
|
+
if (absolutePositionedElements.length > 0) {
|
|
2175
|
+
html += renderPositionedElements(absolutePositionedElements, sectionProps, styleMap, themeColors, imageMap, docDefaults);
|
|
2176
|
+
}
|
|
2177
|
+
html += `
|
|
2178
|
+
</body>
|
|
2179
|
+
</html>`;
|
|
2180
|
+
return html;
|
|
2181
|
+
}
|
|
2182
|
+
/**
 * Render document elements, grouping consecutive list items into proper HTML lists.
 *
 * Paragraphs that carry numbering (directly, or through their paragraph style)
 * are emitted as <li> items inside a <ul> (when the numbering level's format is
 * "bullet") or an <ol> (any other or unknown format). A run of items shares one
 * list element as long as numId and ilvl stay the same; any change of either,
 * or any non-list paragraph, table or drawing, closes the open list first.
 * Levels are not nested: a deeper level becomes a sibling list indented by
 * 24px per level.
 *
 * @param {Array<{kind: string, data: any}>} elements - Parsed body elements in document order.
 * @param {Map<string, any>} styleMap - Styles keyed by style id; `pPr.numId` / `pPr.ilvl` are read here.
 * @param {Map<number, Map<number, any>>} numberingMap - numId -> (ilvl -> level definition with `numFmt`).
 * @param {any} themeColors - Passed through to the table renderer.
 * @param {Map<string, string>} imageMap - Passed through to the paragraph, list item, table and image renderers.
 * @param {object} [docDefaults={}] - Document default properties, passed through to the renderers.
 * @returns {string} The concatenated HTML for all elements.
 */
function renderElementsWithListGrouping(elements, styleMap, numberingMap, themeColors, imageMap, docDefaults = {}) {
    let html = "";
    // The list currently open in the output ({ numId, ilvl, isBullet }), or null.
    let openList = null;
    /**
     * Emit the closing tag of the open list, if there is one, and forget it.
     */
    const closeOpenList = () => {
        if (openList) {
            html += openList.isBullet ? "</ul>" : "</ol>";
            openList = null;
        }
    };
    /**
     * Get numId and ilvl from paragraph props or from its style.
     * Returns null when the paragraph is not a list item (no numId, or numId <= 0).
     */
    const getListInfo = (para) => {
        // The paragraph's own numPr wins over anything the style declares
        const ownNumId = para.props.numId;
        if (ownNumId !== undefined && ownNumId > 0) {
            return { numId: ownNumId, ilvl: para.props.ilvl ?? 0 };
        }
        // Otherwise fall back to numbering declared on the paragraph style
        if (para.props.styleId) {
            const stylePPr = styleMap.get(para.props.styleId)?.pPr;
            if (stylePPr?.numId !== undefined && stylePPr.numId > 0) {
                return { numId: stylePPr.numId, ilvl: stylePPr.ilvl ?? 0 };
            }
        }
        return null;
    };
    for (const el of elements) {
        if (el.kind === "paragraph") {
            const para = el.data;
            const listInfo = getListInfo(para);
            if (!listInfo) {
                // Plain paragraph - it terminates any list in progress
                closeOpenList();
                html += renderParagraphToHtml(para, styleMap, numberingMap, imageMap, docDefaults);
                continue;
            }
            const { numId, ilvl } = listInfo;
            // A different numbering instance or level starts a fresh list element
            if (!openList || openList.numId !== numId || openList.ilvl !== ilvl) {
                closeOpenList();
                // Unknown numbering definitions or levels fall back to an ordered list
                const isBullet = numberingMap.get(numId)?.get(ilvl)?.numFmt === "bullet";
                openList = { numId, ilvl, isBullet };
                const listStyle = ilvl > 0 ? ` style="margin-left:${ilvl * 24}px"` : "";
                html += isBullet ? `<ul${listStyle}>` : `<ol${listStyle}>`;
            }
            // Render as list item (the marker itself comes from the <ul>/<ol>)
            html += renderListItemToHtml(para, styleMap, imageMap, docDefaults);
        }
        else if (el.kind === "table") {
            closeOpenList();
            html += renderTableToHtml(el.data, styleMap, numberingMap, themeColors, imageMap, docDefaults);
        }
        else if (el.kind === "drawing") {
            // Every drawing ends the open list, but only images produce output here
            closeOpenList();
            if (el.data.type === "image") {
                html += renderImageToHtml(el.data.data, imageMap);
            }
        }
    }
    // Close final list if any
    closeOpenList();
    return html;
}
|
|
2264
|
+
/**
 * Render a paragraph as a list item (li element).
 *
 * Text styling is resolved from the document defaults overlaid with the run
 * properties of the paragraph's style (if any) and written as an inline
 * `style` attribute on the <li>. The list marker is not rendered here; it is
 * left to the enclosing <ul>/<ol>.
 *
 * @param {{runs: Array<any>, props: {styleId?: string}}} para - Parsed paragraph.
 * @param {Map<string, any>} styleMap - Styles keyed by style id; only `rPr` is read.
 * @param {Map<string, string>} [imageMap] - Passed through to the run renderer.
 * @param {{rPr?: object}} [docDefaults={}] - Document default run properties.
 * @returns {string} An `<li>` element; an item without content holds a non-breaking space.
 */
function renderListItemToHtml(para, styleMap, imageMap = new Map(), docDefaults = {}) {
    const { runs, props } = para;
    // The values below originate from the imported document, so they must not be
    // able to terminate the surrounding style="..." attribute.
    // NOTE(review): assumes run properties are stored unescaped by the parser - confirm.
    const escapeAttr = (value) => String(value)
        .replace(/&/g, "&amp;")
        .replace(/"/g, "&quot;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;");
    // Keeps a quote or backslash in a font name from ending the quoted CSS string early
    const escapeCssString = (value) => String(value)
        .replace(/\\/g, "\\\\")
        .replace(/'/g, "\\'");
    // Resolve style hierarchy for text styling: document defaults -> style
    const styleRunProps = props.styleId ? styleMap.get(props.styleId)?.rPr : undefined;
    const resolvedRunProps = { ...docDefaults.rPr, ...styleRunProps };
    // Apply inherited run properties
    const declarations = [];
    if (resolvedRunProps.fontSize) {
        declarations.push(`font-size:${resolvedRunProps.fontSize}pt`);
    }
    if (resolvedRunProps.fontFamily) {
        declarations.push(`font-family:'${escapeCssString(resolvedRunProps.fontFamily)}',sans-serif`);
    }
    if (resolvedRunProps.color) {
        declarations.push(`color:${resolvedRunProps.color}`);
    }
    if (resolvedRunProps.bold) {
        declarations.push("font-weight:bold");
    }
    if (resolvedRunProps.italic) {
        declarations.push("font-style:italic");
    }
    // Render runs
    const content = runs.map((run) => renderRunToHtml(run, imageMap)).join("");
    const styleAttr = declarations.length > 0 ? ` style="${escapeAttr(declarations.join(";"))}"` : "";
    // An explicit entity keeps an otherwise empty item from collapsing, whatever the file encoding
    return `<li${styleAttr}>${content || "&nbsp;"}</li>`;
}
|
|
2299
|
+
//# sourceMappingURL=import-docx.js.map
|