docgen-utils 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +118 -0
- package/dist/bundle.js +36086 -0
- package/dist/bundle.min.js +197 -0
- package/dist/cli.js +47432 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +9 -0
- package/dist/index.js.map +1 -0
- package/dist/packages/cli/commands/export-docs.d.ts +5 -0
- package/dist/packages/cli/commands/export-docs.d.ts.map +1 -0
- package/dist/packages/cli/commands/export-docs.js +24 -0
- package/dist/packages/cli/commands/export-docs.js.map +1 -0
- package/dist/packages/cli/commands/export-slides.d.ts +5 -0
- package/dist/packages/cli/commands/export-slides.d.ts.map +1 -0
- package/dist/packages/cli/commands/export-slides.js +86 -0
- package/dist/packages/cli/commands/export-slides.js.map +1 -0
- package/dist/packages/cli/commands/import-docx.d.ts +5 -0
- package/dist/packages/cli/commands/import-docx.d.ts.map +1 -0
- package/dist/packages/cli/commands/import-docx.js +27 -0
- package/dist/packages/cli/commands/import-docx.js.map +1 -0
- package/dist/packages/cli/commands/import-pptx.d.ts +5 -0
- package/dist/packages/cli/commands/import-pptx.d.ts.map +1 -0
- package/dist/packages/cli/commands/import-pptx.js +44 -0
- package/dist/packages/cli/commands/import-pptx.js.map +1 -0
- package/dist/packages/cli/index.d.ts +11 -0
- package/dist/packages/cli/index.d.ts.map +1 -0
- package/dist/packages/cli/index.js +103 -0
- package/dist/packages/cli/index.js.map +1 -0
- package/dist/packages/docs/common.d.ts +183 -0
- package/dist/packages/docs/common.d.ts.map +1 -0
- package/dist/packages/docs/common.js +27 -0
- package/dist/packages/docs/common.js.map +1 -0
- package/dist/packages/docs/convert.d.ts +7 -0
- package/dist/packages/docs/convert.d.ts.map +1 -0
- package/dist/packages/docs/convert.js +1399 -0
- package/dist/packages/docs/convert.js.map +1 -0
- package/dist/packages/docs/create-document.d.ts +30 -0
- package/dist/packages/docs/create-document.d.ts.map +1 -0
- package/dist/packages/docs/create-document.js +170 -0
- package/dist/packages/docs/create-document.js.map +1 -0
- package/dist/packages/docs/export.d.ts +57 -0
- package/dist/packages/docs/export.d.ts.map +1 -0
- package/dist/packages/docs/export.js +430 -0
- package/dist/packages/docs/export.js.map +1 -0
- package/dist/packages/docs/import-docx.d.ts +13 -0
- package/dist/packages/docs/import-docx.d.ts.map +1 -0
- package/dist/packages/docs/import-docx.js +2299 -0
- package/dist/packages/docs/import-docx.js.map +1 -0
- package/dist/packages/docs/parse.d.ts +6 -0
- package/dist/packages/docs/parse.d.ts.map +1 -0
- package/dist/packages/docs/parse.js +4253 -0
- package/dist/packages/docs/parse.js.map +1 -0
- package/dist/packages/shared/dom-parser-shim.d.ts +30 -0
- package/dist/packages/shared/dom-parser-shim.d.ts.map +1 -0
- package/dist/packages/shared/dom-parser-shim.js +152 -0
- package/dist/packages/shared/dom-parser-shim.js.map +1 -0
- package/dist/packages/slides/common.d.ts +325 -0
- package/dist/packages/slides/common.d.ts.map +1 -0
- package/dist/packages/slides/common.js +12 -0
- package/dist/packages/slides/common.js.map +1 -0
- package/dist/packages/slides/convert.d.ts +35 -0
- package/dist/packages/slides/convert.d.ts.map +1 -0
- package/dist/packages/slides/convert.js +308 -0
- package/dist/packages/slides/convert.js.map +1 -0
- package/dist/packages/slides/createPresentation.d.ts +51 -0
- package/dist/packages/slides/createPresentation.d.ts.map +1 -0
- package/dist/packages/slides/createPresentation.js +265 -0
- package/dist/packages/slides/createPresentation.js.map +1 -0
- package/dist/packages/slides/export.d.ts +24 -0
- package/dist/packages/slides/export.d.ts.map +1 -0
- package/dist/packages/slides/export.js +52 -0
- package/dist/packages/slides/export.js.map +1 -0
- package/dist/packages/slides/import-pptx.d.ts +13 -0
- package/dist/packages/slides/import-pptx.d.ts.map +1 -0
- package/dist/packages/slides/import-pptx.js +619 -0
- package/dist/packages/slides/import-pptx.js.map +1 -0
- package/dist/packages/slides/parse.d.ts +45 -0
- package/dist/packages/slides/parse.d.ts.map +1 -0
- package/dist/packages/slides/parse.js +1185 -0
- package/dist/packages/slides/parse.js.map +1 -0
- package/dist/packages/slides/transform.d.ts +37 -0
- package/dist/packages/slides/transform.d.ts.map +1 -0
- package/dist/packages/slides/transform.js +140 -0
- package/dist/packages/slides/transform.js.map +1 -0
- package/dist/packages/slides/vendor/VENDORING.md +58 -0
- package/dist/packages/slides/vendor/pptxgen.d.ts +805 -0
- package/dist/packages/slides/vendor/pptxgen.js +7442 -0
- package/package.json +57 -0
|
@@ -0,0 +1,4253 @@
|
|
|
1
|
+
import { CONTAINER_TAGS } from "./common";
|
|
2
|
+
/**
 * Strip every `@media ... { ... }` block out of a stylesheet string.
 *
 * The scan is character based: whenever the six characters at the cursor
 * spell "@media" (in any letter case), everything from there through the
 * brace that balances the block's first "{" is dropped, so rules nested
 * inside the media query disappear together with it.
 *
 * @param {string} cssText - Raw CSS text.
 * @returns {string} The CSS without its @media blocks. When an "@media" has
 *   no "{" after it, the remainder of the input is kept verbatim; when a
 *   block is never closed, everything from its start onwards is dropped.
 */
function removeMediaQueries(cssText) {
    const kept = [];
    const total = cssText.length;
    let pos = 0;
    while (pos < total) {
        const atMediaRule = cssText.substring(pos, pos + 6).toLowerCase() === "@media";
        if (!atMediaRule) {
            // Ordinary character: copy it through unchanged.
            kept.push(cssText[pos]);
            pos += 1;
            continue;
        }
        const openBrace = cssText.indexOf("{", pos);
        if (openBrace === -1) {
            // Malformed CSS (no block body): keep the tail untouched.
            kept.push(cssText.substring(pos));
            break;
        }
        // Advance until the brace opened at `openBrace` has been balanced
        // (or the input runs out).
        let nesting = 1;
        let cursor = openBrace + 1;
        for (; cursor < total && nesting > 0; cursor += 1) {
            const ch = cssText[cursor];
            if (ch === "{") {
                nesting += 1;
            }
            else if (ch === "}") {
                nesting -= 1;
            }
        }
        // Resume right after the closing brace, discarding the whole block.
        pos = cursor;
    }
    return kept.join("");
}
|
|
41
|
+
/**
 * Extract the first color stop from a CSS gradient value.
 *
 * Recognises linear-gradient, radial-gradient and conic-gradient. The
 * gradient's argument list is located by balancing parentheses, so color
 * functions such as rgb()/rgba()/hsl() inside the gradient are kept intact.
 *
 * Examples:
 *   "linear-gradient(135deg, #7c3aed, #a78bfa, #c084fc)" -> "#7c3aed"
 *   "linear-gradient(to right, red, blue)" -> "red"
 *   "radial-gradient(circle, rgb(124, 58, 237), purple)" -> "rgb(124, 58, 237)"
 *
 * @param {string | undefined | null} value - A CSS property value.
 * @returns {string | undefined} The first color stop exactly as written
 *   (hex, rgb/rgba, hsl/hsla or a bare word), without any trailing
 *   percentage position; `undefined` when the value is empty, is not a
 *   gradient, or contains no recognisable color stop.
 */
function extractFirstGradientColor(value) {
    if (!value)
        return undefined;
    const opener = /(?:linear|radial|conic)-gradient\s*\(/i.exec(value);
    if (!opener)
        return undefined;
    const contentStart = opener.index + opener[0].length;
    // Find the ")" that balances the gradient's own "(". Stopping at the
    // first ")" would cut the argument list short inside rgb(...)/hsl(...).
    let contentEnd = -1;
    let nesting = 1;
    for (let i = contentStart; i < value.length; i++) {
        const ch = value[i];
        if (ch === "(") {
            nesting++;
        }
        else if (ch === ")") {
            nesting--;
            if (nesting === 0) {
                contentEnd = i;
                break;
            }
        }
    }
    if (contentEnd === -1) {
        // Unbalanced input: fall back to the first ")" so malformed values
        // are treated the same way they always were.
        contentEnd = value.indexOf(")", contentStart);
    }
    if (contentEnd <= contentStart)
        return undefined;
    const gradientContent = value.slice(contentStart, contentEnd);
    // Split on top-level commas only; commas inside rgb()/hsl() stay put.
    const parts = [];
    let current = "";
    let parenDepth = 0;
    for (const char of gradientContent) {
        if (char === "," && parenDepth === 0) {
            parts.push(current.trim());
            current = "";
            continue;
        }
        if (char === "(") {
            parenDepth++;
        }
        else if (char === ")") {
            parenDepth--;
        }
        current += char;
    }
    if (current.trim()) {
        parts.push(current.trim());
    }
    // Direction/angle/shape prefixes that are not color stops.
    const directionKeywords = /^(?:\d+deg|to\s+(?:top|bottom|left|right|top\s+left|top\s+right|bottom\s+left|bottom\s+right)|circle|ellipse|at\s+|closest-side|farthest-side|closest-corner|farthest-corner)/i;
    // A color, optionally followed by an integer percentage stop position.
    const colorPattern = /^(?:#[0-9a-fA-F]{3,8}|rgba?\s*\([^)]+\)|hsla?\s*\([^)]+\)|[a-zA-Z]+)(?:\s+\d+%)?$/i;
    for (const part of parts) {
        if (directionKeywords.test(part))
            continue;
        if (colorPattern.test(part)) {
            // Drop the stop position, keep only the color itself.
            return part.replace(/\s+\d+%$/, "").trim();
        }
    }
    return undefined;
}
|
|
102
|
+
/**
 * Parse a CSS linear-gradient value into a TextGradient structure.
 * Extracts the angle and all color stops for use in DOCX gradient fills.
 *
 * The gradient's argument list is located by balancing parentheses, so
 * stops written as rgb()/rgba()/hsl() are parsed rather than truncated.
 *
 * Example:
 *   "linear-gradient(135deg, #7c3aed, #a78bfa, #c084fc)" ->
 *   { angle: 135, stops: [{ color: "7C3AED", position: 0 }, { color: "A78BFA", position: 50 }, { color: "C084FC", position: 100 }] }
 *
 * @param {string | undefined | null} value - A CSS property value.
 * @returns {{ angle: number, stops: Array<{ color: string, position: number }> } | undefined}
 *   `angle` is in degrees (default 180, i.e. top to bottom; `to <side>`
 *   keywords are mapped to multiples of 45; negative angles are mapped to
 *   the equivalent angle in [0, 360)). Each stop's `color` is whatever
 *   normalizeColorToHex returns and `position` is a rounded percentage;
 *   stops without an explicit percentage are spread evenly by index.
 *   Returns `undefined` when the value is empty, is not a linear gradient,
 *   or yields fewer than two color stops.
 */
function parseGradient(value) {
    if (!value)
        return undefined;
    // Only linear gradients are supported in DOCX.
    const opener = /linear-gradient\s*\(/i.exec(value);
    if (!opener)
        return undefined;
    const contentStart = opener.index + opener[0].length;
    // Find the ")" that balances the gradient's own "(". Stopping at the
    // first ")" would cut the argument list short inside rgb(...)/hsl(...).
    let contentEnd = -1;
    let nesting = 1;
    for (let i = contentStart; i < value.length; i++) {
        const ch = value[i];
        if (ch === "(") {
            nesting++;
        }
        else if (ch === ")") {
            nesting--;
            if (nesting === 0) {
                contentEnd = i;
                break;
            }
        }
    }
    if (contentEnd === -1) {
        // Unbalanced input: fall back to the first ")" so malformed values
        // are treated the same way they always were.
        contentEnd = value.indexOf(")", contentStart);
    }
    if (contentEnd <= contentStart)
        return undefined;
    const gradientContent = value.slice(contentStart, contentEnd);
    // Split on top-level commas only; commas inside rgb()/hsl() stay put.
    const parts = [];
    let current = "";
    let parenDepth = 0;
    for (const char of gradientContent) {
        if (char === "," && parenDepth === 0) {
            parts.push(current.trim());
            current = "";
            continue;
        }
        if (char === "(") {
            parenDepth++;
        }
        else if (char === ")") {
            parenDepth--;
        }
        current += char;
    }
    if (current.trim()) {
        parts.push(current.trim());
    }
    // The first argument may be an angle or a "to <side>" direction.
    let angle = 180; // Default: top to bottom
    let colorStartIndex = 0;
    if (parts.length > 0) {
        const firstPart = parts[0];
        // Signed and fractional angles are valid CSS; if they were not
        // recognised here they would be counted as a (broken) color stop
        // and skew the evenly distributed positions below.
        const degMatch = firstPart.match(/^(-?\d*\.?\d+)deg$/i);
        if (degMatch) {
            let degrees = Math.round(parseFloat(degMatch[1]));
            if (degrees <= 0) {
                // Map negative angles (and -0) onto [0, 360).
                degrees = ((degrees % 360) + 360) % 360;
            }
            angle = degrees;
            colorStartIndex = 1;
        }
        else if (/^to\s+/i.test(firstPart)) {
            const direction = firstPart.toLowerCase();
            const has = (side) => direction.includes(side);
            if (has("right") && has("bottom"))
                angle = 135;
            else if (has("right") && has("top"))
                angle = 45;
            else if (has("left") && has("bottom"))
                angle = 225;
            else if (has("left") && has("top"))
                angle = 315;
            else if (has("right"))
                angle = 90;
            else if (has("left"))
                angle = 270;
            else if (has("bottom"))
                angle = 180;
            else if (has("top"))
                angle = 0;
            colorStartIndex = 1;
        }
    }
    const colorParts = parts.slice(colorStartIndex);
    if (colorParts.length === 0)
        return undefined;
    const stops = [];
    // A color, optionally followed by a (possibly fractional) percentage.
    const colorPattern = /^(#[0-9a-fA-F]{3,8}|rgba?\s*\([^)]+\)|hsla?\s*\([^)]+\)|[a-zA-Z]+)(?:\s+(\d*\.?\d+)%)?$/i;
    for (let i = 0; i < colorParts.length; i++) {
        const match = colorParts[i].match(colorPattern);
        if (!match)
            continue;
        // Convert color to hex without "#".
        const color = normalizeColorToHex(match[1]);
        let position;
        if (match[2]) {
            position = parseFloat(match[2]);
        }
        else {
            // Evenly distribute if no position specified.
            position = colorParts.length === 1 ? 0 : (i / (colorParts.length - 1)) * 100;
        }
        stops.push({ color, position: Math.round(position) });
    }
    // A gradient needs at least two stops to be meaningful.
    if (stops.length < 2)
        return undefined;
    return { angle, stops };
}
|
|
205
|
+
/**
 * Normalize a CSS color value to a hex string without the leading "#".
 *
 * Supported inputs:
 *   - hex: #RGB, #RGBA, #RRGGBB, #RRGGBBAA (any alpha digits are dropped)
 *   - rgb()/rgba() with integer channels, comma or space separated
 *     (channels are clamped to 255, alpha is ignored)
 *   - hsl()/hsla() with percentage saturation/lightness (alpha is ignored)
 *   - a small table of common named colors ("transparent" maps to FFFFFF)
 *
 * @param {string} color - The CSS color text.
 * @returns {string} Upper-case hex digits for the forms listed above. Any
 *   other input is returned upper-cased with its first "#" removed, so the
 *   result is not guaranteed to be a valid hex color.
 */
function normalizeColorToHex(color) {
    const input = color.trim();
    const toHexByte = (n) => n.toString(16).padStart(2, "0");
    // Hex notation
    if (input.startsWith("#")) {
        let hex = input.slice(1).toUpperCase();
        if (hex.length === 3 || hex.length === 4) {
            // Expand shorthand (#RGB / #RGBA -> RRGGBB); the alpha digit of
            // the four-digit form is discarded.
            hex = hex[0] + hex[0] + hex[1] + hex[1] + hex[2] + hex[2];
        }
        else if (hex.length === 8) {
            // Strip alpha (#RRGGBBAA -> RRGGBB)
            hex = hex.slice(0, 6);
        }
        return hex;
    }
    // rgb()/rgba(): integer channels, separated by commas or whitespace
    const rgbMatch = input.match(/rgba?\s*\(\s*(\d+)\s*[,\s]\s*(\d+)\s*[,\s]\s*(\d+)/i);
    if (rgbMatch) {
        // Clamp so an out-of-range channel cannot yield a three-digit byte.
        const channels = [rgbMatch[1], rgbMatch[2], rgbMatch[3]].map((text) => Math.min(255, parseInt(text, 10)));
        return channels.map(toHexByte).join("").toUpperCase();
    }
    // hsl()/hsla(): hue in degrees, saturation and lightness in percent
    const hslMatch = input.match(/^hsla?\s*\(\s*(-?\d*\.?\d+)(?:deg)?\s*[,\s]\s*(\d*\.?\d+)%\s*[,\s]\s*(\d*\.?\d+)%/i);
    if (hslMatch) {
        const hue = ((parseFloat(hslMatch[1]) % 360) + 360) % 360;
        const saturation = Math.min(1, parseFloat(hslMatch[2]) / 100);
        const lightness = Math.min(1, parseFloat(hslMatch[3]) / 100);
        // HSL -> RGB conversion as given in the CSS Color specification.
        const chroma = saturation * Math.min(lightness, 1 - lightness);
        const channel = (offset) => {
            const k = (offset + hue / 30) % 12;
            const level = lightness - chroma * Math.max(-1, Math.min(k - 3, 9 - k, 1));
            return toHexByte(Math.round(level * 255));
        };
        return (channel(0) + channel(8) + channel(4)).toUpperCase();
    }
    // Named colors (common ones). A Map is used so that words such as
    // "constructor" cannot resolve to an inherited object member.
    const namedColors = new Map([
        ["red", "FF0000"], ["green", "008000"], ["blue", "0000FF"], ["white", "FFFFFF"], ["black", "000000"],
        ["yellow", "FFFF00"], ["cyan", "00FFFF"], ["magenta", "FF00FF"], ["orange", "FFA500"], ["purple", "800080"],
        ["pink", "FFC0CB"], ["gray", "808080"], ["grey", "808080"], ["navy", "000080"], ["teal", "008080"],
        ["maroon", "800000"], ["olive", "808000"], ["lime", "00FF00"], ["aqua", "00FFFF"], ["silver", "C0C0C0"],
        ["fuchsia", "FF00FF"], ["transparent", "FFFFFF"],
    ]);
    const named = namedColors.get(input.toLowerCase());
    if (named !== undefined) {
        return named;
    }
    // Fallback: return as-is (uppercase)
    return input.toUpperCase().replace("#", "");
}
|
|
247
|
+
/**
 * Extract the primary font name from a CSS font-family value.
 *
 * Handles font stacks like "'Source Sans Pro', -apple-system, sans-serif":
 * the stack is split on commas outside quotes, quotes are removed, and the
 * first entry that is not a fallback font is returned.
 *
 * "Fallback" covers the CSS generic families (sans-serif, serif, monospace,
 * cursive, fantasy, system-ui), any name starting with "-" (vendor
 * keywords such as -apple-system) and a list of common system UI fonts
 * (Arial, Helvetica, Roboto, Segoe UI, ...). Names are compared without
 * regard to letter case.
 *
 * @param {string | undefined | null} fontFamily - A CSS font-family value.
 * @returns {string | undefined} The first non-fallback font name as written
 *   (minus quotes); the first entry of the stack when every entry is a
 *   fallback; `undefined` when the input is empty or has no entries.
 */
function extractPrimaryFont(fontFamily) {
    if (!fontFamily)
        return undefined;
    const fonts = [];
    // Finish the entry collected so far and add it to the stack if non-empty.
    const pushEntry = (raw) => {
        const name = raw.trim().replace(/^['"]|['"]$/g, "");
        if (name)
            fonts.push(name);
    };
    // Split by comma, treating quoted names (which may contain commas) as
    // a single entry. The quote characters themselves are not collected.
    let current = "";
    let quoteChar = "";
    for (const char of fontFamily) {
        if (quoteChar === "" && (char === '"' || char === "'")) {
            quoteChar = char;
        }
        else if (quoteChar !== "" && char === quoteChar) {
            quoteChar = "";
        }
        else if (char === "," && quoteChar === "") {
            pushEntry(current);
            current = "";
        }
        else {
            current += char;
        }
    }
    pushEntry(current);
    // Fallback fonts to skip, stored lower-case for case-insensitive lookup.
    const fallbackFonts = new Set([
        "sans-serif", "serif", "monospace", "cursive", "fantasy", "system-ui",
        "-apple-system", "blinkmacsystemfont", "segoe ui", "roboto", "helvetica neue",
        "arial", "helvetica", "ubuntu", "cantarell", "fira sans", "droid sans",
        "noto sans", "oxygen", "open sans",
    ]);
    const primary = fonts.find((font) => !fallbackFonts.has(font.toLowerCase()) && !font.startsWith("-"));
    // If every font is a fallback, return the first one.
    return primary !== undefined ? primary : fonts[0];
}
|
|
303
|
+
/**
|
|
304
|
+
* Parse CSS variables and class color rules from a style element.
|
|
305
|
+
*/
|
|
306
|
+
function parseCssContext(doc) {
|
|
307
|
+
const variables = new Map();
|
|
308
|
+
const classColors = new Map();
|
|
309
|
+
const calloutStyles = new Map();
|
|
310
|
+
const classStyles = new Map();
|
|
311
|
+
const elementStyles = new Map();
|
|
312
|
+
const nestedStyles = new Map();
|
|
313
|
+
// Helper to resolve CSS variable or return value as-is
|
|
314
|
+
// Handles multiple var() references in a single value (e.g., "var(--a) var(--b)")
|
|
315
|
+
const resolveValue = (value) => {
|
|
316
|
+
// Replace all var() references with their resolved values
|
|
317
|
+
return value.replace(/var\s*\(\s*(--[a-zA-Z0-9-]+)\s*\)/g, (match, varName) => {
|
|
318
|
+
const resolved = variables.get(varName);
|
|
319
|
+
return resolved || match; // Keep original if not found
|
|
320
|
+
});
|
|
321
|
+
};
|
|
322
|
+
// Find all style elements
|
|
323
|
+
const styleElements = doc.querySelectorAll("style");
|
|
324
|
+
for (const styleEl of styleElements) {
|
|
325
|
+
let cssText = styleEl.textContent || "";
|
|
326
|
+
// Remove media queries to avoid mobile-specific styles overriding desktop defaults
|
|
327
|
+
// Use a function-based approach to correctly handle nested braces
|
|
328
|
+
cssText = removeMediaQueries(cssText);
|
|
329
|
+
// Extract CSS variables from :root { --name: value; }
|
|
330
|
+
const rootMatch = cssText.match(/:root\s*\{([^}]+)\}/);
|
|
331
|
+
if (rootMatch) {
|
|
332
|
+
const rootContent = rootMatch[1];
|
|
333
|
+
const varMatches = rootContent.matchAll(/--([a-zA-Z0-9-]+)\s*:\s*([^;]+)/g);
|
|
334
|
+
for (const match of varMatches) {
|
|
335
|
+
const varName = `--${match[1]}`;
|
|
336
|
+
const varValue = match[2].trim();
|
|
337
|
+
variables.set(varName, varValue);
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
// Also extract CSS variables from theme classes (e.g., .theme-professional)
|
|
341
|
+
// These are commonly used to scope CSS variables to body elements
|
|
342
|
+
const themeClassMatches = cssText.matchAll(/\.theme-[a-zA-Z0-9_-]+\s*\{([^}]+)\}/g);
|
|
343
|
+
for (const themeMatch of themeClassMatches) {
|
|
344
|
+
const themeContent = themeMatch[1];
|
|
345
|
+
const varMatches = themeContent.matchAll(/--([a-zA-Z0-9-]+)\s*:\s*([^;]+)/g);
|
|
346
|
+
for (const match of varMatches) {
|
|
347
|
+
const varName = `--${match[1]}`;
|
|
348
|
+
const varValue = match[2].trim();
|
|
349
|
+
// Only set if not already defined (prefer :root values)
|
|
350
|
+
if (!variables.has(varName)) {
|
|
351
|
+
variables.set(varName, varValue);
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
// Extract class color rules: .classname { color: value; }
|
|
356
|
+
// NOTE: Use negative lookbehind to exclude nested selectors like ".parent .child { ... }"
|
|
357
|
+
// The (?<!\S\s+) ensures we only match top-level class selectors
|
|
358
|
+
const classRuleMatches = cssText.matchAll(/(?:^|[;\n}])\s*\.([a-zA-Z0-9_-]+)\s*\{([^}]+)\}/gm);
|
|
359
|
+
for (const match of classRuleMatches) {
|
|
360
|
+
const fullMatch = match[0];
|
|
361
|
+
const className = match[1];
|
|
362
|
+
const ruleContent = match[2];
|
|
363
|
+
// Skip if this looks like a nested selector (has another class before it)
|
|
364
|
+
// Check if the match is preceded by another class selector on the same line
|
|
365
|
+
const precedingText = cssText.substring(0, match.index).split('\n').pop() || '';
|
|
366
|
+
if (precedingText.match(/\.[a-zA-Z0-9_-]+\s*$/)) {
|
|
367
|
+
// This is a nested selector like ".parent .child { ... }" - skip it here
|
|
368
|
+
// Nested selectors will be handled by storing parent-child relationships
|
|
369
|
+
continue;
|
|
370
|
+
}
|
|
371
|
+
// Extract ALL style properties for this class (generalized approach)
|
|
372
|
+
const style = {};
|
|
373
|
+
// Text color
|
|
374
|
+
const colorMatch = ruleContent.match(/(?:^|;)\s*color\s*:\s*([^;]+)/i);
|
|
375
|
+
if (colorMatch) {
|
|
376
|
+
const colorValue = resolveValue(colorMatch[1].trim());
|
|
377
|
+
classColors.set(className, colorValue);
|
|
378
|
+
style.color = colorValue;
|
|
379
|
+
}
|
|
380
|
+
// Background color
|
|
381
|
+
const bgMatch = ruleContent.match(/background(?:-color)?\s*:\s*([^;]+)/i);
|
|
382
|
+
if (bgMatch) {
|
|
383
|
+
const bgValue = resolveValue(bgMatch[1].trim());
|
|
384
|
+
// For gradient backgrounds, extract the first color as fallback for DOCX
|
|
385
|
+
// Also store the full gradient for containers that support gradient rendering
|
|
386
|
+
const gradientColor = extractFirstGradientColor(bgValue);
|
|
387
|
+
if (gradientColor) {
|
|
388
|
+
style.backgroundColor = gradientColor;
|
|
389
|
+
// Parse full gradient for container backgrounds (not text gradients)
|
|
390
|
+
const gradient = parseGradient(bgValue);
|
|
391
|
+
if (gradient) {
|
|
392
|
+
style.backgroundGradient = gradient;
|
|
393
|
+
}
|
|
394
|
+
}
|
|
395
|
+
else {
|
|
396
|
+
style.backgroundColor = bgValue;
|
|
397
|
+
}
|
|
398
|
+
}
|
|
399
|
+
// Gradient text detection: when background-clip: text is used with a gradient background,
|
|
400
|
+
// extract the first color from the gradient as fallback and also store full gradient
|
|
401
|
+
const hasBackgroundClipText = ruleContent.match(/(?:-webkit-)?background-clip\s*:\s*text/i);
|
|
402
|
+
if (hasBackgroundClipText && bgMatch) {
|
|
403
|
+
const bgValue = resolveValue(bgMatch[1].trim());
|
|
404
|
+
// Parse full gradient
|
|
405
|
+
const gradient = parseGradient(bgValue);
|
|
406
|
+
if (gradient) {
|
|
407
|
+
style.gradient = gradient;
|
|
408
|
+
// Also set fallback color from first stop
|
|
409
|
+
if (!style.color) {
|
|
410
|
+
style.color = gradient.stops[0]?.color;
|
|
411
|
+
if (style.color) {
|
|
412
|
+
classColors.set(className, style.color);
|
|
413
|
+
}
|
|
414
|
+
}
|
|
415
|
+
}
|
|
416
|
+
else {
|
|
417
|
+
// Fallback: extract first color if gradient parsing fails
|
|
418
|
+
const gradientColor = extractFirstGradientColor(bgValue);
|
|
419
|
+
if (gradientColor && !style.color) {
|
|
420
|
+
style.color = gradientColor;
|
|
421
|
+
classColors.set(className, gradientColor);
|
|
422
|
+
}
|
|
423
|
+
}
|
|
424
|
+
}
|
|
425
|
+
// Border color
|
|
426
|
+
const borderColorMatch = ruleContent.match(/border-color\s*:\s*([^;]+)/i);
|
|
427
|
+
if (borderColorMatch) {
|
|
428
|
+
style.borderColor = resolveValue(borderColorMatch[1].trim());
|
|
429
|
+
}
|
|
430
|
+
// Border shorthand (e.g., "1px solid #e5e7eb")
|
|
431
|
+
const borderMatch = ruleContent.match(/(?:^|;)\s*border\s*:\s*([^;]+)/i);
|
|
432
|
+
if (borderMatch) {
|
|
433
|
+
style.border = resolveValue(borderMatch[1].trim());
|
|
434
|
+
}
|
|
435
|
+
// Border-left (used by callouts: "4px solid #2563eb")
|
|
436
|
+
const borderLeftMatch = ruleContent.match(/border-left\s*:\s*([^;]+)/i);
|
|
437
|
+
if (borderLeftMatch) {
|
|
438
|
+
const borderLeftValue = resolveValue(borderLeftMatch[1].trim());
|
|
439
|
+
// If it contains a color, extract it as borderColor
|
|
440
|
+
const colorInBorder = borderLeftValue.match(/#([0-9a-fA-F]{3,6})/);
|
|
441
|
+
if (colorInBorder && !style.borderColor) {
|
|
442
|
+
style.borderColor = `#${colorInBorder[1]}`;
|
|
443
|
+
}
|
|
444
|
+
// Store the full border-left value
|
|
445
|
+
style.borderLeft = borderLeftValue;
|
|
446
|
+
style.border = style.border || borderLeftValue;
|
|
447
|
+
}
|
|
448
|
+
// Border-right (used by sidebar dividers, etc.)
|
|
449
|
+
const borderRightMatch = ruleContent.match(/border-right\s*:\s*([^;]+)/i);
|
|
450
|
+
if (borderRightMatch) {
|
|
451
|
+
style.borderRight = resolveValue(borderRightMatch[1].trim());
|
|
452
|
+
}
|
|
453
|
+
// Border-bottom (used by title blocks with divider lines)
|
|
454
|
+
const borderBottomMatch = ruleContent.match(/border-bottom\s*:\s*([^;]+)/i);
|
|
455
|
+
if (borderBottomMatch) {
|
|
456
|
+
style.borderBottom = resolveValue(borderBottomMatch[1].trim());
|
|
457
|
+
}
|
|
458
|
+
// Border-top (used by footer sections with top divider lines)
|
|
459
|
+
const borderTopMatch = ruleContent.match(/border-top\s*:\s*([^;]+)/i);
|
|
460
|
+
if (borderTopMatch) {
|
|
461
|
+
style.borderTop = resolveValue(borderTopMatch[1].trim());
|
|
462
|
+
}
|
|
463
|
+
// Display property (grid, flex, block, etc.)
|
|
464
|
+
const displayMatch = ruleContent.match(/display\s*:\s*([^;]+)/i);
|
|
465
|
+
if (displayMatch) {
|
|
466
|
+
style.display = resolveValue(displayMatch[1].trim());
|
|
467
|
+
}
|
|
468
|
+
// Flex property (for flex item sizing like "flex: 1")
|
|
469
|
+
const flexMatch = ruleContent.match(/(?:^|;)\s*flex\s*:\s*([^;]+)/i);
|
|
470
|
+
if (flexMatch) {
|
|
471
|
+
style.flex = resolveValue(flexMatch[1].trim());
|
|
472
|
+
}
|
|
473
|
+
// Flex-direction property (for horizontal vs vertical flex containers)
|
|
474
|
+
const flexDirectionMatch = ruleContent.match(/flex-direction\s*:\s*([^;]+)/i);
|
|
475
|
+
if (flexDirectionMatch) {
|
|
476
|
+
style.flexDirection = resolveValue(flexDirectionMatch[1].trim());
|
|
477
|
+
}
|
|
478
|
+
// Flex-wrap property (for wrapping behavior)
|
|
479
|
+
const flexWrapMatch = ruleContent.match(/flex-wrap\s*:\s*([^;]+)/i);
|
|
480
|
+
if (flexWrapMatch) {
|
|
481
|
+
style.flexWrap = resolveValue(flexWrapMatch[1].trim());
|
|
482
|
+
}
|
|
483
|
+
// Gap property (for flex/grid spacing)
|
|
484
|
+
const gapMatch = ruleContent.match(/(?:^|;)\s*gap\s*:\s*([^;]+)/i);
|
|
485
|
+
if (gapMatch) {
|
|
486
|
+
style.gap = resolveValue(gapMatch[1].trim());
|
|
487
|
+
}
|
|
488
|
+
// Grid template columns (for two-column layout detection)
|
|
489
|
+
const gridColsMatch = ruleContent.match(/grid-template-columns\s*:\s*([^;]+)/i);
|
|
490
|
+
if (gridColsMatch) {
|
|
491
|
+
style.gridTemplateColumns = resolveValue(gridColsMatch[1].trim());
|
|
492
|
+
}
|
|
493
|
+
// Text alignment
|
|
494
|
+
const textAlignMatch = ruleContent.match(/text-align\s*:\s*([^;]+)/i);
|
|
495
|
+
if (textAlignMatch) {
|
|
496
|
+
style.textAlign = resolveValue(textAlignMatch[1].trim());
|
|
497
|
+
}
|
|
498
|
+
// Font size
|
|
499
|
+
const fontSizeMatch = ruleContent.match(/font-size\s*:\s*([^;]+)/i);
|
|
500
|
+
if (fontSizeMatch) {
|
|
501
|
+
style.fontSize = resolveValue(fontSizeMatch[1].trim());
|
|
502
|
+
}
|
|
503
|
+
// Font weight
|
|
504
|
+
const fontWeightMatch = ruleContent.match(/font-weight\s*:\s*([^;]+)/i);
|
|
505
|
+
if (fontWeightMatch) {
|
|
506
|
+
style.fontWeight = resolveValue(fontWeightMatch[1].trim());
|
|
507
|
+
}
|
|
508
|
+
// Padding
|
|
509
|
+
const paddingMatch = ruleContent.match(/padding\s*:\s*([^;]+)/i);
|
|
510
|
+
if (paddingMatch) {
|
|
511
|
+
style.padding = resolveValue(paddingMatch[1].trim());
|
|
512
|
+
}
|
|
513
|
+
// Font style (italic, normal)
|
|
514
|
+
const fontStyleMatch = ruleContent.match(/font-style\s*:\s*([^;]+)/i);
|
|
515
|
+
if (fontStyleMatch) {
|
|
516
|
+
style.fontStyle = resolveValue(fontStyleMatch[1].trim());
|
|
517
|
+
}
|
|
518
|
+
// Font family (extract primary font from font stack)
|
|
519
|
+
const fontFamilyMatch = ruleContent.match(/font-family\s*:\s*([^;]+)/i);
|
|
520
|
+
if (fontFamilyMatch) {
|
|
521
|
+
const resolved = resolveValue(fontFamilyMatch[1].trim());
|
|
522
|
+
const primaryFont = extractPrimaryFont(resolved);
|
|
523
|
+
if (primaryFont) {
|
|
524
|
+
style.fontFamily = primaryFont;
|
|
525
|
+
}
|
|
526
|
+
}
|
|
527
|
+
// Text indent (e.g., "2rem", "-2rem" for hanging indents)
|
|
528
|
+
const textIndentMatch = ruleContent.match(/text-indent\s*:\s*([^;]+)/i);
|
|
529
|
+
if (textIndentMatch) {
|
|
530
|
+
style.textIndent = resolveValue(textIndentMatch[1].trim());
|
|
531
|
+
}
|
|
532
|
+
// Text transform (uppercase, lowercase, capitalize)
|
|
533
|
+
const textTransformMatch = ruleContent.match(/text-transform\s*:\s*([^;]+)/i);
|
|
534
|
+
if (textTransformMatch) {
|
|
535
|
+
style.textTransform = resolveValue(textTransformMatch[1].trim());
|
|
536
|
+
}
|
|
537
|
+
// GENERALIZED: Margin-bottom (for paragraph spacing)
|
|
538
|
+
const marginBottomMatch = ruleContent.match(/margin-bottom\s*:\s*([^;]+)/i);
|
|
539
|
+
if (marginBottomMatch) {
|
|
540
|
+
style.marginBottom = resolveValue(marginBottomMatch[1].trim());
|
|
541
|
+
}
|
|
542
|
+
// GENERALIZED: Line-height (for vertical spacing within text)
|
|
543
|
+
const lineHeightMatch = ruleContent.match(/line-height\s*:\s*([^;]+)/i);
|
|
544
|
+
if (lineHeightMatch) {
|
|
545
|
+
style.lineHeight = resolveValue(lineHeightMatch[1].trim());
|
|
546
|
+
}
|
|
547
|
+
// Store if we found any properties
|
|
548
|
+
// MERGE with existing styles (later rules can add properties without overwriting)
|
|
549
|
+
if (Object.keys(style).length > 0) {
|
|
550
|
+
const existing = classStyles.get(className) || {};
|
|
551
|
+
classStyles.set(className, { ...existing, ...style });
|
|
552
|
+
}
|
|
553
|
+
}
|
|
554
|
+
// Parse element.class combined selectors like "dd.dish-description { font-style: italic; }"
|
|
555
|
+
// These are more specific than just .class, but we store under the class name
|
|
556
|
+
// since getElementStyles already verifies element type separately
|
|
557
|
+
const elementClassMatches = cssText.matchAll(/(?:^|[;\n}])\s*([a-zA-Z][a-zA-Z0-9]*)\s*\.\s*([a-zA-Z0-9_-]+)\s*\{([^}]+)\}/gm);
|
|
558
|
+
for (const match of elementClassMatches) {
|
|
559
|
+
const elementName = match[1].toLowerCase();
|
|
560
|
+
const className = match[2];
|
|
561
|
+
const ruleContent = match[3];
|
|
562
|
+
// Extract style properties (same as class rules above)
|
|
563
|
+
const style = {};
|
|
564
|
+
// Font style (italic, normal)
|
|
565
|
+
const fontStyleMatch = ruleContent.match(/font-style\s*:\s*([^;]+)/i);
|
|
566
|
+
if (fontStyleMatch) {
|
|
567
|
+
style.fontStyle = resolveValue(fontStyleMatch[1].trim());
|
|
568
|
+
}
|
|
569
|
+
// Font weight
|
|
570
|
+
const fontWeightMatch = ruleContent.match(/font-weight\s*:\s*([^;]+)/i);
|
|
571
|
+
if (fontWeightMatch) {
|
|
572
|
+
style.fontWeight = resolveValue(fontWeightMatch[1].trim());
|
|
573
|
+
}
|
|
574
|
+
// Text color
|
|
575
|
+
const colorMatch = ruleContent.match(/(?:^|;)\s*color\s*:\s*([^;]+)/i);
|
|
576
|
+
if (colorMatch) {
|
|
577
|
+
const colorValue = resolveValue(colorMatch[1].trim());
|
|
578
|
+
style.color = colorValue;
|
|
579
|
+
}
|
|
580
|
+
// Background color
|
|
581
|
+
const bgMatch = ruleContent.match(/background(?:-color)?\s*:\s*([^;]+)/i);
|
|
582
|
+
if (bgMatch) {
|
|
583
|
+
const bgValue = resolveValue(bgMatch[1].trim());
|
|
584
|
+
// For gradient backgrounds, extract the first color as fallback for DOCX
|
|
585
|
+
// Also store the full gradient for containers that support gradient rendering
|
|
586
|
+
const gradientColor = extractFirstGradientColor(bgValue);
|
|
587
|
+
if (gradientColor) {
|
|
588
|
+
style.backgroundColor = gradientColor;
|
|
589
|
+
// Parse full gradient for container backgrounds
|
|
590
|
+
const gradient = parseGradient(bgValue);
|
|
591
|
+
if (gradient) {
|
|
592
|
+
style.backgroundGradient = gradient;
|
|
593
|
+
}
|
|
594
|
+
}
|
|
595
|
+
else {
|
|
596
|
+
style.backgroundColor = bgValue;
|
|
597
|
+
}
|
|
598
|
+
}
|
|
599
|
+
// Text alignment
|
|
600
|
+
const textAlignMatch = ruleContent.match(/text-align\s*:\s*([^;]+)/i);
|
|
601
|
+
if (textAlignMatch) {
|
|
602
|
+
style.textAlign = resolveValue(textAlignMatch[1].trim());
|
|
603
|
+
}
|
|
604
|
+
// Font family (extract primary font from font stack)
|
|
605
|
+
const fontFamilyMatch = ruleContent.match(/font-family\s*:\s*([^;]+)/i);
|
|
606
|
+
if (fontFamilyMatch) {
|
|
607
|
+
const resolved = resolveValue(fontFamilyMatch[1].trim());
|
|
608
|
+
const primaryFont = extractPrimaryFont(resolved);
|
|
609
|
+
if (primaryFont) {
|
|
610
|
+
style.fontFamily = primaryFont;
|
|
611
|
+
}
|
|
612
|
+
}
|
|
613
|
+
// Store under the class name (merge with existing)
|
|
614
|
+
if (Object.keys(style).length > 0) {
|
|
615
|
+
const existing = classStyles.get(className) || {};
|
|
616
|
+
classStyles.set(className, { ...existing, ...style });
|
|
617
|
+
}
|
|
618
|
+
}
|
|
619
|
+
// Parse nested CSS selectors like ".parent .child { color: ... }" or ".parent element { color: ... }"
|
|
620
|
+
// Also handles "element.parent .child { ... }" like "figure.menu-image .image-placeholder { ... }"
|
|
621
|
+
// Store the parent-child relationship so we can look up styles based on context
|
|
622
|
+
// Supports: .works-cited p { text-indent: -2rem; } or .callout .callout-label { color: ... }
|
|
623
|
+
// Extended pattern to match:
|
|
624
|
+
// - .parentClass .child { ... }
|
|
625
|
+
// - element.parentClass .child { ... }
|
|
626
|
+
const nestedSelectorMatches = cssText.matchAll(/(?:[a-zA-Z0-9_-]*)?\.([a-zA-Z0-9_-]+)\s+(\.?[a-zA-Z0-9_-]+)\s*\{([^}]+)\}/g);
|
|
627
|
+
for (const match of nestedSelectorMatches) {
|
|
628
|
+
const parentClass = match[1];
|
|
629
|
+
let childSelector = match[2];
|
|
630
|
+
// Remove leading dot if present (for .class selectors)
|
|
631
|
+
if (childSelector.startsWith('.')) {
|
|
632
|
+
childSelector = childSelector.slice(1);
|
|
633
|
+
}
|
|
634
|
+
const ruleContent = match[3];
|
|
635
|
+
// Extract style properties for the child when inside this parent
|
|
636
|
+
const style = {};
|
|
637
|
+
// Text color
|
|
638
|
+
const colorMatch = ruleContent.match(/(?:^|;)\s*color\s*:\s*([^;]+)/i);
|
|
639
|
+
if (colorMatch) {
|
|
640
|
+
style.color = resolveValue(colorMatch[1].trim());
|
|
641
|
+
}
|
|
642
|
+
// Background color
|
|
643
|
+
const bgMatch = ruleContent.match(/background(?:-color)?\s*:\s*([^;]+)/i);
|
|
644
|
+
if (bgMatch) {
|
|
645
|
+
const bgValue = resolveValue(bgMatch[1].trim());
|
|
646
|
+
// For gradient backgrounds, extract the first color as fallback for DOCX
|
|
647
|
+
// Also store the full gradient for containers that support gradient rendering
|
|
648
|
+
const gradientColor = extractFirstGradientColor(bgValue);
|
|
649
|
+
if (gradientColor) {
|
|
650
|
+
style.backgroundColor = gradientColor;
|
|
651
|
+
// Parse full gradient for container backgrounds
|
|
652
|
+
const gradient = parseGradient(bgValue);
|
|
653
|
+
if (gradient) {
|
|
654
|
+
style.backgroundGradient = gradient;
|
|
655
|
+
}
|
|
656
|
+
}
|
|
657
|
+
else {
|
|
658
|
+
style.backgroundColor = bgValue;
|
|
659
|
+
}
|
|
660
|
+
}
|
|
661
|
+
// Gradient text detection: when background-clip: text is used with a gradient background,
|
|
662
|
+
// extract the first color from the gradient as fallback and also store full gradient
|
|
663
|
+
const hasBackgroundClipText = ruleContent.match(/(?:-webkit-)?background-clip\s*:\s*text/i);
|
|
664
|
+
if (hasBackgroundClipText && bgMatch) {
|
|
665
|
+
const bgValue = resolveValue(bgMatch[1].trim());
|
|
666
|
+
// Parse full gradient
|
|
667
|
+
const gradient = parseGradient(bgValue);
|
|
668
|
+
if (gradient) {
|
|
669
|
+
style.gradient = gradient;
|
|
670
|
+
// Also set fallback color from first stop
|
|
671
|
+
if (!style.color) {
|
|
672
|
+
style.color = gradient.stops[0]?.color;
|
|
673
|
+
}
|
|
674
|
+
}
|
|
675
|
+
else {
|
|
676
|
+
// Fallback: extract first color if gradient parsing fails
|
|
677
|
+
const gradientColor = extractFirstGradientColor(bgValue);
|
|
678
|
+
if (gradientColor && !style.color) {
|
|
679
|
+
style.color = gradientColor;
|
|
680
|
+
}
|
|
681
|
+
}
|
|
682
|
+
}
|
|
683
|
+
// Font weight
|
|
684
|
+
const fontWeightMatch = ruleContent.match(/font-weight\s*:\s*([^;]+)/i);
|
|
685
|
+
if (fontWeightMatch) {
|
|
686
|
+
style.fontWeight = resolveValue(fontWeightMatch[1].trim());
|
|
687
|
+
}
|
|
688
|
+
// Text indent (for nested hanging indents like .works-cited p)
|
|
689
|
+
const textIndentMatch = ruleContent.match(/text-indent\s*:\s*([^;]+)/i);
|
|
690
|
+
if (textIndentMatch) {
|
|
691
|
+
style.textIndent = resolveValue(textIndentMatch[1].trim());
|
|
692
|
+
}
|
|
693
|
+
// Font style
|
|
694
|
+
const fontStyleMatch = ruleContent.match(/font-style\s*:\s*([^;]+)/i);
|
|
695
|
+
if (fontStyleMatch) {
|
|
696
|
+
style.fontStyle = resolveValue(fontStyleMatch[1].trim());
|
|
697
|
+
}
|
|
698
|
+
// Font family (extract primary font from font stack)
|
|
699
|
+
const fontFamilyMatch = ruleContent.match(/font-family\s*:\s*([^;]+)/i);
|
|
700
|
+
if (fontFamilyMatch) {
|
|
701
|
+
const resolved = resolveValue(fontFamilyMatch[1].trim());
|
|
702
|
+
const primaryFont = extractPrimaryFont(resolved);
|
|
703
|
+
if (primaryFont) {
|
|
704
|
+
style.fontFamily = primaryFont;
|
|
705
|
+
}
|
|
706
|
+
}
|
|
707
|
+
// Border (full shorthand, e.g., "1px solid #e5e7eb")
|
|
708
|
+
const borderMatch = ruleContent.match(/(?:^|;)\s*border\s*:\s*([^;]+)/i);
|
|
709
|
+
if (borderMatch) {
|
|
710
|
+
style.border = resolveValue(borderMatch[1].trim());
|
|
711
|
+
}
|
|
712
|
+
// Border-left (callout styling)
|
|
713
|
+
const borderLeftMatch = ruleContent.match(/border-left\s*:\s*([^;]+)/i);
|
|
714
|
+
if (borderLeftMatch) {
|
|
715
|
+
const borderLeftValue = resolveValue(borderLeftMatch[1].trim());
|
|
716
|
+
style.borderLeft = borderLeftValue;
|
|
717
|
+
style.border = style.border || borderLeftValue;
|
|
718
|
+
// Extract color from border value
|
|
719
|
+
const colorInBorder = borderLeftValue.match(/#([0-9a-fA-F]{3,6})/);
|
|
720
|
+
if (colorInBorder && !style.borderColor) {
|
|
721
|
+
style.borderColor = `#${colorInBorder[1]}`;
|
|
722
|
+
}
|
|
723
|
+
}
|
|
724
|
+
// Border-bottom (heading underline styling, e.g., h2 { border-bottom: 3px solid #7c3aed; })
|
|
725
|
+
const borderBottomMatch = ruleContent.match(/border-bottom\s*:\s*([^;]+)/i);
|
|
726
|
+
if (borderBottomMatch) {
|
|
727
|
+
const borderBottomValue = resolveValue(borderBottomMatch[1].trim());
|
|
728
|
+
style.borderBottom = borderBottomValue;
|
|
729
|
+
// Extract color from border value
|
|
730
|
+
const colorInBorder = borderBottomValue.match(/#([0-9a-fA-F]{3,6})/);
|
|
731
|
+
if (colorInBorder && !style.borderColor) {
|
|
732
|
+
style.borderColor = `#${colorInBorder[1]}`;
|
|
733
|
+
}
|
|
734
|
+
}
|
|
735
|
+
// Text-transform (uppercase, lowercase, capitalize for headings, labels, etc.)
|
|
736
|
+
const textTransformMatch = ruleContent.match(/text-transform\s*:\s*([^;]+)/i);
|
|
737
|
+
if (textTransformMatch) {
|
|
738
|
+
style.textTransform = resolveValue(textTransformMatch[1].trim());
|
|
739
|
+
}
|
|
740
|
+
// Store in nestedStyles: parentClass -> (childSelector -> style)
|
|
741
|
+
if (Object.keys(style).length > 0) {
|
|
742
|
+
if (!nestedStyles.has(parentClass)) {
|
|
743
|
+
nestedStyles.set(parentClass, new Map());
|
|
744
|
+
}
|
|
745
|
+
const existing = nestedStyles.get(parentClass).get(childSelector) || {};
|
|
746
|
+
nestedStyles.get(parentClass).set(childSelector, { ...existing, ...style });
|
|
747
|
+
}
|
|
748
|
+
}
|
|
749
|
+
// Parse element type selectors (body, p, h1, h2, h3, h4, h5, h6, etc.)
|
|
750
|
+
// These are rules like: body { color: var(--color-text); }
|
|
751
|
+
// or grouped rules like: h1, h2, h3, h4, h5, h6 { color: var(--color-heading); }
|
|
752
|
+
// IMPORTANT: Only match STANDALONE element selectors, not selectors like ".class p" or "#id p"
|
|
753
|
+
// The (?:^|[,\s]) lookbehind ensures we're not part of a complex selector
|
|
754
|
+
// We need to be careful to not match ".timeline-content p" as just "p"
|
|
755
|
+
const elementSelectorPattern = /(?:^|[\n\r])(\s*(?:body|p|h[1-6]|span|div|ul|ol|li|table|th|td|blockquote|section|article|aside|nav|header|footer|figure|figcaption|address|abbr)(?:\s*,\s*(?:body|p|h[1-6]|span|div|ul|ol|li|table|th|td|blockquote|section|article|aside|nav|header|footer|figure|figcaption|address|abbr))*)\s*\{([^}]+)\}/gi;
|
|
756
|
+
const elementMatches = cssText.matchAll(elementSelectorPattern);
|
|
757
|
+
for (const match of elementMatches) {
|
|
758
|
+
const selectorList = match[1];
|
|
759
|
+
const ruleContent = match[2];
|
|
760
|
+
// Split comma-separated selectors
|
|
761
|
+
const selectors = selectorList.split(/\s*,\s*/).map(s => s.trim().toLowerCase());
|
|
762
|
+
// Extract style properties
|
|
763
|
+
const style = {};
|
|
764
|
+
// Text color
|
|
765
|
+
const colorMatch = ruleContent.match(/(?:^|;)\s*color\s*:\s*([^;]+)/i);
|
|
766
|
+
if (colorMatch) {
|
|
767
|
+
style.color = resolveValue(colorMatch[1].trim());
|
|
768
|
+
}
|
|
769
|
+
// Background color
|
|
770
|
+
const bgMatch = ruleContent.match(/background(?:-color)?\s*:\s*([^;]+)/i);
|
|
771
|
+
if (bgMatch) {
|
|
772
|
+
const bgValue = resolveValue(bgMatch[1].trim());
|
|
773
|
+
// For gradient backgrounds, extract the first color as fallback for DOCX
|
|
774
|
+
// Also store the full gradient for containers that support gradient rendering
|
|
775
|
+
const gradientColor = extractFirstGradientColor(bgValue);
|
|
776
|
+
if (gradientColor) {
|
|
777
|
+
style.backgroundColor = gradientColor;
|
|
778
|
+
// Parse full gradient for container backgrounds
|
|
779
|
+
const gradient = parseGradient(bgValue);
|
|
780
|
+
if (gradient) {
|
|
781
|
+
style.backgroundGradient = gradient;
|
|
782
|
+
}
|
|
783
|
+
}
|
|
784
|
+
else {
|
|
785
|
+
style.backgroundColor = bgValue;
|
|
786
|
+
}
|
|
787
|
+
}
|
|
788
|
+
// Font size
|
|
789
|
+
const fontSizeMatch = ruleContent.match(/font-size\s*:\s*([^;]+)/i);
|
|
790
|
+
if (fontSizeMatch) {
|
|
791
|
+
style.fontSize = resolveValue(fontSizeMatch[1].trim());
|
|
792
|
+
}
|
|
793
|
+
// Font weight
|
|
794
|
+
const fontWeightMatch = ruleContent.match(/font-weight\s*:\s*([^;]+)/i);
|
|
795
|
+
if (fontWeightMatch) {
|
|
796
|
+
style.fontWeight = resolveValue(fontWeightMatch[1].trim());
|
|
797
|
+
}
|
|
798
|
+
// Padding (for table cells th, td)
|
|
799
|
+
const paddingMatch = ruleContent.match(/padding\s*:\s*([^;]+)/i);
|
|
800
|
+
if (paddingMatch) {
|
|
801
|
+
style.padding = resolveValue(paddingMatch[1].trim());
|
|
802
|
+
}
|
|
803
|
+
// Font style (italic, normal) - for blockquotes
|
|
804
|
+
const fontStyleMatch = ruleContent.match(/font-style\s*:\s*([^;]+)/i);
|
|
805
|
+
if (fontStyleMatch) {
|
|
806
|
+
style.fontStyle = resolveValue(fontStyleMatch[1].trim());
|
|
807
|
+
}
|
|
808
|
+
// Font family (extract primary font from font stack) - for body, headings, etc.
|
|
809
|
+
const fontFamilyMatch = ruleContent.match(/font-family\s*:\s*([^;]+)/i);
|
|
810
|
+
if (fontFamilyMatch) {
|
|
811
|
+
const resolved = resolveValue(fontFamilyMatch[1].trim());
|
|
812
|
+
const primaryFont = extractPrimaryFont(resolved);
|
|
813
|
+
if (primaryFont) {
|
|
814
|
+
style.fontFamily = primaryFont;
|
|
815
|
+
}
|
|
816
|
+
}
|
|
817
|
+
// Text indent (for paragraph first-line indentation)
|
|
818
|
+
const textIndentMatch = ruleContent.match(/text-indent\s*:\s*([^;]+)/i);
|
|
819
|
+
if (textIndentMatch) {
|
|
820
|
+
style.textIndent = resolveValue(textIndentMatch[1].trim());
|
|
821
|
+
}
|
|
822
|
+
// Text transform (uppercase, lowercase, capitalize)
|
|
823
|
+
const textTransformMatch = ruleContent.match(/text-transform\s*:\s*([^;]+)/i);
|
|
824
|
+
if (textTransformMatch) {
|
|
825
|
+
style.textTransform = resolveValue(textTransformMatch[1].trim());
|
|
826
|
+
}
|
|
827
|
+
// GENERALIZED: Margin-bottom (for paragraph spacing)
|
|
828
|
+
const marginBottomMatch = ruleContent.match(/margin-bottom\s*:\s*([^;]+)/i);
|
|
829
|
+
if (marginBottomMatch) {
|
|
830
|
+
style.marginBottom = resolveValue(marginBottomMatch[1].trim());
|
|
831
|
+
}
|
|
832
|
+
// GENERALIZED: Line-height (for vertical spacing within text)
|
|
833
|
+
const lineHeightMatch = ruleContent.match(/line-height\s*:\s*([^;]+)/i);
|
|
834
|
+
if (lineHeightMatch) {
|
|
835
|
+
style.lineHeight = resolveValue(lineHeightMatch[1].trim());
|
|
836
|
+
}
|
|
837
|
+
// Text alignment (left, center, right, justify)
|
|
838
|
+
const textAlignMatch = ruleContent.match(/text-align\s*:\s*([^;]+)/i);
|
|
839
|
+
if (textAlignMatch) {
|
|
840
|
+
style.textAlign = resolveValue(textAlignMatch[1].trim());
|
|
841
|
+
}
|
|
842
|
+
// Border shorthand (e.g., "1px solid #e5e7eb")
|
|
843
|
+
const borderMatch = ruleContent.match(/(?:^|;)\s*border\s*:\s*([^;]+)/i);
|
|
844
|
+
if (borderMatch) {
|
|
845
|
+
style.border = resolveValue(borderMatch[1].trim());
|
|
846
|
+
}
|
|
847
|
+
// Border-left (used by blockquotes/callouts: "4px solid #8b4513")
|
|
848
|
+
const borderLeftMatch = ruleContent.match(/border-left\s*:\s*([^;]+)/i);
|
|
849
|
+
if (borderLeftMatch) {
|
|
850
|
+
const borderLeftValue = resolveValue(borderLeftMatch[1].trim());
|
|
851
|
+
// If it contains a color, extract it as borderColor
|
|
852
|
+
const colorInBorder = borderLeftValue.match(/#([0-9a-fA-F]{3,6})/);
|
|
853
|
+
if (colorInBorder && !style.borderColor) {
|
|
854
|
+
style.borderColor = `#${colorInBorder[1]}`;
|
|
855
|
+
}
|
|
856
|
+
// Store the full border-left value
|
|
857
|
+
style.borderLeft = borderLeftValue;
|
|
858
|
+
style.border = style.border || borderLeftValue;
|
|
859
|
+
}
|
|
860
|
+
// Border-right (used by sidebar dividers, etc.)
|
|
861
|
+
const borderRightMatch = ruleContent.match(/border-right\s*:\s*([^;]+)/i);
|
|
862
|
+
if (borderRightMatch) {
|
|
863
|
+
style.borderRight = resolveValue(borderRightMatch[1].trim());
|
|
864
|
+
}
|
|
865
|
+
// Border color (direct property)
|
|
866
|
+
const borderColorMatch = ruleContent.match(/border-color\s*:\s*([^;]+)/i);
|
|
867
|
+
if (borderColorMatch) {
|
|
868
|
+
style.borderColor = resolveValue(borderColorMatch[1].trim());
|
|
869
|
+
}
|
|
870
|
+
// GENERALIZED: Border-bottom (used by h2 underlines, title blocks, etc.)
|
|
871
|
+
// Any element can have border-bottom - extract from element selectors
|
|
872
|
+
const borderBottomMatch = ruleContent.match(/border-bottom\s*:\s*([^;]+)/i);
|
|
873
|
+
if (borderBottomMatch) {
|
|
874
|
+
style.borderBottom = resolveValue(borderBottomMatch[1].trim());
|
|
875
|
+
}
|
|
876
|
+
// GENERALIZED: Border-top (used by footer sections with top divider lines)
|
|
877
|
+
// Any element can have border-top - extract from element selectors
|
|
878
|
+
const borderTopMatch = ruleContent.match(/border-top\s*:\s*([^;]+)/i);
|
|
879
|
+
if (borderTopMatch) {
|
|
880
|
+
style.borderTop = resolveValue(borderTopMatch[1].trim());
|
|
881
|
+
}
|
|
882
|
+
// Apply style to each selector
|
|
883
|
+
if (Object.keys(style).length > 0) {
|
|
884
|
+
for (const selector of selectors) {
|
|
885
|
+
// Merge with existing styles (later rules override)
|
|
886
|
+
const existing = elementStyles.get(selector) || {};
|
|
887
|
+
elementStyles.set(selector, { ...existing, ...style });
|
|
888
|
+
}
|
|
889
|
+
}
|
|
890
|
+
}
|
|
891
|
+
}
|
|
892
|
+
return { variables, classColors, calloutStyles, classStyles, elementStyles, nestedStyles };
|
|
893
|
+
}
|
|
894
|
+
/**
 * Build the effective style object for an element from the parsed CSS context.
 *
 * Layers are applied from lowest to highest priority; a later layer overwrites
 * any property already set by an earlier one:
 *   1. Element type selector styles (`cssContext.elementStyles`, keyed by lowercase tag name).
 *   2. Class styles (`cssContext.classStyles`), in the order the classes appear on the element.
 *   3. Text color taken from the first class of `parentElement` that defines one
 *      (only when `parentElement` is given and no color has been set yet).
 *   4. Nested selector styles (`cssContext.nestedStyles`): looked up through the
 *      classes of `parentElement` when it is given, otherwise through the classes
 *      of every ancestor reached via `element.parentElement`.
 *   5. Font family from the nearest ancestor whose type or class styles define one
 *      (only when still unset).
 *   6. Declarations found in the element's inline `style` attribute.
 *   7. `window.getComputedStyle` as a fallback for `fontFamily` and `lineHeight`,
 *      when a `window` global exists.
 *
 * @param element The element to get styles for
 * @param cssContext The CSS context (provides the `elementStyles`, `classStyles` and `nestedStyles` maps)
 * @param parentElement Optional parent element for nested style lookups
 * @returns A new plain object holding the merged style properties
 */
function getElementStyles(element, cssContext, parentElement) {
    // Split a node's class attribute into its non-empty class names.
    const classNamesOf = (node) => {
        const classAttr = node.getAttribute("class");
        return classAttr ? classAttr.split(/\s+/).filter((name) => name.length > 0) : [];
    };
    const result = {};
    // First, apply element type selector styles (lowest priority)
    const tagName = element.tagName.toLowerCase();
    const elementTypeStyle = cssContext.elementStyles.get(tagName);
    if (elementTypeStyle) {
        Object.assign(result, elementTypeStyle);
    }
    // Then apply class styles (higher priority)
    const elementClasses = classNamesOf(element);
    for (const className of elementClasses) {
        const classStyle = cssContext.classStyles.get(className);
        if (classStyle) {
            Object.assign(result, classStyle);
        }
    }
    // Merge the nested rules registered under each container class that target
    // this element's tag name (e.g. ".works-cited p") or one of its classes
    // (e.g. ".callout .callout-label").
    const applyNestedStyles = (containerClasses) => {
        for (const containerClass of containerClasses) {
            const nestedMap = cssContext.nestedStyles.get(containerClass);
            if (!nestedMap) {
                continue;
            }
            const nestedTagStyle = nestedMap.get(tagName);
            if (nestedTagStyle) {
                Object.assign(result, nestedTagStyle);
            }
            for (const childClass of elementClasses) {
                const nestedStyle = nestedMap.get(childClass);
                if (nestedStyle) {
                    Object.assign(result, nestedStyle);
                }
            }
        }
    };
    if (parentElement) {
        const parentClasses = classNamesOf(parentElement);
        // CSS color is inherited: take it from the first parent class that sets one
        // (handles cases like ".cta { color: white; }").
        if (!result.color) {
            for (const parentClass of parentClasses) {
                const parentClassStyle = cssContext.classStyles.get(parentClass);
                if (parentClassStyle?.color) {
                    result.color = parentClassStyle.color;
                    break;
                }
            }
        }
        applyNestedStyles(parentClasses);
    }
    else {
        // No explicit parent was passed: walk up the DOM tree and apply the nested
        // styles of every ancestor container, nearest ancestor first.
        for (let ancestor = element.parentElement; ancestor; ancestor = ancestor.parentElement) {
            applyNestedStyles(classNamesOf(ancestor));
        }
    }
    // font-family is an inherited CSS property, so fall back to the nearest
    // ancestor whose type selector or class styles define one.
    if (!result.fontFamily) {
        let ancestor = element.parentElement;
        while (ancestor && !result.fontFamily) {
            const ancestorTagName = ancestor.tagName?.toLowerCase();
            if (ancestorTagName) {
                // Element type selector for the ancestor (e.g., body { font-family: ... })
                const ancestorTypeStyle = cssContext.elementStyles.get(ancestorTagName);
                if (ancestorTypeStyle?.fontFamily) {
                    result.fontFamily = ancestorTypeStyle.fontFamily;
                    break;
                }
                // Ancestor's class styles
                for (const ancestorClass of classNamesOf(ancestor)) {
                    const classStyle = cssContext.classStyles.get(ancestorClass);
                    if (classStyle?.fontFamily) {
                        result.fontFamily = classStyle.fontFamily;
                        break;
                    }
                }
            }
            ancestor = ancestor.parentElement;
        }
    }
    // Inline styles override everything gathered from the stylesheet.
    const inlineStyle = element.getAttribute("style") || "";
    if (inlineStyle) {
        // [result key, pattern capturing the declaration value]. The "(?:^|;)"
        // prefix keeps short names such as "color" from matching inside longer
        // ones such as "background-color".
        const inlineDeclarations = [
            ["backgroundColor", /background(?:-color)?\s*:\s*([^;]+)/i],
            ["color", /(?:^|;)\s*color\s*:\s*([^;]+)/i],
            ["border", /(?:^|;)\s*border\s*:\s*([^;]+)/i],
            ["borderColor", /border-color\s*:\s*([^;]+)/i],
            ["borderBottom", /border-bottom\s*:\s*([^;]+)/i],
            ["display", /display\s*:\s*([^;]+)/i],
            ["flex", /(?:^|;)\s*flex\s*:\s*([^;]+)/i],
            ["gridTemplateColumns", /grid-template-columns\s*:\s*([^;]+)/i],
            ["textAlign", /text-align\s*:\s*([^;]+)/i],
            ["lineHeight", /line-height\s*:\s*([^;]+)/i],
        ];
        for (const [property, pattern] of inlineDeclarations) {
            const declaration = inlineStyle.match(pattern);
            if (declaration) {
                result[property] = declaration[1].trim();
            }
        }
        // Inline font-family: only the primary font of the stack is kept.
        const fontFamilyMatch = inlineStyle.match(/font-family\s*:\s*([^;]+)/i);
        if (fontFamilyMatch) {
            const primaryFont = extractPrimaryFont(fontFamilyMatch[1].trim());
            if (primaryFont) {
                result.fontFamily = primaryFont;
            }
        }
    }
    // Fallback: when running where a window global exists, use the computed
    // font-family and line-height for anything the parsing above did not set.
    if (typeof window !== "undefined" && window.getComputedStyle) {
        try {
            const computed = window.getComputedStyle(element);
            if (!result.fontFamily && computed.fontFamily) {
                const primaryFont = extractPrimaryFont(computed.fontFamily);
                if (primaryFont) {
                    result.fontFamily = primaryFont;
                }
            }
            if (!result.lineHeight && computed.lineHeight) {
                // Kept exactly as reported (e.g., "27.2px"); no unit conversion happens here.
                result.lineHeight = computed.lineHeight;
            }
        }
        catch {
            // Deliberate best effort: getComputedStyle may fail in some environments,
            // in which case the parsed values above are returned unchanged.
        }
    }
    return result;
}
|
|
1077
|
+
/**
 * Extract a border color from a style object as a 6-digit uppercase hex string
 * without the leading "#".
 *
 * Lookup order:
 *   1. `style.borderColor`, delegated to `extractHexColor`.
 *   2. The first "#hex" color found in `style.border`, then `style.borderLeft`,
 *      `style.borderRight`, `style.borderTop`, `style.borderBottom`.
 *
 * Hex handling: 3-digit values are expanded ("abc" -> "AABBCC"); 4- and 8-digit
 * values (hex with an alpha channel) have the alpha part dropped; any other
 * digit count is not a valid CSS hex color and is skipped.
 *
 * @param style Style object whose border-related properties are inspected
 * @returns The hex color (e.g., "E5E7EB"), or undefined when no hex color is found
 */
function extractBorderColorFromStyle(style) {
    if (style.borderColor) {
        return extractHexColor(style.borderColor);
    }
    // Pull the first "#hex" token out of a border value such as "1px solid #e5e7eb"
    // and normalize it to RRGGBB.
    const toRgbHex = (borderValue) => {
        const digits = borderValue.match(/#([0-9a-fA-F]+)/)?.[1];
        if (!digits) {
            return undefined;
        }
        switch (digits.length) {
            case 3:
            case 4:
                // Shorthand form: double each of the three color digits.
                return [...digits.slice(0, 3)].map((digit) => digit + digit).join("").toUpperCase();
            case 6:
            case 8:
                return digits.slice(0, 6).toUpperCase();
            default:
                return undefined;
        }
    };
    // The shorthand is checked first, then the individual sides.
    const borderValues = [style.border, style.borderLeft, style.borderRight, style.borderTop, style.borderBottom];
    for (const borderValue of borderValues) {
        if (!borderValue) {
            continue;
        }
        const hex = toRgbHex(borderValue);
        if (hex) {
            return hex;
        }
    }
    return undefined;
}
|
|
1114
|
+
/**
 * Check if an element is a grid or flex container based on its CSS.
 *
 * The `display` value returned by `getElementStyles` is compared after trimming
 * and lowercasing, because CSS keywords are case-insensitive (an inline
 * `display: FLEX` is still a flex container).
 *
 * @param element The element to check
 * @param cssContext CSS context for style resolution
 * @returns true when the resolved display value is "grid" or "flex"
 */
function isGridOrFlexContainer(element, cssContext) {
    const { display } = getElementStyles(element, cssContext);
    if (typeof display !== "string") {
        return false;
    }
    const normalizedDisplay = display.trim().toLowerCase();
    return normalizedDisplay === "grid" || normalizedDisplay === "flex";
}
|
|
1121
|
+
/**
 * GENERALIZED: Check if an element is a horizontal flex container.
 * A horizontal flex container has display: flex with flex-direction: row or
 * row-reverse (or unset, since row is the default).
 * This is used to detect containers where flex items should be visually separated
 * with a separator character (like " • ") to represent the CSS gap.
 *
 * The effective flex-direction is the inline `style` declaration when present,
 * otherwise the `flexDirection` resolved by `getElementStyles`. Inline styles
 * override stylesheet rules, so an inline `flex-direction: row` wins over a
 * class rule that sets `column`.
 *
 * @param element - The element to check
 * @param cssContext - CSS context for style resolution
 * @returns true if this is a horizontal flex container, false otherwise
 */
function isHorizontalFlexContainer(element, cssContext) {
    const styles = getElementStyles(element, cssContext);
    // Must be a flex container (CSS keywords are case-insensitive)
    const display = typeof styles.display === "string" ? styles.display.trim().toLowerCase() : undefined;
    if (display !== "flex") {
        return false;
    }
    // Resolve the effective direction: inline declaration first, then parsed styles,
    // then the CSS default of "row".
    const inlineStyle = element.getAttribute("style") || "";
    const inlineDirection = inlineStyle.match(/flex-direction\s*:\s*([^;]+)/i)?.[1];
    const direction = (inlineDirection ?? styles.flexDirection ?? "row").trim().toLowerCase();
    // Vertical (column) containers should NOT have separators between items
    return direction !== "column" && direction !== "column-reverse";
}
|
|
1154
|
+
/**
|
|
1155
|
+
* Check if an SVG element is purely decorative (background pattern, decoration, etc).
|
|
1156
|
+
* Decorative SVGs should NOT be converted to chart placeholders.
|
|
1157
|
+
*
|
|
1158
|
+
* Detection criteria (style-based, not class-name-based per skill rules):
|
|
1159
|
+
* 1. Fixed/absolute positioning (background elements)
|
|
1160
|
+
* 2. Very low opacity (< 0.5)
|
|
1161
|
+
* 3. Contains only pattern definitions (<defs>, <pattern>, <linearGradient>)
|
|
1162
|
+
* 4. Is a background overlay (pointer-events: none)
|
|
1163
|
+
* 5. Very small viewBox (icons < 50x50)
|
|
1164
|
+
*/
|
|
1165
|
+
function isDecorativeSvg(svgElement, parentElement, cssContext) {
|
|
1166
|
+
// Check parent container styles
|
|
1167
|
+
const parentStyles = getElementStyles(parentElement, cssContext);
|
|
1168
|
+
const parentInlineStyle = parentElement.getAttribute("style") || "";
|
|
1169
|
+
// Check SVG's own styles
|
|
1170
|
+
const svgInlineStyle = svgElement.getAttribute("style") || "";
|
|
1171
|
+
// 1. Check for fixed/absolute positioning (typically background elements)
|
|
1172
|
+
const positionMatch = parentInlineStyle.match(/position\s*:\s*(fixed|absolute)/i) ||
|
|
1173
|
+
svgInlineStyle.match(/position\s*:\s*(fixed|absolute)/i);
|
|
1174
|
+
if (positionMatch) {
|
|
1175
|
+
return true;
|
|
1176
|
+
}
|
|
1177
|
+
// 2. Check for low opacity (decorative overlays)
|
|
1178
|
+
const parentOpacityMatch = parentInlineStyle.match(/opacity\s*:\s*([0-9.]+)/i);
|
|
1179
|
+
const svgOpacityMatch = svgInlineStyle.match(/opacity\s*:\s*([0-9.]+)/i);
|
|
1180
|
+
const opacityAttr = svgElement.getAttribute("opacity");
|
|
1181
|
+
const opacity = parseFloat(parentOpacityMatch?.[1] || svgOpacityMatch?.[1] || opacityAttr || "1");
|
|
1182
|
+
if (opacity < 0.5) {
|
|
1183
|
+
return true;
|
|
1184
|
+
}
|
|
1185
|
+
// 3. Check for pointer-events: none (non-interactive background)
|
|
1186
|
+
if (parentInlineStyle.includes("pointer-events: none") ||
|
|
1187
|
+
svgInlineStyle.includes("pointer-events: none") ||
|
|
1188
|
+
parentStyles.display === "none") {
|
|
1189
|
+
return true;
|
|
1190
|
+
}
|
|
1191
|
+
// 4. Check if SVG contains only definitions (patterns, gradients) - no actual shapes
|
|
1192
|
+
const hasOnlyDefs = svgElement.children.length > 0 &&
|
|
1193
|
+
Array.from(svgElement.children).every((child) => ["defs", "style", "title", "desc"].includes(child.tagName.toLowerCase()));
|
|
1194
|
+
if (hasOnlyDefs) {
|
|
1195
|
+
return true;
|
|
1196
|
+
}
|
|
1197
|
+
// 5. Check for very small viewBox (likely an icon, not a chart)
|
|
1198
|
+
// Charts must be at least 100x50 to match export.ts isChartSvg()
|
|
1199
|
+
const viewBox = svgElement.getAttribute("viewBox");
|
|
1200
|
+
if (viewBox) {
|
|
1201
|
+
const parts = viewBox.split(/\s+/).map(Number);
|
|
1202
|
+
if (parts.length >= 4) {
|
|
1203
|
+
const width = parts[2] || 0;
|
|
1204
|
+
const height = parts[3] || 0;
|
|
1205
|
+
// Charts must be at least 100x50 - anything smaller is decorative
|
|
1206
|
+
if (width > 0 && height > 0 && (width < 100 || height < 50)) {
|
|
1207
|
+
return true;
|
|
1208
|
+
}
|
|
1209
|
+
}
|
|
1210
|
+
}
|
|
1211
|
+
else {
|
|
1212
|
+
// No viewBox - check width/height attributes
|
|
1213
|
+
const widthAttr = svgElement.getAttribute("width");
|
|
1214
|
+
const heightAttr = svgElement.getAttribute("height");
|
|
1215
|
+
if (widthAttr && heightAttr) {
|
|
1216
|
+
const width = parseFloat(widthAttr) || 0;
|
|
1217
|
+
const height = parseFloat(heightAttr) || 0;
|
|
1218
|
+
// Charts must be at least 100x50
|
|
1219
|
+
if (width > 0 && height > 0 && (width < 100 || height < 50)) {
|
|
1220
|
+
return true;
|
|
1221
|
+
}
|
|
1222
|
+
}
|
|
1223
|
+
}
|
|
1224
|
+
// 6. Check if SVG is purely a pattern/gradient container
|
|
1225
|
+
// These typically have <rect> with fill="url(#pattern)" or only <circle>/<path> with very low complexity
|
|
1226
|
+
const shapes = svgElement.querySelectorAll("rect, circle, ellipse, path, polygon, polyline, line");
|
|
1227
|
+
if (shapes.length > 0) {
|
|
1228
|
+
// Count shapes that use pattern/gradient fills (decorative)
|
|
1229
|
+
let decorativeShapeCount = 0;
|
|
1230
|
+
for (const shape of shapes) {
|
|
1231
|
+
const fill = shape.getAttribute("fill") || "";
|
|
1232
|
+
const stroke = shape.getAttribute("stroke") || "";
|
|
1233
|
+
if (fill.includes("url(#") || stroke.includes("url(#")) {
|
|
1234
|
+
decorativeShapeCount++;
|
|
1235
|
+
}
|
|
1236
|
+
}
|
|
1237
|
+
// If all shapes use pattern/gradient fills, it's likely decorative
|
|
1238
|
+
if (decorativeShapeCount === shapes.length && shapes.length <= 5) {
|
|
1239
|
+
return true;
|
|
1240
|
+
}
|
|
1241
|
+
}
|
|
1242
|
+
// 7. Check if this appears to be a progress circle (small circular skills indicator)
|
|
1243
|
+
// These have circle elements with stroke-dasharray for progress
|
|
1244
|
+
const circles = svgElement.querySelectorAll("circle");
|
|
1245
|
+
if (circles.length > 0 && circles.length <= 2) {
|
|
1246
|
+
const hasStrokeDasharray = Array.from(circles).some((c) => c.getAttribute("stroke-dasharray"));
|
|
1247
|
+
if (hasStrokeDasharray) {
|
|
1248
|
+
// This is a progress indicator, not a chart - treat as decorative for now
|
|
1249
|
+
// In the future, we could extract the percentage
|
|
1250
|
+
return true;
|
|
1251
|
+
}
|
|
1252
|
+
}
|
|
1253
|
+
// 8. Check if parent is a centered flex container (typical hero image pattern)
|
|
1254
|
+
// Hero images typically have: display: flex; align-items: center; justify-content: center
|
|
1255
|
+
// These contain illustrative SVGs which should be included as images, not filtered
|
|
1256
|
+
// We no longer filter these - they are legitimate content images
|
|
1257
|
+
// (Previously we filtered centered flex containers, but hero images should be included)
|
|
1258
|
+
// 9. SVGs with sufficient size should be included as images
|
|
1259
|
+
// We only filter out truly decorative elements (icons, patterns, progress circles)
|
|
1260
|
+
// Large SVGs (illustrations, charts, diagrams) should all be included
|
|
1261
|
+
return false;
|
|
1262
|
+
}
|
|
1263
|
+
/**
 * Recognise a "sidebar + main" two-column CSS grid.
 *
 * The element must resolve to `display: grid` and declare exactly two
 * whitespace-separated tracks in `grid-template-columns`, for example
 * "280px 1fr", "300px auto" or "25% 75%".
 *
 * Sidebar track: a pixel width of at most 400, or a percentage of at most 35.
 * Main track: "auto", a percentage of at least 50, or an `fr` value of at least 1.
 *
 * @param element - Element to inspect.
 * @param cssContext - CSS context forwarded to getElementStyles().
 * @returns The sidebar width as a percentage, or undefined when the element
 *          is not a two-column sidebar layout.
 */
function isTwoColumnGridLayout(element, cssContext) {
    const { display, gridTemplateColumns } = getElementStyles(element, cssContext);
    if (display !== "grid" || !gridTemplateColumns) {
        return undefined;
    }
    const tracks = gridTemplateColumns.trim().split(/\s+/);
    if (tracks.length !== 2) {
        return undefined;
    }
    const [sidebarTrack, mainTrack] = tracks;
    const mainIsFlexible = mainTrack === "1fr" ||
        mainTrack === "auto" ||
        (mainTrack.endsWith("%") && parseFloat(mainTrack) >= 50) ||
        (mainTrack.endsWith("fr") && parseFloat(mainTrack) >= 1);
    if (!mainIsFlexible) {
        return undefined;
    }
    if (sidebarTrack.endsWith("px")) {
        const pixels = parseInt(sidebarTrack, 10);
        // Pixel widths are converted assuming a ~1100px wide document.
        return pixels <= 400 ? Math.round((pixels / 1100) * 100) : undefined;
    }
    if (sidebarTrack.endsWith("%")) {
        const percent = parseFloat(sidebarTrack);
        return percent <= 35 ? percent : undefined;
    }
    return undefined;
}
|
|
1311
|
+
/**
 * Pick the sidebar and main-content elements out of a two-column container.
 *
 * @param container - The grid container whose children are inspected.
 * @returns `[sidebar, main]` (the first two element children), or undefined
 *          when the container has fewer than two element children.
 */
function findTwoColumnChildren(container) {
    const elementChildren = [];
    for (const candidate of Array.from(container.children)) {
        if (candidate.nodeType === Node.ELEMENT_NODE) {
            elementChildren.push(candidate);
        }
    }
    // Position decides the role: first is the sidebar, second is the main content.
    return elementChildren.length >= 2 ? elementChildren.slice(0, 2) : undefined;
}
|
|
1323
|
+
/**
 * GENERALIZED: Detect a flex container whose children should be laid out as
 * side-by-side columns (each child becomes a column in a DOCX table).
 *
 * The check is deliberately permissive: any `display: flex` element with
 * between 2 and 4 element children qualifies. The children's own `flex`
 * values are NOT inspected - they are assumed to share the available width.
 *
 * @param element - Candidate flex container.
 * @param cssContext - CSS context forwarded to getElementStyles().
 * @returns The element children to use as columns, or undefined when the
 *          element is not a flex container or has fewer than 2 / more than 4
 *          element children.
 */
function detectFlexEqualColumns(element, cssContext) {
    // Must be a flex container
    if (getElementStyles(element, cssContext).display !== "flex") {
        return undefined;
    }
    const columns = Array.from(element.children).filter((child) => child.nodeType === Node.ELEMENT_NODE);
    // A single child is not a multi-column layout; more than four is not
    // treated as a column row.
    if (columns.length < 2 || columns.length > 4) {
        return undefined;
    }
    return columns;
}
|
|
1362
|
+
/**
 * Detect a "skill item" purely from its DOM structure.
 *
 * The element must have between 1 and 3 direct children. One of them (the
 * "header") must contain exactly two children, both of which are leaf
 * elements with non-blank text; one of the two texts must be a whole-number
 * percentage such as "95%". Other direct children (for example a progress
 * bar) are ignored.
 *
 * @param element - Candidate skill container.
 * @returns `{ name, percentage }` when the pattern matches, otherwise undefined.
 */
function detectSkillItem(element) {
    const PERCENTAGE = /^\d+%$/;
    const candidates = Array.from(element.children);
    if (candidates.length < 1 || candidates.length > 3) {
        return undefined;
    }
    for (const header of candidates) {
        // A header holds exactly two children: the name and the percentage.
        if (header.children.length !== 2) {
            continue;
        }
        const textLeaves = Array.from(header.children).filter((node) => node.children.length === 0 && node.textContent?.trim());
        if (textLeaves.length !== 2) {
            continue;
        }
        const [firstText, secondText] = textLeaves.map((leaf) => leaf.textContent?.trim() || "");
        // The percentage may come second ("Name 95%") or first ("95% Name").
        if (PERCENTAGE.test(secondText)) {
            return { name: firstText, percentage: secondText };
        }
        if (PERCENTAGE.test(firstText)) {
            return { name: secondText, percentage: firstText };
        }
    }
    return undefined;
}
|
|
1397
|
+
/**
 * Detect a "language item": a short text label (the language name) next to a
 * row of proficiency indicators (dots).
 *
 * Rules applied to the element's direct children:
 *  - fewer than two children, or any h1-h6 child: not a language item;
 *  - a leaf child with text is the language name (it must be at most 50
 *    characters and at most 5 words, otherwise detection is abandoned);
 *  - a child with 3-10 children that are all <span> is the indicator row.
 *
 * If several indicator rows are present the last one wins, and the filled
 * count is always taken from that same row so that
 * `filledCount <= totalCount` holds for the returned value.
 *
 * @param element - Candidate language container.
 * @param cssContext - CSS context forwarded to getElementStyles().
 * @returns `{ name, filledCount, totalCount }` when detected, otherwise undefined.
 */
function detectLanguageItem(element, cssContext) {
    // Decide whether a single indicator counts as "filled" from its background.
    function isFilledIndicator(indicator) {
        const bgColor = getElementStyles(indicator, cssContext).backgroundColor;
        if (!bgColor || bgColor === "transparent" || bgColor === "inherit") {
            return false;
        }
        const hexBg = extractHexColor(bgColor);
        if (!hexBg) {
            return false;
        }
        // Heuristic kept from the original: hex values starting with "FF"
        // (which includes white) are treated as potentially light colours.
        if (hexBg !== "FFFFFF" && !hexBg.startsWith("FF")) {
            return true;
        }
        // Light colours still count as filled unless an rgba() alpha marks
        // them as nearly transparent.
        const alpha = bgColor.match(/rgba.*,\s*([0-9.]+)\s*\)/)?.[1];
        return !alpha || parseFloat(alpha) > 0.15;
    }
    const children = Array.from(element.children);
    if (children.length < 2) {
        return undefined;
    }
    let languageName = "";
    let filledCount = 0;
    let totalCount = 0;
    for (const child of children) {
        // GENERALIZED: headings are never language names; this keeps
        // title blocks (h1 + metadata div) from being detected.
        if (/^h[1-6]$/.test(child.tagName.toLowerCase())) {
            return undefined;
        }
        // A leaf element with text is the language name.
        if (child.children.length === 0 && child.textContent?.trim()) {
            const text = child.textContent.trim();
            // Long text is a title or paragraph, not a language name.
            if (text.length > 50 || text.split(/\s+/).length > 5) {
                return undefined;
            }
            languageName = text;
            continue;
        }
        const indicators = Array.from(child.children);
        const isIndicatorRow = indicators.length >= 3 &&
            indicators.length <= 10 &&
            indicators.every((ind) => ind.tagName.toLowerCase() === "span");
        if (!isIndicatorRow) {
            continue;
        }
        // Assign both counts from the same row. Accumulating filledCount
        // across rows while overwriting totalCount could yield
        // filledCount > totalCount.
        totalCount = indicators.length;
        filledCount = indicators.filter(isFilledIndicator).length;
    }
    if (languageName && totalCount > 0) {
        return { name: languageName, filledCount, totalCount };
    }
    return undefined;
}
|
|
1467
|
+
/**
 * Flatten the content of a container element (such as a sidebar or main
 * column) into a list of document element descriptors.
 *
 * Produces `heading`, `paragraph`, `list` and `table` entries. Text colour is
 * inherited: an element's own colour (from extractTextColor) wins, otherwise
 * the colour handed down from its ancestors is used.
 *
 * Special cases handled for <div> elements:
 *  - skill items (name + percentage) become a single "Name: 95%" paragraph;
 *  - language items (name + proficiency dots) become "Name: ●●●○○".
 *
 * @param element - Container whose child nodes are processed in document order.
 * @param cssContext - CSS context forwarded to the style helpers. When it is
 *        falsy, table cell padding is not looked up.
 * @param inheritedColor - Colour applied to content that declares none.
 * @returns Array of parsed element descriptors.
 */
function parseContainerContent(element, cssContext, inheritedColor) {
    const innerElements = [];
    // Paragraph entries only carry a `color` key when a colour is known.
    function pushParagraph(text, color) {
        innerElements.push(color ? { type: "paragraph", text, color } : { type: "paragraph", text });
    }
    // Give every run that has no colour of its own the fallback colour (mutates the runs).
    function applyFallbackColor(runs, color) {
        if (!color) {
            return;
        }
        for (const run of runs) {
            if (!run.color) {
                run.color = color;
            }
        }
    }
    // Plain-text view of a list of runs.
    function joinRunText(runs) {
        return runs.map((r) => r.text).join("");
    }
    // Recurse into every child node with the given inherited colour.
    function processChildren(parent, color) {
        for (const child of parent.childNodes) {
            processInnerNode(child, color);
        }
    }
    function processInnerNode(node, color) {
        if (node.nodeType === Node.TEXT_NODE) {
            const text = node.textContent?.trim();
            if (text) {
                pushParagraph(text, color);
            }
            return;
        }
        if (node.nodeType !== Node.ELEMENT_NODE || !(node instanceof Element)) {
            return;
        }
        const el = node;
        const tagName = el.tagName.toLowerCase();
        // The element's own colour takes precedence over the inherited one.
        const elementColor = extractTextColor(el, cssContext) || color;
        // Skip decorative SVGs
        if (tagName === "svg") {
            const parent = el.parentElement || el;
            if (isDecorativeSvg(el, parent, cssContext)) {
                return;
            }
        }
        // Headings
        const headingLevel = parseHeadingLevel(tagName);
        if (headingLevel !== null) {
            const text = getTextContent(el).trim();
            if (text) {
                innerElements.push({ type: "heading", level: headingLevel, text, color: elementColor });
            }
            return;
        }
        // Paragraphs
        if (tagName === "p") {
            const runs = extractInlineRuns(el, cssContext);
            if (runs.length > 0) {
                const text = joinRunText(runs);
                applyFallbackColor(runs, elementColor);
                if (hasInlineFormatting(runs)) {
                    innerElements.push({ type: "paragraph", text, runs });
                }
                else {
                    pushParagraph(text, elementColor);
                }
            }
            return;
        }
        // Lists: each <li> becomes either a run list or a plain string.
        if (tagName === "ul" || tagName === "ol") {
            const items = [];
            for (const child of el.children) {
                if (child.tagName.toLowerCase() !== "li") {
                    continue;
                }
                const runs = extractInlineRuns(child, cssContext);
                if (runs.length === 0) {
                    continue;
                }
                applyFallbackColor(runs, elementColor);
                items.push(hasInlineFormatting(runs) ? runs : joinRunText(runs));
            }
            if (items.length > 0) {
                innerElements.push({ type: "list", ordered: tagName === "ol", items });
            }
            return;
        }
        // Tables: empty cells are kept as "" so columns stay aligned.
        if (tagName === "table") {
            const rows = [];
            for (const tr of el.querySelectorAll("tr")) {
                const cells = [];
                for (const cell of tr.querySelectorAll("td, th")) {
                    const runs = extractInlineRuns(cell, cssContext);
                    if (runs.length === 0) {
                        cells.push("");
                    }
                    else {
                        cells.push(hasInlineFormatting(runs) ? runs : joinRunText(runs));
                    }
                }
                if (cells.length > 0) {
                    rows.push(cells);
                }
            }
            if (rows.length > 0) {
                // Cell padding comes from the CSS rule for td, falling back to th.
                let cellPadding;
                if (cssContext) {
                    const tdStyle = cssContext.elementStyles.get("td");
                    const thStyle = cssContext.elementStyles.get("th");
                    const paddingStr = tdStyle?.padding || thStyle?.padding;
                    if (paddingStr) {
                        cellPadding = parseCssPaddingToTwips(paddingStr);
                    }
                }
                innerElements.push({ type: "table", rows, cellPadding });
            }
            return;
        }
        if (tagName === "div") {
            // Skill item: output as a single line, "Skill Name: 95%".
            const skillItem = detectSkillItem(el);
            if (skillItem) {
                pushParagraph(`${skillItem.name}: ${skillItem.percentage}`, elementColor);
                return;
            }
            // Language item: name followed by filled (●) and empty (○) dots.
            const languageItem = detectLanguageItem(el, cssContext);
            if (languageItem) {
                // Clamp the counts: String.prototype.repeat throws a
                // RangeError for a negative count, which would happen if
                // filledCount were ever larger than totalCount.
                const total = Math.max(0, languageItem.totalCount);
                const filled = Math.min(Math.max(0, languageItem.filledCount), total);
                const dots = "●".repeat(filled) + "○".repeat(total - filled);
                pushParagraph(`${languageItem.name}: ${dots}`, elementColor);
                return;
            }
        }
        // Container tags - recurse into children
        if (CONTAINER_TAGS.includes(tagName)) {
            processChildren(el, elementColor);
            return;
        }
        // Fallback: a childless element with text becomes a paragraph,
        // anything else is walked like a container.
        const text = getTextContent(el).trim();
        if (text && el.children.length === 0) {
            pushParagraph(text, elementColor);
        }
        else {
            processChildren(el, elementColor);
        }
    }
    processChildren(element, inheritedColor);
    return innerElements;
}
|
|
1663
|
+
/**
 * Resolve an element's colour from its CSS classes.
 *
 * Classes are tried in the order they appear in the `class` attribute; the
 * first one with an entry in `cssContext.classColors` decides the result.
 *
 * @param element - Element whose `class` attribute is read.
 * @param cssContext - CSS context providing the `classColors` lookup.
 * @returns Whatever extractHexColor() yields for the first matching class
 *          colour, or undefined when there is no class attribute or no
 *          class has a colour.
 */
function getColorFromClasses(element, cssContext) {
    const classAttr = element.getAttribute("class");
    if (!classAttr) {
        return undefined;
    }
    for (const token of classAttr.split(/\s+/)) {
        // Leading/trailing whitespace produces empty tokens - ignore them.
        if (token.length === 0) {
            continue;
        }
        const cssColor = cssContext.classColors.get(token);
        if (cssColor) {
            return extractHexColor(cssColor);
        }
    }
    return undefined;
}
|
|
1679
|
+
/**
 * Tell whether a number is a valid HTML heading level.
 *
 * @param level - Value to check.
 * @returns true when `level` lies in the inclusive range 1..6.
 */
function isHeadingLevel(level) {
    const LOWEST = 1;
    const HIGHEST = 6;
    return LOWEST <= level && HIGHEST >= level;
}
|
|
1682
|
+
/**
 * Extract the heading level from a tag name.
 *
 * The match is case-sensitive, so the tag name is expected in lower case.
 *
 * @param tagName - Tag name such as "h2".
 * @returns The level (1-6) for "h1".."h6", or null for any other tag name.
 */
function parseHeadingLevel(tagName) {
    const digit = /^h([1-6])$/.exec(tagName)?.[1];
    if (digit === undefined) {
        return null;
    }
    const level = parseInt(digit, 10);
    return isHeadingLevel(level) ? level : null;
}
|
|
1693
|
+
/**
 * Determine an element's text alignment.
 *
 * An inline `style="text-align: ..."` declaration takes precedence over the
 * value resolved from the stylesheet via getElementStyles(), mirroring how
 * the CSS cascade ranks the style attribute above ordinary rules. Class
 * names are never interpreted directly.
 *
 * @param element - Element to inspect.
 * @param cssContext - CSS context forwarded to getElementStyles().
 * @returns "left", "center", "right" or "justify" (lower case), or undefined
 *          when no supported alignment is declared.
 */
function getTextAlignment(element, cssContext) {
    // Inline style first: it overrides stylesheet rules.
    const inlineStyle = element.getAttribute("style") || "";
    const inlineMatch = inlineStyle.match(/text-align\s*:\s*(left|center|right|justify)/i);
    if (inlineMatch) {
        return inlineMatch[1].toLowerCase();
    }
    // Fall back to the alignment resolved from CSS rules.
    const cssAlign = getElementStyles(element, cssContext).textAlign?.toLowerCase();
    if (cssAlign === "left" || cssAlign === "center" || cssAlign === "right" || cssAlign === "justify") {
        return cssAlign;
    }
    return undefined;
}
|
|
1715
|
+
/**
 * Read a node's text content without ever returning null or undefined.
 *
 * @param element - Node whose `textContent` is read.
 * @returns The text content as a string; an empty string when the property
 *          is null, undefined or otherwise falsy.
 */
function getTextContent(element) {
    // textContent is null for some node types (e.g. document, doctype).
    const raw = element.textContent;
    return raw ? String(raw) : "";
}
|
|
1723
|
+
/**
 * Lower-case tag names treated as inline content.
 *
 * A container holding only text and these elements can be emitted as one
 * paragraph made of several runs instead of being split into multiple
 * paragraphs.
 */
const INLINE_TAGS = [
    // generic wrappers, links and basic text formatting
    "span a b strong i em u s strike del ins",
    // typographic and semantic phrase elements
    "sub sup small mark abbr cite code kbd samp var",
    // data, quotation, ruby and bidirectional text helpers, line breaks
    "time data q dfn ruby rt rp bdi bdo wbr br",
    // form labels and legacy font markup
    "label font",
].flatMap((group) => group.split(" "));
|
|
1733
|
+
/**
 * Decide whether a container holds nothing but inline content.
 *
 * Text nodes and non-element nodes (such as comments) never disqualify a
 * container. Element children must be listed in INLINE_TAGS and must
 * themselves contain only inline content (checked recursively). Any other
 * element is treated as block-level.
 *
 * @param element - Container to inspect.
 * @returns true when the container can be rendered as a single paragraph,
 *          false as soon as a block-level descendant is found.
 */
function isInlineOnlyContainer(element) {
    return Array.from(element.childNodes).every((node) => {
        const isTextNode = node.nodeType === Node.TEXT_NODE;
        if (isTextNode || node.nodeType !== Node.ELEMENT_NODE || !(node instanceof Element)) {
            // Text, comments and other non-element nodes are always inline.
            return true;
        }
        // Inline elements are acceptable only if their own content is inline too.
        return INLINE_TAGS.includes(node.tagName.toLowerCase()) && isInlineOnlyContainer(node);
    });
}
|
|
1765
|
+
/**
 * Tell whether a tag name marks bold text.
 *
 * @param tagName - Lower-case tag name.
 * @returns true for "strong" and "b", false for anything else.
 */
function isBoldTag(tagName) {
    return ["strong", "b"].includes(tagName);
}
|
|
1771
|
+
/**
 * Tell whether a tag name marks italic text.
 *
 * @param tagName - Lower-case tag name.
 * @returns true for "em" and "i", false for anything else.
 */
function isItalicTag(tagName) {
    return ["em", "i"].includes(tagName);
}
|
|
1777
|
+
/**
|
|
1778
|
+
* Extract inline runs with formatting from an element.
|
|
1779
|
+
* Walks the DOM tree and collects text with bold/italic state.
|
|
1780
|
+
* Also extracts CSS ::before and ::after pseudo-element content.
|
|
1781
|
+
*/
|
|
1782
|
+
function extractInlineRuns(element, cssContext, inheritedColor, inheritedFontFamily) {
|
|
1783
|
+
const runs = [];
|
|
1784
|
+
// GENERALIZED: Extract ::before pseudo-element content
|
|
1785
|
+
// This handles CSS rules like "dd.dish-pairings::before { content: 'Suggested Pairing: '; }"
|
|
1786
|
+
if (typeof window !== "undefined" && window.getComputedStyle) {
|
|
1787
|
+
try {
|
|
1788
|
+
const beforeStyles = window.getComputedStyle(element, "::before");
|
|
1789
|
+
const beforeContent = beforeStyles.content;
|
|
1790
|
+
// content property returns "none" or a quoted string like '"Suggested Pairing: "'
|
|
1791
|
+
if (beforeContent && beforeContent !== "none" && beforeContent !== "normal") {
|
|
1792
|
+
// Remove quotes from the content string
|
|
1793
|
+
const cleanContent = beforeContent.replace(/^["']|["']$/g, "");
|
|
1794
|
+
if (cleanContent) {
|
|
1795
|
+
// Get styling from the pseudo-element
|
|
1796
|
+
const fontWeight = beforeStyles.fontWeight;
|
|
1797
|
+
const fontStyle = beforeStyles.fontStyle;
|
|
1798
|
+
const color = beforeStyles.color ? extractHexColor(beforeStyles.color) : undefined;
|
|
1799
|
+
runs.push({
|
|
1800
|
+
text: cleanContent,
|
|
1801
|
+
bold: fontWeight === "700" || fontWeight === "bold" || fontWeight === "600" || undefined,
|
|
1802
|
+
italic: fontStyle === "italic" || undefined,
|
|
1803
|
+
color,
|
|
1804
|
+
});
|
|
1805
|
+
}
|
|
1806
|
+
}
|
|
1807
|
+
}
|
|
1808
|
+
catch {
|
|
1809
|
+
// getComputedStyle may fail in some environments
|
|
1810
|
+
}
|
|
1811
|
+
}
|
|
1812
|
+
function walkNode(node, bold, italic, color, backgroundColor, superscript, subscript, underline, fontFamily, size) {
|
|
1813
|
+
if (node.nodeType === Node.TEXT_NODE) {
|
|
1814
|
+
const text = node.textContent || "";
|
|
1815
|
+
if (text) {
|
|
1816
|
+
// Normalize whitespace but preserve spaces between words
|
|
1817
|
+
const normalizedText = text.replace(/\s+/g, " ");
|
|
1818
|
+
if (normalizedText.trim() || normalizedText === " ") {
|
|
1819
|
+
runs.push({ text: normalizedText, bold, italic, color, backgroundColor, superscript, subscript, underline, fontFamily, size });
|
|
1820
|
+
}
|
|
1821
|
+
}
|
|
1822
|
+
return;
|
|
1823
|
+
}
|
|
1824
|
+
if (node.nodeType !== Node.ELEMENT_NODE || !(node instanceof Element)) {
|
|
1825
|
+
return;
|
|
1826
|
+
}
|
|
1827
|
+
const tagName = node.tagName.toLowerCase();
|
|
1828
|
+
// Handle <br> tags - insert a newline
|
|
1829
|
+
if (tagName === "br") {
|
|
1830
|
+
runs.push({ text: "\n", bold, italic, color, backgroundColor, fontFamily, size });
|
|
1831
|
+
return;
|
|
1832
|
+
}
|
|
1833
|
+
let newBold = bold || isBoldTag(tagName);
|
|
1834
|
+
let newItalic = italic || isItalicTag(tagName);
|
|
1835
|
+
let newSuperscript = superscript || tagName === "sup";
|
|
1836
|
+
let newSubscript = subscript || tagName === "sub";
|
|
1837
|
+
let newUnderline = underline;
|
|
1838
|
+
let newFontFamily = fontFamily;
|
|
1839
|
+
let newSize = size;
|
|
1840
|
+
// Handle underline from <u> tag or <abbr> tag
|
|
1841
|
+
if (tagName === "u") {
|
|
1842
|
+
newUnderline = { type: "single" };
|
|
1843
|
+
}
|
|
1844
|
+
else if (tagName === "abbr") {
|
|
1845
|
+
// abbr elements typically have dotted underlines (border-bottom: dotted)
|
|
1846
|
+
// Check CSS for border-bottom color
|
|
1847
|
+
let underlineColor;
|
|
1848
|
+
if (cssContext) {
|
|
1849
|
+
const styles = getElementStyles(node, cssContext);
|
|
1850
|
+
if (styles.borderBottom) {
|
|
1851
|
+
// Extract color from border-bottom (e.g., "1px dotted #b8860b")
|
|
1852
|
+
const colorMatch = styles.borderBottom.match(/#([0-9a-fA-F]{3,6})/i);
|
|
1853
|
+
if (colorMatch) {
|
|
1854
|
+
underlineColor = colorMatch[1].toUpperCase();
|
|
1855
|
+
}
|
|
1856
|
+
}
|
|
1857
|
+
}
|
|
1858
|
+
newUnderline = { type: "dotted", color: underlineColor };
|
|
1859
|
+
}
|
|
1860
|
+
// Extract colors from this element
|
|
1861
|
+
let newColor = color;
|
|
1862
|
+
let newBackgroundColor = backgroundColor;
|
|
1863
|
+
if (cssContext) {
|
|
1864
|
+
const styles = getElementStyles(node, cssContext);
|
|
1865
|
+
if (styles.color) {
|
|
1866
|
+
const hexColor = extractHexColor(styles.color);
|
|
1867
|
+
if (hexColor)
|
|
1868
|
+
newColor = hexColor;
|
|
1869
|
+
}
|
|
1870
|
+
if (styles.backgroundColor) {
|
|
1871
|
+
const hexBg = extractHexColor(styles.backgroundColor);
|
|
1872
|
+
if (hexBg)
|
|
1873
|
+
newBackgroundColor = hexBg;
|
|
1874
|
+
}
|
|
1875
|
+
// Check for font-weight: bold/700/600 from CSS classes
|
|
1876
|
+
if (styles.fontWeight === "700" || styles.fontWeight === "bold" || styles.fontWeight === "600") {
|
|
1877
|
+
newBold = true;
|
|
1878
|
+
}
|
|
1879
|
+
// Check for font-style: italic from CSS classes
|
|
1880
|
+
if (styles.fontStyle === "italic") {
|
|
1881
|
+
newItalic = true;
|
|
1882
|
+
}
|
|
1883
|
+
// Check for font-family from CSS classes
|
|
1884
|
+
if (styles.fontFamily) {
|
|
1885
|
+
newFontFamily = styles.fontFamily;
|
|
1886
|
+
}
|
|
1887
|
+
// GENERALIZED: Check for font-size from CSS classes
|
|
1888
|
+
if (styles.fontSize) {
|
|
1889
|
+
const halfPoints = parseFontSizeToHalfPoints(styles.fontSize);
|
|
1890
|
+
if (halfPoints)
|
|
1891
|
+
newSize = halfPoints;
|
|
1892
|
+
}
|
|
1893
|
+
}
|
|
1894
|
+
// Also check inline styles
|
|
1895
|
+
const inlineStyle = node.getAttribute("style") || "";
|
|
1896
|
+
if (inlineStyle) {
|
|
1897
|
+
const colorMatch = inlineStyle.match(/(?:^|;)\s*color\s*:\s*([^;]+)/i);
|
|
1898
|
+
if (colorMatch) {
|
|
1899
|
+
const hexColor = extractHexColor(colorMatch[1]);
|
|
1900
|
+
if (hexColor)
|
|
1901
|
+
newColor = hexColor;
|
|
1902
|
+
}
|
|
1903
|
+
const bgMatch = inlineStyle.match(/background(?:-color)?\s*:\s*([^;]+)/i);
|
|
1904
|
+
if (bgMatch) {
|
|
1905
|
+
const hexBg = extractHexColor(bgMatch[1]);
|
|
1906
|
+
if (hexBg)
|
|
1907
|
+
newBackgroundColor = hexBg;
|
|
1908
|
+
}
|
|
1909
|
+
// Check inline font-weight
|
|
1910
|
+
const weightMatch = inlineStyle.match(/font-weight\s*:\s*([^;]+)/i);
|
|
1911
|
+
if (weightMatch) {
|
|
1912
|
+
const weight = weightMatch[1].trim();
|
|
1913
|
+
if (weight === "700" || weight === "bold" || weight === "600") {
|
|
1914
|
+
newBold = true;
|
|
1915
|
+
}
|
|
1916
|
+
}
|
|
1917
|
+
// Check inline font-style
|
|
1918
|
+
const styleMatch = inlineStyle.match(/font-style\s*:\s*([^;]+)/i);
|
|
1919
|
+
if (styleMatch) {
|
|
1920
|
+
const style = styleMatch[1].trim();
|
|
1921
|
+
if (style === "italic") {
|
|
1922
|
+
newItalic = true;
|
|
1923
|
+
}
|
|
1924
|
+
}
|
|
1925
|
+
// Check inline font-family
|
|
1926
|
+
const fontFamilyMatch = inlineStyle.match(/font-family\s*:\s*([^;]+)/i);
|
|
1927
|
+
if (fontFamilyMatch) {
|
|
1928
|
+
const primaryFont = extractPrimaryFont(fontFamilyMatch[1].trim());
|
|
1929
|
+
if (primaryFont) {
|
|
1930
|
+
newFontFamily = primaryFont;
|
|
1931
|
+
}
|
|
1932
|
+
}
|
|
1933
|
+
// GENERALIZED: Check inline font-size
|
|
1934
|
+
const fontSizeMatch = inlineStyle.match(/font-size\s*:\s*([^;]+)/i);
|
|
1935
|
+
if (fontSizeMatch) {
|
|
1936
|
+
const halfPoints = parseFontSizeToHalfPoints(fontSizeMatch[1].trim());
|
|
1937
|
+
if (halfPoints)
|
|
1938
|
+
newSize = halfPoints;
|
|
1939
|
+
}
|
|
1940
|
+
}
|
|
1941
|
+
for (const child of node.childNodes) {
|
|
1942
|
+
walkNode(child, newBold, newItalic, newColor, newBackgroundColor, newSuperscript, newSubscript, newUnderline, newFontFamily, newSize);
|
|
1943
|
+
}
|
|
1944
|
+
}
|
|
1945
|
+
// GENERALIZED: Extract styles from the element itself (e.g., <td class="positive">)
|
|
1946
|
+
// These styles should be inherited by all text children
|
|
1947
|
+
let elementBold = false;
|
|
1948
|
+
let elementItalic = false;
|
|
1949
|
+
let elementColor = inheritedColor;
|
|
1950
|
+
let elementBackgroundColor;
|
|
1951
|
+
let elementFontFamily = inheritedFontFamily;
|
|
1952
|
+
let elementSize;
|
|
1953
|
+
if (cssContext) {
|
|
1954
|
+
const styles = getElementStyles(element, cssContext);
|
|
1955
|
+
if (styles.color) {
|
|
1956
|
+
const hexColor = extractHexColor(styles.color);
|
|
1957
|
+
if (hexColor)
|
|
1958
|
+
elementColor = hexColor;
|
|
1959
|
+
}
|
|
1960
|
+
if (styles.backgroundColor) {
|
|
1961
|
+
const hexBg = extractHexColor(styles.backgroundColor);
|
|
1962
|
+
if (hexBg)
|
|
1963
|
+
elementBackgroundColor = hexBg;
|
|
1964
|
+
}
|
|
1965
|
+
if (styles.fontWeight === "700" || styles.fontWeight === "bold" || styles.fontWeight === "600") {
|
|
1966
|
+
elementBold = true;
|
|
1967
|
+
}
|
|
1968
|
+
if (styles.fontStyle === "italic") {
|
|
1969
|
+
elementItalic = true;
|
|
1970
|
+
}
|
|
1971
|
+
if (styles.fontFamily) {
|
|
1972
|
+
elementFontFamily = styles.fontFamily;
|
|
1973
|
+
}
|
|
1974
|
+
// GENERALIZED: Extract font-size from element's CSS classes
|
|
1975
|
+
if (styles.fontSize) {
|
|
1976
|
+
const halfPoints = parseFontSizeToHalfPoints(styles.fontSize);
|
|
1977
|
+
if (halfPoints)
|
|
1978
|
+
elementSize = halfPoints;
|
|
1979
|
+
}
|
|
1980
|
+
}
|
|
1981
|
+
for (const child of element.childNodes) {
|
|
1982
|
+
walkNode(child, elementBold, elementItalic, elementColor, elementBackgroundColor, false, false, undefined, elementFontFamily, elementSize);
|
|
1983
|
+
}
|
|
1984
|
+
// Merge adjacent runs with same formatting and normalize
|
|
1985
|
+
return mergeAndNormalizeRuns(runs);
|
|
1986
|
+
}
|
|
1987
|
+
/**
 * Merge adjacent runs that share identical formatting, then strip the
 * whitespace at the outer edges of the resulting run list.
 *
 * Two neighbouring runs are combined only when bold, italic, color,
 * backgroundColor, superscript, subscript, fontFamily, size and underline
 * (type and color) all match. Whitespace-only runs at either edge are
 * dropped before trimming, so the first surviving run never starts with
 * whitespace and the last never ends with it. Interior whitespace is kept.
 *
 * @param runs - Inline runs in document order; the input objects are not mutated.
 * @returns A new array of merged runs, none of which has empty text.
 */
function mergeAndNormalizeRuns(runs) {
    if (runs.length === 0)
        return [];
    // Underline is an object ({ type, color }), so it has to be compared by value.
    const underlineEqual = (a, b) => {
        if (!a && !b)
            return true;
        if (!a || !b)
            return false;
        return a.type === b.type && a.color === b.color;
    };
    const scalarKeys = [
        "bold", "italic", "color", "backgroundColor",
        "superscript", "subscript", "fontFamily", "size",
    ];
    const sameFormatting = (a, b) => scalarKeys.every((key) => a[key] === b[key]) &&
        underlineEqual(a.underline, b.underline);
    const merged = [];
    for (const run of runs) {
        const last = merged[merged.length - 1];
        if (last && sameFormatting(last, run)) {
            last.text += run.text;
        }
        else {
            // Copy so that later concatenation never touches the caller's object.
            merged.push({ ...run });
        }
    }
    // Remove whitespace-only runs at both edges first. Trimming only the
    // first/last run would otherwise leave the whitespace of their neighbours
    // in place once the emptied edge run is filtered out.
    while (merged.length > 0 && merged[0].text.trim() === "") {
        merged.shift();
    }
    while (merged.length > 0 && merged[merged.length - 1].text.trim() === "") {
        merged.pop();
    }
    if (merged.length > 0) {
        merged[0].text = merged[0].text.trimStart();
        merged[merged.length - 1].text = merged[merged.length - 1].text.trimEnd();
    }
    // Interior runs may still be empty (e.g. an empty text node); drop them.
    return merged.filter((r) => r.text.length > 0);
}
|
|
2029
|
+
/**
 * Tell whether at least one run carries a truthy formatting attribute
 * (bold, italic, color, backgroundColor, fontFamily, size, superscript,
 * subscript or underline).
 *
 * @param runs - Array of inline runs to inspect.
 * @returns true as soon as any run has any of those attributes set.
 */
function hasInlineFormatting(runs) {
    const formattingKeys = [
        "bold", "italic", "color", "backgroundColor", "fontFamily",
        "size", "superscript", "subscript", "underline",
    ];
    return runs.some((run) => formattingKeys.some((key) => Boolean(run[key])));
}
|
|
2035
|
+
/**
 * Lower-case tag names that count as block-level content.
 * A <li> holding any of these is considered complex and is meant to be
 * processed recursively rather than flattened to inline text.
 */
const BLOCK_LEVEL_TAGS = new Set([
    "div p table ul ol blockquote pre figure",
    "svg h1 h2 h3 h4 h5 h6 section article",
    "aside nav header footer main form fieldset",
    "dl dd dt address hr canvas video audio",
    "picture iframe object embed details summary",
].flatMap((group) => group.split(" ")));
|
|
2047
|
+
/**
 * Report whether any direct child element of `element` has a tag listed in
 * BLOCK_LEVEL_TAGS. Only immediate children are examined, not deeper
 * descendants.
 *
 * @param element - Element whose `children` collection is scanned.
 * @returns true if at least one direct child is block-level.
 */
function hasBlockLevelChildren(element) {
    return Array.from(element.children).some((child) => BLOCK_LEVEL_TAGS.has(child.tagName.toLowerCase()));
}
|
|
2060
|
+
/**
 * Report whether `element` has a <ul> or <ol> anywhere among its descendants.
 *
 * @param element - Element to search (typically a list item).
 * @returns true when `querySelector("ul, ol")` finds a match.
 */
function hasNestedLists(element) {
    const firstNestedList = element.querySelector("ul, ol");
    return firstNestedList !== null;
}
|
|
2066
|
+
/**
 * Flatten a ul/ol element into a linear list of items, recording the nesting
 * depth of each item so indentation can be reproduced in DOCX.
 *
 * For every direct <li> child the text and non-list element children are
 * turned into inline runs; nested <ul>/<ol> children are processed
 * recursively one level deeper and appended right after their parent item.
 * Items whose inline content is empty or whitespace-only are omitted.
 *
 * @param listElement - The ul or ol element to extract items from
 * @param cssContext - CSS context forwarded to the inline-run extraction
 * @param level - Nesting level assigned to this list's items (0 = top level)
 * @param parentOrdered - When defined, used as the `ordered` flag for this
 *   list's items; when undefined the flag is derived from the tag name
 *   (recursive calls pass whether the nested list itself is an <ol>)
 * @returns Array of { content, level, ordered } objects; `content` is the
 *   run array when any run is formatted, otherwise the concatenated text
 */
function extractNestedListItems(listElement, cssContext, level = 0, parentOrdered) {
    const listTag = listElement.tagName.toLowerCase();
    const ordered = parentOrdered === undefined ? listTag === "ol" : parentOrdered;
    const isListTag = (name) => name === "ul" || name === "ol";
    const collected = [];
    for (const li of listElement.children) {
        if (li.tagName.toLowerCase() !== "li") {
            continue;
        }
        // Partition the li's children: nested lists go to recursion,
        // everything else (text nodes and other elements) is inline content.
        const inlineNodes = [];
        const sublists = [];
        for (const node of li.childNodes) {
            if (node.nodeType === Node.TEXT_NODE) {
                inlineNodes.push(node);
            }
            else if (node.nodeType === Node.ELEMENT_NODE) {
                const bucket = isListTag(node.tagName.toLowerCase()) ? sublists : inlineNodes;
                bucket.push(node);
            }
        }
        if (inlineNodes.length > 0) {
            const runs = extractInlineRunsFromNodes(inlineNodes, cssContext);
            const hasVisibleText = runs.length > 0 && runs.some((run) => run.text.trim());
            if (hasVisibleText) {
                // Keep the structured runs only when formatting would otherwise be lost.
                const content = hasInlineFormatting(runs)
                    ? runs
                    : runs.map((run) => run.text).join("");
                collected.push({ content, level, ordered });
            }
        }
        for (const sublist of sublists) {
            const sublistOrdered = sublist.tagName.toLowerCase() === "ol";
            collected.push(...extractNestedListItems(sublist, cssContext, level + 1, sublistOrdered));
        }
    }
    return collected;
}
|
|
2125
|
+
/**
 * Build inline runs from an explicit list of nodes instead of from a whole
 * element. Any <ul>/<ol> met during the walk is skipped together with its
 * subtree.
 *
 * Formatting is accumulated while descending: <strong>/<b> and a CSS
 * font-weight of "700", "bold" or "600" switch bold on, <em>/<i> and CSS
 * font-style "italic" switch italic on, <sup>/<sub> set super/subscript, and
 * CSS color / background-color replace the inherited color when
 * extractHexColor can resolve them.
 *
 * @param nodes - Nodes to walk, in document order.
 * @param cssContext - CSS context passed through to getElementStyles.
 * @returns The collected runs after mergeAndNormalizeRuns.
 */
function extractInlineRunsFromNodes(nodes, cssContext) {
    const collected = [];
    const boldWeights = ["700", "bold", "600"];
    const visit = (node, inherited) => {
        if (node.nodeType === Node.TEXT_NODE) {
            const text = node.textContent || "";
            if (text) {
                // Falsy flags are stored as undefined so unformatted runs stay bare.
                collected.push({
                    text,
                    bold: inherited.bold || undefined,
                    italic: inherited.italic || undefined,
                    color: inherited.color,
                    backgroundColor: inherited.backgroundColor,
                    superscript: inherited.superscript || undefined,
                    subscript: inherited.subscript || undefined,
                });
            }
            return;
        }
        if (node.nodeType !== Node.ELEMENT_NODE) {
            return;
        }
        const tag = node.tagName.toLowerCase();
        // Nested lists are handled by the caller, never as inline text.
        if (tag === "ul" || tag === "ol") {
            return;
        }
        const css = getElementStyles(node, cssContext);
        const current = {
            bold: inherited.bold || tag === "strong" || tag === "b" || boldWeights.includes(css.fontWeight),
            italic: inherited.italic || tag === "em" || tag === "i" || css.fontStyle === "italic",
            color: inherited.color,
            backgroundColor: inherited.backgroundColor,
            superscript: inherited.superscript || tag === "sup",
            subscript: inherited.subscript || tag === "sub",
        };
        if (css.color) {
            const resolvedColor = extractHexColor(css.color);
            if (resolvedColor) {
                current.color = resolvedColor;
            }
        }
        if (css.backgroundColor) {
            const resolvedBackground = extractHexColor(css.backgroundColor);
            if (resolvedBackground) {
                current.backgroundColor = resolvedBackground;
            }
        }
        for (const child of node.childNodes) {
            visit(child, current);
        }
    };
    const unformatted = {
        bold: false,
        italic: false,
        color: undefined,
        backgroundColor: undefined,
        superscript: undefined,
        subscript: undefined,
    };
    for (const node of nodes) {
        visit(node, unformatted);
    }
    return mergeAndNormalizeRuns(collected);
}
|
|
2189
|
+
/**
 * Decide whether an element should be rendered as a blockquote/callout.
 *
 * Returns true for:
 * - a <blockquote> element;
 * - a <p> with a border signal (inline "border-left", a stylesheet `border`
 *   containing "solid", a stylesheet border-color, or a stylesheet border-left);
 * - a div/section/article/header that has a real background (set, and neither
 *   "transparent" nor "inherit") together with either a border signal or
 *   padding plus non-blank text.
 * A div/section/article/header containing `figure img` or `figure picture`
 * is never treated as a callout.
 *
 * @param element - Element to classify.
 * @param cssContext - CSS context passed to getElementStyles.
 * @returns true when the element matches one of the patterns above.
 */
function isBlockquoteOrCallout(element, cssContext) {
    const tag = element.tagName.toLowerCase();
    if (tag === "blockquote") {
        return true;
    }
    const css = getElementStyles(element, cssContext);
    const inlineCss = element.getAttribute("style") || "";
    // Detection is purely visual (never class-name based).
    const leftBorder = inlineCss.includes("border-left") ||
        css.border?.includes("solid") ||
        !!css.borderColor ||
        !!css.borderLeft;
    if (tag === "p") {
        // e.g. <p class="intro-section"> styled with border-left: 4px solid ...
        return leftBorder;
    }
    const containerTags = ["div", "section", "article", "header"];
    if (!containerTags.includes(tag)) {
        return false;
    }
    // Containers wrapping figure images are left to the figure/image handling path.
    if (element.querySelectorAll("figure img, figure picture").length > 0) {
        return false;
    }
    const background = css.backgroundColor;
    const hasBackground = !!background && background !== "transparent" && background !== "inherit";
    if (!hasBackground) {
        return false;
    }
    const borderShorthand = css.border || "";
    if (leftBorder || borderShorthand.includes("solid") || borderShorthand.includes("px")) {
        // Background plus border: callout-style box.
        return true;
    }
    // Background plus padding plus text: styled content box (CTA, key takeaways, ...).
    const padded = !!css.padding || inlineCss.includes("padding");
    const visibleLength = element.textContent?.trim().length;
    return padded && Boolean(visibleLength && visibleLength > 0);
}
|
|
2255
|
+
/**
|
|
2256
|
+
* Extract color from CSS color value (hex or rgb).
|
|
2257
|
+
*/
|
|
2258
|
+
/**
 * Find the color of an element's bottom border, for borders that should be
 * rendered as a horizontal rule.
 *
 * Sources are tried in this order and the first resolvable color wins:
 *   1. inline `border-bottom` declaration,
 *   2. stylesheet `border-bottom`,
 *   3. stylesheet `border` shorthand,
 *   4. stylesheet `border-color`.
 * In a border value a hex color is preferred; otherwise a `var(--name)`
 * reference is looked up in `cssContext.variables` and converted with
 * extractHexColor.
 *
 * @param element - Element to inspect.
 * @param cssContext - CSS context; `variables` (a Map of custom properties)
 *   is consulted for var() references and may be absent.
 * @returns Six-digit upper-case hex color without "#", or undefined.
 */
function extractBorderBottomColor(element, cssContext) {
    // Resolve the color inside one border value such as "1px solid #e5e7eb".
    const colorFromBorderValue = (borderValue) => {
        // Accept #RGB, #RGBA, #RRGGBB and #RRGGBBAA; the alpha digits are
        // dropped because the result must always be exactly six digits.
        const hexMatch = borderValue.match(/#([0-9a-fA-F]{8}|[0-9a-fA-F]{6}|[0-9a-fA-F]{3,4})(?![0-9a-fA-F])/);
        if (hexMatch) {
            const digits = hexMatch[1];
            const rgb = digits.length <= 4
                ? [...digits.slice(0, 3)].map((d) => d + d).join("")
                : digits.slice(0, 6);
            return rgb.toUpperCase();
        }
        const varMatch = borderValue.match(/var\s*\(\s*(--[a-zA-Z0-9-]+)\s*\)/);
        if (varMatch) {
            const varValue = cssContext?.variables?.get(varMatch[1]);
            if (varValue) {
                const hex = extractHexColor(varValue);
                if (hex)
                    return hex;
            }
        }
        return undefined;
    };
    // Inline style first: it overrides stylesheet rules.
    const inlineStyle = element.getAttribute("style") || "";
    const inlineMatch = inlineStyle.match(/border-bottom\s*:\s*([^;]+)/i);
    if (inlineMatch) {
        const inlineColor = colorFromBorderValue(inlineMatch[1]);
        if (inlineColor)
            return inlineColor;
    }
    const styles = getElementStyles(element, cssContext);
    // border-bottom is the most specific; the border shorthand is the fallback.
    for (const borderValue of [styles.borderBottom, styles.border]) {
        if (!borderValue)
            continue;
        const color = colorFromBorderValue(borderValue);
        if (color)
            return color;
    }
    if (styles.borderColor) {
        const hex = extractHexColor(styles.borderColor);
        if (hex)
            return hex;
    }
    return undefined;
}
|
|
2343
|
+
/**
 * Extract the color of an element's `border-top`.
 *
 * The inline `style` attribute is consulted first because inline
 * declarations override stylesheet rules; the stylesheet value from
 * getElementStyles is the fallback. In the inline value a hex color is
 * preferred, then a `var(--name)` reference looked up in
 * `cssContext.variables`. For the stylesheet value a hex color is preferred,
 * then whatever extractHexColor can resolve from the whole value.
 *
 * @param element - Element to inspect.
 * @param cssContext - CSS context; `variables` (a Map of custom properties)
 *   is consulted for inline var() references and may be absent.
 * @returns Six-digit upper-case hex color without "#", or undefined.
 */
function extractBorderTopColor(element, cssContext) {
    // Accept #RGB, #RGBA, #RRGGBB and #RRGGBBAA; alpha digits are dropped so
    // the result is always exactly six digits.
    const hexFromBorderValue = (borderValue) => {
        const hexMatch = borderValue.match(/#([0-9a-fA-F]{8}|[0-9a-fA-F]{6}|[0-9a-fA-F]{3,4})(?![0-9a-fA-F])/);
        if (!hexMatch)
            return undefined;
        const digits = hexMatch[1];
        const rgb = digits.length <= 4
            ? [...digits.slice(0, 3)].map((d) => d + d).join("")
            : digits.slice(0, 6);
        return rgb.toUpperCase();
    };
    // 1. Inline style (e.g. style="border-top: 1px solid #e5e7eb").
    const inlineStyle = element.getAttribute("style") || "";
    const inlineMatch = inlineStyle.match(/border-top\s*:\s*([^;]+)/i);
    if (inlineMatch) {
        const borderValue = inlineMatch[1];
        const hex = hexFromBorderValue(borderValue);
        if (hex)
            return hex;
        const varMatch = borderValue.match(/var\s*\(\s*(--[a-zA-Z0-9-]+)\s*\)/);
        if (varMatch) {
            const varValue = cssContext?.variables?.get(varMatch[1]);
            if (varValue) {
                const resolved = extractHexColor(varValue);
                if (resolved)
                    return resolved;
            }
        }
    }
    // 2. Stylesheet rule (e.g. .menu-footer { border-top: 2px solid #b8860b; }).
    const styles = getElementStyles(element, cssContext);
    if (styles.borderTop) {
        const hex = hexFromBorderValue(styles.borderTop);
        if (hex)
            return hex;
        // Non-hex forms are delegated to extractHexColor.
        const resolved = extractHexColor(styles.borderTop);
        if (resolved)
            return resolved;
    }
    return undefined;
}
|
|
2394
|
+
/**
 * Parse a single CSS length value to twips.
 * Supports px, rem, em and pt (units are matched case-insensitively, as in
 * CSS); a bare number is treated as px.
 *
 * Conversions:
 * - 1 inch = 1440 twips
 * - 1 point = 20 twips
 * - 1 rem / 1 em = 16px (assumed base) = 240 twips
 * - 1px ≈ 0.75pt ≈ 15 twips
 *
 * @param value - Length such as "10px", "0.5rem", "12pt" or "8".
 * @returns The rounded length in twips, or undefined when `value` is not a
 *   string or is not a well-formed number with a supported unit.
 */
function parseCssLengthToTwips(value) {
    if (typeof value !== "string")
        return undefined;
    // The numeric part must be a complete decimal number: inputs like "." or
    // "1.2.3" are rejected instead of yielding NaN or a truncated value.
    const match = value.trim().match(/^(-?(?:\d+\.?\d*|\.\d+))(px|rem|em|pt)?$/i);
    if (!match)
        return undefined;
    const num = parseFloat(match[1]);
    const unit = (match[2] || "px").toLowerCase();
    switch (unit) {
        case "px":
            return Math.round(num * 15); // 1px ≈ 15 twips
        case "rem":
        case "em":
            return Math.round(num * 16 * 15); // 1rem = 16px = 240 twips
        case "pt":
            return Math.round(num * 20); // 1pt = 20 twips
        default:
            return undefined;
    }
}
|
|
2423
|
+
/**
 * Parse a CSS padding shorthand into per-side values in twips.
 * Supports: "value", "vertical horizontal", "top horizontal bottom",
 * "top right bottom left".
 *
 * Components are assigned to sides strictly by position. A component that
 * parseCssLengthToTwips cannot convert (for example a percentage) becomes 0
 * for its own side and does not shift the remaining components onto other
 * sides. A trailing "!important" is ignored.
 *
 * @param padding - Value of the CSS `padding` property.
 * @returns { top, right, bottom, left } in twips, or undefined when no
 *   component is convertible or there are more than four components.
 */
function parseCssPaddingToTwips(padding) {
    const parts = padding.replace(/!\s*important\s*$/i, "").trim().split(/\s+/);
    if (parts.length > 4)
        return undefined;
    const parsed = parts.map((part) => parseCssLengthToTwips(part));
    if (!parsed.some((twips) => Number.isFinite(twips)))
        return undefined;
    // Unconvertible components fall back to 0 but keep their position.
    const side = (index) => (Number.isFinite(parsed[index]) ? parsed[index] : 0);
    // CSS shorthand rules:
    // 1 value: all sides
    // 2 values: vertical horizontal
    // 3 values: top horizontal bottom
    // 4 values: top right bottom left
    switch (parts.length) {
        case 1:
            return { top: side(0), right: side(0), bottom: side(0), left: side(0) };
        case 2:
            return { top: side(0), right: side(1), bottom: side(0), left: side(1) };
        case 3:
            return { top: side(0), right: side(1), bottom: side(2), left: side(1) };
        case 4:
            return { top: side(0), right: side(1), bottom: side(2), left: side(3) };
        default:
            return undefined;
    }
}
|
|
2453
|
+
/**
 * GENERALIZED: Convert a CSS font-size value to DOCX half-points.
 * CSS font-size values: "0.875rem", "14px", "12pt", "1.5rem"
 * DOCX size is in half-points (1pt = 2 half-points).
 * Default browser font-size is 16px = 12pt = 24 half-points, so:
 * - rem / em: value * 24 (em is approximated against the same 16px base)
 * - px: value * 0.75 * 2
 * - pt: value * 2
 *
 * Units are matched case-insensitively, as in CSS. The whole value must be
 * one number followed by one supported unit; anything else (keywords,
 * percentages, malformed numbers) yields undefined.
 *
 * @param fontSize - CSS font-size value.
 * @returns Rounded size in half-points, or undefined if not convertible.
 */
function parseFontSizeToHalfPoints(fontSize) {
    if (!fontSize)
        return undefined;
    // Validate the full token so that garbage such as "12abcpx" is not
    // silently read as 12px.
    const match = String(fontSize)
        .trim()
        .match(/^([+-]?(?:\d+\.?\d*|\.\d+)(?:e[+-]?\d+)?)(rem|em|px|pt)$/i);
    if (!match)
        return undefined;
    const halfPointsPerUnit = {
        rem: 12 * 2, // 1rem = 16px = 12pt
        em: 12 * 2, // relative to parent; 16px base assumed
        px: 0.75 * 2, // 1px = 0.75pt
        pt: 2,
    };
    return Math.round(parseFloat(match[1]) * halfPointsPerUnit[match[2].toLowerCase()]);
}
|
|
2493
|
+
/**
 * GENERALIZED: Convert a CSS margin/spacing value to DOCX twips.
 * CSS margin values: "0.5rem", "8px", "6pt", "1rem"
 * DOCX spacing is in twips (1 inch = 1440 twips, 1pt = 20 twips).
 * Default browser font-size is 16px = 12pt, so:
 * - 1rem / 1em = 240 twips (em is approximated against the 16px base)
 * - 1px = 15 twips (1 inch = 96px = 1440 twips)
 * - 1pt = 20 twips
 *
 * Only the first whitespace-separated component is converted, and both its
 * number and its unit are taken from that same component. For a multi-value
 * shorthand this is the top margin. Units are matched case-insensitively; a
 * unitless zero ("0") is a valid CSS length and yields 0.
 *
 * @param margin - CSS margin value.
 * @returns Rounded spacing in twips, or undefined if not convertible.
 */
function parseMarginToTwips(margin) {
    if (!margin)
        return undefined;
    // Taking number and unit from one token prevents "8px 1rem" from being
    // read as 8rem (number from the start, unit from the end of the string).
    const [firstComponent] = String(margin).trim().split(/\s+/);
    const match = firstComponent.match(/^([+-]?(?:\d+\.?\d*|\.\d+)(?:e[+-]?\d+)?)(rem|em|px|pt)?$/i);
    if (!match)
        return undefined;
    const amount = parseFloat(match[1]);
    if (match[2] === undefined) {
        // CSS allows omitting the unit only for zero.
        return amount === 0 ? 0 : undefined;
    }
    const twipsPerUnit = { rem: 240, em: 240, px: 15, pt: 20 };
    return Math.round(amount * twipsPerUnit[match[2].toLowerCase()]);
}
|
|
2535
|
+
/**
 * GENERALIZED: Convert a CSS line-height value to DOCX line spacing.
 * CSS line-height: "1.3", "1.7", "150%", "24px", "1.5em", "1.5rem", "18pt", "normal"
 * DOCX line spacing with LineRuleType.AUTO uses "240ths of a line"
 * where 240 = single spacing (1.0), 360 = 1.5, 480 = double (2.0).
 * Formula: ratio * 240 = DOCX line value
 *
 * How the ratio is obtained:
 * - "normal": 1.2
 * - unitless number and em: the number itself
 * - percentage: number / 100
 * - px: px / font size in px when `fontSize` is given; otherwise px / 16,
 *   accepted only if that estimate lies within 0.8..3.0
 * - rem: converted to px with 1rem = 16px, then handled like px (rem is
 *   relative to the root font size, not the element's own)
 * - pt: pt / font size in pt; requires `fontSize`
 *
 * @param lineHeight CSS line-height value
 * @param fontSize Optional font size in half-points (e.g., 24 = 12pt)
 * @returns DOCX line value, or undefined to fall back to the default spacing
 */
function parseLineHeightToDocx(lineHeight, fontSize) {
    if (!lineHeight)
        return undefined;
    const lh = lineHeight.trim().toLowerCase();
    // "normal" typically equals 1.2 in browsers
    if (lh === "normal") {
        return Math.round(1.2 * 240);
    }
    const amount = parseFloat(lh);
    if (Number.isNaN(amount))
        return undefined;
    // Unitless number (e.g., "1.3", "1.7") - most common
    if (!/[a-z%]/.test(lh)) {
        return Math.round(amount * 240);
    }
    // Percentage of the element's font size (e.g., "150%")
    if (lh.endsWith("%")) {
        return Math.round((amount / 100) * 240);
    }
    const hasFontSize = Boolean(fontSize && fontSize > 0);
    // Absolute pixel heights need the font size to become a ratio.
    const fromPixels = (pxValue) => {
        if (hasFontSize) {
            // half-points / 2 = points; points * 4/3 = px
            const fontSizePx = (fontSize / 2) * (4 / 3);
            return Math.round((pxValue / fontSizePx) * 240);
        }
        // Without a font size, estimate against the common 16px default and
        // only trust the result when the ratio is plausible.
        const estimatedRatio = pxValue / 16;
        if (estimatedRatio >= 0.8 && estimatedRatio <= 3.0) {
            return Math.round(estimatedRatio * 240);
        }
        return undefined;
    };
    // "rem" must be tested before "em" because every rem value also ends in "em".
    if (lh.endsWith("rem")) {
        return fromPixels(amount * 16);
    }
    // em units (e.g., "1.5em") - multiplier of the element's own font size
    if (lh.endsWith("em")) {
        return Math.round(amount * 240);
    }
    // px units (e.g., "27.2px" from getComputedStyle)
    if (lh.endsWith("px")) {
        return fromPixels(amount);
    }
    // pt units (e.g., "18pt")
    if (lh.endsWith("pt") && hasFontSize) {
        const fontSizePt = fontSize / 2;
        return Math.round((amount / fontSizePt) * 240);
    }
    // Return undefined to use default line-height
    return undefined;
}
|
|
2601
|
+
/**
 * Convert a CSS color value into a 6-digit uppercase hex string without the leading "#".
 *
 * Supported inputs, checked in this order:
 *   1. A small set of named colors ("transparent" yields undefined).
 *   2. The first hex color found anywhere in the value (#RGB, #RGBA, #RRGGBB, #RRGGBBAA;
 *      any alpha digits are dropped).
 *   3. linear-gradient(...): the first rgba() stop, blended with white by its alpha.
 *   4. rgba(r, g, b, a): blended with white when alpha < 0.3, otherwise the plain RGB.
 *   5. rgb(r, g, b).
 *
 * @param {string} value CSS color value (may be embedded in a longer declaration value)
 * @returns {string|undefined} Hex color such as "FFA500", or undefined when no color can be extracted
 */
function extractHexColor(value) {
    if (typeof value !== "string") {
        return undefined;
    }
    // A Map (rather than a plain object) so that inherited keys such as
    // "constructor" or "__proto__" can never be mistaken for a color name.
    const namedColors = new Map([
        ["white", "FFFFFF"],
        ["black", "000000"],
        ["red", "FF0000"],
        ["green", "008000"],
        ["blue", "0000FF"],
        ["yellow", "FFFF00"],
        ["orange", "FFA500"],
        ["purple", "800080"],
        ["gray", "808080"],
        ["grey", "808080"],
        ["transparent", ""], // Empty string => reported as "no color"
    ]);
    const lowerValue = value.toLowerCase().trim();
    if (namedColors.has(lowerValue)) {
        return namedColors.get(lowerValue) || undefined;
    }
    // Channels outside 0-255 would otherwise produce 3-digit hex components.
    const clampChannel = (raw) => Math.min(255, Math.max(0, Number.parseInt(raw, 10)));
    // Alpha outside 0-1 (or unparsable, e.g. ".") would otherwise yield negative/NaN channels.
    const parseAlpha = (raw) => {
        const alpha = Number.parseFloat(raw);
        return Number.isNaN(alpha) ? 1 : Math.min(1, Math.max(0, alpha));
    };
    const blendWithWhite = (channel, alpha) => Math.round(channel * alpha + 255 * (1 - alpha));
    const toHex = (channels) => channels
        .map((channel) => channel.toString(16).padStart(2, "0"))
        .join("")
        .toUpperCase();
    // Handle hex colors. Only the valid CSS lengths (3, 4, 6, 8 digits) are accepted;
    // the lookahead stops a longer invalid run from being truncated into a "valid" color.
    const hexMatch = value.match(/#([0-9a-fA-F]{8}|[0-9a-fA-F]{6}|[0-9a-fA-F]{3,4})(?![0-9a-fA-F])/);
    if (hexMatch) {
        const digits = hexMatch[1];
        const hex = digits.length <= 4
            ? [...digits.slice(0, 3)].map((digit) => digit + digit).join("") // expand shorthand, drop alpha
            : digits.slice(0, 6); // drop alpha pair if present
        return hex.toUpperCase();
    }
    const rgbaPattern = /rgba\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*([0-9.]+)\s*\)/;
    const readChannels = (match) => [match[1], match[2], match[3]].map(clampChannel);
    // Handle linear-gradient - extract and blend the first rgba color stop.
    // (Gradients containing a hex color were already resolved by the hex check above.)
    if (/linear-gradient\s*\([^)]+\)/i.test(value)) {
        const firstStop = value.match(rgbaPattern);
        if (firstStop) {
            const alpha = parseAlpha(firstStop[4]);
            return toHex(readChannels(firstStop).map((channel) => blendWithWhite(channel, alpha)));
        }
        // For gradients without extractable colors, return undefined (no background)
        return undefined;
    }
    // Handle rgba - mostly transparent colors are blended with white to get the
    // effective color; for higher alpha the RGB values are used as-is.
    const rgbaMatch = value.match(rgbaPattern);
    if (rgbaMatch) {
        const alpha = parseAlpha(rgbaMatch[4]);
        const channels = readChannels(rgbaMatch);
        if (alpha < 0.3) {
            return toHex(channels.map((channel) => blendWithWhite(channel, alpha)));
        }
        return toHex(channels);
    }
    // Handle rgb (no alpha)
    const rgbMatch = value.match(/rgb\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)/);
    if (rgbMatch) {
        return toHex(readChannels(rgbMatch));
    }
    return undefined;
}
|
|
2687
|
+
/**
 * Extract the text color that applies to an element.
 *
 * Sources are consulted in priority order; the first one that yields a parseable
 * color wins:
 *   1. The `color` declaration in the element's inline `style` attribute. A
 *      `var(--name)` / `var(--name, fallback)` reference is resolved through
 *      `cssContext.variables`, falling back to the declared fallback value.
 *   2. CSS class rules (via getColorFromClasses).
 *   3. Element styles (via getElementStyles), e.g. `p { color: ... }`.
 *   4. `window.getComputedStyle`, when running in an environment that provides it.
 *
 * Returns the actual color from the HTML - does NOT filter out any colors.
 * Per the skill rules: ALL styling values MUST be extracted from HTML, never filtered.
 *
 * @param element The DOM element to inspect
 * @param cssContext Parsed CSS context; when falsy, steps 2 and 3 are skipped
 * @returns The color as produced by extractHexColor, or undefined when none is found
 */
function extractTextColor(element, cssContext) {
    // Check inline style first (highest priority)
    const inlineStyle = element.getAttribute("style") || "";
    const colorMatch = inlineStyle.match(/(?:^|;)\s*color\s*:\s*([^;]+)/i);
    if (colorMatch) {
        // "!important" only affects cascade priority; strip it so values such as
        // "red !important" are still recognised.
        let colorValue = colorMatch[1].replace(/\s*!\s*important\s*$/i, "").trim();
        // Resolve CSS variables in inline styles, e.g. var(--color-muted) or
        // var(--color-muted, #666). The greedy fallback group keeps nested
        // parentheses intact, e.g. var(--x, rgb(1, 2, 3)).
        const varMatch = colorValue.match(/var\s*\(\s*(--[\w-]+)\s*(?:,\s*(.*))?\)/);
        if (varMatch) {
            const resolvedValue = cssContext ? cssContext.variables.get(varMatch[1]) : undefined;
            const fallbackValue = varMatch[2]?.trim();
            if (resolvedValue) {
                // A defined variable always beats the fallback.
                colorValue = resolvedValue;
            }
            else if (fallbackValue) {
                colorValue = fallbackValue;
            }
        }
        const color = extractHexColor(colorValue);
        if (color) {
            return color;
        }
    }
    if (cssContext) {
        // Check CSS class rules (resolves CSS variables)
        const classColor = getColorFromClasses(element, cssContext);
        if (classColor) {
            return classColor;
        }
        // Check element type styles (e.g., body { color: ... }, p { color: ... })
        const elementStyles = getElementStyles(element, cssContext);
        if (elementStyles.color) {
            const color = extractHexColor(elementStyles.color);
            if (color) {
                return color;
            }
        }
    }
    // Try computed style if available (browser environment)
    if (typeof window !== "undefined" && window.getComputedStyle) {
        try {
            const computedColor = window.getComputedStyle(element).color;
            if (computedColor) {
                const color = extractHexColor(computedColor);
                if (color) {
                    return color;
                }
            }
        }
        catch {
            // Deliberate best effort: getComputedStyle may fail in some environments,
            // in which case the element simply has no detectable color.
        }
    }
    return undefined;
}
|
|
2747
|
+
/**
 * Convert the contents of a blockquote/callout element into content elements.
 * The DOM children are walked directly; innerHTML is never re-parsed.
 *
 * @param element The blockquote/callout element
 * @param cssContext The CSS context for resolving styles
 * @returns The content elements (paragraphs, headings, lists, tables) in document order
 */
function parseBlockquoteContent(element, cssContext) {
    const collected = [];
    // Tags that are treated as containers; includes definition list elements
    // (dl, dd) for proper menu parsing.
    const containerTags = ["div", "span", "section", "dl", "dd", "figure", "figcaption"];
    // A rule such as "blockquote { font-style: italic; }" makes the quote's content italic.
    const inheritsItalic = getElementStyles(element, cssContext).fontStyle === "italic";

    const joinText = (runs) => runs.map((run) => run.text).join("");
    const hexFromStyles = (styles) => (styles.color ? extractHexColor(styles.color) : undefined);
    // Runs that already carry an explicit italic setting keep it.
    const markItalic = (runs) => {
        if (!inheritsItalic) {
            return;
        }
        for (const run of runs) {
            if (run.italic === undefined) {
                run.italic = true;
            }
        }
    };
    const walkChildren = (parent) => {
        for (const child of parent.childNodes) {
            visit(child);
        }
    };

    // Headings: color is looked up with the blockquote as parent so nested rules
    // like ".key-takeaways h3 { color: #7c3aed; }" apply.
    function emitHeading(el, level) {
        const text = getTextContent(el).trim();
        if (!text) {
            return;
        }
        const color = hexFromStyles(getElementStyles(el, cssContext, element));
        collected.push({ type: "heading", level, text, color });
    }

    // Paragraphs: color is looked up with the blockquote as parent so rules like
    // ".cta p { color: white; }" or ".cta { color: white; }" apply.
    function emitParagraph(el) {
        const runs = extractInlineRuns(el, cssContext);
        if (runs.length === 0) {
            return;
        }
        const text = joinText(runs);
        const color = hexFromStyles(getElementStyles(el, cssContext, element));
        markItalic(runs);
        if (!(hasInlineFormatting(runs) || inheritsItalic)) {
            collected.push({ type: "paragraph", text, color });
            return;
        }
        if (color) {
            // Runs without their own color take the paragraph's color.
            for (const run of runs) {
                if (!run.color) {
                    run.color = color;
                }
            }
        }
        collected.push({ type: "paragraph", text, runs, color, italic: inheritsItalic });
    }

    // Lists: only direct <li> children are read, so nested lists are not flattened in.
    function emitList(el, tag) {
        const items = [];
        for (const child of el.children) {
            if (child.tagName.toLowerCase() !== "li") {
                continue;
            }
            const runs = extractInlineRuns(child, cssContext);
            if (runs.length === 0) {
                continue;
            }
            markItalic(runs);
            items.push(hasInlineFormatting(runs) || inheritsItalic ? runs : joinText(runs));
        }
        if (items.length > 0) {
            collected.push({ type: "list", ordered: tag === "ol", items });
        }
    }

    // Labels/titles: a <div> whose resolved font-weight is bold. Returns true when
    // the div was consumed as a label (even if it turned out to be empty).
    function emitBoldLabel(el) {
        const styles = getElementStyles(el, cssContext, element);
        const weight = styles.fontWeight;
        if (!(weight === "700" || weight === "bold" || weight === "600")) {
            return false;
        }
        const text = getTextContent(el).trim();
        if (text) {
            const labelColor = hexFromStyles(styles);
            if (labelColor) {
                const runs = [{
                        text,
                        bold: true,
                        italic: false,
                        color: labelColor,
                    }];
                collected.push({ type: "paragraph", text, runs });
            }
            else {
                collected.push({ type: "paragraph", text, bold: true });
            }
        }
        return true;
    }

    // Containers: an inline-only horizontal flex container with several children
    // becomes a single-row borderless table (one cell per flex item); any other
    // inline-only container becomes one paragraph; everything else is recursed into.
    function emitContainer(el) {
        const inlineOnly = isInlineOnlyContainer(el);
        const horizontalFlex = isHorizontalFlexContainer(el, cssContext);
        const childElements = Array.from(el.children);
        if (!inlineOnly) {
            walkChildren(el);
            return;
        }
        const fontFamily = getElementStyles(el, cssContext).fontFamily;
        if (horizontalFlex && childElements.length > 1) {
            const cells = [];
            const flexItems = childElements.filter((child) => child.nodeType === Node.ELEMENT_NODE);
            for (const item of flexItems) {
                const itemRuns = extractInlineRuns(item, cssContext, undefined, fontFamily);
                if (itemRuns.length > 0) {
                    markItalic(itemRuns);
                    cells.push(itemRuns);
                }
            }
            if (cells.length > 0) {
                collected.push({
                    type: "table",
                    rows: [cells],
                    hasHeader: false,
                    noBorders: true,
                });
            }
            return;
        }
        const runs = extractInlineRuns(el, cssContext, undefined, fontFamily);
        if (runs.length > 0) {
            const text = joinText(runs);
            markItalic(runs);
            collected.push(hasInlineFormatting(runs)
                ? { type: "paragraph", text, runs }
                : { type: "paragraph", text });
        }
    }

    // Definition terms: all inline content is combined into a single paragraph.
    function emitDefinitionTerm(el) {
        const runs = extractInlineRuns(el, cssContext);
        if (runs.length === 0) {
            return;
        }
        const text = joinText(runs);
        collected.push(hasInlineFormatting(runs)
            ? { type: "paragraph", text, runs }
            : { type: "paragraph", text });
    }

    function visit(node) {
        if (node.nodeType === Node.TEXT_NODE) {
            const text = node.textContent?.trim();
            if (text) {
                // Direct text nodes pick up the blockquote's italic style.
                collected.push(inheritsItalic
                    ? { type: "paragraph", text, italic: true }
                    : { type: "paragraph", text });
            }
            return;
        }
        const isElement = node.nodeType === Node.ELEMENT_NODE && node instanceof Element;
        if (!isElement) {
            return;
        }
        const tag = node.tagName.toLowerCase();
        const level = parseHeadingLevel(tag);
        if (level !== null) {
            emitHeading(node, level);
            return;
        }
        if (tag === "p") {
            emitParagraph(node);
            return;
        }
        if (tag === "ul" || tag === "ol") {
            emitList(node, tag);
            return;
        }
        if (tag === "div" && emitBoldLabel(node)) {
            return;
        }
        if (containerTags.includes(tag)) {
            emitContainer(node);
            return;
        }
        if (tag === "dt") {
            emitDefinitionTerm(node);
            return;
        }
        // Fallback: a leaf element with text becomes a paragraph; anything else is recursed into.
        const text = getTextContent(node).trim();
        if (text && node.children.length === 0) {
            collected.push({ type: "paragraph", text });
        }
        else {
            walkChildren(node);
        }
    }

    // Process all direct children of the blockquote element
    walkChildren(element);
    return collected;
}
|
|
2989
|
+
/**
|
|
2990
|
+
* Parse HTML string and extract content elements.
|
|
2991
|
+
*/
|
|
2992
|
+
export function parseHtmlContent(html) {
|
|
2993
|
+
const parser = new DOMParser();
|
|
2994
|
+
const doc = parser.parseFromString(html, "text/html");
|
|
2995
|
+
const elements = [];
|
|
2996
|
+
// Parse CSS context for resolving variables and class-based colors
|
|
2997
|
+
const cssContext = parseCssContext(doc);
|
|
2998
|
+
// Track SVGs that have been processed (to avoid duplicate processing)
|
|
2999
|
+
const processedSvgs = new Set();
|
|
3000
|
+
const { body } = doc;
|
|
3001
|
+
function processNode(node, inheritedAlignment, inheritedColor) {
|
|
3002
|
+
if (node.nodeType === Node.TEXT_NODE) {
|
|
3003
|
+
const text = node.textContent?.trim();
|
|
3004
|
+
if (text) {
|
|
3005
|
+
elements.push({ type: "paragraph", text, alignment: inheritedAlignment, color: inheritedColor });
|
|
3006
|
+
}
|
|
3007
|
+
return;
|
|
3008
|
+
}
|
|
3009
|
+
if (node.nodeType !== Node.ELEMENT_NODE) {
|
|
3010
|
+
return;
|
|
3011
|
+
}
|
|
3012
|
+
// At this point we know it's an Element node by nodeType
|
|
3013
|
+
// Note: We use nodeType check instead of instanceof Element because
|
|
3014
|
+
// linkedom has separate HTMLElement and SVGElement classes, and the
|
|
3015
|
+
// global Element shim only covers HTMLElement. SVG elements have
|
|
3016
|
+
// nodeType 1 but fail instanceof Element check in linkedom.
|
|
3017
|
+
const element = node;
|
|
3018
|
+
if (!element.tagName) {
|
|
3019
|
+
return;
|
|
3020
|
+
}
|
|
3021
|
+
const tagName = element.tagName.toLowerCase();
|
|
3022
|
+
// Skip script, style, noscript, and other non-visual elements
|
|
3023
|
+
if (tagName === "script" || tagName === "style" || tagName === "noscript" ||
|
|
3024
|
+
tagName === "iframe" || tagName === "link" || tagName === "meta" ||
|
|
3025
|
+
tagName === "template" || tagName === "object" || tagName === "embed") {
|
|
3026
|
+
return;
|
|
3027
|
+
}
|
|
3028
|
+
// Get alignment from this element's CSS styles, or use inherited
|
|
3029
|
+
const elementAlignment = getTextAlignment(element, cssContext);
|
|
3030
|
+
const alignment = elementAlignment || inheritedAlignment;
|
|
3031
|
+
// Get color from this element (via CSS classes or inline styles)
|
|
3032
|
+
// Color inheritance rules:
|
|
3033
|
+
// 1. Inline styles on THIS element take precedence
|
|
3034
|
+
// 2. CSS class colors on THIS element take precedence
|
|
3035
|
+
// 3. Inline style colors from PARENT elements ARE inherited (CSS spec)
|
|
3036
|
+
// 4. CSS class colors from parent elements are NOT inherited here (handled by CSS cascade)
|
|
3037
|
+
const elementColor = extractTextColor(element, cssContext);
|
|
3038
|
+
// Use element's own color if set, otherwise use inherited color (from parent inline styles)
|
|
3039
|
+
const effectiveColor = elementColor || inheritedColor;
|
|
3040
|
+
const headingLevel = parseHeadingLevel(tagName);
|
|
3041
|
+
if (headingLevel !== null) {
|
|
3042
|
+
const text = getTextContent(element).trim();
|
|
3043
|
+
if (text) {
|
|
3044
|
+
// Extract text-transform from CSS
|
|
3045
|
+
const hStyles = getElementStyles(element, cssContext);
|
|
3046
|
+
let textTransform;
|
|
3047
|
+
if (hStyles.textTransform === "uppercase" || hStyles.textTransform === "lowercase" || hStyles.textTransform === "capitalize") {
|
|
3048
|
+
textTransform = hStyles.textTransform;
|
|
3049
|
+
}
|
|
3050
|
+
// Extract font-family for heading (e.g., h1 { font-family: var(--font-heading); })
|
|
3051
|
+
const headingFontFamily = hStyles.fontFamily;
|
|
3052
|
+
// GENERALIZED: Check for border-bottom on heading (e.g., h2 with underline style)
|
|
3053
|
+
// Any element can have borders - extract from CSS/inline styles
|
|
3054
|
+
const headingBorderColor = extractBorderBottomColor(element, cssContext);
|
|
3055
|
+
// When heading has border-bottom, reduce heading after spacing
|
|
3056
|
+
// and add HR with top border to simulate CSS padding-bottom
|
|
3057
|
+
// CSS padding-bottom: 0.5rem ≈ 100 twips (space from text to border)
|
|
3058
|
+
const spacingAfter = headingBorderColor ? 100 : undefined;
|
|
3059
|
+
// GENERALIZED: Extract line-height for vertical spacing
|
|
3060
|
+
// CSS line-height: 1.2 for titles, 1.3 for other headings, etc.
|
|
3061
|
+
const lineSpacing = hStyles.lineHeight ? parseLineHeightToDocx(hStyles.lineHeight) : undefined;
|
|
3062
|
+
// Check for gradient text (CSS background-clip: text)
|
|
3063
|
+
// If gradient exists, create runs array with gradient info
|
|
3064
|
+
let runs;
|
|
3065
|
+
if (hStyles.gradient) {
|
|
3066
|
+
runs = [{ text, gradient: hStyles.gradient, color: elementColor }];
|
|
3067
|
+
}
|
|
3068
|
+
elements.push({
|
|
3069
|
+
type: "heading",
|
|
3070
|
+
level: headingLevel,
|
|
3071
|
+
text,
|
|
3072
|
+
alignment,
|
|
3073
|
+
color: elementColor,
|
|
3074
|
+
textTransform,
|
|
3075
|
+
spacingAfter,
|
|
3076
|
+
fontFamily: headingFontFamily,
|
|
3077
|
+
runs,
|
|
3078
|
+
lineSpacing,
|
|
3079
|
+
});
|
|
3080
|
+
// Add horizontal-rule for border-bottom effect
|
|
3081
|
+
// Use top border with minimal before spacing - border appears right at top
|
|
3082
|
+
// CSS margin-bottom: 1rem ≈ 200 twips (space after border to content)
|
|
3083
|
+
if (headingBorderColor) {
|
|
3084
|
+
elements.push({ type: "horizontal-rule", color: headingBorderColor, spacingBefore: 0, spacingAfter: 200, borderPosition: "top" });
|
|
3085
|
+
}
|
|
3086
|
+
}
|
|
3087
|
+
return;
|
|
3088
|
+
}
|
|
3089
|
+
// Handle <hr> elements
|
|
3090
|
+
if (tagName === "hr") {
|
|
3091
|
+
// Try to extract color from inline style or CSS
|
|
3092
|
+
const hrStyle = element.getAttribute("style") || "";
|
|
3093
|
+
const colorMatch = hrStyle.match(/(?:border-color|background-color|color):\s*([^;]+)/i);
|
|
3094
|
+
let hrColor;
|
|
3095
|
+
if (colorMatch) {
|
|
3096
|
+
hrColor = extractHexColor(colorMatch[1]);
|
|
3097
|
+
}
|
|
3098
|
+
elements.push({ type: "horizontal-rule", color: hrColor });
|
|
3099
|
+
return;
|
|
3100
|
+
}
|
|
3101
|
+
// GENERALIZED: Check if this paragraph is a styled callout (has border-left)
|
|
3102
|
+
// This handles patterns like: <p class="intro-section"> with border-left: 4px solid ...
|
|
3103
|
+
// Must come BEFORE regular paragraph handling
|
|
3104
|
+
if (tagName === "p" && isBlockquoteOrCallout(element, cssContext)) {
|
|
3105
|
+
const content = parseBlockquoteContent(element, cssContext);
|
|
3106
|
+
if (content.length > 0) {
|
|
3107
|
+
const elementStyles = getElementStyles(element, cssContext);
|
|
3108
|
+
let borderColor;
|
|
3109
|
+
let backgroundColor;
|
|
3110
|
+
// Extract background color from element styles
|
|
3111
|
+
if (elementStyles.backgroundColor) {
|
|
3112
|
+
const hex = extractHexColor(elementStyles.backgroundColor);
|
|
3113
|
+
if (hex)
|
|
3114
|
+
backgroundColor = hex;
|
|
3115
|
+
}
|
|
3116
|
+
// Extract border color from borderLeft property
|
|
3117
|
+
if (elementStyles.borderLeft) {
|
|
3118
|
+
const hex = extractBorderColorFromStyle({ borderLeft: elementStyles.borderLeft });
|
|
3119
|
+
if (hex)
|
|
3120
|
+
borderColor = hex;
|
|
3121
|
+
}
|
|
3122
|
+
else if (elementStyles.borderColor) {
|
|
3123
|
+
const hex = extractHexColor(elementStyles.borderColor);
|
|
3124
|
+
if (hex)
|
|
3125
|
+
borderColor = hex;
|
|
3126
|
+
}
|
|
3127
|
+
elements.push({
|
|
3128
|
+
type: "blockquote",
|
|
3129
|
+
content,
|
|
3130
|
+
borderColor,
|
|
3131
|
+
backgroundColor,
|
|
3132
|
+
borderStyle: "left",
|
|
3133
|
+
});
|
|
3134
|
+
}
|
|
3135
|
+
return;
|
|
3136
|
+
}
|
|
3137
|
+
if (tagName === "p") {
|
|
3138
|
+
const runs = extractInlineRuns(element, cssContext, effectiveColor);
|
|
3139
|
+
if (runs.length > 0) {
|
|
3140
|
+
const text = runs.map((r) => r.text).join("");
|
|
3141
|
+
// Use effectiveColor which includes inheritance from parent inline styles
|
|
3142
|
+
// Extract text-indent, font-style, text-transform from CSS
|
|
3143
|
+
const pStyles = getElementStyles(element, cssContext);
|
|
3144
|
+
let firstLineIndent;
|
|
3145
|
+
let hangingIndent;
|
|
3146
|
+
let textTransform;
|
|
3147
|
+
let italic = false;
|
|
3148
|
+
// Handle text-indent (can be positive for first-line indent, negative for hanging indent)
|
|
3149
|
+
if (pStyles.textIndent) {
|
|
3150
|
+
const indentTwips = parseCssLengthToTwips(pStyles.textIndent);
|
|
3151
|
+
if (indentTwips !== undefined) {
|
|
3152
|
+
if (indentTwips >= 0) {
|
|
3153
|
+
firstLineIndent = indentTwips;
|
|
3154
|
+
}
|
|
3155
|
+
else {
|
|
3156
|
+
// Negative text-indent creates a hanging indent
|
|
3157
|
+
hangingIndent = Math.abs(indentTwips);
|
|
3158
|
+
}
|
|
3159
|
+
}
|
|
3160
|
+
}
|
|
3161
|
+
// Handle font-style: italic
|
|
3162
|
+
if (pStyles.fontStyle === "italic") {
|
|
3163
|
+
italic = true;
|
|
3164
|
+
// Apply italic to all runs
|
|
3165
|
+
runs.forEach(run => run.italic = true);
|
|
3166
|
+
}
|
|
3167
|
+
// Handle font-weight: bold/700/600
|
|
3168
|
+
let bold = false;
|
|
3169
|
+
if (pStyles.fontWeight === "700" || pStyles.fontWeight === "bold" || pStyles.fontWeight === "600") {
|
|
3170
|
+
bold = true;
|
|
3171
|
+
// Apply bold to all runs
|
|
3172
|
+
runs.forEach(run => run.bold = true);
|
|
3173
|
+
}
|
|
3174
|
+
// Handle text-transform
|
|
3175
|
+
if (pStyles.textTransform === "uppercase" || pStyles.textTransform === "lowercase" || pStyles.textTransform === "capitalize") {
|
|
3176
|
+
textTransform = pStyles.textTransform;
|
|
3177
|
+
}
|
|
3178
|
+
// Extract font-family from paragraph styles (will be inherited from body if not set on p)
|
|
3179
|
+
const paragraphFontFamily = pStyles.fontFamily;
|
|
3180
|
+
// Apply fontFamily to runs if present and runs don't have their own fontFamily
|
|
3181
|
+
if (paragraphFontFamily) {
|
|
3182
|
+
runs.forEach(run => {
|
|
3183
|
+
if (!run.fontFamily) {
|
|
3184
|
+
run.fontFamily = paragraphFontFamily;
|
|
3185
|
+
}
|
|
3186
|
+
});
|
|
3187
|
+
}
|
|
3188
|
+
if (hasInlineFormatting(runs)) {
|
|
3189
|
+
// Has mixed formatting - use runs array
|
|
3190
|
+
// Apply effective color to runs that don't have their own color
|
|
3191
|
+
if (effectiveColor) {
|
|
3192
|
+
runs.forEach(run => {
|
|
3193
|
+
if (!run.color) {
|
|
3194
|
+
run.color = effectiveColor;
|
|
3195
|
+
}
|
|
3196
|
+
});
|
|
3197
|
+
}
|
|
3198
|
+
elements.push({ type: "paragraph", text, runs, alignment, firstLineIndent, hangingIndent, textTransform });
|
|
3199
|
+
}
|
|
3200
|
+
else if (effectiveColor || italic || bold || paragraphFontFamily) {
|
|
3201
|
+
// Has color, italic, bold, or fontFamily - use runs to preserve formatting
|
|
3202
|
+
// Create runs with the appropriate formatting
|
|
3203
|
+
const formattedRuns = [{ text, bold, italic, color: effectiveColor, fontFamily: paragraphFontFamily }];
|
|
3204
|
+
elements.push({ type: "paragraph", text, runs: formattedRuns, alignment, firstLineIndent, hangingIndent, textTransform });
|
|
3205
|
+
}
|
|
3206
|
+
else {
|
|
3207
|
+
// No formatting - simple paragraph (but may have indent/transform)
|
|
3208
|
+
elements.push({ type: "paragraph", text, alignment, firstLineIndent, hangingIndent, textTransform });
|
|
3209
|
+
}
|
|
3210
|
+
}
|
|
3211
|
+
return;
|
|
3212
|
+
}
|
|
3213
|
+
if (tagName === "ul" || tagName === "ol") {
|
|
3214
|
+
const items = [];
|
|
3215
|
+
let hasComplexItems = false;
|
|
3216
|
+
let hasOnlyNestedLists = true; // Track if complexity is ONLY from nested lists
|
|
3217
|
+
// First pass: check if any list items contain block-level elements
|
|
3218
|
+
// Use direct children to avoid nested lists
|
|
3219
|
+
const listItems = [];
|
|
3220
|
+
for (const child of element.children) {
|
|
3221
|
+
if (child.tagName.toLowerCase() === "li") {
|
|
3222
|
+
listItems.push(child);
|
|
3223
|
+
if (hasBlockLevelChildren(child)) {
|
|
3224
|
+
hasComplexItems = true;
|
|
3225
|
+
// Check if the block children are only nested lists
|
|
3226
|
+
for (const blockChild of child.children) {
|
|
3227
|
+
const blockTagName = blockChild.tagName.toLowerCase();
|
|
3228
|
+
if (BLOCK_LEVEL_TAGS.has(blockTagName) && blockTagName !== "ul" && blockTagName !== "ol") {
|
|
3229
|
+
hasOnlyNestedLists = false;
|
|
3230
|
+
}
|
|
3231
|
+
}
|
|
3232
|
+
}
|
|
3233
|
+
}
|
|
3234
|
+
}
|
|
3235
|
+
if (hasComplexItems && hasOnlyNestedLists) {
|
|
3236
|
+
// List contains nested lists - use the nested list extraction with level tracking
|
|
3237
|
+
const nestedItems = extractNestedListItems(element, cssContext, 0);
|
|
3238
|
+
if (nestedItems.length > 0) {
|
|
3239
|
+
elements.push({ type: "list", ordered: tagName === "ol", items: nestedItems });
|
|
3240
|
+
}
|
|
3241
|
+
}
|
|
3242
|
+
else if (hasComplexItems) {
|
|
3243
|
+
// Complex list: process each list item recursively to capture block elements
|
|
3244
|
+
// This handles SVGs, tables, nested divs, etc. inside list items
|
|
3245
|
+
for (const li of listItems) {
|
|
3246
|
+
// For complex list items, we need to:
|
|
3247
|
+
// 1. Extract inline text that appears directly in the li (not inside block children)
|
|
3248
|
+
// 2. Process block-level children through processNode
|
|
3249
|
+
// Check which children are block-level
|
|
3250
|
+
const blockChildren = [];
|
|
3251
|
+
const inlineNodes = [];
|
|
3252
|
+
for (const child of li.childNodes) {
|
|
3253
|
+
if (child.nodeType === Node.ELEMENT_NODE) {
|
|
3254
|
+
const childTagName = child.tagName.toLowerCase();
|
|
3255
|
+
if (BLOCK_LEVEL_TAGS.has(childTagName)) {
|
|
3256
|
+
blockChildren.push(child);
|
|
3257
|
+
}
|
|
3258
|
+
else {
|
|
3259
|
+
// Inline element like <strong>, <span>, <a>, etc.
|
|
3260
|
+
inlineNodes.push(child);
|
|
3261
|
+
}
|
|
3262
|
+
}
|
|
3263
|
+
else if (child.nodeType === Node.TEXT_NODE) {
|
|
3264
|
+
inlineNodes.push(child);
|
|
3265
|
+
}
|
|
3266
|
+
}
|
|
3267
|
+
// Process block-level children (this captures SVGs, tables, divs, etc.)
|
|
3268
|
+
for (const child of blockChildren) {
|
|
3269
|
+
processNode(child);
|
|
3270
|
+
}
|
|
3271
|
+
}
|
|
3272
|
+
// Note: We intentionally don't create list items from inline content in complex lists
|
|
3273
|
+
// because the block children (paragraphs, etc.) already contain all the meaningful content.
|
|
3274
|
+
// The inline content in <li> for complex lists is typically whitespace.
|
|
3275
|
+
}
|
|
3276
|
+
else {
|
|
3277
|
+
// Simple list: all items are inline-only, use efficient path
|
|
3278
|
+
for (const li of listItems) {
|
|
3279
|
+
const runs = extractInlineRuns(li, cssContext);
|
|
3280
|
+
if (runs.length > 0) {
|
|
3281
|
+
if (hasInlineFormatting(runs)) {
|
|
3282
|
+
items.push(runs);
|
|
3283
|
+
}
|
|
3284
|
+
else {
|
|
3285
|
+
items.push(runs.map((r) => r.text).join(""));
|
|
3286
|
+
}
|
|
3287
|
+
}
|
|
3288
|
+
}
|
|
3289
|
+
if (items.length > 0) {
|
|
3290
|
+
elements.push({ type: "list", ordered: tagName === "ol", items });
|
|
3291
|
+
}
|
|
3292
|
+
}
|
|
3293
|
+
return;
|
|
3294
|
+
}
|
|
3295
|
+
if (tagName === "table") {
|
|
3296
|
+
const rows = [];
|
|
3297
|
+
for (const tr of element.querySelectorAll("tr")) {
|
|
3298
|
+
const cells = [];
|
|
3299
|
+
for (const cell of tr.querySelectorAll("td, th")) {
|
|
3300
|
+
// Extract inline runs to preserve bold/italic formatting in cells
|
|
3301
|
+
const runs = extractInlineRuns(cell, cssContext);
|
|
3302
|
+
if (runs.length > 0) {
|
|
3303
|
+
if (hasInlineFormatting(runs)) {
|
|
3304
|
+
// Has formatting - store as runs
|
|
3305
|
+
cells.push(runs);
|
|
3306
|
+
}
|
|
3307
|
+
else {
|
|
3308
|
+
// Plain text - store as string
|
|
3309
|
+
cells.push(runs.map((r) => r.text).join(""));
|
|
3310
|
+
}
|
|
3311
|
+
}
|
|
3312
|
+
else {
|
|
3313
|
+
cells.push("");
|
|
3314
|
+
}
|
|
3315
|
+
}
|
|
3316
|
+
if (cells.length > 0) {
|
|
3317
|
+
rows.push(cells);
|
|
3318
|
+
}
|
|
3319
|
+
}
|
|
3320
|
+
if (rows.length > 0) {
|
|
3321
|
+
// Extract cell padding from CSS (th, td selectors)
|
|
3322
|
+
let cellPadding;
|
|
3323
|
+
let headerBackgroundColor;
|
|
3324
|
+
let headerTextColor;
|
|
3325
|
+
let evenRowBackgroundColor;
|
|
3326
|
+
if (cssContext) {
|
|
3327
|
+
// Try td first, then th (they usually have the same padding)
|
|
3328
|
+
const tdStyle = cssContext.elementStyles.get("td");
|
|
3329
|
+
const thStyle = cssContext.elementStyles.get("th");
|
|
3330
|
+
const paddingStr = tdStyle?.padding || thStyle?.padding;
|
|
3331
|
+
if (paddingStr) {
|
|
3332
|
+
cellPadding = parseCssPaddingToTwips(paddingStr);
|
|
3333
|
+
}
|
|
3334
|
+
// GENERALIZED: Look for nested header styles from ancestor containers
|
|
3335
|
+
// Walk up the DOM tree to find containers with nested th styles
|
|
3336
|
+
let ancestor = element.parentElement;
|
|
3337
|
+
while (ancestor && !headerBackgroundColor) {
|
|
3338
|
+
const ancestorClassAttr = ancestor.getAttribute("class");
|
|
3339
|
+
const ancestorClasses = ancestorClassAttr ? ancestorClassAttr.split(/\s+/).filter(c => c.length > 0) : [];
|
|
3340
|
+
for (const ancestorClass of ancestorClasses) {
|
|
3341
|
+
const nestedMap = cssContext.nestedStyles.get(ancestorClass);
|
|
3342
|
+
if (nestedMap) {
|
|
3343
|
+
// Check for th nested styles (e.g., .comparison-table-wrapper th { ... })
|
|
3344
|
+
const thNestedStyle = nestedMap.get("th");
|
|
3345
|
+
if (thNestedStyle) {
|
|
3346
|
+
if (thNestedStyle.backgroundColor) {
|
|
3347
|
+
const hex = extractHexColor(thNestedStyle.backgroundColor);
|
|
3348
|
+
if (hex)
|
|
3349
|
+
headerBackgroundColor = hex;
|
|
3350
|
+
}
|
|
3351
|
+
if (thNestedStyle.color) {
|
|
3352
|
+
const hex = extractHexColor(thNestedStyle.color);
|
|
3353
|
+
if (hex)
|
|
3354
|
+
headerTextColor = hex;
|
|
3355
|
+
}
|
|
3356
|
+
}
|
|
3357
|
+
// Check for tr:nth-child(even) or similar for even row styling
|
|
3358
|
+
const trNestedStyle = nestedMap.get("tr");
|
|
3359
|
+
if (trNestedStyle && trNestedStyle.backgroundColor) {
|
|
3360
|
+
const hex = extractHexColor(trNestedStyle.backgroundColor);
|
|
3361
|
+
if (hex)
|
|
3362
|
+
evenRowBackgroundColor = hex;
|
|
3363
|
+
}
|
|
3364
|
+
}
|
|
3365
|
+
}
|
|
3366
|
+
ancestor = ancestor.parentElement;
|
|
3367
|
+
}
|
|
3368
|
+
// Also check direct element styles for th (standalone th { ... } rules)
|
|
3369
|
+
if (!headerBackgroundColor && thStyle?.backgroundColor) {
|
|
3370
|
+
const hex = extractHexColor(thStyle.backgroundColor);
|
|
3371
|
+
if (hex)
|
|
3372
|
+
headerBackgroundColor = hex;
|
|
3373
|
+
}
|
|
3374
|
+
if (!headerTextColor && thStyle?.color) {
|
|
3375
|
+
const hex = extractHexColor(thStyle.color);
|
|
3376
|
+
if (hex)
|
|
3377
|
+
headerTextColor = hex;
|
|
3378
|
+
}
|
|
3379
|
+
}
|
|
3380
|
+
elements.push({ type: "table", rows, cellPadding, headerBackgroundColor, headerTextColor, evenRowBackgroundColor });
|
|
3381
|
+
}
|
|
3382
|
+
return;
|
|
3383
|
+
}
|
|
3384
|
+
if (tagName === "pre" || tagName === "code") {
|
|
3385
|
+
const text = getTextContent(element).trim();
|
|
3386
|
+
if (text) {
|
|
3387
|
+
elements.push({ type: "code", text });
|
|
3388
|
+
}
|
|
3389
|
+
return;
|
|
3390
|
+
}
|
|
3391
|
+
// Check for chart containers (divs with SVG children)
|
|
3392
|
+
if (tagName === "div") {
|
|
3393
|
+
// Check for two-column grid layout (like resume templates with sidebar)
|
|
3394
|
+
const sidebarWidthPercent = isTwoColumnGridLayout(element, cssContext);
|
|
3395
|
+
if (sidebarWidthPercent !== undefined) {
|
|
3396
|
+
const columnChildren = findTwoColumnChildren(element);
|
|
3397
|
+
if (columnChildren) {
|
|
3398
|
+
const [sidebarEl, mainEl] = columnChildren;
|
|
3399
|
+
// Extract sidebar styling
|
|
3400
|
+
const sidebarStyles = getElementStyles(sidebarEl, cssContext);
|
|
3401
|
+
const sidebarBgColor = sidebarStyles.backgroundColor
|
|
3402
|
+
? extractHexColor(sidebarStyles.backgroundColor)
|
|
3403
|
+
: undefined;
|
|
3404
|
+
const sidebarTextColor = sidebarStyles.color
|
|
3405
|
+
? extractHexColor(sidebarStyles.color)
|
|
3406
|
+
: undefined;
|
|
3407
|
+
// Parse sidebar and main content separately
|
|
3408
|
+
const sidebarContent = parseContainerContent(sidebarEl, cssContext, sidebarTextColor);
|
|
3409
|
+
const mainContent = parseContainerContent(mainEl, cssContext);
|
|
3410
|
+
if (sidebarContent.length > 0 || mainContent.length > 0) {
|
|
3411
|
+
// TODO: Two-column layout is detected but causes DOCX rendering issues.
|
|
3412
|
+
// For now, just process the children normally instead of creating a two-column element.
|
|
3413
|
+
// This ensures content is extracted even if layout is not preserved.
|
|
3414
|
+
// elements.push({ type: "two-column-layout", ... });
|
|
3415
|
+
}
|
|
3416
|
+
}
|
|
3417
|
+
}
|
|
3418
|
+
// Fall through to normal processing for now since two-column DOCX has issues
|
|
3419
|
+
// GENERALIZED: Check for flex containers with equal-width columns (like signature blocks)
|
|
3420
|
+
// This handles layouts like: display: flex; with children having flex: 1
|
|
3421
|
+
// Render as a DOCX table with each child as a column
|
|
3422
|
+
const flexColumns = detectFlexEqualColumns(element, cssContext);
|
|
3423
|
+
if (flexColumns && flexColumns.length >= 2) {
|
|
3424
|
+
// Check if children have enough content to warrant a table layout
|
|
3425
|
+
// (Skip if children are simple/empty - those are handled by stats-grid or normal processing)
|
|
3426
|
+
const hasComplexChildren = flexColumns.some(col => {
|
|
3427
|
+
const childCount = col.children.length;
|
|
3428
|
+
return childCount > 4; // Complex structure like signature blocks
|
|
3429
|
+
});
|
|
3430
|
+
if (hasComplexChildren) {
|
|
3431
|
+
// Build rows: align content from each column side by side
|
|
3432
|
+
// First, extract content lines from each column (with styling preserved as InlineRuns)
|
|
3433
|
+
const columnLines = [];
|
|
3434
|
+
let maxLines = 0;
|
|
3435
|
+
for (const col of flexColumns) {
|
|
3436
|
+
const lines = [];
|
|
3437
|
+
for (const child of col.children) {
|
|
3438
|
+
// Handle signature lines (border-bottom divs) as horizontal rules in text
|
|
3439
|
+
const childStyles = getElementStyles(child, cssContext);
|
|
3440
|
+
const inlineStyle = child.getAttribute("style") || "";
|
|
3441
|
+
const hasBorderBottom = inlineStyle.includes("border-bottom") ||
|
|
3442
|
+
childStyles.borderBottom ||
|
|
3443
|
+
childStyles.border?.includes("solid");
|
|
3444
|
+
if (hasBorderBottom && !getTextContent(child).trim()) {
|
|
3445
|
+
// Empty div with border = signature line
|
|
3446
|
+
lines.push("________________________");
|
|
3447
|
+
}
|
|
3448
|
+
else {
|
|
3449
|
+
const text = getTextContent(child).trim();
|
|
3450
|
+
if (text) {
|
|
3451
|
+
// Extract styling for this element
|
|
3452
|
+
const isBold = childStyles.fontWeight === "700" ||
|
|
3453
|
+
childStyles.fontWeight === "bold" ||
|
|
3454
|
+
child.querySelector("strong, b") !== null;
|
|
3455
|
+
const isItalic = childStyles.fontStyle === "italic" ||
|
|
3456
|
+
child.querySelector("em, i") !== null;
|
|
3457
|
+
// Extract color - support both CSS class colors and inline styles
|
|
3458
|
+
let textColor = childStyles.color;
|
|
3459
|
+
if (!textColor) {
|
|
3460
|
+
const styleAttr = child.getAttribute("style") || "";
|
|
3461
|
+
const colorMatch = styleAttr.match(/color:\s*([^;]+)/);
|
|
3462
|
+
if (colorMatch) {
|
|
3463
|
+
textColor = colorMatch[1].trim();
|
|
3464
|
+
}
|
|
3465
|
+
}
|
|
3466
|
+
// Convert color to hex without #
|
|
3467
|
+
let hexColor;
|
|
3468
|
+
if (textColor) {
|
|
3469
|
+
if (textColor.startsWith("#")) {
|
|
3470
|
+
hexColor = textColor.slice(1);
|
|
3471
|
+
}
|
|
3472
|
+
else if (textColor.startsWith("rgb")) {
|
|
3473
|
+
// Parse rgb(r, g, b) to hex
|
|
3474
|
+
const rgbMatch = textColor.match(/rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)/);
|
|
3475
|
+
if (rgbMatch) {
|
|
3476
|
+
const r = parseInt(rgbMatch[1], 10);
|
|
3477
|
+
const g = parseInt(rgbMatch[2], 10);
|
|
3478
|
+
const b = parseInt(rgbMatch[3], 10);
|
|
3479
|
+
hexColor = ((1 << 24) + (r << 16) + (g << 8) + b).toString(16).slice(1).toUpperCase();
|
|
3480
|
+
}
|
|
3481
|
+
}
|
|
3482
|
+
else {
|
|
3483
|
+
hexColor = textColor;
|
|
3484
|
+
}
|
|
3485
|
+
}
|
|
3486
|
+
// Extract font size and convert to half-points
|
|
3487
|
+
// CSS font-size values: "0.875rem", "14px", "12pt"
|
|
3488
|
+
// DOCX size is in half-points (1pt = 2 half-points)
|
|
3489
|
+
let fontSize;
|
|
3490
|
+
if (childStyles.fontSize) {
|
|
3491
|
+
const fs = childStyles.fontSize;
|
|
3492
|
+
if (fs.endsWith("rem")) {
|
|
3493
|
+
// rem relative to 16px base, convert to points then half-points
|
|
3494
|
+
const remValue = parseFloat(fs);
|
|
3495
|
+
if (!isNaN(remValue)) {
|
|
3496
|
+
// 1rem = 16px = 12pt, so 0.875rem = 10.5pt = 21 half-points
|
|
3497
|
+
fontSize = Math.round(remValue * 12 * 2);
|
|
3498
|
+
}
|
|
3499
|
+
}
|
|
3500
|
+
else if (fs.endsWith("px")) {
|
|
3501
|
+
// px to points: 1px = 0.75pt
|
|
3502
|
+
const pxValue = parseFloat(fs);
|
|
3503
|
+
if (!isNaN(pxValue)) {
|
|
3504
|
+
fontSize = Math.round(pxValue * 0.75 * 2);
|
|
3505
|
+
}
|
|
3506
|
+
}
|
|
3507
|
+
else if (fs.endsWith("pt")) {
|
|
3508
|
+
const ptValue = parseFloat(fs);
|
|
3509
|
+
if (!isNaN(ptValue)) {
|
|
3510
|
+
fontSize = Math.round(ptValue * 2);
|
|
3511
|
+
}
|
|
3512
|
+
}
|
|
3513
|
+
}
|
|
3514
|
+
// If there's any styling, use InlineRun; otherwise plain string
|
|
3515
|
+
if (isBold || isItalic || hexColor || fontSize) {
|
|
3516
|
+
lines.push([{ text, bold: isBold, italic: isItalic, color: hexColor, size: fontSize }]);
|
|
3517
|
+
}
|
|
3518
|
+
else {
|
|
3519
|
+
lines.push(text);
|
|
3520
|
+
}
|
|
3521
|
+
}
|
|
3522
|
+
}
|
|
3523
|
+
}
|
|
3524
|
+
columnLines.push(lines);
|
|
3525
|
+
maxLines = Math.max(maxLines, lines.length);
|
|
3526
|
+
}
|
|
3527
|
+
// Create rows: each row has one cell per column
|
|
3528
|
+
const rows = [];
|
|
3529
|
+
for (let i = 0; i < maxLines; i++) {
|
|
3530
|
+
const rowCells = [];
|
|
3531
|
+
for (const lines of columnLines) {
|
|
3532
|
+
rowCells.push(lines[i] || ""); // Empty string if this column has fewer lines
|
|
3533
|
+
}
|
|
3534
|
+
rows.push(rowCells);
|
|
3535
|
+
}
|
|
3536
|
+
if (rows.length > 0) {
|
|
3537
|
+
elements.push({ type: "table", rows, hasHeader: false, noBorders: true });
|
|
3538
|
+
return;
|
|
3539
|
+
}
|
|
3540
|
+
}
|
|
3541
|
+
}
|
|
3542
|
+
// GENERALIZED: Check for grid/flex containers with styled card children
|
|
3543
|
+
// This detects any div that:
|
|
3544
|
+
// 1. Uses grid or flex layout
|
|
3545
|
+
// 2. Contains multiple similar child divs
|
|
3546
|
+
// 3. Each child has a "value" (larger/bold text) and "label" (smaller/muted text) pattern
|
|
3547
|
+
if (isGridOrFlexContainer(element, cssContext)) {
|
|
3548
|
+
const childDivs = Array.from(element.children).filter((child) => child.tagName.toLowerCase() === "div");
|
|
3549
|
+
// Check if this looks like a stats/card grid (2+ similar structured children)
|
|
3550
|
+
if (childDivs.length >= 2) {
|
|
3551
|
+
const cards = [];
|
|
3552
|
+
for (const card of childDivs) {
|
|
3553
|
+
// Look for value/label/change pattern in child structure
|
|
3554
|
+
// Value: element with larger font, bold, or accent color
|
|
3555
|
+
// Label: element with smaller font or muted color
|
|
3556
|
+
// Change: element with very small font and positive/negative color (comparison text like "↑ 8% vs Q4 2024")
|
|
3557
|
+
const cardChildren = Array.from(card.children);
|
|
3558
|
+
// GENERALIZED VALIDATION: Stats cards should have simple structure (2-4 elements)
|
|
3559
|
+
// Skip complex structures like signature blocks that have many children
|
|
3560
|
+
if (cardChildren.length > 6) {
|
|
3561
|
+
continue;
|
|
3562
|
+
}
|
|
3563
|
+
let valueEl = null;
|
|
3564
|
+
let labelEl = null;
|
|
3565
|
+
let changeEl = null;
|
|
3566
|
+
for (const child of cardChildren) {
|
|
3567
|
+
const childStyles = getElementStyles(child, cssContext);
|
|
3568
|
+
const fontSize = childStyles.fontSize || "";
|
|
3569
|
+
const fontWeight = childStyles.fontWeight || "";
|
|
3570
|
+
const color = childStyles.color || "";
|
|
3571
|
+
// Detect value element: larger font, bold, or accent color
|
|
3572
|
+
const isLargerFont = fontSize.includes("1.75rem") || fontSize.includes("1.5rem") ||
|
|
3573
|
+
fontSize.includes("2rem") || parseFloat(fontSize) > 1;
|
|
3574
|
+
const isBold = fontWeight === "700" || fontWeight === "bold";
|
|
3575
|
+
const isAccentColor = color.includes("accent") || color.includes("#2563eb") ||
|
|
3576
|
+
color.includes("--color-accent");
|
|
3577
|
+
// Detect label element: smaller font or muted color
|
|
3578
|
+
const isSmallerFont = fontSize.includes("0.875rem") || fontSize.includes("0.75rem");
|
|
3579
|
+
const isMutedColor = color.includes("muted") || color.includes("#6b7280") ||
|
|
3580
|
+
color.includes("--color-muted");
|
|
3581
|
+
// Detect change element: very small font (0.75rem) with semantic colors (green/red for positive/negative)
|
|
3582
|
+
const isVerySmallFont = fontSize.includes("0.75rem") || fontSize.includes("0.7rem");
|
|
3583
|
+
const isPositiveColor = color.includes("#10b981") || color.includes("10b981") || color.includes("green");
|
|
3584
|
+
const isNegativeColor = color.includes("#ef4444") || color.includes("ef4444") || color.includes("red");
|
|
3585
|
+
const isChangeColor = isPositiveColor || isNegativeColor;
|
|
3586
|
+
if ((isLargerFont || isBold || isAccentColor) && !valueEl) {
|
|
3587
|
+
valueEl = child;
|
|
3588
|
+
}
|
|
3589
|
+
else if ((isSmallerFont || isMutedColor) && !labelEl) {
|
|
3590
|
+
labelEl = child;
|
|
3591
|
+
}
|
|
3592
|
+
else if ((isVerySmallFont || isChangeColor) && !changeEl) {
|
|
3593
|
+
// Change element: usually the third child with very small font and trend indicator
|
|
3594
|
+
changeEl = child;
|
|
3595
|
+
}
|
|
3596
|
+
}
|
|
3597
|
+
// If we couldn't detect by styles, fall back to position (first = value, second = label, third = change)
|
|
3598
|
+
if (!valueEl && cardChildren.length >= 1) {
|
|
3599
|
+
valueEl = cardChildren[0];
|
|
3600
|
+
}
|
|
3601
|
+
if (!labelEl && cardChildren.length >= 2) {
|
|
3602
|
+
labelEl = cardChildren[1];
|
|
3603
|
+
}
|
|
3604
|
+
if (!changeEl && cardChildren.length >= 3) {
|
|
3605
|
+
changeEl = cardChildren[2];
|
|
3606
|
+
}
|
|
3607
|
+
if (valueEl && labelEl) {
|
|
3608
|
+
const value = getTextContent(valueEl).trim();
|
|
3609
|
+
const label = getTextContent(labelEl).trim();
|
|
3610
|
+
const change = changeEl ? getTextContent(changeEl).trim() : undefined;
|
|
3611
|
+
// GENERALIZED VALIDATION: Stats cards should have short, distinct content
|
|
3612
|
+
// Skip if value or label are too long (indicates paragraph content, not stats)
|
|
3613
|
+
// Skip if they contain multiple sentences or newlines
|
|
3614
|
+
const MAX_VALUE_LENGTH = 50; // e.g., "$4,500.00", "127%", "2.5M"
|
|
3615
|
+
const MAX_LABEL_LENGTH = 100; // e.g., "Total Revenue", "Active Users"
|
|
3616
|
+
const isValueLike = value.length > 0 && value.length <= MAX_VALUE_LENGTH &&
|
|
3617
|
+
!value.includes('\n') && !value.includes('.');
|
|
3618
|
+
const isLabelLike = label.length > 0 && label.length <= MAX_LABEL_LENGTH &&
|
|
3619
|
+
!label.includes('\n');
|
|
3620
|
+
// GENERALIZED: Values that end with ':' are labels, not stats values
|
|
3621
|
+
// This prevents press-meta patterns like "Date:" / "February 12, 2026" from matching
|
|
3622
|
+
const isFieldLabel = value.endsWith(':');
|
|
3623
|
+
// Skip this card if it doesn't look like stats content
|
|
3624
|
+
if (!isValueLike || !isLabelLike || isFieldLabel) {
|
|
3625
|
+
continue;
|
|
3626
|
+
}
|
|
3627
|
+
// Extract colors from elements
|
|
3628
|
+
const valueColor = extractTextColor(valueEl, cssContext);
|
|
3629
|
+
const labelColor = extractTextColor(labelEl, cssContext);
|
|
3630
|
+
const changeColor = changeEl ? extractTextColor(changeEl, cssContext) : undefined;
|
|
3631
|
+
// Extract card background and border using generalized style extraction
|
|
3632
|
+
const cardStyles = getElementStyles(card, cssContext);
|
|
3633
|
+
const backgroundColor = cardStyles.backgroundColor
|
|
3634
|
+
? extractHexColor(cardStyles.backgroundColor)
|
|
3635
|
+
: undefined;
|
|
3636
|
+
const borderColor = extractBorderColorFromStyle(cardStyles);
|
|
3637
|
+
cards.push({
|
|
3638
|
+
value,
|
|
3639
|
+
label,
|
|
3640
|
+
change,
|
|
3641
|
+
valueColor,
|
|
3642
|
+
labelColor,
|
|
3643
|
+
changeColor,
|
|
3644
|
+
backgroundColor,
|
|
3645
|
+
borderColor,
|
|
3646
|
+
});
|
|
3647
|
+
}
|
|
3648
|
+
}
|
|
3649
|
+
if (cards.length >= 2) {
|
|
3650
|
+
elements.push({ type: "stats-grid", cards });
|
|
3651
|
+
return;
|
|
3652
|
+
}
|
|
3653
|
+
}
|
|
3654
|
+
}
|
|
3655
|
+
// GENERALIZED: Check for flex containers with inline key-value metadata items
|
|
3656
|
+
// Pattern: flex container with child divs, each containing label:value pairs
|
|
3657
|
+
// Example: .press-meta with "Date: Feb 12" and "Location: San Francisco"
|
|
3658
|
+
// These should render as a single inline paragraph, not separate lines
|
|
3659
|
+
if (isGridOrFlexContainer(element, cssContext)) {
|
|
3660
|
+
const childDivs = Array.from(element.children).filter((child) => child.tagName.toLowerCase() === "div");
|
|
3661
|
+
if (childDivs.length >= 2) {
|
|
3662
|
+
// Check if each child looks like a key-value pair (label ending with ":" + value)
|
|
3663
|
+
const metaItems = [];
|
|
3664
|
+
let isMetadataPattern = true;
|
|
3665
|
+
for (const child of childDivs) {
|
|
3666
|
+
// Get all text-containing children
|
|
3667
|
+
const textElements = Array.from(child.querySelectorAll("span, strong, em, b, i"));
|
|
3668
|
+
if (textElements.length >= 2) {
|
|
3669
|
+
const labelEl = textElements[0];
|
|
3670
|
+
const valueEl = textElements[1];
|
|
3671
|
+
if (labelEl && valueEl) {
|
|
3672
|
+
const label = getTextContent(labelEl).trim();
|
|
3673
|
+
const value = getTextContent(valueEl).trim();
|
|
3674
|
+
// Check if label ends with ":"
|
|
3675
|
+
if (label.endsWith(":") && value.length > 0) {
|
|
3676
|
+
metaItems.push({ label, value });
|
|
3677
|
+
continue;
|
|
3678
|
+
}
|
|
3679
|
+
}
|
|
3680
|
+
}
|
|
3681
|
+
// If any child doesn't match the pattern, break
|
|
3682
|
+
isMetadataPattern = false;
|
|
3683
|
+
break;
|
|
3684
|
+
}
|
|
3685
|
+
// If all children are key-value pairs, render as inline paragraph
|
|
3686
|
+
if (isMetadataPattern && metaItems.length >= 2) {
|
|
3687
|
+
// Build inline runs with spacing between items
|
|
3688
|
+
const runs = [];
|
|
3689
|
+
const elementStyles = getElementStyles(element, cssContext);
|
|
3690
|
+
const elementColor = elementStyles.color ? extractHexColor(elementStyles.color) : undefined;
|
|
3691
|
+
for (let i = 0; i < metaItems.length; i++) {
|
|
3692
|
+
const item = metaItems[i];
|
|
3693
|
+
// Add label (bold)
|
|
3694
|
+
runs.push({ text: item.label, bold: true, color: elementColor });
|
|
3695
|
+
// Add value with space
|
|
3696
|
+
runs.push({ text: ` ${item.value}`, color: elementColor });
|
|
3697
|
+
// Add separator between items (except last)
|
|
3698
|
+
if (i < metaItems.length - 1) {
|
|
3699
|
+
runs.push({ text: " ", color: elementColor }); // Tab-like spacing
|
|
3700
|
+
}
|
|
3701
|
+
}
|
|
3702
|
+
const text = runs.map(r => r.text).join("");
|
|
3703
|
+
elements.push({ type: "paragraph", text, runs });
|
|
3704
|
+
return;
|
|
3705
|
+
}
|
|
3706
|
+
}
|
|
3707
|
+
}
|
|
3708
|
+
// GENERALIZED: Check for div that contains an SVG chart
|
|
3709
|
+
// If div has ONLY an SVG (no text), treat entire div as chart wrapper
|
|
3710
|
+
// If div has BOTH SVG and text content, extract SVG as chart AND process text separately
|
|
3711
|
+
const svgElement = element.querySelector(":scope > svg, :scope > div > svg");
|
|
3712
|
+
// Recursively check if element has meaningful text content (direct or in nested containers)
|
|
3713
|
+
// This determines if the div is a dedicated SVG wrapper or a mixed content container
|
|
3714
|
+
/**
 * Reports whether `el` holds any non-whitespace text outside of <svg> subtrees.
 *
 * Children are visited in document order and the scan stops at the first hit:
 *  - text nodes (nodeType 3) count when their trimmed textContent is non-empty;
 *  - h1-h6, p and span elements count when getTextContent() yields non-blank text;
 *  - div/section/article/aside/main/nav/header/footer elements are searched
 *    recursively with this same function;
 *  - every other element, including <svg>, is neither inspected nor descended into.
 *
 * NOTE(review): text that lives only inside other tags (e.g. li, a, strong, td)
 * is therefore not detected - confirm this is intended.
 *
 * @param {Element} el - Element whose direct and nested children are scanned.
 * @returns {boolean} true as soon as meaningful text is found, otherwise false.
 */
const checkForTextContent = (el) => {
    // Treats null/undefined/empty and whitespace-only strings as "no text".
    const isNonBlank = (value) => (value || "").trim().length > 0;
    return Array.from(el.childNodes).some((node) => {
        switch (node.nodeType) {
            case 3: // Text node
                return isNonBlank(node.textContent);
            case 1: // Element node
                switch (node.tagName?.toLowerCase()) {
                    // Text-bearing elements: judge them by their own text content
                    case "h1":
                    case "h2":
                    case "h3":
                    case "h4":
                    case "h5":
                    case "h6":
                    case "p":
                    case "span":
                        return isNonBlank(getTextContent(node));
                    // Structural containers: look inside them recursively
                    case "div":
                    case "section":
                    case "article":
                    case "aside":
                    case "main":
                    case "nav":
                    case "header":
                    case "footer":
                        return checkForTextContent(node);
                    // <svg> (the chart itself) and any other tag never count as text
                    default:
                        return false;
                }
            default:
                return false;
        }
    });
};
|
|
3744
|
+
const hasTextContent = checkForTextContent(element);
|
|
3745
|
+
// Process SVG if present, not decorative, and not already processed
|
|
3746
|
+
if (svgElement && !processedSvgs.has(svgElement) && !isDecorativeSvg(svgElement, element, cssContext)) {
|
|
3747
|
+
// Extract SVG content and dimensions
|
|
3748
|
+
const svgContent = svgElement.outerHTML;
|
|
3749
|
+
const viewBox = svgElement.getAttribute("viewBox");
|
|
3750
|
+
let width = 700;
|
|
3751
|
+
let height = 320;
|
|
3752
|
+
if (viewBox) {
|
|
3753
|
+
const parts = viewBox.split(/\s+/).map(Number);
|
|
3754
|
+
if (parts.length >= 4) {
|
|
3755
|
+
width = parts[2] || width;
|
|
3756
|
+
height = parts[3] || height;
|
|
3757
|
+
}
|
|
3758
|
+
}
|
|
3759
|
+
else {
|
|
3760
|
+
// Try width/height attributes
|
|
3761
|
+
const widthAttr = svgElement.getAttribute("width");
|
|
3762
|
+
const heightAttr = svgElement.getAttribute("height");
|
|
3763
|
+
if (widthAttr && !widthAttr.includes("%")) {
|
|
3764
|
+
width = parseInt(widthAttr, 10) || width;
|
|
3765
|
+
}
|
|
3766
|
+
if (heightAttr && !heightAttr.includes("%")) {
|
|
3767
|
+
height = parseInt(heightAttr, 10) || height;
|
|
3768
|
+
}
|
|
3769
|
+
}
|
|
3770
|
+
// Look for title - but ONLY for dedicated SVG wrappers (no mixed text content)
|
|
3771
|
+
// For mixed content containers, the heading will be processed separately as DOM content
|
|
3772
|
+
let title;
|
|
3773
|
+
if (!hasTextContent) {
|
|
3774
|
+
const titleEl = element.querySelector("h4, h3, h5, h6");
|
|
3775
|
+
if (titleEl) {
|
|
3776
|
+
title = getTextContent(titleEl).trim();
|
|
3777
|
+
}
|
|
3778
|
+
if (!title) {
|
|
3779
|
+
const prevSibling = element.previousElementSibling;
|
|
3780
|
+
if (prevSibling && /^h[1-6]$/i.test(prevSibling.tagName)) {
|
|
3781
|
+
title = getTextContent(prevSibling).trim();
|
|
3782
|
+
}
|
|
3783
|
+
}
|
|
3784
|
+
}
|
|
3785
|
+
// GENERALIZED: Extract background color from parent container for SVG rendering
|
|
3786
|
+
// This handles cases like .hero-image { background: linear-gradient(...) }
|
|
3787
|
+
// where the SVG needs to be rendered with the container's background color
|
|
3788
|
+
const containerStyles = getElementStyles(element, cssContext);
|
|
3789
|
+
let backgroundColor;
|
|
3790
|
+
if (containerStyles.backgroundColor) {
|
|
3791
|
+
backgroundColor = extractHexColor(containerStyles.backgroundColor);
|
|
3792
|
+
}
|
|
3793
|
+
elements.push({ type: "svg-chart", svgContent, width, height, title, backgroundColor });
|
|
3794
|
+
// Mark this SVG as processed to avoid duplicate processing when iterating children
|
|
3795
|
+
processedSvgs.add(svgElement);
|
|
3796
|
+
// If no text content, we're done (dedicated SVG wrapper)
|
|
3797
|
+
if (!hasTextContent) {
|
|
3798
|
+
return;
|
|
3799
|
+
}
|
|
3800
|
+
// Otherwise, continue to process remaining children (mixed content)
|
|
3801
|
+
// Fall through to CONTAINER_TAGS processing - SVG children will be skipped there
|
|
3802
|
+
}
|
|
3803
|
+
}
|
|
3804
|
+
// Check for SVG elements (standalone charts)
|
|
3805
|
+
// Filter out decorative SVGs (backgrounds, patterns, icons)
|
|
3806
|
+
if (tagName === "svg") {
|
|
3807
|
+
// Skip if this SVG was already processed by a parent container's mixed content handler
|
|
3808
|
+
if (processedSvgs.has(element)) {
|
|
3809
|
+
return;
|
|
3810
|
+
}
|
|
3811
|
+
// For standalone SVGs, use the SVG's parent as context for decorative detection
|
|
3812
|
+
// If no parent or parent is body, check SVG itself for decorative patterns
|
|
3813
|
+
const parent = element.parentElement || element;
|
|
3814
|
+
if (isDecorativeSvg(element, parent, cssContext)) {
|
|
3815
|
+
// Skip decorative SVGs - don't convert to chart
|
|
3816
|
+
return;
|
|
3817
|
+
}
|
|
3818
|
+
// Extract SVG content and dimensions
|
|
3819
|
+
const svgContent = element.outerHTML;
|
|
3820
|
+
const viewBox = element.getAttribute("viewBox");
|
|
3821
|
+
let width = 700;
|
|
3822
|
+
let height = 320;
|
|
3823
|
+
if (viewBox) {
|
|
3824
|
+
const parts = viewBox.split(/\s+/).map(Number);
|
|
3825
|
+
if (parts.length >= 4) {
|
|
3826
|
+
width = parts[2] || width;
|
|
3827
|
+
height = parts[3] || height;
|
|
3828
|
+
}
|
|
3829
|
+
}
|
|
3830
|
+
else {
|
|
3831
|
+
const widthAttr = element.getAttribute("width");
|
|
3832
|
+
const heightAttr = element.getAttribute("height");
|
|
3833
|
+
if (widthAttr && !widthAttr.includes("%")) {
|
|
3834
|
+
width = parseInt(widthAttr, 10) || width;
|
|
3835
|
+
}
|
|
3836
|
+
if (heightAttr && !heightAttr.includes("%")) {
|
|
3837
|
+
height = parseInt(heightAttr, 10) || height;
|
|
3838
|
+
}
|
|
3839
|
+
}
|
|
3840
|
+
// Try to find a title
|
|
3841
|
+
const titleEl = element.querySelector("title");
|
|
3842
|
+
const title = titleEl ? getTextContent(titleEl).trim() : undefined;
|
|
3843
|
+
// GENERALIZED: Extract background color from parent container for SVG rendering
|
|
3844
|
+
// This handles cases where an SVG is placed inside a styled container
|
|
3845
|
+
let backgroundColor;
|
|
3846
|
+
if (parent && parent !== element) {
|
|
3847
|
+
const parentStyles = getElementStyles(parent, cssContext);
|
|
3848
|
+
if (parentStyles.backgroundColor) {
|
|
3849
|
+
backgroundColor = extractHexColor(parentStyles.backgroundColor);
|
|
3850
|
+
}
|
|
3851
|
+
}
|
|
3852
|
+
elements.push({ type: "svg-chart", svgContent, width, height, title, backgroundColor });
|
|
3853
|
+
return;
|
|
3854
|
+
}
|
|
3855
|
+
// Handle <img> elements - external images that need to be fetched
|
|
3856
|
+
if (tagName === "img") {
|
|
3857
|
+
const src = element.getAttribute("src");
|
|
3858
|
+
if (src) {
|
|
3859
|
+
const alt = element.getAttribute("alt") || undefined;
|
|
3860
|
+
// Extract width and height from attributes only
|
|
3861
|
+
// Computed styles are not available in linkedom (Node.js)
|
|
3862
|
+
// Actual dimensions will be obtained when the image is fetched
|
|
3863
|
+
let width;
|
|
3864
|
+
let height;
|
|
3865
|
+
const widthAttr = element.getAttribute("width");
|
|
3866
|
+
const heightAttr = element.getAttribute("height");
|
|
3867
|
+
if (widthAttr && !widthAttr.includes("%")) {
|
|
3868
|
+
width = parseInt(widthAttr, 10) || undefined;
|
|
3869
|
+
}
|
|
3870
|
+
if (heightAttr && !heightAttr.includes("%")) {
|
|
3871
|
+
height = parseInt(heightAttr, 10) || undefined;
|
|
3872
|
+
}
|
|
3873
|
+
// Check if img is inside a figure with figcaption
|
|
3874
|
+
let caption;
|
|
3875
|
+
const parentFigure = element.closest("figure");
|
|
3876
|
+
if (parentFigure) {
|
|
3877
|
+
const figcaption = parentFigure.querySelector("figcaption");
|
|
3878
|
+
if (figcaption) {
|
|
3879
|
+
caption = getTextContent(figcaption).trim() || undefined;
|
|
3880
|
+
}
|
|
3881
|
+
}
|
|
3882
|
+
elements.push({ type: "image", src, alt, width, height, caption });
|
|
3883
|
+
}
|
|
3884
|
+
return;
|
|
3885
|
+
}
|
|
3886
|
+
// Handle <picture> elements - get the best source or fallback img
|
|
3887
|
+
if (tagName === "picture") {
|
|
3888
|
+
// Find the fallback img inside picture
|
|
3889
|
+
const imgEl = element.querySelector("img");
|
|
3890
|
+
if (imgEl) {
|
|
3891
|
+
const src = imgEl.getAttribute("src");
|
|
3892
|
+
if (src) {
|
|
3893
|
+
const alt = imgEl.getAttribute("alt") || undefined;
|
|
3894
|
+
// Extract width and height
|
|
3895
|
+
let width;
|
|
3896
|
+
let height;
|
|
3897
|
+
const widthAttr = imgEl.getAttribute("width");
|
|
3898
|
+
const heightAttr = imgEl.getAttribute("height");
|
|
3899
|
+
if (widthAttr && !widthAttr.includes("%")) {
|
|
3900
|
+
width = parseInt(widthAttr, 10) || undefined;
|
|
3901
|
+
}
|
|
3902
|
+
if (heightAttr && !heightAttr.includes("%")) {
|
|
3903
|
+
height = parseInt(heightAttr, 10) || undefined;
|
|
3904
|
+
}
|
|
3905
|
+
// Check for figcaption
|
|
3906
|
+
let caption;
|
|
3907
|
+
const parentFigure = element.closest("figure");
|
|
3908
|
+
if (parentFigure) {
|
|
3909
|
+
const figcaption = parentFigure.querySelector("figcaption");
|
|
3910
|
+
if (figcaption) {
|
|
3911
|
+
caption = getTextContent(figcaption).trim() || undefined;
|
|
3912
|
+
}
|
|
3913
|
+
}
|
|
3914
|
+
elements.push({ type: "image", src, alt, width, height, caption });
|
|
3915
|
+
}
|
|
3916
|
+
}
|
|
3917
|
+
return;
|
|
3918
|
+
}
|
|
3919
|
+
// Handle <figure> elements containing images - extract img and caption together
|
|
3920
|
+
if (tagName === "figure") {
|
|
3921
|
+
const imgEl = element.querySelector("img") || element.querySelector("picture img");
|
|
3922
|
+
if (imgEl) {
|
|
3923
|
+
const src = imgEl.getAttribute("src");
|
|
3924
|
+
if (src) {
|
|
3925
|
+
const alt = imgEl.getAttribute("alt") || undefined;
|
|
3926
|
+
// Extract width and height from attributes only
|
|
3927
|
+
// Computed styles are not available in linkedom (Node.js)
|
|
3928
|
+
// Actual dimensions will be obtained when the image is fetched
|
|
3929
|
+
let width;
|
|
3930
|
+
let height;
|
|
3931
|
+
const widthAttr = imgEl.getAttribute("width");
|
|
3932
|
+
const heightAttr = imgEl.getAttribute("height");
|
|
3933
|
+
if (widthAttr && !widthAttr.includes("%")) {
|
|
3934
|
+
width = parseInt(widthAttr, 10) || undefined;
|
|
3935
|
+
}
|
|
3936
|
+
if (heightAttr && !heightAttr.includes("%")) {
|
|
3937
|
+
height = parseInt(heightAttr, 10) || undefined;
|
|
3938
|
+
}
|
|
3939
|
+
// Extract caption from figcaption
|
|
3940
|
+
let caption;
|
|
3941
|
+
const figcaption = element.querySelector("figcaption");
|
|
3942
|
+
if (figcaption) {
|
|
3943
|
+
caption = getTextContent(figcaption).trim() || undefined;
|
|
3944
|
+
}
|
|
3945
|
+
elements.push({ type: "image", src, alt, width, height, caption });
|
|
3946
|
+
return;
|
|
3947
|
+
}
|
|
3948
|
+
}
|
|
3949
|
+
// If no img found, fall through to container handling
|
|
3950
|
+
}
|
|
3951
|
+
// Check for blockquote/callout before generic container handling
|
|
3952
|
+
// Uses style-based detection, NOT class names
|
|
3953
|
+
if (isBlockquoteOrCallout(element, cssContext)) {
|
|
3954
|
+
const content = parseBlockquoteContent(element, cssContext);
|
|
3955
|
+
if (content.length > 0) {
|
|
3956
|
+
// Extract styling from CSS classes and inline styles (generalized approach)
|
|
3957
|
+
const elementStyles = getElementStyles(element, cssContext);
|
|
3958
|
+
let borderColor;
|
|
3959
|
+
let backgroundColor;
|
|
3960
|
+
// Extract background color from element styles
|
|
3961
|
+
if (elementStyles.backgroundColor) {
|
|
3962
|
+
const hex = extractHexColor(elementStyles.backgroundColor);
|
|
3963
|
+
if (hex)
|
|
3964
|
+
backgroundColor = hex;
|
|
3965
|
+
}
|
|
3966
|
+
// Extract border color from element styles
|
|
3967
|
+
if (elementStyles.borderColor) {
|
|
3968
|
+
const hex = extractHexColor(elementStyles.borderColor);
|
|
3969
|
+
if (hex)
|
|
3970
|
+
borderColor = hex;
|
|
3971
|
+
}
|
|
3972
|
+
else if (elementStyles.border) {
|
|
3973
|
+
// Try to extract color from border shorthand (e.g., "4px solid #2563eb")
|
|
3974
|
+
const hex = extractBorderColorFromStyle(elementStyles);
|
|
3975
|
+
if (hex)
|
|
3976
|
+
borderColor = hex;
|
|
3977
|
+
}
|
|
3978
|
+
// Also check inline styles (overrides CSS)
|
|
3979
|
+
const inlineStyle = element.getAttribute("style") || "";
|
|
3980
|
+
const bgMatch = inlineStyle.match(/background(?:-color)?:\s*([^;]+)/i);
|
|
3981
|
+
if (bgMatch) {
|
|
3982
|
+
const extracted = extractHexColor(bgMatch[1]);
|
|
3983
|
+
if (extracted)
|
|
3984
|
+
backgroundColor = extracted;
|
|
3985
|
+
}
|
|
3986
|
+
const borderMatch = inlineStyle.match(/border(?:-left)?(?:-color)?:\s*([^;]+)/i);
|
|
3987
|
+
if (borderMatch) {
|
|
3988
|
+
// Parse border value which could be shorthand or just color
|
|
3989
|
+
const borderValue = borderMatch[1];
|
|
3990
|
+
const colorInBorder = borderValue.match(/#([0-9a-fA-F]{3,6})/);
|
|
3991
|
+
if (colorInBorder) {
|
|
3992
|
+
let hex = colorInBorder[1];
|
|
3993
|
+
if (hex.length === 3) {
|
|
3994
|
+
hex = hex[0] + hex[0] + hex[1] + hex[1] + hex[2] + hex[2];
|
|
3995
|
+
}
|
|
3996
|
+
borderColor = hex.toUpperCase();
|
|
3997
|
+
}
|
|
3998
|
+
}
|
|
3999
|
+
// Determine variant based on styling characteristics (NOT class names)
|
|
4000
|
+
// - "executive-summary": typically uses accent blue border with light background
|
|
4001
|
+
// - "callout": typically has themed background (yellow, blue, red, green)
|
|
4002
|
+
let variant;
|
|
4003
|
+
// If it's a semantic blockquote, treat as generic
|
|
4004
|
+
if (element.tagName.toLowerCase() === "blockquote") {
|
|
4005
|
+
variant = undefined;
|
|
4006
|
+
}
|
|
4007
|
+
else {
|
|
4008
|
+
// For styled divs, default to callout variant
|
|
4009
|
+
variant = "callout";
|
|
4010
|
+
}
|
|
4011
|
+
// GENERALIZED: Determine border style - "full" (all sides), "left" (left accent), or "none" (no border)
|
|
4012
|
+
// If CSS has `border:` shorthand (all sides), use full border
|
|
4013
|
+
// If CSS has only `border-left:`, use left accent border (callout style)
|
|
4014
|
+
// If NO border is specified but has background, use no border (hero sections, title blocks)
|
|
4015
|
+
let borderStyle;
|
|
4016
|
+
// Check if element has any border styling
|
|
4017
|
+
const hasBorderStyling = !!borderColor ||
|
|
4018
|
+
!!elementStyles.border ||
|
|
4019
|
+
!!elementStyles.borderLeft ||
|
|
4020
|
+
!!elementStyles.borderColor;
|
|
4021
|
+
if (!hasBorderStyling && backgroundColor) {
|
|
4022
|
+
// Has background but no border - use "none" style (title blocks, hero sections)
|
|
4023
|
+
borderStyle = "none";
|
|
4024
|
+
}
|
|
4025
|
+
else if (elementStyles.border && !elementStyles.border.includes("none")) {
|
|
4026
|
+
// Has full border shorthand (e.g., "1px solid #e5e7eb")
|
|
4027
|
+
// Check it's not just inherited from border-left
|
|
4028
|
+
if (!elementStyles.borderLeft || elementStyles.border !== elementStyles.borderLeft) {
|
|
4029
|
+
borderStyle = "full";
|
|
4030
|
+
}
|
|
4031
|
+
}
|
|
4032
|
+
// Otherwise borderStyle remains undefined, which defaults to "left" in convert.ts
|
|
4033
|
+
// GENERALIZED: Extract background gradient for container gradient rendering
|
|
4034
|
+
const backgroundGradient = elementStyles.backgroundGradient;
|
|
4035
|
+
elements.push({
|
|
4036
|
+
type: "blockquote",
|
|
4037
|
+
content,
|
|
4038
|
+
borderColor,
|
|
4039
|
+
backgroundColor,
|
|
4040
|
+
backgroundGradient,
|
|
4041
|
+
variant,
|
|
4042
|
+
borderStyle,
|
|
4043
|
+
});
|
|
4044
|
+
}
|
|
4045
|
+
return;
|
|
4046
|
+
}
|
|
4047
|
+
// Handle span elements specially - they're often inline styled elements
|
|
4048
|
+
// When a span is a direct child being processed, it should be part of a paragraph
|
|
4049
|
+
// but the parent div handling below should catch most cases
|
|
4050
|
+
if (tagName === "span") {
|
|
4051
|
+
const text = getTextContent(element).trim();
|
|
4052
|
+
if (text) {
|
|
4053
|
+
// Extract styling from the span
|
|
4054
|
+
const styles = getElementStyles(element, cssContext);
|
|
4055
|
+
const backgroundColor = styles.backgroundColor ? extractHexColor(styles.backgroundColor) : undefined;
|
|
4056
|
+
const spanColor = styles.color ? extractHexColor(styles.color) : undefined;
|
|
4057
|
+
if (backgroundColor || spanColor) {
|
|
4058
|
+
// Has styling - use runs to preserve it
|
|
4059
|
+
const runs = [{
|
|
4060
|
+
text,
|
|
4061
|
+
bold: false,
|
|
4062
|
+
italic: false,
|
|
4063
|
+
color: spanColor,
|
|
4064
|
+
backgroundColor,
|
|
4065
|
+
}];
|
|
4066
|
+
elements.push({ type: "paragraph", text, runs, alignment });
|
|
4067
|
+
}
|
|
4068
|
+
else {
|
|
4069
|
+
elements.push({ type: "paragraph", text, alignment, color: elementColor });
|
|
4070
|
+
}
|
|
4071
|
+
}
|
|
4072
|
+
return;
|
|
4073
|
+
}
|
|
4074
|
+
// Detect skill item pattern in main processNode too (for resumes without two-column DOCX support)
|
|
4075
|
+
if (tagName === "div") {
|
|
4076
|
+
const skillItem = detectSkillItem(element);
|
|
4077
|
+
if (skillItem) {
|
|
4078
|
+
// Output as single line: "Skill Name: 95%"
|
|
4079
|
+
const text = `${skillItem.name}: ${skillItem.percentage}`;
|
|
4080
|
+
elements.push({ type: "paragraph", text, color: elementColor });
|
|
4081
|
+
return;
|
|
4082
|
+
}
|
|
4083
|
+
// Detect language item pattern: language name + proficiency dots
|
|
4084
|
+
const languageItem = detectLanguageItem(element, cssContext);
|
|
4085
|
+
if (languageItem) {
|
|
4086
|
+
// Create visual representation with filled and empty dots
|
|
4087
|
+
const filledDots = "●".repeat(languageItem.filledCount);
|
|
4088
|
+
const emptyDots = "○".repeat(languageItem.totalCount - languageItem.filledCount);
|
|
4089
|
+
const text = `${languageItem.name}: ${filledDots}${emptyDots}`;
|
|
4090
|
+
elements.push({ type: "paragraph", text, color: elementColor });
|
|
4091
|
+
return;
|
|
4092
|
+
}
|
|
4093
|
+
}
|
|
4094
|
+
if (CONTAINER_TAGS.includes(tagName)) {
|
|
4095
|
+
// Before processing content, check if this element has border-top
|
|
4096
|
+
// (like footer sections which have a horizontal rule before the content)
|
|
4097
|
+
const borderTopColor = extractBorderTopColor(element, cssContext);
|
|
4098
|
+
if (borderTopColor) {
|
|
4099
|
+
elements.push({ type: "horizontal-rule", color: borderTopColor });
|
|
4100
|
+
}
|
|
4101
|
+
// Check if this container has ONLY inline content (spans, text nodes, inline formatting)
|
|
4102
|
+
// If so, treat the entire container as a single paragraph instead of separate elements
|
|
4103
|
+
const hasOnlyInlineContent = isInlineOnlyContainer(element);
|
|
4104
|
+
if (hasOnlyInlineContent) {
|
|
4105
|
+
// Get container's styling (italic, bold, color, font-family, text-transform) from CSS
|
|
4106
|
+
const containerStyles = getElementStyles(element, cssContext);
|
|
4107
|
+
const containerItalic = containerStyles.fontStyle === "italic";
|
|
4108
|
+
const containerBold = containerStyles.fontWeight === "700" || containerStyles.fontWeight === "bold" || containerStyles.fontWeight === "600";
|
|
4109
|
+
const containerColor = containerStyles.color ? extractHexColor(containerStyles.color) : undefined;
|
|
4110
|
+
// GENERALIZED: Extract font-family for inheritance to runs
|
|
4111
|
+
const containerFontFamily = containerStyles.fontFamily;
|
|
4112
|
+
// GENERALIZED: Extract text-transform (uppercase, lowercase, capitalize)
|
|
4113
|
+
let textTransform;
|
|
4114
|
+
if (containerStyles.textTransform === "uppercase" || containerStyles.textTransform === "lowercase" || containerStyles.textTransform === "capitalize") {
|
|
4115
|
+
textTransform = containerStyles.textTransform;
|
|
4116
|
+
}
|
|
4117
|
+
// GENERALIZED: Extract margin-bottom for paragraph spacing
|
|
4118
|
+
const spacingAfter = containerStyles.marginBottom ? parseMarginToTwips(containerStyles.marginBottom) : undefined;
|
|
4119
|
+
// GENERALIZED: Extract line-height for vertical spacing
|
|
4120
|
+
const lineSpacing = containerStyles.lineHeight ? parseLineHeightToDocx(containerStyles.lineHeight) : undefined;
|
|
4121
|
+
// GENERALIZED: Check if this is a horizontal flex container with multiple children
|
|
4122
|
+
// If so, we need to render as a horizontal table to represent the CSS gap
|
|
4123
|
+
const isHorizFlex = isHorizontalFlexContainer(element, cssContext);
|
|
4124
|
+
const directChildren = Array.from(element.children);
|
|
4125
|
+
const hasMultipleFlexChildren = isHorizFlex && directChildren.length > 1;
|
|
4126
|
+
if (hasMultipleFlexChildren) {
|
|
4127
|
+
// GENERALIZED: Convert horizontal flexbox to a single-row borderless table
|
|
4128
|
+
// Each flex item becomes a table cell, preserving the horizontal layout
|
|
4129
|
+
const flexChildren = directChildren.filter(child => child.nodeType === Node.ELEMENT_NODE);
|
|
4130
|
+
const tableCells = [];
|
|
4131
|
+
for (const flexChild of flexChildren) {
|
|
4132
|
+
// Extract runs from this flex child
|
|
4133
|
+
const childRuns = extractInlineRuns(flexChild, cssContext, undefined, containerFontFamily);
|
|
4134
|
+
if (childRuns.length > 0) {
|
|
4135
|
+
// Apply container's styles to runs that don't have their own
|
|
4136
|
+
childRuns.forEach(run => {
|
|
4137
|
+
if (!run.color && (elementColor || containerColor)) {
|
|
4138
|
+
run.color = elementColor || containerColor;
|
|
4139
|
+
}
|
|
4140
|
+
if (!run.italic && containerItalic) {
|
|
4141
|
+
run.italic = true;
|
|
4142
|
+
}
|
|
4143
|
+
if (!run.bold && containerBold) {
|
|
4144
|
+
run.bold = true;
|
|
4145
|
+
}
|
|
4146
|
+
if (!run.fontFamily && containerFontFamily) {
|
|
4147
|
+
run.fontFamily = containerFontFamily;
|
|
4148
|
+
}
|
|
4149
|
+
});
|
|
4150
|
+
// Use runs as cell content
|
|
4151
|
+
tableCells.push(childRuns);
|
|
4152
|
+
}
|
|
4153
|
+
}
|
|
4154
|
+
if (tableCells.length > 0) {
|
|
4155
|
+
// Create a single-row borderless table to represent horizontal flexbox
|
|
4156
|
+
elements.push({
|
|
4157
|
+
type: "table",
|
|
4158
|
+
rows: [tableCells],
|
|
4159
|
+
hasHeader: false,
|
|
4160
|
+
noBorders: true,
|
|
4161
|
+
});
|
|
4162
|
+
}
|
|
4163
|
+
}
|
|
4164
|
+
else {
|
|
4165
|
+
// Normal inline extraction - not a horizontal flex container
|
|
4166
|
+
const runs = extractInlineRuns(element, cssContext, undefined, containerFontFamily);
|
|
4167
|
+
if (runs.length > 0) {
|
|
4168
|
+
const text = runs.map(r => r.text).join("");
|
|
4169
|
+
// Apply container's styles to runs that don't have their own
|
|
4170
|
+
runs.forEach(run => {
|
|
4171
|
+
if (!run.color && (elementColor || containerColor)) {
|
|
4172
|
+
run.color = elementColor || containerColor;
|
|
4173
|
+
}
|
|
4174
|
+
if (!run.italic && containerItalic) {
|
|
4175
|
+
run.italic = true;
|
|
4176
|
+
}
|
|
4177
|
+
if (!run.bold && containerBold) {
|
|
4178
|
+
run.bold = true;
|
|
4179
|
+
}
|
|
4180
|
+
// GENERALIZED: Apply container's font-family to runs that don't have their own
|
|
4181
|
+
if (!run.fontFamily && containerFontFamily) {
|
|
4182
|
+
run.fontFamily = containerFontFamily;
|
|
4183
|
+
}
|
|
4184
|
+
});
|
|
4185
|
+
// Build paragraph with all extracted styles
|
|
4186
|
+
const hasStyling = hasInlineFormatting(runs) || elementColor || containerColor || spacingAfter !== undefined || lineSpacing !== undefined || textTransform !== undefined;
|
|
4187
|
+
if (hasStyling) {
|
|
4188
|
+
elements.push({
|
|
4189
|
+
type: "paragraph",
|
|
4190
|
+
text,
|
|
4191
|
+
runs: hasInlineFormatting(runs) ? runs : undefined,
|
|
4192
|
+
color: !hasInlineFormatting(runs) ? (elementColor || containerColor) : undefined,
|
|
4193
|
+
alignment,
|
|
4194
|
+
spacingAfter,
|
|
4195
|
+
lineSpacing,
|
|
4196
|
+
textTransform,
|
|
4197
|
+
});
|
|
4198
|
+
}
|
|
4199
|
+
else {
|
|
4200
|
+
elements.push({ type: "paragraph", text, alignment });
|
|
4201
|
+
}
|
|
4202
|
+
}
|
|
4203
|
+
}
|
|
4204
|
+
}
|
|
4205
|
+
else {
|
|
4206
|
+
// Process children individually (block-level content)
|
|
4207
|
+
// Pass elementColor only if this container explicitly sets a color (for special containers)
|
|
4208
|
+
for (const child of element.childNodes) {
|
|
4209
|
+
processNode(child, alignment, elementColor);
|
|
4210
|
+
}
|
|
4211
|
+
}
|
|
4212
|
+
// After processing children, check if this element has border-bottom
|
|
4213
|
+
// (like .title-block which should have a horizontal rule after the heading)
|
|
4214
|
+
const borderBottomColor = extractBorderBottomColor(element, cssContext);
|
|
4215
|
+
if (borderBottomColor) {
|
|
4216
|
+
elements.push({ type: "horizontal-rule", color: borderBottomColor });
|
|
4217
|
+
}
|
|
4218
|
+
return;
|
|
4219
|
+
}
|
|
4220
|
+
const text = getTextContent(element).trim();
|
|
4221
|
+
if (text && element.children.length === 0) {
|
|
4222
|
+
// Check for styling from CSS (background color, italic, etc.)
|
|
4223
|
+
const styles = getElementStyles(element, cssContext);
|
|
4224
|
+
const backgroundColor = styles.backgroundColor ? extractHexColor(styles.backgroundColor) : undefined;
|
|
4225
|
+
const isItalic = styles.fontStyle === "italic";
|
|
4226
|
+
const isBold = styles.fontWeight === "700" || styles.fontWeight === "bold" || styles.fontWeight === "600";
|
|
4227
|
+
if (backgroundColor || elementColor || isItalic || isBold) {
|
|
4228
|
+
// Has styling - use runs to preserve formatting
|
|
4229
|
+
const runs = [{
|
|
4230
|
+
text,
|
|
4231
|
+
bold: isBold,
|
|
4232
|
+
italic: isItalic,
|
|
4233
|
+
color: elementColor,
|
|
4234
|
+
backgroundColor,
|
|
4235
|
+
}];
|
|
4236
|
+
elements.push({ type: "paragraph", text, runs, alignment });
|
|
4237
|
+
}
|
|
4238
|
+
else {
|
|
4239
|
+
elements.push({ type: "paragraph", text, alignment });
|
|
4240
|
+
}
|
|
4241
|
+
}
|
|
4242
|
+
else {
|
|
4243
|
+
for (const child of element.childNodes) {
|
|
4244
|
+
processNode(child, alignment, elementColor);
|
|
4245
|
+
}
|
|
4246
|
+
}
|
|
4247
|
+
}
|
|
4248
|
+
for (const node of body.childNodes) {
|
|
4249
|
+
processNode(node);
|
|
4250
|
+
}
|
|
4251
|
+
return elements;
|
|
4252
|
+
}
|
|
4253
|
+
//# sourceMappingURL=parse.js.map
|