docgen-utils 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. package/README.md +118 -0
  2. package/dist/bundle.js +36086 -0
  3. package/dist/bundle.min.js +197 -0
  4. package/dist/cli.js +47432 -0
  5. package/dist/index.d.ts +9 -0
  6. package/dist/index.d.ts.map +1 -0
  7. package/dist/index.js +9 -0
  8. package/dist/index.js.map +1 -0
  9. package/dist/packages/cli/commands/export-docs.d.ts +5 -0
  10. package/dist/packages/cli/commands/export-docs.d.ts.map +1 -0
  11. package/dist/packages/cli/commands/export-docs.js +24 -0
  12. package/dist/packages/cli/commands/export-docs.js.map +1 -0
  13. package/dist/packages/cli/commands/export-slides.d.ts +5 -0
  14. package/dist/packages/cli/commands/export-slides.d.ts.map +1 -0
  15. package/dist/packages/cli/commands/export-slides.js +86 -0
  16. package/dist/packages/cli/commands/export-slides.js.map +1 -0
  17. package/dist/packages/cli/commands/import-docx.d.ts +5 -0
  18. package/dist/packages/cli/commands/import-docx.d.ts.map +1 -0
  19. package/dist/packages/cli/commands/import-docx.js +27 -0
  20. package/dist/packages/cli/commands/import-docx.js.map +1 -0
  21. package/dist/packages/cli/commands/import-pptx.d.ts +5 -0
  22. package/dist/packages/cli/commands/import-pptx.d.ts.map +1 -0
  23. package/dist/packages/cli/commands/import-pptx.js +44 -0
  24. package/dist/packages/cli/commands/import-pptx.js.map +1 -0
  25. package/dist/packages/cli/index.d.ts +11 -0
  26. package/dist/packages/cli/index.d.ts.map +1 -0
  27. package/dist/packages/cli/index.js +103 -0
  28. package/dist/packages/cli/index.js.map +1 -0
  29. package/dist/packages/docs/common.d.ts +183 -0
  30. package/dist/packages/docs/common.d.ts.map +1 -0
  31. package/dist/packages/docs/common.js +27 -0
  32. package/dist/packages/docs/common.js.map +1 -0
  33. package/dist/packages/docs/convert.d.ts +7 -0
  34. package/dist/packages/docs/convert.d.ts.map +1 -0
  35. package/dist/packages/docs/convert.js +1399 -0
  36. package/dist/packages/docs/convert.js.map +1 -0
  37. package/dist/packages/docs/create-document.d.ts +30 -0
  38. package/dist/packages/docs/create-document.d.ts.map +1 -0
  39. package/dist/packages/docs/create-document.js +170 -0
  40. package/dist/packages/docs/create-document.js.map +1 -0
  41. package/dist/packages/docs/export.d.ts +57 -0
  42. package/dist/packages/docs/export.d.ts.map +1 -0
  43. package/dist/packages/docs/export.js +430 -0
  44. package/dist/packages/docs/export.js.map +1 -0
  45. package/dist/packages/docs/import-docx.d.ts +13 -0
  46. package/dist/packages/docs/import-docx.d.ts.map +1 -0
  47. package/dist/packages/docs/import-docx.js +2299 -0
  48. package/dist/packages/docs/import-docx.js.map +1 -0
  49. package/dist/packages/docs/parse.d.ts +6 -0
  50. package/dist/packages/docs/parse.d.ts.map +1 -0
  51. package/dist/packages/docs/parse.js +4253 -0
  52. package/dist/packages/docs/parse.js.map +1 -0
  53. package/dist/packages/shared/dom-parser-shim.d.ts +30 -0
  54. package/dist/packages/shared/dom-parser-shim.d.ts.map +1 -0
  55. package/dist/packages/shared/dom-parser-shim.js +152 -0
  56. package/dist/packages/shared/dom-parser-shim.js.map +1 -0
  57. package/dist/packages/slides/common.d.ts +325 -0
  58. package/dist/packages/slides/common.d.ts.map +1 -0
  59. package/dist/packages/slides/common.js +12 -0
  60. package/dist/packages/slides/common.js.map +1 -0
  61. package/dist/packages/slides/convert.d.ts +35 -0
  62. package/dist/packages/slides/convert.d.ts.map +1 -0
  63. package/dist/packages/slides/convert.js +308 -0
  64. package/dist/packages/slides/convert.js.map +1 -0
  65. package/dist/packages/slides/createPresentation.d.ts +51 -0
  66. package/dist/packages/slides/createPresentation.d.ts.map +1 -0
  67. package/dist/packages/slides/createPresentation.js +265 -0
  68. package/dist/packages/slides/createPresentation.js.map +1 -0
  69. package/dist/packages/slides/export.d.ts +24 -0
  70. package/dist/packages/slides/export.d.ts.map +1 -0
  71. package/dist/packages/slides/export.js +52 -0
  72. package/dist/packages/slides/export.js.map +1 -0
  73. package/dist/packages/slides/import-pptx.d.ts +13 -0
  74. package/dist/packages/slides/import-pptx.d.ts.map +1 -0
  75. package/dist/packages/slides/import-pptx.js +619 -0
  76. package/dist/packages/slides/import-pptx.js.map +1 -0
  77. package/dist/packages/slides/parse.d.ts +45 -0
  78. package/dist/packages/slides/parse.d.ts.map +1 -0
  79. package/dist/packages/slides/parse.js +1185 -0
  80. package/dist/packages/slides/parse.js.map +1 -0
  81. package/dist/packages/slides/transform.d.ts +37 -0
  82. package/dist/packages/slides/transform.d.ts.map +1 -0
  83. package/dist/packages/slides/transform.js +140 -0
  84. package/dist/packages/slides/transform.js.map +1 -0
  85. package/dist/packages/slides/vendor/VENDORING.md +58 -0
  86. package/dist/packages/slides/vendor/pptxgen.d.ts +805 -0
  87. package/dist/packages/slides/vendor/pptxgen.js +7442 -0
  88. package/package.json +57 -0
@@ -0,0 +1,4253 @@
1
+ import { CONTAINER_TAGS } from "./common";
2
/**
 * Remove every `@media { ... }` block from a stylesheet.
 *
 * The block end is found by tracking brace depth, so rules nested inside the
 * media query are removed together with it. CSS comments are treated as
 * opaque text: an `@media` mentioned inside a comment is not an at-rule, and
 * braces inside a comment do not affect the depth count.
 *
 * @param {string} cssText - Raw stylesheet text.
 * @returns {string} The stylesheet with all `@media` blocks removed. If an
 *   `@media` has no opening brace, the remaining text is kept unchanged.
 */
function removeMediaQueries(cssText) {
    const total = cssText.length;
    const kept = [];
    let keepFrom = 0; // start of the pending slice that must be preserved
    let pos = 0;
    // Returns the index just past the comment that starts at `start`.
    const skipComment = (start) => {
        const close = cssText.indexOf("*/", start + 2);
        return close === -1 ? total : close + 2;
    };
    while (pos < total) {
        if (cssText.startsWith("/*", pos)) {
            // Comments are copied verbatim; never look for @media inside them.
            pos = skipComment(pos);
            continue;
        }
        const isMediaRule = cssText[pos] === "@"
            && cssText.substring(pos, pos + 6).toLowerCase() === "@media";
        if (!isMediaRule) {
            pos++;
            continue;
        }
        const braceStart = cssText.indexOf("{", pos);
        if (braceStart === -1) {
            // Malformed CSS: keep the rest as-is (it is still part of the pending slice).
            break;
        }
        kept.push(cssText.slice(keepFrom, pos));
        // Walk to the brace that closes the @media block.
        let depth = 1;
        let cursor = braceStart + 1;
        while (cursor < total && depth > 0) {
            if (cssText.startsWith("/*", cursor)) {
                cursor = skipComment(cursor);
                continue;
            }
            const ch = cssText[cursor];
            if (ch === "{") {
                depth++;
            }
            else if (ch === "}") {
                depth--;
            }
            cursor++;
        }
        pos = cursor;
        keepFrom = cursor;
    }
    kept.push(cssText.slice(keepFrom));
    return kept.join("");
}
41
/**
 * Extract the first color stop from a CSS gradient value.
 * Recognises linear-gradient, radial-gradient and conic-gradient.
 *
 * The gradient's argument list is read with balanced-parenthesis tracking so
 * that color functions such as rgb()/rgba()/hsl() inside the gradient do not
 * terminate it early.
 *
 * Examples:
 * "linear-gradient(135deg, #7c3aed, #a78bfa, #c084fc)" -> "#7c3aed"
 * "linear-gradient(to right, red, blue)" -> "red"
 * "radial-gradient(circle, rgb(124, 58, 237), purple)" -> "rgb(124, 58, 237)"
 *
 * @param {string | undefined | null} value - CSS value that may contain a gradient.
 * @returns {string | undefined} The first color stop (without its stop
 *   position), or undefined when the value holds no well-formed gradient or
 *   no recognisable color.
 */
function extractFirstGradientColor(value) {
    if (!value)
        return undefined;
    const opener = /(?:linear|radial|conic)-gradient\s*\(/i.exec(value);
    if (!opener)
        return undefined;
    // Split the argument list on top-level commas only; commas inside nested
    // parentheses (rgb(...), hsl(...)) belong to a single argument.
    const parts = [];
    let current = "";
    let depth = 1;
    for (let i = opener.index + opener[0].length; i < value.length; i++) {
        const char = value[i];
        if (char === "(") {
            depth++;
        }
        else if (char === ")") {
            depth--;
            if (depth === 0)
                break; // closing paren of the gradient itself
        }
        if (char === "," && depth === 1) {
            parts.push(current.trim());
            current = "";
        }
        else {
            current += char;
        }
    }
    // An unterminated gradient is not a usable value.
    if (depth !== 0)
        return undefined;
    if (current.trim()) {
        parts.push(current.trim());
    }
    // Direction/angle/shape keywords to skip
    const directionKeywords = /^(?:\d+deg|to\s+(?:top|bottom|left|right|top\s+left|top\s+right|bottom\s+left|bottom\s+right)|circle|ellipse|at\s+|closest-side|farthest-side|closest-corner|farthest-corner)/i;
    // A color, optionally followed by a percentage stop position
    const colorPattern = /^(?:#[0-9a-fA-F]{3,8}|rgba?\s*\([^)]+\)|hsla?\s*\([^)]+\)|[a-zA-Z]+)(?:\s+\d+(?:\.\d+)?%)?$/i;
    for (const part of parts) {
        if (directionKeywords.test(part))
            continue;
        if (colorPattern.test(part)) {
            // Drop the stop position, keep only the color
            return part.replace(/\s+\d+(?:\.\d+)?%$/, "").trim();
        }
    }
    return undefined;
}
102
/**
 * Parse a CSS linear-gradient into an angle plus a list of color stops.
 * Only linear gradients are handled; any other value yields undefined.
 *
 * The argument list is read with balanced-parenthesis tracking so that stops
 * written as rgb()/rgba()/hsl() do not cut the gradient short. Each stop color
 * is passed through normalizeColorToHex.
 *
 * Example:
 * "linear-gradient(135deg, #7c3aed, #a78bfa, #c084fc)" ->
 * { angle: 135, stops: [{ color: "7C3AED", position: 0 }, { color: "A78BFA", position: 50 }, { color: "C084FC", position: 100 }] }
 *
 * @param {string | undefined | null} value - CSS value that may contain a linear-gradient.
 * @returns {{ angle: number, stops: Array<{ color: string, position: number }> } | undefined}
 *   The parsed gradient, or undefined when there is no well-formed
 *   linear-gradient or fewer than two recognisable color stops.
 */
function parseGradient(value) {
    if (!value)
        return undefined;
    const opener = /linear-gradient\s*\(/i.exec(value);
    if (!opener)
        return undefined;
    // Split the argument list on top-level commas only.
    const parts = [];
    let current = "";
    let depth = 1;
    for (let i = opener.index + opener[0].length; i < value.length; i++) {
        const char = value[i];
        if (char === "(") {
            depth++;
        }
        else if (char === ")") {
            depth--;
            if (depth === 0)
                break; // closing paren of the gradient itself
        }
        if (char === "," && depth === 1) {
            parts.push(current.trim());
            current = "";
        }
        else {
            current += char;
        }
    }
    if (depth !== 0)
        return undefined; // unterminated gradient
    if (current.trim()) {
        parts.push(current.trim());
    }
    // Optional leading direction: "<n>deg" or "to <side> [<side>]".
    let angle = 180; // CSS default: top to bottom
    let colorStartIndex = 0;
    if (parts.length > 0) {
        const firstPart = parts[0];
        const degMatch = firstPart.match(/^(-?\d+(?:\.\d+)?)deg$/i);
        if (degMatch) {
            const degrees = Math.round(Number.parseFloat(degMatch[1]));
            // Negative angles are equivalent to their positive counterpart (-45deg === 315deg).
            angle = degrees < 0 ? ((degrees % 360) + 360) % 360 : degrees;
            colorStartIndex = 1;
        }
        else if (/^to\s+/i.test(firstPart)) {
            const direction = firstPart.toLowerCase();
            const toRight = direction.includes("right");
            const toLeft = direction.includes("left");
            const toBottom = direction.includes("bottom");
            const toTop = direction.includes("top");
            if (toRight && toBottom)
                angle = 135;
            else if (toRight && toTop)
                angle = 45;
            else if (toLeft && toBottom)
                angle = 225;
            else if (toLeft && toTop)
                angle = 315;
            else if (toRight)
                angle = 90;
            else if (toLeft)
                angle = 270;
            else if (toBottom)
                angle = 180;
            else if (toTop)
                angle = 0;
            colorStartIndex = 1;
        }
    }
    const colorParts = parts.slice(colorStartIndex);
    if (colorParts.length === 0)
        return undefined;
    const stops = [];
    const colorPattern = /^(#[0-9a-fA-F]{3,8}|rgba?\s*\([^)]+\)|hsla?\s*\([^)]+\)|[a-zA-Z]+)(?:\s+(\d+(?:\.\d+)?)%)?$/i;
    colorParts.forEach((part, index) => {
        const match = part.match(colorPattern);
        if (!match)
            return; // not a recognisable color stop; skip it
        const color = normalizeColorToHex(match[1]);
        // Explicit position wins; otherwise spread the stops evenly over 0-100.
        let position;
        if (match[2]) {
            position = Number.parseFloat(match[2]);
        }
        else {
            position = colorParts.length === 1 ? 0 : (index / (colorParts.length - 1)) * 100;
        }
        stops.push({ color, position: Math.round(position) });
    });
    if (stops.length < 2)
        return undefined;
    return { angle, stops };
}
205
/**
 * Normalize a CSS color value to an uppercase hex string without the leading "#".
 *
 * Supported inputs:
 * - hex: #RGB, #RGBA, #RRGGBB, #RRGGBBAA (alpha is dropped)
 * - rgb()/rgba() with integer channels, comma- or space-separated (channels are clamped to 255)
 * - hsl()/hsla() with percentage saturation/lightness
 * - a small table of common named colors ("transparent" maps to white)
 *
 * Anything else is returned uppercased with "#" removed, unchanged otherwise.
 *
 * @param {string} color - CSS color value.
 * @returns {string} Hex color such as "7C3AED", or the uppercased input when
 *   the format is not recognised.
 */
function normalizeColorToHex(color) {
    const input = color.trim();
    const toHexByte = (channel) => Math.max(0, Math.min(255, channel)).toString(16).padStart(2, "0");
    // Hex notation
    if (input.startsWith("#")) {
        let hex = input.slice(1).toUpperCase();
        if (hex.length === 3 || hex.length === 4) {
            // Expand shorthand, dropping the alpha digit of #RGBA
            hex = [...hex.slice(0, 3)].map((digit) => digit + digit).join("");
        }
        else if (hex.length === 8) {
            // Strip alpha (#RRGGBBAA -> RRGGBB)
            hex = hex.slice(0, 6);
        }
        return hex;
    }
    // rgb() / rgba()
    const rgbMatch = input.match(/rgba?\s*\(\s*(\d+)(?:\s*,\s*|\s+)(\d+)(?:\s*,\s*|\s+)(\d+)/i);
    if (rgbMatch) {
        return rgbMatch
            .slice(1, 4)
            .map((channel) => toHexByte(Number.parseInt(channel, 10)))
            .join("")
            .toUpperCase();
    }
    // hsl() / hsla()
    const hslMatch = input.match(/hsla?\s*\(\s*(-?\d+(?:\.\d+)?)(?:deg)?(?:\s*,\s*|\s+)(\d+(?:\.\d+)?)%(?:\s*,\s*|\s+)(\d+(?:\.\d+)?)%/i);
    if (hslMatch) {
        const hue = ((Number.parseFloat(hslMatch[1]) % 360) + 360) % 360;
        const saturation = Math.min(100, Number.parseFloat(hslMatch[2])) / 100;
        const lightness = Math.min(100, Number.parseFloat(hslMatch[3])) / 100;
        // Standard HSL -> RGB conversion via chroma and hue sector
        const chroma = (1 - Math.abs(2 * lightness - 1)) * saturation;
        const sector = hue / 60;
        const second = chroma * (1 - Math.abs((sector % 2) - 1));
        const offset = lightness - chroma / 2;
        let rgb;
        if (sector < 1)
            rgb = [chroma, second, 0];
        else if (sector < 2)
            rgb = [second, chroma, 0];
        else if (sector < 3)
            rgb = [0, chroma, second];
        else if (sector < 4)
            rgb = [0, second, chroma];
        else if (sector < 5)
            rgb = [second, 0, chroma];
        else
            rgb = [chroma, 0, second];
        return rgb
            .map((component) => toHexByte(Math.round((component + offset) * 255)))
            .join("")
            .toUpperCase();
    }
    // Named colors (common ones). A Map avoids matching inherited object
    // members such as "constructor" or "toString".
    const namedColors = new Map([
        ["red", "FF0000"], ["green", "008000"], ["blue", "0000FF"], ["white", "FFFFFF"], ["black", "000000"],
        ["yellow", "FFFF00"], ["cyan", "00FFFF"], ["magenta", "FF00FF"], ["orange", "FFA500"], ["purple", "800080"],
        ["pink", "FFC0CB"], ["gray", "808080"], ["grey", "808080"], ["navy", "000080"], ["teal", "008080"],
        ["maroon", "800000"], ["olive", "808000"], ["lime", "00FF00"], ["aqua", "00FFFF"], ["silver", "C0C0C0"],
        ["fuchsia", "FF00FF"], ["transparent", "FFFFFF"],
    ]);
    const named = namedColors.get(input.toLowerCase());
    if (named !== undefined) {
        return named;
    }
    // Fallback: return as-is (uppercase)
    return input.toUpperCase().replace("#", "");
}
247
/**
 * Pick the primary font name out of a CSS font-family value.
 * Handles font stacks like "'Source Sans Pro', -apple-system, sans-serif".
 *
 * The stack is split on commas outside quotes, quotes are removed, and the
 * first entry that is neither a generic family keyword, a common system
 * fallback font, nor a vendor-prefixed name (leading "-") is returned.
 * Names are compared case-insensitively, as CSS does.
 *
 * @param {string | undefined | null} fontFamily - CSS font-family value.
 * @returns {string | undefined} The chosen font name; the first entry of the
 *   stack when every entry is generic; undefined for empty input.
 */
function extractPrimaryFont(fontFamily) {
    if (!fontFamily)
        return undefined;
    const fonts = [];
    let current = "";
    let openQuote = ""; // the quote character currently open, or "" when outside quotes
    const flush = () => {
        const name = current.trim().replace(/^['"]|['"]$/g, "");
        if (name)
            fonts.push(name);
        current = "";
    };
    for (const char of fontFamily) {
        if (openQuote) {
            // Inside quotes everything is literal except the matching closing quote.
            if (char === openQuote) {
                openQuote = "";
            }
            else {
                current += char;
            }
        }
        else if (char === '"' || char === "'") {
            openQuote = char;
        }
        else if (char === ",") {
            flush();
        }
        else {
            current += char;
        }
    }
    flush();
    // Generic keywords and common system fallback fonts to skip (lowercase).
    const fallbackFonts = new Set([
        "sans-serif", "serif", "monospace", "cursive", "fantasy", "system-ui",
        "ui-sans-serif", "ui-serif", "ui-monospace", "ui-rounded", "emoji", "math", "fangsong",
        "-apple-system", "blinkmacsystemfont", "segoe ui", "roboto", "helvetica neue",
        "arial", "helvetica", "ubuntu", "cantarell", "fira sans", "droid sans",
        "noto sans", "oxygen", "open sans",
    ]);
    const primary = fonts.find((font) => !fallbackFonts.has(font.toLowerCase()) && !font.startsWith("-"));
    // If every entry is a fallback, return the first one.
    return primary !== undefined ? primary : fonts[0];
}
303
+ /**
304
+ * Parse CSS variables and class color rules from a style element.
305
+ */
306
+ function parseCssContext(doc) {
307
+ const variables = new Map();
308
+ const classColors = new Map();
309
+ const calloutStyles = new Map();
310
+ const classStyles = new Map();
311
+ const elementStyles = new Map();
312
+ const nestedStyles = new Map();
313
+ // Helper to resolve CSS variable or return value as-is
314
+ // Handles multiple var() references in a single value (e.g., "var(--a) var(--b)")
315
+ const resolveValue = (value) => {
316
+ // Replace all var() references with their resolved values
317
+ return value.replace(/var\s*\(\s*(--[a-zA-Z0-9-]+)\s*\)/g, (match, varName) => {
318
+ const resolved = variables.get(varName);
319
+ return resolved || match; // Keep original if not found
320
+ });
321
+ };
322
+ // Find all style elements
323
+ const styleElements = doc.querySelectorAll("style");
324
+ for (const styleEl of styleElements) {
325
+ let cssText = styleEl.textContent || "";
326
+ // Remove media queries to avoid mobile-specific styles overriding desktop defaults
327
+ // Use a function-based approach to correctly handle nested braces
328
+ cssText = removeMediaQueries(cssText);
329
+ // Extract CSS variables from :root { --name: value; }
330
+ const rootMatch = cssText.match(/:root\s*\{([^}]+)\}/);
331
+ if (rootMatch) {
332
+ const rootContent = rootMatch[1];
333
+ const varMatches = rootContent.matchAll(/--([a-zA-Z0-9-]+)\s*:\s*([^;]+)/g);
334
+ for (const match of varMatches) {
335
+ const varName = `--${match[1]}`;
336
+ const varValue = match[2].trim();
337
+ variables.set(varName, varValue);
338
+ }
339
+ }
340
+ // Also extract CSS variables from theme classes (e.g., .theme-professional)
341
+ // These are commonly used to scope CSS variables to body elements
342
+ const themeClassMatches = cssText.matchAll(/\.theme-[a-zA-Z0-9_-]+\s*\{([^}]+)\}/g);
343
+ for (const themeMatch of themeClassMatches) {
344
+ const themeContent = themeMatch[1];
345
+ const varMatches = themeContent.matchAll(/--([a-zA-Z0-9-]+)\s*:\s*([^;]+)/g);
346
+ for (const match of varMatches) {
347
+ const varName = `--${match[1]}`;
348
+ const varValue = match[2].trim();
349
+ // Only set if not already defined (prefer :root values)
350
+ if (!variables.has(varName)) {
351
+ variables.set(varName, varValue);
352
+ }
353
+ }
354
+ }
355
+ // Extract class color rules: .classname { color: value; }
356
+ // NOTE: Use negative lookbehind to exclude nested selectors like ".parent .child { ... }"
357
+ // The (?<!\S\s+) ensures we only match top-level class selectors
358
+ const classRuleMatches = cssText.matchAll(/(?:^|[;\n}])\s*\.([a-zA-Z0-9_-]+)\s*\{([^}]+)\}/gm);
359
+ for (const match of classRuleMatches) {
360
+ const fullMatch = match[0];
361
+ const className = match[1];
362
+ const ruleContent = match[2];
363
+ // Skip if this looks like a nested selector (has another class before it)
364
+ // Check if the match is preceded by another class selector on the same line
365
+ const precedingText = cssText.substring(0, match.index).split('\n').pop() || '';
366
+ if (precedingText.match(/\.[a-zA-Z0-9_-]+\s*$/)) {
367
+ // This is a nested selector like ".parent .child { ... }" - skip it here
368
+ // Nested selectors will be handled by storing parent-child relationships
369
+ continue;
370
+ }
371
+ // Extract ALL style properties for this class (generalized approach)
372
+ const style = {};
373
+ // Text color
374
+ const colorMatch = ruleContent.match(/(?:^|;)\s*color\s*:\s*([^;]+)/i);
375
+ if (colorMatch) {
376
+ const colorValue = resolveValue(colorMatch[1].trim());
377
+ classColors.set(className, colorValue);
378
+ style.color = colorValue;
379
+ }
380
+ // Background color
381
+ const bgMatch = ruleContent.match(/background(?:-color)?\s*:\s*([^;]+)/i);
382
+ if (bgMatch) {
383
+ const bgValue = resolveValue(bgMatch[1].trim());
384
+ // For gradient backgrounds, extract the first color as fallback for DOCX
385
+ // Also store the full gradient for containers that support gradient rendering
386
+ const gradientColor = extractFirstGradientColor(bgValue);
387
+ if (gradientColor) {
388
+ style.backgroundColor = gradientColor;
389
+ // Parse full gradient for container backgrounds (not text gradients)
390
+ const gradient = parseGradient(bgValue);
391
+ if (gradient) {
392
+ style.backgroundGradient = gradient;
393
+ }
394
+ }
395
+ else {
396
+ style.backgroundColor = bgValue;
397
+ }
398
+ }
399
+ // Gradient text detection: when background-clip: text is used with a gradient background,
400
+ // extract the first color from the gradient as fallback and also store full gradient
401
+ const hasBackgroundClipText = ruleContent.match(/(?:-webkit-)?background-clip\s*:\s*text/i);
402
+ if (hasBackgroundClipText && bgMatch) {
403
+ const bgValue = resolveValue(bgMatch[1].trim());
404
+ // Parse full gradient
405
+ const gradient = parseGradient(bgValue);
406
+ if (gradient) {
407
+ style.gradient = gradient;
408
+ // Also set fallback color from first stop
409
+ if (!style.color) {
410
+ style.color = gradient.stops[0]?.color;
411
+ if (style.color) {
412
+ classColors.set(className, style.color);
413
+ }
414
+ }
415
+ }
416
+ else {
417
+ // Fallback: extract first color if gradient parsing fails
418
+ const gradientColor = extractFirstGradientColor(bgValue);
419
+ if (gradientColor && !style.color) {
420
+ style.color = gradientColor;
421
+ classColors.set(className, gradientColor);
422
+ }
423
+ }
424
+ }
425
+ // Border color
426
+ const borderColorMatch = ruleContent.match(/border-color\s*:\s*([^;]+)/i);
427
+ if (borderColorMatch) {
428
+ style.borderColor = resolveValue(borderColorMatch[1].trim());
429
+ }
430
+ // Border shorthand (e.g., "1px solid #e5e7eb")
431
+ const borderMatch = ruleContent.match(/(?:^|;)\s*border\s*:\s*([^;]+)/i);
432
+ if (borderMatch) {
433
+ style.border = resolveValue(borderMatch[1].trim());
434
+ }
435
+ // Border-left (used by callouts: "4px solid #2563eb")
436
+ const borderLeftMatch = ruleContent.match(/border-left\s*:\s*([^;]+)/i);
437
+ if (borderLeftMatch) {
438
+ const borderLeftValue = resolveValue(borderLeftMatch[1].trim());
439
+ // If it contains a color, extract it as borderColor
440
+ const colorInBorder = borderLeftValue.match(/#([0-9a-fA-F]{3,6})/);
441
+ if (colorInBorder && !style.borderColor) {
442
+ style.borderColor = `#${colorInBorder[1]}`;
443
+ }
444
+ // Store the full border-left value
445
+ style.borderLeft = borderLeftValue;
446
+ style.border = style.border || borderLeftValue;
447
+ }
448
+ // Border-right (used by sidebar dividers, etc.)
449
+ const borderRightMatch = ruleContent.match(/border-right\s*:\s*([^;]+)/i);
450
+ if (borderRightMatch) {
451
+ style.borderRight = resolveValue(borderRightMatch[1].trim());
452
+ }
453
+ // Border-bottom (used by title blocks with divider lines)
454
+ const borderBottomMatch = ruleContent.match(/border-bottom\s*:\s*([^;]+)/i);
455
+ if (borderBottomMatch) {
456
+ style.borderBottom = resolveValue(borderBottomMatch[1].trim());
457
+ }
458
+ // Border-top (used by footer sections with top divider lines)
459
+ const borderTopMatch = ruleContent.match(/border-top\s*:\s*([^;]+)/i);
460
+ if (borderTopMatch) {
461
+ style.borderTop = resolveValue(borderTopMatch[1].trim());
462
+ }
463
+ // Display property (grid, flex, block, etc.)
464
+ const displayMatch = ruleContent.match(/display\s*:\s*([^;]+)/i);
465
+ if (displayMatch) {
466
+ style.display = resolveValue(displayMatch[1].trim());
467
+ }
468
+ // Flex property (for flex item sizing like "flex: 1")
469
+ const flexMatch = ruleContent.match(/(?:^|;)\s*flex\s*:\s*([^;]+)/i);
470
+ if (flexMatch) {
471
+ style.flex = resolveValue(flexMatch[1].trim());
472
+ }
473
+ // Flex-direction property (for horizontal vs vertical flex containers)
474
+ const flexDirectionMatch = ruleContent.match(/flex-direction\s*:\s*([^;]+)/i);
475
+ if (flexDirectionMatch) {
476
+ style.flexDirection = resolveValue(flexDirectionMatch[1].trim());
477
+ }
478
+ // Flex-wrap property (for wrapping behavior)
479
+ const flexWrapMatch = ruleContent.match(/flex-wrap\s*:\s*([^;]+)/i);
480
+ if (flexWrapMatch) {
481
+ style.flexWrap = resolveValue(flexWrapMatch[1].trim());
482
+ }
483
+ // Gap property (for flex/grid spacing)
484
+ const gapMatch = ruleContent.match(/(?:^|;)\s*gap\s*:\s*([^;]+)/i);
485
+ if (gapMatch) {
486
+ style.gap = resolveValue(gapMatch[1].trim());
487
+ }
488
+ // Grid template columns (for two-column layout detection)
489
+ const gridColsMatch = ruleContent.match(/grid-template-columns\s*:\s*([^;]+)/i);
490
+ if (gridColsMatch) {
491
+ style.gridTemplateColumns = resolveValue(gridColsMatch[1].trim());
492
+ }
493
+ // Text alignment
494
+ const textAlignMatch = ruleContent.match(/text-align\s*:\s*([^;]+)/i);
495
+ if (textAlignMatch) {
496
+ style.textAlign = resolveValue(textAlignMatch[1].trim());
497
+ }
498
+ // Font size
499
+ const fontSizeMatch = ruleContent.match(/font-size\s*:\s*([^;]+)/i);
500
+ if (fontSizeMatch) {
501
+ style.fontSize = resolveValue(fontSizeMatch[1].trim());
502
+ }
503
+ // Font weight
504
+ const fontWeightMatch = ruleContent.match(/font-weight\s*:\s*([^;]+)/i);
505
+ if (fontWeightMatch) {
506
+ style.fontWeight = resolveValue(fontWeightMatch[1].trim());
507
+ }
508
+ // Padding
509
+ const paddingMatch = ruleContent.match(/padding\s*:\s*([^;]+)/i);
510
+ if (paddingMatch) {
511
+ style.padding = resolveValue(paddingMatch[1].trim());
512
+ }
513
+ // Font style (italic, normal)
514
+ const fontStyleMatch = ruleContent.match(/font-style\s*:\s*([^;]+)/i);
515
+ if (fontStyleMatch) {
516
+ style.fontStyle = resolveValue(fontStyleMatch[1].trim());
517
+ }
518
+ // Font family (extract primary font from font stack)
519
+ const fontFamilyMatch = ruleContent.match(/font-family\s*:\s*([^;]+)/i);
520
+ if (fontFamilyMatch) {
521
+ const resolved = resolveValue(fontFamilyMatch[1].trim());
522
+ const primaryFont = extractPrimaryFont(resolved);
523
+ if (primaryFont) {
524
+ style.fontFamily = primaryFont;
525
+ }
526
+ }
527
+ // Text indent (e.g., "2rem", "-2rem" for hanging indents)
528
+ const textIndentMatch = ruleContent.match(/text-indent\s*:\s*([^;]+)/i);
529
+ if (textIndentMatch) {
530
+ style.textIndent = resolveValue(textIndentMatch[1].trim());
531
+ }
532
+ // Text transform (uppercase, lowercase, capitalize)
533
+ const textTransformMatch = ruleContent.match(/text-transform\s*:\s*([^;]+)/i);
534
+ if (textTransformMatch) {
535
+ style.textTransform = resolveValue(textTransformMatch[1].trim());
536
+ }
537
+ // GENERALIZED: Margin-bottom (for paragraph spacing)
538
+ const marginBottomMatch = ruleContent.match(/margin-bottom\s*:\s*([^;]+)/i);
539
+ if (marginBottomMatch) {
540
+ style.marginBottom = resolveValue(marginBottomMatch[1].trim());
541
+ }
542
+ // GENERALIZED: Line-height (for vertical spacing within text)
543
+ const lineHeightMatch = ruleContent.match(/line-height\s*:\s*([^;]+)/i);
544
+ if (lineHeightMatch) {
545
+ style.lineHeight = resolveValue(lineHeightMatch[1].trim());
546
+ }
547
+ // Store if we found any properties
548
+ // MERGE with existing styles (later rules can add properties without overwriting)
549
+ if (Object.keys(style).length > 0) {
550
+ const existing = classStyles.get(className) || {};
551
+ classStyles.set(className, { ...existing, ...style });
552
+ }
553
+ }
554
+ // Parse element.class combined selectors like "dd.dish-description { font-style: italic; }"
555
+ // These are more specific than just .class, but we store under the class name
556
+ // since getElementStyles already verifies element type separately
557
+ const elementClassMatches = cssText.matchAll(/(?:^|[;\n}])\s*([a-zA-Z][a-zA-Z0-9]*)\s*\.\s*([a-zA-Z0-9_-]+)\s*\{([^}]+)\}/gm);
558
+ for (const match of elementClassMatches) {
559
+ const elementName = match[1].toLowerCase();
560
+ const className = match[2];
561
+ const ruleContent = match[3];
562
+ // Extract style properties (same as class rules above)
563
+ const style = {};
564
+ // Font style (italic, normal)
565
+ const fontStyleMatch = ruleContent.match(/font-style\s*:\s*([^;]+)/i);
566
+ if (fontStyleMatch) {
567
+ style.fontStyle = resolveValue(fontStyleMatch[1].trim());
568
+ }
569
+ // Font weight
570
+ const fontWeightMatch = ruleContent.match(/font-weight\s*:\s*([^;]+)/i);
571
+ if (fontWeightMatch) {
572
+ style.fontWeight = resolveValue(fontWeightMatch[1].trim());
573
+ }
574
+ // Text color
575
+ const colorMatch = ruleContent.match(/(?:^|;)\s*color\s*:\s*([^;]+)/i);
576
+ if (colorMatch) {
577
+ const colorValue = resolveValue(colorMatch[1].trim());
578
+ style.color = colorValue;
579
+ }
580
+ // Background color
581
+ const bgMatch = ruleContent.match(/background(?:-color)?\s*:\s*([^;]+)/i);
582
+ if (bgMatch) {
583
+ const bgValue = resolveValue(bgMatch[1].trim());
584
+ // For gradient backgrounds, extract the first color as fallback for DOCX
585
+ // Also store the full gradient for containers that support gradient rendering
586
+ const gradientColor = extractFirstGradientColor(bgValue);
587
+ if (gradientColor) {
588
+ style.backgroundColor = gradientColor;
589
+ // Parse full gradient for container backgrounds
590
+ const gradient = parseGradient(bgValue);
591
+ if (gradient) {
592
+ style.backgroundGradient = gradient;
593
+ }
594
+ }
595
+ else {
596
+ style.backgroundColor = bgValue;
597
+ }
598
+ }
599
+ // Text alignment
600
+ const textAlignMatch = ruleContent.match(/text-align\s*:\s*([^;]+)/i);
601
+ if (textAlignMatch) {
602
+ style.textAlign = resolveValue(textAlignMatch[1].trim());
603
+ }
604
+ // Font family (extract primary font from font stack)
605
+ const fontFamilyMatch = ruleContent.match(/font-family\s*:\s*([^;]+)/i);
606
+ if (fontFamilyMatch) {
607
+ const resolved = resolveValue(fontFamilyMatch[1].trim());
608
+ const primaryFont = extractPrimaryFont(resolved);
609
+ if (primaryFont) {
610
+ style.fontFamily = primaryFont;
611
+ }
612
+ }
613
+ // Store under the class name (merge with existing)
614
+ if (Object.keys(style).length > 0) {
615
+ const existing = classStyles.get(className) || {};
616
+ classStyles.set(className, { ...existing, ...style });
617
+ }
618
+ }
619
+ // Parse nested CSS selectors like ".parent .child { color: ... }" or ".parent element { color: ... }"
620
+ // Also handles "element.parent .child { ... }" like "figure.menu-image .image-placeholder { ... }"
621
+ // Store the parent-child relationship so we can look up styles based on context
622
+ // Supports: .works-cited p { text-indent: -2rem; } or .callout .callout-label { color: ... }
623
+ // Extended pattern to match:
624
+ // - .parentClass .child { ... }
625
+ // - element.parentClass .child { ... }
626
+ const nestedSelectorMatches = cssText.matchAll(/(?:[a-zA-Z0-9_-]*)?\.([a-zA-Z0-9_-]+)\s+(\.?[a-zA-Z0-9_-]+)\s*\{([^}]+)\}/g);
627
+ for (const match of nestedSelectorMatches) {
628
+ const parentClass = match[1];
629
+ let childSelector = match[2];
630
+ // Remove leading dot if present (for .class selectors)
631
+ if (childSelector.startsWith('.')) {
632
+ childSelector = childSelector.slice(1);
633
+ }
634
+ const ruleContent = match[3];
635
+ // Extract style properties for the child when inside this parent
636
+ const style = {};
637
+ // Text color
638
+ const colorMatch = ruleContent.match(/(?:^|;)\s*color\s*:\s*([^;]+)/i);
639
+ if (colorMatch) {
640
+ style.color = resolveValue(colorMatch[1].trim());
641
+ }
642
+ // Background color
643
+ const bgMatch = ruleContent.match(/background(?:-color)?\s*:\s*([^;]+)/i);
644
+ if (bgMatch) {
645
+ const bgValue = resolveValue(bgMatch[1].trim());
646
+ // For gradient backgrounds, extract the first color as fallback for DOCX
647
+ // Also store the full gradient for containers that support gradient rendering
648
+ const gradientColor = extractFirstGradientColor(bgValue);
649
+ if (gradientColor) {
650
+ style.backgroundColor = gradientColor;
651
+ // Parse full gradient for container backgrounds
652
+ const gradient = parseGradient(bgValue);
653
+ if (gradient) {
654
+ style.backgroundGradient = gradient;
655
+ }
656
+ }
657
+ else {
658
+ style.backgroundColor = bgValue;
659
+ }
660
+ }
661
+ // Gradient text detection: when background-clip: text is used with a gradient background,
662
+ // extract the first color from the gradient as fallback and also store full gradient
663
+ const hasBackgroundClipText = ruleContent.match(/(?:-webkit-)?background-clip\s*:\s*text/i);
664
+ if (hasBackgroundClipText && bgMatch) {
665
+ const bgValue = resolveValue(bgMatch[1].trim());
666
+ // Parse full gradient
667
+ const gradient = parseGradient(bgValue);
668
+ if (gradient) {
669
+ style.gradient = gradient;
670
+ // Also set fallback color from first stop
671
+ if (!style.color) {
672
+ style.color = gradient.stops[0]?.color;
673
+ }
674
+ }
675
+ else {
676
+ // Fallback: extract first color if gradient parsing fails
677
+ const gradientColor = extractFirstGradientColor(bgValue);
678
+ if (gradientColor && !style.color) {
679
+ style.color = gradientColor;
680
+ }
681
+ }
682
+ }
683
+ // Font weight
684
+ const fontWeightMatch = ruleContent.match(/font-weight\s*:\s*([^;]+)/i);
685
+ if (fontWeightMatch) {
686
+ style.fontWeight = resolveValue(fontWeightMatch[1].trim());
687
+ }
688
+ // Text indent (for nested hanging indents like .works-cited p)
689
+ const textIndentMatch = ruleContent.match(/text-indent\s*:\s*([^;]+)/i);
690
+ if (textIndentMatch) {
691
+ style.textIndent = resolveValue(textIndentMatch[1].trim());
692
+ }
693
+ // Font style
694
+ const fontStyleMatch = ruleContent.match(/font-style\s*:\s*([^;]+)/i);
695
+ if (fontStyleMatch) {
696
+ style.fontStyle = resolveValue(fontStyleMatch[1].trim());
697
+ }
698
+ // Font family (extract primary font from font stack)
699
+ const fontFamilyMatch = ruleContent.match(/font-family\s*:\s*([^;]+)/i);
700
+ if (fontFamilyMatch) {
701
+ const resolved = resolveValue(fontFamilyMatch[1].trim());
702
+ const primaryFont = extractPrimaryFont(resolved);
703
+ if (primaryFont) {
704
+ style.fontFamily = primaryFont;
705
+ }
706
+ }
707
+ // Border (full shorthand, e.g., "1px solid #e5e7eb")
708
+ const borderMatch = ruleContent.match(/(?:^|;)\s*border\s*:\s*([^;]+)/i);
709
+ if (borderMatch) {
710
+ style.border = resolveValue(borderMatch[1].trim());
711
+ }
712
+ // Border-left (callout styling)
713
+ const borderLeftMatch = ruleContent.match(/border-left\s*:\s*([^;]+)/i);
714
+ if (borderLeftMatch) {
715
+ const borderLeftValue = resolveValue(borderLeftMatch[1].trim());
716
+ style.borderLeft = borderLeftValue;
717
+ style.border = style.border || borderLeftValue;
718
+ // Extract color from border value
719
+ const colorInBorder = borderLeftValue.match(/#([0-9a-fA-F]{3,6})/);
720
+ if (colorInBorder && !style.borderColor) {
721
+ style.borderColor = `#${colorInBorder[1]}`;
722
+ }
723
+ }
724
+ // Border-bottom (heading underline styling, e.g., h2 { border-bottom: 3px solid #7c3aed; })
725
+ const borderBottomMatch = ruleContent.match(/border-bottom\s*:\s*([^;]+)/i);
726
+ if (borderBottomMatch) {
727
+ const borderBottomValue = resolveValue(borderBottomMatch[1].trim());
728
+ style.borderBottom = borderBottomValue;
729
+ // Extract color from border value
730
+ const colorInBorder = borderBottomValue.match(/#([0-9a-fA-F]{3,6})/);
731
+ if (colorInBorder && !style.borderColor) {
732
+ style.borderColor = `#${colorInBorder[1]}`;
733
+ }
734
+ }
735
+ // Text-transform (uppercase, lowercase, capitalize for headings, labels, etc.)
736
+ const textTransformMatch = ruleContent.match(/text-transform\s*:\s*([^;]+)/i);
737
+ if (textTransformMatch) {
738
+ style.textTransform = resolveValue(textTransformMatch[1].trim());
739
+ }
740
+ // Store in nestedStyles: parentClass -> (childSelector -> style)
741
+ if (Object.keys(style).length > 0) {
742
+ if (!nestedStyles.has(parentClass)) {
743
+ nestedStyles.set(parentClass, new Map());
744
+ }
745
+ const existing = nestedStyles.get(parentClass).get(childSelector) || {};
746
+ nestedStyles.get(parentClass).set(childSelector, { ...existing, ...style });
747
+ }
748
+ }
749
+ // Parse element type selectors (body, p, h1, h2, h3, h4, h5, h6, etc.)
750
+ // These are rules like: body { color: var(--color-text); }
751
+ // or grouped rules like: h1, h2, h3, h4, h5, h6 { color: var(--color-heading); }
752
+ // IMPORTANT: Only match STANDALONE element selectors, not selectors like ".class p" or "#id p"
753
+ // The (?:^|[,\s]) lookbehind ensures we're not part of a complex selector
754
+ // We need to be careful to not match ".timeline-content p" as just "p"
755
+ const elementSelectorPattern = /(?:^|[\n\r])(\s*(?:body|p|h[1-6]|span|div|ul|ol|li|table|th|td|blockquote|section|article|aside|nav|header|footer|figure|figcaption|address|abbr)(?:\s*,\s*(?:body|p|h[1-6]|span|div|ul|ol|li|table|th|td|blockquote|section|article|aside|nav|header|footer|figure|figcaption|address|abbr))*)\s*\{([^}]+)\}/gi;
756
+ const elementMatches = cssText.matchAll(elementSelectorPattern);
757
+ for (const match of elementMatches) {
758
+ const selectorList = match[1];
759
+ const ruleContent = match[2];
760
+ // Split comma-separated selectors
761
+ const selectors = selectorList.split(/\s*,\s*/).map(s => s.trim().toLowerCase());
762
+ // Extract style properties
763
+ const style = {};
764
+ // Text color
765
+ const colorMatch = ruleContent.match(/(?:^|;)\s*color\s*:\s*([^;]+)/i);
766
+ if (colorMatch) {
767
+ style.color = resolveValue(colorMatch[1].trim());
768
+ }
769
+ // Background color
770
+ const bgMatch = ruleContent.match(/background(?:-color)?\s*:\s*([^;]+)/i);
771
+ if (bgMatch) {
772
+ const bgValue = resolveValue(bgMatch[1].trim());
773
+ // For gradient backgrounds, extract the first color as fallback for DOCX
774
+ // Also store the full gradient for containers that support gradient rendering
775
+ const gradientColor = extractFirstGradientColor(bgValue);
776
+ if (gradientColor) {
777
+ style.backgroundColor = gradientColor;
778
+ // Parse full gradient for container backgrounds
779
+ const gradient = parseGradient(bgValue);
780
+ if (gradient) {
781
+ style.backgroundGradient = gradient;
782
+ }
783
+ }
784
+ else {
785
+ style.backgroundColor = bgValue;
786
+ }
787
+ }
788
+ // Font size
789
+ const fontSizeMatch = ruleContent.match(/font-size\s*:\s*([^;]+)/i);
790
+ if (fontSizeMatch) {
791
+ style.fontSize = resolveValue(fontSizeMatch[1].trim());
792
+ }
793
+ // Font weight
794
+ const fontWeightMatch = ruleContent.match(/font-weight\s*:\s*([^;]+)/i);
795
+ if (fontWeightMatch) {
796
+ style.fontWeight = resolveValue(fontWeightMatch[1].trim());
797
+ }
798
+ // Padding (for table cells th, td)
799
+ const paddingMatch = ruleContent.match(/padding\s*:\s*([^;]+)/i);
800
+ if (paddingMatch) {
801
+ style.padding = resolveValue(paddingMatch[1].trim());
802
+ }
803
+ // Font style (italic, normal) - for blockquotes
804
+ const fontStyleMatch = ruleContent.match(/font-style\s*:\s*([^;]+)/i);
805
+ if (fontStyleMatch) {
806
+ style.fontStyle = resolveValue(fontStyleMatch[1].trim());
807
+ }
808
+ // Font family (extract primary font from font stack) - for body, headings, etc.
809
+ const fontFamilyMatch = ruleContent.match(/font-family\s*:\s*([^;]+)/i);
810
+ if (fontFamilyMatch) {
811
+ const resolved = resolveValue(fontFamilyMatch[1].trim());
812
+ const primaryFont = extractPrimaryFont(resolved);
813
+ if (primaryFont) {
814
+ style.fontFamily = primaryFont;
815
+ }
816
+ }
817
+ // Text indent (for paragraph first-line indentation)
818
+ const textIndentMatch = ruleContent.match(/text-indent\s*:\s*([^;]+)/i);
819
+ if (textIndentMatch) {
820
+ style.textIndent = resolveValue(textIndentMatch[1].trim());
821
+ }
822
+ // Text transform (uppercase, lowercase, capitalize)
823
+ const textTransformMatch = ruleContent.match(/text-transform\s*:\s*([^;]+)/i);
824
+ if (textTransformMatch) {
825
+ style.textTransform = resolveValue(textTransformMatch[1].trim());
826
+ }
827
+ // GENERALIZED: Margin-bottom (for paragraph spacing)
828
+ const marginBottomMatch = ruleContent.match(/margin-bottom\s*:\s*([^;]+)/i);
829
+ if (marginBottomMatch) {
830
+ style.marginBottom = resolveValue(marginBottomMatch[1].trim());
831
+ }
832
+ // GENERALIZED: Line-height (for vertical spacing within text)
833
+ const lineHeightMatch = ruleContent.match(/line-height\s*:\s*([^;]+)/i);
834
+ if (lineHeightMatch) {
835
+ style.lineHeight = resolveValue(lineHeightMatch[1].trim());
836
+ }
837
+ // Text alignment (left, center, right, justify)
838
+ const textAlignMatch = ruleContent.match(/text-align\s*:\s*([^;]+)/i);
839
+ if (textAlignMatch) {
840
+ style.textAlign = resolveValue(textAlignMatch[1].trim());
841
+ }
842
+ // Border shorthand (e.g., "1px solid #e5e7eb")
843
+ const borderMatch = ruleContent.match(/(?:^|;)\s*border\s*:\s*([^;]+)/i);
844
+ if (borderMatch) {
845
+ style.border = resolveValue(borderMatch[1].trim());
846
+ }
847
+ // Border-left (used by blockquotes/callouts: "4px solid #8b4513")
848
+ const borderLeftMatch = ruleContent.match(/border-left\s*:\s*([^;]+)/i);
849
+ if (borderLeftMatch) {
850
+ const borderLeftValue = resolveValue(borderLeftMatch[1].trim());
851
+ // If it contains a color, extract it as borderColor
852
+ const colorInBorder = borderLeftValue.match(/#([0-9a-fA-F]{3,6})/);
853
+ if (colorInBorder && !style.borderColor) {
854
+ style.borderColor = `#${colorInBorder[1]}`;
855
+ }
856
+ // Store the full border-left value
857
+ style.borderLeft = borderLeftValue;
858
+ style.border = style.border || borderLeftValue;
859
+ }
860
+ // Border-right (used by sidebar dividers, etc.)
861
+ const borderRightMatch = ruleContent.match(/border-right\s*:\s*([^;]+)/i);
862
+ if (borderRightMatch) {
863
+ style.borderRight = resolveValue(borderRightMatch[1].trim());
864
+ }
865
+ // Border color (direct property)
866
+ const borderColorMatch = ruleContent.match(/border-color\s*:\s*([^;]+)/i);
867
+ if (borderColorMatch) {
868
+ style.borderColor = resolveValue(borderColorMatch[1].trim());
869
+ }
870
+ // GENERALIZED: Border-bottom (used by h2 underlines, title blocks, etc.)
871
+ // Any element can have border-bottom - extract from element selectors
872
+ const borderBottomMatch = ruleContent.match(/border-bottom\s*:\s*([^;]+)/i);
873
+ if (borderBottomMatch) {
874
+ style.borderBottom = resolveValue(borderBottomMatch[1].trim());
875
+ }
876
+ // GENERALIZED: Border-top (used by footer sections with top divider lines)
877
+ // Any element can have border-top - extract from element selectors
878
+ const borderTopMatch = ruleContent.match(/border-top\s*:\s*([^;]+)/i);
879
+ if (borderTopMatch) {
880
+ style.borderTop = resolveValue(borderTopMatch[1].trim());
881
+ }
882
+ // Apply style to each selector
883
+ if (Object.keys(style).length > 0) {
884
+ for (const selector of selectors) {
885
+ // Merge with existing styles (later rules override)
886
+ const existing = elementStyles.get(selector) || {};
887
+ elementStyles.set(selector, { ...existing, ...style });
888
+ }
889
+ }
890
+ }
891
+ }
892
+ return { variables, classColors, calloutStyles, classStyles, elementStyles, nestedStyles };
893
+ }
894
/**
 * Build the merged style object for an element.
 *
 * Layers are applied in this order, each one overwriting the previous:
 *   1. element type selector styles (looked up by lowercase tag name)
 *   2. class styles, in the order the classes appear in the class attribute
 *   3. nested ".parent child" styles - taken from `parentElement` when it is
 *      given, otherwise from every ancestor reachable via `parentElement`
 *   4. font-family inherited from the nearest ancestor that defines one
 *      (only when still unset)
 *   5. declarations found in the inline `style` attribute
 *   6. `window.getComputedStyle` as a fallback for font-family and line-height
 *      (only when a `window` global exists and the values are still unset)
 *
 * @param element The element to get styles for
 * @param cssContext The CSS context (its `elementStyles`, `classStyles` and `nestedStyles` maps are read)
 * @param parentElement Optional parent element for nested style lookups
 * @returns A fresh object holding the merged style properties
 */
function getElementStyles(element, cssContext, parentElement) {
    // Split a node's class attribute into its non-empty class names.
    const classListOf = (node) => {
        const attr = node.getAttribute("class");
        return attr ? attr.split(/\s+/).filter(c => c.length > 0) : [];
    };
    const merged = {};
    // Layer 1: element type selector (lowest priority)
    const tagName = element.tagName.toLowerCase();
    const typeStyle = cssContext.elementStyles.get(tagName);
    if (typeStyle) {
        Object.assign(merged, typeStyle);
    }
    // Layer 2: the element's own classes
    const ownClasses = classListOf(element);
    for (const ownClass of ownClasses) {
        const ownClassStyle = cssContext.classStyles.get(ownClass);
        if (ownClassStyle) {
            Object.assign(merged, ownClassStyle);
        }
    }
    // Merge every nested rule registered under one of `containerClasses` that
    // targets this element by tag name (".works-cited p") or by class (".callout .label").
    const applyNestedRules = (containerClasses) => {
        for (const containerClass of containerClasses) {
            const rules = cssContext.nestedStyles.get(containerClass);
            if (!rules) {
                continue;
            }
            const byTag = rules.get(tagName);
            if (byTag) {
                Object.assign(merged, byTag);
            }
            for (const ownClass of ownClasses) {
                const byClass = rules.get(ownClass);
                if (byClass) {
                    Object.assign(merged, byClass);
                }
            }
        }
    };
    // Layer 3: nested selectors
    if (parentElement) {
        const parentClasses = classListOf(parentElement);
        // Text color is inherited: take it from the first parent class that defines one
        // (e.g. ".cta { color: white; }" should colour the children too).
        if (!merged.color) {
            for (const parentClass of parentClasses) {
                const inheritedColor = cssContext.classStyles.get(parentClass)?.color;
                if (inheritedColor) {
                    merged.color = inheritedColor;
                    break;
                }
            }
        }
        applyNestedRules(parentClasses);
    }
    else {
        // No explicit parent supplied: consult every ancestor, nearest first.
        for (let node = element.parentElement; node; node = node.parentElement) {
            applyNestedRules(classListOf(node));
        }
    }
    // Layer 4: font-family is an inherited property, so look for it on ancestors.
    if (!merged.fontFamily) {
        let node = element.parentElement;
        while (node && !merged.fontFamily) {
            const nodeTag = node.tagName?.toLowerCase();
            if (nodeTag) {
                // Type selector of the ancestor first (e.g. body { font-family: ... })
                const nodeTypeStyle = cssContext.elementStyles.get(nodeTag);
                if (nodeTypeStyle?.fontFamily) {
                    merged.fontFamily = nodeTypeStyle.fontFamily;
                    break;
                }
                // Then the first of the ancestor's classes that defines a font
                const fromClass = classListOf(node)
                    .map(cls => cssContext.classStyles.get(cls)?.fontFamily)
                    .find(Boolean);
                if (fromClass) {
                    merged.fontFamily = fromClass;
                }
            }
            node = node.parentElement;
        }
    }
    // Layer 5: inline declarations override everything gathered so far.
    const inlineStyle = element.getAttribute("style") || "";
    if (inlineStyle) {
        const inlineDeclarations = [
            ["backgroundColor", /background(?:-color)?\s*:\s*([^;]+)/i],
            ["color", /(?:^|;)\s*color\s*:\s*([^;]+)/i],
            ["border", /(?:^|;)\s*border\s*:\s*([^;]+)/i],
            ["borderColor", /border-color\s*:\s*([^;]+)/i],
            ["borderBottom", /border-bottom\s*:\s*([^;]+)/i],
            ["display", /display\s*:\s*([^;]+)/i],
            ["flex", /(?:^|;)\s*flex\s*:\s*([^;]+)/i],
            ["gridTemplateColumns", /grid-template-columns\s*:\s*([^;]+)/i],
            ["textAlign", /text-align\s*:\s*([^;]+)/i],
        ];
        for (const [property, pattern] of inlineDeclarations) {
            const found = inlineStyle.match(pattern);
            if (found) {
                merged[property] = found[1].trim();
            }
        }
        // Inline font-family is reduced to the primary font of the stack.
        const inlineFont = inlineStyle.match(/font-family\s*:\s*([^;]+)/i);
        if (inlineFont) {
            const primaryFont = extractPrimaryFont(inlineFont[1].trim());
            if (primaryFont) {
                merged.fontFamily = primaryFont;
            }
        }
        const inlineLineHeight = inlineStyle.match(/line-height\s*:\s*([^;]+)/i);
        if (inlineLineHeight) {
            merged.lineHeight = inlineLineHeight[1].trim();
        }
    }
    // Layer 6: when running where `window` exists, let the computed style fill in
    // font-family and line-height that CSS parsing did not resolve.
    if (typeof window !== "undefined" && window.getComputedStyle) {
        try {
            const computed = window.getComputedStyle(element);
            if (!merged.fontFamily && computed.fontFamily) {
                const primaryFont = extractPrimaryFont(computed.fontFamily);
                if (primaryFont) {
                    merged.fontFamily = primaryFont;
                }
            }
            if (!merged.lineHeight && computed.lineHeight) {
                // Kept exactly as reported (e.g. "27.2px"); no conversion to a ratio.
                merged.lineHeight = computed.lineHeight;
            }
        }
        catch {
            // Best effort only: getComputedStyle may fail in some environments.
        }
    }
    return merged;
}
1077
/**
 * Extract a border color from a style object.
 *
 * Lookup order:
 *   1. `style.borderColor` - delegated to `extractHexColor`, whose result is
 *      returned as-is (even when it found nothing).
 *   2. the first hex color found in `style.border`, then `borderLeft`,
 *      `borderRight`, `borderTop`, `borderBottom`.
 *
 * Hex colors found in step 2 are normalised to six uppercase digits without
 * the leading "#": "#abc" -> "AABBCC", "#7c3aed" -> "7C3AED". The alpha
 * channel of "#RGBA" / "#RRGGBBAA" notations is dropped. A five-digit value
 * is not a valid color and is ignored.
 *
 * @param style Style object as produced by the CSS parsing helpers
 * @returns The color as described above, or undefined when none is found
 */
function extractBorderColorFromStyle(style) {
    if (style.borderColor) {
        return extractHexColor(style.borderColor);
    }
    // Pull the first "#..." hex token out of a border value and normalise it.
    const hexFrom = (borderValue) => {
        const found = borderValue.match(/#([0-9a-fA-F]{3,})/);
        if (!found) {
            return undefined;
        }
        const digits = found[1];
        if (digits.length >= 6) {
            // #RRGGBB or #RRGGBBAA: keep the RGB part only
            return digits.slice(0, 6).toUpperCase();
        }
        if (digits.length === 5) {
            return undefined;
        }
        // #RGB or #RGBA: double each of the three colour digits
        return [...digits.slice(0, 3)].map((digit) => digit + digit).join("").toUpperCase();
    };
    // Shorthand first, then the individual sides.
    const candidates = [
        style.border,
        style.borderLeft,
        style.borderRight,
        style.borderTop,
        style.borderBottom,
    ];
    for (const borderValue of candidates) {
        if (!borderValue) {
            continue;
        }
        const hex = hexFrom(borderValue);
        if (hex) {
            return hex;
        }
    }
    return undefined;
}
1114
/**
 * Tell whether the resolved `display` of an element is exactly "grid" or "flex".
 *
 * @param element The element to inspect
 * @param cssContext CSS context used to resolve the element's styles
 * @returns true for grid and flex containers, false otherwise
 */
function isGridOrFlexContainer(element, cssContext) {
    const { display } = getElementStyles(element, cssContext);
    return ["grid", "flex"].includes(display);
}
1121
/**
 * GENERALIZED: Check if an element is a horizontal flex container.
 * A horizontal flex container has display: flex and a flex-direction that is
 * not "column" / "column-reverse" (row is the CSS default when unset).
 * This is used to detect containers where flex items should be visually separated
 * with a separator character (like " • ") to represent the CSS gap.
 *
 * The flex-direction declared in the element's inline `style` attribute takes
 * precedence over the one resolved from the stylesheet, mirroring the CSS
 * cascade: an inline "row" overrides a class-level "column" and vice versa.
 *
 * @param element - The element to check
 * @param cssContext - CSS context for style resolution
 * @returns true if this is a horizontal flex container, false otherwise
 */
function isHorizontalFlexContainer(element, cssContext) {
    const styles = getElementStyles(element, cssContext);
    // Must be a flex container
    if (styles.display !== "flex") {
        return false;
    }
    // Resolve the effective direction: inline declaration first, stylesheet value second.
    const inlineStyle = element.getAttribute("style") || "";
    const inlineMatch = inlineStyle.match(/flex-direction\s*:\s*([^;]+)/i);
    const declared = inlineMatch ? inlineMatch[1].trim() : styles.flexDirection;
    const direction = declared?.toLowerCase();
    // Vertical (column) containers should NOT have separators between items
    return direction !== "column" && direction !== "column-reverse";
}
1154
/**
 * Check if an SVG element is purely decorative (background pattern, decoration, etc).
 * Decorative SVGs should NOT be converted to chart placeholders.
 *
 * Detection criteria (style-based, not class-name-based), checked in order:
 * 1. Fixed/absolute positioning declared inline on the parent or the SVG
 * 2. Low opacity (< 0.5) from the parent's inline style, the SVG's inline style,
 *    or the SVG's `opacity` attribute (first one present wins)
 * 3. Inline `pointer-events: none` on the parent or the SVG, or a parent whose
 *    resolved display is "none"
 * 4. Only non-rendering children (<defs>, <style>, <title>, <desc>)
 * 5. Too small to be a chart: narrower than 100 or shorter than 50, measured on
 *    the viewBox when present, otherwise on the width/height attributes
 * 6. At most 5 shapes, all of them filled or stroked with a url(#...) reference
 * 7. One or two circles using stroke-dasharray (progress indicator)
 *
 * @param svgElement The <svg> element to classify
 * @param parentElement The element containing the SVG
 * @param cssContext CSS context used to resolve the parent's styles
 * @returns true when the SVG is considered decorative, false otherwise
 */
function isDecorativeSvg(svgElement, parentElement, cssContext) {
    const parentStyles = getElementStyles(parentElement, cssContext);
    const parentInlineStyle = parentElement.getAttribute("style") || "";
    const svgInlineStyle = svgElement.getAttribute("style") || "";
    // Charts must be at least 100x50; non-positive sizes are treated as unknown.
    const isBelowChartSize = (width, height) => width > 0 && height > 0 && (width < 100 || height < 50);
    // 1. Fixed/absolute positioning (typically background elements)
    const positionedPattern = /position\s*:\s*(fixed|absolute)/i;
    if (positionedPattern.test(parentInlineStyle) || positionedPattern.test(svgInlineStyle)) {
        return true;
    }
    // 2. Low opacity (decorative overlays). The pattern is anchored to the start of a
    // declaration so that "fill-opacity" / "stroke-opacity" are not mistaken for "opacity".
    const opacityPattern = /(?:^|;)\s*opacity\s*:\s*([0-9.]+)/i;
    const parentOpacityMatch = parentInlineStyle.match(opacityPattern);
    const svgOpacityMatch = svgInlineStyle.match(opacityPattern);
    const opacityAttr = svgElement.getAttribute("opacity");
    const opacity = parseFloat(parentOpacityMatch?.[1] || svgOpacityMatch?.[1] || opacityAttr || "1");
    if (opacity < 0.5) {
        return true;
    }
    // 3. Non-interactive background overlay, or hidden parent.
    // Whitespace around the colon is optional in CSS ("pointer-events:none").
    const pointerEventsNonePattern = /pointer-events\s*:\s*none/i;
    if (pointerEventsNonePattern.test(parentInlineStyle) ||
        pointerEventsNonePattern.test(svgInlineStyle) ||
        parentStyles.display === "none") {
        return true;
    }
    // 4. SVG contains only definitions/metadata - no actual shapes
    const hasOnlyDefs = svgElement.children.length > 0 &&
        Array.from(svgElement.children).every((child) => ["defs", "style", "title", "desc"].includes(child.tagName.toLowerCase()));
    if (hasOnlyDefs) {
        return true;
    }
    // 5. Too small to be a chart (likely an icon)
    const viewBox = svgElement.getAttribute("viewBox");
    if (viewBox) {
        // viewBox numbers may be separated by whitespace and/or commas, and may be padded.
        const parts = viewBox.trim().split(/[\s,]+/).map(Number);
        if (parts.length >= 4) {
            const width = parts[2] || 0;
            const height = parts[3] || 0;
            if (isBelowChartSize(width, height)) {
                return true;
            }
        }
    }
    else {
        // No viewBox - fall back to the width/height attributes
        const widthAttr = svgElement.getAttribute("width");
        const heightAttr = svgElement.getAttribute("height");
        if (widthAttr && heightAttr) {
            const width = parseFloat(widthAttr) || 0;
            const height = parseFloat(heightAttr) || 0;
            if (isBelowChartSize(width, height)) {
                return true;
            }
        }
    }
    // 6. Pure pattern/gradient container: a handful of shapes that all reference url(#...)
    const shapes = svgElement.querySelectorAll("rect, circle, ellipse, path, polygon, polyline, line");
    if (shapes.length > 0) {
        let decorativeShapeCount = 0;
        for (const shape of shapes) {
            const fill = shape.getAttribute("fill") || "";
            const stroke = shape.getAttribute("stroke") || "";
            if (fill.includes("url(#") || stroke.includes("url(#")) {
                decorativeShapeCount++;
            }
        }
        if (decorativeShapeCount === shapes.length && shapes.length <= 5) {
            return true;
        }
    }
    // 7. Progress circle: one or two circles drawn with stroke-dasharray.
    // Treated as decorative for now; the percentage is not extracted.
    const circles = svgElement.querySelectorAll("circle");
    if (circles.length > 0 && circles.length <= 2) {
        const hasStrokeDasharray = Array.from(circles).some((c) => c.getAttribute("stroke-dasharray"));
        if (hasStrokeDasharray) {
            return true;
        }
    }
    // Everything else (illustrations, charts, diagrams, hero images inside centered
    // flex containers) is legitimate content and is kept.
    return false;
}
1263
/**
 * Recognise a "sidebar + main" two-column CSS grid.
 *
 * The element must resolve to display: grid with a grid-template-columns value
 * made of exactly two whitespace-separated tracks where
 *   - the first track is a sidebar: a px width of at most 400, or a percentage of at most 35
 *   - the second track is the main area: "1fr", "auto", a percentage of at least 50,
 *     or an fr value of at least 1
 * Examples: "280px 1fr", "300px auto", "25% 75%".
 *
 * @param element The candidate grid container
 * @param cssContext CSS context used to resolve the element's styles
 * @returns The sidebar width as a percentage (px widths are scaled against an
 *          assumed 1100px document width and rounded), or undefined when the
 *          element is not such a layout
 */
function isTwoColumnGridLayout(element, cssContext) {
    const { display, gridTemplateColumns } = getElementStyles(element, cssContext);
    if (display !== "grid" || !gridTemplateColumns) {
        return undefined;
    }
    const tracks = gridTemplateColumns.trim().split(/\s+/);
    if (tracks.length !== 2) {
        return undefined;
    }
    const [sidebarTrack, mainTrack] = tracks;
    const sidebarInPx = sidebarTrack.endsWith("px");
    const sidebarInPercent = sidebarTrack.endsWith("%");
    // Sidebar: narrow fixed width or small percentage
    const looksLikeSidebar = (sidebarInPx && parseInt(sidebarTrack, 10) <= 400) ||
        (sidebarInPercent && parseFloat(sidebarTrack) <= 35);
    if (!looksLikeSidebar) {
        return undefined;
    }
    // Main area: flexible track or the larger percentage
    const looksLikeMain = mainTrack === "1fr" ||
        mainTrack === "auto" ||
        (mainTrack.endsWith("%") && parseFloat(mainTrack) >= 50) ||
        (mainTrack.endsWith("fr") && parseFloat(mainTrack) >= 1);
    if (!looksLikeMain) {
        return undefined;
    }
    if (sidebarInPx) {
        // Assume ~1100px total width for typical documents
        const pxWidth = parseInt(sidebarTrack, 10);
        return Math.round((pxWidth / 1100) * 100);
    }
    // A sidebar that is not in px can only be a percentage at this point
    return parseFloat(sidebarTrack);
}
1311
/**
 * Find the sidebar and main content elements in a two-column grid container.
 *
 * The first element child is taken as the sidebar (aside, div, etc.) and the
 * second as the main content; any further children are ignored.
 *
 * @param container The two-column container element
 * @returns [sidebar, main], or undefined when the container has fewer than two element children
 */
function findTwoColumnChildren(container) {
    // Node.ELEMENT_NODE is 1 by the DOM specification. The literal is used so the
    // function also works where no global `Node` constructor is defined.
    const ELEMENT_NODE = 1;
    const children = Array.from(container.children).filter((child) => child.nodeType === ELEMENT_NODE);
    if (children.length < 2) {
        return undefined;
    }
    const [sidebar, main] = children;
    return [sidebar, main];
}
1323
/**
 * GENERALIZED: Detect a flex container that should be rendered as columns.
 * This handles layouts like:
 *   display: flex; with 2-4 children sharing the available width
 *
 * The check is deliberately permissive: any element whose resolved display is
 * "flex" and that has between 2 and 4 element children qualifies. The `flex`
 * values and widths of the children are NOT inspected, so the children are
 * assumed - not verified - to be of equal width.
 *
 * @param element The candidate flex container
 * @param cssContext CSS context used to resolve the element's styles
 * @returns The element children (one per column) if detected, undefined otherwise
 */
function detectFlexEqualColumns(element, cssContext) {
    const styles = getElementStyles(element, cssContext);
    // Must be a flex container
    if (styles.display !== "flex") {
        return undefined;
    }
    // Node.ELEMENT_NODE is 1 by the DOM specification. The literal is used so the
    // function also works where no global `Node` constructor is defined.
    const ELEMENT_NODE = 1;
    const children = Array.from(element.children).filter((child) => child.nodeType === ELEMENT_NODE);
    // A multi-column layout needs at least 2 children; more than 4 is not treated as columns
    if (children.length < 2 || children.length > 4) {
        return undefined;
    }
    return children;
}
1362
/**
 * Recognise a "skill item" purely from its DOM shape.
 *
 * The element must have between 1 and 3 direct children. One of them (the
 * header) must contain exactly two children, both of which are leaf elements
 * with non-blank text, and one of those texts must be a whole-number
 * percentage such as "95%".
 *
 * @param element - Candidate skill item.
 * @returns { name, percentage } for the first matching header, otherwise undefined.
 */
function detectSkillItem(element) {
    const PERCENTAGE = /^\d+%$/;
    const candidates = Array.from(element.children);
    if (candidates.length === 0 || candidates.length > 3) {
        return undefined;
    }
    for (const header of candidates) {
        // A header holds exactly two entries: the label and the percentage
        if (header.children.length !== 2) {
            continue;
        }
        const textLeaves = Array.from(header.children).filter((leaf) => leaf.children.length === 0 && leaf.textContent?.trim());
        if (textLeaves.length !== 2) {
            continue;
        }
        const [first, second] = textLeaves.map((leaf) => leaf.textContent?.trim() || "");
        // The percentage may come after or before the label
        if (PERCENTAGE.test(second)) {
            return { name: first, percentage: second };
        }
        if (PERCENTAGE.test(first)) {
            return { name: second, percentage: first };
        }
    }
    return undefined;
}
1397
/**
 * Detect a "language item": a short text label followed by a row of
 * proficiency dots/indicators.
 *
 * Rules applied to the element's direct children:
 *  - any heading child (h1-h6) disqualifies the element (title blocks);
 *  - a leaf child with text is the language name; it must be at most
 *    50 characters and at most 5 whitespace-separated words;
 *  - a child with 3 to 10 children that are all <span> is the indicator row.
 *
 * The filled count is computed per indicator row and stored together with
 * that row's total, so `filledCount` can never exceed `totalCount` even when
 * several indicator rows are present (the last row wins, like the name).
 * Callers render `totalCount - filledCount` empty dots, which would throw a
 * RangeError on a negative value.
 *
 * @param element - Candidate container.
 * @param cssContext - CSS context handed to getElementStyles.
 * @returns { name, filledCount, totalCount } if detected, undefined otherwise.
 */
function detectLanguageItem(element, cssContext) {
    const children = Array.from(element.children);
    if (children.length < 2) {
        return undefined;
    }
    // An indicator counts as filled when it has a real background colour.
    // Colours whose hex starts with "FF" are only rejected when they are
    // rgba() values with an alpha of 0.15 or less (a faint "empty" dot).
    const isFilledIndicator = (indicator) => {
        const bgColor = getElementStyles(indicator, cssContext).backgroundColor;
        if (!bgColor || bgColor === "transparent" || bgColor === "inherit") {
            return false;
        }
        const hexBg = extractHexColor(bgColor);
        if (!hexBg) {
            return false;
        }
        if (!hexBg.startsWith("FF")) {
            return true;
        }
        const alpha = bgColor.match(/rgba.*,\s*([0-9.]+)\s*\)/)?.[1];
        return !alpha || parseFloat(alpha) > 0.15;
    };
    let languageName = "";
    let filledCount = 0;
    let totalCount = 0;
    for (const child of children) {
        // GENERALIZED: headings are never language names; this keeps
        // title-block patterns (h1 + metadata div) from matching
        if (/^h[1-6]$/.test(child.tagName.toLowerCase())) {
            return undefined;
        }
        // A leaf element with text is the language name
        if (child.children.length === 0 && child.textContent?.trim()) {
            const text = child.textContent.trim();
            // Names are short; long text means a title or paragraph
            if (text.length > 50 || text.split(/\s+/).length > 5) {
                return undefined;
            }
            languageName = text;
            continue;
        }
        // Otherwise look for a row of 3-10 span indicators
        const indicators = Array.from(child.children);
        if (indicators.length < 3 || indicators.length > 10) {
            continue;
        }
        if (!indicators.every((ind) => ind.tagName.toLowerCase() === "span")) {
            continue;
        }
        // Keep total and filled in sync: both describe this row only
        totalCount = indicators.length;
        filledCount = indicators.filter(isFilledIndicator).length;
    }
    if (languageName && totalCount > 0) {
        return { name: languageName, filledCount, totalCount };
    }
    return undefined;
}
1467
/**
 * Turn the contents of a container element (e.g. a sidebar or main column)
 * into a flat list of document elements.
 *
 * Recognised content: bare text, headings, paragraphs, ul/ol lists, tables,
 * "skill item" and "language item" divs, and nested containers. Text colour
 * is inherited downwards: each element's own colour (extractTextColor) wins,
 * otherwise the colour of its ancestor is used.
 *
 * @param element - Container whose child nodes are parsed.
 * @param cssContext - CSS context used for style lookups (may be undefined).
 * @param inheritedColor - Colour applied to content that has none of its own.
 * @returns Array of parsed elements (paragraph / heading / list / table).
 */
function parseContainerContent(element, cssContext, inheritedColor) {
    const parsed = [];
    // Emit a plain paragraph; the color key is only present when a colour is known.
    const emitParagraph = (text, color) => {
        parsed.push(color ? { type: "paragraph", text, color } : { type: "paragraph", text });
    };
    // Give every run that lacks its own colour the supplied one.
    const tintRuns = (runs, color) => {
        if (!color) {
            return;
        }
        for (const run of runs) {
            if (!run.color) {
                run.color = color;
            }
        }
    };
    const joinRunText = (runs) => runs.map((r) => r.text).join("");
    // Keep the runs when any carries formatting, otherwise collapse to a string.
    const runsOrPlainText = (runs) => (hasInlineFormatting(runs) ? runs : joinRunText(runs));
    const visitChildren = (parent, color) => {
        for (const child of parent.childNodes) {
            visit(child, color);
        }
    };
    const visit = (node, color) => {
        if (node.nodeType === Node.TEXT_NODE) {
            const looseText = node.textContent?.trim();
            if (looseText) {
                emitParagraph(looseText, color);
            }
            return;
        }
        if (node.nodeType !== Node.ELEMENT_NODE || !(node instanceof Element)) {
            return;
        }
        const el = node;
        const tagName = el.tagName.toLowerCase();
        // The element's own colour takes precedence over the inherited one
        const elementColor = extractTextColor(el, cssContext) || color;
        // Decorative SVGs contribute nothing
        if (tagName === "svg") {
            const svgParent = el.parentElement || el;
            if (isDecorativeSvg(el, svgParent, cssContext)) {
                return;
            }
        }
        // Headings
        const headingLevel = parseHeadingLevel(tagName);
        if (headingLevel !== null) {
            const headingText = getTextContent(el).trim();
            if (headingText) {
                parsed.push({ type: "heading", level: headingLevel, text: headingText, color: elementColor });
            }
            return;
        }
        // Paragraphs
        if (tagName === "p") {
            const runs = extractInlineRuns(el, cssContext);
            if (runs.length > 0) {
                const text = joinRunText(runs);
                tintRuns(runs, elementColor);
                if (hasInlineFormatting(runs)) {
                    parsed.push({ type: "paragraph", text, runs });
                }
                else {
                    emitParagraph(text, elementColor);
                }
            }
            return;
        }
        // Lists
        if (tagName === "ul" || tagName === "ol") {
            const items = [];
            for (const entry of el.children) {
                if (entry.tagName.toLowerCase() !== "li") {
                    continue;
                }
                const runs = extractInlineRuns(entry, cssContext);
                if (runs.length === 0) {
                    continue;
                }
                tintRuns(runs, elementColor);
                items.push(runsOrPlainText(runs));
            }
            if (items.length > 0) {
                parsed.push({ type: "list", ordered: tagName === "ol", items });
            }
            return;
        }
        // Tables
        if (tagName === "table") {
            const rows = [];
            for (const tr of el.querySelectorAll("tr")) {
                const cells = [];
                for (const cell of tr.querySelectorAll("td, th")) {
                    const runs = extractInlineRuns(cell, cssContext);
                    cells.push(runs.length > 0 ? runsOrPlainText(runs) : "");
                }
                if (cells.length > 0) {
                    rows.push(cells);
                }
            }
            if (rows.length > 0) {
                // Cell padding comes from the td rule, falling back to the th rule
                let cellPadding;
                if (cssContext) {
                    const tdStyle = cssContext.elementStyles.get("td");
                    const thStyle = cssContext.elementStyles.get("th");
                    const paddingStr = tdStyle?.padding || thStyle?.padding;
                    if (paddingStr) {
                        cellPadding = parseCssPaddingToTwips(paddingStr);
                    }
                }
                parsed.push({ type: "table", rows, cellPadding });
            }
            return;
        }
        if (tagName === "div") {
            // Skill item: "Skill Name" + "95%" rendered on one line
            const skillItem = detectSkillItem(el);
            if (skillItem) {
                emitParagraph(`${skillItem.name}: ${skillItem.percentage}`, elementColor);
                return;
            }
            // Language item: name followed by filled (●) and empty (○) dots
            const languageItem = detectLanguageItem(el, cssContext);
            if (languageItem) {
                const filledDots = "●".repeat(languageItem.filledCount);
                const emptyDots = "○".repeat(languageItem.totalCount - languageItem.filledCount);
                emitParagraph(`${languageItem.name}: ${filledDots}${emptyDots}`, elementColor);
                return;
            }
        }
        // Known containers: descend into the children
        if (CONTAINER_TAGS.includes(tagName)) {
            visitChildren(el, elementColor);
            return;
        }
        // Anything else: a leaf with text becomes a paragraph, otherwise descend
        const fallbackText = getTextContent(el).trim();
        if (fallbackText && el.children.length === 0) {
            emitParagraph(fallbackText, elementColor);
        }
        else {
            visitChildren(el, elementColor);
        }
    };
    visitChildren(element, inheritedColor);
    return parsed;
}
1663
/**
 * Resolve a text colour for an element from its class names, using the
 * class-to-colour map parsed from the stylesheet (`cssContext.classColors`).
 *
 * Classes are tried in attribute order. A class whose colour value cannot be
 * converted to hex (extractHexColor returns nothing) is skipped, so a later
 * class with a usable colour is still found.
 *
 * @param element - Element whose `class` attribute is inspected.
 * @param cssContext - Parsed CSS context; when missing, no colour is resolved.
 * @returns The colour as returned by extractHexColor, or undefined.
 */
function getColorFromClasses(element, cssContext) {
    // Other helpers treat the CSS context as optional; do the same here
    const classColors = cssContext?.classColors;
    if (!classColors) {
        return undefined;
    }
    const classAttr = element.getAttribute("class");
    if (!classAttr) {
        return undefined;
    }
    for (const className of classAttr.split(/\s+/)) {
        if (className.length === 0) {
            continue;
        }
        const color = classColors.get(className);
        if (!color) {
            continue;
        }
        const hex = extractHexColor(color);
        if (hex) {
            return hex;
        }
    }
    return undefined;
}
1679
// True when `level` lies in the HTML heading range 1..6 (inclusive).
function isHeadingLevel(level) {
    const atLeastOne = level >= 1;
    const atMostSix = level <= 6;
    return atLeastOne && atMostSix;
}
1682
// Map a lower-case tag name ("h1".."h6") to its numeric heading level.
// Returns null for every other tag name.
function parseHeadingLevel(tagName) {
    const parsed = /^h([1-6])$/.exec(tagName);
    if (parsed === null || parsed.length < 2) {
        return null;
    }
    const level = parseInt(parsed[1], 10);
    return isHeadingLevel(level) ? level : null;
}
1693
/**
 * Determine an element's text alignment.
 *
 * Precedence follows the CSS cascade: an inline `style="text-align: ..."`
 * declaration overrides whatever the stylesheet rules (resolved through
 * getElementStyles) say. Class names are never inspected directly.
 *
 * @param element - Element to inspect.
 * @param cssContext - CSS context handed to getElementStyles.
 * @returns "left" | "center" | "right" | "justify", or undefined when no
 *          supported alignment is declared.
 */
function getTextAlignment(element, cssContext) {
    // Inline declaration first: it wins over stylesheet rules
    const inlineStyle = element.getAttribute("style") || "";
    const inlineMatch = inlineStyle.match(/text-align\s*:\s*(left|center|right|justify)/i);
    if (inlineMatch) {
        return inlineMatch[1].toLowerCase();
    }
    // Fall back to the alignment coming from CSS rules
    const styles = getElementStyles(element, cssContext);
    if (styles.textAlign) {
        const align = styles.textAlign.toLowerCase();
        if (align === "left" || align === "center" || align === "right" || align === "justify") {
            return align;
        }
    }
    return undefined;
}
1715
/**
 * Read an element's text content as a string.
 * A null, undefined or otherwise falsy textContent yields the empty string.
 */
function getTextContent(element) {
    const raw = element.textContent;
    // Some node types (document, doctype) report null here
    return raw ? String(raw) : "";
}
1723
/**
 * Tags treated as inline content. A container holding only text and these
 * elements can be emitted as one paragraph made of several runs instead of
 * being split into separate paragraphs.
 */
const INLINE_TAGS = [
    // generic wrappers and links
    "span", "a",
    // emphasis and editing marks
    "b", "strong", "i", "em", "u", "s", "strike", "del", "ins",
    // vertical position, size and highlighting
    "sub", "sup", "small", "mark",
    // semantic phrases and code-like text
    "abbr", "cite", "code", "kbd", "samp", "var",
    "time", "data", "q", "dfn",
    // ruby annotations and bidirectional text
    "ruby", "rt", "rp", "bdi", "bdo",
    // line-break opportunities and breaks
    "wbr", "br",
    // form labels and legacy font markup
    "label", "font",
];
1733
/**
 * Decide whether a container holds nothing but inline content, so that it
 * can be rendered as a single paragraph.
 *
 * Text nodes and non-element nodes (comments etc.) are ignored. An element
 * child is acceptable only if its tag is listed in INLINE_TAGS and its own
 * content is, recursively, inline-only. Any other element tag counts as
 * block-level and makes the result false.
 */
function isInlineOnlyContainer(element) {
    return Array.from(element.childNodes).every((child) => {
        // Plain text never breaks a paragraph
        if (child.nodeType === Node.TEXT_NODE) {
            return true;
        }
        // Comments and other non-element nodes are irrelevant
        if (child.nodeType !== Node.ELEMENT_NODE || !(child instanceof Element)) {
            return true;
        }
        // Inline tags pass only when everything nested inside them is inline too
        return INLINE_TAGS.includes(child.tagName.toLowerCase()) && isInlineOnlyContainer(child);
    });
}
1765
/**
 * Whether the (lower-case) tag name marks its content as bold.
 */
function isBoldTag(tagName) {
    switch (tagName) {
        case "strong":
        case "b":
            return true;
        default:
            return false;
    }
}
1771
/**
 * Whether the (lower-case) tag name marks its content as italic.
 */
function isItalicTag(tagName) {
    switch (tagName) {
        case "em":
        case "i":
            return true;
        default:
            return false;
    }
}
1777
/**
 * Collect the inline text runs of an element together with their formatting.
 *
 * The element's descendants are walked depth-first. Formatting (bold, italic,
 * colour, background, super/subscript, underline, font family, size) is
 * inherited from ancestors and refined per element from three sources, in
 * this order: the tag itself, stylesheet rules (getElementStyles, only when a
 * cssContext is given) and the inline `style` attribute.
 *
 * When a browser-like `window.getComputedStyle` exists, the text of the
 * element's CSS `::before` pseudo-element is emitted as the first run.
 *
 * @param element - Element whose inline content is extracted.
 * @param cssContext - CSS context for stylesheet lookups (optional).
 * @param inheritedColor - Colour used until an element declares its own.
 * @param inheritedFontFamily - Font family used until an element declares its own.
 * @returns The runs after mergeAndNormalizeRuns.
 */
function extractInlineRuns(element, cssContext, inheritedColor, inheritedFontFamily) {
    const collected = [];
    const isBoldWeight = (weight) => weight === "700" || weight === "bold" || weight === "600";
    // GENERALIZED: ::before content, e.g. "dd.dish-pairings::before { content: 'Suggested Pairing: '; }"
    if (typeof window !== "undefined" && window.getComputedStyle) {
        try {
            const pseudo = window.getComputedStyle(element, "::before");
            const rawContent = pseudo.content;
            // `content` is "none"/"normal" or a quoted string like '"Suggested Pairing: "'
            if (rawContent && rawContent !== "none" && rawContent !== "normal") {
                const unquoted = rawContent.replace(/^["']|["']$/g, "");
                if (unquoted) {
                    const pseudoWeight = pseudo.fontWeight;
                    const pseudoStyle = pseudo.fontStyle;
                    const pseudoColor = pseudo.color ? extractHexColor(pseudo.color) : undefined;
                    collected.push({
                        text: unquoted,
                        bold: isBoldWeight(pseudoWeight) || undefined,
                        italic: pseudoStyle === "italic" || undefined,
                        color: pseudoColor,
                    });
                }
            }
        }
        catch {
            // getComputedStyle may fail in some environments
        }
    }
    // Overlay formatting coming from stylesheet rules onto `fmt` (no-op without a CSS context).
    const applyStylesheetFormatting = (target, fmt) => {
        if (!cssContext) {
            return;
        }
        const styles = getElementStyles(target, cssContext);
        if (styles.color) {
            const hexColor = extractHexColor(styles.color);
            if (hexColor) {
                fmt.color = hexColor;
            }
        }
        if (styles.backgroundColor) {
            const hexBg = extractHexColor(styles.backgroundColor);
            if (hexBg) {
                fmt.backgroundColor = hexBg;
            }
        }
        if (isBoldWeight(styles.fontWeight)) {
            fmt.bold = true;
        }
        if (styles.fontStyle === "italic") {
            fmt.italic = true;
        }
        if (styles.fontFamily) {
            fmt.fontFamily = styles.fontFamily;
        }
        // GENERALIZED: font-size from CSS rules
        if (styles.fontSize) {
            const halfPoints = parseFontSizeToHalfPoints(styles.fontSize);
            if (halfPoints) {
                fmt.size = halfPoints;
            }
        }
    };
    // Overlay formatting declared in an inline style attribute onto `fmt`.
    const applyInlineStyleFormatting = (inlineStyle, fmt) => {
        if (!inlineStyle) {
            return;
        }
        const colorDecl = inlineStyle.match(/(?:^|;)\s*color\s*:\s*([^;]+)/i);
        if (colorDecl) {
            const hexColor = extractHexColor(colorDecl[1]);
            if (hexColor) {
                fmt.color = hexColor;
            }
        }
        const bgDecl = inlineStyle.match(/background(?:-color)?\s*:\s*([^;]+)/i);
        if (bgDecl) {
            const hexBg = extractHexColor(bgDecl[1]);
            if (hexBg) {
                fmt.backgroundColor = hexBg;
            }
        }
        const weightDecl = inlineStyle.match(/font-weight\s*:\s*([^;]+)/i);
        if (weightDecl && isBoldWeight(weightDecl[1].trim())) {
            fmt.bold = true;
        }
        const styleDecl = inlineStyle.match(/font-style\s*:\s*([^;]+)/i);
        if (styleDecl && styleDecl[1].trim() === "italic") {
            fmt.italic = true;
        }
        const familyDecl = inlineStyle.match(/font-family\s*:\s*([^;]+)/i);
        if (familyDecl) {
            const primaryFont = extractPrimaryFont(familyDecl[1].trim());
            if (primaryFont) {
                fmt.fontFamily = primaryFont;
            }
        }
        // GENERALIZED: inline font-size
        const sizeDecl = inlineStyle.match(/font-size\s*:\s*([^;]+)/i);
        if (sizeDecl) {
            const halfPoints = parseFontSizeToHalfPoints(sizeDecl[1].trim());
            if (halfPoints) {
                fmt.size = halfPoints;
            }
        }
    };
    // Depth-first walk; `fmt` is the formatting inherited from the ancestors and is never mutated here.
    const walk = (node, fmt) => {
        const { bold, italic, color, backgroundColor, superscript, subscript, underline, fontFamily, size } = fmt;
        if (node.nodeType === Node.TEXT_NODE) {
            const raw = node.textContent || "";
            if (raw) {
                // Collapse whitespace runs but keep single spaces between words
                const normalizedText = raw.replace(/\s+/g, " ");
                if (normalizedText.trim() || normalizedText === " ") {
                    collected.push({ text: normalizedText, bold, italic, color, backgroundColor, superscript, subscript, underline, fontFamily, size });
                }
            }
            return;
        }
        if (node.nodeType !== Node.ELEMENT_NODE || !(node instanceof Element)) {
            return;
        }
        const tagName = node.tagName.toLowerCase();
        // <br> becomes a newline run
        if (tagName === "br") {
            collected.push({ text: "\n", bold, italic, color, backgroundColor, fontFamily, size });
            return;
        }
        // Formatting implied by the tag itself
        const next = {
            bold: bold || isBoldTag(tagName),
            italic: italic || isItalicTag(tagName),
            color,
            backgroundColor,
            superscript: superscript || tagName === "sup",
            subscript: subscript || tagName === "sub",
            underline,
            fontFamily,
            size,
        };
        if (tagName === "u") {
            next.underline = { type: "single" };
        }
        else if (tagName === "abbr") {
            // abbr is rendered with a dotted underline; its colour is taken
            // from a hex value in the CSS border-bottom (e.g. "1px dotted #b8860b")
            let underlineColor;
            if (cssContext) {
                const abbrStyles = getElementStyles(node, cssContext);
                if (abbrStyles.borderBottom) {
                    const hexInBorder = abbrStyles.borderBottom.match(/#([0-9a-fA-F]{3,6})/i);
                    if (hexInBorder) {
                        underlineColor = hexInBorder[1].toUpperCase();
                    }
                }
            }
            next.underline = { type: "dotted", color: underlineColor };
        }
        // Stylesheet rules first, then the inline style attribute on top
        applyStylesheetFormatting(node, next);
        applyInlineStyleFormatting(node.getAttribute("style") || "", next);
        for (const child of node.childNodes) {
            walk(child, next);
        }
    };
    // GENERALIZED: styles of the element itself (e.g. <td class="positive">)
    // are inherited by all of its text
    const rootFormat = {
        bold: false,
        italic: false,
        color: inheritedColor,
        backgroundColor: undefined,
        superscript: false,
        subscript: false,
        underline: undefined,
        fontFamily: inheritedFontFamily,
        size: undefined,
    };
    applyStylesheetFormatting(element, rootFormat);
    for (const child of element.childNodes) {
        walk(child, rootFormat);
    }
    // Merge adjacent runs with same formatting and normalize
    return mergeAndNormalizeRuns(collected);
}
1987
/**
 * Merge adjacent runs that share identical formatting and strip the
 * whitespace surrounding the resulting text.
 *
 * Two runs are merged only when bold, italic, color, backgroundColor,
 * superscript, subscript, fontFamily, size and underline (type + color) all
 * match. Leading whitespace is removed from the start of the sequence and
 * trailing whitespace from its end; runs that become empty at either edge
 * are dropped and trimming continues with the next run, so a whitespace-only
 * first or last run cannot leave stray spaces behind.
 *
 * The input array and its run objects are not modified.
 *
 * @param runs - Runs in document order.
 * @returns New array of merged, non-empty runs.
 */
function mergeAndNormalizeRuns(runs) {
    if (runs.length === 0) {
        return [];
    }
    // Underlines are objects, so compare them field by field
    const underlineEqual = (a, b) => {
        if (!a && !b) {
            return true;
        }
        if (!a || !b) {
            return false;
        }
        return a.type === b.type && a.color === b.color;
    };
    const sameFormatting = (a, b) => a.bold === b.bold &&
        a.italic === b.italic &&
        a.color === b.color &&
        a.backgroundColor === b.backgroundColor &&
        a.superscript === b.superscript &&
        a.subscript === b.subscript &&
        a.fontFamily === b.fontFamily &&
        a.size === b.size &&
        underlineEqual(a.underline, b.underline);
    const merged = [];
    for (const run of runs) {
        const last = merged[merged.length - 1];
        if (last && sameFormatting(last, run)) {
            last.text += run.text;
        }
        else {
            // Copy so the caller's run objects stay untouched
            merged.push({ ...run });
        }
    }
    // Trim the front; keep going while the leading run ends up empty
    while (merged.length > 0) {
        merged[0].text = merged[0].text.trimStart();
        if (merged[0].text.length > 0) {
            break;
        }
        merged.shift();
    }
    // Same for the back
    while (merged.length > 0) {
        const tail = merged[merged.length - 1];
        tail.text = tail.text.trimEnd();
        if (tail.text.length > 0) {
            break;
        }
        merged.pop();
    }
    // Runs that were empty to begin with may remain in the middle
    return merged.filter((r) => r.text.length > 0);
}
2029
/**
 * Report whether at least one run carries any formatting: bold, italic,
 * color, backgroundColor, fontFamily, size, superscript, subscript or
 * underline (any truthy value counts).
 */
function hasInlineFormatting(runs) {
    const formattingKeys = [
        "bold", "italic", "color", "backgroundColor", "fontFamily",
        "size", "superscript", "subscript", "underline",
    ];
    return runs.some((run) => formattingKeys.some((key) => Boolean(run[key])));
}
2035
/**
 * Tags regarded as block-level content. A <li> that contains any of them is
 * a complex item that has to be processed recursively rather than flattened
 * into inline text.
 */
const BLOCK_LEVEL_TAGS = new Set([
    // generic blocks and text containers
    "div", "p", "table", "ul", "ol", "blockquote", "pre", "figure",
    // graphics and headings
    "svg", "h1", "h2", "h3", "h4", "h5", "h6",
    // sectioning elements
    "section", "article", "aside", "nav", "header", "footer", "main",
    // forms
    "form", "fieldset",
    // description lists, contact info, rules
    "dl", "dd", "dt", "address", "hr",
    // media and embedded content
    "canvas", "video", "audio", "picture", "iframe", "object", "embed",
    // disclosure widgets
    "details", "summary",
]);
2047
/**
 * Whether any direct child of the element is a block-level tag
 * (see BLOCK_LEVEL_TAGS). Used to spot list items that need recursive
 * processing.
 */
function hasBlockLevelChildren(element) {
    return Array.from(element.children).some((child) => BLOCK_LEVEL_TAGS.has(child.tagName.toLowerCase()));
}
2060
/**
 * Whether the element has a ul or ol anywhere among its descendants.
 */
function hasNestedLists(element) {
    const firstNestedList = element.querySelector("ul, ol");
    return firstNestedList !== null;
}
2066
/**
 * Flatten a ul/ol, including all lists nested inside its items, into one
 * sequence of items that remember their nesting depth (for DOCX indentation).
 *
 * For every <li> the inline content (text nodes and non-list elements) is
 * emitted first, followed by the items of each nested list one level deeper.
 * Items whose inline content is blank are omitted.
 *
 * @param listElement - The ul or ol element to extract items from
 * @param cssContext - CSS context for style extraction
 * @param level - Nesting level of this list's items (0 = top level)
 * @param parentOrdered - When given, used as the `ordered` flag for this
 *        list's items instead of deriving it from the list's tag
 * @returns Array of { content, level, ordered } in document order
 */
function extractNestedListItems(listElement, cssContext, level = 0, parentOrdered) {
    const isOlTag = (el) => el.tagName.toLowerCase() === "ol";
    const ownTagIsOrdered = isOlTag(listElement);
    const ordered = parentOrdered !== undefined ? parentOrdered : ownTagIsOrdered;
    const collected = [];
    const listEntries = Array.from(listElement.children).filter((entry) => entry.tagName.toLowerCase() === "li");
    for (const entry of listEntries) {
        // Split the li's nodes into inline content and nested lists
        const inlineParts = [];
        const sublists = [];
        for (const part of entry.childNodes) {
            if (part.nodeType === Node.TEXT_NODE) {
                inlineParts.push(part);
            }
            else if (part.nodeType === Node.ELEMENT_NODE) {
                const partTag = part.tagName.toLowerCase();
                const bucket = partTag === "ul" || partTag === "ol" ? sublists : inlineParts;
                bucket.push(part);
            }
        }
        // The item's own text, with nested list content left out
        if (inlineParts.length > 0) {
            const runs = extractInlineRunsFromNodes(inlineParts, cssContext);
            if (runs.some((r) => r.text.trim())) {
                collected.push({
                    content: hasInlineFormatting(runs) ? runs : runs.map((r) => r.text).join(""),
                    level,
                    ordered,
                });
            }
        }
        // Nested lists follow their parent item, one level deeper
        for (const sublist of sublists) {
            collected.push(...extractNestedListItems(sublist, cssContext, level + 1, isOlTag(sublist)));
        }
    }
    return collected;
}
2125
/**
 * Extract inline runs from an explicit collection of nodes (the inline part
 * of a list item, with nested lists excluded).
 *
 * This is the list-item counterpart of extractInlineRuns and treats text the
 * same way: runs of whitespace in text nodes are collapsed to a single space
 * (so source indentation and line breaks do not leak into the output) and a
 * <br> produces a "\n" run. Nested ul/ol elements are skipped entirely.
 *
 * Formatting tracked per run: bold, italic, color, backgroundColor,
 * superscript, subscript. It comes from the tag name and from the styles
 * returned by getElementStyles. Falsy bold/italic/superscript/subscript are
 * stored as undefined.
 *
 * @param nodes - Text and element nodes to walk, in document order.
 * @param cssContext - CSS context handed to getElementStyles.
 * @returns The runs after mergeAndNormalizeRuns.
 */
function extractInlineRunsFromNodes(nodes, cssContext) {
    const runs = [];
    function walkNode(node, bold, italic, color, backgroundColor, superscript, subscript) {
        const pushRun = (text) => {
            runs.push({
                text,
                bold: bold || undefined,
                italic: italic || undefined,
                color,
                backgroundColor,
                superscript: superscript || undefined,
                subscript: subscript || undefined,
            });
        };
        if (node.nodeType === Node.TEXT_NODE) {
            // Collapse whitespace like HTML rendering does, keeping single spaces
            const text = (node.textContent || "").replace(/\s+/g, " ");
            if (text) {
                pushRun(text);
            }
            return;
        }
        if (node.nodeType !== Node.ELEMENT_NODE)
            return;
        const element = node;
        const tagName = element.tagName.toLowerCase();
        // Skip nested lists entirely - they're handled separately
        if (tagName === "ul" || tagName === "ol")
            return;
        // A line break becomes a newline run instead of being dropped
        if (tagName === "br") {
            pushRun("\n");
            return;
        }
        // Determine formatting from tag
        let newBold = bold || tagName === "strong" || tagName === "b";
        let newItalic = italic || tagName === "em" || tagName === "i";
        const newSuperscript = superscript || tagName === "sup";
        const newSubscript = subscript || tagName === "sub";
        let newColor = color;
        let newBackgroundColor = backgroundColor;
        // Get styles from CSS context
        const styles = getElementStyles(element, cssContext);
        if (styles.fontWeight === "700" || styles.fontWeight === "bold" || styles.fontWeight === "600") {
            newBold = true;
        }
        if (styles.fontStyle === "italic") {
            newItalic = true;
        }
        if (styles.color) {
            const extracted = extractHexColor(styles.color);
            if (extracted)
                newColor = extracted;
        }
        if (styles.backgroundColor) {
            const extracted = extractHexColor(styles.backgroundColor);
            if (extracted)
                newBackgroundColor = extracted;
        }
        // Recurse into children
        for (const child of node.childNodes) {
            walkNode(child, newBold, newItalic, newColor, newBackgroundColor, newSuperscript, newSubscript);
        }
    }
    for (const node of nodes) {
        walkNode(node, false, false);
    }
    return mergeAndNormalizeRuns(runs);
}
2189
/**
 * Decide whether an element should be exported as a blockquote/callout.
 *
 * The decision is based on the tag and on visual styling, never on class names:
 *   - `<blockquote>` always qualifies.
 *   - `<p>` qualifies when a border signal is present (see `leftBorder` below).
 *   - `<div>`, `<section>`, `<article>` and `<header>` qualify when they have a
 *     real background colour AND either a border or padding plus non-blank text.
 *     Containers holding `figure img` / `figure picture` never qualify.
 *
 * @param element    DOM element to inspect.
 * @param cssContext CSS context forwarded to getElementStyles().
 * @returns {boolean} true when the element should be treated as a blockquote/callout.
 */
function isBlockquoteOrCallout(element, cssContext) {
    const tag = element.tagName.toLowerCase();
    if (tag === "blockquote") {
        return true;
    }
    const css = getElementStyles(element, cssContext);
    const inline = element.getAttribute("style") || "";
    // Border signal: an inline "border-left", a solid `border` shorthand, or any
    // resolved borderColor / borderLeft value.
    const leftBorder = inline.includes("border-left") ||
        css.border?.includes("solid") ||
        !!css.borderColor ||
        !!css.borderLeft;
    // Paragraphs are decided purely by the border signal.
    if (tag === "p") {
        return leftBorder;
    }
    const containerTags = ["div", "section", "article", "header"];
    if (!containerTags.includes(tag)) {
        return false;
    }
    // Containers wrapping figure images are never reported as blockquotes.
    if (element.querySelectorAll("figure img, figure picture").length > 0) {
        return false;
    }
    const background = css.backgroundColor;
    const hasBackground = !!background &&
        background !== "transparent" &&
        background !== "inherit";
    // Every remaining rule requires a real background colour.
    if (!hasBackground) {
        return false;
    }
    // Background + border => callout box.
    const shorthand = css.border || "";
    if (leftBorder || shorthand.includes("solid") || shorthand.includes("px")) {
        return true;
    }
    // Background + padding + non-blank text => styled content box (CTA, key takeaways, ...).
    const padded = !!css.padding || inline.includes("padding");
    const trimmedLength = element.textContent?.trim().length ?? 0;
    return padded && trimmedLength > 0;
}
2255
/**
 * Find the colour of an element's bottom border, used when the border is
 * rendered as a horizontal rule.
 *
 * Sources are consulted in this order; the first one that yields a colour wins:
 *   1. the `border-bottom` declaration of the inline `style` attribute,
 *   2. `borderBottom` from getElementStyles(),
 *   3. the `border` shorthand from getElementStyles(),
 *   4. `borderColor` from getElementStyles() (parsed by extractHexColor()).
 *
 * For sources 1-3 a `#hex` literal is preferred; otherwise a `var(--name)`
 * reference is looked up in `cssContext.variables` and parsed by extractHexColor().
 *
 * @param element    DOM element to inspect.
 * @param cssContext CSS context; `cssContext.variables` must expose `get(name)`.
 * @returns {string|undefined} Upper-case "RRGGBB" (no leading '#'), or undefined.
 */
function extractBorderBottomColor(element, cssContext) {
    // Normalise the digits of a CSS hex colour to six upper-case digits.
    // #rgb / #rgba are expanded and #rrggbbaa is truncated (alpha is dropped);
    // any other length is not a valid CSS colour and is rejected.
    const normalizeHex = (digits) => {
        if (digits.length === 3 || digits.length === 4) {
            return [...digits.slice(0, 3)].map((d) => d + d).join("").toUpperCase();
        }
        if (digits.length === 6 || digits.length === 8) {
            return digits.slice(0, 6).toUpperCase();
        }
        return undefined;
    };
    // Pull a colour out of one border value such as "1px solid #e5e7eb"
    // or "1px solid var(--border-color)".
    const colorFromBorder = (borderValue) => {
        const hexMatch = borderValue.match(/#([0-9a-fA-F]+)/);
        if (hexMatch) {
            // A hex literal is decisive for this value, even when malformed.
            return normalizeHex(hexMatch[1]);
        }
        const varMatch = borderValue.match(/var\s*\(\s*(--[a-zA-Z0-9-]+)\s*\)/);
        if (!varMatch) {
            return undefined;
        }
        const varValue = cssContext.variables.get(varMatch[1]);
        return varValue ? extractHexColor(varValue) || undefined : undefined;
    };
    // 1. Inline style.
    const inlineStyle = element.getAttribute("style") || "";
    const inlineMatch = inlineStyle.match(/border-bottom:\s*([^;]+)/i);
    if (inlineMatch) {
        const color = colorFromBorder(inlineMatch[1]);
        if (color) {
            return color;
        }
    }
    // 2-3. Stylesheet-derived values: the specific property first, then the shorthand.
    const styles = getElementStyles(element, cssContext);
    for (const borderValue of [styles.borderBottom, styles.border]) {
        if (!borderValue) {
            continue;
        }
        const color = colorFromBorder(borderValue);
        if (color) {
            return color;
        }
    }
    // 4. Stand-alone border colour.
    if (styles.borderColor) {
        const color = extractHexColor(styles.borderColor);
        if (color) {
            return color;
        }
    }
    return undefined;
}
2343
/**
 * Find the colour of an element's top border.
 *
 * The inline `style` attribute is consulted first because inline declarations
 * override stylesheet rules; `borderTop` from getElementStyles() is the fallback.
 * This matches the lookup order of extractBorderBottomColor() and extractTextColor().
 *
 * Each border value is resolved by trying, in order: a `#hex` literal, a
 * `var(--name)` reference looked up in `cssContext.variables`, and finally
 * extractHexColor() on the whole value.
 *
 * @param element    DOM element to inspect.
 * @param cssContext CSS context; `cssContext.variables` must expose `get(name)`.
 * @returns {string|undefined} Upper-case "RRGGBB" (no leading '#'), or undefined.
 */
function extractBorderTopColor(element, cssContext) {
    // Normalise the digits of a CSS hex colour to six upper-case digits.
    // #rgb / #rgba are expanded and #rrggbbaa is truncated (alpha is dropped);
    // any other length is not a valid CSS colour and is rejected.
    const normalizeHex = (digits) => {
        if (digits.length === 3 || digits.length === 4) {
            return [...digits.slice(0, 3)].map((d) => d + d).join("").toUpperCase();
        }
        if (digits.length === 6 || digits.length === 8) {
            return digits.slice(0, 6).toUpperCase();
        }
        return undefined;
    };
    // Pull a colour out of one border value such as "2px solid #b8860b".
    const colorFromBorder = (borderValue) => {
        const hexMatch = borderValue.match(/#([0-9a-fA-F]+)/);
        if (hexMatch) {
            // A hex literal is decisive for this value, even when malformed.
            return normalizeHex(hexMatch[1]);
        }
        const varMatch = borderValue.match(/var\s*\(\s*(--[a-zA-Z0-9-]+)\s*\)/);
        if (varMatch) {
            const varValue = cssContext.variables.get(varMatch[1]);
            const resolved = varValue ? extractHexColor(varValue) : undefined;
            if (resolved) {
                return resolved;
            }
        }
        // Whatever else extractHexColor() understands in the raw value.
        return extractHexColor(borderValue) || undefined;
    };
    // Inline style first.
    const inlineStyle = element.getAttribute("style") || "";
    const inlineMatch = inlineStyle.match(/border-top:\s*([^;]+)/i);
    if (inlineMatch) {
        const color = colorFromBorder(inlineMatch[1]);
        if (color) {
            return color;
        }
    }
    // Then stylesheet rules (e.g. .menu-footer { border-top: 2px solid #b8860b; }).
    const styles = getElementStyles(element, cssContext);
    if (styles.borderTop) {
        const color = colorFromBorder(styles.borderTop);
        if (color) {
            return color;
        }
    }
    return undefined;
}
2394
/**
 * Parse a single CSS length into twips.
 *
 * Accepted forms: an optionally negative decimal number followed by an optional
 * unit (`px`, `rem`, `em`, `pt`, matched case-insensitively). A number without
 * a unit is treated as pixels.
 *
 * Conversions used:
 *   - 1pt  = 20 twips
 *   - 1px  = 15 twips
 *   - 1rem = 1em = 16px = 240 twips (a 16px base is assumed for both)
 *
 * @param {string} value CSS length, e.g. "12px", "0.5rem", "6pt".
 * @returns {number|undefined} Rounded twips, or undefined when `value` is not a
 *   string or is not a well-formed length in one of the supported units.
 */
function parseCssLengthToTwips(value) {
    if (typeof value !== "string") {
        return undefined;
    }
    // The number must be well-formed ("12", "1.5", ".5"): a bare "." or "1.2.3"
    // is rejected here instead of leaking NaN or a half-parsed number downstream.
    const match = value.trim().match(/^(-?(?:\d+(?:\.\d*)?|\.\d+))(px|rem|em|pt)?$/i);
    if (!match) {
        return undefined;
    }
    const num = Number.parseFloat(match[1]);
    if (!Number.isFinite(num)) {
        return undefined;
    }
    const unit = (match[2] || "px").toLowerCase();
    switch (unit) {
        case "px":
            return Math.round(num * 15); // 1px = 15 twips
        case "rem":
        case "em":
            return Math.round(num * 16 * 15); // 16px base => 240 twips
        case "pt":
            return Math.round(num * 20); // 1pt = 20 twips
        default:
            return undefined;
    }
}
2423
/**
 * Parse a CSS `padding` shorthand into per-side values in twips.
 *
 * Shorthand expansion follows CSS:
 *   1 value : all sides
 *   2 values: vertical | horizontal
 *   3 values: top | horizontal | bottom
 *   4 values: top | right | bottom | left
 *
 * Each token is converted with parseCssLengthToTwips(). A token it cannot
 * convert (for example a percentage) keeps its position and contributes 0, so
 * the remaining values are never shifted onto the wrong sides.
 *
 * @param {string} padding Shorthand value, e.g. "1rem 2rem".
 * @returns {{top: number, right: number, bottom: number, left: number}|undefined}
 *   undefined when the value is empty, has more than four tokens, or contains
 *   no convertible length at all.
 */
function parseCssPaddingToTwips(padding) {
    const tokens = padding.trim().split(/\s+/).filter(Boolean);
    if (tokens.length === 0 || tokens.length > 4) {
        return undefined;
    }
    const parsed = tokens.map((token) => parseCssLengthToTwips(token));
    if (!parsed.some((twips) => Number.isFinite(twips))) {
        return undefined;
    }
    // Missing positions fall back exactly as the CSS shorthand prescribes:
    // right <- top, bottom <- top, left <- right.
    const [top, right = top, bottom = top, left = right] =
        parsed.map((twips) => (Number.isFinite(twips) ? twips : 0));
    return { top, right, bottom, left };
}
2453
/**
 * Convert a CSS font-size into DOCX half-points (1pt = 2 half-points).
 *
 * Supported units, chosen by the value's suffix:
 *   - rem / em : a 16px (= 12pt) base is assumed, so 1rem = 1em = 24 half-points
 *   - px       : 1px = 0.75pt
 *   - pt       : used as-is
 *
 * @param {string} fontSize CSS value such as "0.875rem", "14px" or "12pt".
 * @returns {number|undefined} Rounded half-points; undefined for an empty value,
 *   an unsupported unit, or a value with no leading number.
 */
function parseFontSizeToHalfPoints(fontSize) {
    if (!fontSize) {
        return undefined;
    }
    const trimmed = fontSize.trim();
    // Order matters: "rem" must be tested before "em" because both end in "em".
    const converters = [
        ["rem", (rem) => rem * 12 * 2],
        ["em", (em) => em * 12 * 2],
        ["px", (px) => px * 0.75 * 2],
        ["pt", (pt) => pt * 2],
    ];
    const entry = converters.find(([suffix]) => trimmed.endsWith(suffix));
    if (!entry) {
        return undefined;
    }
    const amount = parseFloat(trimmed);
    if (isNaN(amount)) {
        return undefined;
    }
    const [, toHalfPoints] = entry;
    return Math.round(toHalfPoints(amount));
}
2493
/**
 * Convert a single CSS margin/spacing length into DOCX twips
 * (1 inch = 1440 twips, 1pt = 20 twips).
 *
 * Supported inputs:
 *   - unitless zero ("0", "0.0"), which CSS allows for lengths -> 0
 *   - rem / em : a 16px base is assumed, so 1rem = 1em = 240 twips
 *   - px       : 1px = 15 twips (96px per inch)
 *   - pt       : 1pt = 20 twips
 *
 * @param {string} margin CSS value such as "0.5rem", "8px", "6pt" or "0".
 * @returns {number|undefined} Rounded twips; undefined for an empty value, an
 *   unsupported unit/keyword (e.g. "auto"), or a value with no leading number.
 */
function parseMarginToTwips(margin) {
    if (!margin) {
        return undefined;
    }
    const m = margin.trim();
    // "margin: 0" is the most common way to remove spacing and carries no unit;
    // report it as an explicit zero rather than "unknown".
    if (/^[+-]?(?:0+(?:\.0*)?|\.0+)$/.test(m)) {
        return 0;
    }
    // Order matters: "rem" must be tested before "em" because both end in "em".
    const twipsPerUnit = [
        ["rem", 240],
        ["em", 240],
        ["px", 15],
        ["pt", 20],
    ];
    const entry = twipsPerUnit.find(([suffix]) => m.endsWith(suffix));
    if (!entry) {
        return undefined;
    }
    const amount = Number.parseFloat(m);
    if (Number.isNaN(amount)) {
        return undefined;
    }
    return Math.round(amount * entry[1]);
}
2535
/**
 * Convert a CSS line-height into a DOCX "auto" line-spacing value, expressed
 * in 240ths of a line (240 = single, 360 = 1.5, 480 = double).
 *
 * Handled forms:
 *   - "normal"            -> treated as 1.2
 *   - unitless ("1.7")    -> multiplier
 *   - percentage ("150%") -> multiplier (value / 100)
 *   - em ("1.5em")        -> multiplier; any value ending in "em" (including
 *                            "rem") takes this branch
 *   - px ("27.2px")       -> ratio to `fontSize` when given; otherwise ratio to
 *                            a 16px base, accepted only within 0.8 .. 3.0
 *   - pt ("18pt")         -> ratio to `fontSize`; needs `fontSize`
 *
 * @param {string} lineHeight CSS line-height value.
 * @param {number} [fontSize] Font size in half-points (e.g. 24 = 12pt), used to
 *   turn absolute px/pt line-heights into a ratio.
 * @returns {number|undefined} Rounded DOCX line value, or undefined when the
 *   value cannot be converted (callers keep their default spacing).
 */
function parseLineHeightToDocx(lineHeight, fontSize) {
    if (!lineHeight) {
        return undefined;
    }
    const lh = lineHeight.trim();
    // "normal" typically equals 1.2 in browsers.
    if (lh === "normal") {
        return Math.round(1.2 * 240);
    }
    const numeric = parseFloat(lh);
    const hasNumber = !isNaN(numeric);
    // Unitless multiplier: a number with no letters and no percent sign.
    if (hasNumber && !lh.match(/[a-z%]/i)) {
        return Math.round(numeric * 240);
    }
    // Percentage of the element's own font size, i.e. a multiplier scaled by 100.
    if (lh.endsWith("%")) {
        if (hasNumber) {
            return Math.round((numeric / 100) * 240);
        }
        return undefined;
    }
    // em units are a multiplier of the font size.
    if (lh.endsWith("em")) {
        if (hasNumber) {
            return Math.round(numeric * 240);
        }
    }
    // px units (e.g. "27.2px" from getComputedStyle).
    if (lh.endsWith("px") && hasNumber) {
        if (fontSize && fontSize > 0) {
            // half-points / 2 = points; points * 4/3 = px.
            const fontSizePx = (fontSize / 2) * (4 / 3);
            return Math.round((numeric / fontSizePx) * 240);
        }
        // No font size: estimate against a 16px base and keep the result only
        // when the ratio is plausible.
        const estimatedRatio = numeric / 16;
        if (estimatedRatio >= 0.8 && estimatedRatio <= 3.0) {
            return Math.round(estimatedRatio * 240);
        }
    }
    // pt units can only be converted relative to a known font size.
    if (lh.endsWith("pt") && hasNumber && fontSize && fontSize > 0) {
        const fontSizePt = fontSize / 2;
        return Math.round((numeric / fontSizePt) * 240);
    }
    // Anything else: let the caller use its default line-height.
    return undefined;
}
2601
/**
 * Extract a colour from a CSS value and return it as six upper-case hex digits.
 *
 * Recognised, in order:
 *   1. a small set of named colours, when the whole trimmed value is the name
 *      ("transparent" yields undefined);
 *   2. the first `#hex` literal anywhere in the value: #rgb and #rgba are
 *      expanded, #rrggbbaa is truncated (alpha dropped); other digit counts
 *      are ignored;
 *   3. `linear-gradient(...)`: the first `rgba()` stop blended over white by
 *      its alpha, otherwise the first `rgb()` stop;
 *   4. `rgba(r, g, b, a)`: blended over white when a < 0.3, otherwise the plain
 *      RGB channels;
 *   5. `rgb(r, g, b)`.
 *
 * Only comma-separated integer channels are recognised for rgb()/rgba().
 *
 * @param {string} value CSS value, e.g. "#e5e7eb", "1px solid #abc", "rgb(0, 0, 0)".
 * @returns {string|undefined} "RRGGBB" without the leading '#', or undefined
 *   when no colour can be extracted (including non-string input).
 */
function extractHexColor(value) {
    if (typeof value !== "string") {
        return undefined;
    }
    // A Map (not an object literal) so that inputs such as "constructor" or
    // "toString" can never resolve to inherited Object.prototype members.
    const namedColors = new Map([
        ["white", "FFFFFF"],
        ["black", "000000"],
        ["red", "FF0000"],
        ["green", "008000"],
        ["blue", "0000FF"],
        ["yellow", "FFFF00"],
        ["orange", "FFA500"],
        ["purple", "800080"],
        ["gray", "808080"],
        ["grey", "808080"],
        ["transparent", ""], // no colour
    ]);
    const lowerValue = value.toLowerCase().trim();
    if (namedColors.has(lowerValue)) {
        return namedColors.get(lowerValue) || undefined;
    }
    // Channel helpers: clamp to 0..255 so the result is always exactly six digits.
    const channelToHex = (channel) => Math.min(255, Math.max(0, channel)).toString(16).padStart(2, "0");
    const rgbToHex = (r, g, b) => (channelToHex(r) + channelToHex(g) + channelToHex(b)).toUpperCase();
    const blendOverWhite = (channel, alpha) => Math.round(channel * alpha + 255 * (1 - alpha));
    // A malformed alpha (e.g. "1.2.3") is treated as fully opaque.
    const parseAlpha = (text) => {
        const alpha = Number.parseFloat(text);
        return Number.isFinite(alpha) ? Math.min(1, Math.max(0, alpha)) : 1;
    };
    const channelsOf = (match) => [1, 2, 3].map((i) => Number.parseInt(match[i], 10));
    // Hex literal.
    const hexMatch = value.match(/#([0-9a-fA-F]+)/);
    if (hexMatch) {
        const digits = hexMatch[1];
        if (digits.length === 3 || digits.length === 4) {
            return [...digits.slice(0, 3)].map((d) => d + d).join("").toUpperCase();
        }
        if (digits.length === 6 || digits.length === 8) {
            return digits.slice(0, 6).toUpperCase();
        }
    }
    const rgbaMatch = value.match(/rgba\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*([0-9.]+)\s*\)/);
    const rgbMatch = value.match(/rgb\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)/);
    // Gradient: approximate with its first colour stop.
    if (/linear-gradient\s*\([^)]+\)/i.test(value)) {
        if (rgbaMatch) {
            const alpha = parseAlpha(rgbaMatch[4]);
            const [r, g, b] = channelsOf(rgbaMatch).map((c) => blendOverWhite(c, alpha));
            return rgbToHex(r, g, b);
        }
        if (rgbMatch) {
            return rgbToHex(...channelsOf(rgbMatch));
        }
        // No extractable colour stop: report no background.
        return undefined;
    }
    if (rgbaMatch) {
        const alpha = parseAlpha(rgbaMatch[4]);
        const channels = channelsOf(rgbaMatch);
        // Mostly transparent colours are blended with white to get the
        // effective colour; more opaque ones use their RGB channels directly.
        if (alpha < 0.3) {
            const [r, g, b] = channels.map((c) => blendOverWhite(c, alpha));
            return rgbToHex(r, g, b);
        }
        return rgbToHex(...channels);
    }
    if (rgbMatch) {
        return rgbToHex(...channelsOf(rgbMatch));
    }
    return undefined;
}
2687
/**
 * Determine the text colour that applies to an element.
 *
 * Lookup order (first hit wins):
 *   1. the `color` declaration of the inline `style` attribute; a `var(--x)`
 *      reference is substituted from `cssContext.variables` when available;
 *   2. getColorFromClasses();
 *   3. the `color` reported by getElementStyles();
 *   4. `window.getComputedStyle()` when running in a browser.
 *
 * Colours are never filtered: whatever the HTML specifies is returned.
 *
 * @param element    DOM element to inspect.
 * @param cssContext Optional CSS context; steps 2 and 3 are skipped without it.
 * @returns The colour as produced by extractHexColor()/getColorFromClasses(),
 *   or undefined when none of the sources yields one.
 */
function extractTextColor(element, cssContext) {
    // 1. Inline style. The leading (?:^|;) keeps "background-color" from matching.
    const styleAttr = element.getAttribute("style") || "";
    const declaration = styleAttr.match(/(?:^|;)\s*color:\s*([^;]+)/i);
    if (declaration) {
        let raw = declaration[1].trim();
        if (cssContext && raw.includes("var(")) {
            const reference = raw.match(/var\s*\(\s*(--[a-zA-Z0-9-]+)\s*\)/);
            const substitute = reference ? cssContext.variables.get(reference[1]) : undefined;
            if (substitute) {
                raw = substitute;
            }
        }
        const inlineHex = extractHexColor(raw);
        if (inlineHex) {
            return inlineHex;
        }
    }
    if (cssContext) {
        // 2. Class rules.
        const fromClasses = getColorFromClasses(element, cssContext);
        if (fromClasses) {
            return fromClasses;
        }
        // 3. Element styles (e.g. body { color: ... }, p { color: ... }).
        const { color: ruleColor } = getElementStyles(element, cssContext);
        const ruleHex = ruleColor ? extractHexColor(ruleColor) : undefined;
        if (ruleHex) {
            return ruleHex;
        }
    }
    // 4. Browser-only fallback.
    if (typeof window !== "undefined" && window.getComputedStyle) {
        try {
            const { color: computedColor } = window.getComputedStyle(element);
            const computedHex = computedColor ? extractHexColor(computedColor) : undefined;
            if (computedHex) {
                return computedHex;
            }
        }
        catch {
            // getComputedStyle may fail in some environments
        }
    }
    return undefined;
}
2747
/**
 * Turn the children of a blockquote/callout element into a flat list of
 * content elements (paragraphs, headings, lists and single-row tables).
 *
 * The DOM children are walked directly; innerHTML is not re-parsed. When the
 * blockquote's own styles report `font-style: italic`, that italic is applied
 * to direct text nodes and to every run that has no italic setting of its own.
 *
 * @param element    The blockquote/callout element.
 * @param cssContext CSS context used to resolve styles.
 * @returns {Array<object>} The collected content elements, in document order.
 */
function parseBlockquoteContent(element, cssContext) {
    const collected = [];
    // Handles rules such as "blockquote { font-style: italic; }".
    const quoteIsItalic = getElementStyles(element, cssContext).fontStyle === "italic";
    // Give the blockquote's italic to runs that do not specify italic themselves.
    const inheritItalic = (runs) => {
        if (!quoteIsItalic) {
            return;
        }
        for (const run of runs) {
            if (run.italic === undefined) {
                run.italic = true;
            }
        }
    };
    const joinText = (runs) => runs.map((r) => r.text).join("");
    // Emit one paragraph, keeping the runs only when they carry formatting.
    const pushRunsParagraph = (text, runs) => {
        if (hasInlineFormatting(runs)) {
            collected.push({ type: "paragraph", text, runs });
        }
        else {
            collected.push({ type: "paragraph", text });
        }
    };
    const visitChildren = (parent) => {
        for (const child of parent.childNodes) {
            visit(child);
        }
    };
    // Headings: the blockquote is passed as parent so nested selectors such as
    // ".key-takeaways h3 { color: ... }" resolve.
    const visitHeading = (el, level) => {
        const text = getTextContent(el).trim();
        if (!text) {
            return;
        }
        const headingStyles = getElementStyles(el, cssContext, element);
        const color = headingStyles.color ? extractHexColor(headingStyles.color) : undefined;
        collected.push({ type: "heading", level, text, color });
    };
    // Paragraphs: the colour comes from the paragraph's styles (blockquote as parent).
    const visitParagraph = (el) => {
        const runs = extractInlineRuns(el, cssContext);
        if (runs.length === 0) {
            return;
        }
        const text = joinText(runs);
        const paragraphStyles = getElementStyles(el, cssContext, element);
        const color = paragraphStyles.color ? extractHexColor(paragraphStyles.color) : undefined;
        inheritItalic(runs);
        if (hasInlineFormatting(runs) || quoteIsItalic) {
            // The paragraph colour fills in runs that have no colour of their own.
            if (color) {
                for (const run of runs) {
                    if (!run.color) {
                        run.color = color;
                    }
                }
            }
            collected.push({ type: "paragraph", text, runs, color, italic: quoteIsItalic });
        }
        else {
            collected.push({ type: "paragraph", text, color });
        }
    };
    // Lists: only direct <li> children are read, so nested lists are not flattened in.
    const visitList = (el, ordered) => {
        const items = [];
        for (const child of el.children) {
            if (child.tagName.toLowerCase() !== "li") {
                continue;
            }
            const runs = extractInlineRuns(child, cssContext);
            if (runs.length === 0) {
                continue;
            }
            inheritItalic(runs);
            if (hasInlineFormatting(runs) || quoteIsItalic) {
                items.push(runs);
            }
            else {
                items.push(joinText(runs));
            }
        }
        if (items.length > 0) {
            collected.push({ type: "list", ordered, items });
        }
    };
    // Bold <div>s are treated as labels/titles. Returns true when the div was
    // recognised as a label (even if it turned out to be empty).
    const visitLabelDiv = (el) => {
        const styles = getElementStyles(el, cssContext, element);
        const weight = styles.fontWeight;
        if (weight !== "700" && weight !== "bold" && weight !== "600") {
            return false;
        }
        const text = getTextContent(el).trim();
        if (text) {
            const labelColor = styles.color ? extractHexColor(styles.color) : undefined;
            if (labelColor) {
                const runs = [{
                        text,
                        bold: true,
                        italic: false,
                        color: labelColor,
                    }];
                collected.push({ type: "paragraph", text, runs });
            }
            else {
                collected.push({ type: "paragraph", text, bold: true });
            }
        }
        return true;
    };
    // Generic containers: a horizontal flex row of inline items becomes a
    // single-row borderless table, an inline-only container becomes one
    // paragraph, and anything else is descended into.
    const visitContainer = (el) => {
        const inlineOnly = isInlineOnlyContainer(el);
        const horizontalFlex = isHorizontalFlexContainer(el, cssContext);
        const directChildren = Array.from(el.children);
        const isFlexRow = horizontalFlex && directChildren.length > 1;
        if (inlineOnly && isFlexRow) {
            const fontFamily = getElementStyles(el, cssContext).fontFamily;
            const cells = [];
            const flexItems = directChildren.filter(child => child.nodeType === Node.ELEMENT_NODE);
            for (const item of flexItems) {
                const cellRuns = extractInlineRuns(item, cssContext, undefined, fontFamily);
                if (cellRuns.length === 0) {
                    continue;
                }
                inheritItalic(cellRuns);
                cells.push(cellRuns);
            }
            if (cells.length > 0) {
                collected.push({
                    type: "table",
                    rows: [cells],
                    hasHeader: false,
                    noBorders: true,
                });
            }
            return;
        }
        if (inlineOnly) {
            const fontFamily = getElementStyles(el, cssContext).fontFamily;
            const runs = extractInlineRuns(el, cssContext, undefined, fontFamily);
            if (runs.length > 0) {
                const text = joinText(runs);
                inheritItalic(runs);
                pushRunsParagraph(text, runs);
            }
            return;
        }
        visitChildren(el);
    };
    const containerTags = ["div", "span", "section", "dl", "dd", "figure", "figcaption"];
    function visit(node) {
        if (node.nodeType === Node.TEXT_NODE) {
            const text = node.textContent?.trim();
            if (text) {
                collected.push(quoteIsItalic
                    ? { type: "paragraph", text, italic: true }
                    : { type: "paragraph", text });
            }
            return;
        }
        if (node.nodeType !== Node.ELEMENT_NODE || !(node instanceof Element)) {
            return;
        }
        const el = node;
        const tag = el.tagName.toLowerCase();
        const level = parseHeadingLevel(tag);
        if (level !== null) {
            visitHeading(el, level);
            return;
        }
        if (tag === "p") {
            visitParagraph(el);
            return;
        }
        if (tag === "ul" || tag === "ol") {
            visitList(el, tag === "ol");
            return;
        }
        if (tag === "div" && visitLabelDiv(el)) {
            return;
        }
        if (containerTags.includes(tag)) {
            visitContainer(el);
            return;
        }
        // <dt>: all inline content is combined into a single paragraph.
        if (tag === "dt") {
            const runs = extractInlineRuns(el, cssContext);
            if (runs.length > 0) {
                pushRunsParagraph(joinText(runs), runs);
            }
            return;
        }
        // Fallback: leaf elements contribute their text, others are descended into.
        const text = getTextContent(el).trim();
        if (text && el.children.length === 0) {
            collected.push({ type: "paragraph", text });
        }
        else {
            visitChildren(el);
        }
    }
    visitChildren(element);
    return collected;
}
2989
+ /**
2990
+ * Parse HTML string and extract content elements.
2991
+ */
2992
+ export function parseHtmlContent(html) {
2993
+ const parser = new DOMParser();
2994
+ const doc = parser.parseFromString(html, "text/html");
2995
+ const elements = [];
2996
+ // Parse CSS context for resolving variables and class-based colors
2997
+ const cssContext = parseCssContext(doc);
2998
+ // Track SVGs that have been processed (to avoid duplicate processing)
2999
+ const processedSvgs = new Set();
3000
+ const { body } = doc;
3001
+ function processNode(node, inheritedAlignment, inheritedColor) {
3002
+ if (node.nodeType === Node.TEXT_NODE) {
3003
+ const text = node.textContent?.trim();
3004
+ if (text) {
3005
+ elements.push({ type: "paragraph", text, alignment: inheritedAlignment, color: inheritedColor });
3006
+ }
3007
+ return;
3008
+ }
3009
+ if (node.nodeType !== Node.ELEMENT_NODE) {
3010
+ return;
3011
+ }
3012
+ // At this point we know it's an Element node by nodeType
3013
+ // Note: We use nodeType check instead of instanceof Element because
3014
+ // linkedom has separate HTMLElement and SVGElement classes, and the
3015
+ // global Element shim only covers HTMLElement. SVG elements have
3016
+ // nodeType 1 but fail instanceof Element check in linkedom.
3017
+ const element = node;
3018
+ if (!element.tagName) {
3019
+ return;
3020
+ }
3021
+ const tagName = element.tagName.toLowerCase();
3022
+ // Skip script, style, noscript, and other non-visual elements
3023
+ if (tagName === "script" || tagName === "style" || tagName === "noscript" ||
3024
+ tagName === "iframe" || tagName === "link" || tagName === "meta" ||
3025
+ tagName === "template" || tagName === "object" || tagName === "embed") {
3026
+ return;
3027
+ }
3028
+ // Get alignment from this element's CSS styles, or use inherited
3029
+ const elementAlignment = getTextAlignment(element, cssContext);
3030
+ const alignment = elementAlignment || inheritedAlignment;
3031
+ // Get color from this element (via CSS classes or inline styles)
3032
+ // Color inheritance rules:
3033
+ // 1. Inline styles on THIS element take precedence
3034
+ // 2. CSS class colors on THIS element take precedence
3035
+ // 3. Inline style colors from PARENT elements ARE inherited (CSS spec)
3036
+ // 4. CSS class colors from parent elements are NOT inherited here (handled by CSS cascade)
3037
+ const elementColor = extractTextColor(element, cssContext);
3038
+ // Use element's own color if set, otherwise use inherited color (from parent inline styles)
3039
+ const effectiveColor = elementColor || inheritedColor;
3040
+ const headingLevel = parseHeadingLevel(tagName);
3041
+ if (headingLevel !== null) {
3042
+ const text = getTextContent(element).trim();
3043
+ if (text) {
3044
+ // Extract text-transform from CSS
3045
+ const hStyles = getElementStyles(element, cssContext);
3046
+ let textTransform;
3047
+ if (hStyles.textTransform === "uppercase" || hStyles.textTransform === "lowercase" || hStyles.textTransform === "capitalize") {
3048
+ textTransform = hStyles.textTransform;
3049
+ }
3050
+ // Extract font-family for heading (e.g., h1 { font-family: var(--font-heading); })
3051
+ const headingFontFamily = hStyles.fontFamily;
3052
+ // GENERALIZED: Check for border-bottom on heading (e.g., h2 with underline style)
3053
+ // Any element can have borders - extract from CSS/inline styles
3054
+ const headingBorderColor = extractBorderBottomColor(element, cssContext);
3055
+ // When heading has border-bottom, reduce heading after spacing
3056
+ // and add HR with top border to simulate CSS padding-bottom
3057
+ // CSS padding-bottom: 0.5rem ≈ 100 twips (space from text to border)
3058
+ const spacingAfter = headingBorderColor ? 100 : undefined;
3059
+ // GENERALIZED: Extract line-height for vertical spacing
3060
+ // CSS line-height: 1.2 for titles, 1.3 for other headings, etc.
3061
+ const lineSpacing = hStyles.lineHeight ? parseLineHeightToDocx(hStyles.lineHeight) : undefined;
3062
+ // Check for gradient text (CSS background-clip: text)
3063
+ // If gradient exists, create runs array with gradient info
3064
+ let runs;
3065
+ if (hStyles.gradient) {
3066
+ runs = [{ text, gradient: hStyles.gradient, color: elementColor }];
3067
+ }
3068
+ elements.push({
3069
+ type: "heading",
3070
+ level: headingLevel,
3071
+ text,
3072
+ alignment,
3073
+ color: elementColor,
3074
+ textTransform,
3075
+ spacingAfter,
3076
+ fontFamily: headingFontFamily,
3077
+ runs,
3078
+ lineSpacing,
3079
+ });
3080
+ // Add horizontal-rule for border-bottom effect
3081
+ // Use top border with minimal before spacing - border appears right at top
3082
+ // CSS margin-bottom: 1rem ≈ 200 twips (space after border to content)
3083
+ if (headingBorderColor) {
3084
+ elements.push({ type: "horizontal-rule", color: headingBorderColor, spacingBefore: 0, spacingAfter: 200, borderPosition: "top" });
3085
+ }
3086
+ }
3087
+ return;
3088
+ }
3089
+ // Handle <hr> elements
3090
+ if (tagName === "hr") {
3091
+ // Try to extract color from inline style or CSS
3092
+ const hrStyle = element.getAttribute("style") || "";
3093
+ const colorMatch = hrStyle.match(/(?:border-color|background-color|color):\s*([^;]+)/i);
3094
+ let hrColor;
3095
+ if (colorMatch) {
3096
+ hrColor = extractHexColor(colorMatch[1]);
3097
+ }
3098
+ elements.push({ type: "horizontal-rule", color: hrColor });
3099
+ return;
3100
+ }
3101
+ // GENERALIZED: Check if this paragraph is a styled callout (has border-left)
3102
+ // This handles patterns like: <p class="intro-section"> with border-left: 4px solid ...
3103
+ // Must come BEFORE regular paragraph handling
3104
+ if (tagName === "p" && isBlockquoteOrCallout(element, cssContext)) {
3105
+ const content = parseBlockquoteContent(element, cssContext);
3106
+ if (content.length > 0) {
3107
+ const elementStyles = getElementStyles(element, cssContext);
3108
+ let borderColor;
3109
+ let backgroundColor;
3110
+ // Extract background color from element styles
3111
+ if (elementStyles.backgroundColor) {
3112
+ const hex = extractHexColor(elementStyles.backgroundColor);
3113
+ if (hex)
3114
+ backgroundColor = hex;
3115
+ }
3116
+ // Extract border color from borderLeft property
3117
+ if (elementStyles.borderLeft) {
3118
+ const hex = extractBorderColorFromStyle({ borderLeft: elementStyles.borderLeft });
3119
+ if (hex)
3120
+ borderColor = hex;
3121
+ }
3122
+ else if (elementStyles.borderColor) {
3123
+ const hex = extractHexColor(elementStyles.borderColor);
3124
+ if (hex)
3125
+ borderColor = hex;
3126
+ }
3127
+ elements.push({
3128
+ type: "blockquote",
3129
+ content,
3130
+ borderColor,
3131
+ backgroundColor,
3132
+ borderStyle: "left",
3133
+ });
3134
+ }
3135
+ return;
3136
+ }
3137
+ if (tagName === "p") {
3138
+ const runs = extractInlineRuns(element, cssContext, effectiveColor);
3139
+ if (runs.length > 0) {
3140
+ const text = runs.map((r) => r.text).join("");
3141
+ // Use effectiveColor which includes inheritance from parent inline styles
3142
+ // Extract text-indent, font-style, text-transform from CSS
3143
+ const pStyles = getElementStyles(element, cssContext);
3144
+ let firstLineIndent;
3145
+ let hangingIndent;
3146
+ let textTransform;
3147
+ let italic = false;
3148
+ // Handle text-indent (can be positive for first-line indent, negative for hanging indent)
3149
+ if (pStyles.textIndent) {
3150
+ const indentTwips = parseCssLengthToTwips(pStyles.textIndent);
3151
+ if (indentTwips !== undefined) {
3152
+ if (indentTwips >= 0) {
3153
+ firstLineIndent = indentTwips;
3154
+ }
3155
+ else {
3156
+ // Negative text-indent creates a hanging indent
3157
+ hangingIndent = Math.abs(indentTwips);
3158
+ }
3159
+ }
3160
+ }
3161
+ // Handle font-style: italic
3162
+ if (pStyles.fontStyle === "italic") {
3163
+ italic = true;
3164
+ // Apply italic to all runs
3165
+ runs.forEach(run => run.italic = true);
3166
+ }
3167
+ // Handle font-weight: bold/700/600
3168
+ let bold = false;
3169
+ if (pStyles.fontWeight === "700" || pStyles.fontWeight === "bold" || pStyles.fontWeight === "600") {
3170
+ bold = true;
3171
+ // Apply bold to all runs
3172
+ runs.forEach(run => run.bold = true);
3173
+ }
3174
+ // Handle text-transform
3175
+ if (pStyles.textTransform === "uppercase" || pStyles.textTransform === "lowercase" || pStyles.textTransform === "capitalize") {
3176
+ textTransform = pStyles.textTransform;
3177
+ }
3178
+ // Extract font-family from paragraph styles (will be inherited from body if not set on p)
3179
+ const paragraphFontFamily = pStyles.fontFamily;
3180
+ // Apply fontFamily to runs if present and runs don't have their own fontFamily
3181
+ if (paragraphFontFamily) {
3182
+ runs.forEach(run => {
3183
+ if (!run.fontFamily) {
3184
+ run.fontFamily = paragraphFontFamily;
3185
+ }
3186
+ });
3187
+ }
3188
+ if (hasInlineFormatting(runs)) {
3189
+ // Has mixed formatting - use runs array
3190
+ // Apply effective color to runs that don't have their own color
3191
+ if (effectiveColor) {
3192
+ runs.forEach(run => {
3193
+ if (!run.color) {
3194
+ run.color = effectiveColor;
3195
+ }
3196
+ });
3197
+ }
3198
+ elements.push({ type: "paragraph", text, runs, alignment, firstLineIndent, hangingIndent, textTransform });
3199
+ }
3200
+ else if (effectiveColor || italic || bold || paragraphFontFamily) {
3201
+ // Has color, italic, bold, or fontFamily - use runs to preserve formatting
3202
+ // Create runs with the appropriate formatting
3203
+ const formattedRuns = [{ text, bold, italic, color: effectiveColor, fontFamily: paragraphFontFamily }];
3204
+ elements.push({ type: "paragraph", text, runs: formattedRuns, alignment, firstLineIndent, hangingIndent, textTransform });
3205
+ }
3206
+ else {
3207
+ // No formatting - simple paragraph (but may have indent/transform)
3208
+ elements.push({ type: "paragraph", text, alignment, firstLineIndent, hangingIndent, textTransform });
3209
+ }
3210
+ }
3211
+ return;
3212
+ }
3213
+ if (tagName === "ul" || tagName === "ol") {
3214
+ const items = [];
3215
+ let hasComplexItems = false;
3216
+ let hasOnlyNestedLists = true; // Track if complexity is ONLY from nested lists
3217
+ // First pass: check if any list items contain block-level elements
3218
+ // Use direct children to avoid nested lists
3219
+ const listItems = [];
3220
+ for (const child of element.children) {
3221
+ if (child.tagName.toLowerCase() === "li") {
3222
+ listItems.push(child);
3223
+ if (hasBlockLevelChildren(child)) {
3224
+ hasComplexItems = true;
3225
+ // Check if the block children are only nested lists
3226
+ for (const blockChild of child.children) {
3227
+ const blockTagName = blockChild.tagName.toLowerCase();
3228
+ if (BLOCK_LEVEL_TAGS.has(blockTagName) && blockTagName !== "ul" && blockTagName !== "ol") {
3229
+ hasOnlyNestedLists = false;
3230
+ }
3231
+ }
3232
+ }
3233
+ }
3234
+ }
3235
+ if (hasComplexItems && hasOnlyNestedLists) {
3236
+ // List contains nested lists - use the nested list extraction with level tracking
3237
+ const nestedItems = extractNestedListItems(element, cssContext, 0);
3238
+ if (nestedItems.length > 0) {
3239
+ elements.push({ type: "list", ordered: tagName === "ol", items: nestedItems });
3240
+ }
3241
+ }
3242
+ else if (hasComplexItems) {
3243
+ // Complex list: process each list item recursively to capture block elements
3244
+ // This handles SVGs, tables, nested divs, etc. inside list items
3245
+ for (const li of listItems) {
3246
+ // For complex list items, we need to:
3247
+ // 1. Extract inline text that appears directly in the li (not inside block children)
3248
+ // 2. Process block-level children through processNode
3249
+ // Check which children are block-level
3250
+ const blockChildren = [];
3251
+ const inlineNodes = [];
3252
+ for (const child of li.childNodes) {
3253
+ if (child.nodeType === Node.ELEMENT_NODE) {
3254
+ const childTagName = child.tagName.toLowerCase();
3255
+ if (BLOCK_LEVEL_TAGS.has(childTagName)) {
3256
+ blockChildren.push(child);
3257
+ }
3258
+ else {
3259
+ // Inline element like <strong>, <span>, <a>, etc.
3260
+ inlineNodes.push(child);
3261
+ }
3262
+ }
3263
+ else if (child.nodeType === Node.TEXT_NODE) {
3264
+ inlineNodes.push(child);
3265
+ }
3266
+ }
3267
+ // Process block-level children (this captures SVGs, tables, divs, etc.)
3268
+ for (const child of blockChildren) {
3269
+ processNode(child);
3270
+ }
3271
+ }
3272
+ // Note: We intentionally don't create list items from inline content in complex lists
3273
+ // because the block children (paragraphs, etc.) already contain all the meaningful content.
3274
+ // The inline content in <li> for complex lists is typically whitespace.
3275
+ }
3276
+ else {
3277
+ // Simple list: all items are inline-only, use efficient path
3278
+ for (const li of listItems) {
3279
+ const runs = extractInlineRuns(li, cssContext);
3280
+ if (runs.length > 0) {
3281
+ if (hasInlineFormatting(runs)) {
3282
+ items.push(runs);
3283
+ }
3284
+ else {
3285
+ items.push(runs.map((r) => r.text).join(""));
3286
+ }
3287
+ }
3288
+ }
3289
+ if (items.length > 0) {
3290
+ elements.push({ type: "list", ordered: tagName === "ol", items });
3291
+ }
3292
+ }
3293
+ return;
3294
+ }
3295
+ if (tagName === "table") {
3296
+ const rows = [];
3297
+ for (const tr of element.querySelectorAll("tr")) {
3298
+ const cells = [];
3299
+ for (const cell of tr.querySelectorAll("td, th")) {
3300
+ // Extract inline runs to preserve bold/italic formatting in cells
3301
+ const runs = extractInlineRuns(cell, cssContext);
3302
+ if (runs.length > 0) {
3303
+ if (hasInlineFormatting(runs)) {
3304
+ // Has formatting - store as runs
3305
+ cells.push(runs);
3306
+ }
3307
+ else {
3308
+ // Plain text - store as string
3309
+ cells.push(runs.map((r) => r.text).join(""));
3310
+ }
3311
+ }
3312
+ else {
3313
+ cells.push("");
3314
+ }
3315
+ }
3316
+ if (cells.length > 0) {
3317
+ rows.push(cells);
3318
+ }
3319
+ }
3320
+ if (rows.length > 0) {
3321
+ // Extract cell padding from CSS (th, td selectors)
3322
+ let cellPadding;
3323
+ let headerBackgroundColor;
3324
+ let headerTextColor;
3325
+ let evenRowBackgroundColor;
3326
+ if (cssContext) {
3327
+ // Try td first, then th (they usually have the same padding)
3328
+ const tdStyle = cssContext.elementStyles.get("td");
3329
+ const thStyle = cssContext.elementStyles.get("th");
3330
+ const paddingStr = tdStyle?.padding || thStyle?.padding;
3331
+ if (paddingStr) {
3332
+ cellPadding = parseCssPaddingToTwips(paddingStr);
3333
+ }
3334
+ // GENERALIZED: Look for nested header styles from ancestor containers
3335
+ // Walk up the DOM tree to find containers with nested th styles
3336
+ let ancestor = element.parentElement;
3337
+ while (ancestor && !headerBackgroundColor) {
3338
+ const ancestorClassAttr = ancestor.getAttribute("class");
3339
+ const ancestorClasses = ancestorClassAttr ? ancestorClassAttr.split(/\s+/).filter(c => c.length > 0) : [];
3340
+ for (const ancestorClass of ancestorClasses) {
3341
+ const nestedMap = cssContext.nestedStyles.get(ancestorClass);
3342
+ if (nestedMap) {
3343
+ // Check for th nested styles (e.g., .comparison-table-wrapper th { ... })
3344
+ const thNestedStyle = nestedMap.get("th");
3345
+ if (thNestedStyle) {
3346
+ if (thNestedStyle.backgroundColor) {
3347
+ const hex = extractHexColor(thNestedStyle.backgroundColor);
3348
+ if (hex)
3349
+ headerBackgroundColor = hex;
3350
+ }
3351
+ if (thNestedStyle.color) {
3352
+ const hex = extractHexColor(thNestedStyle.color);
3353
+ if (hex)
3354
+ headerTextColor = hex;
3355
+ }
3356
+ }
3357
+ // Check for tr:nth-child(even) or similar for even row styling
3358
+ const trNestedStyle = nestedMap.get("tr");
3359
+ if (trNestedStyle && trNestedStyle.backgroundColor) {
3360
+ const hex = extractHexColor(trNestedStyle.backgroundColor);
3361
+ if (hex)
3362
+ evenRowBackgroundColor = hex;
3363
+ }
3364
+ }
3365
+ }
3366
+ ancestor = ancestor.parentElement;
3367
+ }
3368
+ // Also check direct element styles for th (standalone th { ... } rules)
3369
+ if (!headerBackgroundColor && thStyle?.backgroundColor) {
3370
+ const hex = extractHexColor(thStyle.backgroundColor);
3371
+ if (hex)
3372
+ headerBackgroundColor = hex;
3373
+ }
3374
+ if (!headerTextColor && thStyle?.color) {
3375
+ const hex = extractHexColor(thStyle.color);
3376
+ if (hex)
3377
+ headerTextColor = hex;
3378
+ }
3379
+ }
3380
+ elements.push({ type: "table", rows, cellPadding, headerBackgroundColor, headerTextColor, evenRowBackgroundColor });
3381
+ }
3382
+ return;
3383
+ }
3384
+ if (tagName === "pre" || tagName === "code") {
3385
+ const text = getTextContent(element).trim();
3386
+ if (text) {
3387
+ elements.push({ type: "code", text });
3388
+ }
3389
+ return;
3390
+ }
3391
+ // Check for chart containers (divs with SVG children)
3392
+ if (tagName === "div") {
3393
+ // Check for two-column grid layout (like resume templates with sidebar)
3394
+ const sidebarWidthPercent = isTwoColumnGridLayout(element, cssContext);
3395
+ if (sidebarWidthPercent !== undefined) {
3396
+ const columnChildren = findTwoColumnChildren(element);
3397
+ if (columnChildren) {
3398
+ const [sidebarEl, mainEl] = columnChildren;
3399
+ // Extract sidebar styling
3400
+ const sidebarStyles = getElementStyles(sidebarEl, cssContext);
3401
+ const sidebarBgColor = sidebarStyles.backgroundColor
3402
+ ? extractHexColor(sidebarStyles.backgroundColor)
3403
+ : undefined;
3404
+ const sidebarTextColor = sidebarStyles.color
3405
+ ? extractHexColor(sidebarStyles.color)
3406
+ : undefined;
3407
+ // Parse sidebar and main content separately
3408
+ const sidebarContent = parseContainerContent(sidebarEl, cssContext, sidebarTextColor);
3409
+ const mainContent = parseContainerContent(mainEl, cssContext);
3410
+ if (sidebarContent.length > 0 || mainContent.length > 0) {
3411
+ // TODO: Two-column layout is detected but causes DOCX rendering issues.
3412
+ // For now, just process the children normally instead of creating a two-column element.
3413
+ // This ensures content is extracted even if layout is not preserved.
3414
+ // elements.push({ type: "two-column-layout", ... });
3415
+ }
3416
+ }
3417
+ }
3418
+ // Fall through to normal processing for now since two-column DOCX has issues
3419
+ // GENERALIZED: Check for flex containers with equal-width columns (like signature blocks)
3420
+ // This handles layouts like: display: flex; with children having flex: 1
3421
+ // Render as a DOCX table with each child as a column
3422
+ const flexColumns = detectFlexEqualColumns(element, cssContext);
3423
+ if (flexColumns && flexColumns.length >= 2) {
3424
+ // Check if children have enough content to warrant a table layout
3425
+ // (Skip if children are simple/empty - those are handled by stats-grid or normal processing)
3426
+ const hasComplexChildren = flexColumns.some(col => {
3427
+ const childCount = col.children.length;
3428
+ return childCount > 4; // Complex structure like signature blocks
3429
+ });
3430
+ if (hasComplexChildren) {
3431
+ // Build rows: align content from each column side by side
3432
+ // First, extract content lines from each column (with styling preserved as InlineRuns)
3433
+ const columnLines = [];
3434
+ let maxLines = 0;
3435
+ for (const col of flexColumns) {
3436
+ const lines = [];
3437
+ for (const child of col.children) {
3438
+ // Handle signature lines (border-bottom divs) as horizontal rules in text
3439
+ const childStyles = getElementStyles(child, cssContext);
3440
+ const inlineStyle = child.getAttribute("style") || "";
3441
+ const hasBorderBottom = inlineStyle.includes("border-bottom") ||
3442
+ childStyles.borderBottom ||
3443
+ childStyles.border?.includes("solid");
3444
+ if (hasBorderBottom && !getTextContent(child).trim()) {
3445
+ // Empty div with border = signature line
3446
+ lines.push("________________________");
3447
+ }
3448
+ else {
3449
+ const text = getTextContent(child).trim();
3450
+ if (text) {
3451
+ // Extract styling for this element
3452
+ const isBold = childStyles.fontWeight === "700" ||
3453
+ childStyles.fontWeight === "bold" ||
3454
+ child.querySelector("strong, b") !== null;
3455
+ const isItalic = childStyles.fontStyle === "italic" ||
3456
+ child.querySelector("em, i") !== null;
3457
+ // Extract color - support both CSS class colors and inline styles
3458
+ let textColor = childStyles.color;
3459
+ if (!textColor) {
3460
+ const styleAttr = child.getAttribute("style") || "";
3461
+ const colorMatch = styleAttr.match(/color:\s*([^;]+)/);
3462
+ if (colorMatch) {
3463
+ textColor = colorMatch[1].trim();
3464
+ }
3465
+ }
3466
+ // Convert color to hex without #
3467
+ let hexColor;
3468
+ if (textColor) {
3469
+ if (textColor.startsWith("#")) {
3470
+ hexColor = textColor.slice(1);
3471
+ }
3472
+ else if (textColor.startsWith("rgb")) {
3473
+ // Parse rgb(r, g, b) to hex
3474
+ const rgbMatch = textColor.match(/rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)/);
3475
+ if (rgbMatch) {
3476
+ const r = parseInt(rgbMatch[1], 10);
3477
+ const g = parseInt(rgbMatch[2], 10);
3478
+ const b = parseInt(rgbMatch[3], 10);
3479
+ hexColor = ((1 << 24) + (r << 16) + (g << 8) + b).toString(16).slice(1).toUpperCase();
3480
+ }
3481
+ }
3482
+ else {
3483
+ hexColor = textColor;
3484
+ }
3485
+ }
3486
+ // Extract font size and convert to half-points
3487
+ // CSS font-size values: "0.875rem", "14px", "12pt"
3488
+ // DOCX size is in half-points (1pt = 2 half-points)
3489
+ let fontSize;
3490
+ if (childStyles.fontSize) {
3491
+ const fs = childStyles.fontSize;
3492
+ if (fs.endsWith("rem")) {
3493
+ // rem relative to 16px base, convert to points then half-points
3494
+ const remValue = parseFloat(fs);
3495
+ if (!isNaN(remValue)) {
3496
+ // 1rem = 16px = 12pt, so 0.875rem = 10.5pt = 21 half-points
3497
+ fontSize = Math.round(remValue * 12 * 2);
3498
+ }
3499
+ }
3500
+ else if (fs.endsWith("px")) {
3501
+ // px to points: 1px = 0.75pt
3502
+ const pxValue = parseFloat(fs);
3503
+ if (!isNaN(pxValue)) {
3504
+ fontSize = Math.round(pxValue * 0.75 * 2);
3505
+ }
3506
+ }
3507
+ else if (fs.endsWith("pt")) {
3508
+ const ptValue = parseFloat(fs);
3509
+ if (!isNaN(ptValue)) {
3510
+ fontSize = Math.round(ptValue * 2);
3511
+ }
3512
+ }
3513
+ }
3514
+ // If there's any styling, use InlineRun; otherwise plain string
3515
+ if (isBold || isItalic || hexColor || fontSize) {
3516
+ lines.push([{ text, bold: isBold, italic: isItalic, color: hexColor, size: fontSize }]);
3517
+ }
3518
+ else {
3519
+ lines.push(text);
3520
+ }
3521
+ }
3522
+ }
3523
+ }
3524
+ columnLines.push(lines);
3525
+ maxLines = Math.max(maxLines, lines.length);
3526
+ }
3527
+ // Create rows: each row has one cell per column
3528
+ const rows = [];
3529
+ for (let i = 0; i < maxLines; i++) {
3530
+ const rowCells = [];
3531
+ for (const lines of columnLines) {
3532
+ rowCells.push(lines[i] || ""); // Empty string if this column has fewer lines
3533
+ }
3534
+ rows.push(rowCells);
3535
+ }
3536
+ if (rows.length > 0) {
3537
+ elements.push({ type: "table", rows, hasHeader: false, noBorders: true });
3538
+ return;
3539
+ }
3540
+ }
3541
+ }
3542
+ // GENERALIZED: Check for grid/flex containers with styled card children
3543
+ // This detects any div that:
3544
+ // 1. Uses grid or flex layout
3545
+ // 2. Contains multiple similar child divs
3546
+ // 3. Each child has a "value" (larger/bold text) and "label" (smaller/muted text) pattern
3547
+ if (isGridOrFlexContainer(element, cssContext)) {
3548
+ const childDivs = Array.from(element.children).filter((child) => child.tagName.toLowerCase() === "div");
3549
+ // Check if this looks like a stats/card grid (2+ similar structured children)
3550
+ if (childDivs.length >= 2) {
3551
+ const cards = [];
3552
+ for (const card of childDivs) {
3553
+ // Look for value/label/change pattern in child structure
3554
+ // Value: element with larger font, bold, or accent color
3555
+ // Label: element with smaller font or muted color
3556
+ // Change: element with very small font and positive/negative color (comparison text like "↑ 8% vs Q4 2024")
3557
+ const cardChildren = Array.from(card.children);
3558
+ // GENERALIZED VALIDATION: Stats cards should have simple structure (2-4 elements)
3559
+ // Skip complex structures like signature blocks that have many children
3560
+ if (cardChildren.length > 6) {
3561
+ continue;
3562
+ }
3563
+ let valueEl = null;
3564
+ let labelEl = null;
3565
+ let changeEl = null;
3566
+ for (const child of cardChildren) {
3567
+ const childStyles = getElementStyles(child, cssContext);
3568
+ const fontSize = childStyles.fontSize || "";
3569
+ const fontWeight = childStyles.fontWeight || "";
3570
+ const color = childStyles.color || "";
3571
+ // Detect value element: larger font, bold, or accent color
3572
+ const isLargerFont = fontSize.includes("1.75rem") || fontSize.includes("1.5rem") ||
3573
+ fontSize.includes("2rem") || parseFloat(fontSize) > 1;
3574
+ const isBold = fontWeight === "700" || fontWeight === "bold";
3575
+ const isAccentColor = color.includes("accent") || color.includes("#2563eb") ||
3576
+ color.includes("--color-accent");
3577
+ // Detect label element: smaller font or muted color
3578
+ const isSmallerFont = fontSize.includes("0.875rem") || fontSize.includes("0.75rem");
3579
+ const isMutedColor = color.includes("muted") || color.includes("#6b7280") ||
3580
+ color.includes("--color-muted");
3581
+ // Detect change element: very small font (0.75rem) with semantic colors (green/red for positive/negative)
3582
+ const isVerySmallFont = fontSize.includes("0.75rem") || fontSize.includes("0.7rem");
3583
+ const isPositiveColor = color.includes("#10b981") || color.includes("10b981") || color.includes("green");
3584
+ const isNegativeColor = color.includes("#ef4444") || color.includes("ef4444") || color.includes("red");
3585
+ const isChangeColor = isPositiveColor || isNegativeColor;
3586
+ if ((isLargerFont || isBold || isAccentColor) && !valueEl) {
3587
+ valueEl = child;
3588
+ }
3589
+ else if ((isSmallerFont || isMutedColor) && !labelEl) {
3590
+ labelEl = child;
3591
+ }
3592
+ else if ((isVerySmallFont || isChangeColor) && !changeEl) {
3593
+ // Change element: usually the third child with very small font and trend indicator
3594
+ changeEl = child;
3595
+ }
3596
+ }
3597
+ // If we couldn't detect by styles, fall back to position (first = value, second = label, third = change)
3598
+ if (!valueEl && cardChildren.length >= 1) {
3599
+ valueEl = cardChildren[0];
3600
+ }
3601
+ if (!labelEl && cardChildren.length >= 2) {
3602
+ labelEl = cardChildren[1];
3603
+ }
3604
+ if (!changeEl && cardChildren.length >= 3) {
3605
+ changeEl = cardChildren[2];
3606
+ }
3607
+ if (valueEl && labelEl) {
3608
+ const value = getTextContent(valueEl).trim();
3609
+ const label = getTextContent(labelEl).trim();
3610
+ const change = changeEl ? getTextContent(changeEl).trim() : undefined;
3611
+ // GENERALIZED VALIDATION: Stats cards should have short, distinct content
3612
+ // Skip if value or label are too long (indicates paragraph content, not stats)
3613
+ // Skip if they contain multiple sentences or newlines
3614
+ const MAX_VALUE_LENGTH = 50; // e.g., "$4,500.00", "127%", "2.5M"
3615
+ const MAX_LABEL_LENGTH = 100; // e.g., "Total Revenue", "Active Users"
3616
+ const isValueLike = value.length > 0 && value.length <= MAX_VALUE_LENGTH &&
3617
+ !value.includes('\n') && !value.includes('.');
3618
+ const isLabelLike = label.length > 0 && label.length <= MAX_LABEL_LENGTH &&
3619
+ !label.includes('\n');
3620
+ // GENERALIZED: Values that end with ':' are labels, not stats values
3621
+ // This prevents press-meta patterns like "Date:" / "February 12, 2026" from matching
3622
+ const isFieldLabel = value.endsWith(':');
3623
+ // Skip this card if it doesn't look like stats content
3624
+ if (!isValueLike || !isLabelLike || isFieldLabel) {
3625
+ continue;
3626
+ }
3627
+ // Extract colors from elements
3628
+ const valueColor = extractTextColor(valueEl, cssContext);
3629
+ const labelColor = extractTextColor(labelEl, cssContext);
3630
+ const changeColor = changeEl ? extractTextColor(changeEl, cssContext) : undefined;
3631
+ // Extract card background and border using generalized style extraction
3632
+ const cardStyles = getElementStyles(card, cssContext);
3633
+ const backgroundColor = cardStyles.backgroundColor
3634
+ ? extractHexColor(cardStyles.backgroundColor)
3635
+ : undefined;
3636
+ const borderColor = extractBorderColorFromStyle(cardStyles);
3637
+ cards.push({
3638
+ value,
3639
+ label,
3640
+ change,
3641
+ valueColor,
3642
+ labelColor,
3643
+ changeColor,
3644
+ backgroundColor,
3645
+ borderColor,
3646
+ });
3647
+ }
3648
+ }
3649
+ if (cards.length >= 2) {
3650
+ elements.push({ type: "stats-grid", cards });
3651
+ return;
3652
+ }
3653
+ }
3654
+ }
3655
+ // GENERALIZED: Check for flex containers with inline key-value metadata items
3656
+ // Pattern: flex container with child divs, each containing label:value pairs
3657
+ // Example: .press-meta with "Date: Feb 12" and "Location: San Francisco"
3658
+ // These should render as a single inline paragraph, not separate lines
3659
+ if (isGridOrFlexContainer(element, cssContext)) {
3660
+ const childDivs = Array.from(element.children).filter((child) => child.tagName.toLowerCase() === "div");
3661
+ if (childDivs.length >= 2) {
3662
+ // Check if each child looks like a key-value pair (label ending with ":" + value)
3663
+ const metaItems = [];
3664
+ let isMetadataPattern = true;
3665
+ for (const child of childDivs) {
3666
+ // Get all text-containing children
3667
+ const textElements = Array.from(child.querySelectorAll("span, strong, em, b, i"));
3668
+ if (textElements.length >= 2) {
3669
+ const labelEl = textElements[0];
3670
+ const valueEl = textElements[1];
3671
+ if (labelEl && valueEl) {
3672
+ const label = getTextContent(labelEl).trim();
3673
+ const value = getTextContent(valueEl).trim();
3674
+ // Check if label ends with ":"
3675
+ if (label.endsWith(":") && value.length > 0) {
3676
+ metaItems.push({ label, value });
3677
+ continue;
3678
+ }
3679
+ }
3680
+ }
3681
+ // If any child doesn't match the pattern, break
3682
+ isMetadataPattern = false;
3683
+ break;
3684
+ }
3685
+ // If all children are key-value pairs, render as inline paragraph
3686
+ if (isMetadataPattern && metaItems.length >= 2) {
3687
+ // Build inline runs with spacing between items
3688
+ const runs = [];
3689
+ const elementStyles = getElementStyles(element, cssContext);
3690
+ const elementColor = elementStyles.color ? extractHexColor(elementStyles.color) : undefined;
3691
+ for (let i = 0; i < metaItems.length; i++) {
3692
+ const item = metaItems[i];
3693
+ // Add label (bold)
3694
+ runs.push({ text: item.label, bold: true, color: elementColor });
3695
+ // Add value with space
3696
+ runs.push({ text: ` ${item.value}`, color: elementColor });
3697
+ // Add separator between items (except last)
3698
+ if (i < metaItems.length - 1) {
3699
+ runs.push({ text: " ", color: elementColor }); // Tab-like spacing
3700
+ }
3701
+ }
3702
+ const text = runs.map(r => r.text).join("");
3703
+ elements.push({ type: "paragraph", text, runs });
3704
+ return;
3705
+ }
3706
+ }
3707
+ }
3708
+ // GENERALIZED: Check for div that contains an SVG chart
3709
+ // If div has ONLY an SVG (no text), treat entire div as chart wrapper
3710
+ // If div has BOTH SVG and text content, extract SVG as chart AND process text separately
3711
+ const svgElement = element.querySelector(":scope > svg, :scope > div > svg");
3712
+ // Recursively check if element has meaningful text content (direct or in nested containers)
3713
+ // This determines if the div is a dedicated SVG wrapper or a mixed content container
3714
/**
 * Report whether `el` holds meaningful text outside of any SVG.
 *
 * A child counts as text when it is:
 *  - a text node with non-whitespace content,
 *  - an h1-h6, p or span whose getTextContent() result is non-blank, or
 *  - a div/section/article/aside/main/nav/header/footer for which this
 *    same check succeeds recursively.
 * svg children and every other tag are ignored.
 *
 * @param {Element} el - Element whose child nodes are inspected.
 * @returns {boolean} true as soon as one qualifying child is found.
 */
const checkForTextContent = (el) => {
    // Numeric DOM node-type codes compared against node.nodeType.
    const TEXT_NODE_TYPE = 3;
    const ELEMENT_NODE_TYPE = 1;
    const isNonBlank = (value) => (value || "").trim().length > 0;
    // some() stops at the first qualifying child, in document order.
    return Array.from(el.childNodes).some((child) => {
        if (child.nodeType === TEXT_NODE_TYPE) {
            return isNonBlank(child.textContent);
        }
        if (child.nodeType !== ELEMENT_NODE_TYPE) {
            return false;
        }
        switch (child.tagName?.toLowerCase()) {
            // Text-bearing elements: judged by their own text content.
            case "h1":
            case "h2":
            case "h3":
            case "h4":
            case "h5":
            case "h6":
            case "p":
            case "span":
                return isNonBlank(getTextContent(child));
            // Structural containers: look inside them recursively.
            case "div":
            case "section":
            case "article":
            case "aside":
            case "main":
            case "nav":
            case "header":
            case "footer":
                return checkForTextContent(child);
            // svg (the chart itself) and any other tag do not count as text.
            default:
                return false;
        }
    });
};
3744
+ const hasTextContent = checkForTextContent(element);
3745
+ // Process SVG if present, not decorative, and not already processed
3746
+ if (svgElement && !processedSvgs.has(svgElement) && !isDecorativeSvg(svgElement, element, cssContext)) {
3747
+ // Extract SVG content and dimensions
3748
+ const svgContent = svgElement.outerHTML;
3749
+ const viewBox = svgElement.getAttribute("viewBox");
3750
+ let width = 700;
3751
+ let height = 320;
3752
+ if (viewBox) {
3753
+ const parts = viewBox.split(/\s+/).map(Number);
3754
+ if (parts.length >= 4) {
3755
+ width = parts[2] || width;
3756
+ height = parts[3] || height;
3757
+ }
3758
+ }
3759
+ else {
3760
+ // Try width/height attributes
3761
+ const widthAttr = svgElement.getAttribute("width");
3762
+ const heightAttr = svgElement.getAttribute("height");
3763
+ if (widthAttr && !widthAttr.includes("%")) {
3764
+ width = parseInt(widthAttr, 10) || width;
3765
+ }
3766
+ if (heightAttr && !heightAttr.includes("%")) {
3767
+ height = parseInt(heightAttr, 10) || height;
3768
+ }
3769
+ }
3770
+ // Look for title - but ONLY for dedicated SVG wrappers (no mixed text content)
3771
+ // For mixed content containers, the heading will be processed separately as DOM content
3772
+ let title;
3773
+ if (!hasTextContent) {
3774
+ const titleEl = element.querySelector("h4, h3, h5, h6");
3775
+ if (titleEl) {
3776
+ title = getTextContent(titleEl).trim();
3777
+ }
3778
+ if (!title) {
3779
+ const prevSibling = element.previousElementSibling;
3780
+ if (prevSibling && /^h[1-6]$/i.test(prevSibling.tagName)) {
3781
+ title = getTextContent(prevSibling).trim();
3782
+ }
3783
+ }
3784
+ }
3785
+ // GENERALIZED: Extract background color from parent container for SVG rendering
3786
+ // This handles cases like .hero-image { background: linear-gradient(...) }
3787
+ // where the SVG needs to be rendered with the container's background color
3788
+ const containerStyles = getElementStyles(element, cssContext);
3789
+ let backgroundColor;
3790
+ if (containerStyles.backgroundColor) {
3791
+ backgroundColor = extractHexColor(containerStyles.backgroundColor);
3792
+ }
3793
+ elements.push({ type: "svg-chart", svgContent, width, height, title, backgroundColor });
3794
+ // Mark this SVG as processed to avoid duplicate processing when iterating children
3795
+ processedSvgs.add(svgElement);
3796
+ // If no text content, we're done (dedicated SVG wrapper)
3797
+ if (!hasTextContent) {
3798
+ return;
3799
+ }
3800
+ // Otherwise, continue to process remaining children (mixed content)
3801
+ // Fall through to CONTAINER_TAGS processing - SVG children will be skipped there
3802
+ }
3803
+ }
3804
+ // Check for SVG elements (standalone charts)
3805
+ // Filter out decorative SVGs (backgrounds, patterns, icons)
3806
+ if (tagName === "svg") {
3807
+ // Skip if this SVG was already processed by a parent container's mixed content handler
3808
+ if (processedSvgs.has(element)) {
3809
+ return;
3810
+ }
3811
+ // For standalone SVGs, use the SVG's parent as context for decorative detection
3812
+ // If no parent or parent is body, check SVG itself for decorative patterns
3813
+ const parent = element.parentElement || element;
3814
+ if (isDecorativeSvg(element, parent, cssContext)) {
3815
+ // Skip decorative SVGs - don't convert to chart
3816
+ return;
3817
+ }
3818
+ // Extract SVG content and dimensions
3819
+ const svgContent = element.outerHTML;
3820
+ const viewBox = element.getAttribute("viewBox");
3821
+ let width = 700;
3822
+ let height = 320;
3823
+ if (viewBox) {
3824
+ const parts = viewBox.split(/\s+/).map(Number);
3825
+ if (parts.length >= 4) {
3826
+ width = parts[2] || width;
3827
+ height = parts[3] || height;
3828
+ }
3829
+ }
3830
+ else {
3831
+ const widthAttr = element.getAttribute("width");
3832
+ const heightAttr = element.getAttribute("height");
3833
+ if (widthAttr && !widthAttr.includes("%")) {
3834
+ width = parseInt(widthAttr, 10) || width;
3835
+ }
3836
+ if (heightAttr && !heightAttr.includes("%")) {
3837
+ height = parseInt(heightAttr, 10) || height;
3838
+ }
3839
+ }
3840
+ // Try to find a title
3841
+ const titleEl = element.querySelector("title");
3842
+ const title = titleEl ? getTextContent(titleEl).trim() : undefined;
3843
+ // GENERALIZED: Extract background color from parent container for SVG rendering
3844
+ // This handles cases where an SVG is placed inside a styled container
3845
+ let backgroundColor;
3846
+ if (parent && parent !== element) {
3847
+ const parentStyles = getElementStyles(parent, cssContext);
3848
+ if (parentStyles.backgroundColor) {
3849
+ backgroundColor = extractHexColor(parentStyles.backgroundColor);
3850
+ }
3851
+ }
3852
+ elements.push({ type: "svg-chart", svgContent, width, height, title, backgroundColor });
3853
+ return;
3854
+ }
3855
+ // Handle <img> elements - external images that need to be fetched
3856
+ if (tagName === "img") {
3857
+ const src = element.getAttribute("src");
3858
+ if (src) {
3859
+ const alt = element.getAttribute("alt") || undefined;
3860
+ // Extract width and height from attributes only
3861
+ // Computed styles are not available in linkedom (Node.js)
3862
+ // Actual dimensions will be obtained when the image is fetched
3863
+ let width;
3864
+ let height;
3865
+ const widthAttr = element.getAttribute("width");
3866
+ const heightAttr = element.getAttribute("height");
3867
+ if (widthAttr && !widthAttr.includes("%")) {
3868
+ width = parseInt(widthAttr, 10) || undefined;
3869
+ }
3870
+ if (heightAttr && !heightAttr.includes("%")) {
3871
+ height = parseInt(heightAttr, 10) || undefined;
3872
+ }
3873
+ // Check if img is inside a figure with figcaption
3874
+ let caption;
3875
+ const parentFigure = element.closest("figure");
3876
+ if (parentFigure) {
3877
+ const figcaption = parentFigure.querySelector("figcaption");
3878
+ if (figcaption) {
3879
+ caption = getTextContent(figcaption).trim() || undefined;
3880
+ }
3881
+ }
3882
+ elements.push({ type: "image", src, alt, width, height, caption });
3883
+ }
3884
+ return;
3885
+ }
3886
+ // Handle <picture> elements - get the best source or fallback img
3887
+ if (tagName === "picture") {
3888
+ // Find the fallback img inside picture
3889
+ const imgEl = element.querySelector("img");
3890
+ if (imgEl) {
3891
+ const src = imgEl.getAttribute("src");
3892
+ if (src) {
3893
+ const alt = imgEl.getAttribute("alt") || undefined;
3894
+ // Extract width and height
3895
+ let width;
3896
+ let height;
3897
+ const widthAttr = imgEl.getAttribute("width");
3898
+ const heightAttr = imgEl.getAttribute("height");
3899
+ if (widthAttr && !widthAttr.includes("%")) {
3900
+ width = parseInt(widthAttr, 10) || undefined;
3901
+ }
3902
+ if (heightAttr && !heightAttr.includes("%")) {
3903
+ height = parseInt(heightAttr, 10) || undefined;
3904
+ }
3905
+ // Check for figcaption
3906
+ let caption;
3907
+ const parentFigure = element.closest("figure");
3908
+ if (parentFigure) {
3909
+ const figcaption = parentFigure.querySelector("figcaption");
3910
+ if (figcaption) {
3911
+ caption = getTextContent(figcaption).trim() || undefined;
3912
+ }
3913
+ }
3914
+ elements.push({ type: "image", src, alt, width, height, caption });
3915
+ }
3916
+ }
3917
+ return;
3918
+ }
3919
+ // Handle <figure> elements containing images - extract img and caption together
3920
+ if (tagName === "figure") {
3921
+ const imgEl = element.querySelector("img") || element.querySelector("picture img");
3922
+ if (imgEl) {
3923
+ const src = imgEl.getAttribute("src");
3924
+ if (src) {
3925
+ const alt = imgEl.getAttribute("alt") || undefined;
3926
+ // Extract width and height from attributes only
3927
+ // Computed styles are not available in linkedom (Node.js)
3928
+ // Actual dimensions will be obtained when the image is fetched
3929
+ let width;
3930
+ let height;
3931
+ const widthAttr = imgEl.getAttribute("width");
3932
+ const heightAttr = imgEl.getAttribute("height");
3933
+ if (widthAttr && !widthAttr.includes("%")) {
3934
+ width = parseInt(widthAttr, 10) || undefined;
3935
+ }
3936
+ if (heightAttr && !heightAttr.includes("%")) {
3937
+ height = parseInt(heightAttr, 10) || undefined;
3938
+ }
3939
+ // Extract caption from figcaption
3940
+ let caption;
3941
+ const figcaption = element.querySelector("figcaption");
3942
+ if (figcaption) {
3943
+ caption = getTextContent(figcaption).trim() || undefined;
3944
+ }
3945
+ elements.push({ type: "image", src, alt, width, height, caption });
3946
+ return;
3947
+ }
3948
+ }
3949
+ // If no img found, fall through to container handling
3950
+ }
3951
+ // Check for blockquote/callout before generic container handling
3952
+ // Uses style-based detection, NOT class names
3953
+ if (isBlockquoteOrCallout(element, cssContext)) {
3954
+ const content = parseBlockquoteContent(element, cssContext);
3955
+ if (content.length > 0) {
3956
+ // Extract styling from CSS classes and inline styles (generalized approach)
3957
+ const elementStyles = getElementStyles(element, cssContext);
3958
+ let borderColor;
3959
+ let backgroundColor;
3960
+ // Extract background color from element styles
3961
+ if (elementStyles.backgroundColor) {
3962
+ const hex = extractHexColor(elementStyles.backgroundColor);
3963
+ if (hex)
3964
+ backgroundColor = hex;
3965
+ }
3966
+ // Extract border color from element styles
3967
+ if (elementStyles.borderColor) {
3968
+ const hex = extractHexColor(elementStyles.borderColor);
3969
+ if (hex)
3970
+ borderColor = hex;
3971
+ }
3972
+ else if (elementStyles.border) {
3973
+ // Try to extract color from border shorthand (e.g., "4px solid #2563eb")
3974
+ const hex = extractBorderColorFromStyle(elementStyles);
3975
+ if (hex)
3976
+ borderColor = hex;
3977
+ }
3978
+ // Also check inline styles (overrides CSS)
3979
+ const inlineStyle = element.getAttribute("style") || "";
3980
+ const bgMatch = inlineStyle.match(/background(?:-color)?:\s*([^;]+)/i);
3981
+ if (bgMatch) {
3982
+ const extracted = extractHexColor(bgMatch[1]);
3983
+ if (extracted)
3984
+ backgroundColor = extracted;
3985
+ }
3986
+ const borderMatch = inlineStyle.match(/border(?:-left)?(?:-color)?:\s*([^;]+)/i);
3987
+ if (borderMatch) {
3988
+ // Parse border value which could be shorthand or just color
3989
+ const borderValue = borderMatch[1];
3990
+ const colorInBorder = borderValue.match(/#([0-9a-fA-F]{3,6})/);
3991
+ if (colorInBorder) {
3992
+ let hex = colorInBorder[1];
3993
+ if (hex.length === 3) {
3994
+ hex = hex[0] + hex[0] + hex[1] + hex[1] + hex[2] + hex[2];
3995
+ }
3996
+ borderColor = hex.toUpperCase();
3997
+ }
3998
+ }
3999
+ // Determine variant based on styling characteristics (NOT class names)
4000
+ // - "executive-summary": typically uses accent blue border with light background
4001
+ // - "callout": typically has themed background (yellow, blue, red, green)
4002
+ let variant;
4003
+ // If it's a semantic blockquote, treat as generic
4004
+ if (element.tagName.toLowerCase() === "blockquote") {
4005
+ variant = undefined;
4006
+ }
4007
+ else {
4008
+ // For styled divs, default to callout variant
4009
+ variant = "callout";
4010
+ }
4011
+ // GENERALIZED: Determine border style - "full" (all sides), "left" (left accent), or "none" (no border)
4012
+ // If CSS has `border:` shorthand (all sides), use full border
4013
+ // If CSS has only `border-left:`, use left accent border (callout style)
4014
+ // If NO border is specified but has background, use no border (hero sections, title blocks)
4015
+ let borderStyle;
4016
+ // Check if element has any border styling
4017
+ const hasBorderStyling = !!borderColor ||
4018
+ !!elementStyles.border ||
4019
+ !!elementStyles.borderLeft ||
4020
+ !!elementStyles.borderColor;
4021
+ if (!hasBorderStyling && backgroundColor) {
4022
+ // Has background but no border - use "none" style (title blocks, hero sections)
4023
+ borderStyle = "none";
4024
+ }
4025
+ else if (elementStyles.border && !elementStyles.border.includes("none")) {
4026
+ // Has full border shorthand (e.g., "1px solid #e5e7eb")
4027
+ // Check it's not just inherited from border-left
4028
+ if (!elementStyles.borderLeft || elementStyles.border !== elementStyles.borderLeft) {
4029
+ borderStyle = "full";
4030
+ }
4031
+ }
4032
+ // Otherwise borderStyle remains undefined, which defaults to "left" in convert.ts
4033
+ // GENERALIZED: Extract background gradient for container gradient rendering
4034
+ const backgroundGradient = elementStyles.backgroundGradient;
4035
+ elements.push({
4036
+ type: "blockquote",
4037
+ content,
4038
+ borderColor,
4039
+ backgroundColor,
4040
+ backgroundGradient,
4041
+ variant,
4042
+ borderStyle,
4043
+ });
4044
+ }
4045
+ return;
4046
+ }
4047
+ // Handle span elements specially - they're often inline styled elements
4048
+ // When a span is a direct child being processed, it should be part of a paragraph
4049
+ // but the parent div handling below should catch most cases
4050
+ if (tagName === "span") {
4051
+ const text = getTextContent(element).trim();
4052
+ if (text) {
4053
+ // Extract styling from the span
4054
+ const styles = getElementStyles(element, cssContext);
4055
+ const backgroundColor = styles.backgroundColor ? extractHexColor(styles.backgroundColor) : undefined;
4056
+ const spanColor = styles.color ? extractHexColor(styles.color) : undefined;
4057
+ if (backgroundColor || spanColor) {
4058
+ // Has styling - use runs to preserve it
4059
+ const runs = [{
4060
+ text,
4061
+ bold: false,
4062
+ italic: false,
4063
+ color: spanColor,
4064
+ backgroundColor,
4065
+ }];
4066
+ elements.push({ type: "paragraph", text, runs, alignment });
4067
+ }
4068
+ else {
4069
+ elements.push({ type: "paragraph", text, alignment, color: elementColor });
4070
+ }
4071
+ }
4072
+ return;
4073
+ }
4074
+ // Detect skill item pattern in main processNode too (for resumes without two-column DOCX support)
4075
+ if (tagName === "div") {
4076
+ const skillItem = detectSkillItem(element);
4077
+ if (skillItem) {
4078
+ // Output as single line: "Skill Name: 95%"
4079
+ const text = `${skillItem.name}: ${skillItem.percentage}`;
4080
+ elements.push({ type: "paragraph", text, color: elementColor });
4081
+ return;
4082
+ }
4083
+ // Detect language item pattern: language name + proficiency dots
4084
+ const languageItem = detectLanguageItem(element, cssContext);
4085
+ if (languageItem) {
4086
+ // Create visual representation with filled and empty dots
4087
+ const filledDots = "●".repeat(languageItem.filledCount);
4088
+ const emptyDots = "○".repeat(languageItem.totalCount - languageItem.filledCount);
4089
+ const text = `${languageItem.name}: ${filledDots}${emptyDots}`;
4090
+ elements.push({ type: "paragraph", text, color: elementColor });
4091
+ return;
4092
+ }
4093
+ }
4094
+ if (CONTAINER_TAGS.includes(tagName)) {
4095
+ // Before processing content, check if this element has border-top
4096
+ // (like footer sections which have a horizontal rule before the content)
4097
+ const borderTopColor = extractBorderTopColor(element, cssContext);
4098
+ if (borderTopColor) {
4099
+ elements.push({ type: "horizontal-rule", color: borderTopColor });
4100
+ }
4101
+ // Check if this container has ONLY inline content (spans, text nodes, inline formatting)
4102
+ // If so, treat the entire container as a single paragraph instead of separate elements
4103
+ const hasOnlyInlineContent = isInlineOnlyContainer(element);
4104
+ if (hasOnlyInlineContent) {
4105
+ // Get container's styling (italic, bold, color, font-family, text-transform) from CSS
4106
+ const containerStyles = getElementStyles(element, cssContext);
4107
+ const containerItalic = containerStyles.fontStyle === "italic";
4108
+ const containerBold = containerStyles.fontWeight === "700" || containerStyles.fontWeight === "bold" || containerStyles.fontWeight === "600";
4109
+ const containerColor = containerStyles.color ? extractHexColor(containerStyles.color) : undefined;
4110
+ // GENERALIZED: Extract font-family for inheritance to runs
4111
+ const containerFontFamily = containerStyles.fontFamily;
4112
+ // GENERALIZED: Extract text-transform (uppercase, lowercase, capitalize)
4113
+ let textTransform;
4114
+ if (containerStyles.textTransform === "uppercase" || containerStyles.textTransform === "lowercase" || containerStyles.textTransform === "capitalize") {
4115
+ textTransform = containerStyles.textTransform;
4116
+ }
4117
+ // GENERALIZED: Extract margin-bottom for paragraph spacing
4118
+ const spacingAfter = containerStyles.marginBottom ? parseMarginToTwips(containerStyles.marginBottom) : undefined;
4119
+ // GENERALIZED: Extract line-height for vertical spacing
4120
+ const lineSpacing = containerStyles.lineHeight ? parseLineHeightToDocx(containerStyles.lineHeight) : undefined;
4121
+ // GENERALIZED: Check if this is a horizontal flex container with multiple children
4122
+ // If so, we need to render as a horizontal table to represent the CSS gap
4123
+ const isHorizFlex = isHorizontalFlexContainer(element, cssContext);
4124
+ const directChildren = Array.from(element.children);
4125
+ const hasMultipleFlexChildren = isHorizFlex && directChildren.length > 1;
4126
+ if (hasMultipleFlexChildren) {
4127
+ // GENERALIZED: Convert horizontal flexbox to a single-row borderless table
4128
+ // Each flex item becomes a table cell, preserving the horizontal layout
4129
+ const flexChildren = directChildren.filter(child => child.nodeType === Node.ELEMENT_NODE);
4130
+ const tableCells = [];
4131
+ for (const flexChild of flexChildren) {
4132
+ // Extract runs from this flex child
4133
+ const childRuns = extractInlineRuns(flexChild, cssContext, undefined, containerFontFamily);
4134
+ if (childRuns.length > 0) {
4135
+ // Apply container's styles to runs that don't have their own
4136
+ childRuns.forEach(run => {
4137
+ if (!run.color && (elementColor || containerColor)) {
4138
+ run.color = elementColor || containerColor;
4139
+ }
4140
+ if (!run.italic && containerItalic) {
4141
+ run.italic = true;
4142
+ }
4143
+ if (!run.bold && containerBold) {
4144
+ run.bold = true;
4145
+ }
4146
+ if (!run.fontFamily && containerFontFamily) {
4147
+ run.fontFamily = containerFontFamily;
4148
+ }
4149
+ });
4150
+ // Use runs as cell content
4151
+ tableCells.push(childRuns);
4152
+ }
4153
+ }
4154
+ if (tableCells.length > 0) {
4155
+ // Create a single-row borderless table to represent horizontal flexbox
4156
+ elements.push({
4157
+ type: "table",
4158
+ rows: [tableCells],
4159
+ hasHeader: false,
4160
+ noBorders: true,
4161
+ });
4162
+ }
4163
+ }
4164
+ else {
4165
+ // Normal inline extraction - not a horizontal flex container
4166
+ const runs = extractInlineRuns(element, cssContext, undefined, containerFontFamily);
4167
+ if (runs.length > 0) {
4168
+ const text = runs.map(r => r.text).join("");
4169
+ // Apply container's styles to runs that don't have their own
4170
+ runs.forEach(run => {
4171
+ if (!run.color && (elementColor || containerColor)) {
4172
+ run.color = elementColor || containerColor;
4173
+ }
4174
+ if (!run.italic && containerItalic) {
4175
+ run.italic = true;
4176
+ }
4177
+ if (!run.bold && containerBold) {
4178
+ run.bold = true;
4179
+ }
4180
+ // GENERALIZED: Apply container's font-family to runs that don't have their own
4181
+ if (!run.fontFamily && containerFontFamily) {
4182
+ run.fontFamily = containerFontFamily;
4183
+ }
4184
+ });
4185
+ // Build paragraph with all extracted styles
4186
+ const hasStyling = hasInlineFormatting(runs) || elementColor || containerColor || spacingAfter !== undefined || lineSpacing !== undefined || textTransform !== undefined;
4187
+ if (hasStyling) {
4188
+ elements.push({
4189
+ type: "paragraph",
4190
+ text,
4191
+ runs: hasInlineFormatting(runs) ? runs : undefined,
4192
+ color: !hasInlineFormatting(runs) ? (elementColor || containerColor) : undefined,
4193
+ alignment,
4194
+ spacingAfter,
4195
+ lineSpacing,
4196
+ textTransform,
4197
+ });
4198
+ }
4199
+ else {
4200
+ elements.push({ type: "paragraph", text, alignment });
4201
+ }
4202
+ }
4203
+ }
4204
+ }
4205
+ else {
4206
+ // Process children individually (block-level content)
4207
+ // Pass elementColor only if this container explicitly sets a color (for special containers)
4208
+ for (const child of element.childNodes) {
4209
+ processNode(child, alignment, elementColor);
4210
+ }
4211
+ }
4212
+ // After processing children, check if this element has border-bottom
4213
+ // (like .title-block which should have a horizontal rule after the heading)
4214
+ const borderBottomColor = extractBorderBottomColor(element, cssContext);
4215
+ if (borderBottomColor) {
4216
+ elements.push({ type: "horizontal-rule", color: borderBottomColor });
4217
+ }
4218
+ return;
4219
+ }
4220
+ const text = getTextContent(element).trim();
4221
+ if (text && element.children.length === 0) {
4222
+ // Check for styling from CSS (background color, italic, etc.)
4223
+ const styles = getElementStyles(element, cssContext);
4224
+ const backgroundColor = styles.backgroundColor ? extractHexColor(styles.backgroundColor) : undefined;
4225
+ const isItalic = styles.fontStyle === "italic";
4226
+ const isBold = styles.fontWeight === "700" || styles.fontWeight === "bold" || styles.fontWeight === "600";
4227
+ if (backgroundColor || elementColor || isItalic || isBold) {
4228
+ // Has styling - use runs to preserve formatting
4229
+ const runs = [{
4230
+ text,
4231
+ bold: isBold,
4232
+ italic: isItalic,
4233
+ color: elementColor,
4234
+ backgroundColor,
4235
+ }];
4236
+ elements.push({ type: "paragraph", text, runs, alignment });
4237
+ }
4238
+ else {
4239
+ elements.push({ type: "paragraph", text, alignment });
4240
+ }
4241
+ }
4242
+ else {
4243
+ for (const child of element.childNodes) {
4244
+ processNode(child, alignment, elementColor);
4245
+ }
4246
+ }
4247
+ }
4248
+ for (const node of body.childNodes) {
4249
+ processNode(node);
4250
+ }
4251
+ return elements;
4252
+ }
4253
+ //# sourceMappingURL=parse.js.map