docgen-utils 1.0.11 → 1.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. package/dist/bundle.js +42918 -6708
  2. package/dist/bundle.min.js +289 -109
  3. package/dist/cli.js +26450 -1266
  4. package/dist/packages/cli/commands/export-docs.d.ts.map +1 -1
  5. package/dist/packages/cli/commands/export-docs.js +131 -2
  6. package/dist/packages/cli/commands/export-docs.js.map +1 -1
  7. package/dist/packages/cli/commands/export-slides.d.ts.map +1 -1
  8. package/dist/packages/cli/commands/export-slides.js +25 -1
  9. package/dist/packages/cli/commands/export-slides.js.map +1 -1
  10. package/dist/packages/docs/common.d.ts +10 -0
  11. package/dist/packages/docs/common.d.ts.map +1 -1
  12. package/dist/packages/docs/common.js.map +1 -1
  13. package/dist/packages/docs/convert.d.ts.map +1 -1
  14. package/dist/packages/docs/convert.js +246 -218
  15. package/dist/packages/docs/convert.js.map +1 -1
  16. package/dist/packages/docs/create-document.d.ts.map +1 -1
  17. package/dist/packages/docs/create-document.js +43 -3
  18. package/dist/packages/docs/create-document.js.map +1 -1
  19. package/dist/packages/docs/export.d.ts +9 -8
  20. package/dist/packages/docs/export.d.ts.map +1 -1
  21. package/dist/packages/docs/export.js +23 -36
  22. package/dist/packages/docs/export.js.map +1 -1
  23. package/dist/packages/docs/import-docx.d.ts.map +1 -1
  24. package/dist/packages/docs/import-docx.js +397 -7
  25. package/dist/packages/docs/import-docx.js.map +1 -1
  26. package/dist/packages/docs/parse-colors.d.ts +37 -0
  27. package/dist/packages/docs/parse-colors.d.ts.map +1 -0
  28. package/dist/packages/docs/parse-colors.js +507 -0
  29. package/dist/packages/docs/parse-colors.js.map +1 -0
  30. package/dist/packages/docs/parse-css.d.ts +98 -0
  31. package/dist/packages/docs/parse-css.d.ts.map +1 -0
  32. package/dist/packages/docs/parse-css.js +1592 -0
  33. package/dist/packages/docs/parse-css.js.map +1 -0
  34. package/dist/packages/docs/parse-helpers.d.ts +45 -0
  35. package/dist/packages/docs/parse-helpers.d.ts.map +1 -0
  36. package/dist/packages/docs/parse-helpers.js +214 -0
  37. package/dist/packages/docs/parse-helpers.js.map +1 -0
  38. package/dist/packages/docs/parse-inline.d.ts +41 -0
  39. package/dist/packages/docs/parse-inline.d.ts.map +1 -0
  40. package/dist/packages/docs/parse-inline.js +473 -0
  41. package/dist/packages/docs/parse-inline.js.map +1 -0
  42. package/dist/packages/docs/parse-layout.d.ts +57 -0
  43. package/dist/packages/docs/parse-layout.d.ts.map +1 -0
  44. package/dist/packages/docs/parse-layout.js +295 -0
  45. package/dist/packages/docs/parse-layout.js.map +1 -0
  46. package/dist/packages/docs/parse-special.d.ts +51 -0
  47. package/dist/packages/docs/parse-special.d.ts.map +1 -0
  48. package/dist/packages/docs/parse-special.js +251 -0
  49. package/dist/packages/docs/parse-special.js.map +1 -0
  50. package/dist/packages/docs/parse-units.d.ts +68 -0
  51. package/dist/packages/docs/parse-units.d.ts.map +1 -0
  52. package/dist/packages/docs/parse-units.js +275 -0
  53. package/dist/packages/docs/parse-units.js.map +1 -0
  54. package/dist/packages/docs/parse.d.ts.map +1 -1
  55. package/dist/packages/docs/parse.js +957 -2800
  56. package/dist/packages/docs/parse.js.map +1 -1
  57. package/dist/packages/slides/common.d.ts +7 -0
  58. package/dist/packages/slides/common.d.ts.map +1 -1
  59. package/dist/packages/slides/convert.d.ts.map +1 -1
  60. package/dist/packages/slides/convert.js +92 -7
  61. package/dist/packages/slides/convert.js.map +1 -1
  62. package/dist/packages/slides/fonts.d.ts +41 -0
  63. package/dist/packages/slides/fonts.d.ts.map +1 -0
  64. package/dist/packages/slides/fonts.js +209 -0
  65. package/dist/packages/slides/fonts.js.map +1 -0
  66. package/dist/packages/slides/import-pptx.d.ts.map +1 -1
  67. package/dist/packages/slides/import-pptx.js +583 -120
  68. package/dist/packages/slides/import-pptx.js.map +1 -1
  69. package/dist/packages/slides/parse.d.ts.map +1 -1
  70. package/dist/packages/slides/parse.js +724 -91
  71. package/dist/packages/slides/parse.js.map +1 -1
  72. package/dist/packages/slides/transform.d.ts +6 -6
  73. package/dist/packages/slides/transform.d.ts.map +1 -1
  74. package/dist/packages/slides/transform.js +25 -51
  75. package/dist/packages/slides/transform.js.map +1 -1
  76. package/package.json +3 -2
@@ -1,2748 +1,233 @@
1
1
  import { CONTAINER_TAGS } from "./common";
2
+ import { parseCssContext, getElementStyles, extractTextColor, extractBorderColorFromStyle, extractBorderBottomColor, extractBorderTopColor } from "./parse-css";
3
+ import { extractHexColor } from "./parse-colors";
4
+ import { parseCssLengthToTwips, parseCssPaddingToTwips, extractPaddingLeft, parseFontSizeToHalfPoints, parseMarginToTwips, parseLetterSpacingToTwips, parseLineHeightToDocx } from "./parse-units";
5
+ import { parseHeadingLevel, getTextAlignment, getTextContent, BLOCK_LEVEL_TAGS, hasBlockLevelChildren, isBlockquoteOrCallout, extractNestedListItems } from "./parse-helpers";
6
+ import { isInlineOnlyContainer, extractInlineRuns, hasInlineFormatting } from "./parse-inline";
7
+ import { isGridOrFlexContainer, isHorizontalFlexContainer, isDecorativeSvg, isTwoColumnGridLayout, findTwoColumnChildren, detectFlexEqualColumns, detectGridEqualColumns } from "./parse-layout";
8
+ import { detectSkillItem, detectLanguageItem, detectProgressBar, detectTimeline } from "./parse-special";
2
9
  /**
3
- * Remove @media blocks from CSS text by tracking brace nesting.
4
- * This properly handles nested braces within media queries.
10
+ * Parse content from a container element (like sidebar or main content).
11
+ * Handles headings, paragraphs, lists, and nested containers with color inheritance.
5
12
  */
6
- function removeMediaQueries(cssText) {
7
- let result = "";
8
- let i = 0;
9
- while (i < cssText.length) {
10
- // Check for @media at current position
11
- if (cssText.substring(i, i + 6).toLowerCase() === "@media") {
12
- // Find the opening brace
13
- let braceStart = cssText.indexOf("{", i);
14
- if (braceStart === -1) {
15
- // Malformed CSS, include rest as-is
16
- result += cssText.substring(i);
17
- break;
18
- }
19
- // Track brace depth to find matching closing brace
20
- let depth = 1;
21
- let j = braceStart + 1;
22
- while (j < cssText.length && depth > 0) {
23
- if (cssText[j] === "{") {
24
- depth++;
13
+ function parseContainerContent(element, cssContext, inheritedColor) {
14
+ const innerElements = [];
15
+ function processInnerNode(node, color) {
16
+ if (node.nodeType === Node.TEXT_NODE) {
17
+ const text = node.textContent?.trim();
18
+ if (text) {
19
+ if (color) {
20
+ innerElements.push({ type: "paragraph", text, color });
25
21
  }
26
- else if (cssText[j] === "}") {
27
- depth--;
22
+ else {
23
+ innerElements.push({ type: "paragraph", text });
28
24
  }
29
- j++;
30
- }
31
- // Skip the entire @media block (from @media to matching })
32
- i = j;
33
- }
34
- else {
35
- result += cssText[i];
36
- i++;
37
- }
38
- }
39
- return result;
40
- }
41
- /**
42
- * Extract the first color from a CSS gradient value.
43
- * Handles linear-gradient, radial-gradient, and conic-gradient.
44
- * Returns the first color stop (hex, rgb, rgba, hsl, or named color).
45
- *
46
- * Examples:
47
- * "linear-gradient(135deg, #7c3aed, #a78bfa, #c084fc)" -> "#7c3aed"
48
- * "linear-gradient(to right, red, blue)" -> "red"
49
- * "radial-gradient(circle, rgb(124, 58, 237), purple)" -> "rgb(124, 58, 237)"
50
- */
51
- function extractFirstGradientColor(value) {
52
- if (!value)
53
- return undefined;
54
- // Check if it's a gradient
55
- const gradientMatch = value.match(/(?:linear|radial|conic)-gradient\s*\(([^)]+)\)/i);
56
- if (!gradientMatch)
57
- return undefined;
58
- const gradientContent = gradientMatch[1];
59
- // Split by commas, but handle rgb/rgba/hsl/hsla which contain commas
60
- // Find color values after the direction/angle part
61
- const parts = [];
62
- let current = "";
63
- let parenDepth = 0;
64
- for (let i = 0; i < gradientContent.length; i++) {
65
- const char = gradientContent[i];
66
- if (char === "(") {
67
- parenDepth++;
68
- current += char;
69
- }
70
- else if (char === ")") {
71
- parenDepth--;
72
- current += char;
73
- }
74
- else if (char === "," && parenDepth === 0) {
75
- parts.push(current.trim());
76
- current = "";
77
- }
78
- else {
79
- current += char;
80
- }
81
- }
82
- if (current.trim()) {
83
- parts.push(current.trim());
84
- }
85
- // Direction/angle keywords to skip
86
- const directionKeywords = /^(?:\d+deg|to\s+(?:top|bottom|left|right|top\s+left|top\s+right|bottom\s+left|bottom\s+right)|circle|ellipse|at\s+|closest-side|farthest-side|closest-corner|farthest-corner)/i;
87
- // Color patterns
88
- const colorPattern = /^(?:#[0-9a-fA-F]{3,8}|rgba?\s*\([^)]+\)|hsla?\s*\([^)]+\)|[a-zA-Z]+)(?:\s+\d+%)?$/i;
89
- for (const part of parts) {
90
- // Skip direction/angle specifications
91
- if (directionKeywords.test(part))
92
- continue;
93
- // Check if this looks like a color (possibly with a percentage stop position)
94
- if (colorPattern.test(part)) {
95
- // Extract just the color part (remove percentage if present)
96
- const colorOnly = part.replace(/\s+\d+%$/, "").trim();
97
- return colorOnly;
98
- }
99
- }
100
- return undefined;
101
- }
102
- /**
103
- * Parse a CSS gradient value into a TextGradient structure.
104
- * Extracts angle and all color stops for use in DOCX gradient fills.
105
- *
106
- * Examples:
107
- * "linear-gradient(135deg, #7c3aed, #a78bfa, #c084fc)" ->
108
- * { angle: 135, stops: [{ color: "7C3AED", position: 0 }, { color: "A78BFA", position: 50 }, { color: "C084FC", position: 100 }] }
109
- */
110
- function parseGradient(value) {
111
- if (!value)
112
- return undefined;
113
- // Check if it's a linear gradient (only linear gradients are supported in DOCX)
114
- const gradientMatch = value.match(/linear-gradient\s*\(([^)]+)\)/i);
115
- if (!gradientMatch)
116
- return undefined;
117
- const gradientContent = gradientMatch[1];
118
- // Split by commas, but handle rgb/rgba/hsl/hsla which contain commas
119
- const parts = [];
120
- let current = "";
121
- let parenDepth = 0;
122
- for (let i = 0; i < gradientContent.length; i++) {
123
- const char = gradientContent[i];
124
- if (char === "(") {
125
- parenDepth++;
126
- current += char;
127
- }
128
- else if (char === ")") {
129
- parenDepth--;
130
- current += char;
131
- }
132
- else if (char === "," && parenDepth === 0) {
133
- parts.push(current.trim());
134
- current = "";
135
- }
136
- else {
137
- current += char;
138
- }
139
- }
140
- if (current.trim()) {
141
- parts.push(current.trim());
142
- }
143
- // Extract angle from first part if it's a direction
144
- let angle = 180; // Default: top to bottom
145
- let colorStartIndex = 0;
146
- if (parts.length > 0) {
147
- const firstPart = parts[0];
148
- // Check for angle in degrees
149
- const degMatch = firstPart.match(/^(\d+)deg$/i);
150
- if (degMatch) {
151
- angle = parseInt(degMatch[1], 10);
152
- colorStartIndex = 1;
153
- }
154
- else if (firstPart.match(/^to\s+/i)) {
155
- // Convert direction keywords to angles
156
- const direction = firstPart.toLowerCase();
157
- if (direction.includes("right") && direction.includes("bottom"))
158
- angle = 135;
159
- else if (direction.includes("right") && direction.includes("top"))
160
- angle = 45;
161
- else if (direction.includes("left") && direction.includes("bottom"))
162
- angle = 225;
163
- else if (direction.includes("left") && direction.includes("top"))
164
- angle = 315;
165
- else if (direction.includes("right"))
166
- angle = 90;
167
- else if (direction.includes("left"))
168
- angle = 270;
169
- else if (direction.includes("bottom"))
170
- angle = 180;
171
- else if (direction.includes("top"))
172
- angle = 0;
173
- colorStartIndex = 1;
174
- }
175
- }
176
- // Extract color stops
177
- const colorParts = parts.slice(colorStartIndex);
178
- if (colorParts.length === 0)
179
- return undefined;
180
- const stops = [];
181
- const colorPattern = /^(#[0-9a-fA-F]{3,8}|rgba?\s*\([^)]+\)|hsla?\s*\([^)]+\)|[a-zA-Z]+)(?:\s+(\d+)%)?$/i;
182
- for (let i = 0; i < colorParts.length; i++) {
183
- const part = colorParts[i];
184
- const match = part.match(colorPattern);
185
- if (match) {
186
- let color = match[1];
187
- // Convert color to hex without #
188
- color = normalizeColorToHex(color);
189
- // Get position (default: evenly distributed)
190
- let position;
191
- if (match[2]) {
192
- position = parseInt(match[2], 10);
193
- }
194
- else {
195
- // Evenly distribute if no position specified
196
- position = colorParts.length === 1 ? 0 : (i / (colorParts.length - 1)) * 100;
197
25
  }
198
- stops.push({ color, position: Math.round(position) });
199
- }
200
- }
201
- if (stops.length < 2)
202
- return undefined;
203
- return { angle, stops };
204
- }
205
- /**
206
- * Normalize a CSS color value to hex format (without #).
207
- * Handles hex (#RGB, #RRGGBB), rgb(), rgba(), and named colors.
208
- */
209
- function normalizeColorToHex(color) {
210
- color = color.trim();
211
- // Already hex
212
- if (color.startsWith("#")) {
213
- let hex = color.slice(1).toUpperCase();
214
- // Expand shorthand (#RGB -> #RRGGBB)
215
- if (hex.length === 3) {
216
- hex = hex[0] + hex[0] + hex[1] + hex[1] + hex[2] + hex[2];
217
- }
218
- // Strip alpha if present (#RRGGBBAA -> #RRGGBB)
219
- if (hex.length === 8) {
220
- hex = hex.slice(0, 6);
221
- }
222
- return hex;
223
- }
224
- // RGB/RGBA
225
- const rgbMatch = color.match(/rgba?\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)/i);
226
- if (rgbMatch) {
227
- const r = parseInt(rgbMatch[1], 10).toString(16).padStart(2, "0");
228
- const g = parseInt(rgbMatch[2], 10).toString(16).padStart(2, "0");
229
- const b = parseInt(rgbMatch[3], 10).toString(16).padStart(2, "0");
230
- return (r + g + b).toUpperCase();
231
- }
232
- // Named colors (common ones)
233
- const namedColors = {
234
- red: "FF0000", green: "008000", blue: "0000FF", white: "FFFFFF", black: "000000",
235
- yellow: "FFFF00", cyan: "00FFFF", magenta: "FF00FF", orange: "FFA500", purple: "800080",
236
- pink: "FFC0CB", gray: "808080", grey: "808080", navy: "000080", teal: "008080",
237
- maroon: "800000", olive: "808000", lime: "00FF00", aqua: "00FFFF", silver: "C0C0C0",
238
- fuchsia: "FF00FF", transparent: "FFFFFF",
239
- };
240
- const lowerColor = color.toLowerCase();
241
- if (namedColors[lowerColor]) {
242
- return namedColors[lowerColor];
243
- }
244
- // Fallback: return as-is (uppercase)
245
- return color.toUpperCase().replace("#", "");
246
- }
247
- /**
248
- * Extract the primary (first) font name from a CSS font-family value.
249
- * Handles font stacks like "'Source Sans Pro', -apple-system, sans-serif"
250
- * Returns the first non-generic font name, cleaned of quotes.
251
- *
252
- * Generic fonts (sans-serif, serif, monospace, cursive, fantasy, system-ui) are skipped
253
- * unless they're the only option.
254
- */
255
- function extractPrimaryFont(fontFamily) {
256
- if (!fontFamily)
257
- return undefined;
258
- // Split by comma, handling quoted font names
259
- const fonts = [];
260
- let current = "";
261
- let inQuote = false;
262
- let quoteChar = "";
263
- for (let i = 0; i < fontFamily.length; i++) {
264
- const char = fontFamily[i];
265
- if ((char === '"' || char === "'") && !inQuote) {
266
- inQuote = true;
267
- quoteChar = char;
268
- }
269
- else if (char === quoteChar && inQuote) {
270
- inQuote = false;
271
- quoteChar = "";
272
- }
273
- else if (char === "," && !inQuote) {
274
- const trimmed = current.trim().replace(/^['"]|['"]$/g, "");
275
- if (trimmed)
276
- fonts.push(trimmed);
277
- current = "";
278
- }
279
- else {
280
- current += char;
26
+ return;
281
27
  }
282
- }
283
- // Add last font
284
- const trimmed = current.trim().replace(/^['"]|['"]$/g, "");
285
- if (trimmed)
286
- fonts.push(trimmed);
287
- // Generic font families to skip
288
- const genericFonts = new Set([
289
- "sans-serif", "serif", "monospace", "cursive", "fantasy", "system-ui",
290
- "-apple-system", "BlinkMacSystemFont", "Segoe UI", "Roboto", "Helvetica Neue",
291
- "Arial", "Helvetica", "Ubuntu", "Cantarell", "Fira Sans", "Droid Sans",
292
- "Noto Sans", "sans-serif", "Oxygen", "Open Sans",
293
- ]);
294
- // Find first non-generic font
295
- for (const font of fonts) {
296
- if (!genericFonts.has(font) && !font.startsWith("-")) {
297
- return font;
28
+ if (node.nodeType !== Node.ELEMENT_NODE || !(node instanceof Element)) {
29
+ return;
298
30
  }
299
- }
300
- // If all fonts are generic, return the first one
301
- return fonts[0];
302
- }
303
- /**
304
- * Parse CSS variables and class color rules from a style element.
305
- */
306
- function parseCssContext(doc) {
307
- const variables = new Map();
308
- const classColors = new Map();
309
- const calloutStyles = new Map();
310
- const classStyles = new Map();
311
- const elementStyles = new Map();
312
- const nestedStyles = new Map();
313
- // Helper to resolve CSS variable or return value as-is
314
- // Handles multiple var() references in a single value (e.g., "var(--a) var(--b)")
315
- const resolveValue = (value) => {
316
- // Replace all var() references with their resolved values
317
- return value.replace(/var\s*\(\s*(--[a-zA-Z0-9-]+)\s*\)/g, (match, varName) => {
318
- const resolved = variables.get(varName);
319
- return resolved || match; // Keep original if not found
320
- });
321
- };
322
- // Find all style elements
323
- const styleElements = doc.querySelectorAll("style");
324
- for (const styleEl of styleElements) {
325
- let cssText = styleEl.textContent || "";
326
- // Remove media queries to avoid mobile-specific styles overriding desktop defaults
327
- // Use a function-based approach to correctly handle nested braces
328
- cssText = removeMediaQueries(cssText);
329
- // Extract CSS variables from :root { --name: value; }
330
- const rootMatch = cssText.match(/:root\s*\{([^}]+)\}/);
331
- if (rootMatch) {
332
- const rootContent = rootMatch[1];
333
- const varMatches = rootContent.matchAll(/--([a-zA-Z0-9-]+)\s*:\s*([^;]+)/g);
334
- for (const match of varMatches) {
335
- const varName = `--${match[1]}`;
336
- const varValue = match[2].trim();
337
- variables.set(varName, varValue);
31
+ const el = node;
32
+ const tagName = el.tagName.toLowerCase();
33
+ // Extract color from this element
34
+ const elementColor = extractTextColor(el, cssContext) || color;
35
+ // Skip decorative SVGs
36
+ if (tagName === "svg") {
37
+ const parent = el.parentElement || el;
38
+ if (isDecorativeSvg(el, parent, cssContext)) {
39
+ return;
338
40
  }
339
41
  }
340
- // Also extract CSS variables from theme classes (e.g., .theme-professional)
341
- // These are commonly used to scope CSS variables to body elements
342
- const themeClassMatches = cssText.matchAll(/\.theme-[a-zA-Z0-9_-]+\s*\{([^}]+)\}/g);
343
- for (const themeMatch of themeClassMatches) {
344
- const themeContent = themeMatch[1];
345
- const varMatches = themeContent.matchAll(/--([a-zA-Z0-9-]+)\s*:\s*([^;]+)/g);
346
- for (const match of varMatches) {
347
- const varName = `--${match[1]}`;
348
- const varValue = match[2].trim();
349
- // Only set if not already defined (prefer :root values)
350
- if (!variables.has(varName)) {
351
- variables.set(varName, varValue);
352
- }
42
+ // Handle headings
43
+ const headingLevel = parseHeadingLevel(tagName);
44
+ if (headingLevel !== null) {
45
+ const text = getTextContent(el).trim();
46
+ if (text) {
47
+ innerElements.push({ type: "heading", level: headingLevel, text, color: elementColor });
353
48
  }
49
+ return;
354
50
  }
355
- // Extract class color rules: .classname { color: value; }
356
- // NOTE: Use negative lookbehind to exclude nested selectors like ".parent .child { ... }"
357
- // The (?<!\S\s+) ensures we only match top-level class selectors
358
- const classRuleMatches = cssText.matchAll(/(?:^|[;\n}])\s*\.([a-zA-Z0-9_-]+)\s*\{([^}]+)\}/gm);
359
- for (const match of classRuleMatches) {
360
- const fullMatch = match[0];
361
- const className = match[1];
362
- const ruleContent = match[2];
363
- // Skip if this looks like a nested selector (has another class before it)
364
- // Check if the match is preceded by another class selector on the same line
365
- const precedingText = cssText.substring(0, match.index).split('\n').pop() || '';
366
- if (precedingText.match(/\.[a-zA-Z0-9_-]+\s*$/)) {
367
- // This is a nested selector like ".parent .child { ... }" - skip it here
368
- // Nested selectors will be handled by storing parent-child relationships
369
- continue;
370
- }
371
- // Extract ALL style properties for this class (generalized approach)
372
- const style = {};
373
- // Text color
374
- const colorMatch = ruleContent.match(/(?:^|;)\s*color\s*:\s*([^;]+)/i);
375
- if (colorMatch) {
376
- const colorValue = resolveValue(colorMatch[1].trim());
377
- classColors.set(className, colorValue);
378
- style.color = colorValue;
379
- }
380
- // Background color
381
- const bgMatch = ruleContent.match(/background(?:-color)?\s*:\s*([^;]+)/i);
382
- if (bgMatch) {
383
- const bgValue = resolveValue(bgMatch[1].trim());
384
- // For gradient backgrounds, extract the first color as fallback for DOCX
385
- // Also store the full gradient for containers that support gradient rendering
386
- const gradientColor = extractFirstGradientColor(bgValue);
387
- if (gradientColor) {
388
- style.backgroundColor = gradientColor;
389
- // Parse full gradient for container backgrounds (not text gradients)
390
- const gradient = parseGradient(bgValue);
391
- if (gradient) {
392
- style.backgroundGradient = gradient;
393
- }
394
- }
395
- else {
396
- style.backgroundColor = bgValue;
397
- }
398
- }
399
- // Gradient text detection: when background-clip: text is used with a gradient background,
400
- // extract the first color from the gradient as fallback and also store full gradient
401
- const hasBackgroundClipText = ruleContent.match(/(?:-webkit-)?background-clip\s*:\s*text/i);
402
- if (hasBackgroundClipText && bgMatch) {
403
- const bgValue = resolveValue(bgMatch[1].trim());
404
- // Parse full gradient
405
- const gradient = parseGradient(bgValue);
406
- if (gradient) {
407
- style.gradient = gradient;
408
- // Also set fallback color from first stop
409
- if (!style.color) {
410
- style.color = gradient.stops[0]?.color;
411
- if (style.color) {
412
- classColors.set(className, style.color);
51
+ // Handle paragraphs
52
+ if (tagName === "p") {
53
+ const runs = extractInlineRuns(el, cssContext);
54
+ if (runs.length > 0) {
55
+ const text = runs.map((r) => r.text).join("");
56
+ // Apply inherited color to runs that don't have their own color
57
+ if (elementColor) {
58
+ runs.forEach(run => {
59
+ if (!run.color) {
60
+ run.color = elementColor;
413
61
  }
414
- }
62
+ });
415
63
  }
416
- else {
417
- // Fallback: extract first color if gradient parsing fails
418
- const gradientColor = extractFirstGradientColor(bgValue);
419
- if (gradientColor && !style.color) {
420
- style.color = gradientColor;
421
- classColors.set(className, gradientColor);
422
- }
64
+ if (hasInlineFormatting(runs)) {
65
+ innerElements.push({ type: "paragraph", text, runs });
423
66
  }
424
- }
425
- // Border color
426
- const borderColorMatch = ruleContent.match(/border-color\s*:\s*([^;]+)/i);
427
- if (borderColorMatch) {
428
- style.borderColor = resolveValue(borderColorMatch[1].trim());
429
- }
430
- // Border shorthand (e.g., "1px solid #e5e7eb")
431
- const borderMatch = ruleContent.match(/(?:^|;)\s*border\s*:\s*([^;]+)/i);
432
- if (borderMatch) {
433
- style.border = resolveValue(borderMatch[1].trim());
434
- }
435
- // Border-left (used by callouts: "4px solid #2563eb")
436
- const borderLeftMatch = ruleContent.match(/border-left\s*:\s*([^;]+)/i);
437
- if (borderLeftMatch) {
438
- const borderLeftValue = resolveValue(borderLeftMatch[1].trim());
439
- // If it contains a color, extract it as borderColor
440
- const colorInBorder = borderLeftValue.match(/#([0-9a-fA-F]{3,6})/);
441
- if (colorInBorder && !style.borderColor) {
442
- style.borderColor = `#${colorInBorder[1]}`;
67
+ else if (elementColor) {
68
+ innerElements.push({ type: "paragraph", text, color: elementColor });
443
69
  }
444
- // Store the full border-left value
445
- style.borderLeft = borderLeftValue;
446
- style.border = style.border || borderLeftValue;
447
- }
448
- // Border-right (used by sidebar dividers, etc.)
449
- const borderRightMatch = ruleContent.match(/border-right\s*:\s*([^;]+)/i);
450
- if (borderRightMatch) {
451
- style.borderRight = resolveValue(borderRightMatch[1].trim());
452
- }
453
- // Border-bottom (used by title blocks with divider lines)
454
- const borderBottomMatch = ruleContent.match(/border-bottom\s*:\s*([^;]+)/i);
455
- if (borderBottomMatch) {
456
- style.borderBottom = resolveValue(borderBottomMatch[1].trim());
457
- }
458
- // Border-top (used by footer sections with top divider lines)
459
- const borderTopMatch = ruleContent.match(/border-top\s*:\s*([^;]+)/i);
460
- if (borderTopMatch) {
461
- style.borderTop = resolveValue(borderTopMatch[1].trim());
462
- }
463
- // Display property (grid, flex, block, etc.)
464
- const displayMatch = ruleContent.match(/display\s*:\s*([^;]+)/i);
465
- if (displayMatch) {
466
- style.display = resolveValue(displayMatch[1].trim());
467
- }
468
- // Flex property (for flex item sizing like "flex: 1")
469
- const flexMatch = ruleContent.match(/(?:^|;)\s*flex\s*:\s*([^;]+)/i);
470
- if (flexMatch) {
471
- style.flex = resolveValue(flexMatch[1].trim());
472
- }
473
- // Flex-direction property (for horizontal vs vertical flex containers)
474
- const flexDirectionMatch = ruleContent.match(/flex-direction\s*:\s*([^;]+)/i);
475
- if (flexDirectionMatch) {
476
- style.flexDirection = resolveValue(flexDirectionMatch[1].trim());
477
- }
478
- // Flex-wrap property (for wrapping behavior)
479
- const flexWrapMatch = ruleContent.match(/flex-wrap\s*:\s*([^;]+)/i);
480
- if (flexWrapMatch) {
481
- style.flexWrap = resolveValue(flexWrapMatch[1].trim());
482
- }
483
- // Gap property (for flex/grid spacing)
484
- const gapMatch = ruleContent.match(/(?:^|;)\s*gap\s*:\s*([^;]+)/i);
485
- if (gapMatch) {
486
- style.gap = resolveValue(gapMatch[1].trim());
487
- }
488
- // Grid template columns (for two-column layout detection)
489
- const gridColsMatch = ruleContent.match(/grid-template-columns\s*:\s*([^;]+)/i);
490
- if (gridColsMatch) {
491
- style.gridTemplateColumns = resolveValue(gridColsMatch[1].trim());
492
- }
493
- // Text alignment
494
- const textAlignMatch = ruleContent.match(/text-align\s*:\s*([^;]+)/i);
495
- if (textAlignMatch) {
496
- style.textAlign = resolveValue(textAlignMatch[1].trim());
497
- }
498
- // Font size
499
- const fontSizeMatch = ruleContent.match(/font-size\s*:\s*([^;]+)/i);
500
- if (fontSizeMatch) {
501
- style.fontSize = resolveValue(fontSizeMatch[1].trim());
502
- }
503
- // Font weight
504
- const fontWeightMatch = ruleContent.match(/font-weight\s*:\s*([^;]+)/i);
505
- if (fontWeightMatch) {
506
- style.fontWeight = resolveValue(fontWeightMatch[1].trim());
507
- }
508
- // Padding
509
- const paddingMatch = ruleContent.match(/padding\s*:\s*([^;]+)/i);
510
- if (paddingMatch) {
511
- style.padding = resolveValue(paddingMatch[1].trim());
512
- }
513
- // Font style (italic, normal)
514
- const fontStyleMatch = ruleContent.match(/font-style\s*:\s*([^;]+)/i);
515
- if (fontStyleMatch) {
516
- style.fontStyle = resolveValue(fontStyleMatch[1].trim());
517
- }
518
- // Font family (extract primary font from font stack)
519
- const fontFamilyMatch = ruleContent.match(/font-family\s*:\s*([^;]+)/i);
520
- if (fontFamilyMatch) {
521
- const resolved = resolveValue(fontFamilyMatch[1].trim());
522
- const primaryFont = extractPrimaryFont(resolved);
523
- if (primaryFont) {
524
- style.fontFamily = primaryFont;
70
+ else {
71
+ innerElements.push({ type: "paragraph", text });
525
72
  }
526
73
  }
527
- // Text indent (e.g., "2rem", "-2rem" for hanging indents)
528
- const textIndentMatch = ruleContent.match(/text-indent\s*:\s*([^;]+)/i);
529
- if (textIndentMatch) {
530
- style.textIndent = resolveValue(textIndentMatch[1].trim());
531
- }
532
- // Text transform (uppercase, lowercase, capitalize)
533
- const textTransformMatch = ruleContent.match(/text-transform\s*:\s*([^;]+)/i);
534
- if (textTransformMatch) {
535
- style.textTransform = resolveValue(textTransformMatch[1].trim());
536
- }
537
- // GENERALIZED: Margin-bottom (for paragraph spacing)
538
- const marginBottomMatch = ruleContent.match(/margin-bottom\s*:\s*([^;]+)/i);
539
- if (marginBottomMatch) {
540
- style.marginBottom = resolveValue(marginBottomMatch[1].trim());
541
- }
542
- // GENERALIZED: Line-height (for vertical spacing within text)
543
- const lineHeightMatch = ruleContent.match(/line-height\s*:\s*([^;]+)/i);
544
- if (lineHeightMatch) {
545
- style.lineHeight = resolveValue(lineHeightMatch[1].trim());
546
- }
547
- // Store if we found any properties
548
- // MERGE with existing styles (later rules can add properties without overwriting)
549
- if (Object.keys(style).length > 0) {
550
- const existing = classStyles.get(className) || {};
551
- classStyles.set(className, { ...existing, ...style });
552
- }
74
+ return;
553
75
  }
554
- // Parse element.class combined selectors like "dd.dish-description { font-style: italic; }"
555
- // These are more specific than just .class, but we store under the class name
556
- // since getElementStyles already verifies element type separately
557
- const elementClassMatches = cssText.matchAll(/(?:^|[;\n}])\s*([a-zA-Z][a-zA-Z0-9]*)\s*\.\s*([a-zA-Z0-9_-]+)\s*\{([^}]+)\}/gm);
558
- for (const match of elementClassMatches) {
559
- const elementName = match[1].toLowerCase();
560
- const className = match[2];
561
- const ruleContent = match[3];
562
- // Extract style properties (same as class rules above)
563
- const style = {};
564
- // Font style (italic, normal)
565
- const fontStyleMatch = ruleContent.match(/font-style\s*:\s*([^;]+)/i);
566
- if (fontStyleMatch) {
567
- style.fontStyle = resolveValue(fontStyleMatch[1].trim());
568
- }
569
- // Font weight
570
- const fontWeightMatch = ruleContent.match(/font-weight\s*:\s*([^;]+)/i);
571
- if (fontWeightMatch) {
572
- style.fontWeight = resolveValue(fontWeightMatch[1].trim());
573
- }
574
- // Text color
575
- const colorMatch = ruleContent.match(/(?:^|;)\s*color\s*:\s*([^;]+)/i);
576
- if (colorMatch) {
577
- const colorValue = resolveValue(colorMatch[1].trim());
578
- style.color = colorValue;
579
- }
580
- // Background color
581
- const bgMatch = ruleContent.match(/background(?:-color)?\s*:\s*([^;]+)/i);
582
- if (bgMatch) {
583
- const bgValue = resolveValue(bgMatch[1].trim());
584
- // For gradient backgrounds, extract the first color as fallback for DOCX
585
- // Also store the full gradient for containers that support gradient rendering
586
- const gradientColor = extractFirstGradientColor(bgValue);
587
- if (gradientColor) {
588
- style.backgroundColor = gradientColor;
589
- // Parse full gradient for container backgrounds
590
- const gradient = parseGradient(bgValue);
591
- if (gradient) {
592
- style.backgroundGradient = gradient;
76
+ // Handle lists
77
+ if (tagName === "ul" || tagName === "ol") {
78
+ const items = [];
79
+ for (const child of el.children) {
80
+ if (child.tagName.toLowerCase() === "li") {
81
+ const runs = extractInlineRuns(child, cssContext);
82
+ if (runs.length > 0) {
83
+ // Apply color to runs
84
+ if (elementColor) {
85
+ runs.forEach(run => {
86
+ if (!run.color) {
87
+ run.color = elementColor;
88
+ }
89
+ });
90
+ }
91
+ if (hasInlineFormatting(runs)) {
92
+ items.push(runs);
93
+ }
94
+ else {
95
+ items.push(runs.map((r) => r.text).join(""));
96
+ }
593
97
  }
594
98
  }
595
- else {
596
- style.backgroundColor = bgValue;
597
- }
598
- }
599
- // Text alignment
600
- const textAlignMatch = ruleContent.match(/text-align\s*:\s*([^;]+)/i);
601
- if (textAlignMatch) {
602
- style.textAlign = resolveValue(textAlignMatch[1].trim());
603
- }
604
- // Font family (extract primary font from font stack)
605
- const fontFamilyMatch = ruleContent.match(/font-family\s*:\s*([^;]+)/i);
606
- if (fontFamilyMatch) {
607
- const resolved = resolveValue(fontFamilyMatch[1].trim());
608
- const primaryFont = extractPrimaryFont(resolved);
609
- if (primaryFont) {
610
- style.fontFamily = primaryFont;
611
- }
612
99
  }
613
- // Store under the class name (merge with existing)
614
- if (Object.keys(style).length > 0) {
615
- const existing = classStyles.get(className) || {};
616
- classStyles.set(className, { ...existing, ...style });
100
+ if (items.length > 0) {
101
+ innerElements.push({ type: "list", ordered: tagName === "ol", items });
617
102
  }
103
+ return;
618
104
  }
619
- // Parse nested CSS selectors like ".parent .child { color: ... }" or ".parent element { color: ... }"
620
- // Also handles "element.parent .child { ... }" like "figure.menu-image .image-placeholder { ... }"
621
- // Store the parent-child relationship so we can look up styles based on context
622
- // Supports: .works-cited p { text-indent: -2rem; } or .callout .callout-label { color: ... }
623
- // Extended pattern to match:
624
- // - .parentClass .child { ... }
625
- // - element.parentClass .child { ... }
626
- const nestedSelectorMatches = cssText.matchAll(/(?:[a-zA-Z0-9_-]*)?\.([a-zA-Z0-9_-]+)\s+(\.?[a-zA-Z0-9_-]+)\s*\{([^}]+)\}/g);
627
- for (const match of nestedSelectorMatches) {
628
- const parentClass = match[1];
629
- let childSelector = match[2];
630
- // Remove leading dot if present (for .class selectors)
631
- if (childSelector.startsWith('.')) {
632
- childSelector = childSelector.slice(1);
633
- }
634
- const ruleContent = match[3];
635
- // Extract style properties for the child when inside this parent
636
- const style = {};
637
- // Text color
638
- const colorMatch = ruleContent.match(/(?:^|;)\s*color\s*:\s*([^;]+)/i);
639
- if (colorMatch) {
640
- style.color = resolveValue(colorMatch[1].trim());
641
- }
642
- // Background color
643
- const bgMatch = ruleContent.match(/background(?:-color)?\s*:\s*([^;]+)/i);
644
- if (bgMatch) {
645
- const bgValue = resolveValue(bgMatch[1].trim());
646
- // For gradient backgrounds, extract the first color as fallback for DOCX
647
- // Also store the full gradient for containers that support gradient rendering
648
- const gradientColor = extractFirstGradientColor(bgValue);
649
- if (gradientColor) {
650
- style.backgroundColor = gradientColor;
651
- // Parse full gradient for container backgrounds
652
- const gradient = parseGradient(bgValue);
653
- if (gradient) {
654
- style.backgroundGradient = gradient;
105
+ // Handle tables
106
+ if (tagName === "table") {
107
+ const rows = [];
108
+ for (const tr of el.querySelectorAll("tr")) {
109
+ const cells = [];
110
+ for (const cell of tr.querySelectorAll("td, th")) {
111
+ const runs = extractInlineRuns(cell, cssContext);
112
+ if (runs.length > 0) {
113
+ if (hasInlineFormatting(runs)) {
114
+ cells.push(runs);
115
+ }
116
+ else {
117
+ cells.push(runs.map((r) => r.text).join(""));
118
+ }
119
+ }
120
+ else {
121
+ cells.push("");
655
122
  }
656
123
  }
657
- else {
658
- style.backgroundColor = bgValue;
124
+ if (cells.length > 0) {
125
+ rows.push(cells);
659
126
  }
660
127
  }
661
- // Gradient text detection: when background-clip: text is used with a gradient background,
662
- // extract the first color from the gradient as fallback and also store full gradient
663
- const hasBackgroundClipText = ruleContent.match(/(?:-webkit-)?background-clip\s*:\s*text/i);
664
- if (hasBackgroundClipText && bgMatch) {
665
- const bgValue = resolveValue(bgMatch[1].trim());
666
- // Parse full gradient
667
- const gradient = parseGradient(bgValue);
668
- if (gradient) {
669
- style.gradient = gradient;
670
- // Also set fallback color from first stop
671
- if (!style.color) {
672
- style.color = gradient.stops[0]?.color;
128
+ if (rows.length > 0) {
129
+ // Extract cell padding from CSS (th, td selectors)
130
+ let cellPadding;
131
+ if (cssContext) {
132
+ // Try td first, then th (they usually have the same padding)
133
+ const tdStyle = cssContext.elementStyles.get("td");
134
+ const thStyle = cssContext.elementStyles.get("th");
135
+ const paddingStr = tdStyle?.padding || thStyle?.padding;
136
+ if (paddingStr) {
137
+ cellPadding = parseCssPaddingToTwips(paddingStr);
673
138
  }
674
- }
675
- else {
676
- // Fallback: extract first color if gradient parsing fails
677
- const gradientColor = extractFirstGradientColor(bgValue);
678
- if (gradientColor && !style.color) {
679
- style.color = gradientColor;
139
+ // Also check individual padding properties (padding-top, padding-right, etc.)
140
+ // These override the shorthand if set
141
+ const paddingSource = tdStyle || thStyle;
142
+ if (paddingSource) {
143
+ if (!cellPadding)
144
+ cellPadding = {};
145
+ if (paddingSource.paddingTop) {
146
+ const twips = parseMarginToTwips(paddingSource.paddingTop);
147
+ if (twips !== undefined)
148
+ cellPadding.top = twips;
149
+ }
150
+ if (paddingSource.paddingRight) {
151
+ const twips = parseMarginToTwips(paddingSource.paddingRight);
152
+ if (twips !== undefined)
153
+ cellPadding.right = twips;
154
+ }
155
+ if (paddingSource.paddingBottom) {
156
+ const twips = parseMarginToTwips(paddingSource.paddingBottom);
157
+ if (twips !== undefined)
158
+ cellPadding.bottom = twips;
159
+ }
160
+ if (paddingSource.paddingLeft) {
161
+ const twips = parseMarginToTwips(paddingSource.paddingLeft);
162
+ if (twips !== undefined)
163
+ cellPadding.left = twips;
164
+ }
680
165
  }
681
166
  }
167
+ innerElements.push({ type: "table", rows, cellPadding });
682
168
  }
683
- // Font weight
684
- const fontWeightMatch = ruleContent.match(/font-weight\s*:\s*([^;]+)/i);
685
- if (fontWeightMatch) {
686
- style.fontWeight = resolveValue(fontWeightMatch[1].trim());
687
- }
688
- // Text indent (for nested hanging indents like .works-cited p)
689
- const textIndentMatch = ruleContent.match(/text-indent\s*:\s*([^;]+)/i);
690
- if (textIndentMatch) {
691
- style.textIndent = resolveValue(textIndentMatch[1].trim());
692
- }
693
- // Font style
694
- const fontStyleMatch = ruleContent.match(/font-style\s*:\s*([^;]+)/i);
695
- if (fontStyleMatch) {
696
- style.fontStyle = resolveValue(fontStyleMatch[1].trim());
697
- }
698
- // Font family (extract primary font from font stack)
699
- const fontFamilyMatch = ruleContent.match(/font-family\s*:\s*([^;]+)/i);
700
- if (fontFamilyMatch) {
701
- const resolved = resolveValue(fontFamilyMatch[1].trim());
702
- const primaryFont = extractPrimaryFont(resolved);
703
- if (primaryFont) {
704
- style.fontFamily = primaryFont;
169
+ return;
170
+ }
171
+ // Detect skill item pattern: container with name + percentage
172
+ // This handles divs containing "Skill Name" + "95%" on separate lines
173
+ if (tagName === "div") {
174
+ const skillItem = detectSkillItem(el);
175
+ if (skillItem) {
176
+ // Output as single line: "Skill Name: 95%"
177
+ const text = `${skillItem.name}: ${skillItem.percentage}`;
178
+ if (elementColor) {
179
+ innerElements.push({ type: "paragraph", text, color: elementColor });
705
180
  }
706
- }
707
- // Border (full shorthand, e.g., "1px solid #e5e7eb")
708
- const borderMatch = ruleContent.match(/(?:^|;)\s*border\s*:\s*([^;]+)/i);
709
- if (borderMatch) {
710
- style.border = resolveValue(borderMatch[1].trim());
711
- }
712
- // Border-left (callout styling)
713
- const borderLeftMatch = ruleContent.match(/border-left\s*:\s*([^;]+)/i);
714
- if (borderLeftMatch) {
715
- const borderLeftValue = resolveValue(borderLeftMatch[1].trim());
716
- style.borderLeft = borderLeftValue;
717
- style.border = style.border || borderLeftValue;
718
- // Extract color from border value
719
- const colorInBorder = borderLeftValue.match(/#([0-9a-fA-F]{3,6})/);
720
- if (colorInBorder && !style.borderColor) {
721
- style.borderColor = `#${colorInBorder[1]}`;
181
+ else {
182
+ innerElements.push({ type: "paragraph", text });
722
183
  }
184
+ return;
723
185
  }
724
- // Border-bottom (heading underline styling, e.g., h2 { border-bottom: 3px solid #7c3aed; })
725
- const borderBottomMatch = ruleContent.match(/border-bottom\s*:\s*([^;]+)/i);
726
- if (borderBottomMatch) {
727
- const borderBottomValue = resolveValue(borderBottomMatch[1].trim());
728
- style.borderBottom = borderBottomValue;
729
- // Extract color from border value
730
- const colorInBorder = borderBottomValue.match(/#([0-9a-fA-F]{3,6})/);
731
- if (colorInBorder && !style.borderColor) {
732
- style.borderColor = `#${colorInBorder[1]}`;
186
+ // Detect language item pattern: language name + proficiency dots
187
+ const languageItem = detectLanguageItem(el, cssContext);
188
+ if (languageItem) {
189
+ // Create visual representation with filled and empty dots
190
+ // ● = filled, ○ = empty
191
+ const filledDots = "●".repeat(languageItem.filledCount);
192
+ const emptyDots = "○".repeat(languageItem.totalCount - languageItem.filledCount);
193
+ const text = `${languageItem.name}: ${filledDots}${emptyDots}`;
194
+ if (elementColor) {
195
+ innerElements.push({ type: "paragraph", text, color: elementColor });
733
196
  }
734
- }
735
- // Text-transform (uppercase, lowercase, capitalize for headings, labels, etc.)
736
- const textTransformMatch = ruleContent.match(/text-transform\s*:\s*([^;]+)/i);
737
- if (textTransformMatch) {
738
- style.textTransform = resolveValue(textTransformMatch[1].trim());
739
- }
740
- // Store in nestedStyles: parentClass -> (childSelector -> style)
741
- if (Object.keys(style).length > 0) {
742
- if (!nestedStyles.has(parentClass)) {
743
- nestedStyles.set(parentClass, new Map());
197
+ else {
198
+ innerElements.push({ type: "paragraph", text });
744
199
  }
745
- const existing = nestedStyles.get(parentClass).get(childSelector) || {};
746
- nestedStyles.get(parentClass).set(childSelector, { ...existing, ...style });
200
+ return;
747
201
  }
748
202
  }
749
- // Parse element type selectors (body, p, h1, h2, h3, h4, h5, h6, etc.)
750
- // These are rules like: body { color: var(--color-text); }
751
- // or grouped rules like: h1, h2, h3, h4, h5, h6 { color: var(--color-heading); }
752
- // IMPORTANT: Only match STANDALONE element selectors, not selectors like ".class p" or "#id p"
753
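- // The leading (?:^|[\n\r]) group (a consuming non-capturing group, not a lookbehind) only lets a match begin at the start of the input or right after a line break, so a selector that is merely the tail of a complex selector on the same line is not matched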
754
- // We need to be careful to not match ".timeline-content p" as just "p"
755
- const elementSelectorPattern = /(?:^|[\n\r])(\s*(?:body|p|h[1-6]|span|div|ul|ol|li|table|th|td|blockquote|section|article|aside|nav|header|footer|figure|figcaption|address|abbr)(?:\s*,\s*(?:body|p|h[1-6]|span|div|ul|ol|li|table|th|td|blockquote|section|article|aside|nav|header|footer|figure|figcaption|address|abbr))*)\s*\{([^}]+)\}/gi;
756
- const elementMatches = cssText.matchAll(elementSelectorPattern);
757
- for (const match of elementMatches) {
758
- const selectorList = match[1];
759
- const ruleContent = match[2];
760
- // Split comma-separated selectors
761
- const selectors = selectorList.split(/\s*,\s*/).map(s => s.trim().toLowerCase());
762
- // Extract style properties
763
- const style = {};
764
- // Text color
765
- const colorMatch = ruleContent.match(/(?:^|;)\s*color\s*:\s*([^;]+)/i);
766
- if (colorMatch) {
767
- style.color = resolveValue(colorMatch[1].trim());
203
+ // Container tags - recurse into children
204
+ if (CONTAINER_TAGS.includes(tagName)) {
205
+ for (const child of el.childNodes) {
206
+ processInnerNode(child, elementColor);
768
207
  }
769
- // Background color
770
- const bgMatch = ruleContent.match(/background(?:-color)?\s*:\s*([^;]+)/i);
771
- if (bgMatch) {
772
- const bgValue = resolveValue(bgMatch[1].trim());
773
- // For gradient backgrounds, extract the first color as fallback for DOCX
774
- // Also store the full gradient for containers that support gradient rendering
775
- const gradientColor = extractFirstGradientColor(bgValue);
776
- if (gradientColor) {
777
- style.backgroundColor = gradientColor;
778
- // Parse full gradient for container backgrounds
779
- const gradient = parseGradient(bgValue);
780
- if (gradient) {
781
- style.backgroundGradient = gradient;
782
- }
783
- }
784
- else {
785
- style.backgroundColor = bgValue;
786
- }
787
- }
788
- // Font size
789
- const fontSizeMatch = ruleContent.match(/font-size\s*:\s*([^;]+)/i);
790
- if (fontSizeMatch) {
791
- style.fontSize = resolveValue(fontSizeMatch[1].trim());
792
- }
793
- // Font weight
794
- const fontWeightMatch = ruleContent.match(/font-weight\s*:\s*([^;]+)/i);
795
- if (fontWeightMatch) {
796
- style.fontWeight = resolveValue(fontWeightMatch[1].trim());
797
- }
798
- // Padding (for table cells th, td)
799
- const paddingMatch = ruleContent.match(/padding\s*:\s*([^;]+)/i);
800
- if (paddingMatch) {
801
- style.padding = resolveValue(paddingMatch[1].trim());
802
- }
803
- // Font style (italic, normal) - for blockquotes
804
- const fontStyleMatch = ruleContent.match(/font-style\s*:\s*([^;]+)/i);
805
- if (fontStyleMatch) {
806
- style.fontStyle = resolveValue(fontStyleMatch[1].trim());
807
- }
808
- // Font family (extract primary font from font stack) - for body, headings, etc.
809
- const fontFamilyMatch = ruleContent.match(/font-family\s*:\s*([^;]+)/i);
810
- if (fontFamilyMatch) {
811
- const resolved = resolveValue(fontFamilyMatch[1].trim());
812
- const primaryFont = extractPrimaryFont(resolved);
813
- if (primaryFont) {
814
- style.fontFamily = primaryFont;
815
- }
816
- }
817
- // Text indent (for paragraph first-line indentation)
818
- const textIndentMatch = ruleContent.match(/text-indent\s*:\s*([^;]+)/i);
819
- if (textIndentMatch) {
820
- style.textIndent = resolveValue(textIndentMatch[1].trim());
821
- }
822
- // Text transform (uppercase, lowercase, capitalize)
823
- const textTransformMatch = ruleContent.match(/text-transform\s*:\s*([^;]+)/i);
824
- if (textTransformMatch) {
825
- style.textTransform = resolveValue(textTransformMatch[1].trim());
826
- }
827
- // GENERALIZED: Margin-bottom (for paragraph spacing)
828
- const marginBottomMatch = ruleContent.match(/margin-bottom\s*:\s*([^;]+)/i);
829
- if (marginBottomMatch) {
830
- style.marginBottom = resolveValue(marginBottomMatch[1].trim());
831
- }
832
- // GENERALIZED: Line-height (for vertical spacing within text)
833
- const lineHeightMatch = ruleContent.match(/line-height\s*:\s*([^;]+)/i);
834
- if (lineHeightMatch) {
835
- style.lineHeight = resolveValue(lineHeightMatch[1].trim());
836
- }
837
- // Text alignment (left, center, right, justify)
838
- const textAlignMatch = ruleContent.match(/text-align\s*:\s*([^;]+)/i);
839
- if (textAlignMatch) {
840
- style.textAlign = resolveValue(textAlignMatch[1].trim());
841
- }
842
- // Border shorthand (e.g., "1px solid #e5e7eb")
843
- const borderMatch = ruleContent.match(/(?:^|;)\s*border\s*:\s*([^;]+)/i);
844
- if (borderMatch) {
845
- style.border = resolveValue(borderMatch[1].trim());
846
- }
847
- // Border-left (used by blockquotes/callouts: "4px solid #8b4513")
848
- const borderLeftMatch = ruleContent.match(/border-left\s*:\s*([^;]+)/i);
849
- if (borderLeftMatch) {
850
- const borderLeftValue = resolveValue(borderLeftMatch[1].trim());
851
- // If it contains a color, extract it as borderColor
852
- const colorInBorder = borderLeftValue.match(/#([0-9a-fA-F]{3,6})/);
853
- if (colorInBorder && !style.borderColor) {
854
- style.borderColor = `#${colorInBorder[1]}`;
855
- }
856
- // Store the full border-left value
857
- style.borderLeft = borderLeftValue;
858
- style.border = style.border || borderLeftValue;
859
- }
860
- // Border-right (used by sidebar dividers, etc.)
861
- const borderRightMatch = ruleContent.match(/border-right\s*:\s*([^;]+)/i);
862
- if (borderRightMatch) {
863
- style.borderRight = resolveValue(borderRightMatch[1].trim());
864
- }
865
- // Border color (direct property)
866
- const borderColorMatch = ruleContent.match(/border-color\s*:\s*([^;]+)/i);
867
- if (borderColorMatch) {
868
- style.borderColor = resolveValue(borderColorMatch[1].trim());
869
- }
870
- // GENERALIZED: Border-bottom (used by h2 underlines, title blocks, etc.)
871
- // Any element can have border-bottom - extract from element selectors
872
- const borderBottomMatch = ruleContent.match(/border-bottom\s*:\s*([^;]+)/i);
873
- if (borderBottomMatch) {
874
- style.borderBottom = resolveValue(borderBottomMatch[1].trim());
875
- }
876
- // GENERALIZED: Border-top (used by footer sections with top divider lines)
877
- // Any element can have border-top - extract from element selectors
878
- const borderTopMatch = ruleContent.match(/border-top\s*:\s*([^;]+)/i);
879
- if (borderTopMatch) {
880
- style.borderTop = resolveValue(borderTopMatch[1].trim());
881
- }
882
- // Apply style to each selector
883
- if (Object.keys(style).length > 0) {
884
- for (const selector of selectors) {
885
- // Merge with existing styles (later rules override)
886
- const existing = elementStyles.get(selector) || {};
887
- elementStyles.set(selector, { ...existing, ...style });
888
- }
889
- }
890
- }
891
- }
892
- return { variables, classColors, calloutStyles, classStyles, elementStyles, nestedStyles };
893
- }
894
- /**
895
- * Get merged styles for an element by combining all its class styles.
896
- * Later classes override earlier ones.
897
- * Also checks element type selectors as a base layer.
898
- *
899
- * @param element The element to get styles for
900
- * @param cssContext The CSS context
901
- * @param parentElement Optional parent element for nested style lookups
902
- */
903
- function getElementStyles(element, cssContext, parentElement) {
904
- const result = {};
905
- // First, apply element type selector styles (lowest priority)
906
- const tagName = element.tagName.toLowerCase();
907
- const elementTypeStyle = cssContext.elementStyles.get(tagName);
908
- if (elementTypeStyle) {
909
- Object.assign(result, elementTypeStyle);
910
- }
911
- // Then apply class styles (higher priority)
912
- const classAttr = element.getAttribute("class");
913
- const elementClasses = classAttr ? classAttr.split(/\s+/).filter(c => c.length > 0) : [];
914
- for (const className of elementClasses) {
915
- const classStyle = cssContext.classStyles.get(className);
916
- if (classStyle) {
917
- Object.assign(result, classStyle);
918
- }
919
- }
920
- // Apply nested styles if parent is provided (highest CSS priority for nested selectors)
921
- // This handles rules like ".parent .child { color: ... }" and ".parent element { color: ... }"
922
- if (parentElement) {
923
- const parentClassAttr = parentElement.getAttribute("class");
924
- const parentClasses = parentClassAttr ? parentClassAttr.split(/\s+/).filter(c => c.length > 0) : [];
925
- // First, inherit text color from parent's class styles (CSS color inheritance)
926
- // This handles cases like ".cta { color: white; }" where children should inherit the color
927
- if (!result.color) {
928
- for (const parentClass of parentClasses) {
929
- const parentClassStyle = cssContext.classStyles.get(parentClass);
930
- if (parentClassStyle?.color) {
931
- result.color = parentClassStyle.color;
932
- break;
933
- }
934
- }
935
- }
936
- // For each parent class, check if there are nested styles for our element's classes OR tag name
937
- for (const parentClass of parentClasses) {
938
- const nestedMap = cssContext.nestedStyles.get(parentClass);
939
- if (nestedMap) {
940
- // Check for element tag name (e.g., .works-cited p { ... })
941
- const nestedTagStyle = nestedMap.get(tagName);
942
- if (nestedTagStyle) {
943
- Object.assign(result, nestedTagStyle);
944
- }
945
- // Check for element's classes
946
- for (const childClass of elementClasses) {
947
- const nestedStyle = nestedMap.get(childClass);
948
- if (nestedStyle) {
949
- Object.assign(result, nestedStyle);
950
- }
951
- }
952
- }
953
- }
954
- }
955
- // Also walk up the DOM tree to find ancestor containers with nested styles
956
- // This handles cases where the parent element isn't passed explicitly
957
- if (!parentElement) {
958
- let ancestor = element.parentElement;
959
- while (ancestor) {
960
- const ancestorClassAttr = ancestor.getAttribute("class");
961
- const ancestorClasses = ancestorClassAttr ? ancestorClassAttr.split(/\s+/).filter(c => c.length > 0) : [];
962
- for (const ancestorClass of ancestorClasses) {
963
- const nestedMap = cssContext.nestedStyles.get(ancestorClass);
964
- if (nestedMap) {
965
- // Check for element tag name
966
- const nestedTagStyle = nestedMap.get(tagName);
967
- if (nestedTagStyle) {
968
- Object.assign(result, nestedTagStyle);
969
- }
970
- // Check for element's classes
971
- for (const childClass of elementClasses) {
972
- const nestedStyle = nestedMap.get(childClass);
973
- if (nestedStyle) {
974
- Object.assign(result, nestedStyle);
975
- }
976
- }
977
- }
978
- }
979
- ancestor = ancestor.parentElement;
980
- }
981
- }
982
- // Inherit font-family from ancestor elements if not set on this element
983
- // CSS font-family is an inherited property, so we need to walk up the DOM tree
984
- if (!result.fontFamily) {
985
- let ancestor = element.parentElement;
986
- while (ancestor && !result.fontFamily) {
987
- const ancestorTagName = ancestor.tagName?.toLowerCase();
988
- if (ancestorTagName) {
989
- // Check element type selector for ancestor (e.g., body { font-family: ... })
990
- const ancestorTypeStyle = cssContext.elementStyles.get(ancestorTagName);
991
- if (ancestorTypeStyle?.fontFamily) {
992
- result.fontFamily = ancestorTypeStyle.fontFamily;
993
- break;
994
- }
995
- // Check ancestor's class styles
996
- const ancestorClassAttr = ancestor.getAttribute("class");
997
- const ancestorClasses = ancestorClassAttr ? ancestorClassAttr.split(/\s+/).filter(c => c.length > 0) : [];
998
- for (const ancestorClass of ancestorClasses) {
999
- const classStyle = cssContext.classStyles.get(ancestorClass);
1000
- if (classStyle?.fontFamily) {
1001
- result.fontFamily = classStyle.fontFamily;
1002
- break;
1003
- }
1004
- }
1005
- }
1006
- ancestor = ancestor.parentElement;
1007
- }
1008
- }
1009
- // Also check inline styles, which override CSS classes
1010
- const inlineStyle = element.getAttribute("style") || "";
1011
- if (inlineStyle) {
1012
- const bgMatch = inlineStyle.match(/background(?:-color)?\s*:\s*([^;]+)/i);
1013
- if (bgMatch)
1014
- result.backgroundColor = bgMatch[1].trim();
1015
- const colorMatch = inlineStyle.match(/(?:^|;)\s*color\s*:\s*([^;]+)/i);
1016
- if (colorMatch)
1017
- result.color = colorMatch[1].trim();
1018
- const borderMatch = inlineStyle.match(/(?:^|;)\s*border\s*:\s*([^;]+)/i);
1019
- if (borderMatch)
1020
- result.border = borderMatch[1].trim();
1021
- const borderColorMatch = inlineStyle.match(/border-color\s*:\s*([^;]+)/i);
1022
- if (borderColorMatch)
1023
- result.borderColor = borderColorMatch[1].trim();
1024
- const borderBottomMatch = inlineStyle.match(/border-bottom\s*:\s*([^;]+)/i);
1025
- if (borderBottomMatch)
1026
- result.borderBottom = borderBottomMatch[1].trim();
1027
- const displayMatch = inlineStyle.match(/display\s*:\s*([^;]+)/i);
1028
- if (displayMatch)
1029
- result.display = displayMatch[1].trim();
1030
- const flexMatch = inlineStyle.match(/(?:^|;)\s*flex\s*:\s*([^;]+)/i);
1031
- if (flexMatch)
1032
- result.flex = flexMatch[1].trim();
1033
- const gridColsMatch = inlineStyle.match(/grid-template-columns\s*:\s*([^;]+)/i);
1034
- if (gridColsMatch)
1035
- result.gridTemplateColumns = gridColsMatch[1].trim();
1036
- const textAlignMatch = inlineStyle.match(/text-align\s*:\s*([^;]+)/i);
1037
- if (textAlignMatch)
1038
- result.textAlign = textAlignMatch[1].trim();
1039
- // Inline font-family (highest priority)
1040
- const fontFamilyMatch = inlineStyle.match(/font-family\s*:\s*([^;]+)/i);
1041
- if (fontFamilyMatch) {
1042
- const primaryFont = extractPrimaryFont(fontFamilyMatch[1].trim());
1043
- if (primaryFont)
1044
- result.fontFamily = primaryFont;
1045
- }
1046
- // Inline line-height
1047
- const lineHeightMatch = inlineStyle.match(/line-height\s*:\s*([^;]+)/i);
1048
- if (lineHeightMatch) {
1049
- result.lineHeight = lineHeightMatch[1].trim();
1050
- }
1051
- }
1052
- // GENERALIZED: Use getComputedStyle as fallback for font-family and line-height
1053
- // This ensures we get the ACTUAL computed values from Playwright's browser context,
1054
- // including all CSS variable resolution and inheritance
1055
- if (typeof window !== "undefined" && window.getComputedStyle) {
1056
- try {
1057
- const computed = window.getComputedStyle(element);
1058
- // Font-family: Use computed style if not already set from CSS parsing
1059
- if (!result.fontFamily && computed.fontFamily) {
1060
- const primaryFont = extractPrimaryFont(computed.fontFamily);
1061
- if (primaryFont) {
1062
- result.fontFamily = primaryFont;
1063
- }
1064
- }
1065
- // Line-height: Use computed style if not already set from CSS parsing
1066
- if (!result.lineHeight && computed.lineHeight) {
1067
- // Keep the computed lineHeight string as-is (e.g., "27.2px"); no conversion to a ratio is done here
1068
- result.lineHeight = computed.lineHeight;
1069
- }
1070
- }
1071
- catch {
1072
- // getComputedStyle may fail in some environments
1073
- }
1074
- }
1075
- return result;
1076
- }
1077
- /**
1078
- * Extract border color from a border shorthand or border-color property.
1079
- * Also handles borderLeft, borderRight, borderTop, borderBottom.
1080
- */
1081
- function extractBorderColorFromStyle(style) {
1082
- if (style.borderColor) {
1083
- return extractHexColor(style.borderColor);
1084
- }
1085
- // Check border shorthand
1086
- if (style.border) {
1087
- // Parse "1px solid #e5e7eb" or similar
1088
- const colorMatch = style.border.match(/#([0-9a-fA-F]{3,6})/);
1089
- if (colorMatch) {
1090
- let hex = colorMatch[1];
1091
- if (hex.length === 3) {
1092
- hex = hex[0] + hex[0] + hex[1] + hex[1] + hex[2] + hex[2];
1093
- }
1094
- return hex.toUpperCase();
1095
- }
1096
- }
1097
- // Check individual border properties (borderLeft, borderRight, borderTop, borderBottom)
1098
- const borderProps = [style.borderLeft, style.borderRight, style.borderTop, style.borderBottom];
1099
- for (const borderValue of borderProps) {
1100
- if (borderValue) {
1101
- // Parse "4px solid #7c3aed" or similar
1102
- const colorMatch = borderValue.match(/#([0-9a-fA-F]{3,6})/);
1103
- if (colorMatch) {
1104
- let hex = colorMatch[1];
1105
- if (hex.length === 3) {
1106
- hex = hex[0] + hex[0] + hex[1] + hex[1] + hex[2] + hex[2];
1107
- }
1108
- return hex.toUpperCase();
1109
- }
1110
- }
1111
- }
1112
- return undefined;
1113
- }
1114
- /**
1115
- * Check if an element is a grid or flex container based on its CSS.
1116
- */
1117
- function isGridOrFlexContainer(element, cssContext) {
1118
- const styles = getElementStyles(element, cssContext);
1119
- return styles.display === "grid" || styles.display === "flex";
1120
- }
1121
- /**
1122
- * GENERALIZED: Check if an element is a horizontal flex container.
1123
- * A horizontal flex container has display: flex with flex-direction: row (or unset, since row is default).
1124
- * This is used to detect containers where flex items should be visually separated
1125
- * with a separator character (like " • ") to represent the CSS gap.
1126
- *
1127
- * @param element - The element to check
1128
- * @param cssContext - CSS context for style resolution
1129
- * @returns true if this is a horizontal flex container, false otherwise
1130
- */
1131
- function isHorizontalFlexContainer(element, cssContext) {
1132
- const styles = getElementStyles(element, cssContext);
1133
- // Must be a flex container
1134
- if (styles.display !== "flex") {
1135
- return false;
1136
- }
1137
- // Check flex-direction - horizontal if "row" or "row-reverse" or not set (default is row)
1138
- // Vertical (column) containers should NOT have separators between items
1139
- const direction = styles.flexDirection?.toLowerCase();
1140
- if (direction === "column" || direction === "column-reverse") {
1141
- return false;
1142
- }
1143
- // Also check inline styles for flex-direction override
1144
- const inlineStyle = element.getAttribute("style") || "";
1145
- const directionMatch = inlineStyle.match(/flex-direction\s*:\s*([^;]+)/i);
1146
- if (directionMatch) {
1147
- const inlineDirection = directionMatch[1].trim().toLowerCase();
1148
- if (inlineDirection === "column" || inlineDirection === "column-reverse") {
1149
- return false;
1150
- }
1151
- }
1152
- return true;
1153
- }
1154
- /**
1155
- * Check if an SVG element is purely decorative (background pattern, decoration, etc).
1156
- * Decorative SVGs should NOT be converted to chart placeholders.
1157
- *
1158
- * Detection criteria (style-based, not class-name-based per skill rules):
1159
- * 1. Fixed/absolute positioning (background elements)
1160
- * 2. Very low opacity (< 0.5)
1161
- * 3. Contains only pattern definitions (<defs>, <pattern>, <linearGradient>)
1162
- * 4. Is a background overlay (pointer-events: none)
1163
- * 5. Very small viewBox or width/height attributes (width < 100 or height < 50, i.e. icon-sized rather than chart-sized)
1164
- */
1165
- function isDecorativeSvg(svgElement, parentElement, cssContext) {
1166
- // Check parent container styles
1167
- const parentStyles = getElementStyles(parentElement, cssContext);
1168
- const parentInlineStyle = parentElement.getAttribute("style") || "";
1169
- // Check SVG's own styles
1170
- const svgInlineStyle = svgElement.getAttribute("style") || "";
1171
- // 1. Check for fixed/absolute positioning (typically background elements)
1172
- const positionMatch = parentInlineStyle.match(/position\s*:\s*(fixed|absolute)/i) ||
1173
- svgInlineStyle.match(/position\s*:\s*(fixed|absolute)/i);
1174
- if (positionMatch) {
1175
- return true;
1176
- }
1177
- // 2. Check for low opacity (decorative overlays)
1178
- const parentOpacityMatch = parentInlineStyle.match(/opacity\s*:\s*([0-9.]+)/i);
1179
- const svgOpacityMatch = svgInlineStyle.match(/opacity\s*:\s*([0-9.]+)/i);
1180
- const opacityAttr = svgElement.getAttribute("opacity");
1181
- const opacity = parseFloat(parentOpacityMatch?.[1] || svgOpacityMatch?.[1] || opacityAttr || "1");
1182
- if (opacity < 0.5) {
1183
- return true;
1184
- }
1185
- // 3. Check for pointer-events: none (non-interactive background) or a parent whose resolved display is "none"
1186
- if (parentInlineStyle.includes("pointer-events: none") ||
1187
- svgInlineStyle.includes("pointer-events: none") ||
1188
- parentStyles.display === "none") {
1189
- return true;
1190
- }
1191
- // 4. Check if SVG contains only definitions (patterns, gradients) - no actual shapes
1192
- const hasOnlyDefs = svgElement.children.length > 0 &&
1193
- Array.from(svgElement.children).every((child) => ["defs", "style", "title", "desc"].includes(child.tagName.toLowerCase()));
1194
- if (hasOnlyDefs) {
1195
- return true;
1196
- }
1197
- // 5. Check for very small viewBox (likely an icon, not a chart)
1198
- // Charts must be at least 100x50 to match export.ts isChartSvg()
1199
- const viewBox = svgElement.getAttribute("viewBox");
1200
- if (viewBox) {
1201
- const parts = viewBox.split(/\s+/).map(Number);
1202
- if (parts.length >= 4) {
1203
- const width = parts[2] || 0;
1204
- const height = parts[3] || 0;
1205
- // Charts must be at least 100x50 - anything smaller is decorative
1206
- if (width > 0 && height > 0 && (width < 100 || height < 50)) {
1207
- return true;
1208
- }
1209
- }
1210
- }
1211
- else {
1212
- // No viewBox - check width/height attributes
1213
- const widthAttr = svgElement.getAttribute("width");
1214
- const heightAttr = svgElement.getAttribute("height");
1215
- if (widthAttr && heightAttr) {
1216
- const width = parseFloat(widthAttr) || 0;
1217
- const height = parseFloat(heightAttr) || 0;
1218
- // Charts must be at least 100x50
1219
- if (width > 0 && height > 0 && (width < 100 || height < 50)) {
1220
- return true;
1221
- }
1222
- }
1223
- }
1224
- // 6. Check if SVG is purely a pattern/gradient container
1225
- // These typically have <rect> with fill="url(#pattern)" or only <circle>/<path> with very low complexity
1226
- const shapes = svgElement.querySelectorAll("rect, circle, ellipse, path, polygon, polyline, line");
1227
- if (shapes.length > 0) {
1228
- // Count shapes that use pattern/gradient fills (decorative)
1229
- let decorativeShapeCount = 0;
1230
- for (const shape of shapes) {
1231
- const fill = shape.getAttribute("fill") || "";
1232
- const stroke = shape.getAttribute("stroke") || "";
1233
- if (fill.includes("url(#") || stroke.includes("url(#")) {
1234
- decorativeShapeCount++;
1235
- }
1236
- }
1237
- // If all shapes use pattern/gradient fills, it's likely decorative
1238
- if (decorativeShapeCount === shapes.length && shapes.length <= 5) {
1239
- return true;
1240
- }
1241
- }
1242
- // 7. Check if this appears to be a progress circle (small circular skills indicator)
1243
- // These have circle elements with stroke-dasharray for progress
1244
- const circles = svgElement.querySelectorAll("circle");
1245
- if (circles.length > 0 && circles.length <= 2) {
1246
- const hasStrokeDasharray = Array.from(circles).some((c) => c.getAttribute("stroke-dasharray"));
1247
- if (hasStrokeDasharray) {
1248
- // This is a progress indicator, not a chart - treat as decorative for now
1249
- // In the future, we could extract the percentage
1250
- return true;
1251
- }
1252
- }
1253
- // 8. Check if parent is a centered flex container (typical hero image pattern)
1254
- // Hero images typically have: display: flex; align-items: center; justify-content: center
1255
- // These contain illustrative SVGs which should be included as images, not filtered
1256
- // We no longer filter these - they are legitimate content images
1257
- // (Previously we filtered centered flex containers, but hero images should be included)
1258
- // 9. SVGs with sufficient size should be included as images
1259
- // We only filter out truly decorative elements (icons, patterns, progress circles)
1260
- // Large SVGs (illustrations, charts, diagrams) should all be included
1261
- return false;
1262
- }
1263
/**
 * Detect a two-column "sidebar + main" CSS grid container.
 *
 * The element qualifies when its resolved styles have `display: grid` and a
 * `grid-template-columns` value made of exactly two whitespace-separated tracks where:
 *   - the first track is a positive pixel width of at most 400px, or a positive
 *     percentage of at most 35%; and
 *   - the second track is `auto`, a percentage of at least 50%, or an `fr` value
 *     of at least 1 (which includes `1fr`).
 *
 * Track values that contain spaces (e.g. `minmax(200px, 1fr)`) split into more
 * than two tokens and are therefore not recognised.
 *
 * @param {Element} element - Candidate grid container.
 * @param {object} cssContext - Parsed CSS context, forwarded to getElementStyles.
 * @returns {number|undefined} Sidebar width as a percentage, or undefined when the
 *   element is not a two-column sidebar layout.
 */
function isTwoColumnGridLayout(element, cssContext) {
    const styles = getElementStyles(element, cssContext);
    if (styles.display !== "grid") {
        return undefined;
    }
    const gridCols = styles.gridTemplateColumns;
    if (!gridCols) {
        return undefined;
    }
    const tracks = gridCols.trim().split(/\s+/);
    if (tracks.length !== 2) {
        return undefined;
    }
    const [first, second] = tracks;
    const secondIsMain = second === "auto" ||
        (second.endsWith("%") && Number.parseFloat(second) >= 50) ||
        (second.endsWith("fr") && Number.parseFloat(second) >= 1);
    if (!secondIsMain) {
        return undefined;
    }
    if (first.endsWith("px")) {
        const pxWidth = Number.parseInt(first, 10);
        // Reject NaN, zero and negative widths: they cannot describe a visible sidebar.
        if (!(pxWidth > 0 && pxWidth <= 400)) {
            return undefined;
        }
        // Assume ~1100px total width for typical documents.
        return Math.round((pxWidth / 1100) * 100);
    }
    if (first.endsWith("%")) {
        const percent = Number.parseFloat(first);
        return percent > 0 && percent <= 35 ? percent : undefined;
    }
    // Any other unit (fr, em, auto, ...) is not treated as a sidebar track.
    return undefined;
}
1311
/**
 * Pick the sidebar and main-content elements of a two-column container.
 *
 * `container.children` only ever contains element nodes, so no node-type
 * filtering is needed (and no global `Node` object is required).
 *
 * @param {Element} container - The two-column container.
 * @returns {[Element, Element]|undefined} The first two child elements as
 *   `[sidebar, main]`, or undefined when there are fewer than two.
 */
function findTwoColumnChildren(container) {
    const children = Array.from(container.children);
    if (children.length < 2) {
        return undefined;
    }
    // First child is the sidebar (aside, div, etc.), second is the main content.
    const [sidebar, main] = children;
    return [sidebar, main];
}
1323
/**
 * Detect a flex container whose children should be laid out as columns.
 *
 * The check is deliberately permissive: any element whose resolved style is
 * `display: flex` and that has between two and four child elements is treated
 * as a column layout. The children's own `flex` / width values are NOT
 * inspected, so the columns are only assumed to be equal-width.
 *
 * @param {Element} element - Candidate flex container.
 * @param {object} cssContext - Parsed CSS context, forwarded to getElementStyles.
 * @returns {Element[]|undefined} The child elements (one per column), or
 *   undefined when the element is not a 2-4 child flex container.
 */
function detectFlexEqualColumns(element, cssContext) {
    const styles = getElementStyles(element, cssContext);
    if (styles.display !== "flex") {
        return undefined;
    }
    // `children` already contains element nodes only.
    const columns = Array.from(element.children);
    if (columns.length < 2 || columns.length > 4) {
        return undefined;
    }
    return columns;
}
1362
/**
 * Detect a "skill item" (a name paired with a percentage) by structure alone.
 *
 * The element must have between one and three child elements. One of them
 * (the "header") must have exactly two children, both of which are leaf
 * elements with non-blank text. One of the two texts must be a percentage
 * such as "95%" or "87.5%"; the other is taken as the skill name.
 *
 * @param {Element} element - Candidate skill container.
 * @returns {{name: string, percentage: string}|undefined} The trimmed name and
 *   percentage text, or undefined when the structure does not match.
 */
function detectSkillItem(element) {
    // Whole or decimal number immediately followed by a percent sign.
    const percentPattern = /^\d+(?:\.\d+)?%$/;
    const children = Array.from(element.children);
    if (children.length === 0 || children.length > 3) {
        return undefined;
    }
    for (const child of children) {
        // A header has exactly two children; anything else is e.g. a progress bar.
        if (child.children.length !== 2) {
            continue;
        }
        const labels = Array.from(child.children)
            .filter((c) => c.children.length === 0 && c.textContent?.trim());
        if (labels.length !== 2) {
            continue;
        }
        const [first, second] = labels.map((label) => label.textContent.trim());
        if (percentPattern.test(second)) {
            return { name: first, percentage: second };
        }
        if (percentPattern.test(first)) {
            return { name: second, percentage: first };
        }
    }
    return undefined;
}
1397
/**
 * Recognise a "language item": a short text label next to a row of
 * proficiency indicators (dots).
 *
 * Scans the element's children in order:
 *   - any h1-h6 child aborts detection (title blocks are not language items);
 *   - a leaf child with text becomes the label, unless it is longer than 50
 *     characters or more than 5 words, which also aborts detection;
 *   - a child with 3-10 children that are all <span> is treated as the
 *     indicator row; each span is classified as filled or empty from its
 *     resolved background color.
 *
 * @param {Element} element - Candidate container.
 * @param {object} cssContext - Parsed CSS context, forwarded to getElementStyles.
 * @returns {{name: string, filledCount: number, totalCount: number}|undefined}
 *   The label and indicator counts, or undefined when no label or no indicator
 *   row was found.
 */
function detectLanguageItem(element, cssContext) {
    const parts = Array.from(element.children);
    if (parts.length < 2) {
        return undefined;
    }
    // Decide whether one indicator span counts as "filled".
    // NOTE(review): a background whose hex value starts with "FF" only counts as
    // empty when it is an rgba() color with alpha <= 0.15; an opaque white
    // background is therefore counted as filled. Confirm this is intended.
    const isFilledIndicator = (indicator) => {
        const bgColor = getElementStyles(indicator, cssContext).backgroundColor;
        if (!bgColor || bgColor === "transparent" || bgColor === "inherit") {
            return false;
        }
        const hexBg = extractHexColor(bgColor);
        if (!hexBg) {
            return false;
        }
        if (hexBg !== "FFFFFF" && !hexBg.startsWith("FF")) {
            return true;
        }
        const alpha = bgColor.match(/rgba.*,\s*([0-9.]+)\s*\)/)?.[1];
        return !alpha || parseFloat(alpha) > 0.15;
    };
    let name = "";
    let filled = 0;
    let total = 0;
    for (const part of parts) {
        if (/^h[1-6]$/.test(part.tagName.toLowerCase())) {
            return undefined;
        }
        if (part.children.length === 0 && part.textContent?.trim()) {
            const label = part.textContent.trim();
            if (label.length > 50 || label.split(/\s+/).length > 5) {
                return undefined;
            }
            name = label;
            continue;
        }
        const indicators = Array.from(part.children);
        const looksLikeIndicatorRow = indicators.length >= 3 &&
            indicators.length <= 10 &&
            indicators.every((ind) => ind.tagName.toLowerCase() === "span");
        if (!looksLikeIndicatorRow) {
            continue;
        }
        // NOTE(review): the total is overwritten per row while the filled count
        // keeps accumulating if several rows match.
        total = indicators.length;
        for (const indicator of indicators) {
            if (isFilledIndicator(indicator)) {
                filled++;
            }
        }
    }
    if (name && total > 0) {
        return { name, filledCount: filled, totalCount: total };
    }
    return undefined;
}
1467
/**
 * Flatten the content of a container element (e.g. a sidebar or main column)
 * into a list of document elements: headings, paragraphs, lists and tables.
 *
 * Text color is inherited down the tree: each element's own color (from
 * extractTextColor) replaces the inherited one for its subtree.
 *
 * @param {Element} element - Container whose child nodes are processed.
 * @param {object} cssContext - Parsed CSS context passed to the style helpers.
 * @param {string} [inheritedColor] - Color applied to content without its own.
 * @returns {object[]} Parsed elements in document order.
 */
function parseContainerContent(element, cssContext, inheritedColor) {
    const collected = [];
    // Append a plain paragraph; the color key is only present when a color is set.
    const pushParagraph = (text, color) => {
        collected.push(color ? { type: "paragraph", text, color } : { type: "paragraph", text });
    };
    // Give every run that has no color of its own the fallback color.
    const applyFallbackColor = (runs, color) => {
        if (!color) {
            return;
        }
        for (const run of runs) {
            if (!run.color) {
                run.color = color;
            }
        }
    };
    // Keep the run array when it carries formatting, otherwise collapse to a string.
    const runsOrPlainText = (runs) => (hasInlineFormatting(runs) ? runs : runs.map((r) => r.text).join(""));
    const visitChildren = (parent, color) => {
        for (const child of parent.childNodes) {
            visit(child, color);
        }
    };
    function visit(node, color) {
        // Bare text becomes its own paragraph.
        if (node.nodeType === Node.TEXT_NODE) {
            const text = node.textContent?.trim();
            if (text) {
                pushParagraph(text, color);
            }
            return;
        }
        if (node.nodeType !== Node.ELEMENT_NODE || !(node instanceof Element)) {
            return;
        }
        const tagName = node.tagName.toLowerCase();
        const elementColor = extractTextColor(node, cssContext) || color;
        // Decorative SVGs contribute nothing.
        if (tagName === "svg" && isDecorativeSvg(node, node.parentElement || node, cssContext)) {
            return;
        }
        const headingLevel = parseHeadingLevel(tagName);
        if (headingLevel !== null) {
            const text = getTextContent(node).trim();
            if (text) {
                collected.push({ type: "heading", level: headingLevel, text, color: elementColor });
            }
            return;
        }
        if (tagName === "p") {
            const runs = extractInlineRuns(node, cssContext);
            if (runs.length > 0) {
                const text = runs.map((r) => r.text).join("");
                applyFallbackColor(runs, elementColor);
                if (hasInlineFormatting(runs)) {
                    collected.push({ type: "paragraph", text, runs });
                }
                else {
                    pushParagraph(text, elementColor);
                }
            }
            return;
        }
        if (tagName === "ul" || tagName === "ol") {
            const items = [];
            for (const child of node.children) {
                if (child.tagName.toLowerCase() !== "li") {
                    continue;
                }
                const runs = extractInlineRuns(child, cssContext);
                if (runs.length === 0) {
                    continue;
                }
                applyFallbackColor(runs, elementColor);
                items.push(runsOrPlainText(runs));
            }
            if (items.length > 0) {
                collected.push({ type: "list", ordered: tagName === "ol", items });
            }
            return;
        }
        if (tagName === "table") {
            const rows = [];
            for (const tr of node.querySelectorAll("tr")) {
                const cells = [];
                for (const cell of tr.querySelectorAll("td, th")) {
                    const runs = extractInlineRuns(cell, cssContext);
                    cells.push(runs.length > 0 ? runsOrPlainText(runs) : "");
                }
                if (cells.length > 0) {
                    rows.push(cells);
                }
            }
            if (rows.length > 0) {
                // Cell padding comes from the td rule, falling back to the th rule.
                let cellPadding;
                if (cssContext) {
                    const tdStyle = cssContext.elementStyles.get("td");
                    const thStyle = cssContext.elementStyles.get("th");
                    const paddingStr = tdStyle?.padding || thStyle?.padding;
                    if (paddingStr) {
                        cellPadding = parseCssPaddingToTwips(paddingStr);
                    }
                }
                collected.push({ type: "table", rows, cellPadding });
            }
            return;
        }
        if (tagName === "div") {
            // "Skill Name" + "95%" is emitted as the single line "Skill Name: 95%".
            const skillItem = detectSkillItem(node);
            if (skillItem) {
                pushParagraph(`${skillItem.name}: ${skillItem.percentage}`, elementColor);
                return;
            }
            // Language name + proficiency dots: ● = filled, ○ = empty.
            const languageItem = detectLanguageItem(node, cssContext);
            if (languageItem) {
                const { name, filledCount, totalCount } = languageItem;
                const dots = "●".repeat(filledCount) + "○".repeat(totalCount - filledCount);
                pushParagraph(`${name}: ${dots}`, elementColor);
                return;
            }
        }
        if (CONTAINER_TAGS.includes(tagName)) {
            visitChildren(node, elementColor);
            return;
        }
        // Unknown tag: a leaf with text becomes a paragraph, anything else is descended into.
        const text = getTextContent(node).trim();
        if (text && node.children.length === 0) {
            pushParagraph(text, elementColor);
        }
        else {
            visitChildren(node, elementColor);
        }
    }
    visitChildren(element, inheritedColor);
    return collected;
}
1663
/**
 * Resolve an element's text color from its CSS classes.
 *
 * Class names are checked in attribute order against `cssContext.classColors`;
 * the first class that has a color wins and its value is converted with
 * extractHexColor.
 *
 * @param {Element} element - Element whose `class` attribute is inspected.
 * @param {{classColors: Map<string, string>}} cssContext - Parsed CSS context.
 * @returns {string|undefined} Result of extractHexColor for the first matching
 *   class, or undefined when no class has a color.
 */
function getColorFromClasses(element, cssContext) {
    const classAttr = element.getAttribute("class");
    if (!classAttr) {
        return undefined;
    }
    const classNames = classAttr.split(/\s+/).filter((name) => name !== "");
    const colored = classNames.find((name) => cssContext.classColors.get(name));
    if (colored === undefined) {
        return undefined;
    }
    return extractHexColor(cssContext.classColors.get(colored));
}
1679
/**
 * Tell whether a number is a valid HTML heading level.
 *
 * @param {number} level - Candidate level.
 * @returns {boolean} True when `level` lies in the inclusive range 1-6.
 */
function isHeadingLevel(level) {
    const atLeastOne = 1 <= level;
    return atLeastOne && level <= 6;
}
1682
/**
 * Parse the heading level out of a lower-case tag name.
 *
 * The pattern only matches "h1" through "h6", so a successful match always
 * yields a level between 1 and 6 and needs no further range check.
 *
 * @param {string} tagName - Lower-case tag name, e.g. "h2".
 * @returns {number|null} The level (1-6), or null when the tag is not a heading.
 */
function parseHeadingLevel(tagName) {
    const match = /^h([1-6])$/.exec(tagName);
    return match ? Number.parseInt(match[1], 10) : null;
}
1693
/**
 * Resolve an element's horizontal text alignment.
 *
 * An inline `style="text-align: ..."` declaration takes precedence; otherwise
 * the `textAlign` value resolved by getElementStyles is used. Class names are
 * never inspected directly. Only `left`, `center`, `right` and `justify` are
 * recognised (case-insensitively); any other value is ignored.
 *
 * @param {Element} element - Element to inspect.
 * @param {object} cssContext - Parsed CSS context, forwarded to getElementStyles.
 * @returns {"left"|"center"|"right"|"justify"|undefined} Lower-case alignment,
 *   or undefined when none is declared.
 */
function getTextAlignment(element, cssContext) {
    // Inline declarations win over stylesheet rules, so check them first.
    const inlineStyle = element.getAttribute("style") || "";
    const inlineMatch = inlineStyle.match(/text-align\s*:\s*(left|center|right|justify)/i);
    if (inlineMatch) {
        return inlineMatch[1].toLowerCase();
    }
    const { textAlign } = getElementStyles(element, cssContext);
    if (textAlign) {
        const align = textAlign.toLowerCase();
        if (align === "left" || align === "center" || align === "right" || align === "justify") {
            return align;
        }
    }
    return undefined;
}
1715
/**
 * Read a node's text content as a string.
 *
 * @param {{textContent: ?string}} element - Node to read from.
 * @returns {string} The text content converted with String(), or "" when the
 *   content is null, undefined or otherwise falsy.
 */
function getTextContent(element) {
    const { textContent } = element;
    if (!textContent) {
        return "";
    }
    return String(textContent);
}
1723
/**
 * Lower-case names of inline HTML elements.
 * A container holding only text and these elements can be emitted as one
 * paragraph made of several runs instead of being split into paragraphs.
 */
const INLINE_TAGS = [
    // generic inline wrappers and basic emphasis
    "span", "a", "b", "strong", "i", "em", "u",
    // edits and strike-through
    "s", "strike", "del", "ins",
    // position, size and highlighting
    "sub", "sup", "small", "mark",
    // semantic phrase elements
    "abbr", "cite", "code", "kbd", "samp", "var", "time", "data", "q", "dfn",
    // ruby annotations and bidirectional text
    "ruby", "rt", "rp", "bdi", "bdo",
    // line-break opportunities and breaks
    "wbr", "br",
    // form labels and legacy font markup
    "label", "font",
];
1733
/**
 * Decide whether an element holds nothing but inline content.
 *
 * Text nodes, comments and other non-element nodes never disqualify a
 * container. A child element is acceptable only when its tag is listed in
 * INLINE_TAGS and, recursively, all of its own descendants are inline too.
 *
 * @param {Element} element - Container to inspect.
 * @returns {boolean} True when the container can be rendered as a single
 *   paragraph; false as soon as any block-level descendant is found.
 */
function isInlineOnlyContainer(element) {
    return Array.from(element.childNodes).every((child) => {
        const isElementNode = child.nodeType === Node.ELEMENT_NODE && child instanceof Element;
        if (!isElementNode) {
            // Text, comments and the like are always inline-compatible.
            return true;
        }
        // Every tag outside INLINE_TAGS is treated as block-level.
        return INLINE_TAGS.includes(child.tagName.toLowerCase()) && isInlineOnlyContainer(child);
    });
}
1765
/**
 * Tell whether a lower-case tag name implies bold text.
 *
 * @param {string} tagName - Lower-case tag name.
 * @returns {boolean} True for "strong" and "b".
 */
function isBoldTag(tagName) {
    return ["strong", "b"].includes(tagName);
}
1771
/**
 * Tell whether a lower-case tag name implies italic text.
 *
 * @param {string} tagName - Lower-case tag name.
 * @returns {boolean} True for "em" and "i".
 */
function isItalicTag(tagName) {
    return ["em", "i"].includes(tagName);
}
1777
/**
 * Collect the inline text runs of an element together with their formatting.
 *
 * Walks the element's subtree depth-first. Formatting (bold, italic, colors,
 * super/subscript, underline, font family, font size) is inherited from
 * ancestors and updated by each element's tag, its resolved CSS styles (when a
 * cssContext is given) and finally its inline `style` attribute. `<br>` yields
 * a "\n" run. When a browser-like `window.getComputedStyle` is available, the
 * root element's `::before` content is emitted as the first run (`::after` is
 * not handled).
 *
 * @param {Element} element - Root element whose children are walked.
 * @param {object} [cssContext] - Parsed CSS context for getElementStyles.
 * @param {string} [inheritedColor] - Text color inherited from outside.
 * @param {string} [inheritedFontFamily] - Font family inherited from outside.
 * @returns {object[]} Runs after merging/normalising via mergeAndNormalizeRuns.
 */
function extractInlineRuns(element, cssContext, inheritedColor, inheritedFontFamily) {
    const runs = [];
    // Only these font-weight values are treated as bold.
    const isBoldWeight = (weight) => weight === "700" || weight === "bold" || weight === "600";
    // Pull the first hex color out of a border shorthand (e.g. "1px dotted #b8860b")
    // and normalise it to six upper-case digits; short #rgb/#rgba forms are
    // expanded and any alpha digits are dropped.
    const hexFromBorder = (border) => {
        const digits = border.match(/#([0-9a-f]+)/i)?.[1];
        if (!digits) {
            return undefined;
        }
        if (digits.length === 3 || digits.length === 4) {
            return [...digits.slice(0, 3)].map((d) => d + d).join("").toUpperCase();
        }
        if (digits.length === 6 || digits.length === 8) {
            return digits.slice(0, 6).toUpperCase();
        }
        return undefined;
    };
    // Fold resolved stylesheet values into a formatting record (mutates `fmt`).
    const applyCssStyles = (fmt, styles) => {
        if (styles.color) {
            const hexColor = extractHexColor(styles.color);
            if (hexColor) {
                fmt.color = hexColor;
            }
        }
        if (styles.backgroundColor) {
            const hexBg = extractHexColor(styles.backgroundColor);
            if (hexBg) {
                fmt.backgroundColor = hexBg;
            }
        }
        if (isBoldWeight(styles.fontWeight)) {
            fmt.bold = true;
        }
        if (styles.fontStyle === "italic") {
            fmt.italic = true;
        }
        if (styles.fontFamily) {
            fmt.fontFamily = styles.fontFamily;
        }
        if (styles.fontSize) {
            const halfPoints = parseFontSizeToHalfPoints(styles.fontSize);
            if (halfPoints) {
                fmt.size = halfPoints;
            }
        }
    };
    // Fold an inline style attribute into a formatting record (mutates `fmt`).
    const applyInlineStyle = (fmt, inlineStyle) => {
        if (!inlineStyle) {
            return;
        }
        // Anchored so that "background-color" is not mistaken for "color".
        const colorMatch = inlineStyle.match(/(?:^|;)\s*color\s*:\s*([^;]+)/i);
        if (colorMatch) {
            const hexColor = extractHexColor(colorMatch[1]);
            if (hexColor) {
                fmt.color = hexColor;
            }
        }
        const bgMatch = inlineStyle.match(/background(?:-color)?\s*:\s*([^;]+)/i);
        if (bgMatch) {
            const hexBg = extractHexColor(bgMatch[1]);
            if (hexBg) {
                fmt.backgroundColor = hexBg;
            }
        }
        const weightMatch = inlineStyle.match(/font-weight\s*:\s*([^;]+)/i);
        if (weightMatch && isBoldWeight(weightMatch[1].trim())) {
            fmt.bold = true;
        }
        const styleMatch = inlineStyle.match(/font-style\s*:\s*([^;]+)/i);
        if (styleMatch && styleMatch[1].trim() === "italic") {
            fmt.italic = true;
        }
        const fontFamilyMatch = inlineStyle.match(/font-family\s*:\s*([^;]+)/i);
        if (fontFamilyMatch) {
            const primaryFont = extractPrimaryFont(fontFamilyMatch[1].trim());
            if (primaryFont) {
                fmt.fontFamily = primaryFont;
            }
        }
        const fontSizeMatch = inlineStyle.match(/font-size\s*:\s*([^;]+)/i);
        if (fontSizeMatch) {
            const halfPoints = parseFontSizeToHalfPoints(fontSizeMatch[1].trim());
            if (halfPoints) {
                fmt.size = halfPoints;
            }
        }
    };
    // Emit the root element's ::before content, e.g. for CSS rules like
    // "dd.dish-pairings::before { content: 'Suggested Pairing: '; }".
    if (typeof window !== "undefined" && window.getComputedStyle) {
        try {
            const beforeStyles = window.getComputedStyle(element, "::before");
            const beforeContent = beforeStyles.content;
            // `content` is "none"/"normal" or a quoted string such as '"Suggested Pairing: "'.
            if (beforeContent && beforeContent !== "none" && beforeContent !== "normal") {
                const cleanContent = beforeContent.replace(/^["']|["']$/g, "");
                if (cleanContent) {
                    runs.push({
                        text: cleanContent,
                        bold: isBoldWeight(beforeStyles.fontWeight) || undefined,
                        italic: beforeStyles.fontStyle === "italic" || undefined,
                        color: beforeStyles.color ? extractHexColor(beforeStyles.color) : undefined,
                    });
                }
            }
        }
        catch {
            // Best effort only: getComputedStyle may fail in some environments.
        }
    }
    // Depth-first walk; `fmt` is the formatting inherited from the ancestors and
    // is never mutated here (each element works on its own copy).
    function walkNode(node, fmt) {
        if (node.nodeType === Node.TEXT_NODE) {
            const text = node.textContent || "";
            if (text) {
                // Collapse whitespace runs but keep single spaces between words.
                const normalizedText = text.replace(/\s+/g, " ");
                if (normalizedText.trim() || normalizedText === " ") {
                    runs.push({ text: normalizedText, ...fmt });
                }
            }
            return;
        }
        if (node.nodeType !== Node.ELEMENT_NODE || !(node instanceof Element)) {
            return;
        }
        const tagName = node.tagName.toLowerCase();
        if (tagName === "br") {
            // Line break: carries character formatting but no super/subscript or underline.
            runs.push({
                text: "\n",
                bold: fmt.bold,
                italic: fmt.italic,
                color: fmt.color,
                backgroundColor: fmt.backgroundColor,
                fontFamily: fmt.fontFamily,
                size: fmt.size,
            });
            return;
        }
        const next = {
            ...fmt,
            bold: fmt.bold || isBoldTag(tagName),
            italic: fmt.italic || isItalicTag(tagName),
            superscript: fmt.superscript || tagName === "sup",
            subscript: fmt.subscript || tagName === "sub",
        };
        if (tagName === "u") {
            next.underline = { type: "single" };
        }
        else if (tagName === "abbr") {
            // abbr is rendered with a dotted underline; its color is taken from
            // the element's border-bottom declaration when one is present.
            let underlineColor;
            if (cssContext) {
                const { borderBottom } = getElementStyles(node, cssContext);
                if (borderBottom) {
                    underlineColor = hexFromBorder(borderBottom);
                }
            }
            next.underline = { type: "dotted", color: underlineColor };
        }
        if (cssContext) {
            applyCssStyles(next, getElementStyles(node, cssContext));
        }
        // Inline declarations are applied last so they override stylesheet values.
        applyInlineStyle(next, node.getAttribute("style") || "");
        for (const child of node.childNodes) {
            walkNode(child, next);
        }
    }
    // Formatting of the root element itself (e.g. <td class="positive">),
    // inherited by all of its text children.
    const rootFormat = {
        bold: false,
        italic: false,
        color: inheritedColor,
        backgroundColor: undefined,
        superscript: false,
        subscript: false,
        underline: undefined,
        fontFamily: inheritedFontFamily,
        size: undefined,
    };
    if (cssContext) {
        applyCssStyles(rootFormat, getElementStyles(element, cssContext));
    }
    for (const child of element.childNodes) {
        walkNode(child, rootFormat);
    }
    return mergeAndNormalizeRuns(runs);
}
1987
/**
 * Merge adjacent runs that share identical formatting and trim the outer
 * whitespace of the resulting sequence.
 *
 * Two neighbouring runs are merged when bold, italic, color, backgroundColor,
 * superscript, subscript, fontFamily and size are strictly equal and their
 * underline settings match (same type and color, or both absent).
 *
 * Leading whitespace is removed from the start of the sequence and trailing
 * whitespace from its end. Runs that become empty while trimming are dropped
 * and trimming continues with their neighbour, so the result never starts or
 * ends with whitespace. The input runs are not modified.
 *
 * @param {object[]} runs - Runs in document order; each has at least `text`.
 * @returns {object[]} New array of merged, non-empty runs.
 */
function mergeAndNormalizeRuns(runs) {
    if (runs.length === 0) {
        return [];
    }
    const scalarKeys = [
        "bold", "italic", "color", "backgroundColor",
        "superscript", "subscript", "fontFamily", "size",
    ];
    const underlineEqual = (a, b) => {
        if (!a && !b) {
            return true;
        }
        if (!a || !b) {
            return false;
        }
        return a.type === b.type && a.color === b.color;
    };
    const merged = [];
    for (const run of runs) {
        const last = merged[merged.length - 1];
        const sameFormatting = last !== undefined &&
            scalarKeys.every((key) => last[key] === run[key]) &&
            underlineEqual(last.underline, run.underline);
        if (sameFormatting) {
            last.text += run.text;
        }
        else {
            // Copy so that later concatenation never touches the caller's object.
            merged.push({ ...run });
        }
    }
    // Trim the front; a run that was only whitespace is removed and the next
    // run becomes the new front.
    while (merged.length > 0) {
        merged[0].text = merged[0].text.trimStart();
        if (merged[0].text.length > 0) {
            break;
        }
        merged.shift();
    }
    // Same procedure for the back.
    while (merged.length > 0) {
        const tail = merged[merged.length - 1];
        tail.text = tail.text.trimEnd();
        if (tail.text.length > 0) {
            break;
        }
        merged.pop();
    }
    return merged.filter((r) => r.text.length > 0);
}
2029
- /**
2030
- * Check if any inline run has formatting (bold, italic, color, backgroundColor, fontFamily, size, etc.).
2031
- */
2032
- function hasInlineFormatting(runs) {
2033
- return runs.some((r) => r.bold || r.italic || r.color || r.backgroundColor || r.fontFamily || r.size || r.superscript || r.subscript || r.underline);
2034
- }
2035
- /**
2036
- * Block-level HTML elements that indicate complex content inside a list item.
2037
- * If a <li> contains any of these, it should be processed recursively,
2038
- * not just as inline text.
2039
- */
2040
- const BLOCK_LEVEL_TAGS = new Set([
2041
- "div", "p", "table", "ul", "ol", "blockquote", "pre", "figure",
2042
- "svg", "h1", "h2", "h3", "h4", "h5", "h6", "section", "article",
2043
- "aside", "nav", "header", "footer", "main", "form", "fieldset",
2044
- "dl", "dd", "dt", "address", "hr", "canvas", "video", "audio",
2045
- "picture", "iframe", "object", "embed", "details", "summary",
2046
- ]);
2047
- /**
2048
- * Check if an element contains any block-level children.
2049
- * Used to detect complex list items that need recursive processing.
2050
- */
2051
- function hasBlockLevelChildren(element) {
2052
- for (const child of element.children) {
2053
- const tagName = child.tagName.toLowerCase();
2054
- if (BLOCK_LEVEL_TAGS.has(tagName)) {
2055
- return true;
2056
- }
2057
- }
2058
- return false;
2059
- }
2060
- /**
2061
- * Check if a list item contains nested lists (ul or ol).
2062
- */
2063
- function hasNestedLists(element) {
2064
- return element.querySelector("ul, ol") !== null;
2065
- }
2066
- /**
2067
- * Recursively extract list items from a ul/ol element, flattening nested lists
2068
- * while preserving level information for proper indentation in DOCX.
2069
- *
2070
- * @param listElement - The ul or ol element to extract items from
2071
- * @param cssContext - CSS context for style extraction
2072
- * @param level - Current nesting level (0 = top level)
2073
- * @param parentOrdered - Whether the parent list is ordered
2074
- * @returns Array of ListItem objects with level information
2075
- */
2076
- function extractNestedListItems(listElement, cssContext, level = 0, parentOrdered) {
2077
- const items = [];
2078
- const tagName = listElement.tagName.toLowerCase();
2079
- const isOrdered = parentOrdered !== undefined ? parentOrdered : tagName === "ol";
2080
- for (const child of listElement.children) {
2081
- if (child.tagName.toLowerCase() !== "li")
2082
- continue;
2083
- // Extract inline content from this li (excluding nested lists)
2084
- const inlineNodes = [];
2085
- const nestedLists = [];
2086
- for (const node of child.childNodes) {
2087
- if (node.nodeType === Node.ELEMENT_NODE) {
2088
- const childTagName = node.tagName.toLowerCase();
2089
- if (childTagName === "ul" || childTagName === "ol") {
2090
- nestedLists.push(node);
2091
- }
2092
- else {
2093
- inlineNodes.push(node);
2094
- }
2095
- }
2096
- else if (node.nodeType === Node.TEXT_NODE) {
2097
- inlineNodes.push(node);
2098
- }
2099
- }
2100
- // Create a temporary container to extract inline runs from inline nodes only
2101
- // We need to get text content excluding nested lists
2102
- if (inlineNodes.length > 0) {
2103
- // Build inline content by extracting runs from the li, but we need to
2104
- // handle it carefully to exclude nested list content
2105
- const runs = extractInlineRunsFromNodes(inlineNodes, cssContext);
2106
- if (runs.length > 0 && runs.some(r => r.text.trim())) {
2107
- const content = hasInlineFormatting(runs) ? runs : runs.map(r => r.text).join("");
2108
- items.push({
2109
- content,
2110
- level,
2111
- ordered: isOrdered,
2112
- });
2113
- }
2114
- }
2115
- // Process nested lists recursively
2116
- for (const nestedList of nestedLists) {
2117
- const nestedTagName = nestedList.tagName.toLowerCase();
2118
- const nestedOrdered = nestedTagName === "ol";
2119
- const nestedItems = extractNestedListItems(nestedList, cssContext, level + 1, nestedOrdered);
2120
- items.push(...nestedItems);
2121
- }
2122
- }
2123
- return items;
2124
- }
2125
- /**
2126
- * Extract inline runs from a collection of nodes (excluding nested lists).
2127
- * This is similar to extractInlineRuns but works on a subset of nodes.
2128
- */
2129
- function extractInlineRunsFromNodes(nodes, cssContext) {
2130
- const runs = [];
2131
- function walkNode(node, bold, italic, color, backgroundColor, superscript, subscript) {
2132
- if (node.nodeType === Node.TEXT_NODE) {
2133
- const text = node.textContent || "";
2134
- if (text) {
2135
- runs.push({
2136
- text,
2137
- bold: bold || undefined,
2138
- italic: italic || undefined,
2139
- color,
2140
- backgroundColor,
2141
- superscript: superscript || undefined,
2142
- subscript: subscript || undefined,
2143
- });
2144
- }
2145
- return;
2146
- }
2147
- if (node.nodeType !== Node.ELEMENT_NODE)
2148
208
  return;
2149
- const element = node;
2150
- const tagName = element.tagName.toLowerCase();
2151
- // Skip nested lists entirely - they're handled separately
2152
- if (tagName === "ul" || tagName === "ol")
2153
- return;
2154
- // Determine formatting from tag
2155
- let newBold = bold || tagName === "strong" || tagName === "b";
2156
- let newItalic = italic || tagName === "em" || tagName === "i";
2157
- let newSuperscript = superscript || tagName === "sup";
2158
- let newSubscript = subscript || tagName === "sub";
2159
- let newColor = color;
2160
- let newBackgroundColor = backgroundColor;
2161
- // Get styles from CSS context
2162
- const styles = getElementStyles(element, cssContext);
2163
- if (styles.fontWeight === "700" || styles.fontWeight === "bold" || styles.fontWeight === "600") {
2164
- newBold = true;
2165
- }
2166
- if (styles.fontStyle === "italic") {
2167
- newItalic = true;
2168
- }
2169
- if (styles.color) {
2170
- const extracted = extractHexColor(styles.color);
2171
- if (extracted)
2172
- newColor = extracted;
2173
- }
2174
- if (styles.backgroundColor) {
2175
- const extracted = extractHexColor(styles.backgroundColor);
2176
- if (extracted)
2177
- newBackgroundColor = extracted;
2178
- }
2179
- // Recurse into children
2180
- for (const child of node.childNodes) {
2181
- walkNode(child, newBold, newItalic, newColor, newBackgroundColor, newSuperscript, newSubscript);
2182
- }
2183
- }
2184
- for (const node of nodes) {
2185
- walkNode(node, false, false);
2186
- }
2187
- return mergeAndNormalizeRuns(runs);
2188
- }
2189
- /**
2190
- * Check if an element is a blockquote or callout based on visual styling.
2191
- * Detects elements with:
2192
- * - Semantic <blockquote> tag
2193
- * - OR a div/section/header with both background color AND left border (visual callout pattern)
2194
- * - OR a div/section/header with background/gradient AND padding (styled content box like CTAs)
2195
- * - OR a paragraph with left border (intro sections with accent border)
2196
- */
2197
- function isBlockquoteOrCallout(element, cssContext) {
2198
- const tagName = element.tagName.toLowerCase();
2199
- // Semantic blockquote element
2200
- if (tagName === "blockquote") {
2201
- return true;
2202
- }
2203
- // Get styles for all container-type elements (div, section, article, header, p)
2204
- const styles = getElementStyles(element, cssContext);
2205
- const inlineStyle = element.getAttribute("style") || "";
2206
- // Helper to check for border-left
2207
- const hasBorderLeft = inlineStyle.includes("border-left") ||
2208
- styles.border?.includes("solid") ||
2209
- !!styles.borderColor ||
2210
- !!styles.borderLeft;
2211
- // For paragraphs with border-left, treat as blockquote (intro sections, etc.)
2212
- // This handles patterns like: <p class="intro-section"> with border-left: 4px solid ...
2213
- if (tagName === "p" && hasBorderLeft) {
2214
- return true;
2215
- }
2216
- // For divs, sections, articles, and headers - detect based on visual styling (NOT class names)
2217
- if (tagName === "div" || tagName === "section" || tagName === "article" || tagName === "header") {
2218
- // GENERALIZED: Don't treat as blockquotes if they contain figure > img elements
2219
- // Figures with external images need special handling via export.ts which fetches them
2220
- // and they can't be processed inside parseBlockquoteContent
2221
- const figuresWithImages = element.querySelectorAll("figure img, figure picture");
2222
- if (figuresWithImages.length > 0) {
2223
- return false;
2224
- }
2225
- // Also check CSS border property
2226
- const cssBorder = styles.border || "";
2227
- const hasCssBorder = cssBorder.includes("solid") || cssBorder.includes("px");
2228
- // Check for background color
2229
- const hasBackground = !!styles.backgroundColor &&
2230
- styles.backgroundColor !== "transparent" &&
2231
- styles.backgroundColor !== "inherit";
2232
- // If it has both background AND border, it's likely a callout/blockquote styled box
2233
- if (hasBackground && (hasBorderLeft || hasCssBorder)) {
2234
- return true;
2235
- }
2236
- // Check for styled content boxes (like CTAs, key-takeaways)
2237
- // These have: background + padding + may have border-radius
2238
- // But NOT border (which distinguishes them from callouts)
2239
- if (hasBackground) {
2240
- // Check for padding (indicates a styled content box, not just a wrapper)
2241
- const hasPadding = !!styles.padding ||
2242
- inlineStyle.includes("padding");
2243
- // Check if element has meaningful content (text nodes or block elements)
2244
- // This is more general - any non-empty div with styled background and padding
2245
- const hasContent = element.textContent?.trim().length &&
2246
- element.textContent.trim().length > 0;
2247
- // A styled box with background + padding + content = treat as blockquote
2248
- if (hasPadding && hasContent) {
2249
- return true;
2250
- }
2251
- }
2252
- }
2253
- return false;
2254
- }
2255
- /**
2256
- * Extract color from CSS color value (hex or rgb).
2257
- */
2258
- /**
2259
- * Check if an element has a border-bottom style that should become a horizontal rule.
2260
- * Returns the border color if found, undefined otherwise.
2261
- */
2262
- function extractBorderBottomColor(element, cssContext) {
2263
- // Check inline style for border-bottom
2264
- const inlineStyle = element.getAttribute("style") || "";
2265
- const borderMatch = inlineStyle.match(/border-bottom:\s*([^;]+)/i);
2266
- if (borderMatch) {
2267
- const borderValue = borderMatch[1];
2268
- // Parse "1px solid #e5e7eb" or similar
2269
- const colorMatch = borderValue.match(/#([0-9a-fA-F]{3,6})/);
2270
- if (colorMatch) {
2271
- let hex = colorMatch[1];
2272
- if (hex.length === 3) {
2273
- hex = hex[0] + hex[0] + hex[1] + hex[1] + hex[2] + hex[2];
2274
- }
2275
- return hex.toUpperCase();
2276
- }
2277
- // Check for CSS variable in border
2278
- const varMatch = borderValue.match(/var\s*\(\s*(--[a-zA-Z0-9-]+)\s*\)/);
2279
- if (varMatch) {
2280
- const varValue = cssContext.variables.get(varMatch[1]);
2281
- if (varValue) {
2282
- const hex = extractHexColor(varValue);
2283
- if (hex)
2284
- return hex;
2285
- }
2286
- }
2287
- }
2288
- // Check CSS styles for border-bottom (generalized approach)
2289
- const styles = getElementStyles(element, cssContext);
2290
- // Check border-bottom first (most specific for horizontal rules)
2291
- if (styles.borderBottom) {
2292
- const borderValue = styles.borderBottom;
2293
- const colorMatch = borderValue.match(/#([0-9a-fA-F]{3,6})/);
2294
- if (colorMatch) {
2295
- let hex = colorMatch[1];
2296
- if (hex.length === 3) {
2297
- hex = hex[0] + hex[0] + hex[1] + hex[1] + hex[2] + hex[2];
2298
- }
2299
- return hex.toUpperCase();
2300
- }
2301
- // Check for CSS variable reference in border-bottom
2302
- const varMatch = borderValue.match(/var\s*\(\s*(--[a-zA-Z0-9-]+)\s*\)/);
2303
- if (varMatch) {
2304
- const varValue = cssContext.variables.get(varMatch[1]);
2305
- if (varValue) {
2306
- const hex = extractHexColor(varValue);
2307
- if (hex)
2308
- return hex;
2309
- }
2310
- }
2311
- }
2312
- // Check if this element has a border-bottom defined in CSS
2313
- // The border property might be a shorthand like "1px solid #e5e7eb"
2314
- if (styles.border) {
2315
- const borderValue = styles.border;
2316
- const colorMatch = borderValue.match(/#([0-9a-fA-F]{3,6})/);
2317
- if (colorMatch) {
2318
- let hex = colorMatch[1];
2319
- if (hex.length === 3) {
2320
- hex = hex[0] + hex[0] + hex[1] + hex[1] + hex[2] + hex[2];
2321
- }
2322
- return hex.toUpperCase();
2323
- }
2324
- // Check for CSS variable reference
2325
- const varMatch = borderValue.match(/var\s*\(\s*(--[a-zA-Z0-9-]+)\s*\)/);
2326
- if (varMatch) {
2327
- const varValue = cssContext.variables.get(varMatch[1]);
2328
- if (varValue) {
2329
- const hex = extractHexColor(varValue);
2330
- if (hex)
2331
- return hex;
2332
- }
2333
- }
2334
- }
2335
- // Also check borderColor directly
2336
- if (styles.borderColor) {
2337
- const hex = extractHexColor(styles.borderColor);
2338
- if (hex)
2339
- return hex;
2340
- }
2341
- return undefined;
2342
- }
2343
- /**
2344
- * Extract color from a border-top CSS property on an element.
2345
- * Checks inline styles and CSS classes, and resolves CSS variables.
2346
- */
2347
- function extractBorderTopColor(element, cssContext) {
2348
- // First, check CSS class styles (e.g., .menu-footer { border-top: 2px solid #b8860b; })
2349
- const styles = getElementStyles(element, cssContext);
2350
- if (styles.borderTop) {
2351
- const borderValue = styles.borderTop;
2352
- // Parse "2px solid #b8860b" or similar
2353
- const colorMatch = borderValue.match(/#([0-9a-fA-F]{3,6})/);
2354
- if (colorMatch) {
2355
- let hex = colorMatch[1];
2356
- if (hex.length === 3) {
2357
- hex = hex[0] + hex[0] + hex[1] + hex[1] + hex[2] + hex[2];
2358
- }
2359
- return hex.toUpperCase();
2360
- }
2361
- // Check for CSS variable in border that was already resolved
2362
- // (resolveValue in parseCssContext should have resolved it)
2363
- const resolved = extractHexColor(borderValue);
2364
- if (resolved)
2365
- return resolved;
2366
- }
2367
- // Then check inline style for border-top (higher specificity)
2368
- const inlineStyle = element.getAttribute("style") || "";
2369
- const borderMatch = inlineStyle.match(/border-top:\s*([^;]+)/i);
2370
- if (borderMatch) {
2371
- const borderValue = borderMatch[1];
2372
- // Parse "1px solid #e5e7eb" or similar
2373
- const colorMatch = borderValue.match(/#([0-9a-fA-F]{3,6})/);
2374
- if (colorMatch) {
2375
- let hex = colorMatch[1];
2376
- if (hex.length === 3) {
2377
- hex = hex[0] + hex[0] + hex[1] + hex[1] + hex[2] + hex[2];
2378
- }
2379
- return hex.toUpperCase();
2380
- }
2381
- // Check for CSS variable in border
2382
- const varMatch = borderValue.match(/var\s*\(\s*(--[a-zA-Z0-9-]+)\s*\)/);
2383
- if (varMatch) {
2384
- const varValue = cssContext.variables.get(varMatch[1]);
2385
- if (varValue) {
2386
- const hex = extractHexColor(varValue);
2387
- if (hex)
2388
- return hex;
2389
- }
2390
- }
2391
- }
2392
- return undefined;
2393
- }
2394
- /**
2395
- * Parse a CSS length value to twips.
2396
- * Supports: px, rem, em, pt
2397
- * Returns undefined if parsing fails.
2398
- *
2399
- * Conversions:
2400
- * - 1 inch = 1440 twips
2401
- * - 1 point = 20 twips
2402
- * - 1 rem = 16px (assumed base)
2403
- * - 1px ≈ 0.75pt ≈ 15 twips
2404
- */
2405
- function parseCssLengthToTwips(value) {
2406
- const match = value.trim().match(/^(-?[0-9.]+)(px|rem|em|pt)?$/);
2407
- if (!match)
2408
- return undefined;
2409
- const num = parseFloat(match[1]);
2410
- const unit = match[2] || 'px';
2411
- switch (unit) {
2412
- case 'px':
2413
- return Math.round(num * 15); // 1px ≈ 15 twips
2414
- case 'rem':
2415
- case 'em':
2416
- return Math.round(num * 16 * 15); // 1rem = 16px = 240 twips
2417
- case 'pt':
2418
- return Math.round(num * 20); // 1pt = 20 twips
2419
- default:
2420
- return undefined;
2421
- }
2422
- }
2423
- /**
2424
- * Parse CSS padding shorthand into individual values in twips.
2425
- * Supports: "value", "vertical horizontal", "top horizontal bottom", "top right bottom left"
2426
- * Returns object with top, right, bottom, left in twips.
2427
- */
2428
- function parseCssPaddingToTwips(padding) {
2429
- const parts = padding.trim().split(/\s+/);
2430
- if (parts.length === 0)
2431
- return undefined;
2432
- const values = parts.map(p => parseCssLengthToTwips(p)).filter(v => v !== undefined);
2433
- if (values.length === 0)
2434
- return undefined;
2435
- // CSS shorthand rules:
2436
- // 1 value: all sides
2437
- // 2 values: vertical horizontal
2438
- // 3 values: top horizontal bottom
2439
- // 4 values: top right bottom left
2440
- switch (values.length) {
2441
- case 1:
2442
- return { top: values[0], right: values[0], bottom: values[0], left: values[0] };
2443
- case 2:
2444
- return { top: values[0], right: values[1], bottom: values[0], left: values[1] };
2445
- case 3:
2446
- return { top: values[0], right: values[1], bottom: values[2], left: values[1] };
2447
- case 4:
2448
- return { top: values[0], right: values[1], bottom: values[2], left: values[3] };
2449
- default:
2450
- return undefined;
2451
- }
2452
- }
2453
- /**
2454
- * GENERALIZED: Convert CSS font-size value to DOCX half-points.
2455
- * CSS font-size values: "0.875rem", "14px", "12pt", "1.5rem"
2456
- * DOCX size is in half-points (1pt = 2 half-points)
2457
- * Default browser font-size is 16px = 12pt = 24 half-points
2458
- */
2459
- function parseFontSizeToHalfPoints(fontSize) {
2460
- if (!fontSize)
2461
- return undefined;
2462
- const fs = fontSize.trim();
2463
- if (fs.endsWith("rem")) {
2464
- // rem relative to 16px base, convert to points then half-points
2465
- // 1rem = 16px = 12pt = 24 half-points
2466
- const remValue = parseFloat(fs);
2467
- if (!isNaN(remValue)) {
2468
- return Math.round(remValue * 12 * 2);
2469
- }
2470
- }
2471
- else if (fs.endsWith("em")) {
2472
- // em relative to parent, assume 16px base
2473
- const emValue = parseFloat(fs);
2474
- if (!isNaN(emValue)) {
2475
- return Math.round(emValue * 12 * 2);
2476
- }
2477
- }
2478
- else if (fs.endsWith("px")) {
2479
- // px to points: 1px = 0.75pt, then to half-points
2480
- const pxValue = parseFloat(fs);
2481
- if (!isNaN(pxValue)) {
2482
- return Math.round(pxValue * 0.75 * 2);
2483
- }
2484
- }
2485
- else if (fs.endsWith("pt")) {
2486
- const ptValue = parseFloat(fs);
2487
- if (!isNaN(ptValue)) {
2488
- return Math.round(ptValue * 2);
2489
- }
2490
- }
2491
- return undefined;
2492
- }
2493
- /**
2494
- * GENERALIZED: Convert CSS margin/spacing value to DOCX twips.
2495
- * CSS margin values: "0.5rem", "8px", "6pt", "1rem"
2496
- * DOCX spacing is in twips (1 inch = 1440 twips, 1pt = 20 twips)
2497
- * Default browser font-size is 16px = 12pt
2498
- * 1rem = 16px = 12pt = 240 twips
2499
- */
2500
- function parseMarginToTwips(margin) {
2501
- if (!margin)
2502
- return undefined;
2503
- const m = margin.trim();
2504
- if (m.endsWith("rem")) {
2505
- // rem relative to 16px base
2506
- // 1rem = 16px = 12pt = 240 twips
2507
- const remValue = parseFloat(m);
2508
- if (!isNaN(remValue)) {
2509
- return Math.round(remValue * 240);
2510
- }
2511
- }
2512
- else if (m.endsWith("em")) {
2513
- // em relative to parent, assume 16px base
2514
- const emValue = parseFloat(m);
2515
- if (!isNaN(emValue)) {
2516
- return Math.round(emValue * 240);
2517
- }
2518
- }
2519
- else if (m.endsWith("px")) {
2520
- // px to twips: 1px = 15 twips (1 inch = 96px = 1440 twips)
2521
- const pxValue = parseFloat(m);
2522
- if (!isNaN(pxValue)) {
2523
- return Math.round(pxValue * 15);
2524
- }
2525
- }
2526
- else if (m.endsWith("pt")) {
2527
- // pt to twips: 1pt = 20 twips
2528
- const ptValue = parseFloat(m);
2529
- if (!isNaN(ptValue)) {
2530
- return Math.round(ptValue * 20);
2531
- }
2532
- }
2533
- return undefined;
2534
- }
2535
- /**
2536
- * GENERALIZED: Convert CSS line-height value to DOCX line spacing.
2537
- * CSS line-height: "1.3", "1.7", "24px", "1.5em", "normal"
2538
- * DOCX line spacing with LineRuleType.AUTO uses "240ths of a line"
2539
- * where 240 = single spacing (1.0), 360 = 1.5, 480 = double (2.0)
2540
- * Formula: lineHeight * 240 = DOCX line value
2541
- *
2542
- * @param lineHeight CSS line-height value
2543
- * @param fontSize Optional font size in half-points for px conversion (e.g., 24 = 12pt)
2544
- */
2545
- function parseLineHeightToDocx(lineHeight, fontSize) {
2546
- if (!lineHeight)
2547
- return undefined;
2548
- const lh = lineHeight.trim();
2549
- // "normal" typically equals 1.2 in browsers
2550
- if (lh === "normal") {
2551
- return Math.round(1.2 * 240);
2552
- }
2553
- // Unitless number (e.g., "1.3", "1.7") - most common
2554
- const unitlessValue = parseFloat(lh);
2555
- if (!isNaN(unitlessValue) && !lh.match(/[a-z%]/i)) {
2556
- return Math.round(unitlessValue * 240);
2557
- }
2558
- // em units (e.g., "1.5em") - treat as multiplier
2559
- if (lh.endsWith("em")) {
2560
- const emValue = parseFloat(lh);
2561
- if (!isNaN(emValue)) {
2562
- return Math.round(emValue * 240);
2563
- }
2564
- }
2565
- // px units (e.g., "27.2px" from getComputedStyle)
2566
- // Convert to ratio using font size if available, otherwise estimate
2567
- if (lh.endsWith("px")) {
2568
- const pxValue = parseFloat(lh);
2569
- if (!isNaN(pxValue)) {
2570
- // If we have font size (in half-points), convert:
2571
- // fontSize in half-points / 2 = points
2572
- // points * 4/3 = px (approx: 1pt ≈ 1.333px)
2573
- // lineHeight ratio = pxValue / (fontSize/2 * 1.333)
2574
- if (fontSize && fontSize > 0) {
2575
- const fontSizePx = (fontSize / 2) * (4 / 3);
2576
- const ratio = pxValue / fontSizePx;
2577
- return Math.round(ratio * 240);
2578
- }
2579
- // Without font size, estimate using 16px base (common default)
2580
- // This gives us a rough ratio
2581
- const estimatedRatio = pxValue / 16;
2582
- // Only use if the ratio is reasonable (0.8 to 3.0)
2583
- if (estimatedRatio >= 0.8 && estimatedRatio <= 3.0) {
2584
- return Math.round(estimatedRatio * 240);
2585
- }
2586
- }
2587
- }
2588
- // pt units (e.g., "18pt")
2589
- if (lh.endsWith("pt")) {
2590
- const ptValue = parseFloat(lh);
2591
- if (!isNaN(ptValue) && fontSize && fontSize > 0) {
2592
- // fontSize in half-points / 2 = points
2593
- const fontSizePt = fontSize / 2;
2594
- const ratio = ptValue / fontSizePt;
2595
- return Math.round(ratio * 240);
2596
- }
2597
- }
2598
- // Return undefined to use default line-height
2599
- return undefined;
2600
- }
2601
- function extractHexColor(value) {
2602
- // Handle named CSS colors (common ones used in documents)
2603
- const namedColors = {
2604
- white: "FFFFFF",
2605
- black: "000000",
2606
- red: "FF0000",
2607
- green: "008000",
2608
- blue: "0000FF",
2609
- yellow: "FFFF00",
2610
- orange: "FFA500",
2611
- purple: "800080",
2612
- gray: "808080",
2613
- grey: "808080",
2614
- transparent: "", // Return empty for transparent
2615
- };
2616
- const lowerValue = value.toLowerCase().trim();
2617
- if (namedColors[lowerValue] !== undefined) {
2618
- return namedColors[lowerValue] || undefined;
2619
- }
2620
- // Handle hex colors
2621
- const hexMatch = value.match(/#([0-9a-fA-F]{3,6})/);
2622
- if (hexMatch) {
2623
- let hex = hexMatch[1];
2624
- // Expand 3-digit hex to 6-digit
2625
- if (hex.length === 3) {
2626
- hex = hex[0] + hex[0] + hex[1] + hex[1] + hex[2] + hex[2];
2627
- }
2628
- return hex.toUpperCase();
2629
- }
2630
- // Handle linear-gradient - extract and blend the first color
2631
- const gradientMatch = value.match(/linear-gradient\s*\([^)]+\)/i);
2632
- if (gradientMatch) {
2633
- // Find all rgba colors in the gradient
2634
- const rgbaMatches = value.matchAll(/rgba\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*([0-9.]+)\s*\)/g);
2635
- for (const match of rgbaMatches) {
2636
- const alpha = parseFloat(match[4]);
2637
- // Blend with white based on alpha
2638
- const r = Math.round(parseInt(match[1], 10) * alpha + 255 * (1 - alpha));
2639
- const g = Math.round(parseInt(match[2], 10) * alpha + 255 * (1 - alpha));
2640
- const b = Math.round(parseInt(match[3], 10) * alpha + 255 * (1 - alpha));
2641
- return (r.toString(16).padStart(2, "0") +
2642
- g.toString(16).padStart(2, "0") +
2643
- b.toString(16).padStart(2, "0")).toUpperCase();
2644
- }
2645
- // Fallback: try to find a hex color in the gradient
2646
- const gradientHexMatch = value.match(/#([0-9a-fA-F]{3,6})/);
2647
- if (gradientHexMatch) {
2648
- let hex = gradientHexMatch[1];
2649
- if (hex.length === 3) {
2650
- hex = hex[0] + hex[0] + hex[1] + hex[1] + hex[2] + hex[2];
2651
- }
2652
- return hex.toUpperCase();
2653
- }
2654
- // For gradients without extractable colors, return undefined (no background)
2655
- return undefined;
2656
- }
2657
- // Handle rgb/rgba - check for alpha value
2658
- const rgbaMatch = value.match(/rgba\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*([0-9.]+)\s*\)/);
2659
- if (rgbaMatch) {
2660
- const alpha = parseFloat(rgbaMatch[4]);
2661
- // If alpha is very low (< 0.3), the color is mostly transparent
2662
- // Blend with white to get the effective color
2663
- if (alpha < 0.3) {
2664
- const r = Math.round(parseInt(rgbaMatch[1], 10) * alpha + 255 * (1 - alpha));
2665
- const g = Math.round(parseInt(rgbaMatch[2], 10) * alpha + 255 * (1 - alpha));
2666
- const b = Math.round(parseInt(rgbaMatch[3], 10) * alpha + 255 * (1 - alpha));
2667
- return (r.toString(16).padStart(2, "0") +
2668
- g.toString(16).padStart(2, "0") +
2669
- b.toString(16).padStart(2, "0")).toUpperCase();
2670
209
  }
2671
- // For higher alpha, just use the RGB values
2672
- const r = parseInt(rgbaMatch[1], 10).toString(16).padStart(2, "0");
2673
- const g = parseInt(rgbaMatch[2], 10).toString(16).padStart(2, "0");
2674
- const b = parseInt(rgbaMatch[3], 10).toString(16).padStart(2, "0");
2675
- return (r + g + b).toUpperCase();
2676
- }
2677
- // Handle rgb (no alpha)
2678
- const rgbMatch = value.match(/rgb\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)/);
2679
- if (rgbMatch) {
2680
- const r = parseInt(rgbMatch[1], 10).toString(16).padStart(2, "0");
2681
- const g = parseInt(rgbMatch[2], 10).toString(16).padStart(2, "0");
2682
- const b = parseInt(rgbMatch[3], 10).toString(16).padStart(2, "0");
2683
- return (r + g + b).toUpperCase();
2684
- }
2685
- return undefined;
2686
- }
2687
- /**
2688
- * Extract text color from an element.
2689
- * Checks inline style first, then CSS class rules, then element type rules, then computed style if available.
2690
- * Returns the actual color from the HTML - does NOT filter out any colors.
2691
- * Per the skill rules: ALL styling values MUST be extracted from HTML, never filtered.
2692
- */
2693
- function extractTextColor(element, cssContext) {
2694
- // Check inline style first (highest priority)
2695
- const inlineStyle = element.getAttribute("style") || "";
2696
- const colorMatch = inlineStyle.match(/(?:^|;)\s*color:\s*([^;]+)/i);
2697
- if (colorMatch) {
2698
- let colorValue = colorMatch[1].trim();
2699
- // Resolve CSS variables in inline styles (e.g., var(--color-muted))
2700
- if (cssContext && colorValue.includes("var(")) {
2701
- const varMatch = colorValue.match(/var\s*\(\s*(--[a-zA-Z0-9-]+)\s*\)/);
2702
- if (varMatch) {
2703
- const resolvedValue = cssContext.variables.get(varMatch[1]);
2704
- if (resolvedValue) {
2705
- colorValue = resolvedValue;
2706
- }
210
+ // Fallback - try to get text
211
+ const text = getTextContent(el).trim();
212
+ if (text && el.children.length === 0) {
213
+ if (elementColor) {
214
+ innerElements.push({ type: "paragraph", text, color: elementColor });
2707
215
  }
2708
- }
2709
- const color = extractHexColor(colorValue);
2710
- if (color) {
2711
- return color;
2712
- }
2713
- }
2714
- // Check CSS class rules (resolves CSS variables)
2715
- if (cssContext) {
2716
- const classColor = getColorFromClasses(element, cssContext);
2717
- if (classColor) {
2718
- return classColor;
2719
- }
2720
- // Check element type styles (e.g., body { color: ... }, p { color: ... })
2721
- const elementStyles = getElementStyles(element, cssContext);
2722
- if (elementStyles.color) {
2723
- const color = extractHexColor(elementStyles.color);
2724
- if (color) {
2725
- return color;
216
+ else {
217
+ innerElements.push({ type: "paragraph", text });
2726
218
  }
2727
219
  }
2728
- }
2729
- // Try computed style if available (browser environment)
2730
- if (typeof window !== "undefined" && window.getComputedStyle) {
2731
- try {
2732
- const computed = window.getComputedStyle(element);
2733
- const computedColor = computed.color;
2734
- if (computedColor) {
2735
- const color = extractHexColor(computedColor);
2736
- if (color) {
2737
- return color;
2738
- }
220
+ else {
221
+ for (const child of el.childNodes) {
222
+ processInnerNode(child, elementColor);
2739
223
  }
2740
224
  }
2741
- catch {
2742
- // getComputedStyle may fail in some environments
2743
- }
2744
225
  }
2745
- return undefined;
226
+ // Process all direct children
227
+ for (const child of element.childNodes) {
228
+ processInnerNode(child, inheritedColor);
229
+ }
230
+ return innerElements;
2746
231
  }
2747
232
  /**
2748
233
  * Parse content from a blockquote/callout element.
@@ -2781,11 +266,17 @@ function parseBlockquoteContent(element, cssContext) {
2781
266
  if (headingLevel !== null) {
2782
267
  const text = getTextContent(el).trim();
2783
268
  if (text) {
2784
- // Extract color from the heading element, using the blockquote as parent for nested style lookups
269
+ // Extract styles from the heading element, using the blockquote as parent for nested style lookups
2785
270
  // This handles CSS rules like ".key-takeaways h3 { color: #7c3aed; }"
2786
271
  const headingStyles = getElementStyles(el, cssContext, element);
2787
272
  const color = headingStyles.color ? extractHexColor(headingStyles.color) : undefined;
2788
- innerElements.push({ type: "heading", level: headingLevel, text, color });
273
+ const fontSize = headingStyles.fontSize ? parseFontSizeToHalfPoints(headingStyles.fontSize) : undefined;
274
+ const lineSpacing = headingStyles.lineHeight ? parseLineHeightToDocx(headingStyles.lineHeight) : undefined;
275
+ const fontFamily = headingStyles.fontFamily?.replace(/['"]/g, "");
276
+ const alignment = getTextAlignment(el, cssContext) || blockquoteStyles.textAlign;
277
+ const spacingAfter = headingStyles.marginBottom ? parseCssLengthToTwips(headingStyles.marginBottom) : undefined;
278
+ const letterSpacing = headingStyles.letterSpacing ? parseCssLengthToTwips(headingStyles.letterSpacing) : undefined;
279
+ innerElements.push({ type: "heading", level: headingLevel, text, color, fontSize, lineSpacing, fontFamily, alignment, spacingAfter, letterSpacing });
2789
280
  }
2790
281
  return;
2791
282
  }
@@ -2794,10 +285,25 @@ function parseBlockquoteContent(element, cssContext) {
2794
285
  const runs = extractInlineRuns(el, cssContext);
2795
286
  if (runs.length > 0) {
2796
287
  const text = runs.map((r) => r.text).join("");
2797
- // Extract color from the paragraph element, using the blockquote as parent for nested style lookups
288
+ // Extract styles from the paragraph element, using the blockquote as parent for nested style lookups
2798
289
  // This handles CSS rules like ".cta p { color: white; }" or ".cta { color: white; }"
2799
290
  const paragraphStyles = getElementStyles(el, cssContext, element);
2800
291
  const color = paragraphStyles.color ? extractHexColor(paragraphStyles.color) : undefined;
292
+ const lineSpacing = paragraphStyles.lineHeight ? parseLineHeightToDocx(paragraphStyles.lineHeight) : undefined;
293
+ const fontFamily = paragraphStyles.fontFamily?.replace(/['"]/g, "");
294
+ const alignment = getTextAlignment(el, cssContext) || blockquoteStyles.textAlign;
295
+ const spacingAfter = paragraphStyles.marginBottom ? parseCssLengthToTwips(paragraphStyles.marginBottom) : undefined;
296
+ // Apply paragraph-level font-size to runs that don't have their own size
297
+ if (paragraphStyles.fontSize) {
298
+ const halfPoints = parseFontSizeToHalfPoints(paragraphStyles.fontSize);
299
+ if (halfPoints) {
300
+ runs.forEach(run => {
301
+ if (!run.size) {
302
+ run.size = halfPoints;
303
+ }
304
+ });
305
+ }
306
+ }
2801
307
  // Apply blockquote's italic style to runs that don't have their own italic setting
2802
308
  if (blockquoteIsItalic) {
2803
309
  runs.forEach(run => {
@@ -2815,10 +321,10 @@ function parseBlockquoteContent(element, cssContext) {
2815
321
  }
2816
322
  });
2817
323
  }
2818
- innerElements.push({ type: "paragraph", text, runs, color, italic: blockquoteIsItalic });
324
+ innerElements.push({ type: "paragraph", text, runs, color, italic: blockquoteIsItalic, lineSpacing, fontFamily, alignment, spacingAfter });
2819
325
  }
2820
326
  else {
2821
- innerElements.push({ type: "paragraph", text, color });
327
+ innerElements.push({ type: "paragraph", text, color, lineSpacing, fontFamily, alignment, spacingAfter });
2822
328
  }
2823
329
  }
2824
330
  return;
@@ -2858,10 +364,56 @@ function parseBlockquoteContent(element, cssContext) {
2858
364
  // Pass the blockquote element as parent for nested style lookups
2859
365
  if (tagName === "div") {
2860
366
  const styles = getElementStyles(el, cssContext, element);
367
+ // Handle nested callouts: if this div is itself a styled callout (e.g., .famous-for inside .country-section),
368
+ // emit it as a nested blockquote to preserve its background color and border styling
369
+ if (isBlockquoteOrCallout(el, cssContext)) {
370
+ let nestedBgColor;
371
+ if (styles.backgroundColor) {
372
+ nestedBgColor = extractHexColor(styles.backgroundColor);
373
+ }
374
+ const nestedBorderHex = extractBorderColorFromStyle(styles);
375
+ // Only treat as nested blockquote if it has a visually distinct background or border
376
+ if (nestedBgColor || nestedBorderHex) {
377
+ const nestedContent = parseBlockquoteContent(el, cssContext);
378
+ if (nestedContent.length > 0) {
379
+ let nestedBorderStyle;
380
+ if (styles.borderLeft && !styles.border) {
381
+ nestedBorderStyle = "left";
382
+ }
383
+ else if (styles.border && !styles.border.includes("none")) {
384
+ nestedBorderStyle = "full";
385
+ }
386
+ else if (nestedBgColor && !styles.borderLeft && !styles.border) {
387
+ nestedBorderStyle = "none";
388
+ }
389
+ innerElements.push({
390
+ type: "blockquote",
391
+ content: nestedContent,
392
+ borderColor: nestedBorderHex,
393
+ backgroundColor: nestedBgColor,
394
+ variant: "callout",
395
+ borderStyle: nestedBorderStyle,
396
+ });
397
+ return;
398
+ }
399
+ }
400
+ }
2861
401
  const isBoldOrLabel = styles.fontWeight === "700" ||
2862
402
  styles.fontWeight === "bold" ||
2863
403
  styles.fontWeight === "600";
2864
404
  if (isBoldOrLabel) {
405
+ // Check if this bold div has block-level children — if so, recurse
406
+ // instead of flattening to a single text paragraph (which loses content)
407
+ const blockTags = new Set(["p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "ul", "ol", "table", "blockquote", "section", "article", "header", "figure"]);
408
+ const hasBlockChildren = Array.from(el.children).some((child) => blockTags.has(child.tagName?.toLowerCase()));
409
+ if (hasBlockChildren) {
410
+ // Recurse into children — bold will be inherited by inline run extraction
411
+ for (const child of el.childNodes) {
412
+ processInnerNode(child);
413
+ }
414
+ return;
415
+ }
416
+ // Simple inline-only bold div — flatten to single label paragraph
2865
417
  const text = getTextContent(el).trim();
2866
418
  if (text) {
2867
419
  // Extract the label's color from its own styles (includes nested CSS rules)
@@ -2882,6 +434,34 @@ function parseBlockquoteContent(element, cssContext) {
2882
434
  return;
2883
435
  }
2884
436
  }
437
+ // Handle <img> elements inside blockquotes/callouts
438
+ if (tagName === "img") {
439
+ const src = el.getAttribute("src");
440
+ if (src) {
441
+ const alt = el.getAttribute("alt") || undefined;
442
+ let width;
443
+ let height;
444
+ const widthAttr = el.getAttribute("width");
445
+ const heightAttr = el.getAttribute("height");
446
+ if (widthAttr && !widthAttr.includes("%")) {
447
+ width = parseInt(widthAttr, 10) || undefined;
448
+ }
449
+ if (heightAttr && !heightAttr.includes("%")) {
450
+ height = parseInt(heightAttr, 10) || undefined;
451
+ }
452
+ // Check if img is inside a figure with figcaption
453
+ let caption;
454
+ const parentFigure = el.closest("figure");
455
+ if (parentFigure) {
456
+ const figcaption = parentFigure.querySelector("figcaption");
457
+ if (figcaption) {
458
+ caption = getTextContent(figcaption).trim() || undefined;
459
+ }
460
+ }
461
+ innerElements.push({ type: "image", src, alt, width, height, caption });
462
+ }
463
+ return;
464
+ }
2885
465
  // Container tags - recurse into children
2886
466
  // Include definition list elements (dl, dt, dd) for proper menu parsing
2887
467
  if (["div", "span", "section", "dl", "dd", "figure", "figcaption"].includes(tagName)) {
@@ -2892,36 +472,71 @@ function parseBlockquoteContent(element, cssContext) {
2892
472
  const directChildren = Array.from(el.children);
2893
473
  const hasMultipleFlexChildren = isHorizFlex && directChildren.length > 1;
2894
474
  if (hasOnlyInlineContent && hasMultipleFlexChildren) {
2895
- // GENERALIZED: Convert horizontal flexbox to a single-row borderless table
2896
- // Each flex item becomes a table cell, preserving the horizontal layout
475
+ // Check flex-wrap to determine rendering strategy
2897
476
  const containerStyles = getElementStyles(el, cssContext);
2898
477
  const containerFontFamily = containerStyles.fontFamily;
2899
- const flexChildren = directChildren.filter(child => child.nodeType === Node.ELEMENT_NODE);
2900
- const tableCells = [];
2901
- for (const flexChild of flexChildren) {
2902
- // Extract runs from this flex child
2903
- const childRuns = extractInlineRuns(flexChild, cssContext, undefined, containerFontFamily);
2904
- if (childRuns.length > 0) {
2905
- // Apply blockquote's italic style if applicable
2906
- if (blockquoteIsItalic) {
2907
- childRuns.forEach(run => {
2908
- if (run.italic === undefined) {
2909
- run.italic = true;
2910
- }
2911
- });
478
+ const flexWrap = containerStyles.flexWrap?.toLowerCase();
479
+ const justifyContent = containerStyles.justifyContent?.toLowerCase();
480
+ if (flexWrap === "wrap") {
481
+ // For flex-wrap: wrap, merge all children into a single paragraph with separators
482
+ const flexChildren = directChildren.filter(child => child.nodeType === Node.ELEMENT_NODE);
483
+ const allRuns = [];
484
+ // Determine separator based on content
485
+ const childTexts = flexChildren.map(c => (c.textContent || "").trim());
486
+ const hasPipeSeparators = childTexts.some(t => t === "|");
487
+ const separator = hasPipeSeparators ? " " : " · ";
488
+ for (let ci = 0; ci < flexChildren.length; ci++) {
489
+ const flexChild = flexChildren[ci];
490
+ const childRuns = extractInlineRuns(flexChild, cssContext, undefined, containerFontFamily);
491
+ if (childRuns.length > 0) {
492
+ if (blockquoteIsItalic) {
493
+ childRuns.forEach(run => {
494
+ if (run.italic === undefined) {
495
+ run.italic = true;
496
+ }
497
+ });
498
+ }
499
+ if (allRuns.length > 0 && childRuns.length > 0) {
500
+ allRuns.push({ text: separator });
501
+ }
502
+ allRuns.push(...childRuns);
2912
503
  }
2913
- // Use runs as cell content
2914
- tableCells.push(childRuns);
504
+ }
505
+ if (allRuns.length > 0) {
506
+ const text = allRuns.map(r => r.text).join("");
507
+ innerElements.push({
508
+ type: "paragraph",
509
+ text,
510
+ runs: hasInlineFormatting(allRuns) ? allRuns : undefined,
511
+ alignment: justifyContent === "center" ? "center" : undefined,
512
+ });
2915
513
  }
2916
514
  }
2917
- if (tableCells.length > 0) {
2918
- // Create a single-row borderless table to represent horizontal flexbox
2919
- innerElements.push({
2920
- type: "table",
2921
- rows: [tableCells],
2922
- hasHeader: false,
2923
- noBorders: true,
2924
- });
515
+ else {
516
+ // Non-wrapping flex: Convert to single-row borderless table
517
+ const flexChildren = directChildren.filter(child => child.nodeType === Node.ELEMENT_NODE);
518
+ const tableCells = [];
519
+ for (const flexChild of flexChildren) {
520
+ const childRuns = extractInlineRuns(flexChild, cssContext, undefined, containerFontFamily);
521
+ if (childRuns.length > 0) {
522
+ if (blockquoteIsItalic) {
523
+ childRuns.forEach(run => {
524
+ if (run.italic === undefined) {
525
+ run.italic = true;
526
+ }
527
+ });
528
+ }
529
+ tableCells.push(childRuns);
530
+ }
531
+ }
532
+ if (tableCells.length > 0) {
533
+ innerElements.push({
534
+ type: "table",
535
+ rows: [tableCells],
536
+ hasHeader: false,
537
+ noBorders: true,
538
+ });
539
+ }
2925
540
  }
2926
541
  return;
2927
542
  }
@@ -3052,18 +667,81 @@ export function parseHtmlContent(html) {
3052
667
  // GENERALIZED: Check for border-bottom on heading (e.g., h2 with underline style)
3053
668
  // Any element can have borders - extract from CSS/inline styles
3054
669
  const headingBorderColor = extractBorderBottomColor(element, cssContext);
670
+ // GENERALIZED: Check for border-left on heading (e.g., h2 with left accent bar)
671
+ // Extract from CSS class styles or inline styles
672
+ let headingBorderLeft;
673
+ let headingBorderLeftWidth;
674
+ if (hStyles.borderLeft) {
675
+ const borderLeftColor = extractBorderColorFromStyle({ borderLeft: hStyles.borderLeft });
676
+ if (borderLeftColor) {
677
+ headingBorderLeft = borderLeftColor;
678
+ }
679
+ // Extract border-left width in pixels and convert to OOXML eighth-of-a-point
680
+ // CSS "4px solid #7c3aed" → 4px → 3pt → 24 eighth-of-a-point
681
+ const widthMatch = hStyles.borderLeft.match(/(\d+(?:\.\d+)?)\s*px/);
682
+ if (widthMatch) {
683
+ const cssPixels = parseFloat(widthMatch[1]);
684
+ // 1px CSS ≈ 0.75pt, OOXML border size is in eighth-of-a-point
685
+ headingBorderLeftWidth = Math.round(cssPixels * 0.75 * 8);
686
+ }
687
+ }
688
+ else if (hStyles.borderColor && hStyles.border && !hStyles.border.includes("none")) {
689
+ headingBorderLeft = hStyles.borderColor;
690
+ }
691
+ // GENERALIZED: Extract background-color for heading (e.g., h2 with highlight background)
692
+ let headingBackgroundColor;
693
+ if (hStyles.backgroundColor) {
694
+ const hex = extractHexColor(hStyles.backgroundColor);
695
+ if (hex) {
696
+ headingBackgroundColor = hex;
697
+ }
698
+ }
3055
699
  // When heading has border-bottom, reduce heading after spacing
3056
700
  // and add HR with top border to simulate CSS padding-bottom
3057
701
  // CSS padding-bottom: 0.5rem ≈ 100 twips (space from text to border)
3058
- const spacingAfter = headingBorderColor ? 100 : undefined;
702
+ // Also extract margin-bottom for general heading spacing
703
+ let spacingAfter;
704
+ if (headingBorderColor) {
705
+ spacingAfter = 100;
706
+ }
707
+ else if (hStyles.marginBottom) {
708
+ // Apply CSS margin-bottom for heading spacing:
709
+ // - For values > 240 twips: use the CSS value
710
+ // - For values <= 120 twips: use with 60 twip min (tight layouts)
711
+ // - For moderate values: keep DOCX default
712
+ const cssSpacing = parseMarginToTwips(hStyles.marginBottom);
713
+ if (cssSpacing !== undefined) {
714
+ if (cssSpacing > 240) {
715
+ spacingAfter = cssSpacing;
716
+ }
717
+ else if (cssSpacing <= 120) {
718
+ spacingAfter = Math.max(cssSpacing, 60);
719
+ }
720
+ }
721
+ }
3059
722
  // GENERALIZED: Extract line-height for vertical spacing
3060
723
  // CSS line-height: 1.2 for titles, 1.3 for other headings, etc.
3061
724
  const lineSpacing = hStyles.lineHeight ? parseLineHeightToDocx(hStyles.lineHeight) : undefined;
725
+ // Extract font-size from CSS for heading (overrides document style defaults)
726
+ const headingFontSize = hStyles.fontSize ? parseFontSizeToHalfPoints(hStyles.fontSize) : undefined;
727
+ // Extract margin-top for heading spacing before
728
+ let spacingBefore;
729
+ if (hStyles.marginTop) {
730
+ const cssBefore = parseMarginToTwips(hStyles.marginTop);
731
+ if (cssBefore !== undefined && cssBefore > 0) {
732
+ spacingBefore = cssBefore;
733
+ }
734
+ }
735
+ // Extract letter-spacing for heading
736
+ const headingLetterSpacing = hStyles.letterSpacing ? parseLetterSpacingToTwips(hStyles.letterSpacing) : undefined;
3062
737
  // Check for gradient text (CSS background-clip: text)
3063
738
  // If gradient exists, create runs array with gradient info
739
+ // Always use the first gradient stop color as fallback — the gradient IS the intended
740
+ // text color, so it should take priority over the element's regular CSS color (e.g. h1 { color: #1a1a1a })
3064
741
  let runs;
3065
742
  if (hStyles.gradient) {
3066
- runs = [{ text, gradient: hStyles.gradient, color: elementColor }];
743
+ const gradientFallbackColor = hStyles.gradient.stops[0]?.color || elementColor;
744
+ runs = [{ text, gradient: hStyles.gradient, color: gradientFallbackColor }];
3067
745
  }
3068
746
  elements.push({
3069
747
  type: "heading",
@@ -3071,11 +749,17 @@ export function parseHtmlContent(html) {
3071
749
  text,
3072
750
  alignment,
3073
751
  color: elementColor,
752
+ backgroundColor: headingBackgroundColor,
3074
753
  textTransform,
3075
754
  spacingAfter,
755
+ spacingBefore,
3076
756
  fontFamily: headingFontFamily,
3077
757
  runs,
3078
758
  lineSpacing,
759
+ borderLeft: headingBorderLeft,
760
+ borderLeftWidth: headingBorderLeftWidth,
761
+ fontSize: headingFontSize,
762
+ letterSpacing: headingLetterSpacing,
3079
763
  });
3080
764
  // Add horizontal-rule for border-bottom effect
3081
765
  // Use top border with minimal before spacing - border appears right at top
@@ -3088,13 +772,37 @@ export function parseHtmlContent(html) {
3088
772
  }
3089
773
  // Handle <hr> elements
3090
774
  if (tagName === "hr") {
3091
- // Try to extract color from inline style or CSS
775
+ // Try to extract color from inline style first, then CSS classes
3092
776
  const hrStyle = element.getAttribute("style") || "";
3093
777
  const colorMatch = hrStyle.match(/(?:border-color|background-color|color):\s*([^;]+)/i);
3094
778
  let hrColor;
3095
779
  if (colorMatch) {
3096
780
  hrColor = extractHexColor(colorMatch[1]);
3097
781
  }
782
+ // Fall back to CSS class styles if no inline color found
783
+ if (!hrColor) {
784
+ const hrStyles = getElementStyles(element, cssContext);
785
+ if (hrStyles.borderColor) {
786
+ hrColor = extractHexColor(hrStyles.borderColor);
787
+ }
788
+ else if (hrStyles.backgroundColor) {
789
+ hrColor = extractHexColor(hrStyles.backgroundColor);
790
+ }
791
+ else if (hrStyles.color) {
792
+ hrColor = extractHexColor(hrStyles.color);
793
+ }
794
+ else if (hrStyles.border) {
795
+ hrColor = extractBorderColorFromStyle(hrStyles);
796
+ }
797
+ else if (hrStyles.borderBottom) {
798
+ // Extract color from border-bottom shorthand (e.g., "2px solid #ff0000")
799
+ hrColor = extractBorderColorFromStyle({ border: hrStyles.borderBottom });
800
+ }
801
+ else if (hrStyles.borderTop) {
802
+ // Extract color from border-top shorthand
803
+ hrColor = extractBorderColorFromStyle({ border: hrStyles.borderTop });
804
+ }
805
+ }
3098
806
  elements.push({ type: "horizontal-rule", color: hrColor });
3099
807
  return;
3100
808
  }
@@ -3113,23 +821,38 @@ export function parseHtmlContent(html) {
3113
821
  if (hex)
3114
822
  backgroundColor = hex;
3115
823
  }
3116
- // Extract border color from borderLeft property
3117
- if (elementStyles.borderLeft) {
3118
- const hex = extractBorderColorFromStyle({ borderLeft: elementStyles.borderLeft });
3119
- if (hex)
3120
- borderColor = hex;
824
+ // Extract border color - use unified extraction that checks all border properties
825
+ // in priority order: borderColor > border > borderLeft > borderRight > borderTop > borderBottom
826
+ const borderHex = extractBorderColorFromStyle(elementStyles);
827
+ if (borderHex)
828
+ borderColor = borderHex;
829
+ // Determine borderStyle using the same logic as the div/section path
830
+ // If has background but no border → "none" (full background, no accent bar)
831
+ // If has border shorthand → "full"
832
+ // Otherwise → "left" (default accent bar)
833
+ let borderStyle = "left";
834
+ const hasBorderStylingP = !!borderColor ||
835
+ !!elementStyles.border ||
836
+ !!elementStyles.borderLeft ||
837
+ !!elementStyles.borderColor;
838
+ if (!hasBorderStylingP && backgroundColor) {
839
+ borderStyle = "none";
3121
840
  }
3122
- else if (elementStyles.borderColor) {
3123
- const hex = extractHexColor(elementStyles.borderColor);
3124
- if (hex)
3125
- borderColor = hex;
841
+ else if (elementStyles.border && !elementStyles.border.includes("none")) {
842
+ if (!elementStyles.borderLeft || elementStyles.border !== elementStyles.borderLeft) {
843
+ borderStyle = "full";
844
+ }
845
+ }
846
+ else if (backgroundColor && !elementStyles.borderLeft && !elementStyles.border) {
847
+ // Has background color but only borderColor (from class), no explicit border-left → "none"
848
+ borderStyle = "none";
3126
849
  }
3127
850
  elements.push({
3128
851
  type: "blockquote",
3129
852
  content,
3130
853
  borderColor,
3131
854
  backgroundColor,
3132
- borderStyle: "left",
855
+ borderStyle,
3133
856
  });
3134
857
  }
3135
858
  return;
@@ -3158,6 +881,16 @@ export function parseHtmlContent(html) {
3158
881
  }
3159
882
  }
3160
883
  }
884
+ // Handle padding-left as left indent for paragraphs
885
+ // CSS pattern: .clause { padding-left: 2rem; text-indent: -2rem; } creates hanging indent
886
+ let leftIndent;
887
+ const paddingLeftValue = pStyles.paddingLeft || extractPaddingLeft(pStyles.padding);
888
+ if (paddingLeftValue) {
889
+ const leftTwips = parseCssLengthToTwips(paddingLeftValue);
890
+ if (leftTwips !== undefined && leftTwips > 0) {
891
+ leftIndent = leftTwips;
892
+ }
893
+ }
3161
894
  // Handle font-style: italic
3162
895
  if (pStyles.fontStyle === "italic") {
3163
896
  italic = true;
@@ -3177,6 +910,33 @@ export function parseHtmlContent(html) {
3177
910
  }
3178
911
  // Extract font-family from paragraph styles (will be inherited from body if not set on p)
3179
912
  const paragraphFontFamily = pStyles.fontFamily;
913
+ // GENERALIZED: Extract line-height and margin-bottom for paragraph spacing
914
+ const lineSpacing = pStyles.lineHeight ? parseLineHeightToDocx(pStyles.lineHeight) : undefined;
915
+ // Apply CSS margin-bottom when explicitly set:
916
+ // - For values > 240 twips (default): use the CSS value (larger spacing)
917
+ // - For values <= 120 twips (tight): use the CSS value with 60 twip min (respect tight layouts)
918
+ // - For values 120-240 (moderate): keep DOCX default to avoid margin-collapsing issues
919
+ const cssParaSpacing = pStyles.marginBottom ? parseMarginToTwips(pStyles.marginBottom) : undefined;
920
+ let spacingAfter;
921
+ if (cssParaSpacing !== undefined) {
922
+ if (cssParaSpacing > 240) {
923
+ spacingAfter = cssParaSpacing;
924
+ }
925
+ else if (cssParaSpacing <= 120) {
926
+ spacingAfter = Math.max(cssParaSpacing, 60);
927
+ }
928
+ }
929
+ // Extract margin-top for spacingBefore (spacing before paragraph)
930
+ const cssParaSpacingBefore = pStyles.marginTop ? parseMarginToTwips(pStyles.marginTop) : undefined;
931
+ let spacingBefore;
932
+ if (cssParaSpacingBefore !== undefined) {
933
+ if (cssParaSpacingBefore > 240) {
934
+ spacingBefore = cssParaSpacingBefore;
935
+ }
936
+ else if (cssParaSpacingBefore <= 120) {
937
+ spacingBefore = Math.max(cssParaSpacingBefore, 60);
938
+ }
939
+ }
3180
940
  // Apply fontFamily to runs if present and runs don't have their own fontFamily
3181
941
  if (paragraphFontFamily) {
3182
942
  runs.forEach(run => {
@@ -3185,6 +945,18 @@ export function parseHtmlContent(html) {
3185
945
  }
3186
946
  });
3187
947
  }
948
+ // Apply paragraph-level font-size to runs that don't have their own size
949
+ // This handles CSS like ".subtitle { font-size: 1.2rem; }" on <p class="subtitle">
950
+ if (pStyles.fontSize) {
951
+ const halfPoints = parseFontSizeToHalfPoints(pStyles.fontSize);
952
+ if (halfPoints) {
953
+ runs.forEach(run => {
954
+ if (!run.size) {
955
+ run.size = halfPoints;
956
+ }
957
+ });
958
+ }
959
+ }
3188
960
  if (hasInlineFormatting(runs)) {
3189
961
  // Has mixed formatting - use runs array
3190
962
  // Apply effective color to runs that don't have their own color
@@ -3195,17 +967,17 @@ export function parseHtmlContent(html) {
3195
967
  }
3196
968
  });
3197
969
  }
3198
- elements.push({ type: "paragraph", text, runs, alignment, firstLineIndent, hangingIndent, textTransform });
970
+ elements.push({ type: "paragraph", text, runs, alignment, firstLineIndent, hangingIndent, leftIndent, textTransform, lineSpacing, spacingAfter, spacingBefore });
3199
971
  }
3200
972
  else if (effectiveColor || italic || bold || paragraphFontFamily) {
3201
973
  // Has color, italic, bold, or fontFamily - use runs to preserve formatting
3202
974
  // Create runs with the appropriate formatting
3203
975
  const formattedRuns = [{ text, bold, italic, color: effectiveColor, fontFamily: paragraphFontFamily }];
3204
- elements.push({ type: "paragraph", text, runs: formattedRuns, alignment, firstLineIndent, hangingIndent, textTransform });
976
+ elements.push({ type: "paragraph", text, runs: formattedRuns, alignment, firstLineIndent, hangingIndent, leftIndent, textTransform, lineSpacing, spacingAfter, spacingBefore });
3205
977
  }
3206
978
  else {
3207
979
  // No formatting - simple paragraph (but may have indent/transform)
3208
- elements.push({ type: "paragraph", text, alignment, firstLineIndent, hangingIndent, textTransform });
980
+ elements.push({ type: "paragraph", text, alignment, firstLineIndent, hangingIndent, leftIndent, textTransform, lineSpacing, spacingAfter, spacingBefore });
3209
981
  }
3210
982
  }
3211
983
  return;
@@ -3331,6 +1103,33 @@ export function parseHtmlContent(html) {
3331
1103
  if (paddingStr) {
3332
1104
  cellPadding = parseCssPaddingToTwips(paddingStr);
3333
1105
  }
1106
+ // Also check individual padding properties (padding-top, padding-right, etc.)
1107
+ // These override the shorthand if set
1108
+ const paddingSource = tdStyle || thStyle;
1109
+ if (paddingSource) {
1110
+ if (!cellPadding)
1111
+ cellPadding = {};
1112
+ if (paddingSource.paddingTop) {
1113
+ const twips = parseMarginToTwips(paddingSource.paddingTop);
1114
+ if (twips !== undefined)
1115
+ cellPadding.top = twips;
1116
+ }
1117
+ if (paddingSource.paddingRight) {
1118
+ const twips = parseMarginToTwips(paddingSource.paddingRight);
1119
+ if (twips !== undefined)
1120
+ cellPadding.right = twips;
1121
+ }
1122
+ if (paddingSource.paddingBottom) {
1123
+ const twips = parseMarginToTwips(paddingSource.paddingBottom);
1124
+ if (twips !== undefined)
1125
+ cellPadding.bottom = twips;
1126
+ }
1127
+ if (paddingSource.paddingLeft) {
1128
+ const twips = parseMarginToTwips(paddingSource.paddingLeft);
1129
+ if (twips !== undefined)
1130
+ cellPadding.left = twips;
1131
+ }
1132
+ }
3334
1133
  // GENERALIZED: Look for nested header styles from ancestor containers
3335
1134
  // Walk up the DOM tree to find containers with nested th styles
3336
1135
  let ancestor = element.parentElement;
@@ -3376,8 +1175,86 @@ export function parseHtmlContent(html) {
3376
1175
  if (hex)
3377
1176
  headerTextColor = hex;
3378
1177
  }
1178
+ // Check element-to-element nested styles (e.g., "thead th { background: ... }")
1179
+ // These are stored with "__elem:" prefix keys in nestedStyles
1180
+ if (!headerBackgroundColor || !headerTextColor) {
1181
+ const theadThMap = cssContext.nestedStyles.get("__elem:thead");
1182
+ if (theadThMap) {
1183
+ const thStyle2 = theadThMap.get("th");
1184
+ if (thStyle2) {
1185
+ if (!headerBackgroundColor && thStyle2.backgroundColor) {
1186
+ const hex = extractHexColor(thStyle2.backgroundColor);
1187
+ if (hex)
1188
+ headerBackgroundColor = hex;
1189
+ }
1190
+ if (!headerTextColor && thStyle2.color) {
1191
+ const hex = extractHexColor(thStyle2.color);
1192
+ if (hex)
1193
+ headerTextColor = hex;
1194
+ }
1195
+ }
1196
+ }
1197
+ // Also check "table th { ... }" element-to-element selector
1198
+ const tableThMap = cssContext.nestedStyles.get("__elem:table");
1199
+ if (tableThMap) {
1200
+ const thStyle2 = tableThMap.get("th");
1201
+ if (thStyle2) {
1202
+ if (!headerBackgroundColor && thStyle2.backgroundColor) {
1203
+ const hex = extractHexColor(thStyle2.backgroundColor);
1204
+ if (hex)
1205
+ headerBackgroundColor = hex;
1206
+ }
1207
+ if (!headerTextColor && thStyle2.color) {
1208
+ const hex = extractHexColor(thStyle2.color);
1209
+ if (hex)
1210
+ headerTextColor = hex;
1211
+ }
1212
+ }
1213
+ }
1214
+ }
1215
+ }
1216
+ // Also check for even-row patterns parsed from tr:nth-child(even) CSS rules
1217
+ if (!evenRowBackgroundColor && cssContext) {
1218
+ const evenRowStyle = cssContext.elementStyles.get("__even-row");
1219
+ if (evenRowStyle?.backgroundColor) {
1220
+ const hex = extractHexColor(evenRowStyle.backgroundColor);
1221
+ if (hex)
1222
+ evenRowBackgroundColor = hex;
1223
+ }
1224
+ }
1225
+ // GENERALIZED: Detect whether table has a header row
1226
+ // Check for <thead> element or first row containing <th> elements
1227
+ const thead = element.querySelector("thead");
1228
+ const firstRow = element.querySelector("tr");
1229
+ const hasExplicitHeader = !!thead || (firstRow && firstRow.querySelector("th") !== null);
1230
+ // Detect horizontal-only border style
1231
+ // Tables with border-bottom on cells but no border/border-left/border-right
1232
+ // should render with horizontal separators only (no vertical grid lines)
1233
+ let horizontalBordersOnly = false;
1234
+ if (cssContext) {
1235
+ const tdBorderStyle = cssContext.elementStyles.get("td");
1236
+ const thBorderStyle = cssContext.elementStyles.get("th");
1237
+ const borderSource = tdBorderStyle || thBorderStyle;
1238
+ if (borderSource) {
1239
+ const hasBorderBottom = !!borderSource.borderBottom;
1240
+ const hasFullBorder = !!borderSource.border;
1241
+ const hasBorderLeft = !!borderSource.borderLeft;
1242
+ const hasBorderRight = !!borderSource.borderRight;
1243
+ // If cells have border-bottom but no full border or side borders, it's horizontal-only
1244
+ if (hasBorderBottom && !hasFullBorder && !hasBorderLeft && !hasBorderRight) {
1245
+ horizontalBordersOnly = true;
1246
+ }
1247
+ }
1248
+ // Also check table element's own styles - if table has no border but cells have border-bottom
1249
+ const tableStyles = getElementStyles(element, cssContext);
1250
+ if (tableStyles.border && tableStyles.border.includes("none")) {
1251
+ // Table explicitly has no border - if cells have bottom borders, it's horizontal-only
1252
+ if (borderSource?.borderBottom) {
1253
+ horizontalBordersOnly = true;
1254
+ }
1255
+ }
3379
1256
  }
3380
- elements.push({ type: "table", rows, cellPadding, headerBackgroundColor, headerTextColor, evenRowBackgroundColor });
1257
+ elements.push({ type: "table", rows, cellPadding, headerBackgroundColor, headerTextColor, evenRowBackgroundColor, hasHeader: hasExplicitHeader ? true : undefined, horizontalBordersOnly: horizontalBordersOnly || undefined });
3381
1258
  }
3382
1259
  return;
3383
1260
  }
@@ -3390,6 +1267,33 @@ export function parseHtmlContent(html) {
3390
1267
  }
3391
1268
  // Check for chart containers (divs with SVG children)
3392
1269
  if (tagName === "div") {
1270
+ // Detect timeline sections — convert to structured paragraphs with visual markers
1271
+ // Pattern: div.timeline or div containing .timeline-item children
1272
+ const timelineItems = detectTimeline(element, cssContext);
1273
+ if (timelineItems && timelineItems.length > 0) {
1274
+ for (let i = 0; i < timelineItems.length; i++) {
1275
+ const item = timelineItems[i];
1276
+ // Add separator between items (not before first)
1277
+ if (i > 0) {
1278
+ elements.push({ type: "horizontal-rule", color: "D0D0D0", spacingBefore: 60, spacingAfter: 60 });
1279
+ }
1280
+ // Date line with bullet marker: "● January 2026"
1281
+ const dateRuns = [
1282
+ { text: item.isCurrent ? "◉ " : "● ", bold: true, color: item.accentColor || "4A90D9" },
1283
+ { text: item.date, bold: true, color: item.accentColor || "4A90D9" },
1284
+ ];
1285
+ elements.push({ type: "paragraph", text: `${item.isCurrent ? "◉" : "●"} ${item.date}`, runs: dateRuns });
1286
+ // Title
1287
+ if (item.title) {
1288
+ elements.push({ type: "paragraph", text: item.title, bold: true });
1289
+ }
1290
+ // Description with indent (use spaces for visual indentation)
1291
+ if (item.description) {
1292
+ elements.push({ type: "paragraph", text: " " + item.description });
1293
+ }
1294
+ }
1295
+ return;
1296
+ }
3393
1297
  // Check for two-column grid layout (like resume templates with sidebar)
3394
1298
  const sidebarWidthPercent = isTwoColumnGridLayout(element, cssContext);
3395
1299
  if (sidebarWidthPercent !== undefined) {
@@ -3408,14 +1312,135 @@ export function parseHtmlContent(html) {
3408
1312
  const sidebarContent = parseContainerContent(sidebarEl, cssContext, sidebarTextColor);
3409
1313
  const mainContent = parseContainerContent(mainEl, cssContext);
3410
1314
  if (sidebarContent.length > 0 || mainContent.length > 0) {
3411
- // TODO: Two-column layout is detected but causes DOCX rendering issues.
3412
- // For now, just process the children normally instead of creating a two-column element.
3413
- // This ensures content is extracted even if layout is not preserved.
3414
- // elements.push({ type: "two-column-layout", ... });
1315
+ // Emit two-column layout for documents with sidebar patterns.
1316
+ // This produces a DOCX table with sidebar + main content columns,
1317
+ // matching the HTML's grid/flex sidebar layout.
1318
+ elements.push({
1319
+ type: "two-column-layout",
1320
+ sidebar: {
1321
+ content: sidebarContent,
1322
+ backgroundColor: sidebarBgColor,
1323
+ textColor: sidebarTextColor,
1324
+ widthPercent: sidebarWidthPercent,
1325
+ },
1326
+ main: {
1327
+ content: mainContent,
1328
+ },
1329
+ });
1330
+ return;
1331
+ }
1332
+ }
1333
+ }
1334
+ // Check for CSS Grid equal-width columns (e.g., pros/cons comparison_table)
1335
+ // Pattern: display: grid; grid-template-columns: 1fr 1fr (or repeat(2, 1fr))
1336
+ const gridColumns = detectGridEqualColumns(element, cssContext);
1337
+ if (gridColumns && gridColumns.length >= 2) {
1338
+ // GENERALIZED: Before converting grid to table, check if children match stats-grid pattern.
1339
+ // Stats-grid: each child has 2-3 short divs (value + label + optional change).
1340
+ // This prevents metric cards from being converted to tables.
1341
+ const gridChildDivs = Array.from(element.children).filter((child) => child.tagName.toLowerCase() === "div");
1342
+ if (gridChildDivs.length >= 2) {
1343
+ const statsCards = [];
1344
+ let allMatchStatsPattern = true;
1345
+ for (const card of gridChildDivs) {
1346
+ const cardChildren = Array.from(card.children).filter((c) => c.tagName.toLowerCase() !== "script" && c.tagName.toLowerCase() !== "style");
1347
+ // Stats cards typically have 2-3 child elements (value, label, optional change)
1348
+ if (cardChildren.length < 2 || cardChildren.length > 4) {
1349
+ allMatchStatsPattern = false;
1350
+ break;
1351
+ }
1352
+ // Check if children look like value/label (short text, no nested block elements)
1353
+ const texts = cardChildren.map(c => getTextContent(c).trim());
1354
+ const allShort = texts.every(t => t.length > 0 && t.length <= 50 && !t.includes('\n'));
1355
+ if (!allShort) {
1356
+ allMatchStatsPattern = false;
1357
+ break;
1358
+ }
1359
+ // First child = value, second = label, third = change
1360
+ const value = texts[0];
1361
+ const label = texts[1];
1362
+ const change = texts.length >= 3 ? texts[2] : undefined;
1363
+ const hasSentenceDot = /\.\s/.test(value) || (value.endsWith('.') && value.length > 10);
1364
+ const isFieldLabel = value.endsWith(':');
1365
+ if (hasSentenceDot || isFieldLabel) {
1366
+ allMatchStatsPattern = false;
1367
+ break;
1368
+ }
1369
+ // Extract colors
1370
+ const valueColor = extractTextColor(cardChildren[0], cssContext);
1371
+ const labelColor = extractTextColor(cardChildren[1], cssContext);
1372
+ const changeColor = cardChildren.length >= 3 ? extractTextColor(cardChildren[2], cssContext) : undefined;
1373
+ const cardStyles = getElementStyles(card, cssContext);
1374
+ const backgroundColor = cardStyles.backgroundColor ? extractHexColor(cardStyles.backgroundColor) : undefined;
1375
+ const borderColor = extractBorderColorFromStyle(cardStyles);
1376
+ statsCards.push({ value, label, change, valueColor, labelColor, changeColor, backgroundColor, borderColor });
1377
+ }
1378
+ if (allMatchStatsPattern && statsCards.length >= 2) {
1379
+ elements.push({ type: "stats-grid", cards: statsCards });
1380
+ return;
1381
+ }
1382
+ }
1383
+ // Parse each column's content
1384
+ const columnContents = [];
1385
+ for (const col of gridColumns) {
1386
+ const colContent = parseContainerContent(col, cssContext);
1387
+ columnContents.push(colContent);
1388
+ }
1389
+ // Check if at least one column has content
1390
+ const hasContent = columnContents.some(c => c.length > 0);
1391
+ if (hasContent) {
1392
+ // Build rows from aligned column content
1393
+ // Each ParsedElement in a column becomes a row cell
1394
+ const maxRows = Math.max(...columnContents.map(c => c.length));
1395
+ const rows = [];
1396
+ for (let i = 0; i < maxRows; i++) {
1397
+ const rowCells = [];
1398
+ for (const colContent of columnContents) {
1399
+ const elem = colContent[i];
1400
+ if (!elem) {
1401
+ rowCells.push("");
1402
+ }
1403
+ else if (elem.type === "paragraph") {
1404
+ // Use runs if available, otherwise plain text
1405
+ if (elem.runs && elem.runs.length > 0) {
1406
+ rowCells.push(elem.runs);
1407
+ }
1408
+ else if (elem.bold) {
1409
+ rowCells.push([{ text: elem.text, bold: true, color: elem.color }]);
1410
+ }
1411
+ else {
1412
+ rowCells.push(elem.color ? [{ text: elem.text, color: elem.color }] : elem.text);
1413
+ }
1414
+ }
1415
+ else if (elem.type === "heading") {
1416
+ rowCells.push([{ text: elem.text, bold: true, size: elem.level <= 3 ? 28 : 24 }]);
1417
+ }
1418
+ else if (elem.type === "list") {
1419
+ // Flatten list items to text
1420
+ const listText = elem.items.map((item, idx) => {
1421
+ const prefix = elem.ordered ? `${idx + 1}. ` : "• ";
1422
+ const itemText = typeof item === "string" ? item :
1423
+ Array.isArray(item) ? item.map(r => r.text).join("") :
1424
+ typeof item.content === "string" ? item.content :
1425
+ item.content.map(r => r.text).join("");
1426
+ return prefix + itemText;
1427
+ }).join("\n");
1428
+ rowCells.push(listText);
1429
+ }
1430
+ else {
1431
+ // Fallback: extract text
1432
+ const text = "text" in elem ? elem.text : "";
1433
+ rowCells.push(text);
1434
+ }
1435
+ }
1436
+ rows.push(rowCells);
1437
+ }
1438
+ if (rows.length > 0) {
1439
+ elements.push({ type: "table", rows, hasHeader: false, noBorders: true });
1440
+ return;
3415
1441
  }
3416
1442
  }
3417
1443
  }
3418
- // Fall through to normal processing for now since two-column DOCX has issues
3419
1444
  // GENERALIZED: Check for flex containers with equal-width columns (like signature blocks)
3420
1445
  // This handles layouts like: display: flex; with children having flex: 1
3421
1446
  // Render as a DOCX table with each child as a column
@@ -3613,8 +1638,10 @@ export function parseHtmlContent(html) {
3613
1638
  // Skip if they contain multiple sentences or newlines
3614
1639
  const MAX_VALUE_LENGTH = 50; // e.g., "$4,500.00", "127%", "2.5M"
3615
1640
  const MAX_LABEL_LENGTH = 100; // e.g., "Total Revenue", "Active Users"
1641
+ // Allow dots in numeric contexts (e.g., "4.2/5", "$3.50M") but reject sentence-like content
1642
+ const hasSentenceDot = /\.\s/.test(value) || (value.endsWith('.') && value.length > 10);
3616
1643
  const isValueLike = value.length > 0 && value.length <= MAX_VALUE_LENGTH &&
3617
- !value.includes('\n') && !value.includes('.');
1644
+ !value.includes('\n') && !hasSentenceDot;
3618
1645
  const isLabelLike = label.length > 0 && label.length <= MAX_LABEL_LENGTH &&
3619
1646
  !label.includes('\n');
3620
1647
  // GENERALIZED: Values that end with ':' are labels, not stats values
@@ -3948,6 +1975,13 @@ export function parseHtmlContent(html) {
3948
1975
  }
3949
1976
  // If no img found, fall through to container handling
3950
1977
  }
1978
+ // Handle standalone <input type="checkbox"> elements (not inside inline context)
1979
+ if (tagName === "input" && element.getAttribute("type") === "checkbox") {
1980
+ const isChecked = element.hasAttribute("checked");
1981
+ const text = isChecked ? "☑ " : "☐ ";
1982
+ elements.push({ type: "paragraph", text, color: elementColor });
1983
+ return;
1984
+ }
3951
1985
  // Check for blockquote/callout before generic container handling
3952
1986
  // Uses style-based detection, NOT class names
3953
1987
  if (isBlockquoteOrCallout(element, cssContext)) {
@@ -3963,18 +1997,11 @@ export function parseHtmlContent(html) {
3963
1997
  if (hex)
3964
1998
  backgroundColor = hex;
3965
1999
  }
3966
- // Extract border color from element styles
3967
- if (elementStyles.borderColor) {
3968
- const hex = extractHexColor(elementStyles.borderColor);
3969
- if (hex)
3970
- borderColor = hex;
3971
- }
3972
- else if (elementStyles.border) {
3973
- // Try to extract color from border shorthand (e.g., "4px solid #2563eb")
3974
- const hex = extractBorderColorFromStyle(elementStyles);
3975
- if (hex)
3976
- borderColor = hex;
3977
- }
2000
+ // Extract border color - use unified extraction that checks all border properties
2001
+ // in priority order: borderColor > border > borderLeft > borderRight > borderTop > borderBottom
2002
+ const borderHex = extractBorderColorFromStyle(elementStyles);
2003
+ if (borderHex)
2004
+ borderColor = borderHex;
3978
2005
  // Also check inline styles (overrides CSS)
3979
2006
  const inlineStyle = element.getAttribute("style") || "";
3980
2007
  const bgMatch = inlineStyle.match(/background(?:-color)?:\s*([^;]+)/i);
@@ -4029,9 +2056,19 @@ export function parseHtmlContent(html) {
4029
2056
  borderStyle = "full";
4030
2057
  }
4031
2058
  }
2059
+ else if (backgroundColor && !elementStyles.borderLeft && !elementStyles.border) {
2060
+ // Has background color but only borderColor (from class), no explicit border/border-left
2061
+ // This is a full-background callout, not a left-accent callout
2062
+ borderStyle = "none";
2063
+ }
4032
2064
  // Otherwise borderStyle remains undefined, which defaults to "left" in convert.ts
4033
2065
  // GENERALIZED: Extract background gradient for container gradient rendering
4034
2066
  const backgroundGradient = elementStyles.backgroundGradient;
2067
+ // Extract padding from CSS for blockquote cell margins
2068
+ let padding;
2069
+ if (elementStyles.padding) {
2070
+ padding = parseCssPaddingToTwips(elementStyles.padding);
2071
+ }
4035
2072
  elements.push({
4036
2073
  type: "blockquote",
4037
2074
  content,
@@ -4040,6 +2077,7 @@ export function parseHtmlContent(html) {
4040
2077
  backgroundGradient,
4041
2078
  variant,
4042
2079
  borderStyle,
2080
+ padding,
4043
2081
  });
4044
2082
  }
4045
2083
  return;
@@ -4075,8 +2113,13 @@ export function parseHtmlContent(html) {
4075
2113
  if (tagName === "div") {
4076
2114
  const skillItem = detectSkillItem(element);
4077
2115
  if (skillItem) {
4078
- // Output as single line: "Skill Name: 95%"
4079
- const text = `${skillItem.name}: ${skillItem.percentage}`;
2116
+ // Output as visual bar: "Skill Name ██████████ 95%"
2117
+ const pctNum = parseInt(skillItem.percentage, 10);
2118
+ const barLength = 10;
2119
+ const filled = Math.round((pctNum / 100) * barLength);
2120
+ const empty = barLength - filled;
2121
+ const bar = "█".repeat(filled) + "░".repeat(empty);
2122
+ const text = `${skillItem.name} ${bar} ${skillItem.percentage}`;
4080
2123
  elements.push({ type: "paragraph", text, color: elementColor });
4081
2124
  return;
4082
2125
  }
@@ -4090,6 +2133,21 @@ export function parseHtmlContent(html) {
4090
2133
  elements.push({ type: "paragraph", text, color: elementColor });
4091
2134
  return;
4092
2135
  }
2136
+ // Detect standalone progress bar containers (e.g., goal tracker progress)
2137
+ // Pattern: div containing a child with .progress-bar or .goal-progress class, or
2138
+ // a child div with width percentage in inline style and background color
2139
+ const progressBarResult = detectProgressBar(element, cssContext);
2140
+ if (progressBarResult) {
2141
+ const barLength = 10;
2142
+ const filled = Math.round((progressBarResult.percentage / 100) * barLength);
2143
+ const empty = barLength - filled;
2144
+ const bar = "█".repeat(filled) + "░".repeat(empty);
2145
+ const label = progressBarResult.label
2146
+ ? `${progressBarResult.label} ${bar} ${progressBarResult.percentage}%`
2147
+ : `${bar} ${progressBarResult.percentage}%`;
2148
+ elements.push({ type: "paragraph", text: label, color: elementColor });
2149
+ return;
2150
+ }
4093
2151
  }
4094
2152
  if (CONTAINER_TAGS.includes(tagName)) {
4095
2153
  // Before processing content, check if this element has border-top
@@ -4115,7 +2173,17 @@ export function parseHtmlContent(html) {
4115
2173
  textTransform = containerStyles.textTransform;
4116
2174
  }
4117
2175
  // GENERALIZED: Extract margin-bottom for paragraph spacing
4118
- const spacingAfter = containerStyles.marginBottom ? parseMarginToTwips(containerStyles.marginBottom) : undefined;
2176
+ // Apply CSS margin-bottom: > 240 for larger, <= 120 for tight, moderate keeps DOCX default
2177
+ const cssContainerSpacing = containerStyles.marginBottom ? parseMarginToTwips(containerStyles.marginBottom) : undefined;
2178
+ let spacingAfter;
2179
+ if (cssContainerSpacing !== undefined) {
2180
+ if (cssContainerSpacing > 240) {
2181
+ spacingAfter = cssContainerSpacing;
2182
+ }
2183
+ else if (cssContainerSpacing <= 120) {
2184
+ spacingAfter = Math.max(cssContainerSpacing, 60);
2185
+ }
2186
+ }
4119
2187
  // GENERALIZED: Extract line-height for vertical spacing
4120
2188
  const lineSpacing = containerStyles.lineHeight ? parseLineHeightToDocx(containerStyles.lineHeight) : undefined;
4121
2189
  // GENERALIZED: Check if this is a horizontal flex container with multiple children
@@ -4124,41 +2192,98 @@ export function parseHtmlContent(html) {
4124
2192
  const directChildren = Array.from(element.children);
4125
2193
  const hasMultipleFlexChildren = isHorizFlex && directChildren.length > 1;
4126
2194
  if (hasMultipleFlexChildren) {
4127
- // GENERALIZED: Convert horizontal flexbox to a single-row borderless table
4128
- // Each flex item becomes a table cell, preserving the horizontal layout
4129
- const flexChildren = directChildren.filter(child => child.nodeType === Node.ELEMENT_NODE);
4130
- const tableCells = [];
4131
- for (const flexChild of flexChildren) {
4132
- // Extract runs from this flex child
4133
- const childRuns = extractInlineRuns(flexChild, cssContext, undefined, containerFontFamily);
4134
- if (childRuns.length > 0) {
4135
- // Apply container's styles to runs that don't have their own
4136
- childRuns.forEach(run => {
4137
- if (!run.color && (elementColor || containerColor)) {
4138
- run.color = elementColor || containerColor;
4139
- }
4140
- if (!run.italic && containerItalic) {
4141
- run.italic = true;
4142
- }
4143
- if (!run.bold && containerBold) {
4144
- run.bold = true;
4145
- }
4146
- if (!run.fontFamily && containerFontFamily) {
4147
- run.fontFamily = containerFontFamily;
2195
+ // Check flex-wrap and justify-content to determine rendering strategy
2196
+ const flexWrap = containerStyles.flexWrap?.toLowerCase();
2197
+ const justifyContent = containerStyles.justifyContent?.toLowerCase();
2198
+ // For flex-wrap: wrap containers (e.g., contact info, metadata lines),
2199
+ // merge all children into a single paragraph with separator characters.
2200
+ // This produces much better output than a multi-column table which
2201
+ // causes text to break across columns at awkward points.
2202
+ if (flexWrap === "wrap") {
2203
+ const flexChildren = directChildren.filter(child => child.nodeType === Node.ELEMENT_NODE);
2204
+ const allRuns = [];
2205
+ // Determine separator based on content - if children already contain "|" separators, use gap only
2206
+ const childTexts = flexChildren.map(c => (c.textContent || "").trim());
2207
+ const hasPipeSeparators = childTexts.some(t => t === "|");
2208
+ const separator = hasPipeSeparators ? " " : " · ";
2209
+ for (let ci = 0; ci < flexChildren.length; ci++) {
2210
+ const flexChild = flexChildren[ci];
2211
+ const childRuns = extractInlineRuns(flexChild, cssContext, undefined, containerFontFamily);
2212
+ if (childRuns.length > 0) {
2213
+ // Apply container's styles to runs that don't have their own
2214
+ childRuns.forEach(run => {
2215
+ if (!run.color && (elementColor || containerColor)) {
2216
+ run.color = elementColor || containerColor;
2217
+ }
2218
+ if (!run.italic && containerItalic) {
2219
+ run.italic = true;
2220
+ }
2221
+ if (!run.bold && containerBold) {
2222
+ run.bold = true;
2223
+ }
2224
+ if (!run.fontFamily && containerFontFamily) {
2225
+ run.fontFamily = containerFontFamily;
2226
+ }
2227
+ });
2228
+ // Add separator between flex children (not before first or after last)
2229
+ if (allRuns.length > 0 && childRuns.length > 0) {
2230
+ allRuns.push({ text: separator, color: containerColor || elementColor });
4148
2231
  }
2232
+ allRuns.push(...childRuns);
2233
+ }
2234
+ }
2235
+ if (allRuns.length > 0) {
2236
+ const text = allRuns.map(r => r.text).join("");
2237
+ elements.push({
2238
+ type: "paragraph",
2239
+ text,
2240
+ runs: hasInlineFormatting(allRuns) ? allRuns : undefined,
2241
+ color: !hasInlineFormatting(allRuns) ? (elementColor || containerColor) : undefined,
2242
+ alignment: justifyContent === "center" ? "center" : alignment,
2243
+ spacingAfter,
2244
+ lineSpacing,
2245
+ textTransform,
4149
2246
  });
4150
- // Use runs as cell content
4151
- tableCells.push(childRuns);
4152
2247
  }
4153
2248
  }
4154
- if (tableCells.length > 0) {
4155
- // Create a single-row borderless table to represent horizontal flexbox
4156
- elements.push({
4157
- type: "table",
4158
- rows: [tableCells],
4159
- hasHeader: false,
4160
- noBorders: true,
4161
- });
2249
+ else {
2250
+ // For non-wrapping flex (e.g., justify-content: space-between):
2251
+ // Convert horizontal flexbox to a single-row borderless table
2252
+ // Each flex item becomes a table cell, preserving the horizontal layout
2253
+ const flexChildren = directChildren.filter(child => child.nodeType === Node.ELEMENT_NODE);
2254
+ const tableCells = [];
2255
+ for (const flexChild of flexChildren) {
2256
+ // Extract runs from this flex child
2257
+ const childRuns = extractInlineRuns(flexChild, cssContext, undefined, containerFontFamily);
2258
+ if (childRuns.length > 0) {
2259
+ // Apply container's styles to runs that don't have their own
2260
+ childRuns.forEach(run => {
2261
+ if (!run.color && (elementColor || containerColor)) {
2262
+ run.color = elementColor || containerColor;
2263
+ }
2264
+ if (!run.italic && containerItalic) {
2265
+ run.italic = true;
2266
+ }
2267
+ if (!run.bold && containerBold) {
2268
+ run.bold = true;
2269
+ }
2270
+ if (!run.fontFamily && containerFontFamily) {
2271
+ run.fontFamily = containerFontFamily;
2272
+ }
2273
+ });
2274
+ // Use runs as cell content
2275
+ tableCells.push(childRuns);
2276
+ }
2277
+ }
2278
+ if (tableCells.length > 0) {
2279
+ // Create a single-row borderless table to represent horizontal flexbox
2280
+ elements.push({
2281
+ type: "table",
2282
+ rows: [tableCells],
2283
+ hasHeader: false,
2284
+ noBorders: true,
2285
+ });
2286
+ }
4162
2287
  }
4163
2288
  }
4164
2289
  else {
@@ -4203,10 +2328,42 @@ export function parseHtmlContent(html) {
4203
2328
  }
4204
2329
  }
4205
2330
  else {
4206
- // Process children individually (block-level content)
4207
- // Pass elementColor only if this container explicitly sets a color (for special containers)
4208
- for (const child of element.childNodes) {
4209
- processNode(child, alignment, elementColor);
2331
+ // Has block-level children - check if this is a space-between flex container
2332
+ // Pattern: <div class="job-header" style="display:flex; justify-content:space-between">
2333
+ // <div>Title Company</div>
2334
+ // <span>Date | Location</span>
2335
+ // </div>
2336
+ // This should become a 2-column table with left-aligned and right-aligned content
2337
+ const containerStylesForFlex = getElementStyles(element, cssContext);
2338
+ const isSpaceBetweenFlex = containerStylesForFlex.display === "flex" &&
2339
+ containerStylesForFlex.justifyContent === "space-between" &&
2340
+ (!containerStylesForFlex.flexDirection || containerStylesForFlex.flexDirection === "row" || containerStylesForFlex.flexDirection === "row-reverse");
2341
+ if (isSpaceBetweenFlex) {
2342
+ // Extract runs from each direct child, merging block-level children's inline content
2343
+ const flexChildren = Array.from(element.children).filter(child => child.nodeType === Node.ELEMENT_NODE);
2344
+ const tableCells = [];
2345
+ const containerFontFamilyForFlex = containerStylesForFlex.fontFamily;
2346
+ for (const flexChild of flexChildren) {
2347
+ const childRuns = extractInlineRuns(flexChild, cssContext, undefined, containerFontFamilyForFlex);
2348
+ if (childRuns.length > 0) {
2349
+ tableCells.push(childRuns);
2350
+ }
2351
+ }
2352
+ if (tableCells.length > 0) {
2353
+ elements.push({
2354
+ type: "table",
2355
+ rows: [tableCells],
2356
+ hasHeader: false,
2357
+ noBorders: true,
2358
+ });
2359
+ }
2360
+ }
2361
+ else {
2362
+ // Process children individually (block-level content)
2363
+ // Pass elementColor only if this container explicitly sets a color (for special containers)
2364
+ for (const child of element.childNodes) {
2365
+ processNode(child, alignment, elementColor);
2366
+ }
4210
2367
  }
4211
2368
  }
4212
2369
  // After processing children, check if this element has border-bottom