docgen-utils 1.0.21 → 1.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,6 +6,62 @@ import { parseHeadingLevel, getTextAlignment, getTextContent, BLOCK_LEVEL_TAGS,
6
6
  import { isInlineOnlyContainer, extractInlineRuns, hasInlineFormatting } from "./parse-inline";
7
7
  import { isGridOrFlexContainer, isHorizontalFlexContainer, isDecorativeSvg, isTwoColumnGridLayout, findTwoColumnChildren, detectFlexEqualColumns, detectGridEqualColumns } from "./parse-layout";
8
8
  import { detectSkillItem, detectLanguageItem, detectProgressBar, detectTimeline } from "./parse-special";
9
/**
 * Collect the <tr> elements that belong to this table itself.
 *
 * A row counts when it is either an immediate child of the table or an
 * immediate child of one of the table's <thead>/<tbody>/<tfoot> sections.
 * Rows that live inside a nested table (deeper in a cell) are never returned.
 * Results are in document order.
 *
 * @param {Element} tableEl - The <table> element to inspect.
 * @returns {Element[]} Rows owned directly by the table.
 */
function getDirectRows(tableEl) {
    const isRow = (node) => node.tagName.toLowerCase() === "tr";
    return [...tableEl.children].flatMap((node) => {
        const tag = node.tagName.toLowerCase();
        if (tag === "tr") {
            return [node];
        }
        if (tag === "thead" || tag === "tbody" || tag === "tfoot") {
            // Only one level down: rows sitting directly in a section wrapper.
            return [...node.children].filter(isRow);
        }
        return [];
    });
}
30
/**
 * Collect the <td>/<th> elements that are immediate children of a row.
 *
 * Cells belonging to tables nested deeper inside the row are not included.
 *
 * @param {Element} rowEl - The <tr> element to inspect.
 * @returns {Element[]} The row's own cells, in document order.
 */
function getDirectCells(rowEl) {
    return [...rowEl.children].filter((node) => {
        const tag = node.tagName.toLowerCase();
        return tag === "td" || tag === "th";
    });
}
43
/**
 * Report whether a table cell has a <table> anywhere among its descendants.
 *
 * @param {Element} cellEl - The <td>/<th> element to inspect.
 * @returns {boolean} True when querySelector("table") finds a match.
 */
function cellHasNestedTable(cellEl) {
    const firstNestedTable = cellEl.querySelector("table");
    return firstNestedTable !== null;
}
49
/**
 * Decide whether a table declares `border: none`, i.e. is used purely for layout.
 *
 * Two sources are consulted, in order:
 *   1. The table's inline `style` attribute.
 *   2. The `border` value resolved by getElementStyles() for the table.
 *
 * @param {Element} tableEl - The <table> element to inspect.
 * @param {*} cssContext - Passed through unchanged to getElementStyles().
 * @returns {boolean} True when either source declares a `none` border.
 */
function isLayoutTable(tableEl, cssContext) {
    // Inline style: match the `border` shorthand itself, tolerating any case and
    // whitespace around the colon. The leading anchor (start, ';' or whitespace)
    // keeps properties that merely end in "border" (e.g. `--card-border`) from
    // matching.
    const inlineStyle = tableEl.getAttribute("style") || "";
    if (/(?:^|[;\s])border\s*:\s*none\b/i.test(inlineStyle)) {
        return true;
    }
    // Resolved styles are only looked up when the inline style did not decide.
    const styles = getElementStyles(tableEl, cssContext);
    const border = styles?.border;
    return typeof border === "string" && border.toLowerCase().includes("none");
}
9
65
  function createParsedImageElement(imageEl, imageKey) {
10
66
  const src = imageEl.getAttribute("src")?.trim();
11
67
  if (!src) {
@@ -131,20 +187,60 @@ function parseContainerContent(element, cssContext, nextImageKey, inheritedColor
131
187
  // Handle tables
132
188
  if (tagName === "table") {
133
189
  const rows = [];
134
- for (const tr of el.querySelectorAll("tr")) {
190
+ const directRows = getDirectRows(el);
191
+ for (const tr of directRows) {
135
192
  const cells = [];
136
- for (const cell of tr.querySelectorAll("td, th")) {
193
+ const directCells = getDirectCells(tr);
194
+ for (const cell of directCells) {
195
+ // Extract cell-level background color (same logic as main table parsing)
196
+ let cellBackgroundColor;
197
+ // Method 1: Check cell's direct class-based styles
198
+ const cellStyles = getElementStyles(cell, cssContext, tr);
199
+ if (cellStyles.backgroundColor) {
200
+ const hexBg = extractHexColor(cellStyles.backgroundColor);
201
+ if (hexBg)
202
+ cellBackgroundColor = hexBg;
203
+ }
204
+ // Method 2: Check inline style on the cell
205
+ if (!cellBackgroundColor) {
206
+ const inlineStyle = cell.getAttribute("style") || "";
207
+ if (inlineStyle) {
208
+ const bgMatch = inlineStyle.match(/background(?:-color)?\s*:\s*([^;]+)/i);
209
+ if (bgMatch) {
210
+ const hexBg = extractHexColor(bgMatch[1]);
211
+ if (hexBg)
212
+ cellBackgroundColor = hexBg;
213
+ }
214
+ }
215
+ }
137
216
  const runs = extractInlineRuns(cell, cssContext);
138
- if (runs.length > 0) {
139
- if (hasInlineFormatting(runs)) {
140
- cells.push(runs);
217
+ // If cell has background color, wrap in StyledTableCell
218
+ if (cellBackgroundColor) {
219
+ if (runs.length > 0) {
220
+ if (hasInlineFormatting(runs)) {
221
+ cells.push({ content: runs, backgroundColor: cellBackgroundColor });
222
+ }
223
+ else {
224
+ cells.push({ content: runs.map((r) => r.text).join(""), backgroundColor: cellBackgroundColor });
225
+ }
141
226
  }
142
227
  else {
143
- cells.push(runs.map((r) => r.text).join(""));
228
+ cells.push({ content: "", backgroundColor: cellBackgroundColor });
144
229
  }
145
230
  }
146
231
  else {
147
- cells.push("");
232
+ // No cell background - use simple format
233
+ if (runs.length > 0) {
234
+ if (hasInlineFormatting(runs)) {
235
+ cells.push(runs);
236
+ }
237
+ else {
238
+ cells.push(runs.map((r) => r.text).join(""));
239
+ }
240
+ }
241
+ else {
242
+ cells.push("");
243
+ }
148
244
  }
149
245
  }
150
246
  if (cells.length > 0) {
@@ -1126,23 +1222,91 @@ export function parseHtmlContent(html) {
1126
1222
  }
1127
1223
  if (tagName === "table") {
1128
1224
  const rows = [];
1129
- for (const tr of element.querySelectorAll("tr")) {
1225
+ // Use direct children traversal to avoid flattening nested tables
1226
+ const directRows = getDirectRows(element);
1227
+ for (const tr of directRows) {
1130
1228
  const cells = [];
1131
- for (const cell of tr.querySelectorAll("td, th")) {
1132
- // Extract inline runs to preserve bold/italic formatting in cells
1133
- const runs = extractInlineRuns(cell, cssContext);
1134
- if (runs.length > 0) {
1135
- if (hasInlineFormatting(runs)) {
1136
- // Has formatting - store as runs
1137
- cells.push(runs);
1229
+ const directCells = getDirectCells(tr);
1230
+ for (const cell of directCells) {
1231
+ // Check if cell contains nested table (complex content)
1232
+ if (cellHasNestedTable(cell)) {
1233
+ // Parse cell content recursively for nested tables
1234
+ const nestedContent = parseContainerContent(cell, cssContext, nextImageKey);
1235
+ if (nestedContent.length > 0) {
1236
+ cells.push({ content: nestedContent });
1138
1237
  }
1139
1238
  else {
1140
- // Plain text - store as string
1141
- cells.push(runs.map((r) => r.text).join(""));
1239
+ cells.push("");
1142
1240
  }
1143
1241
  }
1144
1242
  else {
1145
- cells.push("");
1243
+ // Extract cell-level background color from CSS
1244
+ // This handles nested selectors like .charges-table th or .charges-table tr.row-even td
1245
+ let cellBackgroundColor;
1246
+ // Method 1: Check cell's direct styles with row as parent
1247
+ // This handles .row-class td { ... } patterns
1248
+ const cellStylesWithRowParent = getElementStyles(cell, cssContext, tr);
1249
+ if (cellStylesWithRowParent.backgroundColor) {
1250
+ const hexBg = extractHexColor(cellStylesWithRowParent.backgroundColor);
1251
+ if (hexBg)
1252
+ cellBackgroundColor = hexBg;
1253
+ }
1254
+ // Method 2: Check cell's styles with TABLE as parent
1255
+ // This handles .table-class th { ... } and .table-class td { ... } patterns
1256
+ if (!cellBackgroundColor) {
1257
+ const cellStylesWithTableParent = getElementStyles(cell, cssContext, element);
1258
+ if (cellStylesWithTableParent.backgroundColor) {
1259
+ const hexBg = extractHexColor(cellStylesWithTableParent.backgroundColor);
1260
+ if (hexBg)
1261
+ cellBackgroundColor = hexBg;
1262
+ }
1263
+ }
1264
+ // Method 3: Check inline style on the cell
1265
+ const inlineStyle = cell.getAttribute("style") || "";
1266
+ if (inlineStyle && !cellBackgroundColor) {
1267
+ const bgMatch = inlineStyle.match(/background(?:-color)?\s*:\s*([^;]+)/i);
1268
+ if (bgMatch) {
1269
+ const hexBg = extractHexColor(bgMatch[1]);
1270
+ if (hexBg)
1271
+ cellBackgroundColor = hexBg;
1272
+ }
1273
+ }
1274
+ // NOTE: Multi-level nested selectors like ".charges-table tr.row-even td { ... }" are now
1275
+ // handled by the general ancestor walk in getElementStyles(). The nestedSelectorPattern
1276
+ // extracts the row class (row-even) as the parent for td styles, and getElementStyles()
1277
+ // finds tr.row-even as an ancestor of td when walking up the DOM tree.
1278
+ // Extract inline runs to preserve bold/italic formatting in cells
1279
+ const runs = extractInlineRuns(cell, cssContext);
1280
+ // If cell has background color, wrap in StyledTableCell
1281
+ if (cellBackgroundColor) {
1282
+ if (runs.length > 0) {
1283
+ if (hasInlineFormatting(runs)) {
1284
+ cells.push({ content: runs, backgroundColor: cellBackgroundColor });
1285
+ }
1286
+ else {
1287
+ cells.push({ content: runs.map((r) => r.text).join(""), backgroundColor: cellBackgroundColor });
1288
+ }
1289
+ }
1290
+ else {
1291
+ cells.push({ content: "", backgroundColor: cellBackgroundColor });
1292
+ }
1293
+ }
1294
+ else {
1295
+ // No cell background - use simple format
1296
+ if (runs.length > 0) {
1297
+ if (hasInlineFormatting(runs)) {
1298
+ // Has formatting - store as runs
1299
+ cells.push(runs);
1300
+ }
1301
+ else {
1302
+ // Plain text - store as string
1303
+ cells.push(runs.map((r) => r.text).join(""));
1304
+ }
1305
+ }
1306
+ else {
1307
+ cells.push("");
1308
+ }
1309
+ }
1146
1310
  }
1147
1311
  }
1148
1312
  if (cells.length > 0) {
@@ -1314,7 +1478,9 @@ export function parseHtmlContent(html) {
1314
1478
  }
1315
1479
  }
1316
1480
  }
1317
- elements.push({ type: "table", rows, cellPadding, headerBackgroundColor, headerTextColor, evenRowBackgroundColor, hasHeader: hasExplicitHeader ? true : undefined, horizontalBordersOnly: horizontalBordersOnly || undefined, columnWidths: calculateColumnWidths(rows, element) });
1481
+ // Detect noBorders layout tables (tables with border:none for pure layout)
1482
+ const noBorders = isLayoutTable(element, cssContext);
1483
+ elements.push({ type: "table", rows, cellPadding, headerBackgroundColor, headerTextColor, evenRowBackgroundColor, hasHeader: hasExplicitHeader ? true : undefined, horizontalBordersOnly: horizontalBordersOnly || undefined, noBorders: noBorders || undefined, columnWidths: calculateColumnWidths(rows, element) });
1318
1484
  }
1319
1485
  return;
1320
1486
  }