docgen-utils 1.0.21 → 1.0.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bundle.js +251 -50
- package/dist/bundle.min.js +243 -243
- package/dist/cli.js +240 -50
- package/dist/packages/docs/common.d.ts +30 -2
- package/dist/packages/docs/common.d.ts.map +1 -1
- package/dist/packages/docs/common.js +40 -0
- package/dist/packages/docs/common.js.map +1 -1
- package/dist/packages/docs/convert.d.ts.map +1 -1
- package/dist/packages/docs/convert.js +91 -21
- package/dist/packages/docs/convert.js.map +1 -1
- package/dist/packages/docs/parse-css.d.ts.map +1 -1
- package/dist/packages/docs/parse-css.js +10 -3
- package/dist/packages/docs/parse-css.js.map +1 -1
- package/dist/packages/docs/parse.d.ts.map +1 -1
- package/dist/packages/docs/parse.js +185 -19
- package/dist/packages/docs/parse.js.map +1 -1
- package/package.json +1 -1
|
@@ -6,6 +6,62 @@ import { parseHeadingLevel, getTextAlignment, getTextContent, BLOCK_LEVEL_TAGS,
|
|
|
6
6
|
import { isInlineOnlyContainer, extractInlineRuns, hasInlineFormatting } from "./parse-inline";
|
|
7
7
|
import { isGridOrFlexContainer, isHorizontalFlexContainer, isDecorativeSvg, isTwoColumnGridLayout, findTwoColumnChildren, detectFlexEqualColumns, detectGridEqualColumns } from "./parse-layout";
|
|
8
8
|
import { detectSkillItem, detectLanguageItem, detectProgressBar, detectTimeline } from "./parse-special";
|
|
9
|
+
/**
 * Collect the <tr> elements that belong to a table itself.
 *
 * A row is included when it is an immediate child of the table, or an
 * immediate child of one of the table's own <thead>/<tbody>/<tfoot>
 * sections. Rows sitting deeper (for example inside a table nested in a
 * cell) are never visited, so they are not returned.
 *
 * @param tableEl - element whose `children` are scanned
 * @returns {Array} matching row elements, in document order
 */
function getDirectRows(tableEl) {
    const sectionTags = new Set(["thead", "tbody", "tfoot"]);
    const rows = [];
    for (const node of tableEl.children) {
        const tag = node.tagName.toLowerCase();
        if (tag === "tr") {
            rows.push(node);
            continue;
        }
        if (!sectionTags.has(tag)) {
            continue;
        }
        // Only one level below a section wrapper is inspected.
        for (const inner of node.children) {
            if (inner.tagName.toLowerCase() === "tr") {
                rows.push(inner);
            }
        }
    }
    return rows;
}
|
|
30
|
+
/**
 * Collect the <td>/<th> elements that are immediate children of a row.
 *
 * Cells that live deeper in the tree (for example in a table nested inside
 * one of this row's cells) are not visited and therefore not returned.
 *
 * @param rowEl - element whose `children` are scanned
 * @returns {Array} matching cell elements, in document order
 */
function getDirectCells(rowEl) {
    const cells = [];
    for (const node of rowEl.children) {
        const tag = node.tagName.toLowerCase();
        const isCell = tag === "td" || tag === "th";
        if (!isCell) {
            continue;
        }
        cells.push(node);
    }
    return cells;
}
|
|
43
|
+
/**
 * Report whether a cell has a <table> anywhere among its descendants.
 *
 * @param cellEl - element queried with `querySelector("table")`
 * @returns {boolean} true when the query yields anything other than null
 */
function cellHasNestedTable(cellEl) {
    const firstNestedTable = cellEl.querySelector("table");
    return firstNestedTable !== null;
}
|
|
49
|
+
/**
 * Check whether a table is declared with `border: none` (a layout table).
 *
 * The inline `style` attribute is checked first because it needs no style
 * resolution. The match is case-insensitive and tolerates any whitespace
 * around the colon, so `border:none`, `border: none`, `BORDER : NONE` are
 * all recognised. If the inline style does not match, the styles resolved
 * by getElementStyles() are consulted and any `border` value containing
 * "none" counts.
 *
 * @param tableEl - table element; must provide `getAttribute`
 * @param cssContext - passed through unchanged to getElementStyles()
 * @returns {boolean} true when either source declares a "none" border
 */
function isLayoutTable(tableEl, cssContext) {
    // Inline style: cheap check, so do it before resolving stylesheet rules.
    const inlineStyle = tableEl.getAttribute("style") || "";
    if (/border\s*:\s*none/i.test(inlineStyle)) {
        return true;
    }
    // Resolved (class/selector based) styles.
    const styles = getElementStyles(tableEl, cssContext);
    if (styles.border && styles.border.includes("none")) {
        return true;
    }
    return false;
}
|
|
9
65
|
function createParsedImageElement(imageEl, imageKey) {
|
|
10
66
|
const src = imageEl.getAttribute("src")?.trim();
|
|
11
67
|
if (!src) {
|
|
@@ -131,20 +187,60 @@ function parseContainerContent(element, cssContext, nextImageKey, inheritedColor
|
|
|
131
187
|
// Handle tables
|
|
132
188
|
if (tagName === "table") {
|
|
133
189
|
const rows = [];
|
|
134
|
-
|
|
190
|
+
const directRows = getDirectRows(el);
|
|
191
|
+
for (const tr of directRows) {
|
|
135
192
|
const cells = [];
|
|
136
|
-
|
|
193
|
+
const directCells = getDirectCells(tr);
|
|
194
|
+
for (const cell of directCells) {
|
|
195
|
+
// Extract cell-level background color (same logic as main table parsing)
|
|
196
|
+
let cellBackgroundColor;
|
|
197
|
+
// Method 1: Check cell's direct class-based styles
|
|
198
|
+
const cellStyles = getElementStyles(cell, cssContext, tr);
|
|
199
|
+
if (cellStyles.backgroundColor) {
|
|
200
|
+
const hexBg = extractHexColor(cellStyles.backgroundColor);
|
|
201
|
+
if (hexBg)
|
|
202
|
+
cellBackgroundColor = hexBg;
|
|
203
|
+
}
|
|
204
|
+
// Method 2: Check inline style on the cell
|
|
205
|
+
if (!cellBackgroundColor) {
|
|
206
|
+
const inlineStyle = cell.getAttribute("style") || "";
|
|
207
|
+
if (inlineStyle) {
|
|
208
|
+
const bgMatch = inlineStyle.match(/background(?:-color)?\s*:\s*([^;]+)/i);
|
|
209
|
+
if (bgMatch) {
|
|
210
|
+
const hexBg = extractHexColor(bgMatch[1]);
|
|
211
|
+
if (hexBg)
|
|
212
|
+
cellBackgroundColor = hexBg;
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
}
|
|
137
216
|
const runs = extractInlineRuns(cell, cssContext);
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
217
|
+
// If cell has background color, wrap in StyledTableCell
|
|
218
|
+
if (cellBackgroundColor) {
|
|
219
|
+
if (runs.length > 0) {
|
|
220
|
+
if (hasInlineFormatting(runs)) {
|
|
221
|
+
cells.push({ content: runs, backgroundColor: cellBackgroundColor });
|
|
222
|
+
}
|
|
223
|
+
else {
|
|
224
|
+
cells.push({ content: runs.map((r) => r.text).join(""), backgroundColor: cellBackgroundColor });
|
|
225
|
+
}
|
|
141
226
|
}
|
|
142
227
|
else {
|
|
143
|
-
cells.push(
|
|
228
|
+
cells.push({ content: "", backgroundColor: cellBackgroundColor });
|
|
144
229
|
}
|
|
145
230
|
}
|
|
146
231
|
else {
|
|
147
|
-
|
|
232
|
+
// No cell background - use simple format
|
|
233
|
+
if (runs.length > 0) {
|
|
234
|
+
if (hasInlineFormatting(runs)) {
|
|
235
|
+
cells.push(runs);
|
|
236
|
+
}
|
|
237
|
+
else {
|
|
238
|
+
cells.push(runs.map((r) => r.text).join(""));
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
else {
|
|
242
|
+
cells.push("");
|
|
243
|
+
}
|
|
148
244
|
}
|
|
149
245
|
}
|
|
150
246
|
if (cells.length > 0) {
|
|
@@ -1126,23 +1222,91 @@ export function parseHtmlContent(html) {
|
|
|
1126
1222
|
}
|
|
1127
1223
|
if (tagName === "table") {
|
|
1128
1224
|
const rows = [];
|
|
1129
|
-
|
|
1225
|
+
// Use direct children traversal to avoid flattening nested tables
|
|
1226
|
+
const directRows = getDirectRows(element);
|
|
1227
|
+
for (const tr of directRows) {
|
|
1130
1228
|
const cells = [];
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
if (
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1229
|
+
const directCells = getDirectCells(tr);
|
|
1230
|
+
for (const cell of directCells) {
|
|
1231
|
+
// Check if cell contains nested table (complex content)
|
|
1232
|
+
if (cellHasNestedTable(cell)) {
|
|
1233
|
+
// Parse cell content recursively for nested tables
|
|
1234
|
+
const nestedContent = parseContainerContent(cell, cssContext, nextImageKey);
|
|
1235
|
+
if (nestedContent.length > 0) {
|
|
1236
|
+
cells.push({ content: nestedContent });
|
|
1138
1237
|
}
|
|
1139
1238
|
else {
|
|
1140
|
-
|
|
1141
|
-
cells.push(runs.map((r) => r.text).join(""));
|
|
1239
|
+
cells.push("");
|
|
1142
1240
|
}
|
|
1143
1241
|
}
|
|
1144
1242
|
else {
|
|
1145
|
-
|
|
1243
|
+
// Extract cell-level background color from CSS
|
|
1244
|
+
// This handles nested selectors like .charges-table th or .charges-table tr.row-even td
|
|
1245
|
+
let cellBackgroundColor;
|
|
1246
|
+
// Method 1: Check cell's direct styles with row as parent
|
|
1247
|
+
// This handles .row-class td { ... } patterns
|
|
1248
|
+
const cellStylesWithRowParent = getElementStyles(cell, cssContext, tr);
|
|
1249
|
+
if (cellStylesWithRowParent.backgroundColor) {
|
|
1250
|
+
const hexBg = extractHexColor(cellStylesWithRowParent.backgroundColor);
|
|
1251
|
+
if (hexBg)
|
|
1252
|
+
cellBackgroundColor = hexBg;
|
|
1253
|
+
}
|
|
1254
|
+
// Method 2: Check cell's styles with TABLE as parent
|
|
1255
|
+
// This handles .table-class th { ... } and .table-class td { ... } patterns
|
|
1256
|
+
if (!cellBackgroundColor) {
|
|
1257
|
+
const cellStylesWithTableParent = getElementStyles(cell, cssContext, element);
|
|
1258
|
+
if (cellStylesWithTableParent.backgroundColor) {
|
|
1259
|
+
const hexBg = extractHexColor(cellStylesWithTableParent.backgroundColor);
|
|
1260
|
+
if (hexBg)
|
|
1261
|
+
cellBackgroundColor = hexBg;
|
|
1262
|
+
}
|
|
1263
|
+
}
|
|
1264
|
+
// Method 3: Check inline style on the cell
|
|
1265
|
+
const inlineStyle = cell.getAttribute("style") || "";
|
|
1266
|
+
if (inlineStyle && !cellBackgroundColor) {
|
|
1267
|
+
const bgMatch = inlineStyle.match(/background(?:-color)?\s*:\s*([^;]+)/i);
|
|
1268
|
+
if (bgMatch) {
|
|
1269
|
+
const hexBg = extractHexColor(bgMatch[1]);
|
|
1270
|
+
if (hexBg)
|
|
1271
|
+
cellBackgroundColor = hexBg;
|
|
1272
|
+
}
|
|
1273
|
+
}
|
|
1274
|
+
// NOTE: Multi-level nested selectors like ".charges-table tr.row-even td { ... }" are now
|
|
1275
|
+
// handled by the general ancestor walk in getElementStyles(). The nestedSelectorPattern
|
|
1276
|
+
// extracts the row class (row-even) as the parent for td styles, and getElementStyles()
|
|
1277
|
+
// finds tr.row-even as an ancestor of td when walking up the DOM tree.
|
|
1278
|
+
// Extract inline runs to preserve bold/italic formatting in cells
|
|
1279
|
+
const runs = extractInlineRuns(cell, cssContext);
|
|
1280
|
+
// If cell has background color, wrap in StyledTableCell
|
|
1281
|
+
if (cellBackgroundColor) {
|
|
1282
|
+
if (runs.length > 0) {
|
|
1283
|
+
if (hasInlineFormatting(runs)) {
|
|
1284
|
+
cells.push({ content: runs, backgroundColor: cellBackgroundColor });
|
|
1285
|
+
}
|
|
1286
|
+
else {
|
|
1287
|
+
cells.push({ content: runs.map((r) => r.text).join(""), backgroundColor: cellBackgroundColor });
|
|
1288
|
+
}
|
|
1289
|
+
}
|
|
1290
|
+
else {
|
|
1291
|
+
cells.push({ content: "", backgroundColor: cellBackgroundColor });
|
|
1292
|
+
}
|
|
1293
|
+
}
|
|
1294
|
+
else {
|
|
1295
|
+
// No cell background - use simple format
|
|
1296
|
+
if (runs.length > 0) {
|
|
1297
|
+
if (hasInlineFormatting(runs)) {
|
|
1298
|
+
// Has formatting - store as runs
|
|
1299
|
+
cells.push(runs);
|
|
1300
|
+
}
|
|
1301
|
+
else {
|
|
1302
|
+
// Plain text - store as string
|
|
1303
|
+
cells.push(runs.map((r) => r.text).join(""));
|
|
1304
|
+
}
|
|
1305
|
+
}
|
|
1306
|
+
else {
|
|
1307
|
+
cells.push("");
|
|
1308
|
+
}
|
|
1309
|
+
}
|
|
1146
1310
|
}
|
|
1147
1311
|
}
|
|
1148
1312
|
if (cells.length > 0) {
|
|
@@ -1314,7 +1478,9 @@ export function parseHtmlContent(html) {
|
|
|
1314
1478
|
}
|
|
1315
1479
|
}
|
|
1316
1480
|
}
|
|
1317
|
-
|
|
1481
|
+
// Detect noBorders layout tables (tables with border:none for pure layout)
|
|
1482
|
+
const noBorders = isLayoutTable(element, cssContext);
|
|
1483
|
+
elements.push({ type: "table", rows, cellPadding, headerBackgroundColor, headerTextColor, evenRowBackgroundColor, hasHeader: hasExplicitHeader ? true : undefined, horizontalBordersOnly: horizontalBordersOnly || undefined, noBorders: noBorders || undefined, columnWidths: calculateColumnWidths(rows, element) });
|
|
1318
1484
|
}
|
|
1319
1485
|
return;
|
|
1320
1486
|
}
|