docgen-utils 1.0.20 → 1.0.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/bundle.js +24203 -23922
- package/dist/bundle.min.js +251 -251
- package/dist/cli.js +501 -153
- package/dist/packages/cli/commands/export-docs.d.ts.map +1 -1
- package/dist/packages/cli/commands/export-docs.js +160 -11
- package/dist/packages/cli/commands/export-docs.js.map +1 -1
- package/dist/packages/docs/common.d.ts +31 -2
- package/dist/packages/docs/common.d.ts.map +1 -1
- package/dist/packages/docs/common.js +40 -0
- package/dist/packages/docs/common.js.map +1 -1
- package/dist/packages/docs/convert.d.ts.map +1 -1
- package/dist/packages/docs/convert.js +91 -21
- package/dist/packages/docs/convert.js.map +1 -1
- package/dist/packages/docs/create-document.d.ts.map +1 -1
- package/dist/packages/docs/create-document.js +8 -2
- package/dist/packages/docs/create-document.js.map +1 -1
- package/dist/packages/docs/import-docx.d.ts.map +1 -1
- package/dist/packages/docs/import-docx.js +2 -1
- package/dist/packages/docs/import-docx.js.map +1 -1
- package/dist/packages/docs/parse-css.d.ts.map +1 -1
- package/dist/packages/docs/parse-css.js +10 -3
- package/dist/packages/docs/parse-css.js.map +1 -1
- package/dist/packages/docs/parse.d.ts.map +1 -1
- package/dist/packages/docs/parse.js +233 -123
- package/dist/packages/docs/parse.js.map +1 -1
- package/dist/packages/slides/import-pptx.d.ts.map +1 -1
- package/dist/packages/slides/import-pptx.js +73 -2
- package/dist/packages/slides/import-pptx.js.map +1 -1
- package/dist/packages/slides/parse.d.ts.map +1 -1
- package/dist/packages/slides/parse.js +68 -2
- package/dist/packages/slides/parse.js.map +1 -1
- package/package.json +1 -1
|
@@ -6,11 +6,93 @@ import { parseHeadingLevel, getTextAlignment, getTextContent, BLOCK_LEVEL_TAGS,
|
|
|
6
6
|
import { isInlineOnlyContainer, extractInlineRuns, hasInlineFormatting } from "./parse-inline";
|
|
7
7
|
import { isGridOrFlexContainer, isHorizontalFlexContainer, isDecorativeSvg, isTwoColumnGridLayout, findTwoColumnChildren, detectFlexEqualColumns, detectGridEqualColumns } from "./parse-layout";
|
|
8
8
|
import { detectSkillItem, detectLanguageItem, detectProgressBar, detectTimeline } from "./parse-special";
|
|
9
|
+
/**
 * Collect the <tr> elements that belong directly to a table.
 *
 * A row counts as "direct" when it is a child of the table itself or a child
 * of one of the table's <thead>/<tbody>/<tfoot> sections. Only those two
 * levels are inspected, so rows of tables nested inside cells are not
 * returned.
 *
 * @param {Element} tableEl The table element whose rows are wanted.
 * @returns {Element[]} The matching rows, in the order they were visited.
 */
function getDirectRows(tableEl) {
    const rows = [];
    for (const child of tableEl.children) {
        switch (child.tagName.toLowerCase()) {
            case "tr":
                rows.push(child);
                break;
            case "thead":
            case "tbody":
            case "tfoot":
                // Row-group section: look one level down for its rows.
                for (const inner of child.children) {
                    if (inner.tagName.toLowerCase() === "tr") {
                        rows.push(inner);
                    }
                }
                break;
            default:
                // Any other child (caption, colgroup, ...) contributes no rows.
                break;
        }
    }
    return rows;
}
|
|
30
|
+
/**
 * Collect the <td>/<th> elements that are immediate children of a row.
 *
 * Only the row's own children are inspected, so cells belonging to tables
 * nested deeper inside the row are not returned.
 *
 * @param {Element} rowEl The table row to inspect.
 * @returns {Element[]} The row's own cells, in child order.
 */
function getDirectCells(rowEl) {
    return [...rowEl.children].filter((node) => {
        const tag = node.tagName.toLowerCase();
        return tag === "td" || tag === "th";
    });
}
|
|
43
|
+
/**
 * Report whether a table cell has a <table> somewhere among its descendants.
 *
 * @param {Element} cellEl The cell element to inspect.
 * @returns {boolean} True when querySelector("table") finds a match.
 */
function cellHasNestedTable(cellEl) {
    const nestedTable = cellEl.querySelector("table");
    return nestedTable !== null;
}
|
|
49
|
+
/**
 * Decide whether a table is a borderless "layout" table.
 *
 * A table qualifies when its inline `style` attribute declares
 * `border: none`, or when the `border` value returned by getElementStyles()
 * contains the keyword `none`.
 *
 * @param {Element} tableEl The table element to inspect.
 * @param {object} cssContext CSS context forwarded to getElementStyles().
 * @returns {boolean} True when the table declares no border.
 */
function isLayoutTable(tableEl, cssContext) {
    // CSS property names and keywords are case-insensitive and whitespace
    // around the colon is free-form, so match `BORDER : none` as well as the
    // two literal spellings. Longhands such as `border-bottom: none` do not
    // match because the colon must directly follow `border`.
    const inlineStyle = tableEl.getAttribute("style") || "";
    if (/border\s*:\s*none/i.test(inlineStyle)) {
        return true;
    }
    // Resolve stylesheet-based styles only when the inline check did not decide.
    const styles = getElementStyles(tableEl, cssContext);
    const border = styles?.border;
    if (!border) {
        return false;
    }
    return String(border).toLowerCase().includes("none");
}
|
|
65
|
+
/**
 * Build the parsed "image" element for an <img> node.
 *
 * @param {Element} imageEl The <img> element to read attributes from.
 * @param {string} imageKey Key stored on the result as `imageKey`.
 * @returns {object|null} An object with `type: "image"`, the key, the trimmed
 *   `src`, and optional `alt`, `width`, `height` and `caption`; null when the
 *   element has no usable `src`.
 */
function createParsedImageElement(imageEl, imageKey) {
    const src = imageEl.getAttribute("src")?.trim();
    if (!src) {
        return null;
    }
    // Read a numeric dimension attribute; percentage values, missing
    // attributes and values that parse to 0/NaN all yield undefined.
    const readDimension = (attrName) => {
        const raw = imageEl.getAttribute(attrName);
        if (!raw || raw.includes("%")) {
            return undefined;
        }
        return parseInt(raw, 10) || undefined;
    };
    const alt = imageEl.getAttribute("alt") || undefined;
    const width = readDimension("width");
    const height = readDimension("height");
    // The caption comes from the first <figcaption> of the closest enclosing <figure>.
    let caption;
    const figure = imageEl.closest("figure");
    const captionEl = figure ? figure.querySelector("figcaption") : null;
    if (captionEl) {
        caption = getTextContent(captionEl).trim() || undefined;
    }
    return { type: "image", imageKey, src, alt, width, height, caption };
}
|
|
9
91
|
/**
|
|
10
92
|
* Parse content from a container element (like sidebar or main content).
|
|
11
93
|
* Handles headings, paragraphs, lists, and nested containers with color inheritance.
|
|
12
94
|
*/
|
|
13
|
-
function parseContainerContent(element, cssContext, inheritedColor) {
|
|
95
|
+
function parseContainerContent(element, cssContext, nextImageKey, inheritedColor) {
|
|
14
96
|
const innerElements = [];
|
|
15
97
|
function processInnerNode(node, color) {
|
|
16
98
|
if (node.nodeType === Node.TEXT_NODE) {
|
|
@@ -105,20 +187,60 @@ function parseContainerContent(element, cssContext, inheritedColor) {
|
|
|
105
187
|
// Handle tables
|
|
106
188
|
if (tagName === "table") {
|
|
107
189
|
const rows = [];
|
|
108
|
-
|
|
190
|
+
const directRows = getDirectRows(el);
|
|
191
|
+
for (const tr of directRows) {
|
|
109
192
|
const cells = [];
|
|
110
|
-
|
|
193
|
+
const directCells = getDirectCells(tr);
|
|
194
|
+
for (const cell of directCells) {
|
|
195
|
+
// Extract cell-level background color (same logic as main table parsing)
|
|
196
|
+
let cellBackgroundColor;
|
|
197
|
+
// Method 1: Check cell's direct class-based styles
|
|
198
|
+
const cellStyles = getElementStyles(cell, cssContext, tr);
|
|
199
|
+
if (cellStyles.backgroundColor) {
|
|
200
|
+
const hexBg = extractHexColor(cellStyles.backgroundColor);
|
|
201
|
+
if (hexBg)
|
|
202
|
+
cellBackgroundColor = hexBg;
|
|
203
|
+
}
|
|
204
|
+
// Method 2: Check inline style on the cell
|
|
205
|
+
if (!cellBackgroundColor) {
|
|
206
|
+
const inlineStyle = cell.getAttribute("style") || "";
|
|
207
|
+
if (inlineStyle) {
|
|
208
|
+
const bgMatch = inlineStyle.match(/background(?:-color)?\s*:\s*([^;]+)/i);
|
|
209
|
+
if (bgMatch) {
|
|
210
|
+
const hexBg = extractHexColor(bgMatch[1]);
|
|
211
|
+
if (hexBg)
|
|
212
|
+
cellBackgroundColor = hexBg;
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
}
|
|
111
216
|
const runs = extractInlineRuns(cell, cssContext);
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
217
|
+
// If cell has background color, wrap in StyledTableCell
|
|
218
|
+
if (cellBackgroundColor) {
|
|
219
|
+
if (runs.length > 0) {
|
|
220
|
+
if (hasInlineFormatting(runs)) {
|
|
221
|
+
cells.push({ content: runs, backgroundColor: cellBackgroundColor });
|
|
222
|
+
}
|
|
223
|
+
else {
|
|
224
|
+
cells.push({ content: runs.map((r) => r.text).join(""), backgroundColor: cellBackgroundColor });
|
|
225
|
+
}
|
|
115
226
|
}
|
|
116
227
|
else {
|
|
117
|
-
cells.push(
|
|
228
|
+
cells.push({ content: "", backgroundColor: cellBackgroundColor });
|
|
118
229
|
}
|
|
119
230
|
}
|
|
120
231
|
else {
|
|
121
|
-
|
|
232
|
+
// No cell background - use simple format
|
|
233
|
+
if (runs.length > 0) {
|
|
234
|
+
if (hasInlineFormatting(runs)) {
|
|
235
|
+
cells.push(runs);
|
|
236
|
+
}
|
|
237
|
+
else {
|
|
238
|
+
cells.push(runs.map((r) => r.text).join(""));
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
else {
|
|
242
|
+
cells.push("");
|
|
243
|
+
}
|
|
122
244
|
}
|
|
123
245
|
}
|
|
124
246
|
if (cells.length > 0) {
|
|
@@ -236,7 +358,7 @@ function parseContainerContent(element, cssContext, inheritedColor) {
|
|
|
236
358
|
* @param element The blockquote/callout element
|
|
237
359
|
* @param cssContext The CSS context for resolving styles
|
|
238
360
|
*/
|
|
239
|
-
function parseBlockquoteContent(element, cssContext) {
|
|
361
|
+
function parseBlockquoteContent(element, cssContext, nextImageKey) {
|
|
240
362
|
const innerElements = [];
|
|
241
363
|
// GENERALIZED: Extract blockquote's font-style from CSS element selector
|
|
242
364
|
// This handles rules like "blockquote { font-style: italic; }"
|
|
@@ -374,7 +496,7 @@ function parseBlockquoteContent(element, cssContext) {
|
|
|
374
496
|
const nestedBorderHex = extractBorderColorFromStyle(styles);
|
|
375
497
|
// Only treat as nested blockquote if it has a visually distinct background or border
|
|
376
498
|
if (nestedBgColor || nestedBorderHex) {
|
|
377
|
-
const nestedContent = parseBlockquoteContent(el, cssContext);
|
|
499
|
+
const nestedContent = parseBlockquoteContent(el, cssContext, nextImageKey);
|
|
378
500
|
if (nestedContent.length > 0) {
|
|
379
501
|
let nestedBorderStyle;
|
|
380
502
|
if (styles.borderLeft && !styles.border) {
|
|
@@ -436,29 +558,9 @@ function parseBlockquoteContent(element, cssContext) {
|
|
|
436
558
|
}
|
|
437
559
|
// Handle <img> elements inside blockquotes/callouts
|
|
438
560
|
if (tagName === "img") {
|
|
439
|
-
const
|
|
440
|
-
if (
|
|
441
|
-
|
|
442
|
-
let width;
|
|
443
|
-
let height;
|
|
444
|
-
const widthAttr = el.getAttribute("width");
|
|
445
|
-
const heightAttr = el.getAttribute("height");
|
|
446
|
-
if (widthAttr && !widthAttr.includes("%")) {
|
|
447
|
-
width = parseInt(widthAttr, 10) || undefined;
|
|
448
|
-
}
|
|
449
|
-
if (heightAttr && !heightAttr.includes("%")) {
|
|
450
|
-
height = parseInt(heightAttr, 10) || undefined;
|
|
451
|
-
}
|
|
452
|
-
// Check if img is inside a figure with figcaption
|
|
453
|
-
let caption;
|
|
454
|
-
const parentFigure = el.closest("figure");
|
|
455
|
-
if (parentFigure) {
|
|
456
|
-
const figcaption = parentFigure.querySelector("figcaption");
|
|
457
|
-
if (figcaption) {
|
|
458
|
-
caption = getTextContent(figcaption).trim() || undefined;
|
|
459
|
-
}
|
|
460
|
-
}
|
|
461
|
-
innerElements.push({ type: "image", src, alt, width, height, caption });
|
|
561
|
+
const imageElement = createParsedImageElement(el, nextImageKey());
|
|
562
|
+
if (imageElement) {
|
|
563
|
+
innerElements.push(imageElement);
|
|
462
564
|
}
|
|
463
565
|
return;
|
|
464
566
|
}
|
|
@@ -664,6 +766,8 @@ export function parseHtmlContent(html) {
|
|
|
664
766
|
const cssContext = parseCssContext(doc);
|
|
665
767
|
// Track SVGs that have been processed (to avoid duplicate processing)
|
|
666
768
|
const processedSvgs = new Set();
|
|
769
|
+
let imageIndex = 0;
|
|
770
|
+
const nextImageKey = () => `image-${imageIndex++}`;
|
|
667
771
|
const { body } = doc;
|
|
668
772
|
function processNode(node, inheritedAlignment, inheritedColor) {
|
|
669
773
|
if (node.nodeType === Node.TEXT_NODE) {
|
|
@@ -862,7 +966,7 @@ export function parseHtmlContent(html) {
|
|
|
862
966
|
// This handles patterns like: <p class="intro-section"> with border-left: 4px solid ...
|
|
863
967
|
// Must come BEFORE regular paragraph handling
|
|
864
968
|
if (tagName === "p" && isBlockquoteOrCallout(element, cssContext)) {
|
|
865
|
-
const content = parseBlockquoteContent(element, cssContext);
|
|
969
|
+
const content = parseBlockquoteContent(element, cssContext, nextImageKey);
|
|
866
970
|
if (content.length > 0) {
|
|
867
971
|
const elementStyles = getElementStyles(element, cssContext);
|
|
868
972
|
let borderColor;
|
|
@@ -1118,23 +1222,91 @@ export function parseHtmlContent(html) {
|
|
|
1118
1222
|
}
|
|
1119
1223
|
if (tagName === "table") {
|
|
1120
1224
|
const rows = [];
|
|
1121
|
-
|
|
1225
|
+
// Use direct children traversal to avoid flattening nested tables
|
|
1226
|
+
const directRows = getDirectRows(element);
|
|
1227
|
+
for (const tr of directRows) {
|
|
1122
1228
|
const cells = [];
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
if (
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1229
|
+
const directCells = getDirectCells(tr);
|
|
1230
|
+
for (const cell of directCells) {
|
|
1231
|
+
// Check if cell contains nested table (complex content)
|
|
1232
|
+
if (cellHasNestedTable(cell)) {
|
|
1233
|
+
// Parse cell content recursively for nested tables
|
|
1234
|
+
const nestedContent = parseContainerContent(cell, cssContext, nextImageKey);
|
|
1235
|
+
if (nestedContent.length > 0) {
|
|
1236
|
+
cells.push({ content: nestedContent });
|
|
1130
1237
|
}
|
|
1131
1238
|
else {
|
|
1132
|
-
|
|
1133
|
-
cells.push(runs.map((r) => r.text).join(""));
|
|
1239
|
+
cells.push("");
|
|
1134
1240
|
}
|
|
1135
1241
|
}
|
|
1136
1242
|
else {
|
|
1137
|
-
|
|
1243
|
+
// Extract cell-level background color from CSS
|
|
1244
|
+
// This handles nested selectors like .charges-table th or .charges-table tr.row-even td
|
|
1245
|
+
let cellBackgroundColor;
|
|
1246
|
+
// Method 1: Check cell's direct styles with row as parent
|
|
1247
|
+
// This handles .row-class td { ... } patterns
|
|
1248
|
+
const cellStylesWithRowParent = getElementStyles(cell, cssContext, tr);
|
|
1249
|
+
if (cellStylesWithRowParent.backgroundColor) {
|
|
1250
|
+
const hexBg = extractHexColor(cellStylesWithRowParent.backgroundColor);
|
|
1251
|
+
if (hexBg)
|
|
1252
|
+
cellBackgroundColor = hexBg;
|
|
1253
|
+
}
|
|
1254
|
+
// Method 2: Check cell's styles with TABLE as parent
|
|
1255
|
+
// This handles .table-class th { ... } and .table-class td { ... } patterns
|
|
1256
|
+
if (!cellBackgroundColor) {
|
|
1257
|
+
const cellStylesWithTableParent = getElementStyles(cell, cssContext, element);
|
|
1258
|
+
if (cellStylesWithTableParent.backgroundColor) {
|
|
1259
|
+
const hexBg = extractHexColor(cellStylesWithTableParent.backgroundColor);
|
|
1260
|
+
if (hexBg)
|
|
1261
|
+
cellBackgroundColor = hexBg;
|
|
1262
|
+
}
|
|
1263
|
+
}
|
|
1264
|
+
// Method 3: Check inline style on the cell
|
|
1265
|
+
const inlineStyle = cell.getAttribute("style") || "";
|
|
1266
|
+
if (inlineStyle && !cellBackgroundColor) {
|
|
1267
|
+
const bgMatch = inlineStyle.match(/background(?:-color)?\s*:\s*([^;]+)/i);
|
|
1268
|
+
if (bgMatch) {
|
|
1269
|
+
const hexBg = extractHexColor(bgMatch[1]);
|
|
1270
|
+
if (hexBg)
|
|
1271
|
+
cellBackgroundColor = hexBg;
|
|
1272
|
+
}
|
|
1273
|
+
}
|
|
1274
|
+
// NOTE: Multi-level nested selectors like ".charges-table tr.row-even td { ... }" are now
|
|
1275
|
+
// handled by the general ancestor walk in getElementStyles(). The nestedSelectorPattern
|
|
1276
|
+
// extracts the row class (row-even) as the parent for td styles, and getElementStyles()
|
|
1277
|
+
// finds tr.row-even as an ancestor of td when walking up the DOM tree.
|
|
1278
|
+
// Extract inline runs to preserve bold/italic formatting in cells
|
|
1279
|
+
const runs = extractInlineRuns(cell, cssContext);
|
|
1280
|
+
// If cell has background color, wrap in StyledTableCell
|
|
1281
|
+
if (cellBackgroundColor) {
|
|
1282
|
+
if (runs.length > 0) {
|
|
1283
|
+
if (hasInlineFormatting(runs)) {
|
|
1284
|
+
cells.push({ content: runs, backgroundColor: cellBackgroundColor });
|
|
1285
|
+
}
|
|
1286
|
+
else {
|
|
1287
|
+
cells.push({ content: runs.map((r) => r.text).join(""), backgroundColor: cellBackgroundColor });
|
|
1288
|
+
}
|
|
1289
|
+
}
|
|
1290
|
+
else {
|
|
1291
|
+
cells.push({ content: "", backgroundColor: cellBackgroundColor });
|
|
1292
|
+
}
|
|
1293
|
+
}
|
|
1294
|
+
else {
|
|
1295
|
+
// No cell background - use simple format
|
|
1296
|
+
if (runs.length > 0) {
|
|
1297
|
+
if (hasInlineFormatting(runs)) {
|
|
1298
|
+
// Has formatting - store as runs
|
|
1299
|
+
cells.push(runs);
|
|
1300
|
+
}
|
|
1301
|
+
else {
|
|
1302
|
+
// Plain text - store as string
|
|
1303
|
+
cells.push(runs.map((r) => r.text).join(""));
|
|
1304
|
+
}
|
|
1305
|
+
}
|
|
1306
|
+
else {
|
|
1307
|
+
cells.push("");
|
|
1308
|
+
}
|
|
1309
|
+
}
|
|
1138
1310
|
}
|
|
1139
1311
|
}
|
|
1140
1312
|
if (cells.length > 0) {
|
|
@@ -1306,7 +1478,9 @@ export function parseHtmlContent(html) {
|
|
|
1306
1478
|
}
|
|
1307
1479
|
}
|
|
1308
1480
|
}
|
|
1309
|
-
|
|
1481
|
+
// Detect noBorders layout tables (tables with border:none for pure layout)
|
|
1482
|
+
const noBorders = isLayoutTable(element, cssContext);
|
|
1483
|
+
elements.push({ type: "table", rows, cellPadding, headerBackgroundColor, headerTextColor, evenRowBackgroundColor, hasHeader: hasExplicitHeader ? true : undefined, horizontalBordersOnly: horizontalBordersOnly || undefined, noBorders: noBorders || undefined, columnWidths: calculateColumnWidths(rows, element) });
|
|
1310
1484
|
}
|
|
1311
1485
|
return;
|
|
1312
1486
|
}
|
|
@@ -1361,8 +1535,8 @@ export function parseHtmlContent(html) {
|
|
|
1361
1535
|
? extractHexColor(sidebarStyles.color)
|
|
1362
1536
|
: undefined;
|
|
1363
1537
|
// Parse sidebar and main content separately
|
|
1364
|
-
const sidebarContent = parseContainerContent(sidebarEl, cssContext, sidebarTextColor);
|
|
1365
|
-
const mainContent = parseContainerContent(mainEl, cssContext);
|
|
1538
|
+
const sidebarContent = parseContainerContent(sidebarEl, cssContext, nextImageKey, sidebarTextColor);
|
|
1539
|
+
const mainContent = parseContainerContent(mainEl, cssContext, nextImageKey);
|
|
1366
1540
|
if (sidebarContent.length > 0 || mainContent.length > 0) {
|
|
1367
1541
|
// Emit two-column layout for documents with sidebar patterns.
|
|
1368
1542
|
// This produces a DOCX table with sidebar + main content columns,
|
|
@@ -1435,7 +1609,7 @@ export function parseHtmlContent(html) {
|
|
|
1435
1609
|
// Parse each column's content
|
|
1436
1610
|
const columnContents = [];
|
|
1437
1611
|
for (const col of gridColumns) {
|
|
1438
|
-
const colContent = parseContainerContent(col, cssContext);
|
|
1612
|
+
const colContent = parseContainerContent(col, cssContext, nextImageKey);
|
|
1439
1613
|
columnContents.push(colContent);
|
|
1440
1614
|
}
|
|
1441
1615
|
// Check if at least one column has content
|
|
@@ -1927,32 +2101,9 @@ export function parseHtmlContent(html) {
|
|
|
1927
2101
|
}
|
|
1928
2102
|
// Handle <img> elements - external images that need to be fetched
|
|
1929
2103
|
if (tagName === "img") {
|
|
1930
|
-
const
|
|
1931
|
-
if (
|
|
1932
|
-
|
|
1933
|
-
// Extract width and height from attributes only
|
|
1934
|
-
// Computed styles are not available in linkedom (Node.js)
|
|
1935
|
-
// Actual dimensions will be obtained when the image is fetched
|
|
1936
|
-
let width;
|
|
1937
|
-
let height;
|
|
1938
|
-
const widthAttr = element.getAttribute("width");
|
|
1939
|
-
const heightAttr = element.getAttribute("height");
|
|
1940
|
-
if (widthAttr && !widthAttr.includes("%")) {
|
|
1941
|
-
width = parseInt(widthAttr, 10) || undefined;
|
|
1942
|
-
}
|
|
1943
|
-
if (heightAttr && !heightAttr.includes("%")) {
|
|
1944
|
-
height = parseInt(heightAttr, 10) || undefined;
|
|
1945
|
-
}
|
|
1946
|
-
// Check if img is inside a figure with figcaption
|
|
1947
|
-
let caption;
|
|
1948
|
-
const parentFigure = element.closest("figure");
|
|
1949
|
-
if (parentFigure) {
|
|
1950
|
-
const figcaption = parentFigure.querySelector("figcaption");
|
|
1951
|
-
if (figcaption) {
|
|
1952
|
-
caption = getTextContent(figcaption).trim() || undefined;
|
|
1953
|
-
}
|
|
1954
|
-
}
|
|
1955
|
-
elements.push({ type: "image", src, alt, width, height, caption });
|
|
2104
|
+
const imageElement = createParsedImageElement(element, nextImageKey());
|
|
2105
|
+
if (imageElement) {
|
|
2106
|
+
elements.push(imageElement);
|
|
1956
2107
|
}
|
|
1957
2108
|
return;
|
|
1958
2109
|
}
|
|
@@ -1961,30 +2112,9 @@ export function parseHtmlContent(html) {
|
|
|
1961
2112
|
// Find the fallback img inside picture
|
|
1962
2113
|
const imgEl = element.querySelector("img");
|
|
1963
2114
|
if (imgEl) {
|
|
1964
|
-
const
|
|
1965
|
-
if (
|
|
1966
|
-
|
|
1967
|
-
// Extract width and height
|
|
1968
|
-
let width;
|
|
1969
|
-
let height;
|
|
1970
|
-
const widthAttr = imgEl.getAttribute("width");
|
|
1971
|
-
const heightAttr = imgEl.getAttribute("height");
|
|
1972
|
-
if (widthAttr && !widthAttr.includes("%")) {
|
|
1973
|
-
width = parseInt(widthAttr, 10) || undefined;
|
|
1974
|
-
}
|
|
1975
|
-
if (heightAttr && !heightAttr.includes("%")) {
|
|
1976
|
-
height = parseInt(heightAttr, 10) || undefined;
|
|
1977
|
-
}
|
|
1978
|
-
// Check for figcaption
|
|
1979
|
-
let caption;
|
|
1980
|
-
const parentFigure = element.closest("figure");
|
|
1981
|
-
if (parentFigure) {
|
|
1982
|
-
const figcaption = parentFigure.querySelector("figcaption");
|
|
1983
|
-
if (figcaption) {
|
|
1984
|
-
caption = getTextContent(figcaption).trim() || undefined;
|
|
1985
|
-
}
|
|
1986
|
-
}
|
|
1987
|
-
elements.push({ type: "image", src, alt, width, height, caption });
|
|
2115
|
+
const imageElement = createParsedImageElement(imgEl, nextImageKey());
|
|
2116
|
+
if (imageElement) {
|
|
2117
|
+
elements.push(imageElement);
|
|
1988
2118
|
}
|
|
1989
2119
|
}
|
|
1990
2120
|
return;
|
|
@@ -1993,29 +2123,9 @@ export function parseHtmlContent(html) {
|
|
|
1993
2123
|
if (tagName === "figure") {
|
|
1994
2124
|
const imgEl = element.querySelector("img") || element.querySelector("picture img");
|
|
1995
2125
|
if (imgEl) {
|
|
1996
|
-
const
|
|
1997
|
-
if (
|
|
1998
|
-
|
|
1999
|
-
// Extract width and height from attributes only
|
|
2000
|
-
// Computed styles are not available in linkedom (Node.js)
|
|
2001
|
-
// Actual dimensions will be obtained when the image is fetched
|
|
2002
|
-
let width;
|
|
2003
|
-
let height;
|
|
2004
|
-
const widthAttr = imgEl.getAttribute("width");
|
|
2005
|
-
const heightAttr = imgEl.getAttribute("height");
|
|
2006
|
-
if (widthAttr && !widthAttr.includes("%")) {
|
|
2007
|
-
width = parseInt(widthAttr, 10) || undefined;
|
|
2008
|
-
}
|
|
2009
|
-
if (heightAttr && !heightAttr.includes("%")) {
|
|
2010
|
-
height = parseInt(heightAttr, 10) || undefined;
|
|
2011
|
-
}
|
|
2012
|
-
// Extract caption from figcaption
|
|
2013
|
-
let caption;
|
|
2014
|
-
const figcaption = element.querySelector("figcaption");
|
|
2015
|
-
if (figcaption) {
|
|
2016
|
-
caption = getTextContent(figcaption).trim() || undefined;
|
|
2017
|
-
}
|
|
2018
|
-
elements.push({ type: "image", src, alt, width, height, caption });
|
|
2126
|
+
const imageElement = createParsedImageElement(imgEl, nextImageKey());
|
|
2127
|
+
if (imageElement) {
|
|
2128
|
+
elements.push(imageElement);
|
|
2019
2129
|
return;
|
|
2020
2130
|
}
|
|
2021
2131
|
}
|
|
@@ -2031,7 +2141,7 @@ export function parseHtmlContent(html) {
|
|
|
2031
2141
|
// Check for blockquote/callout before generic container handling
|
|
2032
2142
|
// Uses style-based detection, NOT class names
|
|
2033
2143
|
if (isBlockquoteOrCallout(element, cssContext)) {
|
|
2034
|
-
const content = parseBlockquoteContent(element, cssContext);
|
|
2144
|
+
const content = parseBlockquoteContent(element, cssContext, nextImageKey);
|
|
2035
2145
|
if (content.length > 0) {
|
|
2036
2146
|
// Extract styling from CSS classes and inline styles (generalized approach)
|
|
2037
2147
|
const elementStyles = getElementStyles(element, cssContext);
|