docgen-utils 1.0.20 → 1.0.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/bundle.js +24021 -23941
- package/dist/bundle.min.js +100 -100
- package/dist/cli.js +261 -103
- package/dist/packages/cli/commands/export-docs.d.ts.map +1 -1
- package/dist/packages/cli/commands/export-docs.js +160 -11
- package/dist/packages/cli/commands/export-docs.js.map +1 -1
- package/dist/packages/docs/common.d.ts +1 -0
- package/dist/packages/docs/common.d.ts.map +1 -1
- package/dist/packages/docs/create-document.d.ts.map +1 -1
- package/dist/packages/docs/create-document.js +8 -2
- package/dist/packages/docs/create-document.js.map +1 -1
- package/dist/packages/docs/import-docx.d.ts.map +1 -1
- package/dist/packages/docs/import-docx.js +2 -1
- package/dist/packages/docs/import-docx.js.map +1 -1
- package/dist/packages/docs/parse.d.ts.map +1 -1
- package/dist/packages/docs/parse.js +48 -104
- package/dist/packages/docs/parse.js.map +1 -1
- package/dist/packages/slides/import-pptx.d.ts.map +1 -1
- package/dist/packages/slides/import-pptx.js +73 -2
- package/dist/packages/slides/import-pptx.js.map +1 -1
- package/dist/packages/slides/parse.d.ts.map +1 -1
- package/dist/packages/slides/parse.js +68 -2
- package/dist/packages/slides/parse.js.map +1 -1
- package/package.json +1 -1
|
@@ -6,11 +6,37 @@ import { parseHeadingLevel, getTextAlignment, getTextContent, BLOCK_LEVEL_TAGS,
|
|
|
6
6
|
import { isInlineOnlyContainer, extractInlineRuns, hasInlineFormatting } from "./parse-inline";
|
|
7
7
|
import { isGridOrFlexContainer, isHorizontalFlexContainer, isDecorativeSvg, isTwoColumnGridLayout, findTwoColumnChildren, detectFlexEqualColumns, detectGridEqualColumns } from "./parse-layout";
|
|
8
8
|
import { detectSkillItem, detectLanguageItem, detectProgressBar, detectTimeline } from "./parse-special";
|
|
9
|
+
/**
 * Parse an HTML width/height attribute into a pixel count.
 *
 * Percentages are ignored because they cannot be resolved to pixels from the
 * attribute alone. Values that do not parse to a strictly positive integer
 * (non-numeric, zero, or negative) are treated as "not specified".
 *
 * @param {string | null} attrValue Raw attribute value, or null when absent.
 * @returns {number | undefined} Positive integer size, or undefined.
 */
function parseImagePixelDimension(attrValue) {
    if (!attrValue || attrValue.includes("%")) {
        return undefined;
    }
    const pixels = Number.parseInt(attrValue, 10);
    // NaN and 0 were already dropped by the previous `|| undefined`; negative
    // sizes are additionally rejected because they are never a valid dimension.
    return pixels > 0 ? pixels : undefined;
}
/**
 * Build a parsed "image" element from an <img> DOM node.
 *
 * @param {Element} imageEl The <img> element to read attributes from.
 * @param {string} imageKey Key stored on the result as `imageKey`.
 * @returns {{type: "image", imageKey: string, src: string, alt?: string,
 *   width?: number, height?: number, caption?: string} | null}
 *   The parsed image, or null when the element has no usable `src`.
 */
function createParsedImageElement(imageEl, imageKey) {
    const src = imageEl.getAttribute("src")?.trim();
    if (!src) {
        // Missing or whitespace-only src: nothing to reference.
        return null;
    }
    const alt = imageEl.getAttribute("alt") || undefined;
    // Dimensions are taken from the width/height attributes only.
    const width = parseImagePixelDimension(imageEl.getAttribute("width"));
    const height = parseImagePixelDimension(imageEl.getAttribute("height"));
    // A caption is taken from the first <figcaption> of the nearest enclosing <figure>.
    let caption;
    const figcaption = imageEl.closest("figure")?.querySelector("figcaption");
    if (figcaption) {
        caption = getTextContent(figcaption).trim() || undefined;
    }
    return { type: "image", imageKey, src, alt, width, height, caption };
}
|
|
9
35
|
/**
|
|
10
36
|
* Parse content from a container element (like sidebar or main content).
|
|
11
37
|
* Handles headings, paragraphs, lists, and nested containers with color inheritance.
|
|
12
38
|
*/
|
|
13
|
-
function parseContainerContent(element, cssContext, inheritedColor) {
|
|
39
|
+
function parseContainerContent(element, cssContext, nextImageKey, inheritedColor) {
|
|
14
40
|
const innerElements = [];
|
|
15
41
|
function processInnerNode(node, color) {
|
|
16
42
|
if (node.nodeType === Node.TEXT_NODE) {
|
|
@@ -236,7 +262,7 @@ function parseContainerContent(element, cssContext, inheritedColor) {
|
|
|
236
262
|
* @param element The blockquote/callout element
|
|
237
263
|
* @param cssContext The CSS context for resolving styles
|
|
238
264
|
*/
|
|
239
|
-
function parseBlockquoteContent(element, cssContext) {
|
|
265
|
+
function parseBlockquoteContent(element, cssContext, nextImageKey) {
|
|
240
266
|
const innerElements = [];
|
|
241
267
|
// GENERALIZED: Extract blockquote's font-style from CSS element selector
|
|
242
268
|
// This handles rules like "blockquote { font-style: italic; }"
|
|
@@ -374,7 +400,7 @@ function parseBlockquoteContent(element, cssContext) {
|
|
|
374
400
|
const nestedBorderHex = extractBorderColorFromStyle(styles);
|
|
375
401
|
// Only treat as nested blockquote if it has a visually distinct background or border
|
|
376
402
|
if (nestedBgColor || nestedBorderHex) {
|
|
377
|
-
const nestedContent = parseBlockquoteContent(el, cssContext);
|
|
403
|
+
const nestedContent = parseBlockquoteContent(el, cssContext, nextImageKey);
|
|
378
404
|
if (nestedContent.length > 0) {
|
|
379
405
|
let nestedBorderStyle;
|
|
380
406
|
if (styles.borderLeft && !styles.border) {
|
|
@@ -436,29 +462,9 @@ function parseBlockquoteContent(element, cssContext) {
|
|
|
436
462
|
}
|
|
437
463
|
// Handle <img> elements inside blockquotes/callouts
|
|
438
464
|
if (tagName === "img") {
|
|
439
|
-
const
|
|
440
|
-
if (
|
|
441
|
-
|
|
442
|
-
let width;
|
|
443
|
-
let height;
|
|
444
|
-
const widthAttr = el.getAttribute("width");
|
|
445
|
-
const heightAttr = el.getAttribute("height");
|
|
446
|
-
if (widthAttr && !widthAttr.includes("%")) {
|
|
447
|
-
width = parseInt(widthAttr, 10) || undefined;
|
|
448
|
-
}
|
|
449
|
-
if (heightAttr && !heightAttr.includes("%")) {
|
|
450
|
-
height = parseInt(heightAttr, 10) || undefined;
|
|
451
|
-
}
|
|
452
|
-
// Check if img is inside a figure with figcaption
|
|
453
|
-
let caption;
|
|
454
|
-
const parentFigure = el.closest("figure");
|
|
455
|
-
if (parentFigure) {
|
|
456
|
-
const figcaption = parentFigure.querySelector("figcaption");
|
|
457
|
-
if (figcaption) {
|
|
458
|
-
caption = getTextContent(figcaption).trim() || undefined;
|
|
459
|
-
}
|
|
460
|
-
}
|
|
461
|
-
innerElements.push({ type: "image", src, alt, width, height, caption });
|
|
465
|
+
const imageElement = createParsedImageElement(el, nextImageKey());
|
|
466
|
+
if (imageElement) {
|
|
467
|
+
innerElements.push(imageElement);
|
|
462
468
|
}
|
|
463
469
|
return;
|
|
464
470
|
}
|
|
@@ -664,6 +670,8 @@ export function parseHtmlContent(html) {
|
|
|
664
670
|
const cssContext = parseCssContext(doc);
|
|
665
671
|
// Track SVGs that have been processed (to avoid duplicate processing)
|
|
666
672
|
const processedSvgs = new Set();
|
|
673
|
+
let imageIndex = 0;
|
|
674
|
+
const nextImageKey = () => `image-${imageIndex++}`;
|
|
667
675
|
const { body } = doc;
|
|
668
676
|
function processNode(node, inheritedAlignment, inheritedColor) {
|
|
669
677
|
if (node.nodeType === Node.TEXT_NODE) {
|
|
@@ -862,7 +870,7 @@ export function parseHtmlContent(html) {
|
|
|
862
870
|
// This handles patterns like: <p class="intro-section"> with border-left: 4px solid ...
|
|
863
871
|
// Must come BEFORE regular paragraph handling
|
|
864
872
|
if (tagName === "p" && isBlockquoteOrCallout(element, cssContext)) {
|
|
865
|
-
const content = parseBlockquoteContent(element, cssContext);
|
|
873
|
+
const content = parseBlockquoteContent(element, cssContext, nextImageKey);
|
|
866
874
|
if (content.length > 0) {
|
|
867
875
|
const elementStyles = getElementStyles(element, cssContext);
|
|
868
876
|
let borderColor;
|
|
@@ -1361,8 +1369,8 @@ export function parseHtmlContent(html) {
|
|
|
1361
1369
|
? extractHexColor(sidebarStyles.color)
|
|
1362
1370
|
: undefined;
|
|
1363
1371
|
// Parse sidebar and main content separately
|
|
1364
|
-
const sidebarContent = parseContainerContent(sidebarEl, cssContext, sidebarTextColor);
|
|
1365
|
-
const mainContent = parseContainerContent(mainEl, cssContext);
|
|
1372
|
+
const sidebarContent = parseContainerContent(sidebarEl, cssContext, nextImageKey, sidebarTextColor);
|
|
1373
|
+
const mainContent = parseContainerContent(mainEl, cssContext, nextImageKey);
|
|
1366
1374
|
if (sidebarContent.length > 0 || mainContent.length > 0) {
|
|
1367
1375
|
// Emit two-column layout for documents with sidebar patterns.
|
|
1368
1376
|
// This produces a DOCX table with sidebar + main content columns,
|
|
@@ -1435,7 +1443,7 @@ export function parseHtmlContent(html) {
|
|
|
1435
1443
|
// Parse each column's content
|
|
1436
1444
|
const columnContents = [];
|
|
1437
1445
|
for (const col of gridColumns) {
|
|
1438
|
-
const colContent = parseContainerContent(col, cssContext);
|
|
1446
|
+
const colContent = parseContainerContent(col, cssContext, nextImageKey);
|
|
1439
1447
|
columnContents.push(colContent);
|
|
1440
1448
|
}
|
|
1441
1449
|
// Check if at least one column has content
|
|
@@ -1927,32 +1935,9 @@ export function parseHtmlContent(html) {
|
|
|
1927
1935
|
}
|
|
1928
1936
|
// Handle <img> elements - external images that need to be fetched
|
|
1929
1937
|
if (tagName === "img") {
|
|
1930
|
-
const
|
|
1931
|
-
if (
|
|
1932
|
-
|
|
1933
|
-
// Extract width and height from attributes only
|
|
1934
|
-
// Computed styles are not available in linkedom (Node.js)
|
|
1935
|
-
// Actual dimensions will be obtained when the image is fetched
|
|
1936
|
-
let width;
|
|
1937
|
-
let height;
|
|
1938
|
-
const widthAttr = element.getAttribute("width");
|
|
1939
|
-
const heightAttr = element.getAttribute("height");
|
|
1940
|
-
if (widthAttr && !widthAttr.includes("%")) {
|
|
1941
|
-
width = parseInt(widthAttr, 10) || undefined;
|
|
1942
|
-
}
|
|
1943
|
-
if (heightAttr && !heightAttr.includes("%")) {
|
|
1944
|
-
height = parseInt(heightAttr, 10) || undefined;
|
|
1945
|
-
}
|
|
1946
|
-
// Check if img is inside a figure with figcaption
|
|
1947
|
-
let caption;
|
|
1948
|
-
const parentFigure = element.closest("figure");
|
|
1949
|
-
if (parentFigure) {
|
|
1950
|
-
const figcaption = parentFigure.querySelector("figcaption");
|
|
1951
|
-
if (figcaption) {
|
|
1952
|
-
caption = getTextContent(figcaption).trim() || undefined;
|
|
1953
|
-
}
|
|
1954
|
-
}
|
|
1955
|
-
elements.push({ type: "image", src, alt, width, height, caption });
|
|
1938
|
+
const imageElement = createParsedImageElement(element, nextImageKey());
|
|
1939
|
+
if (imageElement) {
|
|
1940
|
+
elements.push(imageElement);
|
|
1956
1941
|
}
|
|
1957
1942
|
return;
|
|
1958
1943
|
}
|
|
@@ -1961,30 +1946,9 @@ export function parseHtmlContent(html) {
|
|
|
1961
1946
|
// Find the fallback img inside picture
|
|
1962
1947
|
const imgEl = element.querySelector("img");
|
|
1963
1948
|
if (imgEl) {
|
|
1964
|
-
const
|
|
1965
|
-
if (
|
|
1966
|
-
|
|
1967
|
-
// Extract width and height
|
|
1968
|
-
let width;
|
|
1969
|
-
let height;
|
|
1970
|
-
const widthAttr = imgEl.getAttribute("width");
|
|
1971
|
-
const heightAttr = imgEl.getAttribute("height");
|
|
1972
|
-
if (widthAttr && !widthAttr.includes("%")) {
|
|
1973
|
-
width = parseInt(widthAttr, 10) || undefined;
|
|
1974
|
-
}
|
|
1975
|
-
if (heightAttr && !heightAttr.includes("%")) {
|
|
1976
|
-
height = parseInt(heightAttr, 10) || undefined;
|
|
1977
|
-
}
|
|
1978
|
-
// Check for figcaption
|
|
1979
|
-
let caption;
|
|
1980
|
-
const parentFigure = element.closest("figure");
|
|
1981
|
-
if (parentFigure) {
|
|
1982
|
-
const figcaption = parentFigure.querySelector("figcaption");
|
|
1983
|
-
if (figcaption) {
|
|
1984
|
-
caption = getTextContent(figcaption).trim() || undefined;
|
|
1985
|
-
}
|
|
1986
|
-
}
|
|
1987
|
-
elements.push({ type: "image", src, alt, width, height, caption });
|
|
1949
|
+
const imageElement = createParsedImageElement(imgEl, nextImageKey());
|
|
1950
|
+
if (imageElement) {
|
|
1951
|
+
elements.push(imageElement);
|
|
1988
1952
|
}
|
|
1989
1953
|
}
|
|
1990
1954
|
return;
|
|
@@ -1993,29 +1957,9 @@ export function parseHtmlContent(html) {
|
|
|
1993
1957
|
if (tagName === "figure") {
|
|
1994
1958
|
const imgEl = element.querySelector("img") || element.querySelector("picture img");
|
|
1995
1959
|
if (imgEl) {
|
|
1996
|
-
const
|
|
1997
|
-
if (
|
|
1998
|
-
|
|
1999
|
-
// Extract width and height from attributes only
|
|
2000
|
-
// Computed styles are not available in linkedom (Node.js)
|
|
2001
|
-
// Actual dimensions will be obtained when the image is fetched
|
|
2002
|
-
let width;
|
|
2003
|
-
let height;
|
|
2004
|
-
const widthAttr = imgEl.getAttribute("width");
|
|
2005
|
-
const heightAttr = imgEl.getAttribute("height");
|
|
2006
|
-
if (widthAttr && !widthAttr.includes("%")) {
|
|
2007
|
-
width = parseInt(widthAttr, 10) || undefined;
|
|
2008
|
-
}
|
|
2009
|
-
if (heightAttr && !heightAttr.includes("%")) {
|
|
2010
|
-
height = parseInt(heightAttr, 10) || undefined;
|
|
2011
|
-
}
|
|
2012
|
-
// Extract caption from figcaption
|
|
2013
|
-
let caption;
|
|
2014
|
-
const figcaption = element.querySelector("figcaption");
|
|
2015
|
-
if (figcaption) {
|
|
2016
|
-
caption = getTextContent(figcaption).trim() || undefined;
|
|
2017
|
-
}
|
|
2018
|
-
elements.push({ type: "image", src, alt, width, height, caption });
|
|
1960
|
+
const imageElement = createParsedImageElement(imgEl, nextImageKey());
|
|
1961
|
+
if (imageElement) {
|
|
1962
|
+
elements.push(imageElement);
|
|
2019
1963
|
return;
|
|
2020
1964
|
}
|
|
2021
1965
|
}
|
|
@@ -2031,7 +1975,7 @@ export function parseHtmlContent(html) {
|
|
|
2031
1975
|
// Check for blockquote/callout before generic container handling
|
|
2032
1976
|
// Uses style-based detection, NOT class names
|
|
2033
1977
|
if (isBlockquoteOrCallout(element, cssContext)) {
|
|
2034
|
-
const content = parseBlockquoteContent(element, cssContext);
|
|
1978
|
+
const content = parseBlockquoteContent(element, cssContext, nextImageKey);
|
|
2035
1979
|
if (content.length > 0) {
|
|
2036
1980
|
// Extract styling from CSS classes and inline styles (generalized approach)
|
|
2037
1981
|
const elementStyles = getElementStyles(element, cssContext);
|