@docen/import-docx 0.0.13 → 0.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -70,11 +70,6 @@ interface DocxImportOptions {
70
70
  /** Custom image converter (default: embed as base64) */
71
71
  convertImage?: (image: DocxImageInfo) => Promise<DocxImageResult>;
72
72
 
73
- /** Whether to ignore empty paragraphs (default: false).
74
- * Empty paragraphs are those without text content or images.
75
- * Paragraphs containing only whitespace or images are not considered empty. */
76
- ignoreEmptyParagraphs?: boolean;
77
-
78
73
  /**
79
74
  * Dynamic import function for @napi-rs/canvas
80
75
  * Required for image cropping in Node.js environment, ignored in browser
@@ -94,7 +89,15 @@ interface DocxImportOptions {
94
89
  *
95
90
  * @default false
96
91
  */
97
- enableImageCrop?: boolean;
92
+ crop?: boolean;
93
+
94
+ /** Paragraph processing options */
95
+ paragraph?: {
96
+ /** Whether to ignore empty paragraphs (default: false).
97
+ * Empty paragraphs are those without text content or images.
98
+ * Paragraphs containing only whitespace or images are not considered empty. */
99
+ ignoreEmpty?: boolean;
100
+ };
98
101
  }
99
102
  ```
100
103
 
@@ -231,7 +234,7 @@ const buffer = readFileSync("document.docx");
231
234
 
232
235
  const content = await parseDOCX(buffer, {
233
236
  canvasImport: () => import("@napi-rs/canvas"),
234
- enableImageCrop: true, // Enable cropping (default is false)
237
+ crop: true, // Enable cropping (default is false)
235
238
  });
236
239
  ```
237
240
 
@@ -243,7 +246,7 @@ If you want to explicitly ignore crop information in DOCX and use full images (t
243
246
 
244
247
  ```typescript
245
248
  const content = await parseDOCX(buffer, {
246
- enableImageCrop: false,
249
+ crop: false,
247
250
  });
248
251
  ```
249
252
 
@@ -271,7 +274,7 @@ All colors are imported as hex values (e.g., "#FF0000", "#008000"). Color names
271
274
  - Only embedded images are supported (external image links are not fetched)
272
275
  - Image dimensions and title are extracted from DOCX metadata
273
276
  - **Image Cropping**: By default, images are imported in full size (crop information is ignored)
274
- - To enable cropping, set `enableImageCrop: true` in options
277
+ - To enable cropping, set `crop: true` in options
275
278
  - In browser environments, cropping works natively with Canvas API
276
279
  - In Node.js, you must also provide `canvasImport` option with dynamic import of `@napi-rs/canvas`
277
280
  - If `@napi-rs/canvas` is not available in Node.js, images will be imported without cropping (graceful degradation)
package/dist/index.d.mts CHANGED
@@ -2166,7 +2166,7 @@ can be lifted. Will not go across
2166
2166
  [isolating](https://prosemirror.net/docs/ref/#model.NodeSpec.isolating) parent nodes.
2167
2167
  */
2168
2168
  //#endregion
2169
- //#region ../../node_modules/.pnpm/prosemirror-view@1.41.6/node_modules/prosemirror-view/dist/index.d.ts
2169
+ //#region ../../node_modules/.pnpm/prosemirror-view@1.41.7/node_modules/prosemirror-view/dist/index.d.ts
2170
2170
  type DOMNode = InstanceType<typeof window.Node>;
2171
2171
  type WidgetConstructor = ((view: EditorView, getPos: () => number | undefined) => DOMNode) | DOMNode;
2172
2172
  /**
@@ -3672,7 +3672,7 @@ point into textblock nodes. It can be empty (a regular cursor
3672
3672
  position).
3673
3673
  */
3674
3674
  //#endregion
3675
- //#region ../../node_modules/.pnpm/@tiptap+core@3.20.0_@tiptap+pm@3.20.0/node_modules/@tiptap/core/dist/index.d.ts
3675
+ //#region ../../node_modules/.pnpm/@tiptap+core@3.20.4_@tiptap+pm@3.20.4/node_modules/@tiptap/core/dist/index.d.ts
3676
3676
  type StringKeyOf<T> = Extract<keyof T, string>;
3677
3677
  type CallbackType<T extends Record<string, any>, EventName extends StringKeyOf<T>> = T[EventName] extends any[] ? T[EventName] : [T[EventName]];
3678
3678
  type CallbackFunction<T extends Record<string, any>, EventName extends StringKeyOf<T>> = (...props: CallbackType<T, EventName>) => any;
@@ -4436,6 +4436,27 @@ interface ExtendableConfig<Options = any, Storage = any, Config extends Extensio
4436
4436
  * Defines if this markdown element should indent its child elements
4437
4437
  */
4438
4438
  indentsContent?: boolean;
4439
+ /**
4440
+ * Lets a mark tell the Markdown serializer which inline HTML tags it can
4441
+ * safely use when plain markdown delimiters would become ambiguous.
4442
+ *
4443
+ * This is mainly useful for overlapping marks. For example, bold followed
4444
+ * by bold+italic followed by italic cannot always be written back with only
4445
+ * `*` and `**` in a way that still parses correctly. In that case, the
4446
+ * serializer can close the overlapping section with markdown and reopen the
4447
+ * remaining tail with HTML instead.
4448
+ *
4449
+ * Example:
4450
+ * - desired formatting: `**123` + `*456*` + `789 italic`
4451
+ * - serialized result: `**123*456***<em>789</em>`
4452
+ *
4453
+ * If your extension defines custom mark names, set `htmlReopen` on that
4454
+ * extension so the serializer can reuse its HTML form for overlap cases.
4455
+ */
4456
+ htmlReopen?: {
4457
+ open: string;
4458
+ close: string;
4459
+ };
4439
4460
  };
4440
4461
  /**
4441
4462
  * This function extends the schema of the node.
@@ -5279,7 +5300,8 @@ type MarkdownToken = {
5279
5300
  */
5280
5301
  type MarkdownParseHelpers = {
5281
5302
  /** Parse an array of inline tokens into text nodes with marks */parseInline: (tokens: MarkdownToken[]) => JSONContent[]; /** Parse an array of block-level tokens */
5282
- parseChildren: (tokens: MarkdownToken[]) => JSONContent[]; /** Create a text node with optional marks */
5303
+ parseChildren: (tokens: MarkdownToken[]) => JSONContent[]; /** Parse block-level tokens while preserving implicit empty paragraphs from blank lines */
5304
+ parseBlockChildren?: (tokens: MarkdownToken[]) => JSONContent[]; /** Create a text node with optional marks */
5283
5305
  createTextNode: (text: string, marks?: Array<{
5284
5306
  type: string;
5285
5307
  attrs?: any;
@@ -5312,6 +5334,7 @@ type RenderContext = {
5312
5334
  level: number;
5313
5335
  meta?: Record<string, any>;
5314
5336
  parentType?: string | null;
5337
+ previousNode?: JSONContent | null;
5315
5338
  };
5316
5339
  /** Extension contract for markdown parsing/serialization. */
5317
5340
  /**
@@ -5345,7 +5368,8 @@ type MarkdownRendererHelpers = {
5345
5368
  * @param separator An optional separator string (legacy) or RenderContext
5346
5369
  * @returns The rendered markdown string
5347
5370
  */
5348
- renderChildren: (nodes: JSONContent | JSONContent[], separator?: string) => string;
5371
+ renderChildren: (nodes: JSONContent | JSONContent[], separator?: string) => string; /** Render a single child node with its sibling index preserved */
5372
+ renderChild?: (node: JSONContent, index: number) => string;
5349
5373
  /**
5350
5374
  * Render a text token to a markdown string
5351
5375
  * @param prefix The prefix to add before the content
@@ -6638,9 +6662,11 @@ interface DocxImportOptions {
6638
6662
  image?: {
6639
6663
  handler?: DocxImageImportHandler;
6640
6664
  canvasImport?: () => Promise<typeof _napi_rs_canvas0>;
6641
- enableImageCrop?: boolean;
6665
+ crop?: boolean;
6666
+ };
6667
+ paragraph?: {
6668
+ ignoreEmpty?: boolean;
6642
6669
  };
6643
- ignoreEmptyParagraphs?: boolean;
6644
6670
  }
6645
6671
  //#endregion
6646
6672
  //#region ../../node_modules/.pnpm/@types+unist@3.0.3/node_modules/@types/unist/index.d.ts
package/dist/index.mjs CHANGED
@@ -463,7 +463,7 @@ async function applyCropToImage(pic, imgInfo, params) {
463
463
  if (!base64Data) return imgInfo;
464
464
  const croppedBase64 = uint8ArrayToBase64(await cropImageIfNeeded(base64ToUint8Array(base64Data), crop, {
465
465
  canvasImport: params.context.image?.canvasImport,
466
- enabled: params.context.image?.enableImageCrop ?? false
466
+ enabled: params.context.image?.crop ?? false
467
467
  }));
468
468
  const originalWidth = imgInfo.width || 0;
469
469
  const originalHeight = imgInfo.height || 0;
@@ -532,25 +532,6 @@ function findDrawingElement(run) {
532
532
  return choice ? findChild(choice, "w:drawing") : null;
533
533
  }
534
534
  /**
535
- * Adjust image dimensions to fit within group bounds while preserving aspect ratio
536
- */
537
- function fitToGroup(groupWidth, groupHeight, metaWidth, metaHeight) {
538
- const metaRatio = metaWidth / metaHeight;
539
- const groupRatio = groupWidth / groupHeight;
540
- if (Math.abs(metaRatio - groupRatio) > .1) if (metaRatio > groupRatio) return {
541
- width: groupWidth,
542
- height: Math.round(groupWidth / metaRatio)
543
- };
544
- else return {
545
- width: Math.round(groupHeight * metaRatio),
546
- height: groupHeight
547
- };
548
- return {
549
- width: groupWidth,
550
- height: groupHeight
551
- };
552
- }
553
- /**
554
535
  * Extract images from DOCX and convert to base64 data URLs or use custom handler
555
536
  * Returns Map of relationship ID to image info (src + dimensions)
556
537
  */
@@ -613,7 +594,7 @@ async function extractImageFromDrawing(drawing, params) {
613
594
  try {
614
595
  src = `${metadata},${uint8ArrayToBase64(await cropImageIfNeeded(bytes, crop, {
615
596
  canvasImport: context.image?.canvasImport,
616
- enabled: context.image?.enableImageCrop ?? false
597
+ enabled: context.image?.crop ?? false
617
598
  }))}`;
618
599
  } catch (error) {
619
600
  console.warn("Image cropping failed, using original image:", error);
@@ -711,10 +692,57 @@ async function extractImagesFromDrawing(drawing, params) {
711
692
  if (group) {
712
693
  const groupSp = findChild(group, "wpg:grpSp");
713
694
  const pictures = groupSp ? [...findDeepChildren(groupSp, "pic:pic"), ...findDeepChildren(groupSp, "pic")] : [...findDeepChildren(group, "pic:pic"), ...findDeepChildren(group, "pic")];
695
+ const wspShapes = groupSp ? findDeepChildren(groupSp, "wps:wsp") : findDeepChildren(group, "wps:wsp");
696
+ const childImages = [];
714
697
  for (const pic of pictures) {
715
698
  const picGraphic = findChild(pic, "a:graphic");
699
+ let relativeSize = null;
700
+ const spPr = findChild(pic, "pic:spPr");
701
+ if (spPr) {
702
+ const xfrm = findChild(spPr, "a:xfrm");
703
+ if (xfrm) {
704
+ const ext = findChild(xfrm, "a:ext");
705
+ if (ext && ext.attributes["cx"] && ext.attributes["cy"]) relativeSize = {
706
+ cx: parseInt(ext.attributes["cx"], 10),
707
+ cy: parseInt(ext.attributes["cy"], 10)
708
+ };
709
+ }
710
+ }
711
+ childImages.push({
712
+ pic,
713
+ picGraphic,
714
+ relativeSize,
715
+ isWsp: false
716
+ });
717
+ }
718
+ for (const wsp of wspShapes) {
719
+ const wspGraphic = findChild(wsp, "a:graphic");
720
+ let relativeSize = null;
721
+ const spPr = findChild(wsp, "wps:spPr");
722
+ if (spPr) {
723
+ const xfrm = findChild(spPr, "a:xfrm");
724
+ if (xfrm) {
725
+ const ext = findChild(xfrm, "a:ext");
726
+ if (ext && ext.attributes["cx"] && ext.attributes["cy"]) relativeSize = {
727
+ cx: parseInt(ext.attributes["cx"], 10),
728
+ cy: parseInt(ext.attributes["cy"], 10)
729
+ };
730
+ }
731
+ }
732
+ childImages.push({
733
+ pic: wsp,
734
+ picGraphic: wspGraphic,
735
+ relativeSize,
736
+ isWsp: true
737
+ });
738
+ }
739
+ let totalCx = 0;
740
+ for (const child of childImages) if (child.relativeSize) totalCx += child.relativeSize.cx;
741
+ const scaleFactor = totalCx > 0 && groupWidth ? groupWidth / totalCx : 1;
742
+ for (const child of childImages) {
743
+ const { pic, picGraphic, relativeSize, isWsp } = child;
716
744
  if (!picGraphic) {
717
- const blipFill = findChild(pic, "pic:blipFill") || findDeepChild(pic, "a:blipFill");
745
+ const blipFill = isWsp ? findChild(pic, "wps:blipFill") || findDeepChild(pic, "a:blipFill") : findChild(pic, "pic:blipFill") || findDeepChild(pic, "a:blipFill");
718
746
  if (!blipFill) continue;
719
747
  const blip = findChild(blipFill, "a:blip") || findDeepChild(blipFill, "a:blip");
720
748
  if (!blip?.attributes["r:embed"]) continue;
@@ -722,13 +750,22 @@ async function extractImagesFromDrawing(drawing, params) {
722
750
  const imgInfo = params.context.images.get(rId);
723
751
  if (!imgInfo) continue;
724
752
  const processedImgInfo = await applyCropToImage(pic, imgInfo, params);
753
+ let width = processedImgInfo.width;
754
+ let height = processedImgInfo.height;
755
+ if (groupWidth && groupHeight && relativeSize && totalCx > 0) {
756
+ width = Math.round(relativeSize.cx * scaleFactor);
757
+ height = Math.round(relativeSize.cy * scaleFactor);
758
+ } else if (groupWidth && groupHeight) {
759
+ width = groupWidth;
760
+ height = groupHeight;
761
+ }
725
762
  result.push({
726
763
  type: "image",
727
764
  attrs: {
728
765
  src: processedImgInfo.src,
729
766
  alt: "",
730
- width: processedImgInfo.width,
731
- height: processedImgInfo.height
767
+ width,
768
+ height
732
769
  }
733
770
  });
734
771
  continue;
@@ -748,7 +785,7 @@ async function extractImagesFromDrawing(drawing, params) {
748
785
  if (base64Data) {
749
786
  const croppedBase64 = uint8ArrayToBase64(await cropImageIfNeeded(base64ToUint8Array(base64Data), crop, {
750
787
  canvasImport: params.context.image?.canvasImport,
751
- enabled: params.context.image?.enableImageCrop ?? false
788
+ enabled: params.context.image?.crop ?? false
752
789
  }));
753
790
  image.attrs.src = `${metadata},${croppedBase64}`;
754
791
  const rId = syntheticDrawing.children[0]?.type === "element" ? findDeepChild(syntheticDrawing.children[0], "a:blip")?.attributes["r:embed"] : void 0;
@@ -763,27 +800,25 @@ async function extractImagesFromDrawing(drawing, params) {
763
800
  const visibleHeightPct = 1 - cropTopPct - cropBottomPct;
764
801
  const croppedWidth = Math.round(imgInfo.width * visibleWidthPct);
765
802
  const croppedHeight = Math.round(imgInfo.height * visibleHeightPct);
766
- image.attrs.width = croppedWidth;
767
- image.attrs.height = croppedHeight;
803
+ if (groupWidth && groupHeight && relativeSize && totalCx > 0) {
804
+ image.attrs.width = Math.round(relativeSize.cx * scaleFactor);
805
+ image.attrs.height = Math.round(relativeSize.cy * scaleFactor);
806
+ } else {
807
+ image.attrs.width = croppedWidth;
808
+ image.attrs.height = croppedHeight;
809
+ }
768
810
  }
769
811
  }
770
812
  }
771
813
  } catch (error) {
772
814
  console.warn("Grouped image cropping failed, using original image:", error);
773
815
  }
774
- else {
775
- const rId = syntheticDrawing.children[0]?.type === "element" ? findDeepChild(syntheticDrawing.children[0], "a:blip")?.attributes["r:embed"] : void 0;
776
- if (groupWidth && groupHeight && rId) {
777
- const imgInfo = params.context.images.get(rId);
778
- if (imgInfo?.width && imgInfo?.height) {
779
- const adjusted = fitToGroup(groupWidth, groupHeight, imgInfo.width, imgInfo.height);
780
- image.attrs.width = adjusted.width;
781
- image.attrs.height = adjusted.height;
782
- } else {
783
- image.attrs.width = groupWidth;
784
- image.attrs.height = groupHeight;
785
- }
786
- }
816
+ else if (groupWidth && groupHeight && relativeSize && totalCx > 0) {
817
+ image.attrs.width = Math.round(relativeSize.cx * scaleFactor);
818
+ image.attrs.height = Math.round(relativeSize.cy * scaleFactor);
819
+ } else if (groupWidth && groupHeight) {
820
+ image.attrs.width = groupWidth;
821
+ image.attrs.height = groupHeight;
787
822
  }
788
823
  result.push(image);
789
824
  }
@@ -1340,7 +1375,9 @@ function parseCellProperties(cellNode) {
1340
1375
  if (!tcPr) return props;
1341
1376
  const gridSpan = findChild(tcPr, "w:gridSpan");
1342
1377
  if (gridSpan?.attributes["w:val"]) props.colspan = parseInt(gridSpan.attributes["w:val"]);
1343
- if (findChild(tcPr, "w:vMerge")?.attributes["w:val"] === "continue") props.rowspan = 0;
1378
+ const vMerge = findChild(tcPr, "w:vMerge");
1379
+ if (vMerge) if (vMerge.attributes["w:val"] === "continue") props.rowspan = 0;
1380
+ else props.rowspan = 1;
1344
1381
  const tcW = findChild(tcPr, "w:tcW");
1345
1382
  if (tcW?.attributes["w:w"]) props.colwidth = [convertTwipToPixels(parseInt(tcW.attributes["w:w"]))];
1346
1383
  const shd = findChild(tcPr, "w:shd");
@@ -1679,7 +1716,7 @@ async function convertElements(elements, params) {
1679
1716
  for (let i = 0; i < elements.length; i++) {
1680
1717
  if (processedIndices.has(i)) continue;
1681
1718
  const element = elements[i];
1682
- if (params.context.ignoreEmptyParagraphs && element.name === "w:p" && isEmptyParagraph(element)) continue;
1719
+ if (params.context.paragraph?.ignoreEmpty && element.name === "w:p" && isEmptyParagraph(element)) continue;
1683
1720
  const node = await convertElement(element, elements, i, params, processedIndices);
1684
1721
  if (Array.isArray(node)) result.push(...node);
1685
1722
  else if (node) result.push(node);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@docen/import-docx",
3
- "version": "0.0.13",
3
+ "version": "0.0.14",
4
4
  "description": "A powerful TipTap/ProseMirror extension that imports Microsoft Word DOCX files to editor content",
5
5
  "keywords": [
6
6
  "converter",
@@ -51,10 +51,10 @@
51
51
  "xast-util-from-xml": "4.0.0"
52
52
  },
53
53
  "devDependencies": {
54
- "@tiptap/core": "3.20.0",
54
+ "@tiptap/core": "3.20.4",
55
55
  "@types/xast": "2.0.4",
56
- "@docen/extensions": "0.0.13",
57
- "@docen/utils": "0.0.13"
56
+ "@docen/extensions": "0.0.14",
57
+ "@docen/utils": "0.0.14"
58
58
  },
59
59
  "peerDependencies": {
60
60
  "@napi-rs/canvas": "^0.1.88"
@@ -65,7 +65,7 @@
65
65
  }
66
66
  },
67
67
  "optionalDependencies": {
68
- "@napi-rs/canvas": "^0.1.96"
68
+ "@napi-rs/canvas": "^0.1.97"
69
69
  },
70
70
  "scripts": {
71
71
  "dev": "basis build --stub",