@docen/import-docx 0.0.13 → 0.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -70,11 +70,6 @@ interface DocxImportOptions {
70
70
  /** Custom image converter (default: embed as base64) */
71
71
  convertImage?: (image: DocxImageInfo) => Promise<DocxImageResult>;
72
72
 
73
- /** Whether to ignore empty paragraphs (default: false).
74
- * Empty paragraphs are those without text content or images.
75
- * Paragraphs containing only whitespace or images are not considered empty. */
76
- ignoreEmptyParagraphs?: boolean;
77
-
78
73
  /**
79
74
  * Dynamic import function for @napi-rs/canvas
80
75
  * Required for image cropping in Node.js environment, ignored in browser
@@ -94,7 +89,15 @@ interface DocxImportOptions {
94
89
  *
95
90
  * @default false
96
91
  */
97
- enableImageCrop?: boolean;
92
+ crop?: boolean;
93
+
94
+ /** Paragraph processing options */
95
+ paragraph?: {
96
+ /** Whether to ignore empty paragraphs (default: false).
97
+ * Empty paragraphs are those without text content or images.
98
+ * Paragraphs containing only whitespace or images are not considered empty. */
99
+ ignoreEmpty?: boolean;
100
+ };
98
101
  }
99
102
  ```
100
103
 
@@ -231,7 +234,7 @@ const buffer = readFileSync("document.docx");
231
234
 
232
235
  const content = await parseDOCX(buffer, {
233
236
  canvasImport: () => import("@napi-rs/canvas"),
234
- enableImageCrop: true, // Enable cropping (default is false)
237
+ crop: true, // Enable cropping (default is false)
235
238
  });
236
239
  ```
237
240
 
@@ -243,7 +246,7 @@ If you want to explicitly ignore crop information in DOCX and use full images (t
243
246
 
244
247
  ```typescript
245
248
  const content = await parseDOCX(buffer, {
246
- enableImageCrop: false,
249
+ crop: false,
247
250
  });
248
251
  ```
249
252
 
@@ -271,7 +274,7 @@ All colors are imported as hex values (e.g., "#FF0000", "#008000"). Color names
271
274
  - Only embedded images are supported (external image links are not fetched)
272
275
  - Image dimensions and title are extracted from DOCX metadata
273
276
  - **Image Cropping**: By default, images are imported in full size (crop information is ignored)
274
- - To enable cropping, set `enableImageCrop: true` in options
277
+ - To enable cropping, set `crop: true` in options
275
278
  - In browser environments, cropping works natively with Canvas API
276
279
  - In Node.js, you must also provide `canvasImport` option with dynamic import of `@napi-rs/canvas`
277
280
  - If `@napi-rs/canvas` is not available in Node.js, images will be imported without cropping (graceful degradation)
package/dist/index.d.mts CHANGED
@@ -2166,7 +2166,7 @@ can be lifted. Will not go across
2166
2166
  [isolating](https://prosemirror.net/docs/ref/#model.NodeSpec.isolating) parent nodes.
2167
2167
  */
2168
2168
  //#endregion
2169
- //#region ../../node_modules/.pnpm/prosemirror-view@1.41.6/node_modules/prosemirror-view/dist/index.d.ts
2169
+ //#region ../../node_modules/.pnpm/prosemirror-view@1.41.7/node_modules/prosemirror-view/dist/index.d.ts
2170
2170
  type DOMNode = InstanceType<typeof window.Node>;
2171
2171
  type WidgetConstructor = ((view: EditorView, getPos: () => number | undefined) => DOMNode) | DOMNode;
2172
2172
  /**
@@ -3672,7 +3672,7 @@ point into textblock nodes. It can be empty (a regular cursor
3672
3672
  position).
3673
3673
  */
3674
3674
  //#endregion
3675
- //#region ../../node_modules/.pnpm/@tiptap+core@3.20.0_@tiptap+pm@3.20.0/node_modules/@tiptap/core/dist/index.d.ts
3675
+ //#region ../../node_modules/.pnpm/@tiptap+core@3.20.4_@tiptap+pm@3.20.4/node_modules/@tiptap/core/dist/index.d.ts
3676
3676
  type StringKeyOf<T> = Extract<keyof T, string>;
3677
3677
  type CallbackType<T extends Record<string, any>, EventName extends StringKeyOf<T>> = T[EventName] extends any[] ? T[EventName] : [T[EventName]];
3678
3678
  type CallbackFunction<T extends Record<string, any>, EventName extends StringKeyOf<T>> = (...props: CallbackType<T, EventName>) => any;
@@ -4436,6 +4436,27 @@ interface ExtendableConfig<Options = any, Storage = any, Config extends Extensio
4436
4436
  * Defines if this markdown element should indent its child elements
4437
4437
  */
4438
4438
  indentsContent?: boolean;
4439
+ /**
4440
+ * Lets a mark tell the Markdown serializer which inline HTML tags it can
4441
+ * safely use when plain markdown delimiters would become ambiguous.
4442
+ *
4443
+ * This is mainly useful for overlapping marks. For example, bold followed
4444
+ * by bold+italic followed by italic cannot always be written back with only
4445
+ * `*` and `**` in a way that still parses correctly. In that case, the
4446
+ * serializer can close the overlapping section with markdown and reopen the
4447
+ * remaining tail with HTML instead.
4448
+ *
4449
+ * Example:
4450
+ * - desired formatting: `**123` + `*456*` + `789 italic`
4451
+ * - serialized result: `**123*456***<em>789</em>`
4452
+ *
4453
+ * If your extension defines custom mark names, set `htmlReopen` on that
4454
+ * extension so the serializer can reuse its HTML form for overlap cases.
4455
+ */
4456
+ htmlReopen?: {
4457
+ open: string;
4458
+ close: string;
4459
+ };
4439
4460
  };
4440
4461
  /**
4441
4462
  * This function extends the schema of the node.
@@ -5279,7 +5300,8 @@ type MarkdownToken = {
5279
5300
  */
5280
5301
  type MarkdownParseHelpers = {
5281
5302
  /** Parse an array of inline tokens into text nodes with marks */parseInline: (tokens: MarkdownToken[]) => JSONContent[]; /** Parse an array of block-level tokens */
5282
- parseChildren: (tokens: MarkdownToken[]) => JSONContent[]; /** Create a text node with optional marks */
5303
+ parseChildren: (tokens: MarkdownToken[]) => JSONContent[]; /** Parse block-level tokens while preserving implicit empty paragraphs from blank lines */
5304
+ parseBlockChildren?: (tokens: MarkdownToken[]) => JSONContent[]; /** Create a text node with optional marks */
5283
5305
  createTextNode: (text: string, marks?: Array<{
5284
5306
  type: string;
5285
5307
  attrs?: any;
@@ -5312,6 +5334,7 @@ type RenderContext = {
5312
5334
  level: number;
5313
5335
  meta?: Record<string, any>;
5314
5336
  parentType?: string | null;
5337
+ previousNode?: JSONContent | null;
5315
5338
  };
5316
5339
  /** Extension contract for markdown parsing/serialization. */
5317
5340
  /**
@@ -5345,7 +5368,8 @@ type MarkdownRendererHelpers = {
5345
5368
  * @param separator An optional separator string (legacy) or RenderContext
5346
5369
  * @returns The rendered markdown string
5347
5370
  */
5348
- renderChildren: (nodes: JSONContent | JSONContent[], separator?: string) => string;
5371
+ renderChildren: (nodes: JSONContent | JSONContent[], separator?: string) => string; /** Render a single child node with its sibling index preserved */
5372
+ renderChild?: (node: JSONContent, index: number) => string;
5349
5373
  /**
5350
5374
  * Render a text token to a markdown string
5351
5375
  * @param prefix The prefix to add before the content
@@ -6638,9 +6662,11 @@ interface DocxImportOptions {
6638
6662
  image?: {
6639
6663
  handler?: DocxImageImportHandler;
6640
6664
  canvasImport?: () => Promise<typeof _napi_rs_canvas0>;
6641
- enableImageCrop?: boolean;
6665
+ crop?: boolean;
6666
+ };
6667
+ paragraph?: {
6668
+ ignoreEmpty?: boolean;
6642
6669
  };
6643
- ignoreEmptyParagraphs?: boolean;
6644
6670
  }
6645
6671
  //#endregion
6646
6672
  //#region ../../node_modules/.pnpm/@types+unist@3.0.3/node_modules/@types/unist/index.d.ts
package/dist/index.mjs CHANGED
@@ -463,7 +463,7 @@ async function applyCropToImage(pic, imgInfo, params) {
463
463
  if (!base64Data) return imgInfo;
464
464
  const croppedBase64 = uint8ArrayToBase64(await cropImageIfNeeded(base64ToUint8Array(base64Data), crop, {
465
465
  canvasImport: params.context.image?.canvasImport,
466
- enabled: params.context.image?.enableImageCrop ?? false
466
+ enabled: params.context.image?.crop ?? false
467
467
  }));
468
468
  const originalWidth = imgInfo.width || 0;
469
469
  const originalHeight = imgInfo.height || 0;
@@ -532,25 +532,6 @@ function findDrawingElement(run) {
532
532
  return choice ? findChild(choice, "w:drawing") : null;
533
533
  }
534
534
  /**
535
- * Adjust image dimensions to fit within group bounds while preserving aspect ratio
536
- */
537
- function fitToGroup(groupWidth, groupHeight, metaWidth, metaHeight) {
538
- const metaRatio = metaWidth / metaHeight;
539
- const groupRatio = groupWidth / groupHeight;
540
- if (Math.abs(metaRatio - groupRatio) > .1) if (metaRatio > groupRatio) return {
541
- width: groupWidth,
542
- height: Math.round(groupWidth / metaRatio)
543
- };
544
- else return {
545
- width: Math.round(groupHeight * metaRatio),
546
- height: groupHeight
547
- };
548
- return {
549
- width: groupWidth,
550
- height: groupHeight
551
- };
552
- }
553
- /**
554
535
  * Extract images from DOCX and convert to base64 data URLs or use custom handler
555
536
  * Returns Map of relationship ID to image info (src + dimensions)
556
537
  */
@@ -613,7 +594,7 @@ async function extractImageFromDrawing(drawing, params) {
613
594
  try {
614
595
  src = `${metadata},${uint8ArrayToBase64(await cropImageIfNeeded(bytes, crop, {
615
596
  canvasImport: context.image?.canvasImport,
616
- enabled: context.image?.enableImageCrop ?? false
597
+ enabled: context.image?.crop ?? false
617
598
  }))}`;
618
599
  } catch (error) {
619
600
  console.warn("Image cropping failed, using original image:", error);
@@ -711,10 +692,57 @@ async function extractImagesFromDrawing(drawing, params) {
711
692
  if (group) {
712
693
  const groupSp = findChild(group, "wpg:grpSp");
713
694
  const pictures = groupSp ? [...findDeepChildren(groupSp, "pic:pic"), ...findDeepChildren(groupSp, "pic")] : [...findDeepChildren(group, "pic:pic"), ...findDeepChildren(group, "pic")];
695
+ const wspShapes = groupSp ? findDeepChildren(groupSp, "wps:wsp") : findDeepChildren(group, "wps:wsp");
696
+ const childImages = [];
714
697
  for (const pic of pictures) {
715
698
  const picGraphic = findChild(pic, "a:graphic");
699
+ let relativeSize = null;
700
+ const spPr = findChild(pic, "pic:spPr");
701
+ if (spPr) {
702
+ const xfrm = findChild(spPr, "a:xfrm");
703
+ if (xfrm) {
704
+ const ext = findChild(xfrm, "a:ext");
705
+ if (ext && ext.attributes["cx"] && ext.attributes["cy"]) relativeSize = {
706
+ cx: parseInt(ext.attributes["cx"], 10),
707
+ cy: parseInt(ext.attributes["cy"], 10)
708
+ };
709
+ }
710
+ }
711
+ childImages.push({
712
+ pic,
713
+ picGraphic,
714
+ relativeSize,
715
+ isWsp: false
716
+ });
717
+ }
718
+ for (const wsp of wspShapes) {
719
+ const wspGraphic = findChild(wsp, "a:graphic");
720
+ let relativeSize = null;
721
+ const spPr = findChild(wsp, "wps:spPr");
722
+ if (spPr) {
723
+ const xfrm = findChild(spPr, "a:xfrm");
724
+ if (xfrm) {
725
+ const ext = findChild(xfrm, "a:ext");
726
+ if (ext && ext.attributes["cx"] && ext.attributes["cy"]) relativeSize = {
727
+ cx: parseInt(ext.attributes["cx"], 10),
728
+ cy: parseInt(ext.attributes["cy"], 10)
729
+ };
730
+ }
731
+ }
732
+ childImages.push({
733
+ pic: wsp,
734
+ picGraphic: wspGraphic,
735
+ relativeSize,
736
+ isWsp: true
737
+ });
738
+ }
739
+ let totalCx = 0;
740
+ for (const child of childImages) if (child.relativeSize) totalCx += child.relativeSize.cx;
741
+ const scaleFactor = totalCx > 0 && groupWidth ? groupWidth / totalCx : 1;
742
+ for (const child of childImages) {
743
+ const { pic, picGraphic, relativeSize, isWsp } = child;
716
744
  if (!picGraphic) {
717
- const blipFill = findChild(pic, "pic:blipFill") || findDeepChild(pic, "a:blipFill");
745
+ const blipFill = isWsp ? findChild(pic, "wps:blipFill") || findDeepChild(pic, "a:blipFill") : findChild(pic, "pic:blipFill") || findDeepChild(pic, "a:blipFill");
718
746
  if (!blipFill) continue;
719
747
  const blip = findChild(blipFill, "a:blip") || findDeepChild(blipFill, "a:blip");
720
748
  if (!blip?.attributes["r:embed"]) continue;
@@ -722,13 +750,22 @@ async function extractImagesFromDrawing(drawing, params) {
722
750
  const imgInfo = params.context.images.get(rId);
723
751
  if (!imgInfo) continue;
724
752
  const processedImgInfo = await applyCropToImage(pic, imgInfo, params);
753
+ let width = processedImgInfo.width;
754
+ let height = processedImgInfo.height;
755
+ if (groupWidth && groupHeight && relativeSize && totalCx > 0) {
756
+ width = Math.round(relativeSize.cx * scaleFactor);
757
+ height = Math.round(relativeSize.cy * scaleFactor);
758
+ } else if (groupWidth && groupHeight) {
759
+ width = groupWidth;
760
+ height = groupHeight;
761
+ }
725
762
  result.push({
726
763
  type: "image",
727
764
  attrs: {
728
765
  src: processedImgInfo.src,
729
766
  alt: "",
730
- width: processedImgInfo.width,
731
- height: processedImgInfo.height
767
+ width,
768
+ height
732
769
  }
733
770
  });
734
771
  continue;
@@ -748,7 +785,7 @@ async function extractImagesFromDrawing(drawing, params) {
748
785
  if (base64Data) {
749
786
  const croppedBase64 = uint8ArrayToBase64(await cropImageIfNeeded(base64ToUint8Array(base64Data), crop, {
750
787
  canvasImport: params.context.image?.canvasImport,
751
- enabled: params.context.image?.enableImageCrop ?? false
788
+ enabled: params.context.image?.crop ?? false
752
789
  }));
753
790
  image.attrs.src = `${metadata},${croppedBase64}`;
754
791
  const rId = syntheticDrawing.children[0]?.type === "element" ? findDeepChild(syntheticDrawing.children[0], "a:blip")?.attributes["r:embed"] : void 0;
@@ -763,27 +800,25 @@ async function extractImagesFromDrawing(drawing, params) {
763
800
  const visibleHeightPct = 1 - cropTopPct - cropBottomPct;
764
801
  const croppedWidth = Math.round(imgInfo.width * visibleWidthPct);
765
802
  const croppedHeight = Math.round(imgInfo.height * visibleHeightPct);
766
- image.attrs.width = croppedWidth;
767
- image.attrs.height = croppedHeight;
803
+ if (groupWidth && groupHeight && relativeSize && totalCx > 0) {
804
+ image.attrs.width = Math.round(relativeSize.cx * scaleFactor);
805
+ image.attrs.height = Math.round(relativeSize.cy * scaleFactor);
806
+ } else {
807
+ image.attrs.width = croppedWidth;
808
+ image.attrs.height = croppedHeight;
809
+ }
768
810
  }
769
811
  }
770
812
  }
771
813
  } catch (error) {
772
814
  console.warn("Grouped image cropping failed, using original image:", error);
773
815
  }
774
- else {
775
- const rId = syntheticDrawing.children[0]?.type === "element" ? findDeepChild(syntheticDrawing.children[0], "a:blip")?.attributes["r:embed"] : void 0;
776
- if (groupWidth && groupHeight && rId) {
777
- const imgInfo = params.context.images.get(rId);
778
- if (imgInfo?.width && imgInfo?.height) {
779
- const adjusted = fitToGroup(groupWidth, groupHeight, imgInfo.width, imgInfo.height);
780
- image.attrs.width = adjusted.width;
781
- image.attrs.height = adjusted.height;
782
- } else {
783
- image.attrs.width = groupWidth;
784
- image.attrs.height = groupHeight;
785
- }
786
- }
816
+ else if (groupWidth && groupHeight && relativeSize && totalCx > 0) {
817
+ image.attrs.width = Math.round(relativeSize.cx * scaleFactor);
818
+ image.attrs.height = Math.round(relativeSize.cy * scaleFactor);
819
+ } else if (groupWidth && groupHeight) {
820
+ image.attrs.width = groupWidth;
821
+ image.attrs.height = groupHeight;
787
822
  }
788
823
  result.push(image);
789
824
  }
@@ -1340,7 +1375,9 @@ function parseCellProperties(cellNode) {
1340
1375
  if (!tcPr) return props;
1341
1376
  const gridSpan = findChild(tcPr, "w:gridSpan");
1342
1377
  if (gridSpan?.attributes["w:val"]) props.colspan = parseInt(gridSpan.attributes["w:val"]);
1343
- if (findChild(tcPr, "w:vMerge")?.attributes["w:val"] === "continue") props.rowspan = 0;
1378
+ const vMerge = findChild(tcPr, "w:vMerge");
1379
+ if (vMerge) if (vMerge.attributes["w:val"] === "continue") props.rowspan = 0;
1380
+ else props.rowspan = 1;
1344
1381
  const tcW = findChild(tcPr, "w:tcW");
1345
1382
  if (tcW?.attributes["w:w"]) props.colwidth = [convertTwipToPixels(parseInt(tcW.attributes["w:w"]))];
1346
1383
  const shd = findChild(tcPr, "w:shd");
@@ -1679,7 +1716,7 @@ async function convertElements(elements, params) {
1679
1716
  for (let i = 0; i < elements.length; i++) {
1680
1717
  if (processedIndices.has(i)) continue;
1681
1718
  const element = elements[i];
1682
- if (params.context.ignoreEmptyParagraphs && element.name === "w:p" && isEmptyParagraph(element)) continue;
1719
+ if (params.context.paragraph?.ignoreEmpty && element.name === "w:p" && isEmptyParagraph(element)) continue;
1683
1720
  const node = await convertElement(element, elements, i, params, processedIndices);
1684
1721
  if (Array.isArray(node)) result.push(...node);
1685
1722
  else if (node) result.push(node);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@docen/import-docx",
3
- "version": "0.0.13",
3
+ "version": "0.0.14",
4
4
  "description": "A powerful TipTap/ProseMirror extension that imports Microsoft Word DOCX files to editor content",
5
5
  "keywords": [
6
6
  "converter",
@@ -51,10 +51,10 @@
51
51
  "xast-util-from-xml": "4.0.0"
52
52
  },
53
53
  "devDependencies": {
54
- "@tiptap/core": "3.20.0",
54
+ "@tiptap/core": "3.20.4",
55
55
  "@types/xast": "2.0.4",
56
- "@docen/extensions": "0.0.13",
57
- "@docen/utils": "0.0.13"
56
+ "@docen/extensions": "0.0.14",
57
+ "@docen/utils": "0.0.14"
58
58
  },
59
59
  "peerDependencies": {
60
60
  "@napi-rs/canvas": "^0.1.88"
@@ -65,7 +65,7 @@
65
65
  }
66
66
  },
67
67
  "optionalDependencies": {
68
- "@napi-rs/canvas": "^0.1.96"
68
+ "@napi-rs/canvas": "^0.1.97"
69
69
  },
70
70
  "scripts": {
71
71
  "dev": "basis build --stub",