@docen/import-docx 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Demo Macro
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,287 @@
1
+ # @docen/import-docx
2
+
3
+ ![npm version](https://img.shields.io/npm/v/@docen/import-docx)
4
+ ![npm downloads](https://img.shields.io/npm/dw/@docen/import-docx)
5
+ ![npm license](https://img.shields.io/npm/l/@docen/import-docx)
6
+
7
+ > Import Microsoft Word DOCX files to TipTap/ProseMirror content.
8
+
9
+ ## Features
10
+
11
+ - 📝 **Rich Text Parsing** - Accurate parsing of headings, paragraphs, and blockquotes with formatting
12
+ - 🖼️ **Image Extraction** - Automatic image extraction with base64 conversion and cropping support
13
+ - 📊 **Table Support** - Complete table structure with colspan/rowspan detection algorithm
14
+ - ✅ **Lists & Tasks** - Bullet lists, numbered lists with start number extraction, and task lists with checkbox detection
15
+ - 🎨 **Text Formatting** - Bold, italic, underline, strikethrough, subscript, superscript, and highlights
16
+ - 🎯 **Text Styles** - Comprehensive style support including colors, backgrounds, fonts, sizes, and line heights
17
+ - 🔗 **Links** - Hyperlink extraction with href preservation
18
+ - 💻 **Code Blocks** - Code block detection with language attribute extraction
19
+ - 🌐 **Cross-Platform** - Works in both browser and Node.js environments
20
+ - ✂️ **Image Cropping** - Automatic cropping of images based on DOCX crop metadata
21
+ - 🧠 **Smart Parsing** - DOCX XML parsing with proper element grouping and structure reconstruction
22
+ - ⚡ **Fast Processing** - Uses fflate for ultra-fast ZIP decompression
23
+
24
+ ## Installation
25
+
26
+ ```bash
27
+ # Install with npm
28
+ $ npm install @docen/import-docx
29
+
30
+ # Install with yarn
31
+ $ yarn add @docen/import-docx
32
+
33
+ # Install with pnpm
34
+ $ pnpm add @docen/import-docx
35
+ ```
36
+
37
+ ## Quick Start
38
+
39
+ ```typescript
40
+ import { parseDOCX } from "@docen/import-docx";
41
+ import { readFileSync } from "node:fs";
42
+
43
+ // Read DOCX file
44
+ const buffer = readFileSync("document.docx");
45
+
46
+ // Parse DOCX to TipTap JSON
47
+ const content = await parseDOCX(buffer);
48
+
49
+ // Use in TipTap editor
50
+ editor.commands.setContent(content);
51
+ ```
52
+
53
+ ## API Reference
54
+
55
+ ### `parseDOCX(input, options?)`
56
+
57
+ Parses a DOCX file and converts it to TipTap/ProseMirror JSON content.
58
+
59
+ **Parameters:**
60
+
61
+ - `input: Buffer | ArrayBuffer | Uint8Array` - DOCX file data
62
+ - `options?: DocxImportOptions` - Optional import configuration
63
+
64
+ **Returns:** `Promise<JSONContent>` - TipTap/ProseMirror document content with images embedded
65
+
66
+ **Options:**
67
+
68
+ ```typescript
69
+ interface DocxImportOptions {
70
+ /** Custom image converter (default: embed as base64) */
71
+ convertImage?: (image: DocxImageInfo) => Promise<DocxImageResult>;
72
+
73
+ /** Whether to ignore empty paragraphs (default: false).
74
+ * Empty paragraphs are those without text content or images.
75
+ * Paragraphs containing only whitespace or images are not considered empty. */
76
+ ignoreEmptyParagraphs?: boolean;
77
+
78
+ /**
79
+ * Dynamic import function for @napi-rs/canvas
80
+ * Required for image cropping in a Node.js environment; ignored in the browser
81
+ *
82
+ * @example
83
+ * import { parseDOCX } from '@docen/import-docx';
84
+ * const content = await parseDOCX(buffer, {
85
+ * canvasImport: () => import('@napi-rs/canvas')
86
+ * });
87
+ */
88
+ canvasImport?: () => Promise<typeof import("@napi-rs/canvas")>;
89
+
90
+ /**
91
+ * Enable or disable image cropping during import
92
+ * When true (default), images with crop information in DOCX will be cropped
93
+ * When false, crop information is ignored and full image is used
94
+ *
95
+ * @default true
96
+ */
97
+ enableImageCrop?: boolean;
98
+ }
99
+ ```
100
+
101
+ **Default Image Converter:**
102
+
103
+ The package exports `defaultImageConverter` which embeds images as base64 data URLs:
104
+
105
+ ```typescript
106
+ import { defaultImageConverter } from "@docen/import-docx";
107
+
108
+ // Use in custom converter
109
+ await parseDOCX(buffer, {
110
+ convertImage: async (image) => {
111
+ if (shouldUploadToCDN) {
112
+ return uploadToCDN(image.data);
113
+ }
114
+ return defaultImageConverter(image);
115
+ },
116
+ });
117
+ ```
118
+
119
+ ## Supported Content Types
120
+
121
+ ### Text Formatting
122
+
123
+ - **Bold**, _Italic_, <u>Underline</u>, ~~Strikethrough~~
124
+ - ^Superscript^ and ~Subscript~
125
+ - Text highlights
126
+ - Text colors and background colors
127
+ - Font families and sizes
128
+ - Line heights
129
+
130
+ ### Block Elements
131
+
132
+ - **Headings** (H1-H6) with proper level detection
133
+ - **Paragraphs** with text alignment (left, right, center, justify)
134
+ - **Blockquotes** (Detected by indentation + left border formatting)
135
+ - **Horizontal Rules** (Detected as page breaks in DOCX)
136
+ - **Code Blocks** with language attribute support
137
+
138
+ ### Lists
139
+
140
+ - **Bullet Lists** with proper nesting and structure
141
+ - **Numbered Lists** with custom start number extraction
142
+ - **Task Lists** with checked/unchecked state detection (☐/☑ symbols)
143
+
144
+ ### Tables
145
+
146
+ - Complete table structure parsing
147
+ - **Table Cells** with colspan detection using grid-based algorithm
148
+ - **Table Cells** with rowspan detection using vMerge tracking
149
+ - Cell alignment and formatting preservation
150
+ - Merged cell handling (both horizontal and vertical)
151
+
152
+ ### Media & Embeds
153
+
154
+ - **Images** with automatic base64 conversion
155
+ - **Grouped Images** (DOCX image groups) support
156
+ - **Links** (hyperlinks) with href extraction
157
+
158
+ ## Parsing Algorithm
159
+
160
+ ### Document Structure
161
+
162
+ The parser follows a structured workflow:
163
+
164
+ 1. **Extract Relationships** - Parse `_rels/document.xml.rels` for hyperlinks and images
165
+ 2. **Parse Numbering** - Extract list definitions from `numbering.xml` (abstractNum → numFmt)
166
+ 3. **Process Document Body** - Iterate through document.xml elements:
167
+ - Detect content types (tables, lists, paragraphs, code blocks, etc.)
168
+ - Group consecutive elements into proper containers
169
+ - Convert XML nodes to TipTap JSON nodes
170
+
171
+ ### Table Processing
172
+
173
+ Tables use specialized algorithms:
174
+
175
+ - **Colspan Detection** - Grid-based algorithm tracks cell positions and detects horizontal merges
176
+ - **Rowspan Detection** - Vertical merge (vMerge) tracking across rows with proper cell skipping
177
+ - **Cell Content** - Recursive parsing of nested paragraphs and formatting
178
+ - **Hyperlink Support** - Proper handling of links within table cells
179
+
180
+ ### List Processing
181
+
182
+ Lists utilize the DOCX numbering system:
183
+
184
+ - **Numbering ID Mapping** - Maps abstractNum to formatting (bullet vs decimal)
185
+ - **Start Value Extraction** - Extracts and preserves start numbers for ordered lists
186
+ - **Nesting Preservation** - Maintains proper list hierarchy
187
+ - **Consecutive Grouping** - Groups consecutive list items into list containers
188
+
189
+ ## Examples
190
+
191
+ ### Basic Usage
192
+
193
+ ```typescript
194
+ import { parseDOCX } from "@docen/import-docx";
195
+
196
+ const buffer = readFileSync("example.docx");
197
+ const content = await parseDOCX(buffer);
198
+
199
+ console.log(JSON.stringify(content, null, 2));
200
+ ```
201
+
202
+ ### Use with TipTap Editor
203
+
204
+ ```typescript
205
+ import { Editor } from "@tiptap/core";
206
+ import { parseDOCX } from "@docen/import-docx";
207
+
208
+ const editor = new Editor({
209
+ extensions: [...],
210
+ content: "",
211
+ });
212
+
213
+ // Import DOCX file
214
+ async function importDocx(file: File) {
215
+ const buffer = await file.arrayBuffer();
216
+ const content = await parseDOCX(buffer);
217
+ editor.commands.setContent(content);
218
+ }
219
+ ```
220
+
221
+ ### Node.js Environment with Image Cropping
222
+
223
+ In a Node.js environment, you must supply `@napi-rs/canvas` through the `canvasImport` option to enable image cropping:
224
+
225
+ ```typescript
226
+ import { parseDOCX } from "@docen/import-docx";
227
+ import { readFileSync } from "node:fs";
228
+
229
+ // Install @napi-rs/canvas first: pnpm add @napi-rs/canvas
230
+ const buffer = readFileSync("document.docx");
231
+
232
+ const content = await parseDOCX(buffer, {
233
+ canvasImport: () => import("@napi-rs/canvas"),
234
+ enableImageCrop: true, // default is true
235
+ });
236
+ ```
237
+
238
+ ### Disable Image Cropping
239
+
240
+ If you want to ignore crop information in DOCX and use full images:
241
+
242
+ ```typescript
243
+ const content = await parseDOCX(buffer, {
244
+ enableImageCrop: false,
245
+ });
246
+ ```
247
+
248
+ ## Known Limitations
249
+
250
+ ### Blockquote Detection
251
+
252
+ DOCX does not have a semantic blockquote structure. A paragraph is treated as a blockquote when it has both of the following:
253
+
254
+ - Left indentation ≥ 720 twips (0.5 inch)
255
+ - Presence of left border (single line)
256
+
257
+ This detection method may produce false positives for documents with custom indentation similar to blockquotes.
258
+
259
+ ### Code Marks
260
+
261
+ The `code` mark is NOT automatically detected from monospace fonts (Consolas, Courier New, etc.). This is intentional to avoid false positives. Code marks should be explicitly added in the source document or through the editor UI.
262
+
263
+ ### Color Format
264
+
265
+ All colors are imported as hex values (e.g., "#FF0000", "#008000"). Color names from the original document are not preserved.
266
+
267
+ ### Image Limitations
268
+
269
+ - Only embedded images are supported (external image links are not fetched)
270
+ - Image dimensions and title are extracted from DOCX metadata
271
+ - **Image Cropping in Node.js**: Requires `@napi-rs/canvas` as an optional dependency
272
+ - In browser environments, cropping works natively with Canvas API
273
+ - In Node.js, you must provide `canvasImport` option with dynamic import of `@napi-rs/canvas`
274
+ - If `@napi-rs/canvas` is not available, images will be imported without cropping (graceful degradation)
275
+ - Some DOCX image features (like advanced positioning or text wrapping) have limited support
276
+
277
+ ### Table Cell Types
278
+
279
+ The DOCX format does not distinguish between header and body cells at a semantic level, so all cells are imported as the `tableCell` type for consistency.
280
+
281
+ ## Contributing
282
+
283
+ Contributions are welcome! Please read our [Contributor Covenant](https://www.contributor-covenant.org/version/2/1/code_of_conduct/) and submit pull requests to the [main repository](https://github.com/DemoMacro/docen).
284
+
285
+ ## License
286
+
287
+ - [MIT](LICENSE) &copy; [Demo Macro](https://imst.xyz/)
@@ -0,0 +1 @@
1
+ /* CommonJS entry point: loads ../index.cjs and re-exports its converter helpers (convertParagraph, convertTable, convertTaskItem, extractAlignment, extractMarks, extractRuns, getCodeBlockLanguage, getListInfo, getTaskItemChecked, isCodeBlock, isHorizontalRule, isListItem, isTable, isTaskItem). The bare require() calls load xast-util-from-xml, fflate, undio and image-meta without using their return values. */"use strict";const index=require("../index.cjs");require("xast-util-from-xml"),require("fflate"),require("undio"),require("image-meta"),exports.convertParagraph=index.convertParagraph,exports.convertTable=index.convertTable,exports.convertTaskItem=index.convertTaskItem,exports.extractAlignment=index.extractAlignment,exports.extractMarks=index.extractMarks,exports.extractRuns=index.extractRuns,exports.getCodeBlockLanguage=index.getCodeBlockLanguage,exports.getListInfo=index.getListInfo,exports.getTaskItemChecked=index.getTaskItemChecked,exports.isCodeBlock=index.isCodeBlock,exports.isHorizontalRule=index.isHorizontalRule,exports.isListItem=index.isListItem,exports.isTable=index.isTable,exports.isTaskItem=index.isTaskItem;
@@ -0,0 +1 @@
1
+ /* ES module entry point: re-exports the converter helpers (convertParagraph through isTaskItem) from ../index.mjs. The side-effect imports load xast-util-from-xml, fflate, undio and image-meta without binding any names. */export{convertParagraph,convertTable,convertTaskItem,extractAlignment,extractMarks,extractRuns,getCodeBlockLanguage,getListInfo,getTaskItemChecked,isCodeBlock,isHorizontalRule,isListItem,isTable,isTaskItem}from"../index.mjs";import"xast-util-from-xml";import"fflate";import"undio";import"image-meta";
package/dist/index.cjs ADDED
@@ -0,0 +1 @@
1
+ "use strict";
/**
 * CommonJS bundle of the DOCX importer.
 *
 * Unzips a DOCX archive, parses its XML parts with xast-util-from-xml and
 * converts the body into a `{ type: "doc", content: [...] }` node tree
 * (paragraphs, headings, lists, task lists, code blocks, tables, images,
 * horizontal rules). Public entry point: `parseDOCX`.
 */
const xastUtilFromXml = require("xast-util-from-xml");
const fflate = require("fflate");
const undio = require("undio");
const imageMeta = require("image-meta");

/* ------------------------------------------------------------------ */
/* XML tree helpers                                                    */
/* ------------------------------------------------------------------ */

/** Returns the first direct child element of `n` named `t`, or undefined. */
function findChild(n, t) {
  for (const e of n.children) {
    if (e.type === "element" && e.name === t) return e;
  }
}

/** Depth-first search for the first descendant element of `n` named `t`. */
function findDeepChild(n, t) {
  for (const e of n.children) {
    if (e.type !== "element") continue;
    if (e.name === t) return e;
    const r = findDeepChild(e, t);
    if (r) return r;
  }
}

/** Collects every descendant element of `n` named `t` in document order. */
function findDeepChildren(n, t) {
  const e = [];
  for (const r of n.children) {
    if (r.type !== "element") continue;
    if (r.name === t) e.push(r);
    e.push(...findDeepChildren(r, t));
  }
  return e;
}

/**
 * Interprets a WordprocessingML on/off element (w:b, w:i, w:u, w:strike).
 * The element being present means "on" unless its w:val explicitly turns it
 * off ("false", "0", "off", or "none" for underline).
 */
function isToggleOn(n) {
  if (!n) return false;
  const t = n.attributes["w:val"];
  return t !== "false" && t !== "0" && t !== "off" && t !== "none";
}

/* ------------------------------------------------------------------ */
/* Base64 helpers                                                      */
/* ------------------------------------------------------------------ */

const s = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/** Encodes bytes as padded standard base64. */
function uint8ArrayToBase64(n) {
  const t = n.length;
  const e = Math.ceil(t / 3) * 4;
  const r = Array.from({ length: e });
  let i = 0;
  for (let c = 0; c < t; c += 3) {
    const l = n[c];
    const f = c + 1 < t ? n[c + 1] : 0;
    const b = c + 2 < t ? n[c + 2] : 0;
    r[i++] = s[l >> 2];
    r[i++] = s[((l & 3) << 4) | (f >> 4)];
    r[i++] = c + 1 < t ? s[((f & 15) << 2) | (b >> 6)] : "=";
    r[i++] = c + 2 < t ? s[b & 63] : "=";
  }
  return r.join("");
}

/** Decodes a base64 string into bytes using the global `atob`. */
function base64ToUint8Array(n) {
  const t = atob(n);
  const e = new Uint8Array(t.length);
  for (let r = 0; r < t.length; r++) e[r] = t.charCodeAt(r);
  return e;
}

/* ------------------------------------------------------------------ */
/* Canvas factories and image cropping                                 */
/* ------------------------------------------------------------------ */

const isNode = globalThis.process?.release?.name === "node";
const isBrowser = typeof window !== "undefined";

/** Awaits a module promise and unwraps its default export when present. */
async function h$2(n) {
  const t = await n;
  return t.default || t;
}

// Resolved canvas module for Node.js (set by resolveCanvasModule).
let o;

/** Base canvas factory; subclasses implement `_createCanvas`. */
let u$1 = class {
  #t = false;
  constructor({ enableHWA: t = false } = {}) {
    this.#t = t;
  }
  create(t, e) {
    const r = this._createCanvas(t, e);
    return { canvas: r, context: r.getContext("2d", { willReadFrequently: !this.#t }) };
  }
  reset({ canvas: t }, e, r) {
    if (!t) throw new Error("Canvas is not specified");
    t.width = e;
    t.height = r;
  }
  destroy(t) {
    if (!t.canvas) throw new Error("Canvas is not specified");
    t.canvas.width = 0;
    t.canvas.height = 0;
    t.canvas = void 0;
    t.context = void 0;
  }
  _createCanvas(t, e) {
    throw new Error("Not implemented");
  }
};

/** Canvas factory backed by `document.createElement("canvas")`. */
class DOMCanvasFactory extends u$1 {
  _document;
  constructor({ ownerDocument: t = globalThis.document, enableHWA: e = false } = {}) {
    super({ enableHWA: e });
    this._document = t;
  }
  _createCanvas(t, e) {
    const r = this._document.createElement("canvas");
    r.width = t;
    r.height = e;
    return r;
  }
}

/** Canvas factory backed by the resolved Node.js canvas module. */
class NodeCanvasFactory extends u$1 {
  constructor({ enableHWA: t = false } = {}) {
    super({ enableHWA: t });
  }
  _createCanvas(t, e) {
    if (!o) throw new Error("@napi-rs/canvas module is not resolved");
    return o.createCanvas(t, e);
  }
}

/** Resolves the canvas module once via the caller-supplied import thunk. */
async function resolveCanvasModule(n) {
  o ??= await h$2(n());
}

/**
 * Picks the canvas factory class for the current environment.
 * @throws when running in Node.js without a `canvasImport` thunk, or in an
 *         environment that is neither a browser nor Node.js.
 */
async function createCanvasFactory(n) {
  if (isBrowser) return DOMCanvasFactory;
  if (isNode) {
    if (!n)
      throw new Error(
        "In Node.js environment, @napi-rs/canvas is required for image cropping. Please provide canvasImport parameter or install it: pnpm add @napi-rs/canvas"
      );
    await resolveCanvasModule(n);
    return NodeCanvasFactory;
  }
  throw new Error("Unsupported environment for canvas operations");
}

/**
 * Crops image bytes according to `t` (left/top/right/bottom given in
 * 1/100000 of the image dimension). Returns the SAME `n` reference when no
 * cropping is requested, cropping is disabled, or cropping fails; otherwise
 * returns new bytes produced from `canvas.toDataURL()`.
 */
async function cropImageIfNeeded(n, t, e = {}) {
  if (!t || (!t.left && !t.top && !t.right && !t.bottom) || e.enabled === false) return n;
  try {
    const r = await createCanvasFactory(e.canvasImport);
    const i = await w$1(n, r);
    const c = ((t.left || 0) / 1e5) * i.width;
    const l = ((t.top || 0) / 1e5) * i.height;
    const f = ((t.right || 0) / 1e5) * i.width;
    const b = ((t.bottom || 0) / 1e5) * i.height;
    const I = Math.round(i.width - c - f);
    const k = Math.round(i.height - l - b);
    if (I <= 0 || k <= 0) {
      console.warn("Invalid crop dimensions, returning original image");
      return n;
    }
    const R = new r().create(I, k);
    if (!R.context) throw new Error("Failed to get 2D context from canvas");
    R.context.drawImage(i, c, l, I, k, 0, 0, I, k);
    const P = R.canvas.toDataURL();
    const F = await (await fetch(P)).arrayBuffer();
    return new Uint8Array(F);
  } catch (r) {
    console.warn("Image cropping failed, returning original image:", r);
    return n;
  }
}

/** Loads image bytes into a drawable image (browser `Image` or Node canvas image). */
async function w$1(n, t) {
  if (isBrowser) {
    // Pass the view itself (not n.buffer) so byteOffset/byteLength are honoured.
    const e = new Blob([n]);
    const r = URL.createObjectURL(e);
    try {
      const i = new Image();
      return new Promise((c, l) => {
        i.onload = () => {
          URL.revokeObjectURL(r);
          c(i);
        };
        i.onerror = () => {
          URL.revokeObjectURL(r);
          l(new Error("Failed to load image"));
        };
        i.src = r;
      });
    } catch (i) {
      URL.revokeObjectURL(r);
      throw i;
    }
  } else {
    if (!o) throw new Error("@napi-rs/canvas module is not resolved");
    return await o.loadImage(Buffer.from(n));
  }
}

/* ------------------------------------------------------------------ */
/* Images                                                              */
/* ------------------------------------------------------------------ */

const j = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image";

/** Converts an EMU string to rounded pixels (value / 9525); undefined if not numeric. */
function C$1(n) {
  const t = parseInt(n, 10);
  if (!isNaN(t)) return Math.round(t / 9525);
}

/** Reads l/t/r/b attributes of a crop rectangle element; undefined when none is set. */
function B(n) {
  const t = n.attributes.l;
  const e = n.attributes.t;
  const r = n.attributes.r;
  const i = n.attributes.b;
  if (!t && !e && !r && !i) return;
  return {
    left: t ? parseInt(t, 10) : void 0,
    top: e ? parseInt(e, 10) : void 0,
    right: r ? parseInt(r, 10) : void 0,
    bottom: i ? parseInt(i, 10) : void 0,
  };
}

/** Reads wp:align / wp:posOffset children of a position element. */
function N(n) {
  const t = findChild(n, "wp:align");
  const e = findChild(n, "wp:posOffset");
  const r = t?.children[0]?.type === "text" ? t.children[0].value : void 0;
  const i = e?.children[0]?.type === "text" ? parseInt(e.children[0].value, 10) : void 0;
  if (!r && i === void 0) return;
  return { ...(r && { align: r }), ...(i !== void 0 && { offset: i }) };
}

/** Finds the w:drawing of a run, also looking inside mc:AlternateContent/mc:Choice. */
function findDrawingElement(n) {
  const t = findChild(n, "w:drawing");
  if (t) return t;
  const e = findChild(n, "mc:AlternateContent");
  const r = e && findChild(e, "mc:Choice");
  return r && findChild(r, "w:drawing");
}

/**
 * Fits an image with intrinsic size e x r into the box n x t, keeping the
 * intrinsic aspect ratio when it differs from the box ratio by more than 0.1.
 */
function O$1(n, t, e, r) {
  const i = e / r;
  const c = n / t;
  if (Math.abs(i - c) > 0.1) {
    return i > c
      ? { width: n, height: Math.round(n / i) }
      : { width: Math.round(t * i), height: t };
  }
  return { width: n, height: t };
}

/** Builds a map relationshipId -> { src (data URL), width, height } for image parts. */
function extractImages(n) {
  const t = new Map();
  const e = n["word/_rels/document.xml.rels"];
  if (!e) return t;
  const r = xastUtilFromXml.fromXml(new TextDecoder().decode(e));
  const i = findChild(r, "Relationships");
  if (!i) return t;
  for (const l of findDeepChildren(i, "Relationship")) {
    if (!(l.attributes.Type === j && l.attributes.Id && l.attributes.Target)) continue;
    const f = "word/" + l.attributes.Target;
    const b = n[f];
    if (!b) continue;
    let I;
    let k;
    let R = "png";
    try {
      const $ = imageMeta.imageMeta(b);
      I = $.width;
      k = $.height;
      if ($.type) R = $.type;
    } catch {
      // Metadata is optional; fall back to "png" and unknown dimensions.
    }
    const P = uint8ArrayToBase64(b);
    t.set(l.attributes.Id, { src: `data:image/${R};base64,${P}`, width: I, height: k });
  }
  return t;
}

/**
 * Converts a drawing element into an image node, or null when it has no
 * resolvable a:blip. Applies a:srcRect cropping, extent, rotation, title,
 * floating position and outline when present.
 */
async function extractImageFromDrawing(n, t) {
  const { images: e, options: r } = t;
  const i = findDeepChild(n, "a:blip");
  if (!i?.attributes["r:embed"]) return null;
  const l = e.get(i.attributes["r:embed"]);
  if (!l) return null;
  let f = l.src;

  const b = findDeepChild(n, "a:srcRect");
  if (b) {
    const D = B(b);
    if (D && f.startsWith("data:")) {
      const [, _] = f.split(",");
      if (_) {
        const V = base64ToUint8Array(_);
        try {
          const q = await cropImageIfNeeded(V, D, {
            canvasImport: r?.canvasImport,
            enabled: r?.enableImageCrop !== false,
          });
          // cropImageIfNeeded returns the input reference when nothing was
          // cropped. When it did crop, the bytes come from toDataURL() with
          // no type argument, i.e. PNG, so the MIME header must say so.
          if (q !== V) f = `data:image/png;base64,${uint8ArrayToBase64(q)}`;
        } catch (q) {
          console.warn("Image cropping failed, using original image:", q);
        }
      }
    }
  }

  const I = findDeepChild(n, "wp:extent");
  let k;
  let R;
  if (I) {
    const D = I.attributes.cx;
    const U = I.attributes.cy;
    if (typeof D == "string") k = C$1(D);
    if (typeof U == "string") R = C$1(U);
  }

  const P = findDeepChild(n, "a:xfrm");
  let F;
  if (P?.attributes.rot) {
    const D = parseInt(P.attributes.rot, 10);
    if (!isNaN(D)) F = D / 6e4;
  }

  const $ = findDeepChild(n, "wp:docPr")?.attributes.title;
  const W = findDeepChild(n, "wp:positionH");
  const z = findDeepChild(n, "wp:positionV");
  let E;
  if (W || z) {
    const D = W ? N(W) : void 0;
    const U = z ? N(z) : void 0;
    E = {
      horizontalPosition: {
        relative: W?.attributes.relativeFrom || "page",
        ...(D?.align && { align: D.align }),
        ...(D?.offset !== void 0 && { offset: D.offset }),
      },
      verticalPosition: {
        relative: z?.attributes.relativeFrom || "page",
        ...(U?.align && { align: U.align }),
        ...(U?.offset !== void 0 && { offset: U.offset }),
      },
    };
  }

  const H = findDeepChild(n, "pic:spPr");
  let X;
  if (H) {
    const D = findDeepChild(H, "a:ln");
    const U = D && findDeepChild(D, "a:solidFill");
    const _ = U && findDeepChild(U, "a:srgbClr");
    if (_?.attributes.val) X = { type: "solidFill", solidFillType: "rgb", value: _.attributes.val };
  }

  return {
    type: "image",
    attrs: {
      src: f,
      alt: "",
      ...(k !== void 0 && { width: k }),
      ...(R !== void 0 && { height: R }),
      ...(F !== void 0 && { rotation: F }),
      ...($ && { title: $ }),
      ...(E && { floating: E }),
      ...(X && { outline: X }),
    },
  };
}

/** Builds a plain image node for image info `n`, fitted into box t x e when both sizes are known. */
function S$2(n, t, e) {
  if (t && e && n.width && n.height) {
    const r = O$1(t, e, n.width, n.height);
    return { type: "image", attrs: { src: n.src, alt: "", width: r.width, height: r.height } };
  }
  return {
    type: "image",
    attrs: { src: n.src, alt: "", ...(t !== void 0 && { width: t }), ...(e !== void 0 && { height: e }) },
  };
}

/** Extracts all images of a drawing, including pictures nested in a wpg:wgp group. */
async function extractImagesFromDrawing(n, t) {
  const e = [];
  const r = findChild(n, "wp:inline") || findChild(n, "wp:anchor");
  if (!r) return e;

  const i = findChild(r, "wp:extent");
  let c;
  let l;
  if (i) {
    const k = i.attributes.cx;
    const R = i.attributes.cy;
    if (typeof k == "string") c = C$1(k);
    if (typeof R == "string") l = C$1(R);
  }

  const f = findChild(r, "a:graphic");
  if (!f) return e;
  const b = findChild(f, "a:graphicData");
  if (!b) return e;

  const I = findChild(b, "wpg:wgp");
  if (!I) {
    const k = await extractImageFromDrawing(n, t);
    if (k) e.push(k);
    return e;
  }

  const k = findChild(I, "wpg:grpSp");
  const R = k
    ? [...findDeepChildren(k, "pic:pic"), ...findDeepChildren(k, "pic")]
    : [...findDeepChildren(I, "pic:pic"), ...findDeepChildren(I, "pic")];

  for (const P of R) {
    const F = findChild(P, "a:graphic");
    if (!F) {
      const E = findChild(P, "pic:blipFill") || findDeepChild(P, "a:blipFill");
      if (!E) continue;
      const H = findChild(E, "a:blip") || findDeepChild(E, "a:blip");
      if (!H?.attributes["r:embed"]) continue;
      const D = t.images.get(H.attributes["r:embed"]);
      if (!D) continue;
      e.push(S$2(D, c, l));
      continue;
    }
    const $ = { children: [F] };
    const W = await extractImageFromDrawing($, t);
    if (!W) continue;
    const z =
      $.children[0]?.type === "element"
        ? findDeepChild($.children[0], "a:blip")?.attributes["r:embed"]
        : void 0;
    if (c && l && z) {
      const E = t.images.get(z);
      if (E?.width && E?.height) {
        const H = O$1(c, l, E.width, E.height);
        W.attrs.width = H.width;
        W.attrs.height = H.height;
      } else {
        W.attrs.width = c;
        W.attrs.height = l;
      }
    }
    e.push(W);
  }
  return e;
}

/* ------------------------------------------------------------------ */
/* Relationships, numbering, styles                                    */
/* ------------------------------------------------------------------ */

const p = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink";

/** Builds a map relationshipId -> target for hyperlink relationships. */
function extractHyperlinks(n) {
  const t = new Map();
  const e = n["word/_rels/document.xml.rels"];
  if (!e) return t;
  const r = xastUtilFromXml.fromXml(new TextDecoder().decode(e));
  const i = findChild(r, "Relationships");
  if (!i) return t;
  for (const l of findDeepChildren(i, "Relationship")) {
    if (l.attributes.Type === p && l.attributes.Id && l.attributes.Target) {
      t.set(l.attributes.Id, l.attributes.Target);
    }
  }
  return t;
}

/**
 * Parses word/numbering.xml into a map numId -> { type: "bullet" } or
 * { type: "ordered", start? }, based on the first w:lvl of each abstractNum.
 */
function parseNumberingXml(n) {
  const t = new Map();
  const e = new Map(); // abstractNumId -> start value
  const r = n["word/numbering.xml"];
  if (!r) return t;
  const i = xastUtilFromXml.fromXml(new TextDecoder().decode(r));
  const c = new Map(); // abstractNumId -> number format
  const l = findChild(i, "w:numbering");
  if (!l) return t;

  for (const I of findDeepChildren(l, "w:abstractNum")) {
    const k = I.attributes["w:abstractNumId"];
    const R = findChild(I, "w:lvl");
    if (!R) continue;
    const P = findChild(R, "w:numFmt");
    if (P?.attributes["w:val"]) c.set(k, P.attributes["w:val"]);
    const F = findChild(R, "w:start");
    if (F?.attributes["w:val"]) e.set(k, parseInt(F.attributes["w:val"], 10));
  }

  for (const I of findDeepChildren(l, "w:num")) {
    const k = I.attributes["w:numId"];
    const R = findChild(I, "w:abstractNumId");
    if (!R?.attributes["w:val"]) continue;
    const P = R.attributes["w:val"];
    const F = c.get(P);
    if (!F) continue;
    const $ = e.get(P);
    if (F === "bullet") t.set(k, { type: "bullet" });
    else t.set(k, { type: "ordered", ...($ !== void 0 && { start: $ }) });
  }
  return t;
}

/** Parses paragraph styles of word/styles.xml into a map styleId -> style info. */
function parseStylesXml(n) {
  const t = new Map();
  const e = n["word/styles.xml"];
  if (!e) return t;
  const r = xastUtilFromXml.fromXml(new TextDecoder().decode(e));
  const i = findChild(r, "w:styles");
  if (!i) return t;

  const c = findDeepChildren(i, "w:style").filter((l) => l.attributes["w:type"] === "paragraph");
  for (const l of c) {
    const f = l.attributes["w:styleId"];
    if (!f) continue;
    const b = { styleId: f };

    const I = findChild(l, "w:name");
    if (I?.attributes["w:val"]) b.name = I.attributes["w:val"];

    const k = findChild(l, "w:pPr");
    if (k) {
      const P = findChild(k, "w:outlineLvl");
      if (P?.attributes["w:val"] !== void 0) b.outlineLvl = parseInt(P.attributes["w:val"], 10);
    }

    const R = findChild(l, "w:rPr");
    if (R) {
      const P = {};
      const F = findChild(R, "w:color");
      if (F?.attributes["w:val"] && F.attributes["w:val"] !== "auto") {
        const z = F.attributes["w:val"];
        P.color = z.startsWith("#") ? z : `#${z}`;
      }
      // Honour explicit "off" values instead of treating mere presence as on.
      if (isToggleOn(findChild(R, "w:b"))) P.bold = true;
      if (isToggleOn(findChild(R, "w:i"))) P.italic = true;
      if (isToggleOn(findChild(R, "w:u"))) P.underline = true;
      if (isToggleOn(findChild(R, "w:strike"))) P.strike = true;
      const $ = findChild(R, "w:sz");
      if ($?.attributes["w:val"]) {
        const E = parseInt($.attributes["w:val"], 10);
        if (!isNaN(E)) P.fontSize = E;
      }
      const W = findChild(R, "w:rFonts");
      if (W?.attributes["w:ascii"]) P.fontFamily = W.attributes["w:ascii"];
      if (Object.keys(P).length > 0) b.charFormat = P;
    }
    t.set(f, b);
  }
  return t;
}

/* ------------------------------------------------------------------ */
/* Runs and marks                                                      */
/* ------------------------------------------------------------------ */

/** Converts a run's w:t into a text node (with marks), or null when it has no text. */
function d(n, t) {
  const e = findChild(n, "w:t");
  if (!e) return null;
  const r = e.children.find((c) => c.type === "text");
  if (!r?.value) return null;
  const i = extractMarks(n, t);
  return { type: "text", text: r.value, ...(i.length && { marks: i }) };
}

/**
 * Converts the runs (and hyperlink-wrapped runs) of a paragraph into inline
 * nodes: text, hardBreak and image nodes.
 */
async function extractRuns(n, t) {
  const e = [];
  for (const r of n.children) {
    if (r.type !== "element") continue;

    if (r.name === "w:hyperlink") {
      // External links resolve through the relationship map; internal links
      // carry a w:anchor. When neither resolves, the runs are still emitted
      // (without a link mark) so the visible text is not lost.
      const c = r.attributes["w:anchor"];
      const l = t.hyperlinks.get(r.attributes["r:id"]) || (c ? `#${c}` : void 0);
      for (const f of r.children) {
        if (f.type !== "element" || f.name !== "w:r") continue;
        const I = findDrawingElement(f);
        if (I) {
          const R = await extractImageFromDrawing(I, t);
          if (R) {
            e.push(R);
            continue;
          }
          const P = await extractImagesFromDrawing(I, t);
          if (P.length) {
            e.push(...P);
            continue;
          }
        }
        const k = d(f, t.styleInfo);
        if (!k) continue;
        if (l) {
          k.marks = k.marks || [];
          k.marks.push({ type: "link", attrs: { href: l } });
        }
        e.push(k);
      }
    } else if (r.name === "w:r") {
      const c = findDrawingElement(r);
      if (c) {
        const f = await extractImagesFromDrawing(c, t);
        if (f.length) {
          e.push(...f);
          continue;
        }
      }
      if (findChild(r, "w:br")) {
        const f = extractMarks(r, t.styleInfo);
        e.push({ type: "hardBreak", ...(f.length && { marks: f }) });
      }
      const l = d(r, t.styleInfo);
      if (l) e.push(l);
    }
  }
  return e;
}

/**
 * Computes the marks of a run. Starts from the paragraph style's charFormat
 * (`t?.charFormat`) and overrides it with the run's own w:rPr.
 */
function extractMarks(n, t) {
  const e = [];
  const r = findChild(n, "w:rPr");
  let i = {};
  if (t?.charFormat) i = { ...t.charFormat };

  if (r) {
    // On/off properties: an explicit "off" value overrides the style.
    const c = findChild(r, "w:b");
    if (c) i.bold = isToggleOn(c);
    const l = findChild(r, "w:i");
    if (l) i.italic = isToggleOn(l);
    const u = findChild(r, "w:u");
    if (u) i.underline = isToggleOn(u);
    const g = findChild(r, "w:strike");
    if (g) i.strike = isToggleOn(g);

    const f = findChild(r, "w:color");
    if (f?.attributes["w:val"] && f.attributes["w:val"] !== "auto") {
      const P = f.attributes["w:val"];
      i.color = P.startsWith("#") ? P : `#${P}`;
    }
    const b = findChild(r, "w:sz");
    if (b?.attributes["w:val"]) {
      const F = parseInt(b.attributes["w:val"], 10);
      if (!isNaN(F)) i.fontSize = F;
    }
    const I = findChild(r, "w:rFonts");
    if (I?.attributes["w:ascii"]) i.fontFamily = I.attributes["w:ascii"];
    const k = findChild(r, "w:shd");
    if (k?.attributes["w:fill"] && k.attributes["w:fill"] !== "auto") {
      const P = k.attributes["w:fill"];
      i.backgroundColor = P.startsWith("#") ? P : `#${P}`;
    }
    if (findChild(r, "w:highlight")) e.push({ type: "highlight" });
    const R = findChild(r, "w:vertAlign");
    if (R) {
      const P = R.attributes["w:val"];
      if (P === "subscript") e.push({ type: "subscript" });
      else if (P === "superscript") e.push({ type: "superscript" });
    }
  }

  if (i.bold) e.push({ type: "bold" });
  if (i.italic) e.push({ type: "italic" });
  if (i.underline) e.push({ type: "underline" });
  if (i.strike) e.push({ type: "strike" });

  if (i.color || i.backgroundColor || i.fontSize || i.fontFamily) {
    const c = {
      color: i.color || "",
      backgroundColor: i.backgroundColor || "",
      fontSize: "",
      fontFamily: "",
      lineHeight: "",
    };
    if (i.fontSize) {
      const l = Math.round((i.fontSize / 1.5) * 10) / 10;
      c.fontSize = `${l}px`;
    }
    if (i.fontFamily) c.fontFamily = i.fontFamily;
    e.push({ type: "textStyle", attrs: c });
  }
  return e;
}

/* ------------------------------------------------------------------ */
/* Paragraphs and headings                                             */
/* ------------------------------------------------------------------ */

/** Maps w:jc to `{ textAlign }` (left/right/center/justify); undefined otherwise. */
function extractAlignment(n) {
  const t = findChild(n, "w:pPr");
  if (!t) return;
  const e = findChild(t, "w:jc");
  if (!e?.attributes["w:val"]) return;
  const i = { left: "left", right: "right", center: "center", both: "justify" }[e.attributes["w:val"]];
  return i ? { textAlign: i } : void 0;
}

/** Formats a value as pixels after dividing by 15. */
function y$2(n) {
  return `${Math.round(n / 15)}px`;
}

/** Reads w:ind / w:spacing of a paragraph into indent/spacing attrs, or null when none. */
function m$1(n) {
  const t = findChild(n, "w:pPr");
  if (!t) return null;
  const e = {};

  // Reads a numeric attribute of `el` and formats it as px; null when absent/invalid.
  const px = (el, name) => {
    const v = el.attributes[name];
    if (typeof v != "string") return null;
    const num = parseInt(v, 10);
    return isNaN(num) ? null : y$2(num);
  };

  const r = findChild(t, "w:ind");
  if (r) {
    const l = px(r, "w:left");
    if (l) e.indentLeft = l;
    const f = px(r, "w:right");
    if (f) e.indentRight = f;
    const b = px(r, "w:firstLine");
    if (b) e.indentFirstLine = b;
    else {
      const I = px(r, "w:hanging");
      if (I) e.indentFirstLine = `-${I}`;
    }
  }

  const i = findChild(t, "w:spacing");
  if (i) {
    const l = px(i, "w:before");
    if (l) e.spacingBefore = l;
    const f = px(i, "w:after");
    if (f) e.spacingAfter = f;
  }
  return Object.keys(e).length ? e : null;
}

/**
 * Converts a w:p into a node. Returns a heading, an image, a horizontalRule,
 * a paragraph, or a [paragraph, horizontalRule] pair when the paragraph
 * contains a page break.
 */
async function convertParagraph(n, t) {
  const e = findChild(n, "w:pPr");
  const r = (e && findChild(e, "w:pStyle"))?.attributes["w:val"];

  if (r && t.styleMap) {
    const f = t.styleMap.get(r);
    if (f?.outlineLvl !== void 0 && f.outlineLvl >= 0 && f.outlineLvl <= 5) {
      return h$1(n, t, f, f.outlineLvl + 1);
    }
    const b = r.match(/^Heading(\d+)$/);
    if (b) return h$1(n, t, f, parseInt(b[1], 10));
  }

  const i = r && t.styleMap ? t.styleMap.get(r) : void 0;
  const c = await extractRuns(n, { ...t, styleInfo: i });
  const l = { ...extractAlignment(n), ...m$1(n) };
  const attrs = Object.keys(l).length && { attrs: l };

  if (w(n)) {
    const f = c.filter((b) => b.type !== "hardBreak");
    return [
      { type: "paragraph", ...attrs, content: f.length ? f : void 0 },
      { type: "horizontalRule" },
    ];
  }
  if (c.length === 1 && c[0].type === "hardBreak") {
    const f = findChild(n, "w:r");
    if ((f && findChild(f, "w:br"))?.attributes["w:type"] === "page") return { type: "horizontalRule" };
  }
  if (c.length === 1 && c[0].type === "image") return c[0];
  return { type: "paragraph", ...attrs, content: c };
}

/** True when any run under `n` contains a page break (w:br w:type="page"). */
function w(n) {
  const t = [];
  const e = (r) => {
    if (r.name === "w:r") t.push(r);
    else for (const i of r.children) if (i.type === "element") e(i);
  };
  e(n);
  return t.some((r) => findChild(r, "w:br")?.attributes["w:type"] === "page");
}

/** Builds a heading node of level `r` using style info `e`. */
async function h$1(n, t, e, r) {
  return {
    type: "heading",
    attrs: { level: r, ...m$1(n) },
    content: await extractRuns(n, { ...t, styleInfo: e }),
  };
}

/* ------------------------------------------------------------------ */
/* Tables                                                              */
/* ------------------------------------------------------------------ */

/** Parses a border element into { color?, width?, style? }; null when empty or absent. */
function parseBorder(n) {
  if (!n) return null;
  const t = n.attributes["w:val"];
  const e = n.attributes["w:sz"];
  const r = n.attributes["w:color"];
  const i = { single: "solid", dashed: "dashed", dotted: "dotted", double: "double", none: "none", nil: "none" };
  const c = {};
  if (r && r !== "auto") c.color = `#${r}`;
  if (e) {
    const l = parseInt(e, 10);
    if (!isNaN(l)) c.width = Math.round(l / 6);
  }
  if (t && i[t]) c.style = i[t];
  return Object.keys(c).length > 0 ? c : null;
}

/** Reads w:tblCellMar margins of a table; null when none is present. */
function parseTableProperties(n) {
  const t = { marginTop: void 0, marginBottom: void 0, marginLeft: void 0, marginRight: void 0 };
  const e = findChild(n, "w:tblPr");
  if (!e) return null;
  const r = findChild(e, "w:tblCellMar");
  if (!r) return null;

  const sides = [
    ["w:top", "marginTop"],
    ["w:bottom", "marginBottom"],
    ["w:left", "marginLeft"],
    ["w:right", "marginRight"],
  ];
  for (const [tag, key] of sides) {
    const el = findChild(r, tag);
    if (!el?.attributes["w:w"]) continue;
    const b = parseInt(el.attributes["w:w"], 10);
    if (!isNaN(b)) t[key] = b;
  }
  const empty = sides.every(([, key]) => t[key] === void 0);
  return empty ? null : t;
}

/** Reads w:trHeight of a row into `{ rowHeight }` (px string or null). */
function parseRowProperties(n) {
  const t = { rowHeight: null };
  const e = findChild(n, "w:trPr");
  if (!e) return t;
  const r = findChild(e, "w:trHeight");
  if (r?.attributes["w:val"]) {
    const i = parseInt(r.attributes["w:val"], 10);
    // Skip non-numeric values rather than emitting "NaNpx".
    if (!isNaN(i)) t.rowHeight = `${Math.round(i / 15)}px`;
  }
  return t;
}

/**
 * Reads w:tcPr of a cell. `rowSpan` is 0 for a vertical-merge continuation
 * cell and 1 otherwise (the real span is computed by look-ahead elsewhere).
 */
function parseCellProperties(n) {
  const t = { colSpan: 1, rowSpan: 1, colWidth: null };
  const e = findChild(n, "w:tcPr");
  if (!e) return t;

  const r = findChild(e, "w:gridSpan");
  if (r?.attributes["w:val"]) {
    const span = parseInt(r.attributes["w:val"], 10);
    if (!isNaN(span)) t.colSpan = span;
  }

  // A w:vMerge without w:val defaults to "continue"; only "restart" begins a
  // new merged region.
  const vMerge = findChild(e, "w:vMerge");
  if (vMerge && vMerge.attributes["w:val"] !== "restart") t.rowSpan = 0;

  const i = findChild(e, "w:tcW");
  if (i?.attributes["w:w"]) {
    const b = parseInt(i.attributes["w:w"], 10);
    if (!isNaN(b)) t.colWidth = Math.round(b / 15);
  }

  // "auto" is not a colour value; same rule as run shading in extractMarks.
  const c = findChild(e, "w:shd");
  if (c?.attributes["w:fill"] && c.attributes["w:fill"] !== "auto") {
    t.backgroundColor = `#${c.attributes["w:fill"]}`;
  }

  const l = findChild(e, "w:vAlign");
  if (l?.attributes["w:val"]) t.verticalAlign = l.attributes["w:val"];

  const f = findChild(e, "w:tcBorders");
  if (f) {
    const b = parseBorder(findChild(f, "w:top"));
    if (b) t.borderTop = b;
    const I = parseBorder(findChild(f, "w:bottom"));
    if (I) t.borderBottom = I;
    const k = parseBorder(findChild(f, "w:left"));
    if (k) t.borderLeft = k;
    const R = parseBorder(findChild(f, "w:right"));
    if (R) t.borderRight = R;
  }
  return t;
}

/** True when the element is a w:tbl. */
function isTable(n) {
  return n.name === "w:tbl";
}

/** Converts a w:tbl into a table node. */
async function convertTable(n, t) {
  const e = [];
  for (const l of n.children) {
    if (l.type === "element" && l.name === "w:tr") e.push(l);
  }
  // Rows share the mutable activeRowspans map, so they are converted one
  // after another; running them concurrently would make the bookkeeping
  // depend on how their awaits interleave.
  const r = new Map();
  const i = [];
  for (let f = 0; f < e.length; f++) {
    i.push(await u(e[f], { ...t, activeRowspans: r, rows: e, rowIndex: f }));
  }
  const c = parseTableProperties(n);
  return { type: "table", ...(c && { attrs: c }), content: i };
}

/** Converts a w:tr into a tableRow node, skipping cells covered by a row span. */
async function u(n, t) {
  const e = [];
  let r = 0; // current column index
  const i = parseRowProperties(n);
  for (const c of n.children) {
    if (c.type !== "element" || c.name !== "w:tc") continue;

    const l = t.activeRowspans.get(r);
    if (l && l > 0) {
      t.activeRowspans.set(r, l - 1);
      r++;
      continue;
    }

    let f = parseCellProperties(c);
    if (f?.rowSpan === 1) {
      const I = y$1({ ...t, colIndex: r });
      if (I > 1) f = { ...f, rowSpan: I };
    }
    if (f?.rowSpan && f.rowSpan > 1) t.activeRowspans.set(r, f.rowSpan - 1);
    if (f?.rowSpan === 0) {
      r++;
      continue;
    }

    const b = await g(c, t);
    e.push({ type: "tableCell", ...(f && { attrs: f }), content: b });
    r += f?.colSpan || 1;
  }
  return { type: "tableRow", ...(i && { attrs: i }), content: e };
}

/**
 * Counts how many rows the cell at (rowIndex, colIndex) spans by looking at
 * the following rows for continuation cells in the same column.
 */
function y$1(n) {
  let t = 1;
  for (let r = n.rowIndex + 1; r < n.rows.length; r++) {
    // The column cursor restarts for every row; carrying it over from the
    // previous row would inspect the wrong cell from the third row onwards.
    let e = n.colIndex;
    let c = false;
    for (const l of n.rows[r].children) {
      if (l.type !== "element" || l.name !== "w:tc") continue;
      const f = parseCellProperties(l);
      const b = f?.colSpan || 1;
      if (e >= 0 && e < b) {
        if (f?.rowSpan === 0) {
          t++;
          c = true;
        } else return t;
        break;
      }
      e -= b;
    }
    if (!c) break;
  }
  return t;
}

/** Converts the paragraphs of a cell; yields one empty paragraph for an empty cell. */
async function g(n, t) {
  const e = [];
  for (const r of n.children) {
    if (r.type !== "element" || r.name !== "w:p") continue;
    const i = await convertParagraph(r, t);
    if (Array.isArray(i)) e.push(...i);
    else e.push(i);
  }
  return e.length ? e : [{ type: "paragraph", content: [] }];
}

/* ------------------------------------------------------------------ */
/* Lists, code blocks, task items, horizontal rules                    */
/* ------------------------------------------------------------------ */

/** True when the paragraph has a w:numPr. */
function isListItem(n) {
  const t = findChild(n, "w:pPr");
  return !!t && findChild(t, "w:numPr") !== void 0;
}

/**
 * Returns `{ numId, level }` for a list paragraph, or null when it has no
 * w:numPr / w:numId. A missing w:ilvl is treated as level 0.
 */
function getListInfo(n) {
  const t = findChild(n, "w:pPr");
  const e = t && findChild(t, "w:numPr");
  if (!e) return null;
  const r = findChild(e, "w:ilvl");
  const i = findChild(e, "w:numId");
  if (!i) return null;
  return { numId: i.attributes["w:val"], level: parseInt(r?.attributes["w:val"] || "0", 10) };
}

/** True when the paragraph style id is "CodeBlock" or starts with "Code". */
function isCodeBlock(n) {
  const t = findChild(n, "w:pPr");
  const e = (t && findChild(t, "w:pStyle"))?.attributes["w:val"];
  return e === "CodeBlock" || e?.startsWith("Code") || false;
}

/** Returns the lower-cased suffix of a "CodeBlock<Lang>" style id, or undefined. */
function getCodeBlockLanguage(n) {
  const t = findChild(n, "w:pPr");
  const e = (t && findChild(t, "w:pStyle"))?.attributes["w:val"];
  return (e?.startsWith("CodeBlock") && e.replace("CodeBlock", "").toLowerCase()) || void 0;
}

const x = "\u2610"; // unchecked box
const a = "\u2611"; // checked box

/** Returns the first text node of the paragraph's first run, or null. */
function m(n) {
  const t = findChild(n, "w:r");
  if (!t) return null;
  const e = findChild(t, "w:t");
  if (!e) return null;
  const r = e.children.find((i) => i.type === "text");
  return (r?.value && r) || null;
}

/** True when the paragraph text starts with a checkbox character. */
function isTaskItem(n) {
  const t = m(n);
  if (!t) return false;
  const e = t.value;
  return e.startsWith(x) || e.startsWith(a);
}

/** True when the paragraph text starts with the checked-box character. */
function getTaskItemChecked(n) {
  return m(n)?.value.startsWith(a) || false;
}

/** Converts a task paragraph into a taskItem node. */
function convertTaskItem(n) {
  return { type: "taskItem", attrs: { checked: getTaskItemChecked(n) }, content: [h(n)] };
}

/** Builds the paragraph of a task item with the leading checkbox removed. */
function h(n) {
  const t = [];
  let e = false; // whether the checkbox has been stripped already
  for (const i of n.children) {
    if (i.type !== "element" || i.name !== "w:r") continue;
    const l = findChild(i, "w:t")?.children.find((f) => f.type === "text");
    if (!l?.value) continue;

    if (!e && (l.value.startsWith(x) || l.value.startsWith(a))) {
      e = true;
      // Drop only the checkbox itself, then any whitespace; a fixed
      // two-character cut would eat the first letter of "\u2610Task".
      const I = l.value.substring(1).trimStart();
      if (I) t.push({ type: "text", text: I });
      continue;
    }

    const c = y(i);
    const f = { type: "text", text: l.value };
    if (c.length) f.marks = c;
    t.push(f);
  }
  const r = extractAlignment(n);
  return { type: "paragraph", ...(r && { attrs: r }), content: t.length ? t : void 0 };
}

/** Basic marks (bold/italic/underline/strike) of a task-item run. */
function y(n) {
  const t = [];
  const e = findChild(n, "w:rPr");
  if (e) {
    if (isToggleOn(findChild(e, "w:b"))) t.push({ type: "bold" });
    if (isToggleOn(findChild(e, "w:i"))) t.push({ type: "italic" });
    if (isToggleOn(findChild(e, "w:u"))) t.push({ type: "underline" });
    if (isToggleOn(findChild(e, "w:strike"))) t.push({ type: "strike" });
  }
  return t;
}

/** True when the paragraph's first run holds a page break and no other content. */
function isHorizontalRule(n) {
  const t = findChild(n, "w:r");
  if (!t) return false;
  let e = false; // saw a page break
  let r = false; // saw other content
  for (const i of t.children) {
    if (i.type !== "element") continue;
    if (i.name === "w:br" && i.attributes["w:type"] === "page") e = true;
    else if (i.name === "w:t") {
      if (i.children.find((c) => c.type === "text")?.value?.trim().length) r = true;
    } else if (i.name !== "w:rPr") r = true;
  }
  return e && !r;
}

/* ------------------------------------------------------------------ */
/* Body element handlers                                               */
/* Each handler takes (elements, index, context) and returns           */
/* { nodes, consumed }.                                                */
/* ------------------------------------------------------------------ */

/** Plain text nodes for every run under a code-block paragraph. */
const O = (n) => {
  const t = [];
  for (const r of findDeepChildren(n, "w:r")) {
    const i = findChild(r, "w:t");
    if (!i) continue;
    const c = i.children.find((l) => l.type === "text");
    if (c && "value" in c && c.value) t.push({ type: "text", text: c.value });
  }
  return t;
};

/** Table handler; also consumes one empty paragraph directly after the table. */
const S$1 = async (n, t, e) => {
  const r = await convertTable(n[t], {
    hyperlinks: e.hyperlinks,
    images: e.images,
    options: e.options,
    styleMap: e.styleMap,
  });
  let i = 1;
  if (t + 1 < n.length && n[t + 1].name === "w:p" && C(n[t + 1])) i++;
  return { nodes: [r], consumed: i };
};

/** Code-block handler: one codeBlock node per consecutive code paragraph. */
const J = async (n, t) => {
  const e = [];
  let r = t;
  for (; r < n.length; r++) {
    const i = n[r];
    if (i.name !== "w:p" || !isCodeBlock(i)) break;
    const c = getCodeBlockLanguage(i);
    e.push({ type: "codeBlock", ...(c && { attrs: { language: c } }), content: O(i) });
  }
  return { nodes: e, consumed: r - t };
};

/** List handler: groups consecutive list paragraphs with the same numId. */
const M = async (n, t, e) => {
  const { listTypeMap: r } = e;
  const i = [];
  let c = t;
  while (c < n.length) {
    const l = n[c];
    if (l.name !== "w:p" || !isListItem(l)) break;
    const f = getListInfo(l);
    if (!f) break;
    const b = r.get(f.numId);
    const I = b?.type || "bullet";
    const k = [];
    while (c < n.length) {
      const P = n[c];
      if (P.name !== "w:p" || !isListItem(P)) break;
      const F = getListInfo(P);
      if (!F || F.numId !== f.numId) break;
      const $ = await convertParagraph(P, e);
      k.push({ type: "listItem", content: [Array.isArray($) ? $[0] : $] });
      c++;
    }
    const R = { type: I === "bullet" ? "bulletList" : "orderedList", content: k };
    if (I === "ordered") R.attrs = { type: null, ...(b?.start !== void 0 && { start: b.start }) };
    i.push(R);
  }
  return { nodes: i, consumed: c - t };
};

/** Task-list handler: wraps consecutive task paragraphs in one taskList. */
const T = async (n, t) => {
  const e = [];
  let r = t;
  for (; r < n.length; r++) {
    const i = n[r];
    if (i.name !== "w:p" || !isTaskItem(i)) break;
    // convertTaskItem is defined in this module; no per-item dynamic import.
    e.push(convertTaskItem(i));
  }
  return { nodes: [{ type: "taskList", content: e }], consumed: r - t };
};

/** Horizontal-rule handler. */
const v = async () => ({ nodes: [{ type: "horizontalRule" }], consumed: 1 });

/** Default paragraph handler. */
const L = async (n, t, e) => {
  const r = await convertParagraph(n[t], e);
  return { nodes: Array.isArray(r) ? r : [r], consumed: 1 };
};

/** True when the paragraph has no non-blank text, drawing, picture or page break. */
const C = (n) => {
  for (const e of findDeepChildren(n, "w:r")) {
    const r = findChild(e, "w:t");
    if (r) {
      const c = r.children.find((l) => l.type === "text");
      if (c && "value" in c && c.value && c.value.trim().length > 0) return false;
    }
    if (findChild(e, "w:drawing") || findChild(e, "mc:AlternateContent") || findChild(e, "w:pict")) return false;
    const i = findChild(e, "w:br");
    if (i && i.attributes["w:type"] === "page") return false;
  }
  return true;
};

/** Selects the handler for a body element; null for unsupported elements. */
const A = (n) => {
  if (n.name === "w:tbl") return S$1;
  if (n.name !== "w:p") return null;
  if (isCodeBlock(n)) return J;
  if (isTaskItem(n)) return T;
  if (isListItem(n)) return M;
  if (isHorizontalRule(n)) return v;
  return L;
};

/** Converts a sequence of body elements into document nodes. */
const processElements = async (n, t) => {
  const e = [];
  let r = 0;
  while (r < n.length) {
    const i = n[r];
    const c = A(i);
    if (!c) {
      r++;
      continue;
    }
    if (i.name === "w:p" && t.ignoreEmptyParagraphs && C(i)) {
      r++;
      continue;
    }
    let { nodes: l, consumed: f } = await c(n, r, t);
    if (f < 1) {
      // A handler that consumed nothing (e.g. a list paragraph without a
      // usable numId) would stall this loop forever; fall back to the plain
      // paragraph handler so the content is kept and the index advances.
      ({ nodes: l, consumed: f } = await L(n, r, t));
    }
    e.push(...l);
    r += f;
  }
  return e;
};

/* ------------------------------------------------------------------ */
/* Entry point                                                         */
/* ------------------------------------------------------------------ */

/**
 * Parses DOCX data into a `{ type: "doc", content }` tree.
 * @param n DOCX data in any form accepted by undio's `toUint8Array`.
 * @param t Options; `ignoreEmptyParagraphs` (default false) is read here and
 *          the whole object is forwarded to the converters (e.g.
 *          `canvasImport`, `enableImageCrop`).
 * @throws Error when the archive has no word/document.xml.
 */
async function parseDOCX(n, t = {}) {
  const { ignoreEmptyParagraphs: e = false } = t;
  const r = await undio.toUint8Array(n);
  const i = fflate.unzipSync(r);
  const c = extractHyperlinks(i);
  const l = extractImages(i);
  const f = i["word/document.xml"];
  if (!f) throw new Error("Invalid DOCX file: missing word/document.xml");
  const b = xastUtilFromXml.fromXml(new TextDecoder().decode(f));
  const I = parseNumberingXml(i);
  const k = parseStylesXml(i);
  return await S(b, l, c, I, k, e, t);
}

/** Converts the parsed document root into a doc node (empty doc when w:body is missing). */
async function S(n, t, e, r, i, c, l) {
  if (n.type !== "root") return { type: "doc", content: [] };
  const f = findChild(n, "w:document");
  if (!f) return { type: "doc", content: [] };
  const b = findChild(f, "w:body");
  if (!b) return { type: "doc", content: [] };
  const I = { hyperlinks: e, images: t, listTypeMap: r, styleMap: i, ignoreEmptyParagraphs: c, options: l };
  return { type: "doc", content: await processElements(b.children.filter((k) => k.type === "element"), I) };
}

exports.convertParagraph = convertParagraph;
exports.convertTable = convertTable;
exports.convertTaskItem = convertTaskItem;
exports.extractAlignment = extractAlignment;
exports.extractMarks = extractMarks;
exports.extractRuns = extractRuns;
exports.getCodeBlockLanguage = getCodeBlockLanguage;
exports.getListInfo = getListInfo;
exports.getTaskItemChecked = getTaskItemChecked;
exports.isCodeBlock = isCodeBlock;
exports.isHorizontalRule = isHorizontalRule;
exports.isListItem = isListItem;
exports.isTable = isTable;
exports.isTaskItem = isTaskItem;
exports.parseDOCX = parseDOCX;