@docen/import-docx 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Demo Macro
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,230 @@
1
+ # @docen/import-docx
2
+
3
+ ![npm version](https://img.shields.io/npm/v/@docen/import-docx)
4
+ ![npm downloads](https://img.shields.io/npm/dw/@docen/import-docx)
5
+ ![npm license](https://img.shields.io/npm/l/@docen/import-docx)
6
+
7
+ > Import Microsoft Word DOCX files to TipTap/ProseMirror content.
8
+
9
+ ## Features
10
+
11
+ - 📝 **Rich Text Parsing** - Accurate parsing of headings, paragraphs, and blockquotes with formatting
12
+ - 🖼️ **Image Extraction** - Automatic image extraction and base64 conversion
13
+ - 📊 **Table Support** - Complete table structure with colspan/rowspan detection algorithm
14
+ - ✅ **Lists & Tasks** - Bullet lists, numbered lists with start number extraction, and task lists with checkbox detection
15
+ - 🎨 **Text Formatting** - Bold, italic, underline, strikethrough, subscript, superscript, and highlights
16
+ - 🎯 **Text Styles** - Comprehensive style support including colors, backgrounds, fonts, sizes, and line heights
17
+ - 🔗 **Links** - Hyperlink extraction with href preservation
18
+ - 💻 **Code Blocks** - Code block detection with language attribute extraction
19
+ - 🧠 **Smart Parsing** - DOCX XML parsing with proper element grouping and structure reconstruction
20
+ - ⚡ **Fast Processing** - Uses fflate for ultra-fast ZIP decompression
21
+
22
+ ## Installation
23
+
24
+ ```bash
25
+ # Install with npm
26
+ $ npm install @docen/import-docx
27
+
28
+ # Install with yarn
29
+ $ yarn add @docen/import-docx
30
+
31
+ # Install with pnpm
32
+ $ pnpm add @docen/import-docx
33
+ ```
34
+
35
+ ## Quick Start
36
+
37
+ ```typescript
38
+ import { parseDOCX } from "@docen/import-docx";
39
+ import { readFileSync } from "node:fs";
40
+
41
+ // Read DOCX file
42
+ const buffer = readFileSync("document.docx");
43
+
44
+ // Parse DOCX to TipTap JSON
45
+ const content = await parseDOCX(buffer);
46
+
47
+ // Use in TipTap editor
48
+ editor.commands.setContent(content);
49
+ ```
50
+
51
+ ## API Reference
52
+
53
+ ### `parseDOCX(input, options?)`
54
+
55
+ Parses a DOCX file and converts it to TipTap/ProseMirror JSON content.
56
+
57
+ **Parameters:**
58
+
59
+ - `input: Buffer | ArrayBuffer | Uint8Array` - DOCX file data
60
+ - `options?: DocxImportOptions` - Optional import configuration
61
+
62
+ **Returns:** `Promise<JSONContent>` - TipTap/ProseMirror document content with images embedded
63
+
64
+ **Options:**
65
+
66
+ ```typescript
67
+ interface DocxImportOptions {
68
+ /** Custom image converter (default: embed as base64) */
69
+ convertImage?: (image: DocxImageInfo) => Promise<DocxImageResult>;
70
+
71
+ /** Whether to ignore empty paragraphs (default: false) */
72
+ ignoreEmptyParagraphs?: boolean;
73
+ }
74
+ ```
75
+
76
+ **Default Image Converter:**
77
+
78
+ The package exports `defaultImageConverter` which embeds images as base64 data URLs:
79
+
80
+ ```typescript
81
+ import { defaultImageConverter } from "@docen/import-docx";
82
+
83
+ // Use in custom converter
84
+ await parseDOCX(buffer, {
85
+ convertImage: async (image) => {
86
+ if (shouldUploadToCDN) {
87
+ return uploadToCDN(image.data);
88
+ }
89
+ return defaultImageConverter(image);
90
+ },
91
+ });
92
+ ```
93
+
94
+ ## Supported Content Types
95
+
96
+ ### Text Formatting
97
+
98
+ - **Bold**, _Italic_, <u>Underline</u>, ~~Strikethrough~~
99
+ - <sup>Superscript</sup> and <sub>Subscript</sub>
100
+ - Text highlights
101
+ - Text colors and background colors
102
+ - Font families and sizes
103
+ - Line heights
104
+
105
+ ### Block Elements
106
+
107
+ - **Headings** (H1-H6) with proper level detection
108
+ - **Paragraphs** with text alignment (left, right, center, justify)
109
+ - **Blockquotes** (Detected by indentation + left border formatting)
110
+ - **Horizontal Rules** (Detected as page breaks in DOCX)
111
+ - **Code Blocks** with language attribute support
112
+
113
+ ### Lists
114
+
115
+ - **Bullet Lists** with proper nesting and structure
116
+ - **Numbered Lists** with custom start number extraction
117
+ - **Task Lists** with checked/unchecked state detection (☐/☑ symbols)
118
+
119
+ ### Tables
120
+
121
+ - Complete table structure parsing
122
+ - **Table Cells** with colspan detection using grid-based algorithm
123
+ - **Table Cells** with rowspan detection using vMerge tracking
124
+ - Cell alignment and formatting preservation
125
+ - Merged cell handling (both horizontal and vertical)
126
+
127
+ ### Media & Embeds
128
+
129
+ - **Images** with automatic base64 conversion
130
+ - **Links** (hyperlinks) with href extraction
131
+
132
+ ## Parsing Algorithm
133
+
134
+ ### Document Structure
135
+
136
+ The parser follows a structured workflow:
137
+
138
+ 1. **Extract Relationships** - Parse `_rels/document.xml.rels` for hyperlinks and images
139
+ 2. **Parse Numbering** - Extract list definitions from `numbering.xml` (abstractNum → numFmt)
140
+ 3. **Process Document Body** - Iterate through document.xml elements:
141
+ - Detect content types (tables, lists, paragraphs, code blocks, etc.)
142
+ - Group consecutive elements into proper containers
143
+ - Convert XML nodes to TipTap JSON nodes
144
+
145
+ ### Table Processing
146
+
147
+ Tables use specialized algorithms:
148
+
149
+ - **Colspan Detection** - Grid-based algorithm tracks cell positions and detects horizontal merges
150
+ - **Rowspan Detection** - Vertical merge (vMerge) tracking across rows with proper cell skipping
151
+ - **Cell Content** - Recursive parsing of nested paragraphs and formatting
152
+ - **Hyperlink Support** - Proper handling of links within table cells
153
+
154
+ ### List Processing
155
+
156
+ Lists utilize the DOCX numbering system:
157
+
158
+ - **Numbering ID Mapping** - Maps abstractNum to formatting (bullet vs decimal)
159
+ - **Start Value Extraction** - Extracts and preserves start numbers for ordered lists
160
+ - **Nesting Preservation** - Maintains proper list hierarchy
161
+ - **Consecutive Grouping** - Groups consecutive list items into list containers
162
+
163
+ ## Examples
164
+
165
+ ### Basic Usage
166
+
167
+ ```typescript
168
+ import { parseDOCX } from "@docen/import-docx";
169
+ import { readFileSync } from "node:fs";
170
+ const buffer = readFileSync("example.docx");
171
+ const content = await parseDOCX(buffer);
172
+
173
+ console.log(JSON.stringify(content, null, 2));
174
+ ```
175
+
176
+ ### Use with TipTap Editor
177
+
178
+ ```typescript
179
+ import { Editor } from "@tiptap/core";
180
+ import { parseDOCX } from "@docen/import-docx";
181
+
182
+ const editor = new Editor({
183
+ extensions: [...],
184
+ content: "",
185
+ });
186
+
187
+ // Import DOCX file
188
+ async function importDocx(file: File) {
189
+ const buffer = await file.arrayBuffer();
190
+ const content = await parseDOCX(buffer);
191
+ editor.commands.setContent(content);
192
+ }
193
+ ```
194
+
195
+ ## Known Limitations
196
+
197
+ ### Blockquote Detection
198
+
199
+ DOCX does not have a semantic blockquote structure. Blockquotes are detected by:
200
+
201
+ - Left indentation ≥ 720 twips (0.5 inch)
202
+ - Presence of left border (single line)
203
+
204
+ This detection method may produce false positives for documents with custom indentation similar to blockquotes.
205
+
206
+ ### Code Marks
207
+
208
+ The `code` mark is NOT automatically detected from monospace fonts (Consolas, Courier New, etc.). This is intentional to avoid false positives. Code marks should be explicitly added in the source document or through editor UI.
209
+
210
+ ### Color Format
211
+
212
+ All colors are imported as hex values (e.g., "#FF0000", "#008000"). Color names from the original document are not preserved.
213
+
214
+ ### Image Limitations
215
+
216
+ - Only embedded images are supported (external image links are not fetched)
217
+ - Image width/height metadata is preserved but visual sizing may vary
218
+ - Title, alt text, and other image attributes have limited DOCX support
219
+
220
+ ### Table Cell Types
221
+
222
+ DOCX format does not distinguish between header and body cells at a semantic level. All cells are imported as `tableCell` type for consistency. This is a DOCX format limitation.
223
+
224
+ ## Contributing
225
+
226
+ Contributions are welcome! Please read our [Contributor Covenant](https://www.contributor-covenant.org/version/2/1/code_of_conduct/) and submit pull requests to the [main repository](https://github.com/DemoMacro/docen).
227
+
228
+ ## License
229
+
230
+ - [MIT](LICENSE) &copy; [Demo Macro](https://imst.xyz/)
package/dist/index.cjs ADDED
@@ -0,0 +1 @@
1
+ /*
+  * @docen/import-docx, CommonJS build (minified bundler output).
+  *
+  * Runtime dependencies loaded below: xast-util-from-xml (fromXml), fflate
+  * (unzipSync) and undio (toUint8Array, toBase64).
+  *
+  * Entry point: parseDOCX(input, options = {})
+  *   - options.convertImage defaults to defaultImageConverter and
+  *     options.ignoreEmptyParagraphs defaults to false.
+  *   - The input is converted with undio.toUint8Array and unzipped with
+  *     fflate.unzipSync.
+  *   - word/_rels/document.xml.rels is read twice: once for hyperlink
+  *     relationships (Id mapped to Target) and once for image relationships
+  *     (Id mapped to the archive entry stored at "word/" + Target).
+  *   - convertImage is awaited for each image in turn; if it throws, the
+  *     error is reported with console.warn and the value returned by
+  *     undio.toBase64 is stored for that image instead.
+  *   - Throws Error("Invalid DOCX file: missing word/document.xml") when that
+  *     archive entry is absent.
+  *   - word/numbering.xml, when present, is used to classify each w:num as
+  *     bullet or ordered. Only the first w:lvl of each w:abstractNum is read;
+  *     any numFmt other than "bullet" is treated as ordered, and w:start is
+  *     carried over for ordered lists.
+  *   - The element children of w:body are walked in order and grouped into
+  *     table, codeBlock, taskList, bulletList or orderedList, horizontalRule
+  *     and paragraph nodes, returned as { type: "doc", content: [...] }.
+  *
+  * Other exports: convertParagraph, convertTable, convertTaskItem,
+  * defaultImageConverter, extractAlignment, extractMarks, extractRuns,
+  * getCodeBlockLanguage, getListInfo, getTaskItemChecked, isCodeBlock,
+  * isHorizontalRule, isListItem, isTable, isTaskItem.
+  *
+  * Behaviour worth knowing before relying on the output:
+  *   - A table cell keeps only its first w:p; further paragraphs in the same
+  *     w:tc are converted and then discarded.
+  *   - Bold, italic, underline, strike and highlight marks are added whenever
+  *     the corresponding element exists in w:rPr; its w:val is not consulted.
+  *   - The second parameter of the table-row converter (true for the first
+  *     row) is never read, so every cell is emitted as "tableCell".
+  *   - NOTE(review): the image contentType is derived by searching archive
+  *     paths for the relationship Id and falls back to "image/png" when none
+  *     matches. Archive paths look unlikely to contain the Id, so this
+  *     probably always yields "image/png" - confirm against real documents.
+  *   - NOTE(review): whether undio.toBase64 returns a plain base64 string or
+  *     a data URL (and whether synchronously) is not visible here - confirm
+  *     before using the src value directly in an img element.
+  */ "use strict";const xastUtilFromXml=require("xast-util-from-xml"),fflate=require("fflate"),undio=require("undio");function extractRuns(c,s,t){const i=[];for(const n of s.children)if(n.type==="element"){if(n.name==="w:hyperlink"){const l=n,a=l.attributes["r:id"],b=c.get(a);if(b){for(const h of l.children)if(h.type==="element"&&h.name==="w:r"){const k=h,v=r(k,"w:drawing");if(v){const L=m(v,t);L&&i.push(L);continue}const x=r(k,"w:t");if(!x)continue;const I=x.children.find(L=>L.type==="text");if(!I||!I.value)continue;const C=extractMarks(k);C.push({type:"link",attrs:{href:b}});const M={type:"text",text:I.value};C.length>0&&(M.marks=C),i.push(M)}}continue}if(n.name==="w:r"){const l=n,a=r(l,"w:drawing");if(a){const x=m(a,t);x&&i.push(x);continue}if(r(l,"w:br")){const x=extractMarks(l),I={type:"hardBreak"};x.length>0&&(I.marks=x),i.push(I)}const b=r(l,"w:t");if(!b)continue;const h=b.children.find(x=>x.type==="text");if(!h||!h.value)continue;const k=extractMarks(l),v={type:"text",text:h.value};k.length>0&&(v.marks=k),i.push(v)}}return i}function extractMarks(c){const s=[],t=r(c,"w:rPr");if(!t)return s;r(t,"w:b")&&s.push({type:"bold"}),r(t,"w:i")&&s.push({type:"italic"}),r(t,"w:u")&&s.push({type:"underline"}),r(t,"w:strike")&&s.push({type:"strike"}),r(t,"w:highlight")&&s.push({type:"highlight"});const i=r(t,"w:vertAlign");if(i){const h=i.attributes["w:val"];h==="subscript"?s.push({type:"subscript"}):h==="superscript"&&s.push({type:"superscript"})}const n=r(t,"w:color"),l=r(t,"w:shd"),a=r(t,"w:sz"),b=r(t,"w:rFonts");if(n||l||a||b){const h={color:"",backgroundColor:"",fontSize:"",fontFamily:"",lineHeight:""};if(n&&n.attributes["w:val"]){const k=n.attributes["w:val"];if(k!=="auto"){const v=k.startsWith("#")?k:`#${k}`;h.color=v}}if(l&&l.attributes["w:fill"]){const k=l.attributes["w:fill"];if(k!=="auto"){const v=k.startsWith("#")?k:`#${k}`;h.backgroundColor=v}}if(a&&a.attributes["w:val"]){const k=a.attributes["w:val"],v=parseFloat(k);if(!isNaN(v)){const 
x=Math.round(v/1.5*10)/10;h.fontSize=`${x}px`}}b&&b.attributes["w:ascii"]&&(h.fontFamily=b.attributes["w:ascii"]),s.push({type:"textStyle",attrs:h})}return s}function extractAlignment(c){const s=r(c,"w:pPr");if(!s)return;const t=r(s,"w:jc");if(!t?.attributes["w:val"])return;const i=t.attributes["w:val"],n={left:"left",right:"right",center:"center",both:"justify"}[i];return n?{textAlign:n}:void 0}function m(c,s){const t=w(c,"a:blip");if(!t?.attributes["r:embed"])return null;const i=t.attributes["r:embed"],n=s.get(i);return n?{type:"image",attrs:{src:n,alt:""}}:null}function r(c,s){for(const t of c.children)if(t.type==="element"&&t.name===s)return t}function w(c,s){for(const t of c.children)if(t.type==="element"&&t.name===s)return t;for(const t of c.children)if(t.type==="element"){const i=w(t,s);if(i)return i}}function convertParagraph(c,s,t){let i;for(const a of c.children)if(a.type==="element"&&a.name==="w:pPr"){const b=a;for(const h of b.children)if(h.type==="element"&&h.name==="w:pStyle"){i=h.attributes["w:val"];break}break}if(i){const a=i.match(/^Heading(\d)$/);if(a){const b=parseInt(a[1]);return f$1(c,s,b,t)}}const n=extractRuns(s,c,t);if(n.length===1&&n[0].type==="hardBreak"){for(const a of c.children)if(a.type==="element"&&a.name==="w:r"){for(const b of a.children)if(b.type==="element"&&b.name==="w:br"&&b.attributes["w:type"]==="page")return{type:"horizontalRule"}}}if(n.length===1&&n[0].type==="image")return n[0];const l=extractAlignment(c);return{type:"paragraph",...l&&{attrs:l},content:n}}function f$1(c,s,t,i){return{type:"heading",attrs:{level:t},content:extractRuns(s,c,i)}}function isListItem(c){const s=e(c,"w:pPr");return s?!!e(s,"w:numPr"):!1}function getListInfo(c){const s=e(c,"w:pPr");if(!s)return null;const t=e(s,"w:numPr");if(!t)return null;const i=e(t,"w:ilvl"),n=e(t,"w:numId");return!i||!n?null:{numId:n.attributes["w:val"],level:parseInt(i.attributes["w:val"]||"0")}}function e(c,s){for(const t of c.children)if(t.type==="element"&&t.name===s)return 
t}function isCodeBlock(c){const s=o(c,"w:pPr");if(!s)return!1;const t=o(s,"w:pStyle");if(!t)return!1;const i=t.attributes["w:val"];return i==="CodeBlock"||i?.startsWith("Code")}function getCodeBlockLanguage(c){const s=o(c,"w:pPr");if(!s)return;const t=o(s,"w:pStyle");if(!t)return;const i=t.attributes["w:val"];if(i?.startsWith("CodeBlock"))return i.replace("CodeBlock","").toLowerCase()||void 0}function o(c,s){for(const t of c.children)if(t.type==="element"&&t.name===s)return t}function isTable(c){return c.name==="w:tbl"}function convertTable(c,s,t){const i=[],n=[];for(const a of c.children)a.type==="element"&&a.name==="w:tr"&&n.push(a);const l=new Map;return n.forEach((a,b)=>{i.push(d(a,b===0,s,t,l,n,b))}),{type:"table",content:i}}function d(c,s,t,i,n,l,a){const b=[];let h=0;for(const k of c.children)if(k.type==="element"&&k.name==="w:tc"){const v=n.get(h);if(v&&v>0){n.set(h,v-1),h++;continue}let x=u$1(k);if(x&&x.rowspan===1){const M=g(l,a,h);M>1&&(x={...x,rowspan:M})}if(x&&x.rowspan>1&&n.set(h,x.rowspan-1),x&&x.rowspan===0){h++;continue}const I="tableCell",C=y(k,t,i);b.push({type:I,...x&&{attrs:x},content:[C]}),h+=x?.colspan||1}return{type:"tableRow",content:b}}function u$1(c){const s={colspan:1,rowspan:1,colwidth:null};let t;for(const i of c.children)if(i.type==="element"&&i.name==="w:tcPr"){t=i;break}if(!t)return s;for(const i of t.children)if(i.type==="element"&&i.name==="w:gridSpan"){const n=i.attributes["w:val"];n&&(s.colspan=parseInt(n));break}for(const i of t.children)if(i.type==="element"&&i.name==="w:vMerge"){i.attributes["w:val"]==="continue"&&(s.rowspan=0);break}for(const i of t.children)if(i.type==="element"&&i.name==="w:tcW"){const n=i.attributes["w:w"];n&&(s.colwidth=parseInt(n));break}return s}function g(c,s,t){let i=1,n=t;for(let l=s+1;l<c.length;l++){const a=c[l];let b=!1;for(const h of a.children)if(h.type==="element"&&h.name==="w:tc"){const k=u$1(h),v=k?.colspan||1;if(n>=0&&n<v){if(k?.rowspan===0)i++,b=!0;else return 
i;break}n-=v}if(!b)break}return i}function y(c,s,t){const i=[];for(const n of c.children)if(n.type==="element"&&n.name==="w:p"){const l=convertParagraph(n,s,t);i.push(l)}return i[0]||{type:"paragraph",content:[]}}function isTaskItem(c){for(const s of c.children)if(s.type==="element"&&s.name==="w:r"){for(const t of s.children)if(t.type==="element"&&t.name==="w:t"){const i=t.children.find(n=>n.type==="text");if(i&&"value"in i){const n=i.value;return n.startsWith("\u2610")||n.startsWith("\u2611")}}break}return!1}function getTaskItemChecked(c){for(const s of c.children)if(s.type==="element"&&s.name==="w:r"){for(const t of s.children)if(t.type==="element"&&t.name==="w:t"){const i=t.children.find(n=>n.type==="text");if(i&&"value"in i)return i.value.startsWith("\u2611")}break}return!1}function convertTaskItem(c){const s=getTaskItemChecked(c),t=f(c);return{type:"taskItem",attrs:{checked:s},content:[t]}}function f(c){const s=[];let t=!1;for(const n of c.children)if(n.type==="element"&&n.name==="w:r"){let l=!1;if(!t){for(const a of n.children)if(a.type==="element"&&a.name==="w:t"){const b=a.children.find(h=>h.type==="text");if(b&&"value"in b){const h=b.value;if(h.startsWith("\u2610")||h.startsWith("\u2611")){l=!0,t=!0;const k=h.substring(2).trimStart();k.length>0&&s.push({type:"text",text:k})}}}}if(!l){const a=p(n);for(const b of n.children)if(b.type==="element"&&b.name==="w:t"){const h=b.children.find(k=>k.type==="text");if(h&&"value"in h){const k={type:"text",text:h.value};a.length>0&&(k.marks=a),s.push(k)}}}}const i=u(c);return{type:"paragraph",...i&&{attrs:i},content:s.length>0?s:void 0}}function p(c){const s=[];for(const t of c.children)if(t.type==="element"&&t.name==="w:rPr"){const i=t;for(const n of i.children)if(n.type==="element"&&n.name==="w:b"){s.push({type:"bold"});break}for(const n of i.children)if(n.type==="element"&&n.name==="w:i"){s.push({type:"italic"});break}for(const n of 
i.children)if(n.type==="element"&&n.name==="w:u"){s.push({type:"underline"});break}for(const n of i.children)if(n.type==="element"&&n.name==="w:strike"){s.push({type:"strike"});break}break}return s}function u(c){for(const s of c.children)if(s.type==="element"&&s.name==="w:pPr"){const t=s;for(const i of t.children)if(i.type==="element"&&i.name==="w:jc"){const n=i.attributes["w:val"];if(n==="both")return{textAlign:"justify"};if(n==="center")return{textAlign:"center"};if(n==="right")return{textAlign:"right"};if(n==="left")return{textAlign:"left"}}}}function isHorizontalRule(c){for(const s of c.children)if(s.type==="element"&&s.name==="w:r"){const t=s;let i=!1,n=!1;for(const l of t.children)if(l.type==="element")if(l.name==="w:br")l.attributes["w:type"]==="page"&&(i=!0);else if(l.name==="w:t"){const a=l.children.find(b=>b.type==="text");a&&"value"in a&&a.value&&a.value.trim().length>0&&(n=!0)}else l.name!=="w:rPr"&&(n=!0);if(i&&!n)return!0}return!1}const defaultImageConverter=async c=>({src:undio.toBase64(c.data)});async function parseDOCX(c,s={}){const{convertImage:t=defaultImageConverter,ignoreEmptyParagraphs:i=!1}=s,n=await undio.toUint8Array(c),l=fflate.unzipSync(n),a=A(l),b=X(l),h=new Map;for(const[I,C]of b.entries())try{const M=`image/${Object.keys(l).find(R=>R.endsWith(I)||R.includes(`media/${I}`))?.split(".").pop()?.toLowerCase()||"png"}`,L=await t({id:I,contentType:M,data:C});h.set(I,L.src)}catch(M){console.warn(`Failed to convert image ${I}:`,M);const L=undio.toBase64(C);h.set(I,L)}const k=l["word/document.xml"];if(!k)throw new Error("Invalid DOCX file: missing word/document.xml");const v=xastUtilFromXml.fromXml(new TextDecoder().decode(k)),x=J(l);return P(v,h,a,x,i)}function J(c){const s=new Map,t=new Map,i=c["word/numbering.xml"];if(!i)return s;const n=xastUtilFromXml.fromXml(new TextDecoder().decode(i)),l=new Map;if(n.type==="root"){for(const a of n.children)if(a.type==="element"&&a.name==="w:numbering"){const b=a;for(const h of 
b.children)if(h.type==="element"&&h.name==="w:abstractNum"){const k=h,v=k.attributes["w:abstractNumId"];for(const x of k.children)if(x.type==="element"&&x.name==="w:lvl"){for(const I of x.children)if(I.type==="element"&&I.name==="w:numFmt"){const C=I.attributes["w:val"];if(C){l.set(v,C);break}}for(const I of x.children)if(I.type==="element"&&I.name==="w:start"){const C=I.attributes["w:val"];C&&t.set(v,parseInt(C,10));break}break}}for(const h of b.children)if(h.type==="element"&&h.name==="w:num"){const k=h,v=k.attributes["w:numId"];for(const x of k.children)if(x.type==="element"&&x.name==="w:abstractNumId"){const I=x.attributes["w:val"],C=l.get(I);if(C){const M=t.get(I);C==="bullet"?s.set(v,{type:"bullet"}):s.set(v,{type:"ordered",...M!==void 0&&{start:M}})}break}}break}}return s}function X(c){const s=new Map,t=c["word/_rels/document.xml.rels"];if(!t)return s;const i=xastUtilFromXml.fromXml(new TextDecoder().decode(t));if(i.type==="root"){for(const n of i.children)if(n.type==="element"&&n.name==="Relationships"){const l=n;for(const a of l.children)if(a.type==="element"&&a.name==="Relationship"){const b=a,h=b.attributes.Type;if(h&&h==="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"){const k=b.attributes.Id,v=b.attributes.Target;if(k&&v){const x="word/"+v,I=c[x];I&&s.set(k,I)}}}break}}return s}function A(c){const s=new Map,t=c["word/_rels/document.xml.rels"];if(!t)return s;const i=xastUtilFromXml.fromXml(new TextDecoder().decode(t));if(i.type==="root"){for(const n of i.children)if(n.type==="element"&&n.name==="Relationships"){const l=n;for(const a of l.children)if(a.type==="element"&&a.name==="Relationship"){const b=a,h=b.attributes.Type;if(h&&h==="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"){const k=b.attributes.Id,v=b.attributes.Target;k&&v&&s.set(k,v)}}break}}return s}function P(c,s,t,i,n){if(c.type!=="root")return{type:"doc",content:[]};for(const l of 
c.children)if(l.type==="element"&&l.name==="w:document"){const a=l;for(const b of a.children)if(b.type==="element"&&b.name==="w:body")return{type:"doc",content:B(b.children.filter(h=>h.type==="element"),s,t,i,n)};break}return{type:"doc",content:[]}}function B(c,s,t,i,n){const l=[];let a=0;for(;a<c.length;){const b=c[a];if(b.name==="w:tbl"){l.push(convertTable(b,t,s)),a++,a<c.length&&c[a].name==="w:p"&&T(c[a])&&a++;continue}if(b.name==="w:p"){if(n&&T(b)){a++;continue}if(isCodeBlock(b)){const h=U(c,a);l.push(...h),a+=h.length;continue}if(isTaskItem(b)){const h=$(c,a);l.push(...h),a+=H(c,a);continue}if(isListItem(b)){const h=F(c,a,s,t,i);l.push(...h),a+=z(c,a);continue}if(isHorizontalRule(b)){l.push({type:"horizontalRule"}),a++;continue}l.push(convertParagraph(b,t,s)),a++;continue}a++}return l}function U(c,s){const t=[];let i=s;for(;i<c.length;){const n=c[i];if(n.name!=="w:p"||!isCodeBlock(n))break;const l=getCodeBlockLanguage(n),a={type:"codeBlock",...l&&{attrs:{language:l}},content:_(n)};t.push(a),i++}return t}function F(c,s,t,i,n){const l=[];let a=s;for(;a<c.length;){const b=c[a];if(b.name!=="w:p"||!isListItem(b))break;const h=getListInfo(b);if(!h)break;const k=n.get(h.numId),v=k?.type||"bullet",x=[];for(;a<c.length;){const C=c[a];if(C.name!=="w:p"||!isListItem(C))break;const M=getListInfo(C);if(!M||M.numId!==h.numId)break;const L={type:"listItem",content:[convertParagraph(C,i,t)]};x.push(L),a++}const I={type:v==="bullet"?"bulletList":"orderedList",content:x};v==="ordered"&&(I.attrs={type:null,...k?.start!==void 0&&{start:k.start}}),l.push(I)}return l}function z(c,s){let t=0,i=s;for(;i<c.length;){const n=c[i];if(n.name!=="w:p"||!isListItem(n))break;t++,i++}return t}function $(c,s){const t=[];let i=s;for(;i<c.length;){const n=c[i];if(n.name!=="w:p"||!isTaskItem(n))break;const l=convertTaskItem(n);t.push(l),i++}return[{type:"taskList",content:t}]}function H(c,s){let t=0,i=s;for(;i<c.length;){const n=c[i];if(n.name!=="w:p"||!isTaskItem(n))break;t++,i++}return 
t}function _(c){const s=[];for(const t of c.children){if(t.type!=="element"||t.name!=="w:r")continue;const i=t;for(const n of i.children)if(n.type==="element"&&n.name==="w:t"){const l=n.children.find(a=>a.type==="text");l&&"value"in l&&s.push({type:"text",text:l.value})}}return s}function T(c){for(const s of c.children){if(s.type!=="element"||s.name!=="w:r")continue;const t=s;for(const i of t.children)if(i.type==="element"&&i.name==="w:t"){const n=i.children.find(l=>l.type==="text");if(n&&"value"in n&&n.value.trim().length>0)return!1}}return!0}exports.convertParagraph=convertParagraph,exports.convertTable=convertTable,exports.convertTaskItem=convertTaskItem,exports.defaultImageConverter=defaultImageConverter,exports.extractAlignment=extractAlignment,exports.extractMarks=extractMarks,exports.extractRuns=extractRuns,exports.getCodeBlockLanguage=getCodeBlockLanguage,exports.getListInfo=getListInfo,exports.getTaskItemChecked=getTaskItemChecked,exports.isCodeBlock=isCodeBlock,exports.isHorizontalRule=isHorizontalRule,exports.isListItem=isListItem,exports.isTable=isTable,exports.isTaskItem=isTaskItem,exports.parseDOCX=parseDOCX;