@docen/import-docx 0.0.12 → 0.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +293 -290
- package/dist/index.d.mts +112 -45
- package/dist/index.mjs +232 -109
- package/package.json +5 -5
package/README.md
CHANGED
|
@@ -1,290 +1,293 @@
|
|
|
1
|
-
# @docen/import-docx
|
|
2
|
-
|
|
3
|
-

|
|
4
|
-

|
|
5
|
-

|
|
6
|
-
|
|
7
|
-
> Import Microsoft Word DOCX files to TipTap/ProseMirror content.
|
|
8
|
-
|
|
9
|
-
## Features
|
|
10
|
-
|
|
11
|
-
- 📝 **Rich Text Parsing** - Accurate parsing of headings, paragraphs, and blockquotes with formatting
|
|
12
|
-
- 🖼️ **Image Extraction** - Automatic image extraction with base64 conversion and cropping support
|
|
13
|
-
- 📊 **Table Support** - Complete table structure with colspan/rowspan detection algorithm
|
|
14
|
-
- ✅ **Lists & Tasks** - Bullet lists, numbered lists with start number extraction, and task lists with checkbox detection
|
|
15
|
-
- 🎨 **Text Formatting** - Bold, italic, underline, strikethrough, subscript, superscript, and highlights
|
|
16
|
-
- 🎯 **Text Styles** - Comprehensive style support including colors, backgrounds, fonts, sizes, and line heights
|
|
17
|
-
- 🔗 **Links** - Hyperlink extraction with href preservation
|
|
18
|
-
- 💻 **Code Blocks** - Code block detection with language attribute extraction
|
|
19
|
-
- 🌐 **Cross-Platform** - Works in both browser and Node.js environments
|
|
20
|
-
- ✂️ **Image Cropping** - Automatic cropping of images based on DOCX crop metadata
|
|
21
|
-
- 🧠 **Smart Parsing** - DOCX XML parsing with proper element grouping and structure reconstruction
|
|
22
|
-
- ⚡ **Fast Processing** - Uses fflate for ultra-fast ZIP decompression
|
|
23
|
-
|
|
24
|
-
## Installation
|
|
25
|
-
|
|
26
|
-
```bash
|
|
27
|
-
# Install with npm
|
|
28
|
-
$ npm install @docen/import-docx
|
|
29
|
-
|
|
30
|
-
# Install with yarn
|
|
31
|
-
$ yarn add @docen/import-docx
|
|
32
|
-
|
|
33
|
-
# Install with pnpm
|
|
34
|
-
$ pnpm add @docen/import-docx
|
|
35
|
-
```
|
|
36
|
-
|
|
37
|
-
## Quick Start
|
|
38
|
-
|
|
39
|
-
```typescript
|
|
40
|
-
import { parseDOCX } from "@docen/import-docx";
|
|
41
|
-
import { readFileSync } from "node:fs";
|
|
42
|
-
|
|
43
|
-
// Read DOCX file
|
|
44
|
-
const buffer = readFileSync("document.docx");
|
|
45
|
-
|
|
46
|
-
// Parse DOCX to TipTap JSON
|
|
47
|
-
const content = await parseDOCX(buffer);
|
|
48
|
-
|
|
49
|
-
// Use in TipTap editor
|
|
50
|
-
editor.commands.setContent(content);
|
|
51
|
-
```
|
|
52
|
-
|
|
53
|
-
## API Reference
|
|
54
|
-
|
|
55
|
-
### `parseDOCX(input, options?)`
|
|
56
|
-
|
|
57
|
-
Parses a DOCX file and converts it to TipTap/ProseMirror JSON content.
|
|
58
|
-
|
|
59
|
-
**Parameters:**
|
|
60
|
-
|
|
61
|
-
- `input: Buffer | ArrayBuffer | Uint8Array` - DOCX file data
|
|
62
|
-
- `options?: DocxImportOptions` - Optional import configuration
|
|
63
|
-
|
|
64
|
-
**Returns:** `Promise<JSONContent>` - TipTap/ProseMirror document content with images embedded
|
|
65
|
-
|
|
66
|
-
**Options:**
|
|
67
|
-
|
|
68
|
-
```typescript
|
|
69
|
-
interface DocxImportOptions {
|
|
70
|
-
/** Custom image converter (default: embed as base64) */
|
|
71
|
-
convertImage?: (image: DocxImageInfo) => Promise<DocxImageResult>;
|
|
72
|
-
|
|
73
|
-
/**
|
|
74
|
-
*
|
|
75
|
-
*
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
*
|
|
80
|
-
*
|
|
81
|
-
*
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
*
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
}
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
-
|
|
127
|
-
-
|
|
128
|
-
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
- **
|
|
136
|
-
- **
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
-
|
|
150
|
-
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
- **
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
- **
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
-
|
|
278
|
-
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
1
|
+
# @docen/import-docx
|
|
2
|
+
|
|
3
|
+

|
|
4
|
+

|
|
5
|
+

|
|
6
|
+
|
|
7
|
+
> Import Microsoft Word DOCX files to TipTap/ProseMirror content.
|
|
8
|
+
|
|
9
|
+
## Features
|
|
10
|
+
|
|
11
|
+
- 📝 **Rich Text Parsing** - Accurate parsing of headings, paragraphs, and blockquotes with formatting
|
|
12
|
+
- 🖼️ **Image Extraction** - Automatic image extraction with base64 conversion and cropping support
|
|
13
|
+
- 📊 **Table Support** - Complete table structure with colspan/rowspan detection algorithm
|
|
14
|
+
- ✅ **Lists & Tasks** - Bullet lists, numbered lists with start number extraction, and task lists with checkbox detection
|
|
15
|
+
- 🎨 **Text Formatting** - Bold, italic, underline, strikethrough, subscript, superscript, and highlights
|
|
16
|
+
- 🎯 **Text Styles** - Comprehensive style support including colors, backgrounds, fonts, sizes, and line heights
|
|
17
|
+
- 🔗 **Links** - Hyperlink extraction with href preservation
|
|
18
|
+
- 💻 **Code Blocks** - Code block detection with language attribute extraction
|
|
19
|
+
- 🌐 **Cross-Platform** - Works in both browser and Node.js environments
|
|
20
|
+
- ✂️ **Image Cropping** - Automatic cropping of images based on DOCX crop metadata
|
|
21
|
+
- 🧠 **Smart Parsing** - DOCX XML parsing with proper element grouping and structure reconstruction
|
|
22
|
+
- ⚡ **Fast Processing** - Uses fflate for ultra-fast ZIP decompression
|
|
23
|
+
|
|
24
|
+
## Installation
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
# Install with npm
|
|
28
|
+
$ npm install @docen/import-docx
|
|
29
|
+
|
|
30
|
+
# Install with yarn
|
|
31
|
+
$ yarn add @docen/import-docx
|
|
32
|
+
|
|
33
|
+
# Install with pnpm
|
|
34
|
+
$ pnpm add @docen/import-docx
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Quick Start
|
|
38
|
+
|
|
39
|
+
```typescript
|
|
40
|
+
import { parseDOCX } from "@docen/import-docx";
|
|
41
|
+
import { readFileSync } from "node:fs";
|
|
42
|
+
|
|
43
|
+
// Read DOCX file
|
|
44
|
+
const buffer = readFileSync("document.docx");
|
|
45
|
+
|
|
46
|
+
// Parse DOCX to TipTap JSON
|
|
47
|
+
const content = await parseDOCX(buffer);
|
|
48
|
+
|
|
49
|
+
// Use in TipTap editor
|
|
50
|
+
editor.commands.setContent(content);
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## API Reference
|
|
54
|
+
|
|
55
|
+
### `parseDOCX(input, options?)`
|
|
56
|
+
|
|
57
|
+
Parses a DOCX file and converts it to TipTap/ProseMirror JSON content.
|
|
58
|
+
|
|
59
|
+
**Parameters:**
|
|
60
|
+
|
|
61
|
+
- `input: Buffer | ArrayBuffer | Uint8Array` - DOCX file data
|
|
62
|
+
- `options?: DocxImportOptions` - Optional import configuration
|
|
63
|
+
|
|
64
|
+
**Returns:** `Promise<JSONContent>` - TipTap/ProseMirror document content with images embedded
|
|
65
|
+
|
|
66
|
+
**Options:**
|
|
67
|
+
|
|
68
|
+
```typescript
|
|
69
|
+
interface DocxImportOptions {
|
|
70
|
+
/** Custom image converter (default: embed as base64) */
|
|
71
|
+
convertImage?: (image: DocxImageInfo) => Promise<DocxImageResult>;
|
|
72
|
+
|
|
73
|
+
/**
|
|
74
|
+
* Dynamic import function for @napi-rs/canvas
|
|
75
|
+
* Required for image cropping in Node.js environment, ignored in browser
|
|
76
|
+
*
|
|
77
|
+
* @example
|
|
78
|
+
* import { parseDOCX } from '@docen/import-docx';
|
|
79
|
+
* const content = await parseDOCX(buffer, {
|
|
80
|
+
* canvasImport: () => import('@napi-rs/canvas')
|
|
81
|
+
* });
|
|
82
|
+
*/
|
|
83
|
+
canvasImport?: () => Promise<typeof import("@napi-rs/canvas")>;
|
|
84
|
+
|
|
85
|
+
/**
|
|
86
|
+
* Enable or disable image cropping during import
|
|
87
|
+
* When true, images with crop information in DOCX will be cropped
|
|
88
|
+
* When false (default), crop information is ignored and full image is used
|
|
89
|
+
*
|
|
90
|
+
* @default false
|
|
91
|
+
*/
|
|
92
|
+
crop?: boolean;
|
|
93
|
+
|
|
94
|
+
/** Paragraph processing options */
|
|
95
|
+
paragraph?: {
|
|
96
|
+
/** Whether to ignore empty paragraphs (default: false).
|
|
97
|
+
* Empty paragraphs are those without text content or images.
|
|
98
|
+
* Paragraphs containing only whitespace or images are not considered empty. */
|
|
99
|
+
ignoreEmpty?: boolean;
|
|
100
|
+
};
|
|
101
|
+
}
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
**Default Image Converter:**
|
|
105
|
+
|
|
106
|
+
The package exports `defaultImageConverter` which embeds images as base64 data URLs:
|
|
107
|
+
|
|
108
|
+
```typescript
|
|
109
|
+
import { defaultImageConverter } from "@docen/import-docx";
|
|
110
|
+
|
|
111
|
+
// Use in custom converter
|
|
112
|
+
await parseDOCX(buffer, {
|
|
113
|
+
convertImage: async (image) => {
|
|
114
|
+
if (shouldUploadToCDN) {
|
|
115
|
+
return uploadToCDN(image.data);
|
|
116
|
+
}
|
|
117
|
+
return defaultImageConverter(image);
|
|
118
|
+
},
|
|
119
|
+
});
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## Supported Content Types
|
|
123
|
+
|
|
124
|
+
### Text Formatting
|
|
125
|
+
|
|
126
|
+
- **Bold**, _Italic_, <u>Underline</u>, ~~Strikethrough~~
|
|
127
|
+
- ^Superscript^ and ~Subscript~
|
|
128
|
+
- Text highlights
|
|
129
|
+
- Text colors and background colors
|
|
130
|
+
- Font families and sizes
|
|
131
|
+
- Line heights
|
|
132
|
+
|
|
133
|
+
### Block Elements
|
|
134
|
+
|
|
135
|
+
- **Headings** (H1-H6) with proper level detection
|
|
136
|
+
- **Paragraphs** with text alignment (left, right, center, justify)
|
|
137
|
+
- **Blockquotes** (Detected by indentation + left border formatting)
|
|
138
|
+
- **Horizontal Rules** (Detected as page breaks in DOCX)
|
|
139
|
+
- **Code Blocks** with language attribute support
|
|
140
|
+
|
|
141
|
+
### Lists
|
|
142
|
+
|
|
143
|
+
- **Bullet Lists** with proper nesting and structure
|
|
144
|
+
- **Numbered Lists** with custom start number extraction
|
|
145
|
+
- **Task Lists** with checked/unchecked state detection (☐/☑ symbols)
|
|
146
|
+
|
|
147
|
+
### Tables
|
|
148
|
+
|
|
149
|
+
- Complete table structure parsing
|
|
150
|
+
- **Table Cells** with colspan detection using grid-based algorithm
|
|
151
|
+
- **Table Cells** with rowspan detection using vMerge tracking
|
|
152
|
+
- Cell alignment and formatting preservation
|
|
153
|
+
- Merged cell handling (both horizontal and vertical)
|
|
154
|
+
|
|
155
|
+
### Media & Embeds
|
|
156
|
+
|
|
157
|
+
- **Images** with automatic base64 conversion
|
|
158
|
+
- **Grouped Images** (DOCX image groups) support
|
|
159
|
+
- **Links** (hyperlinks) with href extraction
|
|
160
|
+
|
|
161
|
+
## Parsing Algorithm
|
|
162
|
+
|
|
163
|
+
### Document Structure
|
|
164
|
+
|
|
165
|
+
The parser follows a structured workflow:
|
|
166
|
+
|
|
167
|
+
1. **Extract Relationships** - Parse `_rels/document.xml.rels` for hyperlinks and images
|
|
168
|
+
2. **Parse Numbering** - Extract list definitions from `numbering.xml` (abstractNum → numFmt)
|
|
169
|
+
3. **Process Document Body** - Iterate through document.xml elements:
|
|
170
|
+
- Detect content types (tables, lists, paragraphs, code blocks, etc.)
|
|
171
|
+
- Group consecutive elements into proper containers
|
|
172
|
+
- Convert XML nodes to TipTap JSON nodes
|
|
173
|
+
|
|
174
|
+
### Table Processing
|
|
175
|
+
|
|
176
|
+
Tables use specialized algorithms:
|
|
177
|
+
|
|
178
|
+
- **Colspan Detection** - Grid-based algorithm tracks cell positions and detects horizontal merges
|
|
179
|
+
- **Rowspan Detection** - Vertical merge (vMerge) tracking across rows with proper cell skipping
|
|
180
|
+
- **Cell Content** - Recursive parsing of nested paragraphs and formatting
|
|
181
|
+
- **Hyperlink Support** - Proper handling of links within table cells
|
|
182
|
+
|
|
183
|
+
### List Processing
|
|
184
|
+
|
|
185
|
+
Lists utilize the DOCX numbering system:
|
|
186
|
+
|
|
187
|
+
- **Numbering ID Mapping** - Maps abstractNum to formatting (bullet vs decimal)
|
|
188
|
+
- **Start Value Extraction** - Extracts and preserves start numbers for ordered lists
|
|
189
|
+
- **Nesting Preservation** - Maintains proper list hierarchy
|
|
190
|
+
- **Consecutive Grouping** - Groups consecutive list items into list containers
|
|
191
|
+
|
|
192
|
+
## Examples
|
|
193
|
+
|
|
194
|
+
### Basic Usage
|
|
195
|
+
|
|
196
|
+
```typescript
|
|
197
|
+
import { parseDOCX } from "@docen/import-docx";
|
|
198
|
+
|
|
199
|
+
const buffer = readFileSync("example.docx");
|
|
200
|
+
const content = await parseDOCX(buffer);
|
|
201
|
+
|
|
202
|
+
console.log(JSON.stringify(content, null, 2));
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
### Use with TipTap Editor
|
|
206
|
+
|
|
207
|
+
```typescript
|
|
208
|
+
import { Editor } from "@tiptap/core";
|
|
209
|
+
import { parseDOCX } from "@docen/import-docx";
|
|
210
|
+
|
|
211
|
+
const editor = new Editor({
|
|
212
|
+
extensions: [...],
|
|
213
|
+
content: "",
|
|
214
|
+
});
|
|
215
|
+
|
|
216
|
+
// Import DOCX file
|
|
217
|
+
async function importDocx(file: File) {
|
|
218
|
+
const buffer = await file.arrayBuffer();
|
|
219
|
+
const content = await parseDOCX(buffer);
|
|
220
|
+
editor.commands.setContent(content);
|
|
221
|
+
}
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
### Node.js Environment with Image Cropping
|
|
225
|
+
|
|
226
|
+
To enable image cropping in Node.js environment, you need to provide `@napi-rs/canvas`:
|
|
227
|
+
|
|
228
|
+
```typescript
|
|
229
|
+
import { parseDOCX } from "@docen/import-docx";
|
|
230
|
+
import { readFileSync } from "node:fs";
|
|
231
|
+
|
|
232
|
+
// Install @napi-rs/canvas first: pnpm add @napi-rs/canvas
|
|
233
|
+
const buffer = readFileSync("document.docx");
|
|
234
|
+
|
|
235
|
+
const content = await parseDOCX(buffer, {
|
|
236
|
+
image: { canvasImport: () => import("@napi-rs/canvas"),
|
|
237
|
+
crop: true }, // Enable cropping (default is false)
|
|
238
|
+
});
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
**Note:** By default, image cropping is disabled. Images are imported in full size, ignoring crop information in DOCX.
|
|
242
|
+
|
|
243
|
+
### Disable Image Cropping
|
|
244
|
+
|
|
245
|
+
If you want to explicitly ignore crop information in DOCX and use full images (this is the default behavior):
|
|
246
|
+
|
|
247
|
+
```typescript
|
|
248
|
+
const content = await parseDOCX(buffer, {
|
|
249
|
+
image: { crop: false },
|
|
250
|
+
});
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
## Known Limitations
|
|
254
|
+
|
|
255
|
+
### Blockquote Detection
|
|
256
|
+
|
|
257
|
+
DOCX does not have a semantic blockquote structure. Blockquotes are detected by:
|
|
258
|
+
|
|
259
|
+
- Left indentation ≥ 720 twips (0.5 inch)
|
|
260
|
+
- Presence of left border (single line)
|
|
261
|
+
|
|
262
|
+
This detection method may produce false positives for documents with custom indentation similar to blockquotes.
|
|
263
|
+
|
|
264
|
+
### Code Marks
|
|
265
|
+
|
|
266
|
+
The `code` mark is NOT automatically detected from monospace fonts (Consolas, Courier New, etc.). This is intentional to avoid false positives. Code marks should be explicitly added in the source document or through editor UI.
|
|
267
|
+
|
|
268
|
+
### Color Format
|
|
269
|
+
|
|
270
|
+
All colors are imported as hex values (e.g., "#FF0000", "#008000"). Color names from the original document are not preserved.
|
|
271
|
+
|
|
272
|
+
### Image Limitations
|
|
273
|
+
|
|
274
|
+
- Only embedded images are supported (external image links are not fetched)
|
|
275
|
+
- Image dimensions and title are extracted from DOCX metadata
|
|
276
|
+
- **Image Cropping**: By default, images are imported in full size (crop information is ignored)
|
|
277
|
+
- To enable cropping, set `image.crop` to `true` in the options
|
|
278
|
+
- In browser environments, cropping works natively with Canvas API
|
|
279
|
+
- In Node.js, you must also provide the `image.canvasImport` option with a dynamic import of `@napi-rs/canvas`
|
|
280
|
+
- If `@napi-rs/canvas` is not available in Node.js, images will be imported without cropping (graceful degradation)
|
|
281
|
+
- Some DOCX image features (like advanced positioning or text wrapping) have limited support
|
|
282
|
+
|
|
283
|
+
### Table Cell Types
|
|
284
|
+
|
|
285
|
+
DOCX format does not distinguish between header and body cells at a semantic level. All cells are imported as `tableCell` type for consistency. This is a DOCX format limitation.
|
|
286
|
+
|
|
287
|
+
## Contributing
|
|
288
|
+
|
|
289
|
+
Contributions are welcome! Please read our [Contributor Covenant](https://www.contributor-covenant.org/version/2/1/code_of_conduct/) and submit pull requests to the [main repository](https://github.com/DemoMacro/docen).
|
|
290
|
+
|
|
291
|
+
## License
|
|
292
|
+
|
|
293
|
+
- [MIT](LICENSE) © [Demo Macro](https://imst.xyz/)
|
package/dist/index.d.mts
CHANGED
|
@@ -2166,7 +2166,7 @@ can be lifted. Will not go across
|
|
|
2166
2166
|
[isolating](https://prosemirror.net/docs/ref/#model.NodeSpec.isolating) parent nodes.
|
|
2167
2167
|
*/
|
|
2168
2168
|
//#endregion
|
|
2169
|
-
//#region ../../node_modules/.pnpm/prosemirror-view@1.41.
|
|
2169
|
+
//#region ../../node_modules/.pnpm/prosemirror-view@1.41.7/node_modules/prosemirror-view/dist/index.d.ts
|
|
2170
2170
|
type DOMNode = InstanceType<typeof window.Node>;
|
|
2171
2171
|
type WidgetConstructor = ((view: EditorView, getPos: () => number | undefined) => DOMNode) | DOMNode;
|
|
2172
2172
|
/**
|
|
@@ -3672,7 +3672,7 @@ point into textblock nodes. It can be empty (a regular cursor
|
|
|
3672
3672
|
position).
|
|
3673
3673
|
*/
|
|
3674
3674
|
//#endregion
|
|
3675
|
-
//#region ../../node_modules/.pnpm/@tiptap+core@3.20.
|
|
3675
|
+
//#region ../../node_modules/.pnpm/@tiptap+core@3.20.4_@tiptap+pm@3.20.4/node_modules/@tiptap/core/dist/index.d.ts
|
|
3676
3676
|
type StringKeyOf<T> = Extract<keyof T, string>;
|
|
3677
3677
|
type CallbackType<T extends Record<string, any>, EventName extends StringKeyOf<T>> = T[EventName] extends any[] ? T[EventName] : [T[EventName]];
|
|
3678
3678
|
type CallbackFunction<T extends Record<string, any>, EventName extends StringKeyOf<T>> = (...props: CallbackType<T, EventName>) => any;
|
|
@@ -4436,6 +4436,27 @@ interface ExtendableConfig<Options = any, Storage = any, Config extends Extensio
|
|
|
4436
4436
|
* Defines if this markdown element should indent its child elements
|
|
4437
4437
|
*/
|
|
4438
4438
|
indentsContent?: boolean;
|
|
4439
|
+
/**
|
|
4440
|
+
* Lets a mark tell the Markdown serializer which inline HTML tags it can
|
|
4441
|
+
* safely use when plain markdown delimiters would become ambiguous.
|
|
4442
|
+
*
|
|
4443
|
+
* This is mainly useful for overlapping marks. For example, bold followed
|
|
4444
|
+
* by bold+italic followed by italic cannot always be written back with only
|
|
4445
|
+
* `*` and `**` in a way that still parses correctly. In that case, the
|
|
4446
|
+
* serializer can close the overlapping section with markdown and reopen the
|
|
4447
|
+
* remaining tail with HTML instead.
|
|
4448
|
+
*
|
|
4449
|
+
* Example:
|
|
4450
|
+
* - desired formatting: `**123` + `*456*` + `789 italic`
|
|
4451
|
+
* - serialized result: `**123*456***<em>789</em>`
|
|
4452
|
+
*
|
|
4453
|
+
* If your extension defines custom mark names, set `htmlReopen` on that
|
|
4454
|
+
* extension so the serializer can reuse its HTML form for overlap cases.
|
|
4455
|
+
*/
|
|
4456
|
+
htmlReopen?: {
|
|
4457
|
+
open: string;
|
|
4458
|
+
close: string;
|
|
4459
|
+
};
|
|
4439
4460
|
};
|
|
4440
4461
|
/**
|
|
4441
4462
|
* This function extends the schema of the node.
|
|
@@ -5279,7 +5300,8 @@ type MarkdownToken = {
|
|
|
5279
5300
|
*/
|
|
5280
5301
|
type MarkdownParseHelpers = {
|
|
5281
5302
|
/** Parse an array of inline tokens into text nodes with marks */parseInline: (tokens: MarkdownToken[]) => JSONContent[]; /** Parse an array of block-level tokens */
|
|
5282
|
-
parseChildren: (tokens: MarkdownToken[]) => JSONContent[]; /**
|
|
5303
|
+
parseChildren: (tokens: MarkdownToken[]) => JSONContent[]; /** Parse block-level tokens while preserving implicit empty paragraphs from blank lines */
|
|
5304
|
+
parseBlockChildren?: (tokens: MarkdownToken[]) => JSONContent[]; /** Create a text node with optional marks */
|
|
5283
5305
|
createTextNode: (text: string, marks?: Array<{
|
|
5284
5306
|
type: string;
|
|
5285
5307
|
attrs?: any;
|
|
@@ -5312,6 +5334,7 @@ type RenderContext = {
|
|
|
5312
5334
|
level: number;
|
|
5313
5335
|
meta?: Record<string, any>;
|
|
5314
5336
|
parentType?: string | null;
|
|
5337
|
+
previousNode?: JSONContent | null;
|
|
5315
5338
|
};
|
|
5316
5339
|
/** Extension contract for markdown parsing/serialization. */
|
|
5317
5340
|
/**
|
|
@@ -5345,7 +5368,8 @@ type MarkdownRendererHelpers = {
|
|
|
5345
5368
|
* @param separator An optional separator string (legacy) or RenderContext
|
|
5346
5369
|
* @returns The rendered markdown string
|
|
5347
5370
|
*/
|
|
5348
|
-
renderChildren: (nodes: JSONContent | JSONContent[], separator?: string) => string;
|
|
5371
|
+
renderChildren: (nodes: JSONContent | JSONContent[], separator?: string) => string; /** Render a single child node with its sibling index preserved */
|
|
5372
|
+
renderChild?: (node: JSONContent, index: number) => string;
|
|
5349
5373
|
/**
|
|
5350
5374
|
* Render a text token to a markdown string
|
|
5351
5375
|
* @param prefix The prefix to add before the content
|
|
@@ -6638,49 +6662,12 @@ interface DocxImportOptions {
|
|
|
6638
6662
|
image?: {
|
|
6639
6663
|
handler?: DocxImageImportHandler;
|
|
6640
6664
|
canvasImport?: () => Promise<typeof _napi_rs_canvas0>;
|
|
6641
|
-
|
|
6665
|
+
crop?: boolean;
|
|
6666
|
+
};
|
|
6667
|
+
paragraph?: {
|
|
6668
|
+
ignoreEmpty?: boolean;
|
|
6642
6669
|
};
|
|
6643
|
-
ignoreEmptyParagraphs?: boolean;
|
|
6644
|
-
}
|
|
6645
|
-
//#endregion
|
|
6646
|
-
//#region src/parsers/styles.d.ts
|
|
6647
|
-
/**
|
|
6648
|
-
* Character format information from a style definition
|
|
6649
|
-
*/
|
|
6650
|
-
interface CharFormat {
|
|
6651
|
-
color?: string;
|
|
6652
|
-
bold?: boolean;
|
|
6653
|
-
italic?: boolean;
|
|
6654
|
-
fontSize?: number;
|
|
6655
|
-
fontFamily?: string;
|
|
6656
|
-
underline?: boolean;
|
|
6657
|
-
strike?: boolean;
|
|
6658
|
-
}
|
|
6659
|
-
/**
|
|
6660
|
-
* Style information from styles.xml
|
|
6661
|
-
*/
|
|
6662
|
-
interface StyleInfo {
|
|
6663
|
-
styleId: string;
|
|
6664
|
-
name?: string;
|
|
6665
|
-
outlineLvl?: number;
|
|
6666
|
-
charFormat?: CharFormat;
|
|
6667
|
-
}
|
|
6668
|
-
type StyleMap = Map<string, StyleInfo>;
|
|
6669
|
-
//#endregion
|
|
6670
|
-
//#region src/parser.d.ts
|
|
6671
|
-
/**
|
|
6672
|
-
* Parsing context containing all global resources from DOCX file
|
|
6673
|
-
*/
|
|
6674
|
-
interface ParseContext extends DocxImportOptions {
|
|
6675
|
-
hyperlinks: Map<string, string>;
|
|
6676
|
-
images: Map<string, ImageInfo>;
|
|
6677
|
-
listTypeMap: ListTypeMap;
|
|
6678
|
-
styleMap: StyleMap;
|
|
6679
6670
|
}
|
|
6680
|
-
/**
|
|
6681
|
-
* Main entry point: Parse DOCX file and convert to TipTap JSON
|
|
6682
|
-
*/
|
|
6683
|
-
declare function parseDOCX(input: DataType, options?: DocxImportOptions): Promise<JSONContent>;
|
|
6684
6671
|
//#endregion
|
|
6685
6672
|
//#region ../../node_modules/.pnpm/@types+unist@3.0.3/node_modules/@types/unist/index.d.ts
|
|
6686
6673
|
// ## Interfaces
|
|
@@ -7016,6 +7003,86 @@ interface Text extends Literal {
|
|
|
7016
7003
|
*/
|
|
7017
7004
|
interface TextData extends Data {}
|
|
7018
7005
|
//#endregion
|
|
7006
|
+
//#region ../extensions/dist/types.d.mts
|
|
7007
|
+
//#endregion
|
|
7008
|
+
//#region src/types.d.ts
|
|
7009
|
+
/**
|
|
7010
|
+
* Border definition (compatible with docx.js BorderOptions)
|
|
7011
|
+
* Used by paragraphs, table cells, and blockquotes
|
|
7012
|
+
*/
|
|
7013
|
+
interface Border {
|
|
7014
|
+
/** Border color (hex without #, e.g., "FF0000" or "auto") */
|
|
7015
|
+
color?: string;
|
|
7016
|
+
/** Border size (eighth-points, 1/8 pt) */
|
|
7017
|
+
size?: number;
|
|
7018
|
+
/** Border style */
|
|
7019
|
+
style?: "single" | "dashed" | "dotted" | "double" | "dotDash" | "dotDotDash" | "none";
|
|
7020
|
+
/** Space between border and content (points) */
|
|
7021
|
+
space?: number;
|
|
7022
|
+
}
|
|
7023
|
+
/**
|
|
7024
|
+
* Shading definition (compatible with docx.js ShadingOptions)
|
|
7025
|
+
* Used for paragraph and table cell background colors
|
|
7026
|
+
*/
|
|
7027
|
+
interface Shading {
|
|
7028
|
+
/** Fill color (hex without #, e.g., "FF0000") */
|
|
7029
|
+
fill?: string;
|
|
7030
|
+
/** Pattern color (hex without #) */
|
|
7031
|
+
color?: string;
|
|
7032
|
+
/** Shading pattern type (e.g., "clear", "percent-10") */
|
|
7033
|
+
type?: string;
|
|
7034
|
+
}
|
|
7035
|
+
//#endregion
|
|
7036
|
+
//#region src/parsers/styles.d.ts
|
|
7037
|
+
/**
|
|
7038
|
+
* Character format information from a style definition
|
|
7039
|
+
*/
|
|
7040
|
+
interface CharFormat {
|
|
7041
|
+
color?: string;
|
|
7042
|
+
bold?: boolean;
|
|
7043
|
+
italic?: boolean;
|
|
7044
|
+
fontSize?: number;
|
|
7045
|
+
fontFamily?: string;
|
|
7046
|
+
underline?: boolean;
|
|
7047
|
+
strike?: boolean;
|
|
7048
|
+
}
|
|
7049
|
+
/**
|
|
7050
|
+
* Paragraph format information from a style definition
|
|
7051
|
+
*/
|
|
7052
|
+
interface ParagraphFormat {
|
|
7053
|
+
shading?: Shading;
|
|
7054
|
+
borderTop?: Border;
|
|
7055
|
+
borderBottom?: Border;
|
|
7056
|
+
borderLeft?: Border;
|
|
7057
|
+
borderRight?: Border;
|
|
7058
|
+
}
|
|
7059
|
+
/**
|
|
7060
|
+
* Style information from styles.xml
|
|
7061
|
+
*/
|
|
7062
|
+
interface StyleInfo {
|
|
7063
|
+
styleId: string;
|
|
7064
|
+
name?: string;
|
|
7065
|
+
outlineLvl?: number;
|
|
7066
|
+
charFormat?: CharFormat;
|
|
7067
|
+
paragraphFormat?: ParagraphFormat;
|
|
7068
|
+
}
|
|
7069
|
+
type StyleMap = Map<string, StyleInfo>;
|
|
7070
|
+
//#endregion
|
|
7071
|
+
//#region src/parser.d.ts
|
|
7072
|
+
/**
|
|
7073
|
+
* Parsing context containing all global resources from DOCX file
|
|
7074
|
+
*/
|
|
7075
|
+
interface ParseContext extends DocxImportOptions {
|
|
7076
|
+
hyperlinks: Map<string, string>;
|
|
7077
|
+
images: Map<string, ImageInfo>;
|
|
7078
|
+
listTypeMap: ListTypeMap;
|
|
7079
|
+
styleMap: StyleMap;
|
|
7080
|
+
}
|
|
7081
|
+
/**
|
|
7082
|
+
* Main entry point: Parse DOCX file and convert to TipTap JSON
|
|
7083
|
+
*/
|
|
7084
|
+
declare function parseDOCX(input: DataType, options?: DocxImportOptions): Promise<JSONContent>;
|
|
7085
|
+
//#endregion
|
|
7019
7086
|
//#region src/converters/paragraph.d.ts
|
|
7020
7087
|
/**
|
|
7021
7088
|
* Convert DOCX paragraph node to TipTap paragraph
|
package/dist/index.mjs
CHANGED
|
@@ -463,7 +463,7 @@ async function applyCropToImage(pic, imgInfo, params) {
|
|
|
463
463
|
if (!base64Data) return imgInfo;
|
|
464
464
|
const croppedBase64 = uint8ArrayToBase64(await cropImageIfNeeded(base64ToUint8Array(base64Data), crop, {
|
|
465
465
|
canvasImport: params.context.image?.canvasImport,
|
|
466
|
-
enabled: params.context.image?.
|
|
466
|
+
enabled: params.context.image?.crop ?? false
|
|
467
467
|
}));
|
|
468
468
|
const originalWidth = imgInfo.width || 0;
|
|
469
469
|
const originalHeight = imgInfo.height || 0;
|
|
@@ -532,25 +532,6 @@ function findDrawingElement(run) {
|
|
|
532
532
|
return choice ? findChild(choice, "w:drawing") : null;
|
|
533
533
|
}
|
|
534
534
|
/**
|
|
535
|
-
* Adjust image dimensions to fit within group bounds while preserving aspect ratio
|
|
536
|
-
*/
|
|
537
|
-
function fitToGroup(groupWidth, groupHeight, metaWidth, metaHeight) {
|
|
538
|
-
const metaRatio = metaWidth / metaHeight;
|
|
539
|
-
const groupRatio = groupWidth / groupHeight;
|
|
540
|
-
if (Math.abs(metaRatio - groupRatio) > .1) if (metaRatio > groupRatio) return {
|
|
541
|
-
width: groupWidth,
|
|
542
|
-
height: Math.round(groupWidth / metaRatio)
|
|
543
|
-
};
|
|
544
|
-
else return {
|
|
545
|
-
width: Math.round(groupHeight * metaRatio),
|
|
546
|
-
height: groupHeight
|
|
547
|
-
};
|
|
548
|
-
return {
|
|
549
|
-
width: groupWidth,
|
|
550
|
-
height: groupHeight
|
|
551
|
-
};
|
|
552
|
-
}
|
|
553
|
-
/**
|
|
554
535
|
* Extract images from DOCX and convert to base64 data URLs or use custom handler
|
|
555
536
|
* Returns Map of relationship ID to image info (src + dimensions)
|
|
556
537
|
*/
|
|
@@ -613,7 +594,7 @@ async function extractImageFromDrawing(drawing, params) {
|
|
|
613
594
|
try {
|
|
614
595
|
src = `${metadata},${uint8ArrayToBase64(await cropImageIfNeeded(bytes, crop, {
|
|
615
596
|
canvasImport: context.image?.canvasImport,
|
|
616
|
-
enabled: context.image?.
|
|
597
|
+
enabled: context.image?.crop ?? false
|
|
617
598
|
}))}`;
|
|
618
599
|
} catch (error) {
|
|
619
600
|
console.warn("Image cropping failed, using original image:", error);
|
|
@@ -711,10 +692,57 @@ async function extractImagesFromDrawing(drawing, params) {
|
|
|
711
692
|
if (group) {
|
|
712
693
|
const groupSp = findChild(group, "wpg:grpSp");
|
|
713
694
|
const pictures = groupSp ? [...findDeepChildren(groupSp, "pic:pic"), ...findDeepChildren(groupSp, "pic")] : [...findDeepChildren(group, "pic:pic"), ...findDeepChildren(group, "pic")];
|
|
695
|
+
const wspShapes = groupSp ? findDeepChildren(groupSp, "wps:wsp") : findDeepChildren(group, "wps:wsp");
|
|
696
|
+
const childImages = [];
|
|
714
697
|
for (const pic of pictures) {
|
|
715
698
|
const picGraphic = findChild(pic, "a:graphic");
|
|
699
|
+
let relativeSize = null;
|
|
700
|
+
const spPr = findChild(pic, "pic:spPr");
|
|
701
|
+
if (spPr) {
|
|
702
|
+
const xfrm = findChild(spPr, "a:xfrm");
|
|
703
|
+
if (xfrm) {
|
|
704
|
+
const ext = findChild(xfrm, "a:ext");
|
|
705
|
+
if (ext && ext.attributes["cx"] && ext.attributes["cy"]) relativeSize = {
|
|
706
|
+
cx: parseInt(ext.attributes["cx"], 10),
|
|
707
|
+
cy: parseInt(ext.attributes["cy"], 10)
|
|
708
|
+
};
|
|
709
|
+
}
|
|
710
|
+
}
|
|
711
|
+
childImages.push({
|
|
712
|
+
pic,
|
|
713
|
+
picGraphic,
|
|
714
|
+
relativeSize,
|
|
715
|
+
isWsp: false
|
|
716
|
+
});
|
|
717
|
+
}
|
|
718
|
+
for (const wsp of wspShapes) {
|
|
719
|
+
const wspGraphic = findChild(wsp, "a:graphic");
|
|
720
|
+
let relativeSize = null;
|
|
721
|
+
const spPr = findChild(wsp, "wps:spPr");
|
|
722
|
+
if (spPr) {
|
|
723
|
+
const xfrm = findChild(spPr, "a:xfrm");
|
|
724
|
+
if (xfrm) {
|
|
725
|
+
const ext = findChild(xfrm, "a:ext");
|
|
726
|
+
if (ext && ext.attributes["cx"] && ext.attributes["cy"]) relativeSize = {
|
|
727
|
+
cx: parseInt(ext.attributes["cx"], 10),
|
|
728
|
+
cy: parseInt(ext.attributes["cy"], 10)
|
|
729
|
+
};
|
|
730
|
+
}
|
|
731
|
+
}
|
|
732
|
+
childImages.push({
|
|
733
|
+
pic: wsp,
|
|
734
|
+
picGraphic: wspGraphic,
|
|
735
|
+
relativeSize,
|
|
736
|
+
isWsp: true
|
|
737
|
+
});
|
|
738
|
+
}
|
|
739
|
+
let totalCx = 0;
|
|
740
|
+
for (const child of childImages) if (child.relativeSize) totalCx += child.relativeSize.cx;
|
|
741
|
+
const scaleFactor = totalCx > 0 && groupWidth ? groupWidth / totalCx : 1;
|
|
742
|
+
for (const child of childImages) {
|
|
743
|
+
const { pic, picGraphic, relativeSize, isWsp } = child;
|
|
716
744
|
if (!picGraphic) {
|
|
717
|
-
const blipFill = findChild(pic, "pic:blipFill") || findDeepChild(pic, "a:blipFill");
|
|
745
|
+
const blipFill = isWsp ? findChild(pic, "wps:blipFill") || findDeepChild(pic, "a:blipFill") : findChild(pic, "pic:blipFill") || findDeepChild(pic, "a:blipFill");
|
|
718
746
|
if (!blipFill) continue;
|
|
719
747
|
const blip = findChild(blipFill, "a:blip") || findDeepChild(blipFill, "a:blip");
|
|
720
748
|
if (!blip?.attributes["r:embed"]) continue;
|
|
@@ -722,13 +750,22 @@ async function extractImagesFromDrawing(drawing, params) {
|
|
|
722
750
|
const imgInfo = params.context.images.get(rId);
|
|
723
751
|
if (!imgInfo) continue;
|
|
724
752
|
const processedImgInfo = await applyCropToImage(pic, imgInfo, params);
|
|
753
|
+
let width = processedImgInfo.width;
|
|
754
|
+
let height = processedImgInfo.height;
|
|
755
|
+
if (groupWidth && groupHeight && relativeSize && totalCx > 0) {
|
|
756
|
+
width = Math.round(relativeSize.cx * scaleFactor);
|
|
757
|
+
height = Math.round(relativeSize.cy * scaleFactor);
|
|
758
|
+
} else if (groupWidth && groupHeight) {
|
|
759
|
+
width = groupWidth;
|
|
760
|
+
height = groupHeight;
|
|
761
|
+
}
|
|
725
762
|
result.push({
|
|
726
763
|
type: "image",
|
|
727
764
|
attrs: {
|
|
728
765
|
src: processedImgInfo.src,
|
|
729
766
|
alt: "",
|
|
730
|
-
width
|
|
731
|
-
height
|
|
767
|
+
width,
|
|
768
|
+
height
|
|
732
769
|
}
|
|
733
770
|
});
|
|
734
771
|
continue;
|
|
@@ -748,7 +785,7 @@ async function extractImagesFromDrawing(drawing, params) {
|
|
|
748
785
|
if (base64Data) {
|
|
749
786
|
const croppedBase64 = uint8ArrayToBase64(await cropImageIfNeeded(base64ToUint8Array(base64Data), crop, {
|
|
750
787
|
canvasImport: params.context.image?.canvasImport,
|
|
751
|
-
enabled: params.context.image?.
|
|
788
|
+
enabled: params.context.image?.crop ?? false
|
|
752
789
|
}));
|
|
753
790
|
image.attrs.src = `${metadata},${croppedBase64}`;
|
|
754
791
|
const rId = syntheticDrawing.children[0]?.type === "element" ? findDeepChild(syntheticDrawing.children[0], "a:blip")?.attributes["r:embed"] : void 0;
|
|
@@ -763,27 +800,25 @@ async function extractImagesFromDrawing(drawing, params) {
|
|
|
763
800
|
const visibleHeightPct = 1 - cropTopPct - cropBottomPct;
|
|
764
801
|
const croppedWidth = Math.round(imgInfo.width * visibleWidthPct);
|
|
765
802
|
const croppedHeight = Math.round(imgInfo.height * visibleHeightPct);
|
|
766
|
-
|
|
767
|
-
|
|
803
|
+
if (groupWidth && groupHeight && relativeSize && totalCx > 0) {
|
|
804
|
+
image.attrs.width = Math.round(relativeSize.cx * scaleFactor);
|
|
805
|
+
image.attrs.height = Math.round(relativeSize.cy * scaleFactor);
|
|
806
|
+
} else {
|
|
807
|
+
image.attrs.width = croppedWidth;
|
|
808
|
+
image.attrs.height = croppedHeight;
|
|
809
|
+
}
|
|
768
810
|
}
|
|
769
811
|
}
|
|
770
812
|
}
|
|
771
813
|
} catch (error) {
|
|
772
814
|
console.warn("Grouped image cropping failed, using original image:", error);
|
|
773
815
|
}
|
|
774
|
-
else {
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
image.attrs.width = adjusted.width;
|
|
781
|
-
image.attrs.height = adjusted.height;
|
|
782
|
-
} else {
|
|
783
|
-
image.attrs.width = groupWidth;
|
|
784
|
-
image.attrs.height = groupHeight;
|
|
785
|
-
}
|
|
786
|
-
}
|
|
816
|
+
else if (groupWidth && groupHeight && relativeSize && totalCx > 0) {
|
|
817
|
+
image.attrs.width = Math.round(relativeSize.cx * scaleFactor);
|
|
818
|
+
image.attrs.height = Math.round(relativeSize.cy * scaleFactor);
|
|
819
|
+
} else if (groupWidth && groupHeight) {
|
|
820
|
+
image.attrs.width = groupWidth;
|
|
821
|
+
image.attrs.height = groupHeight;
|
|
787
822
|
}
|
|
788
823
|
result.push(image);
|
|
789
824
|
}
|
|
@@ -854,6 +889,65 @@ function parseNumberingXml(files) {
|
|
|
854
889
|
//#endregion
|
|
855
890
|
//#region src/parsers/styles.ts
|
|
856
891
|
/**
|
|
892
|
+
* Parse a single border element
|
|
893
|
+
*/
|
|
894
|
+
function parseBorder(borderNode) {
|
|
895
|
+
if (!borderNode) return null;
|
|
896
|
+
const val = borderNode.attributes["w:val"];
|
|
897
|
+
const size = borderNode.attributes["w:sz"];
|
|
898
|
+
const color = borderNode.attributes["w:color"];
|
|
899
|
+
const space = borderNode.attributes["w:space"];
|
|
900
|
+
const styleMap = {
|
|
901
|
+
single: "single",
|
|
902
|
+
dashed: "dashed",
|
|
903
|
+
dotted: "dotted",
|
|
904
|
+
double: "double",
|
|
905
|
+
dotDash: "dotDash",
|
|
906
|
+
dotDotDash: "dotDotDash",
|
|
907
|
+
none: "none",
|
|
908
|
+
nil: "none"
|
|
909
|
+
};
|
|
910
|
+
const border = {};
|
|
911
|
+
if (color && color !== "auto") border.color = `#${color}`;
|
|
912
|
+
if (size) border.size = parseInt(size);
|
|
913
|
+
if (val && styleMap[val]) border.style = styleMap[val];
|
|
914
|
+
if (space) border.space = parseInt(space);
|
|
915
|
+
return Object.keys(border).length > 0 ? border : null;
|
|
916
|
+
}
|
|
917
|
+
/**
|
|
918
|
+
* Parse borders from w:pBdr or w:pBorders element
|
|
919
|
+
*/
|
|
920
|
+
function parseBorders(pPr) {
|
|
921
|
+
if (!pPr) return null;
|
|
922
|
+
const borderElement = findChild(pPr, "w:pBorders") || findChild(pPr, "w:pBdr");
|
|
923
|
+
if (!borderElement) return null;
|
|
924
|
+
const borders = {};
|
|
925
|
+
const topBorder = parseBorder(findChild(borderElement, "w:top"));
|
|
926
|
+
if (topBorder) borders.borderTop = topBorder;
|
|
927
|
+
const bottomBorder = parseBorder(findChild(borderElement, "w:bottom"));
|
|
928
|
+
if (bottomBorder) borders.borderBottom = bottomBorder;
|
|
929
|
+
const leftBorder = parseBorder(findChild(borderElement, "w:left"));
|
|
930
|
+
if (leftBorder) borders.borderLeft = leftBorder;
|
|
931
|
+
const rightBorder = parseBorder(findChild(borderElement, "w:right"));
|
|
932
|
+
if (rightBorder) borders.borderRight = rightBorder;
|
|
933
|
+
return Object.keys(borders).length > 0 ? borders : null;
|
|
934
|
+
}
|
|
935
|
+
/**
|
|
936
|
+
* Parse shading from w:shd element
|
|
937
|
+
*/
|
|
938
|
+
function parseShading(pPr) {
|
|
939
|
+
if (!pPr) return null;
|
|
940
|
+
const shd = findChild(pPr, "w:shd");
|
|
941
|
+
if (!shd) return null;
|
|
942
|
+
const shading = {};
|
|
943
|
+
if (shd.attributes["w:fill"]) {
|
|
944
|
+
const fill = shd.attributes["w:fill"];
|
|
945
|
+
shading.fill = fill.startsWith("#") ? fill : `#${fill}`;
|
|
946
|
+
}
|
|
947
|
+
if (shd.attributes["w:val"]) shading.type = shd.attributes["w:val"];
|
|
948
|
+
return Object.keys(shading).length > 0 ? shading : null;
|
|
949
|
+
}
|
|
950
|
+
/**
|
|
857
951
|
* Parse styles.xml to build style map
|
|
858
952
|
* Extracts outlineLvl from paragraph styles to identify headings
|
|
859
953
|
* Extracts character format (color, bold, etc.) from style definitions
|
|
@@ -875,6 +969,14 @@ function parseStylesXml(files) {
|
|
|
875
969
|
if (pPr) {
|
|
876
970
|
const outlineLvl = findChild(pPr, "w:outlineLvl");
|
|
877
971
|
if (outlineLvl?.attributes["w:val"] !== void 0) styleInfo.outlineLvl = parseInt(outlineLvl.attributes["w:val"], 10);
|
|
972
|
+
const borders = parseBorders(pPr);
|
|
973
|
+
const shading = parseShading(pPr);
|
|
974
|
+
if (borders || shading) {
|
|
975
|
+
const paragraphFormat = {};
|
|
976
|
+
if (borders) Object.assign(paragraphFormat, borders);
|
|
977
|
+
if (shading) paragraphFormat.shading = shading;
|
|
978
|
+
if (Object.keys(paragraphFormat).length > 0) styleInfo.paragraphFormat = paragraphFormat;
|
|
979
|
+
}
|
|
878
980
|
}
|
|
879
981
|
const rPr = findChild(style, "w:rPr");
|
|
880
982
|
if (rPr) {
|
|
@@ -884,10 +986,26 @@ function parseStylesXml(files) {
|
|
|
884
986
|
const colorVal = color.attributes["w:val"];
|
|
885
987
|
charFormat.color = colorVal.startsWith("#") ? colorVal : `#${colorVal}`;
|
|
886
988
|
}
|
|
887
|
-
|
|
888
|
-
if (
|
|
889
|
-
|
|
890
|
-
|
|
989
|
+
const bold = findChild(rPr, "w:b");
|
|
990
|
+
if (bold) {
|
|
991
|
+
const val = bold.attributes["w:val"];
|
|
992
|
+
if (val !== "0" && val !== "false") charFormat.bold = true;
|
|
993
|
+
}
|
|
994
|
+
const italic = findChild(rPr, "w:i");
|
|
995
|
+
if (italic) {
|
|
996
|
+
const val = italic.attributes["w:val"];
|
|
997
|
+
if (val !== "0" && val !== "false") charFormat.italic = true;
|
|
998
|
+
}
|
|
999
|
+
const underline = findChild(rPr, "w:u");
|
|
1000
|
+
if (underline) {
|
|
1001
|
+
const val = underline.attributes["w:val"];
|
|
1002
|
+
if (val !== "none" && val !== "false" && val !== "0") charFormat.underline = true;
|
|
1003
|
+
}
|
|
1004
|
+
const strike = findChild(rPr, "w:strike");
|
|
1005
|
+
if (strike) {
|
|
1006
|
+
const val = strike.attributes["w:val"];
|
|
1007
|
+
if (val !== "0" && val !== "false") charFormat.strike = true;
|
|
1008
|
+
}
|
|
891
1009
|
const sz = findChild(rPr, "w:sz");
|
|
892
1010
|
if (sz?.attributes["w:val"]) {
|
|
893
1011
|
const sizeVal = sz.attributes["w:val"];
|
|
@@ -902,6 +1020,48 @@ function parseStylesXml(files) {
|
|
|
902
1020
|
}
|
|
903
1021
|
return styleMap;
|
|
904
1022
|
}
|
|
1023
|
+
/**
|
|
1024
|
+
* Extract all paragraph style attributes from a paragraph element
|
|
1025
|
+
* Merges direct paragraph properties with style-based properties
|
|
1026
|
+
*/
|
|
1027
|
+
function extractParagraphStyles(node, styleInfo) {
|
|
1028
|
+
const pPr = findChild(node, "w:pPr");
|
|
1029
|
+
if (!pPr) return null;
|
|
1030
|
+
const result = {};
|
|
1031
|
+
if (styleInfo?.paragraphFormat) {
|
|
1032
|
+
const pf = styleInfo.paragraphFormat;
|
|
1033
|
+
if (pf.shading) result.shading = pf.shading;
|
|
1034
|
+
if (pf.borderTop) result.borderTop = pf.borderTop;
|
|
1035
|
+
if (pf.borderBottom) result.borderBottom = pf.borderBottom;
|
|
1036
|
+
if (pf.borderLeft) result.borderLeft = pf.borderLeft;
|
|
1037
|
+
if (pf.borderRight) result.borderRight = pf.borderRight;
|
|
1038
|
+
}
|
|
1039
|
+
const ind = findChild(pPr, "w:ind");
|
|
1040
|
+
if (ind) {
|
|
1041
|
+
const left = parseTwipAttr(ind.attributes, "w:left");
|
|
1042
|
+
if (left) result.indentLeft = convertTwipToCssString(parseInt(left, 10));
|
|
1043
|
+
const right = parseTwipAttr(ind.attributes, "w:right");
|
|
1044
|
+
if (right) result.indentRight = convertTwipToCssString(parseInt(right, 10));
|
|
1045
|
+
const firstLine = parseTwipAttr(ind.attributes, "w:firstLine");
|
|
1046
|
+
if (firstLine) result.indentFirstLine = convertTwipToCssString(parseInt(firstLine, 10));
|
|
1047
|
+
else {
|
|
1048
|
+
const hanging = parseTwipAttr(ind.attributes, "w:hanging");
|
|
1049
|
+
if (hanging) result.indentFirstLine = convertTwipToCssString((left ? parseInt(left, 10) : 0) - parseInt(hanging, 10));
|
|
1050
|
+
}
|
|
1051
|
+
}
|
|
1052
|
+
const spacing = findChild(pPr, "w:spacing");
|
|
1053
|
+
if (spacing) {
|
|
1054
|
+
const before = parseTwipAttr(spacing.attributes, "w:before");
|
|
1055
|
+
if (before) result.spacingBefore = convertTwipToCssString(parseInt(before, 10));
|
|
1056
|
+
const after = parseTwipAttr(spacing.attributes, "w:after");
|
|
1057
|
+
if (after) result.spacingAfter = convertTwipToCssString(parseInt(after, 10));
|
|
1058
|
+
}
|
|
1059
|
+
const shading = parseShading(pPr);
|
|
1060
|
+
if (shading) result.shading = shading;
|
|
1061
|
+
const borders = parseBorders(pPr);
|
|
1062
|
+
if (borders) Object.assign(result, borders);
|
|
1063
|
+
return Object.keys(result).length > 0 ? result : null;
|
|
1064
|
+
}
|
|
905
1065
|
//#endregion
|
|
906
1066
|
//#region src/converters/text.ts
|
|
907
1067
|
/**
|
|
@@ -992,13 +1152,27 @@ function extractMarks(run, styleInfo) {
|
|
|
992
1152
|
if (styleInfo?.charFormat) mergedFormat = { ...styleInfo.charFormat };
|
|
993
1153
|
if (rPr) {
|
|
994
1154
|
const boldEl = findChild(rPr, "w:b");
|
|
995
|
-
if (boldEl)
|
|
996
|
-
|
|
1155
|
+
if (boldEl) {
|
|
1156
|
+
const val = boldEl.attributes["w:val"];
|
|
1157
|
+
if (val === "0" || val === "false") mergedFormat.bold = false;
|
|
1158
|
+
else mergedFormat.bold = true;
|
|
1159
|
+
}
|
|
997
1160
|
const italicEl = findChild(rPr, "w:i");
|
|
998
|
-
if (italicEl)
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1161
|
+
if (italicEl) {
|
|
1162
|
+
const val = italicEl.attributes["w:val"];
|
|
1163
|
+
if (val === "0" || val === "false") mergedFormat.italic = false;
|
|
1164
|
+
else mergedFormat.italic = true;
|
|
1165
|
+
}
|
|
1166
|
+
const underlineEl = findChild(rPr, "w:u");
|
|
1167
|
+
if (underlineEl) {
|
|
1168
|
+
const val = underlineEl.attributes["w:val"];
|
|
1169
|
+
if (val !== "none" && val !== "false" && val !== "0") mergedFormat.underline = true;
|
|
1170
|
+
}
|
|
1171
|
+
const strikeEl = findChild(rPr, "w:strike");
|
|
1172
|
+
if (strikeEl) {
|
|
1173
|
+
const val = strikeEl.attributes["w:val"];
|
|
1174
|
+
if (val !== "0" && val !== "false") mergedFormat.strike = true;
|
|
1175
|
+
}
|
|
1002
1176
|
const colorEl = findChild(rPr, "w:color");
|
|
1003
1177
|
if (colorEl?.attributes["w:val"] && colorEl.attributes["w:val"] !== "auto") {
|
|
1004
1178
|
const colorVal = colorEl.attributes["w:val"];
|
|
@@ -1061,35 +1235,6 @@ function extractAlignment(paragraph) {
|
|
|
1061
1235
|
//#endregion
|
|
1062
1236
|
//#region src/converters/paragraph.ts
|
|
1063
1237
|
/**
|
|
1064
|
-
* Extract paragraph style attributes from DOCX paragraph properties
|
|
1065
|
-
*/
|
|
1066
|
-
function extractParagraphStyles(node) {
|
|
1067
|
-
const pPr = findChild(node, "w:pPr");
|
|
1068
|
-
if (!pPr) return null;
|
|
1069
|
-
const result = {};
|
|
1070
|
-
const ind = findChild(pPr, "w:ind");
|
|
1071
|
-
if (ind) {
|
|
1072
|
-
const left = parseTwipAttr(ind.attributes, "w:left");
|
|
1073
|
-
if (left) result.indentLeft = convertTwipToCssString(parseInt(left, 10));
|
|
1074
|
-
const right = parseTwipAttr(ind.attributes, "w:right");
|
|
1075
|
-
if (right) result.indentRight = convertTwipToCssString(parseInt(right, 10));
|
|
1076
|
-
const firstLine = parseTwipAttr(ind.attributes, "w:firstLine");
|
|
1077
|
-
if (firstLine) result.indentFirstLine = convertTwipToCssString(parseInt(firstLine, 10));
|
|
1078
|
-
else {
|
|
1079
|
-
const hanging = parseTwipAttr(ind.attributes, "w:hanging");
|
|
1080
|
-
if (hanging) result.indentFirstLine = convertTwipToCssString((left ? parseInt(left, 10) : 0) - parseInt(hanging, 10));
|
|
1081
|
-
}
|
|
1082
|
-
}
|
|
1083
|
-
const spacing = findChild(pPr, "w:spacing");
|
|
1084
|
-
if (spacing) {
|
|
1085
|
-
const before = parseTwipAttr(spacing.attributes, "w:before");
|
|
1086
|
-
if (before) result.spacingBefore = convertTwipToCssString(parseInt(before, 10));
|
|
1087
|
-
const after = parseTwipAttr(spacing.attributes, "w:after");
|
|
1088
|
-
if (after) result.spacingAfter = convertTwipToCssString(parseInt(after, 10));
|
|
1089
|
-
}
|
|
1090
|
-
return Object.keys(result).length ? result : null;
|
|
1091
|
-
}
|
|
1092
|
-
/**
|
|
1093
1238
|
* Convert DOCX paragraph node to TipTap paragraph
|
|
1094
1239
|
*/
|
|
1095
1240
|
async function convertParagraph(node, params) {
|
|
@@ -1109,7 +1254,7 @@ async function convertParagraph(node, params) {
|
|
|
1109
1254
|
});
|
|
1110
1255
|
const attrs = {
|
|
1111
1256
|
...extractAlignment(node),
|
|
1112
|
-
...extractParagraphStyles(node)
|
|
1257
|
+
...extractParagraphStyles(node, styleInfo)
|
|
1113
1258
|
};
|
|
1114
1259
|
if (checkForPageBreak(node)) {
|
|
1115
1260
|
const filteredRuns = runs.filter((run) => run.type !== "hardBreak");
|
|
@@ -1159,7 +1304,7 @@ async function convertHeading(node, params, styleInfo, level) {
|
|
|
1159
1304
|
type: "heading",
|
|
1160
1305
|
attrs: {
|
|
1161
1306
|
level,
|
|
1162
|
-
...extractParagraphStyles(node)
|
|
1307
|
+
...extractParagraphStyles(node, styleInfo)
|
|
1163
1308
|
},
|
|
1164
1309
|
content: await extractRuns(node, {
|
|
1165
1310
|
context: params.context,
|
|
@@ -1170,31 +1315,6 @@ async function convertHeading(node, params, styleInfo, level) {
|
|
|
1170
1315
|
//#endregion
|
|
1171
1316
|
//#region src/parsers/table.ts
|
|
1172
1317
|
/**
|
|
1173
|
-
* Parse a single border element
|
|
1174
|
-
*/
|
|
1175
|
-
function parseBorder(borderNode) {
|
|
1176
|
-
if (!borderNode) return null;
|
|
1177
|
-
const val = borderNode.attributes["w:val"];
|
|
1178
|
-
const size = borderNode.attributes["w:sz"];
|
|
1179
|
-
const color = borderNode.attributes["w:color"];
|
|
1180
|
-
const styleMap = {
|
|
1181
|
-
single: "solid",
|
|
1182
|
-
dashed: "dashed",
|
|
1183
|
-
dotted: "dotted",
|
|
1184
|
-
double: "double",
|
|
1185
|
-
none: "none",
|
|
1186
|
-
nil: "none"
|
|
1187
|
-
};
|
|
1188
|
-
const border = {};
|
|
1189
|
-
if (color && color !== "auto") border.color = `#${color}`;
|
|
1190
|
-
if (size) {
|
|
1191
|
-
const eighthPoints = parseInt(size);
|
|
1192
|
-
if (!isNaN(eighthPoints)) border.width = Math.round(eighthPoints / 6);
|
|
1193
|
-
}
|
|
1194
|
-
if (val && styleMap[val]) border.style = styleMap[val];
|
|
1195
|
-
return Object.keys(border).length > 0 ? border : null;
|
|
1196
|
-
}
|
|
1197
|
-
/**
|
|
1198
1318
|
* Get table properties (cell margins)
|
|
1199
1319
|
*/
|
|
1200
1320
|
function parseTableProperties(tableNode) {
|
|
@@ -1255,7 +1375,9 @@ function parseCellProperties(cellNode) {
|
|
|
1255
1375
|
if (!tcPr) return props;
|
|
1256
1376
|
const gridSpan = findChild(tcPr, "w:gridSpan");
|
|
1257
1377
|
if (gridSpan?.attributes["w:val"]) props.colspan = parseInt(gridSpan.attributes["w:val"]);
|
|
1258
|
-
|
|
1378
|
+
const vMerge = findChild(tcPr, "w:vMerge");
|
|
1379
|
+
if (vMerge) if (vMerge.attributes["w:val"] === "continue") props.rowspan = 0;
|
|
1380
|
+
else props.rowspan = 1;
|
|
1259
1381
|
const tcW = findChild(tcPr, "w:tcW");
|
|
1260
1382
|
if (tcW?.attributes["w:w"]) props.colwidth = [convertTwipToPixels(parseInt(tcW.attributes["w:w"]))];
|
|
1261
1383
|
const shd = findChild(tcPr, "w:shd");
|
|
@@ -1594,7 +1716,7 @@ async function convertElements(elements, params) {
|
|
|
1594
1716
|
for (let i = 0; i < elements.length; i++) {
|
|
1595
1717
|
if (processedIndices.has(i)) continue;
|
|
1596
1718
|
const element = elements[i];
|
|
1597
|
-
if (params.context.
|
|
1719
|
+
if (params.context.paragraph?.ignoreEmpty && element.name === "w:p" && isEmptyParagraph(element)) continue;
|
|
1598
1720
|
const node = await convertElement(element, elements, i, params, processedIndices);
|
|
1599
1721
|
if (Array.isArray(node)) result.push(...node);
|
|
1600
1722
|
else if (node) result.push(node);
|
|
@@ -1640,7 +1762,8 @@ async function convertList(startElement, siblings, startIndex, params, processed
|
|
|
1640
1762
|
const listInfo = getListInfo(startElement);
|
|
1641
1763
|
if (!listInfo) return await convertParagraph(startElement, params);
|
|
1642
1764
|
const listTypeInfo = params.context.listTypeMap.get(listInfo.numId);
|
|
1643
|
-
|
|
1765
|
+
if (!listTypeInfo) return await convertParagraph(startElement, params);
|
|
1766
|
+
const listType = listTypeInfo.type;
|
|
1644
1767
|
const items = [];
|
|
1645
1768
|
let i = startIndex;
|
|
1646
1769
|
while (i < siblings.length) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@docen/import-docx",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.14",
|
|
4
4
|
"description": "A powerful TipTap/ProseMirror extension that imports Microsoft Word DOCX files to editor content",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"converter",
|
|
@@ -51,10 +51,10 @@
|
|
|
51
51
|
"xast-util-from-xml": "4.0.0"
|
|
52
52
|
},
|
|
53
53
|
"devDependencies": {
|
|
54
|
-
"@tiptap/core": "3.20.
|
|
54
|
+
"@tiptap/core": "3.20.4",
|
|
55
55
|
"@types/xast": "2.0.4",
|
|
56
|
-
"@docen/extensions": "0.0.
|
|
57
|
-
"@docen/utils": "0.0.
|
|
56
|
+
"@docen/extensions": "0.0.14",
|
|
57
|
+
"@docen/utils": "0.0.14"
|
|
58
58
|
},
|
|
59
59
|
"peerDependencies": {
|
|
60
60
|
"@napi-rs/canvas": "^0.1.88"
|
|
@@ -65,7 +65,7 @@
|
|
|
65
65
|
}
|
|
66
66
|
},
|
|
67
67
|
"optionalDependencies": {
|
|
68
|
-
"@napi-rs/canvas": "^0.1.
|
|
68
|
+
"@napi-rs/canvas": "^0.1.97"
|
|
69
69
|
},
|
|
70
70
|
"scripts": {
|
|
71
71
|
"dev": "basis build --stub",
|