docxmlater 7.2.0 → 7.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +606 -606
- package/dist/core/Document.d.ts.map +1 -1
- package/dist/core/Document.js +3 -0
- package/dist/core/Document.js.map +1 -1
- package/dist/core/DocumentGenerator.js +202 -202
- package/dist/core/DocumentParser.d.ts.map +1 -1
- package/dist/core/DocumentParser.js +6 -2
- package/dist/core/DocumentParser.js.map +1 -1
- package/dist/elements/CommentManager.js +2 -2
- package/dist/elements/ImageManager.d.ts +1 -1
- package/dist/elements/ImageManager.d.ts.map +1 -1
- package/dist/elements/ImageManager.js +2 -2
- package/dist/elements/ImageManager.js.map +1 -1
- package/dist/formatting/NumberingLevel.d.ts.map +1 -1
- package/dist/formatting/NumberingLevel.js +0 -6
- package/dist/formatting/NumberingLevel.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,606 +1,606 @@
|
|
|
1
|
-
# docXMLater
|
|
2
|
-
|
|
3
|
-
A comprehensive, production-ready TypeScript/JavaScript framework for creating, reading, and manipulating Microsoft Word (.docx) documents programmatically.
|
|
4
|
-
|
|
5
|
-
## Features
|
|
6
|
-
|
|
7
|
-
### Core Document Operations
|
|
8
|
-
|
|
9
|
-
- Create DOCX files from scratch
|
|
10
|
-
- Read and modify existing DOCX files
|
|
11
|
-
- Buffer-based operations (load/save from memory)
|
|
12
|
-
- Document properties (core, extended, custom)
|
|
13
|
-
- Memory management with dispose pattern
|
|
14
|
-
|
|
15
|
-
### Text & Paragraph Formatting
|
|
16
|
-
|
|
17
|
-
- Character formatting: bold, italic, underline, strikethrough, subscript, superscript
|
|
18
|
-
- Font properties: family, size, color (RGB and theme colors), highlight
|
|
19
|
-
- Text effects: small caps, all caps, shadow, emboss, engrave
|
|
20
|
-
- Paragraph alignment, indentation, spacing, borders, shading
|
|
21
|
-
- Text search and replace with regex support
|
|
22
|
-
- Custom styles (paragraph, character, table)
|
|
23
|
-
|
|
24
|
-
### Lists & Tables
|
|
25
|
-
|
|
26
|
-
- Numbered lists (decimal, roman, alpha)
|
|
27
|
-
- Bulleted lists with various bullet styles
|
|
28
|
-
- Multi-level lists with custom numbering
|
|
29
|
-
- Tables with formatting, borders, shading
|
|
30
|
-
- Cell spanning (merge cells horizontally and vertically)
|
|
31
|
-
- Advanced table properties (margins, widths, alignment)
|
|
32
|
-
|
|
33
|
-
### Rich Content
|
|
34
|
-
|
|
35
|
-
- Images (PNG, JPEG, GIF, SVG) with positioning and text wrapping
|
|
36
|
-
- Headers & footers (different first page, odd/even pages)
|
|
37
|
-
- Hyperlinks (external URLs, internal bookmarks)
|
|
38
|
-
- Hyperlink defragmentation utility (fixes fragmented links from Google Docs)
|
|
39
|
-
- Bookmarks and cross-references
|
|
40
|
-
- Shapes and text boxes
|
|
41
|
-
|
|
42
|
-
### Advanced Features
|
|
43
|
-
|
|
44
|
-
- Track changes (revisions for insertions, deletions, formatting)
|
|
45
|
-
- Comments and annotations
|
|
46
|
-
- Table of contents generation with customizable heading levels
|
|
47
|
-
- Fields: merge fields, date/time, page numbers, TOC fields
|
|
48
|
-
- Footnotes and endnotes
|
|
49
|
-
- Content controls (Structured Document Tags)
|
|
50
|
-
- Multiple sections with different page layouts
|
|
51
|
-
- Page orientation, size, and margins
|
|
52
|
-
|
|
53
|
-
### Developer Tools
|
|
54
|
-
|
|
55
|
-
- Complete XML generation and parsing (ReDoS-safe, position-based parser)
|
|
56
|
-
- 40+ unit conversion functions (twips, EMUs, points, pixels, inches, cm)
|
|
57
|
-
- Validation utilities and corruption detection
|
|
58
|
-
- Full TypeScript support with comprehensive type definitions
|
|
59
|
-
- Error handling utilities
|
|
60
|
-
- Logging infrastructure with multiple log levels
|
|
61
|
-
|
|
62
|
-
## Installation
|
|
63
|
-
|
|
64
|
-
```bash
|
|
65
|
-
npm install docxmlater
|
|
66
|
-
```
|
|
67
|
-
|
|
68
|
-
## Quick Start
|
|
69
|
-
|
|
70
|
-
### Creating a New Document
|
|
71
|
-
|
|
72
|
-
```typescript
|
|
73
|
-
import { Document } from "docxmlater";
|
|
74
|
-
|
|
75
|
-
// Create a new document
|
|
76
|
-
const doc = Document.create();
|
|
77
|
-
|
|
78
|
-
// Add a paragraph
|
|
79
|
-
const para = doc.createParagraph();
|
|
80
|
-
para.addText("Hello, World!", { bold: true, fontSize: 24 });
|
|
81
|
-
|
|
82
|
-
// Save to file
|
|
83
|
-
await doc.save("hello.docx");
|
|
84
|
-
|
|
85
|
-
// Don't forget to dispose
|
|
86
|
-
doc.dispose();
|
|
87
|
-
```
|
|
88
|
-
|
|
89
|
-
### Loading and Modifying Documents
|
|
90
|
-
|
|
91
|
-
```typescript
|
|
92
|
-
import { Document } from "docxmlater";
|
|
93
|
-
|
|
94
|
-
// Load existing document
|
|
95
|
-
const doc = await Document.load("input.docx");
|
|
96
|
-
|
|
97
|
-
// Find and replace text
|
|
98
|
-
doc.replaceText(/old text/g, "new text");
|
|
99
|
-
|
|
100
|
-
// Add a new paragraph
|
|
101
|
-
const para = doc.createParagraph();
|
|
102
|
-
para.addText("Added paragraph", { italic: true });
|
|
103
|
-
|
|
104
|
-
// Save modifications
|
|
105
|
-
await doc.save("output.docx");
|
|
106
|
-
doc.dispose();
|
|
107
|
-
```
|
|
108
|
-
|
|
109
|
-
### Working with Tables
|
|
110
|
-
|
|
111
|
-
```typescript
|
|
112
|
-
import { Document } from "docxmlater";
|
|
113
|
-
|
|
114
|
-
const doc = Document.create();
|
|
115
|
-
|
|
116
|
-
// Create a 3x4 table
|
|
117
|
-
const table = doc.createTable(3, 4);
|
|
118
|
-
|
|
119
|
-
// Set header row
|
|
120
|
-
const headerRow = table.getRow(0);
|
|
121
|
-
headerRow.getCell(0).addParagraph().addText("Column 1", { bold: true });
|
|
122
|
-
headerRow.getCell(1).addParagraph().addText("Column 2", { bold: true });
|
|
123
|
-
headerRow.getCell(2).addParagraph().addText("Column 3", { bold: true });
|
|
124
|
-
headerRow.getCell(3).addParagraph().addText("Column 4", { bold: true });
|
|
125
|
-
|
|
126
|
-
// Add data
|
|
127
|
-
table.getRow(1).getCell(0).addParagraph().addText("Data 1");
|
|
128
|
-
table.getRow(1).getCell(1).addParagraph().addText("Data 2");
|
|
129
|
-
|
|
130
|
-
// Apply borders
|
|
131
|
-
table.setBorders({
|
|
132
|
-
top: { style: "single", size: 4, color: "000000" },
|
|
133
|
-
bottom: { style: "single", size: 4, color: "000000" },
|
|
134
|
-
left: { style: "single", size: 4, color: "000000" },
|
|
135
|
-
right: { style: "single", size: 4, color: "000000" },
|
|
136
|
-
insideH: { style: "single", size: 4, color: "000000" },
|
|
137
|
-
insideV: { style: "single", size: 4, color: "000000" },
|
|
138
|
-
});
|
|
139
|
-
|
|
140
|
-
await doc.save("table.docx");
|
|
141
|
-
doc.dispose();
|
|
142
|
-
```
|
|
143
|
-
|
|
144
|
-
### Adding Images
|
|
145
|
-
|
|
146
|
-
```typescript
|
|
147
|
-
import { Document } from "docxmlater";
|
|
148
|
-
import { readFileSync } from "fs";
|
|
149
|
-
|
|
150
|
-
const doc = Document.create();
|
|
151
|
-
|
|
152
|
-
// Load image from file
|
|
153
|
-
const imageBuffer = readFileSync("photo.jpg");
|
|
154
|
-
|
|
155
|
-
// Add image to document
|
|
156
|
-
const para = doc.createParagraph();
|
|
157
|
-
await para.addImage(imageBuffer, {
|
|
158
|
-
width: 400,
|
|
159
|
-
height: 300,
|
|
160
|
-
format: "jpg",
|
|
161
|
-
});
|
|
162
|
-
|
|
163
|
-
await doc.save("with-image.docx");
|
|
164
|
-
doc.dispose();
|
|
165
|
-
```
|
|
166
|
-
|
|
167
|
-
### Hyperlink Management
|
|
168
|
-
|
|
169
|
-
```typescript
|
|
170
|
-
import { Document } from "docxmlater";
|
|
171
|
-
|
|
172
|
-
const doc = await Document.load("document.docx");
|
|
173
|
-
|
|
174
|
-
// Get all hyperlinks
|
|
175
|
-
const hyperlinks = doc.getHyperlinks();
|
|
176
|
-
console.log(`Found ${hyperlinks.length} hyperlinks`);
|
|
177
|
-
|
|
178
|
-
// Update URLs in batch (30-50% faster than manual iteration)
|
|
179
|
-
doc.updateHyperlinkUrls("http://old-domain.com", "https://new-domain.com");
|
|
180
|
-
|
|
181
|
-
// Fix fragmented hyperlinks from Google Docs
|
|
182
|
-
const mergedCount = doc.defragmentHyperlinks({
|
|
183
|
-
resetFormatting: true, // Fix corrupted fonts
|
|
184
|
-
});
|
|
185
|
-
console.log(`Merged ${mergedCount} fragmented hyperlinks`);
|
|
186
|
-
|
|
187
|
-
await doc.save("updated.docx");
|
|
188
|
-
doc.dispose();
|
|
189
|
-
```
|
|
190
|
-
|
|
191
|
-
### Custom Styles
|
|
192
|
-
|
|
193
|
-
```typescript
|
|
194
|
-
import { Document, Style } from "docxmlater";
|
|
195
|
-
|
|
196
|
-
const doc = Document.create();
|
|
197
|
-
|
|
198
|
-
// Create custom paragraph style
|
|
199
|
-
const customStyle = new Style("CustomHeading", "paragraph");
|
|
200
|
-
customStyle.setName("Custom Heading");
|
|
201
|
-
customStyle.setRunFormatting({
|
|
202
|
-
bold: true,
|
|
203
|
-
fontSize: 32,
|
|
204
|
-
color: "0070C0",
|
|
205
|
-
});
|
|
206
|
-
customStyle.setParagraphFormatting({
|
|
207
|
-
alignment: "center",
|
|
208
|
-
spacingAfter: 240,
|
|
209
|
-
});
|
|
210
|
-
|
|
211
|
-
// Add style to document
|
|
212
|
-
doc.getStylesManager().addStyle(customStyle);
|
|
213
|
-
|
|
214
|
-
// Apply style to paragraph
|
|
215
|
-
const para = doc.createParagraph();
|
|
216
|
-
para.addText("Styled Heading");
|
|
217
|
-
para.applyStyle("CustomHeading");
|
|
218
|
-
|
|
219
|
-
await doc.save("styled.docx");
|
|
220
|
-
doc.dispose();
|
|
221
|
-
```
|
|
222
|
-
|
|
223
|
-
## API Overview
|
|
224
|
-
|
|
225
|
-
### Document Class
|
|
226
|
-
|
|
227
|
-
**Creation & Loading:**
|
|
228
|
-
|
|
229
|
-
- `Document.create(options?)` - Create new document
|
|
230
|
-
- `Document.load(filepath, options?)` - Load from file
|
|
231
|
-
- `Document.loadFromBuffer(buffer, options?)` - Load from memory
|
|
232
|
-
|
|
233
|
-
**Handling Tracked Changes:**
|
|
234
|
-
|
|
235
|
-
By default, docXMLater accepts all tracked changes during document loading to prevent corruption:
|
|
236
|
-
|
|
237
|
-
```typescript
|
|
238
|
-
// Default: Accepts all changes (recommended)
|
|
239
|
-
const doc = await Document.load('document.docx');
|
|
240
|
-
|
|
241
|
-
// Explicit control (alternative to the default call above; use one or the other)
|
|
242
|
-
const doc = await Document.load('document.docx', {
|
|
243
|
-
revisionHandling: 'accept' // Accept all changes (default)
|
|
244
|
-
// OR
|
|
245
|
-
// revisionHandling: 'strip' // Remove all revision markup
|
|
246
|
-
// OR
|
|
247
|
-
// revisionHandling: 'preserve' // Keep tracked changes (may cause corruption)
|
|
248
|
-
});
|
|
249
|
-
```
|
|
250
|
-
|
|
251
|
-
**Revision Handling Options:**
|
|
252
|
-
- `'accept'` (default): Removes revision markup, keeps inserted content, removes deleted content
|
|
253
|
-
- `'strip'`: Removes all revision markup completely
|
|
254
|
-
- `'preserve'`: Keeps tracked changes as-is (may cause Word "unreadable content" errors)
|
|
255
|
-
|
|
256
|
-
**Why Accept By Default?**
|
|
257
|
-
|
|
258
|
-
Documents with tracked changes can cause Word corruption errors during round-trip processing due to revision ID conflicts. Accepting changes automatically prevents this issue while preserving document content.
|
|
259
|
-
|
|
260
|
-
**Content Management:**
|
|
261
|
-
|
|
262
|
-
- `createParagraph()` - Add paragraph
|
|
263
|
-
- `createTable(rows, cols)` - Add table
|
|
264
|
-
- `createSection()` - Add section
|
|
265
|
-
- `getBodyElements()` - Get all body content
|
|
266
|
-
|
|
267
|
-
**Search & Replace:**
|
|
268
|
-
|
|
269
|
-
- `findText(pattern)` - Find text matches
|
|
270
|
-
- `replaceText(pattern, replacement)` - Replace text
|
|
271
|
-
|
|
272
|
-
**Hyperlinks:**
|
|
273
|
-
|
|
274
|
-
- `getHyperlinks()` - Get all hyperlinks
|
|
275
|
-
- `updateHyperlinkUrls(oldUrl, newUrl)` - Batch URL update
|
|
276
|
-
- `defragmentHyperlinks(options?)` - Fix fragmented links
|
|
277
|
-
|
|
278
|
-
**Statistics:**
|
|
279
|
-
|
|
280
|
-
- `getWordCount()` - Count words
|
|
281
|
-
- `getCharacterCount(includeSpaces?)` - Count characters
|
|
282
|
-
- `estimateSize()` - Estimate file size
|
|
283
|
-
|
|
284
|
-
**Saving:**
|
|
285
|
-
|
|
286
|
-
- `save(filepath)` - Save to file
|
|
287
|
-
- `toBuffer()` - Save to Buffer
|
|
288
|
-
- `dispose()` - Free resources (important!)
|
|
289
|
-
|
|
290
|
-
### Paragraph Class
|
|
291
|
-
|
|
292
|
-
**Content:**
|
|
293
|
-
|
|
294
|
-
- `addText(text, formatting?)` - Add text run
|
|
295
|
-
- `addRun(run)` - Add custom run
|
|
296
|
-
- `addHyperlink(hyperlink)` - Add hyperlink
|
|
297
|
-
- `addImage(buffer, options)` - Add image
|
|
298
|
-
|
|
299
|
-
**Formatting:**
|
|
300
|
-
|
|
301
|
-
- `setAlignment(alignment)` - Left, center, right, justify
|
|
302
|
-
- `setIndentation(options)` - First line, hanging, left, right
|
|
303
|
-
- `setSpacing(options)` - Line spacing, before/after
|
|
304
|
-
- `setBorders(borders)` - Paragraph borders
|
|
305
|
-
- `setShading(shading)` - Background color
|
|
306
|
-
- `applyStyle(styleId)` - Apply paragraph style
|
|
307
|
-
|
|
308
|
-
**Properties:**
|
|
309
|
-
|
|
310
|
-
- `setKeepNext(value)` - Keep with next paragraph
|
|
311
|
-
- `setKeepLines(value)` - Keep lines together
|
|
312
|
-
- `setPageBreakBefore(value)` - Page break before
|
|
313
|
-
|
|
314
|
-
**Numbering:**
|
|
315
|
-
|
|
316
|
-
- `setNumbering(numId, level)` - Apply list numbering
|
|
317
|
-
|
|
318
|
-
### Run Class
|
|
319
|
-
|
|
320
|
-
**Text:**
|
|
321
|
-
|
|
322
|
-
- `setText(text)` - Set run text
|
|
323
|
-
- `getText()` - Get run text
|
|
324
|
-
|
|
325
|
-
**Character Formatting:**
|
|
326
|
-
|
|
327
|
-
- `setBold(value)` - Bold text
|
|
328
|
-
- `setItalic(value)` - Italic text
|
|
329
|
-
- `setUnderline(style?)` - Underline
|
|
330
|
-
- `setStrikethrough(value)` - Strikethrough
|
|
331
|
-
- `setFont(name)` - Font family
|
|
332
|
-
- `setFontSize(size)` - Font size in points
|
|
333
|
-
- `setColor(color)` - Text color (hex)
|
|
334
|
-
- `setHighlight(color)` - Highlight color
|
|
335
|
-
|
|
336
|
-
**Advanced:**
|
|
337
|
-
|
|
338
|
-
- `setSubscript(value)` - Subscript
|
|
339
|
-
- `setSuperscript(value)` - Superscript
|
|
340
|
-
- `setSmallCaps(value)` - Small capitals
|
|
341
|
-
- `setAllCaps(value)` - All capitals
|
|
342
|
-
|
|
343
|
-
### Table Class
|
|
344
|
-
|
|
345
|
-
**Structure:**
|
|
346
|
-
|
|
347
|
-
- `addRow()` - Add row
|
|
348
|
-
- `getRow(index)` - Get row by index
|
|
349
|
-
- `getCell(row, col)` - Get specific cell
|
|
350
|
-
|
|
351
|
-
**Formatting:**
|
|
352
|
-
|
|
353
|
-
- `setBorders(borders)` - Table borders
|
|
354
|
-
- `setAlignment(alignment)` - Table alignment
|
|
355
|
-
- `setWidth(width)` - Table width
|
|
356
|
-
- `setLayout(layout)` - Fixed or auto layout
|
|
357
|
-
|
|
358
|
-
**Style:**
|
|
359
|
-
|
|
360
|
-
- `applyStyle(styleId)` - Apply table style
|
|
361
|
-
|
|
362
|
-
### TableCell Class
|
|
363
|
-
|
|
364
|
-
**Content:**
|
|
365
|
-
|
|
366
|
-
- `addParagraph()` - Add paragraph to cell
|
|
367
|
-
- `getParagraphs()` - Get all paragraphs
|
|
368
|
-
|
|
369
|
-
**Formatting:**
|
|
370
|
-
|
|
371
|
-
- `setBorders(borders)` - Cell borders
|
|
372
|
-
- `setShading(color)` - Cell background
|
|
373
|
-
- `setVerticalAlignment(alignment)` - Top, center, bottom
|
|
374
|
-
- `setWidth(width)` - Cell width
|
|
375
|
-
|
|
376
|
-
**Spanning:**
|
|
377
|
-
|
|
378
|
-
- `setHorizontalMerge(mergeType)` - Horizontal merge
|
|
379
|
-
- `setVerticalMerge(mergeType)` - Vertical merge
|
|
380
|
-
|
|
381
|
-
### Utilities
|
|
382
|
-
|
|
383
|
-
**Unit Conversions:**
|
|
384
|
-
|
|
385
|
-
```typescript
|
|
386
|
-
import { twipsToPoints, inchesToTwips, emusToPixels } from "docxmlater";
|
|
387
|
-
|
|
388
|
-
const points = twipsToPoints(240); // 240 twips = 12 points
|
|
389
|
-
const twips = inchesToTwips(1); // 1 inch = 1440 twips
|
|
390
|
-
const pixels = emusToPixels(914400, 96); // 914400 EMUs = 96 pixels at 96 DPI
|
|
391
|
-
```
|
|
392
|
-
|
|
393
|
-
**Validation:**
|
|
394
|
-
|
|
395
|
-
```typescript
|
|
396
|
-
import { validateRunText, detectXmlInText, cleanXmlFromText } from "docxmlater";
|
|
397
|
-
|
|
398
|
-
// Detect XML patterns in text
|
|
399
|
-
const result = validateRunText("Some <w:t>text</w:t>");
|
|
400
|
-
if (result.hasXml) {
|
|
401
|
-
console.warn(result.message);
|
|
402
|
-
const cleaned = cleanXmlFromText(result.text);
|
|
403
|
-
}
|
|
404
|
-
```
|
|
405
|
-
|
|
406
|
-
**Corruption Detection:**
|
|
407
|
-
|
|
408
|
-
```typescript
|
|
409
|
-
import { Document, detectCorruptionInDocument } from "docxmlater";
|
|
410
|
-
|
|
411
|
-
const doc = await Document.load("suspect.docx");
|
|
412
|
-
const report = detectCorruptionInDocument(doc);
|
|
413
|
-
|
|
414
|
-
if (report.isCorrupted) {
|
|
415
|
-
console.log(`Found ${report.locations.length} corruption issues`);
|
|
416
|
-
report.locations.forEach((loc) => {
|
|
417
|
-
console.log(`Line ${loc.lineNumber}: ${loc.issue}`);
|
|
418
|
-
console.log(`Suggested fix: ${loc.suggestedFix}`);
|
|
419
|
-
});
|
|
420
|
-
}
|
|
421
|
-
```
|
|
422
|
-
|
|
423
|
-
## TypeScript Support
|
|
424
|
-
|
|
425
|
-
Full TypeScript definitions included:
|
|
426
|
-
|
|
427
|
-
```typescript
|
|
428
|
-
import {
|
|
429
|
-
Document,
|
|
430
|
-
Paragraph,
|
|
431
|
-
Run,
|
|
432
|
-
Table,
|
|
433
|
-
RunFormatting,
|
|
434
|
-
ParagraphFormatting,
|
|
435
|
-
DocumentProperties,
|
|
436
|
-
} from "docxmlater";
|
|
437
|
-
|
|
438
|
-
// Type-safe formatting
|
|
439
|
-
const formatting: RunFormatting = {
|
|
440
|
-
bold: true,
|
|
441
|
-
fontSize: 12,
|
|
442
|
-
color: "FF0000",
|
|
443
|
-
};
|
|
444
|
-
|
|
445
|
-
// Type-safe document properties
|
|
446
|
-
const properties: DocumentProperties = {
|
|
447
|
-
title: "My Document",
|
|
448
|
-
author: "John Doe",
|
|
449
|
-
created: new Date(),
|
|
450
|
-
};
|
|
451
|
-
```
|
|
452
|
-
|
|
453
|
-
## Version History
|
|
454
|
-
|
|
455
|
-
**Current Version: 7.2.0**
|
|
456
|
-
|
|
457
|
-
See [CHANGELOG.md](CHANGELOG.md) for detailed version history.
|
|
458
|
-
|
|
459
|
-
## RAG-CLI Integration (Development Only)
|
|
460
|
-
|
|
461
|
-
This project includes MCP (Model Context Protocol) configuration to allow Claude Code to access docXMLater documentation from Documentation_Hub during development.
|
|
462
|
-
|
|
463
|
-
**Note:** RAG-CLI uses `python-docx` for DOCX indexing, not docXMLater. These are complementary tools:
|
|
464
|
-
|
|
465
|
-
- **RAG-CLI**: Index DOCX files for search/retrieval (read-only)
|
|
466
|
-
- **docXMLater**: Create, modify, format DOCX files (read-write)
|
|
467
|
-
|
|
468
|
-
The `.mcp.json` configuration is for development assistance only and does not represent a runtime integration between the two projects.
|
|
469
|
-
|
|
470
|
-
## Testing
|
|
471
|
-
|
|
472
|
-
The framework includes comprehensive test coverage:
|
|
473
|
-
|
|
474
|
-
- **2073+ test cases** across 59 test files
|
|
475
|
-
- Tests cover all phases of implementation
|
|
476
|
-
- Integration tests for complex scenarios
|
|
477
|
-
- Performance benchmarks
|
|
478
|
-
- Edge case validation
|
|
479
|
-
|
|
480
|
-
Run tests:
|
|
481
|
-
|
|
482
|
-
```bash
|
|
483
|
-
npm test # Run all tests
|
|
484
|
-
npm run test:watch # Watch mode
|
|
485
|
-
npm run test:coverage # Coverage report
|
|
486
|
-
```
|
|
487
|
-
|
|
488
|
-
## Performance Considerations
|
|
489
|
-
|
|
490
|
-
- Use `dispose()` to free resources after document operations
|
|
491
|
-
- Buffer-based operations are faster than file I/O
|
|
492
|
-
- Batch hyperlink updates are 30-50% faster than manual iteration
|
|
493
|
-
- Large documents (1000+ pages) supported with memory management
|
|
494
|
-
- Streaming support for very large files
|
|
495
|
-
|
|
496
|
-
## Architecture
|
|
497
|
-
|
|
498
|
-
The framework follows a modular architecture:
|
|
499
|
-
|
|
500
|
-
```
|
|
501
|
-
src/
|
|
502
|
-
├── core/ # Document, Parser, Generator, Validator
|
|
503
|
-
├── elements/ # Paragraph, Run, Table, Image, etc.
|
|
504
|
-
├── formatting/ # Style, Numbering managers
|
|
505
|
-
├── managers/ # Drawing, Image, Relationship managers
|
|
506
|
-
├── xml/ # XML generation and parsing
|
|
507
|
-
├── zip/ # ZIP archive handling
|
|
508
|
-
└── utils/ # Validation, units, error handling
|
|
509
|
-
```
|
|
510
|
-
|
|
511
|
-
Key design principles:
|
|
512
|
-
|
|
513
|
-
- KISS (Keep It Simple, Stupid) - no over-engineering
|
|
514
|
-
- Position-based XML parsing (ReDoS-safe)
|
|
515
|
-
- Defensive programming with comprehensive validation
|
|
516
|
-
- Memory-efficient with explicit disposal pattern
|
|
517
|
-
- Full ECMA-376 (OpenXML) compliance
|
|
518
|
-
|
|
519
|
-
## Security
|
|
520
|
-
|
|
521
|
-
docXMLater includes multiple security measures to protect against common attack vectors:
|
|
522
|
-
|
|
523
|
-
### ReDoS Prevention
|
|
524
|
-
|
|
525
|
-
The XML parser uses position-based parsing instead of regular expressions, preventing catastrophic backtracking attacks that can cause denial of service.
|
|
526
|
-
|
|
527
|
-
### Input Validation
|
|
528
|
-
|
|
529
|
-
**Size Limits:**
|
|
530
|
-
- Default document size limit: 150 MB (configurable)
|
|
531
|
-
- Warning threshold: 50 MB
|
|
532
|
-
- XML content size validation before parsing
|
|
533
|
-
|
|
534
|
-
```typescript
|
|
535
|
-
// Configure size limits
|
|
536
|
-
const doc = await Document.load("large.docx", {
|
|
537
|
-
sizeLimits: {
|
|
538
|
-
warningSizeMB: 100,
|
|
539
|
-
maxSizeMB: 500,
|
|
540
|
-
},
|
|
541
|
-
});
|
|
542
|
-
```
|
|
543
|
-
|
|
544
|
-
**Nesting Depth:**
|
|
545
|
-
- Maximum XML nesting depth: 256 (configurable)
|
|
546
|
-
- Prevents stack overflow attacks
|
|
547
|
-
|
|
548
|
-
```typescript
|
|
549
|
-
import { XMLParser } from "docxmlater";
|
|
550
|
-
|
|
551
|
-
// Parse with custom depth limit
|
|
552
|
-
const obj = XMLParser.parseToObject(xml, {
|
|
553
|
-
maxNestingDepth: 512, // Increase if needed
|
|
554
|
-
});
|
|
555
|
-
```
|
|
556
|
-
|
|
557
|
-
### Path Traversal Prevention
|
|
558
|
-
|
|
559
|
-
File paths within DOCX archives are validated to prevent directory traversal attacks:
|
|
560
|
-
- Blocks `../` path sequences
|
|
561
|
-
- Blocks absolute paths
|
|
562
|
-
- Validates URL-encoded path components
|
|
563
|
-
|
|
564
|
-
### XML Injection Prevention
|
|
565
|
-
|
|
566
|
-
All text content is properly escaped using:
|
|
567
|
-
- `XMLBuilder.escapeXmlText()` for element content
|
|
568
|
-
- `XMLBuilder.escapeXmlAttribute()` for attribute values
|
|
569
|
-
|
|
570
|
-
This prevents injection of malicious XML elements through user-provided text content.
|
|
571
|
-
|
|
572
|
-
### UTF-8 Encoding
|
|
573
|
-
|
|
574
|
-
All text files are explicitly UTF-8 encoded per ECMA-376 specification, preventing encoding-related vulnerabilities.
|
|
575
|
-
|
|
576
|
-
## Requirements
|
|
577
|
-
|
|
578
|
-
- Node.js 18.0.0 or higher
|
|
579
|
-
- TypeScript 5.0+ (for development)
|
|
580
|
-
|
|
581
|
-
## Dependencies
|
|
582
|
-
|
|
583
|
-
- `jszip` - ZIP archive handling
|
|
584
|
-
|
|
585
|
-
## License
|
|
586
|
-
|
|
587
|
-
MIT
|
|
588
|
-
|
|
589
|
-
## Contributing
|
|
590
|
-
|
|
591
|
-
Contributions welcome! Please:
|
|
592
|
-
|
|
593
|
-
1. Fork the repository
|
|
594
|
-
2. Create a feature branch
|
|
595
|
-
3. Add tests for new features
|
|
596
|
-
4. Ensure all tests pass
|
|
597
|
-
5. Submit a pull request
|
|
598
|
-
|
|
599
|
-
## Support
|
|
600
|
-
|
|
601
|
-
- GitHub Issues: https://github.com/ItMeDiaTech/docXMLater/issues
|
|
602
|
-
- Documentation: See CLAUDE.md for detailed implementation notes
|
|
603
|
-
|
|
604
|
-
## Acknowledgments
|
|
605
|
-
|
|
606
|
-
Built with careful attention to the ECMA-376 Office Open XML specification. Special thanks to the OpenXML community for comprehensive documentation and examples.
|
|
1
|
+
# docXMLater
|
|
2
|
+
|
|
3
|
+
A comprehensive, production-ready TypeScript/JavaScript framework for creating, reading, and manipulating Microsoft Word (.docx) documents programmatically.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
### Core Document Operations
|
|
8
|
+
|
|
9
|
+
- Create DOCX files from scratch
|
|
10
|
+
- Read and modify existing DOCX files
|
|
11
|
+
- Buffer-based operations (load/save from memory)
|
|
12
|
+
- Document properties (core, extended, custom)
|
|
13
|
+
- Memory management with dispose pattern
|
|
14
|
+
|
|
15
|
+
### Text & Paragraph Formatting
|
|
16
|
+
|
|
17
|
+
- Character formatting: bold, italic, underline, strikethrough, subscript, superscript
|
|
18
|
+
- Font properties: family, size, color (RGB and theme colors), highlight
|
|
19
|
+
- Text effects: small caps, all caps, shadow, emboss, engrave
|
|
20
|
+
- Paragraph alignment, indentation, spacing, borders, shading
|
|
21
|
+
- Text search and replace with regex support
|
|
22
|
+
- Custom styles (paragraph, character, table)
|
|
23
|
+
|
|
24
|
+
### Lists & Tables
|
|
25
|
+
|
|
26
|
+
- Numbered lists (decimal, roman, alpha)
|
|
27
|
+
- Bulleted lists with various bullet styles
|
|
28
|
+
- Multi-level lists with custom numbering
|
|
29
|
+
- Tables with formatting, borders, shading
|
|
30
|
+
- Cell spanning (merge cells horizontally and vertically)
|
|
31
|
+
- Advanced table properties (margins, widths, alignment)
|
|
32
|
+
|
|
33
|
+
### Rich Content
|
|
34
|
+
|
|
35
|
+
- Images (PNG, JPEG, GIF, SVG) with positioning and text wrapping
|
|
36
|
+
- Headers & footers (different first page, odd/even pages)
|
|
37
|
+
- Hyperlinks (external URLs, internal bookmarks)
|
|
38
|
+
- Hyperlink defragmentation utility (fixes fragmented links from Google Docs)
|
|
39
|
+
- Bookmarks and cross-references
|
|
40
|
+
- Shapes and text boxes
|
|
41
|
+
|
|
42
|
+
### Advanced Features
|
|
43
|
+
|
|
44
|
+
- Track changes (revisions for insertions, deletions, formatting)
|
|
45
|
+
- Comments and annotations
|
|
46
|
+
- Table of contents generation with customizable heading levels
|
|
47
|
+
- Fields: merge fields, date/time, page numbers, TOC fields
|
|
48
|
+
- Footnotes and endnotes
|
|
49
|
+
- Content controls (Structured Document Tags)
|
|
50
|
+
- Multiple sections with different page layouts
|
|
51
|
+
- Page orientation, size, and margins
|
|
52
|
+
|
|
53
|
+
### Developer Tools
|
|
54
|
+
|
|
55
|
+
- Complete XML generation and parsing (ReDoS-safe, position-based parser)
|
|
56
|
+
- 40+ unit conversion functions (twips, EMUs, points, pixels, inches, cm)
|
|
57
|
+
- Validation utilities and corruption detection
|
|
58
|
+
- Full TypeScript support with comprehensive type definitions
|
|
59
|
+
- Error handling utilities
|
|
60
|
+
- Logging infrastructure with multiple log levels
|
|
61
|
+
|
|
62
|
+
## Installation
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
npm install docxmlater
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Quick Start
|
|
69
|
+
|
|
70
|
+
### Creating a New Document
|
|
71
|
+
|
|
72
|
+
```typescript
|
|
73
|
+
import { Document } from "docxmlater";
|
|
74
|
+
|
|
75
|
+
// Create a new document
|
|
76
|
+
const doc = Document.create();
|
|
77
|
+
|
|
78
|
+
// Add a paragraph
|
|
79
|
+
const para = doc.createParagraph();
|
|
80
|
+
para.addText("Hello, World!", { bold: true, fontSize: 24 });
|
|
81
|
+
|
|
82
|
+
// Save to file
|
|
83
|
+
await doc.save("hello.docx");
|
|
84
|
+
|
|
85
|
+
// Don't forget to dispose
|
|
86
|
+
doc.dispose();
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### Loading and Modifying Documents
|
|
90
|
+
|
|
91
|
+
```typescript
|
|
92
|
+
import { Document } from "docxmlater";
|
|
93
|
+
|
|
94
|
+
// Load existing document
|
|
95
|
+
const doc = await Document.load("input.docx");
|
|
96
|
+
|
|
97
|
+
// Find and replace text
|
|
98
|
+
doc.replaceText(/old text/g, "new text");
|
|
99
|
+
|
|
100
|
+
// Add a new paragraph
|
|
101
|
+
const para = doc.createParagraph();
|
|
102
|
+
para.addText("Added paragraph", { italic: true });
|
|
103
|
+
|
|
104
|
+
// Save modifications
|
|
105
|
+
await doc.save("output.docx");
|
|
106
|
+
doc.dispose();
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### Working with Tables
|
|
110
|
+
|
|
111
|
+
```typescript
|
|
112
|
+
import { Document } from "docxmlater";
|
|
113
|
+
|
|
114
|
+
const doc = Document.create();
|
|
115
|
+
|
|
116
|
+
// Create a 3x4 table
|
|
117
|
+
const table = doc.createTable(3, 4);
|
|
118
|
+
|
|
119
|
+
// Set header row
|
|
120
|
+
const headerRow = table.getRow(0);
|
|
121
|
+
headerRow.getCell(0).addParagraph().addText("Column 1", { bold: true });
|
|
122
|
+
headerRow.getCell(1).addParagraph().addText("Column 2", { bold: true });
|
|
123
|
+
headerRow.getCell(2).addParagraph().addText("Column 3", { bold: true });
|
|
124
|
+
headerRow.getCell(3).addParagraph().addText("Column 4", { bold: true });
|
|
125
|
+
|
|
126
|
+
// Add data
|
|
127
|
+
table.getRow(1).getCell(0).addParagraph().addText("Data 1");
|
|
128
|
+
table.getRow(1).getCell(1).addParagraph().addText("Data 2");
|
|
129
|
+
|
|
130
|
+
// Apply borders
|
|
131
|
+
table.setBorders({
|
|
132
|
+
top: { style: "single", size: 4, color: "000000" },
|
|
133
|
+
bottom: { style: "single", size: 4, color: "000000" },
|
|
134
|
+
left: { style: "single", size: 4, color: "000000" },
|
|
135
|
+
right: { style: "single", size: 4, color: "000000" },
|
|
136
|
+
insideH: { style: "single", size: 4, color: "000000" },
|
|
137
|
+
insideV: { style: "single", size: 4, color: "000000" },
|
|
138
|
+
});
|
|
139
|
+
|
|
140
|
+
await doc.save("table.docx");
|
|
141
|
+
doc.dispose();
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### Adding Images
|
|
145
|
+
|
|
146
|
+
```typescript
|
|
147
|
+
import { Document } from "docxmlater";
|
|
148
|
+
import { readFileSync } from "fs";
|
|
149
|
+
|
|
150
|
+
const doc = Document.create();
|
|
151
|
+
|
|
152
|
+
// Load image from file
|
|
153
|
+
const imageBuffer = readFileSync("photo.jpg");
|
|
154
|
+
|
|
155
|
+
// Add image to document
|
|
156
|
+
const para = doc.createParagraph();
|
|
157
|
+
await para.addImage(imageBuffer, {
|
|
158
|
+
width: 400,
|
|
159
|
+
height: 300,
|
|
160
|
+
format: "jpg",
|
|
161
|
+
});
|
|
162
|
+
|
|
163
|
+
await doc.save("with-image.docx");
|
|
164
|
+
doc.dispose();
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
### Hyperlink Management
|
|
168
|
+
|
|
169
|
+
```typescript
|
|
170
|
+
import { Document } from "docxmlater";
|
|
171
|
+
|
|
172
|
+
const doc = await Document.load("document.docx");
|
|
173
|
+
|
|
174
|
+
// Get all hyperlinks
|
|
175
|
+
const hyperlinks = doc.getHyperlinks();
|
|
176
|
+
console.log(`Found ${hyperlinks.length} hyperlinks`);
|
|
177
|
+
|
|
178
|
+
// Update URLs in batch (30-50% faster than manual iteration)
|
|
179
|
+
doc.updateHyperlinkUrls("http://old-domain.com", "https://new-domain.com");
|
|
180
|
+
|
|
181
|
+
// Fix fragmented hyperlinks from Google Docs
|
|
182
|
+
const mergedCount = doc.defragmentHyperlinks({
|
|
183
|
+
resetFormatting: true, // Fix corrupted fonts
|
|
184
|
+
});
|
|
185
|
+
console.log(`Merged ${mergedCount} fragmented hyperlinks`);
|
|
186
|
+
|
|
187
|
+
await doc.save("updated.docx");
|
|
188
|
+
doc.dispose();
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
### Custom Styles
|
|
192
|
+
|
|
193
|
+
```typescript
|
|
194
|
+
import { Document, Style } from "docxmlater";
|
|
195
|
+
|
|
196
|
+
const doc = Document.create();
|
|
197
|
+
|
|
198
|
+
// Create custom paragraph style
|
|
199
|
+
const customStyle = new Style("CustomHeading", "paragraph");
|
|
200
|
+
customStyle.setName("Custom Heading");
|
|
201
|
+
customStyle.setRunFormatting({
|
|
202
|
+
bold: true,
|
|
203
|
+
fontSize: 32,
|
|
204
|
+
color: "0070C0",
|
|
205
|
+
});
|
|
206
|
+
customStyle.setParagraphFormatting({
|
|
207
|
+
alignment: "center",
|
|
208
|
+
spacingAfter: 240,
|
|
209
|
+
});
|
|
210
|
+
|
|
211
|
+
// Add style to document
|
|
212
|
+
doc.getStylesManager().addStyle(customStyle);
|
|
213
|
+
|
|
214
|
+
// Apply style to paragraph
|
|
215
|
+
const para = doc.createParagraph();
|
|
216
|
+
para.addText("Styled Heading");
|
|
217
|
+
para.applyStyle("CustomHeading");
|
|
218
|
+
|
|
219
|
+
await doc.save("styled.docx");
|
|
220
|
+
doc.dispose();
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
## API Overview
|
|
224
|
+
|
|
225
|
+
### Document Class
|
|
226
|
+
|
|
227
|
+
**Creation & Loading:**
|
|
228
|
+
|
|
229
|
+
- `Document.create(options?)` - Create new document
|
|
230
|
+
- `Document.load(filepath, options?)` - Load from file
|
|
231
|
+
- `Document.loadFromBuffer(buffer, options?)` - Load from memory
|
|
232
|
+
|
|
233
|
+
**Handling Tracked Changes:**
|
|
234
|
+
|
|
235
|
+
By default, docXMLater accepts all tracked changes during document loading to prevent corruption:
|
|
236
|
+
|
|
237
|
+
```typescript
|
|
238
|
+
// Default: Accepts all changes (recommended)
|
|
239
|
+
const doc = await Document.load('document.docx');
|
|
240
|
+
|
|
241
|
+
// Explicit control
|
|
242
|
+
const doc = await Document.load('document.docx', {
|
|
243
|
+
revisionHandling: 'accept' // Accept all changes (default)
|
|
244
|
+
// OR
|
|
245
|
+
revisionHandling: 'strip' // Remove all revision markup
|
|
246
|
+
// OR
|
|
247
|
+
revisionHandling: 'preserve' // Keep tracked changes (may cause corruption)
|
|
248
|
+
});
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
**Revision Handling Options:**
|
|
252
|
+
- `'accept'` (default): Removes revision markup, keeps inserted content, removes deleted content
|
|
253
|
+
- `'strip'`: Removes all revision markup completely
|
|
254
|
+
- `'preserve'`: Keeps tracked changes as-is (may cause Word "unreadable content" errors)
|
|
255
|
+
|
|
256
|
+
**Why Accept By Default?**
|
|
257
|
+
|
|
258
|
+
Documents with tracked changes can cause Word corruption errors during round-trip processing due to revision ID conflicts. Automatically accepting changes on load prevents this issue while preserving document content.
|
|
259
|
+
|
|
260
|
+
**Content Management:**
|
|
261
|
+
|
|
262
|
+
- `createParagraph()` - Add paragraph
|
|
263
|
+
- `createTable(rows, cols)` - Add table
|
|
264
|
+
- `createSection()` - Add section
|
|
265
|
+
- `getBodyElements()` - Get all body content
|
|
266
|
+
|
|
267
|
+
**Search & Replace:**
|
|
268
|
+
|
|
269
|
+
- `findText(pattern)` - Find text matches
|
|
270
|
+
- `replaceText(pattern, replacement)` - Replace text
|
|
271
|
+
|
|
272
|
+
**Hyperlinks:**
|
|
273
|
+
|
|
274
|
+
- `getHyperlinks()` - Get all hyperlinks
|
|
275
|
+
- `updateHyperlinkUrls(oldUrl, newUrl)` - Batch URL update
|
|
276
|
+
- `defragmentHyperlinks(options?)` - Fix fragmented links
|
|
277
|
+
|
|
278
|
+
**Statistics:**
|
|
279
|
+
|
|
280
|
+
- `getWordCount()` - Count words
|
|
281
|
+
- `getCharacterCount(includeSpaces?)` - Count characters
|
|
282
|
+
- `estimateSize()` - Estimate file size
|
|
283
|
+
|
|
284
|
+
**Saving:**
|
|
285
|
+
|
|
286
|
+
- `save(filepath)` - Save to file
|
|
287
|
+
- `toBuffer()` - Save to Buffer
|
|
288
|
+
- `dispose()` - Free resources (important!)
|
|
289
|
+
|
|
290
|
+
### Paragraph Class
|
|
291
|
+
|
|
292
|
+
**Content:**
|
|
293
|
+
|
|
294
|
+
- `addText(text, formatting?)` - Add text run
|
|
295
|
+
- `addRun(run)` - Add custom run
|
|
296
|
+
- `addHyperlink(hyperlink)` - Add hyperlink
|
|
297
|
+
- `addImage(buffer, options)` - Add image
|
|
298
|
+
|
|
299
|
+
**Formatting:**
|
|
300
|
+
|
|
301
|
+
- `setAlignment(alignment)` - Left, center, right, justify
|
|
302
|
+
- `setIndentation(options)` - First line, hanging, left, right
|
|
303
|
+
- `setSpacing(options)` - Line spacing, before/after
|
|
304
|
+
- `setBorders(borders)` - Paragraph borders
|
|
305
|
+
- `setShading(shading)` - Background color
|
|
306
|
+
- `applyStyle(styleId)` - Apply paragraph style
|
|
307
|
+
|
|
308
|
+
**Properties:**
|
|
309
|
+
|
|
310
|
+
- `setKeepNext(value)` - Keep with next paragraph
|
|
311
|
+
- `setKeepLines(value)` - Keep lines together
|
|
312
|
+
- `setPageBreakBefore(value)` - Page break before
|
|
313
|
+
|
|
314
|
+
**Numbering:**
|
|
315
|
+
|
|
316
|
+
- `setNumbering(numId, level)` - Apply list numbering
|
|
317
|
+
|
|
318
|
+
### Run Class
|
|
319
|
+
|
|
320
|
+
**Text:**
|
|
321
|
+
|
|
322
|
+
- `setText(text)` - Set run text
|
|
323
|
+
- `getText()` - Get run text
|
|
324
|
+
|
|
325
|
+
**Character Formatting:**
|
|
326
|
+
|
|
327
|
+
- `setBold(value)` - Bold text
|
|
328
|
+
- `setItalic(value)` - Italic text
|
|
329
|
+
- `setUnderline(style?)` - Underline
|
|
330
|
+
- `setStrikethrough(value)` - Strikethrough
|
|
331
|
+
- `setFont(name)` - Font family
|
|
332
|
+
- `setFontSize(size)` - Font size in points
|
|
333
|
+
- `setColor(color)` - Text color (hex)
|
|
334
|
+
- `setHighlight(color)` - Highlight color
|
|
335
|
+
|
|
336
|
+
**Advanced:**
|
|
337
|
+
|
|
338
|
+
- `setSubscript(value)` - Subscript
|
|
339
|
+
- `setSuperscript(value)` - Superscript
|
|
340
|
+
- `setSmallCaps(value)` - Small capitals
|
|
341
|
+
- `setAllCaps(value)` - All capitals
|
|
342
|
+
|
|
343
|
+
### Table Class
|
|
344
|
+
|
|
345
|
+
**Structure:**
|
|
346
|
+
|
|
347
|
+
- `addRow()` - Add row
|
|
348
|
+
- `getRow(index)` - Get row by index
|
|
349
|
+
- `getCell(row, col)` - Get specific cell
|
|
350
|
+
|
|
351
|
+
**Formatting:**
|
|
352
|
+
|
|
353
|
+
- `setBorders(borders)` - Table borders
|
|
354
|
+
- `setAlignment(alignment)` - Table alignment
|
|
355
|
+
- `setWidth(width)` - Table width
|
|
356
|
+
- `setLayout(layout)` - Fixed or auto layout
|
|
357
|
+
|
|
358
|
+
**Style:**
|
|
359
|
+
|
|
360
|
+
- `applyStyle(styleId)` - Apply table style
|
|
361
|
+
|
|
362
|
+
### TableCell Class
|
|
363
|
+
|
|
364
|
+
**Content:**
|
|
365
|
+
|
|
366
|
+
- `addParagraph()` - Add paragraph to cell
|
|
367
|
+
- `getParagraphs()` - Get all paragraphs
|
|
368
|
+
|
|
369
|
+
**Formatting:**
|
|
370
|
+
|
|
371
|
+
- `setBorders(borders)` - Cell borders
|
|
372
|
+
- `setShading(color)` - Cell background
|
|
373
|
+
- `setVerticalAlignment(alignment)` - Top, center, bottom
|
|
374
|
+
- `setWidth(width)` - Cell width
|
|
375
|
+
|
|
376
|
+
**Spanning:**
|
|
377
|
+
|
|
378
|
+
- `setHorizontalMerge(mergeType)` - Horizontal merge
|
|
379
|
+
- `setVerticalMerge(mergeType)` - Vertical merge
|
|
380
|
+
|
|
381
|
+
### Utilities
|
|
382
|
+
|
|
383
|
+
**Unit Conversions:**
|
|
384
|
+
|
|
385
|
+
```typescript
|
|
386
|
+
import { twipsToPoints, inchesToTwips, emusToPixels } from "docxmlater";
|
|
387
|
+
|
|
388
|
+
const points = twipsToPoints(240); // 240 twips = 12 points
|
|
389
|
+
const twips = inchesToTwips(1); // 1 inch = 1440 twips
|
|
390
|
+
const pixels = emusToPixels(914400, 96); // 914400 EMUs = 96 pixels at 96 DPI
|
|
391
|
+
```
|
|
392
|
+
|
|
393
|
+
**Validation:**
|
|
394
|
+
|
|
395
|
+
```typescript
|
|
396
|
+
import { validateRunText, detectXmlInText, cleanXmlFromText } from "docxmlater";
|
|
397
|
+
|
|
398
|
+
// Detect XML patterns in text
|
|
399
|
+
const result = validateRunText("Some <w:t>text</w:t>");
|
|
400
|
+
if (result.hasXml) {
|
|
401
|
+
console.warn(result.message);
|
|
402
|
+
const cleaned = cleanXmlFromText(result.text);
|
|
403
|
+
}
|
|
404
|
+
```
|
|
405
|
+
|
|
406
|
+
**Corruption Detection:**
|
|
407
|
+
|
|
408
|
+
```typescript
|
|
409
|
+
import { Document, detectCorruptionInDocument } from "docxmlater";
|
|
410
|
+
|
|
411
|
+
const doc = await Document.load("suspect.docx");
|
|
412
|
+
const report = detectCorruptionInDocument(doc);
|
|
413
|
+
|
|
414
|
+
if (report.isCorrupted) {
|
|
415
|
+
console.log(`Found ${report.locations.length} corruption issues`);
|
|
416
|
+
report.locations.forEach((loc) => {
|
|
417
|
+
console.log(`Line ${loc.lineNumber}: ${loc.issue}`);
|
|
418
|
+
console.log(`Suggested fix: ${loc.suggestedFix}`);
|
|
419
|
+
});
|
|
420
|
+
}
|
|
421
|
+
```
|
|
422
|
+
|
|
423
|
+
## TypeScript Support
|
|
424
|
+
|
|
425
|
+
Full TypeScript definitions included:
|
|
426
|
+
|
|
427
|
+
```typescript
|
|
428
|
+
import {
|
|
429
|
+
Document,
|
|
430
|
+
Paragraph,
|
|
431
|
+
Run,
|
|
432
|
+
Table,
|
|
433
|
+
RunFormatting,
|
|
434
|
+
ParagraphFormatting,
|
|
435
|
+
DocumentProperties,
|
|
436
|
+
} from "docxmlater";
|
|
437
|
+
|
|
438
|
+
// Type-safe formatting
|
|
439
|
+
const formatting: RunFormatting = {
|
|
440
|
+
bold: true,
|
|
441
|
+
fontSize: 12,
|
|
442
|
+
color: "FF0000",
|
|
443
|
+
};
|
|
444
|
+
|
|
445
|
+
// Type-safe document properties
|
|
446
|
+
const properties: DocumentProperties = {
|
|
447
|
+
title: "My Document",
|
|
448
|
+
author: "John Doe",
|
|
449
|
+
created: new Date(),
|
|
450
|
+
};
|
|
451
|
+
```
|
|
452
|
+
|
|
453
|
+
## Version History
|
|
454
|
+
|
|
455
|
+
**Current Version: 7.2.2**
|
|
456
|
+
|
|
457
|
+
See [CHANGELOG.md](CHANGELOG.md) for detailed version history.
|
|
458
|
+
|
|
459
|
+
## RAG-CLI Integration (Development Only)
|
|
460
|
+
|
|
461
|
+
This project includes MCP (Model Context Protocol) configuration to allow Claude Code to access docXMLater documentation from Documentation_Hub during development.
|
|
462
|
+
|
|
463
|
+
**Note:** RAG-CLI uses `python-docx` for DOCX indexing, not docXMLater. These are complementary tools:
|
|
464
|
+
|
|
465
|
+
- **RAG-CLI**: Index DOCX files for search/retrieval (read-only)
|
|
466
|
+
- **docXMLater**: Create, modify, format DOCX files (read-write)
|
|
467
|
+
|
|
468
|
+
The `.mcp.json` configuration is for development assistance only and does not represent a runtime integration between the two projects.
|
|
469
|
+
|
|
470
|
+
## Testing
|
|
471
|
+
|
|
472
|
+
The framework includes comprehensive test coverage:
|
|
473
|
+
|
|
474
|
+
- **2073+ test cases** across 59 test files
|
|
475
|
+
- Tests cover all phases of implementation
|
|
476
|
+
- Integration tests for complex scenarios
|
|
477
|
+
- Performance benchmarks
|
|
478
|
+
- Edge case validation
|
|
479
|
+
|
|
480
|
+
Run tests:
|
|
481
|
+
|
|
482
|
+
```bash
|
|
483
|
+
npm test # Run all tests
|
|
484
|
+
npm run test:watch # Watch mode
|
|
485
|
+
npm run test:coverage # Coverage report
|
|
486
|
+
```
|
|
487
|
+
|
|
488
|
+
## Performance Considerations
|
|
489
|
+
|
|
490
|
+
- Use `dispose()` to free resources after document operations
|
|
491
|
+
- Buffer-based operations are faster than file I/O
|
|
492
|
+
- Batch hyperlink updates are 30-50% faster than manual iteration
|
|
493
|
+
- Large documents (1000+ pages) supported with memory management
|
|
494
|
+
- Streaming support for very large files
|
|
495
|
+
|
|
496
|
+
## Architecture
|
|
497
|
+
|
|
498
|
+
The framework follows a modular architecture:
|
|
499
|
+
|
|
500
|
+
```
|
|
501
|
+
src/
|
|
502
|
+
├── core/ # Document, Parser, Generator, Validator
|
|
503
|
+
├── elements/ # Paragraph, Run, Table, Image, etc.
|
|
504
|
+
├── formatting/ # Style, Numbering managers
|
|
505
|
+
├── managers/ # Drawing, Image, Relationship managers
|
|
506
|
+
├── xml/ # XML generation and parsing
|
|
507
|
+
├── zip/ # ZIP archive handling
|
|
508
|
+
└── utils/ # Validation, units, error handling
|
|
509
|
+
```
|
|
510
|
+
|
|
511
|
+
Key design principles:
|
|
512
|
+
|
|
513
|
+
- KISS (Keep It Simple, Stupid) - no over-engineering
|
|
514
|
+
- Position-based XML parsing (ReDoS-safe)
|
|
515
|
+
- Defensive programming with comprehensive validation
|
|
516
|
+
- Memory-efficient with explicit disposal pattern
|
|
517
|
+
- Full ECMA-376 (OpenXML) compliance
|
|
518
|
+
|
|
519
|
+
## Security
|
|
520
|
+
|
|
521
|
+
docXMLater includes multiple security measures to protect against common attack vectors:
|
|
522
|
+
|
|
523
|
+
### ReDoS Prevention
|
|
524
|
+
|
|
525
|
+
The XML parser uses position-based parsing instead of regular expressions, preventing catastrophic backtracking attacks that can cause denial of service.
|
|
526
|
+
|
|
527
|
+
### Input Validation
|
|
528
|
+
|
|
529
|
+
**Size Limits:**
|
|
530
|
+
- Default document size limit: 150 MB (configurable)
|
|
531
|
+
- Warning threshold: 50 MB
|
|
532
|
+
- XML content size validation before parsing
|
|
533
|
+
|
|
534
|
+
```typescript
|
|
535
|
+
// Configure size limits
|
|
536
|
+
const doc = await Document.load("large.docx", {
|
|
537
|
+
sizeLimits: {
|
|
538
|
+
warningSizeMB: 100,
|
|
539
|
+
maxSizeMB: 500,
|
|
540
|
+
},
|
|
541
|
+
});
|
|
542
|
+
```
|
|
543
|
+
|
|
544
|
+
**Nesting Depth:**
|
|
545
|
+
- Maximum XML nesting depth: 256 (configurable)
|
|
546
|
+
- Prevents stack overflow attacks
|
|
547
|
+
|
|
548
|
+
```typescript
|
|
549
|
+
import { XMLParser } from "docxmlater";
|
|
550
|
+
|
|
551
|
+
// Parse with custom depth limit
|
|
552
|
+
const obj = XMLParser.parseToObject(xml, {
|
|
553
|
+
maxNestingDepth: 512, // Increase if needed
|
|
554
|
+
});
|
|
555
|
+
```
|
|
556
|
+
|
|
557
|
+
### Path Traversal Prevention
|
|
558
|
+
|
|
559
|
+
File paths within DOCX archives are validated to prevent directory traversal attacks:
|
|
560
|
+
- Blocks `../` path sequences
|
|
561
|
+
- Blocks absolute paths
|
|
562
|
+
- Validates URL-encoded path components
|
|
563
|
+
|
|
564
|
+
### XML Injection Prevention
|
|
565
|
+
|
|
566
|
+
All text content is properly escaped using:
|
|
567
|
+
- `XMLBuilder.escapeXmlText()` for element content
|
|
568
|
+
- `XMLBuilder.escapeXmlAttribute()` for attribute values
|
|
569
|
+
|
|
570
|
+
This prevents injection of malicious XML elements through user-provided text content.
|
|
571
|
+
|
|
572
|
+
### UTF-8 Encoding
|
|
573
|
+
|
|
574
|
+
All text files are explicitly UTF-8 encoded per the ECMA-376 specification, preventing encoding-related vulnerabilities.
|
|
575
|
+
|
|
576
|
+
## Requirements
|
|
577
|
+
|
|
578
|
+
- Node.js 18.0.0 or higher
|
|
579
|
+
- TypeScript 5.0+ (for development)
|
|
580
|
+
|
|
581
|
+
## Dependencies
|
|
582
|
+
|
|
583
|
+
- `jszip` - ZIP archive handling
|
|
584
|
+
|
|
585
|
+
## License
|
|
586
|
+
|
|
587
|
+
MIT
|
|
588
|
+
|
|
589
|
+
## Contributing
|
|
590
|
+
|
|
591
|
+
Contributions welcome! Please:
|
|
592
|
+
|
|
593
|
+
1. Fork the repository
|
|
594
|
+
2. Create a feature branch
|
|
595
|
+
3. Add tests for new features
|
|
596
|
+
4. Ensure all tests pass
|
|
597
|
+
5. Submit a pull request
|
|
598
|
+
|
|
599
|
+
## Support
|
|
600
|
+
|
|
601
|
+
- GitHub Issues: https://github.com/ItMeDiaTech/docXMLater/issues
|
|
602
|
+
- Documentation: See CLAUDE.md for detailed implementation notes
|
|
603
|
+
|
|
604
|
+
## Acknowledgments
|
|
605
|
+
|
|
606
|
+
Built with careful attention to the ECMA-376 Office Open XML specification. Special thanks to the OpenXML community for comprehensive documentation and examples.
|