@otomate/docx 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +2 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -1
- package/dist/index.js.map +1 -1
- package/dist/reader.d.ts.map +1 -1
- package/dist/reader.js +77 -20
- package/dist/reader.js.map +1 -1
- package/dist/writer.d.ts +19 -0
- package/dist/writer.d.ts.map +1 -1
- package/dist/writer.js +590 -72
- package/dist/writer.js.map +1 -1
- package/dist/zip.d.ts.map +1 -1
- package/dist/zip.js +25 -2
- package/dist/zip.js.map +1 -1
- package/package.json +2 -2
package/dist/writer.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
// ---------------------------------------------------------------------------
|
|
2
2
|
// docx Writer — UDM tree → .docx buffer
|
|
3
3
|
// ---------------------------------------------------------------------------
|
|
4
|
+
import { createHash } from "node:crypto";
|
|
4
5
|
import { isParent, isText } from "@otomate/core";
|
|
5
6
|
import { buildStyleElement, cssToRunProps, cssToParaProps, parseLengthTwips, parseColor } from "@otomate/css-docx";
|
|
6
7
|
import { packDocx } from "./zip.js";
|
|
@@ -14,16 +15,28 @@ export async function writeDocx(tree, options) {
|
|
|
14
15
|
const cssData = tree.data?.css;
|
|
15
16
|
const mergedClassCss = { ...cssData?.classRules, ...options?.cssClasses };
|
|
16
17
|
const elementCss = cssData?.elementRules ?? {};
|
|
18
|
+
const docxDataPre = tree.data?.docx;
|
|
19
|
+
const existingRels = docxDataPre?.relationships;
|
|
17
20
|
const ctx = {
|
|
18
21
|
cssClasses: mergedClassCss,
|
|
19
22
|
cssElements: elementCss,
|
|
20
23
|
customStyles: [],
|
|
21
24
|
generatedStyleIds: new Set(),
|
|
22
25
|
nextNumId: 3,
|
|
26
|
+
allocatedNums: new Map(),
|
|
23
27
|
hyperlinks: new Map(),
|
|
24
|
-
|
|
28
|
+
// Seed past any existing rIds to avoid collisions on round-trip.
|
|
29
|
+
nextRId: nextRIdFor(existingRels),
|
|
25
30
|
};
|
|
26
|
-
const
|
|
31
|
+
const renderedBlocks = tree.children.map(child => convertBlock(child, ctx, 0));
|
|
32
|
+
// OOXML allows w:tbl as the last body child, but Word always inserts a trailing
|
|
33
|
+
// empty paragraph after a final table. Match that behavior so re-opening in Word
|
|
34
|
+
// produces no diff.
|
|
35
|
+
const lastBlock = tree.children[tree.children.length - 1];
|
|
36
|
+
if (lastBlock?.type === "table") {
|
|
37
|
+
renderedBlocks.push("<w:p/>");
|
|
38
|
+
}
|
|
39
|
+
const bodyXml = renderedBlocks.join("\n");
|
|
27
40
|
const docxData = tree.data?.docx;
|
|
28
41
|
// Build styles.xml with any generated custom styles
|
|
29
42
|
let stylesXml = docxData?.styles ?? DEFAULT_STYLES;
|
|
@@ -46,17 +59,29 @@ ${bodyXml}
|
|
|
46
59
|
// Inject before closing </Relationships>
|
|
47
60
|
relsXml = relsXml.replace("</Relationships>", ` ${hlRels}\n</Relationships>`);
|
|
48
61
|
}
|
|
62
|
+
// Inject any newly-allocated <w:num> entries into numbering.xml so each
|
|
63
|
+
// top-level list has its own num definition (independent counters & styles).
|
|
64
|
+
let numberingXml = docxData?.numbering ?? DEFAULT_NUMBERING;
|
|
65
|
+
if (ctx.allocatedNums.size > 0) {
|
|
66
|
+
const newNums = [...ctx.allocatedNums]
|
|
67
|
+
.map(([numId, abstractId]) => `<w:num w:numId="${numId}"><w:abstractNumId w:val="${abstractId}"/></w:num>`)
|
|
68
|
+
.join("\n ");
|
|
69
|
+
numberingXml = numberingXml.replace("</w:numbering>", ` ${newNums}\n</w:numbering>`);
|
|
70
|
+
}
|
|
49
71
|
const parts = {
|
|
50
72
|
document: docXml,
|
|
51
73
|
styles: stylesXml,
|
|
52
|
-
numbering:
|
|
74
|
+
numbering: numberingXml,
|
|
53
75
|
relationships: relsXml,
|
|
54
76
|
contentTypes: docxData?.contentTypes ?? undefined,
|
|
55
77
|
media: new Map(),
|
|
56
78
|
rawParts: new Map(),
|
|
57
79
|
};
|
|
58
|
-
// Embed UDM tree as custom part for lossless round-trip with otomate
|
|
80
|
+
// Embed UDM tree as custom part for lossless round-trip with otomate.
|
|
81
|
+
// The hash binds the snapshot to this exact document.xml so the reader
|
|
82
|
+
// can detect external edits (Word saves) and fall back to OOXML parsing.
|
|
59
83
|
const udmSnapshot = stripDataForSnapshot(tree);
|
|
84
|
+
udmSnapshot.__docHash = hashString(docXml);
|
|
60
85
|
parts.rawParts.set("word/otomate-udm.json", JSON.stringify(udmSnapshot));
|
|
61
86
|
// Embed CSS rules if present
|
|
62
87
|
if (cssData && (Object.keys(cssData.classRules ?? {}).length > 0 || Object.keys(cssData.elementRules ?? {}).length > 0)) {
|
|
@@ -76,6 +101,37 @@ ${bodyXml}
|
|
|
76
101
|
}
|
|
77
102
|
return packDocx(parts);
|
|
78
103
|
}
|
|
104
|
+
/**
 * Pick the numeric suffix for the next hyperlink rId so that it cannot collide
 * with relationships carried over from a round-tripped rels file.
 *
 * The result is one past the LARGEST existing `rIdN` (not the smallest free
 * slot) and never below 100, which keeps new ids well above the default
 * rId1/rId2. Without a source rels file the result is 100.
 *
 * @param {string | undefined} existingRels Raw relationships XML, if any.
 * @returns {number} Numeric suffix to use for the next allocated rId.
 */
function nextRIdFor(existingRels) {
    const FLOOR = 100;
    if (!existingRels)
        return FLOOR;
    // XML permits either quote style and whitespace around "=", so accept both.
    const idPattern = /\bId\s*=\s*(["'])rId(\d+)\1/g;
    let highest = 0;
    for (const match of existingRels.matchAll(idPattern)) {
        highest = Math.max(highest, Number(match[2]));
    }
    return Math.max(highest + 1, FLOOR);
}
|
|
122
|
+
/**
 * Reserve the next w:numId for a top-level list and record it on the context
 * so the writer can emit a matching <w:num> entry in numbering.xml.
 *
 * Giving every top-level list its own numId makes ordered lists restart at 1
 * rather than continuing the previous list, and keeps bullet styling of one
 * list from leaking into the others.
 *
 * @param ctx Writer context; `nextNumId` is advanced and `allocatedNums` gains an entry.
 * @param ordered Truthy for an ordered list (recorded as 1), otherwise recorded as 0.
 * @returns {string} The newly reserved numId, as a string.
 */
function allocNumId(ctx, ordered) {
    const reserved = String(ctx.nextNumId);
    ctx.nextNumId += 1;
    const recordedValue = ordered ? 1 : 0;
    ctx.allocatedNums.set(reserved, recordedValue);
    return reserved;
}
|
|
79
135
|
function allocHyperlinkRId(ctx, url) {
|
|
80
136
|
// Reuse existing rId for same URL
|
|
81
137
|
for (const [rId, existingUrl] of ctx.hyperlinks) {
|
|
@@ -86,6 +142,36 @@ function allocHyperlinkRId(ctx, url) {
|
|
|
86
142
|
ctx.hyperlinks.set(rId, url);
|
|
87
143
|
return rId;
|
|
88
144
|
}
|
|
145
|
+
// ---------------------------------------------------------------------------
|
|
146
|
+
// Diff helpers
|
|
147
|
+
// ---------------------------------------------------------------------------
|
|
148
|
+
/**
 * Walk the tree depth-first and map every node object to its path key: the
 * child indices from the root joined with "," (the root itself maps to "").
 * Used by writeDiffDocx.
 */
function buildNodePathMap(tree) {
    const pathByNode = new Map();
    const visit = (current, indices) => {
        pathByNode.set(current, indices.join(","));
        if (!isParent(current))
            return;
        current.children.forEach((child, position) => {
            visit(child, indices.concat(position));
        });
    };
    visit(tree, []);
    return pathByNode;
}
|
|
160
|
+
/**
 * Index a DiffResult into three path-keyed lookup maps (inserted, deleted,
 * text-changed). Operations of any other type (MoveOp, UpdateOp) are ignored
 * for v1. The revision counter starts at 1.
 */
function buildDiffCtx(diffResult, author, date) {
    const diffCtx = {
        insertedPaths: new Map(),
        deletedPaths: new Map(),
        textChangePaths: new Map(),
        revId: { value: 1 },
        author,
        date,
    };
    // Operation type → the map that indexes it.
    const bucketFor = new Map([
        ["insert", diffCtx.insertedPaths],
        ["delete", diffCtx.deletedPaths],
        ["updateText", diffCtx.textChangePaths],
    ]);
    for (const op of diffResult.operations) {
        // Optional call short-circuits, so op.path is only read for indexed types.
        bucketFor.get(op.type)?.set(op.path.join(","), op);
    }
    return diffCtx;
}
|
|
89
175
|
function convertBlock(node, ctx, listIlvl) {
|
|
90
176
|
switch (node.type) {
|
|
91
177
|
case "paragraph": return convertParagraph(node, ctx);
|
|
@@ -99,7 +185,7 @@ function convertBlock(node, ctx, listIlvl) {
|
|
|
99
185
|
case "div":
|
|
100
186
|
case "figure":
|
|
101
187
|
return convertDiv(node, ctx, listIlvl);
|
|
102
|
-
case "html": return `<w:p><w:r><w:t>${esc(node.value)}</w:t></w:r></w:p>`;
|
|
188
|
+
case "html": return `<w:p><w:r><w:t xml:space="preserve">${esc(node.value)}</w:t></w:r></w:p>`;
|
|
103
189
|
default: return "";
|
|
104
190
|
}
|
|
105
191
|
}
|
|
@@ -124,12 +210,16 @@ function convertDiv(node, ctx, listIlvl) {
|
|
|
124
210
|
const runProps = cssToRunProps(containerCss);
|
|
125
211
|
const containerRPr = buildRunPropsXml(runProps);
|
|
126
212
|
const bg = containerCss["background-color"] ? parseColorSafe(containerCss["background-color"]) : undefined;
|
|
127
|
-
// Build extra pPr elements to inject into child paragraphs
|
|
128
|
-
|
|
213
|
+
// Build extra pPr elements to inject into child paragraphs.
|
|
214
|
+
// Order doesn't matter here — injectPPr re-sorts via serializePPr.
|
|
215
|
+
const extraPPrParts = [];
|
|
216
|
+
const pBdrXml = buildPBdrXml(paraProps.pBdr);
|
|
217
|
+
if (pBdrXml)
|
|
218
|
+
extraPPrParts.push(pBdrXml);
|
|
129
219
|
if (bg && bg !== "auto")
|
|
130
|
-
|
|
220
|
+
extraPPrParts.push(`<w:shd w:val="clear" w:color="auto" w:fill="${bg}"/>`);
|
|
131
221
|
if (paraProps.jc)
|
|
132
|
-
|
|
222
|
+
extraPPrParts.push(`<w:jc w:val="${paraProps.jc}"/>`);
|
|
133
223
|
if (paraProps.ind) {
|
|
134
224
|
const attrs = [];
|
|
135
225
|
if (paraProps.ind.left)
|
|
@@ -137,14 +227,14 @@ function convertDiv(node, ctx, listIlvl) {
|
|
|
137
227
|
if (paraProps.ind.right)
|
|
138
228
|
attrs.push(`w:right="${paraProps.ind.right}"`);
|
|
139
229
|
if (attrs.length)
|
|
140
|
-
|
|
230
|
+
extraPPrParts.push(`<w:ind ${attrs.join(" ")}/>`);
|
|
141
231
|
}
|
|
142
232
|
// Create a temporary modified context that includes the container's styles
|
|
143
233
|
// so child paragraphs inherit them
|
|
144
234
|
const childCtx = {
|
|
145
235
|
...ctx,
|
|
146
236
|
_containerRPr: containerRPr,
|
|
147
|
-
|
|
237
|
+
_containerPPrParts: extraPPrParts,
|
|
148
238
|
};
|
|
149
239
|
return node.children.map(c => convertBlockWithContainer(c, childCtx, listIlvl)).join("\n");
|
|
150
240
|
}
|
|
@@ -152,17 +242,16 @@ function convertDiv(node, ctx, listIlvl) {
|
|
|
152
242
|
function convertBlockWithContainer(node, ctx, listIlvl) {
|
|
153
243
|
const containerCtx = ctx;
|
|
154
244
|
const extraRPr = containerCtx._containerRPr ?? "";
|
|
155
|
-
const
|
|
245
|
+
const extraPPrParts = containerCtx._containerPPrParts ?? [];
|
|
156
246
|
// Helper: resolve all CSS run props for this node (element + own classes)
|
|
157
247
|
const nodeRPr = resolveBlockCssRunProps(node, ctx);
|
|
158
248
|
const mergedRPr = extraRPr + nodeRPr;
|
|
159
|
-
// Helper:
|
|
249
|
+
// Helper: merge container pPr parts into an existing pPr string with schema ordering.
|
|
160
250
|
function injectPPr(pPr) {
|
|
161
|
-
if (
|
|
251
|
+
if (extraPPrParts.length === 0)
|
|
162
252
|
return pPr;
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
return `<w:pPr>${extraPPr}</w:pPr>`;
|
|
253
|
+
const baseChildren = pPr ? extractPPrChildren(pPr) : [];
|
|
254
|
+
return serializePPr([...baseChildren, ...extraPPrParts]);
|
|
166
255
|
}
|
|
167
256
|
if (node.type === "paragraph") {
|
|
168
257
|
const para = node;
|
|
@@ -188,11 +277,12 @@ function convertBlockWithContainer(node, ctx, listIlvl) {
|
|
|
188
277
|
const classes = h.classes;
|
|
189
278
|
if (classes?.length) {
|
|
190
279
|
for (const cls of classes) {
|
|
191
|
-
|
|
192
|
-
|
|
280
|
+
const clsId = sanitizeStyleId(cls);
|
|
281
|
+
if (ctx.cssClasses[cls] && !ctx.generatedStyleIds.has(clsId)) {
|
|
282
|
+
ctx.generatedStyleIds.add(clsId);
|
|
193
283
|
const rp = cssToRunProps(ctx.cssClasses[cls]);
|
|
194
284
|
const pp = cssToParaProps(ctx.cssClasses[cls]);
|
|
195
|
-
ctx.customStyles.push(buildStyleElement(
|
|
285
|
+
ctx.customStyles.push(buildStyleElement(clsId, cls, "paragraph", rp, pp));
|
|
196
286
|
}
|
|
197
287
|
}
|
|
198
288
|
}
|
|
@@ -202,7 +292,7 @@ function convertBlockWithContainer(node, ctx, listIlvl) {
|
|
|
202
292
|
if (node.type === "list") {
|
|
203
293
|
// Pass container formatting down to list items
|
|
204
294
|
const list = node;
|
|
205
|
-
const numId = list.ordered
|
|
295
|
+
const numId = allocNumId(ctx, list.ordered);
|
|
206
296
|
const parts = [];
|
|
207
297
|
for (const item of list.children) {
|
|
208
298
|
if (item.type !== "listItem")
|
|
@@ -228,7 +318,8 @@ function convertBlockWithContainer(node, ctx, listIlvl) {
|
|
|
228
318
|
parts.push(`<w:p>${pPr}${runs}</w:p>`);
|
|
229
319
|
}
|
|
230
320
|
else if (child.type === "list") {
|
|
231
|
-
|
|
321
|
+
// Inherit parent numId so nested levels share the same w:num entry.
|
|
322
|
+
parts.push(convertList(child, ctx, listIlvl + 1, numId));
|
|
232
323
|
}
|
|
233
324
|
else {
|
|
234
325
|
// div, blockquote, codeBlock, etc. — recurse with container inheritance
|
|
@@ -241,18 +332,35 @@ function convertBlockWithContainer(node, ctx, listIlvl) {
|
|
|
241
332
|
// For other children, use normal conversion with container props cleared
|
|
242
333
|
const cleanCtx = { ...ctx };
|
|
243
334
|
delete cleanCtx._containerRPr;
|
|
244
|
-
delete cleanCtx.
|
|
335
|
+
delete cleanCtx._containerPPrParts;
|
|
245
336
|
return convertBlock(node, cleanCtx, listIlvl);
|
|
246
337
|
}
|
|
247
338
|
/**
 * Render a paragraph node as <w:p>. When a diff context is active and this
 * node's path is marked as inserted, rendering is delegated to
 * convertInsertedParagraph so the paragraph is emitted as a tracked insertion.
 */
function convertParagraph(node, ctx) {
    const pPr = buildPPr(node, ctx);
    const cssRPr = resolveBlockCssRunProps(node, ctx);
    const diffActive = Boolean(ctx.diff && ctx.nodePathKeys);
    const pathKey = diffActive ? ctx.nodePathKeys.get(node) : undefined;
    if (pathKey !== undefined && ctx.diff.insertedPaths.has(pathKey)) {
        return convertInsertedParagraph(node, pPr, cssRPr, ctx);
    }
    const runXml = node.children
        .map(child => convertInline(child, ctx, cssRPr))
        .join("");
    return `<w:p>${pPr}${runXml}</w:p>`;
}
|
|
253
350
|
function convertHeading(node, ctx) {
|
|
254
351
|
const tag = `h${node.depth}`;
|
|
255
352
|
const cssRPr = ctx.cssElements[tag] ? buildRunPropsXml(cssToRunProps(ctx.cssElements[tag])) : "";
|
|
353
|
+
if (ctx.diff && ctx.nodePathKeys) {
|
|
354
|
+
const key = ctx.nodePathKeys.get(node);
|
|
355
|
+
if (key !== undefined && ctx.diff.insertedPaths.has(key)) {
|
|
356
|
+
if (ctx.cssElements[tag])
|
|
357
|
+
ensureElementStyle(tag, ctx);
|
|
358
|
+
const pPr = ctx.cssElements[tag]
|
|
359
|
+
? `<w:pPr><w:pStyle w:val="_el_${tag}"/></w:pPr>`
|
|
360
|
+
: `<w:pPr><w:pStyle w:val="Heading${node.depth}"/></w:pPr>`;
|
|
361
|
+
return convertInsertedParagraph(node, pPr, cssRPr, ctx);
|
|
362
|
+
}
|
|
363
|
+
}
|
|
256
364
|
const runs = node.children.map(c => convertInline(c, ctx, cssRPr)).join("");
|
|
257
365
|
if (ctx.cssElements[tag]) {
|
|
258
366
|
ensureElementStyle(tag, ctx);
|
|
@@ -260,6 +368,25 @@ function convertHeading(node, ctx) {
|
|
|
260
368
|
}
|
|
261
369
|
return `<w:p><w:pPr><w:pStyle w:val="Heading${node.depth}"/></w:pPr>${runs}</w:p>`;
|
|
262
370
|
}
|
|
371
|
+
/**
 * Render a paragraph (or a heading treated as a paragraph) as a tracked
 * insertion: the paragraph mark gets a <w:ins/> marker and every non-empty
 * inline child is wrapped in its own <w:ins>. Each marker consumes one
 * revision id from ctx.diff.revId.
 */
function convertInsertedParagraph(node, pPr, cssRPr, ctx) {
    const { diff } = ctx;
    const author = esc(diff.author);
    const date = esc(diff.date);
    // Shared attribute list; only the id differs between markers.
    const insAttrs = (revision) => `w:id="${revision}" w:author="${author}" w:date="${date}"`;
    // The paragraph mark takes the first id, then the runs follow in document order.
    const markRevision = diff.revId.value++;
    const trackedPPr = mergeParaMarkChild(pPr, `<w:ins ${insAttrs(markRevision)}/>`);
    const wrappedRuns = [];
    for (const child of node.children) {
        const runXml = convertInline(child, ctx, cssRPr);
        if (!runXml)
            continue; // nothing rendered, so no revision id is consumed
        const runRevision = diff.revId.value++;
        wrappedRuns.push(`<w:ins ${insAttrs(runRevision)}>${runXml}</w:ins>`);
    }
    return `<w:p>${trackedPPr}${wrappedRuns.join("")}</w:p>`;
}
|
|
263
390
|
/** Resolve CSS run properties for a block node (from element + class CSS) */
|
|
264
391
|
function resolveBlockCssRunProps(node, ctx) {
|
|
265
392
|
const tag = udmTypeToHtmlTag(node.type, node);
|
|
@@ -278,8 +405,163 @@ function resolveBlockCssRunProps(node, ctx) {
|
|
|
278
405
|
return "";
|
|
279
406
|
return buildRunPropsXml(cssToRunProps(merged));
|
|
280
407
|
}
|
|
281
|
-
|
|
408
|
+
// ---------------------------------------------------------------------------
|
|
409
|
+
// ECMA-376 schema-order maps for w:rPr and w:pPr children
|
|
410
|
+
// (validators reject out-of-order elements in strict mode)
|
|
411
|
+
// ---------------------------------------------------------------------------
|
|
412
|
+
// Rank of each w:rPr child; ranks are 1-based positions in the list below.
const RPR_ORDER = Object.fromEntries([
    "rStyle", "rFonts", "b", "bCs", "i", "iCs",
    "caps", "smallCaps", "strike", "dstrike",
    "outline", "shadow", "emboss", "imprint",
    "noProof", "snapToGrid", "vanish", "webHidden",
    "color", "spacing", "w", "kern", "position",
    "sz", "szCs", "highlight", "u", "effect",
    "bdr", "shd", "fitText", "vertAlign", "rtl",
    "cs", "em", "lang", "eastAsianLayout",
    "specVanish", "oMath",
].map((localName, index) => [`w:${localName}`, index + 1]));
|
|
423
|
+
// Rank of each w:pPr child; ranks are 1-based positions in the list below.
const PPR_ORDER = Object.fromEntries([
    "pStyle", "keepNext", "keepLines", "pageBreakBefore",
    "framePr", "widowControl", "numPr", "suppressLineNumbers",
    "pBdr", "shd", "tabs", "suppressAutoHyphens",
    "kinsoku", "wordWrap", "overflowPunct", "topLinePunct",
    "autoSpaceDE", "autoSpaceDN", "bidi", "adjustRightInd",
    "snapToGrid", "spacing", "ind", "contextualSpacing",
    "mirrorIndents", "suppressOverlap", "jc", "textDirection",
    "textAlignment", "textboxTightWrap", "outlineLvl",
    "divId", "cnfStyle", "rPr", "sectPr", "pPrChange",
].map((localName, index) => [`w:${localName}`, index + 1]));
|
|
434
|
+
/** Return the qualified name (e.g. "w:b") of the w:-prefixed element that `xml` starts with, or "" if there is none. */
function getElementTag(xml) {
    const [, tagName = ""] = xml.match(/^<(w:[A-Za-z]+)/) ?? [];
    return tagName;
}
|
|
438
|
+
/**
 * Split the inner XML of an OOXML container into its top-level child elements.
 *
 * Only elements whose name is `w:` followed by ASCII letters are recognised;
 * any other text between elements is skipped. Self-closing children are
 * returned as-is; a parent element is returned together with everything up to
 * its matching close tag. If a parent element is never closed, the remainder
 * of the input becomes the final token.
 *
 * @param {string} xml Concatenated child elements (no wrapper).
 * @returns {string[]} One raw XML string per top-level child, in input order.
 */
function tokenizeOoxmlChildren(xml) {
    const tokens = [];
    // Sticky so the name is matched in place rather than on a fresh slice per tag.
    const openName = /<(w:[A-Za-z]+)/y;
    // Index of the ">" that ends the tag starting before `from`, skipping ">"
    // inside single- or double-quoted attribute values; -1 if unterminated.
    const findTagEnd = (from) => {
        let quote = "";
        for (let p = from; p < xml.length; p++) {
            const ch = xml[p];
            if (quote !== "") {
                if (ch === quote)
                    quote = "";
            }
            else if (ch === '"' || ch === "'") {
                quote = ch;
            }
            else if (ch === ">") {
                return p;
            }
        }
        return -1;
    };
    let i = 0;
    while (i < xml.length) {
        openName.lastIndex = i;
        const nameMatch = openName.exec(xml);
        if (!nameMatch) {
            i++;
            continue;
        }
        const name = nameMatch[1];
        const j = findTagEnd(i + 1);
        if (j === -1)
            break;
        if (xml[j - 1] === "/") {
            tokens.push(xml.slice(i, j + 1));
            i = j + 1;
            continue;
        }
        const openTag = `<${name}`;
        const closeTag = `</${name}>`;
        let depth = 1;
        let k = j + 1;
        while (k < xml.length && depth > 0) {
            if (xml.startsWith(closeTag, k)) {
                depth--;
                k += closeTag.length;
                continue;
            }
            const after = xml[k + openTag.length];
            // Require a delimiter so <w:rPrChange> is not mistaken for a nested <w:rPr>.
            if (after !== undefined && xml.startsWith(openTag, k) && /[\s>/]/.test(after)) {
                const end = findTagEnd(k + 1);
                if (end === -1) {
                    k = xml.length;
                    break;
                }
                // A self-closing same-name child has no close tag, so it must not add depth.
                if (xml[end - 1] !== "/")
                    depth++;
                k = end + 1;
                continue;
            }
            k++;
        }
        tokens.push(xml.slice(i, k));
        i = k;
    }
    return tokens;
}
|
|
495
|
+
/**
 * De-duplicate rPr/pPr child element strings by tag and sort them by the given
 * rank table. When a tag occurs more than once the last occurrence wins;
 * elements without a recognisable tag are dropped; tags missing from `order`
 * rank as 999 and therefore sort last, keeping their relative order.
 */
function orderElements(elements, order) {
    const latestByTag = new Map();
    for (const elementXml of elements) {
        const tag = getElementTag(elementXml);
        if (!tag)
            continue;
        latestByTag.set(tag, elementXml); // later sources override earlier ones
    }
    const rankOf = (tag) => order[tag] ?? 999;
    const sortedPairs = Array.from(latestByTag);
    sortedPairs.sort((left, right) => rankOf(left[0]) - rankOf(right[0]));
    return sortedPairs.map((pair) => pair[1]);
}
|
|
507
|
+
/** Order and de-duplicate rPr children, then wrap them in <w:rPr>. Yields "" when nothing remains. */
function serializeRPr(parts) {
    const children = orderElements(parts, RPR_ORDER);
    if (children.length === 0)
        return "";
    return `<w:rPr>${children.join("")}</w:rPr>`;
}
|
|
512
|
+
/** Order and de-duplicate pPr children, then wrap them in <w:pPr>. Yields "" when nothing remains. */
function serializePPr(parts) {
    const children = orderElements(parts, PPR_ORDER);
    if (children.length === 0)
        return "";
    return `<w:pPr>${children.join("")}</w:pPr>`;
}
|
|
517
|
+
/**
 * Extract the children of an existing <w:pPr>...</w:pPr> wrapper as raw element strings.
 *
 * The content runs from the first opening <w:pPr> tag (attributes allowed) to
 * the LAST </w:pPr>, so a nested <w:pPrChange><w:pPr>…</w:pPr></w:pPrChange>
 * does not cut the content short. Returns [] when there is no wrapper or the
 * wrapper is self-closing.
 *
 * @param {string} pPr XML containing a single w:pPr element.
 * @returns {string[]} Raw XML of each direct child.
 */
function extractPPrChildren(pPr) {
    // The delimiter after the name keeps <w:pPrChange> from matching as the wrapper.
    const open = /<w:pPr(?:\s[^>]*)?>/.exec(pPr);
    if (!open || open[0].endsWith("/>"))
        return [];
    const contentStart = open.index + open[0].length;
    const contentEnd = pPr.lastIndexOf("</w:pPr>");
    if (contentEnd < contentStart)
        return [];
    return tokenizeOoxmlChildren(pPr.slice(contentStart, contentEnd));
}
|
|
522
|
+
/**
 * Insert a paragraph-mark child (w:ins / w:del / w:rPrChange / ...) into the
 * w:rPr that lives inside a paragraph's w:pPr.
 *
 * An existing inner w:rPr keeps its children and receives `child` as its
 * first child; if none exists a new <w:rPr> holding only `child` is appended.
 * The pPr wrapper is created when `pPr` is empty. The children are passed
 * through serializePPr before being returned.
 */
function mergeParaMarkChild(pPr, child) {
    const children = pPr ? extractPPrChildren(pPr) : [];
    let injected = false;
    for (let idx = 0; idx < children.length; idx++) {
        const elementXml = children[idx];
        const isMarkProps = elementXml.startsWith("<w:rPr>") || elementXml.startsWith("<w:rPr ");
        if (!isMarkProps)
            continue;
        // Tracked-change marks go first inside the paragraph-mark rPr.
        children[idx] = elementXml.replace(/^<w:rPr(\s[^>]*)?>/, (openTag) => `${openTag}${child}`);
        injected = true;
    }
    if (!injected) {
        children.push(`<w:rPr>${child}</w:rPr>`);
    }
    return serializePPr(children);
}
|
|
545
|
+
/**
 * Extract the children of an existing <w:rPr>...</w:rPr> wrapper as raw element strings.
 *
 * The content runs from the first opening <w:rPr> tag (attributes allowed) to
 * the LAST </w:rPr>, so a nested <w:rPrChange><w:rPr>…</w:rPr></w:rPrChange>
 * does not cut the content short. Returns [] when there is no wrapper or the
 * wrapper is self-closing.
 *
 * @param {string} rPr XML containing a single w:rPr element.
 * @returns {string[]} Raw XML of each direct child.
 */
function extractRPrChildren(rPr) {
    // The delimiter after the name keeps <w:rPrChange> from matching as the wrapper.
    const open = /<w:rPr(?:\s[^>]*)?>/.exec(rPr);
    if (!open || open[0].endsWith("/>"))
        return [];
    const contentStart = open.index + open[0].length;
    const contentEnd = rPr.lastIndexOf("</w:rPr>");
    if (contentEnd < contentStart)
        return [];
    return tokenizeOoxmlChildren(rPr.slice(contentStart, contentEnd));
}
|
|
550
|
+
/**
 * Build a `<w:pBdr>` element from a parsed pBdr definition.
 *
 * Sides are emitted in the order top, left, bottom, right, which is the child
 * sequence the WordprocessingML schema defines for paragraph borders;
 * validators that enforce element order reject any other arrangement.
 *
 * @param pBdr Object with optional `top`/`left`/`bottom`/`right` entries, each
 *   providing `val`, `sz` and `color`; may be null/undefined.
 * @returns {string} The `<w:pBdr>` XML, or "" if no side is set.
 */
function buildPBdrXml(pBdr) {
    if (!pBdr)
        return "";
    const SIDES_IN_SCHEMA_ORDER = ["top", "left", "bottom", "right"];
    const borders = [];
    for (const side of SIDES_IN_SCHEMA_ORDER) {
        const bd = pBdr[side];
        if (bd)
            borders.push(`<w:${side} w:val="${bd.val}" w:sz="${bd.sz}" w:space="0" w:color="${bd.color}"/>`);
    }
    return borders.length > 0 ? `<w:pBdr>${borders.join("")}</w:pBdr>` : "";
}
|
|
562
|
+
/** Build w:rPr child elements from OoxmlRunProps (without the wrapper, in schema order). */
|
|
282
563
|
function buildRunPropsXml(props) {
|
|
564
|
+
// Order per ECMA-376 CT_RPr: rFonts(2), b(3), i(5), strike(9), color(19), sz(24), u(27), shd(30)
|
|
283
565
|
const parts = [];
|
|
284
566
|
if (props.rFonts) {
|
|
285
567
|
const attrs = [];
|
|
@@ -293,14 +575,14 @@ function buildRunPropsXml(props) {
|
|
|
293
575
|
parts.push("<w:b/>");
|
|
294
576
|
if (props.i)
|
|
295
577
|
parts.push("<w:i/>");
|
|
296
|
-
if (props.
|
|
297
|
-
parts.push(
|
|
578
|
+
if (props.strike)
|
|
579
|
+
parts.push("<w:strike/>");
|
|
298
580
|
if (props.color)
|
|
299
581
|
parts.push(`<w:color w:val="${props.color}"/>`);
|
|
582
|
+
if (props.sz)
|
|
583
|
+
parts.push(`<w:sz w:val="${props.sz}"/>`);
|
|
300
584
|
if (props.u)
|
|
301
585
|
parts.push(`<w:u w:val="${props.u}"/>`);
|
|
302
|
-
if (props.strike)
|
|
303
|
-
parts.push("<w:strike/>");
|
|
304
586
|
if (props.shd)
|
|
305
587
|
parts.push(`<w:shd w:val="clear" w:color="auto" w:fill="${props.shd}"/>`);
|
|
306
588
|
return parts.join("");
|
|
@@ -318,6 +600,7 @@ function convertBlockquote(node, ctx) {
|
|
|
318
600
|
}).join("\n");
|
|
319
601
|
}
|
|
320
602
|
function buildBlockquotePPr(css) {
|
|
603
|
+
// Collect in any order — serializePPr applies ECMA-376 ordering.
|
|
321
604
|
const parts = [];
|
|
322
605
|
// Left border (vertical line) — always add for blockquotes
|
|
323
606
|
parts.push(`<w:pBdr><w:left w:val="single" w:sz="18" w:space="4" w:color="${css["border-color"] ? parseColorSafe(css["border-color"]) : "AAAAAA"}"/></w:pBdr>`);
|
|
@@ -341,10 +624,13 @@ function buildBlockquotePPr(css) {
|
|
|
341
624
|
if (attrs.length)
|
|
342
625
|
parts.push(`<w:spacing ${attrs.join(" ")}/>`);
|
|
343
626
|
}
|
|
344
|
-
return
|
|
627
|
+
return serializePPr(parts);
|
|
345
628
|
}
|
|
346
|
-
function convertList(node, ctx, baseIlvl) {
|
|
347
|
-
|
|
629
|
+
function convertList(node, ctx, baseIlvl, parentNumId) {
|
|
630
|
+
// Top-level lists get a fresh numId; nested lists inherit their parent's
|
|
631
|
+
// numId so the abstract numbering definition (bullet vs ordered) and
|
|
632
|
+
// counter continuity stay consistent across levels.
|
|
633
|
+
const numId = parentNumId ?? allocNumId(ctx, node.ordered);
|
|
348
634
|
const liRPr = ctx.cssElements["li"] ? buildRunPropsXml(cssToRunProps(ctx.cssElements["li"])) : "";
|
|
349
635
|
const parts = [];
|
|
350
636
|
for (const item of node.children) {
|
|
@@ -357,7 +643,7 @@ function convertList(node, ctx, baseIlvl) {
|
|
|
357
643
|
parts.push(`<w:p><w:pPr><w:numPr><w:ilvl w:val="${baseIlvl}"/><w:numId w:val="${numId}"/></w:numPr></w:pPr>${runs}</w:p>`);
|
|
358
644
|
}
|
|
359
645
|
else if (child.type === "list") {
|
|
360
|
-
parts.push(convertList(child, ctx, baseIlvl + 1));
|
|
646
|
+
parts.push(convertList(child, ctx, baseIlvl + 1, numId));
|
|
361
647
|
}
|
|
362
648
|
else {
|
|
363
649
|
parts.push(convertBlock(child, ctx, baseIlvl));
|
|
@@ -370,36 +656,62 @@ function convertCodeBlock(node, ctx) {
|
|
|
370
656
|
const preCss = ctx?.cssElements["pre"] ?? {};
|
|
371
657
|
const bg = preCss["background-color"] ? parseColorSafe(preCss["background-color"]) : undefined;
|
|
372
658
|
const preRPr = Object.keys(preCss).length > 0 ? buildRunPropsXml(cssToRunProps(preCss)) : "";
|
|
373
|
-
//
|
|
374
|
-
|
|
659
|
+
// pPr: pStyle(1) before shd(10) — already in correct order
|
|
660
|
+
const pPrParts = [`<w:pStyle w:val="Code"/>`];
|
|
375
661
|
if (bg)
|
|
376
|
-
|
|
377
|
-
pPr
|
|
662
|
+
pPrParts.push(`<w:shd w:val="clear" w:color="auto" w:fill="${bg}"/>`);
|
|
663
|
+
const pPr = serializePPr(pPrParts);
|
|
664
|
+
// rPr: code rFonts + any pre-element CSS (rFonts override is intentional, last write wins)
|
|
665
|
+
const rPrParts = [`<w:rFonts w:ascii="Courier New" w:hAnsi="Courier New"/>`];
|
|
666
|
+
if (preRPr)
|
|
667
|
+
rPrParts.push(...tokenizeOoxmlChildren(preRPr));
|
|
668
|
+
const rPr = serializeRPr(rPrParts);
|
|
378
669
|
const lines = node.value.split("\n");
|
|
379
|
-
return lines.map(line => `<w:p>${pPr}<w:r
|
|
670
|
+
return lines.map(line => `<w:p>${pPr}<w:r>${rPr}<w:t xml:space="preserve">${esc(line)}</w:t></w:r></w:p>`).join("\n");
|
|
671
|
+
}
|
|
672
|
+
/**
 * Compute how many grid columns a table needs: the widest row, measured as the
 * sum of its cells' `colspan` (a missing colspan counts as 1).
 *
 * Only `tableRow` children and, within them, only `tableCell` children are
 * counted, matching the cells convertTable actually renders, so the grid is
 * never wider than the rendered rows.
 *
 * @param node Table node whose `children` are rows.
 * @returns {number} Column count, at least 1.
 */
function countTableColumns(node) {
    let widest = 0;
    for (const row of node.children) {
        if (row.type !== "tableRow")
            continue;
        let width = 0;
        for (const cell of row.children) {
            if (cell.type === "tableCell")
                width += cell.colspan ?? 1;
        }
        widest = Math.max(widest, width);
    }
    return Math.max(widest, 1);
}
|
|
381
683
|
function convertTable(node, ctx) {
|
|
684
|
+
const colCount = countTableColumns(node);
|
|
685
|
+
// Content width: 9360 twips (12240 page − 2×1440 margins)
|
|
686
|
+
const colWidth = Math.round(9360 / colCount);
|
|
687
|
+
const tblGrid = `<w:tblGrid>${Array.from({ length: colCount }, () => `<w:gridCol w:w="${colWidth}"/>`).join("")}</w:tblGrid>`;
|
|
382
688
|
const rows = node.children.map(row => {
|
|
383
689
|
if (row.type !== "tableRow")
|
|
384
690
|
return "";
|
|
385
691
|
const tr = row;
|
|
692
|
+
// ISO 29500 requires every w:tr to contain at least one w:tc.
|
|
693
|
+
// Skip rows with no tableCell children entirely (e.g. an empty <thead><tr/></thead>).
|
|
694
|
+
const cellNodes = tr.children.filter(c => c.type === "tableCell");
|
|
695
|
+
if (cellNodes.length === 0)
|
|
696
|
+
return "";
|
|
386
697
|
const trPr = tr.isHeader ? "<w:trPr><w:tblHeader/></w:trPr>" : "";
|
|
387
|
-
const cells =
|
|
388
|
-
if (cell.type !== "tableCell")
|
|
389
|
-
return "";
|
|
698
|
+
const cells = cellNodes.map(cell => {
|
|
390
699
|
const tc = cell;
|
|
700
|
+
const span = tc.colspan ?? 1;
|
|
391
701
|
let tcPr = "<w:tcPr>";
|
|
392
|
-
|
|
393
|
-
|
|
702
|
+
tcPr += `<w:tcW w:w="${colWidth * span}" w:type="dxa"/>`;
|
|
703
|
+
if (span > 1)
|
|
704
|
+
tcPr += `<w:gridSpan w:val="${span}"/>`;
|
|
394
705
|
tcPr += "</w:tcPr>";
|
|
706
|
+
const emptyPara = { type: "paragraph", children: [] };
|
|
395
707
|
const content = tc.children.length > 0
|
|
396
708
|
? tc.children.map(c => convertBlock(c, ctx, 0)).join("")
|
|
397
|
-
:
|
|
709
|
+
: convertParagraph(emptyPara, ctx);
|
|
398
710
|
return `<w:tc>${tcPr}${content}</w:tc>`;
|
|
399
711
|
}).join("");
|
|
400
712
|
return `<w:tr>${trPr}${cells}</w:tr>`;
|
|
401
|
-
}).join("\n");
|
|
402
|
-
return `<w:tbl><w:tblPr><w:tblW w:w="0" w:type="auto"/><w:tblBorders><w:top w:val="single" w:sz="4" w:color="auto"/><w:left w:val="single" w:sz="4" w:color="auto"/><w:bottom w:val="single" w:sz="4" w:color="auto"/><w:right w:val="single" w:sz="4" w:color="auto"/><w:insideH w:val="single" w:sz="4" w:color="auto"/><w:insideV w:val="single" w:sz="4" w:color="auto"/></w:tblBorders></w:tblPr>${rows}</w:tbl>`;
|
|
713
|
+
}).filter(Boolean).join("\n");
|
|
714
|
+
return `<w:tbl><w:tblPr><w:tblW w:w="0" w:type="auto"/><w:tblBorders><w:top w:val="single" w:sz="4" w:color="auto"/><w:left w:val="single" w:sz="4" w:color="auto"/><w:bottom w:val="single" w:sz="4" w:color="auto"/><w:right w:val="single" w:sz="4" w:color="auto"/><w:insideH w:val="single" w:sz="4" w:color="auto"/><w:insideV w:val="single" w:sz="4" w:color="auto"/></w:tblBorders></w:tblPr>${tblGrid}${rows}</w:tbl>`;
|
|
403
715
|
}
|
|
404
716
|
// ---------------------------------------------------------------------------
|
|
405
717
|
// Inline conversion
|
|
@@ -410,11 +722,14 @@ function convertInline(node, ctx, inheritedRPr = "") {
|
|
|
410
722
|
if (node.type === "break")
|
|
411
723
|
return `<w:r><w:br/></w:r>`;
|
|
412
724
|
if (node.type === "inlineCode") {
|
|
413
|
-
|
|
725
|
+
const parts = [`<w:rFonts w:ascii="Courier New" w:hAnsi="Courier New"/>`];
|
|
726
|
+
if (inheritedRPr)
|
|
727
|
+
parts.push(...tokenizeOoxmlChildren(inheritedRPr));
|
|
728
|
+
return `<w:r>${serializeRPr(parts)}<w:t xml:space="preserve">${esc(node.value)}</w:t></w:r>`;
|
|
414
729
|
}
|
|
415
730
|
if (node.type === "image") {
|
|
416
731
|
const img = node;
|
|
417
|
-
return `<w:r><w:t>[image: ${esc(img.alt ?? img.url)}]</w:t></w:r>`;
|
|
732
|
+
return `<w:r><w:t xml:space="preserve">[image: ${esc(img.alt ?? img.url)}]</w:t></w:r>`;
|
|
418
733
|
}
|
|
419
734
|
return "";
|
|
420
735
|
}
|
|
@@ -423,11 +738,8 @@ function convertText(node, ctx, inheritedRPr = "") {
|
|
|
423
738
|
const otherMarks = node.marks?.filter(m => m.type !== "link") ?? [];
|
|
424
739
|
// Build rPr: inherited CSS props first, then mark-specific overrides
|
|
425
740
|
const parts = [];
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
const elems = inheritedRPr.match(/<w:[^/]*\/>/g) ?? [];
|
|
429
|
-
parts.push(...elems);
|
|
430
|
-
}
|
|
741
|
+
if (inheritedRPr)
|
|
742
|
+
parts.push(...tokenizeOoxmlChildren(inheritedRPr));
|
|
431
743
|
for (const mark of otherMarks) {
|
|
432
744
|
switch (mark.type) {
|
|
433
745
|
case "strong":
|
|
@@ -453,16 +765,42 @@ function convertText(node, ctx, inheritedRPr = "") {
|
|
|
453
765
|
break;
|
|
454
766
|
}
|
|
455
767
|
}
|
|
456
|
-
//
|
|
457
|
-
const
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
768
|
+
// Schema-order + tag-dedup (later sources override earlier).
|
|
769
|
+
const rPr = serializeRPr(parts);
|
|
770
|
+
// Track-changes: check diff context (skip for linked text to avoid nesting complexity)
|
|
771
|
+
if (ctx.diff && ctx.nodePathKeys && !linkMark) {
|
|
772
|
+
const key = ctx.nodePathKeys.get(node);
|
|
773
|
+
if (key !== undefined) {
|
|
774
|
+
const diff = ctx.diff;
|
|
775
|
+
const author = esc(diff.author);
|
|
776
|
+
const date = diff.date;
|
|
777
|
+
if (diff.insertedPaths.has(key)) {
|
|
778
|
+
// Whole text node is new — wrap run in w:ins
|
|
779
|
+
const id = diff.revId.value++;
|
|
780
|
+
return `<w:ins w:id="${id}" w:author="${author}" w:date="${date}"><w:r>${rPr}<w:t xml:space="preserve">${esc(node.value)}</w:t></w:r></w:ins>`;
|
|
781
|
+
}
|
|
782
|
+
if (diff.textChangePaths.has(key)) {
|
|
783
|
+
// Partial text change — emit one element per TextChange segment
|
|
784
|
+
const changes = diff.textChangePaths.get(key).changes;
|
|
785
|
+
return changes.map(change => {
|
|
786
|
+
if (!change.value)
|
|
787
|
+
return "";
|
|
788
|
+
switch (change.type) {
|
|
789
|
+
case "equal":
|
|
790
|
+
return `<w:r>${rPr}<w:t xml:space="preserve">${esc(change.value)}</w:t></w:r>`;
|
|
791
|
+
case "insert": {
|
|
792
|
+
const id = diff.revId.value++;
|
|
793
|
+
return `<w:ins w:id="${id}" w:author="${author}" w:date="${date}"><w:r>${rPr}<w:t xml:space="preserve">${esc(change.value)}</w:t></w:r></w:ins>`;
|
|
794
|
+
}
|
|
795
|
+
case "delete": {
|
|
796
|
+
const id = diff.revId.value++;
|
|
797
|
+
return `<w:del w:id="${id}" w:author="${author}" w:date="${date}"><w:r>${rPr}<w:delText xml:space="preserve">${esc(change.value)}</w:delText></w:r></w:del>`;
|
|
798
|
+
}
|
|
799
|
+
}
|
|
800
|
+
}).join("");
|
|
801
|
+
}
|
|
463
802
|
}
|
|
464
803
|
}
|
|
465
|
-
const rPr = dedupedParts.length > 0 ? `<w:rPr>${dedupedParts.join("")}</w:rPr>` : "";
|
|
466
804
|
if (linkMark) {
|
|
467
805
|
const url = String(linkMark.attrs?.url ?? "");
|
|
468
806
|
const rId = allocHyperlinkRId(ctx, url);
|
|
@@ -473,21 +811,24 @@ function convertText(node, ctx, inheritedRPr = "") {
|
|
|
473
811
|
// ---------------------------------------------------------------------------
|
|
474
812
|
// Paragraph property builder
|
|
475
813
|
// ---------------------------------------------------------------------------
|
|
476
|
-
function
|
|
814
|
+
function buildPPrParts(node, ctx) {
|
|
815
|
+
// Collect parts in any order — serializePPr applies ECMA-376 ordering at the end.
|
|
477
816
|
const parts = [];
|
|
478
817
|
// Classes → pStyle + generate CSS-based Word styles
|
|
479
818
|
const classes = node.classes;
|
|
480
819
|
if (classes && classes.length > 0) {
|
|
481
|
-
|
|
482
|
-
|
|
820
|
+
// Style IDs must be valid OOXML identifiers — sanitize the CSS class name.
|
|
821
|
+
const styleId = sanitizeStyleId(classes[0]);
|
|
822
|
+
parts.push(`<w:pStyle w:val="${styleId}"/>`);
|
|
483
823
|
// Generate custom style from CSS rules (once per styleId)
|
|
484
824
|
for (const cls of classes) {
|
|
485
|
-
|
|
486
|
-
|
|
825
|
+
const clsId = sanitizeStyleId(cls);
|
|
826
|
+
if (ctx.cssClasses[cls] && !ctx.generatedStyleIds.has(clsId)) {
|
|
827
|
+
ctx.generatedStyleIds.add(clsId);
|
|
487
828
|
const cssDecls = ctx.cssClasses[cls];
|
|
488
829
|
const runProps = cssToRunProps(cssDecls);
|
|
489
830
|
const paraProps = cssToParaProps(cssDecls);
|
|
490
|
-
ctx.customStyles.push(buildStyleElement(
|
|
831
|
+
ctx.customStyles.push(buildStyleElement(clsId, cls, "paragraph", runProps, paraProps));
|
|
491
832
|
}
|
|
492
833
|
}
|
|
493
834
|
}
|
|
@@ -514,13 +855,14 @@ function buildPPr(node, ctx) {
|
|
|
514
855
|
if (elTag) {
|
|
515
856
|
ensureElementStyle(elTag, ctx);
|
|
516
857
|
// If no class style was set, use the element style
|
|
517
|
-
if (!classes || classes.length === 0) {
|
|
518
|
-
parts.
|
|
858
|
+
if ((!classes || classes.length === 0) && ctx.cssElements[elTag]) {
|
|
859
|
+
parts.push(`<w:pStyle w:val="_el_${esc(elTag)}"/>`);
|
|
519
860
|
}
|
|
520
|
-
// If class style was set, element CSS is already baked into the base —
|
|
521
|
-
// we could chain basedOn, but for simplicity element styles are standalone
|
|
522
861
|
}
|
|
523
|
-
return parts
|
|
862
|
+
return parts;
|
|
863
|
+
}
|
|
864
|
+
/**
 * Build the paragraph properties for `node`.
 *
 * The parts are gathered by `buildPPrParts` and then handed to
 * `serializePPr`, whose result is returned unchanged.
 */
function buildPPr(node, ctx) {
    const pPrParts = buildPPrParts(node, ctx);
    return serializePPr(pPrParts);
}
|
|
525
867
|
/** Map UDM type to the HTML tag name for CSS element selector lookup */
|
|
526
868
|
function udmTypeToHtmlTag(type, node) {
|
|
@@ -587,7 +929,183 @@ function stripDataForSnapshot(node) {
|
|
|
587
929
|
/**
 * Run `val` through `parseColor`, substituting the literal "auto" when the
 * parser yields `null` or `undefined`.
 */
function parseColorSafe(val) {
    const parsed = parseColor(val);
    if (parsed === null || parsed === undefined) {
        return "auto";
    }
    return parsed;
}
|
|
932
|
+
/**
 * Stable short fingerprint of a string: the first 16 hex characters
 * (64 bits) of its SHA-256 digest.
 *
 * The fingerprint is stored next to the embedded UDM snapshot as
 * `__docHash`, tying the snapshot to one specific document.xml so that a
 * later external edit (for example a save from Word) can be noticed.
 * This is change detection only; the truncated digest is not meant to
 * provide cryptographic collision resistance.
 */
function hashString(s) {
    const hasher = createHash("sha256");
    hasher.update(s);
    const hexDigest = hasher.digest("hex");
    return hexDigest.slice(0, 16);
}
|
|
942
|
+
// ---------------------------------------------------------------------------
|
|
943
|
+
// Deleted-block rendering (for writeDiffDocx)
|
|
944
|
+
// ---------------------------------------------------------------------------
|
|
945
|
+
/**
 * Recursively collect all text content from a UDM node snapshot.
 *
 * - Literal nodes (`text`, `codeBlock`, `inlineCode`) contribute their
 *   `value`, or "" when the value is missing. `inlineCode` is included
 *   because it carries its text in `value` just like `text` does; without
 *   it, inline code silently vanished from deleted-block output.
 * - Parent nodes contribute the non-empty text of their children joined by
 *   a single space.
 * - Every other node contributes "".
 *
 * @param node UDM node (or plain snapshot of one).
 * @returns The collected text; never null or undefined.
 */
function extractTextContent(node) {
    switch (node.type) {
        case "text":
        case "codeBlock":
        case "inlineCode":
            return node.value ?? "";
        default:
            break;
    }
    if (isParent(node)) {
        return node.children
            .map(child => extractTextContent(child))
            .filter(Boolean)
            .join(" ");
    }
    return "";
}
|
|
955
|
+
/**
 * Render the node of a DeleteOp as a single paragraph of track-changes
 * deletion markup.
 *
 * The node is flattened to plain text. When no text remains, "" is returned
 * and no revision ids are consumed. Otherwise two ids are taken from
 * `ctx.diff.revId`, in this order: one for the deleted paragraph mark and
 * one for the deleted run. A deleted heading keeps a `Heading<depth>`
 * paragraph style.
 *
 * @param op  Delete operation; only `op.node` is read here.
 * @param ctx Writer context; `ctx.diff` supplies author, date and the
 *            revision-id counter.
 * @returns The `<w:p>` XML string, or "" when the node has no text.
 */
function convertDeletedBlock(op, ctx) {
    const { node } = op;
    const { diff } = ctx;
    const author = esc(diff.author);
    const date = esc(diff.date);
    const deletedText = extractTextContent(node);
    if (!deletedText) {
        return "";
    }
    // Id order matters: paragraph mark first, run content second.
    const paraMarkRevId = diff.revId.value++;
    const runRevId = diff.revId.value++;
    // Only headings carry a paragraph style into the deleted paragraph.
    const headingStyle = node.type === "heading"
        ? `<w:pStyle w:val="Heading${node.depth}"/>`
        : "";
    const startingPPr = headingStyle ? `<w:pPr>${headingStyle}</w:pPr>` : "";
    const paraMarkDel = `<w:del w:id="${paraMarkRevId}" w:author="${author}" w:date="${date}"/>`;
    const paragraphProps = mergeParaMarkChild(startingPPr, paraMarkDel);
    const deletedRun = `<w:del w:id="${runRevId}" w:author="${author}" w:date="${date}"><w:r><w:delText xml:space="preserve">${esc(deletedText)}</w:delText></w:r></w:del>`;
    return `<w:p>${paragraphProps}${deletedRun}</w:p>`;
}
|
|
590
978
|
/** Entity names for the characters that must be escaped in XML output. */
const XML_ENTITY_NAMES = Object.freeze({
    "&": "amp",
    "<": "lt",
    ">": "gt",
    '"': "quot",
});
/**
 * Escape a value for use as XML text content or inside a double-quoted
 * XML attribute.
 *
 * The value is first passed through `sanitizeText`, then `&`, `<`, `>` and
 * `"` are replaced by their named entities in a single pass, so the
 * ampersand of an already-emitted entity is never escaped twice.
 *
 * @param s Value to escape; non-strings are stringified, null/undefined
 *          become "".
 * @returns The escaped string.
 */
function esc(s) {
    return sanitizeText(s).replace(/[&<>"]/g, ch => `&${XML_ENTITY_NAMES[ch]};`);
}
/**
 * Strip characters that would make the generated XML unparseable: the
 * C0 control characters U+0000–U+001F except \t \n \r, and the
 * non-characters U+FFFE / U+FFFF, all of which fall outside the XML 1.0
 * `Char` production. U+007F (DEL) is removed as well, as before.
 *
 * @param s Value to clean; non-strings are stringified, null/undefined
 *          become "" instead of throwing.
 * @returns The cleaned string.
 */
function sanitizeText(s) {
    return String(s ?? "").replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F\uFFFE\uFFFF]/g, "");
}
|
|
993
|
+
/**
 * Turn a CSS class name into a string usable as an OOXML style ID.
 *
 * Every character other than ASCII letters, digits, `_`, `-` and `:` is
 * dropped, the remainder is cut to at most 31 characters, and an empty
 * result falls back to "Normal".
 *
 * NOTE(review): the character set and the 31-character limit were
 * attributed to ECMA-376 §17.7.4.9 by the original comment — confirm
 * against the spec.
 */
function sanitizeStyleId(id) {
    const allowedOnly = id.replace(/[^a-zA-Z0-9_\-:]/g, "");
    const truncated = allowedOnly.slice(0, 31);
    if (truncated.length === 0) {
        return "Normal";
    }
    return truncated;
}
|
|
1002
|
+
// ---------------------------------------------------------------------------
|
|
1003
|
+
// writeDiffDocx — serialize a diff result as a .docx with track-changes markup
|
|
1004
|
+
// ---------------------------------------------------------------------------
|
|
1005
|
+
/**
 * Insert `content` immediately before the first occurrence of `closingTag`
 * in `xml`. When the tag is absent, `xml` is returned unchanged.
 *
 * A replacer function is used on purpose: with a string replacement,
 * `String.prototype.replace` would interpret `$&`, `$'`, `` $` `` and `$$`
 * inside `content` (e.g. in a hyperlink URL) as substitution patterns and
 * corrupt the output.
 */
function insertBeforeClosingTag(xml, closingTag, content) {
    return xml.replace(closingTag, () => `${content}${closingTag}`);
}
/**
 * Serialize a UDM tree to a .docx buffer with Word track-changes revision marks.
 *
 * Inserted content is wrapped in `<w:ins>`, deleted content in `<w:del>`, and
 * partial text changes are split into per-segment runs so Word's Accept/Reject
 * All Changes buttons work correctly.
 *
 * @param newTree    The "after" document tree (from diff())
 * @param diffResult The diff result produced by diff(oldTree, newTree)
 * @param options    Author/date for revision marks, plus standard CSS options.
 *                   `author` defaults to "Otomate" and `date` to the fixed
 *                   timestamp "2024-01-01T00:00:00Z".
 * @returns Whatever `packDocx` returns for the assembled parts.
 */
export async function writeDiffDocx(newTree, diffResult, options) {
    const cssData = newTree.data?.css;
    const docxData = newTree.data?.docx;
    const mergedClassCss = { ...cssData?.classRules, ...options?.cssClasses };
    const elementCss = cssData?.elementRules ?? {};
    const author = options?.author ?? "Otomate";
    const date = options?.date ?? "2024-01-01T00:00:00Z";
    const diffCtx = buildDiffCtx(diffResult, author, date);
    const nodePathKeys = buildNodePathMap(newTree);
    const ctx = {
        cssClasses: mergedClassCss,
        cssElements: elementCss,
        customStyles: [],
        generatedStyleIds: new Set(),
        nextNumId: 3,
        allocatedNums: new Map(),
        hyperlinks: new Map(),
        nextRId: nextRIdFor(docxData?.relationships),
        diff: diffCtx,
        nodePathKeys,
    };
    // Pass 1: render newTree with diff-aware converters (ins/updateText handled inline)
    const renderedBlocks = newTree.children.map(child => convertBlock(child, ctx, 0));
    // Pass 2: inject root-level deleted blocks at their approximate old-tree position.
    // Deletes are processed in ascending old index; `offset` counts the deleted
    // blocks already spliced in, which shift every later insertion point by one.
    const rootDeletes = [...diffCtx.deletedPaths.values()]
        .filter(op => op.path.length === 1)
        .sort((a, b) => a.path[0] - b.path[0]);
    let offset = 0;
    for (const op of rootDeletes) {
        const xml = convertDeletedBlock(op, ctx);
        if (xml) {
            renderedBlocks.splice(Math.min(op.path[0] + offset, renderedBlocks.length), 0, xml);
            offset++;
        }
    }
    // Match writeDocx's trailing-paragraph-after-table behaviour so the body
    // doesn't end with </w:tbl> immediately before <w:sectPr>.
    const lastBlock = newTree.children[newTree.children.length - 1];
    if (lastBlock?.type === "table") {
        renderedBlocks.push("<w:p/>");
    }
    const bodyXml = renderedBlocks.join("\n");
    // Styles generated while rendering are appended to the style sheet.
    let stylesXml = docxData?.styles ?? DEFAULT_STYLES;
    if (ctx.customStyles.length > 0) {
        const injection = ctx.customStyles.join("\n");
        stylesXml = insertBeforeClosingTag(stylesXml, "</w:styles>", `${injection}\n`);
    }
    const docXml = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<w:document ${W_NS} ${R_NS} ${WP_NS}>
<w:body>
${bodyXml}
<w:sectPr><w:pgSz w:w="12240" w:h="15840"/><w:pgMar w:top="1440" w:right="1440" w:bottom="1440" w:left="1440"/></w:sectPr>
</w:body>
</w:document>`;
    // Hyperlinks allocated during rendering become external relationships.
    let relsXml = docxData?.relationships ?? DOC_RELS;
    if (ctx.hyperlinks.size > 0) {
        const hlRels = [...ctx.hyperlinks].map(([rId, url]) => `<Relationship Id="${rId}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink" Target="${esc(url)}" TargetMode="External"/>`).join("\n ");
        relsXml = insertBeforeClosingTag(relsXml, "</Relationships>", ` ${hlRels}\n`);
    }
    // List numbering instances allocated during rendering.
    let numberingXml = docxData?.numbering ?? DEFAULT_NUMBERING;
    if (ctx.allocatedNums.size > 0) {
        const newNums = [...ctx.allocatedNums]
            .map(([numId, abstractId]) => `<w:num w:numId="${numId}"><w:abstractNumId w:val="${abstractId}"/></w:num>`)
            .join("\n ");
        numberingXml = insertBeforeClosingTag(numberingXml, "</w:numbering>", ` ${newNums}\n`);
    }
    const parts = {
        document: docXml,
        styles: stylesXml,
        numbering: numberingXml,
        relationships: relsXml,
        contentTypes: docxData?.contentTypes ?? undefined,
        media: new Map(),
        rawParts: new Map(),
    };
    // Embed the UDM snapshot together with a hash of the document.xml it belongs to.
    const udmSnapshot = stripDataForSnapshot(newTree);
    udmSnapshot.__docHash = hashString(docXml);
    parts.rawParts.set("word/otomate-udm.json", JSON.stringify(udmSnapshot));
    if (cssData && (Object.keys(cssData.classRules ?? {}).length > 0 || Object.keys(cssData.elementRules ?? {}).length > 0)) {
        parts.rawParts.set("word/otomate-css.json", JSON.stringify(cssData));
    }
    // Carry over raw parts from the source document without overriding the
    // parts generated above.
    if (docxData?.rawParts) {
        for (const [k, v] of Object.entries(docxData.rawParts)) {
            if (!parts.rawParts.has(k)) {
                parts.rawParts.set(k, v);
            }
        }
    }
    if (!parts.rawParts.has("_rels/.rels")) {
        parts.rawParts.set("_rels/.rels", RELS);
    }
    return packDocx(parts);
}
|
|
593
1111
|
//# sourceMappingURL=writer.js.map
|