@otomate/docx 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/hash.d.ts +2 -0
- package/dist/hash.d.ts.map +1 -0
- package/dist/hash.js +24 -0
- package/dist/hash.js.map +1 -0
- package/dist/reader.d.ts.map +1 -1
- package/dist/reader.js +73 -20
- package/dist/reader.js.map +1 -1
- package/dist/writer.d.ts.map +1 -1
- package/dist/writer.js +423 -96
- package/dist/writer.js.map +1 -1
- package/dist/zip.d.ts.map +1 -1
- package/dist/zip.js +34 -3
- package/dist/zip.js.map +1 -1
- package/package.json +1 -1
package/dist/writer.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
// ---------------------------------------------------------------------------
|
|
2
2
|
// docx Writer — UDM tree → .docx buffer
|
|
3
3
|
// ---------------------------------------------------------------------------
|
|
4
|
+
import { hashString } from "./hash.js";
|
|
4
5
|
import { isParent, isText } from "@otomate/core";
|
|
5
6
|
import { buildStyleElement, cssToRunProps, cssToParaProps, parseLengthTwips, parseColor } from "@otomate/css-docx";
|
|
6
7
|
import { packDocx } from "./zip.js";
|
|
@@ -14,16 +15,28 @@ export async function writeDocx(tree, options) {
|
|
|
14
15
|
const cssData = tree.data?.css;
|
|
15
16
|
const mergedClassCss = { ...cssData?.classRules, ...options?.cssClasses };
|
|
16
17
|
const elementCss = cssData?.elementRules ?? {};
|
|
18
|
+
const docxDataPre = tree.data?.docx;
|
|
19
|
+
const existingRels = docxDataPre?.relationships;
|
|
17
20
|
const ctx = {
|
|
18
21
|
cssClasses: mergedClassCss,
|
|
19
22
|
cssElements: elementCss,
|
|
20
23
|
customStyles: [],
|
|
21
24
|
generatedStyleIds: new Set(),
|
|
22
25
|
nextNumId: 3,
|
|
26
|
+
allocatedNums: new Map(),
|
|
23
27
|
hyperlinks: new Map(),
|
|
24
|
-
|
|
28
|
+
// Seed past any existing rIds to avoid collisions on round-trip.
|
|
29
|
+
nextRId: nextRIdFor(existingRels),
|
|
25
30
|
};
|
|
26
|
-
const
|
|
31
|
+
const renderedBlocks = tree.children.map(child => convertBlock(child, ctx, 0));
|
|
32
|
+
// OOXML allows w:tbl as the last body child, but Word always inserts a trailing
|
|
33
|
+
// empty paragraph after a final table. Match that behavior so re-opening in Word
|
|
34
|
+
// produces no diff.
|
|
35
|
+
const lastBlock = tree.children[tree.children.length - 1];
|
|
36
|
+
if (lastBlock?.type === "table") {
|
|
37
|
+
renderedBlocks.push("<w:p/>");
|
|
38
|
+
}
|
|
39
|
+
const bodyXml = renderedBlocks.join("\n");
|
|
27
40
|
const docxData = tree.data?.docx;
|
|
28
41
|
// Build styles.xml with any generated custom styles
|
|
29
42
|
let stylesXml = docxData?.styles ?? DEFAULT_STYLES;
|
|
@@ -46,17 +59,29 @@ ${bodyXml}
|
|
|
46
59
|
// Inject before closing </Relationships>
|
|
47
60
|
relsXml = relsXml.replace("</Relationships>", ` ${hlRels}\n</Relationships>`);
|
|
48
61
|
}
|
|
62
|
+
// Inject any newly-allocated <w:num> entries into numbering.xml so each
|
|
63
|
+
// top-level list has its own num definition (independent counters & styles).
|
|
64
|
+
let numberingXml = docxData?.numbering ?? DEFAULT_NUMBERING;
|
|
65
|
+
if (ctx.allocatedNums.size > 0) {
|
|
66
|
+
const newNums = [...ctx.allocatedNums]
|
|
67
|
+
.map(([numId, abstractId]) => `<w:num w:numId="${numId}"><w:abstractNumId w:val="${abstractId}"/></w:num>`)
|
|
68
|
+
.join("\n ");
|
|
69
|
+
numberingXml = numberingXml.replace("</w:numbering>", ` ${newNums}\n</w:numbering>`);
|
|
70
|
+
}
|
|
49
71
|
const parts = {
|
|
50
72
|
document: docXml,
|
|
51
73
|
styles: stylesXml,
|
|
52
|
-
numbering:
|
|
74
|
+
numbering: numberingXml,
|
|
53
75
|
relationships: relsXml,
|
|
54
76
|
contentTypes: docxData?.contentTypes ?? undefined,
|
|
55
77
|
media: new Map(),
|
|
56
78
|
rawParts: new Map(),
|
|
57
79
|
};
|
|
58
|
-
// Embed UDM tree as custom part for lossless round-trip with otomate
|
|
80
|
+
// Embed UDM tree as custom part for lossless round-trip with otomate.
|
|
81
|
+
// The hash binds the snapshot to this exact document.xml so the reader
|
|
82
|
+
// can detect external edits (Word saves) and fall back to OOXML parsing.
|
|
59
83
|
const udmSnapshot = stripDataForSnapshot(tree);
|
|
84
|
+
udmSnapshot.__docHash = await hashString(docXml);
|
|
60
85
|
parts.rawParts.set("word/otomate-udm.json", JSON.stringify(udmSnapshot));
|
|
61
86
|
// Embed CSS rules if present
|
|
62
87
|
if (cssData && (Object.keys(cssData.classRules ?? {}).length > 0 || Object.keys(cssData.elementRules ?? {}).length > 0)) {
|
|
@@ -76,6 +101,37 @@ ${bodyXml}
|
|
|
76
101
|
}
|
|
77
102
|
return packDocx(parts);
|
|
78
103
|
}
|
|
104
|
+
/**
 * Pick the first relationship id number to use for newly allocated
 * hyperlinks so it cannot collide with ids already present in a
 * round-tripped relationships part.
 *
 * The result is one past the highest numeric `rIdN` found in
 * `existingRels`, but never lower than 100. When there is no source rels
 * text at all, 100 is returned.
 *
 * @param {string | undefined | null} existingRels Raw relationships XML, if any.
 * @returns {number} Numeric suffix for the next `rId` to allocate.
 */
function nextRIdFor(existingRels) {
    const MIN_NEW_RID = 100;
    if (!existingRels)
        return MIN_NEW_RID;
    let highest = 0;
    // XML permits either quote style around attribute values, and optional
    // whitespace around "=", so accept both rather than only Id="rIdN".
    for (const [, digits] of existingRels.matchAll(/\bId\s*=\s*["']rId(\d+)["']/g)) {
        highest = Math.max(highest, Number(digits));
    }
    return Math.max(highest + 1, MIN_NEW_RID);
}
|
|
122
|
+
/**
 * Hand out the next unused `w:numId` and remember which abstract numbering
 * definition it should point at, so the writer can later emit a matching
 * `<w:num>` entry into numbering.xml.
 *
 * Giving each top-level list its own numId lets ordered lists restart at 1
 * and keeps per-list bullet styling from leaking into other lists.
 *
 * @param {{ nextNumId: number, allocatedNums: Map<string, number> }} ctx Writer context; mutated.
 * @param {boolean} ordered Whether the list is ordered (abstract id 1) or bulleted (abstract id 0).
 * @returns {string} The allocated numId, as a string.
 */
function allocNumId(ctx, ordered) {
    const abstractNumId = ordered ? 1 : 0;
    const numId = `${ctx.nextNumId}`;
    ctx.nextNumId += 1;
    ctx.allocatedNums.set(numId, abstractNumId);
    return numId;
}
|
|
79
135
|
function allocHyperlinkRId(ctx, url) {
|
|
80
136
|
// Reuse existing rId for same URL
|
|
81
137
|
for (const [rId, existingUrl] of ctx.hyperlinks) {
|
|
@@ -129,7 +185,7 @@ function convertBlock(node, ctx, listIlvl) {
|
|
|
129
185
|
case "div":
|
|
130
186
|
case "figure":
|
|
131
187
|
return convertDiv(node, ctx, listIlvl);
|
|
132
|
-
case "html": return `<w:p><w:r><w:t>${esc(node.value)}</w:t></w:r></w:p>`;
|
|
188
|
+
case "html": return `<w:p><w:r><w:t xml:space="preserve">${esc(node.value)}</w:t></w:r></w:p>`;
|
|
133
189
|
default: return "";
|
|
134
190
|
}
|
|
135
191
|
}
|
|
@@ -154,12 +210,16 @@ function convertDiv(node, ctx, listIlvl) {
|
|
|
154
210
|
const runProps = cssToRunProps(containerCss);
|
|
155
211
|
const containerRPr = buildRunPropsXml(runProps);
|
|
156
212
|
const bg = containerCss["background-color"] ? parseColorSafe(containerCss["background-color"]) : undefined;
|
|
157
|
-
// Build extra pPr elements to inject into child paragraphs
|
|
158
|
-
|
|
213
|
+
// Build extra pPr elements to inject into child paragraphs.
|
|
214
|
+
// Order doesn't matter here — injectPPr re-sorts via serializePPr.
|
|
215
|
+
const extraPPrParts = [];
|
|
216
|
+
const pBdrXml = buildPBdrXml(paraProps.pBdr);
|
|
217
|
+
if (pBdrXml)
|
|
218
|
+
extraPPrParts.push(pBdrXml);
|
|
159
219
|
if (bg && bg !== "auto")
|
|
160
|
-
|
|
220
|
+
extraPPrParts.push(`<w:shd w:val="clear" w:color="auto" w:fill="${bg}"/>`);
|
|
161
221
|
if (paraProps.jc)
|
|
162
|
-
|
|
222
|
+
extraPPrParts.push(`<w:jc w:val="${paraProps.jc}"/>`);
|
|
163
223
|
if (paraProps.ind) {
|
|
164
224
|
const attrs = [];
|
|
165
225
|
if (paraProps.ind.left)
|
|
@@ -167,14 +227,14 @@ function convertDiv(node, ctx, listIlvl) {
|
|
|
167
227
|
if (paraProps.ind.right)
|
|
168
228
|
attrs.push(`w:right="${paraProps.ind.right}"`);
|
|
169
229
|
if (attrs.length)
|
|
170
|
-
|
|
230
|
+
extraPPrParts.push(`<w:ind ${attrs.join(" ")}/>`);
|
|
171
231
|
}
|
|
172
232
|
// Create a temporary modified context that includes the container's styles
|
|
173
233
|
// so child paragraphs inherit them
|
|
174
234
|
const childCtx = {
|
|
175
235
|
...ctx,
|
|
176
236
|
_containerRPr: containerRPr,
|
|
177
|
-
|
|
237
|
+
_containerPPrParts: extraPPrParts,
|
|
178
238
|
};
|
|
179
239
|
return node.children.map(c => convertBlockWithContainer(c, childCtx, listIlvl)).join("\n");
|
|
180
240
|
}
|
|
@@ -182,17 +242,16 @@ function convertDiv(node, ctx, listIlvl) {
|
|
|
182
242
|
function convertBlockWithContainer(node, ctx, listIlvl) {
|
|
183
243
|
const containerCtx = ctx;
|
|
184
244
|
const extraRPr = containerCtx._containerRPr ?? "";
|
|
185
|
-
const
|
|
245
|
+
const extraPPrParts = containerCtx._containerPPrParts ?? [];
|
|
186
246
|
// Helper: resolve all CSS run props for this node (element + own classes)
|
|
187
247
|
const nodeRPr = resolveBlockCssRunProps(node, ctx);
|
|
188
248
|
const mergedRPr = extraRPr + nodeRPr;
|
|
189
|
-
// Helper:
|
|
249
|
+
// Helper: merge container pPr parts into an existing pPr string with schema ordering.
|
|
190
250
|
function injectPPr(pPr) {
|
|
191
|
-
if (
|
|
251
|
+
if (extraPPrParts.length === 0)
|
|
192
252
|
return pPr;
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
return `<w:pPr>${extraPPr}</w:pPr>`;
|
|
253
|
+
const baseChildren = pPr ? extractPPrChildren(pPr) : [];
|
|
254
|
+
return serializePPr([...baseChildren, ...extraPPrParts]);
|
|
196
255
|
}
|
|
197
256
|
if (node.type === "paragraph") {
|
|
198
257
|
const para = node;
|
|
@@ -218,11 +277,12 @@ function convertBlockWithContainer(node, ctx, listIlvl) {
|
|
|
218
277
|
const classes = h.classes;
|
|
219
278
|
if (classes?.length) {
|
|
220
279
|
for (const cls of classes) {
|
|
221
|
-
|
|
222
|
-
|
|
280
|
+
const clsId = sanitizeStyleId(cls);
|
|
281
|
+
if (ctx.cssClasses[cls] && !ctx.generatedStyleIds.has(clsId)) {
|
|
282
|
+
ctx.generatedStyleIds.add(clsId);
|
|
223
283
|
const rp = cssToRunProps(ctx.cssClasses[cls]);
|
|
224
284
|
const pp = cssToParaProps(ctx.cssClasses[cls]);
|
|
225
|
-
ctx.customStyles.push(buildStyleElement(
|
|
285
|
+
ctx.customStyles.push(buildStyleElement(clsId, cls, "paragraph", rp, pp));
|
|
226
286
|
}
|
|
227
287
|
}
|
|
228
288
|
}
|
|
@@ -232,7 +292,7 @@ function convertBlockWithContainer(node, ctx, listIlvl) {
|
|
|
232
292
|
if (node.type === "list") {
|
|
233
293
|
// Pass container formatting down to list items
|
|
234
294
|
const list = node;
|
|
235
|
-
const numId = list.ordered
|
|
295
|
+
const numId = allocNumId(ctx, list.ordered);
|
|
236
296
|
const parts = [];
|
|
237
297
|
for (const item of list.children) {
|
|
238
298
|
if (item.type !== "listItem")
|
|
@@ -258,7 +318,8 @@ function convertBlockWithContainer(node, ctx, listIlvl) {
|
|
|
258
318
|
parts.push(`<w:p>${pPr}${runs}</w:p>`);
|
|
259
319
|
}
|
|
260
320
|
else if (child.type === "list") {
|
|
261
|
-
|
|
321
|
+
// Inherit parent numId so nested levels share the same w:num entry.
|
|
322
|
+
parts.push(convertList(child, ctx, listIlvl + 1, numId));
|
|
262
323
|
}
|
|
263
324
|
else {
|
|
264
325
|
// div, blockquote, codeBlock, etc. — recurse with container inheritance
|
|
@@ -271,7 +332,7 @@ function convertBlockWithContainer(node, ctx, listIlvl) {
|
|
|
271
332
|
// For other children, use normal conversion with container props cleared
|
|
272
333
|
const cleanCtx = { ...ctx };
|
|
273
334
|
delete cleanCtx._containerRPr;
|
|
274
|
-
delete cleanCtx.
|
|
335
|
+
delete cleanCtx._containerPPrParts;
|
|
275
336
|
return convertBlock(node, cleanCtx, listIlvl);
|
|
276
337
|
}
|
|
277
338
|
function convertParagraph(node, ctx) {
|
|
@@ -288,36 +349,82 @@ function convertParagraph(node, ctx) {
|
|
|
288
349
|
}
|
|
289
350
|
/**
 * Render a heading node as a `<w:p>` element.
 *
 * Element-level CSS for `h{depth}` is merged with the CSS of each class on
 * the node (later sources override earlier ones). The merged declarations
 * feed both the run properties passed to every inline child and the direct
 * paragraph formatting (border, shading, alignment, spacing, indent) placed
 * after the `w:pStyle`.
 *
 * @param {object} node Heading node; reads `depth`, `classes`, `children`.
 * @param {object} ctx Writer context; reads `cssElements`, `cssClasses`, `diff`, `nodePathKeys`.
 * @returns {string} The paragraph XML.
 */
function convertHeading(node, ctx) {
    const tag = `h${node.depth}`;
    // Cascade: element rule first, then each class in order.
    const declSources = [
        ctx.cssElements[tag],
        ...(node.classes ?? []).map(cls => ctx.cssClasses[cls]),
    ];
    const mergedDecls = declSources.reduce((acc, decls) => (decls ? { ...acc, ...decls } : acc), {});
    const hasCss = Object.keys(mergedDecls).length > 0;
    // Run-level properties are handed to every inline child.
    const cssRPr = hasCss ? buildRunPropsXml(cssToRunProps(mergedDecls)) : "";
    // Paragraph-level properties become direct formatting next to the pStyle.
    const paraProps = hasCss ? cssToParaProps(mergedDecls) : {};
    // Render `w:name="value"` pairs for every listed key that is defined.
    const definedAttrs = (source, names) => names
        .filter(name => source[name] !== undefined)
        .map(name => `w:${name}="${source[name]}"`);
    // A pStyle is always emitted: the generated element style when an
    // element rule exists, otherwise the built-in Heading{N} style.
    const pPrParts = [];
    if (ctx.cssElements[tag]) {
        ensureElementStyle(tag, ctx);
        pPrParts.push(`<w:pStyle w:val="_el_${tag}"/>`);
    }
    else {
        pPrParts.push(`<w:pStyle w:val="Heading${node.depth}"/>`);
    }
    const borderXml = buildPBdrXml(paraProps.pBdr);
    if (borderXml) {
        pPrParts.push(borderXml);
    }
    if (paraProps.shd) {
        pPrParts.push(`<w:shd w:val="clear" w:color="auto" w:fill="${paraProps.shd}"/>`);
    }
    if (paraProps.jc) {
        pPrParts.push(`<w:jc w:val="${paraProps.jc}"/>`);
    }
    if (paraProps.spacing) {
        const spacingAttrs = definedAttrs(paraProps.spacing, ["before", "after", "line"]);
        if (spacingAttrs.length) {
            pPrParts.push(`<w:spacing ${spacingAttrs.join(" ")}/>`);
        }
    }
    if (paraProps.ind) {
        const indentAttrs = definedAttrs(paraProps.ind, ["left", "right"]);
        if (indentAttrs.length) {
            pPrParts.push(`<w:ind ${indentAttrs.join(" ")}/>`);
        }
    }
    const pPr = serializePPr(pPrParts);
    // Track-changes path: a heading recorded as inserted is rendered with
    // its runs wrapped as insertions instead of the plain form below.
    if (ctx.diff && ctx.nodePathKeys) {
        const pathKey = ctx.nodePathKeys.get(node);
        const isInserted = pathKey !== undefined && ctx.diff.insertedPaths.has(pathKey);
        if (isInserted) {
            return convertInsertedParagraph(node, pPr, cssRPr, ctx);
        }
    }
    const runs = node.children.map(child => convertInline(child, ctx, cssRPr)).join("");
    return `<w:p>${pPr}${runs}</w:p>`;
}
|
|
310
419
|
/** Render a paragraph (or heading cast as paragraph) with all runs wrapped in w:ins. */
|
|
311
420
|
function convertInsertedParagraph(node, pPr, cssRPr, ctx) {
|
|
312
421
|
const diff = ctx.diff;
|
|
313
422
|
const author = esc(diff.author);
|
|
314
|
-
const date = diff.date;
|
|
315
|
-
// Track the paragraph mark itself as inserted (inject into pPr)
|
|
423
|
+
const date = esc(diff.date);
|
|
424
|
+
// Track the paragraph mark itself as inserted (inject into pPr's rPr).
|
|
316
425
|
const markId = diff.revId.value++;
|
|
317
|
-
const
|
|
318
|
-
const trackedPPr = pPr
|
|
319
|
-
? pPr.replace("</w:pPr>", `${insMarkXml}</w:pPr>`)
|
|
320
|
-
: `<w:pPr>${insMarkXml}</w:pPr>`;
|
|
426
|
+
const insMark = `<w:ins w:id="${markId}" w:author="${author}" w:date="${date}"/>`;
|
|
427
|
+
const trackedPPr = mergeParaMarkChild(pPr, insMark);
|
|
321
428
|
// Wrap each inline child's run in its own w:ins
|
|
322
429
|
const runs = node.children.map(c => {
|
|
323
430
|
const runXml = convertInline(c, ctx, cssRPr);
|
|
@@ -346,8 +453,163 @@ function resolveBlockCssRunProps(node, ctx) {
|
|
|
346
453
|
return "";
|
|
347
454
|
return buildRunPropsXml(cssToRunProps(merged));
|
|
348
455
|
}
|
|
349
|
-
|
|
456
|
+
// ---------------------------------------------------------------------------
|
|
457
|
+
// ECMA-376 schema-order maps for w:rPr and w:pPr children
|
|
458
|
+
// (validators reject out-of-order elements in strict mode)
|
|
459
|
+
// ---------------------------------------------------------------------------
|
|
460
|
+
// Rank (1-based) of each known w:rPr child; the list position is the rank.
const RPR_ORDER = Object.fromEntries([
    "rStyle", "rFonts", "b", "bCs", "i", "iCs",
    "caps", "smallCaps", "strike", "dstrike",
    "outline", "shadow", "emboss", "imprint",
    "noProof", "snapToGrid", "vanish", "webHidden",
    "color", "spacing", "w", "kern", "position",
    "sz", "szCs", "highlight", "u", "effect",
    "bdr", "shd", "fitText", "vertAlign", "rtl",
    "cs", "em", "lang", "eastAsianLayout",
    "specVanish", "oMath",
].map((localName, index) => [`w:${localName}`, index + 1]));
// Rank (1-based) of each known w:pPr child; the list position is the rank.
const PPR_ORDER = Object.fromEntries([
    "pStyle", "keepNext", "keepLines", "pageBreakBefore",
    "framePr", "widowControl", "numPr", "suppressLineNumbers",
    "pBdr", "shd", "tabs", "suppressAutoHyphens",
    "kinsoku", "wordWrap", "overflowPunct", "topLinePunct",
    "autoSpaceDE", "autoSpaceDN", "bidi", "adjustRightInd",
    "snapToGrid", "spacing", "ind", "contextualSpacing",
    "mirrorIndents", "suppressOverlap", "jc", "textDirection",
    "textAlignment", "textboxTightWrap", "outlineLvl",
    "divId", "cnfStyle", "rPr", "sectPr", "pPrChange",
].map((localName, index) => [`w:${localName}`, index + 1]));
|
|
482
|
+
/**
 * Read the qualified name of the element that opens `xml`.
 *
 * Only names with the `w:` prefix followed by ASCII letters are recognised,
 * and the element must start at the very first character.
 *
 * @param {string} xml Serialized element.
 * @returns {string} The tag name (e.g. "w:jc"), or "" when none is recognised.
 */
function getElementTag(xml) {
    return xml.match(/^<(w:[A-Za-z]+)/)?.[1] ?? "";
}
|
|
486
|
+
/**
 * Split the inner XML of an OOXML container into its top-level child
 * elements, returned as raw strings.
 *
 * Handles both self-closing children (`<w:b/>`) and children with content
 * (`<w:pBdr>…</w:pBdr>`), including content that nests elements of the same
 * name. Only elements named `w:` + ASCII letters are recognised; any other
 * text between children is skipped.
 *
 * An element whose closing tag is missing is returned as a single token that
 * runs to the end of the input. An opening tag with no terminating `>` ends
 * tokenization.
 *
 * @param {string} xml Inner XML of the container (without the wrapper element).
 * @returns {string[]} One string per top-level child, in document order.
 */
function tokenizeOoxmlChildren(xml) {
    const tokens = [];
    // Sticky so the match is anchored at lastIndex without slicing the input.
    const openTagRe = /<(w:[A-Za-z]+)/y;
    // Index of the ">" ending the tag that starts at `start`, ignoring any
    // ">" inside a single- or double-quoted attribute value; -1 if absent.
    const findTagEnd = (start) => {
        let quote = "";
        for (let p = start + 1; p < xml.length; p++) {
            const ch = xml[p];
            if (quote) {
                if (ch === quote)
                    quote = "";
            }
            else if (ch === '"' || ch === "'") {
                quote = ch;
            }
            else if (ch === ">") {
                return p;
            }
        }
        return -1;
    };
    let i = 0;
    while (i < xml.length) {
        if (xml[i] !== "<") {
            i++;
            continue;
        }
        openTagRe.lastIndex = i;
        const nameMatch = openTagRe.exec(xml);
        if (!nameMatch) {
            i++;
            continue;
        }
        const name = nameMatch[1];
        const openEnd = findTagEnd(i);
        if (openEnd === -1)
            break;
        if (xml[openEnd - 1] === "/") {
            tokens.push(xml.slice(i, openEnd + 1));
            i = openEnd + 1;
            continue;
        }
        const openPrefix = `<${name}`;
        const closeTag = `</${name}>`;
        let depth = 1;
        let k = openEnd + 1;
        while (depth > 0) {
            k = xml.indexOf("<", k);
            if (k === -1) {
                // No matching close tag: the token runs to the end of input.
                k = xml.length;
                break;
            }
            if (xml.startsWith(closeTag, k)) {
                depth--;
                k += closeTag.length;
                continue;
            }
            if (xml.startsWith(openPrefix, k)) {
                // Make sure this is the same element name, not a longer one
                // sharing the prefix (e.g. w:rPr vs w:rPrChange).
                const after = xml[k + openPrefix.length];
                if (after === " " || after === ">" || after === "/") {
                    const nestedEnd = findTagEnd(k);
                    if (nestedEnd === -1) {
                        k = xml.length;
                        break;
                    }
                    // A self-closing nested element has no close tag of its
                    // own, so it must not raise the depth.
                    if (xml[nestedEnd - 1] !== "/")
                        depth++;
                    k = nestedEnd + 1;
                    continue;
                }
            }
            k++;
        }
        tokens.push(xml.slice(i, k));
        i = k;
    }
    return tokens;
}
|
|
543
|
+
/**
 * De-duplicate serialized child elements by tag name and sort them by the
 * given schema-order map.
 *
 * When a tag occurs more than once, the last occurrence's XML is kept.
 * Elements whose tag is not recognised by getElementTag are dropped; tags
 * missing from `order` are ranked 999 and therefore sort after known tags.
 *
 * @param {Iterable<string>} elements Serialized child elements.
 * @param {Record<string, number>} order Tag name → rank.
 * @returns {string[]} The surviving elements in rank order.
 */
function orderElements(elements, order) {
    const latestByTag = new Map();
    for (const element of elements) {
        const tag = getElementTag(element);
        if (!tag) {
            continue;
        }
        // Later sources override earlier ones for the same tag.
        latestByTag.set(tag, element);
    }
    const rankOf = (tag) => order[tag] ?? 999;
    const ranked = Array.from(latestByTag);
    ranked.sort((left, right) => rankOf(left[0]) - rankOf(right[0]));
    return ranked.map((entry) => entry[1]);
}
|
|
555
|
+
/**
 * Produce a `<w:rPr>` wrapper around the given run-property children after
 * de-duplicating and ordering them with RPR_ORDER.
 *
 * @param {string[]} parts Serialized rPr child elements, in any order.
 * @returns {string} The wrapper XML, or "" when no children remain.
 */
function serializeRPr(parts) {
    const children = orderElements(parts, RPR_ORDER);
    if (children.length > 0) {
        return `<w:rPr>${children.join("")}</w:rPr>`;
    }
    return "";
}
|
|
560
|
+
/**
 * Produce a `<w:pPr>` wrapper around the given paragraph-property children
 * after de-duplicating and ordering them with PPR_ORDER.
 *
 * @param {string[]} parts Serialized pPr child elements, in any order.
 * @returns {string} The wrapper XML, or "" when no children remain.
 */
function serializePPr(parts) {
    const children = orderElements(parts, PPR_ORDER);
    if (children.length > 0) {
        return `<w:pPr>${children.join("")}</w:pPr>`;
    }
    return "";
}
|
|
565
|
+
/**
 * Pull the child elements out of the first attribute-less
 * `<w:pPr>…</w:pPr>` wrapper found in the given string.
 *
 * @param {string} pPr XML containing a `<w:pPr>` wrapper.
 * @returns {string[]} Raw child element strings, or [] when no wrapper is found.
 */
function extractPPrChildren(pPr) {
    const [, inner] = pPr.match(/<w:pPr>([\s\S]*?)<\/w:pPr>/) ?? [];
    if (inner === undefined) {
        return [];
    }
    return tokenizeOoxmlChildren(inner);
}
|
|
570
|
+
/**
 * Insert a paragraph-mark child (for example a `w:ins` tracked-change
 * marker) into the `<w:rPr>` that lives inside a paragraph's `<w:pPr>`.
 *
 * If the pPr already has an rPr child, `child` is placed immediately after
 * that rPr's opening tag and the existing rPr children are preserved.
 * Otherwise a new `<w:rPr>` holding only `child` is added. The pPr wrapper is
 * rebuilt through serializePPr, so it is created when `pPr` is empty.
 *
 * @param {string} pPr Existing `<w:pPr>…</w:pPr>` XML, or "" for none.
 * @param {string} child Serialized element to place inside the rPr.
 * @returns {string} The rebuilt `<w:pPr>` XML.
 */
function mergeParaMarkChild(pPr, child) {
    const existing = pPr ? extractPPrChildren(pPr) : [];
    const isParaMarkRPr = (el) => el.startsWith("<w:rPr>") || el.startsWith("<w:rPr ");
    const merged = [];
    let sawRPr = false;
    for (const el of existing) {
        if (!isParaMarkRPr(el)) {
            merged.push(el);
            continue;
        }
        sawRPr = true;
        // Place the marker first, directly after the rPr opening tag.
        merged.push(el.replace(/^<w:rPr(\s[^>]*)?>/, (openTag) => `${openTag}${child}`));
    }
    if (!sawRPr) {
        merged.push(`<w:rPr>${child}</w:rPr>`);
    }
    return serializePPr(merged);
}
|
|
593
|
+
/**
 * Pull the child elements out of the first attribute-less
 * `<w:rPr>…</w:rPr>` wrapper found in the given string.
 *
 * @param {string} rPr XML containing a `<w:rPr>` wrapper.
 * @returns {string[]} Raw child element strings, or [] when no wrapper is found.
 */
function extractRPrChildren(rPr) {
    const [, inner] = rPr.match(/<w:rPr>([\s\S]*?)<\/w:rPr>/) ?? [];
    if (inner === undefined) {
        return [];
    }
    return tokenizeOoxmlChildren(inner);
}
|
|
598
|
+
/**
 * Build a `<w:pBdr>` element from a parsed paragraph-border definition.
 *
 * Sides are emitted in the order the CT_PBdr schema sequence requires
 * (top, left, bottom, right), regardless of the key order of `pBdr`.
 *
 * @param {{ top?: object, left?: object, bottom?: object, right?: object } | undefined} pBdr
 *   Per-side border definitions; each side reads `val`, `sz` and `color`.
 * @returns {string} The `<w:pBdr>` XML, or "" when `pBdr` is missing or has no sides set.
 */
function buildPBdrXml(pBdr) {
    if (!pBdr)
        return "";
    // Schema order matters: strict validators reject bottom-before-left.
    const sides = ["top", "left", "bottom", "right"]
        .filter(side => pBdr[side])
        .map(side => {
            const bd = pBdr[side];
            return `<w:${side} w:val="${bd.val}" w:sz="${bd.sz}" w:space="0" w:color="${bd.color}"/>`;
        });
    return sides.length > 0 ? `<w:pBdr>${sides.join("")}</w:pBdr>` : "";
}
|
|
610
|
+
/** Build w:rPr child elements from OoxmlRunProps (without the wrapper, in schema order). */
|
|
350
611
|
function buildRunPropsXml(props) {
|
|
612
|
+
// Order per ECMA-376 CT_RPr: rFonts(2), b(3), i(5), strike(9), color(19), sz(24), u(27), shd(30)
|
|
351
613
|
const parts = [];
|
|
352
614
|
if (props.rFonts) {
|
|
353
615
|
const attrs = [];
|
|
@@ -361,14 +623,14 @@ function buildRunPropsXml(props) {
|
|
|
361
623
|
parts.push("<w:b/>");
|
|
362
624
|
if (props.i)
|
|
363
625
|
parts.push("<w:i/>");
|
|
364
|
-
if (props.
|
|
365
|
-
parts.push(
|
|
626
|
+
if (props.strike)
|
|
627
|
+
parts.push("<w:strike/>");
|
|
366
628
|
if (props.color)
|
|
367
629
|
parts.push(`<w:color w:val="${props.color}"/>`);
|
|
630
|
+
if (props.sz)
|
|
631
|
+
parts.push(`<w:sz w:val="${props.sz}"/>`);
|
|
368
632
|
if (props.u)
|
|
369
633
|
parts.push(`<w:u w:val="${props.u}"/>`);
|
|
370
|
-
if (props.strike)
|
|
371
|
-
parts.push("<w:strike/>");
|
|
372
634
|
if (props.shd)
|
|
373
635
|
parts.push(`<w:shd w:val="clear" w:color="auto" w:fill="${props.shd}"/>`);
|
|
374
636
|
return parts.join("");
|
|
@@ -386,6 +648,7 @@ function convertBlockquote(node, ctx) {
|
|
|
386
648
|
}).join("\n");
|
|
387
649
|
}
|
|
388
650
|
function buildBlockquotePPr(css) {
|
|
651
|
+
// Collect in any order — serializePPr applies ECMA-376 ordering.
|
|
389
652
|
const parts = [];
|
|
390
653
|
// Left border (vertical line) — always add for blockquotes
|
|
391
654
|
parts.push(`<w:pBdr><w:left w:val="single" w:sz="18" w:space="4" w:color="${css["border-color"] ? parseColorSafe(css["border-color"]) : "AAAAAA"}"/></w:pBdr>`);
|
|
@@ -409,10 +672,13 @@ function buildBlockquotePPr(css) {
|
|
|
409
672
|
if (attrs.length)
|
|
410
673
|
parts.push(`<w:spacing ${attrs.join(" ")}/>`);
|
|
411
674
|
}
|
|
412
|
-
return
|
|
675
|
+
return serializePPr(parts);
|
|
413
676
|
}
|
|
414
|
-
function convertList(node, ctx, baseIlvl) {
|
|
415
|
-
|
|
677
|
+
function convertList(node, ctx, baseIlvl, parentNumId) {
|
|
678
|
+
// Top-level lists get a fresh numId; nested lists inherit their parent's
|
|
679
|
+
// numId so the abstract numbering definition (bullet vs ordered) and
|
|
680
|
+
// counter continuity stay consistent across levels.
|
|
681
|
+
const numId = parentNumId ?? allocNumId(ctx, node.ordered);
|
|
416
682
|
const liRPr = ctx.cssElements["li"] ? buildRunPropsXml(cssToRunProps(ctx.cssElements["li"])) : "";
|
|
417
683
|
const parts = [];
|
|
418
684
|
for (const item of node.children) {
|
|
@@ -425,7 +691,7 @@ function convertList(node, ctx, baseIlvl) {
|
|
|
425
691
|
parts.push(`<w:p><w:pPr><w:numPr><w:ilvl w:val="${baseIlvl}"/><w:numId w:val="${numId}"/></w:numPr></w:pPr>${runs}</w:p>`);
|
|
426
692
|
}
|
|
427
693
|
else if (child.type === "list") {
|
|
428
|
-
parts.push(convertList(child, ctx, baseIlvl + 1));
|
|
694
|
+
parts.push(convertList(child, ctx, baseIlvl + 1, numId));
|
|
429
695
|
}
|
|
430
696
|
else {
|
|
431
697
|
parts.push(convertBlock(child, ctx, baseIlvl));
|
|
@@ -438,36 +704,62 @@ function convertCodeBlock(node, ctx) {
|
|
|
438
704
|
const preCss = ctx?.cssElements["pre"] ?? {};
|
|
439
705
|
const bg = preCss["background-color"] ? parseColorSafe(preCss["background-color"]) : undefined;
|
|
440
706
|
const preRPr = Object.keys(preCss).length > 0 ? buildRunPropsXml(cssToRunProps(preCss)) : "";
|
|
441
|
-
//
|
|
442
|
-
|
|
707
|
+
// pPr: pStyle(1) before shd(10) — already in correct order
|
|
708
|
+
const pPrParts = [`<w:pStyle w:val="Code"/>`];
|
|
443
709
|
if (bg)
|
|
444
|
-
|
|
445
|
-
pPr
|
|
710
|
+
pPrParts.push(`<w:shd w:val="clear" w:color="auto" w:fill="${bg}"/>`);
|
|
711
|
+
const pPr = serializePPr(pPrParts);
|
|
712
|
+
// rPr: code rFonts + any pre-element CSS (rFonts override is intentional, last write wins)
|
|
713
|
+
const rPrParts = [`<w:rFonts w:ascii="Courier New" w:hAnsi="Courier New"/>`];
|
|
714
|
+
if (preRPr)
|
|
715
|
+
rPrParts.push(...tokenizeOoxmlChildren(preRPr));
|
|
716
|
+
const rPr = serializeRPr(rPrParts);
|
|
446
717
|
const lines = node.value.split("\n");
|
|
447
|
-
return lines.map(line => `<w:p>${pPr}<w:r
|
|
718
|
+
return lines.map(line => `<w:p>${pPr}<w:r>${rPr}<w:t xml:space="preserve">${esc(line)}</w:t></w:r></w:p>`).join("\n");
|
|
719
|
+
}
|
|
720
|
+
/**
 * Compute how many grid columns a table needs: the widest row, measured as
 * the sum of its cells' `colspan` values (a missing colspan counts as 1).
 *
 * Only `tableRow` children of the table and `tableCell` children of each row
 * are counted, which matches the cells convertTable actually renders.
 *
 * @param {{ children: Array<{ type: string, children: Array<{ type: string, colspan?: number }> }> }} node Table node.
 * @returns {number} Column count, never less than 1.
 */
function countTableColumns(node) {
    let max = 0;
    for (const row of node.children) {
        if (row.type !== "tableRow")
            continue;
        // Non-cell row children are never rendered, so they must not widen the grid.
        const count = row.children
            .filter(cell => cell.type === "tableCell")
            .reduce((n, cell) => n + (cell.colspan ?? 1), 0);
        if (count > max)
            max = count;
    }
    return Math.max(max, 1);
}
|
|
449
731
|
/**
 * Render a table node as a `<w:tbl>` element with a fixed border set, an
 * evenly divided `<w:tblGrid>`, and one `<w:tr>` per row that has cells.
 *
 * Column width is 9360 twips (12240 page width minus two 1440 margins)
 * divided by the column count. Rows without any `tableCell` child are
 * skipped, because a `w:tr` must contain at least one `w:tc`.
 *
 * Every emitted cell contains at least one block. A cell with no children,
 * or whose children all render to nothing, gets an empty paragraph. A cell
 * whose last rendered block is a nested table gets a trailing `<w:p/>`.
 *
 * @param {object} node Table node; reads `children` (rows with `isHeader`, cells with `colspan`).
 * @param {object} ctx Writer context, passed through to block conversion.
 * @returns {string} The table XML.
 */
function convertTable(node, ctx) {
    const colCount = countTableColumns(node);
    // Content width: 9360 twips (12240 page − 2×1440 margins)
    const colWidth = Math.round(9360 / colCount);
    const tblGrid = `<w:tblGrid>${Array.from({ length: colCount }, () => `<w:gridCol w:w="${colWidth}"/>`).join("")}</w:tblGrid>`;
    const rows = node.children.map(row => {
        if (row.type !== "tableRow")
            return "";
        const tr = row;
        // ISO 29500 requires every w:tr to contain at least one w:tc.
        // Skip rows with no tableCell children entirely (e.g. an empty <thead><tr/></thead>).
        const cellNodes = tr.children.filter(c => c.type === "tableCell");
        if (cellNodes.length === 0)
            return "";
        const trPr = tr.isHeader ? "<w:trPr><w:tblHeader/></w:trPr>" : "";
        const cells = cellNodes.map(cell => {
            const tc = cell;
            const span = tc.colspan ?? 1;
            let tcPr = "<w:tcPr>";
            tcPr += `<w:tcW w:w="${colWidth * span}" w:type="dxa"/>`;
            if (span > 1)
                tcPr += `<w:gridSpan w:val="${span}"/>`;
            tcPr += "</w:tcPr>";
            // Keep only children that produced output; convertBlock returns ""
            // for node types it does not handle.
            const rendered = tc.children
                .map(c => ({ type: c.type, xml: convertBlock(c, ctx, 0) }))
                .filter(block => block.xml !== "");
            let content = rendered.map(block => block.xml).join("");
            if (rendered.length === 0) {
                // A w:tc may not be empty: fall back to an empty paragraph.
                const emptyPara = { type: "paragraph", children: [] };
                content = convertParagraph(emptyPara, ctx);
            }
            else if (rendered[rendered.length - 1].type === "table") {
                // Word expects a cell to end with a paragraph, not a nested table.
                content += "<w:p/>";
            }
            return `<w:tc>${tcPr}${content}</w:tc>`;
        }).join("");
        return `<w:tr>${trPr}${cells}</w:tr>`;
    }).filter(Boolean).join("\n");
    return `<w:tbl><w:tblPr><w:tblW w:w="0" w:type="auto"/><w:tblBorders><w:top w:val="single" w:sz="4" w:color="auto"/><w:left w:val="single" w:sz="4" w:color="auto"/><w:bottom w:val="single" w:sz="4" w:color="auto"/><w:right w:val="single" w:sz="4" w:color="auto"/><w:insideH w:val="single" w:sz="4" w:color="auto"/><w:insideV w:val="single" w:sz="4" w:color="auto"/></w:tblBorders></w:tblPr>${tblGrid}${rows}</w:tbl>`;
}
|
|
472
764
|
// ---------------------------------------------------------------------------
|
|
473
765
|
// Inline conversion
|
|
@@ -478,11 +770,14 @@ function convertInline(node, ctx, inheritedRPr = "") {
|
|
|
478
770
|
if (node.type === "break")
|
|
479
771
|
return `<w:r><w:br/></w:r>`;
|
|
480
772
|
if (node.type === "inlineCode") {
|
|
481
|
-
|
|
773
|
+
const parts = [`<w:rFonts w:ascii="Courier New" w:hAnsi="Courier New"/>`];
|
|
774
|
+
if (inheritedRPr)
|
|
775
|
+
parts.push(...tokenizeOoxmlChildren(inheritedRPr));
|
|
776
|
+
return `<w:r>${serializeRPr(parts)}<w:t xml:space="preserve">${esc(node.value)}</w:t></w:r>`;
|
|
482
777
|
}
|
|
483
778
|
if (node.type === "image") {
|
|
484
779
|
const img = node;
|
|
485
|
-
return `<w:r><w:t>[image: ${esc(img.alt ?? img.url)}]</w:t></w:r>`;
|
|
780
|
+
return `<w:r><w:t xml:space="preserve">[image: ${esc(img.alt ?? img.url)}]</w:t></w:r>`;
|
|
486
781
|
}
|
|
487
782
|
return "";
|
|
488
783
|
}
|
|
@@ -491,11 +786,8 @@ function convertText(node, ctx, inheritedRPr = "") {
|
|
|
491
786
|
const otherMarks = node.marks?.filter(m => m.type !== "link") ?? [];
|
|
492
787
|
// Build rPr: inherited CSS props first, then mark-specific overrides
|
|
493
788
|
const parts = [];
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
const elems = inheritedRPr.match(/<w:[^/]*\/>/g) ?? [];
|
|
497
|
-
parts.push(...elems);
|
|
498
|
-
}
|
|
789
|
+
if (inheritedRPr)
|
|
790
|
+
parts.push(...tokenizeOoxmlChildren(inheritedRPr));
|
|
499
791
|
for (const mark of otherMarks) {
|
|
500
792
|
switch (mark.type) {
|
|
501
793
|
case "strong":
|
|
@@ -521,16 +813,8 @@ function convertText(node, ctx, inheritedRPr = "") {
|
|
|
521
813
|
break;
|
|
522
814
|
}
|
|
523
815
|
}
|
|
524
|
-
//
|
|
525
|
-
const
|
|
526
|
-
const dedupedParts = [];
|
|
527
|
-
for (const p of parts) {
|
|
528
|
-
if (!seen.has(p)) {
|
|
529
|
-
seen.add(p);
|
|
530
|
-
dedupedParts.push(p);
|
|
531
|
-
}
|
|
532
|
-
}
|
|
533
|
-
const rPr = dedupedParts.length > 0 ? `<w:rPr>${dedupedParts.join("")}</w:rPr>` : "";
|
|
816
|
+
// Schema-order + tag-dedup (later sources override earlier).
|
|
817
|
+
const rPr = serializeRPr(parts);
|
|
534
818
|
// Track-changes: check diff context (skip for linked text to avoid nesting complexity)
|
|
535
819
|
if (ctx.diff && ctx.nodePathKeys && !linkMark) {
|
|
536
820
|
const key = ctx.nodePathKeys.get(node);
|
|
@@ -575,21 +859,24 @@ function convertText(node, ctx, inheritedRPr = "") {
|
|
|
575
859
|
// ---------------------------------------------------------------------------
|
|
576
860
|
// Paragraph property builder
|
|
577
861
|
// ---------------------------------------------------------------------------
|
|
578
|
-
function
|
|
862
|
+
function buildPPrParts(node, ctx) {
|
|
863
|
+
// Collect parts in any order — serializePPr applies ECMA-376 ordering at the end.
|
|
579
864
|
const parts = [];
|
|
580
865
|
// Classes → pStyle + generate CSS-based Word styles
|
|
581
866
|
const classes = node.classes;
|
|
582
867
|
if (classes && classes.length > 0) {
|
|
583
|
-
|
|
584
|
-
|
|
868
|
+
// Style IDs must be valid OOXML identifiers — sanitize the CSS class name.
|
|
869
|
+
const styleId = sanitizeStyleId(classes[0]);
|
|
870
|
+
parts.push(`<w:pStyle w:val="${styleId}"/>`);
|
|
585
871
|
// Generate custom style from CSS rules (once per styleId)
|
|
586
872
|
for (const cls of classes) {
|
|
587
|
-
|
|
588
|
-
|
|
873
|
+
const clsId = sanitizeStyleId(cls);
|
|
874
|
+
if (ctx.cssClasses[cls] && !ctx.generatedStyleIds.has(clsId)) {
|
|
875
|
+
ctx.generatedStyleIds.add(clsId);
|
|
589
876
|
const cssDecls = ctx.cssClasses[cls];
|
|
590
877
|
const runProps = cssToRunProps(cssDecls);
|
|
591
878
|
const paraProps = cssToParaProps(cssDecls);
|
|
592
|
-
ctx.customStyles.push(buildStyleElement(
|
|
879
|
+
ctx.customStyles.push(buildStyleElement(clsId, cls, "paragraph", runProps, paraProps));
|
|
593
880
|
}
|
|
594
881
|
}
|
|
595
882
|
}
|
|
@@ -616,13 +903,14 @@ function buildPPr(node, ctx) {
|
|
|
616
903
|
if (elTag) {
|
|
617
904
|
ensureElementStyle(elTag, ctx);
|
|
618
905
|
// If no class style was set, use the element style
|
|
619
|
-
if (!classes || classes.length === 0) {
|
|
620
|
-
parts.
|
|
906
|
+
if ((!classes || classes.length === 0) && ctx.cssElements[elTag]) {
|
|
907
|
+
parts.push(`<w:pStyle w:val="_el_${esc(elTag)}"/>`);
|
|
621
908
|
}
|
|
622
|
-
// If class style was set, element CSS is already baked into the base —
|
|
623
|
-
// we could chain basedOn, but for simplicity element styles are standalone
|
|
624
909
|
}
|
|
625
|
-
return parts
|
|
910
|
+
return parts;
|
|
911
|
+
}
|
|
912
|
+
function buildPPr(node, ctx) {
|
|
913
|
+
return serializePPr(buildPPrParts(node, ctx));
|
|
626
914
|
}
|
|
627
915
|
/** Map UDM type to the HTML tag name for CSS element selector lookup */
|
|
628
916
|
function udmTypeToHtmlTag(type, node) {
|
|
@@ -707,7 +995,7 @@ function convertDeletedBlock(op, ctx) {
|
|
|
707
995
|
const node = op.node;
|
|
708
996
|
const diff = ctx.diff;
|
|
709
997
|
const author = esc(diff.author);
|
|
710
|
-
const date = diff.date;
|
|
998
|
+
const date = esc(diff.date);
|
|
711
999
|
const textContent = extractTextContent(node);
|
|
712
1000
|
if (!textContent)
|
|
713
1001
|
return "";
|
|
@@ -719,13 +1007,35 @@ function convertDeletedBlock(op, ctx) {
|
|
|
719
1007
|
const depth = node.depth;
|
|
720
1008
|
pStyle = `<w:pStyle w:val="Heading${depth}"/>`;
|
|
721
1009
|
}
|
|
722
|
-
const
|
|
723
|
-
const
|
|
1010
|
+
const delMark = `<w:del w:id="${delMarkId}" w:author="${author}" w:date="${date}"/>`;
|
|
1011
|
+
const basePPr = pStyle ? `<w:pPr>${pStyle}</w:pPr>` : "";
|
|
1012
|
+
const pPr = mergeParaMarkChild(basePPr, delMark);
|
|
724
1013
|
const delContent = `<w:del w:id="${delContentId}" w:author="${author}" w:date="${date}"><w:r><w:delText xml:space="preserve">${esc(textContent)}</w:delText></w:r></w:del>`;
|
|
725
1014
|
return `<w:p>${pPr}${delContent}</w:p>`;
|
|
726
1015
|
}
|
|
727
1016
|
function esc(s) {
|
|
728
|
-
return s
|
|
1017
|
+
return sanitizeText(s)
|
|
1018
|
+
.replace(/&/g, "&amp;")
|
|
1019
|
+
.replace(/</g, "&lt;")
|
|
1020
|
+
.replace(/>/g, "&gt;")
|
|
1021
|
+
.replace(/"/g, "&quot;");
|
|
1022
|
+
}
|
|
1023
|
+
/**
|
|
1024
|
+
* Strip XML 1.0 forbidden control characters (U+0000–U+001F except \t \n \r,
|
|
1025
|
+
* plus U+007F). ECMA-376 inherits this restriction; leaving these in causes
|
|
1026
|
+
* `document.xml` to be unparseable and Word refuses to open the file.
|
|
1027
|
+
*/
|
|
1028
|
+
function sanitizeText(s) {
|
|
1029
|
+
return s.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, "");
|
|
1030
|
+
}
|
|
1031
|
+
/**
|
|
1032
|
+
* Normalize a CSS class name into a valid OOXML style ID.
|
|
1033
|
+
* Per ECMA-376 §17.7.4.9, style IDs must match `[a-zA-Z0-9_\-:]` and be
|
|
1034
|
+
* ≤ 31 characters. Anything else is stripped.
|
|
1035
|
+
*/
|
|
1036
|
+
function sanitizeStyleId(id) {
|
|
1037
|
+
const cleaned = id.replace(/[^a-zA-Z0-9_\-:]/g, "").slice(0, 31);
|
|
1038
|
+
return cleaned || "Normal";
|
|
729
1039
|
}
|
|
730
1040
|
// ---------------------------------------------------------------------------
|
|
731
1041
|
// writeDiffDocx — serialize a diff result as a .docx with track-changes markup
|
|
@@ -749,14 +1059,17 @@ export async function writeDiffDocx(newTree, diffResult, options) {
|
|
|
749
1059
|
const date = options?.date ?? "2024-01-01T00:00:00Z";
|
|
750
1060
|
const diffCtx = buildDiffCtx(diffResult, author, date);
|
|
751
1061
|
const nodePathKeys = buildNodePathMap(newTree);
|
|
1062
|
+
const docxDataPre = newTree.data?.docx;
|
|
1063
|
+
const existingRels = docxDataPre?.relationships;
|
|
752
1064
|
const ctx = {
|
|
753
1065
|
cssClasses: mergedClassCss,
|
|
754
1066
|
cssElements: elementCss,
|
|
755
1067
|
customStyles: [],
|
|
756
1068
|
generatedStyleIds: new Set(),
|
|
757
1069
|
nextNumId: 3,
|
|
1070
|
+
allocatedNums: new Map(),
|
|
758
1071
|
hyperlinks: new Map(),
|
|
759
|
-
nextRId:
|
|
1072
|
+
nextRId: nextRIdFor(existingRels),
|
|
760
1073
|
diff: diffCtx,
|
|
761
1074
|
nodePathKeys,
|
|
762
1075
|
};
|
|
@@ -774,6 +1087,12 @@ export async function writeDiffDocx(newTree, diffResult, options) {
|
|
|
774
1087
|
offset++;
|
|
775
1088
|
}
|
|
776
1089
|
}
|
|
1090
|
+
// Match writeDocx's trailing-paragraph-after-table behaviour so the body
|
|
1091
|
+
// doesn't end with </w:tbl> immediately before <w:sectPr>.
|
|
1092
|
+
const lastBlock = newTree.children[newTree.children.length - 1];
|
|
1093
|
+
if (lastBlock?.type === "table") {
|
|
1094
|
+
renderedBlocks.push("<w:p/>");
|
|
1095
|
+
}
|
|
777
1096
|
const bodyXml = renderedBlocks.join("\n");
|
|
778
1097
|
const docxData = newTree.data?.docx;
|
|
779
1098
|
let stylesXml = docxData?.styles ?? DEFAULT_STYLES;
|
|
@@ -793,16 +1112,24 @@ ${bodyXml}
|
|
|
793
1112
|
const hlRels = [...ctx.hyperlinks].map(([rId, url]) => `<Relationship Id="${rId}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink" Target="${esc(url)}" TargetMode="External"/>`).join("\n ");
|
|
794
1113
|
relsXml = relsXml.replace("</Relationships>", ` ${hlRels}\n</Relationships>`);
|
|
795
1114
|
}
|
|
1115
|
+
let numberingXml = docxData?.numbering ?? DEFAULT_NUMBERING;
|
|
1116
|
+
if (ctx.allocatedNums.size > 0) {
|
|
1117
|
+
const newNums = [...ctx.allocatedNums]
|
|
1118
|
+
.map(([numId, abstractId]) => `<w:num w:numId="${numId}"><w:abstractNumId w:val="${abstractId}"/></w:num>`)
|
|
1119
|
+
.join("\n ");
|
|
1120
|
+
numberingXml = numberingXml.replace("</w:numbering>", ` ${newNums}\n</w:numbering>`);
|
|
1121
|
+
}
|
|
796
1122
|
const parts = {
|
|
797
1123
|
document: docXml,
|
|
798
1124
|
styles: stylesXml,
|
|
799
|
-
numbering:
|
|
1125
|
+
numbering: numberingXml,
|
|
800
1126
|
relationships: relsXml,
|
|
801
1127
|
contentTypes: docxData?.contentTypes ?? undefined,
|
|
802
1128
|
media: new Map(),
|
|
803
1129
|
rawParts: new Map(),
|
|
804
1130
|
};
|
|
805
1131
|
const udmSnapshot = stripDataForSnapshot(newTree);
|
|
1132
|
+
udmSnapshot.__docHash = await hashString(docXml);
|
|
806
1133
|
parts.rawParts.set("word/otomate-udm.json", JSON.stringify(udmSnapshot));
|
|
807
1134
|
if (cssData && (Object.keys(cssData.classRules ?? {}).length > 0 || Object.keys(cssData.elementRules ?? {}).length > 0)) {
|
|
808
1135
|
parts.rawParts.set("word/otomate-css.json", JSON.stringify(cssData));
|