@otomate/docx 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/writer.js CHANGED
@@ -1,6 +1,7 @@
1
1
  // ---------------------------------------------------------------------------
2
2
  // docx Writer — UDM tree → .docx buffer
3
3
  // ---------------------------------------------------------------------------
4
+ import { hashString } from "./hash.js";
4
5
  import { isParent, isText } from "@otomate/core";
5
6
  import { buildStyleElement, cssToRunProps, cssToParaProps, parseLengthTwips, parseColor } from "@otomate/css-docx";
6
7
  import { packDocx } from "./zip.js";
@@ -14,16 +15,28 @@ export async function writeDocx(tree, options) {
14
15
  const cssData = tree.data?.css;
15
16
  const mergedClassCss = { ...cssData?.classRules, ...options?.cssClasses };
16
17
  const elementCss = cssData?.elementRules ?? {};
18
+ const docxDataPre = tree.data?.docx;
19
+ const existingRels = docxDataPre?.relationships;
17
20
  const ctx = {
18
21
  cssClasses: mergedClassCss,
19
22
  cssElements: elementCss,
20
23
  customStyles: [],
21
24
  generatedStyleIds: new Set(),
22
25
  nextNumId: 3,
26
+ allocatedNums: new Map(),
23
27
  hyperlinks: new Map(),
24
- nextRId: 100,
28
+ // Seed past any existing rIds to avoid collisions on round-trip.
29
+ nextRId: nextRIdFor(existingRels),
25
30
  };
26
- const bodyXml = tree.children.map(child => convertBlock(child, ctx, 0)).join("\n");
31
+ const renderedBlocks = tree.children.map(child => convertBlock(child, ctx, 0));
32
+ // OOXML allows w:tbl as the last body child, but Word always inserts a trailing
33
+ // empty paragraph after a final table. Match that behavior so re-opening in Word
34
+ // produces no diff.
35
+ const lastBlock = tree.children[tree.children.length - 1];
36
+ if (lastBlock?.type === "table") {
37
+ renderedBlocks.push("<w:p/>");
38
+ }
39
+ const bodyXml = renderedBlocks.join("\n");
27
40
  const docxData = tree.data?.docx;
28
41
  // Build styles.xml with any generated custom styles
29
42
  let stylesXml = docxData?.styles ?? DEFAULT_STYLES;
@@ -46,17 +59,29 @@ ${bodyXml}
46
59
  // Inject before closing </Relationships>
47
60
  relsXml = relsXml.replace("</Relationships>", ` ${hlRels}\n</Relationships>`);
48
61
  }
62
+ // Inject any newly-allocated <w:num> entries into numbering.xml so each
63
+ // top-level list has its own num definition (independent counters & styles).
64
+ let numberingXml = docxData?.numbering ?? DEFAULT_NUMBERING;
65
+ if (ctx.allocatedNums.size > 0) {
66
+ const newNums = [...ctx.allocatedNums]
67
+ .map(([numId, abstractId]) => `<w:num w:numId="${numId}"><w:abstractNumId w:val="${abstractId}"/></w:num>`)
68
+ .join("\n ");
69
+ numberingXml = numberingXml.replace("</w:numbering>", ` ${newNums}\n</w:numbering>`);
70
+ }
49
71
  const parts = {
50
72
  document: docXml,
51
73
  styles: stylesXml,
52
- numbering: docxData?.numbering ?? DEFAULT_NUMBERING,
74
+ numbering: numberingXml,
53
75
  relationships: relsXml,
54
76
  contentTypes: docxData?.contentTypes ?? undefined,
55
77
  media: new Map(),
56
78
  rawParts: new Map(),
57
79
  };
58
- // Embed UDM tree as custom part for lossless round-trip with otomate
80
+ // Embed UDM tree as custom part for lossless round-trip with otomate.
81
+ // The hash binds the snapshot to this exact document.xml so the reader
82
+ // can detect external edits (Word saves) and fall back to OOXML parsing.
59
83
  const udmSnapshot = stripDataForSnapshot(tree);
84
+ udmSnapshot.__docHash = await hashString(docXml);
60
85
  parts.rawParts.set("word/otomate-udm.json", JSON.stringify(udmSnapshot));
61
86
  // Embed CSS rules if present
62
87
  if (cssData && (Object.keys(cssData.classRules ?? {}).length > 0 || Object.keys(cssData.elementRules ?? {}).length > 0)) {
@@ -76,6 +101,37 @@ ${bodyXml}
76
101
  }
77
102
  return packDocx(parts);
78
103
  }
104
/**
 * Compute the numeric suffix of the next relationship id to hand out for a
 * new hyperlink, given the round-tripped relationships XML.
 *
 * The result is one more than the largest `rIdN` found in the XML, but never
 * less than 100 (well above the default rId1/rId2). Gaps in the existing
 * numbering are not reused, so this is "one past the maximum" rather than
 * the smallest free id.
 *
 * @param {string | null | undefined} existingRels - Raw relationships XML, if any.
 * @returns {number} Numeric suffix for the next `rId` to allocate.
 */
function nextRIdFor(existingRels) {
    if (!existingRels)
        return 100;
    let max = 0;
    // XML allows attribute values in either quote style, so accept both
    // Id="rId7" and Id='rId7'; the back-reference keeps the quotes paired.
    for (const m of String(existingRels).matchAll(/Id=(["'])rId(\d+)\1/g)) {
        const n = Number(m[2]);
        if (n > max)
            max = n;
    }
    return Math.max(max + 1, 100);
}
122
/**
 * Allocate a fresh w:numId for a top-level list, recording it so the writer
 * can inject a matching <w:num> entry into numbering.xml.
 *
 * Each top-level list gets its own numId so that ordered lists restart at 1
 * instead of continuing the previous list's numbering, and bullet styles
 * applied to one list don't bleed into all others.
 *
 * The context passed in may be a shallow copy of the writer context (the
 * container helpers spread `ctx` into a new object). In that case the
 * primitive `nextNumId` counter is copied by value and its increments never
 * reach the original, while the `allocatedNums` Map is shared by reference.
 * The Map is therefore treated as the source of truth: any id already
 * recorded there is skipped, so two lists can never receive the same numId.
 *
 * @param {{ nextNumId: number, allocatedNums: Map<string, number> }} ctx - Writer context (or a shallow copy of it).
 * @param {boolean} ordered - Whether the list is ordered; selects abstractNumId 1 (ordered) or 0 (bullet).
 * @returns {string} The newly allocated numId.
 */
function allocNumId(ctx, ordered) {
    let candidate = ctx.nextNumId;
    // Skip ids handed out through another copy of the context.
    while (ctx.allocatedNums.has(String(candidate)))
        candidate++;
    ctx.nextNumId = candidate + 1;
    const numId = String(candidate);
    ctx.allocatedNums.set(numId, ordered ? 1 : 0);
    return numId;
}
79
135
  function allocHyperlinkRId(ctx, url) {
80
136
  // Reuse existing rId for same URL
81
137
  for (const [rId, existingUrl] of ctx.hyperlinks) {
@@ -129,7 +185,7 @@ function convertBlock(node, ctx, listIlvl) {
129
185
  case "div":
130
186
  case "figure":
131
187
  return convertDiv(node, ctx, listIlvl);
132
- case "html": return `<w:p><w:r><w:t>${esc(node.value)}</w:t></w:r></w:p>`;
188
+ case "html": return `<w:p><w:r><w:t xml:space="preserve">${esc(node.value)}</w:t></w:r></w:p>`;
133
189
  default: return "";
134
190
  }
135
191
  }
@@ -154,12 +210,16 @@ function convertDiv(node, ctx, listIlvl) {
154
210
  const runProps = cssToRunProps(containerCss);
155
211
  const containerRPr = buildRunPropsXml(runProps);
156
212
  const bg = containerCss["background-color"] ? parseColorSafe(containerCss["background-color"]) : undefined;
157
- // Build extra pPr elements to inject into child paragraphs
158
- let extraPPr = "";
213
+ // Build extra pPr elements to inject into child paragraphs.
214
+ // Order doesn't matter here — injectPPr re-sorts via serializePPr.
215
+ const extraPPrParts = [];
216
+ const pBdrXml = buildPBdrXml(paraProps.pBdr);
217
+ if (pBdrXml)
218
+ extraPPrParts.push(pBdrXml);
159
219
  if (bg && bg !== "auto")
160
- extraPPr += `<w:shd w:val="clear" w:color="auto" w:fill="${bg}"/>`;
220
+ extraPPrParts.push(`<w:shd w:val="clear" w:color="auto" w:fill="${bg}"/>`);
161
221
  if (paraProps.jc)
162
- extraPPr += `<w:jc w:val="${paraProps.jc}"/>`;
222
+ extraPPrParts.push(`<w:jc w:val="${paraProps.jc}"/>`);
163
223
  if (paraProps.ind) {
164
224
  const attrs = [];
165
225
  if (paraProps.ind.left)
@@ -167,14 +227,14 @@ function convertDiv(node, ctx, listIlvl) {
167
227
  if (paraProps.ind.right)
168
228
  attrs.push(`w:right="${paraProps.ind.right}"`);
169
229
  if (attrs.length)
170
- extraPPr += `<w:ind ${attrs.join(" ")}/>`;
230
+ extraPPrParts.push(`<w:ind ${attrs.join(" ")}/>`);
171
231
  }
172
232
  // Create a temporary modified context that includes the container's styles
173
233
  // so child paragraphs inherit them
174
234
  const childCtx = {
175
235
  ...ctx,
176
236
  _containerRPr: containerRPr,
177
- _containerPPr: extraPPr,
237
+ _containerPPrParts: extraPPrParts,
178
238
  };
179
239
  return node.children.map(c => convertBlockWithContainer(c, childCtx, listIlvl)).join("\n");
180
240
  }
@@ -182,17 +242,16 @@ function convertDiv(node, ctx, listIlvl) {
182
242
  function convertBlockWithContainer(node, ctx, listIlvl) {
183
243
  const containerCtx = ctx;
184
244
  const extraRPr = containerCtx._containerRPr ?? "";
185
- const extraPPr = containerCtx._containerPPr ?? "";
245
+ const extraPPrParts = containerCtx._containerPPrParts ?? [];
186
246
  // Helper: resolve all CSS run props for this node (element + own classes)
187
247
  const nodeRPr = resolveBlockCssRunProps(node, ctx);
188
248
  const mergedRPr = extraRPr + nodeRPr;
189
- // Helper: inject container pPr into a pPr string
249
+ // Helper: merge container pPr parts into an existing pPr string with schema ordering.
190
250
  function injectPPr(pPr) {
191
- if (!extraPPr)
251
+ if (extraPPrParts.length === 0)
192
252
  return pPr;
193
- if (pPr)
194
- return pPr.replace("</w:pPr>", `${extraPPr}</w:pPr>`);
195
- return `<w:pPr>${extraPPr}</w:pPr>`;
253
+ const baseChildren = pPr ? extractPPrChildren(pPr) : [];
254
+ return serializePPr([...baseChildren, ...extraPPrParts]);
196
255
  }
197
256
  if (node.type === "paragraph") {
198
257
  const para = node;
@@ -218,11 +277,12 @@ function convertBlockWithContainer(node, ctx, listIlvl) {
218
277
  const classes = h.classes;
219
278
  if (classes?.length) {
220
279
  for (const cls of classes) {
221
- if (ctx.cssClasses[cls] && !ctx.generatedStyleIds.has(cls)) {
222
- ctx.generatedStyleIds.add(cls);
280
+ const clsId = sanitizeStyleId(cls);
281
+ if (ctx.cssClasses[cls] && !ctx.generatedStyleIds.has(clsId)) {
282
+ ctx.generatedStyleIds.add(clsId);
223
283
  const rp = cssToRunProps(ctx.cssClasses[cls]);
224
284
  const pp = cssToParaProps(ctx.cssClasses[cls]);
225
- ctx.customStyles.push(buildStyleElement(cls, cls, "paragraph", rp, pp));
285
+ ctx.customStyles.push(buildStyleElement(clsId, cls, "paragraph", rp, pp));
226
286
  }
227
287
  }
228
288
  }
@@ -232,7 +292,7 @@ function convertBlockWithContainer(node, ctx, listIlvl) {
232
292
  if (node.type === "list") {
233
293
  // Pass container formatting down to list items
234
294
  const list = node;
235
- const numId = list.ordered ? "2" : "1";
295
+ const numId = allocNumId(ctx, list.ordered);
236
296
  const parts = [];
237
297
  for (const item of list.children) {
238
298
  if (item.type !== "listItem")
@@ -258,7 +318,8 @@ function convertBlockWithContainer(node, ctx, listIlvl) {
258
318
  parts.push(`<w:p>${pPr}${runs}</w:p>`);
259
319
  }
260
320
  else if (child.type === "list") {
261
- parts.push(convertBlockWithContainer(child, ctx, listIlvl + 1));
321
+ // Inherit parent numId so nested levels share the same w:num entry.
322
+ parts.push(convertList(child, ctx, listIlvl + 1, numId));
262
323
  }
263
324
  else {
264
325
  // div, blockquote, codeBlock, etc. — recurse with container inheritance
@@ -271,7 +332,7 @@ function convertBlockWithContainer(node, ctx, listIlvl) {
271
332
  // For other children, use normal conversion with container props cleared
272
333
  const cleanCtx = { ...ctx };
273
334
  delete cleanCtx._containerRPr;
274
- delete cleanCtx._containerPPr;
335
+ delete cleanCtx._containerPPrParts;
275
336
  return convertBlock(node, cleanCtx, listIlvl);
276
337
  }
277
338
  function convertParagraph(node, ctx) {
@@ -288,36 +349,82 @@ function convertParagraph(node, ctx) {
288
349
  }
289
350
/**
 * Render a heading node as a `<w:p>` element.
 *
 * CSS declarations for the `h{depth}` element rule and for each of the
 * node's classes are merged (later sources override earlier ones). The merged
 * declarations supply run properties for every inline child and
 * paragraph-level direct formatting (borders, shading, alignment, spacing,
 * indent). The paragraph style is `_el_h{depth}` when an element rule exists,
 * otherwise `Heading{depth}`.
 *
 * When the context carries a diff and this node is listed among its inserted
 * paths, rendering is delegated to `convertInsertedParagraph`.
 */
function convertHeading(node, ctx) {
    const tag = `h${node.depth}`;
    const classes = node.classes ?? [];
    // Cascade sources in priority order: element rule, then each class rule.
    const ruleSets = [ctx.cssElements[tag], ...classes.map(cls => ctx.cssClasses[cls])].filter(Boolean);
    const mergedDecls = ruleSets.reduce((acc, decls) => ({ ...acc, ...decls }), {});
    let cssRPr = "";
    let paraPropsFromCss = {};
    if (Object.keys(mergedDecls).length > 0) {
        cssRPr = buildRunPropsXml(cssToRunProps(mergedDecls));
        paraPropsFromCss = cssToParaProps(mergedDecls);
    }
    // Collect `w:name="value"` attributes for every listed key that is defined.
    const definedAttrs = (source, names) => names
        .filter(name => source[name] !== undefined)
        .map(name => `w:${name}="${source[name]}"`);
    const pPrParts = [];
    // The pStyle always comes first so the paragraph keeps a heading style.
    const usesElementStyle = Boolean(ctx.cssElements[tag]);
    if (usesElementStyle)
        ensureElementStyle(tag, ctx);
    pPrParts.push(usesElementStyle
        ? `<w:pStyle w:val="_el_${tag}"/>`
        : `<w:pStyle w:val="Heading${node.depth}"/>`);
    const pBdrXml = buildPBdrXml(paraPropsFromCss.pBdr);
    if (pBdrXml)
        pPrParts.push(pBdrXml);
    if (paraPropsFromCss.shd) {
        pPrParts.push(`<w:shd w:val="clear" w:color="auto" w:fill="${paraPropsFromCss.shd}"/>`);
    }
    if (paraPropsFromCss.jc) {
        pPrParts.push(`<w:jc w:val="${paraPropsFromCss.jc}"/>`);
    }
    if (paraPropsFromCss.spacing) {
        const spacingAttrs = definedAttrs(paraPropsFromCss.spacing, ["before", "after", "line"]);
        if (spacingAttrs.length)
            pPrParts.push(`<w:spacing ${spacingAttrs.join(" ")}/>`);
    }
    if (paraPropsFromCss.ind) {
        const indentAttrs = definedAttrs(paraPropsFromCss.ind, ["left", "right"]);
        if (indentAttrs.length)
            pPrParts.push(`<w:ind ${indentAttrs.join(" ")}/>`);
    }
    const pPr = serializePPr(pPrParts);
    // Diff-aware path: headings marked as inserted are rendered as tracked insertions.
    const insertedKey = ctx.diff && ctx.nodePathKeys ? ctx.nodePathKeys.get(node) : undefined;
    if (insertedKey !== undefined && ctx.diff.insertedPaths.has(insertedKey)) {
        return convertInsertedParagraph(node, pPr, cssRPr, ctx);
    }
    const runs = node.children.map(child => convertInline(child, ctx, cssRPr)).join("");
    return `<w:p>${pPr}${runs}</w:p>`;
}
310
419
  /** Render a paragraph (or heading cast as paragraph) with all runs wrapped in w:ins. */
311
420
  function convertInsertedParagraph(node, pPr, cssRPr, ctx) {
312
421
  const diff = ctx.diff;
313
422
  const author = esc(diff.author);
314
- const date = diff.date;
315
- // Track the paragraph mark itself as inserted (inject into pPr)
423
+ const date = esc(diff.date);
424
+ // Track the paragraph mark itself as inserted (inject into pPr's rPr).
316
425
  const markId = diff.revId.value++;
317
- const insMarkXml = `<w:rPr><w:ins w:id="${markId}" w:author="${author}" w:date="${date}"/></w:rPr>`;
318
- const trackedPPr = pPr
319
- ? pPr.replace("</w:pPr>", `${insMarkXml}</w:pPr>`)
320
- : `<w:pPr>${insMarkXml}</w:pPr>`;
426
+ const insMark = `<w:ins w:id="${markId}" w:author="${author}" w:date="${date}"/>`;
427
+ const trackedPPr = mergeParaMarkChild(pPr, insMark);
321
428
  // Wrap each inline child's run in its own w:ins
322
429
  const runs = node.children.map(c => {
323
430
  const runXml = convertInline(c, ctx, cssRPr);
@@ -346,8 +453,163 @@ function resolveBlockCssRunProps(node, ctx) {
346
453
  return "";
347
454
  return buildRunPropsXml(cssToRunProps(merged));
348
455
  }
349
- /** Build w:rPr XML string from OoxmlRunProps (without the <w:rPr> wrapper) */
456
// ---------------------------------------------------------------------------
// ECMA-376 schema-order maps for w:rPr and w:pPr children
// (validators reject out-of-order elements in strict mode)
//
// Each map assigns a 1-based rank to an element name; the rank is the
// element's position in the list below.
// ---------------------------------------------------------------------------
const RPR_ORDER = Object.fromEntries([
    "w:rStyle", "w:rFonts", "w:b", "w:bCs", "w:i", "w:iCs",
    "w:caps", "w:smallCaps", "w:strike", "w:dstrike",
    "w:outline", "w:shadow", "w:emboss", "w:imprint",
    "w:noProof", "w:snapToGrid", "w:vanish", "w:webHidden",
    "w:color", "w:spacing", "w:w", "w:kern", "w:position",
    "w:sz", "w:szCs", "w:highlight", "w:u", "w:effect",
    "w:bdr", "w:shd", "w:fitText", "w:vertAlign", "w:rtl",
    "w:cs", "w:em", "w:lang", "w:eastAsianLayout",
    "w:specVanish", "w:oMath",
].map((tagName, index) => [tagName, index + 1]));
const PPR_ORDER = Object.fromEntries([
    "w:pStyle", "w:keepNext", "w:keepLines", "w:pageBreakBefore",
    "w:framePr", "w:widowControl", "w:numPr", "w:suppressLineNumbers",
    "w:pBdr", "w:shd", "w:tabs", "w:suppressAutoHyphens",
    "w:kinsoku", "w:wordWrap", "w:overflowPunct", "w:topLinePunct",
    "w:autoSpaceDE", "w:autoSpaceDN", "w:bidi", "w:adjustRightInd",
    "w:snapToGrid", "w:spacing", "w:ind", "w:contextualSpacing",
    "w:mirrorIndents", "w:suppressOverlap", "w:jc", "w:textDirection",
    "w:textAlignment", "w:textboxTightWrap", "w:outlineLvl",
    "w:divId", "w:cnfStyle", "w:rPr", "w:sectPr", "w:pPrChange",
].map((tagName, index) => [tagName, index + 1]));
482
/**
 * Return the qualified name (e.g. "w:shd") of the `w:`-prefixed element that
 * opens the given XML string, or "" when the string does not begin with one.
 */
function getElementTag(xml) {
    const found = xml.match(/^<(w:[A-Za-z]+)/);
    if (found === null) {
        return "";
    }
    const [, tagName] = found;
    return tagName;
}
486
/**
 * Tokenize the children of an OOXML container into raw element strings.
 *
 * Only elements whose name starts with `w:` are recognised at the top level;
 * any other characters between them are skipped. Each token is either a
 * self-closing element (`<w:b/>`) or a complete parent element including its
 * matching close tag. Scanning stops at an unterminated opening tag. A parent
 * element whose close tag is missing yields the remainder of the input as its
 * token.
 *
 * Nested elements with the same name are depth-counted, and a nested
 * self-closing one (e.g. the `<w:rPr/>` inside
 * `<w:rPr><w:rPrChange><w:rPr/></w:rPrChange></w:rPr>`) does not open a new
 * level. Both single- and double-quoted attribute values may contain `>`.
 *
 * @param {string} xml - Concatenated child elements (no wrapper element).
 * @returns {string[]} The top-level child elements, in document order.
 */
function tokenizeOoxmlChildren(xml) {
    const tokens = [];
    const tagNameRe = /<(w:[A-Za-z]+)/y;
    // Index of the ">" ending the tag that starts at `start`, ignoring any ">"
    // inside quoted attribute values; -1 when the tag is unterminated.
    const findTagEnd = (start) => {
        let quote = "";
        for (let p = start + 1; p < xml.length; p++) {
            const ch = xml[p];
            if (quote) {
                if (ch === quote)
                    quote = "";
            }
            else if (ch === '"' || ch === "'") {
                quote = ch;
            }
            else if (ch === ">") {
                return p;
            }
        }
        return -1;
    };
    let i = 0;
    while (i < xml.length) {
        if (xml[i] !== "<") {
            i++;
            continue;
        }
        tagNameRe.lastIndex = i;
        const nameMatch = tagNameRe.exec(xml);
        if (!nameMatch) {
            i++;
            continue;
        }
        const name = nameMatch[1];
        const openEnd = findTagEnd(i);
        if (openEnd === -1)
            break;
        if (xml[openEnd - 1] === "/") {
            tokens.push(xml.slice(i, openEnd + 1));
            i = openEnd + 1;
            continue;
        }
        const openPrefix = `<${name}`;
        const closeTag = `</${name}>`;
        let depth = 1;
        let k = openEnd + 1;
        while (k < xml.length && depth > 0) {
            if (xml.startsWith(closeTag, k)) {
                depth--;
                k += closeTag.length;
                continue;
            }
            if (xml.startsWith(openPrefix, k)) {
                // Require a delimiter after the name so that e.g. <w:rPrChange>
                // is not mistaken for a nested <w:rPr>.
                const after = xml[k + openPrefix.length];
                if (after === ">" || after === "/" || (after !== undefined && /\s/.test(after))) {
                    const nestedEnd = findTagEnd(k);
                    if (nestedEnd === -1) {
                        k = xml.length;
                        break;
                    }
                    // A self-closing nested element has no close tag to wait for.
                    if (xml[nestedEnd - 1] !== "/")
                        depth++;
                    k = nestedEnd + 1;
                    continue;
                }
            }
            k++;
        }
        tokens.push(xml.slice(i, k));
        i = k;
    }
    return tokens;
}
543
/**
 * Deduplicate rPr/pPr child element strings by tag name and sort them by the
 * given rank table.
 *
 * When several elements share a tag, the last one replaces the earlier ones.
 * Elements whose tag cannot be determined are dropped. Tags missing from
 * `order` are ranked 999, so they sort after all known tags.
 */
function orderElements(elements, order) {
    const latestByTag = new Map();
    for (const element of elements) {
        const tag = getElementTag(element);
        if (!tag) {
            continue;
        }
        latestByTag.set(tag, element);
    }
    const rankOf = (tag) => order[tag] ?? 999;
    return Array.from(latestByTag)
        .sort((left, right) => rankOf(left[0]) - rankOf(right[0]))
        .map((entry) => entry[1]);
}
555
/**
 * Serialize run-property children into a `<w:rPr>` element, deduplicated and
 * ordered according to RPR_ORDER. Yields "" when no children remain.
 */
function serializeRPr(parts) {
    const children = orderElements(parts, RPR_ORDER);
    if (children.length === 0) {
        return "";
    }
    return `<w:rPr>${children.join("")}</w:rPr>`;
}
560
/**
 * Serialize paragraph-property children into a `<w:pPr>` element,
 * deduplicated and ordered according to PPR_ORDER. Yields "" when no children
 * remain.
 */
function serializePPr(parts) {
    const children = orderElements(parts, PPR_ORDER);
    if (children.length === 0) {
        return "";
    }
    return `<w:pPr>${children.join("")}</w:pPr>`;
}
565
/**
 * Extract the children of an existing <w:pPr>...</w:pPr> wrapper as raw
 * element strings.
 *
 * The match runs from the first `<w:pPr>` opening tag (attributes allowed) to
 * the LAST `</w:pPr>`. A `<w:pPrChange>` child carries its own nested
 * `<w:pPr>`, so stopping at the first close tag would cut the content short.
 *
 * @param {string} pPr - A serialized `<w:pPr>` element.
 * @returns {string[]} Top-level child elements, or [] when no wrapper is found.
 */
function extractPPrChildren(pPr) {
    const m = pPr.match(/<w:pPr(?:\s[^>]*)?>([\s\S]*)<\/w:pPr>/);
    return m ? tokenizeOoxmlChildren(m[1]) : [];
}
570
/**
 * Insert a paragraph-mark child (w:ins/w:del/w:rPrChange/etc.) into the
 * `<w:rPr>` that lives inside a paragraph's `<w:pPr>`.
 *
 * If the pPr already has an rPr child, `child` is placed immediately after
 * that rPr's opening tag and the existing rPr content is kept. Otherwise a
 * new `<w:rPr>` holding only `child` is appended. The result is re-serialized
 * through `serializePPr`, so an empty or missing `pPr` still produces a
 * complete wrapper.
 */
function mergeParaMarkChild(pPr, child) {
    const existing = pPr ? extractPPrChildren(pPr) : [];
    const isParaMarkRPr = (el) => el.startsWith("<w:rPr>") || el.startsWith("<w:rPr ");
    const merged = [];
    let sawRPr = false;
    for (const el of existing) {
        if (!isParaMarkRPr(el)) {
            merged.push(el);
            continue;
        }
        sawRPr = true;
        // Tracked-change marks go first inside the paragraph-mark rPr.
        merged.push(el.replace(/^<w:rPr(\s[^>]*)?>/, (openTag) => `${openTag}${child}`));
    }
    if (!sawRPr) {
        merged.push(`<w:rPr>${child}</w:rPr>`);
    }
    return serializePPr(merged);
}
593
/**
 * Extract the children of an existing <w:rPr>...</w:rPr> wrapper as raw
 * element strings.
 *
 * The match runs from the first `<w:rPr>` opening tag (attributes allowed) to
 * the LAST `</w:rPr>`. A `<w:rPrChange>` child carries its own nested
 * `<w:rPr>`, so stopping at the first close tag would cut the content short.
 *
 * @param {string} rPr - A serialized `<w:rPr>` element.
 * @returns {string[]} Top-level child elements, or [] when no wrapper is found.
 */
function extractRPrChildren(rPr) {
    const m = rPr.match(/<w:rPr(?:\s[^>]*)?>([\s\S]*)<\/w:rPr>/);
    return m ? tokenizeOoxmlChildren(m[1]) : [];
}
598
/**
 * Build a `<w:pBdr>` element from a parsed pBdr definition.
 *
 * Sides are emitted in the order the WordprocessingML schema defines for
 * paragraph borders (top, left, bottom, right); strict validators reject
 * children that appear out of sequence. Sides missing from `pBdr` are skipped.
 *
 * @param {Record<string, { val: string, sz: number | string, color: string }> | null | undefined} pBdr
 *   Border definitions keyed by side name.
 * @returns {string} The `<w:pBdr>` XML, or "" when `pBdr` is absent or has no sides set.
 */
function buildPBdrXml(pBdr) {
    if (!pBdr)
        return "";
    const sides = [];
    for (const side of ["top", "left", "bottom", "right"]) {
        const bd = pBdr[side];
        if (bd)
            sides.push(`<w:${side} w:val="${bd.val}" w:sz="${bd.sz}" w:space="0" w:color="${bd.color}"/>`);
    }
    return sides.length > 0 ? `<w:pBdr>${sides.join("")}</w:pBdr>` : "";
}
610
+ /** Build w:rPr child elements from OoxmlRunProps (without the wrapper, in schema order). */
350
611
  function buildRunPropsXml(props) {
612
+ // Order per ECMA-376 CT_RPr: rFonts(2), b(3), i(5), strike(9), color(19), sz(24), u(27), shd(30)
351
613
  const parts = [];
352
614
  if (props.rFonts) {
353
615
  const attrs = [];
@@ -361,14 +623,14 @@ function buildRunPropsXml(props) {
361
623
  parts.push("<w:b/>");
362
624
  if (props.i)
363
625
  parts.push("<w:i/>");
364
- if (props.sz)
365
- parts.push(`<w:sz w:val="${props.sz}"/>`);
626
+ if (props.strike)
627
+ parts.push("<w:strike/>");
366
628
  if (props.color)
367
629
  parts.push(`<w:color w:val="${props.color}"/>`);
630
+ if (props.sz)
631
+ parts.push(`<w:sz w:val="${props.sz}"/>`);
368
632
  if (props.u)
369
633
  parts.push(`<w:u w:val="${props.u}"/>`);
370
- if (props.strike)
371
- parts.push("<w:strike/>");
372
634
  if (props.shd)
373
635
  parts.push(`<w:shd w:val="clear" w:color="auto" w:fill="${props.shd}"/>`);
374
636
  return parts.join("");
@@ -386,6 +648,7 @@ function convertBlockquote(node, ctx) {
386
648
  }).join("\n");
387
649
  }
388
650
  function buildBlockquotePPr(css) {
651
+ // Collect in any order — serializePPr applies ECMA-376 ordering.
389
652
  const parts = [];
390
653
  // Left border (vertical line) — always add for blockquotes
391
654
  parts.push(`<w:pBdr><w:left w:val="single" w:sz="18" w:space="4" w:color="${css["border-color"] ? parseColorSafe(css["border-color"]) : "AAAAAA"}"/></w:pBdr>`);
@@ -409,10 +672,13 @@ function buildBlockquotePPr(css) {
409
672
  if (attrs.length)
410
673
  parts.push(`<w:spacing ${attrs.join(" ")}/>`);
411
674
  }
412
- return `<w:pPr>${parts.join("")}</w:pPr>`;
675
+ return serializePPr(parts);
413
676
  }
414
- function convertList(node, ctx, baseIlvl) {
415
- const numId = node.ordered ? "2" : "1";
677
+ function convertList(node, ctx, baseIlvl, parentNumId) {
678
+ // Top-level lists get a fresh numId; nested lists inherit their parent's
679
+ // numId so the abstract numbering definition (bullet vs ordered) and
680
+ // counter continuity stay consistent across levels.
681
+ const numId = parentNumId ?? allocNumId(ctx, node.ordered);
416
682
  const liRPr = ctx.cssElements["li"] ? buildRunPropsXml(cssToRunProps(ctx.cssElements["li"])) : "";
417
683
  const parts = [];
418
684
  for (const item of node.children) {
@@ -425,7 +691,7 @@ function convertList(node, ctx, baseIlvl) {
425
691
  parts.push(`<w:p><w:pPr><w:numPr><w:ilvl w:val="${baseIlvl}"/><w:numId w:val="${numId}"/></w:numPr></w:pPr>${runs}</w:p>`);
426
692
  }
427
693
  else if (child.type === "list") {
428
- parts.push(convertList(child, ctx, baseIlvl + 1));
694
+ parts.push(convertList(child, ctx, baseIlvl + 1, numId));
429
695
  }
430
696
  else {
431
697
  parts.push(convertBlock(child, ctx, baseIlvl));
@@ -438,36 +704,62 @@ function convertCodeBlock(node, ctx) {
438
704
  const preCss = ctx?.cssElements["pre"] ?? {};
439
705
  const bg = preCss["background-color"] ? parseColorSafe(preCss["background-color"]) : undefined;
440
706
  const preRPr = Object.keys(preCss).length > 0 ? buildRunPropsXml(cssToRunProps(preCss)) : "";
441
- // Build pPr with Code style + optional background shading
442
- let pPr = `<w:pPr><w:pStyle w:val="Code"/>`;
707
+ // pPr: pStyle(1) before shd(10) already in correct order
708
+ const pPrParts = [`<w:pStyle w:val="Code"/>`];
443
709
  if (bg)
444
- pPr += `<w:shd w:val="clear" w:color="auto" w:fill="${bg}"/>`;
445
- pPr += `</w:pPr>`;
710
+ pPrParts.push(`<w:shd w:val="clear" w:color="auto" w:fill="${bg}"/>`);
711
+ const pPr = serializePPr(pPrParts);
712
+ // rPr: code rFonts + any pre-element CSS (rFonts override is intentional, last write wins)
713
+ const rPrParts = [`<w:rFonts w:ascii="Courier New" w:hAnsi="Courier New"/>`];
714
+ if (preRPr)
715
+ rPrParts.push(...tokenizeOoxmlChildren(preRPr));
716
+ const rPr = serializeRPr(rPrParts);
446
717
  const lines = node.value.split("\n");
447
- return lines.map(line => `<w:p>${pPr}<w:r><w:rPr><w:rFonts w:ascii="Courier New" w:hAnsi="Courier New"/>${preRPr}</w:rPr><w:t xml:space="preserve">${esc(line)}</w:t></w:r></w:p>`).join("\n");
718
+ return lines.map(line => `<w:p>${pPr}<w:r>${rPr}<w:t xml:space="preserve">${esc(line)}</w:t></w:r></w:p>`).join("\n");
719
+ }
720
/**
 * Count the grid columns a table needs: the largest per-row sum of cell
 * colspans (a cell without `colspan` counts as 1), and never less than 1.
 *
 * Only `tableRow` children of the table and `tableCell` children of each row
 * are counted. The table renderer emits cells only for `tableCell` nodes, so
 * counting any other child would make the grid wider than the rendered rows.
 *
 * @param {{ children: Array<{ type: string, children: Array<{ type: string, colspan?: number }> }> }} node - Table node.
 * @returns {number} Number of grid columns (>= 1).
 */
function countTableColumns(node) {
    let max = 0;
    for (const row of node.children) {
        if (row.type !== "tableRow")
            continue;
        let count = 0;
        for (const cell of row.children) {
            if (cell.type === "tableCell")
                count += cell.colspan ?? 1;
        }
        if (count > max)
            max = count;
    }
    return Math.max(max, 1);
}
449
731
/**
 * Render a table node as a `<w:tbl>` element.
 *
 * The grid has one `<w:gridCol>` per column reported by `countTableColumns`,
 * each an equal share of 9360 twips (12240 page − 2×1440 margins). Every cell
 * gets a `w:tcW` of its column span times that share, plus a `w:gridSpan`
 * when it spans more than one column. Children that are not `tableRow`, and
 * rows without any `tableCell` child, produce no output. A cell with no
 * children is filled by converting an empty paragraph.
 */
function convertTable(node, ctx) {
    const colCount = countTableColumns(node);
    const colWidth = Math.round(9360 / colCount);
    const tblGrid = `<w:tblGrid>${`<w:gridCol w:w="${colWidth}"/>`.repeat(colCount)}</w:tblGrid>`;
    // Render one cell: properties first, then block content.
    const renderCell = (tc) => {
        const span = tc.colspan ?? 1;
        const tcPrChildren = [`<w:tcW w:w="${colWidth * span}" w:type="dxa"/>`];
        if (span > 1) {
            tcPrChildren.push(`<w:gridSpan w:val="${span}"/>`);
        }
        const tcPr = `<w:tcPr>${tcPrChildren.join("")}</w:tcPr>`;
        let content;
        if (tc.children.length > 0) {
            content = tc.children.map(block => convertBlock(block, ctx, 0)).join("");
        }
        else {
            content = convertParagraph({ type: "paragraph", children: [] }, ctx);
        }
        return `<w:tc>${tcPr}${content}</w:tc>`;
    };
    const renderedRows = [];
    for (const row of node.children) {
        if (row.type !== "tableRow") {
            continue;
        }
        // Rows with no tableCell children are omitted entirely
        // (e.g. an empty <thead><tr/></thead>).
        const cellNodes = row.children.filter(child => child.type === "tableCell");
        if (cellNodes.length === 0) {
            continue;
        }
        const trPr = row.isHeader ? "<w:trPr><w:tblHeader/></w:trPr>" : "";
        const cells = cellNodes.map(renderCell).join("");
        renderedRows.push(`<w:tr>${trPr}${cells}</w:tr>`);
    }
    const rows = renderedRows.join("\n");
    return `<w:tbl><w:tblPr><w:tblW w:w="0" w:type="auto"/><w:tblBorders><w:top w:val="single" w:sz="4" w:color="auto"/><w:left w:val="single" w:sz="4" w:color="auto"/><w:bottom w:val="single" w:sz="4" w:color="auto"/><w:right w:val="single" w:sz="4" w:color="auto"/><w:insideH w:val="single" w:sz="4" w:color="auto"/><w:insideV w:val="single" w:sz="4" w:color="auto"/></w:tblBorders></w:tblPr>${tblGrid}${rows}</w:tbl>`;
}
472
764
  // ---------------------------------------------------------------------------
473
765
  // Inline conversion
@@ -478,11 +770,14 @@ function convertInline(node, ctx, inheritedRPr = "") {
478
770
  if (node.type === "break")
479
771
  return `<w:r><w:br/></w:r>`;
480
772
  if (node.type === "inlineCode") {
481
- return `<w:r><w:rPr><w:rFonts w:ascii="Courier New" w:hAnsi="Courier New"/>${inheritedRPr}</w:rPr><w:t>${esc(node.value)}</w:t></w:r>`;
773
+ const parts = [`<w:rFonts w:ascii="Courier New" w:hAnsi="Courier New"/>`];
774
+ if (inheritedRPr)
775
+ parts.push(...tokenizeOoxmlChildren(inheritedRPr));
776
+ return `<w:r>${serializeRPr(parts)}<w:t xml:space="preserve">${esc(node.value)}</w:t></w:r>`;
482
777
  }
483
778
  if (node.type === "image") {
484
779
  const img = node;
485
- return `<w:r><w:t>[image: ${esc(img.alt ?? img.url)}]</w:t></w:r>`;
780
+ return `<w:r><w:t xml:space="preserve">[image: ${esc(img.alt ?? img.url)}]</w:t></w:r>`;
486
781
  }
487
782
  return "";
488
783
  }
@@ -491,11 +786,8 @@ function convertText(node, ctx, inheritedRPr = "") {
491
786
  const otherMarks = node.marks?.filter(m => m.type !== "link") ?? [];
492
787
  // Build rPr: inherited CSS props first, then mark-specific overrides
493
788
  const parts = [];
494
- // Split inherited XML into individual elements for dedup
495
- if (inheritedRPr) {
496
- const elems = inheritedRPr.match(/<w:[^/]*\/>/g) ?? [];
497
- parts.push(...elems);
498
- }
789
+ if (inheritedRPr)
790
+ parts.push(...tokenizeOoxmlChildren(inheritedRPr));
499
791
  for (const mark of otherMarks) {
500
792
  switch (mark.type) {
501
793
  case "strong":
@@ -521,16 +813,8 @@ function convertText(node, ctx, inheritedRPr = "") {
521
813
  break;
522
814
  }
523
815
  }
524
- // Deduplicate rPr elements (e.g. multiple <w:b/> from container + class CSS)
525
- const seen = new Set();
526
- const dedupedParts = [];
527
- for (const p of parts) {
528
- if (!seen.has(p)) {
529
- seen.add(p);
530
- dedupedParts.push(p);
531
- }
532
- }
533
- const rPr = dedupedParts.length > 0 ? `<w:rPr>${dedupedParts.join("")}</w:rPr>` : "";
816
+ // Schema-order + tag-dedup (later sources override earlier).
817
+ const rPr = serializeRPr(parts);
534
818
  // Track-changes: check diff context (skip for linked text to avoid nesting complexity)
535
819
  if (ctx.diff && ctx.nodePathKeys && !linkMark) {
536
820
  const key = ctx.nodePathKeys.get(node);
@@ -575,21 +859,24 @@ function convertText(node, ctx, inheritedRPr = "") {
575
859
  // ---------------------------------------------------------------------------
576
860
  // Paragraph property builder
577
861
  // ---------------------------------------------------------------------------
578
- function buildPPr(node, ctx) {
862
+ function buildPPrParts(node, ctx) {
863
+ // Collect parts in any order — serializePPr applies ECMA-376 ordering at the end.
579
864
  const parts = [];
580
865
  // Classes → pStyle + generate CSS-based Word styles
581
866
  const classes = node.classes;
582
867
  if (classes && classes.length > 0) {
583
- const styleId = classes[0]; // Primary class becomes the paragraph style
584
- parts.push(`<w:pStyle w:val="${esc(styleId)}"/>`);
868
+ // Style IDs must be valid OOXML identifiers — sanitize the CSS class name.
869
+ const styleId = sanitizeStyleId(classes[0]);
870
+ parts.push(`<w:pStyle w:val="${styleId}"/>`);
585
871
  // Generate custom style from CSS rules (once per styleId)
586
872
  for (const cls of classes) {
587
- if (ctx.cssClasses[cls] && !ctx.generatedStyleIds.has(cls)) {
588
- ctx.generatedStyleIds.add(cls);
873
+ const clsId = sanitizeStyleId(cls);
874
+ if (ctx.cssClasses[cls] && !ctx.generatedStyleIds.has(clsId)) {
875
+ ctx.generatedStyleIds.add(clsId);
589
876
  const cssDecls = ctx.cssClasses[cls];
590
877
  const runProps = cssToRunProps(cssDecls);
591
878
  const paraProps = cssToParaProps(cssDecls);
592
- ctx.customStyles.push(buildStyleElement(cls, cls, "paragraph", runProps, paraProps));
879
+ ctx.customStyles.push(buildStyleElement(clsId, cls, "paragraph", runProps, paraProps));
593
880
  }
594
881
  }
595
882
  }
@@ -616,13 +903,14 @@ function buildPPr(node, ctx) {
616
903
  if (elTag) {
617
904
  ensureElementStyle(elTag, ctx);
618
905
  // If no class style was set, use the element style
619
- if (!classes || classes.length === 0) {
620
- parts.unshift(`<w:pStyle w:val="_el_${esc(elTag)}"/>`);
906
+ if ((!classes || classes.length === 0) && ctx.cssElements[elTag]) {
907
+ parts.push(`<w:pStyle w:val="_el_${esc(elTag)}"/>`);
621
908
  }
622
- // If class style was set, element CSS is already baked into the base —
623
- // we could chain basedOn, but for simplicity element styles are standalone
624
909
  }
625
- return parts.length > 0 ? `<w:pPr>${parts.join("")}</w:pPr>` : "";
910
+ return parts;
911
+ }
912
/**
 * Produce the serialized paragraph-properties XML for a block node.
 *
 * The individual property fragments are gathered by `buildPPrParts`; they are
 * then handed to `serializePPr`, which (per the note in `buildPPrParts`) is
 * responsible for putting them into ECMA-376 order.
 *
 * @param node UDM block node whose paragraph properties are being built.
 * @param ctx  Writer context forwarded unchanged to `buildPPrParts`.
 * @returns Whatever `serializePPr` returns for the collected fragments.
 */
function buildPPr(node, ctx) {
    const pPrFragments = buildPPrParts(node, ctx);
    return serializePPr(pPrFragments);
}
627
915
  /** Map UDM type to the HTML tag name for CSS element selector lookup */
628
916
  function udmTypeToHtmlTag(type, node) {
@@ -707,7 +995,7 @@ function convertDeletedBlock(op, ctx) {
707
995
  const node = op.node;
708
996
  const diff = ctx.diff;
709
997
  const author = esc(diff.author);
710
- const date = diff.date;
998
+ const date = esc(diff.date);
711
999
  const textContent = extractTextContent(node);
712
1000
  if (!textContent)
713
1001
  return "";
@@ -719,13 +1007,35 @@ function convertDeletedBlock(op, ctx) {
719
1007
  const depth = node.depth;
720
1008
  pStyle = `<w:pStyle w:val="Heading${depth}"/>`;
721
1009
  }
722
- const delMarkXml = `<w:del w:id="${delMarkId}" w:author="${author}" w:date="${date}"/>`;
723
- const pPr = `<w:pPr>${pStyle}<w:rPr>${delMarkXml}</w:rPr></w:pPr>`;
1010
+ const delMark = `<w:del w:id="${delMarkId}" w:author="${author}" w:date="${date}"/>`;
1011
+ const basePPr = pStyle ? `<w:pPr>${pStyle}</w:pPr>` : "";
1012
+ const pPr = mergeParaMarkChild(basePPr, delMark);
724
1013
  const delContent = `<w:del w:id="${delContentId}" w:author="${author}" w:date="${date}"><w:r><w:delText xml:space="preserve">${esc(textContent)}</w:delText></w:r></w:del>`;
725
1014
  return `<w:p>${pPr}${delContent}</w:p>`;
726
1015
  }
727
1016
  function esc(s) {
728
- return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;");
1017
+ return sanitizeText(s)
1018
+ .replace(/&/g, "&amp;")
1019
+ .replace(/</g, "&lt;")
1020
+ .replace(/>/g, "&gt;")
1021
+ .replace(/"/g, "&quot;");
1022
+ }
1023
/**
 * Remove characters that must not appear in an XML 1.0 document.
 *
 * XML 1.0 only permits the code points
 * `#x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]`.
 * Anything outside that set makes the generated part ill-formed, so the
 * following are stripped:
 *   - C0 controls U+0000–U+001F other than \t, \n and \r;
 *   - the non-characters U+FFFE and U+FFFF;
 *   - unpaired UTF-16 surrogates (U+D800–U+DFFF). Properly paired surrogates,
 *     i.e. astral characters such as emoji, are left untouched.
 *
 * U+007F (DEL) is legal in XML 1.0 but is also removed, preserving what this
 * function has always done.
 *
 * @param s String to clean.
 * @returns `s` with every character listed above removed.
 */
function sanitizeText(s) {
    // The `u` flag makes the regex operate on code points, so the surrogate
    // range below can only match a surrogate that is NOT part of a valid pair.
    return s.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F\uD800-\uDFFF\uFFFE\uFFFF]/gu, "");
}
1031
/**
 * Turn a CSS class name into a string usable as a Word style ID.
 *
 * Every character other than ASCII letters, digits, `_`, `-` and `:` is
 * dropped, and the remainder is cut to at most 31 characters. If nothing is
 * left after stripping, the literal ID "Normal" is returned instead.
 *
 * NOTE(review): the original comment attributes this alphabet and the
 * 31-character limit to ECMA-376 §17.7.4.9 — confirm against the spec.
 *
 * @param id CSS class name.
 * @returns The sanitized style ID, or "Normal" when no characters survive.
 */
function sanitizeStyleId(id) {
    const stripped = id.replace(/[^a-zA-Z0-9_\-:]/g, "");
    const truncated = stripped.substring(0, 31);
    if (truncated.length === 0) {
        return "Normal";
    }
    return truncated;
}
730
1040
  // ---------------------------------------------------------------------------
731
1041
  // writeDiffDocx — serialize a diff result as a .docx with track-changes markup
@@ -749,14 +1059,17 @@ export async function writeDiffDocx(newTree, diffResult, options) {
749
1059
  const date = options?.date ?? "2024-01-01T00:00:00Z";
750
1060
  const diffCtx = buildDiffCtx(diffResult, author, date);
751
1061
  const nodePathKeys = buildNodePathMap(newTree);
1062
+ const docxDataPre = newTree.data?.docx;
1063
+ const existingRels = docxDataPre?.relationships;
752
1064
  const ctx = {
753
1065
  cssClasses: mergedClassCss,
754
1066
  cssElements: elementCss,
755
1067
  customStyles: [],
756
1068
  generatedStyleIds: new Set(),
757
1069
  nextNumId: 3,
1070
+ allocatedNums: new Map(),
758
1071
  hyperlinks: new Map(),
759
- nextRId: 100,
1072
+ nextRId: nextRIdFor(existingRels),
760
1073
  diff: diffCtx,
761
1074
  nodePathKeys,
762
1075
  };
@@ -774,6 +1087,12 @@ export async function writeDiffDocx(newTree, diffResult, options) {
774
1087
  offset++;
775
1088
  }
776
1089
  }
1090
+ // Match writeDocx's trailing-paragraph-after-table behaviour so the body
1091
+ // doesn't end with </w:tbl> immediately before <w:sectPr>.
1092
+ const lastBlock = newTree.children[newTree.children.length - 1];
1093
+ if (lastBlock?.type === "table") {
1094
+ renderedBlocks.push("<w:p/>");
1095
+ }
777
1096
  const bodyXml = renderedBlocks.join("\n");
778
1097
  const docxData = newTree.data?.docx;
779
1098
  let stylesXml = docxData?.styles ?? DEFAULT_STYLES;
@@ -793,16 +1112,24 @@ ${bodyXml}
793
1112
  const hlRels = [...ctx.hyperlinks].map(([rId, url]) => `<Relationship Id="${rId}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink" Target="${esc(url)}" TargetMode="External"/>`).join("\n ");
794
1113
  relsXml = relsXml.replace("</Relationships>", ` ${hlRels}\n</Relationships>`);
795
1114
  }
1115
+ let numberingXml = docxData?.numbering ?? DEFAULT_NUMBERING;
1116
+ if (ctx.allocatedNums.size > 0) {
1117
+ const newNums = [...ctx.allocatedNums]
1118
+ .map(([numId, abstractId]) => `<w:num w:numId="${numId}"><w:abstractNumId w:val="${abstractId}"/></w:num>`)
1119
+ .join("\n ");
1120
+ numberingXml = numberingXml.replace("</w:numbering>", ` ${newNums}\n</w:numbering>`);
1121
+ }
796
1122
  const parts = {
797
1123
  document: docXml,
798
1124
  styles: stylesXml,
799
- numbering: docxData?.numbering ?? DEFAULT_NUMBERING,
1125
+ numbering: numberingXml,
800
1126
  relationships: relsXml,
801
1127
  contentTypes: docxData?.contentTypes ?? undefined,
802
1128
  media: new Map(),
803
1129
  rawParts: new Map(),
804
1130
  };
805
1131
  const udmSnapshot = stripDataForSnapshot(newTree);
1132
+ udmSnapshot.__docHash = await hashString(docXml);
806
1133
  parts.rawParts.set("word/otomate-udm.json", JSON.stringify(udmSnapshot));
807
1134
  if (cssData && (Object.keys(cssData.classRules ?? {}).length > 0 || Object.keys(cssData.elementRules ?? {}).length > 0)) {
808
1135
  parts.rawParts.set("word/otomate-css.json", JSON.stringify(cssData));