@otomate/docx 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/writer.js CHANGED
@@ -1,6 +1,7 @@
1
1
  // ---------------------------------------------------------------------------
2
2
  // docx Writer — UDM tree → .docx buffer
3
3
  // ---------------------------------------------------------------------------
4
+ import { createHash } from "node:crypto";
4
5
  import { isParent, isText } from "@otomate/core";
5
6
  import { buildStyleElement, cssToRunProps, cssToParaProps, parseLengthTwips, parseColor } from "@otomate/css-docx";
6
7
  import { packDocx } from "./zip.js";
@@ -14,16 +15,28 @@ export async function writeDocx(tree, options) {
14
15
  const cssData = tree.data?.css;
15
16
  const mergedClassCss = { ...cssData?.classRules, ...options?.cssClasses };
16
17
  const elementCss = cssData?.elementRules ?? {};
18
+ const docxDataPre = tree.data?.docx;
19
+ const existingRels = docxDataPre?.relationships;
17
20
  const ctx = {
18
21
  cssClasses: mergedClassCss,
19
22
  cssElements: elementCss,
20
23
  customStyles: [],
21
24
  generatedStyleIds: new Set(),
22
25
  nextNumId: 3,
26
+ allocatedNums: new Map(),
23
27
  hyperlinks: new Map(),
24
- nextRId: 100,
28
+ // Seed past any existing rIds to avoid collisions on round-trip.
29
+ nextRId: nextRIdFor(existingRels),
25
30
  };
26
- const bodyXml = tree.children.map(child => convertBlock(child, ctx, 0)).join("\n");
31
+ const renderedBlocks = tree.children.map(child => convertBlock(child, ctx, 0));
32
+ // OOXML allows w:tbl as the last body child, but Word always inserts a trailing
33
+ // empty paragraph after a final table. Match that behavior so re-opening in Word
34
+ // produces no diff.
35
+ const lastBlock = tree.children[tree.children.length - 1];
36
+ if (lastBlock?.type === "table") {
37
+ renderedBlocks.push("<w:p/>");
38
+ }
39
+ const bodyXml = renderedBlocks.join("\n");
27
40
  const docxData = tree.data?.docx;
28
41
  // Build styles.xml with any generated custom styles
29
42
  let stylesXml = docxData?.styles ?? DEFAULT_STYLES;
@@ -46,17 +59,29 @@ ${bodyXml}
46
59
  // Inject before closing </Relationships>
47
60
  relsXml = relsXml.replace("</Relationships>", ` ${hlRels}\n</Relationships>`);
48
61
  }
62
+ // Inject any newly-allocated <w:num> entries into numbering.xml so each
63
+ // top-level list has its own num definition (independent counters & styles).
64
+ let numberingXml = docxData?.numbering ?? DEFAULT_NUMBERING;
65
+ if (ctx.allocatedNums.size > 0) {
66
+ const newNums = [...ctx.allocatedNums]
67
+ .map(([numId, abstractId]) => `<w:num w:numId="${numId}"><w:abstractNumId w:val="${abstractId}"/></w:num>`)
68
+ .join("\n ");
69
+ numberingXml = numberingXml.replace("</w:numbering>", ` ${newNums}\n</w:numbering>`);
70
+ }
49
71
  const parts = {
50
72
  document: docXml,
51
73
  styles: stylesXml,
52
- numbering: docxData?.numbering ?? DEFAULT_NUMBERING,
74
+ numbering: numberingXml,
53
75
  relationships: relsXml,
54
76
  contentTypes: docxData?.contentTypes ?? undefined,
55
77
  media: new Map(),
56
78
  rawParts: new Map(),
57
79
  };
58
- // Embed UDM tree as custom part for lossless round-trip with otomate
80
+ // Embed UDM tree as custom part for lossless round-trip with otomate.
81
+ // The hash binds the snapshot to this exact document.xml so the reader
82
+ // can detect external edits (Word saves) and fall back to OOXML parsing.
59
83
  const udmSnapshot = stripDataForSnapshot(tree);
84
+ udmSnapshot.__docHash = hashString(docXml);
60
85
  parts.rawParts.set("word/otomate-udm.json", JSON.stringify(udmSnapshot));
61
86
  // Embed CSS rules if present
62
87
  if (cssData && (Object.keys(cssData.classRules ?? {}).length > 0 || Object.keys(cssData.elementRules ?? {}).length > 0)) {
@@ -76,6 +101,37 @@ ${bodyXml}
76
101
  }
77
102
  return packDocx(parts);
78
103
  }
104
/**
 * Choose the first relationship-id number the writer may hand out for new
 * hyperlinks without colliding with ids already present in a round-tripped
 * relationships part.
 *
 * The result is one past the largest `rIdN` found in `existingRels`, but
 * never lower than 100. It is NOT the smallest free id: gaps below the
 * maximum are deliberately left unused.
 *
 * @param {string | null | undefined} existingRels Raw relationships XML, if any.
 * @returns {number} Numeric suffix for the next `rId` to allocate.
 */
function nextRIdFor(existingRels) {
    const FLOOR = 100;
    if (!existingRels)
        return FLOOR;
    let highest = 0;
    // XML permits either quote style and whitespace around "=", so accept
    // both; missing an id here would let a new hyperlink reuse it.
    const idPattern = /Id\s*=\s*["']rId(\d+)["']/g;
    for (const [, digits] of String(existingRels).matchAll(idPattern)) {
        const n = Number(digits);
        if (n > highest)
            highest = n;
    }
    return Math.max(highest + 1, FLOOR);
}
122
/**
 * Reserve the next `w:numId` from the writer context and remember which
 * abstract numbering definition it should point at.
 *
 * Side effects: advances `ctx.nextNumId` and adds an entry to
 * `ctx.allocatedNums` (numId string → abstractNumId number).
 *
 * @param {{ nextNumId: number, allocatedNums: Map<string, number> }} ctx
 * @param {boolean} ordered Truthy selects abstract id 1, otherwise 0.
 *   NOTE(review): presumably 1 is the numbered and 0 the bullet definition
 *   in the default numbering part — confirm against DEFAULT_NUMBERING.
 * @returns {string} The freshly reserved numId, as a string.
 */
function allocNumId(ctx, ordered) {
    const issued = ctx.nextNumId++;
    const numId = String(issued);
    const abstractNumId = ordered ? 1 : 0;
    ctx.allocatedNums.set(numId, abstractNumId);
    return numId;
}
79
135
  function allocHyperlinkRId(ctx, url) {
80
136
  // Reuse existing rId for same URL
81
137
  for (const [rId, existingUrl] of ctx.hyperlinks) {
@@ -129,7 +185,7 @@ function convertBlock(node, ctx, listIlvl) {
129
185
  case "div":
130
186
  case "figure":
131
187
  return convertDiv(node, ctx, listIlvl);
132
- case "html": return `<w:p><w:r><w:t>${esc(node.value)}</w:t></w:r></w:p>`;
188
+ case "html": return `<w:p><w:r><w:t xml:space="preserve">${esc(node.value)}</w:t></w:r></w:p>`;
133
189
  default: return "";
134
190
  }
135
191
  }
@@ -154,12 +210,16 @@ function convertDiv(node, ctx, listIlvl) {
154
210
  const runProps = cssToRunProps(containerCss);
155
211
  const containerRPr = buildRunPropsXml(runProps);
156
212
  const bg = containerCss["background-color"] ? parseColorSafe(containerCss["background-color"]) : undefined;
157
- // Build extra pPr elements to inject into child paragraphs
158
- let extraPPr = "";
213
+ // Build extra pPr elements to inject into child paragraphs.
214
+ // Order doesn't matter here — injectPPr re-sorts via serializePPr.
215
+ const extraPPrParts = [];
216
+ const pBdrXml = buildPBdrXml(paraProps.pBdr);
217
+ if (pBdrXml)
218
+ extraPPrParts.push(pBdrXml);
159
219
  if (bg && bg !== "auto")
160
- extraPPr += `<w:shd w:val="clear" w:color="auto" w:fill="${bg}"/>`;
220
+ extraPPrParts.push(`<w:shd w:val="clear" w:color="auto" w:fill="${bg}"/>`);
161
221
  if (paraProps.jc)
162
- extraPPr += `<w:jc w:val="${paraProps.jc}"/>`;
222
+ extraPPrParts.push(`<w:jc w:val="${paraProps.jc}"/>`);
163
223
  if (paraProps.ind) {
164
224
  const attrs = [];
165
225
  if (paraProps.ind.left)
@@ -167,14 +227,14 @@ function convertDiv(node, ctx, listIlvl) {
167
227
  if (paraProps.ind.right)
168
228
  attrs.push(`w:right="${paraProps.ind.right}"`);
169
229
  if (attrs.length)
170
- extraPPr += `<w:ind ${attrs.join(" ")}/>`;
230
+ extraPPrParts.push(`<w:ind ${attrs.join(" ")}/>`);
171
231
  }
172
232
  // Create a temporary modified context that includes the container's styles
173
233
  // so child paragraphs inherit them
174
234
  const childCtx = {
175
235
  ...ctx,
176
236
  _containerRPr: containerRPr,
177
- _containerPPr: extraPPr,
237
+ _containerPPrParts: extraPPrParts,
178
238
  };
179
239
  return node.children.map(c => convertBlockWithContainer(c, childCtx, listIlvl)).join("\n");
180
240
  }
@@ -182,17 +242,16 @@ function convertDiv(node, ctx, listIlvl) {
182
242
  function convertBlockWithContainer(node, ctx, listIlvl) {
183
243
  const containerCtx = ctx;
184
244
  const extraRPr = containerCtx._containerRPr ?? "";
185
- const extraPPr = containerCtx._containerPPr ?? "";
245
+ const extraPPrParts = containerCtx._containerPPrParts ?? [];
186
246
  // Helper: resolve all CSS run props for this node (element + own classes)
187
247
  const nodeRPr = resolveBlockCssRunProps(node, ctx);
188
248
  const mergedRPr = extraRPr + nodeRPr;
189
- // Helper: inject container pPr into a pPr string
249
+ // Helper: merge container pPr parts into an existing pPr string with schema ordering.
190
250
  function injectPPr(pPr) {
191
- if (!extraPPr)
251
+ if (extraPPrParts.length === 0)
192
252
  return pPr;
193
- if (pPr)
194
- return pPr.replace("</w:pPr>", `${extraPPr}</w:pPr>`);
195
- return `<w:pPr>${extraPPr}</w:pPr>`;
253
+ const baseChildren = pPr ? extractPPrChildren(pPr) : [];
254
+ return serializePPr([...baseChildren, ...extraPPrParts]);
196
255
  }
197
256
  if (node.type === "paragraph") {
198
257
  const para = node;
@@ -218,11 +277,12 @@ function convertBlockWithContainer(node, ctx, listIlvl) {
218
277
  const classes = h.classes;
219
278
  if (classes?.length) {
220
279
  for (const cls of classes) {
221
- if (ctx.cssClasses[cls] && !ctx.generatedStyleIds.has(cls)) {
222
- ctx.generatedStyleIds.add(cls);
280
+ const clsId = sanitizeStyleId(cls);
281
+ if (ctx.cssClasses[cls] && !ctx.generatedStyleIds.has(clsId)) {
282
+ ctx.generatedStyleIds.add(clsId);
223
283
  const rp = cssToRunProps(ctx.cssClasses[cls]);
224
284
  const pp = cssToParaProps(ctx.cssClasses[cls]);
225
- ctx.customStyles.push(buildStyleElement(cls, cls, "paragraph", rp, pp));
285
+ ctx.customStyles.push(buildStyleElement(clsId, cls, "paragraph", rp, pp));
226
286
  }
227
287
  }
228
288
  }
@@ -232,7 +292,7 @@ function convertBlockWithContainer(node, ctx, listIlvl) {
232
292
  if (node.type === "list") {
233
293
  // Pass container formatting down to list items
234
294
  const list = node;
235
- const numId = list.ordered ? "2" : "1";
295
+ const numId = allocNumId(ctx, list.ordered);
236
296
  const parts = [];
237
297
  for (const item of list.children) {
238
298
  if (item.type !== "listItem")
@@ -258,7 +318,8 @@ function convertBlockWithContainer(node, ctx, listIlvl) {
258
318
  parts.push(`<w:p>${pPr}${runs}</w:p>`);
259
319
  }
260
320
  else if (child.type === "list") {
261
- parts.push(convertBlockWithContainer(child, ctx, listIlvl + 1));
321
+ // Inherit parent numId so nested levels share the same w:num entry.
322
+ parts.push(convertList(child, ctx, listIlvl + 1, numId));
262
323
  }
263
324
  else {
264
325
  // div, blockquote, codeBlock, etc. — recurse with container inheritance
@@ -271,7 +332,7 @@ function convertBlockWithContainer(node, ctx, listIlvl) {
271
332
  // For other children, use normal conversion with container props cleared
272
333
  const cleanCtx = { ...ctx };
273
334
  delete cleanCtx._containerRPr;
274
- delete cleanCtx._containerPPr;
335
+ delete cleanCtx._containerPPrParts;
275
336
  return convertBlock(node, cleanCtx, listIlvl);
276
337
  }
277
338
  function convertParagraph(node, ctx) {
@@ -311,13 +372,11 @@ function convertHeading(node, ctx) {
311
372
  function convertInsertedParagraph(node, pPr, cssRPr, ctx) {
312
373
  const diff = ctx.diff;
313
374
  const author = esc(diff.author);
314
- const date = diff.date;
315
- // Track the paragraph mark itself as inserted (inject into pPr)
375
+ const date = esc(diff.date);
376
+ // Track the paragraph mark itself as inserted (inject into pPr's rPr).
316
377
  const markId = diff.revId.value++;
317
- const insMarkXml = `<w:rPr><w:ins w:id="${markId}" w:author="${author}" w:date="${date}"/></w:rPr>`;
318
- const trackedPPr = pPr
319
- ? pPr.replace("</w:pPr>", `${insMarkXml}</w:pPr>`)
320
- : `<w:pPr>${insMarkXml}</w:pPr>`;
378
+ const insMark = `<w:ins w:id="${markId}" w:author="${author}" w:date="${date}"/>`;
379
+ const trackedPPr = mergeParaMarkChild(pPr, insMark);
321
380
  // Wrap each inline child's run in its own w:ins
322
381
  const runs = node.children.map(c => {
323
382
  const runXml = convertInline(c, ctx, cssRPr);
@@ -346,8 +405,163 @@ function resolveBlockCssRunProps(node, ctx) {
346
405
  return "";
347
406
  return buildRunPropsXml(cssToRunProps(merged));
348
407
  }
349
- /** Build w:rPr XML string from OoxmlRunProps (without the <w:rPr> wrapper) */
408
// ---------------------------------------------------------------------------
// Schema-order rank tables for the children of w:rPr and w:pPr.
// Each tag maps to its 1-based position in the list below; tags that are not
// listed have no entry. The lists follow the ECMA-376 child sequences, which
// strict validators enforce.
// ---------------------------------------------------------------------------
const RPR_ORDER = Object.fromEntries([
    "w:rStyle", "w:rFonts", "w:b", "w:bCs", "w:i", "w:iCs",
    "w:caps", "w:smallCaps", "w:strike", "w:dstrike",
    "w:outline", "w:shadow", "w:emboss", "w:imprint",
    "w:noProof", "w:snapToGrid", "w:vanish", "w:webHidden",
    "w:color", "w:spacing", "w:w", "w:kern", "w:position",
    "w:sz", "w:szCs", "w:highlight", "w:u", "w:effect",
    "w:bdr", "w:shd", "w:fitText", "w:vertAlign", "w:rtl",
    "w:cs", "w:em", "w:lang", "w:eastAsianLayout",
    "w:specVanish", "w:oMath",
].map((tag, index) => [tag, index + 1]));
const PPR_ORDER = Object.fromEntries([
    "w:pStyle", "w:keepNext", "w:keepLines", "w:pageBreakBefore",
    "w:framePr", "w:widowControl", "w:numPr", "w:suppressLineNumbers",
    "w:pBdr", "w:shd", "w:tabs", "w:suppressAutoHyphens",
    "w:kinsoku", "w:wordWrap", "w:overflowPunct", "w:topLinePunct",
    "w:autoSpaceDE", "w:autoSpaceDN", "w:bidi", "w:adjustRightInd",
    "w:snapToGrid", "w:spacing", "w:ind", "w:contextualSpacing",
    "w:mirrorIndents", "w:suppressOverlap", "w:jc", "w:textDirection",
    "w:textAlignment", "w:textboxTightWrap", "w:outlineLvl",
    "w:divId", "w:cnfStyle", "w:rPr", "w:sectPr", "w:pPrChange",
].map((tag, index) => [tag, index + 1]));
434
/**
 * Read the qualified tag name from the very start of an element string.
 *
 * Only `w:`-prefixed names made of ASCII letters are recognised, and the
 * string must begin with `<` (no leading whitespace).
 *
 * @param {string} xml Serialized element, e.g. `<w:b/>`.
 * @returns {string} The tag name such as `w:b`, or "" when none is found.
 */
function getElementTag(xml) {
    const [, tagName = ""] = xml.match(/^<(w:[A-Za-z]+)/) ?? [];
    return tagName;
}
438
/**
 * Split the inner XML of an OOXML container into its top-level child
 * elements, returned as raw strings in document order.
 *
 * Only elements whose name matches `w:` + ASCII letters start a token; any
 * other text (whitespace, stray close tags, foreign-namespace tags) between
 * tokens is skipped. A child is either a self-closing tag or an open tag up
 * to its balanced close tag.
 *
 * Nested elements that share the parent's name are depth-tracked. A nested
 * SELF-CLOSING element of the same name (e.g. `<w:rPr/>` inside
 * `<w:rPrChange>` inside `<w:rPr>`) does not open a new level; counting it
 * would leave the depth unbalanced and glue every following sibling into
 * this token.
 *
 * Malformed input is handled leniently: an opening tag with no closing `>`
 * ends tokenizing (the remainder is dropped), and an element with no close
 * tag yields one token running to the end of the input.
 *
 * @param {string} xml Inner XML of a container such as w:pPr or w:rPr.
 * @returns {string[]} One string per top-level child element.
 */
function tokenizeOoxmlChildren(xml) {
    // Index of the ">" that terminates the tag starting at `from`, or -1.
    // A ">" inside a quoted attribute value (either quote style) is ignored.
    const findTagEnd = (from) => {
        let quote = "";
        for (let p = from + 1; p < xml.length; p++) {
            const ch = xml[p];
            if (quote) {
                if (ch === quote)
                    quote = "";
            }
            else if (ch === '"' || ch === "'") {
                quote = ch;
            }
            else if (ch === ">") {
                return p;
            }
        }
        return -1;
    };
    // Sticky so the name is matched in place instead of slicing the input.
    const openTagName = /<(w:[A-Za-z]+)/y;
    const tokens = [];
    let i = 0;
    while (i < xml.length) {
        if (xml[i] !== "<") {
            i++;
            continue;
        }
        openTagName.lastIndex = i;
        const nameMatch = openTagName.exec(xml);
        if (!nameMatch) {
            i++;
            continue;
        }
        const name = nameMatch[1];
        const openEnd = findTagEnd(i);
        if (openEnd === -1)
            break;
        if (xml[openEnd - 1] === "/") {
            tokens.push(xml.slice(i, openEnd + 1));
            i = openEnd + 1;
            continue;
        }
        const openPrefix = `<${name}`;
        const closeTag = `</${name}>`;
        let depth = 1;
        let k = openEnd + 1;
        while (k < xml.length && depth > 0) {
            if (xml.startsWith(closeTag, k)) {
                depth--;
                k += closeTag.length;
                continue;
            }
            if (xml.startsWith(openPrefix, k)) {
                // Require a delimiter so "<w:rPrChange" is not mistaken for "<w:rPr".
                const after = xml[k + openPrefix.length];
                const isSameName = after === ">" || after === "/"
                    || (after !== undefined && /\s/.test(after));
                if (isSameName) {
                    const nestedEnd = findTagEnd(k);
                    if (nestedEnd === -1) {
                        k = xml.length;
                        break;
                    }
                    // A self-closing nested element has no close tag to pair with.
                    if (xml[nestedEnd - 1] !== "/")
                        depth++;
                    k = nestedEnd + 1;
                    continue;
                }
            }
            k++;
        }
        tokens.push(xml.slice(i, k));
        i = k;
    }
    return tokens;
}
495
/**
 * De-duplicate property elements by tag and arrange them by schema rank.
 *
 * When several elements share a tag, the LAST one supplied is kept, so later
 * sources override earlier ones. Elements with no recognisable tag are
 * dropped. Tags missing from `order` get rank 999 and therefore sort after
 * every ranked tag; ties keep the order in which their tag first appeared.
 *
 * @param {string[]} elements Serialized child elements (rPr or pPr children).
 * @param {Record<string, number>} order Tag → rank table.
 * @returns {string[]} The surviving elements in rank order.
 */
function orderElements(elements, order) {
    const latestByTag = new Map();
    for (const element of elements) {
        const tag = getElementTag(element);
        if (!tag)
            continue;
        // Overwriting keeps the tag's original slot but the newest markup.
        latestByTag.set(tag, element);
    }
    const rankOf = (tag) => order[tag] ?? 999;
    const ranked = Array.from(latestByTag);
    ranked.sort((left, right) => rankOf(left[0]) - rankOf(right[0]));
    return ranked.map((entry) => entry[1]);
}
507
/**
 * Produce a `<w:rPr>` element from run-property children, de-duplicated and
 * sorted via `orderElements` with the `RPR_ORDER` table.
 *
 * @param {string[]} parts Serialized rPr child elements, in any order.
 * @returns {string} The wrapped element, or "" when nothing survives.
 */
function serializeRPr(parts) {
    const children = orderElements(parts, RPR_ORDER);
    if (children.length === 0)
        return "";
    return `<w:rPr>${children.join("")}</w:rPr>`;
}
512
/**
 * Produce a `<w:pPr>` element from paragraph-property children,
 * de-duplicated and sorted via `orderElements` with the `PPR_ORDER` table.
 *
 * @param {string[]} parts Serialized pPr child elements, in any order.
 * @returns {string} The wrapped element, or "" when nothing survives.
 */
function serializePPr(parts) {
    const children = orderElements(parts, PPR_ORDER);
    if (children.length === 0)
        return "";
    return `<w:pPr>${children.join("")}</w:pPr>`;
}
517
/**
 * Return the top-level children of the first `<w:pPr>…</w:pPr>` wrapper found
 * in `pPr`, as raw element strings.
 *
 * The wrapper's close tag is located by balancing nested `<w:pPr>` /
 * `</w:pPr>` pairs rather than taking the first `</w:pPr>`: a tracked
 * property change (`w:pPrChange`) carries its own inner `<w:pPr>`, and
 * stopping at that inner close tag would truncate the children.
 *
 * Only an attribute-less `<w:pPr>` open tag is recognised.
 *
 * @param {string} pPr Serialized paragraph properties element.
 * @returns {string[]} Child elements, or [] when there is no complete wrapper.
 */
function extractPPrChildren(pPr) {
    const OPEN = "<w:pPr>";
    const CLOSE = "</w:pPr>";
    const start = pPr.indexOf(OPEN);
    if (start === -1)
        return [];
    const innerStart = start + OPEN.length;
    let depth = 1;
    let pos = innerStart;
    for (;;) {
        const nextClose = pPr.indexOf(CLOSE, pos);
        if (nextClose === -1)
            return []; // wrapper never closes
        const nextOpen = pPr.indexOf(OPEN, pos);
        if (nextOpen !== -1 && nextOpen < nextClose) {
            depth++;
            pos = nextOpen + OPEN.length;
            continue;
        }
        depth--;
        if (depth === 0)
            return tokenizeOoxmlChildren(pPr.slice(innerStart, nextClose));
        pos = nextClose + CLOSE.length;
    }
}
522
/**
 * Add a paragraph-mark property (such as a `w:ins` or `w:del` revision
 * marker) to a paragraph's `<w:pPr>`.
 *
 * If the pPr already holds a non-self-closing `<w:rPr>` child, `child` is
 * inserted immediately after that rPr's open tag so it becomes the first
 * entry and the existing rPr children are kept. Otherwise a new
 * `<w:rPr>child</w:rPr>` is appended. The result is re-serialized through
 * `serializePPr`, so the output never contains two `<w:rPr>` siblings.
 *
 * @param {string} pPr Existing serialized `<w:pPr>`, or "" / falsy for none.
 * @param {string} child Serialized element to place inside the mark's rPr.
 * @returns {string} The rebuilt `<w:pPr>` element.
 */
function mergeParaMarkChild(pPr, child) {
    const existing = pPr ? extractPPrChildren(pPr) : [];
    const isOpenRPr = (el) => el.startsWith("<w:rPr>") || el.startsWith("<w:rPr ");
    const rebuilt = [];
    let sawRPr = false;
    for (const el of existing) {
        if (!isOpenRPr(el)) {
            rebuilt.push(el);
            continue;
        }
        sawRPr = true;
        // Function replacement: `child` is inserted literally, "$" sequences included.
        rebuilt.push(el.replace(/^<w:rPr(\s[^>]*)?>/, (openTag) => `${openTag}${child}`));
    }
    if (!sawRPr) {
        rebuilt.push(`<w:rPr>${child}</w:rPr>`);
    }
    return serializePPr(rebuilt);
}
545
/**
 * Return the top-level children of the first `<w:rPr>…</w:rPr>` wrapper found
 * in `rPr`, as raw element strings.
 *
 * The wrapper's close tag is located by balancing nested `<w:rPr>` /
 * `</w:rPr>` pairs rather than taking the first `</w:rPr>`: a tracked
 * formatting change (`w:rPrChange`) carries its own inner `<w:rPr>`, and
 * stopping at that inner close tag would truncate the children.
 *
 * Only an attribute-less `<w:rPr>` open tag is recognised.
 *
 * @param {string} rPr Serialized run properties element.
 * @returns {string[]} Child elements, or [] when there is no complete wrapper.
 */
function extractRPrChildren(rPr) {
    const OPEN = "<w:rPr>";
    const CLOSE = "</w:rPr>";
    const start = rPr.indexOf(OPEN);
    if (start === -1)
        return [];
    const innerStart = start + OPEN.length;
    let depth = 1;
    let pos = innerStart;
    for (;;) {
        const nextClose = rPr.indexOf(CLOSE, pos);
        if (nextClose === -1)
            return []; // wrapper never closes
        const nextOpen = rPr.indexOf(OPEN, pos);
        if (nextOpen !== -1 && nextOpen < nextClose) {
            depth++;
            pos = nextOpen + OPEN.length;
            continue;
        }
        depth--;
        if (depth === 0)
            return tokenizeOoxmlChildren(rPr.slice(innerStart, nextClose));
        pos = nextClose + CLOSE.length;
    }
}
550
/**
 * Serialize a parsed paragraph-border definition as a `<w:pBdr>` element.
 *
 * Sides are emitted in the order top, left, bottom, right. CT_PBdr is an
 * xsd:sequence in that order, so emitting bottom before left is rejected by
 * schema validation. Every side is written with `w:space="0"`.
 *
 * @param {Partial<Record<"top" | "left" | "bottom" | "right",
 *   { val: string, sz: string | number, color: string }>> | null | undefined} pBdr
 *   Border definition keyed by side; missing or falsy sides are skipped.
 * @returns {string} The `<w:pBdr>` element, or "" when no side is set.
 */
function buildPBdrXml(pBdr) {
    if (!pBdr)
        return "";
    const SCHEMA_SIDE_ORDER = ["top", "left", "bottom", "right"];
    const sides = [];
    for (const side of SCHEMA_SIDE_ORDER) {
        const bd = pBdr[side];
        if (bd)
            sides.push(`<w:${side} w:val="${bd.val}" w:sz="${bd.sz}" w:space="0" w:color="${bd.color}"/>`);
    }
    return sides.length > 0 ? `<w:pBdr>${sides.join("")}</w:pBdr>` : "";
}
562
+ /** Build w:rPr child elements from OoxmlRunProps (without the wrapper, in schema order). */
350
563
  function buildRunPropsXml(props) {
564
+ // Order per ECMA-376 CT_RPr: rFonts(2), b(3), i(5), strike(9), color(19), sz(24), u(27), shd(30)
351
565
  const parts = [];
352
566
  if (props.rFonts) {
353
567
  const attrs = [];
@@ -361,14 +575,14 @@ function buildRunPropsXml(props) {
361
575
  parts.push("<w:b/>");
362
576
  if (props.i)
363
577
  parts.push("<w:i/>");
364
- if (props.sz)
365
- parts.push(`<w:sz w:val="${props.sz}"/>`);
578
+ if (props.strike)
579
+ parts.push("<w:strike/>");
366
580
  if (props.color)
367
581
  parts.push(`<w:color w:val="${props.color}"/>`);
582
+ if (props.sz)
583
+ parts.push(`<w:sz w:val="${props.sz}"/>`);
368
584
  if (props.u)
369
585
  parts.push(`<w:u w:val="${props.u}"/>`);
370
- if (props.strike)
371
- parts.push("<w:strike/>");
372
586
  if (props.shd)
373
587
  parts.push(`<w:shd w:val="clear" w:color="auto" w:fill="${props.shd}"/>`);
374
588
  return parts.join("");
@@ -386,6 +600,7 @@ function convertBlockquote(node, ctx) {
386
600
  }).join("\n");
387
601
  }
388
602
  function buildBlockquotePPr(css) {
603
+ // Collect in any order — serializePPr applies ECMA-376 ordering.
389
604
  const parts = [];
390
605
  // Left border (vertical line) — always add for blockquotes
391
606
  parts.push(`<w:pBdr><w:left w:val="single" w:sz="18" w:space="4" w:color="${css["border-color"] ? parseColorSafe(css["border-color"]) : "AAAAAA"}"/></w:pBdr>`);
@@ -409,10 +624,13 @@ function buildBlockquotePPr(css) {
409
624
  if (attrs.length)
410
625
  parts.push(`<w:spacing ${attrs.join(" ")}/>`);
411
626
  }
412
- return `<w:pPr>${parts.join("")}</w:pPr>`;
627
+ return serializePPr(parts);
413
628
  }
414
- function convertList(node, ctx, baseIlvl) {
415
- const numId = node.ordered ? "2" : "1";
629
+ function convertList(node, ctx, baseIlvl, parentNumId) {
630
+ // Top-level lists get a fresh numId; nested lists inherit their parent's
631
+ // numId so the abstract numbering definition (bullet vs ordered) and
632
+ // counter continuity stay consistent across levels.
633
+ const numId = parentNumId ?? allocNumId(ctx, node.ordered);
416
634
  const liRPr = ctx.cssElements["li"] ? buildRunPropsXml(cssToRunProps(ctx.cssElements["li"])) : "";
417
635
  const parts = [];
418
636
  for (const item of node.children) {
@@ -425,7 +643,7 @@ function convertList(node, ctx, baseIlvl) {
425
643
  parts.push(`<w:p><w:pPr><w:numPr><w:ilvl w:val="${baseIlvl}"/><w:numId w:val="${numId}"/></w:numPr></w:pPr>${runs}</w:p>`);
426
644
  }
427
645
  else if (child.type === "list") {
428
- parts.push(convertList(child, ctx, baseIlvl + 1));
646
+ parts.push(convertList(child, ctx, baseIlvl + 1, numId));
429
647
  }
430
648
  else {
431
649
  parts.push(convertBlock(child, ctx, baseIlvl));
@@ -438,36 +656,62 @@ function convertCodeBlock(node, ctx) {
438
656
  const preCss = ctx?.cssElements["pre"] ?? {};
439
657
  const bg = preCss["background-color"] ? parseColorSafe(preCss["background-color"]) : undefined;
440
658
  const preRPr = Object.keys(preCss).length > 0 ? buildRunPropsXml(cssToRunProps(preCss)) : "";
441
- // Build pPr with Code style + optional background shading
442
- let pPr = `<w:pPr><w:pStyle w:val="Code"/>`;
659
+ // pPr: pStyle(1) before shd(10) already in correct order
660
+ const pPrParts = [`<w:pStyle w:val="Code"/>`];
443
661
  if (bg)
444
- pPr += `<w:shd w:val="clear" w:color="auto" w:fill="${bg}"/>`;
445
- pPr += `</w:pPr>`;
662
+ pPrParts.push(`<w:shd w:val="clear" w:color="auto" w:fill="${bg}"/>`);
663
+ const pPr = serializePPr(pPrParts);
664
+ // rPr: code rFonts + any pre-element CSS (rFonts override is intentional, last write wins)
665
+ const rPrParts = [`<w:rFonts w:ascii="Courier New" w:hAnsi="Courier New"/>`];
666
+ if (preRPr)
667
+ rPrParts.push(...tokenizeOoxmlChildren(preRPr));
668
+ const rPr = serializeRPr(rPrParts);
446
669
  const lines = node.value.split("\n");
447
- return lines.map(line => `<w:p>${pPr}<w:r><w:rPr><w:rFonts w:ascii="Courier New" w:hAnsi="Courier New"/>${preRPr}</w:rPr><w:t xml:space="preserve">${esc(line)}</w:t></w:r></w:p>`).join("\n");
670
+ return lines.map(line => `<w:p>${pPr}<w:r>${rPr}<w:t xml:space="preserve">${esc(line)}</w:t></w:r></w:p>`).join("\n");
671
+ }
672
/**
 * Work out how many grid columns a table needs: the widest row, measured as
 * the sum of its cells' `colspan` (a missing colspan counts as 1).
 *
 * Only `tableRow` children of the table and `tableCell` children of a row
 * are considered. The table renderer emits nothing for other node types, so
 * counting them would widen the grid beyond the cells actually written.
 *
 * @param {{ children: Array<{ type: string, children: Array<{ type: string, colspan?: number }> }> }} node
 *   Table node.
 * @returns {number} Column count, never less than 1.
 */
function countTableColumns(node) {
    let widest = 0;
    for (const row of node.children) {
        if (row.type !== "tableRow")
            continue;
        let rowSpan = 0;
        for (const cell of row.children) {
            if (cell.type !== "tableCell")
                continue;
            rowSpan += cell.colspan ?? 1;
        }
        if (rowSpan > widest)
            widest = rowSpan;
    }
    return Math.max(widest, 1);
}
449
683
/**
 * Render a table node as a `<w:tbl>` element with single-line borders on
 * every edge and between cells.
 *
 * The grid has one equal-width `<w:gridCol>` per column reported by
 * `countTableColumns`; the widths split 9360 twips (a 12240-twip page minus
 * two 1440-twip margins). Each cell gets a `w:tcW` of column width × span
 * and, when it spans more than one column, a `w:gridSpan`.
 *
 * Children of the table that are not `tableRow`, and rows without any
 * `tableCell` child, produce no output, so no `<w:tr>` is ever written
 * without a `<w:tc>`. A cell with no children is filled with the rendering
 * of an empty paragraph.
 *
 * @param {object} node Table node whose `children` are row nodes.
 * @param {object} ctx Writer context, passed through to block conversion.
 * @returns {string} Serialized `<w:tbl>` element.
 */
function convertTable(node, ctx) {
    const columnCount = countTableColumns(node);
    const columnWidth = Math.round(9360 / columnCount);
    const gridCols = `<w:gridCol w:w="${columnWidth}"/>`.repeat(columnCount);
    const tblGrid = `<w:tblGrid>${gridCols}</w:tblGrid>`;
    const renderCell = (cell) => {
        const span = cell.colspan ?? 1;
        const gridSpan = span > 1 ? `<w:gridSpan w:val="${span}"/>` : "";
        const tcPr = `<w:tcPr><w:tcW w:w="${columnWidth * span}" w:type="dxa"/>${gridSpan}</w:tcPr>`;
        const content = cell.children.length > 0
            ? cell.children.map((block) => convertBlock(block, ctx, 0)).join("")
            : convertParagraph({ type: "paragraph", children: [] }, ctx);
        return `<w:tc>${tcPr}${content}</w:tc>`;
    };
    const renderedRows = [];
    for (const row of node.children) {
        if (row.type !== "tableRow")
            continue;
        const cellNodes = row.children.filter((child) => child.type === "tableCell");
        // A row with no cells would be an invalid empty <w:tr>; leave it out.
        if (cellNodes.length === 0)
            continue;
        const trPr = row.isHeader ? "<w:trPr><w:tblHeader/></w:trPr>" : "";
        const cells = cellNodes.map(renderCell).join("");
        renderedRows.push(`<w:tr>${trPr}${cells}</w:tr>`);
    }
    const rows = renderedRows.join("\n");
    const borders = ["top", "left", "bottom", "right", "insideH", "insideV"]
        .map((edge) => `<w:${edge} w:val="single" w:sz="4" w:color="auto"/>`)
        .join("");
    const tblPr = `<w:tblPr><w:tblW w:w="0" w:type="auto"/><w:tblBorders>${borders}</w:tblBorders></w:tblPr>`;
    return `<w:tbl>${tblPr}${tblGrid}${rows}</w:tbl>`;
}
472
716
  // ---------------------------------------------------------------------------
473
717
  // Inline conversion
@@ -478,11 +722,14 @@ function convertInline(node, ctx, inheritedRPr = "") {
478
722
  if (node.type === "break")
479
723
  return `<w:r><w:br/></w:r>`;
480
724
  if (node.type === "inlineCode") {
481
- return `<w:r><w:rPr><w:rFonts w:ascii="Courier New" w:hAnsi="Courier New"/>${inheritedRPr}</w:rPr><w:t>${esc(node.value)}</w:t></w:r>`;
725
+ const parts = [`<w:rFonts w:ascii="Courier New" w:hAnsi="Courier New"/>`];
726
+ if (inheritedRPr)
727
+ parts.push(...tokenizeOoxmlChildren(inheritedRPr));
728
+ return `<w:r>${serializeRPr(parts)}<w:t xml:space="preserve">${esc(node.value)}</w:t></w:r>`;
482
729
  }
483
730
  if (node.type === "image") {
484
731
  const img = node;
485
- return `<w:r><w:t>[image: ${esc(img.alt ?? img.url)}]</w:t></w:r>`;
732
+ return `<w:r><w:t xml:space="preserve">[image: ${esc(img.alt ?? img.url)}]</w:t></w:r>`;
486
733
  }
487
734
  return "";
488
735
  }
@@ -491,11 +738,8 @@ function convertText(node, ctx, inheritedRPr = "") {
491
738
  const otherMarks = node.marks?.filter(m => m.type !== "link") ?? [];
492
739
  // Build rPr: inherited CSS props first, then mark-specific overrides
493
740
  const parts = [];
494
- // Split inherited XML into individual elements for dedup
495
- if (inheritedRPr) {
496
- const elems = inheritedRPr.match(/<w:[^/]*\/>/g) ?? [];
497
- parts.push(...elems);
498
- }
741
+ if (inheritedRPr)
742
+ parts.push(...tokenizeOoxmlChildren(inheritedRPr));
499
743
  for (const mark of otherMarks) {
500
744
  switch (mark.type) {
501
745
  case "strong":
@@ -521,16 +765,8 @@ function convertText(node, ctx, inheritedRPr = "") {
521
765
  break;
522
766
  }
523
767
  }
524
- // Deduplicate rPr elements (e.g. multiple <w:b/> from container + class CSS)
525
- const seen = new Set();
526
- const dedupedParts = [];
527
- for (const p of parts) {
528
- if (!seen.has(p)) {
529
- seen.add(p);
530
- dedupedParts.push(p);
531
- }
532
- }
533
- const rPr = dedupedParts.length > 0 ? `<w:rPr>${dedupedParts.join("")}</w:rPr>` : "";
768
+ // Schema-order + tag-dedup (later sources override earlier).
769
+ const rPr = serializeRPr(parts);
534
770
  // Track-changes: check diff context (skip for linked text to avoid nesting complexity)
535
771
  if (ctx.diff && ctx.nodePathKeys && !linkMark) {
536
772
  const key = ctx.nodePathKeys.get(node);
@@ -575,21 +811,24 @@ function convertText(node, ctx, inheritedRPr = "") {
575
811
  // ---------------------------------------------------------------------------
576
812
  // Paragraph property builder
577
813
  // ---------------------------------------------------------------------------
578
- function buildPPr(node, ctx) {
814
+ function buildPPrParts(node, ctx) {
815
+ // Collect parts in any order — serializePPr applies ECMA-376 ordering at the end.
579
816
  const parts = [];
580
817
  // Classes → pStyle + generate CSS-based Word styles
581
818
  const classes = node.classes;
582
819
  if (classes && classes.length > 0) {
583
- const styleId = classes[0]; // Primary class becomes the paragraph style
584
- parts.push(`<w:pStyle w:val="${esc(styleId)}"/>`);
820
+ // Style IDs must be valid OOXML identifiers — sanitize the CSS class name.
821
+ const styleId = sanitizeStyleId(classes[0]);
822
+ parts.push(`<w:pStyle w:val="${styleId}"/>`);
585
823
  // Generate custom style from CSS rules (once per styleId)
586
824
  for (const cls of classes) {
587
- if (ctx.cssClasses[cls] && !ctx.generatedStyleIds.has(cls)) {
588
- ctx.generatedStyleIds.add(cls);
825
+ const clsId = sanitizeStyleId(cls);
826
+ if (ctx.cssClasses[cls] && !ctx.generatedStyleIds.has(clsId)) {
827
+ ctx.generatedStyleIds.add(clsId);
589
828
  const cssDecls = ctx.cssClasses[cls];
590
829
  const runProps = cssToRunProps(cssDecls);
591
830
  const paraProps = cssToParaProps(cssDecls);
592
- ctx.customStyles.push(buildStyleElement(cls, cls, "paragraph", runProps, paraProps));
831
+ ctx.customStyles.push(buildStyleElement(clsId, cls, "paragraph", runProps, paraProps));
593
832
  }
594
833
  }
595
834
  }
@@ -616,13 +855,14 @@ function buildPPr(node, ctx) {
616
855
  if (elTag) {
617
856
  ensureElementStyle(elTag, ctx);
618
857
  // If no class style was set, use the element style
619
- if (!classes || classes.length === 0) {
620
- parts.unshift(`<w:pStyle w:val="_el_${esc(elTag)}"/>`);
858
+ if ((!classes || classes.length === 0) && ctx.cssElements[elTag]) {
859
+ parts.push(`<w:pStyle w:val="_el_${esc(elTag)}"/>`);
621
860
  }
622
- // If class style was set, element CSS is already baked into the base —
623
- // we could chain basedOn, but for simplicity element styles are standalone
624
861
  }
625
- return parts.length > 0 ? `<w:pPr>${parts.join("")}</w:pPr>` : "";
862
+ return parts;
863
+ }
864
/**
 * Build the serialized `<w:pPr>` for a block node: collect its property
 * parts with `buildPPrParts`, then wrap and order them with `serializePPr`.
 *
 * @param {object} node Block node to describe.
 * @param {object} ctx Writer context.
 * @returns {string} The `<w:pPr>` element, or "" when there are no parts.
 */
function buildPPr(node, ctx) {
    const parts = buildPPrParts(node, ctx);
    return serializePPr(parts);
}
627
867
  /** Map UDM type to the HTML tag name for CSS element selector lookup */
628
868
  function udmTypeToHtmlTag(type, node) {
@@ -689,6 +929,16 @@ function stripDataForSnapshot(node) {
689
929
  function parseColorSafe(val) {
690
930
  return parseColor(val) ?? "auto";
691
931
  }
932
/**
 * Fingerprint a string: the first 16 hex digits (64 bits) of its SHA-256
 * digest. The value is deterministic and is used for change detection
 * only, not as a security boundary.
 *
 * @param {string} s Text to fingerprint.
 * @returns {string} 16 lowercase hexadecimal characters.
 */
function hashString(s) {
    const hasher = createHash("sha256");
    hasher.update(s);
    const fullHex = hasher.digest("hex");
    return fullHex.slice(0, 16);
}
692
942
  // ---------------------------------------------------------------------------
693
943
  // Deleted-block rendering (for writeDiffDocx)
694
944
  // ---------------------------------------------------------------------------
@@ -707,7 +957,7 @@ function convertDeletedBlock(op, ctx) {
707
957
  const node = op.node;
708
958
  const diff = ctx.diff;
709
959
  const author = esc(diff.author);
710
- const date = diff.date;
960
+ const date = esc(diff.date);
711
961
  const textContent = extractTextContent(node);
712
962
  if (!textContent)
713
963
  return "";
@@ -719,13 +969,35 @@ function convertDeletedBlock(op, ctx) {
719
969
  const depth = node.depth;
720
970
  pStyle = `<w:pStyle w:val="Heading${depth}"/>`;
721
971
  }
722
- const delMarkXml = `<w:del w:id="${delMarkId}" w:author="${author}" w:date="${date}"/>`;
723
- const pPr = `<w:pPr>${pStyle}<w:rPr>${delMarkXml}</w:rPr></w:pPr>`;
972
+ const delMark = `<w:del w:id="${delMarkId}" w:author="${author}" w:date="${date}"/>`;
973
+ const basePPr = pStyle ? `<w:pPr>${pStyle}</w:pPr>` : "";
974
+ const pPr = mergeParaMarkChild(basePPr, delMark);
724
975
  const delContent = `<w:del w:id="${delContentId}" w:author="${author}" w:date="${date}"><w:r><w:delText xml:space="preserve">${esc(textContent)}</w:delText></w:r></w:del>`;
725
976
  return `<w:p>${pPr}${delContent}</w:p>`;
726
977
  }
727
978
  function esc(s) {
728
- return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;");
979
+ return sanitizeText(s)
980
+ .replace(/&/g, "&amp;")
981
+ .replace(/</g, "&lt;")
982
+ .replace(/>/g, "&gt;")
983
+ .replace(/"/g, "&quot;");
984
+ }
985
+ /**
986
+ * Strip XML 1.0 forbidden control characters (U+0000–U+001F except \t \n \r,
987
+ * plus U+007F). ECMA-376 inherits this restriction; leaving these in causes
988
+ * `document.xml` to be unparseable and Word refuses to open the file.
989
+ */
990
+ function sanitizeText(s) {
991
+ return s.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, "");
992
+ }
993
+ /**
994
+ * Normalize a CSS class name into a valid OOXML style ID.
995
+ * Per ECMA-376 §17.7.4.9, style IDs must match `[a-zA-Z0-9_\-:]` and be
996
+ * ≤ 31 characters. Anything else is stripped.
997
+ */
998
+ function sanitizeStyleId(id) {
999
+ const cleaned = id.replace(/[^a-zA-Z0-9_\-:]/g, "").slice(0, 31);
1000
+ return cleaned || "Normal";
729
1001
  }
730
1002
  // ---------------------------------------------------------------------------
731
1003
  // writeDiffDocx — serialize a diff result as a .docx with track-changes markup
@@ -749,14 +1021,17 @@ export async function writeDiffDocx(newTree, diffResult, options) {
749
1021
  const date = options?.date ?? "2024-01-01T00:00:00Z";
750
1022
  const diffCtx = buildDiffCtx(diffResult, author, date);
751
1023
  const nodePathKeys = buildNodePathMap(newTree);
1024
+ const docxDataPre = newTree.data?.docx;
1025
+ const existingRels = docxDataPre?.relationships;
752
1026
  const ctx = {
753
1027
  cssClasses: mergedClassCss,
754
1028
  cssElements: elementCss,
755
1029
  customStyles: [],
756
1030
  generatedStyleIds: new Set(),
757
1031
  nextNumId: 3,
1032
+ allocatedNums: new Map(),
758
1033
  hyperlinks: new Map(),
759
- nextRId: 100,
1034
+ nextRId: nextRIdFor(existingRels),
760
1035
  diff: diffCtx,
761
1036
  nodePathKeys,
762
1037
  };
@@ -774,6 +1049,12 @@ export async function writeDiffDocx(newTree, diffResult, options) {
774
1049
  offset++;
775
1050
  }
776
1051
  }
1052
+ // Match writeDocx's trailing-paragraph-after-table behaviour so the body
1053
+ // doesn't end with </w:tbl> immediately before <w:sectPr>.
1054
+ const lastBlock = newTree.children[newTree.children.length - 1];
1055
+ if (lastBlock?.type === "table") {
1056
+ renderedBlocks.push("<w:p/>");
1057
+ }
777
1058
  const bodyXml = renderedBlocks.join("\n");
778
1059
  const docxData = newTree.data?.docx;
779
1060
  let stylesXml = docxData?.styles ?? DEFAULT_STYLES;
@@ -793,16 +1074,24 @@ ${bodyXml}
793
1074
  const hlRels = [...ctx.hyperlinks].map(([rId, url]) => `<Relationship Id="${rId}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink" Target="${esc(url)}" TargetMode="External"/>`).join("\n ");
794
1075
  relsXml = relsXml.replace("</Relationships>", ` ${hlRels}\n</Relationships>`);
795
1076
  }
1077
+ let numberingXml = docxData?.numbering ?? DEFAULT_NUMBERING;
1078
+ if (ctx.allocatedNums.size > 0) {
1079
+ const newNums = [...ctx.allocatedNums]
1080
+ .map(([numId, abstractId]) => `<w:num w:numId="${numId}"><w:abstractNumId w:val="${abstractId}"/></w:num>`)
1081
+ .join("\n ");
1082
+ numberingXml = numberingXml.replace("</w:numbering>", ` ${newNums}\n</w:numbering>`);
1083
+ }
796
1084
  const parts = {
797
1085
  document: docXml,
798
1086
  styles: stylesXml,
799
- numbering: docxData?.numbering ?? DEFAULT_NUMBERING,
1087
+ numbering: numberingXml,
800
1088
  relationships: relsXml,
801
1089
  contentTypes: docxData?.contentTypes ?? undefined,
802
1090
  media: new Map(),
803
1091
  rawParts: new Map(),
804
1092
  };
805
1093
  const udmSnapshot = stripDataForSnapshot(newTree);
1094
+ udmSnapshot.__docHash = hashString(docXml);
806
1095
  parts.rawParts.set("word/otomate-udm.json", JSON.stringify(udmSnapshot));
807
1096
  if (cssData && (Object.keys(cssData.classRules ?? {}).length > 0 || Object.keys(cssData.elementRules ?? {}).length > 0)) {
808
1097
  parts.rawParts.set("word/otomate-css.json", JSON.stringify(cssData));