@coding01/docsjs 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -80,6 +80,9 @@ function getAttr(node, name) {
80
80
  function emuToPx(emu) {
81
81
  return emu * 96 / 914400;
82
82
  }
83
/**
 * Converts a twip length to pixels using the fixed ratio 96 / 1440.
 *
 * @param {number} twip Length in twips.
 * @returns {number} The same length expressed in pixels.
 */
function twipToPx(twip) {
  const PX_PER_INCH = 96;
  const TWIPS_PER_INCH = 1440;
  return twip * PX_PER_INCH / TWIPS_PER_INCH;
}
83
86
  function parseDrawingSizePx(drawing) {
84
87
  const extentNode = queryAllByLocalName(drawing, "extent").find((node) => {
85
88
  const parent = node.parentElement;
@@ -108,9 +111,7 @@ function imageDimensionAttributes(sizePx) {
108
111
  }
109
112
  return attrs.length > 0 ? ` ${attrs.join(" ")}` : "";
110
113
  }
111
- function parseAnchorPositionPx(drawing) {
112
- const anchor = directChildrenByLocalName(drawing, "anchor")[0] ?? null;
113
- if (!anchor) return { leftPx: null, topPx: null };
114
+ function parseAnchorPositionPx(anchor) {
114
115
  let leftPx = null;
115
116
  let topPx = null;
116
117
  const positionH = directChildrenByLocalName(anchor, "positionH")[0] ?? null;
@@ -125,34 +126,80 @@ function parseAnchorPositionPx(drawing) {
125
126
  if (Number.isFinite(top)) topPx = emuToPx(top);
126
127
  return { leftPx, topPx };
127
128
  }
128
- function parseAnchorWrapMode(drawing) {
129
- const anchor = directChildrenByLocalName(drawing, "anchor")[0] ?? null;
130
- if (!anchor) return null;
129
/**
 * Determines the text-wrap mode declared on an anchor element.
 *
 * The anchor's direct children are probed in a fixed priority order and the
 * first wrap element found decides the result.
 *
 * @param {Element} anchor The anchor element to inspect.
 * @returns {"square"|"tight"|"topAndBottom"|"none"|null} The wrap mode, or
 *   null when no recognised wrap child is present.
 */
function parseAnchorWrapMode(anchor) {
  // Order matters: it mirrors the precedence of the sequential checks.
  const wrapElements = [
    ["wrapSquare", "square"],
    ["wrapTight", "tight"],
    ["wrapTopAndBottom", "topAndBottom"],
    ["wrapNone", "none"]
  ];
  for (const [elementName, mode] of wrapElements) {
    if (directChildrenByLocalName(anchor, elementName)[0]) return mode;
  }
  return null;
}
137
- function mergeImageStyle(baseAttrs, anchorPos, wrapMode) {
138
- if (anchorPos.leftPx === null && anchorPos.topPx === null) return baseAttrs;
136
/**
 * Collects layout metadata from the anchor child of a drawing element.
 *
 * @param {Element} drawing The drawing element to inspect.
 * @returns {object|null} null when the drawing has no direct anchor child;
 *   otherwise an object with the parsed position, wrap mode, the four
 *   dist* distances in pixels (null when absent, non-numeric or negative),
 *   the relativeFrom values of positionH/positionV, the behindDoc /
 *   allowOverlap / layoutInCell flags and the numeric relativeHeight
 *   (null when absent or non-numeric).
 */
function parseAnchorMeta(drawing) {
  const anchor = directChildrenByLocalName(drawing, "anchor")[0] ?? null;
  if (!anchor) return null;

  const firstChild = (name) => directChildrenByLocalName(anchor, name)[0] ?? null;

  // Distances are stored as EMU attributes on the anchor itself.
  const readDistancePx = (attrName) => {
    const text = getAttr(anchor, attrName);
    if (!text) return null;
    const emu = Number.parseInt(text, 10);
    if (!Number.isFinite(emu) || emu < 0) return null;
    return emuToPx(emu);
  };

  // Accepts the 1/true/on and 0/false/off spellings; anything else falls back.
  const readFlag = (attrName, fallback) => {
    const text = (getAttr(anchor, attrName) ?? "").toLowerCase();
    if (["1", "true", "on"].includes(text)) return true;
    if (["0", "false", "off"].includes(text)) return false;
    return fallback;
  };

  const relativeFromH = getAttr(firstChild("positionH"), "relativeFrom");
  const relativeFromV = getAttr(firstChild("positionV"), "relativeFrom");

  const heightText = getAttr(anchor, "relativeHeight");
  const heightValue = heightText ? Number.parseInt(heightText, 10) : Number.NaN;
  const relativeHeight = Number.isFinite(heightValue) ? heightValue : null;

  return {
    position: parseAnchorPositionPx(anchor),
    wrapMode: parseAnchorWrapMode(anchor),
    distTPx: readDistancePx("distT"),
    distBPx: readDistancePx("distB"),
    distLPx: readDistancePx("distL"),
    distRPx: readDistancePx("distR"),
    relativeFromH,
    relativeFromV,
    behindDoc: readFlag("behindDoc", false),
    allowOverlap: readFlag("allowOverlap", true),
    layoutInCell: readFlag("layoutInCell", true),
    relativeHeight
  };
}
171
/**
 * Adds absolute-positioning CSS and anchor data attributes to an image's
 * attribute string.
 *
 * @param {string} baseAttrs Attribute text already built for the image; it
 *   may or may not contain a style="..." attribute.
 * @param {object|null} anchorMeta Metadata produced by parseAnchorMeta, or
 *   null for images without an anchor.
 * @returns {string} baseAttrs unchanged when there is no anchor or neither
 *   offset is known; otherwise baseAttrs with the positioning declarations
 *   merged into its style attribute, followed by the data-word-* attributes.
 */
function mergeImageStyle(baseAttrs, anchorMeta) {
  if (!anchorMeta) return baseAttrs;
  const { position, wrapMode } = anchorMeta;
  const { leftPx, topPx } = position;
  if (leftPx === null && topPx === null) return baseAttrs;

  const declarations = ["position:absolute"];
  if (leftPx !== null) declarations.push(`left:${leftPx.toFixed(2)}px`);
  if (topPx !== null) declarations.push(`top:${topPx.toFixed(2)}px`);
  // behindDoc forces layer 0; otherwise relativeHeight is used, defaulting to 3.
  declarations.push(`z-index:${anchorMeta.behindDoc ? 0 : anchorMeta.relativeHeight ?? 3}`);
  const margins = [
    ["top", anchorMeta.distTPx],
    ["bottom", anchorMeta.distBPx],
    ["left", anchorMeta.distLPx],
    ["right", anchorMeta.distRPx]
  ];
  for (const [side, px] of margins) {
    if (px !== null) declarations.push(`margin-${side}:${px.toFixed(2)}px`);
  }
  if (wrapMode === "topAndBottom") {
    declarations.push("display:block", "clear:both");
  }

  const dataAttrs = [`data-word-anchor="1"`];
  if (wrapMode) dataAttrs.push(`data-word-wrap="${wrapMode}"`);
  if (anchorMeta.relativeFromH) {
    dataAttrs.push(`data-word-anchor-relh="${escapeHtml(anchorMeta.relativeFromH)}"`);
  }
  if (anchorMeta.relativeFromV) {
    dataAttrs.push(`data-word-anchor-relv="${escapeHtml(anchorMeta.relativeFromV)}"`);
  }
  dataAttrs.push(
    `data-word-anchor-behind="${anchorMeta.behindDoc ? "1" : "0"}"`,
    `data-word-anchor-overlap="${anchorMeta.allowOverlap ? "1" : "0"}"`,
    `data-word-anchor-layout-cell="${anchorMeta.layoutInCell ? "1" : "0"}"`
  );
  const anchorAttrs = dataAttrs.join(" ");

  if (!baseAttrs.includes("style=")) {
    return `${baseAttrs} style="${declarations.join(";")}" ${anchorAttrs}`;
  }
  // An existing style attribute is extended in place rather than duplicated.
  return baseAttrs.replace(/style="([^"]*)"/, (_match, existingStyle) => {
    const combined = [existingStyle, ...declarations].filter((part) => part.length > 0).join(";");
    return `style="${combined}" ${anchorAttrs}`;
  });
}
158
205
  function parseDocRelsMap(relsXmlText) {
@@ -178,11 +225,16 @@ function extToMime(ext) {
178
225
  if (lower === "svg") return "image/svg+xml";
179
226
  return "application/octet-stream";
180
227
  }
228
/**
 * Turns a relationship target into a zip entry path under "word/".
 *
 * Backslashes are treated as path separators, empty and "." segments are
 * dropped, and ".." removes the preceding segment. A ".." with nothing
 * before it is discarded, so "../media/a.png" still maps to
 * "word/media/a.png". Without this, targets such as "./media/a.png" or
 * "charts/../media/a.png" would yield paths that keep the dot segments and
 * never match an entry name.
 *
 * @param {string} relTarget Target value taken from a relationships part.
 * @returns {string} A path that starts with "word/".
 */
function normalizeWordPath(relTarget) {
  const segments = [];
  for (const segment of relTarget.replace(/\\/g, "/").split("/")) {
    if (segment === "" || segment === ".") continue;
    if (segment === "..") {
      // Popping an empty list is a no-op, which clamps leading "../" runs.
      segments.pop();
      continue;
    }
    segments.push(segment);
  }
  const normalized = segments.join("/");
  return normalized.startsWith("word/") ? normalized : `word/${normalized}`;
}
181
234
  async function imageRidToDataUrl(zip, relMap, rid) {
182
235
  const relTarget = relMap[rid];
183
236
  if (!relTarget) return null;
184
- const normalized = relTarget.replace(/^\/+/, "");
185
- const path = normalized.startsWith("word/") ? normalized : `word/${normalized}`;
237
+ const path = normalizeWordPath(relTarget);
186
238
  const file = zip.file(path);
187
239
  if (!file) return null;
188
240
  const base64 = await file.async("base64");
@@ -190,6 +242,55 @@ async function imageRidToDataUrl(zip, relMap, rid) {
190
242
  const mime = extToMime(ext);
191
243
  return `data:${mime};base64,${base64}`;
192
244
  }
245
/**
 * Reads the text of the part that a relationship id points at.
 *
 * @param {object} zip Archive object exposing file(path); presumably a JSZip
 *   instance — confirm against the caller.
 * @param {Object<string, string>} relMap Relationship id to target map.
 * @param {string} rid Relationship id to resolve.
 * @returns {Promise<string|null>} The entry's text content, or null when the
 *   id is unknown or the archive has no such entry.
 */
async function readXmlByRid(zip, relMap, rid) {
  const relTarget = relMap[rid];
  if (!relTarget) return null;
  const entry = zip.file(normalizeWordPath(relTarget));
  if (!entry) return null;
  return entry.async("string");
}
252
/**
 * Identifies the chart kind by looking for a known chart element.
 *
 * @param {Document|Element} chartDoc Parsed chart XML.
 * @returns {string} The first matching element name without its "Chart"
 *   suffix (for example "bar"), or "unknown" when none matches.
 */
function parseChartType(chartDoc) {
  const candidates = ["barChart", "lineChart", "pieChart", "areaChart", "scatterChart", "radarChart", "doughnutChart"];
  const match = candidates.find((elementName) => queryByLocalName(chartDoc, elementName));
  return match ? match.replace(/Chart$/, "") : "unknown";
}
259
/**
 * Builds a compact description of a chart part.
 *
 * @param {string} chartXmlText Raw XML of the chart part.
 * @returns {{title: string, type: string, seriesCount: number, pointCount: number}}
 *   title is the first non-blank "t" element text (or "Chart" when there is
 *   none); the counts are the numbers of "ser" and "pt" elements.
 */
function parseChartSummary(chartXmlText) {
  const chartDoc = parseXml(chartXmlText);

  let title = "Chart";
  for (const textNode of queryAllByLocalName(chartDoc, "t")) {
    const text = (textNode.textContent ?? "").trim();
    if (text.length > 0) {
      title = text;
      break;
    }
  }

  const seriesCount = queryAllByLocalName(chartDoc, "ser").length;
  const pointCount = queryAllByLocalName(chartDoc, "pt").length;
  return { title, type: parseChartType(chartDoc), seriesCount, pointCount };
}
267
/**
 * Extracts the visible text items of a SmartArt diagram part.
 *
 * @param {string} diagramXmlText Raw XML of the diagram data part.
 * @returns {string[]} Trimmed, non-empty texts of the "t" elements in
 *   document order, limited to the first 12.
 */
function extractSmartArtText(diagramXmlText) {
  const MAX_ITEMS = 12;
  const diagramDoc = parseXml(diagramXmlText);
  const items = [];
  for (const textNode of queryAllByLocalName(diagramDoc, "t")) {
    const text = (textNode.textContent ?? "").trim();
    if (text.length > 0) items.push(text);
  }
  return items.slice(0, MAX_ITEMS);
}
271
/**
 * Renders a math node as a linear plain-text expression.
 *
 * Handles text ("t"), fractions ("f"), superscripts ("sSup"), subscripts
 * ("sSub") and radicals ("rad"); every other element is rendered as the
 * concatenation of its children.
 *
 * @param {Element} node Math element to render.
 * @returns {string} Linear text such as "(a)/(b)", "x^(2)" or "sqrt(x)".
 */
function ommlNodeToText(node) {
  // Renders the named part of `owner`, or `fallback` when it is missing.
  const partText = (owner, partName, fallback) => {
    const part = queryByLocalName(owner, partName);
    return part ? ommlNodeToText(part) : fallback;
  };

  switch (node.localName) {
    case "t":
      return node.textContent ?? "";
    case "f":
      return `(${partText(node, "num", "?")})/(${partText(node, "den", "?")})`;
    case "sSup":
      return `${partText(node, "e", "")}^(${partText(node, "sup", "")})`;
    case "sSub":
      return `${partText(node, "e", "")}_(${partText(node, "sub", "")})`;
    case "rad":
      return `sqrt(${partText(node, "e", "")})`;
    default:
      return Array.from(node.children).map((child) => ommlNodeToText(child)).join("");
  }
}
193
294
  function runStyleToCss(rPr) {
194
295
  if (!rPr) return "";
195
296
  const declarations = [];
@@ -308,44 +409,66 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
308
409
  const tag = paragraphTag(paragraph);
309
410
  const alignStyle = paragraphAlignStyle(paragraph);
310
411
  const dataAttr = paragraphDataAttr(paragraphIndex);
311
- const runs = queryAllByLocalName(paragraph, "r");
312
- if (runs.length === 0) {
412
+ const hasRenderableNode = queryAllByLocalName(paragraph, "r").length > 0 || queryAllByLocalName(paragraph, "oMath").length > 0 || queryAllByLocalName(paragraph, "oMathPara").length > 0;
413
+ if (!hasRenderableNode) {
313
414
  return `<${tag}${dataAttr}${alignStyle ? ` style="${alignStyle}"` : ""}><br/></${tag}>`;
314
415
  }
315
- const parts = [];
316
- const renderedPageBreakCount = queryAllByLocalName(paragraph, "lastRenderedPageBreak").length;
317
- for (let i = 0; i < renderedPageBreakCount; i += 1) {
318
- parts.push(`<span data-word-page-break="1" style="display:block;break-before:page"></span>`);
416
/**
 * Reads the revision attributes of a tracked-change element.
 *
 * Each attribute is looked up with the "w:" prefix first and then without.
 *
 * @param {Element} node The revision element.
 * @param {string} type Revision kind to record ("ins" or "del" at the call
 *   sites visible here).
 * @returns {{type: string, id: *, author: *, date: *}} The revision
 *   descriptor; attribute values are whatever getAttr returns.
 */
function parseRevisionMeta(node, type) {
  const readAttr = (name) => getAttr(node, `w:${name}`) ?? getAttr(node, name);
  return {
    type,
    id: readAttr("id"),
    author: readAttr("author"),
    date: readAttr("date")
  };
}
424
/**
 * Resolves the revision that applies to a run.
 *
 * @param {Element} run The run element.
 * @param {object|null} fallback Revision descriptor inherited from an
 *   enclosing element; returned as-is when truthy.
 * @returns {object|null} The fallback, or the descriptor of the nearest
 *   "ins"/"del" ancestor-or-self found before reaching a "p" element, or
 *   null when there is none.
 */
function inferRevisionMeta(run, fallback) {
  if (fallback) return fallback;
  for (let cursor = run; cursor; cursor = cursor.parentElement) {
    const name = cursor.localName;
    if (name === "ins" || name === "del") return parseRevisionMeta(cursor, name);
    // Revisions do not apply across the paragraph boundary.
    if (name === "p") break;
  }
  return null;
}
320
- for (const run of runs) {
435
/**
 * Serialises a revision descriptor into HTML data attributes.
 *
 * @param {{type: string, id: *, author: *, date: *}} meta Revision descriptor.
 * @returns {string} Space-separated attributes: data-word-revision always,
 *   plus the -id, -author and -date variants for each truthy field.
 */
function revisionMetaAttrs(meta) {
  const optionalFields = [
    ["id", meta.id],
    ["author", meta.author],
    ["date", meta.date]
  ];
  const attrs = [`data-word-revision="${meta.type}"`];
  for (const [key, value] of optionalFields) {
    if (value) attrs.push(`data-word-revision-${key}="${escapeHtml(value)}"`);
  }
  return attrs.join(" ");
}
442
+ async function runToHtml(run, revisionFallback) {
443
+ const result = [];
321
444
  const rPr = queryByLocalName(run, "rPr");
322
445
  const css = runStyleToCss(rPr);
323
446
  const footnoteRef = queryByLocalName(run, "footnoteReference");
324
447
  const footnoteId = getAttr(footnoteRef, "w:id") ?? getAttr(footnoteRef, "id");
325
448
  if (footnoteId && footnotesMap[footnoteId]) {
326
449
  usedFootnoteIds.push(footnoteId);
327
- parts.push(
450
+ result.push(
328
451
  `<sup data-word-footnote-ref="${footnoteId}"><a href="#word-footnote-${footnoteId}">[${footnoteId}]</a></sup>`
329
452
  );
330
- continue;
453
+ return result;
331
454
  }
332
455
  const endnoteRef = queryByLocalName(run, "endnoteReference");
333
456
  const endnoteId = getAttr(endnoteRef, "w:id") ?? getAttr(endnoteRef, "id");
334
457
  if (endnoteId && endnotesMap[endnoteId]) {
335
458
  usedEndnoteIds.push(endnoteId);
336
- parts.push(
459
+ result.push(
337
460
  `<sup data-word-endnote-ref="${endnoteId}"><a href="#word-endnote-${endnoteId}">[${endnoteId}]</a></sup>`
338
461
  );
339
- continue;
462
+ return result;
340
463
  }
341
464
  const commentRef = queryByLocalName(run, "commentReference");
342
465
  const commentId = getAttr(commentRef, "w:id") ?? getAttr(commentRef, "id");
343
466
  if (commentId && commentsMap[commentId]) {
344
467
  usedCommentIds.push(commentId);
345
- parts.push(
468
+ result.push(
346
469
  `<sup data-word-comment-ref="${commentId}"><a href="#word-comment-${commentId}">[c${commentId}]</a></sup>`
347
470
  );
348
- continue;
471
+ return result;
349
472
  }
350
473
  const drawing = queryByLocalName(run, "drawing");
351
474
  if (drawing) {
@@ -356,13 +479,35 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
356
479
  if (src) {
357
480
  const imageSize = parseDrawingSizePx(drawing);
358
481
  const dimensionAttrs = imageDimensionAttributes(imageSize);
359
- const anchorPos = parseAnchorPositionPx(drawing);
360
- const wrapMode = parseAnchorWrapMode(drawing);
361
- const attrs = mergeImageStyle(dimensionAttrs, anchorPos, wrapMode);
362
- parts.push(`<img src="${src}" alt="word-image"${attrs}/>`);
363
- continue;
482
+ const anchorMeta = parseAnchorMeta(drawing);
483
+ const attrs = mergeImageStyle(dimensionAttrs, anchorMeta);
484
+ result.push(`<img src="${src}" alt="word-image"${attrs}/>`);
485
+ return result;
364
486
  }
365
487
  }
488
+ const chartRef = queryByLocalName(drawing, "chart");
489
+ const chartRid = getAttr(chartRef, "r:id") ?? getAttr(chartRef, "id");
490
+ if (chartRid) {
491
+ const chartXmlText = await readXmlByRid(zip, relMap, chartRid);
492
+ if (chartXmlText) {
493
+ const summary = parseChartSummary(chartXmlText);
494
+ result.push(
495
+ `<figure data-word-chart="1" data-word-chart-type="${summary.type}" data-word-chart-series="${summary.seriesCount}" data-word-chart-points="${summary.pointCount}"><figcaption>${escapeHtml(summary.title)}</figcaption><div>Chart(${escapeHtml(summary.type)}): series=${summary.seriesCount}, points=${summary.pointCount}</div></figure>`
496
+ );
497
+ return result;
498
+ }
499
+ }
500
+ const smartArtRef = queryByLocalName(drawing, "relIds");
501
+ const smartArtRid = getAttr(smartArtRef, "r:dm") ?? getAttr(smartArtRef, "dm");
502
+ if (smartArtRid) {
503
+ const diagramXmlText = await readXmlByRid(zip, relMap, smartArtRid);
504
+ const textItems = diagramXmlText ? extractSmartArtText(diagramXmlText) : [];
505
+ const preview = textItems.length > 0 ? `: ${escapeHtml(textItems.join(" / "))}` : "";
506
+ result.push(
507
+ `<figure data-word-smartart="1" data-word-smartart-items="${textItems.length}"><figcaption>SmartArt fallback${preview}</figcaption></figure>`
508
+ );
509
+ return result;
510
+ }
366
511
  }
367
512
  const texts = queryAllByLocalName(run, "t").map((t) => t.textContent ?? "").join("");
368
513
  const delTexts = queryAllByLocalName(run, "delText").map((t) => t.textContent ?? "").join("");
@@ -373,40 +518,66 @@ async function paragraphToHtml(zip, relMap, paragraph, paragraphIndex, footnotes
373
518
  }).length;
374
519
  const lineBreakCount = Math.max(0, brNodes.length - pageBreakCount);
375
520
  const runText2 = `${escapeHtml(texts || delTexts)}${"<br/>".repeat(lineBreakCount)}`;
376
- if (!runText2) continue;
377
- let revisionType = null;
378
- let cursor = run;
379
- while (cursor) {
380
- if (cursor.localName === "ins") {
381
- revisionType = "ins";
382
- break;
383
- }
384
- if (cursor.localName === "del") {
385
- revisionType = "del";
386
- break;
387
- }
388
- if (cursor.localName === "p") break;
389
- cursor = cursor.parentElement;
390
- }
391
- if (css) {
392
- const span = `<span style="${css}">${runText2}</span>`;
393
- if (revisionType) {
394
- const tag2 = revisionType === "ins" ? "ins" : "del";
395
- parts.push(`<${tag2} data-word-revision="${revisionType}">${span}</${tag2}>`);
396
- } else {
397
- parts.push(span);
398
- }
399
- } else {
400
- if (revisionType) {
401
- const tag2 = revisionType === "ins" ? "ins" : "del";
402
- parts.push(`<${tag2} data-word-revision="${revisionType}">${runText2}</${tag2}>`);
521
+ if (runText2) {
522
+ const revisionMeta = inferRevisionMeta(run, revisionFallback);
523
+ if (css) {
524
+ const span = `<span style="${css}">${runText2}</span>`;
525
+ if (revisionMeta) {
526
+ const tagName = revisionMeta.type === "ins" ? "ins" : "del";
527
+ result.push(`<${tagName} ${revisionMetaAttrs(revisionMeta)}>${span}</${tagName}>`);
528
+ } else {
529
+ result.push(span);
530
+ }
531
+ } else if (revisionMeta) {
532
+ const tagName = revisionMeta.type === "ins" ? "ins" : "del";
533
+ result.push(`<${tagName} ${revisionMetaAttrs(revisionMeta)}>${runText2}</${tagName}>`);
403
534
  } else {
404
- parts.push(runText2);
535
+ result.push(runText2);
405
536
  }
406
537
  }
407
538
  for (let i = 0; i < pageBreakCount; i += 1) {
408
- parts.push(`<span data-word-page-break="1" style="display:block;break-before:page"></span>`);
539
+ result.push(`<span data-word-page-break="1" style="display:block;break-before:page"></span>`);
540
+ }
541
+ return result;
542
+ }
543
/**
 * Converts one paragraph-level node, and recursively its children, into
 * HTML fragments.
 *
 * - commentRangeStart / commentRangeEnd become empty marker spans. The id
 *   comes from the document, so it is HTML-escaped before being written
 *   into the attribute.
 * - "r" is delegated to runToHtml.
 * - "oMath" / "oMathPara" become a span holding the linearised formula.
 * - "ins" / "del" pass their own revision descriptor down to their children.
 * - any other element is rendered as the concatenation of its children.
 *
 * @param {Element} node Node to convert.
 * @param {object|null} revisionFallback Revision descriptor inherited from
 *   an enclosing ins/del element, or null.
 * @returns {Promise<string[]>} HTML fragments in document order.
 */
async function nodeToHtml(node, revisionFallback) {
  const name = node.localName;

  if (name === "commentRangeStart" || name === "commentRangeEnd") {
    const id = getAttr(node, "w:id") ?? getAttr(node, "id");
    if (!id) return [];
    const edge = name === "commentRangeStart" ? "start" : "end";
    return [`<span data-word-comment-range-${edge}="${escapeHtml(id)}"></span>`];
  }

  if (name === "r") {
    return runToHtml(node, revisionFallback);
  }

  if (name === "oMath" || name === "oMathPara") {
    const linear = ommlNodeToText(node).trim();
    if (!linear) return [];
    return [`<span data-word-omml="1">${escapeHtml(linear)}</span>`];
  }

  // ins/del scope their own metadata onto descendants; other containers
  // simply forward whatever they inherited.
  const childFallback = name === "ins" || name === "del" ? parseRevisionMeta(node, name) : revisionFallback;
  const nested = [];
  for (const child of Array.from(node.children)) {
    nested.push(...await nodeToHtml(child, childFallback));
  }
  return nested;
}
574
+ const parts = [];
575
+ const renderedPageBreakCount = queryAllByLocalName(paragraph, "lastRenderedPageBreak").length;
576
+ for (let i = 0; i < renderedPageBreakCount; i += 1) {
577
+ parts.push(`<span data-word-page-break="1" style="display:block;break-before:page"></span>`);
578
+ }
579
+ for (const child of Array.from(paragraph.children)) {
580
+ parts.push(...await nodeToHtml(child, null));
410
581
  }
411
582
  const content = parts.join("") || "<br/>";
412
583
  return `<${tag}${dataAttr}${alignStyle ? ` style="${alignStyle}"` : ""}>${content}</${tag}>`;
@@ -436,6 +607,101 @@ function parseTcVMerge(tc) {
436
607
  const rawVal = (getAttr(vMerge, "w:val") ?? getAttr(vMerge, "val") ?? "continue").toLowerCase();
437
608
  return rawVal === "restart" ? "restart" : "continue";
438
609
  }
610
/**
 * Reads the table grid column widths in pixels.
 *
 * The result has exactly one entry per gridCol, in order, so that array
 * indices line up with grid column positions. A column whose width is
 * missing, non-numeric or not positive is reported as 0 rather than being
 * dropped; dropping it would shift every later column and make position
 * based lookups (slicing by column index) read the wrong widths. Summing
 * the array is unaffected by the zero entries.
 *
 * @param {Element} table The table element.
 * @returns {number[]} One width per grid column; empty when there is no
 *   tblGrid child.
 */
function parseTblGridWidthsPx(table) {
  const grid = directChildrenByLocalName(table, "tblGrid")[0] ?? null;
  if (!grid) return [];
  return directChildrenByLocalName(grid, "gridCol").map((col) => {
    const raw = getAttr(col, "w:w") ?? getAttr(col, "w");
    const twip = raw ? Number.parseInt(raw, 10) : Number.NaN;
    return Number.isFinite(twip) && twip > 0 ? twipToPx(twip) : 0;
  });
}
619
/**
 * Converts a border size value to pixels by dividing it by 6.
 *
 * NOTE(review): the divisor presumably reflects sizes expressed in eighths
 * of a point at 96 px per inch — confirm against the format specification.
 *
 * @param {number} size Border size as read from the sz attribute.
 * @returns {number} Border width in pixels.
 */
function borderSizeToPx(size) {
  const SIZE_UNITS_PER_PX = 6;
  return size / SIZE_UNITS_PER_PX;
}
622
/**
 * Translates a border element into a CSS border shorthand value.
 *
 * The document's border style names are mapped onto CSS border-style
 * keywords; anything without a close CSS equivalent is drawn as "solid".
 * Emitting the document's name verbatim (for example "dashSmallGap" or
 * "thick") would make the whole CSS declaration invalid and the border
 * would disappear. For the same reason a colour that is not a hex value,
 * such as "auto", falls back to the default 222222.
 *
 * @param {Element|null} borderNode Border element, or null when absent.
 * @returns {string|null} null when borderNode is null; "none" when the
 *   border is switched off (missing val, "nil" or "none"); otherwise
 *   "<width>px <style> #<color>".
 */
function parseBorderCss(borderNode) {
  if (!borderNode) return null;
  const val = (getAttr(borderNode, "w:val") ?? getAttr(borderNode, "val") ?? "").toLowerCase();
  if (!val || val === "nil" || val === "none") return "none";

  // Keys are lower-cased because `val` is lower-cased above.
  const cssStyleByWordStyle = new Map([
    ["double", "double"],
    ["triple", "double"],
    ["dotted", "dotted"],
    ["dashed", "dashed"],
    ["dashsmallgap", "dashed"],
    ["dotdash", "dashed"],
    ["dotdotdash", "dashed"],
    ["dashdotstroked", "dashed"],
    ["threedemboss", "ridge"],
    ["threedengrave", "groove"],
    ["inset", "inset"],
    ["outset", "outset"]
  ]);
  const style = cssStyleByWordStyle.get(val) ?? "solid";

  const rawColor = (getAttr(borderNode, "w:color") ?? getAttr(borderNode, "color") ?? "222222").replace(/^#/, "");
  const color = /^(?:[0-9a-f]{3}|[0-9a-f]{6})$/i.test(rawColor) ? rawColor : "222222";

  const rawSize = getAttr(borderNode, "w:sz") ?? getAttr(borderNode, "sz");
  const size = rawSize ? Number.parseInt(rawSize, 10) : Number.NaN;
  const px = Number.isFinite(size) && size > 0 ? borderSizeToPx(size) : 1;

  return `${px.toFixed(2)}px ${style} #${color}`;
}
633
/**
 * Derives table-level CSS settings from the table's tblPr element.
 *
 * @param {Element} table The table element.
 * @returns {{tableLayout: string, borderCollapse: string, borderSpacingPx: number,
 *   borderCss: string, insideHCss: string|null, insideVCss: string|null}}
 *   tableLayout is "auto" only when tblLayout's type is "autofit", else
 *   "fixed"; borderSpacingPx is the positive "dxa" tblCellSpacing converted
 *   to pixels, else 0; borderCollapse is "separate" when spacing is positive,
 *   else "collapse"; borderCss is the first defined outer border in the order
 *   top, right, bottom, left, defaulting to "1px solid #222".
 */
function parseTableStyleProfile(table) {
  const tblPr = directChildrenByLocalName(table, "tblPr")[0] ?? null;
  // First direct child of `parent` with the given name; null-safe on parent.
  const childOf = (parent, name) => (parent ? directChildrenByLocalName(parent, name)[0] ?? null : null);

  const tblBorders = childOf(tblPr, "tblBorders");
  const layout = childOf(tblPr, "tblLayout");
  const spacing = childOf(tblPr, "tblCellSpacing");

  const spacingType = (getAttr(spacing, "w:type") ?? getAttr(spacing, "type") ?? "dxa").toLowerCase();
  const spacingRaw = getAttr(spacing, "w:w") ?? getAttr(spacing, "w");
  const spacingVal = spacingRaw ? Number.parseFloat(spacingRaw) : Number.NaN;
  const hasSpacing = spacingType === "dxa" && Number.isFinite(spacingVal) && spacingVal > 0;
  const borderSpacingPx = hasSpacing ? twipToPx(spacingVal) : 0;

  const layoutType = (getAttr(layout, "w:type") ?? getAttr(layout, "type") ?? "").toLowerCase();

  const edgeCss = (name) => parseBorderCss(childOf(tblBorders, name));
  const top = edgeCss("top");
  const bottom = edgeCss("bottom");
  const left = edgeCss("left");
  const right = edgeCss("right");

  return {
    tableLayout: layoutType === "autofit" ? "auto" : "fixed",
    borderCollapse: borderSpacingPx > 0 ? "separate" : "collapse",
    borderSpacingPx,
    borderCss: top ?? right ?? bottom ?? left ?? "1px solid #222",
    insideHCss: edgeCss("insideH"),
    insideVCss: edgeCss("insideV")
  };
}
660
/**
 * Computes the CSS width declaration for a table.
 *
 * Resolution order:
 *  1. tblW of type "dxa": value converted from twips to pixels.
 *  2. tblW of type "pct": a plain number is divided by 50; a value written
 *     with a trailing percent sign (e.g. "50%") is already a percentage and
 *     is used as-is. Dividing such a value by 50 would render a 50% table
 *     at 1%.
 *  3. The sum of the grid column widths, capped with max-width:100%.
 *  4. "width:100%".
 *
 * @param {Element} table The table element.
 * @param {number[]} gridWidthsPx Grid column widths in pixels.
 * @returns {string} One or more CSS declarations without trailing semicolon.
 */
function parseTableWidthStyle(table, gridWidthsPx) {
  const tblPr = directChildrenByLocalName(table, "tblPr")[0] ?? null;
  const tblW = tblPr ? directChildrenByLocalName(tblPr, "tblW")[0] ?? null : null;
  const type = (getAttr(tblW, "w:type") ?? getAttr(tblW, "type") ?? "").toLowerCase();
  const rawVal = getAttr(tblW, "w:w") ?? getAttr(tblW, "w");
  const numericVal = rawVal ? Number.parseFloat(rawVal) : Number.NaN;
  const hasPositiveValue = Number.isFinite(numericVal) && numericVal > 0;

  if (type === "dxa" && hasPositiveValue) {
    return `width:${twipToPx(numericVal).toFixed(2)}px`;
  }
  if (type === "pct" && hasPositiveValue) {
    const percent = String(rawVal).trim().endsWith("%") ? numericVal : numericVal / 50;
    return `width:${percent.toFixed(2)}%`;
  }

  const gridTotal = gridWidthsPx.reduce((sum, item) => sum + item, 0);
  if (gridTotal > 0) return `width:${gridTotal.toFixed(2)}px;max-width:100%`;
  return "width:100%";
}
676
/**
 * Computes the CSS width declaration for a table cell.
 *
 * Resolution order:
 *  1. tcW of type "dxa": value converted from twips to pixels.
 *  2. tcW of type "pct": a plain number is divided by 50; a value written
 *     with a trailing percent sign (e.g. "25%") is already a percentage and
 *     is used as-is. Dividing such a value by 50 would shrink it 50-fold.
 *  3. The sum of the grid columns the cell spans, when positive.
 *  4. An empty string (no width declaration).
 *
 * @param {Element} cell The table cell element.
 * @param {number} colCursor Index of the first grid column the cell covers.
 * @param {number} colSpan Number of grid columns the cell covers.
 * @param {number[]} gridWidthsPx Grid column widths in pixels.
 * @returns {string} A CSS width declaration, or "" when none can be derived.
 */
function parseCellWidthStyle(cell, colCursor, colSpan, gridWidthsPx) {
  const tcPr = directChildrenByLocalName(cell, "tcPr")[0] ?? null;
  const tcW = tcPr ? directChildrenByLocalName(tcPr, "tcW")[0] ?? null : null;
  const type = (getAttr(tcW, "w:type") ?? getAttr(tcW, "type") ?? "").toLowerCase();
  const rawVal = getAttr(tcW, "w:w") ?? getAttr(tcW, "w");
  const numericVal = rawVal ? Number.parseFloat(rawVal) : Number.NaN;
  const hasPositiveValue = Number.isFinite(numericVal) && numericVal > 0;

  if (type === "dxa" && hasPositiveValue) {
    return `width:${twipToPx(numericVal).toFixed(2)}px`;
  }
  if (type === "pct" && hasPositiveValue) {
    const percent = String(rawVal).trim().endsWith("%") ? numericVal : numericVal / 50;
    return `width:${percent.toFixed(2)}%`;
  }

  const width = gridWidthsPx.slice(colCursor, colCursor + colSpan).reduce((sum, item) => sum + item, 0);
  if (width > 0) return `width:${width.toFixed(2)}px`;
  return "";
}
692
/**
 * Builds the CSS border declarations for a table cell.
 *
 * @param {Element} cell The table cell element.
 * @param {{insideHCss: string|null, insideVCss: string|null, borderCss: string}} tableStyle
 *   Table-level border settings used as fallbacks.
 * @returns {string} A single "border:" declaration when the cell has no
 *   tcBorders element (inside-horizontal, then inside-vertical, then the
 *   table border); otherwise four per-side declarations where top/bottom
 *   fall back to the inside-horizontal border and left/right to the
 *   inside-vertical border, then to the table border.
 */
function parseCellBorderStyle(cell, tableStyle) {
  const { insideHCss, insideVCss, borderCss } = tableStyle;
  const tcPr = directChildrenByLocalName(cell, "tcPr")[0] ?? null;
  const tcBorders = tcPr ? directChildrenByLocalName(tcPr, "tcBorders")[0] ?? null : null;
  if (!tcBorders) {
    return `border:${insideHCss ?? insideVCss ?? borderCss}`;
  }

  // Cell-level border for one side, else the matching inside border, else the table border.
  const sideCss = (sideName, insideCss) =>
    parseBorderCss(directChildrenByLocalName(tcBorders, sideName)[0] ?? null) ?? insideCss ?? borderCss;

  return [
    `border-top:${sideCss("top", insideHCss)}`,
    `border-right:${sideCss("right", insideVCss)}`,
    `border-bottom:${sideCss("bottom", insideHCss)}`,
    `border-left:${sideCss("left", insideVCss)}`
  ].join(";");
}
439
705
  function tableCellHtml(cell, paragraphIndexMap) {
440
706
  const blocks = [];
441
707
  for (const child of Array.from(cell.children)) {
@@ -456,6 +722,8 @@ function tableCellHtml(cell, paragraphIndexMap) {
456
722
  }
457
723
  function tableToHtml(table, paragraphIndexMap) {
458
724
  const rows = directChildrenByLocalName(table, "tr");
725
+ const gridWidthsPx = parseTblGridWidthsPx(table);
726
+ const tableStyle = parseTableStyleProfile(table);
459
727
  const activeByCol = /* @__PURE__ */ new Map();
460
728
  const allOrigins = [];
461
729
  let nextOriginId = 1;
@@ -482,6 +750,8 @@ function tableToHtml(table, paragraphIndexMap) {
482
750
  }
483
751
  const html = tableCellHtml(cell, paragraphIndexMap);
484
752
  const attrs = [];
753
+ const widthStyle = parseCellWidthStyle(cell, colCursor, colSpan, gridWidthsPx);
754
+ const borderStyle = parseCellBorderStyle(cell, tableStyle);
485
755
  if (vMerge === "restart") {
486
756
  const origin = {
487
757
  id: `m${nextOriginId}`,
@@ -499,7 +769,7 @@ function tableToHtml(table, paragraphIndexMap) {
499
769
  }
500
770
  if (colSpan > 1) attrs.push(`colspan="${colSpan}"`);
501
771
  emittedCells.push(
502
- `<td${attrs.length > 0 ? ` ${attrs.join(" ")}` : ""} style="border:1px solid #222;vertical-align:top;">${html}</td>`
772
+ `<td${attrs.length > 0 ? ` ${attrs.join(" ")}` : ""} style="${borderStyle};vertical-align:top;${widthStyle}">${html}</td>`
503
773
  );
504
774
  colCursor += colSpan;
505
775
  }
@@ -518,7 +788,9 @@ function tableToHtml(table, paragraphIndexMap) {
518
788
  const replacement = origin.rowSpan > 1 ? `rowspan="${origin.rowSpan}"` : "";
519
789
  merged = merged.replace(marker, replacement).replace(/\s{2,}/g, " ");
520
790
  }
521
- return `<table style="border-collapse:collapse;table-layout:fixed;width:100%;border:1px solid #222;">${merged}</table>`;
791
+ const tableWidthStyle = parseTableWidthStyle(table, gridWidthsPx);
792
+ const spacing = tableStyle.borderSpacingPx > 0 ? `border-spacing:${tableStyle.borderSpacingPx.toFixed(2)}px;` : "";
793
+ return `<table style="border-collapse:${tableStyle.borderCollapse};${spacing}table-layout:${tableStyle.tableLayout};${tableWidthStyle};border:${tableStyle.borderCss};">${merged}</table>`;
522
794
  }
523
795
  async function parseDocxToHtmlSnapshot(file) {
524
796
  const maybeArrayBuffer = file.arrayBuffer;
@@ -808,7 +1080,7 @@ function createFallbackWordStyleProfile(sourceFileName = "snapshot") {
808
1080
  paragraphProfiles: []
809
1081
  };
810
1082
  }
811
- function twipToPx(twip) {
1083
/**
 * Converts a twip length to pixels by dividing it by 15.
 *
 * @param {number} twip Length in twips.
 * @returns {number} The same length expressed in pixels.
 */
function twipToPx2(twip) {
  const TWIPS_PER_PX = 15;
  return twip / TWIPS_PER_PX;
}
814
1086
  function getAttr2(node, attr) {
@@ -852,10 +1124,10 @@ function parsePageGeometry(documentXml) {
852
1124
  const top = getTwipAttr(pgMar, "w:top") ?? getTwipAttr(pgMar, "top") ?? null;
853
1125
  const bottom = getTwipAttr(pgMar, "w:bottom") ?? getTwipAttr(pgMar, "bottom") ?? null;
854
1126
  return {
855
- contentWidthPx: pageW === null ? null : twipToPx(pageW - left - right),
856
- pageHeightPx: pageH === null ? null : twipToPx(pageH),
857
- marginTopPx: top === null ? null : twipToPx(top),
858
- marginBottomPx: bottom === null ? null : twipToPx(bottom)
1127
+ contentWidthPx: pageW === null ? null : twipToPx2(pageW - left - right),
1128
+ pageHeightPx: pageH === null ? null : twipToPx2(pageH),
1129
+ marginTopPx: top === null ? null : twipToPx2(top),
1130
+ marginBottomPx: bottom === null ? null : twipToPx2(bottom)
859
1131
  };
860
1132
  }
861
1133
  function parseHeadingAlignFromDocument(documentXml) {
@@ -1028,15 +1300,15 @@ function parseParagraphProfiles(documentXml, numberingMap) {
1028
1300
  text,
1029
1301
  isEmpty: text.length === 0,
1030
1302
  align: parseParagraphAlign(paragraph),
1031
- beforePx: before === null ? null : twipToPx(before),
1032
- afterPx: after === null ? null : twipToPx(after),
1303
+ beforePx: before === null ? null : twipToPx2(before),
1304
+ afterPx: after === null ? null : twipToPx2(after),
1033
1305
  lineHeightRatio: line === null || lineHeightRule !== "auto" ? null : line / 240,
1034
- lineHeightPx: line === null || lineHeightRule === "auto" ? null : twipToPx(line),
1306
+ lineHeightPx: line === null || lineHeightRule === "auto" ? null : twipToPx2(line),
1035
1307
  lineHeightRule,
1036
- indentLeftPx: left === null ? null : twipToPx(left),
1037
- indentRightPx: right === null ? null : twipToPx(right),
1038
- firstLinePx: firstLine === null ? null : twipToPx(firstLine),
1039
- hangingPx: hanging === null ? null : twipToPx(hanging),
1308
+ indentLeftPx: left === null ? null : twipToPx2(left),
1309
+ indentRightPx: right === null ? null : twipToPx2(right),
1310
+ firstLinePx: firstLine === null ? null : twipToPx2(firstLine),
1311
+ hangingPx: hanging === null ? null : twipToPx2(hanging),
1040
1312
  listNumId,
1041
1313
  listLevel,
1042
1314
  listFormat: listSpec?.numFmt ?? null,
@@ -1071,19 +1343,19 @@ function parseTableDefaults(stylesXml) {
1071
1343
  return {
1072
1344
  topPx: (() => {
1073
1345
  const v = getTwipAttr(top, "w:w") ?? getTwipAttr(top, "w") ?? null;
1074
- return v === null ? null : twipToPx(v);
1346
+ return v === null ? null : twipToPx2(v);
1075
1347
  })(),
1076
1348
  leftPx: (() => {
1077
1349
  const v = getTwipAttr(left, "w:w") ?? getTwipAttr(left, "w") ?? null;
1078
- return v === null ? null : twipToPx(v);
1350
+ return v === null ? null : twipToPx2(v);
1079
1351
  })(),
1080
1352
  bottomPx: (() => {
1081
1353
  const v = getTwipAttr(bottom, "w:w") ?? getTwipAttr(bottom, "w") ?? null;
1082
- return v === null ? null : twipToPx(v);
1354
+ return v === null ? null : twipToPx2(v);
1083
1355
  })(),
1084
1356
  rightPx: (() => {
1085
1357
  const v = getTwipAttr(right, "w:w") ?? getTwipAttr(right, "w") ?? null;
1086
- return v === null ? null : twipToPx(v);
1358
+ return v === null ? null : twipToPx2(v);
1087
1359
  })()
1088
1360
  };
1089
1361
  }
@@ -1181,9 +1453,9 @@ function parseDefaults(stylesXml) {
1181
1453
  const rawLineRule = (getAttr2(spacing, "w:lineRule") ?? getAttr2(spacing, "lineRule") ?? "auto").toLowerCase();
1182
1454
  const bodyLineHeightRule = rawLineRule === "exact" ? "exact" : rawLineRule === "atleast" ? "atLeast" : "auto";
1183
1455
  const bodyLineHeightRatio = line === null || bodyLineHeightRule !== "auto" ? null : line / 240;
1184
- const bodyLineHeightPx = line === null || bodyLineHeightRule === "auto" ? null : twipToPx(line);
1456
+ const bodyLineHeightPx = line === null || bodyLineHeightRule === "auto" ? null : twipToPx2(line);
1185
1457
  const after = getTwipAttr(spacing, "w:after") ?? getTwipAttr(spacing, "after") ?? null;
1186
- const paragraphAfterPx = after === null ? null : twipToPx(after);
1458
+ const paragraphAfterPx = after === null ? null : twipToPx2(after);
1187
1459
  return { bodyFontPx, bodyLineHeightRatio, bodyLineHeightPx, bodyLineHeightRule, paragraphAfterPx };
1188
1460
  }
1189
1461
  function parseHeading1Style(stylesXml) {
@@ -1976,6 +2248,9 @@ function collectSemanticStatsFromDocument(doc) {
1976
2248
  imageCount: countElements(doc, "img"),
1977
2249
  anchorImageCount: countElements(doc, 'img[data-word-anchor="1"]'),
1978
2250
  wrappedImageCount: countElements(doc, "img[data-word-wrap]"),
2251
+ ommlCount: countElements(doc, "[data-word-omml]"),
2252
+ chartCount: countElements(doc, "[data-word-chart]"),
2253
+ smartArtCount: countElements(doc, "[data-word-smartart]"),
1979
2254
  listParagraphCount,
1980
2255
  commentRefCount: countElements(doc, "[data-word-comment-ref]"),
1981
2256
  revisionInsCount: countElements(doc, '[data-word-revision="ins"]'),
@@ -2006,7 +2281,7 @@ function clamp01(v) {
2006
2281
  }
2007
2282
  function calculateFidelityScore(expected, actual) {
2008
2283
  const structure = clamp01(
2009
- (ratioScore(actual.paragraphCount, expected.paragraphCount) + ratioScore(actual.headingCount, expected.headingCount) + ratioScore(actual.tableCount, expected.tableCount) + ratioScore(actual.tableCellCount, expected.tableCellCount) + ratioScore(actual.imageCount, expected.imageCount) + ratioScore(actual.listParagraphCount, expected.listParagraphCount)) / 6
2284
+ (ratioScore(actual.paragraphCount, expected.paragraphCount) + ratioScore(actual.headingCount, expected.headingCount) + ratioScore(actual.tableCount, expected.tableCount) + ratioScore(actual.tableCellCount, expected.tableCellCount) + ratioScore(actual.imageCount, expected.imageCount) + ratioScore(actual.ommlCount, expected.ommlCount) + ratioScore(actual.chartCount, expected.chartCount) + ratioScore(actual.smartArtCount, expected.smartArtCount) + ratioScore(actual.listParagraphCount, expected.listParagraphCount)) / 9
2010
2285
  );
2011
2286
  const styleProxy = clamp01(ratioScore(actual.textCharCount, expected.textCharCount));
2012
2287
  const pagination = clamp01(ratioScore(actual.pageSpacerCount, expected.pageSpacerCount));