@usejunior/docx-core 0.9.1 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. package/dist/.tsbuildinfo +1 -1
  2. package/dist/atomizer.d.ts +15 -1
  3. package/dist/atomizer.d.ts.map +1 -1
  4. package/dist/atomizer.js +37 -1
  5. package/dist/atomizer.js.map +1 -1
  6. package/dist/baselines/atomizer/documentReconstructor.d.ts.map +1 -1
  7. package/dist/baselines/atomizer/documentReconstructor.js +218 -90
  8. package/dist/baselines/atomizer/documentReconstructor.js.map +1 -1
  9. package/dist/baselines/atomizer/formattingFidelity.d.ts +99 -0
  10. package/dist/baselines/atomizer/formattingFidelity.d.ts.map +1 -0
  11. package/dist/baselines/atomizer/formattingFidelity.js +449 -0
  12. package/dist/baselines/atomizer/formattingFidelity.js.map +1 -0
  13. package/dist/baselines/atomizer/inPlaceModifier-bookmarks.d.ts +37 -0
  14. package/dist/baselines/atomizer/inPlaceModifier-bookmarks.d.ts.map +1 -0
  15. package/dist/baselines/atomizer/inPlaceModifier-bookmarks.js +189 -0
  16. package/dist/baselines/atomizer/inPlaceModifier-bookmarks.js.map +1 -0
  17. package/dist/baselines/atomizer/inPlaceModifier-containers.d.ts +74 -0
  18. package/dist/baselines/atomizer/inPlaceModifier-containers.d.ts.map +1 -0
  19. package/dist/baselines/atomizer/inPlaceModifier-containers.js +171 -0
  20. package/dist/baselines/atomizer/inPlaceModifier-containers.js.map +1 -0
  21. package/dist/baselines/atomizer/inPlaceModifier-deletion.d.ts +88 -0
  22. package/dist/baselines/atomizer/inPlaceModifier-deletion.d.ts.map +1 -0
  23. package/dist/baselines/atomizer/inPlaceModifier-deletion.js +326 -0
  24. package/dist/baselines/atomizer/inPlaceModifier-deletion.js.map +1 -0
  25. package/dist/baselines/atomizer/inPlaceModifier-postprocess.d.ts +85 -0
  26. package/dist/baselines/atomizer/inPlaceModifier-postprocess.d.ts.map +1 -0
  27. package/dist/baselines/atomizer/inPlaceModifier-postprocess.js +402 -0
  28. package/dist/baselines/atomizer/inPlaceModifier-postprocess.js.map +1 -0
  29. package/dist/baselines/atomizer/inPlaceModifier-presplit.d.ts +39 -0
  30. package/dist/baselines/atomizer/inPlaceModifier-presplit.d.ts.map +1 -0
  31. package/dist/baselines/atomizer/inPlaceModifier-presplit.js +265 -0
  32. package/dist/baselines/atomizer/inPlaceModifier-presplit.js.map +1 -0
  33. package/dist/baselines/atomizer/inPlaceModifier-shared.d.ts +62 -0
  34. package/dist/baselines/atomizer/inPlaceModifier-shared.d.ts.map +1 -0
  35. package/dist/baselines/atomizer/inPlaceModifier-shared.js +139 -0
  36. package/dist/baselines/atomizer/inPlaceModifier-shared.js.map +1 -0
  37. package/dist/baselines/atomizer/inPlaceModifier-wrappers.d.ts +189 -0
  38. package/dist/baselines/atomizer/inPlaceModifier-wrappers.d.ts.map +1 -0
  39. package/dist/baselines/atomizer/inPlaceModifier-wrappers.js +427 -0
  40. package/dist/baselines/atomizer/inPlaceModifier-wrappers.js.map +1 -0
  41. package/dist/baselines/atomizer/inPlaceModifier.d.ts +6 -290
  42. package/dist/baselines/atomizer/inPlaceModifier.d.ts.map +1 -1
  43. package/dist/baselines/atomizer/inPlaceModifier.js +23 -1828
  44. package/dist/baselines/atomizer/inPlaceModifier.js.map +1 -1
  45. package/dist/baselines/atomizer/pipeline.d.ts +76 -1
  46. package/dist/baselines/atomizer/pipeline.d.ts.map +1 -1
  47. package/dist/baselines/atomizer/pipeline.js +204 -27
  48. package/dist/baselines/atomizer/pipeline.js.map +1 -1
  49. package/dist/baselines/atomizer/trackChangesAcceptorAst.d.ts.map +1 -1
  50. package/dist/baselines/atomizer/trackChangesAcceptorAst.js +56 -160
  51. package/dist/baselines/atomizer/trackChangesAcceptorAst.js.map +1 -1
  52. package/dist/compare-types.d.ts +151 -0
  53. package/dist/compare-types.d.ts.map +1 -0
  54. package/dist/compare-types.js +2 -0
  55. package/dist/compare-types.js.map +1 -0
  56. package/dist/core-types.d.ts +5 -1
  57. package/dist/core-types.d.ts.map +1 -1
  58. package/dist/core-types.js +5 -1
  59. package/dist/core-types.js.map +1 -1
  60. package/dist/footnotes.d.ts +8 -3
  61. package/dist/footnotes.d.ts.map +1 -1
  62. package/dist/footnotes.js +8 -3
  63. package/dist/footnotes.js.map +1 -1
  64. package/dist/index.d.ts +6 -150
  65. package/dist/index.d.ts.map +1 -1
  66. package/dist/index.js +6 -0
  67. package/dist/index.js.map +1 -1
  68. package/dist/integration/libreoffice-oracle.d.ts +41 -0
  69. package/dist/integration/libreoffice-oracle.d.ts.map +1 -0
  70. package/dist/integration/libreoffice-oracle.js +282 -0
  71. package/dist/integration/libreoffice-oracle.js.map +1 -0
  72. package/dist/primitives/accept_changes.d.ts +2 -2
  73. package/dist/primitives/accept_changes.d.ts.map +1 -1
  74. package/dist/primitives/accept_changes.js +24 -79
  75. package/dist/primitives/accept_changes.js.map +1 -1
  76. package/dist/primitives/comments.d.ts +12 -3
  77. package/dist/primitives/comments.d.ts.map +1 -1
  78. package/dist/primitives/comments.js +374 -97
  79. package/dist/primitives/comments.js.map +1 -1
  80. package/dist/primitives/content_fingerprint.d.ts +29 -0
  81. package/dist/primitives/content_fingerprint.d.ts.map +1 -0
  82. package/dist/primitives/content_fingerprint.js +63 -0
  83. package/dist/primitives/content_fingerprint.js.map +1 -0
  84. package/dist/primitives/document.d.ts +56 -15
  85. package/dist/primitives/document.d.ts.map +1 -1
  86. package/dist/primitives/document.js +303 -32
  87. package/dist/primitives/document.js.map +1 -1
  88. package/dist/primitives/document_view-comments.d.ts +18 -0
  89. package/dist/primitives/document_view-comments.d.ts.map +1 -0
  90. package/dist/primitives/document_view-comments.js +159 -0
  91. package/dist/primitives/document_view-comments.js.map +1 -0
  92. package/dist/primitives/document_view-headings.d.ts +45 -0
  93. package/dist/primitives/document_view-headings.d.ts.map +1 -0
  94. package/dist/primitives/document_view-headings.js +247 -0
  95. package/dist/primitives/document_view-headings.js.map +1 -0
  96. package/dist/primitives/document_view-styles.d.ts +11 -0
  97. package/dist/primitives/document_view-styles.d.ts.map +1 -0
  98. package/dist/primitives/document_view-styles.js +104 -0
  99. package/dist/primitives/document_view-styles.js.map +1 -0
  100. package/dist/primitives/document_view-toon.d.ts +37 -0
  101. package/dist/primitives/document_view-toon.d.ts.map +1 -0
  102. package/dist/primitives/document_view-toon.js +199 -0
  103. package/dist/primitives/document_view-toon.js.map +1 -0
  104. package/dist/primitives/document_view-types.d.ts +137 -0
  105. package/dist/primitives/document_view-types.d.ts.map +1 -0
  106. package/dist/primitives/document_view-types.js +2 -0
  107. package/dist/primitives/document_view-types.js.map +1 -0
  108. package/dist/primitives/document_view.d.ts +8 -106
  109. package/dist/primitives/document_view.d.ts.map +1 -1
  110. package/dist/primitives/document_view.js +134 -301
  111. package/dist/primitives/document_view.js.map +1 -1
  112. package/dist/primitives/dom-helpers.d.ts +9 -0
  113. package/dist/primitives/dom-helpers.d.ts.map +1 -1
  114. package/dist/primitives/dom-helpers.js +10 -1
  115. package/dist/primitives/dom-helpers.js.map +1 -1
  116. package/dist/primitives/footnotes.d.ts +4 -3
  117. package/dist/primitives/footnotes.d.ts.map +1 -1
  118. package/dist/primitives/footnotes.js +232 -44
  119. package/dist/primitives/footnotes.js.map +1 -1
  120. package/dist/primitives/formatting_tags.d.ts +6 -0
  121. package/dist/primitives/formatting_tags.d.ts.map +1 -1
  122. package/dist/primitives/formatting_tags.js +6 -1
  123. package/dist/primitives/formatting_tags.js.map +1 -1
  124. package/dist/primitives/index.d.ts +6 -0
  125. package/dist/primitives/index.d.ts.map +1 -1
  126. package/dist/primitives/index.js +5 -0
  127. package/dist/primitives/index.js.map +1 -1
  128. package/dist/primitives/layout.d.ts +4 -3
  129. package/dist/primitives/layout.d.ts.map +1 -1
  130. package/dist/primitives/layout.js +32 -3
  131. package/dist/primitives/layout.js.map +1 -1
  132. package/dist/primitives/merge_runs.d.ts +21 -3
  133. package/dist/primitives/merge_runs.d.ts.map +1 -1
  134. package/dist/primitives/merge_runs.js +32 -10
  135. package/dist/primitives/merge_runs.js.map +1 -1
  136. package/dist/primitives/namespaces.d.ts +6 -0
  137. package/dist/primitives/namespaces.d.ts.map +1 -1
  138. package/dist/primitives/namespaces.js +9 -0
  139. package/dist/primitives/namespaces.js.map +1 -1
  140. package/dist/primitives/reject_changes.d.ts +2 -2
  141. package/dist/primitives/reject_changes.d.ts.map +1 -1
  142. package/dist/primitives/reject_changes.js +24 -81
  143. package/dist/primitives/reject_changes.js.map +1 -1
  144. package/dist/primitives/semantic_tags.d.ts +7 -0
  145. package/dist/primitives/semantic_tags.d.ts.map +1 -1
  146. package/dist/primitives/semantic_tags.js +21 -3
  147. package/dist/primitives/semantic_tags.js.map +1 -1
  148. package/dist/primitives/serialize_html.d.ts +36 -0
  149. package/dist/primitives/serialize_html.d.ts.map +1 -0
  150. package/dist/primitives/serialize_html.js +393 -0
  151. package/dist/primitives/serialize_html.js.map +1 -0
  152. package/dist/primitives/serialize_markdown.d.ts +16 -0
  153. package/dist/primitives/serialize_markdown.d.ts.map +1 -0
  154. package/dist/primitives/serialize_markdown.js +300 -0
  155. package/dist/primitives/serialize_markdown.js.map +1 -0
  156. package/dist/primitives/serialize_plaintext.d.ts +15 -0
  157. package/dist/primitives/serialize_plaintext.d.ts.map +1 -0
  158. package/dist/primitives/serialize_plaintext.js +154 -0
  159. package/dist/primitives/serialize_plaintext.js.map +1 -0
  160. package/dist/primitives/styles.js +22 -22
  161. package/dist/primitives/styles.js.map +1 -1
  162. package/dist/primitives/tables.d.ts.map +1 -1
  163. package/dist/primitives/tables.js +13 -3
  164. package/dist/primitives/tables.js.map +1 -1
  165. package/dist/primitives/text.d.ts +2 -1
  166. package/dist/primitives/text.d.ts.map +1 -1
  167. package/dist/primitives/text.js +116 -12
  168. package/dist/primitives/text.js.map +1 -1
  169. package/dist/primitives/track-changes-emitter.d.ts +139 -0
  170. package/dist/primitives/track-changes-emitter.d.ts.map +1 -0
  171. package/dist/primitives/track-changes-emitter.js +241 -0
  172. package/dist/primitives/track-changes-emitter.js.map +1 -0
  173. package/dist/primitives/xml-helpers.d.ts +29 -0
  174. package/dist/primitives/xml-helpers.d.ts.map +1 -0
  175. package/dist/primitives/xml-helpers.js +35 -0
  176. package/dist/primitives/xml-helpers.js.map +1 -0
  177. package/dist/shared/ooxml/namespaces.d.ts +4 -1
  178. package/dist/shared/ooxml/namespaces.d.ts.map +1 -1
  179. package/dist/shared/ooxml/namespaces.js +4 -1
  180. package/dist/shared/ooxml/namespaces.js.map +1 -1
  181. package/package.json +7 -6
@@ -0,0 +1,300 @@
1
+ // DOCX → Markdown serializer.
2
+ //
3
+ // This is a *serializer over the existing structured document model* — it does no OOXML
4
+ // parsing. `DocxDocument.buildDocumentView({ showFormatting: true })` already yields a
5
+ // `DocumentViewNode[]` carrying headings, list metadata, grid-aware table context, injected
6
+ // `[^n]` footnote markers, and an HTML-shaped inline-tag string (`tagged_text`). This module
7
+ // turns that model into GitHub-Flavored Markdown.
8
+ //
9
+ // Markdown is intentionally *lossy*: there is no round-trip guarantee. Constructs without a
10
+ // Markdown equivalent (highlighting, font runs, merged/nested table cells, layout) are
11
+ // downgraded as documented below rather than preserved.
12
+ //
13
+ // The inline tokenizer (`inlineTagsToMarkdown`) is the reusable core; the planned HTML
14
+ // emitter (#304) renders the same tokens, so neither serializer reasons about the tag
15
+ // grammar independently and drifts from the emitter in `formatting_tags.ts`.
16
+ import { tokenizeToonInline } from './document_view.js';
17
+ import { LabelType } from './list_labels.js';
18
/** Footnote markers already injected into `tagged_text`, e.g. `[^1]`, `[^12]`. */
const FOOTNOTE_MARKER_RE = /\[\^\d+\]/g;
/**
 * Backslash-escape the inline Markdown-significant characters that would otherwise be
 * interpreted mid-line. GFM honours backslash escapes for ASCII punctuation, so `\*`
 * renders a literal `*`. Only characters that trigger *inline* constructs are escaped:
 * emphasis (`*`, `_`), code (`` ` ``), links (`[`, `]`), raw HTML (`<`), table pipes (`|`),
 * GFM strikethrough (`~`), and the backslash itself. Block-level triggers (`#`, `-`, `>`, …)
 * are deliberately left alone here so prose is not littered with `\.` and `\-`.
 *
 * Already-present `[^n]` footnote markers are protected: escaping their `[`/`]` would
 * sever them from the appended `[^n]: …` definitions.
 *
 * @param text Literal (non-tag) text.
 * @returns The text with inline-significant characters backslash-escaped.
 */
function escapeInlineText(text) {
    // `~` is included because GFM treats `~text~` / `~~text~~` as strikethrough.
    const escapeSpan = (span) => span.replace(/[\\`*_[\]<|~]/g, (c) => `\\${c}`);
    let out = '';
    let lastIndex = 0;
    // `matchAll` works on a clone of the global regex, so the shared constant's
    // `lastIndex` is never disturbed between calls.
    for (const match of text.matchAll(FOOTNOTE_MARKER_RE)) {
        const idx = match.index ?? 0;
        out += escapeSpan(text.slice(lastIndex, idx));
        out += match[0]; // leave the footnote marker untouched
        lastIndex = idx + match[0].length;
    }
    out += escapeSpan(text.slice(lastIndex));
    return out;
}
44
/**
 * Escape block-level Markdown triggers at the start of *every line* of a paragraph so that
 * visible text is not mis-read as a heading, quote, list, thematic break, or setext
 * heading underline.
 *
 * Escaped at line start (after optional space/tab indentation):
 * - `#`…`######` followed by space, tab, or end of line (ATX heading);
 * - `>` in any position-zero form (a blockquote does not need a trailing space);
 * - `-` / `+` followed by space, tab, or end of line, and `*` followed by space or tab
 *   (bullet list);
 * - `N.` / `N)` followed by space, tab, or end of line (ordered list) — the delimiter is
 *   escaped and the digits are kept;
 * - a line consisting only of `-` or only of `=` characters (thematic break / setext
 *   underline, which would otherwise turn the preceding line into a heading).
 *
 * The emphasis delimiters this serializer emits (`**x**`, `*x*`) are immediately followed by
 * a non-space character, so the `*` rule cannot corrupt generated Markdown.
 *
 * @param line Paragraph text; may contain embedded newlines.
 * @returns The text with leading block triggers backslash-escaped on each line.
 */
function escapeLeadingBlockSyntax(line) {
    // `g` + `m`: test the start of each physical line, not just the start of the string.
    // Indentation is restricted to space/tab so the match can never run across a newline.
    const trigger = /^([ \t]*)(#{1,6}(?=[ \t]|$)|>|[-+](?=[ \t]|$)|\*(?=[ \t])|\d+[.)](?=[ \t]|$)|-+(?=[ \t]*$)|=+(?=[ \t]*$))/gm;
    return line.replace(trigger, (_m, ws, trig) => {
        if (/^\d/.test(trig)) {
            // ordered-list trigger: escape the delimiter (the `.` or `)`), keep the digits
            return `${ws}${trig.slice(0, -1)}\\${trig.slice(-1)}`;
        }
        // every other trigger: a backslash before its first character neutralizes it
        return `${ws}\\${trig}`;
    });
}
59
/**
 * Convert an inline-tagged string (`tagged_text`) into inline Markdown.
 *
 * The string is split by `tokenizeToonInline` into text tokens and tag tokens:
 * - text is escaped with `escapeInlineText`;
 * - `<b>` / `<i>` toggle bold / italic and are rendered as `**` / `*`;
 * - `<u>` / `</u>` are passed through as raw HTML;
 * - `<a href="…">…</a>` becomes `[…](…)`;
 * - every other tag (e.g. highlight, font) is dropped while its inner text is kept.
 *
 * @param text Inline-tagged text.
 * @returns The Markdown rendering of `text`.
 */
export function inlineTagsToMarkdown(text) {
    // A bare CommonMark link destination may not contain whitespace or unbalanced
    // parentheses; fall back to the `<…>` form (escaping `<`, `>`, `\`) when it would.
    const formatLinkDestination = (url) => {
        let depth = 0;
        let balanced = true;
        for (const ch of url) {
            if (ch === '(') {
                depth++;
            }
            else if (ch === ')' && --depth < 0) {
                balanced = false;
                break;
            }
        }
        if (balanced && depth === 0 && !/[\s<>]/.test(url)) {
            return url;
        }
        // line endings are not allowed inside `<…>` destinations
        const singleLine = url.replace(/[\r\n]+/g, '');
        return `<${singleLine.replace(/[<>\\]/g, (c) => `\\${c}`)}>`;
    };
    const ops = [];
    const linkUrls = []; // stack of open <a> hrefs (links don't nest meaningfully)
    for (const token of tokenizeToonInline(text)) {
        if (token.kind === 'text') {
            ops.push({ t: 'md', v: escapeInlineText(token.value) });
            continue;
        }
        const tag = token.value;
        if (tag === '<b>')
            ops.push({ t: 'emph', kind: 'b', dir: 1 });
        else if (tag === '</b>')
            ops.push({ t: 'emph', kind: 'b', dir: -1 });
        else if (tag === '<i>')
            ops.push({ t: 'emph', kind: 'i', dir: 1 });
        else if (tag === '</i>')
            ops.push({ t: 'emph', kind: 'i', dir: -1 });
        else if (tag === '<u>' || tag === '</u>')
            ops.push({ t: 'md', v: tag }); // raw HTML passthrough
        else if (tag.startsWith('<a ')) {
            linkUrls.push(/href="([^"]*)"/.exec(tag)?.[1] ?? '');
            ops.push({ t: 'md', v: '[' });
        }
        else if (tag === '</a>') {
            // A stray </a> with no open link would emit a dangling "]()"; ignore it.
            if (linkUrls.length > 0) {
                ops.push({ t: 'md', v: `](${formatLinkDestination(linkUrls.pop())})` });
            }
        }
        // <highlight>, </highlight>, <font …>, </font> → strip (emit nothing, keep inner text)
    }
    // Defensive: an unbalanced <a> (no closing tag) would leave a dangling "["; close it.
    while (linkUrls.length > 0) {
        ops.push({ t: 'md', v: `](${formatLinkDestination(linkUrls.pop())})` });
    }
    // Emit emphasis delimiters only where the *active* emphasis state actually changes
    // between two text spans. Word splits a single formatted phrase into many runs, so
    // `tagged_text` carries boundary noise: `</b></i><b><i>` (state unchanged across the
    // boundary), interleaved different-kind toggles `</b></i><i><b>`, or empty `<b></b>`
    // pairs. Mapping each toggle naively yields runs like `******` / `****` that render as
    // literal asterisks. Tracking the state and reconciling on a stack collapses all of that
    // to the minimal delimiters while keeping nesting well-formed (`**a*b*c**`).
    const DELIM = { b: '**', i: '*' };
    let out = '';
    const activeStack = []; // emphasis kinds currently open, in open order
    const desired = new Set(); // running target state as we scan emph ops
    const reconcile = () => {
        // Close from the top until every still-open kind is wanted, remembering any wanted
        // kinds we had to close (because they sat above an unwanted one) so we can reopen them.
        const reopen = [];
        while (activeStack.length > 0 && !activeStack.every((k) => desired.has(k))) {
            const k = activeStack.pop();
            out += DELIM[k];
            if (desired.has(k))
                reopen.push(k);
        }
        // Open the kinds that are wanted but not currently open: the reopened ones first (in
        // their original open order), then any brand-new kinds in a stable order (b before i).
        const active = new Set(activeStack);
        const toOpen = [...reopen.reverse(), ...['b', 'i'].filter((k) => desired.has(k))];
        for (const k of toOpen) {
            if (active.has(k))
                continue;
            active.add(k);
            activeStack.push(k);
            out += DELIM[k];
        }
    };
    for (const op of ops) {
        if (op.t === 'emph') {
            // Toggles only update the target state; nothing is emitted until text follows.
            if (op.dir === 1)
                desired.add(op.kind);
            else
                desired.delete(op.kind);
            continue;
        }
        reconcile(); // realize pending state changes before emitting literal text/Markdown
        out += op.v;
    }
    desired.clear();
    reconcile(); // close any still-open emphasis at end of string
    return out;
}
139
/**
 * A heading is structural (gets `#`) only when Word's style told us so and gave a usable
 * numeric level.
 *
 * `NaN` is rejected explicitly: it passes a bare `typeof … === 'number'` check, but the
 * caller would then compute `'#'.repeat(NaN)` and emit a heading line without any `#`.
 *
 * @param node Document view node; `node.heading` may be absent.
 * @returns `true` when the node should be rendered as an ATX heading.
 */
function isStructuralHeading(node) {
    const heading = node.heading;
    return heading?.source === 'word_style'
        && typeof heading.level === 'number'
        && !Number.isNaN(heading.level);
}
143
/**
 * Render one list-item node as a single Markdown list line.
 *
 * - Auto-numbered numeric items become `1. text` (the Markdown renderer numbers them).
 * - Every other item becomes a bullet that keeps its *literal* label (`Section 2.1`,
 *   `Article IV`, `(a)`, …) in front of the text, or a plain bullet when there is no label.
 *
 * Nesting uses four columns per level: a child must be indented at least to the parent's
 * content column (2 after `- `, 3 after `1. `), and four columns satisfies both at every
 * depth. The label and text are escaped so that content such as `1.`, `-` or `[x]` stays
 * literal instead of opening a nested list or task box.
 *
 * @param node Node whose `list_metadata` describes the item (level, label, numbering).
 * @returns The Markdown line, without trailing whitespace.
 */
function renderListItem(node) {
    const LIST_INDENT_WIDTH = 4; // columns per nesting level
    const lm = node.list_metadata;
    const level = Math.max(0, lm.list_level);
    const indent = ' '.repeat(level * LIST_INDENT_WIDTH);
    const text = inlineTagsToMarkdown(node.tagged_text).trim();
    // True auto-numbered numeric lists render as a Markdown ordered list (let the renderer
    // number them). Everything else preserves the *literal* label — legal documents carry
    // meaningful labels like `Section 2.1`, `Article IV`, `(a)`, `(i)` that a bare `1.` would
    // silently destroy.
    if (lm.label_type === LabelType.NUMBER && lm.is_auto_numbered) {
        return `${indent}1. ${escapeLeadingBlockSyntax(text)}`.trimEnd();
    }
    // The label is document text, not Markdown: escape inline-significant characters.
    const label = escapeInlineText(lm.label_string?.trim() ?? '');
    const content = label ? `${label} ${text}` : text;
    // Guard the start of the item body (e.g. a label of `1.` would otherwise nest a list).
    return `${indent}- ${escapeLeadingBlockSyntax(content)}`.trimEnd();
}
161
/**
 * Turn the nodes of one table (all sharing a `table_context.table_id`) into the lines of
 * a GFM pipe table.
 *
 * GFM cannot express merged or nested cells, so the output is intentionally lossy:
 * - grid positions with no node (e.g. skipped by a horizontal merge) are rendered as empty
 *   cells, keeping every row at the same column count;
 * - several nodes landing in the same grid position (multi-paragraph cells, flattened
 *   vertical merges or nested tables) are joined with `<br>`.
 *
 * @param group Nodes belonging to the table; nodes without `table_context` are ignored.
 * @returns The table lines (header, separator, body rows), or `[]` when there is nothing
 *   to render.
 */
function renderTable(group) {
    const cellNodes = group.filter((n) => n.table_context);
    // Column count: the widest of the declared totals and the highest occupied column.
    const totalCols = cellNodes.reduce((widest, { table_context: ctx }) => Math.max(widest, ctx.total_cols, ctx.col_index + 1), 0);
    if (totalCols <= 0) {
        return [];
    }
    // row_index → (col_index → rendered paragraphs in that cell)
    const grid = new Map();
    const headerRows = new Set();
    for (const { table_context: ctx, tagged_text: taggedText } of cellNodes) {
        let rowCells = grid.get(ctx.row_index);
        if (rowCells === undefined) {
            rowCells = new Map();
            grid.set(ctx.row_index, rowCells);
        }
        // A raw newline would end the table row, so intra-cell line breaks become `<br>`.
        const rendered = inlineTagsToMarkdown(taggedText).replace(/\s*\n+\s*/g, '<br>').trim();
        if (!rowCells.has(ctx.col_index)) {
            rowCells.set(ctx.col_index, []);
        }
        if (rendered) {
            rowCells.get(ctx.col_index).push(rendered);
        }
        if (ctx.is_header_row) {
            headerRows.add(ctx.row_index);
        }
    }
    // Map keys iterate in first-seen order; rows are emitted in ascending index order.
    const rowOrder = [...grid.keys()].sort((a, b) => a - b);
    if (rowOrder.length === 0) {
        return [];
    }
    const formatRow = (rowIndex) => {
        const rowCells = grid.get(rowIndex) ?? new Map();
        const cells = [];
        let col = 0;
        while (col < totalCols) {
            cells.push((rowCells.get(col) ?? []).join('<br>'));
            col += 1;
        }
        return `| ${cells.join(' | ')} |`;
    };
    // GFM needs exactly one header row: the first row flagged as a header, else the first row.
    const headerRowIndex = rowOrder.find((ri) => headerRows.has(ri)) ?? rowOrder[0];
    const separator = `| ${Array.from({ length: totalCols }, () => '---').join(' | ')} |`;
    const bodyLines = rowOrder
        .filter((ri) => ri !== headerRowIndex)
        .map((ri) => formatRow(ri));
    return [formatRow(headerRowIndex), separator, ...bodyLines];
}
226
/**
 * Serialize a structured document view to GitHub-Flavored Markdown.
 *
 * Nodes are rendered in order as tables (consecutive nodes sharing a `table_id`),
 * Word-styled headings (`#`…`######`), list items, or plain paragraphs. Footnotes with a
 * positive `displayNumber` are appended as `[^n]: …` definitions.
 *
 * @param nodes Block nodes from `buildDocumentView({ showFormatting: true }).nodes`.
 * @param footnotes Footnotes from `DocxDocument.getFootnotes()` (already sorted by
 * `displayNumber`); appended as `[^n]: …` definitions.
 * @param _opts Accepted for forward compatibility; not read by this function.
 * @returns The Markdown document: blank-line runs collapsed, ending in a single newline.
 */
export function serializeToMarkdown(nodes, footnotes = [], _opts = {}) {
    const blocks = [];
    let inList = false;
    // A list ends with a blank line so the next block is not absorbed into the last item.
    const closeList = () => {
        if (inList) {
            blocks.push('');
            inList = false;
        }
    };
    for (let i = 0; i < nodes.length; i++) {
        const node = nodes[i];
        // ── Tables: consume the whole run of same-table_id nodes at once ──
        if (node.table_context) {
            closeList();
            const tableId = node.table_context.table_id;
            const group = [];
            while (i < nodes.length && nodes[i].table_context?.table_id === tableId) {
                group.push(nodes[i]);
                i++;
            }
            i--; // for-loop will re-increment
            const tableLines = renderTable(group);
            if (tableLines.length > 0) {
                blocks.push(tableLines.join('\n'));
                blocks.push('');
            }
            continue;
        }
        // ── Structural (Word-styled) headings ──
        if (isStructuralHeading(node)) {
            closeList();
            const level = Math.min(6, Math.max(1, node.heading.level));
            // An ATX heading is a single line: a line break inside the heading text would
            // push the remainder out of the heading, so fold it into a space.
            const text = inlineTagsToMarkdown(node.tagged_text).replace(/\s*\n+\s*/g, ' ').trim();
            blocks.push(`${'#'.repeat(level)} ${text}`.trimEnd());
            blocks.push('');
            continue;
        }
        // ── List items ── (`?.`: a node without list metadata is simply not a list item)
        if (node.list_metadata?.list_level >= 0) {
            inList = true;
            blocks.push(renderListItem(node));
            continue;
        }
        // ── Normal paragraphs (heuristic headings land here: their run-in bold already lives
        // in the inline tags, so we keep them as paragraphs rather than inventing a `#`). ──
        closeList();
        // Trim like headings and list items do: leading tabs or 4+ spaces would otherwise
        // turn the paragraph into an indented code block.
        const text = escapeLeadingBlockSyntax(inlineTagsToMarkdown(node.tagged_text).trim());
        if (text === '') {
            blocks.push('');
        }
        else {
            blocks.push(text);
            blocks.push('');
        }
    }
    closeList();
    // ── Footnote definitions ──
    const defs = footnotes.filter((fn) => fn.displayNumber > 0);
    if (defs.length > 0) {
        blocks.push('');
        for (const fn of defs) {
            // A definition must stay on one line, so all whitespace runs collapse to a space.
            const body = escapeInlineText(fn.text.replace(/\s+/g, ' ').trim());
            blocks.push(`[^${fn.displayNumber}]: ${body}`);
        }
    }
    // Collapse runs of blank lines to one and normalize to a single trailing newline.
    return `${blocks.join('\n').replace(/\n{3,}/g, '\n\n').trim()}\n`;
}
300
+ //# sourceMappingURL=serialize_markdown.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"serialize_markdown.js","sourceRoot":"","sources":["../../src/primitives/serialize_markdown.ts"],"names":[],"mappings":"AAAA,8BAA8B;AAC9B,EAAE;AACF,wFAAwF;AACxF,uFAAuF;AACvF,4FAA4F;AAC5F,6FAA6F;AAC7F,kDAAkD;AAClD,EAAE;AACF,4FAA4F;AAC5F,uFAAuF;AACvF,wDAAwD;AACxD,EAAE;AACF,uFAAuF;AACvF,sFAAsF;AACtF,6EAA6E;AAE7E,OAAO,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AAExD,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAG7C,kFAAkF;AAClF,MAAM,kBAAkB,GAAG,YAAY,CAAC;AAExC;;;;;;;;;;GAUG;AACH,SAAS,gBAAgB,CAAC,IAAY;IACpC,MAAM,UAAU,GAAG,CAAC,CAAS,EAAU,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,eAAe,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IAEtF,IAAI,GAAG,GAAG,EAAE,CAAC;IACb,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ,CAAC,kBAAkB,CAAC,EAAE,CAAC;QACtD,MAAM,GAAG,GAAG,KAAK,CAAC,KAAK,IAAI,CAAC,CAAC;QAC7B,GAAG,IAAI,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC,CAAC;QAC9C,GAAG,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,sCAAsC;QACvD,SAAS,GAAG,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IACpC,CAAC;IACD,GAAG,IAAI,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC;IACzC,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;;;GAKG;AACH,SAAS,wBAAwB,CAAC,IAAY;IAC5C,OAAO,IAAI,CAAC,OAAO,CAAC,oDAAoD,EAAE,CAAC,EAAE,EAAE,EAAU,EAAE,IAAY,EAAE,EAAE;QACzG,IAAI,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YACrB,+EAA+E;YAC/E,OAAO,GAAG,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QACxD,CAAC;QACD,OAAO,GAAG,EAAE,KAAK,IAAI,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;IAC7C,CAAC,CAAC,CAAC;AACL,CAAC;AAoBD,MAAM,UAAU,oBAAoB,CAAC,IAAY;IAC/C,MAAM,GAAG,GAAe,EAAE,CAAC;IAC3B,MAAM,QAAQ,GAAa,EAAE,CAAC,CAAC,0DAA0D;IAEzF,KAAK,MAAM,KAAK,IAAI,kBAAkB,CAAC,IAAI,CAAC,EAAE,CAAC;QAC7C,IAAI,KAAK,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;YAC1B,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,gBAAgB,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;YACxD,SAAS;QACX,CAAC;QACD,MAAM,GAAG,GAAG,KAAK,CAAC,KAAK,CAAC;QACxB,IAAI,GAAG,KAAK,KAAK;YAAE,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,MA
AM,EAAE,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;aACzD,IAAI,GAAG,KAAK,MAAM;YAAE,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC;aAChE,IAAI,GAAG,KAAK,KAAK;YAAE,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;aAC9D,IAAI,GAAG,KAAK,MAAM;YAAE,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC;aAChE,IAAI,GAAG,KAAK,KAAK,IAAI,GAAG,KAAK,MAAM;YAAE,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,uBAAuB;aAC3F,IAAI,GAAG,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;YAC/B,QAAQ,CAAC,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;YACrD,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC;QAChC,CAAC;aAAM,IAAI,GAAG,KAAK,MAAM,EAAE,CAAC;YAC1B,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,QAAQ,CAAC,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,CAAC,CAAC;QACzD,CAAC;QACD,uFAAuF;IACzF,CAAC;IAED,sFAAsF;IACtF,OAAO,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC3B,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,QAAQ,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC;IACnD,CAAC;IAED,mFAAmF;IACnF,mFAAmF;IACnF,qFAAqF;IACrF,qFAAqF;IACrF,uFAAuF;IACvF,yFAAyF;IACzF,6EAA6E;IAC7E,MAAM,KAAK,GAA8B,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC;IAC7D,IAAI,GAAG,GAAG,EAAE,CAAC;IACb,MAAM,WAAW,GAAkB,EAAE,CAAC,CAAC,+CAA+C;IACtF,MAAM,OAAO,GAAG,IAAI,GAAG,EAAa,CAAC,CAAC,2CAA2C;IAEjF,MAAM,SAAS,GAAG,GAAS,EAAE;QAC3B,mFAAmF;QACnF,wFAAwF;QACxF,MAAM,MAAM,GAAkB,EAAE,CAAC;QACjC,OAAO,WAAW,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAC3E,MAAM,CAAC,GAAG,WAAW,CAAC,GAAG,EAAG,CAAC;YAC7B,GAAG,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC;YAChB,IAAI,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC;gBAAE,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACrC,CAAC;QACD,qFAAqF;QACrF,uFAAuF;QACvF,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,CAAC;QACpC,MAAM,MAAM,GAAG,CAAC,GAAG,MAAM,CAAC,OAAO
,EAAE,EAAE,GAAI,CAAC,GAAG,EAAE,GAAG,CAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC7F,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE,CAAC;YACvB,IAAI,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;gBAAE,SAAS;YAC5B,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACd,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACpB,GAAG,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;IACH,CAAC,CAAC;IAEF,KAAK,MAAM,EAAE,IAAI,GAAG,EAAE,CAAC;QACrB,IAAI,EAAE,CAAC,CAAC,KAAK,MAAM,EAAE,CAAC;YACpB,IAAI,EAAE,CAAC,GAAG,KAAK,CAAC;gBAAE,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC;;gBAClC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC;YAC7B,SAAS;QACX,CAAC;QACD,SAAS,EAAE,CAAC,CAAC,sEAAsE;QACnF,GAAG,IAAI,EAAE,CAAC,CAAC,CAAC;IACd,CAAC;IACD,OAAO,CAAC,KAAK,EAAE,CAAC;IAChB,SAAS,EAAE,CAAC,CAAC,iDAAiD;IAC9D,OAAO,GAAG,CAAC;AACb,CAAC;AAED,6FAA6F;AAC7F,SAAS,mBAAmB,CAAC,IAAsB;IACjD,OAAO,IAAI,CAAC,OAAO,EAAE,MAAM,KAAK,YAAY,IAAI,OAAO,IAAI,CAAC,OAAO,CAAC,KAAK,KAAK,QAAQ,CAAC;AACzF,CAAC;AAED,SAAS,cAAc,CAAC,IAAsB;IAC5C,MAAM,EAAE,GAAG,IAAI,CAAC,aAAa,CAAC;IAC9B,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC;IACzC,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAClC,MAAM,IAAI,GAAG,oBAAoB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,IAAI,EAAE,CAAC;IAC3D,MAAM,KAAK,GAAG,EAAE,CAAC,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IAE5C,uFAAuF;IACvF,sFAAsF;IACtF,0FAA0F;IAC1F,oBAAoB;IACpB,IAAI,EAAE,CAAC,UAAU,KAAK,SAAS,CAAC,MAAM,IAAI,EAAE,CAAC,gBAAgB,EAAE,CAAC;QAC9D,OAAO,GAAG,MAAM,MAAM,IAAI,EAAE,CAAC,OAAO,EAAE,CAAC;IACzC,CAAC;IACD,IAAI,KAAK,EAAE,CAAC;QACV,OAAO,GAAG,MAAM,KAAK,KAAK,IAAI,IAAI,EAAE,CAAC,OAAO,EAAE,CAAC;IACjD,CAAC;IACD,OAAO,GAAG,MAAM,KAAK,IAAI,EAAE,CAAC,OAAO,EAAE,CAAC;AACxC,CAAC;AAED;;;;;;;;GAQG;AACH,SAAS,WAAW,CAAC,KAAyB;IAC5C,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,EAAE,GAAG,CAAC,CAAC,aAAa,CAAC;QAC3B,IAAI,CAAC,EAAE;YAAE,SAAS;QAClB,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,EAAE,CAAC,UAAU,EAAE,EAAE,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;IACnE,CAAC;IACD,IAAI,SAAS,IAAI,CAAC;QAAE,OAAO,EAAE,CAAC;IAE9B,MAAM,IAAI,GAAG
,IAAI,GAAG,EAAiC,CAAC;IACtD,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,MAAM,UAAU,GAAG,IAAI,GAAG,EAAU,CAAC;IAErC,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,EAAE,GAAG,CAAC,CAAC,aAAa,CAAC;QAC3B,IAAI,CAAC,EAAE;YAAE,SAAS;QAClB,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,SAAS,CAAC,EAAE,CAAC;YAC5B,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,SAAS,EAAE,IAAI,GAAG,EAAE,CAAC,CAAC;YAClC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,SAAS,CAAC,CAAC;QAC9B,CAAC;QACD,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,SAAS,CAAE,CAAC;QACxC,oFAAoF;QACpF,oEAAoE;QACpE,MAAM,QAAQ,GAAG,oBAAoB,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,OAAO,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;QAC1F,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;QAC9C,IAAI,QAAQ;YAAE,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACnC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;QACjC,IAAI,EAAE,CAAC,aAAa;YAAE,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC,SAAS,CAAC,CAAC;IACrD,CAAC;IAED,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC/B,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAErC,MAAM,QAAQ,GAAG,CAAC,QAAgB,EAAY,EAAE;QAC9C,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,IAAI,GAAG,EAAoB,CAAC;QAClE,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACnC,KAAK,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;QAClD,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC,CAAC;IAEF,sFAAsF;IACtF,iEAAiE;IACjE,MAAM,cAAc,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,IAAI,QAAQ,CAAC,CAAC,CAAE,CAAC;IAEjF,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,CAAC,IAAI,CAAC,KAAK,QAAQ,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC1D,KAAK,CAAC,IAAI,CAAC,KAAK,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,EAAE,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAChF,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;QAC1B,IAAI,EAAE,KAAK,cAAc;YAAE,SAAS;QACpC,KAAK,CAAC,IAAI,CAAC,KAAK,QAAQ,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC
;IAChD,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAOD;;;;;;GAMG;AACH,MAAM,UAAU,mBAAmB,CACjC,KAAyB,EACzB,YAAwB,EAAE,EAC1B,QAAkC,EAAE;IAEpC,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,MAAM,GAAG,KAAK,CAAC;IAEnB,MAAM,SAAS,GAAG,GAAS,EAAE;QAC3B,IAAI,MAAM,EAAE,CAAC;YACX,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAChB,MAAM,GAAG,KAAK,CAAC;QACjB,CAAC;IACH,CAAC,CAAC;IAEF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAE,CAAC;QAEvB,qEAAqE;QACrE,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACvB,SAAS,EAAE,CAAC;YACZ,MAAM,OAAO,GAAG,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC;YAC5C,MAAM,KAAK,GAAuB,EAAE,CAAC;YACrC,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,CAAC,CAAE,CAAC,aAAa,EAAE,QAAQ,KAAK,OAAO,EAAE,CAAC;gBACzE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAE,CAAC,CAAC;gBACtB,CAAC,EAAE,CAAC;YACN,CAAC;YACD,CAAC,EAAE,CAAC,CAAC,6BAA6B;YAClC,MAAM,UAAU,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC;YACtC,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC1B,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;gBACnC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAClB,CAAC;YACD,SAAS;QACX,CAAC;QAED,0CAA0C;QAC1C,IAAI,mBAAmB,CAAC,IAAI,CAAC,EAAE,CAAC;YAC9B,SAAS,EAAE,CAAC;YACZ,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,OAAQ,CAAC,KAAe,CAAC,CAAC,CAAC;YACtE,MAAM,IAAI,GAAG,oBAAoB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,IAAI,EAAE,CAAC;YAC3D,MAAM,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,IAAI,EAAE,CAAC,OAAO,EAAE,CAAC,CAAC;YACtD,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAChB,SAAS;QACX,CAAC;QAED,mBAAmB;QACnB,IAAI,IAAI,CAAC,aAAa,CAAC,UAAU,IAAI,CAAC,EAAE,CAAC;YACvC,MAAM,GAAG,IAAI,CAAC;YACd,MAAM,CAAC,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC;YAClC,SAAS;QACX,CAAC;QAED,sFAAsF;QACtF,yFAAyF;QACzF,SAAS,EAAE,CAAC;QACZ,MAAM,IAAI,GAAG,wBAAwB,CAAC,oBAAoB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;QAC9E,IAAI,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;YACvB,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAClB,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAClB,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAClB,CAAC;
IACH,CAAC;IAED,SAAS,EAAE,CAAC;IAEZ,6BAA6B;IAC7B,MAAM,IAAI,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,aAAa,GAAG,CAAC,CAAC,CAAC;IAC5D,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACpB,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAChB,KAAK,MAAM,EAAE,IAAI,IAAI,EAAE,CAAC;YACtB,MAAM,IAAI,GAAG,gBAAgB,CAAC,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;YACnE,MAAM,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,aAAa,MAAM,IAAI,EAAE,CAAC,CAAC;QACjD,CAAC;IACH,CAAC;IAED,OAAO,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC,IAAI,EAAE,IAAI,CAAC;AACpE,CAAC"}
@@ -0,0 +1,15 @@
1
+ import type { DocumentViewNode } from './document_view.js';
2
+ import type { Footnote } from './footnotes.js';
3
+ export interface SerializePlainTextOptions {
4
+ /** Reserved for future knobs (footnote policy, table layout). Currently unused: the member is optional and typed `never`, so this options bag carries no setting yet. */
5
+ readonly _reserved?: never;
6
+ }
7
+ /**
8
+ * Serialize a structured document view to plain text.
9
+ *
10
+ * @param nodes Block nodes from `buildDocumentView({ showFormatting: true }).nodes`.
11
+ * @param footnotes Footnotes from `DocxDocument.getFootnotes()` (already sorted by
12
+ * `displayNumber`); appended as `[^n] …` definitions. Optional.
+ * @param _opts Optional `SerializePlainTextOptions` bag; that interface currently declares
+ * only a reserved placeholder member.
+ * @returns The rendered plain text as a single string.
13
+ */
14
+ export declare function serializeToPlainText(nodes: DocumentViewNode[], footnotes?: Footnote[], _opts?: SerializePlainTextOptions): string;
15
+ //# sourceMappingURL=serialize_plaintext.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"serialize_plaintext.d.ts","sourceRoot":"","sources":["../../src/primitives/serialize_plaintext.ts"],"names":[],"mappings":"AAoBA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAE3D,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,gBAAgB,CAAC;AA2E/C,MAAM,WAAW,yBAAyB;IACxC,mFAAmF;IACnF,QAAQ,CAAC,SAAS,CAAC,EAAE,KAAK,CAAC;CAC5B;AAED;;;;;;GAMG;AACH,wBAAgB,oBAAoB,CAClC,KAAK,EAAE,gBAAgB,EAAE,EACzB,SAAS,GAAE,QAAQ,EAAO,EAC1B,KAAK,GAAE,yBAA8B,GACpC,MAAM,CA4DR"}
@@ -0,0 +1,154 @@
1
+ // DOCX → plain text serializer.
2
+ //
3
+ // The thinnest member of the export family. Like `serialize_markdown.ts`, this is a
4
+ // *serializer over the existing structured document model* — it does no OOXML parsing.
5
+ // `DocxDocument.buildDocumentView({ showFormatting: true })` already yields a
6
+ // `DocumentViewNode[]` carrying headings, list metadata, grid-aware table context, injected
7
+ // `[^n]` footnote markers, and an HTML-shaped inline-tag string (`tagged_text`). This module
8
+ // turns that model into plain text with no markup.
9
+ //
10
+ // Where the Markdown emitter *maps* inline tags to Markdown syntax, the plain-text emitter
11
+ // *strips* them (via `stripAllInlineTags`) and keeps only sensible block separators:
12
+ // - a blank line between block-level paragraphs (including headings),
13
+ // - simple `- ` list bullets (preserving literal legal labels like `Section 2.1`),
14
+ // - tab-separated table cells, one row per line,
15
+ // - injected `[^n]` footnote markers kept inline, definitions appended at the end.
16
+ //
17
+ // Plain text is intentionally *lossy*: all formatting (bold/italic/underline, highlight,
18
+ // fonts, links, merged/nested table cells, layout) is discarded — that is the whole point of
19
+ // a "just give me the text" rendering.
20
+ import { stripAllInlineTags } from './semantic_tags.js';
21
/**
 * Reduce a single `tagged_text` value to plain text.
 *
 * This is a thin wrapper: the work is delegated to `stripAllInlineTags`, which is expected
 * to drop inline/semantic tags while keeping the text they wrap.
 *
 * @param text The `tagged_text` string of one node.
 * @returns Whatever `stripAllInlineTags` returns for `text`.
 */
function toPlainInline(text) {
    const plain = stripAllInlineTags(text);
    return plain;
}
25
/**
 * Format one list node as a single bullet line.
 *
 * The line has the shape `<indent>- <label> <text>` when the node's list metadata carries a
 * non-blank `label_string` (legal documents use meaningful labels such as `Section 2.1`,
 * `Article IV`, `(a)`, `(i)`), and `<indent>- <text>` otherwise. The indent unit is repeated
 * once per list level, with negative levels clamped to 0. Trailing whitespace is removed, so
 * an item without text renders as a bare `-` (or `- <label>`).
 *
 * @param node View node; its `list_metadata` and `tagged_text` are read.
 * @returns The bullet line, without a trailing newline.
 */
function renderListItem(node) {
    const meta = node.list_metadata;
    const depth = Math.max(0, meta.list_level);
    const body = toPlainInline(node.tagged_text).trim();
    const label = meta.label_string?.trim() ?? '';
    // A present label goes between the bullet and the text; otherwise the text follows the bullet.
    const pieces = label ? [label, body] : [body];
    return `${' '.repeat(depth)}- ${pieces.join(' ')}`.trimEnd();
}
42
/**
 * Render a run of nodes sharing a `table_context.table_id` as tab-separated rows.
 *
 * Lossy by design (plain text has no table model):
 * - Grid gaps (e.g. those left by horizontally merged cells) are emitted as empty fields so
 *   every row keeps the same tab-delimited column count (a row `X<gap>Z` → `X\t\tZ`).
 * - Nodes that land on the same (row, column) position are joined with a single space.
 * - Tabs and line breaks inside a cell, together with any whitespace around them, collapse
 *   to one space. A raw newline would split the row, and a raw tab would add a spurious
 *   field and break the fixed column count.
 *
 * @param group Nodes of one table; nodes without a `table_context` are ignored.
 * @returns One string per table row, ordered by ascending `row_index`. Empty when no
 *          positive column count can be determined.
 */
function renderTable(group) {
    // Column count: the widest of the declared grid width and the right-most occupied column.
    let totalCols = 0;
    for (const node of group) {
        const tc = node.table_context;
        if (!tc) {
            continue;
        }
        totalCols = Math.max(totalCols, tc.total_cols, tc.col_index + 1);
    }
    if (totalCols <= 0) {
        return [];
    }

    // row_index -> (col_index -> text fragments, in document order)
    const rows = new Map();
    for (const node of group) {
        const tc = node.table_context;
        if (!tc) {
            continue;
        }
        let cellMap = rows.get(tc.row_index);
        if (!cellMap) {
            cellMap = new Map();
            rows.set(tc.row_index, cellMap);
        }
        let parts = cellMap.get(tc.col_index);
        if (!parts) {
            parts = [];
            cellMap.set(tc.col_index, parts);
        }
        // Tab, LF and CR are the characters that would corrupt a TSV row; fold each run
        // containing one of them (plus surrounding whitespace) into a single space.
        const cellText = toPlainInline(node.tagged_text)
            .replace(/\s*[\t\n\r]+\s*/g, ' ')
            .trim();
        if (cellText) {
            parts.push(cellText);
        }
    }

    const rowOrder = [...rows.keys()].sort((a, b) => a - b);
    const lines = [];
    for (const rowIndex of rowOrder) {
        const cellMap = rows.get(rowIndex);
        const fields = [];
        for (let col = 0; col < totalCols; col++) {
            fields.push((cellMap.get(col) ?? []).join(' '));
        }
        lines.push(fields.join('\t'));
    }
    return lines;
}
91
/**
 * Serialize a structured document view to plain text.
 *
 * Nodes are walked in order and turned into output lines:
 * - consecutive nodes with the same `table_context.table_id` are rendered together by
 *   `renderTable`, followed by a blank line (a table that renders to no rows emits nothing);
 * - nodes whose `list_metadata.list_level` is `>= 0` become one `renderListItem` line each,
 *   with no blank line added after them;
 * - every other node (headings and paragraphs alike) contributes its tag-stripped, trimmed
 *   text followed by a blank line, or just a blank line when that text is empty.
 *
 * Footnotes with a positive `displayNumber` are appended as `[^n] body` lines after one blank
 * separator, with whitespace runs in the body collapsed to single spaces.
 *
 * @param nodes Block nodes from `buildDocumentView({ showFormatting: true }).nodes`.
 * @param footnotes Footnotes from `DocxDocument.getFootnotes()` (already sorted by
 *   `displayNumber`).
 * @param _opts Reserved; not read by this function.
 * @returns The rendered text: runs of three or more newlines collapsed to two, blank lines
 *   removed at both ends, and exactly one trailing newline.
 */
export function serializeToPlainText(nodes, footnotes = [], _opts = {}) {
    const out = [];
    let cursor = 0;
    while (cursor < nodes.length) {
        const current = nodes[cursor];

        // Table: swallow the entire run of nodes that belong to the same table.
        if (current.table_context) {
            const tableId = current.table_context.table_id;
            const run = [];
            while (cursor < nodes.length && nodes[cursor].table_context?.table_id === tableId) {
                run.push(nodes[cursor]);
                cursor += 1;
            }
            const rowLines = renderTable(run);
            if (rowLines.length > 0) {
                out.push(rowLines.join('\n'));
                out.push('');
            }
            continue;
        }

        cursor += 1;

        // List item: one bullet line, no blank line of its own.
        if (current.list_metadata.list_level >= 0) {
            out.push(renderListItem(current));
            continue;
        }

        // Heading or paragraph: plain text has no heading syntax, so both are just text.
        const text = toPlainInline(current.tagged_text).trim();
        if (text !== '') {
            out.push(text);
        }
        out.push('');
    }

    // Footnote definitions, separated from the body by a blank line.
    const definitions = footnotes.filter((fn) => fn.displayNumber > 0);
    if (definitions.length > 0) {
        out.push('');
        definitions.forEach((fn) => {
            const body = fn.text.replace(/\s+/g, ' ').trim();
            out.push(`[^${fn.displayNumber}] ${body}`.trimEnd());
        });
    }

    // Strip only blank *lines* at the document edges, never other whitespace: a leading or
    // trailing tab can be a meaningful empty TSV field of a boundary table row (e.g. `\tZ`),
    // and a plain `.trim()` would remove it and change that row's column count.
    const joined = out.join('\n');
    const collapsed = joined.replace(/\n{3,}/g, '\n\n');
    const rendered = collapsed.replace(/^\n+/, '').replace(/\n+$/, '');
    return `${rendered}\n`;
}
154
+ //# sourceMappingURL=serialize_plaintext.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"serialize_plaintext.js","sourceRoot":"","sources":["../../src/primitives/serialize_plaintext.ts"],"names":[],"mappings":"AAAA,gCAAgC;AAChC,EAAE;AACF,oFAAoF;AACpF,uFAAuF;AACvF,8EAA8E;AAC9E,4FAA4F;AAC5F,6FAA6F;AAC7F,mDAAmD;AACnD,EAAE;AACF,2FAA2F;AAC3F,qFAAqF;AACrF,wEAAwE;AACxE,qFAAqF;AACrF,mDAAmD;AACnD,qFAAqF;AACrF,EAAE;AACF,yFAAyF;AACzF,6FAA6F;AAC7F,uCAAuC;AAGvC,OAAO,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AAGxD,gGAAgG;AAChG,SAAS,aAAa,CAAC,IAAY;IACjC,OAAO,kBAAkB,CAAC,IAAI,CAAC,CAAC;AAClC,CAAC;AAED;;;;;GAKG;AACH,SAAS,cAAc,CAAC,IAAsB;IAC5C,MAAM,EAAE,GAAG,IAAI,CAAC,aAAa,CAAC;IAC9B,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC;IACzC,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAClC,MAAM,IAAI,GAAG,aAAa,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,IAAI,EAAE,CAAC;IACpD,MAAM,KAAK,GAAG,EAAE,CAAC,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IAC5C,IAAI,KAAK,EAAE,CAAC;QACV,OAAO,GAAG,MAAM,KAAK,KAAK,IAAI,IAAI,EAAE,CAAC,OAAO,EAAE,CAAC;IACjD,CAAC;IACD,OAAO,GAAG,MAAM,KAAK,IAAI,EAAE,CAAC,OAAO,EAAE,CAAC;AACxC,CAAC;AAED;;;;;;;;;GASG;AACH,SAAS,WAAW,CAAC,KAAyB;IAC5C,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,EAAE,GAAG,CAAC,CAAC,aAAa,CAAC;QAC3B,IAAI,CAAC,EAAE;YAAE,SAAS;QAClB,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,EAAE,CAAC,UAAU,EAAE,EAAE,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;IACnE,CAAC;IACD,IAAI,SAAS,IAAI,CAAC;QAAE,OAAO,EAAE,CAAC;IAE9B,MAAM,IAAI,GAAG,IAAI,GAAG,EAAiC,CAAC;IACtD,MAAM,QAAQ,GAAa,EAAE,CAAC;IAE9B,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,EAAE,GAAG,CAAC,CAAC,aAAa,CAAC;QAC3B,IAAI,CAAC,EAAE;YAAE,SAAS;QAClB,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,SAAS,CAAC,EAAE,CAAC;YAC5B,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,SAAS,EAAE,IAAI,GAAG,EAAE,CAAC,CAAC;YAClC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,SAAS,CAAC,CAAC;QAC9B,CAAC;QACD,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,SAAS,CAAE,CAAC;QACxC,MAAM,QAAQ,GAAG,aAAa,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,OAAO,CAAC,YAAY,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QAChF,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,SAAS,CAAC,IAAI,E
AAE,CAAC;QAC9C,IAAI,QAAQ;YAAE,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACnC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;IACnC,CAAC;IAED,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAE/B,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;QAC1B,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,IAAI,GAAG,EAAoB,CAAC;QAC5D,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACnC,KAAK,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;QAC/C,CAAC;QACD,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;IAC/B,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAOD;;;;;;GAMG;AACH,MAAM,UAAU,oBAAoB,CAClC,KAAyB,EACzB,YAAwB,EAAE,EAC1B,QAAmC,EAAE;IAErC,MAAM,MAAM,GAAa,EAAE,CAAC;IAE5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAE,CAAC;QAEvB,qEAAqE;QACrE,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACvB,MAAM,OAAO,GAAG,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC;YAC5C,MAAM,KAAK,GAAuB,EAAE,CAAC;YACrC,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,CAAC,CAAE,CAAC,aAAa,EAAE,QAAQ,KAAK,OAAO,EAAE,CAAC;gBACzE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAE,CAAC,CAAC;gBACtB,CAAC,EAAE,CAAC;YACN,CAAC;YACD,CAAC,EAAE,CAAC,CAAC,6BAA6B;YAClC,MAAM,UAAU,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC;YACtC,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC1B,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;gBACnC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAClB,CAAC;YACD,SAAS;QACX,CAAC;QAED,kEAAkE;QAClE,IAAI,IAAI,CAAC,aAAa,CAAC,UAAU,IAAI,CAAC,EAAE,CAAC;YACvC,MAAM,CAAC,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC;YAClC,SAAS;QACX,CAAC;QAED,oFAAoF;QACpF,+EAA+E;QAC/E,MAAM,IAAI,GAAG,aAAa,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,IAAI,EAAE,CAAC;QACpD,IAAI,IAAI,KAAK,EAAE,EAAE,CAAC;YAChB,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAClB,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAClB,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAClB,CAAC;IACH,CAAC;IAED,6BAA6B;IAC7B,MAA
M,IAAI,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,aAAa,GAAG,CAAC,CAAC,CAAC;IAC5D,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACpB,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAChB,KAAK,MAAM,EAAE,IAAI,IAAI,EAAE,CAAC;YACtB,MAAM,IAAI,GAAG,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;YACjD,MAAM,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,aAAa,KAAK,IAAI,EAAE,CAAC,OAAO,EAAE,CAAC,CAAC;QAC1D,CAAC;IACH,CAAC;IAED,2FAA2F;IAC3F,0FAA0F;IAC1F,4FAA4F;IAC5F,oDAAoD;IACpD,MAAM,QAAQ,GAAG,MAAM;SACpB,IAAI,CAAC,IAAI,CAAC;SACV,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC;SAC1B,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;SACnB,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;IACvB,OAAO,GAAG,QAAQ,IAAI,CAAC;AACzB,CAAC"}
@@ -1,9 +1,9 @@
1
1
  import { OOXML, W } from './namespaces.js';
2
+ import { getAttributeSafe, getFirstChild } from './xml-helpers.js';
2
3
  function getWAttr(el, localName) {
3
- // Use || instead of ?? getAttributeNS returns "" (not null) when the
4
- // attribute exists without proper NS binding (e.g. set via setAttribute
5
- // instead of setAttributeNS).
6
- return el.getAttributeNS(OOXML.W_NS, localName) || el.getAttribute(`w:${localName}`) || el.getAttribute(localName) || null;
4
+ // Preserve legacy truthy fallback for empty strings from namespace-bound reads
5
+ // when attributes were written without a real namespace binding.
6
+ return getAttributeSafe(el, OOXML.W_NS, localName, 'w', { emptyIsMissing: true });
7
7
  }
8
8
  export function parseStylesXml(stylesDoc) {
9
9
  const byId = new Map();
@@ -14,10 +14,10 @@ export function parseStylesXml(stylesDoc) {
14
14
  const id = getWAttr(st, 'styleId');
15
15
  if (!id)
16
16
  continue;
17
- const nameEl = st.getElementsByTagNameNS(OOXML.W_NS, W.name).item(0);
18
- const basedOnEl = st.getElementsByTagNameNS(OOXML.W_NS, W.basedOn).item(0);
19
- const pPr = st.getElementsByTagNameNS(OOXML.W_NS, W.pPr).item(0);
20
- const rPr = st.getElementsByTagNameNS(OOXML.W_NS, W.rPr).item(0);
17
+ const nameEl = getFirstChild(st, OOXML.W_NS, W.name);
18
+ const basedOnEl = getFirstChild(st, OOXML.W_NS, W.basedOn);
19
+ const pPr = getFirstChild(st, OOXML.W_NS, W.pPr);
20
+ const rPr = getFirstChild(st, OOXML.W_NS, W.rPr);
21
21
  const name = nameEl ? (getWAttr(nameEl, 'val') ?? id) : id;
22
22
  const basedOn = basedOnEl ? (getWAttr(basedOnEl, 'val') ?? null) : null;
23
23
  byId.set(id, {
@@ -85,15 +85,15 @@ function firstNonNull(vals) {
85
85
  return null;
86
86
  }
87
87
  export function extractParagraphFormatting(pPr, styles) {
88
- const pStyleEl = pPr ? pPr.getElementsByTagNameNS(OOXML.W_NS, W.pStyle).item(0) : null;
88
+ const pStyleEl = pPr ? getFirstChild(pPr, OOXML.W_NS, W.pStyle) : null;
89
89
  const styleId = pStyleEl ? (getWAttr(pStyleEl, 'val') ?? null) : null;
90
90
  const chain = resolveStyleChain(styles, styleId);
91
91
  const styleName = (styleId && styles.byId.get(styleId)?.name) || styleId || '';
92
92
  // Resolve alignment and indents: direct pPr overrides style chain.
93
- const directJc = pPr ? pPr.getElementsByTagNameNS(OOXML.W_NS, W.jc).item(0) : null;
94
- const directInd = pPr ? pPr.getElementsByTagNameNS(OOXML.W_NS, W.ind).item(0) : null;
95
- const styleJc = firstNonNull(chain.map((s) => (s.pPr ? s.pPr.getElementsByTagNameNS(OOXML.W_NS, W.jc).item(0) : null)));
96
- const styleInd = firstNonNull(chain.map((s) => (s.pPr ? s.pPr.getElementsByTagNameNS(OOXML.W_NS, W.ind).item(0) : null)));
93
+ const directJc = pPr ? getFirstChild(pPr, OOXML.W_NS, W.jc) : null;
94
+ const directInd = pPr ? getFirstChild(pPr, OOXML.W_NS, W.ind) : null;
95
+ const styleJc = firstNonNull(chain.map((s) => (s.pPr ? getFirstChild(s.pPr, OOXML.W_NS, W.jc) : null)));
96
+ const styleInd = firstNonNull(chain.map((s) => (s.pPr ? getFirstChild(s.pPr, OOXML.W_NS, W.ind) : null)));
97
97
  const alignment = parseAlignment(directJc ?? styleJc);
98
98
  const ind = parseIndentPt(directInd ?? styleInd);
99
99
  return {
@@ -107,7 +107,7 @@ export function extractParagraphFormatting(pPr, styles) {
107
107
  function parseBoolProp(parent, tagLocal) {
108
108
  if (!parent)
109
109
  return null;
110
- const el = parent.getElementsByTagNameNS(OOXML.W_NS, tagLocal).item(0);
110
+ const el = getFirstChild(parent, OOXML.W_NS, tagLocal);
111
111
  if (!el)
112
112
  return null;
113
113
  // <w:b/> implies true. <w:b w:val="0"/> implies false.
@@ -119,7 +119,7 @@ function parseBoolProp(parent, tagLocal) {
119
119
  function parseUnderline(parent) {
120
120
  if (!parent)
121
121
  return null;
122
- const el = parent.getElementsByTagNameNS(OOXML.W_NS, W.u).item(0);
122
+ const el = getFirstChild(parent, OOXML.W_NS, W.u);
123
123
  if (!el)
124
124
  return null;
125
125
  const v = getWAttr(el, 'val');
@@ -130,7 +130,7 @@ function parseUnderline(parent) {
130
130
  function parseFontName(parent) {
131
131
  if (!parent)
132
132
  return null;
133
- const el = parent.getElementsByTagNameNS(OOXML.W_NS, W.rFonts).item(0);
133
+ const el = getFirstChild(parent, OOXML.W_NS, W.rFonts);
134
134
  if (!el)
135
135
  return null;
136
136
  return getWAttr(el, 'ascii') ?? getWAttr(el, 'hAnsi') ?? getWAttr(el, 'cs') ?? getWAttr(el, 'val') ?? null;
@@ -138,7 +138,7 @@ function parseFontName(parent) {
138
138
  function parseFontSizePt(parent) {
139
139
  if (!parent)
140
140
  return null;
141
- const el = parent.getElementsByTagNameNS(OOXML.W_NS, W.sz).item(0);
141
+ const el = getFirstChild(parent, OOXML.W_NS, W.sz);
142
142
  if (!el)
143
143
  return null;
144
144
  const valStr = getWAttr(el, 'val') || el.getAttribute('val');
@@ -153,7 +153,7 @@ function parseFontSizePt(parent) {
153
153
  function parseColorHex(parent) {
154
154
  if (!parent)
155
155
  return null;
156
- const el = parent.getElementsByTagNameNS(OOXML.W_NS, W.color).item(0);
156
+ const el = getFirstChild(parent, OOXML.W_NS, W.color);
157
157
  if (!el)
158
158
  return null;
159
159
  const v = getWAttr(el, 'val') || el.getAttribute('val');
@@ -164,7 +164,7 @@ function parseColorHex(parent) {
164
164
  function parseHighlightVal(parent) {
165
165
  if (!parent)
166
166
  return null;
167
- const el = parent.getElementsByTagNameNS(OOXML.W_NS, W.highlight).item(0);
167
+ const el = getFirstChild(parent, OOXML.W_NS, W.highlight);
168
168
  if (!el)
169
169
  return null;
170
170
  const v = getWAttr(el, 'val');
@@ -175,10 +175,10 @@ function parseHighlightVal(parent) {
175
175
  export function extractEffectiveRunFormatting(params) {
176
176
  const { run, paragraphPPr, paragraphStyleId, styles } = params;
177
177
  const isRun = run.localName === W.r || run.localName === 'r';
178
- const rPr = isRun ? run.getElementsByTagNameNS(OOXML.W_NS, W.rPr).item(0) : null;
179
- const pRPr = paragraphPPr ? paragraphPPr.getElementsByTagNameNS(OOXML.W_NS, W.rPr).item(0) : null;
178
+ const rPr = isRun ? getFirstChild(run, OOXML.W_NS, W.rPr) : null;
179
+ const pRPr = paragraphPPr ? getFirstChild(paragraphPPr, OOXML.W_NS, W.rPr) : null;
180
180
  // Resolve w:rStyle character style chain (e.g. "Strong" → bold via style definition).
181
- const rStyleEl = rPr?.getElementsByTagNameNS(OOXML.W_NS, W.rStyle).item(0);
181
+ const rStyleEl = rPr ? getFirstChild(rPr, OOXML.W_NS, W.rStyle) : null;
182
182
  const rStyleId = rStyleEl ? (getWAttr(rStyleEl, 'val') ?? null) : null;
183
183
  const rStyleChain = resolveStyleChain(styles, rStyleId);
184
184
  const rStyleRPr = firstNonNull(rStyleChain.map((s) => s.rPr));