@lotics/docx 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. package/package.json +40 -0
  2. package/src/fixtures/.gitkeep +0 -0
  3. package/src/fixtures/lotics_generated_contract.docx +0 -0
  4. package/src/fonts/bundled.ts +123 -0
  5. package/src/fonts/registry.test.ts +233 -0
  6. package/src/fonts/registry.ts +219 -0
  7. package/src/fonts/types.ts +83 -0
  8. package/src/index.ts +16 -0
  9. package/src/layout/engine.test.ts +430 -0
  10. package/src/layout/engine.ts +566 -0
  11. package/src/layout/page_geometry.ts +43 -0
  12. package/src/layout/types.ts +159 -0
  13. package/src/load.test.ts +144 -0
  14. package/src/load.ts +142 -0
  15. package/src/model/default_numbering.ts +101 -0
  16. package/src/model/default_styles.ts +201 -0
  17. package/src/model/numbering_table.ts +52 -0
  18. package/src/model/properties.ts +328 -0
  19. package/src/model/sections.ts +94 -0
  20. package/src/model/style_resolution.test.ts +219 -0
  21. package/src/model/style_resolution.ts +113 -0
  22. package/src/model/style_table.ts +22 -0
  23. package/src/model/theme.ts +156 -0
  24. package/src/model/types.ts +55 -0
  25. package/src/parse/drawing.ts +157 -0
  26. package/src/parse/font_table.ts +132 -0
  27. package/src/parse/footnotes.ts +60 -0
  28. package/src/parse/header_footer.test.ts +264 -0
  29. package/src/parse/header_footer.ts +66 -0
  30. package/src/parse/numbering.ts +187 -0
  31. package/src/parse/parser.ts +184 -0
  32. package/src/parse/relationships.ts +83 -0
  33. package/src/parse/sections.test.ts +192 -0
  34. package/src/parse/sections.ts +182 -0
  35. package/src/parse/styles.ts +149 -0
  36. package/src/parse/theme.test.ts +86 -0
  37. package/src/parse/theme.ts +112 -0
  38. package/src/pm/bubble_menu.ts +117 -0
  39. package/src/pm/commands.test.ts +185 -0
  40. package/src/pm/commands.ts +697 -0
  41. package/src/pm/commands_insert.test.ts +183 -0
  42. package/src/pm/docx_to_pm.test.ts +330 -0
  43. package/src/pm/docx_to_pm.ts +643 -0
  44. package/src/pm/drag_handle.ts +166 -0
  45. package/src/pm/format_painter.test.ts +91 -0
  46. package/src/pm/format_painter.ts +109 -0
  47. package/src/pm/header_footer_doc.ts +24 -0
  48. package/src/pm/hyperlinks.test.ts +234 -0
  49. package/src/pm/image_registry.test.ts +81 -0
  50. package/src/pm/image_registry.ts +100 -0
  51. package/src/pm/images.test.ts +257 -0
  52. package/src/pm/link_popover.ts +159 -0
  53. package/src/pm/mark_commands.ts +60 -0
  54. package/src/pm/marks.ts +169 -0
  55. package/src/pm/nodes.ts +258 -0
  56. package/src/pm/numbering.test.ts +210 -0
  57. package/src/pm/numbering_plugin.test.ts +71 -0
  58. package/src/pm/numbering_plugin.ts +96 -0
  59. package/src/pm/outline.ts +41 -0
  60. package/src/pm/page_break.test.ts +80 -0
  61. package/src/pm/page_layout.test.ts +87 -0
  62. package/src/pm/pagination_plugin.test.ts +155 -0
  63. package/src/pm/pagination_plugin.ts +590 -0
  64. package/src/pm/phase5.test.ts +271 -0
  65. package/src/pm/phase6.test.ts +215 -0
  66. package/src/pm/placeholder_plugin.ts +24 -0
  67. package/src/pm/plugins.ts +91 -0
  68. package/src/pm/pm_to_docx.ts +0 -0
  69. package/src/pm/roundtrip.test.ts +332 -0
  70. package/src/pm/schema.test.ts +188 -0
  71. package/src/pm/schema.ts +79 -0
  72. package/src/pm/search.ts +46 -0
  73. package/src/pm/table_attrs.ts +48 -0
  74. package/src/pm/table_borders.test.ts +117 -0
  75. package/src/pm/table_borders.ts +130 -0
  76. package/src/pm/table_convert.test.ts +221 -0
  77. package/src/pm/table_convert.ts +541 -0
  78. package/src/pm/table_decorations.ts +132 -0
  79. package/src/pm/table_handles.ts +163 -0
  80. package/src/pm/template_marker.ts +47 -0
  81. package/src/pm/template_plugin.ts +65 -0
  82. package/src/pm/templates.test.ts +162 -0
  83. package/src/render/clipboard.test.ts +115 -0
  84. package/src/render/clipboard.ts +200 -0
  85. package/src/render/editable_view.test.ts +173 -0
  86. package/src/render/footnotes_view.ts +94 -0
  87. package/src/render/header_footer_view.ts +95 -0
  88. package/src/render/link_mark_view.ts +26 -0
  89. package/src/render/media_resolver.ts +61 -0
  90. package/src/render/node_views.ts +296 -0
  91. package/src/render/numbering_counter.ts +149 -0
  92. package/src/render/page_chrome.test.ts +262 -0
  93. package/src/render/page_chrome.ts +343 -0
  94. package/src/render/page_styles.ts +234 -0
  95. package/src/render/paragraph_view.test.ts +162 -0
  96. package/src/render/paragraph_view.ts +141 -0
  97. package/src/render/ruler.ts +110 -0
  98. package/src/render/style_registry.ts +33 -0
  99. package/src/render/table_dom.test.ts +171 -0
  100. package/src/render/table_dom.ts +288 -0
  101. package/src/render/units.ts +18 -0
  102. package/src/render/view.test.ts +165 -0
  103. package/src/render/view.ts +607 -0
  104. package/src/roundtrip.test.ts +179 -0
  105. package/src/serialize/default_parts.ts +128 -0
  106. package/src/serialize/header_footer_pm.ts +82 -0
  107. package/src/serialize/serializer.ts +114 -0
@@ -0,0 +1,643 @@
1
+ import { Mark, Node as PMNode } from "prosemirror-model";
2
+ import {
3
+ getChildren,
4
+ getTagName,
5
+ getTextContent,
6
+ isTextNode,
7
+ type XmlElement,
8
+ } from "@lotics/ooxml/xml";
9
+ import {
10
+ EMPTY_PARAGRAPH_PROPERTIES,
11
+ EMPTY_RUN_PROPERTIES,
12
+ extractParagraphProperties,
13
+ extractRunProperties,
14
+ type ParagraphProperties,
15
+ type RunProperties,
16
+ } from "../model/properties";
17
+ import { parseSectionProperties } from "../parse/sections";
18
+ import { parseDrawing } from "../parse/drawing";
19
+ import {
20
+ resolveParagraphProperties,
21
+ resolveRunProperties,
22
+ } from "../model/style_resolution";
23
+ import type { StyleTable } from "../model/style_table";
24
+ import type { NumberingTable } from "../model/numbering_table";
25
+ import {
26
+ computeLabel,
27
+ createNumberingState,
28
+ } from "../render/numbering_counter";
29
+ import type { Block, DocxDocument, Inline, Run, RunChild } from "../model/types";
30
+ import { docxSchema } from "./schema";
31
+ import { xmlToTableNode } from "./table_convert";
32
+ import { parseBlockXml } from "../parse/parser";
33
+
34
/**
 * Tag names of `w:rPr` children that `partitionRunProperties` treats as
 * recognized. Every other run-property element is kept aside as "unknown".
 */
const MARK_PROPERTY_TAGS: Set<string> = new Set([
  "w:rStyle",
  "w:b", "w:i", "w:u",
  "w:strike", "w:dstrike",
  "w:caps", "w:smallCaps",
  "w:color", "w:highlight",
  "w:sz", "w:rFonts", "w:vertAlign",
]);
49
+
50
/**
 * Splits the children of a `w:rPr` element into the ones listed in
 * `MARK_PROPERTY_TAGS` (parsed via `extractRunProperties`) and everything
 * else, which is returned untouched.
 *
 * @param rPr Run-property elements, or `null` when the run has none.
 * @returns Parsed recognized properties plus the raw unknown elements. For a
 *   `null` input a fresh copy of `EMPTY_RUN_PROPERTIES` and an empty list.
 */
function partitionRunProperties(
  rPr: readonly XmlElement[] | null,
): { recognized: RunProperties; unknown: XmlElement[] } {
  if (rPr === null) {
    return { recognized: { ...EMPTY_RUN_PROPERTIES }, unknown: [] };
  }

  const known: XmlElement[] = [];
  const unknown: XmlElement[] = [];
  for (const element of rPr) {
    const name = getTagName(element);
    // Elements without a tag name end up in the unknown bucket as well.
    const bucket = name && MARK_PROPERTY_TAGS.has(name) ? known : unknown;
    bucket.push(element);
  }

  return { recognized: extractRunProperties(known), unknown };
}
68
+
69
/**
 * Tag names of `w:pPr` children that `partitionParagraphProperties` treats as
 * recognized. Other elements (except `w:sectPr`) are kept aside as "unknown".
 */
const PARAGRAPH_PROPERTY_TAGS: Set<string> = new Set([
  "w:pStyle",
  "w:jc", "w:ind", "w:spacing",
  "w:numPr",
  "w:pageBreakBefore", "w:keepLines", "w:keepNext", "w:widowControl",
  "w:outlineLvl",
]);
81
+
82
/**
 * Splits the children of a `w:pPr` element into the ones listed in
 * `PARAGRAPH_PROPERTY_TAGS` (parsed via `extractParagraphProperties`) and the
 * remaining elements, which are returned untouched.
 *
 * A `w:sectPr` child is dropped from both groups; `findInlineSectPr` is the
 * function that looks it up.
 *
 * @param pPr Paragraph-property elements, or `null` when there are none.
 * @returns Parsed recognized properties plus the raw unknown elements. For a
 *   `null` input a fresh copy of `EMPTY_PARAGRAPH_PROPERTIES` and an empty list.
 */
function partitionParagraphProperties(
  pPr: readonly XmlElement[] | null,
): { recognized: ParagraphProperties; unknown: XmlElement[] } {
  if (pPr === null) {
    return { recognized: { ...EMPTY_PARAGRAPH_PROPERTIES }, unknown: [] };
  }

  const known: XmlElement[] = [];
  const unknown: XmlElement[] = [];
  for (const element of pPr) {
    const name = getTagName(element);
    if (name === "w:sectPr") continue;
    const bucket = name && PARAGRAPH_PROPERTY_TAGS.has(name) ? known : unknown;
    bucket.push(element);
  }

  return { recognized: extractParagraphProperties(known), unknown };
}
101
+
102
/**
 * Returns the first `w:sectPr` element among the given paragraph-property
 * elements, or `null` when the list is `null` or contains none.
 */
function findInlineSectPr(pPr: readonly XmlElement[] | null): XmlElement | null {
  if (pPr === null) return null;
  const match = pPr.find((element) => getTagName(element) === "w:sectPr");
  return match === undefined ? null : match;
}
109
+
110
/**
 * Builds the list of ProseMirror marks that represent the given run
 * properties, using the mark types of `docxSchema`.
 *
 * Boolean properties only produce a mark when they are exactly `true`;
 * nullable properties produce a mark whenever they are not `null`.
 *
 * @param props Parsed run properties.
 * @returns Marks in a fixed order (toggles, vertical alignment, color,
 *   highlight, font, size, style reference).
 */
function marksFromRunProperties(props: RunProperties): readonly Mark[] {
  const types = docxSchema.marks;
  const {
    bold,
    italic,
    underline,
    strike,
    doubleStrike,
    caps,
    smallCaps,
    vertAlign,
    color,
    highlight,
    fontAscii,
    fontHAnsi,
    fontEastAsia,
    fontComplexScript,
    size,
    styleId,
  } = props;

  const result: Mark[] = [];

  if (bold === true) result.push(types.bold.create());
  if (italic === true) result.push(types.italic.create());
  if (underline !== null) result.push(types.underline.create());
  if (strike === true) result.push(types.strike.create());
  if (doubleStrike === true) result.push(types.doubleStrike.create());
  if (caps === true) result.push(types.caps.create());
  if (smallCaps === true) result.push(types.smallCaps.create());

  if (vertAlign === "superscript") {
    result.push(types.superscript.create());
  } else if (vertAlign === "subscript") {
    result.push(types.subscript.create());
  }

  if (color !== null) result.push(types.color.create({ value: color }));
  if (highlight !== null) {
    result.push(types.highlight.create({ value: highlight }));
  }

  // One font mark carries all four script slots as soon as any of them is set.
  const hasAnyFont = [fontAscii, fontHAnsi, fontEastAsia, fontComplexScript].some(
    (face) => face !== null,
  );
  if (hasAnyFont) {
    result.push(
      types.font.create({
        ascii: fontAscii,
        hAnsi: fontHAnsi,
        eastAsia: fontEastAsia,
        complexScript: fontComplexScript,
      }),
    );
  }

  if (size !== null) result.push(types.size.create({ halfPoints: size }));
  if (styleId !== null) result.push(types.styleRef.create({ styleId }));

  return result;
}
149
+
150
/** Mutable state shared across one document conversion. */
type ConvertState = {
  /** Returns the display number for a footnote id, assigning one on first use. */
  footnoteNumberFor(id: string): number;
};

/**
 * Creates a fresh conversion state.
 *
 * Footnote numbers start at 1 and are handed out in order of first request;
 * asking again for an id that was already seen returns the same number.
 */
function createConvertState(): ConvertState {
  const assigned = new Map<string, number>();

  const footnoteNumberFor = (id: string): number => {
    let ordinal = assigned.get(id);
    if (ordinal === undefined) {
      // Every new id takes the next free number, i.e. one past the count so far.
      ordinal = assigned.size + 1;
      assigned.set(id, ordinal);
    }
    return ordinal;
  };

  return { footnoteNumberFor };
}
167
+
168
/**
 * Converts one child of a run into zero or more ProseMirror nodes.
 *
 * - text: a text node carrying `marks`; empty strings yield nothing.
 * - `w:br`: a `hard_break` whose `breakType` is "page", "column" or "line".
 * - `w:footnoteReference`: a `footnote_ref` numbered through `state`.
 * - `w:drawing`: an `image_inline` when `parseDrawing` returns a result.
 * - anything else (including a drawing that could not be parsed): an
 *   `opaque_inline` holding the original XML.
 *
 * @param child Run child to convert.
 * @param marks Marks applied to every produced node.
 * @param state Conversion state used for footnote numbering.
 */
function convertRunChild(
  child: RunChild,
  marks: readonly Mark[],
  state: ConvertState,
): PMNode[] {
  if (child.kind === "text") {
    return child.value.length === 0
      ? []
      : [docxSchema.text(child.value, marks)];
  }

  const { xml } = child;
  const { nodes } = docxSchema;
  const attrs = (xml[":@"] ?? {}) as Record<string, string>;

  switch (getTagName(xml)) {
    case "w:br": {
      const declared = attrs["@_w:type"];
      const breakType =
        declared === "page" || declared === "column" ? declared : "line";
      return [nodes.hard_break.create({ breakType }, undefined, marks)];
    }
    case "w:footnoteReference": {
      const footnoteId = attrs["@_w:id"] ?? "";
      const number = state.footnoteNumberFor(footnoteId);
      return [nodes.footnote_ref.create({ footnoteId, number }, undefined, marks)];
    }
    case "w:drawing": {
      const drawing = parseDrawing(xml);
      if (drawing) {
        const imageAttrs = {
          relationshipId: drawing.relationshipId,
          widthEmu: drawing.widthEmu,
          heightEmu: drawing.heightEmu,
          alt: drawing.alt,
          originalXml: xml,
          wrap: drawing.wrap,
          floatSide: drawing.floatSide,
          behindDoc: drawing.behindDoc,
          offsetXEmu: drawing.offsetXEmu,
          offsetYEmu: drawing.offsetYEmu,
        };
        return [nodes.image_inline.create(imageAttrs, undefined, marks)];
      }
      // Unparseable drawing: fall out of the switch to the opaque fallback.
      break;
    }
    default:
      break;
  }

  return [nodes.opaque_inline.create({ xml }, undefined, marks)];
}
225
+
226
/**
 * Converts a run into ProseMirror nodes: the run's recognized properties are
 * turned into marks, and every run child is converted with those marks.
 * Unknown run-property elements are not carried over by this function.
 */
function convertRun(run: Run, state: ConvertState): PMNode[] {
  const { recognized } = partitionRunProperties(run.properties);
  const marks = marksFromRunProperties(recognized);
  return run.content.flatMap((child) => convertRunChild(child, marks, state));
}
237
+
238
/**
 * Reads attribute `key` from an element's `":@"` attribute bag, where
 * attribute names are stored with an `@_` prefix.
 *
 * @returns The attribute value, or `undefined` when the bag or the attribute
 *   is missing.
 */
function getAttrFromXml(xml: XmlElement, key: string): string | undefined {
  const bag = xml[":@"] as Record<string, string> | undefined;
  if (bag == null) return undefined;
  return bag[`@_${key}`];
}
242
+
243
/**
 * Converts a `w:hyperlink` element into inline nodes that all carry a `link`
 * mark built from the element's `r:id` and `w:anchor` attributes (each `null`
 * when absent).
 *
 * Only `w:r` children of the hyperlink are converted; other children are
 * skipped. Inside a run, `w:t` becomes text (empty text is dropped), `w:br`
 * becomes a `hard_break`, and every other element becomes an `opaque_inline`.
 */
function convertHyperlink(xml: XmlElement): PMNode[] {
  const { marks, nodes } = docxSchema;
  const linkMark = marks.link.create({
    relationshipId: getAttrFromXml(xml, "r:id") ?? null,
    anchor: getAttrFromXml(xml, "w:anchor") ?? null,
  });

  const result: PMNode[] = [];

  for (const runXml of getChildren(xml)) {
    if (getTagName(runXml) !== "w:r") continue;

    // First pass: separate the run properties from the run's content so the
    // marks are known before any content is converted.
    let rPrChildren: XmlElement[] | null = null;
    const pieces: XmlElement[] = [];
    for (const part of getChildren(runXml)) {
      if (isTextNode(part)) continue;
      if (getTagName(part) === "w:rPr") {
        rPrChildren = getChildren(part).filter((prop) => !isTextNode(prop));
      } else {
        pieces.push(part);
      }
    }

    const { recognized } = partitionRunProperties(rPrChildren);
    const runMarks = [...marksFromRunProperties(recognized), linkMark];

    // Second pass: convert the content with run marks + link mark.
    for (const piece of pieces) {
      switch (getTagName(piece)) {
        case "w:t": {
          const text = getTextContent(piece);
          if (text.length > 0) result.push(docxSchema.text(text, runMarks));
          break;
        }
        case "w:br": {
          const attrs = (piece[":@"] ?? {}) as Record<string, string>;
          const declared = attrs["@_w:type"];
          const breakType =
            declared === "page" || declared === "column" ? declared : "line";
          result.push(nodes.hard_break.create({ breakType }, undefined, runMarks));
          break;
        }
        default:
          result.push(
            nodes.opaque_inline.create({ xml: piece }, undefined, runMarks),
          );
      }
    }
  }

  return result;
}
307
+
308
/**
 * Creates a `bookmark_start` node from a `w:bookmarkStart` element, taking
 * `w:id` and `w:name` from its attributes (empty string when missing).
 */
function convertBookmarkStart(xml: XmlElement): PMNode {
  const { "@_w:id": bookmarkId = "", "@_w:name": name = "" } = (xml[":@"] ??
    {}) as Record<string, string | undefined>;
  return docxSchema.nodes.bookmark_start.create({ bookmarkId, name });
}
315
+
316
/**
 * Creates a `bookmark_end` node from a `w:bookmarkEnd` element, taking `w:id`
 * from its attributes (empty string when missing).
 */
function convertBookmarkEnd(xml: XmlElement): PMNode {
  const { "@_w:id": bookmarkId = "" } = (xml[":@"] ?? {}) as Record<
    string,
    string | undefined
  >;
  return docxSchema.nodes.bookmark_end.create({ bookmarkId });
}
322
+
323
/**
 * Builds a `Run` model object from a raw `w:r` element.
 *
 * - Children of every `w:rPr` are collected into `properties`; when the run
 *   has no `w:rPr` at all, `properties` is `null`.
 * - `w:t` becomes a text child, with `preserveSpace` set when the element has
 *   `xml:space="preserve"`.
 * - Every other element is kept as an `opaque_run_child`.
 * - Text nodes directly under the run or under `w:rPr` are ignored.
 */
function parseInnerRun(runXml: XmlElement): Run {
  let properties: XmlElement[] | null = null;
  const content: RunChild[] = [];

  for (const element of getChildren(runXml)) {
    if (isTextNode(element)) continue;

    switch (getTagName(element)) {
      case "w:rPr": {
        if (properties === null) properties = [];
        for (const prop of getChildren(element)) {
          if (!isTextNode(prop)) properties.push(prop);
        }
        break;
      }
      case "w:t": {
        const attrs = (element[":@"] ?? {}) as Record<string, string>;
        content.push({
          kind: "text",
          value: getTextContent(element),
          preserveSpace: attrs["@_xml:space"] === "preserve",
        });
        break;
      }
      default:
        content.push({ kind: "opaque_run_child", xml: element });
    }
  }

  return { kind: "run", properties, content };
}
357
+
358
/**
 * Converts a tracked-change wrapper element into inline nodes, adding an
 * `insertion` or `deletion` mark (with author, date and revision id taken
 * from the wrapper's attributes, `null` when missing) to every node produced
 * from its `w:r` children. Non-run children are skipped.
 *
 * NOTE(review): runs are parsed with `parseInnerRun`, which only treats `w:t`
 * as text; other text-bearing elements inside the wrapper come through as
 * opaque children — confirm that is intended for deletions.
 *
 * @param xml The wrapper element.
 * @param markName Which tracked-change mark to apply.
 * @param state Conversion state forwarded to run conversion.
 */
function convertTrackedWrapper(
  xml: XmlElement,
  markName: "insertion" | "deletion",
  state: ConvertState,
): PMNode[] {
  // The element's tag is its only key besides the ":@" attribute bag.
  const tagOf = (element: XmlElement): string | undefined =>
    Object.keys(element).find((key) => key !== ":@");

  const wrapperTag = tagOf(xml);
  if (!wrapperTag) return [];

  const attrs = (xml[":@"] ?? {}) as Record<string, string>;
  const revisionMark = docxSchema.marks[markName].create({
    author: attrs["@_w:author"] ?? null,
    date: attrs["@_w:date"] ?? null,
    revisionId: attrs["@_w:id"] ?? null,
  });

  const wrapped = (xml[wrapperTag] as XmlElement[]) ?? [];
  return wrapped
    .filter((element) => tagOf(element) === "w:r")
    .flatMap((runXml) =>
      convertRun(parseInnerRun(runXml), state).map((node) =>
        node.mark(node.marks.concat(revisionMark)),
      ),
    );
}
383
+
384
/**
 * Converts one paragraph-level inline into ProseMirror nodes, dispatching on
 * its kind and, for non-run inlines, on the XML tag name. Tags without a
 * dedicated converter are preserved as a single `opaque_inline`.
 */
function convertInline(inline: Inline, state: ConvertState): PMNode[] {
  if (inline.kind === "run") return convertRun(inline, state);

  const { xml } = inline;
  switch (getTagName(xml)) {
    case "w:hyperlink":
      return convertHyperlink(xml);
    case "w:bookmarkStart":
      return [convertBookmarkStart(xml)];
    case "w:bookmarkEnd":
      return [convertBookmarkEnd(xml)];
    case "w:ins":
      return convertTrackedWrapper(xml, "insertion", state);
    case "w:del":
      return convertTrackedWrapper(xml, "deletion", state);
    default:
      return [docxSchema.nodes.opaque_inline.create({ xml })];
  }
}
404
+
405
/** Which part of a complex field is currently being read. */
type FldPhase = "instruction" | "result";

/**
 * Returns the `w:fldCharType` of the first `w:fldChar` child in a run whose
 * type is "begin", "separate" or "end".
 *
 * @returns That type, or `null` when the inline is not a run or holds no such
 *   `w:fldChar`.
 */
function inlineFldCharType(
  inline: Inline,
): "begin" | "separate" | "end" | null {
  if (inline.kind !== "run") return null;

  for (const piece of inline.content) {
    if (piece.kind !== "opaque_run_child") continue;
    if (getTagName(piece.xml) !== "w:fldChar") continue;

    const attrs = (piece.xml[":@"] ?? {}) as Record<string, string>;
    const declared = attrs["@_w:fldCharType"];
    // A fldChar with an unrecognized type is skipped; keep scanning the run.
    if (declared === "begin" || declared === "separate" || declared === "end") {
      return declared;
    }
  }

  return null;
}
423
+
424
/**
 * Concatenates the text content of every `w:instrText` child of a run.
 * Returns an empty string for inlines that are not runs.
 */
function instrTextFromRun(inline: Inline): string {
  if (inline.kind !== "run") return "";

  let collected = "";
  for (const piece of inline.content) {
    if (piece.kind !== "opaque_run_child") continue;
    if (getTagName(piece.xml) !== "w:instrText") continue;
    collected += getTextContent(piece.xml);
  }
  return collected;
}
434
+
435
/**
 * Concatenates the values of all text children of a run; non-text children
 * are ignored. Returns an empty string for inlines that are not runs.
 */
function textFromRunForFieldResult(inline: Inline): string {
  if (inline.kind !== "run") return "";
  return inline.content.reduce(
    (text, piece) => (piece.kind === "text" ? text + piece.value : text),
    "",
  );
}
443
+
444
/**
 * Creates a `field_inline` node from a field's instruction text and its
 * cached result. The instruction is trimmed; the cached result is stored
 * as given.
 */
function buildFieldInline(
  instruction: string,
  cachedResult: string,
): PMNode {
  const attrs = { instruction: instruction.trim(), cachedResult };
  return docxSchema.nodes.field_inline.create(attrs);
}
453
+
454
/**
 * Scans a complex field that starts at `beginIndex` (a run whose fldChar type
 * is "begin") and collects its instruction text and cached result text.
 *
 * Nested fields are tracked by depth, so only the "separate" and "end"
 * markers belonging to the outermost field change the phase or finish the
 * scan.
 *
 * @returns The collected strings and the index just past the matching "end"
 *   marker, or `null` when no matching "end" exists in `content`.
 */
function scanComplexField(
  content: readonly Inline[],
  beginIndex: number,
): { instruction: string; cachedResult: string; nextIndex: number } | null {
  let depth = 1;
  let phase: FldPhase = "instruction";
  let instruction = "";
  let cachedResult = "";

  for (let j = beginIndex + 1; j < content.length; j++) {
    const candidate = content[j];
    const marker = inlineFldCharType(candidate);

    if (marker === "begin") {
      depth += 1;
      continue;
    }
    if (marker === "end") {
      depth -= 1;
      if (depth === 0) {
        return { instruction, cachedResult, nextIndex: j + 1 };
      }
      continue;
    }
    if (marker === "separate") {
      // A nested field's separator must not flip the outer field's phase.
      if (depth === 1) phase = "result";
      continue;
    }

    if (phase === "instruction") {
      instruction += instrTextFromRun(candidate);
    } else {
      cachedResult += textFromRunForFieldResult(candidate);
    }
  }

  return null;
}

/**
 * Converts the inline content of a paragraph into ProseMirror nodes.
 *
 * A complex field (a "begin" fldChar run, instruction runs, an optional
 * "separate" run, result runs, and an "end" run) is collapsed into a single
 * `field_inline` node. All other inlines go through `convertInline`.
 *
 * Differences from a naive scan:
 * - nested fields are matched by depth, so an inner "end" no longer closes
 *   the outer field;
 * - a "begin" without a matching "end" in this paragraph is NOT collapsed;
 *   its runs are converted like any other inline so that following content
 *   is not swallowed into the field.
 *
 * @param content Paragraph inlines in document order.
 * @param state Conversion state forwarded to inline conversion.
 */
function convertParagraphInlines(
  content: readonly Inline[],
  state: ConvertState,
): PMNode[] {
  const out: PMNode[] = [];
  let i = 0;

  while (i < content.length) {
    const inline = content[i];

    if (inlineFldCharType(inline) === "begin") {
      const field = scanComplexField(content, i);
      if (field !== null) {
        out.push(buildFieldInline(field.instruction, field.cachedResult));
        i = field.nextIndex;
        continue;
      }
      // Unterminated field: fall through and keep the run as ordinary content.
    }

    for (const node of convertInline(inline, state)) {
      out.push(node);
    }
    i += 1;
  }

  return out;
}
497
+
498
/**
 * Converts a paragraph block into a `paragraph` node.
 *
 * The node stores the recognized paragraph properties, the unknown property
 * elements, and the precomputed numbering label. When a style table is
 * supplied it also stores the style-resolved paragraph properties and the
 * run properties resolved for the paragraph's style id; otherwise both are
 * `null`.
 *
 * @param block Paragraph block from the document model.
 * @param styleTable Style table used for resolution, or `null`.
 * @param numberingLabel Label computed by the caller, or `null`.
 * @param state Conversion state forwarded to inline conversion.
 */
function convertParagraph(
  block: Extract<Block, { kind: "paragraph" }>,
  styleTable: StyleTable | null,
  numberingLabel: string | null,
  state: ConvertState,
): PMNode {
  const { recognized, unknown } = partitionParagraphProperties(block.properties);
  const children = convertParagraphInlines(block.content, state);

  let resolvedProperties: ReturnType<typeof resolveParagraphProperties> | null =
    null;
  let resolvedBaseRun: ReturnType<typeof resolveRunProperties> | null = null;
  if (styleTable) {
    resolvedProperties = resolveParagraphProperties(styleTable, recognized);
    resolvedBaseRun = resolveRunProperties(styleTable, recognized.styleId, {
      ...EMPTY_RUN_PROPERTIES,
    });
  }

  const attrs = {
    properties: recognized,
    unknownProperties: unknown,
    resolvedProperties,
    resolvedBaseRun,
    numberingLabel,
  };
  return docxSchema.nodes.paragraph.create(attrs, children);
}
527
+
528
/** Wraps a block-level XML element in an `opaque_block` node, keeping the XML as-is. */
function convertOpaqueBlock(xml: XmlElement): PMNode {
  const { opaque_block: opaqueBlock } = docxSchema.nodes;
  return opaqueBlock.create({ xml });
}
531
+
532
/**
 * Creates a `section_break` node from a `w:sectPr` element.
 *
 * @param xml The section-properties element, parsed with `parseSectionProperties`.
 * @param isFinal Stored on the node; callers pass `true` for the body-level
 *   section properties and `false` for a paragraph-level one.
 */
function convertSectionBreak(xml: XmlElement, isFinal: boolean): PMNode {
  const properties = parseSectionProperties(xml);
  return docxSchema.nodes.section_break.create({ properties, isFinal });
}
538
+
539
/** Optional lookup tables used while converting a document. */
export type DocxToPmContext = {
  /** Style table for property resolution; missing or `null` skips resolution. */
  styleTable?: StyleTable | null;
  /** Numbering table for list labels; missing or `null` leaves labels `null`. */
  numberingTable?: NumberingTable | null;
};

/**
 * Converts a parsed DOCX document into a ProseMirror `doc` node.
 *
 * Body children are converted in order:
 * - paragraphs become `paragraph` nodes (with a numbering label when a
 *   numbering table is available and the paragraph is numbered), followed by
 *   a non-final `section_break` when the paragraph carries a `w:sectPr`;
 * - the body-level section properties become a final `section_break`;
 * - `w:tbl` elements become table nodes when `xmlToTableNode` succeeds;
 * - everything else is preserved as an `opaque_block`.
 *
 * An empty body yields a document with one empty paragraph.
 *
 * @param doc Parsed document model.
 * @param context Optional style and numbering tables.
 * @returns The document node, built with `createChecked`.
 */
export function docxToPm(
  doc: DocxDocument,
  context: DocxToPmContext = {},
): PMNode {
  const styleTable = context.styleTable ?? null;
  const numberingTable = context.numberingTable ?? null;
  const numberingState = createNumberingState();
  const state = createConvertState();

  // Used by table conversion to convert the blocks inside cells; it shares
  // the numbering and footnote state with the body.
  const convertNested = (xml: XmlElement): PMNode | null =>
    convertBlockFromXml(xml, styleTable, numberingTable, numberingState, state);

  const blocks: PMNode[] = [];

  for (const block of doc.body.children) {
    switch (block.kind) {
      case "paragraph": {
        const { numbering } = partitionParagraphProperties(
          block.properties,
        ).recognized;
        const label: string | null =
          numberingTable && numbering
            ? computeLabel(
                numberingState,
                numberingTable,
                numbering.numId,
                numbering.ilvl,
              )
            : null;
        blocks.push(convertParagraph(block, styleTable, label, state));

        const inlineSect = findInlineSectPr(block.properties);
        if (inlineSect !== null) {
          blocks.push(convertSectionBreak(inlineSect, false));
        }
        break;
      }
      case "body_sect_pr":
        blocks.push(convertSectionBreak(block.xml, true));
        break;
      default: {
        const tableNode =
          getTagName(block.xml) === "w:tbl"
            ? xmlToTableNode(block.xml, convertNested)
            : null;
        // Non-tables and tables that could not be converted stay opaque.
        blocks.push(tableNode ? tableNode : convertOpaqueBlock(block.xml));
      }
    }
  }

  if (blocks.length === 0) {
    blocks.push(docxSchema.nodes.paragraph.create());
  }

  return docxSchema.nodes.doc.createChecked(null, blocks);
}
607
+
608
/**
 * Converts a raw block-level XML element into a node.
 *
 * - `w:tbl` is converted through `xmlToTableNode`, recursing into this
 *   function for nested blocks.
 * - `w:p` is parsed with `parseBlockXml` and converted like a body paragraph,
 *   including its numbering label when a numbering table is available.
 * - Any other tag, or a `w:p` that does not parse to a paragraph, yields `null`.
 *
 * @param xml Element to convert.
 * @param styleTable Style table used for resolution, or `null`.
 * @param numberingTable Numbering table used for labels, or `null`.
 * @param numberingState Running numbering counters shared with the caller.
 * @param state Conversion state shared with the caller.
 */
function convertBlockFromXml(
  xml: XmlElement,
  styleTable: StyleTable | null,
  numberingTable: NumberingTable | null,
  numberingState: ReturnType<typeof createNumberingState>,
  state: ConvertState,
): PMNode | null {
  const tag = getTagName(xml);

  if (tag === "w:tbl") {
    const recurse = (childXml: XmlElement): PMNode | null =>
      convertBlockFromXml(
        childXml,
        styleTable,
        numberingTable,
        numberingState,
        state,
      );
    return xmlToTableNode(xml, recurse);
  }

  if (tag !== "w:p") return null;

  const block = parseBlockXml(xml);
  if (block.kind !== "paragraph") return null;

  const { numbering } = partitionParagraphProperties(block.properties).recognized;
  const label: string | null =
    numberingTable && numbering
      ? computeLabel(
          numberingState,
          numberingTable,
          numbering.numId,
          numbering.ilvl,
        )
      : null;

  return convertParagraph(block, styleTable, label, state);
}