@beyondwork/docx-react-component 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. package/README.md +44 -104
  2. package/package.json +76 -46
  3. package/src/README.md +85 -0
  4. package/src/api/README.md +22 -0
  5. package/src/api/public-types.ts +525 -0
  6. package/src/compare/diff-engine.ts +530 -0
  7. package/src/compare/export-redlines.ts +162 -0
  8. package/src/compare/snapshot.ts +37 -0
  9. package/src/component-inventory.md +99 -0
  10. package/src/core/README.md +10 -0
  11. package/src/core/commands/README.md +3 -0
  12. package/src/core/commands/formatting-commands.ts +161 -0
  13. package/src/core/commands/image-commands.ts +144 -0
  14. package/src/core/commands/index.ts +1013 -0
  15. package/src/core/commands/list-commands.ts +370 -0
  16. package/src/core/commands/review-commands.ts +108 -0
  17. package/src/core/commands/text-commands.ts +119 -0
  18. package/src/core/schema/README.md +3 -0
  19. package/src/core/schema/text-schema.ts +512 -0
  20. package/src/core/selection/README.md +3 -0
  21. package/src/core/selection/mapping.ts +238 -0
  22. package/src/core/selection/review-anchors.ts +94 -0
  23. package/src/core/state/README.md +3 -0
  24. package/src/core/state/editor-state.ts +580 -0
  25. package/src/core/state/text-transaction.ts +276 -0
  26. package/src/formats/xlsx/io/parse-shared-strings.ts +41 -0
  27. package/src/formats/xlsx/io/parse-sheet.ts +289 -0
  28. package/src/formats/xlsx/io/parse-styles.ts +57 -0
  29. package/src/formats/xlsx/io/parse-workbook.ts +75 -0
  30. package/src/formats/xlsx/io/xlsx-session.ts +306 -0
  31. package/src/formats/xlsx/model/cell.ts +189 -0
  32. package/src/formats/xlsx/model/sheet.ts +244 -0
  33. package/src/formats/xlsx/model/styles.ts +118 -0
  34. package/src/formats/xlsx/model/workbook.ts +449 -0
  35. package/src/index.ts +45 -0
  36. package/src/io/README.md +10 -0
  37. package/src/io/docx-session.ts +1763 -0
  38. package/src/io/export/README.md +3 -0
  39. package/src/io/export/export-session.ts +165 -0
  40. package/src/io/export/minimal-docx.ts +115 -0
  41. package/src/io/export/reattach-preserved-parts.ts +54 -0
  42. package/src/io/export/serialize-comments.ts +876 -0
  43. package/src/io/export/serialize-footnotes.ts +217 -0
  44. package/src/io/export/serialize-headers-footers.ts +200 -0
  45. package/src/io/export/serialize-main-document.ts +982 -0
  46. package/src/io/export/serialize-numbering.ts +97 -0
  47. package/src/io/export/serialize-revisions.ts +389 -0
  48. package/src/io/export/serialize-runtime-revisions.ts +265 -0
  49. package/src/io/export/serialize-tables.ts +147 -0
  50. package/src/io/export/split-review-boundaries.ts +194 -0
  51. package/src/io/normalize/README.md +3 -0
  52. package/src/io/normalize/normalize-text.ts +437 -0
  53. package/src/io/ooxml/README.md +3 -0
  54. package/src/io/ooxml/parse-comments.ts +779 -0
  55. package/src/io/ooxml/parse-complex-content.ts +287 -0
  56. package/src/io/ooxml/parse-fields.ts +438 -0
  57. package/src/io/ooxml/parse-footnotes.ts +403 -0
  58. package/src/io/ooxml/parse-headers-footers.ts +483 -0
  59. package/src/io/ooxml/parse-inline-media.ts +431 -0
  60. package/src/io/ooxml/parse-main-document.ts +1846 -0
  61. package/src/io/ooxml/parse-numbering.ts +425 -0
  62. package/src/io/ooxml/parse-revisions.ts +658 -0
  63. package/src/io/ooxml/parse-shapes.ts +271 -0
  64. package/src/io/ooxml/parse-tables.ts +568 -0
  65. package/src/io/ooxml/parse-theme.ts +314 -0
  66. package/src/io/ooxml/part-manifest.ts +136 -0
  67. package/src/io/ooxml/revision-boundaries.ts +351 -0
  68. package/src/io/opc/README.md +3 -0
  69. package/src/io/opc/corrupt-package.ts +166 -0
  70. package/src/io/opc/docx-package.ts +74 -0
  71. package/src/io/opc/package-reader.ts +320 -0
  72. package/src/io/opc/package-writer.ts +273 -0
  73. package/src/legal/bookmarks.ts +196 -0
  74. package/src/legal/cross-references.ts +356 -0
  75. package/src/legal/defined-terms.ts +203 -0
  76. package/src/model/README.md +3 -0
  77. package/src/model/canonical-document.ts +1911 -0
  78. package/src/model/cds-1.0.0.ts +196 -0
  79. package/src/model/snapshot.ts +393 -0
  80. package/src/preservation/README.md +3 -0
  81. package/src/preservation/markup-compatibility.ts +48 -0
  82. package/src/preservation/opaque-fragment-store.ts +89 -0
  83. package/src/preservation/opaque-region.ts +233 -0
  84. package/src/preservation/package-preservation.ts +120 -0
  85. package/src/preservation/preserved-part-manifest.ts +56 -0
  86. package/src/preservation/relationship-retention.ts +57 -0
  87. package/src/preservation/store.ts +185 -0
  88. package/src/review/README.md +16 -0
  89. package/src/review/store/README.md +3 -0
  90. package/src/review/store/comment-anchors.ts +70 -0
  91. package/src/review/store/comment-remapping.ts +154 -0
  92. package/src/review/store/comment-store.ts +331 -0
  93. package/src/review/store/comment-thread.ts +109 -0
  94. package/src/review/store/revision-actions.ts +394 -0
  95. package/src/review/store/revision-store.ts +303 -0
  96. package/src/review/store/revision-types.ts +168 -0
  97. package/src/review/store/runtime-comment-store.ts +43 -0
  98. package/src/runtime/README.md +3 -0
  99. package/src/runtime/ai-action-policy.ts +764 -0
  100. package/src/runtime/document-runtime.ts +967 -0
  101. package/src/runtime/read-only-diagnostics-runtime.ts +232 -0
  102. package/src/runtime/review-runtime.ts +44 -0
  103. package/src/runtime/revision-runtime.ts +107 -0
  104. package/src/runtime/session-capabilities.ts +138 -0
  105. package/src/runtime/surface-projection.ts +570 -0
  106. package/src/runtime/table-commands.ts +87 -0
  107. package/src/runtime/table-schema.ts +140 -0
  108. package/src/runtime/virtualized-rendering.ts +258 -0
  109. package/src/ui/README.md +30 -0
  110. package/src/ui/WordReviewEditor.tsx +1504 -0
  111. package/src/ui/comments/README.md +3 -0
  112. package/src/ui/compatibility/README.md +3 -0
  113. package/src/ui/editor-surface/README.md +3 -0
  114. package/src/ui/headless/comment-decoration-model.ts +124 -0
  115. package/src/ui/headless/revision-decoration-model.ts +128 -0
  116. package/src/ui/headless/selection-helpers.ts +34 -0
  117. package/src/ui/headless/use-editor-keyboard.ts +98 -0
  118. package/src/ui/review/README.md +3 -0
  119. package/src/ui/shared/revision-filters.ts +31 -0
  120. package/src/ui/status/README.md +3 -0
  121. package/src/ui/theme/README.md +3 -0
  122. package/src/ui/toolbar/README.md +3 -0
  123. package/src/ui-tailwind/chrome/tw-alert-banner.tsx +48 -0
  124. package/src/ui-tailwind/chrome/tw-selection-toolbar.tsx +44 -0
  125. package/src/ui-tailwind/chrome/tw-unsaved-modal.tsx +58 -0
  126. package/src/ui-tailwind/chrome/use-before-unload.ts +20 -0
  127. package/src/ui-tailwind/editor-surface/pm-command-bridge.ts +139 -0
  128. package/src/ui-tailwind/editor-surface/pm-decorations.ts +98 -0
  129. package/src/ui-tailwind/editor-surface/pm-position-map.ts +123 -0
  130. package/src/ui-tailwind/editor-surface/pm-schema.ts +452 -0
  131. package/src/ui-tailwind/editor-surface/pm-state-from-snapshot.ts +327 -0
  132. package/src/ui-tailwind/editor-surface/search-plugin.ts +157 -0
  133. package/src/ui-tailwind/editor-surface/tw-caret.tsx +12 -0
  134. package/src/ui-tailwind/editor-surface/tw-editor-surface.tsx +150 -0
  135. package/src/ui-tailwind/editor-surface/tw-inline-token.tsx +118 -0
  136. package/src/ui-tailwind/editor-surface/tw-opaque-block.tsx +52 -0
  137. package/src/ui-tailwind/editor-surface/tw-paragraph-block.tsx +151 -0
  138. package/src/ui-tailwind/editor-surface/tw-prosemirror-surface.tsx +215 -0
  139. package/src/ui-tailwind/editor-surface/tw-segment-view.tsx +111 -0
  140. package/src/ui-tailwind/editor-surface/tw-table-node-view.tsx +122 -0
  141. package/src/ui-tailwind/index.ts +61 -0
  142. package/src/ui-tailwind/review/tw-comment-sidebar.tsx +276 -0
  143. package/src/ui-tailwind/review/tw-health-panel.tsx +120 -0
  144. package/src/ui-tailwind/review/tw-review-rail.tsx +120 -0
  145. package/src/ui-tailwind/review/tw-revision-sidebar.tsx +164 -0
  146. package/src/ui-tailwind/status/tw-status-bar.tsx +58 -0
  147. package/src/ui-tailwind/theme/editor-theme.css +190 -0
  148. package/src/ui-tailwind/toolbar/tw-toolbar-icon-button.tsx +48 -0
  149. package/src/ui-tailwind/toolbar/tw-toolbar.tsx +231 -0
  150. package/src/ui-tailwind/tw-review-workspace.tsx +140 -0
  151. package/src/validation/README.md +3 -0
  152. package/src/validation/compatibility-engine.ts +317 -0
  153. package/src/validation/compatibility-report.ts +160 -0
  154. package/src/validation/diagnostics.ts +203 -0
  155. package/src/validation/import-diagnostics.ts +128 -0
  156. package/src/validation/low-priority-word-surfaces.ts +373 -0
  157. package/dist/chunk-32W6IVQE.js +0 -7725
  158. package/dist/chunk-32W6IVQE.js.map +0 -1
  159. package/dist/index.cjs +0 -23722
  160. package/dist/index.cjs.map +0 -1
  161. package/dist/index.d.cts +0 -7
  162. package/dist/index.d.ts +0 -7
  163. package/dist/index.js +0 -16011
  164. package/dist/index.js.map +0 -1
  165. package/dist/public-types-DqCURAz8.d.cts +0 -1152
  166. package/dist/public-types-DqCURAz8.d.ts +0 -1152
  167. package/dist/tailwind.cjs +0 -8295
  168. package/dist/tailwind.cjs.map +0 -1
  169. package/dist/tailwind.d.cts +0 -323
  170. package/dist/tailwind.d.ts +0 -323
  171. package/dist/tailwind.js +0 -553
  172. package/dist/tailwind.js.map +0 -1
@@ -0,0 +1,779 @@
1
+ import type { CommentThread } from "../../review/store/comment-store.ts";
2
+ import { createImportedCommentThread } from "../../review/store/comment-thread.ts";
3
+
4
/** Element node produced by this file's `parseXml` tokenizer. */
interface XmlElementNode {
  type: "element";
  /** Tag name as written in the markup, including any namespace prefix. */
  name: string;
  /** Attribute values keyed by the attribute name as written; entity references are decoded. */
  attributes: Record<string, string>;
  /** Child elements and text nodes in document order. */
  children: XmlNode[];
  /** Offset in the source string where the opening tag begins. */
  start: number;
  /** Offset just past the closing tag, or past the tag itself when it is self-closing. */
  end: number;
}

/** Text node produced by this file's `parseXml` tokenizer. */
interface XmlTextNode {
  type: "text";
  /** Character data with entity references decoded, or the raw content of a CDATA section. */
  text: string;
  /** Offset in the source string where the text token begins. */
  start: number;
  /** Offset in the source string just past the text token. */
  end: number;
}

/** Any node of the parsed XML tree; discriminated by `type`. */
type XmlNode = XmlElementNode | XmlTextNode;
21
+
22
/** Raw XML parts accepted by `parseCommentsFromOoxml`. Only the comments part is required. */
export interface ParsedCommentsInput {
  /** XML whose root element has the local name `comments`. */
  commentsXml: string;
  /** Optional XML whose root element has the local name `commentsEx`. */
  commentsExtendedXml?: string;
  /** Optional XML whose root element has the local name `commentsIds`. */
  commentsIdsXml?: string;
  /** Optional XML whose root element has the local name `people`. */
  peopleXml?: string;
}
28
+
29
/** One `comment` element read from the comments part, enriched with data from the optional parts. */
export interface ImportedCommentDefinition {
  /** Value of the comment's `w:id` (or unprefixed `id`) attribute. */
  commentId: string;
  /** Value of the `w:author` (or `author`) attribute, when present. */
  authorId?: string;
  /** The `w:date` (or `date`) attribute as an ISO string; absent when missing or unparseable. */
  createdAt?: string;
  /** Text of the comment's direct paragraph children, one paragraph per line. */
  body: string;
  /** The exact slice of the comments XML covering this comment element. */
  rawXml: string;
  /** Zero-based position among the comment elements that were accepted. */
  order: number;
  /** Value of the `w:initials` (or `initials`) attribute, when present. */
  initials?: string;
  /** `w14:paraId` (or `paraId`) of the comment's first paragraph. */
  paraId?: string;
  /** Parent paragraph id recorded for `paraId` in the extended comments part. */
  parentParaId?: string;
  /** Durable id recorded for `paraId` in the comment ids part. */
  durableId?: string;
  /** Done flag recorded for `paraId` in the extended comments part. */
  isDone?: boolean;
}
42
+
43
/**
 * Positions collected for one comment id while walking the document body.
 * Every field stays unset until the corresponding marker is encountered.
 */
interface CommentAnchorBounds {
  /** Cursor value at the `commentRangeStart` marker. */
  start?: number;
  /** Cursor value at the `commentRangeEnd` marker. */
  end?: number;
  /** Index of the body paragraph holding the `commentRangeStart` marker. */
  startParagraphIndex?: number;
  /** Index of the body paragraph holding the `commentRangeEnd` marker. */
  endParagraphIndex?: number;
  /** Cursor value at the `commentReference` marker. */
  referenceAt?: number;
  /** Index of the body paragraph holding the `commentReference` marker. */
  referenceParagraphIndex?: number;
}
51
+
52
/** Data read from one `commentEx` element of the extended comments part. */
interface CommentExtensionRecord {
  /** Value of the `w15:paraId` (or `paraId`) attribute; also the lookup key. */
  paraId: string;
  /** Value of the `w15:paraIdParent` (or `paraIdParent`) attribute, when present. */
  parentParaId?: string;
  /** True when the `done` attribute is `"1"` or case-insensitively `"true"`; unset when the attribute is absent. */
  isDone?: boolean;
}
57
+
58
/** Diagnostic describing a comment that could not be imported as an attached thread. */
export interface CommentImportDiagnostic {
  /** Id of the comment (or of the document anchor) the diagnostic is about. */
  commentId: string;
  /** Machine-readable reason code. */
  code:
    | "missing_comment_definition"
    | "missing_anchor_reference"
    | "multi_paragraph_anchor_preserve_only"
    | "opaque_anchor_preserve_only"
    | "preserve_only_revision_overlap";
  /** Human-readable explanation. */
  message: string;
  /** Feature classification; always `"preserve-only"`. */
  featureClass: "preserve-only";
}
69
+
70
/** Everything `parseCommentsFromOoxml` returns. */
export interface ParsedCommentsResult {
  /** Imported threads, sorted by anchor start and then by comment id. */
  threads: CommentThread[];
  /** Problems found while matching definitions to document anchors. */
  diagnostics: CommentImportDiagnostic[];
  /** Every comment definition read from the comments part. */
  definitions: ImportedCommentDefinition[];
  /** Opening tag text matched for `comments` in the comments part, if any. */
  sourceRootTag?: string;
  /** Opening tag text matched for `commentsEx` in the extended part, if any. */
  sourceExtendedRootTag?: string;
  /** Opening tag text matched for `commentsIds` in the ids part, if any. */
  sourceIdsRootTag?: string;
  /** Opening tag text matched for `people` in the people part, if any. */
  sourcePeopleRootTag?: string;
  /** Distinct author names from the people part, sorted with `localeCompare`. */
  peopleAuthors: string[];
}
80
+
81
/**
 * Imports comment threads from a main document part and its comment parts.
 *
 * Comment definitions are grouped into threads through their `parentParaId`
 * links. Each thread is positioned using the anchor markers
 * (`commentRangeStart`, `commentRangeEnd`, `commentReference`) recorded in
 * `documentXml` for the thread's root comment id:
 *
 * - When a start/end position or paragraph index cannot be determined, the
 *   thread is imported as `detached` and a `missing_anchor_reference`
 *   diagnostic is recorded.
 * - When the anchor starts and ends in different paragraphs, the thread is
 *   imported as `detached` and a `multi_paragraph_anchor_preserve_only`
 *   diagnostic is recorded.
 * - Otherwise the thread is imported as `resolved` when the root definition
 *   is flagged done, and as `open` when it is not.
 *
 * Anchor ids found in the document without a matching definition produce a
 * `missing_comment_definition` diagnostic.
 *
 * @param documentXml XML of the main document part.
 * @param input The comments XML string, or an object bundling it with the
 *   optional extended, ids and people parts.
 * @returns Threads sorted by anchor position, the diagnostics, the parsed
 *   definitions, the matched root tag of each supplied part, and the sorted
 *   author list.
 * @throws Error when a part is malformed or lacks its expected root element.
 */
export function parseCommentsFromOoxml(
  documentXml: string,
  input: string | ParsedCommentsInput,
): ParsedCommentsResult {
  const parts = normalizeInput(input);
  const commentExtensions = parseCommentExtensions(parts.commentsExtendedXml);
  const durableIds = parseCommentDurableIds(parts.commentsIdsXml);
  const peopleAuthors = parsePeopleAuthors(parts.peopleXml);
  const definitions = parseCommentDefinitions(parts.commentsXml, commentExtensions, durableIds);

  const definedCommentIds = new Set(definitions.map((definition) => definition.commentId));
  const definitionsByParaId = new Map<string, ImportedCommentDefinition>();
  for (const definition of definitions) {
    if (typeof definition.paraId === "string") {
      definitionsByParaId.set(definition.paraId, definition);
    }
  }

  const anchors = parseCommentAnchors(documentXml);
  const diagnostics: CommentImportDiagnostic[] = [];
  const threadDefinitions = groupThreadDefinitions(definitions, definitionsByParaId);
  const threads: CommentThread[] = [];

  for (const [rootCommentId, groupedDefinitions] of threadDefinitions) {
    const rootDefinition = groupedDefinitions[0];
    if (!rootDefinition) {
      continue;
    }

    // A comment can be defined without any marker in the document, in which
    // case there is no anchor record at all.
    const anchor = anchors.get(rootCommentId);
    // The reference marker stands in for a missing range start or end.
    const start = anchor?.start ?? anchor?.referenceAt;
    const end = anchor?.end ?? anchor?.referenceAt;
    const startParagraphIndex = anchor?.startParagraphIndex ?? anchor?.referenceParagraphIndex;
    const endParagraphIndex = anchor?.endParagraphIndex ?? anchor?.referenceParagraphIndex;

    const entries = groupedDefinitions.map((definition, index) => ({
      entryId: `${rootCommentId}-entry-${index + 1}`,
      authorId: definition.authorId ?? "unknown",
      body: definition.body,
      createdAt: definition.createdAt ?? "1970-01-01T00:00:00.000Z",
      metadata: {
        ooxmlCommentId: definition.commentId,
        paraId: definition.paraId,
        parentParaId: definition.parentParaId,
        durableId: definition.durableId,
        initials: definition.initials,
      },
    }));
    const createdBy = rootDefinition.authorId ?? entries[0]?.authorId ?? "unknown";
    const createdAt =
      rootDefinition.createdAt ?? entries[0]?.createdAt ?? "1970-01-01T00:00:00.000Z";

    // Resolution is attributed to the last entry of the thread.
    const lastEntry = entries.at(-1);
    const resolution = rootDefinition.isDone
      ? {
          resolvedAt: lastEntry?.createdAt ?? createdAt,
          resolvedBy: lastEntry?.authorId ?? createdBy,
        }
      : undefined;

    // Without any marker there is nothing to derive a detached range from.
    const detachedRange = anchor ? toDetachedRange(anchor) : undefined;

    // The three outcomes below differ only in range and status.
    const buildThread = (
      range: { from: number; to: number } | undefined,
      status: "detached" | "open" | "resolved",
    ) =>
      createImportedCommentThread({
        commentId: rootCommentId,
        body: rootDefinition.body,
        createdBy,
        createdAt,
        range,
        entries,
        status,
        resolution,
        metadata: {
          source: "import",
          rootOoxmlCommentId: rootDefinition.commentId,
          rootParaId: rootDefinition.paraId,
        },
      });

    if (
      start === undefined ||
      end === undefined ||
      startParagraphIndex === undefined ||
      endParagraphIndex === undefined
    ) {
      diagnostics.push({
        commentId: rootCommentId,
        code: "missing_anchor_reference",
        message: "Comment anchor markers are incomplete and remain preserve-only.",
        featureClass: "preserve-only",
      });
      threads.push(buildThread(detachedRange, "detached"));
      continue;
    }

    if (startParagraphIndex !== endParagraphIndex) {
      diagnostics.push({
        commentId: rootCommentId,
        code: "multi_paragraph_anchor_preserve_only",
        message:
          "Comment anchor spans multiple paragraphs and remains preserve-only for Wave 5.",
        featureClass: "preserve-only",
      });
      threads.push(buildThread(detachedRange, "detached"));
      continue;
    }

    threads.push(
      buildThread(
        { from: Math.min(start, end), to: Math.max(start, end) },
        resolution ? "resolved" : "open",
      ),
    );
  }

  for (const commentId of anchors.keys()) {
    if (definedCommentIds.has(commentId)) {
      continue;
    }

    diagnostics.push({
      commentId,
      code: "missing_comment_definition",
      message: "Document anchor markers reference a comment id missing from comments.xml.",
      featureClass: "preserve-only",
    });
  }

  threads.sort(compareThreadsByAnchor);

  return {
    threads,
    diagnostics,
    definitions,
    sourceRootTag: extractRootTag(parts.commentsXml, "comments"),
    sourceExtendedRootTag: extractRootTag(parts.commentsExtendedXml, "commentsEx"),
    sourceIdsRootTag: extractRootTag(parts.commentsIdsXml, "commentsIds"),
    sourcePeopleRootTag: extractRootTag(parts.peopleXml, "people"),
    peopleAuthors,
  };
}
244
+
245
/**
 * Accepts either call form of the public entry point.
 *
 * @param input A bare comments XML string or an already-bundled parts object.
 * @returns The parts object itself, or a new object holding only `commentsXml`
 *   when a string was given.
 */
function normalizeInput(input: string | ParsedCommentsInput): ParsedCommentsInput {
  if (typeof input !== "string") {
    return input;
  }

  return { commentsXml: input };
}
252
+
253
/**
 * Buckets comment definitions into threads keyed by their root comment id.
 *
 * Within a bucket the definitions are ordered by `order`, and the first
 * definition without a `parentParaId` is then moved to the front. The
 * returned map iterates its keys in `localeCompare` order.
 *
 * @param definitions All parsed comment definitions.
 * @param definitionsByParaId Lookup used to follow `parentParaId` links.
 * @returns Root comment id mapped to that thread's definitions.
 */
function groupThreadDefinitions(
  definitions: ImportedCommentDefinition[],
  definitionsByParaId: Map<string, ImportedCommentDefinition>,
): Map<string, ImportedCommentDefinition[]> {
  const buckets = new Map<string, ImportedCommentDefinition[]>();

  for (const definition of definitions) {
    const rootId = resolveRootCommentId(definition, definitionsByParaId);
    let bucket = buckets.get(rootId);
    if (!bucket) {
      bucket = [];
      buckets.set(rootId, bucket);
    }
    bucket.push(definition);
  }

  buckets.forEach((bucket) => {
    bucket.sort((a, b) => a.order - b.order);

    // Position 0 needs no move; -1 means the bucket has no parentless member.
    const rootAt = bucket.findIndex((candidate) => !candidate.parentParaId);
    if (rootAt <= 0) {
      return;
    }

    const removed = bucket.splice(rootAt, 1)[0];
    if (removed) {
      bucket.unshift(removed);
    }
  });

  const ordered = Array.from(buckets).sort((a, b) => a[0].localeCompare(b[0]));
  return new Map(ordered);
}
284
+
285
/**
 * Follows `parentParaId` links upward to find the comment a thread hangs from.
 *
 * The walk stops at a definition without a parent, at a parent id that is not
 * in the lookup, or at a parent id already visited (which guards against
 * cyclic links).
 *
 * @param definition Definition to start from.
 * @param definitionsByParaId Definitions keyed by their `paraId`.
 * @returns The `commentId` of the definition the walk stopped on.
 */
function resolveRootCommentId(
  definition: ImportedCommentDefinition,
  definitionsByParaId: Map<string, ImportedCommentDefinition>,
): string {
  const seenParents = new Set<string>();
  let node = definition;

  for (;;) {
    const parentId = node.parentParaId;
    if (!parentId || seenParents.has(parentId)) {
      break;
    }
    seenParents.add(parentId);

    const parent = definitionsByParaId.get(parentId);
    if (!parent) {
      break;
    }
    node = parent;
  }

  return node.commentId ?? definition.commentId;
}
306
+
307
/**
 * Reads every `comment` element of the comments part.
 *
 * Elements without an id are skipped. The first direct paragraph's `paraId`
 * is used to look up the thread parent, done flag and durable id.
 *
 * @param commentsXml XML of the comments part; blank input yields no definitions.
 * @param extensions Extended-comment records keyed by paragraph id.
 * @param durableIds Durable ids keyed by paragraph id.
 * @returns Definitions in document order, numbered by `order` from zero.
 * @throws Error when the XML is malformed or has no `comments` element.
 */
function parseCommentDefinitions(
  commentsXml: string,
  extensions: Map<string, CommentExtensionRecord>,
  durableIds: Map<string, string>,
): ImportedCommentDefinition[] {
  if (commentsXml.trim().length === 0) {
    return [];
  }

  const container = findChildElement(parseXml(commentsXml), "comments");
  const definitions: ImportedCommentDefinition[] = [];

  for (const node of container.children) {
    if (node.type !== "element" || localName(node.name) !== "comment") {
      continue;
    }

    const attrs = node.attributes;
    const commentId = attrs["w:id"] ?? attrs.id;
    if (!commentId) {
      continue;
    }

    // Only direct paragraph children contribute to the body.
    const paragraphs: XmlElementNode[] = [];
    for (const inner of node.children) {
      if (inner.type === "element" && localName(inner.name) === "p") {
        paragraphs.push(inner);
      }
    }

    const firstParagraph = paragraphs[0];
    const paraId =
      firstParagraph?.attributes["w14:paraId"] ?? firstParagraph?.attributes.paraId;
    const extension = paraId ? extensions.get(paraId) : undefined;

    definitions.push({
      commentId,
      authorId: attrs["w:author"] ?? attrs.author,
      createdAt: normalizeImportedTimestamp(attrs["w:date"] ?? attrs.date),
      body: paragraphs.map((paragraph) => extractParagraphText(paragraph)).join("\n"),
      rawXml: commentsXml.slice(node.start, node.end),
      // Skipped elements do not consume a number.
      order: definitions.length,
      initials: attrs["w:initials"] ?? attrs.initials,
      paraId,
      parentParaId: extension?.parentParaId,
      durableId: paraId ? durableIds.get(paraId) : undefined,
      isDone: extension?.isDone,
    });
  }

  return definitions;
}
363
+
364
/**
 * Converts an imported date attribute to ISO 8601 form.
 *
 * @param value Raw attribute value, if the attribute was present.
 * @returns The timestamp as produced by `Date.prototype.toISOString`, or
 *   `undefined` when the value is missing, empty or not parseable as a date.
 */
function normalizeImportedTimestamp(value: string | undefined): string | undefined {
  if (!value) {
    return undefined;
  }

  const epochMs = Date.parse(value);
  return Number.isNaN(epochMs) ? undefined : new Date(epochMs).toISOString();
}
376
+
377
/**
 * Reads the `commentEx` records of the extended comments part.
 *
 * @param xml XML of the extended comments part; missing or blank input yields an empty map.
 * @returns Records keyed by paragraph id. Elements without a paragraph id are skipped.
 * @throws Error when the XML is malformed or has no `commentsEx` element.
 */
function parseCommentExtensions(xml: string | undefined): Map<string, CommentExtensionRecord> {
  const extensions = new Map<string, CommentExtensionRecord>();
  if (!xml || xml.trim().length === 0) {
    return extensions;
  }

  const container = findChildElement(parseXml(xml), "commentsEx");

  for (const node of container.children) {
    if (node.type !== "element" || localName(node.name) !== "commentEx") {
      continue;
    }

    const attrs = node.attributes;
    const paraId = attrs["w15:paraId"] ?? attrs.paraId;
    if (!paraId) {
      continue;
    }

    // The flag stays undefined when the attribute is absent.
    const done = attrs["w15:done"] ?? attrs.done;
    let isDone: boolean | undefined;
    if (typeof done === "string") {
      isDone = done.toLowerCase() === "true" || done === "1";
    }

    extensions.set(paraId, {
      paraId,
      parentParaId: attrs["w15:paraIdParent"] ?? attrs.paraIdParent,
      isDone,
    });
  }

  return extensions;
}
410
+
411
/**
 * Reads the paragraph-id to durable-id pairs of the comment ids part.
 *
 * @param xml XML of the comment ids part; missing or blank input yields an empty map.
 * @returns Durable ids keyed by paragraph id. Elements lacking either attribute are skipped.
 * @throws Error when the XML is malformed or has no `commentsIds` element.
 */
function parseCommentDurableIds(xml: string | undefined): Map<string, string> {
  const durableIds = new Map<string, string>();
  if (!xml || xml.trim().length === 0) {
    return durableIds;
  }

  const container = findChildElement(parseXml(xml), "commentsIds");

  for (const node of container.children) {
    if (node.type !== "element" || localName(node.name) !== "commentId") {
      continue;
    }

    const attrs = node.attributes;
    const paraId = attrs["w16cid:paraId"] ?? attrs.paraId;
    const durableId = attrs["w16cid:durableId"] ?? attrs.durableId;
    if (!paraId || !durableId) {
      continue;
    }

    durableIds.set(paraId, durableId);
  }

  return durableIds;
}
434
+
435
/**
 * Collects the author names listed in the people part.
 *
 * @param xml XML of the people part; missing or blank input yields an empty list.
 * @returns Distinct author names sorted with `localeCompare`.
 * @throws Error when the XML is malformed or has no `people` element.
 */
function parsePeopleAuthors(xml: string | undefined): string[] {
  if (!xml || xml.trim().length === 0) {
    return [];
  }

  const container = findChildElement(parseXml(xml), "people");
  const uniqueAuthors = new Set<string>();

  container.children.forEach((node) => {
    if (node.type !== "element" || localName(node.name) !== "person") {
      return;
    }

    const author = node.attributes["w15:author"] ?? node.attributes.author;
    if (author) {
      uniqueAuthors.add(author);
    }
  });

  return Array.from(uniqueAuthors).sort((a, b) => a.localeCompare(b));
}
457
+
458
/**
 * Returns the text of the first opening (or self-closing) tag whose local
 * name is `localTagName`, with or without a namespace prefix.
 *
 * The match is anchored to the tag name itself, so XML comments, processing
 * instructions and other tags that merely mention the name in their content
 * or attribute values are not mistaken for the element.
 *
 * @param xml Part XML to search; `undefined` or empty yields `undefined`.
 * @param localTagName Element name without namespace prefix.
 * @returns The matched tag text including its angle brackets, or `undefined`
 *   when no such tag exists.
 */
function extractRootTag(xml: string | undefined, localTagName: string): string | undefined {
  if (!xml) {
    return undefined;
  }

  // The name is spliced into a pattern, so regex metacharacters must be neutralised.
  const escapedName = localTagName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // "<", optional "prefix:", the exact name, then whitespace, "/" or ">".
  const pattern = new RegExp(
    `<(?:[A-Za-z_][\\w.-]*:)?${escapedName}(?=[\\s/>])[^>]*>`,
    "u",
  );
  return pattern.exec(xml)?.[0];
}
466
+
467
/**
 * Walks the direct children of the document body and records where each
 * comment's markers sit.
 *
 * A single running cursor is kept: text inside paragraphs advances it (see
 * `walkInlineNode`), each non-paragraph body element advances it by one, and
 * one extra position is inserted between two directly consecutive paragraphs.
 * Paragraphs are numbered from zero in body order.
 *
 * @param documentXml XML of the main document part.
 * @returns Marker positions keyed by comment id.
 * @throws Error when the XML is malformed or lacks the `document` or `body` element.
 */
function parseCommentAnchors(documentXml: string): Map<string, CommentAnchorBounds> {
  const documentElement = findChildElement(parseXml(documentXml), "document");
  const bodyElement = findChildElement(documentElement, "body");
  const anchors = new Map<string, CommentAnchorBounds>();

  let cursor = 0;
  let paragraphIndex = -1;
  let afterParagraph = false;
  const readCursor = (): number => cursor;
  const writeCursor = (next: number): void => {
    cursor = next;
  };

  for (const child of bodyElement.children) {
    if (child.type !== "element") {
      continue;
    }

    const isParagraph = localName(child.name) === "p";
    if (isParagraph) {
      // Separator position between two back-to-back paragraphs.
      if (afterParagraph) {
        cursor += 1;
      }
      paragraphIndex += 1;
      walkParagraph(child, paragraphIndex, anchors, readCursor, writeCursor);
    } else {
      // Any other body-level element occupies exactly one position.
      cursor += 1;
    }
    afterParagraph = isParagraph;
  }

  return anchors;
}
499
+
500
/**
 * Visits every direct child of a paragraph with `walkInlineNode`.
 *
 * @param paragraph Paragraph element to walk.
 * @param paragraphIndex Zero-based index of the paragraph in the body.
 * @param anchors Map that receives marker positions.
 * @param getCursor Reads the running cursor.
 * @param setCursor Writes the running cursor.
 */
function walkParagraph(
  paragraph: XmlElementNode,
  paragraphIndex: number,
  anchors: Map<string, CommentAnchorBounds>,
  getCursor: () => number,
  setCursor: (next: number) => void,
): void {
  paragraph.children.forEach((inlineChild) => {
    walkInlineNode(inlineChild, paragraphIndex, anchors, getCursor, setCursor);
  });
}
511
+
512
/**
 * Processes one node inside a paragraph, recursing into unrecognised elements.
 *
 * - `commentRangeStart`, `commentRangeEnd` and `commentReference` record the
 *   current cursor and paragraph index for their comment id; markers without
 *   an id are ignored.
 * - `t` advances the cursor by the length of its direct text children.
 * - `tab` and `br` advance the cursor by one.
 * - Text nodes outside `t` are ignored; any other element is descended into.
 *
 * @param node Node to process.
 * @param paragraphIndex Zero-based index of the enclosing paragraph.
 * @param anchors Map that receives marker positions.
 * @param getCursor Reads the running cursor.
 * @param setCursor Writes the running cursor.
 */
function walkInlineNode(
  node: XmlNode,
  paragraphIndex: number,
  anchors: Map<string, CommentAnchorBounds>,
  getCursor: () => number,
  setCursor: (next: number) => void,
): void {
  if (node.type !== "element") {
    return;
  }

  const tag = localName(node.name);

  if (tag === "commentRangeStart" || tag === "commentRangeEnd" || tag === "commentReference") {
    const commentId = node.attributes["w:id"] ?? node.attributes.id;
    if (!commentId) {
      return;
    }

    const bounds = ensureCommentAnchor(anchors, commentId);
    const position = getCursor();
    if (tag === "commentRangeStart") {
      bounds.start = position;
      bounds.startParagraphIndex = paragraphIndex;
    } else if (tag === "commentRangeEnd") {
      bounds.end = position;
      bounds.endParagraphIndex = paragraphIndex;
    } else {
      bounds.referenceAt = position;
      bounds.referenceParagraphIndex = paragraphIndex;
    }
    return;
  }

  if (tag === "t") {
    let textLength = 0;
    for (const child of node.children) {
      if (child.type === "text") {
        textLength += child.text.length;
      }
    }
    setCursor(getCursor() + textLength);
    return;
  }

  if (tag === "tab" || tag === "br") {
    setCursor(getCursor() + 1);
    return;
  }

  for (const child of node.children) {
    walkInlineNode(child, paragraphIndex, anchors, getCursor, setCursor);
  }
}
569
+
570
/**
 * Returns the bounds record for a comment id, creating and registering an
 * empty one the first time the id is seen.
 *
 * @param anchors Map of records keyed by comment id; mutated on first sight of an id.
 * @param commentId Comment id to look up.
 * @returns The record stored in `anchors` for `commentId`.
 */
function ensureCommentAnchor(
  anchors: Map<string, CommentAnchorBounds>,
  commentId: string,
): CommentAnchorBounds {
  let bounds = anchors.get(commentId);
  if (!bounds) {
    bounds = {};
    anchors.set(commentId, bounds);
  }
  return bounds;
}
583
+
584
/**
 * Concatenates the text of every child of a paragraph, in order.
 *
 * @param paragraph Paragraph element to read.
 * @returns The combined text; empty when the paragraph has no children.
 */
function extractParagraphText(paragraph: XmlElementNode): string {
  return paragraph.children.map((child) => extractNodeText(child)).join("");
}
593
+
594
/**
 * Produces the plain text of a node.
 *
 * Text nodes yield their text, `tab` yields a tab character, `br` yields a
 * newline, and every other element yields the concatenated text of its
 * children.
 *
 * @param node Node to read.
 * @returns The node's text.
 */
function extractNodeText(node: XmlNode): string {
  if (node.type === "text") {
    return node.text;
  }

  const tag = localName(node.name);
  if (tag === "tab") {
    return "\t";
  }
  if (tag === "br") {
    return "\n";
  }

  let combined = "";
  for (const child of node.children) {
    combined += extractNodeText(child);
  }
  return combined;
}
610
+
611
/**
 * Sort comparator for imported threads.
 *
 * Threads are ordered by the start of their range anchor. Threads whose
 * anchor is not a range sort after all ranged ones, and ties are broken by
 * `commentId` using `localeCompare`.
 *
 * @param left First thread.
 * @param right Second thread.
 * @returns Negative, zero or positive, as expected by `Array.prototype.sort`.
 */
function compareThreadsByAnchor(left: CommentThread, right: CommentThread): number {
  const startOf = (thread: CommentThread): number =>
    thread.anchor.kind === "range" ? thread.anchor.range.from : Number.MAX_SAFE_INTEGER;

  const leftStart = startOf(left);
  const rightStart = startOf(right);
  if (leftStart === rightStart) {
    return left.commentId.localeCompare(right.commentId);
  }

  return leftStart - rightStart;
}
623
+
624
/**
 * Derives a best-effort range for a thread that cannot be attached normally.
 *
 * The range spans the smallest to the largest of whichever marker positions
 * (`start`, `end`, `referenceAt`) were recorded.
 *
 * @param anchor Marker positions for the comment, or `undefined` when the
 *   document contains no marker for it at all.
 * @returns The covering range, or `undefined` when there is no anchor record
 *   or it holds no position.
 */
function toDetachedRange(
  anchor: CommentAnchorBounds | undefined,
): { from: number; to: number } | undefined {
  // A comment defined without any document marker has no record to read.
  if (!anchor) {
    return undefined;
  }

  const positions = [anchor.start, anchor.end, anchor.referenceAt].filter(
    (value): value is number => typeof value === "number",
  );
  if (positions.length === 0) {
    return undefined;
  }

  return {
    from: Math.min(...positions),
    to: Math.max(...positions),
  };
}
636
+
637
/**
 * Tokenizes an XML string into a lightweight node tree.
 *
 * Processing instructions, DOCTYPE declarations and comments are dropped.
 * CDATA content becomes a text node verbatim; other character data is
 * entity-decoded and kept when non-empty. Every node records the source
 * offsets it was read from.
 *
 * @param xml XML source text.
 * @returns A synthetic `#document` element whose children are the top-level nodes.
 * @throws Error when a closing tag has no open element, when a closing tag
 *   names a different element than the one being closed, or when an element
 *   is left unclosed.
 */
function parseXml(xml: string): XmlElementNode {
  const root: XmlElementNode = {
    type: "element",
    name: "#document",
    attributes: {},
    children: [],
    start: 0,
    end: xml.length,
  };
  // stack[0] is always the synthetic root; it is never popped.
  const stack: XmlElementNode[] = [root];
  const tokenPattern =
    /<!--[\s\S]*?-->|<\?[\s\S]*?\?>|<!DOCTYPE[\s\S]*?>|<!\[CDATA\[[\s\S]*?\]\]>|<[^>]+>|[^<]+/gu;

  for (const match of xml.matchAll(tokenPattern)) {
    const token = match[0] ?? "";
    const start = match.index ?? 0;
    const end = start + token.length;

    if (token.startsWith("<?") || token.startsWith("<!DOCTYPE") || token.startsWith("<!--")) {
      continue;
    }

    if (token.startsWith("<![CDATA[")) {
      // Strip the "<![CDATA[" prefix and "]]>" suffix; the content is not entity-decoded.
      stack[stack.length - 1]?.children.push({
        type: "text",
        text: token.slice(9, -3),
        start,
        end,
      });
      continue;
    }

    if (token.startsWith("</")) {
      // Only real elements may be closed. Popping the root here would make
      // later nodes vanish and surface as a misleading "unclosed tag" error.
      const open = stack.length > 1 ? stack.pop() : undefined;
      if (!open) {
        throw new Error("Malformed XML: unexpected closing tag.");
      }

      const closingName = token.slice(2, -1).trim();
      if (closingName !== open.name) {
        throw new Error(
          `Malformed XML: closing tag ${closingName} does not match ${open.name}.`,
        );
      }

      // Extend the element's span to cover its closing tag.
      open.end = end;
      continue;
    }

    if (token.startsWith("<")) {
      const selfClosing = /\/>$/.test(token);
      const tagBody = token.slice(1, token.length - (selfClosing ? 2 : 1)).trim();
      const { name, attributes } = parseTag(tagBody);
      const node: XmlElementNode = {
        type: "element",
        name,
        attributes,
        children: [],
        start,
        end,
      };
      stack[stack.length - 1]?.children.push(node);
      if (!selfClosing) {
        stack.push(node);
      }
      continue;
    }

    const text = decodeXmlText(token);
    if (text.length > 0) {
      stack[stack.length - 1]?.children.push({
        type: "text",
        text,
        start,
        end,
      });
    }
  }

  if (stack.length !== 1) {
    throw new Error("Malformed XML: unclosed tag.");
  }

  return root;
}
715
+
716
/**
 * Splits the inside of a tag (without its angle brackets or trailing slash)
 * into the element name and its attributes.
 *
 * The name is everything before the first whitespace character. Attributes
 * are `name="value"` or `name='value'` pairs; values are entity-decoded, and
 * a later duplicate overwrites an earlier one.
 *
 * @param tagBody Tag content such as `w:p w14:paraId="0A"`.
 * @returns The tag name as written and its attribute map.
 */
function parseTag(tagBody: string): { name: string; attributes: Record<string, string> } {
  const attributes: Record<string, string> = {};
  const splitAt = tagBody.search(/\s/u);
  if (splitAt === -1) {
    return { name: tagBody, attributes };
  }

  const name = tagBody.slice(0, splitAt);
  const attributeText = tagBody.slice(splitAt + 1);
  const attributePattern = /([A-Za-z_][A-Za-z0-9:._-]*)\s*=\s*("([^"]*)"|'([^']*)')/gu;

  let found = attributePattern.exec(attributeText);
  while (found !== null) {
    const key = found[1];
    // Group 3 holds a double-quoted value, group 4 a single-quoted one.
    const value = found[3] ?? found[4] ?? "";
    if (key) {
      attributes[key] = decodeXmlText(value);
    }
    found = attributePattern.exec(attributeText);
  }

  return { name, attributes };
}
733
+
734
/**
 * Finds the first direct child element with the given local name.
 *
 * @param node Parent element to search.
 * @param name Local (prefix-free) element name to look for.
 * @returns The first matching child element.
 * @throws Error when no direct child element has that local name.
 */
function findChildElement(node: XmlElementNode, name: string): XmlElementNode {
  for (const child of node.children) {
    if (child.type === "element" && localName(child.name) === name) {
      return child;
    }
  }

  throw new Error(`Expected XML element ${name}.`);
}
746
+
747
/**
 * Strips the namespace prefix from a qualified name.
 *
 * @param name Name such as `w:p` or `p`.
 * @returns Everything after the first colon, or the name unchanged when it has no colon.
 */
function localName(name: string): string {
  const colonAt = name.indexOf(":");
  if (colonAt < 0) {
    return name;
  }

  return name.substring(colonAt + 1);
}
751
+
752
/**
 * Decodes XML character and entity references in a string.
 *
 * Handles hexadecimal (`&#x41;`) and decimal (`&#65;`) character references
 * and the five predefined entities (`amp`, `lt`, `gt`, `quot`, `apos`). Any
 * other named reference is left as written, and so is a numeric reference
 * above U+10FFFF, which has no character to decode to.
 *
 * @param text Raw text or attribute value.
 * @returns The text with recognised references replaced.
 */
function decodeXmlText(text: string): string {
  return text.replace(
    /&(?:#x([0-9A-Fa-f]+)|#([0-9]+)|([A-Za-z]+));/gu,
    (reference: string, hex?: string, dec?: string, named?: string): string => {
      if (hex !== undefined || dec !== undefined) {
        const codePoint =
          hex !== undefined ? Number.parseInt(hex, 16) : Number.parseInt(dec ?? "", 10);
        // String.fromCodePoint throws a RangeError above U+10FFFF.
        return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : reference;
      }

      switch (named) {
        case "amp":
          return "&";
        case "lt":
          return "<";
        case "gt":
          return ">";
        case "quot":
          return "\"";
        case "apos":
          return "'";
        default:
          return reference;
      }
    },
  );
}