@beyondwork/docx-react-component 1.0.1 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. package/README.md +44 -104
  2. package/package.json +50 -30
  3. package/src/README.md +85 -0
  4. package/src/api/README.md +22 -0
  5. package/src/api/public-types.ts +525 -0
  6. package/src/compare/diff-engine.ts +530 -0
  7. package/src/compare/export-redlines.ts +162 -0
  8. package/src/compare/snapshot.ts +37 -0
  9. package/src/component-inventory.md +99 -0
  10. package/src/core/README.md +10 -0
  11. package/src/core/commands/README.md +3 -0
  12. package/src/core/commands/formatting-commands.ts +161 -0
  13. package/src/core/commands/image-commands.ts +144 -0
  14. package/src/core/commands/index.ts +1013 -0
  15. package/src/core/commands/list-commands.ts +370 -0
  16. package/src/core/commands/review-commands.ts +108 -0
  17. package/src/core/commands/text-commands.ts +119 -0
  18. package/src/core/schema/README.md +3 -0
  19. package/src/core/schema/text-schema.ts +512 -0
  20. package/src/core/selection/README.md +3 -0
  21. package/src/core/selection/mapping.ts +238 -0
  22. package/src/core/selection/review-anchors.ts +94 -0
  23. package/src/core/state/README.md +3 -0
  24. package/src/core/state/editor-state.ts +580 -0
  25. package/src/core/state/text-transaction.ts +276 -0
  26. package/src/formats/xlsx/io/parse-shared-strings.ts +41 -0
  27. package/src/formats/xlsx/io/parse-sheet.ts +289 -0
  28. package/src/formats/xlsx/io/parse-styles.ts +57 -0
  29. package/src/formats/xlsx/io/parse-workbook.ts +75 -0
  30. package/src/formats/xlsx/io/xlsx-session.ts +306 -0
  31. package/src/formats/xlsx/model/cell.ts +189 -0
  32. package/src/formats/xlsx/model/sheet.ts +244 -0
  33. package/src/formats/xlsx/model/styles.ts +118 -0
  34. package/src/formats/xlsx/model/workbook.ts +449 -0
  35. package/src/index.ts +45 -0
  36. package/src/io/README.md +10 -0
  37. package/src/io/docx-session.ts +1763 -0
  38. package/src/io/export/README.md +3 -0
  39. package/src/io/export/export-session.ts +165 -0
  40. package/src/io/export/minimal-docx.ts +115 -0
  41. package/src/io/export/reattach-preserved-parts.ts +54 -0
  42. package/src/io/export/serialize-comments.ts +876 -0
  43. package/src/io/export/serialize-footnotes.ts +217 -0
  44. package/src/io/export/serialize-headers-footers.ts +200 -0
  45. package/src/io/export/serialize-main-document.ts +982 -0
  46. package/src/io/export/serialize-numbering.ts +97 -0
  47. package/src/io/export/serialize-revisions.ts +389 -0
  48. package/src/io/export/serialize-runtime-revisions.ts +265 -0
  49. package/src/io/export/serialize-tables.ts +147 -0
  50. package/src/io/export/split-review-boundaries.ts +194 -0
  51. package/src/io/normalize/README.md +3 -0
  52. package/src/io/normalize/normalize-text.ts +437 -0
  53. package/src/io/ooxml/README.md +3 -0
  54. package/src/io/ooxml/parse-comments.ts +779 -0
  55. package/src/io/ooxml/parse-complex-content.ts +287 -0
  56. package/src/io/ooxml/parse-fields.ts +438 -0
  57. package/src/io/ooxml/parse-footnotes.ts +403 -0
  58. package/src/io/ooxml/parse-headers-footers.ts +483 -0
  59. package/src/io/ooxml/parse-inline-media.ts +431 -0
  60. package/src/io/ooxml/parse-main-document.ts +1846 -0
  61. package/src/io/ooxml/parse-numbering.ts +425 -0
  62. package/src/io/ooxml/parse-revisions.ts +658 -0
  63. package/src/io/ooxml/parse-shapes.ts +271 -0
  64. package/src/io/ooxml/parse-tables.ts +568 -0
  65. package/src/io/ooxml/parse-theme.ts +314 -0
  66. package/src/io/ooxml/part-manifest.ts +136 -0
  67. package/src/io/ooxml/revision-boundaries.ts +351 -0
  68. package/src/io/opc/README.md +3 -0
  69. package/src/io/opc/corrupt-package.ts +166 -0
  70. package/src/io/opc/docx-package.ts +74 -0
  71. package/src/io/opc/package-reader.ts +325 -0
  72. package/src/io/opc/package-writer.ts +273 -0
  73. package/src/legal/bookmarks.ts +196 -0
  74. package/src/legal/cross-references.ts +356 -0
  75. package/src/legal/defined-terms.ts +203 -0
  76. package/src/model/README.md +3 -0
  77. package/src/model/canonical-document.ts +1911 -0
  78. package/src/model/cds-1.0.0.ts +196 -0
  79. package/src/model/snapshot.ts +393 -0
  80. package/src/preservation/README.md +3 -0
  81. package/src/preservation/markup-compatibility.ts +48 -0
  82. package/src/preservation/opaque-fragment-store.ts +89 -0
  83. package/src/preservation/opaque-region.ts +233 -0
  84. package/src/preservation/package-preservation.ts +120 -0
  85. package/src/preservation/preserved-part-manifest.ts +56 -0
  86. package/src/preservation/relationship-retention.ts +57 -0
  87. package/src/preservation/store.ts +185 -0
  88. package/src/review/README.md +16 -0
  89. package/src/review/store/README.md +3 -0
  90. package/src/review/store/comment-anchors.ts +70 -0
  91. package/src/review/store/comment-remapping.ts +154 -0
  92. package/src/review/store/comment-store.ts +331 -0
  93. package/src/review/store/comment-thread.ts +109 -0
  94. package/src/review/store/revision-actions.ts +394 -0
  95. package/src/review/store/revision-store.ts +303 -0
  96. package/src/review/store/revision-types.ts +168 -0
  97. package/src/review/store/runtime-comment-store.ts +43 -0
  98. package/src/runtime/README.md +3 -0
  99. package/src/runtime/ai-action-policy.ts +764 -0
  100. package/src/runtime/document-runtime.ts +967 -0
  101. package/src/runtime/read-only-diagnostics-runtime.ts +232 -0
  102. package/src/runtime/review-runtime.ts +44 -0
  103. package/src/runtime/revision-runtime.ts +107 -0
  104. package/src/runtime/session-capabilities.ts +138 -0
  105. package/src/runtime/surface-projection.ts +570 -0
  106. package/src/runtime/table-commands.ts +87 -0
  107. package/src/runtime/table-schema.ts +140 -0
  108. package/src/runtime/virtualized-rendering.ts +258 -0
  109. package/src/ui/README.md +30 -0
  110. package/src/ui/WordReviewEditor.tsx +1506 -0
  111. package/src/ui/comments/README.md +3 -0
  112. package/src/ui/compatibility/README.md +3 -0
  113. package/src/ui/editor-surface/README.md +3 -0
  114. package/src/ui/headless/comment-decoration-model.ts +124 -0
  115. package/src/ui/headless/revision-decoration-model.ts +128 -0
  116. package/src/ui/headless/selection-helpers.ts +34 -0
  117. package/src/ui/headless/use-editor-keyboard.ts +98 -0
  118. package/src/ui/review/README.md +3 -0
  119. package/src/ui/shared/revision-filters.ts +31 -0
  120. package/src/ui/status/README.md +3 -0
  121. package/src/ui/theme/README.md +3 -0
  122. package/src/ui/toolbar/README.md +3 -0
  123. package/src/ui-tailwind/chrome/tw-alert-banner.tsx +48 -0
  124. package/src/ui-tailwind/chrome/tw-selection-toolbar.tsx +44 -0
  125. package/src/ui-tailwind/chrome/tw-unsaved-modal.tsx +58 -0
  126. package/src/ui-tailwind/chrome/use-before-unload.ts +20 -0
  127. package/src/ui-tailwind/editor-surface/pm-command-bridge.ts +139 -0
  128. package/src/ui-tailwind/editor-surface/pm-decorations.ts +98 -0
  129. package/src/ui-tailwind/editor-surface/pm-position-map.ts +123 -0
  130. package/src/ui-tailwind/editor-surface/pm-schema.ts +452 -0
  131. package/src/ui-tailwind/editor-surface/pm-state-from-snapshot.ts +327 -0
  132. package/src/ui-tailwind/editor-surface/search-plugin.ts +157 -0
  133. package/src/ui-tailwind/editor-surface/tw-caret.tsx +12 -0
  134. package/src/ui-tailwind/editor-surface/tw-editor-surface.tsx +150 -0
  135. package/src/ui-tailwind/editor-surface/tw-inline-token.tsx +118 -0
  136. package/src/ui-tailwind/editor-surface/tw-opaque-block.tsx +52 -0
  137. package/src/ui-tailwind/editor-surface/tw-paragraph-block.tsx +151 -0
  138. package/src/ui-tailwind/editor-surface/tw-prosemirror-surface.tsx +215 -0
  139. package/src/ui-tailwind/editor-surface/tw-segment-view.tsx +111 -0
  140. package/src/ui-tailwind/editor-surface/tw-table-node-view.tsx +122 -0
  141. package/src/ui-tailwind/index.ts +61 -0
  142. package/src/ui-tailwind/review/tw-comment-sidebar.tsx +276 -0
  143. package/src/ui-tailwind/review/tw-health-panel.tsx +120 -0
  144. package/src/ui-tailwind/review/tw-review-rail.tsx +120 -0
  145. package/src/ui-tailwind/review/tw-revision-sidebar.tsx +164 -0
  146. package/src/ui-tailwind/status/tw-status-bar.tsx +58 -0
  147. package/src/ui-tailwind/theme/editor-theme.css +190 -0
  148. package/src/ui-tailwind/toolbar/tw-toolbar-icon-button.tsx +48 -0
  149. package/src/ui-tailwind/toolbar/tw-toolbar.tsx +231 -0
  150. package/src/ui-tailwind/tw-review-workspace.tsx +140 -0
  151. package/src/validation/README.md +3 -0
  152. package/src/validation/compatibility-engine.ts +317 -0
  153. package/src/validation/compatibility-report.ts +160 -0
  154. package/src/validation/diagnostics.ts +203 -0
  155. package/src/validation/import-diagnostics.ts +128 -0
  156. package/src/validation/low-priority-word-surfaces.ts +373 -0
  157. package/dist/chunk-32W6IVQE.js +0 -7725
  158. package/dist/chunk-32W6IVQE.js.map +0 -1
  159. package/dist/index.cjs +0 -23722
  160. package/dist/index.cjs.map +0 -1
  161. package/dist/index.d.cts +0 -7
  162. package/dist/index.d.ts +0 -7
  163. package/dist/index.js +0 -16011
  164. package/dist/index.js.map +0 -1
  165. package/dist/public-types-DqCURAz8.d.cts +0 -1152
  166. package/dist/public-types-DqCURAz8.d.ts +0 -1152
  167. package/dist/tailwind.cjs +0 -8295
  168. package/dist/tailwind.cjs.map +0 -1
  169. package/dist/tailwind.d.cts +0 -323
  170. package/dist/tailwind.d.ts +0 -323
  171. package/dist/tailwind.js +0 -553
  172. package/dist/tailwind.js.map +0 -1
@@ -0,0 +1,196 @@
1
+ import { extractBookmarksFromBodyXml } from "../io/ooxml/parse-fields.ts";
2
+ import type {
3
+ BookmarkEndNode,
4
+ BookmarkStartNode,
5
+ CanonicalDocument,
6
+ DocumentNode,
7
+ } from "../model/canonical-document.ts";
8
+
9
/**
 * A bookmark recovered from a document, described by where its start and end
 * markers were found in the ordered sequence of bookmark markers.
 */
export interface LegalBookmark {
  /** Identifier used to pair a start marker with its end marker. */
  bookmarkId: string;
  /** Name copied from the start marker; absent on `end-only` entries. */
  name?: string;
  /** True when the name starts with `_`; always false on `end-only` entries. */
  hidden: boolean;
  /** `"ooxml"` when read from document XML, `"canonical"` when read from the canonical tree. */
  source: "ooxml" | "canonical";
  /** Whether both markers were found, or only one of them. */
  status: "paired" | "start-only" | "end-only";
  /** Position of the start marker in the marker sequence (not a character offset). */
  startIndex?: number;
  /** Position of the end marker in the marker sequence (not a character offset). */
  endIndex?: number;
}
18
+
19
/** A start marker that has been seen but not yet matched with an end marker. */
interface OpenBookmark {
  /** Identifier the matching end marker must carry. */
  bookmarkId: string;
  /** Name read from the start marker, if it had one. */
  name?: string;
  /** True when the name starts with `_`. */
  hidden: boolean;
  /** Position of the start marker in the marker sequence. */
  startIndex: number;
}
25
+
26
/**
 * Pairs the bookmark start/end markers found in a document body XML string.
 *
 * Markers come from `extractBookmarksFromBodyXml` and are matched by
 * `bookmarkId`. Start markers sharing an id are kept on a per-id stack, so an
 * end marker closes the most recently opened start with that id.
 *
 * @param xml Document XML handed to `extractBookmarksFromBodyXml`.
 * @returns One entry per pair (`paired`), per unmatched end (`end-only`) and
 *   per unmatched start (`start-only`), ordered by `compareBookmarks`.
 */
export function parseBookmarksFromDocumentXml(xml: string): LegalBookmark[] {
  const markers = extractBookmarksFromBodyXml(xml);
  const pendingById = new Map<string, OpenBookmark[]>();
  const collected: LegalBookmark[] = [];

  for (const [position, marker] of markers.entries()) {
    const id = marker.bookmarkId;

    if (marker.type === "bookmark_start") {
      const pending = pendingById.get(id) ?? [];
      pending.push({
        bookmarkId: id,
        name: marker.name,
        hidden: isHiddenBookmarkName(marker.name),
        startIndex: position,
      });
      pendingById.set(id, pending);
      continue;
    }

    // End marker: close the innermost open start with the same id, if any.
    const pending = pendingById.get(id);
    const opener = pending?.pop();
    if (pending !== undefined && pending.length === 0) {
      pendingById.delete(id);
    }

    collected.push(
      opener
        ? {
            bookmarkId: id,
            name: opener.name,
            hidden: opener.hidden,
            source: "ooxml",
            status: "paired",
            startIndex: opener.startIndex,
            endIndex: position,
          }
        : {
            bookmarkId: id,
            hidden: false,
            source: "ooxml",
            status: "end-only",
            endIndex: position,
          },
    );
  }

  // Whatever is still open never saw an end marker.
  for (const leftovers of pendingById.values()) {
    for (const unclosed of leftovers) {
      collected.push({
        bookmarkId: unclosed.bookmarkId,
        name: unclosed.name,
        hidden: unclosed.hidden,
        source: "ooxml",
        status: "start-only",
        startIndex: unclosed.startIndex,
      });
    }
  }

  collected.sort(compareBookmarks);
  return collected;
}
90
+
91
/**
 * Pairs the `bookmark_start` / `bookmark_end` nodes found in a canonical
 * document tree.
 *
 * The tree is traversed with `walkDocument`; bookmark nodes are recorded in
 * visit order and matched by `bookmarkId`, with a per-id stack so an end node
 * closes the most recently opened start with that id.
 *
 * @param document Either an object exposing `content`, or a node to use as
 *   the traversal root directly.
 * @returns One entry per pair (`paired`), per unmatched end (`end-only`) and
 *   per unmatched start (`start-only`), ordered by `compareBookmarks`. Indices
 *   are positions within the collected bookmark-node sequence.
 */
export function collectBookmarksFromCanonicalDocument(
  document: Pick<CanonicalDocument, "content"> | DocumentNode,
): LegalBookmark[] {
  const root = "content" in document ? document.content : document;

  const markers: Array<BookmarkStartNode | BookmarkEndNode> = [];
  walkDocument(root, (node) => {
    switch (node.type) {
      case "bookmark_start":
      case "bookmark_end":
        markers.push(node);
        break;
      default:
        break;
    }
  });

  const pendingById = new Map<string, OpenBookmark[]>();
  const collected: LegalBookmark[] = [];

  for (const [position, marker] of markers.entries()) {
    const id = marker.bookmarkId;

    if (marker.type === "bookmark_start") {
      const pending = pendingById.get(id) ?? [];
      pending.push({
        bookmarkId: id,
        name: marker.name,
        hidden: isHiddenBookmarkName(marker.name),
        startIndex: position,
      });
      pendingById.set(id, pending);
      continue;
    }

    // End node: close the innermost open start with the same id, if any.
    const pending = pendingById.get(id);
    const opener = pending?.pop();
    if (pending !== undefined && pending.length === 0) {
      pendingById.delete(id);
    }

    collected.push(
      opener
        ? {
            bookmarkId: id,
            name: opener.name,
            hidden: opener.hidden,
            source: "canonical",
            status: "paired",
            startIndex: opener.startIndex,
            endIndex: position,
          }
        : {
            bookmarkId: id,
            hidden: false,
            source: "canonical",
            status: "end-only",
            endIndex: position,
          },
    );
  }

  // Whatever is still open never saw an end node.
  for (const leftovers of pendingById.values()) {
    for (const unclosed of leftovers) {
      collected.push({
        bookmarkId: unclosed.bookmarkId,
        name: unclosed.name,
        hidden: unclosed.hidden,
        source: "canonical",
        status: "start-only",
        startIndex: unclosed.startIndex,
      });
    }
  }

  collected.sort(compareBookmarks);
  return collected;
}
165
+
166
/**
 * Reports whether a bookmark name is treated as hidden, i.e. starts with `_`.
 *
 * @param name Bookmark name, or `undefined` when the marker carried none.
 * @returns `true` only for a non-empty name whose first character is `_`.
 */
export function isHiddenBookmarkName(name: string | undefined): boolean {
  return name?.startsWith("_") ?? false;
}
169
+
170
/**
 * Sort comparator for bookmarks.
 *
 * Orders by `startIndex`, falling back to `endIndex` and then to
 * `Number.MAX_SAFE_INTEGER` when neither is set; ties are broken with
 * `localeCompare` on `bookmarkId`.
 */
function compareBookmarks(left: LegalBookmark, right: LegalBookmark): number {
  const leftPosition = left.startIndex ?? left.endIndex ?? Number.MAX_SAFE_INTEGER;
  const rightPosition = right.startIndex ?? right.endIndex ?? Number.MAX_SAFE_INTEGER;
  const positionDelta = leftPosition - rightPosition;

  return positionDelta || left.bookmarkId.localeCompare(right.bookmarkId);
}
177
+
178
/**
 * Depth-first, pre-order traversal of a document tree.
 *
 * `visit` is called on the node first. Traversal then descends into
 * `children` when the node has an array under that key, and afterwards into
 * `rows` of a `table` node or `cells` of a `table_row` node.
 */
function walkDocument(node: DocumentNode, visit: (node: DocumentNode) => void): void {
  visit(node);

  const nested = "children" in node ? node.children : undefined;
  if (Array.isArray(nested)) {
    for (const child of nested) {
      walkDocument(child, visit);
    }
  }

  switch (node.type) {
    case "table":
      for (const row of node.rows) {
        walkDocument(row, visit);
      }
      break;
    case "table_row":
      for (const cell of node.cells) {
        walkDocument(cell, visit);
      }
      break;
    default:
      break;
  }
}
@@ -0,0 +1,356 @@
1
+ import {
2
+ extractComplexFieldsFromBodyXml,
3
+ parseFieldsFromParagraphXml,
4
+ } from "../io/ooxml/parse-fields.ts";
5
+ import { parseMainDocumentXml } from "../io/ooxml/parse-main-document.ts";
6
+ import type {
7
+ CanonicalDocument,
8
+ DocumentNode,
9
+ FieldNode,
10
+ HyperlinkNode,
11
+ ParagraphNode,
12
+ } from "../model/canonical-document.ts";
13
+
14
/** A textual cross-reference such as "Section 1.2" detected by pattern matching. */
export interface CrossReferencePattern {
  /** Lower-cased keyword that introduced the reference. */
  kind: "section" | "clause" | "article" | "schedule" | "exhibit" | "appendix";
  /** Keyword (as written) followed by the matched number or letter, e.g. "Exhibit A-1". */
  label: string;
  /** The matched number or letter on its own, e.g. "A-1". */
  targetHint: string;
}
19
+
20
/** A cross-reference found in a document, whatever mechanism expressed it. */
export interface CrossReference {
  /** Mechanism the reference was recovered from. */
  source: "hyperlink" | "field" | "w:ref" | "text-pattern";
  /** `"bookmark"` for hyperlink, field and `w:ref` sources; the matched keyword for text patterns. */
  kind: "bookmark" | "section" | "clause" | "article" | "schedule" | "exhibit" | "appendix";
  /** Bookmark name or pattern hint the reference points at, when known. */
  target?: string;
  /** Whitespace-normalised field instruction; only set for field references. */
  instruction?: string;
  /** Display text of the reference, falling back to the target when empty. */
  label: string;
  /** Index of the containing paragraph; not set for references found by whole-document scans. */
  paragraphIndex?: number;
  /** `"high"` for explicit links and fields, `"medium"` for heuristic matches. */
  confidence: "high" | "medium";
}
29
+
30
/** Result of parsing a REF / PAGEREF / NOTEREF field instruction. */
interface FieldReference {
  /** Upper-cased field keyword. */
  kind: "REF" | "PAGEREF" | "NOTEREF";
  /** First argument of the instruction, with surrounding quotes removed. */
  target: string;
  /** The instruction after trimming and collapsing whitespace runs to single spaces. */
  instruction: string;
}
35
+
36
// Capitalised keyword followed by a dotted number ("1.2.3"), a letter-number
// pair ("A-1") or a single capital letter; the lookahead rejects matches that
// run straight into further letters or digits.
const CROSS_REFERENCE_PATTERN =
  /\b(Section|Clause|Article|Schedule|Exhibit|Appendix)\s+(\d+(?:\.\d+)*|[A-Z]-\d+|[A-Z])(?=[^A-Za-z0-9]|$)/g;

// Opening (or self-closing) <w:ref ...> tag; group 1 is the raw attribute text.
const W_REF_PATTERN = /<w:ref\b([^>]*)\/?>/g;

// One double-quoted attribute, with an optional "w:" prefix dropped from the
// captured name. Group 1 is the name, group 2 the value.
const ATTRIBUTE_PATTERN = /(?:^|\s)(?:w:)?([A-Za-z][\w-]*)="([^"]*)"/g;

// A complex field laid out as instrText ... fldChar "separate" ... w:t ...
// fldChar "end". Group 1 is the instruction text, group 2 the first result
// text run after the separator.
const COMPLEX_FIELD_PATTERN =
  /<w:instrText\b[^>]*>([\s\S]*?)<\/w:instrText>[\s\S]*?<w:fldChar\b[^>]*w:fldCharType="separate"[^>]*\/>[\s\S]*?<w:t\b[^>]*>([\s\S]*?)<\/w:t>[\s\S]*?<w:fldChar\b[^>]*w:fldCharType="end"[^>]*\/>/g;
42
+
43
/**
 * Collects cross-references from main-document XML.
 *
 * Per paragraph block returned by `parseMainDocumentXml` it records:
 *  - hyperlinks whose `href` starts with `#` (internal anchors),
 *  - simple fields whose instruction parses as REF / PAGEREF / NOTEREF,
 *  - textual patterns such as "Section 1.2".
 * It then scans the whole XML for complex fields (via
 * `extractComplexFieldsFromBodyXml` and via a regex pass) and for `<w:ref>`
 * elements. These whole-document results carry no `paragraphIndex`.
 *
 * `paragraphIndex` is the block's index in `parsed.blocks`, so non-paragraph
 * blocks are counted too.
 *
 * @param xml Main document XML.
 * @returns References in discovery order with exact duplicates removed.
 */
export function parseCrossReferencesFromDocumentXml(xml: string): CrossReference[] {
  const found: CrossReference[] = [];
  const { blocks } = parseMainDocumentXml(xml);

  for (const [paragraphIndex, block] of blocks.entries()) {
    if (block.type !== "paragraph") {
      continue;
    }

    // Internal hyperlinks: "#anchor" points at a bookmark in this document.
    for (const inline of block.children) {
      if (inline.type !== "hyperlink" || !inline.href.startsWith("#")) {
        continue;
      }
      found.push({
        source: "hyperlink",
        kind: "bookmark",
        target: inline.href.slice(1),
        label: flattenInlineText(inline.children).trim() || inline.href.slice(1),
        paragraphIndex,
        confidence: "high",
      });
    }

    // Simple fields declared inside this paragraph's raw XML.
    for (const simpleField of parseFieldsFromParagraphXml(block.rawXml).simpleFields) {
      const parsedField = parseFieldReferenceInstruction(simpleField.instruction);
      if (parsedField) {
        found.push({
          source: "field",
          kind: "bookmark",
          target: parsedField.target,
          instruction: parsedField.instruction,
          label: stripXml(simpleField.contentXml) || parsedField.target,
          paragraphIndex,
          confidence: "high",
        });
      }
    }

    // Heuristic matches on the paragraph's flattened text.
    found.push(
      ...detectCrossReferencePatterns(flattenParagraphText(block)).map(
        (hit): CrossReference => ({
          source: "text-pattern",
          kind: hit.kind,
          target: hit.targetHint,
          label: hit.label,
          paragraphIndex,
          confidence: "medium",
        }),
      ),
    );
  }

  // Complex fields reported by the structured extractor.
  for (const complexField of extractComplexFieldsFromBodyXml(xml)) {
    const parsedField = parseFieldReferenceInstruction(complexField.instruction);
    if (parsedField) {
      found.push({
        source: "field",
        kind: "bookmark",
        target: parsedField.target,
        instruction: parsedField.instruction,
        label: stripXml(complexField.contentXml) || parsedField.target,
        confidence: "high",
      });
    }
  }

  // Complex fields picked up by the regex pass over the same XML.
  for (const regexField of extractComplexFieldsWithRegex(xml)) {
    const parsedField = parseFieldReferenceInstruction(regexField.instruction);
    if (parsedField) {
      found.push({
        source: "field",
        kind: "bookmark",
        target: parsedField.target,
        instruction: parsedField.instruction,
        label: regexField.label || parsedField.target,
        confidence: "high",
      });
    }
  }

  // <w:ref> elements; without a resolvable target they are only a medium-confidence hint.
  for (const refElement of extractWRefElements(xml)) {
    found.push({
      source: "w:ref",
      kind: "bookmark",
      target: refElement.target,
      label: refElement.label || refElement.target || "w:ref",
      confidence: refElement.target ? "high" : "medium",
    });
  }

  return dedupeCrossReferences(found);
}
138
+
139
/**
 * Collects cross-references from a canonical document tree.
 *
 * Every paragraph reached by `walkDocument` is inspected for internal
 * hyperlinks (`href` starting with `#`), REF / PAGEREF / NOTEREF fields among
 * its direct children, and textual patterns such as "Section 1.2".
 *
 * `paragraphIndex` is the zero-based ordinal of the paragraph in visit order;
 * only paragraph nodes advance it.
 *
 * @param document Either an object exposing `content`, or a node to use as
 *   the traversal root directly.
 * @returns References in discovery order with exact duplicates removed.
 */
export function collectCrossReferencesFromCanonicalDocument(
  document: Pick<CanonicalDocument, "content"> | DocumentNode,
): CrossReference[] {
  const root = "content" in document ? document.content : document;
  const found: CrossReference[] = [];
  let paragraphsSeen = 0;

  walkDocument(root, (node) => {
    if (node.type !== "paragraph") {
      return;
    }

    const paragraphIndex = paragraphsSeen;
    paragraphsSeen += 1;

    for (const inline of node.children) {
      if (inline.type === "hyperlink" && inline.href.startsWith("#")) {
        found.push({
          source: "hyperlink",
          kind: "bookmark",
          target: inline.href.slice(1),
          label: flattenInlineText(inline.children).trim() || inline.href.slice(1),
          paragraphIndex,
          confidence: "high",
        });
      } else if (inline.type === "field") {
        const parsedField = parseFieldReferenceInstruction(inline.instruction);
        if (parsedField) {
          found.push({
            source: "field",
            kind: "bookmark",
            target: parsedField.target,
            instruction: parsedField.instruction,
            label: flattenInlineText(inline.children).trim() || parsedField.target,
            paragraphIndex,
            confidence: "high",
          });
        }
      }
    }

    // Heuristic matches on the paragraph's flattened text.
    found.push(
      ...detectCrossReferencePatterns(flattenParagraphText(node)).map(
        (hit): CrossReference => ({
          source: "text-pattern",
          kind: hit.kind,
          target: hit.targetHint,
          label: hit.label,
          paragraphIndex,
          confidence: "medium",
        }),
      ),
    );
  });

  return dedupeCrossReferences(found);
}
194
+
195
/**
 * Finds textual cross-references ("Section 1.2", "Exhibit A-1", ...) in plain
 * text using `CROSS_REFERENCE_PATTERN`.
 *
 * @param text Plain text to scan.
 * @returns One entry per match, in order of appearance. `kind` is the
 *   lower-cased keyword; `label` keeps the keyword as written.
 */
export function detectCrossReferencePatterns(text: string): CrossReferencePattern[] {
  return Array.from(text.matchAll(CROSS_REFERENCE_PATTERN)).flatMap(
    ([, keyword, targetHint]): CrossReferencePattern[] => {
      const kind = keyword?.toLowerCase();
      if (!kind || !targetHint) {
        return [];
      }

      return [
        {
          kind: kind as CrossReferencePattern["kind"],
          label: `${keyword} ${targetHint}`.trim(),
          targetHint,
        },
      ];
    },
  );
}
216
+
217
/**
 * Parses a field instruction and extracts the referenced target when the
 * field is a REF, PAGEREF or NOTEREF (keyword matched case-insensitively).
 *
 * The instruction is trimmed and whitespace runs are collapsed to single
 * spaces before matching. The target is the first argument: either a
 * double-quoted string or a run of characters containing no whitespace and
 * no backslash.
 *
 * @param instruction Raw field instruction text.
 * @returns The parsed reference, or `undefined` when the instruction is not
 *   one of the supported fields or has no target argument.
 */
export function parseFieldReferenceInstruction(instruction: string): FieldReference | undefined {
  const normalized = instruction.trim().replace(/\s+/g, " ");
  const parts = normalized.match(/^(REF|PAGEREF|NOTEREF)\s+(?:"([^"]+)"|([^\s\\]+))/i);

  if (parts === null) {
    return undefined;
  }

  const [, fieldName, quotedTarget, bareTarget] = parts;
  return {
    kind: fieldName.toUpperCase() as FieldReference["kind"],
    target: (quotedTarget ?? bareTarget ?? "").trim(),
    instruction: normalized,
  };
}
230
+
231
/**
 * Scans XML for `<w:ref ...>` tags and reads a target and label from their
 * attributes.
 *
 * The target is the first present of `anchor`, `name`, `id`; the label is the
 * first present of `displayText`, `text`. Either may be `undefined`.
 */
function extractWRefElements(xml: string): Array<{ target?: string; label?: string }> {
  return Array.from(xml.matchAll(W_REF_PATTERN), (hit) => {
    const attrs = readAttributes(hit[1] ?? "");
    return {
      target: attrs.anchor ?? attrs.name ?? attrs.id,
      label: attrs.displayText ?? attrs.text,
    };
  });
}
244
+
245
/**
 * Regex-based extraction of complex fields from XML using
 * `COMPLEX_FIELD_PATTERN`.
 *
 * Both captured groups are passed through `stripXml`. Matches whose
 * instruction is empty after stripping are discarded.
 *
 * @returns The instruction text and the result text of each matched field.
 */
function extractComplexFieldsWithRegex(xml: string): Array<{ instruction: string; label: string }> {
  return Array.from(xml.matchAll(COMPLEX_FIELD_PATTERN), (hit) => ({
    instruction: stripXml(hit[1] ?? ""),
    label: stripXml(hit[2] ?? ""),
  })).filter((field) => field.instruction !== "");
}
259
+
260
/**
 * Parses the attribute portion of a tag into a name → value map using
 * `ATTRIBUTE_PATTERN`.
 *
 * Only double-quoted attributes are recognised and a leading `w:` prefix is
 * dropped from the name. When a name repeats, the last value wins.
 */
function readAttributes(source: string): Record<string, string> {
  return Object.fromEntries(
    Array.from(source.matchAll(ATTRIBUTE_PATTERN), (hit): [string, string] => [hit[1], hit[2]]),
  );
}
270
+
271
/**
 * Reduces an XML fragment to its plain text.
 *
 * Tags are removed first, then XML character references are decoded, and
 * finally whitespace runs are collapsed to single spaces and the result is
 * trimmed.
 *
 * Decoding covers the five predefined entities (`&amp;`, `&lt;`, `&gt;`,
 * `&quot;`, `&apos;`) and numeric references (`&#65;`, `&#x41;`). Without it,
 * text such as `Terms &amp; Conditions` or an instruction written as
 * `REF &quot;Name&quot;` would be returned still escaped.
 *
 * @param xml XML fragment (element content or a whole element).
 * @returns Decoded, whitespace-normalised text; `""` when nothing remains.
 */
function stripXml(xml: string): string {
  const namedEntities: Record<string, string> = {
    amp: "&",
    lt: "<",
    gt: ">",
    quot: '"',
    apos: "'",
  };

  return (
    xml
      // Strip tags before decoding so an escaped "&lt;b&gt;" survives as literal text.
      .replace(/<[^>]+>/g, "")
      // Single pass: "&amp;lt;" decodes to "&lt;" and is not decoded a second time.
      .replace(/&(#x[0-9a-fA-F]+|#[0-9]+|amp|lt|gt|quot|apos);/g, (entity: string, body: string) => {
        if (!body.startsWith("#")) {
          return namedEntities[body] ?? entity;
        }
        const codePoint =
          body[1] === "x" ? Number.parseInt(body.slice(2), 16) : Number.parseInt(body.slice(1), 10);
        // String.fromCodePoint throws above U+10FFFF; leave such references untouched.
        return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
      })
      .replace(/\s+/g, " ")
      .trim()
  );
}
274
+
275
/**
 * Concatenates the visible text of a paragraph's direct children.
 *
 * Text nodes contribute their text, hyperlinks and fields contribute the
 * flattened text of their children, tabs become `\t`, hard and column breaks
 * become `\n`, and every other child contributes nothing.
 */
function flattenParagraphText(paragraph: ParagraphNode): string {
  const pieces: string[] = [];

  for (const inline of paragraph.children) {
    if (inline.type === "text") {
      pieces.push(inline.text);
    } else if (inline.type === "hyperlink" || inline.type === "field") {
      pieces.push(flattenInlineText(inline.children));
    } else if (inline.type === "tab") {
      pieces.push("\t");
    } else if (inline.type === "hard_break" || inline.type === "column_break") {
      pieces.push("\n");
    } else {
      pieces.push("");
    }
  }

  return pieces.join("");
}
296
+
297
/**
 * Concatenates the visible text of a hyperlink's or field's children.
 *
 * Text nodes contribute their text, tabs become `\t`, hard and column breaks
 * become `\n`, and every other child contributes nothing (nested containers
 * are not descended into).
 */
function flattenInlineText(children: HyperlinkNode["children"] | FieldNode["children"]): string {
  const pieces: string[] = [];

  for (const inline of children) {
    switch (inline.type) {
      case "text":
        pieces.push(inline.text);
        break;
      case "tab":
        pieces.push("\t");
        break;
      case "hard_break":
      case "column_break":
        pieces.push("\n");
        break;
      default:
        pieces.push("");
        break;
    }
  }

  return pieces.join("");
}
313
+
314
/**
 * Removes exact duplicate references, keeping the first occurrence and the
 * original order.
 *
 * Two references are duplicates when `source`, `kind`, `target` (missing
 * treated as `""`), `label` and `paragraphIndex` (missing treated as `-1`)
 * are all equal.
 *
 * The identity key is a JSON-encoded tuple rather than a delimiter-joined
 * string. With a plain `"|"` join, a `|` inside a target or label could make
 * two different references produce the same key (`"a|b" + "c"` vs
 * `"a" + "b|c"`), and one of them would be dropped.
 *
 * @param references References in discovery order.
 * @returns A new array without duplicates; the input array is not modified.
 */
function dedupeCrossReferences(references: CrossReference[]): CrossReference[] {
  const seen = new Set<string>();
  const deduped: CrossReference[] = [];

  for (const reference of references) {
    const key = JSON.stringify([
      reference.source,
      reference.kind,
      reference.target ?? "",
      reference.label,
      reference.paragraphIndex ?? -1,
    ]);

    if (seen.has(key)) {
      continue;
    }

    seen.add(key);
    deduped.push(reference);
  }

  return deduped;
}
337
+
338
/**
 * Depth-first, pre-order traversal of a document tree.
 *
 * `visit` is called on the node first. Traversal then descends into
 * `children` when the node has an array under that key, and afterwards into
 * `rows` of a `table` node or `cells` of a `table_row` node.
 */
function walkDocument(node: DocumentNode, visit: (node: DocumentNode) => void): void {
  visit(node);

  const nested = "children" in node ? node.children : undefined;
  if (Array.isArray(nested)) {
    for (const child of nested) {
      walkDocument(child, visit);
    }
  }

  switch (node.type) {
    case "table":
      for (const row of node.rows) {
        walkDocument(row, visit);
      }
      break;
    case "table_row":
      for (const cell of node.cells) {
        walkDocument(cell, visit);
      }
      break;
    default:
      break;
  }
}