@beyondwork/docx-react-component 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +44 -104
- package/package.json +50 -30
- package/src/README.md +85 -0
- package/src/api/README.md +22 -0
- package/src/api/public-types.ts +525 -0
- package/src/compare/diff-engine.ts +530 -0
- package/src/compare/export-redlines.ts +162 -0
- package/src/compare/snapshot.ts +37 -0
- package/src/component-inventory.md +99 -0
- package/src/core/README.md +10 -0
- package/src/core/commands/README.md +3 -0
- package/src/core/commands/formatting-commands.ts +161 -0
- package/src/core/commands/image-commands.ts +144 -0
- package/src/core/commands/index.ts +1013 -0
- package/src/core/commands/list-commands.ts +370 -0
- package/src/core/commands/review-commands.ts +108 -0
- package/src/core/commands/text-commands.ts +119 -0
- package/src/core/schema/README.md +3 -0
- package/src/core/schema/text-schema.ts +512 -0
- package/src/core/selection/README.md +3 -0
- package/src/core/selection/mapping.ts +238 -0
- package/src/core/selection/review-anchors.ts +94 -0
- package/src/core/state/README.md +3 -0
- package/src/core/state/editor-state.ts +580 -0
- package/src/core/state/text-transaction.ts +276 -0
- package/src/formats/xlsx/io/parse-shared-strings.ts +41 -0
- package/src/formats/xlsx/io/parse-sheet.ts +289 -0
- package/src/formats/xlsx/io/parse-styles.ts +57 -0
- package/src/formats/xlsx/io/parse-workbook.ts +75 -0
- package/src/formats/xlsx/io/xlsx-session.ts +306 -0
- package/src/formats/xlsx/model/cell.ts +189 -0
- package/src/formats/xlsx/model/sheet.ts +244 -0
- package/src/formats/xlsx/model/styles.ts +118 -0
- package/src/formats/xlsx/model/workbook.ts +449 -0
- package/src/index.ts +45 -0
- package/src/io/README.md +10 -0
- package/src/io/docx-session.ts +1763 -0
- package/src/io/export/README.md +3 -0
- package/src/io/export/export-session.ts +165 -0
- package/src/io/export/minimal-docx.ts +115 -0
- package/src/io/export/reattach-preserved-parts.ts +54 -0
- package/src/io/export/serialize-comments.ts +876 -0
- package/src/io/export/serialize-footnotes.ts +217 -0
- package/src/io/export/serialize-headers-footers.ts +200 -0
- package/src/io/export/serialize-main-document.ts +982 -0
- package/src/io/export/serialize-numbering.ts +97 -0
- package/src/io/export/serialize-revisions.ts +389 -0
- package/src/io/export/serialize-runtime-revisions.ts +265 -0
- package/src/io/export/serialize-tables.ts +147 -0
- package/src/io/export/split-review-boundaries.ts +194 -0
- package/src/io/normalize/README.md +3 -0
- package/src/io/normalize/normalize-text.ts +437 -0
- package/src/io/ooxml/README.md +3 -0
- package/src/io/ooxml/parse-comments.ts +779 -0
- package/src/io/ooxml/parse-complex-content.ts +287 -0
- package/src/io/ooxml/parse-fields.ts +438 -0
- package/src/io/ooxml/parse-footnotes.ts +403 -0
- package/src/io/ooxml/parse-headers-footers.ts +483 -0
- package/src/io/ooxml/parse-inline-media.ts +431 -0
- package/src/io/ooxml/parse-main-document.ts +1846 -0
- package/src/io/ooxml/parse-numbering.ts +425 -0
- package/src/io/ooxml/parse-revisions.ts +658 -0
- package/src/io/ooxml/parse-shapes.ts +271 -0
- package/src/io/ooxml/parse-tables.ts +568 -0
- package/src/io/ooxml/parse-theme.ts +314 -0
- package/src/io/ooxml/part-manifest.ts +136 -0
- package/src/io/ooxml/revision-boundaries.ts +351 -0
- package/src/io/opc/README.md +3 -0
- package/src/io/opc/corrupt-package.ts +166 -0
- package/src/io/opc/docx-package.ts +74 -0
- package/src/io/opc/package-reader.ts +325 -0
- package/src/io/opc/package-writer.ts +273 -0
- package/src/legal/bookmarks.ts +196 -0
- package/src/legal/cross-references.ts +356 -0
- package/src/legal/defined-terms.ts +203 -0
- package/src/model/README.md +3 -0
- package/src/model/canonical-document.ts +1911 -0
- package/src/model/cds-1.0.0.ts +196 -0
- package/src/model/snapshot.ts +393 -0
- package/src/preservation/README.md +3 -0
- package/src/preservation/markup-compatibility.ts +48 -0
- package/src/preservation/opaque-fragment-store.ts +89 -0
- package/src/preservation/opaque-region.ts +233 -0
- package/src/preservation/package-preservation.ts +120 -0
- package/src/preservation/preserved-part-manifest.ts +56 -0
- package/src/preservation/relationship-retention.ts +57 -0
- package/src/preservation/store.ts +185 -0
- package/src/review/README.md +16 -0
- package/src/review/store/README.md +3 -0
- package/src/review/store/comment-anchors.ts +70 -0
- package/src/review/store/comment-remapping.ts +154 -0
- package/src/review/store/comment-store.ts +331 -0
- package/src/review/store/comment-thread.ts +109 -0
- package/src/review/store/revision-actions.ts +394 -0
- package/src/review/store/revision-store.ts +303 -0
- package/src/review/store/revision-types.ts +168 -0
- package/src/review/store/runtime-comment-store.ts +43 -0
- package/src/runtime/README.md +3 -0
- package/src/runtime/ai-action-policy.ts +764 -0
- package/src/runtime/document-runtime.ts +967 -0
- package/src/runtime/read-only-diagnostics-runtime.ts +232 -0
- package/src/runtime/review-runtime.ts +44 -0
- package/src/runtime/revision-runtime.ts +107 -0
- package/src/runtime/session-capabilities.ts +138 -0
- package/src/runtime/surface-projection.ts +570 -0
- package/src/runtime/table-commands.ts +87 -0
- package/src/runtime/table-schema.ts +140 -0
- package/src/runtime/virtualized-rendering.ts +258 -0
- package/src/ui/README.md +30 -0
- package/src/ui/WordReviewEditor.tsx +1506 -0
- package/src/ui/comments/README.md +3 -0
- package/src/ui/compatibility/README.md +3 -0
- package/src/ui/editor-surface/README.md +3 -0
- package/src/ui/headless/comment-decoration-model.ts +124 -0
- package/src/ui/headless/revision-decoration-model.ts +128 -0
- package/src/ui/headless/selection-helpers.ts +34 -0
- package/src/ui/headless/use-editor-keyboard.ts +98 -0
- package/src/ui/review/README.md +3 -0
- package/src/ui/shared/revision-filters.ts +31 -0
- package/src/ui/status/README.md +3 -0
- package/src/ui/theme/README.md +3 -0
- package/src/ui/toolbar/README.md +3 -0
- package/src/ui-tailwind/chrome/tw-alert-banner.tsx +48 -0
- package/src/ui-tailwind/chrome/tw-selection-toolbar.tsx +44 -0
- package/src/ui-tailwind/chrome/tw-unsaved-modal.tsx +58 -0
- package/src/ui-tailwind/chrome/use-before-unload.ts +20 -0
- package/src/ui-tailwind/editor-surface/pm-command-bridge.ts +139 -0
- package/src/ui-tailwind/editor-surface/pm-decorations.ts +98 -0
- package/src/ui-tailwind/editor-surface/pm-position-map.ts +123 -0
- package/src/ui-tailwind/editor-surface/pm-schema.ts +452 -0
- package/src/ui-tailwind/editor-surface/pm-state-from-snapshot.ts +327 -0
- package/src/ui-tailwind/editor-surface/search-plugin.ts +157 -0
- package/src/ui-tailwind/editor-surface/tw-caret.tsx +12 -0
- package/src/ui-tailwind/editor-surface/tw-editor-surface.tsx +150 -0
- package/src/ui-tailwind/editor-surface/tw-inline-token.tsx +118 -0
- package/src/ui-tailwind/editor-surface/tw-opaque-block.tsx +52 -0
- package/src/ui-tailwind/editor-surface/tw-paragraph-block.tsx +151 -0
- package/src/ui-tailwind/editor-surface/tw-prosemirror-surface.tsx +215 -0
- package/src/ui-tailwind/editor-surface/tw-segment-view.tsx +111 -0
- package/src/ui-tailwind/editor-surface/tw-table-node-view.tsx +122 -0
- package/src/ui-tailwind/index.ts +61 -0
- package/src/ui-tailwind/review/tw-comment-sidebar.tsx +276 -0
- package/src/ui-tailwind/review/tw-health-panel.tsx +120 -0
- package/src/ui-tailwind/review/tw-review-rail.tsx +120 -0
- package/src/ui-tailwind/review/tw-revision-sidebar.tsx +164 -0
- package/src/ui-tailwind/status/tw-status-bar.tsx +58 -0
- package/src/ui-tailwind/theme/editor-theme.css +190 -0
- package/src/ui-tailwind/toolbar/tw-toolbar-icon-button.tsx +48 -0
- package/src/ui-tailwind/toolbar/tw-toolbar.tsx +231 -0
- package/src/ui-tailwind/tw-review-workspace.tsx +140 -0
- package/src/validation/README.md +3 -0
- package/src/validation/compatibility-engine.ts +317 -0
- package/src/validation/compatibility-report.ts +160 -0
- package/src/validation/diagnostics.ts +203 -0
- package/src/validation/import-diagnostics.ts +128 -0
- package/src/validation/low-priority-word-surfaces.ts +373 -0
- package/dist/chunk-32W6IVQE.js +0 -7725
- package/dist/chunk-32W6IVQE.js.map +0 -1
- package/dist/index.cjs +0 -23722
- package/dist/index.cjs.map +0 -1
- package/dist/index.d.cts +0 -7
- package/dist/index.d.ts +0 -7
- package/dist/index.js +0 -16011
- package/dist/index.js.map +0 -1
- package/dist/public-types-DqCURAz8.d.cts +0 -1152
- package/dist/public-types-DqCURAz8.d.ts +0 -1152
- package/dist/tailwind.cjs +0 -8295
- package/dist/tailwind.cjs.map +0 -1
- package/dist/tailwind.d.cts +0 -323
- package/dist/tailwind.d.ts +0 -323
- package/dist/tailwind.js +0 -553
- package/dist/tailwind.js.map +0 -1
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
import { extractBookmarksFromBodyXml } from "../io/ooxml/parse-fields.ts";
|
|
2
|
+
import type {
|
|
3
|
+
BookmarkEndNode,
|
|
4
|
+
BookmarkStartNode,
|
|
5
|
+
CanonicalDocument,
|
|
6
|
+
DocumentNode,
|
|
7
|
+
} from "../model/canonical-document.ts";
|
|
8
|
+
|
|
9
|
+
/**
 * One bookmark found in a document, described by how its start and end
 * markers pair up.
 */
export interface LegalBookmark {
  /** Identifier shared by the bookmark's start and end markers. */
  bookmarkId: string;
  /** Name carried by the start marker; not set on "end-only" entries. */
  name?: string;
  /** Result of isHiddenBookmarkName for the start marker's name; always false on "end-only" entries. */
  hidden: boolean;
  /** "ooxml" when read from document XML, "canonical" when read from the canonical document tree. */
  source: "ooxml" | "canonical";
  /** "paired" when both markers were matched; otherwise names the single marker that was seen. */
  status: "paired" | "start-only" | "end-only";
  /** Position of the start marker in the ordered list of bookmark markers (not a text offset). */
  startIndex?: number;
  /** Position of the end marker in the ordered list of bookmark markers (not a text offset). */
  endIndex?: number;
}
|
|
18
|
+
|
|
19
|
+
/** A start marker that is still waiting for its matching end marker. */
interface OpenBookmark {
  /** Identifier the matching end marker must carry. */
  bookmarkId: string;
  /** Name read from the start marker, if it had one. */
  name?: string;
  /** Whether the name marks the bookmark as hidden (see isHiddenBookmarkName). */
  hidden: boolean;
  /** Position of the start marker in the ordered list of bookmark markers. */
  startIndex: number;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Reads the bookmark markers from main-document XML and pairs each end
 * marker with the most recent unmatched start marker of the same id.
 *
 * @param xml Document XML handed to extractBookmarksFromBodyXml.
 * @returns One entry per paired bookmark plus one per unmatched start or end
 *   marker, ordered by compareBookmarks. Indexes are positions in the marker
 *   sequence, not text offsets.
 */
export function parseBookmarksFromDocumentXml(xml: string): LegalBookmark[] {
  // Unmatched start markers, keyed by id; the last pushed entry is matched first.
  const pendingStarts = new Map<string, OpenBookmark[]>();
  const collected: LegalBookmark[] = [];

  extractBookmarksFromBodyXml(xml).forEach((marker, position) => {
    const { bookmarkId } = marker;

    if (marker.type === "bookmark_start") {
      let pending = pendingStarts.get(bookmarkId);
      if (pending === undefined) {
        pending = [];
        pendingStarts.set(bookmarkId, pending);
      }
      pending.push({
        bookmarkId,
        name: marker.name,
        hidden: isHiddenBookmarkName(marker.name),
        startIndex: position,
      });
      return;
    }

    // Anything that is not a start marker is treated as an end marker.
    const pending = pendingStarts.get(bookmarkId);
    const opener = pending?.pop();
    if (pending !== undefined && pending.length === 0) {
      pendingStarts.delete(bookmarkId);
    }

    collected.push(
      opener
        ? {
            bookmarkId,
            name: opener.name,
            hidden: opener.hidden,
            source: "ooxml",
            status: "paired",
            startIndex: opener.startIndex,
            endIndex: position,
          }
        : {
            bookmarkId,
            hidden: false,
            source: "ooxml",
            status: "end-only",
            endIndex: position,
          },
    );
  });

  // Whatever is still pending never saw an end marker.
  for (const pending of pendingStarts.values()) {
    for (const opener of pending) {
      collected.push({
        bookmarkId: opener.bookmarkId,
        name: opener.name,
        hidden: opener.hidden,
        source: "ooxml",
        status: "start-only",
        startIndex: opener.startIndex,
      });
    }
  }

  return collected.sort(compareBookmarks);
}
|
|
90
|
+
|
|
91
|
+
/**
 * Walks a canonical document (or a single node of one), gathers its
 * bookmark start/end nodes in visit order and pairs each end node with the
 * most recent unmatched start node of the same id.
 *
 * @param document Either an object exposing `content` or a document node to
 *   use as the walk root.
 * @returns One entry per paired bookmark plus one per unmatched start or end
 *   node, ordered by compareBookmarks. Indexes are positions in the gathered
 *   marker sequence.
 */
export function collectBookmarksFromCanonicalDocument(
  document: Pick<CanonicalDocument, "content"> | DocumentNode,
): LegalBookmark[] {
  const root = "content" in document ? document.content : document;

  const markers: Array<BookmarkStartNode | BookmarkEndNode> = [];
  walkDocument(root, (node) => {
    if (node.type === "bookmark_start" || node.type === "bookmark_end") {
      markers.push(node);
    }
  });

  // Unmatched start nodes, keyed by id; the last pushed entry is matched first.
  const pendingStarts = new Map<string, OpenBookmark[]>();
  const collected: LegalBookmark[] = [];

  markers.forEach((marker, position) => {
    const { bookmarkId } = marker;

    if (marker.type === "bookmark_start") {
      let pending = pendingStarts.get(bookmarkId);
      if (pending === undefined) {
        pending = [];
        pendingStarts.set(bookmarkId, pending);
      }
      pending.push({
        bookmarkId,
        name: marker.name,
        hidden: isHiddenBookmarkName(marker.name),
        startIndex: position,
      });
      return;
    }

    const pending = pendingStarts.get(bookmarkId);
    const opener = pending?.pop();
    if (pending !== undefined && pending.length === 0) {
      pendingStarts.delete(bookmarkId);
    }

    if (opener === undefined) {
      // End node with no start before it.
      collected.push({
        bookmarkId,
        hidden: false,
        source: "canonical",
        status: "end-only",
        endIndex: position,
      });
      return;
    }

    collected.push({
      bookmarkId,
      name: opener.name,
      hidden: opener.hidden,
      source: "canonical",
      status: "paired",
      startIndex: opener.startIndex,
      endIndex: position,
    });
  });

  // Whatever is still pending never saw an end node.
  for (const pending of pendingStarts.values()) {
    for (const opener of pending) {
      collected.push({
        bookmarkId: opener.bookmarkId,
        name: opener.name,
        hidden: opener.hidden,
        source: "canonical",
        status: "start-only",
        startIndex: opener.startIndex,
      });
    }
  }

  return collected.sort(compareBookmarks);
}
|
|
165
|
+
|
|
166
|
+
/**
 * Tells whether a bookmark name counts as hidden, i.e. begins with "_".
 *
 * @param name Bookmark name, possibly missing.
 * @returns True only for a defined name whose first character is "_".
 */
export function isHiddenBookmarkName(name: string | undefined): boolean {
  if (name === undefined) {
    return false;
  }
  return name.startsWith("_");
}
|
|
169
|
+
|
|
170
|
+
/**
 * Sort comparator for bookmarks: by marker position first, then by id.
 *
 * The position of an entry is its startIndex, falling back to endIndex and
 * finally to Number.MAX_SAFE_INTEGER when neither is set.
 */
function compareBookmarks(left: LegalBookmark, right: LegalBookmark): number {
  const leftPosition = left.startIndex ?? left.endIndex ?? Number.MAX_SAFE_INTEGER;
  const rightPosition = right.startIndex ?? right.endIndex ?? Number.MAX_SAFE_INTEGER;
  const byPosition = leftPosition - rightPosition;

  // Equal positions fall through to the id comparison.
  return byPosition || left.bookmarkId.localeCompare(right.bookmarkId);
}
|
|
177
|
+
|
|
178
|
+
/**
 * Depth-first, pre-order traversal of a document node tree.
 *
 * The node itself is visited first, then anything in an array-valued
 * `children` property, then the rows of a table or the cells of a table row.
 *
 * @param node Root of the subtree to traverse.
 * @param visit Callback invoked once for every node reached.
 */
function walkDocument(node: DocumentNode, visit: (node: DocumentNode) => void): void {
  visit(node);

  const hasChildList = "children" in node && Array.isArray(node.children);
  if (hasChildList && "children" in node) {
    for (const child of node.children) {
      walkDocument(child, visit);
    }
  }

  // Tables and rows keep their descendants in `rows` / `cells`.
  switch (node.type) {
    case "table":
      for (const row of node.rows) {
        walkDocument(row, visit);
      }
      break;
    case "table_row":
      for (const cell of node.cells) {
        walkDocument(cell, visit);
      }
      break;
    default:
      break;
  }
}
|
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
import {
|
|
2
|
+
extractComplexFieldsFromBodyXml,
|
|
3
|
+
parseFieldsFromParagraphXml,
|
|
4
|
+
} from "../io/ooxml/parse-fields.ts";
|
|
5
|
+
import { parseMainDocumentXml } from "../io/ooxml/parse-main-document.ts";
|
|
6
|
+
import type {
|
|
7
|
+
CanonicalDocument,
|
|
8
|
+
DocumentNode,
|
|
9
|
+
FieldNode,
|
|
10
|
+
HyperlinkNode,
|
|
11
|
+
ParagraphNode,
|
|
12
|
+
} from "../model/canonical-document.ts";
|
|
13
|
+
|
|
14
|
+
/** A textual cross-reference phrase recognised by detectCrossReferencePatterns. */
export interface CrossReferencePattern {
  /** Lower-cased keyword that introduced the reference. */
  kind: "section" | "clause" | "article" | "schedule" | "exhibit" | "appendix";
  /** Keyword and target as matched in the text, e.g. "Section 4.2". */
  label: string;
  /** The number or letter that followed the keyword, e.g. "4.2" or "A-1". */
  targetHint: string;
}
|
|
19
|
+
|
|
20
|
+
/** A reference from one place in a document to another, however it was detected. */
export interface CrossReference {
  /** Mechanism that produced the entry: internal hyperlink, REF-style field, w:ref element, or plain-text match. */
  source: "hyperlink" | "field" | "w:ref" | "text-pattern";
  /** "bookmark" for hyperlink/field/w:ref entries; the matched keyword for text-pattern entries. */
  kind: "bookmark" | "section" | "clause" | "article" | "schedule" | "exhibit" | "appendix";
  /** Bookmark name or target hint; may be missing for a w:ref element without a usable attribute. */
  target?: string;
  /** Whitespace-normalised field instruction; only set on field entries. */
  instruction?: string;
  /** Display text of the reference, falling back to the target when no text was found. */
  label: string;
  /** Index supplied by the producing function for the containing paragraph; omitted by whole-document XML scans. */
  paragraphIndex?: number;
  /** "medium" for text-pattern matches and w:ref elements without a target, otherwise "high". */
  confidence: "high" | "medium";
}
|
|
29
|
+
|
|
30
|
+
/** Result of parsing a REF / PAGEREF / NOTEREF field instruction. */
interface FieldReference {
  /** Upper-cased field keyword. */
  kind: "REF" | "PAGEREF" | "NOTEREF";
  /** First argument of the field (quoted or bare), i.e. the referenced name. */
  target: string;
  /** The instruction with surrounding whitespace trimmed and inner whitespace runs collapsed. */
  instruction: string;
}
|
|
35
|
+
|
|
36
|
+
// Matches capitalised reference phrases such as "Section 4.2", "Schedule A-1" or "Exhibit B".
// Group 1 is the keyword; group 2 is a dotted number, a letter-dash-number, or a single capital letter.
// The lookahead requires the target to be followed by a non-alphanumeric character or the end of input.
const CROSS_REFERENCE_PATTERN =
  /\b(Section|Clause|Article|Schedule|Exhibit|Appendix)\s+(\d+(?:\.\d+)*|[A-Z]-\d+|[A-Z])(?=[^A-Za-z0-9]|$)/g;
// Matches a <w:ref ...> tag; group 1 is the raw attribute text (it may end with the "/" of a self-closing tag).
const W_REF_PATTERN = /<w:ref\b([^>]*)\/?>/g;
// Matches double-quoted name="value" pairs, skipping an optional "w:" prefix; group 1 is the name, group 2 the value.
const ATTRIBUTE_PATTERN = /(?:^|\s)(?:w:)?([A-Za-z][\w-]*)="([^"]*)"/g;
// Matches a complex field laid out as instrText, a "separate" fldChar, some w:t text, then an "end" fldChar.
// Group 1 is the instruction text; group 2 is the content of the first w:t after the separator.
const COMPLEX_FIELD_PATTERN =
  /<w:instrText\b[^>]*>([\s\S]*?)<\/w:instrText>[\s\S]*?<w:fldChar\b[^>]*w:fldCharType="separate"[^>]*\/>[\s\S]*?<w:t\b[^>]*>([\s\S]*?)<\/w:t>[\s\S]*?<w:fldChar\b[^>]*w:fldCharType="end"[^>]*\/>/g;
|
|
42
|
+
|
|
43
|
+
/**
 * Finds cross-references in main-document XML.
 *
 * Per parsed paragraph block it reports internal ("#...") hyperlinks, simple
 * fields whose instruction is REF/PAGEREF/NOTEREF, and plain-text reference
 * phrases. It then scans the whole XML for complex fields (via the field
 * parser and via a regex) and for w:ref elements. Duplicates are removed
 * before returning.
 *
 * @param xml Main-document XML.
 * @returns De-duplicated references in discovery order. For per-block
 *   entries `paragraphIndex` is the block's position in the parsed block
 *   list; whole-document scans leave it unset.
 */
export function parseCrossReferencesFromDocumentXml(xml: string): CrossReference[] {
  const found: CrossReference[] = [];
  const { blocks } = parseMainDocumentXml(xml);

  for (const [paragraphIndex, block] of blocks.entries()) {
    if (block.type !== "paragraph") {
      continue;
    }

    // Internal hyperlinks: href is "#" followed by the anchor name.
    for (const inline of block.children) {
      if (inline.type !== "hyperlink" || !inline.href.startsWith("#")) {
        continue;
      }
      const anchor = inline.href.slice(1);
      found.push({
        source: "hyperlink",
        kind: "bookmark",
        target: anchor,
        label: flattenInlineText(inline.children).trim() || anchor,
        paragraphIndex,
        confidence: "high",
      });
    }

    // Simple fields declared inside this paragraph's XML.
    const { simpleFields } = parseFieldsFromParagraphXml(block.rawXml);
    for (const simpleField of simpleFields) {
      const parsedInstruction = parseFieldReferenceInstruction(simpleField.instruction);
      if (parsedInstruction) {
        found.push({
          source: "field",
          kind: "bookmark",
          target: parsedInstruction.target,
          instruction: parsedInstruction.instruction,
          label: stripXml(simpleField.contentXml) || parsedInstruction.target,
          paragraphIndex,
          confidence: "high",
        });
      }
    }

    // Plain-text phrases such as "Section 4.2".
    for (const phrase of detectCrossReferencePatterns(flattenParagraphText(block))) {
      found.push({
        source: "text-pattern",
        kind: phrase.kind,
        target: phrase.targetHint,
        label: phrase.label,
        paragraphIndex,
        confidence: "medium",
      });
    }
  }

  // Complex fields reported by the field parser (no paragraph position available).
  for (const complexField of extractComplexFieldsFromBodyXml(xml)) {
    const parsedInstruction = parseFieldReferenceInstruction(complexField.instruction);
    if (parsedInstruction) {
      found.push({
        source: "field",
        kind: "bookmark",
        target: parsedInstruction.target,
        instruction: parsedInstruction.instruction,
        label: stripXml(complexField.contentXml) || parsedInstruction.target,
        confidence: "high",
      });
    }
  }

  // Second pass over complex fields using the regex extractor; overlaps are removed by the dedupe step.
  for (const regexField of extractComplexFieldsWithRegex(xml)) {
    const parsedInstruction = parseFieldReferenceInstruction(regexField.instruction);
    if (parsedInstruction) {
      found.push({
        source: "field",
        kind: "bookmark",
        target: parsedInstruction.target,
        instruction: parsedInstruction.instruction,
        label: regexField.label || parsedInstruction.target,
        confidence: "high",
      });
    }
  }

  for (const refElement of extractWRefElements(xml)) {
    found.push({
      source: "w:ref",
      kind: "bookmark",
      target: refElement.target,
      label: refElement.label || refElement.target || "w:ref",
      confidence: refElement.target ? "high" : "medium",
    });
  }

  return dedupeCrossReferences(found);
}
|
|
138
|
+
|
|
139
|
+
/**
 * Finds cross-references in a canonical document (or a single node of one).
 *
 * For every paragraph reached by the walk it reports internal ("#...")
 * hyperlinks, field children whose instruction is REF/PAGEREF/NOTEREF, and
 * plain-text reference phrases. Duplicates are removed before returning.
 *
 * @param document Either an object exposing `content` or a document node to
 *   use as the walk root.
 * @returns De-duplicated references in discovery order; `paragraphIndex`
 *   counts paragraphs in visit order, starting at 0.
 */
export function collectCrossReferencesFromCanonicalDocument(
  document: Pick<CanonicalDocument, "content"> | DocumentNode,
): CrossReference[] {
  const root = "content" in document ? document.content : document;
  const found: CrossReference[] = [];
  let nextParagraphIndex = 0;

  walkDocument(root, (node) => {
    if (node.type !== "paragraph") {
      return;
    }

    const paragraphIndex = nextParagraphIndex;
    nextParagraphIndex += 1;

    for (const child of node.children) {
      switch (child.type) {
        case "hyperlink": {
          // Only "#anchor" links point inside the document.
          if (child.href.startsWith("#")) {
            const anchor = child.href.slice(1);
            found.push({
              source: "hyperlink",
              kind: "bookmark",
              target: anchor,
              label: flattenInlineText(child.children).trim() || anchor,
              paragraphIndex,
              confidence: "high",
            });
          }
          break;
        }
        case "field": {
          const parsedInstruction = parseFieldReferenceInstruction(child.instruction);
          if (parsedInstruction) {
            found.push({
              source: "field",
              kind: "bookmark",
              target: parsedInstruction.target,
              instruction: parsedInstruction.instruction,
              label: flattenInlineText(child.children).trim() || parsedInstruction.target,
              paragraphIndex,
              confidence: "high",
            });
          }
          break;
        }
        default:
          break;
      }
    }

    // Plain-text phrases such as "Section 4.2".
    for (const phrase of detectCrossReferencePatterns(flattenParagraphText(node))) {
      found.push({
        source: "text-pattern",
        kind: phrase.kind,
        target: phrase.targetHint,
        label: phrase.label,
        paragraphIndex,
        confidence: "medium",
      });
    }
  });

  return dedupeCrossReferences(found);
}
|
|
194
|
+
|
|
195
|
+
/**
 * Scans plain text for reference phrases matched by CROSS_REFERENCE_PATTERN
 * (a capitalised keyword followed by a number or letter target).
 *
 * @param text Text to scan.
 * @returns One entry per match, in order of appearance; the kind is the
 *   lower-cased keyword and the label is "<keyword> <target>".
 */
export function detectCrossReferencePatterns(text: string): CrossReferencePattern[] {
  const found: CrossReferencePattern[] = [];

  for (const [, keyword, targetHint] of text.matchAll(CROSS_REFERENCE_PATTERN)) {
    // Defensive: skip a match that is missing either capture.
    if (!keyword || !targetHint) {
      continue;
    }

    found.push({
      kind: keyword.toLowerCase() as CrossReferencePattern["kind"],
      label: `${keyword} ${targetHint}`.trim(),
      targetHint,
    });
  }

  return found;
}
|
|
216
|
+
|
|
217
|
+
/**
 * Parses a field instruction that starts with REF, PAGEREF or NOTEREF
 * (case-insensitive) and extracts its first argument.
 *
 * @param instruction Raw field instruction text.
 * @returns The upper-cased keyword, the quoted or bare first argument, and
 *   the whitespace-normalised instruction; undefined when the instruction is
 *   not one of the three field kinds or has no usable argument.
 */
export function parseFieldReferenceInstruction(instruction: string): FieldReference | undefined {
  const normalized = instruction.trim().replace(/\s+/g, " ");

  // Argument is either "quoted text" or a bare token that stops at whitespace or a switch backslash.
  const parts = /^(REF|PAGEREF|NOTEREF)\s+(?:"([^"]+)"|([^\s\\]+))/i.exec(normalized);
  if (parts === null) {
    return undefined;
  }

  const [, keyword, quotedTarget, bareTarget] = parts;
  return {
    kind: keyword.toUpperCase() as FieldReference["kind"],
    target: (quotedTarget ?? bareTarget ?? "").trim(),
    instruction: normalized,
  };
}
|
|
230
|
+
|
|
231
|
+
/**
 * Collects every <w:ref ...> tag in the XML and reads a target and a label
 * from its attributes.
 *
 * @param xml XML text to scan.
 * @returns One entry per tag, in document order. The target is the first of
 *   the anchor / name / id attributes that is present; the label is
 *   displayText or, failing that, text. Either may be undefined.
 */
function extractWRefElements(xml: string): Array<{ target?: string; label?: string }> {
  return Array.from(xml.matchAll(W_REF_PATTERN), (tagMatch) => {
    const attrs = readAttributes(tagMatch[1] ?? "");
    const target = attrs.anchor ?? attrs.name ?? attrs.id;
    const label = attrs.displayText ?? attrs.text;
    return { target, label };
  });
}
|
|
244
|
+
|
|
245
|
+
/**
 * Regex-based extraction of complex fields: pairs each instruction text
 * with the text that follows its "separate" marker.
 *
 * @param xml XML text to scan.
 * @returns Instruction/label pairs in document order; matches whose
 *   instruction is empty after stripping markup are skipped.
 */
function extractComplexFieldsWithRegex(xml: string): Array<{ instruction: string; label: string }> {
  const fields: Array<{ instruction: string; label: string }> = [];

  for (const [, rawInstruction = "", rawLabel = ""] of xml.matchAll(COMPLEX_FIELD_PATTERN)) {
    const instruction = stripXml(rawInstruction);
    const label = stripXml(rawLabel);
    if (instruction) {
      fields.push({ instruction, label });
    }
  }

  return fields;
}
|
|
259
|
+
|
|
260
|
+
/**
 * Parses double-quoted name="value" attribute pairs out of a tag's raw
 * attribute text.
 *
 * @param source Attribute portion of a tag.
 * @returns Map from attribute name (without a leading "w:" prefix) to its
 *   raw value; a repeated name keeps the last value.
 */
function readAttributes(source: string): Record<string, string> {
  return Array.from(source.matchAll(ATTRIBUTE_PATTERN)).reduce<Record<string, string>>(
    (attributes, [, key, value]) => {
      attributes[key] = value;
      return attributes;
    },
    {},
  );
}
|
|
270
|
+
|
|
271
|
+
/**
 * Reduces an XML fragment to its plain text.
 *
 * Tags are removed, the five predefined XML entities and numeric character
 * references are decoded, whitespace runs are collapsed to one space, and
 * the result is trimmed.
 *
 * Decoding matters because the callers pass raw markup: without it a label
 * such as "Terms &amp; Conditions" would be reported still escaped.
 *
 * @param xml Raw XML fragment (may also be plain text).
 * @returns The readable text content of the fragment.
 */
function stripXml(xml: string): string {
  const namedEntities: Record<string, string> = {
    amp: "&",
    lt: "<",
    gt: ">",
    quot: '"',
    apos: "'",
  };

  return (
    xml
      .replace(/<[^>]+>/g, "")
      // Single pass, so "&amp;lt;" becomes the literal text "&lt;" and is not decoded twice.
      .replace(
        /&(?:#(\d+)|#x([0-9a-fA-F]+)|(amp|lt|gt|quot|apos));/g,
        (entity: string, decimal?: string, hex?: string, named?: string): string => {
          if (named !== undefined) {
            return namedEntities[named] ?? entity;
          }
          const codePoint =
            decimal !== undefined ? Number.parseInt(decimal, 10) : Number.parseInt(hex ?? "", 16);
          // Leave out-of-range references untouched rather than letting fromCodePoint throw.
          return codePoint > 0 && codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
        },
      )
      .replace(/\s+/g, " ")
      .trim()
  );
}
|
|
274
|
+
|
|
275
|
+
/**
 * Builds the plain text of a paragraph from its direct children.
 *
 * Text nodes contribute their text, hyperlinks and fields contribute the
 * flattened text of their own children, tabs become "\t", hard and column
 * breaks become "\n"; every other child contributes nothing.
 *
 * @param paragraph Paragraph node to flatten.
 * @returns The concatenated text.
 */
function flattenParagraphText(paragraph: ParagraphNode): string {
  const pieces: string[] = [];

  for (const child of paragraph.children) {
    if (child.type === "text") {
      pieces.push(child.text);
    } else if (child.type === "hyperlink" || child.type === "field") {
      pieces.push(flattenInlineText(child.children));
    } else if (child.type === "tab") {
      pieces.push("\t");
    } else if (child.type === "hard_break" || child.type === "column_break") {
      pieces.push("\n");
    } else {
      pieces.push("");
    }
  }

  return pieces.join("");
}
|
|
296
|
+
|
|
297
|
+
/**
 * Builds the plain text of a hyperlink's or field's children.
 *
 * Text nodes contribute their text, tabs become "\t", hard and column
 * breaks become "\n"; every other child contributes nothing.
 *
 * @param children Inline children of a hyperlink or field node.
 * @returns The concatenated text.
 */
function flattenInlineText(children: HyperlinkNode["children"] | FieldNode["children"]): string {
  const pieces: string[] = [];

  for (const child of children) {
    switch (child.type) {
      case "text":
        pieces.push(child.text);
        break;
      case "tab":
        pieces.push("\t");
        break;
      case "hard_break":
      case "column_break":
        pieces.push("\n");
        break;
      default:
        pieces.push("");
        break;
    }
  }

  return pieces.join("");
}
|
|
313
|
+
|
|
314
|
+
/**
 * Removes repeated cross-references, keeping the first occurrence of each.
 *
 * Two references are duplicates when source, kind, target, label and
 * paragraphIndex all match (a missing target counts as "", a missing
 * paragraphIndex as -1).
 *
 * The identity key is a JSON-encoded tuple rather than a "|"-joined string,
 * so values that themselves contain "|" cannot make two different
 * references look identical.
 *
 * @param references References in discovery order; not modified.
 * @returns A new array with duplicates dropped and order preserved.
 */
function dedupeCrossReferences(references: CrossReference[]): CrossReference[] {
  const seen = new Set<string>();

  return references.filter((reference) => {
    const key = JSON.stringify([
      reference.source,
      reference.kind,
      reference.target ?? "",
      reference.label,
      reference.paragraphIndex ?? -1,
    ]);

    if (seen.has(key)) {
      return false;
    }

    seen.add(key);
    return true;
  });
}
|
|
337
|
+
|
|
338
|
+
/**
 * Pre-order walk over a document node and everything beneath it.
 *
 * Order per node: the node itself, then the entries of an array-valued
 * `children` property, then a table's rows or a table row's cells.
 *
 * @param node Node to start from.
 * @param visit Called once for each node encountered.
 */
function walkDocument(node: DocumentNode, visit: (node: DocumentNode) => void): void {
  const descend = (next: DocumentNode): void => walkDocument(next, visit);

  visit(node);

  if ("children" in node && Array.isArray(node.children)) {
    for (const child of node.children) {
      descend(child);
    }
  }

  // Table structure lives in `rows` / `cells` rather than `children`.
  if (node.type === "table") {
    for (const row of node.rows) {
      descend(row);
    }
    return;
  }

  if (node.type === "table_row") {
    for (const cell of node.cells) {
      descend(cell);
    }
  }
}
|