@beyondwork/docx-react-component 1.0.57 → 1.0.59
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/package.json +2 -1
- package/src/api/awareness-identity-types.ts +4 -2
- package/src/api/comment-negotiation-types.ts +4 -1
- package/src/api/external-custody-types.ts +16 -0
- package/src/api/internal/build-ref-projections.ts +108 -0
- package/src/api/package-version.ts +1 -1
- package/src/api/participants-types.ts +11 -1
- package/src/api/public-types.ts +1149 -8
- package/src/api/scope-metadata-resolver-types.ts +6 -0
- package/src/compare/diff-engine.ts +3 -0
- package/src/core/commands/formatting-commands.ts +1 -0
- package/src/core/commands/index.ts +225 -16
- package/src/core/commands/legacy-form-field-commands.ts +181 -0
- package/src/core/commands/table-structure-commands.ts +149 -31
- package/src/core/selection/mapping.ts +20 -0
- package/src/core/state/editor-state.ts +2 -1
- package/src/index.ts +28 -0
- package/src/io/docx-session.ts +22 -3
- package/src/io/export/export-session.ts +11 -7
- package/src/io/export/ooxml-namespaces.ts +47 -0
- package/src/io/export/reattach-preserved-parts.ts +4 -16
- package/src/io/export/serialize-comments.ts +3 -131
- package/src/io/export/serialize-ffdata.ts +89 -0
- package/src/io/export/serialize-headers-footers.ts +5 -0
- package/src/io/export/serialize-main-document.ts +224 -34
- package/src/io/export/serialize-numbering.ts +22 -2
- package/src/io/export/serialize-revisions.ts +99 -0
- package/src/io/export/serialize-tables.ts +9 -0
- package/src/io/export/split-review-boundaries.ts +1 -0
- package/src/io/export/table-properties-xml.ts +14 -0
- package/src/io/load-scheduler.ts +70 -28
- package/src/io/normalize/normalize-text.ts +13 -0
- package/src/io/ooxml/_mini-xml.ts +198 -0
- package/src/io/ooxml/canonicalize-payload.ts +1 -4
- package/src/io/ooxml/chart/chart-style-table.ts +4 -3
- package/src/io/ooxml/chart/parse-chart-space.ts +2 -4
- package/src/io/ooxml/chart/parse-series.ts +2 -1
- package/src/io/ooxml/chart/resolve-color.ts +2 -2
- package/src/io/ooxml/chart/types.ts +6 -434
- package/src/io/ooxml/comment-presentation-payload.ts +6 -5
- package/src/io/ooxml/highlight-colors.ts +8 -5
- package/src/io/ooxml/parse-anchor.ts +68 -53
- package/src/io/ooxml/parse-comments.ts +14 -142
- package/src/io/ooxml/parse-complex-content.ts +3 -106
- package/src/io/ooxml/parse-drawing.ts +100 -195
- package/src/io/ooxml/parse-ffdata.ts +93 -0
- package/src/io/ooxml/parse-fields.ts +7 -146
- package/src/io/ooxml/parse-fill.ts +88 -8
- package/src/io/ooxml/parse-font-table.ts +5 -105
- package/src/io/ooxml/parse-footnotes.ts +28 -152
- package/src/io/ooxml/parse-headers-footers.ts +106 -212
- package/src/io/ooxml/parse-inline-media.ts +3 -200
- package/src/io/ooxml/parse-main-document.ts +180 -217
- package/src/io/ooxml/parse-numbering.ts +154 -335
- package/src/io/ooxml/parse-object.ts +147 -0
- package/src/io/ooxml/parse-ole-relationship.ts +82 -0
- package/src/io/ooxml/parse-paragraph-formatting.ts +7 -10
- package/src/io/ooxml/parse-picture-sdt.ts +85 -0
- package/src/io/ooxml/parse-picture.ts +120 -39
- package/src/io/ooxml/parse-revisions.ts +285 -51
- package/src/io/ooxml/parse-settings.ts +6 -99
- package/src/io/ooxml/parse-shapes.ts +25 -140
- package/src/io/ooxml/parse-styles.ts +3 -218
- package/src/io/ooxml/parse-tables.ts +76 -256
- package/src/io/ooxml/parse-theme.ts +1 -4
- package/src/io/ooxml/property-grab-bag.ts +5 -47
- package/src/io/ooxml/xml-element-serialize.ts +32 -0
- package/src/io/ooxml/xml-parser.ts +183 -0
- package/src/legal/bookmarks.ts +1 -1
- package/src/legal/cross-references.ts +1 -1
- package/src/legal/defined-terms.ts +1 -1
- package/src/legal/{_document-root.ts → document-root.ts} +8 -0
- package/src/legal/signature-blocks.ts +1 -1
- package/src/model/canonical-document.ts +165 -6
- package/src/model/chart-types.ts +439 -0
- package/src/model/snapshot.ts +3 -1
- package/src/review/store/comment-remapping.ts +24 -11
- package/src/review/store/revision-actions.ts +482 -2
- package/src/review/store/revision-store.ts +15 -0
- package/src/review/store/revision-types.ts +76 -0
- package/src/runtime/collab/remote-cursor-awareness.ts +24 -0
- package/src/runtime/collab/runtime-collab-sync.ts +33 -0
- package/src/runtime/diagnostics/build-diagnostic.ts +151 -0
- package/src/runtime/diagnostics/code-metadata-table.ts +221 -0
- package/src/runtime/document-runtime.ts +544 -35
- package/src/runtime/document-search.ts +176 -0
- package/src/runtime/edit-ops/index.ts +18 -2
- package/src/runtime/footnote-resolver.ts +130 -0
- package/src/runtime/layout/layout-engine-instance.ts +31 -4
- package/src/runtime/layout/layout-engine-version.ts +37 -1
- package/src/runtime/layout/page-graph.ts +14 -1
- package/src/runtime/layout/resolved-formatting-state.ts +21 -0
- package/src/runtime/numbering-prefix.ts +17 -0
- package/src/runtime/query-scopes.ts +183 -0
- package/src/runtime/resolved-numbering-geometry.ts +37 -6
- package/src/runtime/revision-runtime.ts +27 -1
- package/src/runtime/scope-resolver.ts +60 -0
- package/src/runtime/selection/post-edit-validator.ts +60 -6
- package/src/runtime/structure-ops/index.ts +20 -4
- package/src/runtime/surface-projection.ts +293 -18
- package/src/runtime/table-schema.ts +6 -0
- package/src/runtime/theme-color-resolver.ts +2 -2
- package/src/runtime/units.ts +9 -0
- package/src/runtime/workflow-rail-segments.ts +4 -0
- package/src/ui/WordReviewEditor.tsx +258 -44
- package/src/ui/editor-runtime-boundary.ts +13 -0
- package/src/ui/editor-shell-view.tsx +4 -1
- package/src/ui/headless/chrome-registry.ts +53 -0
- package/src/ui/headless/selection-tool-resolver.ts +11 -1
- package/src/ui-tailwind/chrome/chrome-preset-model.ts +13 -0
- package/src/ui-tailwind/chrome/tw-command-palette-mount.tsx +96 -0
- package/src/ui-tailwind/chrome/tw-context-menu.tsx +2 -1
- package/src/ui-tailwind/chrome/tw-image-context-toolbar.tsx +5 -4
- package/src/ui-tailwind/chrome/tw-mode-dock.tsx +6 -2
- package/src/ui-tailwind/chrome/use-container-breakpoint.ts +111 -0
- package/src/ui-tailwind/chrome-overlay/tw-chrome-overlay.tsx +23 -9
- package/src/ui-tailwind/chrome-overlay/tw-object-selection-overlay.tsx +158 -0
- package/src/ui-tailwind/chrome-overlay/tw-page-stack-overlay-layer.tsx +6 -7
- package/src/ui-tailwind/editor-surface/pm-schema.ts +105 -17
- package/src/ui-tailwind/editor-surface/pm-state-from-snapshot.ts +13 -0
- package/src/ui-tailwind/editor-surface/shape-renderer.ts +76 -14
- package/src/ui-tailwind/editor-surface/tw-page-block-view.helpers.ts +18 -1
- package/src/ui-tailwind/editor-surface/tw-page-block-view.tsx +2 -0
- package/src/ui-tailwind/editor-surface/tw-table-node-view.tsx +18 -2
- package/src/ui-tailwind/index.ts +9 -0
- package/src/ui-tailwind/page-chrome-model.ts +77 -5
- package/src/ui-tailwind/page-stack/tw-page-stack-chrome-layer.tsx +56 -1
- package/src/ui-tailwind/page-stack/tw-region-block-renderer.tsx +2 -0
- package/src/ui-tailwind/review/tw-comment-sidebar.tsx +116 -113
- package/src/ui-tailwind/review/tw-review-rail-footer.tsx +2 -2
- package/src/ui-tailwind/theme/tokens.ts +14 -0
- package/src/ui-tailwind/toolbar/tw-shell-header.tsx +5 -0
- package/src/ui-tailwind/tw-review-workspace.tsx +52 -87
- package/src/validation/diagnostics.ts +1 -0
|
@@ -4,8 +4,15 @@ import type { InlineMediaPart } from "./parse-inline-media.ts";
|
|
|
4
4
|
import type { ChartPartLookup } from "./parse-complex-content.ts";
|
|
5
5
|
import type { DrawingFrameNode, AnchorGeometry } from "../../model/canonical-document.ts";
|
|
6
6
|
import { parseAnchorGeometry } from "./parse-anchor.ts";
|
|
7
|
-
import { parsePicture
|
|
7
|
+
import { parsePicture } from "./parse-picture.ts";
|
|
8
8
|
import { parseShapeContent, type TxbxBlockParser } from "./parse-shapes.ts";
|
|
9
|
+
import {
|
|
10
|
+
type XmlElementNode,
|
|
11
|
+
findFirstChild,
|
|
12
|
+
findFirstDescendant,
|
|
13
|
+
localName,
|
|
14
|
+
parseXml,
|
|
15
|
+
} from "./_mini-xml.ts";
|
|
9
16
|
|
|
10
17
|
const PICTURE_GRAPHIC_URI =
|
|
11
18
|
"http://schemas.openxmlformats.org/drawingml/2006/picture";
|
|
@@ -40,34 +47,102 @@ export function parseDrawingFrame(
|
|
|
40
47
|
): DrawingFrameNode | null {
|
|
41
48
|
const root = parseXml(drawingXml);
|
|
42
49
|
|
|
43
|
-
// F3.1:
|
|
44
|
-
//
|
|
45
|
-
//
|
|
46
|
-
|
|
50
|
+
// F3.1 / Phase-1.1 B1: mc:AlternateContent handling moved from "pick one branch"
|
|
51
|
+
// to "try branches in order, keep first that yields non-opaque content".
|
|
52
|
+
// This fixes the case where Choice advertises a known graphicData URI but its
|
|
53
|
+
// body is empty / unparseable, and Fallback carries the actual usable content.
|
|
54
|
+
const alt = findFirstDescendant(root, "AlternateContent");
|
|
55
|
+
if (alt) {
|
|
56
|
+
return resolveFromAlternateContent(alt, drawingXml, opts);
|
|
57
|
+
}
|
|
58
|
+
return resolveFromBranch(root, drawingXml, opts);
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
/**
 * Phase 4.1 G2 — `Requires` tokens of `mc:Choice` branches we are willing to
 * parse.
 *
 * An mc:Choice element's Requires="ns1 ns2 …" is considered supported when
 * every listed token appears here. Unknown Requires tokens reject the Choice.
 * Expand as CO4 / downstream lanes add coverage.
 *
 * NOTE(review): `isChoiceSupported` compares the raw token text from the
 * attribute against this set, so the entries are namespace *prefixes* as
 * conventionally written by Word, not resolved namespace URIs. A document
 * that binds one of these namespaces to a different prefix would not match —
 * confirm whether that is acceptable for the inputs we support.
 *
 * Typed as `ReadonlySet` so the allow-list cannot be mutated by accident.
 */
const SUPPORTED_REQUIRES_NAMESPACES: ReadonlySet<string> = new Set<string>([
  // no Requires attr = accept (isChoiceSupported also short-circuits on an
  // empty attribute before consulting the set, so this is a defensive entry)
  "",
  "c14", // Word chart Choice wrappers still resolve to chart_preview
  "dgm2009", // SmartArt Choice wrappers still resolve to smartart_preview
  "wps", // WordprocessingShape (we parse)
  "wpg", // WordprocessingGroup (wpg is preserved via opaque; Choice-level acceptance is fine)
  "w14", // WordML 2010 extensions
  "w15", // WordML 2012 extensions
  "w16se", // WordML 2016 SE
  "a14", // DrawingML 2010
  "v", // VML (preserved as opaque)
  "o", // VML office extensions (preserved as opaque)
  "wp14", // DrawingML wordproc 2010
]);
|
|
79
|
+
|
|
80
|
+
/**
 * Resolve a drawing that is wrapped in `mc:AlternateContent`
 * (Phase 1.1 B1 / Phase 4.1 G2).
 *
 * Resolution order:
 *  1. Every direct-child `Choice` accepted by `isChoiceSupported`, in document
 *     order — the first one whose frame carries non-opaque content wins.
 *  2. The first `Fallback` child, when its frame carries non-opaque content.
 *  3. Otherwise the first supported `Choice` that produced any frame at all
 *     (i.e. an opaque one), and finally the `Fallback` frame.
 *
 * This is intentionally more lenient than "take the first Choice whose
 * Requires is satisfied": a satisfied Choice whose body is empty / unparseable
 * is passed over so that a later branch with usable content can be used.
 *
 * Each branch is run through `resolveFromBranch` at most once; the first
 * opaque Choice frame is remembered during the initial pass instead of being
 * recomputed afterwards.
 *
 * @param alt         The `AlternateContent` element.
 * @param outerRawXml Raw XML forwarded unchanged to `resolveFromBranch`.
 * @param opts        Parse options forwarded unchanged to `resolveFromBranch`.
 * @returns The selected frame, or `null` when no branch produced one.
 */
function resolveFromAlternateContent(
  alt: XmlElementNode,
  outerRawXml: string,
  opts: ParseDrawingOpts,
): DrawingFrameNode | null {
  // First Choice frame seen that was only opaque; used as a last resort so
  // declared order still takes precedence over Fallback.
  let firstOpaqueChoiceFrame: DrawingFrameNode | null = null;

  for (const child of alt.children) {
    if (
      child.type !== "element" ||
      localName(child.name) !== "Choice" ||
      !isChoiceSupported(child)
    ) {
      continue;
    }
    const frame = resolveFromBranch(child, outerRawXml, opts);
    if (!frame) continue;
    if (frame.content.type !== "opaque") return frame;
    if (firstOpaqueChoiceFrame === null) firstOpaqueChoiceFrame = frame;
  }

  // Phase 1.1 B1 — no Choice produced typed content; try Fallback.
  const fallback = findFirstChild(alt, "Fallback");
  const fallbackFrame = fallback ? resolveFromBranch(fallback, outerRawXml, opts) : null;
  if (fallbackFrame && fallbackFrame.content.type !== "opaque") return fallbackFrame;

  // All branches opaque (or empty) — preserve whichever result exists, with
  // the first supported Choice ahead of Fallback.
  return firstOpaqueChoiceFrame ?? fallbackFrame;
}
|
|
114
|
+
|
|
115
|
+
/**
 * Tell whether an `mc:Choice` element may be considered for parsing.
 *
 * A Choice with a missing or blank `Requires` attribute is always accepted.
 * Otherwise the attribute is split on whitespace and every token must be a
 * member of `SUPPORTED_REQUIRES_NAMESPACES`; a single unknown token rejects
 * the Choice.
 *
 * @param choice The `Choice` element to inspect.
 * @returns `true` when the Choice is acceptable, `false` otherwise.
 */
function isChoiceSupported(choice: XmlElementNode): boolean {
  const rawRequires = choice.attributes.Requires ?? "";
  const trimmed = rawRequires.trim();
  if (trimmed.length === 0) return true;

  for (const token of trimmed.split(/\s+/)) {
    if (!SUPPORTED_REQUIRES_NAMESPACES.has(token)) return false;
  }
  return true;
}
|
|
47
120
|
|
|
121
|
+
/**
 * Build a drawing frame from a single subtree (the parsed root, an
 * `mc:Choice`, or an `mc:Fallback`).
 *
 * Returns `null` when the subtree has no `anchor` / `inline` element, when
 * its `graphicData` URI is not one `isKnownUri` recognises, or when it is a
 * WordArt WPS drawing — in those cases the caller falls back to another
 * branch or to the legacy parse chain.
 *
 * @param branch      Subtree to search.
 * @param outerRawXml Raw XML handed on to `resolveContent`.
 * @param opts        Parse options handed on to `resolveContent`.
 */
function resolveFromBranch(
  branch: XmlElementNode,
  outerRawXml: string,
  opts: ParseDrawingOpts,
): DrawingFrameNode | null {
  // Find wp:anchor or wp:inline in the chosen branch; anchor is preferred
  // when both are present.
  const container =
    findFirstDescendant(branch, "anchor") ?? findFirstDescendant(branch, "inline");
  if (!container) {
    return null;
  }

  const geometry: AnchorGeometry = parseAnchorGeometry(container);

  // Locate a:graphicData to determine content type.
  const graphicData = findFirstDescendant(branch, "graphicData");
  const uri = graphicData?.attributes.uri ?? "";

  // F3.5: if we don't recognise the graphicData URI, return null and let the
  // legacy parse chain handle it.
  if (!isKnownUri(uri)) {
    return null;
  }

  // WordArt remains on the dedicated legacy `wordart` node path. Only
  // non-WordArt WPS drawings are promoted into DrawingFrame shape content.
  const isWpsShape = uri === WPS_SHAPE_GRAPHIC_URI;
  if (isWpsShape && isWordArtGraphicData(graphicData)) {
    return null;
  }

  return {
    type: "drawing_frame",
    anchor: geometry,
    content: resolveContent(uri, graphicData, outerRawXml, opts),
  };
}
|
|
73
148
|
|
|
@@ -91,34 +166,6 @@ function isWordArtGraphicData(graphicData: XmlElementNode | undefined): boolean
|
|
|
91
166
|
return /^text/i.test(geometry);
|
|
92
167
|
}
|
|
93
168
|
|
|
94
|
-
/**
|
|
95
|
-
* If the drawing is wrapped in mc:AlternateContent, return the chosen branch
|
|
96
|
-
* subtree. Prefer Choice (richer content); use Fallback only when Choice
|
|
97
|
-
* contains no graphicData we can parse.
|
|
98
|
-
*/
|
|
99
|
-
function pickAlternateContentBranch(root: XmlElementNode): XmlElementNode {
|
|
100
|
-
const alt = findFirstDescendant(root, "AlternateContent");
|
|
101
|
-
if (!alt) return root;
|
|
102
|
-
|
|
103
|
-
const choice = findFirstChild(alt, "Choice");
|
|
104
|
-
const fallback = findFirstChild(alt, "Fallback");
|
|
105
|
-
|
|
106
|
-
if (choice) {
|
|
107
|
-
const choiceGraphicData = findFirstDescendant(choice, "graphicData");
|
|
108
|
-
const choiceUri = choiceGraphicData?.attributes.uri ?? "";
|
|
109
|
-
if (isKnownUri(choiceUri)) return choice;
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
if (fallback) {
|
|
113
|
-
const fallbackGraphicData = findFirstDescendant(fallback, "graphicData");
|
|
114
|
-
const fallbackUri = fallbackGraphicData?.attributes.uri ?? "";
|
|
115
|
-
if (isKnownUri(fallbackUri)) return fallback;
|
|
116
|
-
}
|
|
117
|
-
|
|
118
|
-
// Neither branch has a known URI — return Choice (if any) so caller can emit
|
|
119
|
-
// opaque preservation; otherwise the original root.
|
|
120
|
-
return choice ?? fallback ?? root;
|
|
121
|
-
}
|
|
122
169
|
|
|
123
170
|
function resolveContent(
|
|
124
171
|
uri: string,
|
|
@@ -128,12 +175,16 @@ function resolveContent(
|
|
|
128
175
|
): DrawingFrameNode["content"] {
|
|
129
176
|
if (uri === PICTURE_GRAPHIC_URI) {
|
|
130
177
|
if (graphicData) {
|
|
131
|
-
const pic = parsePicture(graphicData
|
|
178
|
+
const pic = parsePicture(graphicData);
|
|
132
179
|
if (pic) {
|
|
133
|
-
// Resolve mediaId from the relationship map
|
|
180
|
+
// Resolve mediaId from the relationship map. For embedded blips
|
|
181
|
+
// (r:embed) the relationship target is a package-local media part,
|
|
182
|
+
// so we normalize + register a mediaId. For linked blips (G4 / r:link)
|
|
183
|
+
// the relationship target is an external URL — we skip mediaId
|
|
184
|
+
// (leaving it undefined so surface-projection flags state: "missing").
|
|
134
185
|
const relMap = new Map(opts.relationships.map((r) => [r.id, r]));
|
|
135
186
|
const rel = relMap.get(pic.blipRef);
|
|
136
|
-
if (rel?.type.endsWith("/image")) {
|
|
187
|
+
if (!pic.isLinked && rel?.type.endsWith("/image")) {
|
|
137
188
|
const partPath = normalizePartPath(
|
|
138
189
|
resolveRelationshipTarget(opts.sourcePartPath ?? "/word/document.xml", rel),
|
|
139
190
|
);
|
|
@@ -156,7 +207,7 @@ function resolveContent(
|
|
|
156
207
|
if (uri === WPS_SHAPE_GRAPHIC_URI) {
|
|
157
208
|
if (graphicData) {
|
|
158
209
|
const shape = parseShapeContent(
|
|
159
|
-
graphicData
|
|
210
|
+
graphicData,
|
|
160
211
|
rawXml,
|
|
161
212
|
opts.blockParser,
|
|
162
213
|
);
|
|
@@ -168,152 +219,6 @@ function resolveContent(
|
|
|
168
219
|
return { type: "opaque", rawXml };
|
|
169
220
|
}
|
|
170
221
|
|
|
171
|
-
//
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
type: "element";
|
|
175
|
-
name: string;
|
|
176
|
-
attributes: Record<string, string>;
|
|
177
|
-
children: XmlNode[];
|
|
178
|
-
}
|
|
179
|
-
|
|
180
|
-
interface XmlTextNode {
|
|
181
|
-
type: "text";
|
|
182
|
-
text: string;
|
|
183
|
-
}
|
|
184
|
-
|
|
185
|
-
type XmlNode = XmlElementNode | XmlTextNode;
|
|
186
|
-
|
|
187
|
-
function findFirstChild(
|
|
188
|
-
node: XmlElementNode,
|
|
189
|
-
local: string,
|
|
190
|
-
): XmlElementNode | undefined {
|
|
191
|
-
for (const child of node.children) {
|
|
192
|
-
if (child.type === "element" && localName(child.name) === local) return child;
|
|
193
|
-
}
|
|
194
|
-
return undefined;
|
|
195
|
-
}
|
|
196
|
-
|
|
197
|
-
function findFirstDescendant(
|
|
198
|
-
node: XmlElementNode,
|
|
199
|
-
local: string,
|
|
200
|
-
): XmlElementNode | undefined {
|
|
201
|
-
for (const child of node.children) {
|
|
202
|
-
if (child.type !== "element") continue;
|
|
203
|
-
if (localName(child.name) === local) return child;
|
|
204
|
-
const found = findFirstDescendant(child, local);
|
|
205
|
-
if (found) return found;
|
|
206
|
-
}
|
|
207
|
-
return undefined;
|
|
208
|
-
}
|
|
209
|
-
|
|
210
|
-
function localName(name: string): string {
|
|
211
|
-
const i = name.indexOf(":");
|
|
212
|
-
return i >= 0 ? name.slice(i + 1) : name;
|
|
213
|
-
}
|
|
214
|
-
|
|
215
|
-
function parseXml(xml: string): XmlElementNode {
|
|
216
|
-
const root: XmlElementNode = {
|
|
217
|
-
type: "element",
|
|
218
|
-
name: "__root__",
|
|
219
|
-
attributes: {},
|
|
220
|
-
children: [],
|
|
221
|
-
};
|
|
222
|
-
const stack: XmlElementNode[] = [root];
|
|
223
|
-
let cursor = 0;
|
|
224
|
-
|
|
225
|
-
while (cursor < xml.length) {
|
|
226
|
-
if (xml.startsWith("<!--", cursor)) {
|
|
227
|
-
const end = xml.indexOf("-->", cursor);
|
|
228
|
-
cursor = end >= 0 ? end + 3 : xml.length;
|
|
229
|
-
continue;
|
|
230
|
-
}
|
|
231
|
-
if (xml.startsWith("<?", cursor)) {
|
|
232
|
-
const end = xml.indexOf("?>", cursor);
|
|
233
|
-
cursor = end >= 0 ? end + 2 : xml.length;
|
|
234
|
-
continue;
|
|
235
|
-
}
|
|
236
|
-
if (xml[cursor] !== "<") {
|
|
237
|
-
const nextTag = xml.indexOf("<", cursor);
|
|
238
|
-
const end = nextTag >= 0 ? nextTag : xml.length;
|
|
239
|
-
const text = decodeEntities(xml.slice(cursor, end));
|
|
240
|
-
if (text) stack[stack.length - 1]?.children.push({ type: "text", text });
|
|
241
|
-
cursor = end;
|
|
242
|
-
continue;
|
|
243
|
-
}
|
|
244
|
-
if (xml[cursor + 1] === "/") {
|
|
245
|
-
const end = xml.indexOf(">", cursor);
|
|
246
|
-
stack.pop();
|
|
247
|
-
cursor = end + 1;
|
|
248
|
-
continue;
|
|
249
|
-
}
|
|
250
|
-
const tagEnd = findTagEnd(xml, cursor);
|
|
251
|
-
const tagBody = xml.slice(cursor + 1, tagEnd);
|
|
252
|
-
const selfClosing = /\/\s*$/.test(tagBody);
|
|
253
|
-
const { name, attributes } = parseTag(tagBody.replace(/\/\s*$/, "").trim());
|
|
254
|
-
const el: XmlElementNode = { type: "element", name, attributes, children: [] };
|
|
255
|
-
stack[stack.length - 1]?.children.push(el);
|
|
256
|
-
if (!selfClosing) stack.push(el);
|
|
257
|
-
cursor = tagEnd + 1;
|
|
258
|
-
}
|
|
259
|
-
|
|
260
|
-
return root;
|
|
261
|
-
}
|
|
262
|
-
|
|
263
|
-
function findTagEnd(xml: string, start: number): number {
|
|
264
|
-
let cursor = start + 1;
|
|
265
|
-
let quote: string | null = null;
|
|
266
|
-
while (cursor < xml.length) {
|
|
267
|
-
const ch = xml[cursor];
|
|
268
|
-
if (quote) {
|
|
269
|
-
if (ch === quote) quote = null;
|
|
270
|
-
} else if (ch === `"` || ch === `'`) {
|
|
271
|
-
quote = ch;
|
|
272
|
-
} else if (ch === ">") {
|
|
273
|
-
return cursor;
|
|
274
|
-
}
|
|
275
|
-
cursor++;
|
|
276
|
-
}
|
|
277
|
-
return xml.length - 1;
|
|
278
|
-
}
|
|
279
|
-
|
|
280
|
-
function parseTag(body: string): { name: string; attributes: Record<string, string> } {
|
|
281
|
-
let i = 0;
|
|
282
|
-
while (i < body.length && /\s/.test(body[i] ?? "")) i++;
|
|
283
|
-
const nameStart = i;
|
|
284
|
-
while (i < body.length && !/\s/.test(body[i] ?? "")) i++;
|
|
285
|
-
const name = body.slice(nameStart, i);
|
|
286
|
-
const attributes: Record<string, string> = {};
|
|
287
|
-
while (i < body.length) {
|
|
288
|
-
while (i < body.length && /\s/.test(body[i] ?? "")) i++;
|
|
289
|
-
if (i >= body.length) break;
|
|
290
|
-
const kStart = i;
|
|
291
|
-
while (i < body.length && !/[\s=]/.test(body[i] ?? "")) i++;
|
|
292
|
-
const key = body.slice(kStart, i);
|
|
293
|
-
while (i < body.length && /\s/.test(body[i] ?? "")) i++;
|
|
294
|
-
if (body[i] !== "=") { attributes[key] = ""; continue; }
|
|
295
|
-
i++;
|
|
296
|
-
while (i < body.length && /\s/.test(body[i] ?? "")) i++;
|
|
297
|
-
const q = body[i];
|
|
298
|
-
if (q !== `"` && q !== `'`) throw new Error(`Bad attr ${key}`);
|
|
299
|
-
i++;
|
|
300
|
-
const vStart = i;
|
|
301
|
-
while (i < body.length && body[i] !== q) i++;
|
|
302
|
-
attributes[key] = decodeEntities(body.slice(vStart, i));
|
|
303
|
-
i++;
|
|
304
|
-
}
|
|
305
|
-
return { name, attributes };
|
|
306
|
-
}
|
|
307
|
-
|
|
308
|
-
function decodeEntities(s: string): string {
|
|
309
|
-
return s.replace(/&(#x[0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g, (_, e) => {
|
|
310
|
-
if (e === "amp") return "&";
|
|
311
|
-
if (e === "lt") return "<";
|
|
312
|
-
if (e === "gt") return ">";
|
|
313
|
-
if (e === "quot") return `"`;
|
|
314
|
-
if (e === "apos") return "'";
|
|
315
|
-
if (e.startsWith("#x")) return String.fromCodePoint(parseInt(e.slice(2), 16));
|
|
316
|
-
if (e.startsWith("#")) return String.fromCodePoint(parseInt(e.slice(1), 10));
|
|
317
|
-
return `&${e};`;
|
|
318
|
-
});
|
|
319
|
-
}
|
|
222
|
+
// Phase 6 — XML parser helpers imported from ./_mini-xml.ts (previously
|
|
223
|
+
// duplicated inline across four files). See that module for B4 throw-on-
|
|
224
|
+
// unterminated-tag contract and entity-decoding implementation.
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import type { LegacyFormFieldNode } from "../../model/canonical-document.ts";
|
|
2
|
+
import type { XmlElementNode } from "./xml-element.ts";
|
|
3
|
+
import {
|
|
4
|
+
findChildOptional,
|
|
5
|
+
localName,
|
|
6
|
+
readIntAttr,
|
|
7
|
+
readOnOff,
|
|
8
|
+
readStringAttr,
|
|
9
|
+
} from "./xml-attr-helpers.ts";
|
|
10
|
+
|
|
11
|
+
/**
 * Read the legacy form-field definition carried by the <w:ffData> child of a
 * <w:fldChar w:fldCharType="begin"> element.
 *
 * Three kinds are modelled and probed in this order: textInput, checkBox,
 * ddList — the first child element found decides the kind. Whatever the
 * outcome, the ffData markup is sliced out of `sourceXml` (using the
 * element's start/end offsets) and returned verbatim in `rawXml` for
 * round-trip fidelity.
 *
 * @param fldCharEl The fldChar element to inspect.
 * @param sourceXml XML text the element offsets refer to.
 * @returns The parsed form field, or undefined when there is no <w:ffData>
 *          child (most fields don't have one). An ffData with none of the
 *          three known children is reported as kind "textInput" with only
 *          the shared properties and `rawXml`.
 */
export function parseFFDataFromFldChar(
  fldCharEl: XmlElementNode,
  sourceXml: string,
): LegacyFormFieldNode | undefined {
  const ffData = findChildOptional(fldCharEl, "ffData");
  if (!ffData) return undefined;

  const rawXml = sourceXml.slice(ffData.start, ffData.end);

  // Properties common to every kind; each key is emitted only when a value
  // was actually read.
  const nameNode = findChildOptional(ffData, "name");
  const name = nameNode ? readStringAttr(nameNode, "w:val") : undefined;
  const enabledNode = findChildOptional(ffData, "enabled");
  const enabled = enabledNode !== undefined ? readOnOff(enabledNode) : undefined;
  const calcOnExitNode = findChildOptional(ffData, "calcOnExit");
  const calcOnExit = calcOnExitNode !== undefined ? readOnOff(calcOnExitNode) : undefined;

  const shared: Pick<LegacyFormFieldNode, "name" | "enabled" | "calcOnExit"> = {
    ...(name !== undefined ? { name } : {}),
    ...(enabled !== undefined ? { enabled } : {}),
    ...(calcOnExit !== undefined ? { calcOnExit } : {}),
  };

  // ── textInput ──
  const textInputNode = findChildOptional(ffData, "textInput");
  if (textInputNode) {
    const defaultNode = findChildOptional(textInputNode, "default");
    const maxLengthNode = findChildOptional(textInputNode, "maxLength");
    const formatNode = findChildOptional(textInputNode, "format");
    return {
      kind: "textInput",
      ...shared,
      textInput: {
        ...(defaultNode !== undefined ? { default: readStringAttr(defaultNode, "w:val") } : {}),
        ...(maxLengthNode !== undefined
          ? { maxLength: readIntAttr(maxLengthNode, "w:val") }
          : {}),
        ...(formatNode !== undefined ? { format: readStringAttr(formatNode, "w:val") } : {}),
      },
      rawXml,
    };
  }

  // ── checkBox ──
  const checkBoxNode = findChildOptional(ffData, "checkBox");
  if (checkBoxNode) {
    const sizeNode = findChildOptional(checkBoxNode, "size");
    const defaultNode = findChildOptional(checkBoxNode, "default");
    const checkedNode = findChildOptional(checkBoxNode, "checked");
    return {
      kind: "checkBox",
      ...shared,
      checkBox: {
        ...(sizeNode !== undefined ? { size: readIntAttr(sizeNode, "w:val") } : {}),
        // `default` falls back to false when readOnOff yields nothing;
        // `checked` is passed through as read.
        ...(defaultNode !== undefined ? { default: readOnOff(defaultNode) ?? false } : {}),
        ...(checkedNode !== undefined ? { checked: readOnOff(checkedNode) } : {}),
      },
      rawXml,
    };
  }

  // ── ddList ──
  const ddListNode = findChildOptional(ffData, "ddList");
  if (ddListNode) {
    const defaultNode = findChildOptional(ddListNode, "default");
    const entryNodes = ddListNode.children.filter(
      (node): node is XmlElementNode =>
        node.type === "element" && localName(node.name) === "listEntry",
    );
    // An entry without w:val contributes an empty string.
    const listEntry = entryNodes.map((node) => readStringAttr(node, "w:val") ?? "");
    return {
      kind: "ddList",
      ...shared,
      ddList: {
        ...(defaultNode !== undefined ? { default: readIntAttr(defaultNode, "w:val") } : {}),
        listEntry,
      },
      rawXml,
    };
  }

  // Unknown or empty ffData — preserve via rawXml passthrough
  return { kind: "textInput", ...shared, rawXml };
}
|
|
@@ -53,6 +53,9 @@ export interface ParsedBookmarkEndNode {
|
|
|
53
53
|
|
|
54
54
|
export type ParsedBookmarkNode = ParsedBookmarkStartNode | ParsedBookmarkEndNode;
|
|
55
55
|
|
|
56
|
+
import { parseXmlWithOffsets } from "./xml-parser.ts";
|
|
57
|
+
import { localName } from "./xml-attr-helpers.ts";
|
|
58
|
+
|
|
56
59
|
// ─── Internal XML types ───────────────────────────────────────────────────────
|
|
57
60
|
|
|
58
61
|
interface XmlElementNode {
|
|
@@ -86,7 +89,7 @@ export function parseFieldsFromParagraphXml(paragraphXml: string): {
|
|
|
86
89
|
simpleFields: ParsedSimpleFieldNode[];
|
|
87
90
|
bookmarks: ParsedBookmarkNode[];
|
|
88
91
|
} {
|
|
89
|
-
const root =
|
|
92
|
+
const root = parseXmlWithOffsets(paragraphXml) as XmlElementNode;
|
|
90
93
|
const pEl = findFirstChild(root, "p");
|
|
91
94
|
const target = pEl ?? root;
|
|
92
95
|
|
|
@@ -120,7 +123,7 @@ export function parseFieldsFromParagraphXml(paragraphXml: string): {
|
|
|
120
123
|
* @param bodyXml Raw XML string of a <w:body> element (or full document).
|
|
121
124
|
*/
|
|
122
125
|
export function extractComplexFieldsFromBodyXml(bodyXml: string): ParsedComplexFieldNode[] {
|
|
123
|
-
const root =
|
|
126
|
+
const root = parseXmlWithOffsets(bodyXml) as XmlElementNode;
|
|
124
127
|
const results: ParsedComplexFieldNode[] = [];
|
|
125
128
|
|
|
126
129
|
// Walk all <w:p> children of the body
|
|
@@ -140,7 +143,7 @@ export function extractComplexFieldsFromBodyXml(bodyXml: string): ParsedComplexF
|
|
|
140
143
|
* Parse all bookmark start/end nodes from a full document or body XML.
|
|
141
144
|
*/
|
|
142
145
|
export function extractBookmarksFromBodyXml(bodyXml: string): ParsedBookmarkNode[] {
|
|
143
|
-
const root =
|
|
146
|
+
const root = parseXmlWithOffsets(bodyXml) as XmlElementNode;
|
|
144
147
|
const results: ParsedBookmarkNode[] = [];
|
|
145
148
|
collectBookmarks(root, bodyXml, results);
|
|
146
149
|
return results;
|
|
@@ -174,7 +177,7 @@ export interface FieldGroup {
|
|
|
174
177
|
* Fail-closed: a begin with no matching end is silently dropped.
|
|
175
178
|
*/
|
|
176
179
|
export function streamWalkFieldGroups(bodyXml: string): FieldGroup[] {
|
|
177
|
-
const root =
|
|
180
|
+
const root = parseXmlWithOffsets(bodyXml) as XmlElementNode;
|
|
178
181
|
const bodyEl =
|
|
179
182
|
findFirstChild(root, "body") ??
|
|
180
183
|
findFirstChild(root, "document") ??
|
|
@@ -836,145 +839,3 @@ function findFirstChildEl(node: XmlElementNode, childLocalName: string): XmlElem
|
|
|
836
839
|
);
|
|
837
840
|
}
|
|
838
841
|
|
|
839
|
-
function localName(name: string): string {
|
|
840
|
-
const sep = name.indexOf(":");
|
|
841
|
-
return sep >= 0 ? name.slice(sep + 1) : name;
|
|
842
|
-
}
|
|
843
|
-
|
|
844
|
-
// ─── Minimal XML parser (same pattern as parse-tables.ts) ────────────────────
|
|
845
|
-
|
|
846
|
-
function parseXml(xml: string): XmlElementNode {
|
|
847
|
-
const root: XmlElementNode = {
|
|
848
|
-
type: "element",
|
|
849
|
-
name: "__root__",
|
|
850
|
-
attributes: {},
|
|
851
|
-
children: [],
|
|
852
|
-
start: 0,
|
|
853
|
-
end: xml.length,
|
|
854
|
-
};
|
|
855
|
-
const stack: XmlElementNode[] = [root];
|
|
856
|
-
let cursor = 0;
|
|
857
|
-
|
|
858
|
-
while (cursor < xml.length) {
|
|
859
|
-
if (xml.startsWith("<!--", cursor)) {
|
|
860
|
-
const end = xml.indexOf("-->", cursor);
|
|
861
|
-
cursor = end >= 0 ? end + 3 : xml.length;
|
|
862
|
-
continue;
|
|
863
|
-
}
|
|
864
|
-
if (xml.startsWith("<?", cursor)) {
|
|
865
|
-
const end = xml.indexOf("?>", cursor);
|
|
866
|
-
cursor = end >= 0 ? end + 2 : xml.length;
|
|
867
|
-
continue;
|
|
868
|
-
}
|
|
869
|
-
if (xml.startsWith("<![CDATA[", cursor)) {
|
|
870
|
-
const end = xml.indexOf("]]>", cursor);
|
|
871
|
-
const textEnd = end >= 0 ? end : xml.length;
|
|
872
|
-
stack[stack.length - 1]?.children.push({
|
|
873
|
-
type: "text",
|
|
874
|
-
text: xml.slice(cursor + 9, textEnd),
|
|
875
|
-
start: cursor,
|
|
876
|
-
end: end >= 0 ? end + 3 : xml.length,
|
|
877
|
-
});
|
|
878
|
-
cursor = end >= 0 ? end + 3 : xml.length;
|
|
879
|
-
continue;
|
|
880
|
-
}
|
|
881
|
-
if (xml[cursor] !== "<") {
|
|
882
|
-
const nextTag = xml.indexOf("<", cursor);
|
|
883
|
-
const end = nextTag >= 0 ? nextTag : xml.length;
|
|
884
|
-
const text = decodeXmlEntities(xml.slice(cursor, end));
|
|
885
|
-
if (text.length > 0) {
|
|
886
|
-
stack[stack.length - 1]?.children.push({ type: "text", text, start: cursor, end });
|
|
887
|
-
}
|
|
888
|
-
cursor = end;
|
|
889
|
-
continue;
|
|
890
|
-
}
|
|
891
|
-
if (xml[cursor + 1] === "/") {
|
|
892
|
-
const end = xml.indexOf(">", cursor);
|
|
893
|
-
if (end < 0) throw new Error("Malformed XML: missing >.");
|
|
894
|
-
const name = xml.slice(cursor + 2, end).trim();
|
|
895
|
-
const current = stack.pop();
|
|
896
|
-
if (!current || localName(current.name) !== localName(name)) {
|
|
897
|
-
throw new Error(`Malformed XML: unexpected closing tag </${name}>.`);
|
|
898
|
-
}
|
|
899
|
-
current.end = end + 1;
|
|
900
|
-
cursor = end + 1;
|
|
901
|
-
continue;
|
|
902
|
-
}
|
|
903
|
-
const tagEnd = findTagEnd(xml, cursor);
|
|
904
|
-
const tagBody = xml.slice(cursor + 1, tagEnd);
|
|
905
|
-
const selfClosing = /\/\s*$/.test(tagBody);
|
|
906
|
-
const { name, attributes } = parseTag(tagBody.replace(/\/\s*$/, "").trim());
|
|
907
|
-
const element: XmlElementNode = {
|
|
908
|
-
type: "element",
|
|
909
|
-
name,
|
|
910
|
-
attributes,
|
|
911
|
-
children: [],
|
|
912
|
-
start: cursor,
|
|
913
|
-
end: tagEnd + 1,
|
|
914
|
-
};
|
|
915
|
-
stack[stack.length - 1]?.children.push(element);
|
|
916
|
-
if (!selfClosing) stack.push(element);
|
|
917
|
-
cursor = tagEnd + 1;
|
|
918
|
-
}
|
|
919
|
-
|
|
920
|
-
if (stack.length !== 1) throw new Error("Malformed XML: unclosed element.");
|
|
921
|
-
return root;
|
|
922
|
-
}
|
|
923
|
-
|
|
924
|
-
function parseTag(tagBody: string): { name: string; attributes: Record<string, string> } {
|
|
925
|
-
let cursor = 0;
|
|
926
|
-
while (cursor < tagBody.length && /\s/.test(tagBody[cursor] ?? "")) cursor += 1;
|
|
927
|
-
const nameStart = cursor;
|
|
928
|
-
while (cursor < tagBody.length && !/\s/.test(tagBody[cursor] ?? "")) cursor += 1;
|
|
929
|
-
const name = tagBody.slice(nameStart, cursor);
|
|
930
|
-
const attributes: Record<string, string> = {};
|
|
931
|
-
|
|
932
|
-
while (cursor < tagBody.length) {
|
|
933
|
-
while (cursor < tagBody.length && /\s/.test(tagBody[cursor] ?? "")) cursor += 1;
|
|
934
|
-
if (cursor >= tagBody.length) break;
|
|
935
|
-
const keyStart = cursor;
|
|
936
|
-
while (cursor < tagBody.length && !/[\s=]/.test(tagBody[cursor] ?? "")) cursor += 1;
|
|
937
|
-
const key = tagBody.slice(keyStart, cursor);
|
|
938
|
-
while (cursor < tagBody.length && /\s/.test(tagBody[cursor] ?? "")) cursor += 1;
|
|
939
|
-
if (tagBody[cursor] !== "=") { attributes[key] = ""; continue; }
|
|
940
|
-
cursor += 1;
|
|
941
|
-
while (cursor < tagBody.length && /\s/.test(tagBody[cursor] ?? "")) cursor += 1;
|
|
942
|
-
const quote = tagBody[cursor];
|
|
943
|
-
if (quote !== `"` && quote !== `'`) throw new Error(`Malformed XML attribute ${key}.`);
|
|
944
|
-
cursor += 1;
|
|
945
|
-
const valueStart = cursor;
|
|
946
|
-
while (cursor < tagBody.length && tagBody[cursor] !== quote) cursor += 1;
|
|
947
|
-
attributes[key] = decodeXmlEntities(tagBody.slice(valueStart, cursor));
|
|
948
|
-
cursor += 1;
|
|
949
|
-
}
|
|
950
|
-
return { name, attributes };
|
|
951
|
-
}
|
|
952
|
-
|
|
953
|
-
function findTagEnd(xml: string, start: number): number {
|
|
954
|
-
let cursor = start + 1;
|
|
955
|
-
let quote: string | null = null;
|
|
956
|
-
while (cursor < xml.length) {
|
|
957
|
-
const c = xml[cursor];
|
|
958
|
-
if (quote) { if (c === quote) quote = null; cursor += 1; continue; }
|
|
959
|
-
if (c === `"` || c === `'`) { quote = c; cursor += 1; continue; }
|
|
960
|
-
if (c === ">") return cursor;
|
|
961
|
-
cursor += 1;
|
|
962
|
-
}
|
|
963
|
-
throw new Error("Malformed XML: missing >.");
|
|
964
|
-
}
|
|
965
|
-
|
|
966
|
-
function decodeXmlEntities(value: string): string {
|
|
967
|
-
return value.replace(/&(#x[0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g, (match, entity) => {
|
|
968
|
-
switch (entity) {
|
|
969
|
-
case "amp": return "&";
|
|
970
|
-
case "lt": return "<";
|
|
971
|
-
case "gt": return ">";
|
|
972
|
-
case "quot": return `"`;
|
|
973
|
-
case "apos": return `'`;
|
|
974
|
-
default:
|
|
975
|
-
if (entity.startsWith("#x")) return String.fromCodePoint(Number.parseInt(entity.slice(2), 16));
|
|
976
|
-
if (entity.startsWith("#")) return String.fromCodePoint(Number.parseInt(entity.slice(1), 10));
|
|
977
|
-
return match;
|
|
978
|
-
}
|
|
979
|
-
});
|
|
980
|
-
}
|