@beyondwork/docx-react-component 1.0.56 → 1.0.57
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/api/public-types.ts +157 -0
- package/src/compare/diff-engine.ts +3 -0
- package/src/core/commands/formatting-commands.ts +1 -0
- package/src/core/commands/index.ts +17 -11
- package/src/core/selection/mapping.ts +18 -1
- package/src/core/selection/review-anchors.ts +29 -18
- package/src/io/chart-preview-resolver.ts +175 -41
- package/src/io/docx-session.ts +57 -2
- package/src/io/export/serialize-main-document.ts +82 -0
- package/src/io/export/serialize-styles.ts +61 -3
- package/src/io/export/table-properties-xml.ts +19 -4
- package/src/io/normalize/normalize-text.ts +33 -0
- package/src/io/ooxml/parse-anchor.ts +182 -0
- package/src/io/ooxml/parse-drawing.ts +319 -0
- package/src/io/ooxml/parse-fields.ts +115 -2
- package/src/io/ooxml/parse-fill.ts +215 -0
- package/src/io/ooxml/parse-font-table.ts +190 -0
- package/src/io/ooxml/parse-footnotes.ts +52 -1
- package/src/io/ooxml/parse-main-document.ts +241 -1
- package/src/io/ooxml/parse-numbering.ts +96 -0
- package/src/io/ooxml/parse-picture.ts +107 -0
- package/src/io/ooxml/parse-settings.ts +34 -0
- package/src/io/ooxml/parse-shapes.ts +87 -0
- package/src/io/ooxml/parse-solid-fill.ts +11 -0
- package/src/io/ooxml/parse-styles.ts +74 -1
- package/src/io/ooxml/parse-theme.ts +60 -0
- package/src/io/paste/html-clipboard.ts +449 -0
- package/src/io/paste/word-clipboard.ts +5 -1
- package/src/legal/_document-root.ts +26 -0
- package/src/legal/bookmarks.ts +4 -3
- package/src/legal/cross-references.ts +3 -2
- package/src/legal/defined-terms.ts +2 -1
- package/src/legal/signature-blocks.ts +2 -1
- package/src/model/canonical-document.ts +415 -3
- package/src/runtime/chart/chart-model-store.ts +73 -10
- package/src/runtime/document-runtime.ts +693 -41
- package/src/runtime/edit-ops/index.ts +129 -0
- package/src/runtime/event-refresh-hints.ts +7 -0
- package/src/runtime/field-resolver.ts +341 -0
- package/src/runtime/footnote-resolver.ts +55 -0
- package/src/runtime/hyperlink-color-resolver.ts +13 -10
- package/src/runtime/object-grab/index.ts +51 -0
- package/src/runtime/paragraph-style-resolver.ts +105 -0
- package/src/runtime/resolved-numbering-geometry.ts +12 -0
- package/src/runtime/selection/cursor-ops.ts +186 -15
- package/src/runtime/selection/index.ts +17 -1
- package/src/runtime/structure-ops/index.ts +77 -0
- package/src/runtime/styles-cascade.ts +33 -0
- package/src/runtime/surface-projection.ts +186 -12
- package/src/runtime/theme-color-resolver.ts +189 -44
- package/src/runtime/units.ts +46 -0
- package/src/runtime/view-state.ts +13 -2
- package/src/ui/WordReviewEditor.tsx +168 -10
- package/src/ui/editor-runtime-boundary.ts +94 -1
- package/src/ui/editor-shell-view.tsx +1 -1
- package/src/ui/runtime-shortcut-dispatch.ts +17 -3
- package/src/ui-tailwind/chart/ChartSurface.tsx +36 -10
- package/src/ui-tailwind/chart/layout/plot-area.ts +120 -45
- package/src/ui-tailwind/chart/render/area.tsx +22 -4
- package/src/ui-tailwind/chart/render/bar-column.tsx +37 -11
- package/src/ui-tailwind/chart/render/bubble.tsx +6 -2
- package/src/ui-tailwind/chart/render/combo.tsx +37 -4
- package/src/ui-tailwind/chart/render/line.tsx +28 -5
- package/src/ui-tailwind/chart/render/pie.tsx +36 -16
- package/src/ui-tailwind/chart/render/progressive-render.ts +8 -1
- package/src/ui-tailwind/chart/render/scatter.tsx +9 -4
- package/src/ui-tailwind/chrome/avatar-initials.ts +15 -0
- package/src/ui-tailwind/chrome/tw-comment-preview.tsx +3 -1
- package/src/ui-tailwind/chrome/tw-context-menu.tsx +14 -0
- package/src/ui-tailwind/chrome/tw-selection-tool-host.tsx +3 -2
- package/src/ui-tailwind/chrome/tw-selection-toolbar.tsx +30 -11
- package/src/ui-tailwind/chrome/tw-shortcut-hint.tsx +15 -2
- package/src/ui-tailwind/chrome/tw-suggestion-card.tsx +1 -1
- package/src/ui-tailwind/chrome/tw-table-context-toolbar.tsx +24 -7
- package/src/ui-tailwind/chrome/tw-table-grip-layer.tsx +31 -12
- package/src/ui-tailwind/chrome-overlay/page-border-resolver.ts +211 -0
- package/src/ui-tailwind/chrome-overlay/tw-chrome-overlay.tsx +1 -0
- package/src/ui-tailwind/chrome-overlay/tw-comment-balloon-layer.tsx +74 -0
- package/src/ui-tailwind/chrome-overlay/tw-locked-block-layer.tsx +65 -0
- package/src/ui-tailwind/chrome-overlay/tw-page-border-overlay.tsx +233 -0
- package/src/ui-tailwind/chrome-overlay/tw-page-stack-overlay-layer.tsx +135 -13
- package/src/ui-tailwind/chrome-overlay/tw-revision-margin-bar-layer.tsx +51 -0
- package/src/ui-tailwind/chrome-overlay/tw-scope-card-layer.tsx +12 -4
- package/src/ui-tailwind/chrome-overlay/tw-scope-card.tsx +32 -12
- package/src/ui-tailwind/chrome-overlay/tw-toc-outline-sidebar.tsx +133 -0
- package/src/ui-tailwind/editor-surface/chart-node-view.tsx +49 -10
- package/src/ui-tailwind/editor-surface/float-wrap-resolver.ts +119 -0
- package/src/ui-tailwind/editor-surface/pm-command-bridge.ts +236 -9
- package/src/ui-tailwind/editor-surface/pm-schema.ts +188 -11
- package/src/ui-tailwind/editor-surface/pm-state-from-snapshot.ts +28 -2
- package/src/ui-tailwind/editor-surface/shape-renderer.ts +206 -0
- package/src/ui-tailwind/editor-surface/surface-layer.ts +66 -0
- package/src/ui-tailwind/editor-surface/tw-inline-token.tsx +29 -0
- package/src/ui-tailwind/editor-surface/tw-segment-view.tsx +7 -1
- package/src/ui-tailwind/page-stack/tw-page-stack-chrome-layer.tsx +22 -6
- package/src/ui-tailwind/review/tw-comment-sidebar.tsx +10 -16
- package/src/ui-tailwind/review/tw-health-panel.tsx +0 -25
- package/src/ui-tailwind/review/tw-rail-card.tsx +38 -17
- package/src/ui-tailwind/review/tw-review-rail.tsx +2 -2
- package/src/ui-tailwind/review/tw-revision-sidebar.tsx +5 -12
- package/src/ui-tailwind/review/tw-workflow-tab.tsx +2 -2
- package/src/ui-tailwind/theme/editor-theme.css +1 -0
- package/src/ui-tailwind/theme/tokens.css +6 -0
- package/src/ui-tailwind/theme/tokens.ts +10 -0
- package/src/validation/compatibility-engine.ts +2 -0
- package/src/validation/docx-comment-proof.ts +12 -3
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
import type { AnchorGeometry } from "../../model/canonical-document.ts";
|
|
2
|
+
|
|
3
|
+
/** Any node of the parsed XML tree, discriminated by its `type` tag. */
type XmlNode = XmlElementNode | XmlTextNode;

/** Character data found between tags. */
interface XmlTextNode {
  type: "text";
  text: string;
}

/**
 * An XML element: its tag name as written in the source (it may carry a
 * namespace prefix, which `localName` strips), its attribute map keyed by the
 * attribute name as written, and its child nodes in document order.
 */
interface XmlElementNode {
  type: "element";
  name: string;
  attributes: Record<string, string>;
  children: XmlNode[];
}
|
|
16
|
+
|
|
17
|
+
/**
 * Builds the `AnchorGeometry` for a drawing container element.
 *
 * The container is treated as floating when its local tag name is `anchor`;
 * any other tag is treated as inline. Extent and wrap mode are always set.
 * Every other property is only assigned when the corresponding child element
 * or attribute is present, so absent values stay off the result object.
 *
 * @param container The container element (its local name decides the display mode).
 * @returns The geometry read from the container's attributes and direct children.
 */
export function parseAnchorGeometry(container: XmlElementNode): AnchorGeometry {
  const isFloating = localName(container.name) === "anchor";

  const geometry: AnchorGeometry = {
    display: isFloating ? "floating" : "inline",
    extent: readExtent(container),
    wrapMode: readWrapMode(container),
  };

  const horizontal = findFirstChild(container, "positionH");
  if (horizontal) geometry.positionH = readAxisPosition(horizontal);

  const vertical = findFirstChild(container, "positionV");
  if (vertical) geometry.positionV = readAxisPosition(vertical);

  // NOTE(review): the margins are taken from the `effectExtent` child's
  // t/b/l/r attributes, not from distT/distB/distL/distR on the container —
  // confirm this is the intended source for `distMargins`.
  const effectExtent = findFirstChild(container, "effectExtent");
  if (effectExtent) {
    geometry.distMargins = {
      top: readIntAttr(effectExtent, "t") ?? 0,
      bottom: readIntAttr(effectExtent, "b") ?? 0,
      left: readIntAttr(effectExtent, "l") ?? 0,
      right: readIntAttr(effectExtent, "r") ?? 0,
    };
  }

  // Layering / overlap attributes are only read for floating containers.
  if (isFloating) {
    const relativeHeight = readIntAttr(container, "relativeHeight");
    if (relativeHeight !== undefined) geometry.relativeHeight = relativeHeight;

    const behindDoc = readBoolAttr(container, "behindDoc");
    if (behindDoc !== undefined) geometry.behindDoc = behindDoc;

    const layoutInCell = readBoolAttr(container, "layoutInCell");
    if (layoutInCell !== undefined) geometry.layoutInCell = layoutInCell;

    const allowOverlap = readBoolAttr(container, "allowOverlap");
    if (allowOverlap !== undefined) geometry.allowOverlap = allowOverlap;

    const simplePos = readBoolAttr(container, "simplePos");
    if (simplePos !== undefined) geometry.simplePos = simplePos;
  }

  const docPrEl = findFirstChild(container, "docPr");
  if (docPrEl) {
    const attrs = docPrEl.attributes;
    // `name` and `descr` are omitted entirely when missing or empty.
    geometry.docPr = {
      id: attrs.id ?? "",
      ...(attrs.name ? { name: attrs.name } : {}),
      ...(attrs.descr ? { descr: attrs.descr } : {}),
    };
  }

  return geometry;
}
|
|
77
|
+
|
|
78
|
+
/**
 * Reads the drawing size in EMU.
 *
 * Uses the container's direct `extent` child when present; otherwise falls
 * back to the first `ext` element found under the first `xfrm` descendant.
 * Missing or non-numeric `cx` / `cy` values (and a missing size element)
 * yield 0.
 */
function readExtent(container: XmlElementNode): AnchorGeometry["extent"] {
  let sizeEl = findFirstChild(container, "extent");
  if (!sizeEl) {
    const xfrm = findFirstDescendant(container, "xfrm");
    sizeEl = xfrm ? findFirstDescendant(xfrm, "ext") : undefined;
  }

  if (!sizeEl) return { widthEmu: 0, heightEmu: 0 };

  return {
    widthEmu: readIntAttr(sizeEl, "cx") ?? 0,
    heightEmu: readIntAttr(sizeEl, "cy") ?? 0,
  };
}
|
|
94
|
+
|
|
95
|
+
/**
 * Maps the first recognized `wrap*` child element of the container to a wrap
 * mode. Children are scanned in document order; unrecognized elements are
 * skipped. Defaults to `"none"` when no wrap element is found.
 */
function readWrapMode(
  container: XmlElementNode,
): AnchorGeometry["wrapMode"] {
  for (const child of container.children) {
    if (child.type !== "element") continue;
    switch (localName(child.name)) {
      case "wrapNone":
        return "none";
      case "wrapSquare":
        return "square";
      case "wrapTight":
        return "tight";
      case "wrapThrough":
        return "through";
      case "wrapTopAndBottom":
        return "topAndBottom";
      default:
        break;
    }
  }
  return "none";
}
|
|
109
|
+
|
|
110
|
+
/**
 * Reads one positioning axis element (`positionH` / `positionV`).
 *
 * `relativeFrom` comes from the attribute of that name (plain or `wp:`
 * prefixed) and defaults to the empty string. `align` is set from the trimmed
 * text of an `align` child when non-empty; `offset` is set from a `posOffset`
 * child when its trimmed text parses to a finite base-10 integer.
 */
function readAxisPosition(
  el: XmlElementNode,
): { relativeFrom: string; align?: string; offset?: number } {
  const position: { relativeFrom: string; align?: string; offset?: number } = {
    relativeFrom:
      el.attributes.relativeFrom ?? el.attributes["wp:relativeFrom"] ?? "",
  };

  const alignNode = findFirstChild(el, "align");
  const offsetNode = findFirstChild(el, "posOffset");

  const alignment = alignNode ? extractText(alignNode).trim() : "";
  if (alignment) position.align = alignment;

  if (offsetNode) {
    const parsedOffset = parseInt(extractText(offsetNode).trim(), 10);
    if (Number.isFinite(parsedOffset)) position.offset = parsedOffset;
  }

  return position;
}
|
|
131
|
+
|
|
132
|
+
/**
 * Returns the first direct child element whose local (prefix-stripped) name
 * equals `local`, or `undefined` when there is none. Text nodes are ignored.
 */
function findFirstChild(
  node: XmlElementNode,
  local: string,
): XmlElementNode | undefined {
  return node.children.find(
    (child): child is XmlElementNode =>
      child.type === "element" && localName(child.name) === local,
  );
}
|
|
141
|
+
|
|
142
|
+
/**
 * Depth-first, document-order search for the first descendant element whose
 * local name equals `local`. `node` itself is never a candidate. Returns
 * `undefined` when nothing matches.
 */
function findFirstDescendant(
  node: XmlElementNode,
  local: string,
): XmlElementNode | undefined {
  // Explicit stack; children are pushed in reverse so the earliest child is
  // visited first, which reproduces a pre-order traversal.
  const pending: XmlNode[] = [...node.children].reverse();

  for (
    let current = pending.pop();
    current !== undefined;
    current = pending.pop()
  ) {
    if (current.type !== "element") continue;
    if (localName(current.name) === local) return current;

    const kids = current.children;
    for (let i = kids.length - 1; i >= 0; i--) {
      const kid = kids[i];
      if (kid) pending.push(kid);
    }
  }

  return undefined;
}
|
|
154
|
+
|
|
155
|
+
/**
 * Strips everything up to and including the first `:` from a tag name.
 * Names without a colon are returned unchanged.
 */
function localName(name: string): string {
  const colon = name.indexOf(":");
  if (colon < 0) return name;
  return name.slice(colon + 1);
}
|
|
159
|
+
|
|
160
|
+
/**
 * Concatenates, in document order, the text of every text node found
 * anywhere beneath `node`.
 */
function extractText(node: XmlElementNode): string {
  let collected = "";
  for (const child of node.children) {
    collected += child.type === "text" ? child.text : extractText(child);
  }
  return collected;
}
|
|
165
|
+
|
|
166
|
+
/**
 * Reads an attribute as a base-10 integer.
 *
 * The plain attribute name is tried first, then the `wp:`-prefixed one; the
 * first one that is present decides the result. Returns `undefined` when
 * neither is present or when the chosen value does not parse to a finite
 * number.
 */
function readIntAttr(node: XmlElementNode, name: string): number | undefined {
  for (const key of [name, `wp:${name}`]) {
    const raw = node.attributes[key];
    if (raw === undefined) continue;
    const parsed = parseInt(raw, 10);
    return Number.isFinite(parsed) ? parsed : undefined;
  }
  return undefined;
}
|
|
172
|
+
|
|
173
|
+
/**
 * Reads an attribute as a boolean.
 *
 * Looks the attribute up under its plain name, then with a `wp:` prefix, then
 * with a `w:` prefix; the first one present is used. Returns `undefined`
 * when none is present. Any value other than `"0"` and `"false"` counts as
 * true.
 */
function readBoolAttr(node: XmlElementNode, name: string): boolean | undefined {
  for (const key of [name, `wp:${name}`, `w:${name}`]) {
    const raw = node.attributes[key];
    if (raw === undefined) continue;
    return !(raw === "0" || raw === "false");
  }
  return undefined;
}
|
|
181
|
+
|
|
182
|
+
export { type XmlElementNode as AnchorXmlElement };
|
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
import type { OpcRelationship } from "./part-manifest.ts";
|
|
2
|
+
import { normalizePartPath, resolveRelationshipTarget } from "./part-manifest.ts";
|
|
3
|
+
import type { InlineMediaPart } from "./parse-inline-media.ts";
|
|
4
|
+
import type { ChartPartLookup } from "./parse-complex-content.ts";
|
|
5
|
+
import type { DrawingFrameNode, AnchorGeometry } from "../../model/canonical-document.ts";
|
|
6
|
+
import { parseAnchorGeometry } from "./parse-anchor.ts";
|
|
7
|
+
import { parsePicture, type PictureXmlElement } from "./parse-picture.ts";
|
|
8
|
+
import { parseShapeContent, type TxbxBlockParser } from "./parse-shapes.ts";
|
|
9
|
+
|
|
10
|
+
// `a:graphicData/@uri` values that the DrawingFrame path recognizes. Each
// content kind with two spellings has a primary and an `_ALT` constant; both
// are accepted wherever the kind is checked.

/** Picture content. */
const PICTURE_GRAPHIC_URI =
  "http://schemas.openxmlformats.org/drawingml/2006/picture";

/** Chart content. */
const CHART_GRAPHIC_URI =
  "http://schemas.openxmlformats.org/drawingml/2006/chart";
/** Chart content, alternate URI. */
const CHART_GRAPHIC_URI_ALT =
  "http://schemas.microsoft.com/office/drawing/2007/8/2/chart";

/** SmartArt (diagram) content. */
const SMARTART_GRAPHIC_URI =
  "http://schemas.microsoft.com/office/drawing/2007/8/2/diagram";
/** SmartArt (diagram) content, alternate URI. */
const SMARTART_GRAPHIC_URI_ALT =
  "http://schemas.openxmlformats.org/drawingml/2006/diagram";

/** WordprocessingShape (WPS) content. */
const WPS_SHAPE_GRAPHIC_URI =
  "http://schemas.microsoft.com/office/word/2010/wordprocessingShape";
|
|
22
|
+
|
|
23
|
+
/** Inputs that `parseDrawingFrame` needs besides the drawing XML itself. */
export interface ParseDrawingOpts {
  /** Relationships used to look up a picture's blip reference by relationship id. */
  relationships: readonly OpcRelationship[];
  /** Media parts keyed by string. NOTE(review): not read by the code in this section — confirm usage. */
  mediaParts?: ReadonlyMap<string, InlineMediaPart>;
  /**
   * Path of the part the drawing came from; relationship targets are resolved
   * against it. Falls back to `/word/document.xml` when omitted.
   */
  sourcePartPath?: string;
  /** Chart part lookup. NOTE(review): not read by the code in this section — confirm usage. */
  chartPartLookup?: ChartPartLookup;
  /**
   * CO4 F3.3 — optional recursive parser for `w:txbxContent`. When provided it
   * is forwarded to parseShapeContent so that shape text boxes receive a
   * fully parsed block structure on `ShapeContent.txbxBlocks`. When omitted,
   * only the raw `txbxContentXml` preservation is populated.
   */
  blockParser?: TxbxBlockParser;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Parses one drawing XML fragment into a `DrawingFrameNode`.
 *
 * Returns `null` — so the caller can fall back to the legacy parse chain —
 * when the fragment has no `anchor` / `inline` container, when the
 * container's `graphicData` URI is not one of the known URIs, or when the
 * drawing is a WPS shape classified as WordArt.
 *
 * @param drawingXml Raw XML of the drawing (possibly wrapped in AlternateContent).
 * @param opts Relationship / part context and the optional text-box block parser.
 * @returns The drawing frame, or `null` when this path does not handle the drawing.
 */
export function parseDrawingFrame(
  drawingXml: string,
  opts: ParseDrawingOpts,
): DrawingFrameNode | null {
  const root = parseXml(drawingXml);

  // F3.1: unwrap mc:AlternateContent to its Choice branch (preferred) or
  // Fallback. The branch picked here decides which container and graphicData
  // are examined below.
  const searchRoot = pickAlternateContentBranch(root);

  // Take the FIRST wp:anchor / wp:inline in document order. Preferring
  // `anchor` over `inline` regardless of position would select a drawing
  // nested inside the outer one's content (e.g. an anchored object inside an
  // inline shape's text box), pairing the nested geometry with the outer
  // graphicData.
  const container = findDrawingContainer(searchRoot);
  if (!container) return null;

  const geometry: AnchorGeometry = parseAnchorGeometry(container);

  // The graphicData that describes this frame lives inside its container.
  const graphicData = findFirstDescendant(container, "graphicData");
  const uri = graphicData?.attributes.uri ?? "";

  // F3.5: unknown graphicData URIs are left to the legacy parse chain
  // (parseComplexContentXml / parseShapeXml / parseVmlXml /
  // parseInlineMediaXml). The DrawingFrameNode path only short-circuits for
  // known URIs.
  if (!isKnownUri(uri)) return null;

  // WordArt stays on the dedicated legacy `wordart` node path; only
  // non-WordArt WPS drawings are promoted into DrawingFrame shape content.
  if (uri === WPS_SHAPE_GRAPHIC_URI && isWordArtGraphicData(graphicData)) {
    return null;
  }

  return {
    type: "drawing_frame",
    anchor: geometry,
    content: resolveContent(uri, graphicData, drawingXml, opts),
  };
}

/** Pre-order search for the first `anchor` or `inline` element beneath `node`. */
function findDrawingContainer(node: XmlElementNode): XmlElementNode | undefined {
  for (const child of node.children) {
    if (child.type !== "element") continue;
    const tag = localName(child.name);
    if (tag === "anchor" || tag === "inline") return child;
    const nested = findDrawingContainer(child);
    if (nested) return nested;
  }
  return undefined;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Tells whether `uri` is one of the graphicData URIs handled by the
 * DrawingFrame path (picture, chart, SmartArt, or WPS shape, including the
 * alternate chart / SmartArt URIs).
 */
function isKnownUri(uri: string): boolean {
  const recognized: readonly string[] = [
    PICTURE_GRAPHIC_URI,
    CHART_GRAPHIC_URI,
    CHART_GRAPHIC_URI_ALT,
    SMARTART_GRAPHIC_URI,
    SMARTART_GRAPHIC_URI_ALT,
    WPS_SHAPE_GRAPHIC_URI,
  ];
  return recognized.includes(uri);
}
|
|
84
|
+
|
|
85
|
+
/**
 * Classifies a graphicData element as WordArt.
 *
 * Follows the first `wsp` descendant → its `spPr` child → its `prstGeom`
 * child, and reports true when that element's `prst` attribute starts with
 * "text" (case-insensitive). Any missing link in the chain yields false.
 */
function isWordArtGraphicData(graphicData: XmlElementNode | undefined): boolean {
  if (!graphicData) return false;

  const shape = findFirstDescendant(graphicData, "wsp");
  if (!shape) return false;

  const shapeProps = findFirstChild(shape, "spPr");
  if (!shapeProps) return false;

  const preset = findFirstChild(shapeProps, "prstGeom")?.attributes.prst;
  return /^text/i.test(preset ?? "");
}
|
|
93
|
+
|
|
94
|
+
/**
 * Chooses the subtree to search when the drawing contains an
 * `AlternateContent` element.
 *
 * Without an AlternateContent descendant the input root is returned as is.
 * Otherwise `Choice` is tried first and `Fallback` second; the first branch
 * whose first `graphicData` descendant carries a known URI wins. When
 * neither branch qualifies, the result is Choice if present, else Fallback,
 * else the original root, so that the caller can still emit opaque
 * preservation.
 *
 * NOTE(review): the AlternateContent is located with a descendant search, so
 * one nested inside the drawing's own content would also be picked — confirm
 * callers only pass drawings that are wrapped, not ones that merely contain it.
 */
function pickAlternateContentBranch(root: XmlElementNode): XmlElementNode {
  const alternate = findFirstDescendant(root, "AlternateContent");
  if (!alternate) return root;

  const choice = findFirstChild(alternate, "Choice");
  const fallback = findFirstChild(alternate, "Fallback");

  for (const branch of [choice, fallback]) {
    if (!branch) continue;
    const branchUri =
      findFirstDescendant(branch, "graphicData")?.attributes.uri ?? "";
    if (isKnownUri(branchUri)) return branch;
  }

  return choice ?? fallback ?? root;
}
|
|
122
|
+
|
|
123
|
+
/**
 * Builds the `content` payload of a drawing frame from its graphicData URI.
 *
 * - Picture: parsed via `parsePicture`. When the blip reference resolves to a
 *   relationship whose type ends in `/image`, the package part name and media
 *   id are filled in. The outer drawing XML is always attached as `rawXml`.
 * - Chart / SmartArt: a preview placeholder carrying the raw XML.
 * - WPS shape: parsed via `parseShapeContent`.
 * - Anything else, or a picture / shape that could not be parsed: an opaque
 *   node carrying the raw XML.
 */
function resolveContent(
  uri: string,
  graphicData: XmlElementNode | undefined,
  rawXml: string,
  opts: ParseDrawingOpts,
): DrawingFrameNode["content"] {
  switch (uri) {
    case PICTURE_GRAPHIC_URI: {
      const picture = graphicData
        ? parsePicture(graphicData as PictureXmlElement)
        : undefined;
      if (!picture) break;

      // Resolve mediaId from the relationships. The scan keeps the LAST entry
      // with a matching id, which is what a map keyed by id would hold.
      let relationship: OpcRelationship | undefined;
      for (const candidate of opts.relationships) {
        if (candidate.id === picture.blipRef) relationship = candidate;
      }
      if (relationship?.type.endsWith("/image")) {
        const sourcePart = opts.sourcePartPath ?? "/word/document.xml";
        const partPath = normalizePartPath(
          resolveRelationshipTarget(sourcePart, relationship),
        );
        picture.packagePartName = partPath;
        picture.mediaId = `media:${partPath.slice(1)}`;
      }

      // F4.1 — preserve outer drawing XML for lossless round-trip serialization
      picture.rawXml = rawXml;
      return picture;
    }

    case CHART_GRAPHIC_URI:
    case CHART_GRAPHIC_URI_ALT:
      return { type: "chart_preview", rawXml };

    case SMARTART_GRAPHIC_URI:
    case SMARTART_GRAPHIC_URI_ALT:
      return { type: "smartart_preview", rawXml };

    case WPS_SHAPE_GRAPHIC_URI: {
      const shape = graphicData
        ? parseShapeContent(
            graphicData as PictureXmlElement,
            rawXml,
            opts.blockParser,
          )
        : undefined;
      if (shape) return shape;
      break;
    }

    default:
      break;
  }

  return { type: "opaque", rawXml };
}
|
|
170
|
+
|
|
171
|
+
// ── Minimal self-contained XML parser ──────────────────────────────────────
|
|
172
|
+
|
|
173
|
+
/** A node produced by `parseXml`, discriminated by its `type` tag. */
type XmlNode = XmlElementNode | XmlTextNode;

/** Entity-decoded character data found between tags. */
interface XmlTextNode {
  type: "text";
  text: string;
}

/**
 * An element produced by `parseXml`: the tag name as written (prefix
 * included), its entity-decoded attributes keyed by the name as written, and
 * its children in document order.
 */
interface XmlElementNode {
  type: "element";
  name: string;
  attributes: Record<string, string>;
  children: XmlNode[];
}
|
|
186
|
+
|
|
187
|
+
/**
 * Looks among the direct children only and returns the first element whose
 * local (prefix-stripped) name is `local`; `undefined` if there is none.
 */
function findFirstChild(
  node: XmlElementNode,
  local: string,
): XmlElementNode | undefined {
  return node.children.find(
    (candidate): candidate is XmlElementNode =>
      candidate.type === "element" && localName(candidate.name) === local,
  );
}
|
|
196
|
+
|
|
197
|
+
/**
 * Returns the first element beneath `node`, in document (pre-order) order,
 * whose local name is `local`. `node` itself is not considered. Returns
 * `undefined` when there is no match.
 */
function findFirstDescendant(
  node: XmlElementNode,
  local: string,
): XmlElementNode | undefined {
  // Work list used as a stack: pushing children back-to-front means the
  // front-most child is popped first, matching a recursive pre-order walk.
  const worklist: XmlNode[] = [...node.children].reverse();

  for (
    let visiting = worklist.pop();
    visiting !== undefined;
    visiting = worklist.pop()
  ) {
    if (visiting.type !== "element") continue;
    if (localName(visiting.name) === local) return visiting;

    const kids = visiting.children;
    for (let i = kids.length - 1; i >= 0; i--) {
      const kid = kids[i];
      if (kid) worklist.push(kid);
    }
  }

  return undefined;
}
|
|
209
|
+
|
|
210
|
+
/**
 * Returns the part of a tag name after its first `:`; a name with no colon
 * is returned untouched.
 */
function localName(name: string): string {
  const separator = name.indexOf(":");
  return separator < 0 ? name : name.slice(separator + 1);
}
|
|
214
|
+
|
|
215
|
+
/**
 * Tokenizes an XML string into a lightweight element/text tree.
 *
 * The result is always a synthetic `__root__` element whose children are the
 * top-level nodes of the input. Comments and processing instructions are
 * skipped, text is entity-decoded, and CDATA sections become text nodes with
 * their content taken verbatim. Tag names are not matched between open and
 * close tags: a closing tag simply closes the innermost open element.
 *
 * The parser is tolerant of truncated input: an unterminated comment,
 * processing instruction, CDATA section or closing tag ends the parse instead
 * of looping, and surplus closing tags never remove the synthetic root.
 *
 * @param xml XML source text.
 * @returns The synthetic root element.
 * @throws Error when a tag contains an attribute whose value is not quoted
 *   (raised by `parseTag`).
 */
function parseXml(xml: string): XmlElementNode {
  const root: XmlElementNode = {
    type: "element",
    name: "__root__",
    attributes: {},
    children: [],
  };
  // Open-element stack; index 0 is the synthetic root for the whole parse.
  const stack: XmlElementNode[] = [root];
  const appendToOpenElement = (node: XmlNode): void => {
    stack[stack.length - 1]?.children.push(node);
  };
  let cursor = 0;

  while (cursor < xml.length) {
    if (xml.startsWith("<!--", cursor)) {
      const end = xml.indexOf("-->", cursor);
      cursor = end >= 0 ? end + 3 : xml.length;
      continue;
    }
    if (xml.startsWith("<![CDATA[", cursor)) {
      // CDATA content is literal character data: no tags, no entity decoding.
      const contentStart = cursor + "<![CDATA[".length;
      const end = xml.indexOf("]]>", contentStart);
      const text = xml.slice(contentStart, end >= 0 ? end : xml.length);
      if (text) appendToOpenElement({ type: "text", text });
      cursor = end >= 0 ? end + 3 : xml.length;
      continue;
    }
    if (xml.startsWith("<?", cursor)) {
      const end = xml.indexOf("?>", cursor);
      cursor = end >= 0 ? end + 2 : xml.length;
      continue;
    }
    if (xml[cursor] !== "<") {
      const nextTag = xml.indexOf("<", cursor);
      const end = nextTag >= 0 ? nextTag : xml.length;
      const text = decodeEntities(xml.slice(cursor, end));
      if (text) appendToOpenElement({ type: "text", text });
      cursor = end;
      continue;
    }
    if (xml[cursor + 1] === "/") {
      const end = xml.indexOf(">", cursor);
      // Never pop the synthetic root: a surplus closing tag would otherwise
      // detach everything that follows from the returned tree.
      if (stack.length > 1) stack.pop();
      // A missing ">" means the input is truncated; stop instead of letting
      // `end + 1` rewind the cursor to 0 and loop forever.
      cursor = end >= 0 ? end + 1 : xml.length;
      continue;
    }

    const tagEnd = findTagEnd(xml, cursor);
    const tagBody = xml.slice(cursor + 1, tagEnd);
    const selfClosing = /\/\s*$/.test(tagBody);
    const { name, attributes } = parseTag(tagBody.replace(/\/\s*$/, "").trim());
    const el: XmlElementNode = { type: "element", name, attributes, children: [] };
    appendToOpenElement(el);
    if (!selfClosing) stack.push(el);
    cursor = tagEnd + 1;
  }

  return root;
}
|
|
262
|
+
|
|
263
|
+
/**
 * Finds the index of the `>` that terminates the tag opening at `start`.
 *
 * A `>` inside a single- or double-quoted attribute value is ignored. When
 * no terminator is found, the index of the last character of `xml` is
 * returned.
 */
function findTagEnd(xml: string, start: number): number {
  // Quote character of the attribute value being scanned; "" when outside one.
  let openQuote = "";

  for (let index = start + 1; index < xml.length; index++) {
    const ch = xml[index];
    if (openQuote !== "") {
      if (ch === openQuote) openQuote = "";
      continue;
    }
    if (ch === `"` || ch === `'`) {
      openQuote = ch;
    } else if (ch === ">") {
      return index;
    }
  }

  return xml.length - 1;
}
|
|
279
|
+
|
|
280
|
+
/**
 * Splits the inside of a start tag (without `<`, `>` or a trailing `/`) into
 * the tag name and its attributes.
 *
 * Attribute values are entity-decoded. An attribute written without `=`
 * gets the empty string as its value. A value that is missing its closing
 * quote runs to the end of `body`.
 *
 * @throws Error `Bad attr <key>` when `=` is not followed by a quoted value.
 */
function parseTag(body: string): { name: string; attributes: Record<string, string> } {
  let pos = 0;

  const skipWhitespace = (): void => {
    while (pos < body.length && /\s/.test(body[pos] ?? "")) pos++;
  };
  // Consumes characters until one matches `stop` (or the body ends) and
  // returns the consumed text.
  const readUntil = (stop: RegExp): string => {
    const from = pos;
    while (pos < body.length && !stop.test(body[pos] ?? "")) pos++;
    return body.slice(from, pos);
  };

  skipWhitespace();
  const name = readUntil(/\s/);
  const attributes: Record<string, string> = {};

  for (;;) {
    skipWhitespace();
    if (pos >= body.length) break;

    const key = readUntil(/[\s=]/);
    skipWhitespace();
    if (body[pos] !== "=") {
      // Bare attribute with no value.
      attributes[key] = "";
      continue;
    }
    pos++; // consume "="
    skipWhitespace();

    const quote = body[pos];
    if (quote !== `"` && quote !== `'`) throw new Error(`Bad attr ${key}`);
    const valueStart = pos + 1;
    const closing = body.indexOf(quote, valueStart);
    const valueEnd = closing < 0 ? body.length : closing;
    attributes[key] = decodeEntities(body.slice(valueStart, valueEnd));
    pos = valueEnd + 1; // step past the closing quote
  }

  return { name, attributes };
}
|
|
307
|
+
|
|
308
|
+
/** The five predefined XML entities and their replacement text. */
const PREDEFINED_XML_ENTITIES: Readonly<Record<string, string>> = {
  amp: "&",
  lt: "<",
  gt: ">",
  quot: `"`,
  apos: "'",
};

/**
 * Replaces the predefined XML entities and numeric character references
 * (decimal `&#…;` and hexadecimal `&#x…;`) with the characters they stand
 * for, in a single pass (so `&amp;lt;` becomes `&lt;`).
 *
 * Unknown named entities are left untouched, as are numeric references whose
 * value lies beyond U+10FFFF: `String.fromCodePoint` throws a RangeError for
 * those, and one malformed reference must not abort the whole parse.
 */
function decodeEntities(s: string): string {
  return s.replace(
    /&(#x[0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g,
    (match: string, entity: string): string => {
      if (!entity.startsWith("#")) {
        return PREDEFINED_XML_ENTITIES[entity] ?? match;
      }
      const codePoint = entity.startsWith("#x")
        ? parseInt(entity.slice(2), 16)
        : parseInt(entity.slice(1), 10);
      // The pattern guarantees a non-negative integer; only the upper bound
      // (including Infinity for absurdly long digit runs) needs checking.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
    },
  );
}
|
|
@@ -146,6 +146,117 @@ export function extractBookmarksFromBodyXml(bodyXml: string): ParsedBookmarkNode
|
|
|
146
146
|
return results;
|
|
147
147
|
}
|
|
148
148
|
|
|
149
|
+
// ─── FieldGroup stream walker ─────────────────────────────────────────────────
|
|
150
|
+
|
|
151
|
+
/**
 * One complex field recovered from a flattened run list:
 * begin → instrText* → separate → display → end.
 *
 * In contrast to `ParsedComplexFieldNode`, the runs are gathered across all
 * block elements of the body, so a field whose begin and end sit in different
 * paragraphs is still captured.
 *
 * Fail-closed: a begin that never reaches an end produces no group.
 */
export interface FieldGroup {
  /** Concatenated instrText content, trimmed. */
  instruction: string;
  /**
   * Slice of the source XML between the end of the `separate` run and the
   * start of the `end` run; the empty string when there is no such span.
   */
  displayContent: string;
  /** Offset into the source XML string at which the begin run starts. */
  start: number;
  /** Offset into the source XML string (exclusive) at which the end run ends. */
  end: number;
}
|
|
168
|
+
|
|
169
|
+
/**
 * Parses `bodyXml`, flattens every run found beneath the body into a single
 * document-ordered list, and extracts the field groups from that list.
 *
 * Because runs are collected across all block elements, a field whose
 * fldChar markers are spread over several sibling `<w:p>` elements (as TOC
 * fields are) is seen as one sequence.
 *
 * The search starts at the root's `body` child, else its `document` child,
 * else the parsed root itself.
 *
 * Fail-closed: a begin with no matching end yields nothing.
 */
export function streamWalkFieldGroups(bodyXml: string): FieldGroup[] {
  const parsedRoot = parseXml(bodyXml);
  const searchRoot =
    findFirstChild(parsedRoot, "body") ??
    findFirstChild(parsedRoot, "document") ??
    parsedRoot;

  const flattenedRuns: XmlElementNode[] = [];
  collectRunsDeep(searchRoot, flattenedRuns);

  return extractFieldGroupsFromRuns(flattenedRuns, bodyXml);
}
|
|
188
|
+
|
|
189
|
+
/**
 * Appends to `out`, in document order, every element beneath `node` whose
 * local name is `r`. The walk does not descend into a run once it has been
 * collected, so runs nested inside another run are not reported separately.
 */
function collectRunsDeep(node: XmlElementNode, out: XmlElementNode[]): void {
  for (const child of node.children) {
    if (child.type !== "element") continue;
    if (localName(child.name) !== "r") {
      collectRunsDeep(child, out);
      continue;
    }
    out.push(child);
  }
}
|
|
199
|
+
|
|
200
|
+
/**
 * Turns a flattened, document-ordered run list into field groups by tracking
 * fldChar `begin` / `separate` / `end` markers.
 *
 * Fields may nest (for example a TOC whose result contains PAGEREF fields),
 * so open fields are kept on a stack: `begin` opens a new field, `separate`
 * and instrText apply to the innermost open field, and `end` closes it. A
 * single-state machine would be reset by the inner `begin` and lose the
 * enclosing field.
 *
 * Groups are emitted in the order their `end` markers are met, so a nested
 * field appears before the field that contains it. Fields still open when the
 * runs are exhausted are dropped (fail-closed), and stray `separate` / `end`
 * markers with no open field are ignored.
 *
 * @param runs Runs in document order; each run's `start` / `end` are used as
 *   offsets into `sourceXml`.
 * @param sourceXml The XML text the run offsets refer to.
 * @returns One group per field that reached its `end` marker.
 */
function extractFieldGroupsFromRuns(
  runs: XmlElementNode[],
  sourceXml: string,
): FieldGroup[] {
  interface OpenField {
    instrParts: string[];
    start: number;
    /** Offset just past the `separate` run, or -1 while still in the instruction. */
    contentStart: number;
  }

  const openFields: OpenField[] = [];
  const results: FieldGroup[] = [];

  for (const run of runs) {
    const fldChar = findFirstChildEl(run, "fldChar");
    const innermost = openFields[openFields.length - 1];

    if (fldChar) {
      const charType = (
        fldChar.attributes["w:fldCharType"] ??
        fldChar.attributes.fldCharType ??
        ""
      ).toLowerCase();

      if (charType === "begin") {
        openFields.push({ instrParts: [], start: run.start, contentStart: -1 });
      } else if (charType === "separate") {
        // Only the first separate of a field switches it to its result part.
        if (innermost && innermost.contentStart < 0) {
          innermost.contentStart = run.end;
        }
      } else if (charType === "end") {
        const closed = openFields.pop();
        if (closed) {
          const hasContent =
            closed.contentStart >= 0 && closed.contentStart <= run.start;
          results.push({
            instruction: closed.instrParts.join("").trim(),
            displayContent: hasContent
              ? sourceXml.slice(closed.contentStart, run.start)
              : "",
            start: closed.start,
            end: run.end,
          });
        }
      }
      continue;
    }

    // Instruction text only counts while the innermost field has not yet
    // reached its separate marker.
    if (!innermost || innermost.contentStart >= 0) continue;
    const instrText = findFirstChildEl(run, "instrText");
    if (!instrText) continue;

    const text = instrText.children
      .filter((c): c is XmlTextNode => c.type === "text")
      .map((c) => c.text)
      .join("");
    innermost.instrParts.push(text);
  }

  return results;
}
|
|
259
|
+
|
|
149
260
|
// ─── Element-level parsers (exported for unit testing) ────────────────────────
|
|
150
261
|
|
|
151
262
|
export function parseFldSimple(
|
|
@@ -309,7 +420,7 @@ export function buildFieldRegistry(
|
|
|
309
420
|
paragraphIndex = pIdx;
|
|
310
421
|
if (node.type === "field") {
|
|
311
422
|
const classification = node.fieldFamily
|
|
312
|
-
? { family: node.fieldFamily, supported: isSupportedFieldFamily(node.fieldFamily), target: node.fieldTarget }
|
|
423
|
+
? { family: node.fieldFamily, supported: isSupportedFieldFamily(node.fieldFamily), target: node.fieldTarget, switches: node.switches }
|
|
313
424
|
: classifyFieldInstruction(node.instruction);
|
|
314
425
|
const displayText = flattenFieldText(node.children);
|
|
315
426
|
const entry: FieldRegistryEntry = {
|
|
@@ -321,6 +432,7 @@ export function buildFieldRegistry(
|
|
|
321
432
|
displayText,
|
|
322
433
|
paragraphIndex,
|
|
323
434
|
refreshStatus: node.refreshStatus ?? (classification.supported ? "stale" : "preserve-only"),
|
|
435
|
+
...(classification.switches ? { switches: classification.switches } : {}),
|
|
324
436
|
};
|
|
325
437
|
if (classification.supported) {
|
|
326
438
|
supported.push(entry);
|
|
@@ -338,7 +450,7 @@ export function buildFieldRegistry(
|
|
|
338
450
|
paragraphIndex = pIdx;
|
|
339
451
|
if (node.type === "field") {
|
|
340
452
|
const classification = node.fieldFamily
|
|
341
|
-
? { family: node.fieldFamily, supported: isSupportedFieldFamily(node.fieldFamily), target: node.fieldTarget }
|
|
453
|
+
? { family: node.fieldFamily, supported: isSupportedFieldFamily(node.fieldFamily), target: node.fieldTarget, switches: node.switches }
|
|
342
454
|
: classifyFieldInstruction(node.instruction);
|
|
343
455
|
const displayText = flattenFieldText(node.children);
|
|
344
456
|
const entry: FieldRegistryEntry = {
|
|
@@ -350,6 +462,7 @@ export function buildFieldRegistry(
|
|
|
350
462
|
displayText,
|
|
351
463
|
paragraphIndex,
|
|
352
464
|
refreshStatus: node.refreshStatus ?? (classification.supported ? "stale" : "preserve-only"),
|
|
465
|
+
...(classification.switches ? { switches: classification.switches } : {}),
|
|
353
466
|
};
|
|
354
467
|
if (classification.supported) {
|
|
355
468
|
supported.push(entry);
|