@beyondwork/docx-react-component 1.0.48 → 1.0.50
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -11
- package/package.json +30 -41
- package/src/api/public-types.ts +103 -12
- package/src/core/commands/index.ts +30 -1
- package/src/core/commands/text-commands.ts +3 -1
- package/src/core/selection/anchor-conversion.ts +112 -0
- package/src/core/selection/review-anchors.ts +108 -3
- package/src/core/state/text-transaction.ts +86 -2
- package/src/internal/harness-debug-ports.ts +168 -0
- package/src/io/chart-preview-resolver.ts +32 -1
- package/src/io/export/serialize-comments.ts +50 -5
- package/src/io/export/serialize-main-document.ts +9 -0
- package/src/io/export/serialize-paragraph-formatting.ts +8 -0
- package/src/io/export/serialize-run-formatting.ts +10 -1
- package/src/io/ooxml/chart/chart-style-table.ts +543 -0
- package/src/io/ooxml/chart/color-palette.ts +101 -0
- package/src/io/ooxml/chart/compose-series-color.ts +147 -0
- package/src/io/ooxml/chart/parse-chart-space.ts +118 -46
- package/src/io/ooxml/chart/parse-series.ts +76 -11
- package/src/io/ooxml/chart/resolve-color.ts +16 -6
- package/src/io/ooxml/chart/types.ts +30 -11
- package/src/io/ooxml/parse-complex-content.ts +6 -3
- package/src/io/ooxml/parse-main-document.ts +41 -0
- package/src/io/ooxml/parse-paragraph-formatting.ts +46 -0
- package/src/io/ooxml/parse-run-formatting.ts +49 -0
- package/src/io/ooxml/property-grab-bag.ts +211 -0
- package/src/io/paste/word-clipboard.ts +114 -0
- package/src/model/canonical-document.ts +69 -3
- package/src/runtime/collab/index.ts +7 -0
- package/src/runtime/collab/runtime-collab-sync.ts +51 -0
- package/src/runtime/collab/workflow-shared.ts +247 -0
- package/src/runtime/document-locations.ts +1 -9
- package/src/runtime/document-outline.ts +1 -9
- package/src/runtime/document-runtime.ts +98 -50
- package/src/runtime/hyperlink-color-resolver.ts +119 -0
- package/src/runtime/layout/layout-engine-version.ts +11 -1
- package/src/runtime/layout/public-facet.ts +5 -12
- package/src/runtime/render/render-frame-types.ts +14 -0
- package/src/runtime/render/render-kernel.ts +40 -2
- package/src/runtime/structure-ops/fragment-insert.ts +134 -0
- package/src/runtime/surface-projection.ts +94 -36
- package/src/runtime/theme-color-resolver.ts +188 -0
- package/src/runtime/workflow-markup.ts +7 -18
- package/src/ui/WordReviewEditor.tsx +22 -4
- package/src/ui/editor-runtime-boundary.ts +37 -0
- package/src/ui/headless/selection-helpers.ts +10 -23
- package/src/ui/unsupported-previews-policy.ts +23 -0
- package/src/ui-tailwind/chrome-overlay/tw-chrome-overlay.tsx +10 -0
- package/src/ui-tailwind/editor-surface/perf-probe.ts +1 -0
- package/src/ui-tailwind/editor-surface/pm-command-bridge.ts +60 -5
- package/src/ui-tailwind/page-stack/tw-page-stack-chrome-layer.tsx +47 -0
- package/src/ui-tailwind/page-stack/use-visible-block-range.ts +88 -0
- package/src/ui-tailwind/tw-review-workspace.tsx +16 -1
|
@@ -108,8 +108,11 @@ export function parseComplexContentXml(
|
|
|
108
108
|
const uri = graphicData.attributes.uri ?? graphicData.attributes["uri"] ?? "";
|
|
109
109
|
if (isChartUri(uri)) {
|
|
110
110
|
const parsedData = maybeParseChart(root, chartPartLookup);
|
|
111
|
-
const node: ParsedChartContent = {
|
|
112
|
-
|
|
111
|
+
const node: ParsedChartContent = {
|
|
112
|
+
type: "chart_preview",
|
|
113
|
+
...(parsedData ? { parsedData } : {}),
|
|
114
|
+
rawXml: drawingXml,
|
|
115
|
+
};
|
|
113
116
|
return node;
|
|
114
117
|
}
|
|
115
118
|
if (isSmartArtUri(uri)) {
|
|
@@ -229,9 +232,9 @@ function parseAlternateContent(
|
|
|
229
232
|
...(previewMediaId ? { previewMediaId } : {}),
|
|
230
233
|
...(previewPackagePartName ? { previewPackagePartName } : {}),
|
|
231
234
|
...(previewContentType ? { previewContentType } : {}),
|
|
235
|
+
...(parsedData ? { parsedData } : {}),
|
|
232
236
|
rawXml: fullDrawingXml,
|
|
233
237
|
};
|
|
234
|
-
if (parsedData) node.parsedData = parsedData;
|
|
235
238
|
return node;
|
|
236
239
|
}
|
|
237
240
|
|
|
@@ -67,6 +67,37 @@ import {
|
|
|
67
67
|
readTableStyleId as readSharedTableStyleId,
|
|
68
68
|
readTableWidth as readSharedTableWidth,
|
|
69
69
|
} from "./parse-tables.ts";
|
|
70
|
+
import {
|
|
71
|
+
buildGrabBagSourceChildFromParsed,
|
|
72
|
+
capturePropertyGrabBag,
|
|
73
|
+
type PropertyGrabBagDescriptor,
|
|
74
|
+
} from "./property-grab-bag.ts";
|
|
75
|
+
|
|
76
|
+
/**
 * Local names of the direct `<w:sectPr>` children that
 * `parseSectionPropertiesFromElement` dispatches into typed fields on
 * `SectionProperties`. Any other child becomes a grab-bag entry so a
 * parse→serialize round-trip preserves extension-namespace section
 * properties and Word-internal knobs the canonical model doesn't
 * understand. Mirrors the Slice 1/2 pPr/rPr coverage.
 */
const SECT_PR_MODELLED_CHILDREN: ReadonlySet<string> = new Set<string>([
  "cols",
  "docGrid",
  "footerReference",
  "headerReference",
  "lnNumType",
  "pgBorders",
  "pgMar",
  "pgNumType",
  "pgSz",
  "titlePg",
  "type",
]);

/** Grab-bag descriptor for `<w:sectPr>`; no per-child attribute sets are declared. */
const SECT_PR_GRAB_BAG_DESCRIPTOR: PropertyGrabBagDescriptor = {
  modelledChildNames: SECT_PR_MODELLED_CHILDREN,
  modelledChildAttributes: new Map(),
};
|
|
70
101
|
|
|
71
102
|
export interface ParsedMainDocument {
|
|
72
103
|
blocks: ParsedBlockNode[];
|
|
@@ -3170,6 +3201,16 @@ export function parseSectionPropertiesFromElement(
|
|
|
3170
3201
|
}
|
|
3171
3202
|
}
|
|
3172
3203
|
|
|
3204
|
+
// Grab-bag capture for unmodelled <w:sectPr> children (O2 Slice 4).
|
|
3205
|
+
const sourceChildren = node.children
|
|
3206
|
+
.filter((child): child is XmlElementNode => child.type === "element")
|
|
3207
|
+
.map((child) => buildGrabBagSourceChildFromParsed(child));
|
|
3208
|
+
const unknown = capturePropertyGrabBag(
|
|
3209
|
+
sourceChildren,
|
|
3210
|
+
SECT_PR_GRAB_BAG_DESCRIPTOR,
|
|
3211
|
+
);
|
|
3212
|
+
if (unknown) props.unknownPropertyChildren = unknown;
|
|
3213
|
+
|
|
3173
3214
|
return props;
|
|
3174
3215
|
}
|
|
3175
3216
|
|
|
@@ -17,6 +17,42 @@ import type {
|
|
|
17
17
|
import { readRunProperties } from "./parse-run-formatting.ts";
|
|
18
18
|
import { findChildOptional, localName, readIntAttr, readIntVal, readOnOff } from "./xml-attr-helpers.ts";
|
|
19
19
|
import type { XmlElementNode } from "./xml-element.ts";
|
|
20
|
+
import {
|
|
21
|
+
buildGrabBagSourceChildFromParsed,
|
|
22
|
+
capturePropertyGrabBag,
|
|
23
|
+
type PropertyGrabBagDescriptor,
|
|
24
|
+
} from "./property-grab-bag.ts";
|
|
25
|
+
|
|
26
|
+
/**
 * Local names of the direct `<w:pPr>` children that `readParagraphProperties`
 * dispatches into typed fields on `CanonicalParagraphFormatting`. Any other
 * child becomes a grab-bag entry so a parse→serialize round-trip preserves
 * extension-namespace properties like `<w15:collapsed>` or Word-internal
 * knobs like `<w:kinsoku>` that the canonical model doesn't understand.
 */
const PPR_MODELLED_CHILDREN: ReadonlySet<string> = new Set<string>([
  "spacing",
  "ind",
  "jc",
  "pBdr",
  "shd",
  "tabs",
  "keepNext",
  "keepLines",
  "widowControl",
  "pageBreakBefore",
  "contextualSpacing",
  "bidi",
  "suppressLineNumbers",
  "suppressAutoHyphens",
  "outlineLvl",
  "rPr",
]);

/** Grab-bag descriptor for `<w:pPr>`; no per-child attribute sets are declared. */
const PPR_GRAB_BAG_DESCRIPTOR: PropertyGrabBagDescriptor = {
  modelledChildNames: PPR_MODELLED_CHILDREN,
  modelledChildAttributes: new Map(),
};
|
|
20
56
|
|
|
21
57
|
function readSpacing(node: XmlElementNode): ParagraphSpacing | undefined {
|
|
22
58
|
const out: ParagraphSpacing = {};
|
|
@@ -184,5 +220,15 @@ export function readParagraphProperties(
|
|
|
184
220
|
const markRpr = readRunProperties(rPrNode);
|
|
185
221
|
if (markRpr) out.paragraphMarkRunProperties = markRpr;
|
|
186
222
|
|
|
223
|
+
// Capture any unmodelled direct children of <w:pPr> so a parse→serialize
|
|
224
|
+
// round-trip does not silently drop extension-namespace properties
|
|
225
|
+
// (w15:collapsed, w16cex:..., w:kinsoku, etc.). See Lane 3 O2 plan +
|
|
226
|
+
// src/io/ooxml/property-grab-bag.ts for the pattern.
|
|
227
|
+
const sourceChildren = node.children
|
|
228
|
+
.filter((child): child is XmlElementNode => child.type === "element")
|
|
229
|
+
.map((child) => buildGrabBagSourceChildFromParsed(child));
|
|
230
|
+
const unknown = capturePropertyGrabBag(sourceChildren, PPR_GRAB_BAG_DESCRIPTOR);
|
|
231
|
+
if (unknown) out.unknownPropertyChildren = unknown;
|
|
232
|
+
|
|
187
233
|
return Object.keys(out).length > 0 ? out : undefined;
|
|
188
234
|
}
|
|
@@ -1,6 +1,42 @@
|
|
|
1
1
|
import type { CanonicalRunFormatting } from "../../model/canonical-document.ts";
|
|
2
2
|
import { findChildOptional, readIntVal, readOnOff, readStringAttr } from "./xml-attr-helpers.ts";
|
|
3
3
|
import type { XmlElementNode } from "./xml-element.ts";
|
|
4
|
+
import {
|
|
5
|
+
buildGrabBagSourceChildFromParsed,
|
|
6
|
+
capturePropertyGrabBag,
|
|
7
|
+
type PropertyGrabBagDescriptor,
|
|
8
|
+
} from "./property-grab-bag.ts";
|
|
9
|
+
|
|
10
|
+
/**
 * Local names of the direct `<w:rPr>` children that `readRunProperties`
 * dispatches into typed fields on `CanonicalRunFormatting`. Any other child
 * goes through the grab-bag helper so a parse→serialize round-trip preserves
 * unmodelled properties — both extension-namespace ones (e.g.
 * `<w14:textOutline>`) and standard ones the model skips (e.g. `<w:em>`,
 * `<w:kern>`).
 */
const RPR_MODELLED_CHILDREN: ReadonlySet<string> = new Set<string>([
  "b",
  "i",
  "strike",
  "dstrike",
  "vanish",
  "caps",
  "smallCaps",
  "u",
  "vertAlign",
  "rFonts",
  "sz",
  "szCs",
  "color",
  "highlight",
  "spacing",
  "rStyle",
  "lang",
]);

/** Grab-bag descriptor for `<w:rPr>`; no per-child attribute sets are declared. */
const RPR_GRAB_BAG_DESCRIPTOR: PropertyGrabBagDescriptor = {
  modelledChildNames: RPR_MODELLED_CHILDREN,
  modelledChildAttributes: new Map(),
};
|
|
4
40
|
|
|
5
41
|
/**
|
|
6
42
|
* Read `<w:rPr>` (run properties) into a `CanonicalRunFormatting` value.
|
|
@@ -101,8 +137,14 @@ export function readRunProperties(
|
|
|
101
137
|
const val = color.attributes["w:val"] ?? color.attributes["val"];
|
|
102
138
|
const theme =
|
|
103
139
|
color.attributes["w:themeColor"] ?? color.attributes["themeColor"];
|
|
140
|
+
const tint =
|
|
141
|
+
color.attributes["w:themeTint"] ?? color.attributes["themeTint"];
|
|
142
|
+
const shade =
|
|
143
|
+
color.attributes["w:themeShade"] ?? color.attributes["themeShade"];
|
|
104
144
|
if (val) rPr.colorHex = val;
|
|
105
145
|
if (theme) rPr.colorThemeSlot = theme;
|
|
146
|
+
if (tint) rPr.colorThemeTint = tint;
|
|
147
|
+
if (shade) rPr.colorThemeShade = shade;
|
|
106
148
|
}
|
|
107
149
|
|
|
108
150
|
const highlight = findChildOptional(node, "highlight");
|
|
@@ -125,5 +167,12 @@ export function readRunProperties(
|
|
|
125
167
|
if (val) rPr.languageCode = val;
|
|
126
168
|
}
|
|
127
169
|
|
|
170
|
+
// Grab-bag capture: unmodelled <w:rPr> children survive round-trip.
|
|
171
|
+
const sourceChildren = node.children
|
|
172
|
+
.filter((child): child is XmlElementNode => child.type === "element")
|
|
173
|
+
.map((child) => buildGrabBagSourceChildFromParsed(child));
|
|
174
|
+
const unknown = capturePropertyGrabBag(sourceChildren, RPR_GRAB_BAG_DESCRIPTOR);
|
|
175
|
+
if (unknown) rPr.unknownPropertyChildren = unknown;
|
|
176
|
+
|
|
128
177
|
return Object.keys(rPr).length > 0 ? rPr : undefined;
|
|
129
178
|
}
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Property-level grab-bag primitive for Lane 3 O2.
|
|
3
|
+
*
|
|
4
|
+
* LibreOffice captures unmodelled children / attributes on every OOXML
|
|
5
|
+
* property container (`<w:pPr>`, `<w:rPr>`, `<w:tcPr>`, `<w:trPr>`,
|
|
6
|
+
* `<w:tblPr>`, `<w:sectPr>`) via per-container "grab bags" keyed by
|
|
7
|
+
* element name — see `PropertyMap.hxx:82` and
|
|
8
|
+
* `libreoffice-analysis.md` §2 for the mechanism. On export, every grab
|
|
9
|
+
* bag re-emits verbatim inside its container so the round-trip pipeline
|
|
10
|
+
* does not silently drop extension-namespace properties (`w15:collapsed`,
|
|
11
|
+
* `w16cex:...`, etc.) or attributes Word adds after we parsed its schema.
|
|
12
|
+
*
|
|
13
|
+
* This module is a small, framework-free adapter: per-container parsers
|
|
14
|
+
* supply a descriptor listing modelled child names (and later, modelled
|
|
15
|
+
* attributes on modelled children); the helper returns everything else as
|
|
16
|
+
* raw XML in insertion order. The matching emitter is a one-liner that
|
|
17
|
+
* just joins `rawXml` strings.
|
|
18
|
+
*
|
|
19
|
+
* Scope (O2 Slice 1): per-container child diff only — the unknown-attribute
|
|
20
|
+
* diff on modelled children is a follow-up slice. Today the descriptor's
|
|
21
|
+
* `modelledChildAttributes` is declared but ignored; the helper emits a
|
|
22
|
+
* whole-child entry only when the element's localName is NOT in
|
|
23
|
+
* `modelledChildNames`.
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
/**
 * Input node shape accepted by `capturePropertyGrabBag`. Intentionally
 * minimal so every caller can adapt its own scanner output — per-file
 * parsers in `src/io/ooxml/` each carry a slightly different node shape.
 */
export interface GrabBagSourceChild {
  /**
   * Local element name (no namespace prefix). E.g. `"kinsoku"` for
   * `<w:kinsoku>`, `"collapsed"` for `<w15:collapsed>`.
   */
  localName: string;
  /**
   * XML for the entire child element: opening tag, all attributes, any
   * children, and the closing tag (or the self-closing form). Stored
   * as-is for re-emission.
   */
  rawXml: string;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Interop helper for callers that only carry the parsed `XmlElementNode`
 * shape used by `src/io/ooxml/xml-element.ts`. Reconstructs a best-effort
 * `rawXml` string from the parsed tree so parsers that don't track source
 * offsets can still feed the grab-bag helper.
 *
 * The reconstruction keeps element/attribute content and emits attributes
 * in `Object.entries` order (insertion order for non-integer string keys),
 * but it does NOT guarantee byte-identical source preservation: whitespace
 * between elements and attribute quoting style are normalized. For the
 * Slice 1 scope this is the intended trade-off — the semantic content of
 * unmodelled children survives, which closes the silent-drop gap.
 *
 * @param node Parsed element to adapt.
 * @returns The element's prefix-free local name plus its re-serialized XML.
 */
export function buildGrabBagSourceChildFromParsed(node: {
  name: string;
  attributes: Record<string, string>;
  children: Array<{ type: "element"; name: string; attributes: Record<string, string>; children: unknown[] } | { type: "text"; text: string }>;
}): GrabBagSourceChild {
  return {
    localName: localNameOf(node.name),
    rawXml: serializeElementToString(node),
  };
}
|
|
69
|
+
|
|
70
|
+
/**
 * Strip the namespace prefix from a qualified XML name.
 *
 * Everything up to and including the first `:` is dropped; a name without
 * a colon is returned unchanged.
 */
function localNameOf(qualified: string): string {
  const colon = qualified.indexOf(":");
  return colon < 0 ? qualified : qualified.slice(colon + 1);
}
|
|
74
|
+
|
|
75
|
+
/**
 * Escape a string for use inside a double-quoted XML attribute value.
 *
 * `&` is replaced first so the entities produced by the later steps are
 * not escaped a second time.
 */
function escapeAttr(value: string): string {
  return value
    .replace(/&/gu, "&amp;")
    .replace(/</gu, "&lt;")
    .replace(/>/gu, "&gt;")
    .replace(/"/gu, "&quot;");
}
|
|
82
|
+
|
|
83
|
+
/**
 * Escape a string for use as XML character data (element text content).
 *
 * `&` is replaced first so the entities produced by the later steps are
 * not escaped a second time. Quotes are left alone; they are only
 * significant inside attribute values.
 */
function escapeText(value: string): string {
  return value
    .replace(/&/gu, "&amp;")
    .replace(/</gu, "&lt;")
    .replace(/>/gu, "&gt;");
}
|
|
89
|
+
|
|
90
|
+
/**
 * Recursively serialize a parsed element back into an XML string.
 *
 * Attributes are emitted in `Object.entries` order with their values run
 * through `escapeAttr`; text children go through `escapeText`. An element
 * with no children is written in self-closing form.
 */
function serializeElementToString(node: {
  name: string;
  attributes: Record<string, string>;
  children: Array<{ type: "element"; name: string; attributes: Record<string, string>; children: unknown[] } | { type: "text"; text: string }>;
}): string {
  let attrs = "";
  for (const [name, value] of Object.entries(node.attributes)) {
    attrs += ` ${name}="${escapeAttr(value)}"`;
  }

  if (node.children.length === 0) {
    return `<${node.name}${attrs}/>`;
  }

  let body = "";
  for (const child of node.children) {
    if (child.type === "text") {
      body += escapeText(child.text);
    } else {
      // Nested children are typed `unknown[]` by the parameter shape, so the
      // element is cast back to that same shape for the recursive call.
      body += serializeElementToString(
        child as {
          name: string;
          attributes: Record<string, string>;
          children: Array<{ type: "element"; name: string; attributes: Record<string, string>; children: unknown[] } | { type: "text"; text: string }>;
        },
      );
    }
  }
  return `<${node.name}${attrs}>${body}</${node.name}>`;
}
|
|
115
|
+
|
|
116
|
+
/**
 * Descriptor a per-container parser supplies to the helper to declare
 * which child element names it dispatches into its modelled fields.
 * Children not listed here become grab-bag entries.
 */
export interface PropertyGrabBagDescriptor {
  /**
   * Set of local names the container parser handles natively. Children
   * whose `localName` matches one of these are NOT captured.
   */
  modelledChildNames: ReadonlySet<string>;
  /**
   * Reserved for the follow-up slice: for each modelled child, the set of
   * attributes the parser consumes. The Slice 1 helper ignores this field;
   * Slice 2 will use it to emit attribute-level grab entries on modelled
   * children.
   *
   * Note: table containers (tblPr/trPr/tcPr — O2 Slice 3) currently use a
   * parallel raw-XML mechanism in `src/io/export/table-properties-xml.ts`
   * (`mergePropertiesXml`) that stores the full container XML as a string
   * on `TableNode.propertiesXml`/`TableRowNode.propertiesXml`/
   * `TableCellNode.propertiesXml`. That path cannot participate in the
   * attribute-level grab-bag slice until it is retrofitted to emit
   * `UnknownPropertyChild[]` through this descriptor. Tracked as a Lane 3
   * Tier-2 backlog entry — see `docs/plans/lane-3-layout-engine-ooxml-fidelity.md`.
   */
  modelledChildAttributes: ReadonlyMap<string, ReadonlySet<string>>;
}
|
|
144
|
+
|
|
145
|
+
/**
 * Single grab-bag entry: an unmodelled top-level child captured as raw XML
 * so the serializer can re-emit it inside its container.
 *
 * NOTE(review): `src/model/canonical-document.ts` declares an interface
 * with the same name and shape — confirm the two are meant to stay in sync.
 */
export interface UnknownPropertyChild {
  /**
   * Qualified element name as it appeared in the source (e.g.
   * `"w:kinsoku"`, `"w15:collapsed"`). Used for diagnostics and for the
   * future attribute-level diff so the emitter can re-open the matching
   * element when needed.
   */
  elementName: string;
  /**
   * XML for the child element, exactly as supplied by the capturing parser.
   */
  rawXml: string;
}
|
|
163
|
+
|
|
164
|
+
/**
 * Walk the container's direct children and return every child whose
 * `localName` is NOT in `descriptor.modelledChildNames` as a grab-bag
 * entry, in source order.
 *
 * The helper does NOT inspect attributes or grandchildren, and it ignores
 * `descriptor.modelledChildAttributes`; that refinement is reserved for
 * the follow-up slice.
 *
 * @param children Direct children of the property container.
 * @param descriptor Declares which local names the container parser models.
 * @returns The captured entries, or `undefined` when no unmodelled
 *   children were found — `undefined` rather than an empty array keeps the
 *   canonical model sparse.
 */
export function capturePropertyGrabBag(
  children: readonly GrabBagSourceChild[],
  descriptor: PropertyGrabBagDescriptor,
): UnknownPropertyChild[] | undefined {
  const bag: UnknownPropertyChild[] = [];
  for (const child of children) {
    if (descriptor.modelledChildNames.has(child.localName)) continue;
    bag.push({
      // Prefer the prefixed name from the XML itself; fall back to the local
      // name when the raw XML does not start with an element tag.
      elementName: extractQualifiedNameFromRawXml(child.rawXml) ?? child.localName,
      rawXml: child.rawXml,
    });
  }
  return bag.length > 0 ? bag : undefined;
}
|
|
188
|
+
|
|
189
|
+
/**
 * Emit a grab-bag list back into a property container by concatenating
 * each entry's `rawXml` in insertion order, with no separator.
 *
 * Each `rawXml` is emitted exactly as stored. Whether that matches the
 * original source bytes depends on how the entry was captured: entries
 * built through `buildGrabBagSourceChildFromParsed` are re-serialized from
 * the parsed tree, so inter-element whitespace and attribute quoting are
 * normalized rather than byte-identical.
 *
 * @param entries Captured entries; `undefined` or empty yields `""`.
 * @returns The concatenated XML.
 */
export function emitPropertyGrabBag(
  entries: readonly UnknownPropertyChild[] | undefined,
): string {
  if (!entries || entries.length === 0) return "";
  return entries.map((entry) => entry.rawXml).join("");
}
|
|
201
|
+
|
|
202
|
+
/**
 * Best-effort extraction of the qualified element name from an opening
 * tag — e.g. `<w15:collapsed w:val="1"/>` → `"w15:collapsed"`.
 *
 * @returns The name, or `undefined` when `rawXml` does not start with `<`
 *   followed by at least one character that is not whitespace, `/`, or `>`.
 */
function extractQualifiedNameFromRawXml(rawXml: string): string | undefined {
  const match = rawXml.match(/^<([^\s/>]+)/u);
  return match?.[1];
}
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* I2 Tier B Slice 2 — Office-clipboard / WordprocessingML paste parser.
|
|
3
|
+
*
|
|
4
|
+
* Adapts the authoritative `parseMainDocumentXml` → `normalizeParsedTextDocument`
|
|
5
|
+
* pipeline to a clipboard-paste payload. Inputs from the browser clipboard under
|
|
6
|
+
* the Office MIME types (`application/x-docx-fragment`,
|
|
7
|
+
* `application/vnd.ms-word.wordprocessingml.paste`) arrive either as:
|
|
8
|
+
*
|
|
9
|
+
* - a full `<w:document><w:body>…</w:body></w:document>` wrapper, or
|
|
10
|
+
* - a bare `<w:body>…</w:body>` fragment.
|
|
11
|
+
*
|
|
12
|
+
* This adapter auto-wraps the bare form, runs the full parse + normalize, and
|
|
13
|
+
* returns the resulting canonical `BlockNode`s as a `CanonicalDocumentFragment`.
|
|
14
|
+
* Errors (XML parse failure, missing body) are returned as a structured result
|
|
15
|
+
* instead of thrown, so `pm-command-bridge.ts` `handlePaste` can gracefully fall
|
|
16
|
+
* through to HTML or plain-text Tier A.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import type { CanonicalDocumentFragment } from "../../api/public-types.ts";
|
|
20
|
+
import type { DocumentRootNode } from "../../model/canonical-document.ts";
|
|
21
|
+
import { parseMainDocumentXml } from "../ooxml/parse-main-document.ts";
|
|
22
|
+
import { normalizeParsedTextDocument } from "../normalize/normalize-text.ts";
|
|
23
|
+
import { serializeMainDocument } from "../export/serialize-main-document.ts";
|
|
24
|
+
|
|
25
|
+
/**
 * Outcome of `parseCanonicalFragmentFromWordML`: either the parsed
 * fragment, or a human-readable reason why the payload was rejected.
 */
export type ParseCanonicalFragmentResult =
  | { ok: true; fragment: CanonicalDocumentFragment }
  | { ok: false; reason: string };

/** `xmlns:w` attribute text placed on the synthetic `<w:document>` wrapper. */
const WORD_NS = `xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"`;
|
|
30
|
+
|
|
31
|
+
/**
 * Parse a WordprocessingML clipboard payload into a canonical fragment.
 *
 * The payload is wrapped into a full document shell if needed, then run
 * through `parseMainDocumentXml` and `normalizeParsedTextDocument`.
 * Failures are returned as `{ ok: false }` rather than thrown so the
 * caller can fall through to another paste format.
 *
 * @param xml Raw clipboard text.
 * @returns The normalized blocks on success, otherwise a reason string.
 */
export function parseCanonicalFragmentFromWordML(xml: string): ParseCanonicalFragmentResult {
  // A whitespace-only payload carries no content; report it as empty
  // instead of parsing a synthetic empty body.
  if (typeof xml !== "string" || xml.trim().length === 0) {
    return { ok: false, reason: "empty WordML payload" };
  }

  const prepared = ensureDocumentShell(xml);

  try {
    const parsed = parseMainDocumentXml(prepared);
    const normalized = normalizeParsedTextDocument(parsed);
    return {
      ok: true,
      fragment: { blocks: normalized.content.children },
    };
  } catch (error) {
    return {
      ok: false,
      reason: error instanceof Error ? error.message : "unknown WordML parse error",
    };
  }
}
|
|
52
|
+
|
|
53
|
+
/**
 * Normalize the incoming clipboard payload into a full `<w:document><w:body>…`
 * shell. Handles three shapes:
 *
 *   1. Already-wrapped `<w:document>…</w:document>` → returned trimmed, with
 *      an XML declaration prepended if it had none.
 *   2. Bare `<w:body>…</w:body>` → wrapped in `<w:document>…</w:document>`.
 *   3. Any other fragment → wrapped in `<w:document><w:body>…`.
 *
 * In cases 2 and 3 the synthetic `<w:document>` carries the `xmlns:w`
 * declaration, and the inner markup is passed through `stripRedundantNs`.
 */
function ensureDocumentShell(xml: string): string {
  const trimmed = xml.trim();
  // Shape detection looks past a leading XML declaration, if any.
  const withoutDecl = trimmed.replace(/^<\?xml[^?]*\?>/, "").trim();

  const hasDocumentWrapper = /^<w:document[\s>]/i.test(withoutDecl);
  if (hasDocumentWrapper) {
    return ensureXmlDecl(trimmed);
  }

  const hasBodyWrapper = /^<w:body[\s>]/i.test(withoutDecl);
  const inner = hasBodyWrapper ? withoutDecl : `<w:body>${withoutDecl}</w:body>`;

  const wrapped = `<w:document ${WORD_NS}>${stripRedundantNs(inner)}</w:document>`;
  return ensureXmlDecl(wrapped);
}
|
|
80
|
+
|
|
81
|
+
/**
 * Prepend a standard XML declaration unless the input already begins
 * (after optional whitespace) with `<?xml`.
 */
function ensureXmlDecl(xml: string): string {
  if (/^\s*<\?xml/.test(xml)) return xml;
  return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>${xml}`;
}
|
|
85
|
+
|
|
86
|
+
/**
 * Remove the first `xmlns:w="…"` declaration found in `xml` so the outer
 * `<w:document>` declaration is the single authoritative binding. The XML
 * parser accepts both, but removing the duplicate keeps output clean.
 *
 * The declaration is removed only when it binds `w` to the Transitional
 * WordprocessingML namespace — the same URI the wrapper declares. A
 * binding to any other URI (for example the ISO Strict namespace) is left
 * in place; dropping it would silently rebind the `w:` prefix for that
 * subtree to the wrapper's namespace.
 */
function stripRedundantNs(xml: string): string {
  const wrapperNamespaceUri = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";
  // No `g` flag: only the first declaration is considered, as before.
  return xml.replace(/\s+xmlns:w="([^"]*)"/u, (declaration: string, uri: string) =>
    uri === wrapperNamespaceUri ? "" : declaration,
  );
}
|
|
94
|
+
|
|
95
|
+
/**
 * I2 Tier B Slice 4a — inverse of `parseCanonicalFragmentFromWordML`. Produces
 * a full `<w:document><w:body>…</w:body></w:document>` payload suitable for
 * writing to the system clipboard under the Office MIME types, or for exchange
 * with agents that expect WordML.
 *
 * The implementation reuses the `serializeMainDocument` pipeline by wrapping
 * `fragment.blocks` in a synthetic `DocumentRootNode`. The full document XML
 * is returned as-is rather than stripped down to a bare `<w:body>` fragment.
 *
 * @param fragment Canonical blocks to serialize.
 * @returns The `documentXml` string produced by `serializeMainDocument`.
 */
export function serializeFragmentToWordML(fragment: CanonicalDocumentFragment): string {
  const root: DocumentRootNode = {
    type: "doc",
    children: fragment.blocks,
  };
  const serialized = serializeMainDocument(root);
  return serialized.documentXml;
}
|
|
@@ -472,10 +472,34 @@ export interface CanonicalRunFormatting {
|
|
|
472
472
|
*/
|
|
473
473
|
colorHex?: string;
|
|
474
474
|
colorThemeSlot?: string;
|
|
475
|
+
/**
|
|
476
|
+
* `w:themeTint` hex byte (0x00–0xFF) read from `<w:color>`. ECMA-376
|
|
477
|
+
* §17.18.85. Applied at render/cascade time against `colorThemeSlot`:
|
|
478
|
+
* luminance mod = (tint/255) * L + (1 - tint/255) (lightens toward
|
|
479
|
+
* white; 0xFF means no modulation). Preserved as the raw hex string
|
|
480
|
+
* for byte-stable round-trip; resolution math lives in the runtime
|
|
481
|
+
* theme-color resolver.
|
|
482
|
+
*/
|
|
483
|
+
colorThemeTint?: string;
|
|
484
|
+
/**
|
|
485
|
+
* `w:themeShade` hex byte (0x00–0xFF) from `<w:color>`. ECMA-376
|
|
486
|
+
* §17.18.83. Applied at render/cascade time: luminance mod =
|
|
487
|
+
* shade/255 * L (darkens toward black; 0xFF means no modulation).
|
|
488
|
+
*/
|
|
489
|
+
colorThemeShade?: string;
|
|
475
490
|
highlight?: string;
|
|
476
491
|
characterSpacingTwips?: number;
|
|
477
492
|
characterStyleId?: string;
|
|
478
493
|
languageCode?: string;
|
|
494
|
+
/**
|
|
495
|
+
* Unmodelled direct children of `<w:rPr>` captured verbatim for round-trip.
|
|
496
|
+
* See `src/io/ooxml/property-grab-bag.ts` and
|
|
497
|
+
* `CanonicalParagraphFormatting.unknownPropertyChildren` for the full
|
|
498
|
+
* pattern. Preserves extension-namespace properties like `<w14:textOutline>`,
|
|
499
|
+
* `<w:em>`, `<w:kern>` through parse→serialize round-trip even though the
|
|
500
|
+
* runtime does not model them.
|
|
501
|
+
*/
|
|
502
|
+
unknownPropertyChildren?: UnknownPropertyChild[];
|
|
479
503
|
}
|
|
480
504
|
|
|
481
505
|
/** Body of an OOXML `<w:pPr>` (paragraph properties). All fields optional; absence = "not specified at this level". */
|
|
@@ -496,6 +520,32 @@ export interface CanonicalParagraphFormatting {
|
|
|
496
520
|
suppressLineNumbers?: boolean;
|
|
497
521
|
suppressAutoHyphens?: boolean;
|
|
498
522
|
paragraphMarkRunProperties?: CanonicalRunFormatting;
|
|
523
|
+
/**
|
|
524
|
+
* Unmodelled direct children of `<w:pPr>` captured verbatim for round-trip.
|
|
525
|
+
* See `src/io/ooxml/property-grab-bag.ts` for the mechanism and Lane 3 O2
|
|
526
|
+
* plan for the LibreOffice `PropertyMap.hxx:82` precedent.
|
|
527
|
+
*
|
|
528
|
+
* Each entry carries the source XML for an unmodelled child element plus
|
|
529
|
+
* its qualified name. On export, entries are re-emitted after the
|
|
530
|
+
* modelled children so Word-extension properties like `<w15:collapsed>`
|
|
531
|
+
* or `<w:kinsoku>` survive a parse→serialize round-trip even though the
|
|
532
|
+
* runtime doesn't understand them.
|
|
533
|
+
*/
|
|
534
|
+
unknownPropertyChildren?: UnknownPropertyChild[];
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
/**
 * A single unmodelled direct child of an OOXML property container (pPr,
 * rPr, tcPr, trPr, tblPr, sectPr). Captured as raw XML so the serializer
 * can re-emit it. See `src/io/ooxml/property-grab-bag.ts` for the helper
 * that computes the diff against a per-container modelled-child allow-list.
 */
export interface UnknownPropertyChild {
  /** Qualified element name as it appeared in source, e.g. "w15:collapsed". */
  elementName: string;
  /** XML for the child element, including its closing or self-closing form. */
  rawXml: string;
}
|
|
500
550
|
|
|
501
551
|
/** Body of an OOXML `<w:docDefaults>` — baseline formatting applied before style chain. */
|
|
@@ -896,6 +946,15 @@ export interface SectionProperties {
|
|
|
896
946
|
footerReferences?: HeaderFooterReference[];
|
|
897
947
|
sectionType?: "continuous" | "nextPage" | "evenPage" | "oddPage" | "nextColumn";
|
|
898
948
|
titlePage?: boolean;
|
|
949
|
+
/**
|
|
950
|
+
* Unmodelled direct children of `<w:sectPr>` captured verbatim for
|
|
951
|
+
* round-trip. Mirrors `CanonicalParagraphFormatting.unknownPropertyChildren`
|
|
952
|
+
* and `CanonicalRunFormatting.unknownPropertyChildren` (Lane 3 O2 Slices
|
|
953
|
+
* 1+2). Preserves extension-namespace properties like
|
|
954
|
+
* `<w15:footnoteColumns>` and Word-internal section knobs through a
|
|
955
|
+
* parse→serialize round-trip when the runtime mutates section properties.
|
|
956
|
+
*/
|
|
957
|
+
unknownPropertyChildren?: UnknownPropertyChild[];
|
|
899
958
|
}
|
|
900
959
|
|
|
901
960
|
export interface PageSize {
|
|
@@ -1078,10 +1137,17 @@ export interface ChartPreviewNode {
|
|
|
1078
1137
|
* (`src/io/ooxml/chart/parse-chart-space.ts`). Undefined when the chart
|
|
1079
1138
|
* part cannot be located, fails to parse, or has no chart-family match
|
|
1080
1139
|
* — consumers fall back to the fallback bitmap (`previewMediaId`) or the
|
|
1081
|
-
* typed badge in that case.
|
|
1082
|
-
*
|
|
1140
|
+
* typed badge in that case.
|
|
1141
|
+
*
|
|
1142
|
+
* **`rawXml` is the authoritative round-trip source** regardless of
|
|
1143
|
+
* whether `parsedData` is populated. `parsedData` is a read-only
|
|
1144
|
+
* projection of that rawXml; mutating it would diverge the two fields
|
|
1145
|
+
* and silently degrade export fidelity. The `readonly` modifier
|
|
1146
|
+
* enforces this at the type level — any future collab-replay that
|
|
1147
|
+
* needs to edit a chart must round-trip through `rawXml`, not patch
|
|
1148
|
+
* `parsedData` in place.
|
|
1083
1149
|
*/
|
|
1084
|
-
parsedData?: ChartModel;
|
|
1150
|
+
readonly parsedData?: ChartModel;
|
|
1085
1151
|
rawXml: string;
|
|
1086
1152
|
}
|
|
1087
1153
|
|
|
@@ -42,3 +42,10 @@ export {
|
|
|
42
42
|
type RemoteCursorTrackerHandle,
|
|
43
43
|
type RemoteCursorTrackerOptions,
|
|
44
44
|
} from "./remote-cursor-awareness.ts";
|
|
45
|
+
export {
|
|
46
|
+
createWorkflowShared,
|
|
47
|
+
type CreateWorkflowSharedOptions,
|
|
48
|
+
type SharedWorkflowState,
|
|
49
|
+
type WorkflowSharedHandle,
|
|
50
|
+
type WorkflowSharedResult,
|
|
51
|
+
} from "./workflow-shared.ts";
|