@beyondwork/docx-react-component 1.0.47 → 1.0.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -11
- package/package.json +30 -41
- package/src/api/public-types.ts +199 -13
- package/src/compare/diff-engine.ts +4 -0
- package/src/core/commands/add-scope.ts +257 -0
- package/src/core/commands/formatting-commands.ts +2 -0
- package/src/core/commands/index.ts +9 -1
- package/src/core/commands/text-commands.ts +3 -1
- package/src/core/schema/text-schema.ts +95 -1
- package/src/core/selection/anchor-conversion.ts +112 -0
- package/src/core/selection/review-anchors.ts +108 -3
- package/src/core/state/text-transaction.ts +103 -7
- package/src/internal/harness-debug-ports.ts +168 -0
- package/src/io/chart-preview-resolver.ts +59 -1
- package/src/io/docx-session.ts +226 -38
- package/src/io/export/serialize-main-document.ts +46 -0
- package/src/io/export/serialize-paragraph-formatting.ts +8 -0
- package/src/io/export/serialize-run-formatting.ts +10 -1
- package/src/io/export/serialize-settings.ts +421 -0
- package/src/io/export/serialize-styles.ts +10 -0
- package/src/io/normalize/normalize-text.ts +1 -0
- package/src/io/ooxml/chart/chart-style-table.ts +543 -0
- package/src/io/ooxml/chart/color-palette.ts +101 -0
- package/src/io/ooxml/chart/compose-series-color.ts +147 -0
- package/src/io/ooxml/chart/parse-axis.ts +277 -0
- package/src/io/ooxml/chart/parse-chart-space.ts +885 -0
- package/src/io/ooxml/chart/parse-series.ts +635 -0
- package/src/io/ooxml/chart/resolve-color.ts +261 -0
- package/src/io/ooxml/chart/types.ts +439 -0
- package/src/io/ooxml/parse-block-structure.ts +99 -0
- package/src/io/ooxml/parse-complex-content.ts +90 -2
- package/src/io/ooxml/parse-main-document.ts +156 -1
- package/src/io/ooxml/parse-paragraph-formatting.ts +46 -0
- package/src/io/ooxml/parse-run-formatting.ts +49 -0
- package/src/io/ooxml/parse-scope-markers.ts +184 -0
- package/src/io/ooxml/parse-settings-blueprint.ts +349 -0
- package/src/io/ooxml/parse-settings.ts +97 -1
- package/src/io/ooxml/parse-styles.ts +65 -0
- package/src/io/ooxml/parse-theme.ts +2 -127
- package/src/io/ooxml/property-grab-bag.ts +211 -0
- package/src/io/ooxml/xml-attr-helpers.ts +59 -1
- package/src/io/ooxml/xml-parser.ts +142 -0
- package/src/model/canonical-document.ts +160 -0
- package/src/model/scope-markers.ts +144 -0
- package/src/runtime/collab/base-doc-fingerprint.ts +99 -0
- package/src/runtime/collab/checkpoint-election.ts +75 -0
- package/src/runtime/collab/checkpoint-scheduler.ts +204 -0
- package/src/runtime/collab/checkpoint-store.ts +115 -0
- package/src/runtime/collab/event-types.ts +27 -0
- package/src/runtime/collab/index.ts +29 -0
- package/src/runtime/collab/remote-cursor-awareness.ts +167 -0
- package/src/runtime/collab/runtime-collab-sync.ts +330 -0
- package/src/runtime/collab/workflow-shared.ts +247 -0
- package/src/runtime/document-locations.ts +1 -9
- package/src/runtime/document-outline.ts +1 -9
- package/src/runtime/document-runtime.ts +288 -65
- package/src/runtime/editor-surface/capabilities.ts +63 -50
- package/src/runtime/hyperlink-color-resolver.ts +119 -0
- package/src/runtime/layout/layout-engine-version.ts +8 -1
- package/src/runtime/prerender/cache-envelope.ts +19 -7
- package/src/runtime/prerender/cache-key.ts +25 -14
- package/src/runtime/prerender/canonical-document-hash.ts +63 -0
- package/src/runtime/prerender/customxml-cache.ts +211 -0
- package/src/runtime/prerender/customxml-probe.ts +78 -0
- package/src/runtime/prerender/prerender-document.ts +74 -7
- package/src/runtime/scope-resolver.ts +148 -0
- package/src/runtime/scope-tag-registry.ts +10 -0
- package/src/runtime/surface-projection.ts +102 -37
- package/src/runtime/theme-color-resolver.ts +188 -0
- package/src/runtime/workflow-markup.ts +7 -18
- package/src/ui/WordReviewEditor.tsx +48 -2
- package/src/ui/editor-runtime-boundary.ts +42 -1
- package/src/ui/headless/selection-helpers.ts +10 -23
- package/src/ui/runtime-shortcut-dispatch.ts +12 -7
- package/src/ui/unsupported-previews-policy.ts +23 -0
- package/src/ui-tailwind/chrome-overlay/tw-chrome-overlay.tsx +10 -0
- package/src/ui-tailwind/editor-surface/perf-probe.ts +1 -0
- package/src/ui-tailwind/page-stack/tw-page-stack-chrome-layer.tsx +47 -0
- package/src/ui-tailwind/page-stack/use-visible-block-range.ts +88 -0
- package/src/ui-tailwind/tw-review-workspace.tsx +16 -1
|
@@ -153,6 +153,7 @@ export function parseStylesXml(xml: string): ParseStylesResult {
|
|
|
153
153
|
switch (styleType) {
|
|
154
154
|
case "paragraph": {
|
|
155
155
|
const nextStyle = readLinkedStyleId(child, "next");
|
|
156
|
+
const linkedStyleId = readLinkedStyleId(child, "link");
|
|
156
157
|
const outlineLevel = readParagraphStyleOutlineLevel(child);
|
|
157
158
|
const numbering = readParagraphStyleNumbering(child);
|
|
158
159
|
const pPrNode = findChildElementOptional(child, "pPr");
|
|
@@ -170,10 +171,12 @@ export function parseStylesXml(xml: string): ParseStylesResult {
|
|
|
170
171
|
...(numbering ? { numbering } : {}),
|
|
171
172
|
...(paragraphProperties ? { paragraphProperties } : {}),
|
|
172
173
|
...(runProperties ? { runProperties } : {}),
|
|
174
|
+
...(linkedStyleId ? { linkedStyleId } : {}),
|
|
173
175
|
};
|
|
174
176
|
break;
|
|
175
177
|
}
|
|
176
178
|
case "character": {
|
|
179
|
+
const linkedStyleId = readLinkedStyleId(child, "link");
|
|
177
180
|
const rPrNode = findChildElementOptional(child, "rPr");
|
|
178
181
|
const runProperties = readRunProperties(rPrNode);
|
|
179
182
|
characters[styleId] = {
|
|
@@ -183,6 +186,7 @@ export function parseStylesXml(xml: string): ParseStylesResult {
|
|
|
183
186
|
isDefault,
|
|
184
187
|
...(basedOn ? { basedOn } : {}),
|
|
185
188
|
...(runProperties ? { runProperties } : {}),
|
|
189
|
+
...(linkedStyleId ? { linkedStyleId } : {}),
|
|
186
190
|
};
|
|
187
191
|
break;
|
|
188
192
|
}
|
|
@@ -209,6 +213,8 @@ export function parseStylesXml(xml: string): ParseStylesResult {
|
|
|
209
213
|
}
|
|
210
214
|
}
|
|
211
215
|
|
|
216
|
+
resolveStyleLinkReciprocals(paragraphs, characters, diagnostics);
|
|
217
|
+
|
|
212
218
|
const hasLatent = Object.keys(latentStyles).length > 0;
|
|
213
219
|
diagnostics.push(
|
|
214
220
|
`parsed ${Object.keys(paragraphs).length} paragraph, ` +
|
|
@@ -250,6 +256,65 @@ function readLinkedStyleId(
|
|
|
250
256
|
return el.attributes["w:val"] ?? el.attributes.val ?? undefined;
|
|
251
257
|
}
|
|
252
258
|
|
|
259
|
+
/**
 * Second-pass resolver for `<w:link>` on paragraph ↔ character style pairs.
 *
 * Runs after every style has been ingested. For each style that declares a
 * `linkedStyleId`, the partner style in the opposite catalog receives the
 * reciprocal `linkedStyleId` when it declared none of its own, so the
 * resulting catalogs are symmetric regardless of source declaration order.
 * (The original note attributes this approach to LibreOffice's
 * StyleSheetTable.cxx, "Update the styles that were created before their
 * linked styles".)
 *
 * Conflict handling is conservative: if the partner already links to a
 * different style, its value is left intact and a diagnostic is pushed.
 *
 * Dangling references (a link whose target is not an own key of the partner
 * catalog) are preserved verbatim and reported through `diagnostics`.
 *
 * @param paragraphs  Paragraph style catalog keyed by styleId; mutated in place.
 * @param characters  Character style catalog keyed by styleId; mutated in place.
 * @param diagnostics Sink that receives one message per dangling or conflicting link.
 */
function resolveStyleLinkReciprocals(
  paragraphs: Record<string, ParagraphStyleDefinition>,
  characters: Record<string, CharacterStyleDefinition>,
  diagnostics: string[],
): void {
  const walkers: Array<{
    catalog:
      | Record<string, ParagraphStyleDefinition>
      | Record<string, CharacterStyleDefinition>;
    partnerCatalog:
      | Record<string, ParagraphStyleDefinition>
      | Record<string, CharacterStyleDefinition>;
    label: string;
  }> = [
    { catalog: paragraphs, partnerCatalog: characters, label: "paragraph" },
    { catalog: characters, partnerCatalog: paragraphs, label: "character" },
  ];

  for (const { catalog, partnerCatalog, label } of walkers) {
    for (const style of Object.values(catalog)) {
      const target = style.linkedStyleId;
      if (!target) continue;
      // The target id comes straight from document XML. Only accept own keys:
      // a plain bracket lookup would resolve "__proto__" / "constructor" to
      // inherited objects and the assignment below would then write onto
      // Object.prototype.
      const partner = Object.prototype.hasOwnProperty.call(partnerCatalog, target)
        ? partnerCatalog[target]
        : undefined;
      if (!partner) {
        diagnostics.push(
          `style ${label} "${style.styleId}" declares <w:link w:val="${target}"/> but no matching ${label === "paragraph" ? "character" : "paragraph"} style was found; link preserved as dangling`,
        );
        continue;
      }
      if (partner.linkedStyleId === undefined) {
        // Partner declared no link of its own: synthesize the reciprocal.
        partner.linkedStyleId = style.styleId;
      } else if (partner.linkedStyleId !== style.styleId) {
        diagnostics.push(
          `style ${label} "${style.styleId}" links to "${target}" but partner already links to "${partner.linkedStyleId}"; partner link retained`,
        );
      }
    }
  }
}
|
|
317
|
+
|
|
253
318
|
function readParagraphStyleOutlineLevel(
|
|
254
319
|
styleNode: XmlElementNode,
|
|
255
320
|
): number | undefined {
|
|
@@ -4,22 +4,8 @@ import type {
|
|
|
4
4
|
ThemeFontScheme,
|
|
5
5
|
ResolvedTheme,
|
|
6
6
|
} from "../../model/canonical-document.ts";
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
interface XmlElementNode {
|
|
11
|
-
type: "element";
|
|
12
|
-
name: string;
|
|
13
|
-
attributes: Record<string, string>;
|
|
14
|
-
children: XmlNode[];
|
|
15
|
-
}
|
|
16
|
-
|
|
17
|
-
interface XmlTextNode {
|
|
18
|
-
type: "text";
|
|
19
|
-
text: string;
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
type XmlNode = XmlElementNode | XmlTextNode;
|
|
7
|
+
import type { XmlElementNode } from "./xml-element.ts";
|
|
8
|
+
import { parseXml } from "./xml-parser.ts";
|
|
23
9
|
|
|
24
10
|
// ---- Well-known DrawingML color slot names ----
|
|
25
11
|
|
|
@@ -233,114 +219,3 @@ function localName(name: string): string {
|
|
|
233
219
|
return idx >= 0 ? name.slice(idx + 1) : name;
|
|
234
220
|
}
|
|
235
221
|
|
|
236
|
-
// ---- Minimal XML parser ----
|
|
237
|
-
|
|
238
|
-
function parseXml(xml: string): XmlElementNode {
|
|
239
|
-
const root: XmlElementNode = {
|
|
240
|
-
type: "element",
|
|
241
|
-
name: "__root__",
|
|
242
|
-
attributes: {},
|
|
243
|
-
children: [],
|
|
244
|
-
};
|
|
245
|
-
const stack: XmlElementNode[] = [root];
|
|
246
|
-
let cursor = 0;
|
|
247
|
-
|
|
248
|
-
while (cursor < xml.length) {
|
|
249
|
-
if (xml.startsWith("<!--", cursor)) {
|
|
250
|
-
const end = xml.indexOf("-->", cursor);
|
|
251
|
-
cursor = end >= 0 ? end + 3 : xml.length;
|
|
252
|
-
continue;
|
|
253
|
-
}
|
|
254
|
-
|
|
255
|
-
if (xml.startsWith("<?", cursor)) {
|
|
256
|
-
const end = xml.indexOf("?>", cursor);
|
|
257
|
-
cursor = end >= 0 ? end + 2 : xml.length;
|
|
258
|
-
continue;
|
|
259
|
-
}
|
|
260
|
-
|
|
261
|
-
if (xml.startsWith("<![CDATA[", cursor)) {
|
|
262
|
-
const end = xml.indexOf("]]>", cursor);
|
|
263
|
-
const textEnd = end >= 0 ? end : xml.length;
|
|
264
|
-
stack[stack.length - 1]?.children.push({
|
|
265
|
-
type: "text",
|
|
266
|
-
text: xml.slice(cursor + 9, textEnd),
|
|
267
|
-
});
|
|
268
|
-
cursor = end >= 0 ? end + 3 : xml.length;
|
|
269
|
-
continue;
|
|
270
|
-
}
|
|
271
|
-
|
|
272
|
-
if (xml[cursor] !== "<") {
|
|
273
|
-
const nextTag = xml.indexOf("<", cursor);
|
|
274
|
-
const end = nextTag >= 0 ? nextTag : xml.length;
|
|
275
|
-
const text = decodeXmlEntities(xml.slice(cursor, end));
|
|
276
|
-
if (text.trim().length > 0 || (text.length > 0 && stack.length > 1)) {
|
|
277
|
-
stack[stack.length - 1]?.children.push({ type: "text", text });
|
|
278
|
-
}
|
|
279
|
-
cursor = end;
|
|
280
|
-
continue;
|
|
281
|
-
}
|
|
282
|
-
|
|
283
|
-
if (xml[cursor + 1] === "/") {
|
|
284
|
-
const end = xml.indexOf(">", cursor);
|
|
285
|
-
if (end < 0) break;
|
|
286
|
-
stack.pop();
|
|
287
|
-
cursor = end + 1;
|
|
288
|
-
continue;
|
|
289
|
-
}
|
|
290
|
-
|
|
291
|
-
const tagEnd = xml.indexOf(">", cursor);
|
|
292
|
-
if (tagEnd < 0) break;
|
|
293
|
-
|
|
294
|
-
const tagContent = xml.slice(cursor + 1, tagEnd);
|
|
295
|
-
const selfClosing = tagContent.endsWith("/");
|
|
296
|
-
const normalized = selfClosing ? tagContent.slice(0, -1).trimEnd() : tagContent;
|
|
297
|
-
|
|
298
|
-
const spaceIndex = normalized.search(/\s/);
|
|
299
|
-
const tagName = spaceIndex >= 0 ? normalized.slice(0, spaceIndex) : normalized;
|
|
300
|
-
const attrString = spaceIndex >= 0 ? normalized.slice(spaceIndex + 1) : "";
|
|
301
|
-
const attributes = parseAttributes(attrString);
|
|
302
|
-
|
|
303
|
-
const element: XmlElementNode = {
|
|
304
|
-
type: "element",
|
|
305
|
-
name: tagName,
|
|
306
|
-
attributes,
|
|
307
|
-
children: [],
|
|
308
|
-
};
|
|
309
|
-
|
|
310
|
-
stack[stack.length - 1]?.children.push(element);
|
|
311
|
-
|
|
312
|
-
if (!selfClosing) {
|
|
313
|
-
stack.push(element);
|
|
314
|
-
}
|
|
315
|
-
|
|
316
|
-
cursor = tagEnd + 1;
|
|
317
|
-
}
|
|
318
|
-
|
|
319
|
-
return root;
|
|
320
|
-
}
|
|
321
|
-
|
|
322
|
-
function parseAttributes(attrString: string): Record<string, string> {
|
|
323
|
-
const attrs: Record<string, string> = {};
|
|
324
|
-
const pattern = /([A-Za-z_:][A-Za-z0-9:._-]*)\s*=\s*("([^"]*)"|'([^']*)')/gu;
|
|
325
|
-
for (const match of attrString.matchAll(pattern)) {
|
|
326
|
-
const name = match[1];
|
|
327
|
-
const value = match[3] ?? match[4] ?? "";
|
|
328
|
-
if (name) {
|
|
329
|
-
attrs[name] = decodeXmlEntities(value);
|
|
330
|
-
}
|
|
331
|
-
}
|
|
332
|
-
return attrs;
|
|
333
|
-
}
|
|
334
|
-
|
|
335
|
-
function decodeXmlEntities(text: string): string {
|
|
336
|
-
return text
|
|
337
|
-
.replace(/&/g, "&")
|
|
338
|
-
.replace(/</g, "<")
|
|
339
|
-
.replace(/>/g, ">")
|
|
340
|
-
.replace(/"/g, '"')
|
|
341
|
-
.replace(/'/g, "'")
|
|
342
|
-
.replace(/&#(\d+);/g, (_, dec) => String.fromCodePoint(Number.parseInt(dec, 10)))
|
|
343
|
-
.replace(/&#x([0-9a-fA-F]+);/g, (_, hex) =>
|
|
344
|
-
String.fromCodePoint(Number.parseInt(hex, 16)),
|
|
345
|
-
);
|
|
346
|
-
}
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Property-level grab-bag primitive for Lane 3 O2.
|
|
3
|
+
*
|
|
4
|
+
* LibreOffice captures unmodelled children / attributes on every OOXML
|
|
5
|
+
* property container (`<w:pPr>`, `<w:rPr>`, `<w:tcPr>`, `<w:trPr>`,
|
|
6
|
+
* `<w:tblPr>`, `<w:sectPr>`) via per-container "grab bags" keyed by
|
|
7
|
+
* element name — see `PropertyMap.hxx:82` and
|
|
8
|
+
* `libreoffice-analysis.md` §2 for the mechanism. On export, every grab
|
|
9
|
+
* bag re-emits verbatim inside its container so the round-trip pipeline
|
|
10
|
+
* does not silently drop extension-namespace properties (`w15:collapsed`,
|
|
11
|
+
* `w16cex:...`, etc.) or attributes Word adds after we parsed its schema.
|
|
12
|
+
*
|
|
13
|
+
* This module is a small, framework-free adapter: per-container parsers
|
|
14
|
+
* supply a descriptor listing modelled child names (and later, modelled
|
|
15
|
+
* attributes on modelled children); the helper returns everything else as
|
|
16
|
+
* raw XML in insertion order. The matching emitter is a one-liner that
|
|
17
|
+
* just joins `rawXml` strings.
|
|
18
|
+
*
|
|
19
|
+
* Scope (O2 Slice 1): per-container child diff only — the unknown-attribute
|
|
20
|
+
* diff on modelled children is a follow-up slice. Today the descriptor's
|
|
21
|
+
* `modelledChildAttributes` is declared but ignored; the helper emits a
|
|
22
|
+
* whole-child entry only when the element's localName is NOT in
|
|
23
|
+
* `modelledChildNames`.
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
/**
 * Minimal description of one direct child of a property container, as
 * consumed by `capturePropertyGrabBag`. Callers adapt whatever node shape
 * their own scanner produces into this two-field record.
 */
export interface GrabBagSourceChild {
  /**
   * Element name with the namespace prefix removed, e.g. `"kinsoku"` for
   * `<w:kinsoku>` or `"collapsed"` for `<w15:collapsed>`. This is the value
   * matched against the descriptor's modelled child names.
   */
  localName: string;
  /**
   * XML text of the whole child element: opening tag with attributes, any
   * nested content, and the closing tag (or the self-closing form). Stored
   * as-is and re-emitted unchanged on export.
   */
  rawXml: string;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Adapter for callers that only hold an already-parsed element tree (a node
 * with `name`, `attributes` and `children`) rather than source offsets.
 *
 * The element is re-serialized to produce `rawXml`, so the result is a
 * reconstruction: element names, attribute names/values and text content are
 * kept, and attributes are written in the iteration order of the
 * `attributes` record, but inter-element whitespace and the original quoting
 * style are not reproduced byte-for-byte.
 *
 * @param node Parsed element to convert.
 * @returns The prefix-stripped local name paired with the re-serialized XML.
 */
export function buildGrabBagSourceChildFromParsed(node: {
  name: string;
  attributes: Record<string, string>;
  children: Array<{ type: "element"; name: string; attributes: Record<string, string>; children: unknown[] } | { type: "text"; text: string }>;
}): GrabBagSourceChild {
  const localName = localNameOf(node.name);
  const rawXml = serializeElementToString(node);
  return { localName, rawXml };
}
|
|
69
|
+
|
|
70
|
+
/**
 * Strip the namespace prefix from a qualified XML name.
 *
 * Everything up to and including the first `:` is removed; a name without a
 * colon is returned unchanged.
 */
function localNameOf(qualified: string): string {
  const separatorAt = qualified.indexOf(":");
  if (separatorAt < 0) {
    return qualified;
  }
  return qualified.slice(separatorAt + 1);
}
|
|
74
|
+
|
|
75
|
+
/**
 * Escape a string for use inside a double-quoted XML attribute value.
 *
 * `&` is replaced first so the ampersands introduced by the later
 * replacements are not escaped a second time.
 *
 * @param value Decoded attribute value.
 * @returns The value with `&`, `<`, `>` and `"` replaced by entity references.
 */
function escapeAttr(value: string): string {
  return value
    .replace(/&/gu, "&amp;")
    .replace(/</gu, "&lt;")
    .replace(/>/gu, "&gt;")
    .replace(/"/gu, "&quot;");
}
|
|
82
|
+
|
|
83
|
+
/**
 * Escape a string for use as XML character data (element text content).
 *
 * `&` is replaced first so the ampersands introduced by the later
 * replacements are not escaped a second time. Quotes are left alone because
 * they need no escaping outside attribute values.
 *
 * @param value Decoded text content.
 * @returns The text with `&`, `<` and `>` replaced by entity references.
 */
function escapeText(value: string): string {
  return value
    .replace(/&/gu, "&amp;")
    .replace(/</gu, "&lt;")
    .replace(/>/gu, "&gt;");
}
|
|
89
|
+
|
|
90
|
+
/**
 * Recursively turn a parsed element back into an XML string.
 *
 * Attributes are written in the iteration order of `node.attributes`, each
 * value passed through `escapeAttr`. An element with no children is written
 * in self-closing form; otherwise text children go through `escapeText` and
 * element children are serialized recursively.
 *
 * @param node Parsed element (name, attributes, children).
 * @returns XML text for the element and everything beneath it.
 */
function serializeElementToString(node: {
  name: string;
  attributes: Record<string, string>;
  children: Array<{ type: "element"; name: string; attributes: Record<string, string>; children: unknown[] } | { type: "text"; text: string }>;
}): string {
  let attributeText = "";
  for (const [attrName, attrValue] of Object.entries(node.attributes)) {
    attributeText += ` ${attrName}="${escapeAttr(attrValue)}"`;
  }

  if (node.children.length === 0) {
    return `<${node.name}${attributeText}/>`;
  }

  const pieces: string[] = [];
  for (const child of node.children) {
    if (child.type === "text") {
      pieces.push(escapeText(child.text));
      continue;
    }
    // Nested elements are typed with `children: unknown[]`; narrow back to
    // this function's own parameter shape for the recursive call.
    pieces.push(
      serializeElementToString(
        child as Parameters<typeof serializeElementToString>[0],
      ),
    );
  }
  return `<${node.name}${attributeText}>${pieces.join("")}</${node.name}>`;
}
|
|
115
|
+
|
|
116
|
+
/**
 * Declaration supplied by a per-container parser telling the grab-bag helper
 * which child elements the parser already handles itself. Any child not
 * named here is captured as a grab-bag entry.
 */
export interface PropertyGrabBagDescriptor {
  /**
   * Local names (no namespace prefix) of the children the container parser
   * models natively. A child whose `localName` is in this set is skipped by
   * `capturePropertyGrabBag`.
   */
  modelledChildNames: ReadonlySet<string>;
  /**
   * For each modelled child, the set of attributes the parser consumes.
   *
   * Declared for a follow-up slice and not read by the current helper; the
   * plan is to use it for attribute-level grab entries on modelled children.
   *
   * Per the original note: table containers (tblPr/trPr/tcPr) currently go
   * through a separate raw-XML path (`mergePropertiesXml` in
   * `src/io/export/table-properties-xml.ts`) that stores the whole container
   * XML on the `propertiesXml` field of `TableNode` / `TableRowNode` /
   * `TableCellNode`. That path cannot take part in the attribute-level slice
   * until it emits `UnknownPropertyChild[]` through this descriptor — tracked
   * in `docs/plans/lane-3-layout-engine-ooxml-fidelity.md`.
   */
  modelledChildAttributes: ReadonlyMap<string, ReadonlySet<string>>;
}
|
|
144
|
+
|
|
145
|
+
/**
 * One grab-bag entry: a direct child of a property container that the
 * container's parser does not model, kept as XML text so the serializer can
 * write it back inside the same container on export.
 */
export interface UnknownPropertyChild {
  /**
   * Qualified (prefixed) element name, e.g. `"w:kinsoku"` or
   * `"w15:collapsed"`. Intended for diagnostics and for the planned
   * attribute-level diff, where the emitter needs to know which element to
   * re-open.
   */
  elementName: string;
  /**
   * XML text for the child element, exactly as supplied by the caller of
   * `capturePropertyGrabBag`.
   */
  rawXml: string;
}
|
|
163
|
+
|
|
164
|
+
/**
 * Collect the unmodelled direct children of a property container.
 *
 * Every child whose `localName` is absent from
 * `descriptor.modelledChildNames` becomes one entry, in the order given. The
 * entry's `elementName` is taken from the opening tag of `rawXml` when one
 * can be extracted, otherwise from the child's `localName`.
 *
 * Only the child's own name is examined — attributes and nested elements are
 * not inspected, and `descriptor.modelledChildAttributes` is not read.
 *
 * @param children   Direct children of the container, in source order.
 * @param descriptor Names the container parser handles natively.
 * @returns The captured entries, or `undefined` (never an empty array) when
 *          every child is modelled.
 */
export function capturePropertyGrabBag(
  children: readonly GrabBagSourceChild[],
  descriptor: PropertyGrabBagDescriptor,
): UnknownPropertyChild[] | undefined {
  const captured = children
    .filter((candidate) => !descriptor.modelledChildNames.has(candidate.localName))
    .map(
      (candidate): UnknownPropertyChild => ({
        elementName:
          extractQualifiedNameFromRawXml(candidate.rawXml) ?? candidate.localName,
        rawXml: candidate.rawXml,
      }),
    );
  if (captured.length === 0) {
    return undefined;
  }
  return captured;
}
|
|
188
|
+
|
|
189
|
+
/**
 * Serialize a grab-bag list for insertion into its property container.
 *
 * The entries' `rawXml` strings are concatenated in list order with no
 * separator and no further escaping, so each entry is written back exactly
 * as it was captured.
 *
 * @param entries Captured entries; `undefined` or an empty list yields `""`.
 * @returns The concatenated XML text.
 */
export function emitPropertyGrabBag(
  entries: readonly UnknownPropertyChild[] | undefined,
): string {
  if (!entries || entries.length === 0) {
    return "";
  }
  const fragments = Array.from(entries, (item) => item.rawXml);
  return fragments.join("");
}
|
|
201
|
+
|
|
202
|
+
/**
 * Pull the qualified element name out of the opening tag at the very start
 * of `rawXml` — e.g. `<w15:collapsed w:val="1"/>` yields `"w15:collapsed"`.
 *
 * The name is the run of characters after the leading `<` up to the first
 * whitespace, `/` or `>`. When the string does not begin with such a tag
 * (leading whitespace, a closing tag, plain text) the result is `undefined`
 * and the caller is expected to fall back to another name.
 */
function extractQualifiedNameFromRawXml(rawXml: string): string | undefined {
  const openingTag = /^<([^\s/>]+)/u.exec(rawXml);
  if (openingTag === null) {
    return undefined;
  }
  return openingTag[1];
}
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
* parse-styles.ts, parse-numbering.ts.
|
|
11
11
|
*/
|
|
12
12
|
|
|
13
|
-
import type { XmlElementNode } from "./xml-element.ts";
|
|
13
|
+
import type { XmlElementNode, XmlNode } from "./xml-element.ts";
|
|
14
14
|
|
|
15
15
|
export function localName(name: string): string {
|
|
16
16
|
const sep = name.indexOf(":");
|
|
@@ -26,6 +26,27 @@ export function findChildOptional(
|
|
|
26
26
|
);
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
+
/**
 * Find the first descendant element whose local name (namespace prefix
 * ignored) equals `local`, visiting the tree in document order: each child
 * element is tested before its own descendants, and before its later
 * siblings.
 *
 * Handy when the intermediate wrappers are not known in advance — e.g.
 * locating a `<c:chart>` somewhere beneath a `<w:drawing>`.
 *
 * @param node  Element whose subtree is searched; `node` itself is not tested.
 * @param local Local element name to look for.
 * @returns The first matching element, or `undefined` when there is none.
 */
export function findFirstDescendant(
  node: XmlElementNode,
  local: string,
): XmlElementNode | undefined {
  // Explicit stack instead of recursion. Children are pushed in reverse so
  // that popping yields them in source order (pre-order traversal).
  const pending: XmlElementNode[] = [];
  const pushElementChildren = (parent: XmlElementNode): void => {
    for (let index = parent.children.length - 1; index >= 0; index -= 1) {
      const candidate = parent.children[index];
      if (candidate?.type === "element") pending.push(candidate);
    }
  };

  pushElementChildren(node);
  for (let current = pending.pop(); current !== undefined; current = pending.pop()) {
    if (localName(current.name) === local) {
      return current;
    }
    pushElementChildren(current);
  }
  return undefined;
}
|
|
49
|
+
|
|
29
50
|
/** ST_OnOff: missing child → undefined; present bare or w:val="1|true|on" → true; w:val="0|false|off" → false. */
|
|
30
51
|
export function readOnOff(node: XmlElementNode | undefined): boolean | undefined {
|
|
31
52
|
if (!node) return undefined;
|
|
@@ -46,6 +67,43 @@ export function readIntVal(node: XmlElementNode | undefined): number | undefined
|
|
|
46
67
|
return Number.isFinite(v) ? v : undefined;
|
|
47
68
|
}
|
|
48
69
|
|
|
70
|
+
/**
 * Read a node's `val` attribute as a floating-point number.
 *
 * The prefixed `w:val` attribute is preferred; the bare `val` attribute is
 * used when the prefixed one is absent.
 *
 * @param node Element to read from; may be `undefined`.
 * @returns The value parsed with `Number.parseFloat`, or `undefined` when the
 *          node or attribute is missing or the parsed result is not finite.
 */
export function readFloatVal(node: XmlElementNode | undefined): number | undefined {
  const attributeText = node?.attributes["w:val"] ?? node?.attributes.val;
  if (attributeText === undefined) {
    return undefined;
  }
  const parsed = Number.parseFloat(attributeText);
  if (!Number.isFinite(parsed)) {
    return undefined;
  }
  return parsed;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Read the text of the first child element named `local`.
 *
 * The child is located with `findChildOptional` and its text gathered with
 * `textContent`.
 *
 * @param node  Parent element; may be `undefined`.
 * @param local Local name of the child to read.
 * @returns `undefined` when `node` is missing or no such child is found;
 *          otherwise the child's concatenated text (an empty string when the
 *          child holds no text).
 */
export function readStringChild(
  node: XmlElementNode | undefined,
  local: string,
): string | undefined {
  const match = node ? findChildOptional(node, local) : undefined;
  return match ? textContent(match) : undefined;
}
|
|
93
|
+
|
|
94
|
+
/**
 * Gather the text of every text node beneath `node`, at any depth, and
 * return it as one string in document order. Element names and attributes
 * contribute nothing; an element with no text yields `""`.
 */
export function textContent(node: XmlElementNode): string {
  const gather = (nodes: XmlNode[]): string =>
    nodes.reduce(
      (accumulated, current) =>
        accumulated +
        (current.type === "text" ? current.text : gather(current.children)),
      "",
    );
  return gather(node.children);
}
|
|
106
|
+
|
|
49
107
|
/** Read an arbitrary attribute from a node as an int, with namespace fallback. */
|
|
50
108
|
export function readIntAttr(node: XmlElementNode, attr: string): number | undefined {
|
|
51
109
|
const raw = node.attributes[attr] ?? node.attributes[attr.replace(/^w:/, "")];
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Minimal XML parser shared across OOXML parsers.
|
|
3
|
+
*
|
|
4
|
+
* Handles the subset of XML that Office documents actually emit:
|
|
5
|
+
* elements, attributes, text, comments (stripped), processing instructions
|
|
6
|
+
* (stripped), and CDATA sections. Namespace prefixes are preserved verbatim
|
|
7
|
+
* on element and attribute names — strip with `localName()` from
|
|
8
|
+
* `xml-attr-helpers.ts` when lookups must be prefix-agnostic.
|
|
9
|
+
*
|
|
10
|
+
* Not a full XML conformance parser: does not validate element nesting,
|
|
11
|
+
* does not track source offsets on produced nodes, does not report errors on
|
|
12
|
+
* malformed input beyond best-effort early termination. Sufficient for every
|
|
13
|
+
* OOXML part we import.
|
|
14
|
+
*
|
|
15
|
+
* Originally inlined in `parse-theme.ts`; extracted here so the chart parsers
|
|
16
|
+
* and any future OOXML import path can share the same implementation.
|
|
17
|
+
*/
|
|
18
|
+
import type { XmlElementNode } from "./xml-element.ts";
|
|
19
|
+
|
|
20
|
+
const ROOT_TAG = "__root__";

/**
 * Locate the `>` that terminates the tag starting at `from`, ignoring any
 * `>` that sits inside a quoted attribute value.
 *
 * @returns Index of the terminating `>`, or -1 when none is found (including
 *          the case of an unterminated quote).
 */
function findTagEnd(xml: string, from: number): number {
  let openQuote: string | undefined;
  for (let index = from; index < xml.length; index += 1) {
    const ch = xml[index];
    if (openQuote !== undefined) {
      if (ch === openQuote) openQuote = undefined;
    } else if (ch === '"' || ch === "'") {
      openQuote = ch;
    } else if (ch === ">") {
      return index;
    }
  }
  return -1;
}

/**
 * Parse an XML string into a synthetic root element named `__root__` whose
 * `children` hold the top-level nodes of the source.
 *
 * Handling, in the order the scanner checks it:
 * - comments and processing instructions are skipped;
 * - CDATA sections become text nodes with their content taken as-is;
 * - character data is entity-decoded; whitespace-only runs are kept only
 *   when they occur inside an element (not directly under the root);
 * - a closing tag pops the current element without checking its name;
 * - any other tag becomes an element, pushed onto the stack unless it is
 *   self-closing.
 *
 * Parsing is best-effort: an unterminated tag stops the scan and whatever
 * was built so far is returned. No errors are thrown for malformed input.
 *
 * @param xml XML source text.
 * @returns The synthetic root element.
 */
export function parseXml(xml: string): XmlElementNode {
  const root: XmlElementNode = {
    type: "element",
    name: ROOT_TAG,
    attributes: {},
    children: [],
  };
  const stack: XmlElementNode[] = [root];
  let cursor = 0;

  while (cursor < xml.length) {
    if (xml.startsWith("<!--", cursor)) {
      const end = xml.indexOf("-->", cursor);
      cursor = end >= 0 ? end + 3 : xml.length;
      continue;
    }

    if (xml.startsWith("<?", cursor)) {
      const end = xml.indexOf("?>", cursor);
      cursor = end >= 0 ? end + 2 : xml.length;
      continue;
    }

    if (xml.startsWith("<![CDATA[", cursor)) {
      const end = xml.indexOf("]]>", cursor);
      const textEnd = end >= 0 ? end : xml.length;
      stack[stack.length - 1]?.children.push({
        type: "text",
        // 9 === "<![CDATA[".length; CDATA content is not entity-decoded.
        text: xml.slice(cursor + 9, textEnd),
      });
      cursor = end >= 0 ? end + 3 : xml.length;
      continue;
    }

    if (xml[cursor] !== "<") {
      const nextTag = xml.indexOf("<", cursor);
      const end = nextTag >= 0 ? nextTag : xml.length;
      const text = decodeXmlEntities(xml.slice(cursor, end));
      // Keep whitespace-only text inside elements; drop it at the top level.
      if (text.trim().length > 0 || (text.length > 0 && stack.length > 1)) {
        stack[stack.length - 1]?.children.push({ type: "text", text });
      }
      cursor = end;
      continue;
    }

    if (xml[cursor + 1] === "/") {
      const end = xml.indexOf(">", cursor);
      if (end < 0) break;
      // Never pop the synthetic root: after an unmatched closing tag the
      // stack would be empty and every later node would be silently dropped.
      if (stack.length > 1) {
        stack.pop();
      }
      cursor = end + 1;
      continue;
    }

    // Prefer a quote-aware scan so a literal ">" inside an attribute value
    // does not cut the tag short; fall back to the plain search when the
    // quotes are unbalanced, preserving best-effort handling.
    const quotedEnd = findTagEnd(xml, cursor);
    const tagEnd = quotedEnd >= 0 ? quotedEnd : xml.indexOf(">", cursor);
    if (tagEnd < 0) break;

    const tagContent = xml.slice(cursor + 1, tagEnd);
    const selfClosing = tagContent.endsWith("/");
    const normalized = selfClosing
      ? tagContent.slice(0, -1).trimEnd()
      : tagContent;

    const spaceIndex = normalized.search(/\s/);
    const tagName =
      spaceIndex >= 0 ? normalized.slice(0, spaceIndex) : normalized;
    const attrString = spaceIndex >= 0 ? normalized.slice(spaceIndex + 1) : "";
    const attributes = parseAttributes(attrString);

    const element: XmlElementNode = {
      type: "element",
      name: tagName,
      attributes,
      children: [],
    };

    stack[stack.length - 1]?.children.push(element);

    if (!selfClosing) {
      stack.push(element);
    }

    cursor = tagEnd + 1;
  }

  return root;
}
|
|
114
|
+
|
|
115
|
+
/** The five entity names predefined by XML, mapped to their characters. */
const NAMED_XML_ENTITIES: Readonly<Record<string, string>> = {
  amp: "&",
  lt: "<",
  gt: ">",
  quot: '"',
  apos: "'",
};

/**
 * Decode the five predefined XML entity references plus decimal and
 * hexadecimal numeric character references.
 *
 * Decoding is done in a single pass, so text produced by one replacement is
 * never decoded again: `&amp;lt;` yields the literal text `&lt;`, not `<`.
 * Unknown entity names and numeric references outside the Unicode range
 * (above U+10FFFF) are left in the output unchanged instead of throwing.
 *
 * @param text Raw character data or attribute value.
 * @returns The decoded text.
 */
export function decodeXmlEntities(text: string): string {
  return text.replace(
    /&(?:#(\d+)|#x([0-9a-fA-F]+)|(amp|lt|gt|quot|apos));/g,
    (whole: string, dec?: string, hex?: string, named?: string): string => {
      if (named !== undefined) {
        return NAMED_XML_ENTITIES[named] ?? whole;
      }
      const codePoint =
        dec !== undefined
          ? Number.parseInt(dec, 10)
          : Number.parseInt(hex ?? "", 16);
      // String.fromCodePoint throws RangeError outside 0..0x10FFFF.
      if (!Number.isInteger(codePoint) || codePoint < 0 || codePoint > 0x10ffff) {
        return whole;
      }
      return String.fromCodePoint(codePoint);
    },
  );
}
|
|
130
|
+
|
|
131
|
+
/**
 * Parse the attribute portion of a tag (everything after the tag name) into
 * a name → value record.
 *
 * Each `name="value"` or `name='value'` pair is matched in turn; names keep
 * their namespace prefix and values are entity-decoded. When a name occurs
 * more than once, the last occurrence wins. Text that does not match the
 * pattern is ignored.
 */
function parseAttributes(attrString: string): Record<string, string> {
  const attributePattern = /([A-Za-z_:][A-Za-z0-9:._-]*)\s*=\s*("([^"]*)"|'([^']*)')/gu;
  const parsed: Record<string, string> = {};
  let found: RegExpExecArray | null = attributePattern.exec(attrString);
  while (found !== null) {
    // Group 3 holds a double-quoted value, group 4 a single-quoted one.
    const [, attrName, , doubleQuoted, singleQuoted] = found;
    const rawValue = doubleQuoted ?? singleQuoted ?? "";
    if (attrName) {
      parsed[attrName] = decodeXmlEntities(rawValue);
    }
    found = attributePattern.exec(attrString);
  }
  return parsed;
}
|