@beyondwork/docx-react-component 1.0.47 → 1.0.48
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/api/public-types.ts +115 -1
- package/src/compare/diff-engine.ts +4 -0
- package/src/core/commands/add-scope.ts +257 -0
- package/src/core/commands/formatting-commands.ts +2 -0
- package/src/core/schema/text-schema.ts +95 -1
- package/src/core/state/text-transaction.ts +17 -5
- package/src/io/chart-preview-resolver.ts +27 -0
- package/src/io/docx-session.ts +226 -38
- package/src/io/export/serialize-main-document.ts +37 -0
- package/src/io/export/serialize-settings.ts +421 -0
- package/src/io/export/serialize-styles.ts +10 -0
- package/src/io/normalize/normalize-text.ts +1 -0
- package/src/io/ooxml/chart/parse-axis.ts +277 -0
- package/src/io/ooxml/chart/parse-chart-space.ts +813 -0
- package/src/io/ooxml/chart/parse-series.ts +570 -0
- package/src/io/ooxml/chart/resolve-color.ts +251 -0
- package/src/io/ooxml/chart/types.ts +420 -0
- package/src/io/ooxml/parse-block-structure.ts +99 -0
- package/src/io/ooxml/parse-complex-content.ts +87 -2
- package/src/io/ooxml/parse-main-document.ts +115 -1
- package/src/io/ooxml/parse-scope-markers.ts +184 -0
- package/src/io/ooxml/parse-settings-blueprint.ts +349 -0
- package/src/io/ooxml/parse-settings.ts +97 -1
- package/src/io/ooxml/parse-styles.ts +65 -0
- package/src/io/ooxml/parse-theme.ts +2 -127
- package/src/io/ooxml/xml-attr-helpers.ts +59 -1
- package/src/io/ooxml/xml-parser.ts +142 -0
- package/src/model/canonical-document.ts +94 -0
- package/src/model/scope-markers.ts +144 -0
- package/src/runtime/collab/base-doc-fingerprint.ts +99 -0
- package/src/runtime/collab/checkpoint-election.ts +75 -0
- package/src/runtime/collab/checkpoint-scheduler.ts +204 -0
- package/src/runtime/collab/checkpoint-store.ts +115 -0
- package/src/runtime/collab/event-types.ts +27 -0
- package/src/runtime/collab/index.ts +22 -0
- package/src/runtime/collab/remote-cursor-awareness.ts +167 -0
- package/src/runtime/collab/runtime-collab-sync.ts +279 -0
- package/src/runtime/document-runtime.ts +214 -16
- package/src/runtime/editor-surface/capabilities.ts +63 -50
- package/src/runtime/layout/layout-engine-version.ts +8 -1
- package/src/runtime/prerender/cache-envelope.ts +19 -7
- package/src/runtime/prerender/cache-key.ts +25 -14
- package/src/runtime/prerender/canonical-document-hash.ts +63 -0
- package/src/runtime/prerender/customxml-cache.ts +211 -0
- package/src/runtime/prerender/customxml-probe.ts +78 -0
- package/src/runtime/prerender/prerender-document.ts +74 -7
- package/src/runtime/scope-resolver.ts +148 -0
- package/src/runtime/scope-tag-registry.ts +10 -0
- package/src/runtime/surface-projection.ts +8 -1
- package/src/ui/WordReviewEditor.tsx +30 -0
- package/src/ui/editor-runtime-boundary.ts +6 -1
- package/src/ui/runtime-shortcut-dispatch.ts +12 -7
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* L7 Phase 2.5 Plan B B.7 — shallow structural probe for `word/document.xml`.
|
|
3
|
+
*
|
|
4
|
+
* Emits the ordered (kind, blockId) list for top-level body children without
|
|
5
|
+
* building the canonical-document model. Used by `customxml-probe` to verify
|
|
6
|
+
* that a cached laycache envelope's `structuralHash` still matches the
|
|
7
|
+
* document the envelope was written against.
|
|
8
|
+
*
|
|
9
|
+
* **Correctness requirement.** The output must match what
|
|
10
|
+
* `surface-projection.ts:createSurfaceBlock` emits when walking a
|
|
11
|
+
* canonical document produced by the full parse pipeline. Specifically:
|
|
12
|
+
* - Paragraph blockIds use a GLOBAL counter incremented on every
|
|
13
|
+
* `<w:p>` encountered ANYWHERE in the tree (top-level or nested
|
|
14
|
+
* inside a table cell). The top-level paragraph's blockId is
|
|
15
|
+
* `paragraph-${counter_at_time_of_encounter}`.
|
|
16
|
+
* - Table blockIds use a GLOBAL counter incremented on every
|
|
17
|
+
* `<w:tbl>` at any depth.
|
|
18
|
+
* - Other top-level elements (`<w:sdt>`, `<w:altChunk>`, `<w:sectPr>`)
|
|
19
|
+
* are NOT emitted by this probe.
|
|
20
|
+
*
|
|
21
|
+
* **Known limitation (2026-04-19 shipping state).** The full parse
|
|
22
|
+
* promotes certain `<w:p>` elements to `opaque_block` based on their
|
|
23
|
+
* content — e.g. paragraphs containing structured content controls
|
|
24
|
+
* (`<w:sdt>`), floating drawings (`<w:drawing>` with `<wp:anchor>`),
|
|
25
|
+
* or `<mc:AlternateContent>` markup-compat wrappers. The shallow probe
|
|
26
|
+
* cannot detect these patterns without a deeper walk, so it counts such
|
|
27
|
+
* paragraphs as plain `paragraph` blocks. On docs where this triggers
|
|
28
|
+
* (~20% of F-series fixtures; 2 of 3 CCEP templates), the probe's
|
|
29
|
+
* structural hash diverges from the envelope's → cache is rejected →
|
|
30
|
+
* safe fallback to the full-parse open path. Plan B warm-cache opt-in
|
|
31
|
+
* is "clean docs only" under this probe.
|
|
32
|
+
*
|
|
33
|
+
* Future improvement: refine the probe to detect `<w:sdt>`,
|
|
34
|
+
* anchored `<w:drawing>` (`<wp:anchor>`), and `<mc:AlternateContent>` inside top-level
|
|
35
|
+
* paragraphs and classify them accordingly. Deferred unless real-world
|
|
36
|
+
* hit rates prove insufficient.
|
|
37
|
+
*
|
|
38
|
+
* **Cost budget.** <30 ms on extra-large CCEP (~2.7 MB document.xml).
|
|
39
|
+
* Single regex walk, O(bytes). No DOM, no full XML parse.
|
|
40
|
+
*
|
|
41
|
+
* **Fidelity gate.** `test/io/parse-block-structure.test.ts` compares
|
|
42
|
+
* probe output against full-parse blockIds on representative fixtures
|
|
43
|
+
* (F01/F02/F05/F48 for paragraph + table patterns, plus a clean CCEP
|
|
44
|
+
* template). Docs with opaque-promoting features are covered by a
|
|
45
|
+
* separate "safe fallback" test rather than the strict match.
|
|
46
|
+
*/
|
|
47
|
+
|
|
48
|
+
/** One top-level body block as reported by the shallow structural probe. */
export interface BlockStructureProbe {
  /** Block category; the probe only reports paragraphs and tables. */
  readonly kind: "paragraph" | "table";
  /** `paragraph-<n>` or `table-<n>`, where `<n>` is the counter value for that tag kind when the block was opened. */
  readonly blockId: string;
}

/** Captures the text between the `<w:body …>` open tag and the first `</w:body>`. */
const BODY_RE = /<w:body\b[^>]*>([\s\S]*?)<\/w:body>/u;

/**
 * Matches opening, closing and self-closing `<w:p>` / `<w:tbl>` tags.
 * Group 1 is "/" on a closing tag, group 2 is the local name, group 3 is "/"
 * on a self-closing tag. The `\b` keeps `<w:pPr>` / `<w:tblPr>` from matching.
 */
const TAG_RE = /<(\/?)w:(p|tbl)\b[^>]*?(\/?)>/gu;

/**
 * Shallow probe of `word/document.xml`: lists the paragraphs and tables that
 * open directly inside `<w:body>` (i.e. not nested in another `<w:p>` /
 * `<w:tbl>`), in document order.
 *
 * Block ids are built from two counters that advance on every `<w:p>` /
 * `<w:tbl>` open tag at any nesting depth, so a top-level paragraph that
 * follows a table is numbered after the paragraphs inside that table.
 *
 * @param documentXml Raw XML text of the main document part.
 * @returns The ordered top-level block list, or `[]` when no `<w:body>`
 *          element is found.
 */
export function parseBlockStructure(documentXml: string): BlockStructureProbe[] {
  const body = BODY_RE.exec(documentXml)?.[1];
  if (body === undefined) return [];

  const results: BlockStructureProbe[] = [];
  let paragraphCounter = 0;
  let tableCounter = 0;
  let depth = 0;

  // `matchAll` iterates over a private copy of the regex, so the shared
  // module-level TAG_RE never carries `lastIndex` state between calls.
  for (const [, closingSlash, tag, selfCloseSlash] of body.matchAll(TAG_RE)) {
    if (closingSlash === "/") {
      // Clamp at zero: a stray closing tag must not push the depth negative,
      // which would hide every later top-level block from the output.
      if (depth > 0) depth -= 1;
      continue;
    }

    const isParagraph = tag === "p";

    if (depth === 0) {
      results.push(
        isParagraph
          ? { kind: "paragraph", blockId: `paragraph-${paragraphCounter}` }
          : { kind: "table", blockId: `table-${tableCounter}` },
      );
    }

    // Counters advance for every open tag, nested or not.
    if (isParagraph) {
      paragraphCounter += 1;
    } else {
      tableCounter += 1;
    }

    // A self-closing element has no matching close tag to unwind the depth.
    if (selfCloseSlash !== "/") {
      depth += 1;
    }
  }

  return results;
}
|
|
@@ -11,12 +11,22 @@
|
|
|
11
11
|
|
|
12
12
|
import type { OpcRelationship } from "./part-manifest.ts";
|
|
13
13
|
import { normalizePartPath, resolveRelationshipTarget } from "./part-manifest.ts";
|
|
14
|
+
import { parseChartSpace } from "./chart/parse-chart-space.ts";
|
|
15
|
+
import type { ChartModel } from "./chart/types.ts";
|
|
14
16
|
|
|
15
17
|
export interface InlineMediaPart {
|
|
16
18
|
path: string;
|
|
17
19
|
contentType: string;
|
|
18
20
|
}
|
|
19
21
|
|
|
22
|
+
/**
|
|
23
|
+
* Callback that resolves a chart relationship id (the `r:id` on a
|
|
24
|
+
* `<c:chart>` reference) to the chart-part XML body. Returning undefined
|
|
25
|
+
* skips ChartModel population — the drawing still parses as a
|
|
26
|
+
* `ParsedChartContent` with `rawXml`, just without `parsedData`.
|
|
27
|
+
*/
|
|
28
|
+
export type ChartPartLookup = (rId: string) => string | undefined;
|
|
29
|
+
|
|
20
30
|
export interface ParsedChartContent {
|
|
21
31
|
type: "chart_preview";
|
|
22
32
|
/** Media ID of the fallback preview image, if one is present in mc:Fallback. */
|
|
@@ -25,6 +35,17 @@ export interface ParsedChartContent {
|
|
|
25
35
|
previewPackagePartName?: string;
|
|
26
36
|
/** MIME type of the preview media (e.g. `image/png`, `image/svg+xml`). */
|
|
27
37
|
previewContentType?: string;
|
|
38
|
+
/**
|
|
39
|
+
* Stage 1 typed chart model, when the chart part XML resolved and
|
|
40
|
+
* parsed cleanly. Undefined when no chart-part lookup was supplied, the
|
|
41
|
+
* lookup returned undefined, or `parseChartSpace` threw / returned an
|
|
42
|
+
* `UnsupportedChartModel` with reason="parse-error".
|
|
43
|
+
*
|
|
44
|
+
* A successful `UnsupportedChartModel{reason: "not-yet-implemented"}`
|
|
45
|
+
* IS attached — the renderer decides whether to fall back; preserve-
|
|
46
|
+
* only rawXml always survives export.
|
|
47
|
+
*/
|
|
48
|
+
parsedData?: ChartModel;
|
|
28
49
|
/** Original drawing XML slice for lossless round-trip export. */
|
|
29
50
|
rawXml: string;
|
|
30
51
|
}
|
|
@@ -60,6 +81,7 @@ export function parseComplexContentXml(
|
|
|
60
81
|
relationships: readonly OpcRelationship[],
|
|
61
82
|
mediaParts: ReadonlyMap<string, InlineMediaPart> = new Map(),
|
|
62
83
|
sourcePartPath = "/word/document.xml",
|
|
84
|
+
chartPartLookup?: ChartPartLookup,
|
|
63
85
|
): ParsedComplexContent | null {
|
|
64
86
|
const root = parseXml(drawingXml);
|
|
65
87
|
const relationshipMap = new Map(relationships.map((r) => [r.id, r]));
|
|
@@ -67,7 +89,14 @@ export function parseComplexContentXml(
|
|
|
67
89
|
// Look for mc:AlternateContent at any depth
|
|
68
90
|
const altContent = findFirstDescendant(root, "AlternateContent");
|
|
69
91
|
if (altContent) {
|
|
70
|
-
return parseAlternateContent(
|
|
92
|
+
return parseAlternateContent(
|
|
93
|
+
altContent,
|
|
94
|
+
drawingXml,
|
|
95
|
+
relationshipMap,
|
|
96
|
+
mediaParts,
|
|
97
|
+
sourcePartPath,
|
|
98
|
+
chartPartLookup,
|
|
99
|
+
);
|
|
71
100
|
}
|
|
72
101
|
|
|
73
102
|
// No mc:AlternateContent — look for direct graphic data
|
|
@@ -78,7 +107,10 @@ export function parseComplexContentXml(
|
|
|
78
107
|
|
|
79
108
|
const uri = graphicData.attributes.uri ?? graphicData.attributes["uri"] ?? "";
|
|
80
109
|
if (isChartUri(uri)) {
|
|
81
|
-
|
|
110
|
+
const parsedData = maybeParseChart(root, chartPartLookup);
|
|
111
|
+
const node: ParsedChartContent = { type: "chart_preview", rawXml: drawingXml };
|
|
112
|
+
if (parsedData) node.parsedData = parsedData;
|
|
113
|
+
return node;
|
|
82
114
|
}
|
|
83
115
|
if (isSmartArtUri(uri)) {
|
|
84
116
|
return { type: "smartart_preview", rawXml: drawingXml };
|
|
@@ -87,12 +119,43 @@ export function parseComplexContentXml(
|
|
|
87
119
|
return null;
|
|
88
120
|
}
|
|
89
121
|
|
|
122
|
+
/**
 * Best-effort attempt to turn the chart part referenced by a drawing into a
 * typed `ChartModel`.
 *
 * Looks for the first descendant named `chart`, reads its relationship id
 * (`r:id`, then `id`, then `r:embed`), asks `chartPartLookup` for the chart
 * part XML and hands that to `parseChartSpace`.
 *
 * @param drawingRoot     Parsed drawing (or AlternateContent) subtree to search.
 * @param chartPartLookup Resolver from relationship id to chart-part XML;
 *                        when absent, nothing is parsed.
 * @returns The parsed model, or `undefined` when there is no lookup, no chart
 *          reference, no relationship id, no chart XML, `parseChartSpace`
 *          throws, or the result reports `reason === "parse-error"`. Callers
 *          still emit a `ParsedChartContent` with `rawXml` in that case.
 */
function maybeParseChart(
  drawingRoot: XmlElementNode,
  chartPartLookup: ChartPartLookup | undefined,
): ChartModel | undefined {
  if (!chartPartLookup) return undefined;

  const chartRef = findFirstDescendant(drawingRoot, "chart");
  if (!chartRef) return undefined;

  const rId =
    chartRef.attributes["r:id"] ??
    chartRef.attributes["id"] ??
    chartRef.attributes["r:embed"];
  if (!rId) return undefined;

  const chartXml = chartPartLookup(rId);
  if (!chartXml) return undefined;

  try {
    const model = parseChartSpace(chartXml);
    // The `parsedData` contract says a parse-error result is never attached;
    // other unsupported results (e.g. "not-yet-implemented") are passed on.
    const reason: unknown = "reason" in model ? model.reason : undefined;
    if (reason === "parse-error") return undefined;
    return model;
  } catch {
    // Deliberate best-effort: a chart that fails to parse degrades to the
    // rawXml-only node instead of failing the whole document parse.
    return undefined;
  }
}
|
|
151
|
+
|
|
90
152
|
function parseAlternateContent(
|
|
91
153
|
altContent: XmlElementNode,
|
|
92
154
|
fullDrawingXml: string,
|
|
93
155
|
relationshipMap: Map<string, OpcRelationship>,
|
|
94
156
|
mediaParts: ReadonlyMap<string, InlineMediaPart>,
|
|
95
157
|
sourcePartPath: string,
|
|
158
|
+
chartPartLookup: ChartPartLookup | undefined,
|
|
96
159
|
): ParsedComplexContent | null {
|
|
97
160
|
const choice = findFirstChild(altContent, "Choice");
|
|
98
161
|
const fallback = findFirstChild(altContent, "Fallback");
|
|
@@ -150,6 +213,28 @@ function parseAlternateContent(
|
|
|
150
213
|
}
|
|
151
214
|
}
|
|
152
215
|
|
|
216
|
+
// For chart_preview, try to populate parsedData from the referenced
|
|
217
|
+
// chart part. parseAlternateContent is called with the AlternateContent
|
|
218
|
+
// subtree; the <c:chart> reference typically lives in the Choice branch,
|
|
219
|
+
// so we search from the altContent root (captures both Choice and any
|
|
220
|
+
// nested graphicData paths).
|
|
221
|
+
let parsedData: ChartModel | undefined;
|
|
222
|
+
if (contentType === "chart_preview") {
|
|
223
|
+
parsedData = maybeParseChart(altContent, chartPartLookup);
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
if (contentType === "chart_preview") {
|
|
227
|
+
const node: ParsedChartContent = {
|
|
228
|
+
type: "chart_preview",
|
|
229
|
+
...(previewMediaId ? { previewMediaId } : {}),
|
|
230
|
+
...(previewPackagePartName ? { previewPackagePartName } : {}),
|
|
231
|
+
...(previewContentType ? { previewContentType } : {}),
|
|
232
|
+
rawXml: fullDrawingXml,
|
|
233
|
+
};
|
|
234
|
+
if (parsedData) node.parsedData = parsedData;
|
|
235
|
+
return node;
|
|
236
|
+
}
|
|
237
|
+
|
|
153
238
|
return {
|
|
154
239
|
type: contentType,
|
|
155
240
|
...(previewMediaId ? { previewMediaId } : {}),
|
|
@@ -26,12 +26,13 @@ import type {
|
|
|
26
26
|
SectionPageBorders,
|
|
27
27
|
} from "../../model/canonical-document.ts";
|
|
28
28
|
import type { OpcRelationship } from "./part-manifest.ts";
|
|
29
|
+
import { SCOPE_MARKER_BOOKMARK_PREFIX } from "./parse-scope-markers.ts";
|
|
29
30
|
import {
|
|
30
31
|
parseInlineMediaXml,
|
|
31
32
|
type InlineMediaPart,
|
|
32
33
|
} from "./parse-inline-media.ts";
|
|
33
34
|
import { toCanonicalNumberingInstanceId } from "./parse-numbering.ts";
|
|
34
|
-
import { parseComplexContentXml } from "./parse-complex-content.ts";
|
|
35
|
+
import { parseComplexContentXml, type ChartPartLookup } from "./parse-complex-content.ts";
|
|
35
36
|
import { parseShapeXml, parseVmlXml } from "./parse-shapes.ts";
|
|
36
37
|
import { classifyFieldInstruction } from "./parse-fields.ts";
|
|
37
38
|
import { resolveHighlightColor } from "./highlight-colors.ts";
|
|
@@ -213,6 +214,9 @@ export interface ParsedChartPreviewNode {
|
|
|
213
214
|
previewMediaId?: string;
|
|
214
215
|
previewPackagePartName?: string;
|
|
215
216
|
previewContentType?: string;
|
|
217
|
+
/** Typed chart data parsed from the c:chartSpace part. See
|
|
218
|
+
* `src/io/ooxml/parse-complex-content.ts` for semantics. */
|
|
219
|
+
parsedData?: import("./chart/types.ts").ChartModel;
|
|
216
220
|
rawXml: string;
|
|
217
221
|
}
|
|
218
222
|
|
|
@@ -429,11 +433,38 @@ interface MarksParseResult {
|
|
|
429
433
|
const HYPERLINK_RELATIONSHIP_TYPE =
|
|
430
434
|
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink";
|
|
431
435
|
|
|
436
|
+
/**
 * Chart-part lookup for the parse currently in progress.
 *
 * `parseMainDocumentXml` installs the caller's lookup here before delegating
 * to `parseMainDocumentXmlInner`, and `parseRun` passes it on to
 * `parseComplexContentXml`. A module variable is used instead of threading
 * the callback through every intermediate function signature. The previous
 * value is restored when the parse returns or throws, so the variable never
 * outlives a call and a nested call cannot clobber the outer parse's lookup.
 */
let activeChartPartLookup: ChartPartLookup | undefined;

/**
 * Parse a main-document-style XML part into blocks plus final section
 * properties.
 *
 * @param xml             Raw XML of the part.
 * @param relationships   Relationships of the part (defaults to none).
 * @param mediaParts      Known media parts (defaults to an empty map).
 * @param sourcePartPath  Package path of the part being parsed.
 * @param chartPartLookup Optional resolver from chart relationship id to
 *                        chart-part XML; made available to the drawing
 *                        parser for the duration of this call.
 */
export function parseMainDocumentXml(
  xml: string,
  relationships: readonly OpcRelationship[] = [],
  mediaParts: ReadonlyMap<string, InlineMediaPart> = new Map(),
  sourcePartPath = "/word/document.xml",
  chartPartLookup?: ChartPartLookup,
): ParsedMainDocument {
  // Remember whatever an enclosing parse installed so that a nested call
  // (for example one made from inside a lookup callback) hands it back
  // instead of resetting it to undefined.
  const enclosingLookup = activeChartPartLookup;
  activeChartPartLookup = chartPartLookup;
  try {
    return parseMainDocumentXmlInner(xml, relationships, mediaParts, sourcePartPath);
  } finally {
    activeChartPartLookup = enclosingLookup;
  }
}
|
|
462
|
+
|
|
463
|
+
function parseMainDocumentXmlInner(
|
|
464
|
+
xml: string,
|
|
465
|
+
relationships: readonly OpcRelationship[],
|
|
466
|
+
mediaParts: ReadonlyMap<string, InlineMediaPart>,
|
|
467
|
+
sourcePartPath: string,
|
|
437
468
|
): ParsedMainDocument {
|
|
438
469
|
const root = parseXml(xml);
|
|
439
470
|
const documentElement = findChildElement(root, "document");
|
|
@@ -457,9 +488,91 @@ export function parseMainDocumentXml(
|
|
|
457
488
|
}
|
|
458
489
|
}
|
|
459
490
|
|
|
491
|
+
rewriteScopeMarkerBookmarks(blocks);
|
|
492
|
+
|
|
460
493
|
return { blocks, finalSectionProperties };
|
|
461
494
|
}
|
|
462
495
|
|
|
496
|
+
/**
 * S1 — post-process the parsed block tree in place, converting bookmark
 * pairs whose start `name` begins with `bw:scope:` into `scope_marker_start`
 * / `scope_marker_end` nodes.
 *
 * Start and end are paired by `bookmarkId`; the `scopeId` is the part of the
 * name after the prefix. Only ids that occur on BOTH a scope-named start and
 * some `bookmark_end` are rewritten. An orphan (start without end, or end
 * without a scope-named start) is left as a regular bookmark so that upstream
 * corruption is preserved rather than silently reinterpreted.
 *
 * The walk descends through `children`, `rows` and `cells` arrays, so a
 * marker's end may live in a later paragraph or table cell than its start.
 *
 * @param blocks Parsed top-level blocks; mutated in place.
 */
function rewriteScopeMarkerBookmarks(blocks: ParsedBlockNode[]): void {
  type LooseNode = { type?: string; [key: string]: unknown };

  // bookmarkId -> scopeId for every scope-named bookmark_start.
  const scopeIdByBookmarkId = new Map<string, string>();
  // bookmarkIds that appear on any bookmark_end.
  const endBookmarkIds = new Set<string>();

  const bookmarkIdOf = (node: LooseNode): string =>
    (node.bookmarkId as string | undefined) ?? "";

  // Child arrays to descend into, in a fixed order.
  const nestedLists = (node: LooseNode): LooseNode[][] => {
    const lists: LooseNode[][] = [];
    for (const key of ["children", "rows", "cells"] as const) {
      const value = node[key];
      if (Array.isArray(value)) lists.push(value as LooseNode[]);
    }
    return lists;
  };

  // Pass 1: record scope-named starts and every end id.
  const collect = (nodes: readonly LooseNode[]): void => {
    for (const node of nodes) {
      if (!node || typeof node !== "object") continue;

      if (node.type === "bookmark_start") {
        const name = (node.name as string | undefined) ?? "";
        if (name.startsWith(SCOPE_MARKER_BOOKMARK_PREFIX)) {
          const bookmarkId = bookmarkIdOf(node);
          const scopeId = name.slice(SCOPE_MARKER_BOOKMARK_PREFIX.length);
          if (bookmarkId && scopeId) {
            scopeIdByBookmarkId.set(bookmarkId, scopeId);
          }
        }
      } else if (node.type === "bookmark_end") {
        const bookmarkId = bookmarkIdOf(node);
        if (bookmarkId) endBookmarkIds.add(bookmarkId);
      }

      for (const list of nestedLists(node)) collect(list);
    }
  };

  // Pass 2: swap paired bookmark nodes for scope markers.
  const rewrite = (nodes: LooseNode[]): void => {
    for (let i = 0; i < nodes.length; i += 1) {
      const node = nodes[i];
      if (!node || typeof node !== "object") continue;

      if (node.type === "bookmark_start" || node.type === "bookmark_end") {
        const scopeId = scopeIdByBookmarkId.get(bookmarkIdOf(node));
        if (scopeId !== undefined) {
          nodes[i] = {
            type: node.type === "bookmark_start" ? "scope_marker_start" : "scope_marker_end",
            scopeId,
          };
          continue;
        }
      }

      for (const list of nestedLists(node)) rewrite(list);
    }
  };

  collect(blocks as unknown as LooseNode[]);

  // Forget starts that never found an end: they stay regular bookmarks.
  for (const bookmarkId of [...scopeIdByBookmarkId.keys()]) {
    if (!endBookmarkIds.has(bookmarkId)) scopeIdByBookmarkId.delete(bookmarkId);
  }
  if (scopeIdByBookmarkId.size === 0) return;

  rewrite(blocks as unknown as LooseNode[]);
}
|
|
575
|
+
|
|
463
576
|
function parseBodyChild(
|
|
464
577
|
node: XmlElementNode,
|
|
465
578
|
sourceXml: string,
|
|
@@ -1911,6 +2024,7 @@ function parseRun(
|
|
|
1911
2024
|
relationships,
|
|
1912
2025
|
mediaParts,
|
|
1913
2026
|
sourcePartPath,
|
|
2027
|
+
activeChartPartLookup,
|
|
1914
2028
|
);
|
|
1915
2029
|
if (complexContent) {
|
|
1916
2030
|
result.push(complexContent);
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
CanonicalDocument,
|
|
3
|
+
DocumentRootNode,
|
|
4
|
+
InlineNode,
|
|
5
|
+
} from "../../model/canonical-document.ts";
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* Reserved OOXML bookmark-name prefix used to discriminate S1 scope markers
|
|
9
|
+
* from user-authored bookmarks. On export, each scope marker emits as
|
|
10
|
+
* `<w:bookmarkStart w:name="bw:scope:<scopeId>"/>` / `<w:bookmarkEnd/>`. On
|
|
11
|
+
* import, any bookmark whose name starts with this prefix is extracted as a
|
|
12
|
+
* `scope_marker_*` inline node pair and removed from the regular bookmark
|
|
13
|
+
* list so user-facing bookmark APIs stay clean.
|
|
14
|
+
*/
|
|
15
|
+
export const SCOPE_MARKER_BOOKMARK_PREFIX = "bw:scope:";
|
|
16
|
+
|
|
17
|
+
export interface ScopeMarkerBookmark {
|
|
18
|
+
/** Serialized bookmark id (shared between start + end in the OOXML pair). */
|
|
19
|
+
bookmarkId: string;
|
|
20
|
+
/** Full bookmark name, `bw:scope:<scopeId>` — i.e. `SCOPE_MARKER_BOOKMARK_PREFIX` followed by `scopeId`. */
|
|
21
|
+
name: string;
|
|
22
|
+
boundary: "start" | "end";
|
|
23
|
+
scopeId: string;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
/**
 * Collect bookmark descriptors for every scope marker in a document.
 *
 * The tree is traversed with `walkInlineNodes`; each `scope_marker_start`
 * receives the next sequential id (counting from 0) and each
 * `scope_marker_end` reuses the id last assigned to a start with the same
 * `scopeId`. An end with no recorded start gets a fresh id of its own. Every
 * descriptor carries the prefixed bookmark name, its boundary and the
 * `scopeId`, in visit order.
 *
 * @param document A canonical document, or any object exposing its `content`.
 * @returns One descriptor per scope-marker node encountered.
 */
export function serializeScopeMarkersToBookmarks(
  document: CanonicalDocument | Pick<CanonicalDocument, "content">,
): ScopeMarkerBookmark[] {
  // Accept either a document wrapper or (at runtime) a bare root node.
  const root = ("content" in document
    ? document.content
    : document) as unknown as DocumentRootNode;

  const descriptors: ScopeMarkerBookmark[] = [];
  const idByScope = new Map<string, string>();
  let nextId = 0;
  const allocateId = (): string => String(nextId++);

  walkInlineNodes(root, (node) => {
    switch (node.type) {
      case "scope_marker_start": {
        const bookmarkId = allocateId();
        idByScope.set(node.scopeId, bookmarkId);
        descriptors.push({
          bookmarkId,
          name: `${SCOPE_MARKER_BOOKMARK_PREFIX}${node.scopeId}`,
          boundary: "start",
          scopeId: node.scopeId,
        });
        break;
      }
      case "scope_marker_end": {
        const bookmarkId = idByScope.get(node.scopeId) ?? allocateId();
        descriptors.push({
          bookmarkId,
          name: `${SCOPE_MARKER_BOOKMARK_PREFIX}${node.scopeId}`,
          boundary: "end",
          scopeId: node.scopeId,
        });
        break;
      }
      default:
        break;
    }
  });

  return descriptors;
}
|
|
67
|
+
|
|
68
|
+
export interface ParsedScopeMarkerPair {
|
|
69
|
+
scopeId: string;
|
|
70
|
+
bookmarkId: string;
|
|
71
|
+
startIndex: number;
|
|
72
|
+
endIndex: number;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
export interface RawBookmark {
|
|
76
|
+
readonly type: "bookmark_start" | "bookmark_end";
|
|
77
|
+
readonly bookmarkId: string;
|
|
78
|
+
readonly name?: string;
|
|
79
|
+
readonly index: number;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
/**
 * Split an OOXML bookmark list into (a) scope-marker pairs identified by the
 * `bw:scope:` name prefix and (b) everything else.
 *
 * A start whose name carries the prefix (with a non-empty scope id and a
 * non-empty `bookmarkId`) is paired with the next end that has the same
 * `bookmarkId`. Only bookmarks that end up in a completed pair are removed
 * from the remainder: a scope-named start that never finds its end, or an
 * end with no open scope start, stays in `remainingBookmarks` so nothing is
 * silently dropped.
 *
 * @param rawBookmarks Bookmark boundaries in document order.
 * @returns `scopeMarkers` in the order their ends were encountered, and
 *          `remainingBookmarks` in their original input order.
 */
export function parseScopeMarkersFromBookmarks(
  rawBookmarks: readonly RawBookmark[],
): { scopeMarkers: ParsedScopeMarkerPair[]; remainingBookmarks: RawBookmark[] } {
  // bookmarkId -> the still-open scope start and its position in the input.
  const openStarts = new Map<
    string,
    { scopeId: string; startIndex: number; position: number }
  >();
  // Input positions that became part of a completed scope pair.
  const consumed = new Set<number>();
  const scopeMarkers: ParsedScopeMarkerPair[] = [];

  rawBookmarks.forEach((bm, position) => {
    if (bm.type === "bookmark_start") {
      const name = bm.name ?? "";
      if (!name.startsWith(SCOPE_MARKER_BOOKMARK_PREFIX)) return;
      const scopeId = name.slice(SCOPE_MARKER_BOOKMARK_PREFIX.length);
      // Same guard as the block-tree rewrite: both ids must be non-empty.
      if (bm.bookmarkId && scopeId) {
        openStarts.set(bm.bookmarkId, { scopeId, startIndex: bm.index, position });
      }
      return;
    }

    const open = openStarts.get(bm.bookmarkId);
    if (!open) return;

    scopeMarkers.push({
      scopeId: open.scopeId,
      bookmarkId: bm.bookmarkId,
      startIndex: open.startIndex,
      endIndex: bm.index,
    });
    consumed.add(open.position);
    consumed.add(position);
    openStarts.delete(bm.bookmarkId);
  });

  // Anything not consumed by a pair — including orphaned scope starts —
  // is handed back untouched and in input order.
  const remainingBookmarks = rawBookmarks.filter(
    (_, position) => !consumed.has(position),
  );

  return { scopeMarkers, remainingBookmarks };
}
|
|
130
|
+
|
|
131
|
+
/**
 * Depth-first walk that calls `visit` on every node whose `type` is one of
 * the inline leaf kinds listed below.
 *
 * A leaf is visited and never descended into. Any other object has its
 * `children` array walked; a `table` additionally has its `rows` walked and
 * a `table_row` its `cells`. Non-object values are ignored.
 *
 * @param node  Root, inline node, or loosely shaped container to start from.
 * @param visit Callback invoked once per inline leaf, in traversal order.
 */
function walkInlineNodes(
  node: DocumentRootNode | InlineNode | { children?: unknown; rows?: unknown; cells?: unknown; type?: string },
  visit: (inline: InlineNode) => void,
): void {
  if (!node || typeof node !== "object") return;

  const nodeType = (node as { type?: string }).type;

  switch (nodeType) {
    case "text":
    case "tab":
    case "hard_break":
    case "column_break":
    case "symbol":
    case "image":
    case "bookmark_start":
    case "bookmark_end":
    case "scope_marker_start":
    case "scope_marker_end":
    case "opaque_inline":
    case "footnote_ref":
    case "chart_preview":
    case "smartart_preview":
    case "shape":
    case "wordart":
    case "vml_shape":
      // Inline leaf: report it and stop.
      visit(node as InlineNode);
      return;
    default:
      break;
  }

  const descend = (list: unknown): void => {
    if (!Array.isArray(list)) return;
    for (const item of list) {
      walkInlineNodes(item as InlineNode, visit);
    }
  };

  // Generic containers first, then the table-specific collections.
  descend((node as { children?: unknown }).children);

  if (nodeType === "table") {
    descend((node as { rows?: unknown }).rows);
  } else if (nodeType === "table_row") {
    descend((node as { cells?: unknown }).cells);
  }
}
|