@beyondwork/docx-react-component 1.0.47 → 1.0.48

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/package.json +1 -1
  2. package/src/api/public-types.ts +115 -1
  3. package/src/compare/diff-engine.ts +4 -0
  4. package/src/core/commands/add-scope.ts +257 -0
  5. package/src/core/commands/formatting-commands.ts +2 -0
  6. package/src/core/schema/text-schema.ts +95 -1
  7. package/src/core/state/text-transaction.ts +17 -5
  8. package/src/io/chart-preview-resolver.ts +27 -0
  9. package/src/io/docx-session.ts +226 -38
  10. package/src/io/export/serialize-main-document.ts +37 -0
  11. package/src/io/export/serialize-settings.ts +421 -0
  12. package/src/io/export/serialize-styles.ts +10 -0
  13. package/src/io/normalize/normalize-text.ts +1 -0
  14. package/src/io/ooxml/chart/parse-axis.ts +277 -0
  15. package/src/io/ooxml/chart/parse-chart-space.ts +813 -0
  16. package/src/io/ooxml/chart/parse-series.ts +570 -0
  17. package/src/io/ooxml/chart/resolve-color.ts +251 -0
  18. package/src/io/ooxml/chart/types.ts +420 -0
  19. package/src/io/ooxml/parse-block-structure.ts +99 -0
  20. package/src/io/ooxml/parse-complex-content.ts +87 -2
  21. package/src/io/ooxml/parse-main-document.ts +115 -1
  22. package/src/io/ooxml/parse-scope-markers.ts +184 -0
  23. package/src/io/ooxml/parse-settings-blueprint.ts +349 -0
  24. package/src/io/ooxml/parse-settings.ts +97 -1
  25. package/src/io/ooxml/parse-styles.ts +65 -0
  26. package/src/io/ooxml/parse-theme.ts +2 -127
  27. package/src/io/ooxml/xml-attr-helpers.ts +59 -1
  28. package/src/io/ooxml/xml-parser.ts +142 -0
  29. package/src/model/canonical-document.ts +94 -0
  30. package/src/model/scope-markers.ts +144 -0
  31. package/src/runtime/collab/base-doc-fingerprint.ts +99 -0
  32. package/src/runtime/collab/checkpoint-election.ts +75 -0
  33. package/src/runtime/collab/checkpoint-scheduler.ts +204 -0
  34. package/src/runtime/collab/checkpoint-store.ts +115 -0
  35. package/src/runtime/collab/event-types.ts +27 -0
  36. package/src/runtime/collab/index.ts +22 -0
  37. package/src/runtime/collab/remote-cursor-awareness.ts +167 -0
  38. package/src/runtime/collab/runtime-collab-sync.ts +279 -0
  39. package/src/runtime/document-runtime.ts +214 -16
  40. package/src/runtime/editor-surface/capabilities.ts +63 -50
  41. package/src/runtime/layout/layout-engine-version.ts +8 -1
  42. package/src/runtime/prerender/cache-envelope.ts +19 -7
  43. package/src/runtime/prerender/cache-key.ts +25 -14
  44. package/src/runtime/prerender/canonical-document-hash.ts +63 -0
  45. package/src/runtime/prerender/customxml-cache.ts +211 -0
  46. package/src/runtime/prerender/customxml-probe.ts +78 -0
  47. package/src/runtime/prerender/prerender-document.ts +74 -7
  48. package/src/runtime/scope-resolver.ts +148 -0
  49. package/src/runtime/scope-tag-registry.ts +10 -0
  50. package/src/runtime/surface-projection.ts +8 -1
  51. package/src/ui/WordReviewEditor.tsx +30 -0
  52. package/src/ui/editor-runtime-boundary.ts +6 -1
  53. package/src/ui/runtime-shortcut-dispatch.ts +12 -7
@@ -0,0 +1,99 @@
1
+ /**
2
+ * L7 Phase 2.5 Plan B B.7 — shallow structural probe for `word/document.xml`.
3
+ *
4
+ * Emits the ordered (kind, blockId) list for top-level body children without
5
+ * building the canonical-document model. Used by `customxml-probe` to verify
6
+ * that a cached laycache envelope's `structuralHash` still matches the
7
+ * document the envelope was written against.
8
+ *
9
+ * **Correctness requirement.** The output must match what
10
+ * `surface-projection.ts:createSurfaceBlock` emits when walking a
11
+ * canonical document produced by the full parse pipeline. Specifically:
12
+ * - Paragraph blockIds use a GLOBAL counter incremented on every
13
+ * `<w:p>` encountered ANYWHERE in the tree (top-level or nested
14
+ * inside a table cell). The top-level paragraph's blockId is
15
+ * `paragraph-${counter_at_time_of_encounter}`.
16
+ * - Table blockIds use a GLOBAL counter incremented on every
17
+ * `<w:tbl>` at any depth.
18
+ * - Other top-level elements (`<w:sdt>`, `<w:altChunk>`, `<w:sectPr>`)
19
+ * are NOT emitted by this probe.
20
+ *
21
+ * **Known limitation (2026-04-19 shipping state).** The full parse
22
+ * promotes certain `<w:p>` elements to `opaque_block` based on their
23
+ * content — e.g. paragraphs containing structured content controls
24
+ * (`<w:sdt>`), floating drawings (`<w:drawing>` with `<wp:anchor>`),
25
+ * or `<mc:AlternateContent>` markup-compat wrappers. The shallow probe
26
+ * cannot detect these patterns without a deeper walk, so it counts such
27
+ * paragraphs as plain `paragraph` blocks. On docs where this triggers
28
+ * (~20% of F-series fixtures; 2 of 3 CCEP templates), the probe's
29
+ * structural hash diverges from the envelope's → cache is rejected →
30
+ * safe fallback to the full-parse open path. Plan B warm-cache opt-in
31
+ * is "clean docs only" under this probe.
32
+ *
33
+ * Future improvement: refine the probe to detect `<w:sdt>`,
34
+ * `<w:drawing>` containing `<wp:anchor>`, and `<mc:AlternateContent>` inside top-level
35
+ * paragraphs and classify them accordingly. Deferred unless real-world
36
+ * hit rates prove insufficient.
37
+ *
38
+ * **Cost budget.** <30 ms on extra-large CCEP (~2.7 MB document.xml).
39
+ * Single regex walk, O(bytes). No DOM, no full XML parse.
40
+ *
41
+ * **Fidelity gate.** `test/io/parse-block-structure.test.ts` compares
42
+ * probe output against full-parse blockIds on representative fixtures
43
+ * (F01/F02/F05/F48 for paragraph + table patterns, plus a clean CCEP
44
+ * template). Docs with opaque-promoting features are covered by a
45
+ * separate "safe fallback" test rather than the strict match.
46
+ */
47
+
48
/** One top-level body block as reported by the shallow probe. */
export interface BlockStructureProbe {
  readonly kind: "paragraph" | "table";
  readonly blockId: string;
}

/** Captures the text between the first `<w:body …>` and the first `</w:body>`. */
const BODY_RE = /<w:body\b[^>]*>([\s\S]*?)<\/w:body>/u;

/**
 * Matches opening, closing and self-closing `<w:p>` / `<w:tbl>` tags.
 * Group 1 is "/" for a closing tag, group 2 is the tag name, group 3 is "/"
 * for a self-closing tag. The `\b` keeps `<w:pPr>` / `<w:tblPr>` from matching.
 */
const TAG_RE = /<(\/?)w:(p|tbl)\b[^>]*?(\/?)>/gu;

/**
 * Shallow structural probe over a `word/document.xml` string.
 *
 * Scans the `<w:body>` content with a single regex pass and returns one entry
 * per `<w:p>` / `<w:tbl>` that is not nested inside another `<w:p>` / `<w:tbl>`.
 * Block ids are `paragraph-N` / `table-N`, where N is the number of paragraphs
 * (respectively tables) opened before it at ANY nesting depth.
 *
 * @param documentXml Raw XML text of the main document part.
 * @returns Ordered probe entries; an empty array when no `<w:body>` is found.
 */
export function parseBlockStructure(documentXml: string): BlockStructureProbe[] {
  const body = BODY_RE.exec(documentXml)?.[1];
  if (body === undefined) return [];

  const results: BlockStructureProbe[] = [];
  let paragraphCounter = 0;
  let tableCounter = 0;
  let depth = 0;

  // `matchAll` iterates over a private clone of TAG_RE, so the shared regex's
  // `lastIndex` is never mutated by this function.
  for (const match of body.matchAll(TAG_RE)) {
    if (match[1] === "/") {
      // Clamp at zero: an unbalanced closing tag must not push the depth
      // negative, otherwise every following top-level block is suppressed.
      depth = Math.max(0, depth - 1);
      continue;
    }

    const isParagraph = match[2] === "p";

    if (depth === 0) {
      results.push(
        isParagraph
          ? { kind: "paragraph", blockId: `paragraph-${paragraphCounter}` }
          : { kind: "table", blockId: `table-${tableCounter}` },
      );
    }

    // Global counter bumps (all depths, including top-level).
    if (isParagraph) {
      paragraphCounter += 1;
    } else {
      tableCounter += 1;
    }

    // Self-closing tags open nothing, so they leave the depth untouched.
    if (match[3] !== "/") {
      depth += 1;
    }
  }

  return results;
}
@@ -11,12 +11,22 @@
11
11
 
12
12
  import type { OpcRelationship } from "./part-manifest.ts";
13
13
  import { normalizePartPath, resolveRelationshipTarget } from "./part-manifest.ts";
14
+ import { parseChartSpace } from "./chart/parse-chart-space.ts";
15
+ import type { ChartModel } from "./chart/types.ts";
14
16
 
15
17
  export interface InlineMediaPart {
16
18
  path: string;
17
19
  contentType: string;
18
20
  }
19
21
 
22
+ /**
23
+ * Callback that resolves a chart relationship id (the `r:id` on a
24
+ * `<c:chart>` reference) to the chart-part XML body. Returning undefined
25
+ * skips ChartModel population — the drawing still parses as a
26
+ * `ParsedChartContent` with `rawXml`, just without `parsedData`.
27
+ */
28
+ export type ChartPartLookup = (rId: string) => string | undefined;
29
+
20
30
  export interface ParsedChartContent {
21
31
  type: "chart_preview";
22
32
  /** Media ID of the fallback preview image, if one is present in mc:Fallback. */
@@ -25,6 +35,17 @@ export interface ParsedChartContent {
25
35
  previewPackagePartName?: string;
26
36
  /** MIME type of the preview media (e.g. `image/png`, `image/svg+xml`). */
27
37
  previewContentType?: string;
38
+ /**
39
+ * Stage 1 typed chart model, when the chart part XML resolved and
40
+ * parsed cleanly. Undefined when no chart-part lookup was supplied, the
41
+ * lookup returned undefined, or `parseChartSpace` threw / returned an
42
+ * `UnsupportedChartModel` with reason="parse-error".
43
+ *
44
+ * A successful `UnsupportedChartModel{reason: "not-yet-implemented"}`
45
+ * IS attached — the renderer decides whether to fall back; preserve-
46
+ * only rawXml always survives export.
47
+ */
48
+ parsedData?: ChartModel;
28
49
  /** Original drawing XML slice for lossless round-trip export. */
29
50
  rawXml: string;
30
51
  }
@@ -60,6 +81,7 @@ export function parseComplexContentXml(
60
81
  relationships: readonly OpcRelationship[],
61
82
  mediaParts: ReadonlyMap<string, InlineMediaPart> = new Map(),
62
83
  sourcePartPath = "/word/document.xml",
84
+ chartPartLookup?: ChartPartLookup,
63
85
  ): ParsedComplexContent | null {
64
86
  const root = parseXml(drawingXml);
65
87
  const relationshipMap = new Map(relationships.map((r) => [r.id, r]));
@@ -67,7 +89,14 @@ export function parseComplexContentXml(
67
89
  // Look for mc:AlternateContent at any depth
68
90
  const altContent = findFirstDescendant(root, "AlternateContent");
69
91
  if (altContent) {
70
- return parseAlternateContent(altContent, drawingXml, relationshipMap, mediaParts, sourcePartPath);
92
+ return parseAlternateContent(
93
+ altContent,
94
+ drawingXml,
95
+ relationshipMap,
96
+ mediaParts,
97
+ sourcePartPath,
98
+ chartPartLookup,
99
+ );
71
100
  }
72
101
 
73
102
  // No mc:AlternateContent — look for direct graphic data
@@ -78,7 +107,10 @@ export function parseComplexContentXml(
78
107
 
79
108
  const uri = graphicData.attributes.uri ?? graphicData.attributes["uri"] ?? "";
80
109
  if (isChartUri(uri)) {
81
- return { type: "chart_preview", rawXml: drawingXml };
110
+ const parsedData = maybeParseChart(root, chartPartLookup);
111
+ const node: ParsedChartContent = { type: "chart_preview", rawXml: drawingXml };
112
+ if (parsedData) node.parsedData = parsedData;
113
+ return node;
82
114
  }
83
115
  if (isSmartArtUri(uri)) {
84
116
  return { type: "smartart_preview", rawXml: drawingXml };
@@ -87,12 +119,43 @@ export function parseComplexContentXml(
87
119
  return null;
88
120
  }
89
121
 
122
/**
 * Best-effort resolution of a drawing's chart reference into a ChartModel.
 *
 * Looks up the first descendant named `chart` in the drawing, reads its
 * relationship id (`r:id`, then `id`, then `r:embed`), asks the lookup
 * callback for the chart-part XML and feeds that to `parseChartSpace`.
 *
 * @param drawingRoot Parsed drawing subtree to search for the chart reference.
 * @param chartPartLookup Resolver from relationship id to chart-part XML;
 *   when absent nothing is parsed.
 * @returns The value returned by `parseChartSpace`, or `undefined` when the
 *   lookup is missing, no reference / id / XML is found, or parsing throws.
 *
 * NOTE(review): the `parsedData` field docs say a "parse-error"
 * UnsupportedChartModel is not attached, but this helper forwards whatever
 * `parseChartSpace` returns — confirm where that filtering is meant to happen.
 */
function maybeParseChart(
  drawingRoot: XmlElementNode,
  chartPartLookup: ChartPartLookup | undefined,
): ChartModel | undefined {
  if (!chartPartLookup) return undefined;

  const chartRef = findFirstDescendant(drawingRoot, "chart");
  if (!chartRef) return undefined;

  const attrs = chartRef.attributes;
  const relationshipId = attrs["r:id"] ?? attrs["id"] ?? attrs["r:embed"];
  // An empty id is treated the same as a missing one.
  const chartXml = relationshipId ? chartPartLookup(relationshipId) : undefined;
  if (!chartXml) return undefined;

  let model: ChartModel | undefined;
  try {
    model = parseChartSpace(chartXml);
  } catch {
    // Deliberate best-effort: the caller still emits the node with rawXml.
    model = undefined;
  }
  return model;
}
151
+
90
152
  function parseAlternateContent(
91
153
  altContent: XmlElementNode,
92
154
  fullDrawingXml: string,
93
155
  relationshipMap: Map<string, OpcRelationship>,
94
156
  mediaParts: ReadonlyMap<string, InlineMediaPart>,
95
157
  sourcePartPath: string,
158
+ chartPartLookup: ChartPartLookup | undefined,
96
159
  ): ParsedComplexContent | null {
97
160
  const choice = findFirstChild(altContent, "Choice");
98
161
  const fallback = findFirstChild(altContent, "Fallback");
@@ -150,6 +213,28 @@ function parseAlternateContent(
150
213
  }
151
214
  }
152
215
 
216
+ // For chart_preview, try to populate parsedData from the referenced
217
+ // chart part. parseAlternateContent is called with the AlternateContent
218
+ // subtree; the <c:chart> reference typically lives in the Choice branch,
219
+ // so we search from the altContent root (captures both Choice and any
220
+ // nested graphicData paths).
221
+ let parsedData: ChartModel | undefined;
222
+ if (contentType === "chart_preview") {
223
+ parsedData = maybeParseChart(altContent, chartPartLookup);
224
+ }
225
+
226
+ if (contentType === "chart_preview") {
227
+ const node: ParsedChartContent = {
228
+ type: "chart_preview",
229
+ ...(previewMediaId ? { previewMediaId } : {}),
230
+ ...(previewPackagePartName ? { previewPackagePartName } : {}),
231
+ ...(previewContentType ? { previewContentType } : {}),
232
+ rawXml: fullDrawingXml,
233
+ };
234
+ if (parsedData) node.parsedData = parsedData;
235
+ return node;
236
+ }
237
+
153
238
  return {
154
239
  type: contentType,
155
240
  ...(previewMediaId ? { previewMediaId } : {}),
@@ -26,12 +26,13 @@ import type {
26
26
  SectionPageBorders,
27
27
  } from "../../model/canonical-document.ts";
28
28
  import type { OpcRelationship } from "./part-manifest.ts";
29
+ import { SCOPE_MARKER_BOOKMARK_PREFIX } from "./parse-scope-markers.ts";
29
30
  import {
30
31
  parseInlineMediaXml,
31
32
  type InlineMediaPart,
32
33
  } from "./parse-inline-media.ts";
33
34
  import { toCanonicalNumberingInstanceId } from "./parse-numbering.ts";
34
- import { parseComplexContentXml } from "./parse-complex-content.ts";
35
+ import { parseComplexContentXml, type ChartPartLookup } from "./parse-complex-content.ts";
35
36
  import { parseShapeXml, parseVmlXml } from "./parse-shapes.ts";
36
37
  import { classifyFieldInstruction } from "./parse-fields.ts";
37
38
  import { resolveHighlightColor } from "./highlight-colors.ts";
@@ -213,6 +214,9 @@ export interface ParsedChartPreviewNode {
213
214
  previewMediaId?: string;
214
215
  previewPackagePartName?: string;
215
216
  previewContentType?: string;
217
+ /** Typed chart data parsed from the c:chartSpace part. See
218
+ * `src/io/ooxml/parse-complex-content.ts` for semantics. */
219
+ parsedData?: import("./chart/types.ts").ChartModel;
216
220
  rawXml: string;
217
221
  }
218
222
 
@@ -429,11 +433,38 @@ interface MarksParseResult {
429
433
  const HYPERLINK_RELATIONSHIP_TYPE =
430
434
  "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink";
431
435
 
436
/**
 * Request-scoped chart-part lookup. Set by `parseMainDocumentXml` for
 * the duration of a single top-level parse and read at the
 * `<w:drawing>` → `parseComplexContentXml` call site. A module variable
 * is used instead of threading the callback through the intermediate
 * function signatures.
 *
 * `parseMainDocumentXml` saves the previous value and restores it in a
 * `finally`, so the variable neither leaks after a parse (even one that
 * throws) nor gets wiped by a nested `parseMainDocumentXml` call made
 * while an outer parse is still running.
 */
let activeChartPartLookup: ChartPartLookup | undefined;

/**
 * Parse the main document part.
 *
 * Installs `chartPartLookup` as the active lookup, delegates to
 * `parseMainDocumentXmlInner`, and restores the previously active lookup
 * afterwards.
 *
 * @param xml XML text of the document part.
 * @param relationships Relationships of the part; defaults to none.
 * @param mediaParts Known media parts; defaults to an empty map.
 * @param sourcePartPath Package path of the part being parsed.
 * @param chartPartLookup Optional resolver from a chart relationship id to
 *   chart-part XML.
 * @returns The result of `parseMainDocumentXmlInner`.
 */
export function parseMainDocumentXml(
  xml: string,
  relationships: readonly OpcRelationship[] = [],
  mediaParts: ReadonlyMap<string, InlineMediaPart> = new Map(),
  sourcePartPath = "/word/document.xml",
  chartPartLookup?: ChartPartLookup,
): ParsedMainDocument {
  // Save/restore rather than reset-to-undefined: a nested call (for example
  // one triggered from inside the lookup callback) must not clear the
  // lookup that the outer parse is still relying on.
  const previousLookup = activeChartPartLookup;
  activeChartPartLookup = chartPartLookup;
  try {
    return parseMainDocumentXmlInner(xml, relationships, mediaParts, sourcePartPath);
  } finally {
    activeChartPartLookup = previousLookup;
  }
}
462
+
463
+ function parseMainDocumentXmlInner(
464
+ xml: string,
465
+ relationships: readonly OpcRelationship[],
466
+ mediaParts: ReadonlyMap<string, InlineMediaPart>,
467
+ sourcePartPath: string,
437
468
  ): ParsedMainDocument {
438
469
  const root = parseXml(xml);
439
470
  const documentElement = findChildElement(root, "document");
@@ -457,9 +488,91 @@ export function parseMainDocumentXml(
457
488
  }
458
489
  }
459
490
 
491
+ rewriteScopeMarkerBookmarks(blocks);
492
+
460
493
  return { blocks, finalSectionProperties };
461
494
  }
462
495
 
496
/**
 * S1 — post-process the parsed block tree in place, converting bookmark
 * pairs whose start `name` begins with `bw:scope:` into `scope_marker_*`
 * inline nodes. The `bookmarkId` pairs start and end; the `scopeId` is the
 * part of the name after the prefix.
 *
 * Only ids that have BOTH a scope-prefixed start and an end somewhere in the
 * tree are rewritten. Unmatched bookmarks (start without end or vice versa)
 * stay as regular bookmarks — S1 markers are always emitted in pairs on
 * export, so an orphan implies upstream corruption that is preserved rather
 * than half-converted.
 *
 * @param blocks Parsed top-level blocks; nested `children`, `rows` and
 *   `cells` arrays are traversed and mutated in place.
 */
function rewriteScopeMarkerBookmarks(blocks: ParsedBlockNode[]): void {
  type LooseNode = { type?: string; [key: string]: unknown };

  /** Nested node arrays of `node`, in children → rows → cells order. */
  const nestedLists = (node: LooseNode): LooseNode[][] => {
    const lists: LooseNode[][] = [];
    for (const key of ["children", "rows", "cells"]) {
      const value = node[key];
      if (Array.isArray(value)) lists.push(value as LooseNode[]);
    }
    return lists;
  };

  const bookmarkIdOf = (node: LooseNode): string =>
    (node.bookmarkId as string | undefined) ?? "";

  const scopeIdByBookmarkId = new Map<string, string>();
  const endBookmarkIds = new Set<string>();

  // Pass 1: record every scope-prefixed start and every end id.
  const collect = (nodes: readonly LooseNode[]): void => {
    for (const node of nodes) {
      if (!node || typeof node !== "object") continue;

      if (node.type === "bookmark_start") {
        const name = (node.name as string | undefined) ?? "";
        if (name.startsWith(SCOPE_MARKER_BOOKMARK_PREFIX)) {
          const bookmarkId = bookmarkIdOf(node);
          const scopeId = name.slice(SCOPE_MARKER_BOOKMARK_PREFIX.length);
          if (bookmarkId && scopeId) {
            scopeIdByBookmarkId.set(bookmarkId, scopeId);
          }
        }
      } else if (node.type === "bookmark_end") {
        const bookmarkId = bookmarkIdOf(node);
        if (bookmarkId) endBookmarkIds.add(bookmarkId);
      }

      for (const list of nestedLists(node)) collect(list);
    }
  };

  // Pass 2: replace both halves of every complete pair.
  const rewriteInPlace = (nodes: LooseNode[]): void => {
    for (let i = 0; i < nodes.length; i += 1) {
      const node = nodes[i];
      if (!node || typeof node !== "object") continue;

      if (node.type === "bookmark_start" || node.type === "bookmark_end") {
        const scopeId = scopeIdByBookmarkId.get(bookmarkIdOf(node));
        if (scopeId !== undefined) {
          nodes[i] = {
            type: node.type === "bookmark_start" ? "scope_marker_start" : "scope_marker_end",
            scopeId,
          };
          continue;
        }
      }

      for (const list of nestedLists(node)) rewriteInPlace(list);
    }
  };

  // Pairing is by id across the whole tree — the end may appear in a later
  // paragraph than its matching start.
  const looseBlocks = blocks as unknown as LooseNode[];
  collect(looseBlocks);

  // Drop starts that never found an end so orphans remain plain bookmarks.
  for (const bookmarkId of [...scopeIdByBookmarkId.keys()]) {
    if (!endBookmarkIds.has(bookmarkId)) scopeIdByBookmarkId.delete(bookmarkId);
  }
  if (scopeIdByBookmarkId.size === 0) return;

  rewriteInPlace(looseBlocks);
}
575
+
463
576
  function parseBodyChild(
464
577
  node: XmlElementNode,
465
578
  sourceXml: string,
@@ -1911,6 +2024,7 @@ function parseRun(
1911
2024
  relationships,
1912
2025
  mediaParts,
1913
2026
  sourcePartPath,
2027
+ activeChartPartLookup,
1914
2028
  );
1915
2029
  if (complexContent) {
1916
2030
  result.push(complexContent);
@@ -0,0 +1,184 @@
1
+ import type {
2
+ CanonicalDocument,
3
+ DocumentRootNode,
4
+ InlineNode,
5
+ } from "../../model/canonical-document.ts";
6
+
7
+ /**
8
+ * Reserved OOXML bookmark-name prefix used to discriminate S1 scope markers
9
+ * from user-authored bookmarks. On export, each scope marker emits as
10
+ * `<w:bookmarkStart w:name="bw:scope:<scopeId>"/>` / `<w:bookmarkEnd/>`. On
11
+ * import, any bookmark whose name starts with this prefix is extracted as a
12
+ * `scope_marker_*` inline node pair and removed from the regular bookmark
13
+ * list so user-facing bookmark APIs stay clean.
14
+ */
15
+ export const SCOPE_MARKER_BOOKMARK_PREFIX = "bw:scope:";
16
+
17
export interface ScopeMarkerBookmark {
  /** Serialized bookmark id (shared between start + end in the OOXML pair). */
  bookmarkId: string;
  /** Full bookmark name: `SCOPE_MARKER_BOOKMARK_PREFIX` followed by `scopeId`. */
  name: string;
  boundary: "start" | "end";
  scopeId: string;
}

/**
 * Collect bookmark descriptors for every scope marker in a document.
 *
 * The tree is traversed with `walkInlineNodes`; each `scope_marker_start`
 * receives the next sequential id (starting at "0") and each
 * `scope_marker_end` reuses the id most recently assigned to the same
 * `scopeId`. An end whose `scopeId` has not been seen gets a fresh id of its
 * own. Descriptors are returned in visit order.
 *
 * @param document A canonical document, or any object exposing its `content`.
 * @returns One descriptor per scope marker boundary encountered.
 */
export function serializeScopeMarkersToBookmarks(
  document: CanonicalDocument | Pick<CanonicalDocument, "content">,
): ScopeMarkerBookmark[] {
  let root: DocumentRootNode;
  if ("content" in document) {
    root = document.content as DocumentRootNode;
  } else {
    root = document as unknown as DocumentRootNode;
  }

  const bookmarks: ScopeMarkerBookmark[] = [];
  const idByScope = new Map<string, string>();
  let nextId = 0;

  const allocateId = (): string => {
    const id = String(nextId);
    nextId += 1;
    return id;
  };

  const describe = (
    scopeId: string,
    bookmarkId: string,
    boundary: ScopeMarkerBookmark["boundary"],
  ): ScopeMarkerBookmark => ({
    bookmarkId,
    name: `${SCOPE_MARKER_BOOKMARK_PREFIX}${scopeId}`,
    boundary,
    scopeId,
  });

  walkInlineNodes(root, (node) => {
    switch (node.type) {
      case "scope_marker_start": {
        const bookmarkId = allocateId();
        idByScope.set(node.scopeId, bookmarkId);
        bookmarks.push(describe(node.scopeId, bookmarkId, "start"));
        break;
      }
      case "scope_marker_end": {
        // A fresh id is only allocated when no start was recorded.
        const bookmarkId = idByScope.get(node.scopeId) ?? allocateId();
        bookmarks.push(describe(node.scopeId, bookmarkId, "end"));
        break;
      }
      default:
        break;
    }
  });

  return bookmarks;
}
67
+
68
/** A matched scope-marker bookmark pair and the positions of its two halves. */
export interface ParsedScopeMarkerPair {
  scopeId: string;
  bookmarkId: string;
  startIndex: number;
  endIndex: number;
}

/** A bookmark boundary as found in the source, with its position in the list. */
export interface RawBookmark {
  readonly type: "bookmark_start" | "bookmark_end";
  readonly bookmarkId: string;
  readonly name?: string;
  readonly index: number;
}

/**
 * Split an OOXML bookmark list into (a) scope-marker pairs extracted via the
 * `bw:scope:` prefix convention and (b) the remaining bookmarks.
 *
 * Extraction is id-paired: a start whose name carries the prefix pairs with
 * the next end that has the same `bookmarkId`. Only the two halves of a
 * completed pair are removed from the remaining list. A prefixed start that
 * never finds its end, or one with an empty scope id or bookmark id, is kept
 * in `remainingBookmarks` so no input is silently lost.
 *
 * @param rawBookmarks Bookmark boundaries in document order.
 * @returns `scopeMarkers` in the order their ends were encountered, and
 *   `remainingBookmarks` in input order.
 */
export function parseScopeMarkersFromBookmarks(
  rawBookmarks: readonly RawBookmark[],
): { scopeMarkers: ParsedScopeMarkerPair[]; remainingBookmarks: RawBookmark[] } {
  const openStarts = new Map<string, { scopeId: string; start: RawBookmark }>();
  // Boundaries that ended up inside a completed scope-marker pair.
  const consumed = new Set<RawBookmark>();
  const scopeMarkers: ParsedScopeMarkerPair[] = [];

  for (const bm of rawBookmarks) {
    if (bm.type === "bookmark_start") {
      const name = bm.name ?? "";
      if (name.startsWith(SCOPE_MARKER_BOOKMARK_PREFIX)) {
        const scopeId = name.slice(SCOPE_MARKER_BOOKMARK_PREFIX.length);
        if (scopeId && bm.bookmarkId) {
          openStarts.set(bm.bookmarkId, { scopeId, start: bm });
        }
      }
      continue;
    }

    const open = openStarts.get(bm.bookmarkId);
    if (!open) continue;

    scopeMarkers.push({
      scopeId: open.scopeId,
      bookmarkId: bm.bookmarkId,
      startIndex: open.start.index,
      endIndex: bm.index,
    });
    consumed.add(open.start);
    consumed.add(bm);
    openStarts.delete(bm.bookmarkId);
  }

  // Everything not part of a completed pair survives, in input order.
  const remainingBookmarks = rawBookmarks.filter((bm) => !consumed.has(bm));

  return { scopeMarkers, remainingBookmarks };
}
130
+
131
/**
 * Depth-first, pre-order traversal that hands inline leaf nodes to `visit`.
 *
 * A node whose `type` is one of the listed inline leaf kinds is visited and
 * not descended into. Any other node has its `children` array walked; in
 * addition a `table` node has its `rows` walked and a `table_row` node its
 * `cells`. Non-object values are ignored.
 *
 * @param node Subtree root to walk.
 * @param visit Callback invoked once per inline leaf, in document order.
 */
function walkInlineNodes(
  node: DocumentRootNode | InlineNode | { children?: unknown; rows?: unknown; cells?: unknown; type?: string },
  visit: (inline: InlineNode) => void,
): void {
  if (!node || typeof node !== "object") return;

  const loose = node as { type?: string; children?: unknown; rows?: unknown; cells?: unknown };
  const kind = loose.type;

  switch (kind) {
    case "text":
    case "tab":
    case "hard_break":
    case "column_break":
    case "symbol":
    case "image":
    case "bookmark_start":
    case "bookmark_end":
    case "scope_marker_start":
    case "scope_marker_end":
    case "opaque_inline":
    case "footnote_ref":
    case "chart_preview":
    case "smartart_preview":
    case "shape":
    case "wordart":
    case "vml_shape":
      // Inline leaf: report it and stop descending.
      visit(node as InlineNode);
      return;
    default:
      break;
  }

  const descend = (list: unknown): void => {
    if (!Array.isArray(list)) return;
    for (const entry of list) {
      walkInlineNodes(entry as InlineNode, visit);
    }
  };

  descend(loose.children);

  // Table structure lives in dedicated arrays rather than `children`.
  if (kind === "table") {
    descend(loose.rows);
  } else if (kind === "table_row") {
    descend(loose.cells);
  }
}