@beyondwork/docx-react-component 1.0.47 → 1.0.48

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/package.json +1 -1
  2. package/src/api/public-types.ts +115 -1
  3. package/src/compare/diff-engine.ts +4 -0
  4. package/src/core/commands/add-scope.ts +257 -0
  5. package/src/core/commands/formatting-commands.ts +2 -0
  6. package/src/core/schema/text-schema.ts +95 -1
  7. package/src/core/state/text-transaction.ts +17 -5
  8. package/src/io/chart-preview-resolver.ts +27 -0
  9. package/src/io/docx-session.ts +226 -38
  10. package/src/io/export/serialize-main-document.ts +37 -0
  11. package/src/io/export/serialize-settings.ts +421 -0
  12. package/src/io/export/serialize-styles.ts +10 -0
  13. package/src/io/normalize/normalize-text.ts +1 -0
  14. package/src/io/ooxml/chart/parse-axis.ts +277 -0
  15. package/src/io/ooxml/chart/parse-chart-space.ts +813 -0
  16. package/src/io/ooxml/chart/parse-series.ts +570 -0
  17. package/src/io/ooxml/chart/resolve-color.ts +251 -0
  18. package/src/io/ooxml/chart/types.ts +420 -0
  19. package/src/io/ooxml/parse-block-structure.ts +99 -0
  20. package/src/io/ooxml/parse-complex-content.ts +87 -2
  21. package/src/io/ooxml/parse-main-document.ts +115 -1
  22. package/src/io/ooxml/parse-scope-markers.ts +184 -0
  23. package/src/io/ooxml/parse-settings-blueprint.ts +349 -0
  24. package/src/io/ooxml/parse-settings.ts +97 -1
  25. package/src/io/ooxml/parse-styles.ts +65 -0
  26. package/src/io/ooxml/parse-theme.ts +2 -127
  27. package/src/io/ooxml/xml-attr-helpers.ts +59 -1
  28. package/src/io/ooxml/xml-parser.ts +142 -0
  29. package/src/model/canonical-document.ts +94 -0
  30. package/src/model/scope-markers.ts +144 -0
  31. package/src/runtime/collab/base-doc-fingerprint.ts +99 -0
  32. package/src/runtime/collab/checkpoint-election.ts +75 -0
  33. package/src/runtime/collab/checkpoint-scheduler.ts +204 -0
  34. package/src/runtime/collab/checkpoint-store.ts +115 -0
  35. package/src/runtime/collab/event-types.ts +27 -0
  36. package/src/runtime/collab/index.ts +22 -0
  37. package/src/runtime/collab/remote-cursor-awareness.ts +167 -0
  38. package/src/runtime/collab/runtime-collab-sync.ts +279 -0
  39. package/src/runtime/document-runtime.ts +214 -16
  40. package/src/runtime/editor-surface/capabilities.ts +63 -50
  41. package/src/runtime/layout/layout-engine-version.ts +8 -1
  42. package/src/runtime/prerender/cache-envelope.ts +19 -7
  43. package/src/runtime/prerender/cache-key.ts +25 -14
  44. package/src/runtime/prerender/canonical-document-hash.ts +63 -0
  45. package/src/runtime/prerender/customxml-cache.ts +211 -0
  46. package/src/runtime/prerender/customxml-probe.ts +78 -0
  47. package/src/runtime/prerender/prerender-document.ts +74 -7
  48. package/src/runtime/scope-resolver.ts +148 -0
  49. package/src/runtime/scope-tag-registry.ts +10 -0
  50. package/src/runtime/surface-projection.ts +8 -1
  51. package/src/ui/WordReviewEditor.tsx +30 -0
  52. package/src/ui/editor-runtime-boundary.ts +6 -1
  53. package/src/ui/runtime-shortcut-dispatch.ts +12 -7
@@ -4,22 +4,8 @@ import type {
4
4
  ThemeFontScheme,
5
5
  ResolvedTheme,
6
6
  } from "../../model/canonical-document.ts";
7
-
8
- // ---- XML node types (inline, no external dep) ----
9
-
10
- interface XmlElementNode {
11
- type: "element";
12
- name: string;
13
- attributes: Record<string, string>;
14
- children: XmlNode[];
15
- }
16
-
17
- interface XmlTextNode {
18
- type: "text";
19
- text: string;
20
- }
21
-
22
- type XmlNode = XmlElementNode | XmlTextNode;
7
+ import type { XmlElementNode } from "./xml-element.ts";
8
+ import { parseXml } from "./xml-parser.ts";
23
9
 
24
10
  // ---- Well-known DrawingML color slot names ----
25
11
 
@@ -233,114 +219,3 @@ function localName(name: string): string {
233
219
  return idx >= 0 ? name.slice(idx + 1) : name;
234
220
  }
235
221
 
236
- // ---- Minimal XML parser ----
237
-
238
- function parseXml(xml: string): XmlElementNode {
239
- const root: XmlElementNode = {
240
- type: "element",
241
- name: "__root__",
242
- attributes: {},
243
- children: [],
244
- };
245
- const stack: XmlElementNode[] = [root];
246
- let cursor = 0;
247
-
248
- while (cursor < xml.length) {
249
- if (xml.startsWith("<!--", cursor)) {
250
- const end = xml.indexOf("-->", cursor);
251
- cursor = end >= 0 ? end + 3 : xml.length;
252
- continue;
253
- }
254
-
255
- if (xml.startsWith("<?", cursor)) {
256
- const end = xml.indexOf("?>", cursor);
257
- cursor = end >= 0 ? end + 2 : xml.length;
258
- continue;
259
- }
260
-
261
- if (xml.startsWith("<![CDATA[", cursor)) {
262
- const end = xml.indexOf("]]>", cursor);
263
- const textEnd = end >= 0 ? end : xml.length;
264
- stack[stack.length - 1]?.children.push({
265
- type: "text",
266
- text: xml.slice(cursor + 9, textEnd),
267
- });
268
- cursor = end >= 0 ? end + 3 : xml.length;
269
- continue;
270
- }
271
-
272
- if (xml[cursor] !== "<") {
273
- const nextTag = xml.indexOf("<", cursor);
274
- const end = nextTag >= 0 ? nextTag : xml.length;
275
- const text = decodeXmlEntities(xml.slice(cursor, end));
276
- if (text.trim().length > 0 || (text.length > 0 && stack.length > 1)) {
277
- stack[stack.length - 1]?.children.push({ type: "text", text });
278
- }
279
- cursor = end;
280
- continue;
281
- }
282
-
283
- if (xml[cursor + 1] === "/") {
284
- const end = xml.indexOf(">", cursor);
285
- if (end < 0) break;
286
- stack.pop();
287
- cursor = end + 1;
288
- continue;
289
- }
290
-
291
- const tagEnd = xml.indexOf(">", cursor);
292
- if (tagEnd < 0) break;
293
-
294
- const tagContent = xml.slice(cursor + 1, tagEnd);
295
- const selfClosing = tagContent.endsWith("/");
296
- const normalized = selfClosing ? tagContent.slice(0, -1).trimEnd() : tagContent;
297
-
298
- const spaceIndex = normalized.search(/\s/);
299
- const tagName = spaceIndex >= 0 ? normalized.slice(0, spaceIndex) : normalized;
300
- const attrString = spaceIndex >= 0 ? normalized.slice(spaceIndex + 1) : "";
301
- const attributes = parseAttributes(attrString);
302
-
303
- const element: XmlElementNode = {
304
- type: "element",
305
- name: tagName,
306
- attributes,
307
- children: [],
308
- };
309
-
310
- stack[stack.length - 1]?.children.push(element);
311
-
312
- if (!selfClosing) {
313
- stack.push(element);
314
- }
315
-
316
- cursor = tagEnd + 1;
317
- }
318
-
319
- return root;
320
- }
321
-
322
- function parseAttributes(attrString: string): Record<string, string> {
323
- const attrs: Record<string, string> = {};
324
- const pattern = /([A-Za-z_:][A-Za-z0-9:._-]*)\s*=\s*("([^"]*)"|'([^']*)')/gu;
325
- for (const match of attrString.matchAll(pattern)) {
326
- const name = match[1];
327
- const value = match[3] ?? match[4] ?? "";
328
- if (name) {
329
- attrs[name] = decodeXmlEntities(value);
330
- }
331
- }
332
- return attrs;
333
- }
334
-
335
- function decodeXmlEntities(text: string): string {
336
- return text
337
- .replace(/&amp;/g, "&")
338
- .replace(/&lt;/g, "<")
339
- .replace(/&gt;/g, ">")
340
- .replace(/&quot;/g, '"')
341
- .replace(/&apos;/g, "'")
342
- .replace(/&#(\d+);/g, (_, dec) => String.fromCodePoint(Number.parseInt(dec, 10)))
343
- .replace(/&#x([0-9a-fA-F]+);/g, (_, hex) =>
344
- String.fromCodePoint(Number.parseInt(hex, 16)),
345
- );
346
- }
@@ -10,7 +10,7 @@
10
10
  * parse-styles.ts, parse-numbering.ts.
11
11
  */
12
12
 
13
- import type { XmlElementNode } from "./xml-element.ts";
13
+ import type { XmlElementNode, XmlNode } from "./xml-element.ts";
14
14
 
15
15
  export function localName(name: string): string {
16
16
  const sep = name.indexOf(":");
@@ -26,6 +26,27 @@ export function findChildOptional(
26
26
  );
27
27
  }
28
28
 
29
+ /**
30
+ * Depth-first search for the first descendant element with the given local name
31
+ * (namespace prefix ignored). Returns undefined if no descendant matches.
32
+ *
33
+ * Useful when the exact parent chain isn't known — e.g. descending into
34
+ * `<w:drawing>` to find a `<c:chart>` regardless of which DrawingML wrapper
35
+ * stands between them.
36
+ */
37
+ export function findFirstDescendant(
38
+ node: XmlElementNode,
39
+ local: string,
40
+ ): XmlElementNode | undefined {
41
+ for (const child of node.children) {
42
+ if (child.type !== "element") continue;
43
+ if (localName(child.name) === local) return child;
44
+ const nested = findFirstDescendant(child, local);
45
+ if (nested) return nested;
46
+ }
47
+ return undefined;
48
+ }
49
+
29
50
  /** ST_OnOff: missing child → undefined; present bare or w:val="1|true|on" → true; w:val="0|false|off" → false. */
30
51
  export function readOnOff(node: XmlElementNode | undefined): boolean | undefined {
31
52
  if (!node) return undefined;
@@ -46,6 +67,43 @@ export function readIntVal(node: XmlElementNode | undefined): number | undefined
46
67
  return Number.isFinite(v) ? v : undefined;
47
68
  }
48
69
 
70
+ /** Read the child's `val` attribute as a float. Returns undefined if missing or not a finite number. */
71
+ export function readFloatVal(node: XmlElementNode | undefined): number | undefined {
72
+ if (!node) return undefined;
73
+ const raw = node.attributes["w:val"] ?? node.attributes.val;
74
+ if (raw === undefined) return undefined;
75
+ const v = Number.parseFloat(raw);
76
+ return Number.isFinite(v) ? v : undefined;
77
+ }
78
+
79
+ /**
80
+ * Return the concatenated text content of the first child element with the
81
+ * given local name. Returns undefined if the child is missing. Returns an
82
+ * empty string if the child exists but has no text.
83
+ */
84
+ export function readStringChild(
85
+ node: XmlElementNode | undefined,
86
+ local: string,
87
+ ): string | undefined {
88
+ if (!node) return undefined;
89
+ const child = findChildOptional(node, local);
90
+ if (!child) return undefined;
91
+ return textContent(child);
92
+ }
93
+
94
+ /** Concatenate all descendant text nodes of an element into a single string. */
95
+ export function textContent(node: XmlElementNode): string {
96
+ let out = "";
97
+ const walk = (children: XmlNode[]): void => {
98
+ for (const c of children) {
99
+ if (c.type === "text") out += c.text;
100
+ else walk(c.children);
101
+ }
102
+ };
103
+ walk(node.children);
104
+ return out;
105
+ }
106
+
49
107
  /** Read an arbitrary attribute from a node as an int, with namespace fallback. */
50
108
  export function readIntAttr(node: XmlElementNode, attr: string): number | undefined {
51
109
  const raw = node.attributes[attr] ?? node.attributes[attr.replace(/^w:/, "")];
@@ -0,0 +1,142 @@
1
+ /**
2
+ * Minimal XML parser shared across OOXML parsers.
3
+ *
4
+ * Handles the subset of XML that Office documents actually emit:
5
+ * elements, attributes, text, comments (stripped), processing instructions
6
+ * (stripped), and CDATA sections. Namespace prefixes are preserved verbatim
7
+ * on element and attribute names — strip with `localName()` from
8
+ * `xml-attr-helpers.ts` when lookups must be prefix-agnostic.
9
+ *
10
+ * Not a full XML conformance parser: does not validate element nesting,
11
+ * does not track source offsets on produced nodes, does not report errors on
12
+ * malformed input beyond best-effort early termination. Sufficient for every
13
+ * OOXML part we import.
14
+ *
15
+ * Originally inlined in `parse-theme.ts`; extracted here so the chart parsers
16
+ * and any future OOXML import path can share the same implementation.
17
+ */
18
+ import type { XmlElementNode } from "./xml-element.ts";
19
+
20
+ const ROOT_TAG = "__root__";
21
+
22
+ /**
23
+ * Parse an XML string into a virtual root element. The root is a synthetic
24
+ * wrapper element whose `children` contain the top-level nodes from the
25
+ * source. Callers typically do `parseXml(xml).children.find(...)` or look up
26
+ * a specific top-level element via `findChildOptional`.
27
+ */
28
+ export function parseXml(xml: string): XmlElementNode {
29
+ const root: XmlElementNode = {
30
+ type: "element",
31
+ name: ROOT_TAG,
32
+ attributes: {},
33
+ children: [],
34
+ };
35
+ const stack: XmlElementNode[] = [root];
36
+ let cursor = 0;
37
+
38
+ while (cursor < xml.length) {
39
+ if (xml.startsWith("<!--", cursor)) {
40
+ const end = xml.indexOf("-->", cursor);
41
+ cursor = end >= 0 ? end + 3 : xml.length;
42
+ continue;
43
+ }
44
+
45
+ if (xml.startsWith("<?", cursor)) {
46
+ const end = xml.indexOf("?>", cursor);
47
+ cursor = end >= 0 ? end + 2 : xml.length;
48
+ continue;
49
+ }
50
+
51
+ if (xml.startsWith("<![CDATA[", cursor)) {
52
+ const end = xml.indexOf("]]>", cursor);
53
+ const textEnd = end >= 0 ? end : xml.length;
54
+ stack[stack.length - 1]?.children.push({
55
+ type: "text",
56
+ text: xml.slice(cursor + 9, textEnd),
57
+ });
58
+ cursor = end >= 0 ? end + 3 : xml.length;
59
+ continue;
60
+ }
61
+
62
+ if (xml[cursor] !== "<") {
63
+ const nextTag = xml.indexOf("<", cursor);
64
+ const end = nextTag >= 0 ? nextTag : xml.length;
65
+ const text = decodeXmlEntities(xml.slice(cursor, end));
66
+ if (text.trim().length > 0 || (text.length > 0 && stack.length > 1)) {
67
+ stack[stack.length - 1]?.children.push({ type: "text", text });
68
+ }
69
+ cursor = end;
70
+ continue;
71
+ }
72
+
73
+ if (xml[cursor + 1] === "/") {
74
+ const end = xml.indexOf(">", cursor);
75
+ if (end < 0) break;
76
+ stack.pop();
77
+ cursor = end + 1;
78
+ continue;
79
+ }
80
+
81
+ const tagEnd = xml.indexOf(">", cursor);
82
+ if (tagEnd < 0) break;
83
+
84
+ const tagContent = xml.slice(cursor + 1, tagEnd);
85
+ const selfClosing = tagContent.endsWith("/");
86
+ const normalized = selfClosing
87
+ ? tagContent.slice(0, -1).trimEnd()
88
+ : tagContent;
89
+
90
+ const spaceIndex = normalized.search(/\s/);
91
+ const tagName =
92
+ spaceIndex >= 0 ? normalized.slice(0, spaceIndex) : normalized;
93
+ const attrString = spaceIndex >= 0 ? normalized.slice(spaceIndex + 1) : "";
94
+ const attributes = parseAttributes(attrString);
95
+
96
+ const element: XmlElementNode = {
97
+ type: "element",
98
+ name: tagName,
99
+ attributes,
100
+ children: [],
101
+ };
102
+
103
+ stack[stack.length - 1]?.children.push(element);
104
+
105
+ if (!selfClosing) {
106
+ stack.push(element);
107
+ }
108
+
109
+ cursor = tagEnd + 1;
110
+ }
111
+
112
+ return root;
113
+ }
114
+
115
+ /** Decode the standard XML entity references plus numeric character references. */
116
+ export function decodeXmlEntities(text: string): string {
117
+ return text
118
+ .replace(/&amp;/g, "&")
119
+ .replace(/&lt;/g, "<")
120
+ .replace(/&gt;/g, ">")
121
+ .replace(/&quot;/g, '"')
122
+ .replace(/&apos;/g, "'")
123
+ .replace(/&#(\d+);/g, (_, dec) =>
124
+ String.fromCodePoint(Number.parseInt(dec, 10)),
125
+ )
126
+ .replace(/&#x([0-9a-fA-F]+);/g, (_, hex) =>
127
+ String.fromCodePoint(Number.parseInt(hex, 16)),
128
+ );
129
+ }
130
+
131
+ function parseAttributes(attrString: string): Record<string, string> {
132
+ const attrs: Record<string, string> = {};
133
+ const pattern = /([A-Za-z_:][A-Za-z0-9:._-]*)\s*=\s*("([^"]*)"|'([^']*)')/gu;
134
+ for (const match of attrString.matchAll(pattern)) {
135
+ const name = match[1];
136
+ const value = match[3] ?? match[4] ?? "";
137
+ if (name) {
138
+ attrs[name] = decodeXmlEntities(value);
139
+ }
140
+ }
141
+ return attrs;
142
+ }
@@ -12,6 +12,14 @@ import {
12
12
  expectUuid,
13
13
  stableStringify,
14
14
  } from "./cds-1.0.0.ts";
15
+ import type {
16
+ ScopeMarkerStartNode,
17
+ ScopeMarkerEndNode,
18
+ } from "./scope-markers.ts";
19
+ import type { ChartModel } from "../io/ooxml/chart/types.ts";
20
+
21
+ export type { ScopeMarkerStartNode, ScopeMarkerEndNode } from "./scope-markers.ts";
22
+ export type { ChartModel } from "../io/ooxml/chart/types.ts";
15
23
 
16
24
  const CANONICAL_DOCUMENT_TOP_LEVEL_KEYS = [
17
25
  "schemaVersion",
@@ -97,6 +105,14 @@ export interface ParagraphStyleDefinition {
97
105
  isDefault: boolean;
98
106
  paragraphProperties?: CanonicalParagraphFormatting;
99
107
  runProperties?: CanonicalRunFormatting;
108
+ /**
109
+ * Style ID of the linked character style (from `<w:link w:val="..."/>`).
110
+ * Populated during parse; the second-pass resolver in `parse-styles.ts`
111
+ * synthesizes the reciprocal link on the partner when the source only
112
+ * declares the relationship on one side. Mirrors LibreOffice's
113
+ * `StyleSheetTable.cxx:535` second pass.
114
+ */
115
+ linkedStyleId?: string;
100
116
  }
101
117
 
102
118
  export interface ParagraphStyleNumberingReference {
@@ -111,6 +127,12 @@ export interface CharacterStyleDefinition {
111
127
  kind: "character";
112
128
  isDefault: boolean;
113
129
  runProperties?: CanonicalRunFormatting;
130
+ /**
131
+ * Style ID of the linked paragraph style (from `<w:link w:val="..."/>`).
132
+ * See `ParagraphStyleDefinition.linkedStyleId` for the second-pass
133
+ * reciprocal-resolution contract.
134
+ */
135
+ linkedStyleId?: string;
114
136
  }
115
137
 
116
138
  export interface TableStyleDefinition {
@@ -257,9 +279,63 @@ export interface ThemeDefinition {
257
279
  fontScheme?: ThemeFontScheme;
258
280
  }
259
281
 
282
+ /**
283
+ * One <w:compatSetting> entry under <w:compat>. Word emits multiple of these
284
+ * with the same `name` across different `uri` namespaces, so the canonical
285
+ * shape is an ordered tuple list rather than a name→value map.
286
+ *
287
+ * `value` is preserved as the raw `w:val` string (e.g. "15", "1", "0") so the
288
+ * future serializer can re-emit byte-stable diffs.
289
+ */
290
+ export interface CompatSetting {
291
+ name: string;
292
+ uri: string;
293
+ value: string;
294
+ }
295
+
260
296
  export interface DocumentSettings {
261
297
  evenAndOddHeaders?: boolean;
262
298
  zoomLevel?: "pageWidth" | "onePage" | number;
299
+ /**
300
+ * Ordered list of <w:compatSetting> entries inside <w:compat>. Insertion
301
+ * order is preserved for serializer diff stability.
302
+ */
303
+ compatSettings?: CompatSetting[];
304
+ /**
305
+ * Boolean flag children of <w:compat> that are NOT <w:compatSetting>
306
+ * (e.g. <w:spaceForUL/>, <w:doNotExpandShiftReturn/>). Keyed by local
307
+ * element name. The value reflects ST_OnOff semantics: missing `w:val` is
308
+ * true; explicit `w:val="0"`/`"false"` is false.
309
+ */
310
+ compatFlags?: Record<string, boolean>;
311
+ /**
312
+ * Settings-level (NOT inside <w:compat>) compat-adjacent boolean flags
313
+ * such as <w:doNotEmbedSmartTags/>. Kept in a separate field from
314
+ * compatFlags because the OOXML location differs and the future
315
+ * serializer must re-emit them at root, not inside <w:compat>.
316
+ */
317
+ rootCompatFlags?: Record<string, boolean>;
318
+ /**
319
+ * <w:themeFontLang> attribute bag, captured verbatim so the future
320
+ * serializer can re-emit unknown attributes Word may add.
321
+ *
322
+ * Distinguishes three states:
323
+ * - undefined: the element was absent.
324
+ * - {}: the element existed with no attributes.
325
+ * - { "w:val": "en-US", … }: attributes preserved with their qualified
326
+ * names so the serializer round-trips namespace prefixes intact.
327
+ */
328
+ themeFontLang?: Record<string, string>;
329
+ /**
330
+ * Local names of every direct child of <w:settings> that this parser does
331
+ * not model individually. Insertion order preserved; duplicates retained.
332
+ *
333
+ * Diagnostic only — round-trip is served by whole-part preservation while
334
+ * settings.xml stays out of `ownedOutputPaths`. The future serializer will
335
+ * use this list as a validator assertion that every preserved child still
336
+ * appears in the re-emitted output.
337
+ */
338
+ unmodelledSettingsChildren?: string[];
263
339
  }
264
340
 
265
341
  export interface SubPartsCatalog {
@@ -305,6 +381,8 @@ export type DocumentNode =
305
381
  | FieldNode
306
382
  | BookmarkStartNode
307
383
  | BookmarkEndNode
384
+ | ScopeMarkerStartNode
385
+ | ScopeMarkerEndNode
308
386
  | SectionBreakNode
309
387
  | OpaqueInlineNode
310
388
  | OpaqueBlockNode
@@ -890,6 +968,8 @@ export type InlineNode =
890
968
  | FieldNode
891
969
  | BookmarkStartNode
892
970
  | BookmarkEndNode
971
+ | ScopeMarkerStartNode
972
+ | ScopeMarkerEndNode
893
973
  | OpaqueInlineNode
894
974
  | FootnoteRefNode
895
975
  | ChartPreviewNode
@@ -992,6 +1072,16 @@ export interface OpaqueInlineNode {
992
1072
  export interface ChartPreviewNode {
993
1073
  type: "chart_preview";
994
1074
  previewMediaId?: string;
1075
+ /**
1076
+ * Typed chart data model parsed from the `c:chartSpace` part, when
1077
+ * available. Populated at import time by the Stage 1 chart parser
1078
+ * (`src/io/ooxml/chart/parse-chart-space.ts`). Undefined when the chart
1079
+ * part cannot be located, fails to parse, or has no chart-family match
1080
+ * — consumers fall back to the fallback bitmap (`previewMediaId`) or the
1081
+ * typed badge in that case. `rawXml` is the authoritative round-trip
1082
+ * source regardless of whether `parsedData` is populated.
1083
+ */
1084
+ parsedData?: ChartModel;
995
1085
  rawXml: string;
996
1086
  }
997
1087
 
@@ -1658,6 +1748,10 @@ function validateDocumentNode(
1658
1748
  case "bookmark_end":
1659
1749
  expectString(record.bookmarkId, `${path}.bookmarkId`, issues);
1660
1750
  return;
1751
+ case "scope_marker_start":
1752
+ case "scope_marker_end":
1753
+ expectString(record.scopeId, `${path}.scopeId`, issues);
1754
+ return;
1661
1755
  case "section_break":
1662
1756
  return;
1663
1757
  case "text":
@@ -0,0 +1,144 @@
1
+ import type {
2
+ CanonicalDocument,
3
+ DocumentNode,
4
+ DocumentRootNode,
5
+ } from "./canonical-document.ts";
6
+
7
+ /**
8
+ * Inline zero-width marker that opens a workflow scope. Modeled on
9
+ * `BookmarkStartNode` — the marker lives IN the document so PM handles
10
+ * position bookkeeping for free. `scopeId` is the key; metadata persistence
11
+ * is orthogonal and owned by `WorkflowOverlay` + customXml payloads.
12
+ */
13
+ export interface ScopeMarkerStartNode {
14
+ type: "scope_marker_start";
15
+ scopeId: string;
16
+ }
17
+
18
+ /**
19
+ * Inline zero-width marker that closes a workflow scope opened by a matching
20
+ * `ScopeMarkerStartNode` with the same `scopeId`.
21
+ */
22
+ export interface ScopeMarkerEndNode {
23
+ type: "scope_marker_end";
24
+ scopeId: string;
25
+ }
26
+
27
+ export type ScopeMarkerNode = ScopeMarkerStartNode | ScopeMarkerEndNode;
28
+
29
+ export function isScopeMarkerNode(node: unknown): node is ScopeMarkerNode {
30
+ if (typeof node !== "object" || node === null) return false;
31
+ const t = (node as { type?: unknown }).type;
32
+ return t === "scope_marker_start" || t === "scope_marker_end";
33
+ }
34
+
35
+ export interface ScopeMarkerWalkEntry {
36
+ scopeId: string;
37
+ source: "canonical";
38
+ status: "paired" | "start-only" | "end-only";
39
+ startIndex?: number;
40
+ endIndex?: number;
41
+ }
42
+
43
+ interface OpenScopeMarker {
44
+ scopeId: string;
45
+ startIndex: number;
46
+ }
47
+
48
+ /**
49
+ * Walk the canonical document in pre-order and return one entry per scope
50
+ * detected. A paired entry carries both `startIndex` + `endIndex`; an
51
+ * unpaired entry has only the surviving side filled.
52
+ */
53
+ export function collectScopeMarkers(
54
+ document: Pick<CanonicalDocument, "content"> | DocumentNode,
55
+ ): ScopeMarkerWalkEntry[] {
56
+ const root = ("content" in document
57
+ ? document.content
58
+ : document) as DocumentNode | DocumentRootNode;
59
+ const sequence: ScopeMarkerNode[] = [];
60
+
61
+ walkDocument(root, (node) => {
62
+ if (isScopeMarkerNode(node)) {
63
+ sequence.push(node);
64
+ }
65
+ });
66
+
67
+ const open = new Map<string, OpenScopeMarker[]>();
68
+ const results: ScopeMarkerWalkEntry[] = [];
69
+
70
+ for (let index = 0; index < sequence.length; index += 1) {
71
+ const marker = sequence[index]!;
72
+ if (marker.type === "scope_marker_start") {
73
+ const stack = open.get(marker.scopeId) ?? [];
74
+ stack.push({ scopeId: marker.scopeId, startIndex: index });
75
+ open.set(marker.scopeId, stack);
76
+ continue;
77
+ }
78
+
79
+ const stack = open.get(marker.scopeId);
80
+ const opener = stack?.pop();
81
+ if (stack && stack.length === 0) {
82
+ open.delete(marker.scopeId);
83
+ }
84
+
85
+ if (opener) {
86
+ results.push({
87
+ scopeId: marker.scopeId,
88
+ source: "canonical",
89
+ status: "paired",
90
+ startIndex: opener.startIndex,
91
+ endIndex: index,
92
+ });
93
+ continue;
94
+ }
95
+
96
+ results.push({
97
+ scopeId: marker.scopeId,
98
+ source: "canonical",
99
+ status: "end-only",
100
+ endIndex: index,
101
+ });
102
+ }
103
+
104
+ for (const stack of open.values()) {
105
+ for (const opener of stack) {
106
+ results.push({
107
+ scopeId: opener.scopeId,
108
+ source: "canonical",
109
+ status: "start-only",
110
+ startIndex: opener.startIndex,
111
+ });
112
+ }
113
+ }
114
+
115
+ return results.sort(
116
+ (left, right) =>
117
+ (left.startIndex ?? left.endIndex ?? Number.MAX_SAFE_INTEGER) -
118
+ (right.startIndex ?? right.endIndex ?? Number.MAX_SAFE_INTEGER) ||
119
+ left.scopeId.localeCompare(right.scopeId),
120
+ );
121
+ }
122
+
123
+ function walkDocument(
124
+ node: DocumentNode | DocumentRootNode,
125
+ visit: (node: DocumentNode) => void,
126
+ ): void {
127
+ visit(node as DocumentNode);
128
+
129
+ if ("children" in node && Array.isArray(node.children)) {
130
+ for (const child of node.children) {
131
+ walkDocument(child as DocumentNode, visit);
132
+ }
133
+ }
134
+
135
+ if (node.type === "table") {
136
+ for (const row of node.rows) {
137
+ walkDocument(row, visit);
138
+ }
139
+ } else if (node.type === "table_row") {
140
+ for (const cell of node.cells) {
141
+ walkDocument(cell, visit);
142
+ }
143
+ }
144
+ }