@beyondwork/docx-react-component 1.0.76 → 1.0.78

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@beyondwork/docx-react-component",
3
3
  "publisher": "beyondwork",
4
- "version": "1.0.76",
4
+ "version": "1.0.78",
5
5
  "description": "Embeddable React Word (docx) editor with review, comments, tracked changes, and round-trip OOXML fidelity.",
6
6
  "type": "module",
7
7
  "sideEffects": [
@@ -24,6 +24,8 @@
24
24
 
25
25
  import type { RuntimeApiHandle } from "../_runtime-handle.ts";
26
26
  import {
27
+ queryScopeAtPosition,
28
+ queryScopeInRange,
27
29
  resolveReference,
28
30
  type ResolveReferenceResult as RuntimeResolveResult,
29
31
  type ScopeHandle,
@@ -67,13 +69,85 @@ export const resolveReferenceMetadata: ApiV3FnMetadata = {
67
69
  boundedScope: "document",
68
70
  auditCategory: "reference-resolve",
69
71
  contextPromptShape:
70
- "Resolve a natural-language hint or structured ScopeReference into a stable scopeId.",
72
+ "Resolve a durable ScopeReference (scope-id / semantic-path / natural-language) into a stable ScopeHandle. Positional lookups are NOT accepted here — use ai.queryScopeAtPosition / ai.queryScopeInRange for click-to-scope / hit-test flows.",
71
73
  },
72
74
  stateClass: "A-canonical",
73
75
  persistsTo: "canonical",
74
76
  rwdReference: "§AI API § ai.resolveReference",
75
77
  };
76
78
 
/**
 * Argument object for the one-shot positional query entry points
 * (single-offset variant).
 *
 * The input is transient by contract: the position is resolved against
 * the *current* document state, and the `scopeId` on the returned
 * handle is what callers keep as the durable reference. Never persist
 * the `at` / `from` / `to` values — any intervening mutation makes
 * them meaningless (KI-P9).
 */
export interface QueryScopeAtPositionInput {
  /** Offset to hit-test against the current document state. */
  readonly at: number;
}
91
+
/**
 * Argument object for the one-shot range (selection) query. Like any
 * positional input, the two bounds are only meaningful against the
 * document state at call time and should not be stored.
 */
export interface QueryScopeInRangeInput {
  /** First bound of the queried range. */
  readonly from: number;
  /** Second bound of the queried range. */
  readonly to: number;
}
96
+
/**
 * Outcome of a one-shot positional query.
 *
 * Carries the innermost scope whose range matches the queried
 * position/range, or `null` when nothing matches. There is
 * deliberately no `status` union and no `confidence` field: the
 * caller's next use of the handle is identity-based (through
 * `handle.scopeId`), and the query itself makes no durable claims.
 */
export interface QueryScopePositionResult {
  /** Matching scope handle, or `null` when no scope matched. */
  readonly handle: ScopeHandle | null;
}
107
+
/**
 * API v3 metadata record for `ai.queryScopeAtPosition`: a read-only,
 * document-bounded lookup that is not surfaced in the UI.
 */
export const queryScopeAtPositionMetadata: ApiV3FnMetadata = {
  name: "ai.queryScopeAtPosition",
  status: "live-with-adapter",
  sourceLayer: "semantic-scope-compiler",
  liveEvidence: {
    runnerTest: "test/api/v3/ai/query-scope-position.test.ts",
    commit: "pending",
  },
  uxIntent: { uiVisible: false, expectsUxResponse: "none" },
  agentMetadata: {
    readOrMutate: "read",
    boundedScope: "document",
    auditCategory: "reference-resolve",
    contextPromptShape:
      "One-shot click-to-scope / hit-test lookup. Returns the innermost scope at the given offset, or null. The handle's scopeId is durable; the offset you passed in is NOT — do not store it.",
  },
  stateClass: "A-canonical",
  persistsTo: "canonical",
  rwdReference:
    "§AI API § ai.queryScopeAtPosition. Companion to ai.resolveReference for positional lookups — the position-as-reference surface that was conflated pre-2026-04-24 has been split out so positions can't be accidentally round-tripped as durable references (KI-P9).",
};
129
+
/**
 * API v3 metadata record for `ai.queryScopeInRange`: the range
 * (selection) counterpart of the positional lookup, likewise read-only
 * and document-bounded.
 */
export const queryScopeInRangeMetadata: ApiV3FnMetadata = {
  name: "ai.queryScopeInRange",
  status: "live-with-adapter",
  sourceLayer: "semantic-scope-compiler",
  liveEvidence: {
    runnerTest: "test/api/v3/ai/query-scope-position.test.ts",
    commit: "pending",
  },
  uxIntent: { uiVisible: false, expectsUxResponse: "none" },
  agentMetadata: {
    readOrMutate: "read",
    boundedScope: "document",
    auditCategory: "reference-resolve",
    contextPromptShape:
      "One-shot selection-to-scope lookup. Returns the innermost scope fully containing [from, to], or null. The handle's scopeId is durable; the positions are NOT — do not store them.",
  },
  stateClass: "A-canonical",
  persistsTo: "canonical",
  rwdReference: "§AI API § ai.queryScopeInRange",
};
150
+
77
151
  function asReference(input: ResolveReferenceInput): ScopeReference {
78
152
  if ("reference" in input) return input.reference;
79
153
  return { kind: "natural-language", hint: input.hint };
@@ -117,9 +191,10 @@ function projectResult(raw: RuntimeResolveResult): ResolveReferenceResult {
117
191
  export function createResolveFamily(runtime: RuntimeApiHandle) {
118
192
  return {
119
193
  resolveReference(input: ResolveReferenceInput): ResolveReferenceResult {
120
- // @endStateApi — live-with-adapter. Delegates to the scope-compiler
121
- // `resolveReference` over the typed ScopeReference union; NL hints
122
- // stay at confidence "low" per Slice 3.
194
+ // @endStateApi — live-with-adapter. Durable identity lookup via
195
+ // ScopeReference union (scope-id / semantic-path / natural-
196
+ // language). Positional references are not accepted here — use
197
+ // queryScopeAtPosition / queryScopeInRange instead.
123
198
  const reference = asReference(input);
124
199
  if (reference.kind === "natural-language" && reference.hint.trim().length === 0) {
125
200
  return { status: "not-found", confidence: "none", reason: "empty hint" };
@@ -129,5 +204,30 @@ export function createResolveFamily(runtime: RuntimeApiHandle) {
129
204
  const raw = resolveReference(reference, { document, overlay });
130
205
  return projectResult(raw);
131
206
  },
207
+
208
+ queryScopeAtPosition(
209
+ input: QueryScopeAtPositionInput,
210
+ ): QueryScopePositionResult {
211
+ // @endStateApi — live-with-adapter. One-shot position-to-scope
212
+ // query. The input offset is transient — callers must use the
213
+ // returned handle's scopeId as the durable reference, not the
214
+ // offset (KI-P9).
215
+ const document = runtime.getCanonicalDocument();
216
+ const overlay = runtime.getWorkflowOverlay();
217
+ const handle = queryScopeAtPosition(input.at, { document, overlay });
218
+ return { handle };
219
+ },
220
+
221
+ queryScopeInRange(input: QueryScopeInRangeInput): QueryScopePositionResult {
222
+ // @endStateApi — live-with-adapter. One-shot selection-to-scope
223
+ // query. Same durable-reference contract as queryScopeAtPosition.
224
+ const document = runtime.getCanonicalDocument();
225
+ const overlay = runtime.getWorkflowOverlay();
226
+ const handle = queryScopeInRange(input.from, input.to, {
227
+ document,
228
+ overlay,
229
+ });
230
+ return { handle };
231
+ },
132
232
  };
133
233
  }
@@ -0,0 +1,123 @@
/**
 * Phase 2 bookmark-strip reference scanner.
 *
 * Walks the source main-document XML and collects the names of
 * bookmarks that are LOAD-BEARING — i.e. referenced by:
 *
 *   1. A hyperlink anchor (`<w:hyperlink w:anchor="NAME">`).
 *   2. A `HYPERLINK` field instruction carrying a `\l "NAME"` (or
 *      unquoted `\l NAME`) switch. Hyperlinks stored as fields are not
 *      necessarily wrapped in a `<w:hyperlink>` element, so the
 *      instruction text is scanned as well.
 *   3. A `REF` / `PAGEREF` / `NOTEREF` / `HYPERLINK` field instruction
 *      whose first argument is the bookmark name, quoted or unquoted.
 *   4. A `TOC` field anywhere in the doc — TOC fields produce
 *      hyperlinks to generated `_Toc####` anchors at render time, so
 *      we blanket-retain `_Toc*` whenever a TOC field is present.
 *
 * Field instructions are read from both carriers:
 *   - simple fields: the `w:instr` attribute of `<w:fldSimple>`;
 *   - complex fields: `<w:instrText>` runs. Each run is scanned on its
 *     own AND joined with its neighbours up to the next `<w:fldChar>`,
 *     because an instruction may be split across several runs
 *     (` REF _Ref` + `12345 \h`).
 *
 * Field keywords are matched case-insensitively. Every widening in
 * this scanner errs towards RETAIN: a spurious extra name in the set
 * only costs one preserved bookmark, whereas a missed name breaks a
 * cross-reference.
 *
 * Produces a typed `BookmarkReferenceScan` that the parser consults
 * at every `<w:bookmarkStart>` / `<w:bookmarkEnd>` emission site to
 * decide RETAIN vs STRIP under
 * `ParseMainDocumentOptions.stripCosmeticMarkers`.
 *
 * Design note: regex-based (not full XML walk) because the scanner
 * runs BEFORE the structural parser and only needs name-extraction
 * accuracy. Patterns are bounded to single elements to avoid catching
 * XML attribute values inside opaque payloads.
 *
 * Reference docs:
 *   - `services/debug/docs/phase-2-bookmark-strip-audit-2026-04-24.md`
 *   - `services/debug/docs/l01-bookmark-hyperlink-implementation-plan-2026-04-24.md` § 2.1
 *   - `docs/architecture/cosmetic-marker-strip.md` § Phase 2
 */

export interface BookmarkReferenceScan {
  /**
   * Names that survive the strip — explicitly referenced or in the
   * caller-supplied allowlist.
   */
  readonly retainedNames: ReadonlySet<string>;

  /**
   * When `true`, retain ALL `_Toc*` bookmarks unconditionally because
   * the document contains at least one TOC field (TOC fields produce
   * hyperlinks to generated `_Toc####` anchors at render time and the
   * specific anchor names are not knowable until render).
   */
  readonly retainAllTocPattern: boolean;

  /**
   * Defensive blanket-retain. When `true`, retain every bookmark
   * regardless of name — the document carries an SDT data-binding
   * (`<w:dataBinding>`) whose xpath could reference bookmarks via
   * paths we cannot statically analyze. Erring on the side of
   * preservation is the correct posture; a follow-up can refine to
   * scan xpath strings for bookmark-ref shape.
   */
  readonly retainAll: boolean;
}

// `<w:hyperlink … w:anchor="NAME">` — group 1 (double-quoted) or 2 (single-quoted).
const HYPERLINK_ANCHOR_RE =
  /<(?:\w+:)?hyperlink\b[^>]*\bw:anchor\s*=\s*(?:"([^"]*)"|'([^']*)')/gi;

// `<w:fldSimple … w:instr="INSTRUCTION">` — group 1 or 2 is the raw attribute value.
const FLD_SIMPLE_INSTR_RE =
  /<(?:\w+:)?fldSimple\b[^>]*\bw:instr\s*=\s*(?:"([^"]*)"|'([^']*)')/gi;

// Document-order tokens of complex fields. Group 1 is set for a
// `<w:fldChar>` boundary; otherwise the match is an `<w:instrText>`
// element and group 2 holds its text (undefined for the self-closing
// form, which must not swallow the markup that follows it).
const FIELD_TOKEN_RE =
  /(<(?:\w+:)?fldChar\b[^>]*>)|<(?:\w+:)?instrText\b[^>]*?(?:\/>|>([\s\S]*?)<\/(?:\w+:)?instrText>)/gi;

const TOC_FIELD_RE = /\bTOC\b/i;
const HYPERLINK_FIELD_RE = /\bHYPERLINK\b/i;

// Keyword followed by its first argument: group 1 when quoted, group 2
// when bare. The bare form accepts any Unicode letter/digit plus the
// `_ : . -` punctuation, so non-ASCII bookmark names are kept whole.
const REFLIKE_FIELD_RE =
  /\b(?:HYPERLINK|REF|PAGEREF|NOTEREF)\s+(?:"([^"]+)"|([\p{L}\p{N}_:.\-]+))/giu;

// `\l NAME` / `\l "NAME"` switch. Only consulted for HYPERLINK
// instructions, because other fields (e.g. TOC) use `\l` with a
// different meaning.
const LOCAL_ANCHOR_SWITCH_RE = /\\l\s+(?:"([^"]+)"|([^\s"\\]+))/gi;

const DATA_BINDING_RE = /<(?:\w+:)?dataBinding\b/i;

/**
 * Always-retain prefix check — bookmarks whose name starts with
 * these are NEVER stripped regardless of the reference scan, because
 * they are converted to first-class scope markers by
 * `rewriteScopeMarkerBookmarks` BEFORE the strip even runs. Listing
 * them here is defense-in-depth.
 */
const ALWAYS_RETAIN_PREFIXES: readonly string[] = ["bw:scope:"];

/**
 * Decodes the five predefined XML entities. Instruction text read
 * straight out of raw XML (in particular from a `w:instr` attribute)
 * carries its quotes as `&quot;`.
 */
function decodeXmlEntities(text: string): string {
  if (!text.includes("&")) return text;
  return text
    .replace(/&quot;/g, '"')
    .replace(/&apos;/g, "'")
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&amp;/g, "&"); // last, so `&amp;quot;` is not decoded twice
}

/**
 * Extracts every bookmark name referenced by one field instruction
 * into `retained`.
 *
 * @returns `true` when the instruction contains a `TOC` keyword.
 */
function collectInstructionReferences(
  rawInstruction: string,
  retained: Set<string>,
): boolean {
  const instruction = decodeXmlEntities(rawInstruction);

  for (const match of instruction.matchAll(REFLIKE_FIELD_RE)) {
    const name = match[1] ?? match[2];
    if (name) retained.add(name);
  }

  if (HYPERLINK_FIELD_RE.test(instruction)) {
    for (const match of instruction.matchAll(LOCAL_ANCHOR_SWITCH_RE)) {
      const name = match[1] ?? match[2];
      if (name) retained.add(name);
    }
  }

  return TOC_FIELD_RE.test(instruction);
}

/**
 * Scans raw main-document XML for bookmark references.
 *
 * @param documentXml Raw XML text of the main document part.
 * @param callerAllowlist Names to retain unconditionally; seeded into
 *   the result before scanning.
 * @returns The retained-name set plus the two blanket-retain flags.
 */
export function scanBookmarkReferences(
  documentXml: string,
  callerAllowlist: ReadonlyArray<string> = [],
): BookmarkReferenceScan {
  const retained = new Set<string>(callerAllowlist);
  const retainAll = DATA_BINDING_RE.test(documentXml);
  let retainAllToc = false;

  // 1. <w:hyperlink w:anchor="NAME">
  for (const match of documentXml.matchAll(HYPERLINK_ANCHOR_RE)) {
    const anchor = match[1] ?? match[2];
    if (anchor) retained.add(decodeXmlEntities(anchor));
  }

  // 2. <w:fldSimple w:instr="..."> — the instruction lives in the attribute.
  for (const match of documentXml.matchAll(FLD_SIMPLE_INSTR_RE)) {
    if (collectInstructionReferences(match[1] ?? match[2] ?? "", retained)) {
      retainAllToc = true;
    }
  }

  // 3. Complex fields: <w:instrText> runs delimited by <w:fldChar>.
  let pendingRuns: string[] = [];
  const flushPendingRuns = (): boolean => {
    // A single run was already scanned on its own; only a multi-run
    // instruction can reveal something new once joined.
    const sawToc =
      pendingRuns.length > 1 &&
      collectInstructionReferences(pendingRuns.join(""), retained);
    pendingRuns = [];
    return sawToc;
  };

  for (const token of documentXml.matchAll(FIELD_TOKEN_RE)) {
    if (token[1] !== undefined) {
      // <w:fldChar>: begin / separate / end all terminate the
      // instruction text gathered so far.
      if (flushPendingRuns()) retainAllToc = true;
      continue;
    }
    const runText = token[2] ?? "";
    if (collectInstructionReferences(runText, retained)) retainAllToc = true;
    pendingRuns.push(runText);
  }
  if (flushPendingRuns()) retainAllToc = true;

  return {
    retainedNames: retained,
    retainAllTocPattern: retainAllToc,
    retainAll,
  };
}

/**
 * Decides RETAIN (`true`) vs STRIP (`false`) for one bookmark name
 * against a completed scan.
 *
 * @param name Bookmark name as it appears on the bookmark element.
 * @param scan Result of `scanBookmarkReferences` for the same document.
 */
export function isRetainedBookmarkName(
  name: string,
  scan: BookmarkReferenceScan,
): boolean {
  if (scan.retainAll) return true;
  if (scan.retainedNames.has(name)) return true;
  if (scan.retainAllTocPattern && name.startsWith("_Toc")) return true;
  return ALWAYS_RETAIN_PREFIXES.some((prefix) => name.startsWith(prefix));
}
@@ -13,6 +13,7 @@ import type {
13
13
  TextMark,
14
14
  } from "../../model/canonical-document.ts";
15
15
  import { classifyFieldInstruction } from "./parse-fields.ts";
16
+ import { isSafeTableFieldInstruction } from "./table-opaque-preservation.ts";
16
17
  import {
17
18
  readCellBorders,
18
19
  readCellCnfStyle,
@@ -695,6 +696,15 @@ function findChildElementOptional(
695
696
 
696
697
  // ---- Simple secondary-story table support ----
697
698
 
699
+ // Revision + structural-change markup disqualifies a footnote table from
700
+ // supported-roundtrip — tracked-change-aware table editing isn't
701
+ // implemented yet, and SDT / customXml structural wrappers aren't
702
+ // supported at footnote-table scope. Field-bearing elements are handled
703
+ // per-instruction via the shared `isSafeTableFieldInstruction` helper
704
+ // (coord-01 §11 unification, 2026-04-24) — this replaces the pre-unification
705
+ // "reject ALL fldChar/instrText unconditionally" stance so footnote tables
706
+ // with supported field families (REF / DOCPROPERTY / FORMTEXT / etc.) can
707
+ // now parse as structured.
698
708
  const RISKY_TABLE_ELEMENT_NAMES = new Set([
699
709
  "ins",
700
710
  "del",
@@ -706,9 +716,6 @@ const RISKY_TABLE_ELEMENT_NAMES = new Set([
706
716
  "rPrChange",
707
717
  "pPrChange",
708
718
  "sectPrChange",
709
- "fldSimple",
710
- "fldChar",
711
- "instrText",
712
719
  "sdt",
713
720
  "customXml",
714
721
  ]);
@@ -726,6 +733,22 @@ function containsRiskyElement(element: XmlElementNode): boolean {
726
733
  if (RISKY_TABLE_ELEMENT_NAMES.has(name)) {
727
734
  return true;
728
735
  }
736
+ // Field-bearing elements: defer to the shared table-safety predicate.
737
+ if (name === "fldSimple" || name === "instrText") {
738
+ const instruction =
739
+ readStringAttr(child, "w:instr") ??
740
+ extractTextContent(child);
741
+ if (!isSafeTableFieldInstruction(instruction)) {
742
+ return true;
743
+ }
744
+ continue;
745
+ }
746
+ if (name === "fldChar") {
747
+ // `<w:fldChar>` is always safe — the instruction text it brackets
748
+ // is carried by a sibling `<w:instrText>` that this walker checks
749
+ // separately.
750
+ continue;
751
+ }
729
752
  if (name === "tbl") {
730
753
  return true;
731
754
  }
@@ -70,8 +70,21 @@ export interface ParsedHeaderFooterDocument {
70
70
  blocks: BlockNode[];
71
71
  }
72
72
 
/**
 * Options accepted by the header/footer parser entry points: the
 * drawing-parse options minus `blockParser`.
 *
 * `blockParser` is intentionally not part of the public shape — the
 * recursion depth counter and the source-xml rebinding around
 * `currentSourceXml` are owned by this module, so external callers
 * never supply their own block parser.
 */
export type ParseHeaderFooterOpts = Omit<ParseDrawingOpts, "blockParser">;
74
77
 
/**
 * Upper bound on shape `txbxContent` recursion inside header / footer
 * parts; mirrors the `TXBX_BLOCK_STREAM_MAX_DEPTH` guard used on the
 * main-document path.
 *
 * Text boxes may legally nest (drawing → txbx → drawing → txbx → …)
 * and OOXML places no spec-level limit on that nesting, so recursion
 * is capped to keep pathological input from hanging the parser.
 */
const HDRFTR_TXBX_MAX_DEPTH = 4;
87
+
75
88
  // ---- XML node types (inline, no external dep) ----
76
89
 
77
90
  interface XmlElementNode {
@@ -687,6 +700,15 @@ function parseDrawingInlineNode(
687
700
  const frame = parseDrawingFrame(rawXml, {
688
701
  ...opts,
689
702
  relationships: opts.relationships ?? [],
703
+ // Coord-02 §14 / coord-11 §22 follow-up (2026-04-24): supply a
704
+ // blockParser so `parseShapeContent` populates `shape.txbxBlocks`
705
+ // for text-box shapes in headers / footers. Without this, a
706
+ // footer shape carrying "Copyright CCEP STRICTLY CONFIDENTIAL"
707
+ // flattens to a shape with `text` summary only — the inner `<w:p>`
708
+ // chain is lost. The main-document path has always supplied a
709
+ // blockParser; headers/footers did not until now.
710
+ blockParser: (xml) =>
711
+ parseTxbxBlocksForHeaderFooter(xml, opts, 1),
690
712
  });
691
713
  if (
692
714
  frame &&
@@ -705,7 +727,9 @@ function parseDrawingInlineNode(
705
727
  }
706
728
 
707
729
  const shapeXml = legacyDrawingXml ?? rawXml;
708
- const legacyShape = parseShapeXml(shapeXml);
730
+ const legacyShape = parseShapeXml(shapeXml, (xml) =>
731
+ parseTxbxBlocksForHeaderFooter(xml, opts, 1),
732
+ );
709
733
  if (!legacyShape) {
710
734
  return null;
711
735
  }
@@ -718,6 +742,77 @@ function parseDrawingInlineNode(
718
742
  return legacyShape;
719
743
  }
720
744
 
/**
 * Number of `parseTxbxBlocksForHeaderFooter` invocations currently on
 * the call stack. The nested-drawing call sites visible in this module
 * always pass the literal depth `1`, so the `depth` argument alone
 * cannot observe real nesting; this counter is what makes the
 * `HDRFTR_TXBX_MAX_DEPTH` guard effective.
 */
let activeHdrFtrTxbxDepth = 0;

/**
 * Walk `<w:txbxContent>` inner XML into canonical blocks, for
 * shapes embedded inside header / footer parts. Mirrors the
 * main-document `parseBlockStreamFromXml` blockParser pattern but
 * runs the header/footer paragraph and simple-table parsers so shape
 * text boxes match what the enclosing story would parse on its own.
 *
 * Guards against pathological recursion (shape → txbx → shape
 * → txbx → …) via `HDRFTR_TXBX_MAX_DEPTH` — same class of guard as
 * `TXBX_BLOCK_STREAM_MAX_DEPTH` on the main-document side. The
 * effective depth is the larger of the caller-supplied `depth` and the
 * number of enclosing invocations plus one, so re-entry through a
 * call site that hard-codes `depth = 1` is still counted.
 *
 * @param innerXml XML text containing the `<w:txbxContent>` element.
 * @param opts Header/footer parse options, forwarded to the
 *   paragraph and table parsers.
 * @param depth Caller's view of the nesting level (1 = outermost).
 * @returns Parsed blocks; an empty array when the depth cap is
 *   exceeded, the XML does not parse, or no `<w:txbxContent>` element
 *   is found.
 */
function parseTxbxBlocksForHeaderFooter(
  innerXml: string,
  opts: ParseHeaderFooterOpts,
  depth: number,
): ReadonlyArray<{ type: string; [key: string]: unknown }> {
  const enclosingDepth = activeHdrFtrTxbxDepth;
  const effectiveDepth = Math.max(depth, enclosingDepth + 1);
  if (effectiveDepth > HDRFTR_TXBX_MAX_DEPTH) return [];

  let root: XmlElementNode;
  try {
    root = parseXml(innerXml) as XmlElementNode;
  } catch {
    // Best-effort: an unparseable text-box body contributes no blocks.
    return [];
  }
  // `parseXml` returns a wrapper whose single child is the actual
  // root element (`<w:txbxContent>`). Find it defensively.
  const txbxContent =
    localName(root.name) === "txbxContent"
      ? root
      : (root.children.find(
          (c): c is XmlElementNode =>
            c.type === "element" && localName(c.name) === "txbxContent",
        ) ?? null);
  if (!txbxContent) return [];

  // Rebind module-local `currentSourceXml` to the inner XML for the
  // duration of this recursive parse so drawing-offset reads inside
  // the txbx body resolve against the right buffer. Restore on exit,
  // together with the nesting counter.
  const previousSourceXml = currentSourceXml;
  currentSourceXml = innerXml;
  activeHdrFtrTxbxDepth = effectiveDepth;
  try {
    const blocks: BlockNode[] = [];
    for (const child of txbxContent.children) {
      if (child.type !== "element") continue;
      const name = localName(child.name);
      if (name === "p") {
        blocks.push(parseParagraphElement(child, innerXml, opts));
      } else if (name === "tbl") {
        if (isSimpleSecondaryStoryTable(child)) {
          blocks.push(parseSimpleTableElement(child, innerXml, opts));
        } else {
          // Tables the simple-table parser cannot handle are kept as
          // raw XML rather than dropped.
          blocks.push({
            type: "opaque_block",
            fragmentId: "fragment:hdrftr-txbx-tbl",
            warningId: "warning:hdrftr-txbx-opaque-table",
            rawXml: serializeElementToXml(child),
          });
        }
      }
      // Other block kinds (sdt, customXml) are rare inside a header/
      // footer text-box; fall through without emitting.
    }
    return blocks as unknown as ReadonlyArray<{
      type: string;
      [key: string]: unknown;
    }>;
  } finally {
    currentSourceXml = previousSourceXml;
    activeHdrFtrTxbxDepth = enclosingDepth;
  }
}
815
+
721
816
  function parseBookmarkElement(
722
817
  element: XmlElementNode,
723
818
  ): Extract<InlineNode, { type: "bookmark_start" | "bookmark_end" }> {