@beyondwork/docx-react-component 1.0.76 → 1.0.77
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/api/v3/ai/resolve.ts +104 -4
- package/src/io/ooxml/parse-bookmark-references.ts +123 -0
- package/src/io/ooxml/parse-footnotes.ts +26 -3
- package/src/io/ooxml/parse-headers-footers.ts +96 -1
- package/src/io/ooxml/parse-main-document.ts +256 -4
- package/src/io/ooxml/parse-shapes.ts +29 -1
- package/src/io/ooxml/table-opaque-preservation.ts +70 -5
- package/src/runtime/scopes/action-validation.ts +39 -12
- package/src/runtime/scopes/index.ts +3 -0
- package/src/runtime/scopes/resolve-reference.ts +99 -43
- package/src/session/import/loader-types.ts +26 -0
- package/src/session/import/loader.ts +12 -2
- package/src/ui-tailwind/editor-surface/perf-probe.ts +3 -0
- package/src/ui-tailwind/editor-surface/pm-decorations.ts +44 -0
- package/src/ui-tailwind/editor-surface/preserve-position.ts +28 -9
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@beyondwork/docx-react-component",
|
|
3
3
|
"publisher": "beyondwork",
|
|
4
|
-
"version": "1.0.76",
|
|
4
|
+
"version": "1.0.77",
|
|
5
5
|
"description": "Embeddable React Word (docx) editor with review, comments, tracked changes, and round-trip OOXML fidelity.",
|
|
6
6
|
"type": "module",
|
|
7
7
|
"sideEffects": [
|
package/src/api/v3/ai/resolve.ts
CHANGED
|
@@ -24,6 +24,8 @@
|
|
|
24
24
|
|
|
25
25
|
import type { RuntimeApiHandle } from "../_runtime-handle.ts";
|
|
26
26
|
import {
|
|
27
|
+
queryScopeAtPosition,
|
|
28
|
+
queryScopeInRange,
|
|
27
29
|
resolveReference,
|
|
28
30
|
type ResolveReferenceResult as RuntimeResolveResult,
|
|
29
31
|
type ScopeHandle,
|
|
@@ -67,13 +69,85 @@ export const resolveReferenceMetadata: ApiV3FnMetadata = {
|
|
|
67
69
|
boundedScope: "document",
|
|
68
70
|
auditCategory: "reference-resolve",
|
|
69
71
|
contextPromptShape:
|
|
70
|
-
"Resolve a
|
|
72
|
+
"Resolve a durable ScopeReference (scope-id / semantic-path / natural-language) into a stable ScopeHandle. Positional lookups are NOT accepted here — use ai.queryScopeAtPosition / ai.queryScopeInRange for click-to-scope / hit-test flows.",
|
|
71
73
|
},
|
|
72
74
|
stateClass: "A-canonical",
|
|
73
75
|
persistsTo: "canonical",
|
|
74
76
|
rwdReference: "§AI API § ai.resolveReference",
|
|
75
77
|
};
|
|
76
78
|
|
|
79
|
+
/**
|
|
80
|
+
* Input for the one-shot positional query entry points.
|
|
81
|
+
*
|
|
82
|
+
* Callers MUST treat the input as transient: the position is resolved
|
|
83
|
+
* against the *current* document state and the returned handle's
|
|
84
|
+
* `scopeId` becomes the durable reference. Do not store the `at` /
|
|
85
|
+
* `from`/`to` values — they are meaningless after any intervening
|
|
86
|
+
* mutation (KI-P9).
|
|
87
|
+
*/
|
|
88
|
+
export interface QueryScopeAtPositionInput {
|
|
89
|
+
readonly at: number;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
export interface QueryScopeInRangeInput {
|
|
93
|
+
readonly from: number;
|
|
94
|
+
readonly to: number;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
/**
|
|
98
|
+
* Result of a one-shot positional query. Either the innermost scope
|
|
99
|
+
* whose range matches the position/range, or `null` when nothing
|
|
100
|
+
* matches. No `status` union, no `confidence` field — the caller's
|
|
101
|
+
* next use of the handle is identity-based (via `handle.scopeId`) and
|
|
102
|
+
* the query itself does not make durable claims.
|
|
103
|
+
*/
|
|
104
|
+
export interface QueryScopePositionResult {
|
|
105
|
+
readonly handle: ScopeHandle | null;
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
export const queryScopeAtPositionMetadata: ApiV3FnMetadata = {
|
|
109
|
+
name: "ai.queryScopeAtPosition",
|
|
110
|
+
status: "live-with-adapter",
|
|
111
|
+
sourceLayer: "semantic-scope-compiler",
|
|
112
|
+
liveEvidence: {
|
|
113
|
+
runnerTest: "test/api/v3/ai/query-scope-position.test.ts",
|
|
114
|
+
commit: "pending",
|
|
115
|
+
},
|
|
116
|
+
uxIntent: { uiVisible: false, expectsUxResponse: "none" },
|
|
117
|
+
agentMetadata: {
|
|
118
|
+
readOrMutate: "read",
|
|
119
|
+
boundedScope: "document",
|
|
120
|
+
auditCategory: "reference-resolve",
|
|
121
|
+
contextPromptShape:
|
|
122
|
+
"One-shot click-to-scope / hit-test lookup. Returns the innermost scope at the given offset, or null. The handle's scopeId is durable; the offset you passed in is NOT — do not store it.",
|
|
123
|
+
},
|
|
124
|
+
stateClass: "A-canonical",
|
|
125
|
+
persistsTo: "canonical",
|
|
126
|
+
rwdReference:
|
|
127
|
+
"§AI API § ai.queryScopeAtPosition. Companion to ai.resolveReference for positional lookups — the position-as-reference surface that was conflated pre-2026-04-24 has been split out so positions can't be accidentally round-tripped as durable references (KI-P9).",
|
|
128
|
+
};
|
|
129
|
+
|
|
130
|
+
export const queryScopeInRangeMetadata: ApiV3FnMetadata = {
|
|
131
|
+
name: "ai.queryScopeInRange",
|
|
132
|
+
status: "live-with-adapter",
|
|
133
|
+
sourceLayer: "semantic-scope-compiler",
|
|
134
|
+
liveEvidence: {
|
|
135
|
+
runnerTest: "test/api/v3/ai/query-scope-position.test.ts",
|
|
136
|
+
commit: "pending",
|
|
137
|
+
},
|
|
138
|
+
uxIntent: { uiVisible: false, expectsUxResponse: "none" },
|
|
139
|
+
agentMetadata: {
|
|
140
|
+
readOrMutate: "read",
|
|
141
|
+
boundedScope: "document",
|
|
142
|
+
auditCategory: "reference-resolve",
|
|
143
|
+
contextPromptShape:
|
|
144
|
+
"One-shot selection-to-scope lookup. Returns the innermost scope fully containing [from, to], or null. The handle's scopeId is durable; the positions are NOT — do not store them.",
|
|
145
|
+
},
|
|
146
|
+
stateClass: "A-canonical",
|
|
147
|
+
persistsTo: "canonical",
|
|
148
|
+
rwdReference: "§AI API § ai.queryScopeInRange",
|
|
149
|
+
};
|
|
150
|
+
|
|
77
151
|
function asReference(input: ResolveReferenceInput): ScopeReference {
|
|
78
152
|
if ("reference" in input) return input.reference;
|
|
79
153
|
return { kind: "natural-language", hint: input.hint };
|
|
@@ -117,9 +191,10 @@ function projectResult(raw: RuntimeResolveResult): ResolveReferenceResult {
|
|
|
117
191
|
export function createResolveFamily(runtime: RuntimeApiHandle) {
|
|
118
192
|
return {
|
|
119
193
|
resolveReference(input: ResolveReferenceInput): ResolveReferenceResult {
|
|
120
|
-
// @endStateApi — live-with-adapter.
|
|
121
|
-
//
|
|
122
|
-
//
|
|
194
|
+
// @endStateApi — live-with-adapter. Durable identity lookup via
|
|
195
|
+
// ScopeReference union (scope-id / semantic-path / natural-
|
|
196
|
+
// language). Positional references are not accepted here — use
|
|
197
|
+
// queryScopeAtPosition / queryScopeInRange instead.
|
|
123
198
|
const reference = asReference(input);
|
|
124
199
|
if (reference.kind === "natural-language" && reference.hint.trim().length === 0) {
|
|
125
200
|
return { status: "not-found", confidence: "none", reason: "empty hint" };
|
|
@@ -129,5 +204,30 @@ export function createResolveFamily(runtime: RuntimeApiHandle) {
|
|
|
129
204
|
const raw = resolveReference(reference, { document, overlay });
|
|
130
205
|
return projectResult(raw);
|
|
131
206
|
},
|
|
207
|
+
|
|
208
|
+
queryScopeAtPosition(
|
|
209
|
+
input: QueryScopeAtPositionInput,
|
|
210
|
+
): QueryScopePositionResult {
|
|
211
|
+
// @endStateApi — live-with-adapter. One-shot position-to-scope
|
|
212
|
+
// query. The input offset is transient — callers must use the
|
|
213
|
+
// returned handle's scopeId as the durable reference, not the
|
|
214
|
+
// offset (KI-P9).
|
|
215
|
+
const document = runtime.getCanonicalDocument();
|
|
216
|
+
const overlay = runtime.getWorkflowOverlay();
|
|
217
|
+
const handle = queryScopeAtPosition(input.at, { document, overlay });
|
|
218
|
+
return { handle };
|
|
219
|
+
},
|
|
220
|
+
|
|
221
|
+
queryScopeInRange(input: QueryScopeInRangeInput): QueryScopePositionResult {
|
|
222
|
+
// @endStateApi — live-with-adapter. One-shot selection-to-scope
|
|
223
|
+
// query. Same durable-reference contract as queryScopeAtPosition.
|
|
224
|
+
const document = runtime.getCanonicalDocument();
|
|
225
|
+
const overlay = runtime.getWorkflowOverlay();
|
|
226
|
+
const handle = queryScopeInRange(input.from, input.to, {
|
|
227
|
+
document,
|
|
228
|
+
overlay,
|
|
229
|
+
});
|
|
230
|
+
return { handle };
|
|
231
|
+
},
|
|
132
232
|
};
|
|
133
233
|
}
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase 2 bookmark-strip reference scanner.
|
|
3
|
+
*
|
|
4
|
+
* Walks the source main-document XML and collects the names of
|
|
5
|
+
* bookmarks that are LOAD-BEARING — i.e. referenced by:
|
|
6
|
+
*
|
|
7
|
+
* 1. A hyperlink anchor (`<w:hyperlink w:anchor="NAME">`). This
|
|
8
|
+
* catches `HYPERLINK \l "NAME"` field references too — Word
|
|
9
|
+
* always emits the anchor on the surrounding `<w:hyperlink>`
|
|
10
|
+
* element regardless of how the field's instrText is shaped.
|
|
11
|
+
* 2. A `REF` / `PAGEREF` / `NOTEREF` field instruction whose first
|
|
12
|
+
* argument is the unquoted bookmark name.
|
|
13
|
+
* 3. A `TOC` field anywhere in the doc — TOC fields produce
|
|
14
|
+
* hyperlinks to generated `_Toc####` anchors at render time, so
|
|
15
|
+
* we blanket-retain `_Toc*` whenever a TOC field is present.
|
|
16
|
+
*
|
|
17
|
+
* Produces a typed `BookmarkReferenceScan` that the parser consults
|
|
18
|
+
* at every `<w:bookmarkStart>` / `<w:bookmarkEnd>` emission site to
|
|
19
|
+
* decide RETAIN vs STRIP under
|
|
20
|
+
* `ParseMainDocumentOptions.stripCosmeticMarkers`.
|
|
21
|
+
*
|
|
22
|
+
* Design note: regex-based (not full XML walk) because the scanner
|
|
23
|
+
* runs BEFORE the structural parser and only needs name-extraction
|
|
24
|
+
* accuracy. Greedy patterns are bounded to single elements to avoid
|
|
25
|
+
* catching XML attribute values inside opaque payloads.
|
|
26
|
+
*
|
|
27
|
+
* Reference docs:
|
|
28
|
+
* - `services/debug/docs/phase-2-bookmark-strip-audit-2026-04-24.md`
|
|
29
|
+
* - `services/debug/docs/l01-bookmark-hyperlink-implementation-plan-2026-04-24.md` § 2.1
|
|
30
|
+
* - `docs/architecture/cosmetic-marker-strip.md` § Phase 2
|
|
31
|
+
*/
|
|
32
|
+
|
|
33
|
+
export interface BookmarkReferenceScan {
  /**
   * Names that survive the strip — explicitly referenced or in the
   * caller-supplied allowlist.
   */
  readonly retainedNames: ReadonlySet<string>;

  /**
   * When `true`, retain ALL `_Toc*` bookmarks unconditionally because
   * the document contains at least one TOC field (TOC fields produce
   * hyperlinks to generated `_Toc####` anchors at render time and the
   * specific anchor names are not knowable until render).
   */
  readonly retainAllTocPattern: boolean;

  /**
   * Defensive blanket-retain. When `true`, retain every bookmark
   * regardless of name — the document carries an SDT data-binding
   * (`<w:dataBinding>`) whose xpath could reference bookmarks via
   * paths we cannot statically analyze. Erring on the side of
   * preservation is the correct posture; a follow-up can refine to
   * scan xpath strings for bookmark-ref shape.
   */
  readonly retainAll: boolean;
}

/**
 * `w:anchor` on a hyperlink element. Capture 1 is a double-quoted value,
 * capture 2 a single-quoted one (both quote styles are legal XML).
 */
const HYPERLINK_ANCHOR_RE =
  /<(?:\w+:)?hyperlink\b[^>]*\bw:anchor\s*=\s*(?:"([^"]*)"|'([^']*)')/gi;

/**
 * One pass over every field-related token, in document order:
 *
 *   - `boundary`  — any `<w:fldChar>`; it ends the instruction that the
 *                   preceding `<w:instrText>` runs were spelling out.
 *   - (no group)  — a self-closing `<w:instrText/>`, matched only so it is
 *                   not mistaken for an opening tag.
 *   - `run`       — text content of one `<w:instrText>` run.
 *   - `simpleDq` / `simpleSq` — the `w:instr` attribute of a
 *                   `<w:fldSimple>` (double- / single-quoted).
 */
const FIELD_TOKEN_RE =
  /(?<boundary><(?:\w+:)?fldChar\b[^>]*>)|<(?:\w+:)?instrText\b[^>]*\/>|<(?:\w+:)?instrText\b[^>]*>(?<run>[\s\S]*?)<\/(?:\w+:)?instrText>|<(?:\w+:)?fldSimple\b[^>]*?\s(?:\w+:)?instr\s*=\s*(?:"(?<simpleDq>[^"]*)"|'(?<simpleSq>[^']*)')/gi;

/** Field keywords are matched case-insensitively (`toc` counts as `TOC`). */
const TOC_FIELD_RE = /\bTOC\b/i;

/** `REF name` / `PAGEREF name` / `NOTEREF name` / `HYPERLINK name` with an unquoted first argument. */
const REFLIKE_FIELD_RE =
  /\b(?:HYPERLINK|REF|PAGEREF|NOTEREF)\s+([A-Za-z0-9_:.\-]+)/gi;

/**
 * `HYPERLINK … \l "NAME"` (or `\l NAME`) — the in-document anchor switch of
 * a HYPERLINK field written as a complex or simple field, i.e. without a
 * surrounding `<w:hyperlink w:anchor>` element.
 */
const HYPERLINK_LOCAL_SWITCH_RE =
  /\bHYPERLINK\b[\s\S]*?\\l(?=[\s"])\s*(?:"([^"]+)"|([^\s"\\]+))/gi;

const DATA_BINDING_RE = /<(?:\w+:)?dataBinding\b/i;

/** The five predefined XML entities plus numeric character references. */
const XML_ENTITY_RE = /&(#x[0-9a-fA-F]+|#[0-9]+|quot|apos|amp|lt|gt);/g;

const NAMED_XML_ENTITIES: Readonly<Record<string, string>> = {
  quot: '"',
  apos: "'",
  amp: "&",
  lt: "<",
  gt: ">",
};

/**
 * Always-retain prefix check — bookmarks whose name starts with
 * these are NEVER stripped regardless of the reference scan, because
 * they are converted to first-class scope markers by
 * `rewriteScopeMarkerBookmarks` BEFORE the strip even runs. Listing
 * them here is defense-in-depth.
 */
const ALWAYS_RETAIN_PREFIXES: readonly string[] = ["bw:scope:"];

/**
 * Decode predefined / numeric XML entities in raw attribute or text
 * content. Needed because the scanner reads the XML as a string: a
 * `w:instr` attribute spells a quoted argument as `&quot;NAME&quot;`.
 * Unknown or out-of-range references are left untouched.
 */
function decodeXmlEntities(text: string): string {
  if (!text.includes("&")) return text;
  return text.replace(XML_ENTITY_RE, (entity: string, body: string) => {
    if (!body.startsWith("#")) {
      return NAMED_XML_ENTITIES[body] ?? entity;
    }
    const codePoint = body.startsWith("#x")
      ? Number.parseInt(body.slice(2), 16)
      : Number.parseInt(body.slice(1), 10);
    // String.fromCodePoint throws a RangeError above U+10FFFF.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
  });
}

/**
 * Scan one (decoded) field instruction, adding every bookmark name it
 * references to `retained`.
 *
 * @returns `true` when the instruction mentions a TOC field.
 */
function collectInstructionReferences(
  instruction: string,
  retained: Set<string>,
): boolean {
  // `matchAll` works on a clone of the regex, so the shared module-level
  // patterns never carry `lastIndex` state between calls.
  for (const ref of instruction.matchAll(REFLIKE_FIELD_RE)) {
    if (ref[1]) retained.add(ref[1]);
  }
  for (const link of instruction.matchAll(HYPERLINK_LOCAL_SWITCH_RE)) {
    const anchor = link[1] ?? link[2];
    if (anchor) retained.add(anchor);
  }
  return TOC_FIELD_RE.test(instruction);
}

/**
 * Collect the bookmark names in `documentXml` that are load-bearing and
 * must therefore survive the cosmetic-marker strip.
 *
 * Sources of references:
 *   1. `<w:hyperlink w:anchor="NAME">`.
 *   2. Field instructions — both complex fields (`<w:instrText>` runs
 *      bracketed by `<w:fldChar>`) and simple fields
 *      (`<w:fldSimple w:instr="…">`) — of the form `REF NAME`,
 *      `PAGEREF NAME`, `NOTEREF NAME`, `HYPERLINK NAME` or
 *      `HYPERLINK \l "NAME"`.
 *   3. Any TOC field, which switches on blanket retention of `_Toc*`.
 *
 * Each `<w:instrText>` run is scanned on its own AND as part of the
 * concatenation of all runs since the last `<w:fldChar>`, so an
 * instruction split across runs still resolves. The scan is deliberately
 * biased towards over-retention: a spurious extra name only keeps a
 * bookmark that could have been stripped.
 *
 * @param documentXml      Raw main-document XML.
 * @param callerAllowlist  Names to retain unconditionally.
 * @returns The scan result consumed by `isRetainedBookmarkName`.
 */
export function scanBookmarkReferences(
  documentXml: string,
  callerAllowlist: ReadonlyArray<string> = [],
): BookmarkReferenceScan {
  const retained = new Set<string>(callerAllowlist);
  const retainAll = DATA_BINDING_RE.test(documentXml);
  let retainAllToc = false;

  // 1. <w:hyperlink w:anchor="NAME">
  for (const anchorMatch of documentXml.matchAll(HYPERLINK_ANCHOR_RE)) {
    const rawAnchor = anchorMatch[1] ?? anchorMatch[2];
    if (rawAnchor) retained.add(decodeXmlEntities(rawAnchor));
  }

  // 2. Field instructions (complex + simple fields).
  const scanInstruction = (instruction: string): void => {
    if (collectInstructionReferences(instruction, retained)) {
      retainAllToc = true;
    }
  };
  let pendingRuns: string[] = [];
  const flushPendingRuns = (): void => {
    // A single run was already scanned on its own; only a multi-run
    // instruction can reveal something new once joined.
    if (pendingRuns.length > 1) scanInstruction(pendingRuns.join(""));
    pendingRuns = [];
  };

  for (const token of documentXml.matchAll(FIELD_TOKEN_RE)) {
    const groups = token.groups ?? {};
    if (groups.boundary !== undefined) {
      flushPendingRuns();
      continue;
    }
    if (groups.run !== undefined) {
      const runText = decodeXmlEntities(groups.run);
      scanInstruction(runText);
      pendingRuns.push(runText);
      continue;
    }
    const simpleInstruction = groups.simpleDq ?? groups.simpleSq;
    if (simpleInstruction !== undefined) {
      scanInstruction(decodeXmlEntities(simpleInstruction));
    }
  }
  flushPendingRuns();

  return {
    retainedNames: retained,
    retainAllTocPattern: retainAllToc,
    retainAll,
  };
}

/**
 * Decide whether the bookmark called `name` must be kept.
 *
 * @param name  Bookmark name as read from `<w:bookmarkStart w:name>`.
 * @param scan  Result of `scanBookmarkReferences` for the same document.
 * @returns `true` when the scan demands blanket retention, the name was
 *   referenced or allowlisted, it is a `_Toc*` bookmark in a document with
 *   a TOC field, or it carries an always-retain prefix.
 */
export function isRetainedBookmarkName(
  name: string,
  scan: BookmarkReferenceScan,
): boolean {
  if (scan.retainAll) return true;
  if (scan.retainedNames.has(name)) return true;
  if (scan.retainAllTocPattern && name.startsWith("_Toc")) return true;
  return ALWAYS_RETAIN_PREFIXES.some((prefix) => name.startsWith(prefix));
}
|
|
@@ -13,6 +13,7 @@ import type {
|
|
|
13
13
|
TextMark,
|
|
14
14
|
} from "../../model/canonical-document.ts";
|
|
15
15
|
import { classifyFieldInstruction } from "./parse-fields.ts";
|
|
16
|
+
import { isSafeTableFieldInstruction } from "./table-opaque-preservation.ts";
|
|
16
17
|
import {
|
|
17
18
|
readCellBorders,
|
|
18
19
|
readCellCnfStyle,
|
|
@@ -695,6 +696,15 @@ function findChildElementOptional(
|
|
|
695
696
|
|
|
696
697
|
// ---- Simple secondary-story table support ----
|
|
697
698
|
|
|
699
|
+
// Revision + structural-change markup disqualifies a footnote table from
|
|
700
|
+
// supported-roundtrip — tracked-change-aware table editing isn't
|
|
701
|
+
// implemented yet, and SDT / customXml structural wrappers aren't
|
|
702
|
+
// supported at footnote-table scope. Field-bearing elements are handled
|
|
703
|
+
// per-instruction via the shared `isSafeTableFieldInstruction` helper
|
|
704
|
+
// (coord-01 §11 unification, 2026-04-24) — this replaces the pre-unification
|
|
705
|
+
// "reject ALL fldChar/instrText unconditionally" stance so footnote tables
|
|
706
|
+
// with supported field families (REF / DOCPROPERTY / FORMTEXT / etc.) can
|
|
707
|
+
// now parse as structured.
|
|
698
708
|
const RISKY_TABLE_ELEMENT_NAMES = new Set([
|
|
699
709
|
"ins",
|
|
700
710
|
"del",
|
|
@@ -706,9 +716,6 @@ const RISKY_TABLE_ELEMENT_NAMES = new Set([
|
|
|
706
716
|
"rPrChange",
|
|
707
717
|
"pPrChange",
|
|
708
718
|
"sectPrChange",
|
|
709
|
-
"fldSimple",
|
|
710
|
-
"fldChar",
|
|
711
|
-
"instrText",
|
|
712
719
|
"sdt",
|
|
713
720
|
"customXml",
|
|
714
721
|
]);
|
|
@@ -726,6 +733,22 @@ function containsRiskyElement(element: XmlElementNode): boolean {
|
|
|
726
733
|
if (RISKY_TABLE_ELEMENT_NAMES.has(name)) {
|
|
727
734
|
return true;
|
|
728
735
|
}
|
|
736
|
+
// Field-bearing elements: defer to the shared table-safety predicate.
|
|
737
|
+
if (name === "fldSimple" || name === "instrText") {
|
|
738
|
+
const instruction =
|
|
739
|
+
readStringAttr(child, "w:instr") ??
|
|
740
|
+
extractTextContent(child);
|
|
741
|
+
if (!isSafeTableFieldInstruction(instruction)) {
|
|
742
|
+
return true;
|
|
743
|
+
}
|
|
744
|
+
continue;
|
|
745
|
+
}
|
|
746
|
+
if (name === "fldChar") {
|
|
747
|
+
// `<w:fldChar>` is always safe — the instruction text it brackets
|
|
748
|
+
// is carried by a sibling `<w:instrText>` that this walker checks
|
|
749
|
+
// separately.
|
|
750
|
+
continue;
|
|
751
|
+
}
|
|
729
752
|
if (name === "tbl") {
|
|
730
753
|
return true;
|
|
731
754
|
}
|
|
@@ -70,8 +70,21 @@ export interface ParsedHeaderFooterDocument {
|
|
|
70
70
|
blocks: BlockNode[];
|
|
71
71
|
}
|
|
72
72
|
|
|
73
|
+
// `blockParser` must stay internal to the header/footer lifecycle — the
|
|
74
|
+
// recursion depth counter + source-xml rebinding around `currentSourceXml`
|
|
75
|
+
// are managed inside this module. External callers never supply one.
|
|
73
76
|
export type ParseHeaderFooterOpts = Omit<ParseDrawingOpts, "blockParser">;
|
|
74
77
|
|
|
78
|
+
/**
|
|
79
|
+
* Max depth for shape txbxContent recursion inside header / footer
|
|
80
|
+
* parts. Matches the `TXBX_BLOCK_STREAM_MAX_DEPTH` guard on the
|
|
81
|
+
* main-document path — text boxes can legally nest
|
|
82
|
+
* (drawing → txbx → drawing → txbx → …), and OOXML has no spec-level
|
|
83
|
+
* bound, so we cap recursion to avoid pathological input hanging the
|
|
84
|
+
* parser.
|
|
85
|
+
*/
|
|
86
|
+
const HDRFTR_TXBX_MAX_DEPTH = 4;
|
|
87
|
+
|
|
75
88
|
// ---- XML node types (inline, no external dep) ----
|
|
76
89
|
|
|
77
90
|
interface XmlElementNode {
|
|
@@ -687,6 +700,15 @@ function parseDrawingInlineNode(
|
|
|
687
700
|
const frame = parseDrawingFrame(rawXml, {
|
|
688
701
|
...opts,
|
|
689
702
|
relationships: opts.relationships ?? [],
|
|
703
|
+
// Coord-02 §14 / coord-11 §22 follow-up (2026-04-24): supply a
|
|
704
|
+
// blockParser so `parseShapeContent` populates `shape.txbxBlocks`
|
|
705
|
+
// for text-box shapes in headers / footers. Without this, a
|
|
706
|
+
// footer shape carrying "Copyright CCEP STRICTLY CONFIDENTIAL"
|
|
707
|
+
// flattens to a shape with `text` summary only — the inner `<w:p>`
|
|
708
|
+
// chain is lost. The main-document path has always supplied a
|
|
709
|
+
// blockParser; headers/footers did not until now.
|
|
710
|
+
blockParser: (xml) =>
|
|
711
|
+
parseTxbxBlocksForHeaderFooter(xml, opts, 1),
|
|
690
712
|
});
|
|
691
713
|
if (
|
|
692
714
|
frame &&
|
|
@@ -705,7 +727,9 @@ function parseDrawingInlineNode(
|
|
|
705
727
|
}
|
|
706
728
|
|
|
707
729
|
const shapeXml = legacyDrawingXml ?? rawXml;
|
|
708
|
-
const legacyShape = parseShapeXml(shapeXml)
|
|
730
|
+
const legacyShape = parseShapeXml(shapeXml, (xml) =>
|
|
731
|
+
parseTxbxBlocksForHeaderFooter(xml, opts, 1),
|
|
732
|
+
);
|
|
709
733
|
if (!legacyShape) {
|
|
710
734
|
return null;
|
|
711
735
|
}
|
|
@@ -718,6 +742,77 @@ function parseDrawingInlineNode(
|
|
|
718
742
|
return legacyShape;
|
|
719
743
|
}
|
|
720
744
|
|
|
745
|
+
/**
 * Nesting level of the `parseTxbxBlocksForHeaderFooter` call currently on
 * the stack (0 when none is active). Kept at module level, next to the
 * `currentSourceXml` rebinding, because the visible call sites pass a
 * constant `depth` and so cannot carry the nesting level themselves.
 */
let activeHdrFtrTxbxDepth = 0;

/**
 * Walk `<w:txbxContent>` inner XML into canonical blocks, for shapes
 * embedded inside header / footer parts.
 *
 * Only two child kinds produce output:
 *   - `<w:p>`   → `parseParagraphElement`
 *   - `<w:tbl>` → `parseSimpleTableElement` when
 *                 `isSimpleSecondaryStoryTable` accepts it, otherwise an
 *                 `opaque_block` carrying the serialized table XML.
 * Every other child element (e.g. sdt, customXml) is skipped without
 * emitting a block.
 *
 * Recursion guard: returns `[]` once the nesting level exceeds
 * `HDRFTR_TXBX_MAX_DEPTH`. The level is the larger of the `depth`
 * argument and the live module counter + 1. The counter is needed because
 * the call sites visible in this module always pass `1`, so `depth` alone
 * never grows when a text box contains a drawing that contains another
 * text box.
 * NOTE(review): the counter assumes the block-parser callback runs
 * synchronously inside the enclosing paragraph parse — confirm against
 * `parseDrawingFrame` / `parseShapeXml`.
 *
 * @param innerXml  XML of the text-box content to parse.
 * @param opts      Header/footer parse options, forwarded to the
 *                  paragraph / table parsers.
 * @param depth     Caller-declared nesting level (1 for a top-level shape).
 * @returns Parsed blocks, or `[]` when the depth cap is hit, the XML does
 *   not parse, or no `txbxContent` element is found.
 */
function parseTxbxBlocksForHeaderFooter(
  innerXml: string,
  opts: ParseHeaderFooterOpts,
  depth: number,
): ReadonlyArray<{ type: string; [key: string]: unknown }> {
  const effectiveDepth = Math.max(depth, activeHdrFtrTxbxDepth + 1);
  if (effectiveDepth > HDRFTR_TXBX_MAX_DEPTH) return [];

  let root: XmlElementNode;
  try {
    root = parseXml(innerXml) as XmlElementNode;
  } catch {
    // Unparseable text-box XML: report no blocks rather than failing the
    // whole header/footer parse.
    return [];
  }
  // `parseXml` returns a wrapper whose single child is the actual
  // root element (`<w:txbxContent>`). Find it defensively.
  const txbxContent =
    localName(root.name) === "txbxContent"
      ? root
      : (root.children.find(
          (c): c is XmlElementNode =>
            c.type === "element" && localName(c.name) === "txbxContent",
        ) ?? null);
  if (!txbxContent) return [];

  // Rebind module-local `currentSourceXml` to the inner XML for the
  // duration of this recursive parse so drawing-offset reads inside
  // the txbx body resolve against the right buffer, and publish the
  // nesting level for any re-entrant call. Both are restored on exit.
  const previousSourceXml = currentSourceXml;
  const previousDepth = activeHdrFtrTxbxDepth;
  currentSourceXml = innerXml;
  activeHdrFtrTxbxDepth = effectiveDepth;
  try {
    const blocks: BlockNode[] = [];
    for (const child of txbxContent.children) {
      if (child.type !== "element") continue;
      const name = localName(child.name);
      if (name === "p") {
        blocks.push(parseParagraphElement(child, innerXml, opts));
      } else if (name === "tbl") {
        if (isSimpleSecondaryStoryTable(child)) {
          blocks.push(parseSimpleTableElement(child, innerXml, opts));
        } else {
          blocks.push({
            type: "opaque_block",
            fragmentId: "fragment:hdrftr-txbx-tbl",
            warningId: "warning:hdrftr-txbx-opaque-table",
            rawXml: serializeElementToXml(child),
          });
        }
      }
      // Other block kinds (sdt, customXml) are rare inside a header/
      // footer text-box; fall through without emitting.
    }
    return blocks as unknown as ReadonlyArray<{
      type: string;
      [key: string]: unknown;
    }>;
  } finally {
    activeHdrFtrTxbxDepth = previousDepth;
    currentSourceXml = previousSourceXml;
  }
}
|
|
815
|
+
|
|
721
816
|
function parseBookmarkElement(
|
|
722
817
|
element: XmlElementNode,
|
|
723
818
|
): Extract<InlineNode, { type: "bookmark_start" | "bookmark_end" }> {
|