@beyondwork/docx-react-component 1.0.47 → 1.0.48

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/package.json +1 -1
  2. package/src/api/public-types.ts +115 -1
  3. package/src/compare/diff-engine.ts +4 -0
  4. package/src/core/commands/add-scope.ts +257 -0
  5. package/src/core/commands/formatting-commands.ts +2 -0
  6. package/src/core/schema/text-schema.ts +95 -1
  7. package/src/core/state/text-transaction.ts +17 -5
  8. package/src/io/chart-preview-resolver.ts +27 -0
  9. package/src/io/docx-session.ts +226 -38
  10. package/src/io/export/serialize-main-document.ts +37 -0
  11. package/src/io/export/serialize-settings.ts +421 -0
  12. package/src/io/export/serialize-styles.ts +10 -0
  13. package/src/io/normalize/normalize-text.ts +1 -0
  14. package/src/io/ooxml/chart/parse-axis.ts +277 -0
  15. package/src/io/ooxml/chart/parse-chart-space.ts +813 -0
  16. package/src/io/ooxml/chart/parse-series.ts +570 -0
  17. package/src/io/ooxml/chart/resolve-color.ts +251 -0
  18. package/src/io/ooxml/chart/types.ts +420 -0
  19. package/src/io/ooxml/parse-block-structure.ts +99 -0
  20. package/src/io/ooxml/parse-complex-content.ts +87 -2
  21. package/src/io/ooxml/parse-main-document.ts +115 -1
  22. package/src/io/ooxml/parse-scope-markers.ts +184 -0
  23. package/src/io/ooxml/parse-settings-blueprint.ts +349 -0
  24. package/src/io/ooxml/parse-settings.ts +97 -1
  25. package/src/io/ooxml/parse-styles.ts +65 -0
  26. package/src/io/ooxml/parse-theme.ts +2 -127
  27. package/src/io/ooxml/xml-attr-helpers.ts +59 -1
  28. package/src/io/ooxml/xml-parser.ts +142 -0
  29. package/src/model/canonical-document.ts +94 -0
  30. package/src/model/scope-markers.ts +144 -0
  31. package/src/runtime/collab/base-doc-fingerprint.ts +99 -0
  32. package/src/runtime/collab/checkpoint-election.ts +75 -0
  33. package/src/runtime/collab/checkpoint-scheduler.ts +204 -0
  34. package/src/runtime/collab/checkpoint-store.ts +115 -0
  35. package/src/runtime/collab/event-types.ts +27 -0
  36. package/src/runtime/collab/index.ts +22 -0
  37. package/src/runtime/collab/remote-cursor-awareness.ts +167 -0
  38. package/src/runtime/collab/runtime-collab-sync.ts +279 -0
  39. package/src/runtime/document-runtime.ts +214 -16
  40. package/src/runtime/editor-surface/capabilities.ts +63 -50
  41. package/src/runtime/layout/layout-engine-version.ts +8 -1
  42. package/src/runtime/prerender/cache-envelope.ts +19 -7
  43. package/src/runtime/prerender/cache-key.ts +25 -14
  44. package/src/runtime/prerender/canonical-document-hash.ts +63 -0
  45. package/src/runtime/prerender/customxml-cache.ts +211 -0
  46. package/src/runtime/prerender/customxml-probe.ts +78 -0
  47. package/src/runtime/prerender/prerender-document.ts +74 -7
  48. package/src/runtime/scope-resolver.ts +148 -0
  49. package/src/runtime/scope-tag-registry.ts +10 -0
  50. package/src/runtime/surface-projection.ts +8 -1
  51. package/src/ui/WordReviewEditor.tsx +30 -0
  52. package/src/ui/editor-runtime-boundary.ts +6 -1
  53. package/src/ui/runtime-shortcut-dispatch.ts +12 -7
@@ -0,0 +1,211 @@
1
+ import type { OpcPackage } from "../../io/opc/package-reader.ts";
2
+ import type { OpcPackagePart } from "../../io/ooxml/part-manifest.ts";
3
+ import {
4
+ WORKFLOW_PAYLOAD_PART_PATH,
5
+ buildEditorStateXml,
6
+ parseEditorStateXml,
7
+ parseWorkflowPayloadEnvelopeFromPackage,
8
+ type EditorStatePayload,
9
+ } from "../../io/ooxml/workflow-payload.ts";
10
+ import {
11
+ LAYCACHE_SCHEMA_VERSION,
12
+ LAYOUT_ENGINE_VERSION,
13
+ } from "../layout/layout-engine-version.ts";
14
+ import type { CacheEnvelope } from "./cache-envelope.ts";
15
+
16
+ /**
17
+ * L7 Phase 2.5 Plan B B.4 — customXml read/write for the prerender cache.
18
+ *
19
+ * Persists the cache envelope inside the docx's workflow-payload part
20
+ * (`/customXml/item1.xml`) under the "laycache" namespace, so the cache
21
+ * travels with the file and hands a warm-start experience to any client
22
+ * that opens the same bytes.
23
+ *
24
+ * **Routing decision:** laycache does NOT extend `EditorStateNamespace`.
25
+ * The entry round-trips through the existing `unknownNamespaces`
26
+ * preservation path, which already provides:
27
+ * - Automatic Word round-trip (Word doesn't touch customXml parts).
28
+ * - Automatic runtime save-path preservation (the save path appends
29
+ * `channel.getUnknownEntries()` verbatim).
30
+ * - Separation of cache concerns from runtime subsystem state (laycache
31
+ * is not conflated with hostAnnotations/workflowOverlay/etc.).
32
+ *
33
+ * Tradeoff: the write path hand-builds a `<bw:namespace name="laycache">`
34
+ * XML fragment rather than leveraging `buildEditorStateXml`'s entry
35
+ * serializer. Six lines of escaped string concatenation in exchange for
36
+ * the above.
37
+ *
38
+ * **Scope (MVP):** Write-side requires the docx to already have a
39
+ * workflow-payload part (`/customXml/item1.xml`). Docs without it return
40
+ * `{ written: false, reason: "no-customxml-part" }` so the caller can
41
+ * fall back to IndexedDB-only caching. Fresh/minimal docs that lack a
42
+ * payload part are already fast-opening; Plan B is targeted at CCEP-scale
43
+ * templates which reliably carry the part.
44
+ */
45
+
46
+ export const LAYCACHE_NAMESPACE_NAME = "laycache" as const;
47
+ const LAYCACHE_ENTRY_SCHEMA_VERSION = `laycache/${LAYCACHE_SCHEMA_VERSION}`;
48
+
49
+ export type WriteEnvelopeResult =
50
+ | { written: true }
51
+ | { written: false; reason: "no-customxml-part" };
52
+
53
/**
 * Stores `envelope` as the `laycache` entry of the package's workflow-payload
 * part, replacing any laycache entry that is already there. The updated part
 * is put back into `opcPackage.parts`, so the package passed in is modified.
 *
 * The known editor-state entries and every other unknown namespace are carried
 * over as parsed. When the package has no workflow-payload part, nothing is
 * changed and `{ written: false, reason: "no-customxml-part" }` is returned so
 * the caller can fall back to IndexedDB caching.
 *
 * NOTE(review): `spliceEditorStateIntoPayloadXml` returns its input unchanged
 * when the payload has neither an editor-state block nor a
 * `</bw:workflowPayload>` close tag; this function still reports
 * `written: true` in that case — confirm that is intended.
 *
 * @param opcPackage Package whose workflow-payload part is rewritten in place.
 * @param envelope Cache envelope to serialize into the laycache namespace.
 * @returns Whether the workflow-payload part existed and was rewritten.
 */
export function writeEnvelopeToOpcPackage(
  opcPackage: OpcPackage,
  envelope: CacheEnvelope,
): WriteEnvelopeResult {
  const payloadPart = opcPackage.parts.get(WORKFLOW_PAYLOAD_PART_PATH);
  if (!payloadPart) {
    return { written: false, reason: "no-customxml-part" };
  }

  const currentXml = new TextDecoder().decode(payloadPart.bytes);
  const currentState: EditorStatePayload = parseEditorStateXml(currentXml) ?? {
    entries: [],
  };

  // Drop a stale laycache entry (if any) and keep all other unknown namespaces.
  const otherNamespaces = (currentState.unknownNamespaces ?? []).filter(
    ({ name }) => name !== LAYCACHE_NAMESPACE_NAME,
  );
  const laycacheEntry = {
    name: LAYCACHE_NAMESPACE_NAME,
    rawXml: buildLaycacheNamespaceXml(envelope),
  };

  const updatedState: EditorStatePayload = {
    entries: currentState.entries,
    unknownNamespaces: [...otherNamespaces, laycacheEntry],
  };
  const updatedXml = spliceEditorStateIntoPayloadXml(
    currentXml,
    buildEditorStateXml(updatedState),
  );

  // The spread carries the previous crc32 over unchanged. The original author
  // notes that `writeOpcPackage` recomputes the CRC at zip time, so the stale
  // value is not consumed by the writer (TODO confirm against package-writer).
  const updatedPart: OpcPackagePart = {
    ...payloadPart,
    bytes: new TextEncoder().encode(updatedXml),
  };
  opcPackage.parts.set(WORKFLOW_PAYLOAD_PART_PATH, updatedPart);
  return { written: true };
}
106
+
107
/**
 * Looks up the laycache envelope in an already-parsed OPC package and
 * validates it before handing it back.
 *
 * `null` is returned whenever any step fails:
 *   - the workflow payload (or its editor state) cannot be obtained
 *   - `unknownNamespaces` holds no laycache entry
 *   - the entry has no usable inline content
 *   - the inline content is not valid JSON
 *   - the parsed value fails `isValidCacheEnvelope` (schema/engine version
 *     mismatch, missing or mis-typed required fields)
 *
 * @param opcPackage Parsed package to inspect; it is not modified.
 * @returns The validated envelope, or `null` when none is usable.
 */
export function readEnvelopeFromOpcPackage(opcPackage: OpcPackage): CacheEnvelope | null {
  const editorState = parseWorkflowPayloadEnvelopeFromPackage(opcPackage)?.editorState;
  if (!editorState) return null;

  const namespaces = editorState.unknownNamespaces ?? [];
  const laycacheEntry = namespaces.find(({ name }) => name === LAYCACHE_NAMESPACE_NAME);
  if (!laycacheEntry) return null;

  const json = extractInlineCdata(laycacheEntry.rawXml);
  if (json === null) return null;

  let candidate: unknown;
  try {
    candidate = JSON.parse(json);
  } catch {
    // Corrupted payload — treat as a cache miss.
    return null;
  }

  return isValidCacheEnvelope(candidate) ? candidate : null;
}
139
+
140
+ // ---------------------------------------------------------------------------
141
+ // Helpers
142
+ // ---------------------------------------------------------------------------
143
+
144
/**
 * Serializes `envelope` to JSON and wraps it in the hand-built
 * `<bw:namespace name="laycache">` fragment stored in the workflow payload.
 *
 * The JSON sits inside a CDATA section; every `]]>` in the JSON is split
 * across two adjacent CDATA sections so it cannot terminate the section early.
 */
function buildLaycacheNamespaceXml(envelope: CacheEnvelope): string {
  const cdataSafeJson = JSON.stringify(envelope).split("]]>").join("]]]]><![CDATA[>");
  const fragments = [
    `<bw:namespace name="${LAYCACHE_NAMESPACE_NAME}" schemaVersion="${LAYCACHE_ENTRY_SCHEMA_VERSION}">`,
    `<bw:inline><![CDATA[${cdataSafeJson}]]></bw:inline>`,
    `</bw:namespace>`,
  ];
  return fragments.join("");
}
152
+
153
/**
 * Replaces (or inserts) the `<bw:editorState>` block inside an existing
 * workflow-payload XML string. Three cases:
 *   1. A block is already present — it is replaced in place.
 *   2. No block, but a `</bw:workflowPayload>` close tag exists — the block is
 *      inserted right before the close tag and the `version` attribute on the
 *      opening `<bw:workflowPayload>` tag (when present) is set to "1.2".
 *   3. Neither — the input is returned unchanged, because the outer structure
 *      cannot be safely synthesized here.
 *
 * The block is always inserted through a replacer *function*. The block
 * embeds JSON derived from document content, and a plain replacement string
 * would have `$$`, `$&`, `` $` `` and `$'` expanded as replacement patterns,
 * corrupting the payload.
 *
 * @param xml Current workflow-payload XML.
 * @param editorStateBlock Fully serialized `<bw:editorState>…</bw:editorState>`.
 * @returns The payload XML with the block spliced in, or `xml` unchanged.
 */
function spliceEditorStateIntoPayloadXml(xml: string, editorStateBlock: string): string {
  const existingBlockRe = /<bw:editorState\b[^>]*>[\s\S]*?<\/bw:editorState>/u;
  if (existingBlockRe.test(xml)) {
    // Function replacer: insert the block verbatim, no `$` pattern expansion.
    return xml.replace(existingBlockRe, () => editorStateBlock);
  }

  const rootCloseRe = /<\/bw:workflowPayload>/u;
  if (!rootCloseRe.test(xml)) {
    return xml;
  }

  // The version attribute is upgraded before the block is inserted so the
  // attribute regex only ever sees the original payload text.
  const upgraded = xml.replace(
    /(<bw:workflowPayload\b[^>]*?\bversion=")([^"]*)(")/u,
    (_match, prefix: string, _version: string, suffix: string) => `${prefix}1.2${suffix}`,
  );
  return upgraded.replace(rootCloseRe, () => `${editorStateBlock}\n</bw:workflowPayload>`);
}
185
+
186
/**
 * Extracts the text content of the first `<bw:inline>` element in `rawXml`.
 *
 * The content is read by walking its CDATA sections in order: the body of
 * each section is taken verbatim and adjacent sections are concatenated,
 * which undoes the `]]>` split applied when the fragment is written. Text
 * outside CDATA sections is kept as-is (entities are not decoded). Because
 * CDATA bodies are never re-scanned, payload text that itself contains
 * `<![CDATA[` or `</bw:inline>` is returned intact.
 *
 * @param rawXml Raw XML of a namespace entry.
 * @returns The trimmed content, or `null` when there is no `<bw:inline>`
 *   element, the element or a CDATA section is unterminated, or the content
 *   is empty after trimming.
 */
function extractInlineCdata(rawXml: string): string | null {
  const CDATA_OPEN = "<![CDATA[";
  const CDATA_CLOSE = "]]>";
  const ELEMENT_CLOSE = "</bw:inline>";

  const openTag = /<bw:inline\b[^>]*>/u.exec(rawXml);
  if (!openTag) return null;

  let cursor = openTag.index + openTag[0].length;
  let content = "";
  for (;;) {
    const elementEnd = rawXml.indexOf(ELEMENT_CLOSE, cursor);
    if (elementEnd === -1) return null; // unterminated <bw:inline>

    const cdataStart = rawXml.indexOf(CDATA_OPEN, cursor);
    if (cdataStart === -1 || cdataStart > elementEnd) {
      // No further CDATA before the close tag: the element ends here.
      content += rawXml.slice(cursor, elementEnd);
      break;
    }

    // Keep any text before the section, then take the section body verbatim.
    content += rawXml.slice(cursor, cdataStart);
    const bodyStart = cdataStart + CDATA_OPEN.length;
    const bodyEnd = rawXml.indexOf(CDATA_CLOSE, bodyStart);
    if (bodyEnd === -1) return null; // unterminated CDATA section
    content += rawXml.slice(bodyStart, bodyEnd);
    cursor = bodyEnd + CDATA_CLOSE.length;
  }

  const trimmed = content.trim();
  return trimmed.length > 0 ? trimmed : null;
}
193
+
194
/**
 * Type guard for a parsed laycache envelope.
 *
 * Accepts only non-null objects whose `schemaVersion` and `engineVersion`
 * equal the current `LAYCACHE_SCHEMA_VERSION` / `LAYOUT_ENGINE_VERSION`,
 * whose `fontFingerprint`, `structuralHash` and `canonicalDocumentHash` are
 * strings, and whose `graph`, `surface` and `canonicalDocument` are non-null
 * objects. The nested objects are not inspected any further.
 */
function isValidCacheEnvelope(value: unknown): value is CacheEnvelope {
  if (value === null || typeof value !== "object") return false;
  const candidate = value as Record<string, unknown>;

  if (candidate.schemaVersion !== LAYCACHE_SCHEMA_VERSION) return false;
  if (candidate.engineVersion !== LAYOUT_ENGINE_VERSION) return false;

  const stringFields = ["fontFingerprint", "structuralHash", "canonicalDocumentHash"];
  if (!stringFields.every((key) => typeof candidate[key] === "string")) return false;

  const objectFields = ["graph", "surface", "canonicalDocument"];
  return objectFields.every((key) => {
    const field = candidate[key];
    return typeof field === "object" && field !== null;
  });
}
211
+
@@ -0,0 +1,78 @@
1
+ import type { OpcPackage } from "../../io/opc/package-reader.ts";
2
+ import { readOpcPackage } from "../../io/opc/package-reader.ts";
3
+ import { parseBlockStructure } from "../../io/ooxml/parse-block-structure.ts";
4
+ import type { CacheEnvelope } from "./cache-envelope.ts";
5
+ import { computeStructuralHash } from "./cache-key.ts";
6
+ import { readEnvelopeFromOpcPackage } from "./customxml-cache.ts";
7
+
8
+ /**
9
+ * L7 Phase 2.5 Plan B B.6 — customXml probe (read-side pre-parse).
10
+ *
11
+ * Opens a docx's OPC package just enough to extract the laycache envelope
12
+ * from `/customXml/item1.xml` without parsing `word/document.xml` or
13
+ * running the canonical-document builder. Consumers use the returned
14
+ * envelope to decide whether to take the fast seeding path or fall
15
+ * through to `loadDocxEditorSessionAsync`'s full pipeline.
16
+ *
17
+ * Cost budget: ~20-50 ms on extra-large (OPC unzip 17 ms + XML regex
18
+ * extract + JSON parse + schema validation). Well below the ~584 ms of
19
+ * parse + skeleton-ready work that a successful probe skips.
20
+ *
21
+ * Null is returned for every rejection path:
22
+ * - OPC parse failure (malformed zip, missing Content_Types)
23
+ * - no workflow-payload part (`/customXml/item1.xml`)
24
+ * - no laycache entry inside the editor-state payload
25
+ * - corrupted inline JSON
26
+ * - envelope.schemaVersion or engineVersion mismatch
27
+ * - missing/mis-typed required envelope fields
28
+ *
29
+ * The returned `opcPackage` is included so callers can hand it to
30
+ * downstream code that would otherwise re-parse the ZIP from bytes
31
+ * (saving the ~17 ms OPC unzip on the cache-hit path).
32
+ *
33
+ * **B.7 structural verification.** When the probe accepts an envelope,
34
+ * it additionally runs `parseBlockStructure` against the package's
35
+ * `word/document.xml` and compares the resulting `structuralHash`
36
+ * against `envelope.structuralHash`. A mismatch means the docx has
37
+ * been edited since the envelope was written — e.g. Word added a
38
+ * paragraph and saved — and the probe returns null to force the full
39
+ * parse path. See `src/io/ooxml/parse-block-structure.ts` for the
40
+ * probe's known limitations (opaque-promoting OOXML features cannot
41
+ * be detected shallow-parse; those docs safely fall through).
42
+ */
43
+ export interface LaycacheProbeResult {
44
+ readonly envelope: CacheEnvelope;
45
+ readonly opcPackage: OpcPackage;
46
+ }
47
+
48
/**
 * Probes a docx for a usable laycache envelope without running the full
 * document load.
 *
 * Steps: read the OPC package, read and validate the envelope via
 * `readEnvelopeFromOpcPackage`, then (B.7) shallow-parse `/word/document.xml`
 * with `parseBlockStructure` and compare its structural hash against
 * `envelope.structuralHash`.
 *
 * @param input Raw docx bytes.
 * @returns The envelope together with the parsed package, or `null` when the
 *   package cannot be read, no valid envelope is present, `/word/document.xml`
 *   is missing, the structural probe throws, or the hashes differ.
 */
export async function tryReadLaycacheEnvelope(
  input: ArrayBuffer | Uint8Array,
): Promise<LaycacheProbeResult | null> {
  const bytes = input instanceof Uint8Array ? input : new Uint8Array(input);

  let opcPackage: OpcPackage;
  try {
    opcPackage = readOpcPackage(bytes);
  } catch {
    return null;
  }

  const envelope = readEnvelopeFromOpcPackage(opcPackage);
  if (!envelope) return null;

  // B.7 — shallow structural probe. Reject envelopes whose cached
  // structuralHash does not match a fresh shallow parse of the current
  // document.xml (e.g. the docx was edited in Word after the prerender).
  const documentXmlPart = opcPackage.parts.get("/word/document.xml");
  if (!documentXmlPart) return null;

  try {
    const documentXml = new TextDecoder().decode(documentXmlPart.bytes);
    const probedBlocks = parseBlockStructure(documentXml);
    const probedHash = await computeStructuralHash(probedBlocks);
    if (probedHash !== envelope.structuralHash) return null;
  } catch {
    // A probe that cannot shallow-parse or hash the document is a rejection,
    // not a failure: returning null lets the caller take the full load path.
    return null;
  }

  return { envelope, opcPackage };
}
@@ -2,6 +2,8 @@ import type { EditorSurfaceSnapshot } from "../../api/public-types";
2
2
  import { createSelectionSnapshot } from "../../core/state/editor-state.ts";
3
3
  import { loadDocxEditorSessionAsync } from "../../io/docx-session.ts";
4
4
  import { createLoadScheduler } from "../../io/load-scheduler.ts";
5
+ import { readOpcPackage } from "../../io/opc/package-reader.ts";
6
+ import { writeOpcPackage } from "../../io/opc/package-writer.ts";
5
7
  import {
6
8
  LAYCACHE_SCHEMA_VERSION,
7
9
  LAYOUT_ENGINE_VERSION,
@@ -14,6 +16,8 @@ import {
14
16
  deriveCacheKey,
15
17
  type CacheKeyBlock,
16
18
  } from "./cache-key.ts";
19
+ import { computeCanonicalDocumentHash } from "./canonical-document-hash.ts";
20
+ import { writeEnvelopeToOpcPackage } from "./customxml-cache.ts";
17
21
  import { resolveFontFingerprint } from "./font-fingerprint.ts";
18
22
  import { canonicalizeGraph } from "./graph-canonicalize.ts";
19
23
 
@@ -46,11 +50,18 @@ import { canonicalizeGraph } from "./graph-canonicalize.ts";
46
50
  export interface PrerenderOptions {
47
51
  readonly fontFingerprint?: string;
48
52
  /**
49
- * Plan B hook — when true, prerenderDocument will inject the cache
50
- * envelope into the document's `laycache` customXml namespace and
51
- * return the re-serialized bytes in `docWithCustomXml`. In Plan A
52
- * (this task) the flag is accepted for API stability but ignored;
53
- * `docWithCustomXml` returns the input unchanged.
53
+ * Plan B — when true, prerenderDocument injects the cache envelope into
54
+ * the document's workflow-payload customXml part under a `laycache`
55
+ * unknown-namespace entry, and returns the re-serialized bytes in
56
+ * `docWithCustomXml`. Default: `false`, in which case `docWithCustomXml`
57
+ * equals the input bytes.
58
+ *
59
+ * Scope caveat: requires the input docx to already have a
60
+ * `/customXml/item1.xml` part. If the part is absent, the flag is a
61
+ * no-op (documented via `counters.persistedToCustomXml`) and the caller
62
+ * falls back to IndexedDB-only caching. Fresh/minimal docs that lack
63
+ * the part are already fast-opening; Plan B targets CCEP-scale
64
+ * templates which reliably carry the part.
54
65
  */
55
66
  readonly persistToCustomXml?: boolean;
56
67
  }
@@ -59,6 +70,13 @@ export interface PrerenderCounters {
59
70
  readonly blockCount: number;
60
71
  readonly pageCount: number;
61
72
  readonly prerenderMs: number;
73
+ /**
74
+ * Plan B signal: `true` when `persistToCustomXml` was requested AND the
75
+ * docx had an existing workflow-payload part to mutate. `false` when
76
+ * the flag was off OR the docx had no such part (caller should fall
77
+ * back to IndexedDB caching).
78
+ */
79
+ readonly persistedToCustomXml: boolean;
62
80
  }
63
81
 
64
82
  export interface PrerenderResult {
@@ -70,6 +88,15 @@ export interface PrerenderResult {
70
88
 
71
89
  const PRERENDER_DOCUMENT_ID = "prerender";
72
90
 
91
+ /**
92
+ * Fixed ISO8601 timestamp used to override session-birth `createdAt` /
93
+ * `updatedAt` on the prerendered envelope. Epoch zero — a valid ISO8601
94
+ * value that downstream validators accept — replacing `Date.now()`-driven
95
+ * values that would otherwise defeat byte-identical `docWithCustomXml`
96
+ * output across two prerender calls on identical bytes.
97
+ */
98
+ const PRERENDER_NORMALIZED_TIMESTAMP = "1970-01-01T00:00:00.000Z";
99
+
73
100
  function toUint8Array(input: ArrayBuffer | Uint8Array): Uint8Array {
74
101
  if (input instanceof Uint8Array) return input;
75
102
  return new Uint8Array(input);
@@ -106,7 +133,20 @@ export async function prerenderDocument(
106
133
  );
107
134
  }
108
135
 
109
- const envelope = session.initialSessionState.canonicalDocument;
136
+ // Normalize session-birth timestamps. `loadDocxEditorSessionAsync` sets
137
+ // `createdAt`/`updatedAt` from `new Date().toISOString()`; without this
138
+ // override, two sequential prerender calls on identical bytes would
139
+ // produce different envelopes → different customXml bytes → determinism
140
+ // failure on the B.5 byte-identical gate. Using the epoch keeps the
141
+ // value a valid ISO8601 string (downstream validators accept it) while
142
+ // eliminating the only remaining source of non-determinism. The live
143
+ // session's updatedAt is re-populated by runtime mutations anyway, so
144
+ // the normalized value is irrelevant at runtime.
145
+ const envelope: typeof session.initialSessionState.canonicalDocument = {
146
+ ...session.initialSessionState.canonicalDocument,
147
+ createdAt: PRERENDER_NORMALIZED_TIMESTAMP,
148
+ updatedAt: PRERENDER_NORMALIZED_TIMESTAMP,
149
+ };
110
150
  const surface = createEditorSurfaceSnapshot(envelope, createSelectionSnapshot(), { kind: "main" });
111
151
 
112
152
  const engine = createLayoutEngine({ autoUpgradeToCanvasBackend: false });
@@ -114,11 +154,17 @@ export async function prerenderDocument(
114
154
  const graph = canonicalizeGraph(rawGraph);
115
155
 
116
156
  const structuralHash = await computeStructuralHash(blocksToCacheKeyBlocks(surface));
157
+ // L7 Phase 2.5 Plan B B.2 — sha256 of stable-stringified canonical document.
158
+ // Enters the cache key as the 5th input so style / metadata / comments /
159
+ // preservation mutations correctly invalidate (structuralHash alone misses
160
+ // them because the block-id list is unchanged).
161
+ const canonicalDocumentHash = await computeCanonicalDocumentHash(envelope);
117
162
  const cacheKey = await deriveCacheKey({
118
163
  blocks: blocksToCacheKeyBlocks(surface),
119
164
  fontFingerprint,
120
165
  engineVersion: LAYOUT_ENGINE_VERSION,
121
166
  schemaVersion: LAYCACHE_SCHEMA_VERSION,
167
+ canonicalDocumentHash,
122
168
  });
123
169
 
124
170
  const cacheBlob: CacheEnvelope = {
@@ -126,20 +172,41 @@ export async function prerenderDocument(
126
172
  engineVersion: LAYOUT_ENGINE_VERSION,
127
173
  fontFingerprint,
128
174
  structuralHash,
175
+ canonicalDocumentHash,
129
176
  graph,
130
177
  surface,
178
+ canonicalDocument: envelope,
131
179
  };
132
180
 
181
+ // Plan B B.5 — persistToCustomXml: inject the envelope into the docx's
182
+ // workflow-payload part. Re-parses the OPC package from bytes (~17 ms on
183
+ // extra-large) because `LoadedDocxEditorSession` does not expose
184
+ // `sourcePackage` publicly. Acceptable cost on the one-shot ingest path.
185
+ let docWithCustomXml = bytes;
186
+ let persistedToCustomXml = false;
187
+ if (options.persistToCustomXml === true) {
188
+ const opcPackage = readOpcPackage(bytes);
189
+ const writeResult = writeEnvelopeToOpcPackage(opcPackage, cacheBlob);
190
+ if (writeResult.written) {
191
+ docWithCustomXml = writeOpcPackage(opcPackage);
192
+ persistedToCustomXml = true;
193
+ }
194
+ // writeResult.written === false → docx had no customXml part; silently
195
+ // fall through with docWithCustomXml = input bytes. Caller observes via
196
+ // counters.persistedToCustomXml.
197
+ }
198
+
133
199
  const prerenderMs = performance.now() - t0;
134
200
 
135
201
  return {
136
- docWithCustomXml: bytes,
202
+ docWithCustomXml,
137
203
  cacheBlob,
138
204
  cacheKey,
139
205
  counters: {
140
206
  blockCount: surface.blocks.length,
141
207
  pageCount: graph.pages.length,
142
208
  prerenderMs,
209
+ persistedToCustomXml,
143
210
  },
144
211
  };
145
212
  }
@@ -0,0 +1,148 @@
1
+ import type {
2
+ CanonicalDocument,
3
+ DocumentRootNode,
4
+ InlineNode,
5
+ ParagraphNode,
6
+ } from "../model/canonical-document.ts";
7
+ import type { CanonicalDocumentEnvelope } from "../core/state/editor-state.ts";
8
+ import type { EditorAnchorProjection } from "../api/public-types.ts";
9
+
10
+ export interface ResolvedScopeLocation {
11
+ scopeId: string;
12
+ startPos: number;
13
+ endPos: number;
14
+ }
15
+
16
/**
 * Number of positions an inline node occupies.
 *
 *   - text: one per Unicode code point
 *   - hyperlink / field: the sum of their children
 *   - bookmark and scope markers: zero
 *   - every other node type: one
 */
function inlineLength(node: InlineNode): number {
  if (node.type === "text") {
    // Spreading iterates by code point, so a surrogate pair counts once.
    return [...node.text].length;
  }

  if (node.type === "hyperlink" || node.type === "field") {
    let total = 0;
    for (const child of node.children) {
      total += inlineLength(child as InlineNode);
    }
    return total;
  }

  const isZeroWidthMarker =
    node.type === "bookmark_start" ||
    node.type === "bookmark_end" ||
    node.type === "scope_marker_start" ||
    node.type === "scope_marker_end";
  return isZeroWidthMarker ? 0 : 1;
}
35
+
36
/**
 * Lists the top-level paragraphs of a document together with the absolute
 * position at which each one starts.
 *
 * Accepts either an object with a `content` root or the root node itself.
 * A paragraph advances the position by the summed length of its inline
 * children; any other top-level block (tables included) advances it by one.
 * One extra position is added between consecutive blocks.
 */
function walkParagraphs(
  document: Pick<CanonicalDocument, "content"> | CanonicalDocumentEnvelope,
): { paragraph: ParagraphNode; from: number }[] {
  const asEnvelope = document as CanonicalDocumentEnvelope;
  const root =
    "content" in asEnvelope
      ? (asEnvelope.content as DocumentRootNode)
      : (document as unknown as DocumentRootNode);

  const located: { paragraph: ParagraphNode; from: number }[] = [];
  const lastIndex = root.children.length - 1;
  let offset = 0;

  for (const [index, block] of root.children.entries()) {
    if (block && block.type === "paragraph") {
      located.push({ paragraph: block, from: offset });
      for (const child of block.children) {
        offset += inlineLength(child as InlineNode);
      }
    } else {
      // Tables and all other non-paragraph blocks occupy a single position.
      offset += 1;
    }

    // Separator between this block and the next one.
    if (index < lastIndex) {
      offset += 1;
    }
  }

  return located;
}
65
+
66
/**
 * Scans every top-level paragraph and records, per scope id, the absolute
 * position of its start marker and/or end marker. A scope appears in the
 * result as soon as either marker is found; the missing side stays undefined.
 * If the same marker occurs more than once, the last occurrence wins.
 *
 * Exported so that tests can inspect the marker-to-position mapping that
 * `resolveScope` and `findScopeAt` are built on.
 */
export function collectScopeLocations(
  document: Pick<CanonicalDocument, "content"> | CanonicalDocumentEnvelope,
): Map<string, { startPos?: number; endPos?: number }> {
  const locations = new Map<string, { startPos?: number; endPos?: number }>();

  for (const entry of walkParagraphs(document)) {
    let position = entry.from;
    for (const child of entry.paragraph.children) {
      switch (child.type) {
        case "scope_marker_start": {
          const known = locations.get(child.scopeId) ?? {};
          locations.set(child.scopeId, { ...known, startPos: position });
          break;
        }
        case "scope_marker_end": {
          const known = locations.get(child.scopeId) ?? {};
          locations.set(child.scopeId, { ...known, endPos: position });
          break;
        }
        default:
          break;
      }
      position += inlineLength(child as InlineNode);
    }
  }

  return locations;
}
92
+
93
/**
 * Resolves a scope id to a public anchor computed from the marker positions
 * currently in the document.
 *
 *   - both markers present: a `range` anchor spanning the lower to the higher
 *     marker position
 *   - exactly one marker present: a `detached` anchor (reason `"deleted"`)
 *     whose `lastKnownRange` collapses onto the surviving marker
 *   - neither marker present: `null`
 */
export function resolveScope(
  document: Pick<CanonicalDocument, "content"> | CanonicalDocumentEnvelope,
  scopeId: string,
): EditorAnchorProjection | null {
  const loc = collectScopeLocations(document).get(scopeId);
  if (!loc) return null;

  const { startPos, endPos } = loc;
  if (startPos === undefined || endPos === undefined) {
    return {
      kind: "detached",
      reason: "deleted",
      lastKnownRange: {
        from: startPos ?? endPos ?? 0,
        to: endPos ?? startPos ?? 0,
      },
    };
  }

  // Markers may appear in either order; normalize to an ascending range.
  const [from, to] = startPos <= endPos ? [startPos, endPos] : [endPos, startPos];
  return {
    kind: "range",
    from,
    to,
    assoc: { start: -1, end: 1 },
  };
}
128
+
129
/**
 * Returns the innermost scope that encloses `position`, or `null` when no
 * complete scope does.
 *
 * Only scopes with both markers present are considered, and a scope encloses
 * the position when `startPos <= position <= endPos`. Among enclosing scopes
 * the one that starts last wins. Scope markers are zero-width, so nested
 * scopes frequently start at the same position; in that case the scope that
 * ends first is the inner one and is preferred.
 *
 * @param document Document (or envelope) to search.
 * @param position Absolute position to test.
 * @returns The innermost enclosing scope with its marker positions, or `null`.
 */
export function findScopeAt(
  document: Pick<CanonicalDocument, "content"> | CanonicalDocumentEnvelope,
  position: number,
): { scopeId: string; startPos: number; endPos: number } | null {
  let best: { scopeId: string; startPos: number; endPos: number } | null = null;

  for (const [scopeId, loc] of collectScopeLocations(document)) {
    const { startPos, endPos } = loc;
    if (startPos === undefined || endPos === undefined) continue;
    if (position < startPos || position > endPos) continue;

    const isInner =
      best === null ||
      startPos > best.startPos ||
      // Same start: the tighter (earlier-ending) scope is the inner one.
      (startPos === best.startPos && endPos < best.endPos);
    if (isInner) {
      best = { scopeId, startPos, endPos };
    }
  }

  return best;
}
@@ -53,6 +53,16 @@ export const DEFAULT_REGISTRY_ENTRIES: Readonly<Record<string, ScopeTagBehavior>
53
53
  trimOnDelete: true,
54
54
  bailIfCrossed: false,
55
55
  },
56
+ // S1 scope-marker anchoring. `trimOnDelete: false` is the load-bearing
57
+ // difference vs. `bookmark` — a delete that crosses a scope marker routes
58
+ // through `removeScope` instead of silently trimming the marker, so
59
+ // half-scope states (orphaned metadata in customXml) never appear.
60
+ "workflow-scope-marker": {
61
+ extendOnInsertLeft: true,
62
+ extendOnInsertRight: true,
63
+ trimOnDelete: false,
64
+ bailIfCrossed: false,
65
+ },
56
66
  sdt: {
57
67
  extendOnInsertLeft: false,
58
68
  extendOnInsertRight: false,
@@ -1047,7 +1047,11 @@ function appendInlineSegments(
1047
1047
  }
1048
1048
  case "bookmark_start":
1049
1049
  case "bookmark_end":
1050
- // Zero-width markers — no visual, no cursor advancement
1050
+ case "scope_marker_start":
1051
+ case "scope_marker_end":
1052
+ // Zero-width markers — no visual, no cursor advancement. Scope markers
1053
+ // (S1) follow the bookmark precedent: structural anchors whose positions
1054
+ // track with surrounding text but which don't occupy cursor positions.
1051
1055
  return { nextCursor: start, lockedFragmentIds: [] };
1052
1056
  default:
1053
1057
  return { nextCursor: start + 1, lockedFragmentIds: [] };
@@ -1466,6 +1470,9 @@ function summarizePreviewInline(node: InlineNode): string {
1466
1470
  return node.name ? `[Bookmark: ${node.name}]` : "[Bookmark]";
1467
1471
  case "bookmark_end":
1468
1472
  return "";
1473
+ case "scope_marker_start":
1474
+ case "scope_marker_end":
1475
+ return "";
1469
1476
  case "image":
1470
1477
  return node.altText ? `[Image: ${node.altText}]` : "[Image]";
1471
1478
  case "opaque_inline":