@beyondwork/docx-react-component 1.0.47 → 1.0.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/README.md +16 -11
  2. package/package.json +30 -41
  3. package/src/api/public-types.ts +199 -13
  4. package/src/compare/diff-engine.ts +4 -0
  5. package/src/core/commands/add-scope.ts +257 -0
  6. package/src/core/commands/formatting-commands.ts +2 -0
  7. package/src/core/commands/index.ts +9 -1
  8. package/src/core/commands/text-commands.ts +3 -1
  9. package/src/core/schema/text-schema.ts +95 -1
  10. package/src/core/selection/anchor-conversion.ts +112 -0
  11. package/src/core/selection/review-anchors.ts +108 -3
  12. package/src/core/state/text-transaction.ts +103 -7
  13. package/src/internal/harness-debug-ports.ts +168 -0
  14. package/src/io/chart-preview-resolver.ts +59 -1
  15. package/src/io/docx-session.ts +226 -38
  16. package/src/io/export/serialize-main-document.ts +46 -0
  17. package/src/io/export/serialize-paragraph-formatting.ts +8 -0
  18. package/src/io/export/serialize-run-formatting.ts +10 -1
  19. package/src/io/export/serialize-settings.ts +421 -0
  20. package/src/io/export/serialize-styles.ts +10 -0
  21. package/src/io/normalize/normalize-text.ts +1 -0
  22. package/src/io/ooxml/chart/chart-style-table.ts +543 -0
  23. package/src/io/ooxml/chart/color-palette.ts +101 -0
  24. package/src/io/ooxml/chart/compose-series-color.ts +147 -0
  25. package/src/io/ooxml/chart/parse-axis.ts +277 -0
  26. package/src/io/ooxml/chart/parse-chart-space.ts +885 -0
  27. package/src/io/ooxml/chart/parse-series.ts +635 -0
  28. package/src/io/ooxml/chart/resolve-color.ts +261 -0
  29. package/src/io/ooxml/chart/types.ts +439 -0
  30. package/src/io/ooxml/parse-block-structure.ts +99 -0
  31. package/src/io/ooxml/parse-complex-content.ts +90 -2
  32. package/src/io/ooxml/parse-main-document.ts +156 -1
  33. package/src/io/ooxml/parse-paragraph-formatting.ts +46 -0
  34. package/src/io/ooxml/parse-run-formatting.ts +49 -0
  35. package/src/io/ooxml/parse-scope-markers.ts +184 -0
  36. package/src/io/ooxml/parse-settings-blueprint.ts +349 -0
  37. package/src/io/ooxml/parse-settings.ts +97 -1
  38. package/src/io/ooxml/parse-styles.ts +65 -0
  39. package/src/io/ooxml/parse-theme.ts +2 -127
  40. package/src/io/ooxml/property-grab-bag.ts +211 -0
  41. package/src/io/ooxml/xml-attr-helpers.ts +59 -1
  42. package/src/io/ooxml/xml-parser.ts +142 -0
  43. package/src/model/canonical-document.ts +160 -0
  44. package/src/model/scope-markers.ts +144 -0
  45. package/src/runtime/collab/base-doc-fingerprint.ts +99 -0
  46. package/src/runtime/collab/checkpoint-election.ts +75 -0
  47. package/src/runtime/collab/checkpoint-scheduler.ts +204 -0
  48. package/src/runtime/collab/checkpoint-store.ts +115 -0
  49. package/src/runtime/collab/event-types.ts +27 -0
  50. package/src/runtime/collab/index.ts +29 -0
  51. package/src/runtime/collab/remote-cursor-awareness.ts +167 -0
  52. package/src/runtime/collab/runtime-collab-sync.ts +330 -0
  53. package/src/runtime/collab/workflow-shared.ts +247 -0
  54. package/src/runtime/document-locations.ts +1 -9
  55. package/src/runtime/document-outline.ts +1 -9
  56. package/src/runtime/document-runtime.ts +288 -65
  57. package/src/runtime/editor-surface/capabilities.ts +63 -50
  58. package/src/runtime/hyperlink-color-resolver.ts +119 -0
  59. package/src/runtime/layout/layout-engine-version.ts +8 -1
  60. package/src/runtime/prerender/cache-envelope.ts +19 -7
  61. package/src/runtime/prerender/cache-key.ts +25 -14
  62. package/src/runtime/prerender/canonical-document-hash.ts +63 -0
  63. package/src/runtime/prerender/customxml-cache.ts +211 -0
  64. package/src/runtime/prerender/customxml-probe.ts +78 -0
  65. package/src/runtime/prerender/prerender-document.ts +74 -7
  66. package/src/runtime/scope-resolver.ts +148 -0
  67. package/src/runtime/scope-tag-registry.ts +10 -0
  68. package/src/runtime/surface-projection.ts +102 -37
  69. package/src/runtime/theme-color-resolver.ts +188 -0
  70. package/src/runtime/workflow-markup.ts +7 -18
  71. package/src/ui/WordReviewEditor.tsx +48 -2
  72. package/src/ui/editor-runtime-boundary.ts +42 -1
  73. package/src/ui/headless/selection-helpers.ts +10 -23
  74. package/src/ui/runtime-shortcut-dispatch.ts +12 -7
  75. package/src/ui/unsupported-previews-policy.ts +23 -0
  76. package/src/ui-tailwind/chrome-overlay/tw-chrome-overlay.tsx +10 -0
  77. package/src/ui-tailwind/editor-surface/perf-probe.ts +1 -0
  78. package/src/ui-tailwind/page-stack/tw-page-stack-chrome-layer.tsx +47 -0
  79. package/src/ui-tailwind/page-stack/use-visible-block-range.ts +88 -0
  80. package/src/ui-tailwind/tw-review-workspace.tsx +16 -1
@@ -0,0 +1,119 @@
1
+ /**
2
+ * Lane 3 V7 — hyperlink color cascade.
3
+ *
4
+ * OOXML convention: runs inside `<w:hyperlink>` inherit the `Hyperlink`
5
+ * character style implicitly, EVEN WHEN source XML does not declare an
6
+ * explicit `<w:rStyle w:val="Hyperlink"/>` on each run. Word applies the
7
+ * style based on the containing hyperlink element's context alone.
8
+ *
9
+ * Our existing cascade (`resolveEffectiveRunFormatting`) only applies the
10
+ * character-style chain when `input.characterStyleId` is populated — so
11
+ * runs inside hyperlinks that lacked explicit rStyle were inheriting
12
+ * whatever the paragraph style said (usually black body text).
13
+ *
14
+ * This module closes that gap by resolving hyperlink color via a
15
+ * four-tier fallback chain:
16
+ *
17
+ * 1. Direct color on the run (`colorHex !== "auto"`) — wins outright.
18
+ * 2. Character-style cascade — forces Hyperlink style participation.
19
+ * 3. Theme hlink slot (`ResolvedTheme.colors.hlink`).
20
+ * 4. Hardcoded Word default `#0563C1`.
21
+ *
22
+ * The resolver also honors `colorThemeSlot` + `colorThemeTint`/`colorThemeShade`
23
+ * from L2.c by delegating to `resolveThemeColorHex`.
24
+ *
25
+ * Contract: the returned `CanonicalRunFormatting` is the effective cascade
26
+ * result with `colorHex` concretized to a non-theme hex (or `"auto"`). The
27
+ * original `colorThemeSlot` / `colorThemeTint` / `colorThemeShade` fields
28
+ * are preserved on the returned object so downstream code (or re-export
29
+ * via the canonical document) still sees the theme reference.
30
+ */
31
+
32
+ import type {
33
+ CanonicalRunFormatting,
34
+ ResolvedTheme,
35
+ StylesCatalog,
36
+ } from "../model/canonical-document.ts";
37
+ import { resolveThemeColor } from "../io/ooxml/parse-theme.ts";
38
+ import { resolveThemeColorHex } from "./theme-color-resolver.ts";
39
+ import {
40
+ resolveEffectiveRunFormatting,
41
+ type RunResolveInput,
42
+ } from "./paragraph-style-resolver.ts";
43
+
44
/**
 * Style id injected as `characterStyleId` for hyperlink-inner runs whose
 * caller did not supply a character style of their own.
 */
export const HYPERLINK_CHARACTER_STYLE_ID = "Hyperlink";

/**
 * Last-resort hyperlink color, used when neither the resolved cascade nor
 * the theme's `hlink` slot yields a color. Stored as bare hex digits (no
 * leading `#`).
 */
export const DEFAULT_HYPERLINK_COLOR_HEX = "0563C1";
52
+
53
/**
 * Computes the effective formatting of a run that sits inside a hyperlink.
 *
 * The run is pushed through `resolveEffectiveRunFormatting` with the
 * `"Hyperlink"` character style injected whenever the caller left
 * `input.characterStyleId` undefined; a caller-supplied style id is used
 * untouched. The resulting color is then concretized via
 * `resolveHyperlinkColorHex`.
 *
 * @param input   Run-level resolve input; never mutated.
 * @param catalog Styles catalog forwarded to the cascade resolver.
 * @param theme   Resolved theme used for theme-slot and `hlink` lookups.
 * @returns The cascade result, with `colorHex` replaced when the resolved
 *          color is non-empty and differs from the cascade's own value.
 *          All other fields are passed through unchanged.
 */
export function resolveHyperlinkRunFormatting(
  input: RunResolveInput,
  catalog: StylesCatalog | undefined,
  theme: ResolvedTheme | undefined,
): CanonicalRunFormatting {
  // Only inject the implicit style when the caller supplied none.
  const callerSuppliedStyle = input.characterStyleId !== undefined;
  const effectiveInput: RunResolveInput = callerSuppliedStyle
    ? input
    : { ...input, characterStyleId: HYPERLINK_CHARACTER_STYLE_ID };

  const formatting = resolveEffectiveRunFormatting(effectiveInput, catalog);

  // Swap in the concrete color only when it actually changes something, so
  // the cascade object is returned as-is in the common no-op case.
  const concreteHex = resolveHyperlinkColorHex(formatting, theme);
  if (!concreteHex || concreteHex === formatting.colorHex) {
    return formatting;
  }
  return { ...formatting, colorHex: concreteHex };
}
86
+
87
/**
 * Picks the hyperlink color through a four-step fallback:
 *
 *  1. the cascade's own `colorHex`, when set and not `"auto"`;
 *  2. the cascade's theme-slot reference, resolved by `resolveThemeColorHex`,
 *     when that yields a non-`"auto"` value;
 *  3. the theme's `hlink` slot, looked up directly;
 *  4. `DEFAULT_HYPERLINK_COLOR_HEX`.
 *
 * Exported mainly so the tiers can be tested in isolation;
 * `resolveHyperlinkRunFormatting` is the usual entry point.
 *
 * NOTE(review): step 1 prefers a literal hex over a theme-slot reference
 * carried on the same cascade. If the cascade can hold both from a single
 * `<w:color w:val=… w:themeColor=…/>` element, the theme reference should
 * presumably win — confirm against how the cascade populates these fields.
 *
 * @param cascade Color-related fields of the resolved run formatting.
 * @param theme   Resolved theme, or `undefined` when the document has none.
 * @returns A color string; with the final default in place the function
 *          always returns a value despite the wider declared type.
 */
export function resolveHyperlinkColorHex(
  cascade: Pick<
    CanonicalRunFormatting,
    "colorHex" | "colorThemeSlot" | "colorThemeTint" | "colorThemeShade"
  >,
  theme: ResolvedTheme | undefined,
): string | undefined {
  // Step 1: explicit hex.
  const directHex = cascade.colorHex;
  if (directHex && directHex !== "auto") {
    return directHex;
  }

  // Step 2: theme-slot reference carried by the cascade. The theme resolver
  // is only consulted when a slot is actually present.
  const slotHex = cascade.colorThemeSlot
    ? resolveThemeColorHex(cascade, theme)
    : undefined;
  if (slotHex && slotHex !== "auto") {
    return slotHex;
  }

  // Steps 3 and 4: the theme's hlink slot, else the hardcoded default.
  return resolveThemeColor(theme, "hlink") || DEFAULT_HYPERLINK_COLOR_HEX;
}
@@ -55,5 +55,12 @@ export const LAYOUT_ENGINE_VERSION = 5 as const;
55
55
  * 1 — initial envelope shape: { schemaVersion, engineVersion,
56
56
  * fontFingerprint, structuralHash, graph, surface }. Ships with
57
57
  * L7 Phase 2.5 Plan A.
58
+ * 2 — L7 Phase 2.5 Plan B: adds `canonicalDocument` + `canonicalDocumentHash`
59
+ * fields so the receiving client can skip the DOCX parse entirely on
60
+ * cache hit. `canonicalDocumentHash` is also a 5th input to the cache
61
+ * key so any state mutation (styles, metadata, comments, preservation)
62
+ * correctly invalidates. v1 envelopes are rejected on load under v2 —
63
+ * no corruption path exists because schemaVersion is the top-level
64
+ * discriminator.
58
65
  */
59
- export const LAYCACHE_SCHEMA_VERSION = 1 as const;
66
+ export const LAYCACHE_SCHEMA_VERSION = 2 as const;
@@ -1,13 +1,14 @@
1
1
  import type { EditorSurfaceSnapshot } from "../../api/public-types";
2
+ import type { CanonicalDocument } from "../../model/canonical-document.ts";
2
3
  import type { RuntimePageGraph } from "../layout/page-graph.ts";
3
4
 
4
5
  /**
5
- * L7 Phase 2.5 Task 2.5.3 — prerender cache envelope shape.
6
+ * L7 Phase 2.5 — prerender cache envelope shape.
6
7
  *
7
- * The envelope is the unit written to IndexedDB (Plan A) and — after Plan B
8
- * ships — to the `laycache` customXml editor-state namespace. Two consumers
9
- * must agree on this shape: the prerender pipeline that populates it, and
10
- * the warm-path loader that rehydrates it.
8
+ * The envelope is the unit written to IndexedDB (Plan A) and — under Plan B
9
+ * (schema v2) — to the `laycache` customXml editor-state namespace. Two
10
+ * consumers must agree on this shape: the prerender pipeline that populates
11
+ * it, and the warm-path loader that rehydrates it.
11
12
  *
12
13
  * Load-time invariants checked by consumers before trusting the envelope:
13
14
  * - `schemaVersion === LAYCACHE_SCHEMA_VERSION` — bump invalidates
@@ -15,15 +16,26 @@ import type { RuntimePageGraph } from "../layout/page-graph.ts";
15
16
  * - `graph.revision === 0` — canonical marker
16
17
  *
17
18
  * The envelope MUST be structured-clone-safe because IndexedDB and Plan B's
18
- * customXml path both rely on structured-clone semantics. Keep fields as
19
- * JSON-serializable primitives, plain objects, or arrays — no class
19
+ * customXml path both rely on structured-clone / JSON semantics. Keep fields
20
+ * as JSON-serializable primitives, plain objects, or arrays — no class
20
21
  * instances, functions, or symbols.
22
+ *
23
+ * Plan B additions (schema v2):
24
+ * - `canonicalDocument` — the full parsed model, so the warm-path loader
25
+ * can skip `parseMainDocumentXml` + `createImportedCanonicalDocument` +
26
+ * `buildCompatibilityReport` + `createImportedSnapshot` on cache hit.
27
+ * Saves ~584 ms of the 976 ms cold-upload on `extra-large` CCEP.
28
+ * - `canonicalDocumentHash` — sha256 of sorted-keys JSON. Also the 5th
29
+ * input to `deriveCacheKey`, so style/metadata/comment/preservation
30
+ * mutations correctly invalidate the cache.
21
31
  */
22
32
  export interface CacheEnvelope {
23
33
  readonly schemaVersion: number;
24
34
  readonly engineVersion: number;
25
35
  readonly fontFingerprint: string;
26
36
  readonly structuralHash: string;
37
+ readonly canonicalDocumentHash: string;
27
38
  readonly graph: RuntimePageGraph;
28
39
  readonly surface: EditorSurfaceSnapshot;
40
+ readonly canonicalDocument: CanonicalDocument;
29
41
  }
@@ -1,22 +1,31 @@
1
1
  /**
2
- * L7 Phase 2.5 Task 2.5.1 — prerender cache-key derivation.
2
+ * L7 Phase 2.5 — prerender cache-key derivation.
3
3
  *
4
4
  * The cache key is the composite identity the IndexedDB (Plan A) and
5
5
  * customXml (Plan B) backends index on. It has five inputs:
6
6
  *
7
- * 1. structuralHash(blocks) — sha256 of the ordered kind:blockId list.
8
- * Stable across text-only edits; changes on
9
- * insert/delete/reorder because blockIds are
10
- * kind-counter pairs (paragraph-5 stays 5
11
- * under typing, shifts to paragraph-6 after
12
- * an insert).
13
- * 2. fontFingerprint — identifies the measurement-backend + font-
14
- * metric source. "empirical-backend" in Plan A;
15
- * a real font-derived string after Phase 8.
16
- * 3. engineVersion — LAYOUT_ENGINE_VERSION from src/runtime/
17
- * layout/layout-engine-version.ts. Bumped by
18
- * CI gate on any layout/render shape change.
19
- * 4. schemaVersion — LAYCACHE_SCHEMA_VERSION for envelope format.
7
+ * 1. structuralHash(blocks) — sha256 of the ordered kind:blockId list.
8
+ * Stable across text-only edits; changes on
9
+ * insert/delete/reorder because blockIds are
10
+ * kind-counter pairs (paragraph-5 stays 5
11
+ * under typing, shifts to paragraph-6 after
12
+ * an insert).
13
+ * 2. fontFingerprint — identifies the measurement-backend + font-
14
+ * metric source. "empirical-backend" in
15
+ * Plan A; a real font-derived string after
16
+ * Phase 8.
17
+ * 3. engineVersion — LAYOUT_ENGINE_VERSION from src/runtime/
18
+ * layout/layout-engine-version.ts. Bumped by
19
+ * CI gate on any layout/render shape change.
20
+ * 4. schemaVersion — LAYCACHE_SCHEMA_VERSION for envelope
21
+ * format.
22
+ * 5. canonicalDocumentHash — (Plan B, schema v2) sha256 of sorted-keys
23
+ * JSON of the CanonicalDocument. Catches
24
+ * non-structural mutations (styles,
25
+ * metadata, comments, preservation) that
26
+ * `structuralHash` alone misses. Computed
27
+ * via `computeCanonicalDocumentHash()` from
28
+ * `./canonical-document-hash.ts`.
20
29
  *
21
30
  * Returns a 64-char lower-case hex digest. Uses the Web Crypto API
22
31
  * (globalThis.crypto.subtle), available in Node 18+ and all target browsers —
@@ -33,6 +42,7 @@ export interface CacheKeyInputs {
33
42
  readonly fontFingerprint: string;
34
43
  readonly engineVersion: string | number;
35
44
  readonly schemaVersion: number;
45
+ readonly canonicalDocumentHash: string;
36
46
  }
37
47
 
38
48
  const BLOCK_SEPARATOR = "\u0000";
@@ -61,6 +71,7 @@ export async function deriveCacheKey(inputs: CacheKeyInputs): Promise<string> {
61
71
  inputs.fontFingerprint,
62
72
  String(inputs.engineVersion),
63
73
  String(inputs.schemaVersion),
74
+ inputs.canonicalDocumentHash,
64
75
  ].join(FIELD_SEPARATOR);
65
76
  return sha256Hex(composite);
66
77
  }
@@ -0,0 +1,63 @@
1
+ import type { CanonicalDocument } from "../../model/canonical-document.ts";
2
+ import { stableStringify } from "../../model/cds-1.0.0.ts";
3
+
4
+ /**
5
+ * L7 Phase 2.5 Plan B — deterministic hash of a CanonicalDocument.
6
+ *
7
+ * Used as the fifth input to `deriveCacheKey` so non-structural mutations
8
+ * (style edits, metadata changes, comment/revision edits, preservation
9
+ * fragment updates) correctly invalidate the cache. `structuralHash`
10
+ * alone — which keys on the block-id list — misses these because the
11
+ * block structure is unchanged by such mutations.
12
+ *
13
+ * Determinism: uses `stableStringify` from `cds-1.0.0.ts` (the same
14
+ * ordering the canonical-document model already uses for equality
15
+ * comparison), so two runs on structurally-identical documents produce
16
+ * byte-identical JSON → byte-identical SHA-256. Cross-process / cross-
17
+ * machine agreement holds as long as the CanonicalDocument shape matches.
18
+ *
19
+ * **Session-birth metadata is excluded from the hash** — `createdAt` and
20
+ * `updatedAt` are set to `new Date().toISOString()` at session load (see
21
+ * `docx-session.ts`), and `docId` derives from a runtime-allocated UUID
22
+ * when the host does not pin one. These fields are not document identity
23
+ * for cache-validity purposes: a save-and-reload should hit the cache if
24
+ * the document content is unchanged, even though `updatedAt` shifted.
25
+ *
26
+ * Cost budget: <50 ms on `extra-large` CCEP (~250 KB canonical). Dominated
27
+ * by `JSON.stringify` + Web Crypto SHA-256; the key-sort pass inside
28
+ * `stableStringify` is a single depth-first walk.
29
+ */
30
+
31
// Shared encoder instance, created once at module load.
const textEncoder = new TextEncoder();
// Placeholder written over session-birth metadata before hashing.
const NORMALIZED_SENTINEL = "__hash_normalized__";

/**
 * Hashes `input` (encoded via the shared `TextEncoder`) with SHA-256 through
 * Web Crypto and returns the digest as 64 lower-case hex characters.
 */
async function sha256Hex(input: string): Promise<string> {
  const digest = await crypto.subtle.digest("SHA-256", textEncoder.encode(input));
  // Each byte becomes exactly two hex digits; the padding keeps leading zeros.
  return Array.from(new Uint8Array(digest), (byte) =>
    byte.toString(16).padStart(2, "0"),
  ).join("");
}
43
+
44
/**
 * Returns a shallow copy of `doc` in which `docId`, `createdAt` and
 * `updatedAt` are overwritten with a fixed sentinel. Those fields vary per
 * session, so blanking them keeps the hash identical across repeated loads
 * of the same bytes. The input document is not mutated.
 */
function normalizeForHashing(doc: CanonicalDocument): CanonicalDocument {
  const sessionBirthOverrides = {
    docId: NORMALIZED_SENTINEL as CanonicalDocument["docId"],
    createdAt: NORMALIZED_SENTINEL as CanonicalDocument["createdAt"],
    updatedAt: NORMALIZED_SENTINEL as CanonicalDocument["updatedAt"],
  };
  return { ...doc, ...sessionBirthOverrides };
}
58
+
59
/**
 * Deterministic content hash of a canonical document: session-birth
 * metadata is normalized away, the result is serialized with
 * `stableStringify`, and the serialization is SHA-256 hashed.
 *
 * @returns The hex digest produced by `sha256Hex`.
 */
export async function computeCanonicalDocumentHash(
  doc: CanonicalDocument,
): Promise<string> {
  const normalized = normalizeForHashing(doc);
  const serialized = stableStringify(normalized);
  return sha256Hex(serialized);
}
@@ -0,0 +1,211 @@
1
+ import type { OpcPackage } from "../../io/opc/package-reader.ts";
2
+ import type { OpcPackagePart } from "../../io/ooxml/part-manifest.ts";
3
+ import {
4
+ WORKFLOW_PAYLOAD_PART_PATH,
5
+ buildEditorStateXml,
6
+ parseEditorStateXml,
7
+ parseWorkflowPayloadEnvelopeFromPackage,
8
+ type EditorStatePayload,
9
+ } from "../../io/ooxml/workflow-payload.ts";
10
+ import {
11
+ LAYCACHE_SCHEMA_VERSION,
12
+ LAYOUT_ENGINE_VERSION,
13
+ } from "../layout/layout-engine-version.ts";
14
+ import type { CacheEnvelope } from "./cache-envelope.ts";
15
+
16
+ /**
17
+ * L7 Phase 2.5 Plan B B.4 — customXml read/write for the prerender cache.
18
+ *
19
+ * Persists the cache envelope inside the docx's workflow-payload part
20
+ * (`/customXml/item1.xml`) under the "laycache" namespace, so the cache
21
+ * travels with the file and hands a warm-start experience to any client
22
+ * that opens the same bytes.
23
+ *
24
+ * **Routing decision:** laycache does NOT extend `EditorStateNamespace`.
25
+ * The entry round-trips through the existing `unknownNamespaces`
26
+ * preservation path, which already provides:
27
+ * - Automatic Word round-trip (Word doesn't touch customXml parts).
28
+ * - Automatic runtime save-path preservation (the save path appends
29
+ * `channel.getUnknownEntries()` verbatim).
30
+ * - Separation of cache concerns from runtime subsystem state (laycache
31
+ * is not conflated with hostAnnotations/workflowOverlay/etc.).
32
+ *
33
+ * Tradeoff: the write path hand-builds a `<bw:namespace name="laycache">`
34
+ * XML fragment rather than leveraging `buildEditorStateXml`'s entry
35
+ * serializer. Six lines of escaped string concatenation in exchange for
36
+ * the above.
37
+ *
38
+ * **Scope (MVP):** Write-side requires the docx to already have a
39
+ * workflow-payload part (`/customXml/item1.xml`). Docs without it return
40
+ * `{ written: false, reason: "no-customxml-part" }` so the caller can
41
+ * fall back to IndexedDB-only caching. Fresh/minimal docs that lack a
42
+ * payload part are already fast-opening; Plan B is targeted at CCEP-scale
43
+ * templates which reliably carry the part.
44
+ */
45
+
46
/** Name of the editor-state namespace that carries the cache envelope. */
export const LAYCACHE_NAMESPACE_NAME = "laycache" as const;
/** `schemaVersion` attribute value stamped on the laycache namespace element. */
const LAYCACHE_ENTRY_SCHEMA_VERSION = "laycache/" + String(LAYCACHE_SCHEMA_VERSION);

/**
 * Outcome of `writeEnvelopeToOpcPackage`: either the envelope was written,
 * or it was skipped with a machine-readable reason.
 */
export type WriteEnvelopeResult =
  | { written: false; reason: "no-customxml-part" }
  | { written: true };
52
+
53
/**
 * Writes (or replaces) the laycache envelope inside the OPC package's
 * workflow-payload part. Mutates `opcPackage.parts` in place on success.
 *
 * Existing editor-state entries and every unknown namespace other than
 * `laycache` are carried over as parsed; a previous `laycache` entry is
 * dropped and replaced by the new one.
 *
 * Returns `{ written: false, reason: "no-customxml-part" }` — leaving the
 * package untouched — when:
 *  - the package has no workflow-payload part, or
 *  - the part exists but the rebuilt editor-state block could not be placed
 *    into it (the splice helper returned XML that does not contain the new
 *    block). Previously this case was reported as `written: true` even
 *    though no envelope had been stored.
 *
 * Callers are expected to fall back to another cache backend in that case.
 *
 * @param opcPackage Package whose workflow-payload part is rewritten.
 * @param envelope   Cache envelope to embed.
 */
export function writeEnvelopeToOpcPackage(
  opcPackage: OpcPackage,
  envelope: CacheEnvelope,
): WriteEnvelopeResult {
  const existingPart = opcPackage.parts.get(WORKFLOW_PAYLOAD_PART_PATH);
  if (!existingPart) {
    return { written: false, reason: "no-customxml-part" };
  }

  const existingXml = new TextDecoder().decode(existingPart.bytes);
  const existingEditorState: EditorStatePayload = parseEditorStateXml(existingXml) ?? {
    entries: [],
  };

  // Remove any previous laycache entry; keep every other unknown namespace.
  const preservedUnknowns = (existingEditorState.unknownNamespaces ?? []).filter(
    (ns) => ns.name !== LAYCACHE_NAMESPACE_NAME,
  );

  const nextEditorState: EditorStatePayload = {
    entries: existingEditorState.entries,
    unknownNamespaces: [
      ...preservedUnknowns,
      { name: LAYCACHE_NAMESPACE_NAME, rawXml: buildLaycacheNamespaceXml(envelope) },
    ],
  };

  const newEditorStateBlock = buildEditorStateXml(nextEditorState);
  const newXml = spliceEditorStateIntoPayloadXml(existingXml, newEditorStateBlock);

  // Verify the block actually landed before committing. The splice helper
  // returns its input unchanged when it finds neither an editor-state block
  // nor the payload root; reporting success then would make the caller skip
  // its fallback cache while nothing was stored.
  if (!newXml.includes(newEditorStateBlock)) {
    return { written: false, reason: "no-customxml-part" };
  }

  // Only `bytes` is replaced; every other field of the part (including any
  // stored checksum) is carried over from the existing part as-is.
  const nextPart: OpcPackagePart = {
    ...existingPart,
    bytes: new TextEncoder().encode(newXml),
  };
  opcPackage.parts.set(WORKFLOW_PAYLOAD_PART_PATH, nextPart);
  return { written: true };
}
106
+
107
/**
 * Extracts and validates the laycache envelope from an already-parsed OPC
 * package.
 *
 * Returns `null` when any step fails:
 *  - the workflow payload (or its editor state) is absent;
 *  - no `laycache` entry exists among the unknown namespaces;
 *  - the entry has no usable inline content;
 *  - the inline content is not valid JSON;
 *  - the parsed value fails `isValidCacheEnvelope` (version mismatch or
 *    missing/mis-typed required fields).
 */
export function readEnvelopeFromOpcPackage(opcPackage: OpcPackage): CacheEnvelope | null {
  const editorState = parseWorkflowPayloadEnvelopeFromPackage(opcPackage)?.editorState;
  if (!editorState) {
    return null;
  }

  const unknownNamespaces = editorState.unknownNamespaces ?? [];
  const laycacheEntry = unknownNamespaces.find(
    (ns) => ns.name === LAYCACHE_NAMESPACE_NAME,
  );
  if (!laycacheEntry) {
    return null;
  }

  const inlineJson = extractInlineCdata(laycacheEntry.rawXml);
  if (inlineJson === null) {
    return null;
  }

  let candidate: unknown;
  try {
    candidate = JSON.parse(inlineJson);
  } catch {
    // Corrupted cache content is treated as a cache miss, not an error.
    return null;
  }

  return isValidCacheEnvelope(candidate) ? candidate : null;
}
139
+
140
+ // ---------------------------------------------------------------------------
141
+ // Helpers
142
+ // ---------------------------------------------------------------------------
143
+
144
/**
 * Serializes `envelope` to JSON and wraps it in a `<bw:namespace>` element
 * whose `<bw:inline>` child holds the JSON inside a CDATA section.
 */
function buildLaycacheNamespaceXml(envelope: CacheEnvelope): string {
  // A literal "]]>" would terminate the CDATA section early, so each
  // occurrence is split across two adjacent CDATA sections.
  const cdataSafeJson = JSON.stringify(envelope).replace(/\]\]>/g, "]]]]><![CDATA[>");
  return [
    `<bw:namespace name="${LAYCACHE_NAMESPACE_NAME}" schemaVersion="${LAYCACHE_ENTRY_SCHEMA_VERSION}">`,
    `<bw:inline><![CDATA[${cdataSafeJson}]]></bw:inline>`,
    `</bw:namespace>`,
  ].join("");
}
152
+
153
/**
 * Replaces (or inserts) the `<bw:editorState>` block inside an existing
 * workflow-payload XML string. Three cases:
 *
 *  1. A `<bw:editorState>…</bw:editorState>` block is present — it is
 *     replaced in place.
 *  2. No such block, but a closing `</bw:workflowPayload>` is present — the
 *     block is inserted immediately before it and the `version` attribute
 *     on the opening `<bw:workflowPayload>` tag (if any) is set to "1.2".
 *  3. Neither — the input is returned unchanged.
 *
 * All substitutions use replacer FUNCTIONS rather than replacement strings.
 * With a string replacement, `String.prototype.replace` expands `$&`, `$$`,
 * `$1`, `` $` `` and `$'`; the editor-state block embeds arbitrary document
 * JSON, so any such sequence in the content would be rewritten and the
 * stored payload corrupted.
 *
 * @param xml              Current workflow-payload XML.
 * @param editorStateBlock Fully serialized `<bw:editorState>` block.
 * @returns The updated XML, or `xml` itself in case 3.
 */
function spliceEditorStateIntoPayloadXml(xml: string, editorStateBlock: string): string {
  const existingBlockRe = /<bw:editorState\b[^>]*>[\s\S]*?<\/bw:editorState>/u;
  if (existingBlockRe.test(xml)) {
    // Function replacer: insert the block verbatim, no `$` expansion.
    return xml.replace(existingBlockRe, () => editorStateBlock);
  }

  const rootCloseRe = /<\/bw:workflowPayload>/u;
  if (rootCloseRe.test(xml)) {
    const upgraded = xml.replace(
      /(<bw:workflowPayload\b[^>]*?\bversion=")([^"]*)(")/u,
      (_m, prefix: string, _v: string, suffix: string) => `${prefix}1.2${suffix}`,
    );
    return upgraded.replace(
      rootCloseRe,
      () => `${editorStateBlock}\n</bw:workflowPayload>`,
    );
  }

  return xml;
}
185
+
186
/**
 * Returns the character data held by the first `<bw:inline>` element in
 * `rawXml`, or `null` when there is no such element, it is unterminated, or
 * its content is empty after trimming.
 *
 * The content is read as a sequence of plain text and CDATA sections, and
 * the pieces are concatenated. Walking the sections (instead of globally
 * deleting `<![CDATA[` / `]]>` markers) matters because the payload is
 * arbitrary JSON:
 *  - a literal `<![CDATA[` inside the JSON is data and must be preserved;
 *  - a literal `</bw:inline>` inside a CDATA section must not be mistaken
 *    for the element's closing tag;
 *  - a `]]>` that was split across two adjacent sections on write is
 *    re-joined by the concatenation.
 *
 * Text outside CDATA sections is returned as-is (no entity decoding).
 */
function extractInlineCdata(rawXml: string): string | null {
  const CDATA_OPEN = "<![CDATA[";
  const CDATA_CLOSE = "]]>";
  const INLINE_CLOSE = "</bw:inline>";

  const openTag = /<bw:inline\b[^>]*>/u.exec(rawXml);
  if (!openTag) return null;

  let cursor = openTag.index + openTag[0].length;
  let content = "";
  for (;;) {
    const closeIdx = rawXml.indexOf(INLINE_CLOSE, cursor);
    const cdataIdx = rawXml.indexOf(CDATA_OPEN, cursor);

    // No further CDATA section before the closing tag: take the remaining
    // plain text and stop.
    if (cdataIdx === -1 || (closeIdx !== -1 && closeIdx < cdataIdx)) {
      if (closeIdx === -1) return null; // element never closed
      content += rawXml.slice(cursor, closeIdx);
      break;
    }

    // Plain text preceding the section, then the section body verbatim.
    content += rawXml.slice(cursor, cdataIdx);
    const bodyStart = cdataIdx + CDATA_OPEN.length;
    const bodyEnd = rawXml.indexOf(CDATA_CLOSE, bodyStart);
    if (bodyEnd === -1) return null; // unterminated CDATA section
    content += rawXml.slice(bodyStart, bodyEnd);
    cursor = bodyEnd + CDATA_CLOSE.length;
  }

  const trimmed = content.trim();
  return trimmed.length > 0 ? trimmed : null;
}
193
+
194
/**
 * Type guard for a parsed cache envelope. Accepts only non-null objects
 * whose `schemaVersion` and `engineVersion` equal the current constants,
 * whose three hash/fingerprint fields are strings, and whose `graph`,
 * `surface` and `canonicalDocument` fields are non-null objects. The nested
 * objects are not inspected any further.
 */
function isValidCacheEnvelope(value: unknown): value is CacheEnvelope {
  if (typeof value !== "object" || value === null) {
    return false;
  }
  const candidate = value as Record<string, unknown>;

  // Version gates first: a mismatch invalidates everything else.
  if (candidate.schemaVersion !== LAYCACHE_SCHEMA_VERSION) return false;
  if (candidate.engineVersion !== LAYOUT_ENGINE_VERSION) return false;

  const stringFields = ["fontFingerprint", "structuralHash", "canonicalDocumentHash"];
  const objectFields = ["graph", "surface", "canonicalDocument"];

  return (
    stringFields.every((key) => typeof candidate[key] === "string") &&
    objectFields.every(
      (key) => typeof candidate[key] === "object" && candidate[key] !== null,
    )
  );
}
211
+
@@ -0,0 +1,78 @@
1
+ import type { OpcPackage } from "../../io/opc/package-reader.ts";
2
+ import { readOpcPackage } from "../../io/opc/package-reader.ts";
3
+ import { parseBlockStructure } from "../../io/ooxml/parse-block-structure.ts";
4
+ import type { CacheEnvelope } from "./cache-envelope.ts";
5
+ import { computeStructuralHash } from "./cache-key.ts";
6
+ import { readEnvelopeFromOpcPackage } from "./customxml-cache.ts";
7
+
8
+ /**
9
+ * L7 Phase 2.5 Plan B B.6 — customXml probe (read-side pre-parse).
10
+ *
11
+ * Opens a docx's OPC package just enough to extract the laycache envelope
12
+ * from `/customXml/item1.xml` without parsing `word/document.xml` or
13
+ * running the canonical-document builder. Consumers use the returned
14
+ * envelope to decide whether to take the fast seeding path or fall
15
+ * through to `loadDocxEditorSessionAsync`'s full pipeline.
16
+ *
17
+ * Cost budget: ~20-50 ms on extra-large (OPC unzip 17 ms + XML regex
18
+ * extract + JSON parse + schema validation). Well below the ~584 ms of
19
+ * parse + skeleton-ready work that a successful probe skips.
20
+ *
21
+ * Null is returned for every rejection path:
22
+ * - OPC parse failure (malformed zip, missing Content_Types)
23
+ * - no workflow-payload part (`/customXml/item1.xml`)
24
+ * - no laycache entry inside the editor-state payload
25
+ * - corrupted inline JSON
26
+ * - envelope.schemaVersion or engineVersion mismatch
27
+ * - missing/mis-typed required envelope fields
28
+ *
29
+ * The returned `opcPackage` is included so callers can hand it to
30
+ * downstream code that would otherwise re-parse the ZIP from bytes
31
+ * (saving the ~17 ms OPC unzip on the cache-hit path).
32
+ *
33
+ * **B.7 structural verification.** When the probe accepts an envelope,
34
+ * it additionally runs `parseBlockStructure` against the package's
35
+ * `word/document.xml` and compares the resulting `structuralHash`
36
+ * against `envelope.structuralHash`. A mismatch means the docx has
37
+ * been edited since the envelope was written — e.g. Word added a
38
+ * paragraph and saved — and the probe returns null to force the full
39
+ * parse path. See `src/io/ooxml/parse-block-structure.ts` for the
40
+ * probe's known limitations (opaque-promoting OOXML features cannot
41
+ * be detected shallow-parse; those docs safely fall through).
42
+ */
43
/** Successful result of `tryReadLaycacheEnvelope`. */
export interface LaycacheProbeResult {
  /** The package the envelope was read from, returned for reuse by the caller. */
  readonly opcPackage: OpcPackage;
  /** The validated cache envelope found in that package. */
  readonly envelope: CacheEnvelope;
}
47
+
48
/**
 * Probes docx bytes for a usable laycache envelope.
 *
 * Steps: read the OPC package, extract and validate the envelope, then
 * shallow-parse `/word/document.xml` and compare its structural hash with
 * `envelope.structuralHash`.
 *
 * Resolves to `null` on EVERY failure so the caller can fall through to the
 * full load path: package read failure, missing/invalid envelope, missing
 * `/word/document.xml`, structural-hash mismatch, and — newly guarded here —
 * any exception thrown while reading the envelope, shallow-parsing the
 * document, or hashing. Previously only `readOpcPackage` was guarded, so a
 * throw from a later step rejected the promise instead of yielding `null`.
 *
 * NOTE(review): the structural hash is documented elsewhere in this package
 * as stable across text-only edits, so a text-only external edit would
 * presumably pass this check — confirm a later stage validates content
 * before the cached canonical document is trusted.
 *
 * @param input Raw docx bytes.
 * @returns The envelope together with the already-read package, or `null`.
 */
export async function tryReadLaycacheEnvelope(
  input: ArrayBuffer | Uint8Array,
): Promise<LaycacheProbeResult | null> {
  const bytes = input instanceof Uint8Array ? input : new Uint8Array(input);

  try {
    const opcPackage: OpcPackage = readOpcPackage(bytes);

    const envelope = readEnvelopeFromOpcPackage(opcPackage);
    if (!envelope) return null;

    // Shallow structural probe: reject envelopes whose cached structural
    // hash does not match a fresh shallow parse of the current document.xml
    // (e.g. the file gained or lost blocks after the envelope was written).
    const documentXmlPart = opcPackage.parts.get("/word/document.xml");
    if (!documentXmlPart) return null;

    const documentXml = new TextDecoder().decode(documentXmlPart.bytes);
    const probedBlocks = parseBlockStructure(documentXml);
    const probedHash = await computeStructuralHash(probedBlocks);
    if (probedHash !== envelope.structuralHash) return null;

    return { envelope, opcPackage };
  } catch {
    // The probe is an optimization only: any failure is a cache miss.
    return null;
  }
}