@beyondwork/docx-react-component 1.0.47 → 1.0.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -11
- package/package.json +30 -41
- package/src/api/public-types.ts +199 -13
- package/src/compare/diff-engine.ts +4 -0
- package/src/core/commands/add-scope.ts +257 -0
- package/src/core/commands/formatting-commands.ts +2 -0
- package/src/core/commands/index.ts +9 -1
- package/src/core/commands/text-commands.ts +3 -1
- package/src/core/schema/text-schema.ts +95 -1
- package/src/core/selection/anchor-conversion.ts +112 -0
- package/src/core/selection/review-anchors.ts +108 -3
- package/src/core/state/text-transaction.ts +103 -7
- package/src/internal/harness-debug-ports.ts +168 -0
- package/src/io/chart-preview-resolver.ts +59 -1
- package/src/io/docx-session.ts +226 -38
- package/src/io/export/serialize-main-document.ts +46 -0
- package/src/io/export/serialize-paragraph-formatting.ts +8 -0
- package/src/io/export/serialize-run-formatting.ts +10 -1
- package/src/io/export/serialize-settings.ts +421 -0
- package/src/io/export/serialize-styles.ts +10 -0
- package/src/io/normalize/normalize-text.ts +1 -0
- package/src/io/ooxml/chart/chart-style-table.ts +543 -0
- package/src/io/ooxml/chart/color-palette.ts +101 -0
- package/src/io/ooxml/chart/compose-series-color.ts +147 -0
- package/src/io/ooxml/chart/parse-axis.ts +277 -0
- package/src/io/ooxml/chart/parse-chart-space.ts +885 -0
- package/src/io/ooxml/chart/parse-series.ts +635 -0
- package/src/io/ooxml/chart/resolve-color.ts +261 -0
- package/src/io/ooxml/chart/types.ts +439 -0
- package/src/io/ooxml/parse-block-structure.ts +99 -0
- package/src/io/ooxml/parse-complex-content.ts +90 -2
- package/src/io/ooxml/parse-main-document.ts +156 -1
- package/src/io/ooxml/parse-paragraph-formatting.ts +46 -0
- package/src/io/ooxml/parse-run-formatting.ts +49 -0
- package/src/io/ooxml/parse-scope-markers.ts +184 -0
- package/src/io/ooxml/parse-settings-blueprint.ts +349 -0
- package/src/io/ooxml/parse-settings.ts +97 -1
- package/src/io/ooxml/parse-styles.ts +65 -0
- package/src/io/ooxml/parse-theme.ts +2 -127
- package/src/io/ooxml/property-grab-bag.ts +211 -0
- package/src/io/ooxml/xml-attr-helpers.ts +59 -1
- package/src/io/ooxml/xml-parser.ts +142 -0
- package/src/model/canonical-document.ts +160 -0
- package/src/model/scope-markers.ts +144 -0
- package/src/runtime/collab/base-doc-fingerprint.ts +99 -0
- package/src/runtime/collab/checkpoint-election.ts +75 -0
- package/src/runtime/collab/checkpoint-scheduler.ts +204 -0
- package/src/runtime/collab/checkpoint-store.ts +115 -0
- package/src/runtime/collab/event-types.ts +27 -0
- package/src/runtime/collab/index.ts +29 -0
- package/src/runtime/collab/remote-cursor-awareness.ts +167 -0
- package/src/runtime/collab/runtime-collab-sync.ts +330 -0
- package/src/runtime/collab/workflow-shared.ts +247 -0
- package/src/runtime/document-locations.ts +1 -9
- package/src/runtime/document-outline.ts +1 -9
- package/src/runtime/document-runtime.ts +288 -65
- package/src/runtime/editor-surface/capabilities.ts +63 -50
- package/src/runtime/hyperlink-color-resolver.ts +119 -0
- package/src/runtime/layout/layout-engine-version.ts +8 -1
- package/src/runtime/prerender/cache-envelope.ts +19 -7
- package/src/runtime/prerender/cache-key.ts +25 -14
- package/src/runtime/prerender/canonical-document-hash.ts +63 -0
- package/src/runtime/prerender/customxml-cache.ts +211 -0
- package/src/runtime/prerender/customxml-probe.ts +78 -0
- package/src/runtime/prerender/prerender-document.ts +74 -7
- package/src/runtime/scope-resolver.ts +148 -0
- package/src/runtime/scope-tag-registry.ts +10 -0
- package/src/runtime/surface-projection.ts +102 -37
- package/src/runtime/theme-color-resolver.ts +188 -0
- package/src/runtime/workflow-markup.ts +7 -18
- package/src/ui/WordReviewEditor.tsx +48 -2
- package/src/ui/editor-runtime-boundary.ts +42 -1
- package/src/ui/headless/selection-helpers.ts +10 -23
- package/src/ui/runtime-shortcut-dispatch.ts +12 -7
- package/src/ui/unsupported-previews-policy.ts +23 -0
- package/src/ui-tailwind/chrome-overlay/tw-chrome-overlay.tsx +10 -0
- package/src/ui-tailwind/editor-surface/perf-probe.ts +1 -0
- package/src/ui-tailwind/page-stack/tw-page-stack-chrome-layer.tsx +47 -0
- package/src/ui-tailwind/page-stack/use-visible-block-range.ts +88 -0
- package/src/ui-tailwind/tw-review-workspace.tsx +16 -1
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lane 3 V7 — hyperlink color cascade.
|
|
3
|
+
*
|
|
4
|
+
* OOXML convention: runs inside `<w:hyperlink>` inherit the `Hyperlink`
|
|
5
|
+
* character style implicitly, EVEN WHEN source XML does not declare an
|
|
6
|
+
* explicit `<w:rStyle w:val="Hyperlink"/>` on each run. Word applies the
|
|
7
|
+
* style based on the containing hyperlink element's context alone.
|
|
8
|
+
*
|
|
9
|
+
* Our existing cascade (`resolveEffectiveRunFormatting`) only applies the
|
|
10
|
+
* character-style chain when `input.characterStyleId` is populated — so
|
|
11
|
+
* runs inside hyperlinks that lacked explicit rStyle were inheriting
|
|
12
|
+
* whatever the paragraph style said (usually black body text).
|
|
13
|
+
*
|
|
14
|
+
* This module closes that gap by resolving hyperlink color via a
|
|
15
|
+
* four-tier fallback chain:
|
|
16
|
+
*
|
|
17
|
+
* 1. Direct color on the run (`colorHex !== "auto"`) — wins outright.
|
|
18
|
+
* 2. Character-style cascade — forces Hyperlink style participation.
|
|
19
|
+
* 3. Theme hlink slot (`ResolvedTheme.colors.hlink`).
|
|
20
|
+
* 4. Hardcoded Word default `#0563C1`.
|
|
21
|
+
*
|
|
22
|
+
* The resolver also honors `colorThemeSlot` + `colorThemeTint`/`colorThemeShade`
|
|
23
|
+
* from L2.c by delegating to `resolveThemeColorHex`.
|
|
24
|
+
*
|
|
25
|
+
* Contract: the returned `CanonicalRunFormatting` is the effective cascade
|
|
26
|
+
* result with `colorHex` concretized to a non-theme hex (or `"auto"`). The
|
|
27
|
+
* original `colorThemeSlot` / `colorThemeTint` / `colorThemeShade` fields
|
|
28
|
+
* are preserved on the returned object so downstream code (or re-export
|
|
29
|
+
* via the canonical document) still sees the theme reference.
|
|
30
|
+
*/
|
|
31
|
+
|
|
32
|
+
import type {
|
|
33
|
+
CanonicalRunFormatting,
|
|
34
|
+
ResolvedTheme,
|
|
35
|
+
StylesCatalog,
|
|
36
|
+
} from "../model/canonical-document.ts";
|
|
37
|
+
import { resolveThemeColor } from "../io/ooxml/parse-theme.ts";
|
|
38
|
+
import { resolveThemeColorHex } from "./theme-color-resolver.ts";
|
|
39
|
+
import {
|
|
40
|
+
resolveEffectiveRunFormatting,
|
|
41
|
+
type RunResolveInput,
|
|
42
|
+
} from "./paragraph-style-resolver.ts";
|
|
43
|
+
|
|
44
|
+
export const HYPERLINK_CHARACTER_STYLE_ID = "Hyperlink";
|
|
45
|
+
|
|
46
|
+
/**
|
|
47
|
+
* Microsoft Word's default hyperlink color (applied when neither the
|
|
48
|
+
* Hyperlink character style nor the theme's `hlink` slot supplies one).
|
|
49
|
+
* Matches Word 2013+ fresh-document rendering.
|
|
50
|
+
*/
|
|
51
|
+
export const DEFAULT_HYPERLINK_COLOR_HEX = "0563C1";
|
|
52
|
+
|
|
53
|
+
/**
 * Computes the effective formatting of a run that sits inside a
 * `<w:hyperlink>` element.
 *
 * Steps performed:
 *   1. When the caller passed no `characterStyleId`, the `"Hyperlink"`
 *      style id is injected into a copy of the input. An id supplied by the
 *      caller is never overridden.
 *   2. The (possibly augmented) input is run through
 *      `resolveEffectiveRunFormatting` together with `catalog`.
 *   3. The cascade's color is concretized via `resolveHyperlinkColorHex`.
 *
 * @param input   Run-level resolve input; not mutated.
 * @param catalog Styles catalog forwarded to the cascade resolver.
 * @param theme   Resolved theme forwarded to the color resolver.
 * @returns The cascade result itself when the concretized color equals the
 *          cascade's `colorHex`; otherwise a shallow copy whose `colorHex`
 *          is the concretized value. All other fields are carried over.
 */
export function resolveHyperlinkRunFormatting(
  input: RunResolveInput,
  catalog: StylesCatalog | undefined,
  theme: ResolvedTheme | undefined,
): CanonicalRunFormatting {
  // Runs inside a hyperlink usually carry no explicit rStyle, so the
  // Hyperlink style is supplied here when the caller gave none.
  const callerPinnedStyle = input.characterStyleId !== undefined;
  const effectiveInput: RunResolveInput = callerPinnedStyle
    ? input
    : { ...input, characterStyleId: HYPERLINK_CHARACTER_STYLE_ID };

  const cascade = resolveEffectiveRunFormatting(effectiveInput, catalog);
  const concreteColor = resolveHyperlinkColorHex(cascade, theme);

  // Nothing to patch: hand back the cascade object untouched.
  if (!concreteColor || concreteColor === cascade.colorHex) {
    return cascade;
  }
  return { ...cascade, colorHex: concreteColor };
}
|
|
86
|
+
|
|
87
|
+
/**
 * Picks the color of a hyperlink run, trying each source in order and
 * returning the first that yields a usable value:
 *
 *   1. `cascade.colorHex` when it is set and is not `"auto"`.
 *   2. The cascade's theme-slot reference (only consulted when
 *      `cascade.colorThemeSlot` is set), resolved through
 *      `resolveThemeColorHex`; an `"auto"` or empty result is skipped.
 *   3. The theme's `hlink` slot via `resolveThemeColor`.
 *   4. `DEFAULT_HYPERLINK_COLOR_HEX`.
 *
 * Exported so the tiers can be tested in isolation; production callers
 * should go through `resolveHyperlinkRunFormatting`.
 *
 * @param cascade Color-related fields of an already-resolved run cascade.
 * @param theme   Resolved theme, or `undefined` when none is available.
 * @returns The chosen color value.
 */
export function resolveHyperlinkColorHex(
  cascade: Pick<
    CanonicalRunFormatting,
    "colorHex" | "colorThemeSlot" | "colorThemeTint" | "colorThemeShade"
  >,
  theme: ResolvedTheme | undefined,
): string | undefined {
  // Tier 1 — an explicit, non-auto color on the cascade.
  const directHex = cascade.colorHex;
  if (directHex && directHex !== "auto") {
    return directHex;
  }

  // Tier 2 — theme-slot reference carried by the cascade.
  const slotHex = cascade.colorThemeSlot
    ? resolveThemeColorHex(cascade, theme)
    : undefined;
  if (slotHex && slotHex !== "auto") {
    return slotHex;
  }

  // Tier 3 — the theme's own hlink slot; Tier 4 — the built-in default.
  return resolveThemeColor(theme, "hlink") || DEFAULT_HYPERLINK_COLOR_HEX;
}
|
|
@@ -55,5 +55,12 @@ export const LAYOUT_ENGINE_VERSION = 5 as const;
|
|
|
55
55
|
* 1 — initial envelope shape: { schemaVersion, engineVersion,
|
|
56
56
|
* fontFingerprint, structuralHash, graph, surface }. Ships with
|
|
57
57
|
* L7 Phase 2.5 Plan A.
|
|
58
|
+
* 2 — L7 Phase 2.5 Plan B: adds `canonicalDocument` + `canonicalDocumentHash`
|
|
59
|
+
* fields so the receiving client can skip the DOCX parse entirely on
|
|
60
|
+
* cache hit. `canonicalDocumentHash` is also a 5th input to the cache
|
|
61
|
+
* key so any state mutation (styles, metadata, comments, preservation)
|
|
62
|
+
* correctly invalidates. v1 envelopes are rejected on load under v2 —
|
|
63
|
+
* no corruption path exists because schemaVersion is the top-level
|
|
64
|
+
* discriminator.
|
|
58
65
|
*/
|
|
59
|
-
export const LAYCACHE_SCHEMA_VERSION =
|
|
66
|
+
export const LAYCACHE_SCHEMA_VERSION = 2 as const;
|
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
import type { EditorSurfaceSnapshot } from "../../api/public-types";
|
|
2
|
+
import type { CanonicalDocument } from "../../model/canonical-document.ts";
|
|
2
3
|
import type { RuntimePageGraph } from "../layout/page-graph.ts";
|
|
3
4
|
|
|
4
5
|
/**
|
|
5
|
-
* L7 Phase 2.5
|
|
6
|
+
* L7 Phase 2.5 — prerender cache envelope shape.
|
|
6
7
|
*
|
|
7
|
-
* The envelope is the unit written to IndexedDB (Plan A) and —
|
|
8
|
-
*
|
|
9
|
-
* must agree on this shape: the prerender pipeline that populates
|
|
10
|
-
* the warm-path loader that rehydrates it.
|
|
8
|
+
* The envelope is the unit written to IndexedDB (Plan A) and — under Plan B
|
|
9
|
+
* (schema v2) — to the `laycache` customXml editor-state namespace. Two
|
|
10
|
+
* consumers must agree on this shape: the prerender pipeline that populates
|
|
11
|
+
* it, and the warm-path loader that rehydrates it.
|
|
11
12
|
*
|
|
12
13
|
* Load-time invariants checked by consumers before trusting the envelope:
|
|
13
14
|
* - `schemaVersion === LAYCACHE_SCHEMA_VERSION` — bump invalidates
|
|
@@ -15,15 +16,26 @@ import type { RuntimePageGraph } from "../layout/page-graph.ts";
|
|
|
15
16
|
* - `graph.revision === 0` — canonical marker
|
|
16
17
|
*
|
|
17
18
|
* The envelope MUST be structured-clone-safe because IndexedDB and Plan B's
|
|
18
|
-
* customXml path both rely on structured-clone semantics. Keep fields
|
|
19
|
-
* JSON-serializable primitives, plain objects, or arrays — no class
|
|
19
|
+
* customXml path both rely on structured-clone / JSON semantics. Keep fields
|
|
20
|
+
* as JSON-serializable primitives, plain objects, or arrays — no class
|
|
20
21
|
* instances, functions, or symbols.
|
|
22
|
+
*
|
|
23
|
+
* Plan B additions (schema v2):
|
|
24
|
+
* - `canonicalDocument` — the full parsed model, so the warm-path loader
|
|
25
|
+
* can skip `parseMainDocumentXml` + `createImportedCanonicalDocument` +
|
|
26
|
+
* `buildCompatibilityReport` + `createImportedSnapshot` on cache hit.
|
|
27
|
+
* Saves ~584 ms of the 976 ms cold-upload on `extra-large` CCEP.
|
|
28
|
+
* - `canonicalDocumentHash` — sha256 of sorted-keys JSON. Also the 5th
|
|
29
|
+
* input to `deriveCacheKey`, so style/metadata/comment/preservation
|
|
30
|
+
* mutations correctly invalidate the cache.
|
|
21
31
|
*/
|
|
22
32
|
export interface CacheEnvelope {
|
|
23
33
|
readonly schemaVersion: number;
|
|
24
34
|
readonly engineVersion: number;
|
|
25
35
|
readonly fontFingerprint: string;
|
|
26
36
|
readonly structuralHash: string;
|
|
37
|
+
readonly canonicalDocumentHash: string;
|
|
27
38
|
readonly graph: RuntimePageGraph;
|
|
28
39
|
readonly surface: EditorSurfaceSnapshot;
|
|
40
|
+
readonly canonicalDocument: CanonicalDocument;
|
|
29
41
|
}
|
|
@@ -1,22 +1,31 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* L7 Phase 2.5
|
|
2
|
+
* L7 Phase 2.5 — prerender cache-key derivation.
|
|
3
3
|
*
|
|
4
4
|
* The cache key is the composite identity the IndexedDB (Plan A) and
|
|
5
5
|
* customXml (Plan B) backends index on. It has five inputs:
|
|
6
6
|
*
|
|
7
|
-
* 1. structuralHash(blocks)
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
* 2. fontFingerprint
|
|
14
|
-
*
|
|
15
|
-
*
|
|
16
|
-
*
|
|
17
|
-
*
|
|
18
|
-
*
|
|
19
|
-
*
|
|
7
|
+
* 1. structuralHash(blocks) — sha256 of the ordered kind:blockId list.
|
|
8
|
+
* Stable across text-only edits; changes on
|
|
9
|
+
* insert/delete/reorder because blockIds are
|
|
10
|
+
* kind-counter pairs (paragraph-5 stays 5
|
|
11
|
+
* under typing, shifts to paragraph-6 after
|
|
12
|
+
* an insert).
|
|
13
|
+
* 2. fontFingerprint — identifies the measurement-backend + font-
|
|
14
|
+
* metric source. "empirical-backend" in
|
|
15
|
+
* Plan A; a real font-derived string after
|
|
16
|
+
* Phase 8.
|
|
17
|
+
* 3. engineVersion — LAYOUT_ENGINE_VERSION from src/runtime/
|
|
18
|
+
* layout/layout-engine-version.ts. Bumped by
|
|
19
|
+
* CI gate on any layout/render shape change.
|
|
20
|
+
* 4. schemaVersion — LAYCACHE_SCHEMA_VERSION for envelope
|
|
21
|
+
* format.
|
|
22
|
+
* 5. canonicalDocumentHash — (Plan B, schema v2) sha256 of sorted-keys
|
|
23
|
+
* JSON of the CanonicalDocument. Catches
|
|
24
|
+
* non-structural mutations (styles,
|
|
25
|
+
* metadata, comments, preservation) that
|
|
26
|
+
* `structuralHash` alone misses. Computed
|
|
27
|
+
* via `computeCanonicalDocumentHash()` from
|
|
28
|
+
* `./canonical-document-hash.ts`.
|
|
20
29
|
*
|
|
21
30
|
* Returns a 64-char lower-case hex digest. Uses the Web Crypto API
|
|
22
31
|
* (globalThis.crypto.subtle), available in Node 18+ and all target browsers —
|
|
@@ -33,6 +42,7 @@ export interface CacheKeyInputs {
|
|
|
33
42
|
readonly fontFingerprint: string;
|
|
34
43
|
readonly engineVersion: string | number;
|
|
35
44
|
readonly schemaVersion: number;
|
|
45
|
+
readonly canonicalDocumentHash: string;
|
|
36
46
|
}
|
|
37
47
|
|
|
38
48
|
const BLOCK_SEPARATOR = "\u0000";
|
|
@@ -61,6 +71,7 @@ export async function deriveCacheKey(inputs: CacheKeyInputs): Promise<string> {
|
|
|
61
71
|
inputs.fontFingerprint,
|
|
62
72
|
String(inputs.engineVersion),
|
|
63
73
|
String(inputs.schemaVersion),
|
|
74
|
+
inputs.canonicalDocumentHash,
|
|
64
75
|
].join(FIELD_SEPARATOR);
|
|
65
76
|
return sha256Hex(composite);
|
|
66
77
|
}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import type { CanonicalDocument } from "../../model/canonical-document.ts";
|
|
2
|
+
import { stableStringify } from "../../model/cds-1.0.0.ts";
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* L7 Phase 2.5 Plan B — deterministic hash of a CanonicalDocument.
|
|
6
|
+
*
|
|
7
|
+
* Used as the fifth input to `deriveCacheKey` so non-structural mutations
|
|
8
|
+
* (style edits, metadata changes, comment/revision edits, preservation
|
|
9
|
+
* fragment updates) correctly invalidate the cache. `structuralHash`
|
|
10
|
+
* alone — which keys on the block-id list — misses these because the
|
|
11
|
+
* block structure is unchanged by such mutations.
|
|
12
|
+
*
|
|
13
|
+
* Determinism: uses `stableStringify` from `cds-1.0.0.ts` (the same
|
|
14
|
+
* ordering the canonical-document model already uses for equality
|
|
15
|
+
* comparison), so two runs on structurally-identical documents produce
|
|
16
|
+
* byte-identical JSON → byte-identical SHA-256. Cross-process / cross-
|
|
17
|
+
* machine agreement holds as long as the CanonicalDocument shape matches.
|
|
18
|
+
*
|
|
19
|
+
* **Session-birth metadata is excluded from the hash** — `createdAt` and
|
|
20
|
+
* `updatedAt` are set to `new Date().toISOString()` at session load (see
|
|
21
|
+
* `docx-session.ts`), and `docId` derives from a runtime-allocated UUID
|
|
22
|
+
* when the host does not pin one. These fields are not document identity
|
|
23
|
+
* for cache-validity purposes: a save-and-reload should hit the cache if
|
|
24
|
+
* the document content is unchanged, even though `updatedAt` shifted.
|
|
25
|
+
*
|
|
26
|
+
* Cost budget: <50 ms on `extra-large` CCEP (~250 KB canonical). Dominated
|
|
27
|
+
* by `JSON.stringify` + Web Crypto SHA-256; the key-sort pass inside
|
|
28
|
+
* `stableStringify` is a single depth-first walk.
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
const textEncoder = new TextEncoder();
const NORMALIZED_SENTINEL = "__hash_normalized__";

/**
 * Hashes `input` with SHA-256 (via Web Crypto) over its UTF-8 encoding.
 *
 * @param input Text to hash.
 * @returns The digest as a 64-character lower-case hex string.
 */
async function sha256Hex(input: string): Promise<string> {
  const digest = await crypto.subtle.digest("SHA-256", textEncoder.encode(input));
  // Two hex digits per byte; `padStart` keeps leading zeros (0x0a -> "0a").
  return Array.from(new Uint8Array(digest), (byte) =>
    byte.toString(16).padStart(2, "0"),
  ).join("");
}
|
|
43
|
+
|
|
44
|
+
/**
 * Returns a shallow copy of `doc` in which `docId`, `createdAt` and
 * `updatedAt` are overwritten with a fixed sentinel string.
 *
 * Those three fields are excluded from the hash so that hashing the same
 * document content twice gives the same result even when the identifiers
 * or timestamps differ between loads. `doc` itself is not modified.
 */
function normalizeForHashing(doc: CanonicalDocument): CanonicalDocument {
  const sessionNeutralFields = {
    docId: NORMALIZED_SENTINEL as CanonicalDocument["docId"],
    createdAt: NORMALIZED_SENTINEL as CanonicalDocument["createdAt"],
    updatedAt: NORMALIZED_SENTINEL as CanonicalDocument["updatedAt"],
  };
  return { ...doc, ...sessionNeutralFields };
}
|
|
58
|
+
|
|
59
|
+
/**
 * Hashes a `CanonicalDocument`: the document is first normalized with
 * `normalizeForHashing`, then serialized with `stableStringify`, and the
 * resulting string is digested with `sha256Hex`.
 *
 * @param doc Document to hash; not mutated.
 * @returns Promise of the hex digest string.
 */
export async function computeCanonicalDocumentHash(
  doc: CanonicalDocument,
): Promise<string> {
  const hashableDoc = normalizeForHashing(doc);
  const canonicalJson = stableStringify(hashableDoc);
  return sha256Hex(canonicalJson);
}
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
import type { OpcPackage } from "../../io/opc/package-reader.ts";
|
|
2
|
+
import type { OpcPackagePart } from "../../io/ooxml/part-manifest.ts";
|
|
3
|
+
import {
|
|
4
|
+
WORKFLOW_PAYLOAD_PART_PATH,
|
|
5
|
+
buildEditorStateXml,
|
|
6
|
+
parseEditorStateXml,
|
|
7
|
+
parseWorkflowPayloadEnvelopeFromPackage,
|
|
8
|
+
type EditorStatePayload,
|
|
9
|
+
} from "../../io/ooxml/workflow-payload.ts";
|
|
10
|
+
import {
|
|
11
|
+
LAYCACHE_SCHEMA_VERSION,
|
|
12
|
+
LAYOUT_ENGINE_VERSION,
|
|
13
|
+
} from "../layout/layout-engine-version.ts";
|
|
14
|
+
import type { CacheEnvelope } from "./cache-envelope.ts";
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* L7 Phase 2.5 Plan B B.4 — customXml read/write for the prerender cache.
|
|
18
|
+
*
|
|
19
|
+
* Persists the cache envelope inside the docx's workflow-payload part
|
|
20
|
+
* (`/customXml/item1.xml`) under the "laycache" namespace, so the cache
|
|
21
|
+
* travels with the file and hands a warm-start experience to any client
|
|
22
|
+
* that opens the same bytes.
|
|
23
|
+
*
|
|
24
|
+
* **Routing decision:** laycache does NOT extend `EditorStateNamespace`.
|
|
25
|
+
* The entry round-trips through the existing `unknownNamespaces`
|
|
26
|
+
* preservation path, which already provides:
|
|
27
|
+
* - Automatic Word round-trip (Word doesn't touch customXml parts).
|
|
28
|
+
* - Automatic runtime save-path preservation (the save path appends
|
|
29
|
+
* `channel.getUnknownEntries()` verbatim).
|
|
30
|
+
* - Separation of cache concerns from runtime subsystem state (laycache
|
|
31
|
+
* is not conflated with hostAnnotations/workflowOverlay/etc.).
|
|
32
|
+
*
|
|
33
|
+
* Tradeoff: the write path hand-builds a `<bw:namespace name="laycache">`
|
|
34
|
+
* XML fragment rather than leveraging `buildEditorStateXml`'s entry
|
|
35
|
+
* serializer. Six lines of escaped string concatenation in exchange for
|
|
36
|
+
* the above.
|
|
37
|
+
*
|
|
38
|
+
* **Scope (MVP):** Write-side requires the docx to already have a
|
|
39
|
+
* workflow-payload part (`/customXml/item1.xml`). Docs without it return
|
|
40
|
+
* `{ written: false, reason: "no-customxml-part" }` so the caller can
|
|
41
|
+
* fall back to IndexedDB-only caching. Fresh/minimal docs that lack a
|
|
42
|
+
* payload part are already fast-opening; Plan B is targeted at CCEP-scale
|
|
43
|
+
* templates which reliably carry the part.
|
|
44
|
+
*/
|
|
45
|
+
|
|
46
|
+
export const LAYCACHE_NAMESPACE_NAME = "laycache" as const;
|
|
47
|
+
const LAYCACHE_ENTRY_SCHEMA_VERSION = `laycache/${LAYCACHE_SCHEMA_VERSION}`;
|
|
48
|
+
|
|
49
|
+
export type WriteEnvelopeResult =
|
|
50
|
+
| { written: true }
|
|
51
|
+
| { written: false; reason: "no-customxml-part" };
|
|
52
|
+
|
|
53
|
+
/**
 * Writes (or replaces) the laycache envelope inside the OPC package's
 * workflow-payload part. On success, mutates `opcPackage.parts` in place.
 *
 * Existing editor-state `entries` and every unknown namespace other than
 * `laycache` are carried over into the rewritten block; any previous
 * `laycache` namespace is dropped and replaced by the new one.
 *
 * @param opcPackage Package whose workflow-payload part is rewritten.
 * @param envelope   Cache envelope to persist (serialized as JSON).
 * @returns `{ written: true }` when the part now contains the new
 *          editor-state block. `{ written: false, reason: "no-customxml-part" }`
 *          when the package has no workflow-payload part, or when that part
 *          has no structure the block could be spliced into — in both cases
 *          the package is left untouched so the caller can fall back to
 *          IndexedDB caching.
 */
export function writeEnvelopeToOpcPackage(
  opcPackage: OpcPackage,
  envelope: CacheEnvelope,
): WriteEnvelopeResult {
  const existingPart = opcPackage.parts.get(WORKFLOW_PAYLOAD_PART_PATH);
  if (!existingPart) {
    return { written: false, reason: "no-customxml-part" };
  }

  const existingXml = new TextDecoder().decode(existingPart.bytes);
  const existingEditorState: EditorStatePayload = parseEditorStateXml(existingXml) ?? {
    entries: [],
  };

  // Remove any previous laycache entry; keep every other unknown namespace.
  const preservedUnknowns = (existingEditorState.unknownNamespaces ?? []).filter(
    (ns) => ns.name !== LAYCACHE_NAMESPACE_NAME,
  );

  const laycacheRawXml = buildLaycacheNamespaceXml(envelope);

  const nextEditorState: EditorStatePayload = {
    entries: existingEditorState.entries,
    unknownNamespaces: [
      ...preservedUnknowns,
      { name: LAYCACHE_NAMESPACE_NAME, rawXml: laycacheRawXml },
    ],
  };

  const newEditorStateBlock = buildEditorStateXml(nextEditorState);
  const newXml = spliceEditorStateIntoPayloadXml(existingXml, newEditorStateBlock);

  // The splice helper returns its input unchanged when the payload has
  // neither an editor-state block nor a closing root tag. Verify the block
  // actually landed (byte-for-byte) before touching the package, so a
  // payload we could not update is never reported as written.
  if (!newXml.includes(newEditorStateBlock)) {
    return { written: false, reason: "no-customxml-part" };
  }

  const newBytes = new TextEncoder().encode(newXml);

  // Note: any crc32 carried on the part is left at its previous value. Per
  // the original author's note, `writeOpcPackage` recomputes the CRC from
  // the uncompressed bytes at zip time, so the stale field is not consumed
  // by the writer.
  const nextPart: OpcPackagePart = {
    ...existingPart,
    bytes: newBytes,
  };
  opcPackage.parts.set(WORKFLOW_PAYLOAD_PART_PATH, nextPart);
  return { written: true };
}
|
|
106
|
+
|
|
107
|
+
/**
 * Extracts the laycache envelope from an already-opened OPC package.
 *
 * Returns `null` as soon as any stage fails:
 *   - the workflow payload, or its editor state, cannot be obtained
 *   - the editor state has no unknown namespace named `laycache`
 *   - that namespace carries no inline payload text
 *   - the payload text is not valid JSON
 *   - the parsed value is rejected by `isValidCacheEnvelope`
 *
 * @param opcPackage Package to read from; not modified.
 * @returns The validated envelope, or `null`.
 */
export function readEnvelopeFromOpcPackage(opcPackage: OpcPackage): CacheEnvelope | null {
  const editorState = parseWorkflowPayloadEnvelopeFromPackage(opcPackage)?.editorState;
  if (!editorState) return null;

  const unknownNamespaces = editorState.unknownNamespaces ?? [];
  const laycacheEntry = unknownNamespaces.find(
    ({ name }) => name === LAYCACHE_NAMESPACE_NAME,
  );
  if (laycacheEntry === undefined) return null;

  const payloadJson = extractInlineCdata(laycacheEntry.rawXml);
  if (payloadJson === null) return null;

  let candidate: unknown;
  try {
    candidate = JSON.parse(payloadJson);
  } catch {
    // Corrupted payload: treat exactly like a missing cache entry.
    return null;
  }

  return isValidCacheEnvelope(candidate) ? candidate : null;
}
|
|
139
|
+
|
|
140
|
+
// ---------------------------------------------------------------------------
|
|
141
|
+
// Helpers
|
|
142
|
+
// ---------------------------------------------------------------------------
|
|
143
|
+
|
|
144
|
+
/**
 * Serializes `envelope` to JSON and wraps it in a `<bw:namespace>` element
 * (named `laycache`) containing a single `<bw:inline>` CDATA payload.
 *
 * Any `]]>` in the JSON would end the CDATA section early, so each
 * occurrence is split across two adjacent CDATA sections.
 */
function buildLaycacheNamespaceXml(envelope: CacheEnvelope): string {
  const cdataSafeJson = JSON.stringify(envelope)
    .split("]]>")
    .join("]]]]><![CDATA[>");

  const openingTag = `<bw:namespace name="${LAYCACHE_NAMESPACE_NAME}" schemaVersion="${LAYCACHE_ENTRY_SCHEMA_VERSION}">`;
  const inlineElement = `<bw:inline><![CDATA[${cdataSafeJson}]]></bw:inline>`;
  const closingTag = `</bw:namespace>`;

  return [openingTag, inlineElement, closingTag].join("");
}
|
|
152
|
+
|
|
153
|
+
/**
 * Replaces (or inserts) the `<bw:editorState>` block inside an existing
 * workflow-payload XML string. Three cases:
 *   1. A `<bw:editorState>…</bw:editorState>` block is present — its first
 *      occurrence is replaced by `editorStateBlock`.
 *   2. No such block, but `</bw:workflowPayload>` is present — the block is
 *      inserted before the first closing root tag, and the `version`
 *      attribute on the opening `<bw:workflowPayload>` tag (if it has one)
 *      is set to "1.2".
 *   3. Neither — the input is returned unchanged, because no safe
 *      insertion point exists.
 *
 * `editorStateBlock` is always inserted verbatim. Replacer *functions* are
 * used for every substitution because a replacement *string* would have
 * `$$`, `$&`, `` $` `` and `$'` interpreted as special patterns, silently
 * altering payload text that happens to contain a dollar sign.
 *
 * @param xml              Current workflow-payload XML.
 * @param editorStateBlock Fully serialized `<bw:editorState>` element.
 * @returns The updated XML, or `xml` itself in case 3.
 */
function spliceEditorStateIntoPayloadXml(xml: string, editorStateBlock: string): string {
  const existingBlockRe = /<bw:editorState\b[^>]*>[\s\S]*?<\/bw:editorState>/u;
  if (existingBlockRe.test(xml)) {
    return xml.replace(existingBlockRe, () => editorStateBlock);
  }

  const rootCloseRe = /<\/bw:workflowPayload>/u;
  if (rootCloseRe.test(xml)) {
    const upgraded = xml.replace(
      /(<bw:workflowPayload\b[^>]*?\bversion=")([^"]*)(")/u,
      (_m, prefix: string, _v: string, suffix: string) => `${prefix}1.2${suffix}`,
    );
    return upgraded.replace(
      rootCloseRe,
      () => `${editorStateBlock}\n</bw:workflowPayload>`,
    );
  }

  return xml;
}
|
|
185
|
+
|
|
186
|
+
/**
 * Extracts the text payload of the first `<bw:inline>` element in `rawXml`.
 *
 * The element body is scanned section by section:
 *   - text inside a `<![CDATA[ … ]]>` section is taken verbatim, so a
 *     payload containing a literal `<![CDATA[` or `</bw:inline>` survives;
 *   - consecutive CDATA sections are concatenated, which reverses the
 *     `]]]]><![CDATA[>` split used to encode `]]>`;
 *   - text outside CDATA sections is kept with stray `]]>` removed.
 *
 * @param rawXml Raw XML of a namespace entry.
 * @returns The trimmed payload, or `null` when there is no `<bw:inline>`
 *          element, it has no closing tag, a CDATA section is not
 *          terminated, or the payload is empty after trimming.
 */
function extractInlineCdata(rawXml: string): string | null {
  const CDATA_OPEN = "<![CDATA[";
  const CDATA_CLOSE = "]]>";
  const INLINE_CLOSE = "</bw:inline>";

  const openTag = /<bw:inline\b[^>]*>/u.exec(rawXml);
  if (!openTag) return null;

  let cursor = openTag.index + openTag[0].length;
  let payload = "";

  for (;;) {
    const closeAt = rawXml.indexOf(INLINE_CLOSE, cursor);
    if (closeAt === -1) return null;

    const cdataAt = rawXml.indexOf(CDATA_OPEN, cursor);
    if (cdataAt === -1 || cdataAt > closeAt) {
      // No further CDATA section before the closing tag: finish here.
      payload += rawXml.slice(cursor, closeAt).split(CDATA_CLOSE).join("");
      break;
    }

    // Plain text ahead of the CDATA section, then the section verbatim.
    payload += rawXml.slice(cursor, cdataAt).split(CDATA_CLOSE).join("");
    const contentStart = cdataAt + CDATA_OPEN.length;
    const contentEnd = rawXml.indexOf(CDATA_CLOSE, contentStart);
    if (contentEnd === -1) return null;
    payload += rawXml.slice(contentStart, contentEnd);
    cursor = contentEnd + CDATA_CLOSE.length;
  }

  const trimmed = payload.trim();
  return trimmed.length > 0 ? trimmed : null;
}
|
|
193
|
+
|
|
194
|
+
/**
 * Shallow structural check that `value` looks like a `CacheEnvelope`
 * produced by the current schema and layout-engine versions.
 *
 * Accepts only a non-null object whose `schemaVersion` and `engineVersion`
 * strictly equal the current constants, whose `fontFingerprint`,
 * `structuralHash` and `canonicalDocumentHash` are strings, and whose
 * `graph`, `surface` and `canonicalDocument` are non-null objects. The
 * contents of those nested objects are not inspected.
 */
function isValidCacheEnvelope(value: unknown): value is CacheEnvelope {
  if (value === null || typeof value !== "object") return false;
  const candidate = value as Record<string, unknown>;

  if (candidate.schemaVersion !== LAYCACHE_SCHEMA_VERSION) return false;
  if (candidate.engineVersion !== LAYOUT_ENGINE_VERSION) return false;

  const stringFields = ["fontFingerprint", "structuralHash", "canonicalDocumentHash"];
  const objectFields = ["graph", "surface", "canonicalDocument"];

  const stringsOk = stringFields.every((key) => typeof candidate[key] === "string");
  if (!stringsOk) return false;

  return objectFields.every((key) => {
    const field = candidate[key];
    return typeof field === "object" && field !== null;
  });
}
|
|
211
|
+
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import type { OpcPackage } from "../../io/opc/package-reader.ts";
|
|
2
|
+
import { readOpcPackage } from "../../io/opc/package-reader.ts";
|
|
3
|
+
import { parseBlockStructure } from "../../io/ooxml/parse-block-structure.ts";
|
|
4
|
+
import type { CacheEnvelope } from "./cache-envelope.ts";
|
|
5
|
+
import { computeStructuralHash } from "./cache-key.ts";
|
|
6
|
+
import { readEnvelopeFromOpcPackage } from "./customxml-cache.ts";
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* L7 Phase 2.5 Plan B B.6 — customXml probe (read-side pre-parse).
|
|
10
|
+
*
|
|
11
|
+
* Opens a docx's OPC package just enough to extract the laycache envelope
|
|
12
|
+
* from `/customXml/item1.xml` without parsing `word/document.xml` or
|
|
13
|
+
* running the canonical-document builder. Consumers use the returned
|
|
14
|
+
* envelope to decide whether to take the fast seeding path or fall
|
|
15
|
+
* through to `loadDocxEditorSessionAsync`'s full pipeline.
|
|
16
|
+
*
|
|
17
|
+
* Cost budget: ~20-50 ms on extra-large (OPC unzip 17 ms + XML regex
|
|
18
|
+
* extract + JSON parse + schema validation). Well below the ~584 ms of
|
|
19
|
+
* parse + skeleton-ready work that a successful probe skips.
|
|
20
|
+
*
|
|
21
|
+
* Null is returned for every rejection path:
|
|
22
|
+
* - OPC parse failure (malformed zip, missing Content_Types)
|
|
23
|
+
* - no workflow-payload part (`/customXml/item1.xml`)
|
|
24
|
+
* - no laycache entry inside the editor-state payload
|
|
25
|
+
* - corrupted inline JSON
|
|
26
|
+
* - envelope.schemaVersion or engineVersion mismatch
|
|
27
|
+
* - missing/mis-typed required envelope fields
|
|
28
|
+
*
|
|
29
|
+
* The returned `opcPackage` is included so callers can hand it to
|
|
30
|
+
* downstream code that would otherwise re-parse the ZIP from bytes
|
|
31
|
+
* (saving the ~17 ms OPC unzip on the cache-hit path).
|
|
32
|
+
*
|
|
33
|
+
* **B.7 structural verification.** When the probe accepts an envelope,
|
|
34
|
+
* it additionally runs `parseBlockStructure` against the package's
|
|
35
|
+
* `word/document.xml` and compares the resulting `structuralHash`
|
|
36
|
+
* against `envelope.structuralHash`. A mismatch means the docx has
|
|
37
|
+
* been edited since the envelope was written — e.g. Word added a
|
|
38
|
+
* paragraph and saved — and the probe returns null to force the full
|
|
39
|
+
* parse path. See `src/io/ooxml/parse-block-structure.ts` for the
|
|
40
|
+
* probe's known limitations (opaque-promoting OOXML features cannot
|
|
41
|
+
* be detected shallow-parse; those docs safely fall through).
|
|
42
|
+
*/
|
|
43
|
+
export interface LaycacheProbeResult {
|
|
44
|
+
readonly envelope: CacheEnvelope;
|
|
45
|
+
readonly opcPackage: OpcPackage;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/**
 * Probes raw docx bytes for a usable laycache envelope.
 *
 * The probe opens the OPC package, reads and validates the envelope, then
 * verifies it against the current document: `/word/document.xml` is
 * shallow-parsed with `parseBlockStructure`, hashed with
 * `computeStructuralHash`, and the result must equal
 * `envelope.structuralHash`.
 *
 * This is a best-effort cache lookup, so it never throws: any failure at
 * any stage — including an exception raised while opening the package,
 * reading the envelope, parsing the document structure or hashing — is
 * reported as `null` so the caller can fall through to the full load path.
 *
 * @param input Raw docx bytes.
 * @returns The envelope together with the opened package (so the caller
 *          need not unzip again), or `null` when the cache cannot be used.
 */
export async function tryReadLaycacheEnvelope(
  input: ArrayBuffer | Uint8Array,
): Promise<LaycacheProbeResult | null> {
  const bytes = input instanceof Uint8Array ? input : new Uint8Array(input);

  try {
    const opcPackage = readOpcPackage(bytes);

    const envelope = readEnvelopeFromOpcPackage(opcPackage);
    if (!envelope) return null;

    // Structural verification: reject an envelope whose cached
    // structuralHash no longer matches the document it travels with
    // (for example, the file was edited elsewhere after the cache was
    // written).
    const documentXmlPart = opcPackage.parts.get("/word/document.xml");
    if (!documentXmlPart) return null;

    const documentXml = new TextDecoder().decode(documentXmlPart.bytes);
    const probedBlocks = parseBlockStructure(documentXml);
    // `await` stays inside the try so a rejected hash promise is also
    // converted to a cache miss.
    const probedHash = await computeStructuralHash(probedBlocks);
    if (probedHash !== envelope.structuralHash) return null;

    return { envelope, opcPackage };
  } catch {
    return null;
  }
}
|