@beyondwork/docx-react-component 1.0.47 → 1.0.48
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/api/public-types.ts +115 -1
- package/src/compare/diff-engine.ts +4 -0
- package/src/core/commands/add-scope.ts +257 -0
- package/src/core/commands/formatting-commands.ts +2 -0
- package/src/core/schema/text-schema.ts +95 -1
- package/src/core/state/text-transaction.ts +17 -5
- package/src/io/chart-preview-resolver.ts +27 -0
- package/src/io/docx-session.ts +226 -38
- package/src/io/export/serialize-main-document.ts +37 -0
- package/src/io/export/serialize-settings.ts +421 -0
- package/src/io/export/serialize-styles.ts +10 -0
- package/src/io/normalize/normalize-text.ts +1 -0
- package/src/io/ooxml/chart/parse-axis.ts +277 -0
- package/src/io/ooxml/chart/parse-chart-space.ts +813 -0
- package/src/io/ooxml/chart/parse-series.ts +570 -0
- package/src/io/ooxml/chart/resolve-color.ts +251 -0
- package/src/io/ooxml/chart/types.ts +420 -0
- package/src/io/ooxml/parse-block-structure.ts +99 -0
- package/src/io/ooxml/parse-complex-content.ts +87 -2
- package/src/io/ooxml/parse-main-document.ts +115 -1
- package/src/io/ooxml/parse-scope-markers.ts +184 -0
- package/src/io/ooxml/parse-settings-blueprint.ts +349 -0
- package/src/io/ooxml/parse-settings.ts +97 -1
- package/src/io/ooxml/parse-styles.ts +65 -0
- package/src/io/ooxml/parse-theme.ts +2 -127
- package/src/io/ooxml/xml-attr-helpers.ts +59 -1
- package/src/io/ooxml/xml-parser.ts +142 -0
- package/src/model/canonical-document.ts +94 -0
- package/src/model/scope-markers.ts +144 -0
- package/src/runtime/collab/base-doc-fingerprint.ts +99 -0
- package/src/runtime/collab/checkpoint-election.ts +75 -0
- package/src/runtime/collab/checkpoint-scheduler.ts +204 -0
- package/src/runtime/collab/checkpoint-store.ts +115 -0
- package/src/runtime/collab/event-types.ts +27 -0
- package/src/runtime/collab/index.ts +22 -0
- package/src/runtime/collab/remote-cursor-awareness.ts +167 -0
- package/src/runtime/collab/runtime-collab-sync.ts +279 -0
- package/src/runtime/document-runtime.ts +214 -16
- package/src/runtime/editor-surface/capabilities.ts +63 -50
- package/src/runtime/layout/layout-engine-version.ts +8 -1
- package/src/runtime/prerender/cache-envelope.ts +19 -7
- package/src/runtime/prerender/cache-key.ts +25 -14
- package/src/runtime/prerender/canonical-document-hash.ts +63 -0
- package/src/runtime/prerender/customxml-cache.ts +211 -0
- package/src/runtime/prerender/customxml-probe.ts +78 -0
- package/src/runtime/prerender/prerender-document.ts +74 -7
- package/src/runtime/scope-resolver.ts +148 -0
- package/src/runtime/scope-tag-registry.ts +10 -0
- package/src/runtime/surface-projection.ts +8 -1
- package/src/ui/WordReviewEditor.tsx +30 -0
- package/src/ui/editor-runtime-boundary.ts +6 -1
- package/src/ui/runtime-shortcut-dispatch.ts +12 -7
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
import type { OpcPackage } from "../../io/opc/package-reader.ts";
|
|
2
|
+
import type { OpcPackagePart } from "../../io/ooxml/part-manifest.ts";
|
|
3
|
+
import {
|
|
4
|
+
WORKFLOW_PAYLOAD_PART_PATH,
|
|
5
|
+
buildEditorStateXml,
|
|
6
|
+
parseEditorStateXml,
|
|
7
|
+
parseWorkflowPayloadEnvelopeFromPackage,
|
|
8
|
+
type EditorStatePayload,
|
|
9
|
+
} from "../../io/ooxml/workflow-payload.ts";
|
|
10
|
+
import {
|
|
11
|
+
LAYCACHE_SCHEMA_VERSION,
|
|
12
|
+
LAYOUT_ENGINE_VERSION,
|
|
13
|
+
} from "../layout/layout-engine-version.ts";
|
|
14
|
+
import type { CacheEnvelope } from "./cache-envelope.ts";
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* L7 Phase 2.5 Plan B B.4 — customXml read/write for the prerender cache.
|
|
18
|
+
*
|
|
19
|
+
* Persists the cache envelope inside the docx's workflow-payload part
|
|
20
|
+
* (`/customXml/item1.xml`) under the "laycache" namespace, so the cache
|
|
21
|
+
* travels with the file and hands a warm-start experience to any client
|
|
22
|
+
* that opens the same bytes.
|
|
23
|
+
*
|
|
24
|
+
* **Routing decision:** laycache does NOT extend `EditorStateNamespace`.
|
|
25
|
+
* The entry round-trips through the existing `unknownNamespaces`
|
|
26
|
+
* preservation path, which already provides:
|
|
27
|
+
* - Automatic Word round-trip (Word doesn't touch customXml parts).
|
|
28
|
+
* - Automatic runtime save-path preservation (the save path appends
|
|
29
|
+
* `channel.getUnknownEntries()` verbatim).
|
|
30
|
+
* - Separation of cache concerns from runtime subsystem state (laycache
|
|
31
|
+
* is not conflated with hostAnnotations/workflowOverlay/etc.).
|
|
32
|
+
*
|
|
33
|
+
* Tradeoff: the write path hand-builds a `<bw:namespace name="laycache">`
|
|
34
|
+
* XML fragment rather than leveraging `buildEditorStateXml`'s entry
|
|
35
|
+
* serializer. Six lines of escaped string concatenation in exchange for
|
|
36
|
+
* the above.
|
|
37
|
+
*
|
|
38
|
+
* **Scope (MVP):** Write-side requires the docx to already have a
|
|
39
|
+
* workflow-payload part (`/customXml/item1.xml`). Docs without it return
|
|
40
|
+
* `{ written: false, reason: "no-customxml-part" }` so the caller can
|
|
41
|
+
* fall back to IndexedDB-only caching. Fresh/minimal docs that lack a
|
|
42
|
+
* payload part are already fast-opening; Plan B is targeted at CCEP-scale
|
|
43
|
+
* templates which reliably carry the part.
|
|
44
|
+
*/
|
|
45
|
+
|
|
46
|
+
/** Name of the editor-state namespace under which the cache envelope is stored. */
export const LAYCACHE_NAMESPACE_NAME = "laycache" as const;
/** Value written to the `schemaVersion` attribute of the laycache `<bw:namespace>` element. */
const LAYCACHE_ENTRY_SCHEMA_VERSION = `laycache/${LAYCACHE_SCHEMA_VERSION}`;

/**
 * Outcome of `writeEnvelopeToOpcPackage`: `written: true` on success, or
 * `written: false` together with the reason the write was skipped.
 */
export type WriteEnvelopeResult =
  | { written: true }
  | { written: false; reason: "no-customxml-part" };
|
|
52
|
+
|
|
53
|
+
/**
 * Writes (or replaces) the laycache envelope inside the OPC package's
 * workflow-payload part. Mutates `opcPackage.parts` in place.
 *
 * The existing `entries` and every unknown namespace other than `laycache`
 * are carried over into the rebuilt editor-state block.
 *
 * @param opcPackage Package whose workflow-payload part is rewritten.
 * @param envelope Cache envelope to embed as inline JSON.
 * @returns `{ written: true }` once the part has been replaced.
 *   `{ written: false, reason: "no-customxml-part" }` when the package has no
 *   workflow-payload part, or when the part's XML offers no place to splice
 *   the editor-state block into (no `<bw:editorState>` block and no
 *   `</bw:workflowPayload>` closing tag). In both cases the package is left
 *   untouched and the caller should fall back to IndexedDB caching.
 */
export function writeEnvelopeToOpcPackage(
  opcPackage: OpcPackage,
  envelope: CacheEnvelope,
): WriteEnvelopeResult {
  const existingPart = opcPackage.parts.get(WORKFLOW_PAYLOAD_PART_PATH);
  if (!existingPart) {
    return { written: false, reason: "no-customxml-part" };
  }

  const existingXml = new TextDecoder().decode(existingPart.bytes);
  const existingEditorState: EditorStatePayload = parseEditorStateXml(existingXml) ?? {
    entries: [],
  };

  // Remove any previous laycache entry; keep every other unknown namespace.
  const preservedUnknowns = (existingEditorState.unknownNamespaces ?? []).filter(
    (ns) => ns.name !== LAYCACHE_NAMESPACE_NAME,
  );

  const nextEditorState: EditorStatePayload = {
    entries: existingEditorState.entries,
    unknownNamespaces: [
      ...preservedUnknowns,
      { name: LAYCACHE_NAMESPACE_NAME, rawXml: buildLaycacheNamespaceXml(envelope) },
    ],
  };

  const newEditorStateBlock = buildEditorStateXml(nextEditorState);
  const newXml = spliceEditorStateIntoPayloadXml(existingXml, newEditorStateBlock);

  // The splice helper hands back its input untouched when it finds neither an
  // editor-state block nor the payload root. Reporting `written: true` in that
  // case would claim a persisted cache that is not in the bytes. An unchanged
  // result that already contains the new block is a genuine idempotent
  // rewrite and still counts as written.
  if (newXml === existingXml && !existingXml.includes(newEditorStateBlock)) {
    return { written: false, reason: "no-customxml-part" };
  }

  // Note: crc32 is left at the previous value. `writeOpcPackage` recomputes
  // the CRC from uncompressedBytes at zip time (`package-writer.ts:156`), so
  // the field is effectively documentation from the reader — not consumed by
  // the writer.
  const nextPart: OpcPackagePart = {
    ...existingPart,
    bytes: new TextEncoder().encode(newXml),
  };
  opcPackage.parts.set(WORKFLOW_PAYLOAD_PART_PATH, nextPart);
  return { written: true };
}
|
|
106
|
+
|
|
107
|
+
/**
 * Extracts the laycache envelope from an already-parsed OPC package and
 * validates it.
 *
 * @param opcPackage Parsed package to inspect.
 * @returns The envelope, or `null` when any step fails:
 *   - the workflow payload or its editor state is missing,
 *   - `unknownNamespaces` has no `laycache` entry,
 *   - the entry has no usable inline payload,
 *   - the payload is not valid JSON,
 *   - the parsed value fails `isValidCacheEnvelope` (schema/engine version
 *     mismatch or missing required fields).
 */
export function readEnvelopeFromOpcPackage(opcPackage: OpcPackage): CacheEnvelope | null {
  const editorState = parseWorkflowPayloadEnvelopeFromPackage(opcPackage)?.editorState;
  if (!editorState) return null;

  const unknownNamespaces = editorState.unknownNamespaces ?? [];
  const laycacheEntry = unknownNamespaces.find((ns) => ns.name === LAYCACHE_NAMESPACE_NAME);
  if (!laycacheEntry) return null;

  const payloadJson = extractInlineCdata(laycacheEntry.rawXml);
  if (payloadJson === null) return null;

  let candidate: unknown;
  try {
    candidate = JSON.parse(payloadJson);
  } catch {
    // Corrupted inline JSON is treated as "no cache", not as an error.
    return null;
  }

  return isValidCacheEnvelope(candidate) ? candidate : null;
}
|
|
139
|
+
|
|
140
|
+
// ---------------------------------------------------------------------------
|
|
141
|
+
// Helpers
|
|
142
|
+
// ---------------------------------------------------------------------------
|
|
143
|
+
|
|
144
|
+
/**
 * Serializes the envelope as a `<bw:namespace name="laycache">` fragment whose
 * `<bw:inline>` child carries the JSON inside a CDATA section.
 */
function buildLaycacheNamespaceXml(envelope: CacheEnvelope): string {
  // A literal "]]>" in the JSON would end the CDATA section early, so each
  // occurrence is split across two adjacent CDATA sections.
  const cdataSafeJson = JSON.stringify(envelope).replace(/\]\]>/g, "]]]]><![CDATA[>");

  const openTag = `<bw:namespace name="${LAYCACHE_NAMESPACE_NAME}" schemaVersion="${LAYCACHE_ENTRY_SCHEMA_VERSION}">`;
  const inline = `<bw:inline><![CDATA[${cdataSafeJson}]]></bw:inline>`;
  const closeTag = `</bw:namespace>`;
  return [openTag, inline, closeTag].join("");
}
|
|
152
|
+
|
|
153
|
+
/**
 * Replaces (or inserts) the `<bw:editorState>` block inside an existing
 * workflow-payload XML. Handles three cases:
 *   1. Block already present — replaced in place.
 *   2. Block absent, `</bw:workflowPayload>` present — the block is inserted
 *      before the closing tag and the `version` attribute on the opening tag
 *      is rewritten to "1.2".
 *   3. Neither — the input is returned unchanged, because the outer structure
 *      cannot be safely synthesized.
 *
 * The block is always inserted verbatim. It is passed to `replace` through a
 * function, never as a replacement string, because the block embeds document
 * JSON and a replacement string would expand `$$`, `$&`, `` $` `` and `$'`.
 *
 * @param xml Current workflow-payload XML.
 * @param editorStateBlock Serialized `<bw:editorState>…</bw:editorState>` block.
 * @returns The updated XML, or `xml` itself in case 3.
 */
function spliceEditorStateIntoPayloadXml(xml: string, editorStateBlock: string): string {
  const existingBlockRe = /<bw:editorState\b[^>]*>[\s\S]*?<\/bw:editorState>/u;
  if (existingBlockRe.test(xml)) {
    return xml.replace(existingBlockRe, () => editorStateBlock);
  }

  const rootCloseRe = /<\/bw:workflowPayload>/u;
  if (rootCloseRe.test(xml)) {
    // NOTE(review): the version is set to "1.2" unconditionally, which would
    // also lower a higher version if one ever exists — confirm intended.
    const upgraded = xml.replace(
      /(<bw:workflowPayload\b[^>]*?\bversion=")([^"]*)(")/u,
      (_m, prefix: string, _v: string, suffix: string) => `${prefix}1.2${suffix}`,
    );
    return upgraded.replace(rootCloseRe, () => `${editorStateBlock}\n</bw:workflowPayload>`);
  }

  return xml;
}
|
|
185
|
+
|
|
186
|
+
/**
 * Returns the text content of the first `<bw:inline>` element in `rawXml`,
 * with CDATA sections unwrapped and the result trimmed.
 *
 * CDATA sections are walked one at a time rather than stripping every
 * `<![CDATA[` / `]]>` token. This keeps a literal `<![CDATA[` or
 * `</bw:inline>` inside the payload intact, and still rejoins a `]]>` that
 * the writer split across two adjacent sections. Text outside CDATA sections
 * is kept as-is (no entity decoding).
 *
 * @param rawXml Raw XML of the namespace entry.
 * @returns The unwrapped text, or `null` when there is no `<bw:inline>`
 *   element, the element or one of its CDATA sections is unterminated, or the
 *   content is empty after trimming.
 */
function extractInlineCdata(rawXml: string): string | null {
  const cdataOpen = "<![CDATA[";
  const cdataClose = "]]>";
  const closeTag = "</bw:inline>";

  const openIndex = rawXml.search(/<bw:inline\b[^>]*>/u);
  if (openIndex === -1) return null;

  // The opening-tag pattern ends at the first ">" after its start.
  let pos = rawXml.indexOf(">", openIndex) + 1;
  let text = "";

  for (;;) {
    const nextClose = rawXml.indexOf(closeTag, pos);
    if (nextClose === -1) return null; // element never closes
    const nextCdata = rawXml.indexOf(cdataOpen, pos);

    if (nextCdata === -1 || nextClose < nextCdata) {
      // Only plain text remains before the real closing tag.
      text += rawXml.slice(pos, nextClose);
      break;
    }

    // Consume the whole section so that markup-looking text inside it
    // (including "</bw:inline>") is treated as data.
    const contentStart = nextCdata + cdataOpen.length;
    const contentEnd = rawXml.indexOf(cdataClose, contentStart);
    if (contentEnd === -1) return null; // unterminated CDATA section
    text += rawXml.slice(pos, nextCdata) + rawXml.slice(contentStart, contentEnd);
    pos = contentEnd + cdataClose.length;
  }

  const trimmed = text.trim();
  return trimmed.length > 0 ? trimmed : null;
}
|
|
193
|
+
|
|
194
|
+
/**
 * Shallow type guard for a parsed cache envelope.
 *
 * Accepts a non-null object whose `schemaVersion` and `engineVersion` equal
 * the current constants, whose `fontFingerprint`, `structuralHash` and
 * `canonicalDocumentHash` are strings, and whose `graph`, `surface` and
 * `canonicalDocument` are non-null objects. Nested contents are not inspected.
 */
function isValidCacheEnvelope(value: unknown): value is CacheEnvelope {
  if (value === null || typeof value !== "object") return false;
  const candidate = value as Record<string, unknown>;

  if (candidate.schemaVersion !== LAYCACHE_SCHEMA_VERSION) return false;
  if (candidate.engineVersion !== LAYOUT_ENGINE_VERSION) return false;

  const stringFields = ["fontFingerprint", "structuralHash", "canonicalDocumentHash"];
  if (!stringFields.every((key) => typeof candidate[key] === "string")) return false;

  const objectFields = ["graph", "surface", "canonicalDocument"];
  return objectFields.every((key) => {
    const field = candidate[key];
    return typeof field === "object" && field !== null;
  });
}
|
|
211
|
+
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import type { OpcPackage } from "../../io/opc/package-reader.ts";
|
|
2
|
+
import { readOpcPackage } from "../../io/opc/package-reader.ts";
|
|
3
|
+
import { parseBlockStructure } from "../../io/ooxml/parse-block-structure.ts";
|
|
4
|
+
import type { CacheEnvelope } from "./cache-envelope.ts";
|
|
5
|
+
import { computeStructuralHash } from "./cache-key.ts";
|
|
6
|
+
import { readEnvelopeFromOpcPackage } from "./customxml-cache.ts";
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* L7 Phase 2.5 Plan B B.6 — customXml probe (read-side pre-parse).
|
|
10
|
+
*
|
|
11
|
+
* Opens a docx's OPC package just enough to extract the laycache envelope
|
|
12
|
+
* from `/customXml/item1.xml` without parsing `word/document.xml` or
|
|
13
|
+
* running the canonical-document builder. Consumers use the returned
|
|
14
|
+
* envelope to decide whether to take the fast seeding path or fall
|
|
15
|
+
* through to `loadDocxEditorSessionAsync`'s full pipeline.
|
|
16
|
+
*
|
|
17
|
+
* Cost budget: ~20-50 ms on extra-large (OPC unzip 17 ms + XML regex
|
|
18
|
+
* extract + JSON parse + schema validation). Well below the ~584 ms of
|
|
19
|
+
* parse + skeleton-ready work that a successful probe skips.
|
|
20
|
+
*
|
|
21
|
+
* Null is returned for every rejection path:
|
|
22
|
+
* - OPC parse failure (malformed zip, missing Content_Types)
|
|
23
|
+
* - no workflow-payload part (`/customXml/item1.xml`)
|
|
24
|
+
* - no laycache entry inside the editor-state payload
|
|
25
|
+
* - corrupted inline JSON
|
|
26
|
+
* - envelope.schemaVersion or engineVersion mismatch
|
|
27
|
+
* - missing/mis-typed required envelope fields
|
|
28
|
+
*
|
|
29
|
+
* The returned `opcPackage` is included so callers can hand it to
|
|
30
|
+
* downstream code that would otherwise re-parse the ZIP from bytes
|
|
31
|
+
* (saving the ~17 ms OPC unzip on the cache-hit path).
|
|
32
|
+
*
|
|
33
|
+
* **B.7 structural verification.** When the probe accepts an envelope,
|
|
34
|
+
* it additionally runs `parseBlockStructure` against the package's
|
|
35
|
+
* `word/document.xml` and compares the resulting `structuralHash`
|
|
36
|
+
* against `envelope.structuralHash`. A mismatch means the docx has
|
|
37
|
+
* been edited since the envelope was written — e.g. Word added a
|
|
38
|
+
* paragraph and saved — and the probe returns null to force the full
|
|
39
|
+
* parse path. See `src/io/ooxml/parse-block-structure.ts` for the
|
|
40
|
+
* probe's known limitations (opaque-promoting OOXML features cannot
|
|
41
|
+
* be detected shallow-parse; those docs safely fall through).
|
|
42
|
+
*/
|
|
43
|
+
export interface LaycacheProbeResult {
  /** Envelope read from the package's workflow-payload part. */
  readonly envelope: CacheEnvelope;
  /** The parsed OPC package the envelope was read from. */
  readonly opcPackage: OpcPackage;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Probes docx bytes for a usable laycache envelope.
 *
 * @param input Raw docx bytes.
 * @returns The envelope together with the parsed OPC package, or `null` when
 *   the probe cannot vouch for the cache: the package fails to parse, no
 *   valid envelope is present, `/word/document.xml` is missing, the freshly
 *   probed structural hash differs from `envelope.structuralHash`, or any
 *   step throws. The probe never rejects, so callers can always fall through
 *   to the full load pipeline.
 */
export async function tryReadLaycacheEnvelope(
  input: ArrayBuffer | Uint8Array,
): Promise<LaycacheProbeResult | null> {
  const bytes = input instanceof Uint8Array ? input : new Uint8Array(input);

  // One guard around the whole probe: the contract is "null for every
  // rejection path", and the original guarded only `readOpcPackage`.
  // NOTE(review): whether the payload parser, `parseBlockStructure` or
  // `computeStructuralHash` can actually throw is not visible here; the guard
  // is defensive.
  try {
    const opcPackage = readOpcPackage(bytes);

    const envelope = readEnvelopeFromOpcPackage(opcPackage);
    if (!envelope) return null;

    // B.7 — shallow structural probe. Reject envelopes whose cached
    // structuralHash does not match a fresh shallow-parse of the current
    // document.xml. Catches the "user edited the docx in Word between
    // prerender and reopen" path. The probe is conservative: it cannot
    // detect opaque-promoting OOXML features (content controls, floating
    // drawings, AlternateContent), so some documents that are in fact
    // structurally unchanged may still be rejected — a safe false negative.
    const documentXmlPart = opcPackage.parts.get("/word/document.xml");
    if (!documentXmlPart) return null;

    const documentXml = new TextDecoder().decode(documentXmlPart.bytes);
    const probedHash = await computeStructuralHash(parseBlockStructure(documentXml));
    if (probedHash !== envelope.structuralHash) return null;

    return { envelope, opcPackage };
  } catch {
    return null;
  }
}
|
|
@@ -2,6 +2,8 @@ import type { EditorSurfaceSnapshot } from "../../api/public-types";
|
|
|
2
2
|
import { createSelectionSnapshot } from "../../core/state/editor-state.ts";
|
|
3
3
|
import { loadDocxEditorSessionAsync } from "../../io/docx-session.ts";
|
|
4
4
|
import { createLoadScheduler } from "../../io/load-scheduler.ts";
|
|
5
|
+
import { readOpcPackage } from "../../io/opc/package-reader.ts";
|
|
6
|
+
import { writeOpcPackage } from "../../io/opc/package-writer.ts";
|
|
5
7
|
import {
|
|
6
8
|
LAYCACHE_SCHEMA_VERSION,
|
|
7
9
|
LAYOUT_ENGINE_VERSION,
|
|
@@ -14,6 +16,8 @@ import {
|
|
|
14
16
|
deriveCacheKey,
|
|
15
17
|
type CacheKeyBlock,
|
|
16
18
|
} from "./cache-key.ts";
|
|
19
|
+
import { computeCanonicalDocumentHash } from "./canonical-document-hash.ts";
|
|
20
|
+
import { writeEnvelopeToOpcPackage } from "./customxml-cache.ts";
|
|
17
21
|
import { resolveFontFingerprint } from "./font-fingerprint.ts";
|
|
18
22
|
import { canonicalizeGraph } from "./graph-canonicalize.ts";
|
|
19
23
|
|
|
@@ -46,11 +50,18 @@ import { canonicalizeGraph } from "./graph-canonicalize.ts";
|
|
|
46
50
|
export interface PrerenderOptions {
|
|
47
51
|
readonly fontFingerprint?: string;
|
|
48
52
|
/**
|
|
49
|
-
* Plan B
|
|
50
|
-
*
|
|
51
|
-
*
|
|
52
|
-
*
|
|
53
|
-
*
|
|
53
|
+
* Plan B — when true, prerenderDocument injects the cache envelope into
|
|
54
|
+
* the document's workflow-payload customXml part under a `laycache`
|
|
55
|
+
* unknown-namespace entry, and returns the re-serialized bytes in
|
|
56
|
+
* `docWithCustomXml`. Default: `false`, in which case `docWithCustomXml`
|
|
57
|
+
* equals the input bytes.
|
|
58
|
+
*
|
|
59
|
+
* Scope caveat: requires the input docx to already have a
|
|
60
|
+
* `/customXml/item1.xml` part. If the part is absent, the flag is a
|
|
61
|
+
* no-op (documented via `counters.persistedToCustomXml`) and the caller
|
|
62
|
+
* falls back to IndexedDB-only caching. Fresh/minimal docs that lack
|
|
63
|
+
* the part are already fast-opening; Plan B targets CCEP-scale
|
|
64
|
+
* templates which reliably carry the part.
|
|
54
65
|
*/
|
|
55
66
|
readonly persistToCustomXml?: boolean;
|
|
56
67
|
}
|
|
@@ -59,6 +70,13 @@ export interface PrerenderCounters {
|
|
|
59
70
|
readonly blockCount: number;
|
|
60
71
|
readonly pageCount: number;
|
|
61
72
|
readonly prerenderMs: number;
|
|
73
|
+
/**
|
|
74
|
+
* Plan B signal: `true` when `persistToCustomXml` was requested AND the
|
|
75
|
+
* docx had an existing workflow-payload part to mutate. `false` when
|
|
76
|
+
* the flag was off OR the docx had no such part (caller should fall
|
|
77
|
+
* back to IndexedDB caching).
|
|
78
|
+
*/
|
|
79
|
+
readonly persistedToCustomXml: boolean;
|
|
62
80
|
}
|
|
63
81
|
|
|
64
82
|
export interface PrerenderResult {
|
|
@@ -70,6 +88,15 @@ export interface PrerenderResult {
|
|
|
70
88
|
|
|
71
89
|
const PRERENDER_DOCUMENT_ID = "prerender";
|
|
72
90
|
|
|
91
|
+
/**
|
|
92
|
+
* Fixed ISO8601 timestamp used to override session-birth `createdAt` /
|
|
93
|
+
* `updatedAt` on the prerendered envelope. Epoch zero — a valid ISO8601
|
|
94
|
+
* value that downstream validators accept — replacing `Date.now()`-driven
|
|
95
|
+
* values that would otherwise defeat byte-identical `docWithCustomXml`
|
|
96
|
+
* output across two prerender calls on identical bytes.
|
|
97
|
+
*/
|
|
98
|
+
const PRERENDER_NORMALIZED_TIMESTAMP = "1970-01-01T00:00:00.000Z";
|
|
99
|
+
|
|
73
100
|
function toUint8Array(input: ArrayBuffer | Uint8Array): Uint8Array {
|
|
74
101
|
if (input instanceof Uint8Array) return input;
|
|
75
102
|
return new Uint8Array(input);
|
|
@@ -106,7 +133,20 @@ export async function prerenderDocument(
|
|
|
106
133
|
);
|
|
107
134
|
}
|
|
108
135
|
|
|
109
|
-
|
|
136
|
+
// Normalize session-birth timestamps. `loadDocxEditorSessionAsync` sets
|
|
137
|
+
// `createdAt`/`updatedAt` from `new Date().toISOString()`; without this
|
|
138
|
+
// override, two sequential prerender calls on identical bytes would
|
|
139
|
+
// produce different envelopes → different customXml bytes → determinism
|
|
140
|
+
// failure on the B.5 byte-identical gate. Using the epoch keeps the
|
|
141
|
+
// value a valid ISO8601 string (downstream validators accept it) while
|
|
142
|
+
// eliminating the only remaining source of non-determinism. The live
|
|
143
|
+
// session's updatedAt is re-populated by runtime mutations anyway, so
|
|
144
|
+
// the normalized value is irrelevant at runtime.
|
|
145
|
+
const envelope: typeof session.initialSessionState.canonicalDocument = {
|
|
146
|
+
...session.initialSessionState.canonicalDocument,
|
|
147
|
+
createdAt: PRERENDER_NORMALIZED_TIMESTAMP,
|
|
148
|
+
updatedAt: PRERENDER_NORMALIZED_TIMESTAMP,
|
|
149
|
+
};
|
|
110
150
|
const surface = createEditorSurfaceSnapshot(envelope, createSelectionSnapshot(), { kind: "main" });
|
|
111
151
|
|
|
112
152
|
const engine = createLayoutEngine({ autoUpgradeToCanvasBackend: false });
|
|
@@ -114,11 +154,17 @@ export async function prerenderDocument(
|
|
|
114
154
|
const graph = canonicalizeGraph(rawGraph);
|
|
115
155
|
|
|
116
156
|
const structuralHash = await computeStructuralHash(blocksToCacheKeyBlocks(surface));
|
|
157
|
+
// L7 Phase 2.5 Plan B B.2 — sha256 of stable-stringified canonical document.
|
|
158
|
+
// Enters the cache key as the 5th input so style / metadata / comments /
|
|
159
|
+
// preservation mutations correctly invalidate (structuralHash alone misses
|
|
160
|
+
// them because the block-id list is unchanged).
|
|
161
|
+
const canonicalDocumentHash = await computeCanonicalDocumentHash(envelope);
|
|
117
162
|
const cacheKey = await deriveCacheKey({
|
|
118
163
|
blocks: blocksToCacheKeyBlocks(surface),
|
|
119
164
|
fontFingerprint,
|
|
120
165
|
engineVersion: LAYOUT_ENGINE_VERSION,
|
|
121
166
|
schemaVersion: LAYCACHE_SCHEMA_VERSION,
|
|
167
|
+
canonicalDocumentHash,
|
|
122
168
|
});
|
|
123
169
|
|
|
124
170
|
const cacheBlob: CacheEnvelope = {
|
|
@@ -126,20 +172,41 @@ export async function prerenderDocument(
|
|
|
126
172
|
engineVersion: LAYOUT_ENGINE_VERSION,
|
|
127
173
|
fontFingerprint,
|
|
128
174
|
structuralHash,
|
|
175
|
+
canonicalDocumentHash,
|
|
129
176
|
graph,
|
|
130
177
|
surface,
|
|
178
|
+
canonicalDocument: envelope,
|
|
131
179
|
};
|
|
132
180
|
|
|
181
|
+
// Plan B B.5 — persistToCustomXml: inject the envelope into the docx's
|
|
182
|
+
// workflow-payload part. Re-parses the OPC package from bytes (~17 ms on
|
|
183
|
+
// extra-large) because `LoadedDocxEditorSession` does not expose
|
|
184
|
+
// `sourcePackage` publicly. Acceptable cost on the one-shot ingest path.
|
|
185
|
+
let docWithCustomXml = bytes;
|
|
186
|
+
let persistedToCustomXml = false;
|
|
187
|
+
if (options.persistToCustomXml === true) {
|
|
188
|
+
const opcPackage = readOpcPackage(bytes);
|
|
189
|
+
const writeResult = writeEnvelopeToOpcPackage(opcPackage, cacheBlob);
|
|
190
|
+
if (writeResult.written) {
|
|
191
|
+
docWithCustomXml = writeOpcPackage(opcPackage);
|
|
192
|
+
persistedToCustomXml = true;
|
|
193
|
+
}
|
|
194
|
+
// writeResult.written === false → docx had no customXml part; silently
|
|
195
|
+
// fall through with docWithCustomXml = input bytes. Caller observes via
|
|
196
|
+
// counters.persistedToCustomXml.
|
|
197
|
+
}
|
|
198
|
+
|
|
133
199
|
const prerenderMs = performance.now() - t0;
|
|
134
200
|
|
|
135
201
|
return {
|
|
136
|
-
docWithCustomXml
|
|
202
|
+
docWithCustomXml,
|
|
137
203
|
cacheBlob,
|
|
138
204
|
cacheKey,
|
|
139
205
|
counters: {
|
|
140
206
|
blockCount: surface.blocks.length,
|
|
141
207
|
pageCount: graph.pages.length,
|
|
142
208
|
prerenderMs,
|
|
209
|
+
persistedToCustomXml,
|
|
143
210
|
},
|
|
144
211
|
};
|
|
145
212
|
}
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
CanonicalDocument,
|
|
3
|
+
DocumentRootNode,
|
|
4
|
+
InlineNode,
|
|
5
|
+
ParagraphNode,
|
|
6
|
+
} from "../model/canonical-document.ts";
|
|
7
|
+
import type { CanonicalDocumentEnvelope } from "../core/state/editor-state.ts";
|
|
8
|
+
import type { EditorAnchorProjection } from "../api/public-types.ts";
|
|
9
|
+
|
|
10
|
+
/**
 * A scope id paired with a start and an end position.
 *
 * NOTE(review): not referenced in the visible part of this file; the fields
 * presumably mirror the shape returned by `findScopeAt` — confirm.
 */
export interface ResolvedScopeLocation {
  scopeId: string;
  startPos: number;
  endPos: number;
}
|
|
15
|
+
|
|
16
|
+
/**
 * Number of positions an inline node occupies.
 *
 * Text counts one position per Unicode code point. Hyperlinks and fields
 * count the sum of their children. Bookmark and scope markers are zero-width.
 * Every other inline counts as a single position.
 */
function inlineLength(node: InlineNode): number {
  if (node.type === "text") {
    // Array.from iterates by code point, so a surrogate pair counts once.
    return Array.from(node.text).length;
  }

  if (node.type === "hyperlink" || node.type === "field") {
    let total = 0;
    for (const child of node.children) {
      total += inlineLength(child as InlineNode);
    }
    return total;
  }

  const isZeroWidthMarker =
    node.type === "bookmark_start" ||
    node.type === "bookmark_end" ||
    node.type === "scope_marker_start" ||
    node.type === "scope_marker_end";
  return isZeroWidthMarker ? 0 : 1;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Lists the top-level paragraphs of a document together with the absolute
 * position at which each paragraph starts.
 *
 * Accepts either an object with a `content` root or the root node itself.
 * A paragraph advances the position by the length of its inlines, any other
 * block advances it by one, and one extra position is counted between
 * consecutive blocks. Only direct children of the root are visited.
 */
function walkParagraphs(
  document: Pick<CanonicalDocument, "content"> | CanonicalDocumentEnvelope,
): { paragraph: ParagraphNode; from: number }[] {
  const asEnvelope = document as CanonicalDocumentEnvelope;
  const root =
    "content" in asEnvelope
      ? (asEnvelope.content as DocumentRootNode)
      : (document as unknown as DocumentRootNode);

  const found: { paragraph: ParagraphNode; from: number }[] = [];
  const lastIndex = root.children.length - 1;
  let offset = 0;

  for (const [index, block] of root.children.entries()) {
    if (block && block.type === "paragraph") {
      found.push({ paragraph: block, from: offset });
      for (const child of block.children) {
        offset += inlineLength(child as InlineNode);
      }
    } else {
      // Tables and every other non-paragraph block occupy one position.
      offset += 1;
    }

    // Separator between this block and the next one.
    if (index < lastIndex) {
      offset += 1;
    }
  }

  return found;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Maps each scope id to the absolute positions of its start and end markers.
 *
 * Only markers that are direct children of a top-level paragraph are seen. A
 * scope with a single surviving marker gets an entry with just that side
 * set; a scope with no markers gets no entry. Used by `resolveScope` and
 * `findScopeAt`, and exported so tests can inspect the marker-to-position
 * mapping directly.
 */
export function collectScopeLocations(
  document: Pick<CanonicalDocument, "content"> | CanonicalDocumentEnvelope,
): Map<string, { startPos?: number; endPos?: number }> {
  const byScope = new Map<string, { startPos?: number; endPos?: number }>();

  for (const { paragraph, from } of walkParagraphs(document)) {
    let offset = from;
    for (const inline of paragraph.children) {
      switch (inline.type) {
        case "scope_marker_start":
          byScope.set(inline.scopeId, { ...byScope.get(inline.scopeId), startPos: offset });
          break;
        case "scope_marker_end":
          byScope.set(inline.scopeId, { ...byScope.get(inline.scopeId), endPos: offset });
          break;
        default:
          break;
      }
      // Markers are zero-width, so this only moves past real content.
      offset += inlineLength(inline as InlineNode);
    }
  }

  return byScope;
}
|
|
92
|
+
|
|
93
|
+
/**
 * Resolves a scope id to an anchor derived from the marker positions
 * currently in the document.
 *
 * @returns
 *   - a `range` anchor (ordered `from` ≤ `to`) when both markers are present;
 *   - a `detached` anchor with reason `"deleted"` when exactly one marker
 *     survives, whose `lastKnownRange` collapses onto that marker;
 *   - `null` when neither marker is in the document.
 */
export function resolveScope(
  document: Pick<CanonicalDocument, "content"> | CanonicalDocumentEnvelope,
  scopeId: string,
): EditorAnchorProjection | null {
  const loc = collectScopeLocations(document).get(scopeId);
  if (!loc) return null;

  const { startPos, endPos } = loc;

  if (startPos === undefined || endPos === undefined) {
    return {
      kind: "detached",
      reason: "deleted",
      lastKnownRange: {
        from: startPos ?? endPos ?? 0,
        to: endPos ?? startPos ?? 0,
      },
    };
  }

  // Markers may appear in either order; normalize to an ascending range.
  return {
    kind: "range",
    from: Math.min(startPos, endPos),
    to: Math.max(startPos, endPos),
    assoc: { start: -1, end: 1 },
  };
}
|
|
128
|
+
|
|
129
|
+
/**
 * Given a position, returns the innermost scope that encloses it (bounds
 * inclusive), or `null` when no complete scope does. Used by the chrome
 * overlay hit-test and by the edit-dispatch guard that routes
 * delete-through-marker to `removeScope`.
 *
 * "Innermost" means the latest start position. Among scopes that start at
 * the same position, the one that ends earliest wins; among identical spans,
 * the one whose marker appears later in the document wins. Without the
 * tie-break, an inner scope sharing its start position with an outer scope
 * would lose to the outer one.
 *
 * Scopes with only one surviving marker are ignored.
 * NOTE(review): a scope whose end marker precedes its start marker never
 * matches here, whereas `resolveScope` normalizes that order — confirm
 * whether the two should agree.
 */
export function findScopeAt(
  document: Pick<CanonicalDocument, "content"> | CanonicalDocumentEnvelope,
  position: number,
): { scopeId: string; startPos: number; endPos: number } | null {
  const locations = collectScopeLocations(document);
  let best: { scopeId: string; startPos: number; endPos: number } | null = null;

  for (const [scopeId, loc] of locations) {
    const { startPos, endPos } = loc;
    if (startPos === undefined || endPos === undefined) continue;
    if (position < startPos || position > endPos) continue;

    const isTighter =
      best === null ||
      startPos > best.startPos ||
      (startPos === best.startPos && endPos <= best.endPos);
    if (isTighter) {
      best = { scopeId, startPos, endPos };
    }
  }

  return best;
}
|
|
@@ -53,6 +53,16 @@ export const DEFAULT_REGISTRY_ENTRIES: Readonly<Record<string, ScopeTagBehavior>
|
|
|
53
53
|
trimOnDelete: true,
|
|
54
54
|
bailIfCrossed: false,
|
|
55
55
|
},
|
|
56
|
+
// S1 scope-marker anchoring. `trimOnDelete: false` is the load-bearing
|
|
57
|
+
// difference vs. `bookmark` — a delete that crosses a scope marker routes
|
|
58
|
+
// through `removeScope` instead of silently trimming the marker, so
|
|
59
|
+
// half-scope states (orphaned metadata in customXml) never appear.
|
|
60
|
+
"workflow-scope-marker": {
|
|
61
|
+
extendOnInsertLeft: true,
|
|
62
|
+
extendOnInsertRight: true,
|
|
63
|
+
trimOnDelete: false,
|
|
64
|
+
bailIfCrossed: false,
|
|
65
|
+
},
|
|
56
66
|
sdt: {
|
|
57
67
|
extendOnInsertLeft: false,
|
|
58
68
|
extendOnInsertRight: false,
|
|
@@ -1047,7 +1047,11 @@ function appendInlineSegments(
|
|
|
1047
1047
|
}
|
|
1048
1048
|
case "bookmark_start":
|
|
1049
1049
|
case "bookmark_end":
|
|
1050
|
-
|
|
1050
|
+
case "scope_marker_start":
|
|
1051
|
+
case "scope_marker_end":
|
|
1052
|
+
// Zero-width markers — no visual, no cursor advancement. Scope markers
|
|
1053
|
+
// (S1) follow the bookmark precedent: structural anchors whose positions
|
|
1054
|
+
// track with surrounding text but which don't occupy cursor positions.
|
|
1051
1055
|
return { nextCursor: start, lockedFragmentIds: [] };
|
|
1052
1056
|
default:
|
|
1053
1057
|
return { nextCursor: start + 1, lockedFragmentIds: [] };
|
|
@@ -1466,6 +1470,9 @@ function summarizePreviewInline(node: InlineNode): string {
|
|
|
1466
1470
|
return node.name ? `[Bookmark: ${node.name}]` : "[Bookmark]";
|
|
1467
1471
|
case "bookmark_end":
|
|
1468
1472
|
return "";
|
|
1473
|
+
case "scope_marker_start":
|
|
1474
|
+
case "scope_marker_end":
|
|
1475
|
+
return "";
|
|
1469
1476
|
case "image":
|
|
1470
1477
|
return node.altText ? `[Image: ${node.altText}]` : "[Image]";
|
|
1471
1478
|
case "opaque_inline":
|