@beyondwork/docx-react-component 1.0.61 → 1.0.62
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@beyondwork/docx-react-component",
|
|
3
3
|
"publisher": "beyondwork",
|
|
4
|
-
"version": "1.0.61",
|
|
4
|
+
"version": "1.0.62",
|
|
5
5
|
"description": "Embeddable React Word (docx) editor with review, comments, tracked changes, and round-trip OOXML fidelity.",
|
|
6
6
|
"type": "module",
|
|
7
7
|
"sideEffects": [
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedded-object classifier (hotfix/ole-digestibility-guard).
|
|
3
|
+
*
|
|
4
|
+
* Decides how the session layer should handle each `<w:object>` /
|
|
5
|
+
* `w:altChunk` / embedded package encountered during parse, given its
|
|
6
|
+
* ProgID, relationship type, and content-type. Three outcomes:
|
|
7
|
+
*
|
|
8
|
+
* - **digestible** — the runtime currently renders this as an inline
|
|
9
|
+
* `OleEmbedNode` with a `[Embedded object: progId]` placeholder, with
|
|
10
|
+
* no side-effects that break the editor. Parser proceeds as today.
|
|
11
|
+
*
|
|
12
|
+
* - **offloadable** — a native OOXML package (Word sub-doc, Excel
|
|
13
|
+
* workbook) that a host-side storage adapter could extract and
|
|
14
|
+
* reconstitute. In the hotfix, `offloadable` is treated as
|
|
15
|
+
* `store-only` (there is no adapter yet); the opaque-fragment path
|
|
16
|
+
* preserves bytes + XML for round-trip. Graduates when refactor/01
|
|
17
|
+
* Step 7 lands a `hostAdapter.storeEmbeddedDocument?` callback.
|
|
18
|
+
*
|
|
19
|
+
* - **store-only** — complex binary or undigestible content (PDF,
|
|
20
|
+
* legacy binary Office, package-embedded docx with icon aspect,
|
|
21
|
+
* generic `Package` containers). The parser returns undefined from `parseObject`
|
|
22
|
+
* and the existing opaque-fragment fallback in `parse-main-document`
|
|
23
|
+
* preserves both the `<w:object>` XML and its relationship id.
|
|
24
|
+
*
|
|
25
|
+
* Policy (opt-in to store-only):
|
|
26
|
+
* - Default: **digestible** for ProgIDs the hotfix does not recognize
|
|
27
|
+
* as problematic. This preserves today's behavior for benign OLE
|
|
28
|
+
* content that has not been reported as breaking — placeholder
|
|
29
|
+
* rendering + byte-preserved round-trip. Graduating an embedding to
|
|
30
|
+
* store-only is an explicit decision made against a real-world
|
|
31
|
+
* crash report.
|
|
32
|
+
* - ProgID prefix matching: `Word.Document.*`, `Excel.*`, `PowerPoint.*`,
|
|
33
|
+
* `AcroExch.Document.*`, exact `Package` / `Packager.Package` → store-only. These are the
|
|
34
|
+
* ProgIDs most likely to break the editor (either because the
|
|
35
|
+
* payload is a nested OPC package whose downstream processing can
|
|
36
|
+
* fail mid-mount, or because the binary's rendering is out of scope
|
|
37
|
+
* for v1).
|
|
38
|
+
* - Content-type override: if the relationship points at a
|
|
39
|
+
* `application/vnd.openxmlformats-officedocument.wordprocessingml.document`
|
|
40
|
+
* (or analogous OOXML/PDF content-type), it is store-only regardless
|
|
41
|
+
* of ProgID. Catches the real-world CCEP "master agreement with
|
|
42
|
+
* sub-agreement inside" case.
|
|
43
|
+
* - Target-path extension match: if the relationship target ends in
|
|
44
|
+
* `.docx`/`.xlsx`/`.pptx`/`.pdf` (including macro-enabled and
|
|
45
|
+
* template variants), it is store-only even if ProgID + content-type
|
|
46
|
+
* were both missing. Weak signal but matches file-type intent.
|
|
47
|
+
*
|
|
48
|
+
* Why no `offloadable` return value today: the host-adapter callback
|
|
49
|
+
* pair (`storeEmbeddedDocument?` + `loadEmbeddedDocument?`) that makes
|
|
50
|
+
* `offloadable` meaningful lands as refactor/01 Step 7. Until then,
|
|
51
|
+
* what would be `offloadable` is folded into `store-only` — the
|
|
52
|
+
* opaque path preserves bytes + XML for the eventual offload.
|
|
53
|
+
*
|
|
54
|
+
* See `docs/architecture/01-package-session.md` §P8 for the full
|
|
55
|
+
* contract.
|
|
56
|
+
*/
|
|
57
|
+
|
|
58
|
+
const STORE_ONLY_PROGID_PREFIXES: readonly string[] = [
|
|
59
|
+
// Nested Word documents — primary real-world crash source (CCEP
|
|
60
|
+
// "EU & Global IT Services Agreement.docx" type).
|
|
61
|
+
"Word.Document.",
|
|
62
|
+
"Word.DocumentMacroEnabled.",
|
|
63
|
+
"Word.Template.",
|
|
64
|
+
// Spreadsheet embeddings. Excel.Sheet.12 + Excel.Worksheet.12 are
|
|
65
|
+
// the common ProgIDs; Excel.Chart.* less so.
|
|
66
|
+
"Excel.Sheet.",
|
|
67
|
+
"Excel.SheetMacroEnabled.",
|
|
68
|
+
"Excel.SheetBinaryMacroEnabled.",
|
|
69
|
+
"Excel.Worksheet.",
|
|
70
|
+
"Excel.Chart.",
|
|
71
|
+
"Excel.ChartMacroEnabled.",
|
|
72
|
+
// Presentations.
|
|
73
|
+
"PowerPoint.Slide.",
|
|
74
|
+
"PowerPoint.SlideMacroEnabled.",
|
|
75
|
+
"PowerPoint.Show.",
|
|
76
|
+
"PowerPoint.ShowMacroEnabled.",
|
|
77
|
+
"PowerPoint.Document.",
|
|
78
|
+
"PowerPoint.Template.",
|
|
79
|
+
// PDF via Adobe Acrobat.
|
|
80
|
+
"AcroExch.Document.",
|
|
81
|
+
];
|
|
82
|
+
|
|
83
|
+
const STORE_ONLY_PROGID_EXACT: ReadonlySet<string> = new Set([
|
|
84
|
+
// Generic OLE container — ambiguous payload, fail closed.
|
|
85
|
+
"Package",
|
|
86
|
+
"Packager.Package",
|
|
87
|
+
]);
|
|
88
|
+
|
|
89
|
+
/**
|
|
90
|
+
* Content-type patterns that force store-only regardless of ProgID.
|
|
91
|
+
* These are checked against the relationship target's content-type as
|
|
92
|
+
* declared in `[Content_Types].xml` overrides.
|
|
93
|
+
*/
|
|
94
|
+
const STORE_ONLY_CONTENT_TYPES: readonly string[] = [
|
|
95
|
+
// Package-embedded Word document (relationships/package type on a
|
|
96
|
+
// word/embeddings/*.docx part). Catches the CCEP case even if the
|
|
97
|
+
// ProgID is missing or atypical.
|
|
98
|
+
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
99
|
+
"application/vnd.openxmlformats-officedocument.wordprocessingml.template",
|
|
100
|
+
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
101
|
+
"application/vnd.openxmlformats-officedocument.spreadsheetml.template",
|
|
102
|
+
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
|
103
|
+
"application/vnd.openxmlformats-officedocument.presentationml.template",
|
|
104
|
+
"application/pdf",
|
|
105
|
+
];
|
|
106
|
+
|
|
107
|
+
export type EmbeddingKind = "digestible" | "offloadable" | "store-only";
|
|
108
|
+
|
|
109
|
+
export interface ClassifyEmbeddingInput {
|
|
110
|
+
/** ProgID on the `<o:OLEObject>` element, if any. */
|
|
111
|
+
progId?: string;
|
|
112
|
+
/**
|
|
113
|
+
* Full OOXML relationship Type URI — e.g.
|
|
114
|
+
* `http://schemas.openxmlformats.org/officeDocument/2006/relationships/oleObject`
|
|
115
|
+
* or `.../relationships/package`.
|
|
116
|
+
*/
|
|
117
|
+
relationshipType?: string;
|
|
118
|
+
/**
|
|
119
|
+
* Content-type of the embedding's target package part (from
|
|
120
|
+
* `[Content_Types].xml` override or inferred default).
|
|
121
|
+
*/
|
|
122
|
+
contentType?: string;
|
|
123
|
+
/**
|
|
124
|
+
* Relationship target path — e.g. `embeddings/Microsoft_Word_Document.docx`.
|
|
125
|
+
* Used as a weak signal when progId + contentType are both absent.
|
|
126
|
+
*/
|
|
127
|
+
targetPath?: string;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
/**
|
|
131
|
+
* Classify an embedding. Defaults to `"digestible"`: returns
|
|
132
|
+
* `"store-only"` only when a known-problematic signal matches.
|
|
133
|
+
*/
|
|
134
|
+
export function classifyEmbedding(
|
|
135
|
+
input: ClassifyEmbeddingInput,
|
|
136
|
+
): EmbeddingKind {
|
|
137
|
+
const progId = input.progId?.trim() ?? "";
|
|
138
|
+
const contentType = input.contentType?.trim().toLowerCase() ?? "";
|
|
139
|
+
const targetPath = input.targetPath?.toLowerCase() ?? "";
|
|
140
|
+
|
|
141
|
+
// Exact ProgID match.
|
|
142
|
+
if (progId && STORE_ONLY_PROGID_EXACT.has(progId)) {
|
|
143
|
+
return "store-only";
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
// ProgID prefix match. Handles Word.Document.12, Word.Document.14,
|
|
147
|
+
// Word.DocumentMacroEnabled.12, Excel.Sheet.12, AcroExch.Document.7, etc.
|
|
148
|
+
if (progId) {
|
|
149
|
+
for (const prefix of STORE_ONLY_PROGID_PREFIXES) {
|
|
150
|
+
if (progId.startsWith(prefix)) {
|
|
151
|
+
return "store-only";
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
// Content-type override.
|
|
157
|
+
if (contentType) {
|
|
158
|
+
for (const ct of STORE_ONLY_CONTENT_TYPES) {
|
|
159
|
+
if (contentType === ct || contentType.startsWith(`${ct};`)) {
|
|
160
|
+
return "store-only";
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
// Target-path fallback when neither progId nor content-type matched.
|
|
166
|
+
// A `.docx` / `.xlsx` / `.pptx` / `.pdf` extension in the embeddings
|
|
167
|
+
// folder is a strong signal of a package payload.
|
|
168
|
+
if (targetPath) {
|
|
169
|
+
const storeOnlyExtensions = [".docx", ".docm", ".dotx", ".dotm",
|
|
170
|
+
".xlsx", ".xlsm", ".xltx", ".xltm",
|
|
171
|
+
".pptx", ".pptm", ".potx", ".potm",
|
|
172
|
+
".pdf"];
|
|
173
|
+
for (const ext of storeOnlyExtensions) {
|
|
174
|
+
if (targetPath.endsWith(ext)) {
|
|
175
|
+
return "store-only";
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
// No known-problematic signal matched — keep today's behavior.
|
|
181
|
+
// parseObject will construct an OleEmbedNode; render-path continues
|
|
182
|
+
// to show a placeholder for the embedding.
|
|
183
|
+
return "digestible";
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
/**
|
|
187
|
+
* Exposed for tests. Not part of the public API.
|
|
188
|
+
*/
|
|
189
|
+
export const __internal = {
|
|
190
|
+
STORE_ONLY_PROGID_PREFIXES,
|
|
191
|
+
STORE_ONLY_PROGID_EXACT,
|
|
192
|
+
STORE_ONLY_CONTENT_TYPES,
|
|
193
|
+
} as const;
|
|
@@ -22,6 +22,7 @@ import type { OleEmbedNode } from "../../model/canonical-document.ts";
|
|
|
22
22
|
import type { OpcRelationship } from "./part-manifest.ts";
|
|
23
23
|
import type { XmlElementNode } from "./xml-element.ts";
|
|
24
24
|
import { resolveOleRelationship } from "./parse-ole-relationship.ts";
|
|
25
|
+
import { classifyEmbedding } from "./classify-embedding.ts";
|
|
25
26
|
|
|
26
27
|
/**
|
|
27
28
|
* Parse a `<w:object>` element into an `OleEmbedNode` if it contains an
|
|
@@ -64,6 +65,28 @@ export function parseObject(
|
|
|
64
65
|
return undefined;
|
|
65
66
|
}
|
|
66
67
|
|
|
68
|
+
// hotfix/ole-digestibility-guard — classify the embedding before
|
|
69
|
+
// constructing a canonical node. When the classifier returns
|
|
70
|
+
// "store-only" (nested Word docs, PDF OLE, Excel/PowerPoint
|
|
71
|
+
// embeddings, generic Package containers), return undefined so the caller's
|
|
72
|
+
// existing opaque-fragment fallback preserves both <w:object> XML
|
|
73
|
+
// and its r:id verbatim. Binary preservation is unaffected —
|
|
74
|
+
// collectPreservedPackageParts indexes embedding parts by path, not
|
|
75
|
+
// by canonical-tree reference.
|
|
76
|
+
//
|
|
77
|
+
// TODO(refactor/01 Step 6-7): replace this skip-construction with
|
|
78
|
+
// extraction + offload via hostAdapter.storeEmbeddedDocument?. See
|
|
79
|
+
// docs/architecture/01-package-session.md §P8 + docs/plans/refactor/
|
|
80
|
+
// 01-package-session.md Steps 6-7.
|
|
81
|
+
const kind = classifyEmbedding({
|
|
82
|
+
progId,
|
|
83
|
+
relationshipType: resolved.relationshipType,
|
|
84
|
+
targetPath: resolved.target,
|
|
85
|
+
});
|
|
86
|
+
if (kind !== "digestible") {
|
|
87
|
+
return undefined;
|
|
88
|
+
}
|
|
89
|
+
|
|
67
90
|
const metadata: OleEmbedNode["metadata"] = {};
|
|
68
91
|
if (resolved.originalFilename) {
|
|
69
92
|
metadata.originalFilename = resolved.originalFilename;
|