@beyondwork/docx-react-component 1.0.61 → 1.0.63

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@beyondwork/docx-react-component",
3
3
  "publisher": "beyondwork",
4
- "version": "1.0.61",
4
+ "version": "1.0.63",
5
5
  "description": "Embeddable React Word (docx) editor with review, comments, tracked changes, and round-trip OOXML fidelity.",
6
6
  "type": "module",
7
7
  "sideEffects": [
@@ -0,0 +1,193 @@
1
+ /**
2
+ * Embedded-object classifier (hotfix/ole-digestibility-guard).
3
+ *
4
+ * Decides how the session layer should handle each `<w:object>` /
5
+ * `w:altChunk` / embedded package encountered during parse, given its
6
+ * ProgID, relationship type, and content-type. Three outcomes:
7
+ *
8
+ * - **digestible** — the runtime currently renders this as an inline
9
+ * `OleEmbedNode` with a `[Embedded object: progId]` placeholder, with
10
+ * no side-effects that break the editor. Parser proceeds as today.
11
+ *
12
+ * - **offloadable** — a native OOXML package (Word sub-doc, Excel
13
+ * workbook) that a host-side storage adapter could extract and
14
+ * reconstitute. In the hotfix, `offloadable` is treated as
15
+ * `store-only` (there is no adapter yet); the opaque-fragment path
16
+ * preserves bytes + XML for round-trip. Graduates when refactor/01
17
+ * Step 7 lands a `hostAdapter.storeEmbeddedDocument?` callback.
18
+ *
19
+ * - **store-only** — complex binary or undigestible content (PDF,
20
+ * legacy binary Office, package-embedded docx with icon aspect,
21
+ * generic `Package` containers). The parser returns undefined from `parseObject`
22
+ * and the existing opaque-fragment fallback in `parse-main-document`
23
+ * preserves both the `<w:object>` XML and its relationship id.
24
+ *
25
+ * Policy (opt-in to store-only):
26
+ * - Default: **digestible** for ProgIDs the hotfix does not recognize
27
+ * as problematic. This preserves today's behavior for benign OLE
28
+ * content that has not been reported as breaking — placeholder
29
+ * rendering + byte-preserved round-trip. Graduating an embedding to
30
+ * store-only is an explicit decision made against a real-world
31
+ * crash report.
32
+ * - ProgID prefix matching: `Word.Document.*`, `Excel.*`, `PowerPoint.*`,
33
+ * `AcroExch.Document.*`, exact `Package` → store-only. These are the
34
+ * ProgIDs most likely to break the editor (either because the
35
+ * payload is a nested OPC package whose downstream processing can
36
+ * fail mid-mount, or because the binary's rendering is out of scope
37
+ * for v1).
38
+ * - Content-type override: if the relationship points at a
39
+ * `application/vnd.openxmlformats-officedocument.wordprocessingml.document`
40
+ * (or analogous OOXML/PDF content-type), it is store-only regardless
41
+ * of ProgID. Catches the real-world CCEP "master agreement with
42
+ * sub-agreement inside" case.
43
+ * - Target-path extension match: if the relationship target ends in
44
+ * `.docx`/`.xlsx`/`.pptx`/`.pdf` (including macro-enabled and
45
+ * template variants), it is store-only even if ProgID + content-type
46
+ * were both missing. Weak signal but matches file-type intent.
47
+ *
48
+ * Why no `offloadable` return value today: the host-adapter callback
49
+ * pair (`storeEmbeddedDocument?` + `loadEmbeddedDocument?`) that makes
50
+ * `offloadable` meaningful lands as refactor/01 Step 7. Until then,
51
+ * what would be `offloadable` is folded into `store-only` — the
52
+ * opaque path preserves bytes + XML for the eventual offload.
53
+ *
54
+ * See `docs/architecture/01-package-session.md` §P8 for the full
55
+ * contract.
56
+ */
57
+
58
const STORE_ONLY_PROGID_PREFIXES: readonly string[] = [
  // Nested Word documents — primary real-world crash source (CCEP
  // "EU & Global IT Services Agreement.docx" type).
  "Word.Document.",
  "Word.DocumentMacroEnabled.",
  "Word.Template.",
  // Spreadsheet embeddings. Excel.Sheet.12 + Excel.Worksheet.12 are
  // the common ProgIDs; Excel.Chart.* less so.
  "Excel.Sheet.",
  "Excel.SheetMacroEnabled.",
  "Excel.SheetBinaryMacroEnabled.",
  "Excel.Worksheet.",
  "Excel.Chart.",
  "Excel.ChartMacroEnabled.",
  // Presentations.
  "PowerPoint.Slide.",
  "PowerPoint.SlideMacroEnabled.",
  "PowerPoint.Show.",
  "PowerPoint.ShowMacroEnabled.",
  "PowerPoint.Document.",
  "PowerPoint.Template.",
  // PDF via Adobe Acrobat.
  "AcroExch.Document.",
];

const STORE_ONLY_PROGID_EXACT: ReadonlySet<string> = new Set([
  // Generic OLE container — ambiguous payload, fail closed.
  "Package",
  "Packager.Package",
]);

/**
 * Content-type patterns that force store-only regardless of ProgID.
 * These are checked against the relationship target's content-type as
 * declared in `[Content_Types].xml` overrides.
 */
const STORE_ONLY_CONTENT_TYPES: readonly string[] = [
  // Package-embedded Word document (relationships/package type on a
  // word/embeddings/*.docx part). Catches the CCEP case even if the
  // ProgID is missing or atypical.
  "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
  "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
  "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
  "application/vnd.openxmlformats-officedocument.spreadsheetml.template",
  "application/vnd.openxmlformats-officedocument.presentationml.presentation",
  "application/vnd.openxmlformats-officedocument.presentationml.template",
  "application/pdf",
];

/**
 * Lower-cased file extensions that force store-only when the
 * relationship target ends with one of them. Hoisted to module scope so
 * the list is not re-allocated on every classification.
 */
const STORE_ONLY_EXTENSIONS: readonly string[] = [
  ".docx", ".docm", ".dotx", ".dotm",
  ".xlsx", ".xlsm", ".xltx", ".xltm",
  ".pptx", ".pptm", ".potx", ".potm",
  ".pdf",
];

/**
 * Case-folded copies of the ProgID prefixes, built once so matching does
 * not depend on the casing the producing application happened to write.
 */
const FOLDED_PROGID_PREFIXES: readonly string[] = STORE_ONLY_PROGID_PREFIXES.map(
  (prefix) => prefix.toLowerCase(),
);

/**
 * Case-folded ProgIDs that match as a whole string: the exact entries
 * plus every prefix with its trailing "." removed, so a ProgID written
 * without a version suffix (`Word.Document`) is caught alongside the
 * versioned forms (`Word.Document.12`).
 */
const FOLDED_PROGID_EXACT: ReadonlySet<string> = new Set(
  [
    ...STORE_ONLY_PROGID_EXACT,
    ...STORE_ONLY_PROGID_PREFIXES.map((prefix) =>
      prefix.endsWith(".") ? prefix.slice(0, -1) : prefix,
    ),
  ].map((progId) => progId.toLowerCase()),
);

export type EmbeddingKind = "digestible" | "offloadable" | "store-only";

export interface ClassifyEmbeddingInput {
  /** ProgID on the `<o:OLEObject>` element, if any. */
  progId?: string;
  /**
   * Full OOXML relationship Type URI — e.g.
   * `http://schemas.openxmlformats.org/officeDocument/2006/relationships/oleObject`
   * or `.../relationships/package`.
   */
  relationshipType?: string;
  /**
   * Content-type of the embedding's target package part (from
   * `[Content_Types].xml` override or inferred default).
   */
  contentType?: string;
  /**
   * Relationship target path — e.g. `embeddings/Microsoft_Word_Document.docx`.
   * Checked by extension independently of the other fields.
   */
  targetPath?: string;
}

/**
 * Classify an embedding as `"store-only"` or `"digestible"`.
 *
 * The classifier is opt-in to store-only: it returns `"store-only"` as
 * soon as one of the checks below matches and otherwise falls through to
 * `"digestible"`. It never returns `"offloadable"`.
 *
 * Checks, in order:
 *  1. ProgID (trimmed, case-insensitive) equals an exact entry or a
 *     prefix entry without its trailing dot.
 *  2. ProgID starts with one of the store-only prefixes.
 *  3. Content-type, ignoring case and any `;`-separated parameters,
 *     equals one of the store-only content-types.
 *  4. Target path (trimmed, case-insensitive) ends with one of the
 *     store-only extensions.
 *
 * `relationshipType` is accepted on the input but is not consulted.
 *
 * @param input - Signals gathered for the embedding; every field is optional.
 * @returns `"store-only"` when any check matches, otherwise `"digestible"`.
 */
export function classifyEmbedding(
  input: ClassifyEmbeddingInput,
): EmbeddingKind {
  const progId = input.progId?.trim().toLowerCase() ?? "";
  // Keep only the media type: drop parameters such as `; charset=...`,
  // tolerating whitespace on either side of the separator.
  const contentType =
    (input.contentType ?? "").split(";")[0]?.trim().toLowerCase() ?? "";
  const targetPath = input.targetPath?.trim().toLowerCase() ?? "";

  if (progId) {
    // Whole-string match: `Package`, `Packager.Package`, and unversioned
    // forms such as `Word.Document` or `AcroExch.Document`.
    if (FOLDED_PROGID_EXACT.has(progId)) {
      return "store-only";
    }
    // Prefix match. Handles Word.Document.12, Word.Document.14,
    // Word.DocumentMacroEnabled.12, Excel.Sheet.12, AcroExch.Document.7, etc.
    if (FOLDED_PROGID_PREFIXES.some((prefix) => progId.startsWith(prefix))) {
      return "store-only";
    }
  }

  // Content-type override, applied regardless of ProgID.
  if (contentType && STORE_ONLY_CONTENT_TYPES.includes(contentType)) {
    return "store-only";
  }

  // Target-path extension check. Runs whenever a target path is present,
  // so it also catches embeddings whose ProgID and content-type are
  // missing or unrecognized.
  if (
    targetPath &&
    STORE_ONLY_EXTENSIONS.some((ext) => targetPath.endsWith(ext))
  ) {
    return "store-only";
  }

  // No known-problematic signal matched — keep today's behavior.
  // parseObject will construct an OleEmbedNode; render-path continues
  // to show a placeholder for the embedding.
  return "digestible";
}

/**
 * Exposed for tests. Not part of the public API.
 */
export const __internal = {
  STORE_ONLY_PROGID_PREFIXES,
  STORE_ONLY_PROGID_EXACT,
  STORE_ONLY_CONTENT_TYPES,
} as const;
@@ -22,6 +22,7 @@ import type { OleEmbedNode } from "../../model/canonical-document.ts";
22
22
  import type { OpcRelationship } from "./part-manifest.ts";
23
23
  import type { XmlElementNode } from "./xml-element.ts";
24
24
  import { resolveOleRelationship } from "./parse-ole-relationship.ts";
25
+ import { classifyEmbedding } from "./classify-embedding.ts";
25
26
 
26
27
  /**
27
28
  * Parse a `<w:object>` element into an `OleEmbedNode` if it contains an
@@ -64,6 +65,28 @@ export function parseObject(
64
65
  return undefined;
65
66
  }
66
67
 
68
+ // hotfix/ole-digestibility-guard — classify the embedding before
69
+ // constructing a canonical node. When the classifier returns
70
+ // "store-only" (nested Word docs, PDF OLE, Excel/PowerPoint
71
+ // embeddings, generic Package containers), return undefined so the caller's
72
+ // existing opaque-fragment fallback preserves both <w:object> XML
73
+ // and its r:id verbatim. Binary preservation is unaffected —
74
+ // collectPreservedPackageParts indexes embedding parts by path, not
75
+ // by canonical-tree reference.
76
+ //
77
+ // TODO(refactor/01 Step 6-7): replace this skip-construction with
78
+ // extraction + offload via hostAdapter.storeEmbeddedDocument?. See
79
+ // docs/architecture/01-package-session.md §P8 + docs/plans/refactor/
80
+ // 01-package-session.md Steps 6-7.
81
+ const kind = classifyEmbedding({
82
+ progId,
83
+ relationshipType: resolved.relationshipType,
84
+ targetPath: resolved.target,
85
+ });
86
+ if (kind !== "digestible") {
87
+ return undefined;
88
+ }
89
+
67
90
  const metadata: OleEmbedNode["metadata"] = {};
68
91
  if (resolved.originalFilename) {
69
92
  metadata.originalFilename = resolved.originalFilename;