@beyondwork/docx-react-component 1.0.11 → 1.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/README.md +8 -2
  2. package/package.json +35 -21
  3. package/src/api/public-types.ts +103 -1
  4. package/src/core/commands/formatting-commands.ts +742 -0
  5. package/src/core/commands/image-commands.ts +84 -2
  6. package/src/core/commands/structural-helpers.ts +309 -0
  7. package/src/core/commands/table-structure-commands.ts +721 -0
  8. package/src/core/commands/text-commands.ts +166 -1
  9. package/src/core/state/editor-state.ts +318 -9
  10. package/src/formats/xlsx/io/parse-sheet.ts +177 -7
  11. package/src/formats/xlsx/io/parse-styles.ts +2 -0
  12. package/src/formats/xlsx/io/xlsx-session.ts +18 -12
  13. package/src/formats/xlsx/model/sheet.ts +81 -1
  14. package/src/formats/xlsx/model/workbook.ts +10 -6
  15. package/src/io/docx-session.ts +392 -22
  16. package/src/io/export/export-session.ts +55 -0
  17. package/src/io/export/serialize-footnotes.ts +5 -20
  18. package/src/io/export/serialize-headers-footers.ts +5 -31
  19. package/src/io/export/serialize-main-document.ts +78 -5
  20. package/src/io/normalize/normalize-text.ts +90 -1
  21. package/src/io/ooxml/parse-footnotes.ts +68 -5
  22. package/src/io/ooxml/parse-headers-footers.ts +67 -9
  23. package/src/io/ooxml/parse-main-document.ts +169 -6
  24. package/src/io/opc/package-reader.ts +3 -3
  25. package/src/io/source-package-provenance.ts +241 -0
  26. package/src/model/canonical-document.ts +450 -2
  27. package/src/model/cds-1.0.0.ts +5 -2
  28. package/src/model/snapshot.ts +190 -19
  29. package/src/preservation/package-preservation.ts +0 -7
  30. package/src/runtime/document-runtime.ts +7 -1
  31. package/src/runtime/read-only-diagnostics-runtime.ts +1 -1
  32. package/src/runtime/surface-projection.ts +200 -17
  33. package/src/runtime/table-commands.ts +79 -0
  34. package/src/runtime/table-schema.ts +9 -0
  35. package/src/ui/WordReviewEditor.tsx +708 -16
  36. package/src/ui-tailwind/editor-surface/pm-schema.ts +121 -5
  37. package/src/ui-tailwind/editor-surface/pm-state-from-snapshot.ts +73 -7
  38. package/src/ui-tailwind/editor-surface/search-plugin.ts +76 -16
  39. package/src/ui-tailwind/editor-surface/tw-prosemirror-surface.tsx +162 -14
  40. package/src/validation/compatibility-engine.ts +208 -0
@@ -13,6 +13,8 @@ import {
13
13
  type InlineMediaPart,
14
14
  } from "./parse-inline-media.ts";
15
15
  import { toCanonicalNumberingInstanceId } from "./parse-numbering.ts";
16
+ import { parseComplexContentXml } from "./parse-complex-content.ts";
17
+ import { parseShapeXml, parseVmlXml } from "./parse-shapes.ts";
16
18
 
17
19
  export interface ParsedMainDocument {
18
20
  blocks: ParsedBlockNode[];
@@ -59,7 +61,15 @@ export type ParsedInlineNode =
59
61
  | ParsedSymbolNode
60
62
  | ParsedImageNode
61
63
  | ParsedHyperlinkNode
62
- | ParsedOpaqueInlineNode;
64
+ | ParsedOpaqueInlineNode
65
+ | ParsedChartPreviewNode
66
+ | ParsedSmartArtPreviewNode
67
+ | ParsedShapeInlineNode
68
+ | ParsedWordArtInlineNode
69
+ | ParsedVmlShapeInlineNode
70
+ | ParsedBookmarkStartInlineNode
71
+ | ParsedBookmarkEndInlineNode
72
+ | ParsedFieldInlineNode;
63
73
 
64
74
  export interface ParsedTextNode {
65
75
  type: "text";
@@ -126,6 +136,60 @@ export interface ParsedOpaqueInlineNode {
126
136
  rawXml: string;
127
137
  }
128
138
 
139
+ export interface ParsedChartPreviewNode {
140
+ type: "chart_preview";
141
+ previewMediaId?: string;
142
+ rawXml: string;
143
+ }
144
+
145
+ export interface ParsedSmartArtPreviewNode {
146
+ type: "smartart_preview";
147
+ previewMediaId?: string;
148
+ rawXml: string;
149
+ }
150
+
151
+ export interface ParsedShapeInlineNode {
152
+ type: "shape";
153
+ text?: string;
154
+ geometry?: string;
155
+ rawXml: string;
156
+ }
157
+
158
+ export interface ParsedWordArtInlineNode {
159
+ type: "wordart";
160
+ text: string;
161
+ geometry?: string;
162
+ rawXml: string;
163
+ }
164
+
165
+ export interface ParsedVmlShapeInlineNode {
166
+ type: "vml_shape";
167
+ text?: string;
168
+ shapeType?: string;
169
+ rawXml: string;
170
+ }
171
+
172
+ export interface ParsedBookmarkStartInlineNode {
173
+ type: "bookmark_start";
174
+ bookmarkId: string;
175
+ name: string;
176
+ rawXml: string;
177
+ }
178
+
179
+ export interface ParsedBookmarkEndInlineNode {
180
+ type: "bookmark_end";
181
+ bookmarkId: string;
182
+ rawXml: string;
183
+ }
184
+
185
+ export interface ParsedFieldInlineNode {
186
+ type: "field";
187
+ fieldType: "simple";
188
+ instruction: string;
189
+ contentXml: string;
190
+ rawXml: string;
191
+ }
192
+
129
193
  export interface ParsedOpaqueBlockNode {
130
194
  type: "opaque_block";
131
195
  rawXml: string;
@@ -345,6 +409,7 @@ function parseBodyChild(
345
409
  break;
346
410
  case "bookmarkStart":
347
411
  case "bookmarkEnd":
412
+ case "fldSimple":
348
413
  case "permStart":
349
414
  case "permEnd":
350
415
  case "proofErr":
@@ -696,10 +761,10 @@ function readTableGridColumns(node: XmlElementNode): number[] {
696
761
  * are implemented in the table editing path.
697
762
  */
698
763
  function tableRequiresOpaquePreservation(rawXml: string): boolean {
699
- // For now, only parse tables that contain exclusively simple content
700
- // (plain text, basic formatting). Any complex OOXML stays opaque.
701
- // This list will shrink as the table editing path gains feature coverage.
702
- return /<w:(ins|del|rPrChange|pPrChange|tblPrChange|trPrChange|tcPrChange|sectPrChange|cellIns|cellDel|cellMerge|hyperlink|commentRangeStart|commentRangeEnd|commentReference|bookmarkStart|bookmarkEnd|rStyle|pict|fldChar|fldSimple|smartTag|gridAfter|gridBefore|hideMark|tblHeader|tblCellSpacing|bCs)\b/.test(rawXml);
764
+ // Tables with revision markup, complex content, or structural tags stay opaque.
765
+ // Safe tags (hyperlinks, bookmarks, comments, basic formatting) are now allowed
766
+ // since they are handled by the paragraph parser within table cells.
767
+ return /<w:(ins|del|rPrChange|pPrChange|tblPrChange|trPrChange|tcPrChange|sectPrChange|cellIns|cellDel|cellMerge|pict|fldChar|fldSimple|smartTag|gridAfter|gridBefore|tblCellSpacing)\b/.test(rawXml);
703
768
  }
704
769
 
705
770
  function readCellGridSpan(node: XmlElementNode): number | undefined {
@@ -1072,8 +1137,37 @@ function parseRun(
1072
1137
  }
1073
1138
  break;
1074
1139
  case "drawing": {
1140
+ const drawingXml = sourceXml.slice(child.start, child.end);
1141
+
1142
+ // Try complex content (charts / SmartArt) first
1143
+ try {
1144
+ const complexContent = parseComplexContentXml(
1145
+ drawingXml,
1146
+ relationships,
1147
+ mediaParts,
1148
+ sourcePartPath,
1149
+ );
1150
+ if (complexContent) {
1151
+ result.push(complexContent);
1152
+ break;
1153
+ }
1154
+ } catch {
1155
+ // Fall through to shape / image parsing
1156
+ }
1157
+
1158
+ // Try shape / WordArt parsing
1159
+ try {
1160
+ const shapeResult = parseShapeXml(drawingXml);
1161
+ if (shapeResult) {
1162
+ result.push(shapeResult);
1163
+ break;
1164
+ }
1165
+ } catch {
1166
+ // Fall through to image parsing
1167
+ }
1168
+
1075
1169
  const parsedMedia = parseInlineMediaXml(
1076
- sourceXml.slice(child.start, child.end),
1170
+ drawingXml,
1077
1171
  relationships,
1078
1172
  mediaParts,
1079
1173
  sourcePartPath,
@@ -1112,6 +1206,29 @@ function parseRun(
1112
1206
  );
1113
1207
  break;
1114
1208
  }
1209
+ case "pict": {
1210
+ const pictXml = sourceXml.slice(child.start, child.end);
1211
+ try {
1212
+ const vmlResult = parseVmlXml(pictXml);
1213
+ if (vmlResult) {
1214
+ // Use the full run XML as rawXml so the export serializer can emit
1215
+ // the complete <w:r>...<w:pict>...</w:pict></w:r> wrapper
1216
+ result.push({
1217
+ ...vmlResult,
1218
+ rawXml: sourceXml.slice(node.start, node.end),
1219
+ });
1220
+ break;
1221
+ }
1222
+ } catch {
1223
+ // Fall through to opaque
1224
+ }
1225
+ encounteredUnsupportedChild = true;
1226
+ result.push({
1227
+ type: "opaque_inline",
1228
+ rawXml: pictXml,
1229
+ });
1230
+ break;
1231
+ }
1115
1232
  case "commentReference":
1116
1233
  break;
1117
1234
  case "lastRenderedPageBreak":
@@ -1454,6 +1571,52 @@ function readRunMarks(node: XmlElementNode, sourceXml: string): MarksParseResult
1454
1571
  marks.push(textFillMark);
1455
1572
  }
1456
1573
 
1574
+ // Font family
1575
+ const rFontsNode = properties.children.find(
1576
+ (child): child is XmlElementNode => child.type === "element" && localName(child.name) === "rFonts",
1577
+ );
1578
+ if (rFontsNode) {
1579
+ const family = rFontsNode.attributes["w:ascii"] ?? rFontsNode.attributes["w:hAnsi"] ?? rFontsNode.attributes.ascii ?? rFontsNode.attributes.hAnsi;
1580
+ if (family) {
1581
+ marks.push({ type: "fontFamily", val: family });
1582
+ }
1583
+ }
1584
+
1585
+ // Font size (half-points)
1586
+ const szNode = properties.children.find(
1587
+ (child): child is XmlElementNode => child.type === "element" && localName(child.name) === "sz",
1588
+ );
1589
+ if (szNode) {
1590
+ const szVal = szNode.attributes["w:val"] ?? szNode.attributes.val;
1591
+ if (szVal) {
1592
+ const size = Number.parseInt(szVal, 10);
1593
+ if (Number.isFinite(size) && size > 0) {
1594
+ marks.push({ type: "fontSize", val: size });
1595
+ }
1596
+ }
1597
+ }
1598
+
1599
+ // Text color
1600
+ const colorNode = properties.children.find(
1601
+ (child): child is XmlElementNode => child.type === "element" && localName(child.name) === "color",
1602
+ );
1603
+ if (colorNode) {
1604
+ const colorVal = colorNode.attributes["w:val"] ?? colorNode.attributes.val;
1605
+ if (colorVal && colorVal !== "auto") {
1606
+ marks.push({ type: "textColor", color: colorVal });
1607
+ }
1608
+ }
1609
+
1610
+ // Small caps
1611
+ if (hasOnOffProperty(properties, "smallCaps")) {
1612
+ marks.push({ type: "smallCaps" });
1613
+ }
1614
+
1615
+ // All caps
1616
+ if (hasOnOffProperty(properties, "caps")) {
1617
+ marks.push({ type: "allCaps" });
1618
+ }
1619
+
1457
1620
  return {
1458
1621
  marks,
1459
1622
  supported: true,
@@ -271,7 +271,7 @@ function parseRelationshipsXml(xml: string): OpcRelationship[] {
271
271
  }
272
272
 
273
273
  function findTagAttributes(xml: string, tagName: string): Record<string, string>[] {
274
- const tagPattern = new RegExp(`<${tagName}\\b([^>]*)\\/?>`, "g");
274
+ const tagPattern = new RegExp(`<(?:[A-Za-z_][\\w.-]*:)?${tagName}\\b([^>]*)\\/?>`, "g");
275
275
  const results: Record<string, string>[] = [];
276
276
  let tagMatch: RegExpExecArray | null = tagPattern.exec(xml);
277
277
 
@@ -285,11 +285,11 @@ function findTagAttributes(xml: string, tagName: string): Record<string, string>
285
285
 
286
286
  function parseAttributes(rawAttributes: string): Record<string, string> {
287
287
  const attributes: Record<string, string> = {};
288
- const attributePattern = /([A-Za-z_][\w:.-]*)="([^"]*)"/g;
288
+ const attributePattern = /([A-Za-z_][\w:.-]*)\s*=\s*(["'])([\s\S]*?)\2/g;
289
289
  let match: RegExpExecArray | null = attributePattern.exec(rawAttributes);
290
290
 
291
291
  while (match) {
292
- attributes[match[1]] = decodeXmlEntities(match[2]);
292
+ attributes[match[1]] = decodeXmlEntities(match[3]);
293
293
  match = attributePattern.exec(rawAttributes);
294
294
  }
295
295
 
@@ -0,0 +1,241 @@
1
+ import type { PersistedSourcePackage } from "../model/snapshot.ts";
2
+ import { DOCX_MIME_TYPE } from "./opc/docx-package.ts";
3
+
/**
 * Starting values of the eight 32-bit working words used by `sha256Hex`.
 * The values match the initial hash value H(0) that FIPS 180-4 lists for SHA-256.
 */
const SHA256_INITIAL_STATE = [
  0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
  0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
] as const;
14
+
/**
 * The 64 per-round additive constants consumed by the compression loop in
 * `sha256Hex` (one per round, indexed by round number). The values match the
 * K constants that FIPS 180-4 lists for SHA-256; eight rounds per row below.
 */
const SHA256_CONSTANTS = [
  0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
  0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
  0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
  0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
  0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
  0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
  0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
  0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
] as const;
33
+
/**
 * Builds the persisted record for a source package from its raw bytes.
 *
 * The record is tagged as `"docx"` / `"embedded-base64"` with the DOCX MIME
 * type and carries both the Base64 text of the bytes and their SHA-256 digest
 * as hex.
 *
 * @param bytes - Raw package content, as a `Uint8Array` or an `ArrayBuffer`.
 * @param sourceLabel - Optional label. It is stored verbatim (not trimmed)
 *   when it is a string containing at least one non-whitespace character;
 *   otherwise `sourceLabel` is recorded as `undefined`.
 * @returns The populated `PersistedSourcePackage`.
 */
export function createPersistedSourcePackage(
  bytes: Uint8Array | ArrayBuffer,
  sourceLabel?: string,
): PersistedSourcePackage {
  const payload = toUint8Array(bytes);
  const hasUsableLabel =
    typeof sourceLabel === "string" && sourceLabel.trim().length > 0;

  return {
    format: "docx",
    storage: "embedded-base64",
    mimeType: DOCX_MIME_TYPE,
    sourceLabel: hasUsableLabel ? sourceLabel : undefined,
    sha256Hex: sha256Hex(payload),
    bytesBase64: encodeBytesBase64(payload),
  };
}
52
+
/**
 * Recovers the raw bytes of a persisted source package by Base64-decoding its
 * `bytesBase64` field. The stored digest is not checked here.
 *
 * @param sourcePackage - Persisted record whose payload should be decoded.
 * @returns The decoded bytes.
 */
export function decodePersistedSourcePackageBytes(
  sourcePackage: PersistedSourcePackage,
): Uint8Array {
  const { bytesBase64 } = sourcePackage;
  return decodeBytesBase64(bytesBase64);
}
58
+
/**
 * Reports whether `bytes` hash to the digest stored on the persisted record.
 *
 * The comparison is an exact string match against `sourcePackage.sha256Hex`,
 * so the stored digest must use the same lowercase hex form that `sha256Hex`
 * produces.
 *
 * @param sourcePackage - Persisted record carrying the expected digest.
 * @param bytes - Candidate bytes to hash.
 * @returns `true` when the freshly computed digest equals the stored one.
 */
export function hasValidPersistedSourcePackageDigest(
  sourcePackage: PersistedSourcePackage,
  bytes: Uint8Array,
): boolean {
  const actualDigest = sha256Hex(bytes);
  return actualDigest === sourcePackage.sha256Hex;
}
65
+
/**
 * Encodes bytes as a Base64 string.
 *
 * Uses the global `Buffer` when `getBufferConstructor()` finds one; otherwise
 * falls back to `btoa` over a binary string built from the bytes.
 *
 * @param bytes - Bytes to encode.
 * @returns The Base64 text for `bytes`.
 * @throws Error when neither `Buffer` nor `btoa` is available.
 */
export function encodeBytesBase64(bytes: Uint8Array): string {
  const bufferConstructor = getBufferConstructor();
  if (bufferConstructor) {
    return bufferConstructor.from(bytes).toString("base64");
  }

  // Fail before doing any O(n) work if the fallback cannot run at all.
  if (typeof btoa !== "function") {
    throw new Error("Base64 encoding is unavailable in this runtime.");
  }

  // Convert one bounded chunk per call: this keeps the argument count well
  // under engine limits while avoiding a string append per byte.
  const chunkSize = 0x8000;
  const pieces: string[] = [];
  for (let offset = 0; offset < bytes.byteLength; offset += chunkSize) {
    pieces.push(String.fromCharCode(...bytes.subarray(offset, offset + chunkSize)));
  }

  return btoa(pieces.join(""));
}
86
+
/**
 * Decodes a Base64 string into bytes.
 *
 * Uses the global `Buffer` when `getBufferConstructor()` finds one (the result
 * is copied into a plain `Uint8Array`); otherwise falls back to `atob`.
 *
 * @param value - Base64 text to decode.
 * @returns The decoded bytes.
 * @throws Error when neither `Buffer` nor `atob` is available.
 */
export function decodeBytesBase64(value: string): Uint8Array {
  const nodeBuffer = getBufferConstructor();

  if (!nodeBuffer) {
    if (typeof atob !== "function") {
      throw new Error("Base64 decoding is unavailable in this runtime.");
    }

    // atob yields one character per byte, so each char code is the byte value.
    const binary = atob(value);
    return Uint8Array.from(binary, (character) => character.charCodeAt(0));
  }

  return new Uint8Array(nodeBuffer.from(value, "base64"));
}
104
+
/**
 * Computes the SHA-256 digest of `bytes` in pure TypeScript.
 *
 * @param bytes - Message to hash.
 * @returns The digest as 64 lowercase hexadecimal characters (eight 32-bit
 *   words, each zero-padded to 8 digits).
 */
export function sha256Hex(bytes: Uint8Array): string {
  const message = padSha256Message(bytes);
  // The padded message length is a multiple of 64, so every 4-byte read below is in range.
  const words = new DataView(message.buffer, message.byteOffset, message.byteLength);
  const hash: number[] = SHA256_INITIAL_STATE.map((word) => word >>> 0);
  const w = new Uint32Array(64);

  for (let blockStart = 0; blockStart < message.byteLength; blockStart += 64) {
    // Schedule words 0..15 are the block's sixteen big-endian 32-bit words.
    for (let t = 0; t < 16; t += 1) {
      w[t] = words.getUint32(blockStart + t * 4, false);
    }

    // Schedule words 16..63 are derived from earlier schedule words.
    for (let t = 16; t < 64; t += 1) {
      w[t] = add32(
        lowerSigma1(w[t - 2] ?? 0),
        w[t - 7] ?? 0,
        lowerSigma0(w[t - 15] ?? 0),
        w[t - 16] ?? 0,
      );
    }

    let [a, b, c, d, e, f, g, h] = hash;

    for (let t = 0; t < 64; t += 1) {
      const temp1 = add32(
        h,
        upperSigma1(e),
        choose(e, f, g),
        SHA256_CONSTANTS[t] ?? 0,
        w[t] ?? 0,
      );
      const temp2 = add32(upperSigma0(a), majority(a, b, c));

      // Rotate the working words one slot; the right-hand side is evaluated
      // entirely from the previous round's values before assignment.
      [a, b, c, d, e, f, g, h] = [add32(temp1, temp2), a, b, c, add32(d, temp1), e, f, g];
    }

    // Fold this block's working words back into the running hash.
    [a, b, c, d, e, f, g, h].forEach((value, slot) => {
      hash[slot] = add32(hash[slot] ?? 0, value);
    });
  }

  return hash.map((word) => word.toString(16).padStart(8, "0")).join("");
}
164
+
/**
 * Returns a padded copy of `bytes` ready for 64-byte block processing:
 * the original bytes, a single 0x80 byte, zero fill, and the message length
 * in bits stored big-endian in the final 8 bytes. The result length is the
 * smallest multiple of 64 that fits the message plus 9 bytes.
 */
function padSha256Message(bytes: Uint8Array): Uint8Array {
  const messageLength = bytes.byteLength;
  const blockCount = Math.ceil((messageLength + 9) / 64);
  const padded = new Uint8Array(blockCount * 64);

  padded.set(bytes);
  padded[messageLength] = 0x80;

  // Write the 64-bit bit count as two big-endian 32-bit halves.
  const bitLength = messageLength * 8;
  const view = new DataView(padded.buffer);
  const lengthOffset = padded.byteLength - 8;
  view.setUint32(lengthOffset, Math.floor(bitLength / 2 ** 32), false);
  view.setUint32(lengthOffset + 4, bitLength >>> 0, false);

  return padded;
}
180
+
/** Rotates the low 32 bits of `value` right by `shift` bits; the result is an unsigned 32-bit number. */
function rotateRight(value: number, shift: number): number {
  const shiftedDown = value >>> shift;
  const wrappedAround = value << (32 - shift);
  return (wrappedAround | shiftedDown) >>> 0;
}
184
+
/** Sums any number of values modulo 2^32, wrapping after every addition; returns 0 for no arguments. */
function add32(...values: number[]): number {
  return values.reduce((sum, value) => (sum + value) >>> 0, 0);
}
192
+
/** Bitwise select: each result bit comes from `y` where `x` has a 1 and from `z` where `x` has a 0. */
function choose(x: number, y: number, z: number): number {
  // Equivalent form of (x & y) ^ (~x & z): start from z and flip the bits
  // where x is set and y differs from z.
  return (z ^ (x & (y ^ z))) >>> 0;
}
196
+
/** Bitwise majority vote: each result bit is 1 when at least two of `x`, `y`, `z` have a 1 there. */
function majority(x: number, y: number, z: number): number {
  // Equivalent form of (x & y) ^ (x & z) ^ (y & z).
  return ((x & y) | (z & (x | y))) >>> 0;
}
200
+
/** XOR of `value` rotated right by 2, 13 and 22 bits, as an unsigned 32-bit number. */
function upperSigma0(value: number): number {
  const rot2 = rotateRight(value, 2);
  const rot13 = rotateRight(value, 13);
  const rot22 = rotateRight(value, 22);
  return (rot2 ^ rot13 ^ rot22) >>> 0;
}
204
+
/** XOR of `value` rotated right by 6, 11 and 25 bits, as an unsigned 32-bit number. */
function upperSigma1(value: number): number {
  const rot6 = rotateRight(value, 6);
  const rot11 = rotateRight(value, 11);
  const rot25 = rotateRight(value, 25);
  return (rot6 ^ rot11 ^ rot25) >>> 0;
}
208
+
/** XOR of `value` rotated right by 7 and 18 bits and shifted right by 3 bits, as an unsigned 32-bit number. */
function lowerSigma0(value: number): number {
  const rot7 = rotateRight(value, 7);
  const rot18 = rotateRight(value, 18);
  const shift3 = value >>> 3;
  return (rot7 ^ rot18 ^ shift3) >>> 0;
}
212
+
/** XOR of `value` rotated right by 17 and 19 bits and shifted right by 10 bits, as an unsigned 32-bit number. */
function lowerSigma1(value: number): number {
  const rot17 = rotateRight(value, 17);
  const rot19 = rotateRight(value, 19);
  const shift10 = value >>> 10;
  return (rot17 ^ rot19 ^ shift10) >>> 0;
}
216
+
/**
 * Normalizes the accepted byte inputs to a `Uint8Array`.
 *
 * A `Uint8Array` argument is copied into a new array; an `ArrayBuffer`
 * argument is wrapped in a view over that same buffer (no copy). Both cases
 * are the same constructor call, so no type test is needed.
 */
function toUint8Array(bytes: Uint8Array | ArrayBuffer): Uint8Array {
  return new Uint8Array(bytes);
}
220
+
/**
 * Minimal structural description of the global `Buffer` members this module
 * calls: `from(...)` returning bytes that can be stringified with an encoding.
 */
type BufferConstructorLike = {
  from(
    value: Uint8Array | string,
    byteOffsetOrEncoding?: number | string,
    length?: number,
  ): Uint8Array & { toString(encoding: string): string };
};

/**
 * Looks up `Buffer` on `globalThis` without referencing Node typings.
 *
 * @returns The global `Buffer` value when one is defined, otherwise
 *   `undefined`. The value is returned as found; its shape is not validated.
 */
function getBufferConstructor(): BufferConstructorLike | undefined {
  return (globalThis as { Buffer?: BufferConstructorLike }).Buffer;
}