@beyondwork/docx-react-component 1.0.60 → 1.0.62
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +33 -44
- package/src/api/public-types.ts +41 -0
- package/src/io/docx-session.ts +167 -8
- package/src/io/export/serialize-footnotes.ts +36 -5
- package/src/io/export/serialize-headers-footers.ts +7 -0
- package/src/io/export/serialize-main-document.ts +25 -18
- package/src/io/export/serialize-paragraph-formatting.ts +6 -0
- package/src/io/export/serialize-settings.ts +130 -3
- package/src/io/normalize/normalize-text.ts +8 -4
- package/src/io/ooxml/classify-embedding.ts +193 -0
- package/src/io/ooxml/parse-footnotes.ts +11 -0
- package/src/io/ooxml/parse-headers-footers.ts +117 -42
- package/src/io/ooxml/parse-main-document.ts +20 -8
- package/src/io/ooxml/parse-object.ts +23 -0
- package/src/io/ooxml/parse-paragraph-formatting.ts +25 -1
- package/src/io/ooxml/parse-settings.ts +91 -1
- package/src/model/canonical-document.ts +36 -2
- package/src/runtime/document-runtime.ts +424 -0
- package/src/runtime/footnote-resolver.ts +32 -8
- package/src/runtime/layout/layout-engine-version.ts +7 -1
- package/src/runtime/layout/measurement-backend-canvas.ts +1 -1
- package/src/runtime/layout/measurement-backend-empirical.ts +1 -1
- package/src/runtime/layout/paginated-layout-engine.ts +41 -8
- package/src/runtime/layout/resolved-formatting-document.ts +11 -9
- package/src/runtime/layout/resolved-formatting-state.ts +4 -0
- package/src/runtime/numbering-prefix.ts +26 -2
- package/src/runtime/surface-projection.ts +75 -14
- package/src/runtime/table-schema.ts +26 -0
- package/src/ui/WordReviewEditor.tsx +25 -0
- package/src/ui/editor-runtime-boundary.ts +1 -0
- package/src/ui/editor-shell-view.tsx +8 -0
- package/src/ui-tailwind/chrome/tw-runtime-repl-dialog.tsx +514 -0
- package/src/ui-tailwind/editor-surface/pm-schema.ts +14 -0
- package/src/ui-tailwind/editor-surface/pm-state-from-snapshot.ts +55 -6
- package/src/ui-tailwind/editor-surface/surface-build-keys.ts +2 -0
- package/src/ui-tailwind/editor-surface/tw-page-block-view.tsx +4 -0
- package/src/ui-tailwind/editor-surface/tw-prosemirror-surface.tsx +9 -1
- package/src/ui-tailwind/editor-surface/tw-table-node-view.tsx +16 -0
- package/src/ui-tailwind/page-stack/floating-image-overlay-model.ts +319 -0
- package/src/ui-tailwind/page-stack/tw-floating-image-layer.tsx +248 -0
- package/src/ui-tailwind/page-stack/tw-region-block-renderer.tsx +4 -0
- package/src/ui-tailwind/tw-review-workspace.tsx +54 -3
|
@@ -62,6 +62,7 @@
|
|
|
62
62
|
*/
|
|
63
63
|
|
|
64
64
|
import type {
|
|
65
|
+
ClrSchemeMappingSlot,
|
|
65
66
|
CompatSetting,
|
|
66
67
|
DocumentSettings,
|
|
67
68
|
} from "../../model/canonical-document.ts";
|
|
@@ -81,6 +82,21 @@ export const WORD_SETTINGS_CONTENT_TYPE =
|
|
|
81
82
|
const WORDPROCESSINGML_2006_MAIN_NS =
|
|
82
83
|
"http://schemas.openxmlformats.org/wordprocessingml/2006/main";
|
|
83
84
|
|
|
85
|
+
/**
 * Canonical slot order used when serializing `<w:clrSchemeMapping>`
 * attributes: background/text pairs first, then the six accents, then
 * the two hyperlink slots. A fixed order keeps the emitted attribute
 * sequence independent of the mapping object's key insertion order.
 */
const CLRSCHEME_MAPPING_EMIT_ORDER: readonly ClrSchemeMappingSlot[] = [
  "bg1", "t1",
  "bg2", "t2",
  "accent1", "accent2", "accent3",
  "accent4", "accent5", "accent6",
  "hlink", "followedHyperlink",
];
|
|
99
|
+
|
|
84
100
|
/**
|
|
85
101
|
* Render a complete `<w:settings>` XML document from canonical
|
|
86
102
|
* `DocumentSettings`. The output is the standard XML declaration plus a
|
|
@@ -89,9 +105,13 @@ const WORDPROCESSINGML_2006_MAIN_NS =
|
|
|
89
105
|
* Emit order is OOXML-schema-friendly to maximize Word's tolerance:
|
|
90
106
|
* 1. <w:evenAndOddHeaders>
|
|
91
107
|
* 2. <w:zoom>
|
|
92
|
-
* 3.
|
|
93
|
-
* 4. <w:
|
|
94
|
-
* 5. <w:
|
|
108
|
+
* 3. <w:defaultTabStop>
|
|
109
|
+
* 4. <w:footnotePr>
|
|
110
|
+
* 5. <w:endnotePr>
|
|
111
|
+
* 6. root-level compat-adjacent flags (e.g. <w:doNotEmbedSmartTags/>)
|
|
112
|
+
* 7. <w:compat> wrapping flags then compatSetting triples
|
|
113
|
+
* 8. <w:themeFontLang>
|
|
114
|
+
* 9. <w:clrSchemeMapping>
|
|
95
115
|
*
|
|
96
116
|
* Insertion order of `compatSettings` array entries and `compatFlags` /
|
|
97
117
|
* `rootCompatFlags` / `themeFontLang` keys is preserved so a byte-stable
|
|
@@ -112,9 +132,13 @@ function synthesizeSettingsXml(settings: DocumentSettings): string {
|
|
|
112
132
|
const parts: string[] = [];
|
|
113
133
|
parts.push(emitEvenAndOddHeaders(settings));
|
|
114
134
|
parts.push(emitZoom(settings));
|
|
135
|
+
parts.push(emitDefaultTabStop(settings));
|
|
136
|
+
parts.push(emitFootnoteLikeProperties("w:footnotePr", settings.footnotePr));
|
|
137
|
+
parts.push(emitFootnoteLikeProperties("w:endnotePr", settings.endnotePr));
|
|
115
138
|
parts.push(emitRootCompatFlags(settings));
|
|
116
139
|
parts.push(emitCompatBlock(settings));
|
|
117
140
|
parts.push(emitThemeFontLang(settings));
|
|
141
|
+
parts.push(emitClrSchemeMapping(settings));
|
|
118
142
|
|
|
119
143
|
const body = parts.filter((p) => p.length > 0).join("");
|
|
120
144
|
return [
|
|
@@ -195,6 +219,24 @@ function graftSettingsXml(
|
|
|
195
219
|
) {
|
|
196
220
|
appendedParts.push(emitZoom(settings));
|
|
197
221
|
}
|
|
222
|
+
if (
|
|
223
|
+
!emittedTopLevel.has("defaultTabStop") &&
|
|
224
|
+
settings.defaultTabStop !== undefined
|
|
225
|
+
) {
|
|
226
|
+
appendedParts.push(emitDefaultTabStop(settings));
|
|
227
|
+
}
|
|
228
|
+
if (
|
|
229
|
+
!emittedTopLevel.has("footnotePr") &&
|
|
230
|
+
settings.footnotePr !== undefined
|
|
231
|
+
) {
|
|
232
|
+
appendedParts.push(emitFootnoteLikeProperties("w:footnotePr", settings.footnotePr));
|
|
233
|
+
}
|
|
234
|
+
if (
|
|
235
|
+
!emittedTopLevel.has("endnotePr") &&
|
|
236
|
+
settings.endnotePr !== undefined
|
|
237
|
+
) {
|
|
238
|
+
appendedParts.push(emitFootnoteLikeProperties("w:endnotePr", settings.endnotePr));
|
|
239
|
+
}
|
|
198
240
|
// Any rootCompatFlags entries that didn't have a source counterpart.
|
|
199
241
|
for (const [name, value] of pendingRootFlags) {
|
|
200
242
|
appendedParts.push(emitOnOffElement(name, value));
|
|
@@ -209,6 +251,12 @@ function graftSettingsXml(
|
|
|
209
251
|
) {
|
|
210
252
|
appendedParts.push(emitThemeFontLang(settings));
|
|
211
253
|
}
|
|
254
|
+
if (
|
|
255
|
+
!emittedTopLevel.has("clrSchemeMapping") &&
|
|
256
|
+
settings.clrSchemeMapping !== undefined
|
|
257
|
+
) {
|
|
258
|
+
appendedParts.push(emitClrSchemeMapping(settings));
|
|
259
|
+
}
|
|
212
260
|
|
|
213
261
|
return (
|
|
214
262
|
blueprint.prelude +
|
|
@@ -229,8 +277,12 @@ function graftSettingsXml(
|
|
|
229
277
|
const MODELLED_TOP_LEVEL_NAMES: ReadonlySet<string> = new Set([
|
|
230
278
|
"evenAndOddHeaders",
|
|
231
279
|
"zoom",
|
|
280
|
+
"defaultTabStop",
|
|
281
|
+
"footnotePr",
|
|
282
|
+
"endnotePr",
|
|
232
283
|
"compat",
|
|
233
284
|
"themeFontLang",
|
|
285
|
+
"clrSchemeMapping",
|
|
234
286
|
]);
|
|
235
287
|
|
|
236
288
|
type ChildReplacement =
|
|
@@ -282,6 +334,36 @@ function computeChildReplacement(
|
|
|
282
334
|
const xml = emitZoom(settings);
|
|
283
335
|
return xml.length > 0 ? { kind: "replace", xml } : { kind: "drop" };
|
|
284
336
|
}
|
|
337
|
+
case "defaultTabStop": {
|
|
338
|
+
if (
|
|
339
|
+
emitDefaultTabStop(parseModelledChild(child.rawXml)) ===
|
|
340
|
+
emitDefaultTabStop(settings)
|
|
341
|
+
) {
|
|
342
|
+
return { kind: "keep" };
|
|
343
|
+
}
|
|
344
|
+
const xml = emitDefaultTabStop(settings);
|
|
345
|
+
return xml.length > 0 ? { kind: "replace", xml } : { kind: "drop" };
|
|
346
|
+
}
|
|
347
|
+
case "footnotePr": {
|
|
348
|
+
if (
|
|
349
|
+
emitFootnoteLikeProperties("w:footnotePr", parseModelledChild(child.rawXml).footnotePr) ===
|
|
350
|
+
emitFootnoteLikeProperties("w:footnotePr", settings.footnotePr)
|
|
351
|
+
) {
|
|
352
|
+
return { kind: "keep" };
|
|
353
|
+
}
|
|
354
|
+
const xml = emitFootnoteLikeProperties("w:footnotePr", settings.footnotePr);
|
|
355
|
+
return xml.length > 0 ? { kind: "replace", xml } : { kind: "drop" };
|
|
356
|
+
}
|
|
357
|
+
case "endnotePr": {
|
|
358
|
+
if (
|
|
359
|
+
emitFootnoteLikeProperties("w:endnotePr", parseModelledChild(child.rawXml).endnotePr) ===
|
|
360
|
+
emitFootnoteLikeProperties("w:endnotePr", settings.endnotePr)
|
|
361
|
+
) {
|
|
362
|
+
return { kind: "keep" };
|
|
363
|
+
}
|
|
364
|
+
const xml = emitFootnoteLikeProperties("w:endnotePr", settings.endnotePr);
|
|
365
|
+
return xml.length > 0 ? { kind: "replace", xml } : { kind: "drop" };
|
|
366
|
+
}
|
|
285
367
|
case "compat": {
|
|
286
368
|
if (
|
|
287
369
|
emitCompatBlock(parseModelledChild(child.rawXml)) ===
|
|
@@ -302,6 +384,16 @@ function computeChildReplacement(
|
|
|
302
384
|
const xml = emitThemeFontLang(settings);
|
|
303
385
|
return xml.length > 0 ? { kind: "replace", xml } : { kind: "drop" };
|
|
304
386
|
}
|
|
387
|
+
case "clrSchemeMapping": {
|
|
388
|
+
if (
|
|
389
|
+
emitClrSchemeMapping(parseModelledChild(child.rawXml)) ===
|
|
390
|
+
emitClrSchemeMapping(settings)
|
|
391
|
+
) {
|
|
392
|
+
return { kind: "keep" };
|
|
393
|
+
}
|
|
394
|
+
const xml = emitClrSchemeMapping(settings);
|
|
395
|
+
return xml.length > 0 ? { kind: "replace", xml } : { kind: "drop" };
|
|
396
|
+
}
|
|
305
397
|
}
|
|
306
398
|
// Root compat flag?
|
|
307
399
|
if (ROOT_COMPAT_FLAG_NAMES.has(child.localName)) {
|
|
@@ -361,6 +453,28 @@ function emitZoom(settings: DocumentSettings): string {
|
|
|
361
453
|
return "";
|
|
362
454
|
}
|
|
363
455
|
|
|
456
|
+
/**
 * Render the `<w:defaultTabStop>` element from `settings.defaultTabStop`.
 *
 * @param settings Canonical document settings.
 * @returns The self-closing element with the value rounded to the
 *   nearest integer, or `""` when the value is absent or not a finite
 *   number (so callers can filter it out of the emitted parts).
 */
function emitDefaultTabStop(settings: DocumentSettings): string {
  const tabStop = settings.defaultTabStop;
  if (tabStop === undefined) {
    return "";
  }
  if (!Number.isFinite(tabStop)) {
    return "";
  }
  const rounded = Math.round(tabStop);
  return `<w:defaultTabStop w:val="${rounded}"/>`;
}
|
|
461
|
+
|
|
462
|
+
/**
 * Render a document-level `<w:footnotePr>` or `<w:endnotePr>` element.
 *
 * Children are written in the fixed order `w:pos`, `w:numFmt`,
 * `w:numStart`, `w:numRestart`. String-valued children are skipped when
 * falsy; `numStart` is skipped when undefined or non-finite and is
 * otherwise rounded to an integer.
 *
 * @param elementName Qualified name of the wrapper element to emit.
 * @param props Footnote/endnote properties from the canonical settings.
 * @returns The wrapper element with its children, or `""` when `props`
 *   is absent or no child would be emitted.
 */
function emitFootnoteLikeProperties(
  elementName: "w:footnotePr" | "w:endnotePr",
  props: DocumentSettings["footnotePr"] | DocumentSettings["endnotePr"],
): string {
  if (!props) {
    return "";
  }

  let children = "";
  if (props.pos) {
    children += `<w:pos w:val="${escapeXmlAttribute(props.pos)}"/>`;
  }
  if (props.numFmt) {
    children += `<w:numFmt w:val="${escapeXmlAttribute(props.numFmt)}"/>`;
  }
  if (props.numStart !== undefined && Number.isFinite(props.numStart)) {
    children += `<w:numStart w:val="${Math.round(props.numStart)}"/>`;
  }
  if (props.numRestart) {
    children += `<w:numRestart w:val="${escapeXmlAttribute(props.numRestart)}"/>`;
  }

  // An empty wrapper carries no information, so emit nothing at all.
  return children.length === 0
    ? ""
    : `<${elementName}>${children}</${elementName}>`;
}
|
|
477
|
+
|
|
364
478
|
function emitRootCompatFlags(settings: DocumentSettings): string {
|
|
365
479
|
const flags = settings.rootCompatFlags;
|
|
366
480
|
if (!flags) return "";
|
|
@@ -410,6 +524,19 @@ function emitThemeFontLang(settings: DocumentSettings): string {
|
|
|
410
524
|
return `<w:themeFontLang${attrs}/>`;
|
|
411
525
|
}
|
|
412
526
|
|
|
527
|
+
/**
 * Render the `<w:clrSchemeMapping>` element from
 * `settings.clrSchemeMapping`.
 *
 * Attributes are written in `CLRSCHEME_MAPPING_EMIT_ORDER`; slots with a
 * falsy value are omitted.
 *
 * @param settings Canonical document settings.
 * @returns The self-closing element, or `""` when there is no mapping
 *   or no slot has a value.
 */
function emitClrSchemeMapping(settings: DocumentSettings): string {
  const mapping = settings.clrSchemeMapping;
  if (!mapping) return "";
  const attrs = CLRSCHEME_MAPPING_EMIT_ORDER
    .map((slot) => {
      const value = mapping[slot];
      if (!value) return "";
      // WordprocessingML names this attribute `w:hyperlink`; `hlink` is
      // the DrawingML (`a:clrMap`) spelling and is not a valid attribute
      // on `<w:clrSchemeMapping>`. Every other slot key matches its
      // attribute name.
      // NOTE(review): confirm the settings parser reads `w:hyperlink`
      // into the `hlink` slot so import and export stay symmetric.
      const attributeName = slot === "hlink" ? "hyperlink" : slot;
      return ` w:${attributeName}="${escapeXmlAttribute(value)}"`;
    })
    .join("");
  if (attrs.length === 0) return "";
  return `<w:clrSchemeMapping${attrs}/>`;
}
|
|
539
|
+
|
|
413
540
|
/**
|
|
414
541
|
* Emit a ST_OnOff element. true → bare self-closing tag; false → explicit
|
|
415
542
|
* `w:val="false"` so the parser doesn't infer the default-true. Symmetric
|
|
@@ -260,13 +260,17 @@ function normalizeParagraph(
|
|
|
260
260
|
: {}),
|
|
261
261
|
...(paragraph.indentation ? { indentation: paragraph.indentation } : {}),
|
|
262
262
|
...(paragraph.tabStops && paragraph.tabStops.length > 0 ? { tabStops: paragraph.tabStops } : {}),
|
|
263
|
-
...(paragraph.keepNext ? { keepNext: paragraph.keepNext } : {}),
|
|
264
|
-
...(paragraph.keepLines ? { keepLines: paragraph.keepLines } : {}),
|
|
263
|
+
...(paragraph.keepNext !== undefined ? { keepNext: paragraph.keepNext } : {}),
|
|
264
|
+
...(paragraph.keepLines !== undefined ? { keepLines: paragraph.keepLines } : {}),
|
|
265
265
|
...(paragraph.outlineLevel !== undefined ? { outlineLevel: paragraph.outlineLevel } : {}),
|
|
266
|
-
...(paragraph.pageBreakBefore ? { pageBreakBefore: paragraph.pageBreakBefore } : {}),
|
|
267
|
-
...(paragraph.
|
|
266
|
+
...(paragraph.pageBreakBefore !== undefined ? { pageBreakBefore: paragraph.pageBreakBefore } : {}),
|
|
267
|
+
...(paragraph.widowControl !== undefined ? { widowControl: paragraph.widowControl } : {}),
|
|
268
|
+
...(paragraph.bidi !== undefined ? { bidi: paragraph.bidi } : {}),
|
|
268
269
|
...(paragraph.borders ? { borders: paragraph.borders } : {}),
|
|
269
270
|
...(paragraph.shading ? { shading: paragraph.shading } : {}),
|
|
271
|
+
...(paragraph.suppressLineNumbers !== undefined
|
|
272
|
+
? { suppressLineNumbers: paragraph.suppressLineNumbers }
|
|
273
|
+
: {}),
|
|
270
274
|
// A.7: preserve w14:paraId / w14:textId across import → export so
|
|
271
275
|
// downstream tools that diff documents by paragraph id stay stable.
|
|
272
276
|
...(paragraph.wordExtensionIds
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedded-object classifier (hotfix/ole-digestibility-guard).
|
|
3
|
+
*
|
|
4
|
+
* Decides how the session layer should handle each `<w:object>` /
|
|
5
|
+
* `w:altChunk` / embedded package encountered during parse, given its
|
|
6
|
+
* ProgID, relationship type, and content-type. Three outcomes:
|
|
7
|
+
*
|
|
8
|
+
* - **digestible** — the runtime currently renders this as an inline
|
|
9
|
+
* `OleEmbedNode` with a `[Embedded object: progId]` placeholder, with
|
|
10
|
+
* no side-effects that break the editor. Parser proceeds as today.
|
|
11
|
+
*
|
|
12
|
+
* - **offloadable** — a native OOXML package (Word sub-doc, Excel
|
|
13
|
+
* workbook) that a host-side storage adapter could extract and
|
|
14
|
+
* reconstitute. In the hotfix, `offloadable` is treated as
|
|
15
|
+
* `store-only` (there is no adapter yet); the opaque-fragment path
|
|
16
|
+
* preserves bytes + XML for round-trip. Graduates when refactor/01
|
|
17
|
+
* Step 7 lands a `hostAdapter.storeEmbeddedDocument?` callback.
|
|
18
|
+
*
|
|
19
|
+
* - **store-only** — complex binary or undigestible content (PDF,
|
|
20
|
+
* legacy binary Office, package-embedded docx with icon aspect,
|
|
21
|
+
* unknown ProgIDs). The parser returns undefined from `parseObject`
|
|
22
|
+
* and the existing opaque-fragment fallback in `parse-main-document`
|
|
23
|
+
* preserves both the `<w:object>` XML and its relationship id.
|
|
24
|
+
*
|
|
25
|
+
* Policy (opt-in to store-only):
|
|
26
|
+
* - Default: **digestible** for ProgIDs the hotfix does not recognize
|
|
27
|
+
* as problematic. This preserves today's behavior for benign OLE
|
|
28
|
+
* content that has not been reported as breaking — placeholder
|
|
29
|
+
* rendering + byte-preserved round-trip. Graduating an embedding to
|
|
30
|
+
* store-only is an explicit decision made against a real-world
|
|
31
|
+
* crash report.
|
|
32
|
+
* - ProgID prefix matching: `Word.Document.*`, `Excel.*`, `PowerPoint.*`,
|
|
33
|
+
* `AcroExch.Document.*`, exact `Package` → store-only. These are the
|
|
34
|
+
* ProgIDs most likely to break the editor (either because the
|
|
35
|
+
* payload is a nested OPC package whose downstream processing can
|
|
36
|
+
* fail mid-mount, or because the binary's rendering is out of scope
|
|
37
|
+
* for v1).
|
|
38
|
+
* - Content-type override: if the relationship points at a
|
|
39
|
+
* `application/vnd.openxmlformats-officedocument.wordprocessingml.document`
|
|
40
|
+
* (or analogous OOXML/PDF content-type), it is store-only regardless
|
|
41
|
+
* of ProgID. Catches the real-world CCEP "master agreement with
|
|
42
|
+
* sub-agreement inside" case.
|
|
43
|
+
* - Target-path extension match: if the relationship target ends in
|
|
44
|
+
* `.docx`/`.xlsx`/`.pptx`/`.pdf` (including macro-enabled and
|
|
45
|
+
* template variants), it is store-only even if ProgID + content-type
|
|
46
|
+
* were both missing. Weak signal but matches file-type intent.
|
|
47
|
+
*
|
|
48
|
+
* Why no `offloadable` return value today: the host-adapter callback
|
|
49
|
+
* pair (`storeEmbeddedDocument?` + `loadEmbeddedDocument?`) that makes
|
|
50
|
+
* `offloadable` meaningful lands as refactor/01 Step 7. Until then,
|
|
51
|
+
* what would be `offloadable` is folded into `store-only` — the
|
|
52
|
+
* opaque path preserves bytes + XML for the eventual offload.
|
|
53
|
+
*
|
|
54
|
+
* See `docs/architecture/01-package-session.md` §P8 for the full
|
|
55
|
+
* contract.
|
|
56
|
+
*/
|
|
57
|
+
|
|
58
|
+
const STORE_ONLY_PROGID_PREFIXES: readonly string[] = [
  // Nested Word documents — primary real-world crash source (CCEP
  // "EU & Global IT Services Agreement.docx" type).
  "Word.Document.",
  "Word.DocumentMacroEnabled.",
  "Word.Template.",
  // Spreadsheet embeddings. Excel.Sheet.12 + Excel.Worksheet.12 are
  // the common ProgIDs; Excel.Chart.* less so.
  "Excel.Sheet.",
  "Excel.SheetMacroEnabled.",
  "Excel.SheetBinaryMacroEnabled.",
  "Excel.Worksheet.",
  "Excel.Chart.",
  "Excel.ChartMacroEnabled.",
  // Presentations.
  "PowerPoint.Slide.",
  "PowerPoint.SlideMacroEnabled.",
  "PowerPoint.Show.",
  "PowerPoint.ShowMacroEnabled.",
  "PowerPoint.Document.",
  "PowerPoint.Template.",
  // PDF via Adobe Acrobat.
  "AcroExch.Document.",
];

const STORE_ONLY_PROGID_EXACT: ReadonlySet<string> = new Set([
  // Generic OLE container — ambiguous payload, fail closed.
  "Package",
  "Packager.Package",
]);

/**
 * Content-type patterns that force store-only regardless of ProgID.
 * These are checked against the relationship target's content-type as
 * declared in `[Content_Types].xml` overrides. All entries are
 * lower-case; the classifier lower-cases its input before comparing.
 */
const STORE_ONLY_CONTENT_TYPES: readonly string[] = [
  // Package-embedded Word document (relationships/package type on a
  // word/embeddings/*.docx part). Catches the CCEP case even if the
  // ProgID is missing or atypical.
  "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
  "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
  "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
  "application/vnd.openxmlformats-officedocument.spreadsheetml.template",
  "application/vnd.openxmlformats-officedocument.presentationml.presentation",
  "application/vnd.openxmlformats-officedocument.presentationml.template",
  "application/pdf",
];

/**
 * Lower-case target-path extensions that force store-only. Covers the
 * OOXML document, macro-enabled and template variants plus PDF.
 */
const STORE_ONLY_TARGET_EXTENSIONS: readonly string[] = [
  ".docx", ".docm", ".dotx", ".dotm",
  ".xlsx", ".xlsm", ".xltx", ".xltm",
  ".pptx", ".pptm", ".potx", ".potm",
  ".pdf",
];

// ProgIDs are compared case-insensitively, so the tables above are
// lower-cased once at module load. The original tables keep their
// documented spelling because they are exposed to tests.
const STORE_ONLY_PROGID_PREFIXES_LOWER: readonly string[] =
  STORE_ONLY_PROGID_PREFIXES.map((prefix) => prefix.toLowerCase());

const STORE_ONLY_PROGID_EXACT_LOWER: ReadonlySet<string> = new Set(
  Array.from(STORE_ONLY_PROGID_EXACT, (progId) => progId.toLowerCase()),
);

export type EmbeddingKind = "digestible" | "offloadable" | "store-only";

export interface ClassifyEmbeddingInput {
  /** ProgID on the `<o:OLEObject>` element, if any. */
  progId?: string;
  /**
   * Full OOXML relationship Type URI — e.g.
   * `http://schemas.openxmlformats.org/officeDocument/2006/relationships/oleObject`
   * or `.../relationships/package`.
   */
  relationshipType?: string;
  /**
   * Content-type of the embedding's target package part (from
   * `[Content_Types].xml` override or inferred default).
   */
  contentType?: string;
  /**
   * Relationship target path — e.g. `embeddings/Microsoft_Word_Document.docx`.
   * Consulted last, as a weak signal, when neither the ProgID nor the
   * content-type matched a store-only rule.
   */
  targetPath?: string;
}

/**
 * Classify an embedding.
 *
 * Store-only is opt-in: the result is `"store-only"` as soon as one
 * known-problematic signal matches, checked in this order —
 *
 *   1. ProgID (exact match, then prefix match), case-insensitive;
 *   2. content-type, compared on the media type only (parameters after
 *      `;` are ignored), case-insensitive;
 *   3. target-path extension, case-insensitive.
 *
 * When nothing matches — including when every field is missing — the
 * result is `"digestible"`. `"offloadable"` is never returned by this
 * function. `relationshipType` is accepted but not consulted.
 *
 * @param input Signals gathered for one embedded object.
 * @returns `"store-only"` or `"digestible"`.
 */
export function classifyEmbedding(
  input: ClassifyEmbeddingInput,
): EmbeddingKind {
  const progId = input.progId?.trim().toLowerCase() ?? "";

  // Keep only the media type: drop any `; param=value` suffix before
  // trimming so `application/pdf ; x=y` still matches.
  const rawContentType = input.contentType ?? "";
  const parameterStart = rawContentType.indexOf(";");
  const contentType = (
    parameterStart === -1
      ? rawContentType
      : rawContentType.slice(0, parameterStart)
  )
    .trim()
    .toLowerCase();

  const targetPath = input.targetPath?.trim().toLowerCase() ?? "";

  // ProgID: exact match first, then prefix match. Handles
  // Word.Document.12, Word.DocumentMacroEnabled.12, Excel.Sheet.12,
  // AcroExch.Document.7, etc., in any letter case.
  if (progId) {
    if (STORE_ONLY_PROGID_EXACT_LOWER.has(progId)) {
      return "store-only";
    }
    if (
      STORE_ONLY_PROGID_PREFIXES_LOWER.some((prefix) =>
        progId.startsWith(prefix),
      )
    ) {
      return "store-only";
    }
  }

  // Content-type override.
  if (contentType && STORE_ONLY_CONTENT_TYPES.includes(contentType)) {
    return "store-only";
  }

  // Target-path extension. Reached whenever the checks above did not
  // match, whether or not a ProgID / content-type was supplied.
  if (
    targetPath &&
    STORE_ONLY_TARGET_EXTENSIONS.some((ext) => targetPath.endsWith(ext))
  ) {
    return "store-only";
  }

  // No known-problematic signal matched — keep today's behavior.
  // parseObject will construct an OleEmbedNode; render-path continues
  // to show a placeholder for the embedding.
  return "digestible";
}
|
|
185
|
+
|
|
186
|
+
/**
 * Test-only handle on the classifier's lookup tables. Not part of the
 * public API; do not depend on it from production code.
 */
export const __internal = {
  STORE_ONLY_PROGID_PREFIXES: STORE_ONLY_PROGID_PREFIXES,
  STORE_ONLY_PROGID_EXACT: STORE_ONLY_PROGID_EXACT,
  STORE_ONLY_CONTENT_TYPES: STORE_ONLY_CONTENT_TYPES,
} as const;
|
|
@@ -154,9 +154,13 @@ export function parseEndnotesXml(
|
|
|
154
154
|
}
|
|
155
155
|
}
|
|
156
156
|
|
|
157
|
+
const endnoteSeparators = parseFootnoteSeparators(xml);
|
|
158
|
+
|
|
157
159
|
return {
|
|
158
160
|
footnotes: existing?.footnotes ?? {},
|
|
159
161
|
endnotes,
|
|
162
|
+
...(existing?.footnoteSeparators ? { footnoteSeparators: existing.footnoteSeparators } : {}),
|
|
163
|
+
...(Object.keys(endnoteSeparators).length > 0 ? { endnoteSeparators } : {}),
|
|
160
164
|
};
|
|
161
165
|
}
|
|
162
166
|
|
|
@@ -174,7 +178,9 @@ export function parseFootnoteSeparators(xml: string): FootnoteSeparators {
|
|
|
174
178
|
if (!containerEl) return {};
|
|
175
179
|
|
|
176
180
|
let separatorContent: string | undefined;
|
|
181
|
+
let separatorParagraphXml: string | undefined;
|
|
177
182
|
let continuationSeparatorContent: string | undefined;
|
|
183
|
+
let continuationSeparatorParagraphXml: string | undefined;
|
|
178
184
|
|
|
179
185
|
for (const child of containerEl.children) {
|
|
180
186
|
if (child.type !== "element") continue;
|
|
@@ -186,6 +192,7 @@ export function parseFootnoteSeparators(xml: string): FootnoteSeparators {
|
|
|
186
192
|
|
|
187
193
|
const paraEl = findChildElementOptional(child, "p");
|
|
188
194
|
if (!paraEl) continue;
|
|
195
|
+
const paragraphXml = serializeElementToXml(paraEl);
|
|
189
196
|
|
|
190
197
|
const runXml = paraEl.children
|
|
191
198
|
.filter((c): c is XmlElementNode => c.type === "element" && localName(c.name) === "r")
|
|
@@ -194,14 +201,18 @@ export function parseFootnoteSeparators(xml: string): FootnoteSeparators {
|
|
|
194
201
|
|
|
195
202
|
if (rawType === "separator") {
|
|
196
203
|
separatorContent = runXml;
|
|
204
|
+
separatorParagraphXml = paragraphXml;
|
|
197
205
|
} else {
|
|
198
206
|
continuationSeparatorContent = runXml;
|
|
207
|
+
continuationSeparatorParagraphXml = paragraphXml;
|
|
199
208
|
}
|
|
200
209
|
}
|
|
201
210
|
|
|
202
211
|
return {
|
|
203
212
|
...(separatorContent !== undefined ? { separatorContent } : {}),
|
|
213
|
+
...(separatorParagraphXml !== undefined ? { separatorParagraphXml } : {}),
|
|
204
214
|
...(continuationSeparatorContent !== undefined ? { continuationSeparatorContent } : {}),
|
|
215
|
+
...(continuationSeparatorParagraphXml !== undefined ? { continuationSeparatorParagraphXml } : {}),
|
|
205
216
|
};
|
|
206
217
|
}
|
|
207
218
|
|