pptxtojson-pro 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.babelrc.cjs +15 -0
- package/.eslintignore +3 -0
- package/.eslintrc.cjs +78 -0
- package/LICENSE +21 -0
- package/README.md +294 -0
- package/favicon.ico +0 -0
- package/index.html +541 -0
- package/package.json +56 -0
- package/rollup.config.js +42 -0
- package/scripts/extract-pptx-structure.js +115 -0
- package/scripts/transvert.js +34 -0
- package/src/adapter/toPptxtojson.ts +46 -0
- package/src/adapter/types.ts +330 -0
- package/src/export/serializePresentation.ts +200 -0
- package/src/index.ts +27 -0
- package/src/model/Layout.ts +218 -0
- package/src/model/Master.ts +114 -0
- package/src/model/Presentation.ts +502 -0
- package/src/model/Slide.ts +386 -0
- package/src/model/Theme.ts +95 -0
- package/src/model/nodes/BaseNode.ts +169 -0
- package/src/model/nodes/ChartNode.ts +55 -0
- package/src/model/nodes/GroupNode.ts +62 -0
- package/src/model/nodes/PicNode.ts +102 -0
- package/src/model/nodes/ShapeNode.ts +289 -0
- package/src/model/nodes/TableNode.ts +135 -0
- package/src/parser/RelParser.ts +81 -0
- package/src/parser/XmlParser.ts +101 -0
- package/src/parser/ZipParser.ts +277 -0
- package/src/parser/units.ts +59 -0
- package/src/serializer/RenderContext.ts +79 -0
- package/src/serializer/StyleResolver.ts +821 -0
- package/src/serializer/backgroundSerializer.ts +143 -0
- package/src/serializer/borderMapper.ts +93 -0
- package/src/serializer/chartSerializer.ts +97 -0
- package/src/serializer/fillMapper.ts +224 -0
- package/src/serializer/groupSerializer.ts +94 -0
- package/src/serializer/imageSerializer.ts +330 -0
- package/src/serializer/index.ts +27 -0
- package/src/serializer/shapeSerializer.ts +694 -0
- package/src/serializer/slideSerializer.ts +250 -0
- package/src/serializer/tableSerializer.ts +66 -0
- package/src/serializer/textSerializer.md +70 -0
- package/src/serializer/textSerializer.ts +1019 -0
- package/src/shapes/customGeometry.ts +178 -0
- package/src/shapes/presets.ts +6587 -0
- package/src/shapes/shapeArc.ts +44 -0
- package/src/types/vendor-shims.d.ts +20 -0
- package/src/utils/color.ts +488 -0
- package/src/utils/emfParser.ts +298 -0
- package/src/utils/media.ts +73 -0
- package/src/utils/mediaWebConvert.ts +100 -0
- package/src/utils/rgbaToPng.ts +33 -0
- package/src/utils/urlSafety.ts +17 -0
- package/tsconfig.json +24 -0
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* EMF (Enhanced Metafile) binary parser — extracts embedded content from EMF files.
|
|
3
|
+
*
|
|
4
|
+
* PPTX files frequently embed EMF images as OLE object previews.
|
|
5
|
+
* Most contain embedded PDF data inside GDI comment records, or DIB bitmaps
|
|
6
|
+
* via STRETCHDIBITS records. This parser extracts those embedded resources
|
|
7
|
+
* without implementing full EMF record interpretation.
|
|
8
|
+
*
|
|
9
|
+
* EMF record format: each record is { type: u32, size: u32, ...data }
|
|
10
|
+
* Records are walked sequentially until EOF record (type 14).
|
|
11
|
+
*
|
|
12
|
+
* Bitmap output uses a plain RGBA buffer (no browser ImageData) so Node/bundlers work.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
/** A decoded raster image held in a plain byte buffer (no browser ImageData). */
export type RasterBitmap = {
  /** Image width in pixels. */
  width: number;
  /** Image height in pixels. */
  height: number;
  /** RGBA, length width * height * 4 */
  data: Uint8ClampedArray;
};

/**
 * Outcome of inspecting an EMF file, discriminated by `type`:
 * - `pdf`: embedded PDF bytes were found
 * - `bitmap`: an embedded DIB was decoded to RGBA
 * - `empty`: the file holds no drawing records
 * - `unsupported`: nothing extractable was recognised
 */
export type EmfContent =
  | { type: 'pdf'; data: Uint8Array }
  | { type: 'bitmap'; bitmap: RasterBitmap }
  | { type: 'empty' }
  | { type: 'unsupported' };
|
|
27
|
+
|
|
28
|
+
// Record type codes the walker reacts to.
const EMR_EOF = 14;
const EMR_COMMENT = 70;
const EMR_STRETCHDIBITS = 81;

// GDI comment identifiers (MS-EMF spec)
const GDIC_COMMENT_ID = 0x43494447; // "GDIC"
const GDIC_BEGINGROUP = 0x00000002;
const GDIC_MULTIFORMATS = 0x40000004;

// Signature stored at byte offset 40 of the EMF header record.
const EMF_SIGNATURE = 0x464d4520; // " EMF"

// Byte markers delimiting an embedded PDF document.
const PDF_HEADER = [0x25, 0x50, 0x44, 0x46]; // "%PDF"
const PDF_EOF = [0x25, 0x25, 0x45, 0x4f, 0x46]; // "%%EOF"

// DIB compression mode: uncompressed RGB.
const BI_RGB = 0;
|
|
47
|
+
|
|
48
|
+
/**
 * Walk the records of an EMF file and return the first embedded resource found.
 *
 * Each record starts with `{ type: u32, size: u32 }` (little-endian). Comment
 * records are handed to `parseGdiComment`, STRETCHDIBITS records to
 * `parseStretchDibits`; the first non-null result wins. Walking stops at the
 * EOF record, at the end of the buffer, or at a record whose size is
 * implausible.
 *
 * @param data - Raw bytes of the EMF file.
 * @returns `unsupported` when the buffer is shorter than 44 bytes or lacks the
 *   EMF signature at offset 40; the extracted content when a handler yields
 *   one; `empty` when at most two records were visited; `unsupported` otherwise.
 */
export function parseEmfContent(data: Uint8Array): EmfContent {
  // 44 bytes are required to read the signature dword at offset 40.
  if (data.length < 44) return { type: 'unsupported' };

  const view = new DataView(data.buffer, data.byteOffset, data.byteLength);
  if (view.getUint32(40, true) !== EMF_SIGNATURE) return { type: 'unsupported' };

  let visited = 0;

  for (let pos = 0; pos + 8 <= data.length; ) {
    const kind = view.getUint32(pos, true);
    const size = view.getUint32(pos + 4, true);

    // A record must at least hold its own 8-byte prefix and fit in the buffer.
    if (size < 8 || pos + size > data.length) break;

    visited += 1;
    if (kind === EMR_EOF) break;

    let found: EmfContent | null = null;
    if (kind === EMR_COMMENT && size > 16) {
      // GDI comment records may carry an embedded PDF.
      found = parseGdiComment(data, view, pos, size);
    } else if (kind === EMR_STRETCHDIBITS && size > 80) {
      // STRETCHDIBITS records may carry an embedded bitmap.
      found = parseStretchDibits(data, view, pos, size);
    }
    if (found) return found;

    pos += size;
  }

  // Only HEADER + EOF → empty
  return visited <= 2 ? { type: 'empty' } : { type: 'unsupported' };
}
|
|
97
|
+
|
|
98
|
+
/**
 * Inspect one EMR_COMMENT record for an embedded PDF.
 *
 * Record layout: type(4) + size(4) + cbData(4) + commentId(4) + ...
 *
 * Order of attempts:
 *  1. "GDIC" public comment of type BEGINGROUP: scan the record payload for a PDF.
 *  2. "GDIC" public comment of type MULTIFORMATS: delegate to `parseMultiformats`.
 *  3. Any comment larger than 100 bytes: scan the record payload for a PDF.
 *
 * @param data - Whole EMF buffer.
 * @param view - DataView over `data`.
 * @param offset - Byte offset of the record within `data`.
 * @param recordSize - Size of the record in bytes, as read from its prefix.
 * @returns The extracted content, or null when this record yields nothing.
 */
function parseGdiComment(
  data: Uint8Array,
  view: DataView,
  offset: number,
  recordSize: number,
): EmfContent | null {
  if (offset + 16 > data.length) return null;

  // Scans everything after the 8-byte type/size prefix for %PDF...%%EOF.
  const scanPayloadForPdf = (): EmfContent | null => {
    const payload = data.subarray(offset + 8, offset + recordSize);
    const pdfBytes = extractPdfFromBuffer(payload);
    return pdfBytes ? { type: 'pdf', data: pdfBytes } : null;
  };

  const isGdiPublicComment =
    view.getUint32(offset + 12, true) === GDIC_COMMENT_ID && offset + 20 <= data.length;

  if (isGdiPublicComment) {
    const publicType = view.getUint32(offset + 16, true);

    if (publicType === GDIC_BEGINGROUP) {
      const hit = scanPayloadForPdf();
      if (hit) return hit;
    }

    if (publicType === GDIC_MULTIFORMATS && offset + 24 <= data.length) {
      // MULTIFORMATS: parse format descriptors and extract first usable one
      const hit = parseMultiformats(data, view, offset, recordSize);
      if (hit) return hit;
    }
  }

  // Larger comments of any kind are also searched for raw PDF data.
  return recordSize > 100 ? scanPayloadForPdf() : null;
}
|
|
138
|
+
|
|
139
|
+
/**
 * Parse a MULTIFORMATS GDI comment and return the first embedded PDF it points to.
 *
 * Layout from record start:
 *   +12: commentIdentifier(4), +16: publicCommentIdentifier(4)
 *   +20: outputRect(16 = RECTL)
 *   +36: countFormats(4)
 *   +40: format descriptors, each { signature(4), version(4), cbData(4), offData(4) }
 *
 * The Windows EMRFORMAT definition measures `offData` from the comment
 * identifier field (record offset 12), not from the record start. Resolving it
 * against the record start makes the slice end 12 bytes early and drops the
 * PDF trailer. The documented base is therefore tried first; the record-start
 * base is kept as a fallback for writers that do not follow the definition.
 *
 * At most the first 10 descriptors are examined.
 *
 * @param data - Whole EMF buffer.
 * @param view - DataView over `data`.
 * @param offset - Byte offset of the comment record within `data`.
 * @param _recordSize - Record size in bytes (currently unused).
 * @returns `{ type: 'pdf' }` content for the first descriptor holding a PDF, else null.
 */
function parseMultiformats(
  data: Uint8Array,
  view: DataView,
  offset: number,
  _recordSize: number,
): EmfContent | null {
  if (offset + 40 > data.length) return null;

  const countFormats = view.getUint32(offset + 36, true);
  const descriptorStart = offset + 40;

  // Candidate origins for offData, in order of preference:
  // the comment identifier field (documented), then the record start (legacy).
  const offsetBases = [offset + 12, offset];

  for (let i = 0; i < countFormats && i < 10; i++) {
    const descOff = descriptorStart + i * 16;
    if (descOff + 16 > data.length) break;

    const cbData = view.getUint32(descOff + 8, true);
    const offData = view.getUint32(descOff + 12, true);
    if (cbData === 0) continue;

    for (const base of offsetBases) {
      const dataStart = base + offData;
      // Skip ranges that would run past the end of the buffer.
      if (dataStart + cbData > data.length) continue;

      const formatData = data.subarray(dataStart, dataStart + cbData);
      const pdf = extractPdfFromBuffer(formatData);
      if (pdf) return { type: 'pdf', data: pdf };
    }
  }

  return null;
}
|
|
176
|
+
|
|
177
|
+
/**
 * Locate a PDF document inside a byte buffer and return a copy of its bytes.
 *
 * The document starts at the first "%PDF" marker and ends just after the last
 * "%%EOF" marker at or after that start (a PDF may contain several "%%EOF"
 * markers). When no "%%EOF" is present, everything up to the end of the
 * buffer is returned.
 *
 * @param buf - Bytes to search.
 * @returns A new Uint8Array holding the PDF, or null when "%PDF" is absent.
 */
function extractPdfFromBuffer(buf: Uint8Array): Uint8Array | null {
  const start = findSequence(buf, PDF_HEADER);
  if (start === -1) return null;

  // Default: no trailer found, so take everything up to the end of the buffer.
  let end = buf.length;

  // Scan backwards so the last "%%EOF" is the one that wins.
  let probe = buf.length - PDF_EOF.length;
  while (probe >= start) {
    if (matchesAt(buf, probe, PDF_EOF)) {
      end = probe + PDF_EOF.length;
      break;
    }
    probe--;
  }

  return buf.slice(start, end);
}
|
|
200
|
+
|
|
201
|
+
/**
 * Parse a STRETCHDIBITS record and extract the bitmap as an RGBA buffer.
 *
 * Record layout (offsets from record start):
 *    0: type(4),       4: size(4)
 *    8: rclBounds (16 bytes)
 *   24: xDest(4),     28: yDest(4)
 *   32: xSrc(4),      36: ySrc(4)
 *   40: cxSrc(4),     44: cySrc(4)
 *   48: offBmiSrc(4), 52: cbBmiSrc(4)
 *   56: offBitsSrc(4), 60: cbBitsSrc(4)
 *   64: iUsageSrc(4), 68: dwRop(4)
 *   72: cxDest(4),    76: cyDest(4)
 *
 * Only uncompressed (BI_RGB) 24- and 32-bit bitmaps up to 8192×8192 are
 * decoded. Pixels whose source bytes are missing (truncated bit data) are
 * left as transparent black.
 *
 * Alpha handling: in a 32-bit BI_RGB DIB the fourth byte of each pixel is
 * documented as unused and is usually zero. Copying it verbatim would make
 * such images fully transparent, so the source alpha is used only when at
 * least one alpha byte is non-zero; otherwise every decoded pixel is opaque.
 *
 * @param data - Whole EMF buffer.
 * @param view - DataView over `data`.
 * @param offset - Byte offset of the record within `data`.
 * @param _recordSize - Record size in bytes (currently unused).
 * @returns `{ type: 'bitmap' }` content, or null when the record cannot be decoded.
 */
function parseStretchDibits(
  data: Uint8Array,
  view: DataView,
  offset: number,
  _recordSize: number,
): EmfContent | null {
  if (offset + 80 > data.length) return null;

  const offBmiSrc = view.getUint32(offset + 48, true);
  const cbBmiSrc = view.getUint32(offset + 52, true);
  const offBitsSrc = view.getUint32(offset + 56, true);
  const cbBitsSrc = view.getUint32(offset + 60, true);

  if (cbBmiSrc === 0 || cbBitsSrc === 0) return null;

  // The 40-byte BITMAPINFOHEADER must lie inside the buffer.
  const bmiStart = offset + offBmiSrc;
  if (bmiStart + 40 > data.length) return null;

  const biWidth = view.getInt32(bmiStart + 4, true);
  const biHeight = view.getInt32(bmiStart + 8, true);
  const biBitCount = view.getUint16(bmiStart + 14, true);
  const biCompression = view.getUint32(bmiStart + 16, true);

  // Only support uncompressed RGB bitmaps
  if (biCompression !== BI_RGB) return null;
  if (biBitCount !== 24 && biBitCount !== 32) return null;

  const width = Math.abs(biWidth);
  const height = Math.abs(biHeight);
  if (width === 0 || height === 0 || width > 8192 || height > 8192) return null;

  const bitsStart = offset + offBitsSrc;
  if (bitsStart + cbBitsSrc > data.length) return null;

  const bitsData = data.subarray(bitsStart, bitsStart + cbBitsSrc);

  // Negative height means top-down row order; positive means bottom-up
  const topDown = biHeight < 0;

  const bytesPerPixel = biBitCount / 8;
  // DIB rows are padded to 4-byte boundaries
  const rowStride = Math.ceil((width * bytesPerPixel) / 4) * 4;

  // Decide whether the fourth byte carries real alpha (32-bit only).
  // 32-bit rows have no padding, so alpha bytes sit at indices 3, 7, 11, ...
  let useSourceAlpha = false;
  if (biBitCount === 32) {
    const scanLimit = Math.min(bitsData.length, rowStride * height);
    for (let i = 3; i < scanLimit; i += 4) {
      if (bitsData[i] !== 0) {
        useSourceAlpha = true;
        break;
      }
    }
  }

  const pixels = new Uint8ClampedArray(width * height * 4);

  for (let y = 0; y < height; y++) {
    const srcRow = topDown ? y : height - 1 - y;
    const srcOffset = srcRow * rowStride;
    const dstOffset = y * width * 4;

    for (let x = 0; x < width; x++) {
      const srcIdx = srcOffset + x * bytesPerPixel;
      // Stop this row once the source data runs out.
      if (srcIdx + bytesPerPixel > bitsData.length) break;

      const dstIdx = dstOffset + x * 4;
      // DIB stores BGR(A)
      pixels[dstIdx + 0] = bitsData[srcIdx + 2]; // R
      pixels[dstIdx + 1] = bitsData[srcIdx + 1]; // G
      pixels[dstIdx + 2] = bitsData[srcIdx + 0]; // B
      pixels[dstIdx + 3] = useSourceAlpha ? bitsData[srcIdx + 3] : 255; // A
    }
  }

  return { type: 'bitmap', bitmap: { width, height, data: pixels } };
}
|
|
278
|
+
|
|
279
|
+
/**
 * Return the index of the first place `seq` occurs in `buf`.
 *
 * @param buf - Bytes to search.
 * @param seq - Byte values to look for.
 * @returns The zero-based start index of the first match, or -1 when absent.
 */
function findSequence(buf: Uint8Array, seq: number[]): number {
  // Last index at which the whole sequence still fits inside the buffer.
  const lastStart = buf.length - seq.length;
  let pos = 0;
  while (pos <= lastStart) {
    if (matchesAt(buf, pos, seq)) return pos;
    pos++;
  }
  return -1;
}
|
|
289
|
+
|
|
290
|
+
/**
 * Test whether `seq` appears in `buf` starting exactly at `offset`.
 *
 * @param buf - Bytes to inspect.
 * @param offset - Index in `buf` where the comparison starts.
 * @param seq - Expected byte values.
 * @returns True when every byte of `seq` equals the corresponding byte of `buf`.
 */
function matchesAt(buf: Uint8Array, offset: number, seq: number[]): boolean {
  for (const [j, expected] of seq.entries()) {
    // Reads past the end of `buf` give undefined, which never equals a byte.
    if (buf[offset + j] !== expected) return false;
  }
  return true;
}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Media utilities — MIME type detection, path resolution, and blob URL management.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/**
 * Extension → MIME type table for the image, video and audio formats used in
 * PPTX files. A Map is used instead of a plain object so that inherited
 * property names (e.g. "constructor", "toString") are never treated as
 * extensions, and the table is built once rather than on every call.
 */
const MIME_TYPE_BY_EXTENSION: ReadonlyMap<string, string> = new Map([
  ['png', 'image/png'],
  ['jpg', 'image/jpeg'],
  ['jpeg', 'image/jpeg'],
  ['gif', 'image/gif'],
  ['svg', 'image/svg+xml'],
  ['bmp', 'image/bmp'],
  ['tiff', 'image/tiff'],
  ['tif', 'image/tiff'],
  ['emf', 'image/x-emf'],
  ['wmf', 'image/x-wmf'],
  ['webp', 'image/webp'],
  ['mp4', 'video/mp4'],
  ['m4v', 'video/mp4'],
  ['webm', 'video/webm'],
  ['avi', 'video/x-msvideo'],
  ['mp3', 'audio/mpeg'],
  ['wav', 'audio/wav'],
  ['m4a', 'audio/mp4'],
  ['ogg', 'audio/ogg'],
]);

/**
 * Determine MIME type from file extension.
 * Covers images, video, and audio formats used in PPTX files.
 *
 * The extension is the text after the last "." in `path`, compared
 * case-insensitively.
 *
 * @param path - File path or name.
 * @returns The matching MIME type, or "application/octet-stream" when the
 *   extension is not in the table.
 */
export function getMimeType(path: string): string {
  const ext = path.split('.').pop()?.toLowerCase() ?? '';
  return MIME_TYPE_BY_EXTENSION.get(ext) ?? 'application/octet-stream';
}
|
|
34
|
+
|
|
35
|
+
/**
 * Map a relationship target onto its canonical location under "ppt/media/".
 *
 * Only the final path segment of `target` is kept, so "../media/image1.png"
 * becomes "ppt/media/image1.png".
 *
 * @param target - Relative target taken from a rels entry.
 * @returns "ppt/media/" followed by the last "/"-separated segment of `target`.
 */
export function resolveMediaPath(target: string): string {
  const segments = target.split('/');
  const fileName = segments[segments.length - 1] || '';
  return 'ppt/media/' + fileName;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Return the blob URL for a media file, creating and caching it on first use.
 *
 * On a cache miss the data is wrapped in a Blob typed via `getMimeType`, a URL
 * is created with `URL.createObjectURL`, and the URL is stored in `cache`
 * under `mediaPath`.
 *
 * @param mediaPath - Canonical path (e.g. "ppt/media/image1.png"), used as cache key
 * @param data - Raw media data (Uint8Array or ArrayBuffer)
 * @param cache - Map to store/retrieve cached blob URLs
 * @returns The blob URL string
 */
export function getOrCreateBlobUrl(
  mediaPath: string,
  data: Uint8Array | ArrayBuffer,
  cache: Map<string, string>,
): string {
  const cached = cache.get(mediaPath);
  if (cached) return cached;

  const blob = new Blob([data as unknown as BlobPart], { type: getMimeType(mediaPath) });
  const created = URL.createObjectURL(blob);
  cache.set(mediaPath, created);
  return created;
}
|
|
67
|
+
|
|
68
|
+
/**
 * Assemble a `data:` URL from a MIME type and an already base64-encoded payload.
 *
 * @param base64 - Payload, already base64-encoded (inserted as-is).
 * @param mime - MIME type placed before the ";base64," marker.
 * @returns The string `data:<mime>;base64,<base64>`.
 */
export function toDataUrl(base64: string, mime: string): string {
  return ['data:', mime, ';base64,', base64].join('');
}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Convert legacy / non-web-safe image bytes (TIFF, EMF bitmap, etc.) to PNG data URLs
|
|
3
|
+
* so JSON output works in browsers (PPTist). Mirrors pptx-renderer ImageRenderer strategy.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import UTIF from 'utif';
|
|
7
|
+
import { parseEmfContent } from './emfParser';
|
|
8
|
+
import { rgbaToPngDataUrl } from './rgbaToPng';
|
|
9
|
+
import { getMimeType, toDataUrl } from './media';
|
|
10
|
+
|
|
11
|
+
/**
 * Minimal shape of a page (IFD) object as this module uses it with UTIF.
 * Only the fields read here are typed; any other keys are left as `unknown`.
 */
type UtifPage = {
  /** Page width; a falsy value makes the page unusable. */
  width: number;
  /** Page height; a falsy value makes the page unusable. */
  height: number;
  /** Optional byte data attached to the page. */
  data?: Uint8Array;
  /** Any further keys present on the page object. */
  [key: string]: unknown;
};
|
|
17
|
+
|
|
18
|
+
/**
 * Base64-encode raw bytes using whichever encoder the runtime provides.
 *
 * `btoa` is used when it exists; otherwise a global `Buffer` is used. The
 * intermediate binary string is only built on the `btoa` path.
 *
 * @param data - Bytes to encode.
 * @returns Standard base64 text (empty string for empty input).
 * @throws Error when neither `btoa` nor a global `Buffer` is available.
 */
function arrayBufferToBase64(data: Uint8Array): string {
  if (typeof btoa !== 'undefined') {
    // btoa needs a "binary string" (one char per byte). Build it in slices so
    // no single String.fromCharCode call receives an oversized argument list.
    const CHUNK_SIZE = 0x8000;
    const pieces: string[] = [];
    for (let start = 0; start < data.length; start += CHUNK_SIZE) {
      pieces.push(String.fromCharCode(...data.subarray(start, start + CHUNK_SIZE)));
    }
    return btoa(pieces.join(''));
  }

  const NodeBuffer = (typeof globalThis !== 'undefined' &&
    (globalThis as unknown as { Buffer?: { from(a: Uint8Array): { toString(e: string): string } } }).Buffer);
  if (NodeBuffer) return NodeBuffer.from(data).toString('base64');

  // Previously this fell through to btoa(), which is undefined at this point.
  throw new Error('No base64 encoder available (neither btoa nor Buffer is defined)');
}
|
|
30
|
+
|
|
31
|
+
/**
 * Lower-cased text after the last "." in `path`.
 * When `path` contains no ".", the whole path (lower-cased) is returned.
 */
function extOf(path: string): string {
  const afterLastDot = path.slice(path.lastIndexOf('.') + 1);
  return afterLastDot.toLowerCase();
}
|
|
34
|
+
|
|
35
|
+
/**
 * Decode the first page of a TIFF/TIF file to RGBA using UTIF.
 *
 * @param data - Raw TIFF bytes.
 * @returns Width, height and RGBA pixels of the first page, or null when the
 *   file has no pages, the page reports a falsy width or height, or UTIF throws.
 */
function tiffToRgba(data: Uint8Array): { width: number; height: number; data: Uint8ClampedArray } | null {
  try {
    const pages = UTIF.decode(data) as UtifPage[];
    if (pages.length === 0) return null;

    const firstPage = pages[0];
    UTIF.decodeImage(data, firstPage, pages);

    const { width, height } = firstPage;
    if (!width || !height) return null;

    return { width, height, data: new Uint8ClampedArray(UTIF.toRGBA8(firstPage)) };
  } catch {
    // Any decoding failure is reported as "not convertible".
    return null;
  }
}
|
|
53
|
+
|
|
54
|
+
/** 1×1 transparent PNG — fallback when conversion is not possible */
const TRANSPARENT_PNG_DATA_URL =
  'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg==';

/**
 * Turn media bytes into a data URL that browsers can display.
 *
 * The format is chosen from the file extension of `mediaPath`:
 * - TIFF/TIF: decoded and re-encoded as PNG; placeholder if decoding fails
 * - EMF: an embedded DIB is re-encoded as PNG; an embedded PDF, an empty file,
 *   or unsupported content all yield the placeholder (full PDF render needs pdfjs)
 * - WMF: not supported → placeholder
 * - anything else: original bytes as `data:<mime>;base64,<raw>`
 *
 * The placeholder is a 1×1 transparent PNG.
 *
 * @param mediaPath - Path or name of the media file; only its extension is inspected.
 * @param data - Raw bytes of the media file.
 * @returns A data URL string.
 */
export function encodeMediaForWebDisplay(mediaPath: string, data: Uint8Array): string {
  switch (extOf(mediaPath)) {
    case 'tif':
    case 'tiff': {
      const decoded = tiffToRgba(data);
      return decoded
        ? rgbaToPngDataUrl(decoded.data, decoded.width, decoded.height)
        : TRANSPARENT_PNG_DATA_URL;
    }

    case 'emf': {
      const content = parseEmfContent(data);
      if (content.type === 'bitmap' && content.bitmap) {
        const { width, height, data: rgba } = content.bitmap;
        return rgbaToPngDataUrl(rgba, width, height);
      }
      // PDF (optional: integrate pdfjs-dist later, see pptx-renderer
      // pdfRenderer.ts), empty, and unsupported content share the placeholder.
      return TRANSPARENT_PNG_DATA_URL;
    }

    case 'wmf':
      return TRANSPARENT_PNG_DATA_URL;

    default:
      return toDataUrl(arrayBufferToBase64(data), getMimeType(mediaPath));
  }
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Encode RGBA pixel buffer to PNG data URL (browser + Node).
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { PNG } from 'pngjs';
|
|
6
|
+
|
|
7
|
+
/**
 * Base64-encode raw bytes using whichever encoder the runtime provides.
 *
 * `btoa` is used when it exists; otherwise a global `Buffer` is used. The
 * intermediate binary string is only built on the `btoa` path.
 *
 * @param u8 - Bytes to encode.
 * @returns Standard base64 text (empty string for empty input).
 * @throws Error when neither `btoa` nor a global `Buffer` is available.
 */
function uint8ToBase64(u8: Uint8Array): string {
  if (typeof btoa !== 'undefined') {
    // btoa needs a "binary string" (one char per byte). Build it in slices so
    // no single String.fromCharCode call receives an oversized argument list.
    const CHUNK_SIZE = 0x8000;
    const pieces: string[] = [];
    for (let start = 0; start < u8.length; start += CHUNK_SIZE) {
      pieces.push(String.fromCharCode(...u8.subarray(start, start + CHUNK_SIZE)));
    }
    return btoa(pieces.join(''));
  }

  const NodeBuffer = (typeof globalThis !== 'undefined' &&
    (globalThis as unknown as { Buffer?: { from(a: Uint8Array): { toString(e: string): string } } }).Buffer);
  if (NodeBuffer) return NodeBuffer.from(u8).toString('base64');

  // Previously this fell through to btoa(), which is undefined at this point.
  throw new Error('No base64 encoder available (neither btoa nor Buffer is defined)');
}
|
|
18
|
+
|
|
19
|
+
/**
 * Encode an RGBA pixel buffer as a PNG and return it as a data URL.
 *
 * @param rgba - Pixel bytes, expected length width * height * 4.
 * @param width - Image width in pixels.
 * @param height - Image height in pixels.
 * @returns A string of the form `data:image/png;base64,<...>`.
 */
export function rgbaToPngDataUrl(
  rgba: Uint8Array | Uint8ClampedArray,
  width: number,
  height: number,
): string {
  const image = new PNG({ width, height });
  image.data.set(rgba);

  const encoded = PNG.sync.write(image) as unknown as Uint8Array;
  // Ensure the encoder output is handled as a Uint8Array before base64 encoding.
  const bytes = encoded instanceof Uint8Array ? encoded : new Uint8Array(encoded as ArrayLike<number>);

  return 'data:image/png;base64,' + uint8ToBase64(bytes);
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* URL safety utilities for external hyperlinks/media in untrusted PPTX content.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/** Protocols (including the trailing colon) accepted for external links. */
const ALLOWED_PROTOCOLS = new Set(['http:', 'https:', 'mailto:']);

/**
 * Check whether `url` is an absolute URL using one of the allowed protocols.
 *
 * @param url - Candidate URL taken from untrusted content.
 * @returns True when `url` parses as an absolute URL and its protocol is
 *   http, https or mailto; false otherwise, including when parsing fails.
 */
export function isAllowedExternalUrl(url: string): boolean {
  let protocol: string;
  try {
    protocol = new URL(url).protocol;
  } catch {
    // Relative or malformed input cannot be parsed without a base.
    return false;
  }
  return ALLOWED_PROTOCOLS.has(protocol.toLowerCase());
}
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "ES2022",
|
|
4
|
+
"useDefineForClassFields": true,
|
|
5
|
+
"module": "ESNext",
|
|
6
|
+
"lib": ["ES2022", "DOM", "DOM.Iterable"],
|
|
7
|
+
"skipLibCheck": true,
|
|
8
|
+
"outDir": "dist",
|
|
9
|
+
"rootDir": "src",
|
|
10
|
+
"declaration": true,
|
|
11
|
+
"declarationMap": true,
|
|
12
|
+
"moduleResolution": "bundler",
|
|
13
|
+
"strict": true,
|
|
14
|
+
"noUnusedLocals": false,
|
|
15
|
+
"noUnusedParameters": false,
|
|
16
|
+
"noFallthroughCasesInSwitch": true,
|
|
17
|
+
"esModuleInterop": true,
|
|
18
|
+
"allowSyntheticDefaultImports": true,
|
|
19
|
+
"resolveJsonModule": true,
|
|
20
|
+
"isolatedModules": true
|
|
21
|
+
},
|
|
22
|
+
"include": ["src"],
|
|
23
|
+
"exclude": ["node_modules", "dist", "src1"]
|
|
24
|
+
}
|