file2md 1.1.9 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. package/dist/index.d.ts +1 -1
  2. package/dist/index.d.ts.map +1 -1
  3. package/dist/index.js +52 -6
  4. package/dist/index.js.map +1 -1
  5. package/dist/parsers/hwp-parser.d.ts +20 -0
  6. package/dist/parsers/hwp-parser.d.ts.map +1 -0
  7. package/dist/parsers/hwp-parser.js +474 -0
  8. package/dist/parsers/hwp-parser.js.map +1 -0
  9. package/dist/parsers/pptx-parser.d.ts +3 -4
  10. package/dist/parsers/pptx-parser.d.ts.map +1 -1
  11. package/dist/parsers/pptx-parser.js +94 -81
  12. package/dist/parsers/pptx-parser.js.map +1 -1
  13. package/dist/types/interfaces.d.ts +2 -0
  14. package/dist/types/interfaces.d.ts.map +1 -1
  15. package/dist/types/interfaces.js +3 -1
  16. package/dist/types/interfaces.js.map +1 -1
  17. package/dist/utils/libreoffice-converter.d.ts +33 -0
  18. package/dist/utils/libreoffice-converter.d.ts.map +1 -0
  19. package/dist/utils/libreoffice-converter.js +169 -0
  20. package/dist/utils/libreoffice-converter.js.map +1 -0
  21. package/dist/utils/libreoffice-detector.d.ts +57 -0
  22. package/dist/utils/libreoffice-detector.d.ts.map +1 -0
  23. package/dist/utils/libreoffice-detector.js +295 -0
  24. package/dist/utils/libreoffice-detector.js.map +1 -0
  25. package/dist/utils/pptx-visual-parser.d.ts +190 -0
  26. package/dist/utils/pptx-visual-parser.d.ts.map +1 -0
  27. package/dist/utils/pptx-visual-parser.js +648 -0
  28. package/dist/utils/pptx-visual-parser.js.map +1 -0
  29. package/package.json +11 -7
  30. package/dist/utils/slide-renderer.d.ts +0 -91
  31. package/dist/utils/slide-renderer.d.ts.map +0 -1
  32. package/dist/utils/slide-renderer.js +0 -540
  33. package/dist/utils/slide-renderer.js.map +0 -1
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "file2md",
3
- "version": "1.1.9",
4
- "description": "A TypeScript library for converting various document types (PDF, DOCX, XLSX, PPTX) into Markdown with image and layout preservation",
3
+ "version": "1.2.0",
4
+ "description": "A TypeScript library for converting various document types (PDF, DOCX, XLSX, PPTX, HWP, HWPX) into Markdown with image and layout preservation",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
7
7
  "type": "module",
@@ -24,6 +24,10 @@
24
24
  "docx",
25
25
  "xlsx",
26
26
  "pptx",
27
+ "hwp",
28
+ "hwpx",
29
+ "korean",
30
+ "hangul",
27
31
  "document",
28
32
  "typescript",
29
33
  "layout-preservation",
@@ -32,17 +36,15 @@
32
36
  "author": "",
33
37
  "license": "MIT",
34
38
  "dependencies": {
35
- "canvas": "^2.11.2",
39
+ "fast-xml-parser": "^4.3.2",
36
40
  "file-type": "^16.5.4",
41
+ "hwp.js": "^0.0.3",
42
+ "jsdom": "^23.0.0",
37
43
  "jszip": "^3.10.1",
38
- "libreoffice-convert": "^1.6.1",
39
44
  "pdf-parse": "^1.1.1",
40
45
  "pdf2pic": "^2.1.4",
41
46
  "xml2js": "^0.6.2"
42
47
  },
43
- "optionalDependencies": {
44
- "@canvas/noto-fonts": "^1.0.0"
45
- },
46
48
  "engines": {
47
49
  "node": ">=18.0.0"
48
50
  },
@@ -52,10 +54,12 @@
52
54
  "@types/node": "^20.0.0",
53
55
  "@types/pdf-parse": "^1.1.5",
54
56
  "@types/xml2js": "^0.4.14",
57
+ "@types/jsdom": "^21.1.0",
55
58
  "@typescript-eslint/eslint-plugin": "^6.0.0",
56
59
  "@typescript-eslint/parser": "^6.0.0",
57
60
  "eslint": "^8.50.0",
58
61
  "jest": "^29.7.0",
62
+ "jest-image-snapshot": "^6.2.0",
59
63
  "rimraf": "^5.0.0",
60
64
  "ts-jest": "^29.1.0",
61
65
  "ts-node": "^10.9.0",
@@ -1,91 +0,0 @@
1
- import { Buffer } from 'node:buffer';
2
- import type { ImageData } from '../types/interfaces.js';
3
- export interface SlideRenderOptions {
4
- readonly quality?: number;
5
- readonly density?: number;
6
- readonly format?: 'png' | 'jpg';
7
- readonly saveBase64?: boolean;
8
- }
9
- export interface SlideRenderResult {
10
- readonly slideImages: readonly ImageData[];
11
- readonly slideCount: number;
12
- readonly metadata: {
13
- readonly format: string;
14
- readonly quality: number;
15
- readonly density: number;
16
- };
17
- }
18
- export declare class SlideRenderer {
19
- private readonly outputDir;
20
- constructor(outputDir: string);
21
- /**
22
- * Convert PPTX buffer to individual slide images
23
- */
24
- renderSlidesToImages(pptxBuffer: Buffer, options?: SlideRenderOptions): Promise<SlideRenderResult>;
25
- /**
26
- * Convert PPTX buffer to PDF buffer using multiple methods
27
- */
28
- private convertPptxToPdf;
29
- /**
30
- * Create slide images without LibreOffice using direct PPTX parsing
31
- */
32
- private createAlternativeSlideImages;
33
- private generatedSlideImages;
34
- /**
35
- * Render a single slide to image using canvas with multi-language font support
36
- */
37
- private renderSlideToImage;
38
- /**
39
- * Register system fonts and fallback fonts for international character support
40
- */
41
- private registerFontsForCanvas;
42
- /**
43
- * Get font family name from font path
44
- */
45
- private getFontFamily;
46
- /**
47
- * Get universal font string with fallbacks for international characters
48
- */
49
- private getUniversalFont;
50
- /**
51
- * Wrap text to fit within specified width
52
- */
53
- private wrapText;
54
- /**
55
- * Create a text-based slide image when canvas is not available
56
- */
57
- private createTextBasedSlideImage;
58
- /**
59
- * Create SVG representation of slide content
60
- */
61
- private createSVGSlideImage;
62
- /**
63
- * Convert SVG to PNG buffer
64
- */
65
- private convertSVGToPNG;
66
- /**
67
- * Extract text content from slide XML data
68
- */
69
- private extractSlideText;
70
- /**
71
- * Create placeholder slide image for failed conversions
72
- */
73
- private createPlaceholderSlideImage;
74
- /**
75
- * Convert PDF buffer to individual slide images using pdf2pic or return pre-generated images
76
- */
77
- private convertPdfToSlideImages;
78
- /**
79
- * Generate markdown with slide images
80
- */
81
- generateSlideMarkdown(slideImages: readonly ImageData[], title?: string): string;
82
- /**
83
- * Clean up generated image files
84
- */
85
- cleanup(): Promise<void>;
86
- /**
87
- * Check if LibreOffice is available on the system
88
- */
89
- static checkLibreOfficeAvailability(): Promise<boolean>;
90
- }
91
- //# sourceMappingURL=slide-renderer.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"slide-renderer.d.ts","sourceRoot":"","sources":["../../src/utils/slide-renderer.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAMrC,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,wBAAwB,CAAC;AAKxD,MAAM,WAAW,kBAAkB;IACjC,QAAQ,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,MAAM,CAAC,EAAE,KAAK,GAAG,KAAK,CAAC;IAChC,QAAQ,CAAC,UAAU,CAAC,EAAE,OAAO,CAAC;CAC/B;AAED,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,WAAW,EAAE,SAAS,SAAS,EAAE,CAAC;IAC3C,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,QAAQ,EAAE;QACjB,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;QACxB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;QACzB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;KAC1B,CAAC;CACH;AAED,qBAAa,aAAa;IACxB,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;gBAEvB,SAAS,EAAE,MAAM;IAI7B;;OAEG;IACG,oBAAoB,CACxB,UAAU,EAAE,MAAM,EAClB,OAAO,GAAE,kBAAuB,GAC/B,OAAO,CAAC,iBAAiB,CAAC;IAgD7B;;OAEG;YACW,gBAAgB;IAoB9B;;OAEG;YACW,4BAA4B;IAkF1C,OAAO,CAAC,oBAAoB,CAAmB;IAE/C;;OAEG;YACW,kBAAkB;IA8DhC;;OAEG;YACW,sBAAsB;IA2CpC;;OAEG;IACH,OAAO,CAAC,aAAa;IAarB;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAKxB;;OAEG;IACH,OAAO,CAAC,QAAQ;IAwBhB;;OAEG;YACW,yBAAyB;IAmBvC;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAsC3B;;OAEG;YACW,eAAe;IAwB7B;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAwCxB;;OAEG;YACW,2BAA2B;IAQzC;;OAEG;YACW,uBAAuB;IA0ErC;;OAEG;IACH,qBAAqB,CAAC,WAAW,EAAE,SAAS,SAAS,EAAE,EAAE,KAAK,CAAC,EAAE,MAAM,GAAG,MAAM;IAuBhF;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAgB9B;;OAEG;WACU,4BAA4B,IAAI,OAAO,CAAC,OAAO,CAAC;CAU9D"}
@@ -1,540 +0,0 @@
1
- import path from 'node:path';
2
- import fs from 'node:fs/promises';
3
- import { Buffer } from 'node:buffer';
4
- import libre from 'libreoffice-convert';
5
- import { fromBuffer } from 'pdf2pic';
6
- import { promisify } from 'node:util';
7
- import { ParseError } from '../types/errors.js';
8
- // Promisify libreoffice-convert
9
- const convertAsync = promisify(libre.convert);
10
- export class SlideRenderer {
11
- outputDir;
12
- constructor(outputDir) {
13
- this.outputDir = outputDir;
14
- }
15
- /**
16
- * Convert PPTX buffer to individual slide images
17
- */
18
- async renderSlidesToImages(pptxBuffer, options = {}) {
19
- const { quality = 90, density = 150, format = 'png', saveBase64 = false } = options;
20
- try {
21
- // Ensure output directory exists
22
- await fs.mkdir(this.outputDir, { recursive: true });
23
- console.log('Created slide output directory:', this.outputDir);
24
- // Step 1: Convert PPTX to PDF using LibreOffice
25
- console.log('Converting PPTX to PDF...');
26
- const pdfBuffer = await this.convertPptxToPdf(pptxBuffer);
27
- console.log('PPTX to PDF conversion successful, PDF size:', pdfBuffer.length);
28
- // Step 2: Convert PDF to individual slide images
29
- console.log('Converting PDF to slide images...');
30
- const slideImages = await this.convertPdfToSlideImages(pdfBuffer, { quality, density, format, saveBase64 });
31
- console.log(`Generated ${slideImages.length} slide images`);
32
- // Verify images were actually created
33
- for (const slide of slideImages) {
34
- const exists = await fs.access(slide.savedPath).then(() => true).catch(() => false);
35
- console.log(`Slide image ${slide.savedPath} exists:`, exists);
36
- }
37
- return {
38
- slideImages,
39
- slideCount: slideImages.length,
40
- metadata: {
41
- format,
42
- quality,
43
- density
44
- }
45
- };
46
- }
47
- catch (error) {
48
- const message = error instanceof Error ? error.message : 'Unknown error';
49
- console.error('SlideRenderer error:', message);
50
- throw new ParseError('SlideRenderer', `Failed to render slides: ${message}`, error);
51
- }
52
- }
53
- /**
54
- * Convert PPTX buffer to PDF buffer using multiple methods
55
- */
56
- async convertPptxToPdf(pptxBuffer) {
57
- // Try LibreOffice first
58
- try {
59
- console.log('Trying LibreOffice conversion...');
60
- const pdfBuffer = await convertAsync(pptxBuffer, '.pdf', undefined);
61
- if (pdfBuffer && pdfBuffer.length > 0) {
62
- console.log('LibreOffice conversion successful, PDF size:', pdfBuffer.length);
63
- return pdfBuffer;
64
- }
65
- }
66
- catch (libreOfficeError) {
67
- const message = libreOfficeError instanceof Error ? libreOfficeError.message : 'Unknown error';
68
- console.log('LibreOffice conversion failed:', message);
69
- }
70
- // LibreOffice failed, try alternative approach
71
- console.log('Attempting alternative slide screenshot generation...');
72
- return await this.createAlternativeSlideImages(pptxBuffer);
73
- }
74
- /**
75
- * Create slide images without LibreOffice using direct PPTX parsing
76
- */
77
- async createAlternativeSlideImages(pptxBuffer) {
78
- try {
79
- // Import required modules dynamically
80
- const JSZip = (await import('jszip')).default;
81
- const { parseStringPromise } = await import('xml2js');
82
- // Parse PPTX to get slide information
83
- const zip = await JSZip.loadAsync(pptxBuffer);
84
- // Get slide files
85
- const slideFiles = [];
86
- zip.forEach((relativePath, file) => {
87
- if (relativePath.startsWith('ppt/slides/slide') && relativePath.endsWith('.xml')) {
88
- slideFiles.push({
89
- path: relativePath,
90
- file: file,
91
- slideNumber: parseInt(relativePath.match(/slide(\d+)\.xml/)?.[1] || '0')
92
- });
93
- }
94
- });
95
- slideFiles.sort((a, b) => a.slideNumber - b.slideNumber);
96
- console.log(`Found ${slideFiles.length} slides to convert`);
97
- // Create individual slide images directly
98
- const slideImages = [];
99
- for (let i = 0; i < slideFiles.length; i++) {
100
- const slideFile = slideFiles[i];
101
- const slideNumber = i + 1;
102
- try {
103
- // Parse slide XML to extract content
104
- const xmlContent = await slideFile.file.async('string');
105
- const slideData = await parseStringPromise(xmlContent);
106
- // Generate slide image using canvas-based rendering
107
- const slideImageBuffer = await this.renderSlideToImage(slideData, slideNumber, zip);
108
- if (slideImageBuffer) {
109
- const filename = `slide-${slideNumber.toString().padStart(3, '0')}.png`;
110
- const savedPath = path.join(this.outputDir, filename);
111
- // Save the generated slide image
112
- await fs.writeFile(savedPath, slideImageBuffer);
113
- console.log(`Generated slide screenshot: ${filename}`);
114
- slideImages.push({
115
- originalPath: `slide${slideNumber}`,
116
- savedPath: savedPath,
117
- size: slideImageBuffer.length,
118
- format: 'png'
119
- });
120
- }
121
- }
122
- catch (slideError) {
123
- console.warn(`Failed to generate slide ${slideNumber}:`, slideError);
124
- // Create a placeholder image for failed slides
125
- const placeholderBuffer = await this.createPlaceholderSlideImage(slideNumber);
126
- const filename = `slide-${slideNumber.toString().padStart(3, '0')}.png`;
127
- const savedPath = path.join(this.outputDir, filename);
128
- await fs.writeFile(savedPath, placeholderBuffer);
129
- slideImages.push({
130
- originalPath: `slide${slideNumber}`,
131
- savedPath: savedPath,
132
- size: placeholderBuffer.length,
133
- format: 'png'
134
- });
135
- }
136
- }
137
- // Return a fake PDF buffer to satisfy the interface
138
- // The actual slide images have been saved to disk
139
- this.generatedSlideImages = slideImages;
140
- return Buffer.from('FAKE_PDF_FOR_ALTERNATIVE_METHOD');
141
- }
142
- catch (error) {
143
- const message = error instanceof Error ? error.message : 'Unknown error';
144
- throw new ParseError('SlideRenderer', `Alternative slide conversion failed: ${message}`, error);
145
- }
146
- }
147
- generatedSlideImages = [];
148
- /**
149
- * Render a single slide to image using canvas with multi-language font support
150
- */
151
- async renderSlideToImage(slideData, slideNumber, zip) {
152
- try {
153
- // Import canvas dynamically (make it optional)
154
- let Canvas;
155
- try {
156
- Canvas = await import('canvas');
157
- }
158
- catch {
159
- console.log('Canvas module not available, creating text-based slide image');
160
- return await this.createTextBasedSlideImage(slideData, slideNumber);
161
- }
162
- const width = 1920;
163
- const height = 1080;
164
- const canvas = Canvas.createCanvas(width, height);
165
- const ctx = canvas.getContext('2d');
166
- // Register fonts for international character support
167
- await this.registerFontsForCanvas(Canvas);
168
- // Set background
169
- ctx.fillStyle = '#ffffff';
170
- ctx.fillRect(0, 0, width, height);
171
- // Add slide number with multi-language font
172
- ctx.fillStyle = '#333333';
173
- ctx.font = this.getUniversalFont(48);
174
- ctx.textAlign = 'center';
175
- ctx.fillText(`Slide ${slideNumber}`, width / 2, 100);
176
- // Extract and render text content
177
- const textContent = this.extractSlideText(slideData);
178
- if (textContent.length > 0) {
179
- ctx.font = this.getUniversalFont(32);
180
- ctx.textAlign = 'left';
181
- let yPos = 200;
182
- for (const text of textContent.slice(0, 20)) { // Limit to 20 lines
183
- // Handle long text with proper wrapping
184
- const wrappedLines = this.wrapText(ctx, text, width - 200); // Leave margins
185
- for (const line of wrappedLines.slice(0, 2)) { // Max 2 lines per text element
186
- ctx.fillText(line, 100, yPos);
187
- yPos += 50;
188
- if (yPos > height - 100)
189
- break;
190
- }
191
- if (yPos > height - 100)
192
- break;
193
- }
194
- }
195
- // Add border
196
- ctx.strokeStyle = '#cccccc';
197
- ctx.lineWidth = 4;
198
- ctx.strokeRect(0, 0, width, height);
199
- return canvas.toBuffer('image/png');
200
- }
201
- catch (error) {
202
- console.warn('Canvas rendering failed:', error);
203
- return await this.createTextBasedSlideImage(slideData, slideNumber);
204
- }
205
- }
206
- /**
207
- * Register system fonts and fallback fonts for international character support
208
- */
209
- async registerFontsForCanvas(Canvas) {
210
- try {
211
- // Try to register common system fonts that support international characters
212
- const fontPaths = [
213
- // Windows fonts
214
- 'C:\\Windows\\Fonts\\arial.ttf',
215
- 'C:\\Windows\\Fonts\\SimSun.ttc', // Chinese (Simplified)
216
- 'C:\\Windows\\Fonts\\mingliu.ttc', // Chinese (Traditional)
217
- 'C:\\Windows\\Fonts\\malgun.ttf', // Korean
218
- 'C:\\Windows\\Fonts\\meiryo.ttc', // Japanese
219
- 'C:\\Windows\\Fonts\\NotoSansCJK-Regular.ttc', // Noto CJK
220
- // macOS fonts
221
- '/System/Library/Fonts/Arial.ttf',
222
- '/System/Library/Fonts/PingFang.ttc', // Chinese
223
- '/System/Library/Fonts/AppleGothic.ttf', // Korean
224
- '/System/Library/Fonts/Hiragino Sans GB.ttc', // Japanese/Chinese
225
- // Linux fonts
226
- '/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf',
227
- '/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc',
228
- '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf'
229
- ];
230
- const fs = await import('fs/promises');
231
- for (const fontPath of fontPaths) {
232
- try {
233
- await fs.access(fontPath);
234
- Canvas.registerFont(fontPath, {
235
- family: this.getFontFamily(fontPath)
236
- });
237
- console.log(`Registered font: ${fontPath}`);
238
- }
239
- catch {
240
- // Font file doesn't exist, skip silently
241
- }
242
- }
243
- }
244
- catch (error) {
245
- console.warn('Font registration failed:', error);
246
- // Continue without custom fonts - will use Canvas defaults
247
- }
248
- }
249
- /**
250
- * Get font family name from font path
251
- */
252
- getFontFamily(fontPath) {
253
- const filename = fontPath.split(/[/\\]/).pop() || '';
254
- if (filename.includes('SimSun') || filename.includes('PingFang'))
255
- return 'SimSun';
256
- if (filename.includes('malgun') || filename.includes('AppleGothic'))
257
- return 'Malgun Gothic';
258
- if (filename.includes('meiryo') || filename.includes('Hiragino'))
259
- return 'Meiryo';
260
- if (filename.includes('Noto'))
261
- return 'Noto Sans CJK';
262
- if (filename.includes('Liberation'))
263
- return 'Liberation Sans';
264
- if (filename.includes('DejaVu'))
265
- return 'DejaVu Sans';
266
- return 'Arial'; // Default fallback
267
- }
268
- /**
269
- * Get universal font string with fallbacks for international characters
270
- */
271
- getUniversalFont(size) {
272
- // Use a comprehensive font stack that covers most international characters
273
- return `${size}px "Noto Sans CJK", "SimSun", "Malgun Gothic", "Meiryo", "Liberation Sans", "DejaVu Sans", "Arial Unicode MS", Arial, sans-serif`;
274
- }
275
- /**
276
- * Wrap text to fit within specified width
277
- */
278
- wrapText(ctx, text, maxWidth) {
279
- const words = text.split(' ');
280
- const lines = [];
281
- let currentLine = '';
282
- for (const word of words) {
283
- const testLine = currentLine ? `${currentLine} ${word}` : word;
284
- const metrics = ctx.measureText(testLine);
285
- if (metrics.width > maxWidth && currentLine) {
286
- lines.push(currentLine);
287
- currentLine = word;
288
- }
289
- else {
290
- currentLine = testLine;
291
- }
292
- }
293
- if (currentLine) {
294
- lines.push(currentLine);
295
- }
296
- return lines;
297
- }
298
- /**
299
- * Create a text-based slide image when canvas is not available
300
- */
301
- async createTextBasedSlideImage(slideData, slideNumber) {
302
- try {
303
- // Try to use a simple image generation approach
304
- const textContent = this.extractSlideText(slideData);
305
- // Create a minimal SVG that can be converted to PNG
306
- const svgContent = this.createSVGSlideImage(slideNumber, textContent);
307
- // Try to convert SVG to PNG buffer
308
- return await this.convertSVGToPNG(svgContent);
309
- }
310
- catch (error) {
311
- console.warn('SVG fallback failed:', error);
312
- // Ultimate fallback - return a simple text buffer
313
- const textContent = this.extractSlideText(slideData);
314
- const slideText = `SLIDE ${slideNumber}\n\n${textContent.join('\n')}`;
315
- return Buffer.from(`Slide ${slideNumber} Content:\n${slideText}`);
316
- }
317
- }
318
- /**
319
- * Create SVG representation of slide content
320
- */
321
- createSVGSlideImage(slideNumber, textContent) {
322
- const width = 1920;
323
- const height = 1080;
324
- let svgContent = `<?xml version="1.0" encoding="UTF-8"?>
325
- <svg width="${width}" height="${height}" xmlns="http://www.w3.org/2000/svg">
326
- <!-- Background -->
327
- <rect width="${width}" height="${height}" fill="white" stroke="#cccccc" stroke-width="4"/>
328
-
329
- <!-- Slide Number -->
330
- <text x="${width / 2}" y="100" text-anchor="middle" font-family="Arial, sans-serif" font-size="48" fill="#333333">Slide ${slideNumber}</text>
331
-
332
- <!-- Content -->`;
333
- let yPos = 200;
334
- for (const text of textContent.slice(0, 15)) { // Limit to 15 lines
335
- if (yPos > height - 100)
336
- break;
337
- // Escape HTML entities for SVG
338
- const escapedText = text
339
- .replace(/&/g, '&amp;')
340
- .replace(/</g, '&lt;')
341
- .replace(/>/g, '&gt;')
342
- .replace(/"/g, '&quot;')
343
- .substring(0, 80); // Limit line length
344
- svgContent += `
345
- <text x="100" y="${yPos}" font-family="Arial, sans-serif" font-size="32" fill="#333333">${escapedText}</text>`;
346
- yPos += 50;
347
- }
348
- svgContent += `
349
- </svg>`;
350
- return svgContent;
351
- }
352
- /**
353
- * Convert SVG to PNG buffer
354
- */
355
- async convertSVGToPNG(svgContent) {
356
- try {
357
- // Try to use Canvas to convert SVG to PNG
358
- const Canvas = await import('canvas');
359
- const { createCanvas, loadImage } = Canvas;
360
- // Convert SVG string to data URL
361
- const svgDataUrl = `data:image/svg+xml;base64,${Buffer.from(svgContent).toString('base64')}`;
362
- const canvas = createCanvas(1920, 1080);
363
- const ctx = canvas.getContext('2d');
364
- // Load the SVG as an image
365
- const img = await loadImage(svgDataUrl);
366
- ctx.drawImage(img, 0, 0);
367
- return canvas.toBuffer('image/png');
368
- }
369
- catch (error) {
370
- console.warn('SVG to PNG conversion failed:', error);
371
- // Return simple placeholder buffer
372
- throw error;
373
- }
374
- }
375
- /**
376
- * Extract text content from slide XML data
377
- */
378
- extractSlideText(slideData) {
379
- const textElements = [];
380
- function extractText(obj) {
381
- if (typeof obj === 'object' && obj !== null) {
382
- if (Array.isArray(obj)) {
383
- for (const item of obj) {
384
- extractText(item);
385
- }
386
- }
387
- else {
388
- // Look for text content
389
- if (obj['a:t']) {
390
- if (Array.isArray(obj['a:t'])) {
391
- for (const textItem of obj['a:t']) {
392
- if (typeof textItem === 'string' && textItem.trim()) {
393
- textElements.push(textItem.trim());
394
- }
395
- else if (textItem && typeof textItem === 'object' && '_' in textItem) {
396
- const text = textItem._;
397
- if (text && text.trim()) {
398
- textElements.push(text.trim());
399
- }
400
- }
401
- }
402
- }
403
- }
404
- // Recursively process nested objects
405
- for (const key in obj) {
406
- if (key !== 'a:t') {
407
- extractText(obj[key]);
408
- }
409
- }
410
- }
411
- }
412
- }
413
- extractText(slideData);
414
- return textElements;
415
- }
416
- /**
417
- * Create placeholder slide image for failed conversions
418
- */
419
- async createPlaceholderSlideImage(slideNumber) {
420
- // Create a simple placeholder
421
- const placeholderText = `Slide ${slideNumber}\n\n[Slide content could not be rendered]\n\nThis slide contains the original presentation content\nbut could not be converted to an image.`;
422
- // Return a minimal buffer (in real implementation, create a proper placeholder image)
423
- return Buffer.from(placeholderText);
424
- }
425
- /**
426
- * Convert PDF buffer to individual slide images using pdf2pic or return pre-generated images
427
- */
428
- async convertPdfToSlideImages(pdfBuffer, options) {
429
- try {
430
- // Check if we already generated slide images using alternative method
431
- if (pdfBuffer.toString() === 'FAKE_PDF_FOR_ALTERNATIVE_METHOD') {
432
- console.log('Using pre-generated slide images from alternative method');
433
- return this.generatedSlideImages;
434
- }
435
- // Standard PDF to image conversion using pdf2pic
436
- await fs.mkdir(this.outputDir, { recursive: true });
437
- console.log('PDF to images: Output directory created:', this.outputDir);
438
- // Configure pdf2pic
439
- const convert = fromBuffer(pdfBuffer, {
440
- density: options.density,
441
- saveFilename: 'slide',
442
- savePath: this.outputDir,
443
- format: options.format,
444
- width: undefined, // Let pdf2pic calculate based on density
445
- height: undefined,
446
- quality: options.quality
447
- });
448
- console.log('PDF2PIC configuration:', {
449
- density: options.density,
450
- format: options.format,
451
- quality: options.quality,
452
- outputDir: this.outputDir
453
- });
454
- // Get total number of pages first
455
- const storeAsImage = convert.bulk(-1, true);
456
- const results = await storeAsImage;
457
- console.log(`PDF2PIC processed ${results.length} pages`);
458
- const slideImages = [];
459
- for (let i = 0; i < results.length; i++) {
460
- const result = results[i];
461
- const slideNumber = i + 1;
462
- const filename = `slide-${slideNumber.toString().padStart(3, '0')}.${options.format}`;
463
- const savedPath = path.join(this.outputDir, filename);
464
- console.log(`Processing slide ${slideNumber}, expected file: ${filename}`);
465
- // Save the image file
466
- const imageBuffer = result.buffer;
467
- if (imageBuffer) {
468
- await fs.writeFile(savedPath, imageBuffer);
469
- console.log(`Saved slide image: ${savedPath} (${imageBuffer.length} bytes)`);
470
- slideImages.push({
471
- originalPath: `slide${slideNumber}`, // Virtual path for consistency
472
- savedPath: savedPath,
473
- size: imageBuffer.length,
474
- format: options.format
475
- });
476
- }
477
- else {
478
- console.warn(`No buffer found for slide ${slideNumber}`);
479
- }
480
- }
481
- console.log(`Successfully created ${slideImages.length} slide images`);
482
- return slideImages;
483
- }
484
- catch (error) {
485
- const message = error instanceof Error ? error.message : 'Unknown error';
486
- console.error('PDF to images conversion error:', error);
487
- throw new ParseError('SlideRenderer', `PDF to images conversion failed: ${message}`, error);
488
- }
489
- }
490
- /**
491
- * Generate markdown with slide images
492
- */
493
- generateSlideMarkdown(slideImages, title) {
494
- let markdown = '';
495
- if (title) {
496
- markdown += `# ${title}\n\n`;
497
- }
498
- for (let i = 0; i < slideImages.length; i++) {
499
- const slide = slideImages[i];
500
- const slideNumber = i + 1;
501
- markdown += `## Slide ${slideNumber}\n\n`;
502
- // Use relative path for markdown image reference
503
- const relativePath = path.relative(process.cwd(), slide.savedPath)
504
- .replace(/\\/g, '/'); // Ensure forward slashes for markdown
505
- markdown += `![Slide ${slideNumber}](${relativePath})\n\n`;
506
- }
507
- return markdown.trim();
508
- }
509
- /**
510
- * Clean up generated image files
511
- */
512
- async cleanup() {
513
- try {
514
- const files = await fs.readdir(this.outputDir);
515
- const slideFiles = files.filter(file => file.startsWith('slide-') && (file.endsWith('.png') || file.endsWith('.jpg')));
516
- for (const file of slideFiles) {
517
- const filePath = path.join(this.outputDir, file);
518
- await fs.unlink(filePath);
519
- }
520
- }
521
- catch (error) {
522
- // Ignore cleanup errors
523
- }
524
- }
525
- /**
526
- * Check if LibreOffice is available on the system
527
- */
528
- static async checkLibreOfficeAvailability() {
529
- try {
530
- // Create a minimal test document to verify LibreOffice works
531
- const testBuffer = Buffer.from('test');
532
- await convertAsync(testBuffer, '.pdf', undefined);
533
- return true;
534
- }
535
- catch {
536
- return false;
537
- }
538
- }
539
- }
540
- //# sourceMappingURL=slide-renderer.js.map