file2md 1.2.25 â 1.2.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/parsers/pdf-parser.d.ts +1 -1
- package/dist/parsers/pdf-parser.d.ts.map +1 -1
- package/dist/parsers/pdf-parser.js +90 -131
- package/dist/parsers/pdf-parser.js.map +1 -1
- package/dist/utils/pdf-extractor.d.ts +2 -2
- package/dist/utils/pdf-extractor.d.ts.map +1 -1
- package/dist/utils/pdf-extractor.js +37 -25
- package/dist/utils/pdf-extractor.js.map +1 -1
- package/package.json +1 -1
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pdf-parser.d.ts","sourceRoot":"","sources":["../../src/parsers/pdf-parser.ts"],"names":[],"mappings":"AACA,OAAO,
|
|
1
|
+
{"version":3,"file":"pdf-parser.d.ts","sourceRoot":"","sources":["../../src/parsers/pdf-parser.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAIrC,OAAO,EAAgB,KAAK,eAAe,EAAE,KAAK,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAEpG,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AAGlE;;GAEG;AACH,wBAAsB,QAAQ,CAC5B,MAAM,EAAE,MAAM,EACd,cAAc,EAAE,cAAc,EAC9B,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,cAAc,CAAC,CAwGzB"}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import pdfParse from 'pdf-parse';
|
|
2
|
+
import { Buffer } from 'node:buffer';
|
|
2
3
|
import path from 'node:path';
|
|
3
|
-
import fs from 'node:fs';
|
|
4
4
|
import { PDFExtractor } from '../utils/pdf-extractor.js';
|
|
5
5
|
import { ParseError, InvalidFileError } from '../types/errors.js';
|
|
6
6
|
/**
|
|
@@ -114,14 +114,37 @@ async function extractEmbeddedImages(buffer, imageExtractor) {
|
|
|
114
114
|
const textLength = pdfData.text ? pdfData.text.trim().length : 0;
|
|
115
115
|
const isImageHeavy = !pdfData.text || textLength < 200;
|
|
116
116
|
console.log(`đ PDF analysis - Text length: ${textLength}, Pages: ${pdfData.numpages || 1}, Image-heavy: ${isImageHeavy}`);
|
|
117
|
+
// Always try the PDFExtractor approach first as it's more reliable
|
|
118
|
+
console.log('đ Trying to extract images using PDFExtractor...');
|
|
119
|
+
try {
|
|
120
|
+
const { PDFExtractor } = await import('../utils/pdf-extractor.js');
|
|
121
|
+
const pdfExtractor = new PDFExtractor(imageExtractor);
|
|
122
|
+
const extractedImages = await pdfExtractor.extractImagesFromPDF(buffer);
|
|
123
|
+
if (extractedImages.length > 0) {
|
|
124
|
+
console.log(`đ PDFExtractor successfully extracted ${extractedImages.length} images`);
|
|
125
|
+
return extractedImages.map(page => ({
|
|
126
|
+
originalPath: `pdf_page_${page.pageNumber}.png`,
|
|
127
|
+
savedPath: page.imagePath,
|
|
128
|
+
basePath: 'pdf/',
|
|
129
|
+
format: 'png',
|
|
130
|
+
dimensions: page.dimensions
|
|
131
|
+
}));
|
|
132
|
+
}
|
|
133
|
+
else {
|
|
134
|
+
console.log('âšī¸ PDFExtractor found no images to extract');
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
catch (extractorError) {
|
|
138
|
+
console.warn('â ī¸ PDFExtractor failed:', extractorError instanceof Error ? extractorError.message : 'Unknown error');
|
|
139
|
+
}
|
|
140
|
+
// If PDFExtractor doesn't find images, try alternative approaches
|
|
117
141
|
if (isImageHeavy) {
|
|
118
|
-
console.log('đ PDF appears to be image-heavy,
|
|
119
|
-
return await
|
|
142
|
+
console.log('đ PDF appears to be image-heavy, trying alternative extraction...');
|
|
143
|
+
return await extractImagesAlternative(buffer, imageExtractor, pdfData.numpages || 1);
|
|
120
144
|
}
|
|
121
145
|
else {
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
return await extractEmbeddedImagesFromTextPDF(buffer, imageExtractor);
|
|
146
|
+
console.log('đ PDF appears to be text-heavy with potential embedded images...');
|
|
147
|
+
return await extractEmbeddedImagesAlternative(buffer, imageExtractor);
|
|
125
148
|
}
|
|
126
149
|
}
|
|
127
150
|
catch (error) {
|
|
@@ -130,149 +153,85 @@ async function extractEmbeddedImages(buffer, imageExtractor) {
|
|
|
130
153
|
}
|
|
131
154
|
}
|
|
132
155
|
/**
|
|
133
|
-
*
|
|
156
|
+
* Alternative approach for extracting images from image-heavy PDFs
|
|
134
157
|
*/
|
|
135
|
-
async function
|
|
158
|
+
async function extractImagesAlternative(buffer, imageExtractor, pageCount) {
|
|
159
|
+
console.log(`đŧī¸ Alternative image extraction for ${pageCount} page(s)...`);
|
|
160
|
+
// For image-heavy PDFs, we'll create a placeholder image that indicates
|
|
161
|
+
// the PDF contains images but requires external tools for extraction
|
|
136
162
|
try {
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
console.log(`đ Processing page ${page}...`);
|
|
154
|
-
const result = await convert(page);
|
|
155
|
-
console.log(`đ Page ${page} result:`, result);
|
|
156
|
-
if (result && typeof result === 'object' && 'path' in result && result.path) {
|
|
157
|
-
const resultPath = result.path;
|
|
158
|
-
console.log(`đ Page ${page} saved to: ${resultPath}`);
|
|
159
|
-
// Check if file exists before reading
|
|
160
|
-
if (!fs.existsSync(resultPath)) {
|
|
161
|
-
console.warn(`â ī¸ File does not exist: ${resultPath}`);
|
|
162
|
-
continue;
|
|
163
|
-
}
|
|
164
|
-
// Save the extracted image using the image extractor
|
|
165
|
-
const imageBuffer = fs.readFileSync(resultPath);
|
|
166
|
-
console.log(`đ Image buffer size for page ${page}: ${imageBuffer.length} bytes`);
|
|
167
|
-
const savedPath = await imageExtractor.saveImage(imageBuffer, `pdf_page_${page}.png`);
|
|
168
|
-
console.log(`đž Saved path for page ${page}: ${savedPath}`);
|
|
169
|
-
if (savedPath) {
|
|
170
|
-
images.push({
|
|
171
|
-
originalPath: `pdf_page_${page}.png`,
|
|
172
|
-
savedPath,
|
|
173
|
-
basePath: 'pdf/',
|
|
174
|
-
format: 'png',
|
|
175
|
-
dimensions: undefined
|
|
176
|
-
});
|
|
177
|
-
console.log(`â
Successfully extracted image from page ${page}`);
|
|
178
|
-
}
|
|
179
|
-
else {
|
|
180
|
-
console.warn(`â ī¸ Failed to save image for page ${page}`);
|
|
181
|
-
}
|
|
182
|
-
// Clean up temporary file
|
|
183
|
-
try {
|
|
184
|
-
fs.unlinkSync(resultPath);
|
|
185
|
-
console.log(`đī¸ Cleaned up temporary file: ${resultPath}`);
|
|
186
|
-
}
|
|
187
|
-
catch (cleanupError) {
|
|
188
|
-
console.warn(`â ī¸ Failed to cleanup temp file ${resultPath}:`, cleanupError instanceof Error ? cleanupError.message : 'Unknown error');
|
|
189
|
-
}
|
|
190
|
-
}
|
|
191
|
-
else {
|
|
192
|
-
console.warn(`â ī¸ Invalid result structure for page ${page}:`, result);
|
|
193
|
-
}
|
|
194
|
-
}
|
|
195
|
-
catch (pageError) {
|
|
196
|
-
console.error(`â Failed to extract image from page ${page}:`, pageError instanceof Error ? pageError.message : 'Unknown error');
|
|
197
|
-
if (pageError instanceof Error && pageError.stack) {
|
|
198
|
-
console.error('Stack trace:', pageError.stack);
|
|
199
|
-
}
|
|
200
|
-
}
|
|
163
|
+
console.log('đ Creating placeholder for image-heavy PDF...');
|
|
164
|
+
// Create a simple placeholder image using Canvas or text
|
|
165
|
+
const placeholderContent = `PDF Image Placeholder\n\nThis PDF appears to contain ${pageCount} page(s) with images.\nTo extract individual images, external tools like GraphicsMagick/ImageMagick would be required.\n\nThe PDF text content has been successfully converted to markdown.`;
|
|
166
|
+
// Create a simple text-based placeholder
|
|
167
|
+
const placeholderBuffer = Buffer.from(placeholderContent, 'utf-8');
|
|
168
|
+
// Save as a text file for now (this could be enhanced with actual image generation)
|
|
169
|
+
const savedPath = await imageExtractor.saveImage(placeholderBuffer, 'pdf_images_placeholder.txt');
|
|
170
|
+
if (savedPath) {
|
|
171
|
+
console.log('â
Created placeholder for PDF images');
|
|
172
|
+
return [{
|
|
173
|
+
originalPath: 'pdf_images_placeholder.txt',
|
|
174
|
+
savedPath,
|
|
175
|
+
basePath: 'pdf/',
|
|
176
|
+
format: 'txt',
|
|
177
|
+
dimensions: undefined
|
|
178
|
+
}];
|
|
201
179
|
}
|
|
202
|
-
|
|
203
|
-
return images;
|
|
180
|
+
return [];
|
|
204
181
|
}
|
|
205
182
|
catch (error) {
|
|
206
|
-
console.warn('â Failed to
|
|
183
|
+
console.warn('â Failed to create placeholder:', error instanceof Error ? error.message : 'Unknown error');
|
|
207
184
|
return [];
|
|
208
185
|
}
|
|
209
186
|
}
|
|
210
187
|
/**
|
|
211
|
-
*
|
|
188
|
+
* Alternative approach for extracting embedded images from text-heavy PDFs
|
|
212
189
|
*/
|
|
213
|
-
async function
|
|
190
|
+
async function extractEmbeddedImagesAlternative(buffer, imageExtractor) {
|
|
191
|
+
console.log('đ Alternative approach for text-heavy PDF with potential images...');
|
|
214
192
|
try {
|
|
215
|
-
|
|
216
|
-
// This is a simplified approach - in
|
|
217
|
-
|
|
218
|
-
//
|
|
219
|
-
const
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
console.log('đ Text PDF conversion result:', result);
|
|
232
|
-
if (result && typeof result === 'object' && 'path' in result && result.path) {
|
|
233
|
-
const resultPath = result.path;
|
|
234
|
-
console.log(`đ Text PDF saved to: ${resultPath}`);
|
|
235
|
-
if (!fs.existsSync(resultPath)) {
|
|
236
|
-
console.warn(`â ī¸ Text PDF file does not exist: ${resultPath}`);
|
|
237
|
-
return [];
|
|
238
|
-
}
|
|
239
|
-
const imageBuffer = fs.readFileSync(resultPath);
|
|
240
|
-
console.log(`đ Text PDF image buffer size: ${imageBuffer.length} bytes`);
|
|
241
|
-
const savedPath = await imageExtractor.saveImage(imageBuffer, 'pdf_content.png');
|
|
242
|
-
console.log(`đž Text PDF saved path: ${savedPath}`);
|
|
243
|
-
// Clean up temporary file
|
|
244
|
-
try {
|
|
245
|
-
fs.unlinkSync(resultPath);
|
|
246
|
-
console.log(`đī¸ Cleaned up text PDF temp file: ${resultPath}`);
|
|
247
|
-
}
|
|
248
|
-
catch (cleanupError) {
|
|
249
|
-
console.warn(`â ī¸ Failed to cleanup text PDF temp file:`, cleanupError instanceof Error ? cleanupError.message : 'Unknown error');
|
|
250
|
-
}
|
|
251
|
-
if (savedPath) {
|
|
252
|
-
console.log('â
Extracted content image from text PDF');
|
|
253
|
-
return [{
|
|
254
|
-
originalPath: 'pdf_content.png',
|
|
255
|
-
savedPath,
|
|
256
|
-
basePath: 'pdf/',
|
|
257
|
-
format: 'png',
|
|
258
|
-
dimensions: undefined
|
|
259
|
-
}];
|
|
260
|
-
}
|
|
261
|
-
}
|
|
262
|
-
else {
|
|
263
|
-
console.warn('â ī¸ Invalid text PDF conversion result:', result);
|
|
193
|
+
// Try to analyze the PDF structure for embedded images
|
|
194
|
+
// This is a simplified approach that looks for image-like patterns in the PDF
|
|
195
|
+
const pdfString = buffer.toString('binary');
|
|
196
|
+
// Look for common image signatures in PDF
|
|
197
|
+
const imagePatterns = [
|
|
198
|
+
/\/Type\/XObject[\s\S]*?\/Subtype\/Image/g,
|
|
199
|
+
/\/FlateDecode[\s\S]*?\/Type\/XObject/g,
|
|
200
|
+
/JFIF/g, // JPEG
|
|
201
|
+
/PNG/g, // PNG
|
|
202
|
+
/GIF89a/g, // GIF
|
|
203
|
+
];
|
|
204
|
+
let imageCount = 0;
|
|
205
|
+
for (const pattern of imagePatterns) {
|
|
206
|
+
const matches = pdfString.match(pattern);
|
|
207
|
+
if (matches) {
|
|
208
|
+
imageCount += matches.length;
|
|
264
209
|
}
|
|
265
210
|
}
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
211
|
+
console.log(`đ Found ${imageCount} potential image references in PDF structure`);
|
|
212
|
+
if (imageCount > 0) {
|
|
213
|
+
// Create a summary of found images
|
|
214
|
+
const summaryContent = `PDF Image Analysis Summary\n\nFound ${imageCount} potential image(s) embedded in this PDF.\n\nThese images are embedded within the PDF structure but require specialized tools for extraction.\nThe text content has been successfully converted to markdown.\n\nTo extract the actual images, consider using:\n- Adobe Acrobat Pro\n- XPDF tools\n- Poppler utilities\n- Online PDF image extraction services`;
|
|
215
|
+
const summaryBuffer = Buffer.from(summaryContent, 'utf-8');
|
|
216
|
+
const savedPath = await imageExtractor.saveImage(summaryBuffer, 'pdf_embedded_images_summary.txt');
|
|
217
|
+
if (savedPath) {
|
|
218
|
+
console.log(`â
Created summary for ${imageCount} embedded images`);
|
|
219
|
+
return [{
|
|
220
|
+
originalPath: 'pdf_embedded_images_summary.txt',
|
|
221
|
+
savedPath,
|
|
222
|
+
basePath: 'pdf/',
|
|
223
|
+
format: 'txt',
|
|
224
|
+
dimensions: undefined
|
|
225
|
+
}];
|
|
270
226
|
}
|
|
271
227
|
}
|
|
228
|
+
else {
|
|
229
|
+
console.log('âšī¸ No embedded images detected in PDF structure');
|
|
230
|
+
}
|
|
272
231
|
return [];
|
|
273
232
|
}
|
|
274
233
|
catch (error) {
|
|
275
|
-
console.warn('â Failed to
|
|
234
|
+
console.warn('â Failed to analyze PDF for embedded images:', error instanceof Error ? error.message : 'Unknown error');
|
|
276
235
|
return [];
|
|
277
236
|
}
|
|
278
237
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pdf-parser.js","sourceRoot":"","sources":["../../src/parsers/pdf-parser.ts"],"names":[],"mappings":"AAAA,OAAO,QAAQ,MAAM,WAAW,CAAC;AAEjC,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,MAAM,SAAS,CAAC;AAEzB,OAAO,EAAE,YAAY,EAA6C,MAAM,2BAA2B,CAAC;AACpG,OAAO,EAAE,UAAU,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAIlE;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,QAAQ,CAC5B,MAAc,EACd,cAA8B,EAC9B,UAA2B,EAAE;IAE7B,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,CAAC;QACpC,MAAM,YAAY,GAAG,IAAI,YAAY,CAAC,cAAc,CAAC,CAAC;QAEtD,IAAI,QAAQ,GAAG,EAAE,CAAC;QAClB,MAAM,MAAM,GAAgB,EAAE,CAAC;QAC/B,IAAI,SAAS,GAAG,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;QAEnC,oCAAoC;QACpC,IAAI,OAAO,CAAC,QAAQ,IAAI,OAAO,CAAC,QAAQ,GAAG,CAAC,EAAE,CAAC;YAC7C,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;QACpD,CAAC;QAED,IAAI,CAAC;YACH,MAAM,cAAc,GAAG,MAAM,qBAAqB,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC;YAC3E,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC9B,MAAM,CAAC,IAAI,CAAC,GAAG,cAAc,CAAC,CAAC;YACjC,CAAC;QACH,CAAC;QAAC,OAAO,aAAsB,EAAE,CAAC;YAChC,OAAO,CAAC,IAAI,CAAC,oCAAoC,EAAE,aAAa,YAAY,KAAK,CAAC,CAAC,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;QAC/H,CAAC;QAED,2CAA2C;QAC3C,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;YAClC,IAAI,CAAC;gBACH,MAAM,YAAY,GAAG,MAAM,YAAY,CAAC,qBAAqB,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;gBAC/E,QAAQ,IAAI,YAAY,CAAC;gBAEzB,4DAA4D;gBAC5D,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACtB,QAAQ,IAAI,MAAM,oBAAoB,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC;gBACjE,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,CAAC,IAAI,CAAC,kEAAkE,CAAC,CAAC;gBACjF,qCAAqC;gBACrC,QAAQ,GAAG,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBAEvC,4BAA4B;gBAC5B,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACtB,QAAQ,IAAI,MAAM,oBAAoB,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC;gBACjE,CAAC;YACH,CAAC;QACH,CAAC;QAED,mEAAmE;QACnE,IAAI,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YAChD,IAAI,CAAC;gBACH,MAAM,UAAU,GAAG,MAAM,YAAY,CAAC,oBAAoB,CAAC,MAAM,CAAC,CAAC;gBACnE,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC1B,0CAA0C;oBAC1C,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;wBAC9B,MAAM,CAAC,IAAI,CAAC;4BACV,YAAY,EAAE,QAAQ,IAAI,CAAC,UAAU,EAAE;4BACvC,SAAS,EAAE,IAAI,CAAC,SAAS;4BACzB,QAAQ,EAAE,EAAE;4BACZ,MAAM,EAAE,KAAK;4BACb,UAAU,EAAE,IAAI,CAAC,UAAU;yBAC5B,CAAC,CAAC;oBACL,CAAC;oBAED,IAAI,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC;wBACpB,QAAQ,IAAI,kCAAkC,CAAC;oBACjD,CAAC;oBACD,QAAQ,IAAI,MAAM,YAAY,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC;gBAC9D,CAAC;YACH,CAAC;YAAC,OAAO,UAAmB,EAAE,CAAC;gBAC7B,OAAO,CAAC,IAAI,CAAC,kCAAkC,EAAE,UAAU,YAAY,KAAK,CAAC,CAAC,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;YACvH,CAAC;QACH,CAAC;QAED,kDAAkD;QAClD,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC;YACrB,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;gBAClC,QAAQ,GAAG,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACzC,CAAC;iBAAM,CAAC;gBACN,MAAM,IAAI,gBAAgB,CAAC,8DAA8D,CAAC,CAAC;YAC7F,CAAC;QACH,CAAC;QAED,OAAO;YACL,QAAQ;YACR,MAAM;YACN,SAAS;YACT,QAAQ,EAAE;gBACR,OAAO,EAAE,IAAI,CAAC,OAAO,IAAI,SAAS;gBAClC,IAAI,EAAE,IAAI,CAAC,IAAI,IAAI,EAAE;gBACrB,UAAU,EAAE,IAAI,CAAC,IAAI,EAAE,MAAM,IAAI,CAAC;aACnC;SACF,CAAC;IAEJ,CAAC;IAAC,OAAO,KAAc,EAAE,CAAC;QACxB,IAAI,KAAK,YAAY,gBAAgB,EAAE,CAAC;YACtC,MAAM,KAAK,CAAC;QACd,CAAC;QAED,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;QAEzE,IAAI,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;YAC/D,MAAM,IAAI,gBAAgB,CAAC,+BAA+B,EAAE,KAAc,CAAC,CAAC;QAC9E,CAAC;QAED,MAAM,IAAI,UAAU,CAAC,KAAK,EAAE,OAAO,EAAE,KAAc,CAAC,CAAC;IACvD,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,qBAAqB,CAClC,MAAc,EACd,cAA8B;IAE9B,IAAI,CAAC;QACH,OAAO,CAAC,GAAG,CAAC,+CAA+C,CAAC,CAAC;QAE7D,8DAA8D;QAC9D,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,CAAC;QAEvC,mEAAmE;QACnE,MAAM,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QACjE,MAAM,YAAY,GAAG,CAAC,OAAO,CAAC,IAAI,IAAI,UAAU,GAAG,GAAG,CAAC;QAEvD,OAAO,CAAC,GAAG,CAAC,kCAAkC,UAAU,YAAY,OAAO,CAAC,QAAQ,IAAI,CAAC,kBAAkB,YAAY,EAAE,CAAC,CAAC;QAE3H,IAAI,YAAY,EAAE,CAAC;YACjB,OAAO,CAAC,GAAG,CAAC,iEAAiE,CAAC,CAAC;YAC/E,OAAO,MAAM,oBAAoB,CAAC,MAAM,EAAE,cAAc,EAAE,OAAO,CAAC,QAAQ,IAAI,CAAC,CAAC,CAAC;QACnF,CAAC;aAAM,CAAC;YACN,sDAAsD;YACtD,OAAO,CAAC,GAAG,CAAC,kEAAkE,CAAC,CAAC;YAChF,OAAO,MAAM,gCAAgC,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC;QACxE,CAAC;IACH,CAAC;IAAC,OAAO,KAAc,EAAE,CAAC;QACxB,OAAO,CAAC,IAAI,CAAC,0CAA0C,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;QACnH,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,oBAAoB,CACjC,MAAc,EACd,cAA8B,EAC9B,SAAiB;IAEjB,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,CAAC;QACxC,MAAM,MAAM,GAAgB,EAAE,CAAC;QAE/B,gCAAgC;QAChC,MAAM,OAAO,GAAG,OAAO,CAAC,UAAU,CAAC,MAAM,EAAE;YACzC,OAAO,EAAE,GAAG,EAAE,yCAAyC;YACvD,YAAY,EAAE,UAAU;YACxB,QAAQ,EAAE,cAAc,CAAC,cAAc;YACvC,MAAM,EAAE,KAAK;YACb,KAAK,EAAE,IAAI;YACX,MAAM,EAAE,IAAI;SACb,CAAC,CAAC;QAEH,kDAAkD;QAClD,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC;QACxC,OAAO,CAAC,GAAG,CAAC,6BAA6B,QAAQ,WAAW,CAAC,CAAC;QAE9D,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,IAAI,QAAQ,EAAE,IAAI,EAAE,EAAE,CAAC;YAC5C,IAAI,CAAC;gBACH,OAAO,CAAC,GAAG,CAAC,sBAAsB,IAAI,KAAK,CAAC,CAAC;gBAC7C,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;gBACnC,OAAO,CAAC,GAAG,CAAC,WAAW,IAAI,UAAU,EAAE,MAAM,CAAC,CAAC;gBAE/C,IAAI,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,IAAI,MAAM,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;oBAC5E,MAAM,UAAU,GAAG,MAAM,CAAC,IAAc,CAAC;oBACzC,OAAO,CAAC,GAAG,CAAC,WAAW,IAAI,cAAc,UAAU,EAAE,CAAC,CAAC;oBAEvD,sCAAsC;oBACtC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;wBAC/B,OAAO,CAAC,IAAI,CAAC,2BAA2B,UAAU,EAAE,CAAC,CAAC;wBACtD,SAAS;oBACX,CAAC;oBAED,qDAAqD;oBACrD,MAAM,WAAW,GAAG,EAAE,CAAC,YAAY,CAAC,UAAU,CAAC,CAAC;oBAChD,OAAO,CAAC,GAAG,CAAC,iCAAiC,IAAI,KAAK,WAAW,CAAC,MAAM,QAAQ,CAAC,CAAC;oBAElF,MAAM,SAAS,GAAG,MAAM,cAAc,CAAC,SAAS,CAAC,WAAW,EAAE,YAAY,IAAI,MAAM,CAAC,CAAC;oBACtF,OAAO,CAAC,GAAG,CAAC,0BAA0B,IAAI,KAAK,SAAS,EAAE,CAAC,CAAC;oBAE5D,IAAI,SAAS,EAAE,CAAC;wBACd,MAAM,CAAC,IAAI,CAAC;4BACV,YAAY,EAAE,YAAY,IAAI,MAAM;4BACpC,SAAS;4BACT,QAAQ,EAAE,MAAM;4BAChB,MAAM,EAAE,KAAK;4BACb,UAAU,EAAE,SAAS;yBACtB,CAAC,CAAC;wBACH,OAAO,CAAC,GAAG,CAAC,4CAA4C,IAAI,EAAE,CAAC,CAAC;oBAClE,CAAC;yBAAM,CAAC;wBACN,OAAO,CAAC,IAAI,CAAC,oCAAoC,IAAI,EAAE,CAAC,CAAC;oBAC3D,CAAC;oBAED,0BAA0B;oBAC1B,IAAI,CAAC;wBACH,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC;wBAC1B,OAAO,CAAC,GAAG,CAAC,kCAAkC,UAAU,EAAE,CAAC,CAAC;oBAC9D,CAAC;oBAAC,OAAO,YAAqB,EAAE,CAAC;wBAC/B,OAAO,CAAC,IAAI,CAAC,kCAAkC,UAAU,GAAG,EAAE,YAAY,YAAY,KAAK,CAAC,CAAC,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;oBACxI,CAAC;gBACH,CAAC;qBAAM,CAAC;oBACN,OAAO,CAAC,IAAI,CAAC,wCAAwC,IAAI,GAAG,EAAE,MAAM,CAAC,CAAC;gBACxE,CAAC;YACH,CAAC;YAAC,OAAO,SAAkB,EAAE,CAAC;gBAC5B,OAAO,CAAC,KAAK,CAAC,uCAAuC,IAAI,GAAG,EAAE,SAAS,YAAY,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;gBAChI,IAAI,SAAS,YAAY,KAAK,IAAI,SAAS,CAAC,KAAK,EAAE,CAAC;oBAClD,OAAO,CAAC,KAAK,CAAC,cAAc,EAAE,SAAS,CAAC,KAAK,CAAC,CAAC;gBACjD,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,6BAA6B,MAAM,CAAC,MAAM,wBAAwB,CAAC,CAAC;QAChF,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,OAAO,KAAc,EAAE,CAAC;QACxB,OAAO,CAAC,IAAI,CAAC,sCAAsC,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;QAC/G,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,gCAAgC,CAC7C,MAAc,EACd,cAA8B;IAE9B,IAAI,CAAC;QACH,OAAO,CAAC,GAAG,CAAC,uDAAuD,CAAC,CAAC;QAErE,4DAA4D;QAC5D,2DAA2D;QAC3D,iFAAiF;QACjF,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,CAAC;QAExC,MAAM,OAAO,GAAG,OAAO,CAAC,UAAU,CAAC,MAAM,EAAE;YACzC,OAAO,EAAE,GAAG;YACZ,YAAY,EAAE,aAAa;YAC3B,QAAQ,EAAE,cAAc,CAAC,cAAc;YACvC,MAAM,EAAE,KAAK;YACb,KAAK,EAAE,GAAG;YACV,MAAM,EAAE,IAAI;SACb,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,OAAO,CAAC,GAAG,CAAC,+CAA+C,CAAC,CAAC;YAC7D,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,CAAC,CAAC,CAAC;YAChC,OAAO,CAAC,GAAG,CAAC,gCAAgC,EAAE,MAAM,CAAC,CAAC;YAEtD,IAAI,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,IAAI,MAAM,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;gBAC5E,MAAM,UAAU,GAAG,MAAM,CAAC,IAAc,CAAC;gBACzC,OAAO,CAAC,GAAG,CAAC,yBAAyB,UAAU,EAAE,CAAC,CAAC;gBAEnD,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;oBAC/B,OAAO,CAAC,IAAI,CAAC,oCAAoC,UAAU,EAAE,CAAC,CAAC;oBAC/D,OAAO,EAAE,CAAC;gBACZ,CAAC;gBAED,MAAM,WAAW,GAAG,EAAE,CAAC,YAAY,CAAC,UAAU,CAAC,CAAC;gBAChD,OAAO,CAAC,GAAG,CAAC,kCAAkC,WAAW,CAAC,MAAM,QAAQ,CAAC,CAAC;gBAE1E,MAAM,SAAS,GAAG,MAAM,cAAc,CAAC,SAAS,CAAC,WAAW,EAAE,iBAAiB,CAAC,CAAC;gBACjF,OAAO,CAAC,GAAG,CAAC,2BAA2B,SAAS,EAAE,CAAC,CAAC;gBAEpD,0BAA0B;gBAC1B,IAAI,CAAC;oBACH,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC;oBAC1B,OAAO,CAAC,GAAG,CAAC,sCAAsC,UAAU,EAAE,CAAC,CAAC;gBAClE,CAAC;gBAAC,OAAO,YAAqB,EAAE,CAAC;oBAC/B,OAAO,CAAC,IAAI,CAAC,0CAA0C,EAAE,YAAY,YAAY,KAAK,CAAC,CAAC,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;gBACnI,CAAC;gBAED,IAAI,SAAS,EAAE,CAAC;oBACd,OAAO,CAAC,GAAG,CAAC,yCAAyC,CAAC,CAAC;oBACvD,OAAO,CAAC;4BACN,YAAY,EAAE,iBAAiB;4BAC/B,SAAS;4BACT,QAAQ,EAAE,MAAM;4BAChB,MAAM,EAAE,KAAK;4BACb,UAAU,EAAE,SAAS;yBACtB,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,IAAI,CAAC,wCAAwC,EAAE,MAAM,CAAC,CAAC;YACjE,CAAC;QACH,CAAC;QAAC,OAAO,YAAqB,EAAE,CAAC;YAC/B,OAAO,CAAC,IAAI,CAAC,uCAAuC,EAAE,YAAY,YAAY,KAAK,CAAC,CAAC,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;YAC9H,IAAI,YAAY,YAAY,KAAK,IAAI,YAAY,CAAC,KAAK,EAAE,CAAC;gBACxD,OAAO,CAAC,KAAK,CAAC,uBAAuB,EAAE,YAAY,CAAC,KAAK,CAAC,CAAC;YAC7D,CAAC;QACH,CAAC;QAED,OAAO,EAAE,CAAC;IACZ,CAAC;IAAC,OAAO,KAAc,EAAE,CAAC;QACxB,OAAO,CAAC,IAAI,CAAC,oDAAoD,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;QAC7H,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,oBAAoB,CACjC,MAA4B,EAC5B,cAA8B;IAE9B,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEnC,IAAI,aAAa,GAAG,4BAA4B,CAAC;IAEjD,KAAK,MAAM,CAAC,KAAK,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,EAAE,EAAE,CAAC;QAC9C,IAAI,KAAK,CAAC,SAAS,EAAE,CAAC;YACpB,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;YAChD,aAAa,IAAI,cAAc,CAAC,gBAAgB,CAAC,SAAS,KAAK,GAAG,CAAC,EAAE,EAAE,QAAQ,CAAC,CAAC;YACjF,aAAa,IAAI,MAAM,CAAC;QAC1B,CAAC;IACH,CAAC;IAED,OAAO,aAAa,CAAC;AACvB,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,IAAY;IACpC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC/B,MAAM,YAAY,GAAG,KAAK;SACvB,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;SACxB,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAEnC,OAAO,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACjC,CAAC"}
|
|
1
|
+
{"version":3,"file":"pdf-parser.js","sourceRoot":"","sources":["../../src/parsers/pdf-parser.ts"],"names":[],"mappings":"AAAA,OAAO,QAAQ,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,IAAI,MAAM,WAAW,CAAC;AAG7B,OAAO,EAAE,YAAY,EAA6C,MAAM,2BAA2B,CAAC;AACpG,OAAO,EAAE,UAAU,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAIlE;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,QAAQ,CAC5B,MAAc,EACd,cAA8B,EAC9B,UAA2B,EAAE;IAE7B,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,CAAC;QACpC,MAAM,YAAY,GAAG,IAAI,YAAY,CAAC,cAAc,CAAC,CAAC;QAEtD,IAAI,QAAQ,GAAG,EAAE,CAAC;QAClB,MAAM,MAAM,GAAgB,EAAE,CAAC;QAC/B,IAAI,SAAS,GAAG,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;QAEnC,oCAAoC;QACpC,IAAI,OAAO,CAAC,QAAQ,IAAI,OAAO,CAAC,QAAQ,GAAG,CAAC,EAAE,CAAC;YAC7C,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;QACpD,CAAC;QAED,IAAI,CAAC;YACH,MAAM,cAAc,GAAG,MAAM,qBAAqB,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC;YAC3E,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC9B,MAAM,CAAC,IAAI,CAAC,GAAG,cAAc,CAAC,CAAC;YACjC,CAAC;QACH,CAAC;QAAC,OAAO,aAAsB,EAAE,CAAC;YAChC,OAAO,CAAC,IAAI,CAAC,oCAAoC,EAAE,aAAa,YAAY,KAAK,CAAC,CAAC,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;QAC/H,CAAC;QAED,2CAA2C;QAC3C,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;YAClC,IAAI,CAAC;gBACH,MAAM,YAAY,GAAG,MAAM,YAAY,CAAC,qBAAqB,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;gBAC/E,QAAQ,IAAI,YAAY,CAAC;gBAEzB,4DAA4D;gBAC5D,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACtB,QAAQ,IAAI,MAAM,oBAAoB,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC;gBACjE,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,CAAC,IAAI,CAAC,kEAAkE,CAAC,CAAC;gBACjF,qCAAqC;gBACrC,QAAQ,GAAG,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBAEvC,4BAA4B;gBAC5B,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACtB,QAAQ,IAAI,MAAM,oBAAoB,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC;gBACjE,CAAC;YACH,CAAC;QACH,CAAC;QAED,mEAAmE;QACnE,IAAI,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YAChD,IAAI,CAAC;gBACH,MAAM,UAAU,GAAG,MAAM,YAAY,CAAC,oBAAoB,CAAC,MAAM,CAAC,CAAC;gBACnE,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC1B,0CAA0C;oBAC1C,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;wBAC9B,MAAM,CAAC,IAAI,CAAC;4BACV,YAAY,EAAE,QAAQ,IAAI,CAAC,UAAU,EAAE;4BACvC,SAAS,EAAE,IAAI,CAAC,SAAS;4BACzB,QAAQ,EAAE,EAAE;4BACZ,MAAM,EAAE,KAAK;4BACb,UAAU,EAAE,IAAI,CAAC,UAAU;yBAC5B,CAAC,CAAC;oBACL,CAAC;oBAED,IAAI,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC;wBACpB,QAAQ,IAAI,kCAAkC,CAAC;oBACjD,CAAC;oBACD,QAAQ,IAAI,MAAM,YAAY,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC;gBAC9D,CAAC;YACH,CAAC;YAAC,OAAO,UAAmB,EAAE,CAAC;gBAC7B,OAAO,CAAC,IAAI,CAAC,kCAAkC,EAAE,UAAU,YAAY,KAAK,CAAC,CAAC,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;YACvH,CAAC;QACH,CAAC;QAED,kDAAkD;QAClD,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC;YACrB,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;gBAClC,QAAQ,GAAG,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACzC,CAAC;iBAAM,CAAC;gBACN,MAAM,IAAI,gBAAgB,CAAC,8DAA8D,CAAC,CAAC;YAC7F,CAAC;QACH,CAAC;QAED,OAAO;YACL,QAAQ;YACR,MAAM;YACN,SAAS;YACT,QAAQ,EAAE;gBACR,OAAO,EAAE,IAAI,CAAC,OAAO,IAAI,SAAS;gBAClC,IAAI,EAAE,IAAI,CAAC,IAAI,IAAI,EAAE;gBACrB,UAAU,EAAE,IAAI,CAAC,IAAI,EAAE,MAAM,IAAI,CAAC;aACnC;SACF,CAAC;IAEJ,CAAC;IAAC,OAAO,KAAc,EAAE,CAAC;QACxB,IAAI,KAAK,YAAY,gBAAgB,EAAE,CAAC;YACtC,MAAM,KAAK,CAAC;QACd,CAAC;QAED,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;QAEzE,IAAI,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;YAC/D,MAAM,IAAI,gBAAgB,CAAC,+BAA+B,EAAE,KAAc,CAAC,CAAC;QAC9E,CAAC;QAED,MAAM,IAAI,UAAU,CAAC,KAAK,EAAE,OAAO,EAAE,KAAc,CAAC,CAAC;IACvD,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,qBAAqB,CAClC,MAAc,EACd,cAA8B;IAE9B,IAAI,CAAC;QACH,OAAO,CAAC,GAAG,CAAC,+CAA+C,CAAC,CAAC;QAE7D,8DAA8D;QAC9D,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,CAAC;QAEvC,mEAAmE;QACnE,MAAM,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QACjE,MAAM,YAAY,GAAG,CAAC,OAAO,CAAC,IAAI,IAAI,UAAU,GAAG,GAAG,CAAC;QAEvD,OAAO,CAAC,GAAG,CAAC,kCAAkC,UAAU,YAAY,OAAO,CAAC,QAAQ,IAAI,CAAC,kBAAkB,YAAY,EAAE,CAAC,CAAC;QAE3H,mEAAmE;QACnE,OAAO,CAAC,GAAG,CAAC,mDAAmD,CAAC,CAAC;QACjE,IAAI,CAAC;YACH,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,2BAA2B,CAAC,CAAC;YACnE,MAAM,YAAY,GAAG,IAAI,YAAY,CAAC,cAAc,CAAC,CAAC;YACtD,MAAM,eAAe,GAAG,MAAM,YAAY,CAAC,oBAAoB,CAAC,MAAM,CAAC,CAAC;YAExE,IAAI,eAAe,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC/B,OAAO,CAAC,GAAG,CAAC,0CAA0C,eAAe,CAAC,MAAM,SAAS,CAAC,CAAC;gBACvF,OAAO,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;oBAClC,YAAY,EAAE,YAAY,IAAI,CAAC,UAAU,MAAM;oBAC/C,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,QAAQ,EAAE,MAAM;oBAChB,MAAM,EAAE,KAAK;oBACb,UAAU,EAAE,IAAI,CAAC,UAAU;iBAC5B,CAAC,CAAC,CAAC;YACN,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,GAAG,CAAC,4CAA4C,CAAC,CAAC;YAC5D,CAAC;QACH,CAAC;QAAC,OAAO,cAAuB,EAAE,CAAC;YACjC,OAAO,CAAC,IAAI,CAAC,yBAAyB,EAAE,cAAc,YAAY,KAAK,CAAC,CAAC,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;QACtH,CAAC;QAED,kEAAkE;QAClE,IAAI,YAAY,EAAE,CAAC;YACjB,OAAO,CAAC,GAAG,CAAC,oEAAoE,CAAC,CAAC;YAClF,OAAO,MAAM,wBAAwB,CAAC,MAAM,EAAE,cAAc,EAAE,OAAO,CAAC,QAAQ,IAAI,CAAC,CAAC,CAAC;QACvF,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,GAAG,CAAC,mEAAmE,CAAC,CAAC;YACjF,OAAO,MAAM,gCAAgC,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC;QACxE,CAAC;IACH,CAAC;IAAC,OAAO,KAAc,EAAE,CAAC;QACxB,OAAO,CAAC,IAAI,CAAC,0CAA0C,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;QACnH,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,wBAAwB,CACrC,MAAc,EACd,cAA8B,EAC9B,SAAiB;IAEjB,OAAO,CAAC,GAAG,CAAC,wCAAwC,SAAS,aAAa,CAAC,CAAC;IAE5E,wEAAwE;IACxE,qEAAqE;IACrE,IAAI,CAAC;QACH,OAAO,CAAC,GAAG,CAAC,gDAAgD,CAAC,CAAC;QAE9D,yDAAyD;QACzD,MAAM,kBAAkB,GAAG,wDAAwD,SAAS,6LAA6L,CAAC;QAE1R,yCAAyC;QACzC,MAAM,iBAAiB,GAAG,MAAM,CAAC,IAAI,CAAC,kBAAkB,EAAE,OAAO,CAAC,CAAC;QAEnE,oFAAoF;QACpF,MAAM,SAAS,GAAG,MAAM,cAAc,CAAC,SAAS,CAAC,iBAAiB,EAAE,4BAA4B,CAAC,CAAC;QAElG,IAAI,SAAS,EAAE,CAAC;YACd,OAAO,CAAC,GAAG,CAAC,sCAAsC,CAAC,CAAC;YACpD,OAAO,CAAC;oBACN,YAAY,EAAE,4BAA4B;oBAC1C,SAAS;oBACT,QAAQ,EAAE,MAAM;oBAChB,MAAM,EAAE,KAAK;oBACb,UAAU,EAAE,SAAS;iBACtB,CAAC,CAAC;QACL,CAAC;QAED,OAAO,EAAE,CAAC;IACZ,CAAC;IAAC,OAAO,KAAc,EAAE,CAAC;QACxB,OAAO,CAAC,IAAI,CAAC,iCAAiC,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;QAC1G,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,gCAAgC,CAC7C,MAAc,EACd,cAA8B;IAE9B,OAAO,CAAC,GAAG,CAAC,qEAAqE,CAAC,CAAC;IAEnF,IAAI,CAAC;QACH,uDAAuD;QACvD,8EAA8E;QAC9E,MAAM,SAAS,GAAG,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAE5C,0CAA0C;QAC1C,MAAM,aAAa,GAAG;YACpB,0CAA0C;YAC1C,uCAAuC;YACvC,OAAO,EAAE,OAAO;YAChB,MAAM,EAAG,MAAM;YACf,SAAS,EAAE,MAAM;SAClB,CAAC;QAEF,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,KAAK,MAAM,OAAO,IAAI,aAAa,EAAE,CAAC;YACpC,MAAM,OAAO,GAAG,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YACzC,IAAI,OAAO,EAAE,CAAC;gBACZ,UAAU,IAAI,OAAO,CAAC,MAAM,CAAC;YAC/B,CAAC;QACH,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,YAAY,UAAU,8CAA8C,CAAC,CAAC;QAElF,IAAI,UAAU,GAAG,CAAC,EAAE,CAAC;YACnB,mCAAmC;YACnC,MAAM,cAAc,GAAG,uCAAuC,UAAU,+VAA+V,CAAC;YAExa,MAAM,aAAa,GAAG,MAAM,CAAC,IAAI,CAAC,cAAc,EAAE,OAAO,CAAC,CAAC;YAC3D,MAAM,SAAS,GAAG,MAAM,cAAc,CAAC,SAAS,CAAC,aAAa,EAAE,iCAAiC,CAAC,CAAC;YAEnG,IAAI,SAAS,EAAE,CAAC;gBACd,OAAO,CAAC,GAAG,CAAC,yBAAyB,UAAU,kBAAkB,CAAC,CAAC;gBACnE,OAAO,CAAC;wBACN,YAAY,EAAE,iCAAiC;wBAC/C,SAAS;wBACT,QAAQ,EAAE,MAAM;wBAChB,MAAM,EAAE,KAAK;wBACb,UAAU,EAAE,SAAS;qBACtB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,GAAG,CAAC,iDAAiD,CAAC,CAAC;QACjE,CAAC;QAED,OAAO,EAAE,CAAC;IACZ,CAAC;IAAC,OAAO,KAAc,EAAE,CAAC;QACxB,OAAO,CAAC,IAAI,CAAC,8CAA8C,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;QACvH,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,oBAAoB,CACjC,MAA4B,EAC5B,cAA8B;IAE9B,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEnC,IAAI,aAAa,GAAG,4BAA4B,CAAC;IAEjD,KAAK,MAAM,CAAC,KAAK,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,EAAE,EAAE,CAAC;QAC9C,IAAI,KAAK,CAAC,SAAS,EAAE,CAAC;YACpB,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;YAChD,aAAa,IAAI,cAAc,CAAC,gBAAgB,CAAC,SAAS,KAAK,GAAG,CAAC,EAAE,EAAE,QAAQ,CAAC,CAAC;YACjF,aAAa,IAAI,MAAM,CAAC;QAC1B,CAAC;IACH,CAAC;IAED,OAAO,aAAa,CAAC;AACvB,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,IAAY;IACpC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC/B,MAAM,YAAY,GAAG,KAAK;SACvB,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;SACxB,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAEnC,OAAO,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACjC,CAAC"}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import
|
|
1
|
+
import { Buffer } from 'node:buffer';
|
|
2
2
|
import type { PageData } from '../types/interfaces.js';
|
|
3
3
|
import type { ImageExtractor } from './image-extractor.js';
|
|
4
4
|
export interface PDFParseOptions {
|
|
@@ -16,7 +16,7 @@ export declare class PDFExtractor {
|
|
|
16
16
|
private pageCounter;
|
|
17
17
|
constructor(imageExtractor: ImageExtractor);
|
|
18
18
|
/**
|
|
19
|
-
* Extract images from PDF
|
|
19
|
+
* Extract images from PDF using alternative methods (without external dependencies)
|
|
20
20
|
*/
|
|
21
21
|
extractImagesFromPDF(buffer: Buffer): Promise<readonly PageData[]>;
|
|
22
22
|
/**
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pdf-extractor.d.ts","sourceRoot":"","sources":["../../src/utils/pdf-extractor.ts"],"names":[],"mappings":"AACA,OAAO,
|
|
1
|
+
{"version":3,"file":"pdf-extractor.d.ts","sourceRoot":"","sources":["../../src/utils/pdf-extractor.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAEvD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAW3D,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,cAAc,CAAC,EAAE,OAAO,CAAC;CACnC;AAED,MAAM,WAAW,cAAc;IAC7B,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,MAAM,EAAE,SAAS,OAAO,wBAAwB,EAAE,SAAS,EAAE,CAAC;IACvE,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAC5C;AAED,qBAAa,YAAY;IACvB,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAiB;IAChD,OAAO,CAAC,WAAW,CAAa;gBAEpB,cAAc,EAAE,cAAc;IAI1C;;OAEG;IACG,oBAAoB,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,QAAQ,EAAE,CAAC;IAmDxE;;OAEG;IACG,qBAAqB,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC;IAiE9E,OAAO,CAAC,eAAe;IAoBvB,OAAO,CAAC,qBAAqB;IAO7B,OAAO,CAAC,gBAAgB;IAaxB,OAAO,CAAC,aAAa;IAgBrB,OAAO,CAAC,eAAe;IA+BvB,OAAO,CAAC,UAAU;IAYlB,OAAO,CAAC,cAAc;IAatB;;OAEG;IACG,gBAAgB,CAAC,UAAU,EAAE,SAAS,QAAQ,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAgBxE;;OAEG;IACH,KAAK,IAAI,IAAI;IAIb;;OAEG;IACH,IAAI,gBAAgB,IAAI,MAAM,CAE7B;CACF"}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
|
-
import {
|
|
2
|
+
import { Buffer } from 'node:buffer';
|
|
3
3
|
export class PDFExtractor {
|
|
4
4
|
imageExtractor;
|
|
5
5
|
pageCounter = 0;
|
|
@@ -7,37 +7,49 @@ export class PDFExtractor {
|
|
|
7
7
|
this.imageExtractor = imageExtractor;
|
|
8
8
|
}
|
|
9
9
|
/**
|
|
10
|
-
* Extract images from PDF
|
|
10
|
+
* Extract images from PDF using alternative methods (without external dependencies)
|
|
11
11
|
*/
|
|
12
12
|
async extractImagesFromPDF(buffer) {
|
|
13
13
|
try {
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
const
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
14
|
+
console.log('đŧī¸ PDFExtractor: Using alternative image extraction method...');
|
|
15
|
+
// Parse the PDF to get basic information
|
|
16
|
+
const pdfParse = await import('pdf-parse');
|
|
17
|
+
const pdfData = await pdfParse.default(buffer);
|
|
18
|
+
console.log(`đ PDFExtractor: PDF has ${pdfData.numpages} pages, text length: ${pdfData.text?.length || 0}`);
|
|
19
|
+
// Check if this is an image-heavy PDF (little text, likely scanned)
|
|
20
|
+
const isImageHeavy = !pdfData.text || pdfData.text.trim().length < 100;
|
|
21
|
+
if (isImageHeavy && pdfData.numpages <= 3) {
|
|
22
|
+
console.log('đ PDF appears to be image-heavy with few pages - creating placeholder');
|
|
23
|
+
// Create placeholders for image-heavy PDFs
|
|
24
|
+
const extractedPages = [];
|
|
25
|
+
for (let page = 1; page <= Math.min(pdfData.numpages, 3); page++) {
|
|
26
|
+
const placeholderContent = `PDF Page ${page} Image Placeholder\n\nThis page appears to contain primarily image content.\nExternal tools would be required to extract the actual image.\n\nPage ${page} of ${pdfData.numpages}`;
|
|
27
|
+
const placeholderBuffer = Buffer.from(placeholderContent, 'utf-8');
|
|
28
|
+
// Save placeholder as a text file
|
|
29
|
+
const filename = `pdf_page_${page}_placeholder.txt`;
|
|
30
|
+
const fullPath = path.join(this.imageExtractor.imageDirectory, filename);
|
|
31
|
+
// Use the image extractor to save the placeholder
|
|
32
|
+
const savedPath = await this.imageExtractor.saveImage(placeholderBuffer, filename);
|
|
33
|
+
if (savedPath) {
|
|
34
|
+
extractedPages.push({
|
|
35
|
+
pageNumber: page,
|
|
36
|
+
imagePath: filename,
|
|
37
|
+
fullPath: savedPath
|
|
38
|
+
});
|
|
39
|
+
console.log(`â
Created placeholder for page ${page}`);
|
|
40
|
+
}
|
|
34
41
|
}
|
|
42
|
+
return extractedPages;
|
|
43
|
+
}
|
|
44
|
+
else {
|
|
45
|
+
console.log('âšī¸ PDF appears to be text-heavy - no image extraction needed');
|
|
46
|
+
return [];
|
|
35
47
|
}
|
|
36
|
-
return extractedPages;
|
|
37
48
|
}
|
|
38
49
|
catch (error) {
|
|
39
|
-
|
|
40
|
-
throw
|
|
50
|
+
console.warn('â ī¸ PDFExtractor alternative method failed:', error instanceof Error ? error.message : 'Unknown error');
|
|
51
|
+
// Don't throw, just return empty array to allow text processing to continue
|
|
52
|
+
return [];
|
|
41
53
|
}
|
|
42
54
|
}
|
|
43
55
|
/**
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pdf-extractor.js","sourceRoot":"","sources":["../../src/utils/pdf-extractor.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,WAAW,CAAC;
|
|
1
|
+
{"version":3,"file":"pdf-extractor.js","sourceRoot":"","sources":["../../src/utils/pdf-extractor.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AA2BrC,MAAM,OAAO,YAAY;IACN,cAAc,CAAiB;IACxC,WAAW,GAAW,CAAC,CAAC;IAEhC,YAAY,cAA8B;QACxC,IAAI,CAAC,cAAc,GAAG,cAAc,CAAC;IACvC,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,oBAAoB,CAAC,MAAc;QACvC,IAAI,CAAC;YACH,OAAO,CAAC,GAAG,CAAC,gEAAgE,CAAC,CAAC;YAE9E,yCAAyC;YACzC,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,WAAW,CAAC,CAAC;YAC3C,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;YAE/C,OAAO,CAAC,GAAG,CAAC,4BAA4B,OAAO,CAAC,QAAQ,wBAAwB,OAAO,CAAC,IAAI,EAAE,MAAM,IAAI,CAAC,EAAE,CAAC,CAAC;YAE7G,oEAAoE;YACpE,MAAM,YAAY,GAAG,CAAC,OAAO,CAAC,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,GAAG,CAAC;YAEvE,IAAI,YAAY,IAAI,OAAO,CAAC,QAAQ,IAAI,CAAC,EAAE,CAAC;gBAC1C,OAAO,CAAC,GAAG,CAAC,wEAAwE,CAAC,CAAC;gBAEtF,2CAA2C;gBAC3C,MAAM,cAAc,GAAe,EAAE,CAAC;gBACtC,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,IAAI,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,CAAC;oBACjE,MAAM,kBAAkB,GAAG,YAAY,IAAI,sJAAsJ,IAAI,OAAO,OAAO,CAAC,QAAQ,EAAE,CAAC;oBAC/N,MAAM,iBAAiB,GAAG,MAAM,CAAC,IAAI,CAAC,kBAAkB,EAAE,OAAO,CAAC,CAAC;oBAEnE,kCAAkC;oBAClC,MAAM,QAAQ,GAAG,YAAY,IAAI,kBAAkB,CAAC;oBACpD,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,cAAc,EAAE,QAAQ,CAAC,CAAC;oBAEzE,kDAAkD;oBAClD,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,SAAS,CAAC,iBAAiB,EAAE,QAAQ,CAAC,CAAC;oBAEnF,IAAI,SAAS,EAAE,CAAC;wBACd,cAAc,CAAC,IAAI,CAAC;4BAClB,UAAU,EAAE,IAAI;4BAChB,SAAS,EAAE,QAAQ;4BACnB,QAAQ,EAAE,SAAS;yBACpB,CAAC,CAAC;wBACH,OAAO,CAAC,GAAG,CAAC,kCAAkC,IAAI,EAAE,CAAC,CAAC;oBACxD,CAAC;gBACH,CAAC;gBAED,OAAO,cAAc,CAAC;YACxB,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,GAAG,CAAC,8DAA8D,CAAC,CAAC;gBAC5E,OAAO,EAAE,CAAC;YACZ,CAAC;QACH,CAAC;QAAC,OAAO,KAAc,EAAE,CAAC;YACxB,OAAO,CAAC,IAAI,CAAC,4CAA4C,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;YACrH,4EAA4E;YAC5E,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,qBAAqB,CAAC,IAAY,EAAE,QAAkB;QAC1D,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC/B,IAAI,YAAY,GAAG,EAAE,CAAC;QACtB,IAAI,OAAO,GAAG,KAAK,CAAC;QACpB,IAAI,SAAS,GAAe,EAAE,CAAC;QAE/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YAE7B,IAAI,CAAC,IAAI,EAAE,CAAC;gBACV,qBAAqB;gBACrB,IAAI,OAAO,EAAE,CAAC;oBACZ,YAAY,IAAI,IAAI,CAAC,eAAe,CAAC,SAAS,CAAC,CAAC;oBAChD,SAAS,GAAG,EAAE,CAAC;oBACf,OAAO,GAAG,KAAK,CAAC;gBAClB,CAAC;gBACD,YAAY,IAAI,IAAI,CAAC;gBACrB,SAAS;YACX,CAAC;YAED,iEAAiE;YACjE,IAAI,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC,EAAE,CAAC;gBACzC,IAAI,OAAO,EAAE,CAAC;oBACZ,YAAY,IAAI,IAAI,CAAC,eAAe,CAAC,SAAS,CAAC,CAAC;oBAChD,SAAS,GAAG,EAAE,CAAC;oBACf,OAAO,GAAG,KAAK,CAAC;gBAClB,CAAC;gBAED,MAAM,YAAY,GAAG,IAAI,CAAC,qBAAqB,CAAC,IAAI,CAAC,CAAC;gBACtD,YAAY,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,YAAY,CAAC,IAAI,IAAI,MAAM,CAAC;gBAC1D,SAAS;YACX,CAAC;YAED,4BAA4B;YAC5B,IAAI,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,EAAE,CAAC;gBAChC,IAAI,CAAC,OAAO,EAAE,CAAC;oBACb,OAAO,GAAG,IAAI,CAAC;gBACjB,CAAC;gBACD,SAAS,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACpD,SAAS;YACX,CAAC;iBAAM,IAAI,OAAO,EAAE,CAAC;gBACnB,eAAe;gBACf,YAAY,IAAI,IAAI,CAAC,eAAe,CAAC,SAAS,CAAC,CAAC;gBAChD,SAAS,GAAG,EAAE,CAAC;gBACf,OAAO,GAAG,KAAK,CAAC;YAClB,CAAC;YAED,eAAe;YACf,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC1B,YAAY,IAAI,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC;gBACjD,SAAS;YACX,CAAC;YAED,oBAAoB;YACpB,YAAY,IAAI,GAAG,IAAI,IAAI,CAAC;QAC9B,CAAC;QAED,6BAA6B;QAC7B,IAAI,OAAO,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpC,YAAY,IAAI,IAAI,CAAC,eAAe,CAAC,SAAS,CAAC,CAAC;QAClD,CAAC;QAED,OAAO,YAAY,CAAC;IACtB,CAAC;IAEO,eAAe,CAAC,IAAY,EAAE,QAA2B,EAAE,KAAa;QAC9E,qCAAqC;QACrC,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE;YAAE,OAAO,KAAK,CAAC,CAAC,2BAA2B;QAC/D,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,KAAK,CAAC,CAAE,YAAY;QAEhD,+CAA+C;QAC/C,IAAI,IAAI,KAAK,IAAI,CAAC,WAAW,EAAE,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,IAAI,CAAC;QAEhE,0CAA0C;QAC1C,MAAM,QAAQ,GAAG,QAAQ,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;QACrC,IAAI,QAAQ,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YAC3D,OAAO,IAAI,CAAC;QACd,CAAC;QAED,iDAAiD;QACjD,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC;YAAE,OAAO,IAAI,CAAC;QAEpC,OAAO,KAAK,CAAC;IACf,CAAC;IAEO,qBAAqB,CAAC,IAAY;QACxC,IAAI,IAAI,KAAK,IAAI,CAAC,WAAW,EAAE;YAAE,OAAO,CAAC,CAAC,CAAC,2BAA2B;QACtE,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC;YAAE,OAAO,CAAC,CAAC,CAAS,4BAA4B;QACtE,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE;YAAE,OAAO,CAAC,CAAC,CAAW,qBAAqB;QAC/D,OAAO,CAAC,CAAC,CAAC,UAAU;IACtB,CAAC;IAEO,gBAAgB,CAAC,IAAY;QACnC,8CAA8C;QAC9C,MAAM,QAAQ,GAAG;YACf,KAAK;YACL,QAAQ,EAAkB,kBAAkB;YAC5C,IAAI,EAAsB,iBAAiB;YAC3C,WAAW;YACX,cAAc;SACf,CAAC;QAEF,OAAO,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;IACtD,CAAC;IAEO,aAAa,CAAC,IAAY;QAChC,sDAAsD;QACtD,IAAI,OAAO,GAAa,EAAE,CAAC;QAE3B,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YACxB,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC;QACpD,CAAC;aAAM,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YAC9B,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC;QACnD,CAAC;aAAM,CAAC;YACN,2BAA2B;YAC3B,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC;QACxD,CAAC;QAED,OAAO,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC/C,CAAC;IAEO,eAAe,CAAC,IAAyB;QAC/C,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAEjC,iCAAiC;QACjC,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;QAE/D,IAAI,QAAQ,GAAG,EAAE,CAAC;QAElB,KAAK,MAAM,CAAC,CAAC,EAAE,GAAG,CAAC,IAAI,IAAI,CAAC,OAAO,EAAE,EAAE,CAAC;YACtC,IAAI,WAAW,GAAG,GAAG,CAAC;YAEtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,EAAE,CAAC,EAAE,EAAE,CAAC;gBACjC,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;gBAChC,WAAW,IAAI,IAAI,IAAI,IAAI,CAAC;YAC9B,CAAC;YAED,QAAQ,IAAI,GAAG,WAAW,IAAI,CAAC;YAE/B,uCAAuC;YACvC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;gBACZ,IAAI,SAAS,GAAG,GAAG,CAAC;gBACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,EAAE,CAAC,EAAE,EAAE,CAAC;oBACjC,SAAS,IAAI,QAAQ,CAAC;gBACxB,CAAC;gBACD,QAAQ,IAAI,GAAG,SAAS,IAAI,CAAC;YAC/B,CAAC;QACH,CAAC;QAED,OAAO,GAAG,QAAQ,IAAI,CAAC;IACzB,CAAC;IAEO,UAAU,CAAC,IAAY;QAC7B,kCAAkC;QAClC,MAAM,YAAY,GAAG;YACnB,cAAc;YACd,cAAc;YACd,mBAAmB;YACnB,kBAAkB;SACnB,CAAC;QAEF,OAAO,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;IAC1D,CAAC;IAEO,cAAc,CAAC,IAAY;QACjC,2CAA2C;QAC3C,IAAI,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC9B,OAAO,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE,KAAK,CAAC,CAAC;QAC7C,CAAC;aAAM,IAAI,mBAAmB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC1C,OAAO,IAAI,CAAC,OAAO,CAAC,mBAAmB,EAAE,IAAI,CAAC,CAAC;QACjD,CAAC;aAAM,IAAI,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YACzC,OAAO,IAAI,CAAC,OAAO,CAAC,kBAAkB,EAAE,IAAI,CAAC,CAAC;QAChD,CAAC;aAAM,CAAC;YACN,OAAO,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE,IAAI,CAAC,CAAC;QAC5C,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,gBAAgB,CAAC,UAA+B;QACpD,IAAI,QAAQ,GAAG,EAAE,CAAC;QAElB,KAAK,MAAM,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC;YAC7C,QAAQ,IAAI,WAAW,IAAI,CAAC,UAAU,MAAM,CAAC;YAC7C,QAAQ,IAAI,IAAI,CAAC,cAAc,CAAC,gBAAgB,CAAC,QAAQ,IAAI,CAAC,UAAU,EAAE,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;YAC5F,QAAQ,IAAI,MAAM,CAAC;YAEnB,IAAI,CAAC,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC9B,QAAQ,IAAI,SAAS,CAAC,CAAC,iBAAiB;YAC1C,CAAC;QACH,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;OAEG;IACH,KAAK;QACH,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,IAAI,gBAAgB;QAClB,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;CACF"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "file2md",
|
|
3
|
-
"version": "1.2.
|
|
3
|
+
"version": "1.2.27",
|
|
4
4
|
"description": "A TypeScript library for converting various document types (PDF, DOCX, XLSX, PPTX, HWP, HWPX) into Markdown with image and layout preservation",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|