@monostate/node-scraper 2.2.0 → 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +6 -2
- package/package.json +1 -1
package/index.js
CHANGED
|
@@ -4,7 +4,7 @@ import { existsSync, statSync } from 'fs';
|
|
|
4
4
|
import path from 'path';
|
|
5
5
|
import { fileURLToPath } from 'url';
|
|
6
6
|
import { promises as fsPromises } from 'fs';
|
|
7
|
-
|
|
7
|
+
let PDFParse = null;
|
|
8
8
|
import browserPool from './browser-pool.js';
|
|
9
9
|
|
|
10
10
|
let puppeteer = null;
|
|
@@ -860,7 +860,11 @@ ${parsedContent.headings?.length ? `\nHeadings:\n${parsedContent.headings.map(h
|
|
|
860
860
|
};
|
|
861
861
|
}
|
|
862
862
|
|
|
863
|
-
//
|
|
863
|
+
// Lazy-load pdf-parse (pdfjs-dist requires DOMMatrix, only available in Node 22+)
|
|
864
|
+
if (!PDFParse) {
|
|
865
|
+
const mod = await import('pdf-parse');
|
|
866
|
+
PDFParse = mod.PDFParse;
|
|
867
|
+
}
|
|
864
868
|
const parser = new PDFParse({ data: new Uint8Array(buffer) });
|
|
865
869
|
await parser.load();
|
|
866
870
|
const textResult = await parser.getText();
|
package/package.json
CHANGED