@monostate/node-scraper 2.2.0 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/index.js +6 -2
  2. package/package.json +1 -1
package/index.js CHANGED
@@ -4,7 +4,7 @@ import { existsSync, statSync } from 'fs';
4
4
  import path from 'path';
5
5
  import { fileURLToPath } from 'url';
6
6
  import { promises as fsPromises } from 'fs';
7
- import { PDFParse } from 'pdf-parse';
7
+ let PDFParse = null;
8
8
  import browserPool from './browser-pool.js';
9
9
 
10
10
  let puppeteer = null;
@@ -860,7 +860,11 @@ ${parsedContent.headings?.length ? `\nHeadings:\n${parsedContent.headings.map(h
860
860
  };
861
861
  }
862
862
 
863
- // Parse PDF with pdf-parse v2 API
863
+ // Lazy-load pdf-parse (pdfjs-dist requires DOMMatrix, only available in Node 22+)
864
+ if (!PDFParse) {
865
+ const mod = await import('pdf-parse');
866
+ PDFParse = mod.PDFParse;
867
+ }
864
868
  const parser = new PDFParse({ data: new Uint8Array(buffer) });
865
869
  await parser.load();
866
870
  const textResult = await parser.getText();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@monostate/node-scraper",
3
- "version": "2.2.0",
3
+ "version": "2.2.1",
4
4
  "description": "Intelligent web scraping with AI Q&A, PDF support and multi-level fallback system - 11x faster than traditional scrapers",
5
5
  "type": "module",
6
6
  "main": "index.js",