firecrawl-pdf-inspector 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,99 @@
1
+ # firecrawl-pdf-inspector
2
+
3
+ Fast PDF classification and region-based text extraction for Node.js/Bun. Native Rust performance via [napi-rs](https://napi.rs).
4
+
5
+ Built by [Firecrawl](https://firecrawl.dev) for hybrid OCR pipelines — extract text from PDF structure where possible, fall back to OCR only when needed.
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ npm install firecrawl-pdf-inspector
11
+ # or
12
+ bun add firecrawl-pdf-inspector
13
+ ```
14
+
15
+ Prebuilt binaries are included for **linux-x64** and **macOS ARM64**, so no Rust toolchain is needed.
16
+
17
+ ## API
18
+
19
+ ### `classifyPdf(buffer: Buffer): PdfClassification`
20
+
21
+ Classify a PDF as TextBased, Scanned, Mixed, or ImageBased (~10-50ms). Returns which pages need OCR.
22
+
23
+ ```typescript
24
+ import { classifyPdf } from 'firecrawl-pdf-inspector'
25
+ import { readFileSync } from 'fs'
26
+
27
+ const pdf = readFileSync('document.pdf')
28
+ const result = classifyPdf(pdf)
29
+
30
+ console.log(result.pdfType) // "TextBased" | "Scanned" | "Mixed" | "ImageBased"
31
+ console.log(result.pageCount) // 42
32
+ console.log(result.pagesNeedingOcr) // [5, 12, 15] (0-indexed)
33
+ console.log(result.confidence) // 0.875
34
+ ```
35
+
36
+ ### `extractTextInRegions(buffer: Buffer, pageRegions: PageRegions[]): PageRegionTexts[]`
37
+
38
+ Extract text within bounding-box regions of a PDF. Designed for hybrid OCR pipelines: a layout model detects regions in rendered page images, and this function reads the text for those regions directly from the PDF structure on text-based pages — skipping GPU OCR.
39
+
40
+ Each region result includes a `needsOcr` flag that signals unreliable extraction (empty text, GID-encoded fonts, garbage text, encoding issues).
41
+
42
+ ```typescript
43
+ import { extractTextInRegions } from 'firecrawl-pdf-inspector'
44
+
45
+ const result = extractTextInRegions(pdf, [
46
+ {
47
+ page: 0, // 0-indexed
48
+ regions: [
49
+ [0, 0, 300, 400], // [x1, y1, x2, y2] in PDF points, top-left origin
50
+ [300, 0, 612, 400],
51
+ ]
52
+ }
53
+ ])
54
+
55
+ for (const region of result[0].regions) {
56
+ if (region.needsOcr) {
57
+ // Unreliable text — send this region to OCR instead
58
+ } else {
59
+ console.log(region.text) // Extracted text in reading order
60
+ }
61
+ }
62
+ ```
63
+
64
+ ## Types
65
+
66
+ ```typescript
67
+ interface PdfClassification {
68
+ pdfType: string // "TextBased" | "Scanned" | "Mixed" | "ImageBased"
69
+ pageCount: number
70
+ pagesNeedingOcr: number[] // 0-indexed page numbers
71
+ confidence: number // 0.0 - 1.0
72
+ }
73
+
74
+ interface PageRegions {
75
+ page: number // 0-indexed
76
+ regions: number[][] // [[x1, y1, x2, y2], ...] in PDF points, top-left origin
77
+ }
78
+
79
+ interface PageRegionTexts {
80
+ page: number
81
+ regions: RegionText[]
82
+ }
83
+
84
+ interface RegionText {
85
+ text: string
86
+ needsOcr: boolean // true when text is unreliable
87
+ }
88
+ ```
89
+
90
+ ## Platforms
91
+
92
+ | Platform | Architecture | Supported |
93
+ |----------|-------------|-----------|
94
+ | Linux | x64 | Yes |
95
+ | macOS | ARM64 | Yes |
96
+
97
+ ## License
98
+
99
+ MIT
package/index.d.ts CHANGED
@@ -1,11 +1,18 @@
1
1
  /* auto-generated by NAPI-RS */
2
2
  /* eslint-disable */
3
3
  /**
4
- * Classify a PDF: detect type (TextBased/Scanned/Mixed/ImageBased),
5
- * page count, and which pages need OCR. Takes PDF bytes as Buffer.
4
+ * Lightweight PDF classification — returns the PDF type, page count, and the pages that need OCR.
5
+ * Faster than detectPdf as it skips building the full PdfResult.
6
+ * Pages in pagesNeedingOcr are 0-indexed.
6
7
  */
7
8
  export declare function classifyPdf(buffer: Buffer): PdfClassification
8
9
 
10
+ /** Fast detection only — no text extraction or markdown. */
11
+ export declare function detectPdf(buffer: Buffer): PdfResult
12
+
13
+ /** Extract plain text from a PDF Buffer. */
14
+ export declare function extractText(buffer: Buffer): string
15
+
9
16
  /**
10
17
  * Extract text within bounding-box regions from a PDF.
11
18
  *
@@ -13,13 +20,16 @@ export declare function classifyPdf(buffer: Buffer): PdfClassification
13
20
  * this extracts PDF text within those regions — skipping GPU OCR
14
21
  * for text-based pages.
15
22
  *
16
- * Each region result includes `needs_ocr` — set when the extracted text
23
+ * Each region result includes `needsOcr` — set when the extracted text
17
24
  * is unreliable (empty, GID-encoded fonts, garbage, encoding issues).
18
25
  *
19
26
  * Coordinates are PDF points with top-left origin.
20
27
  */
21
28
  export declare function extractTextInRegions(buffer: Buffer, pageRegions: Array<PageRegions>): Array<PageRegionTexts>
22
29
 
30
+ /** Extract text with position information from a PDF Buffer. */
31
+ export declare function extractTextWithPositions(buffer: Buffer, pages?: Array<number> | undefined | null): Array<TextItem>
32
+
23
33
  /** A page's regions for text extraction: (page_index_0based, bboxes). */
24
34
  export interface PageRegions {
25
35
  page: number
@@ -37,13 +47,48 @@ export interface PageRegionTexts {
37
47
  export interface PdfClassification {
38
48
  pdfType: string
39
49
  pageCount: number
50
+ /** 0-indexed page numbers that need OCR. */
51
+ pagesNeedingOcr: Array<number>
52
+ confidence: number
53
+ }
54
+
55
+ /** Full PDF processing result with markdown and metadata. */
56
+ export interface PdfResult {
57
+ pdfType: string
58
+ markdown?: string
59
+ pageCount: number
60
+ processingTimeMs: number
61
+ /** 1-indexed page numbers that need OCR. */
40
62
  pagesNeedingOcr: Array<number>
63
+ title?: string
41
64
  confidence: number
65
+ isComplexLayout: boolean
66
+ pagesWithTables: Array<number>
67
+ pagesWithColumns: Array<number>
68
+ hasEncodingIssues: boolean
42
69
  }
43
70
 
71
+ /** Process a PDF from a Buffer: detect type, extract text, and convert to Markdown. */
72
+ export declare function processPdf(buffer: Buffer, pages?: Array<number> | undefined | null): PdfResult
73
+
44
74
  /** Extracted text for a single region. */
45
75
  export interface RegionText {
46
76
  text: string
47
77
  /** `true` when the text should not be trusted (empty, GID fonts, garbage, encoding issues). */
48
78
  needsOcr: boolean
49
79
  }
80
+
81
+ /** A positioned text item extracted from a PDF. */
82
+ export interface TextItem {
83
+ text: string
84
+ x: number
85
+ y: number
86
+ width: number
87
+ height: number
88
+ font: string
89
+ fontSize: number
90
+ page: number
91
+ isBold: boolean
92
+ isItalic: boolean
93
+ itemType: string
94
+ }
package/index.js CHANGED
@@ -77,8 +77,8 @@ function requireNative() {
77
77
  try {
78
78
  const binding = require('firecrawl-pdf-inspector-android-arm64')
79
79
  const bindingPackageVersion = require('firecrawl-pdf-inspector-android-arm64/package.json').version
80
- if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
81
- throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
80
+ if (bindingPackageVersion !== '0.3.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
81
+ throw new Error(`Native binding package version mismatch, expected 0.3.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
82
82
  }
83
83
  return binding
84
84
  } catch (e) {
@@ -93,8 +93,8 @@ function requireNative() {
93
93
  try {
94
94
  const binding = require('firecrawl-pdf-inspector-android-arm-eabi')
95
95
  const bindingPackageVersion = require('firecrawl-pdf-inspector-android-arm-eabi/package.json').version
96
- if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
97
- throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
96
+ if (bindingPackageVersion !== '0.3.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
97
+ throw new Error(`Native binding package version mismatch, expected 0.3.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
98
98
  }
99
99
  return binding
100
100
  } catch (e) {
@@ -114,8 +114,8 @@ function requireNative() {
114
114
  try {
115
115
  const binding = require('firecrawl-pdf-inspector-win32-x64-gnu')
116
116
  const bindingPackageVersion = require('firecrawl-pdf-inspector-win32-x64-gnu/package.json').version
117
- if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
118
- throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
117
+ if (bindingPackageVersion !== '0.3.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
118
+ throw new Error(`Native binding package version mismatch, expected 0.3.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
119
119
  }
120
120
  return binding
121
121
  } catch (e) {
@@ -130,8 +130,8 @@ function requireNative() {
130
130
  try {
131
131
  const binding = require('firecrawl-pdf-inspector-win32-x64-msvc')
132
132
  const bindingPackageVersion = require('firecrawl-pdf-inspector-win32-x64-msvc/package.json').version
133
- if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
134
- throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
133
+ if (bindingPackageVersion !== '0.3.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
134
+ throw new Error(`Native binding package version mismatch, expected 0.3.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
135
135
  }
136
136
  return binding
137
137
  } catch (e) {
@@ -147,8 +147,8 @@ function requireNative() {
147
147
  try {
148
148
  const binding = require('firecrawl-pdf-inspector-win32-ia32-msvc')
149
149
  const bindingPackageVersion = require('firecrawl-pdf-inspector-win32-ia32-msvc/package.json').version
150
- if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
151
- throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
150
+ if (bindingPackageVersion !== '0.3.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
151
+ throw new Error(`Native binding package version mismatch, expected 0.3.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
152
152
  }
153
153
  return binding
154
154
  } catch (e) {
@@ -163,8 +163,8 @@ function requireNative() {
163
163
  try {
164
164
  const binding = require('firecrawl-pdf-inspector-win32-arm64-msvc')
165
165
  const bindingPackageVersion = require('firecrawl-pdf-inspector-win32-arm64-msvc/package.json').version
166
- if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
167
- throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
166
+ if (bindingPackageVersion !== '0.3.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
167
+ throw new Error(`Native binding package version mismatch, expected 0.3.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
168
168
  }
169
169
  return binding
170
170
  } catch (e) {
@@ -182,8 +182,8 @@ function requireNative() {
182
182
  try {
183
183
  const binding = require('firecrawl-pdf-inspector-darwin-universal')
184
184
  const bindingPackageVersion = require('firecrawl-pdf-inspector-darwin-universal/package.json').version
185
- if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
186
- throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
185
+ if (bindingPackageVersion !== '0.3.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
186
+ throw new Error(`Native binding package version mismatch, expected 0.3.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
187
187
  }
188
188
  return binding
189
189
  } catch (e) {
@@ -198,8 +198,8 @@ function requireNative() {
198
198
  try {
199
199
  const binding = require('firecrawl-pdf-inspector-darwin-x64')
200
200
  const bindingPackageVersion = require('firecrawl-pdf-inspector-darwin-x64/package.json').version
201
- if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
202
- throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
201
+ if (bindingPackageVersion !== '0.3.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
202
+ throw new Error(`Native binding package version mismatch, expected 0.3.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
203
203
  }
204
204
  return binding
205
205
  } catch (e) {
@@ -214,8 +214,8 @@ function requireNative() {
214
214
  try {
215
215
  const binding = require('firecrawl-pdf-inspector-darwin-arm64')
216
216
  const bindingPackageVersion = require('firecrawl-pdf-inspector-darwin-arm64/package.json').version
217
- if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
218
- throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
217
+ if (bindingPackageVersion !== '0.3.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
218
+ throw new Error(`Native binding package version mismatch, expected 0.3.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
219
219
  }
220
220
  return binding
221
221
  } catch (e) {
@@ -234,8 +234,8 @@ function requireNative() {
234
234
  try {
235
235
  const binding = require('firecrawl-pdf-inspector-freebsd-x64')
236
236
  const bindingPackageVersion = require('firecrawl-pdf-inspector-freebsd-x64/package.json').version
237
- if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
238
- throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
237
+ if (bindingPackageVersion !== '0.3.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
238
+ throw new Error(`Native binding package version mismatch, expected 0.3.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
239
239
  }
240
240
  return binding
241
241
  } catch (e) {
@@ -250,8 +250,8 @@ function requireNative() {
250
250
  try {
251
251
  const binding = require('firecrawl-pdf-inspector-freebsd-arm64')
252
252
  const bindingPackageVersion = require('firecrawl-pdf-inspector-freebsd-arm64/package.json').version
253
- if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
254
- throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
253
+ if (bindingPackageVersion !== '0.3.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
254
+ throw new Error(`Native binding package version mismatch, expected 0.3.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
255
255
  }
256
256
  return binding
257
257
  } catch (e) {
@@ -271,8 +271,8 @@ function requireNative() {
271
271
  try {
272
272
  const binding = require('firecrawl-pdf-inspector-linux-x64-musl')
273
273
  const bindingPackageVersion = require('firecrawl-pdf-inspector-linux-x64-musl/package.json').version
274
- if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
275
- throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
274
+ if (bindingPackageVersion !== '0.3.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
275
+ throw new Error(`Native binding package version mismatch, expected 0.3.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
276
276
  }
277
277
  return binding
278
278
  } catch (e) {
@@ -287,8 +287,8 @@ function requireNative() {
287
287
  try {
288
288
  const binding = require('firecrawl-pdf-inspector-linux-x64-gnu')
289
289
  const bindingPackageVersion = require('firecrawl-pdf-inspector-linux-x64-gnu/package.json').version
290
- if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
291
- throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
290
+ if (bindingPackageVersion !== '0.3.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
291
+ throw new Error(`Native binding package version mismatch, expected 0.3.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
292
292
  }
293
293
  return binding
294
294
  } catch (e) {
@@ -305,8 +305,8 @@ function requireNative() {
305
305
  try {
306
306
  const binding = require('firecrawl-pdf-inspector-linux-arm64-musl')
307
307
  const bindingPackageVersion = require('firecrawl-pdf-inspector-linux-arm64-musl/package.json').version
308
- if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
309
- throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
308
+ if (bindingPackageVersion !== '0.3.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
309
+ throw new Error(`Native binding package version mismatch, expected 0.3.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
310
310
  }
311
311
  return binding
312
312
  } catch (e) {
@@ -321,8 +321,8 @@ function requireNative() {
321
321
  try {
322
322
  const binding = require('firecrawl-pdf-inspector-linux-arm64-gnu')
323
323
  const bindingPackageVersion = require('firecrawl-pdf-inspector-linux-arm64-gnu/package.json').version
324
- if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
325
- throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
324
+ if (bindingPackageVersion !== '0.3.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
325
+ throw new Error(`Native binding package version mismatch, expected 0.3.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
326
326
  }
327
327
  return binding
328
328
  } catch (e) {
@@ -339,8 +339,8 @@ function requireNative() {
339
339
  try {
340
340
  const binding = require('firecrawl-pdf-inspector-linux-arm-musleabihf')
341
341
  const bindingPackageVersion = require('firecrawl-pdf-inspector-linux-arm-musleabihf/package.json').version
342
- if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
343
- throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
342
+ if (bindingPackageVersion !== '0.3.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
343
+ throw new Error(`Native binding package version mismatch, expected 0.3.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
344
344
  }
345
345
  return binding
346
346
  } catch (e) {
@@ -355,8 +355,8 @@ function requireNative() {
355
355
  try {
356
356
  const binding = require('firecrawl-pdf-inspector-linux-arm-gnueabihf')
357
357
  const bindingPackageVersion = require('firecrawl-pdf-inspector-linux-arm-gnueabihf/package.json').version
358
- if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
359
- throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
358
+ if (bindingPackageVersion !== '0.3.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
359
+ throw new Error(`Native binding package version mismatch, expected 0.3.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
360
360
  }
361
361
  return binding
362
362
  } catch (e) {
@@ -373,8 +373,8 @@ function requireNative() {
373
373
  try {
374
374
  const binding = require('firecrawl-pdf-inspector-linux-loong64-musl')
375
375
  const bindingPackageVersion = require('firecrawl-pdf-inspector-linux-loong64-musl/package.json').version
376
- if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
377
- throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
376
+ if (bindingPackageVersion !== '0.3.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
377
+ throw new Error(`Native binding package version mismatch, expected 0.3.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
378
378
  }
379
379
  return binding
380
380
  } catch (e) {
@@ -389,8 +389,8 @@ function requireNative() {
389
389
  try {
390
390
  const binding = require('firecrawl-pdf-inspector-linux-loong64-gnu')
391
391
  const bindingPackageVersion = require('firecrawl-pdf-inspector-linux-loong64-gnu/package.json').version
392
- if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
393
- throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
392
+ if (bindingPackageVersion !== '0.3.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
393
+ throw new Error(`Native binding package version mismatch, expected 0.3.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
394
394
  }
395
395
  return binding
396
396
  } catch (e) {
@@ -407,8 +407,8 @@ function requireNative() {
407
407
  try {
408
408
  const binding = require('firecrawl-pdf-inspector-linux-riscv64-musl')
409
409
  const bindingPackageVersion = require('firecrawl-pdf-inspector-linux-riscv64-musl/package.json').version
410
- if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
411
- throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
410
+ if (bindingPackageVersion !== '0.3.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
411
+ throw new Error(`Native binding package version mismatch, expected 0.3.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
412
412
  }
413
413
  return binding
414
414
  } catch (e) {
@@ -423,8 +423,8 @@ function requireNative() {
423
423
  try {
424
424
  const binding = require('firecrawl-pdf-inspector-linux-riscv64-gnu')
425
425
  const bindingPackageVersion = require('firecrawl-pdf-inspector-linux-riscv64-gnu/package.json').version
426
- if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
427
- throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
426
+ if (bindingPackageVersion !== '0.3.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
427
+ throw new Error(`Native binding package version mismatch, expected 0.3.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
428
428
  }
429
429
  return binding
430
430
  } catch (e) {
@@ -440,8 +440,8 @@ function requireNative() {
440
440
  try {
441
441
  const binding = require('firecrawl-pdf-inspector-linux-ppc64-gnu')
442
442
  const bindingPackageVersion = require('firecrawl-pdf-inspector-linux-ppc64-gnu/package.json').version
443
- if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
444
- throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
443
+ if (bindingPackageVersion !== '0.3.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
444
+ throw new Error(`Native binding package version mismatch, expected 0.3.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
445
445
  }
446
446
  return binding
447
447
  } catch (e) {
@@ -456,8 +456,8 @@ function requireNative() {
456
456
  try {
457
457
  const binding = require('firecrawl-pdf-inspector-linux-s390x-gnu')
458
458
  const bindingPackageVersion = require('firecrawl-pdf-inspector-linux-s390x-gnu/package.json').version
459
- if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
460
- throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
459
+ if (bindingPackageVersion !== '0.3.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
460
+ throw new Error(`Native binding package version mismatch, expected 0.3.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
461
461
  }
462
462
  return binding
463
463
  } catch (e) {
@@ -476,8 +476,8 @@ function requireNative() {
476
476
  try {
477
477
  const binding = require('firecrawl-pdf-inspector-openharmony-arm64')
478
478
  const bindingPackageVersion = require('firecrawl-pdf-inspector-openharmony-arm64/package.json').version
479
- if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
480
- throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
479
+ if (bindingPackageVersion !== '0.3.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
480
+ throw new Error(`Native binding package version mismatch, expected 0.3.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
481
481
  }
482
482
  return binding
483
483
  } catch (e) {
@@ -492,8 +492,8 @@ function requireNative() {
492
492
  try {
493
493
  const binding = require('firecrawl-pdf-inspector-openharmony-x64')
494
494
  const bindingPackageVersion = require('firecrawl-pdf-inspector-openharmony-x64/package.json').version
495
- if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
496
- throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
495
+ if (bindingPackageVersion !== '0.3.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
496
+ throw new Error(`Native binding package version mismatch, expected 0.3.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
497
497
  }
498
498
  return binding
499
499
  } catch (e) {
@@ -508,8 +508,8 @@ function requireNative() {
508
508
  try {
509
509
  const binding = require('firecrawl-pdf-inspector-openharmony-arm')
510
510
  const bindingPackageVersion = require('firecrawl-pdf-inspector-openharmony-arm/package.json').version
511
- if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
512
- throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
511
+ if (bindingPackageVersion !== '0.3.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
512
+ throw new Error(`Native binding package version mismatch, expected 0.3.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
513
513
  }
514
514
  return binding
515
515
  } catch (e) {
@@ -577,4 +577,8 @@ if (!nativeBinding) {
577
577
 
578
578
  module.exports = nativeBinding
579
579
  module.exports.classifyPdf = nativeBinding.classifyPdf
580
+ module.exports.detectPdf = nativeBinding.detectPdf
581
+ module.exports.extractText = nativeBinding.extractText
580
582
  module.exports.extractTextInRegions = nativeBinding.extractTextInRegions
583
+ module.exports.extractTextWithPositions = nativeBinding.extractTextWithPositions
584
+ module.exports.processPdf = nativeBinding.processPdf
package/package.json CHANGED
@@ -1,18 +1,32 @@
1
1
  {
2
2
  "name": "firecrawl-pdf-inspector",
3
- "version": "0.2.2",
3
+ "version": "0.3.0",
4
+ "description": "Fast PDF classification and text extraction. Detect text-based vs scanned PDFs, extract text by region with quality checks. Native Rust performance via napi-rs.",
4
5
  "main": "index.js",
5
6
  "types": "index.d.ts",
6
7
  "license": "MIT",
8
+ "keywords": [
9
+ "pdf",
10
+ "pdf-extraction",
11
+ "pdf-parser",
12
+ "text-extraction",
13
+ "ocr",
14
+ "pdf-classification",
15
+ "napi",
16
+ "rust",
17
+ "firecrawl"
18
+ ],
7
19
  "files": [
8
20
  "index.js",
9
21
  "index.d.ts",
10
- "*.node"
22
+ "*.node",
23
+ "README.md"
11
24
  ],
12
25
  "repository": {
13
26
  "type": "git",
14
27
  "url": "https://github.com/firecrawl/pdf-inspector"
15
28
  },
29
+ "homepage": "https://github.com/firecrawl/pdf-inspector",
16
30
  "publishConfig": {
17
31
  "access": "public"
18
32
  },
Binary file
Binary file