@d0paminedriven/pdfdown-ocr 0.9.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. package/README.md +116 -12
  2. package/index.d.ts +34 -0
  3. package/index.js +52 -50
  4. package/package.json +1 -1
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # `@d0paminedriven/pdfdown-ocr`
2
2
 
3
- Rust-powered PDF extraction for Node.js with Tesseract OCR fallback for image-only pages. A superset of [`@d0paminedriven/pdfdown`](https://www.npmjs.com/package/@d0paminedriven/pdfdown) includes all base extraction APIs (text, images, annotations, structured text, metadata) plus OCR.
3
+ Rust-powered PDF extraction for Node.js with Tesseract OCR fallback for image-only pages. A superset of [`@d0paminedriven/pdfdown`](https://www.npmjs.com/package/@d0paminedriven/pdfdown) -- includes all base extraction APIs (text, images, annotations, structured text, metadata) plus OCR.
4
4
 
5
5
  **System requirement:** [Tesseract](https://github.com/tesseract-ocr/tesseract) 5.x must be installed on the host.
6
6
 
@@ -13,7 +13,7 @@ npm install @d0paminedriven/pdfdown-ocr
13
13
  ### Tesseract setup
14
14
 
15
15
  ```bash
16
- # Ubuntu/Debian (22.04 ships tesseract 3.x use the PPA for 5.x)
16
+ # Ubuntu/Debian (22.04 ships tesseract 4.x -- use the PPA for 5.x)
17
17
  sudo add-apt-repository ppa:alex-p/tesseract-ocr5
18
18
  sudo apt update
19
19
  sudo apt install tesseract-ocr tesseract-ocr-eng -y
@@ -27,15 +27,38 @@ brew install tesseract
27
27
  sudo pacman -S tesseract tesseract-data-eng
28
28
  ```
29
29
 
30
- Verify with `tesseract --version` you should see 5.x.
30
+ Verify with `tesseract --version` -- you should see 5.x.
31
+
32
+ ### Tessdata auto-detection
33
+
34
+ The package automatically detects the tessdata directory at runtime by parsing the output of `tesseract --list-langs`. The detected path is cached for the lifetime of the process using a `OnceLock<Option<String>>` -- no global environment mutation, fully thread-safe.
35
+
36
+ **Resolution order:**
37
+
38
+ 1. `TESSDATA_PREFIX` environment variable (if set, used as-is -- no auto-detection runs)
39
+ 2. Auto-detection via `tesseract --list-langs` (parses the path from `List of available languages in "/path/to/tessdata/"`)
40
+ 3. Tesseract's compiled-in default (if neither of the above yields a path)
41
+
42
+ Most users will not need to set `TESSDATA_PREFIX` at all. The auto-detection handles standard installations on Ubuntu (`/usr/share/tesseract-ocr/5/tessdata/`), macOS Homebrew (`/opt/homebrew/share/tessdata/`), Arch, and any other layout where `tesseract` is on `PATH`.
43
+
44
+ Set `TESSDATA_PREFIX` explicitly only if:
45
+
46
+ - Tesseract is not on `PATH` but the tessdata directory exists elsewhere
47
+ - You want to override the detected path (e.g., pointing to a custom-trained data directory)
48
+
49
+ ```bash
50
+ # Override example (not usually needed)
51
+ export TESSDATA_PREFIX="/opt/custom/tessdata"
52
+ ```
31
53
 
32
54
  ## API
33
55
 
34
- This package exports everything from `@d0paminedriven/pdfdown` (text, images, annotations, structured text, metadata both sync and async), plus the OCR-specific APIs below. See the [base package docs](https://www.npmjs.com/package/@d0paminedriven/pdfdown) for the full base API.
56
+ This package exports everything from `@d0paminedriven/pdfdown` (text, images, annotations, structured text, metadata -- both sync and async), plus the OCR-specific APIs below. See the [base package docs](https://www.npmjs.com/package/@d0paminedriven/pdfdown) for the full base API.
35
57
 
36
58
  ### OCR standalone functions
37
59
 
38
60
  ```typescript
61
+ // Per-page OCR text extraction
39
62
  export declare function extractTextWithOcrPerPage(
40
63
  buffer: Buffer,
41
64
  opts?: OcrOptions,
@@ -45,6 +68,17 @@ export declare function extractTextWithOcrPerPageAsync(
45
68
  buffer: Buffer,
46
69
  opts?: OcrOptions,
47
70
  ): Promise<Array<OcrPageText>>
71
+
72
+ // Full document extraction with OCR text fallback
73
+ export declare function pdfDocumentOcr(
74
+ buffer: Buffer,
75
+ opts?: OcrOptions,
76
+ ): PdfDocumentOcr
77
+
78
+ export declare function pdfDocumentOcrAsync(
79
+ buffer: Buffer,
80
+ opts?: OcrOptions,
81
+ ): Promise<PdfDocumentOcr>
48
82
  ```
49
83
 
50
84
  ### `PdfDown` class (includes OCR methods)
@@ -53,12 +87,25 @@ export declare function extractTextWithOcrPerPageAsync(
53
87
  export declare class PdfDown {
54
88
  constructor(buffer: Buffer)
55
89
 
56
- // All base methods (textPerPage, imagesPerPage, annotationsPerPage,
57
- // structuredText, metadata, document — sync and async variants)
58
-
59
- // OCR methods:
90
+ // ── Base methods ──
91
+ textPerPage(): Array<PageText>
92
+ textPerPageAsync(): Promise<Array<PageText>>
93
+ imagesPerPage(): Array<PageImage>
94
+ imagesPerPageAsync(): Promise<Array<PageImage>>
95
+ annotationsPerPage(): Array<PageAnnotation>
96
+ annotationsPerPageAsync(): Promise<Array<PageAnnotation>>
97
+ structuredText(): Array<StructuredPageText>
98
+ structuredTextAsync(): Promise<Array<StructuredPageText>>
99
+ metadata(): PdfMeta
100
+ metadataAsync(): Promise<PdfMeta>
101
+ document(): PdfDocument
102
+ documentAsync(): Promise<PdfDocument>
103
+
104
+ // ── OCR methods ──
60
105
  textWithOcrPerPage(opts?: OcrOptions): Array<OcrPageText>
61
106
  textWithOcrPerPageAsync(opts?: OcrOptions): Promise<Array<OcrPageText>>
107
+ documentOcr(opts?: OcrOptions): PdfDocumentOcr
108
+ documentOcrAsync(opts?: OcrOptions): Promise<PdfDocumentOcr>
62
109
  }
63
110
  ```
64
111
 
@@ -76,16 +123,42 @@ export interface OcrPageText {
76
123
  source: TextSource
77
124
  }
78
125
 
126
+ export interface OcrStructuredPageText {
127
+ page: number
128
+ header: string
129
+ body: string
130
+ footer: string
131
+ source: TextSource
132
+ }
133
+
79
134
  export interface OcrOptions {
80
135
  lang?: string // Tesseract language code, default "eng"
81
136
  minTextLength?: number // non-whitespace char threshold before OCR fallback, default 1
82
137
  maxThreads?: number // cap on Rayon threads for OCR parallelism, default 4, clamped to [1, available CPUs]
83
138
  }
139
+
140
+ export interface PdfDocumentOcr {
141
+ version: string
142
+ isLinearized: boolean
143
+ pageCount: number
144
+ creator?: string
145
+ producer?: string
146
+ creationDate?: string
147
+ modificationDate?: string
148
+ totalImages: number
149
+ totalAnnotations: number
150
+ imagePages: Array<number>
151
+ annotationPages: Array<number>
152
+ text: Array<OcrPageText>
153
+ structuredText: Array<OcrStructuredPageText>
154
+ images: Array<PageImage>
155
+ annotations: Array<PageAnnotation>
156
+ }
84
157
  ```
85
158
 
86
159
  ## Usage
87
160
 
88
- > **Use the async API for OCR.** The sync variants block the Node.js event loop for the duration of OCR processing, which can be significant for multi-page scanned documents. Prefer `extractTextWithOcrPerPageAsync` / `textWithOcrPerPageAsync` in production.
161
+ > **Use the async API for OCR.** The sync variants block the Node.js event loop for the duration of OCR processing, which can be significant for multi-page scanned documents.
89
162
 
90
163
  ### Standalone
91
164
 
@@ -117,6 +190,22 @@ const images = await pdf.imagesPerPageAsync()
117
190
  const meta = pdf.metadata()
118
191
  ```
119
192
 
193
+ ### Extract everything with OCR in one call
194
+
195
+ ```typescript
196
+ import { readFile } from 'fs/promises'
197
+ import { PdfDown } from '@d0paminedriven/pdfdown-ocr'
198
+
199
+ const pdf = new PdfDown(await readFile('scanned-document.pdf'))
200
+ const result = await pdf.documentOcrAsync({ minTextLength: 10 })
201
+
202
+ // result.text — OcrPageText[] (page, text, source per page)
203
+ // result.structuredText — OcrStructuredPageText[] (header/body/footer + source per page)
204
+ // result.images — PageImage[] (decoded PNGs with dimensions and color space)
205
+ // result.annotations — PageAnnotation[] (links, destinations, rects)
206
+ // result.pageCount, result.version, result.creator, ...
207
+ ```
208
+
120
209
  ### Combined: OCR text + images for multimodal pipelines
121
210
 
122
211
  ```typescript
@@ -142,11 +231,26 @@ for (const { page, text, source } of ocrText) {
142
231
  }
143
232
  ```
144
233
 
234
+ ### `document()` vs `documentOcr()`
235
+
236
+ Both methods extract everything from a PDF in a single call. The difference is how text is extracted:
237
+
238
+ | Method | Text extraction | Return type | Use when |
239
+ |--------|----------------|-------------|----------|
240
+ | `document()` / `documentAsync()` | Native PDF text only | `PdfDocument` | PDF has selectable text |
241
+ | `documentOcr()` / `documentOcrAsync()` | Native with OCR fallback | `PdfDocumentOcr` | PDF may contain scanned/image-only pages |
242
+
243
+ `PdfDocumentOcr` uses `OcrPageText` (with `source: 'Native' | 'Ocr'`) and `OcrStructuredPageText` (with header/body/footer split plus source) instead of the base `PageText` and `StructuredPageText` types. Images, annotations, and metadata are identical in both.
244
+
145
245
  ## How it works
146
246
 
147
- Pages with native text are extracted directly. When a page yields fewer non-whitespace characters than `minTextLength`, its embedded images are decoded and fed to Tesseract for OCR. Each result is tagged with `source: 'Native'` or `source: 'Ocr'` so you know which path was taken.
247
+ 1. **Text extraction:** Each page is first attempted with native PDF text extraction. If a page yields fewer non-whitespace characters than `minTextLength`, its embedded images are decoded and fed to Tesseract for OCR. Each result is tagged with `source: 'Native'` or `source: 'Ocr'`.
248
+
249
+ 2. **Structured text:** After text extraction, repeated header/footer lines are detected across pages using frequency analysis (requires 3+ pages). Each page's text is split into `header`, `body`, and `footer` sections. For OCR results, the `source` tag is preserved so you know whether each page's content came from native extraction or OCR.
250
+
251
+ 3. **Parallelism:** OCR runs on a dedicated capped Rayon thread pool (default 4 threads, configurable via `maxThreads`) to prevent CPU oversubscription. Text extraction, image extraction, and annotation extraction run concurrently via `rayon::join` when using `documentOcr` / `documentOcrAsync`.
148
252
 
149
- OCR runs on a dedicated capped thread pool (default 4 threads, configurable via `maxThreads`) to prevent CPU oversubscription.
253
+ 4. **Tessdata discovery:** On first OCR invocation, the tessdata path is resolved once and cached in a `OnceLock`. The `TESSDATA_PREFIX` environment variable is checked first; if unset, `tesseract --list-langs` is executed and its output is parsed to extract the path. No environment variables are mutated -- the path is passed directly to Tesseract's init function.
150
254
 
151
255
  ## Supported platforms
152
256
 
@@ -157,7 +261,7 @@ Prebuilt binaries are provided for:
157
261
 
158
262
  ## Relationship to `@d0paminedriven/pdfdown`
159
263
 
160
- Same Rust codebase, compiled with the `ocr` Cargo feature flag enabled. This package is a strict superset you can use it as a drop-in replacement for the base package if you need OCR capabilities.
264
+ Same Rust codebase, compiled with the `ocr` Cargo feature flag enabled. This package is a strict superset -- you can use it as a drop-in replacement for the base package if you need OCR capabilities.
161
265
 
162
266
  ## License
163
267
 
package/index.d.ts CHANGED
@@ -30,6 +30,10 @@ export declare class PdfDown {
30
30
  textWithOcrPerPage(opts?: OcrOptions | undefined | null): Array<OcrPageText>
31
31
  /** Async: extract text with OCR fallback for image-only pages */
32
32
  textWithOcrPerPageAsync(opts?: OcrOptions | undefined | null): Promise<Array<OcrPageText>>
33
+ /** Sync: extract everything from the PDF with OCR text fallback */
34
+ documentOcr(opts?: OcrOptions | undefined | null): PdfDocumentOcr
35
+ /** Async: extract everything from the PDF with OCR text fallback */
36
+ documentOcrAsync(opts?: OcrOptions | undefined | null): Promise<PdfDocumentOcr>
33
37
  }
34
38
 
35
39
  export declare function extractAnnotationsPerPage(buffer: Buffer): Array<PageAnnotation>
@@ -64,6 +68,14 @@ export interface OcrPageText {
64
68
  source: TextSource
65
69
  }
66
70
 
71
+ export interface OcrStructuredPageText {
72
+ page: number
73
+ header: string
74
+ body: string
75
+ footer: string
76
+ source: TextSource
77
+ }
78
+
67
79
  export interface PageAnnotation {
68
80
  page: number
69
81
  subtype: string
@@ -113,6 +125,28 @@ export interface PdfDocument {
113
125
 
114
126
  export declare function pdfDocumentAsync(buffer: Buffer): Promise<PdfDocument>
115
127
 
128
+ export declare function pdfDocumentOcr(buffer: Buffer, opts?: OcrOptions | undefined | null): PdfDocumentOcr
129
+
130
+ export interface PdfDocumentOcr {
131
+ version: string
132
+ isLinearized: boolean
133
+ pageCount: number
134
+ creator?: string
135
+ producer?: string
136
+ creationDate?: string
137
+ modificationDate?: string
138
+ totalImages: number
139
+ totalAnnotations: number
140
+ imagePages: Array<number>
141
+ annotationPages: Array<number>
142
+ text: Array<OcrPageText>
143
+ structuredText: Array<OcrStructuredPageText>
144
+ images: Array<PageImage>
145
+ annotations: Array<PageAnnotation>
146
+ }
147
+
148
+ export declare function pdfDocumentOcrAsync(buffer: Buffer, opts?: OcrOptions | undefined | null): Promise<PdfDocumentOcr>
149
+
116
150
  export interface PdfMeta {
117
151
  pageCount: number
118
152
  version: string
package/index.js CHANGED
@@ -80,8 +80,8 @@ function requireNative() {
80
80
  try {
81
81
  const binding = require('@d0paminedriven/pdfdown-ocr-android-arm64')
82
82
  const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-android-arm64/package.json').version
83
- if (bindingPackageVersion !== '0.9.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
84
- throw new Error(`Native binding package version mismatch, expected 0.9.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
83
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
84
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
85
85
  }
86
86
  return binding
87
87
  } catch (e) {
@@ -96,8 +96,8 @@ function requireNative() {
96
96
  try {
97
97
  const binding = require('@d0paminedriven/pdfdown-ocr-android-arm-eabi')
98
98
  const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-android-arm-eabi/package.json').version
99
- if (bindingPackageVersion !== '0.9.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
100
- throw new Error(`Native binding package version mismatch, expected 0.9.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
99
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
100
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
101
101
  }
102
102
  return binding
103
103
  } catch (e) {
@@ -116,8 +116,8 @@ function requireNative() {
116
116
  try {
117
117
  const binding = require('@d0paminedriven/pdfdown-ocr-win32-x64-msvc')
118
118
  const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-win32-x64-msvc/package.json').version
119
- if (bindingPackageVersion !== '0.9.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
120
- throw new Error(`Native binding package version mismatch, expected 0.9.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
119
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
120
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
121
121
  }
122
122
  return binding
123
123
  } catch (e) {
@@ -132,8 +132,8 @@ function requireNative() {
132
132
  try {
133
133
  const binding = require('@d0paminedriven/pdfdown-ocr-win32-ia32-msvc')
134
134
  const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-win32-ia32-msvc/package.json').version
135
- if (bindingPackageVersion !== '0.9.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
136
- throw new Error(`Native binding package version mismatch, expected 0.9.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
135
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
136
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
137
137
  }
138
138
  return binding
139
139
  } catch (e) {
@@ -148,8 +148,8 @@ function requireNative() {
148
148
  try {
149
149
  const binding = require('@d0paminedriven/pdfdown-ocr-win32-arm64-msvc')
150
150
  const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-win32-arm64-msvc/package.json').version
151
- if (bindingPackageVersion !== '0.9.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
152
- throw new Error(`Native binding package version mismatch, expected 0.9.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
151
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
152
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
153
153
  }
154
154
  return binding
155
155
  } catch (e) {
@@ -167,8 +167,8 @@ function requireNative() {
167
167
  try {
168
168
  const binding = require('@d0paminedriven/pdfdown-ocr-darwin-universal')
169
169
  const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-darwin-universal/package.json').version
170
- if (bindingPackageVersion !== '0.9.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
171
- throw new Error(`Native binding package version mismatch, expected 0.9.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
170
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
171
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
172
172
  }
173
173
  return binding
174
174
  } catch (e) {
@@ -183,8 +183,8 @@ function requireNative() {
183
183
  try {
184
184
  const binding = require('@d0paminedriven/pdfdown-ocr-darwin-x64')
185
185
  const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-darwin-x64/package.json').version
186
- if (bindingPackageVersion !== '0.9.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
187
- throw new Error(`Native binding package version mismatch, expected 0.9.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
186
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
187
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
188
188
  }
189
189
  return binding
190
190
  } catch (e) {
@@ -199,8 +199,8 @@ function requireNative() {
199
199
  try {
200
200
  const binding = require('@d0paminedriven/pdfdown-ocr-darwin-arm64')
201
201
  const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-darwin-arm64/package.json').version
202
- if (bindingPackageVersion !== '0.9.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
203
- throw new Error(`Native binding package version mismatch, expected 0.9.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
202
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
203
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
204
204
  }
205
205
  return binding
206
206
  } catch (e) {
@@ -219,8 +219,8 @@ function requireNative() {
219
219
  try {
220
220
  const binding = require('@d0paminedriven/pdfdown-ocr-freebsd-x64')
221
221
  const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-freebsd-x64/package.json').version
222
- if (bindingPackageVersion !== '0.9.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
223
- throw new Error(`Native binding package version mismatch, expected 0.9.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
222
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
223
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
224
224
  }
225
225
  return binding
226
226
  } catch (e) {
@@ -235,8 +235,8 @@ function requireNative() {
235
235
  try {
236
236
  const binding = require('@d0paminedriven/pdfdown-ocr-freebsd-arm64')
237
237
  const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-freebsd-arm64/package.json').version
238
- if (bindingPackageVersion !== '0.9.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
239
- throw new Error(`Native binding package version mismatch, expected 0.9.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
238
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
239
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
240
240
  }
241
241
  return binding
242
242
  } catch (e) {
@@ -256,8 +256,8 @@ function requireNative() {
256
256
  try {
257
257
  const binding = require('@d0paminedriven/pdfdown-ocr-linux-x64-musl')
258
258
  const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-x64-musl/package.json').version
259
- if (bindingPackageVersion !== '0.9.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
260
- throw new Error(`Native binding package version mismatch, expected 0.9.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
259
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
260
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
261
261
  }
262
262
  return binding
263
263
  } catch (e) {
@@ -272,8 +272,8 @@ function requireNative() {
272
272
  try {
273
273
  const binding = require('@d0paminedriven/pdfdown-ocr-linux-x64-gnu')
274
274
  const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-x64-gnu/package.json').version
275
- if (bindingPackageVersion !== '0.9.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
276
- throw new Error(`Native binding package version mismatch, expected 0.9.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
275
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
276
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
277
277
  }
278
278
  return binding
279
279
  } catch (e) {
@@ -290,8 +290,8 @@ function requireNative() {
290
290
  try {
291
291
  const binding = require('@d0paminedriven/pdfdown-ocr-linux-arm64-musl')
292
292
  const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-arm64-musl/package.json').version
293
- if (bindingPackageVersion !== '0.9.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
294
- throw new Error(`Native binding package version mismatch, expected 0.9.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
293
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
294
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
295
295
  }
296
296
  return binding
297
297
  } catch (e) {
@@ -306,8 +306,8 @@ function requireNative() {
306
306
  try {
307
307
  const binding = require('@d0paminedriven/pdfdown-ocr-linux-arm64-gnu')
308
308
  const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-arm64-gnu/package.json').version
309
- if (bindingPackageVersion !== '0.9.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
310
- throw new Error(`Native binding package version mismatch, expected 0.9.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
309
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
310
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
311
311
  }
312
312
  return binding
313
313
  } catch (e) {
@@ -324,8 +324,8 @@ function requireNative() {
324
324
  try {
325
325
  const binding = require('@d0paminedriven/pdfdown-ocr-linux-arm-musleabihf')
326
326
  const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-arm-musleabihf/package.json').version
327
- if (bindingPackageVersion !== '0.9.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
328
- throw new Error(`Native binding package version mismatch, expected 0.9.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
327
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
328
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
329
329
  }
330
330
  return binding
331
331
  } catch (e) {
@@ -340,8 +340,8 @@ function requireNative() {
340
340
  try {
341
341
  const binding = require('@d0paminedriven/pdfdown-ocr-linux-arm-gnueabihf')
342
342
  const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-arm-gnueabihf/package.json').version
343
- if (bindingPackageVersion !== '0.9.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
344
- throw new Error(`Native binding package version mismatch, expected 0.9.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
343
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
344
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
345
345
  }
346
346
  return binding
347
347
  } catch (e) {
@@ -358,8 +358,8 @@ function requireNative() {
358
358
  try {
359
359
  const binding = require('@d0paminedriven/pdfdown-ocr-linux-loong64-musl')
360
360
  const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-loong64-musl/package.json').version
361
- if (bindingPackageVersion !== '0.9.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
362
- throw new Error(`Native binding package version mismatch, expected 0.9.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
361
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
362
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
363
363
  }
364
364
  return binding
365
365
  } catch (e) {
@@ -374,8 +374,8 @@ function requireNative() {
374
374
  try {
375
375
  const binding = require('@d0paminedriven/pdfdown-ocr-linux-loong64-gnu')
376
376
  const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-loong64-gnu/package.json').version
377
- if (bindingPackageVersion !== '0.9.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
378
- throw new Error(`Native binding package version mismatch, expected 0.9.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
377
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
378
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
379
379
  }
380
380
  return binding
381
381
  } catch (e) {
@@ -392,8 +392,8 @@ function requireNative() {
392
392
  try {
393
393
  const binding = require('@d0paminedriven/pdfdown-ocr-linux-riscv64-musl')
394
394
  const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-riscv64-musl/package.json').version
395
- if (bindingPackageVersion !== '0.9.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
396
- throw new Error(`Native binding package version mismatch, expected 0.9.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
395
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
396
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
397
397
  }
398
398
  return binding
399
399
  } catch (e) {
@@ -408,8 +408,8 @@ function requireNative() {
408
408
  try {
409
409
  const binding = require('@d0paminedriven/pdfdown-ocr-linux-riscv64-gnu')
410
410
  const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-riscv64-gnu/package.json').version
411
- if (bindingPackageVersion !== '0.9.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
412
- throw new Error(`Native binding package version mismatch, expected 0.9.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
411
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
412
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
413
413
  }
414
414
  return binding
415
415
  } catch (e) {
@@ -425,8 +425,8 @@ function requireNative() {
425
425
  try {
426
426
  const binding = require('@d0paminedriven/pdfdown-ocr-linux-ppc64-gnu')
427
427
  const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-ppc64-gnu/package.json').version
428
- if (bindingPackageVersion !== '0.9.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
429
- throw new Error(`Native binding package version mismatch, expected 0.9.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
428
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
429
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
430
430
  }
431
431
  return binding
432
432
  } catch (e) {
@@ -441,8 +441,8 @@ function requireNative() {
441
441
  try {
442
442
  const binding = require('@d0paminedriven/pdfdown-ocr-linux-s390x-gnu')
443
443
  const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-s390x-gnu/package.json').version
444
- if (bindingPackageVersion !== '0.9.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
445
- throw new Error(`Native binding package version mismatch, expected 0.9.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
444
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
445
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
446
446
  }
447
447
  return binding
448
448
  } catch (e) {
@@ -461,8 +461,8 @@ function requireNative() {
461
461
  try {
462
462
  const binding = require('@d0paminedriven/pdfdown-ocr-openharmony-arm64')
463
463
  const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-openharmony-arm64/package.json').version
464
- if (bindingPackageVersion !== '0.9.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
465
- throw new Error(`Native binding package version mismatch, expected 0.9.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
464
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
465
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
466
466
  }
467
467
  return binding
468
468
  } catch (e) {
@@ -477,8 +477,8 @@ function requireNative() {
477
477
  try {
478
478
  const binding = require('@d0paminedriven/pdfdown-ocr-openharmony-x64')
479
479
  const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-openharmony-x64/package.json').version
480
- if (bindingPackageVersion !== '0.9.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
481
- throw new Error(`Native binding package version mismatch, expected 0.9.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
480
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
481
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
482
482
  }
483
483
  return binding
484
484
  } catch (e) {
@@ -493,8 +493,8 @@ function requireNative() {
493
493
  try {
494
494
  const binding = require('@d0paminedriven/pdfdown-ocr-openharmony-arm')
495
495
  const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-openharmony-arm/package.json').version
496
- if (bindingPackageVersion !== '0.9.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
497
- throw new Error(`Native binding package version mismatch, expected 0.9.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
496
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
497
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
498
498
  }
499
499
  return binding
500
500
  } catch (e) {
@@ -570,6 +570,8 @@ module.exports.extractTextWithOcrPerPage = nativeBinding.extractTextWithOcrPerPa
570
570
  module.exports.extractTextWithOcrPerPageAsync = nativeBinding.extractTextWithOcrPerPageAsync
571
571
  module.exports.pdfDocument = nativeBinding.pdfDocument
572
572
  module.exports.pdfDocumentAsync = nativeBinding.pdfDocumentAsync
573
+ module.exports.pdfDocumentOcr = nativeBinding.pdfDocumentOcr
574
+ module.exports.pdfDocumentOcrAsync = nativeBinding.pdfDocumentOcrAsync
573
575
  module.exports.pdfMetadata = nativeBinding.pdfMetadata
574
576
  module.exports.pdfMetadataAsync = nativeBinding.pdfMetadataAsync
575
577
  module.exports.TextSource = nativeBinding.TextSource
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@d0paminedriven/pdfdown-ocr",
3
- "version": "0.9.0",
3
+ "version": "0.9.1",
4
4
  "description": "Rust powered PDF extraction for Node with OCR fallback (requires system tesseract).",
5
5
  "main": "index.js",
6
6
  "repository": {