@d0paminedriven/pdfdown-ocr 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +116 -12
- package/index.d.ts +34 -0
- package/index.js +52 -50
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# `@d0paminedriven/pdfdown-ocr`
|
|
2
2
|
|
|
3
|
-
Rust-powered PDF extraction for Node.js with Tesseract OCR fallback for image-only pages. A superset of [`@d0paminedriven/pdfdown`](https://www.npmjs.com/package/@d0paminedriven/pdfdown)
|
|
3
|
+
Rust-powered PDF extraction for Node.js with Tesseract OCR fallback for image-only pages. A superset of [`@d0paminedriven/pdfdown`](https://www.npmjs.com/package/@d0paminedriven/pdfdown) -- includes all base extraction APIs (text, images, annotations, structured text, metadata) plus OCR.
|
|
4
4
|
|
|
5
5
|
**System requirement:** [Tesseract](https://github.com/tesseract-ocr/tesseract) 5.x must be installed on the host.
|
|
6
6
|
|
|
@@ -13,7 +13,7 @@ npm install @d0paminedriven/pdfdown-ocr
|
|
|
13
13
|
### Tesseract setup
|
|
14
14
|
|
|
15
15
|
```bash
|
|
16
|
-
# Ubuntu/Debian (22.04 ships tesseract 3.x
|
|
16
|
+
# Ubuntu/Debian (22.04 ships tesseract 4.x -- use the PPA for 5.x)
|
|
17
17
|
sudo add-apt-repository ppa:alex-p/tesseract-ocr5
|
|
18
18
|
sudo apt update
|
|
19
19
|
sudo apt install tesseract-ocr tesseract-ocr-eng -y
|
|
@@ -27,15 +27,38 @@ brew install tesseract
|
|
|
27
27
|
sudo pacman -S tesseract tesseract-data-eng
|
|
28
28
|
```
|
|
29
29
|
|
|
30
|
-
Verify with `tesseract --version`
|
|
30
|
+
Verify with `tesseract --version` -- you should see 5.x.
|
|
31
|
+
|
|
32
|
+
### Tessdata auto-detection
|
|
33
|
+
|
|
34
|
+
The package automatically detects the tessdata directory at runtime by parsing the output of `tesseract --list-langs`. The detected path is cached for the lifetime of the process using a `OnceLock<Option<String>>` -- no global environment mutation, fully thread-safe.
|
|
35
|
+
|
|
36
|
+
**Resolution order:**
|
|
37
|
+
|
|
38
|
+
1. `TESSDATA_PREFIX` environment variable (if set, used as-is -- no auto-detection runs)
|
|
39
|
+
2. Auto-detection via `tesseract --list-langs` (parses the path from `List of available languages in "/path/to/tessdata/"`)
|
|
40
|
+
3. Tesseract's compiled-in default (if neither of the above yields a path)
|
|
41
|
+
|
|
42
|
+
Most users will not need to set `TESSDATA_PREFIX` at all. The auto-detection handles standard installations on Ubuntu (`/usr/share/tesseract-ocr/5/tessdata/`), macOS Homebrew (`/opt/homebrew/share/tessdata/`), Arch, and any other layout where `tesseract` is on `PATH`.
|
|
43
|
+
|
|
44
|
+
Set `TESSDATA_PREFIX` explicitly only if:
|
|
45
|
+
|
|
46
|
+
- Tesseract is not on `PATH` but the tessdata directory exists elsewhere
|
|
47
|
+
- You want to override the detected path (e.g., pointing to a custom-trained data directory)
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
# Override example (not usually needed)
|
|
51
|
+
export TESSDATA_PREFIX="/opt/custom/tessdata"
|
|
52
|
+
```
|
|
31
53
|
|
|
32
54
|
## API
|
|
33
55
|
|
|
34
|
-
This package exports everything from `@d0paminedriven/pdfdown` (text, images, annotations, structured text, metadata
|
|
56
|
+
This package exports everything from `@d0paminedriven/pdfdown` (text, images, annotations, structured text, metadata -- both sync and async), plus the OCR-specific APIs below. See the [base package docs](https://www.npmjs.com/package/@d0paminedriven/pdfdown) for the full base API.
|
|
35
57
|
|
|
36
58
|
### OCR standalone functions
|
|
37
59
|
|
|
38
60
|
```typescript
|
|
61
|
+
// Per-page OCR text extraction
|
|
39
62
|
export declare function extractTextWithOcrPerPage(
|
|
40
63
|
buffer: Buffer,
|
|
41
64
|
opts?: OcrOptions,
|
|
@@ -45,6 +68,17 @@ export declare function extractTextWithOcrPerPageAsync(
|
|
|
45
68
|
buffer: Buffer,
|
|
46
69
|
opts?: OcrOptions,
|
|
47
70
|
): Promise<Array<OcrPageText>>
|
|
71
|
+
|
|
72
|
+
// Full document extraction with OCR text fallback
|
|
73
|
+
export declare function pdfDocumentOcr(
|
|
74
|
+
buffer: Buffer,
|
|
75
|
+
opts?: OcrOptions,
|
|
76
|
+
): PdfDocumentOcr
|
|
77
|
+
|
|
78
|
+
export declare function pdfDocumentOcrAsync(
|
|
79
|
+
buffer: Buffer,
|
|
80
|
+
opts?: OcrOptions,
|
|
81
|
+
): Promise<PdfDocumentOcr>
|
|
48
82
|
```
|
|
49
83
|
|
|
50
84
|
### `PdfDown` class (includes OCR methods)
|
|
@@ -53,12 +87,25 @@ export declare function extractTextWithOcrPerPageAsync(
|
|
|
53
87
|
export declare class PdfDown {
|
|
54
88
|
constructor(buffer: Buffer)
|
|
55
89
|
|
|
56
|
-
//
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
90
|
+
// ── Base methods ──
|
|
91
|
+
textPerPage(): Array<PageText>
|
|
92
|
+
textPerPageAsync(): Promise<Array<PageText>>
|
|
93
|
+
imagesPerPage(): Array<PageImage>
|
|
94
|
+
imagesPerPageAsync(): Promise<Array<PageImage>>
|
|
95
|
+
annotationsPerPage(): Array<PageAnnotation>
|
|
96
|
+
annotationsPerPageAsync(): Promise<Array<PageAnnotation>>
|
|
97
|
+
structuredText(): Array<StructuredPageText>
|
|
98
|
+
structuredTextAsync(): Promise<Array<StructuredPageText>>
|
|
99
|
+
metadata(): PdfMeta
|
|
100
|
+
metadataAsync(): Promise<PdfMeta>
|
|
101
|
+
document(): PdfDocument
|
|
102
|
+
documentAsync(): Promise<PdfDocument>
|
|
103
|
+
|
|
104
|
+
// ── OCR methods ──
|
|
60
105
|
textWithOcrPerPage(opts?: OcrOptions): Array<OcrPageText>
|
|
61
106
|
textWithOcrPerPageAsync(opts?: OcrOptions): Promise<Array<OcrPageText>>
|
|
107
|
+
documentOcr(opts?: OcrOptions): PdfDocumentOcr
|
|
108
|
+
documentOcrAsync(opts?: OcrOptions): Promise<PdfDocumentOcr>
|
|
62
109
|
}
|
|
63
110
|
```
|
|
64
111
|
|
|
@@ -76,16 +123,42 @@ export interface OcrPageText {
|
|
|
76
123
|
source: TextSource
|
|
77
124
|
}
|
|
78
125
|
|
|
126
|
+
export interface OcrStructuredPageText {
|
|
127
|
+
page: number
|
|
128
|
+
header: string
|
|
129
|
+
body: string
|
|
130
|
+
footer: string
|
|
131
|
+
source: TextSource
|
|
132
|
+
}
|
|
133
|
+
|
|
79
134
|
export interface OcrOptions {
|
|
80
135
|
lang?: string // Tesseract language code, default "eng"
|
|
81
136
|
minTextLength?: number // non-whitespace char threshold before OCR fallback, default 1
|
|
82
137
|
maxThreads?: number // cap on Rayon threads for OCR parallelism, default 4, clamped to [1, available CPUs]
|
|
83
138
|
}
|
|
139
|
+
|
|
140
|
+
export interface PdfDocumentOcr {
|
|
141
|
+
version: string
|
|
142
|
+
isLinearized: boolean
|
|
143
|
+
pageCount: number
|
|
144
|
+
creator?: string
|
|
145
|
+
producer?: string
|
|
146
|
+
creationDate?: string
|
|
147
|
+
modificationDate?: string
|
|
148
|
+
totalImages: number
|
|
149
|
+
totalAnnotations: number
|
|
150
|
+
imagePages: Array<number>
|
|
151
|
+
annotationPages: Array<number>
|
|
152
|
+
text: Array<OcrPageText>
|
|
153
|
+
structuredText: Array<OcrStructuredPageText>
|
|
154
|
+
images: Array<PageImage>
|
|
155
|
+
annotations: Array<PageAnnotation>
|
|
156
|
+
}
|
|
84
157
|
```
|
|
85
158
|
|
|
86
159
|
## Usage
|
|
87
160
|
|
|
88
|
-
> **Use the async API for OCR.** The sync variants block the Node.js event loop for the duration of OCR processing, which can be significant for multi-page scanned documents.
|
|
161
|
+
> **Use the async API for OCR.** The sync variants block the Node.js event loop for the duration of OCR processing, which can be significant for multi-page scanned documents.
|
|
89
162
|
|
|
90
163
|
### Standalone
|
|
91
164
|
|
|
@@ -117,6 +190,22 @@ const images = await pdf.imagesPerPageAsync()
|
|
|
117
190
|
const meta = pdf.metadata()
|
|
118
191
|
```
|
|
119
192
|
|
|
193
|
+
### Extract everything with OCR in one call
|
|
194
|
+
|
|
195
|
+
```typescript
|
|
196
|
+
import { readFile } from 'fs/promises'
|
|
197
|
+
import { PdfDown } from '@d0paminedriven/pdfdown-ocr'
|
|
198
|
+
|
|
199
|
+
const pdf = new PdfDown(await readFile('scanned-document.pdf'))
|
|
200
|
+
const result = await pdf.documentOcrAsync({ minTextLength: 10 })
|
|
201
|
+
|
|
202
|
+
// result.text — OcrPageText[] (page, text, source per page)
|
|
203
|
+
// result.structuredText — OcrStructuredPageText[] (header/body/footer + source per page)
|
|
204
|
+
// result.images — PageImage[] (decoded PNGs with dimensions and color space)
|
|
205
|
+
// result.annotations — PageAnnotation[] (links, destinations, rects)
|
|
206
|
+
// result.pageCount, result.version, result.creator, ...
|
|
207
|
+
```
|
|
208
|
+
|
|
120
209
|
### Combined: OCR text + images for multimodal pipelines
|
|
121
210
|
|
|
122
211
|
```typescript
|
|
@@ -142,11 +231,26 @@ for (const { page, text, source } of ocrText) {
|
|
|
142
231
|
}
|
|
143
232
|
```
|
|
144
233
|
|
|
234
|
+
### `document()` vs `documentOcr()`
|
|
235
|
+
|
|
236
|
+
Both methods extract everything from a PDF in a single call. The difference is how text is extracted:
|
|
237
|
+
|
|
238
|
+
| Method | Text extraction | Return type | Use when |
|
|
239
|
+
|--------|----------------|-------------|----------|
|
|
240
|
+
| `document()` / `documentAsync()` | Native PDF text only | `PdfDocument` | PDF has selectable text |
|
|
241
|
+
| `documentOcr()` / `documentOcrAsync()` | Native with OCR fallback | `PdfDocumentOcr` | PDF may contain scanned/image-only pages |
|
|
242
|
+
|
|
243
|
+
`PdfDocumentOcr` uses `OcrPageText` (with `source: 'Native' | 'Ocr'`) and `OcrStructuredPageText` (with header/body/footer split plus source) instead of the base `PageText` and `StructuredPageText` types. Images, annotations, and metadata are identical in both.
|
|
244
|
+
|
|
145
245
|
## How it works
|
|
146
246
|
|
|
147
|
-
|
|
247
|
+
1. **Text extraction:** Native PDF text extraction is attempted first on every page. If a page yields fewer non-whitespace characters than `minTextLength`, its embedded images are decoded and fed to Tesseract for OCR. Each result is tagged with `source: 'Native'` or `source: 'Ocr'`.
|
|
248
|
+
|
|
249
|
+
2. **Structured text:** After text extraction, repeated header/footer lines are detected across pages using frequency analysis (requires 3+ pages). Each page's text is split into `header`, `body`, and `footer` sections. For OCR results, the `source` tag is preserved so you know whether each page's content came from native extraction or OCR.
|
|
250
|
+
|
|
251
|
+
3. **Parallelism:** OCR runs on a dedicated capped Rayon thread pool (default 4 threads, configurable via `maxThreads`) to prevent CPU oversubscription. Text extraction, image extraction, and annotation extraction run concurrently via `rayon::join` when using `documentOcr` / `documentOcrAsync`.
|
|
148
252
|
|
|
149
|
-
OCR
|
|
253
|
+
4. **Tessdata discovery:** On first OCR invocation, the tessdata path is resolved once and cached in a `OnceLock`. The `TESSDATA_PREFIX` environment variable is checked first; if unset, `tesseract --list-langs` is executed and its output is parsed to extract the path. No environment variables are mutated -- the path is passed directly to Tesseract's init function.
|
|
150
254
|
|
|
151
255
|
## Supported platforms
|
|
152
256
|
|
|
@@ -157,7 +261,7 @@ Prebuilt binaries are provided for:
|
|
|
157
261
|
|
|
158
262
|
## Relationship to `@d0paminedriven/pdfdown`
|
|
159
263
|
|
|
160
|
-
Same Rust codebase, compiled with the `ocr` Cargo feature flag enabled. This package is a strict superset
|
|
264
|
+
Same Rust codebase, compiled with the `ocr` Cargo feature flag enabled. This package is a strict superset -- you can use it as a drop-in replacement for the base package if you need OCR capabilities.
|
|
161
265
|
|
|
162
266
|
## License
|
|
163
267
|
|
package/index.d.ts
CHANGED
|
@@ -30,6 +30,10 @@ export declare class PdfDown {
|
|
|
30
30
|
textWithOcrPerPage(opts?: OcrOptions | undefined | null): Array<OcrPageText>
|
|
31
31
|
/** Async: extract text with OCR fallback for image-only pages */
|
|
32
32
|
textWithOcrPerPageAsync(opts?: OcrOptions | undefined | null): Promise<Array<OcrPageText>>
|
|
33
|
+
/** Sync: extract everything from the PDF with OCR text fallback */
|
|
34
|
+
documentOcr(opts?: OcrOptions | undefined | null): PdfDocumentOcr
|
|
35
|
+
/** Async: extract everything from the PDF with OCR text fallback */
|
|
36
|
+
documentOcrAsync(opts?: OcrOptions | undefined | null): Promise<PdfDocumentOcr>
|
|
33
37
|
}
|
|
34
38
|
|
|
35
39
|
export declare function extractAnnotationsPerPage(buffer: Buffer): Array<PageAnnotation>
|
|
@@ -64,6 +68,14 @@ export interface OcrPageText {
|
|
|
64
68
|
source: TextSource
|
|
65
69
|
}
|
|
66
70
|
|
|
71
|
+
export interface OcrStructuredPageText {
|
|
72
|
+
page: number
|
|
73
|
+
header: string
|
|
74
|
+
body: string
|
|
75
|
+
footer: string
|
|
76
|
+
source: TextSource
|
|
77
|
+
}
|
|
78
|
+
|
|
67
79
|
export interface PageAnnotation {
|
|
68
80
|
page: number
|
|
69
81
|
subtype: string
|
|
@@ -113,6 +125,28 @@ export interface PdfDocument {
|
|
|
113
125
|
|
|
114
126
|
export declare function pdfDocumentAsync(buffer: Buffer): Promise<PdfDocument>
|
|
115
127
|
|
|
128
|
+
export declare function pdfDocumentOcr(buffer: Buffer, opts?: OcrOptions | undefined | null): PdfDocumentOcr
|
|
129
|
+
|
|
130
|
+
export interface PdfDocumentOcr {
|
|
131
|
+
version: string
|
|
132
|
+
isLinearized: boolean
|
|
133
|
+
pageCount: number
|
|
134
|
+
creator?: string
|
|
135
|
+
producer?: string
|
|
136
|
+
creationDate?: string
|
|
137
|
+
modificationDate?: string
|
|
138
|
+
totalImages: number
|
|
139
|
+
totalAnnotations: number
|
|
140
|
+
imagePages: Array<number>
|
|
141
|
+
annotationPages: Array<number>
|
|
142
|
+
text: Array<OcrPageText>
|
|
143
|
+
structuredText: Array<OcrStructuredPageText>
|
|
144
|
+
images: Array<PageImage>
|
|
145
|
+
annotations: Array<PageAnnotation>
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
export declare function pdfDocumentOcrAsync(buffer: Buffer, opts?: OcrOptions | undefined | null): Promise<PdfDocumentOcr>
|
|
149
|
+
|
|
116
150
|
export interface PdfMeta {
|
|
117
151
|
pageCount: number
|
|
118
152
|
version: string
|
package/index.js
CHANGED
|
@@ -80,8 +80,8 @@ function requireNative() {
|
|
|
80
80
|
try {
|
|
81
81
|
const binding = require('@d0paminedriven/pdfdown-ocr-android-arm64')
|
|
82
82
|
const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-android-arm64/package.json').version
|
|
83
|
-
if (bindingPackageVersion !== '0.9.
|
|
84
|
-
throw new Error(`Native binding package version mismatch, expected 0.9.
|
|
83
|
+
if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
84
|
+
throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
85
85
|
}
|
|
86
86
|
return binding
|
|
87
87
|
} catch (e) {
|
|
@@ -96,8 +96,8 @@ function requireNative() {
|
|
|
96
96
|
try {
|
|
97
97
|
const binding = require('@d0paminedriven/pdfdown-ocr-android-arm-eabi')
|
|
98
98
|
const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-android-arm-eabi/package.json').version
|
|
99
|
-
if (bindingPackageVersion !== '0.9.
|
|
100
|
-
throw new Error(`Native binding package version mismatch, expected 0.9.
|
|
99
|
+
if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
100
|
+
throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
101
101
|
}
|
|
102
102
|
return binding
|
|
103
103
|
} catch (e) {
|
|
@@ -116,8 +116,8 @@ function requireNative() {
|
|
|
116
116
|
try {
|
|
117
117
|
const binding = require('@d0paminedriven/pdfdown-ocr-win32-x64-msvc')
|
|
118
118
|
const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-win32-x64-msvc/package.json').version
|
|
119
|
-
if (bindingPackageVersion !== '0.9.
|
|
120
|
-
throw new Error(`Native binding package version mismatch, expected 0.9.
|
|
119
|
+
if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
120
|
+
throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
121
121
|
}
|
|
122
122
|
return binding
|
|
123
123
|
} catch (e) {
|
|
@@ -132,8 +132,8 @@ function requireNative() {
|
|
|
132
132
|
try {
|
|
133
133
|
const binding = require('@d0paminedriven/pdfdown-ocr-win32-ia32-msvc')
|
|
134
134
|
const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-win32-ia32-msvc/package.json').version
|
|
135
|
-
if (bindingPackageVersion !== '0.9.
|
|
136
|
-
throw new Error(`Native binding package version mismatch, expected 0.9.
|
|
135
|
+
if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
136
|
+
throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
137
137
|
}
|
|
138
138
|
return binding
|
|
139
139
|
} catch (e) {
|
|
@@ -148,8 +148,8 @@ function requireNative() {
|
|
|
148
148
|
try {
|
|
149
149
|
const binding = require('@d0paminedriven/pdfdown-ocr-win32-arm64-msvc')
|
|
150
150
|
const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-win32-arm64-msvc/package.json').version
|
|
151
|
-
if (bindingPackageVersion !== '0.9.
|
|
152
|
-
throw new Error(`Native binding package version mismatch, expected 0.9.
|
|
151
|
+
if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
152
|
+
throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
153
153
|
}
|
|
154
154
|
return binding
|
|
155
155
|
} catch (e) {
|
|
@@ -167,8 +167,8 @@ function requireNative() {
|
|
|
167
167
|
try {
|
|
168
168
|
const binding = require('@d0paminedriven/pdfdown-ocr-darwin-universal')
|
|
169
169
|
const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-darwin-universal/package.json').version
|
|
170
|
-
if (bindingPackageVersion !== '0.9.
|
|
171
|
-
throw new Error(`Native binding package version mismatch, expected 0.9.
|
|
170
|
+
if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
171
|
+
throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
172
172
|
}
|
|
173
173
|
return binding
|
|
174
174
|
} catch (e) {
|
|
@@ -183,8 +183,8 @@ function requireNative() {
|
|
|
183
183
|
try {
|
|
184
184
|
const binding = require('@d0paminedriven/pdfdown-ocr-darwin-x64')
|
|
185
185
|
const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-darwin-x64/package.json').version
|
|
186
|
-
if (bindingPackageVersion !== '0.9.
|
|
187
|
-
throw new Error(`Native binding package version mismatch, expected 0.9.
|
|
186
|
+
if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
187
|
+
throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
188
188
|
}
|
|
189
189
|
return binding
|
|
190
190
|
} catch (e) {
|
|
@@ -199,8 +199,8 @@ function requireNative() {
|
|
|
199
199
|
try {
|
|
200
200
|
const binding = require('@d0paminedriven/pdfdown-ocr-darwin-arm64')
|
|
201
201
|
const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-darwin-arm64/package.json').version
|
|
202
|
-
if (bindingPackageVersion !== '0.9.
|
|
203
|
-
throw new Error(`Native binding package version mismatch, expected 0.9.
|
|
202
|
+
if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
203
|
+
throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
204
204
|
}
|
|
205
205
|
return binding
|
|
206
206
|
} catch (e) {
|
|
@@ -219,8 +219,8 @@ function requireNative() {
|
|
|
219
219
|
try {
|
|
220
220
|
const binding = require('@d0paminedriven/pdfdown-ocr-freebsd-x64')
|
|
221
221
|
const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-freebsd-x64/package.json').version
|
|
222
|
-
if (bindingPackageVersion !== '0.9.
|
|
223
|
-
throw new Error(`Native binding package version mismatch, expected 0.9.
|
|
222
|
+
if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
223
|
+
throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
224
224
|
}
|
|
225
225
|
return binding
|
|
226
226
|
} catch (e) {
|
|
@@ -235,8 +235,8 @@ function requireNative() {
|
|
|
235
235
|
try {
|
|
236
236
|
const binding = require('@d0paminedriven/pdfdown-ocr-freebsd-arm64')
|
|
237
237
|
const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-freebsd-arm64/package.json').version
|
|
238
|
-
if (bindingPackageVersion !== '0.9.
|
|
239
|
-
throw new Error(`Native binding package version mismatch, expected 0.9.
|
|
238
|
+
if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
239
|
+
throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
240
240
|
}
|
|
241
241
|
return binding
|
|
242
242
|
} catch (e) {
|
|
@@ -256,8 +256,8 @@ function requireNative() {
|
|
|
256
256
|
try {
|
|
257
257
|
const binding = require('@d0paminedriven/pdfdown-ocr-linux-x64-musl')
|
|
258
258
|
const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-x64-musl/package.json').version
|
|
259
|
-
if (bindingPackageVersion !== '0.9.
|
|
260
|
-
throw new Error(`Native binding package version mismatch, expected 0.9.
|
|
259
|
+
if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
260
|
+
throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
261
261
|
}
|
|
262
262
|
return binding
|
|
263
263
|
} catch (e) {
|
|
@@ -272,8 +272,8 @@ function requireNative() {
|
|
|
272
272
|
try {
|
|
273
273
|
const binding = require('@d0paminedriven/pdfdown-ocr-linux-x64-gnu')
|
|
274
274
|
const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-x64-gnu/package.json').version
|
|
275
|
-
if (bindingPackageVersion !== '0.9.
|
|
276
|
-
throw new Error(`Native binding package version mismatch, expected 0.9.
|
|
275
|
+
if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
276
|
+
throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
277
277
|
}
|
|
278
278
|
return binding
|
|
279
279
|
} catch (e) {
|
|
@@ -290,8 +290,8 @@ function requireNative() {
|
|
|
290
290
|
try {
|
|
291
291
|
const binding = require('@d0paminedriven/pdfdown-ocr-linux-arm64-musl')
|
|
292
292
|
const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-arm64-musl/package.json').version
|
|
293
|
-
if (bindingPackageVersion !== '0.9.
|
|
294
|
-
throw new Error(`Native binding package version mismatch, expected 0.9.
|
|
293
|
+
if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
294
|
+
throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
295
295
|
}
|
|
296
296
|
return binding
|
|
297
297
|
} catch (e) {
|
|
@@ -306,8 +306,8 @@ function requireNative() {
|
|
|
306
306
|
try {
|
|
307
307
|
const binding = require('@d0paminedriven/pdfdown-ocr-linux-arm64-gnu')
|
|
308
308
|
const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-arm64-gnu/package.json').version
|
|
309
|
-
if (bindingPackageVersion !== '0.9.
|
|
310
|
-
throw new Error(`Native binding package version mismatch, expected 0.9.
|
|
309
|
+
if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
310
|
+
throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
311
311
|
}
|
|
312
312
|
return binding
|
|
313
313
|
} catch (e) {
|
|
@@ -324,8 +324,8 @@ function requireNative() {
|
|
|
324
324
|
try {
|
|
325
325
|
const binding = require('@d0paminedriven/pdfdown-ocr-linux-arm-musleabihf')
|
|
326
326
|
const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-arm-musleabihf/package.json').version
|
|
327
|
-
if (bindingPackageVersion !== '0.9.
|
|
328
|
-
throw new Error(`Native binding package version mismatch, expected 0.9.
|
|
327
|
+
if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
328
|
+
throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
329
329
|
}
|
|
330
330
|
return binding
|
|
331
331
|
} catch (e) {
|
|
@@ -340,8 +340,8 @@ function requireNative() {
|
|
|
340
340
|
try {
|
|
341
341
|
const binding = require('@d0paminedriven/pdfdown-ocr-linux-arm-gnueabihf')
|
|
342
342
|
const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-arm-gnueabihf/package.json').version
|
|
343
|
-
if (bindingPackageVersion !== '0.9.
|
|
344
|
-
throw new Error(`Native binding package version mismatch, expected 0.9.
|
|
343
|
+
if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
344
|
+
throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
345
345
|
}
|
|
346
346
|
return binding
|
|
347
347
|
} catch (e) {
|
|
@@ -358,8 +358,8 @@ function requireNative() {
|
|
|
358
358
|
try {
|
|
359
359
|
const binding = require('@d0paminedriven/pdfdown-ocr-linux-loong64-musl')
|
|
360
360
|
const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-loong64-musl/package.json').version
|
|
361
|
-
if (bindingPackageVersion !== '0.9.
|
|
362
|
-
throw new Error(`Native binding package version mismatch, expected 0.9.
|
|
361
|
+
if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
362
|
+
throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
363
363
|
}
|
|
364
364
|
return binding
|
|
365
365
|
} catch (e) {
|
|
@@ -374,8 +374,8 @@ function requireNative() {
|
|
|
374
374
|
try {
|
|
375
375
|
const binding = require('@d0paminedriven/pdfdown-ocr-linux-loong64-gnu')
|
|
376
376
|
const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-loong64-gnu/package.json').version
|
|
377
|
-
if (bindingPackageVersion !== '0.9.
|
|
378
|
-
throw new Error(`Native binding package version mismatch, expected 0.9.
|
|
377
|
+
if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
378
|
+
throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
379
379
|
}
|
|
380
380
|
return binding
|
|
381
381
|
} catch (e) {
|
|
@@ -392,8 +392,8 @@ function requireNative() {
|
|
|
392
392
|
try {
|
|
393
393
|
const binding = require('@d0paminedriven/pdfdown-ocr-linux-riscv64-musl')
|
|
394
394
|
const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-riscv64-musl/package.json').version
|
|
395
|
-
if (bindingPackageVersion !== '0.9.
|
|
396
|
-
throw new Error(`Native binding package version mismatch, expected 0.9.
|
|
395
|
+
if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
396
|
+
throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
397
397
|
}
|
|
398
398
|
return binding
|
|
399
399
|
} catch (e) {
|
|
@@ -408,8 +408,8 @@ function requireNative() {
|
|
|
408
408
|
try {
|
|
409
409
|
const binding = require('@d0paminedriven/pdfdown-ocr-linux-riscv64-gnu')
|
|
410
410
|
const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-riscv64-gnu/package.json').version
|
|
411
|
-
if (bindingPackageVersion !== '0.9.
|
|
412
|
-
throw new Error(`Native binding package version mismatch, expected 0.9.
|
|
411
|
+
if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
412
|
+
throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
413
413
|
}
|
|
414
414
|
return binding
|
|
415
415
|
} catch (e) {
|
|
@@ -425,8 +425,8 @@ function requireNative() {
|
|
|
425
425
|
try {
|
|
426
426
|
const binding = require('@d0paminedriven/pdfdown-ocr-linux-ppc64-gnu')
|
|
427
427
|
const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-ppc64-gnu/package.json').version
|
|
428
|
-
if (bindingPackageVersion !== '0.9.
|
|
429
|
-
throw new Error(`Native binding package version mismatch, expected 0.9.
|
|
428
|
+
if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
429
|
+
throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
430
430
|
}
|
|
431
431
|
return binding
|
|
432
432
|
} catch (e) {
|
|
@@ -441,8 +441,8 @@ function requireNative() {
|
|
|
441
441
|
try {
|
|
442
442
|
const binding = require('@d0paminedriven/pdfdown-ocr-linux-s390x-gnu')
|
|
443
443
|
const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-s390x-gnu/package.json').version
|
|
444
|
-
if (bindingPackageVersion !== '0.9.
|
|
445
|
-
throw new Error(`Native binding package version mismatch, expected 0.9.
|
|
444
|
+
if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
445
|
+
throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
446
446
|
}
|
|
447
447
|
return binding
|
|
448
448
|
} catch (e) {
|
|
@@ -461,8 +461,8 @@ function requireNative() {
|
|
|
461
461
|
try {
|
|
462
462
|
const binding = require('@d0paminedriven/pdfdown-ocr-openharmony-arm64')
|
|
463
463
|
const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-openharmony-arm64/package.json').version
|
|
464
|
-
if (bindingPackageVersion !== '0.9.
|
|
465
|
-
throw new Error(`Native binding package version mismatch, expected 0.9.
|
|
464
|
+
if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
465
|
+
throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
466
466
|
}
|
|
467
467
|
return binding
|
|
468
468
|
} catch (e) {
|
|
@@ -477,8 +477,8 @@ function requireNative() {
|
|
|
477
477
|
try {
|
|
478
478
|
const binding = require('@d0paminedriven/pdfdown-ocr-openharmony-x64')
|
|
479
479
|
const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-openharmony-x64/package.json').version
|
|
480
|
-
if (bindingPackageVersion !== '0.9.
|
|
481
|
-
throw new Error(`Native binding package version mismatch, expected 0.9.
|
|
480
|
+
if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
481
|
+
throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
482
482
|
}
|
|
483
483
|
return binding
|
|
484
484
|
} catch (e) {
|
|
@@ -493,8 +493,8 @@ function requireNative() {
|
|
|
493
493
|
try {
|
|
494
494
|
const binding = require('@d0paminedriven/pdfdown-ocr-openharmony-arm')
|
|
495
495
|
const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-openharmony-arm/package.json').version
|
|
496
|
-
if (bindingPackageVersion !== '0.9.
|
|
497
|
-
throw new Error(`Native binding package version mismatch, expected 0.9.
|
|
496
|
+
if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
497
|
+
throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
498
498
|
}
|
|
499
499
|
return binding
|
|
500
500
|
} catch (e) {
|
|
@@ -570,6 +570,8 @@ module.exports.extractTextWithOcrPerPage = nativeBinding.extractTextWithOcrPerPa
|
|
|
570
570
|
module.exports.extractTextWithOcrPerPageAsync = nativeBinding.extractTextWithOcrPerPageAsync
|
|
571
571
|
module.exports.pdfDocument = nativeBinding.pdfDocument
|
|
572
572
|
module.exports.pdfDocumentAsync = nativeBinding.pdfDocumentAsync
|
|
573
|
+
module.exports.pdfDocumentOcr = nativeBinding.pdfDocumentOcr
|
|
574
|
+
module.exports.pdfDocumentOcrAsync = nativeBinding.pdfDocumentOcrAsync
|
|
573
575
|
module.exports.pdfMetadata = nativeBinding.pdfMetadata
|
|
574
576
|
module.exports.pdfMetadataAsync = nativeBinding.pdfMetadataAsync
|
|
575
577
|
module.exports.TextSource = nativeBinding.TextSource
|