@d0paminedriven/pdfdown-ocr 0.8.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. package/README.md +268 -0
  2. package/index.d.ts +174 -0
  3. package/index.js +577 -0
  4. package/package.json +3 -2
package/README.md ADDED
@@ -0,0 +1,268 @@
1
+ # `@d0paminedriven/pdfdown-ocr`
2
+
3
+ Rust-powered PDF extraction for Node.js with Tesseract OCR fallback for image-only pages. A superset of [`@d0paminedriven/pdfdown`](https://www.npmjs.com/package/@d0paminedriven/pdfdown) -- includes all base extraction APIs (text, images, annotations, structured text, metadata) plus OCR.
4
+
5
+ **System requirement:** [Tesseract](https://github.com/tesseract-ocr/tesseract) 5.x must be installed on the host.
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ npm install @d0paminedriven/pdfdown-ocr
11
+ ```
12
+
13
+ ### Tesseract setup
14
+
15
+ ```bash
16
+ # Ubuntu/Debian (Ubuntu 22.04 ships tesseract 4.x -- use the PPA for 5.x)
17
+ sudo add-apt-repository ppa:alex-p/tesseract-ocr5
18
+ sudo apt update
19
+ sudo apt install tesseract-ocr tesseract-ocr-eng -y
20
+ # Optional: all language packs
21
+ # sudo apt install tesseract-ocr-all
22
+
23
+ # macOS
24
+ brew install tesseract
25
+
26
+ # Arch
27
+ sudo pacman -S tesseract tesseract-data-eng
28
+ ```
29
+
30
+ Verify with `tesseract --version` -- you should see 5.x.
31
+
32
+ ### Tessdata auto-detection
33
+
34
+ The package automatically detects the tessdata directory at runtime by parsing the output of `tesseract --list-langs`. The detected path is cached for the lifetime of the process using a `OnceLock<Option<String>>` -- no global environment mutation, fully thread-safe.
35
+
36
+ **Resolution order:**
37
+
38
+ 1. `TESSDATA_PREFIX` environment variable (if set, used as-is -- no auto-detection runs)
39
+ 2. Auto-detection via `tesseract --list-langs` (parses the path from `List of available languages in "/path/to/tessdata/"`)
40
+ 3. Tesseract's compiled-in default (if neither of the above yields a path)
41
+
42
+ Most users will not need to set `TESSDATA_PREFIX` at all. The auto-detection handles standard installations on Ubuntu (`/usr/share/tesseract-ocr/5/tessdata/`), macOS Homebrew (`/opt/homebrew/share/tessdata/`), Arch, and any other layout where `tesseract` is on `PATH`.
43
+
44
+ Set `TESSDATA_PREFIX` explicitly only if:
45
+
46
+ - Tesseract is not on `PATH` but the tessdata directory exists elsewhere
47
+ - You want to override the detected path (e.g., pointing to a custom-trained data directory)
48
+
49
+ ```bash
50
+ # Override example (not usually needed)
51
+ export TESSDATA_PREFIX="/opt/custom/tessdata"
52
+ ```
53
+
54
+ ## API
55
+
56
+ This package exports everything from `@d0paminedriven/pdfdown` (text, images, annotations, structured text, metadata -- both sync and async), plus the OCR-specific APIs below. See the [base package docs](https://www.npmjs.com/package/@d0paminedriven/pdfdown) for the full base API.
57
+
58
+ ### OCR standalone functions
59
+
60
+ ```typescript
61
+ // Per-page OCR text extraction
62
+ export declare function extractTextWithOcrPerPage(
63
+ buffer: Buffer,
64
+ opts?: OcrOptions,
65
+ ): Array<OcrPageText>
66
+
67
+ export declare function extractTextWithOcrPerPageAsync(
68
+ buffer: Buffer,
69
+ opts?: OcrOptions,
70
+ ): Promise<Array<OcrPageText>>
71
+
72
+ // Full document extraction with OCR text fallback
73
+ export declare function pdfDocumentOcr(
74
+ buffer: Buffer,
75
+ opts?: OcrOptions,
76
+ ): PdfDocumentOcr
77
+
78
+ export declare function pdfDocumentOcrAsync(
79
+ buffer: Buffer,
80
+ opts?: OcrOptions,
81
+ ): Promise<PdfDocumentOcr>
82
+ ```
83
+
84
+ ### `PdfDown` class (includes OCR methods)
85
+
86
+ ```typescript
87
+ export declare class PdfDown {
88
+ constructor(buffer: Buffer)
89
+
90
+ // ── Base methods ──
91
+ textPerPage(): Array<PageText>
92
+ textPerPageAsync(): Promise<Array<PageText>>
93
+ imagesPerPage(): Array<PageImage>
94
+ imagesPerPageAsync(): Promise<Array<PageImage>>
95
+ annotationsPerPage(): Array<PageAnnotation>
96
+ annotationsPerPageAsync(): Promise<Array<PageAnnotation>>
97
+ structuredText(): Array<StructuredPageText>
98
+ structuredTextAsync(): Promise<Array<StructuredPageText>>
99
+ metadata(): PdfMeta
100
+ metadataAsync(): Promise<PdfMeta>
101
+ document(): PdfDocument
102
+ documentAsync(): Promise<PdfDocument>
103
+
104
+ // ── OCR methods ──
105
+ textWithOcrPerPage(opts?: OcrOptions): Array<OcrPageText>
106
+ textWithOcrPerPageAsync(opts?: OcrOptions): Promise<Array<OcrPageText>>
107
+ documentOcr(opts?: OcrOptions): PdfDocumentOcr
108
+ documentOcrAsync(opts?: OcrOptions): Promise<PdfDocumentOcr>
109
+ }
110
+ ```
111
+
112
+ ### Types
113
+
114
+ ```typescript
115
+ export const enum TextSource {
116
+ Native = 'Native',
117
+ Ocr = 'Ocr',
118
+ }
119
+
120
+ export interface OcrPageText {
121
+ page: number
122
+ text: string
123
+ source: TextSource
124
+ }
125
+
126
+ export interface OcrStructuredPageText {
127
+ page: number
128
+ header: string
129
+ body: string
130
+ footer: string
131
+ source: TextSource
132
+ }
133
+
134
+ export interface OcrOptions {
135
+ lang?: string // Tesseract language code, default "eng"
136
+ minTextLength?: number // pages with fewer non-whitespace chars than this fall back to OCR, default 1
137
+ maxThreads?: number // cap on Rayon threads for OCR parallelism, default 4, clamped to [1, available CPUs]
138
+ }
139
+
140
+ export interface PdfDocumentOcr {
141
+ version: string
142
+ isLinearized: boolean
143
+ pageCount: number
144
+ creator?: string
145
+ producer?: string
146
+ creationDate?: string
147
+ modificationDate?: string
148
+ totalImages: number
149
+ totalAnnotations: number
150
+ imagePages: Array<number>
151
+ annotationPages: Array<number>
152
+ text: Array<OcrPageText>
153
+ structuredText: Array<OcrStructuredPageText>
154
+ images: Array<PageImage>
155
+ annotations: Array<PageAnnotation>
156
+ }
157
+ ```
158
+
159
+ ## Usage
160
+
161
+ > **Use the async API for OCR.** The sync variants block the Node.js event loop for the duration of OCR processing, which can be significant for multi-page scanned documents.
162
+
163
+ ### Standalone
164
+
165
+ ```typescript
166
+ import { readFile } from 'fs/promises'
167
+ import { extractTextWithOcrPerPageAsync } from '@d0paminedriven/pdfdown-ocr'
168
+
169
+ const pdf = await readFile('scanned-document.pdf')
170
+ const pages = await extractTextWithOcrPerPageAsync(pdf, { lang: 'eng', minTextLength: 10 })
171
+
172
+ for (const { page, text, source } of pages) {
173
+ console.log(`Page ${page} [${source}]: ${text.slice(0, 100)}...`)
174
+ }
175
+ ```
176
+
177
+ ### Class-based (parse once, extract many)
178
+
179
+ ```typescript
180
+ import { readFile } from 'fs/promises'
181
+ import { PdfDown } from '@d0paminedriven/pdfdown-ocr'
182
+
183
+ const pdf = new PdfDown(await readFile('scanned-document.pdf'))
184
+
185
+ // OCR text extraction
186
+ const pages = await pdf.textWithOcrPerPageAsync({ lang: 'eng', minTextLength: 10 })
187
+
188
+ // All base methods work too
189
+ const images = await pdf.imagesPerPageAsync()
190
+ const meta = pdf.metadata()
191
+ ```
192
+
193
+ ### Extract everything with OCR in one call
194
+
195
+ ```typescript
196
+ import { readFile } from 'fs/promises'
197
+ import { PdfDown } from '@d0paminedriven/pdfdown-ocr'
198
+
199
+ const pdf = new PdfDown(await readFile('scanned-document.pdf'))
200
+ const result = await pdf.documentOcrAsync({ minTextLength: 10 })
201
+
202
+ // result.text — OcrPageText[] (page, text, source per page)
203
+ // result.structuredText — OcrStructuredPageText[] (header/body/footer + source per page)
204
+ // result.images — PageImage[] (decoded PNGs with dimensions and color space)
205
+ // result.annotations — PageAnnotation[] (links, destinations, rects)
206
+ // result.pageCount, result.version, result.creator, ...
207
+ ```
208
+
209
+ ### Combined: OCR text + images for multimodal pipelines
210
+
211
+ ```typescript
212
+ import { readFile } from 'fs/promises'
213
+ import { PdfDown } from '@d0paminedriven/pdfdown-ocr'
214
+
215
+ const pdf = new PdfDown(await readFile('scanned-document.pdf'))
216
+
217
+ const [ocrText, images] = await Promise.all([
218
+ pdf.textWithOcrPerPageAsync({ minTextLength: 10 }),
219
+ pdf.imagesPerPageAsync(),
220
+ ])
221
+
222
+ const imagesByPage = Map.groupBy(images, (img) => img.page) // Map.groupBy requires Node.js 21+
223
+
224
+ for (const { page, text, source } of ocrText) {
225
+ const pageImages = (imagesByPage.get(page) ?? []).map((img) => ({
226
+ dataUrl: `data:image/png;base64,${img.data.toString('base64')}`,
227
+ width: img.width,
228
+ height: img.height,
229
+ }))
230
+ // Send { page, text, source, images: pageImages } to your embedding pipeline
231
+ }
232
+ ```
233
+
234
+ ### `document()` vs `documentOcr()`
235
+
236
+ Both methods extract everything from a PDF in a single call. The difference is how text is extracted:
237
+
238
+ | Method | Text extraction | Return type | Use when |
239
+ |--------|----------------|-------------|----------|
240
+ | `document()` / `documentAsync()` | Native PDF text only | `PdfDocument` | PDF has selectable text |
241
+ | `documentOcr()` / `documentOcrAsync()` | Native with OCR fallback | `PdfDocumentOcr` | PDF may contain scanned/image-only pages |
242
+
243
+ `PdfDocumentOcr` uses `OcrPageText` (with `source: 'Native' | 'Ocr'`) and `OcrStructuredPageText` (with header/body/footer split plus source) instead of the base `PageText` and `StructuredPageText` types. Images, annotations, and metadata are identical in both.
244
+
245
+ ## How it works
246
+
247
+ 1. **Text extraction:** Native PDF text extraction is attempted first on every page. If a page yields fewer non-whitespace characters than `minTextLength`, its embedded images are decoded and fed to Tesseract for OCR. Each result is tagged with `source: 'Native'` or `source: 'Ocr'`.
248
+
249
+ 2. **Structured text:** After text extraction, repeated header/footer lines are detected across pages using frequency analysis (requires 3+ pages). Each page's text is split into `header`, `body`, and `footer` sections. For OCR results, the `source` tag is preserved so you know whether each page's content came from native extraction or OCR.
250
+
251
+ 3. **Parallelism:** OCR runs on a dedicated capped Rayon thread pool (default 4 threads, configurable via `maxThreads`) to prevent CPU oversubscription. Text extraction, image extraction, and annotation extraction run concurrently via `rayon::join` when using `documentOcr` / `documentOcrAsync`.
252
+
253
+ 4. **Tessdata discovery:** On first OCR invocation, the tessdata path is resolved once and cached in a `OnceLock`. The `TESSDATA_PREFIX` environment variable is checked first; if unset, `tesseract --list-langs` is executed and its output is parsed to extract the path. No environment variables are mutated -- the path is passed directly to Tesseract's init function.
254
+
255
+ ## Supported platforms
256
+
257
+ Prebuilt binaries are provided for:
258
+
259
+ - macOS (x64, ARM64)
260
+ - Linux glibc (x64, ARM64)
261
+
262
+ ## Relationship to `@d0paminedriven/pdfdown`
263
+
264
+ Same Rust codebase, compiled with the `ocr` Cargo feature flag enabled. This package is a strict superset -- you can use it as a drop-in replacement for the base package if you need OCR capabilities.
265
+
266
+ ## License
267
+
268
+ MIT
package/index.d.ts ADDED
@@ -0,0 +1,174 @@
1
+ /* auto-generated by NAPI-RS */
2
+ /* eslint-disable */
3
+ export declare class PdfDown {
4
+ constructor(buffer: Buffer)
5
+ /** Sync: extract text per page (reuses the already-parsed document) */
6
+ textPerPage(): Array<PageText>
7
+ /** Sync: extract images per page (reuses the already-parsed document) */
8
+ imagesPerPage(): Array<PageImage>
9
+ /** Sync: extract annotations per page (reuses the already-parsed document) */
10
+ annotationsPerPage(): Array<PageAnnotation>
11
+ /** Sync: get PDF metadata (reuses the already-parsed document) */
12
+ metadata(): PdfMeta
13
+ /** Async: extract text per page on the libuv thread pool (shares parsed document via Arc) */
14
+ textPerPageAsync(): Promise<Array<PageText>>
15
+ /** Async: extract images per page on the libuv thread pool (shares parsed document via Arc) */
16
+ imagesPerPageAsync(): Promise<Array<PageImage>>
17
+ /** Async: extract annotations per page on the libuv thread pool (shares parsed document via Arc) */
18
+ annotationsPerPageAsync(): Promise<Array<PageAnnotation>>
19
+ /** Async: get PDF metadata on the libuv thread pool (shares parsed document via Arc) */
20
+ metadataAsync(): Promise<PdfMeta>
21
+ /** Sync: extract everything from the PDF in one call (reuses the already-parsed document) */
22
+ document(): PdfDocument
23
+ /** Async: extract everything from the PDF on the libuv thread pool (shares parsed document via Arc) */
24
+ documentAsync(): Promise<PdfDocument>
25
+ /** Sync: extract structured text with header/footer detection */
26
+ structuredText(): Array<StructuredPageText>
27
+ /** Async: extract structured text with header/footer detection */
28
+ structuredTextAsync(): Promise<Array<StructuredPageText>>
29
+ /** Sync: extract text with OCR fallback for image-only pages */
30
+ textWithOcrPerPage(opts?: OcrOptions | undefined | null): Array<OcrPageText>
31
+ /** Async: extract text with OCR fallback for image-only pages */
32
+ textWithOcrPerPageAsync(opts?: OcrOptions | undefined | null): Promise<Array<OcrPageText>>
33
+ /** Sync: extract everything from the PDF with OCR text fallback */
34
+ documentOcr(opts?: OcrOptions | undefined | null): PdfDocumentOcr
35
+ /** Async: extract everything from the PDF with OCR text fallback */
36
+ documentOcrAsync(opts?: OcrOptions | undefined | null): Promise<PdfDocumentOcr>
37
+ }
38
+
39
+ export declare function extractAnnotationsPerPage(buffer: Buffer): Array<PageAnnotation>
40
+
41
+ export declare function extractAnnotationsPerPageAsync(buffer: Buffer): Promise<Array<PageAnnotation>>
42
+
43
+ export declare function extractImagesPerPage(buffer: Buffer): Array<PageImage>
44
+
45
+ export declare function extractImagesPerPageAsync(buffer: Buffer): Promise<Array<PageImage>>
46
+
47
+ export declare function extractStructuredTextPerPage(buffer: Buffer): Array<StructuredPageText>
48
+
49
+ export declare function extractStructuredTextPerPageAsync(buffer: Buffer): Promise<Array<StructuredPageText>>
50
+
51
+ export declare function extractTextPerPage(buffer: Buffer): Array<PageText>
52
+
53
+ export declare function extractTextPerPageAsync(buffer: Buffer): Promise<Array<PageText>>
54
+
55
+ export declare function extractTextWithOcrPerPage(buffer: Buffer, opts?: OcrOptions | undefined | null): Array<OcrPageText>
56
+
57
+ export declare function extractTextWithOcrPerPageAsync(buffer: Buffer, opts?: OcrOptions | undefined | null): Promise<Array<OcrPageText>>
58
+
59
+ export interface OcrOptions {
60
+ lang?: string
61
+ minTextLength?: number
62
+ maxThreads?: number
63
+ }
64
+
65
+ export interface OcrPageText {
66
+ page: number
67
+ text: string
68
+ source: TextSource
69
+ }
70
+
71
+ export interface OcrStructuredPageText {
72
+ page: number
73
+ header: string
74
+ body: string
75
+ footer: string
76
+ source: TextSource
77
+ }
78
+
79
+ export interface PageAnnotation {
80
+ page: number
81
+ subtype: string
82
+ rect: Array<number>
83
+ uri?: string
84
+ dest?: string
85
+ content?: string
86
+ }
87
+
88
+ export interface PageImage {
89
+ page: number
90
+ imageIndex: number
91
+ width: number
92
+ height: number
93
+ data: Buffer
94
+ colorSpace: string
95
+ bitsPerComponent: number
96
+ filter: string
97
+ xobjectName: string
98
+ objectId: string
99
+ }
100
+
101
+ export interface PageText {
102
+ page: number
103
+ text: string
104
+ }
105
+
106
+ export declare function pdfDocument(buffer: Buffer): PdfDocument
107
+
108
+ export interface PdfDocument {
109
+ version: string
110
+ isLinearized: boolean
111
+ pageCount: number
112
+ creator?: string
113
+ producer?: string
114
+ creationDate?: string
115
+ modificationDate?: string
116
+ totalImages: number
117
+ totalAnnotations: number
118
+ imagePages: Array<number>
119
+ annotationPages: Array<number>
120
+ text: Array<PageText>
121
+ structuredText: Array<StructuredPageText>
122
+ images: Array<PageImage>
123
+ annotations: Array<PageAnnotation>
124
+ }
125
+
126
+ export declare function pdfDocumentAsync(buffer: Buffer): Promise<PdfDocument>
127
+
128
+ export declare function pdfDocumentOcr(buffer: Buffer, opts?: OcrOptions | undefined | null): PdfDocumentOcr
129
+
130
+ export interface PdfDocumentOcr {
131
+ version: string
132
+ isLinearized: boolean
133
+ pageCount: number
134
+ creator?: string
135
+ producer?: string
136
+ creationDate?: string
137
+ modificationDate?: string
138
+ totalImages: number
139
+ totalAnnotations: number
140
+ imagePages: Array<number>
141
+ annotationPages: Array<number>
142
+ text: Array<OcrPageText>
143
+ structuredText: Array<OcrStructuredPageText>
144
+ images: Array<PageImage>
145
+ annotations: Array<PageAnnotation>
146
+ }
147
+
148
+ export declare function pdfDocumentOcrAsync(buffer: Buffer, opts?: OcrOptions | undefined | null): Promise<PdfDocumentOcr>
149
+
150
+ export interface PdfMeta {
151
+ pageCount: number
152
+ version: string
153
+ isLinearized: boolean
154
+ creator?: string
155
+ producer?: string
156
+ creationDate?: string
157
+ modificationDate?: string
158
+ }
159
+
160
+ export declare function pdfMetadata(buffer: Buffer): PdfMeta
161
+
162
+ export declare function pdfMetadataAsync(buffer: Buffer): Promise<PdfMeta>
163
+
164
+ export interface StructuredPageText {
165
+ page: number
166
+ header: string
167
+ body: string
168
+ footer: string
169
+ }
170
+
171
+ export declare const enum TextSource {
172
+ Native = 'Native',
173
+ Ocr = 'Ocr'
174
+ }
package/index.js ADDED
@@ -0,0 +1,577 @@
1
+ // prettier-ignore
2
+ /* eslint-disable */
3
+ // @ts-nocheck
4
+ /* auto-generated by NAPI-RS */
5
+
6
+ const { createRequire } = require('node:module')
7
+ require = createRequire(__filename)
8
+
9
+ const { readFileSync } = require('node:fs')
10
+ let nativeBinding = null
11
+ const loadErrors = []
12
+
13
+ const isMusl = () => {
14
+ let musl = false
15
+ if (process.platform === 'linux') {
16
+ musl = isMuslFromFilesystem()
17
+ if (musl === null) {
18
+ musl = isMuslFromReport()
19
+ }
20
+ if (musl === null) {
21
+ musl = isMuslFromChildProcess()
22
+ }
23
+ }
24
+ return musl
25
+ }
26
+
27
+ const isFileMusl = (f) => f.includes('libc.musl-') || f.includes('ld-musl-')
28
+
29
+ const isMuslFromFilesystem = () => {
30
+ try {
31
+ return readFileSync('/usr/bin/ldd', 'utf-8').includes('musl')
32
+ } catch {
33
+ return null
34
+ }
35
+ }
36
+
37
+ const isMuslFromReport = () => {
38
+ let report = null
39
+ if (typeof process.report?.getReport === 'function') {
40
+ process.report.excludeNetwork = true
41
+ report = process.report.getReport()
42
+ }
43
+ if (!report) {
44
+ return null
45
+ }
46
+ if (report.header && report.header.glibcVersionRuntime) {
47
+ return false
48
+ }
49
+ if (Array.isArray(report.sharedObjects)) {
50
+ if (report.sharedObjects.some(isFileMusl)) {
51
+ return true
52
+ }
53
+ }
54
+ return false
55
+ }
56
+
57
+ const isMuslFromChildProcess = () => {
58
+ try {
59
+ return require('child_process').execSync('ldd --version', { encoding: 'utf8' }).includes('musl')
60
+ } catch (e) {
61
+ // If we reach this case, we don't know if the system is musl or not, so it is better to just fall back to false
62
+ return false
63
+ }
64
+ }
65
+
66
+ function requireNative() {
67
+ if (process.env.NAPI_RS_NATIVE_LIBRARY_PATH) {
68
+ try {
69
+ return require(process.env.NAPI_RS_NATIVE_LIBRARY_PATH);
70
+ } catch (err) {
71
+ loadErrors.push(err)
72
+ }
73
+ } else if (process.platform === 'android') {
74
+ if (process.arch === 'arm64') {
75
+ try {
76
+ return require('./pdfdown_ocr.android-arm64.node')
77
+ } catch (e) {
78
+ loadErrors.push(e)
79
+ }
80
+ try {
81
+ const binding = require('@d0paminedriven/pdfdown-ocr-android-arm64')
82
+ const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-android-arm64/package.json').version
83
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
84
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
85
+ }
86
+ return binding
87
+ } catch (e) {
88
+ loadErrors.push(e)
89
+ }
90
+ } else if (process.arch === 'arm') {
91
+ try {
92
+ return require('./pdfdown_ocr.android-arm-eabi.node')
93
+ } catch (e) {
94
+ loadErrors.push(e)
95
+ }
96
+ try {
97
+ const binding = require('@d0paminedriven/pdfdown-ocr-android-arm-eabi')
98
+ const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-android-arm-eabi/package.json').version
99
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
100
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
101
+ }
102
+ return binding
103
+ } catch (e) {
104
+ loadErrors.push(e)
105
+ }
106
+ } else {
107
+ loadErrors.push(new Error(`Unsupported architecture on Android ${process.arch}`))
108
+ }
109
+ } else if (process.platform === 'win32') {
110
+ if (process.arch === 'x64') {
111
+ try {
112
+ return require('./pdfdown_ocr.win32-x64-msvc.node')
113
+ } catch (e) {
114
+ loadErrors.push(e)
115
+ }
116
+ try {
117
+ const binding = require('@d0paminedriven/pdfdown-ocr-win32-x64-msvc')
118
+ const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-win32-x64-msvc/package.json').version
119
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
120
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
121
+ }
122
+ return binding
123
+ } catch (e) {
124
+ loadErrors.push(e)
125
+ }
126
+ } else if (process.arch === 'ia32') {
127
+ try {
128
+ return require('./pdfdown_ocr.win32-ia32-msvc.node')
129
+ } catch (e) {
130
+ loadErrors.push(e)
131
+ }
132
+ try {
133
+ const binding = require('@d0paminedriven/pdfdown-ocr-win32-ia32-msvc')
134
+ const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-win32-ia32-msvc/package.json').version
135
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
136
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
137
+ }
138
+ return binding
139
+ } catch (e) {
140
+ loadErrors.push(e)
141
+ }
142
+ } else if (process.arch === 'arm64') {
143
+ try {
144
+ return require('./pdfdown_ocr.win32-arm64-msvc.node')
145
+ } catch (e) {
146
+ loadErrors.push(e)
147
+ }
148
+ try {
149
+ const binding = require('@d0paminedriven/pdfdown-ocr-win32-arm64-msvc')
150
+ const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-win32-arm64-msvc/package.json').version
151
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
152
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
153
+ }
154
+ return binding
155
+ } catch (e) {
156
+ loadErrors.push(e)
157
+ }
158
+ } else {
159
+ loadErrors.push(new Error(`Unsupported architecture on Windows: ${process.arch}`))
160
+ }
161
+ } else if (process.platform === 'darwin') {
162
+ try {
163
+ return require('./pdfdown_ocr.darwin-universal.node')
164
+ } catch (e) {
165
+ loadErrors.push(e)
166
+ }
167
+ try {
168
+ const binding = require('@d0paminedriven/pdfdown-ocr-darwin-universal')
169
+ const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-darwin-universal/package.json').version
170
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
171
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
172
+ }
173
+ return binding
174
+ } catch (e) {
175
+ loadErrors.push(e)
176
+ }
177
+ if (process.arch === 'x64') {
178
+ try {
179
+ return require('./pdfdown_ocr.darwin-x64.node')
180
+ } catch (e) {
181
+ loadErrors.push(e)
182
+ }
183
+ try {
184
+ const binding = require('@d0paminedriven/pdfdown-ocr-darwin-x64')
185
+ const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-darwin-x64/package.json').version
186
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
187
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
188
+ }
189
+ return binding
190
+ } catch (e) {
191
+ loadErrors.push(e)
192
+ }
193
+ } else if (process.arch === 'arm64') {
194
+ try {
195
+ return require('./pdfdown_ocr.darwin-arm64.node')
196
+ } catch (e) {
197
+ loadErrors.push(e)
198
+ }
199
+ try {
200
+ const binding = require('@d0paminedriven/pdfdown-ocr-darwin-arm64')
201
+ const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-darwin-arm64/package.json').version
202
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
203
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
204
+ }
205
+ return binding
206
+ } catch (e) {
207
+ loadErrors.push(e)
208
+ }
209
+ } else {
210
+ loadErrors.push(new Error(`Unsupported architecture on macOS: ${process.arch}`))
211
+ }
212
+ } else if (process.platform === 'freebsd') {
213
+ if (process.arch === 'x64') {
214
+ try {
215
+ return require('./pdfdown_ocr.freebsd-x64.node')
216
+ } catch (e) {
217
+ loadErrors.push(e)
218
+ }
219
+ try {
220
+ const binding = require('@d0paminedriven/pdfdown-ocr-freebsd-x64')
221
+ const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-freebsd-x64/package.json').version
222
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
223
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
224
+ }
225
+ return binding
226
+ } catch (e) {
227
+ loadErrors.push(e)
228
+ }
229
+ } else if (process.arch === 'arm64') {
230
+ try {
231
+ return require('./pdfdown_ocr.freebsd-arm64.node')
232
+ } catch (e) {
233
+ loadErrors.push(e)
234
+ }
235
+ try {
236
+ const binding = require('@d0paminedriven/pdfdown-ocr-freebsd-arm64')
237
+ const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-freebsd-arm64/package.json').version
238
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
239
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
240
+ }
241
+ return binding
242
+ } catch (e) {
243
+ loadErrors.push(e)
244
+ }
245
+ } else {
246
+ loadErrors.push(new Error(`Unsupported architecture on FreeBSD: ${process.arch}`))
247
+ }
248
+ } else if (process.platform === 'linux') {
249
+ if (process.arch === 'x64') {
250
+ if (isMusl()) {
251
+ try {
252
+ return require('./pdfdown_ocr.linux-x64-musl.node')
253
+ } catch (e) {
254
+ loadErrors.push(e)
255
+ }
256
+ try {
257
+ const binding = require('@d0paminedriven/pdfdown-ocr-linux-x64-musl')
258
+ const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-x64-musl/package.json').version
259
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
260
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
261
+ }
262
+ return binding
263
+ } catch (e) {
264
+ loadErrors.push(e)
265
+ }
266
+ } else {
267
+ try {
268
+ return require('./pdfdown_ocr.linux-x64-gnu.node')
269
+ } catch (e) {
270
+ loadErrors.push(e)
271
+ }
272
+ try {
273
+ const binding = require('@d0paminedriven/pdfdown-ocr-linux-x64-gnu')
274
+ const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-x64-gnu/package.json').version
275
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
276
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
277
+ }
278
+ return binding
279
+ } catch (e) {
280
+ loadErrors.push(e)
281
+ }
282
+ }
283
+ } else if (process.arch === 'arm64') {
284
+ if (isMusl()) {
285
+ try {
286
+ return require('./pdfdown_ocr.linux-arm64-musl.node')
287
+ } catch (e) {
288
+ loadErrors.push(e)
289
+ }
290
+ try {
291
+ const binding = require('@d0paminedriven/pdfdown-ocr-linux-arm64-musl')
292
+ const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-arm64-musl/package.json').version
293
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
294
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
295
+ }
296
+ return binding
297
+ } catch (e) {
298
+ loadErrors.push(e)
299
+ }
300
+ } else {
301
+ try {
302
+ return require('./pdfdown_ocr.linux-arm64-gnu.node')
303
+ } catch (e) {
304
+ loadErrors.push(e)
305
+ }
306
+ try {
307
+ const binding = require('@d0paminedriven/pdfdown-ocr-linux-arm64-gnu')
308
+ const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-arm64-gnu/package.json').version
309
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
310
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
311
+ }
312
+ return binding
313
+ } catch (e) {
314
+ loadErrors.push(e)
315
+ }
316
+ }
317
+ } else if (process.arch === 'arm') {
318
+ if (isMusl()) {
319
+ try {
320
+ return require('./pdfdown_ocr.linux-arm-musleabihf.node')
321
+ } catch (e) {
322
+ loadErrors.push(e)
323
+ }
324
+ try {
325
+ const binding = require('@d0paminedriven/pdfdown-ocr-linux-arm-musleabihf')
326
+ const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-arm-musleabihf/package.json').version
327
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
328
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
329
+ }
330
+ return binding
331
+ } catch (e) {
332
+ loadErrors.push(e)
333
+ }
334
+ } else {
335
+ try {
336
+ return require('./pdfdown_ocr.linux-arm-gnueabihf.node')
337
+ } catch (e) {
338
+ loadErrors.push(e)
339
+ }
340
+ try {
341
+ const binding = require('@d0paminedriven/pdfdown-ocr-linux-arm-gnueabihf')
342
+ const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-arm-gnueabihf/package.json').version
343
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
344
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
345
+ }
346
+ return binding
347
+ } catch (e) {
348
+ loadErrors.push(e)
349
+ }
350
+ }
351
+ } else if (process.arch === 'loong64') {
352
+ if (isMusl()) {
353
+ try {
354
+ return require('./pdfdown_ocr.linux-loong64-musl.node')
355
+ } catch (e) {
356
+ loadErrors.push(e)
357
+ }
358
+ try {
359
+ const binding = require('@d0paminedriven/pdfdown-ocr-linux-loong64-musl')
360
+ const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-loong64-musl/package.json').version
361
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
362
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
363
+ }
364
+ return binding
365
+ } catch (e) {
366
+ loadErrors.push(e)
367
+ }
368
+ } else {
369
+ try {
370
+ return require('./pdfdown_ocr.linux-loong64-gnu.node')
371
+ } catch (e) {
372
+ loadErrors.push(e)
373
+ }
374
+ try {
375
+ const binding = require('@d0paminedriven/pdfdown-ocr-linux-loong64-gnu')
376
+ const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-loong64-gnu/package.json').version
377
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
378
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
379
+ }
380
+ return binding
381
+ } catch (e) {
382
+ loadErrors.push(e)
383
+ }
384
+ }
385
+ } else if (process.arch === 'riscv64') {
386
+ if (isMusl()) {
387
+ try {
388
+ return require('./pdfdown_ocr.linux-riscv64-musl.node')
389
+ } catch (e) {
390
+ loadErrors.push(e)
391
+ }
392
+ try {
393
+ const binding = require('@d0paminedriven/pdfdown-ocr-linux-riscv64-musl')
394
+ const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-riscv64-musl/package.json').version
395
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
396
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
397
+ }
398
+ return binding
399
+ } catch (e) {
400
+ loadErrors.push(e)
401
+ }
402
+ } else {
403
+ try {
404
+ return require('./pdfdown_ocr.linux-riscv64-gnu.node')
405
+ } catch (e) {
406
+ loadErrors.push(e)
407
+ }
408
+ try {
409
+ const binding = require('@d0paminedriven/pdfdown-ocr-linux-riscv64-gnu')
410
+ const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-riscv64-gnu/package.json').version
411
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
412
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
413
+ }
414
+ return binding
415
+ } catch (e) {
416
+ loadErrors.push(e)
417
+ }
418
+ }
419
+ } else if (process.arch === 'ppc64') {
420
+ try {
421
+ return require('./pdfdown_ocr.linux-ppc64-gnu.node')
422
+ } catch (e) {
423
+ loadErrors.push(e)
424
+ }
425
+ try {
426
+ const binding = require('@d0paminedriven/pdfdown-ocr-linux-ppc64-gnu')
427
+ const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-ppc64-gnu/package.json').version
428
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
429
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
430
+ }
431
+ return binding
432
+ } catch (e) {
433
+ loadErrors.push(e)
434
+ }
435
+ } else if (process.arch === 's390x') {
436
+ try {
437
+ return require('./pdfdown_ocr.linux-s390x-gnu.node')
438
+ } catch (e) {
439
+ loadErrors.push(e)
440
+ }
441
+ try {
442
+ const binding = require('@d0paminedriven/pdfdown-ocr-linux-s390x-gnu')
443
+ const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-linux-s390x-gnu/package.json').version
444
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
445
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
446
+ }
447
+ return binding
448
+ } catch (e) {
449
+ loadErrors.push(e)
450
+ }
451
+ } else {
452
+ loadErrors.push(new Error(`Unsupported architecture on Linux: ${process.arch}`))
453
+ }
454
+ } else if (process.platform === 'openharmony') {
455
+ if (process.arch === 'arm64') {
456
+ try {
457
+ return require('./pdfdown_ocr.openharmony-arm64.node')
458
+ } catch (e) {
459
+ loadErrors.push(e)
460
+ }
461
+ try {
462
+ const binding = require('@d0paminedriven/pdfdown-ocr-openharmony-arm64')
463
+ const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-openharmony-arm64/package.json').version
464
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
465
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
466
+ }
467
+ return binding
468
+ } catch (e) {
469
+ loadErrors.push(e)
470
+ }
471
+ } else if (process.arch === 'x64') {
472
+ try {
473
+ return require('./pdfdown_ocr.openharmony-x64.node')
474
+ } catch (e) {
475
+ loadErrors.push(e)
476
+ }
477
+ try {
478
+ const binding = require('@d0paminedriven/pdfdown-ocr-openharmony-x64')
479
+ const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-openharmony-x64/package.json').version
480
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
481
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
482
+ }
483
+ return binding
484
+ } catch (e) {
485
+ loadErrors.push(e)
486
+ }
487
+ } else if (process.arch === 'arm') {
488
+ try {
489
+ return require('./pdfdown_ocr.openharmony-arm.node')
490
+ } catch (e) {
491
+ loadErrors.push(e)
492
+ }
493
+ try {
494
+ const binding = require('@d0paminedriven/pdfdown-ocr-openharmony-arm')
495
+ const bindingPackageVersion = require('@d0paminedriven/pdfdown-ocr-openharmony-arm/package.json').version
496
+ if (bindingPackageVersion !== '0.9.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
497
+ throw new Error(`Native binding package version mismatch, expected 0.9.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
498
+ }
499
+ return binding
500
+ } catch (e) {
501
+ loadErrors.push(e)
502
+ }
503
+ } else {
504
+ loadErrors.push(new Error(`Unsupported architecture on OpenHarmony: ${process.arch}`))
505
+ }
506
+ } else {
507
+ loadErrors.push(new Error(`Unsupported OS: ${process.platform}, architecture: ${process.arch}`))
508
+ }
509
+ }
510
+
511
+ nativeBinding = requireNative()
512
+
513
+ if (!nativeBinding || process.env.NAPI_RS_FORCE_WASI) {
514
+ let wasiBinding = null
515
+ let wasiBindingError = null
516
+ try {
517
+ wasiBinding = require('./pdfdown_ocr.wasi.cjs')
518
+ nativeBinding = wasiBinding
519
+ } catch (err) {
520
+ if (process.env.NAPI_RS_FORCE_WASI) {
521
+ wasiBindingError = err
522
+ }
523
+ }
524
+ if (!nativeBinding) {
525
+ try {
526
+ wasiBinding = require('@d0paminedriven/pdfdown-ocr-wasm32-wasi')
527
+ nativeBinding = wasiBinding
528
+ } catch (err) {
529
+ if (process.env.NAPI_RS_FORCE_WASI) {
530
+ wasiBindingError.cause = err
531
+ loadErrors.push(err)
532
+ }
533
+ }
534
+ }
535
+ if (process.env.NAPI_RS_FORCE_WASI === 'error' && !wasiBinding) {
536
+ const error = new Error('WASI binding not found and NAPI_RS_FORCE_WASI is set to error')
537
+ error.cause = wasiBindingError
538
+ throw error
539
+ }
540
+ }
541
+
542
+ if (!nativeBinding) {
543
+ if (loadErrors.length > 0) {
544
+ throw new Error(
545
+ `Cannot find native binding. ` +
546
+ `npm has a bug related to optional dependencies (https://github.com/npm/cli/issues/4828). ` +
547
+ 'Please try `npm i` again after removing both package-lock.json and node_modules directory.',
548
+ {
549
+ cause: loadErrors.reduce((err, cur) => {
550
+ cur.cause = err
551
+ return cur
552
+ }),
553
+ },
554
+ )
555
+ }
556
+ throw new Error(`Failed to load native binding`)
557
+ }
558
+
559
// Publish the loaded binding as this module's export object.
// Each following statement reads one property from nativeBinding and writes it
// to module.exports under the same name; module.exports was just set to
// nativeBinding, so both sides refer to the same object.
// NOTE(review): presumably these explicit per-name statements exist so that the
// export names are visible to static tooling (e.g. named-export detection for
// CommonJS modules imported from ESM) -- confirm against the napi-rs generator
// before removing them or collapsing them into a loop.
+ module.exports = nativeBinding
559
+ module.exports.PdfDown = nativeBinding.PdfDown
560
+ module.exports.extractAnnotationsPerPage = nativeBinding.extractAnnotationsPerPage
561
+ module.exports.extractAnnotationsPerPageAsync = nativeBinding.extractAnnotationsPerPageAsync
562
+ module.exports.extractImagesPerPage = nativeBinding.extractImagesPerPage
563
+ module.exports.extractImagesPerPageAsync = nativeBinding.extractImagesPerPageAsync
564
+ module.exports.extractStructuredTextPerPage = nativeBinding.extractStructuredTextPerPage
565
+ module.exports.extractStructuredTextPerPageAsync = nativeBinding.extractStructuredTextPerPageAsync
566
+ module.exports.extractTextPerPage = nativeBinding.extractTextPerPage
567
+ module.exports.extractTextPerPageAsync = nativeBinding.extractTextPerPageAsync
568
+ module.exports.extractTextWithOcrPerPage = nativeBinding.extractTextWithOcrPerPage
569
+ module.exports.extractTextWithOcrPerPageAsync = nativeBinding.extractTextWithOcrPerPageAsync
570
+ module.exports.pdfDocument = nativeBinding.pdfDocument
571
+ module.exports.pdfDocumentAsync = nativeBinding.pdfDocumentAsync
572
+ module.exports.pdfDocumentOcr = nativeBinding.pdfDocumentOcr
573
+ module.exports.pdfDocumentOcrAsync = nativeBinding.pdfDocumentOcrAsync
574
+ module.exports.pdfMetadata = nativeBinding.pdfMetadata
575
+ module.exports.pdfMetadataAsync = nativeBinding.pdfMetadataAsync
576
+ module.exports.TextSource = nativeBinding.TextSource
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@d0paminedriven/pdfdown-ocr",
3
- "version": "0.8.0",
3
+ "version": "0.9.1",
4
4
  "description": "Rust powered PDF extraction for Node with OCR fallback (requires system tesseract).",
5
5
  "main": "index.js",
6
6
  "repository": {
@@ -10,7 +10,8 @@
10
10
  "license": "MIT",
11
11
  "files": [
12
12
  "index.d.ts",
13
- "index.js"
13
+ "index.js",
14
+ "README.md"
14
15
  ],
15
16
  "os": [
16
17
  "darwin",