@hoshomoh/react-native-document-scanner 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/DocumentScanner.podspec +22 -0
- package/LICENSE +20 -0
- package/README.md +384 -0
- package/android/build.gradle +72 -0
- package/android/gradle.properties +17 -0
- package/android/local.properties +8 -0
- package/android/src/main/AndroidManifest.xml +8 -0
- package/android/src/main/java/com/documentscanner/DocumentScannerModule.kt +217 -0
- package/android/src/main/java/com/documentscanner/DocumentScannerPackage.kt +39 -0
- package/android/src/main/java/com/documentscanner/ImageProcessor.kt +325 -0
- package/android/src/main/java/com/documentscanner/Logger.kt +36 -0
- package/android/src/main/java/com/documentscanner/OCRConfiguration.kt +56 -0
- package/android/src/main/java/com/documentscanner/Options.kt +109 -0
- package/android/src/main/java/com/documentscanner/ScannerError.kt +18 -0
- package/android/src/main/java/com/documentscanner/TextRecognizer.kt +56 -0
- package/android/src/main/java/com/documentscanner/TextRecognizerV1.kt +68 -0
- package/android/src/main/java/com/documentscanner/TextRecognizerV2.kt +244 -0
- package/ios/DocumentScanner.h +5 -0
- package/ios/DocumentScanner.mm +113 -0
- package/ios/DocumentScannerManager.swift +148 -0
- package/ios/Errors.swift +33 -0
- package/ios/ImageProcessor.swift +78 -0
- package/ios/ImageUtil.swift +279 -0
- package/ios/Logger.swift +43 -0
- package/ios/OCRConfiguration.swift +60 -0
- package/ios/Options.swift +109 -0
- package/ios/ResponseUtil.swift +25 -0
- package/ios/ScanModels.swift +84 -0
- package/ios/TextRecognizer.swift +134 -0
- package/ios/TextRecognizerV1.swift +56 -0
- package/ios/TextRecognizerV2.swift +169 -0
- package/lib/module/NativeDocumentScanner.js +51 -0
- package/lib/module/NativeDocumentScanner.js.map +1 -0
- package/lib/module/index.js +40 -0
- package/lib/module/index.js.map +1 -0
- package/lib/module/package.json +1 -0
- package/lib/module/textReconstructor.js +147 -0
- package/lib/module/textReconstructor.js.map +1 -0
- package/lib/typescript/package.json +1 -0
- package/lib/typescript/src/NativeDocumentScanner.d.ts +191 -0
- package/lib/typescript/src/NativeDocumentScanner.d.ts.map +1 -0
- package/lib/typescript/src/index.d.ts +34 -0
- package/lib/typescript/src/index.d.ts.map +1 -0
- package/lib/typescript/src/textReconstructor.d.ts +60 -0
- package/lib/typescript/src/textReconstructor.d.ts.map +1 -0
- package/package.json +137 -0
- package/src/NativeDocumentScanner.ts +205 -0
- package/src/index.ts +61 -0
- package/src/textReconstructor.ts +212 -0
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
require "json"
|
|
2
|
+
|
|
3
|
+
package = JSON.parse(File.read(File.join(__dir__, "package.json")))
|
|
4
|
+
|
|
5
|
+
Pod::Spec.new do |s|
|
|
6
|
+
s.name = "DocumentScanner"
|
|
7
|
+
s.version = package["version"]
|
|
8
|
+
s.summary = package["description"]
|
|
9
|
+
s.homepage = package["homepage"]
|
|
10
|
+
s.license = package["license"]
|
|
11
|
+
s.authors = package["author"]
|
|
12
|
+
|
|
13
|
+
s.platforms = { :ios => "13.0" }
|
|
14
|
+
s.source = { :git => "https://github.com/hoshomoh/react-native-document-scanner.git", :tag => "#{s.version}" }
|
|
15
|
+
|
|
16
|
+
s.source_files = "ios/**/*.{h,m,mm,swift,cpp}"
|
|
17
|
+
s.private_header_files = "ios/**/*.h"
|
|
18
|
+
s.frameworks = "VisionKit"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
install_modules_dependencies(s)
|
|
22
|
+
end
|
package/LICENSE
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Oshomo Oforomeh
|
|
4
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
5
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
6
|
+
in the Software without restriction, including without limitation the rights
|
|
7
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
8
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
9
|
+
furnished to do so, subject to the following conditions:
|
|
10
|
+
|
|
11
|
+
The above copyright notice and this permission notice shall be included in all
|
|
12
|
+
copies or substantial portions of the Software.
|
|
13
|
+
|
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
15
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
16
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
17
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
18
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
19
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
20
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,384 @@
|
|
|
1
|
+
# react-native-document-scanner
|
|
2
|
+
|
|
3
|
+
A powerful, high-performance React Native library for scanning documents and extracting text using native platform APIs. Optimized for structured documents like receipts, invoices, and forms.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- 📸 **Document Scanning**: Native UI for scanning documents with auto-detection and perspective correction (VisionKit on iOS, ML Kit on Android).
|
|
8
|
+
- 🖼️ **Image Processing**: High-performance filters including Grayscale, Monochrome, Denoise, and Sharpen.
|
|
9
|
+
- 📝 **Dual-Engine OCR**: Choose between raw platform output (V1) and layout-preserving heuristic extraction (V2).
|
|
10
|
+
- 🧠 **Adaptive Heuristics**: Intelligent line clustering and adaptive spacing for perfect horizontal alignment on receipts.
|
|
11
|
+
- ⚙️ **Batch Processing**: Headless processing of existing images from file system, Content URIs, or Base64.
|
|
12
|
+
- 🗂️ **Result Metadata**: Every `ScanResult` includes `metadata` (platform, OCR engine, filter, version) so you always know exactly how the result was produced.
|
|
13
|
+
- 🧾 **Text Reconstruction**: Pure JS `reconstructText` utility re-renders `blocks` as a column-aligned string — works across all platforms and versions.
|
|
14
|
+
- 🚀 **TurboModules**: Built from the ground up for the React Native New Architecture.
|
|
15
|
+
- 📱 **Cross-Platform Parity**: Identical coordinate systems and configuration logic across iOS and Android.
|
|
16
|
+
|
|
17
|
+
## Platform Support
|
|
18
|
+
|
|
19
|
+
| Feature | iOS | Android |
|
|
20
|
+
| ------------------- | ---------------- | ----------------------- |
|
|
21
|
+
| Document Scanning | VisionKit | ML Kit Document Scanner |
|
|
22
|
+
| OCR Engine | Vision Framework | ML Kit Text Recognition |
|
|
23
|
+
| Coordinate Origin | **Top-Left** | **Top-Left** |
|
|
24
|
+
| Logic Architecture | Swift | Kotlin (Coroutines) |
|
|
25
|
+
| Layout Preservation | ✅ (V2) | ✅ (V2) |
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
## Installation
|
|
30
|
+
|
|
31
|
+
```sh
|
|
32
|
+
yarn add @hoshomoh/react-native-document-scanner
|
|
33
|
+
# or
|
|
34
|
+
npm install @hoshomoh/react-native-document-scanner
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
### iOS Setup
|
|
38
|
+
|
|
39
|
+
```sh
|
|
40
|
+
cd ios && pod install
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
### Android Setup
|
|
44
|
+
|
|
45
|
+
No additional setup required. Google Play Services will automatically manage ML Kit models.
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
## Usage
|
|
50
|
+
|
|
51
|
+
### 1. Scan Documents (Camera UI)
|
|
52
|
+
|
|
53
|
+
Opens the system scanner. Best for manual document capture.
|
|
54
|
+
|
|
55
|
+
```typescript
|
|
56
|
+
import { scanDocuments } from '@hoshomoh/react-native-document-scanner';
|
|
57
|
+
|
|
58
|
+
const results = await scanDocuments({
|
|
59
|
+
maxPageCount: 5,
|
|
60
|
+
textVersion: 2, // Use V2 for receipt layout extraction
|
|
61
|
+
filter: 'ocrOptimized', // Applies denoise -> sharpen -> monochrome
|
|
62
|
+
includeText: true,
|
|
63
|
+
});
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### 2. Process Existing Images (Headless)
|
|
67
|
+
|
|
68
|
+
Batch process images already on the device.
|
|
69
|
+
|
|
70
|
+
```typescript
|
|
71
|
+
import { processDocuments } from '@hoshomoh/react-native-document-scanner';
|
|
72
|
+
|
|
73
|
+
const results = await processDocuments({
|
|
74
|
+
images: ['file:///path/to/receipt.jpg'],
|
|
75
|
+
textVersion: 2,
|
|
76
|
+
includeText: true,
|
|
77
|
+
});
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## API Reference
|
|
83
|
+
|
|
84
|
+
### `scanDocuments(options?: ScanOptions): Promise<ScanResult[]>`
|
|
85
|
+
|
|
86
|
+
### `processDocuments(options: ProcessOptions): Promise<ScanResult[]>`
|
|
87
|
+
|
|
88
|
+
### `reconstructText(blocks: TextBlock[], options?: ReconstructOptions): string`
|
|
89
|
+
|
|
90
|
+
Reconstructs `blocks` as a column-aligned plain-text string. See [Text Reconstruction](#text-reconstruction) for full details.
|
|
91
|
+
|
|
92
|
+
### `getReconstructMode(metadata: ScanMetadata): ReconstructMode`
|
|
93
|
+
|
|
94
|
+
Returns `'paragraphs'` or `'clustered'` based on `result.metadata`, so you never have to hard-code the mode. See [Text Reconstruction](#text-reconstruction).
|
|
95
|
+
|
|
96
|
+
### Options
|
|
97
|
+
|
|
98
|
+
| Property | Type | Default | Description |
|
|
99
|
+
| --------------- | ---------------- | --------- | ------------------------------------------------------------------------ |
|
|
100
|
+
| `textVersion` | `1 \| 2` | `2` | OCR Engine version (1 = Raw, 2 = Heuristic) |
|
|
101
|
+
| `includeText` | `boolean` | `false` | Perform OCR and return structured text |
|
|
102
|
+
| `filter` | `FilterType` | `'color'` | `color`, `grayscale`, `monochrome`, `denoise`, `sharpen`, `ocrOptimized` |
|
|
103
|
+
| `quality` | `number` | `1.0` | Image compression quality (0.1 - 1.0) |
|
|
104
|
+
| `format` | `'jpg' \| 'png'` | `'jpg'` | Output file format |
|
|
105
|
+
| `maxPageCount` | `number` | `0` | (Scan only) Limit pages (0 = unlimited). Max 100. |
|
|
106
|
+
| `includeBase64` | `boolean` | `false` | Returns binary data as Base64 string |
|
|
107
|
+
| `images` | `string[]` | **Req.** | (Process only) Local URIs or Base64 data strings |
|
|
108
|
+
|
|
109
|
+
---
|
|
110
|
+
|
|
111
|
+
## Result Types
|
|
112
|
+
|
|
113
|
+
### `ScanResult`
|
|
114
|
+
|
|
115
|
+
| Property | Type | Description |
|
|
116
|
+
| :--------- | :------------- | :--------------------------------------------------------------------------------- |
|
|
117
|
+
| `uri` | `string` | Local temporary file path of the processed image. |
|
|
118
|
+
| `text` | `string` | The full extracted text. V2 preserves the visual layout. |
|
|
119
|
+
| `blocks` | `TextBlock[]` | One block per visual line, in top-to-bottom order. |
|
|
120
|
+
| `base64` | `string` | Optional binary data (if `includeBase64` is true). |
|
|
121
|
+
| `metadata` | `ScanMetadata` | Platform, engine, filter, and version used to produce this result. Always present. |
|
|
122
|
+
|
|
123
|
+
### `TextBlock`
|
|
124
|
+
|
|
125
|
+
The coordinate system is **unified** across iOS and Android:
|
|
126
|
+
|
|
127
|
+
- **Range**: `0.0` to `1.0` (Normalized).
|
|
128
|
+
- **Origin**: `(0,0)` is the **Top-Left** corner.
|
|
129
|
+
- **Sorting**: Blocks are returned in a natural **top-to-bottom reading order**.
|
|
130
|
+
|
|
131
|
+
```typescript
|
|
132
|
+
interface TextBlock {
|
|
133
|
+
text: string; // Content of the block (one visual line)
|
|
134
|
+
confidence?: number; // Engine reliability (0.0–1.0). May be absent — see notes below.
|
|
135
|
+
frame: {
|
|
136
|
+
x: number; // Horizontal offset from left (0.0–1.0)
|
|
137
|
+
y: number; // Vertical offset from top (0.0–1.0)
|
|
138
|
+
width: number; // Normalized width (fraction of image width)
|
|
139
|
+
height: number; // Normalized height (fraction of image height)
|
|
140
|
+
};
|
|
141
|
+
}
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
---
|
|
145
|
+
|
|
146
|
+
## OCR Engine Versions
|
|
147
|
+
|
|
148
|
+
The library provides two distinct OCR extraction strategies via the `textVersion` parameter. For V1 and V2 (LineCluster path), every entry in the `blocks` array represents exactly one visual line of text, and `blocks[i].text` corresponds to line `i` in the `text` string. On iOS 26+ V2 (`RecognizeDocumentsRequest`), blocks are paragraph-level and may not align 1:1 with split lines — see the [Block Contract](#block-contract) note below.
|
|
149
|
+
|
|
150
|
+
### Version 1 — Raw Output (`textVersion: 1`)
|
|
151
|
+
|
|
152
|
+
Returns the platform's native OCR output with minimal post-processing.
|
|
153
|
+
|
|
154
|
+
| | iOS | Android |
|
|
155
|
+
| --------------------- | ------------------------------------------------------ | ------------------------------------------------- |
|
|
156
|
+
| **Engine** | `VNRecognizeTextRequest` (language correction enabled) | ML Kit Text Recognition |
|
|
157
|
+
| **Block granularity** | One block per Vision observation (≈ one visual line) | One block per ML Kit `TextLine` |
|
|
158
|
+
| **`text` field** | Lines joined with `\n`, no trailing newline | ML Kit's full recognized text |
|
|
159
|
+
| **Confidence** | Always present (per observation) | Present when ML Kit provides element-level detail |
|
|
160
|
+
| **Best for** | General prose, paragraphs, unstructured text | General prose, paragraphs, unstructured text |
|
|
161
|
+
|
|
162
|
+
**When to use V1:** You want the raw platform output with the least processing overhead. Suitable for plain paragraphs where layout structure doesn't matter.
|
|
163
|
+
|
|
164
|
+
---
|
|
165
|
+
|
|
166
|
+
### Version 2 — Heuristic Enhanced (`textVersion: 2`, default)
|
|
167
|
+
|
|
168
|
+
Our custom **LineCluster** algorithm rebuilds the document's visual line structure from word-level elements, then reconstructs spacing proportionally.
|
|
169
|
+
|
|
170
|
+
| | iOS (< 26) | iOS 26+ | Android |
|
|
171
|
+
| --------------------- | ------------------------------------------------------- | ------------------------------------------------------ | ------------------------------------------------- |
|
|
172
|
+
| **Engine** | `VNRecognizeTextRequest` (language correction disabled) | `RecognizeDocumentsRequest` | ML Kit Text Recognition |
|
|
173
|
+
| **Clustering** | Word-level spatial clustering (LineCluster) | Native document paragraphs | Word-level spatial clustering (LineCluster) |
|
|
174
|
+
| **Block granularity** | One block per cluster (= one visual line) | One block per paragraph (≈ one visual line) | One block per cluster (= one visual line) |
|
|
175
|
+
| **`text` field** | Lines joined with `\n`, trailing newline included | Paragraphs joined with `\n`, trailing newline included | Lines joined with `\n`, trailing newline included |
|
|
176
|
+
| **Confidence** | Average across observations in the cluster | Not available (platform does not expose it) | Average across word elements in the cluster |
|
|
177
|
+
| **Noise filtering** | `minimumTextHeight` filters sub-1% height artifacts | Handled natively | N/A (ML Kit filters internally) |
|
|
178
|
+
| **Multilingual** | Auto-detected on iOS 16+ | Supported natively (26 languages) | Auto-detected by ML Kit |
|
|
179
|
+
| **Best for** | Receipts, invoices, tables, structured forms | Receipts, invoices, tables, structured forms | Receipts, invoices, tables, structured forms |
|
|
180
|
+
|
|
181
|
+
**When to use V2:** You need reliable line-by-line layout — e.g., aligning item names with their prices on a receipt, or overlaying bounding boxes on the scanned image.
|
|
182
|
+
|
|
183
|
+
---
|
|
184
|
+
|
|
185
|
+
### Block Contract
|
|
186
|
+
|
|
187
|
+
V1 and V2 (LineCluster path) guarantee **one `TextBlock` per visual line**. `blocks[i].text` corresponds to line `i` in the `text` string, making `blocks` a reliable source for building overlay UIs:
|
|
188
|
+
|
|
189
|
+
```typescript
|
|
190
|
+
const { text, blocks } = result;
|
|
191
|
+
|
|
192
|
+
// Safe for V1 and V2 (LineCluster) on both platforms
|
|
193
|
+
blocks.forEach((block, i) => {
|
|
194
|
+
drawBoundingBox(block.frame); // Overlay each line's box on the image
|
|
195
|
+
console.log(block.text); // Text for that line
|
|
196
|
+
});
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
> **iOS 26+ V2 note:** `RecognizeDocumentsRequest` returns one block per **paragraph**. For most single-column documents this equals one visual line, but multi-column rows (e.g. a receipt item name and its price) may appear as two separate blocks side by side. Use `reconstructText` to merge and column-align them.
|
|
200
|
+
|
|
201
|
+
#### Confidence availability
|
|
202
|
+
|
|
203
|
+
`confidence` is not guaranteed to be present in all cases:
|
|
204
|
+
|
|
205
|
+
| Version | iOS | Android |
|
|
206
|
+
| ------------- | ------------------------ | -------------------------------------------------------------- |
|
|
207
|
+
| V1 | Always present | Present when ML Kit provides element-level detail for the line |
|
|
208
|
+
| V2 | Always present (iOS < 26) | Always present |
|
|
209
|
+
| V2 on iOS 26+ | **Absent** (`undefined`) | N/A |
|
|
210
|
+
|
|
211
|
+
Always guard when reading confidence:
|
|
212
|
+
|
|
213
|
+
```typescript
|
|
214
|
+
if (block.confidence !== undefined) {
|
|
215
|
+
console.log(`Confidence: ${block.confidence}`);
|
|
216
|
+
}
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
#### `text` trailing newline
|
|
220
|
+
|
|
221
|
+
V2 appends a trailing `\n` to the full `text` string. V1 does not. Account for this when splitting:
|
|
222
|
+
|
|
223
|
+
```typescript
|
|
224
|
+
// Safe on both V1 and V2 — trimEnd removes the trailing newline if present
|
|
225
|
+
const lines = result.text.trimEnd().split('\n');
|
|
226
|
+
// V1 + V2 (LineCluster): lines.length === result.blocks.length ✓
|
|
227
|
+
// iOS 26+ V2: lines.length may differ from blocks.length (paragraph vs line granularity)
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
---
|
|
231
|
+
|
|
232
|
+
## OCR Engine Deep-Dive
|
|
233
|
+
|
|
234
|
+
### V1: Raw Platform Output
|
|
235
|
+
|
|
236
|
+
Returns the native platform output directly. No spatial re-clustering is performed.
|
|
237
|
+
|
|
238
|
+
- **iOS**: One block per `VNRecognizedTextObservation`. Language correction is **enabled**, which helps with general prose but may merge or alter words in structured tables.
|
|
239
|
+
- **Android**: One block per ML Kit `TextLine`. The full `text` string is ML Kit's native concatenation of all recognized lines.
|
|
240
|
+
- **Latency**: Minimal — no additional processing beyond the platform OCR call.
|
|
241
|
+
|
|
242
|
+
### V2: Adaptive LineCluster (iOS < 26 and Android)
|
|
243
|
+
|
|
244
|
+
Our **LineCluster** algorithm operates at word level and reconstructs visual lines through four heuristics applied in order for each word element:
|
|
245
|
+
|
|
246
|
+
1. **Height Compatibility** — Elements whose height ratio (`minH / maxH`) falls below `0.40` are never grouped. This prevents subscripts, headers, and footnotes from being merged into adjacent body lines. Uses **median** cluster element height (not union bbox height) to prevent drift as more words are added.
|
|
247
|
+
|
|
248
|
+
2. **Vertical Overlap & Centerline** — An element must either vertically overlap the cluster by ≥ 50% of the smaller height, or have its center within 70% of the typical line height from the cluster's **median center Y** (not the union midpoint). Using the median prevents centerline drift on long lines.
|
|
249
|
+
|
|
250
|
+
3. **Adaptive Growth Constraint** — A candidate merge is rejected if it would grow the cluster's union bounding box to more than `1.2×` the typical line height (when horizontally stacked) or `2.0×` (when purely side-by-side). This blocks two adjacent lines from being merged while still accommodating natural OCR jitter.
|
|
251
|
+
|
|
252
|
+
4. **Best-Cluster Scoring** — When multiple clusters pass all three tests, the one with the highest vertical overlap ratio wins. Ties are broken by closest centerline distance.
|
|
253
|
+
|
|
254
|
+
After clustering, words within each cluster are sorted left-to-right and the gap between adjacent words is measured. If the gap exceeds `0.5×` the median character height, proportional spaces are inserted based on a `0.3×` space-width factor (capped at 10 spaces). This recreates column alignment on receipts and tables.
|
|
255
|
+
|
|
256
|
+
### V2: iOS 26+ Native Fast Path
|
|
257
|
+
|
|
258
|
+
On iOS 26 and later, `textVersion: 2` automatically uses `RecognizeDocumentsRequest` — Apple's structured document understanding API — instead of the heuristic LineCluster path. This API returns native paragraph groupings with precise bounding regions across 26 languages, with no risk of the clustering edge cases that affect older OS versions. The result shape is the same as the LineCluster path, so existing code keeps working, but blocks are paragraph-level rather than line-level and `confidence` is absent — see [Block Contract](#block-contract).
|
|
259
|
+
|
|
260
|
+
---
|
|
261
|
+
|
|
262
|
+
## Text Reconstruction
|
|
263
|
+
|
|
264
|
+
`reconstructText` is a pure JavaScript utility that re-renders the `blocks` array as a column-aligned plain-text string. It is most useful when the native engine returns one block per paragraph rather than per fully-assembled line — specifically on **iOS 26+** where `RecognizeDocumentsRequest` separates multi-column lines (e.g. an item name block and a price block) into individual paragraph blocks.
|
|
265
|
+
|
|
266
|
+
### Import
|
|
267
|
+
|
|
268
|
+
```typescript
|
|
269
|
+
import {
|
|
270
|
+
reconstructText,
|
|
271
|
+
getReconstructMode,
|
|
272
|
+
} from '@hoshomoh/react-native-document-scanner';
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
### Recommended pattern — let metadata choose the mode
|
|
276
|
+
|
|
277
|
+
Every `ScanResult` now includes a `metadata` field that identifies the platform, OCR version, and exact engine used. Pass it to `getReconstructMode` to get the correct `mode` automatically:
|
|
278
|
+
|
|
279
|
+
```typescript
|
|
280
|
+
const results = await scanDocuments({ includeText: true, textVersion: 2 });
|
|
281
|
+
const result = results[0];
|
|
282
|
+
|
|
283
|
+
if (result.blocks && result.metadata) {
|
|
284
|
+
const mode = getReconstructMode(result.metadata);
|
|
285
|
+
const receipt = reconstructText(result.blocks, { mode });
|
|
286
|
+
console.log(receipt);
|
|
287
|
+
}
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
### `ScanMetadata`
|
|
291
|
+
|
|
292
|
+
| Field | Type | Description |
|
|
293
|
+
| ------------- | -------------------- | --------------------------------- |
|
|
294
|
+
| `platform` | `'ios' \| 'android'` | Platform that produced the result |
|
|
295
|
+
| `textVersion` | `1 \| 2` | OCR version that was requested |
|
|
296
|
+
| `filter` | `FilterType` | Image filter applied before OCR |
|
|
297
|
+
| `ocrEngine` | see below | Exact engine used |
|
|
298
|
+
|
|
299
|
+
`ocrEngine` values and the correct `reconstructText` mode for each:
|
|
300
|
+
|
|
301
|
+
| `ocrEngine` | When | `mode` |
|
|
302
|
+
| ----------------------------- | --------------------- | -------------- |
|
|
303
|
+
| `"RecognizeDocumentsRequest"` | iOS 26+ V2 | `'paragraphs'` |
|
|
304
|
+
| `"VNRecognizeTextRequest"` | iOS V1 or V2 < iOS 26 | `'clustered'` |
|
|
305
|
+
| `"MLKit"` | Android V1 or V2 | `'clustered'` |
|
|
306
|
+
| `"none"` | `includeText: false` | N/A |
|
|
307
|
+
|
|
308
|
+
### `reconstructText` options
|
|
309
|
+
|
|
310
|
+
| Option | Type | Default | Description |
|
|
311
|
+
| ------------------- | ----------------------------- | -------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
312
|
+
| `mode` | `'paragraphs' \| 'clustered'` | `'paragraphs'` | Row-grouping strategy. `'paragraphs'` for iOS 26+ V2; `'clustered'` for everything else. |
|
|
313
|
+
| `lineWidth` | `number` | `56` | Output width in characters. Use 48 for narrow receipts, 64+ for wide documents. |
|
|
314
|
+
| `minConfidence` | `number` | none | Discard blocks below this confidence threshold before reconstruction. Useful when scan quality is poor. Has no effect on iOS 26+ (confidence is not provided by `RecognizeDocumentsRequest`). |
|
|
315
|
+
| `rowGroupingFactor` | `number` | from `mode` | Advanced: override the Y-proximity threshold directly (0.7 for `'paragraphs'`, 0.4 for `'clustered'`). |
|
|
316
|
+
|
|
317
|
+
### When to use `result.text` vs `reconstructText`
|
|
318
|
+
|
|
319
|
+
| Source | Use `result.text` | Use `reconstructText` |
|
|
320
|
+
| -------------------- | --------------------------------------------------- | ------------------------------------------------------------------- |
|
|
321
|
+
| iOS 26+ V2 | `text` has correct line order but no column spacing | ✅ Reconstructs column alignment from block X positions |
|
|
322
|
+
| iOS < 26 V2 | ✅ Column spacing already baked in by LineCluster | Optional — use `mode: 'clustered'` if you prefer block-based output |
|
|
323
|
+
| Android V2 | ✅ Column spacing already baked in by LineCluster | Optional — use `mode: 'clustered'` |
|
|
324
|
+
| V1 (either platform) | Lines present but no column spacing | ✅ Reconstructs alignment from block X positions |
|
|
325
|
+
|
|
326
|
+
---
|
|
327
|
+
|
|
328
|
+
## OCR Accuracy
|
|
329
|
+
|
|
330
|
+
### Filter recommendation
|
|
331
|
+
|
|
332
|
+
The single biggest improvement for poor-quality scans is the `ocrOptimized` filter. It applies a **denoise → sharpen → monochrome** pipeline before OCR:
|
|
333
|
+
|
|
334
|
+
```typescript
|
|
335
|
+
await scanDocuments({
|
|
336
|
+
includeText: true,
|
|
337
|
+
filter: 'ocrOptimized',
|
|
338
|
+
textVersion: 2,
|
|
339
|
+
});
|
|
340
|
+
```
|
|
341
|
+
|
|
342
|
+
Use this whenever scans come from a phone camera rather than a dedicated flatbed scanner. It eliminates noise that would otherwise produce garbage characters in the OCR output.
|
|
343
|
+
|
|
344
|
+
### Confidence filtering
|
|
345
|
+
|
|
346
|
+
Low-confidence blocks often represent scan artifacts, smudged text, or regions where the engine guessed. Filter them during reconstruction:
|
|
347
|
+
|
|
348
|
+
```typescript
|
|
349
|
+
const receipt = reconstructText(result.blocks ?? [], {
|
|
350
|
+
mode: getReconstructMode(result.metadata!),
|
|
351
|
+
minConfidence: 0.4, // drop blocks the engine was less than 40% sure about
|
|
352
|
+
});
|
|
353
|
+
```
|
|
354
|
+
|
|
355
|
+
### Known accuracy limitations
|
|
356
|
+
|
|
357
|
+
| Scenario | Cause | Mitigation |
|
|
358
|
+
| ------------------------------------ | ---------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------- |
|
|
359
|
+
| Unusual fonts / handwriting | OCR engine trained on standard print | Use `ocrOptimized` filter; V2 is more accurate than V1 for structured documents |
|
|
360
|
+
| iOS 26+ item/price column mispairing | `RecognizeDocumentsRequest` paragraph Y centres may differ slightly across columns | Adjust `rowGroupingFactor` in `reconstructText` |
|
|
361
|
+
| Non-Latin characters misread | Wrong language model active | iOS < 26: `automaticallyDetectsLanguage` is enabled on iOS 16+; Android: ML Kit auto-detects |
|
|
362
|
+
| Very small text dropped | Below `minimumTextHeight` (1% of image height) on iOS | Reduce `minimumTextHeight` in `OCRConfiguration.swift` if small legitimate text is lost |
|
|
363
|
+
|
|
364
|
+
---
|
|
365
|
+
|
|
366
|
+
## Technical Safeguards
|
|
367
|
+
|
|
368
|
+
- **Non-blocking**: All image processing and OCR run on background Swift Concurrency tasks (`Task(priority: .userInitiated)`) or Kotlin coroutines. The UI thread is never blocked.
|
|
369
|
+
- **Memory Efficient**: Original bitmaps are recycled immediately, and processed images are stored in temporary cache directories to prevent OOM errors.
|
|
370
|
+
- **Coordinate Parity**: Apple Vision uses a bottom-left origin. We mathematically normalize all bounding boxes to top-left origin (`y = 1 - originY - height`), ensuring overlay UIs work identically on both platforms.
|
|
371
|
+
- **Failsafe Normalization**: Safe division helpers return `0.0` instead of crashing when image dimensions are zero.
|
|
372
|
+
- **Noise Filtering** (iOS V1 & V2 < iOS 26): `minimumTextHeight = 0.01` discards Vision observations whose bounding box is smaller than 1% of the image height, eliminating ruled lines, watermarks, and scan artifacts from the OCR output.
|
|
373
|
+
|
|
374
|
+
---
|
|
375
|
+
|
|
376
|
+
## Requirements
|
|
377
|
+
|
|
378
|
+
- React Native 0.71+ (New Architecture recommended)
|
|
379
|
+
- iOS 13.0+
|
|
380
|
+
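- Android API 24+ by default (the library's fallback `minSdkVersion`; the app's `rootProject.ext.minSdkVersion` takes precedence when set)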
|
|
381
|
+
|
|
382
|
+
## License
|
|
383
|
+
|
|
384
|
+
MIT
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
buildscript {
|
|
2
|
+
// Helper function to get version from app's rootProject or fall back to library's gradle.properties
|
|
3
|
+
ext.getExtOrDefault = { name ->
|
|
4
|
+
return rootProject.ext.has(name) ? rootProject.ext.get(name) : project.properties['DocumentScanner_' + name]
|
|
5
|
+
}
|
|
6
|
+
|
|
7
|
+
repositories {
|
|
8
|
+
google()
|
|
9
|
+
mavenCentral()
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
dependencies {
|
|
13
|
+
classpath "com.android.tools.build:gradle:${getExtOrDefault('gradlePluginVersion')}"
|
|
14
|
+
// noinspection DifferentKotlinGradleVersion
|
|
15
|
+
classpath "org.jetbrains.kotlin:kotlin-gradle-plugin:${getExtOrDefault('kotlinVersion')}"
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
apply plugin: 'com.android.library'
|
|
20
|
+
apply plugin: 'kotlin-android'
|
|
21
|
+
apply plugin: 'com.facebook.react'
|
|
22
|
+
|
|
23
|
+
// Helper function that returns an Integer for SDK version properties
|
|
24
|
+
def getExtOrIntegerDefault(name) {
|
|
25
|
+
return rootProject.ext.has(name) ? rootProject.ext.get(name) : (project.properties["DocumentScanner_" + name]).toInteger()
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
android {
|
|
29
|
+
namespace "com.documentscanner"
|
|
30
|
+
|
|
31
|
+
compileSdkVersion getExtOrIntegerDefault("compileSdkVersion")
|
|
32
|
+
|
|
33
|
+
defaultConfig {
|
|
34
|
+
minSdkVersion getExtOrIntegerDefault("minSdkVersion")
|
|
35
|
+
targetSdkVersion getExtOrIntegerDefault("targetSdkVersion")
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
buildFeatures {
|
|
39
|
+
buildConfig true
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
buildTypes {
|
|
43
|
+
release {
|
|
44
|
+
minifyEnabled false
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
lintOptions {
|
|
49
|
+
disable "GradleCompatible"
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
compileOptions {
|
|
53
|
+
sourceCompatibility JavaVersion.VERSION_1_8
|
|
54
|
+
targetCompatibility JavaVersion.VERSION_1_8
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
repositories {
|
|
59
|
+
google()
|
|
60
|
+
mavenCentral()
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
def kotlin_version = getExtOrDefault("kotlinVersion")
|
|
64
|
+
|
|
65
|
+
dependencies {
|
|
66
|
+
implementation "com.facebook.react:react-android"
|
|
67
|
+
implementation "org.jetbrains.kotlin:kotlin-stdlib:$kotlin_version"
|
|
68
|
+
implementation "com.google.android.gms:play-services-mlkit-document-scanner:${getExtOrDefault('mlkitScannerVersion')}"
|
|
69
|
+
implementation "com.google.android.gms:play-services-mlkit-text-recognition:${getExtOrDefault('mlkitTextRecognitionVersion')}"
|
|
70
|
+
implementation "androidx.exifinterface:exifinterface:${getExtOrDefault('exifInterfaceVersion')}"
|
|
71
|
+
implementation "org.jetbrains.kotlinx:kotlinx-coroutines-play-services:${getExtOrDefault('kotlinCoroutinesPlayServicesVersion')}"
|
|
72
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Library fallback values for SDK versions and dependencies
|
|
2
|
+
# These are used when the consuming app doesn't specify them in rootProject.ext
|
|
3
|
+
|
|
4
|
+
# Build Tools
|
|
5
|
+
DocumentScanner_gradlePluginVersion=8.2.1
|
|
6
|
+
DocumentScanner_kotlinVersion=1.9.22
|
|
7
|
+
|
|
8
|
+
# SDK Versions
|
|
9
|
+
DocumentScanner_compileSdkVersion=34
|
|
10
|
+
DocumentScanner_minSdkVersion=24
|
|
11
|
+
DocumentScanner_targetSdkVersion=34
|
|
12
|
+
|
|
13
|
+
# Dependencies
|
|
14
|
+
DocumentScanner_mlkitScannerVersion=16.0.0
|
|
15
|
+
DocumentScanner_mlkitTextRecognitionVersion=19.0.0
|
|
16
|
+
DocumentScanner_exifInterfaceVersion=1.3.7
|
|
17
|
+
DocumentScanner_kotlinCoroutinesPlayServicesVersion=1.8.0
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
## This file must *NOT* be checked into Version Control Systems,
|
|
2
|
+
# as it contains information specific to your local configuration.
|
|
3
|
+
#
|
|
4
|
+
# Location of the SDK. This is only used by Gradle.
|
|
5
|
+
# For customization when using a Version Control System, please read the
|
|
6
|
+
# header note.
|
|
7
|
+
#Wed Dec 24 10:07:33 CET 2025
|
|
8
|
+
sdk.dir=/Users/oshomo.oforomeh/Library/Android/sdk
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
|
|
2
|
+
package="com.documentscanner">
|
|
3
|
+
|
|
4
|
+
<!-- Camera permission is required for the document scanner -->
|
|
5
|
+
<uses-permission android:name="android.permission.CAMERA" />
|
|
6
|
+
<uses-permission android:name="android.permission.INTERNET" />
|
|
7
|
+
|
|
8
|
+
</manifest>
|