@hoshomoh/react-native-document-scanner 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/DocumentScanner.podspec +22 -0
  2. package/LICENSE +20 -0
  3. package/README.md +384 -0
  4. package/android/build.gradle +72 -0
  5. package/android/gradle.properties +17 -0
  6. package/android/local.properties +8 -0
  7. package/android/src/main/AndroidManifest.xml +8 -0
  8. package/android/src/main/java/com/documentscanner/DocumentScannerModule.kt +217 -0
  9. package/android/src/main/java/com/documentscanner/DocumentScannerPackage.kt +39 -0
  10. package/android/src/main/java/com/documentscanner/ImageProcessor.kt +325 -0
  11. package/android/src/main/java/com/documentscanner/Logger.kt +36 -0
  12. package/android/src/main/java/com/documentscanner/OCRConfiguration.kt +56 -0
  13. package/android/src/main/java/com/documentscanner/Options.kt +109 -0
  14. package/android/src/main/java/com/documentscanner/ScannerError.kt +18 -0
  15. package/android/src/main/java/com/documentscanner/TextRecognizer.kt +56 -0
  16. package/android/src/main/java/com/documentscanner/TextRecognizerV1.kt +68 -0
  17. package/android/src/main/java/com/documentscanner/TextRecognizerV2.kt +244 -0
  18. package/ios/DocumentScanner.h +5 -0
  19. package/ios/DocumentScanner.mm +113 -0
  20. package/ios/DocumentScannerManager.swift +148 -0
  21. package/ios/Errors.swift +33 -0
  22. package/ios/ImageProcessor.swift +78 -0
  23. package/ios/ImageUtil.swift +279 -0
  24. package/ios/Logger.swift +43 -0
  25. package/ios/OCRConfiguration.swift +60 -0
  26. package/ios/Options.swift +109 -0
  27. package/ios/ResponseUtil.swift +25 -0
  28. package/ios/ScanModels.swift +84 -0
  29. package/ios/TextRecognizer.swift +134 -0
  30. package/ios/TextRecognizerV1.swift +56 -0
  31. package/ios/TextRecognizerV2.swift +169 -0
  32. package/lib/module/NativeDocumentScanner.js +51 -0
  33. package/lib/module/NativeDocumentScanner.js.map +1 -0
  34. package/lib/module/index.js +40 -0
  35. package/lib/module/index.js.map +1 -0
  36. package/lib/module/package.json +1 -0
  37. package/lib/module/textReconstructor.js +147 -0
  38. package/lib/module/textReconstructor.js.map +1 -0
  39. package/lib/typescript/package.json +1 -0
  40. package/lib/typescript/src/NativeDocumentScanner.d.ts +191 -0
  41. package/lib/typescript/src/NativeDocumentScanner.d.ts.map +1 -0
  42. package/lib/typescript/src/index.d.ts +34 -0
  43. package/lib/typescript/src/index.d.ts.map +1 -0
  44. package/lib/typescript/src/textReconstructor.d.ts +60 -0
  45. package/lib/typescript/src/textReconstructor.d.ts.map +1 -0
  46. package/package.json +137 -0
  47. package/src/NativeDocumentScanner.ts +205 -0
  48. package/src/index.ts +61 -0
  49. package/src/textReconstructor.ts +212 -0
@@ -0,0 +1,205 @@
1
+ import { TurboModuleRegistry, type TurboModule } from 'react-native';
2
+
3
+ /**
4
+ * Type union of all available filter values.
5
+ * Required for React Native Codegen compatibility.
6
+ */
7
+ export type FilterType =
8
+ | 'color'
9
+ | 'grayscale'
10
+ | 'monochrome'
11
+ | 'denoise'
12
+ | 'sharpen'
13
+ | 'ocrOptimized';
14
+
15
+ /**
16
+ * Type union of all available format values.
17
+ * Required for React Native Codegen compatibility.
18
+ */
19
+ export type FormatType = 'jpg' | 'png';
20
+
21
+ /**
22
+ * Represents a discrete block of text recognized by the OCR engine.
23
+ * Useful for mapping text to specific regions on the image.
24
+ */
25
+ export interface TextBlock {
26
+ /** The text content within the block. */
27
+ text: string;
28
+ /**
29
+ * The normalized bounding box of the text.
30
+ * Coordinates (x, y, width, height) are in the range [0, 1].
31
+ * (0,0) is usually top-left.
32
+ */
33
+ frame: { x: number; y: number; width: number; height: number };
34
+ /**
35
+ * OCR confidence score (0.0 to 1.0).
36
+ * Higher values indicate more reliable recognition.
37
+ * Useful for LLM post-processing to weight or filter results.
38
+ */
39
+ confidence?: number;
40
+ }
41
+
42
+ /**
43
+ * Describes the OCR engine and configuration used to produce a ScanResult.
44
+ * Pass the parent `ScanResult` directly to `reconstructText` — it reads
45
+ * `metadata` internally to select the right reconstruction strategy.
46
+ */
47
+ export interface ScanMetadata {
48
+ /** Platform that generated this result. */
49
+ platform: 'ios' | 'android';
50
+ /** OCR engine version that was requested (1 = Raw, 2 = Heuristic). */
51
+ textVersion: 1 | 2;
52
+ /** Image filter applied before OCR. */
53
+ filter: FilterType;
54
+ /**
55
+ * The specific OCR engine used:
56
+ * - `"RecognizeDocumentsRequest"`: iOS 26+ native document understanding (V2).
57
+ * - `"VNRecognizeTextRequest"`: Apple Vision text request (V1 or V2 on iOS < 26).
58
+ * - `"MLKit"`: Android ML Kit Text Recognition (V1 or V2).
59
+ * - `"none"`: OCR was not performed (`includeText` was false).
60
+ */
61
+ ocrEngine:
62
+ | 'RecognizeDocumentsRequest'
63
+ | 'VNRecognizeTextRequest'
64
+ | 'MLKit'
65
+ | 'none';
66
+ }
67
+
68
+ /**
69
+ * The result of a single scanned page.
70
+ */
71
+ export interface ScanResult {
72
+ /** The local file URI of the scanned image (e.g., file:///...). */
73
+ uri?: string;
74
+ /** The Base64 encoded string of the image (if requested). */
75
+ base64?: string;
76
+ /** The full text extracted from the page, preserving layout. */
77
+ text?: string;
78
+ /** Array of structured text blocks with metadata. */
79
+ blocks?: TextBlock[];
80
+ /** Configuration and engine metadata for this result. */
81
+ metadata?: ScanMetadata;
82
+ }
83
+
84
+ /**
85
+ * Base configuration options shared by scan and process operations.
86
+ */
87
+ export interface BaseOptions {
88
+ /** Compression quality (0.0 to 1.0) for JPEG. Default is 1.0. */
89
+ quality?: number;
90
+ /** Output image format. Use the `Format` constant for type-safe values. Default is 'jpg'. */
91
+ format?: FormatType;
92
+ /**
93
+ * Post-processing filter to apply.
94
+ * - `color`: No filter (default).
95
+ * - `grayscale`: Desaturates the image.
96
+ * - `monochrome`: High-contrast black & white (best for OCR).
97
+ * - `denoise`: Reduces image noise (improves OCR on noisy photos).
98
+ * - `sharpen`: Enhances edge clarity (improves OCR on blurry text).
99
+ * - `ocrOptimized`: Full pipeline: denoise → sharpen → monochrome (best accuracy).
100
+ */
101
+ filter?: FilterType;
102
+ /** Whether to include the base64 string in the result. Default is false. */
103
+ includeBase64?: boolean;
104
+ /** Whether to perform OCR and include text/blocks. */
105
+ includeText?: boolean;
106
+ /**
107
+ * Version of the text recognizer to use.
108
+ * - 1: Raw output (standard Vision/ML Kit behavior).
109
+ * - 2: Heuristic enhanced (Adaptive Clustering for layout preservation). Default.
110
+ */
111
+ textVersion?: number;
112
+ }
113
+
114
+ /**
115
+ * Configuration options for the Document Scanner.
116
+ * Fields are listed explicitly (not via extends) for React Native Codegen compatibility —
117
+ * Codegen only generates struct fields declared directly on the interface.
118
+ */
119
+ export interface ScanOptions {
120
+ /** Maximum number of pages to scan. Default is unlimited (or hardware limit). */
121
+ maxPageCount?: number;
122
+ /** Compression quality (0.0 to 1.0) for JPEG. Default is 1.0. */
123
+ quality?: number;
124
+ /** Output image format. Use the `Format` constant for type-safe values. Default is 'jpg'. */
125
+ format?: FormatType;
126
+ /**
127
+ * Post-processing filter to apply.
128
+ * - `color`: No filter (default).
129
+ * - `grayscale`: Desaturates the image.
130
+ * - `monochrome`: High-contrast black & white (best for OCR).
131
+ * - `denoise`: Reduces image noise (improves OCR on noisy photos).
132
+ * - `sharpen`: Enhances edge clarity (improves OCR on blurry text).
133
+ * - `ocrOptimized`: Full pipeline: denoise → sharpen → monochrome (best accuracy).
134
+ */
135
+ filter?: FilterType;
136
+ /** Whether to include the base64 string in the result. Default is false. */
137
+ includeBase64?: boolean;
138
+ /** Whether to perform OCR and include text/blocks. */
139
+ includeText?: boolean;
140
+ /**
141
+ * Version of the text recognizer to use.
142
+ * - 1: Raw output (standard Vision/ML Kit behavior).
143
+ * - 2: Heuristic enhanced (Adaptive Clustering for layout preservation). Default.
144
+ */
145
+ textVersion?: number;
146
+ }
147
+
148
+ /**
149
+ * Configuration options for processing existing images.
150
+ * Fields are listed explicitly (not via extends) for React Native Codegen compatibility —
151
+ * Codegen only generates struct fields declared directly on the interface.
152
+ */
153
+ export interface ProcessOptions {
154
+ /**
155
+ * Array of image sources. Each can be:
156
+ * - A file URI (e.g., "file:///path/to/image.jpg")
157
+ * - A base64-encoded string (with or without data URI prefix)
158
+ */
159
+ images: string[];
160
+ /** Compression quality (0.0 to 1.0) for JPEG. Default is 1.0. */
161
+ quality?: number;
162
+ /** Output image format. Use the `Format` constant for type-safe values. Default is 'jpg'. */
163
+ format?: FormatType;
164
+ /**
165
+ * Post-processing filter to apply.
166
+ * - `color`: No filter (default).
167
+ * - `grayscale`: Desaturates the image.
168
+ * - `monochrome`: High-contrast black & white (best for OCR).
169
+ * - `denoise`: Reduces image noise (improves OCR on noisy photos).
170
+ * - `sharpen`: Enhances edge clarity (improves OCR on blurry text).
171
+ * - `ocrOptimized`: Full pipeline: denoise → sharpen → monochrome (best accuracy).
172
+ */
173
+ filter?: FilterType;
174
+ /** Whether to include the base64 string in the result. Default is false. */
175
+ includeBase64?: boolean;
176
+ /** Whether to perform OCR and include text/blocks. */
177
+ includeText?: boolean;
178
+ /**
179
+ * Version of the text recognizer to use.
180
+ * - 1: Raw output (standard Vision/ML Kit behavior).
181
+ * - 2: Heuristic enhanced (Adaptive Clustering for layout preservation). Default.
182
+ */
183
+ textVersion?: number;
184
+ }
185
+
186
+ /**
187
+ * TurboModule Specification for the Document Scanner.
188
+ */
189
+ export interface Spec extends TurboModule {
190
+ /**
191
+ * Opens the native document scanner UI.
192
+ * @param options Configuration options.
193
+ * @returns A Promise resolving to an array of ScanResults.
194
+ */
195
+ scanDocuments(options?: ScanOptions): Promise<ScanResult[]>;
196
+
197
+ /**
198
+ * Processes existing images without opening the camera UI.
199
+ * @param options Configuration including image sources.
200
+ * @returns A Promise resolving to an array of ScanResults.
201
+ */
202
+ processDocuments(options: ProcessOptions): Promise<ScanResult[]>;
203
+ }
204
+
205
+ export default TurboModuleRegistry.getEnforcing<Spec>('DocumentScanner');
package/src/index.ts ADDED
@@ -0,0 +1,61 @@
1
+ import DocumentScanner, {
2
+ type ScanOptions,
3
+ type ScanResult,
4
+ type ScanMetadata,
5
+ type TextBlock,
6
+ type ProcessOptions,
7
+ type FilterType,
8
+ type FormatType,
9
+ } from './NativeDocumentScanner';
10
+
11
+ /**
12
+ * Available image filters.
13
+ * Use these constants instead of raw strings for type safety.
14
+ */
15
+ export const Filter = {
16
+ /** No filter (original colors) */
17
+ COLOR: 'color',
18
+ /** Desaturated image */
19
+ GRAYSCALE: 'grayscale',
20
+ /** High-contrast black & white */
21
+ MONOCHROME: 'monochrome',
22
+ /** Noise reduction (for noisy photos) */
23
+ DENOISE: 'denoise',
24
+ /** Edge enhancement (for blurry text) */
25
+ SHARPEN: 'sharpen',
26
+ /** Full OCR pipeline: denoise → sharpen → monochrome */
27
+ OCR_OPTIMIZED: 'ocrOptimized',
28
+ } as const;
29
+
30
+ /**
31
+ * Available output formats.
32
+ */
33
+ export const Format = {
34
+ /** JPEG format (smaller file size) */
35
+ JPG: 'jpg',
36
+ /** PNG format (lossless) */
37
+ PNG: 'png',
38
+ } as const;
39
+
40
+ export function scanDocuments(options?: ScanOptions): Promise<ScanResult[]> {
41
+ return DocumentScanner.scanDocuments(options);
42
+ }
43
+
44
+ export function processDocuments(
45
+ options: ProcessOptions
46
+ ): Promise<ScanResult[]> {
47
+ return DocumentScanner.processDocuments(options);
48
+ }
49
+
50
+ export { reconstructText } from './textReconstructor';
51
+ export type { ReconstructOptions } from './textReconstructor';
52
+
53
+ export type {
54
+ ScanOptions,
55
+ ScanResult,
56
+ ScanMetadata,
57
+ TextBlock,
58
+ ProcessOptions,
59
+ FilterType,
60
+ FormatType,
61
+ };
@@ -0,0 +1,212 @@
1
+ import type { TextBlock, ScanMetadata } from './NativeDocumentScanner';
2
+
3
+ // Internal — not part of the public API.
4
+ type ReconstructMode = 'paragraphs' | 'clustered';
5
+
6
+ const MODE_FACTOR: Record<ReconstructMode, number> = {
7
+ paragraphs: 0.5,
8
+ clustered: 0.4,
9
+ };
10
+
11
+ export interface ReconstructOptions {
12
+ /**
13
+ * Width of the output in characters.
14
+ * Default: 56. Use 48 for narrow thermal receipts, 64+ for wide documents.
15
+ */
16
+ lineWidth?: number;
17
+ /**
18
+ * Discard blocks whose `confidence` is below this value before
19
+ * reconstruction. Useful when scan quality is poor and low-confidence
20
+ * blocks produce garbage characters that disrupt the output.
21
+ *
22
+ * Only applies to V1 on any platform. Has no effect on V2 paths (native
23
+ * text is returned directly) or on iOS 26+ RecognizeDocumentsRequest
24
+ * (paragraph confidence is not exposed by the API — blocks always pass).
25
+ *
26
+ * Suggested values: `0.3` (aggressive), `0.5` (moderate).
27
+ * Default: `undefined` (no filtering).
28
+ */
29
+ minConfidence?: number;
30
+ /**
31
+ * Fine-tune the Y-proximity threshold directly. The threshold is
32
+ * `rowGroupingFactor × medianLineHeight`. Increase if blocks on the same
33
+ * visual line are being split into separate rows. Decrease if adjacent
34
+ * lines are being merged.
35
+ *
36
+ * Only applies to the block-based reconstruction path (V1 and iOS 26+).
37
+ */
38
+ rowGroupingFactor?: number;
39
+ }
40
+
41
+ /**
42
+ * Reconstructs a visually-aligned plain-text document from a `ScanResult`.
43
+ *
44
+ * Automatically selects the right strategy based on the OCR engine recorded
45
+ * in `metadata` — no manual mode selection needed:
46
+ *
47
+ * | Engine | Strategy |
48
+ * |---------------------------------|-----------------------------------------|
49
+ * | V2 · MLKit or VNRecognizeText | Returns `text` directly — the native |
50
+ * | | clustering already produced column- |
51
+ * | | aligned output. |
52
+ * | iOS 26+ RecognizeDocumentsReq. | Spatially reconstructs from paragraph- |
53
+ * | | level blocks. |
54
+ * | V1 · either platform | Spatially reconstructs from line-level |
55
+ * | | blocks. |
56
+ *
57
+ * @param scanResult A `ScanResult` (or any object with `text`, `blocks`, `metadata`).
58
+ * @param options Optional output tuning.
59
+ * @returns A plain-text string with column-aligned rows.
60
+ *
61
+ * @example
62
+ * const results = await scanDocuments({ includeText: true });
63
+ * const text = reconstructText(results[0]);
64
+ */
65
+ export function reconstructText(
66
+ scanResult: { text?: string; blocks?: TextBlock[]; metadata?: ScanMetadata },
67
+ options: ReconstructOptions = {}
68
+ ): string {
69
+ const { metadata, blocks = [], text = '' } = scanResult;
70
+
71
+ // V2 via VNRecognizeTextRequest or MLKit: the native adaptive clustering
72
+ // already produced column-aligned text — return it directly.
73
+ // If text is absent (caller omitted it), fall back to block reconstruction
74
+ // using the line-level cluster blocks rather than returning empty.
75
+ if (
76
+ metadata?.textVersion === 2 &&
77
+ metadata.ocrEngine !== 'RecognizeDocumentsRequest'
78
+ ) {
79
+ if (text) {
80
+ return text.trimEnd();
81
+ }
82
+ return reconstructFromBlocks(blocks, 'clustered', options);
83
+ }
84
+
85
+ // iOS 26+ RecognizeDocumentsRequest returns paragraph-level blocks;
86
+ // V1 on any platform returns line-level blocks.
87
+ // Both paths use the spatial block reconstruction algorithm.
88
+ const mode: ReconstructMode =
89
+ metadata?.ocrEngine === 'RecognizeDocumentsRequest'
90
+ ? 'paragraphs'
91
+ : 'clustered';
92
+
93
+ return reconstructFromBlocks(blocks, mode, options);
94
+ }
95
+
96
+ // ─── Internal block reconstruction algorithm ─────────────────────────────────
97
+
98
+ function reconstructFromBlocks(
99
+ blocks: TextBlock[],
100
+ mode: ReconstructMode,
101
+ options: ReconstructOptions
102
+ ): string {
103
+ const { lineWidth = 56, minConfidence } = options;
104
+ const rowGroupingFactor = options.rowGroupingFactor ?? MODE_FACTOR[mode];
105
+
106
+ // Step 1: Filter by confidence when requested.
107
+ const filtered =
108
+ minConfidence !== undefined
109
+ ? blocks.filter(
110
+ (b) => b.confidence === undefined || b.confidence >= minConfidence
111
+ )
112
+ : blocks;
113
+
114
+ if (filtered.length === 0) {
115
+ return '';
116
+ }
117
+
118
+ // Step 2: Compute the median block height (= reference line height).
119
+ const heights = filtered.map((b) => b.frame.height).sort((a, b) => a - b);
120
+ const hMid = Math.floor(heights.length / 2);
121
+ const typicalHeight =
122
+ heights.length % 2 === 0
123
+ ? (heights[hMid - 1]! + heights[hMid]!) / 2
124
+ : heights[hMid]!;
125
+
126
+ const threshold =
127
+ rowGroupingFactor * (typicalHeight > 0 ? typicalHeight : 0.02);
128
+
129
+ // Step 3: Sort all blocks by midY ascending (top of page first).
130
+ const sorted = [...filtered].sort((a, b) => {
131
+ const midA = a.frame.y + a.frame.height / 2;
132
+ const midB = b.frame.y + b.frame.height / 2;
133
+ return midA - midB;
134
+ });
135
+
136
+ // Step 4: Greedy row grouping by midY proximity.
137
+ // Each row tracks a running median centerY so that adding blocks to one
138
+ // side of a long line doesn't pull the reference point away from the others.
139
+ interface Row {
140
+ blocks: TextBlock[];
141
+ midYs: number[];
142
+ medianMidY: number;
143
+ }
144
+
145
+ function computeMedian(values: number[]): number {
146
+ const s = [...values].sort((a, b) => a - b);
147
+ const m = Math.floor(s.length / 2);
148
+ return s.length % 2 === 0 ? (s[m - 1]! + s[m]!) / 2 : s[m]!;
149
+ }
150
+
151
+ const rows: Row[] = [];
152
+
153
+ for (const block of sorted) {
154
+ const blockMidY = block.frame.y + block.frame.height / 2;
155
+
156
+ let bestRow: Row | null = null;
157
+ let bestDist = Infinity;
158
+
159
+ for (const row of rows) {
160
+ const dist = Math.abs(row.medianMidY - blockMidY);
161
+ if (dist < threshold && dist < bestDist) {
162
+ bestDist = dist;
163
+ bestRow = row;
164
+ }
165
+ }
166
+
167
+ if (bestRow !== null) {
168
+ bestRow.blocks.push(block);
169
+ bestRow.midYs.push(blockMidY);
170
+ bestRow.medianMidY = computeMedian(bestRow.midYs);
171
+ } else {
172
+ rows.push({
173
+ blocks: [block],
174
+ midYs: [blockMidY],
175
+ medianMidY: blockMidY,
176
+ });
177
+ }
178
+ }
179
+
180
+ // Step 5: Sort rows top-to-bottom.
181
+ rows.sort((a, b) => a.medianMidY - b.medianMidY);
182
+
183
+ // Step 6: Render each row into a fixed-width character buffer.
184
+ const lines: string[] = [];
185
+
186
+ for (const row of rows) {
187
+ // Sort blocks left-to-right within the row.
188
+ const rowBlocks = [...row.blocks].sort((a, b) => a.frame.x - b.frame.x);
189
+
190
+ const buf = new Array<string>(lineWidth).fill(' ');
191
+ let cursor = 0;
192
+
193
+ for (const block of rowBlocks) {
194
+ // Map normalized X to a character column, but never retreat behind cursor.
195
+ const col = Math.max(Math.round(block.frame.x * lineWidth), cursor);
196
+
197
+ for (let i = 0; i < block.text.length; i++) {
198
+ const pos = col + i;
199
+ if (pos < lineWidth) {
200
+ buf[pos] = block.text[i]!;
201
+ }
202
+ }
203
+
204
+ // Advance cursor past this block + a minimum single-space gap.
205
+ cursor = col + block.text.length + 1;
206
+ }
207
+
208
+ lines.push(buf.join('').trimEnd());
209
+ }
210
+
211
+ return lines.join('\n');
212
+ }