@hoshomoh/react-native-document-scanner 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49):
  1. package/DocumentScanner.podspec +22 -0
  2. package/LICENSE +20 -0
  3. package/README.md +384 -0
  4. package/android/build.gradle +72 -0
  5. package/android/gradle.properties +17 -0
  6. package/android/local.properties +8 -0
  7. package/android/src/main/AndroidManifest.xml +8 -0
  8. package/android/src/main/java/com/documentscanner/DocumentScannerModule.kt +217 -0
  9. package/android/src/main/java/com/documentscanner/DocumentScannerPackage.kt +39 -0
  10. package/android/src/main/java/com/documentscanner/ImageProcessor.kt +325 -0
  11. package/android/src/main/java/com/documentscanner/Logger.kt +36 -0
  12. package/android/src/main/java/com/documentscanner/OCRConfiguration.kt +56 -0
  13. package/android/src/main/java/com/documentscanner/Options.kt +109 -0
  14. package/android/src/main/java/com/documentscanner/ScannerError.kt +18 -0
  15. package/android/src/main/java/com/documentscanner/TextRecognizer.kt +56 -0
  16. package/android/src/main/java/com/documentscanner/TextRecognizerV1.kt +68 -0
  17. package/android/src/main/java/com/documentscanner/TextRecognizerV2.kt +244 -0
  18. package/ios/DocumentScanner.h +5 -0
  19. package/ios/DocumentScanner.mm +113 -0
  20. package/ios/DocumentScannerManager.swift +148 -0
  21. package/ios/Errors.swift +33 -0
  22. package/ios/ImageProcessor.swift +78 -0
  23. package/ios/ImageUtil.swift +279 -0
  24. package/ios/Logger.swift +43 -0
  25. package/ios/OCRConfiguration.swift +60 -0
  26. package/ios/Options.swift +109 -0
  27. package/ios/ResponseUtil.swift +25 -0
  28. package/ios/ScanModels.swift +84 -0
  29. package/ios/TextRecognizer.swift +134 -0
  30. package/ios/TextRecognizerV1.swift +56 -0
  31. package/ios/TextRecognizerV2.swift +169 -0
  32. package/lib/module/NativeDocumentScanner.js +51 -0
  33. package/lib/module/NativeDocumentScanner.js.map +1 -0
  34. package/lib/module/index.js +40 -0
  35. package/lib/module/index.js.map +1 -0
  36. package/lib/module/package.json +1 -0
  37. package/lib/module/textReconstructor.js +147 -0
  38. package/lib/module/textReconstructor.js.map +1 -0
  39. package/lib/typescript/package.json +1 -0
  40. package/lib/typescript/src/NativeDocumentScanner.d.ts +191 -0
  41. package/lib/typescript/src/NativeDocumentScanner.d.ts.map +1 -0
  42. package/lib/typescript/src/index.d.ts +34 -0
  43. package/lib/typescript/src/index.d.ts.map +1 -0
  44. package/lib/typescript/src/textReconstructor.d.ts +60 -0
  45. package/lib/typescript/src/textReconstructor.d.ts.map +1 -0
  46. package/package.json +137 -0
  47. package/src/NativeDocumentScanner.ts +205 -0
  48. package/src/index.ts +61 -0
  49. package/src/textReconstructor.ts +212 -0
@@ -0,0 +1,109 @@
1
+ import Foundation
2
+ import UIKit
3
+
4
+ /**
5
+ Base options shared by both Scan and Process operations.
6
+ Contains configuration for image output and OCR processing.
7
+ */
8
+ public class BaseOptions {
9
+ public let quality: CGFloat
10
+ public let format: String
11
+ public let filter: String
12
+ public let includeBase64: Bool
13
+ public let includeText: Bool
14
+ public let textVersion: Int
15
+
16
+ init(quality: CGFloat, format: String, filter: String, includeBase64: Bool, includeText: Bool, textVersion: Int) {
17
+ self.quality = quality
18
+ self.format = format
19
+ self.filter = filter
20
+ self.includeBase64 = includeBase64
21
+ self.includeText = includeText
22
+ self.textVersion = textVersion
23
+ }
24
+
25
+ /**
26
+ Reads an integer option from the bridge dictionary.
27
+ Covers NSNumber, Int, and Double — all three bridging representations that can appear
28
+ depending on whether the call comes through the old bridge or JSI (new arch).
29
+ */
30
+ static func intOption(from dictionary: [String: Any]?, key: String, fallback: Int) -> Int {
31
+ guard let raw = dictionary?[key] else { return fallback }
32
+ if let n = raw as? NSNumber { return n.intValue }
33
+ if let i = raw as? Int { return i }
34
+ if let d = raw as? Double { return Int(d) }
35
+ return fallback
36
+ }
37
+
38
+ /**
39
+ Convenience initializer to parse common options from a dictionary.
40
+ - Parameters:
41
+ - dictionary: Raw options dictionary.
42
+ - defaultIncludeText: Default value for includeText (Scan defaults to false, Process to true).
43
+ */
44
+ init(from dictionary: [String: Any]?, defaultIncludeText: Bool) {
45
+ /* Quality: Clamp [0.1, 1.0] */
46
+ let q = dictionary?["quality"] as? CGFloat ?? 1.0
47
+ self.quality = max(0.1, min(1.0, q))
48
+
49
+ /* Format: whitelist [jpg, png] */
50
+ let f = dictionary?["format"] as? String ?? "jpg"
51
+ self.format = (f == "png") ? "png" : "jpg"
52
+
53
+ /* Filter: whitelist supported types */
54
+ let filterInput = dictionary?["filter"] as? String ?? "color"
55
+ let validFilters = ["color", "grayscale", "monochrome", "denoise", "sharpen", "ocrOptimized"]
56
+ self.filter = validFilters.contains(filterInput) ? filterInput : "color"
57
+
58
+ self.includeBase64 = dictionary?["includeBase64"] as? Bool ?? false
59
+ self.includeText = dictionary?["includeText"] as? Bool ?? defaultIncludeText
60
+
61
+ /* Text Version: allow [1, 2]. */
62
+ let rawVersion = BaseOptions.intOption(from: dictionary, key: "textVersion", fallback: 2)
63
+ self.textVersion = (rawVersion == 1) ? 1 : 2
64
+ }
65
+ }
66
+
67
+ /**
68
+ Strongly-typed representation of scan options.
69
+ Parses the raw dictionary from React Native and provides defaults.
70
+ */
71
+ public class ScanOptions: BaseOptions {
72
+ public let maxPageCount: Int
73
+
74
+ /**
75
+ Initializes ScanOptions from a raw dictionary.
76
+ - Parameters:
77
+ - dictionary: The options dictionary from React Native.
78
+ - fallbackPageCount: Default page count if not specified.
79
+ */
80
+ public init(from dictionary: [String: Any]?, fallbackPageCount: Int) {
81
+ /* Max Page Count: Clamp [0, 100]. 0 = unlimited. */
82
+ let rawMax = BaseOptions.intOption(from: dictionary, key: "maxPageCount", fallback: fallbackPageCount)
83
+ self.maxPageCount = max(0, min(100, rawMax))
84
+
85
+ super.init(from: dictionary, defaultIncludeText: false)
86
+ }
87
+ }
88
+
89
+ /**
90
+ Strongly-typed representation of process options.
91
+ Parses the raw dictionary from React Native for processDocuments.
92
+ */
93
+ public class ProcessOptions: BaseOptions {
94
+ public let images: [String]
95
+
96
+ /**
97
+ Initializes ProcessOptions from a raw dictionary.
98
+ - Parameter dictionary: The options dictionary from React Native.
99
+ - Returns: nil if 'images' array is missing.
100
+ */
101
+ public init?(from dictionary: [String: Any]?) {
102
+ guard let dict = dictionary,
103
+ let images = dict["images"] as? [String] else {
104
+ return nil
105
+ }
106
+ self.images = images
107
+ super.init(from: dictionary, defaultIncludeText: true)
108
+ }
109
+ }
@@ -0,0 +1,25 @@
1
+ import Foundation
2
+ import UIKit
3
+
4
/** Utility class for constructing the response objects for React Native. */
public class ResponseUtil {

    /**
     Constructs the ScanResult struct.
     - Parameters:
       - uri: The local file path.
       - base64: Optional Base64 string.
       - text: Optional OCR text.
       - blocks: Optional OCR blocks.
       - metadata: Describes which engine/configuration produced the result.
     - Returns: A `ScanResult` struct.
     */
    public static func buildResult(uri: String?, base64: String?, text: String?, blocks: [TextBlock]?, metadata: ScanMetadata) -> ScanResult {
        return ScanResult(
            uri: uri,
            base64: base64,
            text: text,
            blocks: blocks,
            metadata: metadata
        )
    }
}
@@ -0,0 +1,84 @@
1
+ import Foundation
2
+
3
/** Describes which OCR engine and configuration produced a ScanResult. */
public struct ScanMetadata: Encodable {
    /// Platform identifier. Always "ios".
    public let platform: String
    /// Requested OCR version (1 = Raw, 2 = Heuristic / RecognizeDocuments on iOS 26+).
    public let textVersion: Int
    /// Image filter applied before OCR ran.
    public let filter: String
    /// Concrete engine used, or "none" when OCR was skipped:
    /// - "RecognizeDocumentsRequest": iOS 26+ native document understanding (V2).
    /// - "VNRecognizeTextRequest": Vision text request (V1, or V2 below iOS 26).
    /// - "none": includeText was false, so no OCR was performed.
    public let ocrEngine: String
}
17
+
18
/** Represents the geometric bounds of a text block in normalized coordinates (0.0 - 1.0). */
public struct Frame: Encodable {
    /// X of the top-left corner.
    public let x: Double
    /// Y of the top-left corner.
    public let y: Double
    /// Bounding-box width.
    public let width: Double
    /// Bounding-box height.
    public let height: Double
}
29
+
30
/** Represents a recognized block of text with its position and confidence level. */
public struct TextBlock: Encodable {
    /// The recognized string.
    public let text: String
    /// Normalized bounding box of the text.
    public let frame: Frame
    /// Recognition confidence (0.0 - 1.0), when the engine provides one.
    public let confidence: Double?
}
39
+
40
/** Represents the final result of a scanned page. */
public struct ScanResult: Encodable {
    public let uri: String?
    public let base64: String?
    public let text: String?
    public let blocks: [TextBlock]?
    public let metadata: ScanMetadata

    /** Converts the struct to a Dictionary for the React Native bridge. */
    public var dictionary: [String: Any] {
        /* Metadata is always present; optional fields are added only when non-nil
           so the JS side never sees explicit nulls. */
        var payload: [String: Any] = [
            "metadata": [
                "platform": metadata.platform,
                "textVersion": metadata.textVersion,
                "filter": metadata.filter,
                "ocrEngine": metadata.ocrEngine
            ]
        ]

        if let uri { payload["uri"] = uri }
        if let base64 { payload["base64"] = base64 }
        if let text { payload["text"] = text }
        if let blocks { payload["blocks"] = blocks.map(Self.bridgeDictionary(for:)) }

        return payload
    }

    /* Maps one TextBlock to its bridge shape, omitting confidence when absent. */
    private static func bridgeDictionary(for block: TextBlock) -> [String: Any] {
        var entry: [String: Any] = [
            "text": block.text,
            "frame": [
                "x": block.frame.x,
                "y": block.frame.y,
                "width": block.frame.width,
                "height": block.frame.height
            ]
        ]
        if let confidence = block.confidence {
            entry["confidence"] = confidence
        }
        return entry
    }
}
@@ -0,0 +1,134 @@
1
+ import Foundation
2
+ import Vision
3
+ import UIKit
4
+
5
/**
 A robust utility class for performing Optical Character Recognition (OCR)
 using Apple's Vision framework.

 Acts as a Facade delegating to versioned implementations:
 - TextRecognizerV1 (Raw)
 - TextRecognizerV2 (Heuristic, or native document understanding on iOS 26+)
 */
@available(iOS 13.0, *)
public class TextRecognizer {

    /**
     Extracts text from an image using Apple's Vision Framework.

     - Parameter image: `UIImage` object to process.
     - Parameter version: OCR engine version (1 = Raw, 2 = Heuristic).
     - Returns: A tuple containing the structured text and raw blocks.
     */
    public static func recognizeText(from image: UIImage, version: Int = 2) async -> (text: String, blocks: [TextBlock])? {
        guard let cgImage = image.cgImage else {
            Logger.warn("Could not retrieve CGImage from input.")
            return nil
        }

        /* iOS 26+: V2 gets native document structure — no heuristics needed. */
        if version == 2, #available(iOS 26.0, *) {
            return await recognizeWithDocumentRequest(cgImage: cgImage)
        }

        let request = makeTextRequest(version: version)
        let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])

        do {
            /* Blocking CPU-bound call — safe on a background task. */
            try handler.perform([request])
        } catch {
            Logger.error("Text recognition request failed: \(error.localizedDescription)")
            return nil
        }

        guard let observations = request.results else {
            Logger.info("No text found in image.")
            return nil
        }

        return version == 1
            ? TextRecognizerV1.recognize(observations)
            : TextRecognizerV2.recognize(observations)
    }

    /* Builds the Vision request configured for the requested engine version. */
    private static func makeTextRequest(version: Int) -> VNRecognizeTextRequest {
        let request = VNRecognizeTextRequest()
        request.recognitionLevel = .accurate

        /*
         V1 (Raw) keeps standard language correction for general text.
         V2 (Heuristic) disables it to preserve document layout and prevent over-merging.
         */
        request.usesLanguageCorrection = (version == 1)

        /* Drop noise and tiny text artifacts. */
        request.minimumTextHeight = 0.01

        /* Automatic language detection for multilingual documents (iOS 16+). */
        if #available(iOS 16.0, *) {
            request.automaticallyDetectsLanguage = true
        }

        return request
    }

    /**
     Returns the name of the OCR engine that will be used for the given version on the current OS.
     Used by ImageProcessor to populate ScanResult metadata.
     */
    public static func engineName(for version: Int) -> String {
        if version == 2, #available(iOS 26.0, *) {
            return "RecognizeDocumentsRequest"
        }
        return "VNRecognizeTextRequest"
    }

    /**
     iOS 26+ fast path using RecognizeDocumentsRequest.
     Maps the native document structure (paragraphs) onto the same
     (text, blocks) shape the heuristic path produces, so callers see no difference.
     One block per paragraph — for structured documents a paragraph is typically one visual line.
     */
    @available(iOS 26.0, *)
    private static func recognizeWithDocumentRequest(cgImage: CGImage) async -> (text: String, blocks: [TextBlock])? {
        do {
            let request = RecognizeDocumentsRequest()
            let observations = try await request.perform(on: cgImage)

            guard let document = observations.first?.document else {
                Logger.info("No document structure found in image.")
                return nil
            }

            var paragraphTexts: [String] = []
            var blocks: [TextBlock] = []

            for paragraph in document.paragraphs {
                let transcript = paragraph.transcript
                paragraphTexts.append(transcript)

                /* boundingRegion (NormalizedRegion) → .boundingBox (NormalizedRect) → .cgRect.
                   Vision uses a bottom-left origin — flip to top-left: 1 - y - height. */
                let rect = paragraph.boundingRegion.boundingBox.cgRect
                let topLeftY = 1.0 - rect.origin.y - rect.size.height

                blocks.append(TextBlock(
                    text: transcript,
                    frame: Frame(
                        x: Double(rect.origin.x),
                        y: Double(topLeftY),
                        width: Double(rect.size.width),
                        height: Double(rect.size.height)
                    ),
                    confidence: nil
                ))
            }

            let text = paragraphTexts.isEmpty ? "" : paragraphTexts.joined(separator: "\n") + "\n"
            return (text: text, blocks: blocks)

        } catch {
            Logger.error("RecognizeDocumentsRequest failed: \(error.localizedDescription)")
            return nil
        }
    }
}
@@ -0,0 +1,56 @@
1
+ import Foundation
2
+ import Vision
3
+
4
/**
 Version 1: Raw Output (Standard Vision Behavior)
 */
@available(iOS 13.0, *)
public class TextRecognizerV1 {

    /**
     Performs raw text recognition from Vision observations.
     - Parameter observations: Raw results from VNRecognizeTextRequest.
     - Returns: Concatenated text and structured blocks.
     */
    public static func recognize(_ observations: [VNRecognizedTextObservation]) -> (text: String, blocks: [TextBlock]) {
        /* One block per observation; observations without a candidate are skipped. */
        let blocks: [TextBlock] = observations.compactMap { observation in
            guard let candidate = observation.topCandidates(1).first else { return nil }

            let box = observation.boundingBox

            /*
             Vision uses a bottom-left origin.
             Convert to Android-style top-left origin: 1.0 - y - height
             */
            let topLeftY = 1.0 - box.origin.y - box.size.height

            return TextBlock(
                text: candidate.string,
                frame: Frame(
                    x: box.origin.x,
                    y: topLeftY,
                    width: box.size.width,
                    height: box.size.height
                ),
                confidence: Double(candidate.confidence)
            )
        }

        /*
         Sort top-to-bottom using the converted Y (top-left origin):
         smaller Y values sit higher on the page.
         */
        let ordered = blocks.sorted { $0.frame.y < $1.frame.y }

        /* Concatenate text in reading order. */
        let fullText = ordered.map { $0.text }.joined(separator: "\n")

        return (text: fullText, blocks: ordered)
    }
}
@@ -0,0 +1,169 @@
1
+ import Foundation
2
+ import Vision
3
+ import UIKit
4
+
5
+ /**
6
+ Version 2: Heuristic Enhanced (Line Clustering for Layout Preservation)
7
+ */
8
+ @available(iOS 13.0, *)
9
+ public class TextRecognizerV2 {
10
+
11
+ public static func recognize(_ observations: [VNRecognizedTextObservation]) -> (text: String, blocks: [TextBlock]) {
12
+
13
+ /* 1. LineCluster Strategy */
14
+ struct LineCluster {
15
+ var observations: [VNRecognizedTextObservation]
16
+ var unionBoundingBox: CGRect
17
+ var heights: [CGFloat]
18
+ var centerYs: [CGFloat]
19
+
20
+ var medianHeight: CGFloat {
21
+ let sorted = heights.sorted()
22
+ if sorted.isEmpty { return 0 }
23
+ let mid = sorted.count / 2
24
+ return sorted.count % 2 == 0
25
+ ? (sorted[mid - 1] + sorted[mid]) / 2.0
26
+ : sorted[mid]
27
+ }
28
+
29
+ var medianCenterY: CGFloat {
30
+ let sorted = centerYs.sorted()
31
+ if sorted.isEmpty { return 0 }
32
+ let mid = sorted.count / 2
33
+ return sorted.count % 2 == 0
34
+ ? (sorted[mid - 1] + sorted[mid]) / 2.0
35
+ : sorted[mid]
36
+ }
37
+ }
38
+
39
+ /*
40
+ Sort observations top-to-bottom for clustering.
41
+ In RAW Vision space, higher on page means BIGGER Y.
42
+ */
43
+ let sortedObservations = observations.sorted { $0.boundingBox.midY > $1.boundingBox.midY }
44
+ var clusters: [LineCluster] = []
45
+
46
+ for obs in sortedObservations {
47
+ let obsBox = obs.boundingBox
48
+ let obsHeight = obsBox.height
49
+ let obsCenterY = obsBox.midY
50
+
51
+ var bestClusterIndex: Int? = nil
52
+ var bestOverlapRatio: CGFloat = 0.0
53
+ var bestCenterDistance: CGFloat = .greatestFiniteMagnitude
54
+
55
+ for (index, cluster) in clusters.enumerated() {
56
+ let clusterBox = cluster.unionBoundingBox
57
+
58
+ /* Heuristic: Height Compatibility — use median height, not union bbox height */
59
+ let minH = min(cluster.medianHeight, obsHeight)
60
+ let maxH = max(cluster.medianHeight, obsHeight)
61
+ if (minH / maxH) < OCRConfiguration.heightCompatibilityThreshold { continue }
62
+
63
+ /* Heuristic: Overlap & Centerline — use median centerY, not union bbox midY */
64
+ let intersection = clusterBox.intersection(obsBox)
65
+ let overlapHeight = max(0, intersection.height)
66
+ let overlapRatio = overlapHeight / minH
67
+
68
+ let centerDistance = abs(cluster.medianCenterY - obsCenterY)
69
+ let typicalLineHeight = max(cluster.medianHeight, obsHeight)
70
+
71
+ let isOverlapGood = overlapRatio >= OCRConfiguration.overlapRatioThreshold
72
+ let isCenterClose = centerDistance <= (OCRConfiguration.centerlineDistanceFactor * typicalLineHeight)
73
+
74
+ if (isOverlapGood || isCenterClose) {
75
+ /* Heuristic: Adaptive Cluster Growth Constraint */
76
+ let intersectX = max(0, min(clusterBox.maxX, obsBox.maxX) - max(clusterBox.minX, obsBox.minX))
77
+ let isStacked = intersectX > 0
78
+
79
+ let growthLimit = isStacked ? OCRConfiguration.stackedGrowthLimit : OCRConfiguration.skewedGrowthLimit
80
+
81
+ let newUnion = clusterBox.union(obsBox)
82
+ if newUnion.height <= (CGFloat(growthLimit) * typicalLineHeight) {
83
+ /* Score this cluster */
84
+ if overlapRatio > bestOverlapRatio {
85
+ bestOverlapRatio = overlapRatio
86
+ bestCenterDistance = centerDistance
87
+ bestClusterIndex = index
88
+ } else if abs(overlapRatio - bestOverlapRatio) < 0.01 && centerDistance < bestCenterDistance {
89
+ bestCenterDistance = centerDistance
90
+ bestClusterIndex = index
91
+ }
92
+ }
93
+ }
94
+ }
95
+
96
+ if let idx = bestClusterIndex {
97
+ clusters[idx].observations.append(obs)
98
+ clusters[idx].unionBoundingBox = clusters[idx].unionBoundingBox.union(obsBox)
99
+ clusters[idx].heights.append(obsHeight)
100
+ clusters[idx].centerYs.append(obsCenterY)
101
+ } else {
102
+ clusters.append(LineCluster(
103
+ observations: [obs],
104
+ unionBoundingBox: obsBox,
105
+ heights: [obsHeight],
106
+ centerYs: [obsCenterY]
107
+ ))
108
+ }
109
+ }
110
+
111
+ /*
112
+ Sort clusters top-to-bottom for final output.
113
+ In RAW Vision space, higher on page means BIGGER Y.
114
+ */
115
+ clusters.sort { $0.unionBoundingBox.midY > $1.unionBoundingBox.midY }
116
+
117
+ /* 2. Column Reconstruction (Adaptive Spacing) + Cluster-Based Blocks */
118
+ var structuredText = ""
119
+ var clusterBlocks: [TextBlock] = []
120
+
121
+ for cluster in clusters {
122
+ /* Sort line elements left-to-right */
123
+ let lineObs = cluster.observations.sorted { $0.boundingBox.origin.x < $1.boundingBox.origin.x }
124
+ let medianH = cluster.medianHeight
125
+
126
+ var lineString = ""
127
+ var lastXEnd: CGFloat = 0.0
128
+
129
+ for (index, obs) in lineObs.enumerated() {
130
+ guard let candidate = obs.topCandidates(1).first else { continue }
131
+ let xStart = obs.boundingBox.origin.x
132
+
133
+ if index > 0 {
134
+ let gap = xStart - lastXEnd
135
+ /* Spacing Heuristic */
136
+ if gap > (medianH * CGFloat(OCRConfiguration.adaptiveSpacingFactor)) {
137
+ let spaceWidth = medianH * CGFloat(OCRConfiguration.spaceWidthFactor)
138
+ let spaces = max(1, Int(gap / spaceWidth))
139
+ lineString += String(repeating: " ", count: min(spaces, OCRConfiguration.maxSpaces))
140
+ } else {
141
+ lineString += " "
142
+ }
143
+ }
144
+
145
+ lineString += candidate.string
146
+ lastXEnd = xStart + obs.boundingBox.width
147
+ }
148
+
149
+ structuredText += lineString + "\n"
150
+
151
+ /* Build one block per cluster (line-level, aligned with text output) */
152
+ let unionBox = cluster.unionBoundingBox
153
+ let androidStyleY = 1.0 - unionBox.origin.y - unionBox.size.height
154
+ let frame = Frame(
155
+ x: Double(unionBox.origin.x),
156
+ y: Double(androidStyleY),
157
+ width: Double(unionBox.size.width),
158
+ height: Double(unionBox.size.height)
159
+ )
160
+ let confidences = cluster.observations.compactMap { obs -> Double? in
161
+ obs.topCandidates(1).first.map { Double($0.confidence) }
162
+ }
163
+ let avgConfidence = confidences.isEmpty ? nil : confidences.reduce(0.0, +) / Double(confidences.count)
164
+ clusterBlocks.append(TextBlock(text: lineString, frame: frame, confidence: avgConfidence))
165
+ }
166
+
167
+ return (text: structuredText, blocks: clusterBlocks)
168
+ }
169
+ }
@@ -0,0 +1,51 @@
1
"use strict";

import { TurboModuleRegistry } from 'react-native';

/*
 * Compiled output of NativeDocumentScanner.ts.
 *
 * The TypeScript-only declarations are erased at build time: the filter/format
 * value unions, the TextBlock and ScanMetadata shapes, the ScanResult type,
 * the scan/process option interfaces, and the TurboModule spec itself. Option
 * interfaces list their fields explicitly (no `extends`) because React Native
 * Codegen only generates struct fields declared directly on an interface.
 *
 * Only the module lookup below survives into JavaScript.
 */

export default TurboModuleRegistry.getEnforcing('DocumentScanner');
//# sourceMappingURL=NativeDocumentScanner.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"names":["TurboModuleRegistry","getEnforcing"],"sourceRoot":"../../src","sources":["NativeDocumentScanner.ts"],"mappings":";;AAAA,SAASA,mBAAmB,QAA0B,cAAc;;AAEpE;AACA;AACA;AACA;;AASA;AACA;AACA;AACA;;AAGA;AACA;AACA;AACA;;AAkBA;AACA;AACA;AACA;AACA;;AAsBA;AACA;AACA;;AAcA;AACA;AACA;;AA4BA;AACA;AACA;AACA;AACA;;AA8BA;AACA;AACA;AACA;AACA;;AAkCA;AACA;AACA;;AAiBA,eAAeA,mBAAmB,CAACC,YAAY,CAAO,iBAAiB,CAAC","ignoreList":[]}
@@ -0,0 +1,40 @@
1
"use strict";

import DocumentScanner from "./NativeDocumentScanner.js";

/**
 * Image filter constants. Prefer these over raw strings for type safety.
 */
export const Filter = {
  /** Original colors, no processing */
  COLOR: 'color',
  /** Desaturated output */
  GRAYSCALE: 'grayscale',
  /** High-contrast black & white */
  MONOCHROME: 'monochrome',
  /** Noise reduction for noisy photos */
  DENOISE: 'denoise',
  /** Edge enhancement for blurry text */
  SHARPEN: 'sharpen',
  /** Full OCR pipeline: denoise → sharpen → monochrome */
  OCR_OPTIMIZED: 'ocrOptimized'
};

/**
 * Output format constants.
 */
export const Format = {
  /** JPEG (smaller files) */
  JPG: 'jpg',
  /** PNG (lossless) */
  PNG: 'png'
};

/** Delegates to the native module's scanDocuments. */
export function scanDocuments(options) {
  return DocumentScanner.scanDocuments(options);
}

/** Delegates to the native module's processDocuments. */
export function processDocuments(options) {
  return DocumentScanner.processDocuments(options);
}

export { reconstructText } from "./textReconstructor.js";
//# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"names":["DocumentScanner","Filter","COLOR","GRAYSCALE","MONOCHROME","DENOISE","SHARPEN","OCR_OPTIMIZED","Format","JPG","PNG","scanDocuments","options","processDocuments","reconstructText"],"sourceRoot":"../../src","sources":["index.ts"],"mappings":";;AAAA,OAAOA,eAAe,MAQf,4BAAyB;;AAEhC;AACA;AACA;AACA;AACA,OAAO,MAAMC,MAAM,GAAG;EACpB;EACAC,KAAK,EAAE,OAAO;EACd;EACAC,SAAS,EAAE,WAAW;EACtB;EACAC,UAAU,EAAE,YAAY;EACxB;EACAC,OAAO,EAAE,SAAS;EAClB;EACAC,OAAO,EAAE,SAAS;EAClB;EACAC,aAAa,EAAE;AACjB,CAAU;;AAEV;AACA;AACA;AACA,OAAO,MAAMC,MAAM,GAAG;EACpB;EACAC,GAAG,EAAE,KAAK;EACV;EACAC,GAAG,EAAE;AACP,CAAU;AAEV,OAAO,SAASC,aAAaA,CAACC,OAAqB,EAAyB;EAC1E,OAAOZ,eAAe,CAACW,aAAa,CAACC,OAAO,CAAC;AAC/C;AAEA,OAAO,SAASC,gBAAgBA,CAC9BD,OAAuB,EACA;EACvB,OAAOZ,eAAe,CAACa,gBAAgB,CAACD,OAAO,CAAC;AAClD;AAEA,SAASE,eAAe,QAAQ,wBAAqB","ignoreList":[]}
@@ -0,0 +1 @@
1
+ {"type":"module"}