@hoshomoh/react-native-document-scanner 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/DocumentScanner.podspec +22 -0
- package/LICENSE +20 -0
- package/README.md +384 -0
- package/android/build.gradle +72 -0
- package/android/gradle.properties +17 -0
- package/android/local.properties +8 -0
- package/android/src/main/AndroidManifest.xml +8 -0
- package/android/src/main/java/com/documentscanner/DocumentScannerModule.kt +217 -0
- package/android/src/main/java/com/documentscanner/DocumentScannerPackage.kt +39 -0
- package/android/src/main/java/com/documentscanner/ImageProcessor.kt +325 -0
- package/android/src/main/java/com/documentscanner/Logger.kt +36 -0
- package/android/src/main/java/com/documentscanner/OCRConfiguration.kt +56 -0
- package/android/src/main/java/com/documentscanner/Options.kt +109 -0
- package/android/src/main/java/com/documentscanner/ScannerError.kt +18 -0
- package/android/src/main/java/com/documentscanner/TextRecognizer.kt +56 -0
- package/android/src/main/java/com/documentscanner/TextRecognizerV1.kt +68 -0
- package/android/src/main/java/com/documentscanner/TextRecognizerV2.kt +244 -0
- package/ios/DocumentScanner.h +5 -0
- package/ios/DocumentScanner.mm +113 -0
- package/ios/DocumentScannerManager.swift +148 -0
- package/ios/Errors.swift +33 -0
- package/ios/ImageProcessor.swift +78 -0
- package/ios/ImageUtil.swift +279 -0
- package/ios/Logger.swift +43 -0
- package/ios/OCRConfiguration.swift +60 -0
- package/ios/Options.swift +109 -0
- package/ios/ResponseUtil.swift +25 -0
- package/ios/ScanModels.swift +84 -0
- package/ios/TextRecognizer.swift +134 -0
- package/ios/TextRecognizerV1.swift +56 -0
- package/ios/TextRecognizerV2.swift +169 -0
- package/lib/module/NativeDocumentScanner.js +51 -0
- package/lib/module/NativeDocumentScanner.js.map +1 -0
- package/lib/module/index.js +40 -0
- package/lib/module/index.js.map +1 -0
- package/lib/module/package.json +1 -0
- package/lib/module/textReconstructor.js +147 -0
- package/lib/module/textReconstructor.js.map +1 -0
- package/lib/typescript/package.json +1 -0
- package/lib/typescript/src/NativeDocumentScanner.d.ts +191 -0
- package/lib/typescript/src/NativeDocumentScanner.d.ts.map +1 -0
- package/lib/typescript/src/index.d.ts +34 -0
- package/lib/typescript/src/index.d.ts.map +1 -0
- package/lib/typescript/src/textReconstructor.d.ts +60 -0
- package/lib/typescript/src/textReconstructor.d.ts.map +1 -0
- package/package.json +137 -0
- package/src/NativeDocumentScanner.ts +205 -0
- package/src/index.ts +61 -0
- package/src/textReconstructor.ts +212 -0
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
package com.documentscanner
|
|
2
|
+
|
|
3
|
+
import com.facebook.react.bridge.ReadableMap
|
|
4
|
+
|
|
5
|
+
/**
 * Extracts an integer-valued option from [options], returning [fallback]
 * when the map is null or the key is absent.
 *
 * The value is read via getDouble because a TypeScript `number` always
 * crosses the bridge as a JSI double — getInt can fail or return incorrect
 * results on some React Native versions.
 */
private fun intOption(options: ReadableMap?, key: String, fallback: Int): Int {
    val map = options ?: return fallback
    return if (map.hasKey(key)) map.getDouble(key).toInt() else fallback
}
|
|
12
|
+
|
|
13
|
+
/**
 * Base options shared by both Scan and Process operations.
 * Allows parsing common properties from a React Native ReadableMap.
 *
 * All parsing is defensive: missing keys fall back to defaults, out-of-range
 * values are clamped, and unknown strings are whitelisted to a safe default,
 * so malformed JS input never crashes the native layer.
 */
abstract class BaseOptions(
    val quality: Double,
    val format: String,
    val filter: String,
    val includeBase64: Boolean,
    val includeText: Boolean,
    val textVersion: Int
) {
    /**
     * Secondary constructor to parse common options from a dictionary.
     * Delegates to the primary constructor with validated values.
     *
     * @param options Raw options map from JS; may be null.
     * @param defaultIncludeText Default used when "includeText" is absent.
     */
    constructor(options: ReadableMap?, defaultIncludeText: Boolean) : this(
        quality = parseQuality(options),
        format = parseFormat(options),
        filter = parseFilter(options),
        includeBase64 = parseBoolean(options, "includeBase64", false),
        includeText = parseBoolean(options, "includeText", defaultIncludeText),
        textVersion = parseTextVersion(options)
    )

    companion object {
        /** Validates and returns the compression quality, clamped to [0.1, 1.0]. */
        private fun parseQuality(options: ReadableMap?): Double {
            val value = if (options?.hasKey("quality") == true) options.getDouble("quality") else 1.0
            return value.coerceIn(0.1, 1.0)
        }

        /**
         * Whitelists and returns the output image format ("png" or "jpg").
         * Guards with hasKey before getString — consistent with the other
         * parsers here, and avoids NoSuchKeyException on ReadableMap
         * implementations that throw for absent keys.
         */
        private fun parseFormat(options: ReadableMap?): String {
            val value = if (options?.hasKey("format") == true) options.getString("format") else null
            return if (value == "png") "png" else "jpg"
        }

        /**
         * Whitelists and returns the post-processing filter.
         * Unknown or missing values fall back to "color". Guarded with hasKey
         * for the same reason as [parseFormat].
         */
        private fun parseFilter(options: ReadableMap?): String {
            val value = if (options?.hasKey("filter") == true) options.getString("filter") else null
            val validFilters = listOf("color", "grayscale", "monochrome", "denoise", "sharpen", "ocrOptimized")
            return if (value != null && validFilters.contains(value)) value else "color"
        }

        /** Safely parses a boolean value with a default fallback. */
        private fun parseBoolean(options: ReadableMap?, key: String, default: Boolean): Boolean {
            return if (options?.hasKey(key) == true) options.getBoolean(key) else default
        }

        /** Validates and returns the OCR engine version: 1 stays 1, everything else maps to 2. */
        private fun parseTextVersion(options: ReadableMap?): Int {
            val value = intOption(options, "textVersion", 2)
            return if (value == 1) 1 else 2
        }
    }
}
|
|
70
|
+
|
|
71
|
+
/**
 * Strongly-typed options for the scanDocuments() method.
 */
class ScanOptions(options: ReadableMap?, fallbackPageCount: Int) : BaseOptions(options, false) {
    /** Maximum pages to scan, clamped to [0, 100]. */
    val maxPageCount: Int = parseMaxPages(options, fallbackPageCount)

    companion object {
        /** Factory that builds ScanOptions with a fallback page count of 0. */
        fun from(options: ReadableMap?): ScanOptions = ScanOptions(options, 0)

        /** Reads "maxPageCount" (or the fallback) and clamps it into [0, 100]. */
        private fun parseMaxPages(options: ReadableMap?, fallback: Int): Int {
            val requested = intOption(options, "maxPageCount", fallback)
            return requested.coerceIn(0, 100)
        }
    }
}
|
|
85
|
+
|
|
86
|
+
/**
 * Strongly-typed options for the processDocuments() method.
 */
class ProcessOptions(options: ReadableMap?) : BaseOptions(options, true) {
    /** List of image URIs or Base64 strings to process. */
    val images: List<String> = parseImages(options)

    companion object {
        /** Factory mirroring ScanOptions.from for a consistent call style. */
        fun from(options: ReadableMap?): ProcessOptions = ProcessOptions(options)

        /**
         * Collects the non-null string entries of the "images" array.
         * Returns an empty list when the key or the array is absent.
         */
        private fun parseImages(options: ReadableMap?): List<String> {
            if (options?.hasKey("images") != true) return emptyList()
            val array = options.getArray("images") ?: return emptyList()
            return (0 until array.size()).mapNotNull { index -> array.getString(index) }
        }
    }
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
package com.documentscanner
|
|
2
|
+
|
|
3
|
+
/**
 * Structured errors surfaced to the React Native layer.
 *
 * Each error carries a stable machine-readable [code] plus a human-readable
 * [message] suitable for promise rejection.
 */
sealed class ScannerError(val code: String, val message: String) {
    /** Scanning is unavailable on this hardware or OS version. */
    class NotSupported :
        ScannerError("NOT_SUPPORTED", "Device does not support document scanning")

    /** The supplied options were invalid or missing required fields. */
    class ConfigurationError(detail: String) :
        ScannerError("CONFIGURATION_ERROR", "Configuration Error: $detail")

    /** Something went wrong at runtime while scanning or processing. */
    class OperationFailed(detail: String) :
        ScannerError("OPERATION_FAILED", "Operation Failed: $detail")

    /** The user dismissed the scanner interface without completing a scan. */
    class Canceled :
        ScannerError("CANCELED", "User canceled the scan")
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
package com.documentscanner
|
|
2
|
+
|
|
3
|
+
import android.content.Context
|
|
4
|
+
import android.net.Uri
|
|
5
|
+
import com.facebook.react.bridge.Arguments
|
|
6
|
+
import com.facebook.react.bridge.WritableMap
|
|
7
|
+
import com.google.mlkit.vision.common.InputImage
|
|
8
|
+
import com.google.mlkit.vision.text.TextRecognition
|
|
9
|
+
import com.google.mlkit.vision.text.latin.TextRecognizerOptions
|
|
10
|
+
import kotlinx.coroutines.tasks.await
|
|
11
|
+
|
|
12
|
+
/**
 * Utility class for performing Optical Character Recognition (OCR) using ML Kit.
 * Extracts text and bounding boxes from images.
 *
 * Acts as a Facade delegating to versioned implementations:
 * - TextRecognizerV1 (Raw)
 * - TextRecognizerV2 (Heuristic)
 */
class TextRecognizer {
    companion object {

        /**
         * Processes an image and extracts text blocks based on the requested version.
         *
         * Best-effort contract: on any recognition failure the error is logged
         * and an empty result ("" text, no blocks) is returned instead of
         * propagating, so OCR problems never fail the overall scan. Coroutine
         * cancellation is rethrown, never swallowed.
         *
         * @param context Application context
         * @param uri URI of the image to process from disk
         * @param version OCR engine version (1 = Raw ML Kit, 2 = Layout Heuristics)
         * @return WritableMap containing "text" and "blocks"
         */
        suspend fun processImage(context: Context, uri: Uri, version: Int): WritableMap {
            try {
                val image = InputImage.fromFilePath(context, uri)
                val imagePixelWidth = image.width.toDouble()
                val imagePixelHeight = image.height.toDouble()

                // The ML Kit client is Closeable; release it once recognition
                // completes so its native resources are not leaked on every call.
                val recognizer = TextRecognition.getClient(TextRecognizerOptions.DEFAULT_OPTIONS)
                val visionText = try {
                    recognizer.process(image).await()
                } finally {
                    recognizer.close()
                }

                return if (version == 1) {
                    TextRecognizerV1.process(visionText, imagePixelWidth, imagePixelHeight)
                } else {
                    TextRecognizerV2.process(visionText, imagePixelWidth, imagePixelHeight)
                }

            } catch (e: kotlinx.coroutines.CancellationException) {
                // Never swallow cooperative cancellation.
                throw e
            } catch (e: Exception) {
                Logger.error("OCR processing failed: ${e.message}", e)
                val emptyMap = Arguments.createMap()
                emptyMap.putString("text", "")
                emptyMap.putArray("blocks", Arguments.createArray())
                return emptyMap
            }
        }
    }
}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
package com.documentscanner
|
|
2
|
+
|
|
3
|
+
import android.graphics.Rect
|
|
4
|
+
import com.facebook.react.bridge.Arguments
|
|
5
|
+
import com.facebook.react.bridge.WritableMap
|
|
6
|
+
import com.google.mlkit.vision.text.Text
|
|
7
|
+
|
|
8
|
+
/**
 * Version 1: Raw Output (Standard ML Kit Behavior)
 */
class TextRecognizerV1 {
    companion object {
        /**
         * Performs raw text recognition using ML Kit's default output.
         * Emits one block per visual line (ML Kit TextLine level) for
         * consistent cross-platform granularity with iOS V1.
         *
         * @param visionText The raw result from ML Kit.
         * @param imagePixelWidth Original image width in pixels.
         * @param imagePixelHeight Original image height in pixels.
         * @return WritableMap with "text" and "blocks".
         */
        fun process(visionText: Text, imagePixelWidth: Double, imagePixelHeight: Double): WritableMap {
            val resultMap = Arguments.createMap()

            // Raw text exactly as ML Kit produced it.
            resultMap.putString("text", visionText.text)

            // Flatten blocks to their lines: one output block per TextLine.
            val blocksArray = Arguments.createArray()
            val lines = visionText.textBlocks.flatMap { textBlock -> textBlock.lines }
            for (line in lines) {
                val bounds = line.boundingBox ?: Rect(0, 0, 0, 0)

                // Frame coordinates normalized to the image dimensions.
                val frameMap = Arguments.createMap().apply {
                    putDouble("x", safeNormalize(bounds.left.toDouble(), imagePixelWidth))
                    putDouble("y", safeNormalize(bounds.top.toDouble(), imagePixelHeight))
                    putDouble("width", safeNormalize(bounds.width().toDouble(), imagePixelWidth))
                    putDouble("height", safeNormalize(bounds.height().toDouble(), imagePixelHeight))
                }

                val blockMap = Arguments.createMap().apply {
                    putString("text", line.text)
                    putMap("frame", frameMap)
                }

                // Confidence is the mean over the line's word elements;
                // omitted entirely when the line has no elements.
                if (line.elements.isNotEmpty()) {
                    val meanConfidence = line.elements.map { it.confidence.toDouble() }.average()
                    blockMap.putDouble("confidence", meanConfidence)
                }

                blocksArray.pushMap(blockMap)
            }
            resultMap.putArray("blocks", blocksArray)

            return resultMap
        }

        /**
         * Normalizes a pixel measurement against a dimension.
         * Returns 0.0 for non-positive dimensions to avoid division by zero.
         */
        private fun safeNormalize(value: Double, dimension: Double): Double =
            if (dimension > 0) value / dimension else 0.0
    }
}
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
package com.documentscanner
|
|
2
|
+
|
|
3
|
+
import android.graphics.Rect
|
|
4
|
+
import com.facebook.react.bridge.Arguments
|
|
5
|
+
import com.facebook.react.bridge.WritableMap
|
|
6
|
+
import com.google.mlkit.vision.text.Text
|
|
7
|
+
import kotlin.math.abs
|
|
8
|
+
import kotlin.math.max
|
|
9
|
+
import kotlin.math.min
|
|
10
|
+
|
|
11
|
+
/**
 * Version 2: Heuristic Enhanced (Line Clustering for Layout Preservation)
 *
 * Rebuilds logical text lines from ML Kit's word-level elements so that the
 * emitted "text" preserves horizontal layout (column spacing) and each output
 * block corresponds to one reconstructed line. All coordinates are normalized
 * to [0, 1] relative to the image dimensions at extraction time.
 */
class TextRecognizerV2 {

    /**
     * Internal data class representing a text element with its bounding box and confidence.
     * All geometry fields are normalized (fractions of image width/height).
     */
    private data class TextElement(
        val text: String,
        val left: Double,
        val top: Double,
        val width: Double,
        val height: Double,
        val confidence: Double
    )

    companion object {
        /**
         * Performs heuristic-enhanced text recognition with layout preservation.
         * Matches horizontally aligned word-level elements into logical lines.
         *
         * @param visionText The raw result from ML Kit.
         * @param imagePixelWidth Original image width in pixels.
         * @param imagePixelHeight Original image height in pixels.
         * @return WritableMap with "text" (structured) and "blocks" (line-level metadata).
         */
        fun process(visionText: Text, imagePixelWidth: Double, imagePixelHeight: Double): WritableMap {
            val resultMap = Arguments.createMap()

            // Step 1: Extract generic "TextElement" from ML Kit's word-level elements
            val allElements = mutableListOf<TextElement>()

            // Flatten to word level (line.elements) for better granular clustering.
            // Elements without a bounding box are skipped — they cannot be placed.
            for (block in visionText.textBlocks) {
                for (line in block.lines) {
                    for (element in line.elements) {
                        val box = element.boundingBox ?: continue
                        allElements.add(TextElement(
                            text = element.text,
                            left = safeNormalize(box.left.toDouble(), imagePixelWidth),
                            top = safeNormalize(box.top.toDouble(), imagePixelHeight),
                            width = safeNormalize(box.width().toDouble(), imagePixelWidth),
                            height = safeNormalize(box.height().toDouble(), imagePixelHeight),
                            confidence = element.confidence.toDouble()
                        ))
                    }
                }
            }

            // Step 2: Group elements into Lines using LineCluster Strategy.
            // A cluster tracks its member elements, per-element heights and
            // vertical centers (for robust medians), plus a running union
            // bounding box.
            class LineCluster(firstElement: TextElement) {
                val elements = mutableListOf(firstElement)
                val heights = mutableListOf(firstElement.height)
                val centerYs = mutableListOf(firstElement.top + firstElement.height / 2.0)

                // Union Bounding Box state
                var uLeft = firstElement.left
                var uTop = firstElement.top
                var uRight = firstElement.left + firstElement.width
                var uBottom = firstElement.top + firstElement.height

                // Median element height — more robust than the union-box height,
                // which inflates as the cluster absorbs skewed elements.
                fun medianHeight(): Double {
                    if (heights.isEmpty()) return 0.0
                    val sorted = heights.sorted()
                    val mid = sorted.size / 2
                    return if (sorted.size % 2 == 0) {
                        (sorted[mid - 1] + sorted[mid]) / 2.0
                    } else {
                        sorted[mid]
                    }
                }

                // Median vertical center of member elements, used as the
                // cluster's baseline estimate for the centerline test.
                fun medianCenterY(): Double {
                    if (centerYs.isEmpty()) return 0.0
                    val sorted = centerYs.sorted()
                    val mid = sorted.size / 2
                    return if (sorted.size % 2 == 0) {
                        (sorted[mid - 1] + sorted[mid]) / 2.0
                    } else {
                        sorted[mid]
                    }
                }

                val height: Double get() = uBottom - uTop
                val midY: Double get() = uTop + (height / 2.0) // retained for final cluster sort

                // Absorb an element and expand the union box accordingly.
                fun add(element: TextElement) {
                    elements.add(element)
                    heights.add(element.height)
                    centerYs.add(element.top + element.height / 2.0)

                    // Update union box
                    uLeft = min(uLeft, element.left)
                    uTop = min(uTop, element.top)
                    uRight = max(uRight, element.left + element.width)
                    uBottom = max(uBottom, element.top + element.height)
                }
            }

            // Sort by midY ascending (top of page first) — greedy clustering
            // below depends on this top-down order.
            val sortedElements = allElements.sortedBy { it.top + (it.height / 2.0) }
            val clusters = mutableListOf<LineCluster>()

            for (element in sortedElements) {
                val elHeight = element.height
                val elMidY = element.top + (elHeight / 2.0)

                var bestClusterIndex: Int? = null
                var bestOverlapRatio = 0.0
                var bestCenterDist = Double.MAX_VALUE

                for ((index, cluster) in clusters.withIndex()) {

                    // 1. Height Similarity Check — use median height, not union bbox height
                    val clusterMedianH = cluster.medianHeight()
                    val minH = min(clusterMedianH, elHeight)
                    val maxH = max(clusterMedianH, elHeight)
                    if ((minH / maxH) < OCRConfiguration.HEIGHT_COMPATIBILITY_THRESHOLD) continue

                    // 2. Overlap & Centerline — use median centerY, not union bbox midY
                    val intTop = max(cluster.uTop, element.top)
                    val intBottom = min(cluster.uBottom, element.top + element.height)
                    val intHeight = max(0.0, intBottom - intTop)

                    val overlapRatio = intHeight / minH

                    val centerDist = abs(cluster.medianCenterY() - elMidY)
                    val typicalHeight = max(clusterMedianH, elHeight)

                    val isOverlapGood = overlapRatio >= OCRConfiguration.OVERLAP_RATIO_THRESHOLD
                    val isCenterClose = centerDist <= (OCRConfiguration.CENTERLINE_DISTANCE_FACTOR * typicalHeight)

                    // Either criterion qualifies the cluster as a candidate.
                    if (isOverlapGood || isCenterClose) {
                        // 3. Adaptive Cluster Growth Constraint — reject merges that
                        // would balloon the cluster's vertical extent. The limit is
                        // stricter when the element horizontally overlaps the cluster
                        // (stacked text), looser otherwise (skewed lines).
                        val clusterRight = cluster.uRight
                        val elementRight = element.left + element.width
                        val intersectX = max(0.0, min(clusterRight, elementRight) - max(cluster.uLeft, element.left))
                        val isStacked = intersectX > 0

                        val growthLimit = if (isStacked) OCRConfiguration.STACKED_GROWTH_LIMIT else OCRConfiguration.SKEWED_GROWTH_LIMIT

                        val newTop = min(cluster.uTop, element.top)
                        val newBottom = max(cluster.uBottom, element.top + element.height)
                        val newHeight = newBottom - newTop

                        if (newHeight <= (growthLimit * typicalHeight)) {
                            // Best-candidate selection: prefer higher overlap; on a
                            // near-tie (< 0.01), prefer the closer centerline.
                            if (overlapRatio > bestOverlapRatio) {
                                bestOverlapRatio = overlapRatio
                                bestCenterDist = centerDist
                                bestClusterIndex = index
                            } else if (abs(overlapRatio - bestOverlapRatio) < 0.01 && centerDist < bestCenterDist) {
                                bestCenterDist = centerDist
                                bestClusterIndex = index
                            }
                        }
                    }
                }

                // Join the best matching cluster, or start a new line.
                if (bestClusterIndex != null) {
                    clusters[bestClusterIndex].add(element)
                } else {
                    clusters.add(LineCluster(element))
                }
            }

            // Sort clusters top-to-bottom
            clusters.sortBy { it.midY }

            // Step 3: Reconstruct text with adaptive column spacing + build cluster-based blocks
            val structuredText = StringBuilder()
            val blocksArray = Arguments.createArray()

            for (cluster in clusters) {
                // Sort elements left-to-right for reading order
                val lineElements = cluster.elements.sortedBy { it.left }
                val medianH = cluster.medianHeight()

                val lineString = StringBuilder()
                var lastXEnd = 0.0

                for ((index, element) in lineElements.withIndex()) {
                    val xStart = element.left

                    if (index > 0) {
                        val gap = xStart - lastXEnd

                        // Wide gap → emit multiple spaces proportional to the gap
                        // (capped at MAX_SPACES) to approximate column layout;
                        // normal gap → a single space.
                        if (gap > (medianH * OCRConfiguration.ADAPTIVE_SPACING_FACTOR)) {
                            val spaceWidth = medianH * OCRConfiguration.SPACE_WIDTH_FACTOR
                            val spaces = max(1, (gap / spaceWidth).toInt())
                            lineString.append(" ".repeat(min(spaces, OCRConfiguration.MAX_SPACES)))
                        } else {
                            lineString.append(" ")
                        }
                    }

                    lineString.append(element.text)
                    lastXEnd = xStart + element.width
                }

                structuredText.append(lineString).append("\n")

                // Build one block per cluster (line-level, aligned with text output).
                // Frame values are already normalized — see Step 1.
                val blockMap = Arguments.createMap()
                blockMap.putString("text", lineString.toString())

                val frameMap = Arguments.createMap()
                frameMap.putDouble("x", cluster.uLeft)
                frameMap.putDouble("y", cluster.uTop)
                frameMap.putDouble("width", cluster.uRight - cluster.uLeft)
                frameMap.putDouble("height", cluster.uBottom - cluster.uTop)
                blockMap.putMap("frame", frameMap)

                val avgConfidence = cluster.elements.map { it.confidence }.average()
                blockMap.putDouble("confidence", avgConfidence)

                blocksArray.pushMap(blockMap)
            }

            resultMap.putString("text", structuredText.toString())
            resultMap.putArray("blocks", blocksArray)

            return resultMap
        }

        /**
         * Safely normalizes a pixel value by dividing by the dimension.
         * Returns 0.0 if the dimension is zero or negative to avoid divide-by-zero.
         */
        private fun safeNormalize(value: Double, dimension: Double): Double {
            return if (dimension > 0) value / dimension else 0.0
        }
    }
}
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
#import "DocumentScanner.h"
|
|
2
|
+
#import <Foundation/Foundation.h>
|
|
3
|
+
|
|
4
|
+
/**
 * Methods expected on the Swift "DocumentScannerManager" class, which is
 * resolved at runtime via NSClassFromString in -[DocumentScanner init].
 * NOTE(review): presumably this runtime lookup avoids a compile-time
 * dependency on the Swift module from this ObjC++ file — confirm against
 * the podspec/build setup.
 */
@protocol DocumentScannerSwiftProtocol <NSObject>
- (void)scanDocuments:(NSDictionary *)options
              resolve:(RCTPromiseResolveBlock)resolve
               reject:(RCTPromiseRejectBlock)reject;
- (void)processDocuments:(NSDictionary *)options
                 resolve:(RCTPromiseResolveBlock)resolve
                  reject:(RCTPromiseRejectBlock)reject;
@end
|
|
12
|
+
|
|
13
|
+
@implementation DocumentScanner {
  // The Swift DocumentScannerManager instance, looked up by name in -init.
  // Stays nil if the Swift class is not present in the binary.
  id _impl;
}

- (instancetype)init {
  if (self = [super init]) {
    // Resolve the Swift implementation at runtime by class name.
    Class swiftClass = NSClassFromString(@"DocumentScannerManager");
    if (swiftClass) {
      _impl = [[swiftClass alloc] init];
    }
  }
  return self;
}

// Bridges the codegen'd ScanOptions struct into an NSDictionary and forwards
// it to the Swift implementation. Only keys actually provided by JS are set;
// absent optionals are simply omitted from the dictionary.
- (void)scanDocuments:(JS::NativeDocumentScanner::ScanOptions &)options
              resolve:(RCTPromiseResolveBlock)resolve
               reject:(RCTPromiseRejectBlock)reject {
  NSMutableDictionary *dict = [NSMutableDictionary new];

  if (options.maxPageCount().has_value()) {
    dict[@"maxPageCount"] = @(options.maxPageCount().value());
  }
  if (options.quality().has_value()) {
    dict[@"quality"] = @(options.quality().value());
  }
  if (options.format()) {
    dict[@"format"] = options.format();
  }
  if (options.filter()) {
    dict[@"filter"] = options.filter();
  }
  if (options.includeBase64().has_value()) {
    dict[@"includeBase64"] = @(options.includeBase64().value());
  }
  if (options.includeText().has_value()) {
    dict[@"includeText"] = @(options.includeText().value());
  }
  if (options.textVersion().has_value()) {
    dict[@"textVersion"] = @(options.textVersion().value());
  }

  if (_impl) {
    [(id<DocumentScannerSwiftProtocol>)_impl scanDocuments:dict
                                                   resolve:resolve
                                                    reject:reject];
  } else {
    // Swift class was not found at init time; fail the JS promise explicitly.
    reject(@"impl_error", @"Swift implementation not found", nil);
  }
}

// Same bridging pattern as scanDocuments, plus conversion of the required
// images vector into an NSArray.
- (void)processDocuments:(JS::NativeDocumentScanner::ProcessOptions &)options
                 resolve:(RCTPromiseResolveBlock)resolve
                  reject:(RCTPromiseRejectBlock)reject {
  NSMutableDictionary *dict = [NSMutableDictionary new];

  /* Convert images array */
  NSMutableArray *images = [NSMutableArray new];
  for (NSString *img : options.images()) {
    [images addObject:img];
  }
  dict[@"images"] = images;

  if (options.quality().has_value()) {
    dict[@"quality"] = @(options.quality().value());
  }
  if (options.format()) {
    dict[@"format"] = options.format();
  }
  if (options.filter()) {
    dict[@"filter"] = options.filter();
  }
  if (options.includeBase64().has_value()) {
    dict[@"includeBase64"] = @(options.includeBase64().value());
  }
  if (options.includeText().has_value()) {
    dict[@"includeText"] = @(options.includeText().value());
  }
  if (options.textVersion().has_value()) {
    dict[@"textVersion"] = @(options.textVersion().value());
  }

  if (_impl) {
    [(id<DocumentScannerSwiftProtocol>)_impl processDocuments:dict
                                                      resolve:resolve
                                                       reject:reject];
  } else {
    reject(@"impl_error", @"Swift implementation not found", nil);
  }
}

// Standard TurboModule hookup: returns the codegen'd JSI binding for this module.
- (std::shared_ptr<facebook::react::TurboModule>)getTurboModule:
    (const facebook::react::ObjCTurboModule::InitParams &)params {
  return std::make_shared<facebook::react::NativeDocumentScannerSpecJSI>(
      params);
}

// Name under which this module is registered with React Native.
+ (NSString *)moduleName {
  return @"DocumentScanner";
}

@end
|