@hoshomoh/react-native-document-scanner 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/DocumentScanner.podspec +22 -0
- package/LICENSE +20 -0
- package/README.md +384 -0
- package/android/build.gradle +72 -0
- package/android/gradle.properties +17 -0
- package/android/local.properties +8 -0
- package/android/src/main/AndroidManifest.xml +8 -0
- package/android/src/main/java/com/documentscanner/DocumentScannerModule.kt +217 -0
- package/android/src/main/java/com/documentscanner/DocumentScannerPackage.kt +39 -0
- package/android/src/main/java/com/documentscanner/ImageProcessor.kt +325 -0
- package/android/src/main/java/com/documentscanner/Logger.kt +36 -0
- package/android/src/main/java/com/documentscanner/OCRConfiguration.kt +56 -0
- package/android/src/main/java/com/documentscanner/Options.kt +109 -0
- package/android/src/main/java/com/documentscanner/ScannerError.kt +18 -0
- package/android/src/main/java/com/documentscanner/TextRecognizer.kt +56 -0
- package/android/src/main/java/com/documentscanner/TextRecognizerV1.kt +68 -0
- package/android/src/main/java/com/documentscanner/TextRecognizerV2.kt +244 -0
- package/ios/DocumentScanner.h +5 -0
- package/ios/DocumentScanner.mm +113 -0
- package/ios/DocumentScannerManager.swift +148 -0
- package/ios/Errors.swift +33 -0
- package/ios/ImageProcessor.swift +78 -0
- package/ios/ImageUtil.swift +279 -0
- package/ios/Logger.swift +43 -0
- package/ios/OCRConfiguration.swift +60 -0
- package/ios/Options.swift +109 -0
- package/ios/ResponseUtil.swift +25 -0
- package/ios/ScanModels.swift +84 -0
- package/ios/TextRecognizer.swift +134 -0
- package/ios/TextRecognizerV1.swift +56 -0
- package/ios/TextRecognizerV2.swift +169 -0
- package/lib/module/NativeDocumentScanner.js +51 -0
- package/lib/module/NativeDocumentScanner.js.map +1 -0
- package/lib/module/index.js +40 -0
- package/lib/module/index.js.map +1 -0
- package/lib/module/package.json +1 -0
- package/lib/module/textReconstructor.js +147 -0
- package/lib/module/textReconstructor.js.map +1 -0
- package/lib/typescript/package.json +1 -0
- package/lib/typescript/src/NativeDocumentScanner.d.ts +191 -0
- package/lib/typescript/src/NativeDocumentScanner.d.ts.map +1 -0
- package/lib/typescript/src/index.d.ts +34 -0
- package/lib/typescript/src/index.d.ts.map +1 -0
- package/lib/typescript/src/textReconstructor.d.ts +60 -0
- package/lib/typescript/src/textReconstructor.d.ts.map +1 -0
- package/package.json +137 -0
- package/src/NativeDocumentScanner.ts +205 -0
- package/src/index.ts +61 -0
- package/src/textReconstructor.ts +212 -0
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
package com.documentscanner
|
|
2
|
+
|
|
3
|
+
import android.app.Activity
|
|
4
|
+
import android.content.Intent
|
|
5
|
+
import com.facebook.react.bridge.*
|
|
6
|
+
import com.google.mlkit.vision.documentscanner.GmsDocumentScannerOptions
|
|
7
|
+
import com.google.mlkit.vision.documentscanner.GmsDocumentScanning
|
|
8
|
+
import com.google.mlkit.vision.documentscanner.GmsDocumentScanningResult
|
|
9
|
+
import kotlinx.coroutines.CoroutineScope
|
|
10
|
+
import kotlinx.coroutines.Dispatchers
|
|
11
|
+
import kotlinx.coroutines.launch
|
|
12
|
+
import kotlinx.coroutines.tasks.await
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* Native module for Document Scanner.
|
|
16
|
+
* Handles the communication between React Native and the Android ML Kit Document Scanner.
|
|
17
|
+
* Implements ActivityEventListener to handle the result of the native scanner activity.
|
|
18
|
+
*/
|
|
19
|
+
class DocumentScannerModule(reactContext: ReactApplicationContext) :
|
|
20
|
+
NativeDocumentScannerSpec(reactContext), ActivityEventListener {
|
|
21
|
+
|
|
22
|
+
/** Promise of the scan currently in flight; `null` while no scan is pending. */
private var scanPromise: Promise? = null

/** Options parsed for the scan in flight, kept so the activity-result handler can reuse them. */
private var currentScanOptions: ScanOptions? = null

/** Scope in which image post-processing coroutines are launched (main dispatcher). */
private val scope = CoroutineScope(Dispatchers.Main)

/** Shared pipeline that turns an image URI into a result map. */
private val imageProcessor = ImageProcessor(reactContext)

init {
    // Subscribe so that onActivityResult is delivered to this module.
    reactContext.addActivityEventListener(this)
}
|
|
30
|
+
|
|
31
|
+
/** Returns the name under which this module is exposed, i.e. [NAME]. */
override fun getName(): String = NAME
|
|
34
|
+
|
|
35
|
+
/**
 * Launches the ML Kit Document Scanner activity.
 *
 * The promise is stored in [scanPromise] and settled later, either by one of the failure
 * paths in this method or by [onActivityResult]. Only one scan may be pending at a time;
 * a second call while one is pending is rejected immediately.
 *
 * @param options Dictionary containing scan configuration (maxPageCount, quality, format, etc.)
 * @param promise React Native promise to resolve with results or reject with error
 */
override fun scanDocuments(options: ReadableMap?, promise: Promise) {
    val activity: Activity? = reactApplicationContext.currentActivity
    if (activity == null) {
        val error = ScannerError.OperationFailed("Activity doesn't exist")
        promise.reject(error.code, error.message)
        return
    }

    // Prevent concurrent scans to ensure state integrity
    if (scanPromise != null) {
        val error = ScannerError.OperationFailed("A scan is already in progress")
        promise.reject(error.code, error.message)
        return
    }

    scanPromise = promise

    // Every failure path must drop BOTH pieces of per-scan state, mirroring the cleanup
    // done in onActivityResult, so stale options never outlive a failed launch.
    fun clearScanState() {
        scanPromise = null
        currentScanOptions = null
    }

    try {
        // Parse options using strongly-typed helper
        val scanOptions = ScanOptions.from(options)
        currentScanOptions = scanOptions
        Logger.log("Starting scanDocuments with options: MaxPages=${scanOptions.maxPageCount}, Quality=${scanOptions.quality}")

        // Configure GmsDocumentScannerOptions.
        // Only JPEG output is requested; pages are re-encoded afterwards by ImageProcessor.
        val scannerOptionsBuilder = GmsDocumentScannerOptions.Builder()
            .setScannerMode(GmsDocumentScannerOptions.SCANNER_MODE_FULL)
            .setResultFormats(GmsDocumentScannerOptions.RESULT_FORMAT_JPEG)
            .setGalleryImportAllowed(true)

        // A non-positive maxPageCount means "no explicit limit": leave the builder default.
        if (scanOptions.maxPageCount > 0) {
            scannerOptionsBuilder.setPageLimit(scanOptions.maxPageCount)
        }

        // Initialize the scanner client
        val scanner = GmsDocumentScanning.getClient(scannerOptionsBuilder.build())

        // Launch the scanner intent
        scanner.getStartScanIntent(activity)
            .addOnSuccessListener(activity) { intentSender ->
                try {
                    activity.startIntentSenderForResult(intentSender, START_SCAN_REQUEST_CODE, null, 0, 0, 0)
                } catch (e: Exception) {
                    val error = ScannerError.OperationFailed("Failed to start scanner activity: ${e.message}")
                    Logger.error(error.message, e)
                    scanPromise?.reject(error.code, error.message)
                    clearScanState()
                }
            }
            .addOnFailureListener { e ->
                val error = ScannerError.OperationFailed("Failed to launch scanner client: ${e.message}")
                Logger.error(error.message, e)
                scanPromise?.reject(error.code, error.message)
                clearScanState()
            }
    } catch (e: Exception) {
        // Catch-all for configuration or unexpected runtime errors during setup
        val error = ScannerError.ConfigurationError(e.message ?: "Unknown error")
        Logger.error(error.message, e)
        scanPromise?.reject(error.code, error.message)
        clearScanState()
    }
}
|
|
106
|
+
|
|
107
|
+
/**
 * Processes existing images (e.g., from Gallery) with the same [ImageProcessor] pipeline
 * that scanned pages go through: filtering, re-encoding, optional Base64 and optional OCR.
 *
 * Images are processed one after another and the promise resolves with an array holding
 * one result map per input image, in input order.
 *
 * @param options Dictionary containing input images and processing configuration.
 * @param promise React Native promise to resolve with results.
 */
override fun processDocuments(options: ReadableMap?, promise: Promise) {
    // Parsing is guarded so that a malformed options map rejects the promise instead of
    // escaping from this method with the promise left unsettled.
    val processOptions = try {
        ProcessOptions.from(options)
    } catch (e: Exception) {
        val error = ScannerError.ConfigurationError(e.message ?: "Unknown error")
        Logger.error(error.message, e)
        promise.reject(error.code, error.message)
        return
    }

    // Validate inputs immediately
    if (processOptions.images.isEmpty()) {
        val error = ScannerError.ConfigurationError("No images provided to process")
        promise.reject(error.code, error.message)
        return
    }

    Logger.log("Starting processDocuments with ${processOptions.images.size} images")

    // The coroutine starts on the scope's dispatcher; ImageProcessor.process switches to
    // Dispatchers.IO internally, so the heavy work does not run on the launching thread.
    scope.launch {
        try {
            val resultsArray = Arguments.createArray()
            // Sequentially process each image
            for (uri in processOptions.images) {
                val result = imageProcessor.process(uri, processOptions)
                resultsArray.pushMap(result)
            }
            promise.resolve(resultsArray)
        } catch (e: Exception) {
            val error = ScannerError.OperationFailed(e.message ?: "Unknown processing error")
            Logger.error("Error in processDocuments", e)
            promise.reject(error.code, error.message)
        }
    }
}
|
|
143
|
+
|
|
144
|
+
/**
 * Receives the outcome of the scanner activity started by [scanDocuments].
 *
 * Results for other request codes are ignored. For the scanner request code the pending
 * [scanPromise] is either rejected (cancelled, unexpected result code, missing result
 * object) or resolved with the processed pages, and the per-scan state is cleared.
 */
override fun onActivityResult(activity: Activity, requestCode: Int, resultCode: Int, data: Intent?) {
    if (requestCode != START_SCAN_REQUEST_CODE) return

    if (scanPromise == null) {
        Logger.warn("Received onActivityResult but scanPromise is null. Ignoring.")
        return
    }

    // Anything other than "OK with a payload" settles the promise right away.
    if (resultCode != Activity.RESULT_OK || data == null) {
        if (resultCode == Activity.RESULT_CANCELED) {
            val canceled = ScannerError.Canceled()
            Logger.log("User canceled the scanner UI")
            scanPromise?.reject(canceled.code, canceled.message)
        } else {
            val failure = ScannerError.OperationFailed("Unknown Activity result code: $resultCode")
            Logger.warn(failure.message)
            scanPromise?.reject(failure.code, failure.message)
        }
        scanPromise = null
        currentScanOptions = null
        return
    }

    val scanResult = GmsDocumentScanningResult.fromActivityResultIntent(data)
    if (scanResult == null) {
        val failure = ScannerError.OperationFailed("Scanning result object was null")
        Logger.error(failure.message)
        scanPromise?.reject(failure.code, failure.message)
        scanPromise = null
        currentScanOptions = null
        return
    }

    val pages = scanResult.pages
    Logger.log("Scan successful. Received ${pages?.size ?: 0} pages. Starting processing...")

    // Post-processing (re-encoding, Base64, OCR) happens inside ImageProcessor.process,
    // which moves the work to Dispatchers.IO.
    scope.launch {
        try {
            val resultsArray = Arguments.createArray()
            // Default options are only a fallback; the options stored by scanDocuments are expected here.
            val options = currentScanOptions ?: ScanOptions.from(null)

            for (page in pages.orEmpty()) {
                val processedResult = imageProcessor.process(page.imageUri.toString(), options)
                resultsArray.pushMap(processedResult)
            }
            scanPromise?.resolve(resultsArray)
        } catch (e: Exception) {
            val failure = ScannerError.OperationFailed("Post-processing failed: ${e.message}")
            Logger.error(failure.message, e)
            scanPromise?.reject(failure.code, failure.message)
        } finally {
            // State is released only after processing, so a new scan cannot start mid-way.
            scanPromise = null
            currentScanOptions = null
        }
    }
}
|
|
208
|
+
|
|
209
|
+
/** Intentionally empty: this module only reacts to [onActivityResult]. */
override fun onNewIntent(intent: Intent) = Unit
|
|
212
|
+
|
|
213
|
+
companion object {
    /** Module name returned by getName() and used by the package when registering the module. */
    const val NAME = "DocumentScanner"

    /** Request code that ties the scanner activity launch to its onActivityResult callback. */
    const val START_SCAN_REQUEST_CODE = 1452
}
|
|
217
|
+
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
package com.documentscanner
|
|
2
|
+
|
|
3
|
+
import com.facebook.react.TurboReactPackage
|
|
4
|
+
import com.facebook.react.bridge.NativeModule
|
|
5
|
+
import com.facebook.react.bridge.ReactApplicationContext
|
|
6
|
+
import com.facebook.react.module.model.ReactModuleInfo
|
|
7
|
+
import com.facebook.react.module.model.ReactModuleInfoProvider
|
|
8
|
+
import java.util.HashMap
|
|
9
|
+
|
|
10
|
+
/**
 * React Native package that exposes [DocumentScannerModule].
 *
 * The module is created on demand by name and is advertised to React Native as a TurboModule.
 */
class DocumentScannerPackage : TurboReactPackage() {

    /** Creates the scanner module when [name] matches its registered name, otherwise `null`. */
    override fun getModule(name: String, reactContext: ReactApplicationContext): NativeModule? =
        when (name) {
            DocumentScannerModule.NAME -> DocumentScannerModule(reactContext)
            else -> null
        }

    /** Supplies the module metadata map, containing a single entry for the scanner module. */
    override fun getReactModuleInfoProvider(): ReactModuleInfoProvider = ReactModuleInfoProvider {
        val scannerInfo = ReactModuleInfo(
            DocumentScannerModule.NAME,
            DocumentScannerModule.NAME,
            false, // canOverrideExistingModule
            false, // needsEagerInit
            true, // hasConstants
            false, // isCxxModule
            true // isTurboModule
        )
        hashMapOf(DocumentScannerModule.NAME to scannerInfo)
    }
}
|
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
package com.documentscanner
|
|
2
|
+
|
|
3
|
+
import android.content.Context
|
|
4
|
+
import android.graphics.Bitmap
|
|
5
|
+
import android.graphics.BitmapFactory
|
|
6
|
+
import android.graphics.Canvas
|
|
7
|
+
import android.graphics.ColorMatrix
|
|
8
|
+
import android.graphics.ColorMatrixColorFilter
|
|
9
|
+
import android.graphics.Matrix
|
|
10
|
+
import android.graphics.Paint
|
|
11
|
+
import android.net.Uri
|
|
12
|
+
import androidx.exifinterface.media.ExifInterface
|
|
13
|
+
import android.util.Base64
|
|
14
|
+
import com.facebook.react.bridge.Arguments
|
|
15
|
+
import com.facebook.react.bridge.ReadableMap
|
|
16
|
+
import com.facebook.react.bridge.WritableMap
|
|
17
|
+
import kotlinx.coroutines.Dispatchers
|
|
18
|
+
import kotlinx.coroutines.withContext
|
|
19
|
+
import java.io.ByteArrayOutputStream
|
|
20
|
+
import java.io.File
|
|
21
|
+
import java.io.FileOutputStream
|
|
22
|
+
import java.io.InputStream
|
|
23
|
+
import java.util.UUID
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* Utility class for image processing operations.
|
|
27
|
+
* Handles loading, rotating, filtering, and running OCR on images.
|
|
28
|
+
* All operations are executed on a background IO dispatcher.
|
|
29
|
+
*/
|
|
30
|
+
class ImageProcessor(private val context: Context) {
|
|
31
|
+
|
|
32
|
+
/**
 * Main processing pipeline for a single image.
 *
 * 1. Loads the bitmap (EXIF orientation is applied by [loadBitmap]).
 * 2. Applies the requested filter unless it is "color".
 * 3. Encodes the bitmap (PNG when format is "png", JPEG otherwise) into a new cache file.
 * 4. Adds the same encoded bytes as Base64 (optional).
 * 5. Runs OCR on the saved file (optional).
 * 6. Attaches a "metadata" map.
 *
 * Failures are logged and swallowed so that one bad image does not abort a batch: the
 * returned map then still carries the original URI under "uri", plus the metadata.
 *
 * @param uriStr The source URI of the image (file://, content://, data:...)
 * @param options Processing options (quality, format, filters, etc.)
 * @return WritableMap containing uri, metadata and, when requested, base64, text and blocks.
 */
suspend fun process(uriStr: String, options: BaseOptions): WritableMap = withContext(Dispatchers.IO) {
    val result = Arguments.createMap()
    result.putString("uri", uriStr) // Default to original URI if something fails

    try {
        val loaded = loadBitmap(Uri.parse(uriStr))
        // An undecodable image falls through to the metadata step instead of returning
        // early, so every result map has the same shape.
        if (loaded != null) {
            // 1. Apply Filters
            val filter = options.filter
            val bitmap = if (filter != "color") {
                Logger.log("Applying filter: $filter")
                applyFilter(loaded, filter)
            } else {
                loaded
            }

            // 2. Save Processed Image
            // We save to the cache directory to deliver a file that matches the requested format/quality.
            val qualityInt = (options.quality * 100).toInt().coerceIn(0, 100)
            val isPng = options.format == "png"
            val compressFormat = if (isPng) Bitmap.CompressFormat.PNG else Bitmap.CompressFormat.JPEG
            val newFile = File(context.cacheDir, "scan_${UUID.randomUUID()}.${if (isPng) "png" else "jpg"}")

            // When Base64 is requested the bitmap is encoded once in memory and the same
            // bytes are written to disk; otherwise it is streamed straight to the file.
            val encodedBytes: ByteArray? = if (options.includeBase64) {
                val buffer = ByteArrayOutputStream()
                bitmap.compress(compressFormat, qualityInt, buffer)
                buffer.toByteArray()
            } else {
                null
            }

            // `use` closes the stream even when writing or compressing throws.
            FileOutputStream(newFile).use { fileStream ->
                if (encodedBytes != null) {
                    fileStream.write(encodedBytes)
                } else {
                    bitmap.compress(compressFormat, qualityInt, fileStream)
                }
            }
            result.putString("uri", Uri.fromFile(newFile).toString())

            // 3. Base64 Generation
            if (encodedBytes != null) {
                result.putString("base64", Base64.encodeToString(encodedBytes, Base64.NO_WRAP))
            }

            // 4. Optical Character Recognition (OCR)
            if (options.includeText) {
                Logger.log("Running OCR on processed image (Version ${options.textVersion})")
                val ocrResult = TextRecognizer.processImage(context, Uri.fromFile(newFile), options.textVersion)
                result.putString("text", ocrResult.getString("text"))
                if (ocrResult.hasKey("blocks")) {
                    result.putArray("blocks", ocrResult.getArray("blocks"))
                }
            }
        }
    } catch (e: Exception) {
        Logger.error("Failed to process image: $uriStr", e)
        // Swallow specific image error to allow other images in a batch to proceed.
    }

    // 5. Metadata
    val metadataMap = Arguments.createMap()
    metadataMap.putString("platform", "android")
    metadataMap.putInt("textVersion", options.textVersion)
    metadataMap.putString("filter", options.filter)
    metadataMap.putString("ocrEngine", if (options.includeText) "MLKit" else "none")
    result.putMap("metadata", metadataMap)

    result
}
|
|
108
|
+
|
|
109
|
+
/**
 * Loads a Bitmap from a URI and corrects its orientation from EXIF data.
 *
 * `data:` URIs are Base64-decoded directly and returned without an EXIF pass. Every other
 * URI is opened through the content resolver twice: once to decode the pixels and once to
 * read the EXIF orientation tag.
 *
 * @param uri Source of the image.
 * @return The decoded (and, where EXIF says so, re-oriented) bitmap, or `null` when the
 *   image cannot be opened or decoded.
 */
private fun loadBitmap(uri: Uri): Bitmap? {
    try {
        // Handle data URI (Base64)
        if (uri.scheme == "data") {
            val base64Data = uri.toString().substringAfter(",")
            val decodedBytes = Base64.decode(base64Data, Base64.DEFAULT)
            return BitmapFactory.decodeByteArray(decodedBytes, 0, decodedBytes.size)
        }

        // Handle Content/File URIs. `use` closes the stream even if decoding throws.
        val originalBitmap = context.contentResolver.openInputStream(uri)?.use { stream ->
            BitmapFactory.decodeStream(stream)
        }

        if (originalBitmap == null) {
            Logger.warn("Failed to decode bitmap from $uri")
            return null
        }

        // Read EXIF to determine orientation.
        // Reopening the stream happens inside this try so that a failure here falls back
        // to the already decoded bitmap instead of discarding it.
        return try {
            val orientation = context.contentResolver.openInputStream(uri)?.use { stream ->
                ExifInterface(stream)
                    .getAttributeInt(ExifInterface.TAG_ORIENTATION, ExifInterface.ORIENTATION_NORMAL)
            } ?: return originalBitmap
            rotateBitmap(originalBitmap, orientation)
        } catch (e: Exception) {
            // If EXIF fails (e.g. some streams don't support it), fail safe and return original
            Logger.warn("Failed to read EXIF for rotation: ${e.message}")
            originalBitmap
        }
    } catch (e: Exception) {
        Logger.error("Error loading bitmap", e)
        return null
    }
}
|
|
152
|
+
|
|
153
|
+
/**
 * Re-orients a bitmap according to an EXIF orientation value.
 *
 * Handles the three pure rotations as well as the four mirrored orientations
 * (flip horizontal, flip vertical, transpose, transverse). Any other value, including
 * `ORIENTATION_NORMAL` and `ORIENTATION_UNDEFINED`, returns [bitmap] itself.
 *
 * @param bitmap The decoded image as stored.
 * @param orientation One of the `ExifInterface.ORIENTATION_*` constants.
 * @return A new upright bitmap, or the same instance when no transform is needed.
 */
private fun rotateBitmap(bitmap: Bitmap, orientation: Int): Bitmap {
    val matrix = Matrix()
    when (orientation) {
        ExifInterface.ORIENTATION_ROTATE_90 -> matrix.setRotate(90f)
        ExifInterface.ORIENTATION_ROTATE_180 -> matrix.setRotate(180f)
        ExifInterface.ORIENTATION_ROTATE_270 -> matrix.setRotate(270f)
        ExifInterface.ORIENTATION_FLIP_HORIZONTAL -> matrix.setScale(-1f, 1f)
        ExifInterface.ORIENTATION_FLIP_VERTICAL -> matrix.setScale(1f, -1f)
        // Transpose = rotate 90° clockwise, then mirror horizontally.
        ExifInterface.ORIENTATION_TRANSPOSE -> {
            matrix.setRotate(90f)
            matrix.postScale(-1f, 1f)
        }
        // Transverse = rotate 90° counter-clockwise, then mirror horizontally.
        ExifInterface.ORIENTATION_TRANSVERSE -> {
            matrix.setRotate(-90f)
            matrix.postScale(-1f, 1f)
        }
        else -> return bitmap
    }
    return Bitmap.createBitmap(bitmap, 0, 0, bitmap.width, bitmap.height, matrix, true)
}
|
|
166
|
+
|
|
167
|
+
/**
 * Dispatches to the filter implementation named by [filterType].
 *
 * Recognised names: "grayscale", "monochrome" (high contrast), "denoise", "sharpen" and
 * "ocrOptimized". Any other value, including `null`, returns [src] untouched.
 */
private fun applyFilter(src: Bitmap, filterType: String?): Bitmap = when (filterType) {
    "ocrOptimized" -> applyOcrOptimizedPipeline(src)
    "sharpen" -> applyConvolutionFilter(src, SHARPEN_KERNEL)
    "denoise" -> applyConvolutionFilter(src, DENOISE_KERNEL)
    "monochrome" -> applyColorMatrixFilter(src, createMonochromeMatrix())
    "grayscale" -> applyColorMatrixFilter(src, createGrayscaleMatrix())
    else -> src
}
|
|
181
|
+
|
|
182
|
+
/**
 * Runs the three-stage OCR preparation chain: denoise, then sharpen, then monochrome.
 *
 * Denoising comes first so that sharpening does not amplify noise; the final monochrome
 * step produces a high-contrast black & white image.
 *
 * @param src The source bitmap to process.
 * @return The bitmap produced by the last stage.
 */
private fun applyOcrOptimizedPipeline(src: Bitmap): Bitmap {
    val denoised = applyConvolutionFilter(src, DENOISE_KERNEL)
    val sharpened = applyConvolutionFilter(denoised, SHARPEN_KERNEL)
    return applyColorMatrixFilter(sharpened, createMonochromeMatrix())
}
|
|
203
|
+
|
|
204
|
+
/**
 * Draws [src] onto a new ARGB_8888 bitmap of the same size through a colour-matrix filter.
 *
 * @param src The source bitmap to filter; it is not modified.
 * @param matrix The ColorMatrix defining the colour transformation.
 * @return The newly created, filtered bitmap.
 */
private fun applyColorMatrixFilter(src: Bitmap, matrix: ColorMatrix): Bitmap {
    val output = Bitmap.createBitmap(src.width, src.height, Bitmap.Config.ARGB_8888)
    val filterPaint = Paint().apply { colorFilter = ColorMatrixColorFilter(matrix) }
    Canvas(output).drawBitmap(src, 0f, 0f, filterPaint)
    return output
}
|
|
220
|
+
|
|
221
|
+
/**
 * Builds a ColorMatrix with saturation set to zero, i.e. a grayscale conversion.
 *
 * @return The grayscale ColorMatrix.
 */
private fun createGrayscaleMatrix(): ColorMatrix = ColorMatrix().apply { setSaturation(0f) }
|
|
232
|
+
|
|
233
|
+
/**
 * Builds the "monochrome" ColorMatrix: zero saturation followed by a contrast boost.
 *
 * The contrast step scales each colour channel by 1.3 and shifts it so that mid-gray
 * (127.5) stays fixed; alpha is left unchanged.
 *
 * @return A ColorMatrix that desaturates and then increases contrast.
 */
private fun createMonochromeMatrix(): ColorMatrix {
    val scale = 1.3f
    val translate = (-.5f * scale + .5f) * 255f
    val contrast = ColorMatrix(
        floatArrayOf(
            scale, 0f, 0f, 0f, translate,
            0f, scale, 0f, 0f, translate,
            0f, 0f, scale, 0f, translate,
            0f, 0f, 0f, 1f, 0f
        )
    )
    return ColorMatrix().apply {
        setSaturation(0f)
        postConcat(contrast)
    }
}
|
|
256
|
+
|
|
257
|
+
/**
 * Applies a 3x3 convolution kernel to the bitmap.
 * Used for spatial filtering effects like blur (denoise) and sharpen.
 *
 * Only interior pixels are convolved. The outermost one-pixel border is copied from the
 * source unchanged, so the output has no blank frame and images narrower or shorter than
 * three pixels come back as plain copies. Alpha is always taken from the source pixel.
 *
 * @param src The source bitmap to filter; it is not modified.
 * @param kernel A 9-element float array holding the 3x3 kernel in row-major order
 *   (top-left to bottom-right).
 * @return A new ARGB_8888 bitmap with the convolution applied.
 */
private fun applyConvolutionFilter(src: Bitmap, kernel: FloatArray): Bitmap {
    val width = src.width
    val height = src.height
    val pixels = IntArray(width * height)
    src.getPixels(pixels, 0, width, 0, 0, width, height)

    // Start from a copy of the source so the untouched border keeps its original pixels
    // instead of being left as transparent black.
    val result = pixels.copyOf()

    // Flat-array offsets of the 3x3 neighbourhood, in the same order as the kernel.
    val neighborOffsets = intArrayOf(
        -width - 1, -width, -width + 1,
        -1, 0, 1,
        width - 1, width, width + 1
    )

    for (y in 1 until height - 1) {
        for (x in 1 until width - 1) {
            val center = y * width + x
            var r = 0f
            var g = 0f
            var b = 0f
            for (k in 0 until 9) {
                val px = pixels[center + neighborOffsets[k]]
                r += ((px shr 16) and 0xFF) * kernel[k]
                g += ((px shr 8) and 0xFF) * kernel[k]
                b += (px and 0xFF) * kernel[k]
            }
            val a = (pixels[center] shr 24) and 0xFF
            result[center] = (a shl 24) or
                (r.toInt().coerceIn(0, 255) shl 16) or
                (g.toInt().coerceIn(0, 255) shl 8) or
                b.toInt().coerceIn(0, 255)
        }
    }

    val dest = Bitmap.createBitmap(width, height, Bitmap.Config.ARGB_8888)
    dest.setPixels(result, 0, width, 0, 0, width, height)
    return dest
}
|
|
301
|
+
|
|
302
|
+
companion object {
    /**
     * 3x3 box-blur kernel: all nine weights are 1/9, so each output pixel is the average
     * of the pixel and its eight neighbours. Used by the "denoise" filter.
     */
    private val DENOISE_KERNEL = FloatArray(9) { 1f / 9f }

    /**
     * 3x3 sharpen kernel: weight 5 on the centre and -1 on the four direct neighbours,
     * which emphasises differences between adjacent pixels. Used by the "sharpen" filter.
     */
    private val SHARPEN_KERNEL = floatArrayOf(
        0f, -1f, 0f,
        -1f, 5f, -1f,
        0f, -1f, 0f
    )
}
|
|
325
|
+
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
package com.documentscanner
|
|
2
|
+
|
|
3
|
+
import android.util.Log
|
|
4
|
+
|
|
5
|
+
/**
 * Single entry point for the library's log output.
 * Every message goes to Android's Log under one tag and is prefixed with a level emoji.
 */
object Logger {
    private const val TAG = "DocumentScanner"

    /** Writes [message] at debug level. */
    fun log(message: String) {
        Log.d(TAG, "ℹ️ $message")
    }

    /** Writes [message] at warning level. */
    fun warn(message: String) {
        Log.w(TAG, "⚠️ $message")
    }

    /** Writes [message] at error level, attaching [throwable] when one is supplied. */
    fun error(message: String, throwable: Throwable? = null) {
        val text = "❌ $message"
        if (throwable == null) {
            Log.e(TAG, text)
        } else {
            Log.e(TAG, text, throwable)
        }
    }
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
package com.documentscanner
|
|
2
|
+
|
|
3
|
+
/**
 * Tuning constants for the OCR layout reconstruction.
 * Changing these values alters how the TextRecognizer groups raw text blocks into lines
 * and how it spaces them.
 */
object OCRConfiguration {

    // --- V2 Clustering Heuristics ---

    /**
     * Minimum height ratio (minH / maxH) two blocks must reach to be clustered together.
     */
    const val HEIGHT_COMPATIBILITY_THRESHOLD = 0.40

    /**
     * Minimum vertical overlap, expressed as intersection height divided by the smaller
     * block height.
     */
    const val OVERLAP_RATIO_THRESHOLD = 0.50

    /**
     * Largest allowed vertical distance between block centers, as a multiple of the
     * typical line height.
     */
    const val CENTERLINE_DISTANCE_FACTOR = 0.70

    /**
     * Growth limits that keep a line from becoming too tall as blocks are merged into it.
     */
    const val STACKED_GROWTH_LIMIT = 1.2
    const val SKEWED_GROWTH_LIMIT = 2.0

    // --- Spacing & Reconstruction ---

    /**
     * Gap width, relative to the median height, at which extra spaces are inserted.
     */
    const val ADAPTIVE_SPACING_FACTOR = 1.0

    /**
     * Width of one space character, relative to the median height.
     */
    const val SPACE_WIDTH_FACTOR = 0.3

    /**
     * Upper bound on the number of consecutive spaces inserted.
     */
    const val MAX_SPACES = 10
}
|