@hoshomoh/react-native-document-scanner 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/DocumentScanner.podspec +22 -0
  2. package/LICENSE +20 -0
  3. package/README.md +384 -0
  4. package/android/build.gradle +72 -0
  5. package/android/gradle.properties +17 -0
  6. package/android/local.properties +8 -0
  7. package/android/src/main/AndroidManifest.xml +8 -0
  8. package/android/src/main/java/com/documentscanner/DocumentScannerModule.kt +217 -0
  9. package/android/src/main/java/com/documentscanner/DocumentScannerPackage.kt +39 -0
  10. package/android/src/main/java/com/documentscanner/ImageProcessor.kt +325 -0
  11. package/android/src/main/java/com/documentscanner/Logger.kt +36 -0
  12. package/android/src/main/java/com/documentscanner/OCRConfiguration.kt +56 -0
  13. package/android/src/main/java/com/documentscanner/Options.kt +109 -0
  14. package/android/src/main/java/com/documentscanner/ScannerError.kt +18 -0
  15. package/android/src/main/java/com/documentscanner/TextRecognizer.kt +56 -0
  16. package/android/src/main/java/com/documentscanner/TextRecognizerV1.kt +68 -0
  17. package/android/src/main/java/com/documentscanner/TextRecognizerV2.kt +244 -0
  18. package/ios/DocumentScanner.h +5 -0
  19. package/ios/DocumentScanner.mm +113 -0
  20. package/ios/DocumentScannerManager.swift +148 -0
  21. package/ios/Errors.swift +33 -0
  22. package/ios/ImageProcessor.swift +78 -0
  23. package/ios/ImageUtil.swift +279 -0
  24. package/ios/Logger.swift +43 -0
  25. package/ios/OCRConfiguration.swift +60 -0
  26. package/ios/Options.swift +109 -0
  27. package/ios/ResponseUtil.swift +25 -0
  28. package/ios/ScanModels.swift +84 -0
  29. package/ios/TextRecognizer.swift +134 -0
  30. package/ios/TextRecognizerV1.swift +56 -0
  31. package/ios/TextRecognizerV2.swift +169 -0
  32. package/lib/module/NativeDocumentScanner.js +51 -0
  33. package/lib/module/NativeDocumentScanner.js.map +1 -0
  34. package/lib/module/index.js +40 -0
  35. package/lib/module/index.js.map +1 -0
  36. package/lib/module/package.json +1 -0
  37. package/lib/module/textReconstructor.js +147 -0
  38. package/lib/module/textReconstructor.js.map +1 -0
  39. package/lib/typescript/package.json +1 -0
  40. package/lib/typescript/src/NativeDocumentScanner.d.ts +191 -0
  41. package/lib/typescript/src/NativeDocumentScanner.d.ts.map +1 -0
  42. package/lib/typescript/src/index.d.ts +34 -0
  43. package/lib/typescript/src/index.d.ts.map +1 -0
  44. package/lib/typescript/src/textReconstructor.d.ts +60 -0
  45. package/lib/typescript/src/textReconstructor.d.ts.map +1 -0
  46. package/package.json +137 -0
  47. package/src/NativeDocumentScanner.ts +205 -0
  48. package/src/index.ts +61 -0
  49. package/src/textReconstructor.ts +212 -0
@@ -0,0 +1,217 @@
1
+ package com.documentscanner
2
+
3
+ import android.app.Activity
4
+ import android.content.Intent
5
+ import com.facebook.react.bridge.*
6
+ import com.google.mlkit.vision.documentscanner.GmsDocumentScannerOptions
7
+ import com.google.mlkit.vision.documentscanner.GmsDocumentScanning
8
+ import com.google.mlkit.vision.documentscanner.GmsDocumentScanningResult
9
+ import kotlinx.coroutines.CoroutineScope
10
+ import kotlinx.coroutines.Dispatchers
11
+ import kotlinx.coroutines.launch
12
+ import kotlinx.coroutines.tasks.await
13
+
14
/**
 * React Native bridge module for the Document Scanner.
 *
 * Launches the ML Kit document scanner UI from the current activity, receives its outcome through
 * [onActivityResult], and passes every scanned page through the shared [ImageProcessor] pipeline
 * before settling the JavaScript promise. Only one scan may be in flight at a time; the pending
 * promise and its options are held in [scanPromise] and [currentScanOptions].
 */
class DocumentScannerModule(reactContext: ReactApplicationContext) :
    NativeDocumentScannerSpec(reactContext), ActivityEventListener {

    /** Promise of the scan currently in flight, or `null` when no scan is running. */
    private var scanPromise: Promise? = null

    /** Options captured when the in-flight scan started; reused for post-processing its pages. */
    private var currentScanOptions: ScanOptions? = null

    /** Parent job of [scope]. A supervisor, so one failed coroutine cannot cancel later work. */
    private val scopeJob = kotlinx.coroutines.SupervisorJob()
    private val scope = CoroutineScope(scopeJob + Dispatchers.Main)
    private val imageProcessor = ImageProcessor(reactContext)

    init {
        reactContext.addActivityEventListener(this)
    }

    override fun getName(): String = NAME

    /**
     * Releases what this module registered in its initializer: the activity listener and any
     * coroutines still running in [scope]. Pending scan state is dropped without settling.
     */
    override fun invalidate() {
        reactApplicationContext.removeActivityEventListener(this)
        scopeJob.cancel()
        scanPromise = null
        currentScanOptions = null
        super.invalidate()
    }

    /**
     * Launches the ML Kit Document Scanner activity.
     *
     * Rejects immediately when there is no current activity or when another scan is already
     * pending. Otherwise the promise is stored and settled later, either by a launch failure
     * here or by [onActivityResult].
     *
     * @param options Dictionary containing scan configuration (maxPageCount, quality, format, etc.)
     * @param promise React Native promise to resolve with results or reject with error
     */
    override fun scanDocuments(options: ReadableMap?, promise: Promise) {
        val activity: Activity? = reactApplicationContext.currentActivity
        if (activity == null) {
            val error = ScannerError.OperationFailed("Activity doesn't exist")
            promise.reject(error.code, error.message)
            return
        }

        // Prevent concurrent scans to ensure state integrity
        if (scanPromise != null) {
            val error = ScannerError.OperationFailed("A scan is already in progress")
            promise.reject(error.code, error.message)
            return
        }

        scanPromise = promise

        try {
            // Parse options using strongly-typed helper
            val scanOptions = ScanOptions.from(options)
            currentScanOptions = scanOptions
            Logger.log("Starting scanDocuments with options: MaxPages=${scanOptions.maxPageCount}, Quality=${scanOptions.quality}")

            // Only JPEG output is requested from the scanner; every page is re-encoded by
            // ImageProcessor afterwards according to the caller's options.
            val scannerOptionsBuilder = GmsDocumentScannerOptions.Builder()
                .setScannerMode(GmsDocumentScannerOptions.SCANNER_MODE_FULL)
                .setResultFormats(GmsDocumentScannerOptions.RESULT_FORMAT_JPEG)
                .setGalleryImportAllowed(true)

            // A non-positive maxPageCount means "no explicit limit": leave the builder default.
            if (scanOptions.maxPageCount > 0) {
                scannerOptionsBuilder.setPageLimit(scanOptions.maxPageCount)
            }

            val scanner = GmsDocumentScanning.getClient(scannerOptionsBuilder.build())

            scanner.getStartScanIntent(activity)
                .addOnSuccessListener(activity) { intentSender ->
                    try {
                        activity.startIntentSenderForResult(intentSender, START_SCAN_REQUEST_CODE, null, 0, 0, 0)
                    } catch (e: Exception) {
                        val error = ScannerError.OperationFailed("Failed to start scanner activity: ${e.message}")
                        Logger.error(error.message, e)
                        takePendingScan()?.reject(error.code, error.message)
                    }
                }
                .addOnFailureListener { e ->
                    val error = ScannerError.OperationFailed("Failed to launch scanner client: ${e.message}")
                    Logger.error(error.message, e)
                    takePendingScan()?.reject(error.code, error.message)
                }
        } catch (e: Exception) {
            // Catch-all for configuration or unexpected runtime errors during setup
            val error = ScannerError.ConfigurationError(e.message ?: "Unknown error")
            Logger.error(error.message, e)
            takePendingScan()?.reject(error.code, error.message)
        }
    }

    /**
     * Processes existing images (e.g., from Gallery) with the same pipeline as scanned documents.
     *
     * Option parsing failures and an empty image list reject the promise right away; otherwise the
     * images are processed one after another in a coroutine and the promise resolves with an array
     * holding one result map per image.
     *
     * @param options Dictionary containing input images and processing configuration.
     * @param promise React Native promise to resolve with results.
     */
    override fun processDocuments(options: ReadableMap?, promise: Promise) {
        // Parsing happens inside a try so malformed options reject the promise instead of throwing
        // out of the bridge call.
        val processOptions = try {
            ProcessOptions.from(options)
        } catch (e: Exception) {
            val error = ScannerError.ConfigurationError(e.message ?: "Unknown error")
            Logger.error(error.message, e)
            promise.reject(error.code, error.message)
            return
        }

        if (processOptions.images.isEmpty()) {
            val error = ScannerError.ConfigurationError("No images provided to process")
            promise.reject(error.code, error.message)
            return
        }

        Logger.log("Starting processDocuments with ${processOptions.images.size} images")

        scope.launch {
            try {
                val resultsArray = Arguments.createArray()
                // Sequentially process each image
                for (uri in processOptions.images) {
                    resultsArray.pushMap(imageProcessor.process(uri, processOptions))
                }
                promise.resolve(resultsArray)
            } catch (e: kotlinx.coroutines.CancellationException) {
                // Cancellation is not a processing failure; let it propagate.
                throw e
            } catch (e: Exception) {
                val error = ScannerError.OperationFailed(e.message ?: "Unknown processing error")
                Logger.error("Error in processDocuments", e)
                promise.reject(error.code, error.message)
            }
        }
    }

    /**
     * Handles the result from the native scanner activity.
     *
     * Results for other request codes are ignored. A result that arrives while no scan is pending
     * is logged and dropped.
     */
    override fun onActivityResult(activity: Activity, requestCode: Int, resultCode: Int, data: Intent?) {
        if (requestCode != START_SCAN_REQUEST_CODE) return

        val pending = scanPromise
        if (pending == null) {
            Logger.warn("Received onActivityResult but scanPromise is null. Ignoring.")
            return
        }

        when {
            resultCode == Activity.RESULT_OK && data != null -> handleScanResult(pending, data)

            resultCode == Activity.RESULT_CANCELED -> {
                val error = ScannerError.Canceled()
                Logger.log("User canceled the scanner UI")
                takePendingScan()?.reject(error.code, error.message)
            }

            else -> {
                val error = ScannerError.OperationFailed("Unknown Activity result code: $resultCode")
                Logger.warn(error.message)
                takePendingScan()?.reject(error.code, error.message)
            }
        }
    }

    override fun onNewIntent(intent: Intent) {
        // No-op: We only care about onActivityResult
    }

    /**
     * Extracts the scanned pages from [data] and post-processes them in a coroutine.
     *
     * The scan stays marked as pending until post-processing ends, so a second scan cannot start
     * while pages are still being processed.
     */
    private fun handleScanResult(pending: Promise, data: Intent) {
        val result = GmsDocumentScanningResult.fromActivityResultIntent(data)
        if (result == null) {
            val error = ScannerError.OperationFailed("Scanning result object was null")
            Logger.error(error.message)
            takePendingScan()?.reject(error.code, error.message)
            return
        }

        val pages = result.pages
        Logger.log("Scan successful. Received ${pages?.size ?: 0} pages. Starting processing...")

        scope.launch {
            try {
                val resultsArray = Arguments.createArray()
                // Fallback to default options if somehow missing (should not happen in normal flow)
                val options = currentScanOptions ?: ScanOptions.from(null)

                for (page in pages.orEmpty()) {
                    // Pass through the shared ImageProcessor pipeline
                    resultsArray.pushMap(imageProcessor.process(page.imageUri.toString(), options))
                }
                pending.resolve(resultsArray)
            } catch (e: kotlinx.coroutines.CancellationException) {
                // Cancellation is not a processing failure; let it propagate.
                throw e
            } catch (e: Exception) {
                val error = ScannerError.OperationFailed("Post-processing failed: ${e.message}")
                Logger.error(error.message, e)
                pending.reject(error.code, error.message)
            } finally {
                scanPromise = null
                currentScanOptions = null
            }
        }
    }

    /**
     * Returns the pending scan promise (if any) and resets all scan state, so every failure path
     * clears both [scanPromise] and [currentScanOptions] the same way.
     */
    private fun takePendingScan(): Promise? {
        val pending = scanPromise
        scanPromise = null
        currentScanOptions = null
        return pending
    }

    companion object {
        const val NAME = "DocumentScanner"
        const val START_SCAN_REQUEST_CODE = 1452
    }
}
@@ -0,0 +1,39 @@
1
+ package com.documentscanner
2
+
3
+ import com.facebook.react.TurboReactPackage
4
+ import com.facebook.react.bridge.NativeModule
5
+ import com.facebook.react.bridge.ReactApplicationContext
6
+ import com.facebook.react.module.model.ReactModuleInfo
7
+ import com.facebook.react.module.model.ReactModuleInfoProvider
8
+ import java.util.HashMap
9
+
10
/**
 * Package entry point that exposes [DocumentScannerModule] to React Native.
 * The module is created on demand by name and advertised as a TurboModule.
 */
class DocumentScannerPackage : TurboReactPackage() {

    /** Creates the scanner module when [name] matches its registered name; otherwise returns null. */
    override fun getModule(name: String, reactContext: ReactApplicationContext): NativeModule? =
        when (name) {
            DocumentScannerModule.NAME -> DocumentScannerModule(reactContext)
            else -> null
        }

    /** Supplies the module metadata map containing the single entry for the scanner module. */
    override fun getReactModuleInfoProvider(): ReactModuleInfoProvider =
        ReactModuleInfoProvider {
            val scannerInfo = ReactModuleInfo(
                DocumentScannerModule.NAME,
                DocumentScannerModule.NAME,
                false, // canOverrideExistingModule
                false, // needsEagerInit
                true, // hasConstants
                false, // isCxxModule
                true // isTurboModule
            )
            hashMapOf(DocumentScannerModule.NAME to scannerInfo)
        }
}
@@ -0,0 +1,325 @@
1
+ package com.documentscanner
2
+
3
+ import android.content.Context
4
+ import android.graphics.Bitmap
5
+ import android.graphics.BitmapFactory
6
+ import android.graphics.Canvas
7
+ import android.graphics.ColorMatrix
8
+ import android.graphics.ColorMatrixColorFilter
9
+ import android.graphics.Matrix
10
+ import android.graphics.Paint
11
+ import android.net.Uri
12
+ import androidx.exifinterface.media.ExifInterface
13
+ import android.util.Base64
14
+ import com.facebook.react.bridge.Arguments
15
+ import com.facebook.react.bridge.ReadableMap
16
+ import com.facebook.react.bridge.WritableMap
17
+ import kotlinx.coroutines.Dispatchers
18
+ import kotlinx.coroutines.withContext
19
+ import java.io.ByteArrayOutputStream
20
+ import java.io.File
21
+ import java.io.FileOutputStream
22
+ import java.io.InputStream
23
+ import java.util.UUID
24
+
25
/**
 * Image processing pipeline shared by scanning and gallery import.
 * Handles loading (with EXIF rotation), filtering, re-encoding, Base64 export and OCR.
 * [process] switches to [Dispatchers.IO] itself, so it can be called from any dispatcher.
 */
class ImageProcessor(private val context: Context) {

    /**
     * Main processing pipeline.
     * 1. Loads the bitmap (applying EXIF rotation).
     * 2. Applies the requested filter, unless it is "color".
     * 3. Encodes the result to a new file in the cache directory and reports its URI.
     * 4. Adds the encoded file as Base64 (optional).
     * 5. Runs OCR on the encoded file (optional).
     * 6. Attaches a "metadata" map describing how the image was processed.
     *
     * Failures for a single image are logged and swallowed so that other images in a batch can
     * still be processed; in that case "uri" keeps the last value set before the failure (the
     * original URI if nothing was written). Coroutine cancellation is rethrown, never swallowed.
     *
     * @param uriStr The source URI of the image (file://, content://, data:...)
     * @param options Processing options (quality, format, filters, etc.)
     * @return WritableMap containing uri, metadata and, when requested, base64, text and blocks.
     */
    suspend fun process(uriStr: String, options: BaseOptions): WritableMap = withContext(Dispatchers.IO) {
        val result = Arguments.createMap()
        result.putString("uri", uriStr) // Default to original URI if something fails

        try {
            val loaded = loadBitmap(Uri.parse(uriStr))
            if (loaded != null) {
                // 1. Apply Filters
                val filter = options.filter
                val bitmap = if (filter != "color") {
                    Logger.log("Applying filter: $filter")
                    applyFilter(loaded, filter)
                } else {
                    loaded
                }

                // 2. Save Processed Image
                // We save to the cache directory to ensure we deliver a clean file that matches
                // the requested format/quality.
                val qualityInt = (options.quality * 100).toInt().coerceIn(0, 100)
                val isPng = options.format == "png"
                val compressFormat = if (isPng) Bitmap.CompressFormat.PNG else Bitmap.CompressFormat.JPEG
                val newFile = File(context.cacheDir, "scan_${UUID.randomUUID()}.${if (isPng) "png" else "jpg"}")

                // `use` closes the stream even when compress() throws.
                val encoded = FileOutputStream(newFile).use { out ->
                    bitmap.compress(compressFormat, qualityInt, out)
                }
                if (!encoded) {
                    // Do not hand out a URI pointing at an empty or partial file.
                    newFile.delete()
                    throw java.io.IOException("Failed to encode processed image")
                }
                result.putString("uri", Uri.fromFile(newFile).toString())

                // 3. Base64 Generation
                // Reuse the bytes just written instead of compressing the bitmap a second time;
                // this also guarantees the Base64 payload is identical to the saved file.
                if (options.includeBase64) {
                    result.putString("base64", Base64.encodeToString(newFile.readBytes(), Base64.NO_WRAP))
                }

                // 4. Optical Character Recognition (OCR)
                if (options.includeText) {
                    Logger.log("Running OCR on processed image (Version ${options.textVersion})")
                    val ocrResult = TextRecognizer.processImage(context, Uri.fromFile(newFile), options.textVersion)
                    result.putString("text", ocrResult.getString("text"))
                    if (ocrResult.hasKey("blocks")) {
                        result.putArray("blocks", ocrResult.getArray("blocks"))
                    }
                }
            }
        } catch (e: kotlinx.coroutines.CancellationException) {
            // Cancellation must propagate; it is not an image failure.
            throw e
        } catch (e: Exception) {
            Logger.error("Failed to process image: $uriStr", e)
            // Swallow specific image error to allow other images in a batch to proceed.
        }

        // 5. Metadata (attached on every path, including a failed load)
        val metadataMap = Arguments.createMap()
        metadataMap.putString("platform", "android")
        metadataMap.putInt("textVersion", options.textVersion)
        metadataMap.putString("filter", options.filter)
        metadataMap.putString("ocrEngine", if (options.includeText) "MLKit" else "none")
        result.putMap("metadata", metadataMap)

        result
    }

    /**
     * Loads a Bitmap from a URI and corrects its orientation.
     * Supports: content://, file://, data:image/base64
     *
     * Data URIs are decoded directly and are not EXIF-rotated. For other URIs the stream is opened
     * twice through the content resolver: once to decode pixels and once to read the EXIF
     * orientation tag.
     *
     * @return The decoded (and possibly rotated) bitmap, or null when it could not be loaded.
     */
    private fun loadBitmap(uri: Uri): Bitmap? {
        try {
            // Handle data URI (Base64)
            if (uri.scheme == "data") {
                val base64Data = uri.toString().substringAfter(",")
                val decodedBytes = Base64.decode(base64Data, Base64.DEFAULT)
                return BitmapFactory.decodeByteArray(decodedBytes, 0, decodedBytes.size)
            }

            // Handle Content/File URIs; `use` closes the stream even if decoding throws.
            val originalBitmap = context.contentResolver.openInputStream(uri)?.use { stream ->
                BitmapFactory.decodeStream(stream)
            }
            if (originalBitmap == null) {
                Logger.warn("Failed to decode bitmap from $uri")
                return null
            }

            // Read EXIF to determine orientation.
            // This is critical for images from Gallery or Camera which might use orientation tags.
            return try {
                val orientation = context.contentResolver.openInputStream(uri)?.use { stream ->
                    ExifInterface(stream)
                        .getAttributeInt(ExifInterface.TAG_ORIENTATION, ExifInterface.ORIENTATION_NORMAL)
                } ?: return originalBitmap
                rotateBitmap(originalBitmap, orientation)
            } catch (e: Exception) {
                // If EXIF fails (e.g. some streams don't support it), fail safe and return original
                Logger.warn("Failed to read EXIF for rotation: ${e.message}")
                originalBitmap
            }
        } catch (e: Exception) {
            Logger.error("Error loading bitmap", e)
            return null
        }
    }

    /**
     * Rotates a bitmap based on EXIF orientation.
     * Only the 90/180/270 degree rotations are applied; every other orientation value
     * (including the mirrored ones) returns the bitmap unchanged.
     */
    private fun rotateBitmap(bitmap: Bitmap, orientation: Int): Bitmap {
        val degrees = when (orientation) {
            ExifInterface.ORIENTATION_ROTATE_90 -> 90f
            ExifInterface.ORIENTATION_ROTATE_180 -> 180f
            ExifInterface.ORIENTATION_ROTATE_270 -> 270f
            else -> return bitmap
        }
        val matrix = Matrix().apply { postRotate(degrees) }
        return Bitmap.createBitmap(bitmap, 0, 0, bitmap.width, bitmap.height, matrix, true)
    }

    /**
     * Applies color or convolution filters to the bitmap.
     * Supports: "grayscale", "monochrome" (high contrast), "denoise", "sharpen", "ocrOptimized".
     * Any other value (including null) returns the source bitmap untouched.
     */
    private fun applyFilter(src: Bitmap, filterType: String?): Bitmap {
        return when (filterType) {
            "grayscale" -> applyColorMatrixFilter(src, createGrayscaleMatrix())
            "monochrome" -> applyColorMatrixFilter(src, createMonochromeMatrix())
            "denoise" -> applyConvolutionFilter(src, DENOISE_KERNEL)
            "sharpen" -> applyConvolutionFilter(src, SHARPEN_KERNEL)
            "ocrOptimized" -> applyOcrOptimizedPipeline(src)
            else -> src
        }
    }

    /**
     * Full OCR optimization pipeline: denoise → sharpen → monochrome.
     *
     * 1. **Denoise**: Removes noise that would otherwise be amplified by sharpening.
     * 2. **Sharpen**: Enhances edge clarity for better character recognition.
     * 3. **Monochrome**: High-contrast B&W.
     *
     * @param src The source bitmap to process.
     * @return The fully processed bitmap.
     */
    private fun applyOcrOptimizedPipeline(src: Bitmap): Bitmap {
        val denoised = applyConvolutionFilter(src, DENOISE_KERNEL)
        val sharpened = applyConvolutionFilter(denoised, SHARPEN_KERNEL)
        return applyColorMatrixFilter(sharpened, createMonochromeMatrix())
    }

    /**
     * Applies a ColorMatrix filter to the bitmap by drawing it onto a new ARGB_8888 bitmap.
     *
     * @param src The source bitmap to filter.
     * @param matrix The ColorMatrix defining the transformation.
     * @return A new bitmap with the filter applied.
     */
    private fun applyColorMatrixFilter(src: Bitmap, matrix: ColorMatrix): Bitmap {
        val dest = Bitmap.createBitmap(src.width, src.height, Bitmap.Config.ARGB_8888)
        val paint = Paint().apply { colorFilter = ColorMatrixColorFilter(matrix) }
        Canvas(dest).drawBitmap(src, 0f, 0f, paint)
        return dest
    }

    /**
     * Creates a grayscale ColorMatrix (saturation set to zero).
     *
     * @return A ColorMatrix configured for grayscale conversion.
     */
    private fun createGrayscaleMatrix(): ColorMatrix = ColorMatrix().apply { setSaturation(0f) }

    /**
     * Creates a high-contrast monochrome ColorMatrix.
     * Combines grayscale desaturation with a contrast boost of scale 1.3 around mid-gray.
     *
     * @return A ColorMatrix configured for monochrome conversion with contrast boost.
     */
    private fun createMonochromeMatrix(): ColorMatrix {
        val scale = 1.3f
        // Offset that keeps mid-gray (127.5) fixed while the channel values are scaled.
        val translate = (-.5f * scale + .5f) * 255f
        val contrast = ColorMatrix(
            floatArrayOf(
                scale, 0f, 0f, 0f, translate,
                0f, scale, 0f, 0f, translate,
                0f, 0f, scale, 0f, translate,
                0f, 0f, 0f, 1f, 0f
            )
        )
        return ColorMatrix().apply {
            setSaturation(0f)
            postConcat(contrast)
        }
    }

    /**
     * Applies a 3x3 convolution kernel to the RGB channels of the bitmap.
     * Used for spatial filtering effects like blur (denoise) and sharpen.
     *
     * The outermost 1-pixel border has no full 3x3 neighbourhood and is not convolved; those
     * pixels keep their source values rather than being left as transparent black. Alpha is
     * copied from the source pixel. Images narrower or shorter than 3 pixels come back as a
     * plain copy.
     *
     * @param src The source bitmap to filter.
     * @param kernel A 9-element float array representing the 3x3 convolution kernel, row by row.
     * @return A new ARGB_8888 bitmap with the convolution applied.
     */
    private fun applyConvolutionFilter(src: Bitmap, kernel: FloatArray): Bitmap {
        val width = src.width
        val height = src.height
        val pixels = IntArray(width * height)
        src.getPixels(pixels, 0, width, 0, 0, width, height)

        // Start from a copy of the source so the unprocessed border stays intact.
        val result = pixels.copyOf()

        for (y in 1 until height - 1) {
            val rowStart = y * width
            for (x in 1 until width - 1) {
                var r = 0f
                var g = 0f
                var b = 0f
                var k = 0
                // Walk the neighbourhood top-left to bottom-right, matching the kernel layout.
                for (dy in -1..1) {
                    val neighbourRow = (y + dy) * width
                    for (dx in -1..1) {
                        val px = pixels[neighbourRow + x + dx]
                        val weight = kernel[k++]
                        r += ((px shr 16) and 0xFF) * weight
                        g += ((px shr 8) and 0xFF) * weight
                        b += (px and 0xFF) * weight
                    }
                }
                val a = (pixels[rowStart + x] shr 24) and 0xFF
                // Round instead of truncating so float error (e.g. 254.99998) does not darken
                // the channel by one step.
                result[rowStart + x] = (a shl 24) or
                    (Math.round(r).coerceIn(0, 255) shl 16) or
                    (Math.round(g).coerceIn(0, 255) shl 8) or
                    Math.round(b).coerceIn(0, 255)
            }
        }

        val dest = Bitmap.createBitmap(width, height, Bitmap.Config.ARGB_8888)
        dest.setPixels(result, 0, width, 0, 0, width, height)
        return dest
    }

    companion object {
        /**
         * Box blur kernel (3x3) for noise reduction.
         * Averages each pixel with its 8 neighbors, smoothing out noise.
         */
        private val DENOISE_KERNEL = floatArrayOf(
            1 / 9f, 1 / 9f, 1 / 9f,
            1 / 9f, 1 / 9f, 1 / 9f,
            1 / 9f, 1 / 9f, 1 / 9f
        )

        /**
         * Sharpen kernel (3x3) for edge enhancement.
         * Emphasizes differences between a pixel and its four direct neighbors.
         */
        private val SHARPEN_KERNEL = floatArrayOf(
            0f, -1f, 0f,
            -1f, 5f, -1f,
            0f, -1f, 0f
        )
    }
}
@@ -0,0 +1,36 @@
1
+ package com.documentscanner
2
+
3
+ import android.util.Log
4
+
5
/**
 * Single logging facade for the Document Scanner.
 * Every message goes to Android's Log under one shared tag, prefixed with a severity icon.
 */
object Logger {
    private const val TAG = "DocumentScanner"

    /** Writes an informational message at debug level. */
    fun log(message: String) {
        val line = "ℹ️ $message"
        Log.d(TAG, line)
    }

    /** Writes a warning message. */
    fun warn(message: String) {
        val line = "⚠️ $message"
        Log.w(TAG, line)
    }

    /** Writes an error message, attaching [throwable] to the log entry when one is given. */
    fun error(message: String, throwable: Throwable? = null) {
        val line = "❌ $message"
        when (throwable) {
            null -> Log.e(TAG, line)
            else -> Log.e(TAG, line, throwable)
        }
    }
}
@@ -0,0 +1,56 @@
1
+ package com.documentscanner
2
+
3
/**
 * Tunable constants for the OCR engine.
 * Changing these values alters how the TextRecognizer rebuilds layout from raw text blocks.
 */
object OCRConfiguration {

    // --- V2 Clustering Heuristics ---

    /**
     * Height compatibility threshold: blocks are only clustered when their heights are similar,
     * i.e. minH / maxH >= this value.
     */
    const val HEIGHT_COMPATIBILITY_THRESHOLD = 0.40

    /** Overlap ratio threshold: vertical intersection divided by the smaller height. */
    const val OVERLAP_RATIO_THRESHOLD = 0.50

    /**
     * Centerline distance factor: maximum allowed vertical distance between block centers,
     * expressed as a factor of the typical line height.
     */
    const val CENTERLINE_DISTANCE_FACTOR = 0.70

    /**
     * Adaptive cluster growth limits: keep a line from becoming too tall after blocks are merged.
     */
    const val STACKED_GROWTH_LIMIT = 1.2
    const val SKEWED_GROWTH_LIMIT = 2.0

    // --- Spacing & Reconstruction ---

    /** Adaptive spacing factor: gap width, relative to median height, that triggers extra spaces. */
    const val ADAPTIVE_SPACING_FACTOR = 1.0

    /** Space width factor: the "width" of one space character relative to median height. */
    const val SPACE_WIDTH_FACTOR = 0.3

    /** Maximum spaces cap: upper bound on the number of consecutive spaces inserted. */
    const val MAX_SPACES = 10
}