npm - react-native-rectangle-doc-scanner - Versions diffs - 3.239.0 → 3.241.0 - Mend

react-native-rectangle-doc-scanner 3.239.0 → 3.241.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/android/build.gradle +3 -0
package/android/src/main/kotlin/com/reactnativerectangledocscanner/CameraController.kt +109 -28
package/package.json +1 -1

package/android/build.gradle CHANGED Viewed

@@ -68,6 +68,9 @@ dependencies {
     // OpenCV for document detection
     implementation 'org.opencv:opencv:4.9.0'
+    // ML Kit object detection for live rectangle hints
+    implementation 'com.google.mlkit:object-detection:17.0.1'
     // Coroutines for async operations
     implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-core:1.7.3'
     implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-android:1.7.3'

package/android/src/main/kotlin/com/reactnativerectangledocscanner/CameraController.kt CHANGED Viewed

@@ -7,6 +7,7 @@ import android.graphics.Bitmap
 import android.graphics.BitmapFactory
 import android.graphics.Matrix
 import android.graphics.SurfaceTexture
+import android.graphics.Rect
 import android.graphics.RectF
 import android.graphics.ImageFormat
 import android.hardware.camera2.CameraCaptureSession
@@ -23,6 +24,10 @@ import android.util.Size
 import android.view.Surface
 import android.view.TextureView
 import androidx.core.content.ContextCompat
+import com.google.mlkit.vision.common.InputImage
+import com.google.mlkit.vision.objects.ObjectDetection
+import com.google.mlkit.vision.objects.defaults.ObjectDetectorOptions
+import org.opencv.core.Point
 import java.io.File
 import java.io.FileOutputStream
 import java.util.concurrent.atomic.AtomicReference
@@ -58,6 +63,13 @@ class CameraController(
     private val pendingCapture = AtomicReference<PendingCapture?>()
     private val analysisInFlight = AtomicBoolean(false)
+    private val objectDetector = ObjectDetection.getClient( // ML Kit detector used by analyzeImage(); closed in shutdown()
+        ObjectDetectorOptions.Builder()
+            .setDetectorMode(ObjectDetectorOptions.STREAM_MODE)
+            .enableMultipleObjects() // analyzeImage() then keeps only the detection with the largest bounding box
+            .build()
+    )
+    private var lastRefineTimestamp = 0L // time (ms) of the last OpenCV refinement; starts at 0, updated by shouldRefineWithOpenCv()
     var onFrameAnalyzed: ((Rectangle?, Int, Int) -> Unit)? = null
@@ -179,6 +191,7 @@ class CameraController(
     fun shutdown() { // Tears the controller down: stops the camera, closes the detector, then quits cameraThread and analysisThread.
         stopCamera()
+        objectDetector.close() // NOTE(review): a detection Task started by analyzeImage() may still be in flight here — confirm close() is safe mid-request
         cameraThread.quitSafely()
         analysisThread.quitSafely()
     }
@@ -352,21 +365,40 @@ class CameraController(
     }
     private fun analyzeImage(image: Image) { // Runs object detection on one camera Image and reports the resulting rectangle through onFrameAnalyzed.
-        try {
-            val nv21 = image.toNv21()
-            val rotationDegrees = computeRotationDegrees()
-            val rectangle = DocumentDetector.detectRectangleInYUV(nv21, image.width, image.height, rotationDegrees)
-            val frameWidth = if (rotationDegrees == 90 || rotationDegrees == 270) image.height else image.width
-            val frameHeight = if (rotationDegrees == 90 || rotationDegrees == 270) image.width else image.height
-            onFrameAnalyzed?.invoke(rectangle, frameWidth, frameHeight)
+        val rotationDegrees = computeRotationDegrees()
+        val inputImage = try {
+            InputImage.fromMediaImage(image, rotationDegrees)
         } catch (e: Exception) {
-            Log.e(TAG, "[CAMERA2] Error analyzing frame", e)
-        } finally {
+            Log.e(TAG, "[CAMERA2] Failed to create InputImage", e)
             image.close()
             analysisInFlight.set(false)
+            return // nothing was handed to the detector, so the frame and the in-flight flag are released right here
         }
+        objectDetector.process(inputImage) // NOTE(review): listeners below are registered without an executor; per the Tasks API they would run on the main thread, and refineWithOpenCv() with them — confirm
+            .addOnSuccessListener { objects ->
+                val best = objects.maxByOrNull { obj -> // choose the detection whose bounding box has the largest area
+                    val box = obj.boundingBox
+                    box.width() * box.height()
+                }
+                val mlBox = best?.boundingBox
+                val rectangle = when {
+                    mlBox == null -> null
+                    shouldRefineWithOpenCv() -> refineWithOpenCv(image, rotationDegrees, mlBox) ?: boxToRectangle(mlBox) // falls back to the raw ML Kit box when refinement returns null
+                    else -> boxToRectangle(mlBox)
+                }
+                val frameWidth = if (rotationDegrees == 90 || rotationDegrees == 270) image.height else image.width // width and height swap for 90/270 degree rotation
+                val frameHeight = if (rotationDegrees == 90 || rotationDegrees == 270) image.width else image.height
+                onFrameAnalyzed?.invoke(rectangle, frameWidth, frameHeight)
+            }
+            .addOnFailureListener { e ->
+                Log.e(TAG, "[CAMERA2] ML Kit detection failed", e)
+            }
+            .addOnCompleteListener {
+                image.close() // the frame is kept open until the Task completes because the success listener still reads it (refineWithOpenCv, width/height)
+                analysisInFlight.set(false)
+            }
     }
     private fun processCapture(image: Image, pending: PendingCapture) {
@@ -453,10 +485,7 @@ class CameraController(
         val matrix = Matrix()
         bufferRect.offset(centerX - bufferRect.centerX(), centerY - bufferRect.centerY())
-        matrix.setRectToRect(viewRect, bufferRect, Matrix.ScaleToFit.FILL)
-        val scale = max(viewWidth / bufferWidth, viewHeight / bufferHeight)
-        matrix.postScale(scale, scale, centerX, centerY)
+        matrix.setRectToRect(bufferRect, viewRect, Matrix.ScaleToFit.FILL)
         matrix.postRotate(rotation.toFloat(), centerX, centerY)
         previewView.setTransform(matrix)
     }
@@ -497,19 +526,72 @@ class CameraController(
         return Bitmap.createBitmap(bitmap, 0, 0, bitmap.width, bitmap.height, matrix, true)
     }
-    private fun Image.toNv21(): ByteArray {
-        val width = width
-        val height = height
+    /**
+     * Rate-limits the OpenCV refinement pass to at most one run per 200 ms.
+     *
+     * Returns `true` and records the current time in `lastRefineTimestamp` when no
+     * refinement has been recorded yet or at least 200 ms have elapsed since the last
+     * one. Otherwise returns `false` and leaves the timestamp untouched.
+     *
+     * The interval is measured with the monotonic `System.nanoTime()` clock instead of
+     * the wall clock: a wall-clock adjustment (NTP sync, user changing the time) could
+     * otherwise suppress refinement for an arbitrary period or defeat the throttle.
+     */
+    private fun shouldRefineWithOpenCv(): Boolean {
+        val minIntervalMs = 200L
+        val now = System.nanoTime() / 1_000_000L
+        // 0 is the field's initial value and means "never refined". nanoTime has an
+        // arbitrary origin, so the first call must not be compared against that 0.
+        val neverRefined = lastRefineTimestamp == 0L
+        if (!neverRefined && now - lastRefineTimestamp < minIntervalMs) {
+            return false
+        }
+        lastRefineTimestamp = now
+        return true
+    }
+    /**
+     * Runs the OpenCV document detector on [image] and keeps its result only when it
+     * overlaps the ML Kit box.
+     *
+     * @param image frame to analyse; it is converted to NV21 via `imageToNv21`.
+     * @param rotationDegrees rotation forwarded to `DocumentDetector.detectRectangleInYUV`.
+     * @param mlBox bounding box reported by ML Kit for the same frame.
+     * @return the OpenCV rectangle when the IoU between its bounds and [mlBox] is at
+     * least 0.2; `null` when nothing was detected, the overlap is smaller, or an
+     * exception was thrown (the exception is logged as a warning).
+     */
+    private fun refineWithOpenCv(image: Image, rotationDegrees: Int, mlBox: Rect): Rectangle? {
+        try {
+            val yuvBytes = imageToNv21(image)
+            val candidate = DocumentDetector.detectRectangleInYUV(yuvBytes, image.width, image.height, rotationDegrees)
+                ?: return null
+            val overlap = computeIoU(rectangleBounds(candidate), mlBox)
+            return if (overlap >= 0.2f) candidate else null
+        } catch (e: Exception) {
+            Log.w(TAG, "[CAMERA2] OpenCV refine failed", e)
+            return null
+        }
+    }
+    /**
+     * Converts an axis-aligned [box] into a [Rectangle].
+     *
+     * The four corner points are passed to the constructor in the order
+     * (left, top), (right, top), (left, bottom), (right, bottom).
+     */
+    private fun boxToRectangle(box: Rect): Rectangle {
+        val leftX = box.left.toDouble()
+        val rightX = box.right.toDouble()
+        val topY = box.top.toDouble()
+        val bottomY = box.bottom.toDouble()
+        return Rectangle(
+            Point(leftX, topY),
+            Point(rightX, topY),
+            Point(leftX, bottomY),
+            Point(rightX, bottomY)
+        )
+    }
+    /**
+     * Computes the axis-aligned bounding box of the four corners of [rectangle].
+     *
+     * Each edge is the minimum or maximum corner coordinate, converted with `toInt()`.
+     */
+    private fun rectangleBounds(rectangle: Rectangle): Rect {
+        val corners = listOf(rectangle.topLeft, rectangle.bottomLeft, rectangle.topRight, rectangle.bottomRight)
+        val xs = corners.map { it.x }
+        val ys = corners.map { it.y }
+        val minX = xs.minOrNull() ?: 0.0
+        val minY = ys.minOrNull() ?: 0.0
+        val maxX = xs.maxOrNull() ?: 0.0
+        val maxY = ys.maxOrNull() ?: 0.0
+        return Rect(minX.toInt(), minY.toInt(), maxX.toInt(), maxY.toInt())
+    }
+    /**
+     * Intersection-over-union of two axis-aligned rectangles.
+     *
+     * @return the overlap area divided by the combined area, or `0f` when the
+     * rectangles do not overlap or the combined area is not positive.
+     */
+    private fun computeIoU(a: Rect, b: Rect): Float {
+        val overlapWidth = minOf(a.right, b.right) - maxOf(a.left, b.left)
+        val overlapHeight = minOf(a.bottom, b.bottom) - maxOf(a.top, b.top)
+        // Touching or disjoint rectangles have no overlap area.
+        if (overlapWidth <= 0 || overlapHeight <= 0) return 0f
+        val intersection = overlapWidth.toFloat() * overlapHeight.toFloat()
+        val union = a.width() * a.height() + b.width() * b.height() - intersection
+        return if (union <= 0f) 0f else intersection / union
+    }
+    private fun imageToNv21(image: Image): ByteArray {
+        val width = image.width
+        val height = image.height
         val ySize = width * height
         val uvSize = width * height / 2
         val nv21 = ByteArray(ySize + uvSize)
-        val yBuffer = planes[0].buffer
-        val uBuffer = planes[1].buffer
-        val vBuffer = planes[2].buffer
+        val yBuffer = image.planes[0].buffer
+        val uBuffer = image.planes[1].buffer
+        val vBuffer = image.planes[2].buffer
-        val yRowStride = planes[0].rowStride
-        val yPixelStride = planes[0].pixelStride
+        val yRowStride = image.planes[0].rowStride
+        val yPixelStride = image.planes[0].pixelStride
         var outputOffset = 0
         for (row in 0 until height) {
             var inputOffset = row * yRowStride
@@ -519,10 +601,10 @@ class CameraController(
             }
         }
-        val uvRowStride = planes[1].rowStride
-        val uvPixelStride = planes[1].pixelStride
-        val vRowStride = planes[2].rowStride
-        val vPixelStride = planes[2].pixelStride
+        val uvRowStride = image.planes[1].rowStride
+        val uvPixelStride = image.planes[1].pixelStride
+        val vRowStride = image.planes[2].rowStride
+        val vPixelStride = image.planes[2].pixelStride
         val uvHeight = height / 2
         val uvWidth = width / 2
         for (row in 0 until uvHeight) {
@@ -538,7 +620,6 @@ class CameraController(
         return nv21
     }
     private fun hasCameraPermission(): Boolean { // True when the CAMERA permission check for `context` returns PERMISSION_GRANTED.
         return ContextCompat.checkSelfPermission(context, Manifest.permission.CAMERA) == PackageManager.PERMISSION_GRANTED
     }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "react-native-rectangle-doc-scanner",
-  "version": "3.239.0",
+  "version": "3.241.0",
   "description": "Native-backed document scanner for React Native with customizable overlays.",
   "license": "MIT",
   "main": "dist/index.js",