npm - react-native-rectangle-doc-scanner - Versions diffs - 3.240.0 → 3.242.0 - Mend

react-native-rectangle-doc-scanner 3.240.0 → 3.242.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/android/src/main/kotlin/com/reactnativerectangledocscanner/CameraController.kt CHANGED Viewed

@@ -7,6 +7,7 @@ import android.graphics.Bitmap
 import android.graphics.BitmapFactory
 import android.graphics.Matrix
 import android.graphics.SurfaceTexture
+import android.graphics.Rect
 import android.graphics.RectF
 import android.graphics.ImageFormat
 import android.hardware.camera2.CameraCaptureSession
@@ -68,6 +69,9 @@ class CameraController(
             .enableMultipleObjects()
             .build()
     )
+    private var lastRefineTimestamp = 0L
+    private var lastRectangle: Rectangle? = null
+    private var lastRectangleTimestamp = 0L
     var onFrameAnalyzed: ((Rectangle?, Int, Int) -> Unit)? = null
@@ -211,7 +215,7 @@ class CameraController(
             }
             val previewSizes = streamConfigMap.getOutputSizes(SurfaceTexture::class.java)
-            previewSize = chooseBestSize(previewSizes, viewAspect, null)
+            previewSize = chooseBestSize(previewSizes, viewAspect, null, preferClosestAspect = true)
             val analysisSizes = streamConfigMap.getOutputSizes(ImageFormat.YUV_420_888)
             analysisSize = chooseBestSize(analysisSizes, viewAspect, ANALYSIS_MAX_AREA)
@@ -379,18 +383,16 @@ class CameraController(
                     val box = obj.boundingBox
                     box.width() * box.height()
                 }
-                val rectangle = best?.boundingBox?.let { box ->
-                    Rectangle(
-                        Point(box.left.toDouble(), box.top.toDouble()),
-                        Point(box.right.toDouble(), box.top.toDouble()),
-                        Point(box.left.toDouble(), box.bottom.toDouble()),
-                        Point(box.right.toDouble(), box.bottom.toDouble())
-                    )
+                val mlBox = best?.boundingBox
+                val rectangle = when {
+                    mlBox == null -> null
+                    shouldRefineWithOpenCv() -> refineWithOpenCv(image, rotationDegrees, mlBox) ?: boxToRectangle(mlBox)
+                    else -> boxToRectangle(mlBox)
                 }
                 val frameWidth = if (rotationDegrees == 90 || rotationDegrees == 270) image.height else image.width
                 val frameHeight = if (rotationDegrees == 90 || rotationDegrees == 270) image.width else image.height
-                onFrameAnalyzed?.invoke(rectangle, frameWidth, frameHeight)
+                onFrameAnalyzed?.invoke(smoothRectangle(rectangle), frameWidth, frameHeight)
             }
             .addOnFailureListener { e ->
                 Log.e(TAG, "[CAMERA2] ML Kit detection failed", e)
@@ -485,34 +487,43 @@ class CameraController(
         val matrix = Matrix()
         bufferRect.offset(centerX - bufferRect.centerX(), centerY - bufferRect.centerY())
-        matrix.setRectToRect(viewRect, bufferRect, Matrix.ScaleToFit.FILL)
-        val scale = max(viewWidth / bufferWidth, viewHeight / bufferHeight)
-        matrix.postScale(scale, scale, centerX, centerY)
+        matrix.setRectToRect(bufferRect, viewRect, Matrix.ScaleToFit.FILL)
         matrix.postRotate(rotation.toFloat(), centerX, centerY)
         previewView.setTransform(matrix)
     }
-    private fun chooseBestSize(sizes: Array<Size>?, targetAspect: Double, maxArea: Int?): Size? {
+    private fun chooseBestSize(
+        sizes: Array<Size>?,
+        targetAspect: Double,
+        maxArea: Int?,
+        preferClosestAspect: Boolean = false
+    ): Size? {
         if (sizes == null || sizes.isEmpty()) return null
         val sorted = sizes.sortedByDescending { it.width * it.height }
-        val matching = sorted.filter {
-            val aspect = it.width.toDouble() / it.height.toDouble()
-            abs(aspect - targetAspect) <= ANALYSIS_ASPECT_TOLERANCE && (maxArea == null || it.width * it.height <= maxArea)
-        }
-        if (matching.isNotEmpty()) {
-            return matching.first()
-        }
         val capped = if (maxArea != null) {
             sorted.filter { it.width * it.height <= maxArea }
         } else {
             sorted
         }
-        return capped.firstOrNull() ?: sorted.first()
+        if (capped.isEmpty()) {
+            return sorted.first()
+        }
+        if (preferClosestAspect) {
+            return capped.minWithOrNull(
+                compareBy<Size> { abs(it.width.toDouble() / it.height.toDouble() - targetAspect) }
+                    .thenByDescending { it.width * it.height }
+            )
+        }
+        val matching = capped.filter {
+            val aspect = it.width.toDouble() / it.height.toDouble()
+            abs(aspect - targetAspect) <= ANALYSIS_ASPECT_TOLERANCE
+        }
+        return matching.firstOrNull() ?: capped.first()
     }
     private fun rotateAndMirror(bitmap: Bitmap, rotationDegrees: Int, mirror: Boolean): Bitmap {
@@ -529,6 +540,151 @@ class CameraController(
         return Bitmap.createBitmap(bitmap, 0, 0, bitmap.width, bitmap.height, matrix, true)
     }
+    /**
+     * Rate-limits the OpenCV refinement pass to at most one run per 150 ms.
+     *
+     * @return true (and records the current time in [lastRefineTimestamp]) when at least
+     * 150 ms have passed since the last accepted call; false otherwise, leaving the
+     * timestamp untouched.
+     */
+    private fun shouldRefineWithOpenCv(): Boolean {
+        // Use the monotonic boot clock: the wall clock can jump (NTP sync, manual change),
+        // which would either stall refinement or let it run back-to-back.
+        val now = android.os.SystemClock.elapsedRealtime()
+        if (now - lastRefineTimestamp < 150) {
+            return false
+        }
+        lastRefineTimestamp = now
+        return true
+    }
+    /**
+     * Tries to replace the ML Kit bounding box with an OpenCV-detected rectangle.
+     *
+     * The frame is converted to NV21, the ML Kit box is padded by 20% (clamped to the
+     * upright frame size) and OpenCV searches only inside that padded area. The OpenCV
+     * result is accepted only if its bounding box overlaps [mlBox] with IoU >= 0.2.
+     *
+     * @return the refined rectangle, or null when nothing was detected, the overlap is
+     * too small, or any exception was thrown (logged as a warning).
+     */
+    private fun refineWithOpenCv(image: Image, rotationDegrees: Int, mlBox: Rect): Rectangle? {
+        try {
+            val yuvBytes = imageToNv21(image)
+            // Width and height swap when the frame is rotated by a quarter turn.
+            val quarterTurn = rotationDegrees == 90 || rotationDegrees == 270
+            val uprightWidth = if (quarterTurn) image.height else image.width
+            val uprightHeight = if (quarterTurn) image.width else image.height
+            val searchArea = expandRect(mlBox, uprightWidth, uprightHeight, 0.2f)
+            val candidate = DocumentDetector.detectRectangleInYUVWithRoi(
+                yuvBytes,
+                image.width,
+                image.height,
+                rotationDegrees,
+                searchArea
+            ) ?: return null
+            // Reject detections that drifted away from what ML Kit reported.
+            return candidate.takeIf { computeIoU(rectangleBounds(it), mlBox) >= 0.2f }
+        } catch (e: Exception) {
+            Log.w(TAG, "[CAMERA2] OpenCV refine failed", e)
+            return null
+        }
+    }
+    /**
+     * Converts an axis-aligned [Rect] into a [Rectangle] whose corners are passed in the
+     * order top-left, top-right, bottom-left, bottom-right.
+     */
+    private fun boxToRectangle(box: Rect): Rectangle {
+        val leftX = box.left.toDouble()
+        val rightX = box.right.toDouble()
+        val topY = box.top.toDouble()
+        val bottomY = box.bottom.toDouble()
+        return Rectangle(
+            Point(leftX, topY),
+            Point(rightX, topY),
+            Point(leftX, bottomY),
+            Point(rightX, bottomY)
+        )
+    }
+    /**
+     * Grows [box] on every side by [ratio] of its own width/height (padding truncated to
+     * whole pixels) and clamps the result to the area (0, 0)-([maxWidth], [maxHeight]).
+     */
+    private fun expandRect(box: Rect, maxWidth: Int, maxHeight: Int, ratio: Float): Rect {
+        val horizontalPad = (box.width() * ratio).toInt()
+        val verticalPad = (box.height() * ratio).toInt()
+        return Rect(
+            maxOf(box.left - horizontalPad, 0),
+            maxOf(box.top - verticalPad, 0),
+            minOf(box.right + horizontalPad, maxWidth),
+            minOf(box.bottom + verticalPad, maxHeight)
+        )
+    }
+    /**
+     * Temporally smooths the detected rectangle to reduce overlay jitter.
+     *
+     * - When [current] is null, the previous rectangle is kept for up to 250 ms after it
+     *   was last updated; after that the stored rectangle is cleared and null is returned.
+     * - When [current] is non-null and the previous rectangle is younger than 500 ms, each
+     *   corner is moved 35% of the way from the previous position towards the new one.
+     * - Otherwise [current] is used as-is.
+     *
+     * Any non-null result is stored in [lastRectangle] with the current timestamp.
+     */
+    private fun smoothRectangle(current: Rectangle?): Rectangle? {
+        // Monotonic clock: with the wall clock, a backwards time step makes the age checks
+        // below negative, so a stale rectangle would keep being returned.
+        val now = android.os.SystemClock.elapsedRealtime()
+        val last = lastRectangle
+        if (current == null) {
+            // Bridge short detection dropouts by holding the last rectangle briefly.
+            if (last != null && now - lastRectangleTimestamp < 250) {
+                return last
+            }
+            lastRectangle = null
+            return null
+        }
+        val smoothed = if (last != null && now - lastRectangleTimestamp < 500) {
+            // Interpolation factor: fraction of the distance moved towards the new corners.
+            val t = 0.35
+            Rectangle(
+                Point(lerp(last.topLeft.x, current.topLeft.x, t), lerp(last.topLeft.y, current.topLeft.y, t)),
+                Point(lerp(last.topRight.x, current.topRight.x, t), lerp(last.topRight.y, current.topRight.y, t)),
+                Point(lerp(last.bottomLeft.x, current.bottomLeft.x, t), lerp(last.bottomLeft.y, current.bottomLeft.y, t)),
+                Point(lerp(last.bottomRight.x, current.bottomRight.x, t), lerp(last.bottomRight.y, current.bottomRight.y, t))
+            )
+        } else {
+            // No recent history: start from the raw detection.
+            current
+        }
+        lastRectangle = smoothed
+        lastRectangleTimestamp = now
+        return smoothed
+    }
+    /** Linear interpolation: returns [start] for t = 0 and [end] for t = 1. */
+    private fun lerp(start: Double, end: Double, t: Double): Double =
+        start + t * (end - start)
+    /**
+     * Returns the axis-aligned bounding box of the four corners of [rectangle], with each
+     * edge converted to an integer via `toInt()`.
+     */
+    private fun rectangleBounds(rectangle: Rectangle): Rect {
+        val corners = listOf(rectangle.topLeft, rectangle.bottomLeft, rectangle.topRight, rectangle.bottomRight)
+        val xs = corners.map { it.x }
+        val ys = corners.map { it.y }
+        val minX = xs.minOrNull() ?: 0.0
+        val maxX = xs.maxOrNull() ?: 0.0
+        val minY = ys.minOrNull() ?: 0.0
+        val maxY = ys.maxOrNull() ?: 0.0
+        return Rect(minX.toInt(), minY.toInt(), maxX.toInt(), maxY.toInt())
+    }
+    /**
+     * Intersection-over-union of two axis-aligned rectangles.
+     *
+     * @return 0 when the rectangles do not overlap (or the union area is not positive),
+     * otherwise intersection area divided by union area.
+     */
+    private fun computeIoU(a: Rect, b: Rect): Float {
+        val overlapLeft = max(a.left, b.left)
+        val overlapTop = max(a.top, b.top)
+        val overlapRight = minOf(a.right, b.right)
+        val overlapBottom = minOf(a.bottom, b.bottom)
+        // Empty or inverted overlap means the rectangles are disjoint.
+        if (overlapRight <= overlapLeft || overlapBottom <= overlapTop) return 0f
+        val overlapArea = (overlapRight - overlapLeft).toFloat() * (overlapBottom - overlapTop).toFloat()
+        val combinedArea = (a.width() * a.height() + b.width() * b.height() - overlapArea).toFloat()
+        return when {
+            combinedArea <= 0f -> 0f
+            else -> overlapArea / combinedArea
+        }
+    }
+    /**
+     * Copies the three planes of [image] into a single byte array: all luma samples row
+     * by row, followed by interleaved V/U pairs for each chroma sample (NV21 ordering).
+     *
+     * Row and pixel strides of every plane are honoured, so padded or interleaved source
+     * buffers are handled. The output holds `width * height + width * height / 2` bytes.
+     */
+    private fun imageToNv21(image: Image): ByteArray {
+        val width = image.width
+        val height = image.height
+        val out = ByteArray(width * height + width * height / 2)
+        var cursor = 0
+
+        // Luma plane: one byte per pixel.
+        val lumaBytes = image.planes[0].buffer
+        val lumaRowStride = image.planes[0].rowStride
+        val lumaPixelStride = image.planes[0].pixelStride
+        for (row in 0 until height) {
+            val rowStart = row * lumaRowStride
+            for (col in 0 until width) {
+                out[cursor++] = lumaBytes.get(rowStart + col * lumaPixelStride)
+            }
+        }
+
+        // Chroma planes: half resolution in both directions, written as V then U.
+        val uBytes = image.planes[1].buffer
+        val vBytes = image.planes[2].buffer
+        val uRowStride = image.planes[1].rowStride
+        val uPixelStride = image.planes[1].pixelStride
+        val vRowStride = image.planes[2].rowStride
+        val vPixelStride = image.planes[2].pixelStride
+        val chromaRows = height / 2
+        val chromaCols = width / 2
+        for (row in 0 until chromaRows) {
+            val uRowStart = row * uRowStride
+            val vRowStart = row * vRowStride
+            for (col in 0 until chromaCols) {
+                out[cursor++] = vBytes.get(vRowStart + col * vPixelStride)
+                out[cursor++] = uBytes.get(uRowStart + col * uPixelStride)
+            }
+        }
+        return out
+    }
     private fun hasCameraPermission(): Boolean {
         return ContextCompat.checkSelfPermission(context, Manifest.permission.CAMERA) == PackageManager.PERMISSION_GRANTED
     }

package/android/src/main/kotlin/com/reactnativerectangledocscanner/DocumentDetector.kt CHANGED Viewed

@@ -1,6 +1,7 @@
 package com.reactnativerectangledocscanner
 import android.graphics.Bitmap
+import android.graphics.Rect
 import android.util.Log
 import org.opencv.android.Utils
 import org.opencv.core.*
@@ -98,6 +99,59 @@ class DocumentDetector {
             return rectangle
         }
+        /**
+         * Detect rectangle within a region-of-interest (ROI) in YUV image.
+         *
+         * The frame is converted from NV21 to RGB, rotated clockwise by [rotation]
+         * (90, 180 or 270; any other value leaves it unrotated), cropped to [roi] and
+         * passed to [detectRectangleInMat]. The ROI is specified in the rotated image
+         * coordinate space and is clamped so it covers at least one pixel of that image.
+         *
+         * @param yuvBytes frame bytes, interpreted as NV21
+         * @param width frame width before rotation
+         * @param height frame height before rotation
+         * @param rotation rotation to apply before detection, in degrees
+         * @param roi search area in rotated-image coordinates
+         * @return the detected rectangle translated back into rotated full-image
+         * coordinates, or null when nothing was detected inside the ROI
+         */
+        fun detectRectangleInYUVWithRoi(
+            yuvBytes: ByteArray,
+            width: Int,
+            height: Int,
+            rotation: Int,
+            roi: Rect
+        ): Rectangle? {
+            val yuvMat = Mat(height + height / 2, width, CvType.CV_8UC1)
+            val rgbMat = Mat()
+            var roiMat: Mat? = null
+            // Native Mats are released in `finally` so an exception thrown by OpenCV
+            // (which callers may catch and ignore per frame) cannot leak native memory.
+            try {
+                yuvMat.put(0, 0, yuvBytes)
+                Imgproc.cvtColor(yuvMat, rgbMat, Imgproc.COLOR_YUV2RGB_NV21)
+
+                val rotationCode = when (rotation) {
+                    90 -> Core.ROTATE_90_CLOCKWISE
+                    180 -> Core.ROTATE_180
+                    270 -> Core.ROTATE_90_COUNTERCLOCKWISE
+                    else -> null
+                }
+                if (rotationCode != null) {
+                    Core.rotate(rgbMat, rgbMat, rotationCode)
+                }
+
+                // Clamp the ROI to the rotated image, keeping it at least 1x1.
+                val x = roi.left.coerceIn(0, rgbMat.cols() - 1)
+                val y = roi.top.coerceIn(0, rgbMat.rows() - 1)
+                val right = roi.right.coerceIn(x + 1, rgbMat.cols())
+                val bottom = roi.bottom.coerceIn(y + 1, rgbMat.rows())
+                val roiRect = org.opencv.core.Rect(x, y, right - x, bottom - y)
+
+                val cropped = Mat(rgbMat, roiRect)
+                roiMat = cropped
+                val rectangle = detectRectangleInMat(cropped) ?: return null
+
+                // Detection ran on the crop; shift corners back by the ROI origin.
+                return Rectangle(
+                    Point(rectangle.topLeft.x + x, rectangle.topLeft.y + y),
+                    Point(rectangle.topRight.x + x, rectangle.topRight.y + y),
+                    Point(rectangle.bottomLeft.x + x, rectangle.bottomLeft.y + y),
+                    Point(rectangle.bottomRight.x + x, rectangle.bottomRight.y + y)
+                )
+            } finally {
+                roiMat?.release()
+                yuvMat.release()
+                rgbMat.release()
+            }
+        }
         /**
          * Core detection algorithm using OpenCV
          */

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "react-native-rectangle-doc-scanner",
-  "version": "3.240.0",
+  "version": "3.242.0",
   "description": "Native-backed document scanner for React Native with customizable overlays.",
   "license": "MIT",
   "main": "dist/index.js",