react-native-rectangle-doc-scanner 3.240.0 → 3.242.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,7 @@ import android.graphics.Bitmap
7
7
  import android.graphics.BitmapFactory
8
8
  import android.graphics.Matrix
9
9
  import android.graphics.SurfaceTexture
10
+ import android.graphics.Rect
10
11
  import android.graphics.RectF
11
12
  import android.graphics.ImageFormat
12
13
  import android.hardware.camera2.CameraCaptureSession
@@ -68,6 +69,9 @@ class CameraController(
68
69
  .enableMultipleObjects()
69
70
  .build()
70
71
  )
72
+ private var lastRefineTimestamp = 0L
73
+ private var lastRectangle: Rectangle? = null
74
+ private var lastRectangleTimestamp = 0L
71
75
 
72
76
  var onFrameAnalyzed: ((Rectangle?, Int, Int) -> Unit)? = null
73
77
 
@@ -211,7 +215,7 @@ class CameraController(
211
215
  }
212
216
 
213
217
  val previewSizes = streamConfigMap.getOutputSizes(SurfaceTexture::class.java)
214
- previewSize = chooseBestSize(previewSizes, viewAspect, null)
218
+ previewSize = chooseBestSize(previewSizes, viewAspect, null, preferClosestAspect = true)
215
219
 
216
220
  val analysisSizes = streamConfigMap.getOutputSizes(ImageFormat.YUV_420_888)
217
221
  analysisSize = chooseBestSize(analysisSizes, viewAspect, ANALYSIS_MAX_AREA)
@@ -379,18 +383,16 @@ class CameraController(
379
383
  val box = obj.boundingBox
380
384
  box.width() * box.height()
381
385
  }
382
- val rectangle = best?.boundingBox?.let { box ->
383
- Rectangle(
384
- Point(box.left.toDouble(), box.top.toDouble()),
385
- Point(box.right.toDouble(), box.top.toDouble()),
386
- Point(box.left.toDouble(), box.bottom.toDouble()),
387
- Point(box.right.toDouble(), box.bottom.toDouble())
388
- )
386
+ val mlBox = best?.boundingBox
387
+ val rectangle = when {
388
+ mlBox == null -> null
389
+ shouldRefineWithOpenCv() -> refineWithOpenCv(image, rotationDegrees, mlBox) ?: boxToRectangle(mlBox)
390
+ else -> boxToRectangle(mlBox)
389
391
  }
390
392
 
391
393
  val frameWidth = if (rotationDegrees == 90 || rotationDegrees == 270) image.height else image.width
392
394
  val frameHeight = if (rotationDegrees == 90 || rotationDegrees == 270) image.width else image.height
393
- onFrameAnalyzed?.invoke(rectangle, frameWidth, frameHeight)
395
+ onFrameAnalyzed?.invoke(smoothRectangle(rectangle), frameWidth, frameHeight)
394
396
  }
395
397
  .addOnFailureListener { e ->
396
398
  Log.e(TAG, "[CAMERA2] ML Kit detection failed", e)
@@ -485,34 +487,43 @@ class CameraController(
485
487
 
486
488
  val matrix = Matrix()
487
489
  bufferRect.offset(centerX - bufferRect.centerX(), centerY - bufferRect.centerY())
488
- matrix.setRectToRect(viewRect, bufferRect, Matrix.ScaleToFit.FILL)
489
-
490
- val scale = max(viewWidth / bufferWidth, viewHeight / bufferHeight)
491
- matrix.postScale(scale, scale, centerX, centerY)
490
+ matrix.setRectToRect(bufferRect, viewRect, Matrix.ScaleToFit.FILL)
492
491
  matrix.postRotate(rotation.toFloat(), centerX, centerY)
493
492
  previewView.setTransform(matrix)
494
493
  }
495
494
 
496
- private fun chooseBestSize(sizes: Array<Size>?, targetAspect: Double, maxArea: Int?): Size? {
495
+ private fun chooseBestSize(
496
+ sizes: Array<Size>?,
497
+ targetAspect: Double,
498
+ maxArea: Int?,
499
+ preferClosestAspect: Boolean = false
500
+ ): Size? {
497
501
  if (sizes == null || sizes.isEmpty()) return null
498
502
  val sorted = sizes.sortedByDescending { it.width * it.height }
499
503
 
500
- val matching = sorted.filter {
501
- val aspect = it.width.toDouble() / it.height.toDouble()
502
- abs(aspect - targetAspect) <= ANALYSIS_ASPECT_TOLERANCE && (maxArea == null || it.width * it.height <= maxArea)
503
- }
504
-
505
- if (matching.isNotEmpty()) {
506
- return matching.first()
507
- }
508
-
509
504
  val capped = if (maxArea != null) {
510
505
  sorted.filter { it.width * it.height <= maxArea }
511
506
  } else {
512
507
  sorted
513
508
  }
514
509
 
515
- return capped.firstOrNull() ?: sorted.first()
510
+ if (capped.isEmpty()) {
511
+ return sorted.first()
512
+ }
513
+
514
+ if (preferClosestAspect) {
515
+ return capped.minWithOrNull(
516
+ compareBy<Size> { abs(it.width.toDouble() / it.height.toDouble() - targetAspect) }
517
+ .thenByDescending { it.width * it.height }
518
+ )
519
+ }
520
+
521
+ val matching = capped.filter {
522
+ val aspect = it.width.toDouble() / it.height.toDouble()
523
+ abs(aspect - targetAspect) <= ANALYSIS_ASPECT_TOLERANCE
524
+ }
525
+
526
+ return matching.firstOrNull() ?: capped.first()
516
527
  }
517
528
 
518
529
  private fun rotateAndMirror(bitmap: Bitmap, rotationDegrees: Int, mirror: Boolean): Bitmap {
@@ -529,6 +540,151 @@ class CameraController(
529
540
  return Bitmap.createBitmap(bitmap, 0, 0, bitmap.width, bitmap.height, matrix, true)
530
541
  }
531
542
 
/**
 * Throttles the OpenCV refinement pass.
 *
 * Returns `true` and stores the current wall-clock time in [lastRefineTimestamp]
 * when at least 150 ms have passed since the last accepted call. Otherwise
 * returns `false` and leaves the stored timestamp unchanged.
 */
private fun shouldRefineWithOpenCv(): Boolean {
    val currentMillis = System.currentTimeMillis()
    val intervalElapsed = currentMillis - lastRefineTimestamp >= 150
    if (intervalElapsed) {
        lastRefineTimestamp = currentMillis
    }
    return intervalElapsed
}
551
+
/**
 * Tries to replace the ML Kit bounding box with a rectangle found by OpenCV.
 *
 * The frame is converted to NV21 and [mlBox] is expanded with a ratio of 0.2,
 * bounded by the frame size after accounting for [rotationDegrees]. The expanded
 * box is passed as the region of interest to
 * [DocumentDetector.detectRectangleInYUVWithRoi]. The OpenCV result is accepted
 * only when the IoU between its bounds and [mlBox] is at least 0.2.
 *
 * @return the accepted OpenCV rectangle, or `null` when nothing was detected,
 *   the overlap check failed, or an exception was thrown (logged as a warning).
 */
private fun refineWithOpenCv(image: Image, rotationDegrees: Int, mlBox: Rect): Rectangle? {
    return try {
        val nv21 = imageToNv21(image)
        // A quarter turn swaps the frame's width and height.
        val quarterTurn = rotationDegrees == 90 || rotationDegrees == 270
        val uprightWidth = if (quarterTurn) image.height else image.width
        val uprightHeight = if (quarterTurn) image.width else image.height
        val searchRegion = expandRect(mlBox, uprightWidth, uprightHeight, 0.2f)
        DocumentDetector.detectRectangleInYUVWithRoi(
            nv21,
            image.width,
            image.height,
            rotationDegrees,
            searchRegion
        )?.takeIf { candidate ->
            // Reject results that barely overlap the ML Kit box.
            computeIoU(rectangleBounds(candidate), mlBox) >= 0.2f
        }
    } catch (e: Exception) {
        Log.w(TAG, "[CAMERA2] OpenCV refine failed", e)
        null
    }
}
576
+
/**
 * Converts an axis-aligned [box] into a [Rectangle].
 *
 * The corners are passed in the order (left, top), (right, top),
 * (left, bottom), (right, bottom).
 */
private fun boxToRectangle(box: Rect): Rectangle {
    val leftX = box.left.toDouble()
    val rightX = box.right.toDouble()
    val topY = box.top.toDouble()
    val bottomY = box.bottom.toDouble()
    return Rectangle(
        Point(leftX, topY),
        Point(rightX, topY),
        Point(leftX, bottomY),
        Point(rightX, bottomY)
    )
}
585
+
/**
 * Grows [box] outward by [ratio] of its own width and height on every side.
 *
 * The left and top edges are floored at 0; the right and bottom edges are capped
 * at [maxWidth] and [maxHeight]. Padding is truncated to whole pixels.
 */
private fun expandRect(box: Rect, maxWidth: Int, maxHeight: Int, ratio: Float): Rect {
    val horizontalPad = (box.width() * ratio).toInt()
    val verticalPad = (box.height() * ratio).toInt()
    return Rect(
        maxOf(box.left - horizontalPad, 0),
        maxOf(box.top - verticalPad, 0),
        minOf(box.right + horizontalPad, maxWidth),
        minOf(box.bottom + verticalPad, maxHeight)
    )
}
595
+
/**
 * Temporally smooths the detected rectangle.
 *
 * - When [current] is `null`, the previously stored rectangle is returned for up
 *   to 250 ms after it was stored; after that the stored rectangle is cleared and
 *   `null` is returned.
 * - When [current] is non-null and a rectangle was stored less than 500 ms ago,
 *   every corner moves 35% of the way from the stored corner towards the new one.
 * - Otherwise [current] is used unchanged.
 *
 * Any non-null result is stored in [lastRectangle] together with the current
 * wall-clock time in [lastRectangleTimestamp].
 */
private fun smoothRectangle(current: Rectangle?): Rectangle? {
    val now = System.currentTimeMillis()
    val previous = lastRectangle
    val age = now - lastRectangleTimestamp

    if (current == null) {
        if (previous == null || age >= 250) {
            lastRectangle = null
            return null
        }
        // Hold the last rectangle briefly while detection drops out.
        return previous
    }

    val result = if (previous == null || age >= 500) {
        current
    } else {
        val weight = 0.35
        fun blend(from: Point, to: Point) =
            Point(lerp(from.x, to.x, weight), lerp(from.y, to.y, weight))
        Rectangle(
            blend(previous.topLeft, current.topLeft),
            blend(previous.topRight, current.topRight),
            blend(previous.bottomLeft, current.bottomLeft),
            blend(previous.bottomRight, current.bottomRight)
        )
    }

    lastRectangle = result
    lastRectangleTimestamp = now
    return result
}
623
+
/**
 * Linear interpolation between [start] and [end].
 *
 * Returns [start] for `t = 0.0` and [end] for `t = 1.0`; [t] is not clamped.
 */
private fun lerp(start: Double, end: Double, t: Double): Double = start + t * (end - start)
627
+
/**
 * Computes the axis-aligned bounds of the four corners of [rectangle].
 *
 * Each bound is the minimum or maximum corner coordinate, converted with `toInt()`.
 */
private fun rectangleBounds(rectangle: Rectangle): Rect {
    val corners = listOf(
        rectangle.topLeft,
        rectangle.bottomLeft,
        rectangle.topRight,
        rectangle.bottomRight
    )
    val xs = corners.map { it.x }
    val ys = corners.map { it.y }
    return Rect(
        (xs.minOrNull() ?: 0.0).toInt(),
        (ys.minOrNull() ?: 0.0).toInt(),
        (xs.maxOrNull() ?: 0.0).toInt(),
        (ys.maxOrNull() ?: 0.0).toInt()
    )
}
635
+
/**
 * Intersection-over-union of two axis-aligned rectangles.
 *
 * @return a ratio in `[0, 1]`; `0` when the rectangles do not overlap or the
 *   union area is not positive.
 */
private fun computeIoU(a: Rect, b: Rect): Float {
    val overlapLeft = maxOf(a.left, b.left)
    val overlapTop = maxOf(a.top, b.top)
    val overlapRight = minOf(a.right, b.right)
    val overlapBottom = minOf(a.bottom, b.bottom)
    if (overlapRight <= overlapLeft || overlapBottom <= overlapTop) return 0f

    val intersection = (overlapRight - overlapLeft).toFloat() * (overlapBottom - overlapTop).toFloat()
    // Areas are summed as Int, then promoted to Float by the subtraction.
    val combinedArea = a.width() * a.height() + b.width() * b.height()
    val union = combinedArea - intersection
    return when {
        union <= 0f -> 0f
        else -> intersection / union
    }
}
646
+
/**
 * Copies the three planes of [image] into a newly allocated NV21 byte array.
 *
 * The output holds `width * height` bytes read from plane 0, followed by
 * interleaved bytes for each of the `(width / 2) x (height / 2)` chroma samples:
 * first the byte from plane 2, then the byte from plane 1. Row and pixel strides
 * of each plane are honoured.
 *
 * NOTE(review): assumes the image is YUV_420_888 with plane order Y, U, V —
 * confirm against the ImageReader configuration.
 */
private fun imageToNv21(image: Image): ByteArray {
    val width = image.width
    val height = image.height
    val lumaPlane = image.planes[0]
    val uPlane = image.planes[1]
    val vPlane = image.planes[2]

    val output = ByteArray(width * height + width * height / 2)
    var cursor = 0

    // Plane 0: one byte per pixel, row by row.
    val lumaBuffer = lumaPlane.buffer
    val lumaRowStride = lumaPlane.rowStride
    val lumaPixelStride = lumaPlane.pixelStride
    for (row in 0 until height) {
        val rowStart = row * lumaRowStride
        for (col in 0 until width) {
            output[cursor++] = lumaBuffer.get(rowStart + col * lumaPixelStride)
        }
    }

    // Planes 2 and 1: interleaved, plane 2 first.
    val uBuffer = uPlane.buffer
    val vBuffer = vPlane.buffer
    val uRowStride = uPlane.rowStride
    val uPixelStride = uPlane.pixelStride
    val vRowStride = vPlane.rowStride
    val vPixelStride = vPlane.pixelStride
    val chromaWidth = width / 2
    val chromaHeight = height / 2
    for (row in 0 until chromaHeight) {
        val uRowStart = row * uRowStride
        val vRowStart = row * vRowStride
        for (col in 0 until chromaWidth) {
            output[cursor++] = vBuffer.get(vRowStart + col * vPixelStride)
            output[cursor++] = uBuffer.get(uRowStart + col * uPixelStride)
        }
    }

    return output
}
532
688
  private fun hasCameraPermission(): Boolean {
533
689
  return ContextCompat.checkSelfPermission(context, Manifest.permission.CAMERA) == PackageManager.PERMISSION_GRANTED
534
690
  }
@@ -1,6 +1,7 @@
1
1
  package com.reactnativerectangledocscanner
2
2
 
3
3
  import android.graphics.Bitmap
4
+ import android.graphics.Rect
4
5
  import android.util.Log
5
6
  import org.opencv.android.Utils
6
7
  import org.opencv.core.*
@@ -98,6 +99,59 @@ class DocumentDetector {
98
99
  return rectangle
99
100
  }
100
101
 
/**
 * Detects a rectangle within a region of interest (ROI) of an NV21 frame.
 *
 * The frame is converted to RGB, rotated by [rotation], cropped to [roi] and
 * passed to [detectRectangleInMat]. The ROI is specified in the rotated image
 * coordinate space and is clamped to the rotated frame so that it always covers
 * at least one pixel.
 *
 * All native [Mat]s allocated here are released even when conversion or
 * detection throws; the exception still propagates to the caller.
 *
 * @param yuvBytes NV21 bytes, copied into a `(height + height / 2) x width` single-channel Mat.
 * @param width frame width before rotation.
 * @param height frame height before rotation.
 * @param rotation clockwise rotation in degrees (90, 180 or 270); any other value
 *   leaves the frame unrotated.
 * @param roi search region in rotated-frame coordinates.
 * @return the detected rectangle translated back into rotated-frame coordinates,
 *   or `null` when nothing was detected inside the ROI.
 */
fun detectRectangleInYUVWithRoi(
    yuvBytes: ByteArray,
    width: Int,
    height: Int,
    rotation: Int,
    roi: Rect
): Rectangle? {
    val yuvMat = Mat(height + height / 2, width, CvType.CV_8UC1)
    val rgbMat = Mat()
    var roiMat: Mat? = null
    try {
        yuvMat.put(0, 0, yuvBytes)
        Imgproc.cvtColor(yuvMat, rgbMat, Imgproc.COLOR_YUV2RGB_NV21)

        val rotationCode = when (rotation) {
            90 -> Core.ROTATE_90_CLOCKWISE
            180 -> Core.ROTATE_180
            270 -> Core.ROTATE_90_COUNTERCLOCKWISE
            else -> null
        }
        if (rotationCode != null) {
            Core.rotate(rgbMat, rgbMat, rotationCode)
        }

        // Clamp the ROI to the rotated frame, keeping at least a 1x1 region.
        val x = roi.left.coerceIn(0, rgbMat.cols() - 1)
        val y = roi.top.coerceIn(0, rgbMat.rows() - 1)
        val right = roi.right.coerceIn(x + 1, rgbMat.cols())
        val bottom = roi.bottom.coerceIn(y + 1, rgbMat.rows())
        val roiRect = org.opencv.core.Rect(x, y, right - x, bottom - y)

        val cropped = Mat(rgbMat, roiRect)
        roiMat = cropped
        val detected = detectRectangleInMat(cropped) ?: return null

        // Detection ran on the crop; shift the corners back into frame coordinates.
        return Rectangle(
            Point(detected.topLeft.x + x, detected.topLeft.y + y),
            Point(detected.topRight.x + x, detected.topRight.y + y),
            Point(detected.bottomLeft.x + x, detected.bottomLeft.y + y),
            Point(detected.bottomRight.x + x, detected.bottomRight.y + y)
        )
    } finally {
        // Native memory is not reclaimed promptly by the GC; always release it.
        roiMat?.release()
        yuvMat.release()
        rgbMat.release()
    }
}
154
+
101
155
  /**
102
156
  * Core detection algorithm using OpenCV
103
157
  */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "react-native-rectangle-doc-scanner",
3
- "version": "3.240.0",
3
+ "version": "3.242.0",
4
4
  "description": "Native-backed document scanner for React Native with customizable overlays.",
5
5
  "license": "MIT",
6
6
  "main": "dist/index.js",