react-native-rectangle-doc-scanner 3.239.0 → 3.241.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -68,6 +68,9 @@ dependencies {
68
68
  // OpenCV for document detection
69
69
  implementation 'org.opencv:opencv:4.9.0'
70
70
 
71
+ // ML Kit object detection for live rectangle hints
72
+ implementation 'com.google.mlkit:object-detection:17.0.1'
73
+
71
74
  // Coroutines for async operations
72
75
  implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-core:1.7.3'
73
76
  implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-android:1.7.3'
@@ -7,6 +7,7 @@ import android.graphics.Bitmap
7
7
  import android.graphics.BitmapFactory
8
8
  import android.graphics.Matrix
9
9
  import android.graphics.SurfaceTexture
10
+ import android.graphics.Rect
10
11
  import android.graphics.RectF
11
12
  import android.graphics.ImageFormat
12
13
  import android.hardware.camera2.CameraCaptureSession
@@ -23,6 +24,10 @@ import android.util.Size
23
24
  import android.view.Surface
24
25
  import android.view.TextureView
25
26
  import androidx.core.content.ContextCompat
27
+ import com.google.mlkit.vision.common.InputImage
28
+ import com.google.mlkit.vision.objects.ObjectDetection
29
+ import com.google.mlkit.vision.objects.defaults.ObjectDetectorOptions
30
+ import org.opencv.core.Point
26
31
  import java.io.File
27
32
  import java.io.FileOutputStream
28
33
  import java.util.concurrent.atomic.AtomicReference
@@ -58,6 +63,13 @@ class CameraController(
58
63
 
59
64
  private val pendingCapture = AtomicReference<PendingCapture?>()
60
65
  private val analysisInFlight = AtomicBoolean(false)
66
+ private val objectDetector = ObjectDetection.getClient(
67
+ ObjectDetectorOptions.Builder()
68
+ .setDetectorMode(ObjectDetectorOptions.STREAM_MODE)
69
+ .enableMultipleObjects()
70
+ .build()
71
+ )
72
+ private var lastRefineTimestamp = 0L
61
73
 
62
74
  var onFrameAnalyzed: ((Rectangle?, Int, Int) -> Unit)? = null
63
75
 
@@ -179,6 +191,7 @@ class CameraController(
179
191
 
180
192
  fun shutdown() {
181
193
  stopCamera()
194
+ objectDetector.close()
182
195
  cameraThread.quitSafely()
183
196
  analysisThread.quitSafely()
184
197
  }
@@ -352,21 +365,40 @@ class CameraController(
352
365
  }
353
366
 
354
367
  private fun analyzeImage(image: Image) {
355
- try {
356
- val nv21 = image.toNv21()
357
- val rotationDegrees = computeRotationDegrees()
358
- val rectangle = DocumentDetector.detectRectangleInYUV(nv21, image.width, image.height, rotationDegrees)
359
-
360
- val frameWidth = if (rotationDegrees == 90 || rotationDegrees == 270) image.height else image.width
361
- val frameHeight = if (rotationDegrees == 90 || rotationDegrees == 270) image.width else image.height
362
-
363
- onFrameAnalyzed?.invoke(rectangle, frameWidth, frameHeight)
368
+ val rotationDegrees = computeRotationDegrees()
369
+ val inputImage = try {
370
+ InputImage.fromMediaImage(image, rotationDegrees)
364
371
  } catch (e: Exception) {
365
- Log.e(TAG, "[CAMERA2] Error analyzing frame", e)
366
- } finally {
372
+ Log.e(TAG, "[CAMERA2] Failed to create InputImage", e)
367
373
  image.close()
368
374
  analysisInFlight.set(false)
375
+ return
369
376
  }
377
+
378
+ objectDetector.process(inputImage)
379
+ .addOnSuccessListener { objects ->
380
+ val best = objects.maxByOrNull { obj ->
381
+ val box = obj.boundingBox
382
+ box.width() * box.height()
383
+ }
384
+ val mlBox = best?.boundingBox
385
+ val rectangle = when {
386
+ mlBox == null -> null
387
+ shouldRefineWithOpenCv() -> refineWithOpenCv(image, rotationDegrees, mlBox) ?: boxToRectangle(mlBox)
388
+ else -> boxToRectangle(mlBox)
389
+ }
390
+
391
+ val frameWidth = if (rotationDegrees == 90 || rotationDegrees == 270) image.height else image.width
392
+ val frameHeight = if (rotationDegrees == 90 || rotationDegrees == 270) image.width else image.height
393
+ onFrameAnalyzed?.invoke(rectangle, frameWidth, frameHeight)
394
+ }
395
+ .addOnFailureListener { e ->
396
+ Log.e(TAG, "[CAMERA2] ML Kit detection failed", e)
397
+ }
398
+ .addOnCompleteListener {
399
+ image.close()
400
+ analysisInFlight.set(false)
401
+ }
370
402
  }
371
403
 
372
404
  private fun processCapture(image: Image, pending: PendingCapture) {
@@ -453,10 +485,7 @@ class CameraController(
453
485
 
454
486
  val matrix = Matrix()
455
487
  bufferRect.offset(centerX - bufferRect.centerX(), centerY - bufferRect.centerY())
456
- matrix.setRectToRect(viewRect, bufferRect, Matrix.ScaleToFit.FILL)
457
-
458
- val scale = max(viewWidth / bufferWidth, viewHeight / bufferHeight)
459
- matrix.postScale(scale, scale, centerX, centerY)
488
+ matrix.setRectToRect(bufferRect, viewRect, Matrix.ScaleToFit.FILL)
460
489
  matrix.postRotate(rotation.toFloat(), centerX, centerY)
461
490
  previewView.setTransform(matrix)
462
491
  }
@@ -497,19 +526,72 @@ class CameraController(
497
526
  return Bitmap.createBitmap(bitmap, 0, 0, bitmap.width, bitmap.height, matrix, true)
498
527
  }
499
528
 
500
- private fun Image.toNv21(): ByteArray {
501
- val width = width
502
- val height = height
529
/**
 * Throttles the OpenCV refinement pass to at most one run per 200 ms.
 *
 * Returns `true` and records the current time in [lastRefineTimestamp] when a refinement is
 * due; returns `false` when the previous refinement happened less than 200 ms ago.
 *
 * The timestamp comes from the wall clock, which can be moved backwards (manual change, network
 * time sync). A negative elapsed time is therefore treated as "due" so that refinement is not
 * suppressed until the clock catches up with the stored timestamp.
 */
private fun shouldRefineWithOpenCv(): Boolean {
    val minIntervalMs = 200L
    val now = System.currentTimeMillis()
    val elapsed = now - lastRefineTimestamp

    // Only skip while inside the throttle window; elapsed < 0 means the clock went backwards.
    if (elapsed in 0L until minIntervalMs) {
        return false
    }
    lastRefineTimestamp = now
    return true
}
537
+
538
/**
 * Tries to replace the ML Kit bounding box with a rectangle found by the OpenCV detector.
 *
 * The frame is converted with [imageToNv21] and handed to
 * `DocumentDetector.detectRectangleInYUV`. The OpenCV result is accepted only when the
 * intersection-over-union between its axis-aligned bounds and [mlBox] is at least 0.2.
 *
 * NOTE(review): the IoU comparison assumes the OpenCV rectangle and [mlBox] are expressed in
 * the same coordinate space — confirm against `DocumentDetector`.
 *
 * @param image frame to run the OpenCV detection on.
 * @param rotationDegrees rotation forwarded to the OpenCV detector.
 * @param mlBox bounding box reported by ML Kit for the same frame.
 * @return the OpenCV rectangle when it overlaps [mlBox] sufficiently; `null` when nothing was
 *   detected, the overlap is too small, or any exception was thrown (logged as a warning).
 */
private fun refineWithOpenCv(image: Image, rotationDegrees: Int, mlBox: Rect): Rectangle? {
    try {
        val yuvBytes = imageToNv21(image)
        val candidate = DocumentDetector.detectRectangleInYUV(yuvBytes, image.width, image.height, rotationDegrees)
            ?: return null

        val overlap = computeIoU(rectangleBounds(candidate), mlBox)
        return if (overlap >= 0.2f) candidate else null
    } catch (e: Exception) {
        Log.w(TAG, "[CAMERA2] OpenCV refine failed", e)
        return null
    }
}
553
+
554
/**
 * Converts an axis-aligned [Rect] into a [Rectangle] made of its four corner points.
 *
 * The corners are passed to the [Rectangle] constructor in the order top-left, top-right,
 * bottom-left, bottom-right.
 */
private fun boxToRectangle(box: Rect): Rectangle {
    val leftX = box.left.toDouble()
    val rightX = box.right.toDouble()
    val topY = box.top.toDouble()
    val bottomY = box.bottom.toDouble()

    return Rectangle(
        Point(leftX, topY),
        Point(rightX, topY),
        Point(leftX, bottomY),
        Point(rightX, bottomY)
    )
}
562
+
563
/**
 * Computes the axis-aligned bounding box of the four corners of [rectangle].
 *
 * Each edge is the minimum or maximum corner coordinate, converted to an integer with `toInt()`.
 */
private fun rectangleBounds(rectangle: Rectangle): Rect {
    val corners = listOf(
        rectangle.topLeft,
        rectangle.bottomLeft,
        rectangle.topRight,
        rectangle.bottomRight
    )
    val xs = corners.map { it.x }
    val ys = corners.map { it.y }

    // The lists are never empty; the 0.0 fallbacks only satisfy the nullable return type.
    val minX = xs.minOrNull() ?: 0.0
    val maxX = xs.maxOrNull() ?: 0.0
    val minY = ys.minOrNull() ?: 0.0
    val maxY = ys.maxOrNull() ?: 0.0

    return Rect(minX.toInt(), minY.toInt(), maxX.toInt(), maxY.toInt())
}
570
+
571
/**
 * Computes the intersection-over-union of two axis-aligned rectangles.
 *
 * @return the ratio of the overlapping area to the combined area, or `0f` when the rectangles
 *   do not overlap or the combined area is not positive.
 */
private fun computeIoU(a: Rect, b: Rect): Float {
    // Edges of the overlapping region, if there is one.
    val overlapLeft = maxOf(a.left, b.left)
    val overlapTop = maxOf(a.top, b.top)
    val overlapRight = minOf(a.right, b.right)
    val overlapBottom = minOf(a.bottom, b.bottom)

    val overlaps = overlapRight > overlapLeft && overlapBottom > overlapTop
    if (!overlaps) return 0f

    val intersection = (overlapRight - overlapLeft).toFloat() * (overlapBottom - overlapTop).toFloat()
    val areaSum = a.width() * a.height() + b.width() * b.height()
    val union = areaSum - intersection

    return if (union > 0f) intersection / union else 0f
}
581
+
582
+ private fun imageToNv21(image: Image): ByteArray {
583
+ val width = image.width
584
+ val height = image.height
503
585
  val ySize = width * height
504
586
  val uvSize = width * height / 2
505
587
  val nv21 = ByteArray(ySize + uvSize)
506
588
 
507
- val yBuffer = planes[0].buffer
508
- val uBuffer = planes[1].buffer
509
- val vBuffer = planes[2].buffer
589
+ val yBuffer = image.planes[0].buffer
590
+ val uBuffer = image.planes[1].buffer
591
+ val vBuffer = image.planes[2].buffer
510
592
 
511
- val yRowStride = planes[0].rowStride
512
- val yPixelStride = planes[0].pixelStride
593
+ val yRowStride = image.planes[0].rowStride
594
+ val yPixelStride = image.planes[0].pixelStride
513
595
  var outputOffset = 0
514
596
  for (row in 0 until height) {
515
597
  var inputOffset = row * yRowStride
@@ -519,10 +601,10 @@ class CameraController(
519
601
  }
520
602
  }
521
603
 
522
- val uvRowStride = planes[1].rowStride
523
- val uvPixelStride = planes[1].pixelStride
524
- val vRowStride = planes[2].rowStride
525
- val vPixelStride = planes[2].pixelStride
604
+ val uvRowStride = image.planes[1].rowStride
605
+ val uvPixelStride = image.planes[1].pixelStride
606
+ val vRowStride = image.planes[2].rowStride
607
+ val vPixelStride = image.planes[2].pixelStride
526
608
  val uvHeight = height / 2
527
609
  val uvWidth = width / 2
528
610
  for (row in 0 until uvHeight) {
@@ -538,7 +620,6 @@ class CameraController(
538
620
 
539
621
  return nv21
540
622
  }
541
-
542
623
  private fun hasCameraPermission(): Boolean {
543
624
  return ContextCompat.checkSelfPermission(context, Manifest.permission.CAMERA) == PackageManager.PERMISSION_GRANTED
544
625
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "react-native-rectangle-doc-scanner",
3
- "version": "3.239.0",
3
+ "version": "3.241.0",
4
4
  "description": "Native-backed document scanner for React Native with customizable overlays.",
5
5
  "license": "MIT",
6
6
  "main": "dist/index.js",