react-native-rectangle-doc-scanner 3.238.0 → 3.240.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -68,6 +68,9 @@ dependencies {
68
68
  // OpenCV for document detection
69
69
  implementation 'org.opencv:opencv:4.9.0'
70
70
 
71
+ // ML Kit object detection for live rectangle hints
72
+ implementation 'com.google.mlkit:object-detection:17.0.1'
73
+
71
74
  // Coroutines for async operations
72
75
  implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-core:1.7.3'
73
76
  implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-android:1.7.3'
@@ -23,6 +23,10 @@ import android.util.Size
23
23
  import android.view.Surface
24
24
  import android.view.TextureView
25
25
  import androidx.core.content.ContextCompat
26
+ import com.google.mlkit.vision.common.InputImage
27
+ import com.google.mlkit.vision.objects.ObjectDetection
28
+ import com.google.mlkit.vision.objects.defaults.ObjectDetectorOptions
29
+ import org.opencv.core.Point
26
30
  import java.io.File
27
31
  import java.io.FileOutputStream
28
32
  import java.util.concurrent.atomic.AtomicReference
@@ -58,6 +62,12 @@ class CameraController(
58
62
 
59
63
  private val pendingCapture = AtomicReference<PendingCapture?>()
60
64
  private val analysisInFlight = AtomicBoolean(false)
65
+ private val objectDetector = ObjectDetection.getClient(
66
+ ObjectDetectorOptions.Builder()
67
+ .setDetectorMode(ObjectDetectorOptions.STREAM_MODE)
68
+ .enableMultipleObjects()
69
+ .build()
70
+ )
61
71
 
62
72
  var onFrameAnalyzed: ((Rectangle?, Int, Int) -> Unit)? = null
63
73
 
@@ -179,6 +189,7 @@ class CameraController(
179
189
 
180
190
  fun shutdown() {
181
191
  stopCamera()
192
+ objectDetector.close()
182
193
  cameraThread.quitSafely()
183
194
  analysisThread.quitSafely()
184
195
  }
@@ -352,21 +363,42 @@ class CameraController(
352
363
  }
353
364
 
354
365
  private fun analyzeImage(image: Image) {
355
- try {
356
- val nv21 = image.toNv21()
357
- val rotationDegrees = computeRotationDegrees()
358
- val rectangle = DocumentDetector.detectRectangleInYUV(nv21, image.width, image.height, rotationDegrees)
359
-
360
- val frameWidth = if (rotationDegrees == 90 || rotationDegrees == 270) image.height else image.width
361
- val frameHeight = if (rotationDegrees == 90 || rotationDegrees == 270) image.width else image.height
362
-
363
- onFrameAnalyzed?.invoke(rectangle, frameWidth, frameHeight)
366
+ val rotationDegrees = computeRotationDegrees()
367
+ val inputImage = try {
368
+ InputImage.fromMediaImage(image, rotationDegrees)
364
369
  } catch (e: Exception) {
365
- Log.e(TAG, "[CAMERA2] Error analyzing frame", e)
366
- } finally {
370
+ Log.e(TAG, "[CAMERA2] Failed to create InputImage", e)
367
371
  image.close()
368
372
  analysisInFlight.set(false)
373
+ return
369
374
  }
375
+
376
+ objectDetector.process(inputImage)
377
+ .addOnSuccessListener { objects ->
378
+ val best = objects.maxByOrNull { obj ->
379
+ val box = obj.boundingBox
380
+ box.width() * box.height()
381
+ }
382
+ val rectangle = best?.boundingBox?.let { box ->
383
+ Rectangle(
384
+ Point(box.left.toDouble(), box.top.toDouble()),
385
+ Point(box.right.toDouble(), box.top.toDouble()),
386
+ Point(box.left.toDouble(), box.bottom.toDouble()),
387
+ Point(box.right.toDouble(), box.bottom.toDouble())
388
+ )
389
+ }
390
+
391
+ val frameWidth = if (rotationDegrees == 90 || rotationDegrees == 270) image.height else image.width
392
+ val frameHeight = if (rotationDegrees == 90 || rotationDegrees == 270) image.width else image.height
393
+ onFrameAnalyzed?.invoke(rectangle, frameWidth, frameHeight)
394
+ }
395
+ .addOnFailureListener { e ->
396
+ Log.e(TAG, "[CAMERA2] ML Kit detection failed", e)
397
+ }
398
+ .addOnCompleteListener {
399
+ image.close()
400
+ analysisInFlight.set(false)
401
+ }
370
402
  }
371
403
 
372
404
  private fun processCapture(image: Image, pending: PendingCapture) {
@@ -497,48 +529,6 @@ class CameraController(
497
529
  return Bitmap.createBitmap(bitmap, 0, 0, bitmap.width, bitmap.height, matrix, true)
498
530
  }
499
531
 
500
- private fun Image.toNv21(): ByteArray {
501
- val width = width
502
- val height = height
503
- val ySize = width * height
504
- val uvSize = width * height / 2
505
- val nv21 = ByteArray(ySize + uvSize)
506
-
507
- val yBuffer = planes[0].buffer
508
- val uBuffer = planes[1].buffer
509
- val vBuffer = planes[2].buffer
510
-
511
- val yRowStride = planes[0].rowStride
512
- val yPixelStride = planes[0].pixelStride
513
- var outputOffset = 0
514
- for (row in 0 until height) {
515
- var inputOffset = row * yRowStride
516
- for (col in 0 until width) {
517
- nv21[outputOffset++] = yBuffer.get(inputOffset)
518
- inputOffset += yPixelStride
519
- }
520
- }
521
-
522
- val uvRowStride = planes[1].rowStride
523
- val uvPixelStride = planes[1].pixelStride
524
- val vRowStride = planes[2].rowStride
525
- val vPixelStride = planes[2].pixelStride
526
- val uvHeight = height / 2
527
- val uvWidth = width / 2
528
- for (row in 0 until uvHeight) {
529
- var uInputOffset = row * uvRowStride
530
- var vInputOffset = row * vRowStride
531
- for (col in 0 until uvWidth) {
532
- nv21[outputOffset++] = vBuffer.get(vInputOffset)
533
- nv21[outputOffset++] = uBuffer.get(uInputOffset)
534
- uInputOffset += uvPixelStride
535
- vInputOffset += vPixelStride
536
- }
537
- }
538
-
539
- return nv21
540
- }
541
-
542
532
  private fun hasCameraPermission(): Boolean {
543
533
  return ContextCompat.checkSelfPermission(context, Manifest.permission.CAMERA) == PackageManager.PERMISSION_GRANTED
544
534
  }
@@ -106,6 +106,7 @@ class DocumentDetector {
106
106
  val blurredMat = Mat()
107
107
  val cannyMat = Mat()
108
108
  val morphMat = Mat()
109
+ val threshMat = Mat()
109
110
 
110
111
  try {
111
112
  // Convert to grayscale
@@ -124,57 +125,74 @@ class DocumentDetector {
124
125
  Imgproc.morphologyEx(cannyMat, morphMat, Imgproc.MORPH_CLOSE, kernel)
125
126
  kernel.release()
126
127
 
127
- // Find contours
128
- val contours = mutableListOf<MatOfPoint>()
129
- val hierarchy = Mat()
130
- Imgproc.findContours(
131
- morphMat,
132
- contours,
133
- hierarchy,
134
- Imgproc.RETR_EXTERNAL,
135
- Imgproc.CHAIN_APPROX_SIMPLE
136
- )
137
-
138
- // Find the largest contour that approximates to a quadrilateral
139
- var largestRectangle: Rectangle? = null
140
- var largestArea = 0.0
141
- val minArea = max(600.0, (srcMat.rows() * srcMat.cols()) * 0.001)
142
-
143
- for (contour in contours) {
144
- val contourArea = Imgproc.contourArea(contour)
145
-
146
- // Filter small contours
147
- if (contourArea < minArea) continue
148
-
149
- // Approximate contour to polygon
150
- val approx = MatOfPoint2f()
151
- val contour2f = MatOfPoint2f(*contour.toArray())
152
- val epsilon = 0.02 * Imgproc.arcLength(contour2f, true)
153
- Imgproc.approxPolyDP(contour2f, approx, epsilon, true)
154
-
155
- // Check if it's a quadrilateral
156
- if (approx.total() == 4L && Imgproc.isContourConvex(MatOfPoint(*approx.toArray()))) {
157
- val points = approx.toArray()
158
-
159
- if (contourArea > largestArea) {
160
- largestArea = contourArea
161
- largestRectangle = orderPoints(points)
128
+ fun findLargestRectangle(source: Mat): Rectangle? {
129
+ val contours = mutableListOf<MatOfPoint>()
130
+ val hierarchy = Mat()
131
+ Imgproc.findContours(
132
+ source,
133
+ contours,
134
+ hierarchy,
135
+ Imgproc.RETR_EXTERNAL,
136
+ Imgproc.CHAIN_APPROX_SIMPLE
137
+ )
138
+
139
+ var largestRectangle: Rectangle? = null
140
+ var largestArea = 0.0
141
+ val minArea = max(500.0, (srcMat.rows() * srcMat.cols()) * 0.0008)
142
+
143
+ for (contour in contours) {
144
+ val contourArea = Imgproc.contourArea(contour)
145
+ if (contourArea < minArea) continue
146
+
147
+ val approx = MatOfPoint2f()
148
+ val contour2f = MatOfPoint2f(*contour.toArray())
149
+ val epsilon = 0.018 * Imgproc.arcLength(contour2f, true)
150
+ Imgproc.approxPolyDP(contour2f, approx, epsilon, true)
151
+
152
+ if (approx.total() == 4L && Imgproc.isContourConvex(MatOfPoint(*approx.toArray()))) {
153
+ val points = approx.toArray()
154
+ if (contourArea > largestArea) {
155
+ largestArea = contourArea
156
+ largestRectangle = orderPoints(points)
157
+ }
162
158
  }
159
+
160
+ approx.release()
161
+ contour2f.release()
163
162
  }
164
163
 
165
- approx.release()
166
- contour2f.release()
164
+ hierarchy.release()
165
+ contours.forEach { it.release() }
166
+ return largestRectangle
167
167
  }
168
168
 
169
- hierarchy.release()
170
- contours.forEach { it.release() }
169
+ // First pass: Canny-based edges (good for strong edges).
170
+ var rectangle = findLargestRectangle(morphMat)
171
+
172
+ // Fallback: adaptive threshold (better for low-contrast cards).
173
+ if (rectangle == null) {
174
+ Imgproc.adaptiveThreshold(
175
+ blurredMat,
176
+ threshMat,
177
+ 255.0,
178
+ Imgproc.ADAPTIVE_THRESH_GAUSSIAN_C,
179
+ Imgproc.THRESH_BINARY,
180
+ 15,
181
+ 2.0
182
+ )
183
+ val kernel = Imgproc.getStructuringElement(Imgproc.MORPH_RECT, Size(3.0, 3.0))
184
+ Imgproc.morphologyEx(threshMat, morphMat, Imgproc.MORPH_CLOSE, kernel)
185
+ kernel.release()
186
+ rectangle = findLargestRectangle(morphMat)
187
+ }
171
188
 
172
- return largestRectangle
189
+ return rectangle
173
190
  } finally {
174
191
  grayMat.release()
175
192
  blurredMat.release()
176
193
  cannyMat.release()
177
194
  morphMat.release()
195
+ threshMat.release()
178
196
  }
179
197
  }
180
198
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "react-native-rectangle-doc-scanner",
3
- "version": "3.238.0",
3
+ "version": "3.240.0",
4
4
  "description": "Native-backed document scanner for React Native with customizable overlays.",
5
5
  "license": "MIT",
6
6
  "main": "dist/index.js",