react-native-rectangle-doc-scanner 0.69.0 → 0.70.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -50,6 +50,6 @@ dependencies {
    implementation "androidx.camera:camera-camera2:1.3.1"
    implementation "androidx.camera:camera-lifecycle:1.3.1"
    implementation "androidx.camera:camera-view:1.3.1"
-   implementation "com.google.mlkit:document-scanner:16.0.0-beta3"
    implementation "org.jetbrains.kotlinx:kotlinx-coroutines-android:1.7.3"
+   implementation "org.opencv:opencv-android:4.9.0"
  }
@@ -1,23 +1,52 @@
  package com.reactnativerectangledocscanner

+ import android.Manifest
  import android.content.Context
+ import android.content.pm.PackageManager
+ import android.graphics.Bitmap
+ import android.graphics.BitmapFactory
  import android.graphics.Color
+ import android.media.Image
  import android.util.AttributeSet
  import android.util.Log
+ import android.util.Size as AndroidSize
  import android.widget.FrameLayout
- import androidx.camera.core.ImageAnalysis
- import androidx.camera.core.ImageCapture
+ import androidx.camera.core.*
  import androidx.camera.lifecycle.ProcessCameraProvider
  import androidx.camera.view.PreviewView
+ import androidx.concurrent.futures.await
  import androidx.core.content.ContextCompat
+ import androidx.lifecycle.LifecycleOwner
  import com.facebook.react.bridge.Arguments
  import com.facebook.react.bridge.Promise
  import com.facebook.react.bridge.ReactContext
  import com.facebook.react.bridge.WritableMap
  import com.facebook.react.uimanager.events.RCTEventEmitter
+ import kotlinx.coroutines.CoroutineScope
+ import kotlinx.coroutines.Dispatchers
+ import kotlinx.coroutines.Job
+ import kotlinx.coroutines.launch
+ import kotlinx.coroutines.withContext
+ import org.opencv.android.OpenCVLoader
+ import org.opencv.core.CvType
+ import org.opencv.core.Mat
+ import org.opencv.core.MatOfPoint
+ import org.opencv.core.MatOfPoint2f
+ import org.opencv.core.Point
+ import org.opencv.core.Size as MatSize
+ import org.opencv.imgproc.Imgproc
+ import java.io.File
+ import java.nio.ByteBuffer
+ import java.text.SimpleDateFormat
+ import java.util.Date
+ import java.util.Locale
  import java.util.concurrent.ExecutorService
  import java.util.concurrent.Executors
+ import kotlin.math.abs
+ import kotlin.math.hypot
+ import kotlin.math.max

+ @androidx.camera.core.ExperimentalGetImage
  class RNRDocScannerView @JvmOverloads constructor(
    context: Context,
    attrs: AttributeSet? = null,
@@ -35,10 +64,16 @@ class RNRDocScannerView @JvmOverloads constructor(

    private val previewView: PreviewView = PreviewView(context)
    private var cameraProvider: ProcessCameraProvider? = null
+   private var camera: Camera? = null
    private var imageCapture: ImageCapture? = null
    private var imageAnalysis: ImageAnalysis? = null
    private var cameraExecutor: ExecutorService? = null
+   private val scope = CoroutineScope(Dispatchers.Main + Job())
+
    private var currentStableCounter: Int = 0
+   private var lastQuad: QuadPoints? = null
+   private var lastFrameSize: AndroidSize? = null
+   private var pendingPromise: Promise? = null
    private var captureInFlight: Boolean = false

    init {
@@ -47,77 +82,263 @@ class RNRDocScannerView @JvmOverloads constructor(
        previewView,
        LayoutParams(LayoutParams.MATCH_PARENT, LayoutParams.MATCH_PARENT),
      )
+
+     if (!OpenCVLoader.initDebug()) {
+       Log.w(TAG, "Failed to initialise OpenCV - detection will not run.")
+     }
+
      initializeCamera()
    }

    private fun initializeCamera() {
+     if (!hasCameraPermission()) {
+       Log.w(TAG, "Camera permission missing. Detection will not start.")
+       return
+     }
+
      cameraExecutor = Executors.newSingleThreadExecutor()
      val providerFuture = ProcessCameraProvider.getInstance(context)
      providerFuture.addListener(
        {
-         cameraProvider = providerFuture.get()
-         // TODO: Configure Preview + ImageAnalysis + ML Kit processing.
+         scope.launch {
+           try {
+             cameraProvider = providerFuture.await()
+             bindCameraUseCases()
+           } catch (error: Exception) {
+             Log.e(TAG, "Failed to initialise camera", error)
+           }
+         }
        },
        ContextCompat.getMainExecutor(context),
      )
    }

-   fun emitRectangle(rectangle: WritableMap?) {
+   private fun hasCameraPermission(): Boolean {
+     return ContextCompat.checkSelfPermission(context, Manifest.permission.CAMERA) == PackageManager.PERMISSION_GRANTED
+   }
+
+   private fun bindCameraUseCases() {
+     val provider = cameraProvider ?: return
+     val lifecycleOwner = context as? LifecycleOwner
+     if (lifecycleOwner == null) {
+       Log.w(TAG, "Context is not a LifecycleOwner; cannot bind camera use cases.")
+       return
+     }
+     provider.unbindAll()
+
+     val preview = Preview.Builder()
+       .setTargetAspectRatio(AspectRatio.RATIO_4_3)
+       .setTargetRotation(previewView.display.rotation)
+       .build()
+       .also { it.setSurfaceProvider(previewView.surfaceProvider) }
+
+     imageCapture = ImageCapture.Builder()
+       .setCaptureMode(ImageCapture.CAPTURE_MODE_MINIMIZE_LATENCY)
+       .setTargetAspectRatio(AspectRatio.RATIO_4_3)
+       .setTargetRotation(previewView.display.rotation)
+       .build()
+
+     imageAnalysis = ImageAnalysis.Builder()
+       .setTargetAspectRatio(AspectRatio.RATIO_4_3)
+       .setBackpressureStrategy(ImageAnalysis.STRATEGY_KEEP_ONLY_LATEST)
+       .setOutputImageFormat(ImageAnalysis.OUTPUT_IMAGE_FORMAT_YUV_420_888)
+       .build()
+       .also { analysis ->
+         analysis.setAnalyzer(cameraExecutor!!) { imageProxy ->
+           try {
+             processFrame(imageProxy)
+           } catch (error: Exception) {
+             Log.e(TAG, "Frame processing error", error)
+             imageProxy.close()
+           }
+         }
+       }
+
+     val selector = CameraSelector.Builder()
+       .requireLensFacing(CameraSelector.LENS_FACING_BACK)
+       .build()
+
+     camera = provider.bindToLifecycle(
+       lifecycleOwner,
+       selector,
+       preview,
+       imageCapture,
+       imageAnalysis,
+     )
+
+     updateTorchMode(enableTorch)
+   }
+
+   private fun updateTorchMode(enabled: Boolean) {
+     camera?.cameraControl?.enableTorch(enabled)
+   }
+
+   private fun processFrame(imageProxy: ImageProxy) {
+     val mediaImage = imageProxy.image
+     if (mediaImage == null) {
+       imageProxy.close()
+       return
+     }
+
+     val frameSize = AndroidSize(imageProxy.width, imageProxy.height)
+     lastFrameSize = frameSize
+
+     val mat = yuvToMat(mediaImage, imageProxy.imageInfo.rotationDegrees)
+     val detectedQuad = detectDocument(mat, frameSize)
+
+     imageProxy.close()
+
+     scope.launch {
+       emitDetectionResult(detectedQuad, frameSize)
+       if (autoCapture && detectedQuad != null && currentStableCounter >= detectionCountBeforeCapture && !captureInFlight) {
+         triggerAutoCapture()
+       }
+     }
+   }
+
+   private fun emitDetectionResult(quad: QuadPoints?, frameSize: AndroidSize) {
+     val reactContext = context as? ReactContext ?: return
      val event: WritableMap = Arguments.createMap().apply {
-       if (rectangle != null) {
-         putMap("rectangleCoordinates", rectangle)
+       if (quad != null) {
+         val quadMap = Arguments.createMap().apply {
+           putMap("topLeft", quad.topLeft.toWritable())
+           putMap("topRight", quad.topRight.toWritable())
+           putMap("bottomRight", quad.bottomRight.toWritable())
+           putMap("bottomLeft", quad.bottomLeft.toWritable())
+         }
+         putMap("rectangleCoordinates", quadMap)
          currentStableCounter = (currentStableCounter + 1).coerceAtMost(detectionCountBeforeCapture)
+         lastQuad = quad
        } else {
          putNull("rectangleCoordinates")
          currentStableCounter = 0
+         lastQuad = null
        }
        putInt("stableCounter", currentStableCounter)
-       // Frame size placeholders until analysis is wired.
-       putDouble("frameWidth", width.toDouble())
-       putDouble("frameHeight", height.toDouble())
+       putDouble("frameWidth", frameSize.width.toDouble())
+       putDouble("frameHeight", frameSize.height.toDouble())
      }

-     (context as? ReactContext)
-       ?.getJSModule(RCTEventEmitter::class.java)
+     reactContext
+       .getJSModule(RCTEventEmitter::class.java)
        ?.receiveEvent(id, "onRectangleDetect", event)
    }

-   fun emitPictureTaken(payload: WritableMap) {
-     (context as? ReactContext)
-       ?.getJSModule(RCTEventEmitter::class.java)
-       ?.receiveEvent(id, "onPictureTaken", payload)
+   private fun triggerAutoCapture() {
+     startCapture(null)
    }

    fun capture(promise: Promise) {
+     startCapture(promise)
+   }
+
+   private fun startCapture(promise: Promise?) {
      if (captureInFlight) {
-       promise.reject("capture_in_progress", "A capture request is already running.")
+       promise?.reject("capture_in_progress", "A capture request is already running.")
        return
      }

      val imageCapture = this.imageCapture
      if (imageCapture == null) {
-       promise.reject("capture_unavailable", "Image capture is not initialised yet.")
+       promise?.reject("capture_unavailable", "Image capture is not initialised yet.")
        return
      }

+     val outputDir = context.cacheDir
+     val photoFile = File(
+       outputDir,
+       "docscan-${SimpleDateFormat("yyyyMMdd-HHmmss-SSS", Locale.US).format(Date())}.jpg",
+     )
+
+     val outputOptions = ImageCapture.OutputFileOptions.Builder(photoFile).build()
+
      captureInFlight = true
-     // TODO: Hook into ImageCapture#takePicture and ML Kit cropping.
-     postDelayed(
-       {
-         captureInFlight = false
-         promise.reject("not_implemented", "Native capture pipeline has not been implemented.")
+     pendingPromise = promise
+
+     imageCapture.takePicture(
+       outputOptions,
+       cameraExecutor ?: Executors.newSingleThreadExecutor(),
+       object : ImageCapture.OnImageSavedCallback {
+         override fun onImageSaved(outputFileResults: ImageCapture.OutputFileResults) {
+           scope.launch {
+             handleCaptureSuccess(photoFile)
+           }
+         }
+
+         override fun onError(exception: ImageCaptureException) {
+           scope.launch {
+             handleCaptureFailure(exception)
+           }
+         }
        },
-       100,
      )
    }

-   fun reset() {
+   private suspend fun handleCaptureSuccess(file: File) {
+     withContext(Dispatchers.IO) {
+       val bitmap = BitmapFactory.decodeFile(file.absolutePath)
+       try {
+         val width = bitmap.width
+         val height = bitmap.height
+
+         val frameSize = lastFrameSize
+         val quadForCapture = if (lastQuad != null && frameSize != null) {
+           val scaleX = width.toDouble() / frameSize.width.toDouble()
+           val scaleY = height.toDouble() / frameSize.height.toDouble()
+           lastQuad!!.scaled(scaleX, scaleY)
+         } else {
+           null
+         }
+
+         val croppedPath = if (quadForCapture != null) {
+           cropAndSave(bitmap, quadForCapture, file.parentFile ?: context.cacheDir)
+         } else {
+           file.absolutePath
+         }
+
+         val event = Arguments.createMap().apply {
+           putString("initialImage", "file://${file.absolutePath}")
+           putString("croppedImage", "file://$croppedPath")
+           putDouble("width", width.toDouble())
+           putDouble("height", height.toDouble())
+         }
+
+         withContext(Dispatchers.Main) {
+           emitPictureTaken(event)
+           pendingPromise?.resolve(event)
+           resetAfterCapture()
+         }
+       } catch (error: Exception) {
+         bitmap.recycle()
+
+         withContext(Dispatchers.Main) {
+           handleCaptureFailure(error)
+         }
+       }
+     }
+   }
+
+   private fun handleCaptureFailure(error: Exception) {
+     pendingPromise?.reject(error)
+     resetAfterCapture()
+   }
+
+   private fun resetAfterCapture() {
+     captureInFlight = false
+     pendingPromise = null
      currentStableCounter = 0
    }

-   private fun updateTorchMode(enabled: Boolean) {
-     // TODO: Toggle torch once camera is integrated.
-     Log.d("RNRDocScanner", "Torch set to $enabled (not yet wired).")
+   private fun emitPictureTaken(payload: WritableMap) {
+     val reactContext = context as? ReactContext ?: return
+     reactContext
+       .getJSModule(RCTEventEmitter::class.java)
+       ?.receiveEvent(id, "onPictureTaken", payload)
+   }
+
+   fun reset() {
+     currentStableCounter = 0
+     lastQuad = null
    }

    override fun onDetachedFromWindow() {
@@ -126,4 +347,190 @@ class RNRDocScannerView @JvmOverloads constructor(
      cameraExecutor = null
      cameraProvider?.unbindAll()
    }
+
+   // region Detection helpers
+
+   private fun yuvToMat(image: Image, rotationDegrees: Int): Mat {
+     val bufferY = image.planes[0].buffer.toByteArray()
+     val bufferU = image.planes[1].buffer.toByteArray()
+     val bufferV = image.planes[2].buffer.toByteArray()
+
+     val yuvBytes = ByteArray(bufferY.size + bufferU.size + bufferV.size)
+     bufferY.copyInto(yuvBytes, 0)
+     bufferV.copyInto(yuvBytes, bufferY.size)
+     bufferU.copyInto(yuvBytes, bufferY.size + bufferV.size)
+
+     val yuvMat = Mat(image.height + image.height / 2, image.width, CvType.CV_8UC1)
+     yuvMat.put(0, 0, yuvBytes)
+
+     val bgrMat = Mat()
+     Imgproc.cvtColor(yuvMat, bgrMat, Imgproc.COLOR_YUV2BGR_NV21, 3)
+     yuvMat.release()
+
+     val rotatedMat = Mat()
+     when (rotationDegrees) {
+       90 -> org.opencv.core.Core.rotate(bgrMat, rotatedMat, org.opencv.core.Core.ROTATE_90_CLOCKWISE)
+       180 -> org.opencv.core.Core.rotate(bgrMat, rotatedMat, org.opencv.core.Core.ROTATE_180)
+       270 -> org.opencv.core.Core.rotate(bgrMat, rotatedMat, org.opencv.core.Core.ROTATE_90_COUNTERCLOCKWISE)
+       else -> bgrMat.copyTo(rotatedMat)
+     }
+     bgrMat.release()
+     return rotatedMat
+   }
+
+   private fun detectDocument(mat: Mat, frameSize: AndroidSize): QuadPoints? {
+     if (mat.empty()) {
+       mat.release()
+       return null
+     }
+
+     val gray = Mat()
+     Imgproc.cvtColor(mat, gray, Imgproc.COLOR_BGR2GRAY)
+
+     val blurred = Mat()
+     Imgproc.GaussianBlur(gray, blurred, MatSize(5.0, 5.0), 0.0)
+
+     val edges = Mat()
+     Imgproc.Canny(blurred, edges, 50.0, 150.0)
+
+     val contours = ArrayList<MatOfPoint>()
+     val hierarchy = Mat()
+     Imgproc.findContours(edges, contours, hierarchy, Imgproc.RETR_EXTERNAL, Imgproc.CHAIN_APPROX_SIMPLE)
+
+     var bestQuad: QuadPoints? = null
+     var maxArea = 0.0
+     val frameArea = frameSize.width * frameSize.height.toDouble()
+
+     val approxCurve = MatOfPoint2f()
+     for (contour in contours) {
+       val contour2f = MatOfPoint2f(*contour.toArray())
+       val perimeter = Imgproc.arcLength(contour2f, true)
+       Imgproc.approxPolyDP(contour2f, approxCurve, 0.02 * perimeter, true)
+
+       val points = approxCurve.toArray()
+       if (points.size != 4) {
+         contour.release()
+         contour2f.release()
+         continue
+       }
+
+       val area = abs(Imgproc.contourArea(approxCurve))
+       if (area < frameArea * 0.10 || area > frameArea * 0.95) {
+         contour.release()
+         contour2f.release()
+         continue
+       }
+
+       if (area > maxArea && Imgproc.isContourConvex(MatOfPoint(*points))) {
+         val ordered = orderPoints(points)
+         bestQuad = QuadPoints(
+           topLeft = ordered[0],
+           topRight = ordered[1],
+           bottomRight = ordered[2],
+           bottomLeft = ordered[3],
+         )
+         maxArea = area
+       }
+
+       contour.release()
+       contour2f.release()
+     }
+
+     gray.release()
+     blurred.release()
+     edges.release()
+     hierarchy.release()
+     approxCurve.release()
+     mat.release()
+
+     return bestQuad
+   }
+
+   private fun orderPoints(points: Array<Point>): Array<Point> {
+     val sorted = points.sortedBy { it.x + it.y }
+     val tl = sorted.first()
+     val br = sorted.last()
+     val remaining = points.filter { it != tl && it != br }
+     val (tr, bl) =
+       if (remaining[0].x > remaining[1].x) remaining[0] to remaining[1] else remaining[1] to remaining[0]
+     return arrayOf(tl, tr, br, bl)
+   }
+
+   // endregion
+
+   private fun cropAndSave(bitmap: Bitmap, quad: QuadPoints, outputDir: File): String {
+     val srcMat = Mat()
+     org.opencv.android.Utils.bitmapToMat(bitmap, srcMat)
+
+     val ordered = quad.toArray()
+     val widthA = hypot(ordered[2].x - ordered[3].x, ordered[2].y - ordered[3].y)
+     val widthB = hypot(ordered[1].x - ordered[0].x, ordered[1].y - ordered[0].y)
+     val heightA = hypot(ordered[1].x - ordered[2].x, ordered[1].y - ordered[2].y)
+     val heightB = hypot(ordered[0].x - ordered[3].x, ordered[0].y - ordered[3].y)
+
+     val maxWidth = max(widthA, widthB).toInt().coerceAtLeast(1)
+     val maxHeight = max(heightA, heightB).toInt().coerceAtLeast(1)
+
+     val srcPoints = MatOfPoint2f(*ordered)
+     val dstPoints = MatOfPoint2f(
+       Point(0.0, 0.0),
+       Point(maxWidth - 1.0, 0.0),
+       Point(maxWidth - 1.0, maxHeight - 1.0),
+       Point(0.0, maxHeight - 1.0),
+     )
+
+     val transform = Imgproc.getPerspectiveTransform(srcPoints, dstPoints)
+     val warped = Mat(MatSize(maxWidth.toDouble(), maxHeight.toDouble()), srcMat.type())
+     Imgproc.warpPerspective(srcMat, warped, transform, warped.size())
+
+     val croppedBitmap = Bitmap.createBitmap(maxWidth, maxHeight, Bitmap.Config.ARGB_8888)
+     org.opencv.android.Utils.matToBitmap(warped, croppedBitmap)
+
+     val outputFile = File(
+       outputDir,
+       "docscan-cropped-${SimpleDateFormat("yyyyMMdd-HHmmss-SSS", Locale.US).format(Date())}.jpg",
+     )
+     outputFile.outputStream().use { stream ->
+       croppedBitmap.compress(Bitmap.CompressFormat.JPEG, quality.coerceIn(10, 100), stream)
+     }
+
+     srcMat.release()
+     warped.release()
+     transform.release()
+     srcPoints.release()
+     dstPoints.release()
+
+     return outputFile.absolutePath
+   }
+
+   private fun Point.toWritable(): WritableMap = Arguments.createMap().apply {
+     putDouble("x", x)
+     putDouble("y", y)
+   }
+
+   private fun ByteBuffer.toByteArray(): ByteArray {
+     val bytes = ByteArray(remaining())
+     get(bytes)
+     rewind()
+     return bytes
+   }
+
+   companion object {
+     private const val TAG = "RNRDocScanner"
+   }
+ }
+
+ data class QuadPoints(
+   val topLeft: Point,
+   val topRight: Point,
+   val bottomRight: Point,
+   val bottomLeft: Point,
+ ) {
+   fun toArray(): Array<Point> = arrayOf(topLeft, topRight, bottomRight, bottomLeft)
+   fun scaled(scaleX: Double, scaleY: Double): QuadPoints = QuadPoints(
+     topLeft = Point(topLeft.x * scaleX, topLeft.y * scaleY),
+     topRight = Point(topRight.x * scaleX, topRight.y * scaleY),
+     bottomRight = Point(bottomRight.x * scaleX, bottomRight.y * scaleY),
+     bottomLeft = Point(bottomLeft.x * scaleX, bottomLeft.y * scaleY),
+   )
  }
@@ -1,10 +1,12 @@
  import AVFoundation
+ import CoreImage
  import Foundation
  import React
+ import UIKit
  import Vision

  @objc(RNRDocScannerView)
- class RNRDocScannerView: UIView {
+ class RNRDocScannerView: UIView, AVCaptureVideoDataOutputSampleBufferDelegate, AVCapturePhotoCaptureDelegate {
    @objc var detectionCountBeforeCapture: NSNumber = 8
    @objc var autoCapture: Bool = true
    @objc var enableTorch: Bool = false {
@@ -21,11 +23,18 @@ class RNRDocScannerView: UIView {
    private let session = AVCaptureSession()
    private let sessionQueue = DispatchQueue(label: "com.reactnative.rectangledocscanner.session")
    private let analysisQueue = DispatchQueue(label: "com.reactnative.rectangledocscanner.analysis")
+   private let ciContext = CIContext()
+
    private var previewLayer: AVCaptureVideoPreviewLayer?
-   private var photoOutput = AVCapturePhotoOutput()
+   private let videoOutput = AVCaptureVideoDataOutput()
+   private let photoOutput = AVCapturePhotoOutput()

    private var currentStableCounter: Int = 0
+   private var isProcessingFrame = false
    private var isCaptureInFlight = false
+   private var lastObservation: VNRectangleObservation?
+   private var lastFrameSize: CGSize = .zero
+   private var photoCaptureCompletion: ((Result<RNRDocScannerCaptureResult, Error>) -> Void)?

    override init(frame: CGRect) {
      super.init(frame: frame)
@@ -76,10 +85,19 @@ class RNRDocScannerView: UIView {
        session.addInput(videoInput)

        if session.canAddOutput(photoOutput) {
+         photoOutput.isHighResolutionCaptureEnabled = true
          session.addOutput(photoOutput)
        }

-       // TODO: Wire up AVCaptureVideoDataOutput + rectangle detection pipeline.
+       videoOutput.videoSettings = [
+         kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_420YpCbCr8BiPlanarFullRange,
+       ]
+       videoOutput.alwaysDiscardsLateVideoFrames = true
+       videoOutput.setSampleBufferDelegate(self, queue: analysisQueue)
+
+       if session.canAddOutput(videoOutput) {
+         session.addOutput(videoOutput)
+       }
      }
    }

@@ -115,16 +133,73 @@ class RNRDocScannerView: UIView {
      return AVCaptureDevice.devices(for: .video).first(where: { $0.position == position })
    }

+   // MARK: - Detection
+
+   func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
+     if isProcessingFrame {
+       return
+     }
+     guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
+       return
+     }
+
+     isProcessingFrame = true
+     CVPixelBufferLockBaseAddress(pixelBuffer, .readOnly)
+     let frameSize = CGSize(width: CVPixelBufferGetWidth(pixelBuffer), height: CVPixelBufferGetHeight(pixelBuffer))
+     lastFrameSize = frameSize
+     let orientation = currentExifOrientation()
+
+     defer {
+       CVPixelBufferUnlockBaseAddress(pixelBuffer, .readOnly)
+       isProcessingFrame = false
+     }
+
+     let request = VNDetectRectanglesRequest { [weak self] request, error in
+       guard let self else { return }
+
+       if let error {
+         NSLog("[RNRDocScanner] detection error: \(error)")
+         self.lastObservation = nil
+         self.handleDetectedRectangle(nil, frameSize: frameSize)
+         return
+       }
+
+       guard let observation = (request.results as? [VNRectangleObservation])?.first else {
+         self.lastObservation = nil
+         self.handleDetectedRectangle(nil, frameSize: frameSize)
+         return
+       }
+
+       self.lastObservation = observation
+       self.handleDetectedRectangle(observation, frameSize: frameSize)
+     }
+
+     request.maximumObservations = 1
+     request.minimumConfidence = 0.6
+     request.minimumAspectRatio = 0.3
+     request.maximumAspectRatio = 1.0
+     request.minimumSize = 0.15
+
+     let handler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, orientation: orientation, options: [:])
+     do {
+       try handler.perform([request])
+     } catch {
+       NSLog("[RNRDocScanner] Failed to run Vision request: \(error)")
+       lastObservation = nil
+       handleDetectedRectangle(nil, frameSize: frameSize)
+     }
+   }
+
    func handleDetectedRectangle(_ rectangle: VNRectangleObservation?, frameSize: CGSize) {
      guard let onRectangleDetect else { return }

      let payload: [String: Any?]
      if let rectangle {
        let points = [
-         point(from: rectangle.topLeft, frameSize: frameSize),
-         point(from: rectangle.topRight, frameSize: frameSize),
-         point(from: rectangle.bottomRight, frameSize: frameSize),
-         point(from: rectangle.bottomLeft, frameSize: frameSize),
+         pointForOverlay(from: rectangle.topLeft, frameSize: frameSize),
+         pointForOverlay(from: rectangle.topRight, frameSize: frameSize),
+         pointForOverlay(from: rectangle.bottomRight, frameSize: frameSize),
+         pointForOverlay(from: rectangle.bottomLeft, frameSize: frameSize),
        ]

        currentStableCounter = min(currentStableCounter + 1, Int(truncating: detectionCountBeforeCapture))
@@ -154,10 +229,12 @@ class RNRDocScannerView: UIView {
      }
    }

-   private func point(from normalizedPoint: CGPoint, frameSize: CGSize) -> CGPoint {
+   private func pointForOverlay(from normalizedPoint: CGPoint, frameSize: CGSize) -> CGPoint {
      CGPoint(x: normalizedPoint.x * frameSize.width, y: (1 - normalizedPoint.y) * frameSize.height)
    }

+   // MARK: - Capture
+
    func capture(completion: @escaping (Result<RNRDocScannerCaptureResult, Error>) -> Void) {
      sessionQueue.async { [weak self] in
        guard let self else { return }
@@ -167,23 +244,190 @@ class RNRDocScannerView: UIView {
          return
        }

-       guard photoOutput.connections.isEmpty == false else {
+       guard photoOutput.connection(with: .video) != nil else {
          completion(.failure(RNRDocScannerError.captureUnavailable))
          return
        }

        isCaptureInFlight = true
+       photoCaptureCompletion = completion

-       // TODO: Implement real capture logic; emit stub callback for now.
-       DispatchQueue.main.asyncAfter(deadline: .now() + 0.1) {
-         self.isCaptureInFlight = false
-         completion(.failure(RNRDocScannerError.notImplemented))
+       let settings = AVCapturePhotoSettings(format: [AVVideoCodecKey: AVVideoCodecType.jpeg])
+       settings.isHighResolutionPhotoEnabled = photoOutput.isHighResolutionCaptureEnabled
+       if photoOutput.supportedFlashModes.contains(.on) {
+         settings.flashMode = enableTorch ? .on : .off
        }
+
+       photoOutput.capturePhoto(with: settings, delegate: self)
      }
    }

    func resetStability() {
      currentStableCounter = 0
+     lastObservation = nil
+   }
+
+   // MARK: - AVCapturePhotoCaptureDelegate
+
+   func photoOutput(_ output: AVCapturePhotoOutput, didFinishProcessingPhoto photo: AVCapturePhoto, error: Error?) {
+     guard let completion = photoCaptureCompletion else {
+       isCaptureInFlight = false
+       return
+     }
+
+     if let error {
+       finishCapture(result: .failure(error))
+       return
+     }
+
+     guard let data = photo.fileDataRepresentation() else {
+       finishCapture(result: .failure(RNRDocScannerError.imageCreationFailed))
+       return
+     }
+
+     let dimensions = photoDimensions(photo: photo)
+     do {
+       let original = try serializeImageData(data, suffix: "original")
+       let croppedString: String?
+
+       if let croppedData = generateCroppedImage(from: data) {
+         croppedString = try serializeImageData(croppedData, suffix: "cropped").string
+       } else {
+         croppedString = original.string
+       }
+
+       let result = RNRDocScannerCaptureResult(
+         croppedImage: croppedString,
+         originalImage: original.string,
+         width: dimensions.width,
+         height: dimensions.height
+       )
+
+       finishCapture(result: .success(result))
+     } catch {
+       finishCapture(result: .failure(error))
+     }
+   }
+
+   func photoOutput(_ output: AVCapturePhotoOutput, didFinishCaptureFor resolvedSettings: AVCaptureResolvedPhotoSettings, error: Error?) {
+     if let error, isCaptureInFlight {
+       finishCapture(result: .failure(error))
+     }
+   }
+
+   private func finishCapture(result: Result<RNRDocScannerCaptureResult, Error>) {
+     let completion = photoCaptureCompletion
+     photoCaptureCompletion = nil
+     isCaptureInFlight = false
+
+     DispatchQueue.main.async {
+       switch result {
+       case let .success(payload):
+         completion?(.success(payload))
+         self.emitPictureTaken(payload)
+       case let .failure(error):
+         completion?(.failure(error))
+       }
+     }
+   }
+
+   private func emitPictureTaken(_ result: RNRDocScannerCaptureResult) {
+     guard let onPictureTaken else { return }
+     let payload: [String: Any] = [
+       "croppedImage": result.croppedImage ?? NSNull(),
+       "initialImage": result.originalImage,
+       "width": result.width,
+       "height": result.height,
+     ]
+     onPictureTaken(payload)
+   }
+
+   // MARK: - Helpers
+
+   private func currentExifOrientation() -> CGImagePropertyOrientation {
+     switch UIDevice.current.orientation {
+     case .landscapeLeft:
+       return .up
+     case .landscapeRight:
+       return .down
+     case .portraitUpsideDown:
+       return .left
+     default:
+       return .right
+     }
+   }
+
+   private func photoDimensions(photo: AVCapturePhoto) -> CGSize {
+     if let pixelBuffer = photo.pixelBuffer {
+       return CGSize(width: CVPixelBufferGetWidth(pixelBuffer), height: CVPixelBufferGetHeight(pixelBuffer))
+     }
+
+     let width = photo.metadata[kCGImagePropertyPixelWidth as String] as? Int ?? Int(lastFrameSize.width)
+     let height = photo.metadata[kCGImagePropertyPixelHeight as String] as? Int ?? Int(lastFrameSize.height)
+     return CGSize(width: CGFloat(width), height: CGFloat(height))
+   }
+
+   private func serializeImageData(_ data: Data, suffix: String) throws -> (string: String, url: URL?) {
+     let filename = "docscan-\(UUID().uuidString)-\(suffix).jpg"
+     let url = FileManager.default.temporaryDirectory.appendingPathComponent(filename)
+     do {
+       try data.write(to: url, options: .atomic)
+     } catch {
+       throw RNRDocScannerError.fileWriteFailed
+     }
+     return (url.absoluteString, url)
+   }
+
+   private func generateCroppedImage(from data: Data) -> Data? {
+     guard let ciImage = CIImage(data: data) else {
+       return nil
+     }
+
+     var observation: VNRectangleObservation? = nil
+     let request = VNDetectRectanglesRequest { request, _ in
+       observation = (request.results as? [VNRectangleObservation])?.first
+     }
+     request.maximumObservations = 1
+     request.minimumConfidence = 0.6
+
+     let handler = VNImageRequestHandler(ciImage: ciImage, options: [:])
+     try? handler.perform([request])
+
+     guard let targetObservation = observation ?? lastObservation else {
+       return nil
+     }
+
+     let size = ciImage.extent.size
+     let topLeft = normalizedPoint(targetObservation.topLeft, in: size, flipY: false)
+     let topRight = normalizedPoint(targetObservation.topRight, in: size, flipY: false)
+     let bottomLeft = normalizedPoint(targetObservation.bottomLeft, in: size, flipY: false)
+     let bottomRight = normalizedPoint(targetObservation.bottomRight, in: size, flipY: false)
+
+     guard let filter = CIFilter(name: "CIPerspectiveCorrection") else {
+       return nil
+     }
+
+     filter.setValue(ciImage, forKey: kCIInputImageKey)
+     filter.setValue(CIVector(cgPoint: topLeft), forKey: "inputTopLeft")
+     filter.setValue(CIVector(cgPoint: topRight), forKey: "inputTopRight")
+     filter.setValue(CIVector(cgPoint: bottomLeft), forKey: "inputBottomLeft")
+     filter.setValue(CIVector(cgPoint: bottomRight), forKey: "inputBottomRight")
+
+     guard let corrected = filter.outputImage else {
+       return nil
+     }
+
+     guard let cgImage = ciContext.createCGImage(corrected, from: corrected.extent) else {
+       return nil
+     }
+
+     let cropped = UIImage(cgImage: cgImage)
+     return cropped.jpegData(compressionQuality: CGFloat(max(0.05, min(1.0, quality.doubleValue / 100.0))))
+   }
+
+   private func normalizedPoint(_ point: CGPoint, in size: CGSize, flipY: Bool) -> CGPoint {
+     let yValue = flipY ? (1 - point.y) : point.y
+     return CGPoint(x: point.x * size.width, y: yValue * size.height)
    }
  }

@@ -197,7 +441,8 @@ struct RNRDocScannerCaptureResult {
  enum RNRDocScannerError: Error {
    case captureInProgress
    case captureUnavailable
-   case notImplemented
+   case imageCreationFailed
+   case fileWriteFailed
    case viewNotFound

    var code: String {
@@ -206,8 +451,10 @@ enum RNRDocScannerError: Error {
        return "capture_in_progress"
      case .captureUnavailable:
        return "capture_unavailable"
-     case .notImplemented:
-       return "not_implemented"
+     case .imageCreationFailed:
+       return "image_creation_failed"
+     case .fileWriteFailed:
+       return "file_write_failed"
      case .viewNotFound:
        return "view_not_found"
      }
@@ -219,8 +466,10 @@ enum RNRDocScannerError: Error {
        return "A capture request is already in flight."
      case .captureUnavailable:
        return "Photo output is not configured yet."
-     case .notImplemented:
-       return "Native capture is not implemented yet."
+     case .imageCreationFailed:
+       return "Unable to create image data from capture."
+     case .fileWriteFailed:
+       return "Failed to persist captured image to disk."
      case .viewNotFound:
        return "Unable to locate the native DocScanner view."
      }
@@ -22,7 +22,7 @@ class RNRDocScannerViewManager: RCTViewManager {
        switch result {
        case let .success(payload):
          resolve([
-           "croppedImage": payload.croppedImage as Any,
+           "croppedImage": payload.croppedImage ?? NSNull(),
            "initialImage": payload.originalImage,
            "width": payload.width,
            "height": payload.height,
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "react-native-rectangle-doc-scanner",
-   "version": "0.69.0",
+   "version": "0.70.0",
    "description": "Native-backed document scanner for React Native with customizable overlays.",
    "license": "MIT",
    "main": "dist/index.js",