@trustchex/react-native-sdk 1.362.4 → 1.374.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/TrustchexSDK.podspec +3 -3
- package/android/build.gradle +3 -3
- package/android/src/main/java/com/trustchex/reactnativesdk/camera/TrustchexCameraView.kt +71 -17
- package/ios/Camera/TrustchexCameraView.swift +166 -119
- package/lib/module/Shared/Components/FaceCamera.js +1 -0
- package/lib/module/Shared/Components/IdentityDocumentCamera.js +344 -207
- package/lib/module/Shared/Components/QrCodeScannerCamera.js +1 -8
- package/lib/module/Shared/Libs/mrz.utils.js +202 -9
- package/lib/module/Translation/Resources/en.js +0 -4
- package/lib/module/Translation/Resources/tr.js +0 -4
- package/lib/module/version.js +1 -1
- package/lib/typescript/src/Shared/Components/FaceCamera.d.ts.map +1 -1
- package/lib/typescript/src/Shared/Components/IdentityDocumentCamera.d.ts.map +1 -1
- package/lib/typescript/src/Shared/Components/QrCodeScannerCamera.d.ts.map +1 -1
- package/lib/typescript/src/Shared/Components/TrustchexCamera.d.ts +1 -0
- package/lib/typescript/src/Shared/Components/TrustchexCamera.d.ts.map +1 -1
- package/lib/typescript/src/Shared/Libs/mrz.utils.d.ts +8 -0
- package/lib/typescript/src/Shared/Libs/mrz.utils.d.ts.map +1 -1
- package/lib/typescript/src/Translation/Resources/en.d.ts +0 -4
- package/lib/typescript/src/Translation/Resources/en.d.ts.map +1 -1
- package/lib/typescript/src/Translation/Resources/tr.d.ts +0 -4
- package/lib/typescript/src/Translation/Resources/tr.d.ts.map +1 -1
- package/lib/typescript/src/version.d.ts +1 -1
- package/package.json +1 -1
- package/src/Shared/Components/FaceCamera.tsx +1 -0
- package/src/Shared/Components/IdentityDocumentCamera.tsx +443 -265
- package/src/Shared/Components/QrCodeScannerCamera.tsx +1 -9
- package/src/Shared/Components/TrustchexCamera.tsx +1 -0
- package/src/Shared/Libs/mrz.utils.ts +238 -26
- package/src/Translation/Resources/en.ts +0 -4
- package/src/Translation/Resources/tr.ts +0 -4
- package/src/version.ts +1 -1
package/TrustchexSDK.podspec
CHANGED
|
@@ -19,9 +19,9 @@ Pod::Spec.new do |s|
|
|
|
19
19
|
s.public_header_files = "ios/OpenCV/OpenCVHelper.h"
|
|
20
20
|
|
|
21
21
|
# ML Kit dependencies
|
|
22
|
-
s.dependency "GoogleMLKit/TextRecognition"
|
|
23
|
-
s.dependency "GoogleMLKit/FaceDetection"
|
|
24
|
-
s.dependency "GoogleMLKit/BarcodeScanning"
|
|
22
|
+
s.dependency "GoogleMLKit/TextRecognition", "~> 9.0"
|
|
23
|
+
s.dependency "GoogleMLKit/FaceDetection", "~> 9.0"
|
|
24
|
+
s.dependency "GoogleMLKit/BarcodeScanning", "~> 9.0"
|
|
25
25
|
|
|
26
26
|
# OpenCV dependency
|
|
27
27
|
s.dependency "OpenCV", "~> 4.3.0"
|
package/android/build.gradle
CHANGED
|
@@ -83,9 +83,9 @@ dependencies {
|
|
|
83
83
|
implementation "androidx.camera:camera-video:1.4.0"
|
|
84
84
|
|
|
85
85
|
// Google ML Kit dependencies
|
|
86
|
-
implementation 'com.google.mlkit:text-recognition:16.0.
|
|
87
|
-
implementation 'com.google.mlkit:face-detection:16.1.
|
|
88
|
-
implementation 'com.google.mlkit:barcode-scanning:17.
|
|
86
|
+
implementation 'com.google.mlkit:text-recognition:16.0.0'
|
|
87
|
+
implementation 'com.google.mlkit:face-detection:16.1.5'
|
|
88
|
+
implementation 'com.google.mlkit:barcode-scanning:17.1.0'
|
|
89
89
|
|
|
90
90
|
// Google Play Services dependency for Tasks
|
|
91
91
|
implementation 'com.google.android.gms:play-services-tasks:18.2.0'
|
|
@@ -44,10 +44,10 @@ class TrustchexCameraView(context: ThemedReactContext) : FrameLayout(context) {
|
|
|
44
44
|
// ==================================================================================
|
|
45
45
|
// PORTRAIT-ONLY CAMERA VIEW
|
|
46
46
|
// All camera use cases are locked to ROTATION_0 (portrait) orientation.
|
|
47
|
-
//
|
|
48
|
-
//
|
|
49
|
-
//
|
|
50
|
-
//
|
|
47
|
+
// Supports two resolutions:
|
|
48
|
+
// - HD: 720x1280 (portrait HD) - lower bandwidth, faster processing
|
|
49
|
+
// - Full HD: 1080x1920 (portrait Full HD, default) - sharp text/document capture
|
|
50
|
+
// Selection criteria: use Full HD for document scanning, HD for real-time detection
|
|
51
51
|
// Activity must be locked to portrait in AndroidManifest.xml
|
|
52
52
|
// ==================================================================================
|
|
53
53
|
|
|
@@ -69,6 +69,7 @@ class TrustchexCameraView(context: ThemedReactContext) : FrameLayout(context) {
|
|
|
69
69
|
private var torchEnabled = false
|
|
70
70
|
private var frameProcessingEnabled = false
|
|
71
71
|
private var targetFps = 10
|
|
72
|
+
private var resolution = "fullhd" // "hd" (720x1280) or "fullhd" (1080x1920, default)
|
|
72
73
|
private var isCameraInitialized = false
|
|
73
74
|
private var isStoppingRecording = false // Track if stopRecording was called to prevent cancelRecording from deleting the file
|
|
74
75
|
|
|
@@ -80,6 +81,8 @@ class TrustchexCameraView(context: ThemedReactContext) : FrameLayout(context) {
|
|
|
80
81
|
|
|
81
82
|
// ML Kit detector instances (lazy, created once)
|
|
82
83
|
private val textRecognizer by lazy {
|
|
84
|
+
// Use DEFAULT_OPTIONS for best accuracy with Latin scripts (including MRZ)
|
|
85
|
+
// ML Kit Text Recognition v2 provides superior accuracy for document scanning
|
|
83
86
|
TextRecognition.getClient(TextRecognizerOptions.DEFAULT_OPTIONS)
|
|
84
87
|
}
|
|
85
88
|
private val faceDetector by lazy {
|
|
@@ -202,6 +205,14 @@ class TrustchexCameraView(context: ThemedReactContext) : FrameLayout(context) {
|
|
|
202
205
|
targetFps = fps.coerceIn(1, 30)
|
|
203
206
|
}
|
|
204
207
|
|
|
208
|
+
/**
 * Selects the capture resolution used by the camera.
 *
 * Only "hd" (case-insensitive) selects the HD setting; every other value is
 * stored as "fullhd", the default.
 *
 * @param res requested resolution name, expected to be "hd" or "fullhd"
 */
fun setResolution(res: String) {
    val normalized = if (res.lowercase() == "hd") "hd" else "fullhd"

    // Nothing to do when the effective setting is unchanged; this avoids
    // re-running camera initialization every time the same value is pushed again.
    if (normalized == resolution) return

    resolution = normalized
    if (isCameraInitialized) {
        initializeCamera()
    }
}
|
|
215
|
+
|
|
205
216
|
fun setFaceDetectionEnabled(enabled: Boolean) {
|
|
206
217
|
faceDetectionEnabled = enabled
|
|
207
218
|
}
|
|
@@ -238,14 +249,12 @@ class TrustchexCameraView(context: ThemedReactContext) : FrameLayout(context) {
|
|
|
238
249
|
provider.unbindAll()
|
|
239
250
|
|
|
240
251
|
try {
|
|
241
|
-
// Determine resolution based on
|
|
242
|
-
//
|
|
243
|
-
//
|
|
244
|
-
val
|
|
245
|
-
|
|
246
|
-
Size(
|
|
247
|
-
} else {
|
|
248
|
-
Size(1080, 1920) // Portrait Full HD for back camera
|
|
252
|
+
// Determine resolution based on configured resolution setting
|
|
253
|
+
// "hd": 720x1280 (lower bandwidth, faster processing)
|
|
254
|
+
// "fullhd": 1080x1920 (default - sharp text/document capture)
|
|
255
|
+
val targetResolution = when (resolution.lowercase()) {
|
|
256
|
+
"hd" -> Size(720, 1280) // Portrait HD
|
|
257
|
+
else -> Size(1080, 1920) // Portrait Full HD (default)
|
|
249
258
|
}
|
|
250
259
|
|
|
251
260
|
// Preview use case
|
|
@@ -273,9 +282,13 @@ class TrustchexCameraView(context: ThemedReactContext) : FrameLayout(context) {
|
|
|
273
282
|
// Removing it allows us to bind VideoCapture + Analyzer + Preview simultaneously
|
|
274
283
|
// within the 3-use-case limit of many Android devices.
|
|
275
284
|
|
|
276
|
-
// Video capture use case —
|
|
285
|
+
// Video capture use case — select quality based on resolution setting
|
|
286
|
+
val videoQuality = when (resolution.lowercase()) {
|
|
287
|
+
"hd" -> Quality.HD // 720x1280
|
|
288
|
+
else -> Quality.FHD // 1080x1920 (Full HD, default)
|
|
289
|
+
}
|
|
277
290
|
val recorder = Recorder.Builder()
|
|
278
|
-
.setQualitySelector(QualitySelector.from(
|
|
291
|
+
.setQualitySelector(QualitySelector.from(videoQuality))
|
|
279
292
|
.build()
|
|
280
293
|
videoCapture = VideoCapture.withOutput(recorder)
|
|
281
294
|
|
|
@@ -351,6 +364,12 @@ class TrustchexCameraView(context: ThemedReactContext) : FrameLayout(context) {
|
|
|
351
364
|
return
|
|
352
365
|
}
|
|
353
366
|
|
|
367
|
+
// ML Kit Performance Optimization Tips Applied:
|
|
368
|
+
// 1. Throttle detector calls using STRATEGY_KEEP_ONLY_LATEST backpressure
|
|
369
|
+
// 2. Drop frames if detector is still busy (prevents queue buildup)
|
|
370
|
+
// 3. Process at lower resolution (720x1280 portrait) for real-time detection
|
|
371
|
+
// 4. Use ImageAnalysis.STRATEGY_KEEP_ONLY_LATEST for frame dropping
|
|
372
|
+
// 5. Close ImageProxy immediately after ML Kit processing completes
|
|
354
373
|
val hasAnyDetection = frameProcessingEnabled && (faceDetectionEnabled || textRecognitionEnabled || barcodeScanningEnabled)
|
|
355
374
|
if (!hasAnyDetection) {
|
|
356
375
|
isProcessing.set(false)
|
|
@@ -363,9 +382,6 @@ class TrustchexCameraView(context: ThemedReactContext) : FrameLayout(context) {
|
|
|
363
382
|
|
|
364
383
|
// Create InputImage directly from camera frame — zero-copy, no base64 for ML Kit
|
|
365
384
|
val inputImage = InputImage.fromMediaImage(mediaImage, rotationDegrees)
|
|
366
|
-
|
|
367
|
-
// Use original image for text recognition
|
|
368
|
-
val textInputImage = inputImage
|
|
369
385
|
|
|
370
386
|
// Calculate portrait-oriented dimensions early (JPEG is already rotated)
|
|
371
387
|
val isRotated = rotationDegrees == 90 || rotationDegrees == 270
|
|
@@ -376,6 +392,10 @@ class TrustchexCameraView(context: ThemedReactContext) : FrameLayout(context) {
|
|
|
376
392
|
// Brightness calculation restricted to scanning frame area (between 36% from top and 36% from bottom, 5% margins on sides)
|
|
377
393
|
val averageBrightness = computeYPlaneBrightness(imageProxy, reportedWidth, reportedHeight)
|
|
378
394
|
|
|
395
|
+
// Use original inputImage directly for text recognition
|
|
396
|
+
// ML Kit works best with native camera frames, not processed bitmaps
|
|
397
|
+
val textInputImage = inputImage
|
|
398
|
+
|
|
379
399
|
// Generate JPEG base64 only when JS side explicitly needs the image
|
|
380
400
|
// NOTE: Do NOT auto-generate for face detection - too expensive, causes frame drops
|
|
381
401
|
val jpegBase64: String? = if (includeBase64) {
|
|
@@ -390,6 +410,9 @@ class TrustchexCameraView(context: ThemedReactContext) : FrameLayout(context) {
|
|
|
390
410
|
} else null
|
|
391
411
|
|
|
392
412
|
val textTask = if (textRecognitionEnabled) {
|
|
413
|
+
if (!frameProcessingEnabled) {
|
|
414
|
+
android.util.Log.w("TrustchexCamera", "Text recognition enabled but frame processing disabled!")
|
|
415
|
+
}
|
|
393
416
|
textRecognizer.process(textInputImage).also { tasks.add(it) }
|
|
394
417
|
} else null
|
|
395
418
|
|
|
@@ -398,6 +421,7 @@ class TrustchexCameraView(context: ThemedReactContext) : FrameLayout(context) {
|
|
|
398
421
|
} else null
|
|
399
422
|
|
|
400
423
|
// Wait for all detectors, then build + send a single event to JS
|
|
424
|
+
// ML Kit resource cleanup: InputImage is automatically cleaned after task completion
|
|
401
425
|
Tasks.whenAllComplete(tasks).addOnCompleteListener { _ ->
|
|
402
426
|
try {
|
|
403
427
|
val frameData = Arguments.createMap()
|
|
@@ -451,6 +475,9 @@ class TrustchexCameraView(context: ThemedReactContext) : FrameLayout(context) {
|
|
|
451
475
|
if (textTask.isSuccessful) {
|
|
452
476
|
val result = textTask.result
|
|
453
477
|
frameData.putString("resultText", result.text)
|
|
478
|
+
if (result.text.isNotEmpty()) {
|
|
479
|
+
android.util.Log.d("TrustchexCamera", "✓ Text recognized: length=${result.text.length}")
|
|
480
|
+
}
|
|
454
481
|
|
|
455
482
|
val blocksArray = Arguments.createArray()
|
|
456
483
|
for (block in result.textBlocks) {
|
|
@@ -471,6 +498,7 @@ class TrustchexCameraView(context: ThemedReactContext) : FrameLayout(context) {
|
|
|
471
498
|
}
|
|
472
499
|
frameData.putArray("textBlocks", blocksArray)
|
|
473
500
|
} else {
|
|
501
|
+
android.util.Log.d("TrustchexCamera", "⚠ Text recognition failed")
|
|
474
502
|
frameData.putString("resultText", "")
|
|
475
503
|
frameData.putArray("textBlocks", Arguments.createArray())
|
|
476
504
|
}
|
|
@@ -581,6 +609,32 @@ class TrustchexCameraView(context: ThemedReactContext) : FrameLayout(context) {
|
|
|
581
609
|
return if (count > 0) sum.toDouble() / count else 0.0
|
|
582
610
|
}
|
|
583
611
|
|
|
612
|
+
/**
 * Scores how suitable a frame's dimensions are for ML Kit text recognition.
 *
 * ML Kit text recognition guidance:
 * - Minimum 16x16 pixels per character for acceptable accuracy
 * - Ideal: 16-24 pixels per character
 * - At 720x1280 (portrait HD), assuming ~30px average character width,
 *   a frame fits ~24 characters per line (720/30) and ~42 lines (1280/30)
 *
 * The score depends only on the frame's short and long side, so a landscape
 * buffer (e.g. 1280x720) scores the same as its portrait equivalent (720x1280).
 *
 * @param width frame width in pixels
 * @param height frame height in pixels
 * @return quality score: 1.0 (optimal), 0.85 (good), 0.65 (acceptable) or 0.4 (poor)
 */
private fun calculateTextRecognitionQuality(width: Int, height: Int): Double {
    // Normalize orientation: the pixel budget is the same whichever side is "width".
    val shortSide = minOf(width, height)
    val longSide = maxOf(width, height)

    return when {
        shortSide >= 720 && longSide >= 1280 -> 1.0   // Optimal
        shortSide >= 640 && longSide >= 960 -> 0.85   // Good
        shortSide >= 480 && longSide >= 640 -> 0.65   // Acceptable
        else -> 0.4                                   // Poor
    }
}
|
|
635
|
+
|
|
636
|
+
|
|
637
|
+
|
|
584
638
|
private fun yuvImageProxyToJpegBase64(imageProxy: ImageProxy, rotationDegrees: Int): String? {
|
|
585
639
|
try {
|
|
586
640
|
val width = imageProxy.width
|
|
@@ -30,6 +30,14 @@ class TrustchexCameraView: UIView {
|
|
|
30
30
|
}
|
|
31
31
|
}
|
|
32
32
|
}
|
|
33
|
+
/// Capture resolution name: "hd" or "fullhd" (the default).
///
/// Assigning a value that differs from the previous one re-runs `setupCamera()`.
@objc var resolution: String = "fullhd" {
    didSet {
        // Same value as before: leave the camera as it is.
        guard resolution != oldValue else { return }
        setupCamera()
    }
}
|
|
33
41
|
@objc var torchEnabled: Bool = false {
|
|
34
42
|
didSet {
|
|
35
43
|
if torchEnabled != oldValue {
|
|
@@ -114,27 +122,23 @@ class TrustchexCameraView: UIView {
|
|
|
114
122
|
// Add video input
|
|
115
123
|
let cameraPosition: AVCaptureDevice.Position = (_cameraType == "front") ? .front : .back
|
|
116
124
|
|
|
117
|
-
// Set quality based on
|
|
118
|
-
//
|
|
119
|
-
//
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
session.sessionPreset = .hd1920x1080
|
|
124
|
-
} else if session.canSetSessionPreset(.hd1280x720) {
|
|
125
|
-
session.sessionPreset = .hd1280x720
|
|
126
|
-
} else {
|
|
127
|
-
session.sessionPreset = .high
|
|
128
|
-
}
|
|
125
|
+
// Set quality based on resolution setting
|
|
126
|
+
// "hd": 1280x720 preset (HD) - lower bandwidth, faster processing
|
|
127
|
+
// "fullhd": 1920x1080 preset (Full HD, default) - sharp text/document capture
|
|
128
|
+
let sessionPreset: AVCaptureSession.Preset
|
|
129
|
+
if resolution.lowercased() == "hd" {
|
|
130
|
+
sessionPreset = .hd1280x720
|
|
129
131
|
} else {
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
132
|
+
sessionPreset = .hd1920x1080 // Full HD (default)
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
if session.canSetSessionPreset(sessionPreset) {
|
|
136
|
+
session.sessionPreset = sessionPreset
|
|
137
|
+
} else if sessionPreset == .hd1920x1080 && session.canSetSessionPreset(.hd1280x720) {
|
|
138
|
+
// Fallback from Full HD to HD
|
|
139
|
+
session.sessionPreset = .hd1280x720
|
|
140
|
+
} else if session.canSetSessionPreset(.high) {
|
|
141
|
+
session.sessionPreset = .high
|
|
138
142
|
}
|
|
139
143
|
let camera = selectBestCamera(for: cameraPosition)
|
|
140
144
|
guard let camera = camera,
|
|
@@ -418,6 +422,12 @@ class TrustchexCameraView: UIView {
|
|
|
418
422
|
targetFps = fps
|
|
419
423
|
}
|
|
420
424
|
|
|
425
|
+
/// Sets the capture resolution from a string.
///
/// Only "hd" (case-insensitive) is stored as "hd"; any other value is stored
/// as "fullhd", the default.
///
/// - Parameter res: Requested resolution name, expected to be "hd" or "fullhd".
// NOTE(review): the `@objc var resolution` property declared in this class also
// exposes an Objective-C `setResolution:` setter — confirm the two selectors do
// not collide when this file is compiled.
@objc(setResolution:)
func setResolution(_ res: String) {
    resolution = res.lowercased() == "hd" ? "hd" : "fullhd"
}
|
|
430
|
+
|
|
421
431
|
@objc func setFocusPoint(_ x: NSNumber, _ y: NSNumber) {
|
|
422
432
|
sessionQueue.async { [weak self] in
|
|
423
433
|
guard let camera = self?.currentCamera else { return }
|
|
@@ -695,6 +705,12 @@ extension TrustchexCameraView: AVCaptureVideoDataOutputSampleBufferDelegate {
|
|
|
695
705
|
}
|
|
696
706
|
lastFrameTime = currentTime
|
|
697
707
|
|
|
708
|
+
// ML Kit Performance Optimization Tips Applied (iOS):
|
|
709
|
+
// 1. alwaysDiscardsLateVideoFrames = true throttles detector calls
|
|
710
|
+
// 2. Drop frames if detector is still busy (prevents queue buildup)
|
|
711
|
+
// 3. Process at 1080x1920 (Full HD) for optimal real-time detection
|
|
712
|
+
// 4. Use synchronous results(in:) API for video frames (Google recommended)
|
|
713
|
+
// 5. Dispatch to background queue to unblock videoQueue immediately
|
|
698
714
|
// Mark as processing on videoQueue
|
|
699
715
|
isProcessing = true
|
|
700
716
|
|
|
@@ -733,16 +749,30 @@ extension TrustchexCameraView: AVCaptureVideoDataOutputSampleBufferDelegate {
|
|
|
733
749
|
let ciImage = CIImage(cvPixelBuffer: pixelBuffer)
|
|
734
750
|
let orientedImage = isBufferLandscape ? ciImage.oriented(.right) : ciImage
|
|
735
751
|
|
|
736
|
-
//
|
|
752
|
+
// Compute brightness early so it is available for the rest of this frame's processing
|
|
753
|
+
let brightness = computeBrightness(from: pixelBuffer, width: portraitWidth, height: portraitHeight)
|
|
754
|
+
|
|
755
|
+
// For text recognition, use original image directly
|
|
756
|
+
// ML Kit works best with native camera frames
|
|
757
|
+
let textEnhancedImage = orientedImage
|
|
758
|
+
|
|
759
|
+
// Create VisionImage from the oriented image (no enhancement is applied; textEnhancedImage is orientedImage)
|
|
737
760
|
// This ensures MLKit processes the image in the correct orientation
|
|
738
|
-
guard let cgImage = self.ciContext.createCGImage(
|
|
761
|
+
guard let cgImage = self.ciContext.createCGImage(textEnhancedImage, from: textEnhancedImage.extent) else {
|
|
739
762
|
resetProcessingState()
|
|
740
763
|
return
|
|
741
764
|
}
|
|
742
765
|
let visionImage = VisionImage(image: UIImage(cgImage: cgImage))
|
|
743
766
|
visionImage.orientation = .up // Already oriented correctly
|
|
744
767
|
|
|
745
|
-
// Use
|
|
768
|
+
// Use image for text recognition
|
|
769
|
+
// ML Kit text recognition best practices (iOS):
|
|
770
|
+
// - Requires minimum 16x16 pixels per character (ideal 16-24px per character)
|
|
771
|
+
// - Input image: 1080x1920 (portrait Full HD) provides excellent accuracy at real-time speed
|
|
772
|
+
// - Each character at ~30px = 36 characters per line @ 1080px width
|
|
773
|
+
// - Use synchronous results(in:) API from captureOutput(_:didOutput:from:)
|
|
774
|
+
// - Set AVCaptureVideoDataOutput.alwaysDiscardsLateVideoFrames = true (throttle)
|
|
775
|
+
// - Get results then render overlay in single step for optimal performance
|
|
746
776
|
let textVisionImage = visionImage
|
|
747
777
|
|
|
748
778
|
// Generate JPEG base64 only when JS side explicitly needs the image
|
|
@@ -755,125 +785,114 @@ extension TrustchexCameraView: AVCaptureVideoDataOutputSampleBufferDelegate {
|
|
|
755
785
|
}
|
|
756
786
|
}
|
|
757
787
|
|
|
758
|
-
//
|
|
759
|
-
//
|
|
760
|
-
|
|
788
|
+
// ML Kit Performance Best Practice (iOS):
|
|
789
|
+
// Use synchronous results(in:) API for video processing instead of async process()
|
|
790
|
+
// This is Google's recommended approach for real-time video frame processing
|
|
791
|
+
// Source: https://developers.google.com/ml-kit/vision/barcode-scanning/ios#performance-tips
|
|
761
792
|
var facesArray: [[String: Any]] = []
|
|
762
793
|
var textBlocksArray: [[String: Any]] = []
|
|
763
794
|
var barcodesArray: [[String: Any]] = []
|
|
764
795
|
|
|
796
|
+
// Face detection using synchronous API
|
|
765
797
|
if enableFaceDetection {
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
}
|
|
792
|
-
if face.hasRightEyeOpenProbability {
|
|
793
|
-
faceMap["rightEyeOpenProbability"] = Double(face.rightEyeOpenProbability)
|
|
794
|
-
}
|
|
795
|
-
facesArray.append(faceMap)
|
|
798
|
+
do {
|
|
799
|
+
let faces = try faceDetector.results(in: visionImage)
|
|
800
|
+
for face in faces {
|
|
801
|
+
var faceMap: [String: Any] = [:]
|
|
802
|
+
let isFront = self._cameraType == "front"
|
|
803
|
+
let faceX = isFront ? CGFloat(portraitWidth) - face.frame.origin.x - face.frame.width : face.frame.origin.x
|
|
804
|
+
|
|
805
|
+
faceMap["bounds"] = [
|
|
806
|
+
"x": Int(faceX),
|
|
807
|
+
"y": Int(face.frame.origin.y),
|
|
808
|
+
"width": Int(face.frame.width),
|
|
809
|
+
"height": Int(face.frame.height)
|
|
810
|
+
]
|
|
811
|
+
faceMap["yawAngle"] = face.hasHeadEulerAngleY ? Double(face.headEulerAngleY) : 0.0
|
|
812
|
+
faceMap["pitchAngle"] = face.hasHeadEulerAngleX ? Double(face.headEulerAngleX) : 0.0
|
|
813
|
+
faceMap["rollAngle"] = face.hasHeadEulerAngleZ ? Double(face.headEulerAngleZ) : 0.0
|
|
814
|
+
if face.hasTrackingID {
|
|
815
|
+
faceMap["trackingId"] = face.trackingID
|
|
816
|
+
}
|
|
817
|
+
// Only include probability fields when available (matching Android behavior)
|
|
818
|
+
if face.hasSmilingProbability {
|
|
819
|
+
faceMap["smilingProbability"] = Double(face.smilingProbability)
|
|
820
|
+
}
|
|
821
|
+
if face.hasLeftEyeOpenProbability {
|
|
822
|
+
faceMap["leftEyeOpenProbability"] = Double(face.leftEyeOpenProbability)
|
|
796
823
|
}
|
|
824
|
+
if face.hasRightEyeOpenProbability {
|
|
825
|
+
faceMap["rightEyeOpenProbability"] = Double(face.rightEyeOpenProbability)
|
|
826
|
+
}
|
|
827
|
+
facesArray.append(faceMap)
|
|
797
828
|
}
|
|
798
|
-
|
|
829
|
+
} catch {
|
|
830
|
+
// Face detection failed - continue with empty array
|
|
799
831
|
}
|
|
800
|
-
} else {
|
|
801
|
-
semaphore.signal()
|
|
802
832
|
}
|
|
803
833
|
|
|
804
|
-
// Text recognition
|
|
805
|
-
|
|
834
|
+
// Text recognition using synchronous API
|
|
835
|
+
// On iOS, this completes quickly at 1080x1920 resolution (~100-300ms per frame)
|
|
806
836
|
var resultText = ""
|
|
807
837
|
if enableTextRecognition {
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
}
|
|
841
|
-
|
|
842
|
-
blockMap["blockFrame"] = [
|
|
843
|
-
"x": blockX,
|
|
844
|
-
"y": blockY,
|
|
845
|
-
"width": blockWidth,
|
|
846
|
-
"height": blockHeight,
|
|
847
|
-
"boundingCenterX": blockX + blockWidth / 2,
|
|
848
|
-
"boundingCenterY": blockY + blockHeight / 2
|
|
849
|
-
]
|
|
850
|
-
textBlocksArray.append(blockMap)
|
|
838
|
+
do {
|
|
839
|
+
let text = try textRecognizer.results(in: textVisionImage)
|
|
840
|
+
resultText = text.text
|
|
841
|
+
for block in text.blocks {
|
|
842
|
+
var blockMap: [String: Any] = ["text": block.text]
|
|
843
|
+
let bb = block.frame
|
|
844
|
+
|
|
845
|
+
// When buffer is landscape (1920x1080) but we set orientation to .right,
|
|
846
|
+
// ML Kit might still return coordinates in landscape space.
|
|
847
|
+
// We need to rotate them to portrait space (1080x1920) to match face detection.
|
|
848
|
+
let blockX: Int
|
|
849
|
+
let blockY: Int
|
|
850
|
+
let blockWidth: Int
|
|
851
|
+
let blockHeight: Int
|
|
852
|
+
|
|
853
|
+
if isBufferLandscape {
|
|
854
|
+
// Rotate from landscape (1920x1080) to portrait (1080x1920)
|
|
855
|
+
// When rotating 90° clockwise (.right):
|
|
856
|
+
// new_x = old_y
|
|
857
|
+
// new_y = landscape_width - old_x - width
|
|
858
|
+
// new_width = old_height
|
|
859
|
+
// new_height = old_width
|
|
860
|
+
blockX = Int(bb.origin.y)
|
|
861
|
+
blockY = pixelWidth - Int(bb.origin.x) - Int(bb.width)
|
|
862
|
+
blockWidth = Int(bb.height)
|
|
863
|
+
blockHeight = Int(bb.width)
|
|
864
|
+
} else {
|
|
865
|
+
// Already portrait, use directly
|
|
866
|
+
blockX = Int(bb.origin.x)
|
|
867
|
+
blockY = Int(bb.origin.y)
|
|
868
|
+
blockWidth = Int(bb.width)
|
|
869
|
+
blockHeight = Int(bb.height)
|
|
851
870
|
}
|
|
871
|
+
|
|
872
|
+
blockMap["blockFrame"] = [
|
|
873
|
+
"x": blockX,
|
|
874
|
+
"y": blockY,
|
|
875
|
+
"width": blockWidth,
|
|
876
|
+
"height": blockHeight,
|
|
877
|
+
"boundingCenterX": blockX + blockWidth / 2,
|
|
878
|
+
"boundingCenterY": blockY + blockHeight / 2
|
|
879
|
+
]
|
|
880
|
+
textBlocksArray.append(blockMap)
|
|
852
881
|
}
|
|
853
|
-
|
|
882
|
+
} catch {
|
|
883
|
+
// Text recognition failed - continue with empty result
|
|
854
884
|
}
|
|
855
|
-
} else {
|
|
856
|
-
textSemaphore.signal()
|
|
857
885
|
}
|
|
858
886
|
|
|
859
887
|
// Barcode scanning - use native AVFoundation results (captured via metadata delegate)
|
|
860
888
|
// This is much faster than MLKit barcode scanning
|
|
861
|
-
let barcodeSemaphore = DispatchSemaphore(value: 0)
|
|
862
889
|
if enableBarcodeScanning {
|
|
863
890
|
// Use the barcodes detected by the native AVCaptureMetadataOutput
|
|
864
891
|
barcodesArray = lastDetectedBarcodes
|
|
865
|
-
barcodeSemaphore.signal()
|
|
866
|
-
} else {
|
|
867
|
-
barcodeSemaphore.signal()
|
|
868
892
|
}
|
|
869
893
|
|
|
870
|
-
//
|
|
871
|
-
|
|
872
|
-
_ = textSemaphore.wait(timeout: .now() + 2.0)
|
|
873
|
-
|
|
874
|
-
// Only compute brightness if we haven't timed out or crashed
|
|
875
|
-
// Brightness calculation restricted to scanning frame area (between 36% from top and 36% from bottom, 5% margins on sides)
|
|
876
|
-
let brightness = computeBrightness(from: pixelBuffer, width: portraitWidth, height: portraitHeight)
|
|
894
|
+
// Brightness was already computed earlier in this frame's processing
|
|
895
|
+
// No need to recompute here
|
|
877
896
|
|
|
878
897
|
let currentTime = CACurrentMediaTime() * 1000 // Convert to milliseconds to match Android
|
|
879
898
|
|
|
@@ -962,6 +981,35 @@ extension TrustchexCameraView: AVCaptureVideoDataOutputSampleBufferDelegate {
|
|
|
962
981
|
|
|
963
982
|
return Double(sum) / Double(sampleCount)
|
|
964
983
|
}
|
|
984
|
+
|
|
985
|
+
/**
 Scores how suitable a frame's dimensions are for ML Kit text recognition (iOS).

 ML Kit text recognition guidance:
 - Minimum 16x16 pixels per character for acceptable accuracy
 - Ideal: 16-24 pixels per character
 - At 1080x1920 (portrait Full HD), assuming ~30px average character width,
   a frame fits ~36 characters per line (1080/30) and ~64 lines (1920/30)

 The score depends only on the frame's short and long side, so a landscape
 buffer (e.g. 1920x1080) scores the same as its portrait equivalent (1080x1920).

 - Parameters:
   - width: Frame width in pixels.
   - height: Frame height in pixels.
 - Returns: Quality score: 1.0 (Full HD or larger), 0.85 (HD), 0.65 (acceptable) or 0.4 (poor).
 */
private func calculateTextRecognitionQuality(width: Int, height: Int) -> Double {
    // Normalize orientation: the pixel budget is the same whichever side is "width".
    let shortSide = min(width, height)
    let longSide = max(width, height)

    // iOS treats 1080x1920 as the standard, so only Full HD and above is "optimal".
    switch (shortSide, longSide) {
    case (1080..., 1920...):
        return 1.0   // Optimal (Full HD)
    case (720..., 1280...):
        return 0.85  // Good (HD)
    case (640..., 960...):
        return 0.65  // Acceptable
    default:
        return 0.4   // Poor
    }
}
|
|
965
1013
|
}
|
|
966
1014
|
|
|
967
1015
|
// MARK: - AVCaptureFileOutputRecordingDelegate
|
|
@@ -1131,7 +1179,6 @@ extension TrustchexCameraView: AVCaptureMetadataOutputObjectsDelegate {
|
|
|
1131
1179
|
lastDetectedBarcodes = barcodes
|
|
1132
1180
|
}
|
|
1133
1181
|
}
|
|
1134
|
-
|
|
1135
1182
|
// MARK: - Helper Extensions
|
|
1136
1183
|
extension Comparable {
|
|
1137
1184
|
func clamped(to limits: ClosedRange<Self>) -> Self {
|