@trustchex/react-native-sdk 1.362.6 → 1.381.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/TrustchexSDK.podspec +3 -3
  2. package/android/build.gradle +3 -3
  3. package/android/src/main/java/com/trustchex/reactnativesdk/camera/TrustchexCameraView.kt +64 -19
  4. package/android/src/main/java/com/trustchex/reactnativesdk/opencv/OpenCVModule.kt +636 -301
  5. package/ios/Camera/TrustchexCameraView.swift +166 -119
  6. package/ios/OpenCV/OpenCVHelper.h +0 -7
  7. package/ios/OpenCV/OpenCVHelper.mm +0 -60
  8. package/ios/OpenCV/OpenCVModule.h +0 -4
  9. package/ios/OpenCV/OpenCVModule.mm +440 -358
  10. package/lib/module/Shared/Components/DebugOverlay.js +541 -0
  11. package/lib/module/Shared/Components/FaceCamera.js +1 -0
  12. package/lib/module/Shared/Components/IdentityDocumentCamera.constants.js +44 -0
  13. package/lib/module/Shared/Components/IdentityDocumentCamera.flows.js +270 -0
  14. package/lib/module/Shared/Components/IdentityDocumentCamera.js +708 -1593
  15. package/lib/module/Shared/Components/IdentityDocumentCamera.types.js +3 -0
  16. package/lib/module/Shared/Components/IdentityDocumentCamera.utils.js +273 -0
  17. package/lib/module/Shared/Components/QrCodeScannerCamera.js +1 -8
  18. package/lib/module/Shared/Libs/mrz.utils.js +202 -9
  19. package/lib/module/Translation/Resources/en.js +0 -4
  20. package/lib/module/Translation/Resources/tr.js +0 -4
  21. package/lib/module/version.js +1 -1
  22. package/lib/typescript/src/Shared/Components/DebugOverlay.d.ts +30 -0
  23. package/lib/typescript/src/Shared/Components/DebugOverlay.d.ts.map +1 -0
  24. package/lib/typescript/src/Shared/Components/FaceCamera.d.ts.map +1 -1
  25. package/lib/typescript/src/Shared/Components/IdentityDocumentCamera.constants.d.ts +35 -0
  26. package/lib/typescript/src/Shared/Components/IdentityDocumentCamera.constants.d.ts.map +1 -0
  27. package/lib/typescript/src/Shared/Components/IdentityDocumentCamera.d.ts +3 -56
  28. package/lib/typescript/src/Shared/Components/IdentityDocumentCamera.d.ts.map +1 -1
  29. package/lib/typescript/src/Shared/Components/IdentityDocumentCamera.flows.d.ts +88 -0
  30. package/lib/typescript/src/Shared/Components/IdentityDocumentCamera.flows.d.ts.map +1 -0
  31. package/lib/typescript/src/Shared/Components/IdentityDocumentCamera.types.d.ts +116 -0
  32. package/lib/typescript/src/Shared/Components/IdentityDocumentCamera.types.d.ts.map +1 -0
  33. package/lib/typescript/src/Shared/Components/IdentityDocumentCamera.utils.d.ts +93 -0
  34. package/lib/typescript/src/Shared/Components/IdentityDocumentCamera.utils.d.ts.map +1 -0
  35. package/lib/typescript/src/Shared/Components/QrCodeScannerCamera.d.ts.map +1 -1
  36. package/lib/typescript/src/Shared/Components/TrustchexCamera.d.ts +1 -0
  37. package/lib/typescript/src/Shared/Components/TrustchexCamera.d.ts.map +1 -1
  38. package/lib/typescript/src/Shared/Libs/mrz.utils.d.ts +8 -0
  39. package/lib/typescript/src/Shared/Libs/mrz.utils.d.ts.map +1 -1
  40. package/lib/typescript/src/Translation/Resources/en.d.ts +0 -4
  41. package/lib/typescript/src/Translation/Resources/en.d.ts.map +1 -1
  42. package/lib/typescript/src/Translation/Resources/tr.d.ts +0 -4
  43. package/lib/typescript/src/Translation/Resources/tr.d.ts.map +1 -1
  44. package/lib/typescript/src/version.d.ts +1 -1
  45. package/package.json +1 -1
  46. package/src/Shared/Components/DebugOverlay.tsx +656 -0
  47. package/src/Shared/Components/FaceCamera.tsx +1 -0
  48. package/src/Shared/Components/IdentityDocumentCamera.constants.ts +44 -0
  49. package/src/Shared/Components/IdentityDocumentCamera.flows.ts +342 -0
  50. package/src/Shared/Components/IdentityDocumentCamera.tsx +1105 -2324
  51. package/src/Shared/Components/IdentityDocumentCamera.types.ts +136 -0
  52. package/src/Shared/Components/IdentityDocumentCamera.utils.ts +364 -0
  53. package/src/Shared/Components/QrCodeScannerCamera.tsx +1 -9
  54. package/src/Shared/Components/TrustchexCamera.tsx +1 -0
  55. package/src/Shared/Libs/mrz.utils.ts +238 -26
  56. package/src/Translation/Resources/en.ts +0 -4
  57. package/src/Translation/Resources/tr.ts +0 -4
  58. package/src/version.ts +1 -1
@@ -30,6 +30,14 @@ class TrustchexCameraView: UIView {
30
30
  }
31
31
  }
32
32
  }
33
+ @objc var resolution: String = "fullhd" {
34
+ didSet {
35
+ if resolution != oldValue {
36
+ // "hd" or "fullhd" - reinitialize camera with new resolution
37
+ setupCamera()
38
+ }
39
+ }
40
+ }
33
41
  @objc var torchEnabled: Bool = false {
34
42
  didSet {
35
43
  if torchEnabled != oldValue {
@@ -114,27 +122,23 @@ class TrustchexCameraView: UIView {
114
122
  // Add video input
115
123
  let cameraPosition: AVCaptureDevice.Position = (_cameraType == "front") ? .front : .back
116
124
 
117
- // Set quality based on camera type
118
- // Front camera (liveness): Full HD (1920x1080) for high-quality face detection
119
- // Back camera (documents): Full HD (1920x1080) for sharp document capture
120
- if cameraPosition == .front {
121
- // Front camera: Use Full HD for high-quality liveness detection
122
- if session.canSetSessionPreset(.hd1920x1080) {
123
- session.sessionPreset = .hd1920x1080
124
- } else if session.canSetSessionPreset(.hd1280x720) {
125
- session.sessionPreset = .hd1280x720
126
- } else {
127
- session.sessionPreset = .high
128
- }
125
+ // Set quality based on resolution setting
126
+ // "hd": 720x1280 (HD) - lower bandwidth, faster processing
127
+ // "fullhd": 1920x1080 (Full HD, default) - sharp text/document capture
128
+ let sessionPreset: AVCaptureSession.Preset
129
+ if resolution.lowercased() == "hd" {
130
+ sessionPreset = .hd1280x720
129
131
  } else {
130
- // Back camera: Use Full HD for document scanning
131
- if session.canSetSessionPreset(.hd1920x1080) {
132
- session.sessionPreset = .hd1920x1080
133
- } else if session.canSetSessionPreset(.hd1280x720) {
134
- session.sessionPreset = .hd1280x720
135
- } else {
136
- session.sessionPreset = .high
137
- }
132
+ sessionPreset = .hd1920x1080 // Full HD (default)
133
+ }
134
+
135
+ if session.canSetSessionPreset(sessionPreset) {
136
+ session.sessionPreset = sessionPreset
137
+ } else if sessionPreset == .hd1920x1080 && session.canSetSessionPreset(.hd1280x720) {
138
+ // Fallback from Full HD to HD
139
+ session.sessionPreset = .hd1280x720
140
+ } else if session.canSetSessionPreset(.high) {
141
+ session.sessionPreset = .high
138
142
  }
139
143
  let camera = selectBestCamera(for: cameraPosition)
140
144
  guard let camera = camera,
@@ -418,6 +422,12 @@ class TrustchexCameraView: UIView {
418
422
  targetFps = fps
419
423
  }
420
424
 
425
+ @objc(changeResolution:)
426
+ func changeResolution(_ res: String) {
427
+ // "hd" (720x1280) or "fullhd" (1920x1080, default)
428
+ resolution = res.lowercased() == "hd" ? "hd" : "fullhd"
429
+ }
430
+
421
431
  @objc func setFocusPoint(_ x: NSNumber, _ y: NSNumber) {
422
432
  sessionQueue.async { [weak self] in
423
433
  guard let camera = self?.currentCamera else { return }
@@ -695,6 +705,12 @@ extension TrustchexCameraView: AVCaptureVideoDataOutputSampleBufferDelegate {
695
705
  }
696
706
  lastFrameTime = currentTime
697
707
 
708
+ // ML Kit Performance Optimization Tips Applied (iOS):
709
+ // 1. alwaysDiscardsLateVideoFrames = true throttles detector calls
710
+ // 2. Drop frames if detector is still busy (prevents queue buildup)
711
+ // 3. Process at 1080x1920 (Full HD) for optimal real-time detection
712
+ // 4. Use synchronous results(in:) API for video frames (Google recommended)
713
+ // 5. Dispatch to background queue to unblock videoQueue immediately
698
714
  // Mark as processing on videoQueue
699
715
  isProcessing = true
700
716
 
@@ -733,16 +749,30 @@ extension TrustchexCameraView: AVCaptureVideoDataOutputSampleBufferDelegate {
733
749
  let ciImage = CIImage(cvPixelBuffer: pixelBuffer)
734
750
  let orientedImage = isBufferLandscape ? ciImage.oriented(.right) : ciImage
735
751
 
736
- // Create VisionImage from the oriented CIImage for better text recognition
752
+ // Compute brightness early for OCR-B enhancement decision
753
+ let brightness = computeBrightness(from: pixelBuffer, width: portraitWidth, height: portraitHeight)
754
+
755
+ // For text recognition, use original image directly
756
+ // ML Kit works best with native camera frames
757
+ let textEnhancedImage = orientedImage
758
+
759
+ // Create VisionImage from the image (enhanced or original) for better text recognition
737
760
  // This ensures MLKit processes the image in the correct orientation
738
- guard let cgImage = self.ciContext.createCGImage(orientedImage, from: orientedImage.extent) else {
761
+ guard let cgImage = self.ciContext.createCGImage(textEnhancedImage, from: textEnhancedImage.extent) else {
739
762
  resetProcessingState()
740
763
  return
741
764
  }
742
765
  let visionImage = VisionImage(image: UIImage(cgImage: cgImage))
743
766
  visionImage.orientation = .up // Already oriented correctly
744
767
 
745
- // Use original image for text recognition
768
+ // Use image for text recognition
769
+ // ML Kit text recognition best practices (iOS):
770
+ // - Requires minimum 16x16 pixels per character (ideal 16-24px per character)
771
+ // - Input image: 1080x1920 (portrait Full HD) provides excellent accuracy at real-time speed
772
+ // - Each character at ~30px = 36 characters per line @ 1080px width
773
+ // - Use synchronous results(in:) API from captureOutput(_:didOutput:from:)
774
+ // - Set AVCaptureVideoDataOutput.alwaysDiscardsLateVideoFrames = true (throttle)
775
+ // - Get results then render overlay in single step for optimal performance
746
776
  let textVisionImage = visionImage
747
777
 
748
778
  // Generate JPEG base64 only when JS side explicitly needs the image
@@ -755,125 +785,114 @@ extension TrustchexCameraView: AVCaptureVideoDataOutputSampleBufferDelegate {
755
785
  }
756
786
  }
757
787
 
758
- // Synchronous processing using a semaphore to keep the buffer locked
759
- // This blocks processingQueue, which is fine (we are off videoQueue)
760
- let semaphore = DispatchSemaphore(value: 0)
788
+ // ML Kit Performance Best Practice (iOS):
789
+ // Use synchronous results(in:) API for video processing instead of async process()
790
+ // This is Google's recommended approach for real-time video frame processing
791
+ // Source: https://developers.google.com/ml-kit/vision/barcode-scanning/ios#performance-tips
761
792
  var facesArray: [[String: Any]] = []
762
793
  var textBlocksArray: [[String: Any]] = []
763
794
  var barcodesArray: [[String: Any]] = []
764
795
 
796
+ // Face detection using synchronous API
765
797
  if enableFaceDetection {
766
- faceDetector.process(visionImage) { faces, error in
767
- if let faces = faces {
768
- for face in faces {
769
- var faceMap: [String: Any] = [:]
770
- let isFront = self._cameraType == "front"
771
- let faceX = isFront ? CGFloat(portraitWidth) - face.frame.origin.x - face.frame.width : face.frame.origin.x
772
-
773
- faceMap["bounds"] = [
774
- "x": Int(faceX),
775
- "y": Int(face.frame.origin.y),
776
- "width": Int(face.frame.width),
777
- "height": Int(face.frame.height)
778
- ]
779
- faceMap["yawAngle"] = face.hasHeadEulerAngleY ? Double(face.headEulerAngleY) : 0.0
780
- faceMap["pitchAngle"] = face.hasHeadEulerAngleX ? Double(face.headEulerAngleX) : 0.0
781
- faceMap["rollAngle"] = face.hasHeadEulerAngleZ ? Double(face.headEulerAngleZ) : 0.0
782
- if face.hasTrackingID {
783
- faceMap["trackingId"] = face.trackingID
784
- }
785
- // Only include probability fields when available (matching Android behavior)
786
- if face.hasSmilingProbability {
787
- faceMap["smilingProbability"] = Double(face.smilingProbability)
788
- }
789
- if face.hasLeftEyeOpenProbability {
790
- faceMap["leftEyeOpenProbability"] = Double(face.leftEyeOpenProbability)
791
- }
792
- if face.hasRightEyeOpenProbability {
793
- faceMap["rightEyeOpenProbability"] = Double(face.rightEyeOpenProbability)
794
- }
795
- facesArray.append(faceMap)
798
+ do {
799
+ let faces = try faceDetector.results(in: visionImage)
800
+ for face in faces {
801
+ var faceMap: [String: Any] = [:]
802
+ let isFront = self._cameraType == "front"
803
+ let faceX = isFront ? CGFloat(portraitWidth) - face.frame.origin.x - face.frame.width : face.frame.origin.x
804
+
805
+ faceMap["bounds"] = [
806
+ "x": Int(faceX),
807
+ "y": Int(face.frame.origin.y),
808
+ "width": Int(face.frame.width),
809
+ "height": Int(face.frame.height)
810
+ ]
811
+ faceMap["yawAngle"] = face.hasHeadEulerAngleY ? Double(face.headEulerAngleY) : 0.0
812
+ faceMap["pitchAngle"] = face.hasHeadEulerAngleX ? Double(face.headEulerAngleX) : 0.0
813
+ faceMap["rollAngle"] = face.hasHeadEulerAngleZ ? Double(face.headEulerAngleZ) : 0.0
814
+ if face.hasTrackingID {
815
+ faceMap["trackingId"] = face.trackingID
816
+ }
817
+ // Only include probability fields when available (matching Android behavior)
818
+ if face.hasSmilingProbability {
819
+ faceMap["smilingProbability"] = Double(face.smilingProbability)
820
+ }
821
+ if face.hasLeftEyeOpenProbability {
822
+ faceMap["leftEyeOpenProbability"] = Double(face.leftEyeOpenProbability)
796
823
  }
824
+ if face.hasRightEyeOpenProbability {
825
+ faceMap["rightEyeOpenProbability"] = Double(face.rightEyeOpenProbability)
826
+ }
827
+ facesArray.append(faceMap)
797
828
  }
798
- semaphore.signal()
829
+ } catch {
830
+ // Face detection failed - continue with empty array
799
831
  }
800
- } else {
801
- semaphore.signal()
802
832
  }
803
833
 
804
- // Text recognition
805
- let textSemaphore = DispatchSemaphore(value: 0)
834
+ // Text recognition using synchronous API
835
+ // On iOS, this completes quickly at 1080x1920 resolution (~100-300ms per frame)
806
836
  var resultText = ""
807
837
  if enableTextRecognition {
808
- textRecognizer.process(textVisionImage) { text, error in
809
- if let text = text {
810
- resultText = text.text
811
- for block in text.blocks {
812
- var blockMap: [String: Any] = ["text": block.text]
813
- let bb = block.frame
814
-
815
- // When buffer is landscape (1920x1080) but we set orientation to .right,
816
- // ML Kit might still return coordinates in landscape space.
817
- // We need to rotate them to portrait space (1080x1920) to match face detection.
818
- let blockX: Int
819
- let blockY: Int
820
- let blockWidth: Int
821
- let blockHeight: Int
822
-
823
- if isBufferLandscape {
824
- // Rotate from landscape (1920x1080) to portrait (1080x1920)
825
- // When rotating 90° clockwise (.right):
826
- // new_x = old_y
827
- // new_y = landscape_width - old_x - width
828
- // new_width = old_height
829
- // new_height = old_width
830
- blockX = Int(bb.origin.y)
831
- blockY = pixelWidth - Int(bb.origin.x) - Int(bb.width)
832
- blockWidth = Int(bb.height)
833
- blockHeight = Int(bb.width)
834
- } else {
835
- // Already portrait, use directly
836
- blockX = Int(bb.origin.x)
837
- blockY = Int(bb.origin.y)
838
- blockWidth = Int(bb.width)
839
- blockHeight = Int(bb.height)
840
- }
841
-
842
- blockMap["blockFrame"] = [
843
- "x": blockX,
844
- "y": blockY,
845
- "width": blockWidth,
846
- "height": blockHeight,
847
- "boundingCenterX": blockX + blockWidth / 2,
848
- "boundingCenterY": blockY + blockHeight / 2
849
- ]
850
- textBlocksArray.append(blockMap)
838
+ do {
839
+ let text = try textRecognizer.results(in: textVisionImage)
840
+ resultText = text.text
841
+ for block in text.blocks {
842
+ var blockMap: [String: Any] = ["text": block.text]
843
+ let bb = block.frame
844
+
845
+ // When buffer is landscape (1920x1080) but we set orientation to .right,
846
+ // ML Kit might still return coordinates in landscape space.
847
+ // We need to rotate them to portrait space (1080x1920) to match face detection.
848
+ let blockX: Int
849
+ let blockY: Int
850
+ let blockWidth: Int
851
+ let blockHeight: Int
852
+
853
+ if isBufferLandscape {
854
+ // Rotate from landscape (1920x1080) to portrait (1080x1920)
855
+ // When rotating 90° clockwise (.right):
856
+ // new_x = old_y
857
+ // new_y = landscape_width - old_x - width
858
+ // new_width = old_height
859
+ // new_height = old_width
860
+ blockX = Int(bb.origin.y)
861
+ blockY = pixelWidth - Int(bb.origin.x) - Int(bb.width)
862
+ blockWidth = Int(bb.height)
863
+ blockHeight = Int(bb.width)
864
+ } else {
865
+ // Already portrait, use directly
866
+ blockX = Int(bb.origin.x)
867
+ blockY = Int(bb.origin.y)
868
+ blockWidth = Int(bb.width)
869
+ blockHeight = Int(bb.height)
851
870
  }
871
+
872
+ blockMap["blockFrame"] = [
873
+ "x": blockX,
874
+ "y": blockY,
875
+ "width": blockWidth,
876
+ "height": blockHeight,
877
+ "boundingCenterX": blockX + blockWidth / 2,
878
+ "boundingCenterY": blockY + blockHeight / 2
879
+ ]
880
+ textBlocksArray.append(blockMap)
852
881
  }
853
- textSemaphore.signal()
882
+ } catch {
883
+ // Text recognition failed - continue with empty result
854
884
  }
855
- } else {
856
- textSemaphore.signal()
857
885
  }
858
886
 
859
887
  // Barcode scanning - use native AVFoundation results (captured via metadata delegate)
860
888
  // This is much faster than MLKit barcode scanning
861
- let barcodeSemaphore = DispatchSemaphore(value: 0)
862
889
  if enableBarcodeScanning {
863
890
  // Use the barcodes detected by the native AVCaptureMetadataOutput
864
891
  barcodesArray = lastDetectedBarcodes
865
- barcodeSemaphore.signal()
866
- } else {
867
- barcodeSemaphore.signal()
868
892
  }
869
893
 
870
- // Wait for vision tasks (with timeout to prevent hang)
871
- _ = semaphore.wait(timeout: .now() + 2.0)
872
- _ = textSemaphore.wait(timeout: .now() + 2.0)
873
-
874
- // Only compute brightness if we haven't timed out or crashed
875
- // Brightness calculation restricted to scanning frame area (between 36% from top and 36% from bottom, 5% margins on sides)
876
- let brightness = computeBrightness(from: pixelBuffer, width: portraitWidth, height: portraitHeight)
894
+ // Brightness was already computed earlier for OCR-B enhancement
895
+ // No need to recompute here
877
896
 
878
897
  let currentTime = CACurrentMediaTime() * 1000 // Convert to milliseconds to match Android
879
898
 
@@ -962,6 +981,35 @@ extension TrustchexCameraView: AVCaptureVideoDataOutputSampleBufferDelegate {
962
981
 
963
982
  return Double(sum) / Double(sampleCount)
964
983
  }
984
+
985
+ /**
986
+ * Validates if the image dimensions are suitable for ML Kit text recognition (iOS).
987
+ *
988
+ * ML Kit text recognition requirements (iOS):
989
+ * - Minimum 16x16 pixels per character for acceptable accuracy
990
+ * - Ideal: 16-24 pixels per character for optimal performance
991
+ * - At 1080x1920 (portrait Full HD), assuming ~30px average character width:
992
+ * - Can fit ~36 characters per line (1080/30)
993
+ * - Can fit ~64 lines (1920/30)
994
+ * - Performance: ~100-300ms per frame at 1080x1920 with synchronous API
995
+ *
996
+ * @return quality score (0.0-1.0) where 1.0 is optimal
997
+ */
998
+ private func calculateTextRecognitionQuality(width: Int, height: Int) -> Double {
999
+ // iOS uses 1080x1920 as standard, so we prefer that
1000
+ let pixelScore: Double
1001
+ switch (width, height) {
1002
+ case (1080..., 1920...):
1003
+ pixelScore = 1.0 // Optimal (Full HD)
1004
+ case (720..., 1280...):
1005
+ pixelScore = 0.85 // Good (HD)
1006
+ case (640..., 960...):
1007
+ pixelScore = 0.65 // Acceptable
1008
+ default:
1009
+ pixelScore = 0.4 // Poor
1010
+ }
1011
+ return pixelScore
1012
+ }
965
1013
  }
966
1014
 
967
1015
  // MARK: - AVCaptureFileOutputRecordingDelegate
@@ -1131,7 +1179,6 @@ extension TrustchexCameraView: AVCaptureMetadataOutputObjectsDelegate {
1131
1179
  lastDetectedBarcodes = barcodes
1132
1180
  }
1133
1181
  }
1134
-
1135
1182
  // MARK: - Helper Extensions
1136
1183
  extension Comparable {
1137
1184
  func clamped(to limits: ClosedRange<Self>) -> Self {
@@ -5,13 +5,6 @@ NS_ASSUME_NONNULL_BEGIN
5
5
 
6
6
  @interface OpenCVHelper : NSObject
7
7
 
8
- /// Preprocesses an image for better OCR text recognition
9
- /// Applies bilateral filtering, CLAHE, and sharpening to enhance text clarity
10
- /// @param image The input UIImage to preprocess
11
- /// @param applyThresholding Whether to apply adaptive thresholding (for binary output)
12
- /// @return A preprocessed UIImage optimized for text recognition, or nil if preprocessing fails
13
- + (UIImage * _Nullable)preprocessImageForOCR:(UIImage *)image applyThresholding:(BOOL)applyThresholding;
14
-
15
8
  @end
16
9
 
17
10
  NS_ASSUME_NONNULL_END
@@ -65,64 +65,4 @@
65
65
  return image;
66
66
  }
67
67
 
68
- + (UIImage *)preprocessImageForOCR:(UIImage *)image applyThresholding:(BOOL)applyThresholding {
69
- @try {
70
- if (!image) return nil;
71
-
72
- cv::Mat mat = [self imageToMat:image];
73
- if (mat.empty()) return nil;
74
-
75
- // Step 1: Convert to grayscale
76
- cv::Mat gray;
77
- cv::cvtColor(mat, gray, cv::COLOR_RGB2GRAY);
78
- mat.release();
79
-
80
- // Step 2: Suppress background using blackhat morphology
81
- cv::Mat blackhat;
82
- cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(15, 5));
83
- cv::morphologyEx(gray, blackhat, cv::MORPH_BLACKHAT, kernel);
84
- gray.release();
85
-
86
- // Step 3: Advanced denoising - removes artifacts while keeping character details
87
- cv::Mat denoised;
88
- cv::fastNlMeansDenoising(blackhat, denoised, 6.0, 7, 21);
89
- blackhat.release();
90
-
91
- // Step 4: CLAHE for local contrast without over-amplifying noise
92
- cv::Ptr<cv::CLAHE> clahe = cv::createCLAHE(2.0, cv::Size(8, 8));
93
- cv::Mat enhanced;
94
- clahe->apply(denoised, enhanced);
95
- denoised.release();
96
-
97
- // Step 5: Unsharp masking for clearer edges without halos
98
- cv::Mat blurred;
99
- cv::GaussianBlur(enhanced, blurred, cv::Size(0, 0), 1.2);
100
- cv::Mat sharpened;
101
- cv::addWeighted(enhanced, 1.8, blurred, -0.8, 0, sharpened);
102
- blurred.release();
103
- enhanced.release();
104
-
105
- // Step 6: Normalize to full 0-255 range
106
- // Ensures maximum contrast for ML Kit
107
- cv::Mat result;
108
- cv::normalize(sharpened, result, 0, 255, cv::NORM_MINMAX);
109
- sharpened.release();
110
-
111
- if (applyThresholding) {
112
- cv::Mat thresholded;
113
- cv::adaptiveThreshold(result, thresholded, 255, cv::ADAPTIVE_THRESH_GAUSSIAN_C, cv::THRESH_BINARY, 31, 10);
114
- result.release();
115
- result = thresholded;
116
- }
117
-
118
- UIImage *resultImage = [self matToImage:result];
119
- result.release();
120
-
121
- return resultImage;
122
- } @catch (NSException *exception) {
123
- NSLog(@"OpenCV preprocessing error: %@", exception.reason);
124
- return nil;
125
- }
126
- }
127
-
128
68
  @end
@@ -3,8 +3,4 @@
3
3
 
4
4
  @interface OpenCVModule : NSObject <RCTBridgeModule>
5
5
 
6
- // Synchronous method for preprocessing an image for OCR
7
- // This is called directly from Swift camera code for better performance
8
- - (UIImage * _Nullable)preprocessImageForOCRSync:(UIImage * _Nonnull)image applyThresholding:(BOOL)applyThresholding;
9
-
10
6
  @end