npm - @mleonard9/vin-scanner - Versions diffs - 0.2.7 → 0.2.8 - Mend

@mleonard9/vin-scanner 0.2.7 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/ios/VisionCameraBarcodeScanner.m +14 -3
package/ios/VisionCameraTextRecognition.m +31 -6
package/package.json +1 -1

package/ios/VisionCameraBarcodeScanner.m CHANGED

@@ -5,6 +5,7 @@
 #import <VisionCamera/Frame.h>
 #import <VisionCamera/SharedArray.h>
 #import <React/RCTBridgeModule.h>
+#import <CoreVideo/CoreVideo.h>
 @import MLKitVision;
 @interface VisionCameraBarcodeScannerPlugin : FrameProcessorPlugin
@@ -57,8 +58,13 @@
     if (orientationOverride.length > 0) {
         orientation = [self orientationFromValue:orientationOverride fallback:orientation];
     }
+    // Get image dimensions for coordinate transformation
+    CVImageBufferRef imageBuffer = CMSampleBufferGetImageBuffer(buffer);
+    size_t imageHeight = CVPixelBufferGetHeight(imageBuffer);
     // VisionCamera already normalizes orientation per https://react-native-vision-camera.com/docs/guides/orientation,
-    // so ML Kit just needs the frame’s orientation metadata instead of rotating pixels manually.
+    // so ML Kit just needs the frame's orientation metadata instead of rotating pixels manually.
     MLKVisionImage *image = [[MLKVisionImage alloc] initWithBuffer:buffer];
     image.orientation = orientation;
     NSMutableArray *detections = [NSMutableArray array];
@@ -91,8 +97,13 @@
                 if (boxData != nil) {
                     CGRect frameRect = barcode.frame;
                     const NSUInteger baseIndex = idx * 6;
-                    boxData[baseIndex] = CGRectGetMinY(frameRect);
-                    boxData[baseIndex + 1] = CGRectGetMaxY(frameRect);
+                    // Transform Y coordinates to fix vertical flip issue on iOS
+                    // ML Kit returns coordinates in image space, but VisionCamera expects them flipped
+                    float minY = CGRectGetMinY(frameRect);
+                    float maxY = CGRectGetMaxY(frameRect);
+                    float imageHeightFloat = (float)imageHeight;
+                    boxData[baseIndex] = imageHeightFloat - maxY;  // top = height - bottom
+                    boxData[baseIndex + 1] = imageHeightFloat - minY;  // bottom = height - top
                     boxData[baseIndex + 2] = CGRectGetMinX(frameRect);
                     boxData[baseIndex + 3] = CGRectGetMaxX(frameRect);
                     boxData[baseIndex + 4] = CGRectGetWidth(frameRect);

package/ios/VisionCameraTextRecognition.m CHANGED

@@ -9,6 +9,7 @@
 #import <VisionCamera/VisionCameraProxyHolder.h>
 #import <VisionCamera/Frame.h>
 #import <VisionCamera/SharedArray.h>
+#import <CoreVideo/CoreVideo.h>
 @import MLKitVision;
 @interface VisionCameraTextRecognitionPlugin : FrameProcessorPlugin
@@ -86,6 +87,12 @@
     if (orientationOverride.length > 0) {
         orientation = [self orientationFromValue:orientationOverride fallback:orientation];
     }
+    // Get image dimensions for coordinate transformation
+    CVImageBufferRef imageBuffer = CMSampleBufferGetImageBuffer(buffer);
+    size_t imageHeight = CVPixelBufferGetHeight(imageBuffer);
+    float imageHeightFloat = (float)imageHeight;
     MLKVisionImage *image = [[MLKVisionImage alloc] initWithBuffer:buffer];
     image.orientation = orientation;
     NSMutableArray *detections = [NSMutableArray array];
@@ -105,6 +112,12 @@
             NSString *resultText = result.text;
             for (MLKTextBlock *block in result.blocks) {
                 CGRect blockFrame = block.frame;
+                // Transform Y coordinates to fix vertical flip issue on iOS
+                float blockMinY = CGRectGetMinY(blockFrame);
+                float blockMaxY = CGRectGetMaxY(blockFrame);
+                float blockTransformedMinY = imageHeightFloat - blockMaxY;
+                float blockTransformedMaxY = imageHeightFloat - blockMinY;
                 if (block.lines.count == 0) {
                     NSMutableDictionary *entry = [[NSMutableDictionary alloc] init];
                     entry[@"resultText"] = resultText ?: (id)kCFNull;
@@ -112,7 +125,7 @@
                     entry[@"boxIndex"] = @(boxValues.count);
                     [detections addObject:entry];
                     [boxValues addObject:@[
-                        @(CGRectGetMinY(blockFrame)), @(CGRectGetMaxY(blockFrame)),
+                        @(blockTransformedMinY), @(blockTransformedMaxY),
                         @(CGRectGetMinX(blockFrame)), @(CGRectGetMaxX(blockFrame)),
                         @(-1.f), @(-1.f), @(-1.f), @(-1.f),
                         @(-1.f), @(-1.f), @(-1.f), @(-1.f)
@@ -120,6 +133,12 @@
                 }
                 for (MLKTextLine *line in block.lines) {
                     CGRect lineFrame = line.frame;
+                    // Transform Y coordinates for line frame
+                    float lineMinY = CGRectGetMinY(lineFrame);
+                    float lineMaxY = CGRectGetMaxY(lineFrame);
+                    float lineTransformedMinY = imageHeightFloat - lineMaxY;
+                    float lineTransformedMaxY = imageHeightFloat - lineMinY;
                     if (line.elements.count == 0) {
                         NSMutableDictionary *entry = [[NSMutableDictionary alloc] init];
                         entry[@"resultText"] = resultText ?: (id)kCFNull;
@@ -128,15 +147,21 @@
                         entry[@"boxIndex"] = @(boxValues.count);
                         [detections addObject:entry];
                         [boxValues addObject:@[
-                            @(CGRectGetMinY(blockFrame)), @(CGRectGetMaxY(blockFrame)),
+                            @(blockTransformedMinY), @(blockTransformedMaxY),
                             @(CGRectGetMinX(blockFrame)), @(CGRectGetMaxX(blockFrame)),
-                            @(CGRectGetMinY(lineFrame)), @(CGRectGetMaxY(lineFrame)),
+                            @(lineTransformedMinY), @(lineTransformedMaxY),
                             @(CGRectGetMinX(lineFrame)), @(CGRectGetMaxX(lineFrame)),
                             @(-1.f), @(-1.f), @(-1.f), @(-1.f)
                         ]];
                     }
                     for (MLKTextElement *element in line.elements) {
                         CGRect elementFrame = element.frame;
+                        // Transform Y coordinates for element frame
+                        float elementMinY = CGRectGetMinY(elementFrame);
+                        float elementMaxY = CGRectGetMaxY(elementFrame);
+                        float elementTransformedMinY = imageHeightFloat - elementMaxY;
+                        float elementTransformedMaxY = imageHeightFloat - elementMinY;
                         NSMutableDictionary *entry = [[NSMutableDictionary alloc] init];
                         entry[@"resultText"] = resultText ?: (id)kCFNull;
                         entry[@"blockText"] = block.text ?: (id)kCFNull;
@@ -145,11 +170,11 @@
                         entry[@"boxIndex"] = @(boxValues.count);
                         [detections addObject:entry];
                         [boxValues addObject:@[
-                            @(CGRectGetMinY(blockFrame)), @(CGRectGetMaxY(blockFrame)),
+                            @(blockTransformedMinY), @(blockTransformedMaxY),
                             @(CGRectGetMinX(blockFrame)), @(CGRectGetMaxX(blockFrame)),
-                            @(CGRectGetMinY(lineFrame)), @(CGRectGetMaxY(lineFrame)),
+                            @(lineTransformedMinY), @(lineTransformedMaxY),
                             @(CGRectGetMinX(lineFrame)), @(CGRectGetMaxX(lineFrame)),
-                            @(CGRectGetMinY(elementFrame)), @(CGRectGetMaxY(elementFrame)),
+                            @(elementTransformedMinY), @(elementTransformedMaxY),
                             @(CGRectGetMinX(elementFrame)), @(CGRectGetMaxX(elementFrame))
                         ]];
                     }

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@mleonard9/vin-scanner",
-  "version": "0.2.7",
+  "version": "0.2.8",
   "description": "High-performance VIN scanner for React Native Vision Camera powered by Google ML Kit barcode + text recognition.",
   "main": "lib/commonjs/index",
   "module": "lib/module/index",