@mleonard9/vin-scanner 0.2.7 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,7 @@
5
5
  #import <VisionCamera/Frame.h>
6
6
  #import <VisionCamera/SharedArray.h>
7
7
  #import <React/RCTBridgeModule.h>
8
+ #import <CoreVideo/CoreVideo.h>
8
9
  @import MLKitVision;
9
10
 
10
11
  @interface VisionCameraBarcodeScannerPlugin : FrameProcessorPlugin
@@ -57,8 +58,13 @@
57
58
  if (orientationOverride.length > 0) {
58
59
  orientation = [self orientationFromValue:orientationOverride fallback:orientation];
59
60
  }
61
+
62
+ // Get the image height, used below to flip bounding-box Y coordinates
63
+ CVImageBufferRef imageBuffer = CMSampleBufferGetImageBuffer(buffer);
64
+ size_t imageHeight = CVPixelBufferGetHeight(imageBuffer);
65
+
60
66
  // VisionCamera already normalizes orientation per https://react-native-vision-camera.com/docs/guides/orientation,
61
- // so ML Kit just needs the frames orientation metadata instead of rotating pixels manually.
67
+ // so ML Kit just needs the frame's orientation metadata instead of rotating pixels manually.
62
68
  MLKVisionImage *image = [[MLKVisionImage alloc] initWithBuffer:buffer];
63
69
  image.orientation = orientation;
64
70
  NSMutableArray *detections = [NSMutableArray array];
@@ -91,8 +97,13 @@
91
97
  if (boxData != nil) {
92
98
  CGRect frameRect = barcode.frame;
93
99
  const NSUInteger baseIndex = idx * 6;
94
- boxData[baseIndex] = CGRectGetMinY(frameRect);
95
- boxData[baseIndex + 1] = CGRectGetMaxY(frameRect);
100
+ // Transform Y coordinates to fix vertical flip issue on iOS
101
+ // ML Kit returns coordinates in image space, but VisionCamera expects them flipped
102
+ float minY = CGRectGetMinY(frameRect);
103
+ float maxY = CGRectGetMaxY(frameRect);
104
+ float imageHeightFloat = (float)imageHeight;
105
+ boxData[baseIndex] = imageHeightFloat - maxY; // top = height - bottom
106
+ boxData[baseIndex + 1] = imageHeightFloat - minY; // bottom = height - top
96
107
  boxData[baseIndex + 2] = CGRectGetMinX(frameRect);
97
108
  boxData[baseIndex + 3] = CGRectGetMaxX(frameRect);
98
109
  boxData[baseIndex + 4] = CGRectGetWidth(frameRect);
@@ -9,6 +9,7 @@
9
9
  #import <VisionCamera/VisionCameraProxyHolder.h>
10
10
  #import <VisionCamera/Frame.h>
11
11
  #import <VisionCamera/SharedArray.h>
12
+ #import <CoreVideo/CoreVideo.h>
12
13
  @import MLKitVision;
13
14
 
14
15
  @interface VisionCameraTextRecognitionPlugin : FrameProcessorPlugin
@@ -86,6 +87,12 @@
86
87
  if (orientationOverride.length > 0) {
87
88
  orientation = [self orientationFromValue:orientationOverride fallback:orientation];
88
89
  }
90
+
91
+ // Get the image height, used below to flip block/line/element Y coordinates
92
+ CVImageBufferRef imageBuffer = CMSampleBufferGetImageBuffer(buffer);
93
+ size_t imageHeight = CVPixelBufferGetHeight(imageBuffer);
94
+ float imageHeightFloat = (float)imageHeight;
95
+
89
96
  MLKVisionImage *image = [[MLKVisionImage alloc] initWithBuffer:buffer];
90
97
  image.orientation = orientation;
91
98
  NSMutableArray *detections = [NSMutableArray array];
@@ -105,6 +112,12 @@
105
112
  NSString *resultText = result.text;
106
113
  for (MLKTextBlock *block in result.blocks) {
107
114
  CGRect blockFrame = block.frame;
115
+ // Transform Y coordinates to fix vertical flip issue on iOS
116
+ float blockMinY = CGRectGetMinY(blockFrame);
117
+ float blockMaxY = CGRectGetMaxY(blockFrame);
118
+ float blockTransformedMinY = imageHeightFloat - blockMaxY;
119
+ float blockTransformedMaxY = imageHeightFloat - blockMinY;
120
+
108
121
  if (block.lines.count == 0) {
109
122
  NSMutableDictionary *entry = [[NSMutableDictionary alloc] init];
110
123
  entry[@"resultText"] = resultText ?: (id)kCFNull;
@@ -112,7 +125,7 @@
112
125
  entry[@"boxIndex"] = @(boxValues.count);
113
126
  [detections addObject:entry];
114
127
  [boxValues addObject:@[
115
- @(CGRectGetMinY(blockFrame)), @(CGRectGetMaxY(blockFrame)),
128
+ @(blockTransformedMinY), @(blockTransformedMaxY),
116
129
  @(CGRectGetMinX(blockFrame)), @(CGRectGetMaxX(blockFrame)),
117
130
  @(-1.f), @(-1.f), @(-1.f), @(-1.f),
118
131
  @(-1.f), @(-1.f), @(-1.f), @(-1.f)
@@ -120,6 +133,12 @@
120
133
  }
121
134
  for (MLKTextLine *line in block.lines) {
122
135
  CGRect lineFrame = line.frame;
136
+ // Transform Y coordinates for line frame
137
+ float lineMinY = CGRectGetMinY(lineFrame);
138
+ float lineMaxY = CGRectGetMaxY(lineFrame);
139
+ float lineTransformedMinY = imageHeightFloat - lineMaxY;
140
+ float lineTransformedMaxY = imageHeightFloat - lineMinY;
141
+
123
142
  if (line.elements.count == 0) {
124
143
  NSMutableDictionary *entry = [[NSMutableDictionary alloc] init];
125
144
  entry[@"resultText"] = resultText ?: (id)kCFNull;
@@ -128,15 +147,21 @@
128
147
  entry[@"boxIndex"] = @(boxValues.count);
129
148
  [detections addObject:entry];
130
149
  [boxValues addObject:@[
131
- @(CGRectGetMinY(blockFrame)), @(CGRectGetMaxY(blockFrame)),
150
+ @(blockTransformedMinY), @(blockTransformedMaxY),
132
151
  @(CGRectGetMinX(blockFrame)), @(CGRectGetMaxX(blockFrame)),
133
- @(CGRectGetMinY(lineFrame)), @(CGRectGetMaxY(lineFrame)),
152
+ @(lineTransformedMinY), @(lineTransformedMaxY),
134
153
  @(CGRectGetMinX(lineFrame)), @(CGRectGetMaxX(lineFrame)),
135
154
  @(-1.f), @(-1.f), @(-1.f), @(-1.f)
136
155
  ]];
137
156
  }
138
157
  for (MLKTextElement *element in line.elements) {
139
158
  CGRect elementFrame = element.frame;
159
+ // Transform Y coordinates for element frame
160
+ float elementMinY = CGRectGetMinY(elementFrame);
161
+ float elementMaxY = CGRectGetMaxY(elementFrame);
162
+ float elementTransformedMinY = imageHeightFloat - elementMaxY;
163
+ float elementTransformedMaxY = imageHeightFloat - elementMinY;
164
+
140
165
  NSMutableDictionary *entry = [[NSMutableDictionary alloc] init];
141
166
  entry[@"resultText"] = resultText ?: (id)kCFNull;
142
167
  entry[@"blockText"] = block.text ?: (id)kCFNull;
@@ -145,11 +170,11 @@
145
170
  entry[@"boxIndex"] = @(boxValues.count);
146
171
  [detections addObject:entry];
147
172
  [boxValues addObject:@[
148
- @(CGRectGetMinY(blockFrame)), @(CGRectGetMaxY(blockFrame)),
173
+ @(blockTransformedMinY), @(blockTransformedMaxY),
149
174
  @(CGRectGetMinX(blockFrame)), @(CGRectGetMaxX(blockFrame)),
150
- @(CGRectGetMinY(lineFrame)), @(CGRectGetMaxY(lineFrame)),
175
+ @(lineTransformedMinY), @(lineTransformedMaxY),
151
176
  @(CGRectGetMinX(lineFrame)), @(CGRectGetMaxX(lineFrame)),
152
- @(CGRectGetMinY(elementFrame)), @(CGRectGetMaxY(elementFrame)),
177
+ @(elementTransformedMinY), @(elementTransformedMaxY),
153
178
  @(CGRectGetMinX(elementFrame)), @(CGRectGetMaxX(elementFrame))
154
179
  ]];
155
180
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mleonard9/vin-scanner",
3
- "version": "0.2.7",
3
+ "version": "0.2.8",
4
4
  "description": "High-performance VIN scanner for React Native Vision Camera powered by Google ML Kit barcode + text recognition.",
5
5
  "main": "lib/commonjs/index",
6
6
  "module": "lib/module/index",