@souscheflabs/ml-vision 0.1.0
- package/README.md +274 -0
- package/dist/components/DetectionOverlay.d.ts +57 -0
- package/dist/components/DetectionOverlay.js +133 -0
- package/dist/components/index.d.ts +4 -0
- package/dist/components/index.js +9 -0
- package/dist/core/CacheManager.d.ts +168 -0
- package/dist/core/CacheManager.js +331 -0
- package/dist/core/MLVisionProvider.d.ts +90 -0
- package/dist/core/MLVisionProvider.js +188 -0
- package/dist/core/ServerClient.d.ts +131 -0
- package/dist/core/ServerClient.js +291 -0
- package/dist/core/index.d.ts +6 -0
- package/dist/core/index.js +18 -0
- package/dist/hooks/classLabels.d.ts +35 -0
- package/dist/hooks/classLabels.js +439 -0
- package/dist/hooks/classLabelsCoco.d.ts +43 -0
- package/dist/hooks/classLabelsCoco.js +103 -0
- package/dist/hooks/index.d.ts +8 -0
- package/dist/hooks/index.js +27 -0
- package/dist/hooks/useMultiBarcodeScanner.d.ts +34 -0
- package/dist/hooks/useMultiBarcodeScanner.js +290 -0
- package/dist/hooks/useProductDetector.d.ts +38 -0
- package/dist/hooks/useProductDetector.js +679 -0
- package/dist/hooks/useReceiptScanner.d.ts +37 -0
- package/dist/hooks/useReceiptScanner.js +405 -0
- package/dist/hooks/useVideoScanner.d.ts +118 -0
- package/dist/hooks/useVideoScanner.js +383 -0
- package/dist/index.d.ts +58 -0
- package/dist/index.js +130 -0
- package/dist/processors/detectionProcessor.d.ts +86 -0
- package/dist/processors/detectionProcessor.js +124 -0
- package/dist/processors/index.d.ts +5 -0
- package/dist/processors/index.js +16 -0
- package/dist/processors/tfliteFrameProcessor.d.ts +90 -0
- package/dist/processors/tfliteFrameProcessor.js +213 -0
- package/dist/types/barcode.d.ts +91 -0
- package/dist/types/barcode.js +19 -0
- package/dist/types/detection.d.ts +166 -0
- package/dist/types/detection.js +8 -0
- package/dist/types/index.d.ts +126 -0
- package/dist/types/index.js +25 -0
- package/dist/types/ocr.d.ts +202 -0
- package/dist/types/ocr.js +8 -0
- package/dist/utils/imagePreprocessor.d.ts +85 -0
- package/dist/utils/imagePreprocessor.js +304 -0
- package/dist/utils/yoloProcessor.d.ts +40 -0
- package/dist/utils/yoloProcessor.js +154 -0
- package/package.json +78 -0
package/dist/utils/imagePreprocessor.js
ADDED
@@ -0,0 +1,304 @@
"use strict";
/**
 * Image Preprocessor for TFLite Models
 *
 * Converts images to the format expected by TFLite models:
 * - Resized to model input size (e.g., 640x640)
 * - Normalized to [0, 1] range
 * - As Float32Array in NHWC format (batch, height, width, channels)
 *
 * This implementation uses react-native-skia for image manipulation.
 * Install with: npm install @shopify/react-native-skia
 */
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.preprocessImage = preprocessImage;
exports.getFrameProcessorInstructions = getFrameProcessorInstructions;
exports.addBatchDimension = addBatchDimension;
exports.isSkiaAvailable = isSkiaAvailable;
exports.initSkia = initSkia;
exports.isResizePluginAvailable = isResizePluginAvailable;
const react_native_1 = require("react-native");
// Lazy-loaded Skia references (to avoid crashes when Skia isn't needed)
// eslint-disable-next-line @typescript-eslint/no-explicit-any
let SkiaModule = null;
let skiaLoadAttempted = false;
/**
 * Load Skia module lazily (only when needed for photo preprocessing)
 */
async function loadSkiaModule() {
    if (SkiaModule)
        return true;
    if (skiaLoadAttempted)
        return SkiaModule !== null;
    skiaLoadAttempted = true;
    try {
        console.log('[imagePreprocessor] Loading Skia module...');
        // Lazy require to avoid loading Skia when not needed (e.g., video scanner)
        SkiaModule = require('@shopify/react-native-skia');
        console.log('[imagePreprocessor] Skia loaded successfully');
        return true;
    }
    catch (error) {
        console.error('[imagePreprocessor] Failed to load Skia:', error);
        SkiaModule = null;
        return false;
    }
}
/**
 * Check if Skia is available (synchronous check after loading)
 */
function checkSkiaAvailable() {
    if (!SkiaModule)
        return false;
    try {
        const { Skia } = SkiaModule;
        return !!(Skia?.Data && Skia?.Image && Skia?.Surface);
    }
    catch {
        return false;
    }
}
/**
 * Preprocess an image for TFLite model inference
 *
 * @param uri Image URI (file:// or http://)
 * @param options Preprocessing options
 * @returns Preprocessed image data
 */
async function preprocessImage(uri, options = {}) {
    const { width: targetWidth = 640, height: targetHeight = 640, normalize = true, letterbox = true, } = options;
    // Load Skia module first (lazy loading)
    const loaded = await loadSkiaModule();
    if (!loaded || !checkSkiaAvailable()) {
        throw new Error('Image preprocessing requires @shopify/react-native-skia. ' +
            'Install with: npm install @shopify/react-native-skia\n\n' +
            'Alternatively, use server-side inference or VisionCamera frame processor.');
    }
    // Get original image dimensions
    const { width: originalWidth, height: originalHeight } = await getImageSize(uri);
    // Use Skia for preprocessing
    return preprocessWithSkia(uri, originalWidth, originalHeight, targetWidth, targetHeight, normalize, letterbox);
}
/**
 * Get image dimensions from URI
 */
async function getImageSize(uri) {
    return new Promise((resolve, reject) => {
        react_native_1.Image.getSize(uri, (width, height) => resolve({ width, height }), (error) => reject(new Error(`Failed to get image size: ${error}`)));
    });
}
/**
 * Preprocess using react-native-skia
 * Uses Skia.Data.fromURI() for proper image loading
 */
async function preprocessWithSkia(uri, originalWidth, originalHeight, targetWidth, targetHeight, normalize, letterbox) {
    // Get Skia APIs from the lazy-loaded module
    const { Skia, ColorType, AlphaType } = SkiaModule;
    console.log('[imagePreprocessor] preprocessWithSkia called');
    console.log('[imagePreprocessor] URI:', uri);
    console.log('[imagePreprocessor] Original size:', originalWidth, 'x', originalHeight);
    console.log('[imagePreprocessor] Target size:', targetWidth, 'x', targetHeight);
    // Load image data using Skia's fromURI (handles file:// URIs properly)
    console.log('[imagePreprocessor] Loading image data with Skia.Data.fromURI...');
    const data = await Skia.Data.fromURI(uri);
    console.log('[imagePreprocessor] Data loaded:', !!data);
    // Create SkImage from encoded data
    const image = Skia.Image.MakeImageFromEncoded(data);
    console.log('[imagePreprocessor] Image created:', !!image);
    if (!image) {
        throw new Error('Failed to decode image with Skia. The image format may not be supported.');
    }
    console.log('[imagePreprocessor] Image dimensions:', image.width(), 'x', image.height());
    // Calculate scaling and padding for letterboxing
    let scale = { x: 1, y: 1 };
    let padding = { x: 0, y: 0 };
    if (letterbox) {
        const ratio = Math.min(targetWidth / originalWidth, targetHeight / originalHeight);
        scale = { x: ratio, y: ratio };
        padding = {
            x: (targetWidth - originalWidth * ratio) / 2,
            y: (targetHeight - originalHeight * ratio) / 2,
        };
    }
    else {
        scale = { x: targetWidth / originalWidth, y: targetHeight / originalHeight };
    }
    // Create a surface to draw the resized image
    console.log('[imagePreprocessor] Creating surface...');
    const surface = Skia.Surface.Make(targetWidth, targetHeight);
    if (!surface) {
        throw new Error('Failed to create Skia surface. This may happen on devices without GPU support.');
    }
    const canvas = surface.getCanvas();
    // Fill with gray (letterbox padding color) - Skia.Color takes a color string
    if (letterbox) {
        canvas.clear(Skia.Color('gray'));
    }
    // Draw the resized image
    const srcRect = Skia.XYWHRect(0, 0, originalWidth, originalHeight);
    const dstRect = Skia.XYWHRect(padding.x, padding.y, originalWidth * scale.x, originalHeight * scale.y);
    canvas.drawImageRect(image, srcRect, dstRect, Skia.Paint());
    // Get pixel data from the snapshot
    console.log('[imagePreprocessor] Reading pixels...');
    const snapshot = surface.makeImageSnapshot();
    // readPixels returns Uint8Array directly in newer Skia versions
    // ColorType and AlphaType are separate exports from @shopify/react-native-skia
    const pixels = snapshot.readPixels(0, 0, {
        width: targetWidth,
        height: targetHeight,
        colorType: ColorType.RGBA_8888,
        alphaType: AlphaType.Unpremul,
    });
    if (!pixels) {
        throw new Error('Failed to read pixels from Skia image');
    }
    console.log('[imagePreprocessor] Pixels read, length:', pixels.length);
    // Convert RGBA Uint8Array to RGB Float32Array
    const rgbData = new Float32Array(targetWidth * targetHeight * 3);
    for (let i = 0, j = 0; i < pixels.length; i += 4, j += 3) {
        const r = pixels[i];
        const g = pixels[i + 1];
        const b = pixels[i + 2];
        // Skip alpha (i + 3)
        if (normalize) {
            rgbData[j] = r / 255.0;
            rgbData[j + 1] = g / 255.0;
            rgbData[j + 2] = b / 255.0;
        }
        else {
            rgbData[j] = r;
            rgbData[j + 1] = g;
            rgbData[j + 2] = b;
        }
    }
    // Clean up if dispose methods exist
    if (surface.dispose)
        surface.dispose();
    if (data.dispose)
        data.dispose();
    console.log('[imagePreprocessor] Preprocessing complete, output size:', rgbData.length);
    return {
        data: rgbData,
        originalWidth,
        originalHeight,
        scale,
        padding,
    };
}
/**
 * Preprocess a VisionCamera frame for TFLite inference
 *
 * This requires vision-camera-resize-plugin to be installed:
 * npm install vision-camera-resize-plugin
 *
 * Usage in a frame processor:
 * ```
 * const frameProcessor = useFrameProcessor((frame) => {
 *   'worklet';
 *   const resized = resize(frame, {
 *     size: { width: 640, height: 640 },
 *     pixelFormat: 'rgb',
 *     dataType: 'float32',
 *   });
 *   const result = model.runSync([resized]);
 *   // Process result...
 * }, [model]);
 * ```
 */
function getFrameProcessorInstructions() {
    return `
To use on-device inference with VisionCamera frame processor:

1. Install the resize plugin:
   npm install vision-camera-resize-plugin
   cd ios && pod install

2. Use in your frame processor:
   import { resize } from 'vision-camera-resize-plugin';

   const frameProcessor = useFrameProcessor((frame) => {
     'worklet';
     const resized = resize(frame, {
       size: { width: 640, height: 640 },
       pixelFormat: 'rgb',
       dataType: 'float32',
     });
     const result = model.runSync([resized]);
     // Process result...
   }, [model]);
`;
}
/**
 * Convert Float32Array from HWC to NHWC format (add batch dimension)
 */
function addBatchDimension(data) {
    // Data is already in HWC format, just need to ensure it's treated as batch of 1
    // The TFLite model expects [1, H, W, C] but since we pass a flat array,
    // and the model knows the shape, we don't need to actually reshape
    return data;
}
/**
 * Check if Skia is available for image preprocessing
 */
function isSkiaAvailable() {
    return checkSkiaAvailable();
}
/**
 * Initialize Skia module (call before using Skia-dependent functions)
 * @returns true if Skia was loaded successfully
 */
async function initSkia() {
    return checkSkiaAvailable();
}
// Cached resize plugin availability
let resizePluginAvailable = null;
/**
 * Check if resize plugin is available for frame processing
 */
async function isResizePluginAvailable() {
    if (resizePluginAvailable !== null) {
        return resizePluginAvailable;
    }
    try {
        await Promise.resolve().then(() => __importStar(require('vision-camera-resize-plugin')));
        resizePluginAvailable = true;
        return true;
    }
    catch {
        resizePluginAvailable = false;
        return false;
    }
}
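For reference, the letterboxing performed in preprocessWithSkia above reduces to a few lines of arithmetic. A minimal sketch (not part of the package) worked for a hypothetical 1920x1080 photo scaled into a 640x640 model input:

// Letterbox math from preprocessWithSkia, worked for a hypothetical 1920x1080 photo.
const originalWidth = 1920, originalHeight = 1080;
const targetWidth = 640, targetHeight = 640;
const ratio = Math.min(targetWidth / originalWidth, targetHeight / originalHeight); // 1/3
const scale = { x: ratio, y: ratio };             // both ~0.333: the photo shrinks to 640x360
const padding = {
  x: (targetWidth - originalWidth * ratio) / 2,   // (640 - 640) / 2 = 0
  y: (targetHeight - originalHeight * ratio) / 2, // (640 - 360) / 2 = 140 px of gray bars
};
// preprocessImage() returns these same scale/padding values so callers can map
// model-space coordinates back onto the original photo.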
package/dist/utils/yoloProcessor.d.ts
ADDED
@@ -0,0 +1,40 @@
/**
 * YOLO Output Processor
 *
 * Processes raw YOLOv8 TFLite model output into detection results.
 * YOLOv8 output format: [1, 84, 8400] where:
 * - 84 = 4 (bbox) + 80 (class scores for COCO)
 * - 8400 = number of predictions (from 3 detection heads)
 *
 * For custom models with N classes: [1, 4+N, 8400]
 */
export interface RawDetection {
    x: number;
    y: number;
    width: number;
    height: number;
    classIndex: number;
    confidence: number;
}
export interface ProcessedDetection {
    x: number;
    y: number;
    width: number;
    height: number;
    classIndex: number;
    confidence: number;
}
/**
 * Process YOLOv8 model output into detections
 *
 * @param output Raw model output as Float32Array
 * @param numClasses Number of classes in the model
 * @param confThreshold Minimum confidence threshold (0-1)
 * @param iouThreshold IoU threshold for NMS (0-1)
 * @returns Array of processed detections
 */
export declare function processYoloOutput(output: Float32Array, numClasses: number, confThreshold?: number, iouThreshold?: number): ProcessedDetection[];
/**
 * Scale detection coordinates to actual image dimensions
 */
export declare function scaleDetections(detections: ProcessedDetection[], modelSize: number, imageWidth: number, imageHeight: number): ProcessedDetection[];
package/dist/utils/yoloProcessor.js
ADDED
@@ -0,0 +1,154 @@
"use strict";
/**
 * YOLO Output Processor
 *
 * Processes raw YOLOv8 TFLite model output into detection results.
 * YOLOv8 output format: [1, 84, 8400] where:
 * - 84 = 4 (bbox) + 80 (class scores for COCO)
 * - 8400 = number of predictions (from 3 detection heads)
 *
 * For custom models with N classes: [1, 4+N, 8400]
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.processYoloOutput = processYoloOutput;
exports.scaleDetections = scaleDetections;
/**
 * Process YOLOv8 model output into detections
 *
 * @param output Raw model output as Float32Array
 * @param numClasses Number of classes in the model
 * @param confThreshold Minimum confidence threshold (0-1)
 * @param iouThreshold IoU threshold for NMS (0-1)
 * @returns Array of processed detections
 */
function processYoloOutput(output, numClasses, confThreshold = 0.25, iouThreshold = 0.45) {
    // YOLOv8 output shape: [1, 4+numClasses, 8400]
    // We need to transpose to [8400, 4+numClasses] for easier processing
    const numPredictions = 8400;
    // Note: numOutputs = 4 + numClasses (4 bbox coords + class scores)
    // Extract raw detections above confidence threshold
    const rawDetections = [];
    for (let i = 0; i < numPredictions; i++) {
        // Find the class with highest score
        let maxScore = 0;
        let maxClassIdx = 0;
        for (let c = 0; c < numClasses; c++) {
            // Output is in format [1, 84, 8400], so index is: (4 + c) * 8400 + i
            const score = output[(4 + c) * numPredictions + i];
            if (score > maxScore) {
                maxScore = score;
                maxClassIdx = c;
            }
        }
        if (maxScore >= confThreshold) {
            // Extract bounding box (center x, center y, width, height)
            const cx = output[0 * numPredictions + i];
            const cy = output[1 * numPredictions + i];
            const w = output[2 * numPredictions + i];
            const h = output[3 * numPredictions + i];
            rawDetections.push({
                x: cx,
                y: cy,
                width: w,
                height: h,
                classIndex: maxClassIdx,
                confidence: maxScore,
            });
        }
    }
    // Apply Non-Maximum Suppression
    const nmsDetections = nonMaxSuppression(rawDetections, iouThreshold);
    // Convert from center format to corner format
    return nmsDetections.map((det) => ({
        x: Math.max(0, det.x - det.width / 2),
        y: Math.max(0, det.y - det.height / 2),
        width: det.width,
        height: det.height,
        classIndex: det.classIndex,
        confidence: det.confidence,
    }));
}
/**
 * Non-Maximum Suppression to remove overlapping detections
 */
function nonMaxSuppression(detections, iouThreshold) {
    if (detections.length === 0)
        return [];
    // Sort by confidence (highest first)
    const sorted = [...detections].sort((a, b) => b.confidence - a.confidence);
    const kept = [];
    const suppressed = new Set();
    for (let i = 0; i < sorted.length; i++) {
        if (suppressed.has(i))
            continue;
        const current = sorted[i];
        kept.push(current);
        // Suppress overlapping detections of the same class
        for (let j = i + 1; j < sorted.length; j++) {
            if (suppressed.has(j))
                continue;
            const other = sorted[j];
            // Only suppress same class
            if (current.classIndex !== other.classIndex)
                continue;
            const iou = calculateIoU(current, other);
            if (iou > iouThreshold) {
                suppressed.add(j);
            }
        }
    }
    return kept;
}
/**
 * Calculate Intersection over Union between two boxes
 * Boxes are in center format (cx, cy, w, h)
 */
function calculateIoU(box1, box2) {
    // Convert to corner format
    const x1_min = box1.x - box1.width / 2;
    const y1_min = box1.y - box1.height / 2;
    const x1_max = box1.x + box1.width / 2;
    const y1_max = box1.y + box1.height / 2;
    const x2_min = box2.x - box2.width / 2;
    const y2_min = box2.y - box2.height / 2;
    const x2_max = box2.x + box2.width / 2;
    const y2_max = box2.y + box2.height / 2;
    // Calculate intersection
    const intersect_x_min = Math.max(x1_min, x2_min);
    const intersect_y_min = Math.max(y1_min, y2_min);
    const intersect_x_max = Math.min(x1_max, x2_max);
    const intersect_y_max = Math.min(y1_max, y2_max);
    const intersect_width = Math.max(0, intersect_x_max - intersect_x_min);
    const intersect_height = Math.max(0, intersect_y_max - intersect_y_min);
    const intersect_area = intersect_width * intersect_height;
    // Calculate union
    const area1 = box1.width * box1.height;
    const area2 = box2.width * box2.height;
    const union_area = area1 + area2 - intersect_area;
    if (union_area === 0)
        return 0;
    return intersect_area / union_area;
}
/**
 * Scale detection coordinates to actual image dimensions
 */
function scaleDetections(detections, modelSize, imageWidth, imageHeight) {
    // The model was trained on square images, so we need to account for letterboxing
    const scale = Math.min(modelSize / imageWidth, modelSize / imageHeight);
    const padX = (modelSize - imageWidth * scale) / 2;
    const padY = (modelSize - imageHeight * scale) / 2;
    return detections.map((det) => {
        // Remove padding and scale back to original image dimensions
        const x = (det.x * modelSize - padX) / scale / imageWidth;
        const y = (det.y * modelSize - padY) / scale / imageHeight;
        const w = (det.width * modelSize) / scale / imageWidth;
        const h = (det.height * modelSize) / scale / imageHeight;
        return {
            ...det,
            x: Math.max(0, Math.min(1, x)),
            y: Math.max(0, Math.min(1, y)),
            width: Math.max(0, Math.min(1 - x, w)),
            height: Math.max(0, Math.min(1 - y, h)),
        };
    });
}
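Tying the two utilities in this diff together, a hedged end-to-end sketch in TypeScript: only preprocessImage, processYoloOutput, and scaleDetections come from the files above. Importing them from the package root, the loadTensorflowModel/runSync calls, the local model asset, and the 80-class output layout are assumptions based on the react-native-fast-tflite dependency declared in package.json below, not code shipped in this release.

// Sketch only, under assumptions: root-index re-exports of the utils shown above,
// react-native-fast-tflite's loadTensorflowModel/runSync, and a hypothetical
// YOLOv8 asset with 80 classes producing output shaped [1, 84, 8400].
import { loadTensorflowModel } from 'react-native-fast-tflite';
import { preprocessImage, processYoloOutput, scaleDetections } from '@souscheflabs/ml-vision';

async function detectFromPhoto(photoUri: string) {
  // 640x640, [0,1]-normalized, letterboxed RGB (the preprocessor's defaults)
  const pre = await preprocessImage(photoUri, { width: 640, height: 640 });
  const model = await loadTensorflowModel(require('./assets/yolov8n.tflite')); // hypothetical asset
  const [output] = model.runSync([pre.data]);
  // Thresholds mirror processYoloOutput's defaults (conf 0.25, IoU 0.45)
  const boxes = processYoloOutput(output as Float32Array, 80, 0.25, 0.45);
  // Undo letterboxing; boxes come back normalized to the original photo's dimensions
  return scaleDetections(boxes, 640, pre.originalWidth, pre.originalHeight);
}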
package/package.json
ADDED
@@ -0,0 +1,78 @@
{
  "name": "@souscheflabs/ml-vision",
  "version": "0.1.0",
  "description": "ML-powered product detection for React Native - multi-barcode scanning, receipt OCR, and visual product recognition",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
  "scripts": {
    "build": "tsc",
    "watch": "tsc --watch",
    "clean": "rm -rf dist",
    "prepublishOnly": "npm run lint && npm run clean && npm run build",
    "typecheck": "tsc --noEmit",
    "lint": "eslint src --ext .ts,.tsx",
    "lint:fix": "eslint src --ext .ts,.tsx --fix",
    "test": "jest",
    "test:watch": "jest --watch",
    "test:coverage": "jest --coverage"
  },
  "keywords": [
    "react-native",
    "ml",
    "machine-learning",
    "barcode",
    "ocr",
    "product-detection",
    "vision-camera",
    "tflite"
  ],
  "author": "Artan Muzhaqi",
  "license": "ISC",
  "type": "commonjs",
  "devDependencies": {
    "@testing-library/react-native": "^12.4.3",
    "@types/jest": "^29.5.12",
    "@types/react": "^19.2.8",
    "@types/react-native": "^0.73.0",
    "jest": "^29.7.0",
    "react-native-reanimated": "^4.2.1",
    "ts-jest": "^29.1.2",
    "typescript": "^5.9.3"
  },
  "peerDependencies": {
    "react": ">=18.0.0",
    "react-native": ">=0.73.0",
    "react-native-mmkv": ">=3.0.0",
    "react-native-reanimated": ">=3.0.0",
    "react-native-vision-camera": ">=4.0.0"
  },
  "peerDependenciesMeta": {
    "react-native-mmkv": {
      "optional": false
    },
    "react-native-vision-camera": {
      "optional": false
    },
    "react-native-reanimated": {
      "optional": true
    }
  },
  "dependencies": {
    "react-native-fast-tflite": "^1.5.0"
  },
  "engines": {
    "node": ">=18.0.0"
  },
  "repository": {
    "type": "git",
    "url": "git@github.com:SousChefLabs/ml-vision.git"
  },
  "publishConfig": {
    "access": "public"
  },
  "files": [
    "dist",
    "README.md",
    "LICENSE"
  ]
}