mediversal-rn-image-intelligence 1.0.8 → 1.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +328 -36
- package/android/build.gradle +5 -2
- package/android/src/main/java/com/mediversalrnimagintelligence/ImageIntelligencePackage.kt +2 -1
- package/android/src/main/java/com/mediversalrnimagintelligence/ObjectDetectionModule.kt +120 -0
- package/ios/ObjectDetectionModule.m +14 -0
- package/ios/ObjectDetectionModule.swift +129 -0
- package/lib/commonjs/NativeObjectDetectionModule.js +12 -0
- package/lib/commonjs/NativeObjectDetectionModule.js.map +1 -0
- package/lib/commonjs/index.js +24 -2
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeObjectDetectionModule.js +8 -0
- package/lib/module/NativeObjectDetectionModule.js.map +1 -0
- package/lib/module/index.js +24 -2
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeObjectDetectionModule.d.ts +11 -0
- package/lib/typescript/NativeObjectDetectionModule.d.ts.map +1 -0
- package/lib/typescript/index.d.ts +1 -1
- package/lib/typescript/index.d.ts.map +1 -1
- package/lib/typescript/types.d.ts +37 -0
- package/lib/typescript/types.d.ts.map +1 -1
- package/mediversal-rn-image-intelligence.podspec +2 -0
- package/package.json +1 -1
- package/src/NativeObjectDetectionModule.ts +14 -0
- package/src/index.tsx +35 -1
- package/src/types.ts +40 -0
package/README.md
CHANGED
|
@@ -3,11 +3,11 @@
|
|
|
3
3
|
[](https://badge.fury.io/js/mediversal-rn-image-intelligence)
|
|
4
4
|
[](https://opensource.org/licenses/MIT)
|
|
5
5
|
|
|
6
|
-
A production-ready React Native library for intelligent image analysis using Google ML Kit's on-device APIs.
|
|
6
|
+
A production-ready React Native library for intelligent image analysis using Google ML Kit's on-device APIs. Detect faces, extract text, and identify objects in images—all processed locally on the device for maximum privacy and performance.
|
|
7
7
|
|
|
8
8
|
## Overview
|
|
9
9
|
|
|
10
|
-
This library provides a simple interface to Google ML Kit's powerful machine learning capabilities for mobile applications. You can detect human faces with detailed metadata
|
|
10
|
+
This library provides a simple interface to Google ML Kit's powerful machine learning capabilities for mobile applications. You can detect human faces with detailed metadata, extract printed text using optical character recognition, and identify objects with confidence scores—all without sending any data to external servers.
|
|
11
11
|
|
|
12
12
|
Since all processing happens entirely on-device, your users' data never leaves their device. There's no need for an internet connection, no data is uploaded to external servers, and you get fast, reliable results even in offline scenarios.
|
|
13
13
|
|
|
@@ -19,6 +19,9 @@ Detect human faces in images with comprehensive metadata including smiling proba
|
|
|
19
19
|
**Text Recognition (OCR)**
|
|
20
20
|
Extract printed text from images with high accuracy. Ideal for document scanning, business card reading, receipt processing, and any scenario where you need to digitize text from photos.
|
|
21
21
|
|
|
22
|
+
**Object Detection**
|
|
23
|
+
Identify and classify objects in images with confidence scores. Recognize common objects like animals, vehicles, furniture, food items, and more. Each detected object includes a bounding box, an optional tracking ID, and a list of classification labels.
|
|
24
|
+
|
|
22
25
|
**Privacy-First Architecture**
|
|
23
26
|
All image processing happens on-device using Google ML Kit. No images or extracted data are ever transmitted to external servers, ensuring complete privacy and GDPR compliance.
|
|
24
27
|
|
|
@@ -26,7 +29,7 @@ All image processing happens on-device using Google ML Kit. No images or extract
|
|
|
26
29
|
Works seamlessly on both iOS and Android with a unified API. Built with TurboModule specifications for the new React Native architecture.
|
|
27
30
|
|
|
28
31
|
**Performance Optimized**
|
|
29
|
-
Designed for mobile performance with efficient processing and the option to run face detection
|
|
32
|
+
Designed for mobile performance with efficient processing and the option to run face detection, text recognition, and object detection in parallel.
|
|
30
33
|
|
|
31
34
|
**TypeScript Support**
|
|
32
35
|
Fully typed API provides excellent autocomplete and type safety in your development environment.
|
|
@@ -124,6 +127,14 @@ async function requestPermissions() {
|
|
|
124
127
|
}
|
|
125
128
|
```
|
|
126
129
|
|
|
130
|
+
### Fix 16KB Alignment Warning (Android)
|
|
131
|
+
|
|
132
|
+
If you see a "16KB-compatible" warning during build, add this to `android/gradle.properties`:
|
|
133
|
+
|
|
134
|
+
```properties
|
|
135
|
+
android.bundle.enableUncompressedNativeLibs=false
|
|
136
|
+
```
|
|
137
|
+
|
|
127
138
|
## Basic Usage
|
|
128
139
|
|
|
129
140
|
The simplest way to use the library is with the `analyzeImage` function. Pass it an image URI and it will return the analysis results:
|
|
@@ -137,6 +148,7 @@ async function analyzeMyImage() {
|
|
|
137
148
|
|
|
138
149
|
console.log('Contains face:', result.containsFace);
|
|
139
150
|
console.log('Contains text:', result.containsPrintedText);
|
|
151
|
+
console.log('Contains objects:', result.containsObjects);
|
|
140
152
|
|
|
141
153
|
if (result.faces) {
|
|
142
154
|
console.log('Number of faces:', result.faces.length);
|
|
@@ -145,6 +157,17 @@ async function analyzeMyImage() {
|
|
|
145
157
|
if (result.printedText) {
|
|
146
158
|
console.log('Extracted text:', result.printedText);
|
|
147
159
|
}
|
|
160
|
+
|
|
161
|
+
if (result.objects) {
|
|
162
|
+
result.objects.forEach((obj, index) => {
|
|
163
|
+
console.log(`Object ${index + 1}:`);
|
|
164
|
+
obj.labels.forEach((label) => {
|
|
165
|
+
console.log(
|
|
166
|
+
` ${label.text}: ${(label.confidence * 100).toFixed(1)}%`
|
|
167
|
+
);
|
|
168
|
+
});
|
|
169
|
+
});
|
|
170
|
+
}
|
|
148
171
|
} catch (error) {
|
|
149
172
|
console.error('Analysis failed:', error);
|
|
150
173
|
}
|
|
@@ -164,6 +187,7 @@ import {
|
|
|
164
187
|
const options: AnalysisOptions = {
|
|
165
188
|
detectFaces: true,
|
|
166
189
|
detectPrintedText: true,
|
|
190
|
+
detectObjects: true,
|
|
167
191
|
faceDetectionMode: 'accurate',
|
|
168
192
|
minFaceSize: 0.15,
|
|
169
193
|
};
|
|
@@ -174,10 +198,13 @@ const result = await analyzeImage('file:///path/to/image.jpg', options);
|
|
|
174
198
|
### Available Options
|
|
175
199
|
|
|
176
200
|
**detectFaces** (boolean, default: true)
|
|
177
|
-
Enable or disable face detection. Set to false if you only need text recognition to improve performance.
|
|
201
|
+
Enable or disable face detection. Set to false to improve performance if you only need text recognition or object detection.
|
|
178
202
|
|
|
179
203
|
**detectPrintedText** (boolean, default: true)
|
|
180
|
-
Enable or disable text recognition. Set to false if you only need face detection to improve performance.
|
|
204
|
+
Enable or disable text recognition. Set to false to improve performance if you only need face detection or object detection.
|
|
205
|
+
|
|
206
|
+
**detectObjects** (boolean, default: true)
|
|
207
|
+
Enable or disable object detection. Set to false to improve performance if you only need face detection or text recognition.
|
|
181
208
|
|
|
182
209
|
**faceDetectionMode** ('fast' | 'accurate', default: 'fast')
|
|
183
210
|
Choose between fast processing for real-time scenarios or accurate mode for higher quality detection. Use 'fast' for camera previews and 'accurate' for analyzing stored images.
|
|
@@ -229,7 +256,7 @@ async function captureAndAnalyze() {
|
|
|
229
256
|
|
|
230
257
|
## Complete Example Component
|
|
231
258
|
|
|
232
|
-
Here's a full working example of a React component that lets users select an image and displays
|
|
259
|
+
Here's a full working example of a React component that lets users select an image and displays comprehensive analysis results:
|
|
233
260
|
|
|
234
261
|
```typescript
|
|
235
262
|
import React, { useState } from 'react';
|
|
@@ -269,6 +296,7 @@ export default function ImageAnalyzer() {
|
|
|
269
296
|
const analysis = await analyzeImage(uri, {
|
|
270
297
|
detectFaces: true,
|
|
271
298
|
detectPrintedText: true,
|
|
299
|
+
detectObjects: true,
|
|
272
300
|
faceDetectionMode: 'accurate',
|
|
273
301
|
});
|
|
274
302
|
setResult(analysis);
|
|
@@ -304,7 +332,7 @@ export default function ImageAnalyzer() {
|
|
|
304
332
|
|
|
305
333
|
{result && !loading && (
|
|
306
334
|
<View style={styles.resultsContainer}>
|
|
307
|
-
<Text style={styles.resultTitle}>Results</Text>
|
|
335
|
+
<Text style={styles.resultTitle}>Analysis Results</Text>
|
|
308
336
|
|
|
309
337
|
<Text style={styles.resultItem}>
|
|
310
338
|
Face Detected: {result.containsFace ? 'Yes' : 'No'}
|
|
@@ -314,25 +342,53 @@ export default function ImageAnalyzer() {
|
|
|
314
342
|
Text Detected: {result.containsPrintedText ? 'Yes' : 'No'}
|
|
315
343
|
</Text>
|
|
316
344
|
|
|
345
|
+
<Text style={styles.resultItem}>
|
|
346
|
+
Objects Detected: {result.containsObjects ? 'Yes' : 'No'}
|
|
347
|
+
</Text>
|
|
348
|
+
|
|
317
349
|
{result.faces && result.faces.length > 0 && (
|
|
318
350
|
<View style={styles.section}>
|
|
319
351
|
<Text style={styles.sectionTitle}>
|
|
320
352
|
Faces Found: {result.faces.length}
|
|
321
353
|
</Text>
|
|
322
354
|
{result.faces.map((face, index) => (
|
|
323
|
-
<View key={index} style={styles.
|
|
324
|
-
<Text>Face {index + 1}:</Text>
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
355
|
+
<View key={index} style={styles.detailBox}>
|
|
356
|
+
<Text style={styles.itemTitle}>Face {index + 1}:</Text>
|
|
357
|
+
{face.smilingProbability !== undefined && (
|
|
358
|
+
<Text style={styles.detailText}>
|
|
359
|
+
Smiling: {(face.smilingProbability * 100).toFixed(0)}%
|
|
360
|
+
</Text>
|
|
361
|
+
)}
|
|
362
|
+
{face.leftEyeOpenProbability !== undefined && (
|
|
363
|
+
<Text style={styles.detailText}>
|
|
364
|
+
Left Eye Open:{' '}
|
|
365
|
+
{(face.leftEyeOpenProbability * 100).toFixed(0)}%
|
|
366
|
+
</Text>
|
|
367
|
+
)}
|
|
368
|
+
{face.rightEyeOpenProbability !== undefined && (
|
|
369
|
+
<Text style={styles.detailText}>
|
|
370
|
+
Right Eye Open:{' '}
|
|
371
|
+
{(face.rightEyeOpenProbability * 100).toFixed(0)}%
|
|
372
|
+
</Text>
|
|
373
|
+
)}
|
|
374
|
+
</View>
|
|
375
|
+
))}
|
|
376
|
+
</View>
|
|
377
|
+
)}
|
|
378
|
+
|
|
379
|
+
{result.objects && result.objects.length > 0 && (
|
|
380
|
+
<View style={styles.section}>
|
|
381
|
+
<Text style={styles.sectionTitle}>
|
|
382
|
+
Objects Detected: {result.objects.length}
|
|
383
|
+
</Text>
|
|
384
|
+
{result.objects.map((obj, index) => (
|
|
385
|
+
<View key={index} style={styles.detailBox}>
|
|
386
|
+
<Text style={styles.itemTitle}>Object {index + 1}:</Text>
|
|
387
|
+
{obj.labels.map((label, labelIndex) => (
|
|
388
|
+
<Text key={labelIndex} style={styles.detailText}>
|
|
389
|
+
{label.text}: {(label.confidence * 100).toFixed(1)}%
|
|
390
|
+
</Text>
|
|
391
|
+
))}
|
|
336
392
|
</View>
|
|
337
393
|
))}
|
|
338
394
|
</View>
|
|
@@ -358,6 +414,11 @@ export default function ImageAnalyzer() {
|
|
|
358
414
|
Text Recognition: {result.errors.textRecognition}
|
|
359
415
|
</Text>
|
|
360
416
|
)}
|
|
417
|
+
{result.errors.objectDetection && (
|
|
418
|
+
<Text style={styles.errorText}>
|
|
419
|
+
Object Detection: {result.errors.objectDetection}
|
|
420
|
+
</Text>
|
|
421
|
+
)}
|
|
361
422
|
</View>
|
|
362
423
|
)}
|
|
363
424
|
</View>
|
|
@@ -375,11 +436,13 @@ const styles = StyleSheet.create({
|
|
|
375
436
|
fontSize: 24,
|
|
376
437
|
fontWeight: 'bold',
|
|
377
438
|
marginBottom: 20,
|
|
439
|
+
color: '#333',
|
|
378
440
|
},
|
|
379
441
|
image: {
|
|
380
442
|
width: 300,
|
|
381
443
|
height: 300,
|
|
382
444
|
marginVertical: 20,
|
|
445
|
+
borderRadius: 10,
|
|
383
446
|
},
|
|
384
447
|
loadingContainer: {
|
|
385
448
|
alignItems: 'center',
|
|
@@ -388,6 +451,7 @@ const styles = StyleSheet.create({
|
|
|
388
451
|
loadingText: {
|
|
389
452
|
marginTop: 10,
|
|
390
453
|
fontSize: 16,
|
|
454
|
+
color: '#666',
|
|
391
455
|
},
|
|
392
456
|
resultsContainer: {
|
|
393
457
|
marginTop: 20,
|
|
@@ -399,27 +463,52 @@ const styles = StyleSheet.create({
|
|
|
399
463
|
resultTitle: {
|
|
400
464
|
fontSize: 20,
|
|
401
465
|
fontWeight: 'bold',
|
|
402
|
-
marginBottom:
|
|
466
|
+
marginBottom: 15,
|
|
467
|
+
color: '#333',
|
|
403
468
|
},
|
|
404
469
|
resultItem: {
|
|
405
470
|
fontSize: 16,
|
|
406
471
|
marginVertical: 5,
|
|
472
|
+
color: '#555',
|
|
407
473
|
},
|
|
408
474
|
section: {
|
|
409
|
-
marginTop:
|
|
475
|
+
marginTop: 20,
|
|
476
|
+
paddingTop: 15,
|
|
477
|
+
borderTopWidth: 1,
|
|
478
|
+
borderTopColor: '#ddd',
|
|
410
479
|
},
|
|
411
480
|
sectionTitle: {
|
|
412
|
-
fontSize:
|
|
481
|
+
fontSize: 18,
|
|
413
482
|
fontWeight: 'bold',
|
|
414
|
-
marginBottom:
|
|
483
|
+
marginBottom: 10,
|
|
484
|
+
color: '#007AFF',
|
|
415
485
|
},
|
|
416
|
-
|
|
486
|
+
detailBox: {
|
|
417
487
|
marginLeft: 10,
|
|
418
|
-
marginVertical:
|
|
488
|
+
marginVertical: 8,
|
|
489
|
+
padding: 10,
|
|
490
|
+
backgroundColor: '#fff',
|
|
491
|
+
borderRadius: 8,
|
|
492
|
+
},
|
|
493
|
+
itemTitle: {
|
|
494
|
+
fontSize: 15,
|
|
495
|
+
fontWeight: '600',
|
|
496
|
+
marginBottom: 5,
|
|
497
|
+
color: '#333',
|
|
498
|
+
},
|
|
499
|
+
detailText: {
|
|
500
|
+
fontSize: 14,
|
|
501
|
+
marginLeft: 5,
|
|
502
|
+
marginVertical: 2,
|
|
503
|
+
color: '#666',
|
|
419
504
|
},
|
|
420
505
|
extractedText: {
|
|
421
506
|
fontSize: 14,
|
|
422
507
|
lineHeight: 20,
|
|
508
|
+
padding: 10,
|
|
509
|
+
backgroundColor: '#fff',
|
|
510
|
+
borderRadius: 8,
|
|
511
|
+
color: '#333',
|
|
423
512
|
},
|
|
424
513
|
errorSection: {
|
|
425
514
|
marginTop: 15,
|
|
@@ -436,6 +525,7 @@ const styles = StyleSheet.create({
|
|
|
436
525
|
errorText: {
|
|
437
526
|
fontSize: 14,
|
|
438
527
|
color: '#d32f2f',
|
|
528
|
+
marginVertical: 2,
|
|
439
529
|
},
|
|
440
530
|
});
|
|
441
531
|
```
|
|
@@ -465,7 +555,7 @@ Configuration object to customize the analysis behavior. See the Advanced Config
|
|
|
465
555
|
A Promise that resolves to an AnalysisResult object containing the detection results.
|
|
466
556
|
|
|
467
557
|
**Throws:**
|
|
468
|
-
An error if the image URI is invalid or if
|
|
558
|
+
An error if the image URI is invalid or if all enabled analyses fail.
|
|
469
559
|
|
|
470
560
|
### isAvailable()
|
|
471
561
|
|
|
@@ -488,11 +578,14 @@ This is useful for verifying that the installation was successful before attempt
|
|
|
488
578
|
interface AnalysisResult {
|
|
489
579
|
containsFace: boolean;
|
|
490
580
|
containsPrintedText: boolean;
|
|
581
|
+
containsObjects: boolean;
|
|
491
582
|
faces?: FaceData[];
|
|
492
583
|
printedText?: string;
|
|
584
|
+
objects?: ObjectData[];
|
|
493
585
|
errors?: {
|
|
494
586
|
faceDetection?: string;
|
|
495
587
|
textRecognition?: string;
|
|
588
|
+
objectDetection?: string;
|
|
496
589
|
};
|
|
497
590
|
}
|
|
498
591
|
```
|
|
@@ -503,14 +596,20 @@ Boolean indicating whether at least one face was detected in the image.
|
|
|
503
596
|
**containsPrintedText**
|
|
504
597
|
Boolean indicating whether any text was detected in the image.
|
|
505
598
|
|
|
599
|
+
**containsObjects**
|
|
600
|
+
Boolean indicating whether any objects were detected in the image.
|
|
601
|
+
|
|
506
602
|
**faces**
|
|
507
603
|
Optional array of FaceData objects, one for each detected face. Only present if faces were detected.
|
|
508
604
|
|
|
509
605
|
**printedText**
|
|
510
606
|
Optional string containing all the text extracted from the image. Only present if text was detected.
|
|
511
607
|
|
|
608
|
+
**objects**
|
|
609
|
+
Optional array of ObjectData objects, one for each detected object. Only present if objects were detected.
|
|
610
|
+
|
|
512
611
|
**errors**
|
|
513
|
-
Optional object containing error messages if
|
|
612
|
+
Optional object containing error messages if any detection method encountered issues. This allows partial results—for example, face detection might succeed while text recognition fails.
|
|
514
613
|
|
|
515
614
|
### FaceData
|
|
516
615
|
|
|
@@ -544,6 +643,44 @@ The tilt of the head in degrees. A value of 0 means the head is upright, positiv
|
|
|
544
643
|
**trackingId**
|
|
545
644
|
A unique identifier for the face, useful for tracking the same face across multiple frames in video scenarios.
|
|
546
645
|
|
|
646
|
+
### ObjectData
|
|
647
|
+
|
|
648
|
+
```typescript
|
|
649
|
+
interface ObjectData {
|
|
650
|
+
boundingBox: BoundingBox;
|
|
651
|
+
trackingId?: number;
|
|
652
|
+
labels: ObjectLabel[];
|
|
653
|
+
}
|
|
654
|
+
```
|
|
655
|
+
|
|
656
|
+
**boundingBox**
|
|
657
|
+
The location and size of the detected object within the image.
|
|
658
|
+
|
|
659
|
+
**trackingId**
|
|
660
|
+
A unique identifier for the object, useful for tracking the same object across multiple frames in video scenarios.
|
|
661
|
+
|
|
662
|
+
**labels**
|
|
663
|
+
Array of classification labels for the detected object, ordered by confidence score (highest first).
|
|
664
|
+
|
|
665
|
+
### ObjectLabel
|
|
666
|
+
|
|
667
|
+
```typescript
|
|
668
|
+
interface ObjectLabel {
|
|
669
|
+
text: string;
|
|
670
|
+
confidence: number;
|
|
671
|
+
index: number;
|
|
672
|
+
}
|
|
673
|
+
```
|
|
674
|
+
|
|
675
|
+
**text**
|
|
676
|
+
The classification label (e.g., "Dog", "Car", "Person", "Food").
|
|
677
|
+
|
|
678
|
+
**confidence**
|
|
679
|
+
A number between 0.0 and 1.0 indicating the confidence of the classification. Higher values mean more confident predictions.
|
|
680
|
+
|
|
681
|
+
**index**
|
|
682
|
+
The internal index of the label in Google ML Kit's classification model.
|
|
683
|
+
|
|
547
684
|
### BoundingBox
|
|
548
685
|
|
|
549
686
|
```typescript
|
|
@@ -555,7 +692,7 @@ interface BoundingBox {
|
|
|
555
692
|
}
|
|
556
693
|
```
|
|
557
694
|
|
|
558
|
-
Represents the rectangular region containing a detected face. The coordinate system has its origin (0, 0) at the top-left corner of the image, with x increasing to the right and y increasing downward. All values are in pixels.
|
|
695
|
+
Represents the rectangular region containing a detected face or object. The coordinate system has its origin (0, 0) at the top-left corner of the image, with x increasing to the right and y increasing downward. All values are in pixels.
|
|
559
696
|
|
|
560
697
|
### AnalysisOptions
|
|
561
698
|
|
|
@@ -563,6 +700,7 @@ Represents the rectangular region containing a detected face. The coordinate sys
|
|
|
563
700
|
interface AnalysisOptions {
|
|
564
701
|
detectFaces?: boolean;
|
|
565
702
|
detectPrintedText?: boolean;
|
|
703
|
+
detectObjects?: boolean;
|
|
566
704
|
faceDetectionMode?: 'fast' | 'accurate';
|
|
567
705
|
minFaceSize?: number;
|
|
568
706
|
}
|
|
@@ -578,6 +716,7 @@ All fields are optional and have sensible defaults. See the Advanced Configurati
|
|
|
578
716
|
const result = await analyzeImage(photoUri, {
|
|
579
717
|
detectFaces: true,
|
|
580
718
|
detectPrintedText: false,
|
|
719
|
+
detectObjects: false,
|
|
581
720
|
});
|
|
582
721
|
|
|
583
722
|
const faceCount = result.faces?.length || 0;
|
|
@@ -590,6 +729,7 @@ console.log(`Found ${faceCount} people in the photo`);
|
|
|
590
729
|
const result = await analyzeImage(documentUri, {
|
|
591
730
|
detectFaces: false,
|
|
592
731
|
detectPrintedText: true,
|
|
732
|
+
detectObjects: false,
|
|
593
733
|
});
|
|
594
734
|
|
|
595
735
|
if (result.printedText) {
|
|
@@ -602,6 +742,8 @@ if (result.printedText) {
|
|
|
602
742
|
```typescript
|
|
603
743
|
const result = await analyzeImage(selfieUri, {
|
|
604
744
|
detectFaces: true,
|
|
745
|
+
detectPrintedText: false,
|
|
746
|
+
detectObjects: false,
|
|
605
747
|
minFaceSize: 0.3,
|
|
606
748
|
faceDetectionMode: 'accurate',
|
|
607
749
|
});
|
|
@@ -616,19 +758,110 @@ const isValidSelfie =
|
|
|
616
758
|
const result = await analyzeImage(idCardUri, {
|
|
617
759
|
detectFaces: true,
|
|
618
760
|
detectPrintedText: true,
|
|
761
|
+
detectObjects: false,
|
|
619
762
|
faceDetectionMode: 'accurate',
|
|
620
763
|
});
|
|
621
764
|
|
|
622
765
|
const hasRequiredElements = result.containsFace && result.containsPrintedText;
|
|
623
766
|
```
|
|
624
767
|
|
|
768
|
+
### Object Recognition and Classification
|
|
769
|
+
|
|
770
|
+
```typescript
|
|
771
|
+
const result = await analyzeImage(imageUri, {
|
|
772
|
+
detectFaces: false,
|
|
773
|
+
detectPrintedText: false,
|
|
774
|
+
detectObjects: true,
|
|
775
|
+
});
|
|
776
|
+
|
|
777
|
+
if (result.objects && result.objects.length > 0) {
|
|
778
|
+
result.objects.forEach((obj) => {
|
|
779
|
+
const primaryLabel = obj.labels[0];
|
|
780
|
+
console.log(
|
|
781
|
+
`Detected: ${primaryLabel.text} (${(
|
|
782
|
+
primaryLabel.confidence * 100
|
|
783
|
+
).toFixed(1)}% confident)`
|
|
784
|
+
);
|
|
785
|
+
});
|
|
786
|
+
}
|
|
787
|
+
```
|
|
788
|
+
|
|
789
|
+
### Smart Photo Organization
|
|
790
|
+
|
|
791
|
+
```typescript
|
|
792
|
+
const result = await analyzeImage(photoUri, {
|
|
793
|
+
detectFaces: true,
|
|
794
|
+
detectPrintedText: false,
|
|
795
|
+
detectObjects: true,
|
|
796
|
+
});
|
|
797
|
+
|
|
798
|
+
const tags = [];
|
|
799
|
+
|
|
800
|
+
if (result.faces && result.faces.length > 0) {
|
|
801
|
+
tags.push(
|
|
802
|
+
`${result.faces.length} person${result.faces.length > 1 ? 's' : ''}`
|
|
803
|
+
);
|
|
804
|
+
}
|
|
805
|
+
|
|
806
|
+
if (result.objects) {
|
|
807
|
+
result.objects.forEach((obj) => {
|
|
808
|
+
if (obj.labels[0].confidence > 0.7) {
|
|
809
|
+
tags.push(obj.labels[0].text.toLowerCase());
|
|
810
|
+
}
|
|
811
|
+
});
|
|
812
|
+
}
|
|
813
|
+
|
|
814
|
+
console.log('Photo tags:', tags.join(', '));
|
|
815
|
+
```
|
|
816
|
+
|
|
817
|
+
### Food Recognition
|
|
818
|
+
|
|
819
|
+
```typescript
|
|
820
|
+
const result = await analyzeImage(foodPhotoUri, {
|
|
821
|
+
detectFaces: false,
|
|
822
|
+
detectPrintedText: false,
|
|
823
|
+
detectObjects: true,
|
|
824
|
+
});
|
|
825
|
+
|
|
826
|
+
const foodItems = result.objects
|
|
827
|
+
?.filter((obj) =>
|
|
828
|
+
obj.labels.some(
|
|
829
|
+
(label) =>
|
|
830
|
+
label.text.toLowerCase().includes('food') && label.confidence > 0.6
|
|
831
|
+
)
|
|
832
|
+
)
|
|
833
|
+
.map((obj) => obj.labels[0].text);
|
|
834
|
+
|
|
835
|
+
console.log('Food items detected:', foodItems);
|
|
836
|
+
```
|
|
837
|
+
|
|
838
|
+
### Pet Detection
|
|
839
|
+
|
|
840
|
+
```typescript
|
|
841
|
+
const result = await analyzeImage(petPhotoUri, {
|
|
842
|
+
detectFaces: false,
|
|
843
|
+
detectPrintedText: false,
|
|
844
|
+
detectObjects: true,
|
|
845
|
+
});
|
|
846
|
+
|
|
847
|
+
const pets = result.objects?.filter((obj) =>
|
|
848
|
+
obj.labels.some((label) =>
|
|
849
|
+
['dog', 'cat', 'bird', 'fish'].includes(label.text.toLowerCase())
|
|
850
|
+
)
|
|
851
|
+
);
|
|
852
|
+
|
|
853
|
+
if (pets && pets.length > 0) {
|
|
854
|
+
console.log(`Found ${pets.length} pet(s) in the image`);
|
|
855
|
+
}
|
|
856
|
+
```
|
|
857
|
+
|
|
625
858
|
## Performance Considerations
|
|
626
859
|
|
|
627
860
|
**Choose the Right Detection Mode**
|
|
628
861
|
Use 'fast' mode for real-time scenarios like camera previews where speed is more important than perfect accuracy. Use 'accurate' mode when analyzing stored images where quality matters more than speed.
|
|
629
862
|
|
|
630
863
|
**Disable Unused Features**
|
|
631
|
-
If you only need face detection, set `detectPrintedText: false` to improve performance. Similarly, if you only need
|
|
864
|
+
If you only need face detection, set `detectPrintedText: false` and `detectObjects: false` to improve performance. Similarly, if you only need object detection, disable the other features.
|
|
632
865
|
|
|
633
866
|
**Optimize Image Size**
|
|
634
867
|
Large images take longer to process. Consider resizing images before analysis if you're working with high-resolution photos. Libraries like react-native-image-resizer can help:
|
|
@@ -648,11 +881,20 @@ const result = await analyzeImage(resized.uri);
|
|
|
648
881
|
```
|
|
649
882
|
|
|
650
883
|
**First Run Download**
|
|
651
|
-
The first time you use the library, Google ML Kit needs to download its models
|
|
884
|
+
The first time you use the library, Google ML Kit needs to download its models:
|
|
885
|
+
|
|
886
|
+
- Face Detection: approximately 1-2 MB
|
|
887
|
+
- Text Recognition: approximately 10-15 MB
|
|
888
|
+
- Object Detection: approximately 20-30 MB
|
|
889
|
+
|
|
890
|
+
These models are downloaded automatically and cached on the device. This only happens once per device, but the first analysis may take longer than subsequent ones.
|
|
652
891
|
|
|
653
892
|
**Cache Results When Appropriate**
|
|
654
893
|
If you're analyzing the same image multiple times, consider caching the results instead of re-processing the image.
|
|
655
894
|
|
|
895
|
+
**Parallel vs Sequential Processing**
|
|
896
|
+
By default, all enabled detection methods run in parallel for maximum speed. However, this uses more device resources. If battery life is a concern, you can run analyses sequentially by calling `analyzeImage` multiple times with different options.
|
|
897
|
+
|
|
656
898
|
## Troubleshooting
|
|
657
899
|
|
|
658
900
|
### iOS Module Not Found
|
|
@@ -693,6 +935,14 @@ android {
|
|
|
693
935
|
}
|
|
694
936
|
```
|
|
695
937
|
|
|
938
|
+
### Android 16KB Alignment Warning
|
|
939
|
+
|
|
940
|
+
If you see a warning about "16KB-compatible", add this to `android/gradle.properties`:
|
|
941
|
+
|
|
942
|
+
```properties
|
|
943
|
+
android.bundle.enableUncompressedNativeLibs=false
|
|
944
|
+
```
|
|
945
|
+
|
|
696
946
|
### Permission Denied Errors
|
|
697
947
|
|
|
698
948
|
Remember that on Android, you need to request permissions at runtime, not just declare them in the manifest:
|
|
@@ -714,6 +964,16 @@ Make sure you're using the correct URI format for your platform:
|
|
|
714
964
|
- Android accepts: file://, content://, or absolute paths
|
|
715
965
|
- iOS accepts: file://, ph://, assets-library://, or absolute paths
|
|
716
966
|
|
|
967
|
+
### Object Detection Returns No Results
|
|
968
|
+
|
|
969
|
+
Object detection works best with clear, well-lit images containing common objects. If you're getting no results:
|
|
970
|
+
|
|
971
|
+
1. Ensure the image quality is good with adequate lighting
|
|
972
|
+
2. Try images with more prominent, well-framed objects
|
|
973
|
+
3. Verify that objects are not too small in the frame
|
|
974
|
+
4. Check that the image isn't too dark, blurry, or low resolution
|
|
975
|
+
5. Remember that ML Kit recognizes common objects best—very specific or unusual items may not be detected
|
|
976
|
+
|
|
717
977
|
## Privacy and Security
|
|
718
978
|
|
|
719
979
|
This library is designed with privacy as a core principle. All image processing happens entirely on-device using Google ML Kit. Your users' images and the data extracted from them never leave their device. No internet connection is required for the library to function, and no data is transmitted to external servers.
|
|
@@ -726,9 +986,40 @@ The library includes the following Google ML Kit components:
|
|
|
726
986
|
|
|
727
987
|
- Face Detection (Android: version 16.1.7, iOS: version 4.0.0)
|
|
728
988
|
- Text Recognition v2 (Android: version 19.0.0, iOS: version 4.0.0)
|
|
989
|
+
- Object Detection (Android: version 17.0.2, iOS: version 4.0.0)
|
|
729
990
|
|
|
730
991
|
It's built with TurboModule specifications to be compatible with React Native's new architecture. The package includes full TypeScript type definitions and integrates with iOS via CocoaPods and Android via Gradle.
|
|
731
992
|
|
|
993
|
+
## Object Detection Capabilities
|
|
994
|
+
|
|
995
|
+
The object detection feature can recognize a wide variety of common objects across multiple categories:
|
|
996
|
+
|
|
997
|
+
**Animals**
|
|
998
|
+
Dog, Cat, Bird, Fish, Horse, Rabbit, and various other domestic and wild animals
|
|
999
|
+
|
|
1000
|
+
**Vehicles**
|
|
1001
|
+
Car, Truck, Bicycle, Motorcycle, Bus, Train, Airplane, Boat
|
|
1002
|
+
|
|
1003
|
+
**Household Items**
|
|
1004
|
+
Chair, Table, Bed, Sofa, Lamp, Television, Refrigerator, Microwave
|
|
1005
|
+
|
|
1006
|
+
**Food and Beverages**
|
|
1007
|
+
Fruits, Vegetables, Beverages, Prepared dishes, Snacks, Desserts
|
|
1008
|
+
|
|
1009
|
+
**Electronics**
|
|
1010
|
+
Phone, Laptop, Computer, TV, Camera, Keyboard, Mouse
|
|
1011
|
+
|
|
1012
|
+
**Clothing and Accessories**
|
|
1013
|
+
Shirt, Pants, Shoes, Hat, Bag, Glasses
|
|
1014
|
+
|
|
1015
|
+
**Outdoor Objects**
|
|
1016
|
+
Trees, Flowers, Buildings, Roads, Signs
|
|
1017
|
+
|
|
1018
|
+
**Sports Equipment**
|
|
1019
|
+
Ball, Racket, Bicycle, Skateboard
|
|
1020
|
+
|
|
1021
|
+
And many more categories. Each detection includes multiple classification labels with confidence scores, allowing you to choose the most appropriate label for your use case or combine multiple labels for more accurate classification.
|
|
1022
|
+
|
|
732
1023
|
## Contributing
|
|
733
1024
|
|
|
734
1025
|
Contributions are welcome. Please read the CONTRIBUTING.md file in the repository for guidelines on how to submit issues and pull requests.
|
|
@@ -742,16 +1033,15 @@ This project is licensed under the MIT License. See the LICENSE file for complet
|
|
|
742
1033
|
Sushant Singh
|
|
743
1034
|
|
|
744
1035
|
Email: sushantbibhu@gmail.com
|
|
745
|
-
|
|
746
1036
|
GitHub: @thisissushant
|
|
747
1037
|
|
|
748
1038
|
## Support and Community
|
|
749
1039
|
|
|
750
1040
|
If you need help or want to discuss the library:
|
|
751
1041
|
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
1042
|
+
Email: sushantbibhu@gmail.com
|
|
1043
|
+
Issues: GitHub Issues (https://github.com/thisissushant/mediversal-rn-image-intelligence/issues)
|
|
1044
|
+
Discussions: GitHub Discussions (https://github.com/thisissushant/mediversal-rn-image-intelligence/discussions)
|
|
755
1045
|
|
|
756
1046
|
## Acknowledgments
|
|
757
1047
|
|
|
@@ -759,4 +1049,6 @@ This library is built on top of Google ML Kit and React Native. Thanks to both t
|
|
|
759
1049
|
|
|
760
1050
|
---
|
|
761
1051
|
|
|
762
|
-
If this library has been helpful for your project, consider giving it a star on GitHub to help others discover it.
|
|
1052
|
+
If this library has been helpful for your project, consider giving it a star on GitHub (https://github.com/thisissushant/mediversal-rn-image-intelligence) to help others discover it.
|
|
1053
|
+
|
|
1054
|
+
Made with care for the React Native community.
|
package/android/build.gradle
CHANGED
|
@@ -19,7 +19,7 @@ def safeExtGet(prop, fallback) {
|
|
|
19
19
|
|
|
20
20
|
android {
|
|
21
21
|
compileSdkVersion safeExtGet('compileSdkVersion', 33)
|
|
22
|
-
|
|
22
|
+
ndkVersion "26.1.10909125"
|
|
23
23
|
namespace "com.mediversalrnimagintelligence"
|
|
24
24
|
|
|
25
25
|
defaultConfig {
|
|
@@ -58,10 +58,13 @@ dependencies {
|
|
|
58
58
|
implementation "org.jetbrains.kotlin:kotlin-stdlib:1.8.0"
|
|
59
59
|
|
|
60
60
|
// Google ML Kit - Face Detection
|
|
61
|
-
implementation 'com.google.mlkit:face-detection:16.1.
|
|
61
|
+
implementation 'com.google.mlkit:face-detection:16.1.7'
|
|
62
62
|
|
|
63
63
|
// Google ML Kit - Text Recognition v2 (newer API)
|
|
64
64
|
implementation 'com.google.android.gms:play-services-mlkit-text-recognition:19.0.0'
|
|
65
|
+
|
|
66
|
+
// Google ML Kit - Object Detection
|
|
67
|
+
implementation 'com.google.mlkit:object-detection:17.0.2'
|
|
65
68
|
|
|
66
69
|
// Coroutines for async operations
|
|
67
70
|
implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-core:1.6.4'
|
|
@@ -1,15 +1,16 @@
|
|
|
1
1
|
package com.mediversalrnimagintelligence
|
|
2
|
-
|
|
3
2
|
import com.facebook.react.ReactPackage
|
|
4
3
|
import com.facebook.react.bridge.NativeModule
|
|
5
4
|
import com.facebook.react.bridge.ReactApplicationContext
|
|
6
5
|
import com.facebook.react.uimanager.ViewManager
|
|
6
|
+
import com.mediversalrnimagintelligence.ObjectDetectionModule
|
|
7
7
|
|
|
8
8
|
class ImageIntelligencePackage : ReactPackage {
|
|
9
9
|
override fun createNativeModules(reactContext: ReactApplicationContext): List<NativeModule> {
|
|
10
10
|
return listOf(
|
|
11
11
|
FaceDetectionModule(reactContext),
|
|
12
12
|
TextRecognitionModule(reactContext)
|
|
13
|
+
ObjectDetectionModule(reactContext)
|
|
13
14
|
)
|
|
14
15
|
}
|
|
15
16
|
|