react-native-executorch 0.4.9 → 0.5.0-nightly-6f4dd53-20251211
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +77 -71
- package/android/CMakeLists.txt +29 -0
- package/android/build.gradle +76 -13
- package/android/libs/classes.jar +0 -0
- package/android/src/main/cpp/CMakeLists.txt +109 -0
- package/android/src/main/cpp/ETInstallerModule.cpp +76 -0
- package/android/src/main/cpp/ETInstallerModule.h +43 -0
- package/android/src/main/java/com/swmansion/rnexecutorch/ETInstaller.kt +66 -0
- package/android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt +6 -124
- package/common/ada/ada.cpp +17406 -0
- package/common/ada/ada.h +10274 -0
- package/common/pfft/pfft.c +2205 -0
- package/common/pfft/pfft.h +185 -0
- package/common/rnexecutorch/Log.h +489 -0
- package/common/rnexecutorch/RnExecutorchInstaller.cpp +113 -0
- package/common/rnexecutorch/RnExecutorchInstaller.h +87 -0
- package/common/rnexecutorch/TokenizerModule.cpp +52 -0
- package/common/rnexecutorch/TokenizerModule.h +29 -0
- package/common/rnexecutorch/data_processing/FFT.cpp +21 -0
- package/common/rnexecutorch/data_processing/FFT.h +23 -0
- package/common/rnexecutorch/data_processing/FileUtils.h +30 -0
- package/common/rnexecutorch/data_processing/ImageProcessing.cpp +240 -0
- package/common/rnexecutorch/data_processing/ImageProcessing.h +55 -0
- package/common/rnexecutorch/data_processing/Numerical.cpp +111 -0
- package/common/rnexecutorch/data_processing/Numerical.h +77 -0
- package/common/rnexecutorch/data_processing/base64.cpp +110 -0
- package/common/rnexecutorch/data_processing/base64.h +46 -0
- package/common/rnexecutorch/data_processing/dsp.cpp +19 -0
- package/common/rnexecutorch/data_processing/dsp.h +12 -0
- package/common/rnexecutorch/data_processing/gzip.cpp +47 -0
- package/common/rnexecutorch/data_processing/gzip.h +7 -0
- package/common/rnexecutorch/host_objects/JSTensorViewIn.h +12 -0
- package/common/rnexecutorch/host_objects/JSTensorViewOut.h +22 -0
- package/common/rnexecutorch/host_objects/JsiConversions.h +418 -0
- package/common/rnexecutorch/host_objects/ModelHostObject.h +313 -0
- package/common/rnexecutorch/jsi/JsiHostObject.cpp +108 -0
- package/common/rnexecutorch/jsi/JsiHostObject.h +87 -0
- package/common/rnexecutorch/jsi/OwningArrayBuffer.h +57 -0
- package/common/rnexecutorch/jsi/Promise.cpp +20 -0
- package/common/rnexecutorch/jsi/Promise.h +69 -0
- package/common/rnexecutorch/jsi/RuntimeAwareCache.h +58 -0
- package/common/rnexecutorch/jsi/RuntimeLifecycleMonitor.cpp +53 -0
- package/common/rnexecutorch/jsi/RuntimeLifecycleMonitor.h +35 -0
- package/common/rnexecutorch/metaprogramming/ConstructorHelpers.h +133 -0
- package/common/rnexecutorch/metaprogramming/FunctionHelpers.h +50 -0
- package/common/rnexecutorch/metaprogramming/TypeConcepts.h +37 -0
- package/common/rnexecutorch/models/BaseModel.cpp +183 -0
- package/common/rnexecutorch/models/BaseModel.h +61 -0
- package/common/rnexecutorch/models/classification/Classification.cpp +72 -0
- package/common/rnexecutorch/models/classification/Classification.h +31 -0
- package/{ios/RnExecutorch/models/classification/Constants.mm → common/rnexecutorch/models/classification/Constants.h} +7 -2
- package/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp +19 -0
- package/common/rnexecutorch/models/embeddings/BaseEmbeddings.h +17 -0
- package/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp +45 -0
- package/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h +28 -0
- package/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp +60 -0
- package/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h +31 -0
- package/{ios/RnExecutorch/models/image_segmentation/Constants.mm → common/rnexecutorch/models/image_segmentation/Constants.h} +7 -2
- package/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp +168 -0
- package/common/rnexecutorch/models/image_segmentation/ImageSegmentation.h +50 -0
- package/common/rnexecutorch/models/llm/LLM.cpp +98 -0
- package/common/rnexecutorch/models/llm/LLM.h +38 -0
- package/{ios/RnExecutorch/utils/Constants.mm → common/rnexecutorch/models/object_detection/Constants.h} +9 -2
- package/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +82 -0
- package/common/rnexecutorch/models/object_detection/ObjectDetection.h +37 -0
- package/common/rnexecutorch/models/object_detection/Types.h +13 -0
- package/{ios/RnExecutorch/utils/ObjectDetectionUtils.mm → common/rnexecutorch/models/object_detection/Utils.cpp} +17 -35
- package/common/rnexecutorch/models/object_detection/Utils.h +11 -0
- package/common/rnexecutorch/models/ocr/CTCLabelConverter.cpp +88 -0
- package/common/rnexecutorch/models/ocr/CTCLabelConverter.h +29 -0
- package/common/rnexecutorch/models/ocr/Constants.h +41 -0
- package/common/rnexecutorch/models/ocr/Detector.cpp +100 -0
- package/common/rnexecutorch/models/ocr/Detector.h +30 -0
- package/common/rnexecutorch/models/ocr/OCR.cpp +53 -0
- package/common/rnexecutorch/models/ocr/OCR.h +44 -0
- package/common/rnexecutorch/models/ocr/RecognitionHandler.cpp +108 -0
- package/common/rnexecutorch/models/ocr/RecognitionHandler.h +42 -0
- package/common/rnexecutorch/models/ocr/Recognizer.cpp +80 -0
- package/common/rnexecutorch/models/ocr/Recognizer.h +36 -0
- package/common/rnexecutorch/models/ocr/Types.h +35 -0
- package/common/rnexecutorch/models/ocr/utils/DetectorUtils.cpp +710 -0
- package/common/rnexecutorch/models/ocr/utils/DetectorUtils.h +81 -0
- package/common/rnexecutorch/models/ocr/utils/RecognitionHandlerUtils.cpp +159 -0
- package/common/rnexecutorch/models/ocr/utils/RecognitionHandlerUtils.h +72 -0
- package/common/rnexecutorch/models/ocr/utils/RecognizerUtils.cpp +204 -0
- package/common/rnexecutorch/models/ocr/utils/RecognizerUtils.h +71 -0
- package/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp +125 -0
- package/common/rnexecutorch/models/speech_to_text/SpeechToText.h +57 -0
- package/common/rnexecutorch/models/speech_to_text/asr/ASR.cpp +303 -0
- package/common/rnexecutorch/models/speech_to_text/asr/ASR.h +61 -0
- package/common/rnexecutorch/models/speech_to_text/stream/HypothesisBuffer.cpp +82 -0
- package/common/rnexecutorch/models/speech_to_text/stream/HypothesisBuffer.h +25 -0
- package/common/rnexecutorch/models/speech_to_text/stream/OnlineASRProcessor.cpp +99 -0
- package/common/rnexecutorch/models/speech_to_text/stream/OnlineASRProcessor.h +33 -0
- package/common/rnexecutorch/models/speech_to_text/types/DecodingOptions.h +15 -0
- package/common/rnexecutorch/models/speech_to_text/types/GenerationResult.h +12 -0
- package/common/rnexecutorch/models/speech_to_text/types/ProcessResult.h +12 -0
- package/common/rnexecutorch/models/speech_to_text/types/Segment.h +14 -0
- package/common/rnexecutorch/models/speech_to_text/types/Word.h +13 -0
- package/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp +55 -0
- package/common/rnexecutorch/models/style_transfer/StyleTransfer.h +35 -0
- package/common/rnexecutorch/models/text_to_image/Constants.h +9 -0
- package/common/rnexecutorch/models/text_to_image/Decoder.cpp +32 -0
- package/common/rnexecutorch/models/text_to_image/Decoder.h +24 -0
- package/common/rnexecutorch/models/text_to_image/Encoder.cpp +44 -0
- package/common/rnexecutorch/models/text_to_image/Encoder.h +32 -0
- package/common/rnexecutorch/models/text_to_image/Scheduler.cpp +152 -0
- package/common/rnexecutorch/models/text_to_image/Scheduler.h +41 -0
- package/common/rnexecutorch/models/text_to_image/TextToImage.cpp +141 -0
- package/common/rnexecutorch/models/text_to_image/TextToImage.h +64 -0
- package/common/rnexecutorch/models/text_to_image/UNet.cpp +38 -0
- package/common/rnexecutorch/models/text_to_image/UNet.h +28 -0
- package/common/rnexecutorch/models/vertical_ocr/VerticalDetector.cpp +93 -0
- package/common/rnexecutorch/models/vertical_ocr/VerticalDetector.h +49 -0
- package/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp +180 -0
- package/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h +85 -0
- package/common/rnexecutorch/models/voice_activity_detection/Constants.h +27 -0
- package/common/rnexecutorch/models/voice_activity_detection/Types.h +12 -0
- package/common/rnexecutorch/models/voice_activity_detection/Utils.cpp +15 -0
- package/common/rnexecutorch/models/voice_activity_detection/Utils.h +13 -0
- package/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.cpp +160 -0
- package/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h +36 -0
- package/common/rnexecutorch/tests/CMakeLists.txt +30 -0
- package/common/rnexecutorch/tests/LogTest.cpp +530 -0
- package/common/rnexecutorch/tests/NumericalTest.cpp +110 -0
- package/common/rnexecutorch/tests/README.md +37 -0
- package/common/rnexecutorch/threads/GlobalThreadPool.h +84 -0
- package/common/rnexecutorch/threads/HighPerformanceThreadPool.h +364 -0
- package/common/rnexecutorch/threads/utils/ThreadUtils.h +29 -0
- package/common/runner/arange_util.cpp +44 -0
- package/common/runner/arange_util.h +37 -0
- package/common/runner/constants.h +28 -0
- package/common/runner/io_manager.h +240 -0
- package/common/runner/irunner.h +119 -0
- package/common/runner/kernel_includes.h +23 -0
- package/common/runner/runner.cpp +369 -0
- package/common/runner/runner.h +85 -0
- package/common/runner/sampler.cpp +201 -0
- package/common/runner/sampler.h +67 -0
- package/common/runner/stats.h +161 -0
- package/common/runner/text_decoder_runner.cpp +79 -0
- package/common/runner/text_decoder_runner.h +126 -0
- package/common/runner/text_prefiller.cpp +125 -0
- package/common/runner/text_prefiller.h +85 -0
- package/common/runner/text_token_generator.h +205 -0
- package/common/runner/util.h +153 -0
- package/ios/RnExecutorch/ETInstaller.h +8 -0
- package/ios/RnExecutorch/ETInstaller.mm +56 -0
- package/ios/RnExecutorch.xcodeproj/project.pbxproj +73 -23
- package/lib/module/Error.js +3 -0
- package/lib/module/Error.js.map +1 -1
- package/lib/module/common/Logger.js +23 -0
- package/lib/module/common/Logger.js.map +1 -0
- package/lib/module/constants/directories.js +1 -1
- package/lib/module/constants/directories.js.map +1 -1
- package/lib/module/constants/llmDefaults.js +8 -0
- package/lib/module/constants/llmDefaults.js.map +1 -1
- package/lib/module/constants/modelUrls.js +356 -84
- package/lib/module/constants/modelUrls.js.map +1 -1
- package/lib/module/constants/ocr/models.js +181 -286
- package/lib/module/constants/ocr/models.js.map +1 -1
- package/lib/module/constants/ocr/symbols.js +66 -65
- package/lib/module/constants/ocr/symbols.js.map +1 -1
- package/lib/module/controllers/LLMController.js +54 -24
- package/lib/module/controllers/LLMController.js.map +1 -1
- package/lib/module/controllers/OCRController.js +17 -10
- package/lib/module/controllers/OCRController.js.map +1 -1
- package/lib/module/controllers/VerticalOCRController.js +17 -10
- package/lib/module/controllers/VerticalOCRController.js.map +1 -1
- package/lib/module/hooks/computer_vision/useClassification.js +4 -4
- package/lib/module/hooks/computer_vision/useClassification.js.map +1 -1
- package/lib/module/hooks/computer_vision/useImageEmbeddings.js +13 -0
- package/lib/module/hooks/computer_vision/useImageEmbeddings.js.map +1 -0
- package/lib/module/hooks/computer_vision/useImageSegmentation.js +2 -2
- package/lib/module/hooks/computer_vision/useImageSegmentation.js.map +1 -1
- package/lib/module/hooks/computer_vision/useOCR.js +16 -17
- package/lib/module/hooks/computer_vision/useOCR.js.map +1 -1
- package/lib/module/hooks/computer_vision/useObjectDetection.js +3 -3
- package/lib/module/hooks/computer_vision/useObjectDetection.js.map +1 -1
- package/lib/module/hooks/computer_vision/useStyleTransfer.js +3 -3
- package/lib/module/hooks/computer_vision/useStyleTransfer.js.map +1 -1
- package/lib/module/hooks/computer_vision/useTextToImage.js +57 -0
- package/lib/module/hooks/computer_vision/useTextToImage.js.map +1 -0
- package/lib/module/hooks/computer_vision/useVerticalOCR.js +18 -19
- package/lib/module/hooks/computer_vision/useVerticalOCR.js.map +1 -1
- package/lib/module/hooks/general/useExecutorchModule.js +1 -1
- package/lib/module/hooks/general/useExecutorchModule.js.map +1 -1
- package/lib/module/hooks/natural_language_processing/useLLM.js +30 -29
- package/lib/module/hooks/natural_language_processing/useLLM.js.map +1 -1
- package/lib/module/hooks/natural_language_processing/useSpeechToText.js +72 -33
- package/lib/module/hooks/natural_language_processing/useSpeechToText.js.map +1 -1
- package/lib/module/hooks/natural_language_processing/useTextEmbeddings.js +2 -3
- package/lib/module/hooks/natural_language_processing/useTextEmbeddings.js.map +1 -1
- package/lib/module/hooks/natural_language_processing/useTokenizer.js +19 -18
- package/lib/module/hooks/natural_language_processing/useTokenizer.js.map +1 -1
- package/lib/module/hooks/natural_language_processing/useVAD.js +13 -0
- package/lib/module/hooks/natural_language_processing/useVAD.js.map +1 -0
- package/lib/module/hooks/useModule.js +13 -9
- package/lib/module/hooks/useModule.js.map +1 -1
- package/lib/module/index.js +21 -3
- package/lib/module/index.js.map +1 -1
- package/lib/module/modules/BaseModule.js +9 -14
- package/lib/module/modules/BaseModule.js.map +1 -1
- package/lib/module/modules/computer_vision/ClassificationModule.js +11 -6
- package/lib/module/modules/computer_vision/ClassificationModule.js.map +1 -1
- package/lib/module/modules/computer_vision/ImageEmbeddingsModule.js +19 -0
- package/lib/module/modules/computer_vision/ImageEmbeddingsModule.js.map +1 -0
- package/lib/module/modules/computer_vision/ImageSegmentationModule.js +20 -18
- package/lib/module/modules/computer_vision/ImageSegmentationModule.js.map +1 -1
- package/lib/module/modules/computer_vision/OCRModule.js +13 -10
- package/lib/module/modules/computer_vision/OCRModule.js.map +1 -1
- package/lib/module/modules/computer_vision/ObjectDetectionModule.js +11 -6
- package/lib/module/modules/computer_vision/ObjectDetectionModule.js.map +1 -1
- package/lib/module/modules/computer_vision/StyleTransferModule.js +11 -6
- package/lib/module/modules/computer_vision/StyleTransferModule.js.map +1 -1
- package/lib/module/modules/computer_vision/TextToImageModule.js +48 -0
- package/lib/module/modules/computer_vision/TextToImageModule.js.map +1 -0
- package/lib/module/modules/computer_vision/VerticalOCRModule.js +15 -10
- package/lib/module/modules/computer_vision/VerticalOCRModule.js.map +1 -1
- package/lib/module/modules/general/ExecutorchModule.js +8 -34
- package/lib/module/modules/general/ExecutorchModule.js.map +1 -1
- package/lib/module/modules/natural_language_processing/LLMModule.js +25 -24
- package/lib/module/modules/natural_language_processing/LLMModule.js.map +1 -1
- package/lib/module/modules/natural_language_processing/SpeechToTextModule.js +109 -27
- package/lib/module/modules/natural_language_processing/SpeechToTextModule.js.map +1 -1
- package/lib/module/modules/natural_language_processing/TextEmbeddingsModule.js +13 -6
- package/lib/module/modules/natural_language_processing/TextEmbeddingsModule.js.map +1 -1
- package/lib/module/modules/natural_language_processing/TokenizerModule.js +20 -14
- package/lib/module/modules/natural_language_processing/TokenizerModule.js.map +1 -1
- package/lib/module/modules/natural_language_processing/VADModule.js +19 -0
- package/lib/module/modules/natural_language_processing/VADModule.js.map +1 -0
- package/lib/module/native/NativeETInstaller.js +5 -0
- package/lib/module/native/NativeETInstaller.js.map +1 -0
- package/lib/module/native/RnExecutorchModules.js +2 -12
- package/lib/module/native/RnExecutorchModules.js.map +1 -1
- package/lib/module/types/common.js +25 -8
- package/lib/module/types/common.js.map +1 -1
- package/lib/module/types/llm.js.map +1 -1
- package/lib/module/types/stt.js +1 -79
- package/lib/module/types/stt.js.map +1 -1
- package/lib/module/types/vad.js +2 -0
- package/lib/module/types/vad.js.map +1 -0
- package/lib/module/utils/ResourceFetcher.js +275 -114
- package/lib/module/utils/ResourceFetcher.js.map +1 -1
- package/lib/module/utils/ResourceFetcherUtils.js +155 -0
- package/lib/module/utils/ResourceFetcherUtils.js.map +1 -0
- package/lib/module/utils/llm.js +41 -1
- package/lib/module/utils/llm.js.map +1 -1
- package/lib/typescript/Error.d.ts +3 -0
- package/lib/typescript/Error.d.ts.map +1 -1
- package/lib/typescript/common/Logger.d.ts +9 -0
- package/lib/typescript/common/Logger.d.ts.map +1 -0
- package/lib/typescript/constants/llmDefaults.d.ts +1 -0
- package/lib/typescript/constants/llmDefaults.d.ts.map +1 -1
- package/lib/typescript/constants/modelUrls.d.ts +263 -79
- package/lib/typescript/constants/modelUrls.d.ts.map +1 -1
- package/lib/typescript/constants/ocr/models.d.ts +882 -284
- package/lib/typescript/constants/ocr/models.d.ts.map +1 -1
- package/lib/typescript/constants/ocr/symbols.d.ts +1 -1
- package/lib/typescript/constants/ocr/symbols.d.ts.map +1 -1
- package/lib/typescript/controllers/LLMController.d.ts +8 -7
- package/lib/typescript/controllers/LLMController.d.ts.map +1 -1
- package/lib/typescript/controllers/OCRController.d.ts +5 -6
- package/lib/typescript/controllers/OCRController.d.ts.map +1 -1
- package/lib/typescript/controllers/VerticalOCRController.d.ts +5 -6
- package/lib/typescript/controllers/VerticalOCRController.d.ts.map +1 -1
- package/lib/typescript/hooks/computer_vision/useClassification.d.ts +8 -6
- package/lib/typescript/hooks/computer_vision/useClassification.d.ts.map +1 -1
- package/lib/typescript/hooks/computer_vision/useImageEmbeddings.d.ts +16 -0
- package/lib/typescript/hooks/computer_vision/useImageEmbeddings.d.ts.map +1 -0
- package/lib/typescript/hooks/computer_vision/useImageSegmentation.d.ts +5 -3
- package/lib/typescript/hooks/computer_vision/useImageSegmentation.d.ts.map +1 -1
- package/lib/typescript/hooks/computer_vision/useOCR.d.ts +5 -5
- package/lib/typescript/hooks/computer_vision/useOCR.d.ts.map +1 -1
- package/lib/typescript/hooks/computer_vision/useObjectDetection.d.ts +5 -3
- package/lib/typescript/hooks/computer_vision/useObjectDetection.d.ts.map +1 -1
- package/lib/typescript/hooks/computer_vision/useStyleTransfer.d.ts +5 -3
- package/lib/typescript/hooks/computer_vision/useStyleTransfer.d.ts.map +1 -1
- package/lib/typescript/hooks/computer_vision/useTextToImage.d.ts +22 -0
- package/lib/typescript/hooks/computer_vision/useTextToImage.d.ts.map +1 -0
- package/lib/typescript/hooks/computer_vision/useVerticalOCR.d.ts +4 -6
- package/lib/typescript/hooks/computer_vision/useVerticalOCR.d.ts.map +1 -1
- package/lib/typescript/hooks/general/useExecutorchModule.d.ts +1 -1
- package/lib/typescript/hooks/natural_language_processing/useLLM.d.ts +6 -4
- package/lib/typescript/hooks/natural_language_processing/useLLM.d.ts.map +1 -1
- package/lib/typescript/hooks/natural_language_processing/useSpeechToText.d.ts +15 -22
- package/lib/typescript/hooks/natural_language_processing/useSpeechToText.d.ts.map +1 -1
- package/lib/typescript/hooks/natural_language_processing/useTextEmbeddings.d.ts +9 -5
- package/lib/typescript/hooks/natural_language_processing/useTextEmbeddings.d.ts.map +1 -1
- package/lib/typescript/hooks/natural_language_processing/useTokenizer.d.ts +6 -4
- package/lib/typescript/hooks/natural_language_processing/useTokenizer.d.ts.map +1 -1
- package/lib/typescript/hooks/natural_language_processing/useVAD.d.ts +16 -0
- package/lib/typescript/hooks/natural_language_processing/useVAD.d.ts.map +1 -0
- package/lib/typescript/hooks/useModule.d.ts +8 -5
- package/lib/typescript/hooks/useModule.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +26 -3
- package/lib/typescript/index.d.ts.map +1 -1
- package/lib/typescript/modules/BaseModule.d.ts +7 -6
- package/lib/typescript/modules/BaseModule.d.ts.map +1 -1
- package/lib/typescript/modules/computer_vision/ClassificationModule.d.ts +4 -4
- package/lib/typescript/modules/computer_vision/ClassificationModule.d.ts.map +1 -1
- package/lib/typescript/modules/computer_vision/ImageEmbeddingsModule.d.ts +9 -0
- package/lib/typescript/modules/computer_vision/ImageEmbeddingsModule.d.ts.map +1 -0
- package/lib/typescript/modules/computer_vision/ImageSegmentationModule.d.ts +7 -27
- package/lib/typescript/modules/computer_vision/ImageSegmentationModule.d.ts.map +1 -1
- package/lib/typescript/modules/computer_vision/OCRModule.d.ts +8 -7
- package/lib/typescript/modules/computer_vision/OCRModule.d.ts.map +1 -1
- package/lib/typescript/modules/computer_vision/ObjectDetectionModule.d.ts +5 -3
- package/lib/typescript/modules/computer_vision/ObjectDetectionModule.d.ts.map +1 -1
- package/lib/typescript/modules/computer_vision/StyleTransferModule.d.ts +4 -3
- package/lib/typescript/modules/computer_vision/StyleTransferModule.d.ts.map +1 -1
- package/lib/typescript/modules/computer_vision/TextToImageModule.d.ts +16 -0
- package/lib/typescript/modules/computer_vision/TextToImageModule.d.ts.map +1 -0
- package/lib/typescript/modules/computer_vision/VerticalOCRModule.d.ts +7 -8
- package/lib/typescript/modules/computer_vision/VerticalOCRModule.d.ts.map +1 -1
- package/lib/typescript/modules/general/ExecutorchModule.d.ts +4 -7
- package/lib/typescript/modules/general/ExecutorchModule.d.ts.map +1 -1
- package/lib/typescript/modules/natural_language_processing/LLMModule.d.ts +19 -17
- package/lib/typescript/modules/natural_language_processing/LLMModule.d.ts.map +1 -1
- package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts +17 -13
- package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts.map +1 -1
- package/lib/typescript/modules/natural_language_processing/TextEmbeddingsModule.d.ts +5 -3
- package/lib/typescript/modules/natural_language_processing/TextEmbeddingsModule.d.ts.map +1 -1
- package/lib/typescript/modules/natural_language_processing/TokenizerModule.d.ts +10 -9
- package/lib/typescript/modules/natural_language_processing/TokenizerModule.d.ts.map +1 -1
- package/lib/typescript/modules/natural_language_processing/VADModule.d.ts +10 -0
- package/lib/typescript/modules/natural_language_processing/VADModule.d.ts.map +1 -0
- package/lib/typescript/native/{NativeStyleTransfer.d.ts → NativeETInstaller.d.ts} +2 -3
- package/lib/typescript/native/NativeETInstaller.d.ts.map +1 -0
- package/lib/typescript/native/RnExecutorchModules.d.ts +3 -23
- package/lib/typescript/native/RnExecutorchModules.d.ts.map +1 -1
- package/lib/typescript/types/common.d.ts +30 -2
- package/lib/typescript/types/common.d.ts.map +1 -1
- package/lib/typescript/types/llm.d.ts +9 -1
- package/lib/typescript/types/llm.d.ts.map +1 -1
- package/lib/typescript/types/stt.d.ts +9 -88
- package/lib/typescript/types/stt.d.ts.map +1 -1
- package/lib/typescript/types/vad.d.ts +5 -0
- package/lib/typescript/types/vad.d.ts.map +1 -0
- package/lib/typescript/utils/ResourceFetcher.d.ts +47 -10
- package/lib/typescript/utils/ResourceFetcher.d.ts.map +1 -1
- package/lib/typescript/utils/ResourceFetcherUtils.d.ts +55 -0
- package/lib/typescript/utils/ResourceFetcherUtils.d.ts.map +1 -0
- package/lib/typescript/utils/llm.d.ts +4 -0
- package/lib/typescript/utils/llm.d.ts.map +1 -1
- package/package.json +32 -68
- package/react-native-executorch.podspec +62 -4
- package/src/Error.ts +3 -0
- package/src/common/Logger.ts +25 -0
- package/src/constants/directories.ts +1 -1
- package/src/constants/llmDefaults.ts +11 -0
- package/src/constants/modelUrls.ts +433 -168
- package/src/constants/ocr/models.ts +826 -395
- package/src/constants/ocr/symbols.ts +66 -65
- package/src/controllers/LLMController.ts +77 -37
- package/src/controllers/OCRController.ts +25 -16
- package/src/controllers/VerticalOCRController.ts +25 -15
- package/src/hooks/computer_vision/useClassification.ts +8 -9
- package/src/hooks/computer_vision/useImageEmbeddings.ts +15 -0
- package/src/hooks/computer_vision/useImageSegmentation.ts +3 -6
- package/src/hooks/computer_vision/useOCR.ts +32 -25
- package/src/hooks/computer_vision/useObjectDetection.ts +4 -7
- package/src/hooks/computer_vision/useStyleTransfer.ts +4 -4
- package/src/hooks/computer_vision/useTextToImage.ts +92 -0
- package/src/hooks/computer_vision/useVerticalOCR.ts +33 -31
- package/src/hooks/general/useExecutorchModule.ts +1 -1
- package/src/hooks/natural_language_processing/useLLM.ts +54 -31
- package/src/hooks/natural_language_processing/useSpeechToText.ts +96 -88
- package/src/hooks/natural_language_processing/useTextEmbeddings.ts +9 -9
- package/src/hooks/natural_language_processing/useTokenizer.ts +21 -21
- package/src/hooks/natural_language_processing/useVAD.ts +15 -0
- package/src/hooks/useModule.ts +23 -13
- package/src/index.ts +126 -0
- package/src/modules/BaseModule.ts +17 -22
- package/src/modules/computer_vision/ClassificationModule.ts +18 -9
- package/src/modules/computer_vision/ImageEmbeddingsModule.ts +26 -0
- package/src/modules/computer_vision/ImageSegmentationModule.ts +34 -26
- package/src/modules/computer_vision/OCRModule.ts +23 -15
- package/src/modules/computer_vision/ObjectDetectionModule.ts +22 -9
- package/src/modules/computer_vision/StyleTransferModule.ts +18 -9
- package/src/modules/computer_vision/TextToImageModule.ts +93 -0
- package/src/modules/computer_vision/VerticalOCRModule.ts +25 -21
- package/src/modules/general/ExecutorchModule.ts +16 -46
- package/src/modules/natural_language_processing/LLMModule.ts +41 -32
- package/src/modules/natural_language_processing/SpeechToTextModule.ts +164 -66
- package/src/modules/natural_language_processing/TextEmbeddingsModule.ts +25 -10
- package/src/modules/natural_language_processing/TokenizerModule.ts +27 -17
- package/src/modules/natural_language_processing/VADModule.ts +27 -0
- package/src/native/NativeETInstaller.ts +8 -0
- package/src/native/RnExecutorchModules.ts +4 -50
- package/src/types/common.ts +40 -12
- package/src/types/llm.ts +10 -0
- package/src/types/stt.ts +87 -90
- package/src/types/vad.ts +4 -0
- package/src/utils/ResourceFetcher.ts +342 -120
- package/src/utils/ResourceFetcherUtils.ts +184 -0
- package/src/utils/llm.ts +65 -1
- package/third-party/android/libs/cpuinfo/arm64-v8a/libcpuinfo.so +0 -0
- package/third-party/android/libs/executorch/arm64-v8a/libexecutorch.so +0 -0
- package/third-party/android/libs/executorch/x86_64/libexecutorch.so +0 -0
- package/third-party/android/libs/opencv/arm64-v8a/libopencv_core.a +0 -0
- package/third-party/android/libs/opencv/arm64-v8a/libopencv_features2d.a +0 -0
- package/third-party/android/libs/opencv/arm64-v8a/libopencv_highgui.a +0 -0
- package/third-party/android/libs/opencv/arm64-v8a/libopencv_imgproc.a +0 -0
- package/third-party/android/libs/opencv/arm64-v8a/libopencv_photo.a +0 -0
- package/third-party/android/libs/opencv/arm64-v8a/libopencv_video.a +0 -0
- package/third-party/android/libs/opencv/x86_64/libopencv_core.a +0 -0
- package/third-party/android/libs/opencv/x86_64/libopencv_features2d.a +0 -0
- package/third-party/android/libs/opencv/x86_64/libopencv_highgui.a +0 -0
- package/third-party/android/libs/opencv/x86_64/libopencv_imgproc.a +0 -0
- package/third-party/android/libs/opencv/x86_64/libopencv_photo.a +0 -0
- package/third-party/android/libs/opencv/x86_64/libopencv_video.a +0 -0
- package/third-party/android/libs/opencv-third-party/arm64-v8a/libkleidicv.a +0 -0
- package/third-party/android/libs/opencv-third-party/arm64-v8a/libkleidicv_hal.a +0 -0
- package/third-party/android/libs/opencv-third-party/arm64-v8a/libkleidicv_thread.a +0 -0
- package/third-party/android/libs/pthreadpool/arm64-v8a/libpthreadpool.so +0 -0
- package/third-party/android/libs/tokenizers-cpp/arm64-v8a/libsentencepiece.a +0 -0
- package/third-party/android/libs/tokenizers-cpp/arm64-v8a/libtokenizers_c.a +0 -0
- package/third-party/android/libs/tokenizers-cpp/arm64-v8a/libtokenizers_cpp.a +0 -0
- package/{ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib → third-party/android/libs/tokenizers-cpp/x86_64/libsentencepiece.a} +0 -0
- package/third-party/android/libs/tokenizers-cpp/x86_64/libtokenizers_c.a +0 -0
- package/third-party/android/libs/tokenizers-cpp/x86_64/libtokenizers_cpp.a +0 -0
- package/third-party/include/c10/macros/Export.h +1 -0
- package/third-party/include/c10/macros/Macros.h +1 -0
- package/third-party/include/c10/util/BFloat16-inl.h +1 -0
- package/third-party/include/c10/util/BFloat16-math.h +266 -0
- package/third-party/include/c10/util/BFloat16.h +1 -0
- package/third-party/include/c10/util/Half-inl.h +1 -0
- package/third-party/include/c10/util/Half.h +8 -0
- package/third-party/include/c10/util/TypeSafeSignMath.h +1 -0
- package/third-party/include/c10/util/bit_cast.h +1 -0
- package/third-party/include/c10/util/complex.h +72 -0
- package/third-party/include/c10/util/complex_math.h +399 -0
- package/third-party/include/c10/util/complex_utils.h +41 -0
- package/third-party/include/c10/util/floating_point_utils.h +1 -0
- package/third-party/include/c10/util/irange.h +107 -0
- package/third-party/include/c10/util/llvmMathExtras.h +866 -0
- package/third-party/include/c10/util/overflows.h +95 -0
- package/third-party/include/c10/util/safe_numerics.h +97 -0
- package/third-party/include/cpuinfo/cpuinfo.h +2305 -0
- package/third-party/include/executorch/ExecuTorch.h +13 -0
- package/third-party/include/executorch/ExecuTorchError.h +90 -0
- package/third-party/include/executorch/ExecuTorchLLM/ExecuTorchLLM.h +12 -0
- package/third-party/include/executorch/ExecuTorchLLM/ExecuTorchLLMConfig.h +56 -0
- package/third-party/include/executorch/ExecuTorchLLM/ExecuTorchLLMError.h +16 -0
- package/third-party/include/executorch/ExecuTorchLLM/ExecuTorchLLMMultimodalRunner.h +227 -0
- package/third-party/include/executorch/ExecuTorchLLM/ExecuTorchLLMTextRunner.h +97 -0
- package/third-party/include/executorch/ExecuTorchLLM/module.modulemap +4 -0
- package/third-party/include/executorch/ExecuTorchLog.h +77 -0
- package/third-party/include/executorch/ExecuTorchModule.h +563 -0
- package/third-party/include/executorch/ExecuTorchTensor.h +1421 -0
- package/third-party/include/executorch/ExecuTorchValue.h +265 -0
- package/third-party/include/executorch/extension/module/bundled_module.h +131 -0
- package/third-party/include/executorch/extension/module/module.h +649 -0
- package/third-party/include/executorch/extension/tensor/tensor.h +14 -0
- package/third-party/include/executorch/extension/tensor/tensor_accessor.h +190 -0
- package/third-party/include/executorch/extension/tensor/tensor_ptr.h +409 -0
- package/third-party/include/executorch/extension/tensor/tensor_ptr_maker.h +653 -0
- package/third-party/include/executorch/extension/threadpool/cpuinfo_utils.h +24 -0
- package/third-party/include/executorch/extension/threadpool/threadpool.h +95 -0
- package/third-party/include/executorch/runtime/backend/backend_execution_context.h +71 -0
- package/third-party/include/executorch/runtime/backend/backend_init_context.h +84 -0
- package/third-party/include/executorch/runtime/backend/backend_option_context.h +34 -0
- package/third-party/include/executorch/runtime/backend/interface.h +227 -0
- package/third-party/include/executorch/runtime/backend/options.h +206 -0
- package/third-party/include/executorch/runtime/core/array_ref.h +235 -0
- package/third-party/include/executorch/runtime/core/data_loader.h +136 -0
- package/third-party/include/executorch/runtime/core/defines.h +20 -0
- package/third-party/include/executorch/runtime/core/error.h +256 -0
- package/third-party/include/executorch/runtime/core/evalue.h +515 -0
- package/third-party/include/executorch/runtime/core/event_tracer.h +580 -0
- package/third-party/include/executorch/runtime/core/event_tracer_hooks.h +332 -0
- package/third-party/include/executorch/runtime/core/event_tracer_hooks_delegate.h +197 -0
- package/third-party/include/executorch/runtime/core/exec_aten/exec_aten.h +170 -0
- package/third-party/include/executorch/runtime/core/exec_aten/util/dim_order_util.h +264 -0
- package/third-party/include/executorch/runtime/core/exec_aten/util/scalar_type_util.h +1313 -0
- package/third-party/include/executorch/runtime/core/exec_aten/util/tensor_dimension_limit.h +21 -0
- package/third-party/include/executorch/runtime/core/exec_aten/util/tensor_shape_to_c_string.h +69 -0
- package/third-party/include/executorch/runtime/core/exec_aten/util/tensor_util.h +1251 -0
- package/third-party/include/executorch/runtime/core/freeable_buffer.h +107 -0
- package/third-party/include/executorch/runtime/core/function_ref.h +100 -0
- package/third-party/include/executorch/runtime/core/hierarchical_allocator.h +107 -0
- package/third-party/include/executorch/runtime/core/memory_allocator.h +208 -0
- package/third-party/include/executorch/runtime/core/named_data_map.h +76 -0
- package/third-party/include/executorch/runtime/core/portable_type/bfloat16.h +27 -0
- package/third-party/include/executorch/runtime/core/portable_type/bfloat16_math.h +14 -0
- package/third-party/include/executorch/runtime/core/portable_type/bits_types.h +83 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/macros/Export.h +1 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h +1 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +1 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-math.h +266 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16.h +1 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/Half-inl.h +1 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/Half.h +8 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/TypeSafeSignMath.h +1 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/bit_cast.h +1 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/complex.h +72 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/complex_math.h +399 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/complex_utils.h +41 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/floating_point_utils.h +1 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/irange.h +107 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/llvmMathExtras.h +866 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/overflows.h +95 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/safe_numerics.h +97 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/macros/Export.h +154 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/macros/Macros.h +553 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/BFloat16.h +477 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/Half.h +781 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/TypeSafeSignMath.h +141 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/bit_cast.h +49 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/complex.h +593 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/floating_point_utils.h +38 -0
- package/third-party/include/executorch/runtime/core/portable_type/complex.h +21 -0
- package/third-party/include/executorch/runtime/core/portable_type/device.h +70 -0
- package/third-party/include/executorch/runtime/core/portable_type/half.h +27 -0
- package/third-party/include/executorch/runtime/core/portable_type/optional.h +36 -0
- package/third-party/include/executorch/runtime/core/portable_type/qint_types.h +83 -0
- package/third-party/include/executorch/runtime/core/portable_type/scalar.h +110 -0
- package/third-party/include/executorch/runtime/core/portable_type/scalar_type.h +154 -0
- package/third-party/include/executorch/runtime/core/portable_type/string_view.h +29 -0
- package/third-party/include/executorch/runtime/core/portable_type/tensor.h +142 -0
- package/third-party/include/executorch/runtime/core/portable_type/tensor_impl.h +281 -0
- package/third-party/include/executorch/runtime/core/portable_type/tensor_options.h +60 -0
- package/third-party/include/executorch/runtime/core/result.h +258 -0
- package/third-party/include/executorch/runtime/core/span.h +97 -0
- package/third-party/include/executorch/runtime/core/tag.h +90 -0
- package/third-party/include/executorch/runtime/core/tensor_layout.h +79 -0
- package/third-party/include/executorch/runtime/core/tensor_shape_dynamism.h +39 -0
- package/third-party/include/executorch/runtime/executor/memory_manager.h +113 -0
- package/third-party/include/executorch/runtime/executor/merged_data_map.h +142 -0
- package/third-party/include/executorch/runtime/executor/method.h +412 -0
- package/third-party/include/executorch/runtime/executor/method_meta.h +298 -0
- package/third-party/include/executorch/runtime/executor/program.h +309 -0
- package/third-party/include/executorch/runtime/executor/pte_data_map.h +145 -0
- package/third-party/include/executorch/runtime/executor/tensor_parser.h +157 -0
- package/third-party/include/executorch/runtime/kernel/kernel_runtime_context.h +122 -0
- package/third-party/include/executorch/runtime/kernel/operator_registry.h +280 -0
- package/third-party/include/executorch/runtime/platform/abort.h +36 -0
- package/third-party/include/executorch/runtime/platform/assert.h +119 -0
- package/third-party/include/executorch/runtime/platform/clock.h +43 -0
- package/third-party/include/executorch/runtime/platform/compat_unistd.h +75 -0
- package/third-party/include/executorch/runtime/platform/compiler.h +201 -0
- package/third-party/include/executorch/runtime/platform/log.h +177 -0
- package/third-party/include/executorch/runtime/platform/platform.h +259 -0
- package/third-party/include/executorch/runtime/platform/profiler.h +292 -0
- package/third-party/include/executorch/runtime/platform/runtime.h +35 -0
- package/third-party/include/executorch/runtime/platform/system.h +49 -0
- package/third-party/include/executorch/runtime/platform/types.h +24 -0
- package/third-party/include/executorch/schema/extended_header.h +85 -0
- package/third-party/include/headeronly/macros/Export.h +88 -0
- package/third-party/include/opencv2/core/affine.hpp +676 -0
- package/third-party/include/opencv2/core/async.hpp +107 -0
- package/third-party/include/opencv2/core/base.hpp +735 -0
- package/third-party/include/opencv2/core/bindings_utils.hpp +279 -0
- package/third-party/include/opencv2/core/bufferpool.hpp +39 -0
- package/third-party/include/opencv2/core/check.hpp +231 -0
- package/third-party/include/opencv2/core/core.hpp +55 -0
- package/third-party/include/opencv2/core/core_c.h +3261 -0
- package/third-party/include/opencv2/core/cv_cpu_dispatch.h +404 -0
- package/third-party/include/opencv2/core/cv_cpu_helper.h +856 -0
- package/third-party/include/opencv2/core/cvdef.h +1003 -0
- package/third-party/include/opencv2/core/cvstd.hpp +196 -0
- package/third-party/include/opencv2/core/cvstd.inl.hpp +188 -0
- package/third-party/include/opencv2/core/cvstd_wrapper.hpp +187 -0
- package/third-party/include/opencv2/core/detail/async_promise.hpp +73 -0
- package/third-party/include/opencv2/core/detail/dispatch_helper.impl.hpp +48 -0
- package/third-party/include/opencv2/core/detail/exception_ptr.hpp +24 -0
- package/third-party/include/opencv2/core/dualquaternion.hpp +1054 -0
- package/third-party/include/opencv2/core/dualquaternion.inl.hpp +464 -0
- package/third-party/include/opencv2/core/eigen.hpp +405 -0
- package/third-party/include/opencv2/core/fast_math.hpp +433 -0
- package/third-party/include/opencv2/core/hal/hal.hpp +451 -0
- package/third-party/include/opencv2/core/hal/interface.h +191 -0
- package/third-party/include/opencv2/core/hal/intrin.hpp +1222 -0
- package/third-party/include/opencv2/core/hal/intrin_avx.hpp +3378 -0
- package/third-party/include/opencv2/core/hal/intrin_avx512.hpp +3688 -0
- package/third-party/include/opencv2/core/hal/intrin_cpp.hpp +3446 -0
- package/third-party/include/opencv2/core/hal/intrin_forward.hpp +195 -0
- package/third-party/include/opencv2/core/hal/intrin_lasx.hpp +3243 -0
- package/third-party/include/opencv2/core/hal/intrin_lsx.hpp +2671 -0
- package/third-party/include/opencv2/core/hal/intrin_math.hpp +772 -0
- package/third-party/include/opencv2/core/hal/intrin_msa.hpp +1973 -0
- package/third-party/include/opencv2/core/hal/intrin_neon.hpp +2710 -0
- package/third-party/include/opencv2/core/hal/intrin_rvv071.hpp +3452 -0
- package/third-party/include/opencv2/core/hal/intrin_rvv_scalable.hpp +2559 -0
- package/third-party/include/opencv2/core/hal/intrin_sse.hpp +3528 -0
- package/third-party/include/opencv2/core/hal/intrin_sse_em.hpp +175 -0
- package/third-party/include/opencv2/core/hal/intrin_vsx.hpp +1756 -0
- package/third-party/include/opencv2/core/hal/intrin_wasm.hpp +2911 -0
- package/third-party/include/opencv2/core/hal/msa_macros.h +2079 -0
- package/third-party/include/opencv2/core/hal/simd_utils.impl.hpp +313 -0
- package/third-party/include/opencv2/core/mat.hpp +3842 -0
- package/third-party/include/opencv2/core/mat.inl.hpp +2753 -0
- package/third-party/include/opencv2/core/matx.hpp +603 -0
- package/third-party/include/opencv2/core/matx.inl.hpp +1132 -0
- package/third-party/include/opencv2/core/neon_utils.hpp +127 -0
- package/third-party/include/opencv2/core/operations.hpp +610 -0
- package/third-party/include/opencv2/core/optim.hpp +362 -0
- package/third-party/include/opencv2/core/parallel/backend/parallel_for.openmp.hpp +66 -0
- package/third-party/include/opencv2/core/parallel/backend/parallel_for.tbb.hpp +148 -0
- package/third-party/include/opencv2/core/parallel/parallel_backend.hpp +108 -0
- package/third-party/include/opencv2/core/persistence.hpp +1321 -0
- package/third-party/include/opencv2/core/quaternion.hpp +1889 -0
- package/third-party/include/opencv2/core/quaternion.inl.hpp +907 -0
- package/third-party/include/opencv2/core/saturate.hpp +347 -0
- package/third-party/include/opencv2/core/simd_intrinsics.hpp +90 -0
- package/third-party/include/opencv2/core/softfloat.hpp +657 -0
- package/third-party/include/opencv2/core/sse_utils.hpp +861 -0
- package/third-party/include/opencv2/core/traits.hpp +417 -0
- package/third-party/include/opencv2/core/types.hpp +2368 -0
- package/third-party/include/opencv2/core/types_c.h +2064 -0
- package/third-party/include/opencv2/core/utility.hpp +1296 -0
- package/third-party/include/opencv2/core/utils/allocator_stats.hpp +31 -0
- package/third-party/include/opencv2/core/utils/allocator_stats.impl.hpp +111 -0
- package/third-party/include/opencv2/core/utils/filesystem.hpp +91 -0
- package/third-party/include/opencv2/core/utils/fp_control_utils.hpp +70 -0
- package/third-party/include/opencv2/core/utils/instrumentation.hpp +127 -0
- package/third-party/include/opencv2/core/utils/logger.defines.hpp +50 -0
- package/third-party/include/opencv2/core/utils/logger.hpp +258 -0
- package/third-party/include/opencv2/core/utils/logtag.hpp +27 -0
- package/third-party/include/opencv2/core/utils/tls.hpp +230 -0
- package/third-party/include/opencv2/core/utils/trace.hpp +281 -0
- package/third-party/include/opencv2/core/version.hpp +29 -0
- package/third-party/include/opencv2/core/vsx_utils.hpp +1115 -0
- package/third-party/include/opencv2/core.hpp +3699 -0
- package/third-party/include/opencv2/cvconfig.h +155 -0
- package/third-party/include/opencv2/dnn/dnn.hpp +51 -0
- package/third-party/include/opencv2/dnn.hpp +17 -0
- package/third-party/include/opencv2/features2d/features2d.hpp +55 -0
- package/third-party/include/opencv2/features2d/hal/interface.h +32 -0
- package/third-party/include/opencv2/features2d.hpp +1756 -0
- package/third-party/include/opencv2/highgui/highgui.hpp +113 -0
- package/third-party/include/opencv2/highgui.hpp +17 -0
- package/third-party/include/opencv2/imgproc/bindings.hpp +34 -0
- package/third-party/include/opencv2/imgproc/detail/gcgraph.hpp +355 -0
- package/third-party/include/opencv2/imgproc/detail/legacy.hpp +35 -0
- package/third-party/include/opencv2/imgproc/hal/hal.hpp +246 -0
- package/third-party/include/opencv2/imgproc/hal/interface.h +52 -0
- package/third-party/include/opencv2/imgproc/imgproc.hpp +55 -0
- package/third-party/include/opencv2/imgproc/imgproc_c.h +1261 -0
- package/third-party/include/opencv2/imgproc/segmentation.hpp +168 -0
- package/third-party/include/opencv2/imgproc/types_c.h +632 -0
- package/third-party/include/opencv2/imgproc.hpp +5956 -0
- package/third-party/include/opencv2/opencv.hpp +102 -0
- package/third-party/include/opencv2/opencv_modules.hpp +19 -0
- package/third-party/include/opencv2/photo/legacy/constants_c.h +10 -0
- package/third-party/include/opencv2/photo/photo.hpp +55 -0
- package/third-party/include/opencv2/photo.hpp +975 -0
- package/third-party/include/opencv2/video/background_segm.hpp +341 -0
- package/third-party/include/opencv2/video/detail/tracking.detail.hpp +435 -0
- package/third-party/include/opencv2/video/legacy/constants_c.h +15 -0
- package/third-party/include/opencv2/video/tracking.hpp +1014 -0
- package/third-party/include/opencv2/video/video.hpp +55 -0
- package/third-party/include/opencv2/video.hpp +65 -0
- package/third-party/include/pthreadpool/pthreadpool.h +2236 -0
- package/third-party/include/tokenizers-cpp/tokenizers_c.h +61 -0
- package/third-party/include/tokenizers-cpp/tokenizers_cpp.h +118 -0
- package/third-party/include/torch/headeronly/macros/Export.h +154 -0
- package/third-party/include/torch/headeronly/macros/Macros.h +553 -0
- package/third-party/include/torch/headeronly/util/BFloat16.h +477 -0
- package/third-party/include/torch/headeronly/util/Half.h +781 -0
- package/third-party/include/torch/headeronly/util/TypeSafeSignMath.h +141 -0
- package/third-party/include/torch/headeronly/util/bit_cast.h +49 -0
- package/third-party/include/torch/headeronly/util/complex.h +593 -0
- package/third-party/include/torch/headeronly/util/floating_point_utils.h +38 -0
- package/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib +0 -0
- package/{ios → third-party/ios}/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Info.plist +0 -0
- package/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib +0 -0
- package/{ios → third-party/ios}/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Info.plist +0 -0
- package/third-party/ios/libs/cpuinfo/libcpuinfo.a +0 -0
- package/third-party/ios/libs/pthreadpool/physical-arm64-release/libpthreadpool.a +0 -0
- package/third-party/ios/libs/pthreadpool/simulator-arm64-debug/libpthreadpool.a +0 -0
- package/third-party/ios/libs/tokenizers-cpp/physical-arm64-release/libsentencepiece.a +0 -0
- package/{ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib → third-party/ios/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_c.a} +0 -0
- package/third-party/ios/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_cpp.a +0 -0
- package/third-party/ios/libs/tokenizers-cpp/simulator-arm64-debug/libsentencepiece.a +0 -0
- package/third-party/ios/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_c.a +0 -0
- package/third-party/ios/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_cpp.a +0 -0
- package/LICENSE +0 -79
- package/android/src/main/java/com/swmansion/rnexecutorch/Classification.kt +0 -64
- package/android/src/main/java/com/swmansion/rnexecutorch/ETModule.kt +0 -90
- package/android/src/main/java/com/swmansion/rnexecutorch/ImageSegmentation.kt +0 -58
- package/android/src/main/java/com/swmansion/rnexecutorch/LLM.kt +0 -63
- package/android/src/main/java/com/swmansion/rnexecutorch/OCR.kt +0 -90
- package/android/src/main/java/com/swmansion/rnexecutorch/ObjectDetection.kt +0 -64
- package/android/src/main/java/com/swmansion/rnexecutorch/SpeechToText.kt +0 -91
- package/android/src/main/java/com/swmansion/rnexecutorch/StyleTransfer.kt +0 -54
- package/android/src/main/java/com/swmansion/rnexecutorch/TextEmbeddings.kt +0 -51
- package/android/src/main/java/com/swmansion/rnexecutorch/Tokenizer.kt +0 -86
- package/android/src/main/java/com/swmansion/rnexecutorch/VerticalOCR.kt +0 -179
- package/android/src/main/java/com/swmansion/rnexecutorch/models/BaseModel.kt +0 -54
- package/android/src/main/java/com/swmansion/rnexecutorch/models/TextEmbeddings/TextEmbeddingsModel.kt +0 -48
- package/android/src/main/java/com/swmansion/rnexecutorch/models/TextEmbeddings/TextEmbeddingsUtils.kt +0 -37
- package/android/src/main/java/com/swmansion/rnexecutorch/models/classification/ClassificationModel.kt +0 -46
- package/android/src/main/java/com/swmansion/rnexecutorch/models/classification/Constants.kt +0 -1005
- package/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/Constants.kt +0 -26
- package/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/ImageSegmentationModel.kt +0 -142
- package/android/src/main/java/com/swmansion/rnexecutorch/models/objectDetection/SSDLiteLargeModel.kt +0 -74
- package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/Detector.kt +0 -82
- package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/RecognitionHandler.kt +0 -117
- package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/Recognizer.kt +0 -51
- package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/VerticalDetector.kt +0 -89
- package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/utils/CTCLabelConverter.kt +0 -58
- package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/utils/Constants.kt +0 -31
- package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/utils/DetectorUtils.kt +0 -608
- package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/utils/RecognizerUtils.kt +0 -430
- package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/BaseS2TDecoder.kt +0 -39
- package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/BaseS2TModule.kt +0 -43
- package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/Moonshine.kt +0 -16
- package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/MoonshineDecoder.kt +0 -23
- package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/MoonshineEncoder.kt +0 -20
- package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/Whisper.kt +0 -16
- package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/WhisperDecoder.kt +0 -22
- package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/WhisperEncoder.kt +0 -29
- package/android/src/main/java/com/swmansion/rnexecutorch/models/styleTransfer/StyleTransferModel.kt +0 -43
- package/android/src/main/java/com/swmansion/rnexecutorch/utils/ArrayUtils.kt +0 -87
- package/android/src/main/java/com/swmansion/rnexecutorch/utils/ETError.kt +0 -34
- package/android/src/main/java/com/swmansion/rnexecutorch/utils/ImageProcessor.kt +0 -237
- package/android/src/main/java/com/swmansion/rnexecutorch/utils/Numerical.kt +0 -8
- package/android/src/main/java/com/swmansion/rnexecutorch/utils/ObjectDetectionUtils.kt +0 -201
- package/android/src/main/java/com/swmansion/rnexecutorch/utils/STFT.kt +0 -50
- package/android/src/main/java/com/swmansion/rnexecutorch/utils/TensorUtils.kt +0 -103
- package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/ETModel.h +0 -27
- package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h +0 -14
- package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/LLaMARunner.h +0 -32
- package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/ETModel.h +0 -27
- package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h +0 -14
- package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/LLaMARunner.h +0 -32
- package/ios/RnExecutorch/Classification.h +0 -5
- package/ios/RnExecutorch/Classification.mm +0 -54
- package/ios/RnExecutorch/ETModule.h +0 -5
- package/ios/RnExecutorch/ETModule.mm +0 -75
- package/ios/RnExecutorch/ImageSegmentation.h +0 -5
- package/ios/RnExecutorch/ImageSegmentation.mm +0 -60
- package/ios/RnExecutorch/LLM.h +0 -5
- package/ios/RnExecutorch/LLM.mm +0 -78
- package/ios/RnExecutorch/OCR.h +0 -5
- package/ios/RnExecutorch/OCR.mm +0 -96
- package/ios/RnExecutorch/ObjectDetection.h +0 -5
- package/ios/RnExecutorch/ObjectDetection.mm +0 -56
- package/ios/RnExecutorch/SpeechToText.h +0 -5
- package/ios/RnExecutorch/SpeechToText.mm +0 -125
- package/ios/RnExecutorch/StyleTransfer.h +0 -5
- package/ios/RnExecutorch/StyleTransfer.mm +0 -55
- package/ios/RnExecutorch/TextEmbeddings.h +0 -5
- package/ios/RnExecutorch/TextEmbeddings.mm +0 -62
- package/ios/RnExecutorch/Tokenizer.h +0 -5
- package/ios/RnExecutorch/Tokenizer.mm +0 -83
- package/ios/RnExecutorch/VerticalOCR.h +0 -5
- package/ios/RnExecutorch/VerticalOCR.mm +0 -183
- package/ios/RnExecutorch/models/BaseModel.h +0 -21
- package/ios/RnExecutorch/models/BaseModel.mm +0 -43
- package/ios/RnExecutorch/models/classification/ClassificationModel.h +0 -10
- package/ios/RnExecutorch/models/classification/ClassificationModel.mm +0 -53
- package/ios/RnExecutorch/models/classification/Constants.h +0 -3
- package/ios/RnExecutorch/models/image_segmentation/Constants.h +0 -4
- package/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h +0 -10
- package/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm +0 -146
- package/ios/RnExecutorch/models/object_detection/SSDLiteLargeModel.hpp +0 -11
- package/ios/RnExecutorch/models/object_detection/SSDLiteLargeModel.mm +0 -64
- package/ios/RnExecutorch/models/ocr/Detector.h +0 -9
- package/ios/RnExecutorch/models/ocr/Detector.mm +0 -101
- package/ios/RnExecutorch/models/ocr/RecognitionHandler.h +0 -16
- package/ios/RnExecutorch/models/ocr/RecognitionHandler.mm +0 -135
- package/ios/RnExecutorch/models/ocr/Recognizer.h +0 -8
- package/ios/RnExecutorch/models/ocr/Recognizer.mm +0 -77
- package/ios/RnExecutorch/models/ocr/VerticalDetector.h +0 -10
- package/ios/RnExecutorch/models/ocr/VerticalDetector.mm +0 -118
- package/ios/RnExecutorch/models/ocr/utils/CTCLabelConverter.h +0 -16
- package/ios/RnExecutorch/models/ocr/utils/CTCLabelConverter.mm +0 -80
- package/ios/RnExecutorch/models/ocr/utils/Constants.h +0 -26
- package/ios/RnExecutorch/models/ocr/utils/DetectorUtils.h +0 -31
- package/ios/RnExecutorch/models/ocr/utils/DetectorUtils.mm +0 -754
- package/ios/RnExecutorch/models/ocr/utils/OCRUtils.h +0 -10
- package/ios/RnExecutorch/models/ocr/utils/OCRUtils.mm +0 -67
- package/ios/RnExecutorch/models/ocr/utils/RecognizerUtils.h +0 -35
- package/ios/RnExecutorch/models/ocr/utils/RecognizerUtils.mm +0 -331
- package/ios/RnExecutorch/models/stt/Moonshine.hpp +0 -13
- package/ios/RnExecutorch/models/stt/Moonshine.mm +0 -64
- package/ios/RnExecutorch/models/stt/MoonshineDecoder.hpp +0 -16
- package/ios/RnExecutorch/models/stt/MoonshineDecoder.mm +0 -24
- package/ios/RnExecutorch/models/stt/MoonshineEncoder.hpp +0 -15
- package/ios/RnExecutorch/models/stt/MoonshineEncoder.mm +0 -18
- package/ios/RnExecutorch/models/stt/SpeechToTextBaseModel.hpp +0 -26
- package/ios/RnExecutorch/models/stt/SpeechToTextBaseModel.mm +0 -19
- package/ios/RnExecutorch/models/stt/Whisper.hpp +0 -12
- package/ios/RnExecutorch/models/stt/Whisper.mm +0 -68
- package/ios/RnExecutorch/models/stt/WhisperDecoder.hpp +0 -16
- package/ios/RnExecutorch/models/stt/WhisperDecoder.mm +0 -22
- package/ios/RnExecutorch/models/stt/WhisperEncoder.hpp +0 -15
- package/ios/RnExecutorch/models/stt/WhisperEncoder.mm +0 -21
- package/ios/RnExecutorch/models/style_transfer/StyleTransferModel.h +0 -11
- package/ios/RnExecutorch/models/style_transfer/StyleTransferModel.mm +0 -50
- package/ios/RnExecutorch/models/text_embeddings/TextEmbeddingsModel.h +0 -15
- package/ios/RnExecutorch/models/text_embeddings/TextEmbeddingsModel.mm +0 -45
- package/ios/RnExecutorch/models/text_embeddings/TextEmbeddingsUtils.h +0 -8
- package/ios/RnExecutorch/models/text_embeddings/TextEmbeddingsUtils.mm +0 -49
- package/ios/RnExecutorch/utils/Constants.h +0 -8
- package/ios/RnExecutorch/utils/Conversions.h +0 -15
- package/ios/RnExecutorch/utils/ETError.h +0 -26
- package/ios/RnExecutorch/utils/ImageProcessor.h +0 -15
- package/ios/RnExecutorch/utils/ImageProcessor.mm +0 -147
- package/ios/RnExecutorch/utils/Numerical.h +0 -1
- package/ios/RnExecutorch/utils/Numerical.mm +0 -18
- package/ios/RnExecutorch/utils/ObjectDetectionUtils.hpp +0 -23
- package/ios/RnExecutorch/utils/SFFT.hpp +0 -13
- package/ios/RnExecutorch/utils/SFFT.mm +0 -71
- package/ios/RnExecutorch/utils/ScalarType.h +0 -14
- package/ios/RnExecutorch/utils/ScalarType.mm +0 -21
- package/lib/module/constants/sttDefaults.js +0 -72
- package/lib/module/constants/sttDefaults.js.map +0 -1
- package/lib/module/controllers/SpeechToTextController.js +0 -307
- package/lib/module/controllers/SpeechToTextController.js.map +0 -1
- package/lib/module/native/NativeClassification.js +0 -5
- package/lib/module/native/NativeClassification.js.map +0 -1
- package/lib/module/native/NativeETModule.js +0 -5
- package/lib/module/native/NativeETModule.js.map +0 -1
- package/lib/module/native/NativeImageSegmentation.js +0 -5
- package/lib/module/native/NativeImageSegmentation.js.map +0 -1
- package/lib/module/native/NativeLLM.js +0 -5
- package/lib/module/native/NativeLLM.js.map +0 -1
- package/lib/module/native/NativeOCR.js +0 -5
- package/lib/module/native/NativeOCR.js.map +0 -1
- package/lib/module/native/NativeObjectDetection.js +0 -5
- package/lib/module/native/NativeObjectDetection.js.map +0 -1
- package/lib/module/native/NativeSpeechToText.js +0 -5
- package/lib/module/native/NativeSpeechToText.js.map +0 -1
- package/lib/module/native/NativeStyleTransfer.js +0 -5
- package/lib/module/native/NativeStyleTransfer.js.map +0 -1
- package/lib/module/native/NativeTextEmbeddings.js +0 -5
- package/lib/module/native/NativeTextEmbeddings.js.map +0 -1
- package/lib/module/native/NativeTokenizer.js +0 -5
- package/lib/module/native/NativeTokenizer.js.map +0 -1
- package/lib/module/native/NativeVerticalOCR.js +0 -5
- package/lib/module/native/NativeVerticalOCR.js.map +0 -1
- package/lib/module/package.json +0 -1
- package/lib/module/utils/stt.js +0 -22
- package/lib/module/utils/stt.js.map +0 -1
- package/lib/typescript/constants/sttDefaults.d.ts +0 -28
- package/lib/typescript/constants/sttDefaults.d.ts.map +0 -1
- package/lib/typescript/controllers/SpeechToTextController.d.ts +0 -52
- package/lib/typescript/controllers/SpeechToTextController.d.ts.map +0 -1
- package/lib/typescript/native/NativeClassification.d.ts +0 -10
- package/lib/typescript/native/NativeClassification.d.ts.map +0 -1
- package/lib/typescript/native/NativeETModule.d.ts +0 -9
- package/lib/typescript/native/NativeETModule.d.ts.map +0 -1
- package/lib/typescript/native/NativeImageSegmentation.d.ts +0 -10
- package/lib/typescript/native/NativeImageSegmentation.d.ts.map +0 -1
- package/lib/typescript/native/NativeLLM.d.ts +0 -12
- package/lib/typescript/native/NativeLLM.d.ts.map +0 -1
- package/lib/typescript/native/NativeOCR.d.ts +0 -9
- package/lib/typescript/native/NativeOCR.d.ts.map +0 -1
- package/lib/typescript/native/NativeObjectDetection.d.ts +0 -9
- package/lib/typescript/native/NativeObjectDetection.d.ts.map +0 -1
- package/lib/typescript/native/NativeSpeechToText.d.ts +0 -12
- package/lib/typescript/native/NativeSpeechToText.d.ts.map +0 -1
- package/lib/typescript/native/NativeStyleTransfer.d.ts.map +0 -1
- package/lib/typescript/native/NativeTextEmbeddings.d.ts +0 -8
- package/lib/typescript/native/NativeTextEmbeddings.d.ts.map +0 -1
- package/lib/typescript/native/NativeTokenizer.d.ts +0 -12
- package/lib/typescript/native/NativeTokenizer.d.ts.map +0 -1
- package/lib/typescript/native/NativeVerticalOCR.d.ts +0 -9
- package/lib/typescript/native/NativeVerticalOCR.d.ts.map +0 -1
- package/lib/typescript/utils/stt.d.ts +0 -2
- package/lib/typescript/utils/stt.d.ts.map +0 -1
- package/src/constants/sttDefaults.ts +0 -86
- package/src/controllers/SpeechToTextController.ts +0 -458
- package/src/index.tsx +0 -47
- package/src/native/NativeClassification.ts +0 -9
- package/src/native/NativeETModule.ts +0 -14
- package/src/native/NativeImageSegmentation.ts +0 -14
- package/src/native/NativeLLM.ts +0 -14
- package/src/native/NativeOCR.ts +0 -16
- package/src/native/NativeObjectDetection.ts +0 -10
- package/src/native/NativeSpeechToText.ts +0 -17
- package/src/native/NativeStyleTransfer.ts +0 -10
- package/src/native/NativeTextEmbeddings.ts +0 -9
- package/src/native/NativeTokenizer.ts +0 -13
- package/src/native/NativeVerticalOCR.ts +0 -16
- package/src/utils/stt.ts +0 -28
- package/{ios → third-party/ios}/ExecutorchLib.xcframework/Info.plist +4 -4
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
#include "ObjectDetection.h"
|
|
2
|
+
|
|
3
|
+
#include <rnexecutorch/data_processing/ImageProcessing.h>
|
|
4
|
+
|
|
5
|
+
namespace rnexecutorch::models::object_detection {
|
|
6
|
+
|
|
7
|
+
/// Constructs the object detection model on top of BaseModel and caches the
/// image size the model expects.
///
/// The expected size is read from the first input tensor's shape: the last
/// dimension is used as the width and the second-to-last as the height.
///
/// @param modelSource  Forwarded unchanged to BaseModel.
/// @param callInvoker  Forwarded unchanged to BaseModel.
/// @throws std::runtime_error if the model reports no input tensors, or if the
///         first input tensor has fewer than 2 dimensions.
ObjectDetection::ObjectDetection(
    const std::string &modelSource,
    std::shared_ptr<react::CallInvoker> callInvoker)
    : BaseModel(modelSource, callInvoker) {
  const auto inputTensors = getAllInputShapes();
  if (inputTensors.empty()) {
    throw std::runtime_error("Model seems to not take any input tensors.");
  }

  // Bind by reference: the shape is only inspected, never modified.
  const std::vector<int32_t> &modelInputShape = inputTensors[0];
  const std::size_t rank = modelInputShape.size();
  if (rank < 2) {
    // Built with std::string (as generate() does) so the message can never be
    // truncated by a fixed-size buffer.
    throw std::runtime_error(
        "Unexpected model input size, expected at least 2 dimensions "
        "but got: " +
        std::to_string(rank) + ".");
  }

  // cv::Size takes (width, height); the trailing two shape entries are
  // interpreted as (..., height, width).
  modelImageSize = cv::Size(modelInputShape[rank - 1], modelInputShape[rank - 2]);
}
|
|
27
|
+
|
|
28
|
+
std::vector<types::Detection>
|
|
29
|
+
ObjectDetection::postprocess(const std::vector<EValue> &tensors,
|
|
30
|
+
cv::Size originalSize, double detectionThreshold) {
|
|
31
|
+
float widthRatio =
|
|
32
|
+
static_cast<float>(originalSize.width) / modelImageSize.width;
|
|
33
|
+
float heightRatio =
|
|
34
|
+
static_cast<float>(originalSize.height) / modelImageSize.height;
|
|
35
|
+
|
|
36
|
+
std::vector<types::Detection> detections;
|
|
37
|
+
auto bboxTensor = tensors.at(0).toTensor();
|
|
38
|
+
std::span<const float> bboxes(
|
|
39
|
+
static_cast<const float *>(bboxTensor.const_data_ptr()),
|
|
40
|
+
bboxTensor.numel());
|
|
41
|
+
|
|
42
|
+
auto scoreTensor = tensors.at(1).toTensor();
|
|
43
|
+
std::span<const float> scores(
|
|
44
|
+
static_cast<const float *>(scoreTensor.const_data_ptr()),
|
|
45
|
+
scoreTensor.numel());
|
|
46
|
+
|
|
47
|
+
auto labelTensor = tensors.at(2).toTensor();
|
|
48
|
+
std::span<const float> labels(
|
|
49
|
+
static_cast<const float *>(labelTensor.const_data_ptr()),
|
|
50
|
+
labelTensor.numel());
|
|
51
|
+
|
|
52
|
+
for (std::size_t i = 0; i < scores.size(); ++i) {
|
|
53
|
+
if (scores[i] < detectionThreshold) {
|
|
54
|
+
continue;
|
|
55
|
+
}
|
|
56
|
+
float x1 = bboxes[i * 4] * widthRatio;
|
|
57
|
+
float y1 = bboxes[i * 4 + 1] * heightRatio;
|
|
58
|
+
float x2 = bboxes[i * 4 + 2] * widthRatio;
|
|
59
|
+
float y2 = bboxes[i * 4 + 3] * heightRatio;
|
|
60
|
+
detections.emplace_back(x1, y1, x2, y2, static_cast<int>(labels[i]),
|
|
61
|
+
scores[i]);
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
std::vector<types::Detection> output = utils::nonMaxSuppression(detections);
|
|
65
|
+
return output;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
std::vector<types::Detection>
|
|
69
|
+
ObjectDetection::generate(std::string imageSource, double detectionThreshold) {
|
|
70
|
+
auto [inputTensor, originalSize] =
|
|
71
|
+
image_processing::readImageToTensor(imageSource, getAllInputShapes()[0]);
|
|
72
|
+
|
|
73
|
+
auto forwardResult = BaseModel::forward(inputTensor);
|
|
74
|
+
if (!forwardResult.ok()) {
|
|
75
|
+
throw std::runtime_error(
|
|
76
|
+
"Failed to forward, error: " +
|
|
77
|
+
std::to_string(static_cast<uint32_t>(forwardResult.error())));
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
return postprocess(forwardResult.get(), originalSize, detectionThreshold);
|
|
81
|
+
}
|
|
82
|
+
} // namespace rnexecutorch::models::object_detection
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <unordered_map>
|
|
4
|
+
|
|
5
|
+
#include <executorch/extension/tensor/tensor_ptr.h>
|
|
6
|
+
#include <executorch/runtime/core/evalue.h>
|
|
7
|
+
#include <opencv2/opencv.hpp>
|
|
8
|
+
|
|
9
|
+
#include "Types.h"
|
|
10
|
+
#include "rnexecutorch/metaprogramming/ConstructorHelpers.h"
|
|
11
|
+
#include <rnexecutorch/models/BaseModel.h>
|
|
12
|
+
#include <rnexecutorch/models/object_detection/Utils.h>
|
|
13
|
+
|
|
14
|
+
namespace rnexecutorch {
|
|
15
|
+
namespace models::object_detection {
|
|
16
|
+
using executorch::extension::TensorPtr;
|
|
17
|
+
using executorch::runtime::EValue;
|
|
18
|
+
|
|
19
|
+
class ObjectDetection : public BaseModel {
|
|
20
|
+
public:
|
|
21
|
+
ObjectDetection(const std::string &modelSource,
|
|
22
|
+
std::shared_ptr<react::CallInvoker> callInvoker);
|
|
23
|
+
std::vector<types::Detection> generate(std::string imageSource,
|
|
24
|
+
double detectionThreshold);
|
|
25
|
+
|
|
26
|
+
private:
|
|
27
|
+
std::vector<types::Detection> postprocess(const std::vector<EValue> &tensors,
|
|
28
|
+
cv::Size originalSize,
|
|
29
|
+
double detectionThreshold);
|
|
30
|
+
|
|
31
|
+
cv::Size modelImageSize{0, 0};
|
|
32
|
+
};
|
|
33
|
+
} // namespace models::object_detection
|
|
34
|
+
|
|
35
|
+
REGISTER_CONSTRUCTOR(models::object_detection::ObjectDetection, std::string,
|
|
36
|
+
std::shared_ptr<react::CallInvoker>);
|
|
37
|
+
} // namespace rnexecutorch
|
|
@@ -1,30 +1,9 @@
|
|
|
1
|
-
#include "
|
|
1
|
+
#include "Utils.h"
|
|
2
2
|
#include "Constants.h"
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
if (it != cocoLabelsMap.end()) {
|
|
8
|
-
return [NSString stringWithUTF8String:it->second.c_str()];
|
|
9
|
-
} else {
|
|
10
|
-
return [NSString stringWithUTF8String:"unknown"];
|
|
11
|
-
}
|
|
12
|
-
}
|
|
13
|
-
|
|
14
|
-
NSDictionary *detectionToNSDictionary(const Detection &detection) {
|
|
15
|
-
return @{
|
|
16
|
-
@"bbox" : @{
|
|
17
|
-
@"x1" : @(detection.x1),
|
|
18
|
-
@"y1" : @(detection.y1),
|
|
19
|
-
@"x2" : @(detection.x2),
|
|
20
|
-
@"y2" : @(detection.y2),
|
|
21
|
-
},
|
|
22
|
-
@"label" : floatLabelToNSString(detection.label),
|
|
23
|
-
@"score" : @(detection.score)
|
|
24
|
-
};
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
float iou(const Detection &a, const Detection &b) {
|
|
4
|
+
namespace rnexecutorch::models::object_detection::utils {
|
|
5
|
+
float intersectionOverUnion(const types::Detection &a,
|
|
6
|
+
const types::Detection &b) {
|
|
28
7
|
float x1 = std::max(a.x1, b.x1);
|
|
29
8
|
float y1 = std::max(a.y1, b.y1);
|
|
30
9
|
float x2 = std::min(a.x2, b.x2);
|
|
@@ -36,42 +15,43 @@ float iou(const Detection &a, const Detection &b) {
|
|
|
36
15
|
float unionArea = areaA + areaB - intersectionArea;
|
|
37
16
|
|
|
38
17
|
return intersectionArea / unionArea;
|
|
39
|
-
}
|
|
18
|
+
}
|
|
40
19
|
|
|
41
|
-
std::vector<
|
|
42
|
-
|
|
20
|
+
std::vector<types::Detection>
|
|
21
|
+
nonMaxSuppression(std::vector<types::Detection> detections) {
|
|
43
22
|
if (detections.empty()) {
|
|
44
23
|
return {};
|
|
45
24
|
}
|
|
46
25
|
|
|
47
26
|
// Sort by label, then by score
|
|
48
27
|
std::sort(detections.begin(), detections.end(),
|
|
49
|
-
[](const Detection &a, const Detection &b) {
|
|
28
|
+
[](const types::Detection &a, const types::Detection &b) {
|
|
50
29
|
if (a.label == b.label) {
|
|
51
30
|
return a.score > b.score;
|
|
52
31
|
}
|
|
53
32
|
return a.label < b.label;
|
|
54
33
|
});
|
|
55
34
|
|
|
56
|
-
std::vector<Detection> result;
|
|
35
|
+
std::vector<types::Detection> result;
|
|
57
36
|
// Apply NMS for each label
|
|
58
37
|
for (size_t i = 0; i < detections.size();) {
|
|
59
38
|
float currentLabel = detections[i].label;
|
|
60
39
|
|
|
61
|
-
std::vector<Detection> labelDetections;
|
|
40
|
+
std::vector<types::Detection> labelDetections;
|
|
62
41
|
while (i < detections.size() && detections[i].label == currentLabel) {
|
|
63
42
|
labelDetections.push_back(detections[i]);
|
|
64
43
|
++i;
|
|
65
44
|
}
|
|
66
45
|
|
|
67
|
-
std::vector<Detection> filteredLabelDetections;
|
|
46
|
+
std::vector<types::Detection> filteredLabelDetections;
|
|
68
47
|
while (!labelDetections.empty()) {
|
|
69
|
-
Detection current = labelDetections.front();
|
|
48
|
+
types::Detection current = labelDetections.front();
|
|
70
49
|
filteredLabelDetections.push_back(current);
|
|
71
50
|
labelDetections.erase(
|
|
72
51
|
std::remove_if(labelDetections.begin(), labelDetections.end(),
|
|
73
|
-
[&](const Detection &other) {
|
|
74
|
-
return
|
|
52
|
+
[&current](const types::Detection &other) {
|
|
53
|
+
return intersectionOverUnion(current, other) >
|
|
54
|
+
constants::IOU_THRESHOLD;
|
|
75
55
|
}),
|
|
76
56
|
labelDetections.end());
|
|
77
57
|
}
|
|
@@ -80,3 +60,5 @@ std::vector<Detection> nms(std::vector<Detection> detections,
|
|
|
80
60
|
}
|
|
81
61
|
return result;
|
|
82
62
|
}
|
|
63
|
+
|
|
64
|
+
} // namespace rnexecutorch::models::object_detection::utils
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "Types.h"
|
|
4
|
+
#include <vector>
|
|
5
|
+
|
|
6
|
+
namespace rnexecutorch::models::object_detection::utils {
|
|
7
|
+
float intersectionOverUnion(const types::Detection &a,
|
|
8
|
+
const types::Detection &b);
|
|
9
|
+
std::vector<types::Detection>
|
|
10
|
+
nonMaxSuppression(std::vector<types::Detection> detections);
|
|
11
|
+
} // namespace rnexecutorch::models::object_detection::utils
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
#include "CTCLabelConverter.h"
|
|
2
|
+
#include <algorithm>
|
|
3
|
+
#include <optional>
|
|
4
|
+
|
|
5
|
+
namespace rnexecutorch::models::ocr {
|
|
6
|
+
/**
 * Builds the index -> character table from a UTF-8 encoded alphabet.
 *
 * Index 0 is reserved for the "[blank]" entry, which is also the ignored
 * index. Every following entry holds one UTF-8 sequence from `characters`,
 * in order of appearance.
 *
 * @param characters UTF-8 string listing every recognisable symbol.
 */
CTCLabelConverter::CTCLabelConverter(const std::string &characters)
    : ignoreIdx(0), character({"[blank]"}) {
  const size_t total = characters.length();
  size_t pos = 0;

  while (pos < total) {
    const unsigned char lead = characters[pos];

    // Sequence width is derived from the lead byte. Plain ASCII and invalid
    // lead bytes both consume exactly one byte, so the scan always advances.
    size_t width = 1;
    if ((lead & 0xE0) == 0xC0) { // 110xxxxx
      width = 2;
    } else if ((lead & 0xF0) == 0xE0) { // 1110xxxx
      width = 3;
    } else if ((lead & 0xF8) == 0xF0) { // 11110xxx
      width = 4;
    }

    // A sequence cut off by the end of the string is skipped, not stored.
    if (pos + width <= total) {
      character.push_back(characters.substr(pos, width));
    }
    pos += width;
  }
}
|
|
35
|
+
|
|
36
|
+
std::vector<std::string>
|
|
37
|
+
CTCLabelConverter::decodeGreedy(const std::vector<int32_t> &textIndex,
|
|
38
|
+
size_t length) {
|
|
39
|
+
/*
|
|
40
|
+
The current strategy used for decoding is greedy approach
|
|
41
|
+
which iterates through the list of indices and process
|
|
42
|
+
each index using following steps:
|
|
43
|
+
1. Ignore if idx == 0
|
|
44
|
+
2. Ignore if idx is the same as last idx
|
|
45
|
+
3. decode idx -> char and append it to returned text.
|
|
46
|
+
|
|
47
|
+
Note that ignoring repeated indices, does not mean decoding
|
|
48
|
+
won't handle repeated letters in a word, since in most cases
|
|
49
|
+
actual chars are already seperated by blank tokens.
|
|
50
|
+
*/
|
|
51
|
+
std::vector<std::string> texts;
|
|
52
|
+
size_t index = 0;
|
|
53
|
+
|
|
54
|
+
while (index < textIndex.size()) {
|
|
55
|
+
size_t segmentLength = std::min(length, textIndex.size() - index);
|
|
56
|
+
|
|
57
|
+
std::vector<int32_t> subArray(textIndex.begin() + index,
|
|
58
|
+
textIndex.begin() + index + segmentLength);
|
|
59
|
+
|
|
60
|
+
std::string text;
|
|
61
|
+
|
|
62
|
+
if (!subArray.empty()) {
|
|
63
|
+
std::optional<int32_t> lastChar;
|
|
64
|
+
for (int32_t currentChar : subArray) {
|
|
65
|
+
bool isRepeated =
|
|
66
|
+
lastChar.has_value() && lastChar.value() == currentChar;
|
|
67
|
+
bool isIgnored = currentChar == ignoreIdx;
|
|
68
|
+
lastChar = currentChar;
|
|
69
|
+
|
|
70
|
+
if (currentChar >= 0 &&
|
|
71
|
+
currentChar < static_cast<int32_t>(character.size()) &&
|
|
72
|
+
!isRepeated && !isIgnored) {
|
|
73
|
+
text += character[currentChar];
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
texts.push_back(std::move(text));
|
|
79
|
+
index += segmentLength;
|
|
80
|
+
|
|
81
|
+
if (segmentLength < length) {
|
|
82
|
+
break;
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
return texts;
|
|
87
|
+
}
|
|
88
|
+
} // namespace rnexecutorch::models::ocr
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <string>
|
|
4
|
+
#include <vector>
|
|
5
|
+
|
|
6
|
+
namespace rnexecutorch::models::ocr {
|
|
7
|
+
/*
|
|
8
|
+
CTC (Connectionist Temporal Classification) Label Converter
|
|
9
|
+
is used for decoding the returned list of indices by Recognizer into
|
|
10
|
+
actual characters.
|
|
11
|
+
For each Recognizer there is a 1:1 correspondence between
|
|
12
|
+
an index and a character. CTC Label Converter operates on this
|
|
13
|
+
mapping. Symbol corresponding to the first index is a [blank]
|
|
14
|
+
character, meaning "no character to decode here".
|
|
15
|
+
The decoder ignores [blank] char.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
/**
 * Maps recognizer output indices to characters using greedy CTC decoding.
 */
class CTCLabelConverter final {
public:
  /**
   * @param characters UTF-8 string with every symbol the recognizer can
   *                   emit; each symbol is assigned the next index after the
   *                   reserved "[blank]" entry at index 0.
   */
  explicit CTCLabelConverter(const std::string &characters);

  /**
   * Decodes `textIndex` in segments of `length` indices, returning one string
   * per segment. Blank and immediately repeated indices are skipped.
   */
  std::vector<std::string> decodeGreedy(const std::vector<int32_t> &textIndex,
                                        size_t length);

private:
  // Index that is never decoded into a character (the blank entry).
  int32_t ignoreIdx;
  // Index -> character table; entry 0 is "[blank]".
  std::vector<std::string> character;
};
|
|
29
|
+
} // namespace rnexecutorch::models::ocr
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <cstdint>
|
|
4
|
+
#include <opencv2/opencv.hpp>
|
|
5
|
+
|
|
6
|
+
namespace rnexecutorch::models::ocr::constants {
|
|
7
|
+
|
|
8
|
+
// Thresholds passed to the detector's heat-map and box-grouping utilities.
inline constexpr float kTextThreshold = 0.4;
inline constexpr float kTextThresholdVertical = 0.3;
inline constexpr float kLinkThreshold = 0.4;
inline constexpr float kLowTextThreshold = 0.7;
inline constexpr float kCenterThreshold = 0.5;
inline constexpr float kDistanceThreshold = 2.0;
inline constexpr float kHeightThreshold = 2.0;
inline constexpr float kSingleCharacterCenterThreshold = 0.3;
inline constexpr float kLowConfidenceThreshold = 0.3;
inline constexpr float kAdjustContrast = 0.2;
inline constexpr int32_t kMinSideThreshold = 15;
inline constexpr int32_t kMaxSideThreshold = 30;

// Recognizer input dimensions.
inline constexpr int32_t kRecognizerHeight = 64;
inline constexpr int32_t kLargeRecognizerWidth = 512;
inline constexpr int32_t kMediumRecognizerWidth = 256;
inline constexpr int32_t kSmallRecognizerWidth = 128;
inline constexpr int32_t kSmallVerticalRecognizerWidth = 64;

// Largest recognizer width plus 15%, rounded down (588). Integer arithmetic
// keeps the truncation explicit instead of relying on an implicit
// double -> int32_t conversion.
inline constexpr int32_t kMaxWidth =
    kLargeRecognizerWidth + (kLargeRecognizerWidth * 15) / 100;

inline constexpr int32_t kSingleCharacterMinSize = 70;
inline constexpr int32_t kRecognizerImageSize = 1280;
inline constexpr int32_t kVerticalLineThreshold = 20;
|
|
30
|
+
|
|
31
|
+
/*
|
|
32
|
+
Mean and variance values for image normalization were used in EASYOCR pipeline
|
|
33
|
+
(see
|
|
34
|
+
https://github.com/JaidedAI/EasyOCR/blob/c4f3cd7225efd4f85451bd8b4a7646ae9a092420/easyocr/imgproc.py#L20)
|
|
35
|
+
but they originate from ImageNet dataset and they are widely used in
|
|
36
|
+
ComputerVision preprocessing.
|
|
37
|
+
*/
|
|
38
|
+
inline const cv::Scalar kNormalizationMean(0.485, 0.456, 0.406);
|
|
39
|
+
inline const cv::Scalar kNormalizationVariance(0.229, 0.224, 0.225);
|
|
40
|
+
|
|
41
|
+
} // namespace rnexecutorch::models::ocr::constants
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
#include "Detector.h"
|
|
2
|
+
#include <rnexecutorch/data_processing/ImageProcessing.h>
|
|
3
|
+
#include <rnexecutorch/models/ocr/Constants.h>
|
|
4
|
+
#include <rnexecutorch/models/ocr/utils/DetectorUtils.h>
|
|
5
|
+
|
|
6
|
+
namespace rnexecutorch::models::ocr {
|
|
7
|
+
/**
 * Constructs the detector on top of BaseModel and caches the image size
 * taken from the model's first input shape (last entry as width, second to
 * last as height).
 *
 * @param modelSource Forwarded unchanged to BaseModel.
 * @param callInvoker Forwarded unchanged to BaseModel.
 * @throws std::runtime_error if the model reports no input shapes, or if the
 *         first input shape has fewer than two dimensions.
 */
Detector::Detector(const std::string &modelSource,
                   std::shared_ptr<react::CallInvoker> callInvoker)
    : BaseModel(modelSource, callInvoker) {
  auto shapes = getAllInputShapes();
  if (shapes.empty()) {
    throw std::runtime_error(
        "Detector model seems to not take any input tensors.");
  }

  std::vector<int32_t> firstShape = shapes[0];
  const auto rank = firstShape.size();
  if (rank < 2) {
    throw std::runtime_error("Unexpected detector model input size, expected "
                             "at least 2 dimensions but got: " +
                             std::to_string(rank) + ".");
  }

  const auto width = firstShape[rank - 1];
  const auto height = firstShape[rank - 2];
  modelImageSize = cv::Size(width, height);
}
|
|
24
|
+
|
|
25
|
+
/** Returns the image size cached from the model's first input shape. */
cv::Size Detector::getModelImageSize() const noexcept {
  return modelImageSize;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Detects text regions in `inputImage`.
 *
 * The detector takes a tensor of shape [1, 3, H, H], where H is fixed per
 * model (currently 800 or 1280 in the supported models). Because resizing
 * strongly affects recognition quality, the image keeps its aspect ratio and
 * the remaining area is filled with padding.
 *
 * @param inputImage Image to analyse.
 * @return Bounding boxes produced by postprocess() from the first output.
 * @throws std::runtime_error if the forward pass reports an error.
 */
std::vector<types::DetectorBBox> Detector::generate(const cv::Mat &inputImage) {
  auto shapes = getAllInputShapes();
  cv::Mat paddedImage =
      image_processing::resizePadded(inputImage, getModelImageSize());
  TensorPtr modelInput = image_processing::getTensorFromMatrix(
      shapes[0], paddedImage, constants::kNormalizationMean,
      constants::kNormalizationVariance);

  auto outcome = BaseModel::forward(modelInput);
  if (outcome.ok()) {
    return postprocess(outcome->at(0).toTensor());
  }

  throw std::runtime_error(
      "Failed to forward, error: " +
      std::to_string(static_cast<uint32_t>(outcome.error())));
}
|
|
50
|
+
|
|
51
|
+
std::vector<types::DetectorBBox>
|
|
52
|
+
Detector::postprocess(const Tensor &tensor) const {
|
|
53
|
+
/*
|
|
54
|
+
The output of the model consists of two matrices (heat maps):
|
|
55
|
+
1. ScoreText(Score map) - The probability of a region containing character.
|
|
56
|
+
2. ScoreAffinity(Affinity map) - affinity between characters, used to to
|
|
57
|
+
group each character into a single instance (sequence) Both matrices are
|
|
58
|
+
H/2xW/2 (400x400 or 640x640).
|
|
59
|
+
*/
|
|
60
|
+
std::span<const float> tensorData(tensor.const_data_ptr<float>(),
|
|
61
|
+
tensor.numel());
|
|
62
|
+
/*
|
|
63
|
+
The output of the model is a matrix half the size of the input image
|
|
64
|
+
containing two channels representing the heatmaps.
|
|
65
|
+
*/
|
|
66
|
+
auto [scoreTextMat, scoreAffinityMat] = utils::interleavedArrayToMats(
|
|
67
|
+
tensorData,
|
|
68
|
+
cv::Size(modelImageSize.width / 2, modelImageSize.height / 2));
|
|
69
|
+
|
|
70
|
+
/*
|
|
71
|
+
Heatmaps are then converted into list of bounding boxes.
|
|
72
|
+
*/
|
|
73
|
+
std::vector<types::DetectorBBox> bBoxesList = utils::getDetBoxesFromTextMap(
|
|
74
|
+
scoreTextMat, scoreAffinityMat, constants::kTextThreshold,
|
|
75
|
+
constants::kLinkThreshold, constants::kLowTextThreshold);
|
|
76
|
+
|
|
77
|
+
/*
|
|
78
|
+
Bounding boxes are at first corresponding to the 400x400 size or 640x640.
|
|
79
|
+
RecognitionHandler in the later part of processing works on images of size
|
|
80
|
+
1280x1280. To match this difference we has to scale by the proper factor
|
|
81
|
+
(3.2 or 2.0).
|
|
82
|
+
*/
|
|
83
|
+
const float restoreRatio = utils::calculateRestoreRatio(
|
|
84
|
+
scoreTextMat.rows, constants::kRecognizerImageSize);
|
|
85
|
+
utils::restoreBboxRatio(bBoxesList, restoreRatio);
|
|
86
|
+
/*
|
|
87
|
+
Since every bounding box is processed separately by Recognition models, we'd
|
|
88
|
+
like to reduce the number of boxes. Also, grouping nearby boxes means we
|
|
89
|
+
process many words / full line at once. It is not only faster but also easier
|
|
90
|
+
for Recognizer models than recognition of single characters.
|
|
91
|
+
*/
|
|
92
|
+
bBoxesList = utils::groupTextBoxes(
|
|
93
|
+
bBoxesList, constants::kCenterThreshold, constants::kDistanceThreshold,
|
|
94
|
+
constants::kHeightThreshold, constants::kMinSideThreshold,
|
|
95
|
+
constants::kMaxSideThreshold, constants::kMaxWidth);
|
|
96
|
+
|
|
97
|
+
return bBoxesList;
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
} // namespace rnexecutorch::models::ocr
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <executorch/extension/tensor/tensor_ptr.h>
|
|
4
|
+
#include <opencv2/opencv.hpp>
|
|
5
|
+
#include <rnexecutorch/models/BaseModel.h>
|
|
6
|
+
#include <rnexecutorch/models/ocr/Types.h>
|
|
7
|
+
|
|
8
|
+
namespace rnexecutorch::models::ocr {
|
|
9
|
+
/*
|
|
10
|
+
Detector is a model responsible for recognizing the areas where text is
|
|
11
|
+
located. It returns the list of bounding boxes. The model used as detector is
|
|
12
|
+
based on CRAFT (Character Region Awareness for Text Detection) paper.
|
|
13
|
+
https://arxiv.org/pdf/1904.01941
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
using executorch::aten::Tensor;
|
|
17
|
+
using executorch::extension::TensorPtr;
|
|
18
|
+
|
|
19
|
+
class Detector final : public BaseModel {
|
|
20
|
+
public:
|
|
21
|
+
explicit Detector(const std::string &modelSource,
|
|
22
|
+
std::shared_ptr<react::CallInvoker> callInvoker);
|
|
23
|
+
std::vector<types::DetectorBBox> generate(const cv::Mat &inputImage);
|
|
24
|
+
cv::Size getModelImageSize() const noexcept;
|
|
25
|
+
|
|
26
|
+
private:
|
|
27
|
+
std::vector<types::DetectorBBox> postprocess(const Tensor &tensor) const;
|
|
28
|
+
cv::Size modelImageSize;
|
|
29
|
+
};
|
|
30
|
+
} // namespace rnexecutorch::models::ocr
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
#include "OCR.h"
|
|
2
|
+
#include <rnexecutorch/data_processing/ImageProcessing.h>
|
|
3
|
+
#include <rnexecutorch/models/ocr/Constants.h>
|
|
4
|
+
|
|
5
|
+
namespace rnexecutorch::models::ocr {
|
|
6
|
+
/**
 * Builds the OCR pipeline from one detector and a recognition handler.
 *
 * @param detectorSource         Source of the detector model.
 * @param recognizerSourceLarge  Passed to RecognitionHandler.
 * @param recognizerSourceMedium Passed to RecognitionHandler.
 * @param recognizerSourceSmall  Passed to RecognitionHandler.
 * @param symbols                Passed to RecognitionHandler.
 * @param callInvoker            Shared by the detector and the handler.
 */
OCR::OCR(const std::string &detectorSource,
         const std::string &recognizerSourceLarge,
         const std::string &recognizerSourceMedium,
         const std::string &recognizerSourceSmall, std::string symbols,
         std::shared_ptr<react::CallInvoker> callInvoker)
    : detector(detectorSource, callInvoker),
      recognitionHandler(recognizerSourceLarge, recognizerSourceMedium,
                         recognizerSourceSmall, symbols, callInvoker) {
  // All work happens in the member initializers above.
}
|
|
14
|
+
|
|
15
|
+
std::vector<types::OCRDetection> OCR::generate(std::string input) {
|
|
16
|
+
cv::Mat image = image_processing::readImage(input);
|
|
17
|
+
if (image.empty()) {
|
|
18
|
+
throw std::runtime_error("Failed to load image from path: " + input);
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
/*
|
|
22
|
+
1. Detection process returns the list of bounding boxes containing areas
|
|
23
|
+
with text. They are corresponding to the image of size 1280x1280, which
|
|
24
|
+
is a size later used by Recognition Handler.
|
|
25
|
+
*/
|
|
26
|
+
std::vector<types::DetectorBBox> bboxesList = detector.generate(image);
|
|
27
|
+
cv::cvtColor(image, image, cv::COLOR_BGR2GRAY);
|
|
28
|
+
|
|
29
|
+
/*
|
|
30
|
+
Recognition Handler is responsible for deciding which Recognition model to
|
|
31
|
+
use for each box. It returns the list of tuples; each consisting of:
|
|
32
|
+
- recognized text
|
|
33
|
+
- coordinates of bounding box corresponding to the original image size
|
|
34
|
+
- confidence score
|
|
35
|
+
*/
|
|
36
|
+
std::vector<types::OCRDetection> result =
|
|
37
|
+
recognitionHandler.recognize(bboxesList, image,
|
|
38
|
+
cv::Size(constants::kRecognizerImageSize,
|
|
39
|
+
constants::kRecognizerImageSize));
|
|
40
|
+
|
|
41
|
+
return result;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
std::size_t OCR::getMemoryLowerBound() const noexcept {
|
|
45
|
+
return detector.getMemoryLowerBound() +
|
|
46
|
+
recognitionHandler.getMemoryLowerBound();
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
/** Unloads the detector first, then the recognition handler. */
void OCR::unload() noexcept {
  detector.unload();
  recognitionHandler.unload();
}
|
|
53
|
+
} // namespace rnexecutorch::models::ocr
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <string>
|
|
4
|
+
#include <vector>
|
|
5
|
+
|
|
6
|
+
#include "rnexecutorch/metaprogramming/ConstructorHelpers.h"
|
|
7
|
+
#include <rnexecutorch/models/ocr/Detector.h>
|
|
8
|
+
#include <rnexecutorch/models/ocr/RecognitionHandler.h>
|
|
9
|
+
#include <rnexecutorch/models/ocr/Types.h>
|
|
10
|
+
|
|
11
|
+
namespace rnexecutorch {
|
|
12
|
+
namespace models::ocr {
|
|
13
|
+
/*
|
|
14
|
+
The OCR consists of two phases:
|
|
15
|
+
1. Detection - detecting text regions in the image, the result of this phase
|
|
16
|
+
is a list of bounding boxes.
|
|
17
|
+
2. Recognition - recognizing the text in the bounding boxes, the result is a
|
|
18
|
+
list of strings and corresponding boxes & confidence scores.
|
|
19
|
+
|
|
20
|
+
Recognition uses three models, each model is responsible for recognizing text
|
|
21
|
+
of different sizes (e.g. large - 512x64, medium - 256x64, small - 128x64).
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
class OCR final {
|
|
25
|
+
public:
|
|
26
|
+
explicit OCR(const std::string &detectorSource,
|
|
27
|
+
const std::string &recognizerSourceLarge,
|
|
28
|
+
const std::string &recognizerSourceMedium,
|
|
29
|
+
const std::string &recognizerSourceSmall, std::string symbols,
|
|
30
|
+
std::shared_ptr<react::CallInvoker> callInvoker);
|
|
31
|
+
std::vector<types::OCRDetection> generate(std::string input);
|
|
32
|
+
std::size_t getMemoryLowerBound() const noexcept;
|
|
33
|
+
void unload() noexcept;
|
|
34
|
+
|
|
35
|
+
private:
|
|
36
|
+
Detector detector;
|
|
37
|
+
RecognitionHandler recognitionHandler;
|
|
38
|
+
};
|
|
39
|
+
} // namespace models::ocr
|
|
40
|
+
|
|
41
|
+
REGISTER_CONSTRUCTOR(models::ocr::OCR, std::string, std::string, std::string,
|
|
42
|
+
std::string, std::string,
|
|
43
|
+
std::shared_ptr<react::CallInvoker>);
|
|
44
|
+
} // namespace rnexecutorch
|