react-native-executorch 0.4.10 → 0.5.0-nightly-6f4dd53-20251211
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +77 -71
- package/android/CMakeLists.txt +29 -0
- package/android/build.gradle +76 -13
- package/android/libs/classes.jar +0 -0
- package/android/src/main/cpp/CMakeLists.txt +109 -0
- package/android/src/main/cpp/ETInstallerModule.cpp +76 -0
- package/android/src/main/cpp/ETInstallerModule.h +43 -0
- package/android/src/main/java/com/swmansion/rnexecutorch/ETInstaller.kt +66 -0
- package/android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt +6 -124
- package/common/ada/ada.cpp +17406 -0
- package/common/ada/ada.h +10274 -0
- package/common/pfft/pfft.c +2205 -0
- package/common/pfft/pfft.h +185 -0
- package/common/rnexecutorch/Log.h +489 -0
- package/common/rnexecutorch/RnExecutorchInstaller.cpp +113 -0
- package/common/rnexecutorch/RnExecutorchInstaller.h +87 -0
- package/common/rnexecutorch/TokenizerModule.cpp +52 -0
- package/common/rnexecutorch/TokenizerModule.h +29 -0
- package/common/rnexecutorch/data_processing/FFT.cpp +21 -0
- package/common/rnexecutorch/data_processing/FFT.h +23 -0
- package/common/rnexecutorch/data_processing/FileUtils.h +30 -0
- package/common/rnexecutorch/data_processing/ImageProcessing.cpp +240 -0
- package/common/rnexecutorch/data_processing/ImageProcessing.h +55 -0
- package/common/rnexecutorch/data_processing/Numerical.cpp +111 -0
- package/common/rnexecutorch/data_processing/Numerical.h +77 -0
- package/common/rnexecutorch/data_processing/base64.cpp +110 -0
- package/common/rnexecutorch/data_processing/base64.h +46 -0
- package/common/rnexecutorch/data_processing/dsp.cpp +19 -0
- package/common/rnexecutorch/data_processing/dsp.h +12 -0
- package/common/rnexecutorch/data_processing/gzip.cpp +47 -0
- package/common/rnexecutorch/data_processing/gzip.h +7 -0
- package/common/rnexecutorch/host_objects/JSTensorViewIn.h +12 -0
- package/common/rnexecutorch/host_objects/JSTensorViewOut.h +22 -0
- package/common/rnexecutorch/host_objects/JsiConversions.h +418 -0
- package/common/rnexecutorch/host_objects/ModelHostObject.h +313 -0
- package/common/rnexecutorch/jsi/JsiHostObject.cpp +108 -0
- package/common/rnexecutorch/jsi/JsiHostObject.h +87 -0
- package/common/rnexecutorch/jsi/OwningArrayBuffer.h +57 -0
- package/common/rnexecutorch/jsi/Promise.cpp +20 -0
- package/common/rnexecutorch/jsi/Promise.h +69 -0
- package/common/rnexecutorch/jsi/RuntimeAwareCache.h +58 -0
- package/common/rnexecutorch/jsi/RuntimeLifecycleMonitor.cpp +53 -0
- package/common/rnexecutorch/jsi/RuntimeLifecycleMonitor.h +35 -0
- package/common/rnexecutorch/metaprogramming/ConstructorHelpers.h +133 -0
- package/common/rnexecutorch/metaprogramming/FunctionHelpers.h +50 -0
- package/common/rnexecutorch/metaprogramming/TypeConcepts.h +37 -0
- package/common/rnexecutorch/models/BaseModel.cpp +183 -0
- package/common/rnexecutorch/models/BaseModel.h +61 -0
- package/common/rnexecutorch/models/classification/Classification.cpp +72 -0
- package/common/rnexecutorch/models/classification/Classification.h +31 -0
- package/{ios/RnExecutorch/models/classification/Constants.mm → common/rnexecutorch/models/classification/Constants.h} +7 -2
- package/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp +19 -0
- package/common/rnexecutorch/models/embeddings/BaseEmbeddings.h +17 -0
- package/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp +45 -0
- package/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h +28 -0
- package/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp +60 -0
- package/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h +31 -0
- package/{ios/RnExecutorch/models/image_segmentation/Constants.mm → common/rnexecutorch/models/image_segmentation/Constants.h} +7 -2
- package/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp +168 -0
- package/common/rnexecutorch/models/image_segmentation/ImageSegmentation.h +50 -0
- package/common/rnexecutorch/models/llm/LLM.cpp +98 -0
- package/common/rnexecutorch/models/llm/LLM.h +38 -0
- package/{ios/RnExecutorch/utils/Constants.mm → common/rnexecutorch/models/object_detection/Constants.h} +9 -2
- package/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +82 -0
- package/common/rnexecutorch/models/object_detection/ObjectDetection.h +37 -0
- package/common/rnexecutorch/models/object_detection/Types.h +13 -0
- package/{ios/RnExecutorch/utils/ObjectDetectionUtils.mm → common/rnexecutorch/models/object_detection/Utils.cpp} +17 -35
- package/common/rnexecutorch/models/object_detection/Utils.h +11 -0
- package/common/rnexecutorch/models/ocr/CTCLabelConverter.cpp +88 -0
- package/common/rnexecutorch/models/ocr/CTCLabelConverter.h +29 -0
- package/common/rnexecutorch/models/ocr/Constants.h +41 -0
- package/common/rnexecutorch/models/ocr/Detector.cpp +100 -0
- package/common/rnexecutorch/models/ocr/Detector.h +30 -0
- package/common/rnexecutorch/models/ocr/OCR.cpp +53 -0
- package/common/rnexecutorch/models/ocr/OCR.h +44 -0
- package/common/rnexecutorch/models/ocr/RecognitionHandler.cpp +108 -0
- package/common/rnexecutorch/models/ocr/RecognitionHandler.h +42 -0
- package/common/rnexecutorch/models/ocr/Recognizer.cpp +80 -0
- package/common/rnexecutorch/models/ocr/Recognizer.h +36 -0
- package/common/rnexecutorch/models/ocr/Types.h +35 -0
- package/common/rnexecutorch/models/ocr/utils/DetectorUtils.cpp +710 -0
- package/common/rnexecutorch/models/ocr/utils/DetectorUtils.h +81 -0
- package/common/rnexecutorch/models/ocr/utils/RecognitionHandlerUtils.cpp +159 -0
- package/common/rnexecutorch/models/ocr/utils/RecognitionHandlerUtils.h +72 -0
- package/common/rnexecutorch/models/ocr/utils/RecognizerUtils.cpp +204 -0
- package/common/rnexecutorch/models/ocr/utils/RecognizerUtils.h +71 -0
- package/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp +125 -0
- package/common/rnexecutorch/models/speech_to_text/SpeechToText.h +57 -0
- package/common/rnexecutorch/models/speech_to_text/asr/ASR.cpp +303 -0
- package/common/rnexecutorch/models/speech_to_text/asr/ASR.h +61 -0
- package/common/rnexecutorch/models/speech_to_text/stream/HypothesisBuffer.cpp +82 -0
- package/common/rnexecutorch/models/speech_to_text/stream/HypothesisBuffer.h +25 -0
- package/common/rnexecutorch/models/speech_to_text/stream/OnlineASRProcessor.cpp +99 -0
- package/common/rnexecutorch/models/speech_to_text/stream/OnlineASRProcessor.h +33 -0
- package/common/rnexecutorch/models/speech_to_text/types/DecodingOptions.h +15 -0
- package/common/rnexecutorch/models/speech_to_text/types/GenerationResult.h +12 -0
- package/common/rnexecutorch/models/speech_to_text/types/ProcessResult.h +12 -0
- package/common/rnexecutorch/models/speech_to_text/types/Segment.h +14 -0
- package/common/rnexecutorch/models/speech_to_text/types/Word.h +13 -0
- package/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp +55 -0
- package/common/rnexecutorch/models/style_transfer/StyleTransfer.h +35 -0
- package/common/rnexecutorch/models/text_to_image/Constants.h +9 -0
- package/common/rnexecutorch/models/text_to_image/Decoder.cpp +32 -0
- package/common/rnexecutorch/models/text_to_image/Decoder.h +24 -0
- package/common/rnexecutorch/models/text_to_image/Encoder.cpp +44 -0
- package/common/rnexecutorch/models/text_to_image/Encoder.h +32 -0
- package/common/rnexecutorch/models/text_to_image/Scheduler.cpp +152 -0
- package/common/rnexecutorch/models/text_to_image/Scheduler.h +41 -0
- package/common/rnexecutorch/models/text_to_image/TextToImage.cpp +141 -0
- package/common/rnexecutorch/models/text_to_image/TextToImage.h +64 -0
- package/common/rnexecutorch/models/text_to_image/UNet.cpp +38 -0
- package/common/rnexecutorch/models/text_to_image/UNet.h +28 -0
- package/common/rnexecutorch/models/vertical_ocr/VerticalDetector.cpp +93 -0
- package/common/rnexecutorch/models/vertical_ocr/VerticalDetector.h +49 -0
- package/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp +180 -0
- package/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h +85 -0
- package/common/rnexecutorch/models/voice_activity_detection/Constants.h +27 -0
- package/common/rnexecutorch/models/voice_activity_detection/Types.h +12 -0
- package/common/rnexecutorch/models/voice_activity_detection/Utils.cpp +15 -0
- package/common/rnexecutorch/models/voice_activity_detection/Utils.h +13 -0
- package/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.cpp +160 -0
- package/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h +36 -0
- package/common/rnexecutorch/tests/CMakeLists.txt +30 -0
- package/common/rnexecutorch/tests/LogTest.cpp +530 -0
- package/common/rnexecutorch/tests/NumericalTest.cpp +110 -0
- package/common/rnexecutorch/tests/README.md +37 -0
- package/common/rnexecutorch/threads/GlobalThreadPool.h +84 -0
- package/common/rnexecutorch/threads/HighPerformanceThreadPool.h +364 -0
- package/common/rnexecutorch/threads/utils/ThreadUtils.h +29 -0
- package/common/runner/arange_util.cpp +44 -0
- package/common/runner/arange_util.h +37 -0
- package/common/runner/constants.h +28 -0
- package/common/runner/io_manager.h +240 -0
- package/common/runner/irunner.h +119 -0
- package/common/runner/kernel_includes.h +23 -0
- package/common/runner/runner.cpp +369 -0
- package/common/runner/runner.h +85 -0
- package/common/runner/sampler.cpp +201 -0
- package/common/runner/sampler.h +67 -0
- package/common/runner/stats.h +161 -0
- package/common/runner/text_decoder_runner.cpp +79 -0
- package/common/runner/text_decoder_runner.h +126 -0
- package/common/runner/text_prefiller.cpp +125 -0
- package/common/runner/text_prefiller.h +85 -0
- package/common/runner/text_token_generator.h +205 -0
- package/common/runner/util.h +153 -0
- package/ios/RnExecutorch/ETInstaller.h +8 -0
- package/ios/RnExecutorch/ETInstaller.mm +56 -0
- package/ios/RnExecutorch.xcodeproj/project.pbxproj +73 -23
- package/lib/module/Error.js +3 -0
- package/lib/module/Error.js.map +1 -1
- package/lib/module/common/Logger.js +23 -0
- package/lib/module/common/Logger.js.map +1 -0
- package/lib/module/constants/directories.js +1 -1
- package/lib/module/constants/directories.js.map +1 -1
- package/lib/module/constants/llmDefaults.js +8 -0
- package/lib/module/constants/llmDefaults.js.map +1 -1
- package/lib/module/constants/modelUrls.js +356 -84
- package/lib/module/constants/modelUrls.js.map +1 -1
- package/lib/module/constants/ocr/models.js +181 -286
- package/lib/module/constants/ocr/models.js.map +1 -1
- package/lib/module/constants/ocr/symbols.js +66 -65
- package/lib/module/constants/ocr/symbols.js.map +1 -1
- package/lib/module/controllers/LLMController.js +53 -23
- package/lib/module/controllers/LLMController.js.map +1 -1
- package/lib/module/controllers/OCRController.js +17 -10
- package/lib/module/controllers/OCRController.js.map +1 -1
- package/lib/module/controllers/VerticalOCRController.js +17 -10
- package/lib/module/controllers/VerticalOCRController.js.map +1 -1
- package/lib/module/hooks/computer_vision/useClassification.js +4 -4
- package/lib/module/hooks/computer_vision/useClassification.js.map +1 -1
- package/lib/module/hooks/computer_vision/useImageEmbeddings.js +13 -0
- package/lib/module/hooks/computer_vision/useImageEmbeddings.js.map +1 -0
- package/lib/module/hooks/computer_vision/useImageSegmentation.js +2 -2
- package/lib/module/hooks/computer_vision/useImageSegmentation.js.map +1 -1
- package/lib/module/hooks/computer_vision/useOCR.js +16 -17
- package/lib/module/hooks/computer_vision/useOCR.js.map +1 -1
- package/lib/module/hooks/computer_vision/useObjectDetection.js +3 -3
- package/lib/module/hooks/computer_vision/useObjectDetection.js.map +1 -1
- package/lib/module/hooks/computer_vision/useStyleTransfer.js +3 -3
- package/lib/module/hooks/computer_vision/useStyleTransfer.js.map +1 -1
- package/lib/module/hooks/computer_vision/useTextToImage.js +57 -0
- package/lib/module/hooks/computer_vision/useTextToImage.js.map +1 -0
- package/lib/module/hooks/computer_vision/useVerticalOCR.js +18 -19
- package/lib/module/hooks/computer_vision/useVerticalOCR.js.map +1 -1
- package/lib/module/hooks/general/useExecutorchModule.js +1 -1
- package/lib/module/hooks/general/useExecutorchModule.js.map +1 -1
- package/lib/module/hooks/natural_language_processing/useLLM.js +30 -29
- package/lib/module/hooks/natural_language_processing/useLLM.js.map +1 -1
- package/lib/module/hooks/natural_language_processing/useSpeechToText.js +72 -33
- package/lib/module/hooks/natural_language_processing/useSpeechToText.js.map +1 -1
- package/lib/module/hooks/natural_language_processing/useTextEmbeddings.js +2 -3
- package/lib/module/hooks/natural_language_processing/useTextEmbeddings.js.map +1 -1
- package/lib/module/hooks/natural_language_processing/useTokenizer.js +19 -18
- package/lib/module/hooks/natural_language_processing/useTokenizer.js.map +1 -1
- package/lib/module/hooks/natural_language_processing/useVAD.js +13 -0
- package/lib/module/hooks/natural_language_processing/useVAD.js.map +1 -0
- package/lib/module/hooks/useModule.js +13 -9
- package/lib/module/hooks/useModule.js.map +1 -1
- package/lib/module/index.js +21 -3
- package/lib/module/index.js.map +1 -1
- package/lib/module/modules/BaseModule.js +9 -14
- package/lib/module/modules/BaseModule.js.map +1 -1
- package/lib/module/modules/computer_vision/ClassificationModule.js +11 -6
- package/lib/module/modules/computer_vision/ClassificationModule.js.map +1 -1
- package/lib/module/modules/computer_vision/ImageEmbeddingsModule.js +19 -0
- package/lib/module/modules/computer_vision/ImageEmbeddingsModule.js.map +1 -0
- package/lib/module/modules/computer_vision/ImageSegmentationModule.js +20 -18
- package/lib/module/modules/computer_vision/ImageSegmentationModule.js.map +1 -1
- package/lib/module/modules/computer_vision/OCRModule.js +13 -10
- package/lib/module/modules/computer_vision/OCRModule.js.map +1 -1
- package/lib/module/modules/computer_vision/ObjectDetectionModule.js +11 -6
- package/lib/module/modules/computer_vision/ObjectDetectionModule.js.map +1 -1
- package/lib/module/modules/computer_vision/StyleTransferModule.js +11 -6
- package/lib/module/modules/computer_vision/StyleTransferModule.js.map +1 -1
- package/lib/module/modules/computer_vision/TextToImageModule.js +48 -0
- package/lib/module/modules/computer_vision/TextToImageModule.js.map +1 -0
- package/lib/module/modules/computer_vision/VerticalOCRModule.js +15 -10
- package/lib/module/modules/computer_vision/VerticalOCRModule.js.map +1 -1
- package/lib/module/modules/general/ExecutorchModule.js +8 -34
- package/lib/module/modules/general/ExecutorchModule.js.map +1 -1
- package/lib/module/modules/natural_language_processing/LLMModule.js +25 -24
- package/lib/module/modules/natural_language_processing/LLMModule.js.map +1 -1
- package/lib/module/modules/natural_language_processing/SpeechToTextModule.js +109 -27
- package/lib/module/modules/natural_language_processing/SpeechToTextModule.js.map +1 -1
- package/lib/module/modules/natural_language_processing/TextEmbeddingsModule.js +13 -6
- package/lib/module/modules/natural_language_processing/TextEmbeddingsModule.js.map +1 -1
- package/lib/module/modules/natural_language_processing/TokenizerModule.js +20 -14
- package/lib/module/modules/natural_language_processing/TokenizerModule.js.map +1 -1
- package/lib/module/modules/natural_language_processing/VADModule.js +19 -0
- package/lib/module/modules/natural_language_processing/VADModule.js.map +1 -0
- package/lib/module/native/NativeETInstaller.js +5 -0
- package/lib/module/native/NativeETInstaller.js.map +1 -0
- package/lib/module/native/RnExecutorchModules.js +2 -12
- package/lib/module/native/RnExecutorchModules.js.map +1 -1
- package/lib/module/types/common.js +25 -8
- package/lib/module/types/common.js.map +1 -1
- package/lib/module/types/llm.js.map +1 -1
- package/lib/module/types/stt.js +1 -79
- package/lib/module/types/stt.js.map +1 -1
- package/lib/module/types/vad.js +2 -0
- package/lib/module/types/vad.js.map +1 -0
- package/lib/module/utils/ResourceFetcher.js +275 -114
- package/lib/module/utils/ResourceFetcher.js.map +1 -1
- package/lib/module/utils/ResourceFetcherUtils.js +155 -0
- package/lib/module/utils/ResourceFetcherUtils.js.map +1 -0
- package/lib/module/utils/llm.js +41 -1
- package/lib/module/utils/llm.js.map +1 -1
- package/lib/typescript/Error.d.ts +3 -0
- package/lib/typescript/Error.d.ts.map +1 -1
- package/lib/typescript/common/Logger.d.ts +9 -0
- package/lib/typescript/common/Logger.d.ts.map +1 -0
- package/lib/typescript/constants/llmDefaults.d.ts +1 -0
- package/lib/typescript/constants/llmDefaults.d.ts.map +1 -1
- package/lib/typescript/constants/modelUrls.d.ts +263 -79
- package/lib/typescript/constants/modelUrls.d.ts.map +1 -1
- package/lib/typescript/constants/ocr/models.d.ts +882 -284
- package/lib/typescript/constants/ocr/models.d.ts.map +1 -1
- package/lib/typescript/constants/ocr/symbols.d.ts +1 -1
- package/lib/typescript/constants/ocr/symbols.d.ts.map +1 -1
- package/lib/typescript/controllers/LLMController.d.ts +8 -7
- package/lib/typescript/controllers/LLMController.d.ts.map +1 -1
- package/lib/typescript/controllers/OCRController.d.ts +5 -6
- package/lib/typescript/controllers/OCRController.d.ts.map +1 -1
- package/lib/typescript/controllers/VerticalOCRController.d.ts +5 -6
- package/lib/typescript/controllers/VerticalOCRController.d.ts.map +1 -1
- package/lib/typescript/hooks/computer_vision/useClassification.d.ts +8 -6
- package/lib/typescript/hooks/computer_vision/useClassification.d.ts.map +1 -1
- package/lib/typescript/hooks/computer_vision/useImageEmbeddings.d.ts +16 -0
- package/lib/typescript/hooks/computer_vision/useImageEmbeddings.d.ts.map +1 -0
- package/lib/typescript/hooks/computer_vision/useImageSegmentation.d.ts +5 -3
- package/lib/typescript/hooks/computer_vision/useImageSegmentation.d.ts.map +1 -1
- package/lib/typescript/hooks/computer_vision/useOCR.d.ts +5 -5
- package/lib/typescript/hooks/computer_vision/useOCR.d.ts.map +1 -1
- package/lib/typescript/hooks/computer_vision/useObjectDetection.d.ts +5 -3
- package/lib/typescript/hooks/computer_vision/useObjectDetection.d.ts.map +1 -1
- package/lib/typescript/hooks/computer_vision/useStyleTransfer.d.ts +5 -3
- package/lib/typescript/hooks/computer_vision/useStyleTransfer.d.ts.map +1 -1
- package/lib/typescript/hooks/computer_vision/useTextToImage.d.ts +22 -0
- package/lib/typescript/hooks/computer_vision/useTextToImage.d.ts.map +1 -0
- package/lib/typescript/hooks/computer_vision/useVerticalOCR.d.ts +4 -6
- package/lib/typescript/hooks/computer_vision/useVerticalOCR.d.ts.map +1 -1
- package/lib/typescript/hooks/general/useExecutorchModule.d.ts +1 -1
- package/lib/typescript/hooks/natural_language_processing/useLLM.d.ts +6 -4
- package/lib/typescript/hooks/natural_language_processing/useLLM.d.ts.map +1 -1
- package/lib/typescript/hooks/natural_language_processing/useSpeechToText.d.ts +15 -22
- package/lib/typescript/hooks/natural_language_processing/useSpeechToText.d.ts.map +1 -1
- package/lib/typescript/hooks/natural_language_processing/useTextEmbeddings.d.ts +9 -5
- package/lib/typescript/hooks/natural_language_processing/useTextEmbeddings.d.ts.map +1 -1
- package/lib/typescript/hooks/natural_language_processing/useTokenizer.d.ts +6 -4
- package/lib/typescript/hooks/natural_language_processing/useTokenizer.d.ts.map +1 -1
- package/lib/typescript/hooks/natural_language_processing/useVAD.d.ts +16 -0
- package/lib/typescript/hooks/natural_language_processing/useVAD.d.ts.map +1 -0
- package/lib/typescript/hooks/useModule.d.ts +8 -5
- package/lib/typescript/hooks/useModule.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +26 -3
- package/lib/typescript/index.d.ts.map +1 -1
- package/lib/typescript/modules/BaseModule.d.ts +7 -6
- package/lib/typescript/modules/BaseModule.d.ts.map +1 -1
- package/lib/typescript/modules/computer_vision/ClassificationModule.d.ts +4 -4
- package/lib/typescript/modules/computer_vision/ClassificationModule.d.ts.map +1 -1
- package/lib/typescript/modules/computer_vision/ImageEmbeddingsModule.d.ts +9 -0
- package/lib/typescript/modules/computer_vision/ImageEmbeddingsModule.d.ts.map +1 -0
- package/lib/typescript/modules/computer_vision/ImageSegmentationModule.d.ts +7 -27
- package/lib/typescript/modules/computer_vision/ImageSegmentationModule.d.ts.map +1 -1
- package/lib/typescript/modules/computer_vision/OCRModule.d.ts +8 -7
- package/lib/typescript/modules/computer_vision/OCRModule.d.ts.map +1 -1
- package/lib/typescript/modules/computer_vision/ObjectDetectionModule.d.ts +5 -3
- package/lib/typescript/modules/computer_vision/ObjectDetectionModule.d.ts.map +1 -1
- package/lib/typescript/modules/computer_vision/StyleTransferModule.d.ts +4 -3
- package/lib/typescript/modules/computer_vision/StyleTransferModule.d.ts.map +1 -1
- package/lib/typescript/modules/computer_vision/TextToImageModule.d.ts +16 -0
- package/lib/typescript/modules/computer_vision/TextToImageModule.d.ts.map +1 -0
- package/lib/typescript/modules/computer_vision/VerticalOCRModule.d.ts +7 -8
- package/lib/typescript/modules/computer_vision/VerticalOCRModule.d.ts.map +1 -1
- package/lib/typescript/modules/general/ExecutorchModule.d.ts +4 -7
- package/lib/typescript/modules/general/ExecutorchModule.d.ts.map +1 -1
- package/lib/typescript/modules/natural_language_processing/LLMModule.d.ts +19 -17
- package/lib/typescript/modules/natural_language_processing/LLMModule.d.ts.map +1 -1
- package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts +17 -13
- package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts.map +1 -1
- package/lib/typescript/modules/natural_language_processing/TextEmbeddingsModule.d.ts +5 -3
- package/lib/typescript/modules/natural_language_processing/TextEmbeddingsModule.d.ts.map +1 -1
- package/lib/typescript/modules/natural_language_processing/TokenizerModule.d.ts +10 -9
- package/lib/typescript/modules/natural_language_processing/TokenizerModule.d.ts.map +1 -1
- package/lib/typescript/modules/natural_language_processing/VADModule.d.ts +10 -0
- package/lib/typescript/modules/natural_language_processing/VADModule.d.ts.map +1 -0
- package/lib/typescript/native/{NativeStyleTransfer.d.ts → NativeETInstaller.d.ts} +2 -3
- package/lib/typescript/native/NativeETInstaller.d.ts.map +1 -0
- package/lib/typescript/native/RnExecutorchModules.d.ts +3 -23
- package/lib/typescript/native/RnExecutorchModules.d.ts.map +1 -1
- package/lib/typescript/types/common.d.ts +30 -2
- package/lib/typescript/types/common.d.ts.map +1 -1
- package/lib/typescript/types/llm.d.ts +9 -1
- package/lib/typescript/types/llm.d.ts.map +1 -1
- package/lib/typescript/types/stt.d.ts +9 -88
- package/lib/typescript/types/stt.d.ts.map +1 -1
- package/lib/typescript/types/vad.d.ts +5 -0
- package/lib/typescript/types/vad.d.ts.map +1 -0
- package/lib/typescript/utils/ResourceFetcher.d.ts +47 -10
- package/lib/typescript/utils/ResourceFetcher.d.ts.map +1 -1
- package/lib/typescript/utils/ResourceFetcherUtils.d.ts +55 -0
- package/lib/typescript/utils/ResourceFetcherUtils.d.ts.map +1 -0
- package/lib/typescript/utils/llm.d.ts +4 -0
- package/lib/typescript/utils/llm.d.ts.map +1 -1
- package/package.json +32 -68
- package/react-native-executorch.podspec +62 -4
- package/src/Error.ts +3 -0
- package/src/common/Logger.ts +25 -0
- package/src/constants/directories.ts +1 -1
- package/src/constants/llmDefaults.ts +11 -0
- package/src/constants/modelUrls.ts +433 -168
- package/src/constants/ocr/models.ts +826 -395
- package/src/constants/ocr/symbols.ts +66 -65
- package/src/controllers/LLMController.ts +76 -36
- package/src/controllers/OCRController.ts +25 -16
- package/src/controllers/VerticalOCRController.ts +25 -15
- package/src/hooks/computer_vision/useClassification.ts +8 -9
- package/src/hooks/computer_vision/useImageEmbeddings.ts +15 -0
- package/src/hooks/computer_vision/useImageSegmentation.ts +3 -6
- package/src/hooks/computer_vision/useOCR.ts +32 -25
- package/src/hooks/computer_vision/useObjectDetection.ts +4 -7
- package/src/hooks/computer_vision/useStyleTransfer.ts +4 -4
- package/src/hooks/computer_vision/useTextToImage.ts +92 -0
- package/src/hooks/computer_vision/useVerticalOCR.ts +33 -31
- package/src/hooks/general/useExecutorchModule.ts +1 -1
- package/src/hooks/natural_language_processing/useLLM.ts +54 -31
- package/src/hooks/natural_language_processing/useSpeechToText.ts +96 -88
- package/src/hooks/natural_language_processing/useTextEmbeddings.ts +9 -9
- package/src/hooks/natural_language_processing/useTokenizer.ts +21 -21
- package/src/hooks/natural_language_processing/useVAD.ts +15 -0
- package/src/hooks/useModule.ts +23 -13
- package/src/index.ts +126 -0
- package/src/modules/BaseModule.ts +17 -22
- package/src/modules/computer_vision/ClassificationModule.ts +18 -9
- package/src/modules/computer_vision/ImageEmbeddingsModule.ts +26 -0
- package/src/modules/computer_vision/ImageSegmentationModule.ts +34 -26
- package/src/modules/computer_vision/OCRModule.ts +23 -15
- package/src/modules/computer_vision/ObjectDetectionModule.ts +22 -9
- package/src/modules/computer_vision/StyleTransferModule.ts +18 -9
- package/src/modules/computer_vision/TextToImageModule.ts +93 -0
- package/src/modules/computer_vision/VerticalOCRModule.ts +25 -21
- package/src/modules/general/ExecutorchModule.ts +16 -46
- package/src/modules/natural_language_processing/LLMModule.ts +41 -32
- package/src/modules/natural_language_processing/SpeechToTextModule.ts +164 -66
- package/src/modules/natural_language_processing/TextEmbeddingsModule.ts +25 -10
- package/src/modules/natural_language_processing/TokenizerModule.ts +27 -17
- package/src/modules/natural_language_processing/VADModule.ts +27 -0
- package/src/native/NativeETInstaller.ts +8 -0
- package/src/native/RnExecutorchModules.ts +4 -50
- package/src/types/common.ts +40 -12
- package/src/types/llm.ts +10 -0
- package/src/types/stt.ts +87 -90
- package/src/types/vad.ts +4 -0
- package/src/utils/ResourceFetcher.ts +342 -120
- package/src/utils/ResourceFetcherUtils.ts +184 -0
- package/src/utils/llm.ts +65 -1
- package/third-party/android/libs/cpuinfo/arm64-v8a/libcpuinfo.so +0 -0
- package/third-party/android/libs/executorch/arm64-v8a/libexecutorch.so +0 -0
- package/third-party/android/libs/executorch/x86_64/libexecutorch.so +0 -0
- package/third-party/android/libs/opencv/arm64-v8a/libopencv_core.a +0 -0
- package/third-party/android/libs/opencv/arm64-v8a/libopencv_features2d.a +0 -0
- package/third-party/android/libs/opencv/arm64-v8a/libopencv_highgui.a +0 -0
- package/third-party/android/libs/opencv/arm64-v8a/libopencv_imgproc.a +0 -0
- package/third-party/android/libs/opencv/arm64-v8a/libopencv_photo.a +0 -0
- package/third-party/android/libs/opencv/arm64-v8a/libopencv_video.a +0 -0
- package/third-party/android/libs/opencv/x86_64/libopencv_core.a +0 -0
- package/third-party/android/libs/opencv/x86_64/libopencv_features2d.a +0 -0
- package/third-party/android/libs/opencv/x86_64/libopencv_highgui.a +0 -0
- package/third-party/android/libs/opencv/x86_64/libopencv_imgproc.a +0 -0
- package/third-party/android/libs/opencv/x86_64/libopencv_photo.a +0 -0
- package/third-party/android/libs/opencv/x86_64/libopencv_video.a +0 -0
- package/third-party/android/libs/opencv-third-party/arm64-v8a/libkleidicv.a +0 -0
- package/third-party/android/libs/opencv-third-party/arm64-v8a/libkleidicv_hal.a +0 -0
- package/third-party/android/libs/opencv-third-party/arm64-v8a/libkleidicv_thread.a +0 -0
- package/third-party/android/libs/pthreadpool/arm64-v8a/libpthreadpool.so +0 -0
- package/third-party/android/libs/tokenizers-cpp/arm64-v8a/libsentencepiece.a +0 -0
- package/third-party/android/libs/tokenizers-cpp/arm64-v8a/libtokenizers_c.a +0 -0
- package/third-party/android/libs/tokenizers-cpp/arm64-v8a/libtokenizers_cpp.a +0 -0
- package/{ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib → third-party/android/libs/tokenizers-cpp/x86_64/libsentencepiece.a} +0 -0
- package/third-party/android/libs/tokenizers-cpp/x86_64/libtokenizers_c.a +0 -0
- package/third-party/android/libs/tokenizers-cpp/x86_64/libtokenizers_cpp.a +0 -0
- package/third-party/include/c10/macros/Export.h +1 -0
- package/third-party/include/c10/macros/Macros.h +1 -0
- package/third-party/include/c10/util/BFloat16-inl.h +1 -0
- package/third-party/include/c10/util/BFloat16-math.h +266 -0
- package/third-party/include/c10/util/BFloat16.h +1 -0
- package/third-party/include/c10/util/Half-inl.h +1 -0
- package/third-party/include/c10/util/Half.h +8 -0
- package/third-party/include/c10/util/TypeSafeSignMath.h +1 -0
- package/third-party/include/c10/util/bit_cast.h +1 -0
- package/third-party/include/c10/util/complex.h +72 -0
- package/third-party/include/c10/util/complex_math.h +399 -0
- package/third-party/include/c10/util/complex_utils.h +41 -0
- package/third-party/include/c10/util/floating_point_utils.h +1 -0
- package/third-party/include/c10/util/irange.h +107 -0
- package/third-party/include/c10/util/llvmMathExtras.h +866 -0
- package/third-party/include/c10/util/overflows.h +95 -0
- package/third-party/include/c10/util/safe_numerics.h +97 -0
- package/third-party/include/cpuinfo/cpuinfo.h +2305 -0
- package/third-party/include/executorch/ExecuTorch.h +13 -0
- package/third-party/include/executorch/ExecuTorchError.h +90 -0
- package/third-party/include/executorch/ExecuTorchLLM/ExecuTorchLLM.h +12 -0
- package/third-party/include/executorch/ExecuTorchLLM/ExecuTorchLLMConfig.h +56 -0
- package/third-party/include/executorch/ExecuTorchLLM/ExecuTorchLLMError.h +16 -0
- package/third-party/include/executorch/ExecuTorchLLM/ExecuTorchLLMMultimodalRunner.h +227 -0
- package/third-party/include/executorch/ExecuTorchLLM/ExecuTorchLLMTextRunner.h +97 -0
- package/third-party/include/executorch/ExecuTorchLLM/module.modulemap +4 -0
- package/third-party/include/executorch/ExecuTorchLog.h +77 -0
- package/third-party/include/executorch/ExecuTorchModule.h +563 -0
- package/third-party/include/executorch/ExecuTorchTensor.h +1421 -0
- package/third-party/include/executorch/ExecuTorchValue.h +265 -0
- package/third-party/include/executorch/extension/module/bundled_module.h +131 -0
- package/third-party/include/executorch/extension/module/module.h +649 -0
- package/third-party/include/executorch/extension/tensor/tensor.h +14 -0
- package/third-party/include/executorch/extension/tensor/tensor_accessor.h +190 -0
- package/third-party/include/executorch/extension/tensor/tensor_ptr.h +409 -0
- package/third-party/include/executorch/extension/tensor/tensor_ptr_maker.h +653 -0
- package/third-party/include/executorch/extension/threadpool/cpuinfo_utils.h +24 -0
- package/third-party/include/executorch/extension/threadpool/threadpool.h +95 -0
- package/third-party/include/executorch/runtime/backend/backend_execution_context.h +71 -0
- package/third-party/include/executorch/runtime/backend/backend_init_context.h +84 -0
- package/third-party/include/executorch/runtime/backend/backend_option_context.h +34 -0
- package/third-party/include/executorch/runtime/backend/interface.h +227 -0
- package/third-party/include/executorch/runtime/backend/options.h +206 -0
- package/third-party/include/executorch/runtime/core/array_ref.h +235 -0
- package/third-party/include/executorch/runtime/core/data_loader.h +136 -0
- package/third-party/include/executorch/runtime/core/defines.h +20 -0
- package/third-party/include/executorch/runtime/core/error.h +256 -0
- package/third-party/include/executorch/runtime/core/evalue.h +515 -0
- package/third-party/include/executorch/runtime/core/event_tracer.h +580 -0
- package/third-party/include/executorch/runtime/core/event_tracer_hooks.h +332 -0
- package/third-party/include/executorch/runtime/core/event_tracer_hooks_delegate.h +197 -0
- package/third-party/include/executorch/runtime/core/exec_aten/exec_aten.h +170 -0
- package/third-party/include/executorch/runtime/core/exec_aten/util/dim_order_util.h +264 -0
- package/third-party/include/executorch/runtime/core/exec_aten/util/scalar_type_util.h +1313 -0
- package/third-party/include/executorch/runtime/core/exec_aten/util/tensor_dimension_limit.h +21 -0
- package/third-party/include/executorch/runtime/core/exec_aten/util/tensor_shape_to_c_string.h +69 -0
- package/third-party/include/executorch/runtime/core/exec_aten/util/tensor_util.h +1251 -0
- package/third-party/include/executorch/runtime/core/freeable_buffer.h +107 -0
- package/third-party/include/executorch/runtime/core/function_ref.h +100 -0
- package/third-party/include/executorch/runtime/core/hierarchical_allocator.h +107 -0
- package/third-party/include/executorch/runtime/core/memory_allocator.h +208 -0
- package/third-party/include/executorch/runtime/core/named_data_map.h +76 -0
- package/third-party/include/executorch/runtime/core/portable_type/bfloat16.h +27 -0
- package/third-party/include/executorch/runtime/core/portable_type/bfloat16_math.h +14 -0
- package/third-party/include/executorch/runtime/core/portable_type/bits_types.h +83 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/macros/Export.h +1 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h +1 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +1 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-math.h +266 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16.h +1 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/Half-inl.h +1 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/Half.h +8 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/TypeSafeSignMath.h +1 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/bit_cast.h +1 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/complex.h +72 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/complex_math.h +399 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/complex_utils.h +41 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/floating_point_utils.h +1 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/irange.h +107 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/llvmMathExtras.h +866 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/overflows.h +95 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/safe_numerics.h +97 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/macros/Export.h +154 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/macros/Macros.h +553 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/BFloat16.h +477 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/Half.h +781 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/TypeSafeSignMath.h +141 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/bit_cast.h +49 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/complex.h +593 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/floating_point_utils.h +38 -0
- package/third-party/include/executorch/runtime/core/portable_type/complex.h +21 -0
- package/third-party/include/executorch/runtime/core/portable_type/device.h +70 -0
- package/third-party/include/executorch/runtime/core/portable_type/half.h +27 -0
- package/third-party/include/executorch/runtime/core/portable_type/optional.h +36 -0
- package/third-party/include/executorch/runtime/core/portable_type/qint_types.h +83 -0
- package/third-party/include/executorch/runtime/core/portable_type/scalar.h +110 -0
- package/third-party/include/executorch/runtime/core/portable_type/scalar_type.h +154 -0
- package/third-party/include/executorch/runtime/core/portable_type/string_view.h +29 -0
- package/third-party/include/executorch/runtime/core/portable_type/tensor.h +142 -0
- package/third-party/include/executorch/runtime/core/portable_type/tensor_impl.h +281 -0
- package/third-party/include/executorch/runtime/core/portable_type/tensor_options.h +60 -0
- package/third-party/include/executorch/runtime/core/result.h +258 -0
- package/third-party/include/executorch/runtime/core/span.h +97 -0
- package/third-party/include/executorch/runtime/core/tag.h +90 -0
- package/third-party/include/executorch/runtime/core/tensor_layout.h +79 -0
- package/third-party/include/executorch/runtime/core/tensor_shape_dynamism.h +39 -0
- package/third-party/include/executorch/runtime/executor/memory_manager.h +113 -0
- package/third-party/include/executorch/runtime/executor/merged_data_map.h +142 -0
- package/third-party/include/executorch/runtime/executor/method.h +412 -0
- package/third-party/include/executorch/runtime/executor/method_meta.h +298 -0
- package/third-party/include/executorch/runtime/executor/program.h +309 -0
- package/third-party/include/executorch/runtime/executor/pte_data_map.h +145 -0
- package/third-party/include/executorch/runtime/executor/tensor_parser.h +157 -0
- package/third-party/include/executorch/runtime/kernel/kernel_runtime_context.h +122 -0
- package/third-party/include/executorch/runtime/kernel/operator_registry.h +280 -0
- package/third-party/include/executorch/runtime/platform/abort.h +36 -0
- package/third-party/include/executorch/runtime/platform/assert.h +119 -0
- package/third-party/include/executorch/runtime/platform/clock.h +43 -0
- package/third-party/include/executorch/runtime/platform/compat_unistd.h +75 -0
- package/third-party/include/executorch/runtime/platform/compiler.h +201 -0
- package/third-party/include/executorch/runtime/platform/log.h +177 -0
- package/third-party/include/executorch/runtime/platform/platform.h +259 -0
- package/third-party/include/executorch/runtime/platform/profiler.h +292 -0
- package/third-party/include/executorch/runtime/platform/runtime.h +35 -0
- package/third-party/include/executorch/runtime/platform/system.h +49 -0
- package/third-party/include/executorch/runtime/platform/types.h +24 -0
- package/third-party/include/executorch/schema/extended_header.h +85 -0
- package/third-party/include/headeronly/macros/Export.h +88 -0
- package/third-party/include/opencv2/core/affine.hpp +676 -0
- package/third-party/include/opencv2/core/async.hpp +107 -0
- package/third-party/include/opencv2/core/base.hpp +735 -0
- package/third-party/include/opencv2/core/bindings_utils.hpp +279 -0
- package/third-party/include/opencv2/core/bufferpool.hpp +39 -0
- package/third-party/include/opencv2/core/check.hpp +231 -0
- package/third-party/include/opencv2/core/core.hpp +55 -0
- package/third-party/include/opencv2/core/core_c.h +3261 -0
- package/third-party/include/opencv2/core/cv_cpu_dispatch.h +404 -0
- package/third-party/include/opencv2/core/cv_cpu_helper.h +856 -0
- package/third-party/include/opencv2/core/cvdef.h +1003 -0
- package/third-party/include/opencv2/core/cvstd.hpp +196 -0
- package/third-party/include/opencv2/core/cvstd.inl.hpp +188 -0
- package/third-party/include/opencv2/core/cvstd_wrapper.hpp +187 -0
- package/third-party/include/opencv2/core/detail/async_promise.hpp +73 -0
- package/third-party/include/opencv2/core/detail/dispatch_helper.impl.hpp +48 -0
- package/third-party/include/opencv2/core/detail/exception_ptr.hpp +24 -0
- package/third-party/include/opencv2/core/dualquaternion.hpp +1054 -0
- package/third-party/include/opencv2/core/dualquaternion.inl.hpp +464 -0
- package/third-party/include/opencv2/core/eigen.hpp +405 -0
- package/third-party/include/opencv2/core/fast_math.hpp +433 -0
- package/third-party/include/opencv2/core/hal/hal.hpp +451 -0
- package/third-party/include/opencv2/core/hal/interface.h +191 -0
- package/third-party/include/opencv2/core/hal/intrin.hpp +1222 -0
- package/third-party/include/opencv2/core/hal/intrin_avx.hpp +3378 -0
- package/third-party/include/opencv2/core/hal/intrin_avx512.hpp +3688 -0
- package/third-party/include/opencv2/core/hal/intrin_cpp.hpp +3446 -0
- package/third-party/include/opencv2/core/hal/intrin_forward.hpp +195 -0
- package/third-party/include/opencv2/core/hal/intrin_lasx.hpp +3243 -0
- package/third-party/include/opencv2/core/hal/intrin_lsx.hpp +2671 -0
- package/third-party/include/opencv2/core/hal/intrin_math.hpp +772 -0
- package/third-party/include/opencv2/core/hal/intrin_msa.hpp +1973 -0
- package/third-party/include/opencv2/core/hal/intrin_neon.hpp +2710 -0
- package/third-party/include/opencv2/core/hal/intrin_rvv071.hpp +3452 -0
- package/third-party/include/opencv2/core/hal/intrin_rvv_scalable.hpp +2559 -0
- package/third-party/include/opencv2/core/hal/intrin_sse.hpp +3528 -0
- package/third-party/include/opencv2/core/hal/intrin_sse_em.hpp +175 -0
- package/third-party/include/opencv2/core/hal/intrin_vsx.hpp +1756 -0
- package/third-party/include/opencv2/core/hal/intrin_wasm.hpp +2911 -0
- package/third-party/include/opencv2/core/hal/msa_macros.h +2079 -0
- package/third-party/include/opencv2/core/hal/simd_utils.impl.hpp +313 -0
- package/third-party/include/opencv2/core/mat.hpp +3842 -0
- package/third-party/include/opencv2/core/mat.inl.hpp +2753 -0
- package/third-party/include/opencv2/core/matx.hpp +603 -0
- package/third-party/include/opencv2/core/matx.inl.hpp +1132 -0
- package/third-party/include/opencv2/core/neon_utils.hpp +127 -0
- package/third-party/include/opencv2/core/operations.hpp +610 -0
- package/third-party/include/opencv2/core/optim.hpp +362 -0
- package/third-party/include/opencv2/core/parallel/backend/parallel_for.openmp.hpp +66 -0
- package/third-party/include/opencv2/core/parallel/backend/parallel_for.tbb.hpp +148 -0
- package/third-party/include/opencv2/core/parallel/parallel_backend.hpp +108 -0
- package/third-party/include/opencv2/core/persistence.hpp +1321 -0
- package/third-party/include/opencv2/core/quaternion.hpp +1889 -0
- package/third-party/include/opencv2/core/quaternion.inl.hpp +907 -0
- package/third-party/include/opencv2/core/saturate.hpp +347 -0
- package/third-party/include/opencv2/core/simd_intrinsics.hpp +90 -0
- package/third-party/include/opencv2/core/softfloat.hpp +657 -0
- package/third-party/include/opencv2/core/sse_utils.hpp +861 -0
- package/third-party/include/opencv2/core/traits.hpp +417 -0
- package/third-party/include/opencv2/core/types.hpp +2368 -0
- package/third-party/include/opencv2/core/types_c.h +2064 -0
- package/third-party/include/opencv2/core/utility.hpp +1296 -0
- package/third-party/include/opencv2/core/utils/allocator_stats.hpp +31 -0
- package/third-party/include/opencv2/core/utils/allocator_stats.impl.hpp +111 -0
- package/third-party/include/opencv2/core/utils/filesystem.hpp +91 -0
- package/third-party/include/opencv2/core/utils/fp_control_utils.hpp +70 -0
- package/third-party/include/opencv2/core/utils/instrumentation.hpp +127 -0
- package/third-party/include/opencv2/core/utils/logger.defines.hpp +50 -0
- package/third-party/include/opencv2/core/utils/logger.hpp +258 -0
- package/third-party/include/opencv2/core/utils/logtag.hpp +27 -0
- package/third-party/include/opencv2/core/utils/tls.hpp +230 -0
- package/third-party/include/opencv2/core/utils/trace.hpp +281 -0
- package/third-party/include/opencv2/core/version.hpp +29 -0
- package/third-party/include/opencv2/core/vsx_utils.hpp +1115 -0
- package/third-party/include/opencv2/core.hpp +3699 -0
- package/third-party/include/opencv2/cvconfig.h +155 -0
- package/third-party/include/opencv2/dnn/dnn.hpp +51 -0
- package/third-party/include/opencv2/dnn.hpp +17 -0
- package/third-party/include/opencv2/features2d/features2d.hpp +55 -0
- package/third-party/include/opencv2/features2d/hal/interface.h +32 -0
- package/third-party/include/opencv2/features2d.hpp +1756 -0
- package/third-party/include/opencv2/highgui/highgui.hpp +113 -0
- package/third-party/include/opencv2/highgui.hpp +17 -0
- package/third-party/include/opencv2/imgproc/bindings.hpp +34 -0
- package/third-party/include/opencv2/imgproc/detail/gcgraph.hpp +355 -0
- package/third-party/include/opencv2/imgproc/detail/legacy.hpp +35 -0
- package/third-party/include/opencv2/imgproc/hal/hal.hpp +246 -0
- package/third-party/include/opencv2/imgproc/hal/interface.h +52 -0
- package/third-party/include/opencv2/imgproc/imgproc.hpp +55 -0
- package/third-party/include/opencv2/imgproc/imgproc_c.h +1261 -0
- package/third-party/include/opencv2/imgproc/segmentation.hpp +168 -0
- package/third-party/include/opencv2/imgproc/types_c.h +632 -0
- package/third-party/include/opencv2/imgproc.hpp +5956 -0
- package/third-party/include/opencv2/opencv.hpp +102 -0
- package/third-party/include/opencv2/opencv_modules.hpp +19 -0
- package/third-party/include/opencv2/photo/legacy/constants_c.h +10 -0
- package/third-party/include/opencv2/photo/photo.hpp +55 -0
- package/third-party/include/opencv2/photo.hpp +975 -0
- package/third-party/include/opencv2/video/background_segm.hpp +341 -0
- package/third-party/include/opencv2/video/detail/tracking.detail.hpp +435 -0
- package/third-party/include/opencv2/video/legacy/constants_c.h +15 -0
- package/third-party/include/opencv2/video/tracking.hpp +1014 -0
- package/third-party/include/opencv2/video/video.hpp +55 -0
- package/third-party/include/opencv2/video.hpp +65 -0
- package/third-party/include/pthreadpool/pthreadpool.h +2236 -0
- package/third-party/include/tokenizers-cpp/tokenizers_c.h +61 -0
- package/third-party/include/tokenizers-cpp/tokenizers_cpp.h +118 -0
- package/third-party/include/torch/headeronly/macros/Export.h +154 -0
- package/third-party/include/torch/headeronly/macros/Macros.h +553 -0
- package/third-party/include/torch/headeronly/util/BFloat16.h +477 -0
- package/third-party/include/torch/headeronly/util/Half.h +781 -0
- package/third-party/include/torch/headeronly/util/TypeSafeSignMath.h +141 -0
- package/third-party/include/torch/headeronly/util/bit_cast.h +49 -0
- package/third-party/include/torch/headeronly/util/complex.h +593 -0
- package/third-party/include/torch/headeronly/util/floating_point_utils.h +38 -0
- package/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib +0 -0
- package/{ios → third-party/ios}/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Info.plist +0 -0
- package/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib +0 -0
- package/{ios → third-party/ios}/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Info.plist +0 -0
- package/third-party/ios/libs/cpuinfo/libcpuinfo.a +0 -0
- package/third-party/ios/libs/pthreadpool/physical-arm64-release/libpthreadpool.a +0 -0
- package/third-party/ios/libs/pthreadpool/simulator-arm64-debug/libpthreadpool.a +0 -0
- package/third-party/ios/libs/tokenizers-cpp/physical-arm64-release/libsentencepiece.a +0 -0
- package/{ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib → third-party/ios/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_c.a} +0 -0
- package/third-party/ios/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_cpp.a +0 -0
- package/third-party/ios/libs/tokenizers-cpp/simulator-arm64-debug/libsentencepiece.a +0 -0
- package/third-party/ios/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_c.a +0 -0
- package/third-party/ios/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_cpp.a +0 -0
- package/LICENSE +0 -79
- package/android/src/main/java/com/swmansion/rnexecutorch/Classification.kt +0 -64
- package/android/src/main/java/com/swmansion/rnexecutorch/ETModule.kt +0 -90
- package/android/src/main/java/com/swmansion/rnexecutorch/ImageSegmentation.kt +0 -58
- package/android/src/main/java/com/swmansion/rnexecutorch/LLM.kt +0 -63
- package/android/src/main/java/com/swmansion/rnexecutorch/OCR.kt +0 -90
- package/android/src/main/java/com/swmansion/rnexecutorch/ObjectDetection.kt +0 -64
- package/android/src/main/java/com/swmansion/rnexecutorch/SpeechToText.kt +0 -91
- package/android/src/main/java/com/swmansion/rnexecutorch/StyleTransfer.kt +0 -54
- package/android/src/main/java/com/swmansion/rnexecutorch/TextEmbeddings.kt +0 -51
- package/android/src/main/java/com/swmansion/rnexecutorch/Tokenizer.kt +0 -86
- package/android/src/main/java/com/swmansion/rnexecutorch/VerticalOCR.kt +0 -179
- package/android/src/main/java/com/swmansion/rnexecutorch/models/BaseModel.kt +0 -54
- package/android/src/main/java/com/swmansion/rnexecutorch/models/TextEmbeddings/TextEmbeddingsModel.kt +0 -48
- package/android/src/main/java/com/swmansion/rnexecutorch/models/TextEmbeddings/TextEmbeddingsUtils.kt +0 -37
- package/android/src/main/java/com/swmansion/rnexecutorch/models/classification/ClassificationModel.kt +0 -46
- package/android/src/main/java/com/swmansion/rnexecutorch/models/classification/Constants.kt +0 -1005
- package/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/Constants.kt +0 -26
- package/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/ImageSegmentationModel.kt +0 -142
- package/android/src/main/java/com/swmansion/rnexecutorch/models/objectDetection/SSDLiteLargeModel.kt +0 -74
- package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/Detector.kt +0 -82
- package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/RecognitionHandler.kt +0 -117
- package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/Recognizer.kt +0 -51
- package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/VerticalDetector.kt +0 -89
- package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/utils/CTCLabelConverter.kt +0 -58
- package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/utils/Constants.kt +0 -31
- package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/utils/DetectorUtils.kt +0 -608
- package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/utils/RecognizerUtils.kt +0 -430
- package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/BaseS2TDecoder.kt +0 -39
- package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/BaseS2TModule.kt +0 -43
- package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/Moonshine.kt +0 -16
- package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/MoonshineDecoder.kt +0 -23
- package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/MoonshineEncoder.kt +0 -20
- package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/Whisper.kt +0 -16
- package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/WhisperDecoder.kt +0 -22
- package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/WhisperEncoder.kt +0 -29
- package/android/src/main/java/com/swmansion/rnexecutorch/models/styleTransfer/StyleTransferModel.kt +0 -43
- package/android/src/main/java/com/swmansion/rnexecutorch/utils/ArrayUtils.kt +0 -87
- package/android/src/main/java/com/swmansion/rnexecutorch/utils/ETError.kt +0 -34
- package/android/src/main/java/com/swmansion/rnexecutorch/utils/ImageProcessor.kt +0 -237
- package/android/src/main/java/com/swmansion/rnexecutorch/utils/Numerical.kt +0 -8
- package/android/src/main/java/com/swmansion/rnexecutorch/utils/ObjectDetectionUtils.kt +0 -201
- package/android/src/main/java/com/swmansion/rnexecutorch/utils/STFT.kt +0 -50
- package/android/src/main/java/com/swmansion/rnexecutorch/utils/TensorUtils.kt +0 -103
- package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/ETModel.h +0 -27
- package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h +0 -14
- package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/LLaMARunner.h +0 -32
- package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/ETModel.h +0 -27
- package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h +0 -14
- package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/LLaMARunner.h +0 -32
- package/ios/RnExecutorch/Classification.h +0 -5
- package/ios/RnExecutorch/Classification.mm +0 -54
- package/ios/RnExecutorch/ETModule.h +0 -5
- package/ios/RnExecutorch/ETModule.mm +0 -75
- package/ios/RnExecutorch/ImageSegmentation.h +0 -5
- package/ios/RnExecutorch/ImageSegmentation.mm +0 -60
- package/ios/RnExecutorch/LLM.h +0 -5
- package/ios/RnExecutorch/LLM.mm +0 -78
- package/ios/RnExecutorch/OCR.h +0 -5
- package/ios/RnExecutorch/OCR.mm +0 -96
- package/ios/RnExecutorch/ObjectDetection.h +0 -5
- package/ios/RnExecutorch/ObjectDetection.mm +0 -56
- package/ios/RnExecutorch/SpeechToText.h +0 -5
- package/ios/RnExecutorch/SpeechToText.mm +0 -125
- package/ios/RnExecutorch/StyleTransfer.h +0 -5
- package/ios/RnExecutorch/StyleTransfer.mm +0 -55
- package/ios/RnExecutorch/TextEmbeddings.h +0 -5
- package/ios/RnExecutorch/TextEmbeddings.mm +0 -62
- package/ios/RnExecutorch/Tokenizer.h +0 -5
- package/ios/RnExecutorch/Tokenizer.mm +0 -83
- package/ios/RnExecutorch/VerticalOCR.h +0 -5
- package/ios/RnExecutorch/VerticalOCR.mm +0 -183
- package/ios/RnExecutorch/models/BaseModel.h +0 -21
- package/ios/RnExecutorch/models/BaseModel.mm +0 -43
- package/ios/RnExecutorch/models/classification/ClassificationModel.h +0 -10
- package/ios/RnExecutorch/models/classification/ClassificationModel.mm +0 -53
- package/ios/RnExecutorch/models/classification/Constants.h +0 -3
- package/ios/RnExecutorch/models/image_segmentation/Constants.h +0 -4
- package/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h +0 -10
- package/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm +0 -146
- package/ios/RnExecutorch/models/object_detection/SSDLiteLargeModel.hpp +0 -11
- package/ios/RnExecutorch/models/object_detection/SSDLiteLargeModel.mm +0 -64
- package/ios/RnExecutorch/models/ocr/Detector.h +0 -9
- package/ios/RnExecutorch/models/ocr/Detector.mm +0 -101
- package/ios/RnExecutorch/models/ocr/RecognitionHandler.h +0 -16
- package/ios/RnExecutorch/models/ocr/RecognitionHandler.mm +0 -135
- package/ios/RnExecutorch/models/ocr/Recognizer.h +0 -8
- package/ios/RnExecutorch/models/ocr/Recognizer.mm +0 -77
- package/ios/RnExecutorch/models/ocr/VerticalDetector.h +0 -10
- package/ios/RnExecutorch/models/ocr/VerticalDetector.mm +0 -118
- package/ios/RnExecutorch/models/ocr/utils/CTCLabelConverter.h +0 -16
- package/ios/RnExecutorch/models/ocr/utils/CTCLabelConverter.mm +0 -80
- package/ios/RnExecutorch/models/ocr/utils/Constants.h +0 -26
- package/ios/RnExecutorch/models/ocr/utils/DetectorUtils.h +0 -31
- package/ios/RnExecutorch/models/ocr/utils/DetectorUtils.mm +0 -754
- package/ios/RnExecutorch/models/ocr/utils/OCRUtils.h +0 -10
- package/ios/RnExecutorch/models/ocr/utils/OCRUtils.mm +0 -67
- package/ios/RnExecutorch/models/ocr/utils/RecognizerUtils.h +0 -35
- package/ios/RnExecutorch/models/ocr/utils/RecognizerUtils.mm +0 -331
- package/ios/RnExecutorch/models/stt/Moonshine.hpp +0 -13
- package/ios/RnExecutorch/models/stt/Moonshine.mm +0 -64
- package/ios/RnExecutorch/models/stt/MoonshineDecoder.hpp +0 -16
- package/ios/RnExecutorch/models/stt/MoonshineDecoder.mm +0 -24
- package/ios/RnExecutorch/models/stt/MoonshineEncoder.hpp +0 -15
- package/ios/RnExecutorch/models/stt/MoonshineEncoder.mm +0 -18
- package/ios/RnExecutorch/models/stt/SpeechToTextBaseModel.hpp +0 -26
- package/ios/RnExecutorch/models/stt/SpeechToTextBaseModel.mm +0 -19
- package/ios/RnExecutorch/models/stt/Whisper.hpp +0 -12
- package/ios/RnExecutorch/models/stt/Whisper.mm +0 -68
- package/ios/RnExecutorch/models/stt/WhisperDecoder.hpp +0 -16
- package/ios/RnExecutorch/models/stt/WhisperDecoder.mm +0 -22
- package/ios/RnExecutorch/models/stt/WhisperEncoder.hpp +0 -15
- package/ios/RnExecutorch/models/stt/WhisperEncoder.mm +0 -21
- package/ios/RnExecutorch/models/style_transfer/StyleTransferModel.h +0 -11
- package/ios/RnExecutorch/models/style_transfer/StyleTransferModel.mm +0 -50
- package/ios/RnExecutorch/models/text_embeddings/TextEmbeddingsModel.h +0 -15
- package/ios/RnExecutorch/models/text_embeddings/TextEmbeddingsModel.mm +0 -45
- package/ios/RnExecutorch/models/text_embeddings/TextEmbeddingsUtils.h +0 -8
- package/ios/RnExecutorch/models/text_embeddings/TextEmbeddingsUtils.mm +0 -49
- package/ios/RnExecutorch/utils/Constants.h +0 -8
- package/ios/RnExecutorch/utils/Conversions.h +0 -15
- package/ios/RnExecutorch/utils/ETError.h +0 -26
- package/ios/RnExecutorch/utils/ImageProcessor.h +0 -15
- package/ios/RnExecutorch/utils/ImageProcessor.mm +0 -147
- package/ios/RnExecutorch/utils/Numerical.h +0 -1
- package/ios/RnExecutorch/utils/Numerical.mm +0 -18
- package/ios/RnExecutorch/utils/ObjectDetectionUtils.hpp +0 -23
- package/ios/RnExecutorch/utils/SFFT.hpp +0 -13
- package/ios/RnExecutorch/utils/SFFT.mm +0 -71
- package/ios/RnExecutorch/utils/ScalarType.h +0 -14
- package/ios/RnExecutorch/utils/ScalarType.mm +0 -21
- package/lib/module/constants/sttDefaults.js +0 -72
- package/lib/module/constants/sttDefaults.js.map +0 -1
- package/lib/module/controllers/SpeechToTextController.js +0 -307
- package/lib/module/controllers/SpeechToTextController.js.map +0 -1
- package/lib/module/native/NativeClassification.js +0 -5
- package/lib/module/native/NativeClassification.js.map +0 -1
- package/lib/module/native/NativeETModule.js +0 -5
- package/lib/module/native/NativeETModule.js.map +0 -1
- package/lib/module/native/NativeImageSegmentation.js +0 -5
- package/lib/module/native/NativeImageSegmentation.js.map +0 -1
- package/lib/module/native/NativeLLM.js +0 -5
- package/lib/module/native/NativeLLM.js.map +0 -1
- package/lib/module/native/NativeOCR.js +0 -5
- package/lib/module/native/NativeOCR.js.map +0 -1
- package/lib/module/native/NativeObjectDetection.js +0 -5
- package/lib/module/native/NativeObjectDetection.js.map +0 -1
- package/lib/module/native/NativeSpeechToText.js +0 -5
- package/lib/module/native/NativeSpeechToText.js.map +0 -1
- package/lib/module/native/NativeStyleTransfer.js +0 -5
- package/lib/module/native/NativeStyleTransfer.js.map +0 -1
- package/lib/module/native/NativeTextEmbeddings.js +0 -5
- package/lib/module/native/NativeTextEmbeddings.js.map +0 -1
- package/lib/module/native/NativeTokenizer.js +0 -5
- package/lib/module/native/NativeTokenizer.js.map +0 -1
- package/lib/module/native/NativeVerticalOCR.js +0 -5
- package/lib/module/native/NativeVerticalOCR.js.map +0 -1
- package/lib/module/package.json +0 -1
- package/lib/module/utils/stt.js +0 -22
- package/lib/module/utils/stt.js.map +0 -1
- package/lib/typescript/constants/sttDefaults.d.ts +0 -28
- package/lib/typescript/constants/sttDefaults.d.ts.map +0 -1
- package/lib/typescript/controllers/SpeechToTextController.d.ts +0 -52
- package/lib/typescript/controllers/SpeechToTextController.d.ts.map +0 -1
- package/lib/typescript/native/NativeClassification.d.ts +0 -10
- package/lib/typescript/native/NativeClassification.d.ts.map +0 -1
- package/lib/typescript/native/NativeETModule.d.ts +0 -9
- package/lib/typescript/native/NativeETModule.d.ts.map +0 -1
- package/lib/typescript/native/NativeImageSegmentation.d.ts +0 -10
- package/lib/typescript/native/NativeImageSegmentation.d.ts.map +0 -1
- package/lib/typescript/native/NativeLLM.d.ts +0 -12
- package/lib/typescript/native/NativeLLM.d.ts.map +0 -1
- package/lib/typescript/native/NativeOCR.d.ts +0 -9
- package/lib/typescript/native/NativeOCR.d.ts.map +0 -1
- package/lib/typescript/native/NativeObjectDetection.d.ts +0 -9
- package/lib/typescript/native/NativeObjectDetection.d.ts.map +0 -1
- package/lib/typescript/native/NativeSpeechToText.d.ts +0 -12
- package/lib/typescript/native/NativeSpeechToText.d.ts.map +0 -1
- package/lib/typescript/native/NativeStyleTransfer.d.ts.map +0 -1
- package/lib/typescript/native/NativeTextEmbeddings.d.ts +0 -8
- package/lib/typescript/native/NativeTextEmbeddings.d.ts.map +0 -1
- package/lib/typescript/native/NativeTokenizer.d.ts +0 -12
- package/lib/typescript/native/NativeTokenizer.d.ts.map +0 -1
- package/lib/typescript/native/NativeVerticalOCR.d.ts +0 -9
- package/lib/typescript/native/NativeVerticalOCR.d.ts.map +0 -1
- package/lib/typescript/utils/stt.d.ts +0 -2
- package/lib/typescript/utils/stt.d.ts.map +0 -1
- package/src/constants/sttDefaults.ts +0 -86
- package/src/controllers/SpeechToTextController.ts +0 -458
- package/src/index.tsx +0 -47
- package/src/native/NativeClassification.ts +0 -9
- package/src/native/NativeETModule.ts +0 -14
- package/src/native/NativeImageSegmentation.ts +0 -14
- package/src/native/NativeLLM.ts +0 -14
- package/src/native/NativeOCR.ts +0 -16
- package/src/native/NativeObjectDetection.ts +0 -10
- package/src/native/NativeSpeechToText.ts +0 -17
- package/src/native/NativeStyleTransfer.ts +0 -10
- package/src/native/NativeTextEmbeddings.ts +0 -9
- package/src/native/NativeTokenizer.ts +0 -13
- package/src/native/NativeVerticalOCR.ts +0 -16
- package/src/utils/stt.ts +0 -28
- package/{ios → third-party/ios}/ExecutorchLib.xcframework/Info.plist +4 -4
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <opencv2/opencv.hpp>
|
|
4
|
+
#include <rnexecutorch/models/ocr/Types.h>
|
|
5
|
+
#include <span>
|
|
6
|
+
|
|
7
|
+
namespace rnexecutorch::models::ocr::utils {
|
|
8
|
+
std::pair<cv::Mat, cv::Mat> interleavedArrayToMats(std::span<const float> data,
|
|
9
|
+
cv::Size size);
|
|
10
|
+
/**
|
|
11
|
+
* This method applies a series of image processing operations to identify
|
|
12
|
+
* likely areas of text in the textMap and return the bounding boxes for single
|
|
13
|
+
* words.
|
|
14
|
+
*
|
|
15
|
+
* @param textMap A cv::Mat representing a heat map of the characters of text
|
|
16
|
+
* being present in an image.
|
|
17
|
+
* @param affinityMap A cv::Mat representing a heat map of the affinity between
|
|
18
|
+
* characters.
|
|
19
|
+
* @param textThreshold A float representing the threshold for the text map.
|
|
20
|
+
* @param linkThreshold A float representing the threshold for the affinity
|
|
21
|
+
* map.
|
|
22
|
+
* @param lowTextThreshold A float representing the low-text (lower-bound) threshold for the text map.
|
|
23
|
+
*
|
|
24
|
+
* @return A vector containing DetectorBBox bounding boxes. Each DetectorBBox
|
|
25
|
+
* includes:
|
|
26
|
+
* - "bbox": an array of Point values representing the vertices of the
|
|
27
|
+
* detected text box.
|
|
28
|
+
* - "angle": a float representing the rotation angle of the box.
|
|
29
|
+
*/
|
|
30
|
+
std::vector<types::DetectorBBox> getDetBoxesFromTextMap(cv::Mat &textMap,
|
|
31
|
+
cv::Mat &affinityMap,
|
|
32
|
+
float textThreshold,
|
|
33
|
+
float linkThreshold,
|
|
34
|
+
float lowTextThreshold);
|
|
35
|
+
std::vector<types::DetectorBBox>
|
|
36
|
+
getDetBoxesFromTextMapVertical(cv::Mat &textMap, cv::Mat &affinityMap,
|
|
37
|
+
float textThreshold, float linkThreshold,
|
|
38
|
+
bool independentCharacters);
|
|
39
|
+
|
|
40
|
+
float calculateRestoreRatio(int32_t currentSize, int32_t desiredSize);
|
|
41
|
+
|
|
42
|
+
void restoreBboxRatio(std::vector<types::DetectorBBox> &boxes,
|
|
43
|
+
float restoreRatio);
|
|
44
|
+
/**
|
|
45
|
+
* This method processes a vector of DetectorBBox bounding boxes, each
|
|
46
|
+
* containing details about individual text boxes, and attempts to group and
|
|
47
|
+
* merge these boxes based on specified criteria including proximity, alignment,
|
|
48
|
+
* and size thresholds. It prioritizes merging of boxes that are aligned closely
|
|
49
|
+
* in angle, are near each other, and whose sizes are compatible based on the
|
|
50
|
+
* given thresholds.
|
|
51
|
+
*
|
|
52
|
+
* @param boxes A vector of DetectorBBoxes where each bounding box
|
|
53
|
+
* represents a text box.
|
|
54
|
+
* @param centerThreshold A float representing the threshold for considering
|
|
55
|
+
* the distance between center and fitted line.
|
|
56
|
+
* @param distanceThreshold A float that defines the maximum allowed distance
|
|
57
|
+
* between boxes for them to be considered for merging.
|
|
58
|
+
* @param heightThreshold A float representing the maximum allowed difference
|
|
59
|
+
* in height between boxes for merging.
|
|
60
|
+
* @param minSideThreshold An int that defines the minimum dimension threshold
|
|
61
|
+
* to filter out small boxes after grouping.
|
|
62
|
+
* @param maxSideThreshold An int that specifies the maximum dimension threshold
|
|
63
|
+
* for filtering boxes post-grouping.
|
|
64
|
+
* @param maxWidth An int that represents the maximum width allowable for a
|
|
65
|
+
* merged box.
|
|
66
|
+
*
|
|
67
|
+
* @return A vector of DetectorBBoxes representing the merged boxes.
|
|
68
|
+
*
|
|
69
|
+
* Processing Steps:
|
|
70
|
+
* 1. Sort initial boxes based on their maximum side length.
|
|
71
|
+
* 2. Sequentially merge boxes considering alignment, proximity, and size
|
|
72
|
+
* compatibility.
|
|
73
|
+
* 3. Post-processing to remove any boxes that are too small.
|
|
74
|
+
* 4. Sort the final array of boxes by their vertical positions.
|
|
75
|
+
*/
|
|
76
|
+
std::vector<types::DetectorBBox>
|
|
77
|
+
groupTextBoxes(std::vector<types::DetectorBBox> &boxes, float centerThreshold,
|
|
78
|
+
float distanceThreshold, float heightThreshold,
|
|
79
|
+
int32_t minSideThreshold, int32_t maxSideThreshold,
|
|
80
|
+
int32_t maxWidth);
|
|
81
|
+
} // namespace rnexecutorch::models::ocr::utils
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
#include "RecognitionHandlerUtils.h"
|
|
2
|
+
#include <algorithm>
|
|
3
|
+
#include <rnexecutorch/data_processing/ImageProcessing.h>
|
|
4
|
+
#include <rnexecutorch/models/ocr/Constants.h>
|
|
5
|
+
|
|
6
|
+
namespace rnexecutorch::models::ocr::utils {
|
|
7
|
+
/**
 * Computes the centering offsets obtained when `size` is scaled (aspect ratio
 * kept) to fit inside `desiredSize`, together with the larger of the two
 * size/desiredSize ratios.
 *
 * Note that the ratio placed in the result is max(size / desiredSize) per
 * axis, not the fit scale used to derive the offsets.
 */
types::PaddingInfo calculateResizeRatioAndPaddings(cv::Size size,
                                                   cv::Size desiredSize) {
  // Scale that makes the source fit entirely inside the target.
  const float fitScale =
      std::min(static_cast<float>(desiredSize.height) / size.height,
               static_cast<float>(desiredSize.width) / size.width);

  // Source dimensions after applying the fit scale (truncated to pixels).
  const auto fittedHeight = static_cast<int32_t>(size.height * fitScale);
  const auto fittedWidth = static_cast<int32_t>(size.width * fitScale);

  // Half of the leftover space on each axis is the top/left offset.
  const int32_t top = (desiredSize.height - fittedHeight) / 2;
  const int32_t left = (desiredSize.width - fittedWidth) / 2;

  // Ratio reported to the caller: the larger source/target ratio.
  const float reportedRatio =
      std::max(static_cast<float>(size.height) / desiredSize.height,
               static_cast<float>(size.width) / desiredSize.width);

  return {reportedRatio, top, left};
}
|
|
28
|
+
|
|
29
|
+
/**
 * Resizes `img` in place (Lanczos interpolation) so that its shorter side
 * equals `modelHeight`, using the aspect ratio of `size`.
 *
 * For width/height < 1 the output width is `modelHeight` and the height is
 * scaled up; otherwise the output height is `modelHeight` and the width is
 * scaled.
 */
void computeRatioAndResize(cv::Mat &img, cv::Size size, int32_t modelHeight) {
  const double aspect =
      static_cast<double>(size.width) / static_cast<double>(size.height);

  const cv::Size target =
      (aspect < 1.0)
          ? cv::Size(modelHeight, static_cast<int32_t>(modelHeight / aspect))
          : cv::Size(static_cast<int32_t>(modelHeight * aspect), modelHeight);

  cv::resize(img, img, target, 0.0, 0.0, cv::INTER_LANCZOS4);
}
|
|
42
|
+
|
|
43
|
+
/**
 * Crops the region described by `box` out of `image` after rotating the image
 * by `box.angle`, then resizes the crop with computeRatioAndResize.
 *
 * Steps:
 *  1. Fit a minimum-area rotated rectangle around the four bbox corners.
 *  2. Rotate the whole image about its centre by `box.angle` (the angle is
 *     handed to cv::getRotationMatrix2D unchanged).
 *  3. Apply the same affine matrix to the rectangle's vertices.
 *  4. Take the axis-aligned bounding rectangle of the transformed vertices,
 *     clip it to the image, and crop.
 *
 * @return The resized crop, or an empty cv::Mat when the clipped rectangle is
 *         empty.
 */
cv::Mat cropImage(types::DetectorBBox box, cv::Mat &image,
                  int32_t modelHeight) {
  // Convert the detector's corner points to cv::Point2f.
  std::array<cv::Point2f, 4> corners;
  for (std::size_t i = 0; i < corners.size(); ++i) {
    corners[i] = cv::Point2f(box.bbox[i].x, box.bbox[i].y);
  }

  // Vertices of the tightest rotated rectangle around the corners.
  const cv::RotatedRect rotatedRect = cv::minAreaRect(corners);
  std::vector<cv::Point2f> rectVertices(4);
  rotatedRect.points(rectVertices.data());

  // Rotate the image about its centre.
  const cv::Point2f imageCenter(image.cols / 2.0f, image.rows / 2.0f);
  const cv::Mat rotationMatrix =
      cv::getRotationMatrix2D(imageCenter, box.angle, 1.0);
  cv::Mat rotatedImage;
  cv::warpAffine(image, rotatedImage, rotationMatrix, image.size(),
                 cv::INTER_LINEAR);

  // Move the rectangle vertices with the same affine transform. Transforming
  // the four points directly avoids pushing a partially initialised scratch
  // matrix through cv::transform.
  std::vector<cv::Point2f> transformedPoints;
  cv::transform(rectVertices, transformedPoints, rotationMatrix);

  // Axis-aligned box around the transformed vertices, clipped to the image.
  cv::Rect boundingBox = cv::boundingRect(transformedPoints);
  boundingBox &= cv::Rect(0, 0, rotatedImage.cols, rotatedImage.rows);
  if (boundingBox.empty()) {
    return {};
  }

  cv::Mat croppedImage = rotatedImage(boundingBox).clone();
  computeRatioAndResize(croppedImage, boundingBox.size(), modelHeight);
  return croppedImage;
}
|
|
98
|
+
|
|
99
|
+
/**
 * Stretches the intensity range of `img` in place when its contrast,
 * (max - min) / 255, is below `target`. Images that already meet the target
 * are left untouched.
 *
 * When stretching happens the result is written back as CV_8U.
 */
void adjustContrastGrey(cv::Mat &img, double target) {
  constexpr double kFloor = 0.0;
  constexpr double kCeiling = 255.0;

  // Darkest and brightest values present in the image.
  double darkest = 0.0;
  double brightest = 0.0;
  cv::minMaxLoc(img, &darkest, &brightest);

  const double contrast = (brightest - darkest) / kCeiling;
  if (!(contrast < target)) {
    return;
  }

  constexpr double maxStretchIntensity = 200.0;
  constexpr int32_t minRangeClamp = 10;
  constexpr int32_t histogramShift = 25;

  const auto low = static_cast<int32_t>(darkest);
  const auto high = static_cast<int32_t>(brightest);

  // Stretch factor; the formula was obtained empirically. The observed range
  // is clamped from below so near-flat images are not amplified without bound.
  const double ratio = maxStretchIntensity / std::max(minRangeClamp, high - low);

  cv::Mat stretched;
  img.convertTo(stretched, CV_32F);

  // Shift the histogram so the darkest value lands at `histogramShift`, then
  // scale.
  stretched -= (low - histogramShift);
  stretched *= ratio;

  // Clamp to [0, 255]: cap the top first, then zero out anything not above 0.
  cv::threshold(stretched, stretched, kCeiling, kCeiling, cv::THRESH_TRUNC);
  cv::threshold(stretched, stretched, kFloor, kCeiling, cv::THRESH_TOZERO);

  stretched.convertTo(img, CV_8U);
}
|
|
130
|
+
|
|
131
|
+
/**
 * Picks the recognizer input width for `img` from its column count: the large
 * width if the image is at least that wide, otherwise the medium width if it
 * is at least that wide, otherwise the small width (the vertical variant when
 * `isVertical` is set).
 */
int32_t getDesiredWidth(const cv::Mat &img, bool isVertical) {
  if (img.cols >= constants::kLargeRecognizerWidth) {
    return constants::kLargeRecognizerWidth;
  }
  if (img.cols >= constants::kMediumRecognizerWidth) {
    return constants::kMediumRecognizerWidth;
  }
  if (isVertical) {
    return constants::kSmallVerticalRecognizerWidth;
  }
  return constants::kSmallRecognizerWidth;
}
|
|
142
|
+
|
|
143
|
+
/**
 * Produces a recognizer-ready copy of `image`: optional contrast adjustment,
 * padded resize to (selected width x modelHeight), conversion to float and
 * remapping via (x / 255 - 0.5) * 2.
 *
 * The input is cloned and never modified.
 */
cv::Mat normalizeForRecognizer(const cv::Mat &image, int32_t modelHeight,
                               double adjustContrast, bool isVertical) {
  cv::Mat working = image.clone();

  // A non-positive coefficient disables the contrast step.
  if (adjustContrast > 0.0) {
    adjustContrastGrey(working, adjustContrast);
  }

  // Width is chosen from the untouched input.
  const int32_t targetWidth = getDesiredWidth(image, isVertical);
  working = image_processing::resizePadded(working,
                                           cv::Size(targetWidth, modelHeight));

  // Scale by 1/255, then shift and scale by (x - 0.5) * 2.
  working.convertTo(working, CV_32F, 1.0f / 255.0f);
  working -= 0.5f;
  working *= 2.0f;

  return working;
}
|
|
159
|
+
} // namespace rnexecutorch::models::ocr::utils
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <opencv2/opencv.hpp>
|
|
4
|
+
#include <rnexecutorch/models/ocr/Types.h>
|
|
5
|
+
|
|
6
|
+
namespace rnexecutorch::models::ocr::utils {
|
|
7
|
+
/**
|
|
8
|
+
* @brief Calculates the resize ratio and padding offsets needed to fit an image
|
|
9
|
+
* into a target size while maintaining aspect ratio.
|
|
10
|
+
* @param size Original dimensions of the image.
|
|
11
|
+
* @param desiredSize Target size.
|
|
12
|
+
* @return Struct containing the scaling factor and top/left padding amounts for
|
|
13
|
+
* centering the image.
|
|
14
|
+
*/
|
|
15
|
+
types::PaddingInfo calculateResizeRatioAndPaddings(cv::Size size,
|
|
16
|
+
cv::Size desiredSize);
|
|
17
|
+
/**
|
|
18
|
+
* @brief Resizes an image proportionally to match a target height while
|
|
19
|
+
* maintaining aspect ratio.
|
|
20
|
+
* @param img Input/output image to resize.
|
|
21
|
+
* @param size Original dimensions of the image.
|
|
22
|
+
* @param modelHeight Target length of the shorter output side (the output height for inputs that are at least as wide as they are tall).
|
|
23
|
+
*/
|
|
24
|
+
void computeRatioAndResize(cv::Mat &img, cv::Size size, int32_t modelHeight);
|
|
25
|
+
/**
|
|
26
|
+
* @brief Crops and aligns a rotated bounding box region from an image, then
|
|
27
|
+
* resizes it to target height.
|
|
28
|
+
*
|
|
29
|
+
* Handles rotated boxes by:
|
|
30
|
+
* 1. Calculating minimum area rectangle around detected points
|
|
31
|
+
* 2. Rotating the entire image to align the box horizontally
|
|
32
|
+
* 3. Transforming the box coordinates to match the rotated image
|
|
33
|
+
* 4. Cropping the aligned region
|
|
34
|
+
*
|
|
35
|
+
* Resizing:
|
|
36
|
+
* - Maintains original aspect ratio while scaling to specified modelHeight
|
|
37
|
+
* - Rotation uses bilinear interpolation; resizing uses Lanczos interpolation
|
|
38
|
+
*
|
|
39
|
+
* @param box Detected bounding box with rotation angle and corner points
|
|
40
|
+
* @param image Source image to crop from
|
|
41
|
+
* @param modelHeight Target height for output (width scales proportionally)
|
|
42
|
+
* @return Cropped, aligned and resized image region (empty if invalid box)
|
|
43
|
+
*/
|
|
44
|
+
cv::Mat cropImage(types::DetectorBBox box, cv::Mat &image, int32_t modelHeight);
|
|
45
|
+
void adjustContrastGrey(cv::Mat &img, double target);
|
|
46
|
+
/**
|
|
47
|
+
* @brief Prepares an image for recognition models by standardizing size,
|
|
48
|
+
* contrast, and pixel values.
|
|
49
|
+
*
|
|
50
|
+
* Performs the following processing pipeline:
|
|
51
|
+
* 1. Adjusts contrast (if coefficient > 0)
|
|
52
|
+
* 2. Resizes to target height while:
|
|
53
|
+
* - Preserving aspect ratio (using padding if needed)
|
|
54
|
+
* - Selecting width to match one of the Recognizer accepted
|
|
55
|
+
* widths; (Large,Medium or Small RecognizerWidth)
|
|
56
|
+
* 3. Normalizes pixel values to [-1, 1] range (from [0,255] input)
|
|
57
|
+
*
|
|
58
|
+
* @param image Input image to process (any size, will be cloned)
|
|
59
|
+
* @param modelHeight Target output height in pixels
|
|
60
|
+
* @param adjustContrast Contrast adjustment coefficient (0.0 = no adjustment)
|
|
61
|
+
* @param isVertical Whether the image is in portrait orientation (affects width
|
|
62
|
+
* selection)
|
|
63
|
+
*
|
|
64
|
+
* @return Processed image with:
|
|
65
|
+
* - Standardized dimensions (selected width × modelHeight)
|
|
66
|
+
* - Adjusted contrast (if requested)
|
|
67
|
+
* - Normalized float32 values in [-1, 1] range
|
|
68
|
+
*/
|
|
69
|
+
cv::Mat normalizeForRecognizer(const cv::Mat &image, int32_t modelHeight,
|
|
70
|
+
double adjustContrast = 0.0,
|
|
71
|
+
bool isVertical = false);
|
|
72
|
+
} // namespace rnexecutorch::models::ocr::utils
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
#include "RecognizerUtils.h"
|
|
2
|
+
|
|
3
|
+
namespace rnexecutorch::models::ocr::utils {
|
|
4
|
+
/**
 * Row-wise softmax: each row has its maximum subtracted before
 * exponentiation, and is then divided by the row's sum of exponentials.
 */
cv::Mat softmax(const cv::Mat &inputs) {
  const int columnCount = inputs.cols;

  // Per-row maximum (single column), broadcast back to the input width.
  cv::Mat rowMax;
  cv::reduce(inputs, rowMax, 1, cv::REDUCE_MAX, CV_32F);

  cv::Mat exponentials;
  cv::exp(inputs - cv::repeat(rowMax, 1, columnCount), exponentials);

  // Per-row sum of the exponentials, broadcast the same way.
  cv::Mat rowTotal;
  cv::reduce(exponentials, rowTotal, 1, cv::REDUCE_SUM, CV_32F);

  cv::Mat normalized = exponentials / cv::repeat(rowTotal, 1, columnCount);
  return normalized;
}
|
|
15
|
+
|
|
16
|
+
/**
 * Returns, for every row of `matrix`, the sum of its elements (first channel
 * of cv::sum), narrowed to float.
 */
std::vector<float> sumProbabilityRows(const cv::Mat &matrix) {
  const int32_t rowCount = matrix.rows;

  std::vector<float> rowTotals;
  rowTotals.reserve(rowCount);

  int32_t row = 0;
  while (row < rowCount) {
    const cv::Scalar total = cv::sum(matrix.row(row));
    rowTotals.push_back(total[0]);
    ++row;
  }
  return rowTotals;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Divides every row of `matrix` in place by the matching entry of `rowSums`.
 * `rowSums` is indexed by row number and is not bounds-checked here.
 */
void divideMatrixByRows(cv::Mat &matrix, const std::vector<float> &rowSums) {
  const int32_t rowCount = matrix.rows;
  for (int32_t row = 0; row != rowCount; ++row) {
    // row() yields a header sharing the matrix's data, so the division
    // modifies `matrix` itself.
    cv::Mat rowView = matrix.row(row);
    rowView /= rowSums[row];
  }
}
|
|
30
|
+
|
|
31
|
+
/**
 * For each row of a CV_32F matrix, records the maximum value and the column
 * at which it occurs. Asserts (CV_Assert) that the matrix type is CV_32F.
 */
types::ValuesAndIndices findMaxValuesIndices(const cv::Mat &mat) {
  CV_Assert(mat.type() == CV_32F);

  const int32_t rowCount = mat.rows;

  types::ValuesAndIndices result{};
  result.values.reserve(rowCount);
  result.indices.reserve(rowCount);

  for (int32_t row = 0; row < rowCount; ++row) {
    double rowMax;
    cv::Point rowMaxLocation;
    // Only the maximum and its location are requested.
    cv::minMaxLoc(mat.row(row), nullptr, &rowMax, nullptr, &rowMaxLocation);

    result.values.push_back(static_cast<float>(rowMax));
    result.indices.push_back(rowMaxLocation.x);
  }

  return result;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Computes pow(prod(p_i), 2 / sqrt(n)) over the entries whose index is
 * non-zero, where n is the number of such entries. Entries with index 0 are
 * skipped.
 *
 * @param values  Per-position values (probabilities).
 * @param indices Per-position indices; 0 marks a position to ignore.
 * @return The score, or 0 when no position has a non-zero index.
 *
 * Only positions present in both vectors are considered, so a `values` vector
 * shorter than `indices` is never read out of bounds.
 */
float confidenceScore(const std::vector<float> &values,
                      const std::vector<int32_t> &indices) {
  const std::size_t pairCount =
      values.size() < indices.size() ? values.size() : indices.size();

  float product = 1.0f;
  int32_t nonBlankCount = 0;

  for (std::size_t i = 0; i < pairCount; ++i) {
    if (indices[i] == 0) {
      continue; // ignored position
    }
    product *= values[i];
    ++nonBlankCount;
  }

  if (nonBlankCount == 0) {
    return 0.0f;
  }

  const float exponent = 2.0f / std::sqrt(static_cast<float>(nonBlankCount));
  return std::pow(product, exponent);
}
|
|
68
|
+
|
|
69
|
+
/**
 * Returns the axis-aligned bounding rectangle of the four points.
 *
 * The array's storage is wrapped (not copied) as a 4x1 CV_32FC2 matrix.
 * NOTE(review): this assumes types::Point is laid out as two consecutive
 * floats; confirm against Types.h.
 */
cv::Rect extractBoundingBox(std::array<types::Point, 4> &points) {
  constexpr int kPointCount = 4;
  const cv::Mat wrapped(kPointCount, 1, CV_32FC2, points.data());
  const cv::Rect bounds = cv::boundingRect(wrapped);
  return bounds;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Isolates a single connected component near the centre of `img` and returns
 * the image restricted to that component, bitwise-inverted.
 *
 * Steps:
 *  1. Compare the pixel counts in the lower and upper halves of a 256-bin
 *     histogram to choose the threshold polarity.
 *  2. Binarize with Otsu's automatic threshold.
 *  3. Among connected components (label 0 excluded), pick the largest one
 *     whose centroid lies inside the central window and whose area exceeds
 *     constants::kSingleCharacterMinSize.
 *  4. Copy the input through that component's mask (or use an all-zero image
 *     when nothing qualifies) and invert every pixel.
 */
cv::Mat characterBitMask(const cv::Mat &img) {
  // 1. Histogram of intensities over [0, 256).
  int32_t binCount = 256;
  float intensityRange[] = {0.0f, 256.0f};
  const float *ranges = {intensityRange};
  cv::Mat histogram;
  cv::calcHist(&img, 1, nullptr, cv::Mat(), histogram, 1, &binCount, &ranges,
               /*uniform=*/true, /*accumulate=*/false);

  // Darker pixels fall into the lower half of the bins, brighter ones into
  // the upper half.
  const int32_t halfway = binCount / 2;
  double darkCount = 0.0;
  double brightCount = 0.0;
  for (int32_t bin = 0; bin < binCount; ++bin) {
    const float binValue = histogram.at<float>(bin);
    if (bin < halfway) {
      darkCount += binValue;
    } else {
      brightCount += binValue;
    }
  }
  const int32_t polarity =
      (darkCount < brightCount) ? cv::THRESH_BINARY_INV : cv::THRESH_BINARY;

  // 2. Otsu binarization with the chosen polarity.
  cv::Mat binary;
  cv::threshold(img, binary, 0, 255, polarity | cv::THRESH_OTSU);

  // 3. Connected components with per-component statistics.
  cv::Mat labels;
  cv::Mat stats;
  cv::Mat centroids;
  const int32_t labelCount = cv::connectedComponentsWithStats(
      binary, labels, stats, centroids, 8, CV_32S);

  // Central window inside which a centroid must fall.
  const int32_t imageHeight = binary.rows;
  const int32_t imageWidth = binary.cols;
  const int32_t windowLeft =
      static_cast<int32_t>(constants::kSingleCharacterCenterThreshold * imageWidth);
  const int32_t windowRight = static_cast<int32_t>(
      (1 - constants::kSingleCharacterCenterThreshold) * imageWidth);
  const int32_t windowTop =
      static_cast<int32_t>(constants::kSingleCharacterCenterThreshold * imageHeight);
  const int32_t windowBottom = static_cast<int32_t>(
      (1 - constants::kSingleCharacterCenterThreshold) * imageHeight);

  int32_t bestLabel = -1;
  int32_t bestArea = -1;
  for (int32_t label = 1; label < labelCount; ++label) { // label 0 = background
    const int32_t area = stats.at<int32_t>(label, cv::CC_STAT_AREA);
    const double centroidX = centroids.at<double>(label, 0);
    const double centroidY = centroids.at<double>(label, 1);

    const bool centered = windowLeft < centroidX && centroidX < windowRight &&
                          windowTop < centroidY && centroidY < windowBottom;
    const bool largeEnough = area > constants::kSingleCharacterMinSize;

    if (centered && largeEnough && area > bestArea) {
      bestLabel = label;
      bestArea = area;
    }
  }

  // 4. Keep only the chosen component, then invert.
  cv::Mat isolated;
  if (bestLabel == -1) {
    isolated = cv::Mat::zeros(img.size(), img.type());
  } else {
    cv::Mat componentMask;
    componentMask = (labels == bestLabel);
    img.copyTo(isolated, componentMask);
  }

  cv::bitwise_not(isolated, isolated);
  return isolated;
}
|
|
145
|
+
|
|
146
|
+
/**
 * Crops `img` to the axis-aligned bounding rectangle of `bbox` after mapping
 * each bbox point through two coordinate corrections.
 *
 * Each point is, in order:
 *  1. shifted by -paddings.left / -paddings.top and scaled by
 *     paddings.resizeRatio,
 *  2. translated by the first point of `originalBbox`,
 *  3. shifted by -originalPaddings.left / -originalPaddings.top and scaled by
 *     originalPaddings.resizeRatio.
 *
 * The resulting rectangle is clipped to the image bounds.
 *
 * @return A deep copy of the cropped region, or an empty cv::Mat when the
 *         clipped rectangle is empty.
 */
cv::Mat
cropImageWithBoundingBox(const cv::Mat &img,
                         const std::array<types::Point, 4> &bbox,
                         const std::array<types::Point, 4> &originalBbox,
                         const types::PaddingInfo &paddings,
                         const types::PaddingInfo &originalPaddings) {
  // A std::array of four elements is never empty, so no emptiness check is
  // needed before reading the first corner.
  const types::Point topLeft = originalBbox[0];

  // Removes a padding offset and then applies that padding's resize ratio.
  const auto undoPadding = [](types::Point &point,
                              const types::PaddingInfo &info) {
    point.x -= info.left;
    point.y -= info.top;
    point.x *= info.resizeRatio;
    point.y *= info.resizeRatio;
  };

  std::vector<cv::Point2f> points;
  points.reserve(bbox.size());

  for (const auto &corner : bbox) {
    types::Point mapped = corner;

    undoPadding(mapped, paddings);

    mapped.x += topLeft.x;
    mapped.y += topLeft.y;

    undoPadding(mapped, originalPaddings);

    points.emplace_back(mapped.x, mapped.y);
  }

  cv::Rect rect = cv::boundingRect(points);
  rect &= cv::Rect(0, 0, img.cols, img.rows);
  if (rect.empty()) {
    return {};
  }
  return img(rect).clone();
}
|
|
189
|
+
|
|
190
|
+
/**
 * Crops a single character out of `originalImage` (via
 * cropImageWithBoundingBox), converts it from BGR to grayscale and resizes it
 * to kSmallVerticalRecognizerWidth x kRecognizerHeight with area
 * interpolation.
 *
 * NOTE(review): when the crop comes back empty, the colour conversion is still
 * attempted; confirm callers never pass a box that lies outside the image.
 */
cv::Mat prepareForRecognition(const cv::Mat &originalImage,
                              const std::array<types::Point, 4> &bbox,
                              const std::array<types::Point, 4> &originalBbox,
                              const types::PaddingInfo &paddings,
                              const types::PaddingInfo &originalPaddings) {
  cv::Mat character = cropImageWithBoundingBox(
      originalImage, bbox, originalBbox, paddings, originalPaddings);

  cv::cvtColor(character, character, cv::COLOR_BGR2GRAY);

  cv::resize(character, character,
             cv::Size(constants::kSmallVerticalRecognizerWidth,
                      constants::kRecognizerHeight),
             0, 0, cv::INTER_AREA);

  return character;
}
|
|
204
|
+
} // namespace rnexecutorch::models::ocr::utils
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <array>
|
|
4
|
+
#include <opencv2/opencv.hpp>
|
|
5
|
+
#include <rnexecutorch/models/ocr/Constants.h>
|
|
6
|
+
#include <rnexecutorch/models/ocr/Types.h>
|
|
7
|
+
#include <vector>
|
|
8
|
+
|
|
9
|
+
namespace rnexecutorch::models::ocr::utils {
|
|
10
|
+
/**
|
|
11
|
+
* @brief Computes the per-row softmax function.
|
|
12
|
+
* Formula: softmax(x_i) = exp(x_i - max(x)) / sum(exp(x_j - max(x))) for each
|
|
13
|
+
* row.
|
|
14
|
+
*/
|
|
15
|
+
cv::Mat softmax(const cv::Mat &inputs);
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* @brief For each row of matrix computes {maxValue, index} pair. Returns a list
|
|
19
|
+
* of maxValues and a list of corresponding indices.
|
|
20
|
+
*/
|
|
21
|
+
types::ValuesAndIndices findMaxValuesIndices(const cv::Mat &mat);
|
|
22
|
+
std::vector<float> sumProbabilityRows(const cv::Mat &matrix);
|
|
23
|
+
void divideMatrixByRows(cv::Mat &matrix, const std::vector<float> &rowSums);
|
|
24
|
+
cv::Rect extractBoundingBox(std::array<types::Point, 4> &points);
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* @brief Computes confidence score for given values and indices vectors.
|
|
28
|
+
* Omits blank tokens.
|
|
29
|
+
* Formula: pow(\prod_{i=1}^{n}(p_i), 2/sqrt(n)), where n is a number of
|
|
30
|
+
* non-blank tokens, and p_i is the probability of i-th non-blank token.
|
|
31
|
+
* @details Formula derived from line 14 of
|
|
32
|
+
* https://github.com/JaidedAI/EasyOCR/blob/c4f3cd7225efd4f85451bd8b4a7646ae9a092420/easyocr/recognition.py#L14
|
|
33
|
+
* @details 'Some say that it's a code, sent to us from god'
|
|
34
|
+
*/
|
|
35
|
+
float confidenceScore(const std::vector<float> &values,
|
|
36
|
+
const std::vector<int32_t> &indices);
|
|
37
|
+
|
|
38
|
+
cv::Mat characterBitMask(const cv::Mat &img);
|
|
39
|
+
|
|
40
|
+
/**
|
|
41
|
+
* @brief Perform cropping of an image to a single character detector box.
|
|
42
|
+
* This function utilizes info about external bounding box and padding combined
|
|
43
|
+
* with internal bounding box and padding.
|
|
44
|
+
* It does so to preserve the best possible image quality.
|
|
45
|
+
*/
|
|
46
|
+
cv::Mat
|
|
47
|
+
cropImageWithBoundingBox(const cv::Mat &img,
|
|
48
|
+
const std::array<types::Point, 4> &bbox,
|
|
49
|
+
const std::array<types::Point, 4> &originalBbox,
|
|
50
|
+
const types::PaddingInfo &paddings,
|
|
51
|
+
const types::PaddingInfo &originalPaddings);
|
|
52
|
+
|
|
53
|
+
/**
|
|
54
|
+
* @brief Perform cropping, resizing and convert to grayscale to prepare image
|
|
55
|
+
* for Recognizer.
|
|
56
|
+
*
|
|
57
|
+
* Prepare for Recognition by following steps:
|
|
58
|
+
* 1. Crop image to the character bounding box,
|
|
59
|
+
* 2. Convert Image to gray.
|
|
60
|
+
* 3. Resize it to [kSmallVerticalRecognizerWidth x kRecognizerHeight] (64 x
|
|
61
|
+
* 64).
|
|
62
|
+
*
|
|
63
|
+
* @details it utilizes cropImageWithBoundingBox to perform specific cropping.
|
|
64
|
+
*/
|
|
65
|
+
|
|
66
|
+
cv::Mat prepareForRecognition(const cv::Mat &originalImage,
|
|
67
|
+
const std::array<types::Point, 4> &bbox,
|
|
68
|
+
const std::array<types::Point, 4> &originalBbox,
|
|
69
|
+
const types::PaddingInfo &paddings,
|
|
70
|
+
const types::PaddingInfo &originalPaddings);
|
|
71
|
+
} // namespace rnexecutorch::models::ocr::utils
|