react-native-executorch 0.5.15 → 0.6.0-nightly-897eae9-20251213
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -36
- package/android/CMakeLists.txt +13 -25
- package/android/build.gradle +2 -3
- package/android/libs/classes.jar +0 -0
- package/android/src/main/cpp/CMakeLists.txt +2 -1
- package/common/rnexecutorch/RnExecutorchInstaller.cpp +18 -0
- package/common/rnexecutorch/TokenizerModule.cpp +3 -3
- package/common/rnexecutorch/data_processing/Numerical.cpp +31 -23
- package/common/rnexecutorch/data_processing/Numerical.h +6 -1
- package/common/rnexecutorch/data_processing/dsp.cpp +0 -46
- package/common/rnexecutorch/host_objects/JsiConversions.h +16 -0
- package/common/rnexecutorch/host_objects/ModelHostObject.h +26 -11
- package/common/rnexecutorch/jsi/OwningArrayBuffer.h +19 -2
- package/common/rnexecutorch/metaprogramming/TypeConcepts.h +0 -20
- package/common/rnexecutorch/models/BaseModel.cpp +12 -11
- package/common/rnexecutorch/models/BaseModel.h +18 -10
- package/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp +3 -11
- package/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp +0 -1
- package/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp +6 -12
- package/common/rnexecutorch/models/llm/LLM.cpp +25 -8
- package/common/rnexecutorch/models/llm/LLM.h +4 -4
- package/common/rnexecutorch/models/ocr/CTCLabelConverter.h +1 -1
- package/common/rnexecutorch/models/ocr/utils/RecognitionHandlerUtils.cpp +7 -4
- package/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp +8 -13
- package/common/rnexecutorch/models/speech_to_text/SpeechToText.h +1 -3
- package/common/rnexecutorch/models/speech_to_text/asr/ASR.cpp +12 -19
- package/common/rnexecutorch/models/speech_to_text/asr/ASR.h +4 -5
- package/common/rnexecutorch/models/text_to_image/Constants.h +9 -0
- package/common/rnexecutorch/models/text_to_image/Decoder.cpp +32 -0
- package/common/rnexecutorch/models/text_to_image/Decoder.h +24 -0
- package/common/rnexecutorch/models/text_to_image/Encoder.cpp +44 -0
- package/common/rnexecutorch/models/text_to_image/Encoder.h +32 -0
- package/common/rnexecutorch/models/text_to_image/Scheduler.cpp +152 -0
- package/common/rnexecutorch/models/text_to_image/Scheduler.h +41 -0
- package/common/rnexecutorch/models/text_to_image/TextToImage.cpp +141 -0
- package/common/rnexecutorch/models/text_to_image/TextToImage.h +64 -0
- package/common/rnexecutorch/models/text_to_image/UNet.cpp +38 -0
- package/common/rnexecutorch/models/text_to_image/UNet.h +28 -0
- package/common/rnexecutorch/models/voice_activity_detection/Constants.h +27 -0
- package/common/rnexecutorch/models/voice_activity_detection/Types.h +12 -0
- package/common/rnexecutorch/models/voice_activity_detection/Utils.cpp +15 -0
- package/common/rnexecutorch/models/voice_activity_detection/Utils.h +13 -0
- package/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.cpp +160 -0
- package/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h +36 -0
- package/common/rnexecutorch/tests/CMakeLists.txt +30 -0
- package/common/rnexecutorch/tests/NumericalTest.cpp +110 -0
- package/common/rnexecutorch/tests/README.md +30 -13
- package/common/rnexecutorch/threads/GlobalThreadPool.h +4 -0
- package/common/runner/arange_util.cpp +44 -0
- package/common/runner/arange_util.h +37 -0
- package/common/runner/constants.h +28 -0
- package/common/runner/io_manager.h +240 -0
- package/common/runner/irunner.h +87 -16
- package/common/runner/kernel_includes.h +23 -0
- package/common/runner/runner.cpp +151 -66
- package/common/runner/runner.h +39 -22
- package/common/runner/sampler.cpp +8 -1
- package/common/runner/sampler.h +4 -2
- package/common/runner/stats.h +1 -4
- package/common/runner/text_decoder_runner.cpp +26 -12
- package/common/runner/text_decoder_runner.h +52 -31
- package/common/runner/text_prefiller.cpp +46 -12
- package/common/runner/text_prefiller.h +38 -4
- package/common/runner/text_token_generator.h +51 -26
- package/common/runner/util.h +53 -8
- package/ios/RnExecutorch.xcodeproj/project.pbxproj +0 -23
- package/lib/module/Error.js +1 -0
- package/lib/module/Error.js.map +1 -1
- package/lib/module/constants/directories.js +1 -1
- package/lib/module/constants/directories.js.map +1 -1
- package/lib/module/constants/modelUrls.js +32 -1
- package/lib/module/constants/modelUrls.js.map +1 -1
- package/lib/module/constants/ocr/models.js +7 -7
- package/lib/module/constants/ocr/models.js.map +1 -1
- package/lib/module/constants/ocr/symbols.js +3 -2
- package/lib/module/constants/ocr/symbols.js.map +1 -1
- package/lib/module/controllers/LLMController.js +10 -1
- package/lib/module/controllers/LLMController.js.map +1 -1
- package/lib/module/controllers/OCRController.js +3 -3
- package/lib/module/controllers/OCRController.js.map +1 -1
- package/lib/module/controllers/VerticalOCRController.js +2 -2
- package/lib/module/controllers/VerticalOCRController.js.map +1 -1
- package/lib/module/hooks/computer_vision/useOCR.js +3 -3
- package/lib/module/hooks/computer_vision/useOCR.js.map +1 -1
- package/lib/module/hooks/{useNonStaticModule.js → computer_vision/useTextToImage.js} +21 -16
- package/lib/module/hooks/computer_vision/useTextToImage.js.map +1 -0
- package/lib/module/hooks/computer_vision/useVerticalOCR.js +3 -3
- package/lib/module/hooks/computer_vision/useVerticalOCR.js.map +1 -1
- package/lib/module/hooks/natural_language_processing/useLLM.js +3 -3
- package/lib/module/hooks/natural_language_processing/useLLM.js.map +1 -1
- package/lib/module/hooks/natural_language_processing/useTokenizer.js +5 -5
- package/lib/module/hooks/natural_language_processing/useTokenizer.js.map +1 -1
- package/lib/module/hooks/natural_language_processing/useVAD.js +13 -0
- package/lib/module/hooks/natural_language_processing/useVAD.js.map +1 -0
- package/lib/module/index.js +7 -2
- package/lib/module/index.js.map +1 -1
- package/lib/module/modules/computer_vision/OCRModule.js +2 -2
- package/lib/module/modules/computer_vision/OCRModule.js.map +1 -1
- package/lib/module/modules/computer_vision/TextToImageModule.js +48 -0
- package/lib/module/modules/computer_vision/TextToImageModule.js.map +1 -0
- package/lib/module/modules/computer_vision/VerticalOCRModule.js +2 -2
- package/lib/module/modules/computer_vision/VerticalOCRModule.js.map +1 -1
- package/lib/module/modules/natural_language_processing/SpeechToTextModule.js +7 -4
- package/lib/module/modules/natural_language_processing/SpeechToTextModule.js.map +1 -1
- package/lib/module/modules/natural_language_processing/VADModule.js +19 -0
- package/lib/module/modules/natural_language_processing/VADModule.js.map +1 -0
- package/lib/module/types/llm.js.map +1 -1
- package/lib/module/types/vad.js +2 -0
- package/lib/module/types/vad.js.map +1 -0
- package/lib/module/utils/ResourceFetcher.js +2 -1
- package/lib/module/utils/ResourceFetcher.js.map +1 -1
- package/lib/module/utils/ResourceFetcherUtils.js +6 -6
- package/lib/module/utils/ResourceFetcherUtils.js.map +1 -1
- package/lib/typescript/Error.d.ts +1 -0
- package/lib/typescript/Error.d.ts.map +1 -1
- package/lib/typescript/constants/modelUrls.d.ts +23 -0
- package/lib/typescript/constants/modelUrls.d.ts.map +1 -1
- package/lib/typescript/constants/ocr/symbols.d.ts +1 -1
- package/lib/typescript/constants/ocr/symbols.d.ts.map +1 -1
- package/lib/typescript/controllers/LLMController.d.ts.map +1 -1
- package/lib/typescript/controllers/OCRController.d.ts +1 -1
- package/lib/typescript/controllers/OCRController.d.ts.map +1 -1
- package/lib/typescript/controllers/VerticalOCRController.d.ts +1 -1
- package/lib/typescript/controllers/VerticalOCRController.d.ts.map +1 -1
- package/lib/typescript/hooks/computer_vision/useOCR.d.ts +1 -1
- package/lib/typescript/hooks/computer_vision/useOCR.d.ts.map +1 -1
- package/lib/typescript/hooks/computer_vision/useTextToImage.d.ts +22 -0
- package/lib/typescript/hooks/computer_vision/useTextToImage.d.ts.map +1 -0
- package/lib/typescript/hooks/computer_vision/useVerticalOCR.d.ts +1 -1
- package/lib/typescript/hooks/computer_vision/useVerticalOCR.d.ts.map +1 -1
- package/lib/typescript/hooks/natural_language_processing/useLLM.d.ts.map +1 -1
- package/lib/typescript/hooks/natural_language_processing/useSpeechToText.d.ts +2 -2
- package/lib/typescript/hooks/natural_language_processing/useVAD.d.ts +16 -0
- package/lib/typescript/hooks/natural_language_processing/useVAD.d.ts.map +1 -0
- package/lib/typescript/index.d.ts +8 -1
- package/lib/typescript/index.d.ts.map +1 -1
- package/lib/typescript/modules/computer_vision/OCRModule.d.ts +1 -1
- package/lib/typescript/modules/computer_vision/OCRModule.d.ts.map +1 -1
- package/lib/typescript/modules/computer_vision/TextToImageModule.d.ts +16 -0
- package/lib/typescript/modules/computer_vision/TextToImageModule.d.ts.map +1 -0
- package/lib/typescript/modules/computer_vision/VerticalOCRModule.d.ts +1 -1
- package/lib/typescript/modules/computer_vision/VerticalOCRModule.d.ts.map +1 -1
- package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts +3 -2
- package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts.map +1 -1
- package/lib/typescript/modules/natural_language_processing/VADModule.d.ts +10 -0
- package/lib/typescript/modules/natural_language_processing/VADModule.d.ts.map +1 -0
- package/lib/typescript/types/llm.d.ts +2 -0
- package/lib/typescript/types/llm.d.ts.map +1 -1
- package/lib/typescript/types/vad.d.ts +5 -0
- package/lib/typescript/types/vad.d.ts.map +1 -0
- package/lib/typescript/utils/ResourceFetcher.d.ts +29 -0
- package/lib/typescript/utils/ResourceFetcher.d.ts.map +1 -1
- package/lib/typescript/utils/ResourceFetcherUtils.d.ts +2 -2
- package/lib/typescript/utils/ResourceFetcherUtils.d.ts.map +1 -1
- package/package.json +11 -8
- package/react-native-executorch.podspec +9 -9
- package/src/Error.ts +1 -0
- package/src/constants/directories.ts +1 -1
- package/src/constants/modelUrls.ts +36 -1
- package/src/constants/ocr/models.ts +7 -7
- package/src/constants/ocr/symbols.ts +3 -2
- package/src/controllers/LLMController.ts +12 -1
- package/src/controllers/OCRController.ts +3 -3
- package/src/controllers/VerticalOCRController.ts +2 -2
- package/src/hooks/computer_vision/useOCR.ts +4 -5
- package/src/hooks/computer_vision/useTextToImage.ts +92 -0
- package/src/hooks/computer_vision/useVerticalOCR.ts +4 -5
- package/src/hooks/natural_language_processing/useLLM.ts +3 -4
- package/src/hooks/natural_language_processing/useTokenizer.ts +5 -5
- package/src/hooks/natural_language_processing/useVAD.ts +15 -0
- package/src/index.ts +20 -1
- package/src/modules/computer_vision/OCRModule.ts +2 -2
- package/src/modules/computer_vision/TextToImageModule.ts +93 -0
- package/src/modules/computer_vision/VerticalOCRModule.ts +2 -2
- package/src/modules/natural_language_processing/SpeechToTextModule.ts +8 -4
- package/src/modules/natural_language_processing/VADModule.ts +27 -0
- package/src/types/llm.ts +2 -0
- package/src/types/vad.ts +4 -0
- package/src/utils/ResourceFetcher.ts +2 -1
- package/src/utils/ResourceFetcherUtils.ts +8 -8
- package/third-party/android/libs/cpuinfo/arm64-v8a/libcpuinfo.so +0 -0
- package/third-party/android/libs/executorch/arm64-v8a/libexecutorch.so +0 -0
- package/third-party/android/libs/executorch/x86_64/libexecutorch.so +0 -0
- package/third-party/android/libs/pthreadpool/arm64-v8a/libpthreadpool.so +0 -0
- package/third-party/include/c10/macros/Export.h +0 -78
- package/third-party/include/c10/macros/Macros.h +1 -520
- package/third-party/include/c10/util/BFloat16-inl.h +1 -339
- package/third-party/include/c10/util/BFloat16.h +1 -122
- package/third-party/include/c10/util/Half-inl.h +1 -347
- package/third-party/include/c10/util/Half.h +6 -419
- package/third-party/include/c10/util/TypeSafeSignMath.h +1 -133
- package/third-party/include/c10/util/bit_cast.h +1 -43
- package/third-party/include/c10/util/complex.h +1 -568
- package/third-party/include/c10/util/floating_point_utils.h +1 -33
- package/third-party/include/c10/util/irange.h +1 -1
- package/third-party/include/c10/util/llvmMathExtras.h +866 -0
- package/third-party/include/c10/util/safe_numerics.h +97 -0
- package/third-party/include/executorch/ExecuTorchError.h +6 -7
- package/third-party/include/executorch/ExecuTorchLLM/ExecuTorchLLM.h +12 -0
- package/third-party/include/executorch/ExecuTorchLLM/ExecuTorchLLMConfig.h +56 -0
- package/third-party/include/executorch/ExecuTorchLLM/ExecuTorchLLMError.h +16 -0
- package/third-party/include/executorch/ExecuTorchLLM/ExecuTorchLLMMultimodalRunner.h +227 -0
- package/third-party/include/executorch/ExecuTorchLLM/ExecuTorchLLMTextRunner.h +97 -0
- package/third-party/include/executorch/ExecuTorchLLM/module.modulemap +4 -0
- package/third-party/include/executorch/ExecuTorchLog.h +1 -0
- package/third-party/include/executorch/ExecuTorchModule.h +177 -4
- package/third-party/include/executorch/ExecuTorchTensor.h +3 -4
- package/third-party/include/executorch/ExecuTorchValue.h +1 -7
- package/third-party/include/executorch/extension/module/module.h +139 -8
- package/third-party/include/executorch/extension/tensor/tensor.h +1 -0
- package/third-party/include/executorch/extension/tensor/tensor_ptr.h +88 -26
- package/third-party/include/executorch/extension/threadpool/threadpool.h +4 -1
- package/third-party/include/executorch/runtime/backend/backend_init_context.h +6 -0
- package/third-party/include/executorch/runtime/backend/interface.h +1 -1
- package/third-party/include/executorch/runtime/core/error.h +76 -49
- package/third-party/include/executorch/runtime/core/exec_aten/util/scalar_type_util.h +18 -4
- package/third-party/include/executorch/runtime/core/memory_allocator.h +12 -2
- package/third-party/include/executorch/runtime/core/named_data_map.h +1 -11
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/macros/Export.h +0 -78
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h +1 -520
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +1 -339
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16.h +1 -122
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/Half-inl.h +1 -347
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/Half.h +6 -419
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/TypeSafeSignMath.h +1 -133
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/bit_cast.h +1 -43
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/complex.h +1 -568
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/floating_point_utils.h +1 -33
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/irange.h +1 -1
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/llvmMathExtras.h +866 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/safe_numerics.h +97 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/macros/Export.h +66 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/macros/Macros.h +553 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/BFloat16.h +477 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/Half.h +781 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/TypeSafeSignMath.h +141 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/bit_cast.h +49 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/complex.h +593 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/floating_point_utils.h +38 -0
- package/third-party/include/executorch/runtime/core/tensor_layout.h +1 -1
- package/third-party/include/executorch/runtime/executor/merged_data_map.h +142 -0
- package/third-party/include/executorch/runtime/executor/method.h +21 -8
- package/third-party/include/executorch/runtime/executor/method_meta.h +20 -2
- package/third-party/include/executorch/runtime/executor/program.h +0 -10
- package/third-party/include/executorch/runtime/kernel/operator_registry.h +1 -1
- package/third-party/include/executorch/runtime/platform/compiler.h +2 -0
- package/third-party/include/executorch/schema/extended_header.h +10 -1
- package/third-party/include/torch/headeronly/macros/Export.h +66 -0
- package/third-party/include/torch/headeronly/macros/Macros.h +553 -0
- package/third-party/include/torch/headeronly/util/BFloat16.h +477 -0
- package/third-party/include/torch/headeronly/util/Half.h +781 -0
- package/third-party/include/torch/headeronly/util/TypeSafeSignMath.h +141 -0
- package/third-party/include/torch/headeronly/util/bit_cast.h +49 -0
- package/third-party/include/torch/headeronly/util/complex.h +593 -0
- package/third-party/include/torch/headeronly/util/floating_point_utils.h +38 -0
- package/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib +0 -0
- package/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Info.plist +0 -0
- package/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib +0 -0
- package/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Info.plist +0 -0
- package/common/rnexecutorch/tests/run_all_tests.sh +0 -14
- package/common/rnexecutorch/tests/run_test.sh +0 -18
- package/ios/RnExecutorch/utils/Conversions.h +0 -14
- package/ios/RnExecutorch/utils/ETError.h +0 -26
- package/ios/RnExecutorch/utils/ImageProcessor.h +0 -15
- package/ios/RnExecutorch/utils/ImageProcessor.mm +0 -147
- package/ios/RnExecutorch/utils/Numerical.h +0 -3
- package/ios/RnExecutorch/utils/Numerical.mm +0 -18
- package/ios/RnExecutorch/utils/ScalarType.h +0 -14
- package/ios/RnExecutorch/utils/ScalarType.mm +0 -21
- package/lib/module/hooks/useNonStaticModule.js.map +0 -1
- package/lib/typescript/hooks/useNonStaticModule.d.ts +0 -21
- package/lib/typescript/hooks/useNonStaticModule.d.ts.map +0 -1
- package/src/hooks/useNonStaticModule.ts +0 -74
- package/third-party/include/executorch/extension/kernel_util/make_boxed_from_unboxed_functor.h +0 -181
- package/third-party/include/executorch/extension/kernel_util/meta_programming.h +0 -108
- package/third-party/include/executorch/extension/kernel_util/type_list.h +0 -137
- package/third-party/include/executorch/extension/threadpool/threadpool_guard.h +0 -35
package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/safe_numerics.h
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
#include <c10/macros/Macros.h>
|
|
3
|
+
|
|
4
|
+
#include <cstddef>
#include <cstdint>
#include <type_traits>
|
|
6
|
+
|
|
7
|
+
// GCC has __builtin_mul_overflow from before it supported __has_builtin
|
|
8
|
+
#ifdef _MSC_VER
|
|
9
|
+
#define C10_HAS_BUILTIN_OVERFLOW() (0)
|
|
10
|
+
#include <c10/util/llvmMathExtras.h>
|
|
11
|
+
#include <intrin.h>
|
|
12
|
+
#else
|
|
13
|
+
#define C10_HAS_BUILTIN_OVERFLOW() (1)
|
|
14
|
+
#endif
|
|
15
|
+
|
|
16
|
+
namespace c10 {
|
|
17
|
+
|
|
18
|
+
/// Adds two unsigned 64-bit integers and reports wrap-around.
///
/// @param a   first addend
/// @param b   second addend
/// @param out receives the sum reduced modulo 2^64; written in every case
/// @return true if the mathematical sum does not fit in 64 bits
C10_ALWAYS_INLINE bool add_overflows(uint64_t a, uint64_t b, uint64_t *out) {
#if C10_HAS_BUILTIN_OVERFLOW()
  return __builtin_add_overflow(a, b, out);
#else
  // Unsigned addition wraps, so a carry out of bit 63 happened exactly when
  // the wrapped sum is smaller than one of the operands. This needs no
  // intrinsic; _addcarry_u64 is documented for x64 targets only, so relying
  // on it for 32-bit x86 builds is not portable.
  const uint64_t sum = a + b;
  *out = sum;
  return sum < a;
#endif
}
|
|
34
|
+
|
|
35
|
+
/// Multiplies two integers of the same type and reports overflow.
///
/// @tparam T  integral operand type
/// @param a   first factor
/// @param b   second factor
/// @param out receives the product reduced modulo 2^N (N = bit width of T),
///            whether or not overflow occurred
/// @return true if the mathematical product is not representable in T
template <typename T> C10_ALWAYS_INLINE bool mul_overflows(T a, T b, T *out) {
#if C10_HAS_BUILTIN_OVERFLOW()
  return __builtin_mul_overflow(a, b, out);
#else
  static_assert(std::is_integral_v<T>,
                "mul_overflows only supports integral types");

  if constexpr (std::is_signed_v<T>) {
    // Signed overflow is undefined behaviour, and MIN / -1 traps, so the
    // check is done on magnitudes in the unsigned counterpart of T, where
    // arithmetic wraps.
    using U = std::make_unsigned_t<T>;
    // Multiply in a type at least as wide as unsigned int so that narrow
    // operands are not promoted to (signed) int before the multiplication.
    using W = std::common_type_t<U, unsigned int>;

    const bool negative = (a < 0) != (b < 0);
    const U mag_a =
        a < 0 ? static_cast<U>(U(0) - static_cast<U>(a)) : static_cast<U>(a);
    const U mag_b =
        b < 0 ? static_cast<U>(U(0) - static_cast<U>(b)) : static_cast<U>(b);

    const U u_max = static_cast<U>(-1);
    const U mag_prod =
        static_cast<U>(static_cast<W>(mag_a) * static_cast<W>(mag_b));

    // Largest representable magnitude: 2^(N-1) for a negative result,
    // 2^(N-1) - 1 otherwise.
    const U sign_bit = static_cast<U>(U(1) << (sizeof(T) * 8 - 1));
    const U limit = negative ? sign_bit : static_cast<U>(sign_bit - 1);

    // The magnitude product either does not fit in U at all, or fits in U
    // but exceeds what T can hold with the required sign.
    const bool too_wide = mag_a != 0 && mag_b > u_max / mag_a;

    // Converting back to T is modular since C++20; earlier standards leave
    // it implementation-defined.
    *out = static_cast<T>(negative ? static_cast<U>(U(0) - mag_prod)
                                   : mag_prod);
    return too_wide || mag_prod > limit;
  } else {
    // Exact test: a * b overflows iff a != 0 and b > max / a. The previous
    // leading-zeros heuristic flagged products that fit (e.g. 1 * max).
    using W = std::common_type_t<T, unsigned int>;
    const T t_max = static_cast<T>(-1);
    *out = static_cast<T>(static_cast<W>(a) * static_cast<W>(b));
    return a != 0 && b > t_max / a;
  }
#endif
}
|
|
60
|
+
|
|
61
|
+
/// Non-template overload for uint64_t operands.
///
/// Forwards to the generic mul_overflows<uint64_t>; `out` receives the
/// product and the return value is true when the multiplication overflowed.
C10_ALWAYS_INLINE bool mul_overflows(uint64_t a, uint64_t b, uint64_t *out) {
  const bool overflowed = mul_overflows<uint64_t>(a, b, out);
  return overflowed;
}
|
|
64
|
+
|
|
65
|
+
/// Multiplies all elements of [first, last) as uint64_t values.
///
/// @param first iterator to the first factor
/// @param last  iterator one past the final factor
/// @param out   receives the product reduced modulo 2^64 (1 for an empty
///              range); written in every case
/// @return true if an overflow was detected
///
/// Note: the two implementations below treat zero factors differently. The
/// builtin path keeps reporting an overflow that happened before a zero
/// factor was reached; the fallback path reports no overflow whenever any
/// factor is zero.
template <typename It>
bool safe_multiplies_u64(It first, It last, uint64_t *out) {
#if C10_HAS_BUILTIN_OVERFLOW()
  uint64_t prod = 1;
  bool overflow = false;
  for (; first != last; ++first) {
    overflow |= c10::mul_overflows(prod, *first, &prod);
  }
  *out = prod;
  return overflow;
#else
  uint64_t prod = 1;
  bool overflow = false;
  bool is_zero = false;
  for (; first != last; ++first) {
    const auto x = static_cast<uint64_t>(*first);
    if (x == 0) {
      is_zero = true;
    } else if (prod > UINT64_MAX / x) {
      // Exact test for this step. The earlier log2-sum heuristic avoided a
      // division but flagged products that actually fit in 64 bits.
      overflow = true;
    }
    prod *= x;
  }
  *out = prod;
  return !is_zero && overflow;
#endif
}
|
|
91
|
+
|
|
92
|
+
/// Container convenience overload.
///
/// Multiplies every element of `c` by delegating to the iterator-pair
/// overload over c.begin() .. c.end(); `out` and the return value have the
/// same meaning as there.
template <typename Container>
bool safe_multiplies_u64(const Container &c, uint64_t *out) {
  const auto first = c.begin();
  const auto last = c.end();
  return safe_multiplies_u64(first, last, out);
}
|
|
96
|
+
|
|
97
|
+
} // namespace c10
|
|
@@ -1,6 +1,13 @@
|
|
|
1
1
|
#define C10_USING_CUSTOM_GENERATED_MACROS
|
|
2
2
|
#pragma once
|
|
3
3
|
|
|
4
|
+
#ifndef C10_MACROS_EXPORT_H_
|
|
5
|
+
#define C10_MACROS_EXPORT_H_
|
|
6
|
+
|
|
7
|
+
#ifndef C10_USING_CUSTOM_GENERATED_MACROS
|
|
8
|
+
#include <torch/headeronly/macros/cmake_macros.h>
|
|
9
|
+
#endif // C10_USING_CUSTOM_GENERATED_MACROS
|
|
10
|
+
|
|
4
11
|
/* Header file to define the common scaffolding for exported symbols.
|
|
5
12
|
*
|
|
6
13
|
* Export is by itself a quite tricky situation to deal with, and if you are
|
|
@@ -86,3 +93,62 @@
|
|
|
86
93
|
#else
|
|
87
94
|
#define C10_API C10_IMPORT
|
|
88
95
|
#endif
|
|
96
|
+
|
|
97
|
+
// This one is being used by libtorch.so
|
|
98
|
+
#ifdef CAFFE2_BUILD_MAIN_LIB
|
|
99
|
+
#define TORCH_API C10_EXPORT
|
|
100
|
+
#else
|
|
101
|
+
#define TORCH_API C10_IMPORT
|
|
102
|
+
#endif
|
|
103
|
+
|
|
104
|
+
// You may be wondering why we have TORCH_CUDA_CPP_API and TORCH_CUDA_CU_API
|
|
105
|
+
// belonging to the same library instead of just one TORCH_CUDA_API. Well, it
|
|
106
|
+
// can indeed just be one TORCH_CUDA_API (and used to be)! TORCH_CUDA_CPP_API
|
|
107
|
+
// and TORCH_CUDA_CU_API are artifacts of when we needed a split build to
|
|
108
|
+
// avoid relocation marker linking errors. The context is as follows:
|
|
109
|
+
//
|
|
110
|
+
// Once upon a time, there _was_ only TORCH_CUDA_API. All was happy until we
|
|
111
|
+
// tried to compile PyTorch for CUDA 11.1, which ran into relocation marker
|
|
112
|
+
// issues when linking big binaries.
|
|
113
|
+
// (https://github.com/pytorch/pytorch/issues/39968) We had two choices:
|
|
114
|
+
// (1) Stop supporting so many GPU architectures
|
|
115
|
+
// (2) Do something else
|
|
116
|
+
// We chose #2 and decided to split the behemoth that was torch_cuda into two
|
|
117
|
+
// smaller libraries, one with most of the core kernel functions (torch_cuda_cu)
|
|
118
|
+
// and the other that had..well..everything else (torch_cuda_cpp). The idea was
|
|
119
|
+
// this: instead of linking our static libraries (like the hefty
|
|
120
|
+
// libcudnn_static.a) with another huge library, torch_cuda, and run into pesky
|
|
121
|
+
// relocation marker issues, we could link our static libraries to a smaller
|
|
122
|
+
// part of torch_cuda (torch_cuda_cpp) and avoid the issues.
|
|
123
|
+
|
|
124
|
+
// libtorch_cuda.so (where torch_cuda_cu and torch_cuda_cpp are a part of the
|
|
125
|
+
// same api)
|
|
126
|
+
#ifdef TORCH_CUDA_BUILD_MAIN_LIB
|
|
127
|
+
#define TORCH_CUDA_CPP_API C10_EXPORT
|
|
128
|
+
#define TORCH_CUDA_CU_API C10_EXPORT
|
|
129
|
+
#else
|
|
130
|
+
#define TORCH_CUDA_CPP_API C10_IMPORT
|
|
131
|
+
#define TORCH_CUDA_CU_API C10_IMPORT
|
|
132
|
+
#endif
|
|
133
|
+
|
|
134
|
+
#if defined(TORCH_HIP_BUILD_MAIN_LIB)
|
|
135
|
+
#define TORCH_HIP_CPP_API C10_EXPORT
|
|
136
|
+
#define TORCH_HIP_API C10_EXPORT
|
|
137
|
+
#else
|
|
138
|
+
#define TORCH_HIP_CPP_API C10_IMPORT
|
|
139
|
+
#define TORCH_HIP_API C10_IMPORT
|
|
140
|
+
#endif
|
|
141
|
+
|
|
142
|
+
#if defined(TORCH_XPU_BUILD_MAIN_LIB)
|
|
143
|
+
#define TORCH_XPU_API C10_EXPORT
|
|
144
|
+
#else
|
|
145
|
+
#define TORCH_XPU_API C10_IMPORT
|
|
146
|
+
#endif
|
|
147
|
+
|
|
148
|
+
// Enums only need to be exported on windows for non-CUDA files
|
|
149
|
+
#if defined(_WIN32) && defined(__CUDACC__)
|
|
150
|
+
#define C10_API_ENUM C10_API
|
|
151
|
+
#else
|
|
152
|
+
#define C10_API_ENUM
|
|
153
|
+
#endif
|
|
154
|
+
#endif // C10_MACROS_EXPORT_H_
|