react-native-executorch 0.5.6 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/libs/classes.jar +0 -0
- package/android/src/main/cpp/CMakeLists.txt +23 -14
- package/common/rnexecutorch/RnExecutorchInstaller.cpp +4 -21
- package/common/rnexecutorch/host_objects/ModelHostObject.h +67 -51
- package/common/rnexecutorch/models/llm/LLM.cpp +24 -1
- package/common/rnexecutorch/models/llm/LLM.h +4 -1
- package/common/rnexecutorch/threads/GlobalThreadPool.h +79 -0
- package/common/rnexecutorch/threads/HighPerformanceThreadPool.h +364 -0
- package/common/rnexecutorch/threads/utils/ThreadUtils.h +29 -0
- package/common/runner/runner.cpp +9 -3
- package/common/runner/runner.h +4 -3
- package/common/runner/text_token_generator.h +28 -10
- package/lib/Error.js +53 -0
- package/lib/ThreadPool.d.ts +10 -0
- package/lib/ThreadPool.js +28 -0
- package/lib/common/Logger.d.ts +8 -0
- package/lib/common/Logger.js +19 -0
- package/lib/constants/directories.js +2 -0
- package/lib/constants/llmDefaults.d.ts +6 -0
- package/lib/constants/llmDefaults.js +16 -0
- package/lib/constants/modelUrls.d.ts +223 -0
- package/lib/constants/modelUrls.js +322 -0
- package/lib/constants/ocr/models.d.ts +882 -0
- package/lib/constants/ocr/models.js +182 -0
- package/lib/constants/ocr/symbols.js +139 -0
- package/lib/constants/sttDefaults.d.ts +28 -0
- package/lib/constants/sttDefaults.js +68 -0
- package/lib/controllers/LLMController.d.ts +47 -0
- package/lib/controllers/LLMController.js +213 -0
- package/lib/controllers/OCRController.js +67 -0
- package/lib/controllers/SpeechToTextController.d.ts +56 -0
- package/lib/controllers/SpeechToTextController.js +349 -0
- package/lib/controllers/VerticalOCRController.js +70 -0
- package/lib/hooks/computer_vision/useClassification.d.ts +15 -0
- package/lib/hooks/computer_vision/useClassification.js +7 -0
- package/lib/hooks/computer_vision/useImageEmbeddings.d.ts +15 -0
- package/lib/hooks/computer_vision/useImageEmbeddings.js +7 -0
- package/lib/hooks/computer_vision/useImageSegmentation.d.ts +38 -0
- package/lib/hooks/computer_vision/useImageSegmentation.js +7 -0
- package/lib/hooks/computer_vision/useOCR.d.ts +20 -0
- package/lib/hooks/computer_vision/useOCR.js +41 -0
- package/lib/hooks/computer_vision/useObjectDetection.d.ts +15 -0
- package/lib/hooks/computer_vision/useObjectDetection.js +7 -0
- package/lib/hooks/computer_vision/useStyleTransfer.d.ts +15 -0
- package/lib/hooks/computer_vision/useStyleTransfer.js +7 -0
- package/lib/hooks/computer_vision/useVerticalOCR.d.ts +21 -0
- package/lib/hooks/computer_vision/useVerticalOCR.js +43 -0
- package/lib/hooks/general/useExecutorchModule.d.ts +13 -0
- package/lib/hooks/general/useExecutorchModule.js +7 -0
- package/lib/hooks/natural_language_processing/useLLM.d.ts +10 -0
- package/lib/hooks/natural_language_processing/useLLM.js +78 -0
- package/lib/hooks/natural_language_processing/useSpeechToText.d.ts +27 -0
- package/lib/hooks/natural_language_processing/useSpeechToText.js +49 -0
- package/lib/hooks/natural_language_processing/useTextEmbeddings.d.ts +16 -0
- package/lib/hooks/natural_language_processing/useTextEmbeddings.js +7 -0
- package/lib/hooks/natural_language_processing/useTokenizer.d.ts +17 -0
- package/lib/hooks/natural_language_processing/useTokenizer.js +52 -0
- package/lib/hooks/useModule.js +45 -0
- package/lib/hooks/useNonStaticModule.d.ts +20 -0
- package/lib/hooks/useNonStaticModule.js +49 -0
- package/lib/index.d.ts +48 -0
- package/lib/index.js +58 -0
- package/lib/module/controllers/LLMController.js +21 -2
- package/lib/module/controllers/LLMController.js.map +1 -1
- package/lib/module/hooks/natural_language_processing/useLLM.js +6 -2
- package/lib/module/hooks/natural_language_processing/useLLM.js.map +1 -1
- package/lib/module/modules/natural_language_processing/LLMModule.js +7 -2
- package/lib/module/modules/natural_language_processing/LLMModule.js.map +1 -1
- package/lib/module/types/llm.js.map +1 -1
- package/lib/modules/BaseModule.js +25 -0
- package/lib/modules/BaseNonStaticModule.js +14 -0
- package/lib/modules/computer_vision/ClassificationModule.d.ts +8 -0
- package/lib/modules/computer_vision/ClassificationModule.js +17 -0
- package/lib/modules/computer_vision/ImageEmbeddingsModule.d.ts +8 -0
- package/lib/modules/computer_vision/ImageEmbeddingsModule.js +17 -0
- package/lib/modules/computer_vision/ImageSegmentationModule.d.ts +11 -0
- package/lib/modules/computer_vision/ImageSegmentationModule.js +27 -0
- package/lib/modules/computer_vision/OCRModule.d.ts +14 -0
- package/lib/modules/computer_vision/OCRModule.js +17 -0
- package/lib/modules/computer_vision/ObjectDetectionModule.d.ts +9 -0
- package/lib/modules/computer_vision/ObjectDetectionModule.js +17 -0
- package/lib/modules/computer_vision/StyleTransferModule.d.ts +8 -0
- package/lib/modules/computer_vision/StyleTransferModule.js +17 -0
- package/lib/modules/computer_vision/VerticalOCRModule.d.ts +14 -0
- package/lib/modules/computer_vision/VerticalOCRModule.js +19 -0
- package/lib/modules/general/ExecutorchModule.d.ts +7 -0
- package/lib/modules/general/ExecutorchModule.js +14 -0
- package/lib/modules/natural_language_processing/LLMModule.d.ts +28 -0
- package/lib/modules/natural_language_processing/LLMModule.js +45 -0
- package/lib/modules/natural_language_processing/SpeechToTextModule.d.ts +24 -0
- package/lib/modules/natural_language_processing/SpeechToTextModule.js +36 -0
- package/lib/modules/natural_language_processing/TextEmbeddingsModule.d.ts +9 -0
- package/lib/modules/natural_language_processing/TextEmbeddingsModule.js +21 -0
- package/lib/modules/natural_language_processing/TokenizerModule.d.ts +12 -0
- package/lib/modules/natural_language_processing/TokenizerModule.js +30 -0
- package/lib/native/NativeETInstaller.js +2 -0
- package/lib/native/NativeOCR.js +2 -0
- package/lib/native/NativeVerticalOCR.js +2 -0
- package/lib/native/RnExecutorchModules.d.ts +7 -0
- package/lib/native/RnExecutorchModules.js +18 -0
- package/lib/tsconfig.tsbuildinfo +1 -0
- package/lib/types/common.d.ts +32 -0
- package/lib/types/common.js +25 -0
- package/lib/types/imageSegmentation.js +26 -0
- package/lib/types/llm.d.ts +46 -0
- package/lib/types/llm.js +9 -0
- package/lib/types/objectDetection.js +94 -0
- package/lib/types/ocr.js +1 -0
- package/lib/types/stt.d.ts +94 -0
- package/lib/types/stt.js +85 -0
- package/lib/typescript/controllers/LLMController.d.ts +4 -2
- package/lib/typescript/controllers/LLMController.d.ts.map +1 -1
- package/lib/typescript/hooks/natural_language_processing/useLLM.d.ts.map +1 -1
- package/lib/typescript/modules/natural_language_processing/LLMModule.d.ts +4 -2
- package/lib/typescript/modules/natural_language_processing/LLMModule.d.ts.map +1 -1
- package/lib/typescript/types/llm.d.ts +7 -1
- package/lib/typescript/types/llm.d.ts.map +1 -1
- package/lib/utils/ResourceFetcher.d.ts +24 -0
- package/lib/utils/ResourceFetcher.js +305 -0
- package/lib/utils/ResourceFetcherUtils.d.ts +54 -0
- package/lib/utils/ResourceFetcherUtils.js +127 -0
- package/lib/utils/llm.d.ts +6 -0
- package/lib/utils/llm.js +72 -0
- package/lib/utils/stt.js +21 -0
- package/package.json +3 -1
- package/react-native-executorch.podspec +12 -31
- package/src/controllers/LLMController.ts +29 -5
- package/src/hooks/natural_language_processing/useLLM.ts +15 -1
- package/src/modules/natural_language_processing/LLMModule.ts +14 -2
- package/src/types/llm.ts +8 -0
- package/third-party/android/libs/cpuinfo/arm64-v8a/libcpuinfo.so +0 -0
- package/third-party/android/libs/executorch/arm64-v8a/libexecutorch.so +0 -0
- package/third-party/android/libs/executorch/x86_64/libexecutorch.so +0 -0
- package/third-party/android/libs/pthreadpool/arm64-v8a/libpthreadpool.so +0 -0
- package/third-party/android/libs/tokenizers-cpp/arm64-v8a/libsentencepiece.a +0 -0
- package/third-party/android/libs/tokenizers-cpp/arm64-v8a/libtokenizers_c.a +0 -0
- package/third-party/android/libs/tokenizers-cpp/arm64-v8a/libtokenizers_cpp.a +0 -0
- package/third-party/android/libs/tokenizers-cpp/x86_64/libsentencepiece.a +0 -0
- package/third-party/android/libs/tokenizers-cpp/x86_64/libtokenizers_c.a +0 -0
- package/third-party/android/libs/tokenizers-cpp/x86_64/libtokenizers_cpp.a +0 -0
- package/third-party/include/c10/macros/Export.h +2 -86
- package/third-party/include/c10/macros/Macros.h +28 -5
- package/third-party/include/c10/util/BFloat16-inl.h +1 -4
- package/third-party/include/c10/util/BFloat16.h +5 -8
- package/third-party/include/c10/util/Half.h +5 -0
- package/third-party/include/c10/util/bit_cast.h +1 -1
- package/third-party/include/c10/util/complex.h +639 -0
- package/third-party/include/c10/util/complex_math.h +399 -0
- package/third-party/include/c10/util/complex_utils.h +41 -0
- package/third-party/include/c10/util/irange.h +2 -2
- package/third-party/include/c10/util/overflows.h +95 -0
- package/third-party/include/executorch/ExecuTorchError.h +75 -0
- package/third-party/include/executorch/ExecuTorchModule.h +115 -11
- package/third-party/include/executorch/ExecuTorchTensor.h +731 -51
- package/third-party/include/executorch/ExecuTorchValue.h +61 -9
- package/third-party/include/executorch/extension/kernel_util/make_boxed_from_unboxed_functor.h +181 -0
- package/third-party/include/executorch/extension/kernel_util/meta_programming.h +108 -0
- package/third-party/include/executorch/extension/kernel_util/type_list.h +137 -0
- package/third-party/include/executorch/extension/module/bundled_module.h +131 -0
- package/third-party/include/executorch/extension/module/module.h +46 -20
- package/third-party/include/executorch/extension/threadpool/cpuinfo_utils.h +1 -3
- package/third-party/include/executorch/extension/threadpool/threadpool.h +1 -3
- package/third-party/include/executorch/extension/threadpool/threadpool_guard.h +35 -0
- package/third-party/include/executorch/runtime/backend/backend_execution_context.h +3 -3
- package/third-party/include/executorch/runtime/backend/backend_init_context.h +12 -6
- package/third-party/include/executorch/runtime/backend/backend_option_context.h +34 -0
- package/third-party/include/executorch/runtime/backend/interface.h +70 -9
- package/third-party/include/executorch/runtime/backend/options.h +206 -0
- package/third-party/include/executorch/runtime/core/evalue.h +19 -25
- package/third-party/include/executorch/runtime/core/event_tracer.h +32 -17
- package/third-party/include/executorch/runtime/core/event_tracer_hooks.h +23 -14
- package/third-party/include/executorch/runtime/core/exec_aten/exec_aten.h +32 -9
- package/third-party/include/executorch/runtime/core/exec_aten/util/dim_order_util.h +3 -2
- package/third-party/include/executorch/runtime/core/exec_aten/util/scalar_type_util.h +43 -75
- package/third-party/include/executorch/runtime/core/exec_aten/util/tensor_util.h +88 -87
- package/third-party/include/executorch/runtime/core/function_ref.h +100 -0
- package/third-party/include/executorch/runtime/core/named_data_map.h +14 -14
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/macros/Export.h +2 -86
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h +28 -5
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +1 -4
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16.h +5 -8
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/Half.h +5 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/bit_cast.h +1 -1
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/complex.h +639 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/complex_math.h +399 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/complex_utils.h +41 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/irange.h +2 -2
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/overflows.h +95 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/macros/Export.h +88 -0
- package/third-party/include/executorch/runtime/core/portable_type/complex.h +6 -29
- package/third-party/include/executorch/runtime/core/portable_type/tensor_impl.h +20 -0
- package/third-party/include/executorch/runtime/core/span.h +4 -0
- package/third-party/include/executorch/runtime/core/tag.h +19 -0
- package/third-party/include/executorch/runtime/core/tensor_layout.h +2 -2
- package/third-party/include/executorch/runtime/executor/method.h +15 -3
- package/third-party/include/executorch/runtime/executor/method_meta.h +34 -5
- package/third-party/include/executorch/runtime/executor/program.h +3 -4
- package/third-party/include/executorch/runtime/executor/pte_data_map.h +9 -8
- package/third-party/include/executorch/runtime/executor/tensor_parser.h +14 -13
- package/third-party/include/executorch/runtime/kernel/kernel_runtime_context.h +5 -5
- package/third-party/include/executorch/runtime/kernel/operator_registry.h +21 -19
- package/third-party/include/executorch/runtime/platform/compiler.h +8 -0
- package/third-party/include/executorch/runtime/platform/platform.h +126 -0
- package/third-party/include/headeronly/macros/Export.h +88 -0
- package/third-party/include/tokenizers-cpp/tokenizers_c.h +61 -0
- package/third-party/include/torch/headeronly/macros/Export.h +88 -0
- package/third-party/ios/ExecutorchLib.xcframework/Info.plist +43 -0
- package/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib +0 -0
- package/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Info.plist +0 -0
- package/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib +0 -0
- package/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Info.plist +0 -0
- package/third-party/ios/libs/cpuinfo/libcpuinfo.a +0 -0
- package/third-party/ios/libs/pthreadpool/physical-arm64-release/libpthreadpool.a +0 -0
- package/third-party/ios/libs/pthreadpool/simulator-arm64-debug/libpthreadpool.a +0 -0
- package/ios/libs/executorch/libbackend_coreml_ios.a +0 -0
- package/ios/libs/executorch/libbackend_coreml_simulator.a +0 -0
- package/ios/libs/executorch/libbackend_mps_ios.a +0 -0
- package/ios/libs/executorch/libbackend_mps_simulator.a +0 -0
- package/ios/libs/executorch/libbackend_xnnpack_ios.a +0 -0
- package/ios/libs/executorch/libbackend_xnnpack_simulator.a +0 -0
- package/ios/libs/executorch/libexecutorch_ios.a +0 -0
- package/ios/libs/executorch/libexecutorch_simulator.a +0 -0
- package/ios/libs/executorch/libkernels_custom_ios.a +0 -0
- package/ios/libs/executorch/libkernels_custom_simulator.a +0 -0
- package/ios/libs/executorch/libkernels_optimized_ios.a +0 -0
- package/ios/libs/executorch/libkernels_optimized_simulator.a +0 -0
- package/ios/libs/executorch/libkernels_portable_ios.a +0 -0
- package/ios/libs/executorch/libkernels_portable_simulator.a +0 -0
- package/ios/libs/executorch/libkernels_quantized_ios.a +0 -0
- package/ios/libs/executorch/libkernels_quantized_simulator.a +0 -0
- package/third-party/ios/ios.toolchain.cmake +0 -1122
- /package/{ios → third-party/ios}/libs/tokenizers-cpp/physical-arm64-release/libsentencepiece.a +0 -0
- /package/{ios → third-party/ios}/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_c.a +0 -0
- /package/{ios → third-party/ios}/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_cpp.a +0 -0
- /package/{ios → third-party/ios}/libs/tokenizers-cpp/simulator-arm64-debug/libsentencepiece.a +0 -0
- /package/{ios → third-party/ios}/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_c.a +0 -0
- /package/{ios → third-party/ios}/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_cpp.a +0 -0
|
@@ -22,8 +22,7 @@
|
|
|
22
22
|
#include <executorch/runtime/platform/compiler.h>
|
|
23
23
|
|
|
24
24
|
namespace executorch {
|
|
25
|
-
namespace
|
|
26
|
-
|
|
25
|
+
namespace ET_RUNTIME_NAMESPACE {
|
|
27
26
|
/**
|
|
28
27
|
* Interface to access and retrieve data via name.
|
|
29
28
|
* See executorch/extension/flat_tensor/ for an example.
|
|
@@ -32,33 +31,34 @@ class ET_EXPERIMENTAL NamedDataMap {
|
|
|
32
31
|
public:
|
|
33
32
|
virtual ~NamedDataMap() = default;
|
|
34
33
|
/**
|
|
35
|
-
* Get
|
|
34
|
+
* Get tensor_layout by key.
|
|
36
35
|
*
|
|
37
36
|
* @param key The name of the tensor.
|
|
38
|
-
* @return Result containing TensorLayout
|
|
37
|
+
* @return Result containing TensorLayout.
|
|
39
38
|
*/
|
|
40
|
-
ET_NODISCARD virtual Result<const
|
|
41
|
-
|
|
39
|
+
ET_NODISCARD virtual Result<const TensorLayout>
|
|
40
|
+
get_tensor_layout(executorch::aten::string_view key) const = 0;
|
|
42
41
|
/**
|
|
43
42
|
* Get data by key.
|
|
44
43
|
*
|
|
45
44
|
* @param key Name of the data.
|
|
46
|
-
* @return Result containing a FreeableBuffer
|
|
45
|
+
* @return Result containing a FreeableBuffer.
|
|
47
46
|
*/
|
|
48
47
|
ET_NODISCARD virtual Result<FreeableBuffer>
|
|
49
|
-
get_data(
|
|
48
|
+
get_data(executorch::aten::string_view key) const = 0;
|
|
50
49
|
|
|
51
50
|
/**
|
|
52
51
|
* Loads data corresponding to the key into the provided buffer.
|
|
53
52
|
*
|
|
54
53
|
* @param key The name of the data.
|
|
55
|
-
* @param size The number of bytes to load. Use `
|
|
56
|
-
* size of the data for a given key.
|
|
54
|
+
* @param size The number of bytes to load. Use `get_tensor_layout` to
|
|
55
|
+
* retrieve the size of the data for a given key.
|
|
57
56
|
* @param buffer The buffer to load the data into. Must point to at least
|
|
58
57
|
* `size` bytes of memory.
|
|
59
58
|
* @returns an Error indicating if the load was successful.
|
|
60
59
|
*/
|
|
61
|
-
ET_NODISCARD virtual Error load_data_into(
|
|
60
|
+
ET_NODISCARD virtual Error load_data_into(executorch::aten::string_view key,
|
|
61
|
+
void *buffer,
|
|
62
62
|
size_t size) const = 0;
|
|
63
63
|
|
|
64
64
|
/**
|
|
@@ -66,7 +66,7 @@ public:
|
|
|
66
66
|
*
|
|
67
67
|
* @return Result containing the number of keys.
|
|
68
68
|
*/
|
|
69
|
-
ET_NODISCARD virtual Result<
|
|
69
|
+
ET_NODISCARD virtual Result<uint32_t> get_num_keys() const = 0;
|
|
70
70
|
|
|
71
71
|
/**
|
|
72
72
|
* Get the key at the given index.
|
|
@@ -75,10 +75,10 @@ public:
|
|
|
75
75
|
* @return Result containing the key at the given index. Note: the returned
|
|
76
76
|
* pointer is only valid for the lifetime of the DataMap.
|
|
77
77
|
*/
|
|
78
|
-
ET_NODISCARD virtual Result<const char *> get_key(
|
|
78
|
+
ET_NODISCARD virtual Result<const char *> get_key(uint32_t index) const = 0;
|
|
79
79
|
};
|
|
80
80
|
|
|
81
|
-
} // namespace
|
|
81
|
+
} // namespace ET_RUNTIME_NAMESPACE
|
|
82
82
|
} // namespace executorch
|
|
83
83
|
|
|
84
84
|
#ifdef __GNUC__
|
|
@@ -2,95 +2,11 @@
|
|
|
2
2
|
#ifndef C10_MACROS_EXPORT_H_
|
|
3
3
|
#define C10_MACROS_EXPORT_H_
|
|
4
4
|
|
|
5
|
-
/* Header file to define the common scaffolding for exported symbols.
|
|
6
|
-
*
|
|
7
|
-
* Export is by itself a quite tricky situation to deal with, and if you are
|
|
8
|
-
* hitting this file, make sure you start with the background here:
|
|
9
|
-
* - Linux: https://gcc.gnu.org/wiki/Visibility
|
|
10
|
-
* - Windows:
|
|
11
|
-
* https://docs.microsoft.com/en-us/cpp/cpp/dllexport-dllimport?view=vs-2017
|
|
12
|
-
*
|
|
13
|
-
* Do NOT include this file directly. Instead, use c10/macros/Macros.h
|
|
14
|
-
*/
|
|
15
|
-
|
|
16
|
-
// You do not need to edit this part of file unless you are changing the core
|
|
17
|
-
// pytorch export abstractions.
|
|
18
|
-
//
|
|
19
|
-
// This part defines the C10 core export and import macros. This is controlled
|
|
20
|
-
// by whether we are building shared libraries or not, which is determined
|
|
21
|
-
// during build time and codified in c10/core/cmake_macros.h.
|
|
22
|
-
// When the library is built as a shared lib, EXPORT and IMPORT will contain
|
|
23
|
-
// visibility attributes. If it is being built as a static lib, then EXPORT
|
|
24
|
-
// and IMPORT basically have no effect.
|
|
25
|
-
|
|
26
|
-
// As a rule of thumb, you should almost NEVER mix static and shared builds for
|
|
27
|
-
// libraries that depend on c10. AKA, if c10 is built as a static library, we
|
|
28
|
-
// recommend everything dependent on c10 to be built statically. If c10 is built
|
|
29
|
-
// as a shared library, everything dependent on it should be built as shared. In
|
|
30
|
-
// the PyTorch project, all native libraries shall use the macro
|
|
31
|
-
// C10_BUILD_SHARED_LIB to check whether pytorch is building shared or static
|
|
32
|
-
// libraries.
|
|
33
|
-
|
|
34
|
-
// For build systems that do not directly depend on CMake and directly build
|
|
35
|
-
// from the source directory (such as Buck), one may not have a cmake_macros.h
|
|
36
|
-
// file at all. In this case, the build system is responsible for providing
|
|
37
|
-
// correct macro definitions corresponding to the cmake_macros.h.in file.
|
|
38
|
-
//
|
|
39
|
-
// In such scenarios, one should define the macro
|
|
40
|
-
// C10_USING_CUSTOM_GENERATED_MACROS
|
|
41
|
-
// to inform this header that it does not need to include the cmake_macros.h
|
|
42
|
-
// file.
|
|
43
|
-
|
|
44
5
|
#ifndef C10_USING_CUSTOM_GENERATED_MACROS
|
|
45
6
|
#include <c10/macros/cmake_macros.h>
|
|
46
7
|
#endif // C10_USING_CUSTOM_GENERATED_MACROS
|
|
47
8
|
|
|
48
|
-
#
|
|
49
|
-
#define C10_HIDDEN
|
|
50
|
-
#if defined(C10_BUILD_SHARED_LIBS)
|
|
51
|
-
#define C10_EXPORT __declspec(dllexport)
|
|
52
|
-
#define C10_IMPORT __declspec(dllimport)
|
|
53
|
-
#else
|
|
54
|
-
#define C10_EXPORT
|
|
55
|
-
#define C10_IMPORT
|
|
56
|
-
#endif
|
|
57
|
-
#else // _WIN32
|
|
58
|
-
#if defined(__GNUC__)
|
|
59
|
-
#define C10_EXPORT __attribute__((__visibility__("default")))
|
|
60
|
-
#define C10_HIDDEN __attribute__((__visibility__("hidden")))
|
|
61
|
-
#else // defined(__GNUC__)
|
|
62
|
-
#define C10_EXPORT
|
|
63
|
-
#define C10_HIDDEN
|
|
64
|
-
#endif // defined(__GNUC__)
|
|
65
|
-
#define C10_IMPORT C10_EXPORT
|
|
66
|
-
#endif // _WIN32
|
|
67
|
-
|
|
68
|
-
#ifdef NO_EXPORT
|
|
69
|
-
#undef C10_EXPORT
|
|
70
|
-
#define C10_EXPORT
|
|
71
|
-
#endif
|
|
72
|
-
|
|
73
|
-
// Definition of an adaptive XX_API macro, that depends on whether you are
|
|
74
|
-
// building the library itself or not, routes to XX_EXPORT and XX_IMPORT.
|
|
75
|
-
// Basically, you will need to do this for each shared library that you are
|
|
76
|
-
// building, and the instruction is as follows: assuming that you are building
|
|
77
|
-
// a library called libawesome.so. You should:
|
|
78
|
-
// (1) for your cmake target (usually done by "add_library(awesome, ...)"),
|
|
79
|
-
// define a macro called AWESOME_BUILD_MAIN_LIB using
|
|
80
|
-
// target_compile_options.
|
|
81
|
-
// (2) define the AWESOME_API macro similar to the one below.
|
|
82
|
-
// And in the source file of your awesome library, use AWESOME_API to
|
|
83
|
-
// annotate public symbols.
|
|
84
|
-
|
|
85
|
-
// Here, for the C10 library, we will define the macro C10_API for both import
|
|
86
|
-
// and export.
|
|
87
|
-
|
|
88
|
-
// This one is being used by libc10.so
|
|
89
|
-
#ifdef C10_BUILD_MAIN_LIB
|
|
90
|
-
#define C10_API C10_EXPORT
|
|
91
|
-
#else
|
|
92
|
-
#define C10_API C10_IMPORT
|
|
93
|
-
#endif
|
|
9
|
+
#include <torch/headeronly/macros/Export.h>
|
|
94
10
|
|
|
95
11
|
// This one is being used by libtorch.so
|
|
96
12
|
#ifdef CAFFE2_BUILD_MAIN_LIB
|
|
@@ -160,4 +76,4 @@
|
|
|
160
76
|
#define C10_API_ENUM
|
|
161
77
|
#endif
|
|
162
78
|
|
|
163
|
-
#endif //
|
|
79
|
+
#endif // C10_MACROS_EXPORT_H_
|
|
@@ -242,7 +242,7 @@ using namespace c10::xpu;
|
|
|
242
242
|
#ifdef __HIPCC__
|
|
243
243
|
// Unlike CUDA, HIP requires a HIP header to be included for __host__ to work.
|
|
244
244
|
// We do this #include here so that C10_HOST_DEVICE and friends will Just Work.
|
|
245
|
-
// See https://github.com/ROCm
|
|
245
|
+
// See https://github.com/ROCm/hip/issues/441
|
|
246
246
|
#include <hip/hip_runtime.h>
|
|
247
247
|
#endif
|
|
248
248
|
|
|
@@ -390,11 +390,24 @@ __host__ __device__
|
|
|
390
390
|
#endif // __SYCL_DEVICE_ONLY__
|
|
391
391
|
}
|
|
392
392
|
#endif // NDEBUG
|
|
393
|
-
// ROCm
|
|
393
|
+
// ROCm disables kernel assert by default for performance considerations.
|
|
394
|
+
// Though ROCm supports __assert_fail, it uses kernel printf which has
|
|
395
|
+
// a non-negligible performance impact even if the assert condition is
|
|
396
|
+
// never triggered. We choose to use abort() instead which will still
|
|
397
|
+
// terminate the application but without a more useful error message.
|
|
394
398
|
#if !defined(C10_USE_ROCM_KERNEL_ASSERT) and defined(USE_ROCM)
|
|
395
|
-
#define CUDA_KERNEL_ASSERT(cond)
|
|
396
|
-
|
|
397
|
-
|
|
399
|
+
#define CUDA_KERNEL_ASSERT(cond) \
|
|
400
|
+
if C10_UNLIKELY (!(cond)) { \
|
|
401
|
+
abort(); \
|
|
402
|
+
}
|
|
403
|
+
#define CUDA_KERNEL_ASSERT_MSG(cond, msg) \
|
|
404
|
+
if C10_UNLIKELY (!(cond)) { \
|
|
405
|
+
abort(); \
|
|
406
|
+
}
|
|
407
|
+
#define SYCL_KERNEL_ASSERT(cond) \
|
|
408
|
+
if C10_UNLIKELY (!(cond)) { \
|
|
409
|
+
abort(); \
|
|
410
|
+
}
|
|
398
411
|
#else
|
|
399
412
|
#define CUDA_KERNEL_ASSERT(cond) \
|
|
400
413
|
if (C10_UNLIKELY(!(cond))) { \
|
|
@@ -494,4 +507,14 @@ __host__ __device__
|
|
|
494
507
|
|
|
495
508
|
#endif
|
|
496
509
|
|
|
510
|
+
// This macro is used to find older C++ compilers
|
|
511
|
+
// that don't support move optimization for return values.
|
|
512
|
+
|
|
513
|
+
#if (defined(__GNUC__) && __GNUC__ < 13) || \
|
|
514
|
+
(defined(__clang_major__) && __clang_major__ < 13)
|
|
515
|
+
#define C10_RETURN_MOVE_IF_OLD_COMPILER 1
|
|
516
|
+
#else
|
|
517
|
+
#define C10_RETURN_MOVE_IF_OLD_COMPILER 0
|
|
518
|
+
#endif
|
|
519
|
+
|
|
497
520
|
#endif // C10_MACROS_MACROS_H_
|
package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h
CHANGED
|
@@ -10,14 +10,11 @@ C10_CLANG_DIAGNOSTIC_PUSH()
|
|
|
10
10
|
C10_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion")
|
|
11
11
|
#endif
|
|
12
12
|
|
|
13
|
-
#if defined(SYCL_EXT_ONEAPI_BFLOAT16_MATH_FUNCTIONS)
|
|
14
13
|
#if defined(CL_SYCL_LANGUAGE_VERSION)
|
|
15
14
|
#include <CL/sycl.hpp> // for SYCL 1.2.1
|
|
16
|
-
#
|
|
15
|
+
#elif defined(SYCL_LANGUAGE_VERSION)
|
|
17
16
|
#include <sycl/sycl.hpp> // for SYCL 2020
|
|
18
17
|
#endif
|
|
19
|
-
#include <ext/oneapi/bfloat16.hpp>
|
|
20
|
-
#endif
|
|
21
18
|
|
|
22
19
|
namespace c10 {
|
|
23
20
|
|
|
@@ -14,14 +14,11 @@
|
|
|
14
14
|
#include <cuda_bf16.h>
|
|
15
15
|
#endif
|
|
16
16
|
|
|
17
|
-
#if defined(SYCL_EXT_ONEAPI_BFLOAT16_MATH_FUNCTIONS)
|
|
18
17
|
#if defined(CL_SYCL_LANGUAGE_VERSION)
|
|
19
18
|
#include <CL/sycl.hpp> // for SYCL 1.2.1
|
|
20
|
-
#
|
|
19
|
+
#elif defined(SYCL_LANGUAGE_VERSION)
|
|
21
20
|
#include <sycl/sycl.hpp> // for SYCL 2020
|
|
22
21
|
#endif
|
|
23
|
-
#include <ext/oneapi/bfloat16.hpp>
|
|
24
|
-
#endif
|
|
25
22
|
|
|
26
23
|
namespace c10 {
|
|
27
24
|
|
|
@@ -31,7 +28,7 @@ inline C10_HOST_DEVICE float f32_from_bits(uint16_t src) {
|
|
|
31
28
|
uint32_t tmp = src;
|
|
32
29
|
tmp <<= 16;
|
|
33
30
|
|
|
34
|
-
#if defined(USE_ROCM)
|
|
31
|
+
#if defined(USE_ROCM) && defined(__HIPCC__)
|
|
35
32
|
float *tempRes;
|
|
36
33
|
|
|
37
34
|
// We should be using memcpy in order to respect the strict aliasing rule
|
|
@@ -48,7 +45,7 @@ inline C10_HOST_DEVICE float f32_from_bits(uint16_t src) {
|
|
|
48
45
|
inline C10_HOST_DEVICE uint16_t bits_from_f32(float src) {
|
|
49
46
|
uint32_t res = 0;
|
|
50
47
|
|
|
51
|
-
#if defined(USE_ROCM)
|
|
48
|
+
#if defined(USE_ROCM) && defined(__HIPCC__)
|
|
52
49
|
// We should be using memcpy in order to respect the strict aliasing rule
|
|
53
50
|
// but it fails in the HIP environment.
|
|
54
51
|
uint32_t *tempRes = reinterpret_cast<uint32_t *>(&src);
|
|
@@ -61,7 +58,7 @@ inline C10_HOST_DEVICE uint16_t bits_from_f32(float src) {
|
|
|
61
58
|
}
|
|
62
59
|
|
|
63
60
|
inline C10_HOST_DEVICE uint16_t round_to_nearest_even(float src) {
|
|
64
|
-
#if defined(USE_ROCM)
|
|
61
|
+
#if defined(USE_ROCM) && defined(__HIPCC__)
|
|
65
62
|
if (src != src) {
|
|
66
63
|
#elif defined(_MSC_VER)
|
|
67
64
|
if (isnan(src)) {
|
|
@@ -87,7 +84,7 @@ struct alignas(2) BFloat16 {
|
|
|
87
84
|
uint16_t x;
|
|
88
85
|
|
|
89
86
|
// HIP wants __host__ __device__ tag, CUDA does not
|
|
90
|
-
#if defined(USE_ROCM)
|
|
87
|
+
#if defined(USE_ROCM) && defined(__HIPCC__)
|
|
91
88
|
C10_HOST_DEVICE BFloat16() = default;
|
|
92
89
|
#else
|
|
93
90
|
BFloat16() = default;
|
|
@@ -242,7 +242,12 @@ C10_HOST_DEVICE inline float fp16_ieee_to_fp32_value(uint16_t h) {
|
|
|
242
242
|
// const float exp_scale = 0x1.0p-112f;
|
|
243
243
|
constexpr uint32_t scale_bits = (uint32_t)15 << 23;
|
|
244
244
|
float exp_scale_val = 0;
|
|
245
|
+
#if defined(_MSC_VER) && defined(__clang__)
|
|
246
|
+
__builtin_memcpy(&exp_scale_val, &scale_bits, sizeof(exp_scale_val));
|
|
247
|
+
#else
|
|
245
248
|
std::memcpy(&exp_scale_val, &scale_bits, sizeof(exp_scale_val));
|
|
249
|
+
#endif
|
|
250
|
+
|
|
246
251
|
const float exp_scale = exp_scale_val;
|
|
247
252
|
const float normalized_value =
|
|
248
253
|
fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale;
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
#include <cstring>
|
|
4
4
|
#include <type_traits>
|
|
5
5
|
|
|
6
|
-
#if __has_include(<bit>) && (
|
|
6
|
+
#if __has_include(<bit>) && (defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L)
|
|
7
7
|
#include <bit>
|
|
8
8
|
#define C10_HAVE_STD_BIT_CAST 1
|
|
9
9
|
#else
|