react-native-executorch 0.5.15 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (277)
  1. package/README.md +42 -36
  2. package/android/CMakeLists.txt +13 -25
  3. package/android/build.gradle +2 -3
  4. package/android/libs/classes.jar +0 -0
  5. package/android/src/main/cpp/CMakeLists.txt +2 -1
  6. package/common/rnexecutorch/RnExecutorchInstaller.cpp +18 -0
  7. package/common/rnexecutorch/TokenizerModule.cpp +3 -3
  8. package/common/rnexecutorch/data_processing/Numerical.cpp +31 -23
  9. package/common/rnexecutorch/data_processing/Numerical.h +6 -1
  10. package/common/rnexecutorch/data_processing/dsp.cpp +0 -46
  11. package/common/rnexecutorch/host_objects/JsiConversions.h +16 -0
  12. package/common/rnexecutorch/host_objects/ModelHostObject.h +26 -11
  13. package/common/rnexecutorch/jsi/OwningArrayBuffer.h +19 -2
  14. package/common/rnexecutorch/metaprogramming/TypeConcepts.h +0 -20
  15. package/common/rnexecutorch/models/BaseModel.cpp +12 -11
  16. package/common/rnexecutorch/models/BaseModel.h +18 -10
  17. package/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp +3 -11
  18. package/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp +0 -1
  19. package/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp +6 -12
  20. package/common/rnexecutorch/models/llm/LLM.cpp +25 -8
  21. package/common/rnexecutorch/models/llm/LLM.h +4 -4
  22. package/common/rnexecutorch/models/ocr/CTCLabelConverter.h +1 -1
  23. package/common/rnexecutorch/models/ocr/utils/RecognitionHandlerUtils.cpp +7 -4
  24. package/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp +8 -13
  25. package/common/rnexecutorch/models/speech_to_text/SpeechToText.h +1 -3
  26. package/common/rnexecutorch/models/speech_to_text/asr/ASR.cpp +12 -19
  27. package/common/rnexecutorch/models/speech_to_text/asr/ASR.h +4 -5
  28. package/common/rnexecutorch/models/text_to_image/Constants.h +9 -0
  29. package/common/rnexecutorch/models/text_to_image/Decoder.cpp +32 -0
  30. package/common/rnexecutorch/models/text_to_image/Decoder.h +24 -0
  31. package/common/rnexecutorch/models/text_to_image/Encoder.cpp +44 -0
  32. package/common/rnexecutorch/models/text_to_image/Encoder.h +32 -0
  33. package/common/rnexecutorch/models/text_to_image/Scheduler.cpp +152 -0
  34. package/common/rnexecutorch/models/text_to_image/Scheduler.h +41 -0
  35. package/common/rnexecutorch/models/text_to_image/TextToImage.cpp +141 -0
  36. package/common/rnexecutorch/models/text_to_image/TextToImage.h +64 -0
  37. package/common/rnexecutorch/models/text_to_image/UNet.cpp +38 -0
  38. package/common/rnexecutorch/models/text_to_image/UNet.h +28 -0
  39. package/common/rnexecutorch/models/voice_activity_detection/Constants.h +27 -0
  40. package/common/rnexecutorch/models/voice_activity_detection/Types.h +12 -0
  41. package/common/rnexecutorch/models/voice_activity_detection/Utils.cpp +15 -0
  42. package/common/rnexecutorch/models/voice_activity_detection/Utils.h +13 -0
  43. package/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.cpp +160 -0
  44. package/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h +36 -0
  45. package/common/rnexecutorch/tests/CMakeLists.txt +30 -0
  46. package/common/rnexecutorch/tests/NumericalTest.cpp +110 -0
  47. package/common/rnexecutorch/tests/README.md +30 -13
  48. package/common/rnexecutorch/threads/GlobalThreadPool.h +4 -0
  49. package/common/runner/arange_util.cpp +44 -0
  50. package/common/runner/arange_util.h +37 -0
  51. package/common/runner/constants.h +28 -0
  52. package/common/runner/io_manager.h +240 -0
  53. package/common/runner/irunner.h +87 -16
  54. package/common/runner/kernel_includes.h +23 -0
  55. package/common/runner/runner.cpp +151 -66
  56. package/common/runner/runner.h +39 -22
  57. package/common/runner/sampler.cpp +8 -1
  58. package/common/runner/sampler.h +4 -2
  59. package/common/runner/stats.h +1 -4
  60. package/common/runner/text_decoder_runner.cpp +26 -12
  61. package/common/runner/text_decoder_runner.h +52 -31
  62. package/common/runner/text_prefiller.cpp +46 -12
  63. package/common/runner/text_prefiller.h +38 -4
  64. package/common/runner/text_token_generator.h +51 -26
  65. package/common/runner/util.h +53 -8
  66. package/ios/RnExecutorch.xcodeproj/project.pbxproj +0 -23
  67. package/lib/module/Error.js +1 -0
  68. package/lib/module/Error.js.map +1 -1
  69. package/lib/module/constants/directories.js +1 -1
  70. package/lib/module/constants/directories.js.map +1 -1
  71. package/lib/module/constants/modelUrls.js +32 -1
  72. package/lib/module/constants/modelUrls.js.map +1 -1
  73. package/lib/module/constants/ocr/models.js +7 -7
  74. package/lib/module/constants/ocr/models.js.map +1 -1
  75. package/lib/module/constants/ocr/symbols.js +3 -2
  76. package/lib/module/constants/ocr/symbols.js.map +1 -1
  77. package/lib/module/controllers/LLMController.js +10 -1
  78. package/lib/module/controllers/LLMController.js.map +1 -1
  79. package/lib/module/controllers/OCRController.js +3 -3
  80. package/lib/module/controllers/OCRController.js.map +1 -1
  81. package/lib/module/controllers/VerticalOCRController.js +2 -2
  82. package/lib/module/controllers/VerticalOCRController.js.map +1 -1
  83. package/lib/module/hooks/computer_vision/useOCR.js +3 -3
  84. package/lib/module/hooks/computer_vision/useOCR.js.map +1 -1
  85. package/lib/module/hooks/{useNonStaticModule.js → computer_vision/useTextToImage.js} +21 -16
  86. package/lib/module/hooks/computer_vision/useTextToImage.js.map +1 -0
  87. package/lib/module/hooks/computer_vision/useVerticalOCR.js +3 -3
  88. package/lib/module/hooks/computer_vision/useVerticalOCR.js.map +1 -1
  89. package/lib/module/hooks/natural_language_processing/useLLM.js +3 -3
  90. package/lib/module/hooks/natural_language_processing/useLLM.js.map +1 -1
  91. package/lib/module/hooks/natural_language_processing/useTokenizer.js +5 -5
  92. package/lib/module/hooks/natural_language_processing/useTokenizer.js.map +1 -1
  93. package/lib/module/hooks/natural_language_processing/useVAD.js +13 -0
  94. package/lib/module/hooks/natural_language_processing/useVAD.js.map +1 -0
  95. package/lib/module/index.js +7 -2
  96. package/lib/module/index.js.map +1 -1
  97. package/lib/module/modules/computer_vision/OCRModule.js +2 -2
  98. package/lib/module/modules/computer_vision/OCRModule.js.map +1 -1
  99. package/lib/module/modules/computer_vision/TextToImageModule.js +48 -0
  100. package/lib/module/modules/computer_vision/TextToImageModule.js.map +1 -0
  101. package/lib/module/modules/computer_vision/VerticalOCRModule.js +2 -2
  102. package/lib/module/modules/computer_vision/VerticalOCRModule.js.map +1 -1
  103. package/lib/module/modules/natural_language_processing/SpeechToTextModule.js +7 -4
  104. package/lib/module/modules/natural_language_processing/SpeechToTextModule.js.map +1 -1
  105. package/lib/module/modules/natural_language_processing/VADModule.js +19 -0
  106. package/lib/module/modules/natural_language_processing/VADModule.js.map +1 -0
  107. package/lib/module/types/llm.js.map +1 -1
  108. package/lib/module/types/vad.js +2 -0
  109. package/lib/module/types/vad.js.map +1 -0
  110. package/lib/module/utils/ResourceFetcher.js +2 -1
  111. package/lib/module/utils/ResourceFetcher.js.map +1 -1
  112. package/lib/module/utils/ResourceFetcherUtils.js +6 -6
  113. package/lib/module/utils/ResourceFetcherUtils.js.map +1 -1
  114. package/lib/typescript/Error.d.ts +1 -0
  115. package/lib/typescript/Error.d.ts.map +1 -1
  116. package/lib/typescript/constants/modelUrls.d.ts +23 -0
  117. package/lib/typescript/constants/modelUrls.d.ts.map +1 -1
  118. package/lib/typescript/constants/ocr/symbols.d.ts +1 -1
  119. package/lib/typescript/constants/ocr/symbols.d.ts.map +1 -1
  120. package/lib/typescript/controllers/LLMController.d.ts.map +1 -1
  121. package/lib/typescript/controllers/OCRController.d.ts +1 -1
  122. package/lib/typescript/controllers/OCRController.d.ts.map +1 -1
  123. package/lib/typescript/controllers/VerticalOCRController.d.ts +1 -1
  124. package/lib/typescript/controllers/VerticalOCRController.d.ts.map +1 -1
  125. package/lib/typescript/hooks/computer_vision/useOCR.d.ts +1 -1
  126. package/lib/typescript/hooks/computer_vision/useOCR.d.ts.map +1 -1
  127. package/lib/typescript/hooks/computer_vision/useTextToImage.d.ts +22 -0
  128. package/lib/typescript/hooks/computer_vision/useTextToImage.d.ts.map +1 -0
  129. package/lib/typescript/hooks/computer_vision/useVerticalOCR.d.ts +1 -1
  130. package/lib/typescript/hooks/computer_vision/useVerticalOCR.d.ts.map +1 -1
  131. package/lib/typescript/hooks/natural_language_processing/useLLM.d.ts.map +1 -1
  132. package/lib/typescript/hooks/natural_language_processing/useSpeechToText.d.ts +2 -2
  133. package/lib/typescript/hooks/natural_language_processing/useVAD.d.ts +16 -0
  134. package/lib/typescript/hooks/natural_language_processing/useVAD.d.ts.map +1 -0
  135. package/lib/typescript/index.d.ts +8 -1
  136. package/lib/typescript/index.d.ts.map +1 -1
  137. package/lib/typescript/modules/computer_vision/OCRModule.d.ts +1 -1
  138. package/lib/typescript/modules/computer_vision/OCRModule.d.ts.map +1 -1
  139. package/lib/typescript/modules/computer_vision/TextToImageModule.d.ts +16 -0
  140. package/lib/typescript/modules/computer_vision/TextToImageModule.d.ts.map +1 -0
  141. package/lib/typescript/modules/computer_vision/VerticalOCRModule.d.ts +1 -1
  142. package/lib/typescript/modules/computer_vision/VerticalOCRModule.d.ts.map +1 -1
  143. package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts +3 -2
  144. package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts.map +1 -1
  145. package/lib/typescript/modules/natural_language_processing/VADModule.d.ts +10 -0
  146. package/lib/typescript/modules/natural_language_processing/VADModule.d.ts.map +1 -0
  147. package/lib/typescript/types/llm.d.ts +2 -0
  148. package/lib/typescript/types/llm.d.ts.map +1 -1
  149. package/lib/typescript/types/vad.d.ts +5 -0
  150. package/lib/typescript/types/vad.d.ts.map +1 -0
  151. package/lib/typescript/utils/ResourceFetcher.d.ts +29 -0
  152. package/lib/typescript/utils/ResourceFetcher.d.ts.map +1 -1
  153. package/lib/typescript/utils/ResourceFetcherUtils.d.ts +2 -2
  154. package/lib/typescript/utils/ResourceFetcherUtils.d.ts.map +1 -1
  155. package/package.json +11 -8
  156. package/react-native-executorch.podspec +9 -9
  157. package/src/Error.ts +1 -0
  158. package/src/constants/directories.ts +1 -1
  159. package/src/constants/modelUrls.ts +36 -1
  160. package/src/constants/ocr/models.ts +7 -7
  161. package/src/constants/ocr/symbols.ts +3 -2
  162. package/src/controllers/LLMController.ts +12 -1
  163. package/src/controllers/OCRController.ts +3 -3
  164. package/src/controllers/VerticalOCRController.ts +2 -2
  165. package/src/hooks/computer_vision/useOCR.ts +4 -5
  166. package/src/hooks/computer_vision/useTextToImage.ts +92 -0
  167. package/src/hooks/computer_vision/useVerticalOCR.ts +4 -5
  168. package/src/hooks/natural_language_processing/useLLM.ts +3 -4
  169. package/src/hooks/natural_language_processing/useTokenizer.ts +5 -5
  170. package/src/hooks/natural_language_processing/useVAD.ts +15 -0
  171. package/src/index.ts +20 -1
  172. package/src/modules/computer_vision/OCRModule.ts +2 -2
  173. package/src/modules/computer_vision/TextToImageModule.ts +93 -0
  174. package/src/modules/computer_vision/VerticalOCRModule.ts +2 -2
  175. package/src/modules/natural_language_processing/SpeechToTextModule.ts +8 -4
  176. package/src/modules/natural_language_processing/VADModule.ts +27 -0
  177. package/src/types/llm.ts +2 -0
  178. package/src/types/vad.ts +4 -0
  179. package/src/utils/ResourceFetcher.ts +2 -1
  180. package/src/utils/ResourceFetcherUtils.ts +8 -8
  181. package/third-party/android/libs/cpuinfo/arm64-v8a/libcpuinfo.so +0 -0
  182. package/third-party/android/libs/executorch/arm64-v8a/libexecutorch.so +0 -0
  183. package/third-party/android/libs/executorch/x86_64/libexecutorch.so +0 -0
  184. package/third-party/android/libs/pthreadpool/arm64-v8a/libpthreadpool.so +0 -0
  185. package/third-party/include/c10/macros/Export.h +0 -78
  186. package/third-party/include/c10/macros/Macros.h +1 -520
  187. package/third-party/include/c10/util/BFloat16-inl.h +1 -339
  188. package/third-party/include/c10/util/BFloat16.h +1 -122
  189. package/third-party/include/c10/util/Half-inl.h +1 -347
  190. package/third-party/include/c10/util/Half.h +6 -419
  191. package/third-party/include/c10/util/TypeSafeSignMath.h +1 -133
  192. package/third-party/include/c10/util/bit_cast.h +1 -43
  193. package/third-party/include/c10/util/complex.h +1 -568
  194. package/third-party/include/c10/util/floating_point_utils.h +1 -33
  195. package/third-party/include/c10/util/irange.h +1 -1
  196. package/third-party/include/c10/util/llvmMathExtras.h +866 -0
  197. package/third-party/include/c10/util/safe_numerics.h +97 -0
  198. package/third-party/include/executorch/ExecuTorchError.h +6 -7
  199. package/third-party/include/executorch/ExecuTorchLLM/ExecuTorchLLM.h +12 -0
  200. package/third-party/include/executorch/ExecuTorchLLM/ExecuTorchLLMConfig.h +56 -0
  201. package/third-party/include/executorch/ExecuTorchLLM/ExecuTorchLLMError.h +16 -0
  202. package/third-party/include/executorch/ExecuTorchLLM/ExecuTorchLLMMultimodalRunner.h +227 -0
  203. package/third-party/include/executorch/ExecuTorchLLM/ExecuTorchLLMTextRunner.h +97 -0
  204. package/third-party/include/executorch/ExecuTorchLLM/module.modulemap +4 -0
  205. package/third-party/include/executorch/ExecuTorchLog.h +1 -0
  206. package/third-party/include/executorch/ExecuTorchModule.h +177 -4
  207. package/third-party/include/executorch/ExecuTorchTensor.h +3 -4
  208. package/third-party/include/executorch/ExecuTorchValue.h +1 -7
  209. package/third-party/include/executorch/extension/module/module.h +139 -8
  210. package/third-party/include/executorch/extension/tensor/tensor.h +1 -0
  211. package/third-party/include/executorch/extension/tensor/tensor_ptr.h +88 -26
  212. package/third-party/include/executorch/extension/threadpool/threadpool.h +4 -1
  213. package/third-party/include/executorch/runtime/backend/backend_init_context.h +6 -0
  214. package/third-party/include/executorch/runtime/backend/interface.h +1 -1
  215. package/third-party/include/executorch/runtime/core/error.h +76 -49
  216. package/third-party/include/executorch/runtime/core/exec_aten/util/scalar_type_util.h +18 -4
  217. package/third-party/include/executorch/runtime/core/memory_allocator.h +12 -2
  218. package/third-party/include/executorch/runtime/core/named_data_map.h +1 -11
  219. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/macros/Export.h +0 -78
  220. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h +1 -520
  221. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +1 -339
  222. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16.h +1 -122
  223. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/Half-inl.h +1 -347
  224. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/Half.h +6 -419
  225. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/TypeSafeSignMath.h +1 -133
  226. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/bit_cast.h +1 -43
  227. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/complex.h +1 -568
  228. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/floating_point_utils.h +1 -33
  229. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/irange.h +1 -1
  230. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/llvmMathExtras.h +866 -0
  231. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/safe_numerics.h +97 -0
  232. package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/macros/Export.h +66 -0
  233. package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/macros/Macros.h +553 -0
  234. package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/BFloat16.h +477 -0
  235. package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/Half.h +781 -0
  236. package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/TypeSafeSignMath.h +141 -0
  237. package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/bit_cast.h +49 -0
  238. package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/complex.h +593 -0
  239. package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/floating_point_utils.h +38 -0
  240. package/third-party/include/executorch/runtime/core/tensor_layout.h +1 -1
  241. package/third-party/include/executorch/runtime/executor/merged_data_map.h +142 -0
  242. package/third-party/include/executorch/runtime/executor/method.h +21 -8
  243. package/third-party/include/executorch/runtime/executor/method_meta.h +20 -2
  244. package/third-party/include/executorch/runtime/executor/program.h +0 -10
  245. package/third-party/include/executorch/runtime/kernel/operator_registry.h +1 -1
  246. package/third-party/include/executorch/runtime/platform/compiler.h +2 -0
  247. package/third-party/include/executorch/schema/extended_header.h +10 -1
  248. package/third-party/include/torch/headeronly/macros/Export.h +66 -0
  249. package/third-party/include/torch/headeronly/macros/Macros.h +553 -0
  250. package/third-party/include/torch/headeronly/util/BFloat16.h +477 -0
  251. package/third-party/include/torch/headeronly/util/Half.h +781 -0
  252. package/third-party/include/torch/headeronly/util/TypeSafeSignMath.h +141 -0
  253. package/third-party/include/torch/headeronly/util/bit_cast.h +49 -0
  254. package/third-party/include/torch/headeronly/util/complex.h +593 -0
  255. package/third-party/include/torch/headeronly/util/floating_point_utils.h +38 -0
  256. package/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib +0 -0
  257. package/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Info.plist +0 -0
  258. package/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib +0 -0
  259. package/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Info.plist +0 -0
  260. package/common/rnexecutorch/tests/run_all_tests.sh +0 -14
  261. package/common/rnexecutorch/tests/run_test.sh +0 -18
  262. package/ios/RnExecutorch/utils/Conversions.h +0 -14
  263. package/ios/RnExecutorch/utils/ETError.h +0 -26
  264. package/ios/RnExecutorch/utils/ImageProcessor.h +0 -15
  265. package/ios/RnExecutorch/utils/ImageProcessor.mm +0 -147
  266. package/ios/RnExecutorch/utils/Numerical.h +0 -3
  267. package/ios/RnExecutorch/utils/Numerical.mm +0 -18
  268. package/ios/RnExecutorch/utils/ScalarType.h +0 -14
  269. package/ios/RnExecutorch/utils/ScalarType.mm +0 -21
  270. package/lib/module/hooks/useNonStaticModule.js.map +0 -1
  271. package/lib/typescript/hooks/useNonStaticModule.d.ts +0 -21
  272. package/lib/typescript/hooks/useNonStaticModule.d.ts.map +0 -1
  273. package/src/hooks/useNonStaticModule.ts +0 -74
  274. package/third-party/include/executorch/extension/kernel_util/make_boxed_from_unboxed_functor.h +0 -181
  275. package/third-party/include/executorch/extension/kernel_util/meta_programming.h +0 -108
  276. package/third-party/include/executorch/extension/kernel_util/type_list.h +0 -137
  277. package/third-party/include/executorch/extension/threadpool/threadpool_guard.h +0 -35
@@ -1,520 +1 @@
1
- #define C10_USING_CUSTOM_GENERATED_MACROS
2
- #ifndef C10_MACROS_MACROS_H_
3
- #define C10_MACROS_MACROS_H_
4
- #include <cassert>
5
-
6
- /* Main entry for c10/macros.
7
- *
8
- * In your code, include c10/macros/Macros.h directly, instead of individual
9
- * files in this folder.
10
- */
11
-
12
- // For build systems that do not directly depend on CMake and directly build
13
- // from the source directory (such as Buck), one may not have a cmake_macros.h
14
- // file at all. In this case, the build system is responsible for providing
15
- // correct macro definitions corresponding to the cmake_macros.h.in file.
16
- //
17
- // In such scenarios, one should define the macro
18
- // C10_USING_CUSTOM_GENERATED_MACROS
19
- // to inform this header that it does not need to include the cmake_macros.h
20
- // file.
21
-
22
- #ifndef C10_USING_CUSTOM_GENERATED_MACROS
23
- #include <c10/macros/cmake_macros.h>
24
- #endif // C10_USING_CUSTOM_GENERATED_MACROS
25
-
26
- #include <c10/macros/Export.h>
27
-
28
- #if defined(__clang__)
29
- #define __ubsan_ignore_float_divide_by_zero__ \
30
- __attribute__((no_sanitize("float-divide-by-zero")))
31
- #define __ubsan_ignore_undefined__ __attribute__((no_sanitize("undefined")))
32
- #define __ubsan_ignore_signed_int_overflow__ \
33
- __attribute__((no_sanitize("signed-integer-overflow")))
34
- #define __ubsan_ignore_pointer_overflow__ \
35
- __attribute__((no_sanitize("pointer-overflow")))
36
- #define __ubsan_ignore_function__ __attribute__((no_sanitize("function")))
37
- #define __ubsan_ignore_float_cast_overflow__ \
38
- __attribute__((no_sanitize("float-cast-overflow")))
39
- #else
40
- #define __ubsan_ignore_float_divide_by_zero__
41
- #define __ubsan_ignore_undefined__
42
- #define __ubsan_ignore_signed_int_overflow__
43
- #define __ubsan_ignore_pointer_overflow__
44
- #define __ubsan_ignore_function__
45
- #define __ubsan_ignore_float_cast_overflow__
46
- #endif
47
-
48
- // Detect address sanitizer as some stuff doesn't work with it
49
- #undef C10_ASAN_ENABLED
50
-
51
- // for clang
52
- #if defined(__has_feature)
53
- #if ((__has_feature(address_sanitizer)))
54
- #define C10_ASAN_ENABLED 1
55
- #endif
56
- #endif
57
-
58
- // for gcc
59
- #if defined(__SANITIZE_ADDRESS__)
60
- #if __SANITIZE_ADDRESS__
61
- #if !defined(C10_ASAN_ENABLED)
62
- #define C10_ASAN_ENABLED 1
63
- #endif
64
- #endif
65
- #endif
66
-
67
- #if !defined(C10_ASAN_ENABLED)
68
- #define C10_ASAN_ENABLED 0
69
- #endif
70
-
71
- // Detect undefined-behavior sanitizer (UBSAN)
72
- #undef C10_UBSAN_ENABLED
73
-
74
- // for clang or gcc >= 14
75
- // NB: gcc 14 adds support for Clang's __has_feature
76
- // https://gcc.gnu.org/gcc-14/changes.html
77
- // gcc < 14 doesn't have a macro for UBSAN
78
- // (e.g. __SANITIZE_UNDEFINED__ does not exist in gcc)
79
- // https://github.com/google/sanitizers/issues/765
80
- #if defined(__has_feature)
81
- #if ((__has_feature(undefined_behavior_sanitizer)))
82
- #define C10_UBSAN_ENABLED 1
83
- #endif
84
- #endif
85
-
86
- #if !defined(C10_UBSAN_ENABLED)
87
- #define C10_UBSAN_ENABLED 0
88
- #endif
89
-
90
- // Disable the copy and assignment operator for a class. Note that this will
91
- // disable the usage of the class in std containers.
92
- #define C10_DISABLE_COPY_AND_ASSIGN(classname) \
93
- classname(const classname &) = delete; \
94
- classname &operator=(const classname &) = delete
95
-
96
- #define C10_CONCATENATE_IMPL(s1, s2) s1##s2
97
- #define C10_CONCATENATE(s1, s2) C10_CONCATENATE_IMPL(s1, s2)
98
-
99
- #define C10_MACRO_EXPAND(args) args
100
-
101
- #define C10_STRINGIZE_IMPL(x) #x
102
- #define C10_STRINGIZE(x) C10_STRINGIZE_IMPL(x)
103
-
104
- /**
105
- * C10_ANONYMOUS_VARIABLE(str) introduces a new identifier which starts with
106
- * str and ends with a unique number.
107
- */
108
- #ifdef __COUNTER__
109
- #define C10_UID __COUNTER__
110
- #define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __COUNTER__)
111
- #else
112
- #define C10_UID __LINE__
113
- #define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __LINE__)
114
- #endif
115
-
116
- #ifdef __has_cpp_attribute
117
- #define C10_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
118
- #else
119
- #define C10_HAS_CPP_ATTRIBUTE(x) (0)
120
- #endif
121
-
122
- #ifndef FBCODE_CAFFE2
123
- /// DEPRECATED: Warn if a type or return value is discarded.
124
- #define C10_NODISCARD [[nodiscard]]
125
-
126
- /// DEPRECATED: Suppress an unused variable.
127
- #define C10_UNUSED [[maybe_unused]]
128
- #endif
129
-
130
- #if !defined(__has_attribute)
131
- #define __has_attribute(x) 0
132
- #endif
133
-
134
- // Direct port of LLVM_ATTRIBUTE_USED.
135
- #if __has_attribute(used)
136
- #define C10_USED __attribute__((__used__))
137
- #else
138
- #define C10_USED
139
- #endif
140
-
141
- #define C10_RESTRICT __restrict
142
-
143
- // Simply define the namespace, in case a dependent library wants to refer to
144
- // the c10 namespace but not any nontrivial files.
145
- namespace c10 {}
146
- namespace c10::cuda {}
147
- namespace c10::hip {}
148
- namespace c10::xpu {}
149
-
150
- // Since C10 is the core library for caffe2 (and aten), we will simply reroute
151
- // all abstractions defined in c10 to be available in caffe2 as well.
152
- // This is only for backwards compatibility. Please use the symbols from the
153
- // c10 namespace where possible.
154
- namespace caffe2 {
155
- using namespace c10;
156
- }
157
- namespace at {
158
- using namespace c10;
159
- }
160
- namespace at::cuda {
161
- using namespace c10::cuda;
162
- } // namespace at::cuda
163
-
164
- // WARNING!!! THIS IS A GIANT HACK!!!
165
- // This line means you cannot simultaneously include c10/hip
166
- // and c10/cuda and then use them from the at::cuda namespace.
167
- // This is true in practice, because HIPIFY works inplace on
168
- // files in ATen/cuda, so it assumes that c10::hip is available
169
- // from at::cuda. This namespace makes that happen. When
170
- // HIPIFY is no longer out-of-place, we can switch the cuda
171
- // here to hip and everyone is happy.
172
- namespace at::cuda {
173
- using namespace c10::hip;
174
- } // namespace at::cuda
175
-
176
- namespace at::xpu {
177
- using namespace c10::xpu;
178
- } // namespace at::xpu
179
-
180
- // C10_LIKELY/C10_UNLIKELY
181
- //
182
- // These macros provide parentheses, so you can use these macros as:
183
- //
184
- // if C10_LIKELY(some_expr) {
185
- // ...
186
- // }
187
- //
188
- // NB: static_cast to boolean is mandatory in C++, because __builtin_expect
189
- // takes a long argument, which means you may trigger the wrong conversion
190
- // without it.
191
- //
192
- #if defined(__GNUC__) || defined(__ICL) || defined(__clang__)
193
- #define C10_LIKELY(expr) (__builtin_expect(static_cast<bool>(expr), 1))
194
- #define C10_UNLIKELY(expr) (__builtin_expect(static_cast<bool>(expr), 0))
195
- #else
196
- #define C10_LIKELY(expr) (expr)
197
- #define C10_UNLIKELY(expr) (expr)
198
- #endif
199
-
200
- /// C10_NOINLINE - Functions whose declaration is annotated with this will not
201
- /// be inlined.
202
- #ifdef __GNUC__
203
- #define C10_NOINLINE __attribute__((noinline))
204
- #elif _MSC_VER
205
- #define C10_NOINLINE __declspec(noinline)
206
- #else
207
- #define C10_NOINLINE
208
- #endif
209
-
210
- #if defined(_MSC_VER)
211
- #define C10_ALWAYS_INLINE __forceinline
212
- #elif __has_attribute(always_inline) || defined(__GNUC__)
213
- #define C10_ALWAYS_INLINE __attribute__((__always_inline__)) inline
214
- #else
215
- #define C10_ALWAYS_INLINE inline
216
- #endif
217
-
218
- // Unlike C10_ALWAYS_INLINE, C10_ALWAYS_INLINE_ATTRIBUTE can be used
219
- // on a lambda.
220
- #if defined(_MSC_VER)
221
- // MSVC 14.39 is reasonably recent and doesn't like
222
- // [[msvc::forceinline]] on a lambda, so don't try to use it.
223
- #define C10_ALWAYS_INLINE_ATTRIBUTE
224
- #elif __has_attribute(always_inline) || defined(__GNUC__)
225
- #define C10_ALWAYS_INLINE_ATTRIBUTE __attribute__((__always_inline__))
226
- #else
227
- #define C10_ALWAYS_INLINE_ATTRIBUTE
228
- #endif
229
-
230
- #if defined(_MSC_VER)
231
- #define C10_ATTR_VISIBILITY_HIDDEN
232
- #elif defined(__GNUC__)
233
- #define C10_ATTR_VISIBILITY_HIDDEN __attribute__((__visibility__("hidden")))
234
- #else
235
- #define C10_ATTR_VISIBILITY_HIDDEN
236
- #endif
237
-
238
- #define C10_ERASE C10_ALWAYS_INLINE C10_ATTR_VISIBILITY_HIDDEN
239
-
240
- #include <cstdint>
241
-
242
- #ifdef __HIPCC__
243
- // Unlike CUDA, HIP requires a HIP header to be included for __host__ to work.
244
- // We do this #include here so that C10_HOST_DEVICE and friends will Just Work.
245
- // See https://github.com/ROCm/hip/issues/441
246
- #include <hip/hip_runtime.h>
247
- #endif
248
-
249
- #if defined(__CUDACC__) || defined(__HIPCC__)
250
- // Designates functions callable from the host (CPU) and the device (GPU)
251
- #define C10_HOST_DEVICE __host__ __device__
252
- #define C10_DEVICE __device__
253
- #define C10_HOST __host__
254
- // constants from
255
- // (https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications)
256
- // The maximum number of threads per multiprocessor is 1024 for Turing
257
- // architecture (7.5), 1536 for GeForce Ampere (8.6)/Jetson Orin (8.7)/Ada (8.9), and
258
- // 2048 for all other architectures. You'll get warnings if you exceed these
259
- // constants. Hence, the following macros adjust the input values from the user
260
- // to resolve potential warnings.
261
- #if __CUDA_ARCH__ == 750
262
- constexpr uint32_t CUDA_MAX_THREADS_PER_SM = 1024;
263
- #elif __CUDA_ARCH__ == 860 || __CUDA_ARCH__ == 870 || __CUDA_ARCH__ == 890
264
- constexpr uint32_t CUDA_MAX_THREADS_PER_SM = 1536;
265
- #else
266
- constexpr uint32_t CUDA_MAX_THREADS_PER_SM = 2048;
267
- #endif
268
- // CUDA_MAX_THREADS_PER_BLOCK is same for all architectures currently
269
- constexpr uint32_t CUDA_MAX_THREADS_PER_BLOCK = 1024;
270
- // CUDA_THREADS_PER_BLOCK_FALLBACK is the "canonical fallback" choice of block
271
- // size. 256 is a good number for this fallback and should give good occupancy
272
- // and versatility across all architectures.
273
- constexpr uint32_t CUDA_THREADS_PER_BLOCK_FALLBACK = 256;
274
- // NOTE: if you are thinking of constexpr-ify the inputs to launch bounds, it
275
- // turns out that although __launch_bounds__ can take constexpr, it
276
- // can't take a constexpr that has anything to do with templates.
277
- // Currently we use launch_bounds that depend on template arguments in
278
- // Loops.cuh, Reduce.cuh and LossCTC.cuh. Hence, C10_MAX_THREADS_PER_BLOCK
279
- // and C10_MIN_BLOCKS_PER_SM are kept as macros.
280
- // Suppose you were planning to write __launch_bounds__(a, b), based on your
281
- // performance tuning on a modern GPU. Instead, you should write
282
- // __launch_bounds__(C10_MAX_THREADS_PER_BLOCK(a), C10_MIN_BLOCKS_PER_SM(a, b)),
283
- // which will also properly respect limits on old architectures.
284
- #define C10_MAX_THREADS_PER_BLOCK(val) \
285
- (((val) <= CUDA_MAX_THREADS_PER_BLOCK) ? (val) \
286
- : CUDA_THREADS_PER_BLOCK_FALLBACK)
287
- #define C10_MIN_BLOCKS_PER_SM(threads_per_block, blocks_per_sm) \
288
- ((((threads_per_block) * (blocks_per_sm) <= CUDA_MAX_THREADS_PER_SM) \
289
- ? (blocks_per_sm) \
290
- : ((CUDA_MAX_THREADS_PER_SM + (threads_per_block) - 1) / \
291
- (threads_per_block))))
292
- // C10_LAUNCH_BOUNDS is analogous to __launch_bounds__
293
- #define C10_LAUNCH_BOUNDS_0 \
294
- __launch_bounds__(256, \
295
- 4) // default launch bounds that should give good occupancy
296
- // and versatility across all architectures.
297
- #define C10_LAUNCH_BOUNDS_1(max_threads_per_block) \
298
- __launch_bounds__((C10_MAX_THREADS_PER_BLOCK((max_threads_per_block))))
299
- #define C10_LAUNCH_BOUNDS_2(max_threads_per_block, min_blocks_per_sm) \
300
- __launch_bounds__( \
301
- (C10_MAX_THREADS_PER_BLOCK((max_threads_per_block))), \
302
- (C10_MIN_BLOCKS_PER_SM((max_threads_per_block), (min_blocks_per_sm))))
303
- #else
304
- #define C10_HOST_DEVICE
305
- #define C10_HOST
306
- #define C10_DEVICE
307
- #endif
308
-
309
- #if defined(USE_ROCM)
310
- #define C10_HIP_HOST_DEVICE __host__ __device__
311
- #else
312
- #define C10_HIP_HOST_DEVICE
313
- #endif
314
-
315
- #if defined(USE_ROCM)
316
- #define C10_WARP_SIZE warpSize // = 64 or 32 (Defined in hip_runtime.h)
317
- #else
318
- #define C10_WARP_SIZE 32
319
- #endif
320
-
321
- #if defined(_MSC_VER) && _MSC_VER <= 1900
322
- #define __func__ __FUNCTION__
323
- #endif
324
-
325
- // CUDA_KERNEL_ASSERT checks the assertion
326
- // even when NDEBUG is defined. This is useful for important assertions in CUDA
327
- // code that would otherwise be suppressed when building Release.
328
- #if defined(__ANDROID__) || defined(__APPLE__) || defined(__FreeBSD__)
329
- // Those platforms do not support assert()
330
- #define CUDA_KERNEL_ASSERT(cond)
331
- #define CUDA_KERNEL_ASSERT_MSG(cond, msg)
332
- #define SYCL_KERNEL_ASSERT(cond)
333
- #elif defined(_MSC_VER)
334
- #if defined(NDEBUG)
335
- extern "C" {
336
- C10_IMPORT
337
- #if defined(__SYCL_DEVICE_ONLY__)
338
- extern SYCL_EXTERNAL void _wassert(const wchar_t *wexpr, const wchar_t *wfile,
339
- unsigned line);
340
- #else
341
- #if defined(__CUDA_ARCH__)
342
- __host__ __device__
343
- #endif // __CUDA_ARCH__
344
- void
345
- _wassert(wchar_t const *_Message, wchar_t const *_File, unsigned _Line);
346
- #endif // __SYCL_DEVICE_ONLY__
347
- }
348
- #endif // NDEBUG
349
- #define CUDA_KERNEL_ASSERT(cond) \
350
- if (C10_UNLIKELY(!(cond))) { \
351
- (void)(_wassert(_CRT_WIDE(#cond), _CRT_WIDE(__FILE__), \
352
- static_cast<unsigned>(__LINE__)), \
353
- 0); \
354
- }
355
- // TODO: This doesn't assert the message because I (chilli) couldn't figure out
356
- // a nice way to convert a char* to a wchar_t*
357
- #define CUDA_KERNEL_ASSERT_MSG(cond, msg) \
358
- if (C10_UNLIKELY(!(cond))) { \
359
- (void)(_wassert(_CRT_WIDE(#cond), _CRT_WIDE(__FILE__), \
360
- static_cast<unsigned>(__LINE__)), \
361
- 0); \
362
- }
363
- #define SYCL_KERNEL_ASSERT(cond) \
364
- if (C10_UNLIKELY(!(cond))) { \
365
- (void)(_wassert(_CRT_WIDE(#cond), _CRT_WIDE(__FILE__), \
366
- static_cast<unsigned>(__LINE__)), \
367
- 0); \
368
- }
369
- #else // __APPLE__, _MSC_VER
370
- #if defined(NDEBUG)
371
- extern "C" {
372
- #if defined(__SYCL_DEVICE_ONLY__)
373
- extern SYCL_EXTERNAL void __assert_fail(const char *expr, const char *file,
374
- unsigned int line, const char *func);
375
- #else // __SYCL_DEVICE_ONLY__
376
- #if (defined(__CUDA_ARCH__) && !(defined(__clang__) && defined(__CUDA__)))
377
- // CUDA supports the __assert_fail function, which is common to both device
378
- // and host side code.
379
- __host__ __device__
380
- #endif
381
-
382
- // This forward declaration matches the declaration of __assert_fail
383
- // exactly as it is in glibc, in case parts of the program are compiled with
384
- // different NDEBUG settings. Otherwise we might get 'ambiguous declaration'
385
- // error. Note: On ROCm - this declaration serves for host side compilation.
386
- void
387
- __assert_fail(const char *assertion, const char *file, unsigned int line,
388
- const char *function) noexcept __attribute__((__noreturn__));
389
-
390
- #endif // __SYCL_DEVICE_ONLY__
391
- }
392
- #endif // NDEBUG
393
- // ROCm disables kernel assert by default for performance considerations.
394
- // Though ROCm supports __assert_fail, it uses kernel printf which has
395
- // a non-negligible performance impact even if the assert condition is
396
- // never triggered. We choose to use abort() instead which will still
397
- // terminate the application, but without a useful error message.
398
- #if !defined(C10_USE_ROCM_KERNEL_ASSERT) and defined(USE_ROCM)
399
- #define CUDA_KERNEL_ASSERT(cond) \
400
- if C10_UNLIKELY (!(cond)) { \
401
- abort(); \
402
- }
403
- #define CUDA_KERNEL_ASSERT_MSG(cond, msg) \
404
- if C10_UNLIKELY (!(cond)) { \
405
- abort(); \
406
- }
407
- #define SYCL_KERNEL_ASSERT(cond) \
408
- if C10_UNLIKELY (!(cond)) { \
409
- abort(); \
410
- }
411
- #else
412
- #define CUDA_KERNEL_ASSERT(cond) \
413
- if (C10_UNLIKELY(!(cond))) { \
414
- __assert_fail(#cond, __FILE__, static_cast<unsigned int>(__LINE__), \
415
- __func__); \
416
- }
417
- #define CUDA_KERNEL_ASSERT_MSG(cond, msg) \
418
- if (C10_UNLIKELY(!(cond))) { \
419
- __assert_fail(msg, __FILE__, static_cast<unsigned int>(__LINE__), \
420
- __func__); \
421
- }
422
- #define SYCL_KERNEL_ASSERT(cond) \
423
- if (C10_UNLIKELY(!(cond))) { \
424
- __assert_fail(#cond, __FILE__, static_cast<unsigned int>(__LINE__), \
425
- __func__); \
426
- }
427
- #endif // C10_USE_ROCM_KERNEL_ASSERT and USE_ROCM
428
- #endif // __APPLE__
429
-
430
- #ifdef __APPLE__
431
- #include <TargetConditionals.h>
432
- #endif
433
-
434
- #if defined(__ANDROID__)
435
- #define C10_ANDROID 1
436
- #define C10_MOBILE 1
437
- #elif (defined(__APPLE__) && \
438
- (TARGET_IPHONE_SIMULATOR || TARGET_OS_SIMULATOR || TARGET_OS_IPHONE))
439
- #define C10_IOS 1
440
- #define C10_MOBILE 1
441
- #endif // ANDROID / IOS
442
-
443
- #if defined(C10_MOBILE) && C10_MOBILE
444
- #define C10_ALWAYS_INLINE_UNLESS_MOBILE inline
445
- #else
446
- #define C10_ALWAYS_INLINE_UNLESS_MOBILE C10_ALWAYS_INLINE
447
- #endif
448
-
449
- #if !defined(FBCODE_CAFFE2) && !defined(C10_NODEPRECATED)
450
- #define CONSTEXPR_EXCEPT_WIN_CUDA constexpr
451
- #define C10_HOST_CONSTEXPR_EXCEPT_WIN_CUDA constexpr
452
-
453
- #define STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(field, val) \
454
- static constexpr const char field[] = val;
455
- #define STATIC_CONST_STR_OUT_OF_LINE_FOR_WIN_CUDA(cls, field, val)
456
- #endif // !defined(FBCODE_CAFFE2) && !defined(C10_NODEPRECATED)
457
-
458
- #ifndef HAS_DEMANGLE
459
- #if defined(__ANDROID__) || defined(_WIN32) || defined(__EMSCRIPTEN__)
460
- #define HAS_DEMANGLE 0
461
- #elif defined(__APPLE__) && \
462
- (TARGET_IPHONE_SIMULATOR || TARGET_OS_SIMULATOR || TARGET_OS_IPHONE)
463
- #define HAS_DEMANGLE 0
464
- #else
465
- #define HAS_DEMANGLE 1
466
- #endif
467
- #endif // HAS_DEMANGLE
468
-
469
- #define _C10_PRAGMA__(string) _Pragma(#string)
470
- #define _C10_PRAGMA_(string) _C10_PRAGMA__(string)
471
-
472
- #ifdef __clang__
473
- #define C10_CLANG_DIAGNOSTIC_PUSH() _Pragma("clang diagnostic push")
474
- #define C10_CLANG_DIAGNOSTIC_POP() _Pragma("clang diagnostic pop")
475
- #define C10_CLANG_DIAGNOSTIC_IGNORE(flag) \
476
- _C10_PRAGMA_(clang diagnostic ignored flag)
477
- #define C10_CLANG_HAS_WARNING(flag) __has_warning(flag)
478
- #else
479
- #define C10_CLANG_DIAGNOSTIC_PUSH()
480
- #define C10_CLANG_DIAGNOSTIC_POP()
481
- #define C10_CLANG_DIAGNOSTIC_IGNORE(flag)
482
- #define C10_CLANG_HAS_WARNING(flag) 0
483
- #endif
484
-
485
- #ifdef __clang__
486
-
487
- #define C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED(warning) \
488
- _C10_PRAGMA_(clang diagnostic push) \
489
- _C10_PRAGMA_(clang diagnostic ignored "-Wunknown-warning-option") \
490
- _C10_PRAGMA_(clang diagnostic ignored warning)
491
-
492
- #define C10_DIAGNOSTIC_POP() _C10_PRAGMA_(clang diagnostic pop)
493
-
494
- #elif __GNUC__
495
-
496
- #define C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED(warning) \
497
- _C10_PRAGMA_(GCC diagnostic push) \
498
- _C10_PRAGMA_(GCC diagnostic ignored "-Wpragmas") \
499
- _C10_PRAGMA_(GCC diagnostic ignored warning)
500
-
501
- #define C10_DIAGNOSTIC_POP() _C10_PRAGMA_(GCC diagnostic pop)
502
-
503
- #else
504
-
505
- #define C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED(warning)
506
- #define C10_DIAGNOSTIC_POP()
507
-
508
- #endif
509
-
510
- // This macro is used to find older C++ compilers
511
- // that don't support move optimization for return values.
512
-
513
- #if (defined(__GNUC__) && __GNUC__ < 13) || \
514
- (defined(__clang_major__) && __clang_major__ < 13)
515
- #define C10_RETURN_MOVE_IF_OLD_COMPILER 1
516
- #else
517
- #define C10_RETURN_MOVE_IF_OLD_COMPILER 0
518
- #endif
519
-
520
- #endif // C10_MACROS_MACROS_H_
1
+ #include <torch/headeronly/macros/Macros.h>