react-native-executorch 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (518)
  1. package/android/CMakeLists.txt +24 -0
  2. package/android/build.gradle +1 -0
  3. package/android/src/main/cpp/CMakeLists.txt +25 -0
  4. package/android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt +1 -13
  5. package/common/rnexecutorch/RnExecutorchInstaller.cpp +52 -18
  6. package/common/rnexecutorch/RnExecutorchInstaller.h +0 -25
  7. package/common/rnexecutorch/TokenizerModule.cpp +1 -1
  8. package/common/rnexecutorch/TokenizerModule.h +4 -1
  9. package/common/rnexecutorch/data_processing/FileUtils.h +2 -2
  10. package/common/rnexecutorch/data_processing/ImageProcessing.cpp +5 -5
  11. package/common/rnexecutorch/data_processing/ImageProcessing.h +2 -2
  12. package/common/rnexecutorch/data_processing/Numerical.cpp +13 -0
  13. package/common/rnexecutorch/host_objects/JsiConversions.h +43 -62
  14. package/common/rnexecutorch/host_objects/ModelHostObject.h +43 -24
  15. package/common/rnexecutorch/metaprogramming/ConstructorHelpers.h +8 -6
  16. package/common/rnexecutorch/metaprogramming/FunctionHelpers.h +1 -1
  17. package/common/rnexecutorch/models/BaseModel.cpp +2 -2
  18. package/common/rnexecutorch/models/BaseModel.h +5 -0
  19. package/common/rnexecutorch/models/EncoderDecoderBase.cpp +2 -2
  20. package/common/rnexecutorch/models/EncoderDecoderBase.h +2 -2
  21. package/common/rnexecutorch/models/classification/Classification.cpp +6 -6
  22. package/common/rnexecutorch/models/classification/Classification.h +5 -0
  23. package/common/rnexecutorch/models/classification/Constants.h +3 -3
  24. package/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp +2 -2
  25. package/common/rnexecutorch/models/embeddings/BaseEmbeddings.h +2 -2
  26. package/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp +3 -3
  27. package/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h +5 -0
  28. package/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp +2 -2
  29. package/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h +6 -1
  30. package/common/rnexecutorch/models/image_segmentation/Constants.h +3 -3
  31. package/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp +6 -5
  32. package/common/rnexecutorch/models/image_segmentation/ImageSegmentation.h +8 -1
  33. package/common/rnexecutorch/models/llm/LLM.cpp +58 -0
  34. package/common/rnexecutorch/models/llm/LLM.h +35 -0
  35. package/common/rnexecutorch/models/object_detection/Constants.h +3 -3
  36. package/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +8 -8
  37. package/common/rnexecutorch/models/object_detection/ObjectDetection.h +11 -5
  38. package/common/rnexecutorch/models/object_detection/Types.h +13 -0
  39. package/common/rnexecutorch/models/object_detection/Utils.cpp +13 -11
  40. package/common/rnexecutorch/models/object_detection/Utils.h +7 -13
  41. package/common/rnexecutorch/models/ocr/CTCLabelConverter.cpp +2 -2
  42. package/common/rnexecutorch/models/ocr/CTCLabelConverter.h +2 -2
  43. package/common/rnexecutorch/models/ocr/Constants.h +33 -26
  44. package/common/rnexecutorch/models/ocr/Detector.cpp +20 -22
  45. package/common/rnexecutorch/models/ocr/Detector.h +4 -4
  46. package/common/rnexecutorch/models/ocr/OCR.cpp +9 -8
  47. package/common/rnexecutorch/models/ocr/OCR.h +11 -3
  48. package/common/rnexecutorch/models/ocr/RecognitionHandler.cpp +20 -19
  49. package/common/rnexecutorch/models/ocr/RecognitionHandler.h +9 -7
  50. package/common/rnexecutorch/models/ocr/Recognizer.cpp +7 -7
  51. package/common/rnexecutorch/models/ocr/Recognizer.h +2 -2
  52. package/common/rnexecutorch/models/ocr/Types.h +4 -6
  53. package/common/rnexecutorch/models/ocr/{DetectorUtils.cpp → utils/DetectorUtils.cpp} +70 -63
  54. package/common/rnexecutorch/models/ocr/{DetectorUtils.h → utils/DetectorUtils.h} +12 -11
  55. package/common/rnexecutorch/models/ocr/{RecognitionHandlerUtils.cpp → utils/RecognitionHandlerUtils.cpp} +14 -11
  56. package/common/rnexecutorch/models/ocr/{RecognitionHandlerUtils.h → utils/RecognitionHandlerUtils.h} +5 -5
  57. package/common/rnexecutorch/models/ocr/{RecognizerUtils.cpp → utils/RecognizerUtils.cpp} +28 -26
  58. package/common/rnexecutorch/models/ocr/{RecognizerUtils.h → utils/RecognizerUtils.h} +15 -14
  59. package/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp +2 -2
  60. package/common/rnexecutorch/models/speech_to_text/SpeechToText.h +9 -2
  61. package/common/rnexecutorch/models/speech_to_text/SpeechToTextStrategy.h +2 -2
  62. package/common/rnexecutorch/models/speech_to_text/WhisperStrategy.cpp +2 -2
  63. package/common/rnexecutorch/models/speech_to_text/WhisperStrategy.h +2 -2
  64. package/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp +5 -5
  65. package/common/rnexecutorch/models/style_transfer/StyleTransfer.h +6 -0
  66. package/common/rnexecutorch/models/vertical_ocr/VerticalDetector.cpp +23 -22
  67. package/common/rnexecutorch/models/vertical_ocr/VerticalDetector.h +4 -4
  68. package/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp +34 -34
  69. package/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h +27 -20
  70. package/{third-party/ios/ExecutorchLib/ExecutorchLib/sampler → common/runner}/sampler.cpp +3 -2
  71. package/{third-party/ios/ExecutorchLib/ExecutorchLib/sampler → common/runner}/sampler.h +3 -2
  72. package/ios/libs/executorch/libbackend_coreml_ios.a +0 -0
  73. package/ios/libs/executorch/libbackend_coreml_simulator.a +0 -0
  74. package/{third-party/ios/ExecutorchLib/frameworks/backend_mps.xcframework/ios-arm64 → ios/libs/executorch}/libbackend_mps_ios.a +0 -0
  75. package/{third-party/ios/ExecutorchLib/frameworks/backend_mps.xcframework/ios-arm64-simulator → ios/libs/executorch}/libbackend_mps_simulator.a +0 -0
  76. package/ios/libs/executorch/libbackend_xnnpack_ios.a +0 -0
  77. package/ios/libs/executorch/libbackend_xnnpack_simulator.a +0 -0
  78. package/ios/libs/executorch/libexecutorch_ios.a +0 -0
  79. package/ios/libs/executorch/libexecutorch_simulator.a +0 -0
  80. package/ios/libs/executorch/libkernels_custom_ios.a +0 -0
  81. package/ios/libs/executorch/libkernels_custom_simulator.a +0 -0
  82. package/ios/libs/executorch/libkernels_optimized_ios.a +0 -0
  83. package/ios/libs/executorch/libkernels_optimized_simulator.a +0 -0
  84. package/ios/libs/executorch/libkernels_portable_ios.a +0 -0
  85. package/ios/libs/executorch/libkernels_portable_simulator.a +0 -0
  86. package/ios/libs/executorch/libkernels_quantized_ios.a +0 -0
  87. package/ios/libs/executorch/libkernels_quantized_simulator.a +0 -0
  88. package/ios/libs/tokenizers-cpp/physical-arm64-release/libsentencepiece.a +0 -0
  89. package/ios/{ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib → libs/tokenizers-cpp/physical-arm64-release/libtokenizers_c.a} +0 -0
  90. package/ios/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_cpp.a +0 -0
  91. package/ios/libs/tokenizers-cpp/simulator-arm64-debug/libsentencepiece.a +0 -0
  92. package/ios/{ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib → libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_c.a} +0 -0
  93. package/ios/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_cpp.a +0 -0
  94. package/lib/Error.js +9 -6
  95. package/lib/ThreadPool.d.ts +10 -0
  96. package/lib/ThreadPool.js +28 -0
  97. package/lib/constants/modelUrls.js +1 -1
  98. package/lib/controllers/OCRController.js +9 -14
  99. package/lib/controllers/VerticalOCRController.js +9 -14
  100. package/lib/hooks/computer_vision/useOCR.js +7 -8
  101. package/lib/hooks/computer_vision/useVerticalOCR.js +3 -5
  102. package/lib/index.d.ts +0 -2
  103. package/lib/index.js +1 -3
  104. package/lib/module/controllers/LLMController.js +6 -10
  105. package/lib/module/controllers/LLMController.js.map +1 -1
  106. package/lib/module/hooks/computer_vision/useClassification.js +2 -2
  107. package/lib/module/hooks/computer_vision/useClassification.js.map +1 -1
  108. package/lib/module/hooks/computer_vision/useImageEmbeddings.js +2 -2
  109. package/lib/module/hooks/computer_vision/useImageEmbeddings.js.map +1 -1
  110. package/lib/module/hooks/computer_vision/useImageSegmentation.js +2 -2
  111. package/lib/module/hooks/computer_vision/useImageSegmentation.js.map +1 -1
  112. package/lib/module/hooks/computer_vision/useObjectDetection.js +2 -2
  113. package/lib/module/hooks/computer_vision/useObjectDetection.js.map +1 -1
  114. package/lib/module/hooks/computer_vision/useStyleTransfer.js +2 -2
  115. package/lib/module/hooks/computer_vision/useStyleTransfer.js.map +1 -1
  116. package/lib/module/hooks/general/useExecutorchModule.js +2 -2
  117. package/lib/module/hooks/general/useExecutorchModule.js.map +1 -1
  118. package/lib/module/hooks/natural_language_processing/useTextEmbeddings.js +2 -2
  119. package/lib/module/hooks/natural_language_processing/useTextEmbeddings.js.map +1 -1
  120. package/lib/module/hooks/useModule.js +13 -9
  121. package/lib/module/hooks/useModule.js.map +1 -1
  122. package/lib/module/index.js +1 -1
  123. package/lib/module/index.js.map +1 -1
  124. package/lib/module/modules/BaseModule.js +9 -17
  125. package/lib/module/modules/BaseModule.js.map +1 -1
  126. package/lib/module/modules/computer_vision/ClassificationModule.js +2 -2
  127. package/lib/module/modules/computer_vision/ClassificationModule.js.map +1 -1
  128. package/lib/module/modules/computer_vision/ImageEmbeddingsModule.js +2 -2
  129. package/lib/module/modules/computer_vision/ImageEmbeddingsModule.js.map +1 -1
  130. package/lib/module/modules/computer_vision/ImageSegmentationModule.js +2 -2
  131. package/lib/module/modules/computer_vision/ImageSegmentationModule.js.map +1 -1
  132. package/lib/module/modules/computer_vision/ObjectDetectionModule.js +2 -2
  133. package/lib/module/modules/computer_vision/ObjectDetectionModule.js.map +1 -1
  134. package/lib/module/modules/computer_vision/StyleTransferModule.js +2 -2
  135. package/lib/module/modules/computer_vision/StyleTransferModule.js.map +1 -1
  136. package/lib/module/modules/general/ExecutorchModule.js +2 -2
  137. package/lib/module/modules/general/ExecutorchModule.js.map +1 -1
  138. package/lib/module/modules/natural_language_processing/TextEmbeddingsModule.js +2 -2
  139. package/lib/module/modules/natural_language_processing/TextEmbeddingsModule.js.map +1 -1
  140. package/lib/module/native/RnExecutorchModules.js +1 -2
  141. package/lib/module/native/RnExecutorchModules.js.map +1 -1
  142. package/lib/module/utils/SpeechToTextModule/ASR.js +3 -3
  143. package/lib/module/utils/SpeechToTextModule/ASR.js.map +1 -1
  144. package/lib/modules/computer_vision/OCRModule.d.ts +4 -5
  145. package/lib/modules/computer_vision/OCRModule.js +9 -12
  146. package/lib/modules/computer_vision/VerticalOCRModule.d.ts +4 -5
  147. package/lib/modules/computer_vision/VerticalOCRModule.js +9 -12
  148. package/lib/native/RnExecutorchModules.d.ts +5 -1
  149. package/lib/native/RnExecutorchModules.js +3 -1
  150. package/lib/tsconfig.tsbuildinfo +1 -0
  151. package/lib/types/common.d.ts +1 -0
  152. package/lib/typescript/controllers/LLMController.d.ts +1 -1
  153. package/lib/typescript/controllers/LLMController.d.ts.map +1 -1
  154. package/lib/typescript/hooks/useModule.d.ts +8 -5
  155. package/lib/typescript/hooks/useModule.d.ts.map +1 -1
  156. package/lib/typescript/index.d.ts +1 -0
  157. package/lib/typescript/index.d.ts.map +1 -1
  158. package/lib/typescript/modules/BaseModule.d.ts +7 -6
  159. package/lib/typescript/modules/BaseModule.d.ts.map +1 -1
  160. package/lib/typescript/modules/computer_vision/ClassificationModule.d.ts +2 -2
  161. package/lib/typescript/modules/computer_vision/ClassificationModule.d.ts.map +1 -1
  162. package/lib/typescript/modules/computer_vision/ImageEmbeddingsModule.d.ts +2 -2
  163. package/lib/typescript/modules/computer_vision/ImageEmbeddingsModule.d.ts.map +1 -1
  164. package/lib/typescript/modules/computer_vision/ImageSegmentationModule.d.ts +2 -2
  165. package/lib/typescript/modules/computer_vision/ImageSegmentationModule.d.ts.map +1 -1
  166. package/lib/typescript/modules/computer_vision/ObjectDetectionModule.d.ts +2 -2
  167. package/lib/typescript/modules/computer_vision/ObjectDetectionModule.d.ts.map +1 -1
  168. package/lib/typescript/modules/computer_vision/StyleTransferModule.d.ts +2 -2
  169. package/lib/typescript/modules/computer_vision/StyleTransferModule.d.ts.map +1 -1
  170. package/lib/typescript/modules/general/ExecutorchModule.d.ts +2 -2
  171. package/lib/typescript/modules/general/ExecutorchModule.d.ts.map +1 -1
  172. package/lib/typescript/modules/natural_language_processing/TextEmbeddingsModule.d.ts +2 -2
  173. package/lib/typescript/modules/natural_language_processing/TextEmbeddingsModule.d.ts.map +1 -1
  174. package/lib/typescript/native/RnExecutorchModules.d.ts +1 -3
  175. package/lib/typescript/native/RnExecutorchModules.d.ts.map +1 -1
  176. package/lib/utils/ResourceFetcherUtils.js +0 -1
  177. package/lib/utils/llm.js +0 -1
  178. package/package.json +1 -2
  179. package/react-native-executorch.podspec +47 -44
  180. package/src/controllers/LLMController.ts +8 -13
  181. package/src/hooks/computer_vision/useClassification.ts +2 -2
  182. package/src/hooks/computer_vision/useImageEmbeddings.ts +2 -2
  183. package/src/hooks/computer_vision/useImageSegmentation.ts +2 -2
  184. package/src/hooks/computer_vision/useObjectDetection.ts +2 -2
  185. package/src/hooks/computer_vision/useStyleTransfer.ts +2 -2
  186. package/src/hooks/general/useExecutorchModule.ts +2 -2
  187. package/src/hooks/natural_language_processing/useTextEmbeddings.ts +2 -2
  188. package/src/hooks/useModule.ts +23 -13
  189. package/src/index.ts +3 -2
  190. package/src/modules/BaseModule.ts +17 -28
  191. package/src/modules/computer_vision/ClassificationModule.ts +2 -2
  192. package/src/modules/computer_vision/ImageEmbeddingsModule.ts +2 -2
  193. package/src/modules/computer_vision/ImageSegmentationModule.ts +2 -2
  194. package/src/modules/computer_vision/ObjectDetectionModule.ts +2 -2
  195. package/src/modules/computer_vision/StyleTransferModule.ts +2 -2
  196. package/src/modules/general/ExecutorchModule.ts +2 -2
  197. package/src/modules/natural_language_processing/TextEmbeddingsModule.ts +2 -2
  198. package/src/native/RnExecutorchModules.ts +1 -5
  199. package/src/utils/SpeechToTextModule/ASR.ts +4 -4
  200. package/third-party/android/libs/cpuinfo/arm64-v8a/libcpuinfo.so +0 -0
  201. package/third-party/android/libs/pthreadpool/arm64-v8a/libpthreadpool.so +0 -0
  202. package/third-party/include/cpuinfo/cpuinfo.h +2305 -0
  203. package/third-party/include/executorch/extension/threadpool/cpuinfo_utils.h +26 -0
  204. package/third-party/include/executorch/extension/threadpool/threadpool.h +94 -0
  205. package/third-party/include/pthreadpool/pthreadpool.h +2236 -0
  206. package/android/src/main/java/com/swmansion/rnexecutorch/LLM.kt +0 -63
  207. package/ios/ExecutorchLib.xcframework/Info.plist +0 -43
  208. package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/ETModel.h +0 -27
  209. package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h +0 -14
  210. package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/LLaMARunner.h +0 -32
  211. package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Info.plist +0 -0
  212. package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/ETModel.h +0 -27
  213. package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h +0 -14
  214. package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/LLaMARunner.h +0 -32
  215. package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Info.plist +0 -0
  216. package/ios/RnExecutorch/LLM.h +0 -5
  217. package/ios/RnExecutorch/LLM.mm +0 -78
  218. package/lib/Error.d.ts +0 -30
  219. package/lib/constants/directories.d.ts +0 -1
  220. package/lib/constants/ocr/symbols.d.ts +0 -75
  221. package/lib/controllers/OCRController.d.ts +0 -23
  222. package/lib/controllers/VerticalOCRController.d.ts +0 -25
  223. package/lib/hooks/useModule.d.ts +0 -17
  224. package/lib/module/modules/BaseNonStaticModule.js +0 -17
  225. package/lib/module/modules/BaseNonStaticModule.js.map +0 -1
  226. package/lib/module/native/NativeLLM.js +0 -5
  227. package/lib/module/native/NativeLLM.js.map +0 -1
  228. package/lib/modules/BaseModule.d.ts +0 -8
  229. package/lib/modules/BaseNonStaticModule.d.ts +0 -9
  230. package/lib/native/NativeETInstaller.d.ts +0 -6
  231. package/lib/native/NativeOCR.d.ts +0 -8
  232. package/lib/native/NativeVerticalOCR.d.ts +0 -8
  233. package/lib/types/imageSegmentation.d.ts +0 -24
  234. package/lib/types/objectDetection.d.ts +0 -104
  235. package/lib/types/ocr.d.ts +0 -11
  236. package/lib/typescript/modules/BaseNonStaticModule.d.ts +0 -10
  237. package/lib/typescript/modules/BaseNonStaticModule.d.ts.map +0 -1
  238. package/lib/typescript/native/NativeLLM.d.ts +0 -12
  239. package/lib/typescript/native/NativeLLM.d.ts.map +0 -1
  240. package/lib/utils/stt.d.ts +0 -1
  241. package/src/modules/BaseNonStaticModule.ts +0 -26
  242. package/src/native/NativeLLM.ts +0 -14
  243. package/third-party/include/tokenizers-cpp/tokenizers_c.h +0 -61
  244. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/ETModel.h +0 -27
  245. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/ETModel.mm +0 -249
  246. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.h +0 -14
  247. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.mm +0 -80
  248. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.h +0 -32
  249. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.mm +0 -95
  250. package/third-party/ios/ExecutorchLib/ExecutorchLib/InputType.h +0 -12
  251. package/third-party/ios/ExecutorchLib/ExecutorchLib/Utils.hpp +0 -217
  252. package/third-party/ios/ExecutorchLib/ExecutorchLib/model/Model.cpp +0 -11
  253. package/third-party/ios/ExecutorchLib/ExecutorchLib/model/Model.h +0 -11
  254. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/base64.h +0 -202
  255. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.cpp +0 -313
  256. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.h +0 -57
  257. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.cpp +0 -78
  258. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.h +0 -23
  259. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.cpp +0 -427
  260. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.h +0 -87
  261. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tokenizer.h +0 -76
  262. package/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.pbxproj +0 -683
  263. package/third-party/ios/ExecutorchLib/build.sh +0 -44
  264. package/third-party/ios/ExecutorchLib/frameworks/backend_coreml.xcframework/Info.plist +0 -43
  265. package/third-party/ios/ExecutorchLib/frameworks/backend_coreml.xcframework/ios-arm64/libbackend_coreml_ios.a +0 -0
  266. package/third-party/ios/ExecutorchLib/frameworks/backend_coreml.xcframework/ios-arm64-simulator/libbackend_coreml_simulator.a +0 -0
  267. package/third-party/ios/ExecutorchLib/frameworks/backend_mps.xcframework/Info.plist +0 -43
  268. package/third-party/ios/ExecutorchLib/frameworks/backend_xnnpack.xcframework/Info.plist +0 -43
  269. package/third-party/ios/ExecutorchLib/frameworks/backend_xnnpack.xcframework/ios-arm64/libbackend_xnnpack_ios.a +0 -0
  270. package/third-party/ios/ExecutorchLib/frameworks/backend_xnnpack.xcframework/ios-arm64-simulator/libbackend_xnnpack_simulator.a +0 -0
  271. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/Info.plist +0 -47
  272. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/macros/Export.h +0 -163
  273. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/macros/Macros.h +0 -497
  274. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/BFloat16-inl.h +0 -342
  275. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/BFloat16-math.h +0 -266
  276. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/BFloat16.h +0 -125
  277. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/Half-inl.h +0 -347
  278. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/Half.h +0 -416
  279. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/TypeSafeSignMath.h +0 -133
  280. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/bit_cast.h +0 -43
  281. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/floating_point_utils.h +0 -33
  282. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/irange.h +0 -107
  283. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorch.h +0 -13
  284. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchError.h +0 -16
  285. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchLog.h +0 -76
  286. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchModule.h +0 -286
  287. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchTensor.h +0 -742
  288. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchValue.h +0 -219
  289. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/module/module.h +0 -492
  290. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor.h +0 -13
  291. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor_accessor.h +0 -190
  292. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor_ptr.h +0 -347
  293. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor_ptr_maker.h +0 -653
  294. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/backend/backend_execution_context.h +0 -71
  295. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/backend/backend_init_context.h +0 -72
  296. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/backend/interface.h +0 -166
  297. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/array_ref.h +0 -235
  298. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/data_loader.h +0 -136
  299. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/defines.h +0 -20
  300. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/error.h +0 -229
  301. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/evalue.h +0 -521
  302. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/event_tracer.h +0 -565
  303. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/event_tracer_hooks.h +0 -323
  304. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/event_tracer_hooks_delegate.h +0 -197
  305. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/exec_aten.h +0 -147
  306. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/dim_order_util.h +0 -263
  307. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/scalar_type_util.h +0 -1331
  308. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/tensor_dimension_limit.h +0 -21
  309. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/tensor_shape_to_c_string.h +0 -69
  310. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/tensor_util.h +0 -1250
  311. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/freeable_buffer.h +0 -107
  312. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/hierarchical_allocator.h +0 -107
  313. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/memory_allocator.h +0 -198
  314. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/named_data_map.h +0 -86
  315. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/bfloat16.h +0 -27
  316. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/bfloat16_math.h +0 -14
  317. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/bits_types.h +0 -83
  318. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Export.h +0 -163
  319. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h +0 -497
  320. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +0 -342
  321. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-math.h +0 -266
  322. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16.h +0 -125
  323. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half-inl.h +0 -347
  324. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half.h +0 -416
  325. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/TypeSafeSignMath.h +0 -133
  326. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/bit_cast.h +0 -43
  327. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/floating_point_utils.h +0 -33
  328. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/irange.h +0 -107
  329. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/complex.h +0 -44
  330. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/device.h +0 -70
  331. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/half.h +0 -27
  332. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/optional.h +0 -36
  333. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/qint_types.h +0 -83
  334. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/scalar.h +0 -110
  335. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/scalar_type.h +0 -154
  336. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/string_view.h +0 -29
  337. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/tensor.h +0 -142
  338. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/tensor_impl.h +0 -261
  339. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/tensor_options.h +0 -60
  340. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/result.h +0 -258
  341. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/span.h +0 -93
  342. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/tag.h +0 -71
  343. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/tensor_layout.h +0 -79
  344. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/tensor_shape_dynamism.h +0 -39
  345. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/memory_manager.h +0 -113
  346. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/method.h +0 -387
  347. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/method_meta.h +0 -251
  348. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/program.h +0 -320
  349. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/pte_data_map.h +0 -144
  350. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/tensor_parser.h +0 -156
  351. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/kernel/kernel_runtime_context.h +0 -122
  352. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/kernel/operator_registry.h +0 -278
  353. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/abort.h +0 -36
  354. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/assert.h +0 -119
  355. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/clock.h +0 -43
  356. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/compat_unistd.h +0 -75
  357. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/compiler.h +0 -191
  358. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/log.h +0 -177
  359. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/platform.h +0 -133
  360. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/profiler.h +0 -292
  361. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/runtime.h +0 -35
  362. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/system.h +0 -49
  363. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/types.h +0 -24
  364. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/schema/extended_header.h +0 -76
  365. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/module.modulemap +0 -5
  366. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/libexecutorch_ios.a +0 -0
  367. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/macros/Export.h +0 -163
  368. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/macros/Macros.h +0 -497
  369. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/BFloat16-inl.h +0 -342
  370. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/BFloat16-math.h +0 -266
  371. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/BFloat16.h +0 -125
  372. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/Half-inl.h +0 -347
  373. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/Half.h +0 -416
  374. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/TypeSafeSignMath.h +0 -133
  375. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/bit_cast.h +0 -43
  376. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/floating_point_utils.h +0 -33
  377. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/irange.h +0 -107
  378. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorch.h +0 -13
  379. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchError.h +0 -16
  380. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchLog.h +0 -76
  381. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchModule.h +0 -286
  382. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchTensor.h +0 -742
  383. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchValue.h +0 -219
  384. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/module/module.h +0 -492
  385. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor.h +0 -13
  386. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor_accessor.h +0 -190
  387. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor_ptr.h +0 -347
  388. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor_ptr_maker.h +0 -653
  389. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/backend/backend_execution_context.h +0 -71
  390. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/backend/backend_init_context.h +0 -72
  391. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/backend/interface.h +0 -166
  392. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/array_ref.h +0 -235
  393. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/data_loader.h +0 -136
  394. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/defines.h +0 -20
  395. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/error.h +0 -229
  396. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/evalue.h +0 -521
  397. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/event_tracer.h +0 -565
  398. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/event_tracer_hooks.h +0 -323
  399. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/event_tracer_hooks_delegate.h +0 -197
  400. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/exec_aten.h +0 -147
  401. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/dim_order_util.h +0 -263
  402. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/scalar_type_util.h +0 -1331
  403. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/tensor_dimension_limit.h +0 -21
  404. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/tensor_shape_to_c_string.h +0 -69
  405. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/tensor_util.h +0 -1250
  406. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/freeable_buffer.h +0 -107
  407. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/hierarchical_allocator.h +0 -107
  408. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/memory_allocator.h +0 -198
  409. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/named_data_map.h +0 -86
  410. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/bfloat16.h +0 -27
  411. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/bfloat16_math.h +0 -14
  412. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/bits_types.h +0 -83
  413. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Export.h +0 -163
  414. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h +0 -497
  415. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +0 -342
  416. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-math.h +0 -266
  417. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16.h +0 -125
  418. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half-inl.h +0 -347
  419. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half.h +0 -416
  420. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/TypeSafeSignMath.h +0 -133
  421. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/bit_cast.h +0 -43
  422. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/floating_point_utils.h +0 -33
  423. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/irange.h +0 -107
  424. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/complex.h +0 -44
  425. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/device.h +0 -70
  426. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/half.h +0 -27
  427. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/optional.h +0 -36
  428. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/qint_types.h +0 -83
  429. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/scalar.h +0 -110
  430. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/scalar_type.h +0 -154
  431. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/string_view.h +0 -29
  432. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/tensor.h +0 -142
  433. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/tensor_impl.h +0 -261
  434. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/tensor_options.h +0 -60
  435. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/result.h +0 -258
  436. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/span.h +0 -93
  437. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/tag.h +0 -71
  438. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/tensor_layout.h +0 -79
  439. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/tensor_shape_dynamism.h +0 -39
  440. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/memory_manager.h +0 -113
  441. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/method.h +0 -387
  442. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/method_meta.h +0 -251
  443. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/program.h +0 -320
  444. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/pte_data_map.h +0 -144
  445. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/tensor_parser.h +0 -156
  446. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/kernel/kernel_runtime_context.h +0 -122
  447. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/kernel/operator_registry.h +0 -278
  448. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/abort.h +0 -36
  449. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/assert.h +0 -119
  450. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/clock.h +0 -43
  451. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/compat_unistd.h +0 -75
  452. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/compiler.h +0 -191
  453. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/log.h +0 -177
  454. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/platform.h +0 -133
  455. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/profiler.h +0 -292
  456. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/runtime.h +0 -35
  457. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/system.h +0 -49
  458. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/types.h +0 -24
  459. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/schema/extended_header.h +0 -76
  460. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/module.modulemap +0 -5
  461. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/libexecutorch_simulator.a +0 -0
  462. package/third-party/ios/ExecutorchLib/frameworks/kernels_custom.xcframework/Info.plist +0 -43
  463. package/third-party/ios/ExecutorchLib/frameworks/kernels_custom.xcframework/ios-arm64/libkernels_custom_ios.a +0 -0
  464. package/third-party/ios/ExecutorchLib/frameworks/kernels_custom.xcframework/ios-arm64-simulator/libkernels_custom_simulator.a +0 -0
  465. package/third-party/ios/ExecutorchLib/frameworks/kernels_optimized.xcframework/Info.plist +0 -43
  466. package/third-party/ios/ExecutorchLib/frameworks/kernels_optimized.xcframework/ios-arm64/libkernels_optimized_ios.a +0 -0
  467. package/third-party/ios/ExecutorchLib/frameworks/kernels_optimized.xcframework/ios-arm64-simulator/libkernels_optimized_simulator.a +0 -0
  468. package/third-party/ios/ExecutorchLib/frameworks/kernels_portable.xcframework/Info.plist +0 -43
  469. package/third-party/ios/ExecutorchLib/frameworks/kernels_portable.xcframework/ios-arm64/libkernels_portable_ios.a +0 -0
  470. package/third-party/ios/ExecutorchLib/frameworks/kernels_portable.xcframework/ios-arm64-simulator/libkernels_portable_simulator.a +0 -0
  471. package/third-party/ios/ExecutorchLib/frameworks/kernels_quantized.xcframework/Info.plist +0 -43
  472. package/third-party/ios/ExecutorchLib/frameworks/kernels_quantized.xcframework/ios-arm64/libkernels_quantized_ios.a +0 -0
  473. package/third-party/ios/ExecutorchLib/frameworks/kernels_quantized.xcframework/ios-arm64-simulator/libkernels_quantized_simulator.a +0 -0
  474. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/Info.plist +0 -43
  475. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/bitmap256.h +0 -82
  476. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/filtered_re2.h +0 -111
  477. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/pod_array.h +0 -43
  478. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/prefilter.h +0 -130
  479. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/prefilter_tree.h +0 -139
  480. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/prog.h +0 -483
  481. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/re2.h +0 -994
  482. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/regexp.h +0 -692
  483. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/set.h +0 -85
  484. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/sparse_array.h +0 -367
  485. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/sparse_set.h +0 -241
  486. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/stringpiece.h +0 -205
  487. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/unicode_casefold.h +0 -78
  488. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/unicode_groups.h +0 -64
  489. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/walker-inl.h +0 -235
  490. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Info.plist +0 -26
  491. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/re2 +0 -0
  492. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/bitmap256.h +0 -82
  493. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/filtered_re2.h +0 -111
  494. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/pod_array.h +0 -43
  495. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/prefilter.h +0 -130
  496. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/prefilter_tree.h +0 -139
  497. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/prog.h +0 -483
  498. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/re2.h +0 -994
  499. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/regexp.h +0 -692
  500. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/set.h +0 -85
  501. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/sparse_array.h +0 -367
  502. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/sparse_set.h +0 -241
  503. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/stringpiece.h +0 -205
  504. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/unicode_casefold.h +0 -78
  505. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/unicode_groups.h +0 -64
  506. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/walker-inl.h +0 -235
  507. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Info.plist +0 -26
  508. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/re2 +0 -0
  509. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/irunner.h +0 -0
  510. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/runner.cpp +0 -0
  511. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/runner.h +0 -0
  512. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/stats.h +0 -0
  513. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/text_decoder_runner.cpp +0 -0
  514. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/text_decoder_runner.h +0 -0
  515. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/text_prefiller.cpp +0 -0
  516. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/text_prefiller.h +0 -0
  517. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/text_token_generator.h +0 -0
  518. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/util.h +0 -0
@@ -0,0 +1,2236 @@
1
+ #ifndef PTHREADPOOL_H_
2
+ #define PTHREADPOOL_H_
3
+
4
+ #include <stddef.h>
5
+ #include <stdint.h>
6
+
7
+ typedef struct pthreadpool *pthreadpool_t;
8
+
9
+ typedef void (*pthreadpool_task_1d_t)(void *, size_t);
10
+ typedef void (*pthreadpool_task_1d_with_thread_t)(void *, size_t, size_t);
11
+ typedef void (*pthreadpool_task_1d_tile_1d_t)(void *, size_t, size_t);
12
+ typedef void (*pthreadpool_task_2d_t)(void *, size_t, size_t);
13
+ typedef void (*pthreadpool_task_2d_with_thread_t)(void *, size_t, size_t,
14
+ size_t);
15
+ typedef void (*pthreadpool_task_2d_tile_1d_t)(void *, size_t, size_t, size_t);
16
+ typedef void (*pthreadpool_task_2d_tile_2d_t)(void *, size_t, size_t, size_t,
17
+ size_t);
18
+ typedef void (*pthreadpool_task_3d_t)(void *, size_t, size_t, size_t);
19
+ typedef void (*pthreadpool_task_3d_tile_1d_t)(void *, size_t, size_t, size_t,
20
+ size_t);
21
+ typedef void (*pthreadpool_task_3d_tile_1d_with_thread_t)(void *, size_t,
22
+ size_t, size_t,
23
+ size_t, size_t);
24
+ typedef void (*pthreadpool_task_3d_tile_2d_t)(void *, size_t, size_t, size_t,
25
+ size_t, size_t);
26
+ typedef void (*pthreadpool_task_4d_t)(void *, size_t, size_t, size_t, size_t);
27
+ typedef void (*pthreadpool_task_4d_tile_1d_t)(void *, size_t, size_t, size_t,
28
+ size_t, size_t);
29
+ typedef void (*pthreadpool_task_4d_tile_2d_t)(void *, size_t, size_t, size_t,
30
+ size_t, size_t, size_t);
31
+ typedef void (*pthreadpool_task_5d_t)(void *, size_t, size_t, size_t, size_t,
32
+ size_t);
33
+ typedef void (*pthreadpool_task_5d_tile_1d_t)(void *, size_t, size_t, size_t,
34
+ size_t, size_t, size_t);
35
+ typedef void (*pthreadpool_task_5d_tile_2d_t)(void *, size_t, size_t, size_t,
36
+ size_t, size_t, size_t, size_t);
37
+ typedef void (*pthreadpool_task_6d_t)(void *, size_t, size_t, size_t, size_t,
38
+ size_t, size_t);
39
+ typedef void (*pthreadpool_task_6d_tile_1d_t)(void *, size_t, size_t, size_t,
40
+ size_t, size_t, size_t, size_t);
41
+ typedef void (*pthreadpool_task_6d_tile_2d_t)(void *, size_t, size_t, size_t,
42
+ size_t, size_t, size_t, size_t,
43
+ size_t);
44
+
45
+ typedef void (*pthreadpool_task_1d_with_id_t)(void *, uint32_t, size_t);
46
+ typedef void (*pthreadpool_task_2d_tile_1d_with_id_t)(void *, uint32_t, size_t,
47
+ size_t, size_t);
48
+ typedef void (*pthreadpool_task_2d_tile_2d_with_id_t)(void *, uint32_t, size_t,
49
+ size_t, size_t, size_t);
50
+ typedef void (*pthreadpool_task_3d_tile_1d_with_id_t)(void *, uint32_t, size_t,
51
+ size_t, size_t, size_t);
52
+ typedef void (*pthreadpool_task_3d_tile_2d_with_id_t)(void *, uint32_t, size_t,
53
+ size_t, size_t, size_t,
54
+ size_t);
55
+ typedef void (*pthreadpool_task_4d_tile_2d_with_id_t)(void *, uint32_t, size_t,
56
+ size_t, size_t, size_t,
57
+ size_t, size_t);
58
+
59
+ typedef void (*pthreadpool_task_2d_tile_1d_with_id_with_thread_t)(
60
+ void *, uint32_t, size_t, size_t, size_t, size_t);
61
+ typedef void (*pthreadpool_task_3d_tile_1d_with_id_with_thread_t)(
62
+ void *, uint32_t, size_t, size_t, size_t, size_t, size_t);
63
+
64
+ /**
65
+ * Disable support for denormalized numbers to the maximum extent possible for
66
+ * the duration of the computation.
67
+ *
68
+ * Handling denormalized floating-point numbers is often implemented in
69
+ * microcode, and incurs significant performance degradation. This hint
70
+ * instructs the thread pool to disable support for denormalized numbers before
71
+ * running the computation by manipulating architecture-specific control
72
+ * registers, and restore the initial value of control registers after the
73
+ * computation is complete. The thread pool temporarily disables denormalized
74
+ * numbers on all threads involved in the computation (i.e. the caller threads,
75
+ * and potentially worker threads).
76
+ *
77
+ * Disabling denormalized numbers may have a small negative effect on results'
78
+ * accuracy. As various architectures differ in capabilities to control
79
+ * processing of denormalized numbers, using this flag may also hurt results'
80
+ * reproducibility across different instruction set architectures.
81
+ */
82
+ #define PTHREADPOOL_FLAG_DISABLE_DENORMALS 0x00000001
83
+
84
+ /**
85
+ * Yield worker threads to the system scheduler after the operation is finished.
86
+ *
87
+ * Force workers to use kernel wait (instead of active spin-wait by default) for
88
+ * new commands after this command is processed. This flag affects only the
89
+ * immediate next operation on this thread pool. To make the thread pool always
90
+ * use kernel wait, pass this flag to all parallelization functions.
91
+ */
92
+ #define PTHREADPOOL_FLAG_YIELD_WORKERS 0x00000002
93
+
94
+ #ifdef __cplusplus
95
+ extern "C" {
96
+ #endif
97
+
98
+ /**
99
+ * Create a thread pool with the specified number of threads.
100
+ *
101
+ * @param threads_count the number of threads in the thread pool.
102
+ * A value of 0 has special interpretation: it creates a thread pool with as
103
+ * many threads as there are logical processors in the system.
104
+ *
105
+ * @returns A pointer to an opaque thread pool object if the call is
106
+ * successful, or NULL pointer if the call failed.
107
+ */
108
+ pthreadpool_t pthreadpool_create(size_t threads_count);
109
+
110
+ /**
111
+ * Query the number of threads in a thread pool.
112
+ *
113
+ * @param threadpool the thread pool to query.
114
+ *
115
+ * @returns The number of threads in the thread pool.
116
+ */
117
+ size_t pthreadpool_get_threads_count(pthreadpool_t threadpool);
118
+
119
+ /**
120
+ * Process items on a 1D grid.
121
+ *
122
+ * The function implements a parallel version of the following snippet:
123
+ *
124
+ * for (size_t i = 0; i < range; i++)
125
+ * function(context, i);
126
+ *
127
+ * When the function returns, all items have been processed and the thread pool
128
+ * is ready for a new task.
129
+ *
130
+ * @note If multiple threads call this function with the same thread pool, the
131
+ * calls are serialized.
132
+ *
133
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
134
+ * is NULL, all items are processed serially on the calling thread.
135
+ * @param function the function to call for each item.
136
+ * @param context the first argument passed to the specified function.
137
+ * @param range the number of items on the 1D grid to process. The
138
+ * specified function will be called once for each item.
139
+ * @param flags a bitwise combination of zero or more optional flags
140
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
141
+ */
142
+ void pthreadpool_parallelize_1d(pthreadpool_t threadpool,
143
+ pthreadpool_task_1d_t function, void *context,
144
+ size_t range, uint32_t flags);
145
+
146
+ /**
147
+ * Process items on a 1D grid passing along the current thread id.
148
+ *
149
+ * The function implements a parallel version of the following snippet:
150
+ *
151
+ * for (size_t i = 0; i < range; i++)
152
+ * function(context, thread_index, i);
153
+ *
154
+ * When the function returns, all items have been processed and the thread pool
155
+ * is ready for a new task.
156
+ *
157
+ * @note If multiple threads call this function with the same thread pool, the
158
+ * calls are serialized.
159
+ *
160
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
161
+ * is NULL, all items are processed serially on the calling thread.
162
+ * @param function the function to call for each item.
163
+ * @param context the first argument passed to the specified function.
164
+ * @param range the number of items on the 1D grid to process. The
165
+ * specified function will be called once for each item.
166
+ * @param flags a bitwise combination of zero or more optional flags
167
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
168
+ */
169
+ void pthreadpool_parallelize_1d_with_thread(
170
+ pthreadpool_t threadpool, pthreadpool_task_1d_with_thread_t function,
171
+ void *context, size_t range, uint32_t flags);
172
+
173
+ /**
174
+ * Process items on a 1D grid using a microarchitecture-aware task function.
175
+ *
176
+ * The function implements a parallel version of the following snippet:
177
+ *
178
+ * uint32_t uarch_index = cpuinfo_initialize() ?
179
+ * cpuinfo_get_current_uarch_index() : default_uarch_index;
180
+ * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
181
+ * for (size_t i = 0; i < range; i++)
182
+ * function(context, uarch_index, i);
183
+ *
184
+ * When the function returns, all items have been processed and the thread pool
185
+ * is ready for a new task.
186
+ *
187
+ * @note If multiple threads call this function with the same thread pool, the
188
+ * calls are serialized.
189
+ *
190
+ * @param threadpool the thread pool to use for parallelisation. If
191
+ * threadpool is NULL, all items are processed serially on the calling
192
+ * thread.
193
+ * @param function the function to call for each item.
194
+ * @param context the first argument passed to the specified
195
+ * function.
196
+ * @param default_uarch_index the microarchitecture index to use when
197
+ * pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
198
+ * or index returned by cpuinfo_get_current_uarch_index() exceeds the
199
+ * max_uarch_index value.
200
+ * @param max_uarch_index the maximum microarchitecture index expected by
201
+ * the specified function. If the index returned by
202
+ * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
203
+ * will be used instead. default_uarch_index can exceed max_uarch_index.
204
+ * @param range the number of items on the 1D grid to process.
205
+ * The specified function will be called once for each item.
206
+ * @param flags a bitwise combination of zero or more optional
207
+ * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
208
+ * PTHREADPOOL_FLAG_YIELD_WORKERS)
209
+ */
210
+ void pthreadpool_parallelize_1d_with_uarch(
211
+ pthreadpool_t threadpool, pthreadpool_task_1d_with_id_t function,
212
+ void *context, uint32_t default_uarch_index, uint32_t max_uarch_index,
213
+ size_t range, uint32_t flags);
214
+
215
+ /**
216
+ * Process items on a 1D grid with specified maximum tile size.
217
+ *
218
+ * The function implements a parallel version of the following snippet:
219
+ *
220
+ * for (size_t i = 0; i < range; i += tile)
221
+ * function(context, i, min(range - i, tile));
222
+ *
223
+ * When the call returns, all items have been processed and the thread pool is
224
+ * ready for a new task.
225
+ *
226
+ * @note If multiple threads call this function with the same thread pool,
227
+ * the calls are serialized.
228
+ *
229
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
230
+ * is NULL, all items are processed serially on the calling thread.
231
+ * @param function the function to call for each tile.
232
+ * @param context the first argument passed to the specified function.
233
+ * @param range the number of items on the 1D grid to process.
234
+ * @param tile the maximum number of items on the 1D grid to process in
235
+ * one function call.
236
+ * @param flags a bitwise combination of zero or more optional flags
237
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
238
+ */
239
+ void pthreadpool_parallelize_1d_tile_1d(pthreadpool_t threadpool,
240
+ pthreadpool_task_1d_tile_1d_t function,
241
+ void *context, size_t range,
242
+ size_t tile, uint32_t flags);
243
+
244
+ /**
245
+ * Process items on a 2D grid.
246
+ *
247
+ * The function implements a parallel version of the following snippet:
248
+ *
249
+ * for (size_t i = 0; i < range_i; i++)
250
+ * for (size_t j = 0; j < range_j; j++)
251
+ * function(context, i, j);
252
+ *
253
+ * When the function returns, all items have been processed and the thread pool
254
+ * is ready for a new task.
255
+ *
256
+ * @note If multiple threads call this function with the same thread pool, the
257
+ * calls are serialized.
258
+ *
259
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
260
+ * is NULL, all items are processed serially on the calling thread.
261
+ * @param function the function to call for each item.
262
+ * @param context the first argument passed to the specified function.
263
+ * @param range_i the number of items to process along the first dimension
264
+ * of the 2D grid.
265
+ * @param range_j the number of items to process along the second dimension
266
+ * of the 2D grid.
267
+ * @param flags a bitwise combination of zero or more optional flags
268
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
269
+ */
270
+ void pthreadpool_parallelize_2d(pthreadpool_t threadpool,
271
+ pthreadpool_task_2d_t function, void *context,
272
+ size_t range_i, size_t range_j, uint32_t flags);
273
+
274
+ /**
275
+ * Process items on a 2D grid passing along the current thread id.
276
+ *
277
+ * The function implements a parallel version of the following snippet:
278
+ *
279
+ * for (size_t i = 0; i < range_i; i++)
280
+ * for (size_t j = 0; j < range_j; j++)
281
+ * function(context, thread_index, i, j);
282
+ *
283
+ * When the function returns, all items have been processed and the thread pool
284
+ * is ready for a new task.
285
+ *
286
+ * @note If multiple threads call this function with the same thread pool, the
287
+ * calls are serialized.
288
+ *
289
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
290
+ * is NULL, all items are processed serially on the calling thread.
291
+ * @param function the function to call for each item.
292
+ * @param context the first argument passed to the specified function.
293
+ * @param range_i the number of items to process along the first dimension
294
+ * of the 2D grid.
295
+ * @param range_j the number of items to process along the second dimension
296
+ * of the 2D grid.
297
+ * @param flags a bitwise combination of zero or more optional flags
298
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
299
+ */
300
+ void pthreadpool_parallelize_2d_with_thread(
301
+ pthreadpool_t threadpool, pthreadpool_task_2d_with_thread_t function,
302
+ void *context, size_t range_i, size_t range_j, uint32_t flags);
303
+
304
+ /**
305
+ * Process items on a 2D grid with the specified maximum tile size along the
306
+ * last grid dimension.
307
+ *
308
+ * The function implements a parallel version of the following snippet:
309
+ *
310
+ * for (size_t i = 0; i < range_i; i++)
311
+ * for (size_t j = 0; j < range_j; j += tile_j)
312
+ * function(context, i, j, min(range_j - j, tile_j));
313
+ *
314
+ * When the function returns, all items have been processed and the thread pool
315
+ * is ready for a new task.
316
+ *
317
+ * @note If multiple threads call this function with the same thread pool, the
318
+ * calls are serialized.
319
+ *
320
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
321
+ * is NULL, all items are processed serially on the calling thread.
322
+ * @param function the function to call for each tile.
323
+ * @param context the first argument passed to the specified function.
324
+ * @param range_i the number of items to process along the first dimension
325
+ * of the 2D grid.
326
+ * @param range_j the number of items to process along the second dimension
327
+ * of the 2D grid.
328
+ * @param tile_j the maximum number of items along the second dimension of
329
+ * the 2D grid to process in one function call.
330
+ * @param flags a bitwise combination of zero or more optional flags
331
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
332
+ */
333
+ void pthreadpool_parallelize_2d_tile_1d(pthreadpool_t threadpool,
334
+ pthreadpool_task_2d_tile_1d_t function,
335
+ void *context, size_t range_i,
336
+ size_t range_j, size_t tile_j,
337
+ uint32_t flags);
338
+
339
+ /**
340
+ * Process items on a 2D grid with the specified maximum tile size along the
341
+ * last grid dimension using a microarchitecture-aware task function.
342
+ *
343
+ * The function implements a parallel version of the following snippet:
344
+ *
345
+ * uint32_t uarch_index = cpuinfo_initialize() ?
346
+ * cpuinfo_get_current_uarch_index() : default_uarch_index;
347
+ * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
348
+ * for (size_t i = 0; i < range_i; i++)
349
+ * for (size_t j = 0; j < range_j; j += tile_j)
350
+ * function(context, uarch_index, i, j, min(range_j - j, tile_j));
351
+ *
352
+ * When the function returns, all items have been processed and the thread pool
353
+ * is ready for a new task.
354
+ *
355
+ * @note If multiple threads call this function with the same thread pool, the
356
+ * calls are serialized.
357
+ *
358
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
359
+ * is NULL, all items are processed serially on the calling thread.
360
+ * @param function the function to call for each tile.
361
+ * @param context the first argument passed to the specified function.
362
+ * @param default_uarch_index the microarchitecture index to use when
363
+ * pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
364
+ * or index returned by cpuinfo_get_current_uarch_index() exceeds the
365
+ * max_uarch_index value.
366
+ * @param max_uarch_index the maximum microarchitecture index expected by
367
+ * the specified function. If the index returned by
368
+ * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
369
+ * will be used instead. default_uarch_index can exceed max_uarch_index.
370
+ * @param range_i the number of items to process along the first dimension
371
+ * of the 2D grid.
372
+ * @param range_j the number of items to process along the second dimension
373
+ * of the 2D grid.
374
+ * @param tile_j the maximum number of items along the second dimension of
375
+ * the 2D grid to process in one function call.
376
+ * @param flags a bitwise combination of zero or more optional flags
377
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
378
+ */
379
+ void pthreadpool_parallelize_2d_tile_1d_with_uarch(
380
+ pthreadpool_t threadpool, pthreadpool_task_2d_tile_1d_with_id_t function,
381
+ void *context, uint32_t default_uarch_index, uint32_t max_uarch_index,
382
+ size_t range_i, size_t range_j, size_t tile_j, uint32_t flags);
383
+
384
+ /**
385
+ * Process items on a 2D grid with the specified maximum tile size along the
386
+ * last grid dimension using a microarchitecture-aware task function and passing
387
+ * along the current thread id.
388
+ *
389
+ * The function implements a parallel version of the following snippet:
390
+ *
391
+ * uint32_t uarch_index = cpuinfo_initialize() ?
392
+ * cpuinfo_get_current_uarch_index() : default_uarch_index;
393
+ * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
394
+ * for (size_t i = 0; i < range_i; i++)
395
+ * for (size_t j = 0; j < range_j; j += tile_j)
396
+ * function(context, uarch_index, thread_index, i, j, min(range_j - j,
397
+ * tile_j));
398
+ *
399
+ * When the function returns, all items have been processed and the thread pool
400
+ * is ready for a new task.
401
+ *
402
+ * @note If multiple threads call this function with the same thread pool, the
403
+ * calls are serialized.
404
+ *
405
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
406
+ * is NULL, all items are processed serially on the calling thread.
407
+ * @param function the function to call for each tile.
408
+ * @param context the first argument passed to the specified function.
409
+ * @param default_uarch_index the microarchitecture index to use when
410
+ * pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
411
+ * or index returned by cpuinfo_get_current_uarch_index() exceeds the
412
+ * max_uarch_index value.
413
+ * @param max_uarch_index the maximum microarchitecture index expected by
414
+ * the specified function. If the index returned by
415
+ * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
416
+ * will be used instead. default_uarch_index can exceed max_uarch_index.
417
+ * @param range_i the number of items to process along the first dimension
418
+ * of the 2D grid.
419
+ * @param range_j the number of items to process along the second dimension
420
+ * of the 2D grid.
421
+ * @param tile_j the maximum number of items along the second dimension of
422
+ * the 2D grid to process in one function call.
423
+ * @param flags a bitwise combination of zero or more optional flags
424
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
425
+ */
426
+ void pthreadpool_parallelize_2d_tile_1d_with_uarch_with_thread(
427
+ pthreadpool_t threadpool,
428
+ pthreadpool_task_2d_tile_1d_with_id_with_thread_t function, void *context,
429
+ uint32_t default_uarch_index, uint32_t max_uarch_index, size_t range_i,
430
+ size_t range_j, size_t tile_j, uint32_t flags);
431
+
432
+ /**
433
+ * Process items on a 2D grid with the specified maximum tile size along each
434
+ * grid dimension.
435
+ *
436
+ * The function implements a parallel version of the following snippet:
437
+ *
438
+ * for (size_t i = 0; i < range_i; i += tile_i)
439
+ * for (size_t j = 0; j < range_j; j += tile_j)
440
+ * function(context, i, j,
441
+ * min(range_i - i, tile_i), min(range_j - j, tile_j));
442
+ *
443
+ * When the function returns, all items have been processed and the thread pool
444
+ * is ready for a new task.
445
+ *
446
+ * @note If multiple threads call this function with the same thread pool, the
447
+ * calls are serialized.
448
+ *
449
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
450
+ * is NULL, all items are processed serially on the calling thread.
451
+ * @param function the function to call for each tile.
452
+ * @param context the first argument passed to the specified function.
453
+ * @param range_i the number of items to process along the first dimension
454
+ * of the 2D grid.
455
+ * @param range_j the number of items to process along the second dimension
456
+ * of the 2D grid.
457
+ * @param tile_i the maximum number of items along the first dimension of
458
+ * the 2D grid to process in one function call.
459
+ * @param tile_j the maximum number of items along the second dimension of
460
+ * the 2D grid to process in one function call.
461
+ * @param flags a bitwise combination of zero or more optional flags
462
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
463
+ */
464
+ void pthreadpool_parallelize_2d_tile_2d(pthreadpool_t threadpool,
465
+ pthreadpool_task_2d_tile_2d_t function,
466
+ void *context, size_t range_i,
467
+ size_t range_j, size_t tile_i,
468
+ size_t tile_j, uint32_t flags);
469
+
470
+ /**
471
+ * Process items on a 2D grid with the specified maximum tile size along each
472
+ * grid dimension using a microarchitecture-aware task function.
473
+ *
474
+ * The function implements a parallel version of the following snippet:
475
+ *
476
+ * uint32_t uarch_index = cpuinfo_initialize() ?
477
+ * cpuinfo_get_current_uarch_index() : default_uarch_index;
478
+ * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
479
+ * for (size_t i = 0; i < range_i; i += tile_i)
480
+ * for (size_t j = 0; j < range_j; j += tile_j)
481
+ * function(context, uarch_index, i, j,
482
+ * min(range_i - i, tile_i), min(range_j - j, tile_j));
483
+ *
484
+ * When the function returns, all items have been processed and the thread pool
485
+ * is ready for a new task.
486
+ *
487
+ * @note If multiple threads call this function with the same thread pool, the
488
+ * calls are serialized.
489
+ *
490
+ * @param threadpool the thread pool to use for parallelisation. If
491
+ * threadpool is NULL, all items are processed serially on the calling
492
+ * thread.
493
+ * @param function the function to call for each tile.
494
+ * @param context the first argument passed to the specified
495
+ * function.
496
+ * @param default_uarch_index the microarchitecture index to use when
497
+ * pthreadpool is configured without cpuinfo,
498
+ * cpuinfo initialization failed, or index returned
499
+ * by cpuinfo_get_current_uarch_index() exceeds
500
+ * the max_uarch_index value.
501
+ * @param max_uarch_index the maximum microarchitecture index expected
502
+ * by the specified function. If the index returned
503
+ * by cpuinfo_get_current_uarch_index() exceeds this
504
+ * value, default_uarch_index will be used instead.
505
+ * default_uarch_index can exceed max_uarch_index.
506
+ * @param range_i the number of items to process along the first
507
+ * dimension of the 2D grid.
508
+ * @param range_j the number of items to process along the second
509
+ * dimension of the 2D grid.
510
+ * @param tile_i the maximum number of items along the first
511
+ * dimension of the 2D grid to process in one function call.
512
+ * @param tile_j the maximum number of items along the second
513
+ * dimension of the 2D grid to process in one function call.
514
+ * @param flags a bitwise combination of zero or more optional
515
+ * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
516
+ * PTHREADPOOL_FLAG_YIELD_WORKERS)
517
+ */
518
+ void pthreadpool_parallelize_2d_tile_2d_with_uarch(
519
+ pthreadpool_t threadpool, pthreadpool_task_2d_tile_2d_with_id_t function,
520
+ void *context, uint32_t default_uarch_index, uint32_t max_uarch_index,
521
+ size_t range_i, size_t range_j, size_t tile_i, size_t tile_j,
522
+ uint32_t flags);
523
+
524
+ /**
525
+ * Process items on a 3D grid.
526
+ *
527
+ * The function implements a parallel version of the following snippet:
528
+ *
529
+ * for (size_t i = 0; i < range_i; i++)
530
+ * for (size_t j = 0; j < range_j; j++)
531
+ * for (size_t k = 0; k < range_k; k++)
532
+ * function(context, i, j, k);
533
+ *
534
+ * When the function returns, all items have been processed and the thread pool
535
+ * is ready for a new task.
536
+ *
537
+ * @note If multiple threads call this function with the same thread pool, the
538
+ * calls are serialized.
539
+ *
540
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
541
+ * is NULL, all items are processed serially on the calling thread.
542
+ * @param function the function to call for each item.
543
+ * @param context the first argument passed to the specified function.
544
+ * @param range_i the number of items to process along the first dimension
545
+ * of the 3D grid.
546
+ * @param range_j the number of items to process along the second dimension
547
+ * of the 3D grid.
548
+ * @param range_k the number of items to process along the third dimension
549
+ * of the 3D grid.
550
+ * @param flags a bitwise combination of zero or more optional flags
551
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
552
+ */
553
+ void pthreadpool_parallelize_3d(pthreadpool_t threadpool,
554
+ pthreadpool_task_3d_t function, void *context,
555
+ size_t range_i, size_t range_j, size_t range_k,
556
+ uint32_t flags);
557
+
558
+ /**
559
+ * Process items on a 3D grid with the specified maximum tile size along the
560
+ * last grid dimension.
561
+ *
562
+ * The function implements a parallel version of the following snippet:
563
+ *
564
+ * for (size_t i = 0; i < range_i; i++)
565
+ * for (size_t j = 0; j < range_j; j++)
566
+ * for (size_t k = 0; k < range_k; k += tile_k)
567
+ * function(context, i, j, k, min(range_k - k, tile_k));
568
+ *
569
+ * When the function returns, all items have been processed and the thread pool
570
+ * is ready for a new task.
571
+ *
572
+ * @note If multiple threads call this function with the same thread pool, the
573
+ * calls are serialized.
574
+ *
575
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
576
+ * is NULL, all items are processed serially on the calling thread.
577
+ * @param function the function to call for each tile.
578
+ * @param context the first argument passed to the specified function.
579
+ * @param range_i the number of items to process along the first dimension
580
+ * of the 3D grid.
581
+ * @param range_j the number of items to process along the second dimension
582
+ * of the 3D grid.
583
+ * @param range_k the number of items to process along the third dimension
584
+ * of the 3D grid.
585
+ * @param tile_k the maximum number of items along the third dimension of
586
+ * the 3D grid to process in one function call.
587
+ * @param flags a bitwise combination of zero or more optional flags
588
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
589
+ */
590
+ void pthreadpool_parallelize_3d_tile_1d(pthreadpool_t threadpool,
591
+ pthreadpool_task_3d_tile_1d_t function,
592
+ void *context, size_t range_i,
593
+ size_t range_j, size_t range_k,
594
+ size_t tile_k, uint32_t flags);
595
+
596
+ /**
597
+ * Process items on a 3D grid with the specified maximum tile size along the
598
+ * last grid dimension and passing along the current thread id.
599
+ *
600
+ * The function implements a parallel version of the following snippet:
601
+ *
602
+ * for (size_t i = 0; i < range_i; i++)
603
+ * for (size_t j = 0; j < range_j; j++)
604
+ * for (size_t k = 0; k < range_k; k += tile_k)
605
+ * function(context, thread_index, i, j, k, min(range_k - k, tile_k));
606
+ *
607
+ * When the function returns, all items have been processed and the thread pool
608
+ * is ready for a new task.
609
+ *
610
+ * @note If multiple threads call this function with the same thread pool, the
611
+ * calls are serialized.
612
+ *
613
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
614
+ * is NULL, all items are processed serially on the calling thread.
615
+ * @param function the function to call for each tile.
616
+ * @param context the first argument passed to the specified function.
617
+ * @param range_i the number of items to process along the first dimension
618
+ * of the 3D grid.
619
+ * @param range_j the number of items to process along the second dimension
620
+ * of the 3D grid.
621
+ * @param range_k the number of items to process along the third dimension
622
+ * of the 3D grid.
623
+ * @param tile_k the maximum number of items along the third dimension of
624
+ * the 3D grid to process in one function call.
625
+ * @param flags a bitwise combination of zero or more optional flags
626
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
627
+ */
628
+ void pthreadpool_parallelize_3d_tile_1d_with_thread(
629
+ pthreadpool_t threadpool,
630
+ pthreadpool_task_3d_tile_1d_with_thread_t function, void *context,
631
+ size_t range_i, size_t range_j, size_t range_k, size_t tile_k,
632
+ uint32_t flags);
633
+
634
+ /**
635
+ * Process items on a 3D grid with the specified maximum tile size along the
636
+ * last grid dimension using a microarchitecture-aware task function.
637
+ *
638
+ * The function implements a parallel version of the following snippet:
639
+ *
640
+ * uint32_t uarch_index = cpuinfo_initialize() ?
641
+ * cpuinfo_get_current_uarch_index() : default_uarch_index;
642
+ * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
643
+ * for (size_t i = 0; i < range_i; i++)
644
+ * for (size_t j = 0; j < range_j; j++)
645
+ * for (size_t k = 0; k < range_k; k += tile_k)
646
+ * function(context, uarch_index, i, j, k, min(range_k - k, tile_k));
647
+ *
648
+ * When the function returns, all items have been processed and the thread pool
649
+ * is ready for a new task.
650
+ *
651
+ * @note If multiple threads call this function with the same thread pool, the
652
+ * calls are serialized.
653
+ *
654
+ * @param threadpool the thread pool to use for parallelisation. If
655
+ * threadpool is NULL, all items are processed serially on the calling
656
+ * thread.
657
+ * @param function the function to call for each tile.
658
+ * @param context the first argument passed to the specified
659
+ * function.
660
+ * @param default_uarch_index the microarchitecture index to use when
661
+ * pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
662
+ * or index returned by cpuinfo_get_current_uarch_index() exceeds the
663
+ * max_uarch_index value.
664
+ * @param max_uarch_index the maximum microarchitecture index expected by
665
+ * the specified function. If the index returned by
666
+ * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
667
+ * will be used instead. default_uarch_index can exceed max_uarch_index.
668
+ * @param range_i the number of items to process along the first
669
+ * dimension of the 3D grid.
670
+ * @param range_j the number of items to process along the second
671
+ * dimension of the 3D grid.
672
+ * @param range_k the number of items to process along the third
673
+ * dimension of the 3D grid.
674
+ * @param tile_k the maximum number of items along the third
675
+ * dimension of the 3D grid to process in one function call.
676
+ * @param flags a bitwise combination of zero or more optional
677
+ * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
678
+ * PTHREADPOOL_FLAG_YIELD_WORKERS)
679
+ */
680
+ void pthreadpool_parallelize_3d_tile_1d_with_uarch(
681
+ pthreadpool_t threadpool, pthreadpool_task_3d_tile_1d_with_id_t function,
682
+ void *context, uint32_t default_uarch_index, uint32_t max_uarch_index,
683
+ size_t range_i, size_t range_j, size_t range_k, size_t tile_k,
684
+ uint32_t flags);
685
+
686
+ /**
687
+ * Process items on a 3D grid with the specified maximum tile size along the
688
+ * last grid dimension using a microarchitecture-aware task function and passing
689
+ * along the current thread id.
690
+ *
691
+ * The function implements a parallel version of the following snippet:
692
+ *
693
+ * uint32_t uarch_index = cpuinfo_initialize() ?
694
+ * cpuinfo_get_current_uarch_index() : default_uarch_index;
695
+ * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
696
+ * for (size_t i = 0; i < range_i; i++)
697
+ * for (size_t j = 0; j < range_j; j++)
698
+ * for (size_t k = 0; k < range_k; k += tile_k)
699
+ * function(context, uarch_index, thread_index, i, j, k, min(range_k -
700
+ * k, tile_k));
701
+ *
702
+ * When the function returns, all items have been processed and the thread pool
703
+ * is ready for a new task.
704
+ *
705
+ * @note If multiple threads call this function with the same thread pool, the
706
+ * calls are serialized.
707
+ *
708
+ * @param threadpool the thread pool to use for parallelisation. If
709
+ * threadpool is NULL, all items are processed serially on the calling
710
+ * thread.
711
+ * @param function the function to call for each tile.
712
+ * @param context the first argument passed to the specified
713
+ * function.
714
+ * @param default_uarch_index the microarchitecture index to use when
715
+ * pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
716
+ * or index returned by cpuinfo_get_current_uarch_index() exceeds the
717
+ * max_uarch_index value.
718
+ * @param max_uarch_index the maximum microarchitecture index expected by
719
+ * the specified function. If the index returned by
720
+ * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
721
+ * will be used instead. default_uarch_index can exceed max_uarch_index.
722
+ * @param range_i the number of items to process along the first
723
+ * dimension of the 3D grid.
724
+ * @param range_j the number of items to process along the second
725
+ * dimension of the 3D grid.
726
+ * @param range_k the number of items to process along the third
727
+ * dimension of the 3D grid.
728
+ * @param tile_k the maximum number of items along the third
729
+ * dimension of the 3D grid to process in one function call.
730
+ * @param flags a bitwise combination of zero or more optional
731
+ * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
732
+ * PTHREADPOOL_FLAG_YIELD_WORKERS)
733
+ */
734
+ void pthreadpool_parallelize_3d_tile_1d_with_uarch_with_thread(
735
+ pthreadpool_t threadpool,
736
+ pthreadpool_task_3d_tile_1d_with_id_with_thread_t function, void *context,
737
+ uint32_t default_uarch_index, uint32_t max_uarch_index, size_t range_i,
738
+ size_t range_j, size_t range_k, size_t tile_k, uint32_t flags);
739
+
740
+ /**
741
+ * Process items on a 3D grid with the specified maximum tile size along the
742
+ * last two grid dimensions.
743
+ *
744
+ * The function implements a parallel version of the following snippet:
745
+ *
746
+ * for (size_t i = 0; i < range_i; i++)
747
+ * for (size_t j = 0; j < range_j; j += tile_j)
748
+ * for (size_t k = 0; k < range_k; k += tile_k)
749
+ * function(context, i, j, k,
750
+ * min(range_j - j, tile_j), min(range_k - k, tile_k));
751
+ *
752
+ * When the function returns, all items have been processed and the thread pool
753
+ * is ready for a new task.
754
+ *
755
+ * @note If multiple threads call this function with the same thread pool, the
756
+ * calls are serialized.
757
+ *
758
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
759
+ * is NULL, all items are processed serially on the calling thread.
760
+ * @param function the function to call for each tile.
761
+ * @param context the first argument passed to the specified function.
762
+ * @param range_i the number of items to process along the first dimension
763
+ * of the 3D grid.
764
+ * @param range_j the number of items to process along the second dimension
765
+ * of the 3D grid.
766
+ * @param range_k the number of items to process along the third dimension
767
+ * of the 3D grid.
768
+ * @param tile_j the maximum number of items along the second dimension of
769
+ * the 3D grid to process in one function call.
770
+ * @param tile_k the maximum number of items along the third dimension of
771
+ * the 3D grid to process in one function call.
772
+ * @param flags a bitwise combination of zero or more optional flags
773
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
774
+ */
775
+ void pthreadpool_parallelize_3d_tile_2d(pthreadpool_t threadpool,
776
+ pthreadpool_task_3d_tile_2d_t function,
777
+ void *context, size_t range_i,
778
+ size_t range_j, size_t range_k,
779
+ size_t tile_j, size_t tile_k,
780
+ uint32_t flags);
781
+
782
+ /**
783
+ * Process items on a 3D grid with the specified maximum tile size along the
784
+ * last two grid dimensions using a microarchitecture-aware task function.
785
+ *
786
+ * The function implements a parallel version of the following snippet:
787
+ *
788
+ * uint32_t uarch_index = cpuinfo_initialize() ?
789
+ * cpuinfo_get_current_uarch_index() : default_uarch_index;
790
+ * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
791
+ * for (size_t i = 0; i < range_i; i++)
792
+ * for (size_t j = 0; j < range_j; j += tile_j)
793
+ * for (size_t k = 0; k < range_k; k += tile_k)
794
+ * function(context, uarch_index, i, j, k,
795
+ * min(range_j - j, tile_j), min(range_k - k, tile_k));
796
+ *
797
+ * When the function returns, all items have been processed and the thread pool
798
+ * is ready for a new task.
799
+ *
800
+ * @note If multiple threads call this function with the same thread pool, the
801
+ * calls are serialized.
802
+ *
803
+ * @param threadpool the thread pool to use for parallelisation. If
804
+ * threadpool is NULL, all items are processed serially on the calling
805
+ * thread.
806
+ * @param function the function to call for each tile.
807
+ * @param context the first argument passed to the specified
808
+ * function.
809
+ * @param default_uarch_index the microarchitecture index to use when
810
+ * pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
811
+ * or index returned by cpuinfo_get_current_uarch_index() exceeds the
812
+ * max_uarch_index value.
813
+ * @param max_uarch_index the maximum microarchitecture index expected by
814
+ * the specified function. If the index returned by
815
+ * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
816
+ * will be used instead. default_uarch_index can exceed max_uarch_index.
817
+ * @param range_i the number of items to process along the first
818
+ * dimension of the 3D grid.
819
+ * @param range_j the number of items to process along the second
820
+ * dimension of the 3D grid.
821
+ * @param range_k the number of items to process along the third
822
+ * dimension of the 3D grid.
823
+ * @param tile_j the maximum number of items along the second
824
+ * dimension of the 3D grid to process in one function call.
825
+ * @param tile_k the maximum number of items along the third
826
+ * dimension of the 3D grid to process in one function call.
827
+ * @param flags a bitwise combination of zero or more optional
828
+ * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
829
+ * PTHREADPOOL_FLAG_YIELD_WORKERS)
830
+ */
831
+ void pthreadpool_parallelize_3d_tile_2d_with_uarch(
832
+ pthreadpool_t threadpool, pthreadpool_task_3d_tile_2d_with_id_t function,
833
+ void *context, uint32_t default_uarch_index, uint32_t max_uarch_index,
834
+ size_t range_i, size_t range_j, size_t range_k, size_t tile_j,
835
+ size_t tile_k, uint32_t flags);
836
+
837
+ /**
838
+ * Process items on a 4D grid.
839
+ *
840
+ * The function implements a parallel version of the following snippet:
841
+ *
842
+ * for (size_t i = 0; i < range_i; i++)
843
+ * for (size_t j = 0; j < range_j; j++)
844
+ * for (size_t k = 0; k < range_k; k++)
845
+ * for (size_t l = 0; l < range_l; l++)
846
+ * function(context, i, j, k, l);
847
+ *
848
+ * When the function returns, all items have been processed and the thread pool
849
+ * is ready for a new task.
850
+ *
851
+ * @note If multiple threads call this function with the same thread pool, the
852
+ * calls are serialized.
853
+ *
854
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
855
+ * is NULL, all items are processed serially on the calling thread.
856
+ * @param function the function to call for each item.
857
+ * @param context the first argument passed to the specified function.
858
+ * @param range_i the number of items to process along the first dimension
859
+ * of the 4D grid.
860
+ * @param range_j the number of items to process along the second dimension
861
+ * of the 4D grid.
862
+ * @param range_k the number of items to process along the third dimension
863
+ * of the 4D grid.
864
+ * @param range_l the number of items to process along the fourth dimension
865
+ * of the 4D grid.
866
+ * @param flags a bitwise combination of zero or more optional flags
867
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
868
+ */
869
+ void pthreadpool_parallelize_4d(pthreadpool_t threadpool,
870
+ pthreadpool_task_4d_t function, void *context,
871
+ size_t range_i, size_t range_j, size_t range_k,
872
+ size_t range_l, uint32_t flags);
873
+
874
+ /**
875
+ * Process items on a 4D grid with the specified maximum tile size along the
876
+ * last grid dimension.
877
+ *
878
+ * The function implements a parallel version of the following snippet:
879
+ *
880
+ * for (size_t i = 0; i < range_i; i++)
881
+ * for (size_t j = 0; j < range_j; j++)
882
+ * for (size_t k = 0; k < range_k; k++)
883
+ * for (size_t l = 0; l < range_l; l += tile_l)
884
+ * function(context, i, j, k, l, min(range_l - l, tile_l));
885
+ *
886
+ * When the function returns, all items have been processed and the thread pool
887
+ * is ready for a new task.
888
+ *
889
+ * @note If multiple threads call this function with the same thread pool, the
890
+ * calls are serialized.
891
+ *
892
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
893
+ * is NULL, all items are processed serially on the calling thread.
894
+ * @param function the function to call for each tile.
895
+ * @param context the first argument passed to the specified function.
896
+ * @param range_i the number of items to process along the first dimension
897
+ * of the 4D grid.
898
+ * @param range_j the number of items to process along the second dimension
899
+ * of the 4D grid.
900
+ * @param range_k the number of items to process along the third dimension
901
+ * of the 4D grid.
902
+ * @param range_l the number of items to process along the fourth dimension
903
+ * of the 4D grid.
904
+ * @param tile_l the maximum number of items along the fourth dimension of
905
+ * the 4D grid to process in one function call.
906
+ * @param flags a bitwise combination of zero or more optional flags
907
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
908
+ */
909
+ void pthreadpool_parallelize_4d_tile_1d(pthreadpool_t threadpool,
910
+ pthreadpool_task_4d_tile_1d_t function,
911
+ void *context, size_t range_i,
912
+ size_t range_j, size_t range_k,
913
+ size_t range_l, size_t tile_l,
914
+ uint32_t flags);
915
+
916
+ /**
917
+ * Process items on a 4D grid with the specified maximum tile size along the
918
+ * last two grid dimensions.
919
+ *
920
+ * The function implements a parallel version of the following snippet:
921
+ *
922
+ * for (size_t i = 0; i < range_i; i++)
923
+ * for (size_t j = 0; j < range_j; j++)
924
+ * for (size_t k = 0; k < range_k; k += tile_k)
925
+ * for (size_t l = 0; l < range_l; l += tile_l)
926
+ * function(context, i, j, k, l,
927
+ * min(range_k - k, tile_k), min(range_l - l, tile_l));
928
+ *
929
+ * When the function returns, all items have been processed and the thread pool
930
+ * is ready for a new task.
931
+ *
932
+ * @note If multiple threads call this function with the same thread pool, the
933
+ * calls are serialized.
934
+ *
935
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
936
+ * is NULL, all items are processed serially on the calling thread.
937
+ * @param function the function to call for each tile.
938
+ * @param context the first argument passed to the specified function.
939
+ * @param range_i the number of items to process along the first dimension
940
+ * of the 4D grid.
941
+ * @param range_j the number of items to process along the second dimension
942
+ * of the 4D grid.
943
+ * @param range_k the number of items to process along the third dimension
944
+ * of the 4D grid.
945
+ * @param range_l the number of items to process along the fourth dimension
946
+ * of the 4D grid.
947
+ * @param tile_k the maximum number of items along the third dimension of
948
+ * the 4D grid to process in one function call.
949
+ * @param tile_l the maximum number of items along the fourth dimension of
950
+ * the 4D grid to process in one function call.
951
+ * @param flags a bitwise combination of zero or more optional flags
952
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
953
+ */
954
+ void pthreadpool_parallelize_4d_tile_2d(pthreadpool_t threadpool,
955
+ pthreadpool_task_4d_tile_2d_t function,
956
+ void *context, size_t range_i,
957
+ size_t range_j, size_t range_k,
958
+ size_t range_l, size_t tile_k,
959
+ size_t tile_l, uint32_t flags);
960
+
961
+ /**
962
+ * Process items on a 4D grid with the specified maximum tile size along the
963
+ * last two grid dimensions using a microarchitecture-aware task function.
964
+ *
965
+ * The function implements a parallel version of the following snippet:
966
+ *
967
+ * uint32_t uarch_index = cpuinfo_initialize() ?
968
+ * cpuinfo_get_current_uarch_index() : default_uarch_index;
969
+ * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
970
+ * for (size_t i = 0; i < range_i; i++)
971
+ * for (size_t j = 0; j < range_j; j++)
972
+ * for (size_t k = 0; k < range_k; k += tile_k)
973
+ * for (size_t l = 0; l < range_l; l += tile_l)
974
+ * function(context, uarch_index, i, j, k, l,
975
+ * min(range_k - k, tile_k), min(range_l - l, tile_l));
976
+ *
977
+ * When the function returns, all items have been processed and the thread pool
978
+ * is ready for a new task.
979
+ *
980
+ * @note If multiple threads call this function with the same thread pool, the
981
+ * calls are serialized.
982
+ *
983
+ * @param threadpool the thread pool to use for parallelisation. If
984
+ * threadpool is NULL, all items are processed serially on the calling
985
+ * thread.
986
+ * @param function the function to call for each tile.
987
+ * @param context the first argument passed to the specified
988
+ * function.
989
+ * @param default_uarch_index the microarchitecture index to use when
990
+ * pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
991
+ * or index returned by cpuinfo_get_current_uarch_index() exceeds the
992
+ * max_uarch_index value.
993
+ * @param max_uarch_index the maximum microarchitecture index expected by
994
+ * the specified function. If the index returned by
995
+ * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
996
+ * will be used instead. default_uarch_index can exceed max_uarch_index.
997
+ * @param range_i the number of items to process along the first
998
+ * dimension of the 4D grid.
999
+ * @param range_j the number of items to process along the second
1000
+ * dimension of the 4D grid.
1001
+ * @param range_k the number of items to process along the third
1002
+ * dimension of the 4D grid.
1003
+ * @param range_l the number of items to process along the fourth
1004
+ * dimension of the 4D grid.
1005
+ * @param tile_k the maximum number of items along the third
1006
+ * dimension of the 4D grid to process in one function call.
1007
+ * @param tile_l the maximum number of items along the fourth
1008
+ * dimension of the 4D grid to process in one function call.
1009
+ * @param flags a bitwise combination of zero or more optional
1010
+ * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
1011
+ * PTHREADPOOL_FLAG_YIELD_WORKERS)
1012
+ */
1013
+ void pthreadpool_parallelize_4d_tile_2d_with_uarch(
1014
+ pthreadpool_t threadpool, pthreadpool_task_4d_tile_2d_with_id_t function,
1015
+ void *context, uint32_t default_uarch_index, uint32_t max_uarch_index,
1016
+ size_t range_i, size_t range_j, size_t range_k, size_t range_l,
1017
+ size_t tile_k, size_t tile_l, uint32_t flags);
1018
+
1019
+ /**
1020
+ * Process items on a 5D grid.
1021
+ *
1022
+ * The function implements a parallel version of the following snippet:
1023
+ *
1024
+ * for (size_t i = 0; i < range_i; i++)
1025
+ * for (size_t j = 0; j < range_j; j++)
1026
+ * for (size_t k = 0; k < range_k; k++)
1027
+ * for (size_t l = 0; l < range_l; l++)
1028
+ * for (size_t m = 0; m < range_m; m++)
1029
+ * function(context, i, j, k, l, m);
1030
+ *
1031
+ * When the function returns, all items have been processed and the thread pool
1032
+ * is ready for a new task.
1033
+ *
1034
+ * @note If multiple threads call this function with the same thread pool, the
1035
+ * calls are serialized.
1036
+ *
1037
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1038
+ * is NULL, all items are processed serially on the calling thread.
1039
+ * @param function the function to call for each item.
1040
+ * @param context the first argument passed to the specified function.
1041
+ * @param range_i the number of items to process along the first dimension
1042
+ * of the 5D grid.
1043
+ * @param range_j the number of items to process along the second dimension
1044
+ * of the 5D grid.
1045
+ * @param range_k the number of items to process along the third dimension
1046
+ * of the 5D grid.
1047
+ * @param range_l the number of items to process along the fourth dimension
1048
+ * of the 5D grid.
1049
+ * @param range_m the number of items to process along the fifth dimension
1050
+ * of the 5D grid.
1051
+ * @param flags a bitwise combination of zero or more optional flags
1052
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1053
+ */
1054
+ void pthreadpool_parallelize_5d(pthreadpool_t threadpool,
1055
+ pthreadpool_task_5d_t function, void *context,
1056
+ size_t range_i, size_t range_j, size_t range_k,
1057
+ size_t range_l, size_t range_m, uint32_t flags);
1058
+
1059
+ /**
1060
+ * Process items on a 5D grid with the specified maximum tile size along the
1061
+ * last grid dimension.
1062
+ *
1063
+ * The function implements a parallel version of the following snippet:
1064
+ *
1065
+ * for (size_t i = 0; i < range_i; i++)
1066
+ * for (size_t j = 0; j < range_j; j++)
1067
+ * for (size_t k = 0; k < range_k; k++)
1068
+ * for (size_t l = 0; l < range_l; l++)
1069
+ * for (size_t m = 0; m < range_m; m += tile_m)
1070
+ * function(context, i, j, k, l, m, min(range_m - m, tile_m));
1071
+ *
1072
+ * When the function returns, all items have been processed and the thread pool
1073
+ * is ready for a new task.
1074
+ *
1075
+ * @note If multiple threads call this function with the same thread pool, the
1076
+ * calls are serialized.
1077
+ *
1078
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1079
+ * is NULL, all items are processed serially on the calling thread.
1080
+ * @param function the function to call for each tile.
1081
+ * @param context the first argument passed to the specified function.
1082
+ * @param range_i the number of items to process along the first dimension
1083
+ * of the 5D grid.
1084
+ * @param range_j the number of items to process along the second dimension
1085
+ * of the 5D grid.
1086
+ * @param range_k the number of items to process along the third dimension
1087
+ * of the 5D grid.
1088
+ * @param range_l the number of items to process along the fourth dimension
1089
+ * of the 5D grid.
1090
+ * @param range_m the number of items to process along the fifth dimension
1091
+ * of the 5D grid.
1092
+ * @param tile_m the maximum number of items along the fifth dimension of
1093
+ * the 5D grid to process in one function call.
1094
+ * @param flags a bitwise combination of zero or more optional flags
1095
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1096
+ */
1097
+ void pthreadpool_parallelize_5d_tile_1d(pthreadpool_t threadpool,
1098
+ pthreadpool_task_5d_tile_1d_t function,
1099
+ void *context, size_t range_i,
1100
+ size_t range_j, size_t range_k,
1101
+ size_t range_l, size_t range_m,
1102
+ size_t tile_m, uint32_t flags);
1103
+
1104
+ /**
1105
+ * Process items on a 5D grid with the specified maximum tile size along the
1106
+ * last two grid dimensions.
1107
+ *
1108
+ * The function implements a parallel version of the following snippet:
1109
+ *
1110
+ * for (size_t i = 0; i < range_i; i++)
1111
+ * for (size_t j = 0; j < range_j; j++)
1112
+ * for (size_t k = 0; k < range_k; k++)
1113
+ * for (size_t l = 0; l < range_l; l += tile_l)
1114
+ * for (size_t m = 0; m < range_m; m += tile_m)
1115
+ * function(context, i, j, k, l, m,
1116
+ * min(range_l - l, tile_l), min(range_m - m, tile_m));
1117
+ *
1118
+ * When the function returns, all items have been processed and the thread pool
1119
+ * is ready for a new task.
1120
+ *
1121
+ * @note If multiple threads call this function with the same thread pool, the
1122
+ * calls are serialized.
1123
+ *
1124
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1125
+ * is NULL, all items are processed serially on the calling thread.
1126
+ * @param function the function to call for each tile.
1127
+ * @param context the first argument passed to the specified function.
1128
+ * @param range_i the number of items to process along the first dimension
1129
+ * of the 5D grid.
1130
+ * @param range_j the number of items to process along the second dimension
1131
+ * of the 5D grid.
1132
+ * @param range_k the number of items to process along the third dimension
1133
+ * of the 5D grid.
1134
+ * @param range_l the number of items to process along the fourth dimension
1135
+ * of the 5D grid.
1136
+ * @param range_m the number of items to process along the fifth dimension
1137
+ * of the 5D grid.
1138
+ * @param tile_l the maximum number of items along the fourth dimension of
1139
+ * the 5D grid to process in one function call.
1140
+ * @param tile_m the maximum number of items along the fifth dimension of
1141
+ * the 5D grid to process in one function call.
1142
+ * @param flags a bitwise combination of zero or more optional flags
1143
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1144
+ */
1145
+ void pthreadpool_parallelize_5d_tile_2d(pthreadpool_t threadpool,
1146
+ pthreadpool_task_5d_tile_2d_t function,
1147
+ void *context, size_t range_i,
1148
+ size_t range_j, size_t range_k,
1149
+ size_t range_l, size_t range_m,
1150
+ size_t tile_l, size_t tile_m,
1151
+ uint32_t flags);
1152
+
1153
+ /**
1154
+ * Process items on a 6D grid.
1155
+ *
1156
+ * The function implements a parallel version of the following snippet:
1157
+ *
1158
+ * for (size_t i = 0; i < range_i; i++)
1159
+ * for (size_t j = 0; j < range_j; j++)
1160
+ * for (size_t k = 0; k < range_k; k++)
1161
+ * for (size_t l = 0; l < range_l; l++)
1162
+ * for (size_t m = 0; m < range_m; m++)
1163
+ * for (size_t n = 0; n < range_n; n++)
1164
+ * function(context, i, j, k, l, m, n);
1165
+ *
1166
+ * When the function returns, all items have been processed and the thread pool
1167
+ * is ready for a new task.
1168
+ *
1169
+ * @note If multiple threads call this function with the same thread pool, the
1170
+ * calls are serialized.
1171
+ *
1172
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1173
+ * is NULL, all items are processed serially on the calling thread.
1174
+ * @param function the function to call for each item.
1175
+ * @param context the first argument passed to the specified function.
1176
+ * @param range_i the number of items to process along the first dimension
1177
+ * of the 6D grid.
1178
+ * @param range_j the number of items to process along the second dimension
1179
+ * of the 6D grid.
1180
+ * @param range_k the number of items to process along the third dimension
1181
+ * of the 6D grid.
1182
+ * @param range_l the number of items to process along the fourth dimension
1183
+ * of the 6D grid.
1184
+ * @param range_m the number of items to process along the fifth dimension
1185
+ * of the 6D grid.
1186
+ * @param range_n the number of items to process along the sixth dimension
1187
+ * of the 6D grid.
1190
+ * @param flags a bitwise combination of zero or more optional flags
1191
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1192
+ */
1193
+ void pthreadpool_parallelize_6d(pthreadpool_t threadpool,
1194
+ pthreadpool_task_6d_t function, void *context,
1195
+ size_t range_i, size_t range_j, size_t range_k,
1196
+ size_t range_l, size_t range_m, size_t range_n,
1197
+ uint32_t flags);
1198
+
1199
+ /**
1200
+ * Process items on a 6D grid with the specified maximum tile size along the
1201
+ * last grid dimension.
1202
+ *
1203
+ * The function implements a parallel version of the following snippet:
1204
+ *
1205
+ * for (size_t i = 0; i < range_i; i++)
1206
+ * for (size_t j = 0; j < range_j; j++)
1207
+ * for (size_t k = 0; k < range_k; k++)
1208
+ * for (size_t l = 0; l < range_l; l++)
1209
+ * for (size_t m = 0; m < range_m; m++)
1210
+ * for (size_t n = 0; n < range_n; n += tile_n)
1211
+ * function(context, i, j, k, l, m, n, min(range_n - n, tile_n));
1212
+ *
1213
+ * When the function returns, all items have been processed and the thread pool
1214
+ * is ready for a new task.
1215
+ *
1216
+ * @note If multiple threads call this function with the same thread pool, the
1217
+ * calls are serialized.
1218
+ *
1219
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1220
+ * is NULL, all items are processed serially on the calling thread.
1221
+ * @param function the function to call for each tile.
1222
+ * @param context the first argument passed to the specified function.
1223
+ * @param range_i the number of items to process along the first dimension
1224
+ * of the 6D grid.
1225
+ * @param range_j the number of items to process along the second dimension
1226
+ * of the 6D grid.
1227
+ * @param range_k the number of items to process along the third dimension
1228
+ * of the 6D grid.
1229
+ * @param range_l the number of items to process along the fourth dimension
1230
+ * of the 6D grid.
1231
+ * @param range_m the number of items to process along the fifth dimension
1232
+ * of the 6D grid.
1233
+ * @param range_n the number of items to process along the sixth dimension
1234
+ * of the 6D grid.
1235
+ * @param tile_n the maximum number of items along the sixth dimension of
1236
+ * the 6D grid to process in one function call.
1237
+ * @param flags a bitwise combination of zero or more optional flags
1238
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1239
+ */
1240
+ void pthreadpool_parallelize_6d_tile_1d(pthreadpool_t threadpool,
1241
+ pthreadpool_task_6d_tile_1d_t function,
1242
+ void *context, size_t range_i,
1243
+ size_t range_j, size_t range_k,
1244
+ size_t range_l, size_t range_m,
1245
+ size_t range_n, size_t tile_n,
1246
+ uint32_t flags);
1247
+
1248
+ /**
1249
+ * Process items on a 6D grid with the specified maximum tile size along the
1250
+ * last two grid dimensions.
1251
+ *
1252
+ * The function implements a parallel version of the following snippet:
1253
+ *
1254
+ * for (size_t i = 0; i < range_i; i++)
1255
+ * for (size_t j = 0; j < range_j; j++)
1256
+ * for (size_t k = 0; k < range_k; k++)
1257
+ * for (size_t l = 0; l < range_l; l++)
1258
+ * for (size_t m = 0; m < range_m; m += tile_m)
1259
+ * for (size_t n = 0; n < range_n; n += tile_n)
1260
+ * function(context, i, j, k, l, m, n,
1261
+ * min(range_m - m, tile_m), min(range_n - n, tile_n));
1262
+ *
1263
+ * When the function returns, all items have been processed and the thread pool
1264
+ * is ready for a new task.
1265
+ *
1266
+ * @note If multiple threads call this function with the same thread pool, the
1267
+ * calls are serialized.
1268
+ *
1269
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1270
+ * is NULL, all items are processed serially on the calling thread.
1271
+ * @param function the function to call for each tile.
1272
+ * @param context the first argument passed to the specified function.
1273
+ * @param range_i the number of items to process along the first dimension
1274
+ * of the 6D grid.
1275
+ * @param range_j the number of items to process along the second dimension
1276
+ * of the 6D grid.
1277
+ * @param range_k the number of items to process along the third dimension
1278
+ * of the 6D grid.
1279
+ * @param range_l the number of items to process along the fourth dimension
1280
+ * of the 6D grid.
1281
+ * @param range_m the number of items to process along the fifth dimension
1282
+ * of the 6D grid.
1283
+ * @param range_n the number of items to process along the sixth dimension
1284
+ * of the 6D grid.
1285
+ * @param tile_m the maximum number of items along the fifth dimension of
1286
+ * the 6D grid to process in one function call.
1287
+ * @param tile_n the maximum number of items along the sixth dimension of
1288
+ * the 6D grid to process in one function call.
1289
+ * @param flags a bitwise combination of zero or more optional flags
1290
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1291
+ */
1292
+ void pthreadpool_parallelize_6d_tile_2d(pthreadpool_t threadpool,
1293
+ pthreadpool_task_6d_tile_2d_t function,
1294
+ void *context, size_t range_i,
1295
+ size_t range_j, size_t range_k,
1296
+ size_t range_l, size_t range_m,
1297
+ size_t range_n, size_t tile_m,
1298
+ size_t tile_n, uint32_t flags);
1299
+
1300
+ /**
1301
+ * Terminates threads in the thread pool and releases associated resources.
1302
+ *
1303
+ * @warning Accessing the thread pool after a call to this function constitutes
1304
+ * undefined behaviour and may cause data corruption.
1305
+ *
1306
+ * @param[in,out] threadpool The thread pool to destroy.
1307
+ */
1308
+ void pthreadpool_destroy(pthreadpool_t threadpool);
1309
+
1310
+ #ifndef PTHREADPOOL_NO_DEPRECATED_API
1311
+
1312
+ /* Legacy API for compatibility with pre-existing users (e.g. NNPACK) */
1313
+ #if defined(__GNUC__)
1314
+ #define PTHREADPOOL_DEPRECATED __attribute__((__deprecated__))
1315
+ #else
1316
+ #define PTHREADPOOL_DEPRECATED
1317
+ #endif
1318
+
1319
+ typedef void (*pthreadpool_function_1d_t)(void *, size_t);
1320
+ typedef void (*pthreadpool_function_1d_tiled_t)(void *, size_t, size_t);
1321
+ typedef void (*pthreadpool_function_2d_t)(void *, size_t, size_t);
1322
+ typedef void (*pthreadpool_function_2d_tiled_t)(void *, size_t, size_t, size_t,
1323
+ size_t);
1324
+ typedef void (*pthreadpool_function_3d_tiled_t)(void *, size_t, size_t, size_t,
1325
+ size_t, size_t, size_t);
1326
+ typedef void (*pthreadpool_function_4d_tiled_t)(void *, size_t, size_t, size_t,
1327
+ size_t, size_t, size_t, size_t,
1328
+ size_t);
1329
+
1330
+ void pthreadpool_compute_1d(pthreadpool_t threadpool,
1331
+ pthreadpool_function_1d_t function, void *argument,
1332
+ size_t range) PTHREADPOOL_DEPRECATED;
1333
+
1334
+ void pthreadpool_compute_1d_tiled(pthreadpool_t threadpool,
1335
+ pthreadpool_function_1d_tiled_t function,
1336
+ void *argument, size_t range,
1337
+ size_t tile) PTHREADPOOL_DEPRECATED;
1338
+
1339
+ void pthreadpool_compute_2d(pthreadpool_t threadpool,
1340
+ pthreadpool_function_2d_t function, void *argument,
1341
+ size_t range_i,
1342
+ size_t range_j) PTHREADPOOL_DEPRECATED;
1343
+
1344
+ void pthreadpool_compute_2d_tiled(pthreadpool_t threadpool,
1345
+ pthreadpool_function_2d_tiled_t function,
1346
+ void *argument, size_t range_i,
1347
+ size_t range_j, size_t tile_i,
1348
+ size_t tile_j) PTHREADPOOL_DEPRECATED;
1349
+
1350
+ void pthreadpool_compute_3d_tiled(pthreadpool_t threadpool,
1351
+ pthreadpool_function_3d_tiled_t function,
1352
+ void *argument, size_t range_i,
1353
+ size_t range_j, size_t range_k, size_t tile_i,
1354
+ size_t tile_j,
1355
+ size_t tile_k) PTHREADPOOL_DEPRECATED;
1356
+
1357
+ void pthreadpool_compute_4d_tiled(pthreadpool_t threadpool,
1358
+ pthreadpool_function_4d_tiled_t function,
1359
+ void *argument, size_t range_i,
1360
+ size_t range_j, size_t range_k,
1361
+ size_t range_l, size_t tile_i, size_t tile_j,
1362
+ size_t tile_k,
1363
+ size_t tile_l) PTHREADPOOL_DEPRECATED;
1364
+
1365
+ #endif /* PTHREADPOOL_NO_DEPRECATED_API */
1366
+
1367
+ #ifdef __cplusplus
1368
+ } /* extern "C" */
1369
+ #endif
1370
+
1371
+ #ifdef __cplusplus
1372
+
1373
namespace libpthreadpool {
namespace detail {
namespace {

/*
 * Trampolines used by the C++ functor overloads of the parallelize functions.
 *
 * Each trampoline has the shape "void *context followed by size_t arguments".
 * It reinterprets the context pointer as a pointer to a const T, invokes that
 * object's call operator with the remaining arguments forwarded unchanged and
 * in the same order, and ignores whatever the call returns.
 *
 * In the tiled variants the trailing tile_* arguments are passed through
 * exactly like the index arguments that precede them; no clamping or other
 * arithmetic happens here.
 */

/* 1D: forwards (i). */
template <class T> void call_wrapper_1d(void *functor, size_t i) {
  const T &callable = *static_cast<const T *>(functor);
  callable(i);
}

/* 1D, tiled along the only dimension: forwards (i, tile_i). */
template <class T>
void call_wrapper_1d_tile_1d(void *functor, size_t i, size_t tile_i) {
  const T &callable = *static_cast<const T *>(functor);
  callable(i, tile_i);
}

/* 2D: forwards (i, j). */
template <class T> void call_wrapper_2d(void *functor, size_t i, size_t j) {
  const T &callable = *static_cast<const T *>(functor);
  callable(i, j);
}

/* 2D, tiled along the last dimension: forwards (i, j, tile_j). */
template <class T>
void call_wrapper_2d_tile_1d(void *functor, size_t i, size_t j,
                             size_t tile_j) {
  const T &callable = *static_cast<const T *>(functor);
  callable(i, j, tile_j);
}

/* 2D, tiled along both dimensions: forwards (i, j, tile_i, tile_j). */
template <class T>
void call_wrapper_2d_tile_2d(void *functor, size_t i, size_t j, size_t tile_i,
                             size_t tile_j) {
  const T &callable = *static_cast<const T *>(functor);
  callable(i, j, tile_i, tile_j);
}

/* 3D: forwards (i, j, k). */
template <class T>
void call_wrapper_3d(void *functor, size_t i, size_t j, size_t k) {
  const T &callable = *static_cast<const T *>(functor);
  callable(i, j, k);
}

/* 3D, tiled along the last dimension: forwards (i, j, k, tile_k). */
template <class T>
void call_wrapper_3d_tile_1d(void *functor, size_t i, size_t j, size_t k,
                             size_t tile_k) {
  const T &callable = *static_cast<const T *>(functor);
  callable(i, j, k, tile_k);
}

/* 3D, tiled along the last two dimensions: forwards (i, j, k, tile_j,
 * tile_k). */
template <class T>
void call_wrapper_3d_tile_2d(void *functor, size_t i, size_t j, size_t k,
                             size_t tile_j, size_t tile_k) {
  const T &callable = *static_cast<const T *>(functor);
  callable(i, j, k, tile_j, tile_k);
}

/* 4D: forwards (i, j, k, l). */
template <class T>
void call_wrapper_4d(void *functor, size_t i, size_t j, size_t k, size_t l) {
  const T &callable = *static_cast<const T *>(functor);
  callable(i, j, k, l);
}

/* 4D, tiled along the last dimension: forwards (i, j, k, l, tile_l). */
template <class T>
void call_wrapper_4d_tile_1d(void *functor, size_t i, size_t j, size_t k,
                             size_t l, size_t tile_l) {
  const T &callable = *static_cast<const T *>(functor);
  callable(i, j, k, l, tile_l);
}

/* 4D, tiled along the last two dimensions: forwards (i, j, k, l, tile_k,
 * tile_l). */
template <class T>
void call_wrapper_4d_tile_2d(void *functor, size_t i, size_t j, size_t k,
                             size_t l, size_t tile_k, size_t tile_l) {
  const T &callable = *static_cast<const T *>(functor);
  callable(i, j, k, l, tile_k, tile_l);
}

/* 5D: forwards (i, j, k, l, m). */
template <class T>
void call_wrapper_5d(void *functor, size_t i, size_t j, size_t k, size_t l,
                     size_t m) {
  const T &callable = *static_cast<const T *>(functor);
  callable(i, j, k, l, m);
}

/* 5D, tiled along the last dimension: forwards (i, j, k, l, m, tile_m). */
template <class T>
void call_wrapper_5d_tile_1d(void *functor, size_t i, size_t j, size_t k,
                             size_t l, size_t m, size_t tile_m) {
  const T &callable = *static_cast<const T *>(functor);
  callable(i, j, k, l, m, tile_m);
}

/* 5D, tiled along the last two dimensions: forwards (i, j, k, l, m, tile_l,
 * tile_m). */
template <class T>
void call_wrapper_5d_tile_2d(void *functor, size_t i, size_t j, size_t k,
                             size_t l, size_t m, size_t tile_l,
                             size_t tile_m) {
  const T &callable = *static_cast<const T *>(functor);
  callable(i, j, k, l, m, tile_l, tile_m);
}

/* 6D: forwards (i, j, k, l, m, n). */
template <class T>
void call_wrapper_6d(void *functor, size_t i, size_t j, size_t k, size_t l,
                     size_t m, size_t n) {
  const T &callable = *static_cast<const T *>(functor);
  callable(i, j, k, l, m, n);
}

/* 6D, tiled along the last dimension: forwards (i, j, k, l, m, n, tile_n). */
template <class T>
void call_wrapper_6d_tile_1d(void *functor, size_t i, size_t j, size_t k,
                             size_t l, size_t m, size_t n, size_t tile_n) {
  const T &callable = *static_cast<const T *>(functor);
  callable(i, j, k, l, m, n, tile_n);
}

/* 6D, tiled along the last two dimensions: forwards (i, j, k, l, m, n,
 * tile_m, tile_n). */
template <class T>
void call_wrapper_6d_tile_2d(void *functor, size_t i, size_t j, size_t k,
                             size_t l, size_t m, size_t n, size_t tile_m,
                             size_t tile_n) {
  const T &callable = *static_cast<const T *>(functor);
  callable(i, j, k, l, m, n, tile_m, tile_n);
}

} /* namespace */
} /* namespace detail */
} /* namespace libpthreadpool */
1479
+
1480
+ /**
1481
+ * Process items on a 1D grid.
1482
+ *
1483
+ * The function implements a parallel version of the following snippet:
1484
+ *
1485
+ * for (size_t i = 0; i < range; i++)
1486
+ * functor(i);
1487
+ *
1488
+ * When the function returns, all items have been processed and the thread pool
1489
+ * is ready for a new task.
1490
+ *
1491
+ * @note If multiple threads call this function with the same thread pool, the
1492
+ * calls are serialized.
1493
+ *
1494
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1495
+ * is NULL, all items are processed serially on the calling thread.
1496
+ * @param functor the functor to call for each item.
1497
+ * @param range the number of items on the 1D grid to process. The
1498
+ * specified functor will be called once for each item.
1499
+ * @param flags a bitwise combination of zero or more optional flags
1500
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1501
+ */
1502
+ template <class T>
1503
+ inline void pthreadpool_parallelize_1d(pthreadpool_t threadpool,
1504
+ const T &functor, size_t range,
1505
+ uint32_t flags = 0) {
1506
+ pthreadpool_parallelize_1d(
1507
+ threadpool, &libpthreadpool::detail::call_wrapper_1d<const T>,
1508
+ const_cast<void *>(static_cast<const void *>(&functor)), range, flags);
1509
+ }
1510
+
1511
+ /**
1512
+ * Process items on a 1D grid with specified maximum tile size.
1513
+ *
1514
+ * The function implements a parallel version of the following snippet:
1515
+ *
1516
+ * for (size_t i = 0; i < range; i += tile)
1517
+ * functor(i, min(range - i, tile));
1518
+ *
1519
+ * When the call returns, all items have been processed and the thread pool is
1520
+ * ready for a new task.
1521
+ *
1522
+ * @note If multiple threads call this function with the same thread pool,
1523
+ * the calls are serialized.
1524
+ *
1525
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1526
+ * is NULL, all items are processed serially on the calling thread.
1527
+ * @param functor the functor to call for each tile.
1528
+ * @param range the number of items on the 1D grid to process.
1529
+ * @param tile the maximum number of items on the 1D grid to process in
1530
+ * one functor call.
1531
+ * @param flags a bitwise combination of zero or more optional flags
1532
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1533
+ */
1534
+ template <class T>
1535
+ inline void pthreadpool_parallelize_1d_tile_1d(pthreadpool_t threadpool,
1536
+ const T &functor, size_t range,
1537
+ size_t tile,
1538
+ uint32_t flags = 0) {
1539
+ pthreadpool_parallelize_1d_tile_1d(
1540
+ threadpool, &libpthreadpool::detail::call_wrapper_1d_tile_1d<const T>,
1541
+ const_cast<void *>(static_cast<const void *>(&functor)), range, tile,
1542
+ flags);
1543
+ }
1544
+
1545
+ /**
1546
+ * Process items on a 2D grid.
1547
+ *
1548
+ * The function implements a parallel version of the following snippet:
1549
+ *
1550
+ * for (size_t i = 0; i < range_i; i++)
1551
+ * for (size_t j = 0; j < range_j; j++)
1552
+ * functor(i, j);
1553
+ *
1554
+ * When the function returns, all items have been processed and the thread pool
1555
+ * is ready for a new task.
1556
+ *
1557
+ * @note If multiple threads call this function with the same thread pool, the
1558
+ * calls are serialized.
1559
+ *
1560
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1561
+ * is NULL, all items are processed serially on the calling thread.
1562
+ * @param functor the functor to call for each item.
1563
+ * @param range_i the number of items to process along the first dimension
1564
+ * of the 2D grid.
1565
+ * @param range_j the number of items to process along the second dimension
1566
+ * of the 2D grid.
1567
+ * @param flags a bitwise combination of zero or more optional flags
1568
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1569
+ */
1570
+ template <class T>
1571
+ inline void pthreadpool_parallelize_2d(pthreadpool_t threadpool,
1572
+ const T &functor, size_t range_i,
1573
+ size_t range_j, uint32_t flags = 0) {
1574
+ pthreadpool_parallelize_2d(
1575
+ threadpool, &libpthreadpool::detail::call_wrapper_2d<const T>,
1576
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1577
+ flags);
1578
+ }
1579
+
1580
+ /**
1581
+ * Process items on a 2D grid with the specified maximum tile size along the
1582
+ * last grid dimension.
1583
+ *
1584
+ * The function implements a parallel version of the following snippet:
1585
+ *
1586
+ * for (size_t i = 0; i < range_i; i++)
1587
+ * for (size_t j = 0; j < range_j; j += tile_j)
1588
+ * functor(i, j, min(range_j - j, tile_j));
1589
+ *
1590
+ * When the function returns, all items have been processed and the thread pool
1591
+ * is ready for a new task.
1592
+ *
1593
+ * @note If multiple threads call this function with the same thread pool, the
1594
+ * calls are serialized.
1595
+ *
1596
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1597
+ * is NULL, all items are processed serially on the calling thread.
1598
+ * @param functor the functor to call for each tile.
1599
+ * @param range_i the number of items to process along the first dimension
1600
+ * of the 2D grid.
1601
+ * @param range_j the number of items to process along the second dimension
1602
+ * of the 2D grid.
1603
+ * @param tile_j the maximum number of items along the second dimension of
1604
+ * the 2D grid to process in one functor call.
1605
+ * @param flags a bitwise combination of zero or more optional flags
1606
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1607
+ */
1608
+ template <class T>
1609
+ inline void pthreadpool_parallelize_2d_tile_1d(pthreadpool_t threadpool,
1610
+ const T &functor, size_t range_i,
1611
+ size_t range_j, size_t tile_j,
1612
+ uint32_t flags = 0) {
1613
+ pthreadpool_parallelize_2d_tile_1d(
1614
+ threadpool, &libpthreadpool::detail::call_wrapper_2d_tile_1d<const T>,
1615
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1616
+ tile_j, flags);
1617
+ }
1618
+
1619
+ /**
1620
+ * Process items on a 2D grid with the specified maximum tile size along each
1621
+ * grid dimension.
1622
+ *
1623
+ * The function implements a parallel version of the following snippet:
1624
+ *
1625
+ * for (size_t i = 0; i < range_i; i += tile_i)
1626
+ * for (size_t j = 0; j < range_j; j += tile_j)
1627
+ * functor(i, j,
1628
+ * min(range_i - i, tile_i), min(range_j - j, tile_j));
1629
+ *
1630
+ * When the function returns, all items have been processed and the thread pool
1631
+ * is ready for a new task.
1632
+ *
1633
+ * @note If multiple threads call this function with the same thread pool, the
1634
+ * calls are serialized.
1635
+ *
1636
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1637
+ * is NULL, all items are processed serially on the calling thread.
1638
+ * @param functor the functor to call for each tile.
1639
+ * @param range_i the number of items to process along the first dimension
1640
+ * of the 2D grid.
1641
+ * @param range_j the number of items to process along the second dimension
1642
+ * of the 2D grid.
1643
+ * @param tile_i the maximum number of items along the first dimension of
1644
+ * the 2D grid to process in one functor call.
1645
+ * @param tile_j the maximum number of items along the second dimension of
1646
+ * the 2D grid to process in one functor call.
1647
+ * @param flags a bitwise combination of zero or more optional flags
1648
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1649
+ */
1650
+ template <class T>
1651
+ inline void pthreadpool_parallelize_2d_tile_2d(pthreadpool_t threadpool,
1652
+ const T &functor, size_t range_i,
1653
+ size_t range_j, size_t tile_i,
1654
+ size_t tile_j,
1655
+ uint32_t flags = 0) {
1656
+ pthreadpool_parallelize_2d_tile_2d(
1657
+ threadpool, &libpthreadpool::detail::call_wrapper_2d_tile_2d<const T>,
1658
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1659
+ tile_i, tile_j, flags);
1660
+ }
1661
+
1662
+ /**
1663
+ * Process items on a 3D grid.
1664
+ *
1665
+ * The function implements a parallel version of the following snippet:
1666
+ *
1667
+ * for (size_t i = 0; i < range_i; i++)
1668
+ * for (size_t j = 0; j < range_j; j++)
1669
+ * for (size_t k = 0; k < range_k; k++)
1670
+ * functor(i, j, k);
1671
+ *
1672
+ * When the function returns, all items have been processed and the thread pool
1673
+ * is ready for a new task.
1674
+ *
1675
+ * @note If multiple threads call this function with the same thread pool, the
1676
+ * calls are serialized.
1677
+ *
1678
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1679
+ * is NULL, all items are processed serially on the calling thread.
1680
+ * @param functor the functor to call for each item.
1681
+ * @param range_i the number of items to process along the first dimension
1682
+ * of the 3D grid.
1683
+ * @param range_j the number of items to process along the second dimension
1684
+ * of the 3D grid.
1685
+ * @param range_k the number of items to process along the third dimension
1686
+ * of the 3D grid.
1687
+ * @param flags a bitwise combination of zero or more optional flags
1688
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1689
+ */
1690
+ template <class T>
1691
+ inline void pthreadpool_parallelize_3d(pthreadpool_t threadpool,
1692
+ const T &functor, size_t range_i,
1693
+ size_t range_j, size_t range_k,
1694
+ uint32_t flags = 0) {
1695
+ pthreadpool_parallelize_3d(
1696
+ threadpool, &libpthreadpool::detail::call_wrapper_3d<const T>,
1697
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1698
+ range_k, flags);
1699
+ }
1700
+
1701
+ /**
1702
+ * Process items on a 3D grid with the specified maximum tile size along the
1703
+ * last grid dimension.
1704
+ *
1705
+ * The function implements a parallel version of the following snippet:
1706
+ *
1707
+ * for (size_t i = 0; i < range_i; i++)
1708
+ * for (size_t j = 0; j < range_j; j++)
1709
+ * for (size_t k = 0; k < range_k; k += tile_k)
1710
+ * functor(i, j, k, min(range_k - k, tile_k));
1711
+ *
1712
+ * When the function returns, all items have been processed and the thread pool
1713
+ * is ready for a new task.
1714
+ *
1715
+ * @note If multiple threads call this function with the same thread pool, the
1716
+ * calls are serialized.
1717
+ *
1718
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1719
+ * is NULL, all items are processed serially on the calling thread.
1720
+ * @param functor the functor to call for each tile.
1721
+ * @param range_i the number of items to process along the first dimension
1722
+ * of the 3D grid.
1723
+ * @param range_j the number of items to process along the second dimension
1724
+ * of the 3D grid.
1725
+ * @param range_k the number of items to process along the third dimension
1726
+ * of the 3D grid.
1727
+ * @param tile_k the maximum number of items along the third dimension of
1728
+ * the 3D grid to process in one functor call.
1729
+ * @param flags a bitwise combination of zero or more optional flags
1730
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1731
+ */
1732
+ template <class T>
1733
+ inline void pthreadpool_parallelize_3d_tile_1d(pthreadpool_t threadpool,
1734
+ const T &functor, size_t range_i,
1735
+ size_t range_j, size_t range_k,
1736
+ size_t tile_k,
1737
+ uint32_t flags = 0) {
1738
+ pthreadpool_parallelize_3d_tile_1d(
1739
+ threadpool, &libpthreadpool::detail::call_wrapper_3d_tile_1d<const T>,
1740
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1741
+ range_k, tile_k, flags);
1742
+ }
1743
+
1744
+ /**
1745
+ * Process items on a 3D grid with the specified maximum tile size along the
1746
+ * last two grid dimensions.
1747
+ *
1748
+ * The function implements a parallel version of the following snippet:
1749
+ *
1750
+ * for (size_t i = 0; i < range_i; i++)
1751
+ * for (size_t j = 0; j < range_j; j += tile_j)
1752
+ * for (size_t k = 0; k < range_k; k += tile_k)
1753
+ * functor(i, j, k,
1754
+ * min(range_j - j, tile_j), min(range_k - k, tile_k));
1755
+ *
1756
+ * When the function returns, all items have been processed and the thread pool
1757
+ * is ready for a new task.
1758
+ *
1759
+ * @note If multiple threads call this function with the same thread pool, the
1760
+ * calls are serialized.
1761
+ *
1762
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1763
+ * is NULL, all items are processed serially on the calling thread.
1764
+ * @param functor the functor to call for each tile.
1765
+ * @param range_i the number of items to process along the first dimension
1766
+ * of the 3D grid.
1767
+ * @param range_j the number of items to process along the second dimension
1768
+ * of the 3D grid.
1769
+ * @param range_k the number of items to process along the third dimension
1770
+ * of the 3D grid.
1771
+ * @param tile_j the maximum number of items along the second dimension of
1772
+ * the 3D grid to process in one functor call.
1773
+ * @param tile_k the maximum number of items along the third dimension of
1774
+ * the 3D grid to process in one functor call.
1775
+ * @param flags a bitwise combination of zero or more optional flags
1776
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1777
+ */
1778
+ template <class T>
1779
+ inline void pthreadpool_parallelize_3d_tile_2d(pthreadpool_t threadpool,
1780
+ const T &functor, size_t range_i,
1781
+ size_t range_j, size_t range_k,
1782
+ size_t tile_j, size_t tile_k,
1783
+ uint32_t flags = 0) {
1784
+ pthreadpool_parallelize_3d_tile_2d(
1785
+ threadpool, &libpthreadpool::detail::call_wrapper_3d_tile_2d<const T>,
1786
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1787
+ range_k, tile_j, tile_k, flags);
1788
+ }
1789
+
1790
+ /**
1791
+ * Process items on a 4D grid.
1792
+ *
1793
+ * The function implements a parallel version of the following snippet:
1794
+ *
1795
+ * for (size_t i = 0; i < range_i; i++)
1796
+ * for (size_t j = 0; j < range_j; j++)
1797
+ * for (size_t k = 0; k < range_k; k++)
1798
+ * for (size_t l = 0; l < range_l; l++)
1799
+ * functor(i, j, k, l);
1800
+ *
1801
+ * When the function returns, all items have been processed and the thread pool
1802
+ * is ready for a new task.
1803
+ *
1804
+ * @note If multiple threads call this function with the same thread pool, the
1805
+ * calls are serialized.
1806
+ *
1807
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1808
+ * is NULL, all items are processed serially on the calling thread.
1809
+ * @param functor the functor to call for each item.
1810
+ * @param range_i the number of items to process along the first dimension
1811
+ * of the 4D grid.
1812
+ * @param range_j the number of items to process along the second dimension
1813
+ * of the 4D grid.
1814
+ * @param range_k the number of items to process along the third dimension
1815
+ * of the 4D grid.
1816
+ * @param range_l the number of items to process along the fourth dimension
1817
+ * of the 4D grid.
1818
+ * @param flags a bitwise combination of zero or more optional flags
1819
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1820
+ */
1821
+ template <class T>
1822
+ inline void pthreadpool_parallelize_4d(pthreadpool_t threadpool,
1823
+ const T &functor, size_t range_i,
1824
+ size_t range_j, size_t range_k,
1825
+ size_t range_l, uint32_t flags = 0) {
1826
+ pthreadpool_parallelize_4d(
1827
+ threadpool, &libpthreadpool::detail::call_wrapper_4d<const T>,
1828
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1829
+ range_k, range_l, flags);
1830
+ }
1831
+
1832
+ /**
1833
+ * Process items on a 4D grid with the specified maximum tile size along the
1834
+ * last grid dimension.
1835
+ *
1836
+ * The function implements a parallel version of the following snippet:
1837
+ *
1838
+ * for (size_t i = 0; i < range_i; i++)
1839
+ * for (size_t j = 0; j < range_j; j++)
1840
+ * for (size_t k = 0; k < range_k; k++)
1841
+ * for (size_t l = 0; l < range_l; l += tile_l)
1842
+ * functor(i, j, k, l, min(range_l - l, tile_l));
1843
+ *
1844
+ * When the function returns, all items have been processed and the thread pool
1845
+ * is ready for a new task.
1846
+ *
1847
+ * @note If multiple threads call this function with the same thread pool, the
1848
+ * calls are serialized.
1849
+ *
1850
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1851
+ * is NULL, all items are processed serially on the calling thread.
1852
+ * @param functor the functor to call for each tile.
1853
+ * @param range_i the number of items to process along the first dimension
1854
+ * of the 4D grid.
1855
+ * @param range_j the number of items to process along the second dimension
1856
+ * of the 4D grid.
1857
+ * @param range_k the number of items to process along the third dimension
1858
+ * of the 4D grid.
1859
+ * @param range_l the number of items to process along the fourth dimension
1860
+ * of the 4D grid.
1861
+ * @param tile_l the maximum number of items along the fourth dimension of
1862
+ * the 4D grid to process in one functor call.
1863
+ * @param flags a bitwise combination of zero or more optional flags
1864
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1865
+ */
1866
+ template <class T>
1867
+ inline void pthreadpool_parallelize_4d_tile_1d(pthreadpool_t threadpool,
1868
+ const T &functor, size_t range_i,
1869
+ size_t range_j, size_t range_k,
1870
+ size_t range_l, size_t tile_l,
1871
+ uint32_t flags = 0) {
1872
+ pthreadpool_parallelize_4d_tile_1d(
1873
+ threadpool, &libpthreadpool::detail::call_wrapper_4d_tile_1d<const T>,
1874
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1875
+ range_k, range_l, tile_l, flags);
1876
+ }
1877
+
1878
+ /**
1879
+ * Process items on a 4D grid with the specified maximum tile size along the
1880
+ * last two grid dimensions.
1881
+ *
1882
+ * The function implements a parallel version of the following snippet:
1883
+ *
1884
+ * for (size_t i = 0; i < range_i; i++)
1885
+ * for (size_t j = 0; j < range_j; j++)
1886
+ * for (size_t k = 0; k < range_k; k += tile_k)
1887
+ * for (size_t l = 0; l < range_l; l += tile_l)
1888
+ * functor(i, j, k, l,
1889
+ * min(range_k - k, tile_k), min(range_l - l, tile_l));
1890
+ *
1891
+ * When the function returns, all items have been processed and the thread pool
1892
+ * is ready for a new task.
1893
+ *
1894
+ * @note If multiple threads call this function with the same thread pool, the
1895
+ * calls are serialized.
1896
+ *
1897
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1898
+ * is NULL, all items are processed serially on the calling thread.
1899
+ * @param functor the functor to call for each tile.
1900
+ * @param range_i the number of items to process along the first dimension
1901
+ * of the 4D grid.
1902
+ * @param range_j the number of items to process along the second dimension
1903
+ * of the 4D grid.
1904
+ * @param range_k the number of items to process along the third dimension
1905
+ * of the 4D grid.
1906
+ * @param range_l the number of items to process along the fourth dimension
1907
+ * of the 4D grid.
1908
+ * @param tile_k the maximum number of items along the third dimension of
1909
+ * the 4D grid to process in one functor call.
1910
+ * @param tile_l the maximum number of items along the fourth dimension of
1911
+ * the 4D grid to process in one functor call.
1912
+ * @param flags a bitwise combination of zero or more optional flags
1913
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1914
+ */
1915
+ template <class T>
1916
+ inline void pthreadpool_parallelize_4d_tile_2d(pthreadpool_t threadpool,
1917
+ const T &functor, size_t range_i,
1918
+ size_t range_j, size_t range_k,
1919
+ size_t range_l, size_t tile_k,
1920
+ size_t tile_l,
1921
+ uint32_t flags = 0) {
1922
+ pthreadpool_parallelize_4d_tile_2d(
1923
+ threadpool, &libpthreadpool::detail::call_wrapper_4d_tile_2d<const T>,
1924
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1925
+ range_k, range_l, tile_k, tile_l, flags);
1926
+ }
1927
+
1928
+ /**
1929
+ * Process items on a 5D grid.
1930
+ *
1931
+ * The function implements a parallel version of the following snippet:
1932
+ *
1933
+ * for (size_t i = 0; i < range_i; i++)
1934
+ * for (size_t j = 0; j < range_j; j++)
1935
+ * for (size_t k = 0; k < range_k; k++)
1936
+ * for (size_t l = 0; l < range_l; l++)
1937
+ * for (size_t m = 0; m < range_m; m++)
1938
+ * functor(i, j, k, l, m);
1939
+ *
1940
+ * When the function returns, all items have been processed and the thread pool
1941
+ * is ready for a new task.
1942
+ *
1943
+ * @note If multiple threads call this function with the same thread pool, the
1944
+ * calls are serialized.
1945
+ *
1946
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1947
+ * is NULL, all items are processed serially on the calling thread.
1948
+ * @param functor the functor to call for each item.
1949
+ * @param range_i the number of items to process along the first dimension
1950
+ * of the 5D grid.
1951
+ * @param range_j the number of items to process along the second dimension
1952
+ * of the 5D grid.
1953
+ * @param range_k the number of items to process along the third dimension
1954
+ * of the 5D grid.
1955
+ * @param range_l the number of items to process along the fourth dimension
1956
+ * of the 5D grid.
1957
+ * @param range_m the number of items to process along the fifth dimension
1958
+ * of the 5D grid.
1959
+ * @param flags a bitwise combination of zero or more optional flags
1960
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1961
+ */
1962
+ template <class T>
1963
+ inline void
1964
+ pthreadpool_parallelize_5d(pthreadpool_t threadpool, const T &functor,
1965
+ size_t range_i, size_t range_j, size_t range_k,
1966
+ size_t range_l, size_t range_m, uint32_t flags = 0) {
1967
+ pthreadpool_parallelize_5d(
1968
+ threadpool, &libpthreadpool::detail::call_wrapper_5d<const T>,
1969
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1970
+ range_k, range_l, range_m, flags);
1971
+ }
1972
+
1973
+ /**
1974
+ * Process items on a 5D grid with the specified maximum tile size along the
1975
+ * last grid dimension.
1976
+ *
1977
+ * The function implements a parallel version of the following snippet:
1978
+ *
1979
+ * for (size_t i = 0; i < range_i; i++)
1980
+ * for (size_t j = 0; j < range_j; j++)
1981
+ * for (size_t k = 0; k < range_k; k++)
1982
+ * for (size_t l = 0; l < range_l; l++)
1983
+ * for (size_t m = 0; m < range_m; m += tile_m)
1984
+ * functor(i, j, k, l, m, min(range_m - m, tile_m));
1985
+ *
1986
+ * When the function returns, all items have been processed and the thread pool
1987
+ * is ready for a new task.
1988
+ *
1989
+ * @note If multiple threads call this function with the same thread pool, the
1990
+ * calls are serialized.
1991
+ *
1992
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1993
+ * is NULL, all items are processed serially on the calling thread.
1994
+ * @param functor the functor to call for each tile.
1995
+ * @param range_i the number of items to process along the first dimension
1996
+ * of the 5D grid.
1997
+ * @param range_j the number of items to process along the second dimension
1998
+ * of the 5D grid.
1999
+ * @param range_k the number of items to process along the third dimension
2000
+ * of the 5D grid.
2001
+ * @param range_l the number of items to process along the fourth dimension
2002
+ * of the 5D grid.
2003
+ * @param range_m the number of items to process along the fifth dimension
2004
+ * of the 5D grid.
2005
+ * @param tile_m the maximum number of items along the fifth dimension of
2006
+ * the 5D grid to process in one functor call.
2007
+ * @param flags a bitwise combination of zero or more optional flags
2008
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
2009
+ */
2010
+ template <class T>
2011
+ inline void pthreadpool_parallelize_5d_tile_1d(pthreadpool_t threadpool,
2012
+ const T &functor, size_t range_i,
2013
+ size_t range_j, size_t range_k,
2014
+ size_t range_l, size_t range_m,
2015
+ size_t tile_m,
2016
+ uint32_t flags = 0) {
2017
+ pthreadpool_parallelize_5d_tile_1d(
2018
+ threadpool, &libpthreadpool::detail::call_wrapper_5d_tile_1d<const T>,
2019
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
2020
+ range_k, range_l, range_m, tile_m, flags);
2021
+ }
2022
+
2023
+ /**
2024
+ * Process items on a 5D grid with the specified maximum tile size along the
2025
+ * last two grid dimensions.
2026
+ *
2027
+ * The function implements a parallel version of the following snippet:
2028
+ *
2029
+ * for (size_t i = 0; i < range_i; i++)
2030
+ * for (size_t j = 0; j < range_j; j++)
2031
+ * for (size_t k = 0; k < range_k; k++)
2032
+ * for (size_t l = 0; l < range_l; l += tile_l)
2033
+ * for (size_t m = 0; m < range_m; m += tile_m)
2034
+ * functor(i, j, k, l, m,
2035
+ * min(range_l - l, tile_l), min(range_m - m, tile_m));
2036
+ *
2037
+ * When the function returns, all items have been processed and the thread pool
2038
+ * is ready for a new task.
2039
+ *
2040
+ * @note If multiple threads call this function with the same thread pool, the
2041
+ * calls are serialized.
2042
+ *
2043
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
2044
+ * is NULL, all items are processed serially on the calling thread.
2045
+ * @param functor the functor to call for each tile.
2046
+ * @param range_i the number of items to process along the first dimension
2047
+ * of the 5D grid.
2048
+ * @param range_j the number of items to process along the second dimension
2049
+ * of the 5D grid.
2050
+ * @param range_k the number of items to process along the third dimension
2051
+ * of the 5D grid.
2052
+ * @param range_l the number of items to process along the fourth dimension
2053
+ * of the 5D grid.
2054
+ * @param range_m the number of items to process along the fifth dimension
2055
+ * of the 5D grid.
2056
+ * @param tile_l the maximum number of items along the fourth dimension of
2057
+ * the 5D grid to process in one functor call.
2058
+ * @param tile_m the maximum number of items along the fifth dimension of
2059
+ * the 5D grid to process in one functor call.
2060
+ * @param flags a bitwise combination of zero or more optional flags
2061
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
2062
+ */
2063
+ template <class T>
2064
+ inline void pthreadpool_parallelize_5d_tile_2d(pthreadpool_t threadpool,
2065
+ const T &functor, size_t range_i,
2066
+ size_t range_j, size_t range_k,
2067
+ size_t range_l, size_t range_m,
2068
+ size_t tile_l, size_t tile_m,
2069
+ uint32_t flags = 0) {
2070
+ pthreadpool_parallelize_5d_tile_2d(
2071
+ threadpool, &libpthreadpool::detail::call_wrapper_5d_tile_2d<const T>,
2072
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
2073
+ range_k, range_l, range_m, tile_l, tile_m, flags);
2074
+ }
2075
+
2076
+ /**
2077
+ * Process items on a 6D grid.
2078
+ *
2079
+ * The function implements a parallel version of the following snippet:
2080
+ *
2081
+ * for (size_t i = 0; i < range_i; i++)
2082
+ * for (size_t j = 0; j < range_j; j++)
2083
+ * for (size_t k = 0; k < range_k; k++)
2084
+ * for (size_t l = 0; l < range_l; l++)
2085
+ * for (size_t m = 0; m < range_m; m++)
2086
+ * for (size_t n = 0; n < range_n; n++)
2087
+ * functor(i, j, k, l, m, n);
2088
+ *
2089
+ * When the function returns, all items have been processed and the thread pool
2090
+ * is ready for a new task.
2091
+ *
2092
+ * @note If multiple threads call this function with the same thread pool, the
2093
+ * calls are serialized.
2094
+ *
2095
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
2096
+ * is NULL, all items are processed serially on the calling thread.
2097
+ * @param functor the functor to call for each item.
2098
+ * @param range_i the number of items to process along the first dimension
2099
+ * of the 6D grid.
2100
+ * @param range_j the number of items to process along the second dimension
2101
+ * of the 6D grid.
2102
+ * @param range_k the number of items to process along the third dimension
2103
+ * of the 6D grid.
2104
+ * @param range_l the number of items to process along the fourth dimension
2105
+ * of the 6D grid.
2106
+ * @param range_m the number of items to process along the fifth dimension
2107
+ * of the 6D grid.
2108
+ * @param range_n the number of items to process along the sixth dimension
2108
+ * of the 6D grid.
2112
+ * @param flags a bitwise combination of zero or more optional flags
2113
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
2114
+ */
2115
+ template <class T>
2116
+ inline void pthreadpool_parallelize_6d(pthreadpool_t threadpool,
2117
+ const T &functor, size_t range_i,
2118
+ size_t range_j, size_t range_k,
2119
+ size_t range_l, size_t range_m,
2120
+ size_t range_n, uint32_t flags = 0) {
2121
+ pthreadpool_parallelize_6d(
2122
+ threadpool, &libpthreadpool::detail::call_wrapper_6d<const T>,
2123
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
2124
+ range_k, range_l, range_m, range_n, flags);
2125
+ }
2126
+
2127
+ /**
2128
+ * Process items on a 6D grid with the specified maximum tile size along the
2129
+ * last grid dimension.
2130
+ *
2131
+ * The function implements a parallel version of the following snippet:
2132
+ *
2133
+ * for (size_t i = 0; i < range_i; i++)
2134
+ * for (size_t j = 0; j < range_j; j++)
2135
+ * for (size_t k = 0; k < range_k; k++)
2136
+ * for (size_t l = 0; l < range_l; l++)
2137
+ * for (size_t m = 0; m < range_m; m++)
2138
+ * for (size_t n = 0; n < range_n; n += tile_n)
2139
+ * functor(i, j, k, l, m, n, min(range_n - n, tile_n));
2140
+ *
2141
+ * When the function returns, all items have been processed and the thread pool
2142
+ * is ready for a new task.
2143
+ *
2144
+ * @note If multiple threads call this function with the same thread pool, the
2145
+ * calls are serialized.
2146
+ *
2147
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
2148
+ * is NULL, all items are processed serially on the calling thread.
2149
+ * @param functor the functor to call for each tile.
2150
+ * @param range_i the number of items to process along the first dimension
2151
+ * of the 6D grid.
2152
+ * @param range_j the number of items to process along the second dimension
2153
+ * of the 6D grid.
2154
+ * @param range_k the number of items to process along the third dimension
2155
+ * of the 6D grid.
2156
+ * @param range_l the number of items to process along the fourth dimension
2157
+ * of the 6D grid.
2158
+ * @param range_m the number of items to process along the fifth dimension
2159
+ * of the 6D grid.
2160
+ * @param range_n the number of items to process along the sixth dimension
2161
+ * of the 6D grid.
2162
+ * @param tile_n the maximum number of items along the sixth dimension of
2163
+ * the 6D grid to process in one functor call.
2164
+ * @param flags a bitwise combination of zero or more optional flags
2165
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
2166
+ */
2167
+ template <class T>
2168
+ inline void pthreadpool_parallelize_6d_tile_1d(pthreadpool_t threadpool,
2169
+ const T &functor, size_t range_i,
2170
+ size_t range_j, size_t range_k,
2171
+ size_t range_l, size_t range_m,
2172
+ size_t range_n, size_t tile_n,
2173
+ uint32_t flags = 0) {
2174
+ pthreadpool_parallelize_6d_tile_1d(
2175
+ threadpool, &libpthreadpool::detail::call_wrapper_6d_tile_1d<const T>,
2176
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
2177
+ range_k, range_l, range_m, range_n, tile_n, flags);
2178
+ }
2179
+
2180
+ /**
2181
+ * Process items on a 6D grid with the specified maximum tile size along the
2182
+ * last two grid dimensions.
2183
+ *
2184
+ * The function implements a parallel version of the following snippet:
2185
+ *
2186
+ * for (size_t i = 0; i < range_i; i++)
2187
+ * for (size_t j = 0; j < range_j; j++)
2188
+ * for (size_t k = 0; k < range_k; k++)
2189
+ * for (size_t l = 0; l < range_l; l++)
2190
+ * for (size_t m = 0; m < range_m; m += tile_m)
2191
+ * for (size_t n = 0; n < range_n; n += tile_n)
2192
+ * functor(i, j, k, l, m, n,
2193
+ * min(range_m - m, tile_m), min(range_n - n, tile_n));
2194
+ *
2195
+ * When the function returns, all items have been processed and the thread pool
2196
+ * is ready for a new task.
2197
+ *
2198
+ * @note If multiple threads call this function with the same thread pool, the
2199
+ * calls are serialized.
2200
+ *
2201
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
2202
+ * is NULL, all items are processed serially on the calling thread.
2203
+ * @param functor the functor to call for each tile.
2204
+ * @param range_i the number of items to process along the first dimension
2205
+ * of the 6D grid.
2206
+ * @param range_j the number of items to process along the second dimension
2207
+ * of the 6D grid.
2208
+ * @param range_k the number of items to process along the third dimension
2209
+ * of the 6D grid.
2210
+ * @param range_l the number of items to process along the fourth dimension
2211
+ * of the 6D grid.
2212
+ * @param range_m the number of items to process along the fifth dimension
2213
+ * of the 6D grid.
2214
+ * @param range_n the number of items to process along the sixth dimension
2215
+ * of the 6D grid.
2216
+ * @param tile_m the maximum number of items along the fifth dimension of
2217
+ * the 6D grid to process in one functor call.
2218
+ * @param tile_n the maximum number of items along the sixth dimension of
2219
+ * the 6D grid to process in one functor call.
2220
+ * @param flags a bitwise combination of zero or more optional flags
2221
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
2222
+ */
2223
+ template <class T>
2224
+ inline void pthreadpool_parallelize_6d_tile_2d(
2225
+ pthreadpool_t threadpool, const T &functor, size_t range_i, size_t range_j,
2226
+ size_t range_k, size_t range_l, size_t range_m, size_t range_n,
2227
+ size_t tile_m, size_t tile_n, uint32_t flags = 0) {
2228
+ pthreadpool_parallelize_6d_tile_2d(
2229
+ threadpool, &libpthreadpool::detail::call_wrapper_6d_tile_2d<const T>,
2230
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
2231
+ range_k, range_l, range_m, range_n, tile_m, tile_n, flags);
2232
+ }
2233
+
2234
+ #endif /* __cplusplus */
2235
+
2236
+ #endif /* PTHREADPOOL_H_ */