react-native-executorch 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (595):
  1. package/android/CMakeLists.txt +24 -0
  2. package/android/build.gradle +1 -0
  3. package/android/src/main/cpp/CMakeLists.txt +25 -0
  4. package/android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt +1 -13
  5. package/common/rnexecutorch/RnExecutorchInstaller.cpp +52 -18
  6. package/common/rnexecutorch/RnExecutorchInstaller.h +0 -25
  7. package/common/rnexecutorch/TokenizerModule.cpp +1 -1
  8. package/common/rnexecutorch/TokenizerModule.h +4 -1
  9. package/common/rnexecutorch/data_processing/FileUtils.h +2 -2
  10. package/common/rnexecutorch/data_processing/ImageProcessing.cpp +5 -5
  11. package/common/rnexecutorch/data_processing/ImageProcessing.h +2 -2
  12. package/common/rnexecutorch/data_processing/Numerical.cpp +13 -0
  13. package/common/rnexecutorch/host_objects/JsiConversions.h +43 -62
  14. package/common/rnexecutorch/host_objects/ModelHostObject.h +43 -24
  15. package/common/rnexecutorch/metaprogramming/ConstructorHelpers.h +8 -6
  16. package/common/rnexecutorch/metaprogramming/FunctionHelpers.h +1 -1
  17. package/common/rnexecutorch/models/BaseModel.cpp +2 -2
  18. package/common/rnexecutorch/models/BaseModel.h +5 -0
  19. package/common/rnexecutorch/models/EncoderDecoderBase.cpp +2 -2
  20. package/common/rnexecutorch/models/EncoderDecoderBase.h +2 -2
  21. package/common/rnexecutorch/models/classification/Classification.cpp +6 -6
  22. package/common/rnexecutorch/models/classification/Classification.h +5 -0
  23. package/common/rnexecutorch/models/classification/Constants.h +3 -3
  24. package/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp +2 -2
  25. package/common/rnexecutorch/models/embeddings/BaseEmbeddings.h +2 -2
  26. package/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp +3 -3
  27. package/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h +5 -0
  28. package/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp +2 -2
  29. package/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h +6 -1
  30. package/common/rnexecutorch/models/image_segmentation/Constants.h +3 -3
  31. package/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp +6 -5
  32. package/common/rnexecutorch/models/image_segmentation/ImageSegmentation.h +8 -1
  33. package/common/rnexecutorch/models/llm/LLM.cpp +58 -0
  34. package/common/rnexecutorch/models/llm/LLM.h +35 -0
  35. package/common/rnexecutorch/models/object_detection/Constants.h +3 -3
  36. package/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +8 -8
  37. package/common/rnexecutorch/models/object_detection/ObjectDetection.h +11 -5
  38. package/common/rnexecutorch/models/object_detection/Types.h +13 -0
  39. package/common/rnexecutorch/models/object_detection/Utils.cpp +13 -11
  40. package/common/rnexecutorch/models/object_detection/Utils.h +7 -13
  41. package/common/rnexecutorch/models/ocr/CTCLabelConverter.cpp +2 -2
  42. package/common/rnexecutorch/models/ocr/CTCLabelConverter.h +2 -2
  43. package/common/rnexecutorch/models/ocr/Constants.h +33 -26
  44. package/common/rnexecutorch/models/ocr/Detector.cpp +20 -22
  45. package/common/rnexecutorch/models/ocr/Detector.h +4 -4
  46. package/common/rnexecutorch/models/ocr/OCR.cpp +9 -8
  47. package/common/rnexecutorch/models/ocr/OCR.h +11 -3
  48. package/common/rnexecutorch/models/ocr/RecognitionHandler.cpp +20 -19
  49. package/common/rnexecutorch/models/ocr/RecognitionHandler.h +9 -7
  50. package/common/rnexecutorch/models/ocr/Recognizer.cpp +7 -7
  51. package/common/rnexecutorch/models/ocr/Recognizer.h +2 -2
  52. package/common/rnexecutorch/models/ocr/Types.h +4 -6
  53. package/common/rnexecutorch/models/ocr/{DetectorUtils.cpp → utils/DetectorUtils.cpp} +70 -63
  54. package/common/rnexecutorch/models/ocr/{DetectorUtils.h → utils/DetectorUtils.h} +12 -11
  55. package/common/rnexecutorch/models/ocr/{RecognitionHandlerUtils.cpp → utils/RecognitionHandlerUtils.cpp} +14 -11
  56. package/common/rnexecutorch/models/ocr/{RecognitionHandlerUtils.h → utils/RecognitionHandlerUtils.h} +5 -5
  57. package/common/rnexecutorch/models/ocr/{RecognizerUtils.cpp → utils/RecognizerUtils.cpp} +28 -26
  58. package/common/rnexecutorch/models/ocr/{RecognizerUtils.h → utils/RecognizerUtils.h} +15 -14
  59. package/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp +2 -2
  60. package/common/rnexecutorch/models/speech_to_text/SpeechToText.h +9 -2
  61. package/common/rnexecutorch/models/speech_to_text/SpeechToTextStrategy.h +2 -2
  62. package/common/rnexecutorch/models/speech_to_text/WhisperStrategy.cpp +2 -2
  63. package/common/rnexecutorch/models/speech_to_text/WhisperStrategy.h +2 -2
  64. package/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp +5 -5
  65. package/common/rnexecutorch/models/style_transfer/StyleTransfer.h +6 -0
  66. package/common/rnexecutorch/models/vertical_ocr/VerticalDetector.cpp +23 -22
  67. package/common/rnexecutorch/models/vertical_ocr/VerticalDetector.h +4 -4
  68. package/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp +34 -34
  69. package/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h +27 -20
  70. package/{third-party/ios/ExecutorchLib/ExecutorchLib/sampler → common/runner}/sampler.cpp +3 -2
  71. package/{third-party/ios/ExecutorchLib/ExecutorchLib/sampler → common/runner}/sampler.h +3 -2
  72. package/ios/libs/executorch/libbackend_coreml_ios.a +0 -0
  73. package/ios/libs/executorch/libbackend_coreml_simulator.a +0 -0
  74. package/{third-party/ios/ExecutorchLib/frameworks/backend_mps.xcframework/ios-arm64 → ios/libs/executorch}/libbackend_mps_ios.a +0 -0
  75. package/{third-party/ios/ExecutorchLib/frameworks/backend_mps.xcframework/ios-arm64-simulator → ios/libs/executorch}/libbackend_mps_simulator.a +0 -0
  76. package/ios/libs/executorch/libbackend_xnnpack_ios.a +0 -0
  77. package/ios/libs/executorch/libbackend_xnnpack_simulator.a +0 -0
  78. package/ios/libs/executorch/libexecutorch_ios.a +0 -0
  79. package/ios/libs/executorch/libexecutorch_simulator.a +0 -0
  80. package/ios/libs/executorch/libkernels_custom_ios.a +0 -0
  81. package/ios/libs/executorch/libkernels_custom_simulator.a +0 -0
  82. package/ios/libs/executorch/libkernels_optimized_ios.a +0 -0
  83. package/ios/libs/executorch/libkernels_optimized_simulator.a +0 -0
  84. package/ios/libs/executorch/libkernels_portable_ios.a +0 -0
  85. package/ios/libs/executorch/libkernels_portable_simulator.a +0 -0
  86. package/ios/libs/executorch/libkernels_quantized_ios.a +0 -0
  87. package/ios/libs/executorch/libkernels_quantized_simulator.a +0 -0
  88. package/ios/libs/tokenizers-cpp/physical-arm64-release/libsentencepiece.a +0 -0
  89. package/ios/{ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib → libs/tokenizers-cpp/physical-arm64-release/libtokenizers_c.a} +0 -0
  90. package/ios/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_cpp.a +0 -0
  91. package/ios/libs/tokenizers-cpp/simulator-arm64-debug/libsentencepiece.a +0 -0
  92. package/ios/{ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib → libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_c.a} +0 -0
  93. package/ios/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_cpp.a +0 -0
  94. package/lib/Error.js +53 -0
  95. package/lib/ThreadPool.d.ts +10 -0
  96. package/lib/ThreadPool.js +28 -0
  97. package/lib/common/Logger.d.ts +8 -0
  98. package/lib/common/Logger.js +19 -0
  99. package/lib/constants/directories.js +2 -0
  100. package/lib/constants/llmDefaults.d.ts +6 -0
  101. package/lib/constants/llmDefaults.js +16 -0
  102. package/lib/constants/modelUrls.d.ts +223 -0
  103. package/lib/constants/modelUrls.js +322 -0
  104. package/lib/constants/ocr/models.d.ts +882 -0
  105. package/lib/constants/ocr/models.js +182 -0
  106. package/lib/constants/ocr/symbols.js +139 -0
  107. package/lib/constants/sttDefaults.d.ts +28 -0
  108. package/lib/constants/sttDefaults.js +68 -0
  109. package/lib/controllers/LLMController.d.ts +47 -0
  110. package/lib/controllers/LLMController.js +213 -0
  111. package/lib/controllers/OCRController.js +67 -0
  112. package/lib/controllers/SpeechToTextController.d.ts +56 -0
  113. package/lib/controllers/SpeechToTextController.js +349 -0
  114. package/lib/controllers/VerticalOCRController.js +70 -0
  115. package/lib/hooks/computer_vision/useClassification.d.ts +15 -0
  116. package/lib/hooks/computer_vision/useClassification.js +7 -0
  117. package/lib/hooks/computer_vision/useImageEmbeddings.d.ts +15 -0
  118. package/lib/hooks/computer_vision/useImageEmbeddings.js +7 -0
  119. package/lib/hooks/computer_vision/useImageSegmentation.d.ts +38 -0
  120. package/lib/hooks/computer_vision/useImageSegmentation.js +7 -0
  121. package/lib/hooks/computer_vision/useOCR.d.ts +20 -0
  122. package/lib/hooks/computer_vision/useOCR.js +41 -0
  123. package/lib/hooks/computer_vision/useObjectDetection.d.ts +15 -0
  124. package/lib/hooks/computer_vision/useObjectDetection.js +7 -0
  125. package/lib/hooks/computer_vision/useStyleTransfer.d.ts +15 -0
  126. package/lib/hooks/computer_vision/useStyleTransfer.js +7 -0
  127. package/lib/hooks/computer_vision/useVerticalOCR.d.ts +21 -0
  128. package/lib/hooks/computer_vision/useVerticalOCR.js +43 -0
  129. package/lib/hooks/general/useExecutorchModule.d.ts +13 -0
  130. package/lib/hooks/general/useExecutorchModule.js +7 -0
  131. package/lib/hooks/natural_language_processing/useLLM.d.ts +10 -0
  132. package/lib/hooks/natural_language_processing/useLLM.js +78 -0
  133. package/lib/hooks/natural_language_processing/useSpeechToText.d.ts +27 -0
  134. package/lib/hooks/natural_language_processing/useSpeechToText.js +49 -0
  135. package/lib/hooks/natural_language_processing/useTextEmbeddings.d.ts +16 -0
  136. package/lib/hooks/natural_language_processing/useTextEmbeddings.js +7 -0
  137. package/lib/hooks/natural_language_processing/useTokenizer.d.ts +17 -0
  138. package/lib/hooks/natural_language_processing/useTokenizer.js +52 -0
  139. package/lib/hooks/useModule.js +45 -0
  140. package/lib/hooks/useNonStaticModule.d.ts +20 -0
  141. package/lib/hooks/useNonStaticModule.js +49 -0
  142. package/lib/index.d.ts +48 -0
  143. package/lib/index.js +58 -0
  144. package/lib/module/controllers/LLMController.js +6 -10
  145. package/lib/module/controllers/LLMController.js.map +1 -1
  146. package/lib/module/hooks/computer_vision/useClassification.js +2 -2
  147. package/lib/module/hooks/computer_vision/useClassification.js.map +1 -1
  148. package/lib/module/hooks/computer_vision/useImageEmbeddings.js +2 -2
  149. package/lib/module/hooks/computer_vision/useImageEmbeddings.js.map +1 -1
  150. package/lib/module/hooks/computer_vision/useImageSegmentation.js +2 -2
  151. package/lib/module/hooks/computer_vision/useImageSegmentation.js.map +1 -1
  152. package/lib/module/hooks/computer_vision/useObjectDetection.js +2 -2
  153. package/lib/module/hooks/computer_vision/useObjectDetection.js.map +1 -1
  154. package/lib/module/hooks/computer_vision/useStyleTransfer.js +2 -2
  155. package/lib/module/hooks/computer_vision/useStyleTransfer.js.map +1 -1
  156. package/lib/module/hooks/general/useExecutorchModule.js +2 -2
  157. package/lib/module/hooks/general/useExecutorchModule.js.map +1 -1
  158. package/lib/module/hooks/natural_language_processing/useTextEmbeddings.js +2 -2
  159. package/lib/module/hooks/natural_language_processing/useTextEmbeddings.js.map +1 -1
  160. package/lib/module/hooks/useModule.js +13 -9
  161. package/lib/module/hooks/useModule.js.map +1 -1
  162. package/lib/module/index.js +1 -1
  163. package/lib/module/index.js.map +1 -1
  164. package/lib/module/modules/BaseModule.js +9 -17
  165. package/lib/module/modules/BaseModule.js.map +1 -1
  166. package/lib/module/modules/computer_vision/ClassificationModule.js +2 -2
  167. package/lib/module/modules/computer_vision/ClassificationModule.js.map +1 -1
  168. package/lib/module/modules/computer_vision/ImageEmbeddingsModule.js +2 -2
  169. package/lib/module/modules/computer_vision/ImageEmbeddingsModule.js.map +1 -1
  170. package/lib/module/modules/computer_vision/ImageSegmentationModule.js +2 -2
  171. package/lib/module/modules/computer_vision/ImageSegmentationModule.js.map +1 -1
  172. package/lib/module/modules/computer_vision/ObjectDetectionModule.js +2 -2
  173. package/lib/module/modules/computer_vision/ObjectDetectionModule.js.map +1 -1
  174. package/lib/module/modules/computer_vision/StyleTransferModule.js +2 -2
  175. package/lib/module/modules/computer_vision/StyleTransferModule.js.map +1 -1
  176. package/lib/module/modules/general/ExecutorchModule.js +2 -2
  177. package/lib/module/modules/general/ExecutorchModule.js.map +1 -1
  178. package/lib/module/modules/natural_language_processing/TextEmbeddingsModule.js +2 -2
  179. package/lib/module/modules/natural_language_processing/TextEmbeddingsModule.js.map +1 -1
  180. package/lib/module/native/RnExecutorchModules.js +1 -2
  181. package/lib/module/native/RnExecutorchModules.js.map +1 -1
  182. package/lib/module/utils/ResourceFetcher.js +6 -8
  183. package/lib/module/utils/ResourceFetcher.js.map +1 -1
  184. package/lib/module/utils/ResourceFetcherUtils.js +20 -20
  185. package/lib/module/utils/ResourceFetcherUtils.js.map +1 -1
  186. package/lib/module/utils/SpeechToTextModule/ASR.js +3 -3
  187. package/lib/module/utils/SpeechToTextModule/ASR.js.map +1 -1
  188. package/lib/modules/BaseModule.js +25 -0
  189. package/lib/modules/BaseNonStaticModule.js +14 -0
  190. package/lib/modules/computer_vision/ClassificationModule.d.ts +8 -0
  191. package/lib/modules/computer_vision/ClassificationModule.js +17 -0
  192. package/lib/modules/computer_vision/ImageEmbeddingsModule.d.ts +8 -0
  193. package/lib/modules/computer_vision/ImageEmbeddingsModule.js +17 -0
  194. package/lib/modules/computer_vision/ImageSegmentationModule.d.ts +11 -0
  195. package/lib/modules/computer_vision/ImageSegmentationModule.js +27 -0
  196. package/lib/modules/computer_vision/OCRModule.d.ts +14 -0
  197. package/lib/modules/computer_vision/OCRModule.js +17 -0
  198. package/lib/modules/computer_vision/ObjectDetectionModule.d.ts +9 -0
  199. package/lib/modules/computer_vision/ObjectDetectionModule.js +17 -0
  200. package/lib/modules/computer_vision/StyleTransferModule.d.ts +8 -0
  201. package/lib/modules/computer_vision/StyleTransferModule.js +17 -0
  202. package/lib/modules/computer_vision/VerticalOCRModule.d.ts +14 -0
  203. package/lib/modules/computer_vision/VerticalOCRModule.js +19 -0
  204. package/lib/modules/general/ExecutorchModule.d.ts +7 -0
  205. package/lib/modules/general/ExecutorchModule.js +14 -0
  206. package/lib/modules/natural_language_processing/LLMModule.d.ts +28 -0
  207. package/lib/modules/natural_language_processing/LLMModule.js +45 -0
  208. package/lib/modules/natural_language_processing/SpeechToTextModule.d.ts +24 -0
  209. package/lib/modules/natural_language_processing/SpeechToTextModule.js +36 -0
  210. package/lib/modules/natural_language_processing/TextEmbeddingsModule.d.ts +9 -0
  211. package/lib/modules/natural_language_processing/TextEmbeddingsModule.js +21 -0
  212. package/lib/modules/natural_language_processing/TokenizerModule.d.ts +12 -0
  213. package/lib/modules/natural_language_processing/TokenizerModule.js +30 -0
  214. package/lib/native/NativeETInstaller.js +2 -0
  215. package/lib/native/NativeOCR.js +2 -0
  216. package/lib/native/NativeVerticalOCR.js +2 -0
  217. package/lib/native/RnExecutorchModules.d.ts +7 -0
  218. package/lib/native/RnExecutorchModules.js +18 -0
  219. package/lib/tsconfig.tsbuildinfo +1 -1
  220. package/lib/types/common.d.ts +32 -0
  221. package/lib/types/common.js +25 -0
  222. package/lib/types/imageSegmentation.js +26 -0
  223. package/lib/types/llm.d.ts +46 -0
  224. package/lib/types/llm.js +9 -0
  225. package/lib/types/objectDetection.js +94 -0
  226. package/lib/types/ocr.js +1 -0
  227. package/lib/types/stt.d.ts +94 -0
  228. package/lib/types/stt.js +85 -0
  229. package/lib/typescript/controllers/LLMController.d.ts +1 -1
  230. package/lib/typescript/controllers/LLMController.d.ts.map +1 -1
  231. package/lib/typescript/hooks/useModule.d.ts +8 -5
  232. package/lib/typescript/hooks/useModule.d.ts.map +1 -1
  233. package/lib/typescript/index.d.ts +1 -0
  234. package/lib/typescript/index.d.ts.map +1 -1
  235. package/lib/typescript/modules/BaseModule.d.ts +7 -6
  236. package/lib/typescript/modules/BaseModule.d.ts.map +1 -1
  237. package/lib/typescript/modules/computer_vision/ClassificationModule.d.ts +2 -2
  238. package/lib/typescript/modules/computer_vision/ClassificationModule.d.ts.map +1 -1
  239. package/lib/typescript/modules/computer_vision/ImageEmbeddingsModule.d.ts +2 -2
  240. package/lib/typescript/modules/computer_vision/ImageEmbeddingsModule.d.ts.map +1 -1
  241. package/lib/typescript/modules/computer_vision/ImageSegmentationModule.d.ts +2 -2
  242. package/lib/typescript/modules/computer_vision/ImageSegmentationModule.d.ts.map +1 -1
  243. package/lib/typescript/modules/computer_vision/ObjectDetectionModule.d.ts +2 -2
  244. package/lib/typescript/modules/computer_vision/ObjectDetectionModule.d.ts.map +1 -1
  245. package/lib/typescript/modules/computer_vision/StyleTransferModule.d.ts +2 -2
  246. package/lib/typescript/modules/computer_vision/StyleTransferModule.d.ts.map +1 -1
  247. package/lib/typescript/modules/general/ExecutorchModule.d.ts +2 -2
  248. package/lib/typescript/modules/general/ExecutorchModule.d.ts.map +1 -1
  249. package/lib/typescript/modules/natural_language_processing/TextEmbeddingsModule.d.ts +2 -2
  250. package/lib/typescript/modules/natural_language_processing/TextEmbeddingsModule.d.ts.map +1 -1
  251. package/lib/typescript/native/RnExecutorchModules.d.ts +1 -3
  252. package/lib/typescript/native/RnExecutorchModules.d.ts.map +1 -1
  253. package/lib/typescript/utils/ResourceFetcher.d.ts.map +1 -1
  254. package/lib/typescript/utils/ResourceFetcherUtils.d.ts.map +1 -1
  255. package/lib/utils/ResourceFetcher.d.ts +24 -0
  256. package/lib/utils/ResourceFetcher.js +305 -0
  257. package/lib/utils/ResourceFetcherUtils.d.ts +54 -0
  258. package/lib/utils/ResourceFetcherUtils.js +127 -0
  259. package/lib/utils/llm.d.ts +6 -0
  260. package/lib/utils/llm.js +72 -0
  261. package/lib/utils/stt.js +21 -0
  262. package/package.json +1 -2
  263. package/react-native-executorch.podspec +47 -44
  264. package/src/controllers/LLMController.ts +8 -13
  265. package/src/hooks/computer_vision/useClassification.ts +2 -2
  266. package/src/hooks/computer_vision/useImageEmbeddings.ts +2 -2
  267. package/src/hooks/computer_vision/useImageSegmentation.ts +2 -2
  268. package/src/hooks/computer_vision/useObjectDetection.ts +2 -2
  269. package/src/hooks/computer_vision/useStyleTransfer.ts +2 -2
  270. package/src/hooks/general/useExecutorchModule.ts +2 -2
  271. package/src/hooks/natural_language_processing/useTextEmbeddings.ts +2 -2
  272. package/src/hooks/useModule.ts +23 -13
  273. package/src/index.ts +3 -2
  274. package/src/modules/BaseModule.ts +17 -28
  275. package/src/modules/computer_vision/ClassificationModule.ts +2 -2
  276. package/src/modules/computer_vision/ImageEmbeddingsModule.ts +2 -2
  277. package/src/modules/computer_vision/ImageSegmentationModule.ts +2 -2
  278. package/src/modules/computer_vision/ObjectDetectionModule.ts +2 -2
  279. package/src/modules/computer_vision/StyleTransferModule.ts +2 -2
  280. package/src/modules/general/ExecutorchModule.ts +2 -2
  281. package/src/modules/natural_language_processing/TextEmbeddingsModule.ts +2 -2
  282. package/src/native/RnExecutorchModules.ts +1 -5
  283. package/src/utils/ResourceFetcher.ts +9 -7
  284. package/src/utils/ResourceFetcherUtils.ts +15 -17
  285. package/src/utils/SpeechToTextModule/ASR.ts +4 -4
  286. package/third-party/android/libs/cpuinfo/arm64-v8a/libcpuinfo.so +0 -0
  287. package/third-party/android/libs/pthreadpool/arm64-v8a/libpthreadpool.so +0 -0
  288. package/third-party/include/cpuinfo/cpuinfo.h +2305 -0
  289. package/third-party/include/executorch/extension/threadpool/cpuinfo_utils.h +26 -0
  290. package/third-party/include/executorch/extension/threadpool/threadpool.h +94 -0
  291. package/third-party/include/pthreadpool/pthreadpool.h +2236 -0
  292. package/android/src/main/java/com/swmansion/rnexecutorch/LLM.kt +0 -63
  293. package/ios/ExecutorchLib.xcframework/Info.plist +0 -43
  294. package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/ETModel.h +0 -27
  295. package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h +0 -14
  296. package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/LLaMARunner.h +0 -32
  297. package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Info.plist +0 -0
  298. package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/ETModel.h +0 -27
  299. package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h +0 -14
  300. package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/LLaMARunner.h +0 -32
  301. package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Info.plist +0 -0
  302. package/ios/RnExecutorch/LLM.h +0 -5
  303. package/ios/RnExecutorch/LLM.mm +0 -78
  304. package/ios/RnExecutorch.xcodeproj/project.xcworkspace/contents.xcworkspacedata +0 -7
  305. package/ios/RnExecutorch.xcodeproj/project.xcworkspace/xcuserdata/jakubchmura.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
  306. package/ios/RnExecutorch.xcodeproj/xcuserdata/jakubchmura.xcuserdatad/xcschemes/xcschememanagement.plist +0 -14
  307. package/lib/module/modules/BaseNonStaticModule.js +0 -17
  308. package/lib/module/modules/BaseNonStaticModule.js.map +0 -1
  309. package/lib/module/native/NativeLLM.js +0 -5
  310. package/lib/module/native/NativeLLM.js.map +0 -1
  311. package/lib/typescript/modules/BaseNonStaticModule.d.ts +0 -10
  312. package/lib/typescript/modules/BaseNonStaticModule.d.ts.map +0 -1
  313. package/lib/typescript/native/NativeLLM.d.ts +0 -12
  314. package/lib/typescript/native/NativeLLM.d.ts.map +0 -1
  315. package/src/modules/BaseNonStaticModule.ts +0 -26
  316. package/src/native/NativeLLM.ts +0 -14
  317. package/third-party/include/tokenizers-cpp/tokenizers_c.h +0 -61
  318. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/ETModel.h +0 -27
  319. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/ETModel.mm +0 -249
  320. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.h +0 -14
  321. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.mm +0 -80
  322. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.h +0 -32
  323. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.mm +0 -95
  324. package/third-party/ios/ExecutorchLib/ExecutorchLib/InputType.h +0 -12
  325. package/third-party/ios/ExecutorchLib/ExecutorchLib/Utils.hpp +0 -217
  326. package/third-party/ios/ExecutorchLib/ExecutorchLib/model/Model.cpp +0 -11
  327. package/third-party/ios/ExecutorchLib/ExecutorchLib/model/Model.h +0 -11
  328. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/base64.h +0 -202
  329. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.cpp +0 -313
  330. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.h +0 -57
  331. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.cpp +0 -78
  332. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.h +0 -23
  333. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.cpp +0 -427
  334. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.h +0 -87
  335. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tokenizer.h +0 -76
  336. package/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.pbxproj +0 -683
  337. package/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.xcworkspace/contents.xcworkspacedata +0 -7
  338. package/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.xcworkspace/xcuserdata/jakubchmura.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
  339. package/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/xcuserdata/jakubchmura.xcuserdatad/xcschemes/xcschememanagement.plist +0 -14
  340. package/third-party/ios/ExecutorchLib/build.sh +0 -44
  341. package/third-party/ios/ExecutorchLib/frameworks/backend_coreml.xcframework/Info.plist +0 -43
  342. package/third-party/ios/ExecutorchLib/frameworks/backend_coreml.xcframework/ios-arm64/libbackend_coreml_ios.a +0 -0
  343. package/third-party/ios/ExecutorchLib/frameworks/backend_coreml.xcframework/ios-arm64-simulator/libbackend_coreml_simulator.a +0 -0
  344. package/third-party/ios/ExecutorchLib/frameworks/backend_mps.xcframework/Info.plist +0 -43
  345. package/third-party/ios/ExecutorchLib/frameworks/backend_xnnpack.xcframework/Info.plist +0 -43
  346. package/third-party/ios/ExecutorchLib/frameworks/backend_xnnpack.xcframework/ios-arm64/libbackend_xnnpack_ios.a +0 -0
  347. package/third-party/ios/ExecutorchLib/frameworks/backend_xnnpack.xcframework/ios-arm64-simulator/libbackend_xnnpack_simulator.a +0 -0
  348. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/Info.plist +0 -47
  349. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/macros/Export.h +0 -163
  350. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/macros/Macros.h +0 -497
  351. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/BFloat16-inl.h +0 -342
  352. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/BFloat16-math.h +0 -266
  353. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/BFloat16.h +0 -125
  354. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/Half-inl.h +0 -347
  355. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/Half.h +0 -416
  356. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/TypeSafeSignMath.h +0 -133
  357. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/bit_cast.h +0 -43
  358. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/floating_point_utils.h +0 -33
  359. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/irange.h +0 -107
  360. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorch.h +0 -13
  361. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchError.h +0 -16
  362. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchLog.h +0 -76
  363. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchModule.h +0 -286
  364. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchTensor.h +0 -742
  365. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchValue.h +0 -219
  366. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/module/module.h +0 -492
  367. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor.h +0 -13
  368. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor_accessor.h +0 -190
  369. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor_ptr.h +0 -347
  370. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor_ptr_maker.h +0 -653
  371. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/backend/backend_execution_context.h +0 -71
  372. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/backend/backend_init_context.h +0 -72
  373. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/backend/interface.h +0 -166
  374. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/array_ref.h +0 -235
  375. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/data_loader.h +0 -136
  376. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/defines.h +0 -20
  377. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/error.h +0 -229
  378. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/evalue.h +0 -521
  379. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/event_tracer.h +0 -565
  380. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/event_tracer_hooks.h +0 -323
  381. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/event_tracer_hooks_delegate.h +0 -197
  382. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/exec_aten.h +0 -147
  383. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/dim_order_util.h +0 -263
  384. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/scalar_type_util.h +0 -1331
  385. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/tensor_dimension_limit.h +0 -21
  386. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/tensor_shape_to_c_string.h +0 -69
  387. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/tensor_util.h +0 -1250
  388. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/freeable_buffer.h +0 -107
  389. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/hierarchical_allocator.h +0 -107
  390. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/memory_allocator.h +0 -198
  391. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/named_data_map.h +0 -86
  392. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/bfloat16.h +0 -27
  393. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/bfloat16_math.h +0 -14
  394. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/bits_types.h +0 -83
  395. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Export.h +0 -163
  396. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h +0 -497
  397. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +0 -342
  398. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-math.h +0 -266
  399. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16.h +0 -125
  400. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half-inl.h +0 -347
  401. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half.h +0 -416
  402. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/TypeSafeSignMath.h +0 -133
  403. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/bit_cast.h +0 -43
  404. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/floating_point_utils.h +0 -33
  405. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/irange.h +0 -107
  406. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/complex.h +0 -44
  407. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/device.h +0 -70
  408. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/half.h +0 -27
  409. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/optional.h +0 -36
  410. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/qint_types.h +0 -83
  411. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/scalar.h +0 -110
  412. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/scalar_type.h +0 -154
  413. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/string_view.h +0 -29
  414. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/tensor.h +0 -142
  415. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/tensor_impl.h +0 -261
  416. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/tensor_options.h +0 -60
  417. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/result.h +0 -258
  418. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/span.h +0 -93
  419. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/tag.h +0 -71
  420. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/tensor_layout.h +0 -79
  421. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/tensor_shape_dynamism.h +0 -39
  422. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/memory_manager.h +0 -113
  423. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/method.h +0 -387
  424. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/method_meta.h +0 -251
  425. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/program.h +0 -320
  426. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/pte_data_map.h +0 -144
  427. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/tensor_parser.h +0 -156
  428. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/kernel/kernel_runtime_context.h +0 -122
  429. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/kernel/operator_registry.h +0 -278
  430. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/abort.h +0 -36
  431. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/assert.h +0 -119
  432. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/clock.h +0 -43
  433. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/compat_unistd.h +0 -75
  434. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/compiler.h +0 -191
  435. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/log.h +0 -177
  436. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/platform.h +0 -133
  437. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/profiler.h +0 -292
  438. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/runtime.h +0 -35
  439. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/system.h +0 -49
  440. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/types.h +0 -24
  441. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/schema/extended_header.h +0 -76
  442. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/module.modulemap +0 -5
  443. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/libexecutorch_ios.a +0 -0
  444. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/macros/Export.h +0 -163
  445. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/macros/Macros.h +0 -497
  446. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/BFloat16-inl.h +0 -342
  447. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/BFloat16-math.h +0 -266
  448. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/BFloat16.h +0 -125
  449. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/Half-inl.h +0 -347
  450. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/Half.h +0 -416
  451. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/TypeSafeSignMath.h +0 -133
  452. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/bit_cast.h +0 -43
  453. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/floating_point_utils.h +0 -33
  454. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/irange.h +0 -107
  455. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorch.h +0 -13
  456. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchError.h +0 -16
  457. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchLog.h +0 -76
  458. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchModule.h +0 -286
  459. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchTensor.h +0 -742
  460. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchValue.h +0 -219
  461. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/module/module.h +0 -492
  462. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor.h +0 -13
  463. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor_accessor.h +0 -190
  464. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor_ptr.h +0 -347
  465. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor_ptr_maker.h +0 -653
  466. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/backend/backend_execution_context.h +0 -71
  467. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/backend/backend_init_context.h +0 -72
  468. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/backend/interface.h +0 -166
  469. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/array_ref.h +0 -235
  470. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/data_loader.h +0 -136
  471. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/defines.h +0 -20
  472. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/error.h +0 -229
  473. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/evalue.h +0 -521
  474. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/event_tracer.h +0 -565
  475. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/event_tracer_hooks.h +0 -323
  476. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/event_tracer_hooks_delegate.h +0 -197
  477. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/exec_aten.h +0 -147
  478. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/dim_order_util.h +0 -263
  479. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/scalar_type_util.h +0 -1331
  480. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/tensor_dimension_limit.h +0 -21
  481. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/tensor_shape_to_c_string.h +0 -69
  482. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/tensor_util.h +0 -1250
  483. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/freeable_buffer.h +0 -107
  484. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/hierarchical_allocator.h +0 -107
  485. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/memory_allocator.h +0 -198
  486. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/named_data_map.h +0 -86
  487. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/bfloat16.h +0 -27
  488. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/bfloat16_math.h +0 -14
  489. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/bits_types.h +0 -83
  490. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Export.h +0 -163
  491. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h +0 -497
  492. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +0 -342
  493. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-math.h +0 -266
  494. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16.h +0 -125
  495. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half-inl.h +0 -347
  496. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half.h +0 -416
  497. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/TypeSafeSignMath.h +0 -133
  498. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/bit_cast.h +0 -43
  499. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/floating_point_utils.h +0 -33
  500. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/irange.h +0 -107
  501. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/complex.h +0 -44
  502. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/device.h +0 -70
  503. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/half.h +0 -27
  504. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/optional.h +0 -36
  505. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/qint_types.h +0 -83
  506. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/scalar.h +0 -110
  507. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/scalar_type.h +0 -154
  508. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/string_view.h +0 -29
  509. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/tensor.h +0 -142
  510. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/tensor_impl.h +0 -261
  511. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/tensor_options.h +0 -60
  512. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/result.h +0 -258
  513. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/span.h +0 -93
  514. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/tag.h +0 -71
  515. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/tensor_layout.h +0 -79
  516. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/tensor_shape_dynamism.h +0 -39
  517. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/memory_manager.h +0 -113
  518. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/method.h +0 -387
  519. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/method_meta.h +0 -251
  520. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/program.h +0 -320
  521. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/pte_data_map.h +0 -144
  522. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/tensor_parser.h +0 -156
  523. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/kernel/kernel_runtime_context.h +0 -122
  524. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/kernel/operator_registry.h +0 -278
  525. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/abort.h +0 -36
  526. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/assert.h +0 -119
  527. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/clock.h +0 -43
  528. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/compat_unistd.h +0 -75
  529. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/compiler.h +0 -191
  530. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/log.h +0 -177
  531. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/platform.h +0 -133
  532. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/profiler.h +0 -292
  533. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/runtime.h +0 -35
  534. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/system.h +0 -49
  535. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/types.h +0 -24
  536. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/schema/extended_header.h +0 -76
  537. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/module.modulemap +0 -5
  538. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/libexecutorch_simulator.a +0 -0
  539. package/third-party/ios/ExecutorchLib/frameworks/kernels_custom.xcframework/Info.plist +0 -43
  540. package/third-party/ios/ExecutorchLib/frameworks/kernels_custom.xcframework/ios-arm64/libkernels_custom_ios.a +0 -0
  541. package/third-party/ios/ExecutorchLib/frameworks/kernels_custom.xcframework/ios-arm64-simulator/libkernels_custom_simulator.a +0 -0
  542. package/third-party/ios/ExecutorchLib/frameworks/kernels_optimized.xcframework/Info.plist +0 -43
  543. package/third-party/ios/ExecutorchLib/frameworks/kernels_optimized.xcframework/ios-arm64/libkernels_optimized_ios.a +0 -0
  544. package/third-party/ios/ExecutorchLib/frameworks/kernels_optimized.xcframework/ios-arm64-simulator/libkernels_optimized_simulator.a +0 -0
  545. package/third-party/ios/ExecutorchLib/frameworks/kernels_portable.xcframework/Info.plist +0 -43
  546. package/third-party/ios/ExecutorchLib/frameworks/kernels_portable.xcframework/ios-arm64/libkernels_portable_ios.a +0 -0
  547. package/third-party/ios/ExecutorchLib/frameworks/kernels_portable.xcframework/ios-arm64-simulator/libkernels_portable_simulator.a +0 -0
  548. package/third-party/ios/ExecutorchLib/frameworks/kernels_quantized.xcframework/Info.plist +0 -43
  549. package/third-party/ios/ExecutorchLib/frameworks/kernels_quantized.xcframework/ios-arm64/libkernels_quantized_ios.a +0 -0
  550. package/third-party/ios/ExecutorchLib/frameworks/kernels_quantized.xcframework/ios-arm64-simulator/libkernels_quantized_simulator.a +0 -0
  551. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/Info.plist +0 -43
  552. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/bitmap256.h +0 -82
  553. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/filtered_re2.h +0 -111
  554. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/pod_array.h +0 -43
  555. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/prefilter.h +0 -130
  556. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/prefilter_tree.h +0 -139
  557. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/prog.h +0 -483
  558. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/re2.h +0 -994
  559. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/regexp.h +0 -692
  560. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/set.h +0 -85
  561. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/sparse_array.h +0 -367
  562. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/sparse_set.h +0 -241
  563. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/stringpiece.h +0 -205
  564. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/unicode_casefold.h +0 -78
  565. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/unicode_groups.h +0 -64
  566. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/walker-inl.h +0 -235
  567. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Info.plist +0 -26
  568. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/re2 +0 -0
  569. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/bitmap256.h +0 -82
  570. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/filtered_re2.h +0 -111
  571. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/pod_array.h +0 -43
  572. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/prefilter.h +0 -130
  573. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/prefilter_tree.h +0 -139
  574. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/prog.h +0 -483
  575. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/re2.h +0 -994
  576. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/regexp.h +0 -692
  577. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/set.h +0 -85
  578. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/sparse_array.h +0 -367
  579. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/sparse_set.h +0 -241
  580. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/stringpiece.h +0 -205
  581. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/unicode_casefold.h +0 -78
  582. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/unicode_groups.h +0 -64
  583. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/walker-inl.h +0 -235
  584. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Info.plist +0 -26
  585. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/re2 +0 -0
  586. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/irunner.h +0 -0
  587. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/runner.cpp +0 -0
  588. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/runner.h +0 -0
  589. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/stats.h +0 -0
  590. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/text_decoder_runner.cpp +0 -0
  591. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/text_decoder_runner.h +0 -0
  592. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/text_prefiller.cpp +0 -0
  593. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/text_prefiller.h +0 -0
  594. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/text_token_generator.h +0 -0
  595. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/util.h +0 -0
@@ -0,0 +1,2236 @@
1
+ #ifndef PTHREADPOOL_H_
2
+ #define PTHREADPOOL_H_
3
+
4
+ #include <stddef.h>
5
+ #include <stdint.h>
6
+
7
+ typedef struct pthreadpool *pthreadpool_t;
8
+
9
+ typedef void (*pthreadpool_task_1d_t)(void *, size_t);
10
+ typedef void (*pthreadpool_task_1d_with_thread_t)(void *, size_t, size_t);
11
+ typedef void (*pthreadpool_task_1d_tile_1d_t)(void *, size_t, size_t);
12
+ typedef void (*pthreadpool_task_2d_t)(void *, size_t, size_t);
13
+ typedef void (*pthreadpool_task_2d_with_thread_t)(void *, size_t, size_t,
14
+ size_t);
15
+ typedef void (*pthreadpool_task_2d_tile_1d_t)(void *, size_t, size_t, size_t);
16
+ typedef void (*pthreadpool_task_2d_tile_2d_t)(void *, size_t, size_t, size_t,
17
+ size_t);
18
+ typedef void (*pthreadpool_task_3d_t)(void *, size_t, size_t, size_t);
19
+ typedef void (*pthreadpool_task_3d_tile_1d_t)(void *, size_t, size_t, size_t,
20
+ size_t);
21
+ typedef void (*pthreadpool_task_3d_tile_1d_with_thread_t)(void *, size_t,
22
+ size_t, size_t,
23
+ size_t, size_t);
24
+ typedef void (*pthreadpool_task_3d_tile_2d_t)(void *, size_t, size_t, size_t,
25
+ size_t, size_t);
26
+ typedef void (*pthreadpool_task_4d_t)(void *, size_t, size_t, size_t, size_t);
27
+ typedef void (*pthreadpool_task_4d_tile_1d_t)(void *, size_t, size_t, size_t,
28
+ size_t, size_t);
29
+ typedef void (*pthreadpool_task_4d_tile_2d_t)(void *, size_t, size_t, size_t,
30
+ size_t, size_t, size_t);
31
+ typedef void (*pthreadpool_task_5d_t)(void *, size_t, size_t, size_t, size_t,
32
+ size_t);
33
+ typedef void (*pthreadpool_task_5d_tile_1d_t)(void *, size_t, size_t, size_t,
34
+ size_t, size_t, size_t);
35
+ typedef void (*pthreadpool_task_5d_tile_2d_t)(void *, size_t, size_t, size_t,
36
+ size_t, size_t, size_t, size_t);
37
+ typedef void (*pthreadpool_task_6d_t)(void *, size_t, size_t, size_t, size_t,
38
+ size_t, size_t);
39
+ typedef void (*pthreadpool_task_6d_tile_1d_t)(void *, size_t, size_t, size_t,
40
+ size_t, size_t, size_t, size_t);
41
+ typedef void (*pthreadpool_task_6d_tile_2d_t)(void *, size_t, size_t, size_t,
42
+ size_t, size_t, size_t, size_t,
43
+ size_t);
44
+
45
+ typedef void (*pthreadpool_task_1d_with_id_t)(void *, uint32_t, size_t);
46
+ typedef void (*pthreadpool_task_2d_tile_1d_with_id_t)(void *, uint32_t, size_t,
47
+ size_t, size_t);
48
+ typedef void (*pthreadpool_task_2d_tile_2d_with_id_t)(void *, uint32_t, size_t,
49
+ size_t, size_t, size_t);
50
+ typedef void (*pthreadpool_task_3d_tile_1d_with_id_t)(void *, uint32_t, size_t,
51
+ size_t, size_t, size_t);
52
+ typedef void (*pthreadpool_task_3d_tile_2d_with_id_t)(void *, uint32_t, size_t,
53
+ size_t, size_t, size_t,
54
+ size_t);
55
+ typedef void (*pthreadpool_task_4d_tile_2d_with_id_t)(void *, uint32_t, size_t,
56
+ size_t, size_t, size_t,
57
+ size_t, size_t);
58
+
59
+ typedef void (*pthreadpool_task_2d_tile_1d_with_id_with_thread_t)(
60
+ void *, uint32_t, size_t, size_t, size_t, size_t);
61
+ typedef void (*pthreadpool_task_3d_tile_1d_with_id_with_thread_t)(
62
+ void *, uint32_t, size_t, size_t, size_t, size_t, size_t);
63
+
64
+ /**
65
+ * Disable support for denormalized numbers to the maximum extent possible for
66
+ * the duration of the computation.
67
+ *
68
+ * Handling denormalized floating-point numbers is often implemented in
69
+ * microcode, and incurs significant performance degradation. This hint
70
+ * instructs the thread pool to disable support for denormalized numbers before
71
+ * running the computation by manipulating architecture-specific control
72
+ * registers, and restore the initial value of control registers after the
73
+ * computation is complete. The thread pool temporarily disables denormalized
74
+ * numbers on all threads involved in the computation (i.e. the caller threads,
75
+ * and potentially worker threads).
76
+ *
77
+ * Disabling denormalized numbers may have a small negative effect on results'
78
+ * accuracy. As various architectures differ in capabilities to control
79
+ * processing of denormalized numbers, using this flag may also hurt results'
80
+ * reproducibility across different instruction set architectures.
81
+ */
82
+ #define PTHREADPOOL_FLAG_DISABLE_DENORMALS 0x00000001
83
+
84
+ /**
85
+ * Yield worker threads to the system scheduler after the operation is finished.
86
+ *
87
+ * Force workers to use kernel wait (instead of active spin-wait by default) for
88
+ * new commands after this command is processed. This flag affects only the
89
+ * immediate next operation on this thread pool. To make the thread pool always
90
+ * use kernel wait, pass this flag to all parallelization functions.
91
+ */
92
+ #define PTHREADPOOL_FLAG_YIELD_WORKERS 0x00000002
93
+
94
+ #ifdef __cplusplus
95
+ extern "C" {
96
+ #endif
97
+
98
+ /**
99
+ * Create a thread pool with the specified number of threads.
100
+ *
101
+ * @param threads_count the number of threads in the thread pool.
102
+ * A value of 0 has special interpretation: it creates a thread pool with as
103
+ * many threads as there are logical processors in the system.
104
+ *
105
+ * @returns A pointer to an opaque thread pool object if the call is
106
+ * successful, or NULL pointer if the call failed.
107
+ */
108
+ pthreadpool_t pthreadpool_create(size_t threads_count);
109
+
110
+ /**
111
+ * Query the number of threads in a thread pool.
112
+ *
113
+ * @param threadpool the thread pool to query.
114
+ *
115
+ * @returns The number of threads in the thread pool.
116
+ */
117
+ size_t pthreadpool_get_threads_count(pthreadpool_t threadpool);
118
+
119
+ /**
120
+ * Process items on a 1D grid.
121
+ *
122
+ * The function implements a parallel version of the following snippet:
123
+ *
124
+ * for (size_t i = 0; i < range; i++)
125
+ * function(context, i);
126
+ *
127
+ * When the function returns, all items have been processed and the thread pool
128
+ * is ready for a new task.
129
+ *
130
+ * @note If multiple threads call this function with the same thread pool, the
131
+ * calls are serialized.
132
+ *
133
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
134
+ * is NULL, all items are processed serially on the calling thread.
135
+ * @param function the function to call for each item.
136
+ * @param context the first argument passed to the specified function.
137
+ * @param range the number of items on the 1D grid to process. The
138
+ * specified function will be called once for each item.
139
+ * @param flags a bitwise combination of zero or more optional flags
140
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
141
+ */
142
+ void pthreadpool_parallelize_1d(pthreadpool_t threadpool,
143
+ pthreadpool_task_1d_t function, void *context,
144
+ size_t range, uint32_t flags);
145
+
146
+ /**
147
+ * Process items on a 1D grid passing along the current thread id.
148
+ *
149
+ * The function implements a parallel version of the following snippet:
150
+ *
151
+ * for (size_t i = 0; i < range; i++)
152
+ * function(context, thread_index, i);
153
+ *
154
+ * When the function returns, all items have been processed and the thread pool
155
+ * is ready for a new task.
156
+ *
157
+ * @note If multiple threads call this function with the same thread pool, the
158
+ * calls are serialized.
159
+ *
160
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
161
+ * is NULL, all items are processed serially on the calling thread.
162
+ * @param function the function to call for each item.
163
+ * @param context the first argument passed to the specified function.
164
+ * @param range the number of items on the 1D grid to process. The
165
+ * specified function will be called once for each item.
166
+ * @param flags a bitwise combination of zero or more optional flags
167
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
168
+ */
169
+ void pthreadpool_parallelize_1d_with_thread(
170
+ pthreadpool_t threadpool, pthreadpool_task_1d_with_thread_t function,
171
+ void *context, size_t range, uint32_t flags);
172
+
173
+ /**
174
+ * Process items on a 1D grid using a microarchitecture-aware task function.
175
+ *
176
+ * The function implements a parallel version of the following snippet:
177
+ *
178
+ * uint32_t uarch_index = cpuinfo_initialize() ?
179
+ * cpuinfo_get_current_uarch_index() : default_uarch_index;
180
+ * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
181
+ * for (size_t i = 0; i < range; i++)
182
+ * function(context, uarch_index, i);
183
+ *
184
+ * When the function returns, all items have been processed and the thread pool
185
+ * is ready for a new task.
186
+ *
187
+ * @note If multiple threads call this function with the same thread pool, the
188
+ * calls are serialized.
189
+ *
190
+ * @param threadpool the thread pool to use for parallelisation. If
191
+ * threadpool is NULL, all items are processed serially on the calling
192
+ * thread.
193
+ * @param function the function to call for each item.
194
+ * @param context the first argument passed to the specified
195
+ * function.
196
+ * @param default_uarch_index the microarchitecture index to use when
197
+ * pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
198
+ * or index returned by cpuinfo_get_current_uarch_index() exceeds the
199
+ * max_uarch_index value.
200
+ * @param max_uarch_index the maximum microarchitecture index expected by
201
+ * the specified function. If the index returned by
202
+ * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
203
+ * will be used instead. default_uarch_index can exceed max_uarch_index.
204
+ * @param range the number of items on the 1D grid to process.
205
+ * The specified function will be called once for each item.
206
+ * @param flags a bitwise combination of zero or more optional
207
+ * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
208
+ * PTHREADPOOL_FLAG_YIELD_WORKERS)
209
+ */
210
+ void pthreadpool_parallelize_1d_with_uarch(
211
+ pthreadpool_t threadpool, pthreadpool_task_1d_with_id_t function,
212
+ void *context, uint32_t default_uarch_index, uint32_t max_uarch_index,
213
+ size_t range, uint32_t flags);
214
+
215
+ /**
216
+ * Process items on a 1D grid with specified maximum tile size.
217
+ *
218
+ * The function implements a parallel version of the following snippet:
219
+ *
220
+ * for (size_t i = 0; i < range; i += tile)
221
+ * function(context, i, min(range - i, tile));
222
+ *
223
+ * When the call returns, all items have been processed and the thread pool is
224
+ * ready for a new task.
225
+ *
226
+ * @note If multiple threads call this function with the same thread pool,
227
+ * the calls are serialized.
228
+ *
229
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
230
+ * is NULL, all items are processed serially on the calling thread.
231
+ * @param function the function to call for each tile.
232
+ * @param context the first argument passed to the specified function.
233
+ * @param range the number of items on the 1D grid to process.
234
+ * @param tile the maximum number of items on the 1D grid to process in
235
+ * one function call.
236
+ * @param flags a bitwise combination of zero or more optional flags
237
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
238
+ */
239
+ void pthreadpool_parallelize_1d_tile_1d(pthreadpool_t threadpool,
240
+ pthreadpool_task_1d_tile_1d_t function,
241
+ void *context, size_t range,
242
+ size_t tile, uint32_t flags);
243
+
244
+ /**
245
+ * Process items on a 2D grid.
246
+ *
247
+ * The function implements a parallel version of the following snippet:
248
+ *
249
+ * for (size_t i = 0; i < range_i; i++)
250
+ * for (size_t j = 0; j < range_j; j++)
251
+ * function(context, i, j);
252
+ *
253
+ * When the function returns, all items have been processed and the thread pool
254
+ * is ready for a new task.
255
+ *
256
+ * @note If multiple threads call this function with the same thread pool, the
257
+ * calls are serialized.
258
+ *
259
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
260
+ * is NULL, all items are processed serially on the calling thread.
261
+ * @param function the function to call for each item.
262
+ * @param context the first argument passed to the specified function.
263
+ * @param range_i the number of items to process along the first dimension
264
+ * of the 2D grid.
265
+ * @param range_j the number of items to process along the second dimension
266
+ * of the 2D grid.
267
+ * @param flags a bitwise combination of zero or more optional flags
268
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
269
+ */
270
+ void pthreadpool_parallelize_2d(pthreadpool_t threadpool,
271
+ pthreadpool_task_2d_t function, void *context,
272
+ size_t range_i, size_t range_j, uint32_t flags);
273
+
274
+ /**
275
+ * Process items on a 2D grid passing along the current thread id.
276
+ *
277
+ * The function implements a parallel version of the following snippet:
278
+ *
279
+ * for (size_t i = 0; i < range_i; i++)
280
+ * for (size_t j = 0; j < range_j; j++)
281
+ * function(context, thread_index, i, j);
282
+ *
283
+ * When the function returns, all items have been processed and the thread pool
284
+ * is ready for a new task.
285
+ *
286
+ * @note If multiple threads call this function with the same thread pool, the
287
+ * calls are serialized.
288
+ *
289
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
290
+ * is NULL, all items are processed serially on the calling thread.
291
+ * @param function the function to call for each item.
292
+ * @param context the first argument passed to the specified function.
293
+ * @param range_i the number of items to process along the first dimension
294
+ * of the 2D grid.
295
+ * @param range_j the number of items to process along the second dimension
296
+ * of the 2D grid.
297
+ * @param flags a bitwise combination of zero or more optional flags
298
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
299
+ */
300
+ void pthreadpool_parallelize_2d_with_thread(
301
+ pthreadpool_t threadpool, pthreadpool_task_2d_with_thread_t function,
302
+ void *context, size_t range_i, size_t range_j, uint32_t flags);
303
+
304
+ /**
305
+ * Process items on a 2D grid with the specified maximum tile size along the
306
+ * last grid dimension.
307
+ *
308
+ * The function implements a parallel version of the following snippet:
309
+ *
310
+ * for (size_t i = 0; i < range_i; i++)
311
+ * for (size_t j = 0; j < range_j; j += tile_j)
312
+ * function(context, i, j, min(range_j - j, tile_j));
313
+ *
314
+ * When the function returns, all items have been processed and the thread pool
315
+ * is ready for a new task.
316
+ *
317
+ * @note If multiple threads call this function with the same thread pool, the
318
+ * calls are serialized.
319
+ *
320
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
321
+ * is NULL, all items are processed serially on the calling thread.
322
+ * @param function the function to call for each tile.
323
+ * @param context the first argument passed to the specified function.
324
+ * @param range_i the number of items to process along the first dimension
325
+ * of the 2D grid.
326
+ * @param range_j the number of items to process along the second dimension
327
+ * of the 2D grid.
328
+ * @param tile_j the maximum number of items along the second dimension of
329
+ * the 2D grid to process in one function call.
330
+ * @param flags a bitwise combination of zero or more optional flags
331
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
332
+ */
333
+ void pthreadpool_parallelize_2d_tile_1d(pthreadpool_t threadpool,
334
+ pthreadpool_task_2d_tile_1d_t function,
335
+ void *context, size_t range_i,
336
+ size_t range_j, size_t tile_j,
337
+ uint32_t flags);
338
+
339
+ /**
340
+ * Process items on a 2D grid with the specified maximum tile size along the
341
+ * last grid dimension using a microarchitecture-aware task function.
342
+ *
343
+ * The function implements a parallel version of the following snippet:
344
+ *
345
+ * uint32_t uarch_index = cpuinfo_initialize() ?
346
+ * cpuinfo_get_current_uarch_index() : default_uarch_index;
347
+ * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
348
+ * for (size_t i = 0; i < range_i; i++)
349
+ * for (size_t j = 0; j < range_j; j += tile_j)
350
+ * function(context, uarch_index, i, j, min(range_j - j, tile_j));
351
+ *
352
+ * When the function returns, all items have been processed and the thread pool
353
+ * is ready for a new task.
354
+ *
355
+ * @note If multiple threads call this function with the same thread pool, the
356
+ * calls are serialized.
357
+ *
358
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
359
+ * is NULL, all items are processed serially on the calling thread.
360
+ * @param function the function to call for each tile.
361
+ * @param context the first argument passed to the specified function.
362
+ * @param default_uarch_index the microarchitecture index to use when
363
+ * pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
364
+ * or index returned by cpuinfo_get_current_uarch_index() exceeds the
365
+ * max_uarch_index value.
366
+ * @param max_uarch_index the maximum microarchitecture index expected by
367
+ * the specified function. If the index returned by
368
+ * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
369
+ * will be used instead. default_uarch_index can exceed max_uarch_index.
370
+ * @param range_i the number of items to process along the first dimension
371
+ * of the 2D grid.
372
+ * @param range_j the number of items to process along the second dimension
373
+ * of the 2D grid.
374
+ * @param tile_j the maximum number of items along the second dimension of
375
+ * the 2D grid to process in one function call.
376
+ * @param flags a bitwise combination of zero or more optional flags
377
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
378
+ */
379
+ void pthreadpool_parallelize_2d_tile_1d_with_uarch(
380
+ pthreadpool_t threadpool, pthreadpool_task_2d_tile_1d_with_id_t function,
381
+ void *context, uint32_t default_uarch_index, uint32_t max_uarch_index,
382
+ size_t range_i, size_t range_j, size_t tile_j, uint32_t flags);
383
+
384
+ /**
385
+ * Process items on a 2D grid with the specified maximum tile size along the
386
+ * last grid dimension using a microarchitecture-aware task function and passing
387
+ * along the current thread id.
388
+ *
389
+ * The function implements a parallel version of the following snippet:
390
+ *
391
+ * uint32_t uarch_index = cpuinfo_initialize() ?
392
+ * cpuinfo_get_current_uarch_index() : default_uarch_index;
393
+ * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
394
+ * for (size_t i = 0; i < range_i; i++)
395
+ * for (size_t j = 0; j < range_j; j += tile_j)
396
+ * function(context, uarch_index, thread_index, i, j, min(range_j - j,
397
+ * tile_j));
398
+ *
399
+ * When the function returns, all items have been processed and the thread pool
400
+ * is ready for a new task.
401
+ *
402
+ * @note If multiple threads call this function with the same thread pool, the
403
+ * calls are serialized.
404
+ *
405
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
406
+ * is NULL, all items are processed serially on the calling thread.
407
+ * @param function the function to call for each tile.
408
+ * @param context the first argument passed to the specified function.
409
+ * @param default_uarch_index the microarchitecture index to use when
410
+ * pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
411
+ * or index returned by cpuinfo_get_current_uarch_index() exceeds the
412
+ * max_uarch_index value.
413
+ * @param max_uarch_index the maximum microarchitecture index expected by
414
+ * the specified function. If the index returned by
415
+ * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
416
+ * will be used instead. default_uarch_index can exceed max_uarch_index.
417
+ * @param range_i the number of items to process along the first dimension
418
+ * of the 2D grid.
419
+ * @param range_j the number of items to process along the second dimension
420
+ * of the 2D grid.
421
+ * @param tile_j the maximum number of items along the second dimension of
422
+ * the 2D grid to process in one function call.
423
+ * @param flags a bitwise combination of zero or more optional flags
424
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
425
+ */
426
+ void pthreadpool_parallelize_2d_tile_1d_with_uarch_with_thread(
427
+ pthreadpool_t threadpool,
428
+ pthreadpool_task_2d_tile_1d_with_id_with_thread_t function, void *context,
429
+ uint32_t default_uarch_index, uint32_t max_uarch_index, size_t range_i,
430
+ size_t range_j, size_t tile_j, uint32_t flags);
431
+
432
+ /**
433
+ * Process items on a 2D grid with the specified maximum tile size along each
434
+ * grid dimension.
435
+ *
436
+ * The function implements a parallel version of the following snippet:
437
+ *
438
+ * for (size_t i = 0; i < range_i; i += tile_i)
439
+ * for (size_t j = 0; j < range_j; j += tile_j)
440
+ * function(context, i, j,
441
+ * min(range_i - i, tile_i), min(range_j - j, tile_j));
442
+ *
443
+ * When the function returns, all items have been processed and the thread pool
444
+ * is ready for a new task.
445
+ *
446
+ * @note If multiple threads call this function with the same thread pool, the
447
+ * calls are serialized.
448
+ *
449
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
450
+ * is NULL, all items are processed serially on the calling thread.
451
+ * @param function the function to call for each tile.
452
+ * @param context the first argument passed to the specified function.
453
+ * @param range_i the number of items to process along the first dimension
454
+ * of the 2D grid.
455
+ * @param range_j the number of items to process along the second dimension
456
+ * of the 2D grid.
457
+ * @param tile_i the maximum number of items along the first dimension of
458
+ * the 2D grid to process in one function call.
459
+ * @param tile_j the maximum number of items along the second dimension of
460
+ * the 2D grid to process in one function call.
461
+ * @param flags a bitwise combination of zero or more optional flags
462
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
463
+ */
464
+ void pthreadpool_parallelize_2d_tile_2d(pthreadpool_t threadpool,
465
+ pthreadpool_task_2d_tile_2d_t function,
466
+ void *context, size_t range_i,
467
+ size_t range_j, size_t tile_i,
468
+ size_t tile_j, uint32_t flags);
469
+
470
+ /**
471
+ * Process items on a 2D grid with the specified maximum tile size along each
472
+ * grid dimension using a microarchitecture-aware task function.
473
+ *
474
+ * The function implements a parallel version of the following snippet:
475
+ *
476
+ * uint32_t uarch_index = cpuinfo_initialize() ?
477
+ * cpuinfo_get_current_uarch_index() : default_uarch_index;
478
+ * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
479
+ * for (size_t i = 0; i < range_i; i += tile_i)
480
+ * for (size_t j = 0; j < range_j; j += tile_j)
481
+ * function(context, uarch_index, i, j,
482
+ * min(range_i - i, tile_i), min(range_j - j, tile_j));
483
+ *
484
+ * When the function returns, all items have been processed and the thread pool
485
+ * is ready for a new task.
486
+ *
487
+ * @note If multiple threads call this function with the same thread pool, the
488
+ * calls are serialized.
489
+ *
490
+ * @param threadpool the thread pool to use for parallelisation. If
491
+ * threadpool is NULL, all items are processed serially on the calling
492
+ * thread.
493
+ * @param function the function to call for each tile.
494
+ * @param context the first argument passed to the specified
495
+ * function.
496
+ * @param default_uarch_index the microarchitecture index to use when
497
+ * pthreadpool is configured without cpuinfo,
498
+ * cpuinfo initialization failed, or index returned
499
+ * by cpuinfo_get_current_uarch_index() exceeds
500
+ * the max_uarch_index value.
501
+ * @param max_uarch_index the maximum microarchitecture index expected
502
+ * by the specified function. If the index returned
503
+ * by cpuinfo_get_current_uarch_index() exceeds this
504
+ * value, default_uarch_index will be used instead.
505
+ * default_uarch_index can exceed max_uarch_index.
506
+ * @param range_i the number of items to process along the first
507
+ * dimension of the 2D grid.
508
+ * @param range_j the number of items to process along the second
509
+ * dimension of the 2D grid.
510
+ * @param tile_i the maximum number of items along the first
511
+ * dimension of the 2D grid to process in one function call.
512
+ * @param tile_j the maximum number of items along the second
513
+ * dimension of the 2D grid to process in one function call.
514
+ * @param flags a bitwise combination of zero or more optional
515
+ * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
516
+ * PTHREADPOOL_FLAG_YIELD_WORKERS)
517
+ */
518
+ void pthreadpool_parallelize_2d_tile_2d_with_uarch(
519
+ pthreadpool_t threadpool, pthreadpool_task_2d_tile_2d_with_id_t function,
520
+ void *context, uint32_t default_uarch_index, uint32_t max_uarch_index,
521
+ size_t range_i, size_t range_j, size_t tile_i, size_t tile_j,
522
+ uint32_t flags);
523
+
524
+ /**
525
+ * Process items on a 3D grid.
526
+ *
527
+ * The function implements a parallel version of the following snippet:
528
+ *
529
+ * for (size_t i = 0; i < range_i; i++)
530
+ * for (size_t j = 0; j < range_j; j++)
531
+ * for (size_t k = 0; k < range_k; k++)
532
+ * function(context, i, j, k);
533
+ *
534
+ * When the function returns, all items have been processed and the thread pool
535
+ * is ready for a new task.
536
+ *
537
+ * @note If multiple threads call this function with the same thread pool, the
538
+ * calls are serialized.
539
+ *
540
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
541
+ * is NULL, all items are processed serially on the calling thread.
542
+ * @param function the function to call for each item.
543
+ * @param context the first argument passed to the specified function.
544
+ * @param range_i the number of items to process along the first dimension
545
+ * of the 3D grid.
546
+ * @param range_j the number of items to process along the second dimension
547
+ * of the 3D grid.
548
+ * @param range_k the number of items to process along the third dimension
549
+ * of the 3D grid.
550
+ * @param flags a bitwise combination of zero or more optional flags
551
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
552
+ */
553
+ void pthreadpool_parallelize_3d(pthreadpool_t threadpool,
554
+ pthreadpool_task_3d_t function, void *context,
555
+ size_t range_i, size_t range_j, size_t range_k,
556
+ uint32_t flags);
557
+
558
+ /**
559
+ * Process items on a 3D grid with the specified maximum tile size along the
560
+ * last grid dimension.
561
+ *
562
+ * The function implements a parallel version of the following snippet:
563
+ *
564
+ * for (size_t i = 0; i < range_i; i++)
565
+ * for (size_t j = 0; j < range_j; j++)
566
+ * for (size_t k = 0; k < range_k; k += tile_k)
567
+ * function(context, i, j, k, min(range_k - k, tile_k));
568
+ *
569
+ * When the function returns, all items have been processed and the thread pool
570
+ * is ready for a new task.
571
+ *
572
+ * @note If multiple threads call this function with the same thread pool, the
573
+ * calls are serialized.
574
+ *
575
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
576
+ * is NULL, all items are processed serially on the calling thread.
577
+ * @param function the function to call for each tile.
578
+ * @param context the first argument passed to the specified function.
579
+ * @param range_i the number of items to process along the first dimension
580
+ * of the 3D grid.
581
+ * @param range_j the number of items to process along the second dimension
582
+ * of the 3D grid.
583
+ * @param range_k the number of items to process along the third dimension
584
+ * of the 3D grid.
585
+ * @param tile_k the maximum number of items along the third dimension of
586
+ * the 3D grid to process in one function call.
587
+ * @param flags a bitwise combination of zero or more optional flags
588
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
589
+ */
590
+ void pthreadpool_parallelize_3d_tile_1d(pthreadpool_t threadpool,
591
+ pthreadpool_task_3d_tile_1d_t function,
592
+ void *context, size_t range_i,
593
+ size_t range_j, size_t range_k,
594
+ size_t tile_k, uint32_t flags);
595
+
596
+ /**
597
+ * Process items on a 3D grid with the specified maximum tile size along the
598
+ * last grid dimension and passing along the current thread id.
599
+ *
600
+ * The function implements a parallel version of the following snippet:
601
+ *
602
+ * for (size_t i = 0; i < range_i; i++)
603
+ * for (size_t j = 0; j < range_j; j++)
604
+ * for (size_t k = 0; k < range_k; k += tile_k)
605
+ * function(context, thread_index, i, j, k, min(range_k - k, tile_k));
606
+ *
607
+ * When the function returns, all items have been processed and the thread pool
608
+ * is ready for a new task.
609
+ *
610
+ * @note If multiple threads call this function with the same thread pool, the
611
+ * calls are serialized.
612
+ *
613
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
614
+ * is NULL, all items are processed serially on the calling thread.
615
+ * @param function the function to call for each tile.
616
+ * @param context the first argument passed to the specified function.
617
+ * @param range_i the number of items to process along the first dimension
618
+ * of the 3D grid.
619
+ * @param range_j the number of items to process along the second dimension
620
+ * of the 3D grid.
621
+ * @param range_k the number of items to process along the third dimension
622
+ * of the 3D grid.
623
+ * @param tile_k the maximum number of items along the third dimension of
624
+ * the 3D grid to process in one function call.
625
+ * @param flags a bitwise combination of zero or more optional flags
626
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
627
+ */
628
+ void pthreadpool_parallelize_3d_tile_1d_with_thread(
629
+ pthreadpool_t threadpool,
630
+ pthreadpool_task_3d_tile_1d_with_thread_t function, void *context,
631
+ size_t range_i, size_t range_j, size_t range_k, size_t tile_k,
632
+ uint32_t flags);
633
+
634
+ /**
635
+ * Process items on a 3D grid with the specified maximum tile size along the
636
+ * last grid dimension using a microarchitecture-aware task function.
637
+ *
638
+ * The function implements a parallel version of the following snippet:
639
+ *
640
+ * uint32_t uarch_index = cpuinfo_initialize() ?
641
+ * cpuinfo_get_current_uarch_index() : default_uarch_index;
642
+ * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
643
+ * for (size_t i = 0; i < range_i; i++)
644
+ * for (size_t j = 0; j < range_j; j++)
645
+ * for (size_t k = 0; k < range_k; k += tile_k)
646
+ * function(context, uarch_index, i, j, k, min(range_k - k, tile_k));
647
+ *
648
+ * When the function returns, all items have been processed and the thread pool
649
+ * is ready for a new task.
650
+ *
651
+ * @note If multiple threads call this function with the same thread pool, the
652
+ * calls are serialized.
653
+ *
654
+ * @param threadpool the thread pool to use for parallelisation. If
655
+ * threadpool is NULL, all items are processed serially on the calling
656
+ * thread.
657
+ * @param function the function to call for each tile.
658
+ * @param context the first argument passed to the specified
659
+ * function.
660
+ * @param default_uarch_index the microarchitecture index to use when
661
+ * pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
662
+ * or the index returned by cpuinfo_get_current_uarch_index() exceeds the
663
+ * max_uarch_index value.
664
+ * @param max_uarch_index the maximum microarchitecture index expected by
665
+ * the specified function. If the index returned by
666
+ * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
667
+ * will be used instead. default_uarch_index can exceed max_uarch_index.
668
+ * @param range_i the number of items to process along the first
669
+ * dimension of the 3D grid.
670
+ * @param range_j the number of items to process along the second
671
+ * dimension of the 3D grid.
672
+ * @param range_k the number of items to process along the third
673
+ * dimension of the 3D grid.
674
+ * @param tile_k the maximum number of items along the third
675
+ * dimension of the 3D grid to process in one function call.
676
+ * @param flags a bitwise combination of zero or more optional
677
+ * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
678
+ * PTHREADPOOL_FLAG_YIELD_WORKERS)
679
+ */
680
+ void pthreadpool_parallelize_3d_tile_1d_with_uarch(
681
+ pthreadpool_t threadpool, pthreadpool_task_3d_tile_1d_with_id_t function,
682
+ void *context, uint32_t default_uarch_index, uint32_t max_uarch_index,
683
+ size_t range_i, size_t range_j, size_t range_k, size_t tile_k,
684
+ uint32_t flags);
685
+
686
+ /**
687
+ * Process items on a 3D grid with the specified maximum tile size along the
688
+ * last grid dimension using a microarchitecture-aware task function and passing
689
+ * along the current thread id.
690
+ *
691
+ * The function implements a parallel version of the following snippet:
692
+ *
693
+ * uint32_t uarch_index = cpuinfo_initialize() ?
694
+ * cpuinfo_get_current_uarch_index() : default_uarch_index;
695
+ * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
696
+ * for (size_t i = 0; i < range_i; i++)
697
+ * for (size_t j = 0; j < range_j; j++)
698
+ * for (size_t k = 0; k < range_k; k += tile_k)
699
+ * function(context, uarch_index, thread_index, i, j, k,
700
+ * min(range_k - k, tile_k));
701
+ *
702
+ * When the function returns, all items have been processed and the thread pool
703
+ * is ready for a new task.
704
+ *
705
+ * @note If multiple threads call this function with the same thread pool, the
706
+ * calls are serialized.
707
+ *
708
+ * @param threadpool the thread pool to use for parallelisation. If
709
+ * threadpool is NULL, all items are processed serially on the calling
710
+ * thread.
711
+ * @param function the function to call for each tile.
712
+ * @param context the first argument passed to the specified
713
+ * function.
714
+ * @param default_uarch_index the microarchitecture index to use when
715
+ * pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
716
+ * or the index returned by cpuinfo_get_current_uarch_index() exceeds the
717
+ * max_uarch_index value.
718
+ * @param max_uarch_index the maximum microarchitecture index expected by
719
+ * the specified function. If the index returned by
720
+ * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
721
+ * will be used instead. default_uarch_index can exceed max_uarch_index.
722
+ * @param range_i the number of items to process along the first
723
+ * dimension of the 3D grid.
724
+ * @param range_j the number of items to process along the second
725
+ * dimension of the 3D grid.
726
+ * @param range_k the number of items to process along the third
727
+ * dimension of the 3D grid.
728
+ * @param tile_k the maximum number of items along the third
729
+ * dimension of the 3D grid to process in one function call.
730
+ * @param flags a bitwise combination of zero or more optional
731
+ * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
732
+ * PTHREADPOOL_FLAG_YIELD_WORKERS)
733
+ */
734
+ void pthreadpool_parallelize_3d_tile_1d_with_uarch_with_thread(
735
+ pthreadpool_t threadpool,
736
+ pthreadpool_task_3d_tile_1d_with_id_with_thread_t function, void *context,
737
+ uint32_t default_uarch_index, uint32_t max_uarch_index, size_t range_i,
738
+ size_t range_j, size_t range_k, size_t tile_k, uint32_t flags);
739
+
740
+ /**
741
+ * Process items on a 3D grid with the specified maximum tile size along the
742
+ * last two grid dimensions.
743
+ *
744
+ * The function implements a parallel version of the following snippet:
745
+ *
746
+ * for (size_t i = 0; i < range_i; i++)
747
+ * for (size_t j = 0; j < range_j; j += tile_j)
748
+ * for (size_t k = 0; k < range_k; k += tile_k)
749
+ * function(context, i, j, k,
750
+ * min(range_j - j, tile_j), min(range_k - k, tile_k));
751
+ *
752
+ * When the function returns, all items have been processed and the thread pool
753
+ * is ready for a new task.
754
+ *
755
+ * @note If multiple threads call this function with the same thread pool, the
756
+ * calls are serialized.
757
+ *
758
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
759
+ * is NULL, all items are processed serially on the calling thread.
760
+ * @param function the function to call for each tile.
761
+ * @param context the first argument passed to the specified function.
762
+ * @param range_i the number of items to process along the first dimension
763
+ * of the 3D grid.
764
+ * @param range_j the number of items to process along the second dimension
765
+ * of the 3D grid.
766
+ * @param range_k the number of items to process along the third dimension
767
+ * of the 3D grid.
768
+ * @param tile_j the maximum number of items along the second dimension of
769
+ * the 3D grid to process in one function call.
770
+ * @param tile_k the maximum number of items along the third dimension of
771
+ * the 3D grid to process in one function call.
772
+ * @param flags a bitwise combination of zero or more optional flags
773
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
774
+ */
775
+ void pthreadpool_parallelize_3d_tile_2d(pthreadpool_t threadpool,
776
+ pthreadpool_task_3d_tile_2d_t function,
777
+ void *context, size_t range_i,
778
+ size_t range_j, size_t range_k,
779
+ size_t tile_j, size_t tile_k,
780
+ uint32_t flags);
781
+
782
+ /**
783
+ * Process items on a 3D grid with the specified maximum tile size along the
784
+ * last two grid dimensions using a microarchitecture-aware task function.
785
+ *
786
+ * The function implements a parallel version of the following snippet:
787
+ *
788
+ * uint32_t uarch_index = cpuinfo_initialize() ?
789
+ * cpuinfo_get_current_uarch_index() : default_uarch_index;
790
+ * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
791
+ * for (size_t i = 0; i < range_i; i++)
792
+ * for (size_t j = 0; j < range_j; j += tile_j)
793
+ * for (size_t k = 0; k < range_k; k += tile_k)
794
+ * function(context, uarch_index, i, j, k,
795
+ * min(range_j - j, tile_j), min(range_k - k, tile_k));
796
+ *
797
+ * When the function returns, all items have been processed and the thread pool
798
+ * is ready for a new task.
799
+ *
800
+ * @note If multiple threads call this function with the same thread pool, the
801
+ * calls are serialized.
802
+ *
803
+ * @param threadpool the thread pool to use for parallelisation. If
804
+ * threadpool is NULL, all items are processed serially on the calling
805
+ * thread.
806
+ * @param function the function to call for each tile.
807
+ * @param context the first argument passed to the specified
808
+ * function.
809
+ * @param default_uarch_index the microarchitecture index to use when
810
+ * pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
811
+ * or the index returned by cpuinfo_get_current_uarch_index() exceeds the
812
+ * max_uarch_index value.
813
+ * @param max_uarch_index the maximum microarchitecture index expected by
814
+ * the specified function. If the index returned by
815
+ * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
816
+ * will be used instead. default_uarch_index can exceed max_uarch_index.
817
+ * @param range_i the number of items to process along the first
818
+ * dimension of the 3D grid.
819
+ * @param range_j the number of items to process along the second
820
+ * dimension of the 3D grid.
821
+ * @param range_k the number of items to process along the third
822
+ * dimension of the 3D grid.
823
+ * @param tile_j the maximum number of items along the second
824
+ * dimension of the 3D grid to process in one function call.
825
+ * @param tile_k the maximum number of items along the third
826
+ * dimension of the 3D grid to process in one function call.
827
+ * @param flags a bitwise combination of zero or more optional
828
+ * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
829
+ * PTHREADPOOL_FLAG_YIELD_WORKERS)
830
+ */
831
+ void pthreadpool_parallelize_3d_tile_2d_with_uarch(
832
+ pthreadpool_t threadpool, pthreadpool_task_3d_tile_2d_with_id_t function,
833
+ void *context, uint32_t default_uarch_index, uint32_t max_uarch_index,
834
+ size_t range_i, size_t range_j, size_t range_k, size_t tile_j,
835
+ size_t tile_k, uint32_t flags);
836
+
837
+ /**
838
+ * Process items on a 4D grid.
839
+ *
840
+ * The function implements a parallel version of the following snippet:
841
+ *
842
+ * for (size_t i = 0; i < range_i; i++)
843
+ * for (size_t j = 0; j < range_j; j++)
844
+ * for (size_t k = 0; k < range_k; k++)
845
+ * for (size_t l = 0; l < range_l; l++)
846
+ * function(context, i, j, k, l);
847
+ *
848
+ * When the function returns, all items have been processed and the thread pool
849
+ * is ready for a new task.
850
+ *
851
+ * @note If multiple threads call this function with the same thread pool, the
852
+ * calls are serialized.
853
+ *
854
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
855
+ * is NULL, all items are processed serially on the calling thread.
856
+ * @param function the function to call for each item.
857
+ * @param context the first argument passed to the specified function.
858
+ * @param range_i the number of items to process along the first dimension
859
+ * of the 4D grid.
860
+ * @param range_j the number of items to process along the second dimension
861
+ * of the 4D grid.
862
+ * @param range_k the number of items to process along the third dimension
863
+ * of the 4D grid.
864
+ * @param range_l the number of items to process along the fourth dimension
865
+ * of the 4D grid.
866
+ * @param flags a bitwise combination of zero or more optional flags
867
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
868
+ */
869
+ void pthreadpool_parallelize_4d(pthreadpool_t threadpool,
870
+ pthreadpool_task_4d_t function, void *context,
871
+ size_t range_i, size_t range_j, size_t range_k,
872
+ size_t range_l, uint32_t flags);
873
+
874
+ /**
875
+ * Process items on a 4D grid with the specified maximum tile size along the
876
+ * last grid dimension.
877
+ *
878
+ * The function implements a parallel version of the following snippet:
879
+ *
880
+ * for (size_t i = 0; i < range_i; i++)
881
+ * for (size_t j = 0; j < range_j; j++)
882
+ * for (size_t k = 0; k < range_k; k++)
883
+ * for (size_t l = 0; l < range_l; l += tile_l)
884
+ * function(context, i, j, k, l, min(range_l - l, tile_l));
885
+ *
886
+ * When the function returns, all items have been processed and the thread pool
887
+ * is ready for a new task.
888
+ *
889
+ * @note If multiple threads call this function with the same thread pool, the
890
+ * calls are serialized.
891
+ *
892
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
893
+ * is NULL, all items are processed serially on the calling thread.
894
+ * @param function the function to call for each tile.
895
+ * @param context the first argument passed to the specified function.
896
+ * @param range_i the number of items to process along the first dimension
897
+ * of the 4D grid.
898
+ * @param range_j the number of items to process along the second dimension
899
+ * of the 4D grid.
900
+ * @param range_k the number of items to process along the third dimension
901
+ * of the 4D grid.
902
+ * @param range_l the number of items to process along the fourth dimension
903
+ * of the 4D grid.
904
+ * @param tile_l the maximum number of items along the fourth dimension of
905
+ * the 4D grid to process in one function call.
906
+ * @param flags a bitwise combination of zero or more optional flags
907
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
908
+ */
909
+ void pthreadpool_parallelize_4d_tile_1d(pthreadpool_t threadpool,
910
+ pthreadpool_task_4d_tile_1d_t function,
911
+ void *context, size_t range_i,
912
+ size_t range_j, size_t range_k,
913
+ size_t range_l, size_t tile_l,
914
+ uint32_t flags);
915
+
916
+ /**
917
+ * Process items on a 4D grid with the specified maximum tile size along the
918
+ * last two grid dimensions.
919
+ *
920
+ * The function implements a parallel version of the following snippet:
921
+ *
922
+ * for (size_t i = 0; i < range_i; i++)
923
+ * for (size_t j = 0; j < range_j; j++)
924
+ * for (size_t k = 0; k < range_k; k += tile_k)
925
+ * for (size_t l = 0; l < range_l; l += tile_l)
926
+ * function(context, i, j, k, l,
927
+ * min(range_k - k, tile_k), min(range_l - l, tile_l));
928
+ *
929
+ * When the function returns, all items have been processed and the thread pool
930
+ * is ready for a new task.
931
+ *
932
+ * @note If multiple threads call this function with the same thread pool, the
933
+ * calls are serialized.
934
+ *
935
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
936
+ * is NULL, all items are processed serially on the calling thread.
937
+ * @param function the function to call for each tile.
938
+ * @param context the first argument passed to the specified function.
939
+ * @param range_i the number of items to process along the first dimension
940
+ * of the 4D grid.
941
+ * @param range_j the number of items to process along the second dimension
942
+ * of the 4D grid.
943
+ * @param range_k the number of items to process along the third dimension
944
+ * of the 4D grid.
945
+ * @param range_l the number of items to process along the fourth dimension
946
+ * of the 4D grid.
947
+ * @param tile_k the maximum number of items along the third dimension of
948
+ * the 4D grid to process in one function call.
949
+ * @param tile_l the maximum number of items along the fourth dimension of
950
+ * the 4D grid to process in one function call.
951
+ * @param flags a bitwise combination of zero or more optional flags
952
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
953
+ */
954
+ void pthreadpool_parallelize_4d_tile_2d(pthreadpool_t threadpool,
955
+ pthreadpool_task_4d_tile_2d_t function,
956
+ void *context, size_t range_i,
957
+ size_t range_j, size_t range_k,
958
+ size_t range_l, size_t tile_k,
959
+ size_t tile_l, uint32_t flags);
960
+
961
+ /**
962
+ * Process items on a 4D grid with the specified maximum tile size along the
963
+ * last two grid dimensions using a microarchitecture-aware task function.
964
+ *
965
+ * The function implements a parallel version of the following snippet:
966
+ *
967
+ * uint32_t uarch_index = cpuinfo_initialize() ?
968
+ * cpuinfo_get_current_uarch_index() : default_uarch_index;
969
+ * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
970
+ * for (size_t i = 0; i < range_i; i++)
971
+ * for (size_t j = 0; j < range_j; j++)
972
+ * for (size_t k = 0; k < range_k; k += tile_k)
973
+ * for (size_t l = 0; l < range_l; l += tile_l)
974
+ * function(context, uarch_index, i, j, k, l,
975
+ * min(range_k - k, tile_k), min(range_l - l, tile_l));
976
+ *
977
+ * When the function returns, all items have been processed and the thread pool
978
+ * is ready for a new task.
979
+ *
980
+ * @note If multiple threads call this function with the same thread pool, the
981
+ * calls are serialized.
982
+ *
983
+ * @param threadpool the thread pool to use for parallelisation. If
984
+ * threadpool is NULL, all items are processed serially on the calling
985
+ * thread.
986
+ * @param function the function to call for each tile.
987
+ * @param context the first argument passed to the specified
988
+ * function.
989
+ * @param default_uarch_index the microarchitecture index to use when
990
+ * pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
991
+ * or the index returned by cpuinfo_get_current_uarch_index() exceeds the
992
+ * max_uarch_index value.
993
+ * @param max_uarch_index the maximum microarchitecture index expected by
994
+ * the specified function. If the index returned by
995
+ * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
996
+ * will be used instead. default_uarch_index can exceed max_uarch_index.
997
+ * @param range_i the number of items to process along the first
998
+ * dimension of the 4D grid.
999
+ * @param range_j the number of items to process along the second
1000
+ * dimension of the 4D grid.
1001
+ * @param range_k the number of items to process along the third
1002
+ * dimension of the 4D grid.
1003
+ * @param range_l the number of items to process along the fourth
1004
+ * dimension of the 4D grid.
1005
+ * @param tile_k the maximum number of items along the third
1006
+ * dimension of the 4D grid to process in one function call.
1007
+ * @param tile_l the maximum number of items along the fourth
1008
+ * dimension of the 4D grid to process in one function call.
1009
+ * @param flags a bitwise combination of zero or more optional
1010
+ * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
1011
+ * PTHREADPOOL_FLAG_YIELD_WORKERS)
1012
+ */
1013
+ void pthreadpool_parallelize_4d_tile_2d_with_uarch(
1014
+ pthreadpool_t threadpool, pthreadpool_task_4d_tile_2d_with_id_t function,
1015
+ void *context, uint32_t default_uarch_index, uint32_t max_uarch_index,
1016
+ size_t range_i, size_t range_j, size_t range_k, size_t range_l,
1017
+ size_t tile_k, size_t tile_l, uint32_t flags);
1018
+
1019
+ /**
1020
+ * Process items on a 5D grid.
1021
+ *
1022
+ * The function implements a parallel version of the following snippet:
1023
+ *
1024
+ * for (size_t i = 0; i < range_i; i++)
1025
+ * for (size_t j = 0; j < range_j; j++)
1026
+ * for (size_t k = 0; k < range_k; k++)
1027
+ * for (size_t l = 0; l < range_l; l++)
1028
+ * for (size_t m = 0; m < range_m; m++)
1029
+ * function(context, i, j, k, l, m);
1030
+ *
1031
+ * When the function returns, all items have been processed and the thread pool
1032
+ * is ready for a new task.
1033
+ *
1034
+ * @note If multiple threads call this function with the same thread pool, the
1035
+ * calls are serialized.
1036
+ *
1037
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1038
+ * is NULL, all items are processed serially on the calling thread.
1039
+ * @param function the function to call for each item.
1040
+ * @param context the first argument passed to the specified function.
1041
+ * @param range_i the number of items to process along the first dimension
1042
+ * of the 5D grid.
1043
+ * @param range_j the number of items to process along the second dimension
1044
+ * of the 5D grid.
1045
+ * @param range_k the number of items to process along the third dimension
1046
+ * of the 5D grid.
1047
+ * @param range_l the number of items to process along the fourth dimension
1048
+ * of the 5D grid.
1049
+ * @param range_m the number of items to process along the fifth dimension
1050
+ * of the 5D grid.
1051
+ * @param flags a bitwise combination of zero or more optional flags
1052
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1053
+ */
1054
+ void pthreadpool_parallelize_5d(pthreadpool_t threadpool,
1055
+ pthreadpool_task_5d_t function, void *context,
1056
+ size_t range_i, size_t range_j, size_t range_k,
1057
+ size_t range_l, size_t range_m, uint32_t flags);
1058
+
1059
+ /**
1060
+ * Process items on a 5D grid with the specified maximum tile size along the
1061
+ * last grid dimension.
1062
+ *
1063
+ * The function implements a parallel version of the following snippet:
1064
+ *
1065
+ * for (size_t i = 0; i < range_i; i++)
1066
+ * for (size_t j = 0; j < range_j; j++)
1067
+ * for (size_t k = 0; k < range_k; k++)
1068
+ * for (size_t l = 0; l < range_l; l++)
1069
+ * for (size_t m = 0; m < range_m; m += tile_m)
1070
+ * function(context, i, j, k, l, m, min(range_m - m, tile_m));
1071
+ *
1072
+ * When the function returns, all items have been processed and the thread pool
1073
+ * is ready for a new task.
1074
+ *
1075
+ * @note If multiple threads call this function with the same thread pool, the
1076
+ * calls are serialized.
1077
+ *
1078
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1079
+ * is NULL, all items are processed serially on the calling thread.
1080
+ * @param function the function to call for each tile.
1081
+ * @param context the first argument passed to the specified function.
1082
+ * @param range_i the number of items to process along the first dimension
1083
+ * of the 5D grid.
1084
+ * @param range_j the number of items to process along the second dimension
1085
+ * of the 5D grid.
1086
+ * @param range_k the number of items to process along the third dimension
1087
+ * of the 5D grid.
1088
+ * @param range_l the number of items to process along the fourth dimension
1089
+ * of the 5D grid.
1090
+ * @param range_m the number of items to process along the fifth dimension
1091
+ * of the 5D grid.
1092
+ * @param tile_m the maximum number of items along the fifth dimension of
1093
+ * the 5D grid to process in one function call.
1094
+ * @param flags a bitwise combination of zero or more optional flags
1095
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1096
+ */
1097
+ void pthreadpool_parallelize_5d_tile_1d(pthreadpool_t threadpool,
1098
+ pthreadpool_task_5d_tile_1d_t function,
1099
+ void *context, size_t range_i,
1100
+ size_t range_j, size_t range_k,
1101
+ size_t range_l, size_t range_m,
1102
+ size_t tile_m, uint32_t flags);
1103
+
1104
+ /**
1105
+ * Process items on a 5D grid with the specified maximum tile size along the
1106
+ * last two grid dimensions.
1107
+ *
1108
+ * The function implements a parallel version of the following snippet:
1109
+ *
1110
+ * for (size_t i = 0; i < range_i; i++)
1111
+ * for (size_t j = 0; j < range_j; j++)
1112
+ * for (size_t k = 0; k < range_k; k++)
1113
+ * for (size_t l = 0; l < range_l; l += tile_l)
1114
+ * for (size_t m = 0; m < range_m; m += tile_m)
1115
+ * function(context, i, j, k, l, m,
1116
+ * min(range_l - l, tile_l), min(range_m - m, tile_m));
1117
+ *
1118
+ * When the function returns, all items have been processed and the thread pool
1119
+ * is ready for a new task.
1120
+ *
1121
+ * @note If multiple threads call this function with the same thread pool, the
1122
+ * calls are serialized.
1123
+ *
1124
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1125
+ * is NULL, all items are processed serially on the calling thread.
1126
+ * @param function the function to call for each tile.
1127
+ * @param context the first argument passed to the specified function.
1128
+ * @param range_i the number of items to process along the first dimension
1129
+ * of the 5D grid.
1130
+ * @param range_j the number of items to process along the second dimension
1131
+ * of the 5D grid.
1132
+ * @param range_k the number of items to process along the third dimension
1133
+ * of the 5D grid.
1134
+ * @param range_l the number of items to process along the fourth dimension
1135
+ * of the 5D grid.
1136
+ * @param range_m the number of items to process along the fifth dimension
1137
+ * of the 5D grid.
1138
+ * @param tile_l the maximum number of items along the fourth dimension of
1139
+ * the 5D grid to process in one function call.
1140
+ * @param tile_m the maximum number of items along the fifth dimension of
1141
+ * the 5D grid to process in one function call.
1142
+ * @param flags a bitwise combination of zero or more optional flags
1143
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1144
+ */
1145
+ void pthreadpool_parallelize_5d_tile_2d(pthreadpool_t threadpool,
1146
+ pthreadpool_task_5d_tile_2d_t function,
1147
+ void *context, size_t range_i,
1148
+ size_t range_j, size_t range_k,
1149
+ size_t range_l, size_t range_m,
1150
+ size_t tile_l, size_t tile_m,
1151
+ uint32_t flags);
1152
+
1153
+ /**
1154
+ * Process items on a 6D grid.
1155
+ *
1156
+ * The function implements a parallel version of the following snippet:
1157
+ *
1158
+ * for (size_t i = 0; i < range_i; i++)
1159
+ * for (size_t j = 0; j < range_j; j++)
1160
+ * for (size_t k = 0; k < range_k; k++)
1161
+ * for (size_t l = 0; l < range_l; l++)
1162
+ * for (size_t m = 0; m < range_m; m++)
1163
+ * for (size_t n = 0; n < range_n; n++)
1164
+ * function(context, i, j, k, l, m, n);
1165
+ *
1166
+ * When the function returns, all items have been processed and the thread pool
1167
+ * is ready for a new task.
1168
+ *
1169
+ * @note If multiple threads call this function with the same thread pool, the
1170
+ * calls are serialized.
1171
+ *
1172
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1173
+ * is NULL, all items are processed serially on the calling thread.
1174
+ * @param function the function to call for each item.
1175
+ * @param context the first argument passed to the specified function.
1176
+ * @param range_i the number of items to process along the first dimension
1177
+ * of the 6D grid.
1178
+ * @param range_j the number of items to process along the second dimension
1179
+ * of the 6D grid.
1180
+ * @param range_k the number of items to process along the third dimension
1181
+ * of the 6D grid.
1182
+ * @param range_l the number of items to process along the fourth dimension
1183
+ * of the 6D grid.
1184
+ * @param range_m the number of items to process along the fifth dimension
1185
+ * of the 6D grid.
1186
+ * @param range_n the number of items to process along the sixth dimension
1187
+ * of the 6D grid.
1190
+ * @param flags a bitwise combination of zero or more optional flags
1191
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1192
+ */
1193
+ void pthreadpool_parallelize_6d(pthreadpool_t threadpool,
1194
+ pthreadpool_task_6d_t function, void *context,
1195
+ size_t range_i, size_t range_j, size_t range_k,
1196
+ size_t range_l, size_t range_m, size_t range_n,
1197
+ uint32_t flags);
1198
+
1199
+ /**
1200
+ * Process items on a 6D grid with the specified maximum tile size along the
1201
+ * last grid dimension.
1202
+ *
1203
+ * The function implements a parallel version of the following snippet:
1204
+ *
1205
+ * for (size_t i = 0; i < range_i; i++)
1206
+ * for (size_t j = 0; j < range_j; j++)
1207
+ * for (size_t k = 0; k < range_k; k++)
1208
+ * for (size_t l = 0; l < range_l; l++)
1209
+ * for (size_t m = 0; m < range_m; m++)
1210
+ * for (size_t n = 0; n < range_n; n += tile_n)
1211
+ * function(context, i, j, k, l, m, n, min(range_n - n, tile_n));
1212
+ *
1213
+ * When the function returns, all items have been processed and the thread pool
1214
+ * is ready for a new task.
1215
+ *
1216
+ * @note If multiple threads call this function with the same thread pool, the
1217
+ * calls are serialized.
1218
+ *
1219
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1220
+ * is NULL, all items are processed serially on the calling thread.
1221
+ * @param function the function to call for each tile.
1222
+ * @param context the first argument passed to the specified function.
1223
+ * @param range_i the number of items to process along the first dimension
1224
+ * of the 6D grid.
1225
+ * @param range_j the number of items to process along the second dimension
1226
+ * of the 6D grid.
1227
+ * @param range_k the number of items to process along the third dimension
1228
+ * of the 6D grid.
1229
+ * @param range_l the number of items to process along the fourth dimension
1230
+ * of the 6D grid.
1231
+ * @param range_m the number of items to process along the fifth dimension
1232
+ * of the 6D grid.
1233
+ * @param range_n the number of items to process along the sixth dimension
1234
+ * of the 6D grid.
1235
+ * @param tile_n the maximum number of items along the sixth dimension of
1236
+ * the 6D grid to process in one function call.
1237
+ * @param flags a bitwise combination of zero or more optional flags
1238
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1239
+ */
1240
+ void pthreadpool_parallelize_6d_tile_1d(pthreadpool_t threadpool,
1241
+ pthreadpool_task_6d_tile_1d_t function,
1242
+ void *context, size_t range_i,
1243
+ size_t range_j, size_t range_k,
1244
+ size_t range_l, size_t range_m,
1245
+ size_t range_n, size_t tile_n,
1246
+ uint32_t flags);
1247
+
1248
+ /**
1249
+ * Process items on a 6D grid with the specified maximum tile size along the
1250
+ * last two grid dimensions.
1251
+ *
1252
+ * The function implements a parallel version of the following snippet:
1253
+ *
1254
+ * for (size_t i = 0; i < range_i; i++)
1255
+ * for (size_t j = 0; j < range_j; j++)
1256
+ * for (size_t k = 0; k < range_k; k++)
1257
+ * for (size_t l = 0; l < range_l; l++)
1258
+ * for (size_t m = 0; m < range_m; m += tile_m)
1259
+ * for (size_t n = 0; n < range_n; n += tile_n)
1260
+ * function(context, i, j, k, l, m, n,
1261
+ * min(range_m - m, tile_m), min(range_n - n, tile_n));
1262
+ *
1263
+ * When the function returns, all items have been processed and the thread pool
1264
+ * is ready for a new task.
1265
+ *
1266
+ * @note If multiple threads call this function with the same thread pool, the
1267
+ * calls are serialized.
1268
+ *
1269
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1270
+ * is NULL, all items are processed serially on the calling thread.
1271
+ * @param function the function to call for each tile.
1272
+ * @param context the first argument passed to the specified function.
1273
+ * @param range_i the number of items to process along the first dimension
1274
+ * of the 6D grid.
1275
+ * @param range_j the number of items to process along the second dimension
1276
+ * of the 6D grid.
1277
+ * @param range_k the number of items to process along the third dimension
1278
+ * of the 6D grid.
1279
+ * @param range_l the number of items to process along the fourth dimension
1280
+ * of the 6D grid.
1281
+ * @param range_m the number of items to process along the fifth dimension
1282
+ * of the 6D grid.
1283
+ * @param range_n the number of items to process along the sixth dimension
1284
+ * of the 6D grid.
1285
+ * @param tile_m the maximum number of items along the fifth dimension of
1286
+ * the 6D grid to process in one function call.
1287
+ * @param tile_n the maximum number of items along the sixth dimension of
1288
+ * the 6D grid to process in one function call.
1289
+ * @param flags a bitwise combination of zero or more optional flags
1290
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1291
+ */
1292
+ void pthreadpool_parallelize_6d_tile_2d(pthreadpool_t threadpool,
1293
+ pthreadpool_task_6d_tile_2d_t function,
1294
+ void *context, size_t range_i,
1295
+ size_t range_j, size_t range_k,
1296
+ size_t range_l, size_t range_m,
1297
+ size_t range_n, size_t tile_m,
1298
+ size_t tile_n, uint32_t flags);
1299
+
1300
+ /**
1301
+ * Terminates threads in the thread pool and releases associated resources.
1302
+ *
1303
+ * @warning Accessing the thread pool after a call to this function constitutes
1304
+ * undefined behaviour and may cause data corruption.
1305
+ *
1306
+ * @param[in,out] threadpool The thread pool to destroy.
+ *
+ * NOTE(review): this declaration does not state whether a NULL threadpool is
+ * accepted, nor what happens if a parallelize call is still in flight on
+ * another thread -- confirm against the implementation before relying on
+ * either.
1307
+ */
1308
+ void pthreadpool_destroy(pthreadpool_t threadpool);
1309
+
1310
+ #ifndef PTHREADPOOL_NO_DEPRECATED_API
1311
+
1312
+ /*
+ * Legacy API for compatibility with pre-existing users (e.g. NNPACK).
+ *
+ * Everything between here and the matching #endif is compiled only when
+ * PTHREADPOOL_NO_DEPRECATED_API is not defined. Each pthreadpool_compute_*
+ * declaration carries PTHREADPOOL_DEPRECATED, which expands to
+ * __attribute__((__deprecated__)) when __GNUC__ is defined and to nothing
+ * otherwise, so other compilers emit no deprecation diagnostic.
+ *
+ * NOTE(review): the pthreadpool_function_*_t callbacks presumably receive the
+ * `argument` pointer first, followed by item indices and then tile sizes --
+ * the parameters are unnamed here, so confirm against the implementation.
+ */
1313
+ #if defined(__GNUC__)
1314
+ #define PTHREADPOOL_DEPRECATED __attribute__((__deprecated__))
1315
+ #else
1316
+ #define PTHREADPOOL_DEPRECATED
1317
+ #endif
1318
+
1319
+ typedef void (*pthreadpool_function_1d_t)(void *, size_t);
1320
+ typedef void (*pthreadpool_function_1d_tiled_t)(void *, size_t, size_t);
1321
+ typedef void (*pthreadpool_function_2d_t)(void *, size_t, size_t);
1322
+ typedef void (*pthreadpool_function_2d_tiled_t)(void *, size_t, size_t, size_t,
1323
+ size_t);
1324
+ typedef void (*pthreadpool_function_3d_tiled_t)(void *, size_t, size_t, size_t,
1325
+ size_t, size_t, size_t);
1326
+ typedef void (*pthreadpool_function_4d_tiled_t)(void *, size_t, size_t, size_t,
1327
+ size_t, size_t, size_t, size_t,
1328
+ size_t);
1329
+
1330
+ void pthreadpool_compute_1d(pthreadpool_t threadpool,
1331
+ pthreadpool_function_1d_t function, void *argument,
1332
+ size_t range) PTHREADPOOL_DEPRECATED;
1333
+
1334
+ void pthreadpool_compute_1d_tiled(pthreadpool_t threadpool,
1335
+ pthreadpool_function_1d_tiled_t function,
1336
+ void *argument, size_t range,
1337
+ size_t tile) PTHREADPOOL_DEPRECATED;
1338
+
1339
+ void pthreadpool_compute_2d(pthreadpool_t threadpool,
1340
+ pthreadpool_function_2d_t function, void *argument,
1341
+ size_t range_i,
1342
+ size_t range_j) PTHREADPOOL_DEPRECATED;
1343
+
1344
+ void pthreadpool_compute_2d_tiled(pthreadpool_t threadpool,
1345
+ pthreadpool_function_2d_tiled_t function,
1346
+ void *argument, size_t range_i,
1347
+ size_t range_j, size_t tile_i,
1348
+ size_t tile_j) PTHREADPOOL_DEPRECATED;
1349
+
1350
+ void pthreadpool_compute_3d_tiled(pthreadpool_t threadpool,
1351
+ pthreadpool_function_3d_tiled_t function,
1352
+ void *argument, size_t range_i,
1353
+ size_t range_j, size_t range_k, size_t tile_i,
1354
+ size_t tile_j,
1355
+ size_t tile_k) PTHREADPOOL_DEPRECATED;
1356
+
1357
+ void pthreadpool_compute_4d_tiled(pthreadpool_t threadpool,
1358
+ pthreadpool_function_4d_tiled_t function,
1359
+ void *argument, size_t range_i,
1360
+ size_t range_j, size_t range_k,
1361
+ size_t range_l, size_t tile_i, size_t tile_j,
1362
+ size_t tile_k,
1363
+ size_t tile_l) PTHREADPOOL_DEPRECATED;
1364
+
1365
+ #endif /* PTHREADPOOL_NO_DEPRECATED_API */
1366
+
1367
+ #ifdef __cplusplus
1368
+ } /* extern "C" */
1369
+ #endif
1370
+
1371
+ #ifdef __cplusplus
1372
+
1373
namespace libpthreadpool {
namespace detail {

/*
 * Trampolines that adapt a C++ functor to the C task-callback signatures.
 *
 * Each call_wrapper_* receives the functor as an opaque `void *` context,
 * casts it back to `const T *` and invokes it with the remaining arguments
 * forwarded unchanged. For the tiled variants the leading size_t arguments
 * are item indices (tile start positions) and the trailing `tile_*`
 * arguments are the sizes of the current tile along the tiled dimensions.
 *
 * These templates live in a named namespace on purpose. In an unnamed
 * namespace every translation unit would get its own distinct copy, and the
 * inline pthreadpool_parallelize_* templates (which have external linkage and
 * take the address of these wrappers) would then refer to a different entity
 * in each translation unit, which the one-definition rule forbids. Function
 * templates may be defined in several translation units, so no unnamed
 * namespace or `static` is needed to keep this header safe to include
 * everywhere.
 */

/* 1D grid: functor(i). */
template <class T> void call_wrapper_1d(void *functor, size_t i) {
  const T &callee = *static_cast<const T *>(functor);
  callee(i);
}

/* 1D grid, tiled: functor(i, tile_i). */
template <class T>
void call_wrapper_1d_tile_1d(void *functor, size_t i, size_t tile_i) {
  const T &callee = *static_cast<const T *>(functor);
  callee(i, tile_i);
}

/* 2D grid: functor(i, j). */
template <class T> void call_wrapper_2d(void *functor, size_t i, size_t j) {
  const T &callee = *static_cast<const T *>(functor);
  callee(i, j);
}

/* 2D grid, last dimension tiled: functor(i, j, tile_j). */
template <class T>
void call_wrapper_2d_tile_1d(void *functor, size_t i, size_t j,
                             size_t tile_j) {
  const T &callee = *static_cast<const T *>(functor);
  callee(i, j, tile_j);
}

/* 2D grid, both dimensions tiled: functor(i, j, tile_i, tile_j). */
template <class T>
void call_wrapper_2d_tile_2d(void *functor, size_t i, size_t j, size_t tile_i,
                             size_t tile_j) {
  const T &callee = *static_cast<const T *>(functor);
  callee(i, j, tile_i, tile_j);
}

/* 3D grid: functor(i, j, k). */
template <class T>
void call_wrapper_3d(void *functor, size_t i, size_t j, size_t k) {
  const T &callee = *static_cast<const T *>(functor);
  callee(i, j, k);
}

/* 3D grid, last dimension tiled: functor(i, j, k, tile_k). */
template <class T>
void call_wrapper_3d_tile_1d(void *functor, size_t i, size_t j, size_t k,
                             size_t tile_k) {
  const T &callee = *static_cast<const T *>(functor);
  callee(i, j, k, tile_k);
}

/* 3D grid, last two dimensions tiled: functor(i, j, k, tile_j, tile_k). */
template <class T>
void call_wrapper_3d_tile_2d(void *functor, size_t i, size_t j, size_t k,
                             size_t tile_j, size_t tile_k) {
  const T &callee = *static_cast<const T *>(functor);
  callee(i, j, k, tile_j, tile_k);
}

/* 4D grid: functor(i, j, k, l). */
template <class T>
void call_wrapper_4d(void *functor, size_t i, size_t j, size_t k, size_t l) {
  const T &callee = *static_cast<const T *>(functor);
  callee(i, j, k, l);
}

/* 4D grid, last dimension tiled: functor(i, j, k, l, tile_l). */
template <class T>
void call_wrapper_4d_tile_1d(void *functor, size_t i, size_t j, size_t k,
                             size_t l, size_t tile_l) {
  const T &callee = *static_cast<const T *>(functor);
  callee(i, j, k, l, tile_l);
}

/* 4D grid, last two dimensions tiled: functor(i, j, k, l, tile_k, tile_l). */
template <class T>
void call_wrapper_4d_tile_2d(void *functor, size_t i, size_t j, size_t k,
                             size_t l, size_t tile_k, size_t tile_l) {
  const T &callee = *static_cast<const T *>(functor);
  callee(i, j, k, l, tile_k, tile_l);
}

/* 5D grid: functor(i, j, k, l, m). */
template <class T>
void call_wrapper_5d(void *functor, size_t i, size_t j, size_t k, size_t l,
                     size_t m) {
  const T &callee = *static_cast<const T *>(functor);
  callee(i, j, k, l, m);
}

/* 5D grid, last dimension tiled: functor(i, j, k, l, m, tile_m). */
template <class T>
void call_wrapper_5d_tile_1d(void *functor, size_t i, size_t j, size_t k,
                             size_t l, size_t m, size_t tile_m) {
  const T &callee = *static_cast<const T *>(functor);
  callee(i, j, k, l, m, tile_m);
}

/* 5D grid, last two dimensions tiled:
 * functor(i, j, k, l, m, tile_l, tile_m). */
template <class T>
void call_wrapper_5d_tile_2d(void *functor, size_t i, size_t j, size_t k,
                             size_t l, size_t m, size_t tile_l,
                             size_t tile_m) {
  const T &callee = *static_cast<const T *>(functor);
  callee(i, j, k, l, m, tile_l, tile_m);
}

/* 6D grid: functor(i, j, k, l, m, n). */
template <class T>
void call_wrapper_6d(void *functor, size_t i, size_t j, size_t k, size_t l,
                     size_t m, size_t n) {
  const T &callee = *static_cast<const T *>(functor);
  callee(i, j, k, l, m, n);
}

/* 6D grid, last dimension tiled: functor(i, j, k, l, m, n, tile_n). */
template <class T>
void call_wrapper_6d_tile_1d(void *functor, size_t i, size_t j, size_t k,
                             size_t l, size_t m, size_t n, size_t tile_n) {
  const T &callee = *static_cast<const T *>(functor);
  callee(i, j, k, l, m, n, tile_n);
}

/* 6D grid, last two dimensions tiled:
 * functor(i, j, k, l, m, n, tile_m, tile_n). */
template <class T>
void call_wrapper_6d_tile_2d(void *functor, size_t i, size_t j, size_t k,
                             size_t l, size_t m, size_t n, size_t tile_m,
                             size_t tile_n) {
  const T &callee = *static_cast<const T *>(functor);
  callee(i, j, k, l, m, n, tile_m, tile_n);
}

} /* namespace detail */
} /* namespace libpthreadpool */
1479
+
1480
+ /**
1481
+ * Process items on a 1D grid.
1482
+ *
1483
+ * The function implements a parallel version of the following snippet:
1484
+ *
1485
+ * for (size_t i = 0; i < range; i++)
1486
+ * functor(i);
1487
+ *
1488
+ * When the function returns, all items have been processed and the thread pool
1489
+ * is ready for a new task.
1490
+ *
1491
+ * @note If multiple threads call this function with the same thread pool, the
1492
+ * calls are serialized.
1493
+ *
1494
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1495
+ * is NULL, all items are processed serially on the calling thread.
1496
+ * @param functor the functor to call for each item.
1497
+ * @param range the number of items on the 1D grid to process. The
1498
+ * specified functor will be called once for each item.
1499
+ * @param flags a bitwise combination of zero or more optional flags
1500
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1501
+ */
1502
+ template <class T>
1503
+ inline void pthreadpool_parallelize_1d(pthreadpool_t threadpool,
1504
+ const T &functor, size_t range,
1505
+ uint32_t flags = 0) {
1506
+ pthreadpool_parallelize_1d(
1507
+ threadpool, &libpthreadpool::detail::call_wrapper_1d<const T>,
1508
+ const_cast<void *>(static_cast<const void *>(&functor)), range, flags);
1509
+ }
1510
+
1511
+ /**
1512
+ * Process items on a 1D grid with specified maximum tile size.
1513
+ *
1514
+ * The function implements a parallel version of the following snippet:
1515
+ *
1516
+ * for (size_t i = 0; i < range; i += tile)
1517
+ * functor(i, min(range - i, tile));
1518
+ *
1519
+ * When the call returns, all items have been processed and the thread pool is
1520
+ * ready for a new task.
1521
+ *
1522
+ * @note If multiple threads call this function with the same thread pool,
1523
+ * the calls are serialized.
1524
+ *
1525
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1526
+ * is NULL, all items are processed serially on the calling thread.
1527
+ * @param functor the functor to call for each tile.
1528
+ * @param range the number of items on the 1D grid to process.
1529
+ * @param tile the maximum number of items on the 1D grid to process in
1530
+ * one functor call.
1531
+ * @param flags a bitwise combination of zero or more optional flags
1532
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1533
+ */
1534
+ template <class T>
1535
+ inline void pthreadpool_parallelize_1d_tile_1d(pthreadpool_t threadpool,
1536
+ const T &functor, size_t range,
1537
+ size_t tile,
1538
+ uint32_t flags = 0) {
1539
+ pthreadpool_parallelize_1d_tile_1d(
1540
+ threadpool, &libpthreadpool::detail::call_wrapper_1d_tile_1d<const T>,
1541
+ const_cast<void *>(static_cast<const void *>(&functor)), range, tile,
1542
+ flags);
1543
+ }
1544
+
1545
+ /**
1546
+ * Process items on a 2D grid.
1547
+ *
1548
+ * The function implements a parallel version of the following snippet:
1549
+ *
1550
+ * for (size_t i = 0; i < range_i; i++)
1551
+ * for (size_t j = 0; j < range_j; j++)
1552
+ * functor(i, j);
1553
+ *
1554
+ * When the function returns, all items have been processed and the thread pool
1555
+ * is ready for a new task.
1556
+ *
1557
+ * @note If multiple threads call this function with the same thread pool, the
1558
+ * calls are serialized.
1559
+ *
1560
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1561
+ * is NULL, all items are processed serially on the calling thread.
1562
+ * @param functor the functor to call for each item.
1563
+ * @param range_i the number of items to process along the first dimension
1564
+ * of the 2D grid.
1565
+ * @param range_j the number of items to process along the second dimension
1566
+ * of the 2D grid.
1567
+ * @param flags a bitwise combination of zero or more optional flags
1568
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1569
+ */
1570
+ template <class T>
1571
+ inline void pthreadpool_parallelize_2d(pthreadpool_t threadpool,
1572
+ const T &functor, size_t range_i,
1573
+ size_t range_j, uint32_t flags = 0) {
1574
+ pthreadpool_parallelize_2d(
1575
+ threadpool, &libpthreadpool::detail::call_wrapper_2d<const T>,
1576
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1577
+ flags);
1578
+ }
1579
+
1580
+ /**
1581
+ * Process items on a 2D grid with the specified maximum tile size along the
1582
+ * last grid dimension.
1583
+ *
1584
+ * The function implements a parallel version of the following snippet:
1585
+ *
1586
+ * for (size_t i = 0; i < range_i; i++)
1587
+ * for (size_t j = 0; j < range_j; j += tile_j)
1588
+ * functor(i, j, min(range_j - j, tile_j));
1589
+ *
1590
+ * When the function returns, all items have been processed and the thread pool
1591
+ * is ready for a new task.
1592
+ *
1593
+ * @note If multiple threads call this function with the same thread pool, the
1594
+ * calls are serialized.
1595
+ *
1596
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1597
+ * is NULL, all items are processed serially on the calling thread.
1598
+ * @param functor the functor to call for each tile.
1599
+ * @param range_i the number of items to process along the first dimension
1600
+ * of the 2D grid.
1601
+ * @param range_j the number of items to process along the second dimension
1602
+ * of the 2D grid.
1603
+ * @param tile_j the maximum number of items along the second dimension of
1604
+ * the 2D grid to process in one functor call.
1605
+ * @param flags a bitwise combination of zero or more optional flags
1606
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1607
+ */
1608
+ template <class T>
1609
+ inline void pthreadpool_parallelize_2d_tile_1d(pthreadpool_t threadpool,
1610
+ const T &functor, size_t range_i,
1611
+ size_t range_j, size_t tile_j,
1612
+ uint32_t flags = 0) {
1613
+ pthreadpool_parallelize_2d_tile_1d(
1614
+ threadpool, &libpthreadpool::detail::call_wrapper_2d_tile_1d<const T>,
1615
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1616
+ tile_j, flags);
1617
+ }
1618
+
1619
+ /**
1620
+ * Process items on a 2D grid with the specified maximum tile size along each
1621
+ * grid dimension.
1622
+ *
1623
+ * The function implements a parallel version of the following snippet:
1624
+ *
1625
+ * for (size_t i = 0; i < range_i; i += tile_i)
1626
+ * for (size_t j = 0; j < range_j; j += tile_j)
1627
+ * functor(i, j,
1628
+ * min(range_i - i, tile_i), min(range_j - j, tile_j));
1629
+ *
1630
+ * When the function returns, all items have been processed and the thread pool
1631
+ * is ready for a new task.
1632
+ *
1633
+ * @note If multiple threads call this function with the same thread pool, the
1634
+ * calls are serialized.
1635
+ *
1636
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1637
+ * is NULL, all items are processed serially on the calling thread.
1638
+ * @param functor the functor to call for each tile.
1639
+ * @param range_i the number of items to process along the first dimension
1640
+ * of the 2D grid.
1641
+ * @param range_j the number of items to process along the second dimension
1642
+ * of the 2D grid.
1643
+ * @param tile_i the maximum number of items along the first dimension of
1644
+ * the 2D grid to process in one functor call.
1645
+ * @param tile_j the maximum number of items along the second dimension of
1646
+ * the 2D grid to process in one functor call.
1647
+ * @param flags a bitwise combination of zero or more optional flags
1648
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1649
+ */
1650
+ template <class T>
1651
+ inline void pthreadpool_parallelize_2d_tile_2d(pthreadpool_t threadpool,
1652
+ const T &functor, size_t range_i,
1653
+ size_t range_j, size_t tile_i,
1654
+ size_t tile_j,
1655
+ uint32_t flags = 0) {
1656
+ pthreadpool_parallelize_2d_tile_2d(
1657
+ threadpool, &libpthreadpool::detail::call_wrapper_2d_tile_2d<const T>,
1658
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1659
+ tile_i, tile_j, flags);
1660
+ }
1661
+
1662
+ /**
1663
+ * Process items on a 3D grid.
1664
+ *
1665
+ * The function implements a parallel version of the following snippet:
1666
+ *
1667
+ * for (size_t i = 0; i < range_i; i++)
1668
+ * for (size_t j = 0; j < range_j; j++)
1669
+ * for (size_t k = 0; k < range_k; k++)
1670
+ * functor(i, j, k);
1671
+ *
1672
+ * When the function returns, all items have been processed and the thread pool
1673
+ * is ready for a new task.
1674
+ *
1675
+ * @note If multiple threads call this function with the same thread pool, the
1676
+ * calls are serialized.
1677
+ *
1678
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1679
+ * is NULL, all items are processed serially on the calling thread.
1680
+ * @param functor the functor to call for each item.
1681
+ * @param range_i the number of items to process along the first dimension
1682
+ * of the 3D grid.
1683
+ * @param range_j the number of items to process along the second dimension
1684
+ * of the 3D grid.
1685
+ * @param range_k the number of items to process along the third dimension
1686
+ * of the 3D grid.
1687
+ * @param flags a bitwise combination of zero or more optional flags
1688
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1689
+ */
1690
+ template <class T>
1691
+ inline void pthreadpool_parallelize_3d(pthreadpool_t threadpool,
1692
+ const T &functor, size_t range_i,
1693
+ size_t range_j, size_t range_k,
1694
+ uint32_t flags = 0) {
1695
+ pthreadpool_parallelize_3d(
1696
+ threadpool, &libpthreadpool::detail::call_wrapper_3d<const T>,
1697
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1698
+ range_k, flags);
1699
+ }
1700
+
1701
+ /**
1702
+ * Process items on a 3D grid with the specified maximum tile size along the
1703
+ * last grid dimension.
1704
+ *
1705
+ * The function implements a parallel version of the following snippet:
1706
+ *
1707
+ * for (size_t i = 0; i < range_i; i++)
1708
+ * for (size_t j = 0; j < range_j; j++)
1709
+ * for (size_t k = 0; k < range_k; k += tile_k)
1710
+ * functor(i, j, k, min(range_k - k, tile_k));
1711
+ *
1712
+ * When the function returns, all items have been processed and the thread pool
1713
+ * is ready for a new task.
1714
+ *
1715
+ * @note If multiple threads call this function with the same thread pool, the
1716
+ * calls are serialized.
1717
+ *
1718
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1719
+ * is NULL, all items are processed serially on the calling thread.
1720
+ * @param functor the functor to call for each tile.
1721
+ * @param range_i the number of items to process along the first dimension
1722
+ * of the 3D grid.
1723
+ * @param range_j the number of items to process along the second dimension
1724
+ * of the 3D grid.
1725
+ * @param range_k the number of items to process along the third dimension
1726
+ * of the 3D grid.
1727
+ * @param tile_k the maximum number of items along the third dimension of
1728
+ * the 3D grid to process in one functor call.
1729
+ * @param flags a bitwise combination of zero or more optional flags
1730
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1731
+ */
1732
+ template <class T>
1733
+ inline void pthreadpool_parallelize_3d_tile_1d(pthreadpool_t threadpool,
1734
+ const T &functor, size_t range_i,
1735
+ size_t range_j, size_t range_k,
1736
+ size_t tile_k,
1737
+ uint32_t flags = 0) {
1738
+ pthreadpool_parallelize_3d_tile_1d(
1739
+ threadpool, &libpthreadpool::detail::call_wrapper_3d_tile_1d<const T>,
1740
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1741
+ range_k, tile_k, flags);
1742
+ }
1743
+
1744
+ /**
1745
+ * Process items on a 3D grid with the specified maximum tile size along the
1746
+ * last two grid dimensions.
1747
+ *
1748
+ * The function implements a parallel version of the following snippet:
1749
+ *
1750
+ * for (size_t i = 0; i < range_i; i++)
1751
+ * for (size_t j = 0; j < range_j; j += tile_j)
1752
+ * for (size_t k = 0; k < range_k; k += tile_k)
1753
+ * functor(i, j, k,
1754
+ * min(range_j - j, tile_j), min(range_k - k, tile_k));
1755
+ *
1756
+ * When the function returns, all items have been processed and the thread pool
1757
+ * is ready for a new task.
1758
+ *
1759
+ * @note If multiple threads call this function with the same thread pool, the
1760
+ * calls are serialized.
1761
+ *
1762
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1763
+ * is NULL, all items are processed serially on the calling thread.
1764
+ * @param functor the functor to call for each tile.
1765
+ * @param range_i the number of items to process along the first dimension
1766
+ * of the 3D grid.
1767
+ * @param range_j the number of items to process along the second dimension
1768
+ * of the 3D grid.
1769
+ * @param range_k the number of items to process along the third dimension
1770
+ * of the 3D grid.
1771
+ * @param tile_j the maximum number of items along the second dimension of
1772
+ * the 3D grid to process in one functor call.
1773
+ * @param tile_k the maximum number of items along the third dimension of
1774
+ * the 3D grid to process in one functor call.
1775
+ * @param flags a bitwise combination of zero or more optional flags
1776
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1777
+ */
1778
+ template <class T>
1779
+ inline void pthreadpool_parallelize_3d_tile_2d(pthreadpool_t threadpool,
1780
+ const T &functor, size_t range_i,
1781
+ size_t range_j, size_t range_k,
1782
+ size_t tile_j, size_t tile_k,
1783
+ uint32_t flags = 0) {
1784
+ pthreadpool_parallelize_3d_tile_2d(
1785
+ threadpool, &libpthreadpool::detail::call_wrapper_3d_tile_2d<const T>,
1786
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1787
+ range_k, tile_j, tile_k, flags);
1788
+ }
1789
+
1790
+ /**
1791
+ * Process items on a 4D grid.
1792
+ *
1793
+ * The function implements a parallel version of the following snippet:
1794
+ *
1795
+ * for (size_t i = 0; i < range_i; i++)
1796
+ * for (size_t j = 0; j < range_j; j++)
1797
+ * for (size_t k = 0; k < range_k; k++)
1798
+ * for (size_t l = 0; l < range_l; l++)
1799
+ * functor(i, j, k, l);
1800
+ *
1801
+ * When the function returns, all items have been processed and the thread pool
1802
+ * is ready for a new task.
1803
+ *
1804
+ * @note If multiple threads call this function with the same thread pool, the
1805
+ * calls are serialized.
1806
+ *
1807
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1808
+ * is NULL, all items are processed serially on the calling thread.
1809
+ * @param functor the functor to call for each item.
1810
+ * @param range_i the number of items to process along the first dimension
1811
+ * of the 4D grid.
1812
+ * @param range_j the number of items to process along the second dimension
1813
+ * of the 4D grid.
1814
+ * @param range_k the number of items to process along the third dimension
1815
+ * of the 4D grid.
1816
+ * @param range_l the number of items to process along the fourth dimension
1817
+ * of the 4D grid.
1818
+ * @param flags a bitwise combination of zero or more optional flags
1819
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1820
+ */
1821
+ template <class T>
1822
+ inline void pthreadpool_parallelize_4d(pthreadpool_t threadpool,
1823
+ const T &functor, size_t range_i,
1824
+ size_t range_j, size_t range_k,
1825
+ size_t range_l, uint32_t flags = 0) {
1826
+ pthreadpool_parallelize_4d(
1827
+ threadpool, &libpthreadpool::detail::call_wrapper_4d<const T>,
1828
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1829
+ range_k, range_l, flags);
1830
+ }
1831
+
1832
+ /**
1833
+ * Process items on a 4D grid with the specified maximum tile size along the
1834
+ * last grid dimension.
1835
+ *
1836
+ * The function implements a parallel version of the following snippet:
1837
+ *
1838
+ * for (size_t i = 0; i < range_i; i++)
1839
+ * for (size_t j = 0; j < range_j; j++)
1840
+ * for (size_t k = 0; k < range_k; k++)
1841
+ * for (size_t l = 0; l < range_l; l += tile_l)
1842
+ * functor(i, j, k, l, min(range_l - l, tile_l));
1843
+ *
1844
+ * When the function returns, all items have been processed and the thread pool
1845
+ * is ready for a new task.
1846
+ *
1847
+ * @note If multiple threads call this function with the same thread pool, the
1848
+ * calls are serialized.
1849
+ *
1850
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1851
+ * is NULL, all items are processed serially on the calling thread.
1852
+ * @param functor the functor to call for each tile.
1853
+ * @param range_i the number of items to process along the first dimension
1854
+ * of the 4D grid.
1855
+ * @param range_j the number of items to process along the second dimension
1856
+ * of the 4D grid.
1857
+ * @param range_k the number of items to process along the third dimension
1858
+ * of the 4D grid.
1859
+ * @param range_l the number of items to process along the fourth dimension
1860
+ * of the 4D grid.
1861
+ * @param tile_l the maximum number of items along the fourth dimension of
1862
+ * the 4D grid to process in one functor call.
1863
+ * @param flags a bitwise combination of zero or more optional flags
1864
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1865
+ */
1866
+ template <class T>
1867
+ inline void pthreadpool_parallelize_4d_tile_1d(pthreadpool_t threadpool,
1868
+ const T &functor, size_t range_i,
1869
+ size_t range_j, size_t range_k,
1870
+ size_t range_l, size_t tile_l,
1871
+ uint32_t flags = 0) {
1872
+ pthreadpool_parallelize_4d_tile_1d(
1873
+ threadpool, &libpthreadpool::detail::call_wrapper_4d_tile_1d<const T>,
1874
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1875
+ range_k, range_l, tile_l, flags);
1876
+ }
1877
+
1878
+ /**
1879
+ * Process items on a 4D grid with the specified maximum tile size along the
1880
+ * last two grid dimensions.
1881
+ *
1882
+ * The function implements a parallel version of the following snippet:
1883
+ *
1884
+ * for (size_t i = 0; i < range_i; i++)
1885
+ * for (size_t j = 0; j < range_j; j++)
1886
+ * for (size_t k = 0; k < range_k; k += tile_k)
1887
+ * for (size_t l = 0; l < range_l; l += tile_l)
1888
+ * functor(i, j, k, l,
1889
+ * min(range_k - k, tile_k), min(range_l - l, tile_l));
1890
+ *
1891
+ * When the function returns, all items have been processed and the thread pool
1892
+ * is ready for a new task.
1893
+ *
1894
+ * @note If multiple threads call this function with the same thread pool, the
1895
+ * calls are serialized.
1896
+ *
1897
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1898
+ * is NULL, all items are processed serially on the calling thread.
1899
+ * @param functor the functor to call for each tile.
1900
+ * @param range_i the number of items to process along the first dimension
1901
+ * of the 4D grid.
1902
+ * @param range_j the number of items to process along the second dimension
1903
+ * of the 4D grid.
1904
+ * @param range_k the number of items to process along the third dimension
1905
+ * of the 4D grid.
1906
+ * @param range_l the number of items to process along the fourth dimension
1907
+ * of the 4D grid.
1908
+ * @param tile_k the maximum number of items along the third dimension of
1909
+ * the 4D grid to process in one functor call.
1910
+ * @param tile_l the maximum number of items along the fourth dimension of
1911
+ * the 4D grid to process in one functor call.
1912
+ * @param flags a bitwise combination of zero or more optional flags
1913
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1914
+ */
1915
+ template <class T>
1916
+ inline void pthreadpool_parallelize_4d_tile_2d(pthreadpool_t threadpool,
1917
+ const T &functor, size_t range_i,
1918
+ size_t range_j, size_t range_k,
1919
+ size_t range_l, size_t tile_k,
1920
+ size_t tile_l,
1921
+ uint32_t flags = 0) {
1922
+ pthreadpool_parallelize_4d_tile_2d(
1923
+ threadpool, &libpthreadpool::detail::call_wrapper_4d_tile_2d<const T>,
1924
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1925
+ range_k, range_l, tile_k, tile_l, flags);
1926
+ }
1927
+
1928
+ /**
1929
+ * Process items on a 5D grid.
1930
+ *
1931
+ * The function implements a parallel version of the following snippet:
1932
+ *
1933
+ * for (size_t i = 0; i < range_i; i++)
1934
+ * for (size_t j = 0; j < range_j; j++)
1935
+ * for (size_t k = 0; k < range_k; k++)
1936
+ * for (size_t l = 0; l < range_l; l++)
1937
+ * for (size_t m = 0; m < range_m; m++)
1938
+ * functor(i, j, k, l, m);
1939
+ *
1940
+ * When the function returns, all items have been processed and the thread pool
1941
+ * is ready for a new task.
1942
+ *
1943
+ * @note If multiple threads call this function with the same thread pool, the
1944
+ * calls are serialized.
1945
+ *
1946
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1947
+ * is NULL, all items are processed serially on the calling thread.
1948
+ * @param functor the functor to call for each item.
1949
+ * @param range_i the number of items to process along the first dimension
1950
+ * of the 5D grid.
1951
+ * @param range_j the number of items to process along the second dimension
1952
+ * of the 5D grid.
1953
+ * @param range_k the number of items to process along the third dimension
1954
+ * of the 5D grid.
1955
+ * @param range_l the number of items to process along the fourth dimension
1956
+ * of the 5D grid.
1957
+ * @param range_m the number of items to process along the fifth dimension
1958
+ * of the 5D grid.
1959
+ * @param flags a bitwise combination of zero or more optional flags
1960
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1961
+ */
1962
+ template <class T>
1963
+ inline void
1964
+ pthreadpool_parallelize_5d(pthreadpool_t threadpool, const T &functor,
1965
+ size_t range_i, size_t range_j, size_t range_k,
1966
+ size_t range_l, size_t range_m, uint32_t flags = 0) {
1967
+ pthreadpool_parallelize_5d(
1968
+ threadpool, &libpthreadpool::detail::call_wrapper_5d<const T>,
1969
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1970
+ range_k, range_l, range_m, flags);
1971
+ }
1972
+
1973
+ /**
1974
+ * Process items on a 5D grid with the specified maximum tile size along the
1975
+ * last grid dimension.
1976
+ *
1977
+ * The function implements a parallel version of the following snippet:
1978
+ *
1979
+ * for (size_t i = 0; i < range_i; i++)
1980
+ * for (size_t j = 0; j < range_j; j++)
1981
+ * for (size_t k = 0; k < range_k; k++)
1982
+ * for (size_t l = 0; l < range_l; l++)
1983
+ * for (size_t m = 0; m < range_m; m += tile_m)
1984
+ * functor(i, j, k, l, m, min(range_m - m, tile_m));
1985
+ *
1986
+ * When the function returns, all items have been processed and the thread pool
1987
+ * is ready for a new task.
1988
+ *
1989
+ * @note If multiple threads call this function with the same thread pool, the
1990
+ * calls are serialized.
1991
+ *
1992
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1993
+ * is NULL, all items are processed serially on the calling thread.
1994
+ * @param functor the functor to call for each tile.
1995
+ * @param range_i the number of items to process along the first dimension
1996
+ * of the 5D grid.
1997
+ * @param range_j the number of items to process along the second dimension
1998
+ * of the 5D grid.
1999
+ * @param range_k the number of items to process along the third dimension
2000
+ * of the 5D grid.
2001
+ * @param range_l the number of items to process along the fourth dimension
2002
+ * of the 5D grid.
2003
+ * @param range_m the number of items to process along the fifth dimension
2004
+ * of the 5D grid.
2005
+ * @param tile_m the maximum number of items along the fifth dimension of
2006
+ * the 5D grid to process in one functor call.
2007
+ * @param flags a bitwise combination of zero or more optional flags
2008
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
2009
+ */
2010
+ template <class T>
2011
+ inline void pthreadpool_parallelize_5d_tile_1d(pthreadpool_t threadpool,
2012
+ const T &functor, size_t range_i,
2013
+ size_t range_j, size_t range_k,
2014
+ size_t range_l, size_t range_m,
2015
+ size_t tile_m,
2016
+ uint32_t flags = 0) {
2017
+ pthreadpool_parallelize_5d_tile_1d(
2018
+ threadpool, &libpthreadpool::detail::call_wrapper_5d_tile_1d<const T>,
2019
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
2020
+ range_k, range_l, range_m, tile_m, flags);
2021
+ }
2022
+
2023
+ /**
2024
+ * Process items on a 5D grid with the specified maximum tile size along the
2025
+ * last two grid dimensions.
2026
+ *
2027
+ * The function implements a parallel version of the following snippet:
2028
+ *
2029
+ * for (size_t i = 0; i < range_i; i++)
2030
+ * for (size_t j = 0; j < range_j; j++)
2031
+ * for (size_t k = 0; k < range_k; k++)
2032
+ * for (size_t l = 0; l < range_l; l += tile_l)
2033
+ * for (size_t m = 0; m < range_m; m += tile_m)
2034
+ * functor(i, j, k, l, m,
2035
+ * min(range_l - l, tile_l), min(range_m - m, tile_m));
2036
+ *
2037
+ * When the function returns, all items have been processed and the thread pool
2038
+ * is ready for a new task.
2039
+ *
2040
+ * @note If multiple threads call this function with the same thread pool, the
2041
+ * calls are serialized.
2042
+ *
2043
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
2044
+ * is NULL, all items are processed serially on the calling thread.
2045
+ * @param functor the functor to call for each tile.
2046
+ * @param range_i the number of items to process along the first dimension
2047
+ * of the 5D grid.
2048
+ * @param range_j the number of items to process along the second dimension
2049
+ * of the 5D grid.
2050
+ * @param range_k the number of items to process along the third dimension
2051
+ * of the 5D grid.
2052
+ * @param range_l the number of items to process along the fourth dimension
2053
+ * of the 5D grid.
2054
+ * @param range_m the number of items to process along the fifth dimension
2055
+ * of the 5D grid.
2056
+ * @param tile_l the maximum number of items along the fourth dimension of
2057
+ * the 5D grid to process in one functor call.
2058
+ * @param tile_m the maximum number of items along the fifth dimension of
2059
+ * the 5D grid to process in one functor call.
2060
+ * @param flags a bitwise combination of zero or more optional flags
2061
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
2062
+ */
2063
+ template <class T>
2064
+ inline void pthreadpool_parallelize_5d_tile_2d(pthreadpool_t threadpool,
2065
+ const T &functor, size_t range_i,
2066
+ size_t range_j, size_t range_k,
2067
+ size_t range_l, size_t range_m,
2068
+ size_t tile_l, size_t tile_m,
2069
+ uint32_t flags = 0) {
2070
+ pthreadpool_parallelize_5d_tile_2d(
2071
+ threadpool, &libpthreadpool::detail::call_wrapper_5d_tile_2d<const T>,
2072
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
2073
+ range_k, range_l, range_m, tile_l, tile_m, flags);
2074
+ }
2075
+
2076
+ /**
2077
+ * Process items on a 6D grid.
2078
+ *
2079
+ * The function implements a parallel version of the following snippet:
2080
+ *
2081
+ * for (size_t i = 0; i < range_i; i++)
2082
+ * for (size_t j = 0; j < range_j; j++)
2083
+ * for (size_t k = 0; k < range_k; k++)
2084
+ * for (size_t l = 0; l < range_l; l++)
2085
+ * for (size_t m = 0; m < range_m; m++)
2086
+ * for (size_t n = 0; n < range_n; n++)
2087
+ * functor(i, j, k, l, m, n);
2088
+ *
2089
+ * When the function returns, all items have been processed and the thread pool
2090
+ * is ready for a new task.
2091
+ *
2092
+ * @note If multiple threads call this function with the same thread pool, the
2093
+ * calls are serialized.
2094
+ *
2095
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
2096
+ * is NULL, all items are processed serially on the calling thread.
2097
+ * @param functor the functor to call for each item.
2098
+ * @param range_i the number of items to process along the first dimension
2099
+ * of the 6D grid.
2100
+ * @param range_j the number of items to process along the second dimension
2101
+ * of the 6D grid.
2102
+ * @param range_k the number of items to process along the third dimension
2103
+ * of the 6D grid.
2104
+ * @param range_l the number of items to process along the fourth dimension
2105
+ * of the 6D grid.
2106
+ * @param range_m the number of items to process along the fifth dimension
2107
+ * of the 6D grid.
2108
+ * @param range_n the number of items to process along the sixth dimension
2109
+ * of the 6D grid.
2110
+ * @note This variant takes no tile size: each functor call processes exactly
2111
+ * one item of the 6D grid.
2112
+ * @param flags a bitwise combination of zero or more optional flags
2113
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
2114
+ */
2115
+ template <class T>
2116
+ inline void pthreadpool_parallelize_6d(pthreadpool_t threadpool,
2117
+ const T &functor, size_t range_i,
2118
+ size_t range_j, size_t range_k,
2119
+ size_t range_l, size_t range_m,
2120
+ size_t range_n, uint32_t flags = 0) {
2121
+ pthreadpool_parallelize_6d(
2122
+ threadpool, &libpthreadpool::detail::call_wrapper_6d<const T>,
2123
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
2124
+ range_k, range_l, range_m, range_n, flags);
2125
+ }
2126
+
2127
+ /**
2128
+ * Process items on a 6D grid with the specified maximum tile size along the
2129
+ * last grid dimension.
2130
+ *
2131
+ * The function implements a parallel version of the following snippet:
2132
+ *
2133
+ * for (size_t i = 0; i < range_i; i++)
2134
+ * for (size_t j = 0; j < range_j; j++)
2135
+ * for (size_t k = 0; k < range_k; k++)
2136
+ * for (size_t l = 0; l < range_l; l++)
2137
+ * for (size_t m = 0; m < range_m; m++)
2138
+ * for (size_t n = 0; n < range_n; n += tile_n)
2139
+ * functor(i, j, k, l, m, n, min(range_n - n, tile_n));
2140
+ *
2141
+ * When the function returns, all items have been processed and the thread pool
2142
+ * is ready for a new task.
2143
+ *
2144
+ * @note If multiple threads call this function with the same thread pool, the
2145
+ * calls are serialized.
2146
+ *
2147
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
2148
+ * is NULL, all items are processed serially on the calling thread.
2149
+ * @param functor the functor to call for each tile.
2150
+ * @param range_i the number of items to process along the first dimension
2151
+ * of the 6D grid.
2152
+ * @param range_j the number of items to process along the second dimension
2153
+ * of the 6D grid.
2154
+ * @param range_k the number of items to process along the third dimension
2155
+ * of the 6D grid.
2156
+ * @param range_l the number of items to process along the fourth dimension
2157
+ * of the 6D grid.
2158
+ * @param range_m the number of items to process along the fifth dimension
2159
+ * of the 6D grid.
2160
+ * @param range_n the number of items to process along the sixth dimension
2161
+ * of the 6D grid.
2162
+ * @param tile_n the maximum number of items along the sixth dimension of
2163
+ * the 6D grid to process in one functor call.
2164
+ * @param flags a bitwise combination of zero or more optional flags
2165
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
2166
+ */
2167
+ template <class T>
2168
+ inline void pthreadpool_parallelize_6d_tile_1d(pthreadpool_t threadpool,
2169
+ const T &functor, size_t range_i,
2170
+ size_t range_j, size_t range_k,
2171
+ size_t range_l, size_t range_m,
2172
+ size_t range_n, size_t tile_n,
2173
+ uint32_t flags = 0) {
2174
+ pthreadpool_parallelize_6d_tile_1d(
2175
+ threadpool, &libpthreadpool::detail::call_wrapper_6d_tile_1d<const T>,
2176
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
2177
+ range_k, range_l, range_m, range_n, tile_n, flags);
2178
+ }
2179
+
2180
+ /**
2181
+ * Process items on a 6D grid with the specified maximum tile size along the
2182
+ * last two grid dimensions.
2183
+ *
2184
+ * The function implements a parallel version of the following snippet:
2185
+ *
2186
+ * for (size_t i = 0; i < range_i; i++)
2187
+ * for (size_t j = 0; j < range_j; j++)
2188
+ * for (size_t k = 0; k < range_k; k++)
2189
+ * for (size_t l = 0; l < range_l; l++)
2190
+ * for (size_t m = 0; m < range_m; m += tile_m)
2191
+ * for (size_t n = 0; n < range_n; n += tile_n)
2192
+ * functor(i, j, k, l, m, n,
2193
+ * min(range_m - m, tile_m), min(range_n - n, tile_n));
2194
+ *
2195
+ * When the function returns, all items have been processed and the thread pool
2196
+ * is ready for a new task.
2197
+ *
2198
+ * @note If multiple threads call this function with the same thread pool, the
2199
+ * calls are serialized.
2200
+ *
2201
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
2202
+ * is NULL, all items are processed serially on the calling thread.
2203
+ * @param functor the functor to call for each tile.
2204
+ * @param range_i the number of items to process along the first dimension
2205
+ * of the 6D grid.
2206
+ * @param range_j the number of items to process along the second dimension
2207
+ * of the 6D grid.
2208
+ * @param range_k the number of items to process along the third dimension
2209
+ * of the 6D grid.
2210
+ * @param range_l the number of items to process along the fourth dimension
2211
+ * of the 6D grid.
2212
+ * @param range_m the number of items to process along the fifth dimension
2213
+ * of the 6D grid.
2214
+ * @param range_n the number of items to process along the sixth dimension
2215
+ * of the 6D grid.
2216
+ * @param tile_m the maximum number of items along the fifth dimension of
2217
+ * the 6D grid to process in one functor call.
2218
+ * @param tile_n the maximum number of items along the sixth dimension of
2219
+ * the 6D grid to process in one functor call.
2220
+ * @param flags a bitwise combination of zero or more optional flags
2221
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
2222
+ */
2223
+ template <class T>
2224
+ inline void pthreadpool_parallelize_6d_tile_2d(
2225
+ pthreadpool_t threadpool, const T &functor, size_t range_i, size_t range_j,
2226
+ size_t range_k, size_t range_l, size_t range_m, size_t range_n,
2227
+ size_t tile_m, size_t tile_n, uint32_t flags = 0) {
2228
+ pthreadpool_parallelize_6d_tile_2d(
2229
+ threadpool, &libpthreadpool::detail::call_wrapper_6d_tile_2d<const T>,
2230
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
2231
+ range_k, range_l, range_m, range_n, tile_m, tile_n, flags);
2232
+ }
2233
+
2234
+ #endif /* __cplusplus */
2235
+
2236
+ #endif /* PTHREADPOOL_H_ */