react-native-executorch 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (561)
  1. package/android/CMakeLists.txt +24 -0
  2. package/android/build.gradle +1 -0
  3. package/android/src/main/cpp/CMakeLists.txt +27 -1
  4. package/android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt +1 -13
  5. package/common/rnexecutorch/RnExecutorchInstaller.cpp +52 -18
  6. package/common/rnexecutorch/RnExecutorchInstaller.h +0 -25
  7. package/common/rnexecutorch/TokenizerModule.cpp +1 -1
  8. package/common/rnexecutorch/TokenizerModule.h +4 -1
  9. package/common/rnexecutorch/data_processing/FileUtils.h +2 -2
  10. package/common/rnexecutorch/data_processing/ImageProcessing.cpp +5 -5
  11. package/common/rnexecutorch/data_processing/ImageProcessing.h +2 -2
  12. package/common/rnexecutorch/data_processing/Numerical.cpp +40 -19
  13. package/common/rnexecutorch/data_processing/Numerical.h +53 -4
  14. package/common/rnexecutorch/data_processing/dsp.cpp +1 -1
  15. package/common/rnexecutorch/data_processing/dsp.h +1 -1
  16. package/common/rnexecutorch/data_processing/gzip.cpp +47 -0
  17. package/common/rnexecutorch/data_processing/gzip.h +7 -0
  18. package/common/rnexecutorch/host_objects/JsiConversions.h +43 -62
  19. package/common/rnexecutorch/host_objects/ModelHostObject.h +67 -24
  20. package/common/rnexecutorch/metaprogramming/ConstructorHelpers.h +8 -6
  21. package/common/rnexecutorch/metaprogramming/FunctionHelpers.h +1 -1
  22. package/common/rnexecutorch/metaprogramming/TypeConcepts.h +21 -1
  23. package/common/rnexecutorch/models/BaseModel.cpp +5 -4
  24. package/common/rnexecutorch/models/BaseModel.h +8 -2
  25. package/common/rnexecutorch/models/classification/Classification.cpp +6 -6
  26. package/common/rnexecutorch/models/classification/Classification.h +5 -0
  27. package/common/rnexecutorch/models/classification/Constants.h +3 -3
  28. package/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp +2 -2
  29. package/common/rnexecutorch/models/embeddings/BaseEmbeddings.h +2 -2
  30. package/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp +3 -3
  31. package/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h +5 -0
  32. package/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp +2 -2
  33. package/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h +6 -1
  34. package/common/rnexecutorch/models/image_segmentation/Constants.h +3 -3
  35. package/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp +6 -5
  36. package/common/rnexecutorch/models/image_segmentation/ImageSegmentation.h +8 -1
  37. package/common/rnexecutorch/models/llm/LLM.cpp +58 -0
  38. package/common/rnexecutorch/models/llm/LLM.h +35 -0
  39. package/common/rnexecutorch/models/object_detection/Constants.h +3 -3
  40. package/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +8 -8
  41. package/common/rnexecutorch/models/object_detection/ObjectDetection.h +11 -5
  42. package/common/rnexecutorch/models/object_detection/Types.h +13 -0
  43. package/common/rnexecutorch/models/object_detection/Utils.cpp +13 -11
  44. package/common/rnexecutorch/models/object_detection/Utils.h +7 -13
  45. package/common/rnexecutorch/models/ocr/CTCLabelConverter.cpp +2 -2
  46. package/common/rnexecutorch/models/ocr/CTCLabelConverter.h +2 -2
  47. package/common/rnexecutorch/models/ocr/Constants.h +33 -26
  48. package/common/rnexecutorch/models/ocr/Detector.cpp +20 -22
  49. package/common/rnexecutorch/models/ocr/Detector.h +4 -4
  50. package/common/rnexecutorch/models/ocr/OCR.cpp +9 -8
  51. package/common/rnexecutorch/models/ocr/OCR.h +11 -3
  52. package/common/rnexecutorch/models/ocr/RecognitionHandler.cpp +20 -19
  53. package/common/rnexecutorch/models/ocr/RecognitionHandler.h +9 -7
  54. package/common/rnexecutorch/models/ocr/Recognizer.cpp +7 -7
  55. package/common/rnexecutorch/models/ocr/Recognizer.h +2 -2
  56. package/common/rnexecutorch/models/ocr/Types.h +4 -6
  57. package/common/rnexecutorch/models/ocr/{DetectorUtils.cpp → utils/DetectorUtils.cpp} +70 -63
  58. package/common/rnexecutorch/models/ocr/{DetectorUtils.h → utils/DetectorUtils.h} +12 -11
  59. package/common/rnexecutorch/models/ocr/{RecognitionHandlerUtils.cpp → utils/RecognitionHandlerUtils.cpp} +14 -11
  60. package/common/rnexecutorch/models/ocr/{RecognitionHandlerUtils.h → utils/RecognitionHandlerUtils.h} +5 -5
  61. package/common/rnexecutorch/models/ocr/{RecognizerUtils.cpp → utils/RecognizerUtils.cpp} +28 -26
  62. package/common/rnexecutorch/models/ocr/{RecognizerUtils.h → utils/RecognizerUtils.h} +15 -14
  63. package/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp +102 -41
  64. package/common/rnexecutorch/models/speech_to_text/SpeechToText.h +48 -19
  65. package/common/rnexecutorch/models/speech_to_text/asr/ASR.cpp +307 -0
  66. package/common/rnexecutorch/models/speech_to_text/asr/ASR.h +61 -0
  67. package/common/rnexecutorch/models/speech_to_text/stream/HypothesisBuffer.cpp +80 -0
  68. package/common/rnexecutorch/models/speech_to_text/stream/HypothesisBuffer.h +27 -0
  69. package/common/rnexecutorch/models/speech_to_text/stream/OnlineASRProcessor.cpp +96 -0
  70. package/common/rnexecutorch/models/speech_to_text/stream/OnlineASRProcessor.h +36 -0
  71. package/common/rnexecutorch/models/speech_to_text/types/DecodingOptions.h +15 -0
  72. package/common/rnexecutorch/models/speech_to_text/types/GenerationResult.h +12 -0
  73. package/common/rnexecutorch/models/speech_to_text/types/ProcessResult.h +12 -0
  74. package/common/rnexecutorch/models/speech_to_text/types/Segment.h +14 -0
  75. package/common/rnexecutorch/models/speech_to_text/types/Word.h +13 -0
  76. package/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp +5 -5
  77. package/common/rnexecutorch/models/style_transfer/StyleTransfer.h +6 -0
  78. package/common/rnexecutorch/models/vertical_ocr/VerticalDetector.cpp +23 -22
  79. package/common/rnexecutorch/models/vertical_ocr/VerticalDetector.h +4 -4
  80. package/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp +34 -34
  81. package/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h +27 -20
  82. package/{third-party/ios/ExecutorchLib/ExecutorchLib/sampler → common/runner}/sampler.cpp +3 -2
  83. package/{third-party/ios/ExecutorchLib/ExecutorchLib/sampler → common/runner}/sampler.h +3 -2
  84. package/ios/libs/executorch/libbackend_coreml_ios.a +0 -0
  85. package/ios/libs/executorch/libbackend_coreml_simulator.a +0 -0
  86. package/{third-party/ios/ExecutorchLib/frameworks/backend_mps.xcframework/ios-arm64 → ios/libs/executorch}/libbackend_mps_ios.a +0 -0
  87. package/{third-party/ios/ExecutorchLib/frameworks/backend_mps.xcframework/ios-arm64-simulator → ios/libs/executorch}/libbackend_mps_simulator.a +0 -0
  88. package/ios/libs/executorch/libbackend_xnnpack_ios.a +0 -0
  89. package/ios/libs/executorch/libbackend_xnnpack_simulator.a +0 -0
  90. package/ios/libs/executorch/libexecutorch_ios.a +0 -0
  91. package/ios/libs/executorch/libexecutorch_simulator.a +0 -0
  92. package/ios/libs/executorch/libkernels_custom_ios.a +0 -0
  93. package/ios/libs/executorch/libkernels_custom_simulator.a +0 -0
  94. package/ios/libs/executorch/libkernels_optimized_ios.a +0 -0
  95. package/ios/libs/executorch/libkernels_optimized_simulator.a +0 -0
  96. package/ios/libs/executorch/libkernels_portable_ios.a +0 -0
  97. package/ios/libs/executorch/libkernels_portable_simulator.a +0 -0
  98. package/ios/libs/executorch/libkernels_quantized_ios.a +0 -0
  99. package/ios/libs/executorch/libkernels_quantized_simulator.a +0 -0
  100. package/ios/libs/tokenizers-cpp/physical-arm64-release/libsentencepiece.a +0 -0
  101. package/ios/{ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib → libs/tokenizers-cpp/physical-arm64-release/libtokenizers_c.a} +0 -0
  102. package/ios/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_cpp.a +0 -0
  103. package/ios/libs/tokenizers-cpp/simulator-arm64-debug/libsentencepiece.a +0 -0
  104. package/ios/{ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib → libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_c.a} +0 -0
  105. package/ios/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_cpp.a +0 -0
  106. package/lib/Error.js +9 -6
  107. package/lib/ThreadPool.d.ts +10 -0
  108. package/lib/ThreadPool.js +28 -0
  109. package/lib/constants/modelUrls.js +1 -1
  110. package/lib/controllers/OCRController.js +9 -14
  111. package/lib/controllers/VerticalOCRController.js +9 -14
  112. package/lib/hooks/computer_vision/useOCR.js +7 -8
  113. package/lib/hooks/computer_vision/useVerticalOCR.js +3 -5
  114. package/lib/index.d.ts +0 -2
  115. package/lib/index.js +1 -3
  116. package/lib/module/controllers/LLMController.js +6 -10
  117. package/lib/module/controllers/LLMController.js.map +1 -1
  118. package/lib/module/hooks/computer_vision/useClassification.js +2 -2
  119. package/lib/module/hooks/computer_vision/useClassification.js.map +1 -1
  120. package/lib/module/hooks/computer_vision/useImageEmbeddings.js +2 -2
  121. package/lib/module/hooks/computer_vision/useImageEmbeddings.js.map +1 -1
  122. package/lib/module/hooks/computer_vision/useImageSegmentation.js +2 -2
  123. package/lib/module/hooks/computer_vision/useImageSegmentation.js.map +1 -1
  124. package/lib/module/hooks/computer_vision/useObjectDetection.js +2 -2
  125. package/lib/module/hooks/computer_vision/useObjectDetection.js.map +1 -1
  126. package/lib/module/hooks/computer_vision/useStyleTransfer.js +2 -2
  127. package/lib/module/hooks/computer_vision/useStyleTransfer.js.map +1 -1
  128. package/lib/module/hooks/general/useExecutorchModule.js +2 -2
  129. package/lib/module/hooks/general/useExecutorchModule.js.map +1 -1
  130. package/lib/module/hooks/natural_language_processing/useTextEmbeddings.js +2 -2
  131. package/lib/module/hooks/natural_language_processing/useTextEmbeddings.js.map +1 -1
  132. package/lib/module/hooks/useModule.js +13 -9
  133. package/lib/module/hooks/useModule.js.map +1 -1
  134. package/lib/module/index.js +1 -1
  135. package/lib/module/index.js.map +1 -1
  136. package/lib/module/modules/BaseModule.js +9 -17
  137. package/lib/module/modules/BaseModule.js.map +1 -1
  138. package/lib/module/modules/computer_vision/ClassificationModule.js +2 -2
  139. package/lib/module/modules/computer_vision/ClassificationModule.js.map +1 -1
  140. package/lib/module/modules/computer_vision/ImageEmbeddingsModule.js +2 -2
  141. package/lib/module/modules/computer_vision/ImageEmbeddingsModule.js.map +1 -1
  142. package/lib/module/modules/computer_vision/ImageSegmentationModule.js +2 -2
  143. package/lib/module/modules/computer_vision/ImageSegmentationModule.js.map +1 -1
  144. package/lib/module/modules/computer_vision/ObjectDetectionModule.js +2 -2
  145. package/lib/module/modules/computer_vision/ObjectDetectionModule.js.map +1 -1
  146. package/lib/module/modules/computer_vision/StyleTransferModule.js +2 -2
  147. package/lib/module/modules/computer_vision/StyleTransferModule.js.map +1 -1
  148. package/lib/module/modules/general/ExecutorchModule.js +2 -2
  149. package/lib/module/modules/general/ExecutorchModule.js.map +1 -1
  150. package/lib/module/modules/natural_language_processing/SpeechToTextModule.js +75 -53
  151. package/lib/module/modules/natural_language_processing/SpeechToTextModule.js.map +1 -1
  152. package/lib/module/modules/natural_language_processing/TextEmbeddingsModule.js +2 -2
  153. package/lib/module/modules/natural_language_processing/TextEmbeddingsModule.js.map +1 -1
  154. package/lib/module/native/RnExecutorchModules.js +1 -2
  155. package/lib/module/native/RnExecutorchModules.js.map +1 -1
  156. package/lib/modules/computer_vision/OCRModule.d.ts +4 -5
  157. package/lib/modules/computer_vision/OCRModule.js +9 -12
  158. package/lib/modules/computer_vision/VerticalOCRModule.d.ts +4 -5
  159. package/lib/modules/computer_vision/VerticalOCRModule.js +9 -12
  160. package/lib/native/RnExecutorchModules.d.ts +5 -1
  161. package/lib/native/RnExecutorchModules.js +3 -1
  162. package/lib/tsconfig.tsbuildinfo +1 -0
  163. package/lib/types/common.d.ts +1 -0
  164. package/lib/typescript/controllers/LLMController.d.ts +1 -1
  165. package/lib/typescript/controllers/LLMController.d.ts.map +1 -1
  166. package/lib/typescript/hooks/natural_language_processing/useSpeechToText.d.ts +5 -5
  167. package/lib/typescript/hooks/useModule.d.ts +8 -5
  168. package/lib/typescript/hooks/useModule.d.ts.map +1 -1
  169. package/lib/typescript/index.d.ts +1 -0
  170. package/lib/typescript/index.d.ts.map +1 -1
  171. package/lib/typescript/modules/BaseModule.d.ts +7 -6
  172. package/lib/typescript/modules/BaseModule.d.ts.map +1 -1
  173. package/lib/typescript/modules/computer_vision/ClassificationModule.d.ts +2 -2
  174. package/lib/typescript/modules/computer_vision/ClassificationModule.d.ts.map +1 -1
  175. package/lib/typescript/modules/computer_vision/ImageEmbeddingsModule.d.ts +2 -2
  176. package/lib/typescript/modules/computer_vision/ImageEmbeddingsModule.d.ts.map +1 -1
  177. package/lib/typescript/modules/computer_vision/ImageSegmentationModule.d.ts +2 -2
  178. package/lib/typescript/modules/computer_vision/ImageSegmentationModule.d.ts.map +1 -1
  179. package/lib/typescript/modules/computer_vision/ObjectDetectionModule.d.ts +2 -2
  180. package/lib/typescript/modules/computer_vision/ObjectDetectionModule.d.ts.map +1 -1
  181. package/lib/typescript/modules/computer_vision/StyleTransferModule.d.ts +2 -2
  182. package/lib/typescript/modules/computer_vision/StyleTransferModule.d.ts.map +1 -1
  183. package/lib/typescript/modules/general/ExecutorchModule.d.ts +2 -2
  184. package/lib/typescript/modules/general/ExecutorchModule.d.ts.map +1 -1
  185. package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts +7 -12
  186. package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts.map +1 -1
  187. package/lib/typescript/modules/natural_language_processing/TextEmbeddingsModule.d.ts +2 -2
  188. package/lib/typescript/modules/natural_language_processing/TextEmbeddingsModule.d.ts.map +1 -1
  189. package/lib/typescript/native/RnExecutorchModules.d.ts +1 -3
  190. package/lib/typescript/native/RnExecutorchModules.d.ts.map +1 -1
  191. package/lib/typescript/types/stt.d.ts +0 -9
  192. package/lib/typescript/types/stt.d.ts.map +1 -1
  193. package/lib/utils/ResourceFetcherUtils.js +0 -1
  194. package/lib/utils/llm.js +0 -1
  195. package/package.json +1 -2
  196. package/react-native-executorch.podspec +49 -44
  197. package/src/controllers/LLMController.ts +8 -13
  198. package/src/hooks/computer_vision/useClassification.ts +2 -2
  199. package/src/hooks/computer_vision/useImageEmbeddings.ts +2 -2
  200. package/src/hooks/computer_vision/useImageSegmentation.ts +2 -2
  201. package/src/hooks/computer_vision/useObjectDetection.ts +2 -2
  202. package/src/hooks/computer_vision/useStyleTransfer.ts +2 -2
  203. package/src/hooks/general/useExecutorchModule.ts +2 -2
  204. package/src/hooks/natural_language_processing/useTextEmbeddings.ts +2 -2
  205. package/src/hooks/useModule.ts +23 -13
  206. package/src/index.ts +3 -2
  207. package/src/modules/BaseModule.ts +17 -28
  208. package/src/modules/computer_vision/ClassificationModule.ts +2 -2
  209. package/src/modules/computer_vision/ImageEmbeddingsModule.ts +2 -2
  210. package/src/modules/computer_vision/ImageSegmentationModule.ts +2 -2
  211. package/src/modules/computer_vision/ObjectDetectionModule.ts +2 -2
  212. package/src/modules/computer_vision/StyleTransferModule.ts +2 -2
  213. package/src/modules/general/ExecutorchModule.ts +2 -2
  214. package/src/modules/natural_language_processing/SpeechToTextModule.ts +118 -54
  215. package/src/modules/natural_language_processing/TextEmbeddingsModule.ts +2 -2
  216. package/src/native/RnExecutorchModules.ts +1 -5
  217. package/src/types/stt.ts +0 -12
  218. package/third-party/android/libs/cpuinfo/arm64-v8a/libcpuinfo.so +0 -0
  219. package/third-party/android/libs/pthreadpool/arm64-v8a/libpthreadpool.so +0 -0
  220. package/third-party/include/cpuinfo/cpuinfo.h +2305 -0
  221. package/third-party/include/executorch/extension/threadpool/cpuinfo_utils.h +26 -0
  222. package/third-party/include/executorch/extension/threadpool/threadpool.h +94 -0
  223. package/third-party/include/pthreadpool/pthreadpool.h +2236 -0
  224. package/android/src/main/java/com/swmansion/rnexecutorch/LLM.kt +0 -63
  225. package/common/rnexecutorch/models/EncoderDecoderBase.cpp +0 -21
  226. package/common/rnexecutorch/models/EncoderDecoderBase.h +0 -31
  227. package/common/rnexecutorch/models/speech_to_text/SpeechToTextStrategy.h +0 -27
  228. package/common/rnexecutorch/models/speech_to_text/WhisperStrategy.cpp +0 -50
  229. package/common/rnexecutorch/models/speech_to_text/WhisperStrategy.h +0 -25
  230. package/ios/ExecutorchLib.xcframework/Info.plist +0 -43
  231. package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/ETModel.h +0 -27
  232. package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h +0 -14
  233. package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/LLaMARunner.h +0 -32
  234. package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Info.plist +0 -0
  235. package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/ETModel.h +0 -27
  236. package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h +0 -14
  237. package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/LLaMARunner.h +0 -32
  238. package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Info.plist +0 -0
  239. package/ios/RnExecutorch/LLM.h +0 -5
  240. package/ios/RnExecutorch/LLM.mm +0 -78
  241. package/lib/Error.d.ts +0 -30
  242. package/lib/constants/directories.d.ts +0 -1
  243. package/lib/constants/ocr/symbols.d.ts +0 -75
  244. package/lib/controllers/OCRController.d.ts +0 -23
  245. package/lib/controllers/VerticalOCRController.d.ts +0 -25
  246. package/lib/hooks/useModule.d.ts +0 -17
  247. package/lib/module/modules/BaseNonStaticModule.js +0 -17
  248. package/lib/module/modules/BaseNonStaticModule.js.map +0 -1
  249. package/lib/module/native/NativeLLM.js +0 -5
  250. package/lib/module/native/NativeLLM.js.map +0 -1
  251. package/lib/module/utils/SpeechToTextModule/ASR.js +0 -191
  252. package/lib/module/utils/SpeechToTextModule/ASR.js.map +0 -1
  253. package/lib/module/utils/SpeechToTextModule/OnlineProcessor.js +0 -73
  254. package/lib/module/utils/SpeechToTextModule/OnlineProcessor.js.map +0 -1
  255. package/lib/module/utils/SpeechToTextModule/hypothesisBuffer.js +0 -56
  256. package/lib/module/utils/SpeechToTextModule/hypothesisBuffer.js.map +0 -1
  257. package/lib/module/utils/stt.js +0 -22
  258. package/lib/module/utils/stt.js.map +0 -1
  259. package/lib/modules/BaseModule.d.ts +0 -8
  260. package/lib/modules/BaseNonStaticModule.d.ts +0 -9
  261. package/lib/native/NativeETInstaller.d.ts +0 -6
  262. package/lib/native/NativeOCR.d.ts +0 -8
  263. package/lib/native/NativeVerticalOCR.d.ts +0 -8
  264. package/lib/types/imageSegmentation.d.ts +0 -24
  265. package/lib/types/objectDetection.d.ts +0 -104
  266. package/lib/types/ocr.d.ts +0 -11
  267. package/lib/typescript/modules/BaseNonStaticModule.d.ts +0 -10
  268. package/lib/typescript/modules/BaseNonStaticModule.d.ts.map +0 -1
  269. package/lib/typescript/native/NativeLLM.d.ts +0 -12
  270. package/lib/typescript/native/NativeLLM.d.ts.map +0 -1
  271. package/lib/typescript/utils/SpeechToTextModule/ASR.d.ts +0 -27
  272. package/lib/typescript/utils/SpeechToTextModule/ASR.d.ts.map +0 -1
  273. package/lib/typescript/utils/SpeechToTextModule/OnlineProcessor.d.ts +0 -23
  274. package/lib/typescript/utils/SpeechToTextModule/OnlineProcessor.d.ts.map +0 -1
  275. package/lib/typescript/utils/SpeechToTextModule/hypothesisBuffer.d.ts +0 -13
  276. package/lib/typescript/utils/SpeechToTextModule/hypothesisBuffer.d.ts.map +0 -1
  277. package/lib/typescript/utils/stt.d.ts +0 -2
  278. package/lib/typescript/utils/stt.d.ts.map +0 -1
  279. package/lib/utils/stt.d.ts +0 -1
  280. package/src/modules/BaseNonStaticModule.ts +0 -26
  281. package/src/native/NativeLLM.ts +0 -14
  282. package/src/utils/SpeechToTextModule/ASR.ts +0 -303
  283. package/src/utils/SpeechToTextModule/OnlineProcessor.ts +0 -87
  284. package/src/utils/SpeechToTextModule/hypothesisBuffer.ts +0 -79
  285. package/src/utils/stt.ts +0 -28
  286. package/third-party/include/tokenizers-cpp/tokenizers_c.h +0 -61
  287. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/ETModel.h +0 -27
  288. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/ETModel.mm +0 -249
  289. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.h +0 -14
  290. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.mm +0 -80
  291. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.h +0 -32
  292. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.mm +0 -95
  293. package/third-party/ios/ExecutorchLib/ExecutorchLib/InputType.h +0 -12
  294. package/third-party/ios/ExecutorchLib/ExecutorchLib/Utils.hpp +0 -217
  295. package/third-party/ios/ExecutorchLib/ExecutorchLib/model/Model.cpp +0 -11
  296. package/third-party/ios/ExecutorchLib/ExecutorchLib/model/Model.h +0 -11
  297. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/base64.h +0 -202
  298. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.cpp +0 -313
  299. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.h +0 -57
  300. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.cpp +0 -78
  301. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.h +0 -23
  302. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.cpp +0 -427
  303. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.h +0 -87
  304. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tokenizer.h +0 -76
  305. package/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.pbxproj +0 -683
  306. package/third-party/ios/ExecutorchLib/build.sh +0 -44
  307. package/third-party/ios/ExecutorchLib/frameworks/backend_coreml.xcframework/Info.plist +0 -43
  308. package/third-party/ios/ExecutorchLib/frameworks/backend_coreml.xcframework/ios-arm64/libbackend_coreml_ios.a +0 -0
  309. package/third-party/ios/ExecutorchLib/frameworks/backend_coreml.xcframework/ios-arm64-simulator/libbackend_coreml_simulator.a +0 -0
  310. package/third-party/ios/ExecutorchLib/frameworks/backend_mps.xcframework/Info.plist +0 -43
  311. package/third-party/ios/ExecutorchLib/frameworks/backend_xnnpack.xcframework/Info.plist +0 -43
  312. package/third-party/ios/ExecutorchLib/frameworks/backend_xnnpack.xcframework/ios-arm64/libbackend_xnnpack_ios.a +0 -0
  313. package/third-party/ios/ExecutorchLib/frameworks/backend_xnnpack.xcframework/ios-arm64-simulator/libbackend_xnnpack_simulator.a +0 -0
  314. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/Info.plist +0 -47
  315. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/macros/Export.h +0 -163
  316. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/macros/Macros.h +0 -497
  317. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/BFloat16-inl.h +0 -342
  318. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/BFloat16-math.h +0 -266
  319. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/BFloat16.h +0 -125
  320. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/Half-inl.h +0 -347
  321. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/Half.h +0 -416
  322. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/TypeSafeSignMath.h +0 -133
  323. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/bit_cast.h +0 -43
  324. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/floating_point_utils.h +0 -33
  325. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/irange.h +0 -107
  326. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorch.h +0 -13
  327. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchError.h +0 -16
  328. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchLog.h +0 -76
  329. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchModule.h +0 -286
  330. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchTensor.h +0 -742
  331. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchValue.h +0 -219
  332. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/module/module.h +0 -492
  333. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor.h +0 -13
  334. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor_accessor.h +0 -190
  335. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor_ptr.h +0 -347
  336. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor_ptr_maker.h +0 -653
  337. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/backend/backend_execution_context.h +0 -71
  338. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/backend/backend_init_context.h +0 -72
  339. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/backend/interface.h +0 -166
  340. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/array_ref.h +0 -235
  341. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/data_loader.h +0 -136
  342. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/defines.h +0 -20
  343. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/error.h +0 -229
  344. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/evalue.h +0 -521
  345. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/event_tracer.h +0 -565
  346. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/event_tracer_hooks.h +0 -323
  347. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/event_tracer_hooks_delegate.h +0 -197
  348. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/exec_aten.h +0 -147
  349. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/dim_order_util.h +0 -263
  350. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/scalar_type_util.h +0 -1331
  351. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/tensor_dimension_limit.h +0 -21
  352. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/tensor_shape_to_c_string.h +0 -69
  353. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/tensor_util.h +0 -1250
  354. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/freeable_buffer.h +0 -107
  355. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/hierarchical_allocator.h +0 -107
  356. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/memory_allocator.h +0 -198
  357. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/named_data_map.h +0 -86
  358. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/bfloat16.h +0 -27
  359. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/bfloat16_math.h +0 -14
  360. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/bits_types.h +0 -83
  361. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Export.h +0 -163
  362. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h +0 -497
  363. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +0 -342
  364. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-math.h +0 -266
  365. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16.h +0 -125
  366. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half-inl.h +0 -347
  367. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half.h +0 -416
  368. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/TypeSafeSignMath.h +0 -133
  369. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/bit_cast.h +0 -43
  370. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/floating_point_utils.h +0 -33
  371. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/irange.h +0 -107
  372. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/complex.h +0 -44
  373. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/device.h +0 -70
  374. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/half.h +0 -27
  375. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/optional.h +0 -36
  376. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/qint_types.h +0 -83
  377. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/scalar.h +0 -110
  378. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/scalar_type.h +0 -154
  379. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/string_view.h +0 -29
  380. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/tensor.h +0 -142
  381. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/tensor_impl.h +0 -261
  382. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/tensor_options.h +0 -60
  383. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/result.h +0 -258
  384. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/span.h +0 -93
  385. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/tag.h +0 -71
  386. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/tensor_layout.h +0 -79
  387. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/tensor_shape_dynamism.h +0 -39
  388. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/memory_manager.h +0 -113
  389. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/method.h +0 -387
  390. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/method_meta.h +0 -251
  391. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/program.h +0 -320
  392. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/pte_data_map.h +0 -144
  393. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/tensor_parser.h +0 -156
  394. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/kernel/kernel_runtime_context.h +0 -122
  395. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/kernel/operator_registry.h +0 -278
  396. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/abort.h +0 -36
  397. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/assert.h +0 -119
  398. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/clock.h +0 -43
  399. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/compat_unistd.h +0 -75
  400. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/compiler.h +0 -191
  401. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/log.h +0 -177
  402. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/platform.h +0 -133
  403. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/profiler.h +0 -292
  404. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/runtime.h +0 -35
  405. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/system.h +0 -49
  406. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/types.h +0 -24
  407. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/schema/extended_header.h +0 -76
  408. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/module.modulemap +0 -5
  409. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/libexecutorch_ios.a +0 -0
  410. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/macros/Export.h +0 -163
  411. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/macros/Macros.h +0 -497
  412. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/BFloat16-inl.h +0 -342
  413. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/BFloat16-math.h +0 -266
  414. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/BFloat16.h +0 -125
  415. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/Half-inl.h +0 -347
  416. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/Half.h +0 -416
  417. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/TypeSafeSignMath.h +0 -133
  418. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/bit_cast.h +0 -43
  419. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/floating_point_utils.h +0 -33
  420. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/irange.h +0 -107
  421. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorch.h +0 -13
  422. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchError.h +0 -16
  423. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchLog.h +0 -76
  424. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchModule.h +0 -286
  425. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchTensor.h +0 -742
  426. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchValue.h +0 -219
  427. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/module/module.h +0 -492
  428. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor.h +0 -13
  429. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor_accessor.h +0 -190
  430. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor_ptr.h +0 -347
  431. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor_ptr_maker.h +0 -653
  432. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/backend/backend_execution_context.h +0 -71
  433. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/backend/backend_init_context.h +0 -72
  434. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/backend/interface.h +0 -166
  435. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/array_ref.h +0 -235
  436. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/data_loader.h +0 -136
  437. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/defines.h +0 -20
  438. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/error.h +0 -229
  439. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/evalue.h +0 -521
  440. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/event_tracer.h +0 -565
  441. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/event_tracer_hooks.h +0 -323
  442. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/event_tracer_hooks_delegate.h +0 -197
  443. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/exec_aten.h +0 -147
  444. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/dim_order_util.h +0 -263
  445. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/scalar_type_util.h +0 -1331
  446. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/tensor_dimension_limit.h +0 -21
  447. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/tensor_shape_to_c_string.h +0 -69
  448. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/tensor_util.h +0 -1250
  449. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/freeable_buffer.h +0 -107
  450. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/hierarchical_allocator.h +0 -107
  451. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/memory_allocator.h +0 -198
  452. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/named_data_map.h +0 -86
  453. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/bfloat16.h +0 -27
  454. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/bfloat16_math.h +0 -14
  455. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/bits_types.h +0 -83
  456. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Export.h +0 -163
  457. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h +0 -497
  458. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +0 -342
  459. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-math.h +0 -266
  460. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16.h +0 -125
  461. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half-inl.h +0 -347
  462. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half.h +0 -416
  463. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/TypeSafeSignMath.h +0 -133
  464. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/bit_cast.h +0 -43
  465. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/floating_point_utils.h +0 -33
  466. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/irange.h +0 -107
  467. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/complex.h +0 -44
  468. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/device.h +0 -70
  469. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/half.h +0 -27
  470. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/optional.h +0 -36
  471. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/qint_types.h +0 -83
  472. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/scalar.h +0 -110
  473. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/scalar_type.h +0 -154
  474. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/string_view.h +0 -29
  475. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/tensor.h +0 -142
  476. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/tensor_impl.h +0 -261
  477. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/tensor_options.h +0 -60
  478. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/result.h +0 -258
  479. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/span.h +0 -93
  480. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/tag.h +0 -71
  481. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/tensor_layout.h +0 -79
  482. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/tensor_shape_dynamism.h +0 -39
  483. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/memory_manager.h +0 -113
  484. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/method.h +0 -387
  485. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/method_meta.h +0 -251
  486. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/program.h +0 -320
  487. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/pte_data_map.h +0 -144
  488. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/tensor_parser.h +0 -156
  489. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/kernel/kernel_runtime_context.h +0 -122
  490. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/kernel/operator_registry.h +0 -278
  491. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/abort.h +0 -36
  492. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/assert.h +0 -119
  493. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/clock.h +0 -43
  494. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/compat_unistd.h +0 -75
  495. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/compiler.h +0 -191
  496. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/log.h +0 -177
  497. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/platform.h +0 -133
  498. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/profiler.h +0 -292
  499. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/runtime.h +0 -35
  500. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/system.h +0 -49
  501. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/types.h +0 -24
  502. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/schema/extended_header.h +0 -76
  503. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/module.modulemap +0 -5
  504. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/libexecutorch_simulator.a +0 -0
  505. package/third-party/ios/ExecutorchLib/frameworks/kernels_custom.xcframework/Info.plist +0 -43
  506. package/third-party/ios/ExecutorchLib/frameworks/kernels_custom.xcframework/ios-arm64/libkernels_custom_ios.a +0 -0
  507. package/third-party/ios/ExecutorchLib/frameworks/kernels_custom.xcframework/ios-arm64-simulator/libkernels_custom_simulator.a +0 -0
  508. package/third-party/ios/ExecutorchLib/frameworks/kernels_optimized.xcframework/Info.plist +0 -43
  509. package/third-party/ios/ExecutorchLib/frameworks/kernels_optimized.xcframework/ios-arm64/libkernels_optimized_ios.a +0 -0
  510. package/third-party/ios/ExecutorchLib/frameworks/kernels_optimized.xcframework/ios-arm64-simulator/libkernels_optimized_simulator.a +0 -0
  511. package/third-party/ios/ExecutorchLib/frameworks/kernels_portable.xcframework/Info.plist +0 -43
  512. package/third-party/ios/ExecutorchLib/frameworks/kernels_portable.xcframework/ios-arm64/libkernels_portable_ios.a +0 -0
  513. package/third-party/ios/ExecutorchLib/frameworks/kernels_portable.xcframework/ios-arm64-simulator/libkernels_portable_simulator.a +0 -0
  514. package/third-party/ios/ExecutorchLib/frameworks/kernels_quantized.xcframework/Info.plist +0 -43
  515. package/third-party/ios/ExecutorchLib/frameworks/kernels_quantized.xcframework/ios-arm64/libkernels_quantized_ios.a +0 -0
  516. package/third-party/ios/ExecutorchLib/frameworks/kernels_quantized.xcframework/ios-arm64-simulator/libkernels_quantized_simulator.a +0 -0
  517. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/Info.plist +0 -43
  518. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/bitmap256.h +0 -82
  519. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/filtered_re2.h +0 -111
  520. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/pod_array.h +0 -43
  521. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/prefilter.h +0 -130
  522. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/prefilter_tree.h +0 -139
  523. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/prog.h +0 -483
  524. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/re2.h +0 -994
  525. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/regexp.h +0 -692
  526. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/set.h +0 -85
  527. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/sparse_array.h +0 -367
  528. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/sparse_set.h +0 -241
  529. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/stringpiece.h +0 -205
  530. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/unicode_casefold.h +0 -78
  531. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/unicode_groups.h +0 -64
  532. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/walker-inl.h +0 -235
  533. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Info.plist +0 -26
  534. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/re2 +0 -0
  535. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/bitmap256.h +0 -82
  536. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/filtered_re2.h +0 -111
  537. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/pod_array.h +0 -43
  538. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/prefilter.h +0 -130
  539. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/prefilter_tree.h +0 -139
  540. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/prog.h +0 -483
  541. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/re2.h +0 -994
  542. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/regexp.h +0 -692
  543. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/set.h +0 -85
  544. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/sparse_array.h +0 -367
  545. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/sparse_set.h +0 -241
  546. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/stringpiece.h +0 -205
  547. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/unicode_casefold.h +0 -78
  548. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/unicode_groups.h +0 -64
  549. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/walker-inl.h +0 -235
  550. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Info.plist +0 -26
  551. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/re2 +0 -0
  552. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/irunner.h +0 -0
  553. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/runner.cpp +0 -0
  554. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/runner.h +0 -0
  555. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/stats.h +0 -0
  556. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/text_decoder_runner.cpp +0 -0
  557. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/text_decoder_runner.h +0 -0
  558. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/text_prefiller.cpp +0 -0
  559. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/text_prefiller.h +0 -0
  560. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/text_token_generator.h +0 -0
  561. /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/util.h +0 -0
@@ -0,0 +1,2236 @@
1
+ #ifndef PTHREADPOOL_H_
2
+ #define PTHREADPOOL_H_
3
+
4
+ #include <stddef.h>
5
+ #include <stdint.h>
6
+
7
+ typedef struct pthreadpool *pthreadpool_t;
8
+
9
+ typedef void (*pthreadpool_task_1d_t)(void *, size_t);
10
+ typedef void (*pthreadpool_task_1d_with_thread_t)(void *, size_t, size_t);
11
+ typedef void (*pthreadpool_task_1d_tile_1d_t)(void *, size_t, size_t);
12
+ typedef void (*pthreadpool_task_2d_t)(void *, size_t, size_t);
13
+ typedef void (*pthreadpool_task_2d_with_thread_t)(void *, size_t, size_t,
14
+ size_t);
15
+ typedef void (*pthreadpool_task_2d_tile_1d_t)(void *, size_t, size_t, size_t);
16
+ typedef void (*pthreadpool_task_2d_tile_2d_t)(void *, size_t, size_t, size_t,
17
+ size_t);
18
+ typedef void (*pthreadpool_task_3d_t)(void *, size_t, size_t, size_t);
19
+ typedef void (*pthreadpool_task_3d_tile_1d_t)(void *, size_t, size_t, size_t,
20
+ size_t);
21
+ typedef void (*pthreadpool_task_3d_tile_1d_with_thread_t)(void *, size_t,
22
+ size_t, size_t,
23
+ size_t, size_t);
24
+ typedef void (*pthreadpool_task_3d_tile_2d_t)(void *, size_t, size_t, size_t,
25
+ size_t, size_t);
26
+ typedef void (*pthreadpool_task_4d_t)(void *, size_t, size_t, size_t, size_t);
27
+ typedef void (*pthreadpool_task_4d_tile_1d_t)(void *, size_t, size_t, size_t,
28
+ size_t, size_t);
29
+ typedef void (*pthreadpool_task_4d_tile_2d_t)(void *, size_t, size_t, size_t,
30
+ size_t, size_t, size_t);
31
+ typedef void (*pthreadpool_task_5d_t)(void *, size_t, size_t, size_t, size_t,
32
+ size_t);
33
+ typedef void (*pthreadpool_task_5d_tile_1d_t)(void *, size_t, size_t, size_t,
34
+ size_t, size_t, size_t);
35
+ typedef void (*pthreadpool_task_5d_tile_2d_t)(void *, size_t, size_t, size_t,
36
+ size_t, size_t, size_t, size_t);
37
+ typedef void (*pthreadpool_task_6d_t)(void *, size_t, size_t, size_t, size_t,
38
+ size_t, size_t);
39
+ typedef void (*pthreadpool_task_6d_tile_1d_t)(void *, size_t, size_t, size_t,
40
+ size_t, size_t, size_t, size_t);
41
+ typedef void (*pthreadpool_task_6d_tile_2d_t)(void *, size_t, size_t, size_t,
42
+ size_t, size_t, size_t, size_t,
43
+ size_t);
44
+
45
+ typedef void (*pthreadpool_task_1d_with_id_t)(void *, uint32_t, size_t);
46
+ typedef void (*pthreadpool_task_2d_tile_1d_with_id_t)(void *, uint32_t, size_t,
47
+ size_t, size_t);
48
+ typedef void (*pthreadpool_task_2d_tile_2d_with_id_t)(void *, uint32_t, size_t,
49
+ size_t, size_t, size_t);
50
+ typedef void (*pthreadpool_task_3d_tile_1d_with_id_t)(void *, uint32_t, size_t,
51
+ size_t, size_t, size_t);
52
+ typedef void (*pthreadpool_task_3d_tile_2d_with_id_t)(void *, uint32_t, size_t,
53
+ size_t, size_t, size_t,
54
+ size_t);
55
+ typedef void (*pthreadpool_task_4d_tile_2d_with_id_t)(void *, uint32_t, size_t,
56
+ size_t, size_t, size_t,
57
+ size_t, size_t);
58
+
59
+ typedef void (*pthreadpool_task_2d_tile_1d_with_id_with_thread_t)(
60
+ void *, uint32_t, size_t, size_t, size_t, size_t);
61
+ typedef void (*pthreadpool_task_3d_tile_1d_with_id_with_thread_t)(
62
+ void *, uint32_t, size_t, size_t, size_t, size_t, size_t);
63
+
64
+ /**
65
+ * Disable support for denormalized numbers to the maximum extent possible for
66
+ * the duration of the computation.
67
+ *
68
+ * Handling denormalized floating-point numbers is often implemented in
69
+ * microcode, and incurs significant performance degradation. This hint
70
+ * instructs the thread pool to disable support for denormalized numbers before
71
+ * running the computation by manipulating architecture-specific control
72
+ * registers, and restore the initial value of control registers after the
73
+ * computation is complete. The thread pool temporarily disables denormalized
74
+ * numbers on all threads involved in the computation (i.e. the caller threads,
75
+ * and potentially worker threads).
76
+ *
77
+ * Disabling denormalized numbers may have a small negative effect on results'
78
+ * accuracy. As various architectures differ in capabilities to control
79
+ * processing of denormalized numbers, using this flag may also hurt results'
80
+ * reproducibility across different instruction set architectures.
81
+ */
82
+ #define PTHREADPOOL_FLAG_DISABLE_DENORMALS 0x00000001
83
+
84
+ /**
85
+ * Yield worker threads to the system scheduler after the operation is finished.
86
+ *
87
+ * Force workers to use kernel wait (instead of active spin-wait by default) for
88
+ * new commands after this command is processed. This flag affects only the
89
+ * immediate next operation on this thread pool. To make the thread pool always
90
+ * use kernel wait, pass this flag to all parallelization functions.
91
+ */
92
+ #define PTHREADPOOL_FLAG_YIELD_WORKERS 0x00000002
93
+
94
+ #ifdef __cplusplus
95
+ extern "C" {
96
+ #endif
97
+
98
+ /**
99
+ * Create a thread pool with the specified number of threads.
100
+ *
101
+ * @param threads_count the number of threads in the thread pool.
102
+ * A value of 0 has special interpretation: it creates a thread pool with as
103
+ * many threads as there are logical processors in the system.
104
+ *
105
+ * @returns A pointer to an opaque thread pool object if the call is
106
+ * successful, or NULL pointer if the call failed.
107
+ */
108
+ pthreadpool_t pthreadpool_create(size_t threads_count);
109
+
110
+ /**
111
+ * Query the number of threads in a thread pool.
112
+ *
113
+ * @param threadpool the thread pool to query.
114
+ *
115
+ * @returns The number of threads in the thread pool.
116
+ */
117
+ size_t pthreadpool_get_threads_count(pthreadpool_t threadpool);
118
+
119
+ /**
120
+ * Process items on a 1D grid.
121
+ *
122
+ * The function implements a parallel version of the following snippet:
123
+ *
124
+ * for (size_t i = 0; i < range; i++)
125
+ * function(context, i);
126
+ *
127
+ * When the function returns, all items have been processed and the thread pool
128
+ * is ready for a new task.
129
+ *
130
+ * @note If multiple threads call this function with the same thread pool, the
131
+ * calls are serialized.
132
+ *
133
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
134
+ * is NULL, all items are processed serially on the calling thread.
135
+ * @param function the function to call for each item.
136
+ * @param context the first argument passed to the specified function.
137
+ * @param range the number of items on the 1D grid to process. The
138
+ * specified function will be called once for each item.
139
+ * @param flags a bitwise combination of zero or more optional flags
140
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
141
+ */
142
+ void pthreadpool_parallelize_1d(pthreadpool_t threadpool,
143
+ pthreadpool_task_1d_t function, void *context,
144
+ size_t range, uint32_t flags);
145
+
146
+ /**
147
+ * Process items on a 1D grid passing along the current thread id.
148
+ *
149
+ * The function implements a parallel version of the following snippet:
150
+ *
151
+ * for (size_t i = 0; i < range; i++)
152
+ * function(context, thread_index, i);
153
+ *
154
+ * When the function returns, all items have been processed and the thread pool
155
+ * is ready for a new task.
156
+ *
157
+ * @note If multiple threads call this function with the same thread pool, the
158
+ * calls are serialized.
159
+ *
160
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
161
+ * is NULL, all items are processed serially on the calling thread.
162
+ * @param function the function to call for each item.
163
+ * @param context the first argument passed to the specified function.
164
+ * @param range the number of items on the 1D grid to process. The
165
+ * specified function will be called once for each item.
166
+ * @param flags a bitwise combination of zero or more optional flags
167
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
168
+ */
169
+ void pthreadpool_parallelize_1d_with_thread(
170
+ pthreadpool_t threadpool, pthreadpool_task_1d_with_thread_t function,
171
+ void *context, size_t range, uint32_t flags);
172
+
173
+ /**
174
+ * Process items on a 1D grid using a microarchitecture-aware task function.
175
+ *
176
+ * The function implements a parallel version of the following snippet:
177
+ *
178
+ * uint32_t uarch_index = cpuinfo_initialize() ?
179
+ * cpuinfo_get_current_uarch_index() : default_uarch_index;
180
+ * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
181
+ * for (size_t i = 0; i < range; i++)
182
+ * function(context, uarch_index, i);
183
+ *
184
+ * When the function returns, all items have been processed and the thread pool
185
+ * is ready for a new task.
186
+ *
187
+ * @note If multiple threads call this function with the same thread pool, the
188
+ * calls are serialized.
189
+ *
190
+ * @param threadpool the thread pool to use for parallelisation. If
191
+ * threadpool is NULL, all items are processed serially on the calling
192
+ * thread.
193
+ * @param function the function to call for each item.
194
+ * @param context the first argument passed to the specified
195
+ * function.
196
+ * @param default_uarch_index the microarchitecture index to use when
197
+ * pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
198
+ * or index returned by cpuinfo_get_current_uarch_index() exceeds the
199
+ * max_uarch_index value.
200
+ * @param max_uarch_index the maximum microarchitecture index expected by
201
+ * the specified function. If the index returned by
202
+ * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
203
+ * will be used instead. default_uarch_index can exceed max_uarch_index.
204
+ * @param range the number of items on the 1D grid to process.
205
+ * The specified function will be called once for each item.
206
+ * @param flags a bitwise combination of zero or more optional
207
+ * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
208
+ * PTHREADPOOL_FLAG_YIELD_WORKERS)
209
+ */
210
+ void pthreadpool_parallelize_1d_with_uarch(
211
+ pthreadpool_t threadpool, pthreadpool_task_1d_with_id_t function,
212
+ void *context, uint32_t default_uarch_index, uint32_t max_uarch_index,
213
+ size_t range, uint32_t flags);
214
+
215
+ /**
216
+ * Process items on a 1D grid with specified maximum tile size.
217
+ *
218
+ * The function implements a parallel version of the following snippet:
219
+ *
220
+ * for (size_t i = 0; i < range; i += tile)
221
+ * function(context, i, min(range - i, tile));
222
+ *
223
+ * When the call returns, all items have been processed and the thread pool is
224
+ * ready for a new task.
225
+ *
226
+ * @note If multiple threads call this function with the same thread pool,
227
+ * the calls are serialized.
228
+ *
229
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
230
+ * is NULL, all items are processed serially on the calling thread.
231
+ * @param function the function to call for each tile.
232
+ * @param context the first argument passed to the specified function.
233
+ * @param range the number of items on the 1D grid to process.
234
+ * @param tile the maximum number of items on the 1D grid to process in
235
+ * one function call.
236
+ * @param flags a bitwise combination of zero or more optional flags
237
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
238
+ */
239
+ void pthreadpool_parallelize_1d_tile_1d(pthreadpool_t threadpool,
240
+ pthreadpool_task_1d_tile_1d_t function,
241
+ void *context, size_t range,
242
+ size_t tile, uint32_t flags);
243
+
244
+ /**
245
+ * Process items on a 2D grid.
246
+ *
247
+ * The function implements a parallel version of the following snippet:
248
+ *
249
+ * for (size_t i = 0; i < range_i; i++)
250
+ * for (size_t j = 0; j < range_j; j++)
251
+ * function(context, i, j);
252
+ *
253
+ * When the function returns, all items have been processed and the thread pool
254
+ * is ready for a new task.
255
+ *
256
+ * @note If multiple threads call this function with the same thread pool, the
257
+ * calls are serialized.
258
+ *
259
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
260
+ * is NULL, all items are processed serially on the calling thread.
261
+ * @param function the function to call for each item.
262
+ * @param context the first argument passed to the specified function.
263
+ * @param range_i the number of items to process along the first dimension
264
+ * of the 2D grid.
265
+ * @param range_j the number of items to process along the second dimension
266
+ * of the 2D grid.
267
+ * @param flags a bitwise combination of zero or more optional flags
268
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
269
+ */
270
+ void pthreadpool_parallelize_2d(pthreadpool_t threadpool,
271
+ pthreadpool_task_2d_t function, void *context,
272
+ size_t range_i, size_t range_j, uint32_t flags);
273
+
274
+ /**
275
+ * Process items on a 2D grid passing along the current thread id.
276
+ *
277
+ * The function implements a parallel version of the following snippet:
278
+ *
279
+ * for (size_t i = 0; i < range_i; i++)
280
+ * for (size_t j = 0; j < range_j; j++)
281
+ * function(context, thread_index, i, j);
282
+ *
283
+ * When the function returns, all items have been processed and the thread pool
284
+ * is ready for a new task.
285
+ *
286
+ * @note If multiple threads call this function with the same thread pool, the
287
+ * calls are serialized.
288
+ *
289
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
290
+ * is NULL, all items are processed serially on the calling thread.
291
+ * @param function the function to call for each item.
292
+ * @param context the first argument passed to the specified function.
293
+ * @param range_i the number of items to process along the first dimension
294
+ * of the 2D grid.
295
+ * @param range_j the number of items to process along the second dimension
296
+ * of the 2D grid.
297
+ * @param flags a bitwise combination of zero or more optional flags
298
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
299
+ */
300
+ void pthreadpool_parallelize_2d_with_thread(
301
+ pthreadpool_t threadpool, pthreadpool_task_2d_with_thread_t function,
302
+ void *context, size_t range_i, size_t range_j, uint32_t flags);
303
+
304
+ /**
305
+ * Process items on a 2D grid with the specified maximum tile size along the
306
+ * last grid dimension.
307
+ *
308
+ * The function implements a parallel version of the following snippet:
309
+ *
310
+ * for (size_t i = 0; i < range_i; i++)
311
+ * for (size_t j = 0; j < range_j; j += tile_j)
312
+ * function(context, i, j, min(range_j - j, tile_j));
313
+ *
314
+ * When the function returns, all items have been processed and the thread pool
315
+ * is ready for a new task.
316
+ *
317
+ * @note If multiple threads call this function with the same thread pool, the
318
+ * calls are serialized.
319
+ *
320
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
321
+ * is NULL, all items are processed serially on the calling thread.
322
+ * @param function the function to call for each tile.
323
+ * @param context the first argument passed to the specified function.
324
+ * @param range_i the number of items to process along the first dimension
325
+ * of the 2D grid.
326
+ * @param range_j the number of items to process along the second dimension
327
+ * of the 2D grid.
328
+ * @param tile_j the maximum number of items along the second dimension of
329
+ * the 2D grid to process in one function call.
330
+ * @param flags a bitwise combination of zero or more optional flags
331
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
332
+ */
333
+ void pthreadpool_parallelize_2d_tile_1d(pthreadpool_t threadpool,
334
+ pthreadpool_task_2d_tile_1d_t function,
335
+ void *context, size_t range_i,
336
+ size_t range_j, size_t tile_j,
337
+ uint32_t flags);
338
+
339
+ /**
340
+ * Process items on a 2D grid with the specified maximum tile size along the
341
+ * last grid dimension using a microarchitecture-aware task function.
342
+ *
343
+ * The function implements a parallel version of the following snippet:
344
+ *
345
+ * uint32_t uarch_index = cpuinfo_initialize() ?
346
+ * cpuinfo_get_current_uarch_index() : default_uarch_index;
347
+ * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
348
+ * for (size_t i = 0; i < range_i; i++)
349
+ * for (size_t j = 0; j < range_j; j += tile_j)
350
+ * function(context, uarch_index, i, j, min(range_j - j, tile_j));
351
+ *
352
+ * When the function returns, all items have been processed and the thread pool
353
+ * is ready for a new task.
354
+ *
355
+ * @note If multiple threads call this function with the same thread pool, the
356
+ * calls are serialized.
357
+ *
358
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
359
+ * is NULL, all items are processed serially on the calling thread.
360
+ * @param function the function to call for each tile.
361
+ * @param context the first argument passed to the specified function.
362
+ * @param default_uarch_index the microarchitecture index to use when
363
+ * pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
364
+ * or index returned by cpuinfo_get_current_uarch_index() exceeds the
365
+ * max_uarch_index value.
366
+ * @param max_uarch_index the maximum microarchitecture index expected by
367
+ * the specified function. If the index returned by
368
+ * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
369
+ * will be used instead. default_uarch_index can exceed max_uarch_index.
370
+ * @param range_i the number of items to process along the first dimension
371
+ * of the 2D grid.
372
+ * @param range_j the number of items to process along the second dimension
373
+ * of the 2D grid.
374
+ * @param tile_j the maximum number of items along the second dimension of
375
+ * the 2D grid to process in one function call.
376
+ * @param flags a bitwise combination of zero or more optional flags
377
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
378
+ */
379
+ void pthreadpool_parallelize_2d_tile_1d_with_uarch(
380
+ pthreadpool_t threadpool, pthreadpool_task_2d_tile_1d_with_id_t function,
381
+ void *context, uint32_t default_uarch_index, uint32_t max_uarch_index,
382
+ size_t range_i, size_t range_j, size_t tile_j, uint32_t flags);
383
+
384
+ /**
385
+ * Process items on a 2D grid with the specified maximum tile size along the
386
+ * last grid dimension using a microarchitecture-aware task function and passing
387
+ * along the current thread id.
388
+ *
389
+ * The function implements a parallel version of the following snippet:
390
+ *
391
+ * uint32_t uarch_index = cpuinfo_initialize() ?
392
+ * cpuinfo_get_current_uarch_index() : default_uarch_index;
393
+ * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
394
+ * for (size_t i = 0; i < range_i; i++)
395
+ * for (size_t j = 0; j < range_j; j += tile_j)
396
+ * function(context, uarch_index, thread_index, i, j, min(range_j - j,
397
+ * tile_j));
398
+ *
399
+ * When the function returns, all items have been processed and the thread pool
400
+ * is ready for a new task.
401
+ *
402
+ * @note If multiple threads call this function with the same thread pool, the
403
+ * calls are serialized.
404
+ *
405
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
406
+ * is NULL, all items are processed serially on the calling thread.
407
+ * @param function the function to call for each tile.
408
+ * @param context the first argument passed to the specified function.
409
+ * @param default_uarch_index the microarchitecture index to use when
410
+ * pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
411
+ * or index returned by cpuinfo_get_current_uarch_index() exceeds the
412
+ * max_uarch_index value.
413
+ * @param max_uarch_index the maximum microarchitecture index expected by
414
+ * the specified function. If the index returned by
415
+ * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
416
+ * will be used instead. default_uarch_index can exceed max_uarch_index.
417
+ * @param range_i the number of items to process along the first dimension
418
+ * of the 2D grid.
419
+ * @param range_j the number of items to process along the second dimension
420
+ * of the 2D grid.
421
+ * @param tile_j the maximum number of items along the second dimension of
422
+ * the 2D grid to process in one function call.
423
+ * @param flags a bitwise combination of zero or more optional flags
424
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
425
+ */
426
+ void pthreadpool_parallelize_2d_tile_1d_with_uarch_with_thread(
427
+ pthreadpool_t threadpool,
428
+ pthreadpool_task_2d_tile_1d_with_id_with_thread_t function, void *context,
429
+ uint32_t default_uarch_index, uint32_t max_uarch_index, size_t range_i,
430
+ size_t range_j, size_t tile_j, uint32_t flags);
431
+
432
+ /**
433
+ * Process items on a 2D grid with the specified maximum tile size along each
434
+ * grid dimension.
435
+ *
436
+ * The function implements a parallel version of the following snippet:
437
+ *
438
+ * for (size_t i = 0; i < range_i; i += tile_i)
439
+ * for (size_t j = 0; j < range_j; j += tile_j)
440
+ * function(context, i, j,
441
+ * min(range_i - i, tile_i), min(range_j - j, tile_j));
442
+ *
443
+ * When the function returns, all items have been processed and the thread pool
444
+ * is ready for a new task.
445
+ *
446
+ * @note If multiple threads call this function with the same thread pool, the
447
+ * calls are serialized.
448
+ *
449
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
450
+ * is NULL, all items are processed serially on the calling thread.
451
+ * @param function the function to call for each tile.
452
+ * @param context the first argument passed to the specified function.
453
+ * @param range_i the number of items to process along the first dimension
454
+ * of the 2D grid.
455
+ * @param range_j the number of items to process along the second dimension
456
+ * of the 2D grid.
457
+ * @param tile_i the maximum number of items along the first dimension of
458
+ * the 2D grid to process in one function call.
459
+ * @param tile_j the maximum number of items along the second dimension of
460
+ * the 2D grid to process in one function call.
461
+ * @param flags a bitwise combination of zero or more optional flags
462
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
463
+ */
464
+ void pthreadpool_parallelize_2d_tile_2d(pthreadpool_t threadpool,
465
+ pthreadpool_task_2d_tile_2d_t function,
466
+ void *context, size_t range_i,
467
+ size_t range_j, size_t tile_i,
468
+ size_t tile_j, uint32_t flags);
469
+
470
+ /**
471
+ * Process items on a 2D grid with the specified maximum tile size along each
472
+ * grid dimension using a microarchitecture-aware task function.
473
+ *
474
+ * The function implements a parallel version of the following snippet:
475
+ *
476
+ * uint32_t uarch_index = cpuinfo_initialize() ?
477
+ * cpuinfo_get_current_uarch_index() : default_uarch_index;
478
+ * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
479
+ * for (size_t i = 0; i < range_i; i += tile_i)
480
+ * for (size_t j = 0; j < range_j; j += tile_j)
481
+ * function(context, uarch_index, i, j,
482
+ * min(range_i - i, tile_i), min(range_j - j, tile_j));
483
+ *
484
+ * When the function returns, all items have been processed and the thread pool
485
+ * is ready for a new task.
486
+ *
487
+ * @note If multiple threads call this function with the same thread pool, the
488
+ * calls are serialized.
489
+ *
490
+ * @param threadpool the thread pool to use for parallelisation. If
491
+ * threadpool is NULL, all items are processed serially on the calling
492
+ * thread.
493
+ * @param function the function to call for each tile.
494
+ * @param context the first argument passed to the specified
495
+ * function.
496
+ * @param default_uarch_index the microarchitecture index to use when
497
+ * pthreadpool is configured without cpuinfo,
498
+ * cpuinfo initialization failed, or index returned
499
+ * by cpuinfo_get_current_uarch_index() exceeds
500
+ * the max_uarch_index value.
501
+ * @param max_uarch_index the maximum microarchitecture index expected
502
+ * by the specified function. If the index returned
503
+ * by cpuinfo_get_current_uarch_index() exceeds this
504
+ * value, default_uarch_index will be used instead.
505
+ * default_uarch_index can exceed max_uarch_index.
506
+ * @param range_i the number of items to process along the first
507
+ * dimension of the 2D grid.
508
+ * @param range_j the number of items to process along the second
509
+ * dimension of the 2D grid.
510
+ * @param tile_i the maximum number of items along the first
511
+ * dimension of the 2D grid to process in one function call.
512
+ * @param tile_j the maximum number of items along the second
513
+ * dimension of the 2D grid to process in one function call.
514
+ * @param flags a bitwise combination of zero or more optional
515
+ * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
516
+ * PTHREADPOOL_FLAG_YIELD_WORKERS)
517
+ */
518
+ void pthreadpool_parallelize_2d_tile_2d_with_uarch(
519
+ pthreadpool_t threadpool, pthreadpool_task_2d_tile_2d_with_id_t function,
520
+ void *context, uint32_t default_uarch_index, uint32_t max_uarch_index,
521
+ size_t range_i, size_t range_j, size_t tile_i, size_t tile_j,
522
+ uint32_t flags);
523
+
524
+ /**
525
+ * Process items on a 3D grid.
526
+ *
527
+ * The function implements a parallel version of the following snippet:
528
+ *
529
+ * for (size_t i = 0; i < range_i; i++)
530
+ * for (size_t j = 0; j < range_j; j++)
531
+ * for (size_t k = 0; k < range_k; k++)
532
+ * function(context, i, j, k);
533
+ *
534
+ * When the function returns, all items have been processed and the thread pool
535
+ * is ready for a new task.
536
+ *
537
+ * @note If multiple threads call this function with the same thread pool, the
538
+ * calls are serialized.
539
+ *
540
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
541
+ * is NULL, all items are processed serially on the calling thread.
542
+ * @param function the function to call for each item.
543
+ * @param context the first argument passed to the specified function.
544
+ * @param range_i the number of items to process along the first dimension
545
+ * of the 3D grid.
546
+ * @param range_j the number of items to process along the second dimension
547
+ * of the 3D grid.
548
+ * @param range_k the number of items to process along the third dimension
549
+ * of the 3D grid.
550
+ * @param flags a bitwise combination of zero or more optional flags
551
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
552
+ */
553
+ void pthreadpool_parallelize_3d(pthreadpool_t threadpool,
554
+ pthreadpool_task_3d_t function, void *context,
555
+ size_t range_i, size_t range_j, size_t range_k,
556
+ uint32_t flags);
557
+
558
+ /**
559
+ * Process items on a 3D grid with the specified maximum tile size along the
560
+ * last grid dimension.
561
+ *
562
+ * The function implements a parallel version of the following snippet:
563
+ *
564
+ * for (size_t i = 0; i < range_i; i++)
565
+ * for (size_t j = 0; j < range_j; j++)
566
+ * for (size_t k = 0; k < range_k; k += tile_k)
567
+ * function(context, i, j, k, min(range_k - k, tile_k));
568
+ *
569
+ * When the function returns, all items have been processed and the thread pool
570
+ * is ready for a new task.
571
+ *
572
+ * @note If multiple threads call this function with the same thread pool, the
573
+ * calls are serialized.
574
+ *
575
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
576
+ * is NULL, all items are processed serially on the calling thread.
577
+ * @param function the function to call for each tile.
578
+ * @param context the first argument passed to the specified function.
579
+ * @param range_i the number of items to process along the first dimension
580
+ * of the 3D grid.
581
+ * @param range_j the number of items to process along the second dimension
582
+ * of the 3D grid.
583
+ * @param range_k the number of items to process along the third dimension
584
+ * of the 3D grid.
585
+ * @param tile_k the maximum number of items along the third dimension of
586
+ * the 3D grid to process in one function call.
587
+ * @param flags a bitwise combination of zero or more optional flags
588
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
589
+ */
590
+ void pthreadpool_parallelize_3d_tile_1d(pthreadpool_t threadpool,
591
+ pthreadpool_task_3d_tile_1d_t function,
592
+ void *context, size_t range_i,
593
+ size_t range_j, size_t range_k,
594
+ size_t tile_k, uint32_t flags);
595
+
596
+ /**
597
+ * Process items on a 3D grid with the specified maximum tile size along the
598
+ * last grid dimension and passing along the current thread id.
599
+ *
600
+ * The function implements a parallel version of the following snippet:
601
+ *
602
+ * for (size_t i = 0; i < range_i; i++)
603
+ * for (size_t j = 0; j < range_j; j++)
604
+ * for (size_t k = 0; k < range_k; k += tile_k)
605
+ * function(context, thread_index, i, j, k, min(range_k - k, tile_k));
606
+ *
607
+ * When the function returns, all items have been processed and the thread pool
608
+ * is ready for a new task.
609
+ *
610
+ * @note If multiple threads call this function with the same thread pool, the
611
+ * calls are serialized.
612
+ *
613
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
614
+ * is NULL, all items are processed serially on the calling thread.
615
+ * @param function the function to call for each tile.
616
+ * @param context the first argument passed to the specified function.
617
+ * @param range_i the number of items to process along the first dimension
618
+ * of the 3D grid.
619
+ * @param range_j the number of items to process along the second dimension
620
+ * of the 3D grid.
621
+ * @param range_k the number of items to process along the third dimension
622
+ * of the 3D grid.
623
+ * @param tile_k the maximum number of items along the third dimension of
624
+ * the 3D grid to process in one function call.
625
+ * @param flags a bitwise combination of zero or more optional flags
626
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
627
+ */
628
+ void pthreadpool_parallelize_3d_tile_1d_with_thread(
629
+ pthreadpool_t threadpool,
630
+ pthreadpool_task_3d_tile_1d_with_thread_t function, void *context,
631
+ size_t range_i, size_t range_j, size_t range_k, size_t tile_k,
632
+ uint32_t flags);
633
+
634
+ /**
635
+ * Process items on a 3D grid with the specified maximum tile size along the
636
+ * last grid dimension using a microarchitecture-aware task function.
637
+ *
638
+ * The function implements a parallel version of the following snippet:
639
+ *
640
+ * uint32_t uarch_index = cpuinfo_initialize() ?
641
+ * cpuinfo_get_current_uarch_index() : default_uarch_index;
642
+ * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
643
+ * for (size_t i = 0; i < range_i; i++)
644
+ * for (size_t j = 0; j < range_j; j++)
645
+ * for (size_t k = 0; k < range_k; k += tile_k)
646
+ * function(context, uarch_index, i, j, k, min(range_k - k, tile_k));
647
+ *
648
+ * When the function returns, all items have been processed and the thread pool
649
+ * is ready for a new task.
650
+ *
651
+ * @note If multiple threads call this function with the same thread pool, the
652
+ * calls are serialized.
653
+ *
654
+ * @param threadpool the thread pool to use for parallelisation. If
655
+ * threadpool is NULL, all items are processed serially on the calling
656
+ * thread.
657
+ * @param function the function to call for each tile.
658
+ * @param context the first argument passed to the specified
659
+ * function.
660
+ * @param default_uarch_index the microarchitecture index to use when
661
+ * pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
662
+ * or index returned by cpuinfo_get_current_uarch_index() exceeds the
663
+ * max_uarch_index value.
664
+ * @param max_uarch_index the maximum microarchitecture index expected by
665
+ * the specified function. If the index returned by
666
+ * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
667
+ * will be used instead. default_uarch_index can exceed max_uarch_index.
668
+ * @param range_i the number of items to process along the first
669
+ * dimension of the 3D grid.
670
+ * @param range_j the number of items to process along the second
671
+ * dimension of the 3D grid.
672
+ * @param range_k the number of items to process along the third
673
+ * dimension of the 3D grid.
674
+ * @param tile_k the maximum number of items along the third
675
+ * dimension of the 3D grid to process in one function call.
676
+ * @param flags a bitwise combination of zero or more optional
677
+ * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
678
+ * PTHREADPOOL_FLAG_YIELD_WORKERS)
679
+ */
680
+ void pthreadpool_parallelize_3d_tile_1d_with_uarch(
681
+ pthreadpool_t threadpool, pthreadpool_task_3d_tile_1d_with_id_t function,
682
+ void *context, uint32_t default_uarch_index, uint32_t max_uarch_index,
683
+ size_t range_i, size_t range_j, size_t range_k, size_t tile_k,
684
+ uint32_t flags);
685
+
686
+ /**
687
+ * Process items on a 3D grid with the specified maximum tile size along the
688
+ * last grid dimension using a microarchitecture-aware task function and passing
689
+ * along the current thread id.
690
+ *
691
+ * The function implements a parallel version of the following snippet:
692
+ *
693
+ * uint32_t uarch_index = cpuinfo_initialize() ?
694
+ * cpuinfo_get_current_uarch_index() : default_uarch_index;
695
+ * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
696
+ * for (size_t i = 0; i < range_i; i++)
697
+ * for (size_t j = 0; j < range_j; j++)
698
+ * for (size_t k = 0; k < range_k; k += tile_k)
699
+ * function(context, uarch_index, thread_index, i, j, k,
700
+ * min(range_k - k, tile_k));
701
+ *
702
+ * When the function returns, all items have been processed and the thread pool
703
+ * is ready for a new task.
704
+ *
705
+ * @note If multiple threads call this function with the same thread pool, the
706
+ * calls are serialized.
707
+ *
708
+ * @param threadpool the thread pool to use for parallelisation. If
709
+ * threadpool is NULL, all items are processed serially on the calling
710
+ * thread.
711
+ * @param function the function to call for each tile.
712
+ * @param context the first argument passed to the specified
713
+ * function.
714
+ * @param default_uarch_index the microarchitecture index to use when
715
+ * pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
716
+ * or index returned by cpuinfo_get_current_uarch_index() exceeds the
717
+ * max_uarch_index value.
718
+ * @param max_uarch_index the maximum microarchitecture index expected by
719
+ * the specified function. If the index returned by
720
+ * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
721
+ * will be used instead. default_uarch_index can exceed max_uarch_index.
722
+ * @param range_i the number of items to process along the first
723
+ * dimension of the 3D grid.
724
+ * @param range_j the number of items to process along the second
725
+ * dimension of the 3D grid.
726
+ * @param range_k the number of items to process along the third
727
+ * dimension of the 3D grid.
728
+ * @param tile_k the maximum number of items along the third
729
+ * dimension of the 3D grid to process in one function call.
730
+ * @param flags a bitwise combination of zero or more optional
731
+ * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
732
+ * PTHREADPOOL_FLAG_YIELD_WORKERS)
733
+ */
734
+ void pthreadpool_parallelize_3d_tile_1d_with_uarch_with_thread(
735
+ pthreadpool_t threadpool,
736
+ pthreadpool_task_3d_tile_1d_with_id_with_thread_t function, void *context,
737
+ uint32_t default_uarch_index, uint32_t max_uarch_index, size_t range_i,
738
+ size_t range_j, size_t range_k, size_t tile_k, uint32_t flags);
739
+
740
+ /**
741
+ * Process items on a 3D grid with the specified maximum tile size along the
742
+ * last two grid dimensions.
743
+ *
744
+ * The function implements a parallel version of the following snippet:
745
+ *
746
+ * for (size_t i = 0; i < range_i; i++)
747
+ * for (size_t j = 0; j < range_j; j += tile_j)
748
+ * for (size_t k = 0; k < range_k; k += tile_k)
749
+ * function(context, i, j, k,
750
+ * min(range_j - j, tile_j), min(range_k - k, tile_k));
751
+ *
752
+ * When the function returns, all items have been processed and the thread pool
753
+ * is ready for a new task.
754
+ *
755
+ * @note If multiple threads call this function with the same thread pool, the
756
+ * calls are serialized.
757
+ *
758
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
759
+ * is NULL, all items are processed serially on the calling thread.
760
+ * @param function the function to call for each tile.
761
+ * @param context the first argument passed to the specified function.
762
+ * @param range_i the number of items to process along the first dimension
763
+ * of the 3D grid.
764
+ * @param range_j the number of items to process along the second dimension
765
+ * of the 3D grid.
766
+ * @param range_k the number of items to process along the third dimension
767
+ * of the 3D grid.
768
+ * @param tile_j the maximum number of items along the second dimension of
769
+ * the 3D grid to process in one function call.
770
+ * @param tile_k the maximum number of items along the third dimension of
771
+ * the 3D grid to process in one function call.
772
+ * @param flags a bitwise combination of zero or more optional flags
773
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
774
+ */
775
+ void pthreadpool_parallelize_3d_tile_2d(pthreadpool_t threadpool,
776
+ pthreadpool_task_3d_tile_2d_t function,
777
+ void *context, size_t range_i,
778
+ size_t range_j, size_t range_k,
779
+ size_t tile_j, size_t tile_k,
780
+ uint32_t flags);
781
+
782
+ /**
783
+ * Process items on a 3D grid with the specified maximum tile size along the
784
+ * last two grid dimensions using a microarchitecture-aware task function.
785
+ *
786
+ * The function implements a parallel version of the following snippet:
787
+ *
788
+ * uint32_t uarch_index = cpuinfo_initialize() ?
789
+ * cpuinfo_get_current_uarch_index() : default_uarch_index;
790
+ * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
791
+ * for (size_t i = 0; i < range_i; i++)
792
+ * for (size_t j = 0; j < range_j; j += tile_j)
793
+ * for (size_t k = 0; k < range_k; k += tile_k)
794
+ * function(context, uarch_index, i, j, k,
795
+ * min(range_j - j, tile_j), min(range_k - k, tile_k));
796
+ *
797
+ * When the function returns, all items have been processed and the thread pool
798
+ * is ready for a new task.
799
+ *
800
+ * @note If multiple threads call this function with the same thread pool, the
801
+ * calls are serialized.
802
+ *
803
+ * @param threadpool the thread pool to use for parallelisation. If
804
+ * threadpool is NULL, all items are processed serially on the calling
805
+ * thread.
806
+ * @param function the function to call for each tile.
807
+ * @param context the first argument passed to the specified
808
+ * function.
809
+ * @param default_uarch_index the microarchitecture index to use when
810
+ * pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
811
+ * or index returned by cpuinfo_get_current_uarch_index() exceeds the
812
+ * max_uarch_index value.
813
+ * @param max_uarch_index the maximum microarchitecture index expected by
814
+ * the specified function. If the index returned by
815
+ * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
816
+ * will be used instead. default_uarch_index can exceed max_uarch_index.
817
+ * @param range_i the number of items to process along the first
818
+ * dimension of the 3D grid.
819
+ * @param range_j the number of items to process along the second
820
+ * dimension of the 3D grid.
821
+ * @param range_k the number of items to process along the third
822
+ * dimension of the 3D grid.
823
+ * @param tile_j the maximum number of items along the second
824
+ * dimension of the 3D grid to process in one function call.
825
+ * @param tile_k the maximum number of items along the third
826
+ * dimension of the 3D grid to process in one function call.
827
+ * @param flags a bitwise combination of zero or more optional
828
+ * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
829
+ * PTHREADPOOL_FLAG_YIELD_WORKERS)
830
+ */
831
+ void pthreadpool_parallelize_3d_tile_2d_with_uarch(
832
+ pthreadpool_t threadpool, pthreadpool_task_3d_tile_2d_with_id_t function,
833
+ void *context, uint32_t default_uarch_index, uint32_t max_uarch_index,
834
+ size_t range_i, size_t range_j, size_t range_k, size_t tile_j,
835
+ size_t tile_k, uint32_t flags);
836
+
837
+ /**
838
+ * Process items on a 4D grid.
839
+ *
840
+ * The function implements a parallel version of the following snippet:
841
+ *
842
+ * for (size_t i = 0; i < range_i; i++)
843
+ * for (size_t j = 0; j < range_j; j++)
844
+ * for (size_t k = 0; k < range_k; k++)
845
+ * for (size_t l = 0; l < range_l; l++)
846
+ * function(context, i, j, k, l);
847
+ *
848
+ * When the function returns, all items have been processed and the thread pool
849
+ * is ready for a new task.
850
+ *
851
+ * @note If multiple threads call this function with the same thread pool, the
852
+ * calls are serialized.
853
+ *
854
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
855
+ * is NULL, all items are processed serially on the calling thread.
856
+ * @param function the function to call for each item.
857
+ * @param context the first argument passed to the specified function.
858
+ * @param range_i the number of items to process along the first dimension
859
+ * of the 4D grid.
860
+ * @param range_j the number of items to process along the second dimension
861
+ * of the 4D grid.
862
+ * @param range_k the number of items to process along the third dimension
863
+ * of the 4D grid.
864
+ * @param range_l the number of items to process along the fourth dimension
865
+ * of the 4D grid.
866
+ * @param flags a bitwise combination of zero or more optional flags
867
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
868
+ */
869
+ void pthreadpool_parallelize_4d(pthreadpool_t threadpool,
870
+ pthreadpool_task_4d_t function, void *context,
871
+ size_t range_i, size_t range_j, size_t range_k,
872
+ size_t range_l, uint32_t flags);
873
+
874
+ /**
875
+ * Process items on a 4D grid with the specified maximum tile size along the
876
+ * last grid dimension.
877
+ *
878
+ * The function implements a parallel version of the following snippet:
879
+ *
880
+ * for (size_t i = 0; i < range_i; i++)
881
+ * for (size_t j = 0; j < range_j; j++)
882
+ * for (size_t k = 0; k < range_k; k++)
883
+ * for (size_t l = 0; l < range_l; l += tile_l)
884
+ * function(context, i, j, k, l, min(range_l - l, tile_l));
885
+ *
886
+ * When the function returns, all items have been processed and the thread pool
887
+ * is ready for a new task.
888
+ *
889
+ * @note If multiple threads call this function with the same thread pool, the
890
+ * calls are serialized.
891
+ *
892
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
893
+ * is NULL, all items are processed serially on the calling thread.
894
+ * @param function the function to call for each tile.
895
+ * @param context the first argument passed to the specified function.
896
+ * @param range_i the number of items to process along the first dimension
897
+ * of the 4D grid.
898
+ * @param range_j the number of items to process along the second dimension
899
+ * of the 4D grid.
900
+ * @param range_k the number of items to process along the third dimension
901
+ * of the 4D grid.
902
+ * @param range_l the number of items to process along the fourth dimension
903
+ * of the 4D grid.
904
+ * @param tile_l the maximum number of items along the fourth dimension of
905
+ * the 4D grid to process in one function call.
906
+ * @param flags a bitwise combination of zero or more optional flags
907
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
908
+ */
909
+ void pthreadpool_parallelize_4d_tile_1d(pthreadpool_t threadpool,
910
+ pthreadpool_task_4d_tile_1d_t function,
911
+ void *context, size_t range_i,
912
+ size_t range_j, size_t range_k,
913
+ size_t range_l, size_t tile_l,
914
+ uint32_t flags);
915
+
916
+ /**
917
+ * Process items on a 4D grid with the specified maximum tile size along the
918
+ * last two grid dimensions.
919
+ *
920
+ * The function implements a parallel version of the following snippet:
921
+ *
922
+ * for (size_t i = 0; i < range_i; i++)
923
+ * for (size_t j = 0; j < range_j; j++)
924
+ * for (size_t k = 0; k < range_k; k += tile_k)
925
+ * for (size_t l = 0; l < range_l; l += tile_l)
926
+ * function(context, i, j, k, l,
927
+ * min(range_k - k, tile_k), min(range_l - l, tile_l));
928
+ *
929
+ * When the function returns, all items have been processed and the thread pool
930
+ * is ready for a new task.
931
+ *
932
+ * @note If multiple threads call this function with the same thread pool, the
933
+ * calls are serialized.
934
+ *
935
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
936
+ * is NULL, all items are processed serially on the calling thread.
937
+ * @param function the function to call for each tile.
938
+ * @param context the first argument passed to the specified function.
939
+ * @param range_i the number of items to process along the first dimension
940
+ * of the 4D grid.
941
+ * @param range_j the number of items to process along the second dimension
942
+ * of the 4D grid.
943
+ * @param range_k the number of items to process along the third dimension
944
+ * of the 4D grid.
945
+ * @param range_l the number of items to process along the fourth dimension
946
+ * of the 4D grid.
947
+ * @param tile_k the maximum number of items along the third dimension of
948
+ * the 4D grid to process in one function call.
949
+ * @param tile_l the maximum number of items along the fourth dimension of
950
+ * the 4D grid to process in one function call.
951
+ * @param flags a bitwise combination of zero or more optional flags
952
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
953
+ */
954
+ void pthreadpool_parallelize_4d_tile_2d(pthreadpool_t threadpool,
955
+ pthreadpool_task_4d_tile_2d_t function,
956
+ void *context, size_t range_i,
957
+ size_t range_j, size_t range_k,
958
+ size_t range_l, size_t tile_k,
959
+ size_t tile_l, uint32_t flags);
960
+
961
+ /**
962
+ * Process items on a 4D grid with the specified maximum tile size along the
963
+ * last two grid dimensions using a microarchitecture-aware task function.
964
+ *
965
+ * The function implements a parallel version of the following snippet:
966
+ *
967
+ * uint32_t uarch_index = cpuinfo_initialize() ?
968
+ * cpuinfo_get_current_uarch_index() : default_uarch_index;
969
+ * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
970
+ * for (size_t i = 0; i < range_i; i++)
971
+ * for (size_t j = 0; j < range_j; j++)
972
+ * for (size_t k = 0; k < range_k; k += tile_k)
973
+ * for (size_t l = 0; l < range_l; l += tile_l)
974
+ * function(context, uarch_index, i, j, k, l,
975
+ * min(range_k - k, tile_k), min(range_l - l, tile_l));
976
+ *
977
+ * When the function returns, all items have been processed and the thread pool
978
+ * is ready for a new task.
979
+ *
980
+ * @note If multiple threads call this function with the same thread pool, the
981
+ * calls are serialized.
982
+ *
983
+ * @param threadpool the thread pool to use for parallelisation. If
984
+ * threadpool is NULL, all items are processed serially on the calling
985
+ * thread.
986
+ * @param function the function to call for each tile.
987
+ * @param context the first argument passed to the specified
988
+ * function.
989
+ * @param default_uarch_index the microarchitecture index to use when
990
+ * pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
991
+ * or index returned by cpuinfo_get_current_uarch_index() exceeds the
992
+ * max_uarch_index value.
993
+ * @param max_uarch_index the maximum microarchitecture index expected by
994
+ * the specified function. If the index returned by
995
+ * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
996
+ * will be used instead. default_uarch_index can exceed max_uarch_index.
997
+ * @param range_i the number of items to process along the first
998
+ * dimension of the 4D grid.
999
+ * @param range_j the number of items to process along the second
1000
+ * dimension of the 4D grid.
1001
+ * @param range_k the number of items to process along the third
1002
+ * dimension of the 4D grid.
1003
+ * @param range_l the number of items to process along the fourth
1004
+ * dimension of the 4D grid.
1005
+ * @param tile_k the maximum number of items along the third
1006
+ * dimension of the 4D grid to process in one function call.
1007
+ * @param tile_l the maximum number of items along the fourth
1008
+ * dimension of the 4D grid to process in one function call.
1009
+ * @param flags a bitwise combination of zero or more optional
1010
+ * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
1011
+ * PTHREADPOOL_FLAG_YIELD_WORKERS)
1012
+ */
1013
+ void pthreadpool_parallelize_4d_tile_2d_with_uarch(
1014
+ pthreadpool_t threadpool, pthreadpool_task_4d_tile_2d_with_id_t function,
1015
+ void *context, uint32_t default_uarch_index, uint32_t max_uarch_index,
1016
+ size_t range_i, size_t range_j, size_t range_k, size_t range_l,
1017
+ size_t tile_k, size_t tile_l, uint32_t flags);
1018
+
1019
+ /**
1020
+ * Process items on a 5D grid.
1021
+ *
1022
+ * The function implements a parallel version of the following snippet:
1023
+ *
1024
+ * for (size_t i = 0; i < range_i; i++)
1025
+ * for (size_t j = 0; j < range_j; j++)
1026
+ * for (size_t k = 0; k < range_k; k++)
1027
+ * for (size_t l = 0; l < range_l; l++)
1028
+ * for (size_t m = 0; m < range_m; m++)
1029
+ * function(context, i, j, k, l, m);
1030
+ *
1031
+ * When the function returns, all items have been processed and the thread pool
1032
+ * is ready for a new task.
1033
+ *
1034
+ * @note If multiple threads call this function with the same thread pool, the
1035
+ * calls are serialized.
1036
+ *
1037
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1038
+ * is NULL, all items are processed serially on the calling thread.
1039
+ * @param function the function to call for each item.
1040
+ * @param context the first argument passed to the specified function.
1041
+ * @param range_i the number of items to process along the first dimension
1042
+ * of the 5D grid.
1043
+ * @param range_j the number of items to process along the second dimension
1044
+ * of the 5D grid.
1045
+ * @param range_k the number of items to process along the third dimension
1046
+ * of the 5D grid.
1047
+ * @param range_l the number of items to process along the fourth dimension
1048
+ * of the 5D grid.
1049
+ * @param range_m the number of items to process along the fifth dimension
1050
+ * of the 5D grid.
1051
+ * @param flags a bitwise combination of zero or more optional flags
1052
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1053
+ */
1054
+ void pthreadpool_parallelize_5d(pthreadpool_t threadpool,
1055
+ pthreadpool_task_5d_t function, void *context,
1056
+ size_t range_i, size_t range_j, size_t range_k,
1057
+ size_t range_l, size_t range_m, uint32_t flags);
1058
+
1059
+ /**
1060
+ * Process items on a 5D grid with the specified maximum tile size along the
1061
+ * last grid dimension.
1062
+ *
1063
+ * The function implements a parallel version of the following snippet:
1064
+ *
1065
+ * for (size_t i = 0; i < range_i; i++)
1066
+ * for (size_t j = 0; j < range_j; j++)
1067
+ * for (size_t k = 0; k < range_k; k++)
1068
+ * for (size_t l = 0; l < range_l; l++)
1069
+ * for (size_t m = 0; m < range_m; m += tile_m)
1070
+ * function(context, i, j, k, l, m, min(range_m - m, tile_m));
1071
+ *
1072
+ * When the function returns, all items have been processed and the thread pool
1073
+ * is ready for a new task.
1074
+ *
1075
+ * @note If multiple threads call this function with the same thread pool, the
1076
+ * calls are serialized.
1077
+ *
1078
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1079
+ * is NULL, all items are processed serially on the calling thread.
1080
+ * @param function the function to call for each tile.
1081
+ * @param context the first argument passed to the specified function.
1082
+ * @param range_i the number of items to process along the first dimension
1083
+ * of the 5D grid.
1084
+ * @param range_j the number of items to process along the second dimension
1085
+ * of the 5D grid.
1086
+ * @param range_k the number of items to process along the third dimension
1087
+ * of the 5D grid.
1088
+ * @param range_l the number of items to process along the fourth dimension
1089
+ * of the 5D grid.
1090
+ * @param range_m the number of items to process along the fifth dimension
1091
+ * of the 5D grid.
1092
+ * @param tile_m the maximum number of items along the fifth dimension of
1093
+ * the 5D grid to process in one function call.
1094
+ * @param flags a bitwise combination of zero or more optional flags
1095
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1096
+ */
1097
+ void pthreadpool_parallelize_5d_tile_1d(pthreadpool_t threadpool,
1098
+ pthreadpool_task_5d_tile_1d_t function,
1099
+ void *context, size_t range_i,
1100
+ size_t range_j, size_t range_k,
1101
+ size_t range_l, size_t range_m,
1102
+ size_t tile_m, uint32_t flags);
1103
+
1104
+ /**
1105
+ * Process items on a 5D grid with the specified maximum tile size along the
1106
+ * last two grid dimensions.
1107
+ *
1108
+ * The function implements a parallel version of the following snippet:
1109
+ *
1110
+ * for (size_t i = 0; i < range_i; i++)
1111
+ * for (size_t j = 0; j < range_j; j++)
1112
+ * for (size_t k = 0; k < range_k; k++)
1113
+ * for (size_t l = 0; l < range_l; l += tile_l)
1114
+ * for (size_t m = 0; m < range_m; m += tile_m)
1115
+ * function(context, i, j, k, l, m,
1116
+ * min(range_l - l, tile_l), min(range_m - m, tile_m));
1117
+ *
1118
+ * When the function returns, all items have been processed and the thread pool
1119
+ * is ready for a new task.
1120
+ *
1121
+ * @note If multiple threads call this function with the same thread pool, the
1122
+ * calls are serialized.
1123
+ *
1124
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1125
+ * is NULL, all items are processed serially on the calling thread.
1126
+ * @param function the function to call for each tile.
1127
+ * @param context the first argument passed to the specified function.
1128
+ * @param range_i the number of items to process along the first dimension
1129
+ * of the 5D grid.
1130
+ * @param range_j the number of items to process along the second dimension
1131
+ * of the 5D grid.
1132
+ * @param range_k the number of items to process along the third dimension
1133
+ * of the 5D grid.
1134
+ * @param range_l the number of items to process along the fourth dimension
1135
+ * of the 5D grid.
1136
+ * @param range_m the number of items to process along the fifth dimension
1137
+ * of the 5D grid.
1138
+ * @param tile_l the maximum number of items along the fourth dimension of
1139
+ * the 5D grid to process in one function call.
1140
+ * @param tile_m the maximum number of items along the fifth dimension of
1141
+ * the 5D grid to process in one function call.
1142
+ * @param flags a bitwise combination of zero or more optional flags
1143
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1144
+ */
1145
+ void pthreadpool_parallelize_5d_tile_2d(pthreadpool_t threadpool,
1146
+ pthreadpool_task_5d_tile_2d_t function,
1147
+ void *context, size_t range_i,
1148
+ size_t range_j, size_t range_k,
1149
+ size_t range_l, size_t range_m,
1150
+ size_t tile_l, size_t tile_m,
1151
+ uint32_t flags);
1152
+
1153
+ /**
1154
+ * Process items on a 6D grid.
1155
+ *
1156
+ * The function implements a parallel version of the following snippet:
1157
+ *
1158
+ * for (size_t i = 0; i < range_i; i++)
1159
+ * for (size_t j = 0; j < range_j; j++)
1160
+ * for (size_t k = 0; k < range_k; k++)
1161
+ * for (size_t l = 0; l < range_l; l++)
1162
+ * for (size_t m = 0; m < range_m; m++)
1163
+ * for (size_t n = 0; n < range_n; n++)
1164
+ * function(context, i, j, k, l, m, n);
1165
+ *
1166
+ * When the function returns, all items have been processed and the thread pool
1167
+ * is ready for a new task.
1168
+ *
1169
+ * @note If multiple threads call this function with the same thread pool, the
1170
+ * calls are serialized.
1171
+ *
1172
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1173
+ * is NULL, all items are processed serially on the calling thread.
1174
+ * @param function the function to call for each item.
1175
+ * @param context the first argument passed to the specified function.
1176
+ * @param range_i the number of items to process along the first dimension
1177
+ * of the 6D grid.
1178
+ * @param range_j the number of items to process along the second dimension
1179
+ * of the 6D grid.
1180
+ * @param range_k the number of items to process along the third dimension
1181
+ * of the 6D grid.
1182
+ * @param range_l the number of items to process along the fourth dimension
1183
+ * of the 6D grid.
1184
+ * @param range_m the number of items to process along the fifth dimension
1185
+ * of the 6D grid.
1186
+ * @param range_n the number of items to process along the sixth dimension
1187
+ * of the 6D grid.
1190
+ * @param flags a bitwise combination of zero or more optional flags
1191
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1192
+ */
1193
+ void pthreadpool_parallelize_6d(pthreadpool_t threadpool,
1194
+ pthreadpool_task_6d_t function, void *context,
1195
+ size_t range_i, size_t range_j, size_t range_k,
1196
+ size_t range_l, size_t range_m, size_t range_n,
1197
+ uint32_t flags);
1198
+
1199
+ /**
1200
+ * Process items on a 6D grid with the specified maximum tile size along the
1201
+ * last grid dimension.
1202
+ *
1203
+ * The function implements a parallel version of the following snippet:
1204
+ *
1205
+ * for (size_t i = 0; i < range_i; i++)
1206
+ * for (size_t j = 0; j < range_j; j++)
1207
+ * for (size_t k = 0; k < range_k; k++)
1208
+ * for (size_t l = 0; l < range_l; l++)
1209
+ * for (size_t m = 0; m < range_m; m++)
1210
+ * for (size_t n = 0; n < range_n; n += tile_n)
1211
+ * function(context, i, j, k, l, m, n, min(range_n - n, tile_n));
1212
+ *
1213
+ * When the function returns, all items have been processed and the thread pool
1214
+ * is ready for a new task.
1215
+ *
1216
+ * @note If multiple threads call this function with the same thread pool, the
1217
+ * calls are serialized.
1218
+ *
1219
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1220
+ * is NULL, all items are processed serially on the calling thread.
1221
+ * @param function the function to call for each tile.
1222
+ * @param context the first argument passed to the specified function.
1223
+ * @param range_i the number of items to process along the first dimension
1224
+ * of the 6D grid.
1225
+ * @param range_j the number of items to process along the second dimension
1226
+ * of the 6D grid.
1227
+ * @param range_k the number of items to process along the third dimension
1228
+ * of the 6D grid.
1229
+ * @param range_l the number of items to process along the fourth dimension
1230
+ * of the 6D grid.
1231
+ * @param range_m the number of items to process along the fifth dimension
1232
+ * of the 6D grid.
1233
+ * @param range_n the number of items to process along the sixth dimension
1234
+ * of the 6D grid.
1235
+ * @param tile_n the maximum number of items along the sixth dimension of
1236
+ * the 6D grid to process in one function call.
1237
+ * @param flags a bitwise combination of zero or more optional flags
1238
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1239
+ */
1240
+ void pthreadpool_parallelize_6d_tile_1d(pthreadpool_t threadpool,
1241
+ pthreadpool_task_6d_tile_1d_t function,
1242
+ void *context, size_t range_i,
1243
+ size_t range_j, size_t range_k,
1244
+ size_t range_l, size_t range_m,
1245
+ size_t range_n, size_t tile_n,
1246
+ uint32_t flags);
1247
+
1248
+ /**
1249
+ * Process items on a 6D grid with the specified maximum tile size along the
1250
+ * last two grid dimensions.
1251
+ *
1252
+ * The function implements a parallel version of the following snippet:
1253
+ *
1254
+ * for (size_t i = 0; i < range_i; i++)
1255
+ * for (size_t j = 0; j < range_j; j++)
1256
+ * for (size_t k = 0; k < range_k; k++)
1257
+ * for (size_t l = 0; l < range_l; l++)
1258
+ * for (size_t m = 0; m < range_m; m += tile_m)
1259
+ * for (size_t n = 0; n < range_n; n += tile_n)
1260
+ * function(context, i, j, k, l, m, n,
1261
+ * min(range_m - m, tile_m), min(range_n - n, tile_n));
1262
+ *
1263
+ * When the function returns, all items have been processed and the thread pool
1264
+ * is ready for a new task.
1265
+ *
1266
+ * @note If multiple threads call this function with the same thread pool, the
1267
+ * calls are serialized.
1268
+ *
1269
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1270
+ * is NULL, all items are processed serially on the calling thread.
1271
+ * @param function the function to call for each tile.
1272
+ * @param context the first argument passed to the specified function.
1273
+ * @param range_i the number of items to process along the first dimension
1274
+ * of the 6D grid.
1275
+ * @param range_j the number of items to process along the second dimension
1276
+ * of the 6D grid.
1277
+ * @param range_k the number of items to process along the third dimension
1278
+ * of the 6D grid.
1279
+ * @param range_l the number of items to process along the fourth dimension
1280
+ * of the 6D grid.
1281
+ * @param range_m the number of items to process along the fifth dimension
1282
+ * of the 6D grid.
1283
+ * @param range_n the number of items to process along the sixth dimension
1284
+ * of the 6D grid.
1285
+ * @param tile_m the maximum number of items along the fifth dimension of
1286
+ * the 6D grid to process in one function call.
1287
+ * @param tile_n the maximum number of items along the sixth dimension of
1288
+ * the 6D grid to process in one function call.
1289
+ * @param flags a bitwise combination of zero or more optional flags
1290
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1291
+ */
1292
+ void pthreadpool_parallelize_6d_tile_2d(pthreadpool_t threadpool,
1293
+ pthreadpool_task_6d_tile_2d_t function,
1294
+ void *context, size_t range_i,
1295
+ size_t range_j, size_t range_k,
1296
+ size_t range_l, size_t range_m,
1297
+ size_t range_n, size_t tile_m,
1298
+ size_t tile_n, uint32_t flags);
1299
+
1300
+ /**
1301
+ * Terminates threads in the thread pool and releases associated resources.
1302
+ *
1303
+ * @warning Accessing the thread pool after a call to this function constitutes
1304
+ * undefined behaviour and may cause data corruption.
1305
+ *
1306
+ * @param[in,out] threadpool The thread pool to destroy.
1307
+ */
1308
+ void pthreadpool_destroy(pthreadpool_t threadpool);
1309
+
1310
+ #ifndef PTHREADPOOL_NO_DEPRECATED_API
1311
+
1312
+ /* Legacy API for compatibility with pre-existing users (e.g. NNPACK) */
1313
+ #if defined(__GNUC__)
1314
+ #define PTHREADPOOL_DEPRECATED __attribute__((__deprecated__))
1315
+ #else
1316
+ #define PTHREADPOOL_DEPRECATED
1317
+ #endif
1318
+
1319
+ typedef void (*pthreadpool_function_1d_t)(void *, size_t);
1320
+ typedef void (*pthreadpool_function_1d_tiled_t)(void *, size_t, size_t);
1321
+ typedef void (*pthreadpool_function_2d_t)(void *, size_t, size_t);
1322
+ typedef void (*pthreadpool_function_2d_tiled_t)(void *, size_t, size_t, size_t,
1323
+ size_t);
1324
+ typedef void (*pthreadpool_function_3d_tiled_t)(void *, size_t, size_t, size_t,
1325
+ size_t, size_t, size_t);
1326
+ typedef void (*pthreadpool_function_4d_tiled_t)(void *, size_t, size_t, size_t,
1327
+ size_t, size_t, size_t, size_t,
1328
+ size_t);
1329
+
1330
+ void pthreadpool_compute_1d(pthreadpool_t threadpool,
1331
+ pthreadpool_function_1d_t function, void *argument,
1332
+ size_t range) PTHREADPOOL_DEPRECATED;
1333
+
1334
+ void pthreadpool_compute_1d_tiled(pthreadpool_t threadpool,
1335
+ pthreadpool_function_1d_tiled_t function,
1336
+ void *argument, size_t range,
1337
+ size_t tile) PTHREADPOOL_DEPRECATED;
1338
+
1339
+ void pthreadpool_compute_2d(pthreadpool_t threadpool,
1340
+ pthreadpool_function_2d_t function, void *argument,
1341
+ size_t range_i,
1342
+ size_t range_j) PTHREADPOOL_DEPRECATED;
1343
+
1344
+ void pthreadpool_compute_2d_tiled(pthreadpool_t threadpool,
1345
+ pthreadpool_function_2d_tiled_t function,
1346
+ void *argument, size_t range_i,
1347
+ size_t range_j, size_t tile_i,
1348
+ size_t tile_j) PTHREADPOOL_DEPRECATED;
1349
+
1350
+ void pthreadpool_compute_3d_tiled(pthreadpool_t threadpool,
1351
+ pthreadpool_function_3d_tiled_t function,
1352
+ void *argument, size_t range_i,
1353
+ size_t range_j, size_t range_k, size_t tile_i,
1354
+ size_t tile_j,
1355
+ size_t tile_k) PTHREADPOOL_DEPRECATED;
1356
+
1357
+ void pthreadpool_compute_4d_tiled(pthreadpool_t threadpool,
1358
+ pthreadpool_function_4d_tiled_t function,
1359
+ void *argument, size_t range_i,
1360
+ size_t range_j, size_t range_k,
1361
+ size_t range_l, size_t tile_i, size_t tile_j,
1362
+ size_t tile_k,
1363
+ size_t tile_l) PTHREADPOOL_DEPRECATED;
1364
+
1365
+ #endif /* PTHREADPOOL_NO_DEPRECATED_API */
1366
+
1367
+ #ifdef __cplusplus
1368
+ } /* extern "C" */
1369
+ #endif
1370
+
1371
+ #ifdef __cplusplus
1372
+
1373
+ namespace libpthreadpool {
1374
+ namespace detail {
1375
+ namespace {
1376
+
1377
+ template <class T> void call_wrapper_1d(void *arg, size_t i) {
1378
+ (*static_cast<const T *>(arg))(i);
1379
+ }
1380
+
1381
+ template <class T>
1382
+ void call_wrapper_1d_tile_1d(void *arg, size_t range_i, size_t tile_i) {
1383
+ (*static_cast<const T *>(arg))(range_i, tile_i);
1384
+ }
1385
+
1386
+ template <class T> void call_wrapper_2d(void *functor, size_t i, size_t j) {
1387
+ (*static_cast<const T *>(functor))(i, j);
1388
+ }
1389
+
1390
+ template <class T>
1391
+ void call_wrapper_2d_tile_1d(void *functor, size_t i, size_t range_j,
1392
+ size_t tile_j) {
1393
+ (*static_cast<const T *>(functor))(i, range_j, tile_j);
1394
+ }
1395
+
1396
+ template <class T>
1397
+ void call_wrapper_2d_tile_2d(void *functor, size_t range_i, size_t range_j,
1398
+ size_t tile_i, size_t tile_j) {
1399
+ (*static_cast<const T *>(functor))(range_i, range_j, tile_i, tile_j);
1400
+ }
1401
+
1402
+ template <class T>
1403
+ void call_wrapper_3d(void *functor, size_t i, size_t j, size_t k) {
1404
+ (*static_cast<const T *>(functor))(i, j, k);
1405
+ }
1406
+
1407
+ template <class T>
1408
+ void call_wrapper_3d_tile_1d(void *functor, size_t i, size_t j, size_t range_k,
1409
+ size_t tile_k) {
1410
+ (*static_cast<const T *>(functor))(i, j, range_k, tile_k);
1411
+ }
1412
+
1413
+ template <class T>
1414
+ void call_wrapper_3d_tile_2d(void *functor, size_t i, size_t range_j,
1415
+ size_t range_k, size_t tile_j, size_t tile_k) {
1416
+ (*static_cast<const T *>(functor))(i, range_j, range_k, tile_j, tile_k);
1417
+ }
1418
+
1419
+ template <class T>
1420
+ void call_wrapper_4d(void *functor, size_t i, size_t j, size_t k, size_t l) {
1421
+ (*static_cast<const T *>(functor))(i, j, k, l);
1422
+ }
1423
+
1424
+ template <class T>
1425
+ void call_wrapper_4d_tile_1d(void *functor, size_t i, size_t j, size_t k,
1426
+ size_t range_l, size_t tile_l) {
1427
+ (*static_cast<const T *>(functor))(i, j, k, range_l, tile_l);
1428
+ }
1429
+
1430
+ template <class T>
1431
+ void call_wrapper_4d_tile_2d(void *functor, size_t i, size_t j, size_t range_k,
1432
+ size_t range_l, size_t tile_k, size_t tile_l) {
1433
+ (*static_cast<const T *>(functor))(i, j, range_k, range_l, tile_k, tile_l);
1434
+ }
1435
+
1436
+ template <class T>
1437
+ void call_wrapper_5d(void *functor, size_t i, size_t j, size_t k, size_t l,
1438
+ size_t m) {
1439
+ (*static_cast<const T *>(functor))(i, j, k, l, m);
1440
+ }
1441
+
1442
+ template <class T>
1443
+ void call_wrapper_5d_tile_1d(void *functor, size_t i, size_t j, size_t k,
1444
+ size_t l, size_t range_m, size_t tile_m) {
1445
+ (*static_cast<const T *>(functor))(i, j, k, l, range_m, tile_m);
1446
+ }
1447
+
1448
+ template <class T>
1449
+ void call_wrapper_5d_tile_2d(void *functor, size_t i, size_t j, size_t k,
1450
+ size_t range_l, size_t range_m, size_t tile_l,
1451
+ size_t tile_m) {
1452
+ (*static_cast<const T *>(functor))(i, j, k, range_l, range_m, tile_l, tile_m);
1453
+ }
1454
+
1455
+ template <class T>
1456
+ void call_wrapper_6d(void *functor, size_t i, size_t j, size_t k, size_t l,
1457
+ size_t m, size_t n) {
1458
+ (*static_cast<const T *>(functor))(i, j, k, l, m, n);
1459
+ }
1460
+
1461
+ template <class T>
1462
+ void call_wrapper_6d_tile_1d(void *functor, size_t i, size_t j, size_t k,
1463
+ size_t l, size_t m, size_t range_n,
1464
+ size_t tile_n) {
1465
+ (*static_cast<const T *>(functor))(i, j, k, l, m, range_n, tile_n);
1466
+ }
1467
+
1468
+ template <class T>
1469
+ void call_wrapper_6d_tile_2d(void *functor, size_t i, size_t j, size_t k,
1470
+ size_t l, size_t range_m, size_t range_n,
1471
+ size_t tile_m, size_t tile_n) {
1472
+ (*static_cast<const T *>(functor))(i, j, k, l, range_m, range_n, tile_m,
1473
+ tile_n);
1474
+ }
1475
+
1476
+ } /* namespace */
1477
+ } /* namespace detail */
1478
+ } /* namespace libpthreadpool */
1479
+
1480
+ /**
1481
+ * Process items on a 1D grid.
1482
+ *
1483
+ * The function implements a parallel version of the following snippet:
1484
+ *
1485
+ * for (size_t i = 0; i < range; i++)
1486
+ * functor(i);
1487
+ *
1488
+ * When the function returns, all items have been processed and the thread pool
1489
+ * is ready for a new task.
1490
+ *
1491
+ * @note If multiple threads call this function with the same thread pool, the
1492
+ * calls are serialized.
1493
+ *
1494
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1495
+ * is NULL, all items are processed serially on the calling thread.
1496
+ * @param functor the functor to call for each item.
1497
+ * @param range the number of items on the 1D grid to process. The
1498
+ * specified functor will be called once for each item.
1499
+ * @param flags a bitwise combination of zero or more optional flags
1500
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1501
+ */
1502
+ template <class T>
1503
+ inline void pthreadpool_parallelize_1d(pthreadpool_t threadpool,
1504
+ const T &functor, size_t range,
1505
+ uint32_t flags = 0) {
1506
+ pthreadpool_parallelize_1d(
1507
+ threadpool, &libpthreadpool::detail::call_wrapper_1d<const T>,
1508
+ const_cast<void *>(static_cast<const void *>(&functor)), range, flags);
1509
+ }
1510
+
1511
+ /**
1512
+ * Process items on a 1D grid with specified maximum tile size.
1513
+ *
1514
+ * The function implements a parallel version of the following snippet:
1515
+ *
1516
+ * for (size_t i = 0; i < range; i += tile)
1517
+ * functor(i, min(range - i, tile));
1518
+ *
1519
+ * When the call returns, all items have been processed and the thread pool is
1520
+ * ready for a new task.
1521
+ *
1522
+ * @note If multiple threads call this function with the same thread pool,
1523
+ * the calls are serialized.
1524
+ *
1525
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1526
+ * is NULL, all items are processed serially on the calling thread.
1527
+ * @param functor the functor to call for each tile.
1528
+ * @param range the number of items on the 1D grid to process.
1529
+ * @param tile the maximum number of items on the 1D grid to process in
1530
+ * one functor call.
1531
+ * @param flags a bitwise combination of zero or more optional flags
1532
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1533
+ */
1534
+ template <class T>
1535
+ inline void pthreadpool_parallelize_1d_tile_1d(pthreadpool_t threadpool,
1536
+ const T &functor, size_t range,
1537
+ size_t tile,
1538
+ uint32_t flags = 0) {
1539
+ pthreadpool_parallelize_1d_tile_1d(
1540
+ threadpool, &libpthreadpool::detail::call_wrapper_1d_tile_1d<const T>,
1541
+ const_cast<void *>(static_cast<const void *>(&functor)), range, tile,
1542
+ flags);
1543
+ }
1544
+
1545
+ /**
1546
+ * Process items on a 2D grid.
1547
+ *
1548
+ * The function implements a parallel version of the following snippet:
1549
+ *
1550
+ * for (size_t i = 0; i < range_i; i++)
1551
+ * for (size_t j = 0; j < range_j; j++)
1552
+ * functor(i, j);
1553
+ *
1554
+ * When the function returns, all items have been processed and the thread pool
1555
+ * is ready for a new task.
1556
+ *
1557
+ * @note If multiple threads call this function with the same thread pool, the
1558
+ * calls are serialized.
1559
+ *
1560
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1561
+ * is NULL, all items are processed serially on the calling thread.
1562
+ * @param functor the functor to call for each item.
1563
+ * @param range_i the number of items to process along the first dimension
1564
+ * of the 2D grid.
1565
+ * @param range_j the number of items to process along the second dimension
1566
+ * of the 2D grid.
1567
+ * @param flags a bitwise combination of zero or more optional flags
1568
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1569
+ */
1570
+ template <class T>
1571
+ inline void pthreadpool_parallelize_2d(pthreadpool_t threadpool,
1572
+ const T &functor, size_t range_i,
1573
+ size_t range_j, uint32_t flags = 0) {
1574
+ pthreadpool_parallelize_2d(
1575
+ threadpool, &libpthreadpool::detail::call_wrapper_2d<const T>,
1576
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1577
+ flags);
1578
+ }
1579
+
1580
+ /**
1581
+ * Process items on a 2D grid with the specified maximum tile size along the
1582
+ * last grid dimension.
1583
+ *
1584
+ * The function implements a parallel version of the following snippet:
1585
+ *
1586
+ * for (size_t i = 0; i < range_i; i++)
1587
+ * for (size_t j = 0; j < range_j; j += tile_j)
1588
+ * functor(i, j, min(range_j - j, tile_j));
1589
+ *
1590
+ * When the function returns, all items have been processed and the thread pool
1591
+ * is ready for a new task.
1592
+ *
1593
+ * @note If multiple threads call this function with the same thread pool, the
1594
+ * calls are serialized.
1595
+ *
1596
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1597
+ * is NULL, all items are processed serially on the calling thread.
1598
+ * @param functor the functor to call for each tile.
1599
+ * @param range_i the number of items to process along the first dimension
1600
+ * of the 2D grid.
1601
+ * @param range_j the number of items to process along the second dimension
1602
+ * of the 2D grid.
1603
+ * @param tile_j the maximum number of items along the second dimension of
1604
+ * the 2D grid to process in one functor call.
1605
+ * @param flags a bitwise combination of zero or more optional flags
1606
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1607
+ */
1608
+ template <class T>
1609
+ inline void pthreadpool_parallelize_2d_tile_1d(pthreadpool_t threadpool,
1610
+ const T &functor, size_t range_i,
1611
+ size_t range_j, size_t tile_j,
1612
+ uint32_t flags = 0) {
1613
+ pthreadpool_parallelize_2d_tile_1d(
1614
+ threadpool, &libpthreadpool::detail::call_wrapper_2d_tile_1d<const T>,
1615
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1616
+ tile_j, flags);
1617
+ }
1618
+
1619
+ /**
1620
+ * Process items on a 2D grid with the specified maximum tile size along each
1621
+ * grid dimension.
1622
+ *
1623
+ * The function implements a parallel version of the following snippet:
1624
+ *
1625
+ * for (size_t i = 0; i < range_i; i += tile_i)
1626
+ * for (size_t j = 0; j < range_j; j += tile_j)
1627
+ * functor(i, j,
1628
+ * min(range_i - i, tile_i), min(range_j - j, tile_j));
1629
+ *
1630
+ * When the function returns, all items have been processed and the thread pool
1631
+ * is ready for a new task.
1632
+ *
1633
+ * @note If multiple threads call this function with the same thread pool, the
1634
+ * calls are serialized.
1635
+ *
1636
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1637
+ * is NULL, all items are processed serially on the calling thread.
1638
+ * @param functor the functor to call for each tile.
1639
+ * @param range_i the number of items to process along the first dimension
1640
+ * of the 2D grid.
1641
+ * @param range_j the number of items to process along the second dimension
1642
+ * of the 2D grid.
1643
+ * @param tile_i the maximum number of items along the first dimension of
1644
+ * the 2D grid to process in one functor call.
1645
+ * @param tile_j the maximum number of items along the second dimension of
1646
+ * the 2D grid to process in one functor call.
1647
+ * @param flags a bitwise combination of zero or more optional flags
1648
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1649
+ */
1650
+ template <class T>
1651
+ inline void pthreadpool_parallelize_2d_tile_2d(pthreadpool_t threadpool,
1652
+ const T &functor, size_t range_i,
1653
+ size_t range_j, size_t tile_i,
1654
+ size_t tile_j,
1655
+ uint32_t flags = 0) {
1656
+ pthreadpool_parallelize_2d_tile_2d(
1657
+ threadpool, &libpthreadpool::detail::call_wrapper_2d_tile_2d<const T>,
1658
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1659
+ tile_i, tile_j, flags);
1660
+ }
1661
+
1662
+ /**
1663
+ * Process items on a 3D grid.
1664
+ *
1665
+ * The function implements a parallel version of the following snippet:
1666
+ *
1667
+ * for (size_t i = 0; i < range_i; i++)
1668
+ * for (size_t j = 0; j < range_j; j++)
1669
+ * for (size_t k = 0; k < range_k; k++)
1670
+ * functor(i, j, k);
1671
+ *
1672
+ * When the function returns, all items have been processed and the thread pool
1673
+ * is ready for a new task.
1674
+ *
1675
+ * @note If multiple threads call this function with the same thread pool, the
1676
+ * calls are serialized.
1677
+ *
1678
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1679
+ * is NULL, all items are processed serially on the calling thread.
1680
+ * @param functor the functor to call for each item.
1681
+ * @param range_i the number of items to process along the first dimension
1682
+ * of the 3D grid.
1683
+ * @param range_j the number of items to process along the second dimension
1684
+ * of the 3D grid.
1685
+ * @param range_k the number of items to process along the third dimension
1686
+ * of the 3D grid.
1687
+ * @param flags a bitwise combination of zero or more optional flags
1688
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1689
+ */
1690
+ template <class T>
1691
+ inline void pthreadpool_parallelize_3d(pthreadpool_t threadpool,
1692
+ const T &functor, size_t range_i,
1693
+ size_t range_j, size_t range_k,
1694
+ uint32_t flags = 0) {
1695
+ pthreadpool_parallelize_3d(
1696
+ threadpool, &libpthreadpool::detail::call_wrapper_3d<const T>,
1697
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1698
+ range_k, flags);
1699
+ }
1700
+
1701
+ /**
1702
+ * Process items on a 3D grid with the specified maximum tile size along the
1703
+ * last grid dimension.
1704
+ *
1705
+ * The function implements a parallel version of the following snippet:
1706
+ *
1707
+ * for (size_t i = 0; i < range_i; i++)
1708
+ * for (size_t j = 0; j < range_j; j++)
1709
+ * for (size_t k = 0; k < range_k; k += tile_k)
1710
+ * functor(i, j, k, min(range_k - k, tile_k));
1711
+ *
1712
+ * When the function returns, all items have been processed and the thread pool
1713
+ * is ready for a new task.
1714
+ *
1715
+ * @note If multiple threads call this function with the same thread pool, the
1716
+ * calls are serialized.
1717
+ *
1718
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1719
+ * is NULL, all items are processed serially on the calling thread.
1720
+ * @param functor the functor to call for each tile.
1721
+ * @param range_i the number of items to process along the first dimension
1722
+ * of the 3D grid.
1723
+ * @param range_j the number of items to process along the second dimension
1724
+ * of the 3D grid.
1725
+ * @param range_k the number of items to process along the third dimension
1726
+ * of the 3D grid.
1727
+ * @param tile_k the maximum number of items along the third dimension of
1728
+ * the 3D grid to process in one functor call.
1729
+ * @param flags a bitwise combination of zero or more optional flags
1730
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1731
+ */
1732
+ template <class T>
1733
+ inline void pthreadpool_parallelize_3d_tile_1d(pthreadpool_t threadpool,
1734
+ const T &functor, size_t range_i,
1735
+ size_t range_j, size_t range_k,
1736
+ size_t tile_k,
1737
+ uint32_t flags = 0) {
1738
+ pthreadpool_parallelize_3d_tile_1d(
1739
+ threadpool, &libpthreadpool::detail::call_wrapper_3d_tile_1d<const T>,
1740
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1741
+ range_k, tile_k, flags);
1742
+ }
1743
+
1744
+ /**
1745
+ * Process items on a 3D grid with the specified maximum tile size along the
1746
+ * last two grid dimensions.
1747
+ *
1748
+ * The function implements a parallel version of the following snippet:
1749
+ *
1750
+ * for (size_t i = 0; i < range_i; i++)
1751
+ * for (size_t j = 0; j < range_j; j += tile_j)
1752
+ * for (size_t k = 0; k < range_k; k += tile_k)
1753
+ * functor(i, j, k,
1754
+ * min(range_j - j, tile_j), min(range_k - k, tile_k));
1755
+ *
1756
+ * When the function returns, all items have been processed and the thread pool
1757
+ * is ready for a new task.
1758
+ *
1759
+ * @note If multiple threads call this function with the same thread pool, the
1760
+ * calls are serialized.
1761
+ *
1762
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1763
+ * is NULL, all items are processed serially on the calling thread.
1764
+ * @param functor the functor to call for each tile.
1765
+ * @param range_i the number of items to process along the first dimension
1766
+ * of the 3D grid.
1767
+ * @param range_j the number of items to process along the second dimension
1768
+ * of the 3D grid.
1769
+ * @param range_k the number of items to process along the third dimension
1770
+ * of the 3D grid.
1771
+ * @param tile_j the maximum number of items along the second dimension of
1772
+ * the 3D grid to process in one functor call.
1773
+ * @param tile_k the maximum number of items along the third dimension of
1774
+ * the 3D grid to process in one functor call.
1775
+ * @param flags a bitwise combination of zero or more optional flags
1776
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1777
+ */
1778
+ template <class T>
1779
+ inline void pthreadpool_parallelize_3d_tile_2d(pthreadpool_t threadpool,
1780
+ const T &functor, size_t range_i,
1781
+ size_t range_j, size_t range_k,
1782
+ size_t tile_j, size_t tile_k,
1783
+ uint32_t flags = 0) {
1784
+ pthreadpool_parallelize_3d_tile_2d(
1785
+ threadpool, &libpthreadpool::detail::call_wrapper_3d_tile_2d<const T>,
1786
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1787
+ range_k, tile_j, tile_k, flags);
1788
+ }
1789
+
1790
+ /**
1791
+ * Process items on a 4D grid.
1792
+ *
1793
+ * The function implements a parallel version of the following snippet:
1794
+ *
1795
+ * for (size_t i = 0; i < range_i; i++)
1796
+ * for (size_t j = 0; j < range_j; j++)
1797
+ * for (size_t k = 0; k < range_k; k++)
1798
+ * for (size_t l = 0; l < range_l; l++)
1799
+ * functor(i, j, k, l);
1800
+ *
1801
+ * When the function returns, all items have been processed and the thread pool
1802
+ * is ready for a new task.
1803
+ *
1804
+ * @note If multiple threads call this function with the same thread pool, the
1805
+ * calls are serialized.
1806
+ *
1807
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1808
+ * is NULL, all items are processed serially on the calling thread.
1809
+ * @param functor the functor to call for each item.
1810
+ * @param range_i the number of items to process along the first dimension
1811
+ * of the 4D grid.
1812
+ * @param range_j the number of items to process along the second dimension
1813
+ * of the 4D grid.
1814
+ * @param range_k the number of items to process along the third dimension
1815
+ * of the 4D grid.
1816
+ * @param range_l the number of items to process along the fourth dimension
1817
+ * of the 4D grid.
1818
+ * @param flags a bitwise combination of zero or more optional flags
1819
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1820
+ */
1821
+ template <class T>
1822
+ inline void pthreadpool_parallelize_4d(pthreadpool_t threadpool,
1823
+ const T &functor, size_t range_i,
1824
+ size_t range_j, size_t range_k,
1825
+ size_t range_l, uint32_t flags = 0) {
1826
+ pthreadpool_parallelize_4d(
1827
+ threadpool, &libpthreadpool::detail::call_wrapper_4d<const T>,
1828
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1829
+ range_k, range_l, flags);
1830
+ }
1831
+
1832
+ /**
1833
+ * Process items on a 4D grid with the specified maximum tile size along the
1834
+ * last grid dimension.
1835
+ *
1836
+ * The function implements a parallel version of the following snippet:
1837
+ *
1838
+ * for (size_t i = 0; i < range_i; i++)
1839
+ * for (size_t j = 0; j < range_j; j++)
1840
+ * for (size_t k = 0; k < range_k; k++)
1841
+ * for (size_t l = 0; l < range_l; l += tile_l)
1842
+ * functor(i, j, k, l, min(range_l - l, tile_l));
1843
+ *
1844
+ * When the function returns, all items have been processed and the thread pool
1845
+ * is ready for a new task.
1846
+ *
1847
+ * @note If multiple threads call this function with the same thread pool, the
1848
+ * calls are serialized.
1849
+ *
1850
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1851
+ * is NULL, all items are processed serially on the calling thread.
1852
+ * @param functor the functor to call for each tile.
1853
+ * @param range_i the number of items to process along the first dimension
1854
+ * of the 4D grid.
1855
+ * @param range_j the number of items to process along the second dimension
1856
+ * of the 4D grid.
1857
+ * @param range_k the number of items to process along the third dimension
1858
+ * of the 4D grid.
1859
+ * @param range_l the number of items to process along the fourth dimension
1860
+ * of the 4D grid.
1861
+ * @param tile_l the maximum number of items along the fourth dimension of
1862
+ * the 4D grid to process in one functor call.
1863
+ * @param flags a bitwise combination of zero or more optional flags
1864
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1865
+ */
1866
+ template <class T>
1867
+ inline void pthreadpool_parallelize_4d_tile_1d(pthreadpool_t threadpool,
1868
+ const T &functor, size_t range_i,
1869
+ size_t range_j, size_t range_k,
1870
+ size_t range_l, size_t tile_l,
1871
+ uint32_t flags = 0) {
1872
+ pthreadpool_parallelize_4d_tile_1d(
1873
+ threadpool, &libpthreadpool::detail::call_wrapper_4d_tile_1d<const T>,
1874
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1875
+ range_k, range_l, tile_l, flags);
1876
+ }
1877
+
1878
+ /**
1879
+ * Process items on a 4D grid with the specified maximum tile size along the
1880
+ * last two grid dimensions.
1881
+ *
1882
+ * The function implements a parallel version of the following snippet:
1883
+ *
1884
+ * for (size_t i = 0; i < range_i; i++)
1885
+ * for (size_t j = 0; j < range_j; j++)
1886
+ * for (size_t k = 0; k < range_k; k += tile_k)
1887
+ * for (size_t l = 0; l < range_l; l += tile_l)
1888
+ * functor(i, j, k, l,
1889
+ * min(range_k - k, tile_k), min(range_l - l, tile_l));
1890
+ *
1891
+ * When the function returns, all items have been processed and the thread pool
1892
+ * is ready for a new task.
1893
+ *
1894
+ * @note If multiple threads call this function with the same thread pool, the
1895
+ * calls are serialized.
1896
+ *
1897
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1898
+ * is NULL, all items are processed serially on the calling thread.
1899
+ * @param functor the functor to call for each tile.
1900
+ * @param range_i the number of items to process along the first dimension
1901
+ * of the 4D grid.
1902
+ * @param range_j the number of items to process along the second dimension
1903
+ * of the 4D grid.
1904
+ * @param range_k the number of items to process along the third dimension
1905
+ * of the 4D grid.
1906
+ * @param range_l the number of items to process along the fourth dimension
1907
+ * of the 4D grid.
1908
+ * @param tile_k the maximum number of items along the third dimension of
1909
+ * the 4D grid to process in one functor call.
1910
+ * @param tile_l the maximum number of items along the fourth dimension of
1911
+ * the 4D grid to process in one functor call.
1912
+ * @param flags a bitwise combination of zero or more optional flags
1913
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1914
+ */
1915
+ template <class T>
1916
+ inline void pthreadpool_parallelize_4d_tile_2d(pthreadpool_t threadpool,
1917
+ const T &functor, size_t range_i,
1918
+ size_t range_j, size_t range_k,
1919
+ size_t range_l, size_t tile_k,
1920
+ size_t tile_l,
1921
+ uint32_t flags = 0) {
1922
+ pthreadpool_parallelize_4d_tile_2d(
1923
+ threadpool, &libpthreadpool::detail::call_wrapper_4d_tile_2d<const T>,
1924
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1925
+ range_k, range_l, tile_k, tile_l, flags);
1926
+ }
1927
+
1928
+ /**
1929
+ * Process items on a 5D grid.
1930
+ *
1931
+ * The function implements a parallel version of the following snippet:
1932
+ *
1933
+ * for (size_t i = 0; i < range_i; i++)
1934
+ * for (size_t j = 0; j < range_j; j++)
1935
+ * for (size_t k = 0; k < range_k; k++)
1936
+ * for (size_t l = 0; l < range_l; l++)
1937
+ * for (size_t m = 0; m < range_m; m++)
1938
+ * functor(i, j, k, l, m);
1939
+ *
1940
+ * When the function returns, all items have been processed and the thread pool
1941
+ * is ready for a new task.
1942
+ *
1943
+ * @note If multiple threads call this function with the same thread pool, the
1944
+ * calls are serialized.
1945
+ *
1946
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1947
+ * is NULL, all items are processed serially on the calling thread.
1948
+ * @param functor the functor to call for each item.
1949
+ * @param range_i the number of items to process along the first dimension
1950
+ * of the 5D grid.
1951
+ * @param range_j the number of items to process along the second dimension
1952
+ * of the 5D grid.
1953
+ * @param range_k the number of items to process along the third dimension
1954
+ * of the 5D grid.
1955
+ * @param range_l the number of items to process along the fourth dimension
1956
+ * of the 5D grid.
1957
+ * @param range_m the number of items to process along the fifth dimension
1958
+ * of the 5D grid.
1959
+ * @param flags a bitwise combination of zero or more optional flags
1960
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
1961
+ */
1962
+ template <class T>
1963
+ inline void
1964
+ pthreadpool_parallelize_5d(pthreadpool_t threadpool, const T &functor,
1965
+ size_t range_i, size_t range_j, size_t range_k,
1966
+ size_t range_l, size_t range_m, uint32_t flags = 0) {
1967
+ pthreadpool_parallelize_5d(
1968
+ threadpool, &libpthreadpool::detail::call_wrapper_5d<const T>,
1969
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
1970
+ range_k, range_l, range_m, flags);
1971
+ }
1972
+
1973
+ /**
1974
+ * Process items on a 5D grid with the specified maximum tile size along the
1975
+ * last grid dimension.
1976
+ *
1977
+ * The function implements a parallel version of the following snippet:
1978
+ *
1979
+ * for (size_t i = 0; i < range_i; i++)
1980
+ * for (size_t j = 0; j < range_j; j++)
1981
+ * for (size_t k = 0; k < range_k; k++)
1982
+ * for (size_t l = 0; l < range_l; l++)
1983
+ * for (size_t m = 0; m < range_m; m += tile_m)
1984
+ * functor(i, j, k, l, m, min(range_m - m, tile_m));
1985
+ *
1986
+ * When the function returns, all items have been processed and the thread pool
1987
+ * is ready for a new task.
1988
+ *
1989
+ * @note If multiple threads call this function with the same thread pool, the
1990
+ * calls are serialized.
1991
+ *
1992
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
1993
+ * is NULL, all items are processed serially on the calling thread.
1994
+ * @param functor the functor to call for each tile.
1995
+ * @param range_i the number of items to process along the first dimension
1996
+ * of the 5D grid.
1997
+ * @param range_j the number of items to process along the second dimension
1998
+ * of the 5D grid.
1999
+ * @param range_k the number of items to process along the third dimension
2000
+ * of the 5D grid.
2001
+ * @param range_l the number of items to process along the fourth dimension
2002
+ * of the 5D grid.
2003
+ * @param range_m the number of items to process along the fifth dimension
2004
+ * of the 5D grid.
2005
+ * @param tile_m the maximum number of items along the fifth dimension of
2006
+ * the 5D grid to process in one functor call.
2007
+ * @param flags a bitwise combination of zero or more optional flags
2008
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
2009
+ */
2010
+ template <class T>
2011
+ inline void pthreadpool_parallelize_5d_tile_1d(pthreadpool_t threadpool,
2012
+ const T &functor, size_t range_i,
2013
+ size_t range_j, size_t range_k,
2014
+ size_t range_l, size_t range_m,
2015
+ size_t tile_m,
2016
+ uint32_t flags = 0) {
2017
+ pthreadpool_parallelize_5d_tile_1d(
2018
+ threadpool, &libpthreadpool::detail::call_wrapper_5d_tile_1d<const T>,
2019
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
2020
+ range_k, range_l, range_m, tile_m, flags);
2021
+ }
2022
+
2023
+ /**
2024
+ * Process items on a 5D grid with the specified maximum tile size along the
2025
+ * last two grid dimensions.
2026
+ *
2027
+ * The function implements a parallel version of the following snippet:
2028
+ *
2029
+ * for (size_t i = 0; i < range_i; i++)
2030
+ * for (size_t j = 0; j < range_j; j++)
2031
+ * for (size_t k = 0; k < range_k; k++)
2032
+ * for (size_t l = 0; l < range_l; l += tile_l)
2033
+ * for (size_t m = 0; m < range_m; m += tile_m)
2034
+ * functor(i, j, k, l, m,
2035
+ * min(range_l - l, tile_l), min(range_m - m, tile_m));
2036
+ *
2037
+ * When the function returns, all items have been processed and the thread pool
2038
+ * is ready for a new task.
2039
+ *
2040
+ * @note If multiple threads call this function with the same thread pool, the
2041
+ * calls are serialized.
2042
+ *
2043
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
2044
+ * is NULL, all items are processed serially on the calling thread.
2045
+ * @param functor the functor to call for each tile.
2046
+ * @param range_i the number of items to process along the first dimension
2047
+ * of the 5D grid.
2048
+ * @param range_j the number of items to process along the second dimension
2049
+ * of the 5D grid.
2050
+ * @param range_k the number of items to process along the third dimension
2051
+ * of the 5D grid.
2052
+ * @param range_l the number of items to process along the fourth dimension
2053
+ * of the 5D grid.
2054
+ * @param range_m the number of items to process along the fifth dimension
2055
+ * of the 5D grid.
2056
+ * @param tile_l the maximum number of items along the fourth dimension of
2057
+ * the 5D grid to process in one functor call.
2058
+ * @param tile_m the maximum number of items along the fifth dimension of
2059
+ * the 5D grid to process in one functor call.
2060
+ * @param flags a bitwise combination of zero or more optional flags
2061
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
2062
+ */
2063
+ template <class T> // T is the type of the callable passed as `functor`
2064
+ inline void pthreadpool_parallelize_5d_tile_2d(pthreadpool_t threadpool,
2065
+ const T &functor, size_t range_i,
2066
+ size_t range_j, size_t range_k,
2067
+ size_t range_l, size_t range_m,
2068
+ size_t tile_l, size_t tile_m,
2069
+ uint32_t flags = 0) {
2070
+ pthreadpool_parallelize_5d_tile_2d( // passes one more argument than this template accepts, so it resolves to the same-named overload taking the wrapper's address plus a void * (presumably the C API declared earlier in this header)
2071
+ threadpool, &libpthreadpool::detail::call_wrapper_5d_tile_2d<const T>, // wrapper is instantiated for const T
2072
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j, // address of the caller's functor; const is cast away, presumably only to fit the void * parameter
2073
+ range_k, range_l, range_m, tile_l, tile_m, flags); // ranges, both tile sizes and flags are forwarded unchanged
2074
+ }
2075
+
2076
+ /**
2077
+ * Process items on a 6D grid.
2078
+ *
2079
+ * The function implements a parallel version of the following snippet:
2080
+ *
2081
+ * for (size_t i = 0; i < range_i; i++)
2082
+ * for (size_t j = 0; j < range_j; j++)
2083
+ * for (size_t k = 0; k < range_k; k++)
2084
+ * for (size_t l = 0; l < range_l; l++)
2085
+ * for (size_t m = 0; m < range_m; m++)
2086
+ * for (size_t n = 0; n < range_n; n++)
2087
+ * functor(i, j, k, l, m, n);
2088
+ *
2089
+ * When the function returns, all items have been processed and the thread pool
2090
+ * is ready for a new task.
2091
+ *
2092
+ * @note If multiple threads call this function with the same thread pool, the
2093
+ * calls are serialized.
2094
+ *
2095
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
2096
+ * is NULL, all items are processed serially on the calling thread.
2097
+ * @param functor the functor to call for each item.
2098
+ * @param range_i the number of items to process along the first dimension
2099
+ * of the 6D grid.
2100
+ * @param range_j the number of items to process along the second dimension
2101
+ * of the 6D grid.
2102
+ * @param range_k the number of items to process along the third dimension
2103
+ * of the 6D grid.
2104
+ * @param range_l the number of items to process along the fourth dimension
2105
+ * of the 6D grid.
2106
+ * @param range_m the number of items to process along the fifth dimension
2107
+ * of the 6D grid.
2108
+ * @param range_n the number of items to process along the sixth dimension
2109
+ * of the 6D grid.
2110
+ * @param flags a bitwise combination of zero or more optional flags
2111
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
2112
+ * @note This untiled variant takes no tile size: as in the snippet above,
2113
+ * the functor is invoked once per item of the 6D grid.
2114
+ */
2115
+ template <class T> // T is the type of the callable passed as `functor`
2116
+ inline void pthreadpool_parallelize_6d(pthreadpool_t threadpool,
2117
+ const T &functor, size_t range_i,
2118
+ size_t range_j, size_t range_k,
2119
+ size_t range_l, size_t range_m,
2120
+ size_t range_n, uint32_t flags = 0) {
2121
+ pthreadpool_parallelize_6d( // passes one more argument than this template accepts, so it resolves to the same-named overload taking the wrapper's address plus a void * (presumably the C API declared earlier in this header)
2122
+ threadpool, &libpthreadpool::detail::call_wrapper_6d<const T>, // wrapper is instantiated for const T
2123
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j, // address of the caller's functor; const is cast away, presumably only to fit the void * parameter
2124
+ range_k, range_l, range_m, range_n, flags); // all six ranges and flags are forwarded unchanged; this variant has no tile size
2125
+ }
2126
+
2127
+ /**
2128
+ * Process items on a 6D grid with the specified maximum tile size along the
2129
+ * last grid dimension.
2130
+ *
2131
+ * The function implements a parallel version of the following snippet:
2132
+ *
2133
+ * for (size_t i = 0; i < range_i; i++)
2134
+ * for (size_t j = 0; j < range_j; j++)
2135
+ * for (size_t k = 0; k < range_k; k++)
2136
+ * for (size_t l = 0; l < range_l; l++)
2137
+ * for (size_t m = 0; m < range_m; m++)
2138
+ * for (size_t n = 0; n < range_n; n += tile_n)
2139
+ * functor(i, j, k, l, m, n, min(range_n - n, tile_n));
2140
+ *
2141
+ * When the function returns, all items have been processed and the thread pool
2142
+ * is ready for a new task.
2143
+ *
2144
+ * @note If multiple threads call this function with the same thread pool, the
2145
+ * calls are serialized.
2146
+ *
2147
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
2148
+ * is NULL, all items are processed serially on the calling thread.
2149
+ * @param functor the functor to call for each tile.
2150
+ * @param range_i the number of items to process along the first dimension
2151
+ * of the 6D grid.
2152
+ * @param range_j the number of items to process along the second dimension
2153
+ * of the 6D grid.
2154
+ * @param range_k the number of items to process along the third dimension
2155
+ * of the 6D grid.
2156
+ * @param range_l the number of items to process along the fourth dimension
2157
+ * of the 6D grid.
2158
+ * @param range_m the number of items to process along the fifth dimension
2159
+ * of the 6D grid.
2160
+ * @param range_n the number of items to process along the sixth dimension
2161
+ * of the 6D grid.
2162
+ * @param tile_n the maximum number of items along the sixth dimension of
2163
+ * the 6D grid to process in one functor call.
2164
+ * @param flags a bitwise combination of zero or more optional flags
2165
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
2166
+ */
2167
+ template <class T> // T is the type of the callable passed as `functor`
2168
+ inline void pthreadpool_parallelize_6d_tile_1d(pthreadpool_t threadpool,
2169
+ const T &functor, size_t range_i,
2170
+ size_t range_j, size_t range_k,
2171
+ size_t range_l, size_t range_m,
2172
+ size_t range_n, size_t tile_n,
2173
+ uint32_t flags = 0) {
2174
+ pthreadpool_parallelize_6d_tile_1d( // passes one more argument than this template accepts, so it resolves to the same-named overload taking the wrapper's address plus a void * (presumably the C API declared earlier in this header)
2175
+ threadpool, &libpthreadpool::detail::call_wrapper_6d_tile_1d<const T>, // wrapper is instantiated for const T
2176
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j, // address of the caller's functor; const is cast away, presumably only to fit the void * parameter
2177
+ range_k, range_l, range_m, range_n, tile_n, flags); // ranges, tile size and flags are forwarded unchanged
2178
+ }
2179
+
2180
+ /**
2181
+ * Process items on a 6D grid with the specified maximum tile size along the
2182
+ * last two grid dimensions.
2183
+ *
2184
+ * The function implements a parallel version of the following snippet:
2185
+ *
2186
+ * for (size_t i = 0; i < range_i; i++)
2187
+ * for (size_t j = 0; j < range_j; j++)
2188
+ * for (size_t k = 0; k < range_k; k++)
2189
+ * for (size_t l = 0; l < range_l; l++)
2190
+ * for (size_t m = 0; m < range_m; m += tile_m)
2191
+ * for (size_t n = 0; n < range_n; n += tile_n)
2192
+ * functor(i, j, k, l, m, n,
2193
+ * min(range_m - m, tile_m), min(range_n - n, tile_n));
2194
+ *
2195
+ * When the function returns, all items have been processed and the thread pool
2196
+ * is ready for a new task.
2197
+ *
2198
+ * @note If multiple threads call this function with the same thread pool, the
2199
+ * calls are serialized.
2200
+ *
2201
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
2202
+ * is NULL, all items are processed serially on the calling thread.
2203
+ * @param functor the functor to call for each tile.
2204
+ * @param range_i the number of items to process along the first dimension
2205
+ * of the 6D grid.
2206
+ * @param range_j the number of items to process along the second dimension
2207
+ * of the 6D grid.
2208
+ * @param range_k the number of items to process along the third dimension
2209
+ * of the 6D grid.
2210
+ * @param range_l the number of items to process along the fourth dimension
2211
+ * of the 6D grid.
2212
+ * @param range_m the number of items to process along the fifth dimension
2213
+ * of the 6D grid.
2214
+ * @param range_n the number of items to process along the sixth dimension
2215
+ * of the 6D grid.
2216
+ * @param tile_m the maximum number of items along the fifth dimension of
2217
+ * the 6D grid to process in one functor call.
2218
+ * @param tile_n the maximum number of items along the sixth dimension of
2219
+ * the 6D grid to process in one functor call.
2220
+ * @param flags a bitwise combination of zero or more optional flags
2221
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
2222
+ */
2223
+ template <class T> // T is the type of the callable passed as `functor`
2224
+ inline void pthreadpool_parallelize_6d_tile_2d(
2225
+ pthreadpool_t threadpool, const T &functor, size_t range_i, size_t range_j,
2226
+ size_t range_k, size_t range_l, size_t range_m, size_t range_n,
2227
+ size_t tile_m, size_t tile_n, uint32_t flags = 0) {
2228
+ pthreadpool_parallelize_6d_tile_2d( // passes one more argument than this template accepts, so it resolves to the same-named overload taking the wrapper's address plus a void * (presumably the C API declared earlier in this header)
2229
+ threadpool, &libpthreadpool::detail::call_wrapper_6d_tile_2d<const T>, // wrapper is instantiated for const T
2230
+ const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j, // address of the caller's functor; const is cast away, presumably only to fit the void * parameter
2231
+ range_k, range_l, range_m, range_n, tile_m, tile_n, flags); // ranges, both tile sizes and flags are forwarded unchanged
2232
+ }
2233
+
2234
+ #endif /* __cplusplus */
2235
+
2236
+ #endif /* PTHREADPOOL_H_ */