react-native-executorch 0.5.2 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/CMakeLists.txt +24 -0
- package/android/build.gradle +1 -0
- package/android/src/main/cpp/CMakeLists.txt +25 -0
- package/android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt +1 -13
- package/common/rnexecutorch/RnExecutorchInstaller.cpp +52 -18
- package/common/rnexecutorch/RnExecutorchInstaller.h +0 -25
- package/common/rnexecutorch/TokenizerModule.cpp +1 -1
- package/common/rnexecutorch/TokenizerModule.h +4 -1
- package/common/rnexecutorch/data_processing/FileUtils.h +2 -2
- package/common/rnexecutorch/data_processing/ImageProcessing.cpp +5 -5
- package/common/rnexecutorch/data_processing/ImageProcessing.h +2 -2
- package/common/rnexecutorch/data_processing/Numerical.cpp +13 -0
- package/common/rnexecutorch/host_objects/JsiConversions.h +43 -62
- package/common/rnexecutorch/host_objects/ModelHostObject.h +43 -24
- package/common/rnexecutorch/metaprogramming/ConstructorHelpers.h +8 -6
- package/common/rnexecutorch/metaprogramming/FunctionHelpers.h +1 -1
- package/common/rnexecutorch/models/BaseModel.cpp +2 -2
- package/common/rnexecutorch/models/BaseModel.h +5 -0
- package/common/rnexecutorch/models/EncoderDecoderBase.cpp +2 -2
- package/common/rnexecutorch/models/EncoderDecoderBase.h +2 -2
- package/common/rnexecutorch/models/classification/Classification.cpp +6 -6
- package/common/rnexecutorch/models/classification/Classification.h +5 -0
- package/common/rnexecutorch/models/classification/Constants.h +3 -3
- package/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp +2 -2
- package/common/rnexecutorch/models/embeddings/BaseEmbeddings.h +2 -2
- package/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp +3 -3
- package/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h +5 -0
- package/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp +2 -2
- package/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h +6 -1
- package/common/rnexecutorch/models/image_segmentation/Constants.h +3 -3
- package/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp +6 -5
- package/common/rnexecutorch/models/image_segmentation/ImageSegmentation.h +8 -1
- package/common/rnexecutorch/models/llm/LLM.cpp +58 -0
- package/common/rnexecutorch/models/llm/LLM.h +35 -0
- package/common/rnexecutorch/models/object_detection/Constants.h +3 -3
- package/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +8 -8
- package/common/rnexecutorch/models/object_detection/ObjectDetection.h +11 -5
- package/common/rnexecutorch/models/object_detection/Types.h +13 -0
- package/common/rnexecutorch/models/object_detection/Utils.cpp +13 -11
- package/common/rnexecutorch/models/object_detection/Utils.h +7 -13
- package/common/rnexecutorch/models/ocr/CTCLabelConverter.cpp +2 -2
- package/common/rnexecutorch/models/ocr/CTCLabelConverter.h +2 -2
- package/common/rnexecutorch/models/ocr/Constants.h +33 -26
- package/common/rnexecutorch/models/ocr/Detector.cpp +20 -22
- package/common/rnexecutorch/models/ocr/Detector.h +4 -4
- package/common/rnexecutorch/models/ocr/OCR.cpp +9 -8
- package/common/rnexecutorch/models/ocr/OCR.h +11 -3
- package/common/rnexecutorch/models/ocr/RecognitionHandler.cpp +20 -19
- package/common/rnexecutorch/models/ocr/RecognitionHandler.h +9 -7
- package/common/rnexecutorch/models/ocr/Recognizer.cpp +7 -7
- package/common/rnexecutorch/models/ocr/Recognizer.h +2 -2
- package/common/rnexecutorch/models/ocr/Types.h +4 -6
- package/common/rnexecutorch/models/ocr/{DetectorUtils.cpp → utils/DetectorUtils.cpp} +70 -63
- package/common/rnexecutorch/models/ocr/{DetectorUtils.h → utils/DetectorUtils.h} +12 -11
- package/common/rnexecutorch/models/ocr/{RecognitionHandlerUtils.cpp → utils/RecognitionHandlerUtils.cpp} +14 -11
- package/common/rnexecutorch/models/ocr/{RecognitionHandlerUtils.h → utils/RecognitionHandlerUtils.h} +5 -5
- package/common/rnexecutorch/models/ocr/{RecognizerUtils.cpp → utils/RecognizerUtils.cpp} +28 -26
- package/common/rnexecutorch/models/ocr/{RecognizerUtils.h → utils/RecognizerUtils.h} +15 -14
- package/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp +2 -2
- package/common/rnexecutorch/models/speech_to_text/SpeechToText.h +9 -2
- package/common/rnexecutorch/models/speech_to_text/SpeechToTextStrategy.h +2 -2
- package/common/rnexecutorch/models/speech_to_text/WhisperStrategy.cpp +2 -2
- package/common/rnexecutorch/models/speech_to_text/WhisperStrategy.h +2 -2
- package/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp +5 -5
- package/common/rnexecutorch/models/style_transfer/StyleTransfer.h +6 -0
- package/common/rnexecutorch/models/vertical_ocr/VerticalDetector.cpp +23 -22
- package/common/rnexecutorch/models/vertical_ocr/VerticalDetector.h +4 -4
- package/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp +34 -34
- package/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h +27 -20
- package/{third-party/ios/ExecutorchLib/ExecutorchLib/sampler → common/runner}/sampler.cpp +3 -2
- package/{third-party/ios/ExecutorchLib/ExecutorchLib/sampler → common/runner}/sampler.h +3 -2
- package/ios/libs/executorch/libbackend_coreml_ios.a +0 -0
- package/ios/libs/executorch/libbackend_coreml_simulator.a +0 -0
- package/{third-party/ios/ExecutorchLib/frameworks/backend_mps.xcframework/ios-arm64 → ios/libs/executorch}/libbackend_mps_ios.a +0 -0
- package/{third-party/ios/ExecutorchLib/frameworks/backend_mps.xcframework/ios-arm64-simulator → ios/libs/executorch}/libbackend_mps_simulator.a +0 -0
- package/ios/libs/executorch/libbackend_xnnpack_ios.a +0 -0
- package/ios/libs/executorch/libbackend_xnnpack_simulator.a +0 -0
- package/ios/libs/executorch/libexecutorch_ios.a +0 -0
- package/ios/libs/executorch/libexecutorch_simulator.a +0 -0
- package/ios/libs/executorch/libkernels_custom_ios.a +0 -0
- package/ios/libs/executorch/libkernels_custom_simulator.a +0 -0
- package/ios/libs/executorch/libkernels_optimized_ios.a +0 -0
- package/ios/libs/executorch/libkernels_optimized_simulator.a +0 -0
- package/ios/libs/executorch/libkernels_portable_ios.a +0 -0
- package/ios/libs/executorch/libkernels_portable_simulator.a +0 -0
- package/ios/libs/executorch/libkernels_quantized_ios.a +0 -0
- package/ios/libs/executorch/libkernels_quantized_simulator.a +0 -0
- package/ios/libs/tokenizers-cpp/physical-arm64-release/libsentencepiece.a +0 -0
- package/ios/{ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib → libs/tokenizers-cpp/physical-arm64-release/libtokenizers_c.a} +0 -0
- package/ios/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_cpp.a +0 -0
- package/ios/libs/tokenizers-cpp/simulator-arm64-debug/libsentencepiece.a +0 -0
- package/ios/{ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib → libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_c.a} +0 -0
- package/ios/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_cpp.a +0 -0
- package/lib/Error.js +9 -6
- package/lib/ThreadPool.d.ts +10 -0
- package/lib/ThreadPool.js +28 -0
- package/lib/constants/modelUrls.js +1 -1
- package/lib/controllers/OCRController.js +9 -14
- package/lib/controllers/VerticalOCRController.js +9 -14
- package/lib/hooks/computer_vision/useOCR.js +7 -8
- package/lib/hooks/computer_vision/useVerticalOCR.js +3 -5
- package/lib/index.d.ts +0 -2
- package/lib/index.js +1 -3
- package/lib/module/controllers/LLMController.js +6 -10
- package/lib/module/controllers/LLMController.js.map +1 -1
- package/lib/module/hooks/computer_vision/useClassification.js +2 -2
- package/lib/module/hooks/computer_vision/useClassification.js.map +1 -1
- package/lib/module/hooks/computer_vision/useImageEmbeddings.js +2 -2
- package/lib/module/hooks/computer_vision/useImageEmbeddings.js.map +1 -1
- package/lib/module/hooks/computer_vision/useImageSegmentation.js +2 -2
- package/lib/module/hooks/computer_vision/useImageSegmentation.js.map +1 -1
- package/lib/module/hooks/computer_vision/useObjectDetection.js +2 -2
- package/lib/module/hooks/computer_vision/useObjectDetection.js.map +1 -1
- package/lib/module/hooks/computer_vision/useStyleTransfer.js +2 -2
- package/lib/module/hooks/computer_vision/useStyleTransfer.js.map +1 -1
- package/lib/module/hooks/general/useExecutorchModule.js +2 -2
- package/lib/module/hooks/general/useExecutorchModule.js.map +1 -1
- package/lib/module/hooks/natural_language_processing/useTextEmbeddings.js +2 -2
- package/lib/module/hooks/natural_language_processing/useTextEmbeddings.js.map +1 -1
- package/lib/module/hooks/useModule.js +13 -9
- package/lib/module/hooks/useModule.js.map +1 -1
- package/lib/module/index.js +1 -1
- package/lib/module/index.js.map +1 -1
- package/lib/module/modules/BaseModule.js +9 -17
- package/lib/module/modules/BaseModule.js.map +1 -1
- package/lib/module/modules/computer_vision/ClassificationModule.js +2 -2
- package/lib/module/modules/computer_vision/ClassificationModule.js.map +1 -1
- package/lib/module/modules/computer_vision/ImageEmbeddingsModule.js +2 -2
- package/lib/module/modules/computer_vision/ImageEmbeddingsModule.js.map +1 -1
- package/lib/module/modules/computer_vision/ImageSegmentationModule.js +2 -2
- package/lib/module/modules/computer_vision/ImageSegmentationModule.js.map +1 -1
- package/lib/module/modules/computer_vision/ObjectDetectionModule.js +2 -2
- package/lib/module/modules/computer_vision/ObjectDetectionModule.js.map +1 -1
- package/lib/module/modules/computer_vision/StyleTransferModule.js +2 -2
- package/lib/module/modules/computer_vision/StyleTransferModule.js.map +1 -1
- package/lib/module/modules/general/ExecutorchModule.js +2 -2
- package/lib/module/modules/general/ExecutorchModule.js.map +1 -1
- package/lib/module/modules/natural_language_processing/TextEmbeddingsModule.js +2 -2
- package/lib/module/modules/natural_language_processing/TextEmbeddingsModule.js.map +1 -1
- package/lib/module/native/RnExecutorchModules.js +1 -2
- package/lib/module/native/RnExecutorchModules.js.map +1 -1
- package/lib/module/utils/SpeechToTextModule/ASR.js +3 -3
- package/lib/module/utils/SpeechToTextModule/ASR.js.map +1 -1
- package/lib/modules/computer_vision/OCRModule.d.ts +4 -5
- package/lib/modules/computer_vision/OCRModule.js +9 -12
- package/lib/modules/computer_vision/VerticalOCRModule.d.ts +4 -5
- package/lib/modules/computer_vision/VerticalOCRModule.js +9 -12
- package/lib/native/RnExecutorchModules.d.ts +5 -1
- package/lib/native/RnExecutorchModules.js +3 -1
- package/lib/tsconfig.tsbuildinfo +1 -0
- package/lib/types/common.d.ts +1 -0
- package/lib/typescript/controllers/LLMController.d.ts +1 -1
- package/lib/typescript/controllers/LLMController.d.ts.map +1 -1
- package/lib/typescript/hooks/useModule.d.ts +8 -5
- package/lib/typescript/hooks/useModule.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +1 -0
- package/lib/typescript/index.d.ts.map +1 -1
- package/lib/typescript/modules/BaseModule.d.ts +7 -6
- package/lib/typescript/modules/BaseModule.d.ts.map +1 -1
- package/lib/typescript/modules/computer_vision/ClassificationModule.d.ts +2 -2
- package/lib/typescript/modules/computer_vision/ClassificationModule.d.ts.map +1 -1
- package/lib/typescript/modules/computer_vision/ImageEmbeddingsModule.d.ts +2 -2
- package/lib/typescript/modules/computer_vision/ImageEmbeddingsModule.d.ts.map +1 -1
- package/lib/typescript/modules/computer_vision/ImageSegmentationModule.d.ts +2 -2
- package/lib/typescript/modules/computer_vision/ImageSegmentationModule.d.ts.map +1 -1
- package/lib/typescript/modules/computer_vision/ObjectDetectionModule.d.ts +2 -2
- package/lib/typescript/modules/computer_vision/ObjectDetectionModule.d.ts.map +1 -1
- package/lib/typescript/modules/computer_vision/StyleTransferModule.d.ts +2 -2
- package/lib/typescript/modules/computer_vision/StyleTransferModule.d.ts.map +1 -1
- package/lib/typescript/modules/general/ExecutorchModule.d.ts +2 -2
- package/lib/typescript/modules/general/ExecutorchModule.d.ts.map +1 -1
- package/lib/typescript/modules/natural_language_processing/TextEmbeddingsModule.d.ts +2 -2
- package/lib/typescript/modules/natural_language_processing/TextEmbeddingsModule.d.ts.map +1 -1
- package/lib/typescript/native/RnExecutorchModules.d.ts +1 -3
- package/lib/typescript/native/RnExecutorchModules.d.ts.map +1 -1
- package/lib/utils/ResourceFetcherUtils.js +0 -1
- package/lib/utils/llm.js +0 -1
- package/package.json +1 -2
- package/react-native-executorch.podspec +47 -44
- package/src/controllers/LLMController.ts +8 -13
- package/src/hooks/computer_vision/useClassification.ts +2 -2
- package/src/hooks/computer_vision/useImageEmbeddings.ts +2 -2
- package/src/hooks/computer_vision/useImageSegmentation.ts +2 -2
- package/src/hooks/computer_vision/useObjectDetection.ts +2 -2
- package/src/hooks/computer_vision/useStyleTransfer.ts +2 -2
- package/src/hooks/general/useExecutorchModule.ts +2 -2
- package/src/hooks/natural_language_processing/useTextEmbeddings.ts +2 -2
- package/src/hooks/useModule.ts +23 -13
- package/src/index.ts +3 -2
- package/src/modules/BaseModule.ts +17 -28
- package/src/modules/computer_vision/ClassificationModule.ts +2 -2
- package/src/modules/computer_vision/ImageEmbeddingsModule.ts +2 -2
- package/src/modules/computer_vision/ImageSegmentationModule.ts +2 -2
- package/src/modules/computer_vision/ObjectDetectionModule.ts +2 -2
- package/src/modules/computer_vision/StyleTransferModule.ts +2 -2
- package/src/modules/general/ExecutorchModule.ts +2 -2
- package/src/modules/natural_language_processing/TextEmbeddingsModule.ts +2 -2
- package/src/native/RnExecutorchModules.ts +1 -5
- package/src/utils/SpeechToTextModule/ASR.ts +4 -4
- package/third-party/android/libs/cpuinfo/arm64-v8a/libcpuinfo.so +0 -0
- package/third-party/android/libs/pthreadpool/arm64-v8a/libpthreadpool.so +0 -0
- package/third-party/include/cpuinfo/cpuinfo.h +2305 -0
- package/third-party/include/executorch/extension/threadpool/cpuinfo_utils.h +26 -0
- package/third-party/include/executorch/extension/threadpool/threadpool.h +94 -0
- package/third-party/include/pthreadpool/pthreadpool.h +2236 -0
- package/android/src/main/java/com/swmansion/rnexecutorch/LLM.kt +0 -63
- package/ios/ExecutorchLib.xcframework/Info.plist +0 -43
- package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/ETModel.h +0 -27
- package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h +0 -14
- package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/LLaMARunner.h +0 -32
- package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Info.plist +0 -0
- package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/ETModel.h +0 -27
- package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h +0 -14
- package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/LLaMARunner.h +0 -32
- package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Info.plist +0 -0
- package/ios/RnExecutorch/LLM.h +0 -5
- package/ios/RnExecutorch/LLM.mm +0 -78
- package/lib/Error.d.ts +0 -30
- package/lib/constants/directories.d.ts +0 -1
- package/lib/constants/ocr/symbols.d.ts +0 -75
- package/lib/controllers/OCRController.d.ts +0 -23
- package/lib/controllers/VerticalOCRController.d.ts +0 -25
- package/lib/hooks/useModule.d.ts +0 -17
- package/lib/module/modules/BaseNonStaticModule.js +0 -17
- package/lib/module/modules/BaseNonStaticModule.js.map +0 -1
- package/lib/module/native/NativeLLM.js +0 -5
- package/lib/module/native/NativeLLM.js.map +0 -1
- package/lib/modules/BaseModule.d.ts +0 -8
- package/lib/modules/BaseNonStaticModule.d.ts +0 -9
- package/lib/native/NativeETInstaller.d.ts +0 -6
- package/lib/native/NativeOCR.d.ts +0 -8
- package/lib/native/NativeVerticalOCR.d.ts +0 -8
- package/lib/types/imageSegmentation.d.ts +0 -24
- package/lib/types/objectDetection.d.ts +0 -104
- package/lib/types/ocr.d.ts +0 -11
- package/lib/typescript/modules/BaseNonStaticModule.d.ts +0 -10
- package/lib/typescript/modules/BaseNonStaticModule.d.ts.map +0 -1
- package/lib/typescript/native/NativeLLM.d.ts +0 -12
- package/lib/typescript/native/NativeLLM.d.ts.map +0 -1
- package/lib/utils/stt.d.ts +0 -1
- package/src/modules/BaseNonStaticModule.ts +0 -26
- package/src/native/NativeLLM.ts +0 -14
- package/third-party/include/tokenizers-cpp/tokenizers_c.h +0 -61
- package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/ETModel.h +0 -27
- package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/ETModel.mm +0 -249
- package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.h +0 -14
- package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.mm +0 -80
- package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.h +0 -32
- package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.mm +0 -95
- package/third-party/ios/ExecutorchLib/ExecutorchLib/InputType.h +0 -12
- package/third-party/ios/ExecutorchLib/ExecutorchLib/Utils.hpp +0 -217
- package/third-party/ios/ExecutorchLib/ExecutorchLib/model/Model.cpp +0 -11
- package/third-party/ios/ExecutorchLib/ExecutorchLib/model/Model.h +0 -11
- package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/base64.h +0 -202
- package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.cpp +0 -313
- package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.h +0 -57
- package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.cpp +0 -78
- package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.h +0 -23
- package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.cpp +0 -427
- package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.h +0 -87
- package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tokenizer.h +0 -76
- package/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.pbxproj +0 -683
- package/third-party/ios/ExecutorchLib/build.sh +0 -44
- package/third-party/ios/ExecutorchLib/frameworks/backend_coreml.xcframework/Info.plist +0 -43
- package/third-party/ios/ExecutorchLib/frameworks/backend_coreml.xcframework/ios-arm64/libbackend_coreml_ios.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/backend_coreml.xcframework/ios-arm64-simulator/libbackend_coreml_simulator.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/backend_mps.xcframework/Info.plist +0 -43
- package/third-party/ios/ExecutorchLib/frameworks/backend_xnnpack.xcframework/Info.plist +0 -43
- package/third-party/ios/ExecutorchLib/frameworks/backend_xnnpack.xcframework/ios-arm64/libbackend_xnnpack_ios.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/backend_xnnpack.xcframework/ios-arm64-simulator/libbackend_xnnpack_simulator.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/Info.plist +0 -47
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/macros/Export.h +0 -163
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/macros/Macros.h +0 -497
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/BFloat16-inl.h +0 -342
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/BFloat16-math.h +0 -266
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/BFloat16.h +0 -125
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/Half-inl.h +0 -347
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/Half.h +0 -416
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/TypeSafeSignMath.h +0 -133
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/bit_cast.h +0 -43
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/floating_point_utils.h +0 -33
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/irange.h +0 -107
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorch.h +0 -13
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchError.h +0 -16
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchLog.h +0 -76
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchModule.h +0 -286
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchTensor.h +0 -742
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchValue.h +0 -219
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/module/module.h +0 -492
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor.h +0 -13
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor_accessor.h +0 -190
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor_ptr.h +0 -347
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor_ptr_maker.h +0 -653
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/backend/backend_execution_context.h +0 -71
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/backend/backend_init_context.h +0 -72
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/backend/interface.h +0 -166
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/array_ref.h +0 -235
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/data_loader.h +0 -136
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/defines.h +0 -20
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/error.h +0 -229
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/evalue.h +0 -521
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/event_tracer.h +0 -565
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/event_tracer_hooks.h +0 -323
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/event_tracer_hooks_delegate.h +0 -197
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/exec_aten.h +0 -147
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/dim_order_util.h +0 -263
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/scalar_type_util.h +0 -1331
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/tensor_dimension_limit.h +0 -21
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/tensor_shape_to_c_string.h +0 -69
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/tensor_util.h +0 -1250
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/freeable_buffer.h +0 -107
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/hierarchical_allocator.h +0 -107
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/memory_allocator.h +0 -198
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/named_data_map.h +0 -86
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/bfloat16.h +0 -27
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/bfloat16_math.h +0 -14
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/bits_types.h +0 -83
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Export.h +0 -163
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h +0 -497
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +0 -342
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-math.h +0 -266
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16.h +0 -125
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half-inl.h +0 -347
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half.h +0 -416
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/TypeSafeSignMath.h +0 -133
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/bit_cast.h +0 -43
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/floating_point_utils.h +0 -33
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/irange.h +0 -107
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/complex.h +0 -44
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/device.h +0 -70
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/half.h +0 -27
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/optional.h +0 -36
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/qint_types.h +0 -83
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/scalar.h +0 -110
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/scalar_type.h +0 -154
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/string_view.h +0 -29
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/tensor.h +0 -142
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/tensor_impl.h +0 -261
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/tensor_options.h +0 -60
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/result.h +0 -258
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/span.h +0 -93
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/tag.h +0 -71
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/tensor_layout.h +0 -79
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/tensor_shape_dynamism.h +0 -39
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/memory_manager.h +0 -113
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/method.h +0 -387
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/method_meta.h +0 -251
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/program.h +0 -320
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/pte_data_map.h +0 -144
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/tensor_parser.h +0 -156
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/kernel/kernel_runtime_context.h +0 -122
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/kernel/operator_registry.h +0 -278
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/abort.h +0 -36
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/assert.h +0 -119
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/clock.h +0 -43
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/compat_unistd.h +0 -75
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/compiler.h +0 -191
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/log.h +0 -177
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/platform.h +0 -133
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/profiler.h +0 -292
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/runtime.h +0 -35
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/system.h +0 -49
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/types.h +0 -24
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/schema/extended_header.h +0 -76
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/module.modulemap +0 -5
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/libexecutorch_ios.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/macros/Export.h +0 -163
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/macros/Macros.h +0 -497
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/BFloat16-inl.h +0 -342
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/BFloat16-math.h +0 -266
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/BFloat16.h +0 -125
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/Half-inl.h +0 -347
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/Half.h +0 -416
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/TypeSafeSignMath.h +0 -133
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/bit_cast.h +0 -43
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/floating_point_utils.h +0 -33
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/irange.h +0 -107
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorch.h +0 -13
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchError.h +0 -16
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchLog.h +0 -76
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchModule.h +0 -286
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchTensor.h +0 -742
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchValue.h +0 -219
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/module/module.h +0 -492
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor.h +0 -13
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor_accessor.h +0 -190
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor_ptr.h +0 -347
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor_ptr_maker.h +0 -653
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/backend/backend_execution_context.h +0 -71
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/backend/backend_init_context.h +0 -72
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/backend/interface.h +0 -166
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/array_ref.h +0 -235
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/data_loader.h +0 -136
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/defines.h +0 -20
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/error.h +0 -229
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/evalue.h +0 -521
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/event_tracer.h +0 -565
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/event_tracer_hooks.h +0 -323
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/event_tracer_hooks_delegate.h +0 -197
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/exec_aten.h +0 -147
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/dim_order_util.h +0 -263
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/scalar_type_util.h +0 -1331
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/tensor_dimension_limit.h +0 -21
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/tensor_shape_to_c_string.h +0 -69
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/tensor_util.h +0 -1250
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/freeable_buffer.h +0 -107
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/hierarchical_allocator.h +0 -107
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/memory_allocator.h +0 -198
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/named_data_map.h +0 -86
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/bfloat16.h +0 -27
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/bfloat16_math.h +0 -14
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/bits_types.h +0 -83
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Export.h +0 -163
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h +0 -497
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +0 -342
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-math.h +0 -266
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16.h +0 -125
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half-inl.h +0 -347
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half.h +0 -416
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/TypeSafeSignMath.h +0 -133
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/bit_cast.h +0 -43
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/floating_point_utils.h +0 -33
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/irange.h +0 -107
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/complex.h +0 -44
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/device.h +0 -70
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/half.h +0 -27
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/optional.h +0 -36
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/qint_types.h +0 -83
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/scalar.h +0 -110
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/scalar_type.h +0 -154
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/string_view.h +0 -29
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/tensor.h +0 -142
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/tensor_impl.h +0 -261
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/tensor_options.h +0 -60
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/result.h +0 -258
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/span.h +0 -93
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/tag.h +0 -71
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/tensor_layout.h +0 -79
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/tensor_shape_dynamism.h +0 -39
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/memory_manager.h +0 -113
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/method.h +0 -387
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/method_meta.h +0 -251
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/program.h +0 -320
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/pte_data_map.h +0 -144
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/tensor_parser.h +0 -156
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/kernel/kernel_runtime_context.h +0 -122
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/kernel/operator_registry.h +0 -278
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/abort.h +0 -36
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/assert.h +0 -119
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/clock.h +0 -43
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/compat_unistd.h +0 -75
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/compiler.h +0 -191
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/log.h +0 -177
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/platform.h +0 -133
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/profiler.h +0 -292
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/runtime.h +0 -35
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/system.h +0 -49
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/types.h +0 -24
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/schema/extended_header.h +0 -76
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/module.modulemap +0 -5
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/libexecutorch_simulator.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/kernels_custom.xcframework/Info.plist +0 -43
- package/third-party/ios/ExecutorchLib/frameworks/kernels_custom.xcframework/ios-arm64/libkernels_custom_ios.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/kernels_custom.xcframework/ios-arm64-simulator/libkernels_custom_simulator.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/kernels_optimized.xcframework/Info.plist +0 -43
- package/third-party/ios/ExecutorchLib/frameworks/kernels_optimized.xcframework/ios-arm64/libkernels_optimized_ios.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/kernels_optimized.xcframework/ios-arm64-simulator/libkernels_optimized_simulator.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/kernels_portable.xcframework/Info.plist +0 -43
- package/third-party/ios/ExecutorchLib/frameworks/kernels_portable.xcframework/ios-arm64/libkernels_portable_ios.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/kernels_portable.xcframework/ios-arm64-simulator/libkernels_portable_simulator.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/kernels_quantized.xcframework/Info.plist +0 -43
- package/third-party/ios/ExecutorchLib/frameworks/kernels_quantized.xcframework/ios-arm64/libkernels_quantized_ios.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/kernels_quantized.xcframework/ios-arm64-simulator/libkernels_quantized_simulator.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/Info.plist +0 -43
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/bitmap256.h +0 -82
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/filtered_re2.h +0 -111
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/pod_array.h +0 -43
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/prefilter.h +0 -130
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/prefilter_tree.h +0 -139
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/prog.h +0 -483
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/re2.h +0 -994
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/regexp.h +0 -692
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/set.h +0 -85
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/sparse_array.h +0 -367
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/sparse_set.h +0 -241
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/stringpiece.h +0 -205
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/unicode_casefold.h +0 -78
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/unicode_groups.h +0 -64
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/walker-inl.h +0 -235
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Info.plist +0 -26
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/re2 +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/bitmap256.h +0 -82
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/filtered_re2.h +0 -111
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/pod_array.h +0 -43
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/prefilter.h +0 -130
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/prefilter_tree.h +0 -139
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/prog.h +0 -483
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/re2.h +0 -994
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/regexp.h +0 -692
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/set.h +0 -85
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/sparse_array.h +0 -367
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/sparse_set.h +0 -241
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/stringpiece.h +0 -205
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/unicode_casefold.h +0 -78
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/unicode_groups.h +0 -64
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/walker-inl.h +0 -235
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Info.plist +0 -26
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/re2 +0 -0
- /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/irunner.h +0 -0
- /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/runner.cpp +0 -0
- /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/runner.h +0 -0
- /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/stats.h +0 -0
- /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/text_decoder_runner.cpp +0 -0
- /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/text_decoder_runner.h +0 -0
- /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/text_prefiller.cpp +0 -0
- /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/text_prefiller.h +0 -0
- /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/text_token_generator.h +0 -0
- /package/{third-party/ios/ExecutorchLib/ExecutorchLib → common}/runner/util.h +0 -0
|
@@ -0,0 +1,2236 @@
|
|
|
1
|
+
#ifndef PTHREADPOOL_H_
|
|
2
|
+
#define PTHREADPOOL_H_
|
|
3
|
+
|
|
4
|
+
#include <stddef.h>
|
|
5
|
+
#include <stdint.h>
|
|
6
|
+
|
|
7
|
+
typedef struct pthreadpool *pthreadpool_t;
|
|
8
|
+
|
|
9
|
+
typedef void (*pthreadpool_task_1d_t)(void *, size_t);
|
|
10
|
+
typedef void (*pthreadpool_task_1d_with_thread_t)(void *, size_t, size_t);
|
|
11
|
+
typedef void (*pthreadpool_task_1d_tile_1d_t)(void *, size_t, size_t);
|
|
12
|
+
typedef void (*pthreadpool_task_2d_t)(void *, size_t, size_t);
|
|
13
|
+
typedef void (*pthreadpool_task_2d_with_thread_t)(void *, size_t, size_t,
|
|
14
|
+
size_t);
|
|
15
|
+
typedef void (*pthreadpool_task_2d_tile_1d_t)(void *, size_t, size_t, size_t);
|
|
16
|
+
typedef void (*pthreadpool_task_2d_tile_2d_t)(void *, size_t, size_t, size_t,
|
|
17
|
+
size_t);
|
|
18
|
+
typedef void (*pthreadpool_task_3d_t)(void *, size_t, size_t, size_t);
|
|
19
|
+
typedef void (*pthreadpool_task_3d_tile_1d_t)(void *, size_t, size_t, size_t,
|
|
20
|
+
size_t);
|
|
21
|
+
typedef void (*pthreadpool_task_3d_tile_1d_with_thread_t)(void *, size_t,
|
|
22
|
+
size_t, size_t,
|
|
23
|
+
size_t, size_t);
|
|
24
|
+
typedef void (*pthreadpool_task_3d_tile_2d_t)(void *, size_t, size_t, size_t,
|
|
25
|
+
size_t, size_t);
|
|
26
|
+
typedef void (*pthreadpool_task_4d_t)(void *, size_t, size_t, size_t, size_t);
|
|
27
|
+
typedef void (*pthreadpool_task_4d_tile_1d_t)(void *, size_t, size_t, size_t,
|
|
28
|
+
size_t, size_t);
|
|
29
|
+
typedef void (*pthreadpool_task_4d_tile_2d_t)(void *, size_t, size_t, size_t,
|
|
30
|
+
size_t, size_t, size_t);
|
|
31
|
+
typedef void (*pthreadpool_task_5d_t)(void *, size_t, size_t, size_t, size_t,
|
|
32
|
+
size_t);
|
|
33
|
+
typedef void (*pthreadpool_task_5d_tile_1d_t)(void *, size_t, size_t, size_t,
|
|
34
|
+
size_t, size_t, size_t);
|
|
35
|
+
typedef void (*pthreadpool_task_5d_tile_2d_t)(void *, size_t, size_t, size_t,
|
|
36
|
+
size_t, size_t, size_t, size_t);
|
|
37
|
+
typedef void (*pthreadpool_task_6d_t)(void *, size_t, size_t, size_t, size_t,
|
|
38
|
+
size_t, size_t);
|
|
39
|
+
typedef void (*pthreadpool_task_6d_tile_1d_t)(void *, size_t, size_t, size_t,
|
|
40
|
+
size_t, size_t, size_t, size_t);
|
|
41
|
+
typedef void (*pthreadpool_task_6d_tile_2d_t)(void *, size_t, size_t, size_t,
|
|
42
|
+
size_t, size_t, size_t, size_t,
|
|
43
|
+
size_t);
|
|
44
|
+
|
|
45
|
+
typedef void (*pthreadpool_task_1d_with_id_t)(void *, uint32_t, size_t);
|
|
46
|
+
typedef void (*pthreadpool_task_2d_tile_1d_with_id_t)(void *, uint32_t, size_t,
|
|
47
|
+
size_t, size_t);
|
|
48
|
+
typedef void (*pthreadpool_task_2d_tile_2d_with_id_t)(void *, uint32_t, size_t,
|
|
49
|
+
size_t, size_t, size_t);
|
|
50
|
+
typedef void (*pthreadpool_task_3d_tile_1d_with_id_t)(void *, uint32_t, size_t,
|
|
51
|
+
size_t, size_t, size_t);
|
|
52
|
+
typedef void (*pthreadpool_task_3d_tile_2d_with_id_t)(void *, uint32_t, size_t,
|
|
53
|
+
size_t, size_t, size_t,
|
|
54
|
+
size_t);
|
|
55
|
+
typedef void (*pthreadpool_task_4d_tile_2d_with_id_t)(void *, uint32_t, size_t,
|
|
56
|
+
size_t, size_t, size_t,
|
|
57
|
+
size_t, size_t);
|
|
58
|
+
|
|
59
|
+
typedef void (*pthreadpool_task_2d_tile_1d_with_id_with_thread_t)(
|
|
60
|
+
void *, uint32_t, size_t, size_t, size_t, size_t);
|
|
61
|
+
typedef void (*pthreadpool_task_3d_tile_1d_with_id_with_thread_t)(
|
|
62
|
+
void *, uint32_t, size_t, size_t, size_t, size_t, size_t);
|
|
63
|
+
|
|
64
|
+
/**
|
|
65
|
+
* Disable support for denormalized numbers to the maximum extent possible for
|
|
66
|
+
* the duration of the computation.
|
|
67
|
+
*
|
|
68
|
+
* Handling denormalized floating-point numbers is often implemented in
|
|
69
|
+
* microcode, and incurs significant performance degradation. This hint
|
|
70
|
+
* instructs the thread pool to disable support for denormalized numbers before
|
|
71
|
+
* running the computation by manipulating architecture-specific control
|
|
72
|
+
* registers, and restore the initial value of control registers after the
|
|
73
|
+
* computation is complete. The thread pool temporarily disables denormalized
|
|
74
|
+
* numbers on all threads involved in the computation (i.e. the caller threads,
|
|
75
|
+
* and potentially worker threads).
|
|
76
|
+
*
|
|
77
|
+
* Disabling denormalized numbers may have a small negative effect on results'
|
|
78
|
+
* accuracy. As various architectures differ in capabilities to control
|
|
79
|
+
* processing of denormalized numbers, using this flag may also hurt results'
|
|
80
|
+
* reproducibility across different instruction set architectures.
|
|
81
|
+
*/
|
|
82
|
+
#define PTHREADPOOL_FLAG_DISABLE_DENORMALS 0x00000001
|
|
83
|
+
|
|
84
|
+
/**
|
|
85
|
+
* Yield worker threads to the system scheduler after the operation is finished.
|
|
86
|
+
*
|
|
87
|
+
* Force workers to use kernel wait (instead of active spin-wait by default) for
|
|
88
|
+
* new commands after this command is processed. This flag affects only the
|
|
89
|
+
* immediate next operation on this thread pool. To make the thread pool always
|
|
90
|
+
* use kernel wait, pass this flag to all parallelization functions.
|
|
91
|
+
*/
|
|
92
|
+
#define PTHREADPOOL_FLAG_YIELD_WORKERS 0x00000002
|
|
93
|
+
|
|
94
|
+
#ifdef __cplusplus
|
|
95
|
+
extern "C" {
|
|
96
|
+
#endif
|
|
97
|
+
|
|
98
|
+
/**
|
|
99
|
+
* Create a thread pool with the specified number of threads.
|
|
100
|
+
*
|
|
101
|
+
* @param threads_count the number of threads in the thread pool.
|
|
102
|
+
* A value of 0 has special interpretation: it creates a thread pool with as
|
|
103
|
+
* many threads as there are logical processors in the system.
|
|
104
|
+
*
|
|
105
|
+
* @returns A pointer to an opaque thread pool object if the call is
|
|
106
|
+
* successful, or a NULL pointer if the call failed.
|
|
107
|
+
*/
|
|
108
|
+
pthreadpool_t pthreadpool_create(size_t threads_count);
|
|
109
|
+
|
|
110
|
+
/**
|
|
111
|
+
* Query the number of threads in a thread pool.
|
|
112
|
+
*
|
|
113
|
+
* @param threadpool the thread pool to query.
|
|
114
|
+
*
|
|
115
|
+
* @returns The number of threads in the thread pool.
|
|
116
|
+
*/
|
|
117
|
+
size_t pthreadpool_get_threads_count(pthreadpool_t threadpool);
|
|
118
|
+
|
|
119
|
+
/**
|
|
120
|
+
* Process items on a 1D grid.
|
|
121
|
+
*
|
|
122
|
+
* The function implements a parallel version of the following snippet:
|
|
123
|
+
*
|
|
124
|
+
* for (size_t i = 0; i < range; i++)
|
|
125
|
+
* function(context, i);
|
|
126
|
+
*
|
|
127
|
+
* When the function returns, all items have been processed and the thread pool
|
|
128
|
+
* is ready for a new task.
|
|
129
|
+
*
|
|
130
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
131
|
+
* calls are serialized.
|
|
132
|
+
*
|
|
133
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
134
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
135
|
+
* @param function the function to call for each item.
|
|
136
|
+
* @param context the first argument passed to the specified function.
|
|
137
|
+
* @param range the number of items on the 1D grid to process. The
|
|
138
|
+
* specified function will be called once for each item.
|
|
139
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
140
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
141
|
+
*/
|
|
142
|
+
void pthreadpool_parallelize_1d(pthreadpool_t threadpool,
|
|
143
|
+
pthreadpool_task_1d_t function, void *context,
|
|
144
|
+
size_t range, uint32_t flags);
|
|
145
|
+
|
|
146
|
+
/**
|
|
147
|
+
* Process items on a 1D grid passing along the current thread id.
|
|
148
|
+
*
|
|
149
|
+
* The function implements a parallel version of the following snippet:
|
|
150
|
+
*
|
|
151
|
+
* for (size_t i = 0; i < range; i++)
|
|
152
|
+
* function(context, thread_index, i);
|
|
153
|
+
*
|
|
154
|
+
* When the function returns, all items have been processed and the thread pool
|
|
155
|
+
* is ready for a new task.
|
|
156
|
+
*
|
|
157
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
158
|
+
* calls are serialized.
|
|
159
|
+
*
|
|
160
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
161
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
162
|
+
* @param function the function to call for each item.
|
|
163
|
+
* @param context the first argument passed to the specified function.
|
|
164
|
+
* @param range the number of items on the 1D grid to process. The
|
|
165
|
+
* specified function will be called once for each item.
|
|
166
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
167
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
168
|
+
*/
|
|
169
|
+
void pthreadpool_parallelize_1d_with_thread(
|
|
170
|
+
pthreadpool_t threadpool, pthreadpool_task_1d_with_thread_t function,
|
|
171
|
+
void *context, size_t range, uint32_t flags);
|
|
172
|
+
|
|
173
|
+
/**
|
|
174
|
+
* Process items on a 1D grid using a microarchitecture-aware task function.
|
|
175
|
+
*
|
|
176
|
+
* The function implements a parallel version of the following snippet:
|
|
177
|
+
*
|
|
178
|
+
* uint32_t uarch_index = cpuinfo_initialize() ?
|
|
179
|
+
* cpuinfo_get_current_uarch_index() : default_uarch_index;
|
|
180
|
+
* if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
|
|
181
|
+
* for (size_t i = 0; i < range; i++)
|
|
182
|
+
* function(context, uarch_index, i);
|
|
183
|
+
*
|
|
184
|
+
* When the function returns, all items have been processed and the thread pool
|
|
185
|
+
* is ready for a new task.
|
|
186
|
+
*
|
|
187
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
188
|
+
* calls are serialized.
|
|
189
|
+
*
|
|
190
|
+
* @param threadpool the thread pool to use for parallelisation. If
|
|
191
|
+
* threadpool is NULL, all items are processed serially on the calling
|
|
192
|
+
* thread.
|
|
193
|
+
* @param function the function to call for each item.
|
|
194
|
+
* @param context the first argument passed to the specified
|
|
195
|
+
* function.
|
|
196
|
+
* @param default_uarch_index the microarchitecture index to use when
|
|
197
|
+
* pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
|
|
198
|
+
* or index returned by cpuinfo_get_current_uarch_index() exceeds the
|
|
199
|
+
* max_uarch_index value.
|
|
200
|
+
* @param max_uarch_index the maximum microarchitecture index expected by
|
|
201
|
+
* the specified function. If the index returned by
|
|
202
|
+
* cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
|
|
203
|
+
* will be used instead. default_uarch_index can exceed max_uarch_index.
|
|
204
|
+
* @param range the number of items on the 1D grid to process.
|
|
205
|
+
* The specified function will be called once for each item.
|
|
206
|
+
* @param flags a bitwise combination of zero or more optional
|
|
207
|
+
* flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
|
|
208
|
+
* PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
209
|
+
*/
|
|
210
|
+
void pthreadpool_parallelize_1d_with_uarch(
|
|
211
|
+
pthreadpool_t threadpool, pthreadpool_task_1d_with_id_t function,
|
|
212
|
+
void *context, uint32_t default_uarch_index, uint32_t max_uarch_index,
|
|
213
|
+
size_t range, uint32_t flags);
|
|
214
|
+
|
|
215
|
+
/**
|
|
216
|
+
* Process items on a 1D grid with specified maximum tile size.
|
|
217
|
+
*
|
|
218
|
+
* The function implements a parallel version of the following snippet:
|
|
219
|
+
*
|
|
220
|
+
* for (size_t i = 0; i < range; i += tile)
|
|
221
|
+
* function(context, i, min(range - i, tile));
|
|
222
|
+
*
|
|
223
|
+
* When the call returns, all items have been processed and the thread pool is
|
|
224
|
+
* ready for a new task.
|
|
225
|
+
*
|
|
226
|
+
* @note If multiple threads call this function with the same thread pool,
|
|
227
|
+
* the calls are serialized.
|
|
228
|
+
*
|
|
229
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
230
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
231
|
+
* @param function the function to call for each tile.
|
|
232
|
+
* @param context the first argument passed to the specified function.
|
|
233
|
+
* @param range the number of items on the 1D grid to process.
|
|
234
|
+
* @param tile the maximum number of items on the 1D grid to process in
|
|
235
|
+
* one function call.
|
|
236
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
237
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
238
|
+
*/
|
|
239
|
+
void pthreadpool_parallelize_1d_tile_1d(pthreadpool_t threadpool,
|
|
240
|
+
pthreadpool_task_1d_tile_1d_t function,
|
|
241
|
+
void *context, size_t range,
|
|
242
|
+
size_t tile, uint32_t flags);
|
|
243
|
+
|
|
244
|
+
/**
|
|
245
|
+
* Process items on a 2D grid.
|
|
246
|
+
*
|
|
247
|
+
* The function implements a parallel version of the following snippet:
|
|
248
|
+
*
|
|
249
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
250
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
251
|
+
* function(context, i, j);
|
|
252
|
+
*
|
|
253
|
+
* When the function returns, all items have been processed and the thread pool
|
|
254
|
+
* is ready for a new task.
|
|
255
|
+
*
|
|
256
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
257
|
+
* calls are serialized.
|
|
258
|
+
*
|
|
259
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
260
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
261
|
+
* @param function the function to call for each item.
|
|
262
|
+
* @param context the first argument passed to the specified function.
|
|
263
|
+
* @param range_i the number of items to process along the first dimension
|
|
264
|
+
* of the 2D grid.
|
|
265
|
+
* @param range_j the number of items to process along the second dimension
|
|
266
|
+
* of the 2D grid.
|
|
267
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
268
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
269
|
+
*/
|
|
270
|
+
void pthreadpool_parallelize_2d(pthreadpool_t threadpool,
|
|
271
|
+
pthreadpool_task_2d_t function, void *context,
|
|
272
|
+
size_t range_i, size_t range_j, uint32_t flags);
|
|
273
|
+
|
|
274
|
+
/**
|
|
275
|
+
* Process items on a 2D grid passing along the current thread id.
|
|
276
|
+
*
|
|
277
|
+
* The function implements a parallel version of the following snippet:
|
|
278
|
+
*
|
|
279
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
280
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
281
|
+
* function(context, thread_index, i, j);
|
|
282
|
+
*
|
|
283
|
+
* When the function returns, all items have been processed and the thread pool
|
|
284
|
+
* is ready for a new task.
|
|
285
|
+
*
|
|
286
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
287
|
+
* calls are serialized.
|
|
288
|
+
*
|
|
289
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
290
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
291
|
+
* @param function the function to call for each item.
|
|
292
|
+
* @param context the first argument passed to the specified function.
|
|
293
|
+
* @param range_i the number of items to process along the first dimension
|
|
294
|
+
* of the 2D grid.
|
|
295
|
+
* @param range_j the number of items to process along the second dimension
|
|
296
|
+
* of the 2D grid.
|
|
297
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
298
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
299
|
+
*/
|
|
300
|
+
void pthreadpool_parallelize_2d_with_thread(
|
|
301
|
+
pthreadpool_t threadpool, pthreadpool_task_2d_with_thread_t function,
|
|
302
|
+
void *context, size_t range_i, size_t range_j, uint32_t flags);
|
|
303
|
+
|
|
304
|
+
/**
|
|
305
|
+
* Process items on a 2D grid with the specified maximum tile size along the
|
|
306
|
+
* last grid dimension.
|
|
307
|
+
*
|
|
308
|
+
* The function implements a parallel version of the following snippet:
|
|
309
|
+
*
|
|
310
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
311
|
+
* for (size_t j = 0; j < range_j; j += tile_j)
|
|
312
|
+
* function(context, i, j, min(range_j - j, tile_j));
|
|
313
|
+
*
|
|
314
|
+
* When the function returns, all items have been processed and the thread pool
|
|
315
|
+
* is ready for a new task.
|
|
316
|
+
*
|
|
317
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
318
|
+
* calls are serialized.
|
|
319
|
+
*
|
|
320
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
321
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
322
|
+
* @param function the function to call for each tile.
|
|
323
|
+
* @param context the first argument passed to the specified function.
|
|
324
|
+
* @param range_i the number of items to process along the first dimension
|
|
325
|
+
* of the 2D grid.
|
|
326
|
+
* @param range_j the number of items to process along the second dimension
|
|
327
|
+
* of the 2D grid.
|
|
328
|
+
* @param tile_j the maximum number of items along the second dimension of
|
|
329
|
+
* the 2D grid to process in one function call.
|
|
330
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
331
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
332
|
+
*/
|
|
333
|
+
void pthreadpool_parallelize_2d_tile_1d(pthreadpool_t threadpool,
|
|
334
|
+
pthreadpool_task_2d_tile_1d_t function,
|
|
335
|
+
void *context, size_t range_i,
|
|
336
|
+
size_t range_j, size_t tile_j,
|
|
337
|
+
uint32_t flags);
|
|
338
|
+
|
|
339
|
+
/**
|
|
340
|
+
* Process items on a 2D grid with the specified maximum tile size along the
|
|
341
|
+
* last grid dimension using a microarchitecture-aware task function.
|
|
342
|
+
*
|
|
343
|
+
* The function implements a parallel version of the following snippet:
|
|
344
|
+
*
|
|
345
|
+
* uint32_t uarch_index = cpuinfo_initialize() ?
|
|
346
|
+
* cpuinfo_get_current_uarch_index() : default_uarch_index;
|
|
347
|
+
* if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
|
|
348
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
349
|
+
* for (size_t j = 0; j < range_j; j += tile_j)
|
|
350
|
+
* function(context, uarch_index, i, j, min(range_j - j, tile_j));
|
|
351
|
+
*
|
|
352
|
+
* When the function returns, all items have been processed and the thread pool
|
|
353
|
+
* is ready for a new task.
|
|
354
|
+
*
|
|
355
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
356
|
+
* calls are serialized.
|
|
357
|
+
*
|
|
358
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
359
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
360
|
+
* @param function the function to call for each tile.
|
|
361
|
+
* @param context the first argument passed to the specified function.
|
|
362
|
+
* @param default_uarch_index the microarchitecture index to use when
|
|
363
|
+
* pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
|
|
364
|
+
* or index returned by cpuinfo_get_current_uarch_index() exceeds the
|
|
365
|
+
* max_uarch_index value.
|
|
366
|
+
* @param max_uarch_index the maximum microarchitecture index expected by
|
|
367
|
+
* the specified function. If the index returned by
|
|
368
|
+
* cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
|
|
369
|
+
* will be used instead. default_uarch_index can exceed max_uarch_index.
|
|
370
|
+
* @param range_i the number of items to process along the first dimension
|
|
371
|
+
* of the 2D grid.
|
|
372
|
+
* @param range_j the number of items to process along the second dimension
|
|
373
|
+
* of the 2D grid.
|
|
374
|
+
* @param tile_j the maximum number of items along the second dimension of
|
|
375
|
+
* the 2D grid to process in one function call.
|
|
376
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
377
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
378
|
+
*/
|
|
379
|
+
void pthreadpool_parallelize_2d_tile_1d_with_uarch(
|
|
380
|
+
pthreadpool_t threadpool, pthreadpool_task_2d_tile_1d_with_id_t function,
|
|
381
|
+
void *context, uint32_t default_uarch_index, uint32_t max_uarch_index,
|
|
382
|
+
size_t range_i, size_t range_j, size_t tile_j, uint32_t flags);
|
|
383
|
+
|
|
384
|
+
/**
|
|
385
|
+
* Process items on a 2D grid with the specified maximum tile size along the
|
|
386
|
+
* last grid dimension using a microarchitecture-aware task function and passing
|
|
387
|
+
* along the current thread id.
|
|
388
|
+
*
|
|
389
|
+
* The function implements a parallel version of the following snippet:
|
|
390
|
+
*
|
|
391
|
+
* uint32_t uarch_index = cpuinfo_initialize() ?
|
|
392
|
+
* cpuinfo_get_current_uarch_index() : default_uarch_index;
|
|
393
|
+
* if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
|
|
394
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
395
|
+
* for (size_t j = 0; j < range_j; j += tile_j)
|
|
396
|
+
* function(context, uarch_index, thread_index, i, j, min(range_j - j,
|
|
397
|
+
* tile_j));
|
|
398
|
+
*
|
|
399
|
+
* When the function returns, all items have been processed and the thread pool
|
|
400
|
+
* is ready for a new task.
|
|
401
|
+
*
|
|
402
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
403
|
+
* calls are serialized.
|
|
404
|
+
*
|
|
405
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
406
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
407
|
+
* @param function the function to call for each tile.
|
|
408
|
+
* @param context the first argument passed to the specified function.
|
|
409
|
+
* @param default_uarch_index the microarchitecture index to use when
|
|
410
|
+
* pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
|
|
411
|
+
* or index returned by cpuinfo_get_current_uarch_index() exceeds the
|
|
412
|
+
* max_uarch_index value.
|
|
413
|
+
* @param max_uarch_index the maximum microarchitecture index expected by
|
|
414
|
+
* the specified function. If the index returned by
|
|
415
|
+
* cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
|
|
416
|
+
* will be used instead. default_uarch_index can exceed max_uarch_index.
|
|
417
|
+
* @param range_i the number of items to process along the first dimension
|
|
418
|
+
* of the 2D grid.
|
|
419
|
+
* @param range_j the number of items to process along the second dimension
|
|
420
|
+
* of the 2D grid.
|
|
421
|
+
* @param tile_j the maximum number of items along the second dimension of
|
|
422
|
+
* the 2D grid to process in one function call.
|
|
423
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
424
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
425
|
+
*/
|
|
426
|
+
void pthreadpool_parallelize_2d_tile_1d_with_uarch_with_thread(
|
|
427
|
+
pthreadpool_t threadpool,
|
|
428
|
+
pthreadpool_task_2d_tile_1d_with_id_with_thread_t function, void *context,
|
|
429
|
+
uint32_t default_uarch_index, uint32_t max_uarch_index, size_t range_i,
|
|
430
|
+
size_t range_j, size_t tile_j, uint32_t flags);
|
|
431
|
+
|
|
432
|
+
/**
|
|
433
|
+
* Process items on a 2D grid with the specified maximum tile size along each
|
|
434
|
+
* grid dimension.
|
|
435
|
+
*
|
|
436
|
+
* The function implements a parallel version of the following snippet:
|
|
437
|
+
*
|
|
438
|
+
* for (size_t i = 0; i < range_i; i += tile_i)
|
|
439
|
+
* for (size_t j = 0; j < range_j; j += tile_j)
|
|
440
|
+
* function(context, i, j,
|
|
441
|
+
* min(range_i - i, tile_i), min(range_j - j, tile_j));
|
|
442
|
+
*
|
|
443
|
+
* When the function returns, all items have been processed and the thread pool
|
|
444
|
+
* is ready for a new task.
|
|
445
|
+
*
|
|
446
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
447
|
+
* calls are serialized.
|
|
448
|
+
*
|
|
449
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
450
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
451
|
+
* @param function the function to call for each tile.
|
|
452
|
+
* @param context the first argument passed to the specified function.
|
|
453
|
+
* @param range_i the number of items to process along the first dimension
|
|
454
|
+
* of the 2D grid.
|
|
455
|
+
* @param range_j the number of items to process along the second dimension
|
|
456
|
+
* of the 2D grid.
|
|
457
|
+
* @param tile_i the maximum number of items along the first dimension of
|
|
458
|
+
* the 2D grid to process in one function call.
|
|
459
|
+
* @param tile_j the maximum number of items along the second dimension of
|
|
460
|
+
* the 2D grid to process in one function call.
|
|
461
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
462
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
463
|
+
*/
|
|
464
|
+
void pthreadpool_parallelize_2d_tile_2d(pthreadpool_t threadpool,
|
|
465
|
+
pthreadpool_task_2d_tile_2d_t function,
|
|
466
|
+
void *context, size_t range_i,
|
|
467
|
+
size_t range_j, size_t tile_i,
|
|
468
|
+
size_t tile_j, uint32_t flags);
|
|
469
|
+
|
|
470
|
+
/**
|
|
471
|
+
* Process items on a 2D grid with the specified maximum tile size along each
|
|
472
|
+
* grid dimension using a microarchitecture-aware task function.
|
|
473
|
+
*
|
|
474
|
+
* The function implements a parallel version of the following snippet:
|
|
475
|
+
*
|
|
476
|
+
* uint32_t uarch_index = cpuinfo_initialize() ?
|
|
477
|
+
* cpuinfo_get_current_uarch_index() : default_uarch_index;
|
|
478
|
+
* if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
|
|
479
|
+
* for (size_t i = 0; i < range_i; i += tile_i)
|
|
480
|
+
* for (size_t j = 0; j < range_j; j += tile_j)
|
|
481
|
+
* function(context, uarch_index, i, j,
|
|
482
|
+
* min(range_i - i, tile_i), min(range_j - j, tile_j));
|
|
483
|
+
*
|
|
484
|
+
* When the function returns, all items have been processed and the thread pool
|
|
485
|
+
* is ready for a new task.
|
|
486
|
+
*
|
|
487
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
488
|
+
* calls are serialized.
|
|
489
|
+
*
|
|
490
|
+
* @param threadpool the thread pool to use for parallelisation. If
|
|
491
|
+
* threadpool is NULL, all items are processed serially on the calling
|
|
492
|
+
* thread.
|
|
493
|
+
* @param function the function to call for each tile.
|
|
494
|
+
* @param context the first argument passed to the specified
|
|
495
|
+
* function.
|
|
496
|
+
* @param default_uarch_index the microarchitecture index to use when
|
|
497
|
+
* pthreadpool is configured without cpuinfo,
|
|
498
|
+
* cpuinfo initialization failed, or index returned
|
|
499
|
+
* by cpuinfo_get_current_uarch_index() exceeds
|
|
500
|
+
* the max_uarch_index value.
|
|
501
|
+
* @param max_uarch_index the maximum microarchitecture index expected
|
|
502
|
+
* by the specified function. If the index returned
|
|
503
|
+
* by cpuinfo_get_current_uarch_index() exceeds this
|
|
504
|
+
* value, default_uarch_index will be used instead.
|
|
505
|
+
* default_uarch_index can exceed max_uarch_index.
|
|
506
|
+
* @param range_i the number of items to process along the first
|
|
507
|
+
* dimension of the 2D grid.
|
|
508
|
+
* @param range_j the number of items to process along the second
|
|
509
|
+
* dimension of the 2D grid.
|
|
510
|
+
* @param tile_i the maximum number of items along the first
|
|
511
|
+
* dimension of the 2D grid to process in one function call.
|
|
512
|
+
* @param tile_j the maximum number of items along the second
|
|
513
|
+
* dimension of the 2D grid to process in one function call.
|
|
514
|
+
* @param flags a bitwise combination of zero or more optional
|
|
515
|
+
* flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
|
|
516
|
+
* PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
517
|
+
*/
|
|
518
|
+
void pthreadpool_parallelize_2d_tile_2d_with_uarch(
|
|
519
|
+
pthreadpool_t threadpool, pthreadpool_task_2d_tile_2d_with_id_t function,
|
|
520
|
+
void *context, uint32_t default_uarch_index, uint32_t max_uarch_index,
|
|
521
|
+
size_t range_i, size_t range_j, size_t tile_i, size_t tile_j,
|
|
522
|
+
uint32_t flags);
|
|
523
|
+
|
|
524
|
+
/**
|
|
525
|
+
* Process items on a 3D grid.
|
|
526
|
+
*
|
|
527
|
+
* The function implements a parallel version of the following snippet:
|
|
528
|
+
*
|
|
529
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
530
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
531
|
+
* for (size_t k = 0; k < range_k; k++)
|
|
532
|
+
* function(context, i, j, k);
|
|
533
|
+
*
|
|
534
|
+
* When the function returns, all items have been processed and the thread pool
|
|
535
|
+
* is ready for a new task.
|
|
536
|
+
*
|
|
537
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
538
|
+
* calls are serialized.
|
|
539
|
+
*
|
|
540
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
541
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
542
|
+
* @param function the function to call for each item.
|
|
543
|
+
* @param context the first argument passed to the specified function.
|
|
544
|
+
* @param range_i the number of items to process along the first dimension
|
|
545
|
+
* of the 3D grid.
|
|
546
|
+
* @param range_j the number of items to process along the second dimension
|
|
547
|
+
* of the 3D grid.
|
|
548
|
+
* @param range_k the number of items to process along the third dimension
|
|
549
|
+
* of the 3D grid.
|
|
550
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
551
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
552
|
+
*/
|
|
553
|
+
void pthreadpool_parallelize_3d(pthreadpool_t threadpool,
|
|
554
|
+
pthreadpool_task_3d_t function, void *context,
|
|
555
|
+
size_t range_i, size_t range_j, size_t range_k,
|
|
556
|
+
uint32_t flags);
|
|
557
|
+
|
|
558
|
+
/**
|
|
559
|
+
* Process items on a 3D grid with the specified maximum tile size along the
|
|
560
|
+
* last grid dimension.
|
|
561
|
+
*
|
|
562
|
+
* The function implements a parallel version of the following snippet:
|
|
563
|
+
*
|
|
564
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
565
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
566
|
+
* for (size_t k = 0; k < range_k; k += tile_k)
|
|
567
|
+
* function(context, i, j, k, min(range_k - k, tile_k));
|
|
568
|
+
*
|
|
569
|
+
* When the function returns, all items have been processed and the thread pool
|
|
570
|
+
* is ready for a new task.
|
|
571
|
+
*
|
|
572
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
573
|
+
* calls are serialized.
|
|
574
|
+
*
|
|
575
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
576
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
577
|
+
* @param function the function to call for each tile.
|
|
578
|
+
* @param context the first argument passed to the specified function.
|
|
579
|
+
* @param range_i the number of items to process along the first dimension
|
|
580
|
+
* of the 3D grid.
|
|
581
|
+
* @param range_j the number of items to process along the second dimension
|
|
582
|
+
* of the 3D grid.
|
|
583
|
+
* @param range_k the number of items to process along the third dimension
|
|
584
|
+
* of the 3D grid.
|
|
585
|
+
* @param tile_k the maximum number of items along the third dimension of
|
|
586
|
+
* the 3D grid to process in one function call.
|
|
587
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
588
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
589
|
+
*/
|
|
590
|
+
void pthreadpool_parallelize_3d_tile_1d(pthreadpool_t threadpool,
|
|
591
|
+
pthreadpool_task_3d_tile_1d_t function,
|
|
592
|
+
void *context, size_t range_i,
|
|
593
|
+
size_t range_j, size_t range_k,
|
|
594
|
+
size_t tile_k, uint32_t flags);
|
|
595
|
+
|
|
596
|
+
/**
|
|
597
|
+
* Process items on a 3D grid with the specified maximum tile size along the
|
|
598
|
+
* last grid dimension and passing along the current thread id.
|
|
599
|
+
*
|
|
600
|
+
* The function implements a parallel version of the following snippet:
|
|
601
|
+
*
|
|
602
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
603
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
604
|
+
* for (size_t k = 0; k < range_k; k += tile_k)
|
|
605
|
+
* function(context, thread_index, i, j, k, min(range_k - k, tile_k));
|
|
606
|
+
*
|
|
607
|
+
* When the function returns, all items have been processed and the thread pool
|
|
608
|
+
* is ready for a new task.
|
|
609
|
+
*
|
|
610
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
611
|
+
* calls are serialized.
|
|
612
|
+
*
|
|
613
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
614
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
615
|
+
* @param function the function to call for each tile.
|
|
616
|
+
* @param context the first argument passed to the specified function.
|
|
617
|
+
* @param range_i the number of items to process along the first dimension
|
|
618
|
+
* of the 3D grid.
|
|
619
|
+
* @param range_j the number of items to process along the second dimension
|
|
620
|
+
* of the 3D grid.
|
|
621
|
+
* @param range_k the number of items to process along the third dimension
|
|
622
|
+
* of the 3D grid.
|
|
623
|
+
* @param tile_k the maximum number of items along the third dimension of
|
|
624
|
+
* the 3D grid to process in one function call.
|
|
625
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
626
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
627
|
+
*/
|
|
628
|
+
void pthreadpool_parallelize_3d_tile_1d_with_thread(
|
|
629
|
+
pthreadpool_t threadpool,
|
|
630
|
+
pthreadpool_task_3d_tile_1d_with_thread_t function, void *context,
|
|
631
|
+
size_t range_i, size_t range_j, size_t range_k, size_t tile_k,
|
|
632
|
+
uint32_t flags);
|
|
633
|
+
|
|
634
|
+
/**
|
|
635
|
+
* Process items on a 3D grid with the specified maximum tile size along the
|
|
636
|
+
* last grid dimension using a microarchitecture-aware task function.
|
|
637
|
+
*
|
|
638
|
+
* The function implements a parallel version of the following snippet:
|
|
639
|
+
*
|
|
640
|
+
* uint32_t uarch_index = cpuinfo_initialize() ?
|
|
641
|
+
* cpuinfo_get_current_uarch_index() : default_uarch_index;
|
|
642
|
+
* if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
|
|
643
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
644
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
645
|
+
* for (size_t k = 0; k < range_k; k += tile_k)
|
|
646
|
+
* function(context, uarch_index, i, j, k, min(range_k - k, tile_k));
|
|
647
|
+
*
|
|
648
|
+
* When the function returns, all items have been processed and the thread pool
|
|
649
|
+
* is ready for a new task.
|
|
650
|
+
*
|
|
651
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
652
|
+
* calls are serialized.
|
|
653
|
+
*
|
|
654
|
+
* @param threadpool the thread pool to use for parallelisation. If
|
|
655
|
+
* threadpool is NULL, all items are processed serially on the calling
|
|
656
|
+
* thread.
|
|
657
|
+
* @param function the function to call for each tile.
|
|
658
|
+
* @param context the first argument passed to the specified
|
|
659
|
+
* function.
|
|
660
|
+
* @param default_uarch_index the microarchitecture index to use when
|
|
661
|
+
* pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
|
|
662
|
+
* or index returned by cpuinfo_get_current_uarch_index() exceeds the
|
|
663
|
+
* max_uarch_index value.
|
|
664
|
+
* @param max_uarch_index the maximum microarchitecture index expected by
|
|
665
|
+
* the specified function. If the index returned by
|
|
666
|
+
* cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
|
|
667
|
+
* will be used instead. default_uarch_index can exceed max_uarch_index.
|
|
668
|
+
* @param range_i the number of items to process along the first
|
|
669
|
+
* dimension of the 3D grid.
|
|
670
|
+
* @param range_j the number of items to process along the second
|
|
671
|
+
* dimension of the 3D grid.
|
|
672
|
+
* @param range_k the number of items to process along the third
|
|
673
|
+
* dimension of the 3D grid.
|
|
674
|
+
* @param tile_k the maximum number of items along the third
|
|
675
|
+
* dimension of the 3D grid to process in one function call.
|
|
676
|
+
* @param flags a bitwise combination of zero or more optional
|
|
677
|
+
* flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
|
|
678
|
+
* PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
679
|
+
*/
|
|
680
|
+
void pthreadpool_parallelize_3d_tile_1d_with_uarch(
|
|
681
|
+
pthreadpool_t threadpool, pthreadpool_task_3d_tile_1d_with_id_t function,
|
|
682
|
+
void *context, uint32_t default_uarch_index, uint32_t max_uarch_index,
|
|
683
|
+
size_t range_i, size_t range_j, size_t range_k, size_t tile_k,
|
|
684
|
+
uint32_t flags);
|
|
685
|
+
|
|
686
|
+
/**
|
|
687
|
+
* Process items on a 3D grid with the specified maximum tile size along the
|
|
688
|
+
* last grid dimension using a microarchitecture-aware task function and passing
|
|
689
|
+
* along the current thread id.
|
|
690
|
+
*
|
|
691
|
+
* The function implements a parallel version of the following snippet:
|
|
692
|
+
*
|
|
693
|
+
* uint32_t uarch_index = cpuinfo_initialize() ?
|
|
694
|
+
* cpuinfo_get_current_uarch_index() : default_uarch_index;
|
|
695
|
+
* if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
|
|
696
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
697
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
698
|
+
* for (size_t k = 0; k < range_k; k += tile_k)
|
|
699
|
+
* function(context, uarch_index, thread_index, i, j, k, min(range_k -
|
|
700
|
+
* k, tile_k));
|
|
701
|
+
*
|
|
702
|
+
* When the function returns, all items have been processed and the thread pool
|
|
703
|
+
* is ready for a new task.
|
|
704
|
+
*
|
|
705
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
706
|
+
* calls are serialized.
|
|
707
|
+
*
|
|
708
|
+
* @param threadpool the thread pool to use for parallelisation. If
|
|
709
|
+
* threadpool is NULL, all items are processed serially on the calling
|
|
710
|
+
* thread.
|
|
711
|
+
* @param function the function to call for each tile.
|
|
712
|
+
* @param context the first argument passed to the specified
|
|
713
|
+
* function.
|
|
714
|
+
* @param default_uarch_index the microarchitecture index to use when
|
|
715
|
+
* pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
|
|
716
|
+
* or index returned by cpuinfo_get_current_uarch_index() exceeds the
|
|
717
|
+
* max_uarch_index value.
|
|
718
|
+
* @param max_uarch_index the maximum microarchitecture index expected by
|
|
719
|
+
* the specified function. If the index returned by
|
|
720
|
+
* cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
|
|
721
|
+
* will be used instead. default_uarch_index can exceed max_uarch_index.
|
|
722
|
+
* @param range_i the number of items to process along the first
|
|
723
|
+
* dimension of the 3D grid.
|
|
724
|
+
* @param range_j the number of items to process along the second
|
|
725
|
+
* dimension of the 3D grid.
|
|
726
|
+
* @param range_k the number of items to process along the third
|
|
727
|
+
* dimension of the 3D grid.
|
|
728
|
+
* @param tile_k the maximum number of items along the third
|
|
729
|
+
* dimension of the 3D grid to process in one function call.
|
|
730
|
+
* @param flags a bitwise combination of zero or more optional
|
|
731
|
+
* flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
|
|
732
|
+
* PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
733
|
+
*/
|
|
734
|
+
void pthreadpool_parallelize_3d_tile_1d_with_uarch_with_thread(
|
|
735
|
+
pthreadpool_t threadpool,
|
|
736
|
+
pthreadpool_task_3d_tile_1d_with_id_with_thread_t function, void *context,
|
|
737
|
+
uint32_t default_uarch_index, uint32_t max_uarch_index, size_t range_i,
|
|
738
|
+
size_t range_j, size_t range_k, size_t tile_k, uint32_t flags);
|
|
739
|
+
|
|
740
|
+
/**
|
|
741
|
+
* Process items on a 3D grid with the specified maximum tile size along the
|
|
742
|
+
* last two grid dimensions.
|
|
743
|
+
*
|
|
744
|
+
* The function implements a parallel version of the following snippet:
|
|
745
|
+
*
|
|
746
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
747
|
+
* for (size_t j = 0; j < range_j; j += tile_j)
|
|
748
|
+
* for (size_t k = 0; k < range_k; k += tile_k)
|
|
749
|
+
* function(context, i, j, k,
|
|
750
|
+
* min(range_j - j, tile_j), min(range_k - k, tile_k));
|
|
751
|
+
*
|
|
752
|
+
* When the function returns, all items have been processed and the thread pool
|
|
753
|
+
* is ready for a new task.
|
|
754
|
+
*
|
|
755
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
756
|
+
* calls are serialized.
|
|
757
|
+
*
|
|
758
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
759
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
760
|
+
* @param function the function to call for each tile.
|
|
761
|
+
* @param context the first argument passed to the specified function.
|
|
762
|
+
* @param range_i the number of items to process along the first dimension
|
|
763
|
+
* of the 3D grid.
|
|
764
|
+
* @param range_j the number of items to process along the second dimension
|
|
765
|
+
* of the 3D grid.
|
|
766
|
+
* @param range_k the number of items to process along the third dimension
|
|
767
|
+
* of the 3D grid.
|
|
768
|
+
* @param tile_j the maximum number of items along the second dimension of
|
|
769
|
+
* the 3D grid to process in one function call.
|
|
770
|
+
* @param tile_k the maximum number of items along the third dimension of
|
|
771
|
+
* the 3D grid to process in one function call.
|
|
772
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
773
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
774
|
+
*/
|
|
775
|
+
void pthreadpool_parallelize_3d_tile_2d(pthreadpool_t threadpool,
|
|
776
|
+
pthreadpool_task_3d_tile_2d_t function,
|
|
777
|
+
void *context, size_t range_i,
|
|
778
|
+
size_t range_j, size_t range_k,
|
|
779
|
+
size_t tile_j, size_t tile_k,
|
|
780
|
+
uint32_t flags);
|
|
781
|
+
|
|
782
|
+
/**
|
|
783
|
+
* Process items on a 3D grid with the specified maximum tile size along the
|
|
784
|
+
* last two grid dimensions using a microarchitecture-aware task function.
|
|
785
|
+
*
|
|
786
|
+
* The function implements a parallel version of the following snippet:
|
|
787
|
+
*
|
|
788
|
+
* uint32_t uarch_index = cpuinfo_initialize() ?
|
|
789
|
+
* cpuinfo_get_current_uarch_index() : default_uarch_index;
|
|
790
|
+
* if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
|
|
791
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
792
|
+
* for (size_t j = 0; j < range_j; j += tile_j)
|
|
793
|
+
* for (size_t k = 0; k < range_k; k += tile_k)
|
|
794
|
+
* function(context, uarch_index, i, j, k,
|
|
795
|
+
* min(range_j - j, tile_j), min(range_k - k, tile_k));
|
|
796
|
+
*
|
|
797
|
+
* When the function returns, all items have been processed and the thread pool
|
|
798
|
+
* is ready for a new task.
|
|
799
|
+
*
|
|
800
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
801
|
+
* calls are serialized.
|
|
802
|
+
*
|
|
803
|
+
* @param threadpool the thread pool to use for parallelisation. If
|
|
804
|
+
* threadpool is NULL, all items are processed serially on the calling
|
|
805
|
+
* thread.
|
|
806
|
+
* @param function the function to call for each tile.
|
|
807
|
+
* @param context the first argument passed to the specified
|
|
808
|
+
* function.
|
|
809
|
+
* @param default_uarch_index the microarchitecture index to use when
|
|
810
|
+
* pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
|
|
811
|
+
* or index returned by cpuinfo_get_current_uarch_index() exceeds the
|
|
812
|
+
* max_uarch_index value.
|
|
813
|
+
* @param max_uarch_index the maximum microarchitecture index expected by
|
|
814
|
+
* the specified function. If the index returned by
|
|
815
|
+
* cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
|
|
816
|
+
* will be used instead. default_uarch_index can exceed max_uarch_index.
|
|
817
|
+
* @param range_i the number of items to process along the first
|
|
818
|
+
* dimension of the 3D grid.
|
|
819
|
+
* @param range_j the number of items to process along the second
|
|
820
|
+
* dimension of the 3D grid.
|
|
821
|
+
* @param range_k the number of items to process along the third
|
|
822
|
+
* dimension of the 3D grid.
|
|
823
|
+
* @param tile_j the maximum number of items along the second
|
|
824
|
+
* dimension of the 3D grid to process in one function call.
|
|
825
|
+
* @param tile_k the maximum number of items along the third
|
|
826
|
+
* dimension of the 3D grid to process in one function call.
|
|
827
|
+
* @param flags a bitwise combination of zero or more optional
|
|
828
|
+
* flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
|
|
829
|
+
* PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
830
|
+
*/
|
|
831
|
+
void pthreadpool_parallelize_3d_tile_2d_with_uarch(
|
|
832
|
+
pthreadpool_t threadpool, pthreadpool_task_3d_tile_2d_with_id_t function,
|
|
833
|
+
void *context, uint32_t default_uarch_index, uint32_t max_uarch_index,
|
|
834
|
+
size_t range_i, size_t range_j, size_t range_k, size_t tile_j,
|
|
835
|
+
size_t tile_k, uint32_t flags);
|
|
836
|
+
|
|
837
|
+
/**
|
|
838
|
+
* Process items on a 4D grid.
|
|
839
|
+
*
|
|
840
|
+
* The function implements a parallel version of the following snippet:
|
|
841
|
+
*
|
|
842
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
843
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
844
|
+
* for (size_t k = 0; k < range_k; k++)
|
|
845
|
+
* for (size_t l = 0; l < range_l; l++)
|
|
846
|
+
* function(context, i, j, k, l);
|
|
847
|
+
*
|
|
848
|
+
* When the function returns, all items have been processed and the thread pool
|
|
849
|
+
* is ready for a new task.
|
|
850
|
+
*
|
|
851
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
852
|
+
* calls are serialized.
|
|
853
|
+
*
|
|
854
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
855
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
856
|
+
* @param function    the function to call for each item.
|
|
857
|
+
* @param context the first argument passed to the specified function.
|
|
858
|
+
* @param range_i the number of items to process along the first dimension
|
|
859
|
+
* of the 4D grid.
|
|
860
|
+
* @param range_j the number of items to process along the second dimension
|
|
861
|
+
* of the 4D grid.
|
|
862
|
+
* @param range_k the number of items to process along the third dimension
|
|
863
|
+
* of the 4D grid.
|
|
864
|
+
* @param range_l the number of items to process along the fourth dimension
|
|
865
|
+
* of the 4D grid.
|
|
866
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
867
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
868
|
+
*/
|
|
869
|
+
void pthreadpool_parallelize_4d(pthreadpool_t threadpool,
|
|
870
|
+
pthreadpool_task_4d_t function, void *context,
|
|
871
|
+
size_t range_i, size_t range_j, size_t range_k,
|
|
872
|
+
size_t range_l, uint32_t flags);
|
|
873
|
+
|
|
874
|
+
/**
|
|
875
|
+
* Process items on a 4D grid with the specified maximum tile size along the
|
|
876
|
+
* last grid dimension.
|
|
877
|
+
*
|
|
878
|
+
* The function implements a parallel version of the following snippet:
|
|
879
|
+
*
|
|
880
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
881
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
882
|
+
* for (size_t k = 0; k < range_k; k++)
|
|
883
|
+
* for (size_t l = 0; l < range_l; l += tile_l)
|
|
884
|
+
* function(context, i, j, k, l, min(range_l - l, tile_l));
|
|
885
|
+
*
|
|
886
|
+
* When the function returns, all items have been processed and the thread pool
|
|
887
|
+
* is ready for a new task.
|
|
888
|
+
*
|
|
889
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
890
|
+
* calls are serialized.
|
|
891
|
+
*
|
|
892
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
893
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
894
|
+
* @param function the function to call for each tile.
|
|
895
|
+
* @param context the first argument passed to the specified function.
|
|
896
|
+
* @param range_i the number of items to process along the first dimension
|
|
897
|
+
* of the 4D grid.
|
|
898
|
+
* @param range_j the number of items to process along the second dimension
|
|
899
|
+
* of the 4D grid.
|
|
900
|
+
* @param range_k the number of items to process along the third dimension
|
|
901
|
+
* of the 4D grid.
|
|
902
|
+
* @param range_l the number of items to process along the fourth dimension
|
|
903
|
+
* of the 4D grid.
|
|
904
|
+
* @param tile_l the maximum number of items along the fourth dimension of
|
|
905
|
+
* the 4D grid to process in one function call.
|
|
906
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
907
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
908
|
+
*/
|
|
909
|
+
void pthreadpool_parallelize_4d_tile_1d(pthreadpool_t threadpool,
|
|
910
|
+
pthreadpool_task_4d_tile_1d_t function,
|
|
911
|
+
void *context, size_t range_i,
|
|
912
|
+
size_t range_j, size_t range_k,
|
|
913
|
+
size_t range_l, size_t tile_l,
|
|
914
|
+
uint32_t flags);
|
|
915
|
+
|
|
916
|
+
/**
|
|
917
|
+
* Process items on a 4D grid with the specified maximum tile size along the
|
|
918
|
+
* last two grid dimensions.
|
|
919
|
+
*
|
|
920
|
+
* The function implements a parallel version of the following snippet:
|
|
921
|
+
*
|
|
922
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
923
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
924
|
+
* for (size_t k = 0; k < range_k; k += tile_k)
|
|
925
|
+
* for (size_t l = 0; l < range_l; l += tile_l)
|
|
926
|
+
* function(context, i, j, k, l,
|
|
927
|
+
* min(range_k - k, tile_k), min(range_l - l, tile_l));
|
|
928
|
+
*
|
|
929
|
+
* When the function returns, all items have been processed and the thread pool
|
|
930
|
+
* is ready for a new task.
|
|
931
|
+
*
|
|
932
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
933
|
+
* calls are serialized.
|
|
934
|
+
*
|
|
935
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
936
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
937
|
+
* @param function the function to call for each tile.
|
|
938
|
+
* @param context the first argument passed to the specified function.
|
|
939
|
+
* @param range_i the number of items to process along the first dimension
|
|
940
|
+
* of the 4D grid.
|
|
941
|
+
* @param range_j the number of items to process along the second dimension
|
|
942
|
+
* of the 4D grid.
|
|
943
|
+
* @param range_k the number of items to process along the third dimension
|
|
944
|
+
* of the 4D grid.
|
|
945
|
+
* @param range_l the number of items to process along the fourth dimension
|
|
946
|
+
* of the 4D grid.
|
|
947
|
+
* @param tile_k the maximum number of items along the third dimension of
|
|
948
|
+
* the 4D grid to process in one function call.
|
|
949
|
+
* @param tile_l the maximum number of items along the fourth dimension of
|
|
950
|
+
* the 4D grid to process in one function call.
|
|
951
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
952
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
953
|
+
*/
|
|
954
|
+
void pthreadpool_parallelize_4d_tile_2d(pthreadpool_t threadpool,
|
|
955
|
+
pthreadpool_task_4d_tile_2d_t function,
|
|
956
|
+
void *context, size_t range_i,
|
|
957
|
+
size_t range_j, size_t range_k,
|
|
958
|
+
size_t range_l, size_t tile_k,
|
|
959
|
+
size_t tile_l, uint32_t flags);
|
|
960
|
+
|
|
961
|
+
/**
|
|
962
|
+
* Process items on a 4D grid with the specified maximum tile size along the
|
|
963
|
+
* last two grid dimensions using a microarchitecture-aware task function.
|
|
964
|
+
*
|
|
965
|
+
* The function implements a parallel version of the following snippet:
|
|
966
|
+
*
|
|
967
|
+
* uint32_t uarch_index = cpuinfo_initialize() ?
|
|
968
|
+
* cpuinfo_get_current_uarch_index() : default_uarch_index;
|
|
969
|
+
* if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
|
|
970
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
971
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
972
|
+
* for (size_t k = 0; k < range_k; k += tile_k)
|
|
973
|
+
* for (size_t l = 0; l < range_l; l += tile_l)
|
|
974
|
+
* function(context, uarch_index, i, j, k, l,
|
|
975
|
+
* min(range_k - k, tile_k), min(range_l - l, tile_l));
|
|
976
|
+
*
|
|
977
|
+
* When the function returns, all items have been processed and the thread pool
|
|
978
|
+
* is ready for a new task.
|
|
979
|
+
*
|
|
980
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
981
|
+
* calls are serialized.
|
|
982
|
+
*
|
|
983
|
+
* @param threadpool the thread pool to use for parallelisation. If
|
|
984
|
+
* threadpool is NULL, all items are processed serially on the calling
|
|
985
|
+
* thread.
|
|
986
|
+
* @param function the function to call for each tile.
|
|
987
|
+
* @param context the first argument passed to the specified
|
|
988
|
+
* function.
|
|
989
|
+
* @param default_uarch_index the microarchitecture index to use when
|
|
990
|
+
* pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
|
|
991
|
+
* or index returned by cpuinfo_get_current_uarch_index() exceeds the
|
|
992
|
+
* max_uarch_index value.
|
|
993
|
+
* @param max_uarch_index the maximum microarchitecture index expected by
|
|
994
|
+
* the specified function. If the index returned by
|
|
995
|
+
* cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
|
|
996
|
+
* will be used instead. default_uarch_index can exceed max_uarch_index.
|
|
997
|
+
* @param range_i the number of items to process along the first
|
|
998
|
+
* dimension of the 4D grid.
|
|
999
|
+
* @param range_j the number of items to process along the second
|
|
1000
|
+
* dimension of the 4D grid.
|
|
1001
|
+
* @param range_k the number of items to process along the third
|
|
1002
|
+
* dimension of the 4D grid.
|
|
1003
|
+
* @param range_l the number of items to process along the fourth
|
|
1004
|
+
* dimension of the 4D grid.
|
|
1005
|
+
* @param tile_k the maximum number of items along the third
|
|
1006
|
+
* dimension of the 4D grid to process in one function call.
|
|
1007
|
+
* @param tile_l the maximum number of items along the fourth
|
|
1008
|
+
* dimension of the 4D grid to process in one function call.
|
|
1009
|
+
* @param flags a bitwise combination of zero or more optional
|
|
1010
|
+
* flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
|
|
1011
|
+
* PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
1012
|
+
*/
|
|
1013
|
+
void pthreadpool_parallelize_4d_tile_2d_with_uarch(
|
|
1014
|
+
pthreadpool_t threadpool, pthreadpool_task_4d_tile_2d_with_id_t function,
|
|
1015
|
+
void *context, uint32_t default_uarch_index, uint32_t max_uarch_index,
|
|
1016
|
+
size_t range_i, size_t range_j, size_t range_k, size_t range_l,
|
|
1017
|
+
size_t tile_k, size_t tile_l, uint32_t flags);
|
|
1018
|
+
|
|
1019
|
+
/**
|
|
1020
|
+
* Process items on a 5D grid.
|
|
1021
|
+
*
|
|
1022
|
+
* The function implements a parallel version of the following snippet:
|
|
1023
|
+
*
|
|
1024
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
1025
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
1026
|
+
* for (size_t k = 0; k < range_k; k++)
|
|
1027
|
+
* for (size_t l = 0; l < range_l; l++)
|
|
1028
|
+
* for (size_t m = 0; m < range_m; m++)
|
|
1029
|
+
* function(context, i, j, k, l, m);
|
|
1030
|
+
*
|
|
1031
|
+
* When the function returns, all items have been processed and the thread pool
|
|
1032
|
+
* is ready for a new task.
|
|
1033
|
+
*
|
|
1034
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
1035
|
+
* calls are serialized.
|
|
1036
|
+
*
|
|
1037
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
1038
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
1039
|
+
* @param function    the function to call for each item.
|
|
1040
|
+
* @param context the first argument passed to the specified function.
|
|
1041
|
+
* @param range_i the number of items to process along the first dimension
|
|
1042
|
+
* of the 5D grid.
|
|
1043
|
+
* @param range_j the number of items to process along the second dimension
|
|
1044
|
+
* of the 5D grid.
|
|
1045
|
+
* @param range_k the number of items to process along the third dimension
|
|
1046
|
+
* of the 5D grid.
|
|
1047
|
+
* @param range_l the number of items to process along the fourth dimension
|
|
1048
|
+
* of the 5D grid.
|
|
1049
|
+
* @param range_m the number of items to process along the fifth dimension
|
|
1050
|
+
* of the 5D grid.
|
|
1051
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
1052
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
1053
|
+
*/
|
|
1054
|
+
void pthreadpool_parallelize_5d(pthreadpool_t threadpool,
|
|
1055
|
+
pthreadpool_task_5d_t function, void *context,
|
|
1056
|
+
size_t range_i, size_t range_j, size_t range_k,
|
|
1057
|
+
size_t range_l, size_t range_m, uint32_t flags);
|
|
1058
|
+
|
|
1059
|
+
/**
|
|
1060
|
+
* Process items on a 5D grid with the specified maximum tile size along the
|
|
1061
|
+
* last grid dimension.
|
|
1062
|
+
*
|
|
1063
|
+
* The function implements a parallel version of the following snippet:
|
|
1064
|
+
*
|
|
1065
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
1066
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
1067
|
+
* for (size_t k = 0; k < range_k; k++)
|
|
1068
|
+
* for (size_t l = 0; l < range_l; l++)
|
|
1069
|
+
* for (size_t m = 0; m < range_m; m += tile_m)
|
|
1070
|
+
* function(context, i, j, k, l, m, min(range_m - m, tile_m));
|
|
1071
|
+
*
|
|
1072
|
+
* When the function returns, all items have been processed and the thread pool
|
|
1073
|
+
* is ready for a new task.
|
|
1074
|
+
*
|
|
1075
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
1076
|
+
* calls are serialized.
|
|
1077
|
+
*
|
|
1078
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
1079
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
1080
|
+
* @param function the function to call for each tile.
|
|
1081
|
+
* @param context the first argument passed to the specified function.
|
|
1082
|
+
* @param range_i the number of items to process along the first dimension
|
|
1083
|
+
* of the 5D grid.
|
|
1084
|
+
* @param range_j the number of items to process along the second dimension
|
|
1085
|
+
* of the 5D grid.
|
|
1086
|
+
* @param range_k the number of items to process along the third dimension
|
|
1087
|
+
* of the 5D grid.
|
|
1088
|
+
* @param range_l the number of items to process along the fourth dimension
|
|
1089
|
+
* of the 5D grid.
|
|
1090
|
+
* @param range_m the number of items to process along the fifth dimension
|
|
1091
|
+
* of the 5D grid.
|
|
1092
|
+
* @param tile_m the maximum number of items along the fifth dimension of
|
|
1093
|
+
* the 5D grid to process in one function call.
|
|
1094
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
1095
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
1096
|
+
*/
|
|
1097
|
+
void pthreadpool_parallelize_5d_tile_1d(pthreadpool_t threadpool,
|
|
1098
|
+
pthreadpool_task_5d_tile_1d_t function,
|
|
1099
|
+
void *context, size_t range_i,
|
|
1100
|
+
size_t range_j, size_t range_k,
|
|
1101
|
+
size_t range_l, size_t range_m,
|
|
1102
|
+
size_t tile_m, uint32_t flags);
|
|
1103
|
+
|
|
1104
|
+
/**
|
|
1105
|
+
* Process items on a 5D grid with the specified maximum tile size along the
|
|
1106
|
+
* last two grid dimensions.
|
|
1107
|
+
*
|
|
1108
|
+
* The function implements a parallel version of the following snippet:
|
|
1109
|
+
*
|
|
1110
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
1111
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
1112
|
+
* for (size_t k = 0; k < range_k; k++)
|
|
1113
|
+
* for (size_t l = 0; l < range_l; l += tile_l)
|
|
1114
|
+
* for (size_t m = 0; m < range_m; m += tile_m)
|
|
1115
|
+
* function(context, i, j, k, l, m,
|
|
1116
|
+
* min(range_l - l, tile_l), min(range_m - m, tile_m));
|
|
1117
|
+
*
|
|
1118
|
+
* When the function returns, all items have been processed and the thread pool
|
|
1119
|
+
* is ready for a new task.
|
|
1120
|
+
*
|
|
1121
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
1122
|
+
* calls are serialized.
|
|
1123
|
+
*
|
|
1124
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
1125
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
1126
|
+
* @param function the function to call for each tile.
|
|
1127
|
+
* @param context the first argument passed to the specified function.
|
|
1128
|
+
* @param range_i the number of items to process along the first dimension
|
|
1129
|
+
* of the 5D grid.
|
|
1130
|
+
* @param range_j the number of items to process along the second dimension
|
|
1131
|
+
* of the 5D grid.
|
|
1132
|
+
* @param range_k the number of items to process along the third dimension
|
|
1133
|
+
* of the 5D grid.
|
|
1134
|
+
* @param range_l the number of items to process along the fourth dimension
|
|
1135
|
+
* of the 5D grid.
|
|
1136
|
+
* @param range_m the number of items to process along the fifth dimension
|
|
1137
|
+
* of the 5D grid.
|
|
1138
|
+
* @param tile_l the maximum number of items along the fourth dimension of
|
|
1139
|
+
* the 5D grid to process in one function call.
|
|
1140
|
+
* @param tile_m the maximum number of items along the fifth dimension of
|
|
1141
|
+
* the 5D grid to process in one function call.
|
|
1142
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
1143
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
1144
|
+
*/
|
|
1145
|
+
void pthreadpool_parallelize_5d_tile_2d(pthreadpool_t threadpool,
|
|
1146
|
+
pthreadpool_task_5d_tile_2d_t function,
|
|
1147
|
+
void *context, size_t range_i,
|
|
1148
|
+
size_t range_j, size_t range_k,
|
|
1149
|
+
size_t range_l, size_t range_m,
|
|
1150
|
+
size_t tile_l, size_t tile_m,
|
|
1151
|
+
uint32_t flags);
|
|
1152
|
+
|
|
1153
|
+
/**
|
|
1154
|
+
* Process items on a 6D grid.
|
|
1155
|
+
*
|
|
1156
|
+
* The function implements a parallel version of the following snippet:
|
|
1157
|
+
*
|
|
1158
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
1159
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
1160
|
+
* for (size_t k = 0; k < range_k; k++)
|
|
1161
|
+
* for (size_t l = 0; l < range_l; l++)
|
|
1162
|
+
* for (size_t m = 0; m < range_m; m++)
|
|
1163
|
+
* for (size_t n = 0; n < range_n; n++)
|
|
1164
|
+
* function(context, i, j, k, l, m, n);
|
|
1165
|
+
*
|
|
1166
|
+
* When the function returns, all items have been processed and the thread pool
|
|
1167
|
+
* is ready for a new task.
|
|
1168
|
+
*
|
|
1169
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
1170
|
+
* calls are serialized.
|
|
1171
|
+
*
|
|
1172
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
1173
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
1174
|
+
* @param function    the function to call for each item.
|
|
1175
|
+
* @param context the first argument passed to the specified function.
|
|
1176
|
+
* @param range_i the number of items to process along the first dimension
|
|
1177
|
+
* of the 6D grid.
|
|
1178
|
+
* @param range_j the number of items to process along the second dimension
|
|
1179
|
+
* of the 6D grid.
|
|
1180
|
+
* @param range_k the number of items to process along the third dimension
|
|
1181
|
+
* of the 6D grid.
|
|
1182
|
+
* @param range_l the number of items to process along the fourth dimension
|
|
1183
|
+
* of the 6D grid.
|
|
1184
|
+
* @param range_m the number of items to process along the fifth dimension
|
|
1185
|
+
* of the 6D grid.
|
|
1186
|
+
* @param range_n the number of items to process along the sixth dimension
|
|
1187
|
+
* of the 6D grid.
|
|
1188
|
+
* @note Unlike the tiled variants, this function takes no tile size: the
|
|
1189
|
+
* function is called exactly once for each item of the 6D grid.
|
|
1190
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
1191
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
1192
|
+
*/
|
|
1193
|
+
void pthreadpool_parallelize_6d(pthreadpool_t threadpool,
|
|
1194
|
+
pthreadpool_task_6d_t function, void *context,
|
|
1195
|
+
size_t range_i, size_t range_j, size_t range_k,
|
|
1196
|
+
size_t range_l, size_t range_m, size_t range_n,
|
|
1197
|
+
uint32_t flags);
|
|
1198
|
+
|
|
1199
|
+
/**
|
|
1200
|
+
* Process items on a 6D grid with the specified maximum tile size along the
|
|
1201
|
+
* last grid dimension.
|
|
1202
|
+
*
|
|
1203
|
+
* The function implements a parallel version of the following snippet:
|
|
1204
|
+
*
|
|
1205
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
1206
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
1207
|
+
* for (size_t k = 0; k < range_k; k++)
|
|
1208
|
+
* for (size_t l = 0; l < range_l; l++)
|
|
1209
|
+
* for (size_t m = 0; m < range_m; m++)
|
|
1210
|
+
* for (size_t n = 0; n < range_n; n += tile_n)
|
|
1211
|
+
* function(context, i, j, k, l, m, n, min(range_n - n, tile_n));
|
|
1212
|
+
*
|
|
1213
|
+
* When the function returns, all items have been processed and the thread pool
|
|
1214
|
+
* is ready for a new task.
|
|
1215
|
+
*
|
|
1216
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
1217
|
+
* calls are serialized.
|
|
1218
|
+
*
|
|
1219
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
1220
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
1221
|
+
* @param function the function to call for each tile.
|
|
1222
|
+
* @param context the first argument passed to the specified function.
|
|
1223
|
+
* @param range_i the number of items to process along the first dimension
|
|
1224
|
+
* of the 6D grid.
|
|
1225
|
+
* @param range_j the number of items to process along the second dimension
|
|
1226
|
+
* of the 6D grid.
|
|
1227
|
+
* @param range_k the number of items to process along the third dimension
|
|
1228
|
+
* of the 6D grid.
|
|
1229
|
+
* @param range_l the number of items to process along the fourth dimension
|
|
1230
|
+
* of the 6D grid.
|
|
1231
|
+
* @param range_m the number of items to process along the fifth dimension
|
|
1232
|
+
* of the 6D grid.
|
|
1233
|
+
* @param range_n the number of items to process along the sixth dimension
|
|
1234
|
+
* of the 6D grid.
|
|
1235
|
+
* @param tile_n the maximum number of items along the sixth dimension of
|
|
1236
|
+
* the 6D grid to process in one function call.
|
|
1237
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
1238
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
1239
|
+
*/
|
|
1240
|
+
void pthreadpool_parallelize_6d_tile_1d(pthreadpool_t threadpool,
|
|
1241
|
+
pthreadpool_task_6d_tile_1d_t function,
|
|
1242
|
+
void *context, size_t range_i,
|
|
1243
|
+
size_t range_j, size_t range_k,
|
|
1244
|
+
size_t range_l, size_t range_m,
|
|
1245
|
+
size_t range_n, size_t tile_n,
|
|
1246
|
+
uint32_t flags);
|
|
1247
|
+
|
|
1248
|
+
/**
|
|
1249
|
+
* Process items on a 6D grid with the specified maximum tile size along the
|
|
1250
|
+
* last two grid dimensions.
|
|
1251
|
+
*
|
|
1252
|
+
* The function implements a parallel version of the following snippet:
|
|
1253
|
+
*
|
|
1254
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
1255
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
1256
|
+
* for (size_t k = 0; k < range_k; k++)
|
|
1257
|
+
* for (size_t l = 0; l < range_l; l++)
|
|
1258
|
+
* for (size_t m = 0; m < range_m; m += tile_m)
|
|
1259
|
+
* for (size_t n = 0; n < range_n; n += tile_n)
|
|
1260
|
+
* function(context, i, j, k, l, m, n,
|
|
1261
|
+
* min(range_m - m, tile_m), min(range_n - n, tile_n));
|
|
1262
|
+
*
|
|
1263
|
+
* When the function returns, all items have been processed and the thread pool
|
|
1264
|
+
* is ready for a new task.
|
|
1265
|
+
*
|
|
1266
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
1267
|
+
* calls are serialized.
|
|
1268
|
+
*
|
|
1269
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
1270
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
1271
|
+
* @param function the function to call for each tile.
|
|
1272
|
+
* @param context the first argument passed to the specified function.
|
|
1273
|
+
* @param range_i the number of items to process along the first dimension
|
|
1274
|
+
* of the 6D grid.
|
|
1275
|
+
* @param range_j the number of items to process along the second dimension
|
|
1276
|
+
* of the 6D grid.
|
|
1277
|
+
* @param range_k the number of items to process along the third dimension
|
|
1278
|
+
* of the 6D grid.
|
|
1279
|
+
* @param range_l the number of items to process along the fourth dimension
|
|
1280
|
+
* of the 6D grid.
|
|
1281
|
+
* @param range_m the number of items to process along the fifth dimension
|
|
1282
|
+
* of the 6D grid.
|
|
1283
|
+
* @param range_n the number of items to process along the sixth dimension
|
|
1284
|
+
* of the 6D grid.
|
|
1285
|
+
* @param tile_m the maximum number of items along the fifth dimension of
|
|
1286
|
+
* the 6D grid to process in one function call.
|
|
1287
|
+
* @param tile_n the maximum number of items along the sixth dimension of
|
|
1288
|
+
* the 6D grid to process in one function call.
|
|
1289
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
1290
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
1291
|
+
*/
|
|
1292
|
+
void pthreadpool_parallelize_6d_tile_2d(pthreadpool_t threadpool,
|
|
1293
|
+
pthreadpool_task_6d_tile_2d_t function,
|
|
1294
|
+
void *context, size_t range_i,
|
|
1295
|
+
size_t range_j, size_t range_k,
|
|
1296
|
+
size_t range_l, size_t range_m,
|
|
1297
|
+
size_t range_n, size_t tile_m,
|
|
1298
|
+
size_t tile_n, uint32_t flags);
|
|
1299
|
+
|
|
1300
|
+
/**
|
|
1301
|
+
* Terminates threads in the thread pool and releases associated resources.
|
|
1302
|
+
*
|
|
1303
|
+
* @warning Accessing the thread pool after a call to this function constitutes
|
|
1304
|
+
* undefined behaviour and may cause data corruption.
|
|
1305
|
+
*
|
|
1306
|
+
* @param[in,out] threadpool The thread pool to destroy.
|
|
1307
|
+
*/
|
|
1308
|
+
void pthreadpool_destroy(pthreadpool_t threadpool);
|
|
1309
|
+
|
|
1310
|
+
#ifndef PTHREADPOOL_NO_DEPRECATED_API
|
|
1311
|
+
|
|
1312
|
+
/* Legacy API for compatibility with pre-existing users (e.g. NNPACK) */
|
|
1313
|
+
#if defined(__GNUC__)
|
|
1314
|
+
#define PTHREADPOOL_DEPRECATED __attribute__((__deprecated__))
|
|
1315
|
+
#else
|
|
1316
|
+
#define PTHREADPOOL_DEPRECATED
|
|
1317
|
+
#endif
|
|
1318
|
+
|
|
1319
|
+
typedef void (*pthreadpool_function_1d_t)(void *, size_t);
|
|
1320
|
+
typedef void (*pthreadpool_function_1d_tiled_t)(void *, size_t, size_t);
|
|
1321
|
+
typedef void (*pthreadpool_function_2d_t)(void *, size_t, size_t);
|
|
1322
|
+
typedef void (*pthreadpool_function_2d_tiled_t)(void *, size_t, size_t, size_t,
|
|
1323
|
+
size_t);
|
|
1324
|
+
typedef void (*pthreadpool_function_3d_tiled_t)(void *, size_t, size_t, size_t,
|
|
1325
|
+
size_t, size_t, size_t);
|
|
1326
|
+
typedef void (*pthreadpool_function_4d_tiled_t)(void *, size_t, size_t, size_t,
|
|
1327
|
+
size_t, size_t, size_t, size_t,
|
|
1328
|
+
size_t);
|
|
1329
|
+
|
|
1330
|
+
void pthreadpool_compute_1d(pthreadpool_t threadpool,
|
|
1331
|
+
pthreadpool_function_1d_t function, void *argument,
|
|
1332
|
+
size_t range) PTHREADPOOL_DEPRECATED;
|
|
1333
|
+
|
|
1334
|
+
void pthreadpool_compute_1d_tiled(pthreadpool_t threadpool,
|
|
1335
|
+
pthreadpool_function_1d_tiled_t function,
|
|
1336
|
+
void *argument, size_t range,
|
|
1337
|
+
size_t tile) PTHREADPOOL_DEPRECATED;
|
|
1338
|
+
|
|
1339
|
+
void pthreadpool_compute_2d(pthreadpool_t threadpool,
|
|
1340
|
+
pthreadpool_function_2d_t function, void *argument,
|
|
1341
|
+
size_t range_i,
|
|
1342
|
+
size_t range_j) PTHREADPOOL_DEPRECATED;
|
|
1343
|
+
|
|
1344
|
+
void pthreadpool_compute_2d_tiled(pthreadpool_t threadpool,
|
|
1345
|
+
pthreadpool_function_2d_tiled_t function,
|
|
1346
|
+
void *argument, size_t range_i,
|
|
1347
|
+
size_t range_j, size_t tile_i,
|
|
1348
|
+
size_t tile_j) PTHREADPOOL_DEPRECATED;
|
|
1349
|
+
|
|
1350
|
+
void pthreadpool_compute_3d_tiled(pthreadpool_t threadpool,
|
|
1351
|
+
pthreadpool_function_3d_tiled_t function,
|
|
1352
|
+
void *argument, size_t range_i,
|
|
1353
|
+
size_t range_j, size_t range_k, size_t tile_i,
|
|
1354
|
+
size_t tile_j,
|
|
1355
|
+
size_t tile_k) PTHREADPOOL_DEPRECATED;
|
|
1356
|
+
|
|
1357
|
+
void pthreadpool_compute_4d_tiled(pthreadpool_t threadpool,
|
|
1358
|
+
pthreadpool_function_4d_tiled_t function,
|
|
1359
|
+
void *argument, size_t range_i,
|
|
1360
|
+
size_t range_j, size_t range_k,
|
|
1361
|
+
size_t range_l, size_t tile_i, size_t tile_j,
|
|
1362
|
+
size_t tile_k,
|
|
1363
|
+
size_t tile_l) PTHREADPOOL_DEPRECATED;
|
|
1364
|
+
|
|
1365
|
+
#endif /* PTHREADPOOL_NO_DEPRECATED_API */
|
|
1366
|
+
|
|
1367
|
+
#ifdef __cplusplus
|
|
1368
|
+
} /* extern "C" */
|
|
1369
|
+
#endif
|
|
1370
|
+
|
|
1371
|
+
#ifdef __cplusplus
|
|
1372
|
+
|
|
1373
|
+
namespace libpthreadpool {
|
|
1374
|
+
namespace detail {
|
|
1375
|
+
namespace {
|
|
1376
|
+
|
|
1377
|
+
template <class T> void call_wrapper_1d(void *arg, size_t i) {
|
|
1378
|
+
(*static_cast<const T *>(arg))(i);
|
|
1379
|
+
}
|
|
1380
|
+
|
|
1381
|
+
template <class T>
|
|
1382
|
+
void call_wrapper_1d_tile_1d(void *arg, size_t range_i, size_t tile_i) {
|
|
1383
|
+
(*static_cast<const T *>(arg))(range_i, tile_i);
|
|
1384
|
+
}
|
|
1385
|
+
|
|
1386
|
+
template <class T> void call_wrapper_2d(void *functor, size_t i, size_t j) {
|
|
1387
|
+
(*static_cast<const T *>(functor))(i, j);
|
|
1388
|
+
}
|
|
1389
|
+
|
|
1390
|
+
template <class T>
|
|
1391
|
+
void call_wrapper_2d_tile_1d(void *functor, size_t i, size_t range_j,
|
|
1392
|
+
size_t tile_j) {
|
|
1393
|
+
(*static_cast<const T *>(functor))(i, range_j, tile_j);
|
|
1394
|
+
}
|
|
1395
|
+
|
|
1396
|
+
template <class T>
|
|
1397
|
+
void call_wrapper_2d_tile_2d(void *functor, size_t range_i, size_t range_j,
|
|
1398
|
+
size_t tile_i, size_t tile_j) {
|
|
1399
|
+
(*static_cast<const T *>(functor))(range_i, range_j, tile_i, tile_j);
|
|
1400
|
+
}
|
|
1401
|
+
|
|
1402
|
+
template <class T>
|
|
1403
|
+
void call_wrapper_3d(void *functor, size_t i, size_t j, size_t k) {
|
|
1404
|
+
(*static_cast<const T *>(functor))(i, j, k);
|
|
1405
|
+
}
|
|
1406
|
+
|
|
1407
|
+
template <class T>
|
|
1408
|
+
void call_wrapper_3d_tile_1d(void *functor, size_t i, size_t j, size_t range_k,
|
|
1409
|
+
size_t tile_k) {
|
|
1410
|
+
(*static_cast<const T *>(functor))(i, j, range_k, tile_k);
|
|
1411
|
+
}
|
|
1412
|
+
|
|
1413
|
+
template <class T>
|
|
1414
|
+
void call_wrapper_3d_tile_2d(void *functor, size_t i, size_t range_j,
|
|
1415
|
+
size_t range_k, size_t tile_j, size_t tile_k) {
|
|
1416
|
+
(*static_cast<const T *>(functor))(i, range_j, range_k, tile_j, tile_k);
|
|
1417
|
+
}
|
|
1418
|
+
|
|
1419
|
+
template <class T>
|
|
1420
|
+
void call_wrapper_4d(void *functor, size_t i, size_t j, size_t k, size_t l) {
|
|
1421
|
+
(*static_cast<const T *>(functor))(i, j, k, l);
|
|
1422
|
+
}
|
|
1423
|
+
|
|
1424
|
+
template <class T>
|
|
1425
|
+
void call_wrapper_4d_tile_1d(void *functor, size_t i, size_t j, size_t k,
|
|
1426
|
+
size_t range_l, size_t tile_l) {
|
|
1427
|
+
(*static_cast<const T *>(functor))(i, j, k, range_l, tile_l);
|
|
1428
|
+
}
|
|
1429
|
+
|
|
1430
|
+
template <class T>
|
|
1431
|
+
void call_wrapper_4d_tile_2d(void *functor, size_t i, size_t j, size_t range_k,
|
|
1432
|
+
size_t range_l, size_t tile_k, size_t tile_l) {
|
|
1433
|
+
(*static_cast<const T *>(functor))(i, j, range_k, range_l, tile_k, tile_l);
|
|
1434
|
+
}
|
|
1435
|
+
|
|
1436
|
+
template <class T>
|
|
1437
|
+
void call_wrapper_5d(void *functor, size_t i, size_t j, size_t k, size_t l,
|
|
1438
|
+
size_t m) {
|
|
1439
|
+
(*static_cast<const T *>(functor))(i, j, k, l, m);
|
|
1440
|
+
}
|
|
1441
|
+
|
|
1442
|
+
template <class T>
|
|
1443
|
+
void call_wrapper_5d_tile_1d(void *functor, size_t i, size_t j, size_t k,
|
|
1444
|
+
size_t l, size_t range_m, size_t tile_m) {
|
|
1445
|
+
(*static_cast<const T *>(functor))(i, j, k, l, range_m, tile_m);
|
|
1446
|
+
}
|
|
1447
|
+
|
|
1448
|
+
template <class T>
|
|
1449
|
+
void call_wrapper_5d_tile_2d(void *functor, size_t i, size_t j, size_t k,
|
|
1450
|
+
size_t range_l, size_t range_m, size_t tile_l,
|
|
1451
|
+
size_t tile_m) {
|
|
1452
|
+
(*static_cast<const T *>(functor))(i, j, k, range_l, range_m, tile_l, tile_m);
|
|
1453
|
+
}
|
|
1454
|
+
|
|
1455
|
+
template <class T>
|
|
1456
|
+
void call_wrapper_6d(void *functor, size_t i, size_t j, size_t k, size_t l,
|
|
1457
|
+
size_t m, size_t n) {
|
|
1458
|
+
(*static_cast<const T *>(functor))(i, j, k, l, m, n);
|
|
1459
|
+
}
|
|
1460
|
+
|
|
1461
|
+
template <class T>
|
|
1462
|
+
void call_wrapper_6d_tile_1d(void *functor, size_t i, size_t j, size_t k,
|
|
1463
|
+
size_t l, size_t m, size_t range_n,
|
|
1464
|
+
size_t tile_n) {
|
|
1465
|
+
(*static_cast<const T *>(functor))(i, j, k, l, m, range_n, tile_n);
|
|
1466
|
+
}
|
|
1467
|
+
|
|
1468
|
+
template <class T>
|
|
1469
|
+
void call_wrapper_6d_tile_2d(void *functor, size_t i, size_t j, size_t k,
|
|
1470
|
+
size_t l, size_t range_m, size_t range_n,
|
|
1471
|
+
size_t tile_m, size_t tile_n) {
|
|
1472
|
+
(*static_cast<const T *>(functor))(i, j, k, l, range_m, range_n, tile_m,
|
|
1473
|
+
tile_n);
|
|
1474
|
+
}
|
|
1475
|
+
|
|
1476
|
+
} /* namespace */
|
|
1477
|
+
} /* namespace detail */
|
|
1478
|
+
} /* namespace libpthreadpool */
|
|
1479
|
+
|
|
1480
|
+
/**
|
|
1481
|
+
* Process items on a 1D grid.
|
|
1482
|
+
*
|
|
1483
|
+
* The function implements a parallel version of the following snippet:
|
|
1484
|
+
*
|
|
1485
|
+
* for (size_t i = 0; i < range; i++)
|
|
1486
|
+
* functor(i);
|
|
1487
|
+
*
|
|
1488
|
+
* When the function returns, all items have been processed and the thread pool
|
|
1489
|
+
* is ready for a new task.
|
|
1490
|
+
*
|
|
1491
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
1492
|
+
* calls are serialized.
|
|
1493
|
+
*
|
|
1494
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
1495
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
1496
|
+
* @param functor the functor to call for each item.
|
|
1497
|
+
* @param range the number of items on the 1D grid to process. The
|
|
1498
|
+
* specified functor will be called once for each item.
|
|
1499
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
1500
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
1501
|
+
*/
|
|
1502
|
+
template <class T>
|
|
1503
|
+
inline void pthreadpool_parallelize_1d(pthreadpool_t threadpool,
|
|
1504
|
+
const T &functor, size_t range,
|
|
1505
|
+
uint32_t flags = 0) {
|
|
1506
|
+
pthreadpool_parallelize_1d(
|
|
1507
|
+
threadpool, &libpthreadpool::detail::call_wrapper_1d<const T>,
|
|
1508
|
+
const_cast<void *>(static_cast<const void *>(&functor)), range, flags);
|
|
1509
|
+
}
|
|
1510
|
+
|
|
1511
|
+
/**
|
|
1512
|
+
* Process items on a 1D grid with specified maximum tile size.
|
|
1513
|
+
*
|
|
1514
|
+
* The function implements a parallel version of the following snippet:
|
|
1515
|
+
*
|
|
1516
|
+
* for (size_t i = 0; i < range; i += tile)
|
|
1517
|
+
* functor(i, min(range - i, tile));
|
|
1518
|
+
*
|
|
1519
|
+
* When the call returns, all items have been processed and the thread pool is
|
|
1520
|
+
* ready for a new task.
|
|
1521
|
+
*
|
|
1522
|
+
* @note If multiple threads call this function with the same thread pool,
|
|
1523
|
+
* the calls are serialized.
|
|
1524
|
+
*
|
|
1525
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
1526
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
1527
|
+
* @param functor the functor to call for each tile.
|
|
1528
|
+
* @param range the number of items on the 1D grid to process.
|
|
1529
|
+
* @param tile the maximum number of items on the 1D grid to process in
|
|
1530
|
+
* one functor call.
|
|
1531
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
1532
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
1533
|
+
*/
|
|
1534
|
+
template <class T>
|
|
1535
|
+
inline void pthreadpool_parallelize_1d_tile_1d(pthreadpool_t threadpool,
|
|
1536
|
+
const T &functor, size_t range,
|
|
1537
|
+
size_t tile,
|
|
1538
|
+
uint32_t flags = 0) {
|
|
1539
|
+
pthreadpool_parallelize_1d_tile_1d(
|
|
1540
|
+
threadpool, &libpthreadpool::detail::call_wrapper_1d_tile_1d<const T>,
|
|
1541
|
+
const_cast<void *>(static_cast<const void *>(&functor)), range, tile,
|
|
1542
|
+
flags);
|
|
1543
|
+
}
|
|
1544
|
+
|
|
1545
|
+
/**
|
|
1546
|
+
* Process items on a 2D grid.
|
|
1547
|
+
*
|
|
1548
|
+
* The function implements a parallel version of the following snippet:
|
|
1549
|
+
*
|
|
1550
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
1551
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
1552
|
+
* functor(i, j);
|
|
1553
|
+
*
|
|
1554
|
+
* When the function returns, all items have been processed and the thread pool
|
|
1555
|
+
* is ready for a new task.
|
|
1556
|
+
*
|
|
1557
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
1558
|
+
* calls are serialized.
|
|
1559
|
+
*
|
|
1560
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
1561
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
1562
|
+
* @param functor the functor to call for each item.
|
|
1563
|
+
* @param range_i the number of items to process along the first dimension
|
|
1564
|
+
* of the 2D grid.
|
|
1565
|
+
* @param range_j the number of items to process along the second dimension
|
|
1566
|
+
* of the 2D grid.
|
|
1567
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
1568
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
1569
|
+
*/
|
|
1570
|
+
template <class T>
|
|
1571
|
+
inline void pthreadpool_parallelize_2d(pthreadpool_t threadpool,
|
|
1572
|
+
const T &functor, size_t range_i,
|
|
1573
|
+
size_t range_j, uint32_t flags = 0) {
|
|
1574
|
+
pthreadpool_parallelize_2d(
|
|
1575
|
+
threadpool, &libpthreadpool::detail::call_wrapper_2d<const T>,
|
|
1576
|
+
const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
|
|
1577
|
+
flags);
|
|
1578
|
+
}
|
|
1579
|
+
|
|
1580
|
+
/**
|
|
1581
|
+
* Process items on a 2D grid with the specified maximum tile size along the
|
|
1582
|
+
* last grid dimension.
|
|
1583
|
+
*
|
|
1584
|
+
* The function implements a parallel version of the following snippet:
|
|
1585
|
+
*
|
|
1586
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
1587
|
+
* for (size_t j = 0; j < range_j; j += tile_j)
|
|
1588
|
+
* functor(i, j, min(range_j - j, tile_j));
|
|
1589
|
+
*
|
|
1590
|
+
* When the function returns, all items have been processed and the thread pool
|
|
1591
|
+
* is ready for a new task.
|
|
1592
|
+
*
|
|
1593
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
1594
|
+
* calls are serialized.
|
|
1595
|
+
*
|
|
1596
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
1597
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
1598
|
+
* @param functor the functor to call for each tile.
|
|
1599
|
+
* @param range_i the number of items to process along the first dimension
|
|
1600
|
+
* of the 2D grid.
|
|
1601
|
+
* @param range_j the number of items to process along the second dimension
|
|
1602
|
+
* of the 2D grid.
|
|
1603
|
+
* @param tile_j the maximum number of items along the second dimension of
|
|
1604
|
+
* the 2D grid to process in one functor call.
|
|
1605
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
1606
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
1607
|
+
*/
|
|
1608
|
+
template <class T>
|
|
1609
|
+
inline void pthreadpool_parallelize_2d_tile_1d(pthreadpool_t threadpool,
|
|
1610
|
+
const T &functor, size_t range_i,
|
|
1611
|
+
size_t range_j, size_t tile_j,
|
|
1612
|
+
uint32_t flags = 0) {
|
|
1613
|
+
pthreadpool_parallelize_2d_tile_1d(
|
|
1614
|
+
threadpool, &libpthreadpool::detail::call_wrapper_2d_tile_1d<const T>,
|
|
1615
|
+
const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
|
|
1616
|
+
tile_j, flags);
|
|
1617
|
+
}
|
|
1618
|
+
|
|
1619
|
+
/**
|
|
1620
|
+
* Process items on a 2D grid with the specified maximum tile size along each
|
|
1621
|
+
* grid dimension.
|
|
1622
|
+
*
|
|
1623
|
+
* The function implements a parallel version of the following snippet:
|
|
1624
|
+
*
|
|
1625
|
+
* for (size_t i = 0; i < range_i; i += tile_i)
|
|
1626
|
+
* for (size_t j = 0; j < range_j; j += tile_j)
|
|
1627
|
+
* functor(i, j,
|
|
1628
|
+
* min(range_i - i, tile_i), min(range_j - j, tile_j));
|
|
1629
|
+
*
|
|
1630
|
+
* When the function returns, all items have been processed and the thread pool
|
|
1631
|
+
* is ready for a new task.
|
|
1632
|
+
*
|
|
1633
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
1634
|
+
* calls are serialized.
|
|
1635
|
+
*
|
|
1636
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
1637
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
1638
|
+
* @param functor the functor to call for each tile.
|
|
1639
|
+
* @param range_i the number of items to process along the first dimension
|
|
1640
|
+
* of the 2D grid.
|
|
1641
|
+
* @param range_j the number of items to process along the second dimension
|
|
1642
|
+
* of the 2D grid.
|
|
1643
|
+
* @param tile_i the maximum number of items along the first dimension of
|
|
1644
|
+
* the 2D grid to process in one functor call.
|
|
1645
|
+
* @param tile_j the maximum number of items along the second dimension of
|
|
1646
|
+
* the 2D grid to process in one functor call.
|
|
1647
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
1648
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
1649
|
+
*/
|
|
1650
|
+
template <class T>
|
|
1651
|
+
inline void pthreadpool_parallelize_2d_tile_2d(pthreadpool_t threadpool,
|
|
1652
|
+
const T &functor, size_t range_i,
|
|
1653
|
+
size_t range_j, size_t tile_i,
|
|
1654
|
+
size_t tile_j,
|
|
1655
|
+
uint32_t flags = 0) {
|
|
1656
|
+
pthreadpool_parallelize_2d_tile_2d(
|
|
1657
|
+
threadpool, &libpthreadpool::detail::call_wrapper_2d_tile_2d<const T>,
|
|
1658
|
+
const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
|
|
1659
|
+
tile_i, tile_j, flags);
|
|
1660
|
+
}
|
|
1661
|
+
|
|
1662
|
+
/**
|
|
1663
|
+
* Process items on a 3D grid.
|
|
1664
|
+
*
|
|
1665
|
+
* The function implements a parallel version of the following snippet:
|
|
1666
|
+
*
|
|
1667
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
1668
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
1669
|
+
* for (size_t k = 0; k < range_k; k++)
|
|
1670
|
+
* functor(i, j, k);
|
|
1671
|
+
*
|
|
1672
|
+
* When the function returns, all items have been processed and the thread pool
|
|
1673
|
+
* is ready for a new task.
|
|
1674
|
+
*
|
|
1675
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
1676
|
+
* calls are serialized.
|
|
1677
|
+
*
|
|
1678
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
1679
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
1680
|
+
* @param functor the functor to call for each item.
|
|
1681
|
+
* @param range_i the number of items to process along the first dimension
|
|
1682
|
+
* of the 3D grid.
|
|
1683
|
+
* @param range_j the number of items to process along the second dimension
|
|
1684
|
+
* of the 3D grid.
|
|
1685
|
+
* @param range_k the number of items to process along the third dimension
|
|
1686
|
+
* of the 3D grid.
|
|
1687
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
1688
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
1689
|
+
*/
|
|
1690
|
+
template <class T>
|
|
1691
|
+
inline void pthreadpool_parallelize_3d(pthreadpool_t threadpool,
|
|
1692
|
+
const T &functor, size_t range_i,
|
|
1693
|
+
size_t range_j, size_t range_k,
|
|
1694
|
+
uint32_t flags = 0) {
|
|
1695
|
+
pthreadpool_parallelize_3d(
|
|
1696
|
+
threadpool, &libpthreadpool::detail::call_wrapper_3d<const T>,
|
|
1697
|
+
const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
|
|
1698
|
+
range_k, flags);
|
|
1699
|
+
}
|
|
1700
|
+
|
|
1701
|
+
/**
|
|
1702
|
+
* Process items on a 3D grid with the specified maximum tile size along the
|
|
1703
|
+
* last grid dimension.
|
|
1704
|
+
*
|
|
1705
|
+
* The function implements a parallel version of the following snippet:
|
|
1706
|
+
*
|
|
1707
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
1708
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
1709
|
+
* for (size_t k = 0; k < range_k; k += tile_k)
|
|
1710
|
+
* functor(i, j, k, min(range_k - k, tile_k));
|
|
1711
|
+
*
|
|
1712
|
+
* When the function returns, all items have been processed and the thread pool
|
|
1713
|
+
* is ready for a new task.
|
|
1714
|
+
*
|
|
1715
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
1716
|
+
* calls are serialized.
|
|
1717
|
+
*
|
|
1718
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
1719
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
1720
|
+
* @param functor the functor to call for each tile.
|
|
1721
|
+
* @param range_i the number of items to process along the first dimension
|
|
1722
|
+
* of the 3D grid.
|
|
1723
|
+
* @param range_j the number of items to process along the second dimension
|
|
1724
|
+
* of the 3D grid.
|
|
1725
|
+
* @param range_k the number of items to process along the third dimension
|
|
1726
|
+
* of the 3D grid.
|
|
1727
|
+
* @param tile_k the maximum number of items along the third dimension of
|
|
1728
|
+
* the 3D grid to process in one functor call.
|
|
1729
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
1730
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
1731
|
+
*/
|
|
1732
|
+
template <class T>
|
|
1733
|
+
inline void pthreadpool_parallelize_3d_tile_1d(pthreadpool_t threadpool,
|
|
1734
|
+
const T &functor, size_t range_i,
|
|
1735
|
+
size_t range_j, size_t range_k,
|
|
1736
|
+
size_t tile_k,
|
|
1737
|
+
uint32_t flags = 0) {
|
|
1738
|
+
pthreadpool_parallelize_3d_tile_1d(
|
|
1739
|
+
threadpool, &libpthreadpool::detail::call_wrapper_3d_tile_1d<const T>,
|
|
1740
|
+
const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
|
|
1741
|
+
range_k, tile_k, flags);
|
|
1742
|
+
}
|
|
1743
|
+
|
|
1744
|
+
/**
|
|
1745
|
+
* Process items on a 3D grid with the specified maximum tile size along the
|
|
1746
|
+
* last two grid dimensions.
|
|
1747
|
+
*
|
|
1748
|
+
* The function implements a parallel version of the following snippet:
|
|
1749
|
+
*
|
|
1750
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
1751
|
+
* for (size_t j = 0; j < range_j; j += tile_j)
|
|
1752
|
+
* for (size_t k = 0; k < range_k; k += tile_k)
|
|
1753
|
+
* functor(i, j, k,
|
|
1754
|
+
* min(range_j - j, tile_j), min(range_k - k, tile_k));
|
|
1755
|
+
*
|
|
1756
|
+
* When the function returns, all items have been processed and the thread pool
|
|
1757
|
+
* is ready for a new task.
|
|
1758
|
+
*
|
|
1759
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
1760
|
+
* calls are serialized.
|
|
1761
|
+
*
|
|
1762
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
1763
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
1764
|
+
* @param functor the functor to call for each tile.
|
|
1765
|
+
* @param range_i the number of items to process along the first dimension
|
|
1766
|
+
* of the 3D grid.
|
|
1767
|
+
* @param range_j the number of items to process along the second dimension
|
|
1768
|
+
* of the 3D grid.
|
|
1769
|
+
* @param range_k the number of items to process along the third dimension
|
|
1770
|
+
* of the 3D grid.
|
|
1771
|
+
* @param tile_j the maximum number of items along the second dimension of
|
|
1772
|
+
* the 3D grid to process in one functor call.
|
|
1773
|
+
* @param tile_k the maximum number of items along the third dimension of
|
|
1774
|
+
* the 3D grid to process in one functor call.
|
|
1775
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
1776
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
1777
|
+
*/
|
|
1778
|
+
template <class T>
|
|
1779
|
+
inline void pthreadpool_parallelize_3d_tile_2d(pthreadpool_t threadpool,
|
|
1780
|
+
const T &functor, size_t range_i,
|
|
1781
|
+
size_t range_j, size_t range_k,
|
|
1782
|
+
size_t tile_j, size_t tile_k,
|
|
1783
|
+
uint32_t flags = 0) {
|
|
1784
|
+
pthreadpool_parallelize_3d_tile_2d(
|
|
1785
|
+
threadpool, &libpthreadpool::detail::call_wrapper_3d_tile_2d<const T>,
|
|
1786
|
+
const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
|
|
1787
|
+
range_k, tile_j, tile_k, flags);
|
|
1788
|
+
}
|
|
1789
|
+
|
|
1790
|
+
/**
|
|
1791
|
+
* Process items on a 4D grid.
|
|
1792
|
+
*
|
|
1793
|
+
* The function implements a parallel version of the following snippet:
|
|
1794
|
+
*
|
|
1795
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
1796
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
1797
|
+
* for (size_t k = 0; k < range_k; k++)
|
|
1798
|
+
* for (size_t l = 0; l < range_l; l++)
|
|
1799
|
+
* functor(i, j, k, l);
|
|
1800
|
+
*
|
|
1801
|
+
* When the function returns, all items have been processed and the thread pool
|
|
1802
|
+
* is ready for a new task.
|
|
1803
|
+
*
|
|
1804
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
1805
|
+
* calls are serialized.
|
|
1806
|
+
*
|
|
1807
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
1808
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
1809
|
+
* @param functor the functor to call for each item.
|
|
1810
|
+
* @param range_i the number of items to process along the first dimension
|
|
1811
|
+
* of the 4D grid.
|
|
1812
|
+
* @param range_j the number of items to process along the second dimension
|
|
1813
|
+
* of the 4D grid.
|
|
1814
|
+
* @param range_k the number of items to process along the third dimension
|
|
1815
|
+
* of the 4D grid.
|
|
1816
|
+
* @param range_l the number of items to process along the fourth dimension
|
|
1817
|
+
* of the 4D grid.
|
|
1818
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
1819
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
1820
|
+
*/
|
|
1821
|
+
template <class T>
|
|
1822
|
+
inline void pthreadpool_parallelize_4d(pthreadpool_t threadpool,
|
|
1823
|
+
const T &functor, size_t range_i,
|
|
1824
|
+
size_t range_j, size_t range_k,
|
|
1825
|
+
size_t range_l, uint32_t flags = 0) {
|
|
1826
|
+
pthreadpool_parallelize_4d(
|
|
1827
|
+
threadpool, &libpthreadpool::detail::call_wrapper_4d<const T>,
|
|
1828
|
+
const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
|
|
1829
|
+
range_k, range_l, flags);
|
|
1830
|
+
}
|
|
1831
|
+
|
|
1832
|
+
/**
|
|
1833
|
+
* Process items on a 4D grid with the specified maximum tile size along the
|
|
1834
|
+
* last grid dimension.
|
|
1835
|
+
*
|
|
1836
|
+
* The function implements a parallel version of the following snippet:
|
|
1837
|
+
*
|
|
1838
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
1839
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
1840
|
+
* for (size_t k = 0; k < range_k; k++)
|
|
1841
|
+
* for (size_t l = 0; l < range_l; l += tile_l)
|
|
1842
|
+
* functor(i, j, k, l, min(range_l - l, tile_l));
|
|
1843
|
+
*
|
|
1844
|
+
* When the function returns, all items have been processed and the thread pool
|
|
1845
|
+
* is ready for a new task.
|
|
1846
|
+
*
|
|
1847
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
1848
|
+
* calls are serialized.
|
|
1849
|
+
*
|
|
1850
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
1851
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
1852
|
+
* @param functor the functor to call for each tile.
|
|
1853
|
+
* @param range_i the number of items to process along the first dimension
|
|
1854
|
+
* of the 4D grid.
|
|
1855
|
+
* @param range_j the number of items to process along the second dimension
|
|
1856
|
+
* of the 4D grid.
|
|
1857
|
+
* @param range_k the number of items to process along the third dimension
|
|
1858
|
+
* of the 4D grid.
|
|
1859
|
+
* @param range_l the number of items to process along the fourth dimension
|
|
1860
|
+
* of the 4D grid.
|
|
1861
|
+
* @param tile_l the maximum number of items along the fourth dimension of
|
|
1862
|
+
* the 4D grid to process in one functor call.
|
|
1863
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
1864
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
1865
|
+
*/
|
|
1866
|
+
template <class T>
|
|
1867
|
+
inline void pthreadpool_parallelize_4d_tile_1d(pthreadpool_t threadpool,
|
|
1868
|
+
const T &functor, size_t range_i,
|
|
1869
|
+
size_t range_j, size_t range_k,
|
|
1870
|
+
size_t range_l, size_t tile_l,
|
|
1871
|
+
uint32_t flags = 0) {
|
|
1872
|
+
pthreadpool_parallelize_4d_tile_1d(
|
|
1873
|
+
threadpool, &libpthreadpool::detail::call_wrapper_4d_tile_1d<const T>,
|
|
1874
|
+
const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
|
|
1875
|
+
range_k, range_l, tile_l, flags);
|
|
1876
|
+
}
|
|
1877
|
+
|
|
1878
|
+
/**
|
|
1879
|
+
* Process items on a 4D grid with the specified maximum tile size along the
|
|
1880
|
+
* last two grid dimensions.
|
|
1881
|
+
*
|
|
1882
|
+
* The function implements a parallel version of the following snippet:
|
|
1883
|
+
*
|
|
1884
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
1885
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
1886
|
+
* for (size_t k = 0; k < range_k; k += tile_k)
|
|
1887
|
+
* for (size_t l = 0; l < range_l; l += tile_l)
|
|
1888
|
+
* functor(i, j, k, l,
|
|
1889
|
+
* min(range_k - k, tile_k), min(range_l - l, tile_l));
|
|
1890
|
+
*
|
|
1891
|
+
* When the function returns, all items have been processed and the thread pool
|
|
1892
|
+
* is ready for a new task.
|
|
1893
|
+
*
|
|
1894
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
1895
|
+
* calls are serialized.
|
|
1896
|
+
*
|
|
1897
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
1898
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
1899
|
+
* @param functor the functor to call for each tile.
|
|
1900
|
+
* @param range_i the number of items to process along the first dimension
|
|
1901
|
+
* of the 4D grid.
|
|
1902
|
+
* @param range_j the number of items to process along the second dimension
|
|
1903
|
+
* of the 4D grid.
|
|
1904
|
+
* @param range_k the number of items to process along the third dimension
|
|
1905
|
+
* of the 4D grid.
|
|
1906
|
+
* @param range_l the number of items to process along the fourth dimension
|
|
1907
|
+
* of the 4D grid.
|
|
1908
|
+
* @param tile_k the maximum number of items along the third dimension of
|
|
1909
|
+
* the 4D grid to process in one functor call.
|
|
1910
|
+
* @param tile_l the maximum number of items along the fourth dimension of
|
|
1911
|
+
* the 4D grid to process in one functor call.
|
|
1912
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
1913
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
1914
|
+
*/
|
|
1915
|
+
template <class T>
|
|
1916
|
+
inline void pthreadpool_parallelize_4d_tile_2d(pthreadpool_t threadpool,
|
|
1917
|
+
const T &functor, size_t range_i,
|
|
1918
|
+
size_t range_j, size_t range_k,
|
|
1919
|
+
size_t range_l, size_t tile_k,
|
|
1920
|
+
size_t tile_l,
|
|
1921
|
+
uint32_t flags = 0) {
|
|
1922
|
+
pthreadpool_parallelize_4d_tile_2d(
|
|
1923
|
+
threadpool, &libpthreadpool::detail::call_wrapper_4d_tile_2d<const T>,
|
|
1924
|
+
const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
|
|
1925
|
+
range_k, range_l, tile_k, tile_l, flags);
|
|
1926
|
+
}
|
|
1927
|
+
|
|
1928
|
+
/**
|
|
1929
|
+
* Process items on a 5D grid.
|
|
1930
|
+
*
|
|
1931
|
+
* The function implements a parallel version of the following snippet:
|
|
1932
|
+
*
|
|
1933
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
1934
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
1935
|
+
* for (size_t k = 0; k < range_k; k++)
|
|
1936
|
+
* for (size_t l = 0; l < range_l; l++)
|
|
1937
|
+
* for (size_t m = 0; m < range_m; m++)
|
|
1938
|
+
* functor(i, j, k, l, m);
|
|
1939
|
+
*
|
|
1940
|
+
* When the function returns, all items have been processed and the thread pool
|
|
1941
|
+
* is ready for a new task.
|
|
1942
|
+
*
|
|
1943
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
1944
|
+
* calls are serialized.
|
|
1945
|
+
*
|
|
1946
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
1947
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
1948
|
+
* @param functor the functor to call for each item.
|
|
1949
|
+
* @param range_i the number of items to process along the first dimension
|
|
1950
|
+
* of the 5D grid.
|
|
1951
|
+
* @param range_j the number of items to process along the second dimension
|
|
1952
|
+
* of the 5D grid.
|
|
1953
|
+
* @param range_k the number of items to process along the third dimension
|
|
1954
|
+
* of the 5D grid.
|
|
1955
|
+
* @param range_l the number of items to process along the fourth dimension
|
|
1956
|
+
* of the 5D grid.
|
|
1957
|
+
* @param range_m the number of items to process along the fifth dimension
|
|
1958
|
+
* of the 5D grid.
|
|
1959
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
1960
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
1961
|
+
*/
|
|
1962
|
+
template <class T>
|
|
1963
|
+
inline void
|
|
1964
|
+
pthreadpool_parallelize_5d(pthreadpool_t threadpool, const T &functor,
|
|
1965
|
+
size_t range_i, size_t range_j, size_t range_k,
|
|
1966
|
+
size_t range_l, size_t range_m, uint32_t flags = 0) {
|
|
1967
|
+
pthreadpool_parallelize_5d(
|
|
1968
|
+
threadpool, &libpthreadpool::detail::call_wrapper_5d<const T>,
|
|
1969
|
+
const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
|
|
1970
|
+
range_k, range_l, range_m, flags);
|
|
1971
|
+
}
|
|
1972
|
+
|
|
1973
|
+
/**
|
|
1974
|
+
* Process items on a 5D grid with the specified maximum tile size along the
|
|
1975
|
+
* last grid dimension.
|
|
1976
|
+
*
|
|
1977
|
+
* The function implements a parallel version of the following snippet:
|
|
1978
|
+
*
|
|
1979
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
1980
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
1981
|
+
* for (size_t k = 0; k < range_k; k++)
|
|
1982
|
+
* for (size_t l = 0; l < range_l; l++)
|
|
1983
|
+
* for (size_t m = 0; m < range_m; m += tile_m)
|
|
1984
|
+
* functor(i, j, k, l, m, min(range_m - m, tile_m));
|
|
1985
|
+
*
|
|
1986
|
+
* When the function returns, all items have been processed and the thread pool
|
|
1987
|
+
* is ready for a new task.
|
|
1988
|
+
*
|
|
1989
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
1990
|
+
* calls are serialized.
|
|
1991
|
+
*
|
|
1992
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
1993
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
1994
|
+
* @param functor the functor to call for each tile.
|
|
1995
|
+
* @param range_i the number of items to process along the first dimension
|
|
1996
|
+
* of the 5D grid.
|
|
1997
|
+
* @param range_j the number of items to process along the second dimension
|
|
1998
|
+
* of the 5D grid.
|
|
1999
|
+
* @param range_k the number of items to process along the third dimension
|
|
2000
|
+
* of the 5D grid.
|
|
2001
|
+
* @param range_l the number of items to process along the fourth dimension
|
|
2002
|
+
* of the 5D grid.
|
|
2003
|
+
* @param range_m the number of items to process along the fifth dimension
|
|
2004
|
+
* of the 5D grid.
|
|
2005
|
+
* @param tile_m the maximum number of items along the fifth dimension of
|
|
2006
|
+
* the 5D grid to process in one functor call.
|
|
2007
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
2008
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
2009
|
+
*/
|
|
2010
|
+
template <class T>
|
|
2011
|
+
inline void pthreadpool_parallelize_5d_tile_1d(pthreadpool_t threadpool,
|
|
2012
|
+
const T &functor, size_t range_i,
|
|
2013
|
+
size_t range_j, size_t range_k,
|
|
2014
|
+
size_t range_l, size_t range_m,
|
|
2015
|
+
size_t tile_m,
|
|
2016
|
+
uint32_t flags = 0) {
|
|
2017
|
+
pthreadpool_parallelize_5d_tile_1d(
|
|
2018
|
+
threadpool, &libpthreadpool::detail::call_wrapper_5d_tile_1d<const T>,
|
|
2019
|
+
const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
|
|
2020
|
+
range_k, range_l, range_m, tile_m, flags);
|
|
2021
|
+
}
|
|
2022
|
+
|
|
2023
|
+
/**
|
|
2024
|
+
* Process items on a 5D grid with the specified maximum tile size along the
|
|
2025
|
+
* last two grid dimensions.
|
|
2026
|
+
*
|
|
2027
|
+
* The function implements a parallel version of the following snippet:
|
|
2028
|
+
*
|
|
2029
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
2030
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
2031
|
+
* for (size_t k = 0; k < range_k; k++)
|
|
2032
|
+
* for (size_t l = 0; l < range_l; l += tile_l)
|
|
2033
|
+
* for (size_t m = 0; m < range_m; m += tile_m)
|
|
2034
|
+
* functor(i, j, k, l, m,
|
|
2035
|
+
* min(range_l - l, tile_l), min(range_m - m, tile_m));
|
|
2036
|
+
*
|
|
2037
|
+
* When the function returns, all items have been processed and the thread pool
|
|
2038
|
+
* is ready for a new task.
|
|
2039
|
+
*
|
|
2040
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
2041
|
+
* calls are serialized.
|
|
2042
|
+
*
|
|
2043
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
2044
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
2045
|
+
* @param functor the functor to call for each tile.
|
|
2046
|
+
* @param range_i the number of items to process along the first dimension
|
|
2047
|
+
* of the 5D grid.
|
|
2048
|
+
* @param range_j the number of items to process along the second dimension
|
|
2049
|
+
* of the 5D grid.
|
|
2050
|
+
* @param range_k the number of items to process along the third dimension
|
|
2051
|
+
* of the 5D grid.
|
|
2052
|
+
* @param range_l the number of items to process along the fourth dimension
|
|
2053
|
+
* of the 5D grid.
|
|
2054
|
+
* @param range_m the number of items to process along the fifth dimension
|
|
2055
|
+
* of the 5D grid.
|
|
2056
|
+
* @param tile_l the maximum number of items along the fourth dimension of
|
|
2057
|
+
* the 5D grid to process in one functor call.
|
|
2058
|
+
* @param tile_m the maximum number of items along the fifth dimension of
|
|
2059
|
+
* the 5D grid to process in one functor call.
|
|
2060
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
2061
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
2062
|
+
*/
|
|
2063
|
+
template <class T>
|
|
2064
|
+
inline void pthreadpool_parallelize_5d_tile_2d(pthreadpool_t threadpool,
|
|
2065
|
+
const T &functor, size_t range_i,
|
|
2066
|
+
size_t range_j, size_t range_k,
|
|
2067
|
+
size_t range_l, size_t range_m,
|
|
2068
|
+
size_t tile_l, size_t tile_m,
|
|
2069
|
+
uint32_t flags = 0) {
|
|
2070
|
+
pthreadpool_parallelize_5d_tile_2d(
|
|
2071
|
+
threadpool, &libpthreadpool::detail::call_wrapper_5d_tile_2d<const T>,
|
|
2072
|
+
const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
|
|
2073
|
+
range_k, range_l, range_m, tile_l, tile_m, flags);
|
|
2074
|
+
}
|
|
2075
|
+
|
|
2076
|
+
/**
|
|
2077
|
+
* Process items on a 6D grid.
|
|
2078
|
+
*
|
|
2079
|
+
* The function implements a parallel version of the following snippet:
|
|
2080
|
+
*
|
|
2081
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
2082
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
2083
|
+
* for (size_t k = 0; k < range_k; k++)
|
|
2084
|
+
* for (size_t l = 0; l < range_l; l++)
|
|
2085
|
+
* for (size_t m = 0; m < range_m; m++)
|
|
2086
|
+
* for (size_t n = 0; n < range_n; n++)
|
|
2087
|
+
* functor(i, j, k, l, m, n);
|
|
2088
|
+
*
|
|
2089
|
+
* When the function returns, all items have been processed and the thread pool
|
|
2090
|
+
* is ready for a new task.
|
|
2091
|
+
*
|
|
2092
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
2093
|
+
* calls are serialized.
|
|
2094
|
+
*
|
|
2095
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
2096
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
2097
|
+
* @param functor the functor to call for each tile.
|
|
2098
|
+
* @param range_i the number of items to process along the first dimension
|
|
2099
|
+
* of the 6D grid.
|
|
2100
|
+
* @param range_j the number of items to process along the second dimension
|
|
2101
|
+
* of the 6D grid.
|
|
2102
|
+
* @param range_k the number of items to process along the third dimension
|
|
2103
|
+
* of the 6D grid.
|
|
2104
|
+
* @param range_l the number of items to process along the fourth dimension
|
|
2105
|
+
* of the 6D grid.
|
|
2106
|
+
* @param range_m the number of items to process along the fifth dimension
|
|
2107
|
+
* of the 6D grid.
|
|
2108
|
+
* @param range_n the number of items to process along the sixth dimension
|
|
2109
|
+
* of the 6D grid.
|
|
2110
|
+
* @param tile_n the maximum number of items along the sixth dimension of
|
|
2111
|
+
* the 6D grid to process in one functor call.
|
|
2112
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
2113
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
2114
|
+
*/
|
|
2115
|
+
template <class T>
|
|
2116
|
+
inline void pthreadpool_parallelize_6d(pthreadpool_t threadpool,
|
|
2117
|
+
const T &functor, size_t range_i,
|
|
2118
|
+
size_t range_j, size_t range_k,
|
|
2119
|
+
size_t range_l, size_t range_m,
|
|
2120
|
+
size_t range_n, uint32_t flags = 0) {
|
|
2121
|
+
pthreadpool_parallelize_6d(
|
|
2122
|
+
threadpool, &libpthreadpool::detail::call_wrapper_6d<const T>,
|
|
2123
|
+
const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
|
|
2124
|
+
range_k, range_l, range_m, range_n, flags);
|
|
2125
|
+
}
|
|
2126
|
+
|
|
2127
|
+
/**
|
|
2128
|
+
* Process items on a 6D grid with the specified maximum tile size along the
|
|
2129
|
+
* last grid dimension.
|
|
2130
|
+
*
|
|
2131
|
+
* The function implements a parallel version of the following snippet:
|
|
2132
|
+
*
|
|
2133
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
2134
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
2135
|
+
* for (size_t k = 0; k < range_k; k++)
|
|
2136
|
+
* for (size_t l = 0; l < range_l; l++)
|
|
2137
|
+
* for (size_t m = 0; m < range_m; m++)
|
|
2138
|
+
* for (size_t n = 0; n < range_n; n += tile_n)
|
|
2139
|
+
* functor(i, j, k, l, m, n, min(range_n - n, tile_n));
|
|
2140
|
+
*
|
|
2141
|
+
* When the function returns, all items have been processed and the thread pool
|
|
2142
|
+
* is ready for a new task.
|
|
2143
|
+
*
|
|
2144
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
2145
|
+
* calls are serialized.
|
|
2146
|
+
*
|
|
2147
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
2148
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
2149
|
+
* @param functor the functor to call for each tile.
|
|
2150
|
+
* @param range_i the number of items to process along the first dimension
|
|
2151
|
+
* of the 6D grid.
|
|
2152
|
+
* @param range_j the number of items to process along the second dimension
|
|
2153
|
+
* of the 6D grid.
|
|
2154
|
+
* @param range_k the number of items to process along the third dimension
|
|
2155
|
+
* of the 6D grid.
|
|
2156
|
+
* @param range_l the number of items to process along the fourth dimension
|
|
2157
|
+
* of the 6D grid.
|
|
2158
|
+
* @param range_m the number of items to process along the fifth dimension
|
|
2159
|
+
* of the 6D grid.
|
|
2160
|
+
* @param range_n the number of items to process along the sixth dimension
|
|
2161
|
+
* of the 6D grid.
|
|
2162
|
+
* @param tile_n the maximum number of items along the sixth dimension of
|
|
2163
|
+
* the 6D grid to process in one functor call.
|
|
2164
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
2165
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
2166
|
+
*/
|
|
2167
|
+
template <class T>
|
|
2168
|
+
inline void pthreadpool_parallelize_6d_tile_1d(pthreadpool_t threadpool,
|
|
2169
|
+
const T &functor, size_t range_i,
|
|
2170
|
+
size_t range_j, size_t range_k,
|
|
2171
|
+
size_t range_l, size_t range_m,
|
|
2172
|
+
size_t range_n, size_t tile_n,
|
|
2173
|
+
uint32_t flags = 0) {
|
|
2174
|
+
pthreadpool_parallelize_6d_tile_1d(
|
|
2175
|
+
threadpool, &libpthreadpool::detail::call_wrapper_6d_tile_1d<const T>,
|
|
2176
|
+
const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
|
|
2177
|
+
range_k, range_l, range_m, range_n, tile_n, flags);
|
|
2178
|
+
}
|
|
2179
|
+
|
|
2180
|
+
/**
|
|
2181
|
+
* Process items on a 6D grid with the specified maximum tile size along the
|
|
2182
|
+
* last two grid dimensions.
|
|
2183
|
+
*
|
|
2184
|
+
* The function implements a parallel version of the following snippet:
|
|
2185
|
+
*
|
|
2186
|
+
* for (size_t i = 0; i < range_i; i++)
|
|
2187
|
+
* for (size_t j = 0; j < range_j; j++)
|
|
2188
|
+
* for (size_t k = 0; k < range_k; k++)
|
|
2189
|
+
* for (size_t l = 0; l < range_l; l++)
|
|
2190
|
+
* for (size_t m = 0; m < range_m; m += tile_m)
|
|
2191
|
+
* for (size_t n = 0; n < range_n; n += tile_n)
|
|
2192
|
+
* functor(i, j, k, l, m, n,
|
|
2193
|
+
* min(range_m - m, tile_m), min(range_n - n, tile_n));
|
|
2194
|
+
*
|
|
2195
|
+
* When the function returns, all items have been processed and the thread pool
|
|
2196
|
+
* is ready for a new task.
|
|
2197
|
+
*
|
|
2198
|
+
* @note If multiple threads call this function with the same thread pool, the
|
|
2199
|
+
* calls are serialized.
|
|
2200
|
+
*
|
|
2201
|
+
* @param threadpool the thread pool to use for parallelisation. If threadpool
|
|
2202
|
+
* is NULL, all items are processed serially on the calling thread.
|
|
2203
|
+
* @param functor the functor to call for each tile.
|
|
2204
|
+
* @param range_i the number of items to process along the first dimension
|
|
2205
|
+
* of the 6D grid.
|
|
2206
|
+
* @param range_j the number of items to process along the second dimension
|
|
2207
|
+
* of the 6D grid.
|
|
2208
|
+
* @param range_k the number of items to process along the third dimension
|
|
2209
|
+
* of the 6D grid.
|
|
2210
|
+
* @param range_l the number of items to process along the fourth dimension
|
|
2211
|
+
* of the 6D grid.
|
|
2212
|
+
* @param range_m the number of items to process along the fifth dimension
|
|
2213
|
+
* of the 6D grid.
|
|
2214
|
+
* @param range_n the number of items to process along the sixth dimension
|
|
2215
|
+
* of the 6D grid.
|
|
2216
|
+
* @param tile_m the maximum number of items along the fifth dimension of
|
|
2217
|
+
* the 6D grid to process in one functor call.
|
|
2218
|
+
* @param tile_n the maximum number of items along the sixth dimension of
|
|
2219
|
+
* the 6D grid to process in one functor call.
|
|
2220
|
+
* @param flags a bitwise combination of zero or more optional flags
|
|
2221
|
+
* (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
|
|
2222
|
+
*/
|
|
2223
|
+
template <class T>
|
|
2224
|
+
inline void pthreadpool_parallelize_6d_tile_2d(
|
|
2225
|
+
pthreadpool_t threadpool, const T &functor, size_t range_i, size_t range_j,
|
|
2226
|
+
size_t range_k, size_t range_l, size_t range_m, size_t range_n,
|
|
2227
|
+
size_t tile_m, size_t tile_n, uint32_t flags = 0) {
|
|
2228
|
+
pthreadpool_parallelize_6d_tile_2d(
|
|
2229
|
+
threadpool, &libpthreadpool::detail::call_wrapper_6d_tile_2d<const T>,
|
|
2230
|
+
const_cast<void *>(static_cast<const void *>(&functor)), range_i, range_j,
|
|
2231
|
+
range_k, range_l, range_m, range_n, tile_m, tile_n, flags);
|
|
2232
|
+
}
|
|
2233
|
+
|
|
2234
|
+
#endif /* __cplusplus */
|
|
2235
|
+
|
|
2236
|
+
#endif /* PTHREADPOOL_H_ */
|