halide 19.0.0__cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- halide/__init__.py +39 -0
- halide/_generator_helpers.py +835 -0
- halide/bin/adams2019_retrain_cost_model +0 -0
- halide/bin/adams2019_weightsdir_to_weightsfile +0 -0
- halide/bin/anderson2021_retrain_cost_model +0 -0
- halide/bin/anderson2021_weightsdir_to_weightsfile +0 -0
- halide/bin/featurization_to_sample +0 -0
- halide/bin/gengen +0 -0
- halide/bin/get_host_target +0 -0
- halide/halide_.cpython-39-x86_64-linux-gnu.so +0 -0
- halide/imageio.py +60 -0
- halide/include/Halide.h +35293 -0
- halide/include/HalideBuffer.h +2618 -0
- halide/include/HalidePyTorchCudaHelpers.h +64 -0
- halide/include/HalidePyTorchHelpers.h +120 -0
- halide/include/HalideRuntime.h +2221 -0
- halide/include/HalideRuntimeCuda.h +89 -0
- halide/include/HalideRuntimeD3D12Compute.h +91 -0
- halide/include/HalideRuntimeHexagonDma.h +104 -0
- halide/include/HalideRuntimeHexagonHost.h +157 -0
- halide/include/HalideRuntimeMetal.h +112 -0
- halide/include/HalideRuntimeOpenCL.h +119 -0
- halide/include/HalideRuntimeQurt.h +32 -0
- halide/include/HalideRuntimeVulkan.h +137 -0
- halide/include/HalideRuntimeWebGPU.h +44 -0
- halide/lib64/cmake/Halide/FindHalide_LLVM.cmake +152 -0
- halide/lib64/cmake/Halide/FindV8.cmake +33 -0
- halide/lib64/cmake/Halide/Halide-shared-deps.cmake +0 -0
- halide/lib64/cmake/Halide/Halide-shared-targets-release.cmake +29 -0
- halide/lib64/cmake/Halide/Halide-shared-targets.cmake +154 -0
- halide/lib64/cmake/Halide/HalideConfig.cmake +162 -0
- halide/lib64/cmake/Halide/HalideConfigVersion.cmake +65 -0
- halide/lib64/cmake/HalideHelpers/FindHalide_WebGPU.cmake +27 -0
- halide/lib64/cmake/HalideHelpers/Halide-Interfaces-release.cmake +116 -0
- halide/lib64/cmake/HalideHelpers/Halide-Interfaces.cmake +236 -0
- halide/lib64/cmake/HalideHelpers/HalideGeneratorHelpers.cmake +1056 -0
- halide/lib64/cmake/HalideHelpers/HalideHelpersConfig.cmake +28 -0
- halide/lib64/cmake/HalideHelpers/HalideHelpersConfigVersion.cmake +54 -0
- halide/lib64/cmake/HalideHelpers/HalideTargetHelpers.cmake +99 -0
- halide/lib64/cmake/HalideHelpers/MutexCopy.ps1 +31 -0
- halide/lib64/cmake/HalideHelpers/TargetExportScript.cmake +55 -0
- halide/lib64/cmake/Halide_Python/Halide_Python-targets-release.cmake +30 -0
- halide/lib64/cmake/Halide_Python/Halide_Python-targets.cmake +125 -0
- halide/lib64/cmake/Halide_Python/Halide_PythonConfig.cmake +26 -0
- halide/lib64/cmake/Halide_Python/Halide_PythonConfigVersion.cmake +65 -0
- halide/lib64/libHalide.so +0 -0
- halide/lib64/libHalidePyStubs.a +0 -0
- halide/lib64/libHalide_GenGen.a +0 -0
- halide/lib64/libautoschedule_adams2019.so +0 -0
- halide/lib64/libautoschedule_anderson2021.so +0 -0
- halide/lib64/libautoschedule_li2018.so +0 -0
- halide/lib64/libautoschedule_mullapudi2016.so +0 -0
- halide/share/doc/Halide/LICENSE.txt +233 -0
- halide/share/doc/Halide/README.md +439 -0
- halide/share/doc/Halide/doc/BuildingHalideWithCMake.md +626 -0
- halide/share/doc/Halide/doc/CodeStyleCMake.md +393 -0
- halide/share/doc/Halide/doc/FuzzTesting.md +104 -0
- halide/share/doc/Halide/doc/HalideCMakePackage.md +812 -0
- halide/share/doc/Halide/doc/Hexagon.md +73 -0
- halide/share/doc/Halide/doc/Python.md +844 -0
- halide/share/doc/Halide/doc/RunGen.md +283 -0
- halide/share/doc/Halide/doc/Testing.md +125 -0
- halide/share/doc/Halide/doc/Vulkan.md +287 -0
- halide/share/doc/Halide/doc/WebAssembly.md +228 -0
- halide/share/doc/Halide/doc/WebGPU.md +128 -0
- halide/share/tools/RunGen.h +1470 -0
- halide/share/tools/RunGenMain.cpp +642 -0
- halide/share/tools/adams2019_autotune_loop.sh +227 -0
- halide/share/tools/anderson2021_autotune_loop.sh +591 -0
- halide/share/tools/halide_benchmark.h +240 -0
- halide/share/tools/halide_image.h +31 -0
- halide/share/tools/halide_image_info.h +318 -0
- halide/share/tools/halide_image_io.h +2794 -0
- halide/share/tools/halide_malloc_trace.h +102 -0
- halide/share/tools/halide_thread_pool.h +161 -0
- halide/share/tools/halide_trace_config.h +559 -0
- halide-19.0.0.data/data/share/cmake/Halide/HalideConfig.cmake +6 -0
- halide-19.0.0.data/data/share/cmake/Halide/HalideConfigVersion.cmake +65 -0
- halide-19.0.0.data/data/share/cmake/HalideHelpers/HalideHelpersConfig.cmake +6 -0
- halide-19.0.0.data/data/share/cmake/HalideHelpers/HalideHelpersConfigVersion.cmake +54 -0
- halide-19.0.0.dist-info/METADATA +301 -0
- halide-19.0.0.dist-info/RECORD +84 -0
- halide-19.0.0.dist-info/WHEEL +6 -0
- halide-19.0.0.dist-info/licenses/LICENSE.txt +233 -0
#ifndef HL_PYTORCH_CUDA_HELPERS_H
#define HL_PYTORCH_CUDA_HELPERS_H

/** \file
 * Override Halide's CUDA hooks so that Halide code called from PyTorch runs on
 * the GPU device and stream that PyTorch is already using. Include this header
 * exactly once in the PyTorch/C++ binding source file (see
 * apps/HelloPyTorch/setup.py for an example); the definitions below replace
 * Halide's weakly-linked defaults at link time.
 */

#include "HalideRuntimeCuda.h"
#include "cuda.h"
#include "cuda_runtime.h"

namespace Halide {
namespace PyTorch {

// Per-call context handed to Halide through the user_context pointer.
// Bundles the device id plus non-owning pointers to the CUDA context and
// stream that PyTorch selected for the current op invocation.
struct UserContext {
    UserContext(int id, CUcontext *ctx, cudaStream_t *stream)
        : device_id(id), cuda_context(ctx), stream(stream) {
    }

    int device_id;            // CUDA device ordinal to run on
    CUcontext *cuda_context;  // points at PyTorch's current CUcontext
    cudaStream_t *stream;     // points at PyTorch's current stream
};

}  // namespace PyTorch
}  // namespace Halide

// Replace Halide weakly-linked CUDA handles
extern "C" {

/** Hand Halide the CUDA context stored in user_context (or nullptr when no
 * user_context was supplied, letting Halide fall back to its own context).
 * Always reports success. */
int halide_cuda_acquire_context(void *user_context, CUcontext *ctx, bool create = true) {
    if (user_context == nullptr) {
        *ctx = nullptr;
    } else {
        auto *info = static_cast<Halide::PyTorch::UserContext *>(user_context);
        *ctx = *info->cuda_context;
    }
    return halide_error_code_success;
}

/** Hand Halide the CUDA stream stored in user_context (or the default stream
 * 0 when no user_context was supplied). Always reports success. */
int halide_cuda_get_stream(void *user_context, CUcontext ctx, CUstream *stream) {
    if (user_context == nullptr) {
        *stream = 0;
    } else {
        auto *info = static_cast<Halide::PyTorch::UserContext *>(user_context);
        *stream = *info->stream;
    }
    return halide_error_code_success;
}

/** Report the device ordinal stored in user_context; defaults to device 0
 * when no user_context was supplied. */
int halide_get_gpu_device(void *user_context) {
    if (user_context == nullptr) {
        return 0;
    }
    auto *info = static_cast<Halide::PyTorch::UserContext *>(user_context);
    return info->device_id;
}

}  // extern "C"

#endif /* end of include guard: HL_PYTORCH_CUDA_HELPERS_H */
#ifndef HL_PYTORCH_WRAPPER_H
#define HL_PYTORCH_WRAPPER_H

/** \file
 * Set of utility functions to wrap PyTorch tensors into Halide buffers,
 * making sure the data is on the correct device (CPU/GPU). This header
 * is included in each generated op by the PyTorch CodeGen.
 */

#include <exception>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include "HalideBuffer.h"

// Forward declare the cuda_device_interface, for tensor wrapper.
extern "C" const halide_device_interface_t *halide_cuda_device_interface();

#define HLPT_CHECK_CONTIGUOUS(x) AT_ASSERTM(x.is_contiguous(), #x " must be contiguous")
#define HLPT_CHECK_CUDA(x) AT_ASSERTM(x.type().is_cuda(), #x " must be a CUDA tensor")
#define HLPT_CHECK_DEVICE(x, dev) AT_ASSERTM(x.is_cuda() && x.get_device() == dev, #x " must be a CUDA tensor")

namespace Halide {
namespace PyTorch {

using Halide::Runtime::Buffer;

/** Return the tensor's extents in Halide order (innermost dimension first).
 * PyTorch lists dimensions outermost-first, so the sizes are reversed here.
 * Takes the tensor by const reference — the previous by-value signature
 * copied the at::Tensor handle (a refcount bump) on every call for no
 * benefit; call sites are unchanged. */
inline std::vector<int> get_dims(const at::Tensor &tensor) {
    int ndims = tensor.ndimension();
    std::vector<int> dims(ndims, 0);
    // PyTorch dim order is reverse of Halide
    for (int dim = 0; dim < ndims; ++dim) {
        dims[dim] = tensor.size(ndims - 1 - dim);
    }
    return dims;
}

/** Primary template: reached only when no specialization below matches the
 * requested scalar type, i.e. the type is not supported by this wrapper;
 * always raises an error. */
template<class scalar_t>
inline void check_type(at::Tensor &tensor) {
    AT_ERROR("Scalar type ", tensor.scalar_type(), " not handled by Halide's PyTorch wrapper");
}

// TODO: if PyTorch exposes any variable with the API version,
// I haven't found it in source or documentation; for now, we'll sniff
// this macro's existence to infer that we are building with v1.3+ (vs 1.2)
#ifdef AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_AND_QINTS
#define HL_PYTORCH_API_VERSION 13
#else
#define HL_PYTORCH_API_VERSION 12
#endif

#if HL_PYTORCH_API_VERSION >= 13

// PyTorch 1.3+
// Generate a check_type<> specialization per (C type, ScalarType) pair that
// asserts the tensor's runtime scalar type matches the compile-time one.
#define HL_PT_DEFINE_TYPECHECK(ctype, ttype)                                              \
    template<>                                                                            \
    inline void check_type<ctype>(at::Tensor & tensor) {                                  \
        AT_ASSERTM(tensor.scalar_type() == at::ScalarType::ttype, "scalar type do not match"); \
    }

AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_AND_QINTS(HL_PT_DEFINE_TYPECHECK);

#undef HL_PT_DEFINE_TYPECHECK

#else  // HL_PYTORCH_API_VERSION < 13

// PyTorch 1.2
// Same generator, but the 1.2-era X-macro passes a third argument we ignore.
#define HL_PT_DEFINE_TYPECHECK(ctype, ttype, _3)                                          \
    template<>                                                                            \
    inline void check_type<ctype>(at::Tensor & tensor) {                                  \
        AT_ASSERTM(tensor.scalar_type() == at::ScalarType::ttype, "scalar type do not match"); \
    }

AT_FORALL_SCALAR_TYPES_WITH_COMPLEX(HL_PT_DEFINE_TYPECHECK);

#undef HL_PT_DEFINE_TYPECHECK

#endif  // HL_PYTORCH_API_VERSION check

/** Wrap a CPU tensor's storage in a host-memory Halide buffer of matching
 * element type and (reversed) extents. No data is copied; the buffer aliases
 * the tensor, so the tensor must outlive the buffer. Asserts if the tensor's
 * scalar type does not match scalar_t. */
template<class scalar_t>
inline Buffer<scalar_t> wrap(at::Tensor &tensor) {
    check_type<scalar_t>(tensor);
    std::vector<int> dims = get_dims(tensor);
#if HL_PYTORCH_API_VERSION >= 13
    scalar_t *pData = tensor.data_ptr<scalar_t>();
#else
    scalar_t *pData = tensor.data<scalar_t>();
#endif
    return Buffer<scalar_t>(pData, dims);
}

/** Wrap a CUDA tensor's device allocation in a Halide buffer via
 * device_wrap_native. No data is copied; the buffer aliases the tensor's
 * GPU memory, so the tensor must outlive the buffer. The buffer is marked
 * device-dirty so any host-side reads through Halide trigger a copy-back.
 * Asserts if the tensor is not on a CUDA device or the scalar type does
 * not match scalar_t. */
template<class scalar_t>
inline Buffer<scalar_t> wrap_cuda(at::Tensor &tensor) {
    check_type<scalar_t>(tensor);
    std::vector<int> dims = get_dims(tensor);
#if HL_PYTORCH_API_VERSION >= 13
    scalar_t *pData = tensor.data_ptr<scalar_t>();
#else
    scalar_t *pData = tensor.data<scalar_t>();
#endif
    AT_ASSERTM(tensor.is_cuda(), "expected input tensor to be on a CUDA device.");

    Buffer<scalar_t> buffer(dims);

    const halide_device_interface_t *cuda_interface = halide_cuda_device_interface();
    int err = buffer.device_wrap_native(cuda_interface, (uint64_t)pData);
    AT_ASSERTM(err == 0, "(CUDA) halide_device_wrap failed");

    buffer.set_device_dirty();

    return buffer;
}

}  // namespace PyTorch
}  // namespace Halide

#endif  // HL_PYTORCH_WRAPPER_H