halide 19.0.0__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- halide/__init__.py +39 -0
- halide/_generator_helpers.py +835 -0
- halide/bin/Halide.dll +0 -0
- halide/bin/adams2019_retrain_cost_model.exe +0 -0
- halide/bin/adams2019_weightsdir_to_weightsfile.exe +0 -0
- halide/bin/anderson2021_retrain_cost_model.exe +0 -0
- halide/bin/anderson2021_weightsdir_to_weightsfile.exe +0 -0
- halide/bin/featurization_to_sample.exe +0 -0
- halide/bin/gengen.exe +0 -0
- halide/bin/get_host_target.exe +0 -0
- halide/halide_.cp311-win_amd64.pyd +0 -0
- halide/imageio.py +60 -0
- halide/include/Halide.h +35293 -0
- halide/include/HalideBuffer.h +2618 -0
- halide/include/HalidePyTorchCudaHelpers.h +64 -0
- halide/include/HalidePyTorchHelpers.h +120 -0
- halide/include/HalideRuntime.h +2221 -0
- halide/include/HalideRuntimeCuda.h +89 -0
- halide/include/HalideRuntimeD3D12Compute.h +91 -0
- halide/include/HalideRuntimeHexagonDma.h +104 -0
- halide/include/HalideRuntimeHexagonHost.h +157 -0
- halide/include/HalideRuntimeMetal.h +112 -0
- halide/include/HalideRuntimeOpenCL.h +119 -0
- halide/include/HalideRuntimeQurt.h +32 -0
- halide/include/HalideRuntimeVulkan.h +137 -0
- halide/include/HalideRuntimeWebGPU.h +44 -0
- halide/lib/Halide.lib +0 -0
- halide/lib/HalidePyStubs.lib +0 -0
- halide/lib/Halide_GenGen.lib +0 -0
- halide/lib/autoschedule_adams2019.dll +0 -0
- halide/lib/autoschedule_anderson2021.dll +0 -0
- halide/lib/autoschedule_li2018.dll +0 -0
- halide/lib/autoschedule_mullapudi2016.dll +0 -0
- halide/lib/cmake/Halide/FindHalide_LLVM.cmake +152 -0
- halide/lib/cmake/Halide/FindV8.cmake +33 -0
- halide/lib/cmake/Halide/Halide-shared-deps.cmake +0 -0
- halide/lib/cmake/Halide/Halide-shared-targets-release.cmake +29 -0
- halide/lib/cmake/Halide/Halide-shared-targets.cmake +154 -0
- halide/lib/cmake/Halide/HalideConfig.cmake +162 -0
- halide/lib/cmake/Halide/HalideConfigVersion.cmake +65 -0
- halide/lib/cmake/HalideHelpers/FindHalide_WebGPU.cmake +27 -0
- halide/lib/cmake/HalideHelpers/Halide-Interfaces-release.cmake +112 -0
- halide/lib/cmake/HalideHelpers/Halide-Interfaces.cmake +236 -0
- halide/lib/cmake/HalideHelpers/HalideGeneratorHelpers.cmake +1056 -0
- halide/lib/cmake/HalideHelpers/HalideHelpersConfig.cmake +28 -0
- halide/lib/cmake/HalideHelpers/HalideHelpersConfigVersion.cmake +54 -0
- halide/lib/cmake/HalideHelpers/HalideTargetHelpers.cmake +99 -0
- halide/lib/cmake/HalideHelpers/MutexCopy.ps1 +31 -0
- halide/lib/cmake/HalideHelpers/TargetExportScript.cmake +55 -0
- halide/lib/cmake/Halide_Python/Halide_Python-targets-release.cmake +29 -0
- halide/lib/cmake/Halide_Python/Halide_Python-targets.cmake +125 -0
- halide/lib/cmake/Halide_Python/Halide_PythonConfig.cmake +26 -0
- halide/lib/cmake/Halide_Python/Halide_PythonConfigVersion.cmake +65 -0
- halide/share/doc/Halide/LICENSE.txt +233 -0
- halide/share/doc/Halide/README.md +439 -0
- halide/share/doc/Halide/doc/BuildingHalideWithCMake.md +626 -0
- halide/share/doc/Halide/doc/CodeStyleCMake.md +393 -0
- halide/share/doc/Halide/doc/FuzzTesting.md +104 -0
- halide/share/doc/Halide/doc/HalideCMakePackage.md +812 -0
- halide/share/doc/Halide/doc/Hexagon.md +73 -0
- halide/share/doc/Halide/doc/Python.md +844 -0
- halide/share/doc/Halide/doc/RunGen.md +283 -0
- halide/share/doc/Halide/doc/Testing.md +125 -0
- halide/share/doc/Halide/doc/Vulkan.md +287 -0
- halide/share/doc/Halide/doc/WebAssembly.md +228 -0
- halide/share/doc/Halide/doc/WebGPU.md +128 -0
- halide/share/tools/RunGen.h +1470 -0
- halide/share/tools/RunGenMain.cpp +642 -0
- halide/share/tools/adams2019_autotune_loop.sh +227 -0
- halide/share/tools/anderson2021_autotune_loop.sh +591 -0
- halide/share/tools/halide_benchmark.h +240 -0
- halide/share/tools/halide_image.h +31 -0
- halide/share/tools/halide_image_info.h +318 -0
- halide/share/tools/halide_image_io.h +2794 -0
- halide/share/tools/halide_malloc_trace.h +102 -0
- halide/share/tools/halide_thread_pool.h +161 -0
- halide/share/tools/halide_trace_config.h +559 -0
- halide-19.0.0.data/data/share/cmake/Halide/HalideConfig.cmake +6 -0
- halide-19.0.0.data/data/share/cmake/Halide/HalideConfigVersion.cmake +65 -0
- halide-19.0.0.data/data/share/cmake/HalideHelpers/HalideHelpersConfig.cmake +6 -0
- halide-19.0.0.data/data/share/cmake/HalideHelpers/HalideHelpersConfigVersion.cmake +54 -0
- halide-19.0.0.dist-info/METADATA +301 -0
- halide-19.0.0.dist-info/RECORD +85 -0
- halide-19.0.0.dist-info/WHEEL +5 -0
- halide-19.0.0.dist-info/licenses/LICENSE.txt +233 -0
@@ -0,0 +1,89 @@
|
|
1
|
+
#ifndef HALIDE_HALIDERUNTIMECUDA_H
|
2
|
+
#define HALIDE_HALIDERUNTIMECUDA_H
|
3
|
+
|
4
|
+
// Don't include HalideRuntime.h if the contents of it were already pasted into a generated header above this one
|
5
|
+
#ifndef HALIDE_HALIDERUNTIME_H
|
6
|
+
|
7
|
+
#include "HalideRuntime.h"
|
8
|
+
|
9
|
+
#endif
|
10
|
+
|
11
|
+
#ifdef __cplusplus
|
12
|
+
extern "C" {
|
13
|
+
#endif
|
14
|
+
|
15
|
+
/** \file
|
16
|
+
* Routines specific to the Halide Cuda runtime.
|
17
|
+
*/
|
18
|
+
|
19
|
+
#define HALIDE_RUNTIME_CUDA
|
20
|
+
|
21
|
+
extern const struct halide_device_interface_t *halide_cuda_device_interface();
|
22
|
+
|
23
|
+
/** These are forward declared here to allow clients to override the
|
24
|
+
* Halide Cuda runtime. Do not call them. */
|
25
|
+
// @{
|
26
|
+
extern int halide_cuda_initialize_kernels(void *user_context, void **state_ptr,
|
27
|
+
const char *src, int size);
|
28
|
+
extern int halide_cuda_run(void *user_context,
|
29
|
+
void *state_ptr,
|
30
|
+
const char *entry_name,
|
31
|
+
int blocksX, int blocksY, int blocksZ,
|
32
|
+
int threadsX, int threadsY, int threadsZ,
|
33
|
+
int shared_mem_bytes,
|
34
|
+
size_t arg_sizes[],
|
35
|
+
void *args[],
|
36
|
+
int8_t arg_is_buffer[]);
|
37
|
+
extern void halide_cuda_finalize_kernels(void *user_context, void *state_ptr);
|
38
|
+
// @}
|
39
|
+
|
40
|
+
/** Set the underlying cuda device pointer for a buffer. The device
|
41
|
+
* pointer should be allocated using cuMemAlloc or similar and must
|
42
|
+
* have an extent large enough to cover that specified by the
|
43
|
+
* halide_buffer_t extent fields. The dev field of the halide_buffer_t
|
44
|
+
* must be NULL when this routine is called. This call can fail due to
|
45
|
+
* being passed an invalid device pointer. The device and host dirty
|
46
|
+
* bits are left unmodified. */
|
47
|
+
extern int halide_cuda_wrap_device_ptr(void *user_context, struct halide_buffer_t *buf, uint64_t device_ptr);
|
48
|
+
|
49
|
+
/** Disconnect this halide_buffer_t from the device pointer it was
|
50
|
+
* previously wrapped around. Should only be called for a
|
51
|
+
* halide_buffer_t that halide_cuda_wrap_device_ptr was previously
|
52
|
+
* called on. The device field of the halide_buffer_t will be NULL on
|
53
|
+
* return.
|
54
|
+
*/
|
55
|
+
extern int halide_cuda_detach_device_ptr(void *user_context, struct halide_buffer_t *buf);
|
56
|
+
|
57
|
+
/** Return the underlying device pointer for a halide_buffer_t. This buffer
|
58
|
+
* must be valid on a Cuda device, or not have any associated device
|
59
|
+
* memory. If there is no device memory (dev field is NULL), this
|
60
|
+
* returns 0.
|
61
|
+
*/
|
62
|
+
extern uintptr_t halide_cuda_get_device_ptr(void *user_context, struct halide_buffer_t *buf);
|
63
|
+
|
64
|
+
/** Release any currently-unused device allocations back to the cuda
|
65
|
+
* driver. See halide_reuse_device_allocations. */
|
66
|
+
extern int halide_cuda_release_unused_device_allocations(void *user_context);
|
67
|
+
|
68
|
+
// These typedefs treat both a CUcontext and a CUstream as a void *,
|
69
|
+
// to avoid dependencies on cuda headers.
|
70
|
+
typedef int (*halide_cuda_acquire_context_t)(void *, // user_context
|
71
|
+
void **, // cuda context out parameter
|
72
|
+
bool); // should create a context if none exist
|
73
|
+
typedef int (*halide_cuda_release_context_t)(void * /* user_context */);
|
74
|
+
typedef int (*halide_cuda_get_stream_t)(void *, // user_context
|
75
|
+
void *, // context
|
76
|
+
void **); // stream out parameter
|
77
|
+
|
78
|
+
/** Set custom methods to acquire and release cuda contexts and streams */
|
79
|
+
// @{
|
80
|
+
extern halide_cuda_acquire_context_t halide_set_cuda_acquire_context(halide_cuda_acquire_context_t handler);
|
81
|
+
extern halide_cuda_release_context_t halide_set_cuda_release_context(halide_cuda_release_context_t handler);
|
82
|
+
extern halide_cuda_get_stream_t halide_set_cuda_get_stream(halide_cuda_get_stream_t handler);
|
83
|
+
// @}
|
84
|
+
|
85
|
+
#ifdef __cplusplus
|
86
|
+
} // End extern "C"
|
87
|
+
#endif
|
88
|
+
|
89
|
+
#endif // HALIDE_HALIDERUNTIMECUDA_H
|
@@ -0,0 +1,91 @@
|
|
1
|
+
#ifndef HALIDE_HALIDERUNTIMED3D12COMPUTE_H
|
2
|
+
#define HALIDE_HALIDERUNTIMED3D12COMPUTE_H
|
3
|
+
|
4
|
+
// Don't include HalideRuntime.h if the contents of it were already pasted into a generated header above this one
|
5
|
+
#ifndef HALIDE_HALIDERUNTIME_H
|
6
|
+
|
7
|
+
#include "HalideRuntime.h"
|
8
|
+
|
9
|
+
#endif
|
10
|
+
|
11
|
+
#ifdef __cplusplus
|
12
|
+
extern "C" {
|
13
|
+
#endif
|
14
|
+
|
15
|
+
/** \file
|
16
|
+
* Routines specific to the Halide Direct3D 12 Compute runtime.
|
17
|
+
*/
|
18
|
+
|
19
|
+
extern const struct halide_device_interface_t *halide_d3d12compute_device_interface();
|
20
|
+
|
21
|
+
/** These are forward declared here to allow clients to override the
|
22
|
+
* Halide Direct3D 12 Compute runtime. Do not call them. */
|
23
|
+
// @{
|
24
|
+
extern int halide_d3d12compute_initialize_kernels(void *user_context, void **state_ptr,
|
25
|
+
const char *src, int size);
|
26
|
+
|
27
|
+
extern int halide_d3d12compute_run(void *user_context,
|
28
|
+
void *state_ptr,
|
29
|
+
const char *entry_name,
|
30
|
+
int blocksX, int blocksY, int blocksZ,
|
31
|
+
int threadsX, int threadsY, int threadsZ,
|
32
|
+
int shared_mem_bytes,
|
33
|
+
struct halide_type_t arg_types[], void *args[], int8_t arg_is_buffer[]);
|
34
|
+
extern void halide_d3d12compute_finalize_kernels(void *user_context, void *state_ptr);
|
35
|
+
// @}
|
36
|
+
|
37
|
+
/** Set the underlying ID3D12Resource for a halide_buffer_t. The memory backing
|
38
|
+
* the resource should be managed by the caller (via a default/device heap) and
|
39
|
+
* must be large enough to cover the extent of the halide_buffer_t. The device
|
40
|
+
* field of the halide_buffer_t must be NULL when this routine is called. This
|
41
|
+
* call can fail due to running out of memory or if an invalid D3D12 resource is
|
42
|
+
* passed. The device and host dirty bits are left unmodified. */
|
43
|
+
extern int halide_d3d12compute_wrap_buffer(void *user_context, struct halide_buffer_t *buf, uint64_t d3d12_resource);
|
44
|
+
|
45
|
+
/** Disconnect a halide_buffer_t from the ID3D12Resource it was previously
|
46
|
+
* wrapped around. Should only be called for a halide_buffer_t that
|
47
|
+
* halide_d3d12compute_wrap_buffer was previously called on. Frees any
|
48
|
+
* storage associated with the binding of the halide_buffer_t and the
|
49
|
+
* buffer, but does not free the ID3D12Resource. The dev field of the
|
50
|
+
* halide_buffer_t will be NULL on return.
|
51
|
+
*/
|
52
|
+
extern int halide_d3d12compute_detach_buffer(void *user_context, struct halide_buffer_t *buf);
|
53
|
+
|
54
|
+
/** Return the underlying ID3D12Resource for a halide_buffer_t. This resource
|
55
|
+
* must be valid on a D3D12 device, unless halide_buffer_t has no associated
|
56
|
+
* resource. If there is no device memory (device field is NULL), returns 0.
|
57
|
+
*/
|
58
|
+
extern uintptr_t halide_d3d12compute_get_buffer(void *user_context, struct halide_buffer_t *buf);
|
59
|
+
|
60
|
+
struct halide_d3d12compute_device;
|
61
|
+
struct halide_d3d12compute_command_queue;
|
62
|
+
|
63
|
+
/** This prototype is exported as applications will typically need to
|
64
|
+
* replace it to get Halide filters to execute on the same device and
|
65
|
+
* command queue used for other purposes. The halide_d3d12compute_device is an
|
66
|
+
* ID3D12Device and halide_d3d12compute_command_queue is an ID3D12CommandQueue.
|
67
|
+
* No reference counting is done by Halide on these objects. They must remain
|
68
|
+
* valid until all of the following are true:
|
69
|
+
* - A balancing halide_d3d12compute_release_context has occurred for each
|
70
|
+
* halide_d3d12compute_acquire_context which returned the device/queue
|
71
|
+
* - All Halide filters using the context information have completed
|
72
|
+
* - All halide_buffer_t objects on the device have had
|
73
|
+
* halide_device_free called or have been detached via
|
74
|
+
* halide_d3d12compute_detach_buffer.
|
75
|
+
* - halide_device_release has been called on the interface returned from
|
76
|
+
* halide_d3d12compute_device_interface(). (This releases the programs on the context.)
|
77
|
+
*/
|
78
|
+
extern int halide_d3d12compute_acquire_context(void *user_context, struct halide_d3d12compute_device **device_ret,
|
79
|
+
struct halide_d3d12compute_command_queue **queue_ret, bool create);
|
80
|
+
|
81
|
+
/** This call balances each successful halide_d3d12compute_acquire_context call.
|
82
|
+
* If halide_d3d12compute_acquire_context is replaced, this routine must be replaced
|
83
|
+
* as well.
|
84
|
+
*/
|
85
|
+
extern int halide_d3d12compute_release_context(void *user_context);
|
86
|
+
|
87
|
+
#ifdef __cplusplus
|
88
|
+
} // End extern "C"
|
89
|
+
#endif
|
90
|
+
|
91
|
+
#endif // HALIDE_HALIDERUNTIMED3D12COMPUTE_H
|
@@ -0,0 +1,104 @@
|
|
1
|
+
#ifndef HALIDE_HALIDERUNTIMEHEXAGONDMA_H
|
2
|
+
#define HALIDE_HALIDERUNTIMEHEXAGONDMA_H
|
3
|
+
|
4
|
+
/** \file
|
5
|
+
* Routines specific to the Halide Hexagon DMA host-side runtime.
|
6
|
+
*/
|
7
|
+
|
8
|
+
// Don't include HalideRuntime.h if the contents of it were already pasted into a generated header above this one
|
9
|
+
#ifndef HALIDE_HALIDERUNTIME_H
|
10
|
+
|
11
|
+
#include "HalideRuntime.h"
|
12
|
+
|
13
|
+
#endif
|
14
|
+
|
15
|
+
// Don't include HalideRuntimeHexagonHost.h if the contents of it were already pasted into a generated header above this one
|
16
|
+
#ifndef HALIDE_HALIDERUNTIMEHEXAGONHOST_H
|
17
|
+
|
18
|
+
#include "HalideRuntimeHexagonHost.h"
|
19
|
+
|
20
|
+
#endif
|
21
|
+
|
22
|
+
#ifdef __cplusplus
|
23
|
+
extern "C" {
|
24
|
+
#endif
|
25
|
+
|
26
|
+
/**
|
27
|
+
* \defgroup rt_hexagon_dma Halide Hexagon DMA runtime
|
28
|
+
* @{
|
29
|
+
*/
|
30
|
+
|
31
|
+
/**
|
32
|
+
* Image Formats to prepare the application for DMA Transfer
|
33
|
+
*/
|
34
|
+
typedef enum {
|
35
|
+
halide_hexagon_fmt_RawData,
|
36
|
+
halide_hexagon_fmt_NV12,
|
37
|
+
halide_hexagon_fmt_NV12_Y,
|
38
|
+
halide_hexagon_fmt_NV12_UV,
|
39
|
+
halide_hexagon_fmt_P010,
|
40
|
+
halide_hexagon_fmt_P010_Y,
|
41
|
+
halide_hexagon_fmt_P010_UV,
|
42
|
+
halide_hexagon_fmt_TP10,
|
43
|
+
halide_hexagon_fmt_TP10_Y,
|
44
|
+
halide_hexagon_fmt_TP10_UV,
|
45
|
+
halide_hexagon_fmt_NV124R,
|
46
|
+
halide_hexagon_fmt_NV124R_Y,
|
47
|
+
halide_hexagon_fmt_NV124R_UV
|
48
|
+
} halide_hexagon_image_fmt_t;
|
49
|
+
|
50
|
+
extern const struct halide_device_interface_t *halide_hexagon_dma_device_interface();
|
51
|
+
|
52
|
+
/** This API is used to set up the DMA device interface to be used for DMA transfer. This also internally
|
53
|
+
* creates the DMA device handle and populates all the Buffer related parameters (width, height, stride)
|
54
|
+
* to be used for DMA configuration.
|
55
|
+
*/
|
56
|
+
extern int halide_hexagon_dma_device_wrap_native(void *user_context, struct halide_buffer_t *buf,
|
57
|
+
uint64_t mem);
|
58
|
+
|
59
|
+
/** Detach the Input/Output Buffer from DMA device handle and deallocate the DMA device handle buffer allocation
|
60
|
+
* This API also frees up the DMA device and makes it available for another usage.
|
61
|
+
*/
|
62
|
+
extern int halide_hexagon_dma_device_detach_native(void *user_context, struct halide_buffer_t *buf);
|
63
|
+
|
64
|
+
/** This API will allocate a DMA Engine needed for DMA read/write. This is the first step before
|
65
|
+
* a buffer can be used in a copy operation (i.e. a DMA read/write operation).
|
66
|
+
*/
|
67
|
+
extern int halide_hexagon_dma_allocate_engine(void *user_context, void **dma_engine);
|
68
|
+
|
69
|
+
/** This API frees up the allocated DMA engine. This needs to be called after a user program ends
|
70
|
+
* all the DMA Operations and make it available for subsequent DMA transfers */
|
71
|
+
extern int halide_hexagon_dma_deallocate_engine(void *user_context, void *dma_engine);
|
72
|
+
|
73
|
+
/** This API Prepares a buffer for DMA Read Operation. This will setup the DMA format, direction (read).
|
74
|
+
* Will also make necessary adjustments to the DMA frame parameters based on Image format provided.
|
75
|
+
*/
|
76
|
+
extern int halide_hexagon_dma_prepare_for_copy_to_host(void *user_context, struct halide_buffer_t *buf,
|
77
|
+
void *dma_engine, bool is_ubwc, halide_hexagon_image_fmt_t fmt);
|
78
|
+
|
79
|
+
/** This API Prepares a buffer for DMA Write Operation. This will setup the DMA format, direction (write).
|
80
|
+
* Will also make necessary adjustments to the DMA frame parameters based on Image format provided.
|
81
|
+
*/
|
82
|
+
extern int halide_hexagon_dma_prepare_for_copy_to_device(void *user_context, struct halide_buffer_t *buf,
|
83
|
+
void *dma_engine, bool is_ubwc,
|
84
|
+
halide_hexagon_image_fmt_t fmt);
|
85
|
+
|
86
|
+
/** This API is used to free up the DMA Resources associated with the buffer.
|
87
|
+
* TODO: Currently this API is a dummy as all the necessary freeing is done in another API.
|
88
|
+
* This will be used in future.
|
89
|
+
*/
|
90
|
+
extern int halide_hexagon_dma_unprepare(void *user_context, struct halide_buffer_t *buf);
|
91
|
+
|
92
|
+
/** This API is used to setup the hexagon Operation modes. We will setup the necessary Operating frequency
|
93
|
+
* based on the power mode chosen. Check the enum halide_hexagon_power_mode_t defined in HalideRuntimeHexagonHost.h
|
94
|
+
* for the supported power modes.
|
95
|
+
*/
|
96
|
+
extern int halide_hexagon_dma_power_mode_voting(void *user_context, halide_hexagon_power_mode_t cornercase);
|
97
|
+
|
98
|
+
///@}
|
99
|
+
|
100
|
+
#ifdef __cplusplus
|
101
|
+
} // End extern "C"
|
102
|
+
#endif
|
103
|
+
|
104
|
+
#endif // HALIDE_HALIDERUNTIMEHEXAGONDMA_H
|
@@ -0,0 +1,157 @@
|
|
1
|
+
#ifndef HALIDE_HALIDERUNTIMEHEXAGONHOST_H
|
2
|
+
#define HALIDE_HALIDERUNTIMEHEXAGONHOST_H
|
3
|
+
|
4
|
+
// Don't include HalideRuntime.h if the contents of it were already pasted into a generated header above this one
|
5
|
+
#ifndef HALIDE_HALIDERUNTIME_H
|
6
|
+
|
7
|
+
#include "HalideRuntime.h"
|
8
|
+
|
9
|
+
#endif
|
10
|
+
|
11
|
+
#ifdef __cplusplus
|
12
|
+
extern "C" {
|
13
|
+
#endif
|
14
|
+
|
15
|
+
/** \file
|
16
|
+
* Routines specific to the Halide Hexagon host-side runtime.
|
17
|
+
*/
|
18
|
+
|
19
|
+
#define HALIDE_RUNTIME_HEXAGON
|
20
|
+
|
21
|
+
typedef int halide_hexagon_handle_t;
|
22
|
+
|
23
|
+
extern const struct halide_device_interface_t *halide_hexagon_device_interface();
|
24
|
+
|
25
|
+
/** Check if the Hexagon runtime (libhalide_hexagon_host.so) is
|
26
|
+
* available. If it is not, pipelines using Hexagon will fail. */
|
27
|
+
extern bool halide_is_hexagon_available(void *user_context);
|
28
|
+
|
29
|
+
/** The device handle for Hexagon is simply a pointer and size, stored
|
30
|
+
* in the dev field of the halide_buffer_t. If the buffer is allocated in a
|
31
|
+
* particular way (ion_alloc), the buffer will be shared with Hexagon
|
32
|
+
* (not copied). The device field of the halide_buffer_t must be NULL when this
|
33
|
+
* routine is called. This call can fail due to running out of memory
|
34
|
+
* or being passed an invalid device handle. The device and host
|
35
|
+
* dirty bits are left unmodified. */
|
36
|
+
extern int halide_hexagon_wrap_device_handle(void *user_context, struct halide_buffer_t *buf,
|
37
|
+
void *ptr, uint64_t size);
|
38
|
+
|
39
|
+
/** Disconnect this halide_buffer_t from the device handle it was
|
40
|
+
* previously wrapped around. Should only be called for a
|
41
|
+
* halide_buffer_t that halide_hexagon_wrap_device_handle was
|
42
|
+
* previously called on. Frees any storage associated with the binding
|
43
|
+
* of the halide_buffer_t and the device handle, but does not free the
|
44
|
+
* device handle. The device field of the halide_buffer_t will be NULL
|
45
|
+
* on return. */
|
46
|
+
extern int halide_hexagon_detach_device_handle(void *user_context, struct halide_buffer_t *buf);
|
47
|
+
|
48
|
+
/** Return the underlying device handle for a halide_buffer_t. If there is
|
49
|
+
* no device memory (dev field is NULL), this returns 0. */
|
50
|
+
extern void *halide_hexagon_get_device_handle(void *user_context, struct halide_buffer_t *buf);
|
51
|
+
extern uint64_t halide_hexagon_get_device_size(void *user_context, struct halide_buffer_t *buf);
|
52
|
+
|
53
|
+
/** Return a pointer to the module_state. */
|
54
|
+
extern void *halide_hexagon_get_module_state(void *user_context, void **host);
|
55
|
+
|
56
|
+
/** Power HVX on and off. Calling a Halide pipeline will do this
|
57
|
+
* automatically on each pipeline invocation; however, it costs a
|
58
|
+
* small but possibly significant amount of time for short running
|
59
|
+
* pipelines. To avoid this cost, HVX can be powered on prior to
|
60
|
+
* running several pipelines, and powered off afterwards. If HVX is
|
61
|
+
* powered on, subsequent calls to power HVX on will be cheap. */
|
62
|
+
// @{
|
63
|
+
extern int halide_hexagon_power_hvx_on(void *user_context);
|
64
|
+
extern int halide_hexagon_power_hvx_off(void *user_context);
|
65
|
+
extern void halide_hexagon_power_hvx_off_as_destructor(void *user_context, void * /* obj */);
|
66
|
+
// @}
|
67
|
+
|
68
|
+
/** Power modes for Hexagon. */
|
69
|
+
typedef enum halide_hexagon_power_mode_t {
|
70
|
+
halide_hexagon_power_low = 0,
|
71
|
+
halide_hexagon_power_nominal = 1,
|
72
|
+
halide_hexagon_power_turbo = 2,
|
73
|
+
halide_hexagon_power_default = 3, /// Resets power to its default state.
|
74
|
+
halide_hexagon_power_low_plus = 4,
|
75
|
+
halide_hexagon_power_low_2 = 5,
|
76
|
+
halide_hexagon_power_nominal_plus = 6,
|
77
|
+
|
78
|
+
// These are deprecated.
|
79
|
+
halide_hvx_power_low = halide_hexagon_power_low,
|
80
|
+
halide_hvx_power_nominal = halide_hexagon_power_nominal,
|
81
|
+
halide_hvx_power_turbo = halide_hexagon_power_turbo,
|
82
|
+
halide_hvx_power_default = halide_hexagon_power_default,
|
83
|
+
} halide_hexagon_power_mode_t;
|
84
|
+
|
85
|
+
/** More detailed power settings to control Hexagon.
|
86
|
+
* @param set_mips - Set to TRUE to request MIPS
|
87
|
+
* @param mipsPerThread - mips requested per thread, to establish a minimal clock frequency per HW thread
|
88
|
+
* @param mipsTotal - Total mips requested, to establish total number of MIPS required across all HW threads
|
89
|
+
* @param set_bus_bw - Set to TRUE to request bus_bw
|
90
|
+
* @param bwMegabytesPerSec - Max bus BW requested (megabytes per second)
|
91
|
+
* @param busbwUsagePercentage - Percentage of time during which bwMegabytesPerSec BW is required from the bus (0..100)
|
92
|
+
* @param set_latency - Set to TRUE to set latency
|
93
|
+
* @param latency - maximum hardware wakeup latency in microseconds. The
|
94
|
+
* higher the value the deeper state of sleep
|
95
|
+
* that can be entered but the longer it may
|
96
|
+
* take to awaken. Only values > 0 are supported (1 microsecond is the smallest valid value)
|
97
|
+
*/
|
98
|
+
typedef struct {
|
99
|
+
bool set_mips;
|
100
|
+
unsigned int mipsPerThread;
|
101
|
+
unsigned int mipsTotal;
|
102
|
+
bool set_bus_bw;
|
103
|
+
unsigned int bwMegabytesPerSec;
|
104
|
+
unsigned short busbwUsagePercentage;
|
105
|
+
bool set_latency;
|
106
|
+
int latency;
|
107
|
+
} halide_hexagon_power_t;
|
108
|
+
|
109
|
+
// This is deprecated.
|
110
|
+
typedef halide_hexagon_power_t halide_hvx_power_perf_t;
|
111
|
+
|
112
|
+
/** Set a performance target for Hexagon. Hexagon applications can
|
113
|
+
* vote for the performance levels they want, which may or may not be
|
114
|
+
* respected by Hexagon. Applications should be careful not to leave
|
115
|
+
* Hexagon in a high power state for too long. These functions can
|
116
|
+
* significantly increase standby power consumption. Use
|
117
|
+
* halide_hexagon_power_default to reset performance to the default
|
118
|
+
* power state. */
|
119
|
+
// @{
|
120
|
+
extern int halide_hexagon_set_performance_mode(void *user_context, halide_hexagon_power_mode_t mode);
|
121
|
+
extern int halide_hexagon_set_performance(void *user_context, halide_hexagon_power_t *perf);
|
122
|
+
// @}
|
123
|
+
|
124
|
+
/** Set the default priority for Halide Hexagon user threads:
|
125
|
+
* - Valid priority values range from 1 to 255
|
126
|
+
* - Smaller number for higher priority
|
127
|
+
* - The highest priority for a user thread is 1
|
128
|
+
* - Priority 0 is reserved for OS usage
|
129
|
+
* If this routine is not called, the priority will default to 100.
|
130
|
+
* This is intended to be called before dispatching any pipeline. */
|
131
|
+
// @{
|
132
|
+
extern int halide_hexagon_set_thread_priority(void *user_context, int priority);
|
133
|
+
// @}
|
134
|
+
|
135
|
+
/** These are forward declared here to allow clients to override the
|
136
|
+
* Halide Hexagon runtime. Do not call them. */
|
137
|
+
// @{
|
138
|
+
extern int halide_hexagon_initialize_kernels(void *user_context,
|
139
|
+
void **module_ptr,
|
140
|
+
const uint8_t *code, uint64_t code_size,
|
141
|
+
const uint8_t *runtime, uint64_t runtime_size);
|
142
|
+
extern int halide_hexagon_run(void *user_context,
|
143
|
+
void *module_ptr,
|
144
|
+
const char *name,
|
145
|
+
halide_hexagon_handle_t *function,
|
146
|
+
uint64_t arg_sizes[],
|
147
|
+
void *args[],
|
148
|
+
int arg_flags[]);
|
149
|
+
extern void halide_hexagon_finalize_kernels(void *user_context, void *state_ptr);
|
150
|
+
extern int halide_hexagon_device_release(void *user_context);
|
151
|
+
// @}
|
152
|
+
|
153
|
+
#ifdef __cplusplus
|
154
|
+
} // End extern "C"
|
155
|
+
#endif
|
156
|
+
|
157
|
+
#endif // HALIDE_HALIDERUNTIMEHEXAGONHOST_H
|
@@ -0,0 +1,112 @@
|
|
1
|
+
#ifndef HALIDE_HALIDERUNTIMEMETAL_H
|
2
|
+
#define HALIDE_HALIDERUNTIMEMETAL_H
|
3
|
+
|
4
|
+
// Don't include HalideRuntime.h if the contents of it were already pasted into a generated header above this one
|
5
|
+
#ifndef HALIDE_HALIDERUNTIME_H
|
6
|
+
|
7
|
+
#include "HalideRuntime.h"
|
8
|
+
|
9
|
+
#endif
|
10
|
+
|
11
|
+
#ifdef __cplusplus
|
12
|
+
extern "C" {
|
13
|
+
#endif
|
14
|
+
|
15
|
+
/** \file
|
16
|
+
* Routines specific to the Halide Metal runtime.
|
17
|
+
*/
|
18
|
+
|
19
|
+
#define HALIDE_RUNTIME_METAL
|
20
|
+
|
21
|
+
extern const struct halide_device_interface_t *halide_metal_device_interface();
|
22
|
+
|
23
|
+
/** These are forward declared here to allow clients to override the
|
24
|
+
* Halide Metal runtime. Do not call them. */
|
25
|
+
// @{
|
26
|
+
extern int halide_metal_initialize_kernels(void *user_context, void **state_ptr,
|
27
|
+
const char *src, int size);
|
28
|
+
void halide_metal_finalize_kernels(void *user_context, void *state_ptr);
|
29
|
+
|
30
|
+
extern int halide_metal_run(void *user_context,
|
31
|
+
void *state_ptr,
|
32
|
+
const char *entry_name,
|
33
|
+
int blocksX, int blocksY, int blocksZ,
|
34
|
+
int threadsX, int threadsY, int threadsZ,
|
35
|
+
int shared_mem_bytes,
|
36
|
+
struct halide_type_t arg_types[],
|
37
|
+
void *args[],
|
38
|
+
int8_t arg_is_buffer[]);
|
39
|
+
// @}
|
40
|
+
|
41
|
+
/** Set the underlying MTLBuffer for a halide_buffer_t. This memory should be
|
42
|
+
* allocated using newBufferWithLength:options or similar and must
|
43
|
+
* have an extent large enough to cover that specified by the halide_buffer_t
|
44
|
+
* extent fields. The dev field of the halide_buffer_t must be NULL when this
|
45
|
+
* routine is called. This call can fail due to running out of memory
|
46
|
+
* or being passed an invalid buffer. The device and host dirty bits
|
47
|
+
* are left unmodified. */
|
48
|
+
extern int halide_metal_wrap_buffer(void *user_context, struct halide_buffer_t *buf, uint64_t buffer);
|
49
|
+
|
50
|
+
/** Disconnect a halide_buffer_t from the memory it was previously
|
51
|
+
* wrapped around. Should only be called for a halide_buffer_t that
|
52
|
+
* halide_metal_wrap_buffer was previously called on. Frees any
|
53
|
+
* storage associated with the binding of the halide_buffer_t and the
|
54
|
+
* buffer, but does not free the MTLBuffer. The dev field of the
|
55
|
+
* halide_buffer_t will be NULL on return.
|
56
|
+
*/
|
57
|
+
extern int halide_metal_detach_buffer(void *user_context, struct halide_buffer_t *buf);
|
58
|
+
|
59
|
+
/** Return the underlying MTLBuffer for a halide_buffer_t. This buffer must be
|
60
|
+
* valid on a Metal device, or not have any associated device
|
61
|
+
* memory. If there is no device memory (dev field is NULL), this
|
62
|
+
* returns 0.
|
63
|
+
*/
|
64
|
+
extern uintptr_t halide_metal_get_buffer(void *user_context, struct halide_buffer_t *buf);
|
65
|
+
|
66
|
+
/** Returns the offset associated with the Metal Buffer allocation via device_crop or device_slice. */
|
67
|
+
extern uint64_t halide_metal_get_crop_offset(void *user_context, struct halide_buffer_t *buf);
|
68
|
+
|
69
|
+
struct halide_metal_device;
|
70
|
+
struct halide_metal_command_queue;
|
71
|
+
struct halide_metal_command_buffer;
|
72
|
+
|
73
|
+
/** This prototype is exported as applications will typically need to
|
74
|
+
* replace it to get Halide filters to execute on the same device and
|
75
|
+
* command queue used for other purposes. The halide_metal_device is an
|
76
|
+
* id \<MTLDevice\> and halide_metal_command_queue is an id \<MTLCommandQueue\>.
|
77
|
+
* No reference counting is done by Halide on these objects. They must remain
|
78
|
+
* valid until all of the following are true:
|
79
|
+
* - A balancing halide_metal_release_context has occurred for each
|
80
|
+
* halide_metal_acquire_context which returned the device/queue
|
81
|
+
* - All Halide filters using the context information have completed
|
82
|
+
* - All halide_buffer_t objects on the device have had
|
83
|
+
* halide_device_free called or have been detached via
|
84
|
+
* halide_metal_detach_buffer.
|
85
|
+
* - halide_device_release has been called on the interface returned from
|
86
|
+
* halide_metal_device_interface(). (This releases the programs on the context.)
|
87
|
+
*/
|
88
|
+
extern int halide_metal_acquire_context(void *user_context, struct halide_metal_device **device_ret,
|
89
|
+
struct halide_metal_command_queue **queue_ret, bool create);
|
90
|
+
|
91
|
+
/** This call balances each successful halide_metal_acquire_context call.
|
92
|
+
* If halide_metal_acquire_context is replaced, this routine must be replaced
|
93
|
+
* as well.
|
94
|
+
*/
|
95
|
+
extern int halide_metal_release_context(void *user_context);
|
96
|
+
|
97
|
+
/** This function is called as part of the callback when a Metal command buffer completes.
|
98
|
+
* The return value, if not halide_error_code_success, will be stashed in Metal runtime and returned
|
99
|
+
* to the next call into the runtime, and the error string will be saved as well.
|
100
|
+
* The error string will be freed by the caller. The return value must be a valid Halide error code.
|
101
|
+
* This is called from the Metal driver, and thus:
|
102
|
+
* - Any user_context must be preserved between the call to halide_metal_run and the corresponding callback
|
103
|
+
* - The function must be thread-safe
|
104
|
+
*/
|
105
|
+
extern int halide_metal_command_buffer_completion_handler(void *user_context, struct halide_metal_command_buffer *buffer,
|
106
|
+
char **returned_error_string);
|
107
|
+
|
108
|
+
#ifdef __cplusplus
|
109
|
+
} // End extern "C"
|
110
|
+
#endif
|
111
|
+
|
112
|
+
#endif // HALIDE_HALIDERUNTIMEMETAL_H
|
@@ -0,0 +1,119 @@
|
|
1
|
+
#ifndef HALIDE_HALIDERUNTIMEOPENCL_H
|
2
|
+
#define HALIDE_HALIDERUNTIMEOPENCL_H
|
3
|
+
|
4
|
+
// Don't include HalideRuntime.h if the contents of it were already pasted into a generated header above this one
|
5
|
+
#ifndef HALIDE_HALIDERUNTIME_H
|
6
|
+
|
7
|
+
#include "HalideRuntime.h"
|
8
|
+
|
9
|
+
#endif
|
10
|
+
|
11
|
+
#ifdef __cplusplus
|
12
|
+
extern "C" {
|
13
|
+
#endif
|
14
|
+
|
15
|
+
/** \file
|
16
|
+
* Routines specific to the Halide OpenCL runtime.
|
17
|
+
*/
|
18
|
+
|
19
|
+
#define HALIDE_RUNTIME_OPENCL
|
20
|
+
|
21
|
+
extern const struct halide_device_interface_t *halide_opencl_device_interface();
|
22
|
+
extern const struct halide_device_interface_t *halide_opencl_image_device_interface();
|
23
|
+
|
24
|
+
/** These are forward declared here to allow clients to override the
|
25
|
+
* Halide OpenCL runtime. Do not call them. */
|
26
|
+
// @{
|
27
|
+
extern int halide_opencl_initialize_kernels(void *user_context, void **state_ptr,
|
28
|
+
const char *src, int size);
|
29
|
+
extern int halide_opencl_run(void *user_context,
|
30
|
+
void *state_ptr,
|
31
|
+
const char *entry_name,
|
32
|
+
int blocksX, int blocksY, int blocksZ,
|
33
|
+
int threadsX, int threadsY, int threadsZ,
|
34
|
+
int shared_mem_bytes,
|
35
|
+
size_t arg_sizes[],
|
36
|
+
void *args[],
|
37
|
+
int8_t arg_is_buffer[]);
|
38
|
+
extern void halide_opencl_finalize_kernels(void *user_context, void *state_ptr);
|
39
|
+
// @}
|
40
|
+
|
41
|
+
/** Set the platform name for OpenCL to use (e.g. "Intel" or
|
42
|
+
* "NVIDIA"). The argument is copied internally. The opencl runtime
|
43
|
+
* will select a platform that includes this as a substring. If never
|
44
|
+
* called, Halide uses the environment variable HL_OCL_PLATFORM_NAME,
|
45
|
+
* or defaults to the first available platform. */
|
46
|
+
extern void halide_opencl_set_platform_name(const char *n);
|
47
|
+
|
48
|
+
/** Halide calls this to get the desired OpenCL platform
|
49
|
+
* name. Implement this yourself to use a different platform per
|
50
|
+
* user_context. The default implementation returns the value set by
|
51
|
+
* halide_opencl_set_platform_name, or the value of the environment
|
52
|
+
* variable HL_OCL_PLATFORM_NAME. The output is valid until the next
|
53
|
+
* call to halide_opencl_set_platform_name. */
|
54
|
+
extern const char *halide_opencl_get_platform_name(void *user_context);
|
55
|
+
|
56
|
+
/** Set the device type for OpenCL to use. The argument is copied
|
57
|
+
* internally. It must be "cpu", "gpu", or "acc". If never called,
|
58
|
+
* Halide uses the environment variable HL_OCL_DEVICE_TYPE. */
|
59
|
+
extern void halide_opencl_set_device_type(const char *n);
|
60
|
+
|
61
|
+
/** Halide calls this to get the desired OpenCL device
|
62
|
+
* type. Implement this yourself to use a different device type per
|
63
|
+
* user_context. The default implementation returns the value set by
|
64
|
+
* halide_opencl_set_device_type, or the environment variable
|
65
|
+
* HL_OCL_DEVICE_TYPE. The result is valid until the next call to
|
66
|
+
* halide_opencl_set_device_type. */
|
67
|
+
extern const char *halide_opencl_get_device_type(void *user_context);
|
68
|
+
|
69
|
+
/** Set the additional build options for OpenCL to use. The argument
|
70
|
+
* is copied internally. If never called,
|
71
|
+
* Halide uses the environment variable HL_OCL_BUILD_OPTIONS. */
|
72
|
+
extern void halide_opencl_set_build_options(const char *n);
|
73
|
+
|
74
|
+
/** Halide calls this to get the additional build options for OpenCL to
|
75
|
+
* use. Implement this yourself to use different build options per
|
76
|
+
* user_context. The default implementation returns the value set by
|
77
|
+
* halide_opencl_set_build_options, or the environment variable
|
78
|
+
* HL_OCL_BUILD_OPTIONS. The result is valid until the next call to
|
79
|
+
* halide_opencl_set_build_options. */
|
80
|
+
extern const char *halide_opencl_get_build_options(void *user_context);
|
81
|
+
|
82
|
+
/** Set the underlying cl_mem for a halide_buffer_t. This memory should be
|
83
|
+
* allocated using clCreateBuffer or similar and must have an extent
|
84
|
+
* large enough to cover that specified by the halide_buffer_t extent
|
85
|
+
* fields. The dev field of the halide_buffer_t must be NULL when this
|
86
|
+
* routine is called. This call can fail due to running out of memory
|
87
|
+
* or being passed an invalid device pointer. The device and host
|
88
|
+
* dirty bits are left unmodified. */
|
89
|
+
extern int halide_opencl_wrap_cl_mem(void *user_context, struct halide_buffer_t *buf, uint64_t device_ptr);
|
90
|
+
|
91
|
+
/** Same as halide_opencl_wrap_cl_mem but wraps a cl_mem created with
|
92
|
+
* clCreateImage
|
93
|
+
*/
|
94
|
+
extern int halide_opencl_image_wrap_cl_mem(void *user_context, struct halide_buffer_t *buf, uint64_t device_ptr);
|
95
|
+
|
96
|
+
/** Disconnect a halide_buffer_t from the memory it was previously
|
97
|
+
* wrapped around. Should only be called for a halide_buffer_t that
|
98
|
+
* halide_opencl_wrap_cl_mem was previously called on. Frees any
|
99
|
+
* storage associated with the binding of the halide_buffer_t and the
|
100
|
+
* device pointer, but does not free the cl_mem. The dev field of the
|
101
|
+
* halide_buffer_t will be NULL on return.
|
102
|
+
*/
|
103
|
+
extern int halide_opencl_detach_cl_mem(void *user_context, struct halide_buffer_t *buf);
|
104
|
+
|
105
|
+
/** Return the underlying cl_mem for a halide_buffer_t. This buffer must be
|
106
|
+
* valid on an OpenCL device, or not have any associated device
|
107
|
+
* memory. If there is no device memory (dev field is NULL), this
|
108
|
+
* returns 0.
|
109
|
+
*/
|
110
|
+
extern uintptr_t halide_opencl_get_cl_mem(void *user_context, struct halide_buffer_t *buf);
|
111
|
+
|
112
|
+
/** Returns the offset associated with the OpenCL memory allocation via device_crop or device_slice. */
|
113
|
+
extern uint64_t halide_opencl_get_crop_offset(void *user_context, struct halide_buffer_t *buf);
|
114
|
+
|
115
|
+
#ifdef __cplusplus
|
116
|
+
} // End extern "C"
|
117
|
+
#endif
|
118
|
+
|
119
|
+
#endif // HALIDE_HALIDERUNTIMEOPENCL_H
|