halide 19.0.0__cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. halide/__init__.py +39 -0
  2. halide/_generator_helpers.py +835 -0
  3. halide/bin/adams2019_retrain_cost_model +0 -0
  4. halide/bin/adams2019_weightsdir_to_weightsfile +0 -0
  5. halide/bin/anderson2021_retrain_cost_model +0 -0
  6. halide/bin/anderson2021_weightsdir_to_weightsfile +0 -0
  7. halide/bin/featurization_to_sample +0 -0
  8. halide/bin/gengen +0 -0
  9. halide/bin/get_host_target +0 -0
  10. halide/halide_.cpython-310-x86_64-linux-gnu.so +0 -0
  11. halide/imageio.py +60 -0
  12. halide/include/Halide.h +35293 -0
  13. halide/include/HalideBuffer.h +2618 -0
  14. halide/include/HalidePyTorchCudaHelpers.h +64 -0
  15. halide/include/HalidePyTorchHelpers.h +120 -0
  16. halide/include/HalideRuntime.h +2221 -0
  17. halide/include/HalideRuntimeCuda.h +89 -0
  18. halide/include/HalideRuntimeD3D12Compute.h +91 -0
  19. halide/include/HalideRuntimeHexagonDma.h +104 -0
  20. halide/include/HalideRuntimeHexagonHost.h +157 -0
  21. halide/include/HalideRuntimeMetal.h +112 -0
  22. halide/include/HalideRuntimeOpenCL.h +119 -0
  23. halide/include/HalideRuntimeQurt.h +32 -0
  24. halide/include/HalideRuntimeVulkan.h +137 -0
  25. halide/include/HalideRuntimeWebGPU.h +44 -0
  26. halide/lib64/cmake/Halide/FindHalide_LLVM.cmake +152 -0
  27. halide/lib64/cmake/Halide/FindV8.cmake +33 -0
  28. halide/lib64/cmake/Halide/Halide-shared-deps.cmake +0 -0
  29. halide/lib64/cmake/Halide/Halide-shared-targets-release.cmake +29 -0
  30. halide/lib64/cmake/Halide/Halide-shared-targets.cmake +154 -0
  31. halide/lib64/cmake/Halide/HalideConfig.cmake +162 -0
  32. halide/lib64/cmake/Halide/HalideConfigVersion.cmake +65 -0
  33. halide/lib64/cmake/HalideHelpers/FindHalide_WebGPU.cmake +27 -0
  34. halide/lib64/cmake/HalideHelpers/Halide-Interfaces-release.cmake +116 -0
  35. halide/lib64/cmake/HalideHelpers/Halide-Interfaces.cmake +236 -0
  36. halide/lib64/cmake/HalideHelpers/HalideGeneratorHelpers.cmake +1056 -0
  37. halide/lib64/cmake/HalideHelpers/HalideHelpersConfig.cmake +28 -0
  38. halide/lib64/cmake/HalideHelpers/HalideHelpersConfigVersion.cmake +54 -0
  39. halide/lib64/cmake/HalideHelpers/HalideTargetHelpers.cmake +99 -0
  40. halide/lib64/cmake/HalideHelpers/MutexCopy.ps1 +31 -0
  41. halide/lib64/cmake/HalideHelpers/TargetExportScript.cmake +55 -0
  42. halide/lib64/cmake/Halide_Python/Halide_Python-targets-release.cmake +30 -0
  43. halide/lib64/cmake/Halide_Python/Halide_Python-targets.cmake +125 -0
  44. halide/lib64/cmake/Halide_Python/Halide_PythonConfig.cmake +26 -0
  45. halide/lib64/cmake/Halide_Python/Halide_PythonConfigVersion.cmake +65 -0
  46. halide/lib64/libHalide.so +0 -0
  47. halide/lib64/libHalidePyStubs.a +0 -0
  48. halide/lib64/libHalide_GenGen.a +0 -0
  49. halide/lib64/libautoschedule_adams2019.so +0 -0
  50. halide/lib64/libautoschedule_anderson2021.so +0 -0
  51. halide/lib64/libautoschedule_li2018.so +0 -0
  52. halide/lib64/libautoschedule_mullapudi2016.so +0 -0
  53. halide/share/doc/Halide/LICENSE.txt +233 -0
  54. halide/share/doc/Halide/README.md +439 -0
  55. halide/share/doc/Halide/doc/BuildingHalideWithCMake.md +626 -0
  56. halide/share/doc/Halide/doc/CodeStyleCMake.md +393 -0
  57. halide/share/doc/Halide/doc/FuzzTesting.md +104 -0
  58. halide/share/doc/Halide/doc/HalideCMakePackage.md +812 -0
  59. halide/share/doc/Halide/doc/Hexagon.md +73 -0
  60. halide/share/doc/Halide/doc/Python.md +844 -0
  61. halide/share/doc/Halide/doc/RunGen.md +283 -0
  62. halide/share/doc/Halide/doc/Testing.md +125 -0
  63. halide/share/doc/Halide/doc/Vulkan.md +287 -0
  64. halide/share/doc/Halide/doc/WebAssembly.md +228 -0
  65. halide/share/doc/Halide/doc/WebGPU.md +128 -0
  66. halide/share/tools/RunGen.h +1470 -0
  67. halide/share/tools/RunGenMain.cpp +642 -0
  68. halide/share/tools/adams2019_autotune_loop.sh +227 -0
  69. halide/share/tools/anderson2021_autotune_loop.sh +591 -0
  70. halide/share/tools/halide_benchmark.h +240 -0
  71. halide/share/tools/halide_image.h +31 -0
  72. halide/share/tools/halide_image_info.h +318 -0
  73. halide/share/tools/halide_image_io.h +2794 -0
  74. halide/share/tools/halide_malloc_trace.h +102 -0
  75. halide/share/tools/halide_thread_pool.h +161 -0
  76. halide/share/tools/halide_trace_config.h +559 -0
  77. halide-19.0.0.data/data/share/cmake/Halide/HalideConfig.cmake +6 -0
  78. halide-19.0.0.data/data/share/cmake/Halide/HalideConfigVersion.cmake +65 -0
  79. halide-19.0.0.data/data/share/cmake/HalideHelpers/HalideHelpersConfig.cmake +6 -0
  80. halide-19.0.0.data/data/share/cmake/HalideHelpers/HalideHelpersConfigVersion.cmake +54 -0
  81. halide-19.0.0.dist-info/METADATA +301 -0
  82. halide-19.0.0.dist-info/RECORD +84 -0
  83. halide-19.0.0.dist-info/WHEEL +6 -0
  84. halide-19.0.0.dist-info/licenses/LICENSE.txt +233 -0
@@ -0,0 +1,89 @@
1
+ #ifndef HALIDE_HALIDERUNTIMECUDA_H
2
+ #define HALIDE_HALIDERUNTIMECUDA_H
3
+
4
+ // Don't include HalideRuntime.h if the contents of it were already pasted into a generated header above this one
5
+ #ifndef HALIDE_HALIDERUNTIME_H
6
+
7
+ #include "HalideRuntime.h"
8
+
9
+ #endif
10
+
11
+ #ifdef __cplusplus
12
+ extern "C" {
13
+ #endif
14
+
15
+ /** \file
16
+ * Routines specific to the Halide Cuda runtime.
17
+ */
18
+
19
+ #define HALIDE_RUNTIME_CUDA
20
+
21
+ extern const struct halide_device_interface_t *halide_cuda_device_interface();
22
+
23
+ /** These are forward declared here to allow clients to override the
24
+ * Halide Cuda runtime. Do not call them. */
25
+ // @{
26
+ extern int halide_cuda_initialize_kernels(void *user_context, void **state_ptr,
27
+ const char *src, int size);
28
+ extern int halide_cuda_run(void *user_context,
29
+ void *state_ptr,
30
+ const char *entry_name,
31
+ int blocksX, int blocksY, int blocksZ,
32
+ int threadsX, int threadsY, int threadsZ,
33
+ int shared_mem_bytes,
34
+ size_t arg_sizes[],
35
+ void *args[],
36
+ int8_t arg_is_buffer[]);
37
+ extern void halide_cuda_finalize_kernels(void *user_context, void *state_ptr);
38
+ // @}
39
+
40
+ /** Set the underlying cuda device pointer for a buffer. The device
41
+ * pointer should be allocated using cuMemAlloc or similar and must
42
+ * have an extent large enough to cover that specified by the
43
+ * halide_buffer_t extent fields. The dev field of the halide_buffer_t
44
+ * must be NULL when this routine is called. This call can fail due to
45
+ * being passed an invalid device pointer. The device and host dirty
46
+ * bits are left unmodified. */
47
+ extern int halide_cuda_wrap_device_ptr(void *user_context, struct halide_buffer_t *buf, uint64_t device_ptr);
48
+
49
+ /** Disconnect this halide_buffer_t from the device pointer it was
50
+ * previously wrapped around. Should only be called for a
51
+ * halide_buffer_t that halide_cuda_wrap_device_ptr was previously
52
+ * called on. The device field of the halide_buffer_t will be NULL on
53
+ * return.
54
+ */
55
+ extern int halide_cuda_detach_device_ptr(void *user_context, struct halide_buffer_t *buf);
56
+
57
+ /** Return the underlying device pointer for a halide_buffer_t. This buffer
58
+ * must be valid on a Cuda device, or not have any associated device
59
+ * memory. If there is no device memory (dev field is NULL), this
60
+ * returns 0.
61
+ */
62
+ extern uintptr_t halide_cuda_get_device_ptr(void *user_context, struct halide_buffer_t *buf);
63
+
64
+ /** Release any currently-unused device allocations back to the cuda
65
+ * driver. See halide_reuse_device_allocations. */
66
+ extern int halide_cuda_release_unused_device_allocations(void *user_context);
67
+
68
+ // These typedefs treat both a CUcontext and a CUstream as a void *,
69
+ // to avoid dependencies on cuda headers.
70
+ typedef int (*halide_cuda_acquire_context_t)(void *, // user_context
71
+ void **, // cuda context out parameter
72
+ bool); // should create a context if none exist
73
+ typedef int (*halide_cuda_release_context_t)(void * /* user_context */);
74
+ typedef int (*halide_cuda_get_stream_t)(void *, // user_context
75
+ void *, // context
76
+ void **); // stream out parameter
77
+
78
+ /** Set custom methods to acquire and release cuda contexts and streams */
79
+ // @{
80
+ extern halide_cuda_acquire_context_t halide_set_cuda_acquire_context(halide_cuda_acquire_context_t handler);
81
+ extern halide_cuda_release_context_t halide_set_cuda_release_context(halide_cuda_release_context_t handler);
82
+ extern halide_cuda_get_stream_t halide_set_cuda_get_stream(halide_cuda_get_stream_t handler);
83
+ // @}
84
+
85
+ #ifdef __cplusplus
86
+ } // End extern "C"
87
+ #endif
88
+
89
+ #endif // HALIDE_HALIDERUNTIMECUDA_H
@@ -0,0 +1,91 @@
1
+ #ifndef HALIDE_HALIDERUNTIMED3D12COMPUTE_H
2
+ #define HALIDE_HALIDERUNTIMED3D12COMPUTE_H
3
+
4
+ // Don't include HalideRuntime.h if the contents of it were already pasted into a generated header above this one
5
+ #ifndef HALIDE_HALIDERUNTIME_H
6
+
7
+ #include "HalideRuntime.h"
8
+
9
+ #endif
10
+
11
+ #ifdef __cplusplus
12
+ extern "C" {
13
+ #endif
14
+
15
+ /** \file
16
+ * Routines specific to the Halide Direct3D 12 Compute runtime.
17
+ */
18
+
19
+ extern const struct halide_device_interface_t *halide_d3d12compute_device_interface();
20
+
21
+ /** These are forward declared here to allow clients to override the
22
+ * Halide Direct3D 12 Compute runtime. Do not call them. */
23
+ // @{
24
+ extern int halide_d3d12compute_initialize_kernels(void *user_context, void **state_ptr,
25
+ const char *src, int size);
26
+
27
+ extern int halide_d3d12compute_run(void *user_context,
28
+ void *state_ptr,
29
+ const char *entry_name,
30
+ int blocksX, int blocksY, int blocksZ,
31
+ int threadsX, int threadsY, int threadsZ,
32
+ int shared_mem_bytes,
33
+ struct halide_type_t arg_types[], void *args[], int8_t arg_is_buffer[]);
34
+ extern void halide_d3d12compute_finalize_kernels(void *user_context, void *state_ptr);
35
+ // @}
36
+
37
+ /** Set the underlying ID3D12Resource for a halide_buffer_t. The memory backing
38
+ * the resource should be managed by the caller (via a default/device heap) and
39
+ * must be large enough to cover the extent of the halide_buffer_t. The device
40
+ * field of the halide_buffer_t must be NULL when this routine is called. This
41
+ * call can fail due to running out of memory or if an invalid D3D12 resource is
42
+ * passed. The device and host dirty bits are left unmodified. */
43
+ extern int halide_d3d12compute_wrap_buffer(void *user_context, struct halide_buffer_t *buf, uint64_t d3d12_resource);
44
+
45
+ /** Disconnect a halide_buffer_t from the ID3D12Resource it was previously
46
+ * wrapped around. Should only be called for a halide_buffer_t that
47
+ * halide_d3d12compute_wrap_buffer was previously called on. Frees any
48
+ * storage associated with the binding of the halide_buffer_t and the
49
+ * buffer, but does not free the ID3D12Resource. The dev field of the
50
+ * halide_buffer_t will be NULL on return.
51
+ */
52
+ extern int halide_d3d12compute_detach_buffer(void *user_context, struct halide_buffer_t *buf);
53
+
54
+ /** Return the underlying ID3D12Resource for a halide_buffer_t. This resource
55
+ * must be valid on a D3D12 device, unless halide_buffer_t has no associated
56
+ * resource. If there is no device memory (device field is NULL), returns 0.
57
+ */
58
+ extern uintptr_t halide_d3d12compute_get_buffer(void *user_context, struct halide_buffer_t *buf);
59
+
60
+ struct halide_d3d12compute_device;
61
+ struct halide_d3d12compute_command_queue;
62
+
63
+ /** This prototype is exported as applications will typically need to
64
+ * replace it to get Halide filters to execute on the same device and
65
+ * command queue used for other purposes. The halide_d3d12compute_device is an
66
+ * ID3D12Device and halide_d3d12compute_command_queue is an ID3D12CommandQueue.
67
+ * No reference counting is done by Halide on these objects. They must remain
68
+ * valid until all of the following are true:
69
+ * - A balancing halide_d3d12compute_release_context has occurred for each
70
+ * halide_d3d12compute_acquire_context which returned the device/queue
71
+ * - All Halide filters using the context information have completed
72
+ * - All halide_buffer_t objects on the device have had
73
+ * halide_device_free called or have been detached via
74
+ * halide_d3d12compute_detach_buffer.
75
+ * - halide_device_release has been called on the interface returned from
76
+ * halide_d3d12compute_device_interface(). (This releases the programs on the context.)
77
+ */
78
+ extern int halide_d3d12compute_acquire_context(void *user_context, struct halide_d3d12compute_device **device_ret,
79
+ struct halide_d3d12compute_command_queue **queue_ret, bool create);
80
+
81
+ /** This call balances each successful halide_d3d12compute_acquire_context call.
82
+ * If halide_d3d12compute_acquire_context is replaced, this routine must be replaced
83
+ * as well.
84
+ */
85
+ extern int halide_d3d12compute_release_context(void *user_context);
86
+
87
+ #ifdef __cplusplus
88
+ } // End extern "C"
89
+ #endif
90
+
91
+ #endif // HALIDE_HALIDERUNTIMED3D12COMPUTE_H
@@ -0,0 +1,104 @@
1
+ #ifndef HALIDE_HALIDERUNTIMEHEXAGONDMA_H
2
+ #define HALIDE_HALIDERUNTIMEHEXAGONDMA_H
3
+
4
+ /** \file
5
+ * Routines specific to the Halide Hexagon DMA host-side runtime.
6
+ */
7
+
8
+ // Don't include HalideRuntime.h if the contents of it were already pasted into a generated header above this one
9
+ #ifndef HALIDE_HALIDERUNTIME_H
10
+
11
+ #include "HalideRuntime.h"
12
+
13
+ #endif
14
+
15
+ // Don't include HalideRuntimeHexagonHost.h if the contents of it were already pasted into a generated header above this one
16
+ #ifndef HALIDE_HALIDERUNTIMEHEXAGONHOST_H
17
+
18
+ #include "HalideRuntimeHexagonHost.h"
19
+
20
+ #endif
21
+
22
+ #ifdef __cplusplus
23
+ extern "C" {
24
+ #endif
25
+
26
+ /**
27
+ * \defgroup rt_hexagon_dma Halide Hexagon DMA runtime
28
+ * @{
29
+ */
30
+
31
+ /**
32
+ * Image Formats to prepare the application for DMA Transfer
33
+ */
34
+ typedef enum {
35
+ halide_hexagon_fmt_RawData,
36
+ halide_hexagon_fmt_NV12,
37
+ halide_hexagon_fmt_NV12_Y,
38
+ halide_hexagon_fmt_NV12_UV,
39
+ halide_hexagon_fmt_P010,
40
+ halide_hexagon_fmt_P010_Y,
41
+ halide_hexagon_fmt_P010_UV,
42
+ halide_hexagon_fmt_TP10,
43
+ halide_hexagon_fmt_TP10_Y,
44
+ halide_hexagon_fmt_TP10_UV,
45
+ halide_hexagon_fmt_NV124R,
46
+ halide_hexagon_fmt_NV124R_Y,
47
+ halide_hexagon_fmt_NV124R_UV
48
+ } halide_hexagon_image_fmt_t;
49
+
50
+ extern const struct halide_device_interface_t *halide_hexagon_dma_device_interface();
51
+
52
+ /** This API is used to set up the DMA device interface to be used for DMA transfer. This also internally
53
+ * creates the DMA device handle and populates all the Buffer related parameters (width, height, stride)
54
+ * to be used for DMA configuration.
55
+ */
56
+ extern int halide_hexagon_dma_device_wrap_native(void *user_context, struct halide_buffer_t *buf,
57
+ uint64_t mem);
58
+
59
+ /** Detach the Input/Output Buffer from DMA device handle and deallocate the DMA device handle buffer allocation
60
+ * This API also frees up the DMA device and makes it available for another usage.
61
+ */
62
+ extern int halide_hexagon_dma_device_detach_native(void *user_context, struct halide_buffer_t *buf);
63
+
64
+ /** This API will allocate a DMA Engine needed for DMA read/write. This is the first step Before
65
+ * a buffer can be used in a copy operation (i.e. a DMA read/write operation).
66
+ */
67
+ extern int halide_hexagon_dma_allocate_engine(void *user_context, void **dma_engine);
68
+
69
+ /** This API frees up the allocated DMA engine. This needs to be called after a user program ends
70
+ * all the DMA Operations and make it available for subsequent DMA transfers */
71
+ extern int halide_hexagon_dma_deallocate_engine(void *user_context, void *dma_engine);
72
+
73
+ /** This API Prepares a buffer for DMA Read Operation. This will setup the DMA format, direction (read).
74
+ * Will also make necessary adjustments to the DMA frame parameters based on Image format provided.
75
+ */
76
+ extern int halide_hexagon_dma_prepare_for_copy_to_host(void *user_context, struct halide_buffer_t *buf,
77
+ void *dma_engine, bool is_ubwc, halide_hexagon_image_fmt_t fmt);
78
+
79
+ /** This API Prepares a buffer for DMA Write Operation. This will setup the DMA format, direction (write).
80
+ * Will also make necessary adjustments to the DMA frame parameters based on Image format provided.
81
+ */
82
+ extern int halide_hexagon_dma_prepare_for_copy_to_device(void *user_context, struct halide_buffer_t *buf,
83
+ void *dma_engine, bool is_ubwc,
84
+ halide_hexagon_image_fmt_t fmt);
85
+
86
+ /** This API is used to free up the DMA Resources associated with the buffer.
87
+ * TODO: Currently this API is a dummy as all the necessary freeing is done in an another API.
88
+ * This will be used in future.
89
+ */
90
+ extern int halide_hexagon_dma_unprepare(void *user_context, struct halide_buffer_t *buf);
91
+
92
+ /** This API is used to setup the hexagon Operation modes. We will setup the necessary Operating frequency
93
+ * based on the power mode chosen. Check the structure halide_hexagon_power_mode_t defined in Halide HalideRuntimeHexagonHost.h
94
+ * for the supported power modes.
95
+ */
96
+ extern int halide_hexagon_dma_power_mode_voting(void *user_context, halide_hexagon_power_mode_t cornercase);
97
+
98
+ ///@}
99
+
100
+ #ifdef __cplusplus
101
+ } // End extern "C"
102
+ #endif
103
+
104
+ #endif // HALIDE_HALIDERUNTIMEHEXAGONDMA_H
@@ -0,0 +1,157 @@
1
+ #ifndef HALIDE_HALIDERUNTIMEHEXAGONHOST_H
2
+ #define HALIDE_HALIDERUNTIMEHEXAGONHOST_H
3
+
4
+ // Don't include HalideRuntime.h if the contents of it were already pasted into a generated header above this one
5
+ #ifndef HALIDE_HALIDERUNTIME_H
6
+
7
+ #include "HalideRuntime.h"
8
+
9
+ #endif
10
+
11
+ #ifdef __cplusplus
12
+ extern "C" {
13
+ #endif
14
+
15
+ /** \file
16
+ * Routines specific to the Halide Hexagon host-side runtime.
17
+ */
18
+
19
+ #define HALIDE_RUNTIME_HEXAGON
20
+
21
+ typedef int halide_hexagon_handle_t;
22
+
23
+ extern const struct halide_device_interface_t *halide_hexagon_device_interface();
24
+
25
+ /** Check if the Hexagon runtime (libhalide_hexagon_host.so) is
26
+ * available. If it is not, pipelines using Hexagon will fail. */
27
+ extern bool halide_is_hexagon_available(void *user_context);
28
+
29
+ /** The device handle for Hexagon is simply a pointer and size, stored
30
+ * in the dev field of the halide_buffer_t. If the buffer is allocated in a
31
+ * particular way (ion_alloc), the buffer will be shared with Hexagon
32
+ * (not copied). The device field of the halide_buffer_t must be NULL when this
33
+ * routine is called. This call can fail due to running out of memory
34
+ * or being passed an invalid device handle. The device and host
35
+ * dirty bits are left unmodified. */
36
+ extern int halide_hexagon_wrap_device_handle(void *user_context, struct halide_buffer_t *buf,
37
+ void *ptr, uint64_t size);
38
+
39
+ /** Disconnect this halide_buffer_t from the device handle it was
40
+ * previously wrapped around. Should only be called for a
41
+ * halide_buffer_t that halide_hexagon_wrap_device_handle was
42
+ * previously called on. Frees any storage associated with the binding
43
+ * of the halide_buffer_t and the device handle, but does not free the
44
+ * device handle. The device field of the halide_buffer_t will be NULL
45
+ * on return. */
46
+ extern int halide_hexagon_detach_device_handle(void *user_context, struct halide_buffer_t *buf);
47
+
48
+ /** Return the underlying device handle for a halide_buffer_t. If there is
49
+ * no device memory (dev field is NULL), this returns 0. */
50
+ extern void *halide_hexagon_get_device_handle(void *user_context, struct halide_buffer_t *buf);
51
+ extern uint64_t halide_hexagon_get_device_size(void *user_context, struct halide_buffer_t *buf);
52
+
53
+ /** Return a pointer to the module_state. */
54
+ extern void *halide_hexagon_get_module_state(void *user_context, void **host);
55
+
56
+ /** Power HVX on and off. Calling a Halide pipeline will do this
57
+ * automatically on each pipeline invocation; however, it costs a
58
+ * small but possibly significant amount of time for short running
59
+ * pipelines. To avoid this cost, HVX can be powered on prior to
60
+ * running several pipelines, and powered off afterwards. If HVX is
61
+ * powered on, subsequent calls to power HVX on will be cheap. */
62
+ // @{
63
+ extern int halide_hexagon_power_hvx_on(void *user_context);
64
+ extern int halide_hexagon_power_hvx_off(void *user_context);
65
+ extern void halide_hexagon_power_hvx_off_as_destructor(void *user_context, void * /* obj */);
66
+ // @}
67
+
68
+ /** Power modes for Hexagon. */
69
+ typedef enum halide_hexagon_power_mode_t {
70
+ halide_hexagon_power_low = 0,
71
+ halide_hexagon_power_nominal = 1,
72
+ halide_hexagon_power_turbo = 2,
73
+ halide_hexagon_power_default = 3, /// Resets power to its default state.
74
+ halide_hexagon_power_low_plus = 4,
75
+ halide_hexagon_power_low_2 = 5,
76
+ halide_hexagon_power_nominal_plus = 6,
77
+
78
+ // These are deprecated.
79
+ halide_hvx_power_low = halide_hexagon_power_low,
80
+ halide_hvx_power_nominal = halide_hexagon_power_nominal,
81
+ halide_hvx_power_turbo = halide_hexagon_power_turbo,
82
+ halide_hvx_power_default = halide_hexagon_power_default,
83
+ } halide_hexagon_power_mode_t;
84
+
85
+ /** More detailed power settings to control Hexagon.
86
+ * @param set_mips - Set to TRUE to request MIPS
87
+ * @param mipsPerThread - mips requested per thread, to establish a minimal clock frequency per HW thread
88
+ * @param mipsTotal - Total mips requested, to establish total number of MIPS required across all HW threads
89
+ * @param set_bus_bw - Set to TRUE to request bus_bw
90
+ * @param bwMegabytesPerSec - Max bus BW requested (megabytes per second)
91
+ * @param busbwUsagePercentage - Percentage of time during which bwMegabytesPerSec BW is required from the bus (0..100)
92
+ * @param set_latency - Set to TRUE to set latency
93
+ * @param latency - maximum hardware wakeup latency in microseconds. The
94
+ * higher the value the deeper state of sleep
95
+ * that can be entered but the longer it may
96
+ * take to awaken. Only values > 0 are supported (1 microsecond is the smallest valid value)
97
+ */
98
+ typedef struct {
99
+ bool set_mips;
100
+ unsigned int mipsPerThread;
101
+ unsigned int mipsTotal;
102
+ bool set_bus_bw;
103
+ unsigned int bwMegabytesPerSec;
104
+ unsigned short busbwUsagePercentage;
105
+ bool set_latency;
106
+ int latency;
107
+ } halide_hexagon_power_t;
108
+
109
+ // This is deprecated.
110
+ typedef halide_hexagon_power_t halide_hvx_power_perf_t;
111
+
112
+ /** Set a performance target for Hexagon. Hexagon applications can
113
+ * vote for the performance levels they want, which may or may not be
114
+ * respected by Hexagon. Applications should be careful not to leave
115
+ * Hexagon in a high power state for too long. These functions can
116
+ * significantly increase standby power consumption. Use
117
+ * halide_hexagon_power_default to reset performance to the default
118
+ * power state. */
119
+ // @{
120
+ extern int halide_hexagon_set_performance_mode(void *user_context, halide_hexagon_power_mode_t mode);
121
+ extern int halide_hexagon_set_performance(void *user_context, halide_hexagon_power_t *perf);
122
+ // @}
123
+
124
+ /** Set the default priority for Halide Hexagon user threads:
125
+ * - Valid priority values range from 1 to 255
126
+ * - Smaller number for higher priority
127
+ * - The highest priority for a user thread is 1
128
+ * - Priority 0 is reserved for OS usage
129
+ * If this routine is not called, the priority will default to 100.
130
+ * This is intended to be called before dispatching any pipeline. */
131
+ // @{
132
+ extern int halide_hexagon_set_thread_priority(void *user_context, int priority);
133
+ // @}
134
+
135
+ /** These are forward declared here to allow clients to override the
136
+ * Halide Hexagon runtime. Do not call them. */
137
+ // @{
138
+ extern int halide_hexagon_initialize_kernels(void *user_context,
139
+ void **module_ptr,
140
+ const uint8_t *code, uint64_t code_size,
141
+ const uint8_t *runtime, uint64_t runtime_size);
142
+ extern int halide_hexagon_run(void *user_context,
143
+ void *module_ptr,
144
+ const char *name,
145
+ halide_hexagon_handle_t *function,
146
+ uint64_t arg_sizes[],
147
+ void *args[],
148
+ int arg_flags[]);
149
+ extern void halide_hexagon_finalize_kernels(void *user_context, void *state_ptr);
150
+ extern int halide_hexagon_device_release(void *user_context);
151
+ // @}
152
+
153
+ #ifdef __cplusplus
154
+ } // End extern "C"
155
+ #endif
156
+
157
+ #endif // HALIDE_HALIDERUNTIMEHEXAGONHOST_H
@@ -0,0 +1,112 @@
1
+ #ifndef HALIDE_HALIDERUNTIMEMETAL_H
2
+ #define HALIDE_HALIDERUNTIMEMETAL_H
3
+
4
+ // Don't include HalideRuntime.h if the contents of it were already pasted into a generated header above this one
5
+ #ifndef HALIDE_HALIDERUNTIME_H
6
+
7
+ #include "HalideRuntime.h"
8
+
9
+ #endif
10
+
11
+ #ifdef __cplusplus
12
+ extern "C" {
13
+ #endif
14
+
15
+ /** \file
16
+ * Routines specific to the Halide Metal runtime.
17
+ */
18
+
19
+ #define HALIDE_RUNTIME_METAL
20
+
21
+ extern const struct halide_device_interface_t *halide_metal_device_interface();
22
+
23
+ /** These are forward declared here to allow clients to override the
24
+ * Halide Metal runtime. Do not call them. */
25
+ // @{
26
+ extern int halide_metal_initialize_kernels(void *user_context, void **state_ptr,
27
+ const char *src, int size);
28
+ void halide_metal_finalize_kernels(void *user_context, void *state_ptr);
29
+
30
+ extern int halide_metal_run(void *user_context,
31
+ void *state_ptr,
32
+ const char *entry_name,
33
+ int blocksX, int blocksY, int blocksZ,
34
+ int threadsX, int threadsY, int threadsZ,
35
+ int shared_mem_bytes,
36
+ struct halide_type_t arg_types[],
37
+ void *args[],
38
+ int8_t arg_is_buffer[]);
39
+ // @}
40
+
41
+ /** Set the underlying MTLBuffer for a halide_buffer_t. This memory should be
42
+ * allocated using newBufferWithLength:options or similar and must
43
+ * have an extent large enough to cover that specified by the halide_buffer_t
44
+ * extent fields. The dev field of the halide_buffer_t must be NULL when this
45
+ * routine is called. This call can fail due to running out of memory
46
+ * or being passed an invalid buffer. The device and host dirty bits
47
+ * are left unmodified. */
48
+ extern int halide_metal_wrap_buffer(void *user_context, struct halide_buffer_t *buf, uint64_t buffer);
49
+
50
+ /** Disconnect a halide_buffer_t from the memory it was previously
51
+ * wrapped around. Should only be called for a halide_buffer_t that
52
+ * halide_metal_wrap_buffer was previously called on. Frees any
53
+ * storage associated with the binding of the halide_buffer_t and the
54
+ * buffer, but does not free the MTLBuffer. The dev field of the
55
+ * halide_buffer_t will be NULL on return.
56
+ */
57
+ extern int halide_metal_detach_buffer(void *user_context, struct halide_buffer_t *buf);
58
+
59
+ /** Return the underlying MTLBuffer for a halide_buffer_t. This buffer must be
60
+ * valid on a Metal device, or not have any associated device
61
+ * memory. If there is no device memory (dev field is NULL), this
62
+ * returns 0.
63
+ */
64
+ extern uintptr_t halide_metal_get_buffer(void *user_context, struct halide_buffer_t *buf);
65
+
66
+ /** Returns the offset associated with the Metal Buffer allocation via device_crop or device_slice. */
67
+ extern uint64_t halide_metal_get_crop_offset(void *user_context, struct halide_buffer_t *buf);
68
+
69
+ struct halide_metal_device;
70
+ struct halide_metal_command_queue;
71
+ struct halide_metal_command_buffer;
72
+
73
+ /** This prototype is exported as applications will typically need to
74
+ * replace it to get Halide filters to execute on the same device and
75
+ * command queue used for other purposes. The halide_metal_device is an
76
+ * id \<MTLDevice\> and halide_metal_command_queue is an id \<MTLCommandQueue\>.
77
+ * No reference counting is done by Halide on these objects. They must remain
78
+ * valid until all of the following are true:
79
+ * - A balancing halide_metal_release_context has occurred for each
80
+ * halide_metal_acquire_context which returned the device/queue
81
+ * - All Halide filters using the context information have completed
82
+ * - All halide_buffer_t objects on the device have had
83
+ * halide_device_free called or have been detached via
84
+ * halide_metal_detach_buffer.
85
+ * - halide_device_release has been called on the interface returned from
86
+ * halide_metal_device_interface(). (This releases the programs on the context.)
87
+ */
88
+ extern int halide_metal_acquire_context(void *user_context, struct halide_metal_device **device_ret,
89
+ struct halide_metal_command_queue **queue_ret, bool create);
90
+
91
+ /** This call balances each successful halide_metal_acquire_context call.
92
+ * If halide_metal_acquire_context is replaced, this routine must be replaced
93
+ * as well.
94
+ */
95
+ extern int halide_metal_release_context(void *user_context);
96
+
97
+ /** This function is called as part of the callback when a Metal command buffer completes.
98
+ * The return value, if not halide_error_code_success, will be stashed in Metal runtime and returned
99
+ * to the next call into the runtime, and the error string will be saved as well.
100
+ * The error string will be freed by the caller. The return value must be a valid Halide error code.
101
+ * This is called from the Metal driver, and thus:
102
+ * - Any user_context must be preserved between the call to halide_metal_run and the corresponding callback
103
+ * - The function must be thread-safe
104
+ */
105
+ extern int halide_metal_command_buffer_completion_handler(void *user_context, struct halide_metal_command_buffer *buffer,
106
+ char **returned_error_string);
107
+
108
+ #ifdef __cplusplus
109
+ } // End extern "C"
110
+ #endif
111
+
112
+ #endif // HALIDE_HALIDERUNTIMEMETAL_H
@@ -0,0 +1,119 @@
1
+ #ifndef HALIDE_HALIDERUNTIMEOPENCL_H
2
+ #define HALIDE_HALIDERUNTIMEOPENCL_H
3
+
4
+ // Don't include HalideRuntime.h if the contents of it were already pasted into a generated header above this one
5
+ #ifndef HALIDE_HALIDERUNTIME_H
6
+
7
+ #include "HalideRuntime.h"
8
+
9
+ #endif
10
+
11
+ #ifdef __cplusplus
12
+ extern "C" {
13
+ #endif
14
+
15
+ /** \file
16
+ * Routines specific to the Halide OpenCL runtime.
17
+ */
18
+
19
+ #define HALIDE_RUNTIME_OPENCL
20
+
21
+ extern const struct halide_device_interface_t *halide_opencl_device_interface();
22
+ extern const struct halide_device_interface_t *halide_opencl_image_device_interface();
23
+
24
+ /** These are forward declared here to allow clients to override the
25
+ * Halide OpenCL runtime. Do not call them. */
26
+ // @{
27
+ extern int halide_opencl_initialize_kernels(void *user_context, void **state_ptr,
28
+ const char *src, int size);
29
+ extern int halide_opencl_run(void *user_context,
30
+ void *state_ptr,
31
+ const char *entry_name,
32
+ int blocksX, int blocksY, int blocksZ,
33
+ int threadsX, int threadsY, int threadsZ,
34
+ int shared_mem_bytes,
35
+ size_t arg_sizes[],
36
+ void *args[],
37
+ int8_t arg_is_buffer[]);
38
+ extern void halide_opencl_finalize_kernels(void *user_context, void *state_ptr);
39
+ // @}
40
+
41
+ /** Set the platform name for OpenCL to use (e.g. "Intel" or
42
+ * "NVIDIA"). The argument is copied internally. The opencl runtime
43
+ * will select a platform that includes this as a substring. If never
44
+ * called, Halide uses the environment variable HL_OCL_PLATFORM_NAME,
45
+ * or defaults to the first available platform. */
46
+ extern void halide_opencl_set_platform_name(const char *n);
47
+
48
+ /** Halide calls this to get the desired OpenCL platform
49
+ * name. Implement this yourself to use a different platform per
50
+ * user_context. The default implementation returns the value set by
51
+ * halide_opencl_set_platform_name, or the value of the environment
52
+ * variable HL_OCL_PLATFORM_NAME. The output is valid until the next
53
+ * call to halide_opencl_set_platform_name. */
54
+ extern const char *halide_opencl_get_platform_name(void *user_context);
55
+
56
+ /** Set the device type for OpenCL to use. The argument is copied
57
+ * internally. It must be "cpu", "gpu", or "acc". If never called,
58
+ * Halide uses the environment variable HL_OCL_DEVICE_TYPE. */
59
+ extern void halide_opencl_set_device_type(const char *n);
60
+
61
+ /** Halide calls this to get the desired OpenCL device
62
+ * type. Implement this yourself to use a different device type per
63
+ * user_context. The default implementation returns the value set by
64
+ * halide_opencl_set_device_type, or the environment variable
65
+ * HL_OCL_DEVICE_TYPE. The result is valid until the next call to
66
+ * halide_opencl_set_device_type. */
67
+ extern const char *halide_opencl_get_device_type(void *user_context);
68
+
69
+ /** Set the additional build options for OpenCL to use. The argument
70
+ * is copied internally. If never called,
71
+ * Halide uses the environment variable HL_OCL_BUILD_OPTIONS. */
72
+ extern void halide_opencl_set_build_options(const char *n);
73
+
74
+ /** Halide calls this to get the additional build options for OpenCL to
75
+ * use. Implement this yourself to use different build options per
76
+ * user_context. The default implementation returns the value set by
77
+ * halide_opencl_set_build_options, or the environment variable
78
+ * HL_OCL_BUILD_OPTIONS. The result is valid until the next call to
79
+ * halide_opencl_set_build_options. */
80
+ extern const char *halide_opencl_get_build_options(void *user_context);
81
+
82
+ /** Set the underlying cl_mem for a halide_buffer_t. This memory should be
83
+ * allocated using clCreateBuffer or similar and must have an extent
84
+ * large enough to cover that specified by the halide_buffer_t extent
85
+ * fields. The dev field of the halide_buffer_t must be NULL when this
86
+ * routine is called. This call can fail due to running out of memory
87
+ * or being passed an invalid device pointer. The device and host
88
+ * dirty bits are left unmodified. */
89
+ extern int halide_opencl_wrap_cl_mem(void *user_context, struct halide_buffer_t *buf, uint64_t device_ptr);
90
+
91
+ /** Same as halide_opencl_wrap_cl_mem but wraps a cl_mem created with
92
+ * clCreateImage
93
+ */
94
+ extern int halide_opencl_image_wrap_cl_mem(void *user_context, struct halide_buffer_t *buf, uint64_t device_ptr);
95
+
96
+ /** Disconnect a halide_buffer_t from the memory it was previously
97
+ * wrapped around. Should only be called for a halide_buffer_t that
98
+ * halide_opencl_wrap_cl_mem was previously called on. Frees any
99
+ * storage associated with the binding of the halide_buffer_t and the
100
+ * device pointer, but does not free the cl_mem. The dev field of the
101
+ * halide_buffer_t will be NULL on return.
102
+ */
103
+ extern int halide_opencl_detach_cl_mem(void *user_context, struct halide_buffer_t *buf);
104
+
105
+ /** Return the underlying cl_mem for a halide_buffer_t. This buffer must be
106
+ * valid on an OpenCL device, or not have any associated device
107
+ * memory. If there is no device memory (dev field is NULL), this
108
+ * returns 0.
109
+ */
110
+ extern uintptr_t halide_opencl_get_cl_mem(void *user_context, struct halide_buffer_t *buf);
111
+
112
+ /** Returns the offset associated with the OpenCL memory allocation via device_crop or device_slice. */
113
+ extern uint64_t halide_opencl_get_crop_offset(void *user_context, halide_buffer_t *buf);
114
+
115
+ #ifdef __cplusplus
116
+ } // End extern "C"
117
+ #endif
118
+
119
+ #endif // HALIDE_HALIDERUNTIMEOPENCL_H