halide 19.0.0__cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. halide/__init__.py +39 -0
  2. halide/_generator_helpers.py +835 -0
  3. halide/bin/adams2019_retrain_cost_model +0 -0
  4. halide/bin/adams2019_weightsdir_to_weightsfile +0 -0
  5. halide/bin/anderson2021_retrain_cost_model +0 -0
  6. halide/bin/anderson2021_weightsdir_to_weightsfile +0 -0
  7. halide/bin/featurization_to_sample +0 -0
  8. halide/bin/gengen +0 -0
  9. halide/bin/get_host_target +0 -0
  10. halide/halide_.cpython-313-x86_64-linux-gnu.so +0 -0
  11. halide/imageio.py +60 -0
  12. halide/include/Halide.h +35293 -0
  13. halide/include/HalideBuffer.h +2618 -0
  14. halide/include/HalidePyTorchCudaHelpers.h +64 -0
  15. halide/include/HalidePyTorchHelpers.h +120 -0
  16. halide/include/HalideRuntime.h +2221 -0
  17. halide/include/HalideRuntimeCuda.h +89 -0
  18. halide/include/HalideRuntimeD3D12Compute.h +91 -0
  19. halide/include/HalideRuntimeHexagonDma.h +104 -0
  20. halide/include/HalideRuntimeHexagonHost.h +157 -0
  21. halide/include/HalideRuntimeMetal.h +112 -0
  22. halide/include/HalideRuntimeOpenCL.h +119 -0
  23. halide/include/HalideRuntimeQurt.h +32 -0
  24. halide/include/HalideRuntimeVulkan.h +137 -0
  25. halide/include/HalideRuntimeWebGPU.h +44 -0
  26. halide/lib64/cmake/Halide/FindHalide_LLVM.cmake +152 -0
  27. halide/lib64/cmake/Halide/FindV8.cmake +33 -0
  28. halide/lib64/cmake/Halide/Halide-shared-deps.cmake +0 -0
  29. halide/lib64/cmake/Halide/Halide-shared-targets-release.cmake +29 -0
  30. halide/lib64/cmake/Halide/Halide-shared-targets.cmake +154 -0
  31. halide/lib64/cmake/Halide/HalideConfig.cmake +162 -0
  32. halide/lib64/cmake/Halide/HalideConfigVersion.cmake +65 -0
  33. halide/lib64/cmake/HalideHelpers/FindHalide_WebGPU.cmake +27 -0
  34. halide/lib64/cmake/HalideHelpers/Halide-Interfaces-release.cmake +116 -0
  35. halide/lib64/cmake/HalideHelpers/Halide-Interfaces.cmake +236 -0
  36. halide/lib64/cmake/HalideHelpers/HalideGeneratorHelpers.cmake +1056 -0
  37. halide/lib64/cmake/HalideHelpers/HalideHelpersConfig.cmake +28 -0
  38. halide/lib64/cmake/HalideHelpers/HalideHelpersConfigVersion.cmake +54 -0
  39. halide/lib64/cmake/HalideHelpers/HalideTargetHelpers.cmake +99 -0
  40. halide/lib64/cmake/HalideHelpers/MutexCopy.ps1 +31 -0
  41. halide/lib64/cmake/HalideHelpers/TargetExportScript.cmake +55 -0
  42. halide/lib64/cmake/Halide_Python/Halide_Python-targets-release.cmake +30 -0
  43. halide/lib64/cmake/Halide_Python/Halide_Python-targets.cmake +125 -0
  44. halide/lib64/cmake/Halide_Python/Halide_PythonConfig.cmake +26 -0
  45. halide/lib64/cmake/Halide_Python/Halide_PythonConfigVersion.cmake +65 -0
  46. halide/lib64/libHalide.so +0 -0
  47. halide/lib64/libHalidePyStubs.a +0 -0
  48. halide/lib64/libHalide_GenGen.a +0 -0
  49. halide/lib64/libautoschedule_adams2019.so +0 -0
  50. halide/lib64/libautoschedule_anderson2021.so +0 -0
  51. halide/lib64/libautoschedule_li2018.so +0 -0
  52. halide/lib64/libautoschedule_mullapudi2016.so +0 -0
  53. halide/share/doc/Halide/LICENSE.txt +233 -0
  54. halide/share/doc/Halide/README.md +439 -0
  55. halide/share/doc/Halide/doc/BuildingHalideWithCMake.md +626 -0
  56. halide/share/doc/Halide/doc/CodeStyleCMake.md +393 -0
  57. halide/share/doc/Halide/doc/FuzzTesting.md +104 -0
  58. halide/share/doc/Halide/doc/HalideCMakePackage.md +812 -0
  59. halide/share/doc/Halide/doc/Hexagon.md +73 -0
  60. halide/share/doc/Halide/doc/Python.md +844 -0
  61. halide/share/doc/Halide/doc/RunGen.md +283 -0
  62. halide/share/doc/Halide/doc/Testing.md +125 -0
  63. halide/share/doc/Halide/doc/Vulkan.md +287 -0
  64. halide/share/doc/Halide/doc/WebAssembly.md +228 -0
  65. halide/share/doc/Halide/doc/WebGPU.md +128 -0
  66. halide/share/tools/RunGen.h +1470 -0
  67. halide/share/tools/RunGenMain.cpp +642 -0
  68. halide/share/tools/adams2019_autotune_loop.sh +227 -0
  69. halide/share/tools/anderson2021_autotune_loop.sh +591 -0
  70. halide/share/tools/halide_benchmark.h +240 -0
  71. halide/share/tools/halide_image.h +31 -0
  72. halide/share/tools/halide_image_info.h +318 -0
  73. halide/share/tools/halide_image_io.h +2794 -0
  74. halide/share/tools/halide_malloc_trace.h +102 -0
  75. halide/share/tools/halide_thread_pool.h +161 -0
  76. halide/share/tools/halide_trace_config.h +559 -0
  77. halide-19.0.0.data/data/share/cmake/Halide/HalideConfig.cmake +6 -0
  78. halide-19.0.0.data/data/share/cmake/Halide/HalideConfigVersion.cmake +65 -0
  79. halide-19.0.0.data/data/share/cmake/HalideHelpers/HalideHelpersConfig.cmake +6 -0
  80. halide-19.0.0.data/data/share/cmake/HalideHelpers/HalideHelpersConfigVersion.cmake +54 -0
  81. halide-19.0.0.dist-info/METADATA +301 -0
  82. halide-19.0.0.dist-info/RECORD +84 -0
  83. halide-19.0.0.dist-info/WHEEL +6 -0
  84. halide-19.0.0.dist-info/licenses/LICENSE.txt +233 -0
@@ -0,0 +1,102 @@
1
+ #ifndef HALIDE_MALLOC_TRACE_H
2
+ #define HALIDE_MALLOC_TRACE_H
3
+
4
+ //---------------------------------------------------------------------------
5
+ // The custom trace allocator can be used in an application by calling:
6
+ //
7
+ // halide_enable_malloc_trace();
8
+ //
9
+ // When the app is run, calls to halide_malloc/free will produce output like:
10
+ //
11
+ // halide_malloc => [0x9e400, 0xa27ff], # size:17408, align:1K
12
+ // halide-header => [0x9e390, 0x9e3ff], # size:112, align:16
13
+ // halide_malloc => [0xa2880, 0xa6e9f], # size:17952, align:128
14
+ // halide-header => [0xa2820, 0xa287f], # size:96, align:32
15
+ // halide_free => [0x9e390, 0x9e3ff], # size:112, align:16
16
+ // halide_free => [0xa2820, 0xa287f], # size:96, align:32
17
+ //
18
+ //---------------------------------------------------------------------------
19
+
20
+ #include <cstdlib>
21
+ #include <iostream>
22
+ #include <memory>
23
+
24
+ namespace Halide {
25
+ namespace Tools {
26
+
27
/** Print the alignment of an address to std::cout as "align:<n>[K|M]".
 *
 * Reports the largest power of two, capped at 1M, that evenly divides
 * \p val. Values of 1024 or more are scaled down and given a 'K' or 'M'
 * suffix; smaller values are printed as a plain byte count. No newline
 * is written.
 */
static inline void print_meminfoalign(intptr_t val) {
    // Probe power-of-two boundaries from 1M down to 1. The boundary of 1
    // always matches (the mask is 0), so something is always printed.
    for (intptr_t boundary = 1024 * 1024; boundary > 0; boundary >>= 1) {
        if ((val & (boundary - 1)) != 0) {
            continue;
        }
        std::cout << "align:";
        if (boundary >= 1024 * 1024) {
            std::cout << (boundary >> 20) << 'M';
        } else if (boundary >= 1024) {
            std::cout << (boundary >> 10) << 'K';
        } else {
            std::cout << boundary;
        }
        return;
    }
}
49
+
50
+ void *halide_malloc_trace(void *user_context, size_t x) {
51
+ // Halide requires halide_malloc to allocate memory that can be
52
+ // read 8 bytes before the start to store the original pointer.
53
+ // Additionally, we also need to align it to the natural vector
54
+ // width.
55
+ void *orig = malloc(x + 128);
56
+ if (orig == nullptr) {
57
+ // Will result in a failed assertion and a call to halide_error
58
+ return nullptr;
59
+ }
60
+ // Round up to next multiple of 128.
61
+ void *ptr = (void *)((((size_t)orig + 128) >> 7) << 7);
62
+ ((void **)ptr)[-1] = orig;
63
+
64
+ void *headend = (orig == ptr) ? orig : (char *)ptr - 1;
65
+ std::cout << "halide_malloc => [0x" << std::hex
66
+ << (intptr_t)ptr << ", 0x"
67
+ << (intptr_t)ptr + x - 1 << std::dec
68
+ << "], # size:"
69
+ << (intptr_t)x << ", ";
70
+ print_meminfoalign((intptr_t)ptr);
71
+ std::cout << std::endl;
72
+
73
+ std::cout << "halide-header => [0x" << std::hex
74
+ << (intptr_t)orig << ", 0x"
75
+ << (intptr_t)headend << std::dec
76
+ << "], # size:"
77
+ << (intptr_t)ptr - (intptr_t)orig << ", ";
78
+ print_meminfoalign((intptr_t)orig);
79
+ std::cout << std::endl;
80
+ return ptr;
81
+ }
82
+
83
+ void halide_free_trace(void *user_context, void *ptr) {
84
+ std::cout << "halide_free => [0x" << std::hex
85
+ << (intptr_t)((void **)ptr)[-1] << ", 0x"
86
+ << (intptr_t)ptr - 1 << std::dec
87
+ << "], # size:"
88
+ << (intptr_t)ptr - (intptr_t)((void **)ptr)[-1] << ", ";
89
+ print_meminfoalign((intptr_t)((void **)ptr)[-1]);
90
+ std::cout << std::endl;
91
+ free(((void **)ptr)[-1]);
92
+ }
93
+
94
+ void halide_enable_malloc_trace(void) {
95
+ halide_set_custom_malloc(halide_malloc_trace);
96
+ halide_set_custom_free(halide_free_trace);
97
+ }
98
+
99
+ } // namespace Tools
100
+ } // namespace Halide
101
+
102
+ #endif // HALIDE_MALLOC_TRACE_H
@@ -0,0 +1,161 @@
1
+ #ifndef HALIDE_THREAD_POOL_H
2
+ #define HALIDE_THREAD_POOL_H
3
+
4
#include <cassert>
#include <condition_variable>
#include <cstdlib>
#include <exception>
#include <functional>
#include <future>
#include <mutex>
#include <queue>
#include <thread>
#include <utility>
#include <vector>
12
+
13
+ #ifdef _MSC_VER
14
+ #else
15
+ #include <unistd.h>
16
+ #endif
17
+
18
+ /** \file
19
+ * Define a simple thread pool utility that is modeled on the api of
20
+ * std::async(); since implementation details of std::async
21
+ * can vary considerably, with no control over thread spawning, this class
22
+ * allows us to use the same model but with precise control over thread usage.
23
+ *
24
+ * A ThreadPool is created with a specific number of threads, which will never
25
+ * vary over the life of the ThreadPool. (If created without a specific number
26
+ * of threads, it will attempt to use threads == number-of-cores.)
27
+ *
28
+ * Each async request will go into a queue, and will be serviced by the next
29
+ * available thread from the pool.
30
+ *
31
+ * The ThreadPool's dtor will block until all currently-executing tasks
32
+ * finish (but won't schedule any more).
33
+ *
34
+ * Note that this is a fairly simpleminded ThreadPool, meant for tasks
35
+ * that are fairly coarse (e.g. different tasks in a test); it is specifically
36
+ * *not* intended to be the underlying implementation for Halide runtime threads
37
+ */
38
+ namespace Halide {
39
+ namespace Tools {
40
+
41
/** A fixed-size pool of worker threads running queued jobs that return T.
 *
 * Jobs are submitted with async(), which returns a std::future<T> for the
 * job's outcome. Jobs are taken from the queue in submission order by
 * whichever worker is free. If a job throws, the exception is stored in
 * its future and rethrown from future::get(); the worker keeps running.
 *
 * The destructor lets jobs that are already executing finish, but jobs
 * still waiting in the queue are dropped without being run.
 */
template<typename T>
class ThreadPool {
    // One queued unit of work: the callable to run plus the promise that
    // feeds the future handed back by async().
    struct Job {
        std::function<T()> func;
        std::promise<T> result;

        // Runs func with the pool mutex released and publishes the outcome
        // (value or exception) through result. unique_lock must own the
        // mutex on entry and owns it again on return.
        void run_unlocked(std::unique_lock<std::mutex> &unique_lock);
    };

    // all fields are protected by this mutex.
    std::mutex mutex;

    // Queue of Jobs.
    std::queue<Job> jobs;

    // Broadcast whenever items are added to the Job queue.
    std::condition_variable wakeup_threads;

    // Keep track of threads so they can be joined at shutdown
    std::vector<std::thread> threads;

    // True if the pool is shutting down.
    bool shutting_down{false};

    // Body of every worker: pull jobs off the queue until shutdown.
    void worker_thread() {
        std::unique_lock<std::mutex> unique_lock(mutex);
        while (!shutting_down) {
            if (jobs.empty()) {
                // There are no jobs pending. Wait until more jobs are enqueued.
                // Spurious wakeups are harmless: the loop rechecks the queue.
                wakeup_threads.wait(unique_lock);
            } else {
                // Grab the next job.
                Job cur_job = std::move(jobs.front());
                jobs.pop();
                cur_job.run_unlocked(unique_lock);
            }
        }
    }

    // Tell every worker to exit and wait for all started threads to finish.
    void stop_and_join() {
        // Wake everyone up and tell them the party's over and it's time to go home
        {
            std::lock_guard<std::mutex> lock(mutex);
            shutting_down = true;
            wakeup_threads.notify_all();
        }

        // Wait until they leave
        for (auto &t : threads) {
            t.join();
        }
    }

public:
    /** Number of processors reported by the platform, never less than 1.
     *
     * On Windows this reads the NUMBER_OF_PROCESSORS environment variable
     * (assuming 8 if it is unset); elsewhere it asks sysconf(). A failed or
     * nonsensical query (sysconf returning -1, or a non-positive value in
     * the environment variable) yields 1 rather than wrapping around to an
     * enormous size_t.
     */
    static size_t num_processors_online() {
#ifdef _WIN32
        const char *num_cores = getenv("NUMBER_OF_PROCESSORS");
        const long count = num_cores ? atoi(num_cores) : 8;
#else
        const long count = sysconf(_SC_NPROCESSORS_ONLN);
#endif
        return count > 0 ? static_cast<size_t>(count) : 1;
    }

    /** Start a pool with \p desired_num_threads workers (must be > 0).
     * Defaults to the number of available cores if not specified otherwise.
     */
    ThreadPool(size_t desired_num_threads = num_processors_online()) {
        // This file doesn't depend on anything else in libHalide, so
        // we'll use assert, not internal_assert.
        assert(desired_num_threads > 0);

        try {
            std::lock_guard<std::mutex> lock(mutex);

            // Create all the threads.
            for (size_t i = 0; i < desired_num_threads; ++i) {
                threads.emplace_back([this] { worker_thread(); });
            }
        } catch (...) {
            // Thread creation failed partway through. The workers that did
            // start must be joined before the exception leaves, because
            // destroying a joinable std::thread calls std::terminate.
            stop_and_join();
            throw;
        }
    }

    // The workers capture `this`, so the pool must stay at a fixed address.
    ThreadPool(const ThreadPool &) = delete;
    ThreadPool &operator=(const ThreadPool &) = delete;

    ~ThreadPool() {
        stop_and_join();
    }

    /** Queue func(args...) to be run on a pool thread.
     *
     * \return A future that yields the call's return value, or rethrows
     *         the exception the call exited with.
     */
    template<typename Func, typename... Args>
    std::future<T> async(Func func, Args... args) {
        // Don't use std::forward here: we never want args passed by reference,
        // since they will be accessed from an arbitrary thread.
        //
        // Some versions of GCC won't allow capturing variadic arguments in a lambda;
        //
        //     job.func = [func, args...]() -> T { return func(args...); };  // Nope, sorry
        //
        // fortunately, we can use std::bind() to accomplish the same thing.
        //
        // The job is assembled before taking the lock so the critical
        // section covers only the queue update.
        Job job;
        job.func = std::bind(std::move(func), std::move(args)...);
        std::future<T> result = job.result.get_future();

        std::lock_guard<std::mutex> lock(mutex);
        jobs.push(std::move(job));

        // Wake up our threads.
        wakeup_threads.notify_all();

        return result;
    }
};

template<typename T>
inline void ThreadPool<T>::Job::run_unlocked(std::unique_lock<std::mutex> &unique_lock) {
    unique_lock.unlock();
    try {
        T r = func();
        unique_lock.lock();
        result.set_value(std::move(r));
    } catch (...) {
        // Hand the failure to whoever holds the future instead of letting
        // it escape the worker thread (which would call std::terminate).
        // The caller expects the lock to be held on return in every case.
        if (!unique_lock.owns_lock()) {
            unique_lock.lock();
        }
        result.set_exception(std::current_exception());
    }
}

template<>
inline void ThreadPool<void>::Job::run_unlocked(std::unique_lock<std::mutex> &unique_lock) {
    unique_lock.unlock();
    try {
        func();
        unique_lock.lock();
        result.set_value();
    } catch (...) {
        // Same contract as the generic version: report through the future
        // and return with the lock held.
        if (!unique_lock.owns_lock()) {
            unique_lock.lock();
        }
        result.set_exception(std::current_exception());
    }
}
157
+
158
+ } // namespace Tools
159
+ } // namespace Halide
160
+
161
+ #endif // HALIDE_THREAD_POOL_H