halide 19.0.0__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- halide/__init__.py +39 -0
- halide/_generator_helpers.py +835 -0
- halide/bin/Halide.dll +0 -0
- halide/bin/adams2019_retrain_cost_model.exe +0 -0
- halide/bin/adams2019_weightsdir_to_weightsfile.exe +0 -0
- halide/bin/anderson2021_retrain_cost_model.exe +0 -0
- halide/bin/anderson2021_weightsdir_to_weightsfile.exe +0 -0
- halide/bin/featurization_to_sample.exe +0 -0
- halide/bin/gengen.exe +0 -0
- halide/bin/get_host_target.exe +0 -0
- halide/halide_.cp311-win_amd64.pyd +0 -0
- halide/imageio.py +60 -0
- halide/include/Halide.h +35293 -0
- halide/include/HalideBuffer.h +2618 -0
- halide/include/HalidePyTorchCudaHelpers.h +64 -0
- halide/include/HalidePyTorchHelpers.h +120 -0
- halide/include/HalideRuntime.h +2221 -0
- halide/include/HalideRuntimeCuda.h +89 -0
- halide/include/HalideRuntimeD3D12Compute.h +91 -0
- halide/include/HalideRuntimeHexagonDma.h +104 -0
- halide/include/HalideRuntimeHexagonHost.h +157 -0
- halide/include/HalideRuntimeMetal.h +112 -0
- halide/include/HalideRuntimeOpenCL.h +119 -0
- halide/include/HalideRuntimeQurt.h +32 -0
- halide/include/HalideRuntimeVulkan.h +137 -0
- halide/include/HalideRuntimeWebGPU.h +44 -0
- halide/lib/Halide.lib +0 -0
- halide/lib/HalidePyStubs.lib +0 -0
- halide/lib/Halide_GenGen.lib +0 -0
- halide/lib/autoschedule_adams2019.dll +0 -0
- halide/lib/autoschedule_anderson2021.dll +0 -0
- halide/lib/autoschedule_li2018.dll +0 -0
- halide/lib/autoschedule_mullapudi2016.dll +0 -0
- halide/lib/cmake/Halide/FindHalide_LLVM.cmake +152 -0
- halide/lib/cmake/Halide/FindV8.cmake +33 -0
- halide/lib/cmake/Halide/Halide-shared-deps.cmake +0 -0
- halide/lib/cmake/Halide/Halide-shared-targets-release.cmake +29 -0
- halide/lib/cmake/Halide/Halide-shared-targets.cmake +154 -0
- halide/lib/cmake/Halide/HalideConfig.cmake +162 -0
- halide/lib/cmake/Halide/HalideConfigVersion.cmake +65 -0
- halide/lib/cmake/HalideHelpers/FindHalide_WebGPU.cmake +27 -0
- halide/lib/cmake/HalideHelpers/Halide-Interfaces-release.cmake +112 -0
- halide/lib/cmake/HalideHelpers/Halide-Interfaces.cmake +236 -0
- halide/lib/cmake/HalideHelpers/HalideGeneratorHelpers.cmake +1056 -0
- halide/lib/cmake/HalideHelpers/HalideHelpersConfig.cmake +28 -0
- halide/lib/cmake/HalideHelpers/HalideHelpersConfigVersion.cmake +54 -0
- halide/lib/cmake/HalideHelpers/HalideTargetHelpers.cmake +99 -0
- halide/lib/cmake/HalideHelpers/MutexCopy.ps1 +31 -0
- halide/lib/cmake/HalideHelpers/TargetExportScript.cmake +55 -0
- halide/lib/cmake/Halide_Python/Halide_Python-targets-release.cmake +29 -0
- halide/lib/cmake/Halide_Python/Halide_Python-targets.cmake +125 -0
- halide/lib/cmake/Halide_Python/Halide_PythonConfig.cmake +26 -0
- halide/lib/cmake/Halide_Python/Halide_PythonConfigVersion.cmake +65 -0
- halide/share/doc/Halide/LICENSE.txt +233 -0
- halide/share/doc/Halide/README.md +439 -0
- halide/share/doc/Halide/doc/BuildingHalideWithCMake.md +626 -0
- halide/share/doc/Halide/doc/CodeStyleCMake.md +393 -0
- halide/share/doc/Halide/doc/FuzzTesting.md +104 -0
- halide/share/doc/Halide/doc/HalideCMakePackage.md +812 -0
- halide/share/doc/Halide/doc/Hexagon.md +73 -0
- halide/share/doc/Halide/doc/Python.md +844 -0
- halide/share/doc/Halide/doc/RunGen.md +283 -0
- halide/share/doc/Halide/doc/Testing.md +125 -0
- halide/share/doc/Halide/doc/Vulkan.md +287 -0
- halide/share/doc/Halide/doc/WebAssembly.md +228 -0
- halide/share/doc/Halide/doc/WebGPU.md +128 -0
- halide/share/tools/RunGen.h +1470 -0
- halide/share/tools/RunGenMain.cpp +642 -0
- halide/share/tools/adams2019_autotune_loop.sh +227 -0
- halide/share/tools/anderson2021_autotune_loop.sh +591 -0
- halide/share/tools/halide_benchmark.h +240 -0
- halide/share/tools/halide_image.h +31 -0
- halide/share/tools/halide_image_info.h +318 -0
- halide/share/tools/halide_image_io.h +2794 -0
- halide/share/tools/halide_malloc_trace.h +102 -0
- halide/share/tools/halide_thread_pool.h +161 -0
- halide/share/tools/halide_trace_config.h +559 -0
- halide-19.0.0.data/data/share/cmake/Halide/HalideConfig.cmake +6 -0
- halide-19.0.0.data/data/share/cmake/Halide/HalideConfigVersion.cmake +65 -0
- halide-19.0.0.data/data/share/cmake/HalideHelpers/HalideHelpersConfig.cmake +6 -0
- halide-19.0.0.data/data/share/cmake/HalideHelpers/HalideHelpersConfigVersion.cmake +54 -0
- halide-19.0.0.dist-info/METADATA +301 -0
- halide-19.0.0.dist-info/RECORD +85 -0
- halide-19.0.0.dist-info/WHEEL +5 -0
- halide-19.0.0.dist-info/licenses/LICENSE.txt +233 -0
@@ -0,0 +1,102 @@
|
|
1
|
+
#ifndef HALIDE_MALLOC_TRACE_H
|
2
|
+
#define HALIDE_MALLOC_TRACE_H
|
3
|
+
|
4
|
+
//---------------------------------------------------------------------------
|
5
|
+
// The custom trace allocator can be used in an application by calling:
|
6
|
+
//
|
7
|
+
// halide_enable_malloc_trace();
|
8
|
+
//
|
9
|
+
// When the app is run, calls to halide_malloc/free will produce output like:
|
10
|
+
//
|
11
|
+
// halide_malloc => [0x9e400, 0xa27ff], # size:17408, align:1K
|
12
|
+
// halide-header => [0x9e390, 0x9e3ff], # size:112, align:16
|
13
|
+
// halide_malloc => [0xa2880, 0xa6e9f], # size:17952, align:128
|
14
|
+
// halide-header => [0xa2820, 0xa287f], # size:96, align:32
|
15
|
+
// halide_free => [0x9e390, 0x9e3ff], # size:112, align:16
|
16
|
+
// halide_free => [0xa2820, 0xa287f], # size:96, align:32
|
17
|
+
//
|
18
|
+
//---------------------------------------------------------------------------
|
19
|
+
|
20
|
+
#include <cstdlib>
|
21
|
+
#include <iostream>
|
22
|
+
#include <memory>
|
23
|
+
|
24
|
+
namespace Halide {
|
25
|
+
namespace Tools {
|
26
|
+
|
27
|
+
/**
 * Print the alignment of an address to std::cout as "align:<n>[K|M]".
 *
 * The alignment reported is the largest power of two, capped at 1 MiB, that
 * evenly divides \p val. Values of 1024 and above are scaled down and
 * suffixed with 'K' (or 'M' for exactly 1 MiB). No newline is written.
 *
 * \param val  Address (or any integer) whose alignment should be reported.
 */
static inline void print_meminfoalign(intptr_t val) {
    // Walk the candidate powers of two from 1 MiB downwards; the first one
    // that divides val is the answer. A candidate of 1 always matches.
    for (intptr_t pow2 = 1024 * 1024; pow2 > 0; pow2 >>= 1) {
        if ((val & (pow2 - 1)) != 0) {
            continue;
        }

        // Scale into K / M units for readability.
        intptr_t shown = pow2;
        char unit = ' ';
        if (shown >= 1024) {
            shown >>= 10;
            unit = 'K';
        }
        if (shown >= 1024) {
            shown >>= 10;
            unit = 'M';
        }

        std::cout << "align:" << shown;
        if (unit != ' ') {
            std::cout << unit;
        }
        return;
    }
}
|
49
|
+
|
50
|
+
void *halide_malloc_trace(void *user_context, size_t x) {
|
51
|
+
// Halide requires halide_malloc to allocate memory that can be
|
52
|
+
// read 8 bytes before the start to store the original pointer.
|
53
|
+
// Additionally, we also need to align it to the natural vector
|
54
|
+
// width.
|
55
|
+
void *orig = malloc(x + 128);
|
56
|
+
if (orig == nullptr) {
|
57
|
+
// Will result in a failed assertion and a call to halide_error
|
58
|
+
return nullptr;
|
59
|
+
}
|
60
|
+
// Round up to next multiple of 128.
|
61
|
+
void *ptr = (void *)((((size_t)orig + 128) >> 7) << 7);
|
62
|
+
((void **)ptr)[-1] = orig;
|
63
|
+
|
64
|
+
void *headend = (orig == ptr) ? orig : (char *)ptr - 1;
|
65
|
+
std::cout << "halide_malloc => [0x" << std::hex
|
66
|
+
<< (intptr_t)ptr << ", 0x"
|
67
|
+
<< (intptr_t)ptr + x - 1 << std::dec
|
68
|
+
<< "], # size:"
|
69
|
+
<< (intptr_t)x << ", ";
|
70
|
+
print_meminfoalign((intptr_t)ptr);
|
71
|
+
std::cout << std::endl;
|
72
|
+
|
73
|
+
std::cout << "halide-header => [0x" << std::hex
|
74
|
+
<< (intptr_t)orig << ", 0x"
|
75
|
+
<< (intptr_t)headend << std::dec
|
76
|
+
<< "], # size:"
|
77
|
+
<< (intptr_t)ptr - (intptr_t)orig << ", ";
|
78
|
+
print_meminfoalign((intptr_t)orig);
|
79
|
+
std::cout << std::endl;
|
80
|
+
return ptr;
|
81
|
+
}
|
82
|
+
|
83
|
+
void halide_free_trace(void *user_context, void *ptr) {
|
84
|
+
std::cout << "halide_free => [0x" << std::hex
|
85
|
+
<< (intptr_t)((void **)ptr)[-1] << ", 0x"
|
86
|
+
<< (intptr_t)ptr - 1 << std::dec
|
87
|
+
<< "], # size:"
|
88
|
+
<< (intptr_t)ptr - (intptr_t)((void **)ptr)[-1] << ", ";
|
89
|
+
print_meminfoalign((intptr_t)((void **)ptr)[-1]);
|
90
|
+
std::cout << std::endl;
|
91
|
+
free(((void **)ptr)[-1]);
|
92
|
+
}
|
93
|
+
|
94
|
+
/**
 * Turn on allocation tracing by passing halide_malloc_trace and
 * halide_free_trace to halide_set_custom_malloc / halide_set_custom_free
 * (both declared outside this header, so the Halide runtime header must be
 * included before this one).
 *
 * Declared inline so the header can be included from more than one
 * translation unit without multiple-definition link errors.
 */
inline void halide_enable_malloc_trace(void) {
    halide_set_custom_malloc(halide_malloc_trace);
    halide_set_custom_free(halide_free_trace);
}
|
98
|
+
|
99
|
+
} // namespace Tools
|
100
|
+
} // namespace Halide
|
101
|
+
|
102
|
+
#endif // HALIDE_MALLOC_TRACE_H
|
@@ -0,0 +1,161 @@
|
|
1
|
+
#ifndef HALIDE_THREAD_POOL_H
|
2
|
+
#define HALIDE_THREAD_POOL_H
|
3
|
+
|
4
|
+
#include <cassert>
#include <condition_variable>
#include <cstdlib>
#include <exception>
#include <functional>
#include <future>
#include <mutex>
#include <queue>
#include <thread>
#include <utility>
#include <vector>
|
12
|
+
|
13
|
+
#ifdef _MSC_VER
|
14
|
+
#else
|
15
|
+
#include <unistd.h>
|
16
|
+
#endif
|
17
|
+
|
18
|
+
/** \file
|
19
|
+
* Define a simple thread pool utility that is modeled on the api of
|
20
|
+
* std::async(); since implementation details of std::async
|
21
|
+
* can vary considerably, with no control over thread spawning, this class
|
22
|
+
* allows us to use the same model but with precise control over thread usage.
|
23
|
+
*
|
24
|
+
* A ThreadPool is created with a specific number of threads, which will never
|
25
|
+
* vary over the life of the ThreadPool. (If created without a specific number
|
26
|
+
* of threads, it will attempt to use threads == number-of-cores.)
|
27
|
+
*
|
28
|
+
* Each async request will go into a queue, and will be serviced by the next
|
29
|
+
* available thread from the pool.
|
30
|
+
*
|
31
|
+
* The ThreadPool's dtor will block, waiting for all currently-executing tasks
|
32
|
+
* to finish (but won't schedule any more).
|
33
|
+
*
|
34
|
+
* Note that this is a fairly simpleminded ThreadPool, meant for tasks
|
35
|
+
* that are fairly coarse (e.g. different tasks in a test); it is specifically
|
36
|
+
* *not* intended to be the underlying implementation for Halide runtime threads
|
37
|
+
*/
|
38
|
+
namespace Halide {
|
39
|
+
namespace Tools {
|
40
|
+
|
41
|
+
/**
 * A fixed-size pool of worker threads that runs queued callables and hands
 * their results back through std::future<T>.
 *
 * The number of threads is chosen at construction and never changes. Jobs
 * are serviced in FIFO order by whichever worker becomes free first. If a
 * job throws, the exception is captured and rethrown from the corresponding
 * future's get(), as with std::async; the worker thread keeps running.
 *
 * The destructor waits for jobs that are already executing, but jobs still
 * sitting in the queue are discarded without being run.
 *
 * \tparam T  Result type of every job submitted to this pool (may be void).
 */
template<typename T>
class ThreadPool {
    /** One queued unit of work together with the promise for its result. */
    struct Job {
        std::function<T()> func;
        std::promise<T> result;

        /**
         * Run func with the pool mutex released, then re-acquire the mutex
         * and publish the value (or the exception thrown by func) through
         * result. On entry and on return \p unique_lock holds the mutex.
         */
        void run_unlocked(std::unique_lock<std::mutex> &unique_lock);
    };

    // all fields are protected by this mutex.
    std::mutex mutex;

    // Queue of Jobs.
    std::queue<Job> jobs;

    // Broadcast whenever items are added to the Job queue.
    std::condition_variable wakeup_threads;

    // Keep track of threads so they can be joined at shutdown
    std::vector<std::thread> threads;

    // True if the pool is shutting down.
    bool shutting_down{false};

    /** Body of every worker: pull jobs until the pool shuts down. */
    void worker_thread() {
        std::unique_lock<std::mutex> unique_lock(mutex);
        while (!shutting_down) {
            if (jobs.empty()) {
                // There are no jobs pending. Wait until more jobs are enqueued.
                // Spurious wakeups are harmless: the loop re-checks both
                // conditions.
                wakeup_threads.wait(unique_lock);
            } else {
                // Grab the next job.
                Job cur_job = std::move(jobs.front());
                jobs.pop();
                cur_job.run_unlocked(unique_lock);
            }
        }
    }

public:
    /**
     * Best-effort count of usable processors; always at least 1.
     *
     * On Windows this reads the NUMBER_OF_PROCESSORS environment variable;
     * elsewhere it asks sysconf(_SC_NPROCESSORS_ONLN). If the value is
     * missing, unparsable, or not positive (sysconf reports failure as -1),
     * a default of 8 is returned instead of converting a bogus value to
     * size_t.
     */
    static size_t num_processors_online() {
        const size_t fallback_count = 8;
#ifdef _WIN32
        const char *num_cores = getenv("NUMBER_OF_PROCESSORS");
        const long detected = num_cores ? atoi(num_cores) : 0;
#else
        const long detected = sysconf(_SC_NPROCESSORS_ONLN);
#endif
        return detected > 0 ? static_cast<size_t>(detected) : fallback_count;
    }

    /**
     * Create the pool and start its worker threads.
     *
     * \param desired_num_threads  Number of workers; must be greater than 0.
     *                             Defaults to num_processors_online().
     */
    ThreadPool(size_t desired_num_threads = num_processors_online()) {
        // This file doesn't depend on anything else in libHalide, so
        // we'll use assert, not internal_assert.
        assert(desired_num_threads > 0);

        std::lock_guard<std::mutex> lock(mutex);

        // Create all the threads.
        for (size_t i = 0; i < desired_num_threads; ++i) {
            threads.emplace_back([this] { worker_thread(); });
        }
    }

    /**
     * Stop accepting work, wait for in-flight jobs, and join every worker.
     * Jobs that were queued but never started are destroyed unrun.
     */
    ~ThreadPool() {
        // Wake everyone up and tell them the party's over and it's time to go home
        {
            std::lock_guard<std::mutex> lock(mutex);
            shutting_down = true;
            wakeup_threads.notify_all();
        }

        // Wait until they leave
        for (auto &t : threads) {
            t.join();
        }
    }

    /**
     * Queue func(args...) for execution on a pool thread.
     *
     * The callable and its arguments are copied into the job.
     *
     * \return A future that yields the job's result, or rethrows the
     *         exception the job threw.
     */
    template<typename Func, typename... Args>
    std::future<T> async(Func func, Args... args) {
        std::lock_guard<std::mutex> lock(mutex);

        Job job;
        // Don't use std::forward here: we never want args passed by reference,
        // since they will be accessed from an arbitrary thread.
        //
        // Some versions of GCC won't allow capturing variadic arguments in a lambda;
        //
        //     job.func = [func, args...]() -> T { return func(args...); };  // Nope, sorry
        //
        // fortunately, we can use std::bind() to accomplish the same thing.
        job.func = std::bind(func, args...);
        jobs.emplace(std::move(job));
        std::future<T> result = jobs.back().result.get_future();

        // Wake up our threads.
        wakeup_threads.notify_all();

        return result;
    }
};

template<typename T>
inline void ThreadPool<T>::Job::run_unlocked(std::unique_lock<std::mutex> &unique_lock) {
    // Run the user code without holding the pool mutex so other workers and
    // async() callers are not blocked for the duration of the job.
    unique_lock.unlock();
    try {
        T r = func();
        unique_lock.lock();
        result.set_value(std::move(r));
    } catch (...) {
        // Deliver the failure through the future instead of letting it
        // escape the worker thread (which would call std::terminate).
        if (!unique_lock.owns_lock()) {
            unique_lock.lock();
        }
        result.set_exception(std::current_exception());
    }
}

template<>
inline void ThreadPool<void>::Job::run_unlocked(std::unique_lock<std::mutex> &unique_lock) {
    // Same as the primary template, minus the result value.
    unique_lock.unlock();
    try {
        func();
        unique_lock.lock();
        result.set_value();
    } catch (...) {
        if (!unique_lock.owns_lock()) {
            unique_lock.lock();
        }
        result.set_exception(std::current_exception());
    }
}
|
157
|
+
|
158
|
+
} // namespace Tools
|
159
|
+
} // namespace Halide
|
160
|
+
|
161
|
+
#endif // HALIDE_THREAD_POOL_H
|