halide-19.0.0-cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. halide/__init__.py +39 -0
  2. halide/_generator_helpers.py +835 -0
  3. halide/bin/adams2019_retrain_cost_model +0 -0
  4. halide/bin/adams2019_weightsdir_to_weightsfile +0 -0
  5. halide/bin/anderson2021_retrain_cost_model +0 -0
  6. halide/bin/anderson2021_weightsdir_to_weightsfile +0 -0
  7. halide/bin/featurization_to_sample +0 -0
  8. halide/bin/gengen +0 -0
  9. halide/bin/get_host_target +0 -0
  10. halide/halide_.cpython-311-darwin.so +0 -0
  11. halide/imageio.py +60 -0
  12. halide/include/Halide.h +35293 -0
  13. halide/include/HalideBuffer.h +2618 -0
  14. halide/include/HalidePyTorchCudaHelpers.h +64 -0
  15. halide/include/HalidePyTorchHelpers.h +120 -0
  16. halide/include/HalideRuntime.h +2221 -0
  17. halide/include/HalideRuntimeCuda.h +89 -0
  18. halide/include/HalideRuntimeD3D12Compute.h +91 -0
  19. halide/include/HalideRuntimeHexagonDma.h +104 -0
  20. halide/include/HalideRuntimeHexagonHost.h +157 -0
  21. halide/include/HalideRuntimeMetal.h +112 -0
  22. halide/include/HalideRuntimeOpenCL.h +119 -0
  23. halide/include/HalideRuntimeQurt.h +32 -0
  24. halide/include/HalideRuntimeVulkan.h +137 -0
  25. halide/include/HalideRuntimeWebGPU.h +44 -0
  26. halide/lib/cmake/Halide/FindHalide_LLVM.cmake +152 -0
  27. halide/lib/cmake/Halide/FindV8.cmake +33 -0
  28. halide/lib/cmake/Halide/Halide-shared-deps.cmake +0 -0
  29. halide/lib/cmake/Halide/Halide-shared-targets-release.cmake +29 -0
  30. halide/lib/cmake/Halide/Halide-shared-targets.cmake +154 -0
  31. halide/lib/cmake/Halide/HalideConfig.cmake +162 -0
  32. halide/lib/cmake/Halide/HalideConfigVersion.cmake +65 -0
  33. halide/lib/cmake/HalideHelpers/FindHalide_WebGPU.cmake +27 -0
  34. halide/lib/cmake/HalideHelpers/Halide-Interfaces-release.cmake +116 -0
  35. halide/lib/cmake/HalideHelpers/Halide-Interfaces.cmake +236 -0
  36. halide/lib/cmake/HalideHelpers/HalideGeneratorHelpers.cmake +1056 -0
  37. halide/lib/cmake/HalideHelpers/HalideHelpersConfig.cmake +28 -0
  38. halide/lib/cmake/HalideHelpers/HalideHelpersConfigVersion.cmake +54 -0
  39. halide/lib/cmake/HalideHelpers/HalideTargetHelpers.cmake +99 -0
  40. halide/lib/cmake/HalideHelpers/MutexCopy.ps1 +31 -0
  41. halide/lib/cmake/HalideHelpers/TargetExportScript.cmake +55 -0
  42. halide/lib/cmake/Halide_Python/Halide_Python-targets-release.cmake +30 -0
  43. halide/lib/cmake/Halide_Python/Halide_Python-targets.cmake +125 -0
  44. halide/lib/cmake/Halide_Python/Halide_PythonConfig.cmake +26 -0
  45. halide/lib/cmake/Halide_Python/Halide_PythonConfigVersion.cmake +65 -0
  46. halide/lib/libHalide.dylib +0 -0
  47. halide/lib/libHalidePyStubs.a +0 -0
  48. halide/lib/libHalide_GenGen.a +0 -0
  49. halide/lib/libautoschedule_adams2019.so +0 -0
  50. halide/lib/libautoschedule_anderson2021.so +0 -0
  51. halide/lib/libautoschedule_li2018.so +0 -0
  52. halide/lib/libautoschedule_mullapudi2016.so +0 -0
  53. halide/share/doc/Halide/LICENSE.txt +233 -0
  54. halide/share/doc/Halide/README.md +439 -0
  55. halide/share/doc/Halide/doc/BuildingHalideWithCMake.md +626 -0
  56. halide/share/doc/Halide/doc/CodeStyleCMake.md +393 -0
  57. halide/share/doc/Halide/doc/FuzzTesting.md +104 -0
  58. halide/share/doc/Halide/doc/HalideCMakePackage.md +812 -0
  59. halide/share/doc/Halide/doc/Hexagon.md +73 -0
  60. halide/share/doc/Halide/doc/Python.md +844 -0
  61. halide/share/doc/Halide/doc/RunGen.md +283 -0
  62. halide/share/doc/Halide/doc/Testing.md +125 -0
  63. halide/share/doc/Halide/doc/Vulkan.md +287 -0
  64. halide/share/doc/Halide/doc/WebAssembly.md +228 -0
  65. halide/share/doc/Halide/doc/WebGPU.md +128 -0
  66. halide/share/tools/RunGen.h +1470 -0
  67. halide/share/tools/RunGenMain.cpp +642 -0
  68. halide/share/tools/adams2019_autotune_loop.sh +227 -0
  69. halide/share/tools/anderson2021_autotune_loop.sh +591 -0
  70. halide/share/tools/halide_benchmark.h +240 -0
  71. halide/share/tools/halide_image.h +31 -0
  72. halide/share/tools/halide_image_info.h +318 -0
  73. halide/share/tools/halide_image_io.h +2794 -0
  74. halide/share/tools/halide_malloc_trace.h +102 -0
  75. halide/share/tools/halide_thread_pool.h +161 -0
  76. halide/share/tools/halide_trace_config.h +559 -0
  77. halide-19.0.0.data/data/share/cmake/Halide/HalideConfig.cmake +6 -0
  78. halide-19.0.0.data/data/share/cmake/Halide/HalideConfigVersion.cmake +65 -0
  79. halide-19.0.0.data/data/share/cmake/HalideHelpers/HalideHelpersConfig.cmake +6 -0
  80. halide-19.0.0.data/data/share/cmake/HalideHelpers/HalideHelpersConfigVersion.cmake +54 -0
  81. halide-19.0.0.dist-info/METADATA +301 -0
  82. halide-19.0.0.dist-info/RECORD +84 -0
  83. halide-19.0.0.dist-info/WHEEL +5 -0
  84. halide-19.0.0.dist-info/licenses/LICENSE.txt +233 -0
halide/share/tools/adams2019_autotune_loop.sh
@@ -0,0 +1,227 @@
+ #!/bin/bash
+
+ # Build the generator to autotune. This script will be autotuning the
+ # autoscheduler's cost model training pipeline, which is large enough
+ # to be interesting.
+ if [ $# -lt 6 -o $# -gt 8 ]; then
+     echo "Usage: $0 /path/to/some.generator generatorname halide_target weights_file autoschedule_bin_dir halide_distrib_path samples_out_path [generator_args_sets]"
+     exit
+ fi
+
+ set -eu
+
+ #trap "exit" INT TERM
+ #trap "kill 0" EXIT
+
+ GENERATOR=${1}
+ PIPELINE=${2}
+ HL_TARGET=${3}
+ START_WEIGHTS_FILE=${4}
+ AUTOSCHED_BIN=${5}
+ HALIDE_DISTRIB_PATH=${6}
+ SAMPLES=${7}
+
+ # Read the generator-arg sets into an array. Each set is delimited
+ # by space; multiple values within each set are delimited with ;
+ # e.g. "set1arg1=1;set1arg2=foo set2=bar set3arg1=3.14;set4arg2=42"
+ if [ $# -ge 8 ]; then
+     IFS=' ' read -r -a GENERATOR_ARGS_SETS_ARRAY <<< "${8}"
+ else
+     declare -a GENERATOR_ARGS_SETS_ARRAY=
+ fi
+
+ # Ensure the length is at least 1
+ if [ ${#GENERATOR_ARGS_SETS_ARRAY[@]} -eq 0 ]; then
+     GENERATOR_ARGS_SETS_ARRAY=( '' )
+ fi
+
+ COMPILATION_TIMEOUT=600s
+ BENCHMARKING_TIMEOUT=60s
+
+ if [ -z ${HL_TARGET} ]; then
+     # Use the host target -- but remove features that we don't want to train
+     # for by default, at least not yet (most notably, AVX512).
+     HL_TARGET=`${AUTOSCHED_BIN}/get_host_target avx512 avx512_knl avx512_skylake avx512_cannonlake`
+ fi
+ echo Training target is: ${HL_TARGET}
+
+ if [ -z ${GENERATOR} ]; then
+     GENERATOR=./bin/adams2019_demo.generator
+ fi
+
+ if [ -z ${PIPELINE} ]; then
+     PIPELINE=demo
+ fi
+
+ mkdir -p ${SAMPLES}
+
+ WEIGHTS=${SAMPLES}/updated.weights
+ if [[ -f ${WEIGHTS} ]]; then
+     echo Using existing weights "${WEIGHTS}"
+ else
+     # Only copy over the weights if we don't have any already,
+     # so that restarted jobs can continue from where they left off
+     cp ${START_WEIGHTS_FILE} ${WEIGHTS}
+     echo Copying starting weights from ${START_WEIGHTS_FILE} to ${WEIGHTS}
+ fi
+
+ # A batch of this many samples is built in parallel, and then
+ # benchmarked serially.
+ BATCH_SIZE=32
+
+ TIMEOUT_CMD="timeout"
+ if [ $(uname -s) = "Darwin" ] && ! which $TIMEOUT_CMD 2>&1 >/dev/null; then
+     # OSX doesn't have timeout; gtimeout is equivalent and available via Homebrew
+     TIMEOUT_CMD="gtimeout"
+     if ! which $TIMEOUT_CMD 2>&1 >/dev/null; then
+         echo "Can't find the command 'gtimeout'. Run 'brew install coreutils' to install it."
+         exit 1
+     fi
+ fi
+
+ PLUGIN_EXT=so
+
+ # Build a single featurization of the pipeline with a random schedule
+ make_featurization() {
+     D=${1}
+     SEED=${2}
+     FNAME=${3}
+     EXTRA_GENERATOR_ARGS=${4}
+     mkdir -p ${D}
+     rm -f "${D}/${FNAME}.featurization"
+     rm -f "${D}/${FNAME}.sample"
+     if [[ $D == */0 ]]; then
+         # Sample 0 in each batch is best effort beam search, with no randomness
+         dropout=100
+         beam=32
+     else
+         # The other samples are random probes biased by the cost model
+         dropout=1 # 1% chance of operating entirely greedily
+         beam=1
+     fi
+     ${TIMEOUT_CMD} -k ${COMPILATION_TIMEOUT} ${COMPILATION_TIMEOUT} \
+         ${GENERATOR} \
+         -g ${PIPELINE} \
+         -f ${FNAME} \
+         -o ${D} \
+         -e stmt,assembly,static_library,c_header,registration,schedule,featurization \
+         target=${HL_TARGET} \
+         ${EXTRA_GENERATOR_ARGS} \
+         -p ${AUTOSCHED_BIN}/libautoschedule_adams2019.${PLUGIN_EXT} \
+         autoscheduler=Adams2019 \
+         autoscheduler.parallelism=32 \
+         autoscheduler.beam_size=${beam} \
+         autoscheduler.random_dropout=${dropout} \
+         autoscheduler.random_dropout_seed=${SEED} \
+         autoscheduler.weights_path=${WEIGHTS} \
+         2> ${D}/compile_log.txt || echo "Compilation failed or timed out for ${D}"
+
+
+     # We don't need image I/O for this purpose,
+     # so leave out libpng and libjpeg
+     c++ \
+         -std=c++17 \
+         -I ${HALIDE_DISTRIB_PATH}/include \
+         ${HALIDE_DISTRIB_PATH}/tools/RunGenMain.cpp \
+         ${D}/*.registration.cpp \
+         ${D}/*.a \
+         -o ${D}/bench \
+         -DHALIDE_NO_PNG -DHALIDE_NO_JPEG \
+         -ldl -lpthread
+ }
+
+ # Benchmark one of the random samples
+ benchmark_sample() {
+     sleep 1 # Give CPU clocks a chance to spin back up if we're thermally throttling
+     D=${1}
+     HL_NUM_THREADS=32 \
+         ${TIMEOUT_CMD} -k ${BENCHMARKING_TIMEOUT} ${BENCHMARKING_TIMEOUT} \
+         ${D}/bench \
+         --estimate_all \
+         --benchmarks=all \
+         | tee ${D}/bench.txt || echo "Benchmarking failed or timed out for ${D}"
+
+     # Add the runtime, pipeline id, and schedule id to the feature file
+     R=$(cut -d' ' -f8 < ${D}/bench.txt)
+     P=$3
+     S=$2
+     FNAME=$4
+     ${AUTOSCHED_BIN}/featurization_to_sample ${D}/${FNAME}.featurization $R $P $S ${D}/${FNAME}.sample || echo "featurization_to_sample failed for ${D} (probably because benchmarking failed)"
+ }
+
+ # Don't clobber existing samples
+ FIRST=$(ls -d ${SAMPLES}/batch_* 2>/dev/null | sed -e "s|.*/batch_||;s|_.*||" | sort -n | tail -n1)
+
+ if [ $(uname -s) = "Darwin" ]; then
+     LOCAL_CORES=`sysctl -n hw.ncpu`
+ else
+     LOCAL_CORES=`nproc`
+ fi
+ echo Local number of cores detected as ${LOCAL_CORES}
+
+ NUM_BATCHES=1
+
+ for ((BATCH_ID=$((FIRST+1));BATCH_ID<$((FIRST+1+NUM_BATCHES));BATCH_ID++)); do
+
+     SECONDS=0
+
+     for ((EXTRA_ARGS_IDX=0;EXTRA_ARGS_IDX<${#GENERATOR_ARGS_SETS_ARRAY[@]};EXTRA_ARGS_IDX++)); do
+
+         # Compile a batch of samples using the generator in parallel
+         DIR=${SAMPLES}/batch_${BATCH_ID}_${EXTRA_ARGS_IDX}
+
+         # Copy the weights being used into the batch folder so that we can repro failures
+         mkdir -p ${DIR}/
+         cp ${WEIGHTS} ${DIR}/used.weights
+
+         EXTRA_GENERATOR_ARGS=${GENERATOR_ARGS_SETS_ARRAY[EXTRA_ARGS_IDX]/;/ }
+         if [ ! -z "${EXTRA_GENERATOR_ARGS}" ]; then
+             echo "Adding extra generator args (${EXTRA_GENERATOR_ARGS}) for batch_${BATCH_ID}"
+         fi
+
+         echo ${EXTRA_GENERATOR_ARGS} > ${DIR}/extra_generator_args.txt
+
+         # Do parallel compilation in batches, so that machines with fewer than BATCH_SIZE cores
+         # don't get swamped and timeout unnecessarily
+         echo -n Compiling ${BATCH_SIZE} samples
+         for ((SAMPLE_ID=0;SAMPLE_ID<${BATCH_SIZE};SAMPLE_ID++)); do
+             while [[ 1 ]]; do
+                 RUNNING=$(jobs -r | wc -l)
+                 if [[ RUNNING -ge LOCAL_CORES ]]; then
+                     sleep 1
+                 else
+                     break
+                 fi
+             done
+
+             S=$(printf "%04d%04d" $BATCH_ID $SAMPLE_ID)
+             FNAME=$(printf "%s_batch_%04d_sample_%04d" ${PIPELINE} $BATCH_ID $SAMPLE_ID)
+             make_featurization "${DIR}/${SAMPLE_ID}" $S $FNAME "$EXTRA_GENERATOR_ARGS" &
+             echo -n .
+         done
+         wait
+         echo done.
+
+         # benchmark them serially using rungen
+         for ((SAMPLE_ID=0;SAMPLE_ID<${BATCH_SIZE};SAMPLE_ID++)); do
+             S=$(printf "%04d%04d" $BATCH_ID $SAMPLE_ID)
+             FNAME=$(printf "%s_batch_%04d_sample_%04d" ${PIPELINE} $BATCH_ID $SAMPLE_ID)
+             benchmark_sample "${DIR}/${SAMPLE_ID}" $S $EXTRA_ARGS_IDX $FNAME
+         done
+
+         # retrain model weights on all samples seen so far
+         echo Retraining model...
+
+         find ${SAMPLES} -name "*.sample" | \
+             ${AUTOSCHED_BIN}/adams2019_retrain_cost_model \
+             --epochs=${BATCH_SIZE} \
+             --rates="0.0001" \
+             --num_cores=32 \
+             --initial_weights=${WEIGHTS} \
+             --weights_out=${WEIGHTS} \
+             --best_benchmark=${SAMPLES}/best.${PIPELINE}.benchmark.txt \
+             --best_schedule=${SAMPLES}/best.${PIPELINE}.schedule.h
+     done
+
+     echo Batch ${BATCH_ID} took ${SECONDS} seconds to compile, benchmark, and retrain
+ done
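
For orientation, a minimal sketch of how this autotune loop might be driven. Every path below is an illustrative placeholder, not a file guaranteed to sit at that location in this wheel; the script itself only requires the seven positional arguments shown in its Usage line, plus an optional eighth for generator-arg sets.

    # Hypothetical invocation (placeholder paths): generator, generator name,
    # Halide target, starting weights, autoscheduler bin dir, Halide distribution
    # path, and the output directory for samples.
    bash halide/share/tools/adams2019_autotune_loop.sh \
        ./bin/adams2019_demo.generator \
        demo \
        host \
        ./baseline.weights \
        ./autosched_bin \
        ./halide_distrib \
        ./samples
    # Each run compiles and benchmarks one batch of 32 samples, then retrains the
    # cost model; re-running the same command appends a new batch and resumes from
    # ./samples/updated.weights instead of the starting weights.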