halide 19.0.0__cp312-cp312-win_amd64.whl → 21.0.0__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- halide/__init__.py +10 -6
- halide/_generator_helpers.py +190 -127
- halide/bin/Halide.dll +0 -0
- halide/bin/adams2019_retrain_cost_model.exe +0 -0
- halide/bin/adams2019_weightsdir_to_weightsfile.exe +0 -0
- halide/bin/anderson2021_retrain_cost_model.exe +0 -0
- halide/bin/anderson2021_weightsdir_to_weightsfile.exe +0 -0
- halide/bin/featurization_to_sample.exe +0 -0
- halide/bin/gengen.exe +0 -0
- halide/bin/get_host_target.exe +0 -0
- halide/halide_.cp312-win_amd64.pyd +0 -0
- halide/imageio.py +1 -1
- halide/include/Halide.h +1775 -1477
- halide/include/HalideBuffer.h +13 -13
- halide/include/HalidePyTorchCudaHelpers.h +1 -1
- halide/include/HalideRuntime.h +35 -16
- halide/lib/Halide.lib +0 -0
- halide/lib/HalidePyStubs.lib +0 -0
- halide/lib/Halide_GenGen.lib +0 -0
- halide/lib/autoschedule_adams2019.dll +0 -0
- halide/lib/autoschedule_anderson2021.dll +0 -0
- halide/lib/autoschedule_li2018.dll +0 -0
- halide/lib/autoschedule_mullapudi2016.dll +0 -0
- halide/lib/cmake/Halide/FindHalide_LLVM.cmake +44 -15
- halide/lib/cmake/Halide/FindV8.cmake +0 -12
- halide/lib/cmake/Halide/Halide-shared-targets.cmake +1 -1
- halide/lib/cmake/Halide/HalideConfig.cmake +1 -1
- halide/lib/cmake/Halide/HalideConfigVersion.cmake +3 -3
- halide/lib/cmake/HalideHelpers/Halide-Interfaces.cmake +1 -0
- halide/lib/cmake/HalideHelpers/HalideGeneratorHelpers.cmake +31 -9
- halide/lib/cmake/HalideHelpers/HalideHelpersConfigVersion.cmake +3 -3
- halide/lib/cmake/Halide_Python/Halide_PythonConfigVersion.cmake +3 -3
- halide/share/doc/Halide/README.md +7 -6
- halide/share/doc/Halide/doc/BuildingHalideWithCMake.md +78 -6
- halide/share/doc/Halide/doc/HalideCMakePackage.md +9 -2
- halide/share/doc/Halide/doc/Python.md +19 -4
- halide/share/doc/Halide/doc/RunGen.md +1 -1
- {halide-19.0.0.data → halide-21.0.0.data}/data/share/cmake/Halide/HalideConfig.cmake +4 -1
- {halide-19.0.0.data → halide-21.0.0.data}/data/share/cmake/Halide/HalideConfigVersion.cmake +3 -3
- {halide-19.0.0.data → halide-21.0.0.data}/data/share/cmake/HalideHelpers/HalideHelpersConfig.cmake +4 -1
- {halide-19.0.0.data → halide-21.0.0.data}/data/share/cmake/HalideHelpers/HalideHelpersConfigVersion.cmake +3 -3
- halide-21.0.0.dist-info/METADATA +302 -0
- {halide-19.0.0.dist-info → halide-21.0.0.dist-info}/RECORD +45 -45
- {halide-19.0.0.dist-info → halide-21.0.0.dist-info}/WHEEL +1 -1
- halide-19.0.0.dist-info/METADATA +0 -301
- {halide-19.0.0.dist-info → halide-21.0.0.dist-info}/licenses/LICENSE.txt +0 -0
halide/include/Halide.h
CHANGED
@@ -315,7 +315,7 @@
|
|
315
315
|
// our CMake build, so that we ensure that the in-build metadata (eg soversion)
|
316
316
|
// matches, but keeping the canonical version here makes it easier to keep
|
317
317
|
// downstream build systems (eg Blaze/Bazel) properly in sync with the source.
|
318
|
-
#define HALIDE_VERSION_MAJOR
|
318
|
+
#define HALIDE_VERSION_MAJOR 21
|
319
319
|
#define HALIDE_VERSION_MINOR 0
|
320
320
|
#define HALIDE_VERSION_PATCH 0
|
321
321
|
|
@@ -1643,21 +1643,27 @@ extern int halide_error_vscale_invalid(void *user_context, const char *func_name
|
|
1643
1643
|
// @}
|
1644
1644
|
|
1645
1645
|
/** Optional features a compilation Target can have.
|
1646
|
-
*
|
1647
|
-
*
|
1646
|
+
*
|
1647
|
+
* Be sure to keep this in sync with:
|
1648
|
+
* 1. the Feature enum in Target.h,
|
1649
|
+
* 2. the implementation of get_runtime_compatible_target in Target.cpp,
|
1650
|
+
* 3. PyEnums.cpp,
|
1651
|
+
* if you add a new feature.
|
1648
1652
|
*/
|
1649
1653
|
typedef enum halide_target_feature_t {
|
1650
|
-
halide_target_feature_jit = 0,
|
1651
|
-
halide_target_feature_debug,
|
1652
|
-
|
1653
|
-
|
1654
|
-
|
1655
|
-
|
1656
|
-
|
1657
|
-
|
1658
|
-
|
1659
|
-
|
1660
|
-
|
1654
|
+
halide_target_feature_jit = 0, ///< Generate code that will run immediately inside the calling process.
|
1655
|
+
halide_target_feature_debug, ///< Turn on debug info and output for runtime code.
|
1656
|
+
halide_target_feature_enable_backtraces, ///< Preserve frame pointers and include unwind tables to support accurate backtraces for debugging and profiling.
|
1657
|
+
halide_target_feature_no_asserts, ///< Disable all runtime checks, for slightly tighter code.
|
1658
|
+
halide_target_feature_no_bounds_query, ///< Disable the bounds querying functionality.
|
1659
|
+
|
1660
|
+
halide_target_feature_sse41, ///< Use SSE 4.1 and earlier instructions. Only relevant on x86.
|
1661
|
+
halide_target_feature_avx, ///< Use AVX 1 instructions. Only relevant on x86.
|
1662
|
+
halide_target_feature_avx2, ///< Use AVX 2 instructions. Only relevant on x86.
|
1663
|
+
halide_target_feature_avxvnni, ///< Enable the AVX-VNNI features supported by AVX2 instructions. Supports 256-bit VNNI instructions without EVEX encoding.
|
1664
|
+
halide_target_feature_fma, ///< Enable x86 FMA instruction
|
1665
|
+
halide_target_feature_fma4, ///< Enable x86 (AMD) FMA4 instruction set
|
1666
|
+
halide_target_feature_f16c, ///< Enable x86 16-bit float support
|
1661
1667
|
|
1662
1668
|
halide_target_feature_armv7s, ///< Generate code for ARMv7s. Only relevant for 32-bit ARM.
|
1663
1669
|
halide_target_feature_no_neon, ///< Avoid using NEON instructions. Only relevant for 32-bit ARM.
|
@@ -1701,6 +1707,7 @@ typedef enum halide_target_feature_t {
|
|
1701
1707
|
halide_target_feature_avx512_skylake, ///< Enable the AVX512 features supported by Skylake Xeon server processors. This adds AVX512-VL, AVX512-BW, and AVX512-DQ to the base set. The main difference from the base AVX512 set is better support for small integer ops. Note that this does not include the Knight's Landing features. Note also that these features are not available on Skylake desktop and mobile processors.
|
1702
1708
|
halide_target_feature_avx512_cannonlake, ///< Enable the AVX512 features expected to be supported by future Cannonlake processors. This includes all of the Skylake features, plus AVX512-IFMA and AVX512-VBMI.
|
1703
1709
|
halide_target_feature_avx512_zen4, ///< Enable the AVX512 features supported by Zen4 processors. This include all of the Cannonlake features, plus AVX512-VNNI, AVX512-BF16, and more.
|
1710
|
+
halide_target_feature_avx512_zen5, ///< Enable the AVX512 features supported by Zen5 processors. This include all of the Cannonlake features, plus AVX512-VNNI, AVX512-BF16, AVX-VNNI and more.
|
1704
1711
|
halide_target_feature_avx512_sapphirerapids, ///< Enable the AVX512 features supported by Sapphire Rapids processors. This include all of the Zen4 features, plus AVX-VNNI and AMX instructions.
|
1705
1712
|
halide_target_feature_trace_loads, ///< Trace all loads done by the pipeline. Equivalent to calling Func::trace_loads on every non-inlined Func.
|
1706
1713
|
halide_target_feature_trace_stores, ///< Trace all stores done by the pipeline. Equivalent to calling Func::trace_stores on every non-inlined Func.
|
@@ -1755,6 +1762,7 @@ typedef enum halide_target_feature_t {
|
|
1755
1762
|
halide_target_feature_semihosting, ///< Used together with Target::NoOS for the baremetal target built with semihosting library and run with semihosting mode where minimum I/O communication with a host PC is available.
|
1756
1763
|
halide_target_feature_avx10_1, ///< Intel AVX10 version 1 support. vector_bits is used to indicate width.
|
1757
1764
|
halide_target_feature_x86_apx, ///< Intel x86 APX support. Covers initial set of features released as APX: egpr,push2pop2,ppx,ndd .
|
1765
|
+
halide_target_feature_simulator, ///< Target is for a simulator environment. Currently only applies to iOS.
|
1758
1766
|
halide_target_feature_end ///< A sentinel. Every target is considered to have this feature, and setting this feature does nothing.
|
1759
1767
|
} halide_target_feature_t;
|
1760
1768
|
|
@@ -1831,8 +1839,19 @@ typedef struct halide_dimension_t {
|
|
1831
1839
|
} // extern "C"
|
1832
1840
|
#endif
|
1833
1841
|
|
1834
|
-
|
1835
|
-
|
1842
|
+
#if __cplusplus > 201100L || _MSVC_LANG > 201100L || __STDC_VERSION__ > 202300L
|
1843
|
+
// In C++, an underlying type is required to let the user define their own flag
|
1844
|
+
// values, without those values being undefined behavior when passed around as
|
1845
|
+
// this enum typedef.
|
1846
|
+
#define BUFFER_FLAGS_UNDERLYING_TYPE : uint64_t
|
1847
|
+
#else
|
1848
|
+
#define BUFFER_FLAGS_UNDERLYING_TYPE
|
1849
|
+
#endif
|
1850
|
+
typedef enum BUFFER_FLAGS_UNDERLYING_TYPE {
|
1851
|
+
halide_buffer_flag_host_dirty = 1,
|
1852
|
+
halide_buffer_flag_device_dirty = 2
|
1853
|
+
} halide_buffer_flags;
|
1854
|
+
#undef BUFFER_FLAGS_UNDERLYING_TYPE
|
1836
1855
|
|
1837
1856
|
/**
|
1838
1857
|
* The raw representation of an image passed around by generated
|
@@ -2730,12 +2749,15 @@ std::ostream &operator<<(std::ostream &stream, const Stmt &);
|
|
2730
2749
|
struct LoweredFunc;
|
2731
2750
|
std::ostream &operator<<(std::ostream &, const LoweredFunc &);
|
2732
2751
|
|
2733
|
-
|
2752
|
+
bool debug_is_active_impl(int verbosity, const char *file, const char *function, int line);
|
2753
|
+
#define debug_is_active(n) (::Halide::Internal::debug_is_active_impl((n), __FILE__, __FUNCTION__, __LINE__))
|
2754
|
+
|
2755
|
+
/** For optional debugging during codegen, use the debug macro as
|
2734
2756
|
* follows:
|
2735
2757
|
*
|
2736
|
-
\code
|
2737
|
-
debug(verbosity) << "The expression is " << expr << "\n";
|
2738
|
-
\endcode
|
2758
|
+
* \code
|
2759
|
+
* debug(verbosity) << "The expression is " << expr << "\n";
|
2760
|
+
* \endcode
|
2739
2761
|
*
|
2740
2762
|
* verbosity of 0 always prints, 1 should print after every major
|
2741
2763
|
* stage, 2 should be used for more detail, and 3 should be used for
|
@@ -2743,25 +2765,11 @@ std::ostream &operator<<(std::ostream &, const LoweredFunc &);
|
|
2743
2765
|
* is determined by the value of the environment variable
|
2744
2766
|
* HL_DEBUG_CODEGEN
|
2745
2767
|
*/
|
2746
|
-
|
2747
|
-
|
2748
|
-
|
2749
|
-
|
2750
|
-
|
2751
|
-
debug(int verbosity)
|
2752
|
-
: logging(verbosity <= debug_level()) {
|
2753
|
-
}
|
2754
|
-
|
2755
|
-
template<typename T>
|
2756
|
-
debug &operator<<(T &&x) {
|
2757
|
-
if (logging) {
|
2758
|
-
std::cerr << std::forward<T>(x);
|
2759
|
-
}
|
2760
|
-
return *this;
|
2761
|
-
}
|
2762
|
-
|
2763
|
-
static int debug_level();
|
2764
|
-
};
|
2768
|
+
// clang-format off
|
2769
|
+
#define debug(n) \
|
2770
|
+
/* NOLINTNEXTLINE(bugprone-macro-parentheses) */ \
|
2771
|
+
if (debug_is_active((n))) std::cerr
|
2772
|
+
// clang-format on
|
2765
2773
|
|
2766
2774
|
/** Allow easily printing the contents of containers, or std::vector-like containers,
|
2767
2775
|
* in debug output. Used like so:
|
@@ -2867,14 +2875,18 @@ private:
|
|
2867
2875
|
};
|
2868
2876
|
|
2869
2877
|
/** An error that occurs while running a JIT-compiled Halide pipeline. */
|
2870
|
-
struct HALIDE_EXPORT_SYMBOL RuntimeError :
|
2878
|
+
struct HALIDE_EXPORT_SYMBOL RuntimeError final : Error {
|
2879
|
+
static constexpr auto error_name = "Runtime error";
|
2880
|
+
|
2871
2881
|
explicit RuntimeError(const char *msg);
|
2872
2882
|
explicit RuntimeError(const std::string &msg);
|
2873
2883
|
};
|
2874
2884
|
|
2875
2885
|
/** An error that occurs while compiling a Halide pipeline that Halide
|
2876
2886
|
* attributes to a user error. */
|
2877
|
-
struct HALIDE_EXPORT_SYMBOL CompileError :
|
2887
|
+
struct HALIDE_EXPORT_SYMBOL CompileError final : Error {
|
2888
|
+
static constexpr auto error_name = "User error";
|
2889
|
+
|
2878
2890
|
explicit CompileError(const char *msg);
|
2879
2891
|
explicit CompileError(const std::string &msg);
|
2880
2892
|
};
|
@@ -2882,7 +2894,9 @@ struct HALIDE_EXPORT_SYMBOL CompileError : public Error {
|
|
2882
2894
|
/** An error that occurs while compiling a Halide pipeline that Halide
|
2883
2895
|
* attributes to an internal compiler bug, or to an invalid use of
|
2884
2896
|
* Halide's internals. */
|
2885
|
-
struct HALIDE_EXPORT_SYMBOL InternalError :
|
2897
|
+
struct HALIDE_EXPORT_SYMBOL InternalError final : Error {
|
2898
|
+
static constexpr auto error_name = "Internal error";
|
2899
|
+
|
2886
2900
|
explicit InternalError(const char *msg);
|
2887
2901
|
explicit InternalError(const std::string &msg);
|
2888
2902
|
};
|
@@ -2898,7 +2912,7 @@ class CompileTimeErrorReporter {
|
|
2898
2912
|
public:
|
2899
2913
|
virtual ~CompileTimeErrorReporter() = default;
|
2900
2914
|
virtual void warning(const char *msg) = 0;
|
2901
|
-
virtual void error(const char *msg) = 0;
|
2915
|
+
[[noreturn]] virtual void error(const char *msg) = 0;
|
2902
2916
|
};
|
2903
2917
|
|
2904
2918
|
/** The default error reporter logs to stderr, then throws an exception
|
@@ -2912,84 +2926,136 @@ void set_custom_compile_time_error_reporter(CompileTimeErrorReporter *error_repo
|
|
2912
2926
|
|
2913
2927
|
namespace Internal {
|
2914
2928
|
|
2915
|
-
|
2916
|
-
|
2917
|
-
|
2918
|
-
|
2919
|
-
|
2920
|
-
|
2929
|
+
/**
|
2930
|
+
* If a custom error reporter is configured, notifies the reporter by calling
|
2931
|
+
* its error() function with the value of \p e.what()
|
2932
|
+
*
|
2933
|
+
* Otherwise, if Halide was built with exceptions, throw \p e unless an
|
2934
|
+
* existing exception is in flight. On the other hand, if Halide was built
|
2935
|
+
* without exceptions, print the error message to stderr and abort().
|
2936
|
+
*
|
2937
|
+
* @param e The error to throw or report
|
2938
|
+
*/
|
2939
|
+
/// @{
|
2940
|
+
[[noreturn]] void throw_error(const RuntimeError &e);
|
2941
|
+
[[noreturn]] void throw_error(const CompileError &e);
|
2942
|
+
[[noreturn]] void throw_error(const InternalError &e);
|
2943
|
+
/// @}
|
2921
2944
|
|
2922
|
-
|
2923
|
-
|
2945
|
+
/**
|
2946
|
+
* If a custom error reporter is configured, notifies the reporter by calling
|
2947
|
+
* its warning() function. Otherwise, prints the warning to stderr.
|
2948
|
+
*
|
2949
|
+
* @param warning The warning to issue
|
2950
|
+
*/
|
2951
|
+
void issue_warning(const char *warning);
|
2924
2952
|
|
2925
|
-
|
2953
|
+
template<typename T>
|
2954
|
+
struct ReportBase {
|
2955
|
+
template<typename S>
|
2956
|
+
HALIDE_ALWAYS_INLINE T &operator<<(const S &x) {
|
2957
|
+
msg << x;
|
2958
|
+
return *static_cast<T *>(this);
|
2959
|
+
}
|
2926
2960
|
|
2927
|
-
|
2928
|
-
|
2929
|
-
return *this;
|
2961
|
+
HALIDE_ALWAYS_INLINE operator bool() const {
|
2962
|
+
return !finalized;
|
2930
2963
|
}
|
2931
2964
|
|
2932
|
-
|
2933
|
-
|
2934
|
-
|
2935
|
-
|
2965
|
+
protected:
|
2966
|
+
std::ostringstream msg{};
|
2967
|
+
bool finalized{false};
|
2968
|
+
|
2969
|
+
// This function is called as part of issue() below. We can't use a
|
2970
|
+
// virtual function because issue() needs to be marked [[noreturn]]
|
2971
|
+
// for errors and be left alone for warnings (i.e., they have
|
2972
|
+
// different signatures).
|
2973
|
+
std::string finalize_message() {
|
2974
|
+
if (!msg.str().empty() && msg.str().back() != '\n') {
|
2975
|
+
msg << "\n";
|
2976
|
+
}
|
2977
|
+
finalized = true;
|
2978
|
+
return msg.str();
|
2979
|
+
}
|
2980
|
+
|
2981
|
+
T &init(const char *file, const char *function, const int line, const char *condition_string, const char *prefix) {
|
2982
|
+
if (debug_is_active_impl(1, file, function, line)) {
|
2983
|
+
msg << prefix << " at " << file << ":" << line << ' ';
|
2984
|
+
if (condition_string) {
|
2985
|
+
msg << "Condition failed: " << condition_string << ' ';
|
2986
|
+
}
|
2987
|
+
}
|
2988
|
+
return *static_cast<T *>(this);
|
2936
2989
|
}
|
2990
|
+
};
|
2937
2991
|
|
2938
|
-
|
2939
|
-
|
2940
|
-
|
2941
|
-
|
2942
|
-
|
2943
|
-
|
2944
|
-
|
2945
|
-
|
2946
|
-
|
2992
|
+
template<typename Exception>
|
2993
|
+
struct ErrorReport final : ReportBase<ErrorReport<Exception>> {
|
2994
|
+
ErrorReport &init(const char *file, const char *function, const int line, const char *condition_string) {
|
2995
|
+
return ReportBase<ErrorReport>::init(file, function, line, condition_string, Exception::error_name) << "Error: ";
|
2996
|
+
}
|
2997
|
+
|
2998
|
+
[[noreturn]] void issue() noexcept(false) {
|
2999
|
+
throw_error(Exception(this->finalize_message()));
|
3000
|
+
}
|
2947
3001
|
};
|
2948
3002
|
|
2949
|
-
|
2950
|
-
|
2951
|
-
|
2952
|
-
|
2953
|
-
|
2954
|
-
|
2955
|
-
|
2956
|
-
// This has to be an operator with a precedence lower than << but
|
2957
|
-
// higher than ?:
|
2958
|
-
HALIDE_ALWAYS_INLINE void operator&(ErrorReport &) {
|
3003
|
+
struct WarningReport final : ReportBase<WarningReport> {
|
3004
|
+
WarningReport &init(const char *file, const char *function, const int line, const char *condition_string) {
|
3005
|
+
return ReportBase::init(file, function, line, condition_string, "Warning") << "Warning: ";
|
3006
|
+
}
|
3007
|
+
|
3008
|
+
void issue() {
|
3009
|
+
issue_warning(this->finalize_message().c_str());
|
2959
3010
|
}
|
2960
3011
|
};
|
2961
3012
|
|
2962
3013
|
/**
|
2963
|
-
*
|
2964
|
-
*
|
2965
|
-
* evaluated if the assertion's value is false.
|
2966
|
-
*
|
2967
|
-
* Note that this macro intentionally has no parens internally; in actual
|
2968
|
-
* use, the implicit grouping will end up being
|
2969
|
-
*
|
2970
|
-
* condition ? (void) : (Voidifier() & (ErrorReport << arg1 << arg2 ... << argN))
|
3014
|
+
* The following three diagnostic macros are implemented such that the
|
3015
|
+
* message is evaluated only if the assertion's value is false.
|
2971
3016
|
*
|
2972
3017
|
* This (regrettably) requires a macro to work, but has the highly desirable
|
2973
3018
|
* effect that all assertion parameters are totally skipped (not ever evaluated)
|
2974
3019
|
* when the assertion is true.
|
3020
|
+
*
|
3021
|
+
* The macros work by deferring the call to issue() until after the stream
|
3022
|
+
* has been evaluated. This previously used a trick where ErrorReport would
|
3023
|
+
* throw in the destructor, but throwing in a destructor is UB in a lot of
|
3024
|
+
* scenarios, and it was easy to break things by mistake.
|
2975
3025
|
*/
|
2976
|
-
|
2977
|
-
|
2978
|
-
|
3026
|
+
/// @{
|
3027
|
+
#define _halide_error_impl(type) \
|
3028
|
+
for (Halide::Internal::ErrorReport<type> _err; 1; _err.issue()) \
|
3029
|
+
/**/ _err.init(__FILE__, __FUNCTION__, __LINE__, nullptr)
|
3030
|
+
|
3031
|
+
#define _halide_assert_impl(condition, type) \
|
3032
|
+
if (!(condition)) \
|
3033
|
+
for (Halide::Internal::ErrorReport<type> _err; 1; _err.issue()) \
|
3034
|
+
/*****/ _err.init(__FILE__, __FUNCTION__, __LINE__, #condition)
|
3035
|
+
|
3036
|
+
#define _halide_user_warning \
|
3037
|
+
for (Halide::Internal::WarningReport _err; _err; _err.issue()) \
|
3038
|
+
/**/ _err.init(__FILE__, __FUNCTION__, __LINE__, nullptr)
|
3039
|
+
/// @}
|
3040
|
+
|
3041
|
+
#define user_warning _halide_user_warning
|
2979
3042
|
|
2980
|
-
#define
|
2981
|
-
#define
|
2982
|
-
#define
|
2983
|
-
#define halide_runtime_error Halide::Internal::ErrorReport(__FILE__, __LINE__, nullptr, Halide::Internal::ErrorReport::User | Halide::Internal::ErrorReport::Runtime)
|
3043
|
+
#define user_error _halide_error_impl(Halide::CompileError)
|
3044
|
+
#define internal_error _halide_error_impl(Halide::InternalError)
|
3045
|
+
#define halide_runtime_error _halide_error_impl(Halide::RuntimeError)
|
2984
3046
|
|
2985
|
-
#define internal_assert(c)
|
2986
|
-
#define user_assert(c)
|
3047
|
+
#define internal_assert(c) _halide_assert_impl(c, Halide::InternalError)
|
3048
|
+
#define user_assert(c) _halide_assert_impl(c, Halide::CompileError)
|
2987
3049
|
|
2988
3050
|
// The nicely named versions get cleaned up at the end of Halide.h,
|
2989
3051
|
// but user code might want to do halide-style user_asserts (e.g. the
|
2990
3052
|
// Extern macros introduce calls to user_assert), so for that purpose
|
2991
3053
|
// we define an equivalent macro that can be used outside of Halide.h
|
2992
|
-
#define
|
3054
|
+
#define _halide_user_error _halide_error_impl(Halide::CompileError)
|
3055
|
+
#define _halide_internal_error _halide_error_impl(Halide::InternalError)
|
3056
|
+
#define _halide_runtime_error _halide_error_impl(Halide::RuntimeError)
|
3057
|
+
#define _halide_internal_assert(c) _halide_assert_impl(c, Halide::InternalError)
|
3058
|
+
#define _halide_user_assert(c) _halide_assert_impl(c, Halide::CompileError)
|
2993
3059
|
|
2994
3060
|
// N.B. Any function that might throw a user_assert or user_error may
|
2995
3061
|
// not be inlined into the user's code, or the line number will be
|
@@ -3459,8 +3525,12 @@ bool starts_with(const std::string &str, const std::string &prefix);
|
|
3459
3525
|
/** Test if the first string ends with the second string */
|
3460
3526
|
bool ends_with(const std::string &str, const std::string &suffix);
|
3461
3527
|
|
3462
|
-
/** Replace all matches of the second string in the first string with the last string
|
3463
|
-
|
3528
|
+
/** Replace all matches of the second string in the first string with the last string.
|
3529
|
+
* The string to search-and-replace in is passed by value, offering the ability to
|
3530
|
+
* std::move() a string in if you're not interested in keeping the original string.
|
3531
|
+
* This is useful when the original string does not contain the find-string, causing
|
3532
|
+
* this function to return the same string without any copies being made. */
|
3533
|
+
std::string replace_all(std::string str, const std::string &find, const std::string &replace);
|
3464
3534
|
|
3465
3535
|
/** Split the source string using 'delim' as the divider. */
|
3466
3536
|
std::vector<std::string> split_string(const std::string &source, const std::string &delim);
|
@@ -3671,7 +3741,7 @@ struct ScopedValue {
|
|
3671
3741
|
: var(var), old_value(var) {
|
3672
3742
|
}
|
3673
3743
|
/** Preserve the old value, then set the var to a new value. */
|
3674
|
-
ScopedValue(T &var, T new_value)
|
3744
|
+
ScopedValue(T &var, const T &new_value)
|
3675
3745
|
: var(var), old_value(var) {
|
3676
3746
|
var = new_value;
|
3677
3747
|
}
|
@@ -4980,6 +5050,7 @@ struct Target {
|
|
4980
5050
|
ZnVer2, /// Tune for AMD Zen 2 CPU (AMD Family 17h, launched 2019).
|
4981
5051
|
ZnVer3, /// Tune for AMD Zen 3 CPU (AMD Family 19h, launched 2020).
|
4982
5052
|
ZnVer4, /// Tune for AMD Zen 4 CPU (AMD Family 19h, launched 2022).
|
5053
|
+
ZnVer5, /// Tune for AMD Zen 5 CPU (AMD Family 1Ah, launched 2024).
|
4983
5054
|
} processor_tune = ProcessorGeneric;
|
4984
5055
|
|
4985
5056
|
/** Optional features a target can have.
|
@@ -4989,11 +5060,13 @@ struct Target {
|
|
4989
5060
|
enum Feature {
|
4990
5061
|
JIT = halide_target_feature_jit,
|
4991
5062
|
Debug = halide_target_feature_debug,
|
5063
|
+
EnableBacktraces = halide_target_feature_enable_backtraces,
|
4992
5064
|
NoAsserts = halide_target_feature_no_asserts,
|
4993
5065
|
NoBoundsQuery = halide_target_feature_no_bounds_query,
|
4994
5066
|
SSE41 = halide_target_feature_sse41,
|
4995
5067
|
AVX = halide_target_feature_avx,
|
4996
5068
|
AVX2 = halide_target_feature_avx2,
|
5069
|
+
AVXVNNI = halide_target_feature_avxvnni,
|
4997
5070
|
FMA = halide_target_feature_fma,
|
4998
5071
|
FMA4 = halide_target_feature_fma4,
|
4999
5072
|
F16C = halide_target_feature_f16c,
|
@@ -5038,6 +5111,7 @@ struct Target {
|
|
5038
5111
|
AVX512_Cannonlake = halide_target_feature_avx512_cannonlake,
|
5039
5112
|
AVX512_SapphireRapids = halide_target_feature_avx512_sapphirerapids,
|
5040
5113
|
AVX512_Zen4 = halide_target_feature_avx512_zen4,
|
5114
|
+
AVX512_Zen5 = halide_target_feature_avx512_zen5,
|
5041
5115
|
TraceLoads = halide_target_feature_trace_loads,
|
5042
5116
|
TraceStores = halide_target_feature_trace_stores,
|
5043
5117
|
TraceRealizations = halide_target_feature_trace_realizations,
|
@@ -5085,6 +5159,7 @@ struct Target {
|
|
5085
5159
|
Semihosting = halide_target_feature_semihosting,
|
5086
5160
|
AVX10_1 = halide_target_feature_avx10_1,
|
5087
5161
|
X86APX = halide_target_feature_x86_apx,
|
5162
|
+
Simulator = halide_target_feature_simulator,
|
5088
5163
|
FeatureEnd = halide_target_feature_end
|
5089
5164
|
};
|
5090
5165
|
Target() = default;
|
@@ -5413,10 +5488,12 @@ static_assert(((HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT & (HALIDE_RUNTIME_BUF
|
|
5413
5488
|
#ifndef HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC
|
5414
5489
|
|
5415
5490
|
// clang-format off
|
5416
|
-
#ifdef
|
5491
|
+
#ifdef _WIN32
|
5417
5492
|
|
5418
|
-
//
|
5419
|
-
// has stated they probably never will,
|
5493
|
+
// Windows (regardless of which compiler) doesn't implement aligned_alloc(),
|
5494
|
+
// even in C++17 mode, and has stated they probably never will, as the issue
|
5495
|
+
// is in the incompatibility that free() needs to be able to free both pointers
|
5496
|
+
// returned by malloc() and aligned_alloc(). So, always default it off here.
|
5420
5497
|
#define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
|
5421
5498
|
|
5422
5499
|
#elif defined(__ANDROID_API__) && __ANDROID_API__ < 28
|
@@ -6317,7 +6394,7 @@ public:
|
|
6317
6394
|
|
6318
6395
|
/** Allocate a new image of the given size with a runtime
|
6319
6396
|
* type. Only used when you do know what size you want but you
|
6320
|
-
* don't know statically what type the elements are. Pass
|
6397
|
+
* don't know statically what type the elements are. Pass zeros
|
6321
6398
|
* to make a buffer suitable for bounds query calls. */
|
6322
6399
|
template<typename... Args,
|
6323
6400
|
typename = typename std::enable_if<AllInts<Args...>::value>::type>
|
@@ -6336,7 +6413,7 @@ public:
|
|
6336
6413
|
}
|
6337
6414
|
}
|
6338
6415
|
|
6339
|
-
/** Allocate a new image of the given size. Pass
|
6416
|
+
/** Allocate a new image of the given size. Pass zeros to make a
|
6340
6417
|
* buffer suitable for bounds query calls. */
|
6341
6418
|
// @{
|
6342
6419
|
|
@@ -7339,9 +7416,12 @@ public:
|
|
7339
7416
|
/** Make a buffer with the same shape and memory nesting order as
|
7340
7417
|
* another buffer. It may have a different type. */
|
7341
7418
|
template<typename T2, int D2, int S2>
|
7419
|
+
// NOLINTNEXTLINE(performance-unnecessary-value-param)
|
7342
7420
|
static Buffer<T, Dims, InClassDimStorage> make_with_shape_of(Buffer<T2, D2, S2> src,
|
7343
7421
|
void *(*allocate_fn)(size_t) = nullptr,
|
7344
7422
|
void (*deallocate_fn)(void *) = nullptr) {
|
7423
|
+
// Note that src is taken by value because its dims are mutated
|
7424
|
+
// in-place by the helper. Do not change to taking it by reference.
|
7345
7425
|
static_assert(Dims == D2 || Dims == AnyDims);
|
7346
7426
|
const halide_type_t dst_type = T_is_void ? src.type() : halide_type_of<typename std::remove_cv<not_void_T>::type>();
|
7347
7427
|
return Buffer<>::make_with_shape_of_helper(dst_type, src.dimensions(), src.buf.dim,
|
@@ -7407,9 +7487,7 @@ private:
|
|
7407
7487
|
}
|
7408
7488
|
|
7409
7489
|
template<typename... Args>
|
7410
|
-
HALIDE_ALWAYS_INLINE
|
7411
|
-
storage_T *
|
7412
|
-
address_of(Args... args) const {
|
7490
|
+
HALIDE_ALWAYS_INLINE storage_T *address_of(Args... args) const {
|
7413
7491
|
if (T_is_void) {
|
7414
7492
|
return (storage_T *)(this->buf.host) + offset_of(0, args...) * type().bytes();
|
7415
7493
|
} else {
|
@@ -7464,8 +7542,7 @@ public:
|
|
7464
7542
|
}
|
7465
7543
|
|
7466
7544
|
HALIDE_ALWAYS_INLINE
|
7467
|
-
const not_void_T &
|
7468
|
-
operator()() const {
|
7545
|
+
const not_void_T &operator()() const {
|
7469
7546
|
static_assert(!T_is_void,
|
7470
7547
|
"Cannot use operator() on Buffer<void> types");
|
7471
7548
|
constexpr int expected_dims = 0;
|
@@ -7485,9 +7562,7 @@ public:
|
|
7485
7562
|
|
7486
7563
|
template<typename... Args,
|
7487
7564
|
typename = typename std::enable_if<AllInts<Args...>::value>::type>
|
7488
|
-
HALIDE_ALWAYS_INLINE
|
7489
|
-
not_void_T &
|
7490
|
-
operator()(int first, Args... rest) {
|
7565
|
+
HALIDE_ALWAYS_INLINE not_void_T &operator()(int first, Args... rest) {
|
7491
7566
|
static_assert(!T_is_void,
|
7492
7567
|
"Cannot use operator() on Buffer<void> types");
|
7493
7568
|
constexpr int expected_dims = 1 + (int)(sizeof...(rest));
|
@@ -8181,7 +8256,7 @@ public:
|
|
8181
8256
|
|
8182
8257
|
template<typename... Args,
|
8183
8258
|
typename = typename std::enable_if<Internal::all_ints_and_optional_name<Args...>::value>::type>
|
8184
|
-
explicit Buffer(int first, Args
|
8259
|
+
explicit Buffer(int first, const Args &...rest)
|
8185
8260
|
: Buffer(Runtime::Buffer<T, Dims>(Internal::get_shape_from_start_of_parameter_pack(first, rest...)),
|
8186
8261
|
Internal::get_name_from_end_of_parameter_pack(rest...)) {
|
8187
8262
|
}
|
@@ -8408,6 +8483,7 @@ public:
|
|
8408
8483
|
HALIDE_BUFFER_FORWARD_CONST(contains)
|
8409
8484
|
HALIDE_BUFFER_FORWARD(crop)
|
8410
8485
|
HALIDE_BUFFER_FORWARD_INITIALIZER_LIST(crop, std::vector<std::pair<int, int>>)
|
8486
|
+
HALIDE_BUFFER_FORWARD_CONST(cropped)
|
8411
8487
|
HALIDE_BUFFER_FORWARD(slice)
|
8412
8488
|
HALIDE_BUFFER_FORWARD_CONST(sliced)
|
8413
8489
|
HALIDE_BUFFER_FORWARD(embed)
|
@@ -8415,6 +8491,7 @@ public:
|
|
8415
8491
|
HALIDE_BUFFER_FORWARD(set_min)
|
8416
8492
|
HALIDE_BUFFER_FORWARD(translate)
|
8417
8493
|
HALIDE_BUFFER_FORWARD_INITIALIZER_LIST(translate, std::vector<int>)
|
8494
|
+
HALIDE_BUFFER_FORWARD_CONST(translated)
|
8418
8495
|
HALIDE_BUFFER_FORWARD(transpose)
|
8419
8496
|
HALIDE_BUFFER_FORWARD_CONST(transposed)
|
8420
8497
|
HALIDE_BUFFER_FORWARD(add_dimension)
|
@@ -8935,6 +9012,12 @@ public:
|
|
8935
9012
|
|
8936
9013
|
void store_in(MemoryType memory_type);
|
8937
9014
|
MemoryType memory_type() const;
|
9015
|
+
|
9016
|
+
void trace_loads();
|
9017
|
+
bool is_tracing_loads() const;
|
9018
|
+
|
9019
|
+
void add_trace_tag(const std::string &trace_tag);
|
9020
|
+
std::vector<std::string> get_trace_tags() const;
|
8938
9021
|
};
|
8939
9022
|
|
8940
9023
|
namespace Internal {
|
@@ -10251,8 +10334,7 @@ struct Split {
|
|
10251
10334
|
|
10252
10335
|
enum SplitType { SplitVar = 0,
|
10253
10336
|
RenameVar,
|
10254
|
-
FuseVars
|
10255
|
-
PurifyRVar };
|
10337
|
+
FuseVars };
|
10256
10338
|
|
10257
10339
|
// If split_type is Rename, then this is just a renaming of the
|
10258
10340
|
// old_var to the outer and not a split. The inner var should
|
@@ -10260,10 +10342,6 @@ struct Split {
|
|
10260
10342
|
// the same list as splits so that ordering between them is
|
10261
10343
|
// respected.
|
10262
10344
|
|
10263
|
-
// If split type is Purify, this replaces the old_var RVar to
|
10264
|
-
// the outer Var. The inner var should be ignored, and factor
|
10265
|
-
// should be one.
|
10266
|
-
|
10267
10345
|
// If split_type is Fuse, then this does the opposite of a
|
10268
10346
|
// split, it joins the outer and inner into the old_var.
|
10269
10347
|
SplitType split_type;
|
@@ -10854,7 +10932,12 @@ class IRMutator;
|
|
10854
10932
|
|
10855
10933
|
/** A single named dimension of a reduction domain */
|
10856
10934
|
struct ReductionVariable {
|
10935
|
+
/**
|
10936
|
+
* A variable name for the reduction variable. This name must be a
|
10937
|
+
* valid Var name, i.e. it must not contain a <tt>.</tt> character.
|
10938
|
+
*/
|
10857
10939
|
std::string var;
|
10940
|
+
|
10858
10941
|
Expr min, extent;
|
10859
10942
|
|
10860
10943
|
/** This lets you use a ReductionVariable as a key in a map of the form
|
@@ -11680,7 +11763,7 @@ struct ExternFuncArgument {
|
|
11680
11763
|
}
|
11681
11764
|
|
11682
11765
|
template<typename T, int Dims>
|
11683
|
-
ExternFuncArgument(Buffer<T, Dims> b)
|
11766
|
+
ExternFuncArgument(const Buffer<T, Dims> &b)
|
11684
11767
|
: arg_type(BufferArg), buffer(b) {
|
11685
11768
|
}
|
11686
11769
|
ExternFuncArgument(Expr e)
|
@@ -12323,9 +12406,25 @@ struct Call : public ExprNode<Call> {
|
|
12323
12406
|
|
12324
12407
|
// Compute (arg[0] + arg[1]) / 2, assuming arg[0] < arg[1].
|
12325
12408
|
sorted_avg,
|
12326
|
-
|
12409
|
+
|
12410
|
+
// strict floating point ops. These are floating point ops that we would
|
12411
|
+
// like to optimize around (or let llvm optimize around) by treating
|
12412
|
+
// them as reals and ignoring the existence of nan and inf. Using these
|
12413
|
+
// intrinsics instead prevents any such optimizations.
|
12414
|
+
strict_add,
|
12415
|
+
strict_div,
|
12416
|
+
strict_eq,
|
12417
|
+
strict_le,
|
12418
|
+
strict_lt,
|
12419
|
+
strict_max,
|
12420
|
+
strict_min,
|
12421
|
+
strict_mul,
|
12422
|
+
strict_sub,
|
12423
|
+
|
12424
|
+
// Convert a list of Exprs to a string
|
12327
12425
|
stringify,
|
12328
12426
|
|
12427
|
+
// Query properties of the compiled-for target (resolved at compile-time)
|
12329
12428
|
target_arch_is,
|
12330
12429
|
target_bits,
|
12331
12430
|
target_has_feature,
|
@@ -12450,7 +12549,7 @@ struct Call : public ExprNode<Call> {
|
|
12450
12549
|
}
|
12451
12550
|
|
12452
12551
|
bool is_tag() const {
|
12453
|
-
return is_intrinsic({Call::likely, Call::likely_if_innermost
|
12552
|
+
return is_intrinsic({Call::likely, Call::likely_if_innermost});
|
12454
12553
|
}
|
12455
12554
|
|
12456
12555
|
/** Returns a pointer to a call node if the expression is a call to
|
@@ -12467,7 +12566,7 @@ struct Call : public ExprNode<Call> {
|
|
12467
12566
|
}
|
12468
12567
|
|
12469
12568
|
static const Call *as_tag(const Expr &e) {
|
12470
|
-
return as_intrinsic(e, {Call::likely, Call::likely_if_innermost
|
12569
|
+
return as_intrinsic(e, {Call::likely, Call::likely_if_innermost});
|
12471
12570
|
}
|
12472
12571
|
|
12473
12572
|
bool is_extern() const {
|
@@ -12476,6 +12575,19 @@ struct Call : public ExprNode<Call> {
|
|
12476
12575
|
call_type == PureExtern);
|
12477
12576
|
}
|
12478
12577
|
|
12578
|
+
bool is_strict_float_intrinsic() const {
|
12579
|
+
return is_intrinsic(
|
12580
|
+
{Call::strict_add,
|
12581
|
+
Call::strict_div,
|
12582
|
+
Call::strict_max,
|
12583
|
+
Call::strict_min,
|
12584
|
+
Call::strict_mul,
|
12585
|
+
Call::strict_sub,
|
12586
|
+
Call::strict_lt,
|
12587
|
+
Call::strict_le,
|
12588
|
+
Call::strict_eq});
|
12589
|
+
}
|
12590
|
+
|
12479
12591
|
static const IRNodeType _node_type = IRNodeType::Call;
|
12480
12592
|
};
|
12481
12593
|
|
@@ -12628,6 +12740,10 @@ struct Shuffle : public ExprNode<Shuffle> {
|
|
12628
12740
|
* arguments. */
|
12629
12741
|
bool is_extract_element() const;
|
12630
12742
|
|
12743
|
+
/** Returns the sequence of vector and lane indices that represent each
|
12744
|
+
* entry to be used for the shuffled vector */
|
12745
|
+
std::vector<std::pair<int, int>> vector_and_lane_indices() const;
|
12746
|
+
|
12631
12747
|
static const IRNodeType _node_type = IRNodeType::Shuffle;
|
12632
12748
|
};
|
12633
12749
|
|
@@ -13070,6 +13186,577 @@ inline Expr user_context_value() {
|
|
13070
13186
|
#include <map>
|
13071
13187
|
#include <optional>
|
13072
13188
|
|
13189
|
+
#ifndef HALIDE_CONSTANT_INTERVAL_H
|
13190
|
+
#define HALIDE_CONSTANT_INTERVAL_H
|
13191
|
+
|
13192
|
+
#include <stdint.h>
|
13193
|
+
|
13194
|
+
/** \file
|
13195
|
+
* Defines the ConstantInterval class, and operators on it.
|
13196
|
+
*/
|
13197
|
+
|
13198
|
+
namespace Halide {
|
13199
|
+
|
13200
|
+
struct Type;
|
13201
|
+
|
13202
|
+
namespace Internal {
|
13203
|
+
|
13204
|
+
/** A class to represent ranges of integers. Can be unbounded above or below,
|
13205
|
+
* but they cannot be empty. */
|
13206
|
+
struct ConstantInterval {
|
13207
|
+
/** The lower and upper bound of the interval. They are included
|
13208
|
+
* in the interval. */
|
13209
|
+
int64_t min = 0, max = 0;
|
13210
|
+
bool min_defined = false, max_defined = false;
|
13211
|
+
|
13212
|
+
/* A default-constructed ConstantInterval is everything */
|
13213
|
+
ConstantInterval() = default;
|
13214
|
+
|
13215
|
+
/** Construct an interval from a lower and upper bound. */
|
13216
|
+
ConstantInterval(int64_t min, int64_t max);
|
13217
|
+
|
13218
|
+
/** The interval representing everything. */
|
13219
|
+
static ConstantInterval everything();
|
13220
|
+
|
13221
|
+
/** Construct an interval representing a single point. */
|
13222
|
+
static ConstantInterval single_point(int64_t x);
|
13223
|
+
|
13224
|
+
/** Construct intervals bounded above or below. */
|
13225
|
+
static ConstantInterval bounded_below(int64_t min);
|
13226
|
+
static ConstantInterval bounded_above(int64_t max);
|
13227
|
+
|
13228
|
+
/** Is the interval the entire range */
|
13229
|
+
bool is_everything() const;
|
13230
|
+
|
13231
|
+
/** Is the interval just a single value (min == max) */
|
13232
|
+
bool is_single_point() const;
|
13233
|
+
|
13234
|
+
/** Is the interval a particular single value */
|
13235
|
+
bool is_single_point(int64_t x) const;
|
13236
|
+
|
13237
|
+
/** Does the interval have a finite upper and lower bound */
|
13238
|
+
bool is_bounded() const;
|
13239
|
+
|
13240
|
+
/** Expand the interval to include another Interval */
|
13241
|
+
void include(const ConstantInterval &i);
|
13242
|
+
|
13243
|
+
/** Expand the interval to include a point */
|
13244
|
+
void include(int64_t x);
|
13245
|
+
|
13246
|
+
/** Test if the interval contains a particular value */
|
13247
|
+
bool contains(int32_t x) const;
|
13248
|
+
|
13249
|
+
/** Test if the interval contains a particular value */
|
13250
|
+
bool contains(int64_t x) const;
|
13251
|
+
|
13252
|
+
/** Test if the interval contains a particular unsigned value */
|
13253
|
+
bool contains(uint64_t x) const;
|
13254
|
+
|
13255
|
+
/** Construct the smallest interval containing two intervals. */
|
13256
|
+
static ConstantInterval make_union(const ConstantInterval &a, const ConstantInterval &b);
|
13257
|
+
|
13258
|
+
/** Construct the largest interval contained within two intervals. Throws an
|
13259
|
+
* error if the interval is empty. */
|
13260
|
+
static ConstantInterval make_intersection(const ConstantInterval &a, const ConstantInterval &b);
|
13261
|
+
|
13262
|
+
/** Equivalent to same_as. Exists so that the autoscheduler can
|
13263
|
+
* compare two map<string, Interval> for equality in order to
|
13264
|
+
* cache computations. */
|
13265
|
+
bool operator==(const ConstantInterval &other) const;
|
13266
|
+
|
13267
|
+
/** In-place versions of the arithmetic operators below. */
|
13268
|
+
// @{
|
13269
|
+
void operator+=(const ConstantInterval &other);
|
13270
|
+
void operator+=(int64_t);
|
13271
|
+
void operator-=(const ConstantInterval &other);
|
13272
|
+
void operator-=(int64_t);
|
13273
|
+
void operator*=(const ConstantInterval &other);
|
13274
|
+
void operator*=(int64_t);
|
13275
|
+
void operator/=(const ConstantInterval &other);
|
13276
|
+
void operator/=(int64_t);
|
13277
|
+
void operator%=(const ConstantInterval &other);
|
13278
|
+
void operator%=(int64_t);
|
13279
|
+
// @}
|
13280
|
+
|
13281
|
+
/** Negate an interval. */
|
13282
|
+
ConstantInterval operator-() const;
|
13283
|
+
|
13284
|
+
/** Track what happens if a constant integer interval is forced to fit into
|
13285
|
+
* a concrete integer type. */
|
13286
|
+
void cast_to(const Type &t);
|
13287
|
+
|
13288
|
+
/** Get constant integer bounds on a type. */
|
13289
|
+
static ConstantInterval bounds_of_type(Type);
|
13290
|
+
};
|
13291
|
+
|
13292
|
+
/** Arithmetic operators on ConstantIntervals. The resulting interval contains
|
13293
|
+
* all possible values of the operator applied to any two elements of the
|
13294
|
+
* argument intervals. Note that these operate on unbounded integers. If you
|
13295
|
+
* are applying this to concrete small integer types, you will need to manually
|
13296
|
+
* cast the constant interval back to the desired type to model the effect of
|
13297
|
+
* overflow. */
|
13298
|
+
// @{
|
13299
|
+
ConstantInterval operator+(const ConstantInterval &a, const ConstantInterval &b);
|
13300
|
+
ConstantInterval operator+(const ConstantInterval &a, int64_t b);
|
13301
|
+
ConstantInterval operator-(const ConstantInterval &a, const ConstantInterval &b);
|
13302
|
+
ConstantInterval operator-(const ConstantInterval &a, int64_t b);
|
13303
|
+
ConstantInterval operator/(const ConstantInterval &a, const ConstantInterval &b);
|
13304
|
+
ConstantInterval operator/(const ConstantInterval &a, int64_t b);
|
13305
|
+
ConstantInterval operator*(const ConstantInterval &a, const ConstantInterval &b);
|
13306
|
+
ConstantInterval operator*(const ConstantInterval &a, int64_t b);
|
13307
|
+
ConstantInterval operator%(const ConstantInterval &a, const ConstantInterval &b);
|
13308
|
+
ConstantInterval operator%(const ConstantInterval &a, int64_t b);
|
13309
|
+
ConstantInterval min(const ConstantInterval &a, const ConstantInterval &b);
|
13310
|
+
ConstantInterval min(const ConstantInterval &a, int64_t b);
|
13311
|
+
ConstantInterval max(const ConstantInterval &a, const ConstantInterval &b);
|
13312
|
+
ConstantInterval max(const ConstantInterval &a, int64_t b);
|
13313
|
+
ConstantInterval abs(const ConstantInterval &a);
|
13314
|
+
ConstantInterval operator<<(const ConstantInterval &a, const ConstantInterval &b);
|
13315
|
+
ConstantInterval operator<<(const ConstantInterval &a, int64_t b);
|
13316
|
+
ConstantInterval operator<<(int64_t a, const ConstantInterval &b);
|
13317
|
+
ConstantInterval operator>>(const ConstantInterval &a, const ConstantInterval &b);
|
13318
|
+
ConstantInterval operator>>(const ConstantInterval &a, int64_t b);
|
13319
|
+
ConstantInterval operator>>(int64_t a, const ConstantInterval &b);
|
13320
|
+
// @}
|
13321
|
+
|
13322
|
+
/** Comparison operators on ConstantIntervals. Returns whether the comparison is
|
13323
|
+
* true for all values of the two intervals. */
|
13324
|
+
// @{
|
13325
|
+
bool operator<=(const ConstantInterval &a, const ConstantInterval &b);
|
13326
|
+
bool operator<=(const ConstantInterval &a, int64_t b);
|
13327
|
+
bool operator<=(int64_t a, const ConstantInterval &b);
|
13328
|
+
bool operator<(const ConstantInterval &a, const ConstantInterval &b);
|
13329
|
+
bool operator<(const ConstantInterval &a, int64_t b);
|
13330
|
+
bool operator<(int64_t a, const ConstantInterval &b);
|
13331
|
+
|
13332
|
+
inline bool operator>=(const ConstantInterval &a, const ConstantInterval &b) {
|
13333
|
+
return b <= a;
|
13334
|
+
}
|
13335
|
+
inline bool operator>(const ConstantInterval &a, const ConstantInterval &b) {
|
13336
|
+
return b < a;
|
13337
|
+
}
|
13338
|
+
inline bool operator>=(const ConstantInterval &a, int64_t b) {
|
13339
|
+
return b <= a;
|
13340
|
+
}
|
13341
|
+
inline bool operator>(const ConstantInterval &a, int64_t b) {
|
13342
|
+
return b < a;
|
13343
|
+
}
|
13344
|
+
inline bool operator>=(int64_t a, const ConstantInterval &b) {
|
13345
|
+
return b <= a;
|
13346
|
+
}
|
13347
|
+
inline bool operator>(int64_t a, const ConstantInterval &b) {
|
13348
|
+
return b < a;
|
13349
|
+
}
|
13350
|
+
|
13351
|
+
// @}
|
13352
|
+
} // namespace Internal
|
13353
|
+
|
13354
|
+
/** Cast operators for ConstantIntervals. These ones have to live out in
|
13355
|
+
* Halide::, to avoid C++ name lookup confusion with the Halide::cast variants
|
13356
|
+
* that take Exprs. */
|
13357
|
+
// @{
|
13358
|
+
Internal::ConstantInterval cast(Type t, const Internal::ConstantInterval &a);
|
13359
|
+
Internal::ConstantInterval saturating_cast(Type t, const Internal::ConstantInterval &a);
|
13360
|
+
// @}
|
13361
|
+
|
13362
|
+
} // namespace Halide
|
13363
|
+
|
13364
|
+
#endif
|
13365
|
+
#ifndef HALIDE_SCOPE_H
|
13366
|
+
#define HALIDE_SCOPE_H
|
13367
|
+
|
13368
|
+
#include <iostream>
|
13369
|
+
#include <map>
|
13370
|
+
#include <stack>
|
13371
|
+
#include <string>
|
13372
|
+
#include <utility>
|
13373
|
+
#include <vector>
|
13374
|
+
|
13375
|
+
|
13376
|
+
/** \file
|
13377
|
+
* Defines the Scope class, which is used for keeping track of names in a scope while traversing IR
|
13378
|
+
*/
|
13379
|
+
|
13380
|
+
namespace Halide {
|
13381
|
+
namespace Internal {
|
13382
|
+
|
13383
|
+
/** A stack which can store one item very efficiently. Using this
|
13384
|
+
* instead of std::stack speeds up Scope substantially. */
|
13385
|
+
template<typename T>
|
13386
|
+
class SmallStack {
|
13387
|
+
private:
|
13388
|
+
T _top;
|
13389
|
+
std::vector<T> _rest;
|
13390
|
+
bool _empty = true;
|
13391
|
+
|
13392
|
+
public:
|
13393
|
+
SmallStack() = default;
|
13394
|
+
|
13395
|
+
void pop() {
|
13396
|
+
if (_rest.empty()) {
|
13397
|
+
_empty = true;
|
13398
|
+
_top = T();
|
13399
|
+
} else {
|
13400
|
+
_top = std::move(_rest.back());
|
13401
|
+
_rest.pop_back();
|
13402
|
+
}
|
13403
|
+
}
|
13404
|
+
|
13405
|
+
void push(T t) {
|
13406
|
+
if (!_empty) {
|
13407
|
+
_rest.push_back(std::move(_top));
|
13408
|
+
}
|
13409
|
+
_top = std::move(t);
|
13410
|
+
_empty = false;
|
13411
|
+
}
|
13412
|
+
|
13413
|
+
T top() const {
|
13414
|
+
return _top;
|
13415
|
+
}
|
13416
|
+
|
13417
|
+
T &top_ref() {
|
13418
|
+
return _top;
|
13419
|
+
}
|
13420
|
+
|
13421
|
+
const T &top_ref() const {
|
13422
|
+
return _top;
|
13423
|
+
}
|
13424
|
+
|
13425
|
+
bool empty() const {
|
13426
|
+
return _empty;
|
13427
|
+
}
|
13428
|
+
|
13429
|
+
size_t size() const {
|
13430
|
+
return _empty ? 0 : (_rest.size() + 1);
|
13431
|
+
}
|
13432
|
+
};
|
13433
|
+
|
13434
|
+
template<>
|
13435
|
+
class SmallStack<void> {
|
13436
|
+
// A stack of voids. Voids are all the same, so just record how many voids are in the stack
|
13437
|
+
int counter = 0;
|
13438
|
+
|
13439
|
+
public:
|
13440
|
+
void pop() {
|
13441
|
+
counter--;
|
13442
|
+
}
|
13443
|
+
void push() {
|
13444
|
+
counter++;
|
13445
|
+
}
|
13446
|
+
bool empty() const {
|
13447
|
+
return counter == 0;
|
13448
|
+
}
|
13449
|
+
};
|
13450
|
+
|
13451
|
+
/** A common pattern when traversing Halide IR is that you need to
|
13452
|
+
* keep track of stuff when you find a Let or a LetStmt, and that it
|
13453
|
+
* should hide previous values with the same name until you leave the
|
13454
|
+
* Let or LetStmt nodes. This class helps with that. */
|
13455
|
+
template<typename T = void>
|
13456
|
+
class Scope {
|
13457
|
+
private:
|
13458
|
+
std::map<std::string, SmallStack<T>> table;
|
13459
|
+
|
13460
|
+
const Scope<T> *containing_scope = nullptr;
|
13461
|
+
|
13462
|
+
public:
|
13463
|
+
Scope() = default;
|
13464
|
+
Scope(Scope &&that) noexcept = default;
|
13465
|
+
Scope &operator=(Scope &&that) noexcept = default;
|
13466
|
+
|
13467
|
+
// Copying a scope object copies a large table full of strings and
|
13468
|
+
// stacks. Bad idea.
|
13469
|
+
Scope(const Scope<T> &) = delete;
|
13470
|
+
Scope<T> &operator=(const Scope<T> &) = delete;
|
13471
|
+
|
13472
|
+
/** Set the parent scope. If lookups fail in this scope, they
|
13473
|
+
* check the containing scope before returning an error. Caller is
|
13474
|
+
* responsible for managing the memory of the containing scope. */
|
13475
|
+
void set_containing_scope(const Scope<T> *s) {
|
13476
|
+
containing_scope = s;
|
13477
|
+
}
|
13478
|
+
|
13479
|
+
/** A const ref to an empty scope. Useful for default function
|
13480
|
+
* arguments, which would otherwise require a copy constructor
|
13481
|
+
* (with llvm in c++98 mode) */
|
13482
|
+
static const Scope<T> &empty_scope() {
|
13483
|
+
static Scope<T> _empty_scope;
|
13484
|
+
return _empty_scope;
|
13485
|
+
}
|
13486
|
+
|
13487
|
+
/** Retrieve the value referred to by a name */
|
13488
|
+
template<typename T2 = T,
|
13489
|
+
typename = typename std::enable_if<!std::is_same<T2, void>::value>::type>
|
13490
|
+
T2 get(const std::string &name) const {
|
13491
|
+
typename std::map<std::string, SmallStack<T>>::const_iterator iter = table.find(name);
|
13492
|
+
if (iter == table.end() || iter->second.empty()) {
|
13493
|
+
if (containing_scope) {
|
13494
|
+
return containing_scope->get(name);
|
13495
|
+
} else {
|
13496
|
+
internal_error << "Name not in Scope: " << name << "\n"
|
13497
|
+
<< *this << "\n";
|
13498
|
+
}
|
13499
|
+
}
|
13500
|
+
return iter->second.top();
|
13501
|
+
}
|
13502
|
+
|
13503
|
+
/** Return a reference to an entry. Does not consider the containing scope. */
|
13504
|
+
template<typename T2 = T,
|
13505
|
+
typename = typename std::enable_if<!std::is_same<T2, void>::value>::type>
|
13506
|
+
T2 &ref(const std::string &name) {
|
13507
|
+
typename std::map<std::string, SmallStack<T>>::iterator iter = table.find(name);
|
13508
|
+
if (iter == table.end() || iter->second.empty()) {
|
13509
|
+
internal_error << "Name not in Scope: " << name << "\n"
|
13510
|
+
<< *this << "\n";
|
13511
|
+
}
|
13512
|
+
return iter->second.top_ref();
|
13513
|
+
}
|
13514
|
+
|
13515
|
+
/** Returns a const pointer to an entry if it exists in this scope or any
|
13516
|
+
* containing scope, or nullptr if it does not. Use this instead of if
|
13517
|
+
* (scope.contains(foo)) { ... scope.get(foo) ... } to avoid doing two
|
13518
|
+
* lookups. */
|
13519
|
+
template<typename T2 = T,
|
13520
|
+
typename = typename std::enable_if<!std::is_same<T2, void>::value>::type>
|
13521
|
+
const T2 *find(const std::string &name) const {
|
13522
|
+
typename std::map<std::string, SmallStack<T>>::const_iterator iter = table.find(name);
|
13523
|
+
if (iter == table.end() || iter->second.empty()) {
|
13524
|
+
if (containing_scope) {
|
13525
|
+
return containing_scope->find(name);
|
13526
|
+
} else {
|
13527
|
+
return nullptr;
|
13528
|
+
}
|
13529
|
+
}
|
13530
|
+
return &(iter->second.top_ref());
|
13531
|
+
}
|
13532
|
+
|
13533
|
+
/** A version of find that returns a non-const pointer, but ignores
|
13534
|
+
* containing scope. */
|
13535
|
+
template<typename T2 = T,
|
13536
|
+
typename = typename std::enable_if<!std::is_same<T2, void>::value>::type>
|
13537
|
+
T2 *shallow_find(const std::string &name) {
|
13538
|
+
typename std::map<std::string, SmallStack<T>>::iterator iter = table.find(name);
|
13539
|
+
if (iter == table.end() || iter->second.empty()) {
|
13540
|
+
return nullptr;
|
13541
|
+
} else {
|
13542
|
+
return &(iter->second.top_ref());
|
13543
|
+
}
|
13544
|
+
}
|
13545
|
+
|
13546
|
+
/** Tests if a name is in scope. If you plan to use the value if it is, call
|
13547
|
+
* find instead. */
|
13548
|
+
bool contains(const std::string &name) const {
|
13549
|
+
typename std::map<std::string, SmallStack<T>>::const_iterator iter = table.find(name);
|
13550
|
+
if (iter == table.end() || iter->second.empty()) {
|
13551
|
+
if (containing_scope) {
|
13552
|
+
return containing_scope->contains(name);
|
13553
|
+
} else {
|
13554
|
+
return false;
|
13555
|
+
}
|
13556
|
+
}
|
13557
|
+
return true;
|
13558
|
+
}
|
13559
|
+
|
13560
|
+
/** How many nested definitions of a single name exist? */
|
13561
|
+
size_t count(const std::string &name) const {
|
13562
|
+
auto it = table.find(name);
|
13563
|
+
if (it == table.end()) {
|
13564
|
+
return 0;
|
13565
|
+
} else {
|
13566
|
+
return it->second.size();
|
13567
|
+
}
|
13568
|
+
}
|
13569
|
+
|
13570
|
+
/** How many distinct names exist (does not count nested definitions of the same name) */
|
13571
|
+
size_t size() const {
|
13572
|
+
return table.size();
|
13573
|
+
}
|
13574
|
+
|
13575
|
+
struct PushToken {
|
13576
|
+
typename std::map<std::string, SmallStack<T>>::iterator iter;
|
13577
|
+
};
|
13578
|
+
|
13579
|
+
/** Add a new (name, value) pair to the current scope. Hide old values that
|
13580
|
+
* have this name until we pop this name. Returns a token that can be used
|
13581
|
+
* to pop the same value without doing a fresh lookup.
|
13582
|
+
*/
|
13583
|
+
template<typename T2 = T,
|
13584
|
+
typename = typename std::enable_if<!std::is_same<T2, void>::value>::type>
|
13585
|
+
PushToken push(const std::string &name, T2 &&value) {
|
13586
|
+
auto it = table.try_emplace(name).first;
|
13587
|
+
it->second.push(std::forward<T2>(value));
|
13588
|
+
return PushToken{it};
|
13589
|
+
}
|
13590
|
+
|
13591
|
+
template<typename T2 = T,
|
13592
|
+
typename = typename std::enable_if<std::is_same<T2, void>::value>::type>
|
13593
|
+
PushToken push(const std::string &name) {
|
13594
|
+
auto it = table.try_emplace(name).first;
|
13595
|
+
it->second.push();
|
13596
|
+
return PushToken{it};
|
13597
|
+
}
|
13598
|
+
|
13599
|
+
/** A name goes out of scope. Restore whatever its old value
|
13600
|
+
* was (or remove it entirely if there was nothing else of the
|
13601
|
+
* same name in an outer scope) */
|
13602
|
+
void pop(const std::string &name) {
|
13603
|
+
typename std::map<std::string, SmallStack<T>>::iterator iter = table.find(name);
|
13604
|
+
internal_assert(iter != table.end()) << "Name not in Scope: " << name << "\n"
|
13605
|
+
<< *this << "\n";
|
13606
|
+
iter->second.pop();
|
13607
|
+
if (iter->second.empty()) {
|
13608
|
+
table.erase(iter);
|
13609
|
+
}
|
13610
|
+
}
|
13611
|
+
|
13612
|
+
/** Pop a name using a token returned by push instead of a string. */
|
13613
|
+
void pop(PushToken p) {
|
13614
|
+
p.iter->second.pop();
|
13615
|
+
if (p.iter->second.empty()) {
|
13616
|
+
table.erase(p.iter);
|
13617
|
+
}
|
13618
|
+
}
|
13619
|
+
|
13620
|
+
/** Iterate through the scope. Does not capture any containing scope. */
|
13621
|
+
class const_iterator {
|
13622
|
+
typename std::map<std::string, SmallStack<T>>::const_iterator iter;
|
13623
|
+
|
13624
|
+
public:
|
13625
|
+
explicit const_iterator(const typename std::map<std::string, SmallStack<T>>::const_iterator &i)
|
13626
|
+
: iter(i) {
|
13627
|
+
}
|
13628
|
+
|
13629
|
+
const_iterator() = default;
|
13630
|
+
|
13631
|
+
bool operator!=(const const_iterator &other) {
|
13632
|
+
return iter != other.iter;
|
13633
|
+
}
|
13634
|
+
|
13635
|
+
void operator++() {
|
13636
|
+
++iter;
|
13637
|
+
}
|
13638
|
+
|
13639
|
+
const std::string &name() {
|
13640
|
+
return iter->first;
|
13641
|
+
}
|
13642
|
+
|
13643
|
+
const SmallStack<T> &stack() {
|
13644
|
+
return iter->second;
|
13645
|
+
}
|
13646
|
+
|
13647
|
+
template<typename T2 = T,
|
13648
|
+
typename = typename std::enable_if<!std::is_same<T2, void>::value>::type>
|
13649
|
+
const T2 &value() {
|
13650
|
+
return iter->second.top_ref();
|
13651
|
+
}
|
13652
|
+
};
|
13653
|
+
|
13654
|
+
const_iterator cbegin() const {
|
13655
|
+
return const_iterator(table.begin());
|
13656
|
+
}
|
13657
|
+
|
13658
|
+
const_iterator cend() const {
|
13659
|
+
return const_iterator(table.end());
|
13660
|
+
}
|
13661
|
+
|
13662
|
+
void swap(Scope<T> &other) noexcept {
|
13663
|
+
table.swap(other.table);
|
13664
|
+
std::swap(containing_scope, other.containing_scope);
|
13665
|
+
}
|
13666
|
+
};
|
13667
|
+
|
13668
|
+
template<typename T>
|
13669
|
+
std::ostream &operator<<(std::ostream &stream, const Scope<T> &s) {
|
13670
|
+
stream << "{\n";
|
13671
|
+
typename Scope<T>::const_iterator iter;
|
13672
|
+
for (iter = s.cbegin(); iter != s.cend(); ++iter) {
|
13673
|
+
stream << " " << iter.name() << "\n";
|
13674
|
+
}
|
13675
|
+
stream << "}";
|
13676
|
+
return stream;
|
13677
|
+
}
|
13678
|
+
|
13679
|
+
/** Helper class for pushing/popping Scope<> values, to allow
|
13680
|
+
* for early-exit in Visitor/Mutators that preserves correctness.
|
13681
|
+
* Note that this name can be a bit confusing, since there are two "scopes"
|
13682
|
+
* involved here:
|
13683
|
+
* - the Scope object itself
|
13684
|
+
* - the lifetime of this helper object
|
13685
|
+
* The "Scoped" in this class name refers to the latter, as it temporarily binds
|
13686
|
+
* a name within the scope of this helper's lifetime. */
|
13687
|
+
template<typename T = void>
|
13688
|
+
struct ScopedBinding {
|
13689
|
+
Scope<T> *scope = nullptr;
|
13690
|
+
typename Scope<T>::PushToken token;
|
13691
|
+
|
13692
|
+
ScopedBinding() = default;
|
13693
|
+
|
13694
|
+
ScopedBinding(Scope<T> &s, const std::string &n, T value)
|
13695
|
+
: scope(&s), token(scope->push(n, std::move(value))) {
|
13696
|
+
}
|
13697
|
+
|
13698
|
+
ScopedBinding(bool condition, Scope<T> &s, const std::string &n, const T &value)
|
13699
|
+
: scope(condition ? &s : nullptr),
|
13700
|
+
token(condition ? scope->push(n, value) : typename Scope<T>::PushToken{}) {
|
13701
|
+
}
|
13702
|
+
|
13703
|
+
bool bound() const {
|
13704
|
+
return scope != nullptr;
|
13705
|
+
}
|
13706
|
+
|
13707
|
+
~ScopedBinding() {
|
13708
|
+
if (scope) {
|
13709
|
+
scope->pop(token);
|
13710
|
+
}
|
13711
|
+
}
|
13712
|
+
|
13713
|
+
// allow move but not copy
|
13714
|
+
ScopedBinding(const ScopedBinding &that) = delete;
|
13715
|
+
ScopedBinding(ScopedBinding &&that) noexcept
|
13716
|
+
: scope(that.scope),
|
13717
|
+
token(that.token) {
|
13718
|
+
// The move constructor must null out scope, so we don't try to pop it
|
13719
|
+
that.scope = nullptr;
|
13720
|
+
}
|
13721
|
+
|
13722
|
+
void operator=(const ScopedBinding &that) = delete;
|
13723
|
+
void operator=(ScopedBinding &&that) = delete;
|
13724
|
+
};
|
13725
|
+
|
13726
|
+
template<>
|
13727
|
+
struct ScopedBinding<void> {
|
13728
|
+
Scope<> *scope;
|
13729
|
+
Scope<>::PushToken token;
|
13730
|
+
ScopedBinding(Scope<> &s, const std::string &n)
|
13731
|
+
: scope(&s), token(scope->push(n)) {
|
13732
|
+
}
|
13733
|
+
ScopedBinding(bool condition, Scope<> &s, const std::string &n)
|
13734
|
+
: scope(condition ? &s : nullptr),
|
13735
|
+
token(condition ? scope->push(n) : Scope<>::PushToken{}) {
|
13736
|
+
}
|
13737
|
+
~ScopedBinding() {
|
13738
|
+
if (scope) {
|
13739
|
+
scope->pop(token);
|
13740
|
+
}
|
13741
|
+
}
|
13742
|
+
|
13743
|
+
// allow move but not copy
|
13744
|
+
ScopedBinding(const ScopedBinding &that) = delete;
|
13745
|
+
ScopedBinding(ScopedBinding &&that) noexcept
|
13746
|
+
: scope(that.scope),
|
13747
|
+
token(that.token) {
|
13748
|
+
// The move constructor must null out scope, so we don't try to pop it
|
13749
|
+
that.scope = nullptr;
|
13750
|
+
}
|
13751
|
+
|
13752
|
+
void operator=(const ScopedBinding &that) = delete;
|
13753
|
+
void operator=(ScopedBinding &&that) = delete;
|
13754
|
+
};
|
13755
|
+
|
13756
|
+
} // namespace Internal
|
13757
|
+
} // namespace Halide
|
13758
|
+
|
13759
|
+
#endif
|
13073
13760
|
#ifndef HALIDE_TUPLE_H
|
13074
13761
|
#define HALIDE_TUPLE_H
|
13075
13762
|
|
@@ -13275,13 +13962,16 @@ Expr const_false(int lanes = 1);
|
|
13275
13962
|
/** Attempt to cast an expression to a smaller type while provably not losing
|
13276
13963
|
* information. If it can't be done, return an undefined Expr.
|
13277
13964
|
*
|
13278
|
-
* Optionally accepts a
|
13279
|
-
*
|
13280
|
-
*
|
13281
|
-
* take on a different value. For
|
13282
|
-
* (let x = 4 in some_expr_object) + (let x = 5 in
|
13283
|
-
* It is safe to use it after uniquify_variable_names
|
13284
|
-
|
13965
|
+
* Optionally accepts a scope giving the constant bounds of any variables, and a
|
13966
|
+
* map that gives the constant bounds of exprs already analyzed to avoid redoing
|
13967
|
+
* work across many calls to lossless_cast. It is not safe to use this optional
|
13968
|
+
* map in contexts where the same Expr object may take on a different value. For
|
13969
|
+
* example: (let x = 4 in some_expr_object) + (let x = 5 in
|
13970
|
+
* the_same_expr_object). It is safe to use it after uniquify_variable_names
|
13971
|
+
* has been run. */
|
13972
|
+
Expr lossless_cast(Type t, Expr e,
|
13973
|
+
const Scope<ConstantInterval> &scope = Scope<ConstantInterval>::empty_scope(),
|
13974
|
+
std::map<Expr, ConstantInterval, ExprCompare> *cache = nullptr);
|
13285
13975
|
|
13286
13976
|
/** Attempt to negate x without introducing new IR and without overflow.
|
13287
13977
|
* If it can't be done, return an undefined Expr. */
|
@@ -14095,8 +14785,9 @@ Expr pow(Expr x, Expr y);
|
|
14095
14785
|
* mantissa. Vectorizes cleanly. */
|
14096
14786
|
Expr erf(const Expr &x);
|
14097
14787
|
|
14098
|
-
/** Fast vectorizable approximation to some trigonometric functions for
|
14099
|
-
* Absolute approximation error is less than 1e-5.
|
14788
|
+
/** Fast vectorizable approximation to some trigonometric functions for
|
14789
|
+
* Float(32). Absolute approximation error is less than 1e-5. Slow on x86 if
|
14790
|
+
* you don't have at least sse 4.1. */
|
14100
14791
|
// @{
|
14101
14792
|
Expr fast_sin(const Expr &x);
|
14102
14793
|
Expr fast_cos(const Expr &x);
|
@@ -14104,19 +14795,22 @@ Expr fast_cos(const Expr &x);
|
|
14104
14795
|
|
14105
14796
|
/** Fast approximate cleanly vectorizable log for Float(32). Returns
|
14106
14797
|
* nonsense for x <= 0.0f. Accurate up to the last 5 bits of the
|
14107
|
-
* mantissa. Vectorizes cleanly.
|
14798
|
+
* mantissa. Vectorizes cleanly. Slow on x86 if you don't
|
14799
|
+
* have at least sse 4.1. */
|
14108
14800
|
Expr fast_log(const Expr &x);
|
14109
14801
|
|
14110
14802
|
/** Fast approximate cleanly vectorizable exp for Float(32). Returns
|
14111
14803
|
* nonsense for inputs that would overflow or underflow. Typically
|
14112
14804
|
* accurate up to the last 5 bits of the mantissa. Gets worse when
|
14113
|
-
* approaching overflow. Vectorizes cleanly.
|
14805
|
+
* approaching overflow. Vectorizes cleanly. Slow on x86 if you don't
|
14806
|
+
* have at least sse 4.1. */
|
14114
14807
|
Expr fast_exp(const Expr &x);
|
14115
14808
|
|
14116
14809
|
/** Fast approximate cleanly vectorizable pow for Float(32). Returns
|
14117
14810
|
* nonsense for x < 0.0f. Accurate up to the last 5 bits of the
|
14118
14811
|
* mantissa for typical exponents. Gets worse when approaching
|
14119
|
-
* overflow. Vectorizes cleanly.
|
14812
|
+
* overflow. Vectorizes cleanly. Slow on x86 if you don't
|
14813
|
+
* have at least sse 4.1. */
|
14120
14814
|
Expr fast_pow(Expr x, Expr y);
|
14121
14815
|
|
14122
14816
|
/** Fast approximate inverse for Float(32). Corresponds to the rcpps
|
@@ -14559,7 +15253,7 @@ Expr saturating_cast(Type t, Expr e);
|
|
14559
15253
|
* all backends. (E.g. it is difficult to do this for C++ code
|
14560
15254
|
* generation as it depends on the compiler flags used to compile the
|
14561
15255
|
* generated code.) */
|
14562
|
-
Expr strict_float(Expr e);
|
15256
|
+
Expr strict_float(const Expr &e);
|
14563
15257
|
|
14564
15258
|
/** Create an Expr that promises another Expr is clamped but do
|
14565
15259
|
* not generate code to check the assertion or modify the value. No
|
@@ -14671,7 +15365,7 @@ f(scatter(3, 5)) = f(select(p, gather(5, 3), gather(3, 5)));
|
|
14671
15365
|
f(select(p, scatter(3, 5, 5), scatter(1, 2, 3))) = f(select(p, gather(5, 3, 3), gather(2, 3, 1)));
|
14672
15366
|
\endcode
|
14673
15367
|
*
|
14674
|
-
* Note that in the p == true case, we
|
15368
|
+
* Note that in the p == true case, we redundantly load from 3 and write
|
14675
15369
|
* to 5 twice.
|
14676
15370
|
*/
|
14677
15371
|
//@{
|
@@ -14952,7 +15646,7 @@ struct PipelineContents;
|
|
14952
15646
|
*
|
14953
15647
|
* The 'name' field specifies the type of Autoscheduler
|
14954
15648
|
* to be used (e.g. Adams2019, Mullapudi2016). If this is an empty string,
|
14955
|
-
* no autoscheduling will be done; if not, it
|
15649
|
+
* no autoscheduling will be done; if not, it must be the name of a known Autoscheduler.
|
14956
15650
|
*
|
14957
15651
|
* At this time, well-known autoschedulers include:
|
14958
15652
|
* "Mullapudi2016" -- heuristics-based; the first working autoscheduler; currently built in to libHalide
|
@@ -15743,7 +16437,7 @@ public:
|
|
15743
16437
|
}
|
15744
16438
|
|
15745
16439
|
template<typename... Args>
|
15746
|
-
HALIDE_NO_USER_CODE_INLINE RDom(Expr min, Expr extent, Args &&...args) {
|
16440
|
+
HALIDE_NO_USER_CODE_INLINE RDom(const Expr &min, const Expr &extent, Args &&...args) {
|
15747
16441
|
// This should really just be a delegating constructor, but I couldn't make
|
15748
16442
|
// that work with variadic template unpacking in visual studio 2013
|
15749
16443
|
Region region;
|
@@ -15895,12 +16589,14 @@ class Var {
|
|
15895
16589
|
/* The expression representing the Var. Guaranteed to be an
|
15896
16590
|
* Internal::Variable of type Int(32). Created once on
|
15897
16591
|
* construction of the Var to avoid making a fresh Expr every time
|
15898
|
-
* the Var is used in a context in which
|
16592
|
+
* the Var is used in a context in which it will be converted to
|
15899
16593
|
* one. */
|
15900
16594
|
Expr e;
|
15901
16595
|
|
15902
16596
|
public:
|
15903
|
-
/** Construct a Var with the given name
|
16597
|
+
/** Construct a Var with the given name. Unlike Funcs, this will be treated
|
16598
|
+
* as the same Var as any other Var with the same name, including
|
16599
|
+
* implicit Vars. */
|
15904
16600
|
Var(const std::string &n);
|
15905
16601
|
|
15906
16602
|
/** Construct a Var with an automatically-generated unique name. */
|
@@ -15995,9 +16691,6 @@ public:
|
|
15995
16691
|
static Var implicit(int n);
|
15996
16692
|
|
15997
16693
|
/** Return whether a variable name is of the form for an implicit argument.
|
15998
|
-
* TODO: This is almost guaranteed to incorrectly fire on user
|
15999
|
-
* declared variables at some point. We should likely prevent
|
16000
|
-
* user Var declarations from making names of this form.
|
16001
16694
|
*/
|
16002
16695
|
//{
|
16003
16696
|
static bool is_implicit(const std::string &name);
|
@@ -16130,6 +16823,7 @@ struct VarOrRVar {
|
|
16130
16823
|
class ImageParam;
|
16131
16824
|
|
16132
16825
|
namespace Internal {
|
16826
|
+
struct AssociativeOp;
|
16133
16827
|
class Function;
|
16134
16828
|
struct Split;
|
16135
16829
|
struct StorageDim;
|
@@ -16151,7 +16845,6 @@ class Stage {
|
|
16151
16845
|
void split(const std::string &old, const std::string &outer, const std::string &inner,
|
16152
16846
|
const Expr &factor, bool exact, TailStrategy tail);
|
16153
16847
|
void remove(const std::string &var);
|
16154
|
-
Stage &purify(const VarOrRVar &old_name, const VarOrRVar &new_name);
|
16155
16848
|
|
16156
16849
|
const std::vector<Internal::StorageDim> &storage_dims() const {
|
16157
16850
|
return function.schedule().storage_dims();
|
@@ -16159,6 +16852,9 @@ class Stage {
|
|
16159
16852
|
|
16160
16853
|
Stage &compute_with(LoopLevel loop_level, const std::map<std::string, LoopAlignStrategy> &align);
|
16161
16854
|
|
16855
|
+
std::pair<std::vector<Internal::Split>, std::vector<Internal::Split>>
|
16856
|
+
rfactor_validate_args(const std::vector<std::pair<RVar, Var>> &preserved, const Internal::AssociativeOp &prover_result);
|
16857
|
+
|
16162
16858
|
public:
|
16163
16859
|
Stage(Internal::Function f, Internal::Definition d, size_t stage_index)
|
16164
16860
|
: function(std::move(f)), definition(std::move(d)), stage_index(stage_index) {
|
@@ -16254,7 +16950,7 @@ public:
|
|
16254
16950
|
*
|
16255
16951
|
*/
|
16256
16952
|
// @{
|
16257
|
-
Func rfactor(std::vector<std::pair<RVar, Var>> preserved);
|
16953
|
+
Func rfactor(const std::vector<std::pair<RVar, Var>> &preserved);
|
16258
16954
|
Func rfactor(const RVar &r, const Var &v);
|
16259
16955
|
// @}
|
16260
16956
|
|
@@ -16575,7 +17271,7 @@ class FuncRef {
|
|
16575
17271
|
* already have a pure definition, init_val will be used as RHS in
|
16576
17272
|
* the initial function definition. */
|
16577
17273
|
template<typename BinaryOp>
|
16578
|
-
Stage func_ref_update(Expr e, int init_val);
|
17274
|
+
Stage func_ref_update(const Expr &e, int init_val);
|
16579
17275
|
|
16580
17276
|
public:
|
16581
17277
|
FuncRef(const Internal::Function &, const std::vector<Expr> &,
|
@@ -16598,7 +17294,7 @@ public:
|
|
16598
17294
|
* pure definition, this sets it to zero.
|
16599
17295
|
*/
|
16600
17296
|
// @{
|
16601
|
-
Stage operator+=(Expr);
|
17297
|
+
Stage operator+=(const Expr &);
|
16602
17298
|
Stage operator+=(const Tuple &);
|
16603
17299
|
Stage operator+=(const FuncRef &);
|
16604
17300
|
// @}
|
@@ -16609,7 +17305,7 @@ public:
|
|
16609
17305
|
* not already have a pure definition, this sets it to zero.
|
16610
17306
|
*/
|
16611
17307
|
// @{
|
16612
|
-
Stage operator-=(Expr);
|
17308
|
+
Stage operator-=(const Expr &);
|
16613
17309
|
Stage operator-=(const Tuple &);
|
16614
17310
|
Stage operator-=(const FuncRef &);
|
16615
17311
|
// @}
|
@@ -16620,7 +17316,7 @@ public:
|
|
16620
17316
|
* definition, this sets it to 1.
|
16621
17317
|
*/
|
16622
17318
|
// @{
|
16623
|
-
Stage operator*=(Expr);
|
17319
|
+
Stage operator*=(const Expr &);
|
16624
17320
|
Stage operator*=(const Tuple &);
|
16625
17321
|
Stage operator*=(const FuncRef &);
|
16626
17322
|
// @}
|
@@ -16631,7 +17327,7 @@ public:
|
|
16631
17327
|
* function does not already have a pure definition, this sets it to 1.
|
16632
17328
|
*/
|
16633
17329
|
// @{
|
16634
|
-
Stage operator/=(Expr);
|
17330
|
+
Stage operator/=(const Expr &);
|
16635
17331
|
Stage operator/=(const Tuple &);
|
16636
17332
|
Stage operator/=(const FuncRef &);
|
16637
17333
|
// @}
|
@@ -16654,6 +17350,9 @@ public:
|
|
16654
17350
|
/** How many outputs does the function this refers to produce. */
|
16655
17351
|
size_t size() const;
|
16656
17352
|
|
17353
|
+
/** Is this FuncRef syntactically equivalent to another one? */
|
17354
|
+
bool equivalent_to(const FuncRef &other) const;
|
17355
|
+
|
16657
17356
|
/** What function is this calling? */
|
16658
17357
|
Internal::Function function() const {
|
16659
17358
|
return func;
|
@@ -16820,7 +17519,7 @@ public:
|
|
16820
17519
|
* not contain free variables). */
|
16821
17520
|
explicit Func(const Expr &e);
|
16822
17521
|
|
16823
|
-
/** Construct a new Func to wrap an existing, already-
|
17522
|
+
/** Construct a new Func to wrap an existing, already-defined
|
16824
17523
|
* Function object. */
|
16825
17524
|
explicit Func(Internal::Function f);
|
16826
17525
|
|
@@ -17231,14 +17930,6 @@ public:
|
|
17231
17930
|
device_api);
|
17232
17931
|
}
|
17233
17932
|
|
17234
|
-
void define_extern(const std::string &function_name,
|
17235
|
-
const std::vector<ExternFuncArgument> ¶ms,
|
17236
|
-
const std::vector<Type> &types, int dimensionality,
|
17237
|
-
NameMangling mangling) {
|
17238
|
-
define_extern(function_name, params, types,
|
17239
|
-
Internal::make_argument_list(dimensionality), mangling);
|
17240
|
-
}
|
17241
|
-
|
17242
17933
|
void define_extern(const std::string &function_name,
|
17243
17934
|
const std::vector<ExternFuncArgument> ¶ms,
|
17244
17935
|
const std::vector<Type> &types, int dimensionality,
|
@@ -18331,6 +19022,11 @@ public:
|
|
18331
19022
|
* to remove memoized entries using this eviction key from the
|
18332
19023
|
* cache. Memoized computations that do not provide an eviction
|
18333
19024
|
* key will never be evicted by this mechanism.
|
19025
|
+
*
|
19026
|
+
* It is invalid to memoize the output of a Pipeline; attempting
|
19027
|
+
* to do so will issue an error. To cache an entire pipeline,
|
19028
|
+
* either implement a caching mechanism outside of Halide or
|
19029
|
+
* explicitly copy out of the cache with another output Func.
|
18334
19030
|
*/
|
18335
19031
|
Func &memoize(const EvictionKey &eviction_key = EvictionKey());
|
18336
19032
|
|
@@ -19195,401 +19891,6 @@ private:
|
|
19195
19891
|
} // namespace Internal
|
19196
19892
|
} // namespace Halide
|
19197
19893
|
|
19198
|
-
#endif
|
19199
|
-
#ifndef HALIDE_SCOPE_H
|
19200
|
-
#define HALIDE_SCOPE_H
|
19201
|
-
|
19202
|
-
#include <iostream>
|
19203
|
-
#include <map>
|
19204
|
-
#include <stack>
|
19205
|
-
#include <string>
|
19206
|
-
#include <utility>
|
19207
|
-
#include <vector>
|
19208
|
-
|
19209
|
-
|
19210
|
-
/** \file
|
19211
|
-
* Defines the Scope class, which is used for keeping track of names in a scope while traversing IR
|
19212
|
-
*/
|
19213
|
-
|
19214
|
-
namespace Halide {
|
19215
|
-
namespace Internal {
|
19216
|
-
|
19217
|
-
/** A stack which can store one item very efficiently. Using this
|
19218
|
-
* instead of std::stack speeds up Scope substantially. */
|
19219
|
-
template<typename T>
|
19220
|
-
class SmallStack {
|
19221
|
-
private:
|
19222
|
-
T _top;
|
19223
|
-
std::vector<T> _rest;
|
19224
|
-
bool _empty = true;
|
19225
|
-
|
19226
|
-
public:
|
19227
|
-
SmallStack() = default;
|
19228
|
-
|
19229
|
-
void pop() {
|
19230
|
-
if (_rest.empty()) {
|
19231
|
-
_empty = true;
|
19232
|
-
_top = T();
|
19233
|
-
} else {
|
19234
|
-
_top = std::move(_rest.back());
|
19235
|
-
_rest.pop_back();
|
19236
|
-
}
|
19237
|
-
}
|
19238
|
-
|
19239
|
-
void push(T t) {
|
19240
|
-
if (!_empty) {
|
19241
|
-
_rest.push_back(std::move(_top));
|
19242
|
-
}
|
19243
|
-
_top = std::move(t);
|
19244
|
-
_empty = false;
|
19245
|
-
}
|
19246
|
-
|
19247
|
-
T top() const {
|
19248
|
-
return _top;
|
19249
|
-
}
|
19250
|
-
|
19251
|
-
T &top_ref() {
|
19252
|
-
return _top;
|
19253
|
-
}
|
19254
|
-
|
19255
|
-
const T &top_ref() const {
|
19256
|
-
return _top;
|
19257
|
-
}
|
19258
|
-
|
19259
|
-
bool empty() const {
|
19260
|
-
return _empty;
|
19261
|
-
}
|
19262
|
-
|
19263
|
-
size_t size() const {
|
19264
|
-
return _empty ? 0 : (_rest.size() + 1);
|
19265
|
-
}
|
19266
|
-
};
|
19267
|
-
|
19268
|
-
template<>
|
19269
|
-
class SmallStack<void> {
|
19270
|
-
// A stack of voids. Voids are all the same, so just record how many voids are in the stack
|
19271
|
-
int counter = 0;
|
19272
|
-
|
19273
|
-
public:
|
19274
|
-
void pop() {
|
19275
|
-
counter--;
|
19276
|
-
}
|
19277
|
-
void push() {
|
19278
|
-
counter++;
|
19279
|
-
}
|
19280
|
-
bool empty() const {
|
19281
|
-
return counter == 0;
|
19282
|
-
}
|
19283
|
-
};
|
19284
|
-
|
19285
|
-
/** A common pattern when traversing Halide IR is that you need to
|
19286
|
-
* keep track of stuff when you find a Let or a LetStmt, and that it
|
19287
|
-
* should hide previous values with the same name until you leave the
|
19288
|
-
* Let or LetStmt nodes. This class helps with that. */
|
19289
|
-
template<typename T = void>
|
19290
|
-
class Scope {
|
19291
|
-
private:
|
19292
|
-
std::map<std::string, SmallStack<T>> table;
|
19293
|
-
|
19294
|
-
const Scope<T> *containing_scope = nullptr;
|
19295
|
-
|
19296
|
-
public:
|
19297
|
-
Scope() = default;
|
19298
|
-
Scope(Scope &&that) noexcept = default;
|
19299
|
-
Scope &operator=(Scope &&that) noexcept = default;
|
19300
|
-
|
19301
|
-
// Copying a scope object copies a large table full of strings and
|
19302
|
-
// stacks. Bad idea.
|
19303
|
-
Scope(const Scope<T> &) = delete;
|
19304
|
-
Scope<T> &operator=(const Scope<T> &) = delete;
|
19305
|
-
|
19306
|
-
/** Set the parent scope. If lookups fail in this scope, they
|
19307
|
-
* check the containing scope before returning an error. Caller is
|
19308
|
-
* responsible for managing the memory of the containing scope. */
|
19309
|
-
void set_containing_scope(const Scope<T> *s) {
|
19310
|
-
containing_scope = s;
|
19311
|
-
}
|
19312
|
-
|
19313
|
-
/** A const ref to an empty scope. Useful for default function
|
19314
|
-
* arguments, which would otherwise require a copy constructor
|
19315
|
-
* (with llvm in c++98 mode) */
|
19316
|
-
static const Scope<T> &empty_scope() {
|
19317
|
-
static Scope<T> _empty_scope;
|
19318
|
-
return _empty_scope;
|
19319
|
-
}
|
19320
|
-
|
19321
|
-
/** Retrieve the value referred to by a name */
|
19322
|
-
template<typename T2 = T,
|
19323
|
-
typename = typename std::enable_if<!std::is_same<T2, void>::value>::type>
|
19324
|
-
T2 get(const std::string &name) const {
|
19325
|
-
typename std::map<std::string, SmallStack<T>>::const_iterator iter = table.find(name);
|
19326
|
-
if (iter == table.end() || iter->second.empty()) {
|
19327
|
-
if (containing_scope) {
|
19328
|
-
return containing_scope->get(name);
|
19329
|
-
} else {
|
19330
|
-
internal_error << "Name not in Scope: " << name << "\n"
|
19331
|
-
<< *this << "\n";
|
19332
|
-
}
|
19333
|
-
}
|
19334
|
-
return iter->second.top();
|
19335
|
-
}
|
19336
|
-
|
19337
|
-
/** Return a reference to an entry. Does not consider the containing scope. */
|
19338
|
-
template<typename T2 = T,
|
19339
|
-
typename = typename std::enable_if<!std::is_same<T2, void>::value>::type>
|
19340
|
-
T2 &ref(const std::string &name) {
|
19341
|
-
typename std::map<std::string, SmallStack<T>>::iterator iter = table.find(name);
|
19342
|
-
if (iter == table.end() || iter->second.empty()) {
|
19343
|
-
internal_error << "Name not in Scope: " << name << "\n"
|
19344
|
-
<< *this << "\n";
|
19345
|
-
}
|
19346
|
-
return iter->second.top_ref();
|
19347
|
-
}
|
19348
|
-
|
19349
|
-
/** Returns a const pointer to an entry if it exists in this scope or any
|
19350
|
-
* containing scope, or nullptr if it does not. Use this instead of if
|
19351
|
-
* (scope.contains(foo)) { ... scope.get(foo) ... } to avoid doing two
|
19352
|
-
* lookups. */
|
19353
|
-
template<typename T2 = T,
|
19354
|
-
typename = typename std::enable_if<!std::is_same<T2, void>::value>::type>
|
19355
|
-
const T2 *find(const std::string &name) const {
|
19356
|
-
typename std::map<std::string, SmallStack<T>>::const_iterator iter = table.find(name);
|
19357
|
-
if (iter == table.end() || iter->second.empty()) {
|
19358
|
-
if (containing_scope) {
|
19359
|
-
return containing_scope->find(name);
|
19360
|
-
} else {
|
19361
|
-
return nullptr;
|
19362
|
-
}
|
19363
|
-
}
|
19364
|
-
return &(iter->second.top_ref());
|
19365
|
-
}
|
19366
|
-
|
19367
|
-
/** A version of find that returns a non-const pointer, but ignores
|
19368
|
-
* containing scope. */
|
19369
|
-
template<typename T2 = T,
|
19370
|
-
typename = typename std::enable_if<!std::is_same<T2, void>::value>::type>
|
19371
|
-
T2 *shallow_find(const std::string &name) {
|
19372
|
-
typename std::map<std::string, SmallStack<T>>::iterator iter = table.find(name);
|
19373
|
-
if (iter == table.end() || iter->second.empty()) {
|
19374
|
-
return nullptr;
|
19375
|
-
} else {
|
19376
|
-
return &(iter->second.top_ref());
|
19377
|
-
}
|
19378
|
-
}
|
19379
|
-
|
19380
|
-
/** Tests if a name is in scope. If you plan to use the value if it is, call
|
19381
|
-
* find instead. */
|
19382
|
-
bool contains(const std::string &name) const {
|
19383
|
-
typename std::map<std::string, SmallStack<T>>::const_iterator iter = table.find(name);
|
19384
|
-
if (iter == table.end() || iter->second.empty()) {
|
19385
|
-
if (containing_scope) {
|
19386
|
-
return containing_scope->contains(name);
|
19387
|
-
} else {
|
19388
|
-
return false;
|
19389
|
-
}
|
19390
|
-
}
|
19391
|
-
return true;
|
19392
|
-
}
|
19393
|
-
|
19394
|
-
/** How many nested definitions of a single name exist? */
|
19395
|
-
size_t count(const std::string &name) const {
|
19396
|
-
auto it = table.find(name);
|
19397
|
-
if (it == table.end()) {
|
19398
|
-
return 0;
|
19399
|
-
} else {
|
19400
|
-
return it->second.size();
|
19401
|
-
}
|
19402
|
-
}
|
19403
|
-
|
19404
|
-
/** How many distinct names exist (does not count nested definitions of the same name) */
|
19405
|
-
size_t size() const {
|
19406
|
-
return table.size();
|
19407
|
-
}
|
19408
|
-
|
19409
|
-
struct PushToken {
|
19410
|
-
typename std::map<std::string, SmallStack<T>>::iterator iter;
|
19411
|
-
};
|
19412
|
-
|
19413
|
-
/** Add a new (name, value) pair to the current scope. Hide old values that
|
19414
|
-
* have this name until we pop this name. Returns a token that can be used
|
19415
|
-
* to pop the same value without doing a fresh lookup.
|
19416
|
-
*/
|
19417
|
-
template<typename T2 = T,
|
19418
|
-
typename = typename std::enable_if<!std::is_same<T2, void>::value>::type>
|
19419
|
-
PushToken push(const std::string &name, T2 &&value) {
|
19420
|
-
auto it = table.try_emplace(name).first;
|
19421
|
-
it->second.push(std::forward<T2>(value));
|
19422
|
-
return PushToken{it};
|
19423
|
-
}
|
19424
|
-
|
19425
|
-
template<typename T2 = T,
|
19426
|
-
typename = typename std::enable_if<std::is_same<T2, void>::value>::type>
|
19427
|
-
PushToken push(const std::string &name) {
|
19428
|
-
auto it = table.try_emplace(name).first;
|
19429
|
-
it->second.push();
|
19430
|
-
return PushToken{it};
|
19431
|
-
}
|
19432
|
-
|
19433
|
-
/** A name goes out of scope. Restore whatever its old value
|
19434
|
-
* was (or remove it entirely if there was nothing else of the
|
19435
|
-
* same name in an outer scope) */
|
19436
|
-
void pop(const std::string &name) {
|
19437
|
-
typename std::map<std::string, SmallStack<T>>::iterator iter = table.find(name);
|
19438
|
-
internal_assert(iter != table.end()) << "Name not in Scope: " << name << "\n"
|
19439
|
-
<< *this << "\n";
|
19440
|
-
iter->second.pop();
|
19441
|
-
if (iter->second.empty()) {
|
19442
|
-
table.erase(iter);
|
19443
|
-
}
|
19444
|
-
}
|
19445
|
-
|
19446
|
-
/** Pop a name using a token returned by push instead of a string. */
|
19447
|
-
void pop(PushToken p) {
|
19448
|
-
p.iter->second.pop();
|
19449
|
-
if (p.iter->second.empty()) {
|
19450
|
-
table.erase(p.iter);
|
19451
|
-
}
|
19452
|
-
}
|
19453
|
-
|
19454
|
-
/** Iterate through the scope. Does not capture any containing scope. */
|
19455
|
-
class const_iterator {
|
19456
|
-
typename std::map<std::string, SmallStack<T>>::const_iterator iter;
|
19457
|
-
|
19458
|
-
public:
|
19459
|
-
explicit const_iterator(const typename std::map<std::string, SmallStack<T>>::const_iterator &i)
|
19460
|
-
: iter(i) {
|
19461
|
-
}
|
19462
|
-
|
19463
|
-
const_iterator() = default;
|
19464
|
-
|
19465
|
-
bool operator!=(const const_iterator &other) {
|
19466
|
-
return iter != other.iter;
|
19467
|
-
}
|
19468
|
-
|
19469
|
-
void operator++() {
|
19470
|
-
++iter;
|
19471
|
-
}
|
19472
|
-
|
19473
|
-
const std::string &name() {
|
19474
|
-
return iter->first;
|
19475
|
-
}
|
19476
|
-
|
19477
|
-
const SmallStack<T> &stack() {
|
19478
|
-
return iter->second;
|
19479
|
-
}
|
19480
|
-
|
19481
|
-
template<typename T2 = T,
|
19482
|
-
typename = typename std::enable_if<!std::is_same<T2, void>::value>::type>
|
19483
|
-
const T2 &value() {
|
19484
|
-
return iter->second.top_ref();
|
19485
|
-
}
|
19486
|
-
};
|
19487
|
-
|
19488
|
-
const_iterator cbegin() const {
|
19489
|
-
return const_iterator(table.begin());
|
19490
|
-
}
|
19491
|
-
|
19492
|
-
const_iterator cend() const {
|
19493
|
-
return const_iterator(table.end());
|
19494
|
-
}
|
19495
|
-
|
19496
|
-
void swap(Scope<T> &other) noexcept {
|
19497
|
-
table.swap(other.table);
|
19498
|
-
std::swap(containing_scope, other.containing_scope);
|
19499
|
-
}
|
19500
|
-
};
|
19501
|
-
|
19502
|
-
template<typename T>
|
19503
|
-
std::ostream &operator<<(std::ostream &stream, const Scope<T> &s) {
|
19504
|
-
stream << "{\n";
|
19505
|
-
typename Scope<T>::const_iterator iter;
|
19506
|
-
for (iter = s.cbegin(); iter != s.cend(); ++iter) {
|
19507
|
-
stream << " " << iter.name() << "\n";
|
19508
|
-
}
|
19509
|
-
stream << "}";
|
19510
|
-
return stream;
|
19511
|
-
}
|
19512
|
-
|
19513
|
-
/** Helper class for pushing/popping Scope<> values, to allow
|
19514
|
-
* for early-exit in Visitor/Mutators that preserves correctness.
|
19515
|
-
* Note that this name can be a bit confusing, since there are two "scopes"
|
19516
|
-
* involved here:
|
19517
|
-
* - the Scope object itself
|
19518
|
-
* - the lifetime of this helper object
|
19519
|
-
* The "Scoped" in this class name refers to the latter, as it temporarily binds
|
19520
|
-
* a name within the scope of this helper's lifetime. */
|
19521
|
-
template<typename T = void>
|
19522
|
-
struct ScopedBinding {
|
19523
|
-
Scope<T> *scope = nullptr;
|
19524
|
-
typename Scope<T>::PushToken token;
|
19525
|
-
|
19526
|
-
ScopedBinding() = default;
|
19527
|
-
|
19528
|
-
ScopedBinding(Scope<T> &s, const std::string &n, T value)
|
19529
|
-
: scope(&s), token(scope->push(n, std::move(value))) {
|
19530
|
-
}
|
19531
|
-
|
19532
|
-
ScopedBinding(bool condition, Scope<T> &s, const std::string &n, const T &value)
|
19533
|
-
: scope(condition ? &s : nullptr),
|
19534
|
-
token(condition ? scope->push(n, value) : typename Scope<T>::PushToken{}) {
|
19535
|
-
}
|
19536
|
-
|
19537
|
-
bool bound() const {
|
19538
|
-
return scope != nullptr;
|
19539
|
-
}
|
19540
|
-
|
19541
|
-
~ScopedBinding() {
|
19542
|
-
if (scope) {
|
19543
|
-
scope->pop(token);
|
19544
|
-
}
|
19545
|
-
}
|
19546
|
-
|
19547
|
-
// allow move but not copy
|
19548
|
-
ScopedBinding(const ScopedBinding &that) = delete;
|
19549
|
-
ScopedBinding(ScopedBinding &&that) noexcept
|
19550
|
-
: scope(that.scope),
|
19551
|
-
token(that.token) {
|
19552
|
-
// The move constructor must null out scope, so we don't try to pop it
|
19553
|
-
that.scope = nullptr;
|
19554
|
-
}
|
19555
|
-
|
19556
|
-
void operator=(const ScopedBinding &that) = delete;
|
19557
|
-
void operator=(ScopedBinding &&that) = delete;
|
19558
|
-
};
|
19559
|
-
|
19560
|
-
template<>
|
19561
|
-
struct ScopedBinding<void> {
|
19562
|
-
Scope<> *scope;
|
19563
|
-
Scope<>::PushToken token;
|
19564
|
-
ScopedBinding(Scope<> &s, const std::string &n)
|
19565
|
-
: scope(&s), token(scope->push(n)) {
|
19566
|
-
}
|
19567
|
-
ScopedBinding(bool condition, Scope<> &s, const std::string &n)
|
19568
|
-
: scope(condition ? &s : nullptr),
|
19569
|
-
token(condition ? scope->push(n) : Scope<>::PushToken{}) {
|
19570
|
-
}
|
19571
|
-
~ScopedBinding() {
|
19572
|
-
if (scope) {
|
19573
|
-
scope->pop(token);
|
19574
|
-
}
|
19575
|
-
}
|
19576
|
-
|
19577
|
-
// allow move but not copy
|
19578
|
-
ScopedBinding(const ScopedBinding &that) = delete;
|
19579
|
-
ScopedBinding(ScopedBinding &&that) noexcept
|
19580
|
-
: scope(that.scope),
|
19581
|
-
token(that.token) {
|
19582
|
-
// The move constructor must null out scope, so we don't try to pop it
|
19583
|
-
that.scope = nullptr;
|
19584
|
-
}
|
19585
|
-
|
19586
|
-
void operator=(const ScopedBinding &that) = delete;
|
19587
|
-
void operator=(ScopedBinding &&that) = delete;
|
19588
|
-
};
|
19589
|
-
|
19590
|
-
} // namespace Internal
|
19591
|
-
} // namespace Halide
|
19592
|
-
|
19593
19894
|
#endif
|
19594
19895
|
|
19595
19896
|
namespace Halide {
|
@@ -20029,7 +20330,7 @@ bool graph_equal(const IRNode &a, const IRNode &b) {
|
|
20029
20330
|
} else if (a.node_type != b.node_type) {
|
20030
20331
|
return false;
|
20031
20332
|
} else {
|
20032
|
-
return
|
20333
|
+
return graph_equal_impl(a, b);
|
20033
20334
|
}
|
20034
20335
|
}
|
20035
20336
|
|
@@ -20042,7 +20343,7 @@ bool graph_equal(const IRHandle &a, const IRHandle &b) {
|
|
20042
20343
|
} else if (!b.defined()) {
|
20043
20344
|
return false;
|
20044
20345
|
} else {
|
20045
|
-
return
|
20346
|
+
return graph_equal(*(a.get()), *(b.get()));
|
20046
20347
|
}
|
20047
20348
|
}
|
20048
20349
|
|
@@ -20438,8 +20739,10 @@ protected:
|
|
20438
20739
|
// @}
|
20439
20740
|
|
20440
20741
|
private:
|
20441
|
-
/** The nodes visited so far
|
20442
|
-
|
20742
|
+
/** The nodes visited so far. Only includes nodes with a ref count greater
|
20743
|
+
* than one, because we know that nodes with a ref count of 1 will only be
|
20744
|
+
* visited once if their parents are only visited once. */
|
20745
|
+
std::set<const IRNode *> visited;
|
20443
20746
|
|
20444
20747
|
protected:
|
20445
20748
|
/** These methods should call 'include' on the children to only
|
@@ -20822,360 +21125,6 @@ void propagate_estimate_test();
|
|
20822
21125
|
} // namespace Internal
|
20823
21126
|
} // namespace Halide
|
20824
21127
|
|
20825
|
-
#endif
|
20826
|
-
#ifndef HALIDE_BOUNDARY_CONDITIONS_H
|
20827
|
-
#define HALIDE_BOUNDARY_CONDITIONS_H
|
20828
|
-
|
20829
|
-
/** \file
|
20830
|
-
* Support for imposing boundary conditions on Halide::Funcs.
|
20831
|
-
*/
|
20832
|
-
|
20833
|
-
#include <vector>
|
20834
|
-
|
20835
|
-
#ifndef HALIDE_LAMBDA_H
|
20836
|
-
#define HALIDE_LAMBDA_H
|
20837
|
-
|
20838
|
-
|
20839
|
-
/** \file
|
20840
|
-
* Convenience functions for creating small anonymous Halide
|
20841
|
-
* functions. See test/lambda.cpp for example usage. */
|
20842
|
-
|
20843
|
-
namespace Halide {
|
20844
|
-
|
20845
|
-
/** Create a zero-dimensional halide function that returns the given
|
20846
|
-
* expression. The function may have more dimensions if the expression
|
20847
|
-
* contains implicit arguments. */
|
20848
|
-
Func lambda(const Expr &e);
|
20849
|
-
|
20850
|
-
/** Create a 1-D halide function in the first argument that returns
|
20851
|
-
* the second argument. The function may have more dimensions if the
|
20852
|
-
* expression contains implicit arguments and the list of Var
|
20853
|
-
* arguments contains a placeholder ("_"). */
|
20854
|
-
Func lambda(const Var &x, const Expr &e);
|
20855
|
-
|
20856
|
-
/** Create a 2-D halide function in the first two arguments that
|
20857
|
-
* returns the last argument. The function may have more dimensions if
|
20858
|
-
* the expression contains implicit arguments and the list of Var
|
20859
|
-
* arguments contains a placeholder ("_"). */
|
20860
|
-
Func lambda(const Var &x, const Var &y, const Expr &e);
|
20861
|
-
|
20862
|
-
/** Create a 3-D halide function in the first three arguments that
|
20863
|
-
* returns the last argument. The function may have more dimensions
|
20864
|
-
* if the expression contains implicit arguments and the list of Var
|
20865
|
-
* arguments contains a placeholder ("_"). */
|
20866
|
-
Func lambda(const Var &x, const Var &y, const Var &z, const Expr &e);
|
20867
|
-
|
20868
|
-
/** Create a 4-D halide function in the first four arguments that
|
20869
|
-
* returns the last argument. The function may have more dimensions if
|
20870
|
-
* the expression contains implicit arguments and the list of Var
|
20871
|
-
* arguments contains a placeholder ("_"). */
|
20872
|
-
Func lambda(const Var &x, const Var &y, const Var &z, const Var &w, const Expr &e);
|
20873
|
-
|
20874
|
-
/** Create a 5-D halide function in the first five arguments that
|
20875
|
-
* returns the last argument. The function may have more dimensions if
|
20876
|
-
* the expression contains implicit arguments and the list of Var
|
20877
|
-
* arguments contains a placeholder ("_"). */
|
20878
|
-
Func lambda(const Var &x, const Var &y, const Var &z, const Var &w, const Var &v, const Expr &e);
|
20879
|
-
|
20880
|
-
} // namespace Halide
|
20881
|
-
|
20882
|
-
#endif // HALIDE_LAMBDA_H
|
20883
|
-
|
20884
|
-
namespace Halide {
|
20885
|
-
|
20886
|
-
/** namespace to hold functions for imposing boundary conditions on
|
20887
|
-
* Halide Funcs.
|
20888
|
-
*
|
20889
|
-
* All functions in this namespace transform a source Func to a
|
20890
|
-
* result Func where the result produces the values of the source
|
20891
|
-
* within a given region and a different set of values outside the
|
20892
|
-
* given region. A region is an N dimensional box specified by
|
20893
|
-
* mins and extents.
|
20894
|
-
*
|
20895
|
-
* Three areas are defined:
|
20896
|
-
* The image is the entire set of values in the region.
|
20897
|
-
* The edge is the set of pixels in the image but adjacent
|
20898
|
-
* to coordinates that are not in the image.
|
20899
|
-
* The interior is the image minus the edge (and is undefined
|
20900
|
-
* if the extent of any region is 1 or less).
|
20901
|
-
*
|
20902
|
-
* If the source Func has more dimensions than are specified, the extra ones
|
20903
|
-
* are unmodified. Additionally, passing an undefined (default constructed)
|
20904
|
-
* 'Expr' for the min and extent of a dimension will keep that dimension
|
20905
|
-
* unmodified.
|
20906
|
-
*
|
20907
|
-
* Numerous options for specifying the outside area are provided,
|
20908
|
-
* including replacement with an expression, repeating the edge
|
20909
|
-
* samples, mirroring over the edge, and repeating or mirroring the
|
20910
|
-
* entire image.
|
20911
|
-
*
|
20912
|
-
* Using these functions to express your boundary conditions is highly
|
20913
|
-
* recommended for correctness and performance. Some of these are hard
|
20914
|
-
* to get right. The versions here are both understood by bounds
|
20915
|
-
* inference, and also judiciously use the 'likely' intrinsic to minimize
|
20916
|
-
* runtime overhead.
|
20917
|
-
*
|
20918
|
-
*/
|
20919
|
-
namespace BoundaryConditions {
|
20920
|
-
|
20921
|
-
namespace Internal {
|
20922
|
-
|
20923
|
-
inline HALIDE_NO_USER_CODE_INLINE void collect_region(Region &collected_args,
|
20924
|
-
const Expr &a1, const Expr &a2) {
|
20925
|
-
collected_args.emplace_back(a1, a2);
|
20926
|
-
}
|
20927
|
-
|
20928
|
-
template<typename... Args>
|
20929
|
-
inline HALIDE_NO_USER_CODE_INLINE void collect_region(Region &collected_args,
|
20930
|
-
const Expr &a1, const Expr &a2, Args &&...args) {
|
20931
|
-
collected_args.emplace_back(a1, a2);
|
20932
|
-
collect_region(collected_args, std::forward<Args>(args)...);
|
20933
|
-
}
|
20934
|
-
|
20935
|
-
inline const Func &func_like_to_func(const Func &func) {
|
20936
|
-
return func;
|
20937
|
-
}
|
20938
|
-
|
20939
|
-
template<typename T>
|
20940
|
-
inline HALIDE_NO_USER_CODE_INLINE Func func_like_to_func(const T &func_like) {
|
20941
|
-
return lambda(_, func_like(_));
|
20942
|
-
}
|
20943
|
-
|
20944
|
-
} // namespace Internal
|
20945
|
-
|
20946
|
-
/** Impose a boundary condition such that a given expression is returned
|
20947
|
-
* everywhere outside the boundary. Generally the expression will be a
|
20948
|
-
* constant, though the code currently allows accessing the arguments
|
20949
|
-
* of source.
|
20950
|
-
*
|
20951
|
-
* An ImageParam, Buffer<T>, or similar can be passed instead of a
|
20952
|
-
* Func. If this is done and no bounds are given, the boundaries will
|
20953
|
-
* be taken from the min and extent methods of the passed
|
20954
|
-
* object. Note that objects are taken by mutable ref. Pipelines
|
20955
|
-
* capture Buffers via mutable refs, because running a pipeline might
|
20956
|
-
* alter the Buffer metadata (e.g. device allocation state).
|
20957
|
-
*
|
20958
|
-
* (This is similar to setting GL_TEXTURE_WRAP_* to GL_CLAMP_TO_BORDER
|
20959
|
-
* and putting value in the border of the texture.)
|
20960
|
-
*
|
20961
|
-
* You may pass undefined Exprs for dimensions that you do not wish
|
20962
|
-
* to bound.
|
20963
|
-
*/
|
20964
|
-
// @{
|
20965
|
-
Func constant_exterior(const Func &source, const Tuple &value,
|
20966
|
-
const Region &bounds);
|
20967
|
-
Func constant_exterior(const Func &source, const Expr &value,
|
20968
|
-
const Region &bounds);
|
20969
|
-
|
20970
|
-
template<typename T>
|
20971
|
-
HALIDE_NO_USER_CODE_INLINE Func constant_exterior(const T &func_like, const Tuple &value, const Region &bounds) {
|
20972
|
-
return constant_exterior(Internal::func_like_to_func(func_like), value, bounds);
|
20973
|
-
}
|
20974
|
-
|
20975
|
-
template<typename T>
|
20976
|
-
HALIDE_NO_USER_CODE_INLINE Func constant_exterior(const T &func_like, const Expr &value, const Region &bounds) {
|
20977
|
-
return constant_exterior(Internal::func_like_to_func(func_like), value, bounds);
|
20978
|
-
}
|
20979
|
-
|
20980
|
-
template<typename T>
|
20981
|
-
HALIDE_NO_USER_CODE_INLINE Func constant_exterior(const T &func_like, const Tuple &value) {
|
20982
|
-
Region object_bounds;
|
20983
|
-
for (int i = 0; i < func_like.dimensions(); i++) {
|
20984
|
-
object_bounds.emplace_back(Expr(func_like.dim(i).min()), Expr(func_like.dim(i).extent()));
|
20985
|
-
}
|
20986
|
-
|
20987
|
-
return constant_exterior(Internal::func_like_to_func(func_like), value, object_bounds);
|
20988
|
-
}
|
20989
|
-
template<typename T>
|
20990
|
-
HALIDE_NO_USER_CODE_INLINE Func constant_exterior(const T &func_like, const Expr &value) {
|
20991
|
-
return constant_exterior(func_like, Tuple(value));
|
20992
|
-
}
|
20993
|
-
|
20994
|
-
template<typename T, typename... Bounds,
|
20995
|
-
typename std::enable_if<Halide::Internal::all_are_convertible<Expr, Bounds...>::value>::type * = nullptr>
|
20996
|
-
HALIDE_NO_USER_CODE_INLINE Func constant_exterior(const T &func_like, const Tuple &value,
|
20997
|
-
Bounds &&...bounds) {
|
20998
|
-
Region collected_bounds;
|
20999
|
-
Internal::collect_region(collected_bounds, std::forward<Bounds>(bounds)...);
|
21000
|
-
return constant_exterior(Internal::func_like_to_func(func_like), value, collected_bounds);
|
21001
|
-
}
|
21002
|
-
template<typename T, typename... Bounds,
|
21003
|
-
typename std::enable_if<Halide::Internal::all_are_convertible<Expr, Bounds...>::value>::type * = nullptr>
|
21004
|
-
HALIDE_NO_USER_CODE_INLINE Func constant_exterior(const T &func_like, const Expr &value,
|
21005
|
-
Bounds &&...bounds) {
|
21006
|
-
return constant_exterior(func_like, Tuple(value), std::forward<Bounds>(bounds)...);
|
21007
|
-
}
|
21008
|
-
// @}
|
21009
|
-
|
21010
|
-
/** Impose a boundary condition such that the nearest edge sample is returned
|
21011
|
-
* everywhere outside the given region.
|
21012
|
-
*
|
21013
|
-
* An ImageParam, Buffer<T>, or similar can be passed instead of a Func. If this
|
21014
|
-
* is done and no bounds are given, the boundaries will be taken from the
|
21015
|
-
* min and extent methods of the passed object.
|
21016
|
-
*
|
21017
|
-
* (This is similar to setting GL_TEXTURE_WRAP_* to GL_CLAMP_TO_EDGE.)
|
21018
|
-
*
|
21019
|
-
* You may pass undefined Exprs for dimensions that you do not wish
|
21020
|
-
* to bound.
|
21021
|
-
*/
|
21022
|
-
// @{
|
21023
|
-
Func repeat_edge(const Func &source, const Region &bounds);
|
21024
|
-
|
21025
|
-
template<typename T>
|
21026
|
-
HALIDE_NO_USER_CODE_INLINE Func repeat_edge(const T &func_like, const Region &bounds) {
|
21027
|
-
return repeat_edge(Internal::func_like_to_func(func_like), bounds);
|
21028
|
-
}
|
21029
|
-
|
21030
|
-
template<typename T>
|
21031
|
-
HALIDE_NO_USER_CODE_INLINE Func repeat_edge(const T &func_like) {
|
21032
|
-
Region object_bounds;
|
21033
|
-
for (int i = 0; i < func_like.dimensions(); i++) {
|
21034
|
-
object_bounds.emplace_back(Expr(func_like.dim(i).min()), Expr(func_like.dim(i).extent()));
|
21035
|
-
}
|
21036
|
-
|
21037
|
-
return repeat_edge(Internal::func_like_to_func(func_like), object_bounds);
|
21038
|
-
}
|
21039
|
-
// @}
|
21040
|
-
|
21041
|
-
/** Impose a boundary condition such that the entire coordinate space is
|
21042
|
-
* tiled with copies of the image abutted against each other.
|
21043
|
-
*
|
21044
|
-
* An ImageParam, Buffer<T>, or similar can be passed instead of a Func. If this
|
21045
|
-
* is done and no bounds are given, the boundaries will be taken from the
|
21046
|
-
* min and extent methods of the passed object.
|
21047
|
-
*
|
21048
|
-
* (This is similar to setting GL_TEXTURE_WRAP_* to GL_REPEAT.)
|
21049
|
-
*
|
21050
|
-
* You may pass undefined Exprs for dimensions that you do not wish
|
21051
|
-
* to bound.
|
21052
|
-
*/
|
21053
|
-
// @{
|
21054
|
-
Func repeat_image(const Func &source, const Region &bounds);
|
21055
|
-
|
21056
|
-
template<typename T>
|
21057
|
-
HALIDE_NO_USER_CODE_INLINE Func repeat_image(const T &func_like, const Region &bounds) {
|
21058
|
-
return repeat_image(Internal::func_like_to_func(func_like), bounds);
|
21059
|
-
}
|
21060
|
-
|
21061
|
-
template<typename T>
|
21062
|
-
HALIDE_NO_USER_CODE_INLINE Func repeat_image(const T &func_like) {
|
21063
|
-
Region object_bounds;
|
21064
|
-
for (int i = 0; i < func_like.dimensions(); i++) {
|
21065
|
-
object_bounds.emplace_back(Expr(func_like.dim(i).min()), Expr(func_like.dim(i).extent()));
|
21066
|
-
}
|
21067
|
-
|
21068
|
-
return repeat_image(Internal::func_like_to_func(func_like), object_bounds);
|
21069
|
-
}
|
21070
|
-
|
21071
|
-
/** Impose a boundary condition such that the entire coordinate space is
|
21072
|
-
* tiled with copies of the image abutted against each other, but mirror
|
21073
|
-
* them such that adjacent edges are the same.
|
21074
|
-
*
|
21075
|
-
* An ImageParam, Buffer<T>, or similar can be passed instead of a Func. If this
|
21076
|
-
* is done and no bounds are given, the boundaries will be taken from the
|
21077
|
-
* min and extent methods of the passed object.
|
21078
|
-
*
|
21079
|
-
* (This is similar to setting GL_TEXTURE_WRAP_* to GL_MIRRORED_REPEAT.)
|
21080
|
-
*
|
21081
|
-
* You may pass undefined Exprs for dimensions that you do not wish
|
21082
|
-
* to bound.
|
21083
|
-
*/
|
21084
|
-
// @{
|
21085
|
-
Func mirror_image(const Func &source, const Region &bounds);
|
21086
|
-
|
21087
|
-
template<typename T>
|
21088
|
-
HALIDE_NO_USER_CODE_INLINE Func mirror_image(const T &func_like, const Region &bounds) {
|
21089
|
-
return mirror_image(Internal::func_like_to_func(func_like), bounds);
|
21090
|
-
}
|
21091
|
-
|
21092
|
-
template<typename T>
|
21093
|
-
HALIDE_NO_USER_CODE_INLINE Func mirror_image(const T &func_like) {
|
21094
|
-
Region object_bounds;
|
21095
|
-
for (int i = 0; i < func_like.dimensions(); i++) {
|
21096
|
-
object_bounds.emplace_back(Expr(func_like.dim(i).min()), Expr(func_like.dim(i).extent()));
|
21097
|
-
}
|
21098
|
-
|
21099
|
-
return mirror_image(Internal::func_like_to_func(func_like), object_bounds);
|
21100
|
-
}
|
21101
|
-
|
21102
|
-
// @}
|
21103
|
-
|
21104
|
-
/** Impose a boundary condition such that the entire coordinate space is
|
21105
|
-
* tiled with copies of the image abutted against each other, but mirror
|
21106
|
-
* them such that adjacent edges are the same and then overlap the edges.
|
21107
|
-
*
|
21108
|
-
* This produces an error if any extent is 1 or less. (TODO: check this.)
|
21109
|
-
*
|
21110
|
-
* An ImageParam, Buffer<T>, or similar can be passed instead of a Func. If this
|
21111
|
-
* is done and no bounds are given, the boundaries will be taken from the
|
21112
|
-
* min and extent methods of the passed object.
|
21113
|
-
*
|
21114
|
-
* (I do not believe there is a direct GL_TEXTURE_WRAP_* equivalent for this.)
|
21115
|
-
*
|
21116
|
-
* You may pass undefined Exprs for dimensions that you do not wish
|
21117
|
-
* to bound.
|
21118
|
-
*/
|
21119
|
-
// @{
|
21120
|
-
Func mirror_interior(const Func &source, const Region &bounds);
|
21121
|
-
|
21122
|
-
template<typename T>
|
21123
|
-
HALIDE_NO_USER_CODE_INLINE Func mirror_interior(const T &func_like, const Region &bounds) {
|
21124
|
-
return mirror_interior(Internal::func_like_to_func(func_like), bounds);
|
21125
|
-
}
|
21126
|
-
|
21127
|
-
template<typename T>
|
21128
|
-
HALIDE_NO_USER_CODE_INLINE Func mirror_interior(const T &func_like) {
|
21129
|
-
Region object_bounds;
|
21130
|
-
for (int i = 0; i < func_like.dimensions(); i++) {
|
21131
|
-
object_bounds.emplace_back(Expr(func_like.dim(i).min()), Expr(func_like.dim(i).extent()));
|
21132
|
-
}
|
21133
|
-
|
21134
|
-
return mirror_interior(Internal::func_like_to_func(func_like), object_bounds);
|
21135
|
-
}
|
21136
|
-
|
21137
|
-
// @}
|
21138
|
-
|
21139
|
-
} // namespace BoundaryConditions
|
21140
|
-
|
21141
|
-
} // namespace Halide
|
21142
|
-
|
21143
|
-
#endif
|
21144
|
-
#ifndef HALIDE_BOUNDS_INFERENCE_H
|
21145
|
-
#define HALIDE_BOUNDS_INFERENCE_H
|
21146
|
-
|
21147
|
-
/** \file
|
21148
|
-
* Defines the bounds_inference lowering pass.
|
21149
|
-
*/
|
21150
|
-
|
21151
|
-
#include <map>
|
21152
|
-
#include <string>
|
21153
|
-
#include <vector>
|
21154
|
-
|
21155
|
-
|
21156
|
-
namespace Halide {
|
21157
|
-
|
21158
|
-
struct Target;
|
21159
|
-
|
21160
|
-
namespace Internal {
|
21161
|
-
|
21162
|
-
class Function;
|
21163
|
-
|
21164
|
-
/** Take a partially lowered statement that includes symbolic
|
21165
|
-
* representations of the bounds over which things should be realized,
|
21166
|
-
* and inject expressions defining those bounds.
|
21167
|
-
*/
|
21168
|
-
Stmt bounds_inference(Stmt,
|
21169
|
-
const std::vector<Function> &outputs,
|
21170
|
-
const std::vector<std::string> &realization_order,
|
21171
|
-
const std::vector<std::vector<std::string>> &fused_groups,
|
21172
|
-
const std::map<std::string, Function> &environment,
|
21173
|
-
const std::map<std::pair<std::string, int>, Interval> &func_bounds,
|
21174
|
-
const Target &target);
|
21175
|
-
|
21176
|
-
} // namespace Internal
|
21177
|
-
} // namespace Halide
|
21178
|
-
|
21179
21128
|
#endif
|
21180
21129
|
#ifndef HALIDE_BOUND_CONSTANT_EXTENT_LOOPS_H
|
21181
21130
|
#define HALIDE_BOUND_CONSTANT_EXTENT_LOOPS_H
|
@@ -21223,6 +21172,431 @@ Stmt bound_small_allocations(const Stmt &s);
|
|
21223
21172
|
} // namespace Internal
|
21224
21173
|
} // namespace Halide
|
21225
21174
|
|
21175
|
+
#endif
|
21176
|
+
#ifndef HALIDE_BOUNDARY_CONDITIONS_H
|
21177
|
+
#define HALIDE_BOUNDARY_CONDITIONS_H
|
21178
|
+
|
21179
|
+
/** \file
|
21180
|
+
* Support for imposing boundary conditions on Halide::Funcs.
|
21181
|
+
*/
|
21182
|
+
|
21183
|
+
#include <vector>
|
21184
|
+
|
21185
|
+
#ifndef HALIDE_LAMBDA_H
|
21186
|
+
#define HALIDE_LAMBDA_H
|
21187
|
+
|
21188
|
+
|
21189
|
+
/** \file
|
21190
|
+
* Convenience functions for creating small anonymous Halide
|
21191
|
+
* functions. See test/lambda.cpp for example usage. */
|
21192
|
+
|
21193
|
+
namespace Halide {
|
21194
|
+
|
21195
|
+
/** Create a zero-dimensional halide function that returns the given
|
21196
|
+
* expression. The function may have more dimensions if the expression
|
21197
|
+
* contains implicit arguments. */
|
21198
|
+
Func lambda(const Expr &e);
|
21199
|
+
|
21200
|
+
/** Create a 1-D halide function in the first argument that returns
|
21201
|
+
* the second argument. The function may have more dimensions if the
|
21202
|
+
* expression contains implicit arguments and the list of Var
|
21203
|
+
* arguments contains a placeholder ("_"). */
|
21204
|
+
Func lambda(const Var &x, const Expr &e);
|
21205
|
+
|
21206
|
+
/** Create a 2-D halide function in the first two arguments that
|
21207
|
+
* returns the last argument. The function may have more dimensions if
|
21208
|
+
* the expression contains implicit arguments and the list of Var
|
21209
|
+
* arguments contains a placeholder ("_"). */
|
21210
|
+
Func lambda(const Var &x, const Var &y, const Expr &e);
|
21211
|
+
|
21212
|
+
/** Create a 3-D halide function in the first three arguments that
|
21213
|
+
* returns the last argument. The function may have more dimensions
|
21214
|
+
* if the expression contains implicit arguments and the list of Var
|
21215
|
+
* arguments contains a placeholder ("_"). */
|
21216
|
+
Func lambda(const Var &x, const Var &y, const Var &z, const Expr &e);
|
21217
|
+
|
21218
|
+
/** Create a 4-D halide function in the first four arguments that
|
21219
|
+
* returns the last argument. The function may have more dimensions if
|
21220
|
+
* the expression contains implicit arguments and the list of Var
|
21221
|
+
* arguments contains a placeholder ("_"). */
|
21222
|
+
Func lambda(const Var &x, const Var &y, const Var &z, const Var &w, const Expr &e);
|
21223
|
+
|
21224
|
+
/** Create a 5-D halide function in the first five arguments that
|
21225
|
+
* returns the last argument. The function may have more dimensions if
|
21226
|
+
* the expression contains implicit arguments and the list of Var
|
21227
|
+
* arguments contains a placeholder ("_"). */
|
21228
|
+
Func lambda(const Var &x, const Var &y, const Var &z, const Var &w, const Var &v, const Expr &e);
|
21229
|
+
|
21230
|
+
} // namespace Halide
|
21231
|
+
|
21232
|
+
#endif // HALIDE_LAMBDA_H
|
21233
|
+
|
21234
|
+
namespace Halide {
|
21235
|
+
|
21236
|
+
/** namespace to hold functions for imposing boundary conditions on
|
21237
|
+
* Halide Funcs.
|
21238
|
+
*
|
21239
|
+
* All functions in this namespace transform a source Func to a
|
21240
|
+
* result Func where the result produces the values of the source
|
21241
|
+
* within a given region and a different set of values outside the
|
21242
|
+
* given region. A region is an N dimensional box specified by
|
21243
|
+
* mins and extents.
|
21244
|
+
*
|
21245
|
+
* Three areas are defined:
|
21246
|
+
* The image is the entire set of values in the region.
|
21247
|
+
* The edge is the set of pixels in the image but adjacent
|
21248
|
+
* to coordinates that are not in the image.
|
21249
|
+
* The interior is the image minus the edge (and is undefined
|
21250
|
+
* if the extent of any region is 1 or less).
|
21251
|
+
*
|
21252
|
+
* If the source Func has more dimensions than are specified, the extra ones
|
21253
|
+
* are unmodified. Additionally, passing an undefined (default constructed)
|
21254
|
+
* 'Expr' for the min and extent of a dimension will keep that dimension
|
21255
|
+
* unmodified.
|
21256
|
+
*
|
21257
|
+
* Numerous options for specifying the outside area are provided,
|
21258
|
+
* including replacement with an expression, repeating the edge
|
21259
|
+
* samples, mirroring over the edge, and repeating or mirroring the
|
21260
|
+
* entire image.
|
21261
|
+
*
|
21262
|
+
* Using these functions to express your boundary conditions is highly
|
21263
|
+
* recommended for correctness and performance. Some of these are hard
|
21264
|
+
* to get right. The versions here are both understood by bounds
|
21265
|
+
* inference, and also judiciously use the 'likely' intrinsic to minimize
|
21266
|
+
* runtime overhead.
|
21267
|
+
*
|
21268
|
+
*/
|
21269
|
+
namespace BoundaryConditions {
|
21270
|
+
|
21271
|
+
namespace Internal {
|
21272
|
+
|
21273
|
+
inline HALIDE_NO_USER_CODE_INLINE void collect_region(Region &collected_args,
|
21274
|
+
const Expr &a1, const Expr &a2) {
|
21275
|
+
collected_args.emplace_back(a1, a2);
|
21276
|
+
}
|
21277
|
+
|
21278
|
+
template<typename... Args>
|
21279
|
+
inline HALIDE_NO_USER_CODE_INLINE void collect_region(Region &collected_args,
|
21280
|
+
const Expr &a1, const Expr &a2, Args &&...args) {
|
21281
|
+
collected_args.emplace_back(a1, a2);
|
21282
|
+
collect_region(collected_args, std::forward<Args>(args)...);
|
21283
|
+
}
|
21284
|
+
|
21285
|
+
inline const Func &func_like_to_func(const Func &func) {
|
21286
|
+
return func;
|
21287
|
+
}
|
21288
|
+
|
21289
|
+
template<typename T>
|
21290
|
+
inline HALIDE_NO_USER_CODE_INLINE Func func_like_to_func(const T &func_like) {
|
21291
|
+
return lambda(_, func_like(_));
|
21292
|
+
}
|
21293
|
+
|
21294
|
+
} // namespace Internal
|
21295
|
+
|
21296
|
+
/** Impose a boundary condition such that a given expression is returned
|
21297
|
+
* everywhere outside the boundary. Generally the expression will be a
|
21298
|
+
* constant, though the code currently allows accessing the arguments
|
21299
|
+
* of source.
|
21300
|
+
*
|
21301
|
+
* An ImageParam, Buffer<T>, or similar can be passed instead of a
|
21302
|
+
* Func. If this is done and no bounds are given, the boundaries will
|
21303
|
+
* be taken from the min and extent methods of the passed
|
21304
|
+
* object. Note that objects are taken by mutable ref. Pipelines
|
21305
|
+
* capture Buffers via mutable refs, because running a pipeline might
|
21306
|
+
* alter the Buffer metadata (e.g. device allocation state).
|
21307
|
+
*
|
21308
|
+
* (This is similar to setting GL_TEXTURE_WRAP_* to GL_CLAMP_TO_BORDER
|
21309
|
+
* and putting value in the border of the texture.)
|
21310
|
+
*
|
21311
|
+
* You may pass undefined Exprs for dimensions that you do not wish
|
21312
|
+
* to bound.
|
21313
|
+
*/
|
21314
|
+
// @{
|
21315
|
+
Func constant_exterior(const Func &source, const Tuple &value,
|
21316
|
+
const Region &bounds);
|
21317
|
+
Func constant_exterior(const Func &source, const Expr &value,
|
21318
|
+
const Region &bounds);
|
21319
|
+
|
21320
|
+
template<typename T>
|
21321
|
+
HALIDE_NO_USER_CODE_INLINE Func constant_exterior(const T &func_like, const Tuple &value, const Region &bounds) {
|
21322
|
+
return constant_exterior(Internal::func_like_to_func(func_like), value, bounds);
|
21323
|
+
}
|
21324
|
+
|
21325
|
+
template<typename T>
|
21326
|
+
HALIDE_NO_USER_CODE_INLINE Func constant_exterior(const T &func_like, const Expr &value, const Region &bounds) {
|
21327
|
+
return constant_exterior(Internal::func_like_to_func(func_like), value, bounds);
|
21328
|
+
}
|
21329
|
+
|
21330
|
+
template<typename T>
|
21331
|
+
HALIDE_NO_USER_CODE_INLINE Func constant_exterior(const T &func_like, const Tuple &value) {
|
21332
|
+
Region object_bounds;
|
21333
|
+
for (int i = 0; i < func_like.dimensions(); i++) {
|
21334
|
+
object_bounds.emplace_back(Expr(func_like.dim(i).min()), Expr(func_like.dim(i).extent()));
|
21335
|
+
}
|
21336
|
+
|
21337
|
+
return constant_exterior(Internal::func_like_to_func(func_like), value, object_bounds);
|
21338
|
+
}
|
21339
|
+
template<typename T>
|
21340
|
+
HALIDE_NO_USER_CODE_INLINE Func constant_exterior(const T &func_like, const Expr &value) {
|
21341
|
+
return constant_exterior(func_like, Tuple(value));
|
21342
|
+
}
|
21343
|
+
|
21344
|
+
template<typename T, typename... Bounds,
|
21345
|
+
typename std::enable_if<Halide::Internal::all_are_convertible<Expr, Bounds...>::value>::type * = nullptr>
|
21346
|
+
HALIDE_NO_USER_CODE_INLINE Func constant_exterior(const T &func_like, const Tuple &value,
|
21347
|
+
Bounds &&...bounds) {
|
21348
|
+
Region collected_bounds;
|
21349
|
+
Internal::collect_region(collected_bounds, std::forward<Bounds>(bounds)...);
|
21350
|
+
return constant_exterior(Internal::func_like_to_func(func_like), value, collected_bounds);
|
21351
|
+
}
|
21352
|
+
template<typename T, typename... Bounds,
|
21353
|
+
typename std::enable_if<Halide::Internal::all_are_convertible<Expr, Bounds...>::value>::type * = nullptr>
|
21354
|
+
HALIDE_NO_USER_CODE_INLINE Func constant_exterior(const T &func_like, const Expr &value,
|
21355
|
+
Bounds &&...bounds) {
|
21356
|
+
return constant_exterior(func_like, Tuple(value), std::forward<Bounds>(bounds)...);
|
21357
|
+
}
|
21358
|
+
// @}
|
21359
|
+
|
21360
|
+
/** Impose a boundary condition such that the nearest edge sample is returned
|
21361
|
+
* everywhere outside the given region.
|
21362
|
+
*
|
21363
|
+
* An ImageParam, Buffer<T>, or similar can be passed instead of a Func. If this
|
21364
|
+
* is done and no bounds are given, the boundaries will be taken from the
|
21365
|
+
* min and extent methods of the passed object.
|
21366
|
+
*
|
21367
|
+
* (This is similar to setting GL_TEXTURE_WRAP_* to GL_CLAMP_TO_EDGE.)
|
21368
|
+
*
|
21369
|
+
* You may pass undefined Exprs for dimensions that you do not wish
|
21370
|
+
* to bound.
|
21371
|
+
*/
|
21372
|
+
// @{
|
21373
|
+
Func repeat_edge(const Func &source, const Region &bounds);
|
21374
|
+
|
21375
|
+
template<typename T>
|
21376
|
+
HALIDE_NO_USER_CODE_INLINE Func repeat_edge(const T &func_like, const Region &bounds) {
|
21377
|
+
return repeat_edge(Internal::func_like_to_func(func_like), bounds);
|
21378
|
+
}
|
21379
|
+
|
21380
|
+
template<typename T>
|
21381
|
+
HALIDE_NO_USER_CODE_INLINE Func repeat_edge(const T &func_like) {
|
21382
|
+
Region object_bounds;
|
21383
|
+
for (int i = 0; i < func_like.dimensions(); i++) {
|
21384
|
+
object_bounds.emplace_back(Expr(func_like.dim(i).min()), Expr(func_like.dim(i).extent()));
|
21385
|
+
}
|
21386
|
+
|
21387
|
+
return repeat_edge(Internal::func_like_to_func(func_like), object_bounds);
|
21388
|
+
}
|
21389
|
+
// @}
|
21390
|
+
|
21391
|
+
/** Impose a boundary condition such that the entire coordinate space is
|
21392
|
+
* tiled with copies of the image abutted against each other.
|
21393
|
+
*
|
21394
|
+
* An ImageParam, Buffer<T>, or similar can be passed instead of a Func. If this
|
21395
|
+
* is done and no bounds are given, the boundaries will be taken from the
|
21396
|
+
* min and extent methods of the passed object.
|
21397
|
+
*
|
21398
|
+
* (This is similar to setting GL_TEXTURE_WRAP_* to GL_REPEAT.)
|
21399
|
+
*
|
21400
|
+
* You may pass undefined Exprs for dimensions that you do not wish
|
21401
|
+
* to bound.
|
21402
|
+
*/
|
21403
|
+
// @{
|
21404
|
+
Func repeat_image(const Func &source, const Region &bounds);
|
21405
|
+
|
21406
|
+
template<typename T>
|
21407
|
+
HALIDE_NO_USER_CODE_INLINE Func repeat_image(const T &func_like, const Region &bounds) {
|
21408
|
+
return repeat_image(Internal::func_like_to_func(func_like), bounds);
|
21409
|
+
}
|
21410
|
+
|
21411
|
+
template<typename T>
|
21412
|
+
HALIDE_NO_USER_CODE_INLINE Func repeat_image(const T &func_like) {
|
21413
|
+
Region object_bounds;
|
21414
|
+
for (int i = 0; i < func_like.dimensions(); i++) {
|
21415
|
+
object_bounds.emplace_back(Expr(func_like.dim(i).min()), Expr(func_like.dim(i).extent()));
|
21416
|
+
}
|
21417
|
+
|
21418
|
+
return repeat_image(Internal::func_like_to_func(func_like), object_bounds);
|
21419
|
+
}
|
21420
|
+
|
21421
|
+
/** Impose a boundary condition such that the entire coordinate space is
|
21422
|
+
* tiled with copies of the image abutted against each other, but mirror
|
21423
|
+
* them such that adjacent edges are the same.
|
21424
|
+
*
|
21425
|
+
* An ImageParam, Buffer<T>, or similar can be passed instead of a Func. If this
|
21426
|
+
* is done and no bounds are given, the boundaries will be taken from the
|
21427
|
+
* min and extent methods of the passed object.
|
21428
|
+
*
|
21429
|
+
* (This is similar to setting GL_TEXTURE_WRAP_* to GL_MIRRORED_REPEAT.)
|
21430
|
+
*
|
21431
|
+
* You may pass undefined Exprs for dimensions that you do not wish
|
21432
|
+
* to bound.
|
21433
|
+
*/
|
21434
|
+
// @{
|
21435
|
+
Func mirror_image(const Func &source, const Region &bounds);
|
21436
|
+
|
21437
|
+
template<typename T>
|
21438
|
+
HALIDE_NO_USER_CODE_INLINE Func mirror_image(const T &func_like, const Region &bounds) {
|
21439
|
+
return mirror_image(Internal::func_like_to_func(func_like), bounds);
|
21440
|
+
}
|
21441
|
+
|
21442
|
+
template<typename T>
|
21443
|
+
HALIDE_NO_USER_CODE_INLINE Func mirror_image(const T &func_like) {
|
21444
|
+
Region object_bounds;
|
21445
|
+
for (int i = 0; i < func_like.dimensions(); i++) {
|
21446
|
+
object_bounds.emplace_back(Expr(func_like.dim(i).min()), Expr(func_like.dim(i).extent()));
|
21447
|
+
}
|
21448
|
+
|
21449
|
+
return mirror_image(Internal::func_like_to_func(func_like), object_bounds);
|
21450
|
+
}
|
21451
|
+
|
21452
|
+
// @}
|
21453
|
+
|
21454
|
+
/** Impose a boundary condition such that the entire coordinate space is
|
21455
|
+
* tiled with copies of the image abutted against each other, but mirror
|
21456
|
+
* them such that adjacent edges are the same and then overlap the edges.
|
21457
|
+
*
|
21458
|
+
* This produces an error if any extent is 1 or less. (TODO: check this.)
|
21459
|
+
*
|
21460
|
+
* An ImageParam, Buffer<T>, or similar can be passed instead of a Func. If this
|
21461
|
+
* is done and no bounds are given, the boundaries will be taken from the
|
21462
|
+
* min and extent methods of the passed object.
|
21463
|
+
*
|
21464
|
+
* (I do not believe there is a direct GL_TEXTURE_WRAP_* equivalent for this.)
|
21465
|
+
*
|
21466
|
+
* You may pass undefined Exprs for dimensions that you do not wish
|
21467
|
+
* to bound.
|
21468
|
+
*/
|
21469
|
+
// @{
|
21470
|
+
Func mirror_interior(const Func &source, const Region &bounds);
|
21471
|
+
|
21472
|
+
template<typename T>
|
21473
|
+
HALIDE_NO_USER_CODE_INLINE Func mirror_interior(const T &func_like, const Region &bounds) {
|
21474
|
+
return mirror_interior(Internal::func_like_to_func(func_like), bounds);
|
21475
|
+
}
|
21476
|
+
|
21477
|
+
template<typename T>
|
21478
|
+
HALIDE_NO_USER_CODE_INLINE Func mirror_interior(const T &func_like) {
|
21479
|
+
Region object_bounds;
|
21480
|
+
for (int i = 0; i < func_like.dimensions(); i++) {
|
21481
|
+
object_bounds.emplace_back(Expr(func_like.dim(i).min()), Expr(func_like.dim(i).extent()));
|
21482
|
+
}
|
21483
|
+
|
21484
|
+
return mirror_interior(Internal::func_like_to_func(func_like), object_bounds);
|
21485
|
+
}
|
21486
|
+
|
21487
|
+
// @}
|
21488
|
+
|
21489
|
+
} // namespace BoundaryConditions
|
21490
|
+
|
21491
|
+
} // namespace Halide
|
21492
|
+
|
21493
|
+
#endif
|
21494
|
+
#ifndef HALIDE_BOUNDS_INFERENCE_H
|
21495
|
+
#define HALIDE_BOUNDS_INFERENCE_H
|
21496
|
+
|
21497
|
+
/** \file
|
21498
|
+
* Defines the bounds_inference lowering pass.
|
21499
|
+
*/
|
21500
|
+
|
21501
|
+
#include <map>
|
21502
|
+
#include <string>
|
21503
|
+
#include <vector>
|
21504
|
+
|
21505
|
+
|
21506
|
+
namespace Halide {
|
21507
|
+
|
21508
|
+
struct Target;
|
21509
|
+
|
21510
|
+
namespace Internal {
|
21511
|
+
|
21512
|
+
class Function;
|
21513
|
+
|
21514
|
+
/** Take a partially lowered statement that includes symbolic
|
21515
|
+
* representations of the bounds over which things should be realized,
|
21516
|
+
* and inject expressions defining those bounds.
|
21517
|
+
*/
|
21518
|
+
Stmt bounds_inference(Stmt,
|
21519
|
+
const std::vector<Function> &outputs,
|
21520
|
+
const std::vector<std::string> &realization_order,
|
21521
|
+
const std::vector<std::vector<std::string>> &fused_groups,
|
21522
|
+
const std::map<std::string, Function> &environment,
|
21523
|
+
const std::map<std::pair<std::string, int>, Interval> &func_bounds,
|
21524
|
+
const Target &target);
|
21525
|
+
|
21526
|
+
} // namespace Internal
|
21527
|
+
} // namespace Halide
|
21528
|
+
|
21529
|
+
#endif
|
21530
|
+
#ifndef HALIDE_CPLUSPLUS_MANGLE_H
|
21531
|
+
#define HALIDE_CPLUSPLUS_MANGLE_H
|
21532
|
+
|
21533
|
+
/** \file
|
21534
|
+
*
|
21535
|
+
* A simple function to get a C++ mangled function name for a function.
|
21536
|
+
*/
|
21537
|
+
#include <string>
|
21538
|
+
#include <vector>
|
21539
|
+
|
21540
|
+
|
21541
|
+
namespace Halide {
|
21542
|
+
|
21543
|
+
struct ExternFuncArgument;
|
21544
|
+
struct Target;
|
21545
|
+
|
21546
|
+
namespace Internal {
|
21547
|
+
|
21548
|
+
/** Return the mangled C++ name for a function.
|
21549
|
+
* The target parameter is used to decide on the C++
|
21550
|
+
* ABI/mangling style to use.
|
21551
|
+
*/
|
21552
|
+
std::string cplusplus_function_mangled_name(const std::string &name,
|
21553
|
+
const std::vector<std::string> &namespaces,
|
21554
|
+
Type return_type,
|
21555
|
+
const std::vector<ExternFuncArgument> &args,
|
21556
|
+
const Target &target);
|
21557
|
+
|
21558
|
+
void cplusplus_mangle_test();
|
21559
|
+
|
21560
|
+
} // namespace Internal
|
21561
|
+
|
21562
|
+
} // namespace Halide
|
21563
|
+
|
21564
|
+
#endif
|
21565
|
+
#ifndef HALIDE_INTERNAL_CSE_H
|
21566
|
+
#define HALIDE_INTERNAL_CSE_H
|
21567
|
+
|
21568
|
+
/** \file
|
21569
|
+
* Defines a pass for introducing let expressions to wrap common sub-expressions. */
|
21570
|
+
|
21571
|
+
|
21572
|
+
namespace Halide {
|
21573
|
+
namespace Internal {
|
21574
|
+
|
21575
|
+
/** Replace each common sub-expression in the argument with a
|
21576
|
+
* variable, and wrap the resulting expr in a let statement giving a
|
21577
|
+
* value to that variable.
|
21578
|
+
*
|
21579
|
+
* This is important to do within Halide (instead of punting to llvm),
|
21580
|
+
* because exprs that come in from the front-end are small when
|
21581
|
+
* considered as a graph, but combinatorially large when considered as
|
21582
|
+
* a tree. For an example of such a case, see
|
21583
|
+
* test/code_explosion.cpp
|
21584
|
+
*
|
21585
|
+
* The last parameter determines whether all common subexpressions are
|
21586
|
+
* lifted, or only those that the simplifier would not substitute back
|
21587
|
+
* in (e.g. addition of a constant).
|
21588
|
+
*/
|
21589
|
+
Expr common_subexpression_elimination(const Expr &, bool lift_all = false);
|
21590
|
+
|
21591
|
+
/** Do common-subexpression-elimination on each expression in a
|
21592
|
+
* statement. Does not introduce let statements. */
|
21593
|
+
Stmt common_subexpression_elimination(const Stmt &, bool lift_all = false);
|
21594
|
+
|
21595
|
+
void cse_test();
|
21596
|
+
|
21597
|
+
} // namespace Internal
|
21598
|
+
} // namespace Halide
|
21599
|
+
|
21226
21600
|
#endif
|
21227
21601
|
#ifndef HALIDE_CANONICALIZE_GPU_VARS_H
|
21228
21602
|
#define HALIDE_CANONICALIZE_GPU_VARS_H
|
@@ -21498,6 +21872,24 @@ struct Indentation {
|
|
21498
21872
|
};
|
21499
21873
|
std::ostream &operator<<(std::ostream &stream, const Indentation &);
|
21500
21874
|
|
21875
|
+
template<typename T>
|
21876
|
+
struct Ansi {
|
21877
|
+
const T &cnt;
|
21878
|
+
const char *open, *close;
|
21879
|
+
};
|
21880
|
+
|
21881
|
+
template<typename T>
|
21882
|
+
std::ostream &operator<<(std::ostream &out, const Ansi<T> &a) {
|
21883
|
+
if (a.open) {
|
21884
|
+
out << a.open;
|
21885
|
+
}
|
21886
|
+
out << a.cnt;
|
21887
|
+
if (a.close) {
|
21888
|
+
out << a.close;
|
21889
|
+
}
|
21890
|
+
return out;
|
21891
|
+
}
|
21892
|
+
|
21501
21893
|
/** An IRVisitor that emits IR to the given output stream in a human
|
21502
21894
|
* readable form. Can be subclassed if you want to modify the way in
|
21503
21895
|
* which it prints.
|
@@ -21547,12 +21939,51 @@ protected:
|
|
21547
21939
|
* ellipses (...). */
|
21548
21940
|
bool is_summary = false;
|
21549
21941
|
|
21942
|
+
bool ansi = false;
|
21943
|
+
int paren_depth = 0;
|
21944
|
+
|
21945
|
+
const char *ansi_hl = "";
|
21946
|
+
const char *ansi_dim = "";
|
21947
|
+
const char *ansi_kw = "";
|
21948
|
+
const char *ansi_imm_int = "";
|
21949
|
+
const char *ansi_imm_float = "";
|
21950
|
+
const char *ansi_imm_str = "";
|
21951
|
+
const char *ansi_var = "";
|
21952
|
+
const char *ansi_buf = "";
|
21953
|
+
const char *ansi_fn = "";
|
21954
|
+
const char *ansi_type = "";
|
21955
|
+
const char *ansi_reset_col = "";
|
21956
|
+
const char *ansi_reset = "";
|
21957
|
+
|
21958
|
+
// clang-format off
|
21959
|
+
template<typename T> Ansi<T> hl(const T &t);
|
21960
|
+
template<typename T> Ansi<T> kw(const T &t);
|
21961
|
+
template<typename T> Ansi<T> imm_int(const T &t);
|
21962
|
+
template<typename T> Ansi<T> imm_float(const T &t);
|
21963
|
+
template<typename T> Ansi<T> imm_str(const T &t);
|
21964
|
+
template<typename T> Ansi<T> var(const T &t);
|
21965
|
+
template<typename T> Ansi<T> buf(const T &t);
|
21966
|
+
template<typename T> Ansi<T> fn(const T &t);
|
21967
|
+
template<typename T> Ansi<T> type(const T &t);
|
21968
|
+
template<typename T> Ansi<T> typep(const T &t);
|
21969
|
+
template<typename T> Ansi<T> paren(const T &t, bool bold = true, int d = -1);
|
21970
|
+
// clang-format on
|
21971
|
+
|
21550
21972
|
/** Either emits "(" or "", depending on the value of implicit_parens */
|
21551
21973
|
void open();
|
21552
21974
|
|
21553
21975
|
/** Either emits ")" or "", depending on the value of implicit_parens */
|
21554
21976
|
void close();
|
21555
21977
|
|
21978
|
+
/** Emits "(" always */
|
21979
|
+
void openf();
|
21980
|
+
|
21981
|
+
/** Emits "name(" always */
|
21982
|
+
void openf(const char *name);
|
21983
|
+
|
21984
|
+
/** Emits ")" always */
|
21985
|
+
void closef();
|
21986
|
+
|
21556
21987
|
/** The symbols whose types can be inferred from values printed
|
21557
21988
|
* already. */
|
21558
21989
|
Scope<> known_type;
|
@@ -21625,6 +22056,8 @@ std::string lldb_string(const Stmt &);
|
|
21625
22056
|
|
21626
22057
|
#endif
|
21627
22058
|
|
22059
|
+
#include <unordered_map>
|
22060
|
+
|
21628
22061
|
namespace Halide {
|
21629
22062
|
|
21630
22063
|
struct Argument;
|
@@ -21749,6 +22182,8 @@ protected:
|
|
21749
22182
|
* use different syntax for other C-like languages. */
|
21750
22183
|
virtual void add_vector_typedefs(const std::set<Type> &vector_types);
|
21751
22184
|
|
22185
|
+
std::unordered_map<std::string, std::string> extern_function_name_map;
|
22186
|
+
|
21752
22187
|
/** Bottleneck to allow customization of calls to generic Extern/PureExtern calls. */
|
21753
22188
|
virtual std::string print_extern_call(const Call *op);
|
21754
22189
|
|
@@ -22153,7 +22588,10 @@ protected:
|
|
22153
22588
|
void visit(const Shuffle *op) override;
|
22154
22589
|
void visit(const Call *op) override;
|
22155
22590
|
|
22591
|
+
std::string print_extern_call(const Call *op) override;
|
22592
|
+
|
22156
22593
|
VectorDeclarationStyle vector_declaration_style = VectorDeclarationStyle::CLikeSyntax;
|
22594
|
+
bool abs_returns_unsigned_type{false};
|
22157
22595
|
};
|
22158
22596
|
|
22159
22597
|
} // namespace Internal
|
@@ -22292,6 +22730,7 @@ template<typename, typename>
|
|
22292
22730
|
class IRBuilder;
|
22293
22731
|
class LLVMContext;
|
22294
22732
|
class Type;
|
22733
|
+
class PointerType;
|
22295
22734
|
class StructType;
|
22296
22735
|
class Instruction;
|
22297
22736
|
class CallInst;
|
@@ -22437,10 +22876,31 @@ protected:
|
|
22437
22876
|
std::unique_ptr<llvm::IRBuilder<llvm::ConstantFolder, llvm::IRBuilderDefaultInserter>> builder;
|
22438
22877
|
llvm::Value *value = nullptr;
|
22439
22878
|
llvm::MDNode *very_likely_branch = nullptr;
|
22440
|
-
llvm::MDNode *
|
22879
|
+
llvm::MDNode *fast_fp_math_md = nullptr;
|
22441
22880
|
llvm::MDNode *strict_fp_math_md = nullptr;
|
22442
22881
|
std::vector<LoweredArgument> current_function_args;
|
22443
22882
|
|
22883
|
+
bool in_strict_float = false;
|
22884
|
+
bool any_strict_float = false;
|
22885
|
+
|
22886
|
+
/** Change floating-point math op emission to use fast flags. */
|
22887
|
+
void set_fast_fp_math();
|
22888
|
+
|
22889
|
+
/** Change floating-point math op emission to use strict flags. */
|
22890
|
+
void set_strict_fp_math();
|
22891
|
+
|
22892
|
+
/** If any_strict_float is true, sets fast math flags for the lifetime of
|
22893
|
+
* this object, then sets them to strict on destruction. If any_strict_float
|
22894
|
+
* is false, does nothing. Any call to an IRBuilder method that starts with
|
22895
|
+
* "CreateF" should probably be wrapped in one of these, but it's safe to
|
22896
|
+
* miss one - we just miss out on some optimizations. In this way codegen is
|
22897
|
+
* designed to fail safe. */
|
22898
|
+
struct ScopedFastMath {
|
22899
|
+
CodeGen_LLVM *codegen;
|
22900
|
+
ScopedFastMath(CodeGen_LLVM *);
|
22901
|
+
~ScopedFastMath();
|
22902
|
+
};
|
22903
|
+
|
22444
22904
|
/** The target we're generating code for */
|
22445
22905
|
Halide::Target target;
|
22446
22906
|
|
@@ -22478,6 +22938,7 @@ protected:
|
|
22478
22938
|
/** Some useful llvm types */
|
22479
22939
|
// @{
|
22480
22940
|
llvm::Type *void_t = nullptr, *i1_t = nullptr, *i8_t = nullptr, *i16_t = nullptr, *i32_t = nullptr, *i64_t = nullptr, *f16_t = nullptr, *f32_t = nullptr, *f64_t = nullptr;
|
22941
|
+
llvm::PointerType *ptr_t = nullptr;
|
22481
22942
|
llvm::StructType *halide_buffer_t_type = nullptr,
|
22482
22943
|
*type_t_type,
|
22483
22944
|
*dimension_t_type,
|
@@ -22985,7 +23446,7 @@ private:
|
|
22985
23446
|
|
22986
23447
|
void codegen_atomic_rmw(const Store *op);
|
22987
23448
|
|
22988
|
-
void init_codegen(const std::string &name
|
23449
|
+
void init_codegen(const std::string &name);
|
22989
23450
|
std::unique_ptr<llvm::Module> finish_codegen();
|
22990
23451
|
|
22991
23452
|
/** A helper routine for generating folded vector reductions. */
|
@@ -23054,6 +23515,29 @@ std::unique_ptr<CodeGen_GPU_Dev> new_CodeGen_OpenCL_Dev(const Target &target);
|
|
23054
23515
|
} // namespace Internal
|
23055
23516
|
} // namespace Halide
|
23056
23517
|
|
23518
|
+
#endif
|
23519
|
+
#ifndef HALIDE_CODEGEN_PTX_DEV_H
|
23520
|
+
#define HALIDE_CODEGEN_PTX_DEV_H
|
23521
|
+
|
23522
|
+
/** \file
|
23523
|
+
* Defines the code-generator for producing CUDA host code
|
23524
|
+
*/
|
23525
|
+
|
23526
|
+
#include <memory>
|
23527
|
+
|
23528
|
+
namespace Halide {
|
23529
|
+
|
23530
|
+
struct Target;
|
23531
|
+
|
23532
|
+
namespace Internal {
|
23533
|
+
|
23534
|
+
struct CodeGen_GPU_Dev;
|
23535
|
+
|
23536
|
+
std::unique_ptr<CodeGen_GPU_Dev> new_CodeGen_PTX_Dev(const Target &target);
|
23537
|
+
|
23538
|
+
} // namespace Internal
|
23539
|
+
} // namespace Halide
|
23540
|
+
|
23057
23541
|
#endif
|
23058
23542
|
#ifndef HALIDE_CODEGEN_POSIX_H
|
23059
23543
|
#define HALIDE_CODEGEN_POSIX_H
|
@@ -23163,29 +23647,6 @@ private:
|
|
23163
23647
|
} // namespace Internal
|
23164
23648
|
} // namespace Halide
|
23165
23649
|
|
23166
|
-
#endif
|
23167
|
-
#ifndef HALIDE_CODEGEN_PTX_DEV_H
|
23168
|
-
#define HALIDE_CODEGEN_PTX_DEV_H
|
23169
|
-
|
23170
|
-
/** \file
|
23171
|
-
* Defines the code-generator for producing CUDA host code
|
23172
|
-
*/
|
23173
|
-
|
23174
|
-
#include <memory>
|
23175
|
-
|
23176
|
-
namespace Halide {
|
23177
|
-
|
23178
|
-
struct Target;
|
23179
|
-
|
23180
|
-
namespace Internal {
|
23181
|
-
|
23182
|
-
struct CodeGen_GPU_Dev;
|
23183
|
-
|
23184
|
-
std::unique_ptr<CodeGen_GPU_Dev> new_CodeGen_PTX_Dev(const Target &target);
|
23185
|
-
|
23186
|
-
} // namespace Internal
|
23187
|
-
} // namespace Halide
|
23188
|
-
|
23189
23650
|
#endif
|
23190
23651
|
#ifndef HALIDE_CODEGEN_PYTORCH_H
|
23191
23652
|
#define HALIDE_CODEGEN_PYTORCH_H
|
@@ -23549,221 +24010,10 @@ inline Expr u64_sat(Expr e) {
|
|
23549
24010
|
}; // namespace ConciseCasts
|
23550
24011
|
}; // namespace Halide
|
23551
24012
|
|
23552
|
-
#endif
|
23553
|
-
#ifndef HALIDE_CPLUSPLUS_MANGLE_H
|
23554
|
-
#define HALIDE_CPLUSPLUS_MANGLE_H
|
23555
|
-
|
23556
|
-
/** \file
|
23557
|
-
*
|
23558
|
-
* A simple function to get a C++ mangled function name for a function.
|
23559
|
-
*/
|
23560
|
-
#include <string>
|
23561
|
-
#include <vector>
|
23562
|
-
|
23563
|
-
|
23564
|
-
namespace Halide {
|
23565
|
-
|
23566
|
-
struct ExternFuncArgument;
|
23567
|
-
struct Target;
|
23568
|
-
|
23569
|
-
namespace Internal {
|
23570
|
-
|
23571
|
-
/** Return the mangled C++ name for a function.
|
23572
|
-
* The target parameter is used to decide on the C++
|
23573
|
-
* ABI/mangling style to use.
|
23574
|
-
*/
|
23575
|
-
std::string cplusplus_function_mangled_name(const std::string &name,
|
23576
|
-
const std::vector<std::string> &namespaces,
|
23577
|
-
Type return_type,
|
23578
|
-
const std::vector<ExternFuncArgument> &args,
|
23579
|
-
const Target &target);
|
23580
|
-
|
23581
|
-
void cplusplus_mangle_test();
|
23582
|
-
|
23583
|
-
} // namespace Internal
|
23584
|
-
|
23585
|
-
} // namespace Halide
|
23586
|
-
|
23587
24013
|
#endif
|
23588
24014
|
#ifndef HALIDE_CONSTANT_BOUNDS_H
|
23589
24015
|
#define HALIDE_CONSTANT_BOUNDS_H
|
23590
24016
|
|
23591
|
-
#ifndef HALIDE_CONSTANT_INTERVAL_H
|
23592
|
-
#define HALIDE_CONSTANT_INTERVAL_H
|
23593
|
-
|
23594
|
-
#include <stdint.h>
|
23595
|
-
|
23596
|
-
/** \file
|
23597
|
-
* Defines the ConstantInterval class, and operators on it.
|
23598
|
-
*/
|
23599
|
-
|
23600
|
-
namespace Halide {
|
23601
|
-
|
23602
|
-
struct Type;
|
23603
|
-
|
23604
|
-
namespace Internal {
|
23605
|
-
|
23606
|
-
/** A class to represent ranges of integers. Can be unbounded above or below,
|
23607
|
-
* but they cannot be empty. */
|
23608
|
-
struct ConstantInterval {
|
23609
|
-
/** The lower and upper bound of the interval. They are included
|
23610
|
-
* in the interval. */
|
23611
|
-
int64_t min = 0, max = 0;
|
23612
|
-
bool min_defined = false, max_defined = false;
|
23613
|
-
|
23614
|
-
/* A default-constructed Interval is everything */
|
23615
|
-
ConstantInterval() = default;
|
23616
|
-
|
23617
|
-
/** Construct an interval from a lower and upper bound. */
|
23618
|
-
ConstantInterval(int64_t min, int64_t max);
|
23619
|
-
|
23620
|
-
/** The interval representing everything. */
|
23621
|
-
static ConstantInterval everything();
|
23622
|
-
|
23623
|
-
/** Construct an interval representing a single point. */
|
23624
|
-
static ConstantInterval single_point(int64_t x);
|
23625
|
-
|
23626
|
-
/** Construct intervals bounded above or below. */
|
23627
|
-
static ConstantInterval bounded_below(int64_t min);
|
23628
|
-
static ConstantInterval bounded_above(int64_t max);
|
23629
|
-
|
23630
|
-
/** Is the interval the entire range */
|
23631
|
-
bool is_everything() const;
|
23632
|
-
|
23633
|
-
/** Is the interval just a single value (min == max) */
|
23634
|
-
bool is_single_point() const;
|
23635
|
-
|
23636
|
-
/** Is the interval a particular single value */
|
23637
|
-
bool is_single_point(int64_t x) const;
|
23638
|
-
|
23639
|
-
/** Does the interval have a finite upper and lower bound */
|
23640
|
-
bool is_bounded() const;
|
23641
|
-
|
23642
|
-
/** Expand the interval to include another Interval */
|
23643
|
-
void include(const ConstantInterval &i);
|
23644
|
-
|
23645
|
-
/** Expand the interval to include a point */
|
23646
|
-
void include(int64_t x);
|
23647
|
-
|
23648
|
-
/** Test if the interval contains a particular value */
|
23649
|
-
bool contains(int32_t x) const;
|
23650
|
-
|
23651
|
-
/** Test if the interval contains a particular value */
|
23652
|
-
bool contains(int64_t x) const;
|
23653
|
-
|
23654
|
-
/** Test if the interval contains a particular unsigned value */
|
23655
|
-
bool contains(uint64_t x) const;
|
23656
|
-
|
23657
|
-
/** Construct the smallest interval containing two intervals. */
|
23658
|
-
static ConstantInterval make_union(const ConstantInterval &a, const ConstantInterval &b);
|
23659
|
-
|
23660
|
-
/** Construct the largest interval contained within two intervals. Throws an
|
23661
|
-
* error if the interval is empty. */
|
23662
|
-
static ConstantInterval make_intersection(const ConstantInterval &a, const ConstantInterval &b);
|
23663
|
-
|
23664
|
-
/** Equivalent to same_as. Exists so that the autoscheduler can
|
23665
|
-
* compare two map<string, Interval> for equality in order to
|
23666
|
-
* cache computations. */
|
23667
|
-
bool operator==(const ConstantInterval &other) const;
|
23668
|
-
|
23669
|
-
/** In-place versions of the arithmetic operators below. */
|
23670
|
-
// @{
|
23671
|
-
void operator+=(const ConstantInterval &other);
|
23672
|
-
void operator+=(int64_t);
|
23673
|
-
void operator-=(const ConstantInterval &other);
|
23674
|
-
void operator-=(int64_t);
|
23675
|
-
void operator*=(const ConstantInterval &other);
|
23676
|
-
void operator*=(int64_t);
|
23677
|
-
void operator/=(const ConstantInterval &other);
|
23678
|
-
void operator/=(int64_t);
|
23679
|
-
void operator%=(const ConstantInterval &other);
|
23680
|
-
void operator%=(int64_t);
|
23681
|
-
// @}
|
23682
|
-
|
23683
|
-
/** Negate an interval. */
|
23684
|
-
ConstantInterval operator-() const;
|
23685
|
-
|
23686
|
-
/** Track what happens if a constant integer interval is forced to fit into
|
23687
|
-
* a concrete integer type. */
|
23688
|
-
void cast_to(const Type &t);
|
23689
|
-
|
23690
|
-
/** Get constant integer bounds on a type. */
|
23691
|
-
static ConstantInterval bounds_of_type(Type);
|
23692
|
-
};
|
23693
|
-
|
23694
|
-
/** Arithmetic operators on ConstantIntervals. The resulting interval contains
|
23695
|
-
* all possible values of the operator applied to any two elements of the
|
23696
|
-
* argument intervals. Note that these operator on unbounded integers. If you
|
23697
|
-
* are applying this to concrete small integer types, you will need to manually
|
23698
|
-
* cast the constant interval back to the desired type to model the effect of
|
23699
|
-
* overflow. */
|
23700
|
-
// @{
|
23701
|
-
ConstantInterval operator+(const ConstantInterval &a, const ConstantInterval &b);
|
23702
|
-
ConstantInterval operator+(const ConstantInterval &a, int64_t b);
|
23703
|
-
ConstantInterval operator-(const ConstantInterval &a, const ConstantInterval &b);
|
23704
|
-
ConstantInterval operator-(const ConstantInterval &a, int64_t b);
|
23705
|
-
ConstantInterval operator/(const ConstantInterval &a, const ConstantInterval &b);
|
23706
|
-
ConstantInterval operator/(const ConstantInterval &a, int64_t b);
|
23707
|
-
ConstantInterval operator*(const ConstantInterval &a, const ConstantInterval &b);
|
23708
|
-
ConstantInterval operator*(const ConstantInterval &a, int64_t b);
|
23709
|
-
ConstantInterval operator%(const ConstantInterval &a, const ConstantInterval &b);
|
23710
|
-
ConstantInterval operator%(const ConstantInterval &a, int64_t b);
|
23711
|
-
ConstantInterval min(const ConstantInterval &a, const ConstantInterval &b);
|
23712
|
-
ConstantInterval min(const ConstantInterval &a, int64_t b);
|
23713
|
-
ConstantInterval max(const ConstantInterval &a, const ConstantInterval &b);
|
23714
|
-
ConstantInterval max(const ConstantInterval &a, int64_t b);
|
23715
|
-
ConstantInterval abs(const ConstantInterval &a);
|
23716
|
-
ConstantInterval operator<<(const ConstantInterval &a, const ConstantInterval &b);
|
23717
|
-
ConstantInterval operator<<(const ConstantInterval &a, int64_t b);
|
23718
|
-
ConstantInterval operator<<(int64_t a, const ConstantInterval &b);
|
23719
|
-
ConstantInterval operator>>(const ConstantInterval &a, const ConstantInterval &b);
|
23720
|
-
ConstantInterval operator>>(const ConstantInterval &a, int64_t b);
|
23721
|
-
ConstantInterval operator>>(int64_t a, const ConstantInterval &b);
|
23722
|
-
// @}
|
23723
|
-
|
23724
|
-
/** Comparison operators on ConstantIntervals. Returns whether the comparison is
|
23725
|
-
* true for all values of the two intervals. */
|
23726
|
-
// @{
|
23727
|
-
bool operator<=(const ConstantInterval &a, const ConstantInterval &b);
|
23728
|
-
bool operator<=(const ConstantInterval &a, int64_t b);
|
23729
|
-
bool operator<=(int64_t a, const ConstantInterval &b);
|
23730
|
-
bool operator<(const ConstantInterval &a, const ConstantInterval &b);
|
23731
|
-
bool operator<(const ConstantInterval &a, int64_t b);
|
23732
|
-
bool operator<(int64_t a, const ConstantInterval &b);
|
23733
|
-
|
23734
|
-
inline bool operator>=(const ConstantInterval &a, const ConstantInterval &b) {
|
23735
|
-
return b <= a;
|
23736
|
-
}
|
23737
|
-
inline bool operator>(const ConstantInterval &a, const ConstantInterval &b) {
|
23738
|
-
return b < a;
|
23739
|
-
}
|
23740
|
-
inline bool operator>=(const ConstantInterval &a, int64_t b) {
|
23741
|
-
return b <= a;
|
23742
|
-
}
|
23743
|
-
inline bool operator>(const ConstantInterval &a, int64_t b) {
|
23744
|
-
return b < a;
|
23745
|
-
}
|
23746
|
-
inline bool operator>=(int64_t a, const ConstantInterval &b) {
|
23747
|
-
return b <= a;
|
23748
|
-
}
|
23749
|
-
inline bool operator>(int64_t a, const ConstantInterval &b) {
|
23750
|
-
return b < a;
|
23751
|
-
}
|
23752
|
-
|
23753
|
-
// @}
|
23754
|
-
} // namespace Internal
|
23755
|
-
|
23756
|
-
/** Cast operators for ConstantIntervals. These ones have to live out in
|
23757
|
-
* Halide::, to avoid C++ name lookup confusion with the Halide::cast variants
|
23758
|
-
* that take Exprs. */
|
23759
|
-
// @{
|
23760
|
-
Internal::ConstantInterval cast(Type t, const Internal::ConstantInterval &a);
|
23761
|
-
Internal::ConstantInterval saturating_cast(Type t, const Internal::ConstantInterval &a);
|
23762
|
-
// @}
|
23763
|
-
|
23764
|
-
} // namespace Halide
|
23765
|
-
|
23766
|
-
#endif
|
23767
24017
|
|
23768
24018
|
/** \file
|
23769
24019
|
* Methods for computing compile-time constant int64_t upper and lower bounds of
|
@@ -23792,42 +24042,6 @@ ConstantInterval constant_integer_bounds(const Expr &e,
|
|
23792
24042
|
} // namespace Internal
|
23793
24043
|
} // namespace Halide
|
23794
24044
|
|
23795
|
-
#endif
|
23796
|
-
#ifndef HALIDE_INTERNAL_CSE_H
|
23797
|
-
#define HALIDE_INTERNAL_CSE_H
|
23798
|
-
|
23799
|
-
/** \file
|
23800
|
-
* Defines a pass for introducing let expressions to wrap common sub-expressions. */
|
23801
|
-
|
23802
|
-
|
23803
|
-
namespace Halide {
|
23804
|
-
namespace Internal {
|
23805
|
-
|
23806
|
-
/** Replace each common sub-expression in the argument with a
|
23807
|
-
* variable, and wrap the resulting expr in a let statement giving a
|
23808
|
-
* value to that variable.
|
23809
|
-
*
|
23810
|
-
* This is important to do within Halide (instead of punting to llvm),
|
23811
|
-
* because exprs that come in from the front-end are small when
|
23812
|
-
* considered as a graph, but combinatorially large when considered as
|
23813
|
-
* a tree. For an example of a such a case, see
|
23814
|
-
* test/code_explosion.cpp
|
23815
|
-
*
|
23816
|
-
* The last parameter determines whether all common subexpressions are
|
23817
|
-
* lifted, or only those that the simplifier would not subsitute back
|
23818
|
-
* in (e.g. addition of a constant).
|
23819
|
-
*/
|
23820
|
-
Expr common_subexpression_elimination(const Expr &, bool lift_all = false);
|
23821
|
-
|
23822
|
-
/** Do common-subexpression-elimination on each expression in a
|
23823
|
-
* statement. Does not introduce let statements. */
|
23824
|
-
Stmt common_subexpression_elimination(const Stmt &, bool lift_all = false);
|
23825
|
-
|
23826
|
-
void cse_test();
|
23827
|
-
|
23828
|
-
} // namespace Internal
|
23829
|
-
} // namespace Halide
|
23830
|
-
|
23831
24045
|
#endif
|
23832
24046
|
#ifndef HALIDE_INTERNAL_DEBUG_ARGUMENTS_H
|
23833
24047
|
#define HALIDE_INTERNAL_DEBUG_ARGUMENTS_H
|
@@ -24124,27 +24338,27 @@ Pipeline deserialize_pipeline(const std::string &filename, const std::map<std::s
|
|
24124
24338
|
/// @return Returns a newly constructed deserialized Pipeline object/
|
24125
24339
|
Pipeline deserialize_pipeline(std::istream &in, const std::map<std::string, Parameter> &user_params);
|
24126
24340
|
|
24127
|
-
/// @brief Deserialize a Halide pipeline from a byte buffer containing a
|
24341
|
+
/// @brief Deserialize a Halide pipeline from a byte buffer containing a serialized pipeline in binary format
|
24128
24342
|
/// @param data The data buffer containing a serialized Halide pipeline
|
24129
24343
|
/// @param user_params Map of named input/output parameters to bind with the resulting pipeline (used to avoid deserializing specific objects and enable the use of externally defined ones instead).
|
24130
24344
|
/// @return Returns a newly constructed deserialized Pipeline object/
|
24131
24345
|
Pipeline deserialize_pipeline(const std::vector<uint8_t> &data, const std::map<std::string, Parameter> &user_params);
|
24132
24346
|
|
24133
|
-
/// @brief Deserialize the
|
24347
|
+
/// @brief Deserialize the external parameters for the Halide pipeline from a file.
|
24134
24348
|
/// This method allows a minimal deserialization of just the external pipeline parameters, so they can be
|
24135
24349
|
/// remapped and overridden with user parameters prior to deserializing the pipeline definition.
|
24136
24350
|
/// @param filename The location of the file to deserialize. Must use .hlpipe extension.
|
24137
24351
|
/// @return Returns a map containing the names and description of external parameters referenced in the pipeline
|
24138
24352
|
std::map<std::string, Parameter> deserialize_parameters(const std::string &filename);
|
24139
24353
|
|
24140
|
-
/// @brief Deserialize the
|
24354
|
+
/// @brief Deserialize the external parameters for the Halide pipeline from input stream.
|
24141
24355
|
/// This method allows a minimal deserialization of just the external pipeline parameters, so they can be
|
24142
24356
|
/// remapped and overridden with user parameters prior to deserializing the pipeline definition.
|
24143
24357
|
/// @param in The input stream to read from containing a serialized Halide pipeline
|
24144
24358
|
/// @return Returns a map containing the names and description of external parameters referenced in the pipeline
|
24145
24359
|
std::map<std::string, Parameter> deserialize_parameters(std::istream &in);
|
24146
24360
|
|
24147
|
-
/// @brief Deserialize the
|
24361
|
+
/// @brief Deserialize the external parameters for the Halide pipeline from a byte buffer containing a serialized
|
24148
24362
|
/// pipeline in binary format. This method allows a minimal deserialization of just the external pipeline
|
24149
24363
|
/// parameters, so they can be remapped and overridden with user parameters prior to deserializing the
|
24150
24364
|
/// pipeline definition.
|
@@ -24301,6 +24515,7 @@ Stmt inject_early_frees(const Stmt &s);
|
|
24301
24515
|
#define HALIDE_ELF_H
|
24302
24516
|
|
24303
24517
|
#include <algorithm>
|
24518
|
+
#include <cstdint>
|
24304
24519
|
#include <iterator>
|
24305
24520
|
#include <list>
|
24306
24521
|
#include <memory>
|
@@ -24524,17 +24739,17 @@ public:
|
|
24524
24739
|
SHT_REL = 9,
|
24525
24740
|
SHT_SHLIB = 10,
|
24526
24741
|
SHT_DYNSYM = 11,
|
24527
|
-
SHT_LOPROC =
|
24528
|
-
SHT_HIPROC =
|
24529
|
-
SHT_LOUSER =
|
24530
|
-
SHT_HIUSER =
|
24742
|
+
SHT_LOPROC = 0x70000000u,
|
24743
|
+
SHT_HIPROC = 0x7fffffffu,
|
24744
|
+
SHT_LOUSER = 0x80000000u,
|
24745
|
+
SHT_HIUSER = 0xffffffffu,
|
24531
24746
|
};
|
24532
24747
|
|
24533
24748
|
enum Flag : uint32_t {
|
24534
24749
|
SHF_WRITE = 0x1,
|
24535
24750
|
SHF_ALLOC = 0x2,
|
24536
24751
|
SHF_EXECINSTR = 0x4,
|
24537
|
-
SHF_MASKPROC =
|
24752
|
+
SHF_MASKPROC = 0xf0000000u,
|
24538
24753
|
};
|
24539
24754
|
|
24540
24755
|
typedef std::vector<Relocation> RelocationList;
|
@@ -24762,8 +24977,8 @@ public:
|
|
24762
24977
|
ET_EXEC = 2,
|
24763
24978
|
ET_DYN = 3,
|
24764
24979
|
ET_CORE = 4,
|
24765
|
-
ET_LOPROC =
|
24766
|
-
ET_HIPROC =
|
24980
|
+
ET_LOPROC = 0xff00u,
|
24981
|
+
ET_HIPROC = 0xffffu,
|
24767
24982
|
};
|
24768
24983
|
|
24769
24984
|
// We use lists for sections and symbols to avoid iterator
|
@@ -28128,6 +28343,11 @@ public:
|
|
28128
28343
|
#undef HALIDE_OUTPUT_FORWARD
|
28129
28344
|
#undef HALIDE_OUTPUT_FORWARD_CONST
|
28130
28345
|
|
28346
|
+
using GIOBase::set_type;
|
28347
|
+
|
28348
|
+
/** Set types dynamically for tuple outputs. */
|
28349
|
+
void set_type(const std::vector<Type> &types);
|
28350
|
+
|
28131
28351
|
protected:
|
28132
28352
|
GeneratorOutputBase(size_t array_size,
|
28133
28353
|
const std::string &name,
|
@@ -28999,11 +29219,21 @@ public:
|
|
28999
29219
|
// long as all Outputs have been defined.)
|
29000
29220
|
Pipeline get_pipeline();
|
29001
29221
|
|
29222
|
+
protected:
|
29223
|
+
void claim_name(const std::string &name, const char *param_type) {
|
29224
|
+
user_assert(param_info_ptr->names.count(name) == 0)
|
29225
|
+
<< "Cannot add " << param_type << " with name " << name
|
29226
|
+
<< ". It is already taken by another input or output parameter.";
|
29227
|
+
param_info_ptr->names.insert(name);
|
29228
|
+
}
|
29229
|
+
|
29230
|
+
public:
|
29002
29231
|
// Create Input<Func> with dynamic type & dimensions
|
29003
29232
|
template<typename T,
|
29004
29233
|
typename std::enable_if<std::is_same<T, Halide::Func>::value>::type * = nullptr>
|
29005
29234
|
GeneratorInput<T> *add_input(const std::string &name, const Type &t, int dimensions) {
|
29006
29235
|
check_exact_phase(GeneratorBase::ConfigureCalled);
|
29236
|
+
claim_name(name, "input");
|
29007
29237
|
auto *p = new GeneratorInput<T>(name, t, dimensions);
|
29008
29238
|
p->generator = this;
|
29009
29239
|
param_info_ptr->owned_extras.push_back(std::unique_ptr<Internal::GIOBase>(p));
|
@@ -29018,6 +29248,7 @@ public:
|
|
29018
29248
|
static_assert(!T::has_static_halide_type, "You can only call this version of add_input() for a Buffer<T, D> where T is void or omitted .");
|
29019
29249
|
static_assert(!T::has_static_dimensions, "You can only call this version of add_input() for a Buffer<T, D> where D is -1 or omitted.");
|
29020
29250
|
check_exact_phase(GeneratorBase::ConfigureCalled);
|
29251
|
+
claim_name(name, "input");
|
29021
29252
|
auto *p = new GeneratorInput<T>(name, t, dimensions);
|
29022
29253
|
p->generator = this;
|
29023
29254
|
param_info_ptr->owned_extras.push_back(std::unique_ptr<Internal::GIOBase>(p));
|
@@ -29032,6 +29263,7 @@ public:
|
|
29032
29263
|
static_assert(T::has_static_halide_type, "You can only call this version of add_input() for a Buffer<T, D> where T is not void.");
|
29033
29264
|
static_assert(!T::has_static_dimensions, "You can only call this version of add_input() for a Buffer<T, D> where D is -1 or omitted.");
|
29034
29265
|
check_exact_phase(GeneratorBase::ConfigureCalled);
|
29266
|
+
claim_name(name, "input");
|
29035
29267
|
auto *p = new GeneratorInput<T>(name, dimensions);
|
29036
29268
|
p->generator = this;
|
29037
29269
|
param_info_ptr->owned_extras.push_back(std::unique_ptr<Internal::GIOBase>(p));
|
@@ -29046,6 +29278,7 @@ public:
|
|
29046
29278
|
static_assert(T::has_static_halide_type, "You can only call this version of add_input() for a Buffer<T, D> where T is not void.");
|
29047
29279
|
static_assert(T::has_static_dimensions, "You can only call this version of add_input() for a Buffer<T, D> where D is not -1.");
|
29048
29280
|
check_exact_phase(GeneratorBase::ConfigureCalled);
|
29281
|
+
claim_name(name, "input");
|
29049
29282
|
auto *p = new GeneratorInput<T>(name);
|
29050
29283
|
p->generator = this;
|
29051
29284
|
param_info_ptr->owned_extras.push_back(std::unique_ptr<Internal::GIOBase>(p));
|
@@ -29057,6 +29290,7 @@ public:
|
|
29057
29290
|
typename std::enable_if<std::is_arithmetic<T>::value>::type * = nullptr>
|
29058
29291
|
GeneratorInput<T> *add_input(const std::string &name) {
|
29059
29292
|
check_exact_phase(GeneratorBase::ConfigureCalled);
|
29293
|
+
claim_name(name, "input");
|
29060
29294
|
auto *p = new GeneratorInput<T>(name);
|
29061
29295
|
p->generator = this;
|
29062
29296
|
param_info_ptr->owned_extras.push_back(std::unique_ptr<Internal::GIOBase>(p));
|
@@ -29068,6 +29302,7 @@ public:
|
|
29068
29302
|
typename std::enable_if<std::is_same<T, Expr>::value>::type * = nullptr>
|
29069
29303
|
GeneratorInput<T> *add_input(const std::string &name, const Type &type) {
|
29070
29304
|
check_exact_phase(GeneratorBase::ConfigureCalled);
|
29305
|
+
claim_name(name, "input");
|
29071
29306
|
auto *p = new GeneratorInput<Expr>(name);
|
29072
29307
|
p->generator = this;
|
29073
29308
|
p->set_type(type);
|
@@ -29079,8 +29314,9 @@ public:
|
|
29079
29314
|
// Create Output<Func> with dynamic type & dimensions
|
29080
29315
|
template<typename T,
|
29081
29316
|
typename std::enable_if<std::is_same<T, Halide::Func>::value>::type * = nullptr>
|
29082
|
-
GeneratorOutput<T> *add_output(const std::string &name, const Type &t, int dimensions) {
|
29317
|
+
GeneratorOutput<T> *add_output(const std::string &name, const std::vector<Type> &t, int dimensions) {
|
29083
29318
|
check_exact_phase(GeneratorBase::ConfigureCalled);
|
29319
|
+
claim_name(name, "output");
|
29084
29320
|
auto *p = new GeneratorOutput<T>(name, t, dimensions);
|
29085
29321
|
p->generator = this;
|
29086
29322
|
param_info_ptr->owned_extras.push_back(std::unique_ptr<Internal::GIOBase>(p));
|
@@ -29088,13 +29324,20 @@ public:
|
|
29088
29324
|
return p;
|
29089
29325
|
}
|
29090
29326
|
|
29327
|
+
template<typename T,
|
29328
|
+
typename std::enable_if<std::is_same<T, Halide::Func>::value>::type * = nullptr>
|
29329
|
+
GeneratorOutput<T> *add_output(const std::string &name, const Type &t, int dimensions) {
|
29330
|
+
return add_output<T>(name, std::vector<Type>{t}, dimensions);
|
29331
|
+
}
|
29332
|
+
|
29091
29333
|
// Create Output<Buffer> with dynamic type & dimensions
|
29092
29334
|
template<typename T,
|
29093
29335
|
typename std::enable_if<!std::is_arithmetic<T>::value && !std::is_same<T, Halide::Func>::value>::type * = nullptr>
|
29094
|
-
GeneratorOutput<T> *add_output(const std::string &name, const Type &t, int dimensions) {
|
29336
|
+
GeneratorOutput<T> *add_output(const std::string &name, const std::vector<Type> &t, int dimensions) {
|
29095
29337
|
static_assert(!T::has_static_halide_type, "You can only call this version of add_output() for a Buffer<T, D> where T is void or omitted .");
|
29096
29338
|
static_assert(!T::has_static_dimensions, "You can only call this version of add_output() for a Buffer<T, D> where D is -1 or omitted.");
|
29097
29339
|
check_exact_phase(GeneratorBase::ConfigureCalled);
|
29340
|
+
claim_name(name, "output");
|
29098
29341
|
auto *p = new GeneratorOutput<T>(name, t, dimensions);
|
29099
29342
|
p->generator = this;
|
29100
29343
|
param_info_ptr->owned_extras.push_back(std::unique_ptr<Internal::GIOBase>(p));
|
@@ -29102,13 +29345,20 @@ public:
|
|
29102
29345
|
return p;
|
29103
29346
|
}
|
29104
29347
|
|
29105
|
-
|
29348
|
+
template<typename T,
|
29349
|
+
typename std::enable_if<!std::is_arithmetic<T>::value && !std::is_same<T, Halide::Func>::value>::type * = nullptr>
|
29350
|
+
GeneratorOutput<T> *add_output(const std::string &name, const Type &t, int dimensions) {
|
29351
|
+
return add_output<T>(name, std::vector<Type>{t}, dimensions);
|
29352
|
+
}
|
29353
|
+
|
29354
|
+
// Create Output<Buffer> with either a compile-time type or a
|
29355
|
+
// to-be-set-later type and dynamic dimensions
|
29106
29356
|
template<typename T,
|
29107
29357
|
typename std::enable_if<!std::is_arithmetic<T>::value && !std::is_same<T, Halide::Func>::value>::type * = nullptr>
|
29108
29358
|
GeneratorOutput<T> *add_output(const std::string &name, int dimensions) {
|
29109
|
-
static_assert(T::has_static_halide_type, "You can only call this version of add_output() for a Buffer<T, D> where T is not void.");
|
29110
29359
|
static_assert(!T::has_static_dimensions, "You can only call this version of add_output() for a Buffer<T, D> where D is -1 or omitted.");
|
29111
29360
|
check_exact_phase(GeneratorBase::ConfigureCalled);
|
29361
|
+
claim_name(name, "output");
|
29112
29362
|
auto *p = new GeneratorOutput<T>(name, dimensions);
|
29113
29363
|
p->generator = this;
|
29114
29364
|
param_info_ptr->owned_extras.push_back(std::unique_ptr<Internal::GIOBase>(p));
|
@@ -29116,13 +29366,35 @@ public:
|
|
29116
29366
|
return p;
|
29117
29367
|
}
|
29118
29368
|
|
29119
|
-
// Create Output<Buffer> with compile-time
|
29369
|
+
// Create Output<Buffer> with compile-time dimensions and dynamic type
|
29370
|
+
template<typename T,
|
29371
|
+
typename std::enable_if<!std::is_arithmetic<T>::value && !std::is_same<T, Halide::Func>::value>::type * = nullptr>
|
29372
|
+
GeneratorOutput<T> *add_output(const std::string &name, const std::vector<Type> &t) {
|
29373
|
+
static_assert(!T::has_static_halide_type, "You can only call this version of add_output() for a Buffer<T, D> where T is void or omitted.");
|
29374
|
+
static_assert(T::has_static_dimensions, "You can only call this version of add_output() for a Buffer<void, D> where D is not -1.");
|
29375
|
+
check_exact_phase(GeneratorBase::ConfigureCalled);
|
29376
|
+
claim_name(name, "output");
|
29377
|
+
auto *p = new GeneratorOutput<T>(name, t);
|
29378
|
+
p->generator = this;
|
29379
|
+
param_info_ptr->owned_extras.push_back(std::unique_ptr<Internal::GIOBase>(p));
|
29380
|
+
param_info_ptr->filter_outputs.push_back(p);
|
29381
|
+
return p;
|
29382
|
+
}
|
29383
|
+
|
29384
|
+
template<typename T,
|
29385
|
+
typename std::enable_if<!std::is_arithmetic<T>::value && !std::is_same<T, Halide::Func>::value>::type * = nullptr>
|
29386
|
+
GeneratorOutput<T> *add_output(const std::string &name, const Type &t) {
|
29387
|
+
return add_output<T>(name, std::vector<Type>{t});
|
29388
|
+
}
|
29389
|
+
|
29390
|
+
// Create Output<Buffer> with compile-time type and dimensions
|
29120
29391
|
template<typename T,
|
29121
29392
|
typename std::enable_if<!std::is_arithmetic<T>::value && !std::is_same<T, Halide::Func>::value>::type * = nullptr>
|
29122
29393
|
GeneratorOutput<T> *add_output(const std::string &name) {
|
29123
29394
|
static_assert(T::has_static_halide_type, "You can only call this version of add_output() for a Buffer<T, D> where T is not void.");
|
29124
29395
|
static_assert(T::has_static_dimensions, "You can only call this version of add_output() for a Buffer<T, D> where D is not -1.");
|
29125
29396
|
check_exact_phase(GeneratorBase::ConfigureCalled);
|
29397
|
+
claim_name(name, "output");
|
29126
29398
|
auto *p = new GeneratorOutput<T>(name);
|
29127
29399
|
p->generator = this;
|
29128
29400
|
param_info_ptr->owned_extras.push_back(std::unique_ptr<Internal::GIOBase>(p));
|
@@ -29954,240 +30226,6 @@ std::string type_suffix(const std::vector<Expr> &ops, bool signed_variants = tru
|
|
29954
30226
|
} // namespace Internal
|
29955
30227
|
} // namespace Halide
|
29956
30228
|
|
29957
|
-
#endif
|
29958
|
-
#ifndef HALIDE_INFER_ARGUMENTS_H
|
29959
|
-
#define HALIDE_INFER_ARGUMENTS_H
|
29960
|
-
|
29961
|
-
#include <vector>
|
29962
|
-
|
29963
|
-
|
29964
|
-
/** \file
|
29965
|
-
*
|
29966
|
-
* Interface for a visitor to infer arguments used in a body Stmt.
|
29967
|
-
*/
|
29968
|
-
|
29969
|
-
namespace Halide {
|
29970
|
-
namespace Internal {
|
29971
|
-
|
29972
|
-
/** An inferred argument. Inferred args are either Params,
|
29973
|
-
* ImageParams, or Buffers. The first two are handled by the param
|
29974
|
-
* field, and global images are tracked via the buf field. These
|
29975
|
-
* are used directly when jitting, or used for validation when
|
29976
|
-
* compiling with an explicit argument list. */
|
29977
|
-
struct InferredArgument {
|
29978
|
-
Argument arg;
|
29979
|
-
Parameter param;
|
29980
|
-
Buffer<> buffer;
|
29981
|
-
|
29982
|
-
bool operator<(const InferredArgument &other) const {
|
29983
|
-
if (arg.is_buffer() && !other.arg.is_buffer()) {
|
29984
|
-
return true;
|
29985
|
-
} else if (other.arg.is_buffer() && !arg.is_buffer()) {
|
29986
|
-
return false;
|
29987
|
-
} else {
|
29988
|
-
return arg.name < other.arg.name;
|
29989
|
-
}
|
29990
|
-
}
|
29991
|
-
};
|
29992
|
-
|
29993
|
-
class Function;
|
29994
|
-
|
29995
|
-
std::vector<InferredArgument> infer_arguments(const Stmt &body, const std::vector<Function> &outputs);
|
29996
|
-
|
29997
|
-
} // namespace Internal
|
29998
|
-
} // namespace Halide
|
29999
|
-
|
30000
|
-
#endif
|
30001
|
-
#ifndef HALIDE_HOST_GPU_BUFFER_COPIES_H
|
30002
|
-
#define HALIDE_HOST_GPU_BUFFER_COPIES_H
|
30003
|
-
|
30004
|
-
/** \file
|
30005
|
-
* Defines the lowering passes that deal with host and device buffer flow.
|
30006
|
-
*/
|
30007
|
-
|
30008
|
-
#include <string>
|
30009
|
-
#include <vector>
|
30010
|
-
|
30011
|
-
|
30012
|
-
namespace Halide {
|
30013
|
-
|
30014
|
-
struct Target;
|
30015
|
-
|
30016
|
-
namespace Internal {
|
30017
|
-
|
30018
|
-
/** A helper function to call an extern function, and assert that it
|
30019
|
-
* returns 0. */
|
30020
|
-
Stmt call_extern_and_assert(const std::string &name, const std::vector<Expr> &args);
|
30021
|
-
|
30022
|
-
/** Inject calls to halide_device_malloc, halide_copy_to_device, and
|
30023
|
-
* halide_copy_to_host as needed. */
|
30024
|
-
Stmt inject_host_dev_buffer_copies(Stmt s, const Target &t);
|
30025
|
-
|
30026
|
-
} // namespace Internal
|
30027
|
-
} // namespace Halide
|
30028
|
-
|
30029
|
-
#endif
|
30030
|
-
#ifndef HALIDE_INLINE_H
|
30031
|
-
#define HALIDE_INLINE_H
|
30032
|
-
|
30033
|
-
/** \file
|
30034
|
-
* Methods for replacing calls to functions with their definitions.
|
30035
|
-
*/
|
30036
|
-
|
30037
|
-
|
30038
|
-
namespace Halide {
|
30039
|
-
namespace Internal {
|
30040
|
-
|
30041
|
-
class Function;
|
30042
|
-
|
30043
|
-
/** Inline a single named function, which must be pure. For a pure function to
|
30044
|
-
* be inlined, it must not have any specializations (i.e. it can only have one
|
30045
|
-
* values definition). */
|
30046
|
-
// @{
|
30047
|
-
Stmt inline_function(Stmt s, const Function &f);
|
30048
|
-
Expr inline_function(Expr e, const Function &f);
|
30049
|
-
void inline_function(Function caller, const Function &f);
|
30050
|
-
// @}
|
30051
|
-
|
30052
|
-
/** Check if the schedule of an inlined function is legal, throwing an error
|
30053
|
-
* if it is not. */
|
30054
|
-
void validate_schedule_inlined_function(Function f);
|
30055
|
-
|
30056
|
-
} // namespace Internal
|
30057
|
-
} // namespace Halide
|
30058
|
-
|
30059
|
-
#endif
|
30060
|
-
#ifndef HALIDE_INLINE_REDUCTIONS_H
|
30061
|
-
#define HALIDE_INLINE_REDUCTIONS_H
|
30062
|
-
|
30063
|
-
#include <string>
|
30064
|
-
|
30065
|
-
|
30066
|
-
/** \file
|
30067
|
-
* Defines some inline reductions: sum, product, minimum, maximum.
|
30068
|
-
*/
|
30069
|
-
namespace Halide {
|
30070
|
-
|
30071
|
-
class Func;
|
30072
|
-
|
30073
|
-
/** An inline reduction. This is suitable for convolution-type
|
30074
|
-
* operations - the reduction will be computed in the innermost loop
|
30075
|
-
* that it is used in. The argument may contain free or implicit
|
30076
|
-
* variables, and must refer to some reduction domain. The free
|
30077
|
-
* variables are still free in the return value, but the reduction
|
30078
|
-
* domain is captured - the result expression does not refer to a
|
30079
|
-
* reduction domain and can be used in a pure function definition.
|
30080
|
-
*
|
30081
|
-
* An example using \ref sum :
|
30082
|
-
*
|
30083
|
-
\code
|
30084
|
-
Func f, g;
|
30085
|
-
Var x;
|
30086
|
-
RDom r(0, 10);
|
30087
|
-
f(x) = x*x;
|
30088
|
-
g(x) = sum(f(x + r));
|
30089
|
-
\endcode
|
30090
|
-
*
|
30091
|
-
* Here g computes some blur of x, but g is still a pure function. The
|
30092
|
-
* sum is being computed by an anonymous reduction function that is
|
30093
|
-
* scheduled innermost within g.
|
30094
|
-
*/
|
30095
|
-
//@{
|
30096
|
-
Expr sum(Expr, const std::string &s = "sum");
|
30097
|
-
Expr saturating_sum(Expr, const std::string &s = "saturating_sum");
|
30098
|
-
Expr product(Expr, const std::string &s = "product");
|
30099
|
-
Expr maximum(Expr, const std::string &s = "maximum");
|
30100
|
-
Expr minimum(Expr, const std::string &s = "minimum");
|
30101
|
-
//@}
|
30102
|
-
|
30103
|
-
/** Variants of the inline reduction in which the RDom is stated
|
30104
|
-
* explicitly. The expression can refer to multiple RDoms, and only
|
30105
|
-
* the inner one is captured by the reduction. This allows you to
|
30106
|
-
* write expressions like:
|
30107
|
-
\code
|
30108
|
-
RDom r1(0, 10), r2(0, 10), r3(0, 10);
|
30109
|
-
Expr e = minimum(r1, product(r2, sum(r3, r1 + r2 + r3)));
|
30110
|
-
\endcode
|
30111
|
-
*/
|
30112
|
-
// @{
|
30113
|
-
Expr sum(const RDom &, Expr, const std::string &s = "sum");
|
30114
|
-
Expr saturating_sum(const RDom &r, Expr e, const std::string &s = "saturating_sum");
|
30115
|
-
Expr product(const RDom &, Expr, const std::string &s = "product");
|
30116
|
-
Expr maximum(const RDom &, Expr, const std::string &s = "maximum");
|
30117
|
-
Expr minimum(const RDom &, Expr, const std::string &s = "minimum");
|
30118
|
-
// @}
|
30119
|
-
|
30120
|
-
/** Returns an Expr or Tuple representing the coordinates of the point
|
30121
|
-
* in the RDom which minimizes or maximizes the expression. The
|
30122
|
-
* expression must refer to some RDom. Also returns the extreme value
|
30123
|
-
* of the expression as the last element of the tuple. */
|
30124
|
-
// @{
|
30125
|
-
Tuple argmax(Expr, const std::string &s = "argmax");
|
30126
|
-
Tuple argmin(Expr, const std::string &s = "argmin");
|
30127
|
-
Tuple argmax(const RDom &, Expr, const std::string &s = "argmax");
|
30128
|
-
Tuple argmin(const RDom &, Expr, const std::string &s = "argmin");
|
30129
|
-
// @}
|
30130
|
-
|
30131
|
-
/** Inline reductions create an anonymous helper Func to do the
|
30132
|
-
* work. The variants below instead take a named Func object to use,
|
30133
|
-
* so that it is no longer anonymous and can be scheduled
|
30134
|
-
* (e.g. unrolled across the reduction domain). The Func passed must
|
30135
|
-
* not have any existing definition. */
|
30136
|
-
//@{
|
30137
|
-
Expr sum(Expr, const Func &);
|
30138
|
-
Expr saturating_sum(Expr, const Func &);
|
30139
|
-
Expr product(Expr, const Func &);
|
30140
|
-
Expr maximum(Expr, const Func &);
|
30141
|
-
Expr minimum(Expr, const Func &);
|
30142
|
-
Expr sum(const RDom &, Expr, const Func &);
|
30143
|
-
Expr saturating_sum(const RDom &r, Expr e, const Func &);
|
30144
|
-
Expr product(const RDom &, Expr, const Func &);
|
30145
|
-
Expr maximum(const RDom &, Expr, const Func &);
|
30146
|
-
Expr minimum(const RDom &, Expr, const Func &);
|
30147
|
-
Tuple argmax(Expr, const Func &);
|
30148
|
-
Tuple argmin(Expr, const Func &);
|
30149
|
-
Tuple argmax(const RDom &, Expr, const Func &);
|
30150
|
-
Tuple argmin(const RDom &, Expr, const Func &);
|
30151
|
-
//@}
|
30152
|
-
|
30153
|
-
} // namespace Halide
|
30154
|
-
|
30155
|
-
#endif
|
30156
|
-
#ifndef HALIDE_INTEGER_DIVISION_TABLE_H
|
30157
|
-
#define HALIDE_INTEGER_DIVISION_TABLE_H
|
30158
|
-
|
30159
|
-
#include <cstdint>
|
30160
|
-
|
30161
|
-
/** \file
|
30162
|
-
* Tables telling us how to do integer division via fixed-point
|
30163
|
-
* multiplication for various small constants. This file is
|
30164
|
-
* automatically generated by find_inverse.cpp.
|
30165
|
-
*/
|
30166
|
-
namespace Halide {
|
30167
|
-
namespace Internal {
|
30168
|
-
namespace IntegerDivision {
|
30169
|
-
extern const int64_t table_u8[256][4];
|
30170
|
-
extern const int64_t table_s8[256][4];
|
30171
|
-
extern const int64_t table_srz8[256][4];
|
30172
|
-
extern const int64_t table_u16[256][4];
|
30173
|
-
extern const int64_t table_s16[256][4];
|
30174
|
-
extern const int64_t table_srz16[256][4];
|
30175
|
-
extern const int64_t table_u32[256][4];
|
30176
|
-
extern const int64_t table_s32[256][4];
|
30177
|
-
extern const int64_t table_srz32[256][4];
|
30178
|
-
extern const int64_t table_runtime_u8[256][4];
|
30179
|
-
extern const int64_t table_runtime_s8[256][4];
|
30180
|
-
extern const int64_t table_runtime_srz8[256][4];
|
30181
|
-
extern const int64_t table_runtime_u16[256][4];
|
30182
|
-
extern const int64_t table_runtime_s16[256][4];
|
30183
|
-
extern const int64_t table_runtime_srz16[256][4];
|
30184
|
-
extern const int64_t table_runtime_u32[256][4];
|
30185
|
-
extern const int64_t table_runtime_s32[256][4];
|
30186
|
-
extern const int64_t table_runtime_srz32[256][4];
|
30187
|
-
} // namespace IntegerDivision
|
30188
|
-
} // namespace Internal
|
30189
|
-
} // namespace Halide
|
30190
|
-
|
30191
30229
|
#endif
|
30192
30230
|
#ifndef HALIDE_IR_MATCH_H
|
30193
30231
|
#define HALIDE_IR_MATCH_H
|
@@ -30836,14 +30874,14 @@ struct BinOp {
|
|
30836
30874
|
}
|
30837
30875
|
const Op &op = (const Op &)e;
|
30838
30876
|
return (a.template match<bound>(*op.a.get(), state) &&
|
30839
|
-
b.template match<bound | bindings<A>::mask>(*op.b.get(), state));
|
30877
|
+
b.template match<(bound | bindings<A>::mask)>(*op.b.get(), state));
|
30840
30878
|
}
|
30841
30879
|
|
30842
30880
|
template<uint32_t bound, typename Op2, typename A2, typename B2>
|
30843
30881
|
HALIDE_ALWAYS_INLINE bool match(const BinOp<Op2, A2, B2> &op, MatcherState &state) const noexcept {
|
30844
30882
|
return (std::is_same<Op, Op2>::value &&
|
30845
30883
|
a.template match<bound>(unwrap(op.a), state) &&
|
30846
|
-
b.template match<bound | bindings<A>::mask>(unwrap(op.b), state));
|
30884
|
+
b.template match<(bound | bindings<A>::mask)>(unwrap(op.b), state));
|
30847
30885
|
}
|
30848
30886
|
|
30849
30887
|
constexpr static bool foldable = A::foldable && B::foldable;
|
@@ -30938,14 +30976,14 @@ struct CmpOp {
|
|
30938
30976
|
}
|
30939
30977
|
const Op &op = (const Op &)e;
|
30940
30978
|
return (a.template match<bound>(*op.a.get(), state) &&
|
30941
|
-
b.template match<bound | bindings<A>::mask>(*op.b.get(), state));
|
30979
|
+
b.template match<(bound | bindings<A>::mask)>(*op.b.get(), state));
|
30942
30980
|
}
|
30943
30981
|
|
30944
30982
|
template<uint32_t bound, typename Op2, typename A2, typename B2>
|
30945
30983
|
HALIDE_ALWAYS_INLINE bool match(const CmpOp<Op2, A2, B2> &op, MatcherState &state) const noexcept {
|
30946
30984
|
return (std::is_same<Op, Op2>::value &&
|
30947
30985
|
a.template match<bound>(unwrap(op.a), state) &&
|
30948
|
-
b.template match<bound | bindings<A>::mask>(unwrap(op.b), state));
|
30986
|
+
b.template match<(bound | bindings<A>::mask)>(unwrap(op.b), state));
|
30949
30987
|
}
|
30950
30988
|
|
30951
30989
|
constexpr static bool foldable = A::foldable && B::foldable;
|
@@ -31508,11 +31546,6 @@ constexpr bool and_reduce(bool first, Args... rest) {
|
|
31508
31546
|
return first && and_reduce(rest...);
|
31509
31547
|
}
|
31510
31548
|
|
31511
|
-
// TODO: this can be replaced with std::min() once we require C++14 or later
|
31512
|
-
constexpr int const_min(int a, int b) {
|
31513
|
-
return a < b ? a : b;
|
31514
|
-
}
|
31515
|
-
|
31516
31549
|
template<Call::IntrinsicOp intrin>
|
31517
31550
|
struct OptionalIntrinType {
|
31518
31551
|
bool check(const Type &) const {
|
@@ -31550,7 +31583,7 @@ struct Intrin {
|
|
31550
31583
|
HALIDE_ALWAYS_INLINE bool match_args(int, const Call &c, MatcherState &state) const noexcept {
|
31551
31584
|
using T = decltype(std::get<i>(args));
|
31552
31585
|
return (std::get<i>(args).template match<bound>(*c.args[i].get(), state) &&
|
31553
|
-
match_args<i + 1, bound | bindings<T>::mask>(0, c, state));
|
31586
|
+
match_args<i + 1, (bound | bindings<T>::mask)>(0, c, state));
|
31554
31587
|
}
|
31555
31588
|
|
31556
31589
|
template<int i, uint32_t binds>
|
@@ -31601,7 +31634,7 @@ struct Intrin {
|
|
31601
31634
|
return saturating_cast(optional_type_hint.type, std::move(arg0));
|
31602
31635
|
}
|
31603
31636
|
|
31604
|
-
Expr arg1 = std::get<
|
31637
|
+
Expr arg1 = std::get<std::min<size_t>(1, sizeof...(Args) - 1)>(args).make(state, type_hint);
|
31605
31638
|
if (intrin == Call::absd) {
|
31606
31639
|
return absd(std::move(arg0), std::move(arg1));
|
31607
31640
|
} else if (intrin == Call::widen_right_add) {
|
@@ -31636,7 +31669,7 @@ struct Intrin {
|
|
31636
31669
|
return rounding_shift_right(std::move(arg0), std::move(arg1));
|
31637
31670
|
}
|
31638
31671
|
|
31639
|
-
Expr arg2 = std::get<
|
31672
|
+
Expr arg2 = std::get<std::min<size_t>(2, sizeof...(Args) - 1)>(args).make(state, type_hint);
|
31640
31673
|
if (intrin == Call::mul_shift_right) {
|
31641
31674
|
return mul_shift_right(std::move(arg0), std::move(arg1), std::move(arg2));
|
31642
31675
|
} else if (intrin == Call::rounding_mul_shift_right) {
|
@@ -31880,14 +31913,14 @@ struct SelectOp {
|
|
31880
31913
|
}
|
31881
31914
|
const Select &op = (const Select &)e;
|
31882
31915
|
return (c.template match<bound>(*op.condition.get(), state) &&
|
31883
|
-
t.template match<bound | bindings<C>::mask>(*op.true_value.get(), state) &&
|
31884
|
-
f.template match<bound | bindings<C>::mask | bindings<T>::mask>(*op.false_value.get(), state));
|
31916
|
+
t.template match<(bound | bindings<C>::mask)>(*op.true_value.get(), state) &&
|
31917
|
+
f.template match<(bound | bindings<C>::mask | bindings<T>::mask)>(*op.false_value.get(), state));
|
31885
31918
|
}
|
31886
31919
|
template<uint32_t bound, typename C2, typename T2, typename F2>
|
31887
31920
|
HALIDE_ALWAYS_INLINE bool match(const SelectOp<C2, T2, F2> &instance, MatcherState &state) const noexcept {
|
31888
31921
|
return (c.template match<bound>(unwrap(instance.c), state) &&
|
31889
|
-
t.template match<bound | bindings<C>::mask>(unwrap(instance.t), state) &&
|
31890
|
-
f.template match<bound | bindings<C>::mask | bindings<T>::mask>(unwrap(instance.f), state));
|
31922
|
+
t.template match<(bound | bindings<C>::mask)>(unwrap(instance.t), state) &&
|
31923
|
+
f.template match<(bound | bindings<C>::mask | bindings<T>::mask)>(unwrap(instance.f), state));
|
31891
31924
|
}
|
31892
31925
|
|
31893
31926
|
HALIDE_ALWAYS_INLINE
|
@@ -31953,7 +31986,7 @@ struct BroadcastOp {
|
|
31953
31986
|
template<uint32_t bound, typename A2, typename B2>
|
31954
31987
|
HALIDE_ALWAYS_INLINE bool match(const BroadcastOp<A2, B2> &op, MatcherState &state) const noexcept {
|
31955
31988
|
return (a.template match<bound>(unwrap(op.a), state) &&
|
31956
|
-
lanes.template match<bound | bindings<A>::mask>(unwrap(op.lanes), state));
|
31989
|
+
lanes.template match<(bound | bindings<A>::mask)>(unwrap(op.lanes), state));
|
31957
31990
|
}
|
31958
31991
|
|
31959
31992
|
HALIDE_ALWAYS_INLINE
|
@@ -32017,8 +32050,8 @@ struct RampOp {
|
|
32017
32050
|
}
|
32018
32051
|
const Ramp &op = (const Ramp &)e;
|
32019
32052
|
if (a.template match<bound>(*op.base.get(), state) &&
|
32020
|
-
b.template match<bound | bindings<A>::mask>(*op.stride.get(), state) &&
|
32021
|
-
lanes.template match<bound | bindings<A>::mask | bindings<B>::mask>(op.lanes, state)) {
|
32053
|
+
b.template match<(bound | bindings<A>::mask)>(*op.stride.get(), state) &&
|
32054
|
+
lanes.template match<(bound | bindings<A>::mask | bindings<B>::mask)>(op.lanes, state)) {
|
32022
32055
|
return true;
|
32023
32056
|
} else {
|
32024
32057
|
return false;
|
@@ -32028,8 +32061,8 @@ struct RampOp {
|
|
32028
32061
|
template<uint32_t bound, typename A2, typename B2, typename C2>
|
32029
32062
|
HALIDE_ALWAYS_INLINE bool match(const RampOp<A2, B2, C2> &op, MatcherState &state) const noexcept {
|
32030
32063
|
return (a.template match<bound>(unwrap(op.a), state) &&
|
32031
|
-
b.template match<bound | bindings<A>::mask>(unwrap(op.b), state) &&
|
32032
|
-
lanes.template match<bound | bindings<A>::mask | bindings<B>::mask>(unwrap(op.lanes), state));
|
32064
|
+
b.template match<(bound | bindings<A>::mask)>(unwrap(op.b), state) &&
|
32065
|
+
lanes.template match<(bound | bindings<A>::mask | bindings<B>::mask)>(unwrap(op.lanes), state));
|
32033
32066
|
}
|
32034
32067
|
|
32035
32068
|
HALIDE_ALWAYS_INLINE
|
@@ -32080,7 +32113,7 @@ struct VectorReduceOp {
|
|
32080
32113
|
const VectorReduce &op = (const VectorReduce &)e;
|
32081
32114
|
if (op.op == reduce_op &&
|
32082
32115
|
a.template match<bound>(*op.value.get(), state) &&
|
32083
|
-
lanes.template match<bound | bindings<A>::mask>(op.type.lanes(), state)) {
|
32116
|
+
lanes.template match<(bound | bindings<A>::mask)>(op.type.lanes(), state)) {
|
32084
32117
|
return true;
|
32085
32118
|
}
|
32086
32119
|
}
|
@@ -32091,7 +32124,7 @@ struct VectorReduceOp {
|
|
32091
32124
|
HALIDE_ALWAYS_INLINE bool match(const VectorReduceOp<A2, B2, reduce_op_2> &op, MatcherState &state) const noexcept {
|
32092
32125
|
return (reduce_op == reduce_op_2 &&
|
32093
32126
|
a.template match<bound>(unwrap(op.a), state) &&
|
32094
|
-
lanes.template match<bound | bindings<A>::mask>(unwrap(op.lanes), state));
|
32127
|
+
lanes.template match<(bound | bindings<A>::mask)>(unwrap(op.lanes), state));
|
32095
32128
|
}
|
32096
32129
|
|
32097
32130
|
HALIDE_ALWAYS_INLINE
|
@@ -32340,9 +32373,9 @@ struct SliceOp {
|
|
32340
32373
|
return v.vectors.size() == 1 &&
|
32341
32374
|
v.is_slice() &&
|
32342
32375
|
vec.template match<bound>(*v.vectors[0].get(), state) &&
|
32343
|
-
base.template match<bound | bindings<Vec>::mask>(v.slice_begin(), state) &&
|
32344
|
-
stride.template match<bound | bindings<Vec>::mask | bindings<Base>::mask>(v.slice_stride(), state) &&
|
32345
|
-
lanes.template match<bound | bindings<Vec>::mask | bindings<Base>::mask | bindings<Stride>::mask>(v.type.lanes(), state);
|
32376
|
+
base.template match<(bound | bindings<Vec>::mask)>(v.slice_begin(), state) &&
|
32377
|
+
stride.template match<(bound | bindings<Vec>::mask | bindings<Base>::mask)>(v.slice_stride(), state) &&
|
32378
|
+
lanes.template match<(bound | bindings<Vec>::mask | bindings<Base>::mask | bindings<Stride>::mask)>(v.type.lanes(), state);
|
32346
32379
|
}
|
32347
32380
|
|
32348
32381
|
HALIDE_ALWAYS_INLINE
|
@@ -33409,13 +33442,59 @@ std::pair<Region, bool> mutate_region(Mutator *mutator, const Region &bounds, Ar
|
|
33409
33442
|
} // namespace Halide
|
33410
33443
|
|
33411
33444
|
#endif
|
33412
|
-
#ifndef
|
33413
|
-
#define
|
33445
|
+
#ifndef HALIDE_INFER_ARGUMENTS_H
|
33446
|
+
#define HALIDE_INFER_ARGUMENTS_H
|
33447
|
+
|
33448
|
+
#include <vector>
|
33449
|
+
|
33414
33450
|
|
33415
33451
|
/** \file
|
33416
|
-
*
|
33452
|
+
*
|
33453
|
+
* Interface for a visitor to infer arguments used in a body Stmt.
|
33417
33454
|
*/
|
33418
33455
|
|
33456
|
+
namespace Halide {
|
33457
|
+
namespace Internal {
|
33458
|
+
|
33459
|
+
/** An inferred argument. Inferred args are either Params,
|
33460
|
+
* ImageParams, or Buffers. The first two are handled by the param
|
33461
|
+
* field, and global images are tracked via the buf field. These
|
33462
|
+
* are used directly when jitting, or used for validation when
|
33463
|
+
* compiling with an explicit argument list. */
|
33464
|
+
struct InferredArgument {
|
33465
|
+
Argument arg;
|
33466
|
+
Parameter param;
|
33467
|
+
Buffer<> buffer;
|
33468
|
+
|
33469
|
+
bool operator<(const InferredArgument &other) const {
|
33470
|
+
if (arg.is_buffer() && !other.arg.is_buffer()) {
|
33471
|
+
return true;
|
33472
|
+
} else if (other.arg.is_buffer() && !arg.is_buffer()) {
|
33473
|
+
return false;
|
33474
|
+
} else {
|
33475
|
+
return arg.name < other.arg.name;
|
33476
|
+
}
|
33477
|
+
}
|
33478
|
+
};
|
33479
|
+
|
33480
|
+
class Function;
|
33481
|
+
|
33482
|
+
std::vector<InferredArgument> infer_arguments(const Stmt &body, const std::vector<Function> &outputs);
|
33483
|
+
|
33484
|
+
} // namespace Internal
|
33485
|
+
} // namespace Halide
|
33486
|
+
|
33487
|
+
#endif
|
33488
|
+
#ifndef HALIDE_HOST_GPU_BUFFER_COPIES_H
|
33489
|
+
#define HALIDE_HOST_GPU_BUFFER_COPIES_H
|
33490
|
+
|
33491
|
+
/** \file
|
33492
|
+
* Defines the lowering passes that deal with host and device buffer flow.
|
33493
|
+
*/
|
33494
|
+
|
33495
|
+
#include <string>
|
33496
|
+
#include <vector>
|
33497
|
+
|
33419
33498
|
|
33420
33499
|
namespace Halide {
|
33421
33500
|
|
@@ -33423,15 +33502,179 @@ struct Target;
|
|
33423
33502
|
|
33424
33503
|
namespace Internal {
|
33425
33504
|
|
33426
|
-
/**
|
33427
|
-
*
|
33428
|
-
|
33429
|
-
|
33430
|
-
|
33505
|
+
/** A helper function to call an extern function, and assert that it
|
33506
|
+
* returns 0. */
|
33507
|
+
Stmt call_extern_and_assert(const std::string &name, const std::vector<Expr> &args);
|
33508
|
+
|
33509
|
+
/** Inject calls to halide_device_malloc, halide_copy_to_device, and
|
33510
|
+
* halide_copy_to_host as needed. */
|
33511
|
+
Stmt inject_host_dev_buffer_copies(Stmt s, const Target &t);
|
33431
33512
|
|
33432
33513
|
} // namespace Internal
|
33433
33514
|
} // namespace Halide
|
33434
33515
|
|
33516
|
+
#endif
|
33517
|
+
#ifndef HALIDE_INLINE_H
|
33518
|
+
#define HALIDE_INLINE_H
|
33519
|
+
|
33520
|
+
/** \file
|
33521
|
+
* Methods for replacing calls to functions with their definitions.
|
33522
|
+
*/
|
33523
|
+
|
33524
|
+
|
33525
|
+
namespace Halide {
|
33526
|
+
namespace Internal {
|
33527
|
+
|
33528
|
+
class Function;
|
33529
|
+
|
33530
|
+
/** Inline a single named function, which must be pure. For a pure function to
|
33531
|
+
* be inlined, it must not have any specializations (i.e. it can only have one
|
33532
|
+
* values definition). */
|
33533
|
+
// @{
|
33534
|
+
Stmt inline_function(Stmt s, const Function &f);
|
33535
|
+
Expr inline_function(Expr e, const Function &f);
|
33536
|
+
void inline_function(Function caller, const Function &f);
|
33537
|
+
// @}
|
33538
|
+
|
33539
|
+
/** Check if the schedule of an inlined function is legal, throwing an error
|
33540
|
+
* if it is not. */
|
33541
|
+
void validate_schedule_inlined_function(Function f);
|
33542
|
+
|
33543
|
+
} // namespace Internal
|
33544
|
+
} // namespace Halide
|
33545
|
+
|
33546
|
+
#endif
|
33547
|
+
#ifndef HALIDE_INLINE_REDUCTIONS_H
|
33548
|
+
#define HALIDE_INLINE_REDUCTIONS_H
|
33549
|
+
|
33550
|
+
#include <string>
|
33551
|
+
|
33552
|
+
|
33553
|
+
/** \file
|
33554
|
+
* Defines some inline reductions: sum, product, minimum, maximum.
|
33555
|
+
*/
|
33556
|
+
namespace Halide {
|
33557
|
+
|
33558
|
+
class Func;
|
33559
|
+
|
33560
|
+
/** An inline reduction. This is suitable for convolution-type
|
33561
|
+
* operations - the reduction will be computed in the innermost loop
|
33562
|
+
* that it is used in. The argument may contain free or implicit
|
33563
|
+
* variables, and must refer to some reduction domain. The free
|
33564
|
+
* variables are still free in the return value, but the reduction
|
33565
|
+
* domain is captured - the result expression does not refer to a
|
33566
|
+
* reduction domain and can be used in a pure function definition.
|
33567
|
+
*
|
33568
|
+
* An example using \ref sum :
|
33569
|
+
*
|
33570
|
+
\code
|
33571
|
+
Func f, g;
|
33572
|
+
Var x;
|
33573
|
+
RDom r(0, 10);
|
33574
|
+
f(x) = x*x;
|
33575
|
+
g(x) = sum(f(x + r));
|
33576
|
+
\endcode
|
33577
|
+
*
|
33578
|
+
* Here g computes some blur of x, but g is still a pure function. The
|
33579
|
+
* sum is being computed by an anonymous reduction function that is
|
33580
|
+
* scheduled innermost within g.
|
33581
|
+
*/
|
33582
|
+
//@{
|
33583
|
+
Expr sum(Expr, const std::string &s = "sum");
|
33584
|
+
Expr saturating_sum(Expr, const std::string &s = "saturating_sum");
|
33585
|
+
Expr product(Expr, const std::string &s = "product");
|
33586
|
+
Expr maximum(Expr, const std::string &s = "maximum");
|
33587
|
+
Expr minimum(Expr, const std::string &s = "minimum");
|
33588
|
+
//@}
|
33589
|
+
|
33590
|
+
/** Variants of the inline reduction in which the RDom is stated
|
33591
|
+
* explicitly. The expression can refer to multiple RDoms, and only
|
33592
|
+
* the inner one is captured by the reduction. This allows you to
|
33593
|
+
* write expressions like:
|
33594
|
+
\code
|
33595
|
+
RDom r1(0, 10), r2(0, 10), r3(0, 10);
|
33596
|
+
Expr e = minimum(r1, product(r2, sum(r3, r1 + r2 + r3)));
|
33597
|
+
\endcode
|
33598
|
+
*/
|
33599
|
+
// @{
|
33600
|
+
Expr sum(const RDom &, Expr, const std::string &s = "sum");
|
33601
|
+
Expr saturating_sum(const RDom &r, Expr e, const std::string &s = "saturating_sum");
|
33602
|
+
Expr product(const RDom &, Expr, const std::string &s = "product");
|
33603
|
+
Expr maximum(const RDom &, Expr, const std::string &s = "maximum");
|
33604
|
+
Expr minimum(const RDom &, Expr, const std::string &s = "minimum");
|
33605
|
+
// @}
|
33606
|
+
|
33607
|
+
/** Returns an Expr or Tuple representing the coordinates of the point
|
33608
|
+
* in the RDom which minimizes or maximizes the expression. The
|
33609
|
+
* expression must refer to some RDom. Also returns the extreme value
|
33610
|
+
* of the expression as the last element of the tuple. */
|
33611
|
+
// @{
|
33612
|
+
Tuple argmax(Expr, const std::string &s = "argmax");
|
33613
|
+
Tuple argmin(Expr, const std::string &s = "argmin");
|
33614
|
+
Tuple argmax(const RDom &, Expr, const std::string &s = "argmax");
|
33615
|
+
Tuple argmin(const RDom &, Expr, const std::string &s = "argmin");
|
33616
|
+
// @}
|
33617
|
+
|
33618
|
+
/** Inline reductions create an anonymous helper Func to do the
|
33619
|
+
* work. The variants below instead take a named Func object to use,
|
33620
|
+
* so that it is no longer anonymous and can be scheduled
|
33621
|
+
* (e.g. unrolled across the reduction domain). The Func passed must
|
33622
|
+
* not have any existing definition. */
|
33623
|
+
//@{
|
33624
|
+
Expr sum(Expr, const Func &);
|
33625
|
+
Expr saturating_sum(Expr, const Func &);
|
33626
|
+
Expr product(Expr, const Func &);
|
33627
|
+
Expr maximum(Expr, const Func &);
|
33628
|
+
Expr minimum(Expr, const Func &);
|
33629
|
+
Expr sum(const RDom &, Expr, const Func &);
|
33630
|
+
Expr saturating_sum(const RDom &r, Expr e, const Func &);
|
33631
|
+
Expr product(const RDom &, Expr, const Func &);
|
33632
|
+
Expr maximum(const RDom &, Expr, const Func &);
|
33633
|
+
Expr minimum(const RDom &, Expr, const Func &);
|
33634
|
+
Tuple argmax(Expr, const Func &);
|
33635
|
+
Tuple argmin(Expr, const Func &);
|
33636
|
+
Tuple argmax(const RDom &, Expr, const Func &);
|
33637
|
+
Tuple argmin(const RDom &, Expr, const Func &);
|
33638
|
+
//@}
|
33639
|
+
|
33640
|
+
} // namespace Halide
|
33641
|
+
|
33642
|
+
#endif
|
33643
|
+
#ifndef HALIDE_INTEGER_DIVISION_TABLE_H
|
33644
|
+
#define HALIDE_INTEGER_DIVISION_TABLE_H
|
33645
|
+
|
33646
|
+
#include <cstdint>
|
33647
|
+
|
33648
|
+
/** \file
|
33649
|
+
* Tables telling us how to do integer division via fixed-point
|
33650
|
+
* multiplication for various small constants. This file is
|
33651
|
+
* automatically generated by find_inverse.cpp.
|
33652
|
+
*/
|
33653
|
+
namespace Halide {
|
33654
|
+
namespace Internal {
|
33655
|
+
namespace IntegerDivision {
|
33656
|
+
extern const int64_t table_u8[256][4];
|
33657
|
+
extern const int64_t table_s8[256][4];
|
33658
|
+
extern const int64_t table_srz8[256][4];
|
33659
|
+
extern const int64_t table_u16[256][4];
|
33660
|
+
extern const int64_t table_s16[256][4];
|
33661
|
+
extern const int64_t table_srz16[256][4];
|
33662
|
+
extern const int64_t table_u32[256][4];
|
33663
|
+
extern const int64_t table_s32[256][4];
|
33664
|
+
extern const int64_t table_srz32[256][4];
|
33665
|
+
extern const int64_t table_runtime_u8[256][4];
|
33666
|
+
extern const int64_t table_runtime_s8[256][4];
|
33667
|
+
extern const int64_t table_runtime_srz8[256][4];
|
33668
|
+
extern const int64_t table_runtime_u16[256][4];
|
33669
|
+
extern const int64_t table_runtime_s16[256][4];
|
33670
|
+
extern const int64_t table_runtime_srz16[256][4];
|
33671
|
+
extern const int64_t table_runtime_u32[256][4];
|
33672
|
+
extern const int64_t table_runtime_s32[256][4];
|
33673
|
+
extern const int64_t table_runtime_srz32[256][4];
|
33674
|
+
} // namespace IntegerDivision
|
33675
|
+
} // namespace Internal
|
33676
|
+
} // namespace Halide
|
33677
|
+
|
33435
33678
|
#endif
|
33436
33679
|
#ifndef HALIDE_LICM_H
|
33437
33680
|
#define HALIDE_LICM_H
|
@@ -33524,6 +33767,7 @@ void create_static_library(const std::vector<std::string> &src_files, const Targ
|
|
33524
33767
|
* Support for linking LLVM modules that comprise the runtime.
|
33525
33768
|
*/
|
33526
33769
|
|
33770
|
+
#include <cstdint>
|
33527
33771
|
#include <memory>
|
33528
33772
|
#include <string>
|
33529
33773
|
#include <vector>
|
@@ -33562,6 +33806,30 @@ std::unique_ptr<llvm::Module> link_with_wasm_jit_runtime(llvm::LLVMContext *c, c
|
|
33562
33806
|
} // namespace Internal
|
33563
33807
|
} // namespace Halide
|
33564
33808
|
|
33809
|
+
#endif
|
33810
|
+
#ifndef HALIDE_LERP_H
|
33811
|
+
#define HALIDE_LERP_H
|
33812
|
+
|
33813
|
+
/** \file
|
33814
|
+
* Defines methods for converting a lerp intrinsic into Halide IR.
|
33815
|
+
*/
|
33816
|
+
|
33817
|
+
|
33818
|
+
namespace Halide {
|
33819
|
+
|
33820
|
+
struct Target;
|
33821
|
+
|
33822
|
+
namespace Internal {
|
33823
|
+
|
33824
|
+
/** Build Halide IR that computes a lerp. Use by codegen targets that don't have
|
33825
|
+
* a native lerp. The lerp is done in the type of the zero value. The final_type
|
33826
|
+
* is a cast that should occur after the lerp. It's included because in some
|
33827
|
+
* cases you can incorporate a final cast into the lerp math. */
|
33828
|
+
Expr lower_lerp(Type final_type, Expr zero_val, Expr one_val, const Expr &weight, const Target &target);
|
33829
|
+
|
33830
|
+
} // namespace Internal
|
33831
|
+
} // namespace Halide
|
33832
|
+
|
33565
33833
|
#endif
|
33566
33834
|
#ifndef HALIDE_LOOP_CARRY_H
|
33567
33835
|
#define HALIDE_LOOP_CARRY_H
|
@@ -34754,6 +35022,13 @@ Interval solve_for_inner_interval(const Expr &c, const std::string &variable);
|
|
34754
35022
|
* 'and' over the vector lanes, and return a scalar result. */
|
34755
35023
|
Expr and_condition_over_domain(const Expr &c, const Scope<Interval> &varying);
|
34756
35024
|
|
35025
|
+
/** Take a conditional that includes variables that vary over some
|
35026
|
+
* domain, and convert it to a weaker (less frequently false) condition
|
35027
|
+
* that doesn't depend on those variables. Formally, the input expr
|
35028
|
+
* implies the output expr. Note that this function might be unable to
|
35029
|
+
* provide a better response than simply const_true(). */
|
35030
|
+
Expr or_condition_over_domain(const Expr &c, const Scope<Interval> &varying);
|
35031
|
+
|
34757
35032
|
void solve_test();
|
34758
35033
|
|
34759
35034
|
} // namespace Internal
|
@@ -34948,19 +35223,26 @@ Stmt storage_folding(const Stmt &s, const std::map<std::string, Function> &env);
|
|
34948
35223
|
namespace Halide {
|
34949
35224
|
|
34950
35225
|
struct Target;
|
35226
|
+
struct Expr;
|
34951
35227
|
|
34952
35228
|
namespace Internal {
|
34953
35229
|
|
34954
35230
|
class Function;
|
35231
|
+
struct Call;
|
34955
35232
|
|
34956
|
-
/**
|
34957
|
-
*
|
34958
|
-
|
34959
|
-
|
34960
|
-
|
34961
|
-
|
34962
|
-
|
34963
|
-
|
35233
|
+
/** Replace all rounding floating point ops and floating point ops that need to
|
35234
|
+
* handle nan and inf differently with strict float intrinsics. */
|
35235
|
+
Expr strictify_float(const Expr &e);
|
35236
|
+
|
35237
|
+
/** Replace a strict float intrinsic with its non-strict equivalent. Non-recursive. */
|
35238
|
+
Expr unstrictify_float(const Call *op);
|
35239
|
+
|
35240
|
+
/** If the StrictFloat target feature is set, replace add, sub, mul, div, etc
|
35241
|
+
* operations with strict float intrinsics for all Funcs in the environment. If
|
35242
|
+
* StrictFloat is not set, does nothing. Returns whether or not there's any usage
|
35243
|
+
* of strict float intrinsics or if the target flag is set (i.e. returns whether
|
35244
|
+
* or not the rest of lowering and codegen needs to worry about floating point
|
35245
|
+
* strictness). */
|
34964
35246
|
bool strictify_float(std::map<std::string, Function> &env, const Target &t);
|
34965
35247
|
|
34966
35248
|
} // namespace Internal
|
@@ -34992,6 +35274,8 @@ Stmt strip_asserts(const Stmt &s);
|
|
34992
35274
|
* Defines methods for substituting out variables in expressions and
|
34993
35275
|
* statements. */
|
34994
35276
|
|
35277
|
+
#include <algorithm>
|
35278
|
+
#include <iterator>
|
34995
35279
|
#include <map>
|
34996
35280
|
|
34997
35281
|
|
@@ -35022,6 +35306,16 @@ Expr substitute(const Expr &find, const Expr &replacement, const Expr &expr);
|
|
35022
35306
|
Stmt substitute(const Expr &find, const Expr &replacement, const Stmt &stmt);
|
35023
35307
|
// @}
|
35024
35308
|
|
35309
|
+
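/** Apply the substitution to every Expr or Stmt in a container, returning the results in a new container of the same type (the input container is not modified). */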
|
35310
|
+
template<typename T>
|
35311
|
+
T substitute(const std::map<std::string, Expr> &replacements, const T &container) {
|
35312
|
+
T output;
|
35313
|
+
std::transform(container.begin(), container.end(), std::back_inserter(output), [&](const auto &expr_or_stmt) {
|
35314
|
+
return substitute(replacements, expr_or_stmt);
|
35315
|
+
});
|
35316
|
+
return output;
|
35317
|
+
}
|
35318
|
+
|
35025
35319
|
/** Substitutions where the IR may be a general graph (and not just a
|
35026
35320
|
* DAG). */
|
35027
35321
|
// @{
|
@@ -35284,10 +35578,14 @@ std::map<std::string, Function> wrap_func_calls(const std::map<std::string, Func
|
|
35284
35578
|
#endif
|
35285
35579
|
|
35286
35580
|
// Clean up macros used inside Halide headers
|
35581
|
+
#ifndef HALIDE_KEEP_MACROS
|
35287
35582
|
#undef user_assert
|
35288
35583
|
#undef user_error
|
35289
35584
|
#undef user_warning
|
35290
35585
|
#undef internal_error
|
35291
35586
|
#undef internal_assert
|
35292
35587
|
#undef halide_runtime_error
|
35588
|
+
#undef debug
|
35589
|
+
#undef debug_is_active
|
35590
|
+
#endif
|
35293
35591
|
#endif // HALIDE_H
|