halide 19.0.0__cp39-cp39-macosx_11_0_arm64.whl → 21.0.0__cp39-cp39-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. halide/__init__.py +10 -6
  2. halide/_generator_helpers.py +190 -127
  3. halide/bin/adams2019_retrain_cost_model +0 -0
  4. halide/bin/anderson2021_retrain_cost_model +0 -0
  5. halide/bin/gengen +0 -0
  6. halide/bin/get_host_target +0 -0
  7. halide/halide_.cpython-39-darwin.so +0 -0
  8. halide/imageio.py +1 -1
  9. halide/include/Halide.h +1775 -1477
  10. halide/include/HalideBuffer.h +13 -13
  11. halide/include/HalidePyTorchCudaHelpers.h +1 -1
  12. halide/include/HalideRuntime.h +35 -16
  13. halide/lib/cmake/Halide/FindHalide_LLVM.cmake +44 -15
  14. halide/lib/cmake/Halide/FindV8.cmake +0 -12
  15. halide/lib/cmake/Halide/Halide-shared-targets.cmake +1 -1
  16. halide/lib/cmake/Halide/HalideConfig.cmake +1 -1
  17. halide/lib/cmake/Halide/HalideConfigVersion.cmake +3 -3
  18. halide/lib/cmake/HalideHelpers/Halide-Interfaces.cmake +1 -0
  19. halide/lib/cmake/HalideHelpers/HalideGeneratorHelpers.cmake +31 -9
  20. halide/lib/cmake/HalideHelpers/HalideHelpersConfigVersion.cmake +3 -3
  21. halide/lib/cmake/Halide_Python/Halide_PythonConfigVersion.cmake +3 -3
  22. halide/lib/libHalide.dylib +0 -0
  23. halide/lib/libHalidePyStubs.a +0 -0
  24. halide/lib/libHalide_GenGen.a +0 -0
  25. halide/lib/libautoschedule_adams2019.so +0 -0
  26. halide/lib/libautoschedule_anderson2021.so +0 -0
  27. halide/lib/libautoschedule_li2018.so +0 -0
  28. halide/lib/libautoschedule_mullapudi2016.so +0 -0
  29. halide/share/doc/Halide/README.md +7 -6
  30. halide/share/doc/Halide/doc/BuildingHalideWithCMake.md +78 -6
  31. halide/share/doc/Halide/doc/HalideCMakePackage.md +9 -2
  32. halide/share/doc/Halide/doc/Python.md +19 -4
  33. halide/share/doc/Halide/doc/RunGen.md +1 -1
  34. {halide-19.0.0.data → halide-21.0.0.data}/data/share/cmake/Halide/HalideConfig.cmake +4 -1
  35. {halide-19.0.0.data → halide-21.0.0.data}/data/share/cmake/Halide/HalideConfigVersion.cmake +3 -3
  36. {halide-19.0.0.data → halide-21.0.0.data}/data/share/cmake/HalideHelpers/HalideHelpersConfig.cmake +4 -1
  37. {halide-19.0.0.data → halide-21.0.0.data}/data/share/cmake/HalideHelpers/HalideHelpersConfigVersion.cmake +3 -3
  38. halide-21.0.0.dist-info/METADATA +302 -0
  39. {halide-19.0.0.dist-info → halide-21.0.0.dist-info}/RECORD +41 -41
  40. {halide-19.0.0.dist-info → halide-21.0.0.dist-info}/WHEEL +1 -1
  41. halide-19.0.0.dist-info/METADATA +0 -301
  42. {halide-19.0.0.dist-info → halide-21.0.0.dist-info}/licenses/LICENSE.txt +0 -0
halide/include/Halide.h CHANGED
@@ -315,7 +315,7 @@
315
315
  // our CMake build, so that we ensure that the in-build metadata (eg soversion)
316
316
  // matches, but keeping the canonical version here makes it easier to keep
317
317
  // downstream build systems (eg Blaze/Bazel) properly in sync with the source.
318
- #define HALIDE_VERSION_MAJOR 19
318
+ #define HALIDE_VERSION_MAJOR 21
319
319
  #define HALIDE_VERSION_MINOR 0
320
320
  #define HALIDE_VERSION_PATCH 0
321
321
 
@@ -1643,21 +1643,27 @@ extern int halide_error_vscale_invalid(void *user_context, const char *func_name
1643
1643
  // @}
1644
1644
 
1645
1645
  /** Optional features a compilation Target can have.
1646
- * Be sure to keep this in sync with the Feature enum in Target.h and the implementation of
1647
- * get_runtime_compatible_target in Target.cpp if you add a new feature.
1646
+ *
1647
+ * Be sure to keep this in sync with:
1648
+ * 1. the Feature enum in Target.h,
1649
+ * 2. the implementation of get_runtime_compatible_target in Target.cpp,
1650
+ * 3. PyEnums.cpp,
1651
+ * if you add a new feature.
1648
1652
  */
1649
1653
  typedef enum halide_target_feature_t {
1650
- halide_target_feature_jit = 0, ///< Generate code that will run immediately inside the calling process.
1651
- halide_target_feature_debug, ///< Turn on debug info and output for runtime code.
1652
- halide_target_feature_no_asserts, ///< Disable all runtime checks, for slightly tighter code.
1653
- halide_target_feature_no_bounds_query, ///< Disable the bounds querying functionality.
1654
-
1655
- halide_target_feature_sse41, ///< Use SSE 4.1 and earlier instructions. Only relevant on x86.
1656
- halide_target_feature_avx, ///< Use AVX 1 instructions. Only relevant on x86.
1657
- halide_target_feature_avx2, ///< Use AVX 2 instructions. Only relevant on x86.
1658
- halide_target_feature_fma, ///< Enable x86 FMA instruction
1659
- halide_target_feature_fma4, ///< Enable x86 (AMD) FMA4 instruction set
1660
- halide_target_feature_f16c, ///< Enable x86 16-bit float support
1654
+ halide_target_feature_jit = 0, ///< Generate code that will run immediately inside the calling process.
1655
+ halide_target_feature_debug, ///< Turn on debug info and output for runtime code.
1656
+ halide_target_feature_enable_backtraces, ///< Preserve frame pointers and include unwind tables to support accurate backtraces for debugging and profiling.
1657
+ halide_target_feature_no_asserts, ///< Disable all runtime checks, for slightly tighter code.
1658
+ halide_target_feature_no_bounds_query, ///< Disable the bounds querying functionality.
1659
+
1660
+ halide_target_feature_sse41, ///< Use SSE 4.1 and earlier instructions. Only relevant on x86.
1661
+ halide_target_feature_avx, ///< Use AVX 1 instructions. Only relevant on x86.
1662
+ halide_target_feature_avx2, ///< Use AVX 2 instructions. Only relevant on x86.
1663
+ halide_target_feature_avxvnni, ///< Enable the AVX-VNNI features supported by AVX2 instructions. Supports 256-bit VNNI instructions without EVEX encoding.
1664
+ halide_target_feature_fma, ///< Enable x86 FMA instruction
1665
+ halide_target_feature_fma4, ///< Enable x86 (AMD) FMA4 instruction set
1666
+ halide_target_feature_f16c, ///< Enable x86 16-bit float support
1661
1667
 
1662
1668
  halide_target_feature_armv7s, ///< Generate code for ARMv7s. Only relevant for 32-bit ARM.
1663
1669
  halide_target_feature_no_neon, ///< Avoid using NEON instructions. Only relevant for 32-bit ARM.
@@ -1701,6 +1707,7 @@ typedef enum halide_target_feature_t {
1701
1707
  halide_target_feature_avx512_skylake, ///< Enable the AVX512 features supported by Skylake Xeon server processors. This adds AVX512-VL, AVX512-BW, and AVX512-DQ to the base set. The main difference from the base AVX512 set is better support for small integer ops. Note that this does not include the Knight's Landing features. Note also that these features are not available on Skylake desktop and mobile processors.
1702
1708
  halide_target_feature_avx512_cannonlake, ///< Enable the AVX512 features expected to be supported by future Cannonlake processors. This includes all of the Skylake features, plus AVX512-IFMA and AVX512-VBMI.
1703
1709
  halide_target_feature_avx512_zen4, ///< Enable the AVX512 features supported by Zen4 processors. This include all of the Cannonlake features, plus AVX512-VNNI, AVX512-BF16, and more.
1710
+ halide_target_feature_avx512_zen5, ///< Enable the AVX512 features supported by Zen5 processors. This include all of the Cannonlake features, plus AVX512-VNNI, AVX512-BF16, AVX-VNNI and more.
1704
1711
  halide_target_feature_avx512_sapphirerapids, ///< Enable the AVX512 features supported by Sapphire Rapids processors. This include all of the Zen4 features, plus AVX-VNNI and AMX instructions.
1705
1712
  halide_target_feature_trace_loads, ///< Trace all loads done by the pipeline. Equivalent to calling Func::trace_loads on every non-inlined Func.
1706
1713
  halide_target_feature_trace_stores, ///< Trace all stores done by the pipeline. Equivalent to calling Func::trace_stores on every non-inlined Func.
@@ -1755,6 +1762,7 @@ typedef enum halide_target_feature_t {
1755
1762
  halide_target_feature_semihosting, ///< Used together with Target::NoOS for the baremetal target built with semihosting library and run with semihosting mode where minimum I/O communication with a host PC is available.
1756
1763
  halide_target_feature_avx10_1, ///< Intel AVX10 version 1 support. vector_bits is used to indicate width.
1757
1764
  halide_target_feature_x86_apx, ///< Intel x86 APX support. Covers initial set of features released as APX: egpr,push2pop2,ppx,ndd .
1765
+ halide_target_feature_simulator, ///< Target is for a simulator environment. Currently only applies to iOS.
1758
1766
  halide_target_feature_end ///< A sentinel. Every target is considered to have this feature, and setting this feature does nothing.
1759
1767
  } halide_target_feature_t;
1760
1768
 
@@ -1831,8 +1839,19 @@ typedef struct halide_dimension_t {
1831
1839
  } // extern "C"
1832
1840
  #endif
1833
1841
 
1834
- typedef enum { halide_buffer_flag_host_dirty = 1,
1835
- halide_buffer_flag_device_dirty = 2 } halide_buffer_flags;
1842
+ #if __cplusplus > 201100L || _MSVC_LANG > 201100L || __STDC_VERSION__ > 202300L
1843
+ // In C++, an underlying type is required to let the user define their own flag
1844
+ // values, without those values being undefined behavior when passed around as
1845
+ // this enum typedef.
1846
+ #define BUFFER_FLAGS_UNDERLYING_TYPE : uint64_t
1847
+ #else
1848
+ #define BUFFER_FLAGS_UNDERLYING_TYPE
1849
+ #endif
1850
+ typedef enum BUFFER_FLAGS_UNDERLYING_TYPE {
1851
+ halide_buffer_flag_host_dirty = 1,
1852
+ halide_buffer_flag_device_dirty = 2
1853
+ } halide_buffer_flags;
1854
+ #undef BUFFER_FLAGS_UNDERLYING_TYPE
1836
1855
 
1837
1856
  /**
1838
1857
  * The raw representation of an image passed around by generated
@@ -2730,12 +2749,15 @@ std::ostream &operator<<(std::ostream &stream, const Stmt &);
2730
2749
  struct LoweredFunc;
2731
2750
  std::ostream &operator<<(std::ostream &, const LoweredFunc &);
2732
2751
 
2733
- /** For optional debugging during codegen, use the debug class as
2752
+ bool debug_is_active_impl(int verbosity, const char *file, const char *function, int line);
2753
+ #define debug_is_active(n) (::Halide::Internal::debug_is_active_impl((n), __FILE__, __FUNCTION__, __LINE__))
2754
+
2755
+ /** For optional debugging during codegen, use the debug macro as
2734
2756
  * follows:
2735
2757
  *
2736
- \code
2737
- debug(verbosity) << "The expression is " << expr << "\n";
2738
- \endcode
2758
+ * \code
2759
+ * debug(verbosity) << "The expression is " << expr << "\n";
2760
+ * \endcode
2739
2761
  *
2740
2762
  * verbosity of 0 always prints, 1 should print after every major
2741
2763
  * stage, 2 should be used for more detail, and 3 should be used for
@@ -2743,25 +2765,11 @@ std::ostream &operator<<(std::ostream &, const LoweredFunc &);
2743
2765
  * is determined by the value of the environment variable
2744
2766
  * HL_DEBUG_CODEGEN
2745
2767
  */
2746
-
2747
- class debug {
2748
- const bool logging;
2749
-
2750
- public:
2751
- debug(int verbosity)
2752
- : logging(verbosity <= debug_level()) {
2753
- }
2754
-
2755
- template<typename T>
2756
- debug &operator<<(T &&x) {
2757
- if (logging) {
2758
- std::cerr << std::forward<T>(x);
2759
- }
2760
- return *this;
2761
- }
2762
-
2763
- static int debug_level();
2764
- };
2768
+ // clang-format off
2769
+ #define debug(n) \
2770
+ /* NOLINTNEXTLINE(bugprone-macro-parentheses) */ \
2771
+ if (debug_is_active((n))) std::cerr
2772
+ // clang-format on
2765
2773
 
2766
2774
  /** Allow easily printing the contents of containers, or std::vector-like containers,
2767
2775
  * in debug output. Used like so:
@@ -2867,14 +2875,18 @@ private:
2867
2875
  };
2868
2876
 
2869
2877
  /** An error that occurs while running a JIT-compiled Halide pipeline. */
2870
- struct HALIDE_EXPORT_SYMBOL RuntimeError : public Error {
2878
+ struct HALIDE_EXPORT_SYMBOL RuntimeError final : Error {
2879
+ static constexpr auto error_name = "Runtime error";
2880
+
2871
2881
  explicit RuntimeError(const char *msg);
2872
2882
  explicit RuntimeError(const std::string &msg);
2873
2883
  };
2874
2884
 
2875
2885
  /** An error that occurs while compiling a Halide pipeline that Halide
2876
2886
  * attributes to a user error. */
2877
- struct HALIDE_EXPORT_SYMBOL CompileError : public Error {
2887
+ struct HALIDE_EXPORT_SYMBOL CompileError final : Error {
2888
+ static constexpr auto error_name = "User error";
2889
+
2878
2890
  explicit CompileError(const char *msg);
2879
2891
  explicit CompileError(const std::string &msg);
2880
2892
  };
@@ -2882,7 +2894,9 @@ struct HALIDE_EXPORT_SYMBOL CompileError : public Error {
2882
2894
  /** An error that occurs while compiling a Halide pipeline that Halide
2883
2895
  * attributes to an internal compiler bug, or to an invalid use of
2884
2896
  * Halide's internals. */
2885
- struct HALIDE_EXPORT_SYMBOL InternalError : public Error {
2897
+ struct HALIDE_EXPORT_SYMBOL InternalError final : Error {
2898
+ static constexpr auto error_name = "Internal error";
2899
+
2886
2900
  explicit InternalError(const char *msg);
2887
2901
  explicit InternalError(const std::string &msg);
2888
2902
  };
@@ -2898,7 +2912,7 @@ class CompileTimeErrorReporter {
2898
2912
  public:
2899
2913
  virtual ~CompileTimeErrorReporter() = default;
2900
2914
  virtual void warning(const char *msg) = 0;
2901
- virtual void error(const char *msg) = 0;
2915
+ [[noreturn]] virtual void error(const char *msg) = 0;
2902
2916
  };
2903
2917
 
2904
2918
  /** The default error reporter logs to stderr, then throws an exception
@@ -2912,84 +2926,136 @@ void set_custom_compile_time_error_reporter(CompileTimeErrorReporter *error_repo
2912
2926
 
2913
2927
  namespace Internal {
2914
2928
 
2915
- struct ErrorReport {
2916
- enum {
2917
- User = 0x0001,
2918
- Warning = 0x0002,
2919
- Runtime = 0x0004
2920
- };
2929
+ /**
2930
+ * If a custom error reporter is configured, notifies the reporter by calling
2931
+ * its error() function with the value of \p e.what()
2932
+ *
2933
+ * Otherwise, if Halide was built with exceptions, throw \p e unless an
2934
+ * existing exception is in flight. On the other hand, if Halide was built
2935
+ * without exceptions, print the error message to stderr and abort().
2936
+ *
2937
+ * @param e The error to throw or report
2938
+ */
2939
+ /// @{
2940
+ [[noreturn]] void throw_error(const RuntimeError &e);
2941
+ [[noreturn]] void throw_error(const CompileError &e);
2942
+ [[noreturn]] void throw_error(const InternalError &e);
2943
+ /// @}
2921
2944
 
2922
- std::ostringstream msg;
2923
- const int flags;
2945
+ /**
2946
+ * If a custom error reporter is configured, notifies the reporter by calling
2947
+ * its warning() function. Otherwise, prints the warning to stderr.
2948
+ *
2949
+ * @param warning The warning to issue
2950
+ */
2951
+ void issue_warning(const char *warning);
2924
2952
 
2925
- ErrorReport(const char *f, int l, const char *cs, int flags);
2953
+ template<typename T>
2954
+ struct ReportBase {
2955
+ template<typename S>
2956
+ HALIDE_ALWAYS_INLINE T &operator<<(const S &x) {
2957
+ msg << x;
2958
+ return *static_cast<T *>(this);
2959
+ }
2926
2960
 
2927
- // Just a trick used to convert RValue into LValue
2928
- HALIDE_ALWAYS_INLINE ErrorReport &ref() {
2929
- return *this;
2961
+ HALIDE_ALWAYS_INLINE operator bool() const {
2962
+ return !finalized;
2930
2963
  }
2931
2964
 
2932
- template<typename T>
2933
- ErrorReport &operator<<(const T &x) {
2934
- msg << x;
2935
- return *this;
2965
+ protected:
2966
+ std::ostringstream msg{};
2967
+ bool finalized{false};
2968
+
2969
+ // This function is called as part of issue() below. We can't use a
2970
+ // virtual function because issue() needs to be marked [[noreturn]]
2971
+ // for errors and be left alone for warnings (i.e., they have
2972
+ // different signatures).
2973
+ std::string finalize_message() {
2974
+ if (!msg.str().empty() && msg.str().back() != '\n') {
2975
+ msg << "\n";
2976
+ }
2977
+ finalized = true;
2978
+ return msg.str();
2979
+ }
2980
+
2981
+ T &init(const char *file, const char *function, const int line, const char *condition_string, const char *prefix) {
2982
+ if (debug_is_active_impl(1, file, function, line)) {
2983
+ msg << prefix << " at " << file << ":" << line << ' ';
2984
+ if (condition_string) {
2985
+ msg << "Condition failed: " << condition_string << ' ';
2986
+ }
2987
+ }
2988
+ return *static_cast<T *>(this);
2936
2989
  }
2990
+ };
2937
2991
 
2938
- /** When you're done using << on the object, and let it fall out of
2939
- * scope, this errors out, or throws an exception if they are
2940
- * enabled. This is a little dangerous because the destructor will
2941
- * also be called if there's an exception in flight due to an
2942
- * error in one of the arguments passed to operator<<. We handle
2943
- * this by only actually throwing if there isn't an exception in
2944
- * flight already.
2945
- */
2946
- ~ErrorReport() noexcept(false);
2992
+ template<typename Exception>
2993
+ struct ErrorReport final : ReportBase<ErrorReport<Exception>> {
2994
+ ErrorReport &init(const char *file, const char *function, const int line, const char *condition_string) {
2995
+ return ReportBase<ErrorReport>::init(file, function, line, condition_string, Exception::error_name) << "Error: ";
2996
+ }
2997
+
2998
+ [[noreturn]] void issue() noexcept(false) {
2999
+ throw_error(Exception(this->finalize_message()));
3000
+ }
2947
3001
  };
2948
3002
 
2949
- // This uses operator precedence as a trick to avoid argument evaluation if
2950
- // an assertion is true: it is intended to be used as part of the
2951
- // _halide_internal_assertion macro, to coerce the result of the stream
2952
- // expression to void (to match the condition-is-false case).
2953
- class Voidifier {
2954
- public:
2955
- HALIDE_ALWAYS_INLINE Voidifier() = default;
2956
- // This has to be an operator with a precedence lower than << but
2957
- // higher than ?:
2958
- HALIDE_ALWAYS_INLINE void operator&(ErrorReport &) {
3003
+ struct WarningReport final : ReportBase<WarningReport> {
3004
+ WarningReport &init(const char *file, const char *function, const int line, const char *condition_string) {
3005
+ return ReportBase::init(file, function, line, condition_string, "Warning") << "Warning: ";
3006
+ }
3007
+
3008
+ void issue() {
3009
+ issue_warning(this->finalize_message().c_str());
2959
3010
  }
2960
3011
  };
2961
3012
 
2962
3013
  /**
2963
- * _halide_internal_assertion is used to implement our assertion macros
2964
- * in such a way that the messages output for the assertion are only
2965
- * evaluated if the assertion's value is false.
2966
- *
2967
- * Note that this macro intentionally has no parens internally; in actual
2968
- * use, the implicit grouping will end up being
2969
- *
2970
- * condition ? (void) : (Voidifier() & (ErrorReport << arg1 << arg2 ... << argN))
3014
+ * The following three diagnostic macros are implemented such that the
3015
+ * message is evaluated only if the assertion's value is false.
2971
3016
  *
2972
3017
  * This (regrettably) requires a macro to work, but has the highly desirable
2973
3018
  * effect that all assertion parameters are totally skipped (not ever evaluated)
2974
3019
  * when the assertion is true.
3020
+ *
3021
+ * The macros work by deferring the call to issue() until after the stream
3022
+ * has been evaluated. This previously used a trick where ErrorReport would
3023
+ * throw in the destructor, but throwing in a destructor is UB in a lot of
3024
+ * scenarios, and it was easy to break things by mistake.
2975
3025
  */
2976
- #define _halide_internal_assertion(condition, flags) \
2977
- /* NOLINTNEXTLINE(bugprone-macro-parentheses) */ \
2978
- (condition) ? (void)0 : ::Halide::Internal::Voidifier() & ::Halide::Internal::ErrorReport(__FILE__, __LINE__, #condition, flags).ref()
3026
+ /// @{
3027
+ #define _halide_error_impl(type) \
3028
+ for (Halide::Internal::ErrorReport<type> _err; 1; _err.issue()) \
3029
+ /**/ _err.init(__FILE__, __FUNCTION__, __LINE__, nullptr)
3030
+
3031
+ #define _halide_assert_impl(condition, type) \
3032
+ if (!(condition)) \
3033
+ for (Halide::Internal::ErrorReport<type> _err; 1; _err.issue()) \
3034
+ /*****/ _err.init(__FILE__, __FUNCTION__, __LINE__, #condition)
3035
+
3036
+ #define _halide_user_warning \
3037
+ for (Halide::Internal::WarningReport _err; _err; _err.issue()) \
3038
+ /**/ _err.init(__FILE__, __FUNCTION__, __LINE__, nullptr)
3039
+ /// @}
3040
+
3041
+ #define user_warning _halide_user_warning
2979
3042
 
2980
- #define internal_error Halide::Internal::ErrorReport(__FILE__, __LINE__, nullptr, 0)
2981
- #define user_error Halide::Internal::ErrorReport(__FILE__, __LINE__, nullptr, Halide::Internal::ErrorReport::User)
2982
- #define user_warning Halide::Internal::ErrorReport(__FILE__, __LINE__, nullptr, Halide::Internal::ErrorReport::User | Halide::Internal::ErrorReport::Warning)
2983
- #define halide_runtime_error Halide::Internal::ErrorReport(__FILE__, __LINE__, nullptr, Halide::Internal::ErrorReport::User | Halide::Internal::ErrorReport::Runtime)
3043
+ #define user_error _halide_error_impl(Halide::CompileError)
3044
+ #define internal_error _halide_error_impl(Halide::InternalError)
3045
+ #define halide_runtime_error _halide_error_impl(Halide::RuntimeError)
2984
3046
 
2985
- #define internal_assert(c) _halide_internal_assertion(c, 0)
2986
- #define user_assert(c) _halide_internal_assertion(c, Halide::Internal::ErrorReport::User)
3047
+ #define internal_assert(c) _halide_assert_impl(c, Halide::InternalError)
3048
+ #define user_assert(c) _halide_assert_impl(c, Halide::CompileError)
2987
3049
 
2988
3050
  // The nicely named versions get cleaned up at the end of Halide.h,
2989
3051
  // but user code might want to do halide-style user_asserts (e.g. the
2990
3052
  // Extern macros introduce calls to user_assert), so for that purpose
2991
3053
  // we define an equivalent macro that can be used outside of Halide.h
2992
- #define _halide_user_assert(c) _halide_internal_assertion(c, Halide::Internal::ErrorReport::User)
3054
+ #define _halide_user_error _halide_error_impl(Halide::CompileError)
3055
+ #define _halide_internal_error _halide_error_impl(Halide::InternalError)
3056
+ #define _halide_runtime_error _halide_error_impl(Halide::RuntimeError)
3057
+ #define _halide_internal_assert(c) _halide_assert_impl(c, Halide::InternalError)
3058
+ #define _halide_user_assert(c) _halide_assert_impl(c, Halide::CompileError)
2993
3059
 
2994
3060
  // N.B. Any function that might throw a user_assert or user_error may
2995
3061
  // not be inlined into the user's code, or the line number will be
@@ -3459,8 +3525,12 @@ bool starts_with(const std::string &str, const std::string &prefix);
3459
3525
  /** Test if the first string ends with the second string */
3460
3526
  bool ends_with(const std::string &str, const std::string &suffix);
3461
3527
 
3462
- /** Replace all matches of the second string in the first string with the last string */
3463
- std::string replace_all(const std::string &str, const std::string &find, const std::string &replace);
3528
+ /** Replace all matches of the second string in the first string with the last string.
3529
+ * The string to search-and-replace in is passed by value, offering the ability to
3530
+ * std::move() a string in if you're not interested in keeping the original string.
3531
+ * This is useful when the original string does not contain the find-string, causing
3532
+ * this function to return the same string without any copies being made. */
3533
+ std::string replace_all(std::string str, const std::string &find, const std::string &replace);
3464
3534
 
3465
3535
  /** Split the source string using 'delim' as the divider. */
3466
3536
  std::vector<std::string> split_string(const std::string &source, const std::string &delim);
@@ -3671,7 +3741,7 @@ struct ScopedValue {
3671
3741
  : var(var), old_value(var) {
3672
3742
  }
3673
3743
  /** Preserve the old value, then set the var to a new value. */
3674
- ScopedValue(T &var, T new_value)
3744
+ ScopedValue(T &var, const T &new_value)
3675
3745
  : var(var), old_value(var) {
3676
3746
  var = new_value;
3677
3747
  }
@@ -4980,6 +5050,7 @@ struct Target {
4980
5050
  ZnVer2, /// Tune for AMD Zen 2 CPU (AMD Family 17h, launched 2019).
4981
5051
  ZnVer3, /// Tune for AMD Zen 3 CPU (AMD Family 19h, launched 2020).
4982
5052
  ZnVer4, /// Tune for AMD Zen 4 CPU (AMD Family 19h, launched 2022).
5053
+ ZnVer5, /// Tune for AMD Zen 5 CPU (AMD Family 1Ah, launched 2024).
4983
5054
  } processor_tune = ProcessorGeneric;
4984
5055
 
4985
5056
  /** Optional features a target can have.
@@ -4989,11 +5060,13 @@ struct Target {
4989
5060
  enum Feature {
4990
5061
  JIT = halide_target_feature_jit,
4991
5062
  Debug = halide_target_feature_debug,
5063
+ EnableBacktraces = halide_target_feature_enable_backtraces,
4992
5064
  NoAsserts = halide_target_feature_no_asserts,
4993
5065
  NoBoundsQuery = halide_target_feature_no_bounds_query,
4994
5066
  SSE41 = halide_target_feature_sse41,
4995
5067
  AVX = halide_target_feature_avx,
4996
5068
  AVX2 = halide_target_feature_avx2,
5069
+ AVXVNNI = halide_target_feature_avxvnni,
4997
5070
  FMA = halide_target_feature_fma,
4998
5071
  FMA4 = halide_target_feature_fma4,
4999
5072
  F16C = halide_target_feature_f16c,
@@ -5038,6 +5111,7 @@ struct Target {
5038
5111
  AVX512_Cannonlake = halide_target_feature_avx512_cannonlake,
5039
5112
  AVX512_SapphireRapids = halide_target_feature_avx512_sapphirerapids,
5040
5113
  AVX512_Zen4 = halide_target_feature_avx512_zen4,
5114
+ AVX512_Zen5 = halide_target_feature_avx512_zen5,
5041
5115
  TraceLoads = halide_target_feature_trace_loads,
5042
5116
  TraceStores = halide_target_feature_trace_stores,
5043
5117
  TraceRealizations = halide_target_feature_trace_realizations,
@@ -5085,6 +5159,7 @@ struct Target {
5085
5159
  Semihosting = halide_target_feature_semihosting,
5086
5160
  AVX10_1 = halide_target_feature_avx10_1,
5087
5161
  X86APX = halide_target_feature_x86_apx,
5162
+ Simulator = halide_target_feature_simulator,
5088
5163
  FeatureEnd = halide_target_feature_end
5089
5164
  };
5090
5165
  Target() = default;
@@ -5413,10 +5488,12 @@ static_assert(((HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT & (HALIDE_RUNTIME_BUF
5413
5488
  #ifndef HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC
5414
5489
 
5415
5490
  // clang-format off
5416
- #ifdef _MSC_VER
5491
+ #ifdef _WIN32
5417
5492
 
5418
- // MSVC doesn't implement aligned_alloc(), even in C++17 mode, and
5419
- // has stated they probably never will, so, always default it off here.
5493
+ // Windows (regardless of which compiler) doesn't implement aligned_alloc(),
5494
+ // even in C++17 mode, and has stated they probably never will, as the issue
5495
+ // is in the incompatibility that free() needs to be able to free both pointers
5496
+ // returned by malloc() and aligned_alloc(). So, always default it off here.
5420
5497
  #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
5421
5498
 
5422
5499
  #elif defined(__ANDROID_API__) && __ANDROID_API__ < 28
@@ -6317,7 +6394,7 @@ public:
6317
6394
 
6318
6395
  /** Allocate a new image of the given size with a runtime
6319
6396
  * type. Only used when you do know what size you want but you
6320
- * don't know statically what type the elements are. Pass zeroes
6397
+ * don't know statically what type the elements are. Pass zeros
6321
6398
  * to make a buffer suitable for bounds query calls. */
6322
6399
  template<typename... Args,
6323
6400
  typename = typename std::enable_if<AllInts<Args...>::value>::type>
@@ -6336,7 +6413,7 @@ public:
6336
6413
  }
6337
6414
  }
6338
6415
 
6339
- /** Allocate a new image of the given size. Pass zeroes to make a
6416
+ /** Allocate a new image of the given size. Pass zeros to make a
6340
6417
  * buffer suitable for bounds query calls. */
6341
6418
  // @{
6342
6419
 
@@ -7339,9 +7416,12 @@ public:
7339
7416
  /** Make a buffer with the same shape and memory nesting order as
7340
7417
  * another buffer. It may have a different type. */
7341
7418
  template<typename T2, int D2, int S2>
7419
+ // NOLINTNEXTLINE(performance-unnecessary-value-param)
7342
7420
  static Buffer<T, Dims, InClassDimStorage> make_with_shape_of(Buffer<T2, D2, S2> src,
7343
7421
  void *(*allocate_fn)(size_t) = nullptr,
7344
7422
  void (*deallocate_fn)(void *) = nullptr) {
7423
+ // Note that src is taken by value because its dims are mutated
7424
+ // in-place by the helper. Do not change to taking it by reference.
7345
7425
  static_assert(Dims == D2 || Dims == AnyDims);
7346
7426
  const halide_type_t dst_type = T_is_void ? src.type() : halide_type_of<typename std::remove_cv<not_void_T>::type>();
7347
7427
  return Buffer<>::make_with_shape_of_helper(dst_type, src.dimensions(), src.buf.dim,
@@ -7407,9 +7487,7 @@ private:
7407
7487
  }
7408
7488
 
7409
7489
  template<typename... Args>
7410
- HALIDE_ALWAYS_INLINE
7411
- storage_T *
7412
- address_of(Args... args) const {
7490
+ HALIDE_ALWAYS_INLINE storage_T *address_of(Args... args) const {
7413
7491
  if (T_is_void) {
7414
7492
  return (storage_T *)(this->buf.host) + offset_of(0, args...) * type().bytes();
7415
7493
  } else {
@@ -7464,8 +7542,7 @@ public:
7464
7542
  }
7465
7543
 
7466
7544
  HALIDE_ALWAYS_INLINE
7467
- const not_void_T &
7468
- operator()() const {
7545
+ const not_void_T &operator()() const {
7469
7546
  static_assert(!T_is_void,
7470
7547
  "Cannot use operator() on Buffer<void> types");
7471
7548
  constexpr int expected_dims = 0;
@@ -7485,9 +7562,7 @@ public:
7485
7562
 
7486
7563
  template<typename... Args,
7487
7564
  typename = typename std::enable_if<AllInts<Args...>::value>::type>
7488
- HALIDE_ALWAYS_INLINE
7489
- not_void_T &
7490
- operator()(int first, Args... rest) {
7565
+ HALIDE_ALWAYS_INLINE not_void_T &operator()(int first, Args... rest) {
7491
7566
  static_assert(!T_is_void,
7492
7567
  "Cannot use operator() on Buffer<void> types");
7493
7568
  constexpr int expected_dims = 1 + (int)(sizeof...(rest));
@@ -8181,7 +8256,7 @@ public:
8181
8256
 
8182
8257
  template<typename... Args,
8183
8258
  typename = typename std::enable_if<Internal::all_ints_and_optional_name<Args...>::value>::type>
8184
- explicit Buffer(int first, Args... rest)
8259
+ explicit Buffer(int first, const Args &...rest)
8185
8260
  : Buffer(Runtime::Buffer<T, Dims>(Internal::get_shape_from_start_of_parameter_pack(first, rest...)),
8186
8261
  Internal::get_name_from_end_of_parameter_pack(rest...)) {
8187
8262
  }
@@ -8408,6 +8483,7 @@ public:
8408
8483
  HALIDE_BUFFER_FORWARD_CONST(contains)
8409
8484
  HALIDE_BUFFER_FORWARD(crop)
8410
8485
  HALIDE_BUFFER_FORWARD_INITIALIZER_LIST(crop, std::vector<std::pair<int, int>>)
8486
+ HALIDE_BUFFER_FORWARD_CONST(cropped)
8411
8487
  HALIDE_BUFFER_FORWARD(slice)
8412
8488
  HALIDE_BUFFER_FORWARD_CONST(sliced)
8413
8489
  HALIDE_BUFFER_FORWARD(embed)
@@ -8415,6 +8491,7 @@ public:
8415
8491
  HALIDE_BUFFER_FORWARD(set_min)
8416
8492
  HALIDE_BUFFER_FORWARD(translate)
8417
8493
  HALIDE_BUFFER_FORWARD_INITIALIZER_LIST(translate, std::vector<int>)
8494
+ HALIDE_BUFFER_FORWARD_CONST(translated)
8418
8495
  HALIDE_BUFFER_FORWARD(transpose)
8419
8496
  HALIDE_BUFFER_FORWARD_CONST(transposed)
8420
8497
  HALIDE_BUFFER_FORWARD(add_dimension)
@@ -8935,6 +9012,12 @@ public:
8935
9012
 
8936
9013
  void store_in(MemoryType memory_type);
8937
9014
  MemoryType memory_type() const;
9015
+
9016
+ void trace_loads();
9017
+ bool is_tracing_loads() const;
9018
+
9019
+ void add_trace_tag(const std::string &trace_tag);
9020
+ std::vector<std::string> get_trace_tags() const;
8938
9021
  };
8939
9022
 
8940
9023
  namespace Internal {
@@ -10251,8 +10334,7 @@ struct Split {
10251
10334
 
10252
10335
  enum SplitType { SplitVar = 0,
10253
10336
  RenameVar,
10254
- FuseVars,
10255
- PurifyRVar };
10337
+ FuseVars };
10256
10338
 
10257
10339
  // If split_type is Rename, then this is just a renaming of the
10258
10340
  // old_var to the outer and not a split. The inner var should
@@ -10260,10 +10342,6 @@ struct Split {
10260
10342
  // the same list as splits so that ordering between them is
10261
10343
  // respected.
10262
10344
 
10263
- // If split type is Purify, this replaces the old_var RVar to
10264
- // the outer Var. The inner var should be ignored, and factor
10265
- // should be one.
10266
-
10267
10345
  // If split_type is Fuse, then this does the opposite of a
10268
10346
  // split, it joins the outer and inner into the old_var.
10269
10347
  SplitType split_type;
@@ -10854,7 +10932,12 @@ class IRMutator;
10854
10932
 
10855
10933
  /** A single named dimension of a reduction domain */
10856
10934
  struct ReductionVariable {
10935
+ /**
10936
+ * A variable name for the reduction variable. This name must be a
10937
+ * valid Var name, i.e. it must not contain a <tt>.</tt> character.
10938
+ */
10857
10939
  std::string var;
10940
+
10858
10941
  Expr min, extent;
10859
10942
 
10860
10943
  /** This lets you use a ReductionVariable as a key in a map of the form
@@ -11680,7 +11763,7 @@ struct ExternFuncArgument {
11680
11763
  }
11681
11764
 
11682
11765
  template<typename T, int Dims>
11683
- ExternFuncArgument(Buffer<T, Dims> b)
11766
+ ExternFuncArgument(const Buffer<T, Dims> &b)
11684
11767
  : arg_type(BufferArg), buffer(b) {
11685
11768
  }
11686
11769
  ExternFuncArgument(Expr e)
@@ -12323,9 +12406,25 @@ struct Call : public ExprNode<Call> {
12323
12406
 
12324
12407
  // Compute (arg[0] + arg[1]) / 2, assuming arg[0] < arg[1].
12325
12408
  sorted_avg,
12326
- strict_float,
12409
+
12410
+ // strict floating point ops. These are floating point ops that we would
12411
+ // like to optimize around (or let llvm optimize around) by treating
12412
+ // them as reals and ignoring the existence of nan and inf. Using these
12413
+ // intrinsics instead prevents any such optimizations.
12414
+ strict_add,
12415
+ strict_div,
12416
+ strict_eq,
12417
+ strict_le,
12418
+ strict_lt,
12419
+ strict_max,
12420
+ strict_min,
12421
+ strict_mul,
12422
+ strict_sub,
12423
+
12424
+ // Convert a list of Exprs to a string
12327
12425
  stringify,
12328
12426
 
12427
+ // Query properties of the compiled-for target (resolved at compile-time)
12329
12428
  target_arch_is,
12330
12429
  target_bits,
12331
12430
  target_has_feature,
@@ -12450,7 +12549,7 @@ struct Call : public ExprNode<Call> {
12450
12549
  }
12451
12550
 
12452
12551
  bool is_tag() const {
12453
- return is_intrinsic({Call::likely, Call::likely_if_innermost, Call::strict_float});
12552
+ return is_intrinsic({Call::likely, Call::likely_if_innermost});
12454
12553
  }
12455
12554
 
12456
12555
  /** Returns a pointer to a call node if the expression is a call to
@@ -12467,7 +12566,7 @@ struct Call : public ExprNode<Call> {
12467
12566
  }
12468
12567
 
12469
12568
  static const Call *as_tag(const Expr &e) {
12470
- return as_intrinsic(e, {Call::likely, Call::likely_if_innermost, Call::strict_float});
12569
+ return as_intrinsic(e, {Call::likely, Call::likely_if_innermost});
12471
12570
  }
12472
12571
 
12473
12572
  bool is_extern() const {
@@ -12476,6 +12575,19 @@ struct Call : public ExprNode<Call> {
12476
12575
  call_type == PureExtern);
12477
12576
  }
12478
12577
 
12578
+ bool is_strict_float_intrinsic() const {
12579
+ return is_intrinsic(
12580
+ {Call::strict_add,
12581
+ Call::strict_div,
12582
+ Call::strict_max,
12583
+ Call::strict_min,
12584
+ Call::strict_mul,
12585
+ Call::strict_sub,
12586
+ Call::strict_lt,
12587
+ Call::strict_le,
12588
+ Call::strict_eq});
12589
+ }
12590
+
12479
12591
  static const IRNodeType _node_type = IRNodeType::Call;
12480
12592
  };
12481
12593
 
@@ -12628,6 +12740,10 @@ struct Shuffle : public ExprNode<Shuffle> {
12628
12740
  * arguments. */
12629
12741
  bool is_extract_element() const;
12630
12742
 
12743
+ /** Returns the sequence of vector and lane indices that represent each
12744
+ * entry to be used for the shuffled vector */
12745
+ std::vector<std::pair<int, int>> vector_and_lane_indices() const;
12746
+
12631
12747
  static const IRNodeType _node_type = IRNodeType::Shuffle;
12632
12748
  };
12633
12749
 
@@ -13070,6 +13186,577 @@ inline Expr user_context_value() {
13070
13186
  #include <map>
13071
13187
  #include <optional>
13072
13188
 
13189
+ #ifndef HALIDE_CONSTANT_INTERVAL_H
13190
+ #define HALIDE_CONSTANT_INTERVAL_H
13191
+
13192
+ #include <stdint.h>
13193
+
13194
+ /** \file
13195
+ * Defines the ConstantInterval class, and operators on it.
13196
+ */
13197
+
13198
+ namespace Halide {
13199
+
13200
+ struct Type;
13201
+
13202
+ namespace Internal {
13203
+
13204
+ /** A class to represent ranges of integers. Can be unbounded above or below,
13205
+ * but they cannot be empty. */
13206
+ struct ConstantInterval {
13207
+ /** The lower and upper bound of the interval. They are included
13208
+ * in the interval. */
13209
+ int64_t min = 0, max = 0;
13210
+ bool min_defined = false, max_defined = false;
13211
+
13212
+ /* A default-constructed Interval is everything */
13213
+ ConstantInterval() = default;
13214
+
13215
+ /** Construct an interval from a lower and upper bound. */
13216
+ ConstantInterval(int64_t min, int64_t max);
13217
+
13218
+ /** The interval representing everything. */
13219
+ static ConstantInterval everything();
13220
+
13221
+ /** Construct an interval representing a single point. */
13222
+ static ConstantInterval single_point(int64_t x);
13223
+
13224
+ /** Construct intervals bounded above or below. */
13225
+ static ConstantInterval bounded_below(int64_t min);
13226
+ static ConstantInterval bounded_above(int64_t max);
13227
+
13228
+ /** Is the interval the entire range */
13229
+ bool is_everything() const;
13230
+
13231
+ /** Is the interval just a single value (min == max) */
13232
+ bool is_single_point() const;
13233
+
13234
+ /** Is the interval a particular single value */
13235
+ bool is_single_point(int64_t x) const;
13236
+
13237
+ /** Does the interval have a finite upper and lower bound */
13238
+ bool is_bounded() const;
13239
+
13240
+ /** Expand the interval to include another Interval */
13241
+ void include(const ConstantInterval &i);
13242
+
13243
+ /** Expand the interval to include a point */
13244
+ void include(int64_t x);
13245
+
13246
+ /** Test if the interval contains a particular value */
13247
+ bool contains(int32_t x) const;
13248
+
13249
+ /** Test if the interval contains a particular value */
13250
+ bool contains(int64_t x) const;
13251
+
13252
+ /** Test if the interval contains a particular unsigned value */
13253
+ bool contains(uint64_t x) const;
13254
+
13255
+ /** Construct the smallest interval containing two intervals. */
13256
+ static ConstantInterval make_union(const ConstantInterval &a, const ConstantInterval &b);
13257
+
13258
+ /** Construct the largest interval contained within two intervals. Throws an
13259
+ * error if the interval is empty. */
13260
+ static ConstantInterval make_intersection(const ConstantInterval &a, const ConstantInterval &b);
13261
+
13262
+ /** Equivalent to same_as. Exists so that the autoscheduler can
13263
+ * compare two map<string, ConstantInterval> for equality in order to
13264
+ * cache computations. */
13265
+ bool operator==(const ConstantInterval &other) const;
13266
+
13267
+ /** In-place versions of the arithmetic operators below. */
13268
+ // @{
13269
+ void operator+=(const ConstantInterval &other);
13270
+ void operator+=(int64_t);
13271
+ void operator-=(const ConstantInterval &other);
13272
+ void operator-=(int64_t);
13273
+ void operator*=(const ConstantInterval &other);
13274
+ void operator*=(int64_t);
13275
+ void operator/=(const ConstantInterval &other);
13276
+ void operator/=(int64_t);
13277
+ void operator%=(const ConstantInterval &other);
13278
+ void operator%=(int64_t);
13279
+ // @}
13280
+
13281
+ /** Negate an interval. */
13282
+ ConstantInterval operator-() const;
13283
+
13284
+ /** Track what happens if a constant integer interval is forced to fit into
13285
+ * a concrete integer type. */
13286
+ void cast_to(const Type &t);
13287
+
13288
+ /** Get constant integer bounds on a type. */
13289
+ static ConstantInterval bounds_of_type(Type);
13290
+ };
13291
+
13292
+ /** Arithmetic operators on ConstantIntervals. The resulting interval contains
13293
+ * all possible values of the operator applied to any two elements of the
13294
+ * argument intervals. Note that these operator on unbounded integers. If you
13295
+ * are applying this to concrete small integer types, you will need to manually
13296
+ * cast the constant interval back to the desired type to model the effect of
13297
+ * overflow. */
13298
+ // @{
13299
+ ConstantInterval operator+(const ConstantInterval &a, const ConstantInterval &b);
13300
+ ConstantInterval operator+(const ConstantInterval &a, int64_t b);
13301
+ ConstantInterval operator-(const ConstantInterval &a, const ConstantInterval &b);
13302
+ ConstantInterval operator-(const ConstantInterval &a, int64_t b);
13303
+ ConstantInterval operator/(const ConstantInterval &a, const ConstantInterval &b);
13304
+ ConstantInterval operator/(const ConstantInterval &a, int64_t b);
13305
+ ConstantInterval operator*(const ConstantInterval &a, const ConstantInterval &b);
13306
+ ConstantInterval operator*(const ConstantInterval &a, int64_t b);
13307
+ ConstantInterval operator%(const ConstantInterval &a, const ConstantInterval &b);
13308
+ ConstantInterval operator%(const ConstantInterval &a, int64_t b);
13309
+ ConstantInterval min(const ConstantInterval &a, const ConstantInterval &b);
13310
+ ConstantInterval min(const ConstantInterval &a, int64_t b);
13311
+ ConstantInterval max(const ConstantInterval &a, const ConstantInterval &b);
13312
+ ConstantInterval max(const ConstantInterval &a, int64_t b);
13313
+ ConstantInterval abs(const ConstantInterval &a);
13314
+ ConstantInterval operator<<(const ConstantInterval &a, const ConstantInterval &b);
13315
+ ConstantInterval operator<<(const ConstantInterval &a, int64_t b);
13316
+ ConstantInterval operator<<(int64_t a, const ConstantInterval &b);
13317
+ ConstantInterval operator>>(const ConstantInterval &a, const ConstantInterval &b);
13318
+ ConstantInterval operator>>(const ConstantInterval &a, int64_t b);
13319
+ ConstantInterval operator>>(int64_t a, const ConstantInterval &b);
13320
+ // @}
13321
+
13322
+ /** Comparison operators on ConstantIntervals. Returns whether the comparison is
13323
+ * true for all values of the two intervals. */
13324
+ // @{
13325
+ bool operator<=(const ConstantInterval &a, const ConstantInterval &b);
13326
+ bool operator<=(const ConstantInterval &a, int64_t b);
13327
+ bool operator<=(int64_t a, const ConstantInterval &b);
13328
+ bool operator<(const ConstantInterval &a, const ConstantInterval &b);
13329
+ bool operator<(const ConstantInterval &a, int64_t b);
13330
+ bool operator<(int64_t a, const ConstantInterval &b);
13331
+
13332
+ inline bool operator>=(const ConstantInterval &a, const ConstantInterval &b) {
13333
+ return b <= a;
13334
+ }
13335
+ inline bool operator>(const ConstantInterval &a, const ConstantInterval &b) {
13336
+ return b < a;
13337
+ }
13338
+ inline bool operator>=(const ConstantInterval &a, int64_t b) {
13339
+ return b <= a;
13340
+ }
13341
+ inline bool operator>(const ConstantInterval &a, int64_t b) {
13342
+ return b < a;
13343
+ }
13344
+ inline bool operator>=(int64_t a, const ConstantInterval &b) {
13345
+ return b <= a;
13346
+ }
13347
+ inline bool operator>(int64_t a, const ConstantInterval &b) {
13348
+ return b < a;
13349
+ }
13350
+
13351
+ // @}
13352
+ } // namespace Internal
13353
+
13354
+ /** Cast operators for ConstantIntervals. These ones have to live out in
13355
+ * Halide::, to avoid C++ name lookup confusion with the Halide::cast variants
13356
+ * that take Exprs. */
13357
+ // @{
13358
+ Internal::ConstantInterval cast(Type t, const Internal::ConstantInterval &a);
13359
+ Internal::ConstantInterval saturating_cast(Type t, const Internal::ConstantInterval &a);
13360
+ // @}
13361
+
13362
+ } // namespace Halide
13363
+
13364
+ #endif
13365
+ #ifndef HALIDE_SCOPE_H
13366
+ #define HALIDE_SCOPE_H
13367
+
13368
+ #include <iostream>
13369
+ #include <map>
13370
+ #include <stack>
13371
+ #include <string>
13372
+ #include <utility>
13373
+ #include <vector>
13374
+
13375
+
13376
+ /** \file
13377
+ * Defines the Scope class, which is used for keeping track of names in a scope while traversing IR
13378
+ */
13379
+
13380
+ namespace Halide {
13381
+ namespace Internal {
13382
+
13383
+ /** A stack which can store one item very efficiently. Using this
13384
+ * instead of std::stack speeds up Scope substantially. */
13385
+ template<typename T>
13386
+ class SmallStack {
13387
+ private:
13388
+ T _top;
13389
+ std::vector<T> _rest;
13390
+ bool _empty = true;
13391
+
13392
+ public:
13393
+ SmallStack() = default;
13394
+
13395
+ void pop() {
13396
+ if (_rest.empty()) {
13397
+ _empty = true;
13398
+ _top = T();
13399
+ } else {
13400
+ _top = std::move(_rest.back());
13401
+ _rest.pop_back();
13402
+ }
13403
+ }
13404
+
13405
+ void push(T t) {
13406
+ if (!_empty) {
13407
+ _rest.push_back(std::move(_top));
13408
+ }
13409
+ _top = std::move(t);
13410
+ _empty = false;
13411
+ }
13412
+
13413
+ T top() const {
13414
+ return _top;
13415
+ }
13416
+
13417
+ T &top_ref() {
13418
+ return _top;
13419
+ }
13420
+
13421
+ const T &top_ref() const {
13422
+ return _top;
13423
+ }
13424
+
13425
+ bool empty() const {
13426
+ return _empty;
13427
+ }
13428
+
13429
+ size_t size() const {
13430
+ return _empty ? 0 : (_rest.size() + 1);
13431
+ }
13432
+ };
13433
+
13434
+ template<>
13435
+ class SmallStack<void> {
13436
+ // A stack of voids. Voids are all the same, so just record how many voids are in the stack
13437
+ int counter = 0;
13438
+
13439
+ public:
13440
+ void pop() {
13441
+ counter--;
13442
+ }
13443
+ void push() {
13444
+ counter++;
13445
+ }
13446
+ bool empty() const {
13447
+ return counter == 0;
13448
+ }
13449
+ };
13450
+
13451
+ /** A common pattern when traversing Halide IR is that you need to
13452
+ * keep track of stuff when you find a Let or a LetStmt, and that it
13453
+ * should hide previous values with the same name until you leave the
13454
+ * Let or LetStmt nodes. This class helps with that. */
13455
+ template<typename T = void>
13456
+ class Scope {
13457
+ private:
13458
+ std::map<std::string, SmallStack<T>> table;
13459
+
13460
+ const Scope<T> *containing_scope = nullptr;
13461
+
13462
+ public:
13463
+ Scope() = default;
13464
+ Scope(Scope &&that) noexcept = default;
13465
+ Scope &operator=(Scope &&that) noexcept = default;
13466
+
13467
+ // Copying a scope object copies a large table full of strings and
13468
+ // stacks. Bad idea.
13469
+ Scope(const Scope<T> &) = delete;
13470
+ Scope<T> &operator=(const Scope<T> &) = delete;
13471
+
13472
+ /** Set the parent scope. If lookups fail in this scope, they
13473
+ * check the containing scope before returning an error. Caller is
13474
+ * responsible for managing the memory of the containing scope. */
13475
+ void set_containing_scope(const Scope<T> *s) {
13476
+ containing_scope = s;
13477
+ }
13478
+
13479
+ /** A const ref to an empty scope. Useful for default function
13480
+ * arguments, which would otherwise require a copy constructor
13481
+ * (with llvm in c++98 mode) */
13482
+ static const Scope<T> &empty_scope() {
13483
+ static Scope<T> _empty_scope;
13484
+ return _empty_scope;
13485
+ }
13486
+
13487
+ /** Retrieve the value referred to by a name */
13488
+ template<typename T2 = T,
13489
+ typename = typename std::enable_if<!std::is_same<T2, void>::value>::type>
13490
+ T2 get(const std::string &name) const {
13491
+ typename std::map<std::string, SmallStack<T>>::const_iterator iter = table.find(name);
13492
+ if (iter == table.end() || iter->second.empty()) {
13493
+ if (containing_scope) {
13494
+ return containing_scope->get(name);
13495
+ } else {
13496
+ internal_error << "Name not in Scope: " << name << "\n"
13497
+ << *this << "\n";
13498
+ }
13499
+ }
13500
+ return iter->second.top();
13501
+ }
13502
+
13503
+ /** Return a reference to an entry. Does not consider the containing scope. */
13504
+ template<typename T2 = T,
13505
+ typename = typename std::enable_if<!std::is_same<T2, void>::value>::type>
13506
+ T2 &ref(const std::string &name) {
13507
+ typename std::map<std::string, SmallStack<T>>::iterator iter = table.find(name);
13508
+ if (iter == table.end() || iter->second.empty()) {
13509
+ internal_error << "Name not in Scope: " << name << "\n"
13510
+ << *this << "\n";
13511
+ }
13512
+ return iter->second.top_ref();
13513
+ }
13514
+
13515
+ /** Returns a const pointer to an entry if it exists in this scope or any
13516
+ * containing scope, or nullptr if it does not. Use this instead of if
13517
+ * (scope.contains(foo)) { ... scope.get(foo) ... } to avoid doing two
13518
+ * lookups. */
13519
+ template<typename T2 = T,
13520
+ typename = typename std::enable_if<!std::is_same<T2, void>::value>::type>
13521
+ const T2 *find(const std::string &name) const {
13522
+ typename std::map<std::string, SmallStack<T>>::const_iterator iter = table.find(name);
13523
+ if (iter == table.end() || iter->second.empty()) {
13524
+ if (containing_scope) {
13525
+ return containing_scope->find(name);
13526
+ } else {
13527
+ return nullptr;
13528
+ }
13529
+ }
13530
+ return &(iter->second.top_ref());
13531
+ }
13532
+
13533
+ /** A version of find that returns a non-const pointer, but ignores
13534
+ * containing scope. */
13535
+ template<typename T2 = T,
13536
+ typename = typename std::enable_if<!std::is_same<T2, void>::value>::type>
13537
+ T2 *shallow_find(const std::string &name) {
13538
+ typename std::map<std::string, SmallStack<T>>::iterator iter = table.find(name);
13539
+ if (iter == table.end() || iter->second.empty()) {
13540
+ return nullptr;
13541
+ } else {
13542
+ return &(iter->second.top_ref());
13543
+ }
13544
+ }
13545
+
13546
+ /** Tests if a name is in scope. If you plan to use the value if it is, call
13547
+ * find instead. */
13548
+ bool contains(const std::string &name) const {
13549
+ typename std::map<std::string, SmallStack<T>>::const_iterator iter = table.find(name);
13550
+ if (iter == table.end() || iter->second.empty()) {
13551
+ if (containing_scope) {
13552
+ return containing_scope->contains(name);
13553
+ } else {
13554
+ return false;
13555
+ }
13556
+ }
13557
+ return true;
13558
+ }
13559
+
13560
+ /** How many nested definitions of a single name exist? */
13561
+ size_t count(const std::string &name) const {
13562
+ auto it = table.find(name);
13563
+ if (it == table.end()) {
13564
+ return 0;
13565
+ } else {
13566
+ return it->second.size();
13567
+ }
13568
+ }
13569
+
13570
+ /** How many distinct names exist (does not count nested definitions of the same name) */
13571
+ size_t size() const {
13572
+ return table.size();
13573
+ }
13574
+
13575
+ struct PushToken {
13576
+ typename std::map<std::string, SmallStack<T>>::iterator iter;
13577
+ };
13578
+
13579
+ /** Add a new (name, value) pair to the current scope. Hide old values that
13580
+ * have this name until we pop this name. Returns a token that can be used
13581
+ * to pop the same value without doing a fresh lookup.
13582
+ */
13583
+ template<typename T2 = T,
13584
+ typename = typename std::enable_if<!std::is_same<T2, void>::value>::type>
13585
+ PushToken push(const std::string &name, T2 &&value) {
13586
+ auto it = table.try_emplace(name).first;
13587
+ it->second.push(std::forward<T2>(value));
13588
+ return PushToken{it};
13589
+ }
13590
+
13591
+ template<typename T2 = T,
13592
+ typename = typename std::enable_if<std::is_same<T2, void>::value>::type>
13593
+ PushToken push(const std::string &name) {
13594
+ auto it = table.try_emplace(name).first;
13595
+ it->second.push();
13596
+ return PushToken{it};
13597
+ }
13598
+
13599
+ /** A name goes out of scope. Restore whatever its old value
13600
+ * was (or remove it entirely if there was nothing else of the
13601
+ * same name in an outer scope) */
13602
+ void pop(const std::string &name) {
13603
+ typename std::map<std::string, SmallStack<T>>::iterator iter = table.find(name);
13604
+ internal_assert(iter != table.end()) << "Name not in Scope: " << name << "\n"
13605
+ << *this << "\n";
13606
+ iter->second.pop();
13607
+ if (iter->second.empty()) {
13608
+ table.erase(iter);
13609
+ }
13610
+ }
13611
+
13612
+ /** Pop a name using a token returned by push instead of a string. */
13613
+ void pop(PushToken p) {
13614
+ p.iter->second.pop();
13615
+ if (p.iter->second.empty()) {
13616
+ table.erase(p.iter);
13617
+ }
13618
+ }
13619
+
13620
+ /** Iterate through the scope. Does not capture any containing scope. */
13621
+ class const_iterator {
13622
+ typename std::map<std::string, SmallStack<T>>::const_iterator iter;
13623
+
13624
+ public:
13625
+ explicit const_iterator(const typename std::map<std::string, SmallStack<T>>::const_iterator &i)
13626
+ : iter(i) {
13627
+ }
13628
+
13629
+ const_iterator() = default;
13630
+
13631
+ bool operator!=(const const_iterator &other) {
13632
+ return iter != other.iter;
13633
+ }
13634
+
13635
+ void operator++() {
13636
+ ++iter;
13637
+ }
13638
+
13639
+ const std::string &name() {
13640
+ return iter->first;
13641
+ }
13642
+
13643
+ const SmallStack<T> &stack() {
13644
+ return iter->second;
13645
+ }
13646
+
13647
+ template<typename T2 = T,
13648
+ typename = typename std::enable_if<!std::is_same<T2, void>::value>::type>
13649
+ const T2 &value() {
13650
+ return iter->second.top_ref();
13651
+ }
13652
+ };
13653
+
13654
+ const_iterator cbegin() const {
13655
+ return const_iterator(table.begin());
13656
+ }
13657
+
13658
+ const_iterator cend() const {
13659
+ return const_iterator(table.end());
13660
+ }
13661
+
13662
+ void swap(Scope<T> &other) noexcept {
13663
+ table.swap(other.table);
13664
+ std::swap(containing_scope, other.containing_scope);
13665
+ }
13666
+ };
13667
+
13668
+ template<typename T>
13669
+ std::ostream &operator<<(std::ostream &stream, const Scope<T> &s) {
13670
+ stream << "{\n";
13671
+ typename Scope<T>::const_iterator iter;
13672
+ for (iter = s.cbegin(); iter != s.cend(); ++iter) {
13673
+ stream << " " << iter.name() << "\n";
13674
+ }
13675
+ stream << "}";
13676
+ return stream;
13677
+ }
13678
+
13679
+ /** Helper class for pushing/popping Scope<> values, to allow
13680
+ * for early-exit in Visitor/Mutators that preserves correctness.
13681
+ * Note that this name can be a bit confusing, since there are two "scopes"
13682
+ * involved here:
13683
+ * - the Scope object itself
13684
+ * - the lifetime of this helper object
13685
+ * The "Scoped" in this class name refers to the latter, as it temporarily binds
13686
+ * a name within the scope of this helper's lifetime. */
13687
+ template<typename T = void>
13688
+ struct ScopedBinding {
13689
+ Scope<T> *scope = nullptr;
13690
+ typename Scope<T>::PushToken token;
13691
+
13692
+ ScopedBinding() = default;
13693
+
13694
+ ScopedBinding(Scope<T> &s, const std::string &n, T value)
13695
+ : scope(&s), token(scope->push(n, std::move(value))) {
13696
+ }
13697
+
13698
+ ScopedBinding(bool condition, Scope<T> &s, const std::string &n, const T &value)
13699
+ : scope(condition ? &s : nullptr),
13700
+ token(condition ? scope->push(n, value) : typename Scope<T>::PushToken{}) {
13701
+ }
13702
+
13703
+ bool bound() const {
13704
+ return scope != nullptr;
13705
+ }
13706
+
13707
+ ~ScopedBinding() {
13708
+ if (scope) {
13709
+ scope->pop(token);
13710
+ }
13711
+ }
13712
+
13713
+ // allow move but not copy
13714
+ ScopedBinding(const ScopedBinding &that) = delete;
13715
+ ScopedBinding(ScopedBinding &&that) noexcept
13716
+ : scope(that.scope),
13717
+ token(that.token) {
13718
+ // The move constructor must null out scope, so we don't try to pop it
13719
+ that.scope = nullptr;
13720
+ }
13721
+
13722
+ void operator=(const ScopedBinding &that) = delete;
13723
+ void operator=(ScopedBinding &&that) = delete;
13724
+ };
13725
+
13726
+ template<>
13727
+ struct ScopedBinding<void> {
13728
+ Scope<> *scope;
13729
+ Scope<>::PushToken token;
13730
+ ScopedBinding(Scope<> &s, const std::string &n)
13731
+ : scope(&s), token(scope->push(n)) {
13732
+ }
13733
+ ScopedBinding(bool condition, Scope<> &s, const std::string &n)
13734
+ : scope(condition ? &s : nullptr),
13735
+ token(condition ? scope->push(n) : Scope<>::PushToken{}) {
13736
+ }
13737
+ ~ScopedBinding() {
13738
+ if (scope) {
13739
+ scope->pop(token);
13740
+ }
13741
+ }
13742
+
13743
+ // allow move but not copy
13744
+ ScopedBinding(const ScopedBinding &that) = delete;
13745
+ ScopedBinding(ScopedBinding &&that) noexcept
13746
+ : scope(that.scope),
13747
+ token(that.token) {
13748
+ // The move constructor must null out scope, so we don't try to pop it
13749
+ that.scope = nullptr;
13750
+ }
13751
+
13752
+ void operator=(const ScopedBinding &that) = delete;
13753
+ void operator=(ScopedBinding &&that) = delete;
13754
+ };
13755
+
13756
+ } // namespace Internal
13757
+ } // namespace Halide
13758
+
13759
+ #endif
13073
13760
  #ifndef HALIDE_TUPLE_H
13074
13761
  #define HALIDE_TUPLE_H
13075
13762
 
@@ -13275,13 +13962,16 @@ Expr const_false(int lanes = 1);
13275
13962
  /** Attempt to cast an expression to a smaller type while provably not losing
13276
13963
  * information. If it can't be done, return an undefined Expr.
13277
13964
  *
13278
- * Optionally accepts a map that gives the constant bounds of exprs already
13279
- * analyzed to avoid redoing work across many calls to lossless_cast. It is not
13280
- * safe to use this optional map in contexts where the same Expr object may
13281
- * take on a different value. For example:
13282
- * (let x = 4 in some_expr_object) + (let x = 5 in the_same_expr_object)).
13283
- * It is safe to use it after uniquify_variable_names has been run. */
13284
- Expr lossless_cast(Type t, Expr e, std::map<Expr, ConstantInterval, ExprCompare> *cache = nullptr);
13965
+ * Optionally accepts a scope giving the constant bounds of any variables, and a
13966
+ * map that gives the constant bounds of exprs already analyzed to avoid redoing
13967
+ * work across many calls to lossless_cast. It is not safe to use this optional
13968
+ * map in contexts where the same Expr object may take on a different value. For
13969
+ * example: (let x = 4 in some_expr_object) + (let x = 5 in
13970
+ * the_same_expr_object)). It is safe to use it after uniquify_variable_names
13971
+ * has been run. */
13972
+ Expr lossless_cast(Type t, Expr e,
13973
+ const Scope<ConstantInterval> &scope = Scope<ConstantInterval>::empty_scope(),
13974
+ std::map<Expr, ConstantInterval, ExprCompare> *cache = nullptr);
13285
13975
 
13286
13976
  /** Attempt to negate x without introducing new IR and without overflow.
13287
13977
  * If it can't be done, return an undefined Expr. */
@@ -14095,8 +14785,9 @@ Expr pow(Expr x, Expr y);
14095
14785
  * mantissa. Vectorizes cleanly. */
14096
14786
  Expr erf(const Expr &x);
14097
14787
 
14098
- /** Fast vectorizable approximation to some trigonometric functions for Float(32).
14099
- * Absolute approximation error is less than 1e-5. */
14788
+ /** Fast vectorizable approximation to some trigonometric functions for
14789
+ * Float(32). Absolute approximation error is less than 1e-5. Slow on x86 if
14790
+ * you don't have at least sse 4.1. */
14100
14791
  // @{
14101
14792
  Expr fast_sin(const Expr &x);
14102
14793
  Expr fast_cos(const Expr &x);
@@ -14104,19 +14795,22 @@ Expr fast_cos(const Expr &x);
14104
14795
 
14105
14796
  /** Fast approximate cleanly vectorizable log for Float(32). Returns
14106
14797
  * nonsense for x <= 0.0f. Accurate up to the last 5 bits of the
14107
- * mantissa. Vectorizes cleanly. */
14798
+ * mantissa. Vectorizes cleanly. Slow on x86 if you don't
14799
+ * have at least sse 4.1. */
14108
14800
  Expr fast_log(const Expr &x);
14109
14801
 
14110
14802
  /** Fast approximate cleanly vectorizable exp for Float(32). Returns
14111
14803
  * nonsense for inputs that would overflow or underflow. Typically
14112
14804
  * accurate up to the last 5 bits of the mantissa. Gets worse when
14113
- * approaching overflow. Vectorizes cleanly. */
14805
+ * approaching overflow. Vectorizes cleanly. Slow on x86 if you don't
14806
+ * have at least sse 4.1. */
14114
14807
  Expr fast_exp(const Expr &x);
14115
14808
 
14116
14809
  /** Fast approximate cleanly vectorizable pow for Float(32). Returns
14117
14810
  * nonsense for x < 0.0f. Accurate up to the last 5 bits of the
14118
14811
  * mantissa for typical exponents. Gets worse when approaching
14119
- * overflow. Vectorizes cleanly. */
14812
+ * overflow. Vectorizes cleanly. Slow on x86 if you don't
14813
+ * have at least sse 4.1. */
14120
14814
  Expr fast_pow(Expr x, Expr y);
14121
14815
 
14122
14816
  /** Fast approximate inverse for Float(32). Corresponds to the rcpps
@@ -14559,7 +15253,7 @@ Expr saturating_cast(Type t, Expr e);
14559
15253
  * all backends. (E.g. it is difficult to do this for C++ code
14560
15254
  * generation as it depends on the compiler flags used to compile the
14561
15255
  * generated code.) */
14562
- Expr strict_float(Expr e);
15256
+ Expr strict_float(const Expr &e);
14563
15257
 
14564
15258
  /** Create an Expr that that promises another Expr is clamped but do
14565
15259
  * not generate code to check the assertion or modify the value. No
@@ -14671,7 +15365,7 @@ f(scatter(3, 5)) = f(select(p, gather(5, 3), gather(3, 5)));
14671
15365
  f(select(p, scatter(3, 5, 5), scatter(1, 2, 3))) = f(select(p, gather(5, 3, 3), gather(2, 3, 1)));
14672
15366
  \endcode
14673
15367
  *
14674
- * Note that in the p == true case, we redudantly load from 3 and write
15368
+ * Note that in the p == true case, we redundantly load from 3 and write
14675
15369
  * to 5 twice.
14676
15370
  */
14677
15371
  //@{
@@ -14952,7 +15646,7 @@ struct PipelineContents;
14952
15646
  *
14953
15647
  * The 'name' field specifies the type of Autoscheduler
14954
15648
  * to be used (e.g. Adams2019, Mullapudi2016). If this is an empty string,
14955
- * no autoscheduling will be done; if not, it mustbe the name of a known Autoscheduler.
15649
+ * no autoscheduling will be done; if not, it must be the name of a known Autoscheduler.
14956
15650
  *
14957
15651
  * At this time, well-known autoschedulers include:
14958
15652
  * "Mullapudi2016" -- heuristics-based; the first working autoscheduler; currently built in to libHalide
@@ -15743,7 +16437,7 @@ public:
15743
16437
  }
15744
16438
 
15745
16439
  template<typename... Args>
15746
- HALIDE_NO_USER_CODE_INLINE RDom(Expr min, Expr extent, Args &&...args) {
16440
+ HALIDE_NO_USER_CODE_INLINE RDom(const Expr &min, const Expr &extent, Args &&...args) {
15747
16441
  // This should really just be a delegating constructor, but I couldn't make
15748
16442
  // that work with variadic template unpacking in visual studio 2013
15749
16443
  Region region;
@@ -15895,12 +16589,14 @@ class Var {
15895
16589
  /* The expression representing the Var. Guaranteed to be an
15896
16590
  * Internal::Variable of type Int(32). Created once on
15897
16591
  * construction of the Var to avoid making a fresh Expr every time
15898
- * the Var is used in a context in which is will be converted to
16592
+ * the Var is used in a context in which it will be converted to
15899
16593
  * one. */
15900
16594
  Expr e;
15901
16595
 
15902
16596
  public:
15903
- /** Construct a Var with the given name */
16597
+ /** Construct a Var with the given name. Unlike Funcs, this will be treated
16598
+ * as the same Var as any other Var with the same name, including
16599
+ * implicit Vars. */
15904
16600
  Var(const std::string &n);
15905
16601
 
15906
16602
  /** Construct a Var with an automatically-generated unique name. */
@@ -15995,9 +16691,6 @@ public:
15995
16691
  static Var implicit(int n);
15996
16692
 
15997
16693
  /** Return whether a variable name is of the form for an implicit argument.
15998
- * TODO: This is almost guaranteed to incorrectly fire on user
15999
- * declared variables at some point. We should likely prevent
16000
- * user Var declarations from making names of this form.
16001
16694
  */
16002
16695
  //{
16003
16696
  static bool is_implicit(const std::string &name);
@@ -16130,6 +16823,7 @@ struct VarOrRVar {
16130
16823
  class ImageParam;
16131
16824
 
16132
16825
  namespace Internal {
16826
+ struct AssociativeOp;
16133
16827
  class Function;
16134
16828
  struct Split;
16135
16829
  struct StorageDim;
@@ -16151,7 +16845,6 @@ class Stage {
16151
16845
  void split(const std::string &old, const std::string &outer, const std::string &inner,
16152
16846
  const Expr &factor, bool exact, TailStrategy tail);
16153
16847
  void remove(const std::string &var);
16154
- Stage &purify(const VarOrRVar &old_name, const VarOrRVar &new_name);
16155
16848
 
16156
16849
  const std::vector<Internal::StorageDim> &storage_dims() const {
16157
16850
  return function.schedule().storage_dims();
@@ -16159,6 +16852,9 @@ class Stage {
16159
16852
 
16160
16853
  Stage &compute_with(LoopLevel loop_level, const std::map<std::string, LoopAlignStrategy> &align);
16161
16854
 
16855
+ std::pair<std::vector<Internal::Split>, std::vector<Internal::Split>>
16856
+ rfactor_validate_args(const std::vector<std::pair<RVar, Var>> &preserved, const Internal::AssociativeOp &prover_result);
16857
+
16162
16858
  public:
16163
16859
  Stage(Internal::Function f, Internal::Definition d, size_t stage_index)
16164
16860
  : function(std::move(f)), definition(std::move(d)), stage_index(stage_index) {
@@ -16254,7 +16950,7 @@ public:
16254
16950
  *
16255
16951
  */
16256
16952
  // @{
16257
- Func rfactor(std::vector<std::pair<RVar, Var>> preserved);
16953
+ Func rfactor(const std::vector<std::pair<RVar, Var>> &preserved);
16258
16954
  Func rfactor(const RVar &r, const Var &v);
16259
16955
  // @}
16260
16956
 
@@ -16575,7 +17271,7 @@ class FuncRef {
16575
17271
  * already have a pure definition, init_val will be used as RHS in
16576
17272
  * the initial function definition. */
16577
17273
  template<typename BinaryOp>
16578
- Stage func_ref_update(Expr e, int init_val);
17274
+ Stage func_ref_update(const Expr &e, int init_val);
16579
17275
 
16580
17276
  public:
16581
17277
  FuncRef(const Internal::Function &, const std::vector<Expr> &,
@@ -16598,7 +17294,7 @@ public:
16598
17294
  * pure definition, this sets it to zero.
16599
17295
  */
16600
17296
  // @{
16601
- Stage operator+=(Expr);
17297
+ Stage operator+=(const Expr &);
16602
17298
  Stage operator+=(const Tuple &);
16603
17299
  Stage operator+=(const FuncRef &);
16604
17300
  // @}
@@ -16609,7 +17305,7 @@ public:
16609
17305
  * not already have a pure definition, this sets it to zero.
16610
17306
  */
16611
17307
  // @{
16612
- Stage operator-=(Expr);
17308
+ Stage operator-=(const Expr &);
16613
17309
  Stage operator-=(const Tuple &);
16614
17310
  Stage operator-=(const FuncRef &);
16615
17311
  // @}
@@ -16620,7 +17316,7 @@ public:
16620
17316
  * definition, this sets it to 1.
16621
17317
  */
16622
17318
  // @{
16623
- Stage operator*=(Expr);
17319
+ Stage operator*=(const Expr &);
16624
17320
  Stage operator*=(const Tuple &);
16625
17321
  Stage operator*=(const FuncRef &);
16626
17322
  // @}
@@ -16631,7 +17327,7 @@ public:
16631
17327
  * function does not already have a pure definition, this sets it to 1.
16632
17328
  */
16633
17329
  // @{
16634
- Stage operator/=(Expr);
17330
+ Stage operator/=(const Expr &);
16635
17331
  Stage operator/=(const Tuple &);
16636
17332
  Stage operator/=(const FuncRef &);
16637
17333
  // @}
@@ -16654,6 +17350,9 @@ public:
16654
17350
  /** How many outputs does the function this refers to produce. */
16655
17351
  size_t size() const;
16656
17352
 
17353
+ /** Is this FuncRef syntactically equivalent to another one? */
17354
+ bool equivalent_to(const FuncRef &other) const;
17355
+
16657
17356
  /** What function is this calling? */
16658
17357
  Internal::Function function() const {
16659
17358
  return func;
@@ -16820,7 +17519,7 @@ public:
16820
17519
  * not contain free variables). */
16821
17520
  explicit Func(const Expr &e);
16822
17521
 
16823
- /** Construct a new Func to wrap an existing, already-define
17522
+ /** Construct a new Func to wrap an existing, already-defined
16824
17523
  * Function object. */
16825
17524
  explicit Func(Internal::Function f);
16826
17525
 
@@ -17231,14 +17930,6 @@ public:
17231
17930
  device_api);
17232
17931
  }
17233
17932
 
17234
- void define_extern(const std::string &function_name,
17235
- const std::vector<ExternFuncArgument> &params,
17236
- const std::vector<Type> &types, int dimensionality,
17237
- NameMangling mangling) {
17238
- define_extern(function_name, params, types,
17239
- Internal::make_argument_list(dimensionality), mangling);
17240
- }
17241
-
17242
17933
  void define_extern(const std::string &function_name,
17243
17934
  const std::vector<ExternFuncArgument> &params,
17244
17935
  const std::vector<Type> &types, int dimensionality,
@@ -18331,6 +19022,11 @@ public:
18331
19022
  * to remove memoized entries using this eviction key from the
18332
19023
  * cache. Memoized computations that do not provide an eviction
18333
19024
  * key will never be evicted by this mechanism.
19025
+ *
19026
+ * It is invalid to memoize the output of a Pipeline; attempting
19027
+ * to do so will issue an error. To cache an entire pipeline,
19028
+ * either implement a caching mechanism outside of Halide or
19029
+ * explicitly copy out of the cache with another output Func.
18334
19030
  */
18335
19031
  Func &memoize(const EvictionKey &eviction_key = EvictionKey());
18336
19032
 
@@ -19195,401 +19891,6 @@ private:
19195
19891
  } // namespace Internal
19196
19892
  } // namespace Halide
19197
19893
 
19198
- #endif
19199
- #ifndef HALIDE_SCOPE_H
19200
- #define HALIDE_SCOPE_H
19201
-
19202
- #include <iostream>
19203
- #include <map>
19204
- #include <stack>
19205
- #include <string>
19206
- #include <utility>
19207
- #include <vector>
19208
-
19209
-
19210
- /** \file
19211
- * Defines the Scope class, which is used for keeping track of names in a scope while traversing IR
19212
- */
19213
-
19214
- namespace Halide {
19215
- namespace Internal {
19216
-
19217
- /** A stack which can store one item very efficiently. Using this
19218
- * instead of std::stack speeds up Scope substantially. */
19219
- template<typename T>
19220
- class SmallStack {
19221
- private:
19222
- T _top;
19223
- std::vector<T> _rest;
19224
- bool _empty = true;
19225
-
19226
- public:
19227
- SmallStack() = default;
19228
-
19229
- void pop() {
19230
- if (_rest.empty()) {
19231
- _empty = true;
19232
- _top = T();
19233
- } else {
19234
- _top = std::move(_rest.back());
19235
- _rest.pop_back();
19236
- }
19237
- }
19238
-
19239
- void push(T t) {
19240
- if (!_empty) {
19241
- _rest.push_back(std::move(_top));
19242
- }
19243
- _top = std::move(t);
19244
- _empty = false;
19245
- }
19246
-
19247
- T top() const {
19248
- return _top;
19249
- }
19250
-
19251
- T &top_ref() {
19252
- return _top;
19253
- }
19254
-
19255
- const T &top_ref() const {
19256
- return _top;
19257
- }
19258
-
19259
- bool empty() const {
19260
- return _empty;
19261
- }
19262
-
19263
- size_t size() const {
19264
- return _empty ? 0 : (_rest.size() + 1);
19265
- }
19266
- };
19267
-
19268
- template<>
19269
- class SmallStack<void> {
19270
- // A stack of voids. Voids are all the same, so just record how many voids are in the stack
19271
- int counter = 0;
19272
-
19273
- public:
19274
- void pop() {
19275
- counter--;
19276
- }
19277
- void push() {
19278
- counter++;
19279
- }
19280
- bool empty() const {
19281
- return counter == 0;
19282
- }
19283
- };
19284
-
19285
- /** A common pattern when traversing Halide IR is that you need to
19286
- * keep track of stuff when you find a Let or a LetStmt, and that it
19287
- * should hide previous values with the same name until you leave the
19288
- * Let or LetStmt nodes This class helps with that. */
19289
- template<typename T = void>
19290
- class Scope {
19291
- private:
19292
- std::map<std::string, SmallStack<T>> table;
19293
-
19294
- const Scope<T> *containing_scope = nullptr;
19295
-
19296
- public:
19297
- Scope() = default;
19298
- Scope(Scope &&that) noexcept = default;
19299
- Scope &operator=(Scope &&that) noexcept = default;
19300
-
19301
- // Copying a scope object copies a large table full of strings and
19302
- // stacks. Bad idea.
19303
- Scope(const Scope<T> &) = delete;
19304
- Scope<T> &operator=(const Scope<T> &) = delete;
19305
-
19306
- /** Set the parent scope. If lookups fail in this scope, they
19307
- * check the containing scope before returning an error. Caller is
19308
- * responsible for managing the memory of the containing scope. */
19309
- void set_containing_scope(const Scope<T> *s) {
19310
- containing_scope = s;
19311
- }
19312
-
19313
- /** A const ref to an empty scope. Useful for default function
19314
- * arguments, which would otherwise require a copy constructor
19315
- * (with llvm in c++98 mode) */
19316
- static const Scope<T> &empty_scope() {
19317
- static Scope<T> _empty_scope;
19318
- return _empty_scope;
19319
- }
19320
-
19321
- /** Retrieve the value referred to by a name */
19322
- template<typename T2 = T,
19323
- typename = typename std::enable_if<!std::is_same<T2, void>::value>::type>
19324
- T2 get(const std::string &name) const {
19325
- typename std::map<std::string, SmallStack<T>>::const_iterator iter = table.find(name);
19326
- if (iter == table.end() || iter->second.empty()) {
19327
- if (containing_scope) {
19328
- return containing_scope->get(name);
19329
- } else {
19330
- internal_error << "Name not in Scope: " << name << "\n"
19331
- << *this << "\n";
19332
- }
19333
- }
19334
- return iter->second.top();
19335
- }
19336
-
19337
- /** Return a reference to an entry. Does not consider the containing scope. */
19338
- template<typename T2 = T,
19339
- typename = typename std::enable_if<!std::is_same<T2, void>::value>::type>
19340
- T2 &ref(const std::string &name) {
19341
- typename std::map<std::string, SmallStack<T>>::iterator iter = table.find(name);
19342
- if (iter == table.end() || iter->second.empty()) {
19343
- internal_error << "Name not in Scope: " << name << "\n"
19344
- << *this << "\n";
19345
- }
19346
- return iter->second.top_ref();
19347
- }
19348
-
19349
- /** Returns a const pointer to an entry if it exists in this scope or any
19350
- * containing scope, or nullptr if it does not. Use this instead of if
19351
- * (scope.contains(foo)) { ... scope.get(foo) ... } to avoid doing two
19352
- * lookups. */
19353
- template<typename T2 = T,
19354
- typename = typename std::enable_if<!std::is_same<T2, void>::value>::type>
19355
- const T2 *find(const std::string &name) const {
19356
- typename std::map<std::string, SmallStack<T>>::const_iterator iter = table.find(name);
19357
- if (iter == table.end() || iter->second.empty()) {
19358
- if (containing_scope) {
19359
- return containing_scope->find(name);
19360
- } else {
19361
- return nullptr;
19362
- }
19363
- }
19364
- return &(iter->second.top_ref());
19365
- }
19366
-
19367
- /** A version of find that returns a non-const pointer, but ignores
19368
- * containing scope. */
19369
- template<typename T2 = T,
19370
- typename = typename std::enable_if<!std::is_same<T2, void>::value>::type>
19371
- T2 *shallow_find(const std::string &name) {
19372
- typename std::map<std::string, SmallStack<T>>::iterator iter = table.find(name);
19373
- if (iter == table.end() || iter->second.empty()) {
19374
- return nullptr;
19375
- } else {
19376
- return &(iter->second.top_ref());
19377
- }
19378
- }
19379
-
19380
- /** Tests if a name is in scope. If you plan to use the value if it is, call
19381
- * find instead. */
19382
- bool contains(const std::string &name) const {
19383
- typename std::map<std::string, SmallStack<T>>::const_iterator iter = table.find(name);
19384
- if (iter == table.end() || iter->second.empty()) {
19385
- if (containing_scope) {
19386
- return containing_scope->contains(name);
19387
- } else {
19388
- return false;
19389
- }
19390
- }
19391
- return true;
19392
- }
19393
-
19394
- /** How many nested definitions of a single name exist? */
19395
- size_t count(const std::string &name) const {
19396
- auto it = table.find(name);
19397
- if (it == table.end()) {
19398
- return 0;
19399
- } else {
19400
- return it->second.size();
19401
- }
19402
- }
19403
-
19404
- /** How many distinct names exist (does not count nested definitions of the same name) */
19405
- size_t size() const {
19406
- return table.size();
19407
- }
19408
-
19409
- struct PushToken {
19410
- typename std::map<std::string, SmallStack<T>>::iterator iter;
19411
- };
19412
-
19413
- /** Add a new (name, value) pair to the current scope. Hide old values that
19414
- * have this name until we pop this name. Returns a token that can be used
19415
- * to pop the same value without doing a fresh lookup.
19416
- */
19417
- template<typename T2 = T,
19418
- typename = typename std::enable_if<!std::is_same<T2, void>::value>::type>
19419
- PushToken push(const std::string &name, T2 &&value) {
19420
- auto it = table.try_emplace(name).first;
19421
- it->second.push(std::forward<T2>(value));
19422
- return PushToken{it};
19423
- }
19424
-
19425
- template<typename T2 = T,
19426
- typename = typename std::enable_if<std::is_same<T2, void>::value>::type>
19427
- PushToken push(const std::string &name) {
19428
- auto it = table.try_emplace(name).first;
19429
- it->second.push();
19430
- return PushToken{it};
19431
- }
19432
-
19433
- /** A name goes out of scope. Restore whatever its old value
19434
- * was (or remove it entirely if there was nothing else of the
19435
- * same name in an outer scope) */
19436
- void pop(const std::string &name) {
19437
- typename std::map<std::string, SmallStack<T>>::iterator iter = table.find(name);
19438
- internal_assert(iter != table.end()) << "Name not in Scope: " << name << "\n"
19439
- << *this << "\n";
19440
- iter->second.pop();
19441
- if (iter->second.empty()) {
19442
- table.erase(iter);
19443
- }
19444
- }
19445
-
19446
- /** Pop a name using a token returned by push instead of a string. */
19447
- void pop(PushToken p) {
19448
- p.iter->second.pop();
19449
- if (p.iter->second.empty()) {
19450
- table.erase(p.iter);
19451
- }
19452
- }
19453
-
19454
- /** Iterate through the scope. Does not capture any containing scope. */
19455
- class const_iterator {
19456
- typename std::map<std::string, SmallStack<T>>::const_iterator iter;
19457
-
19458
- public:
19459
- explicit const_iterator(const typename std::map<std::string, SmallStack<T>>::const_iterator &i)
19460
- : iter(i) {
19461
- }
19462
-
19463
- const_iterator() = default;
19464
-
19465
- bool operator!=(const const_iterator &other) {
19466
- return iter != other.iter;
19467
- }
19468
-
19469
- void operator++() {
19470
- ++iter;
19471
- }
19472
-
19473
- const std::string &name() {
19474
- return iter->first;
19475
- }
19476
-
19477
- const SmallStack<T> &stack() {
19478
- return iter->second;
19479
- }
19480
-
19481
- template<typename T2 = T,
19482
- typename = typename std::enable_if<!std::is_same<T2, void>::value>::type>
19483
- const T2 &value() {
19484
- return iter->second.top_ref();
19485
- }
19486
- };
19487
-
19488
- const_iterator cbegin() const {
19489
- return const_iterator(table.begin());
19490
- }
19491
-
19492
- const_iterator cend() const {
19493
- return const_iterator(table.end());
19494
- }
19495
-
19496
- void swap(Scope<T> &other) noexcept {
19497
- table.swap(other.table);
19498
- std::swap(containing_scope, other.containing_scope);
19499
- }
19500
- };
19501
-
19502
- template<typename T>
19503
- std::ostream &operator<<(std::ostream &stream, const Scope<T> &s) {
19504
- stream << "{\n";
19505
- typename Scope<T>::const_iterator iter;
19506
- for (iter = s.cbegin(); iter != s.cend(); ++iter) {
19507
- stream << " " << iter.name() << "\n";
19508
- }
19509
- stream << "}";
19510
- return stream;
19511
- }
19512
-
19513
- /** Helper class for pushing/popping Scope<> values, to allow
19514
- * for early-exit in Visitor/Mutators that preserves correctness.
19515
- * Note that this name can be a bit confusing, since there are two "scopes"
19516
- * involved here:
19517
- * - the Scope object itself
19518
- * - the lifetime of this helper object
19519
- * The "Scoped" in this class name refers to the latter, as it temporarily binds
19520
- * a name within the scope of this helper's lifetime. */
19521
- template<typename T = void>
19522
- struct ScopedBinding {
19523
- Scope<T> *scope = nullptr;
19524
- typename Scope<T>::PushToken token;
19525
-
19526
- ScopedBinding() = default;
19527
-
19528
- ScopedBinding(Scope<T> &s, const std::string &n, T value)
19529
- : scope(&s), token(scope->push(n, std::move(value))) {
19530
- }
19531
-
19532
- ScopedBinding(bool condition, Scope<T> &s, const std::string &n, const T &value)
19533
- : scope(condition ? &s : nullptr),
19534
- token(condition ? scope->push(n, value) : typename Scope<T>::PushToken{}) {
19535
- }
19536
-
19537
- bool bound() const {
19538
- return scope != nullptr;
19539
- }
19540
-
19541
- ~ScopedBinding() {
19542
- if (scope) {
19543
- scope->pop(token);
19544
- }
19545
- }
19546
-
19547
- // allow move but not copy
19548
- ScopedBinding(const ScopedBinding &that) = delete;
19549
- ScopedBinding(ScopedBinding &&that) noexcept
19550
- : scope(that.scope),
19551
- token(that.token) {
19552
- // The move constructor must null out scope, so we don't try to pop it
19553
- that.scope = nullptr;
19554
- }
19555
-
19556
- void operator=(const ScopedBinding &that) = delete;
19557
- void operator=(ScopedBinding &&that) = delete;
19558
- };
19559
-
19560
- template<>
19561
- struct ScopedBinding<void> {
19562
- Scope<> *scope;
19563
- Scope<>::PushToken token;
19564
- ScopedBinding(Scope<> &s, const std::string &n)
19565
- : scope(&s), token(scope->push(n)) {
19566
- }
19567
- ScopedBinding(bool condition, Scope<> &s, const std::string &n)
19568
- : scope(condition ? &s : nullptr),
19569
- token(condition ? scope->push(n) : Scope<>::PushToken{}) {
19570
- }
19571
- ~ScopedBinding() {
19572
- if (scope) {
19573
- scope->pop(token);
19574
- }
19575
- }
19576
-
19577
- // allow move but not copy
19578
- ScopedBinding(const ScopedBinding &that) = delete;
19579
- ScopedBinding(ScopedBinding &&that) noexcept
19580
- : scope(that.scope),
19581
- token(that.token) {
19582
- // The move constructor must null out scope, so we don't try to pop it
19583
- that.scope = nullptr;
19584
- }
19585
-
19586
- void operator=(const ScopedBinding &that) = delete;
19587
- void operator=(ScopedBinding &&that) = delete;
19588
- };
19589
-
19590
- } // namespace Internal
19591
- } // namespace Halide
19592
-
19593
19894
  #endif
19594
19895
 
19595
19896
  namespace Halide {
@@ -20029,7 +20330,7 @@ bool graph_equal(const IRNode &a, const IRNode &b) {
20029
20330
  } else if (a.node_type != b.node_type) {
20030
20331
  return false;
20031
20332
  } else {
20032
- return equal_impl(a, b);
20333
+ return graph_equal_impl(a, b);
20033
20334
  }
20034
20335
  }
20035
20336
 
@@ -20042,7 +20343,7 @@ bool graph_equal(const IRHandle &a, const IRHandle &b) {
20042
20343
  } else if (!b.defined()) {
20043
20344
  return false;
20044
20345
  } else {
20045
- return equal(*(a.get()), *(b.get()));
20346
+ return graph_equal(*(a.get()), *(b.get()));
20046
20347
  }
20047
20348
  }
20048
20349
 
@@ -20438,8 +20739,10 @@ protected:
20438
20739
  // @}
20439
20740
 
20440
20741
  private:
20441
- /** The nodes visited so far */
20442
- std::set<IRHandle> visited;
20742
+ /** The nodes visited so far. Only includes nodes with a ref count greater
20743
+ * than one, because we know that nodes with a ref count of 1 will only be
20744
+ * visited once if their parents are only visited once. */
20745
+ std::set<const IRNode *> visited;
20443
20746
 
20444
20747
  protected:
20445
20748
  /** These methods should call 'include' on the children to only
@@ -20822,360 +21125,6 @@ void propagate_estimate_test();
20822
21125
  } // namespace Internal
20823
21126
  } // namespace Halide
20824
21127
 
20825
- #endif
20826
- #ifndef HALIDE_BOUNDARY_CONDITIONS_H
20827
- #define HALIDE_BOUNDARY_CONDITIONS_H
20828
-
20829
- /** \file
20830
- * Support for imposing boundary conditions on Halide::Funcs.
20831
- */
20832
-
20833
- #include <vector>
20834
-
20835
- #ifndef HALIDE_LAMBDA_H
20836
- #define HALIDE_LAMBDA_H
20837
-
20838
-
20839
- /** \file
20840
- * Convenience functions for creating small anonymous Halide
20841
- * functions. See test/lambda.cpp for example usage. */
20842
-
20843
- namespace Halide {
20844
-
20845
- /** Create a zero-dimensional halide function that returns the given
20846
- * expression. The function may have more dimensions if the expression
20847
- * contains implicit arguments. */
20848
- Func lambda(const Expr &e);
20849
-
20850
- /** Create a 1-D halide function in the first argument that returns
20851
- * the second argument. The function may have more dimensions if the
20852
- * expression contains implicit arguments and the list of Var
20853
- * arguments contains a placeholder ("_"). */
20854
- Func lambda(const Var &x, const Expr &e);
20855
-
20856
- /** Create a 2-D halide function in the first two arguments that
20857
- * returns the last argument. The function may have more dimensions if
20858
- * the expression contains implicit arguments and the list of Var
20859
- * arguments contains a placeholder ("_"). */
20860
- Func lambda(const Var &x, const Var &y, const Expr &e);
20861
-
20862
- /** Create a 3-D halide function in the first three arguments that
20863
- * returns the last argument. The function may have more dimensions
20864
- * if the expression contains implicit arguments and the list of Var
20865
- * arguments contains a placeholder ("_"). */
20866
- Func lambda(const Var &x, const Var &y, const Var &z, const Expr &e);
20867
-
20868
- /** Create a 4-D halide function in the first four arguments that
20869
- * returns the last argument. The function may have more dimensions if
20870
- * the expression contains implicit arguments and the list of Var
20871
- * arguments contains a placeholder ("_"). */
20872
- Func lambda(const Var &x, const Var &y, const Var &z, const Var &w, const Expr &e);
20873
-
20874
- /** Create a 5-D halide function in the first five arguments that
20875
- * returns the last argument. The function may have more dimensions if
20876
- * the expression contains implicit arguments and the list of Var
20877
- * arguments contains a placeholder ("_"). */
20878
- Func lambda(const Var &x, const Var &y, const Var &z, const Var &w, const Var &v, const Expr &e);
20879
-
20880
- } // namespace Halide
20881
-
20882
- #endif // HALIDE_LAMBDA_H
20883
-
20884
- namespace Halide {
20885
-
20886
- /** namespace to hold functions for imposing boundary conditions on
20887
- * Halide Funcs.
20888
- *
20889
- * All functions in this namespace transform a source Func to a
20890
- * result Func where the result produces the values of the source
20891
- * within a given region and a different set of values outside the
20892
- * given region. A region is an N dimensional box specified by
20893
- * mins and extents.
20894
- *
20895
- * Three areas are defined:
20896
- * The image is the entire set of values in the region.
20897
- * The edge is the set of pixels in the image but adjacent
20898
- * to coordinates that are not
20899
- * The interior is the image minus the edge (and is undefined
20900
- * if the extent of any region is 1 or less).
20901
- *
20902
- * If the source Func has more dimensions than are specified, the extra ones
20903
- * are unmodified. Additionally, passing an undefined (default constructed)
20904
- * 'Expr' for the min and extent of a dimension will keep that dimension
20905
- * unmodified.
20906
- *
20907
- * Numerous options for specifing the outside area are provided,
20908
- * including replacement with an expression, repeating the edge
20909
- * samples, mirroring over the edge, and repeating or mirroring the
20910
- * entire image.
20911
- *
20912
- * Using these functions to express your boundary conditions is highly
20913
- * recommended for correctness and performance. Some of these are hard
20914
- * to get right. The versions here are both understood by bounds
20915
- * inference, and also judiciously use the 'likely' intrinsic to minimize
20916
- * runtime overhead.
20917
- *
20918
- */
20919
- namespace BoundaryConditions {
20920
-
20921
- namespace Internal {
20922
-
20923
- inline HALIDE_NO_USER_CODE_INLINE void collect_region(Region &collected_args,
20924
- const Expr &a1, const Expr &a2) {
20925
- collected_args.emplace_back(a1, a2);
20926
- }
20927
-
20928
- template<typename... Args>
20929
- inline HALIDE_NO_USER_CODE_INLINE void collect_region(Region &collected_args,
20930
- const Expr &a1, const Expr &a2, Args &&...args) {
20931
- collected_args.emplace_back(a1, a2);
20932
- collect_region(collected_args, std::forward<Args>(args)...);
20933
- }
20934
-
20935
- inline const Func &func_like_to_func(const Func &func) {
20936
- return func;
20937
- }
20938
-
20939
- template<typename T>
20940
- inline HALIDE_NO_USER_CODE_INLINE Func func_like_to_func(const T &func_like) {
20941
- return lambda(_, func_like(_));
20942
- }
20943
-
20944
- } // namespace Internal
20945
-
20946
- /** Impose a boundary condition such that a given expression is returned
20947
- * everywhere outside the boundary. Generally the expression will be a
20948
- * constant, though the code currently allows accessing the arguments
20949
- * of source.
20950
- *
20951
- * An ImageParam, Buffer<T>, or similar can be passed instead of a
20952
- * Func. If this is done and no bounds are given, the boundaries will
20953
- * be taken from the min and extent methods of the passed
20954
- * object. Note that objects are taken by mutable ref. Pipelines
20955
- * capture Buffers via mutable refs, because running a pipeline might
20956
- * alter the Buffer metadata (e.g. device allocation state).
20957
- *
20958
- * (This is similar to setting GL_TEXTURE_WRAP_* to GL_CLAMP_TO_BORDER
20959
- * and putting value in the border of the texture.)
20960
- *
20961
- * You may pass undefined Exprs for dimensions that you do not wish
20962
- * to bound.
20963
- */
20964
- // @{
20965
- Func constant_exterior(const Func &source, const Tuple &value,
20966
- const Region &bounds);
20967
- Func constant_exterior(const Func &source, const Expr &value,
20968
- const Region &bounds);
20969
-
20970
- template<typename T>
20971
- HALIDE_NO_USER_CODE_INLINE Func constant_exterior(const T &func_like, const Tuple &value, const Region &bounds) {
20972
- return constant_exterior(Internal::func_like_to_func(func_like), value, bounds);
20973
- }
20974
-
20975
- template<typename T>
20976
- HALIDE_NO_USER_CODE_INLINE Func constant_exterior(const T &func_like, const Expr &value, const Region &bounds) {
20977
- return constant_exterior(Internal::func_like_to_func(func_like), value, bounds);
20978
- }
20979
-
20980
- template<typename T>
20981
- HALIDE_NO_USER_CODE_INLINE Func constant_exterior(const T &func_like, const Tuple &value) {
20982
- Region object_bounds;
20983
- for (int i = 0; i < func_like.dimensions(); i++) {
20984
- object_bounds.emplace_back(Expr(func_like.dim(i).min()), Expr(func_like.dim(i).extent()));
20985
- }
20986
-
20987
- return constant_exterior(Internal::func_like_to_func(func_like), value, object_bounds);
20988
- }
20989
- template<typename T>
20990
- HALIDE_NO_USER_CODE_INLINE Func constant_exterior(const T &func_like, const Expr &value) {
20991
- return constant_exterior(func_like, Tuple(value));
20992
- }
20993
-
20994
- template<typename T, typename... Bounds,
20995
- typename std::enable_if<Halide::Internal::all_are_convertible<Expr, Bounds...>::value>::type * = nullptr>
20996
- HALIDE_NO_USER_CODE_INLINE Func constant_exterior(const T &func_like, const Tuple &value,
20997
- Bounds &&...bounds) {
20998
- Region collected_bounds;
20999
- Internal::collect_region(collected_bounds, std::forward<Bounds>(bounds)...);
21000
- return constant_exterior(Internal::func_like_to_func(func_like), value, collected_bounds);
21001
- }
21002
- template<typename T, typename... Bounds,
21003
- typename std::enable_if<Halide::Internal::all_are_convertible<Expr, Bounds...>::value>::type * = nullptr>
21004
- HALIDE_NO_USER_CODE_INLINE Func constant_exterior(const T &func_like, const Expr &value,
21005
- Bounds &&...bounds) {
21006
- return constant_exterior(func_like, Tuple(value), std::forward<Bounds>(bounds)...);
21007
- }
21008
- // @}
21009
-
21010
- /** Impose a boundary condition such that the nearest edge sample is returned
21011
- * everywhere outside the given region.
21012
- *
21013
- * An ImageParam, Buffer<T>, or similar can be passed instead of a Func. If this
21014
- * is done and no bounds are given, the boundaries will be taken from the
21015
- * min and extent methods of the passed object.
21016
- *
21017
- * (This is similar to setting GL_TEXTURE_WRAP_* to GL_CLAMP_TO_EDGE.)
21018
- *
21019
- * You may pass undefined Exprs for dimensions that you do not wish
21020
- * to bound.
21021
- */
21022
- // @{
21023
- Func repeat_edge(const Func &source, const Region &bounds);
21024
-
21025
- template<typename T>
21026
- HALIDE_NO_USER_CODE_INLINE Func repeat_edge(const T &func_like, const Region &bounds) {
21027
- return repeat_edge(Internal::func_like_to_func(func_like), bounds);
21028
- }
21029
-
21030
- template<typename T>
21031
- HALIDE_NO_USER_CODE_INLINE Func repeat_edge(const T &func_like) {
21032
- Region object_bounds;
21033
- for (int i = 0; i < func_like.dimensions(); i++) {
21034
- object_bounds.emplace_back(Expr(func_like.dim(i).min()), Expr(func_like.dim(i).extent()));
21035
- }
21036
-
21037
- return repeat_edge(Internal::func_like_to_func(func_like), object_bounds);
21038
- }
21039
- // @}
21040
-
21041
- /** Impose a boundary condition such that the entire coordinate space is
21042
- * tiled with copies of the image abutted against each other.
21043
- *
21044
- * An ImageParam, Buffer<T>, or similar can be passed instead of a Func. If this
21045
- * is done and no bounds are given, the boundaries will be taken from the
21046
- * min and extent methods of the passed object.
21047
- *
21048
- * (This is similar to setting GL_TEXTURE_WRAP_* to GL_REPEAT.)
21049
- *
21050
- * You may pass undefined Exprs for dimensions that you do not wish
21051
- * to bound.
21052
- */
21053
- // @{
21054
- Func repeat_image(const Func &source, const Region &bounds);
21055
-
21056
- template<typename T>
21057
- HALIDE_NO_USER_CODE_INLINE Func repeat_image(const T &func_like, const Region &bounds) {
21058
- return repeat_image(Internal::func_like_to_func(func_like), bounds);
21059
- }
21060
-
21061
- template<typename T>
21062
- HALIDE_NO_USER_CODE_INLINE Func repeat_image(const T &func_like) {
21063
- Region object_bounds;
21064
- for (int i = 0; i < func_like.dimensions(); i++) {
21065
- object_bounds.emplace_back(Expr(func_like.dim(i).min()), Expr(func_like.dim(i).extent()));
21066
- }
21067
-
21068
- return repeat_image(Internal::func_like_to_func(func_like), object_bounds);
21069
- }
21070
-
21071
- /** Impose a boundary condition such that the entire coordinate space is
21072
- * tiled with copies of the image abutted against each other, but mirror
21073
- * them such that adjacent edges are the same.
21074
- *
21075
- * An ImageParam, Buffer<T>, or similar can be passed instead of a Func. If this
21076
- * is done and no bounds are given, the boundaries will be taken from the
21077
- * min and extent methods of the passed object.
21078
- *
21079
- * (This is similar to setting GL_TEXTURE_WRAP_* to GL_MIRRORED_REPEAT.)
21080
- *
21081
- * You may pass undefined Exprs for dimensions that you do not wish
21082
- * to bound.
21083
- */
21084
- // @{
21085
- Func mirror_image(const Func &source, const Region &bounds);
21086
-
21087
- template<typename T>
21088
- HALIDE_NO_USER_CODE_INLINE Func mirror_image(const T &func_like, const Region &bounds) {
21089
- return mirror_image(Internal::func_like_to_func(func_like), bounds);
21090
- }
21091
-
21092
- template<typename T>
21093
- HALIDE_NO_USER_CODE_INLINE Func mirror_image(const T &func_like) {
21094
- Region object_bounds;
21095
- for (int i = 0; i < func_like.dimensions(); i++) {
21096
- object_bounds.emplace_back(Expr(func_like.dim(i).min()), Expr(func_like.dim(i).extent()));
21097
- }
21098
-
21099
- return mirror_image(Internal::func_like_to_func(func_like), object_bounds);
21100
- }
21101
-
21102
- // @}
21103
-
21104
- /** Impose a boundary condition such that the entire coordinate space is
21105
- * tiled with copies of the image abutted against each other, but mirror
21106
- * them such that adjacent edges are the same and then overlap the edges.
21107
- *
21108
- * This produces an error if any extent is 1 or less. (TODO: check this.)
21109
- *
21110
- * An ImageParam, Buffer<T>, or similar can be passed instead of a Func. If this
21111
- * is done and no bounds are given, the boundaries will be taken from the
21112
- * min and extent methods of the passed object.
21113
- *
21114
- * (I do not believe there is a direct GL_TEXTURE_WRAP_* equivalent for this.)
21115
- *
21116
- * You may pass undefined Exprs for dimensions that you do not wish
21117
- * to bound.
21118
- */
21119
- // @{
21120
- Func mirror_interior(const Func &source, const Region &bounds);
21121
-
21122
- template<typename T>
21123
- HALIDE_NO_USER_CODE_INLINE Func mirror_interior(const T &func_like, const Region &bounds) {
21124
- return mirror_interior(Internal::func_like_to_func(func_like), bounds);
21125
- }
21126
-
21127
- template<typename T>
21128
- HALIDE_NO_USER_CODE_INLINE Func mirror_interior(const T &func_like) {
21129
- Region object_bounds;
21130
- for (int i = 0; i < func_like.dimensions(); i++) {
21131
- object_bounds.emplace_back(Expr(func_like.dim(i).min()), Expr(func_like.dim(i).extent()));
21132
- }
21133
-
21134
- return mirror_interior(Internal::func_like_to_func(func_like), object_bounds);
21135
- }
21136
-
21137
- // @}
21138
-
21139
- } // namespace BoundaryConditions
21140
-
21141
- } // namespace Halide
21142
-
21143
- #endif
21144
- #ifndef HALIDE_BOUNDS_INFERENCE_H
21145
- #define HALIDE_BOUNDS_INFERENCE_H
21146
-
21147
- /** \file
21148
- * Defines the bounds_inference lowering pass.
21149
- */
21150
-
21151
- #include <map>
21152
- #include <string>
21153
- #include <vector>
21154
-
21155
-
21156
- namespace Halide {
21157
-
21158
- struct Target;
21159
-
21160
- namespace Internal {
21161
-
21162
- class Function;
21163
-
21164
- /** Take a partially lowered statement that includes symbolic
21165
- * representations of the bounds over which things should be realized,
21166
- * and inject expressions defining those bounds.
21167
- */
21168
- Stmt bounds_inference(Stmt,
21169
- const std::vector<Function> &outputs,
21170
- const std::vector<std::string> &realization_order,
21171
- const std::vector<std::vector<std::string>> &fused_groups,
21172
- const std::map<std::string, Function> &environment,
21173
- const std::map<std::pair<std::string, int>, Interval> &func_bounds,
21174
- const Target &target);
21175
-
21176
- } // namespace Internal
21177
- } // namespace Halide
21178
-
21179
21128
  #endif
21180
21129
  #ifndef HALIDE_BOUND_CONSTANT_EXTENT_LOOPS_H
21181
21130
  #define HALIDE_BOUND_CONSTANT_EXTENT_LOOPS_H
@@ -21223,6 +21172,431 @@ Stmt bound_small_allocations(const Stmt &s);
21223
21172
  } // namespace Internal
21224
21173
  } // namespace Halide
21225
21174
 
21175
+ #endif
21176
+ #ifndef HALIDE_BOUNDARY_CONDITIONS_H
21177
+ #define HALIDE_BOUNDARY_CONDITIONS_H
21178
+
21179
+ /** \file
21180
+ * Support for imposing boundary conditions on Halide::Funcs.
21181
+ */
21182
+
21183
+ #include <vector>
21184
+
21185
+ #ifndef HALIDE_LAMBDA_H
21186
+ #define HALIDE_LAMBDA_H
21187
+
21188
+
21189
+ /** \file
21190
+ * Convenience functions for creating small anonymous Halide
21191
+ * functions. See test/lambda.cpp for example usage. */
21192
+
21193
+ namespace Halide {
21194
+
21195
+ /** Create a zero-dimensional halide function that returns the given
21196
+ * expression. The function may have more dimensions if the expression
21197
+ * contains implicit arguments. */
21198
+ Func lambda(const Expr &e);
21199
+
21200
+ /** Create a 1-D halide function in the first argument that returns
21201
+ * the second argument. The function may have more dimensions if the
21202
+ * expression contains implicit arguments and the list of Var
21203
+ * arguments contains a placeholder ("_"). */
21204
+ Func lambda(const Var &x, const Expr &e);
21205
+
21206
+ /** Create a 2-D halide function in the first two arguments that
21207
+ * returns the last argument. The function may have more dimensions if
21208
+ * the expression contains implicit arguments and the list of Var
21209
+ * arguments contains a placeholder ("_"). */
21210
+ Func lambda(const Var &x, const Var &y, const Expr &e);
21211
+
21212
+ /** Create a 3-D halide function in the first three arguments that
21213
+ * returns the last argument. The function may have more dimensions
21214
+ * if the expression contains implicit arguments and the list of Var
21215
+ * arguments contains a placeholder ("_"). */
21216
+ Func lambda(const Var &x, const Var &y, const Var &z, const Expr &e);
21217
+
21218
+ /** Create a 4-D halide function in the first four arguments that
21219
+ * returns the last argument. The function may have more dimensions if
21220
+ * the expression contains implicit arguments and the list of Var
21221
+ * arguments contains a placeholder ("_"). */
21222
+ Func lambda(const Var &x, const Var &y, const Var &z, const Var &w, const Expr &e);
21223
+
21224
+ /** Create a 5-D halide function in the first five arguments that
21225
+ * returns the last argument. The function may have more dimensions if
21226
+ * the expression contains implicit arguments and the list of Var
21227
+ * arguments contains a placeholder ("_"). */
21228
+ Func lambda(const Var &x, const Var &y, const Var &z, const Var &w, const Var &v, const Expr &e);
21229
+
21230
+ } // namespace Halide
21231
+
21232
+ #endif // HALIDE_LAMBDA_H
21233
+
21234
+ namespace Halide {
21235
+
21236
+ /** namespace to hold functions for imposing boundary conditions on
21237
+ * Halide Funcs.
21238
+ *
21239
+ * All functions in this namespace transform a source Func to a
21240
+ * result Func where the result produces the values of the source
21241
+ * within a given region and a different set of values outside the
21242
+ * given region. A region is an N dimensional box specified by
21243
+ * mins and extents.
21244
+ *
21245
+ * Three areas are defined:
21246
+ * The image is the entire set of values in the region.
21247
+ * The edge is the set of pixels in the image but adjacent
21248
+ * to coordinates that are not
21249
+ * The interior is the image minus the edge (and is undefined
21250
+ * if the extent of any region is 1 or less).
21251
+ *
21252
+ * If the source Func has more dimensions than are specified, the extra ones
21253
+ * are unmodified. Additionally, passing an undefined (default constructed)
21254
+ * 'Expr' for the min and extent of a dimension will keep that dimension
21255
+ * unmodified.
21256
+ *
21257
+ * Numerous options for specifing the outside area are provided,
21258
+ * including replacement with an expression, repeating the edge
21259
+ * samples, mirroring over the edge, and repeating or mirroring the
21260
+ * entire image.
21261
+ *
21262
+ * Using these functions to express your boundary conditions is highly
21263
+ * recommended for correctness and performance. Some of these are hard
21264
+ * to get right. The versions here are both understood by bounds
21265
+ * inference, and also judiciously use the 'likely' intrinsic to minimize
21266
+ * runtime overhead.
21267
+ *
21268
+ */
21269
+ namespace BoundaryConditions {
21270
+
21271
+ namespace Internal {
21272
+
21273
+ inline HALIDE_NO_USER_CODE_INLINE void collect_region(Region &collected_args,
21274
+ const Expr &a1, const Expr &a2) {
21275
+ collected_args.emplace_back(a1, a2);
21276
+ }
21277
+
21278
+ template<typename... Args>
21279
+ inline HALIDE_NO_USER_CODE_INLINE void collect_region(Region &collected_args,
21280
+ const Expr &a1, const Expr &a2, Args &&...args) {
21281
+ collected_args.emplace_back(a1, a2);
21282
+ collect_region(collected_args, std::forward<Args>(args)...);
21283
+ }
21284
+
21285
+ inline const Func &func_like_to_func(const Func &func) {
21286
+ return func;
21287
+ }
21288
+
21289
+ template<typename T>
21290
+ inline HALIDE_NO_USER_CODE_INLINE Func func_like_to_func(const T &func_like) {
21291
+ return lambda(_, func_like(_));
21292
+ }
21293
+
21294
+ } // namespace Internal
21295
+
21296
+ /** Impose a boundary condition such that a given expression is returned
21297
+ * everywhere outside the boundary. Generally the expression will be a
21298
+ * constant, though the code currently allows accessing the arguments
21299
+ * of source.
21300
+ *
21301
+ * An ImageParam, Buffer<T>, or similar can be passed instead of a
21302
+ * Func. If this is done and no bounds are given, the boundaries will
21303
+ * be taken from the min and extent methods of the passed
21304
+ * object. Note that objects are taken by mutable ref. Pipelines
21305
+ * capture Buffers via mutable refs, because running a pipeline might
21306
+ * alter the Buffer metadata (e.g. device allocation state).
21307
+ *
21308
+ * (This is similar to setting GL_TEXTURE_WRAP_* to GL_CLAMP_TO_BORDER
21309
+ * and putting value in the border of the texture.)
21310
+ *
21311
+ * You may pass undefined Exprs for dimensions that you do not wish
21312
+ * to bound.
21313
+ */
21314
+ // @{
21315
+ Func constant_exterior(const Func &source, const Tuple &value,
21316
+ const Region &bounds);
21317
+ Func constant_exterior(const Func &source, const Expr &value,
21318
+ const Region &bounds);
21319
+
21320
+ template<typename T>
21321
+ HALIDE_NO_USER_CODE_INLINE Func constant_exterior(const T &func_like, const Tuple &value, const Region &bounds) {
21322
+ return constant_exterior(Internal::func_like_to_func(func_like), value, bounds);
21323
+ }
21324
+
21325
+ template<typename T>
21326
+ HALIDE_NO_USER_CODE_INLINE Func constant_exterior(const T &func_like, const Expr &value, const Region &bounds) {
21327
+ return constant_exterior(Internal::func_like_to_func(func_like), value, bounds);
21328
+ }
21329
+
21330
+ template<typename T>
21331
+ HALIDE_NO_USER_CODE_INLINE Func constant_exterior(const T &func_like, const Tuple &value) {
21332
+ Region object_bounds;
21333
+ for (int i = 0; i < func_like.dimensions(); i++) {
21334
+ object_bounds.emplace_back(Expr(func_like.dim(i).min()), Expr(func_like.dim(i).extent()));
21335
+ }
21336
+
21337
+ return constant_exterior(Internal::func_like_to_func(func_like), value, object_bounds);
21338
+ }
21339
+ template<typename T>
21340
+ HALIDE_NO_USER_CODE_INLINE Func constant_exterior(const T &func_like, const Expr &value) {
21341
+ return constant_exterior(func_like, Tuple(value));
21342
+ }
21343
+
21344
+ template<typename T, typename... Bounds,
21345
+ typename std::enable_if<Halide::Internal::all_are_convertible<Expr, Bounds...>::value>::type * = nullptr>
21346
+ HALIDE_NO_USER_CODE_INLINE Func constant_exterior(const T &func_like, const Tuple &value,
21347
+ Bounds &&...bounds) {
21348
+ Region collected_bounds;
21349
+ Internal::collect_region(collected_bounds, std::forward<Bounds>(bounds)...);
21350
+ return constant_exterior(Internal::func_like_to_func(func_like), value, collected_bounds);
21351
+ }
21352
+ template<typename T, typename... Bounds,
21353
+ typename std::enable_if<Halide::Internal::all_are_convertible<Expr, Bounds...>::value>::type * = nullptr>
21354
+ HALIDE_NO_USER_CODE_INLINE Func constant_exterior(const T &func_like, const Expr &value,
21355
+ Bounds &&...bounds) {
21356
+ return constant_exterior(func_like, Tuple(value), std::forward<Bounds>(bounds)...);
21357
+ }
21358
+ // @}
21359
+
21360
+ /** Impose a boundary condition such that the nearest edge sample is returned
21361
+ * everywhere outside the given region.
21362
+ *
21363
+ * An ImageParam, Buffer<T>, or similar can be passed instead of a Func. If this
21364
+ * is done and no bounds are given, the boundaries will be taken from the
21365
+ * min and extent methods of the passed object.
21366
+ *
21367
+ * (This is similar to setting GL_TEXTURE_WRAP_* to GL_CLAMP_TO_EDGE.)
21368
+ *
21369
+ * You may pass undefined Exprs for dimensions that you do not wish
21370
+ * to bound.
21371
+ */
21372
+ // @{
21373
+ Func repeat_edge(const Func &source, const Region &bounds);
21374
+
21375
+ template<typename T>
21376
+ HALIDE_NO_USER_CODE_INLINE Func repeat_edge(const T &func_like, const Region &bounds) {
21377
+ return repeat_edge(Internal::func_like_to_func(func_like), bounds);
21378
+ }
21379
+
21380
+ template<typename T>
21381
+ HALIDE_NO_USER_CODE_INLINE Func repeat_edge(const T &func_like) {
21382
+ Region object_bounds;
21383
+ for (int i = 0; i < func_like.dimensions(); i++) {
21384
+ object_bounds.emplace_back(Expr(func_like.dim(i).min()), Expr(func_like.dim(i).extent()));
21385
+ }
21386
+
21387
+ return repeat_edge(Internal::func_like_to_func(func_like), object_bounds);
21388
+ }
21389
+ // @}
21390
+
21391
+ /** Impose a boundary condition such that the entire coordinate space is
21392
+ * tiled with copies of the image abutted against each other.
21393
+ *
21394
+ * An ImageParam, Buffer<T>, or similar can be passed instead of a Func. If this
21395
+ * is done and no bounds are given, the boundaries will be taken from the
21396
+ * min and extent methods of the passed object.
21397
+ *
21398
+ * (This is similar to setting GL_TEXTURE_WRAP_* to GL_REPEAT.)
21399
+ *
21400
+ * You may pass undefined Exprs for dimensions that you do not wish
21401
+ * to bound.
21402
+ */
21403
+ // @{
21404
+ Func repeat_image(const Func &source, const Region &bounds);
21405
+
21406
+ template<typename T>
21407
+ HALIDE_NO_USER_CODE_INLINE Func repeat_image(const T &func_like, const Region &bounds) {
21408
+ return repeat_image(Internal::func_like_to_func(func_like), bounds);
21409
+ }
21410
+
21411
+ template<typename T>
21412
+ HALIDE_NO_USER_CODE_INLINE Func repeat_image(const T &func_like) {
21413
+ Region object_bounds;
21414
+ for (int i = 0; i < func_like.dimensions(); i++) {
21415
+ object_bounds.emplace_back(Expr(func_like.dim(i).min()), Expr(func_like.dim(i).extent()));
21416
+ }
21417
+
21418
+ return repeat_image(Internal::func_like_to_func(func_like), object_bounds);
21419
+ }
21420
+
21421
+ /** Impose a boundary condition such that the entire coordinate space is
21422
+ * tiled with copies of the image abutted against each other, but mirror
21423
+ * them such that adjacent edges are the same.
21424
+ *
21425
+ * An ImageParam, Buffer<T>, or similar can be passed instead of a Func. If this
21426
+ * is done and no bounds are given, the boundaries will be taken from the
21427
+ * min and extent methods of the passed object.
21428
+ *
21429
+ * (This is similar to setting GL_TEXTURE_WRAP_* to GL_MIRRORED_REPEAT.)
21430
+ *
21431
+ * You may pass undefined Exprs for dimensions that you do not wish
21432
+ * to bound.
21433
+ */
21434
+ // @{
21435
+ Func mirror_image(const Func &source, const Region &bounds);
21436
+
21437
+ template<typename T>
21438
+ HALIDE_NO_USER_CODE_INLINE Func mirror_image(const T &func_like, const Region &bounds) {
21439
+ return mirror_image(Internal::func_like_to_func(func_like), bounds);
21440
+ }
21441
+
21442
+ template<typename T>
21443
+ HALIDE_NO_USER_CODE_INLINE Func mirror_image(const T &func_like) {
21444
+ Region object_bounds;
21445
+ for (int i = 0; i < func_like.dimensions(); i++) {
21446
+ object_bounds.emplace_back(Expr(func_like.dim(i).min()), Expr(func_like.dim(i).extent()));
21447
+ }
21448
+
21449
+ return mirror_image(Internal::func_like_to_func(func_like), object_bounds);
21450
+ }
21451
+
21452
+ // @}
21453
+
21454
+ /** Impose a boundary condition such that the entire coordinate space is
21455
+ * tiled with copies of the image abutted against each other, but mirror
21456
+ * them such that adjacent edges are the same and then overlap the edges.
21457
+ *
21458
+ * This produces an error if any extent is 1 or less. (TODO: check this.)
21459
+ *
21460
+ * An ImageParam, Buffer<T>, or similar can be passed instead of a Func. If this
21461
+ * is done and no bounds are given, the boundaries will be taken from the
21462
+ * min and extent methods of the passed object.
21463
+ *
21464
+ * (I do not believe there is a direct GL_TEXTURE_WRAP_* equivalent for this.)
21465
+ *
21466
+ * You may pass undefined Exprs for dimensions that you do not wish
21467
+ * to bound.
21468
+ */
21469
+ // @{
21470
+ Func mirror_interior(const Func &source, const Region &bounds);
21471
+
21472
+ template<typename T>
21473
+ HALIDE_NO_USER_CODE_INLINE Func mirror_interior(const T &func_like, const Region &bounds) {
21474
+ return mirror_interior(Internal::func_like_to_func(func_like), bounds);
21475
+ }
21476
+
21477
+ template<typename T>
21478
+ HALIDE_NO_USER_CODE_INLINE Func mirror_interior(const T &func_like) {
21479
+ Region object_bounds;
21480
+ for (int i = 0; i < func_like.dimensions(); i++) {
21481
+ object_bounds.emplace_back(Expr(func_like.dim(i).min()), Expr(func_like.dim(i).extent()));
21482
+ }
21483
+
21484
+ return mirror_interior(Internal::func_like_to_func(func_like), object_bounds);
21485
+ }
21486
+
21487
+ // @}
21488
+
21489
+ } // namespace BoundaryConditions
21490
+
21491
+ } // namespace Halide
21492
+
21493
+ #endif
21494
+ #ifndef HALIDE_BOUNDS_INFERENCE_H
21495
+ #define HALIDE_BOUNDS_INFERENCE_H
21496
+
21497
+ /** \file
21498
+ * Defines the bounds_inference lowering pass.
21499
+ */
21500
+
21501
+ #include <map>
21502
+ #include <string>
21503
+ #include <vector>
21504
+
21505
+
21506
+ namespace Halide {
21507
+
21508
+ struct Target;
21509
+
21510
+ namespace Internal {
21511
+
21512
+ class Function;
21513
+
21514
+ /** Take a partially lowered statement that includes symbolic
21515
+ * representations of the bounds over which things should be realized,
21516
+ * and inject expressions defining those bounds.
21517
+ */
21518
+ Stmt bounds_inference(Stmt,
21519
+ const std::vector<Function> &outputs,
21520
+ const std::vector<std::string> &realization_order,
21521
+ const std::vector<std::vector<std::string>> &fused_groups,
21522
+ const std::map<std::string, Function> &environment,
21523
+ const std::map<std::pair<std::string, int>, Interval> &func_bounds,
21524
+ const Target &target);
21525
+
21526
+ } // namespace Internal
21527
+ } // namespace Halide
21528
+
21529
+ #endif
21530
+ #ifndef HALIDE_CPLUSPLUS_MANGLE_H
21531
+ #define HALIDE_CPLUSPLUS_MANGLE_H
21532
+
21533
+ /** \file
21534
+ *
21535
+ * A simple function to get a C++ mangled function name for a function.
21536
+ */
21537
+ #include <string>
21538
+ #include <vector>
21539
+
21540
+
21541
+ namespace Halide {
21542
+
21543
+ struct ExternFuncArgument;
21544
+ struct Target;
21545
+
21546
+ namespace Internal {
21547
+
21548
+ /** Return the mangled C++ name for a function.
21549
+ * The target parameter is used to decide on the C++
21550
+ * ABI/mangling style to use.
21551
+ */
21552
+ std::string cplusplus_function_mangled_name(const std::string &name,
21553
+ const std::vector<std::string> &namespaces,
21554
+ Type return_type,
21555
+ const std::vector<ExternFuncArgument> &args,
21556
+ const Target &target);
21557
+
21558
+ void cplusplus_mangle_test();
21559
+
21560
+ } // namespace Internal
21561
+
21562
+ } // namespace Halide
21563
+
21564
+ #endif
21565
+ #ifndef HALIDE_INTERNAL_CSE_H
21566
+ #define HALIDE_INTERNAL_CSE_H
21567
+
21568
+ /** \file
21569
+ * Defines a pass for introducing let expressions to wrap common sub-expressions. */
21570
+
21571
+
21572
+ namespace Halide {
21573
+ namespace Internal {
21574
+
21575
+ /** Replace each common sub-expression in the argument with a
21576
+ * variable, and wrap the resulting expr in a let statement giving a
21577
+ * value to that variable.
21578
+ *
21579
+ * This is important to do within Halide (instead of punting to llvm),
21580
+ * because exprs that come in from the front-end are small when
21581
+ * considered as a graph, but combinatorially large when considered as
21582
+ * a tree. For an example of a such a case, see
21583
+ * test/code_explosion.cpp
21584
+ *
21585
+ * The last parameter determines whether all common subexpressions are
21586
+ * lifted, or only those that the simplifier would not subsitute back
21587
+ * in (e.g. addition of a constant).
21588
+ */
21589
+ Expr common_subexpression_elimination(const Expr &, bool lift_all = false);
21590
+
21591
+ /** Do common-subexpression-elimination on each expression in a
21592
+ * statement. Does not introduce let statements. */
21593
+ Stmt common_subexpression_elimination(const Stmt &, bool lift_all = false);
21594
+
21595
+ void cse_test();
21596
+
21597
+ } // namespace Internal
21598
+ } // namespace Halide
21599
+
21226
21600
  #endif
21227
21601
  #ifndef HALIDE_CANONICALIZE_GPU_VARS_H
21228
21602
  #define HALIDE_CANONICALIZE_GPU_VARS_H
@@ -21498,6 +21872,24 @@ struct Indentation {
21498
21872
  };
21499
21873
  std::ostream &operator<<(std::ostream &stream, const Indentation &);
21500
21874
 
21875
+ template<typename T>
21876
+ struct Ansi {
21877
+ const T &cnt;
21878
+ const char *open, *close;
21879
+ };
21880
+
21881
+ template<typename T>
21882
+ std::ostream &operator<<(std::ostream &out, const Ansi<T> &a) {
21883
+ if (a.open) {
21884
+ out << a.open;
21885
+ }
21886
+ out << a.cnt;
21887
+ if (a.close) {
21888
+ out << a.close;
21889
+ }
21890
+ return out;
21891
+ }
21892
+
21501
21893
  /** An IRVisitor that emits IR to the given output stream in a human
21502
21894
  * readable form. Can be subclassed if you want to modify the way in
21503
21895
  * which it prints.
@@ -21547,12 +21939,51 @@ protected:
21547
21939
  * ellipses (...). */
21548
21940
  bool is_summary = false;
21549
21941
 
21942
+ bool ansi = false;
21943
+ int paren_depth = 0;
21944
+
21945
+ const char *ansi_hl = "";
21946
+ const char *ansi_dim = "";
21947
+ const char *ansi_kw = "";
21948
+ const char *ansi_imm_int = "";
21949
+ const char *ansi_imm_float = "";
21950
+ const char *ansi_imm_str = "";
21951
+ const char *ansi_var = "";
21952
+ const char *ansi_buf = "";
21953
+ const char *ansi_fn = "";
21954
+ const char *ansi_type = "";
21955
+ const char *ansi_reset_col = "";
21956
+ const char *ansi_reset = "";
21957
+
21958
+ // clang-format off
21959
+ template<typename T> Ansi<T> hl(const T &t);
21960
+ template<typename T> Ansi<T> kw(const T &t);
21961
+ template<typename T> Ansi<T> imm_int(const T &t);
21962
+ template<typename T> Ansi<T> imm_float(const T &t);
21963
+ template<typename T> Ansi<T> imm_str(const T &t);
21964
+ template<typename T> Ansi<T> var(const T &t);
21965
+ template<typename T> Ansi<T> buf(const T &t);
21966
+ template<typename T> Ansi<T> fn(const T &t);
21967
+ template<typename T> Ansi<T> type(const T &t);
21968
+ template<typename T> Ansi<T> typep(const T &t);
21969
+ template<typename T> Ansi<T> paren(const T &t, bool bold = true, int d = -1);
21970
+ // clang-format on
21971
+
21550
21972
  /** Either emits "(" or "", depending on the value of implicit_parens */
21551
21973
  void open();
21552
21974
 
21553
21975
  /** Either emits ")" or "", depending on the value of implicit_parens */
21554
21976
  void close();
21555
21977
 
21978
+ /** Emits "(" always */
21979
+ void openf();
21980
+
21981
+ /** Emits "name(" always */
21982
+ void openf(const char *name);
21983
+
21984
+ /** Emits ")" always */
21985
+ void closef();
21986
+
21556
21987
  /** The symbols whose types can be inferred from values printed
21557
21988
  * already. */
21558
21989
  Scope<> known_type;
@@ -21625,6 +22056,8 @@ std::string lldb_string(const Stmt &);
21625
22056
 
21626
22057
  #endif
21627
22058
 
22059
+ #include <unordered_map>
22060
+
21628
22061
  namespace Halide {
21629
22062
 
21630
22063
  struct Argument;
@@ -21749,6 +22182,8 @@ protected:
21749
22182
  * use different syntax for other C-like languages. */
21750
22183
  virtual void add_vector_typedefs(const std::set<Type> &vector_types);
21751
22184
 
22185
+ std::unordered_map<std::string, std::string> extern_function_name_map;
22186
+
21752
22187
  /** Bottleneck to allow customization of calls to generic Extern/PureExtern calls. */
21753
22188
  virtual std::string print_extern_call(const Call *op);
21754
22189
 
@@ -22153,7 +22588,10 @@ protected:
22153
22588
  void visit(const Shuffle *op) override;
22154
22589
  void visit(const Call *op) override;
22155
22590
 
22591
+ std::string print_extern_call(const Call *op) override;
22592
+
22156
22593
  VectorDeclarationStyle vector_declaration_style = VectorDeclarationStyle::CLikeSyntax;
22594
+ bool abs_returns_unsigned_type{false};
22157
22595
  };
22158
22596
 
22159
22597
  } // namespace Internal
@@ -22292,6 +22730,7 @@ template<typename, typename>
22292
22730
  class IRBuilder;
22293
22731
  class LLVMContext;
22294
22732
  class Type;
22733
+ class PointerType;
22295
22734
  class StructType;
22296
22735
  class Instruction;
22297
22736
  class CallInst;
@@ -22437,10 +22876,31 @@ protected:
22437
22876
  std::unique_ptr<llvm::IRBuilder<llvm::ConstantFolder, llvm::IRBuilderDefaultInserter>> builder;
22438
22877
  llvm::Value *value = nullptr;
22439
22878
  llvm::MDNode *very_likely_branch = nullptr;
22440
- llvm::MDNode *default_fp_math_md = nullptr;
22879
+ llvm::MDNode *fast_fp_math_md = nullptr;
22441
22880
  llvm::MDNode *strict_fp_math_md = nullptr;
22442
22881
  std::vector<LoweredArgument> current_function_args;
22443
22882
 
22883
+ bool in_strict_float = false;
22884
+ bool any_strict_float = false;
22885
+
22886
+ /** Change floating-point math op emission to use fast flags. */
22887
+ void set_fast_fp_math();
22888
+
22889
+ /** Change floating-point math op emission to use strict flags. */
22890
+ void set_strict_fp_math();
22891
+
22892
+ /** If any_strict_float is true, sets fast math flags for the lifetime of
22893
+ * this object, then sets them to strict on destruction. If any_strict_float
22894
+ * is false, does nothing. Any call to an IRBuilder method that starts with
22895
+ * "CreateF" should probably be wrapped in one of these, but it's safe to
22896
+ * miss one - we just miss out on some optimizations. In this way codegen is
22897
+ * designed to fail safe. */
22898
+ struct ScopedFastMath {
22899
+ CodeGen_LLVM *codegen;
22900
+ ScopedFastMath(CodeGen_LLVM *);
22901
+ ~ScopedFastMath();
22902
+ };
22903
+
22444
22904
  /** The target we're generating code for */
22445
22905
  Halide::Target target;
22446
22906
 
@@ -22478,6 +22938,7 @@ protected:
22478
22938
  /** Some useful llvm types */
22479
22939
  // @{
22480
22940
  llvm::Type *void_t = nullptr, *i1_t = nullptr, *i8_t = nullptr, *i16_t = nullptr, *i32_t = nullptr, *i64_t = nullptr, *f16_t = nullptr, *f32_t = nullptr, *f64_t = nullptr;
22941
+ llvm::PointerType *ptr_t = nullptr;
22481
22942
  llvm::StructType *halide_buffer_t_type = nullptr,
22482
22943
  *type_t_type,
22483
22944
  *dimension_t_type,
@@ -22985,7 +23446,7 @@ private:
22985
23446
 
22986
23447
  void codegen_atomic_rmw(const Store *op);
22987
23448
 
22988
- void init_codegen(const std::string &name, bool any_strict_float = false);
23449
+ void init_codegen(const std::string &name);
22989
23450
  std::unique_ptr<llvm::Module> finish_codegen();
22990
23451
 
22991
23452
  /** A helper routine for generating folded vector reductions. */
@@ -23054,6 +23515,29 @@ std::unique_ptr<CodeGen_GPU_Dev> new_CodeGen_OpenCL_Dev(const Target &target);
23054
23515
  } // namespace Internal
23055
23516
  } // namespace Halide
23056
23517
 
23518
+ #endif
23519
+ #ifndef HALIDE_CODEGEN_PTX_DEV_H
23520
+ #define HALIDE_CODEGEN_PTX_DEV_H
23521
+
23522
+ /** \file
23523
+ * Defines the code-generator for producing CUDA host code
23524
+ */
23525
+
23526
+ #include <memory>
23527
+
23528
+ namespace Halide {
23529
+
23530
+ struct Target;
23531
+
23532
+ namespace Internal {
23533
+
23534
+ struct CodeGen_GPU_Dev;
23535
+
23536
+ std::unique_ptr<CodeGen_GPU_Dev> new_CodeGen_PTX_Dev(const Target &target);
23537
+
23538
+ } // namespace Internal
23539
+ } // namespace Halide
23540
+
23057
23541
  #endif
23058
23542
  #ifndef HALIDE_CODEGEN_POSIX_H
23059
23543
  #define HALIDE_CODEGEN_POSIX_H
@@ -23163,29 +23647,6 @@ private:
23163
23647
  } // namespace Internal
23164
23648
  } // namespace Halide
23165
23649
 
23166
- #endif
23167
- #ifndef HALIDE_CODEGEN_PTX_DEV_H
23168
- #define HALIDE_CODEGEN_PTX_DEV_H
23169
-
23170
- /** \file
23171
- * Defines the code-generator for producing CUDA host code
23172
- */
23173
-
23174
- #include <memory>
23175
-
23176
- namespace Halide {
23177
-
23178
- struct Target;
23179
-
23180
- namespace Internal {
23181
-
23182
- struct CodeGen_GPU_Dev;
23183
-
23184
- std::unique_ptr<CodeGen_GPU_Dev> new_CodeGen_PTX_Dev(const Target &target);
23185
-
23186
- } // namespace Internal
23187
- } // namespace Halide
23188
-
23189
23650
  #endif
23190
23651
  #ifndef HALIDE_CODEGEN_PYTORCH_H
23191
23652
  #define HALIDE_CODEGEN_PYTORCH_H
@@ -23549,221 +24010,10 @@ inline Expr u64_sat(Expr e) {
23549
24010
  }; // namespace ConciseCasts
23550
24011
  }; // namespace Halide
23551
24012
 
23552
- #endif
23553
- #ifndef HALIDE_CPLUSPLUS_MANGLE_H
23554
- #define HALIDE_CPLUSPLUS_MANGLE_H
23555
-
23556
- /** \file
23557
- *
23558
- * A simple function to get a C++ mangled function name for a function.
23559
- */
23560
- #include <string>
23561
- #include <vector>
23562
-
23563
-
23564
- namespace Halide {
23565
-
23566
- struct ExternFuncArgument;
23567
- struct Target;
23568
-
23569
- namespace Internal {
23570
-
23571
- /** Return the mangled C++ name for a function.
23572
- * The target parameter is used to decide on the C++
23573
- * ABI/mangling style to use.
23574
- */
23575
- std::string cplusplus_function_mangled_name(const std::string &name,
23576
- const std::vector<std::string> &namespaces,
23577
- Type return_type,
23578
- const std::vector<ExternFuncArgument> &args,
23579
- const Target &target);
23580
-
23581
- void cplusplus_mangle_test();
23582
-
23583
- } // namespace Internal
23584
-
23585
- } // namespace Halide
23586
-
23587
24013
  #endif
23588
24014
  #ifndef HALIDE_CONSTANT_BOUNDS_H
23589
24015
  #define HALIDE_CONSTANT_BOUNDS_H
23590
24016
 
23591
- #ifndef HALIDE_CONSTANT_INTERVAL_H
23592
- #define HALIDE_CONSTANT_INTERVAL_H
23593
-
23594
- #include <stdint.h>
23595
-
23596
- /** \file
23597
- * Defines the ConstantInterval class, and operators on it.
23598
- */
23599
-
23600
- namespace Halide {
23601
-
23602
- struct Type;
23603
-
23604
- namespace Internal {
23605
-
23606
- /** A class to represent ranges of integers. Can be unbounded above or below,
23607
- * but they cannot be empty. */
23608
- struct ConstantInterval {
23609
- /** The lower and upper bound of the interval. They are included
23610
- * in the interval. */
23611
- int64_t min = 0, max = 0;
23612
- bool min_defined = false, max_defined = false;
23613
-
23614
- /* A default-constructed Interval is everything */
23615
- ConstantInterval() = default;
23616
-
23617
- /** Construct an interval from a lower and upper bound. */
23618
- ConstantInterval(int64_t min, int64_t max);
23619
-
23620
- /** The interval representing everything. */
23621
- static ConstantInterval everything();
23622
-
23623
- /** Construct an interval representing a single point. */
23624
- static ConstantInterval single_point(int64_t x);
23625
-
23626
- /** Construct intervals bounded above or below. */
23627
- static ConstantInterval bounded_below(int64_t min);
23628
- static ConstantInterval bounded_above(int64_t max);
23629
-
23630
- /** Is the interval the entire range */
23631
- bool is_everything() const;
23632
-
23633
- /** Is the interval just a single value (min == max) */
23634
- bool is_single_point() const;
23635
-
23636
- /** Is the interval a particular single value */
23637
- bool is_single_point(int64_t x) const;
23638
-
23639
- /** Does the interval have a finite upper and lower bound */
23640
- bool is_bounded() const;
23641
-
23642
- /** Expand the interval to include another Interval */
23643
- void include(const ConstantInterval &i);
23644
-
23645
- /** Expand the interval to include a point */
23646
- void include(int64_t x);
23647
-
23648
- /** Test if the interval contains a particular value */
23649
- bool contains(int32_t x) const;
23650
-
23651
- /** Test if the interval contains a particular value */
23652
- bool contains(int64_t x) const;
23653
-
23654
- /** Test if the interval contains a particular unsigned value */
23655
- bool contains(uint64_t x) const;
23656
-
23657
- /** Construct the smallest interval containing two intervals. */
23658
- static ConstantInterval make_union(const ConstantInterval &a, const ConstantInterval &b);
23659
-
23660
- /** Construct the largest interval contained within two intervals. Throws an
23661
- * error if the interval is empty. */
23662
- static ConstantInterval make_intersection(const ConstantInterval &a, const ConstantInterval &b);
23663
-
23664
- /** Equivalent to same_as. Exists so that the autoscheduler can
23665
- * compare two map<string, Interval> for equality in order to
23666
- * cache computations. */
23667
- bool operator==(const ConstantInterval &other) const;
23668
-
23669
- /** In-place versions of the arithmetic operators below. */
23670
- // @{
23671
- void operator+=(const ConstantInterval &other);
23672
- void operator+=(int64_t);
23673
- void operator-=(const ConstantInterval &other);
23674
- void operator-=(int64_t);
23675
- void operator*=(const ConstantInterval &other);
23676
- void operator*=(int64_t);
23677
- void operator/=(const ConstantInterval &other);
23678
- void operator/=(int64_t);
23679
- void operator%=(const ConstantInterval &other);
23680
- void operator%=(int64_t);
23681
- // @}
23682
-
23683
- /** Negate an interval. */
23684
- ConstantInterval operator-() const;
23685
-
23686
- /** Track what happens if a constant integer interval is forced to fit into
23687
- * a concrete integer type. */
23688
- void cast_to(const Type &t);
23689
-
23690
- /** Get constant integer bounds on a type. */
23691
- static ConstantInterval bounds_of_type(Type);
23692
- };
23693
-
23694
- /** Arithmetic operators on ConstantIntervals. The resulting interval contains
23695
- * all possible values of the operator applied to any two elements of the
23696
- * argument intervals. Note that these operator on unbounded integers. If you
23697
- * are applying this to concrete small integer types, you will need to manually
23698
- * cast the constant interval back to the desired type to model the effect of
23699
- * overflow. */
23700
- // @{
23701
- ConstantInterval operator+(const ConstantInterval &a, const ConstantInterval &b);
23702
- ConstantInterval operator+(const ConstantInterval &a, int64_t b);
23703
- ConstantInterval operator-(const ConstantInterval &a, const ConstantInterval &b);
23704
- ConstantInterval operator-(const ConstantInterval &a, int64_t b);
23705
- ConstantInterval operator/(const ConstantInterval &a, const ConstantInterval &b);
23706
- ConstantInterval operator/(const ConstantInterval &a, int64_t b);
23707
- ConstantInterval operator*(const ConstantInterval &a, const ConstantInterval &b);
23708
- ConstantInterval operator*(const ConstantInterval &a, int64_t b);
23709
- ConstantInterval operator%(const ConstantInterval &a, const ConstantInterval &b);
23710
- ConstantInterval operator%(const ConstantInterval &a, int64_t b);
23711
- ConstantInterval min(const ConstantInterval &a, const ConstantInterval &b);
23712
- ConstantInterval min(const ConstantInterval &a, int64_t b);
23713
- ConstantInterval max(const ConstantInterval &a, const ConstantInterval &b);
23714
- ConstantInterval max(const ConstantInterval &a, int64_t b);
23715
- ConstantInterval abs(const ConstantInterval &a);
23716
- ConstantInterval operator<<(const ConstantInterval &a, const ConstantInterval &b);
23717
- ConstantInterval operator<<(const ConstantInterval &a, int64_t b);
23718
- ConstantInterval operator<<(int64_t a, const ConstantInterval &b);
23719
- ConstantInterval operator>>(const ConstantInterval &a, const ConstantInterval &b);
23720
- ConstantInterval operator>>(const ConstantInterval &a, int64_t b);
23721
- ConstantInterval operator>>(int64_t a, const ConstantInterval &b);
23722
- // @}
23723
-
23724
- /** Comparison operators on ConstantIntervals. Returns whether the comparison is
23725
- * true for all values of the two intervals. */
23726
- // @{
23727
- bool operator<=(const ConstantInterval &a, const ConstantInterval &b);
23728
- bool operator<=(const ConstantInterval &a, int64_t b);
23729
- bool operator<=(int64_t a, const ConstantInterval &b);
23730
- bool operator<(const ConstantInterval &a, const ConstantInterval &b);
23731
- bool operator<(const ConstantInterval &a, int64_t b);
23732
- bool operator<(int64_t a, const ConstantInterval &b);
23733
-
23734
- inline bool operator>=(const ConstantInterval &a, const ConstantInterval &b) {
23735
- return b <= a;
23736
- }
23737
- inline bool operator>(const ConstantInterval &a, const ConstantInterval &b) {
23738
- return b < a;
23739
- }
23740
- inline bool operator>=(const ConstantInterval &a, int64_t b) {
23741
- return b <= a;
23742
- }
23743
- inline bool operator>(const ConstantInterval &a, int64_t b) {
23744
- return b < a;
23745
- }
23746
- inline bool operator>=(int64_t a, const ConstantInterval &b) {
23747
- return b <= a;
23748
- }
23749
- inline bool operator>(int64_t a, const ConstantInterval &b) {
23750
- return b < a;
23751
- }
23752
-
23753
- // @}
23754
- } // namespace Internal
23755
-
23756
- /** Cast operators for ConstantIntervals. These ones have to live out in
23757
- * Halide::, to avoid C++ name lookup confusion with the Halide::cast variants
23758
- * that take Exprs. */
23759
- // @{
23760
- Internal::ConstantInterval cast(Type t, const Internal::ConstantInterval &a);
23761
- Internal::ConstantInterval saturating_cast(Type t, const Internal::ConstantInterval &a);
23762
- // @}
23763
-
23764
- } // namespace Halide
23765
-
23766
- #endif
23767
24017
 
23768
24018
  /** \file
23769
24019
  * Methods for computing compile-time constant int64_t upper and lower bounds of
@@ -23792,42 +24042,6 @@ ConstantInterval constant_integer_bounds(const Expr &e,
23792
24042
  } // namespace Internal
23793
24043
  } // namespace Halide
23794
24044
 
23795
- #endif
23796
- #ifndef HALIDE_INTERNAL_CSE_H
23797
- #define HALIDE_INTERNAL_CSE_H
23798
-
23799
- /** \file
23800
- * Defines a pass for introducing let expressions to wrap common sub-expressions. */
23801
-
23802
-
23803
- namespace Halide {
23804
- namespace Internal {
23805
-
23806
- /** Replace each common sub-expression in the argument with a
23807
- * variable, and wrap the resulting expr in a let statement giving a
23808
- * value to that variable.
23809
- *
23810
- * This is important to do within Halide (instead of punting to llvm),
23811
- * because exprs that come in from the front-end are small when
23812
- * considered as a graph, but combinatorially large when considered as
23813
- * a tree. For an example of a such a case, see
23814
- * test/code_explosion.cpp
23815
- *
23816
- * The last parameter determines whether all common subexpressions are
23817
- * lifted, or only those that the simplifier would not subsitute back
23818
- * in (e.g. addition of a constant).
23819
- */
23820
- Expr common_subexpression_elimination(const Expr &, bool lift_all = false);
23821
-
23822
- /** Do common-subexpression-elimination on each expression in a
23823
- * statement. Does not introduce let statements. */
23824
- Stmt common_subexpression_elimination(const Stmt &, bool lift_all = false);
23825
-
23826
- void cse_test();
23827
-
23828
- } // namespace Internal
23829
- } // namespace Halide
23830
-
23831
24045
  #endif
23832
24046
  #ifndef HALIDE_INTERNAL_DEBUG_ARGUMENTS_H
23833
24047
  #define HALIDE_INTERNAL_DEBUG_ARGUMENTS_H
@@ -24124,27 +24338,27 @@ Pipeline deserialize_pipeline(const std::string &filename, const std::map<std::s
24124
24338
  /// @return Returns a newly constructed deserialized Pipeline object/
24125
24339
  Pipeline deserialize_pipeline(std::istream &in, const std::map<std::string, Parameter> &user_params);
24126
24340
 
24127
- /// @brief Deserialize a Halide pipeline from a byte buffer containing a serizalized pipeline in binary format
24341
+ /// @brief Deserialize a Halide pipeline from a byte buffer containing a serialized pipeline in binary format
24128
24342
  /// @param data The data buffer containing a serialized Halide pipeline
24129
24343
  /// @param user_params Map of named input/output parameters to bind with the resulting pipeline (used to avoid deserializing specific objects and enable the use of externally defined ones instead).
24130
24344
  /// @return Returns a newly constructed deserialized Pipeline object/
24131
24345
  Pipeline deserialize_pipeline(const std::vector<uint8_t> &data, const std::map<std::string, Parameter> &user_params);
24132
24346
 
24133
- /// @brief Deserialize the extenal parameters for the Halide pipeline from a file.
24347
+ /// @brief Deserialize the external parameters for the Halide pipeline from a file.
24134
24348
  /// This method allows a minimal deserialization of just the external pipeline parameters, so they can be
24135
24349
  /// remapped and overridden with user parameters prior to deserializing the pipeline definition.
24136
24350
  /// @param filename The location of the file to deserialize. Must use .hlpipe extension.
24137
24351
  /// @return Returns a map containing the names and description of external parameters referenced in the pipeline
24138
24352
  std::map<std::string, Parameter> deserialize_parameters(const std::string &filename);
24139
24353
 
24140
- /// @brief Deserialize the extenal parameters for the Halide pipeline from input stream.
24354
+ /// @brief Deserialize the external parameters for the Halide pipeline from input stream.
24141
24355
  /// This method allows a minimal deserialization of just the external pipeline parameters, so they can be
24142
24356
  /// remapped and overridden with user parameters prior to deserializing the pipeline definition.
24143
24357
  /// @param in The input stream to read from containing a serialized Halide pipeline
24144
24358
  /// @return Returns a map containing the names and description of external parameters referenced in the pipeline
24145
24359
  std::map<std::string, Parameter> deserialize_parameters(std::istream &in);
24146
24360
 
24147
- /// @brief Deserialize the extenal parameters for the Halide pipeline from a byte buffer containing a serialized
24361
+ /// @brief Deserialize the external parameters for the Halide pipeline from a byte buffer containing a serialized
24148
24362
  /// pipeline in binary format. This method allows a minimal deserialization of just the external pipeline
24149
24363
  /// parameters, so they can be remapped and overridden with user parameters prior to deserializing the
24150
24364
  /// pipeline definition.
@@ -24301,6 +24515,7 @@ Stmt inject_early_frees(const Stmt &s);
24301
24515
  #define HALIDE_ELF_H
24302
24516
 
24303
24517
  #include <algorithm>
24518
+ #include <cstdint>
24304
24519
  #include <iterator>
24305
24520
  #include <list>
24306
24521
  #include <memory>
@@ -24524,17 +24739,17 @@ public:
24524
24739
  SHT_REL = 9,
24525
24740
  SHT_SHLIB = 10,
24526
24741
  SHT_DYNSYM = 11,
24527
- SHT_LOPROC = 0x70000000,
24528
- SHT_HIPROC = 0x7fffffff,
24529
- SHT_LOUSER = 0x80000000,
24530
- SHT_HIUSER = 0xffffffff,
24742
+ SHT_LOPROC = 0x70000000u,
24743
+ SHT_HIPROC = 0x7fffffffu,
24744
+ SHT_LOUSER = 0x80000000u,
24745
+ SHT_HIUSER = 0xffffffffu,
24531
24746
  };
24532
24747
 
24533
24748
  enum Flag : uint32_t {
24534
24749
  SHF_WRITE = 0x1,
24535
24750
  SHF_ALLOC = 0x2,
24536
24751
  SHF_EXECINSTR = 0x4,
24537
- SHF_MASKPROC = 0xf0000000,
24752
+ SHF_MASKPROC = 0xf0000000u,
24538
24753
  };
24539
24754
 
24540
24755
  typedef std::vector<Relocation> RelocationList;
@@ -24762,8 +24977,8 @@ public:
24762
24977
  ET_EXEC = 2,
24763
24978
  ET_DYN = 3,
24764
24979
  ET_CORE = 4,
24765
- ET_LOPROC = 0xff00,
24766
- ET_HIPROC = 0xffff,
24980
+ ET_LOPROC = 0xff00u,
24981
+ ET_HIPROC = 0xffffu,
24767
24982
  };
24768
24983
 
24769
24984
  // We use lists for sections and symbols to avoid iterator
@@ -28128,6 +28343,11 @@ public:
28128
28343
  #undef HALIDE_OUTPUT_FORWARD
28129
28344
  #undef HALIDE_OUTPUT_FORWARD_CONST
28130
28345
 
28346
+ using GIOBase::set_type;
28347
+
28348
+ /** Set types dynamically for tuple outputs. */
28349
+ void set_type(const std::vector<Type> &types);
28350
+
28131
28351
  protected:
28132
28352
  GeneratorOutputBase(size_t array_size,
28133
28353
  const std::string &name,
@@ -28999,11 +29219,21 @@ public:
28999
29219
  // long as all Outputs have been defined.)
29000
29220
  Pipeline get_pipeline();
29001
29221
 
29222
+ protected:
29223
+ void claim_name(const std::string &name, const char *param_type) {
29224
+ user_assert(param_info_ptr->names.count(name) == 0)
29225
+ << "Cannot add " << param_type << " with name " << name
29226
+ << ". It is already taken by another input or output parameter.";
29227
+ param_info_ptr->names.insert(name);
29228
+ }
29229
+
29230
+ public:
29002
29231
  // Create Input<Func> with dynamic type & dimensions
29003
29232
  template<typename T,
29004
29233
  typename std::enable_if<std::is_same<T, Halide::Func>::value>::type * = nullptr>
29005
29234
  GeneratorInput<T> *add_input(const std::string &name, const Type &t, int dimensions) {
29006
29235
  check_exact_phase(GeneratorBase::ConfigureCalled);
29236
+ claim_name(name, "input");
29007
29237
  auto *p = new GeneratorInput<T>(name, t, dimensions);
29008
29238
  p->generator = this;
29009
29239
  param_info_ptr->owned_extras.push_back(std::unique_ptr<Internal::GIOBase>(p));
@@ -29018,6 +29248,7 @@ public:
29018
29248
  static_assert(!T::has_static_halide_type, "You can only call this version of add_input() for a Buffer<T, D> where T is void or omitted .");
29019
29249
  static_assert(!T::has_static_dimensions, "You can only call this version of add_input() for a Buffer<T, D> where D is -1 or omitted.");
29020
29250
  check_exact_phase(GeneratorBase::ConfigureCalled);
29251
+ claim_name(name, "input");
29021
29252
  auto *p = new GeneratorInput<T>(name, t, dimensions);
29022
29253
  p->generator = this;
29023
29254
  param_info_ptr->owned_extras.push_back(std::unique_ptr<Internal::GIOBase>(p));
@@ -29032,6 +29263,7 @@ public:
29032
29263
  static_assert(T::has_static_halide_type, "You can only call this version of add_input() for a Buffer<T, D> where T is not void.");
29033
29264
  static_assert(!T::has_static_dimensions, "You can only call this version of add_input() for a Buffer<T, D> where D is -1 or omitted.");
29034
29265
  check_exact_phase(GeneratorBase::ConfigureCalled);
29266
+ claim_name(name, "input");
29035
29267
  auto *p = new GeneratorInput<T>(name, dimensions);
29036
29268
  p->generator = this;
29037
29269
  param_info_ptr->owned_extras.push_back(std::unique_ptr<Internal::GIOBase>(p));
@@ -29046,6 +29278,7 @@ public:
29046
29278
  static_assert(T::has_static_halide_type, "You can only call this version of add_input() for a Buffer<T, D> where T is not void.");
29047
29279
  static_assert(T::has_static_dimensions, "You can only call this version of add_input() for a Buffer<T, D> where D is not -1.");
29048
29280
  check_exact_phase(GeneratorBase::ConfigureCalled);
29281
+ claim_name(name, "input");
29049
29282
  auto *p = new GeneratorInput<T>(name);
29050
29283
  p->generator = this;
29051
29284
  param_info_ptr->owned_extras.push_back(std::unique_ptr<Internal::GIOBase>(p));
@@ -29057,6 +29290,7 @@ public:
29057
29290
  typename std::enable_if<std::is_arithmetic<T>::value>::type * = nullptr>
29058
29291
  GeneratorInput<T> *add_input(const std::string &name) {
29059
29292
  check_exact_phase(GeneratorBase::ConfigureCalled);
29293
+ claim_name(name, "input");
29060
29294
  auto *p = new GeneratorInput<T>(name);
29061
29295
  p->generator = this;
29062
29296
  param_info_ptr->owned_extras.push_back(std::unique_ptr<Internal::GIOBase>(p));
@@ -29068,6 +29302,7 @@ public:
29068
29302
  typename std::enable_if<std::is_same<T, Expr>::value>::type * = nullptr>
29069
29303
  GeneratorInput<T> *add_input(const std::string &name, const Type &type) {
29070
29304
  check_exact_phase(GeneratorBase::ConfigureCalled);
29305
+ claim_name(name, "input");
29071
29306
  auto *p = new GeneratorInput<Expr>(name);
29072
29307
  p->generator = this;
29073
29308
  p->set_type(type);
@@ -29079,8 +29314,9 @@ public:
29079
29314
  // Create Output<Func> with dynamic type & dimensions
29080
29315
  template<typename T,
29081
29316
  typename std::enable_if<std::is_same<T, Halide::Func>::value>::type * = nullptr>
29082
- GeneratorOutput<T> *add_output(const std::string &name, const Type &t, int dimensions) {
29317
+ GeneratorOutput<T> *add_output(const std::string &name, const std::vector<Type> &t, int dimensions) {
29083
29318
  check_exact_phase(GeneratorBase::ConfigureCalled);
29319
+ claim_name(name, "output");
29084
29320
  auto *p = new GeneratorOutput<T>(name, t, dimensions);
29085
29321
  p->generator = this;
29086
29322
  param_info_ptr->owned_extras.push_back(std::unique_ptr<Internal::GIOBase>(p));
@@ -29088,13 +29324,20 @@ public:
29088
29324
  return p;
29089
29325
  }
29090
29326
 
29327
+ template<typename T,
29328
+ typename std::enable_if<std::is_same<T, Halide::Func>::value>::type * = nullptr>
29329
+ GeneratorOutput<T> *add_output(const std::string &name, const Type &t, int dimensions) {
29330
+ return add_output<T>(name, std::vector<Type>{t}, dimensions);
29331
+ }
29332
+
29091
29333
  // Create Output<Buffer> with dynamic type & dimensions
29092
29334
  template<typename T,
29093
29335
  typename std::enable_if<!std::is_arithmetic<T>::value && !std::is_same<T, Halide::Func>::value>::type * = nullptr>
29094
- GeneratorOutput<T> *add_output(const std::string &name, const Type &t, int dimensions) {
29336
+ GeneratorOutput<T> *add_output(const std::string &name, const std::vector<Type> &t, int dimensions) {
29095
29337
  static_assert(!T::has_static_halide_type, "You can only call this version of add_output() for a Buffer<T, D> where T is void or omitted .");
29096
29338
  static_assert(!T::has_static_dimensions, "You can only call this version of add_output() for a Buffer<T, D> where D is -1 or omitted.");
29097
29339
  check_exact_phase(GeneratorBase::ConfigureCalled);
29340
+ claim_name(name, "output");
29098
29341
  auto *p = new GeneratorOutput<T>(name, t, dimensions);
29099
29342
  p->generator = this;
29100
29343
  param_info_ptr->owned_extras.push_back(std::unique_ptr<Internal::GIOBase>(p));
@@ -29102,13 +29345,20 @@ public:
29102
29345
  return p;
29103
29346
  }
29104
29347
 
29105
- // Create Output<Buffer> with compile-time type
29348
+ template<typename T,
29349
+ typename std::enable_if<!std::is_arithmetic<T>::value && !std::is_same<T, Halide::Func>::value>::type * = nullptr>
29350
+ GeneratorOutput<T> *add_output(const std::string &name, const Type &t, int dimensions) {
29351
+ return add_output<T>(name, std::vector<Type>{t}, dimensions);
29352
+ }
29353
+
29354
+ // Create Output<Buffer> with either a compile-time type or a
29355
+ // to-be-set-later type and dynamic dimensions
29106
29356
  template<typename T,
29107
29357
  typename std::enable_if<!std::is_arithmetic<T>::value && !std::is_same<T, Halide::Func>::value>::type * = nullptr>
29108
29358
  GeneratorOutput<T> *add_output(const std::string &name, int dimensions) {
29109
- static_assert(T::has_static_halide_type, "You can only call this version of add_output() for a Buffer<T, D> where T is not void.");
29110
29359
  static_assert(!T::has_static_dimensions, "You can only call this version of add_output() for a Buffer<T, D> where D is -1 or omitted.");
29111
29360
  check_exact_phase(GeneratorBase::ConfigureCalled);
29361
+ claim_name(name, "output");
29112
29362
  auto *p = new GeneratorOutput<T>(name, dimensions);
29113
29363
  p->generator = this;
29114
29364
  param_info_ptr->owned_extras.push_back(std::unique_ptr<Internal::GIOBase>(p));
@@ -29116,13 +29366,35 @@ public:
29116
29366
  return p;
29117
29367
  }
29118
29368
 
29119
- // Create Output<Buffer> with compile-time type & dimensions
29369
+ // Create Output<Buffer> with compile-time dimensions and dynamic type
29370
+ template<typename T,
29371
+ typename std::enable_if<!std::is_arithmetic<T>::value && !std::is_same<T, Halide::Func>::value>::type * = nullptr>
29372
+ GeneratorOutput<T> *add_output(const std::string &name, const std::vector<Type> &t) {
29373
+ static_assert(!T::has_static_halide_type, "You can only call this version of add_output() for a Buffer<T, D> where T is void or omitted.");
29374
+ static_assert(T::has_static_dimensions, "You can only call this version of add_output() for a Buffer<void, D> where D is not -1.");
29375
+ check_exact_phase(GeneratorBase::ConfigureCalled);
29376
+ claim_name(name, "output");
29377
+ auto *p = new GeneratorOutput<T>(name, t);
29378
+ p->generator = this;
29379
+ param_info_ptr->owned_extras.push_back(std::unique_ptr<Internal::GIOBase>(p));
29380
+ param_info_ptr->filter_outputs.push_back(p);
29381
+ return p;
29382
+ }
29383
+
29384
+ template<typename T,
29385
+ typename std::enable_if<!std::is_arithmetic<T>::value && !std::is_same<T, Halide::Func>::value>::type * = nullptr>
29386
+ GeneratorOutput<T> *add_output(const std::string &name, const Type &t) {
29387
+ return add_output<T>(name, std::vector<Type>{t});
29388
+ }
29389
+
29390
+ // Create Output<Buffer> with compile-time type and dimensions
29120
29391
  template<typename T,
29121
29392
  typename std::enable_if<!std::is_arithmetic<T>::value && !std::is_same<T, Halide::Func>::value>::type * = nullptr>
29122
29393
  GeneratorOutput<T> *add_output(const std::string &name) {
29123
29394
  static_assert(T::has_static_halide_type, "You can only call this version of add_output() for a Buffer<T, D> where T is not void.");
29124
29395
  static_assert(T::has_static_dimensions, "You can only call this version of add_output() for a Buffer<T, D> where D is not -1.");
29125
29396
  check_exact_phase(GeneratorBase::ConfigureCalled);
29397
+ claim_name(name, "output");
29126
29398
  auto *p = new GeneratorOutput<T>(name);
29127
29399
  p->generator = this;
29128
29400
  param_info_ptr->owned_extras.push_back(std::unique_ptr<Internal::GIOBase>(p));
@@ -29954,240 +30226,6 @@ std::string type_suffix(const std::vector<Expr> &ops, bool signed_variants = tru
29954
30226
  } // namespace Internal
29955
30227
  } // namespace Halide
29956
30228
 
29957
- #endif
29958
- #ifndef HALIDE_INFER_ARGUMENTS_H
29959
- #define HALIDE_INFER_ARGUMENTS_H
29960
-
29961
- #include <vector>
29962
-
29963
-
29964
- /** \file
29965
- *
29966
- * Interface for a visitor to infer arguments used in a body Stmt.
29967
- */
29968
-
29969
- namespace Halide {
29970
- namespace Internal {
29971
-
29972
- /** An inferred argument. Inferred args are either Params,
29973
- * ImageParams, or Buffers. The first two are handled by the param
29974
- * field, and global images are tracked via the buf field. These
29975
- * are used directly when jitting, or used for validation when
29976
- * compiling with an explicit argument list. */
29977
- struct InferredArgument {
29978
- Argument arg;
29979
- Parameter param;
29980
- Buffer<> buffer;
29981
-
29982
- bool operator<(const InferredArgument &other) const {
29983
- if (arg.is_buffer() && !other.arg.is_buffer()) {
29984
- return true;
29985
- } else if (other.arg.is_buffer() && !arg.is_buffer()) {
29986
- return false;
29987
- } else {
29988
- return arg.name < other.arg.name;
29989
- }
29990
- }
29991
- };
29992
-
29993
- class Function;
29994
-
29995
- std::vector<InferredArgument> infer_arguments(const Stmt &body, const std::vector<Function> &outputs);
29996
-
29997
- } // namespace Internal
29998
- } // namespace Halide
29999
-
30000
- #endif
30001
- #ifndef HALIDE_HOST_GPU_BUFFER_COPIES_H
30002
- #define HALIDE_HOST_GPU_BUFFER_COPIES_H
30003
-
30004
- /** \file
30005
- * Defines the lowering passes that deal with host and device buffer flow.
30006
- */
30007
-
30008
- #include <string>
30009
- #include <vector>
30010
-
30011
-
30012
- namespace Halide {
30013
-
30014
- struct Target;
30015
-
30016
- namespace Internal {
30017
-
30018
- /** A helper function to call an extern function, and assert that it
30019
- * returns 0. */
30020
- Stmt call_extern_and_assert(const std::string &name, const std::vector<Expr> &args);
30021
-
30022
- /** Inject calls to halide_device_malloc, halide_copy_to_device, and
30023
- * halide_copy_to_host as needed. */
30024
- Stmt inject_host_dev_buffer_copies(Stmt s, const Target &t);
30025
-
30026
- } // namespace Internal
30027
- } // namespace Halide
30028
-
30029
- #endif
30030
- #ifndef HALIDE_INLINE_H
30031
- #define HALIDE_INLINE_H
30032
-
30033
- /** \file
30034
- * Methods for replacing calls to functions with their definitions.
30035
- */
30036
-
30037
-
30038
- namespace Halide {
30039
- namespace Internal {
30040
-
30041
- class Function;
30042
-
30043
- /** Inline a single named function, which must be pure. For a pure function to
30044
- * be inlined, it must not have any specializations (i.e. it can only have one
30045
- * values definition). */
30046
- // @{
30047
- Stmt inline_function(Stmt s, const Function &f);
30048
- Expr inline_function(Expr e, const Function &f);
30049
- void inline_function(Function caller, const Function &f);
30050
- // @}
30051
-
30052
- /** Check if the schedule of an inlined function is legal, throwing an error
30053
- * if it is not. */
30054
- void validate_schedule_inlined_function(Function f);
30055
-
30056
- } // namespace Internal
30057
- } // namespace Halide
30058
-
30059
- #endif
30060
- #ifndef HALIDE_INLINE_REDUCTIONS_H
30061
- #define HALIDE_INLINE_REDUCTIONS_H
30062
-
30063
- #include <string>
30064
-
30065
-
30066
- /** \file
30067
- * Defines some inline reductions: sum, product, minimum, maximum.
30068
- */
30069
- namespace Halide {
30070
-
30071
- class Func;
30072
-
30073
- /** An inline reduction. This is suitable for convolution-type
30074
- * operations - the reduction will be computed in the innermost loop
30075
- * that it is used in. The argument may contain free or implicit
30076
- * variables, and must refer to some reduction domain. The free
30077
- * variables are still free in the return value, but the reduction
30078
- * domain is captured - the result expression does not refer to a
30079
- * reduction domain and can be used in a pure function definition.
30080
- *
30081
- * An example using \ref sum :
30082
- *
30083
- \code
30084
- Func f, g;
30085
- Var x;
30086
- RDom r(0, 10);
30087
- f(x) = x*x;
30088
- g(x) = sum(f(x + r));
30089
- \endcode
30090
- *
30091
- * Here g computes some blur of x, but g is still a pure function. The
30092
- * sum is being computed by an anonymous reduction function that is
30093
- * scheduled innermost within g.
30094
- */
30095
- //@{
30096
- Expr sum(Expr, const std::string &s = "sum");
30097
- Expr saturating_sum(Expr, const std::string &s = "saturating_sum");
30098
- Expr product(Expr, const std::string &s = "product");
30099
- Expr maximum(Expr, const std::string &s = "maximum");
30100
- Expr minimum(Expr, const std::string &s = "minimum");
30101
- //@}
30102
-
30103
- /** Variants of the inline reduction in which the RDom is stated
30104
- * explicitly. The expression can refer to multiple RDoms, and only
30105
- * the inner one is captured by the reduction. This allows you to
30106
- * write expressions like:
30107
- \code
30108
- RDom r1(0, 10), r2(0, 10), r3(0, 10);
30109
- Expr e = minimum(r1, product(r2, sum(r3, r1 + r2 + r3)));
30110
- \endcode
30111
- */
30112
- // @{
30113
- Expr sum(const RDom &, Expr, const std::string &s = "sum");
30114
- Expr saturating_sum(const RDom &r, Expr e, const std::string &s = "saturating_sum");
30115
- Expr product(const RDom &, Expr, const std::string &s = "product");
30116
- Expr maximum(const RDom &, Expr, const std::string &s = "maximum");
30117
- Expr minimum(const RDom &, Expr, const std::string &s = "minimum");
30118
- // @}
30119
-
30120
- /** Returns an Expr or Tuple representing the coordinates of the point
30121
- * in the RDom which minimizes or maximizes the expression. The
30122
- * expression must refer to some RDom. Also returns the extreme value
30123
- * of the expression as the last element of the tuple. */
30124
- // @{
30125
- Tuple argmax(Expr, const std::string &s = "argmax");
30126
- Tuple argmin(Expr, const std::string &s = "argmin");
30127
- Tuple argmax(const RDom &, Expr, const std::string &s = "argmax");
30128
- Tuple argmin(const RDom &, Expr, const std::string &s = "argmin");
30129
- // @}
30130
-
30131
- /** Inline reductions create an anonymous helper Func to do the
30132
- * work. The variants below instead take a named Func object to use,
30133
- * so that it is no longer anonymous and can be scheduled
30134
- * (e.g. unrolled across the reduction domain). The Func passed must
30135
- * not have any existing definition. */
30136
- //@{
30137
- Expr sum(Expr, const Func &);
30138
- Expr saturating_sum(Expr, const Func &);
30139
- Expr product(Expr, const Func &);
30140
- Expr maximum(Expr, const Func &);
30141
- Expr minimum(Expr, const Func &);
30142
- Expr sum(const RDom &, Expr, const Func &);
30143
- Expr saturating_sum(const RDom &r, Expr e, const Func &);
30144
- Expr product(const RDom &, Expr, const Func &);
30145
- Expr maximum(const RDom &, Expr, const Func &);
30146
- Expr minimum(const RDom &, Expr, const Func &);
30147
- Tuple argmax(Expr, const Func &);
30148
- Tuple argmin(Expr, const Func &);
30149
- Tuple argmax(const RDom &, Expr, const Func &);
30150
- Tuple argmin(const RDom &, Expr, const Func &);
30151
- //@}
30152
-
30153
- } // namespace Halide
30154
-
30155
- #endif
30156
- #ifndef HALIDE_INTEGER_DIVISION_TABLE_H
30157
- #define HALIDE_INTEGER_DIVISION_TABLE_H
30158
-
30159
- #include <cstdint>
30160
-
30161
- /** \file
30162
- * Tables telling us how to do integer division via fixed-point
30163
- * multiplication for various small constants. This file is
30164
- * automatically generated by find_inverse.cpp.
30165
- */
30166
- namespace Halide {
30167
- namespace Internal {
30168
- namespace IntegerDivision {
30169
- extern const int64_t table_u8[256][4];
30170
- extern const int64_t table_s8[256][4];
30171
- extern const int64_t table_srz8[256][4];
30172
- extern const int64_t table_u16[256][4];
30173
- extern const int64_t table_s16[256][4];
30174
- extern const int64_t table_srz16[256][4];
30175
- extern const int64_t table_u32[256][4];
30176
- extern const int64_t table_s32[256][4];
30177
- extern const int64_t table_srz32[256][4];
30178
- extern const int64_t table_runtime_u8[256][4];
30179
- extern const int64_t table_runtime_s8[256][4];
30180
- extern const int64_t table_runtime_srz8[256][4];
30181
- extern const int64_t table_runtime_u16[256][4];
30182
- extern const int64_t table_runtime_s16[256][4];
30183
- extern const int64_t table_runtime_srz16[256][4];
30184
- extern const int64_t table_runtime_u32[256][4];
30185
- extern const int64_t table_runtime_s32[256][4];
30186
- extern const int64_t table_runtime_srz32[256][4];
30187
- } // namespace IntegerDivision
30188
- } // namespace Internal
30189
- } // namespace Halide
30190
-
30191
30229
  #endif
30192
30230
  #ifndef HALIDE_IR_MATCH_H
30193
30231
  #define HALIDE_IR_MATCH_H
@@ -30836,14 +30874,14 @@ struct BinOp {
30836
30874
  }
30837
30875
  const Op &op = (const Op &)e;
30838
30876
  return (a.template match<bound>(*op.a.get(), state) &&
30839
- b.template match<bound | bindings<A>::mask>(*op.b.get(), state));
30877
+ b.template match<(bound | bindings<A>::mask)>(*op.b.get(), state));
30840
30878
  }
30841
30879
 
30842
30880
  template<uint32_t bound, typename Op2, typename A2, typename B2>
30843
30881
  HALIDE_ALWAYS_INLINE bool match(const BinOp<Op2, A2, B2> &op, MatcherState &state) const noexcept {
30844
30882
  return (std::is_same<Op, Op2>::value &&
30845
30883
  a.template match<bound>(unwrap(op.a), state) &&
30846
- b.template match<bound | bindings<A>::mask>(unwrap(op.b), state));
30884
+ b.template match<(bound | bindings<A>::mask)>(unwrap(op.b), state));
30847
30885
  }
30848
30886
 
30849
30887
  constexpr static bool foldable = A::foldable && B::foldable;
@@ -30938,14 +30976,14 @@ struct CmpOp {
30938
30976
  }
30939
30977
  const Op &op = (const Op &)e;
30940
30978
  return (a.template match<bound>(*op.a.get(), state) &&
30941
- b.template match<bound | bindings<A>::mask>(*op.b.get(), state));
30979
+ b.template match<(bound | bindings<A>::mask)>(*op.b.get(), state));
30942
30980
  }
30943
30981
 
30944
30982
  template<uint32_t bound, typename Op2, typename A2, typename B2>
30945
30983
  HALIDE_ALWAYS_INLINE bool match(const CmpOp<Op2, A2, B2> &op, MatcherState &state) const noexcept {
30946
30984
  return (std::is_same<Op, Op2>::value &&
30947
30985
  a.template match<bound>(unwrap(op.a), state) &&
30948
- b.template match<bound | bindings<A>::mask>(unwrap(op.b), state));
30986
+ b.template match<(bound | bindings<A>::mask)>(unwrap(op.b), state));
30949
30987
  }
30950
30988
 
30951
30989
  constexpr static bool foldable = A::foldable && B::foldable;
@@ -31508,11 +31546,6 @@ constexpr bool and_reduce(bool first, Args... rest) {
31508
31546
  return first && and_reduce(rest...);
31509
31547
  }
31510
31548
 
31511
- // TODO: this can be replaced with std::min() once we require C++14 or later
31512
- constexpr int const_min(int a, int b) {
31513
- return a < b ? a : b;
31514
- }
31515
-
31516
31549
  template<Call::IntrinsicOp intrin>
31517
31550
  struct OptionalIntrinType {
31518
31551
  bool check(const Type &) const {
@@ -31550,7 +31583,7 @@ struct Intrin {
31550
31583
  HALIDE_ALWAYS_INLINE bool match_args(int, const Call &c, MatcherState &state) const noexcept {
31551
31584
  using T = decltype(std::get<i>(args));
31552
31585
  return (std::get<i>(args).template match<bound>(*c.args[i].get(), state) &&
31553
- match_args<i + 1, bound | bindings<T>::mask>(0, c, state));
31586
+ match_args<i + 1, (bound | bindings<T>::mask)>(0, c, state));
31554
31587
  }
31555
31588
 
31556
31589
  template<int i, uint32_t binds>
@@ -31601,7 +31634,7 @@ struct Intrin {
31601
31634
  return saturating_cast(optional_type_hint.type, std::move(arg0));
31602
31635
  }
31603
31636
 
31604
- Expr arg1 = std::get<const_min(1, sizeof...(Args) - 1)>(args).make(state, type_hint);
31637
+ Expr arg1 = std::get<std::min<size_t>(1, sizeof...(Args) - 1)>(args).make(state, type_hint);
31605
31638
  if (intrin == Call::absd) {
31606
31639
  return absd(std::move(arg0), std::move(arg1));
31607
31640
  } else if (intrin == Call::widen_right_add) {
@@ -31636,7 +31669,7 @@ struct Intrin {
31636
31669
  return rounding_shift_right(std::move(arg0), std::move(arg1));
31637
31670
  }
31638
31671
 
31639
- Expr arg2 = std::get<const_min(2, sizeof...(Args) - 1)>(args).make(state, type_hint);
31672
+ Expr arg2 = std::get<std::min<size_t>(2, sizeof...(Args) - 1)>(args).make(state, type_hint);
31640
31673
  if (intrin == Call::mul_shift_right) {
31641
31674
  return mul_shift_right(std::move(arg0), std::move(arg1), std::move(arg2));
31642
31675
  } else if (intrin == Call::rounding_mul_shift_right) {
@@ -31880,14 +31913,14 @@ struct SelectOp {
31880
31913
  }
31881
31914
  const Select &op = (const Select &)e;
31882
31915
  return (c.template match<bound>(*op.condition.get(), state) &&
31883
- t.template match<bound | bindings<C>::mask>(*op.true_value.get(), state) &&
31884
- f.template match<bound | bindings<C>::mask | bindings<T>::mask>(*op.false_value.get(), state));
31916
+ t.template match<(bound | bindings<C>::mask)>(*op.true_value.get(), state) &&
31917
+ f.template match<(bound | bindings<C>::mask | bindings<T>::mask)>(*op.false_value.get(), state));
31885
31918
  }
31886
31919
  template<uint32_t bound, typename C2, typename T2, typename F2>
31887
31920
  HALIDE_ALWAYS_INLINE bool match(const SelectOp<C2, T2, F2> &instance, MatcherState &state) const noexcept {
31888
31921
  return (c.template match<bound>(unwrap(instance.c), state) &&
31889
- t.template match<bound | bindings<C>::mask>(unwrap(instance.t), state) &&
31890
- f.template match<bound | bindings<C>::mask | bindings<T>::mask>(unwrap(instance.f), state));
31922
+ t.template match<(bound | bindings<C>::mask)>(unwrap(instance.t), state) &&
31923
+ f.template match<(bound | bindings<C>::mask | bindings<T>::mask)>(unwrap(instance.f), state));
31891
31924
  }
31892
31925
 
31893
31926
  HALIDE_ALWAYS_INLINE
@@ -31953,7 +31986,7 @@ struct BroadcastOp {
31953
31986
  template<uint32_t bound, typename A2, typename B2>
31954
31987
  HALIDE_ALWAYS_INLINE bool match(const BroadcastOp<A2, B2> &op, MatcherState &state) const noexcept {
31955
31988
  return (a.template match<bound>(unwrap(op.a), state) &&
31956
- lanes.template match<bound | bindings<A>::mask>(unwrap(op.lanes), state));
31989
+ lanes.template match<(bound | bindings<A>::mask)>(unwrap(op.lanes), state));
31957
31990
  }
31958
31991
 
31959
31992
  HALIDE_ALWAYS_INLINE
@@ -32017,8 +32050,8 @@ struct RampOp {
32017
32050
  }
32018
32051
  const Ramp &op = (const Ramp &)e;
32019
32052
  if (a.template match<bound>(*op.base.get(), state) &&
32020
- b.template match<bound | bindings<A>::mask>(*op.stride.get(), state) &&
32021
- lanes.template match<bound | bindings<A>::mask | bindings<B>::mask>(op.lanes, state)) {
32053
+ b.template match<(bound | bindings<A>::mask)>(*op.stride.get(), state) &&
32054
+ lanes.template match<(bound | bindings<A>::mask | bindings<B>::mask)>(op.lanes, state)) {
32022
32055
  return true;
32023
32056
  } else {
32024
32057
  return false;
@@ -32028,8 +32061,8 @@ struct RampOp {
32028
32061
  template<uint32_t bound, typename A2, typename B2, typename C2>
32029
32062
  HALIDE_ALWAYS_INLINE bool match(const RampOp<A2, B2, C2> &op, MatcherState &state) const noexcept {
32030
32063
  return (a.template match<bound>(unwrap(op.a), state) &&
32031
- b.template match<bound | bindings<A>::mask>(unwrap(op.b), state) &&
32032
- lanes.template match<bound | bindings<A>::mask | bindings<B>::mask>(unwrap(op.lanes), state));
32064
+ b.template match<(bound | bindings<A>::mask)>(unwrap(op.b), state) &&
32065
+ lanes.template match<(bound | bindings<A>::mask | bindings<B>::mask)>(unwrap(op.lanes), state));
32033
32066
  }
32034
32067
 
32035
32068
  HALIDE_ALWAYS_INLINE
@@ -32080,7 +32113,7 @@ struct VectorReduceOp {
32080
32113
  const VectorReduce &op = (const VectorReduce &)e;
32081
32114
  if (op.op == reduce_op &&
32082
32115
  a.template match<bound>(*op.value.get(), state) &&
32083
- lanes.template match<bound | bindings<A>::mask>(op.type.lanes(), state)) {
32116
+ lanes.template match<(bound | bindings<A>::mask)>(op.type.lanes(), state)) {
32084
32117
  return true;
32085
32118
  }
32086
32119
  }
@@ -32091,7 +32124,7 @@ struct VectorReduceOp {
32091
32124
  HALIDE_ALWAYS_INLINE bool match(const VectorReduceOp<A2, B2, reduce_op_2> &op, MatcherState &state) const noexcept {
32092
32125
  return (reduce_op == reduce_op_2 &&
32093
32126
  a.template match<bound>(unwrap(op.a), state) &&
32094
- lanes.template match<bound | bindings<A>::mask>(unwrap(op.lanes), state));
32127
+ lanes.template match<(bound | bindings<A>::mask)>(unwrap(op.lanes), state));
32095
32128
  }
32096
32129
 
32097
32130
  HALIDE_ALWAYS_INLINE
@@ -32340,9 +32373,9 @@ struct SliceOp {
32340
32373
  return v.vectors.size() == 1 &&
32341
32374
  v.is_slice() &&
32342
32375
  vec.template match<bound>(*v.vectors[0].get(), state) &&
32343
- base.template match<bound | bindings<Vec>::mask>(v.slice_begin(), state) &&
32344
- stride.template match<bound | bindings<Vec>::mask | bindings<Base>::mask>(v.slice_stride(), state) &&
32345
- lanes.template match<bound | bindings<Vec>::mask | bindings<Base>::mask | bindings<Stride>::mask>(v.type.lanes(), state);
32376
+ base.template match<(bound | bindings<Vec>::mask)>(v.slice_begin(), state) &&
32377
+ stride.template match<(bound | bindings<Vec>::mask | bindings<Base>::mask)>(v.slice_stride(), state) &&
32378
+ lanes.template match<(bound | bindings<Vec>::mask | bindings<Base>::mask | bindings<Stride>::mask)>(v.type.lanes(), state);
32346
32379
  }
32347
32380
 
32348
32381
  HALIDE_ALWAYS_INLINE
@@ -33409,13 +33442,59 @@ std::pair<Region, bool> mutate_region(Mutator *mutator, const Region &bounds, Ar
33409
33442
  } // namespace Halide
33410
33443
 
33411
33444
  #endif
33412
- #ifndef HALIDE_LERP_H
33413
- #define HALIDE_LERP_H
33445
+ #ifndef HALIDE_INFER_ARGUMENTS_H
33446
+ #define HALIDE_INFER_ARGUMENTS_H
33447
+
33448
+ #include <vector>
33449
+
33414
33450
 
33415
33451
  /** \file
33416
- * Defines methods for converting a lerp intrinsic into Halide IR.
33452
+ *
33453
+ * Interface for a visitor to infer arguments used in a body Stmt.
33417
33454
  */
33418
33455
 
33456
+ namespace Halide {
33457
+ namespace Internal {
33458
+
33459
+ /** An inferred argument. Inferred args are either Params,
33460
+ * ImageParams, or Buffers. The first two are handled by the param
33461
+ * field, and global images are tracked via the buf field. These
33462
+ * are used directly when jitting, or used for validation when
33463
+ * compiling with an explicit argument list. */
33464
+ struct InferredArgument {
33465
+ Argument arg;
33466
+ Parameter param;
33467
+ Buffer<> buffer;
33468
+
33469
+ bool operator<(const InferredArgument &other) const {
33470
+ if (arg.is_buffer() && !other.arg.is_buffer()) {
33471
+ return true;
33472
+ } else if (other.arg.is_buffer() && !arg.is_buffer()) {
33473
+ return false;
33474
+ } else {
33475
+ return arg.name < other.arg.name;
33476
+ }
33477
+ }
33478
+ };
33479
+
33480
+ class Function;
33481
+
33482
+ std::vector<InferredArgument> infer_arguments(const Stmt &body, const std::vector<Function> &outputs);
33483
+
33484
+ } // namespace Internal
33485
+ } // namespace Halide
33486
+
33487
+ #endif
33488
+ #ifndef HALIDE_HOST_GPU_BUFFER_COPIES_H
33489
+ #define HALIDE_HOST_GPU_BUFFER_COPIES_H
33490
+
33491
+ /** \file
33492
+ * Defines the lowering passes that deal with host and device buffer flow.
33493
+ */
33494
+
33495
+ #include <string>
33496
+ #include <vector>
33497
+
33419
33498
 
33420
33499
  namespace Halide {
33421
33500
 
@@ -33423,15 +33502,179 @@ struct Target;
33423
33502
 
33424
33503
  namespace Internal {
33425
33504
 
33426
- /** Build Halide IR that computes a lerp. Use by codegen targets that don't have
33427
- * a native lerp. The lerp is done in the type of the zero value. The final_type
33428
- * is a cast that should occur after the lerp. It's included because in some
33429
- * cases you can incorporate a final cast into the lerp math. */
33430
- Expr lower_lerp(Type final_type, Expr zero_val, Expr one_val, const Expr &weight, const Target &target);
33505
+ /** A helper function to call an extern function, and assert that it
33506
+ * returns 0. */
33507
+ Stmt call_extern_and_assert(const std::string &name, const std::vector<Expr> &args);
33508
+
33509
+ /** Inject calls to halide_device_malloc, halide_copy_to_device, and
33510
+ * halide_copy_to_host as needed. */
33511
+ Stmt inject_host_dev_buffer_copies(Stmt s, const Target &t);
33431
33512
 
33432
33513
  } // namespace Internal
33433
33514
  } // namespace Halide
33434
33515
 
33516
+ #endif
33517
+ #ifndef HALIDE_INLINE_H
33518
+ #define HALIDE_INLINE_H
33519
+
33520
+ /** \file
33521
+ * Methods for replacing calls to functions with their definitions.
33522
+ */
33523
+
33524
+
33525
+ namespace Halide {
33526
+ namespace Internal {
33527
+
33528
+ class Function;
33529
+
33530
+ /** Inline a single named function, which must be pure. For a pure function to
33531
+ * be inlined, it must not have any specializations (i.e. it can only have one
33532
+ * values definition). */
33533
+ // @{
33534
+ Stmt inline_function(Stmt s, const Function &f);
33535
+ Expr inline_function(Expr e, const Function &f);
33536
+ void inline_function(Function caller, const Function &f);
33537
+ // @}
33538
+
33539
+ /** Check if the schedule of an inlined function is legal, throwing an error
33540
+ * if it is not. */
33541
+ void validate_schedule_inlined_function(Function f);
33542
+
33543
+ } // namespace Internal
33544
+ } // namespace Halide
33545
+
33546
+ #endif
33547
+ #ifndef HALIDE_INLINE_REDUCTIONS_H
33548
+ #define HALIDE_INLINE_REDUCTIONS_H
33549
+
33550
+ #include <string>
33551
+
33552
+
33553
+ /** \file
33554
+ * Defines some inline reductions: sum, product, minimum, maximum.
33555
+ */
33556
+ namespace Halide {
33557
+
33558
+ class Func;
33559
+
33560
+ /** An inline reduction. This is suitable for convolution-type
33561
+ * operations - the reduction will be computed in the innermost loop
33562
+ * that it is used in. The argument may contain free or implicit
33563
+ * variables, and must refer to some reduction domain. The free
33564
+ * variables are still free in the return value, but the reduction
33565
+ * domain is captured - the result expression does not refer to a
33566
+ * reduction domain and can be used in a pure function definition.
33567
+ *
33568
+ * An example using \ref sum :
33569
+ *
33570
+ \code
33571
+ Func f, g;
33572
+ Var x;
33573
+ RDom r(0, 10);
33574
+ f(x) = x*x;
33575
+ g(x) = sum(f(x + r));
33576
+ \endcode
33577
+ *
33578
+ * Here g computes some blur of x, but g is still a pure function. The
33579
+ * sum is being computed by an anonymous reduction function that is
33580
+ * scheduled innermost within g.
33581
+ */
33582
+ //@{
33583
+ Expr sum(Expr, const std::string &s = "sum");
33584
+ Expr saturating_sum(Expr, const std::string &s = "saturating_sum");
33585
+ Expr product(Expr, const std::string &s = "product");
33586
+ Expr maximum(Expr, const std::string &s = "maximum");
33587
+ Expr minimum(Expr, const std::string &s = "minimum");
33588
+ //@}
33589
+
33590
+ /** Variants of the inline reduction in which the RDom is stated
33591
+ * explicitly. The expression can refer to multiple RDoms, and only
33592
+ * the inner one is captured by the reduction. This allows you to
33593
+ * write expressions like:
33594
+ \code
33595
+ RDom r1(0, 10), r2(0, 10), r3(0, 10);
33596
+ Expr e = minimum(r1, product(r2, sum(r3, r1 + r2 + r3)));
33597
+ \endcode
33598
+ */
33599
+ // @{
33600
+ Expr sum(const RDom &, Expr, const std::string &s = "sum");
33601
+ Expr saturating_sum(const RDom &r, Expr e, const std::string &s = "saturating_sum");
33602
+ Expr product(const RDom &, Expr, const std::string &s = "product");
33603
+ Expr maximum(const RDom &, Expr, const std::string &s = "maximum");
33604
+ Expr minimum(const RDom &, Expr, const std::string &s = "minimum");
33605
+ // @}
33606
+
33607
+ /** Returns an Expr or Tuple representing the coordinates of the point
33608
+ * in the RDom which minimizes or maximizes the expression. The
33609
+ * expression must refer to some RDom. Also returns the extreme value
33610
+ * of the expression as the last element of the tuple. */
33611
+ // @{
33612
+ Tuple argmax(Expr, const std::string &s = "argmax");
33613
+ Tuple argmin(Expr, const std::string &s = "argmin");
33614
+ Tuple argmax(const RDom &, Expr, const std::string &s = "argmax");
33615
+ Tuple argmin(const RDom &, Expr, const std::string &s = "argmin");
33616
+ // @}
33617
+
33618
+ /** Inline reductions create an anonymous helper Func to do the
33619
+ * work. The variants below instead take a named Func object to use,
33620
+ * so that it is no longer anonymous and can be scheduled
33621
+ * (e.g. unrolled across the reduction domain). The Func passed must
33622
+ * not have any existing definition. */
33623
+ //@{
33624
+ Expr sum(Expr, const Func &);
33625
+ Expr saturating_sum(Expr, const Func &);
33626
+ Expr product(Expr, const Func &);
33627
+ Expr maximum(Expr, const Func &);
33628
+ Expr minimum(Expr, const Func &);
33629
+ Expr sum(const RDom &, Expr, const Func &);
33630
+ Expr saturating_sum(const RDom &r, Expr e, const Func &);
33631
+ Expr product(const RDom &, Expr, const Func &);
33632
+ Expr maximum(const RDom &, Expr, const Func &);
33633
+ Expr minimum(const RDom &, Expr, const Func &);
33634
+ Tuple argmax(Expr, const Func &);
33635
+ Tuple argmin(Expr, const Func &);
33636
+ Tuple argmax(const RDom &, Expr, const Func &);
33637
+ Tuple argmin(const RDom &, Expr, const Func &);
33638
+ //@}
33639
+
33640
+ } // namespace Halide
33641
+
33642
+ #endif
33643
+ #ifndef HALIDE_INTEGER_DIVISION_TABLE_H
33644
+ #define HALIDE_INTEGER_DIVISION_TABLE_H
33645
+
33646
+ #include <cstdint>
33647
+
33648
+ /** \file
33649
+ * Tables telling us how to do integer division via fixed-point
33650
+ * multiplication for various small constants. This file is
33651
+ * automatically generated by find_inverse.cpp.
33652
+ */
33653
+ namespace Halide {
33654
+ namespace Internal {
33655
+ namespace IntegerDivision {
33656
+ extern const int64_t table_u8[256][4];
33657
+ extern const int64_t table_s8[256][4];
33658
+ extern const int64_t table_srz8[256][4];
33659
+ extern const int64_t table_u16[256][4];
33660
+ extern const int64_t table_s16[256][4];
33661
+ extern const int64_t table_srz16[256][4];
33662
+ extern const int64_t table_u32[256][4];
33663
+ extern const int64_t table_s32[256][4];
33664
+ extern const int64_t table_srz32[256][4];
33665
+ extern const int64_t table_runtime_u8[256][4];
33666
+ extern const int64_t table_runtime_s8[256][4];
33667
+ extern const int64_t table_runtime_srz8[256][4];
33668
+ extern const int64_t table_runtime_u16[256][4];
33669
+ extern const int64_t table_runtime_s16[256][4];
33670
+ extern const int64_t table_runtime_srz16[256][4];
33671
+ extern const int64_t table_runtime_u32[256][4];
33672
+ extern const int64_t table_runtime_s32[256][4];
33673
+ extern const int64_t table_runtime_srz32[256][4];
33674
+ } // namespace IntegerDivision
33675
+ } // namespace Internal
33676
+ } // namespace Halide
33677
+
33435
33678
  #endif
33436
33679
  #ifndef HALIDE_LICM_H
33437
33680
  #define HALIDE_LICM_H
@@ -33524,6 +33767,7 @@ void create_static_library(const std::vector<std::string> &src_files, const Targ
33524
33767
  * Support for linking LLVM modules that comprise the runtime.
33525
33768
  */
33526
33769
 
33770
+ #include <cstdint>
33527
33771
  #include <memory>
33528
33772
  #include <string>
33529
33773
  #include <vector>
@@ -33562,6 +33806,30 @@ std::unique_ptr<llvm::Module> link_with_wasm_jit_runtime(llvm::LLVMContext *c, c
33562
33806
  } // namespace Internal
33563
33807
  } // namespace Halide
33564
33808
 
33809
+ #endif
33810
+ #ifndef HALIDE_LERP_H
33811
+ #define HALIDE_LERP_H
33812
+
33813
+ /** \file
33814
+ * Defines methods for converting a lerp intrinsic into Halide IR.
33815
+ */
33816
+
33817
+
33818
+ namespace Halide {
33819
+
33820
+ struct Target;
33821
+
33822
+ namespace Internal {
33823
+
33824
+ /** Build Halide IR that computes a lerp. Use by codegen targets that don't have
33825
+ * a native lerp. The lerp is done in the type of the zero value. The final_type
33826
+ * is a cast that should occur after the lerp. It's included because in some
33827
+ * cases you can incorporate a final cast into the lerp math. */
33828
+ Expr lower_lerp(Type final_type, Expr zero_val, Expr one_val, const Expr &weight, const Target &target);
33829
+
33830
+ } // namespace Internal
33831
+ } // namespace Halide
33832
+
33565
33833
  #endif
33566
33834
  #ifndef HALIDE_LOOP_CARRY_H
33567
33835
  #define HALIDE_LOOP_CARRY_H
@@ -34754,6 +35022,13 @@ Interval solve_for_inner_interval(const Expr &c, const std::string &variable);
34754
35022
  * 'and' over the vector lanes, and return a scalar result. */
34755
35023
  Expr and_condition_over_domain(const Expr &c, const Scope<Interval> &varying);
34756
35024
 
35025
+ /** Take a conditional that includes variables that vary over some
35026
+ * domain, and convert it to a weaker (less frequently false) condition
35027
+ * that doesn't depend on those variables. Formally, the input expr
35028
+ * implies the output expr. Note that this function might be unable to
35029
+ * provide a better response than simply const_true(). */
35030
+ Expr or_condition_over_domain(const Expr &c, const Scope<Interval> &varying);
35031
+
34757
35032
  void solve_test();
34758
35033
 
34759
35034
  } // namespace Internal
@@ -34948,19 +35223,26 @@ Stmt storage_folding(const Stmt &s, const std::map<std::string, Function> &env);
34948
35223
  namespace Halide {
34949
35224
 
34950
35225
  struct Target;
35226
+ struct Expr;
34951
35227
 
34952
35228
  namespace Internal {
34953
35229
 
34954
35230
  class Function;
35231
+ struct Call;
34955
35232
 
34956
- /** Propagate strict_float intrinisics such that they immediately wrap
34957
- * all floating-point expressions. This makes the IR nodes context
34958
- * independent. If the Target::StrictFloat flag is specified in
34959
- * target, starts in strict_float mode so all floating-point type
34960
- * Exprs in the compilation will be marked with strict_float. Returns
34961
- * whether any strict floating-point is used in any function in the
34962
- * passed in env.
34963
- */
35233
+ /** Replace all rounding floating point ops and floating point ops that need to
35234
+ * handle nan and inf differently with strict float intrinsics. */
35235
+ Expr strictify_float(const Expr &e);
35236
+
35237
+ /** Replace a strict float intrinsic with its non-strict equivalent. Non-recursive. */
35238
+ Expr unstrictify_float(const Call *op);
35239
+
35240
+ /** If the StrictFloat target feature is set, replace add, sub, mul, div, etc
35241
+ * operations with strict float intrinsics for all Funcs in the environment. If
35242
+ * StrictFloat is not set does nothing. Returns whether or not there's any usage
35243
+ * of strict float intrinsics or if the target flag is set (i.e. returns whether
35244
+ * or not the rest of lowering and codegen needs to worry about floating point
35245
+ * strictness). */
34964
35246
  bool strictify_float(std::map<std::string, Function> &env, const Target &t);
34965
35247
 
34966
35248
  } // namespace Internal
@@ -34992,6 +35274,8 @@ Stmt strip_asserts(const Stmt &s);
34992
35274
  * Defines methods for substituting out variables in expressions and
34993
35275
  * statements. */
34994
35276
 
35277
+ #include <algorithm>
35278
+ #include <iterator>
34995
35279
  #include <map>
34996
35280
 
34997
35281
 
@@ -35022,6 +35306,16 @@ Expr substitute(const Expr &find, const Expr &replacement, const Expr &expr);
35022
35306
  Stmt substitute(const Expr &find, const Expr &replacement, const Stmt &stmt);
35023
35307
  // @}
35024
35308
 
35309
+ /** Substitute a container of Exprs or Stmts out of place */
35310
+ template<typename T>
35311
+ T substitute(const std::map<std::string, Expr> &replacements, const T &container) {
35312
+ T output;
35313
+ std::transform(container.begin(), container.end(), std::back_inserter(output), [&](const auto &expr_or_stmt) {
35314
+ return substitute(replacements, expr_or_stmt);
35315
+ });
35316
+ return output;
35317
+ }
35318
+
35025
35319
  /** Substitutions where the IR may be a general graph (and not just a
35026
35320
  * DAG). */
35027
35321
  // @{
@@ -35284,10 +35578,14 @@ std::map<std::string, Function> wrap_func_calls(const std::map<std::string, Func
35284
35578
  #endif
35285
35579
 
35286
35580
  // Clean up macros used inside Halide headers
35581
+ #ifndef HALIDE_KEEP_MACROS
35287
35582
  #undef user_assert
35288
35583
  #undef user_error
35289
35584
  #undef user_warning
35290
35585
  #undef internal_error
35291
35586
  #undef internal_assert
35292
35587
  #undef halide_runtime_error
35588
+ #undef debug
35589
+ #undef debug_is_active
35590
+ #endif
35293
35591
  #endif // HALIDE_H