cuda-cccl 0.3.1__cp312-cp312-manylinux_2_24_aarch64.whl → 0.3.2__cp312-cp312-manylinux_2_24_aarch64.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cuda-cccl might be problematic. More details are available from the link on the original diff page.

Files changed (185)
  1. cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +354 -572
  2. cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +6 -8
  3. cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +24 -14
  4. cuda/cccl/headers/include/cub/block/block_exchange.cuh +5 -0
  5. cuda/cccl/headers/include/cub/block/block_histogram.cuh +4 -0
  6. cuda/cccl/headers/include/cub/block/block_load.cuh +4 -0
  7. cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +1 -0
  8. cuda/cccl/headers/include/cub/block/block_reduce.cuh +1 -0
  9. cuda/cccl/headers/include/cub/block/block_scan.cuh +12 -2
  10. cuda/cccl/headers/include/cub/block/block_store.cuh +3 -2
  11. cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +34 -30
  12. cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +1 -1
  13. cuda/cccl/headers/include/cub/device/device_for.cuh +118 -40
  14. cuda/cccl/headers/include/cub/device/device_reduce.cuh +6 -7
  15. cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +12 -13
  16. cuda/cccl/headers/include/cub/device/device_transform.cuh +122 -91
  17. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +2 -3
  18. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +4 -3
  19. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +1 -1
  20. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +4 -5
  21. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +0 -1
  22. cuda/cccl/headers/include/cub/device/dispatch/dispatch_topk.cuh +3 -5
  23. cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +13 -5
  24. cuda/cccl/headers/include/cub/device/dispatch/kernels/for_each.cuh +72 -37
  25. cuda/cccl/headers/include/cub/device/dispatch/kernels/transform.cuh +22 -27
  26. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +61 -70
  27. cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +24 -17
  28. cuda/cccl/headers/include/cub/warp/warp_load.cuh +6 -6
  29. cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +7 -2
  30. cuda/cccl/headers/include/cub/warp/warp_scan.cuh +7 -3
  31. cuda/cccl/headers/include/cub/warp/warp_store.cuh +1 -0
  32. cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +19 -0
  33. cuda/cccl/headers/include/cuda/__cccl_config +1 -0
  34. cuda/cccl/headers/include/cuda/__cmath/fast_modulo_division.h +3 -74
  35. cuda/cccl/headers/include/cuda/__cmath/mul_hi.h +146 -0
  36. cuda/cccl/headers/include/cuda/__complex/get_real_imag.h +0 -4
  37. cuda/cccl/headers/include/cuda/__device/arch_id.h +176 -0
  38. cuda/cccl/headers/include/cuda/__device/arch_traits.h +239 -317
  39. cuda/cccl/headers/include/cuda/__device/attributes.h +4 -3
  40. cuda/cccl/headers/include/cuda/__device/compute_capability.h +171 -0
  41. cuda/cccl/headers/include/cuda/__device/device_ref.h +0 -10
  42. cuda/cccl/headers/include/cuda/__device/physical_device.h +1 -26
  43. cuda/cccl/headers/include/cuda/__event/event.h +26 -26
  44. cuda/cccl/headers/include/cuda/__event/event_ref.h +5 -5
  45. cuda/cccl/headers/include/cuda/__event/timed_event.h +9 -7
  46. cuda/cccl/headers/include/cuda/__fwd/devices.h +4 -4
  47. cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h +46 -31
  48. cuda/cccl/headers/include/cuda/__iterator/strided_iterator.h +79 -47
  49. cuda/cccl/headers/include/cuda/__iterator/tabulate_output_iterator.h +59 -36
  50. cuda/cccl/headers/include/cuda/__iterator/transform_input_output_iterator.h +79 -49
  51. cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +74 -48
  52. cuda/cccl/headers/include/cuda/__iterator/transform_output_iterator.h +80 -55
  53. cuda/cccl/headers/include/cuda/__iterator/zip_common.h +2 -12
  54. cuda/cccl/headers/include/cuda/__iterator/zip_iterator.h +15 -19
  55. cuda/cccl/headers/include/cuda/__iterator/zip_transform_iterator.h +59 -60
  56. cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +127 -60
  57. cuda/cccl/headers/include/cuda/__mdspan/host_device_mdspan.h +178 -3
  58. cuda/cccl/headers/include/cuda/__mdspan/restrict_accessor.h +38 -8
  59. cuda/cccl/headers/include/cuda/__mdspan/restrict_mdspan.h +67 -1
  60. cuda/cccl/headers/include/cuda/__memory/ptr_in_range.h +93 -0
  61. cuda/cccl/headers/include/cuda/__memory_resource/get_memory_resource.h +4 -4
  62. cuda/cccl/headers/include/cuda/__memory_resource/properties.h +44 -0
  63. cuda/cccl/headers/include/cuda/__memory_resource/resource.h +1 -1
  64. cuda/cccl/headers/include/cuda/__memory_resource/resource_ref.h +4 -6
  65. cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +2 -1
  66. cuda/cccl/headers/include/cuda/__runtime/ensure_current_context.h +5 -4
  67. cuda/cccl/headers/include/cuda/__stream/stream.h +8 -8
  68. cuda/cccl/headers/include/cuda/__stream/stream_ref.h +17 -16
  69. cuda/cccl/headers/include/cuda/__utility/in_range.h +65 -0
  70. cuda/cccl/headers/include/cuda/cmath +1 -0
  71. cuda/cccl/headers/include/cuda/devices +3 -0
  72. cuda/cccl/headers/include/cuda/memory +1 -0
  73. cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h +2 -2
  74. cuda/cccl/headers/include/cuda/std/__algorithm/find.h +1 -1
  75. cuda/cccl/headers/include/cuda/std/__algorithm/includes.h +2 -4
  76. cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h +1 -1
  77. cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h +7 -15
  78. cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h +1 -1
  79. cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h +1 -2
  80. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h +2 -2
  81. cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h +1 -1
  82. cuda/cccl/headers/include/cuda/std/__cccl/algorithm_wrapper.h +36 -0
  83. cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +46 -49
  84. cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h +6 -0
  85. cuda/cccl/headers/include/cuda/std/__cccl/host_std_lib.h +52 -0
  86. cuda/cccl/headers/include/cuda/std/__cccl/memory_wrapper.h +36 -0
  87. cuda/cccl/headers/include/cuda/std/__cccl/numeric_wrapper.h +36 -0
  88. cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +3 -2
  89. cuda/cccl/headers/include/cuda/std/__complex/complex.h +3 -2
  90. cuda/cccl/headers/include/cuda/std/__complex/literals.h +14 -34
  91. cuda/cccl/headers/include/cuda/std/__complex/nvbf16.h +2 -1
  92. cuda/cccl/headers/include/cuda/std/__complex/nvfp16.h +4 -3
  93. cuda/cccl/headers/include/cuda/std/__concepts/invocable.h +2 -2
  94. cuda/cccl/headers/include/cuda/std/__cstdlib/malloc.h +3 -2
  95. cuda/cccl/headers/include/cuda/std/__functional/bind.h +10 -13
  96. cuda/cccl/headers/include/cuda/std/__functional/function.h +5 -8
  97. cuda/cccl/headers/include/cuda/std/__functional/invoke.h +71 -335
  98. cuda/cccl/headers/include/cuda/std/__functional/mem_fn.h +1 -2
  99. cuda/cccl/headers/include/cuda/std/__functional/reference_wrapper.h +3 -3
  100. cuda/cccl/headers/include/cuda/std/__functional/weak_result_type.h +0 -6
  101. cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +13 -0
  102. cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +13 -0
  103. cuda/cccl/headers/include/cuda/std/__fwd/complex.h +13 -4
  104. cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +23 -0
  105. cuda/cccl/headers/include/cuda/std/__fwd/pair.h +13 -0
  106. cuda/cccl/headers/include/cuda/std/__fwd/string.h +22 -0
  107. cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +14 -0
  108. cuda/cccl/headers/include/cuda/std/__internal/features.h +0 -5
  109. cuda/cccl/headers/include/cuda/std/__internal/namespaces.h +21 -0
  110. cuda/cccl/headers/include/cuda/std/__iterator/iterator_traits.h +5 -5
  111. cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +7 -1
  112. cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +53 -39
  113. cuda/cccl/headers/include/cuda/std/__memory/allocator.h +3 -3
  114. cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +1 -3
  115. cuda/cccl/headers/include/cuda/std/__optional/optional_base.h +1 -0
  116. cuda/cccl/headers/include/cuda/std/__ranges/compressed_movable_box.h +892 -0
  117. cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +2 -2
  118. cuda/cccl/headers/include/cuda/std/__type_traits/is_primary_template.h +7 -5
  119. cuda/cccl/headers/include/cuda/std/__type_traits/result_of.h +1 -1
  120. cuda/cccl/headers/include/cuda/std/__utility/pair.h +0 -5
  121. cuda/cccl/headers/include/cuda/std/bitset +1 -1
  122. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +15 -12
  123. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/variant +11 -9
  124. cuda/cccl/headers/include/cuda/std/inplace_vector +4 -4
  125. cuda/cccl/headers/include/cuda/std/numbers +5 -0
  126. cuda/cccl/headers/include/cuda/std/string_view +146 -11
  127. cuda/cccl/headers/include/cuda/stream_ref +5 -0
  128. cuda/cccl/headers/include/cuda/utility +1 -0
  129. cuda/cccl/headers/include/nv/target +7 -2
  130. cuda/cccl/headers/include/thrust/allocate_unique.h +1 -1
  131. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +309 -33
  132. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +151 -4
  133. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +60 -3
  134. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +45 -3
  135. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +31 -6
  136. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +29 -16
  137. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +41 -4
  138. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.h +42 -4
  139. cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +3 -3
  140. cuda/cccl/headers/include/thrust/detail/internal_functional.h +1 -1
  141. cuda/cccl/headers/include/thrust/detail/memory_algorithms.h +1 -1
  142. cuda/cccl/headers/include/thrust/detail/temporary_array.h +1 -1
  143. cuda/cccl/headers/include/thrust/detail/type_traits.h +1 -1
  144. cuda/cccl/headers/include/thrust/device_delete.h +18 -3
  145. cuda/cccl/headers/include/thrust/device_free.h +16 -3
  146. cuda/cccl/headers/include/thrust/device_new.h +29 -8
  147. cuda/cccl/headers/include/thrust/host_vector.h +1 -1
  148. cuda/cccl/headers/include/thrust/iterator/tabulate_output_iterator.h +5 -2
  149. cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +1 -1
  150. cuda/cccl/headers/include/thrust/mr/pool.h +1 -1
  151. cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +13 -115
  152. cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +8 -2
  153. cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +7 -7
  154. cuda/compute/__init__.py +2 -0
  155. cuda/compute/_bindings.pyi +43 -1
  156. cuda/compute/_bindings_impl.pyx +156 -7
  157. cuda/compute/algorithms/_scan.py +108 -36
  158. cuda/compute/algorithms/_transform.py +32 -11
  159. cuda/compute/cu12/_bindings_impl.cpython-312-aarch64-linux-gnu.so +0 -0
  160. cuda/compute/cu12/cccl/libcccl.c.parallel.so +0 -0
  161. cuda/compute/cu13/_bindings_impl.cpython-312-aarch64-linux-gnu.so +0 -0
  162. cuda/compute/cu13/cccl/libcccl.c.parallel.so +0 -0
  163. cuda/compute/iterators/__init__.py +2 -0
  164. cuda/compute/iterators/_factories.py +28 -0
  165. cuda/compute/iterators/_iterators.py +206 -1
  166. cuda/compute/numba_utils.py +2 -2
  167. cuda/compute/typing.py +2 -0
  168. {cuda_cccl-0.3.1.dist-info → cuda_cccl-0.3.2.dist-info}/METADATA +1 -1
  169. {cuda_cccl-0.3.1.dist-info → cuda_cccl-0.3.2.dist-info}/RECORD +171 -175
  170. cuda/cccl/headers/include/thrust/detail/algorithm_wrapper.h +0 -37
  171. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.inl +0 -371
  172. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.inl +0 -242
  173. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.inl +0 -137
  174. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.inl +0 -99
  175. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.inl +0 -68
  176. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.inl +0 -86
  177. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.inl +0 -79
  178. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.inl +0 -98
  179. cuda/cccl/headers/include/thrust/detail/device_delete.inl +0 -52
  180. cuda/cccl/headers/include/thrust/detail/device_free.inl +0 -47
  181. cuda/cccl/headers/include/thrust/detail/device_new.inl +0 -61
  182. cuda/cccl/headers/include/thrust/detail/memory_wrapper.h +0 -40
  183. cuda/cccl/headers/include/thrust/detail/numeric_wrapper.h +0 -37
  184. {cuda_cccl-0.3.1.dist-info → cuda_cccl-0.3.2.dist-info}/WHEEL +0 -0
  185. {cuda_cccl-0.3.1.dist-info → cuda_cccl-0.3.2.dist-info}/licenses/LICENSE +0 -0
@@ -35,16 +35,6 @@
35
35
 
36
36
  _CCCL_BEGIN_NAMESPACE_CUDA
37
37
 
38
- _CCCL_TEMPLATE(class... _Iterators)
39
- _CCCL_REQUIRES((sizeof...(_Iterators) != 2))
40
- [[nodiscard]] _CCCL_API constexpr auto __tuple_or_pair_impl() noexcept -> ::cuda::std::tuple<_Iterators...>;
41
-
42
- template <class _Tp, class _Up>
43
- [[nodiscard]] _CCCL_API constexpr auto __tuple_or_pair_impl() noexcept -> ::cuda::std::pair<_Tp, _Up>;
44
-
45
- template <class... _Iterators>
46
- using __tuple_or_pair = decltype(::cuda::__tuple_or_pair_impl<_Iterators...>());
47
-
48
38
  template <class... _Iterators>
49
39
  struct __zip_iter_constraints
50
40
  {
@@ -106,7 +96,7 @@ template <class... _Iterators>
106
96
  struct __zip_op_star
107
97
  {
108
98
  template <class... _Iterators>
109
- using reference = __tuple_or_pair<::cuda::std::iter_reference_t<_Iterators>...>;
99
+ using reference = ::cuda::std::tuple<::cuda::std::iter_reference_t<_Iterators>...>;
110
100
 
111
101
  _CCCL_EXEC_CHECK_DISABLE
112
102
  template <class... _Iterators>
@@ -140,7 +130,7 @@ struct __zip_op_decrement
140
130
  struct __zip_iter_move
141
131
  {
142
132
  template <class... _Iterators>
143
- using __iter_move_ret = __tuple_or_pair<::cuda::std::iter_rvalue_reference_t<_Iterators>...>;
133
+ using __iter_move_ret = ::cuda::std::tuple<::cuda::std::iter_rvalue_reference_t<_Iterators>...>;
144
134
 
145
135
  _CCCL_EXEC_CHECK_DISABLE
146
136
  template <class... _Iterators>
@@ -42,7 +42,6 @@
42
42
  #include <cuda/std/__utility/forward.h>
43
43
  #include <cuda/std/__utility/integer_sequence.h>
44
44
  #include <cuda/std/__utility/move.h>
45
- #include <cuda/std/__utility/pair.h>
46
45
  #include <cuda/std/tuple>
47
46
 
48
47
  #include <cuda/std/__cccl/prologue.h>
@@ -123,7 +122,7 @@ using __zv_iter_category_base =
123
122
  template <class... _Iterators>
124
123
  class zip_iterator : public __zv_iter_category_base<_Iterators...>
125
124
  {
126
- __tuple_or_pair<_Iterators...> __current_;
125
+ ::cuda::std::tuple<_Iterators...> __current_;
127
126
 
128
127
  template <class...>
129
128
  friend class zip_iterator;
@@ -131,8 +130,8 @@ class zip_iterator : public __zv_iter_category_base<_Iterators...>
131
130
  template <class _Fn>
132
131
  _CCCL_API static constexpr auto
133
132
  __zip_apply(const _Fn& __fun,
134
- const __tuple_or_pair<_Iterators...>& __tuple1,
135
- const __tuple_or_pair<_Iterators...>& __tuple2) //
133
+ const ::cuda::std::tuple<_Iterators...>& __tuple1,
134
+ const ::cuda::std::tuple<_Iterators...>& __tuple2) //
136
135
  noexcept(noexcept(__fun(__tuple1, __tuple2, ::cuda::std::make_index_sequence<sizeof...(_Iterators)>())))
137
136
  {
138
137
  return __fun(__tuple1, __tuple2, ::cuda::std::make_index_sequence<sizeof...(_Iterators)>());
@@ -143,8 +142,8 @@ public:
143
142
  _CCCL_HIDE_FROM_ABI zip_iterator() = default;
144
143
 
145
144
  //! @brief Constructs a @c zip_iterator from a tuple of iterators
146
- //! @param __iters A tuple or pair of iterators
147
- _CCCL_API constexpr explicit zip_iterator(__tuple_or_pair<_Iterators...> __iters)
145
+ //! @param __iters A tuple of iterators
146
+ _CCCL_API constexpr explicit zip_iterator(::cuda::std::tuple<_Iterators...> __iters)
148
147
  : __current_(::cuda::std::move(__iters))
149
148
  {}
150
149
 
@@ -163,8 +162,8 @@ public:
163
162
  {}
164
163
 
165
164
  using iterator_concept = decltype(__get_zip_iterator_concept<_Iterators...>());
166
- using value_type = __tuple_or_pair<::cuda::std::iter_value_t<_Iterators>...>;
167
- using reference = __tuple_or_pair<::cuda::std::iter_reference_t<_Iterators>...>;
165
+ using value_type = ::cuda::std::tuple<::cuda::std::iter_value_t<_Iterators>...>;
166
+ using reference = ::cuda::std::tuple<::cuda::std::iter_reference_t<_Iterators>...>;
168
167
  using difference_type = ::cuda::std::common_type_t<::cuda::std::iter_difference_t<_Iterators>...>;
169
168
 
170
169
  // Those are technically not to spec, but pre-ranges iterator_traits do not work properly with iterators that do not
@@ -360,8 +359,8 @@ public:
360
359
  _CCCL_EXEC_CHECK_DISABLE
361
360
  template <size_t _Zero, size_t... _Indices>
362
361
  [[nodiscard]] _CCCL_API constexpr difference_type
363
- operator()(const __tuple_or_pair<_Iterators...>& __iters1,
364
- const __tuple_or_pair<_Iterators...>& __iters2,
362
+ operator()(const ::cuda::std::tuple<_Iterators...>& __iters1,
363
+ const ::cuda::std::tuple<_Iterators...>& __iters2,
365
364
  ::cuda::std::index_sequence<_Zero, _Indices...>) const //
366
365
  noexcept(noexcept(((::cuda::std::get<_Indices>(__iters1) - ::cuda::std::get<_Indices>(__iters2)) && ...)))
367
366
  {
@@ -391,8 +390,8 @@ public:
391
390
  {
392
391
  _CCCL_EXEC_CHECK_DISABLE
393
392
  template <size_t... _Indices>
394
- _CCCL_API constexpr bool operator()(const __tuple_or_pair<_Iterators...>& __iters1,
395
- const __tuple_or_pair<_Iterators...>& __iters2,
393
+ _CCCL_API constexpr bool operator()(const ::cuda::std::tuple<_Iterators...>& __iters1,
394
+ const ::cuda::std::tuple<_Iterators...>& __iters2,
396
395
  ::cuda::std::index_sequence<_Indices...>) const
397
396
  noexcept(noexcept(((::cuda::std::get<_Indices>(__iters1) == ::cuda::std::get<_Indices>(__iters2)) || ...)))
398
397
  {
@@ -490,8 +489,8 @@ public:
490
489
  struct __zip_op_iter_swap
491
490
  {
492
491
  template <size_t... _Indices>
493
- _CCCL_API constexpr void operator()(const __tuple_or_pair<_Iterators...>& __iters1,
494
- const __tuple_or_pair<_Iterators...>& __iters2,
492
+ _CCCL_API constexpr void operator()(const ::cuda::std::tuple<_Iterators...>& __iters1,
493
+ const ::cuda::std::tuple<_Iterators...>& __iters2,
495
494
  ::cuda::std::index_sequence<_Indices...>) const
496
495
  noexcept(__zip_iter_constraints<_Iterators...>::__all_noexcept_swappable)
497
496
  {
@@ -508,12 +507,12 @@ public:
508
507
  return __zip_apply(__zip_op_iter_swap{}, __lhs.__current_, __rhs.__current_);
509
508
  }
510
509
 
511
- [[nodiscard]] _CCCL_API constexpr __tuple_or_pair<_Iterators...>& __iterators() noexcept
510
+ [[nodiscard]] _CCCL_API constexpr ::cuda::std::tuple<_Iterators...>& __iterators() noexcept
512
511
  {
513
512
  return __current_;
514
513
  }
515
514
 
516
- [[nodiscard]] _CCCL_API constexpr const __tuple_or_pair<_Iterators...>& __iterators() const noexcept
515
+ [[nodiscard]] _CCCL_API constexpr const ::cuda::std::tuple<_Iterators...>& __iterators() const noexcept
517
516
  {
518
517
  return __current_;
519
518
  }
@@ -522,9 +521,6 @@ public:
522
521
  template <class... _Iterators>
523
522
  _CCCL_HOST_DEVICE zip_iterator(::cuda::std::tuple<_Iterators...>) -> zip_iterator<_Iterators...>;
524
523
 
525
- template <class _Iterator1, class _Iterator2>
526
- _CCCL_HOST_DEVICE zip_iterator(::cuda::std::pair<_Iterator1, _Iterator2>) -> zip_iterator<_Iterator1, _Iterator2>;
527
-
528
524
  template <class... _Iterators>
529
525
  _CCCL_HOST_DEVICE zip_iterator(_Iterators...) -> zip_iterator<_Iterators...>;
530
526
 
@@ -33,6 +33,7 @@
33
33
  #include <cuda/std/__iterator/concepts.h>
34
34
  #include <cuda/std/__iterator/incrementable_traits.h>
35
35
  #include <cuda/std/__iterator/iterator_traits.h>
36
+ #include <cuda/std/__ranges/compressed_movable_box.h>
36
37
  #include <cuda/std/__ranges/concepts.h>
37
38
  #include <cuda/std/__ranges/movable_box.h>
38
39
  #include <cuda/std/__type_traits/common_type.h>
@@ -41,7 +42,6 @@
41
42
  #include <cuda/std/__utility/forward.h>
42
43
  #include <cuda/std/__utility/integer_sequence.h>
43
44
  #include <cuda/std/__utility/move.h>
44
- #include <cuda/std/__utility/pair.h>
45
45
  #include <cuda/std/tuple>
46
46
 
47
47
  #include <cuda/std/__cccl/prologue.h>
@@ -153,8 +153,29 @@ template <class _Fn, class... _Iterators>
153
153
  template <class _Fn, class... _Iterators>
154
154
  class zip_transform_iterator
155
155
  {
156
- ::cuda::std::ranges::__movable_box<_Fn> __func_;
157
- __tuple_or_pair<_Iterators...> __current_;
156
+ private:
157
+ // Not a base because then the friend operators would be ambiguous
158
+ ::cuda::std::__compressed_movable_box<::cuda::std::tuple<_Iterators...>, _Fn> __store_;
159
+
160
+ [[nodiscard]] _CCCL_API constexpr ::cuda::std::tuple<_Iterators...>& __iters() noexcept
161
+ {
162
+ return __store_.template __get<0>();
163
+ }
164
+
165
+ [[nodiscard]] _CCCL_API constexpr const ::cuda::std::tuple<_Iterators...>& __iters() const noexcept
166
+ {
167
+ return __store_.template __get<0>();
168
+ }
169
+
170
+ [[nodiscard]] _CCCL_API constexpr _Fn& __func() noexcept
171
+ {
172
+ return __store_.template __get<1>();
173
+ }
174
+
175
+ [[nodiscard]] _CCCL_API constexpr const _Fn& __func() const noexcept
176
+ {
177
+ return __store_.template __get<1>();
178
+ }
158
179
 
159
180
  template <class, class...>
160
181
  friend class zip_transform_iterator;
@@ -162,8 +183,8 @@ class zip_transform_iterator
162
183
  template <class _Op>
163
184
  _CCCL_API static constexpr auto
164
185
  __zip_apply(const _Op& __op,
165
- const __tuple_or_pair<_Iterators...>& __tuple1,
166
- const __tuple_or_pair<_Iterators...>& __tuple2) //
186
+ const ::cuda::std::tuple<_Iterators...>& __tuple1,
187
+ const ::cuda::std::tuple<_Iterators...>& __tuple2) //
167
188
  noexcept(noexcept(__op(__tuple1, __tuple2, ::cuda::std::make_index_sequence<sizeof...(_Iterators)>())))
168
189
  {
169
190
  return __op(__tuple1, __tuple2, ::cuda::std::make_index_sequence<sizeof...(_Iterators)>());
@@ -171,13 +192,6 @@ class zip_transform_iterator
171
192
 
172
193
  public:
173
194
  //! @brief Default-constructs a @c zip_transform_iterator by value-initializing the functor and all stored iterators
174
- #if _CCCL_HAS_CONCEPTS()
175
- _CCCL_EXEC_CHECK_DISABLE
176
- _CCCL_HIDE_FROM_ABI zip_transform_iterator()
177
- requires ::cuda::std::default_initializable<_Fn>
178
- && __zip_iter_constraints<_Iterators...>::__all_default_initializable
179
- = default;
180
- #else // ^^^ _CCCL_HAS_CONCEPTS() ^^^ / vvv !_CCCL_HAS_CONCEPTS() vvv
181
195
  _CCCL_EXEC_CHECK_DISABLE
182
196
  _CCCL_TEMPLATE(class _Fn2 = _Fn)
183
197
  _CCCL_REQUIRES(
@@ -185,32 +199,19 @@ public:
185
199
  _CCCL_API constexpr zip_transform_iterator() noexcept(
186
200
  ::cuda::std::is_nothrow_default_constructible_v<_Fn2>
187
201
  && __zip_iter_constraints<_Iterators...>::__all_nothrow_default_constructible)
188
- : __func_(::cuda::std::in_place)
189
- , __current_()
202
+ : __store_()
190
203
  {}
191
- #endif // ^^^ !_CCCL_HAS_CONCEPTS() ^^^
192
204
 
193
205
  //! @brief Constructs a @c zip_transform_iterator from a tuple of iterators
194
206
  //! @param __iters A tuple of iterators
195
- _CCCL_API constexpr explicit zip_transform_iterator(_Fn __fun, __tuple_or_pair<_Iterators...> __iters)
196
- : __func_(::cuda::std::in_place, ::cuda::std::move(__fun))
197
- , __current_(::cuda::std::move(__iters))
198
- {}
199
-
200
- //! @brief Constructs a @c zip_transform_iterator from a tuple of iterators
201
- //! @param __iters A tuple of iterators
202
- _CCCL_TEMPLATE(size_t _NumIterators = sizeof...(_Iterators))
203
- _CCCL_REQUIRES((_NumIterators == 2))
204
207
  _CCCL_API constexpr explicit zip_transform_iterator(_Fn __fun, ::cuda::std::tuple<_Iterators...> __iters)
205
- : __func_(::cuda::std::in_place, ::cuda::std::move(__fun))
206
- , __current_(::cuda::std::get<0>(::cuda::std::move(__iters)), ::cuda::std::get<1>(::cuda::std::move(__iters)))
208
+ : __store_(::cuda::std::move(__iters), ::cuda::std::move(__fun))
207
209
  {}
208
210
 
209
211
  //! @brief Constructs a @c zip_transform_iterator from variadic set of iterators
210
212
  //! @param __iters The input iterators
211
213
  _CCCL_API constexpr explicit zip_transform_iterator(_Fn __fun, _Iterators... __iters)
212
- : __func_(::cuda::std::in_place, ::cuda::std::move(__fun))
213
- , __current_(::cuda::std::move(__iters)...)
214
+ : __store_(::cuda::std::tuple<_Iterators...>{::cuda::std::move(__iters)...}, ::cuda::std::move(__fun))
214
215
  {}
215
216
 
216
217
  using iterator_concept = decltype(::cuda::__get_zip_iterator_concept<_Iterators...>());
@@ -226,14 +227,14 @@ public:
226
227
 
227
228
  // Internal helper functions to extract internals for device dispatch, must be a tuple for cub_transform_many
228
229
  [[nodiscard]] _CCCL_API constexpr ::cuda::std::tuple<_Iterators...>
229
- __base() && noexcept(::cuda::std::is_nothrow_move_constructible_v<__tuple_or_pair<_Iterators...>>)
230
+ __base() && noexcept(::cuda::std::is_nothrow_move_constructible_v<::cuda::std::tuple<_Iterators...>>)
230
231
  {
231
- return ::cuda::std::move(__current_);
232
+ return ::cuda::std::move(__iters());
232
233
  }
233
234
 
234
235
  [[nodiscard]] _CCCL_API constexpr _Fn __pred() && noexcept(::cuda::std::is_nothrow_move_constructible_v<_Fn>)
235
236
  {
236
- return ::cuda::std::move(*__func_);
237
+ return ::cuda::std::move(__func());
237
238
  }
238
239
 
239
240
  struct __zip_transform_op_star
@@ -252,7 +253,7 @@ public:
252
253
  [[nodiscard]] _CCCL_API constexpr reference operator*() const
253
254
  noexcept(::cuda::std::is_nothrow_invocable_v<_Fn&, ::cuda::std::iter_reference_t<const _Iterators>...>)
254
255
  {
255
- return ::cuda::std::apply(__zip_transform_op_star{const_cast<_Fn&>(*__func_)}, __current_);
256
+ return ::cuda::std::apply(__zip_transform_op_star{const_cast<_Fn&>(__func())}, __iters());
256
257
  }
257
258
 
258
259
  struct __zip_transform_op_subscript
@@ -274,16 +275,17 @@ public:
274
275
  _CCCL_TEMPLATE(class _Constraints = __zip_iter_constraints<_Iterators...>)
275
276
  _CCCL_REQUIRES(_Constraints::__all_random_access)
276
277
  _CCCL_API constexpr reference operator[](difference_type __n) const
277
- noexcept(noexcept(::cuda::std::apply(__zip_transform_op_subscript{__n, const_cast<_Fn&>(*__func_)}, __current_)))
278
+ noexcept(noexcept(::cuda::std::apply(__zip_transform_op_subscript{__n, ::cuda::std::declval<_Fn&>()},
279
+ ::cuda::std::declval<const ::cuda::std::tuple<_Iterators...>&>())))
278
280
  {
279
- return ::cuda::std::apply(__zip_transform_op_subscript{__n, const_cast<_Fn&>(*__func_)}, __current_);
281
+ return ::cuda::std::apply(__zip_transform_op_subscript{__n, const_cast<_Fn&>(__func())}, __iters());
280
282
  }
281
283
 
282
284
  //! @brief Increments all stored iterators
283
- _CCCL_API constexpr zip_transform_iterator&
284
- operator++() noexcept(noexcept(::cuda::std::apply(__zip_op_increment{}, __current_)))
285
+ _CCCL_API constexpr zip_transform_iterator& operator++() noexcept(
286
+ noexcept(::cuda::std::apply(__zip_op_increment{}, ::cuda::std::declval<::cuda::std::tuple<_Iterators...>&>())))
285
287
  {
286
- ::cuda::std::apply(__zip_op_increment{}, __current_);
288
+ ::cuda::std::apply(__zip_op_increment{}, __iters());
287
289
  return *this;
288
290
  }
289
291
 
@@ -306,10 +308,10 @@ public:
306
308
  //! @brief Decrements all stored iterators
307
309
  _CCCL_TEMPLATE(class _Constraints = __zip_iter_constraints<_Iterators...>)
308
310
  _CCCL_REQUIRES(_Constraints::__all_bidirectional)
309
- _CCCL_API constexpr zip_transform_iterator&
310
- operator--() noexcept(noexcept(::cuda::std::apply(__zip_op_decrement{}, __current_)))
311
+ _CCCL_API constexpr zip_transform_iterator& operator--() noexcept(
312
+ noexcept(::cuda::std::apply(__zip_op_decrement{}, ::cuda::std::declval<::cuda::std::tuple<_Iterators...>&>())))
311
313
  {
312
- ::cuda::std::apply(__zip_op_decrement{}, __current_);
314
+ ::cuda::std::apply(__zip_op_decrement{}, __iters());
313
315
  return *this;
314
316
  }
315
317
 
@@ -339,10 +341,10 @@ public:
339
341
  //! @param __n The number of elements to increment
340
342
  _CCCL_TEMPLATE(class _Constraints = __zip_iter_constraints<_Iterators...>)
341
343
  _CCCL_REQUIRES(_Constraints::__all_random_access)
342
- _CCCL_API constexpr zip_transform_iterator&
343
- operator+=(difference_type __n) noexcept(noexcept(::cuda::std::apply(__zip_op_pe{__n}, __current_)))
344
+ _CCCL_API constexpr zip_transform_iterator& operator+=(difference_type __n) noexcept(
345
+ noexcept(::cuda::std::apply(__zip_op_pe{__n}, ::cuda::std::declval<::cuda::std::tuple<_Iterators...>&>())))
344
346
  {
345
- ::cuda::std::apply(__zip_op_pe{__n}, __current_);
347
+ ::cuda::std::apply(__zip_op_pe{__n}, __iters());
346
348
  return *this;
347
349
  }
348
350
 
@@ -362,9 +364,10 @@ public:
362
364
  //! @param __n The number of elements to decrement
363
365
  _CCCL_TEMPLATE(class _Constraints = __zip_iter_constraints<_Iterators...>)
364
366
  _CCCL_REQUIRES(_Constraints::__all_random_access)
365
- _CCCL_API constexpr zip_transform_iterator& operator-=(difference_type __n)
367
+ _CCCL_API constexpr zip_transform_iterator& operator-=(difference_type __n) noexcept(
368
+ noexcept(::cuda::std::apply(__zip_op_me{__n}, ::cuda::std::declval<::cuda::std::tuple<_Iterators...>&>())))
366
369
  {
367
- ::cuda::std::apply(__zip_op_me{__n}, __current_);
370
+ ::cuda::std::apply(__zip_op_me{__n}, __iters());
368
371
  return *this;
369
372
  }
370
373
 
@@ -425,8 +428,8 @@ public:
425
428
  _CCCL_EXEC_CHECK_DISABLE
426
429
  template <size_t _Zero, size_t... _Indices>
427
430
  [[nodiscard]] _CCCL_API constexpr difference_type
428
- operator()(const __tuple_or_pair<_Iterators...>& __iters1,
429
- const __tuple_or_pair<_Iterators...>& __iters2,
431
+ operator()(const ::cuda::std::tuple<_Iterators...>& __iters1,
432
+ const ::cuda::std::tuple<_Iterators...>& __iters2,
430
433
  ::cuda::std::index_sequence<_Zero, _Indices...>) const //
431
434
  noexcept(noexcept(((::cuda::std::get<_Indices>(__iters1) - ::cuda::std::get<_Indices>(__iters2)) && ...)))
432
435
  {
@@ -449,15 +452,15 @@ public:
449
452
  _CCCL_API friend constexpr auto operator-(const zip_transform_iterator& __n, const zip_transform_iterator& __y)
450
453
  _CCCL_TRAILING_REQUIRES(difference_type)(_Constraints::__all_sized_sentinel)
451
454
  {
452
- return __zip_apply(__zip_op_minus{}, __n.__current_, __y.__current_);
455
+ return __zip_apply(__zip_op_minus{}, __n.__iters(), __y.__iters());
453
456
  }
454
457
 
455
458
  struct __zip_op_eq
456
459
  {
457
460
  _CCCL_EXEC_CHECK_DISABLE
458
461
  template <size_t... _Indices>
459
- _CCCL_API constexpr bool operator()(const __tuple_or_pair<_Iterators...>& __iters1,
460
- const __tuple_or_pair<_Iterators...>& __iters2,
462
+ _CCCL_API constexpr bool operator()(const ::cuda::std::tuple<_Iterators...>& __iters1,
463
+ const ::cuda::std::tuple<_Iterators...>& __iters2,
461
464
  ::cuda::std::index_sequence<_Indices...>) const
462
465
  noexcept(noexcept(((::cuda::std::get<_Indices>(__iters1) == ::cuda::std::get<_Indices>(__iters2)) || ...)))
463
466
  {
@@ -472,11 +475,11 @@ public:
472
475
  {
473
476
  if constexpr (_Constraints::__all_bidirectional)
474
477
  {
475
- return __n.__current_ == __y.__current_;
478
+ return __n.__iters() == __y.__iters();
476
479
  }
477
480
  else
478
481
  {
479
- return __zip_apply(__zip_op_eq{}, __n.__current_, __y.__current_);
482
+ return __zip_apply(__zip_op_eq{}, __n.__iters(), __y.__iters());
480
483
  }
481
484
  _CCCL_UNREACHABLE();
482
485
  }
@@ -489,11 +492,11 @@ public:
489
492
  {
490
493
  if constexpr (_Constraints::__all_bidirectional)
491
494
  {
492
- return __n.__current_ != __y.__current_;
495
+ return __n.__iters() != __y.__iters();
493
496
  }
494
497
  else
495
498
  {
496
- return !__zip_apply(__zip_op_eq{}, __n.__current_, __y.__current_);
499
+ return !__zip_apply(__zip_op_eq{}, __n.__iters(), __y.__iters());
497
500
  }
498
501
  _CCCL_UNREACHABLE();
499
502
  }
@@ -505,7 +508,7 @@ public:
505
508
  _CCCL_API friend constexpr auto operator<=>(const zip_transform_iterator& __n, const zip_transform_iterator& __y)
506
509
  _CCCL_TRAILING_REQUIRES(bool)(_Constraints::__all_random_access&& _Constraints::__all_three_way_comparable)
507
510
  {
508
- return __n.__current_ <=> __y.__current_;
511
+ return __n.__iters() <=> __y.__iters();
509
512
  }
510
513
 
511
514
  #else // ^^^ _LIBCUDACXX_HAS_SPACESHIP_OPERATOR() ^^^ / vvv !_LIBCUDACXX_HAS_SPACESHIP_OPERATOR() vvv
@@ -515,7 +518,7 @@ public:
515
518
  _CCCL_API friend constexpr auto operator<(const zip_transform_iterator& __n, const zip_transform_iterator& __y)
516
519
  _CCCL_TRAILING_REQUIRES(bool)(_Constraints::__all_random_access)
517
520
  {
518
- return __n.__current_ < __y.__current_;
521
+ return __n.__iters() < __y.__iters();
519
522
  }
520
523
 
521
524
  //! @brief Compares two @c zip_transform_iterator for greater than by comparing the tuple of stored iterators
@@ -548,10 +551,6 @@ template <class _Fn, class... _Iterators>
548
551
  _CCCL_HOST_DEVICE zip_transform_iterator(_Fn, ::cuda::std::tuple<_Iterators...>)
549
552
  -> zip_transform_iterator<_Fn, _Iterators...>;
550
553
 
551
- template <class _Fn, class _Iterator1, class _Iterator2>
552
- _CCCL_HOST_DEVICE zip_transform_iterator(_Fn, ::cuda::std::pair<_Iterator1, _Iterator2>)
553
- -> zip_transform_iterator<_Fn, _Iterator1, _Iterator2>;
554
-
555
554
  template <class _Fn, class... _Iterators>
556
555
  _CCCL_HOST_DEVICE zip_transform_iterator(_Fn, _Iterators...) -> zip_transform_iterator<_Fn, _Iterators...>;
557
556