cuda-cccl 0.3.1__cp311-cp311-manylinux_2_24_aarch64.whl → 0.3.2__cp311-cp311-manylinux_2_24_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cuda-cccl might be problematic. Click here for more details.

Files changed (185)
  1. cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +354 -572
  2. cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +6 -8
  3. cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +24 -14
  4. cuda/cccl/headers/include/cub/block/block_exchange.cuh +5 -0
  5. cuda/cccl/headers/include/cub/block/block_histogram.cuh +4 -0
  6. cuda/cccl/headers/include/cub/block/block_load.cuh +4 -0
  7. cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +1 -0
  8. cuda/cccl/headers/include/cub/block/block_reduce.cuh +1 -0
  9. cuda/cccl/headers/include/cub/block/block_scan.cuh +12 -2
  10. cuda/cccl/headers/include/cub/block/block_store.cuh +3 -2
  11. cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +34 -30
  12. cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +1 -1
  13. cuda/cccl/headers/include/cub/device/device_for.cuh +118 -40
  14. cuda/cccl/headers/include/cub/device/device_reduce.cuh +6 -7
  15. cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +12 -13
  16. cuda/cccl/headers/include/cub/device/device_transform.cuh +122 -91
  17. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +2 -3
  18. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +4 -3
  19. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +1 -1
  20. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +4 -5
  21. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +0 -1
  22. cuda/cccl/headers/include/cub/device/dispatch/dispatch_topk.cuh +3 -5
  23. cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +13 -5
  24. cuda/cccl/headers/include/cub/device/dispatch/kernels/for_each.cuh +72 -37
  25. cuda/cccl/headers/include/cub/device/dispatch/kernels/transform.cuh +22 -27
  26. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +61 -70
  27. cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +24 -17
  28. cuda/cccl/headers/include/cub/warp/warp_load.cuh +6 -6
  29. cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +7 -2
  30. cuda/cccl/headers/include/cub/warp/warp_scan.cuh +7 -3
  31. cuda/cccl/headers/include/cub/warp/warp_store.cuh +1 -0
  32. cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +19 -0
  33. cuda/cccl/headers/include/cuda/__cccl_config +1 -0
  34. cuda/cccl/headers/include/cuda/__cmath/fast_modulo_division.h +3 -74
  35. cuda/cccl/headers/include/cuda/__cmath/mul_hi.h +146 -0
  36. cuda/cccl/headers/include/cuda/__complex/get_real_imag.h +0 -4
  37. cuda/cccl/headers/include/cuda/__device/arch_id.h +176 -0
  38. cuda/cccl/headers/include/cuda/__device/arch_traits.h +239 -317
  39. cuda/cccl/headers/include/cuda/__device/attributes.h +4 -3
  40. cuda/cccl/headers/include/cuda/__device/compute_capability.h +171 -0
  41. cuda/cccl/headers/include/cuda/__device/device_ref.h +0 -10
  42. cuda/cccl/headers/include/cuda/__device/physical_device.h +1 -26
  43. cuda/cccl/headers/include/cuda/__event/event.h +26 -26
  44. cuda/cccl/headers/include/cuda/__event/event_ref.h +5 -5
  45. cuda/cccl/headers/include/cuda/__event/timed_event.h +9 -7
  46. cuda/cccl/headers/include/cuda/__fwd/devices.h +4 -4
  47. cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h +46 -31
  48. cuda/cccl/headers/include/cuda/__iterator/strided_iterator.h +79 -47
  49. cuda/cccl/headers/include/cuda/__iterator/tabulate_output_iterator.h +59 -36
  50. cuda/cccl/headers/include/cuda/__iterator/transform_input_output_iterator.h +79 -49
  51. cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +74 -48
  52. cuda/cccl/headers/include/cuda/__iterator/transform_output_iterator.h +80 -55
  53. cuda/cccl/headers/include/cuda/__iterator/zip_common.h +2 -12
  54. cuda/cccl/headers/include/cuda/__iterator/zip_iterator.h +15 -19
  55. cuda/cccl/headers/include/cuda/__iterator/zip_transform_iterator.h +59 -60
  56. cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +127 -60
  57. cuda/cccl/headers/include/cuda/__mdspan/host_device_mdspan.h +178 -3
  58. cuda/cccl/headers/include/cuda/__mdspan/restrict_accessor.h +38 -8
  59. cuda/cccl/headers/include/cuda/__mdspan/restrict_mdspan.h +67 -1
  60. cuda/cccl/headers/include/cuda/__memory/ptr_in_range.h +93 -0
  61. cuda/cccl/headers/include/cuda/__memory_resource/get_memory_resource.h +4 -4
  62. cuda/cccl/headers/include/cuda/__memory_resource/properties.h +44 -0
  63. cuda/cccl/headers/include/cuda/__memory_resource/resource.h +1 -1
  64. cuda/cccl/headers/include/cuda/__memory_resource/resource_ref.h +4 -6
  65. cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +2 -1
  66. cuda/cccl/headers/include/cuda/__runtime/ensure_current_context.h +5 -4
  67. cuda/cccl/headers/include/cuda/__stream/stream.h +8 -8
  68. cuda/cccl/headers/include/cuda/__stream/stream_ref.h +17 -16
  69. cuda/cccl/headers/include/cuda/__utility/in_range.h +65 -0
  70. cuda/cccl/headers/include/cuda/cmath +1 -0
  71. cuda/cccl/headers/include/cuda/devices +3 -0
  72. cuda/cccl/headers/include/cuda/memory +1 -0
  73. cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h +2 -2
  74. cuda/cccl/headers/include/cuda/std/__algorithm/find.h +1 -1
  75. cuda/cccl/headers/include/cuda/std/__algorithm/includes.h +2 -4
  76. cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h +1 -1
  77. cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h +7 -15
  78. cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h +1 -1
  79. cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h +1 -2
  80. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h +2 -2
  81. cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h +1 -1
  82. cuda/cccl/headers/include/cuda/std/__cccl/algorithm_wrapper.h +36 -0
  83. cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +46 -49
  84. cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h +6 -0
  85. cuda/cccl/headers/include/cuda/std/__cccl/host_std_lib.h +52 -0
  86. cuda/cccl/headers/include/cuda/std/__cccl/memory_wrapper.h +36 -0
  87. cuda/cccl/headers/include/cuda/std/__cccl/numeric_wrapper.h +36 -0
  88. cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +3 -2
  89. cuda/cccl/headers/include/cuda/std/__complex/complex.h +3 -2
  90. cuda/cccl/headers/include/cuda/std/__complex/literals.h +14 -34
  91. cuda/cccl/headers/include/cuda/std/__complex/nvbf16.h +2 -1
  92. cuda/cccl/headers/include/cuda/std/__complex/nvfp16.h +4 -3
  93. cuda/cccl/headers/include/cuda/std/__concepts/invocable.h +2 -2
  94. cuda/cccl/headers/include/cuda/std/__cstdlib/malloc.h +3 -2
  95. cuda/cccl/headers/include/cuda/std/__functional/bind.h +10 -13
  96. cuda/cccl/headers/include/cuda/std/__functional/function.h +5 -8
  97. cuda/cccl/headers/include/cuda/std/__functional/invoke.h +71 -335
  98. cuda/cccl/headers/include/cuda/std/__functional/mem_fn.h +1 -2
  99. cuda/cccl/headers/include/cuda/std/__functional/reference_wrapper.h +3 -3
  100. cuda/cccl/headers/include/cuda/std/__functional/weak_result_type.h +0 -6
  101. cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +13 -0
  102. cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +13 -0
  103. cuda/cccl/headers/include/cuda/std/__fwd/complex.h +13 -4
  104. cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +23 -0
  105. cuda/cccl/headers/include/cuda/std/__fwd/pair.h +13 -0
  106. cuda/cccl/headers/include/cuda/std/__fwd/string.h +22 -0
  107. cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +14 -0
  108. cuda/cccl/headers/include/cuda/std/__internal/features.h +0 -5
  109. cuda/cccl/headers/include/cuda/std/__internal/namespaces.h +21 -0
  110. cuda/cccl/headers/include/cuda/std/__iterator/iterator_traits.h +5 -5
  111. cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +7 -1
  112. cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +53 -39
  113. cuda/cccl/headers/include/cuda/std/__memory/allocator.h +3 -3
  114. cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +1 -3
  115. cuda/cccl/headers/include/cuda/std/__optional/optional_base.h +1 -0
  116. cuda/cccl/headers/include/cuda/std/__ranges/compressed_movable_box.h +892 -0
  117. cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +2 -2
  118. cuda/cccl/headers/include/cuda/std/__type_traits/is_primary_template.h +7 -5
  119. cuda/cccl/headers/include/cuda/std/__type_traits/result_of.h +1 -1
  120. cuda/cccl/headers/include/cuda/std/__utility/pair.h +0 -5
  121. cuda/cccl/headers/include/cuda/std/bitset +1 -1
  122. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +15 -12
  123. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/variant +11 -9
  124. cuda/cccl/headers/include/cuda/std/inplace_vector +4 -4
  125. cuda/cccl/headers/include/cuda/std/numbers +5 -0
  126. cuda/cccl/headers/include/cuda/std/string_view +146 -11
  127. cuda/cccl/headers/include/cuda/stream_ref +5 -0
  128. cuda/cccl/headers/include/cuda/utility +1 -0
  129. cuda/cccl/headers/include/nv/target +7 -2
  130. cuda/cccl/headers/include/thrust/allocate_unique.h +1 -1
  131. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +309 -33
  132. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +151 -4
  133. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +60 -3
  134. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +45 -3
  135. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +31 -6
  136. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +29 -16
  137. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +41 -4
  138. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.h +42 -4
  139. cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +3 -3
  140. cuda/cccl/headers/include/thrust/detail/internal_functional.h +1 -1
  141. cuda/cccl/headers/include/thrust/detail/memory_algorithms.h +1 -1
  142. cuda/cccl/headers/include/thrust/detail/temporary_array.h +1 -1
  143. cuda/cccl/headers/include/thrust/detail/type_traits.h +1 -1
  144. cuda/cccl/headers/include/thrust/device_delete.h +18 -3
  145. cuda/cccl/headers/include/thrust/device_free.h +16 -3
  146. cuda/cccl/headers/include/thrust/device_new.h +29 -8
  147. cuda/cccl/headers/include/thrust/host_vector.h +1 -1
  148. cuda/cccl/headers/include/thrust/iterator/tabulate_output_iterator.h +5 -2
  149. cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +1 -1
  150. cuda/cccl/headers/include/thrust/mr/pool.h +1 -1
  151. cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +13 -115
  152. cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +8 -2
  153. cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +7 -7
  154. cuda/compute/__init__.py +2 -0
  155. cuda/compute/_bindings.pyi +43 -1
  156. cuda/compute/_bindings_impl.pyx +156 -7
  157. cuda/compute/algorithms/_scan.py +108 -36
  158. cuda/compute/algorithms/_transform.py +32 -11
  159. cuda/compute/cu12/_bindings_impl.cpython-311-aarch64-linux-gnu.so +0 -0
  160. cuda/compute/cu12/cccl/libcccl.c.parallel.so +0 -0
  161. cuda/compute/cu13/_bindings_impl.cpython-311-aarch64-linux-gnu.so +0 -0
  162. cuda/compute/cu13/cccl/libcccl.c.parallel.so +0 -0
  163. cuda/compute/iterators/__init__.py +2 -0
  164. cuda/compute/iterators/_factories.py +28 -0
  165. cuda/compute/iterators/_iterators.py +206 -1
  166. cuda/compute/numba_utils.py +2 -2
  167. cuda/compute/typing.py +2 -0
  168. {cuda_cccl-0.3.1.dist-info → cuda_cccl-0.3.2.dist-info}/METADATA +1 -1
  169. {cuda_cccl-0.3.1.dist-info → cuda_cccl-0.3.2.dist-info}/RECORD +171 -175
  170. cuda/cccl/headers/include/thrust/detail/algorithm_wrapper.h +0 -37
  171. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.inl +0 -371
  172. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.inl +0 -242
  173. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.inl +0 -137
  174. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.inl +0 -99
  175. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.inl +0 -68
  176. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.inl +0 -86
  177. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.inl +0 -79
  178. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.inl +0 -98
  179. cuda/cccl/headers/include/thrust/detail/device_delete.inl +0 -52
  180. cuda/cccl/headers/include/thrust/detail/device_free.inl +0 -47
  181. cuda/cccl/headers/include/thrust/detail/device_new.inl +0 -61
  182. cuda/cccl/headers/include/thrust/detail/memory_wrapper.h +0 -40
  183. cuda/cccl/headers/include/thrust/detail/numeric_wrapper.h +0 -37
  184. {cuda_cccl-0.3.1.dist-info → cuda_cccl-0.3.2.dist-info}/WHEEL +0 -0
  185. {cuda_cccl-0.3.1.dist-info → cuda_cccl-0.3.2.dist-info}/licenses/LICENSE +0 -0
@@ -23,6 +23,7 @@
23
23
 
24
24
  #include <cuda/std/__iterator/concepts.h>
25
25
  #include <cuda/std/__iterator/iterator_traits.h>
26
+ #include <cuda/std/__ranges/compressed_movable_box.h>
26
27
  #include <cuda/std/__ranges/movable_box.h>
27
28
  #include <cuda/std/__type_traits/is_nothrow_copy_constructible.h>
28
29
  #include <cuda/std/__type_traits/is_nothrow_move_constructible.h>
@@ -64,8 +65,28 @@ class constant_iterator
64
65
  private:
65
66
  static_assert(::cuda::std::__integer_like<_Index>, "The index type of cuda::constant_iterator must be integer-like!");
66
67
 
67
- ::cuda::std::ranges::__movable_box<_Tp> __value_{::cuda::std::in_place};
68
- _Index __index_ = 0;
68
+ // Not a base because then the friend operators would be ambiguous
69
+ ::cuda::std::__compressed_movable_box<_Index, _Tp> __store_;
70
+
71
+ [[nodiscard]] _CCCL_API constexpr _Index& __index() noexcept
72
+ {
73
+ return __store_.template __get<0>();
74
+ }
75
+
76
+ [[nodiscard]] _CCCL_API constexpr const _Index& __index() const noexcept
77
+ {
78
+ return __store_.template __get<0>();
79
+ }
80
+
81
+ [[nodiscard]] _CCCL_API constexpr _Tp& __value() noexcept
82
+ {
83
+ return __store_.template __get<1>();
84
+ }
85
+
86
+ [[nodiscard]] _CCCL_API constexpr const _Tp& __value() const noexcept
87
+ {
88
+ return __store_.template __get<1>();
89
+ }
69
90
 
70
91
  public:
71
92
  using iterator_concept = ::cuda::std::random_access_iterator_tag;
@@ -78,22 +99,17 @@ public:
78
99
  using reference = _Tp;
79
100
  using pointer = void;
80
101
 
81
- #if _CCCL_HAS_CONCEPTS()
82
- _CCCL_HIDE_FROM_ABI constant_iterator()
83
- requires ::cuda::std::default_initializable<_Tp>
84
- = default;
85
- #else // ^^^ _CCCL_HAS_CONCEPTS() ^^^ / vvv !_CCCL_HAS_CONCEPTS() vvv
86
102
  _CCCL_TEMPLATE(class _Tp2 = _Tp)
87
103
  _CCCL_REQUIRES(::cuda::std::default_initializable<_Tp2>)
88
- _CCCL_API constexpr constant_iterator() noexcept(::cuda::std::is_nothrow_default_constructible_v<_Tp2>) {}
89
- #endif // !_CCCL_HAS_CONCEPTS()
104
+ _CCCL_API constexpr constant_iterator() noexcept(::cuda::std::is_nothrow_default_constructible_v<_Tp2>)
105
+ : __store_()
106
+ {}
90
107
 
91
108
  //! @brief Creates a @c constant_iterator from a value. The index is set to zero
92
109
  //! @param __value The value to store in the @c constant_iterator
93
110
  _CCCL_EXEC_CHECK_DISABLE
94
111
  _CCCL_API constexpr constant_iterator(_Tp __value) noexcept(::cuda::std::is_nothrow_move_constructible_v<_Tp>)
95
- : __value_(::cuda::std::in_place, ::cuda::std::move(__value))
96
- , __index_()
112
+ : __store_(0, ::cuda::std::move(__value))
97
113
  {}
98
114
 
99
115
  //! @brief Creates @c constant_iterator from a value and an index
@@ -104,32 +120,31 @@ public:
104
120
  _CCCL_REQUIRES(::cuda::std::__integer_like<_Index2>)
105
121
  _CCCL_API constexpr explicit constant_iterator(_Tp __value, _Index2 __index) noexcept(
106
122
  ::cuda::std::is_nothrow_move_constructible_v<_Tp>)
107
- : __value_(::cuda::std::in_place, ::cuda::std::move(__value))
108
- , __index_(static_cast<_Index>(__index))
123
+ : __store_(static_cast<_Index>(__index), ::cuda::std::move(__value))
109
124
  {}
110
125
 
111
126
  //! @brief Returns the current index
112
127
  [[nodiscard]] _CCCL_API constexpr difference_type index() const noexcept
113
128
  {
114
- return static_cast<difference_type>(__index_);
129
+ return static_cast<difference_type>(__index());
115
130
  }
116
131
 
117
132
  //! @brief Returns a const reference to the stored value
118
133
  [[nodiscard]] _CCCL_API constexpr const _Tp& operator*() const noexcept
119
134
  {
120
- return *__value_;
135
+ return __value();
121
136
  }
122
137
 
123
138
  //! @brief Returns a const reference to the stored value
124
139
  [[nodiscard]] _CCCL_API constexpr const _Tp& operator[](difference_type) const noexcept
125
140
  {
126
- return *__value_;
141
+ return __value();
127
142
  }
128
143
 
129
144
  //! @brief Increments the stored index
130
145
  _CCCL_API constexpr constant_iterator& operator++() noexcept
131
146
  {
132
- ++__index_;
147
+ ++__index();
133
148
  return *this;
134
149
  }
135
150
 
@@ -147,9 +162,9 @@ public:
147
162
  {
148
163
  if constexpr (::cuda::std::is_signed_v<_Index>)
149
164
  {
150
- _CCCL_ASSERT(__index_ > 0, "The index must be greater than or equal to 0");
165
+ _CCCL_ASSERT(__index() > 0, "The index must be greater than or equal to 0");
151
166
  }
152
- --__index_;
167
+ --__index();
153
168
  return *this;
154
169
  }
155
170
 
@@ -168,9 +183,9 @@ public:
168
183
  {
169
184
  if constexpr (::cuda::std::is_signed_v<_Index>)
170
185
  {
171
- _CCCL_ASSERT(__index_ + __n >= 0, "The index must be greater than or equal to 0");
186
+ _CCCL_ASSERT(__index() + __n >= 0, "The index must be greater than or equal to 0");
172
187
  }
173
- __index_ += static_cast<_Index>(__n);
188
+ __index() += static_cast<_Index>(__n);
174
189
  return *this;
175
190
  }
176
191
 
@@ -200,9 +215,9 @@ public:
200
215
  {
201
216
  if constexpr (::cuda::std::is_signed_v<_Index>)
202
217
  {
203
- _CCCL_ASSERT(__index_ - __n >= 0, "The index must be greater than or equal to 0");
218
+ _CCCL_ASSERT(__index() - __n >= 0, "The index must be greater than or equal to 0");
204
219
  }
205
- __index_ -= static_cast<_Index>(__n);
220
+ __index() -= static_cast<_Index>(__n);
206
221
  return *this;
207
222
  }
208
223
 
@@ -220,14 +235,14 @@ public:
220
235
  [[nodiscard]] _CCCL_API friend constexpr difference_type
221
236
  operator-(const constant_iterator& __lhs, const constant_iterator& __rhs) noexcept
222
237
  {
223
- return static_cast<difference_type>(__lhs.__index_) - static_cast<difference_type>(__rhs.__index_);
238
+ return static_cast<difference_type>(__lhs.__index()) - static_cast<difference_type>(__rhs.__index());
224
239
  }
225
240
 
226
241
  //! @brief Compares two @c constant_iterator for equality by comparing the index in the sequence
227
242
  [[nodiscard]] _CCCL_API friend constexpr bool
228
243
  operator==(const constant_iterator& __lhs, const constant_iterator& __rhs) noexcept
229
244
  {
230
- return __lhs.__index_ == __rhs.__index_;
245
+ return __lhs.__index() == __rhs.__index();
231
246
  }
232
247
 
233
248
  #if _CCCL_STD_VER <= 2017
@@ -235,7 +250,7 @@ public:
235
250
  [[nodiscard]] _CCCL_API friend constexpr bool
236
251
  operator!=(const constant_iterator& __lhs, const constant_iterator& __rhs) noexcept
237
252
  {
238
- return __lhs.__index_ != __rhs.__index_;
253
+ return __lhs.__index() != __rhs.__index();
239
254
  }
240
255
  #endif // _CCCL_STD_VER <= 2017
241
256
 
@@ -244,32 +259,32 @@ public:
244
259
  [[nodiscard]] _CCCL_API friend constexpr auto
245
260
  operator<=>(const constant_iterator& __lhs, const constant_iterator& __rhs) noexcept
246
261
  {
247
- return __lhs.__index_ <=> __rhs.__index_;
262
+ return __lhs.__index() <=> __rhs.__index();
248
263
  }
249
264
  #else // ^^^ _LIBCUDACXX_HAS_SPACESHIP_OPERATOR() ^^^ / vvv !_LIBCUDACXX_HAS_SPACESHIP_OPERATOR() vvv
250
265
  //! @brief Compares two @c constant_iterator for less than by comparing the index in the sequence
251
266
  [[nodiscard]] _CCCL_API friend constexpr bool
252
267
  operator<(const constant_iterator& __lhs, const constant_iterator& __rhs) noexcept
253
268
  {
254
- return __lhs.__index_ < __rhs.__index_;
269
+ return __lhs.__index() < __rhs.__index();
255
270
  }
256
271
  //! @brief Compares two @c constant_iterator for less equal by comparing the index in the sequence
257
272
  [[nodiscard]] _CCCL_API friend constexpr bool
258
273
  operator<=(const constant_iterator& __lhs, const constant_iterator& __rhs) noexcept
259
274
  {
260
- return __lhs.__index_ <= __rhs.__index_;
275
+ return __lhs.__index() <= __rhs.__index();
261
276
  }
262
277
  //! @brief Compares two @c constant_iterator for greater than by comparing the index in the sequence
263
278
  [[nodiscard]] _CCCL_API friend constexpr bool
264
279
  operator>(const constant_iterator& __lhs, const constant_iterator& __rhs) noexcept
265
280
  {
266
- return __lhs.__index_ > __rhs.__index_;
281
+ return __lhs.__index() > __rhs.__index();
267
282
  }
268
283
  //! @brief Compares two @c constant_iterator for greater equal by comparing the index in the sequence
269
284
  [[nodiscard]] _CCCL_API friend constexpr bool
270
285
  operator>=(const constant_iterator& __lhs, const constant_iterator& __rhs) noexcept
271
286
  {
272
- return __lhs.__index_ >= __rhs.__index_;
287
+ return __lhs.__index() >= __rhs.__index();
273
288
  }
274
289
  #endif // !_LIBCUDACXX_HAS_SPACESHIP_OPERATOR()
275
290
  };
@@ -28,6 +28,7 @@
28
28
  #include <cuda/std/__concepts/totally_ordered.h>
29
29
  #include <cuda/std/__iterator/iterator_traits.h>
30
30
  #include <cuda/std/__mdspan/submdspan_helper.h>
31
+ #include <cuda/std/__ranges/compressed_movable_box.h>
31
32
  #include <cuda/std/__type_traits/is_nothrow_copy_constructible.h>
32
33
  #include <cuda/std/__type_traits/is_nothrow_default_constructible.h>
33
34
  #include <cuda/std/__type_traits/is_nothrow_move_constructible.h>
@@ -57,12 +58,32 @@ private:
57
58
  static_assert(::cuda::std::__integer_like<_Stride> || ::cuda::std::__integral_constant_like<_Stride>,
58
59
  "The stride of a strided_iterator must either be an integer-like or integral-constant-like.");
59
60
 
60
- _Iter __iter_{};
61
- _Stride __stride_{};
62
-
63
61
  template <class, class>
64
62
  friend class strided_iterator;
65
63
 
64
+ // Not a base because then the friend operators would be ambiguous
65
+ ::cuda::std::__compressed_movable_box<_Iter, _Stride> __store_;
66
+
67
+ [[nodiscard]] _CCCL_API constexpr _Iter& __iter() noexcept
68
+ {
69
+ return __store_.template __get<0>();
70
+ }
71
+
72
+ [[nodiscard]] _CCCL_API constexpr const _Iter& __iter() const noexcept
73
+ {
74
+ return __store_.template __get<0>();
75
+ }
76
+
77
+ [[nodiscard]] _CCCL_API constexpr _Stride& __stride() noexcept
78
+ {
79
+ return __store_.template __get<1>();
80
+ }
81
+
82
+ [[nodiscard]] _CCCL_API constexpr const _Stride& __stride() const noexcept
83
+ {
84
+ return __store_.template __get<1>();
85
+ }
86
+
66
87
  public:
67
88
  using iterator_concept = ::cuda::std::random_access_iterator_tag;
68
89
  using iterator_category = ::cuda::std::random_access_iterator_tag;
@@ -78,7 +99,12 @@ public:
78
99
  //! @note _Iter must be default initializable because it is a random_access_iterator and thereby semiregular
79
100
  //! _Stride must be integer-like or integral_constant_like which requires default constructability
80
101
  _CCCL_EXEC_CHECK_DISABLE
81
- _CCCL_HIDE_FROM_ABI strided_iterator() = default;
102
+ _CCCL_TEMPLATE(class _Iter2 = _Iter, class _Stride2 = _Stride)
103
+ _CCCL_REQUIRES(::cuda::std::default_initializable<_Iter2> _CCCL_AND ::cuda::std::default_initializable<_Stride2>)
104
+ _CCCL_API constexpr strided_iterator() noexcept(::cuda::std::is_nothrow_default_constructible_v<_Iter2>
105
+ && ::cuda::std::is_nothrow_default_constructible_v<_Stride2>)
106
+ : __store_()
107
+ {}
82
108
 
83
109
  //! @brief Constructs a @c strided_iterator from a base iterator
84
110
  //! @param __iter The base iterator
@@ -90,8 +116,7 @@ public:
90
116
  _CCCL_REQUIRES(::cuda::std::__integral_constant_like<_Stride2>)
91
117
  _CCCL_API constexpr explicit strided_iterator(_Iter __iter) noexcept(
92
118
  ::cuda::std::is_nothrow_move_constructible_v<_Iter> && ::cuda::std::is_nothrow_default_constructible_v<_Stride2>)
93
- : __iter_(::cuda::std::move(__iter))
94
- , __stride_()
119
+ : __store_(::cuda::std::move(__iter))
95
120
  {}
96
121
 
97
122
  //! @brief Constructs a @c strided_iterator from a base iterator and a stride
@@ -100,21 +125,20 @@ public:
100
125
  _CCCL_EXEC_CHECK_DISABLE
101
126
  _CCCL_API constexpr explicit strided_iterator(_Iter __iter, _Stride __stride) noexcept(
102
127
  ::cuda::std::is_nothrow_move_constructible_v<_Iter> && ::cuda::std::is_nothrow_move_constructible_v<_Stride>)
103
- : __iter_(::cuda::std::move(__iter))
104
- , __stride_(::cuda::std::move(__stride))
128
+ : __store_(::cuda::std::move(__iter), ::cuda::std::move(__stride))
105
129
  {}
106
130
 
107
131
  //! @brief Returns a const reference to the stored iterator
108
132
  [[nodiscard]] _CCCL_API constexpr const _Iter& base() const& noexcept
109
133
  {
110
- return __iter_;
134
+ return __iter();
111
135
  }
112
136
 
113
137
  //! @brief Extracts the stored iterator
114
138
  _CCCL_EXEC_CHECK_DISABLE
115
139
  [[nodiscard]] _CCCL_API constexpr _Iter base() && noexcept(::cuda::std::is_nothrow_move_constructible_v<_Iter>)
116
140
  {
117
- return ::cuda::std::move(__iter_);
141
+ return ::cuda::std::move(__iter());
118
142
  }
119
143
 
120
144
  static constexpr bool __noexcept_stride =
@@ -124,32 +148,33 @@ public:
124
148
  _CCCL_EXEC_CHECK_DISABLE
125
149
  [[nodiscard]] _CCCL_API constexpr difference_type stride() const noexcept(__noexcept_stride)
126
150
  {
127
- return static_cast<difference_type>(::cuda::std::__de_ice(__stride_));
151
+ return static_cast<difference_type>(::cuda::std::__de_ice(__stride()));
128
152
  }
129
153
 
130
154
  //! @brief Dereferences the stored base iterator
131
155
  _CCCL_EXEC_CHECK_DISABLE
132
- [[nodiscard]] _CCCL_API constexpr decltype(auto) operator*() noexcept(noexcept(*__iter_))
156
+ [[nodiscard]] _CCCL_API constexpr decltype(auto) operator*() noexcept(noexcept(*::cuda::std::declval<_Iter&>()))
133
157
  {
134
- return *__iter_;
158
+ return *__iter();
135
159
  }
136
160
 
137
161
  //! @brief Dereferences the stored base iterator
138
162
  _CCCL_EXEC_CHECK_DISABLE
139
163
  _CCCL_TEMPLATE(class _Iter2 = _Iter)
140
164
  _CCCL_REQUIRES(::cuda::std::__dereferenceable<const _Iter2>)
141
- [[nodiscard]] _CCCL_API constexpr decltype(auto) operator*() const noexcept(noexcept(*__iter_))
165
+ [[nodiscard]] _CCCL_API constexpr decltype(auto) operator*() const
166
+ noexcept(noexcept(*::cuda::std::declval<const _Iter2&>()))
142
167
  {
143
- return *__iter_;
168
+ return *__iter();
144
169
  }
145
170
 
146
171
  //! @brief Subscripts the stored base iterator with a given offset times the stride
147
172
  //! @param __n The offset
148
173
  _CCCL_EXEC_CHECK_DISABLE
149
174
  [[nodiscard]] _CCCL_API constexpr decltype(auto)
150
- operator[](difference_type __n) noexcept(__noexcept_stride && noexcept(__iter_[__n]))
175
+ operator[](difference_type __n) noexcept(__noexcept_stride && noexcept(::cuda::std::declval<_Iter&>()[__n]))
151
176
  {
152
- return __iter_[__n * stride()];
177
+ return __iter()[__n * stride()];
153
178
  }
154
179
 
155
180
  //! @brief Subscripts the stored base iterator with a given offset times the stride
@@ -158,47 +183,49 @@ public:
158
183
  _CCCL_TEMPLATE(class _Iter2 = _Iter)
159
184
  _CCCL_REQUIRES(::cuda::std::__dereferenceable<const _Iter2>)
160
185
  [[nodiscard]] _CCCL_API constexpr decltype(auto) operator[](difference_type __n) const
161
- noexcept(__noexcept_stride && noexcept(__iter_[__n]))
186
+ noexcept(__noexcept_stride && noexcept(::cuda::std::declval<const _Iter2&>()[__n]))
162
187
  {
163
- return __iter_[__n * stride()];
188
+ return __iter()[__n * stride()];
164
189
  }
165
190
 
166
191
  //! @brief Increments the stored base iterator by the stride
167
- // Note: we cannot use __iter_ += stride() in the noexcept clause because that breaks gcc < 9
192
+ // Note: we cannot use __iter() += stride() in the noexcept clause because that breaks gcc < 9
168
193
  _CCCL_EXEC_CHECK_DISABLE
169
- _CCCL_API constexpr strided_iterator& operator++() noexcept(__noexcept_stride && noexcept(__iter_ += 1))
194
+ _CCCL_API constexpr strided_iterator&
195
+ operator++() noexcept(__noexcept_stride && noexcept(::cuda::std::declval<_Iter&>() += 1))
170
196
  {
171
- __iter_ += stride();
197
+ __iter() += stride();
172
198
  return *this;
173
199
  }
174
200
 
175
201
  //! @brief Increments the stored base iterator by the stride
176
202
  _CCCL_EXEC_CHECK_DISABLE
177
203
  _CCCL_API constexpr auto operator++(int) noexcept(
178
- noexcept(__noexcept_stride && noexcept(__iter_ += 1))
204
+ noexcept(__noexcept_stride && noexcept(::cuda::std::declval<_Iter&>() += 1))
179
205
  && ::cuda::std::is_nothrow_copy_constructible_v<_Iter> && ::cuda::std::is_nothrow_copy_constructible_v<_Stride>)
180
206
  {
181
207
  auto __tmp = *this;
182
- __iter_ += stride();
208
+ __iter() += stride();
183
209
  return __tmp;
184
210
  }
185
211
 
186
212
  //! @brief Decrements the stored base iterator by the stride
187
213
  _CCCL_EXEC_CHECK_DISABLE
188
- _CCCL_API constexpr strided_iterator& operator--() noexcept(__noexcept_stride && noexcept(__iter_ -= 1))
214
+ _CCCL_API constexpr strided_iterator&
215
+ operator--() noexcept(__noexcept_stride && noexcept(::cuda::std::declval<_Iter&>() -= 1))
189
216
  {
190
- __iter_ -= stride();
217
+ __iter() -= stride();
191
218
  return *this;
192
219
  }
193
220
 
194
221
  //! @brief Decrements the stored base iterator by the stride
195
222
  _CCCL_EXEC_CHECK_DISABLE
196
223
  _CCCL_API constexpr strided_iterator operator--(int) noexcept(
197
- noexcept(__noexcept_stride && noexcept(__iter_ -= 1))
224
+ noexcept(__noexcept_stride && noexcept(::cuda::std::declval<_Iter&>() -= 1))
198
225
  && ::cuda::std::is_nothrow_copy_constructible_v<_Iter> && ::cuda::std::is_nothrow_copy_constructible_v<_Stride>)
199
226
  {
200
227
  auto __tmp = *this;
201
- __iter_ -= stride();
228
+ __iter() -= stride();
202
229
  return __tmp;
203
230
  }
204
231
 
@@ -207,9 +234,9 @@ public:
207
234
  //! @note Increments the base iterator by @c __n times the stride
208
235
  _CCCL_EXEC_CHECK_DISABLE
209
236
  _CCCL_API constexpr strided_iterator&
210
- operator+=(difference_type __n) noexcept(__noexcept_stride && noexcept(__iter_ += 1))
237
+ operator+=(difference_type __n) noexcept(__noexcept_stride && noexcept(::cuda::std::declval<_Iter&>() += 1))
211
238
  {
212
- __iter_ += stride() * __n;
239
+ __iter() += stride() * __n;
213
240
  return *this;
214
241
  }
215
242
 
@@ -218,10 +245,10 @@ public:
218
245
  //! @param __n The number of steps to increment
219
246
  _CCCL_EXEC_CHECK_DISABLE
220
247
  [[nodiscard]] _CCCL_API friend constexpr strided_iterator
221
- operator+(strided_iterator __iter, difference_type __n) noexcept(noexcept(__iter_ += __n))
248
+ operator+(const strided_iterator& __iter, difference_type __n) noexcept(
249
+ ::cuda::std::is_nothrow_copy_constructible_v<_Iter> && noexcept(::cuda::std::declval<const _Iter&>() + __n))
222
250
  {
223
- __iter += __n;
224
- return __iter;
251
+ return strided_iterator{__iter.__iter() + __iter.stride() * __n, __iter.__stride()};
225
252
  }
226
253
 
227
254
  //! @brief Returns a copy of a @c strided_iterator incremented by a given number of steps
@@ -229,9 +256,10 @@ public:
229
256
  //! @param __iter The @c strided_iterator to advance
230
257
  _CCCL_EXEC_CHECK_DISABLE
231
258
  [[nodiscard]] _CCCL_API friend constexpr strided_iterator
232
- operator+(difference_type __n, strided_iterator __iter) noexcept(noexcept(__iter_ + __n))
259
+ operator+(difference_type __n, const strided_iterator& __iter) noexcept(
260
+ ::cuda::std::is_nothrow_copy_constructible_v<_Iter> && noexcept(::cuda::std::declval<const _Iter&>() + __n))
233
261
  {
234
- return __iter + __n;
262
+ return strided_iterator{__iter.__iter() + __iter.stride() * __n, __iter.__stride()};
235
263
  }
236
264
 
237
265
  //! @brief Decrements a @c strided_iterator by a given number of steps
@@ -239,9 +267,9 @@ public:
239
267
  //! @note Decrements the base iterator by @c __n times the stride
240
268
  _CCCL_EXEC_CHECK_DISABLE
241
269
  _CCCL_API constexpr strided_iterator&
242
- operator-=(difference_type __n) noexcept(__noexcept_stride && noexcept(__iter_ -= 1))
270
+ operator-=(difference_type __n) noexcept(__noexcept_stride && noexcept(::cuda::std::declval<_Iter&>() -= 1))
243
271
  {
244
- __iter_ -= stride() * __n;
272
+ __iter() -= stride() * __n;
245
273
  return *this;
246
274
  }
247
275
 
@@ -250,21 +278,25 @@ public:
250
278
  //! @param __iter The @c strided_iterator to decrement
251
279
  _CCCL_EXEC_CHECK_DISABLE
252
280
  [[nodiscard]] _CCCL_API friend constexpr strided_iterator
253
- operator-(strided_iterator __iter, difference_type __n) noexcept(noexcept(__iter_ -= __n))
281
+ operator-(const strided_iterator& __iter, difference_type __n) noexcept(
282
+ ::cuda::std::is_nothrow_copy_constructible_v<_Iter> && noexcept(::cuda::std::declval<const _Iter&>() - __n))
254
283
  {
255
- __iter -= __n;
256
- return __iter;
284
+ return strided_iterator{__iter.__iter() - __iter.stride() * __n, __iter.__stride()};
257
285
  }
258
286
 
287
+ template <class _Iter2, class _OtherIter>
288
+ static constexpr bool __noexcept_difference =
289
+ noexcept(::cuda::std::declval<const _Iter2&>() - ::cuda::std::declval<const _OtherIter&>());
290
+
259
291
  //! @brief Returns distance between two @c strided_iterator's in units of the stride
260
292
  _CCCL_EXEC_CHECK_DISABLE
261
293
  _CCCL_TEMPLATE(class _OtherIter, class _OtherStride)
262
294
  _CCCL_REQUIRES(::cuda::std::sized_sentinel_for<_OtherIter, _Iter>)
263
295
  [[nodiscard]] _CCCL_API friend constexpr difference_type
264
- operator-(const strided_iterator& __x, const strided_iterator<_OtherIter, _OtherStride>& __y) noexcept(
265
- noexcept(::cuda::std::declval<_Iter>() - ::cuda::std::declval<_OtherIter>()))
296
+ operator-(const strided_iterator& __x, const strided_iterator<_OtherIter, _OtherStride>& __y) //
297
+ noexcept(__noexcept_difference<_Iter, _OtherIter>)
266
298
  {
267
- const difference_type __diff = __x.__iter_ - __y.base();
299
+ const difference_type __diff = __x.__iter() - __y.base();
268
300
  _CCCL_ASSERT(__x.stride() == __y.stride(), "Taking the difference of two strided_iterators with different stride");
269
301
  _CCCL_ASSERT(__diff % __x.stride() == 0, "Underlying iterator difference must be divisible by the stride");
270
302
  return __diff / __x.stride();
@@ -278,7 +310,7 @@ public:
278
310
  operator==(const strided_iterator& __x, const strided_iterator<_OtherIter, _OtherStride>& __y) noexcept(
279
311
  noexcept(::cuda::std::declval<const _Iter&>() == ::cuda::std::declval<const _OtherIter&>()))
280
312
  {
281
- return __x.__iter_ == __y.base();
313
+ return __x.__iter() == __y.base();
282
314
  }
283
315
 
284
316
  #if _CCCL_STD_VER <= 2017
@@ -290,7 +322,7 @@ public:
290
322
  operator!=(const strided_iterator& __x, const strided_iterator<_OtherIter, _OtherStride>& __y) noexcept(
291
323
  noexcept(::cuda::std::declval<const _Iter&>() == ::cuda::std::declval<const _OtherIter&>()))
292
324
  {
293
- return __x.__iter_ != __y.base();
325
+ return __x.__iter() != __y.base();
294
326
  }
295
327
  #endif // _CCCL_STD_VER <= 2017
296
328
 
@@ -304,7 +336,7 @@ public:
304
336
  operator<=>(const strided_iterator& __x, const strided_iterator<_OtherIter, _OtherStride>& __y) noexcept(
305
337
  noexcept(::cuda::std::declval<const _Iter&>() <=> ::cuda::std::declval<const _OtherIter&>()))
306
338
  {
307
- return __x.__iter_ <=> __y.base();
339
+ return __x.__iter() <=> __y.base();
308
340
  }
309
341
  #else // ^^^ _LIBCUDACXX_HAS_SPACESHIP_OPERATOR() ^^^ / vvv !_LIBCUDACXX_HAS_SPACESHIP_OPERATOR() vvv
310
342
 
@@ -316,7 +348,7 @@ public:
316
348
  operator<(const strided_iterator& __x, const strided_iterator<_OtherIter, _OtherStride>& __y) noexcept(
317
349
  noexcept(::cuda::std::declval<const _Iter&>() < ::cuda::std::declval<const _OtherIter&>()))
318
350
  {
319
- return __x.__iter_ < __y.base();
351
+ return __x.__iter() < __y.base();
320
352
  }
321
353
 
322
354
  //! @brief Compares two @c strided_iterator's for greater than by comparing the stored iterators