cuda-cccl 0.3.0__cp311-cp311-manylinux_2_24_aarch64.whl → 0.3.2__cp311-cp311-manylinux_2_24_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cuda-cccl might be problematic. Click here for more details.

Files changed (294) hide show
  1. cuda/cccl/cooperative/__init__.py +7 -1
  2. cuda/cccl/cooperative/experimental/__init__.py +21 -5
  3. cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +2 -5
  4. cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +2 -5
  5. cuda/cccl/headers/include/cub/agent/agent_for.cuh +2 -5
  6. cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +354 -572
  7. cuda/cccl/headers/include/cub/agent/agent_merge.cuh +23 -21
  8. cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +21 -3
  9. cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +2 -5
  10. cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +2 -5
  11. cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +2 -5
  12. cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +2 -5
  13. cuda/cccl/headers/include/cub/agent/agent_rle.cuh +2 -5
  14. cuda/cccl/headers/include/cub/agent/agent_scan.cuh +5 -1
  15. cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +2 -5
  16. cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +2 -5
  17. cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +2 -5
  18. cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +2 -5
  19. cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +2 -5
  20. cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +22 -5
  21. cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +6 -8
  22. cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +24 -14
  23. cuda/cccl/headers/include/cub/block/block_exchange.cuh +5 -0
  24. cuda/cccl/headers/include/cub/block/block_histogram.cuh +4 -0
  25. cuda/cccl/headers/include/cub/block/block_load.cuh +4 -0
  26. cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +4 -2
  27. cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +4 -2
  28. cuda/cccl/headers/include/cub/block/block_reduce.cuh +1 -0
  29. cuda/cccl/headers/include/cub/block/block_scan.cuh +12 -2
  30. cuda/cccl/headers/include/cub/block/block_store.cuh +3 -2
  31. cuda/cccl/headers/include/cub/detail/device_memory_resource.cuh +1 -0
  32. cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +34 -30
  33. cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +1 -1
  34. cuda/cccl/headers/include/cub/device/device_for.cuh +118 -40
  35. cuda/cccl/headers/include/cub/device/device_reduce.cuh +6 -7
  36. cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +170 -260
  37. cuda/cccl/headers/include/cub/device/device_transform.cuh +122 -91
  38. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +6 -7
  39. cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +2 -11
  40. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +12 -29
  41. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +2 -7
  42. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh +0 -1
  43. cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +2 -3
  44. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +4 -5
  45. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +0 -1
  46. cuda/cccl/headers/include/cub/device/dispatch/dispatch_topk.cuh +3 -5
  47. cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +13 -5
  48. cuda/cccl/headers/include/cub/device/dispatch/kernels/for_each.cuh +72 -37
  49. cuda/cccl/headers/include/cub/device/dispatch/kernels/reduce.cuh +2 -5
  50. cuda/cccl/headers/include/cub/device/dispatch/kernels/scan.cuh +2 -5
  51. cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_reduce.cuh +2 -5
  52. cuda/cccl/headers/include/cub/device/dispatch/kernels/transform.cuh +22 -27
  53. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +2 -5
  54. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +2 -5
  55. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +2 -5
  56. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +2 -5
  57. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +2 -5
  58. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +8 -0
  59. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +2 -5
  60. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +2 -5
  61. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +2 -5
  62. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +2 -5
  63. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +2 -5
  64. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +2 -5
  65. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +2 -5
  66. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +61 -70
  67. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +10 -0
  68. cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +24 -17
  69. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +3 -2
  70. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +3 -2
  71. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +2 -2
  72. cuda/cccl/headers/include/cub/warp/warp_load.cuh +6 -6
  73. cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +7 -2
  74. cuda/cccl/headers/include/cub/warp/warp_scan.cuh +7 -3
  75. cuda/cccl/headers/include/cub/warp/warp_store.cuh +1 -0
  76. cuda/cccl/headers/include/cuda/__algorithm/common.h +1 -1
  77. cuda/cccl/headers/include/cuda/__algorithm/copy.h +1 -1
  78. cuda/cccl/headers/include/cuda/__algorithm/fill.h +1 -1
  79. cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +19 -0
  80. cuda/cccl/headers/include/cuda/__cccl_config +1 -0
  81. cuda/cccl/headers/include/cuda/__cmath/fast_modulo_division.h +3 -74
  82. cuda/cccl/headers/include/cuda/__cmath/mul_hi.h +146 -0
  83. cuda/cccl/headers/include/cuda/__complex/get_real_imag.h +0 -4
  84. cuda/cccl/headers/include/cuda/__device/all_devices.h +46 -143
  85. cuda/cccl/headers/include/cuda/__device/arch_id.h +176 -0
  86. cuda/cccl/headers/include/cuda/__device/arch_traits.h +247 -323
  87. cuda/cccl/headers/include/cuda/__device/attributes.h +174 -123
  88. cuda/cccl/headers/include/cuda/__device/compute_capability.h +171 -0
  89. cuda/cccl/headers/include/cuda/__device/device_ref.h +27 -49
  90. cuda/cccl/headers/include/cuda/__device/physical_device.h +100 -96
  91. cuda/cccl/headers/include/cuda/__driver/driver_api.h +105 -3
  92. cuda/cccl/headers/include/cuda/__event/event.h +27 -26
  93. cuda/cccl/headers/include/cuda/__event/event_ref.h +5 -5
  94. cuda/cccl/headers/include/cuda/__event/timed_event.h +10 -7
  95. cuda/cccl/headers/include/cuda/__fwd/devices.h +44 -0
  96. cuda/cccl/headers/include/cuda/__fwd/zip_iterator.h +9 -0
  97. cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h +46 -31
  98. cuda/cccl/headers/include/cuda/__iterator/strided_iterator.h +79 -47
  99. cuda/cccl/headers/include/cuda/__iterator/tabulate_output_iterator.h +59 -36
  100. cuda/cccl/headers/include/cuda/__iterator/transform_input_output_iterator.h +79 -49
  101. cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +74 -48
  102. cuda/cccl/headers/include/cuda/__iterator/transform_output_iterator.h +80 -55
  103. cuda/cccl/headers/include/cuda/__iterator/zip_common.h +148 -0
  104. cuda/cccl/headers/include/cuda/__iterator/zip_iterator.h +21 -137
  105. cuda/cccl/headers/include/cuda/__iterator/zip_transform_iterator.h +592 -0
  106. cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +127 -60
  107. cuda/cccl/headers/include/cuda/__mdspan/host_device_mdspan.h +178 -3
  108. cuda/cccl/headers/include/cuda/__mdspan/restrict_accessor.h +38 -8
  109. cuda/cccl/headers/include/cuda/__mdspan/restrict_mdspan.h +67 -1
  110. cuda/cccl/headers/include/cuda/__memory/ptr_in_range.h +93 -0
  111. cuda/cccl/headers/include/cuda/__memory_resource/get_memory_resource.h +4 -4
  112. cuda/cccl/headers/include/cuda/__memory_resource/properties.h +44 -0
  113. cuda/cccl/headers/include/cuda/__memory_resource/resource.h +1 -1
  114. cuda/cccl/headers/include/cuda/__memory_resource/resource_ref.h +4 -6
  115. cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +2 -1
  116. cuda/cccl/headers/include/cuda/__runtime/ensure_current_context.h +9 -7
  117. cuda/cccl/headers/include/cuda/__stream/stream.h +8 -8
  118. cuda/cccl/headers/include/cuda/__stream/stream_ref.h +18 -16
  119. cuda/cccl/headers/include/cuda/__utility/basic_any.h +1 -1
  120. cuda/cccl/headers/include/cuda/__utility/in_range.h +65 -0
  121. cuda/cccl/headers/include/cuda/algorithm +1 -1
  122. cuda/cccl/headers/include/cuda/cmath +1 -0
  123. cuda/cccl/headers/include/cuda/devices +13 -0
  124. cuda/cccl/headers/include/cuda/iterator +1 -0
  125. cuda/cccl/headers/include/cuda/memory +1 -0
  126. cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h +2 -2
  127. cuda/cccl/headers/include/cuda/std/__algorithm/find.h +1 -1
  128. cuda/cccl/headers/include/cuda/std/__algorithm/includes.h +2 -4
  129. cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h +1 -1
  130. cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h +7 -15
  131. cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h +1 -1
  132. cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h +1 -2
  133. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h +2 -2
  134. cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h +1 -1
  135. cuda/cccl/headers/include/cuda/std/__bit/countl.h +8 -1
  136. cuda/cccl/headers/include/cuda/std/__bit/countr.h +2 -2
  137. cuda/cccl/headers/include/cuda/std/__bit/reference.h +11 -11
  138. cuda/cccl/headers/include/cuda/std/__cccl/algorithm_wrapper.h +36 -0
  139. cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +46 -49
  140. cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h +6 -0
  141. cuda/cccl/headers/include/cuda/std/__cccl/host_std_lib.h +52 -0
  142. cuda/cccl/headers/include/cuda/std/__cccl/memory_wrapper.h +36 -0
  143. cuda/cccl/headers/include/cuda/std/__cccl/numeric_wrapper.h +36 -0
  144. cuda/cccl/headers/include/cuda/std/__chrono/duration.h +16 -16
  145. cuda/cccl/headers/include/cuda/std/__chrono/steady_clock.h +5 -5
  146. cuda/cccl/headers/include/cuda/std/__chrono/system_clock.h +5 -5
  147. cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +3 -2
  148. cuda/cccl/headers/include/cuda/std/__complex/complex.h +3 -2
  149. cuda/cccl/headers/include/cuda/std/__complex/literals.h +14 -34
  150. cuda/cccl/headers/include/cuda/std/__complex/nvbf16.h +2 -1
  151. cuda/cccl/headers/include/cuda/std/__complex/nvfp16.h +4 -3
  152. cuda/cccl/headers/include/cuda/std/__concepts/invocable.h +2 -2
  153. cuda/cccl/headers/include/cuda/std/__cstdlib/malloc.h +3 -2
  154. cuda/cccl/headers/include/cuda/std/__floating_point/fp.h +1 -1
  155. cuda/cccl/headers/include/cuda/std/__functional/bind.h +10 -13
  156. cuda/cccl/headers/include/cuda/std/__functional/function.h +5 -8
  157. cuda/cccl/headers/include/cuda/std/__functional/invoke.h +71 -335
  158. cuda/cccl/headers/include/cuda/std/__functional/mem_fn.h +1 -2
  159. cuda/cccl/headers/include/cuda/std/__functional/reference_wrapper.h +3 -3
  160. cuda/cccl/headers/include/cuda/std/__functional/weak_result_type.h +0 -6
  161. cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +13 -0
  162. cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +13 -0
  163. cuda/cccl/headers/include/cuda/std/__fwd/complex.h +13 -4
  164. cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +23 -0
  165. cuda/cccl/headers/include/cuda/std/__fwd/pair.h +13 -0
  166. cuda/cccl/headers/include/cuda/std/__fwd/string.h +22 -0
  167. cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +14 -0
  168. cuda/cccl/headers/include/cuda/std/__internal/features.h +0 -5
  169. cuda/cccl/headers/include/cuda/std/__internal/namespaces.h +21 -0
  170. cuda/cccl/headers/include/cuda/std/__iterator/iterator_traits.h +5 -5
  171. cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +7 -1
  172. cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +53 -39
  173. cuda/cccl/headers/include/cuda/std/__memory/allocator.h +3 -3
  174. cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +1 -3
  175. cuda/cccl/headers/include/cuda/std/__optional/optional_base.h +1 -0
  176. cuda/cccl/headers/include/cuda/std/__ranges/compressed_movable_box.h +892 -0
  177. cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +2 -2
  178. cuda/cccl/headers/include/cuda/std/__tuple_dir/make_tuple_types.h +23 -1
  179. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +4 -0
  180. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like_ext.h +4 -0
  181. cuda/cccl/headers/include/cuda/std/__type_traits/is_primary_template.h +7 -5
  182. cuda/cccl/headers/include/cuda/std/__type_traits/result_of.h +1 -1
  183. cuda/cccl/headers/include/cuda/std/__utility/pair.h +0 -5
  184. cuda/cccl/headers/include/cuda/std/bitset +1 -1
  185. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +15 -12
  186. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/variant +11 -9
  187. cuda/cccl/headers/include/cuda/std/inplace_vector +4 -4
  188. cuda/cccl/headers/include/cuda/std/numbers +5 -0
  189. cuda/cccl/headers/include/cuda/std/string_view +155 -13
  190. cuda/cccl/headers/include/cuda/std/version +1 -4
  191. cuda/cccl/headers/include/cuda/stream_ref +5 -0
  192. cuda/cccl/headers/include/cuda/utility +1 -0
  193. cuda/cccl/headers/include/nv/target +7 -2
  194. cuda/cccl/headers/include/thrust/allocate_unique.h +1 -1
  195. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +309 -33
  196. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +151 -4
  197. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +60 -3
  198. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +45 -3
  199. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +31 -6
  200. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +29 -16
  201. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +41 -4
  202. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.h +42 -4
  203. cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +3 -3
  204. cuda/cccl/headers/include/thrust/detail/integer_math.h +3 -20
  205. cuda/cccl/headers/include/thrust/detail/internal_functional.h +1 -1
  206. cuda/cccl/headers/include/thrust/detail/memory_algorithms.h +1 -1
  207. cuda/cccl/headers/include/thrust/detail/temporary_array.h +1 -1
  208. cuda/cccl/headers/include/thrust/detail/type_traits.h +1 -1
  209. cuda/cccl/headers/include/thrust/device_delete.h +18 -3
  210. cuda/cccl/headers/include/thrust/device_free.h +16 -3
  211. cuda/cccl/headers/include/thrust/device_new.h +29 -8
  212. cuda/cccl/headers/include/thrust/host_vector.h +1 -1
  213. cuda/cccl/headers/include/thrust/iterator/iterator_traits.h +11 -0
  214. cuda/cccl/headers/include/thrust/iterator/tabulate_output_iterator.h +5 -2
  215. cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +1 -1
  216. cuda/cccl/headers/include/thrust/mr/pool.h +1 -1
  217. cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +33 -0
  218. cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +13 -115
  219. cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +8 -2
  220. cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +7 -7
  221. cuda/cccl/parallel/experimental/__init__.py +21 -74
  222. cuda/compute/__init__.py +79 -0
  223. cuda/{cccl/parallel/experimental → compute}/_bindings.pyi +43 -1
  224. cuda/{cccl/parallel/experimental → compute}/_bindings_impl.pyx +157 -8
  225. cuda/{cccl/parallel/experimental → compute}/algorithms/_histogram.py +2 -2
  226. cuda/{cccl/parallel/experimental → compute}/algorithms/_merge_sort.py +2 -2
  227. cuda/{cccl/parallel/experimental → compute}/algorithms/_radix_sort.py +3 -3
  228. cuda/{cccl/parallel/experimental → compute}/algorithms/_reduce.py +2 -2
  229. cuda/{cccl/parallel/experimental → compute}/algorithms/_scan.py +112 -40
  230. cuda/{cccl/parallel/experimental → compute}/algorithms/_segmented_reduce.py +2 -2
  231. cuda/{cccl/parallel/experimental → compute}/algorithms/_three_way_partition.py +2 -2
  232. cuda/{cccl/parallel/experimental → compute}/algorithms/_transform.py +36 -15
  233. cuda/{cccl/parallel/experimental → compute}/algorithms/_unique_by_key.py +2 -2
  234. cuda/compute/cu12/_bindings_impl.cpython-311-aarch64-linux-gnu.so +0 -0
  235. cuda/{cccl/parallel/experimental → compute}/cu12/cccl/libcccl.c.parallel.so +0 -0
  236. cuda/compute/cu13/_bindings_impl.cpython-311-aarch64-linux-gnu.so +0 -0
  237. cuda/{cccl/parallel/experimental → compute}/cu13/cccl/libcccl.c.parallel.so +0 -0
  238. cuda/{cccl/parallel/experimental → compute}/iterators/__init__.py +2 -0
  239. cuda/{cccl/parallel/experimental → compute}/iterators/_factories.py +36 -8
  240. cuda/{cccl/parallel/experimental → compute}/iterators/_iterators.py +206 -1
  241. cuda/{cccl/parallel/experimental → compute}/numba_utils.py +2 -2
  242. cuda/{cccl/parallel/experimental → compute}/struct.py +2 -2
  243. cuda/{cccl/parallel/experimental → compute}/typing.py +2 -0
  244. cuda/coop/__init__.py +8 -0
  245. cuda/{cccl/cooperative/experimental → coop}/_nvrtc.py +3 -2
  246. cuda/{cccl/cooperative/experimental → coop}/_scan_op.py +3 -3
  247. cuda/{cccl/cooperative/experimental → coop}/_types.py +2 -2
  248. cuda/{cccl/cooperative/experimental → coop}/_typing.py +1 -1
  249. cuda/{cccl/cooperative/experimental → coop}/block/__init__.py +6 -6
  250. cuda/{cccl/cooperative/experimental → coop}/block/_block_exchange.py +4 -4
  251. cuda/{cccl/cooperative/experimental → coop}/block/_block_load_store.py +6 -6
  252. cuda/{cccl/cooperative/experimental → coop}/block/_block_merge_sort.py +4 -4
  253. cuda/{cccl/cooperative/experimental → coop}/block/_block_radix_sort.py +6 -6
  254. cuda/{cccl/cooperative/experimental → coop}/block/_block_reduce.py +6 -6
  255. cuda/{cccl/cooperative/experimental → coop}/block/_block_scan.py +7 -7
  256. cuda/coop/warp/__init__.py +9 -0
  257. cuda/{cccl/cooperative/experimental → coop}/warp/_warp_merge_sort.py +3 -3
  258. cuda/{cccl/cooperative/experimental → coop}/warp/_warp_reduce.py +6 -6
  259. cuda/{cccl/cooperative/experimental → coop}/warp/_warp_scan.py +4 -4
  260. {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/METADATA +1 -1
  261. {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/RECORD +275 -276
  262. cuda/cccl/cooperative/experimental/warp/__init__.py +0 -9
  263. cuda/cccl/headers/include/cub/device/dispatch/dispatch_advance_iterators.cuh +0 -111
  264. cuda/cccl/headers/include/thrust/detail/algorithm_wrapper.h +0 -37
  265. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.inl +0 -371
  266. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.inl +0 -242
  267. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.inl +0 -137
  268. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.inl +0 -99
  269. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.inl +0 -68
  270. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.inl +0 -86
  271. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.inl +0 -79
  272. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.inl +0 -98
  273. cuda/cccl/headers/include/thrust/detail/device_delete.inl +0 -52
  274. cuda/cccl/headers/include/thrust/detail/device_free.inl +0 -47
  275. cuda/cccl/headers/include/thrust/detail/device_new.inl +0 -61
  276. cuda/cccl/headers/include/thrust/detail/memory_wrapper.h +0 -40
  277. cuda/cccl/headers/include/thrust/detail/numeric_wrapper.h +0 -37
  278. cuda/cccl/parallel/experimental/.gitignore +0 -4
  279. cuda/cccl/parallel/experimental/cu12/_bindings_impl.cpython-311-aarch64-linux-gnu.so +0 -0
  280. cuda/cccl/parallel/experimental/cu13/_bindings_impl.cpython-311-aarch64-linux-gnu.so +0 -0
  281. /cuda/{cccl/parallel/experimental → compute}/_bindings.py +0 -0
  282. /cuda/{cccl/parallel/experimental → compute}/_caching.py +0 -0
  283. /cuda/{cccl/parallel/experimental → compute}/_cccl_interop.py +0 -0
  284. /cuda/{cccl/parallel/experimental → compute}/_utils/__init__.py +0 -0
  285. /cuda/{cccl/parallel/experimental → compute}/_utils/protocols.py +0 -0
  286. /cuda/{cccl/parallel/experimental → compute}/_utils/temp_storage_buffer.py +0 -0
  287. /cuda/{cccl/parallel/experimental → compute}/algorithms/__init__.py +0 -0
  288. /cuda/{cccl/parallel/experimental → compute}/cccl/.gitkeep +0 -0
  289. /cuda/{cccl/parallel/experimental → compute}/iterators/_zip_iterator.py +0 -0
  290. /cuda/{cccl/parallel/experimental → compute}/op.py +0 -0
  291. /cuda/{cccl/cooperative/experimental → coop}/_caching.py +0 -0
  292. /cuda/{cccl/cooperative/experimental → coop}/_common.py +0 -0
  293. {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/WHEEL +0 -0
  294. {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/licenses/LICENSE +0 -0
@@ -30,6 +30,7 @@
30
30
  #include <cuda/std/__functional/invoke.h>
31
31
  #include <cuda/std/__iterator/concepts.h>
32
32
  #include <cuda/std/__iterator/iterator_traits.h>
33
+ #include <cuda/std/__ranges/compressed_movable_box.h>
33
34
  #include <cuda/std/__ranges/concepts.h>
34
35
  #include <cuda/std/__ranges/movable_box.h>
35
36
  #include <cuda/std/__type_traits/conditional.h>
@@ -41,6 +42,7 @@
41
42
  #include <cuda/std/__type_traits/is_object.h>
42
43
  #include <cuda/std/__type_traits/is_reference.h>
43
44
  #include <cuda/std/__type_traits/remove_cvref.h>
45
+ #include <cuda/std/__utility/declval.h>
44
46
  #include <cuda/std/__utility/forward.h>
45
47
  #include <cuda/std/__utility/move.h>
46
48
 
@@ -153,11 +155,40 @@ public:
153
155
  template <class _InputFn, class _OutputFn, class _Iter>
154
156
  class transform_input_output_iterator
155
157
  {
156
- public:
157
- _Iter __current_{};
158
- ::cuda::std::ranges::__movable_box<_InputFn> __input_func_{};
159
- ::cuda::std::ranges::__movable_box<_OutputFn> __output_func_{};
158
+ // Not a base because then the friend operators would be ambiguous
159
+ ::cuda::std::__compressed_movable_box<_Iter, _InputFn, _OutputFn> __store_;
160
+
161
+ [[nodiscard]] _CCCL_API constexpr _Iter& __iter() noexcept
162
+ {
163
+ return __store_.template __get<0>();
164
+ }
165
+
166
+ [[nodiscard]] _CCCL_API constexpr const _Iter& __iter() const noexcept
167
+ {
168
+ return __store_.template __get<0>();
169
+ }
170
+
171
+ [[nodiscard]] _CCCL_API constexpr _InputFn& __input_func() noexcept
172
+ {
173
+ return __store_.template __get<1>();
174
+ }
175
+
176
+ [[nodiscard]] _CCCL_API constexpr const _InputFn& __input_func() const noexcept
177
+ {
178
+ return __store_.template __get<1>();
179
+ }
180
+
181
+ [[nodiscard]] _CCCL_API constexpr _OutputFn& __output_func() noexcept
182
+ {
183
+ return __store_.template __get<2>();
184
+ }
185
+
186
+ [[nodiscard]] _CCCL_API constexpr const _OutputFn& __output_func() const noexcept
187
+ {
188
+ return __store_.template __get<2>();
189
+ }
160
190
 
191
+ public:
161
192
  using iterator_concept = ::cuda::std::conditional_t<
162
193
  ::cuda::std::__has_random_access_traversal<_Iter>,
163
194
  ::cuda::std::random_access_iterator_tag,
@@ -183,13 +214,6 @@ public:
183
214
  "dereferencing _Iter");
184
215
 
185
216
  //! @brief Default constructs a @c transform_input_output_iterator with a value initialized iterator and functors
186
- #if _CCCL_HAS_CONCEPTS()
187
- _CCCL_EXEC_CHECK_DISABLE
188
- _CCCL_HIDE_FROM_ABI transform_input_output_iterator()
189
- requires ::cuda::std::default_initializable<_Iter> && ::cuda::std::default_initializable<_InputFn>
190
- && ::cuda::std::default_initializable<_OutputFn>
191
- = default;
192
- #else // ^^^ _CCCL_HAS_CONCEPTS() ^^^ / vvv !_CCCL_HAS_CONCEPTS() vvv
193
217
  _CCCL_EXEC_CHECK_DISABLE
194
218
  _CCCL_TEMPLATE(class _Iter2 = _Iter, class _InputFn2 = _InputFn, class _OutputFn2 = _OutputFn)
195
219
  _CCCL_REQUIRES(::cuda::std::default_initializable<_Iter2> _CCCL_AND ::cuda::std::default_initializable<_InputFn2>
@@ -198,8 +222,8 @@ public:
198
222
  ::cuda::std::is_nothrow_default_constructible_v<_Iter2>
199
223
  && ::cuda::std::is_nothrow_default_constructible_v<_InputFn2>
200
224
  && ::cuda::std::is_nothrow_default_constructible_v<_OutputFn2>)
225
+ : __store_()
201
226
  {}
202
- #endif // ^^^ !_CCCL_HAS_CONCEPTS() ^^^
203
227
 
204
228
  //! @brief Constructs a @c transform_input_output_iterator with base iterator, input functor and output functor
205
229
  //! @param __iter The iterator to transform
@@ -210,22 +234,20 @@ public:
210
234
  noexcept(::cuda::std::is_nothrow_move_constructible_v<_Iter>
211
235
  && ::cuda::std::is_nothrow_move_constructible_v<_InputFn>
212
236
  && ::cuda::std::is_nothrow_move_constructible_v<_OutputFn>)
213
- : __current_(::cuda::std::move(__iter))
214
- , __input_func_(::cuda::std::in_place, ::cuda::std::move(__input_func))
215
- , __output_func_(::cuda::std::in_place, ::cuda::std::move(__output_func))
237
+ : __store_(::cuda::std::move(__iter), ::cuda::std::move(__input_func), ::cuda::std::move(__output_func))
216
238
  {}
217
239
 
218
240
  //! @brief Returns a const reference to the base iterator stored
219
241
  [[nodiscard]] _CCCL_API constexpr const _Iter& base() const& noexcept
220
242
  {
221
- return __current_;
243
+ return __iter();
222
244
  }
223
245
 
224
246
  //! @brief Extracts the stored base iterator
225
247
  _CCCL_EXEC_CHECK_DISABLE
226
248
  [[nodiscard]] _CCCL_API constexpr _Iter base() && noexcept(::cuda::std::is_nothrow_move_constructible_v<_Iter>)
227
249
  {
228
- return ::cuda::std::move(__current_);
250
+ return ::cuda::std::move(__iter());
229
251
  }
230
252
 
231
253
  //! @brief Dereferences the @c transform_input_output_iterator. Returns a proxy that transforms values read from the
@@ -235,7 +257,7 @@ public:
235
257
  noexcept(::cuda::std::is_nothrow_copy_constructible_v<_Iter>)
236
258
  {
237
259
  return __transform_input_output_proxy{
238
- __current_, const_cast<_InputFn&>(*__input_func_), const_cast<_OutputFn&>(*__output_func_)};
260
+ __iter(), const_cast<_InputFn&>(__input_func()), const_cast<_OutputFn&>(__output_func())};
239
261
  }
240
262
 
241
263
  //! @brief Dereferences the @c transform_input_output_iterator. Returns a proxy that transforms values read from the
@@ -243,7 +265,7 @@ public:
243
265
  _CCCL_EXEC_CHECK_DISABLE
244
266
  [[nodiscard]] _CCCL_API constexpr reference operator*() noexcept(::cuda::std::is_nothrow_copy_constructible_v<_Iter>)
245
267
  {
246
- return __transform_input_output_proxy{__current_, *__input_func_, *__output_func_};
268
+ return __transform_input_output_proxy{__iter(), __input_func(), __output_func()};
247
269
  }
248
270
 
249
271
  //! @brief Subscripts the @c transform_input_output_iterator. Returns a proxy that transforms values read from the
@@ -254,10 +276,11 @@ public:
254
276
  _CCCL_TEMPLATE(class _Iter2 = _Iter)
255
277
  _CCCL_REQUIRES(::cuda::std::__has_random_access_traversal<_Iter2>)
256
278
  [[nodiscard]] _CCCL_API constexpr reference operator[](difference_type __n) const
257
- noexcept(::cuda::std::is_nothrow_copy_constructible_v<_Iter2> && noexcept(__current_ + __n))
279
+ noexcept(::cuda::std::is_nothrow_copy_constructible_v<_Iter2>
280
+ && noexcept(::cuda::std::declval<const _Iter2&>() + __n))
258
281
  {
259
282
  return __transform_input_output_proxy{
260
- __current_ + __n, const_cast<_InputFn&>(*__input_func_), const_cast<_OutputFn&>(*__output_func_)};
283
+ __iter() + __n, const_cast<_InputFn&>(__input_func()), const_cast<_OutputFn&>(__output_func())};
261
284
  }
262
285
 
263
286
  //! @brief Subscripts the @c transform_input_output_iterator. Returns a proxy that transforms values read from the
@@ -268,23 +291,23 @@ public:
268
291
  _CCCL_TEMPLATE(class _Iter2 = _Iter)
269
292
  _CCCL_REQUIRES(::cuda::std::__has_random_access_traversal<_Iter2>)
270
293
  [[nodiscard]] _CCCL_API constexpr reference operator[](difference_type __n) noexcept(
271
- ::cuda::std::is_nothrow_copy_constructible_v<_Iter2> && noexcept(__current_ + __n))
294
+ ::cuda::std::is_nothrow_copy_constructible_v<_Iter2> && noexcept(::cuda::std::declval<const _Iter2&>() + __n))
272
295
  {
273
- return __transform_input_output_proxy{__current_ + __n, *__input_func_, *__output_func_};
296
+ return __transform_input_output_proxy{__iter() + __n, __input_func(), __output_func()};
274
297
  }
275
298
 
276
299
  //! @brief Increments the stored iterator
277
300
  _CCCL_EXEC_CHECK_DISABLE
278
- _CCCL_API constexpr transform_input_output_iterator& operator++() noexcept(noexcept(++__current_))
301
+ _CCCL_API constexpr transform_input_output_iterator& operator++() noexcept(noexcept(++::cuda::std::declval<_Iter&>()))
279
302
  {
280
- ++__current_;
303
+ ++__iter();
281
304
  return *this;
282
305
  }
283
306
 
284
307
  //! @brief Increments the stored iterator
285
308
  _CCCL_EXEC_CHECK_DISABLE
286
309
  _CCCL_API constexpr transform_input_output_iterator operator++(int) noexcept(
287
- noexcept(++__current_)
310
+ noexcept(++::cuda::std::declval<_Iter&>())
288
311
  && ::cuda::std::is_nothrow_copy_constructible_v<_Iter> && ::cuda::std::is_nothrow_copy_constructible_v<_InputFn>
289
312
  && ::cuda::std::is_nothrow_copy_constructible_v<_OutputFn>)
290
313
  {
@@ -297,9 +320,9 @@ public:
297
320
  _CCCL_EXEC_CHECK_DISABLE
298
321
  _CCCL_TEMPLATE(class _Iter2 = _Iter)
299
322
  _CCCL_REQUIRES(::cuda::std::__has_bidirectional_traversal<_Iter2>)
300
- _CCCL_API constexpr transform_input_output_iterator& operator--() noexcept(noexcept(--__current_))
323
+ _CCCL_API constexpr transform_input_output_iterator& operator--() noexcept(noexcept(--::cuda::std::declval<_Iter2&>()))
301
324
  {
302
- --__current_;
325
+ --__iter();
303
326
  return *this;
304
327
  }
305
328
 
@@ -307,8 +330,8 @@ public:
307
330
  _CCCL_EXEC_CHECK_DISABLE
308
331
  _CCCL_TEMPLATE(class _Iter2 = _Iter)
309
332
  _CCCL_REQUIRES(::cuda::std::__has_bidirectional_traversal<_Iter2>)
310
- _CCCL_API constexpr transform_input_output_iterator
311
- operator--(int) noexcept(::cuda::std::is_nothrow_copy_constructible_v<_Iter> && noexcept(--__current_))
333
+ _CCCL_API constexpr transform_input_output_iterator operator--(int) noexcept(
334
+ ::cuda::std::is_nothrow_copy_constructible_v<_Iter> && noexcept(--::cuda::std::declval<_Iter2&>()))
312
335
  {
313
336
  auto __tmp = *this;
314
337
  --*this;
@@ -321,9 +344,9 @@ public:
321
344
  _CCCL_TEMPLATE(class _Iter2 = _Iter)
322
345
  _CCCL_REQUIRES(::cuda::std::__has_random_access_traversal<_Iter2>)
323
346
  _CCCL_API constexpr transform_input_output_iterator&
324
- operator+=(difference_type __n) noexcept(noexcept(__current_ += __n))
347
+ operator+=(difference_type __n) noexcept(noexcept(::cuda::std::declval<_Iter2&>() += __n))
325
348
  {
326
- __current_ += __n;
349
+ __iter() += __n;
327
350
  return *this;
328
351
  }
329
352
 
@@ -338,7 +361,7 @@ public:
338
361
  && noexcept(::cuda::std::declval<const _Iter2&>() + difference_type{}))
339
362
  _CCCL_TRAILING_REQUIRES(transform_input_output_iterator)(::cuda::std::__has_random_access_traversal<_Iter2>)
340
363
  {
341
- return transform_input_output_iterator{__iter.__current_ + __n, *__iter.__input_func_, *__iter.__output_func_};
364
+ return transform_input_output_iterator{__iter.__iter() + __n, __iter.__input_func(), __iter.__output_func()};
342
365
  }
343
366
 
344
367
  //! @brief Returns a copy of a @c transform_input_output_iterator advanced by a given number of elements
@@ -352,7 +375,7 @@ public:
352
375
  && noexcept(::cuda::std::declval<const _Iter2&>() + difference_type{}))
353
376
  _CCCL_TRAILING_REQUIRES(transform_input_output_iterator)(::cuda::std::__has_random_access_traversal<_Iter2>)
354
377
  {
355
- return transform_input_output_iterator{__iter.__current_ + __n, *__iter.__input_func_, *__iter.__output_func_};
378
+ return transform_input_output_iterator{__iter.__iter() + __n, __iter.__input_func(), __iter.__output_func()};
356
379
  }
357
380
 
358
381
  //! @brief Decrements the @c transform_input_output_iterator by a given number of elements
@@ -361,9 +384,9 @@ public:
361
384
  _CCCL_TEMPLATE(class _Iter2 = _Iter)
362
385
  _CCCL_REQUIRES(::cuda::std::__has_random_access_traversal<_Iter2>)
363
386
  _CCCL_API constexpr transform_input_output_iterator&
364
- operator-=(difference_type __n) noexcept(noexcept(__current_ -= __n))
387
+ operator-=(difference_type __n) noexcept(noexcept(::cuda::std::declval<_Iter2&>() -= __n))
365
388
  {
366
- __current_ -= __n;
389
+ __iter() -= __n;
367
390
  return *this;
368
391
  }
369
392
 
@@ -378,18 +401,25 @@ public:
378
401
  && noexcept(::cuda::std::declval<const _Iter2&>() - difference_type{}))
379
402
  _CCCL_TRAILING_REQUIRES(transform_input_output_iterator)(::cuda::std::__has_random_access_traversal<_Iter2>)
380
403
  {
381
- return transform_input_output_iterator{__iter.__current_ - __n, *__iter.__input_func_, *__iter.__output_func_};
404
+ return transform_input_output_iterator{__iter.__iter() - __n, __iter.__input_func(), __iter.__output_func()};
382
405
  }
383
406
 
407
+ template <class _Iter2>
408
+ static constexpr bool __can_difference =
409
+ (::cuda::std::__has_random_access_traversal<_Iter2> || ::cuda::std::sized_sentinel_for<_Iter2, _Iter2>);
410
+
411
+ template <class _Iter2>
412
+ static constexpr bool __noexcept_difference =
413
+ noexcept(::cuda::std::declval<const _Iter2&>() - ::cuda::std::declval<const _Iter2&>());
414
+
384
415
  //! @brief Returns the distance between two @c transform_input_output_iterator
385
416
  _CCCL_EXEC_CHECK_DISABLE
386
417
  template <class _Iter2 = _Iter>
387
- [[nodiscard]] _CCCL_API friend constexpr auto
388
- operator-(const transform_input_output_iterator& __lhs, const transform_input_output_iterator& __rhs) noexcept(
389
- noexcept(::cuda::std::declval<const _Iter2&>() - ::cuda::std::declval<const _Iter2&>()))
390
- _CCCL_TRAILING_REQUIRES(difference_type)(::cuda::std::sized_sentinel_for<_Iter2, _Iter2>)
418
+ [[nodiscard]] _CCCL_API friend constexpr auto operator-(const transform_input_output_iterator& __lhs,
419
+ const transform_input_output_iterator& __rhs) //
420
+ noexcept(__noexcept_difference<_Iter2>) _CCCL_TRAILING_REQUIRES(difference_type)(__can_difference<_Iter2>)
391
421
  {
392
- return __lhs.__current_ - __rhs.__current_;
422
+ return __lhs.__iter() - __rhs.__iter();
393
423
  }
394
424
 
395
425
  //! @brief Compares two @c transform_input_output_iterator for equality by comparing the stored iterators
@@ -400,7 +430,7 @@ public:
400
430
  noexcept(::cuda::std::declval<const _Iter2&>() == ::cuda::std::declval<const _Iter2&>()))
401
431
  _CCCL_TRAILING_REQUIRES(bool)(::cuda::std::equality_comparable<_Iter2>)
402
432
  {
403
- return __lhs.__current_ == __rhs.__current_;
433
+ return __lhs.__iter() == __rhs.__iter();
404
434
  }
405
435
 
406
436
  #if _CCCL_STD_VER <= 2017
@@ -412,7 +442,7 @@ public:
412
442
  noexcept(::cuda::std::declval<const _Iter2&>() != ::cuda::std::declval<const _Iter2&>()))
413
443
  _CCCL_TRAILING_REQUIRES(bool)(::cuda::std::equality_comparable<_Iter2>)
414
444
  {
415
- return __lhs.__current_ != __rhs.__current_;
445
+ return __lhs.__iter() != __rhs.__iter();
416
446
  }
417
447
  #endif // _CCCL_STD_VER <= 2017
418
448
 
@@ -424,7 +454,7 @@ public:
424
454
  noexcept(::cuda::std::declval<const _Iter2&>() < ::cuda::std::declval<const _Iter2&>()))
425
455
  _CCCL_TRAILING_REQUIRES(bool)(::cuda::std::__has_random_access_traversal<_Iter2>)
426
456
  {
427
- return __lhs.__current_ < __rhs.__current_;
457
+ return __lhs.__iter() < __rhs.__iter();
428
458
  }
429
459
 
430
460
  //! @brief Compares two @c transform_input_output_iterator for greater than by comparing the stored iterators
@@ -435,7 +465,7 @@ public:
435
465
  noexcept(::cuda::std::declval<const _Iter2&>() < ::cuda::std::declval<const _Iter2&>()))
436
466
  _CCCL_TRAILING_REQUIRES(bool)(::cuda::std::__has_random_access_traversal<_Iter2>)
437
467
  {
438
- return __lhs.__current_ > __rhs.__current_;
468
+ return __lhs.__iter() > __rhs.__iter();
439
469
  }
440
470
 
441
471
  //! @brief Compares two @c transform_input_output_iterator for less equal by comparing the stored iterators
@@ -446,7 +476,7 @@ public:
446
476
  noexcept(::cuda::std::declval<const _Iter2&>() < ::cuda::std::declval<const _Iter2&>()))
447
477
  _CCCL_TRAILING_REQUIRES(bool)(::cuda::std::__has_random_access_traversal<_Iter2>)
448
478
  {
449
- return __lhs.__current_ <= __rhs.__current_;
479
+ return __lhs.__iter() <= __rhs.__iter();
450
480
  }
451
481
 
452
482
  //! @brief Compares two @c transform_input_output_iterator for greater equal by comparing the stored iterators
@@ -457,7 +487,7 @@ public:
457
487
  noexcept(::cuda::std::declval<const _Iter2&>() < ::cuda::std::declval<const _Iter2&>()))
458
488
  _CCCL_TRAILING_REQUIRES(bool)(::cuda::std::__has_random_access_traversal<_Iter2>)
459
489
  {
460
- return __lhs.__current_ >= __rhs.__current_;
490
+ return __lhs.__iter() >= __rhs.__iter();
461
491
  }
462
492
 
463
493
  #if _LIBCUDACXX_HAS_SPACESHIP_OPERATOR()
@@ -471,7 +501,7 @@ public:
471
501
  _CCCL_TRAILING_REQUIRES(bool)(
472
502
  ::cuda::std::__has_random_access_traversal<_Iter2>&& ::cuda::std::three_way_comparable<_Iter2>)
473
503
  {
474
- return __lhs.__current_ <=> __rhs.__current_;
504
+ return __lhs.__iter() <=> __rhs.__iter();
475
505
  }
476
506
  #endif // !_LIBCUDACXX_HAS_NO_SPACESHIP_OPERATOR
477
507
  };
@@ -31,6 +31,7 @@
31
31
  #include <cuda/std/__functional/invoke.h>
32
32
  #include <cuda/std/__iterator/concepts.h>
33
33
  #include <cuda/std/__iterator/iterator_traits.h>
34
+ #include <cuda/std/__ranges/compressed_movable_box.h>
34
35
  #include <cuda/std/__ranges/concepts.h>
35
36
  #include <cuda/std/__ranges/movable_box.h>
36
37
  #include <cuda/std/__type_traits/conditional.h>
@@ -40,6 +41,7 @@
40
41
  #include <cuda/std/__type_traits/is_object.h>
41
42
  #include <cuda/std/__type_traits/is_reference.h>
42
43
  #include <cuda/std/__type_traits/remove_cvref.h>
44
+ #include <cuda/std/__utility/declval.h>
43
45
  #include <cuda/std/__utility/forward.h>
44
46
  #include <cuda/std/__utility/move.h>
45
47
 
@@ -159,10 +161,30 @@ class transform_iterator : public __transform_iterator_category_base<_Fn, _Iter>
159
161
  static_assert(::cuda::std::__can_reference<::cuda::std::invoke_result_t<_Fn&, ::cuda::std::iter_reference_t<_Iter>>>,
160
162
  "cuda::transform_iterator requires that the return type of _Fn is referenceable");
161
163
 
162
- public:
163
- _Iter __current_;
164
- ::cuda::std::ranges::__movable_box<_Fn> __func_;
164
+ // Not a base because then the friend operators would be ambiguous
165
+ ::cuda::std::__compressed_movable_box<_Iter, _Fn> __store_;
166
+
167
+ [[nodiscard]] _CCCL_API constexpr _Iter& __iter() noexcept
168
+ {
169
+ return __store_.template __get<0>();
170
+ }
171
+
172
+ [[nodiscard]] _CCCL_API constexpr const _Iter& __iter() const noexcept
173
+ {
174
+ return __store_.template __get<0>();
175
+ }
176
+
177
+ [[nodiscard]] _CCCL_API constexpr _Fn& __func() noexcept
178
+ {
179
+ return __store_.template __get<1>();
180
+ }
165
181
 
182
+ [[nodiscard]] _CCCL_API constexpr const _Fn& __func() const noexcept
183
+ {
184
+ return __store_.template __get<1>();
185
+ }
186
+
187
+ public:
166
188
  using iterator_concept = ::cuda::std::conditional_t<
167
189
  ::cuda::std::__has_random_access_traversal<_Iter>,
168
190
  ::cuda::std::random_access_iterator_tag,
@@ -181,19 +203,13 @@ public:
181
203
  using pointer = void;
182
204
 
183
205
  //! @brief Default constructs a @c transform_iterator with a value initialized iterator and functor
184
- #if _CCCL_HAS_CONCEPTS()
185
- _CCCL_EXEC_CHECK_DISABLE
186
- _CCCL_HIDE_FROM_ABI transform_iterator()
187
- requires ::cuda::std::default_initializable<_Iter> && ::cuda::std::default_initializable<_Fn>
188
- = default;
189
- #else // ^^^ _CCCL_HAS_CONCEPTS() ^^^ / vvv !_CCCL_HAS_CONCEPTS() vvv
190
206
  _CCCL_EXEC_CHECK_DISABLE
191
207
  _CCCL_TEMPLATE(class _Iter2 = _Iter, class _Fn2 = _Fn)
192
208
  _CCCL_REQUIRES(::cuda::std::default_initializable<_Iter2> _CCCL_AND ::cuda::std::default_initializable<_Fn2>)
193
209
  _CCCL_API constexpr transform_iterator() noexcept(
194
210
  ::cuda::std::is_nothrow_default_constructible_v<_Iter2> && ::cuda::std::is_nothrow_default_constructible_v<_Fn2>)
211
+ : __store_()
195
212
  {}
196
- #endif // ^^^ !_CCCL_HAS_CONCEPTS() ^^^
197
213
 
198
214
  //! @brief Constructs a @c transform_iterator with a given iterator and functor
199
215
  //! @param __iter The iterator to transform
@@ -201,21 +217,20 @@ public:
201
217
  _CCCL_EXEC_CHECK_DISABLE
202
218
  _CCCL_API constexpr transform_iterator(_Iter __iter, _Fn __func) noexcept(
203
219
  ::cuda::std::is_nothrow_move_constructible_v<_Iter> && ::cuda::std::is_nothrow_move_constructible_v<_Fn>)
204
- : __current_(::cuda::std::move(__iter))
205
- , __func_(::cuda::std::in_place, ::cuda::std::move(__func))
220
+ : __store_(::cuda::std::move(__iter), ::cuda::std::move(__func))
206
221
  {}
207
222
 
208
223
  //! @brief Returns a const reference to the stored iterator
209
224
  [[nodiscard]] _CCCL_API constexpr const _Iter& base() const& noexcept
210
225
  {
211
- return __current_;
226
+ return __iter();
212
227
  }
213
228
 
214
229
  //! @brief Extracts the stored iterator
215
230
  _CCCL_EXEC_CHECK_DISABLE
216
231
  [[nodiscard]] _CCCL_API constexpr _Iter base() && noexcept(::cuda::std::is_nothrow_move_constructible_v<_Iter>)
217
232
  {
218
- return ::cuda::std::move(__current_);
233
+ return ::cuda::std::move(__iter());
219
234
  }
220
235
 
221
236
  //! @brief Dereferences the stored iterator and applies the stored functor to the result
@@ -223,9 +238,9 @@ public:
223
238
  _CCCL_TEMPLATE(class _Iter2 = _Iter)
224
239
  _CCCL_REQUIRES(::cuda::std::regular_invocable<const _Fn&, ::cuda::std::iter_reference_t<const _Iter2>>)
225
240
  [[nodiscard]] _CCCL_API constexpr reference operator*() const
226
- noexcept(noexcept(::cuda::std::invoke(*__func_, *__current_)))
241
+ noexcept(noexcept(::cuda::std::invoke(::cuda::std::declval<const _Fn&>(), *::cuda::std::declval<const _Iter2&>())))
227
242
  {
228
- return ::cuda::std::invoke(*__func_, *__current_);
243
+ return ::cuda::std::invoke(__func(), *__iter());
229
244
  }
230
245
 
231
246
  //! @cond
@@ -236,17 +251,18 @@ public:
236
251
  _CCCL_TEMPLATE(class _Iter2 = _Iter)
237
252
  _CCCL_REQUIRES((!::cuda::std::regular_invocable<const _Fn&, ::cuda::std::iter_reference_t<const _Iter2>>) )
238
253
  [[nodiscard]] _CCCL_API constexpr reference operator*() const
239
- noexcept(noexcept(::cuda::std::invoke(const_cast<_Fn&>(*__func_), *__current_)))
254
+ noexcept(noexcept(::cuda::std::invoke(::cuda::std::declval<_Fn&>(), *::cuda::std::declval<const _Iter2&>())))
240
255
  {
241
- return ::cuda::std::invoke(const_cast<_Fn&>(*__func_), *__current_);
256
+ return ::cuda::std::invoke(const_cast<_Fn&>(__func()), *__iter());
242
257
  }
243
258
  //! @endcond
244
259
 
245
260
  //! @brief Dereferences the stored iterator and applies the stored functor to the result
246
261
  _CCCL_EXEC_CHECK_DISABLE
247
- [[nodiscard]] _CCCL_API constexpr reference operator*() noexcept(noexcept(::cuda::std::invoke(*__func_, *__current_)))
262
+ [[nodiscard]] _CCCL_API constexpr reference
263
+ operator*() noexcept(noexcept(::cuda::std::invoke(::cuda::std::declval<_Fn&>(), *::cuda::std::declval<_Iter&>())))
248
264
  {
249
- return ::cuda::std::invoke(*__func_, *__current_);
265
+ return ::cuda::std::invoke(__func(), *__iter());
250
266
  }
251
267
 
252
268
  //! @brief Subscripts the stored iterator by a number of elements and applies the stored functor to the result
@@ -258,7 +274,7 @@ public:
258
274
  [[nodiscard]] _CCCL_API constexpr reference operator[](difference_type __n) const
259
275
  noexcept(__transform_iterator_nothrow_subscript<const _Fn, _Iter2>)
260
276
  {
261
- return ::cuda::std::invoke(*__func_, __current_[__n]);
277
+ return ::cuda::std::invoke(__func(), __iter()[__n]);
262
278
  }
263
279
 
264
280
  //! @cond
@@ -273,7 +289,7 @@ public:
273
289
  [[nodiscard]] _CCCL_API constexpr reference operator[](difference_type __n) const
274
290
  noexcept(__transform_iterator_nothrow_subscript<_Fn, _Iter2>)
275
291
  {
276
- return ::cuda::std::invoke(const_cast<_Fn&>(*__func_), __current_[__n]);
292
+ return ::cuda::std::invoke(const_cast<_Fn&>(__func()), __iter()[__n]);
277
293
  }
278
294
  //! @endcond
279
295
 
@@ -285,20 +301,20 @@ public:
285
301
  [[nodiscard]] _CCCL_API constexpr reference
286
302
  operator[](difference_type __n) noexcept(__transform_iterator_nothrow_subscript<_Fn, _Iter2>)
287
303
  {
288
- return ::cuda::std::invoke(*__func_, __current_[__n]);
304
+ return ::cuda::std::invoke(__func(), __iter()[__n]);
289
305
  }
290
306
 
291
307
  //! @brief Increments the stored iterator
292
308
  _CCCL_EXEC_CHECK_DISABLE
293
- _CCCL_API constexpr transform_iterator& operator++() noexcept(noexcept(++__current_))
309
+ _CCCL_API constexpr transform_iterator& operator++() noexcept(noexcept(++::cuda::std::declval<_Iter&>()))
294
310
  {
295
- ++__current_;
311
+ ++__iter();
296
312
  return *this;
297
313
  }
298
314
 
299
315
  //! @brief Increments the stored iterator
300
316
  _CCCL_EXEC_CHECK_DISABLE
301
- _CCCL_API constexpr auto operator++(int) noexcept(noexcept(++__current_))
317
+ _CCCL_API constexpr auto operator++(int) noexcept(noexcept(++::cuda::std::declval<_Iter&>()))
302
318
  {
303
319
  if constexpr (::cuda::std::__has_forward_traversal<_Iter>)
304
320
  {
@@ -308,7 +324,7 @@ public:
308
324
  }
309
325
  else
310
326
  {
311
- ++__current_;
327
+ ++__iter();
312
328
  }
313
329
  }
314
330
 
@@ -316,9 +332,9 @@ public:
316
332
  _CCCL_EXEC_CHECK_DISABLE
317
333
  _CCCL_TEMPLATE(class _Iter2 = _Iter)
318
334
  _CCCL_REQUIRES(::cuda::std::__has_bidirectional_traversal<_Iter2>)
319
- _CCCL_API constexpr transform_iterator& operator--() noexcept(noexcept(--__current_))
335
+ _CCCL_API constexpr transform_iterator& operator--() noexcept(noexcept(--::cuda::std::declval<_Iter2&>()))
320
336
  {
321
- --__current_;
337
+ --__iter();
322
338
  return *this;
323
339
  }
324
340
 
@@ -326,8 +342,8 @@ public:
326
342
  _CCCL_EXEC_CHECK_DISABLE
327
343
  _CCCL_TEMPLATE(class _Iter2 = _Iter)
328
344
  _CCCL_REQUIRES(::cuda::std::__has_bidirectional_traversal<_Iter2>)
329
- _CCCL_API constexpr transform_iterator
330
- operator--(int) noexcept(::cuda::std::is_nothrow_copy_constructible_v<_Iter> && noexcept(--__current_))
345
+ _CCCL_API constexpr transform_iterator operator--(int) noexcept(
346
+ ::cuda::std::is_nothrow_copy_constructible_v<_Iter> && noexcept(--::cuda::std::declval<_Iter2&>()))
331
347
  {
332
348
  auto __tmp = *this;
333
349
  --*this;
@@ -339,9 +355,10 @@ public:
339
355
  _CCCL_EXEC_CHECK_DISABLE
340
356
  _CCCL_TEMPLATE(class _Iter2 = _Iter)
341
357
  _CCCL_REQUIRES(::cuda::std::__has_random_access_traversal<_Iter2>)
342
- _CCCL_API constexpr transform_iterator& operator+=(difference_type __n) noexcept(noexcept(__current_ += __n))
358
+ _CCCL_API constexpr transform_iterator&
359
+ operator+=(difference_type __n) noexcept(noexcept(::cuda::std::declval<_Iter2&>() += __n))
343
360
  {
344
- __current_ += __n;
361
+ __iter() += __n;
345
362
  return *this;
346
363
  }
347
364
 
@@ -353,7 +370,7 @@ public:
353
370
  [[nodiscard]] _CCCL_API friend constexpr auto operator+(const transform_iterator& __iter, difference_type __n)
354
371
  _CCCL_TRAILING_REQUIRES(transform_iterator)(::cuda::std::__has_random_access_traversal<_Iter2>)
355
372
  {
356
- return transform_iterator{__iter.__current_ + __n, *__iter.__func_};
373
+ return transform_iterator{__iter.__iter() + __n, __iter.__func()};
357
374
  }
358
375
 
359
376
  //! @brief Returns a copy of a @c transform_iterator advanced by a given number of elements
@@ -364,7 +381,7 @@ public:
364
381
  [[nodiscard]] _CCCL_API friend constexpr auto operator+(difference_type __n, const transform_iterator& __iter)
365
382
  _CCCL_TRAILING_REQUIRES(transform_iterator)(::cuda::std::__has_random_access_traversal<_Iter2>)
366
383
  {
367
- return transform_iterator{__iter.__current_ + __n, *__iter.__func_};
384
+ return transform_iterator{__iter.__iter() + __n, __iter.__func()};
368
385
  }
369
386
 
370
387
  //! @brief Decrements the @c transform_iterator by a given number of elements
@@ -372,9 +389,10 @@ public:
372
389
  _CCCL_EXEC_CHECK_DISABLE
373
390
  _CCCL_TEMPLATE(class _Iter2 = _Iter)
374
391
  _CCCL_REQUIRES(::cuda::std::__has_random_access_traversal<_Iter2>)
375
- _CCCL_API constexpr transform_iterator& operator-=(difference_type __n) noexcept(noexcept(__current_ -= __n))
392
+ _CCCL_API constexpr transform_iterator&
393
+ operator-=(difference_type __n) noexcept(noexcept(::cuda::std::declval<_Iter2&>() -= __n))
376
394
  {
377
- __current_ -= __n;
395
+ __iter() -= __n;
378
396
  return *this;
379
397
  }
380
398
 
@@ -386,17 +404,25 @@ public:
386
404
  [[nodiscard]] _CCCL_API friend constexpr auto operator-(const transform_iterator& __iter, difference_type __n)
387
405
  _CCCL_TRAILING_REQUIRES(transform_iterator)(::cuda::std::__has_random_access_traversal<_Iter2>)
388
406
  {
389
- return transform_iterator{__iter.__current_ - __n, *__iter.__func_};
407
+ return transform_iterator{__iter.__iter() - __n, __iter.__func()};
390
408
  }
391
409
 
410
+ template <class _Iter2>
411
+ static constexpr bool __can_difference =
412
+ (::cuda::std::__has_random_access_traversal<_Iter2> || ::cuda::std::sized_sentinel_for<_Iter2, _Iter2>);
413
+
414
+ template <class _Iter2>
415
+ static constexpr bool __noexcept_difference =
416
+ noexcept(::cuda::std::declval<const _Iter2&>() - ::cuda::std::declval<const _Iter2&>());
417
+
392
418
  //! @brief Returns the distance between two @c transform_iterator
393
419
  _CCCL_EXEC_CHECK_DISABLE
394
420
  template <class _Iter2 = _Iter>
395
421
  [[nodiscard]] _CCCL_API friend constexpr auto
396
- operator-(const transform_iterator& __lhs, const transform_iterator& __rhs)
397
- _CCCL_TRAILING_REQUIRES(difference_type)(::cuda::std::sized_sentinel_for<_Iter2, _Iter2>)
422
+ operator-(const transform_iterator& __lhs, const transform_iterator& __rhs) noexcept(__noexcept_difference<_Iter2>)
423
+ _CCCL_TRAILING_REQUIRES(difference_type)(__can_difference<_Iter2>)
398
424
  {
399
- return __lhs.__current_ - __rhs.__current_;
425
+ return __lhs.__iter() - __rhs.__iter();
400
426
  }
401
427
 
402
428
  //! @brief Compares two @c transform_iterator for equality by comparing the stored iterators
@@ -407,7 +433,7 @@ public:
407
433
  noexcept(::cuda::std::declval<const _Iter2&>() == ::cuda::std::declval<const _Iter2&>()))
408
434
  _CCCL_TRAILING_REQUIRES(bool)(::cuda::std::equality_comparable<_Iter2>)
409
435
  {
410
- return __lhs.__current_ == __rhs.__current_;
436
+ return __lhs.__iter() == __rhs.__iter();
411
437
  }
412
438
 
413
439
  #if _CCCL_STD_VER <= 2017
@@ -419,7 +445,7 @@ public:
419
445
  noexcept(::cuda::std::declval<const _Iter2&>() != ::cuda::std::declval<const _Iter2&>()))
420
446
  _CCCL_TRAILING_REQUIRES(bool)(::cuda::std::equality_comparable<_Iter2>)
421
447
  {
422
- return __lhs.__current_ != __rhs.__current_;
448
+ return __lhs.__iter() != __rhs.__iter();
423
449
  }
424
450
  #endif // _CCCL_STD_VER <= 2017
425
451
 
@@ -433,7 +459,7 @@ public:
433
459
  _CCCL_TRAILING_REQUIRES(bool)(
434
460
  ::cuda::std::__has_random_access_traversal<_Iter2>&& ::cuda::std::three_way_comparable<_Iter2>)
435
461
  {
436
- return __lhs.__current_ <=> __rhs.__current_;
462
+ return __lhs.__iter() <=> __rhs.__iter();
437
463
  }
438
464
  #else // ^^^ _LIBCUDACXX_HAS_SPACESHIP_OPERATOR() ^^^ / vvv !_LIBCUDACXX_HAS_SPACESHIP_OPERATOR() vvv
439
465
  //! @brief Compares two @c transform_iterator for less than by comparing the stored iterators
@@ -444,7 +470,7 @@ public:
444
470
  noexcept(::cuda::std::declval<const _Iter2&>() < ::cuda::std::declval<const _Iter2&>()))
445
471
  _CCCL_TRAILING_REQUIRES(bool)(::cuda::std::__has_random_access_traversal<_Iter2>)
446
472
  {
447
- return __lhs.__current_ < __rhs.__current_;
473
+ return __lhs.__iter() < __rhs.__iter();
448
474
  }
449
475
 
450
476
  //! @brief Compares two @c transform_iterator for greater than by comparing the stored iterators
@@ -455,7 +481,7 @@ public:
455
481
  noexcept(::cuda::std::declval<const _Iter2&>() < ::cuda::std::declval<const _Iter2&>()))
456
482
  _CCCL_TRAILING_REQUIRES(bool)(::cuda::std::__has_random_access_traversal<_Iter2>)
457
483
  {
458
- return __lhs.__current_ > __rhs.__current_;
484
+ return __lhs.__iter() > __rhs.__iter();
459
485
  }
460
486
 
461
487
  //! @brief Compares two @c transform_iterator for less equal by comparing the stored iterators
@@ -466,7 +492,7 @@ public:
466
492
  noexcept(::cuda::std::declval<const _Iter2&>() < ::cuda::std::declval<const _Iter2&>()))
467
493
  _CCCL_TRAILING_REQUIRES(bool)(::cuda::std::__has_random_access_traversal<_Iter2>)
468
494
  {
469
- return __lhs.__current_ <= __rhs.__current_;
495
+ return __lhs.__iter() <= __rhs.__iter();
470
496
  }
471
497
 
472
498
  //! @brief Compares two @c transform_iterator for greater equal by comparing the stored iterators
@@ -477,7 +503,7 @@ public:
477
503
  noexcept(::cuda::std::declval<const _Iter2&>() < ::cuda::std::declval<const _Iter2&>()))
478
504
  _CCCL_TRAILING_REQUIRES(bool)(::cuda::std::__has_random_access_traversal<_Iter2>)
479
505
  {
480
- return __lhs.__current_ >= __rhs.__current_;
506
+ return __lhs.__iter() >= __rhs.__iter();
481
507
  }
482
508
  #endif // !_LIBCUDACXX_HAS_SPACESHIP_OPERATOR()
483
509
  };