cuda-cccl 0.3.0__cp311-cp311-manylinux_2_24_aarch64.whl → 0.3.2__cp311-cp311-manylinux_2_24_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cuda-cccl might be problematic. Click here for more details.

Files changed (294) hide show
  1. cuda/cccl/cooperative/__init__.py +7 -1
  2. cuda/cccl/cooperative/experimental/__init__.py +21 -5
  3. cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +2 -5
  4. cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +2 -5
  5. cuda/cccl/headers/include/cub/agent/agent_for.cuh +2 -5
  6. cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +354 -572
  7. cuda/cccl/headers/include/cub/agent/agent_merge.cuh +23 -21
  8. cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +21 -3
  9. cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +2 -5
  10. cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +2 -5
  11. cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +2 -5
  12. cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +2 -5
  13. cuda/cccl/headers/include/cub/agent/agent_rle.cuh +2 -5
  14. cuda/cccl/headers/include/cub/agent/agent_scan.cuh +5 -1
  15. cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +2 -5
  16. cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +2 -5
  17. cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +2 -5
  18. cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +2 -5
  19. cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +2 -5
  20. cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +22 -5
  21. cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +6 -8
  22. cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +24 -14
  23. cuda/cccl/headers/include/cub/block/block_exchange.cuh +5 -0
  24. cuda/cccl/headers/include/cub/block/block_histogram.cuh +4 -0
  25. cuda/cccl/headers/include/cub/block/block_load.cuh +4 -0
  26. cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +4 -2
  27. cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +4 -2
  28. cuda/cccl/headers/include/cub/block/block_reduce.cuh +1 -0
  29. cuda/cccl/headers/include/cub/block/block_scan.cuh +12 -2
  30. cuda/cccl/headers/include/cub/block/block_store.cuh +3 -2
  31. cuda/cccl/headers/include/cub/detail/device_memory_resource.cuh +1 -0
  32. cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +34 -30
  33. cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +1 -1
  34. cuda/cccl/headers/include/cub/device/device_for.cuh +118 -40
  35. cuda/cccl/headers/include/cub/device/device_reduce.cuh +6 -7
  36. cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +170 -260
  37. cuda/cccl/headers/include/cub/device/device_transform.cuh +122 -91
  38. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +6 -7
  39. cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +2 -11
  40. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +12 -29
  41. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +2 -7
  42. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh +0 -1
  43. cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +2 -3
  44. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +4 -5
  45. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +0 -1
  46. cuda/cccl/headers/include/cub/device/dispatch/dispatch_topk.cuh +3 -5
  47. cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +13 -5
  48. cuda/cccl/headers/include/cub/device/dispatch/kernels/for_each.cuh +72 -37
  49. cuda/cccl/headers/include/cub/device/dispatch/kernels/reduce.cuh +2 -5
  50. cuda/cccl/headers/include/cub/device/dispatch/kernels/scan.cuh +2 -5
  51. cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_reduce.cuh +2 -5
  52. cuda/cccl/headers/include/cub/device/dispatch/kernels/transform.cuh +22 -27
  53. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +2 -5
  54. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +2 -5
  55. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +2 -5
  56. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +2 -5
  57. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +2 -5
  58. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +8 -0
  59. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +2 -5
  60. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +2 -5
  61. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +2 -5
  62. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +2 -5
  63. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +2 -5
  64. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +2 -5
  65. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +2 -5
  66. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +61 -70
  67. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +10 -0
  68. cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +24 -17
  69. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +3 -2
  70. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +3 -2
  71. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +2 -2
  72. cuda/cccl/headers/include/cub/warp/warp_load.cuh +6 -6
  73. cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +7 -2
  74. cuda/cccl/headers/include/cub/warp/warp_scan.cuh +7 -3
  75. cuda/cccl/headers/include/cub/warp/warp_store.cuh +1 -0
  76. cuda/cccl/headers/include/cuda/__algorithm/common.h +1 -1
  77. cuda/cccl/headers/include/cuda/__algorithm/copy.h +1 -1
  78. cuda/cccl/headers/include/cuda/__algorithm/fill.h +1 -1
  79. cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +19 -0
  80. cuda/cccl/headers/include/cuda/__cccl_config +1 -0
  81. cuda/cccl/headers/include/cuda/__cmath/fast_modulo_division.h +3 -74
  82. cuda/cccl/headers/include/cuda/__cmath/mul_hi.h +146 -0
  83. cuda/cccl/headers/include/cuda/__complex/get_real_imag.h +0 -4
  84. cuda/cccl/headers/include/cuda/__device/all_devices.h +46 -143
  85. cuda/cccl/headers/include/cuda/__device/arch_id.h +176 -0
  86. cuda/cccl/headers/include/cuda/__device/arch_traits.h +247 -323
  87. cuda/cccl/headers/include/cuda/__device/attributes.h +174 -123
  88. cuda/cccl/headers/include/cuda/__device/compute_capability.h +171 -0
  89. cuda/cccl/headers/include/cuda/__device/device_ref.h +27 -49
  90. cuda/cccl/headers/include/cuda/__device/physical_device.h +100 -96
  91. cuda/cccl/headers/include/cuda/__driver/driver_api.h +105 -3
  92. cuda/cccl/headers/include/cuda/__event/event.h +27 -26
  93. cuda/cccl/headers/include/cuda/__event/event_ref.h +5 -5
  94. cuda/cccl/headers/include/cuda/__event/timed_event.h +10 -7
  95. cuda/cccl/headers/include/cuda/__fwd/devices.h +44 -0
  96. cuda/cccl/headers/include/cuda/__fwd/zip_iterator.h +9 -0
  97. cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h +46 -31
  98. cuda/cccl/headers/include/cuda/__iterator/strided_iterator.h +79 -47
  99. cuda/cccl/headers/include/cuda/__iterator/tabulate_output_iterator.h +59 -36
  100. cuda/cccl/headers/include/cuda/__iterator/transform_input_output_iterator.h +79 -49
  101. cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +74 -48
  102. cuda/cccl/headers/include/cuda/__iterator/transform_output_iterator.h +80 -55
  103. cuda/cccl/headers/include/cuda/__iterator/zip_common.h +148 -0
  104. cuda/cccl/headers/include/cuda/__iterator/zip_iterator.h +21 -137
  105. cuda/cccl/headers/include/cuda/__iterator/zip_transform_iterator.h +592 -0
  106. cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +127 -60
  107. cuda/cccl/headers/include/cuda/__mdspan/host_device_mdspan.h +178 -3
  108. cuda/cccl/headers/include/cuda/__mdspan/restrict_accessor.h +38 -8
  109. cuda/cccl/headers/include/cuda/__mdspan/restrict_mdspan.h +67 -1
  110. cuda/cccl/headers/include/cuda/__memory/ptr_in_range.h +93 -0
  111. cuda/cccl/headers/include/cuda/__memory_resource/get_memory_resource.h +4 -4
  112. cuda/cccl/headers/include/cuda/__memory_resource/properties.h +44 -0
  113. cuda/cccl/headers/include/cuda/__memory_resource/resource.h +1 -1
  114. cuda/cccl/headers/include/cuda/__memory_resource/resource_ref.h +4 -6
  115. cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +2 -1
  116. cuda/cccl/headers/include/cuda/__runtime/ensure_current_context.h +9 -7
  117. cuda/cccl/headers/include/cuda/__stream/stream.h +8 -8
  118. cuda/cccl/headers/include/cuda/__stream/stream_ref.h +18 -16
  119. cuda/cccl/headers/include/cuda/__utility/basic_any.h +1 -1
  120. cuda/cccl/headers/include/cuda/__utility/in_range.h +65 -0
  121. cuda/cccl/headers/include/cuda/algorithm +1 -1
  122. cuda/cccl/headers/include/cuda/cmath +1 -0
  123. cuda/cccl/headers/include/cuda/devices +13 -0
  124. cuda/cccl/headers/include/cuda/iterator +1 -0
  125. cuda/cccl/headers/include/cuda/memory +1 -0
  126. cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h +2 -2
  127. cuda/cccl/headers/include/cuda/std/__algorithm/find.h +1 -1
  128. cuda/cccl/headers/include/cuda/std/__algorithm/includes.h +2 -4
  129. cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h +1 -1
  130. cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h +7 -15
  131. cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h +1 -1
  132. cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h +1 -2
  133. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h +2 -2
  134. cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h +1 -1
  135. cuda/cccl/headers/include/cuda/std/__bit/countl.h +8 -1
  136. cuda/cccl/headers/include/cuda/std/__bit/countr.h +2 -2
  137. cuda/cccl/headers/include/cuda/std/__bit/reference.h +11 -11
  138. cuda/cccl/headers/include/cuda/std/__cccl/algorithm_wrapper.h +36 -0
  139. cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +46 -49
  140. cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h +6 -0
  141. cuda/cccl/headers/include/cuda/std/__cccl/host_std_lib.h +52 -0
  142. cuda/cccl/headers/include/cuda/std/__cccl/memory_wrapper.h +36 -0
  143. cuda/cccl/headers/include/cuda/std/__cccl/numeric_wrapper.h +36 -0
  144. cuda/cccl/headers/include/cuda/std/__chrono/duration.h +16 -16
  145. cuda/cccl/headers/include/cuda/std/__chrono/steady_clock.h +5 -5
  146. cuda/cccl/headers/include/cuda/std/__chrono/system_clock.h +5 -5
  147. cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +3 -2
  148. cuda/cccl/headers/include/cuda/std/__complex/complex.h +3 -2
  149. cuda/cccl/headers/include/cuda/std/__complex/literals.h +14 -34
  150. cuda/cccl/headers/include/cuda/std/__complex/nvbf16.h +2 -1
  151. cuda/cccl/headers/include/cuda/std/__complex/nvfp16.h +4 -3
  152. cuda/cccl/headers/include/cuda/std/__concepts/invocable.h +2 -2
  153. cuda/cccl/headers/include/cuda/std/__cstdlib/malloc.h +3 -2
  154. cuda/cccl/headers/include/cuda/std/__floating_point/fp.h +1 -1
  155. cuda/cccl/headers/include/cuda/std/__functional/bind.h +10 -13
  156. cuda/cccl/headers/include/cuda/std/__functional/function.h +5 -8
  157. cuda/cccl/headers/include/cuda/std/__functional/invoke.h +71 -335
  158. cuda/cccl/headers/include/cuda/std/__functional/mem_fn.h +1 -2
  159. cuda/cccl/headers/include/cuda/std/__functional/reference_wrapper.h +3 -3
  160. cuda/cccl/headers/include/cuda/std/__functional/weak_result_type.h +0 -6
  161. cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +13 -0
  162. cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +13 -0
  163. cuda/cccl/headers/include/cuda/std/__fwd/complex.h +13 -4
  164. cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +23 -0
  165. cuda/cccl/headers/include/cuda/std/__fwd/pair.h +13 -0
  166. cuda/cccl/headers/include/cuda/std/__fwd/string.h +22 -0
  167. cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +14 -0
  168. cuda/cccl/headers/include/cuda/std/__internal/features.h +0 -5
  169. cuda/cccl/headers/include/cuda/std/__internal/namespaces.h +21 -0
  170. cuda/cccl/headers/include/cuda/std/__iterator/iterator_traits.h +5 -5
  171. cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +7 -1
  172. cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +53 -39
  173. cuda/cccl/headers/include/cuda/std/__memory/allocator.h +3 -3
  174. cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +1 -3
  175. cuda/cccl/headers/include/cuda/std/__optional/optional_base.h +1 -0
  176. cuda/cccl/headers/include/cuda/std/__ranges/compressed_movable_box.h +892 -0
  177. cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +2 -2
  178. cuda/cccl/headers/include/cuda/std/__tuple_dir/make_tuple_types.h +23 -1
  179. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +4 -0
  180. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like_ext.h +4 -0
  181. cuda/cccl/headers/include/cuda/std/__type_traits/is_primary_template.h +7 -5
  182. cuda/cccl/headers/include/cuda/std/__type_traits/result_of.h +1 -1
  183. cuda/cccl/headers/include/cuda/std/__utility/pair.h +0 -5
  184. cuda/cccl/headers/include/cuda/std/bitset +1 -1
  185. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +15 -12
  186. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/variant +11 -9
  187. cuda/cccl/headers/include/cuda/std/inplace_vector +4 -4
  188. cuda/cccl/headers/include/cuda/std/numbers +5 -0
  189. cuda/cccl/headers/include/cuda/std/string_view +155 -13
  190. cuda/cccl/headers/include/cuda/std/version +1 -4
  191. cuda/cccl/headers/include/cuda/stream_ref +5 -0
  192. cuda/cccl/headers/include/cuda/utility +1 -0
  193. cuda/cccl/headers/include/nv/target +7 -2
  194. cuda/cccl/headers/include/thrust/allocate_unique.h +1 -1
  195. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +309 -33
  196. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +151 -4
  197. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +60 -3
  198. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +45 -3
  199. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +31 -6
  200. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +29 -16
  201. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +41 -4
  202. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.h +42 -4
  203. cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +3 -3
  204. cuda/cccl/headers/include/thrust/detail/integer_math.h +3 -20
  205. cuda/cccl/headers/include/thrust/detail/internal_functional.h +1 -1
  206. cuda/cccl/headers/include/thrust/detail/memory_algorithms.h +1 -1
  207. cuda/cccl/headers/include/thrust/detail/temporary_array.h +1 -1
  208. cuda/cccl/headers/include/thrust/detail/type_traits.h +1 -1
  209. cuda/cccl/headers/include/thrust/device_delete.h +18 -3
  210. cuda/cccl/headers/include/thrust/device_free.h +16 -3
  211. cuda/cccl/headers/include/thrust/device_new.h +29 -8
  212. cuda/cccl/headers/include/thrust/host_vector.h +1 -1
  213. cuda/cccl/headers/include/thrust/iterator/iterator_traits.h +11 -0
  214. cuda/cccl/headers/include/thrust/iterator/tabulate_output_iterator.h +5 -2
  215. cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +1 -1
  216. cuda/cccl/headers/include/thrust/mr/pool.h +1 -1
  217. cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +33 -0
  218. cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +13 -115
  219. cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +8 -2
  220. cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +7 -7
  221. cuda/cccl/parallel/experimental/__init__.py +21 -74
  222. cuda/compute/__init__.py +79 -0
  223. cuda/{cccl/parallel/experimental → compute}/_bindings.pyi +43 -1
  224. cuda/{cccl/parallel/experimental → compute}/_bindings_impl.pyx +157 -8
  225. cuda/{cccl/parallel/experimental → compute}/algorithms/_histogram.py +2 -2
  226. cuda/{cccl/parallel/experimental → compute}/algorithms/_merge_sort.py +2 -2
  227. cuda/{cccl/parallel/experimental → compute}/algorithms/_radix_sort.py +3 -3
  228. cuda/{cccl/parallel/experimental → compute}/algorithms/_reduce.py +2 -2
  229. cuda/{cccl/parallel/experimental → compute}/algorithms/_scan.py +112 -40
  230. cuda/{cccl/parallel/experimental → compute}/algorithms/_segmented_reduce.py +2 -2
  231. cuda/{cccl/parallel/experimental → compute}/algorithms/_three_way_partition.py +2 -2
  232. cuda/{cccl/parallel/experimental → compute}/algorithms/_transform.py +36 -15
  233. cuda/{cccl/parallel/experimental → compute}/algorithms/_unique_by_key.py +2 -2
  234. cuda/compute/cu12/_bindings_impl.cpython-311-aarch64-linux-gnu.so +0 -0
  235. cuda/{cccl/parallel/experimental → compute}/cu12/cccl/libcccl.c.parallel.so +0 -0
  236. cuda/compute/cu13/_bindings_impl.cpython-311-aarch64-linux-gnu.so +0 -0
  237. cuda/{cccl/parallel/experimental → compute}/cu13/cccl/libcccl.c.parallel.so +0 -0
  238. cuda/{cccl/parallel/experimental → compute}/iterators/__init__.py +2 -0
  239. cuda/{cccl/parallel/experimental → compute}/iterators/_factories.py +36 -8
  240. cuda/{cccl/parallel/experimental → compute}/iterators/_iterators.py +206 -1
  241. cuda/{cccl/parallel/experimental → compute}/numba_utils.py +2 -2
  242. cuda/{cccl/parallel/experimental → compute}/struct.py +2 -2
  243. cuda/{cccl/parallel/experimental → compute}/typing.py +2 -0
  244. cuda/coop/__init__.py +8 -0
  245. cuda/{cccl/cooperative/experimental → coop}/_nvrtc.py +3 -2
  246. cuda/{cccl/cooperative/experimental → coop}/_scan_op.py +3 -3
  247. cuda/{cccl/cooperative/experimental → coop}/_types.py +2 -2
  248. cuda/{cccl/cooperative/experimental → coop}/_typing.py +1 -1
  249. cuda/{cccl/cooperative/experimental → coop}/block/__init__.py +6 -6
  250. cuda/{cccl/cooperative/experimental → coop}/block/_block_exchange.py +4 -4
  251. cuda/{cccl/cooperative/experimental → coop}/block/_block_load_store.py +6 -6
  252. cuda/{cccl/cooperative/experimental → coop}/block/_block_merge_sort.py +4 -4
  253. cuda/{cccl/cooperative/experimental → coop}/block/_block_radix_sort.py +6 -6
  254. cuda/{cccl/cooperative/experimental → coop}/block/_block_reduce.py +6 -6
  255. cuda/{cccl/cooperative/experimental → coop}/block/_block_scan.py +7 -7
  256. cuda/coop/warp/__init__.py +9 -0
  257. cuda/{cccl/cooperative/experimental → coop}/warp/_warp_merge_sort.py +3 -3
  258. cuda/{cccl/cooperative/experimental → coop}/warp/_warp_reduce.py +6 -6
  259. cuda/{cccl/cooperative/experimental → coop}/warp/_warp_scan.py +4 -4
  260. {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/METADATA +1 -1
  261. {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/RECORD +275 -276
  262. cuda/cccl/cooperative/experimental/warp/__init__.py +0 -9
  263. cuda/cccl/headers/include/cub/device/dispatch/dispatch_advance_iterators.cuh +0 -111
  264. cuda/cccl/headers/include/thrust/detail/algorithm_wrapper.h +0 -37
  265. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.inl +0 -371
  266. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.inl +0 -242
  267. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.inl +0 -137
  268. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.inl +0 -99
  269. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.inl +0 -68
  270. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.inl +0 -86
  271. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.inl +0 -79
  272. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.inl +0 -98
  273. cuda/cccl/headers/include/thrust/detail/device_delete.inl +0 -52
  274. cuda/cccl/headers/include/thrust/detail/device_free.inl +0 -47
  275. cuda/cccl/headers/include/thrust/detail/device_new.inl +0 -61
  276. cuda/cccl/headers/include/thrust/detail/memory_wrapper.h +0 -40
  277. cuda/cccl/headers/include/thrust/detail/numeric_wrapper.h +0 -37
  278. cuda/cccl/parallel/experimental/.gitignore +0 -4
  279. cuda/cccl/parallel/experimental/cu12/_bindings_impl.cpython-311-aarch64-linux-gnu.so +0 -0
  280. cuda/cccl/parallel/experimental/cu13/_bindings_impl.cpython-311-aarch64-linux-gnu.so +0 -0
  281. /cuda/{cccl/parallel/experimental → compute}/_bindings.py +0 -0
  282. /cuda/{cccl/parallel/experimental → compute}/_caching.py +0 -0
  283. /cuda/{cccl/parallel/experimental → compute}/_cccl_interop.py +0 -0
  284. /cuda/{cccl/parallel/experimental → compute}/_utils/__init__.py +0 -0
  285. /cuda/{cccl/parallel/experimental → compute}/_utils/protocols.py +0 -0
  286. /cuda/{cccl/parallel/experimental → compute}/_utils/temp_storage_buffer.py +0 -0
  287. /cuda/{cccl/parallel/experimental → compute}/algorithms/__init__.py +0 -0
  288. /cuda/{cccl/parallel/experimental → compute}/cccl/.gitkeep +0 -0
  289. /cuda/{cccl/parallel/experimental → compute}/iterators/_zip_iterator.py +0 -0
  290. /cuda/{cccl/parallel/experimental → compute}/op.py +0 -0
  291. /cuda/{cccl/cooperative/experimental → coop}/_caching.py +0 -0
  292. /cuda/{cccl/cooperative/experimental → coop}/_common.py +0 -0
  293. {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/WHEEL +0 -0
  294. {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,592 @@
1
+ // -*- C++ -*-
2
+ //===----------------------------------------------------------------------===//
3
+ //
4
+ // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5
+ // See https://llvm.org/LICENSE.txt for license information.
6
+ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7
+ // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES
8
+ //
9
+ //===----------------------------------------------------------------------===//
10
+ #ifndef _CUDA___ITERATOR_ZIP_TRANSFORM_ITERATOR_H
11
+ #define _CUDA___ITERATOR_ZIP_TRANSFORM_ITERATOR_H
12
+
13
+ #include <cuda/std/detail/__config>
14
+
15
+ #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
16
+ # pragma GCC system_header
17
+ #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
18
+ # pragma clang system_header
19
+ #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
20
+ # pragma system_header
21
+ #endif // no system header
22
+
23
+ #include <cuda/__fwd/zip_iterator.h>
24
+ #include <cuda/std/__algorithm/ranges_min_element.h>
25
+ #if _LIBCUDACXX_HAS_SPACESHIP_OPERATOR()
26
+ # include <cuda/std/__compare/three_way_comparable.h>
27
+ #endif // _LIBCUDACXX_HAS_SPACESHIP_OPERATOR()
28
+ #include <cuda/__iterator/zip_common.h>
29
+ #include <cuda/std/__concepts/convertible_to.h>
30
+ #include <cuda/std/__concepts/equality_comparable.h>
31
+ #include <cuda/std/__functional/invoke.h>
32
+ #include <cuda/std/__functional/operations.h>
33
+ #include <cuda/std/__iterator/concepts.h>
34
+ #include <cuda/std/__iterator/incrementable_traits.h>
35
+ #include <cuda/std/__iterator/iterator_traits.h>
36
+ #include <cuda/std/__ranges/compressed_movable_box.h>
37
+ #include <cuda/std/__ranges/concepts.h>
38
+ #include <cuda/std/__ranges/movable_box.h>
39
+ #include <cuda/std/__type_traits/common_type.h>
40
+ #include <cuda/std/__type_traits/make_unsigned.h>
41
+ #include <cuda/std/__type_traits/remove_cvref.h>
42
+ #include <cuda/std/__utility/forward.h>
43
+ #include <cuda/std/__utility/integer_sequence.h>
44
+ #include <cuda/std/__utility/move.h>
45
+ #include <cuda/std/tuple>
46
+
47
+ #include <cuda/std/__cccl/prologue.h>
48
+
49
+ _CCCL_BEGIN_NAMESPACE_CUDA
50
+
51
+ //! @addtogroup iterators
52
+ //! @{
53
+
54
+ template <class _Fn, class... _Iterators>
55
+ [[nodiscard]] _CCCL_API _CCCL_CONSTEVAL auto __get_zip_transform_iterator_category()
56
+ {
57
+ using _Constraints = __zip_iter_constraints<_Iterators...>;
58
+ if constexpr (!::cuda::std::is_reference_v<
59
+ ::cuda::std::invoke_result_t<_Fn&, ::cuda::std::iter_reference_t<_Iterators>...>>)
60
+ {
61
+ return ::cuda::std::input_iterator_tag{};
62
+ }
63
+ else if constexpr (_Constraints::__all_random_access)
64
+ {
65
+ return ::cuda::std::random_access_iterator_tag{};
66
+ }
67
+ else if constexpr (_Constraints::__all_bidirectional)
68
+ {
69
+ return ::cuda::std::bidirectional_iterator_tag{};
70
+ }
71
+ else if constexpr (_Constraints::__all_forward)
72
+ {
73
+ return ::cuda::std::forward_iterator_tag{};
74
+ }
75
+ else
76
+ {
77
+ return ::cuda::std::input_iterator_tag{};
78
+ }
79
+ _CCCL_UNREACHABLE();
80
+ }
81
+
82
+ //! @brief @c zip_transform_iterator is an iterator which represents the result of a transformation of a set of
83
+ //! sequences with a given function. This iterator is useful for creating a range filled with the result of applying an
84
+ //! operation to another range without either explicitly storing it in memory, or explicitly executing the
85
+ //! transformation. Using @c zip_transform_iterator facilitates kernel fusion by deferring the execution of a
86
+ //! transformation until the value is needed while saving both memory capacity and bandwidth.
87
+ //!
88
+ //! @c zip_transform_iterator is morally equivalent to a combination of transform_iterator and zip_iterator
89
+ //!
90
+ //! @code{.cpp}
91
+ //! template <class Fn, class... Iterators>
92
+ //! using zip_transform_iterator = cuda::transform_iterator<cuda::zip_iterator<Iterators...>, cuda::zip_function<Fn>>;
93
+ //! @endcode
94
+ //!
95
+ //! @c zip_transform_iterator has the additional benefit that it does not require an artificial @c zip_function to work
96
+ //! and more importantly does not need to materialize the result of dereferencing the stored iterators when passing them
97
+ //! to the stored function.
98
+ //!
99
+ //! The following code snippet demonstrates how to create a @c zip_transform_iterator which represents the result of
100
+ //! "zipping" multiple ranges together.
101
+ //!
102
+ //! @code
103
+ //! #include <cuda/iterator>
104
+ //! #include <thrust/device_vector.h>
105
+ //!
106
+ //! struct SumArgs {
107
+ //! __host__ __device__ float operator()(float a, float b, float c) const noexcept {
108
+ //! return a + b + c;
109
+ //! }
110
+ //! };
111
+ //!
112
+ //! thrust::device_vector<float> A{0.f, 1.f, 2.f};
113
+ //! thrust::device_vector<float> B{1.f, 2.f, 3.f};
114
+ //! thrust::device_vector<float> C{2.f, 3.f, 4.f};
115
+ //!
116
+ //! cuda::zip_transform_iterator iter{SumArgs{}, A.begin(), B.begin(), C.begin()};
117
+ //!
118
+ //! *iter; // returns (3.f)
119
+ //! iter[0]; // returns (3.f)
120
+ //! iter[1]; // returns (6.f)
121
+ //! iter[2]; // returns (9.f)
122
+ //! // iter[3] is an out-of-bounds error
123
+ //! @endcode
124
+ //!
125
+ //! This example shows how to use @c zip_transform_iterator to write the element-wise sum of multiple ranges with a
126
+ //! single call to @c thrust::copy.
127
+ //!
128
+ //! @code
129
+ //! #include <cuda/iterator>
130
+ //! #include <thrust/device_vector.h>
131
+ //!
132
+ //! int main()
133
+ //! {
134
+ //! struct SumArgs {
135
+ //! __host__ __device__ float operator()(float a, float b, float c) const noexcept {
136
+ //! return a + b + c;
137
+ //! }
138
+ //! };
139
+ //!
140
+ //! thrust::device_vector<float> A{0.f, 1.f, 2.f};
141
+ //! thrust::device_vector<float> B{1.f, 2.f, 3.f};
142
+ //! thrust::device_vector<float> C{2.f, 3.f, 4.f};
143
+ //! thrust::device_vector<float> out(3);
144
+ //!
145
+ //! cuda::zip_transform_iterator iter{SumArgs{}, A.begin(), B.begin(), C.begin()};
146
+ //! thrust::copy(iter, iter + 3, out.begin());
147
+ //!
148
+ //! // out is now [3.0f, 6.0f, 9.0f]
149
+ //!
150
+ //! return 0;
151
+ //! }
152
+ //! @endcode
153
+ template <class _Fn, class... _Iterators>
154
+ class zip_transform_iterator
155
+ {
156
+ private:
157
+ // Not a base because then the friend operators would be ambiguous
158
+ ::cuda::std::__compressed_movable_box<::cuda::std::tuple<_Iterators...>, _Fn> __store_;
159
+
160
+ [[nodiscard]] _CCCL_API constexpr ::cuda::std::tuple<_Iterators...>& __iters() noexcept
161
+ {
162
+ return __store_.template __get<0>();
163
+ }
164
+
165
+ [[nodiscard]] _CCCL_API constexpr const ::cuda::std::tuple<_Iterators...>& __iters() const noexcept
166
+ {
167
+ return __store_.template __get<0>();
168
+ }
169
+
170
+ [[nodiscard]] _CCCL_API constexpr _Fn& __func() noexcept
171
+ {
172
+ return __store_.template __get<1>();
173
+ }
174
+
175
+ [[nodiscard]] _CCCL_API constexpr const _Fn& __func() const noexcept
176
+ {
177
+ return __store_.template __get<1>();
178
+ }
179
+
180
+ template <class, class...>
181
+ friend class zip_transform_iterator;
182
+
183
+ template <class _Op>
184
+ _CCCL_API static constexpr auto
185
+ __zip_apply(const _Op& __op,
186
+ const ::cuda::std::tuple<_Iterators...>& __tuple1,
187
+ const ::cuda::std::tuple<_Iterators...>& __tuple2) //
188
+ noexcept(noexcept(__op(__tuple1, __tuple2, ::cuda::std::make_index_sequence<sizeof...(_Iterators)>())))
189
+ {
190
+ return __op(__tuple1, __tuple2, ::cuda::std::make_index_sequence<sizeof...(_Iterators)>());
191
+ }
192
+
193
+ public:
194
+ //! @brief Default-constructs a @c zip_transform_iterator by value-initializing the functor and all stored iterators
195
+ _CCCL_EXEC_CHECK_DISABLE
196
+ _CCCL_TEMPLATE(class _Fn2 = _Fn)
197
+ _CCCL_REQUIRES(
198
+ ::cuda::std::default_initializable<_Fn2>&& __zip_iter_constraints<_Iterators...>::__all_default_initializable)
199
+ _CCCL_API constexpr zip_transform_iterator() noexcept(
200
+ ::cuda::std::is_nothrow_default_constructible_v<_Fn2>
201
+ && __zip_iter_constraints<_Iterators...>::__all_nothrow_default_constructible)
202
+ : __store_()
203
+ {}
204
+
205
+ //! @brief Constructs a @c zip_transform_iterator from a functor and a tuple of iterators
206
+ //! @param __iters A tuple or pair of iterators
207
+ _CCCL_API constexpr explicit zip_transform_iterator(_Fn __fun, ::cuda::std::tuple<_Iterators...> __iters)
208
+ : __store_(::cuda::std::move(__iters), ::cuda::std::move(__fun))
209
+ {}
210
+
211
+ //! @brief Constructs a @c zip_transform_iterator from a functor and a variadic set of iterators
212
+ //! @param __iters The input iterators
213
+ _CCCL_API constexpr explicit zip_transform_iterator(_Fn __fun, _Iterators... __iters)
214
+ : __store_(::cuda::std::tuple<_Iterators...>{::cuda::std::move(__iters)...}, ::cuda::std::move(__fun))
215
+ {}
216
+
217
+ using iterator_concept = decltype(::cuda::__get_zip_iterator_concept<_Iterators...>());
218
+ using iterator_category = decltype(::cuda::__get_zip_transform_iterator_category<_Fn, _Iterators...>());
219
+ using difference_type = ::cuda::std::common_type_t<::cuda::std::iter_difference_t<_Iterators>...>;
220
+ using value_type =
221
+ ::cuda::std::remove_cvref_t<::cuda::std::invoke_result_t<_Fn&, ::cuda::std::iter_reference_t<_Iterators>...>>;
222
+
223
+ // `reference` and `pointer` are technically not to spec, but pre-ranges iterator_traits does not work properly with
224
+ // iterators that do not define all 5 aliases, see https://en.cppreference.com/w/cpp/iterator/iterator_traits.html
225
+ using reference = ::cuda::std::invoke_result_t<_Fn&, ::cuda::std::iter_reference_t<_Iterators>...>;
226
+ using pointer = void;
227
+
228
+ // Internal helper functions to extract internals for device dispatch, must be a tuple for cub_transform_many
229
+ [[nodiscard]] _CCCL_API constexpr ::cuda::std::tuple<_Iterators...>
230
+ __base() && noexcept(::cuda::std::is_nothrow_move_constructible_v<::cuda::std::tuple<_Iterators...>>)
231
+ {
232
+ return ::cuda::std::move(__iters());
233
+ }
234
+
235
+ [[nodiscard]] _CCCL_API constexpr _Fn __pred() && noexcept(::cuda::std::is_nothrow_move_constructible_v<_Fn>)
236
+ {
237
+ return ::cuda::std::move(__func());
238
+ }
239
+
240
+ struct __zip_transform_op_star
241
+ {
242
+ _Fn& __func_;
243
+
244
+ _CCCL_EXEC_CHECK_DISABLE
245
+ [[nodiscard]] _CCCL_API constexpr reference operator()(const _Iterators&... __iters) const
246
+ noexcept(::cuda::std::is_nothrow_invocable_v<_Fn&, ::cuda::std::iter_reference_t<const _Iterators>...>)
247
+ {
248
+ return ::cuda::std::invoke(const_cast<_Fn&>(__func_), *__iters...);
249
+ }
250
+ };
251
+
252
+ //! @brief Invokes the stored function with the result of dereferencing the stored iterators
253
+ [[nodiscard]] _CCCL_API constexpr reference operator*() const
254
+ noexcept(::cuda::std::is_nothrow_invocable_v<_Fn&, ::cuda::std::iter_reference_t<const _Iterators>...>)
255
+ {
256
+ return ::cuda::std::apply(__zip_transform_op_star{const_cast<_Fn&>(__func())}, __iters());
257
+ }
258
+
259
+ struct __zip_transform_op_subscript
260
+ {
261
+ difference_type __n_;
262
+ _Fn& __func_;
263
+
264
+ _CCCL_EXEC_CHECK_DISABLE
265
+ [[nodiscard]] _CCCL_API constexpr reference operator()(const _Iterators&... __iters) const noexcept(noexcept(
266
+ ::cuda::std::invoke(const_cast<_Fn&>(__func_), __iters[::cuda::std::iter_difference_t<_Iterators>(__n_)]...)))
267
+ {
268
+ return ::cuda::std::invoke(
269
+ const_cast<_Fn&>(__func_), __iters[::cuda::std::iter_difference_t<_Iterators>(__n_)]...);
270
+ }
271
+ };
272
+
273
+ //! @brief Invokes the stored function with the result of dereferencing the stored iterators advanced by an offset
274
+ //! @param __n The additional offset
275
+ _CCCL_TEMPLATE(class _Constraints = __zip_iter_constraints<_Iterators...>)
276
+ _CCCL_REQUIRES(_Constraints::__all_random_access)
277
+ _CCCL_API constexpr reference operator[](difference_type __n) const
278
+ noexcept(noexcept(::cuda::std::apply(__zip_transform_op_subscript{__n, ::cuda::std::declval<_Fn&>()},
279
+ ::cuda::std::declval<const ::cuda::std::tuple<_Iterators...>&>())))
280
+ {
281
+ return ::cuda::std::apply(__zip_transform_op_subscript{__n, const_cast<_Fn&>(__func())}, __iters());
282
+ }
283
+
284
+ //! @brief Increments all stored iterators
285
+ _CCCL_API constexpr zip_transform_iterator& operator++() noexcept(
286
+ noexcept(::cuda::std::apply(__zip_op_increment{}, ::cuda::std::declval<::cuda::std::tuple<_Iterators...>&>())))
287
+ {
288
+ ::cuda::std::apply(__zip_op_increment{}, __iters());
289
+ return *this;
290
+ }
291
+
292
+ //! @brief Increments all stored iterators
293
+ //! @returns A copy of the original @c zip_transform_iterator if possible
294
+ _CCCL_API constexpr auto operator++(int)
295
+ {
296
+ if constexpr (__zip_iter_constraints<_Iterators...>::__all_forward)
297
+ {
298
+ auto __tmp = *this;
299
+ ++*this;
300
+ return __tmp;
301
+ }
302
+ else
303
+ {
304
+ ++*this;
305
+ }
306
+ }
307
+
308
+ //! @brief Decrements all stored iterators
309
+ _CCCL_TEMPLATE(class _Constraints = __zip_iter_constraints<_Iterators...>)
310
+ _CCCL_REQUIRES(_Constraints::__all_bidirectional)
311
+ _CCCL_API constexpr zip_transform_iterator& operator--() noexcept(
312
+ noexcept(::cuda::std::apply(__zip_op_decrement{}, ::cuda::std::declval<::cuda::std::tuple<_Iterators...>&>())))
313
+ {
314
+ ::cuda::std::apply(__zip_op_decrement{}, __iters());
315
+ return *this;
316
+ }
317
+
318
+ //! @brief Decrements all stored iterators
319
+ _CCCL_TEMPLATE(class _Constraints = __zip_iter_constraints<_Iterators...>)
320
+ _CCCL_REQUIRES(_Constraints::__all_bidirectional)
321
+ _CCCL_API constexpr zip_transform_iterator operator--(int)
322
+ {
323
+ auto __tmp = *this;
324
+ --*this;
325
+ return __tmp;
326
+ }
327
+
328
+ struct __zip_op_pe
329
+ {
330
+ difference_type __n;
331
+
332
+ _CCCL_EXEC_CHECK_DISABLE
333
+ _CCCL_API constexpr void operator()(_Iterators&... __iters) const
334
+ noexcept(noexcept(((void) (__iters += ::cuda::std::iter_difference_t<_Iterators>(__n)), ...)))
335
+ {
336
+ ((void) (__iters += ::cuda::std::iter_difference_t<_Iterators>(__n)), ...);
337
+ }
338
+ };
339
+
340
+ //! @brief Increments all stored iterators by a given number of elements
341
+ //! @param __n The number of elements to increment
342
+ _CCCL_TEMPLATE(class _Constraints = __zip_iter_constraints<_Iterators...>)
343
+ _CCCL_REQUIRES(_Constraints::__all_random_access)
344
+ _CCCL_API constexpr zip_transform_iterator& operator+=(difference_type __n) noexcept(
345
+ noexcept(::cuda::std::apply(__zip_op_pe{__n}, ::cuda::std::declval<::cuda::std::tuple<_Iterators...>&>())))
346
+ {
347
+ ::cuda::std::apply(__zip_op_pe{__n}, __iters());
348
+ return *this;
349
+ }
350
+
351
+ struct __zip_op_me
352
+ {
353
+ difference_type __n;
354
+
355
+ _CCCL_EXEC_CHECK_DISABLE
356
+ _CCCL_API constexpr void operator()(_Iterators&... __iters) const
357
+ noexcept(noexcept(((void) (__iters -= ::cuda::std::iter_difference_t<_Iterators>(__n)), ...)))
358
+ {
359
+ ((void) (__iters -= ::cuda::std::iter_difference_t<_Iterators>(__n)), ...);
360
+ }
361
+ };
362
+
363
+ //! @brief Decrements all stored iterators by a given number of elements
364
+ //! @param __n The number of elements to decrement
365
+ _CCCL_TEMPLATE(class _Constraints = __zip_iter_constraints<_Iterators...>)
366
+ _CCCL_REQUIRES(_Constraints::__all_random_access)
367
+ _CCCL_API constexpr zip_transform_iterator& operator-=(difference_type __n) noexcept(
368
+ noexcept(::cuda::std::apply(__zip_op_me{__n}, ::cuda::std::declval<::cuda::std::tuple<_Iterators...>&>())))
369
+ {
370
+ ::cuda::std::apply(__zip_op_me{__n}, __iters());
371
+ return *this;
372
+ }
373
+
374
+ //! @brief Returns a copy of a @c zip_transform_iterator incremented by a given number of elements
375
+ //! @param __iter The @c zip_transform_iterator to increment
376
+ //! @param __n The number of elements to increment
377
+ template <class _Constraints = __zip_iter_constraints<_Iterators...>>
378
+ _CCCL_API friend constexpr auto operator+(const zip_transform_iterator& __iter, difference_type __n)
379
+ _CCCL_TRAILING_REQUIRES(zip_transform_iterator)(_Constraints::__all_random_access)
380
+ {
381
+ auto __rhs = __iter;
382
+ __rhs += __n;
383
+ return __rhs;
384
+ }
385
+
386
+ //! @brief Returns a copy of a @c zip_transform_iterator incremented by a given number of elements
387
+ //! @param __n The number of elements to increment
388
+ //! @param __iter The @c zip_transform_iterator to increment
389
+ template <class _Constraints = __zip_iter_constraints<_Iterators...>>
390
+ _CCCL_API friend constexpr auto operator+(difference_type __n, const zip_transform_iterator& __iter)
391
+ _CCCL_TRAILING_REQUIRES(zip_transform_iterator)(_Constraints::__all_random_access)
392
+ {
393
+ return __iter + __n;
394
+ }
395
+
396
+ //! @brief Returns a copy of a @c zip_transform_iterator decremented by a given number of elements
397
+ //! @param __n The number of elements to decrement
398
+ //! @param __iter The @c zip_transform_iterator to decrement
399
+ template <class _Constraints = __zip_iter_constraints<_Iterators...>>
400
+ _CCCL_API friend constexpr auto operator-(const zip_transform_iterator& __iter, difference_type __n)
401
+ _CCCL_TRAILING_REQUIRES(zip_transform_iterator)(_Constraints::__all_random_access)
402
+ {
403
+ auto __rhs = __iter;
404
+ __rhs -= __n;
405
+ return __rhs;
406
+ }
407
+
408
+ struct __zip_op_minus
409
+ {
410
+ struct __less_abs
411
+ {
412
+ // abs in cstdlib is not constexpr
413
+ _CCCL_EXEC_CHECK_DISABLE
414
+ [[nodiscard]] _CCCL_API static constexpr difference_type
415
+ __abs(difference_type __t) noexcept(noexcept(__t < 0 ? -__t : __t))
416
+ {
417
+ return __t < 0 ? -__t : __t;
418
+ }
419
+
420
+ _CCCL_EXEC_CHECK_DISABLE
421
+ [[nodiscard]] _CCCL_API constexpr bool operator()(difference_type __n, difference_type __y) const
422
+ noexcept(noexcept(__abs(__n) < __abs(__y)))
423
+ {
424
+ return __abs(__n) < __abs(__y);
425
+ }
426
+ };
427
+
428
+ _CCCL_EXEC_CHECK_DISABLE
429
+ template <size_t _Zero, size_t... _Indices>
430
+ [[nodiscard]] _CCCL_API constexpr difference_type
431
+ operator()(const ::cuda::std::tuple<_Iterators...>& __iters1,
432
+ const ::cuda::std::tuple<_Iterators...>& __iters2,
433
+ ::cuda::std::index_sequence<_Zero, _Indices...>) const //
434
+ noexcept(noexcept(((::cuda::std::get<_Indices>(__iters1) - ::cuda::std::get<_Indices>(__iters2)) && ...)))
435
+ {
436
+ const auto __first = static_cast<difference_type>(::cuda::std::get<0>(__iters1) - ::cuda::std::get<0>(__iters2));
437
+ if (__first == 0)
438
+ {
439
+ return __first;
440
+ }
441
+
442
+ const difference_type __temp[] = {
443
+ __first,
444
+ static_cast<difference_type>(::cuda::std::get<_Indices>(__iters1) - ::cuda::std::get<_Indices>(__iters2))...};
445
+ return *::cuda::std::ranges::min_element(__temp, __zip_op_minus::__less_abs{});
446
+ }
447
+ };
448
+
449
+ //! @brief Returns the distance between two @c zip_transform_iterators
450
+ //! @returns The minimal distance between any of the stored iterators
451
+ template <class _Constraints = __zip_iter_constraints<_Iterators...>>
452
+ _CCCL_API friend constexpr auto operator-(const zip_transform_iterator& __n, const zip_transform_iterator& __y)
453
+ _CCCL_TRAILING_REQUIRES(difference_type)(_Constraints::__all_sized_sentinel)
454
+ {
455
+ return __zip_apply(__zip_op_minus{}, __n.__iters(), __y.__iters());
456
+ }
457
+
458
+ struct __zip_op_eq
459
+ {
460
+ _CCCL_EXEC_CHECK_DISABLE
461
+ template <size_t... _Indices>
462
+ _CCCL_API constexpr bool operator()(const ::cuda::std::tuple<_Iterators...>& __iters1,
463
+ const ::cuda::std::tuple<_Iterators...>& __iters2,
464
+ ::cuda::std::index_sequence<_Indices...>) const
465
+ noexcept(noexcept(((::cuda::std::get<_Indices>(__iters1) == ::cuda::std::get<_Indices>(__iters2)) || ...)))
466
+ {
467
+ return ((::cuda::std::get<_Indices>(__iters1) == ::cuda::std::get<_Indices>(__iters2)) || ...);
468
+ }
469
+ };
470
+
471
+ //! @brief Compares two @c zip_transform_iterator for equality by comparing the tuple of stored iterators
472
+ template <class _Constraints = __zip_iter_constraints<_Iterators...>>
473
+ _CCCL_API friend constexpr auto operator==(const zip_transform_iterator& __n, const zip_transform_iterator& __y)
474
+ _CCCL_TRAILING_REQUIRES(bool)(_Constraints::__all_equality_comparable)
475
+ {
476
+ if constexpr (_Constraints::__all_bidirectional)
477
+ {
478
+ return __n.__iters() == __y.__iters();
479
+ }
480
+ else
481
+ {
482
+ return __zip_apply(__zip_op_eq{}, __n.__iters(), __y.__iters());
483
+ }
484
+ _CCCL_UNREACHABLE();
485
+ }
486
+
487
+ #if _CCCL_STD_VER <= 2017
488
+ //! @brief Compares two @c zip_transform_iterator for inequality by comparing the tuple of stored iterators
489
+ template <class _Constraints = __zip_iter_constraints<_Iterators...>>
490
+ _CCCL_API friend constexpr auto operator!=(const zip_transform_iterator& __n, const zip_transform_iterator& __y)
491
+ _CCCL_TRAILING_REQUIRES(bool)(_Constraints::__all_equality_comparable)
492
+ {
493
+ if constexpr (_Constraints::__all_bidirectional)
494
+ {
495
+ return __n.__iters() != __y.__iters();
496
+ }
497
+ else
498
+ {
499
+ return !__zip_apply(__zip_op_eq{}, __n.__iters(), __y.__iters());
500
+ }
501
+ _CCCL_UNREACHABLE();
502
+ }
503
+ #endif // _CCCL_STD_VER <= 2017
504
+
505
+ #if _LIBCUDACXX_HAS_SPACESHIP_OPERATOR()
506
+ //! @brief Three-way compares two @c zip_transform_iterator by comparing the tuple of stored iterators
507
+ template <class _Constraints = __zip_iter_constraints<_Iterators...>>
508
+ _CCCL_API friend constexpr auto operator<=>(const zip_transform_iterator& __n, const zip_transform_iterator& __y)
509
+ _CCCL_TRAILING_REQUIRES(bool)(_Constraints::__all_random_access&& _Constraints::__all_three_way_comparable)
510
+ {
511
+ return __n.__iters() <=> __y.__iters();
512
+ }
513
+
514
+ #else // ^^^ _LIBCUDACXX_HAS_SPACESHIP_OPERATOR() ^^^ / vvv !_LIBCUDACXX_HAS_SPACESHIP_OPERATOR() vvv
515
+
516
+ //! @brief Compares two @c zip_transform_iterator for less than by comparing the tuple of stored iterators
517
+ template <class _Constraints = __zip_iter_constraints<_Iterators...>>
518
+ _CCCL_API friend constexpr auto operator<(const zip_transform_iterator& __n, const zip_transform_iterator& __y)
519
+ _CCCL_TRAILING_REQUIRES(bool)(_Constraints::__all_random_access)
520
+ {
521
+ return __n.__iters() < __y.__iters();
522
+ }
523
+
524
+ //! @brief Compares two @c zip_transform_iterator for greater than by comparing the tuple of stored iterators
525
+ template <class _Constraints = __zip_iter_constraints<_Iterators...>>
526
+ _CCCL_API friend constexpr auto operator>(const zip_transform_iterator& __n, const zip_transform_iterator& __y)
527
+ _CCCL_TRAILING_REQUIRES(bool)(_Constraints::__all_random_access)
528
+ {
529
+ return __y < __n;
530
+ }
531
+
532
+ //! @brief Compares two @c zip_transform_iterator for less equal by comparing the tuple of stored iterators
533
+ template <class _Constraints = __zip_iter_constraints<_Iterators...>>
534
+ _CCCL_API friend constexpr auto operator<=(const zip_transform_iterator& __n, const zip_transform_iterator& __y)
535
+ _CCCL_TRAILING_REQUIRES(bool)(_Constraints::__all_random_access)
536
+ {
537
+ return !(__y < __n);
538
+ }
539
+
540
+ //! @brief Compares two @c zip_transform_iterator for greater equal by comparing the tuple of stored iterators
541
+ template <class _Constraints = __zip_iter_constraints<_Iterators...>>
542
+ _CCCL_API friend constexpr auto operator>=(const zip_transform_iterator& __n, const zip_transform_iterator& __y)
543
+ _CCCL_TRAILING_REQUIRES(bool)(_Constraints::__all_random_access)
544
+ {
545
+ return !(__n < __y);
546
+ }
547
+ #endif // !_LIBCUDACXX_HAS_SPACESHIP_OPERATOR()
548
+ };
549
+
550
+ template <class _Fn, class... _Iterators>
551
+ _CCCL_HOST_DEVICE zip_transform_iterator(_Fn, ::cuda::std::tuple<_Iterators...>)
552
+ -> zip_transform_iterator<_Fn, _Iterators...>;
553
+
554
+ template <class _Fn, class... _Iterators>
555
+ _CCCL_HOST_DEVICE zip_transform_iterator(_Fn, _Iterators...) -> zip_transform_iterator<_Fn, _Iterators...>;
556
+
557
+ //! @brief Creates a @c zip_transform_iterator from a tuple of iterators.
558
+ //! @param __t The tuple of iterators to wrap
559
+ template <class _Fn, class... _Iterators>
560
+ [[nodiscard]] _CCCL_API constexpr auto
561
+ make_zip_transform_iterator(_Fn __fun, ::cuda::std::tuple<_Iterators...> __t) noexcept(
562
+ ::cuda::std::is_nothrow_move_constructible_v<_Fn>
563
+ && __zip_iter_constraints<_Iterators...>::__all_nothrow_move_constructible)
564
+ {
565
+ return zip_transform_iterator<_Fn, _Iterators...>{::cuda::std::move(__fun), ::cuda::std::move(__t)};
566
+ }
567
+
568
+ //! @brief Creates a @c zip_transform_iterator from a variadic number of iterators.
569
+ //! @param __iters The iterators to wrap
570
+ template <class _Fn, class... _Iterators>
571
+ [[nodiscard]] _CCCL_API constexpr auto make_zip_transform_iterator(_Fn __fun, _Iterators... __iters) noexcept(
572
+ ::cuda::std::is_nothrow_move_constructible_v<_Fn>
573
+ && __zip_iter_constraints<_Iterators...>::__all_nothrow_move_constructible)
574
+ {
575
+ return zip_transform_iterator<_Fn, _Iterators...>{::cuda::std::move(__fun), ::cuda::std::move(__iters)...};
576
+ }
577
+
578
+ //! @}
579
+
580
+ _CCCL_END_NAMESPACE_CUDA
581
+
582
+ // GCC and MSVC2019 have issues determining __is_fancy_pointer in C++17 because they fail to instantiate pointer_traits
583
+ #if (_CCCL_COMPILER(GCC) || _CCCL_COMPILER(MSVC)) && _CCCL_STD_VER <= 2017
584
+ _CCCL_BEGIN_NAMESPACE_CUDA_STD
585
+ template <class _Fn, class... _Iterators>
586
+ inline constexpr bool __is_fancy_pointer<::cuda::zip_transform_iterator<_Fn, _Iterators...>> = false;
587
+ _CCCL_END_NAMESPACE_CUDA_STD
588
+ #endif // (_CCCL_COMPILER(GCC) || _CCCL_COMPILER(MSVC)) && _CCCL_STD_VER <= 2017
589
+
590
+ #include <cuda/std/__cccl/epilogue.h>
591
+
592
+ #endif // _CUDA___ITERATOR_ZIP_TRANSFORM_ITERATOR_H