cuda-cccl 0.3.0__cp311-cp311-manylinux_2_24_aarch64.whl → 0.3.2__cp311-cp311-manylinux_2_24_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cuda-cccl might be problematic. Click here for more details.

Files changed (294)
  1. cuda/cccl/cooperative/__init__.py +7 -1
  2. cuda/cccl/cooperative/experimental/__init__.py +21 -5
  3. cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +2 -5
  4. cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +2 -5
  5. cuda/cccl/headers/include/cub/agent/agent_for.cuh +2 -5
  6. cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +354 -572
  7. cuda/cccl/headers/include/cub/agent/agent_merge.cuh +23 -21
  8. cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +21 -3
  9. cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +2 -5
  10. cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +2 -5
  11. cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +2 -5
  12. cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +2 -5
  13. cuda/cccl/headers/include/cub/agent/agent_rle.cuh +2 -5
  14. cuda/cccl/headers/include/cub/agent/agent_scan.cuh +5 -1
  15. cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +2 -5
  16. cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +2 -5
  17. cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +2 -5
  18. cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +2 -5
  19. cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +2 -5
  20. cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +22 -5
  21. cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +6 -8
  22. cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +24 -14
  23. cuda/cccl/headers/include/cub/block/block_exchange.cuh +5 -0
  24. cuda/cccl/headers/include/cub/block/block_histogram.cuh +4 -0
  25. cuda/cccl/headers/include/cub/block/block_load.cuh +4 -0
  26. cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +4 -2
  27. cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +4 -2
  28. cuda/cccl/headers/include/cub/block/block_reduce.cuh +1 -0
  29. cuda/cccl/headers/include/cub/block/block_scan.cuh +12 -2
  30. cuda/cccl/headers/include/cub/block/block_store.cuh +3 -2
  31. cuda/cccl/headers/include/cub/detail/device_memory_resource.cuh +1 -0
  32. cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +34 -30
  33. cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +1 -1
  34. cuda/cccl/headers/include/cub/device/device_for.cuh +118 -40
  35. cuda/cccl/headers/include/cub/device/device_reduce.cuh +6 -7
  36. cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +170 -260
  37. cuda/cccl/headers/include/cub/device/device_transform.cuh +122 -91
  38. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +6 -7
  39. cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +2 -11
  40. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +12 -29
  41. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +2 -7
  42. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh +0 -1
  43. cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +2 -3
  44. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +4 -5
  45. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +0 -1
  46. cuda/cccl/headers/include/cub/device/dispatch/dispatch_topk.cuh +3 -5
  47. cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +13 -5
  48. cuda/cccl/headers/include/cub/device/dispatch/kernels/for_each.cuh +72 -37
  49. cuda/cccl/headers/include/cub/device/dispatch/kernels/reduce.cuh +2 -5
  50. cuda/cccl/headers/include/cub/device/dispatch/kernels/scan.cuh +2 -5
  51. cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_reduce.cuh +2 -5
  52. cuda/cccl/headers/include/cub/device/dispatch/kernels/transform.cuh +22 -27
  53. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +2 -5
  54. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +2 -5
  55. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +2 -5
  56. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +2 -5
  57. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +2 -5
  58. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +8 -0
  59. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +2 -5
  60. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +2 -5
  61. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +2 -5
  62. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +2 -5
  63. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +2 -5
  64. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +2 -5
  65. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +2 -5
  66. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +61 -70
  67. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +10 -0
  68. cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +24 -17
  69. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +3 -2
  70. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +3 -2
  71. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +2 -2
  72. cuda/cccl/headers/include/cub/warp/warp_load.cuh +6 -6
  73. cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +7 -2
  74. cuda/cccl/headers/include/cub/warp/warp_scan.cuh +7 -3
  75. cuda/cccl/headers/include/cub/warp/warp_store.cuh +1 -0
  76. cuda/cccl/headers/include/cuda/__algorithm/common.h +1 -1
  77. cuda/cccl/headers/include/cuda/__algorithm/copy.h +1 -1
  78. cuda/cccl/headers/include/cuda/__algorithm/fill.h +1 -1
  79. cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +19 -0
  80. cuda/cccl/headers/include/cuda/__cccl_config +1 -0
  81. cuda/cccl/headers/include/cuda/__cmath/fast_modulo_division.h +3 -74
  82. cuda/cccl/headers/include/cuda/__cmath/mul_hi.h +146 -0
  83. cuda/cccl/headers/include/cuda/__complex/get_real_imag.h +0 -4
  84. cuda/cccl/headers/include/cuda/__device/all_devices.h +46 -143
  85. cuda/cccl/headers/include/cuda/__device/arch_id.h +176 -0
  86. cuda/cccl/headers/include/cuda/__device/arch_traits.h +247 -323
  87. cuda/cccl/headers/include/cuda/__device/attributes.h +174 -123
  88. cuda/cccl/headers/include/cuda/__device/compute_capability.h +171 -0
  89. cuda/cccl/headers/include/cuda/__device/device_ref.h +27 -49
  90. cuda/cccl/headers/include/cuda/__device/physical_device.h +100 -96
  91. cuda/cccl/headers/include/cuda/__driver/driver_api.h +105 -3
  92. cuda/cccl/headers/include/cuda/__event/event.h +27 -26
  93. cuda/cccl/headers/include/cuda/__event/event_ref.h +5 -5
  94. cuda/cccl/headers/include/cuda/__event/timed_event.h +10 -7
  95. cuda/cccl/headers/include/cuda/__fwd/devices.h +44 -0
  96. cuda/cccl/headers/include/cuda/__fwd/zip_iterator.h +9 -0
  97. cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h +46 -31
  98. cuda/cccl/headers/include/cuda/__iterator/strided_iterator.h +79 -47
  99. cuda/cccl/headers/include/cuda/__iterator/tabulate_output_iterator.h +59 -36
  100. cuda/cccl/headers/include/cuda/__iterator/transform_input_output_iterator.h +79 -49
  101. cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +74 -48
  102. cuda/cccl/headers/include/cuda/__iterator/transform_output_iterator.h +80 -55
  103. cuda/cccl/headers/include/cuda/__iterator/zip_common.h +148 -0
  104. cuda/cccl/headers/include/cuda/__iterator/zip_iterator.h +21 -137
  105. cuda/cccl/headers/include/cuda/__iterator/zip_transform_iterator.h +592 -0
  106. cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +127 -60
  107. cuda/cccl/headers/include/cuda/__mdspan/host_device_mdspan.h +178 -3
  108. cuda/cccl/headers/include/cuda/__mdspan/restrict_accessor.h +38 -8
  109. cuda/cccl/headers/include/cuda/__mdspan/restrict_mdspan.h +67 -1
  110. cuda/cccl/headers/include/cuda/__memory/ptr_in_range.h +93 -0
  111. cuda/cccl/headers/include/cuda/__memory_resource/get_memory_resource.h +4 -4
  112. cuda/cccl/headers/include/cuda/__memory_resource/properties.h +44 -0
  113. cuda/cccl/headers/include/cuda/__memory_resource/resource.h +1 -1
  114. cuda/cccl/headers/include/cuda/__memory_resource/resource_ref.h +4 -6
  115. cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +2 -1
  116. cuda/cccl/headers/include/cuda/__runtime/ensure_current_context.h +9 -7
  117. cuda/cccl/headers/include/cuda/__stream/stream.h +8 -8
  118. cuda/cccl/headers/include/cuda/__stream/stream_ref.h +18 -16
  119. cuda/cccl/headers/include/cuda/__utility/basic_any.h +1 -1
  120. cuda/cccl/headers/include/cuda/__utility/in_range.h +65 -0
  121. cuda/cccl/headers/include/cuda/algorithm +1 -1
  122. cuda/cccl/headers/include/cuda/cmath +1 -0
  123. cuda/cccl/headers/include/cuda/devices +13 -0
  124. cuda/cccl/headers/include/cuda/iterator +1 -0
  125. cuda/cccl/headers/include/cuda/memory +1 -0
  126. cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h +2 -2
  127. cuda/cccl/headers/include/cuda/std/__algorithm/find.h +1 -1
  128. cuda/cccl/headers/include/cuda/std/__algorithm/includes.h +2 -4
  129. cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h +1 -1
  130. cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h +7 -15
  131. cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h +1 -1
  132. cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h +1 -2
  133. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h +2 -2
  134. cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h +1 -1
  135. cuda/cccl/headers/include/cuda/std/__bit/countl.h +8 -1
  136. cuda/cccl/headers/include/cuda/std/__bit/countr.h +2 -2
  137. cuda/cccl/headers/include/cuda/std/__bit/reference.h +11 -11
  138. cuda/cccl/headers/include/cuda/std/__cccl/algorithm_wrapper.h +36 -0
  139. cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +46 -49
  140. cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h +6 -0
  141. cuda/cccl/headers/include/cuda/std/__cccl/host_std_lib.h +52 -0
  142. cuda/cccl/headers/include/cuda/std/__cccl/memory_wrapper.h +36 -0
  143. cuda/cccl/headers/include/cuda/std/__cccl/numeric_wrapper.h +36 -0
  144. cuda/cccl/headers/include/cuda/std/__chrono/duration.h +16 -16
  145. cuda/cccl/headers/include/cuda/std/__chrono/steady_clock.h +5 -5
  146. cuda/cccl/headers/include/cuda/std/__chrono/system_clock.h +5 -5
  147. cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +3 -2
  148. cuda/cccl/headers/include/cuda/std/__complex/complex.h +3 -2
  149. cuda/cccl/headers/include/cuda/std/__complex/literals.h +14 -34
  150. cuda/cccl/headers/include/cuda/std/__complex/nvbf16.h +2 -1
  151. cuda/cccl/headers/include/cuda/std/__complex/nvfp16.h +4 -3
  152. cuda/cccl/headers/include/cuda/std/__concepts/invocable.h +2 -2
  153. cuda/cccl/headers/include/cuda/std/__cstdlib/malloc.h +3 -2
  154. cuda/cccl/headers/include/cuda/std/__floating_point/fp.h +1 -1
  155. cuda/cccl/headers/include/cuda/std/__functional/bind.h +10 -13
  156. cuda/cccl/headers/include/cuda/std/__functional/function.h +5 -8
  157. cuda/cccl/headers/include/cuda/std/__functional/invoke.h +71 -335
  158. cuda/cccl/headers/include/cuda/std/__functional/mem_fn.h +1 -2
  159. cuda/cccl/headers/include/cuda/std/__functional/reference_wrapper.h +3 -3
  160. cuda/cccl/headers/include/cuda/std/__functional/weak_result_type.h +0 -6
  161. cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +13 -0
  162. cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +13 -0
  163. cuda/cccl/headers/include/cuda/std/__fwd/complex.h +13 -4
  164. cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +23 -0
  165. cuda/cccl/headers/include/cuda/std/__fwd/pair.h +13 -0
  166. cuda/cccl/headers/include/cuda/std/__fwd/string.h +22 -0
  167. cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +14 -0
  168. cuda/cccl/headers/include/cuda/std/__internal/features.h +0 -5
  169. cuda/cccl/headers/include/cuda/std/__internal/namespaces.h +21 -0
  170. cuda/cccl/headers/include/cuda/std/__iterator/iterator_traits.h +5 -5
  171. cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +7 -1
  172. cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +53 -39
  173. cuda/cccl/headers/include/cuda/std/__memory/allocator.h +3 -3
  174. cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +1 -3
  175. cuda/cccl/headers/include/cuda/std/__optional/optional_base.h +1 -0
  176. cuda/cccl/headers/include/cuda/std/__ranges/compressed_movable_box.h +892 -0
  177. cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +2 -2
  178. cuda/cccl/headers/include/cuda/std/__tuple_dir/make_tuple_types.h +23 -1
  179. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +4 -0
  180. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like_ext.h +4 -0
  181. cuda/cccl/headers/include/cuda/std/__type_traits/is_primary_template.h +7 -5
  182. cuda/cccl/headers/include/cuda/std/__type_traits/result_of.h +1 -1
  183. cuda/cccl/headers/include/cuda/std/__utility/pair.h +0 -5
  184. cuda/cccl/headers/include/cuda/std/bitset +1 -1
  185. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +15 -12
  186. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/variant +11 -9
  187. cuda/cccl/headers/include/cuda/std/inplace_vector +4 -4
  188. cuda/cccl/headers/include/cuda/std/numbers +5 -0
  189. cuda/cccl/headers/include/cuda/std/string_view +155 -13
  190. cuda/cccl/headers/include/cuda/std/version +1 -4
  191. cuda/cccl/headers/include/cuda/stream_ref +5 -0
  192. cuda/cccl/headers/include/cuda/utility +1 -0
  193. cuda/cccl/headers/include/nv/target +7 -2
  194. cuda/cccl/headers/include/thrust/allocate_unique.h +1 -1
  195. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +309 -33
  196. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +151 -4
  197. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +60 -3
  198. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +45 -3
  199. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +31 -6
  200. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +29 -16
  201. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +41 -4
  202. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.h +42 -4
  203. cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +3 -3
  204. cuda/cccl/headers/include/thrust/detail/integer_math.h +3 -20
  205. cuda/cccl/headers/include/thrust/detail/internal_functional.h +1 -1
  206. cuda/cccl/headers/include/thrust/detail/memory_algorithms.h +1 -1
  207. cuda/cccl/headers/include/thrust/detail/temporary_array.h +1 -1
  208. cuda/cccl/headers/include/thrust/detail/type_traits.h +1 -1
  209. cuda/cccl/headers/include/thrust/device_delete.h +18 -3
  210. cuda/cccl/headers/include/thrust/device_free.h +16 -3
  211. cuda/cccl/headers/include/thrust/device_new.h +29 -8
  212. cuda/cccl/headers/include/thrust/host_vector.h +1 -1
  213. cuda/cccl/headers/include/thrust/iterator/iterator_traits.h +11 -0
  214. cuda/cccl/headers/include/thrust/iterator/tabulate_output_iterator.h +5 -2
  215. cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +1 -1
  216. cuda/cccl/headers/include/thrust/mr/pool.h +1 -1
  217. cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +33 -0
  218. cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +13 -115
  219. cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +8 -2
  220. cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +7 -7
  221. cuda/cccl/parallel/experimental/__init__.py +21 -74
  222. cuda/compute/__init__.py +79 -0
  223. cuda/{cccl/parallel/experimental → compute}/_bindings.pyi +43 -1
  224. cuda/{cccl/parallel/experimental → compute}/_bindings_impl.pyx +157 -8
  225. cuda/{cccl/parallel/experimental → compute}/algorithms/_histogram.py +2 -2
  226. cuda/{cccl/parallel/experimental → compute}/algorithms/_merge_sort.py +2 -2
  227. cuda/{cccl/parallel/experimental → compute}/algorithms/_radix_sort.py +3 -3
  228. cuda/{cccl/parallel/experimental → compute}/algorithms/_reduce.py +2 -2
  229. cuda/{cccl/parallel/experimental → compute}/algorithms/_scan.py +112 -40
  230. cuda/{cccl/parallel/experimental → compute}/algorithms/_segmented_reduce.py +2 -2
  231. cuda/{cccl/parallel/experimental → compute}/algorithms/_three_way_partition.py +2 -2
  232. cuda/{cccl/parallel/experimental → compute}/algorithms/_transform.py +36 -15
  233. cuda/{cccl/parallel/experimental → compute}/algorithms/_unique_by_key.py +2 -2
  234. cuda/compute/cu12/_bindings_impl.cpython-311-aarch64-linux-gnu.so +0 -0
  235. cuda/{cccl/parallel/experimental → compute}/cu12/cccl/libcccl.c.parallel.so +0 -0
  236. cuda/compute/cu13/_bindings_impl.cpython-311-aarch64-linux-gnu.so +0 -0
  237. cuda/{cccl/parallel/experimental → compute}/cu13/cccl/libcccl.c.parallel.so +0 -0
  238. cuda/{cccl/parallel/experimental → compute}/iterators/__init__.py +2 -0
  239. cuda/{cccl/parallel/experimental → compute}/iterators/_factories.py +36 -8
  240. cuda/{cccl/parallel/experimental → compute}/iterators/_iterators.py +206 -1
  241. cuda/{cccl/parallel/experimental → compute}/numba_utils.py +2 -2
  242. cuda/{cccl/parallel/experimental → compute}/struct.py +2 -2
  243. cuda/{cccl/parallel/experimental → compute}/typing.py +2 -0
  244. cuda/coop/__init__.py +8 -0
  245. cuda/{cccl/cooperative/experimental → coop}/_nvrtc.py +3 -2
  246. cuda/{cccl/cooperative/experimental → coop}/_scan_op.py +3 -3
  247. cuda/{cccl/cooperative/experimental → coop}/_types.py +2 -2
  248. cuda/{cccl/cooperative/experimental → coop}/_typing.py +1 -1
  249. cuda/{cccl/cooperative/experimental → coop}/block/__init__.py +6 -6
  250. cuda/{cccl/cooperative/experimental → coop}/block/_block_exchange.py +4 -4
  251. cuda/{cccl/cooperative/experimental → coop}/block/_block_load_store.py +6 -6
  252. cuda/{cccl/cooperative/experimental → coop}/block/_block_merge_sort.py +4 -4
  253. cuda/{cccl/cooperative/experimental → coop}/block/_block_radix_sort.py +6 -6
  254. cuda/{cccl/cooperative/experimental → coop}/block/_block_reduce.py +6 -6
  255. cuda/{cccl/cooperative/experimental → coop}/block/_block_scan.py +7 -7
  256. cuda/coop/warp/__init__.py +9 -0
  257. cuda/{cccl/cooperative/experimental → coop}/warp/_warp_merge_sort.py +3 -3
  258. cuda/{cccl/cooperative/experimental → coop}/warp/_warp_reduce.py +6 -6
  259. cuda/{cccl/cooperative/experimental → coop}/warp/_warp_scan.py +4 -4
  260. {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/METADATA +1 -1
  261. {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/RECORD +275 -276
  262. cuda/cccl/cooperative/experimental/warp/__init__.py +0 -9
  263. cuda/cccl/headers/include/cub/device/dispatch/dispatch_advance_iterators.cuh +0 -111
  264. cuda/cccl/headers/include/thrust/detail/algorithm_wrapper.h +0 -37
  265. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.inl +0 -371
  266. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.inl +0 -242
  267. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.inl +0 -137
  268. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.inl +0 -99
  269. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.inl +0 -68
  270. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.inl +0 -86
  271. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.inl +0 -79
  272. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.inl +0 -98
  273. cuda/cccl/headers/include/thrust/detail/device_delete.inl +0 -52
  274. cuda/cccl/headers/include/thrust/detail/device_free.inl +0 -47
  275. cuda/cccl/headers/include/thrust/detail/device_new.inl +0 -61
  276. cuda/cccl/headers/include/thrust/detail/memory_wrapper.h +0 -40
  277. cuda/cccl/headers/include/thrust/detail/numeric_wrapper.h +0 -37
  278. cuda/cccl/parallel/experimental/.gitignore +0 -4
  279. cuda/cccl/parallel/experimental/cu12/_bindings_impl.cpython-311-aarch64-linux-gnu.so +0 -0
  280. cuda/cccl/parallel/experimental/cu13/_bindings_impl.cpython-311-aarch64-linux-gnu.so +0 -0
  281. /cuda/{cccl/parallel/experimental → compute}/_bindings.py +0 -0
  282. /cuda/{cccl/parallel/experimental → compute}/_caching.py +0 -0
  283. /cuda/{cccl/parallel/experimental → compute}/_cccl_interop.py +0 -0
  284. /cuda/{cccl/parallel/experimental → compute}/_utils/__init__.py +0 -0
  285. /cuda/{cccl/parallel/experimental → compute}/_utils/protocols.py +0 -0
  286. /cuda/{cccl/parallel/experimental → compute}/_utils/temp_storage_buffer.py +0 -0
  287. /cuda/{cccl/parallel/experimental → compute}/algorithms/__init__.py +0 -0
  288. /cuda/{cccl/parallel/experimental → compute}/cccl/.gitkeep +0 -0
  289. /cuda/{cccl/parallel/experimental → compute}/iterators/_zip_iterator.py +0 -0
  290. /cuda/{cccl/parallel/experimental → compute}/op.py +0 -0
  291. /cuda/{cccl/cooperative/experimental → coop}/_caching.py +0 -0
  292. /cuda/{cccl/cooperative/experimental → coop}/_common.py +0 -0
  293. {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/WHEEL +0 -0
  294. {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/licenses/LICENSE +0 -0
@@ -30,6 +30,7 @@
30
30
  #include <cuda/std/__functional/invoke.h>
31
31
  #include <cuda/std/__iterator/concepts.h>
32
32
  #include <cuda/std/__iterator/iterator_traits.h>
33
+ #include <cuda/std/__ranges/compressed_movable_box.h>
33
34
  #include <cuda/std/__ranges/concepts.h>
34
35
  #include <cuda/std/__ranges/movable_box.h>
35
36
  #include <cuda/std/__type_traits/conditional.h>
@@ -41,6 +42,7 @@
41
42
  #include <cuda/std/__type_traits/is_object.h>
42
43
  #include <cuda/std/__type_traits/is_reference.h>
43
44
  #include <cuda/std/__type_traits/remove_cvref.h>
45
+ #include <cuda/std/__utility/declval.h>
44
46
  #include <cuda/std/__utility/forward.h>
45
47
  #include <cuda/std/__utility/move.h>
46
48
 
@@ -144,10 +146,30 @@ class transform_output_iterator
144
146
  static_assert(::cuda::std::is_object_v<_Fn>,
145
147
  "cuda::transform_output_iterator requires that _Fn is a function object");
146
148
 
147
- public:
148
- _Iter __current_{};
149
- ::cuda::std::ranges::__movable_box<_Fn> __func_{};
149
+ // Not a base because then the friend operators would be ambiguous
150
+ ::cuda::std::__compressed_movable_box<_Iter, _Fn> __store_;
151
+
152
+ [[nodiscard]] _CCCL_API constexpr _Iter& __iter() noexcept
153
+ {
154
+ return __store_.template __get<0>();
155
+ }
156
+
157
+ [[nodiscard]] _CCCL_API constexpr const _Iter& __iter() const noexcept
158
+ {
159
+ return __store_.template __get<0>();
160
+ }
161
+
162
+ [[nodiscard]] _CCCL_API constexpr _Fn& __func() noexcept
163
+ {
164
+ return __store_.template __get<1>();
165
+ }
150
166
 
167
+ [[nodiscard]] _CCCL_API constexpr const _Fn& __func() const noexcept
168
+ {
169
+ return __store_.template __get<1>();
170
+ }
171
+
172
+ public:
151
173
  using iterator_concept = ::cuda::std::conditional_t<
152
174
  ::cuda::std::__has_random_access_traversal<_Iter>,
153
175
  ::cuda::std::random_access_iterator_tag,
@@ -163,19 +185,13 @@ public:
163
185
  using reference = void;
164
186
 
165
187
  //! @brief Default constructs a @c transform_output_iterator with a value initialized iterator and functor
166
- #if _CCCL_HAS_CONCEPTS()
167
- _CCCL_EXEC_CHECK_DISABLE
168
- _CCCL_HIDE_FROM_ABI transform_output_iterator()
169
- requires ::cuda::std::default_initializable<_Iter> && ::cuda::std::default_initializable<_Fn>
170
- = default;
171
- #else // ^^^ _CCCL_HAS_CONCEPTS() ^^^ / vvv !_CCCL_HAS_CONCEPTS() vvv
172
188
  _CCCL_EXEC_CHECK_DISABLE
173
189
  _CCCL_TEMPLATE(class _Iter2 = _Iter, class _Fn2 = _Fn)
174
190
  _CCCL_REQUIRES(::cuda::std::default_initializable<_Iter2> _CCCL_AND ::cuda::std::default_initializable<_Fn2>)
175
191
  _CCCL_API constexpr transform_output_iterator() noexcept(
176
192
  ::cuda::std::is_nothrow_default_constructible_v<_Iter2> && ::cuda::std::is_nothrow_default_constructible_v<_Fn2>)
193
+ : __store_()
177
194
  {}
178
- #endif // ^^^ !_CCCL_HAS_CONCEPTS() ^^^
179
195
 
180
196
  //! @brief Constructs a @c transform_output_iterator with a given iterator and output functor
181
197
  //! @param __iter The iterator to transform
@@ -183,35 +199,34 @@ public:
183
199
  _CCCL_EXEC_CHECK_DISABLE
184
200
  _CCCL_API constexpr transform_output_iterator(_Iter __iter, _Fn __func) noexcept(
185
201
  ::cuda::std::is_nothrow_move_constructible_v<_Iter> && ::cuda::std::is_nothrow_move_constructible_v<_Fn>)
186
- : __current_(::cuda::std::move(__iter))
187
- , __func_(::cuda::std::in_place, ::cuda::std::move(__func))
202
+ : __store_(::cuda::std::move(__iter), ::cuda::std::move(__func))
188
203
  {}
189
204
 
190
205
  //! @brief Returns a const reference to the stored iterator
191
206
  [[nodiscard]] _CCCL_API constexpr const _Iter& base() const& noexcept
192
207
  {
193
- return __current_;
208
+ return __iter();
194
209
  }
195
210
 
196
211
  //! @brief Extracts the stored iterator
197
212
  _CCCL_EXEC_CHECK_DISABLE
198
213
  [[nodiscard]] _CCCL_API constexpr _Iter base() && noexcept(::cuda::std::is_nothrow_move_constructible_v<_Iter>)
199
214
  {
200
- return ::cuda::std::move(__current_);
215
+ return ::cuda::std::move(__iter());
201
216
  }
202
217
 
203
218
  //! @brief Returns a proxy that transforms the input upon assignment
204
219
  _CCCL_EXEC_CHECK_DISABLE
205
220
  [[nodiscard]] _CCCL_API constexpr auto operator*() const noexcept(::cuda::std::is_nothrow_copy_constructible_v<_Iter>)
206
221
  {
207
- return __transform_output_proxy{__current_, const_cast<_Fn&>(*__func_)};
222
+ return __transform_output_proxy{__iter(), const_cast<_Fn&>(__func())};
208
223
  }
209
224
 
210
225
  //! @brief Returns a proxy that transforms the input upon assignment
211
226
  _CCCL_EXEC_CHECK_DISABLE
212
227
  [[nodiscard]] _CCCL_API constexpr auto operator*() noexcept(::cuda::std::is_nothrow_copy_constructible_v<_Iter>)
213
228
  {
214
- return __transform_output_proxy{__current_, *__func_};
229
+ return __transform_output_proxy{__iter(), __func()};
215
230
  }
216
231
 
217
232
  //! @brief Subscripts the @c transform_output_iterator
@@ -221,9 +236,10 @@ public:
221
236
  _CCCL_TEMPLATE(class _Iter2 = _Iter)
222
237
  _CCCL_REQUIRES(::cuda::std::__iter_can_subscript<_Iter2>)
223
238
  [[nodiscard]] _CCCL_API constexpr auto operator[](difference_type __n) const
224
- noexcept(::cuda::std::is_nothrow_copy_constructible_v<_Iter2> && noexcept(__current_ + __n))
239
+ noexcept(::cuda::std::is_nothrow_copy_constructible_v<_Iter2>
240
+ && noexcept(::cuda::std::declval<const _Iter2&>() + __n))
225
241
  {
226
- return __transform_output_proxy{__current_ + __n, const_cast<_Fn&>(*__func_)};
242
+ return __transform_output_proxy{__iter() + __n, const_cast<_Fn&>(__func())};
227
243
  }
228
244
 
229
245
  //! @brief Subscripts the @c transform_output_iterator
@@ -233,22 +249,22 @@ public:
233
249
  _CCCL_TEMPLATE(class _Iter2 = _Iter)
234
250
  _CCCL_REQUIRES(::cuda::std::__iter_can_subscript<_Iter2>)
235
251
  [[nodiscard]] _CCCL_API constexpr auto operator[](difference_type __n) noexcept(
236
- ::cuda::std::is_nothrow_copy_constructible_v<_Iter2> && noexcept(__current_ + __n))
252
+ ::cuda::std::is_nothrow_copy_constructible_v<_Iter2> && noexcept(::cuda::std::declval<_Iter2&>() + __n))
237
253
  {
238
- return __transform_output_proxy{__current_ + __n, const_cast<_Fn&>(*__func_)};
254
+ return __transform_output_proxy{__iter() + __n, const_cast<_Fn&>(__func())};
239
255
  }
240
256
 
241
257
  //! @brief Increments the stored iterator
242
258
  _CCCL_EXEC_CHECK_DISABLE
243
- _CCCL_API constexpr transform_output_iterator& operator++() noexcept(noexcept(++__current_))
259
+ _CCCL_API constexpr transform_output_iterator& operator++() noexcept(noexcept(++::cuda::std::declval<_Iter&>()))
244
260
  {
245
- ++__current_;
261
+ ++__iter();
246
262
  return *this;
247
263
  }
248
264
 
249
265
  //! @brief Increments the stored iterator
250
266
  _CCCL_EXEC_CHECK_DISABLE
251
- _CCCL_API constexpr auto operator++(int) noexcept(noexcept(++__current_))
267
+ _CCCL_API constexpr auto operator++(int) noexcept(noexcept(++::cuda::std::declval<_Iter&>()))
252
268
  {
253
269
  if constexpr (::cuda::std::__has_forward_traversal<_Iter> || ::cuda::std::output_iterator<_Iter, value_type>)
254
270
  {
@@ -258,7 +274,7 @@ public:
258
274
  }
259
275
  else
260
276
  {
261
- ++__current_;
277
+ ++__iter();
262
278
  }
263
279
  }
264
280
 
@@ -266,9 +282,9 @@ public:
266
282
  _CCCL_EXEC_CHECK_DISABLE
267
283
  _CCCL_TEMPLATE(class _Iter2 = _Iter)
268
284
  _CCCL_REQUIRES(::cuda::std::__iter_can_decrement<_Iter2>)
269
- _CCCL_API constexpr transform_output_iterator& operator--() noexcept(noexcept(--__current_))
285
+ _CCCL_API constexpr transform_output_iterator& operator--() noexcept(noexcept(--::cuda::std::declval<_Iter2&>()))
270
286
  {
271
- --__current_;
287
+ --__iter();
272
288
  return *this;
273
289
  }
274
290
 
@@ -276,8 +292,8 @@ public:
276
292
  _CCCL_EXEC_CHECK_DISABLE
277
293
  _CCCL_TEMPLATE(class _Iter2 = _Iter)
278
294
  _CCCL_REQUIRES(::cuda::std::__iter_can_decrement<_Iter2>)
279
- _CCCL_API constexpr transform_output_iterator
280
- operator--(int) noexcept(::cuda::std::is_nothrow_copy_constructible_v<_Iter> && noexcept(--__current_))
295
+ _CCCL_API constexpr transform_output_iterator operator--(int) noexcept(
296
+ ::cuda::std::is_nothrow_copy_constructible_v<_Iter> && noexcept(--::cuda::std::declval<_Iter2&>()))
281
297
  {
282
298
  auto __tmp = *this;
283
299
  --*this;
@@ -289,9 +305,10 @@ public:
289
305
  _CCCL_EXEC_CHECK_DISABLE
290
306
  _CCCL_TEMPLATE(class _Iter2 = _Iter)
291
307
  _CCCL_REQUIRES(::cuda::std::__iter_can_plus_equal<_Iter2>)
292
- _CCCL_API constexpr transform_output_iterator& operator+=(difference_type __n) noexcept(noexcept(__current_ += __n))
308
+ _CCCL_API constexpr transform_output_iterator&
309
+ operator+=(difference_type __n) noexcept(noexcept(::cuda::std::declval<_Iter2&>() += __n))
293
310
  {
294
- __current_ += __n;
311
+ __iter() += __n;
295
312
  return *this;
296
313
  }
297
314
 
@@ -301,12 +318,12 @@ public:
301
318
  _CCCL_EXEC_CHECK_DISABLE
302
319
  template <class _Iter2 = _Iter>
303
320
  [[nodiscard]] _CCCL_API friend constexpr auto
304
- operator+(const transform_output_iterator& __iter,
305
- difference_type __n) noexcept(::cuda::std::is_nothrow_copy_constructible_v<_Iter2>
306
- && noexcept(::cuda::std::declval<const _Iter2&>() + difference_type{}))
307
- _CCCL_TRAILING_REQUIRES(transform_output_iterator)(::cuda::std::__iter_can_plus<_Iter2>)
321
+ operator+(const transform_output_iterator& __iter, difference_type __n) //
322
+ noexcept(::cuda::std::is_nothrow_copy_constructible_v<_Iter2>
323
+ && noexcept(::cuda::std::declval<const _Iter2&>() + difference_type{}))
324
+ _CCCL_TRAILING_REQUIRES(transform_output_iterator)(::cuda::std::__iter_can_plus<_Iter2>)
308
325
  {
309
- return transform_output_iterator{__iter.__current_ + __n, *__iter.__func_};
326
+ return transform_output_iterator{__iter.__iter() + __n, __iter.__func()};
310
327
  }
311
328
 
312
329
  //! @brief Returns a copy of a @c transform_output_iterator incremented by a given number of elements
@@ -320,7 +337,7 @@ public:
320
337
  && noexcept(::cuda::std::declval<const _Iter2&>() + difference_type{}))
321
338
  _CCCL_TRAILING_REQUIRES(transform_output_iterator)(::cuda::std::__iter_can_plus<_Iter2>)
322
339
  {
323
- return transform_output_iterator{__iter.__current_ + __n, *__iter.__func_};
340
+ return transform_output_iterator{__iter.__iter() + __n, __iter.__func()};
324
341
  }
325
342
 
326
343
  //! @brief Decrements the @c transform_output_iterator by a given number of elements
@@ -328,9 +345,10 @@ public:
328
345
  _CCCL_EXEC_CHECK_DISABLE
329
346
  _CCCL_TEMPLATE(class _Iter2 = _Iter)
330
347
  _CCCL_REQUIRES(::cuda::std::__iter_can_minus_equal<_Iter2>)
331
- _CCCL_API constexpr transform_output_iterator& operator-=(difference_type __n) noexcept(noexcept(__current_ -= __n))
348
+ _CCCL_API constexpr transform_output_iterator&
349
+ operator-=(difference_type __n) noexcept(noexcept(::cuda::std::declval<_Iter2&>() -= __n))
332
350
  {
333
- __current_ -= __n;
351
+ __iter() -= __n;
334
352
  return *this;
335
353
  }
336
354
 
@@ -340,23 +358,30 @@ public:
340
358
  _CCCL_EXEC_CHECK_DISABLE
341
359
  template <class _Iter2 = _Iter>
342
360
  [[nodiscard]] _CCCL_API friend constexpr auto
343
- operator-(const transform_output_iterator& __iter,
344
- difference_type __n) noexcept(::cuda::std::is_nothrow_copy_constructible_v<_Iter2>
345
- && noexcept(::cuda::std::declval<const _Iter2&>() - difference_type{}))
346
- _CCCL_TRAILING_REQUIRES(transform_output_iterator)(::cuda::std::__iter_can_minus<_Iter2>)
361
+ operator-(const transform_output_iterator& __iter, difference_type __n) //
362
+ noexcept(::cuda::std::is_nothrow_copy_constructible_v<_Iter2>
363
+ && noexcept(::cuda::std::declval<const _Iter2&>() - difference_type{}))
364
+ _CCCL_TRAILING_REQUIRES(transform_output_iterator)(::cuda::std::__iter_can_minus<_Iter2>)
347
365
  {
348
- return transform_output_iterator{__iter.__current_ - __n, *__iter.__func_};
366
+ return transform_output_iterator{__iter.__iter() - __n, __iter.__func()};
349
367
  }
350
368
 
369
+ template <class _Iter2>
370
+ static constexpr bool __can_difference =
371
+ (::cuda::std::__has_random_access_traversal<_Iter2> || ::cuda::std::sized_sentinel_for<_Iter2, _Iter2>);
372
+
373
+ template <class _Iter2>
374
+ static constexpr bool __noexcept_difference =
375
+ noexcept(::cuda::std::declval<const _Iter2&>() - ::cuda::std::declval<const _Iter2&>());
376
+
351
377
  //! @brief Returns the distance between two @c transform_output_iterator
352
378
  _CCCL_EXEC_CHECK_DISABLE
353
379
  template <class _Iter2 = _Iter>
354
- [[nodiscard]] _CCCL_API friend constexpr auto
355
- operator-(const transform_output_iterator& __lhs, const transform_output_iterator& __rhs) noexcept(
356
- noexcept(::cuda::std::declval<const _Iter2&>() - ::cuda::std::declval<const _Iter2&>()))
357
- _CCCL_TRAILING_REQUIRES(difference_type)(::cuda::std::sized_sentinel_for<_Iter2, _Iter2>)
380
+ [[nodiscard]] _CCCL_API friend constexpr auto operator-(const transform_output_iterator& __lhs,
381
+ const transform_output_iterator& __rhs) //
382
+ noexcept(__noexcept_difference<_Iter2>) _CCCL_TRAILING_REQUIRES(difference_type)(__can_difference<_Iter2>)
358
383
  {
359
- return __lhs.__current_ - __rhs.__current_;
384
+ return __lhs.__iter() - __rhs.__iter();
360
385
  }
361
386
 
362
387
  //! @brief Compares two @c transform_output_iterator for equality by comparing the stored iterators
@@ -367,7 +392,7 @@ public:
367
392
  noexcept(::cuda::std::declval<const _Iter2&>() == ::cuda::std::declval<const _Iter2&>()))
368
393
  _CCCL_TRAILING_REQUIRES(bool)(::cuda::std::equality_comparable<_Iter2>)
369
394
  {
370
- return __lhs.__current_ == __rhs.__current_;
395
+ return __lhs.__iter() == __rhs.__iter();
371
396
  }
372
397
 
373
398
  #if _CCCL_STD_VER <= 2017
@@ -379,7 +404,7 @@ public:
379
404
  noexcept(::cuda::std::declval<const _Iter2&>() != ::cuda::std::declval<const _Iter2&>()))
380
405
  _CCCL_TRAILING_REQUIRES(bool)(::cuda::std::equality_comparable<_Iter2>)
381
406
  {
382
- return __lhs.__current_ != __rhs.__current_;
407
+ return __lhs.__iter() != __rhs.__iter();
383
408
  }
384
409
  #endif // _CCCL_STD_VER <= 2017
385
410
 
@@ -393,7 +418,7 @@ public:
393
418
  _CCCL_TRAILING_REQUIRES(bool)(
394
419
  ::cuda::std::__has_random_access_traversal<_Iter2>&& ::cuda::std::three_way_comparable<_Iter2>)
395
420
  {
396
- return __lhs.__current_ <=> __rhs.__current_;
421
+ return __lhs.__iter() <=> __rhs.__iter();
397
422
  }
398
423
  #else // ^^^ _LIBCUDACXX_HAS_SPACESHIP_OPERATOR() ^^^ / vvv !_LIBCUDACXX_HAS_SPACESHIP_OPERATOR() vvv
399
424
  //! @brief Compares two @c transform_output_iterator for less than by comparing the stored iterators
@@ -404,7 +429,7 @@ public:
404
429
  noexcept(::cuda::std::declval<const _Iter2&>() < ::cuda::std::declval<const _Iter2&>()))
405
430
  _CCCL_TRAILING_REQUIRES(bool)(::cuda::std::__has_random_access_traversal<_Iter2>)
406
431
  {
407
- return __lhs.__current_ < __rhs.__current_;
432
+ return __lhs.__iter() < __rhs.__iter();
408
433
  }
409
434
 
410
435
  //! @brief Compares two @c transform_output_iterator for greater than by comparing the stored iterators
@@ -415,7 +440,7 @@ public:
415
440
  noexcept(::cuda::std::declval<const _Iter2&>() < ::cuda::std::declval<const _Iter2&>()))
416
441
  _CCCL_TRAILING_REQUIRES(bool)(::cuda::std::__has_random_access_traversal<_Iter2>)
417
442
  {
418
- return __lhs.__current_ > __rhs.__current_;
443
+ return __lhs.__iter() > __rhs.__iter();
419
444
  }
420
445
 
421
446
  //! @brief Compares two @c transform_output_iterator for less equal by comparing the stored iterators
@@ -426,7 +451,7 @@ public:
426
451
  noexcept(::cuda::std::declval<const _Iter2&>() < ::cuda::std::declval<const _Iter2&>()))
427
452
  _CCCL_TRAILING_REQUIRES(bool)(::cuda::std::__has_random_access_traversal<_Iter2>)
428
453
  {
429
- return __lhs.__current_ <= __rhs.__current_;
454
+ return __lhs.__iter() <= __rhs.__iter();
430
455
  }
431
456
 
432
457
  //! @brief Compares two @c transform_output_iterator for greater equal by comparing the stored iterators
@@ -437,7 +462,7 @@ public:
437
462
  noexcept(::cuda::std::declval<const _Iter2&>() < ::cuda::std::declval<const _Iter2&>()))
438
463
  _CCCL_TRAILING_REQUIRES(bool)(::cuda::std::__has_random_access_traversal<_Iter2>)
439
464
  {
440
- return __lhs.__current_ >= __rhs.__current_;
465
+ return __lhs.__iter() >= __rhs.__iter();
441
466
  }
442
467
  #endif // !_LIBCUDACXX_HAS_SPACESHIP_OPERATOR()
443
468
  };
@@ -0,0 +1,148 @@
1
+ // -*- C++ -*-
2
+ //===----------------------------------------------------------------------===//
3
+ //
4
+ // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5
+ // See https://llvm.org/LICENSE.txt for license information.
6
+ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7
+ // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES
8
+ //
9
+ //===----------------------------------------------------------------------===//
10
+ #ifndef _CUDA___ITERATOR_ZIP_COMMON_H
11
+ #define _CUDA___ITERATOR_ZIP_COMMON_H
12
+
13
+ #include <cuda/std/detail/__config>
14
+
15
+ #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
16
+ # pragma GCC system_header
17
+ #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
18
+ # pragma clang system_header
19
+ #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
20
+ # pragma system_header
21
+ #endif // no system header
22
+
23
+ #include <cuda/__fwd/zip_iterator.h>
24
+ #include <cuda/std/__fwd/pair.h>
25
+ #include <cuda/std/__fwd/tuple.h>
26
+ #include <cuda/std/__iterator/concepts.h>
27
+ #include <cuda/std/__iterator/iter_move.h>
28
+ #include <cuda/std/__iterator/iter_swap.h>
29
+ #include <cuda/std/__iterator/iterator_traits.h>
30
+ #include <cuda/std/__type_traits/is_nothrow_default_constructible.h>
31
+ #include <cuda/std/__type_traits/is_nothrow_move_constructible.h>
32
+ #include <cuda/std/__utility/declval.h>
33
+
34
+ #include <cuda/std/__cccl/prologue.h>
35
+
36
+ _CCCL_BEGIN_NAMESPACE_CUDA
37
+
38
+ template <class... _Iterators>
39
+ struct __zip_iter_constraints
40
+ {
41
+ static constexpr bool __all_forward = (::cuda::std::__has_forward_traversal<_Iterators> && ...);
42
+ static constexpr bool __all_bidirectional = (::cuda::std::__has_bidirectional_traversal<_Iterators> && ...);
43
+ static constexpr bool __all_random_access = (::cuda::std::__has_random_access_traversal<_Iterators> && ...);
44
+
45
+ static constexpr bool __all_equality_comparable = (::cuda::std::equality_comparable<_Iterators> && ...);
46
+
47
+ #if _LIBCUDACXX_HAS_SPACESHIP_OPERATOR()
48
+ static constexpr bool __all_three_way_comparable = (::cuda::std::three_way_comparable<_Iterators> && ...);
49
+ #endif // _LIBCUDACXX_HAS_SPACESHIP_OPERATOR()
50
+
51
+ // Our C++17 iterators sometimes do not satisfy `sized_sentinel_for` but they should all be random_access
52
+ static constexpr bool __all_sized_sentinel =
53
+ (::cuda::std::sized_sentinel_for<_Iterators, _Iterators> && ...) || __all_random_access;
54
+
55
+ static constexpr bool __all_nothrow_iter_movable =
56
+ (noexcept(::cuda::std::ranges::iter_move(::cuda::std::declval<const _Iterators&>())) && ...)
57
+ && (::cuda::std::is_nothrow_move_constructible_v<::cuda::std::iter_rvalue_reference_t<_Iterators>> && ...);
58
+
59
+ static constexpr bool __all_indirectly_swappable = (::cuda::std::indirectly_swappable<_Iterators> && ...);
60
+
61
+ static constexpr bool __all_noexcept_swappable = (::cuda::std::__noexcept_swappable<_Iterators> && ...);
62
+
63
+ static constexpr bool __all_nothrow_move_constructible =
64
+ (::cuda::std::is_nothrow_move_constructible_v<_Iterators> && ...);
65
+
66
+ static constexpr bool __all_default_initializable = (::cuda::std::default_initializable<_Iterators> && ...);
67
+
68
+ static constexpr bool __all_nothrow_default_constructible =
69
+ (::cuda::std::is_nothrow_default_constructible_v<_Iterators> && ...);
70
+ };
71
+
72
+ template <class... _Iterators>
73
+ [[nodiscard]] _CCCL_API _CCCL_CONSTEVAL auto __get_zip_iterator_concept()
74
+ {
75
+ using _Constraints = __zip_iter_constraints<_Iterators...>;
76
+ if constexpr (_Constraints::__all_random_access)
77
+ {
78
+ return ::cuda::std::random_access_iterator_tag();
79
+ }
80
+ else if constexpr (_Constraints::__all_bidirectional)
81
+ {
82
+ return ::cuda::std::bidirectional_iterator_tag();
83
+ }
84
+ else if constexpr (_Constraints::__all_forward)
85
+ {
86
+ return ::cuda::std::forward_iterator_tag();
87
+ }
88
+ else
89
+ {
90
+ return ::cuda::std::input_iterator_tag();
91
+ }
92
+ _CCCL_UNREACHABLE();
93
+ }
94
+
95
+ //! @note Not static functions because nvc++ sometimes has issues with class static functions in device code
96
+ struct __zip_op_star
97
+ {
98
+ template <class... _Iterators>
99
+ using reference = ::cuda::std::tuple<::cuda::std::iter_reference_t<_Iterators>...>;
100
+
101
+ _CCCL_EXEC_CHECK_DISABLE
102
+ template <class... _Iterators>
103
+ [[nodiscard]] _CCCL_API constexpr reference<_Iterators...> operator()(const _Iterators&... __iters) const
104
+ noexcept(noexcept(reference<_Iterators...>{*__iters...}))
105
+ {
106
+ return reference<_Iterators...>{*__iters...};
107
+ }
108
+ };
109
+
110
+ struct __zip_op_increment
111
+ {
112
+ _CCCL_EXEC_CHECK_DISABLE
113
+ template <class... _Iterators>
114
+ _CCCL_API constexpr void operator()(_Iterators&... __iters) const noexcept(noexcept(((void) ++__iters, ...)))
115
+ {
116
+ ((void) ++__iters, ...);
117
+ }
118
+ };
119
+
120
+ struct __zip_op_decrement
121
+ {
122
+ _CCCL_EXEC_CHECK_DISABLE
123
+ template <class... _Iterators>
124
+ _CCCL_API constexpr void operator()(_Iterators&... __iters) const noexcept(noexcept(((void) --__iters, ...)))
125
+ {
126
+ ((void) --__iters, ...);
127
+ }
128
+ };
129
+
130
+ struct __zip_iter_move
131
+ {
132
+ template <class... _Iterators>
133
+ using __iter_move_ret = ::cuda::std::tuple<::cuda::std::iter_rvalue_reference_t<_Iterators>...>;
134
+
135
+ _CCCL_EXEC_CHECK_DISABLE
136
+ template <class... _Iterators>
137
+ [[nodiscard]] _CCCL_API constexpr __iter_move_ret<_Iterators...> operator()(const _Iterators&... __iters) const
138
+ noexcept(noexcept(__iter_move_ret<_Iterators...>{::cuda::std::ranges::iter_move(__iters)...}))
139
+ {
140
+ return __iter_move_ret<_Iterators...>{::cuda::std::ranges::iter_move(__iters)...};
141
+ }
142
+ };
143
+
144
+ _CCCL_END_NAMESPACE_CUDA
145
+
146
+ #include <cuda/std/__cccl/epilogue.h>
147
+
148
+ #endif // _CUDA___ITERATOR_ZIP_COMMON_H