cuda-cccl 0.3.0__cp312-cp312-manylinux_2_24_aarch64.whl → 0.3.2__cp312-cp312-manylinux_2_24_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cuda-cccl might be problematic. More details are available on the original registry diff page.

Files changed (294):
  1. cuda/cccl/cooperative/__init__.py +7 -1
  2. cuda/cccl/cooperative/experimental/__init__.py +21 -5
  3. cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +2 -5
  4. cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +2 -5
  5. cuda/cccl/headers/include/cub/agent/agent_for.cuh +2 -5
  6. cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +354 -572
  7. cuda/cccl/headers/include/cub/agent/agent_merge.cuh +23 -21
  8. cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +21 -3
  9. cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +2 -5
  10. cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +2 -5
  11. cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +2 -5
  12. cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +2 -5
  13. cuda/cccl/headers/include/cub/agent/agent_rle.cuh +2 -5
  14. cuda/cccl/headers/include/cub/agent/agent_scan.cuh +5 -1
  15. cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +2 -5
  16. cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +2 -5
  17. cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +2 -5
  18. cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +2 -5
  19. cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +2 -5
  20. cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +22 -5
  21. cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +6 -8
  22. cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +24 -14
  23. cuda/cccl/headers/include/cub/block/block_exchange.cuh +5 -0
  24. cuda/cccl/headers/include/cub/block/block_histogram.cuh +4 -0
  25. cuda/cccl/headers/include/cub/block/block_load.cuh +4 -0
  26. cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +4 -2
  27. cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +4 -2
  28. cuda/cccl/headers/include/cub/block/block_reduce.cuh +1 -0
  29. cuda/cccl/headers/include/cub/block/block_scan.cuh +12 -2
  30. cuda/cccl/headers/include/cub/block/block_store.cuh +3 -2
  31. cuda/cccl/headers/include/cub/detail/device_memory_resource.cuh +1 -0
  32. cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +34 -30
  33. cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +1 -1
  34. cuda/cccl/headers/include/cub/device/device_for.cuh +118 -40
  35. cuda/cccl/headers/include/cub/device/device_reduce.cuh +6 -7
  36. cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +170 -260
  37. cuda/cccl/headers/include/cub/device/device_transform.cuh +122 -91
  38. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +6 -7
  39. cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +2 -11
  40. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +12 -29
  41. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +2 -7
  42. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh +0 -1
  43. cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +2 -3
  44. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +4 -5
  45. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +0 -1
  46. cuda/cccl/headers/include/cub/device/dispatch/dispatch_topk.cuh +3 -5
  47. cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +13 -5
  48. cuda/cccl/headers/include/cub/device/dispatch/kernels/for_each.cuh +72 -37
  49. cuda/cccl/headers/include/cub/device/dispatch/kernels/reduce.cuh +2 -5
  50. cuda/cccl/headers/include/cub/device/dispatch/kernels/scan.cuh +2 -5
  51. cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_reduce.cuh +2 -5
  52. cuda/cccl/headers/include/cub/device/dispatch/kernels/transform.cuh +22 -27
  53. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +2 -5
  54. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +2 -5
  55. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +2 -5
  56. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +2 -5
  57. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +2 -5
  58. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +8 -0
  59. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +2 -5
  60. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +2 -5
  61. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +2 -5
  62. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +2 -5
  63. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +2 -5
  64. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +2 -5
  65. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +2 -5
  66. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +61 -70
  67. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +10 -0
  68. cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +24 -17
  69. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +3 -2
  70. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +3 -2
  71. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +2 -2
  72. cuda/cccl/headers/include/cub/warp/warp_load.cuh +6 -6
  73. cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +7 -2
  74. cuda/cccl/headers/include/cub/warp/warp_scan.cuh +7 -3
  75. cuda/cccl/headers/include/cub/warp/warp_store.cuh +1 -0
  76. cuda/cccl/headers/include/cuda/__algorithm/common.h +1 -1
  77. cuda/cccl/headers/include/cuda/__algorithm/copy.h +1 -1
  78. cuda/cccl/headers/include/cuda/__algorithm/fill.h +1 -1
  79. cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +19 -0
  80. cuda/cccl/headers/include/cuda/__cccl_config +1 -0
  81. cuda/cccl/headers/include/cuda/__cmath/fast_modulo_division.h +3 -74
  82. cuda/cccl/headers/include/cuda/__cmath/mul_hi.h +146 -0
  83. cuda/cccl/headers/include/cuda/__complex/get_real_imag.h +0 -4
  84. cuda/cccl/headers/include/cuda/__device/all_devices.h +46 -143
  85. cuda/cccl/headers/include/cuda/__device/arch_id.h +176 -0
  86. cuda/cccl/headers/include/cuda/__device/arch_traits.h +247 -323
  87. cuda/cccl/headers/include/cuda/__device/attributes.h +174 -123
  88. cuda/cccl/headers/include/cuda/__device/compute_capability.h +171 -0
  89. cuda/cccl/headers/include/cuda/__device/device_ref.h +27 -49
  90. cuda/cccl/headers/include/cuda/__device/physical_device.h +100 -96
  91. cuda/cccl/headers/include/cuda/__driver/driver_api.h +105 -3
  92. cuda/cccl/headers/include/cuda/__event/event.h +27 -26
  93. cuda/cccl/headers/include/cuda/__event/event_ref.h +5 -5
  94. cuda/cccl/headers/include/cuda/__event/timed_event.h +10 -7
  95. cuda/cccl/headers/include/cuda/__fwd/devices.h +44 -0
  96. cuda/cccl/headers/include/cuda/__fwd/zip_iterator.h +9 -0
  97. cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h +46 -31
  98. cuda/cccl/headers/include/cuda/__iterator/strided_iterator.h +79 -47
  99. cuda/cccl/headers/include/cuda/__iterator/tabulate_output_iterator.h +59 -36
  100. cuda/cccl/headers/include/cuda/__iterator/transform_input_output_iterator.h +79 -49
  101. cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +74 -48
  102. cuda/cccl/headers/include/cuda/__iterator/transform_output_iterator.h +80 -55
  103. cuda/cccl/headers/include/cuda/__iterator/zip_common.h +148 -0
  104. cuda/cccl/headers/include/cuda/__iterator/zip_iterator.h +21 -137
  105. cuda/cccl/headers/include/cuda/__iterator/zip_transform_iterator.h +592 -0
  106. cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +127 -60
  107. cuda/cccl/headers/include/cuda/__mdspan/host_device_mdspan.h +178 -3
  108. cuda/cccl/headers/include/cuda/__mdspan/restrict_accessor.h +38 -8
  109. cuda/cccl/headers/include/cuda/__mdspan/restrict_mdspan.h +67 -1
  110. cuda/cccl/headers/include/cuda/__memory/ptr_in_range.h +93 -0
  111. cuda/cccl/headers/include/cuda/__memory_resource/get_memory_resource.h +4 -4
  112. cuda/cccl/headers/include/cuda/__memory_resource/properties.h +44 -0
  113. cuda/cccl/headers/include/cuda/__memory_resource/resource.h +1 -1
  114. cuda/cccl/headers/include/cuda/__memory_resource/resource_ref.h +4 -6
  115. cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +2 -1
  116. cuda/cccl/headers/include/cuda/__runtime/ensure_current_context.h +9 -7
  117. cuda/cccl/headers/include/cuda/__stream/stream.h +8 -8
  118. cuda/cccl/headers/include/cuda/__stream/stream_ref.h +18 -16
  119. cuda/cccl/headers/include/cuda/__utility/basic_any.h +1 -1
  120. cuda/cccl/headers/include/cuda/__utility/in_range.h +65 -0
  121. cuda/cccl/headers/include/cuda/algorithm +1 -1
  122. cuda/cccl/headers/include/cuda/cmath +1 -0
  123. cuda/cccl/headers/include/cuda/devices +13 -0
  124. cuda/cccl/headers/include/cuda/iterator +1 -0
  125. cuda/cccl/headers/include/cuda/memory +1 -0
  126. cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h +2 -2
  127. cuda/cccl/headers/include/cuda/std/__algorithm/find.h +1 -1
  128. cuda/cccl/headers/include/cuda/std/__algorithm/includes.h +2 -4
  129. cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h +1 -1
  130. cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h +7 -15
  131. cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h +1 -1
  132. cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h +1 -2
  133. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h +2 -2
  134. cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h +1 -1
  135. cuda/cccl/headers/include/cuda/std/__bit/countl.h +8 -1
  136. cuda/cccl/headers/include/cuda/std/__bit/countr.h +2 -2
  137. cuda/cccl/headers/include/cuda/std/__bit/reference.h +11 -11
  138. cuda/cccl/headers/include/cuda/std/__cccl/algorithm_wrapper.h +36 -0
  139. cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +46 -49
  140. cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h +6 -0
  141. cuda/cccl/headers/include/cuda/std/__cccl/host_std_lib.h +52 -0
  142. cuda/cccl/headers/include/cuda/std/__cccl/memory_wrapper.h +36 -0
  143. cuda/cccl/headers/include/cuda/std/__cccl/numeric_wrapper.h +36 -0
  144. cuda/cccl/headers/include/cuda/std/__chrono/duration.h +16 -16
  145. cuda/cccl/headers/include/cuda/std/__chrono/steady_clock.h +5 -5
  146. cuda/cccl/headers/include/cuda/std/__chrono/system_clock.h +5 -5
  147. cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +3 -2
  148. cuda/cccl/headers/include/cuda/std/__complex/complex.h +3 -2
  149. cuda/cccl/headers/include/cuda/std/__complex/literals.h +14 -34
  150. cuda/cccl/headers/include/cuda/std/__complex/nvbf16.h +2 -1
  151. cuda/cccl/headers/include/cuda/std/__complex/nvfp16.h +4 -3
  152. cuda/cccl/headers/include/cuda/std/__concepts/invocable.h +2 -2
  153. cuda/cccl/headers/include/cuda/std/__cstdlib/malloc.h +3 -2
  154. cuda/cccl/headers/include/cuda/std/__floating_point/fp.h +1 -1
  155. cuda/cccl/headers/include/cuda/std/__functional/bind.h +10 -13
  156. cuda/cccl/headers/include/cuda/std/__functional/function.h +5 -8
  157. cuda/cccl/headers/include/cuda/std/__functional/invoke.h +71 -335
  158. cuda/cccl/headers/include/cuda/std/__functional/mem_fn.h +1 -2
  159. cuda/cccl/headers/include/cuda/std/__functional/reference_wrapper.h +3 -3
  160. cuda/cccl/headers/include/cuda/std/__functional/weak_result_type.h +0 -6
  161. cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +13 -0
  162. cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +13 -0
  163. cuda/cccl/headers/include/cuda/std/__fwd/complex.h +13 -4
  164. cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +23 -0
  165. cuda/cccl/headers/include/cuda/std/__fwd/pair.h +13 -0
  166. cuda/cccl/headers/include/cuda/std/__fwd/string.h +22 -0
  167. cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +14 -0
  168. cuda/cccl/headers/include/cuda/std/__internal/features.h +0 -5
  169. cuda/cccl/headers/include/cuda/std/__internal/namespaces.h +21 -0
  170. cuda/cccl/headers/include/cuda/std/__iterator/iterator_traits.h +5 -5
  171. cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +7 -1
  172. cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +53 -39
  173. cuda/cccl/headers/include/cuda/std/__memory/allocator.h +3 -3
  174. cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +1 -3
  175. cuda/cccl/headers/include/cuda/std/__optional/optional_base.h +1 -0
  176. cuda/cccl/headers/include/cuda/std/__ranges/compressed_movable_box.h +892 -0
  177. cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +2 -2
  178. cuda/cccl/headers/include/cuda/std/__tuple_dir/make_tuple_types.h +23 -1
  179. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +4 -0
  180. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like_ext.h +4 -0
  181. cuda/cccl/headers/include/cuda/std/__type_traits/is_primary_template.h +7 -5
  182. cuda/cccl/headers/include/cuda/std/__type_traits/result_of.h +1 -1
  183. cuda/cccl/headers/include/cuda/std/__utility/pair.h +0 -5
  184. cuda/cccl/headers/include/cuda/std/bitset +1 -1
  185. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +15 -12
  186. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/variant +11 -9
  187. cuda/cccl/headers/include/cuda/std/inplace_vector +4 -4
  188. cuda/cccl/headers/include/cuda/std/numbers +5 -0
  189. cuda/cccl/headers/include/cuda/std/string_view +155 -13
  190. cuda/cccl/headers/include/cuda/std/version +1 -4
  191. cuda/cccl/headers/include/cuda/stream_ref +5 -0
  192. cuda/cccl/headers/include/cuda/utility +1 -0
  193. cuda/cccl/headers/include/nv/target +7 -2
  194. cuda/cccl/headers/include/thrust/allocate_unique.h +1 -1
  195. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +309 -33
  196. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +151 -4
  197. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +60 -3
  198. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +45 -3
  199. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +31 -6
  200. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +29 -16
  201. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +41 -4
  202. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.h +42 -4
  203. cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +3 -3
  204. cuda/cccl/headers/include/thrust/detail/integer_math.h +3 -20
  205. cuda/cccl/headers/include/thrust/detail/internal_functional.h +1 -1
  206. cuda/cccl/headers/include/thrust/detail/memory_algorithms.h +1 -1
  207. cuda/cccl/headers/include/thrust/detail/temporary_array.h +1 -1
  208. cuda/cccl/headers/include/thrust/detail/type_traits.h +1 -1
  209. cuda/cccl/headers/include/thrust/device_delete.h +18 -3
  210. cuda/cccl/headers/include/thrust/device_free.h +16 -3
  211. cuda/cccl/headers/include/thrust/device_new.h +29 -8
  212. cuda/cccl/headers/include/thrust/host_vector.h +1 -1
  213. cuda/cccl/headers/include/thrust/iterator/iterator_traits.h +11 -0
  214. cuda/cccl/headers/include/thrust/iterator/tabulate_output_iterator.h +5 -2
  215. cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +1 -1
  216. cuda/cccl/headers/include/thrust/mr/pool.h +1 -1
  217. cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +33 -0
  218. cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +13 -115
  219. cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +8 -2
  220. cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +7 -7
  221. cuda/cccl/parallel/experimental/__init__.py +21 -74
  222. cuda/compute/__init__.py +79 -0
  223. cuda/{cccl/parallel/experimental → compute}/_bindings.pyi +43 -1
  224. cuda/{cccl/parallel/experimental → compute}/_bindings_impl.pyx +157 -8
  225. cuda/{cccl/parallel/experimental → compute}/algorithms/_histogram.py +2 -2
  226. cuda/{cccl/parallel/experimental → compute}/algorithms/_merge_sort.py +2 -2
  227. cuda/{cccl/parallel/experimental → compute}/algorithms/_radix_sort.py +3 -3
  228. cuda/{cccl/parallel/experimental → compute}/algorithms/_reduce.py +2 -2
  229. cuda/{cccl/parallel/experimental → compute}/algorithms/_scan.py +112 -40
  230. cuda/{cccl/parallel/experimental → compute}/algorithms/_segmented_reduce.py +2 -2
  231. cuda/{cccl/parallel/experimental → compute}/algorithms/_three_way_partition.py +2 -2
  232. cuda/{cccl/parallel/experimental → compute}/algorithms/_transform.py +36 -15
  233. cuda/{cccl/parallel/experimental → compute}/algorithms/_unique_by_key.py +2 -2
  234. cuda/compute/cu12/_bindings_impl.cpython-312-aarch64-linux-gnu.so +0 -0
  235. cuda/{cccl/parallel/experimental → compute}/cu12/cccl/libcccl.c.parallel.so +0 -0
  236. cuda/compute/cu13/_bindings_impl.cpython-312-aarch64-linux-gnu.so +0 -0
  237. cuda/{cccl/parallel/experimental → compute}/cu13/cccl/libcccl.c.parallel.so +0 -0
  238. cuda/{cccl/parallel/experimental → compute}/iterators/__init__.py +2 -0
  239. cuda/{cccl/parallel/experimental → compute}/iterators/_factories.py +36 -8
  240. cuda/{cccl/parallel/experimental → compute}/iterators/_iterators.py +206 -1
  241. cuda/{cccl/parallel/experimental → compute}/numba_utils.py +2 -2
  242. cuda/{cccl/parallel/experimental → compute}/struct.py +2 -2
  243. cuda/{cccl/parallel/experimental → compute}/typing.py +2 -0
  244. cuda/coop/__init__.py +8 -0
  245. cuda/{cccl/cooperative/experimental → coop}/_nvrtc.py +3 -2
  246. cuda/{cccl/cooperative/experimental → coop}/_scan_op.py +3 -3
  247. cuda/{cccl/cooperative/experimental → coop}/_types.py +2 -2
  248. cuda/{cccl/cooperative/experimental → coop}/_typing.py +1 -1
  249. cuda/{cccl/cooperative/experimental → coop}/block/__init__.py +6 -6
  250. cuda/{cccl/cooperative/experimental → coop}/block/_block_exchange.py +4 -4
  251. cuda/{cccl/cooperative/experimental → coop}/block/_block_load_store.py +6 -6
  252. cuda/{cccl/cooperative/experimental → coop}/block/_block_merge_sort.py +4 -4
  253. cuda/{cccl/cooperative/experimental → coop}/block/_block_radix_sort.py +6 -6
  254. cuda/{cccl/cooperative/experimental → coop}/block/_block_reduce.py +6 -6
  255. cuda/{cccl/cooperative/experimental → coop}/block/_block_scan.py +7 -7
  256. cuda/coop/warp/__init__.py +9 -0
  257. cuda/{cccl/cooperative/experimental → coop}/warp/_warp_merge_sort.py +3 -3
  258. cuda/{cccl/cooperative/experimental → coop}/warp/_warp_reduce.py +6 -6
  259. cuda/{cccl/cooperative/experimental → coop}/warp/_warp_scan.py +4 -4
  260. {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/METADATA +1 -1
  261. {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/RECORD +275 -276
  262. cuda/cccl/cooperative/experimental/warp/__init__.py +0 -9
  263. cuda/cccl/headers/include/cub/device/dispatch/dispatch_advance_iterators.cuh +0 -111
  264. cuda/cccl/headers/include/thrust/detail/algorithm_wrapper.h +0 -37
  265. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.inl +0 -371
  266. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.inl +0 -242
  267. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.inl +0 -137
  268. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.inl +0 -99
  269. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.inl +0 -68
  270. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.inl +0 -86
  271. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.inl +0 -79
  272. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.inl +0 -98
  273. cuda/cccl/headers/include/thrust/detail/device_delete.inl +0 -52
  274. cuda/cccl/headers/include/thrust/detail/device_free.inl +0 -47
  275. cuda/cccl/headers/include/thrust/detail/device_new.inl +0 -61
  276. cuda/cccl/headers/include/thrust/detail/memory_wrapper.h +0 -40
  277. cuda/cccl/headers/include/thrust/detail/numeric_wrapper.h +0 -37
  278. cuda/cccl/parallel/experimental/.gitignore +0 -4
  279. cuda/cccl/parallel/experimental/cu12/_bindings_impl.cpython-312-aarch64-linux-gnu.so +0 -0
  280. cuda/cccl/parallel/experimental/cu13/_bindings_impl.cpython-312-aarch64-linux-gnu.so +0 -0
  281. /cuda/{cccl/parallel/experimental → compute}/_bindings.py +0 -0
  282. /cuda/{cccl/parallel/experimental → compute}/_caching.py +0 -0
  283. /cuda/{cccl/parallel/experimental → compute}/_cccl_interop.py +0 -0
  284. /cuda/{cccl/parallel/experimental → compute}/_utils/__init__.py +0 -0
  285. /cuda/{cccl/parallel/experimental → compute}/_utils/protocols.py +0 -0
  286. /cuda/{cccl/parallel/experimental → compute}/_utils/temp_storage_buffer.py +0 -0
  287. /cuda/{cccl/parallel/experimental → compute}/algorithms/__init__.py +0 -0
  288. /cuda/{cccl/parallel/experimental → compute}/cccl/.gitkeep +0 -0
  289. /cuda/{cccl/parallel/experimental → compute}/iterators/_zip_iterator.py +0 -0
  290. /cuda/{cccl/parallel/experimental → compute}/op.py +0 -0
  291. /cuda/{cccl/cooperative/experimental → coop}/_caching.py +0 -0
  292. /cuda/{cccl/cooperative/experimental → coop}/_common.py +0 -0
  293. {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/WHEEL +0 -0
  294. {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/licenses/LICENSE +0 -0
@@ -28,11 +28,15 @@
28
28
  #include <cuda/std/__iterator/concepts.h>
29
29
  #include <cuda/std/__memory/pointer_traits.h>
30
30
  #include <cuda/std/__type_traits/always_false.h>
31
+ #include <cuda/std/__type_traits/is_constant_evaluated.h>
32
+ #include <cuda/std/__type_traits/is_constructible.h>
31
33
  #include <cuda/std/__type_traits/is_convertible.h>
32
34
  #include <cuda/std/__type_traits/is_default_constructible.h>
35
+ #include <cuda/std/__type_traits/is_nothrow_constructible.h>
33
36
  #include <cuda/std/__type_traits/is_nothrow_copy_constructible.h>
34
37
  #include <cuda/std/__type_traits/is_nothrow_default_constructible.h>
35
38
  #include <cuda/std/__utility/declval.h>
39
+ #include <cuda/std/__utility/move.h>
36
40
  #include <cuda/std/cassert>
37
41
  #include <cuda/std/cstddef>
38
42
 
@@ -102,29 +106,30 @@ class __host_accessor : public _Accessor
102
106
  static constexpr bool __is_offset_noexcept =
103
107
  noexcept(::cuda::std::declval<_Accessor>().offset(::cuda::std::declval<__data_handle_type>(), 0));
104
108
 
105
- [[nodiscard]] _CCCL_API static constexpr bool
109
+ #if !_CCCL_COMPILER(NVRTC)
110
+ [[nodiscard]] _CCCL_HOST_API static constexpr bool
106
111
  __is_host_accessible_pointer([[maybe_unused]] __data_handle_type __p) noexcept
107
112
  {
108
- #if _CCCL_HAS_CTK()
113
+ # if _CCCL_HAS_CTK()
109
114
  if constexpr (::cuda::std::contiguous_iterator<__data_handle_type>)
110
115
  {
111
- auto __p1 = ::cuda::std::to_address(__p);
112
- ::CUmemorytype __type{};
113
- const auto __status =
114
- ::cuda::__driver::__pointerGetAttributeNoThrow<::CU_POINTER_ATTRIBUTE_MEMORY_TYPE>(__type, __p1);
115
- return (__status != ::cudaSuccess) || __type == ::CU_MEMORYTYPE_HOST;
116
+ if (!cuda::std::__cccl_default_is_constant_evaluated())
117
+ {
118
+ auto __p1 = ::cuda::std::to_address(__p);
119
+ ::CUmemorytype __type{};
120
+ const auto __status =
121
+ ::cuda::__driver::__pointerGetAttributeNoThrow<::CU_POINTER_ATTRIBUTE_MEMORY_TYPE>(__type, __p1);
122
+ return (__status != ::cudaSuccess) || __type == ::CU_MEMORYTYPE_HOST;
123
+ }
124
+ return true;
116
125
  }
117
126
  else
118
- #endif // _CCCL_HAS_CTK()
127
+ # endif // _CCCL_HAS_CTK()
119
128
  {
120
129
  return true; // cannot be verified
121
130
  }
122
131
  }
123
-
124
- _CCCL_API static constexpr void __check_host_pointer([[maybe_unused]] __data_handle_type __p) noexcept
125
- {
126
- _CCCL_ASSERT(__is_host_accessible_pointer(__p), "cuda::__host_accessor data handle is not a HOST pointer");
127
- }
132
+ #endif // !_CCCL_COMPILER(NVRTC)
128
133
 
129
134
  public:
130
135
  using offset_policy = __host_accessor<typename _Accessor::offset_policy>;
@@ -134,10 +139,15 @@ public:
134
139
 
135
140
  _CCCL_TEMPLATE(class _Accessor2 = _Accessor)
136
141
  _CCCL_REQUIRES(::cuda::std::is_default_constructible_v<_Accessor2>)
137
- _CCCL_API inline __host_accessor() noexcept(::cuda::std::is_nothrow_default_constructible_v<_Accessor2>)
142
+ _CCCL_API constexpr __host_accessor() noexcept(::cuda::std::is_nothrow_default_constructible_v<_Accessor2>)
138
143
  : _Accessor{}
139
144
  {}
140
145
 
146
+ _CCCL_API constexpr __host_accessor(_Accessor&& __acc) noexcept(
147
+ ::cuda::std::is_nothrow_move_constructible_v<_Accessor>)
148
+ : _Accessor{::cuda::std::move(__acc)}
149
+ {}
150
+
141
151
  _CCCL_API constexpr __host_accessor(const _Accessor& __acc) noexcept(
142
152
  ::cuda::std::is_nothrow_copy_constructible_v<_Accessor>)
143
153
  : _Accessor{__acc}
@@ -147,44 +157,59 @@ public:
147
157
  __host_accessor(const __device_accessor<_OtherAccessor>&) = delete;
148
158
 
149
159
  _CCCL_TEMPLATE(typename _OtherAccessor)
150
- _CCCL_REQUIRES(
151
- ::cuda::std::is_constructible_v<_OtherAccessor> _CCCL_AND(::cuda::std::is_convertible_v<_OtherAccessor, _Accessor>))
152
- _CCCL_API constexpr __host_accessor(const __host_accessor<_OtherAccessor>& __acc) noexcept(noexcept(_Accessor{
153
- ::cuda::std::declval<_OtherAccessor>()}))
160
+ _CCCL_REQUIRES(::cuda::std::is_constructible_v<_Accessor, const _OtherAccessor&> _CCCL_AND(
161
+ ::cuda::std::is_convertible_v<const _OtherAccessor&, _Accessor>))
162
+ _CCCL_API constexpr __host_accessor(const __host_accessor<_OtherAccessor>& __acc) noexcept(
163
+ ::cuda::std::is_nothrow_constructible_v<_Accessor, const _OtherAccessor&>)
154
164
  : _Accessor{__acc}
155
165
  {}
156
166
 
157
167
  _CCCL_TEMPLATE(typename _OtherAccessor)
158
- _CCCL_REQUIRES(::cuda::std::is_constructible_v<_OtherAccessor> _CCCL_AND(
159
- !::cuda::std::is_convertible_v<_OtherAccessor, _Accessor>))
168
+ _CCCL_REQUIRES(::cuda::std::is_constructible_v<_Accessor, const _OtherAccessor&> _CCCL_AND(
169
+ !::cuda::std::is_convertible_v<const _OtherAccessor&, _Accessor>))
160
170
  _CCCL_API constexpr explicit __host_accessor(const __host_accessor<_OtherAccessor>& __acc) noexcept(
161
- noexcept(_Accessor{::cuda::std::declval<_OtherAccessor>()}))
171
+ ::cuda::std::is_nothrow_constructible_v<_Accessor, const _OtherAccessor&>)
162
172
  : _Accessor{__acc}
163
173
  {}
164
174
 
165
175
  _CCCL_TEMPLATE(typename _OtherAccessor)
166
- _CCCL_REQUIRES(
167
- ::cuda::std::is_constructible_v<_OtherAccessor> _CCCL_AND(::cuda::std::is_convertible_v<_OtherAccessor, _Accessor>))
168
- _CCCL_API constexpr __host_accessor(const __managed_accessor<_OtherAccessor>& __acc) noexcept(noexcept(_Accessor{
169
- ::cuda::std::declval<_OtherAccessor>()}))
170
- : _Accessor{__acc}
176
+ _CCCL_REQUIRES(::cuda::std::is_constructible_v<_Accessor, _OtherAccessor> _CCCL_AND(
177
+ ::cuda::std::is_convertible_v<_OtherAccessor, _Accessor>))
178
+ _CCCL_API constexpr __host_accessor(__host_accessor<_OtherAccessor>&& __acc) noexcept(
179
+ ::cuda::std::is_nothrow_constructible_v<_Accessor, _OtherAccessor>)
180
+ : _Accessor{::cuda::std::move(__acc)}
171
181
  {}
172
182
 
173
183
  _CCCL_TEMPLATE(typename _OtherAccessor)
174
- _CCCL_REQUIRES(::cuda::std::is_constructible_v<_OtherAccessor> _CCCL_AND(
184
+ _CCCL_REQUIRES(::cuda::std::is_constructible_v<_Accessor, const _OtherAccessor&> _CCCL_AND(
175
185
  !::cuda::std::is_convertible_v<_OtherAccessor, _Accessor>))
186
+ _CCCL_API constexpr explicit __host_accessor(__host_accessor<_OtherAccessor>&& __acc) noexcept(
187
+ ::cuda::std::is_nothrow_constructible_v<_Accessor, _OtherAccessor>)
188
+ : _Accessor{::cuda::std::move(__acc)}
189
+ {}
190
+
191
+ _CCCL_TEMPLATE(typename _OtherAccessor)
192
+ _CCCL_REQUIRES(::cuda::std::is_constructible_v<_Accessor, const _OtherAccessor&> _CCCL_AND(
193
+ ::cuda::std::is_convertible_v<const _OtherAccessor&, _Accessor>))
194
+ _CCCL_API constexpr __host_accessor(const __managed_accessor<_OtherAccessor>& __acc) noexcept(
195
+ ::cuda::std::is_nothrow_constructible_v<_Accessor, const _OtherAccessor&>)
196
+ : _Accessor{__acc}
197
+ {}
198
+
199
+ _CCCL_TEMPLATE(typename _OtherAccessor)
200
+ _CCCL_REQUIRES(::cuda::std::is_constructible_v<_Accessor, const _OtherAccessor&> _CCCL_AND(
201
+ !::cuda::std::is_convertible_v<const _OtherAccessor&, _Accessor>))
176
202
  _CCCL_API constexpr explicit __host_accessor(const __managed_accessor<_OtherAccessor>& __acc) noexcept(
177
- noexcept(_Accessor{::cuda::std::declval<_OtherAccessor>()}))
203
+ ::cuda::std::is_nothrow_constructible_v<_Accessor, const _OtherAccessor&>)
178
204
  : _Accessor{__acc}
179
205
  {}
180
206
 
181
207
  _CCCL_API constexpr reference access(data_handle_type __p, size_t __i) const noexcept(__is_access_noexcept)
182
208
  {
183
- #if _CCCL_HOST_COMPILATION()
184
- __check_host_pointer(__p);
185
- #else // ^^^ _CCCL_HOST_COMPILATION() ^^^ // vvv !_CCCL_HOST_COMPILATION() vvv
186
- static_assert(false, "cuda::__host_accessor cannot be used in DEVICE code");
187
- #endif // !_CCCL_HOST_COMPILATION()
209
+ NV_IF_ELSE_TARGET(
210
+ NV_IS_DEVICE,
211
+ (_CCCL_VERIFY(false, "cuda::__host_accessor cannot be used in DEVICE code");),
212
+ (_CCCL_ASSERT(__is_host_accessible_pointer(__p), "cuda::__host_accessor data handle is not a HOST pointer");))
188
213
  return _Accessor::access(__p, __i);
189
214
  }
190
215
 
@@ -265,12 +290,17 @@ public:
265
290
  using reference = typename _Accessor::reference;
266
291
  using element_type = typename _Accessor::element_type;
267
292
 
268
- _CCCL_TEMPLATE(typename _NotUsed = void)
269
- _CCCL_REQUIRES(::cuda::std::is_default_constructible_v<_Accessor>)
270
- _CCCL_API inline __device_accessor() noexcept(::cuda::std::is_nothrow_default_constructible_v<_Accessor>)
293
+ _CCCL_TEMPLATE(class _Accessor2 = _Accessor)
294
+ _CCCL_REQUIRES(::cuda::std::is_default_constructible_v<_Accessor2>)
295
+ _CCCL_API constexpr __device_accessor() noexcept(::cuda::std::is_nothrow_default_constructible_v<_Accessor2>)
271
296
  : _Accessor{}
272
297
  {}
273
298
 
299
+ _CCCL_API constexpr __device_accessor(_Accessor&& __acc) noexcept(
300
+ ::cuda::std::is_nothrow_move_constructible_v<_Accessor>)
301
+ : _Accessor{::cuda::std::move(__acc)}
302
+ {}
303
+
274
304
  _CCCL_API constexpr __device_accessor(const _Accessor& __acc) noexcept(
275
305
  ::cuda::std::is_nothrow_copy_constructible_v<_Accessor>)
276
306
  : _Accessor{__acc}
@@ -280,34 +310,50 @@ public:
280
310
  __device_accessor(const __host_accessor<_OtherAccessor>&) = delete;
281
311
 
282
312
  _CCCL_TEMPLATE(typename _OtherAccessor)
283
- _CCCL_REQUIRES(
284
- ::cuda::std::is_constructible_v<_OtherAccessor> _CCCL_AND(::cuda::std::is_convertible_v<_OtherAccessor, _Accessor>))
313
+ _CCCL_REQUIRES(::cuda::std::is_constructible_v<_Accessor, const _OtherAccessor&> _CCCL_AND(
314
+ ::cuda::std::is_convertible_v<const _OtherAccessor&, _Accessor>))
285
315
  _CCCL_API constexpr __device_accessor(const __device_accessor<_OtherAccessor>& __acc) noexcept(
286
- ::cuda::std::is_nothrow_copy_constructible_v<_Accessor>)
316
+ ::cuda::std::is_nothrow_constructible_v<_Accessor, const _OtherAccessor&>)
287
317
  : _Accessor{__acc}
288
318
  {}
289
319
 
290
320
  _CCCL_TEMPLATE(typename _OtherAccessor)
291
- _CCCL_REQUIRES(::cuda::std::is_constructible_v<_OtherAccessor> _CCCL_AND(
292
- !::cuda::std::is_convertible_v<_OtherAccessor, _Accessor>))
321
+ _CCCL_REQUIRES(::cuda::std::is_constructible_v<_Accessor, const _OtherAccessor&> _CCCL_AND(
322
+ !::cuda::std::is_convertible_v<const _OtherAccessor&, _Accessor>))
293
323
  _CCCL_API constexpr explicit __device_accessor(const __device_accessor<_OtherAccessor>& __acc) noexcept(
294
- ::cuda::std::is_nothrow_copy_constructible_v<_Accessor>)
324
+ ::cuda::std::is_nothrow_constructible_v<_Accessor, const _OtherAccessor&>)
295
325
  : _Accessor{__acc}
296
326
  {}
297
327
 
298
328
  _CCCL_TEMPLATE(typename _OtherAccessor)
299
- _CCCL_REQUIRES(
300
- ::cuda::std::is_constructible_v<_OtherAccessor> _CCCL_AND(::cuda::std::is_convertible_v<_OtherAccessor, _Accessor>))
301
- _CCCL_API constexpr __device_accessor(const __managed_accessor<_OtherAccessor>& __acc) noexcept(noexcept(_Accessor{
302
- ::cuda::std::declval<_OtherAccessor>()}))
303
- : _Accessor{__acc}
329
+ _CCCL_REQUIRES(::cuda::std::is_constructible_v<_Accessor, _OtherAccessor> _CCCL_AND(
330
+ ::cuda::std::is_convertible_v<_OtherAccessor, _Accessor>))
331
+ _CCCL_API constexpr __device_accessor(__device_accessor<_OtherAccessor>&& __acc) noexcept(
332
+ ::cuda::std::is_nothrow_constructible_v<_Accessor, _OtherAccessor>)
333
+ : _Accessor{::cuda::std::move(__acc)}
304
334
  {}
305
335
 
306
336
  _CCCL_TEMPLATE(typename _OtherAccessor)
307
- _CCCL_REQUIRES(::cuda::std::is_constructible_v<_OtherAccessor> _CCCL_AND(
337
+ _CCCL_REQUIRES(::cuda::std::is_constructible_v<_Accessor, _OtherAccessor> _CCCL_AND(
308
338
  !::cuda::std::is_convertible_v<_OtherAccessor, _Accessor>))
339
+ _CCCL_API constexpr explicit __device_accessor(__device_accessor<_OtherAccessor>&& __acc) noexcept(
340
+ ::cuda::std::is_nothrow_constructible_v<_Accessor, _OtherAccessor>)
341
+ : _Accessor{::cuda::std::move(__acc)}
342
+ {}
343
+
344
+ _CCCL_TEMPLATE(typename _OtherAccessor)
345
+ _CCCL_REQUIRES(::cuda::std::is_constructible_v<_Accessor, const _OtherAccessor&> _CCCL_AND(
346
+ ::cuda::std::is_convertible_v<const _OtherAccessor&, _Accessor>))
347
+ _CCCL_API constexpr __device_accessor(const __managed_accessor<_OtherAccessor>& __acc) noexcept(
348
+ ::cuda::std::is_nothrow_constructible_v<_Accessor, const _OtherAccessor&>)
349
+ : _Accessor{__acc}
350
+ {}
351
+
352
+ _CCCL_TEMPLATE(typename _OtherAccessor)
353
+ _CCCL_REQUIRES(::cuda::std::is_constructible_v<_Accessor, const _OtherAccessor&> _CCCL_AND(
354
+ !::cuda::std::is_convertible_v<const _OtherAccessor&, _Accessor>))
309
355
  _CCCL_API constexpr explicit __device_accessor(const __managed_accessor<_OtherAccessor>& __acc) noexcept(
310
- noexcept(_Accessor{::cuda::std::declval<_OtherAccessor>()}))
356
+ ::cuda::std::is_nothrow_constructible_v<_Accessor, const _OtherAccessor&>)
311
357
  : _Accessor{__acc}
312
358
  {}
313
359
 
@@ -316,7 +362,7 @@ public:
316
362
  NV_IF_ELSE_TARGET(
317
363
  NV_IS_DEVICE,
318
364
  (_CCCL_ASSERT(__is_device_accessible_pointer_from_device(__p), "The pointer is not device accessible");),
319
- (_CCCL_ASSERT(false, "cuda::device_accessor cannot be used in HOST code");))
365
+ (_CCCL_VERIFY(false, "cuda::device_accessor cannot be used in HOST code");))
320
366
  return _Accessor::access(__p, __i);
321
367
  }
322
368
 
@@ -379,12 +425,17 @@ public:
379
425
  using reference = typename _Accessor::reference;
380
426
  using element_type = typename _Accessor::element_type;
381
427
 
382
- _CCCL_TEMPLATE(typename _NotUsed = void)
383
- _CCCL_REQUIRES(::cuda::std::is_default_constructible_v<_Accessor>)
384
- _CCCL_API inline __managed_accessor() noexcept(::cuda::std::is_nothrow_default_constructible_v<_Accessor>)
428
+ _CCCL_TEMPLATE(class _Accessor2 = _Accessor)
429
+ _CCCL_REQUIRES(::cuda::std::is_default_constructible_v<_Accessor2>)
430
+ _CCCL_API constexpr __managed_accessor() noexcept(::cuda::std::is_nothrow_default_constructible_v<_Accessor2>)
385
431
  : _Accessor{}
386
432
  {}
387
433
 
434
+ _CCCL_API constexpr __managed_accessor(_Accessor&& __acc) noexcept(
435
+ ::cuda::std::is_nothrow_move_constructible_v<_Accessor>)
436
+ : _Accessor{::cuda::std::move(__acc)}
437
+ {}
438
+
388
439
  _CCCL_API constexpr __managed_accessor(const _Accessor& __acc) noexcept(
389
440
  ::cuda::std::is_nothrow_copy_constructible_v<_Accessor>)
390
441
  : _Accessor{__acc}
@@ -397,21 +448,37 @@ public:
397
448
  __managed_accessor(const __device_accessor<_OtherAccessor>&) = delete;
398
449
 
399
450
  _CCCL_TEMPLATE(typename _OtherAccessor)
400
- _CCCL_REQUIRES(
401
- ::cuda::std::is_constructible_v<_OtherAccessor> _CCCL_AND(::cuda::std::is_convertible_v<_OtherAccessor, _Accessor>))
402
- _CCCL_API constexpr __managed_accessor(const __managed_accessor<_OtherAccessor>& __acc) noexcept(noexcept(_Accessor{
403
- ::cuda::std::declval<_OtherAccessor>()}))
451
+ _CCCL_REQUIRES(::cuda::std::is_constructible_v<_Accessor, const _OtherAccessor&> _CCCL_AND(
452
+ ::cuda::std::is_convertible_v<const _OtherAccessor&, _Accessor>))
453
+ _CCCL_API constexpr __managed_accessor(const __managed_accessor<_OtherAccessor>& __acc) noexcept(
454
+ ::cuda::std::is_nothrow_constructible_v<_Accessor, const _OtherAccessor&>)
404
455
  : _Accessor{__acc}
405
456
  {}
406
457
 
407
458
  _CCCL_TEMPLATE(typename _OtherAccessor)
408
- _CCCL_REQUIRES(::cuda::std::is_constructible_v<_OtherAccessor> _CCCL_AND(
409
- !::cuda::std::is_convertible_v<_OtherAccessor, _Accessor>))
459
+ _CCCL_REQUIRES(::cuda::std::is_constructible_v<_Accessor, const _OtherAccessor&> _CCCL_AND(
460
+ !::cuda::std::is_convertible_v<const _OtherAccessor&, _Accessor>))
410
461
  _CCCL_API constexpr explicit __managed_accessor(const __managed_accessor<_OtherAccessor>& __acc) noexcept(
411
- noexcept(_Accessor{::cuda::std::declval<_OtherAccessor>()}))
462
+ ::cuda::std::is_nothrow_constructible_v<_Accessor, const _OtherAccessor&>)
412
463
  : _Accessor{__acc}
413
464
  {}
414
465
 
466
+ _CCCL_TEMPLATE(typename _OtherAccessor)
467
+ _CCCL_REQUIRES(::cuda::std::is_constructible_v<_Accessor, _OtherAccessor> _CCCL_AND(
468
+ ::cuda::std::is_convertible_v<_OtherAccessor, _Accessor>))
469
+ _CCCL_API constexpr __managed_accessor(__managed_accessor<_OtherAccessor>&& __acc) noexcept(
470
+ ::cuda::std::is_nothrow_constructible_v<_Accessor, _OtherAccessor>)
471
+ : _Accessor{::cuda::std::move(__acc)}
472
+ {}
473
+
474
+ _CCCL_TEMPLATE(typename _OtherAccessor)
475
+ _CCCL_REQUIRES(::cuda::std::is_constructible_v<_Accessor, _OtherAccessor> _CCCL_AND(
476
+ !::cuda::std::is_convertible_v<_OtherAccessor, _Accessor>))
477
+ _CCCL_API constexpr explicit __managed_accessor(__managed_accessor<_OtherAccessor>&& __acc) noexcept(
478
+ ::cuda::std::is_nothrow_constructible_v<_Accessor, _OtherAccessor>)
479
+ : _Accessor{::cuda::std::move(__acc)}
480
+ {}
481
+
415
482
  _CCCL_API constexpr reference access(data_handle_type __p, size_t __i) const noexcept(__is_access_noexcept)
416
483
  {
417
484
  NV_IF_TARGET(NV_IS_HOST, (__check_managed_pointer(__p);))
@@ -22,6 +22,16 @@
22
22
  #endif // no system header
23
23
 
24
24
  #include <cuda/__mdspan/host_device_accessor.h>
25
+ #include <cuda/std/__concepts/concept_macros.h>
26
+ #include <cuda/std/__fwd/array.h>
27
+ #include <cuda/std/__fwd/span.h>
28
+ #include <cuda/std/__type_traits/extent.h>
29
+ #include <cuda/std/__type_traits/is_convertible.h>
30
+ #include <cuda/std/__type_traits/is_pointer.h>
31
+ #include <cuda/std/__type_traits/rank.h>
32
+ #include <cuda/std/__type_traits/remove_all_extents.h>
33
+ #include <cuda/std/__type_traits/remove_pointer.h>
34
+ #include <cuda/std/__type_traits/remove_reference.h>
25
35
  #include <cuda/std/mdspan>
26
36
 
27
37
  #include <cuda/std/__cccl/prologue.h>
@@ -32,19 +42,184 @@ template <typename _ElementType,
32
42
  typename _Extents,
33
43
  typename _LayoutPolicy = ::cuda::std::layout_right,
34
44
  typename _AccessorPolicy = ::cuda::std::default_accessor<_ElementType>>
35
- using host_mdspan = ::cuda::std::mdspan<_ElementType, _Extents, _LayoutPolicy, host_accessor<_AccessorPolicy>>;
45
+ class host_mdspan : public ::cuda::std::mdspan<_ElementType, _Extents, _LayoutPolicy, host_accessor<_AccessorPolicy>>
46
+ {
47
+ public:
48
+ _LIBCUDACXX_DELEGATE_CONSTRUCTORS(
49
+ host_mdspan, ::cuda::std::mdspan, _ElementType, _Extents, _LayoutPolicy, host_accessor<_AccessorPolicy>);
50
+
51
+ _CCCL_API friend constexpr void swap(host_mdspan& __x, host_mdspan& __y) noexcept
52
+ {
53
+ swap(static_cast<__base&>(__x), static_cast<__base&>(__y));
54
+ }
55
+ };
56
+
57
+ _CCCL_TEMPLATE(class _ElementType, class... _OtherIndexTypes)
58
+ _CCCL_REQUIRES((sizeof...(_OtherIndexTypes) > 0)
59
+ _CCCL_AND(::cuda::std::is_convertible_v<_OtherIndexTypes, size_t>&&...))
60
+ _CCCL_HOST_DEVICE explicit host_mdspan(_ElementType*, _OtherIndexTypes...)
61
+ -> host_mdspan<_ElementType, ::cuda::std::extents<size_t, ::cuda::std::__maybe_static_ext<_OtherIndexTypes>...>>;
62
+
63
+ _CCCL_TEMPLATE(class _Pointer)
64
+ _CCCL_REQUIRES(::cuda::std::is_pointer_v<::cuda::std::remove_reference_t<_Pointer>>)
65
+ _CCCL_HOST_DEVICE host_mdspan(_Pointer&&)
66
+ -> host_mdspan<::cuda::std::remove_pointer_t<::cuda::std::remove_reference_t<_Pointer>>, ::cuda::std::extents<size_t>>;
67
+
68
+ _CCCL_TEMPLATE(class _CArray)
69
+ _CCCL_REQUIRES(::cuda::std::is_array_v<_CArray> _CCCL_AND(::cuda::std::rank_v<_CArray> == 1))
70
+ _CCCL_HOST_DEVICE host_mdspan(_CArray&)
71
+ -> host_mdspan<::cuda::std::remove_all_extents_t<_CArray>,
72
+ ::cuda::std::extents<size_t, ::cuda::std::extent_v<_CArray, 0>>>;
73
+
74
+ template <class _ElementType, class _OtherIndexType, size_t _Size>
75
+ _CCCL_HOST_DEVICE host_mdspan(_ElementType*, const ::cuda::std::array<_OtherIndexType, _Size>&)
76
+ -> host_mdspan<_ElementType, ::cuda::std::dextents<size_t, _Size>>;
77
+
78
+ template <class _ElementType, class _OtherIndexType, size_t _Size>
79
+ _CCCL_HOST_DEVICE host_mdspan(_ElementType*, ::cuda::std::span<_OtherIndexType, _Size>)
80
+ -> host_mdspan<_ElementType, ::cuda::std::dextents<size_t, _Size>>;
81
+
82
+ // This one is necessary because all the constructors take `data_handle_type`s, not
83
+ // `_ElementType*`s, and `data_handle_type` is taken from `accessor_type::data_handle_type`, which
84
+ // seems to throw off automatic deduction guides.
85
+ template <class _ElementType, class _OtherIndexType, size_t... _ExtentsPack>
86
+ _CCCL_HOST_DEVICE host_mdspan(_ElementType*, const ::cuda::std::extents<_OtherIndexType, _ExtentsPack...>&)
87
+ -> host_mdspan<_ElementType, ::cuda::std::extents<_OtherIndexType, _ExtentsPack...>>;
88
+
89
+ template <class _ElementType, class _MappingType>
90
+ _CCCL_HOST_DEVICE host_mdspan(_ElementType*, const _MappingType&)
91
+ -> host_mdspan<_ElementType, typename _MappingType::extents_type, typename _MappingType::layout_type>;
92
+
93
+ template <class _MappingType, class _AccessorType>
94
+ _CCCL_HOST_DEVICE host_mdspan(const typename _AccessorType::data_handle_type, const _MappingType&, const _AccessorType&)
95
+ -> host_mdspan<typename _AccessorType::element_type,
96
+ typename _MappingType::extents_type,
97
+ typename _MappingType::layout_type,
98
+ _AccessorType>;
36
99
 
37
100
  template <typename _ElementType,
38
101
  typename _Extents,
39
102
  typename _LayoutPolicy = ::cuda::std::layout_right,
40
103
  typename _AccessorPolicy = ::cuda::std::default_accessor<_ElementType>>
41
- using device_mdspan = ::cuda::std::mdspan<_ElementType, _Extents, _LayoutPolicy, device_accessor<_AccessorPolicy>>;
104
+ class device_mdspan
105
+ : public ::cuda::std::mdspan<_ElementType, _Extents, _LayoutPolicy, device_accessor<_AccessorPolicy>>
106
+ {
107
+ public:
108
+ _LIBCUDACXX_DELEGATE_CONSTRUCTORS(
109
+ device_mdspan, ::cuda::std::mdspan, _ElementType, _Extents, _LayoutPolicy, device_accessor<_AccessorPolicy>);
110
+
111
+ _CCCL_API friend constexpr void swap(device_mdspan& __x, device_mdspan& __y) noexcept
112
+ {
113
+ swap(static_cast<__base&>(__x), static_cast<__base&>(__y));
114
+ }
115
+ };
116
+
117
+ _CCCL_TEMPLATE(class _ElementType, class... _OtherIndexTypes)
118
+ _CCCL_REQUIRES((sizeof...(_OtherIndexTypes) > 0)
119
+ _CCCL_AND(::cuda::std::is_convertible_v<_OtherIndexTypes, size_t>&&... && true))
120
+ _CCCL_HOST_DEVICE explicit device_mdspan(_ElementType*, _OtherIndexTypes...)
121
+ -> device_mdspan<_ElementType, ::cuda::std::extents<size_t, ::cuda::std::__maybe_static_ext<_OtherIndexTypes>...>>;
122
+
123
+ _CCCL_TEMPLATE(class _Pointer)
124
+ _CCCL_REQUIRES(::cuda::std::is_pointer_v<::cuda::std::remove_reference_t<_Pointer>>)
125
+ _CCCL_HOST_DEVICE device_mdspan(_Pointer&&)
126
+ -> device_mdspan<::cuda::std::remove_pointer_t<::cuda::std::remove_reference_t<_Pointer>>,
127
+ ::cuda::std::extents<size_t>>;
128
+
129
+ _CCCL_TEMPLATE(class _CArray)
130
+ _CCCL_REQUIRES(::cuda::std::is_array_v<_CArray> _CCCL_AND(::cuda::std::rank_v<_CArray> == 1))
131
+ _CCCL_HOST_DEVICE device_mdspan(_CArray&)
132
+ -> device_mdspan<::cuda::std::remove_all_extents_t<_CArray>,
133
+ ::cuda::std::extents<size_t, ::cuda::std::extent_v<_CArray, 0>>>;
134
+
135
+ template <class _ElementType, class _OtherIndexType, size_t _Size>
136
+ _CCCL_HOST_DEVICE device_mdspan(_ElementType*, const ::cuda::std::array<_OtherIndexType, _Size>&)
137
+ -> device_mdspan<_ElementType, ::cuda::std::dextents<size_t, _Size>>;
138
+
139
+ template <class _ElementType, class _OtherIndexType, size_t _Size>
140
+ _CCCL_HOST_DEVICE device_mdspan(_ElementType*, ::cuda::std::span<_OtherIndexType, _Size>)
141
+ -> device_mdspan<_ElementType, ::cuda::std::dextents<size_t, _Size>>;
142
+
143
+ // This one is necessary because all the constructors take `data_handle_type`s, not
144
+ // `_ElementType*`s, and `data_handle_type` is taken from `accessor_type::data_handle_type`, which
145
+ // seems to throw off automatic deduction guides.
146
+ template <class _ElementType, class _OtherIndexType, size_t... _ExtentsPack>
147
+ _CCCL_HOST_DEVICE device_mdspan(_ElementType*, const ::cuda::std::extents<_OtherIndexType, _ExtentsPack...>&)
148
+ -> device_mdspan<_ElementType, ::cuda::std::extents<_OtherIndexType, _ExtentsPack...>>;
149
+
150
+ template <class _ElementType, class _MappingType>
151
+ _CCCL_HOST_DEVICE device_mdspan(_ElementType*, const _MappingType&)
152
+ -> device_mdspan<_ElementType, typename _MappingType::extents_type, typename _MappingType::layout_type>;
153
+
154
+ template <class _MappingType, class _AccessorType>
155
+ _CCCL_HOST_DEVICE
156
+ device_mdspan(const typename _AccessorType::data_handle_type, const _MappingType&, const _AccessorType&)
157
+ -> device_mdspan<typename _AccessorType::element_type,
158
+ typename _MappingType::extents_type,
159
+ typename _MappingType::layout_type,
160
+ _AccessorType>;
42
161
 
43
162
  template <typename _ElementType,
44
163
  typename _Extents,
45
164
  typename _LayoutPolicy = ::cuda::std::layout_right,
46
165
  typename _AccessorPolicy = ::cuda::std::default_accessor<_ElementType>>
47
- using managed_mdspan = ::cuda::std::mdspan<_ElementType, _Extents, _LayoutPolicy, managed_accessor<_AccessorPolicy>>;
166
+ class managed_mdspan
167
+ : public ::cuda::std::mdspan<_ElementType, _Extents, _LayoutPolicy, managed_accessor<_AccessorPolicy>>
168
+ {
169
+ public:
170
+ _LIBCUDACXX_DELEGATE_CONSTRUCTORS(
171
+ managed_mdspan, ::cuda::std::mdspan, _ElementType, _Extents, _LayoutPolicy, managed_accessor<_AccessorPolicy>);
172
+
173
+ _CCCL_API friend constexpr void swap(managed_mdspan& __x, managed_mdspan& __y) noexcept
174
+ {
175
+ swap(static_cast<__base&>(__x), static_cast<__base&>(__y));
176
+ }
177
+ };
178
+
179
+ _CCCL_TEMPLATE(class _ElementType, class... _OtherIndexTypes)
180
+ _CCCL_REQUIRES((sizeof...(_OtherIndexTypes) > 0)
181
+ _CCCL_AND(::cuda::std::is_convertible_v<_OtherIndexTypes, size_t>&&... && true))
182
+ _CCCL_HOST_DEVICE explicit managed_mdspan(_ElementType*, _OtherIndexTypes...)
183
+ -> managed_mdspan<_ElementType, ::cuda::std::extents<size_t, ::cuda::std::__maybe_static_ext<_OtherIndexTypes>...>>;
184
+
185
+ _CCCL_TEMPLATE(class _Pointer)
186
+ _CCCL_REQUIRES(::cuda::std::is_pointer_v<::cuda::std::remove_reference_t<_Pointer>>)
187
+ _CCCL_HOST_DEVICE managed_mdspan(_Pointer&&)
188
+ -> managed_mdspan<::cuda::std::remove_pointer_t<::cuda::std::remove_reference_t<_Pointer>>,
189
+ ::cuda::std::extents<size_t>>;
190
+
191
+ _CCCL_TEMPLATE(class _CArray)
192
+ _CCCL_REQUIRES(::cuda::std::is_array_v<_CArray> _CCCL_AND(::cuda::std::rank_v<_CArray> == 1))
193
+ _CCCL_HOST_DEVICE managed_mdspan(_CArray&)
194
+ -> managed_mdspan<::cuda::std::remove_all_extents_t<_CArray>,
195
+ ::cuda::std::extents<size_t, ::cuda::std::extent_v<_CArray, 0>>>;
196
+
197
+ template <class _ElementType, class _OtherIndexType, size_t _Size>
198
+ _CCCL_HOST_DEVICE managed_mdspan(_ElementType*, const ::cuda::std::array<_OtherIndexType, _Size>&)
199
+ -> managed_mdspan<_ElementType, ::cuda::std::dextents<size_t, _Size>>;
200
+
201
+ template <class _ElementType, class _OtherIndexType, size_t _Size>
202
+ _CCCL_HOST_DEVICE managed_mdspan(_ElementType*, ::cuda::std::span<_OtherIndexType, _Size>)
203
+ -> managed_mdspan<_ElementType, ::cuda::std::dextents<size_t, _Size>>;
204
+
205
+ // This one is necessary because all the constructors take `data_handle_type`s, not
206
+ // `_ElementType*`s, and `data_handle_type` is taken from `accessor_type::data_handle_type`, which
207
+ // seems to throw off automatic deduction guides.
208
+ template <class _ElementType, class _OtherIndexType, size_t... _ExtentsPack>
209
+ _CCCL_HOST_DEVICE managed_mdspan(_ElementType*, const ::cuda::std::extents<_OtherIndexType, _ExtentsPack...>&)
210
+ -> managed_mdspan<_ElementType, ::cuda::std::extents<_OtherIndexType, _ExtentsPack...>>;
211
+
212
+ template <class _ElementType, class _MappingType>
213
+ _CCCL_HOST_DEVICE managed_mdspan(_ElementType*, const _MappingType&)
214
+ -> managed_mdspan<_ElementType, typename _MappingType::extents_type, typename _MappingType::layout_type>;
215
+
216
+ template <class _MappingType, class _AccessorType>
217
+ _CCCL_HOST_DEVICE
218
+ managed_mdspan(const typename _AccessorType::data_handle_type, const _MappingType&, const _AccessorType&)
219
+ -> managed_mdspan<typename _AccessorType::element_type,
220
+ typename _MappingType::extents_type,
221
+ typename _MappingType::layout_type,
222
+ _AccessorType>;
48
223
 
49
224
  /***********************************************************************************************************************
50
225
  * Accessibility Traits
@@ -22,13 +22,17 @@
22
22
  #endif // no system header
23
23
 
24
24
  #include <cuda/std/__concepts/concept_macros.h>
25
+ #include <cuda/std/__type_traits/is_constructible.h>
25
26
  #include <cuda/std/__type_traits/is_convertible.h>
26
27
  #include <cuda/std/__type_traits/is_default_constructible.h>
28
+ #include <cuda/std/__type_traits/is_nothrow_constructible.h>
27
29
  #include <cuda/std/__type_traits/is_nothrow_copy_constructible.h>
28
30
  #include <cuda/std/__type_traits/is_nothrow_default_constructible.h>
31
+ #include <cuda/std/__type_traits/is_nothrow_move_constructible.h>
29
32
  #include <cuda/std/__type_traits/is_pointer.h>
30
33
  #include <cuda/std/__type_traits/remove_pointer.h>
31
34
  #include <cuda/std/__utility/declval.h>
35
+ #include <cuda/std/__utility/move.h>
32
36
  #include <cuda/std/cstddef>
33
37
 
34
38
  #include <cuda/std/__cccl/prologue.h>
@@ -55,6 +59,9 @@ inline constexpr bool is_restrict_accessor_v<__restrict_accessor<_Accessor>> = t
55
59
  * Restrict Accessor
56
60
  **********************************************************************************************************************/
57
61
 
62
+ _CCCL_DIAG_PUSH
63
+ _CCCL_DIAG_SUPPRESS_GCC("-Wignored-qualifiers")
64
+
58
65
  template <typename _Accessor>
59
66
  class __restrict_accessor : public _Accessor
60
67
  {
@@ -77,7 +84,7 @@ public:
77
84
 
78
85
  _CCCL_TEMPLATE(class _Accessor2 = _Accessor)
79
86
  _CCCL_REQUIRES(::cuda::std::is_default_constructible_v<_Accessor2>)
80
- _CCCL_API inline __restrict_accessor() noexcept(::cuda::std::is_nothrow_default_constructible_v<_Accessor2>)
87
+ _CCCL_API constexpr __restrict_accessor() noexcept(::cuda::std::is_nothrow_default_constructible_v<_Accessor2>)
81
88
  : _Accessor{}
82
89
  {}
83
90
 
@@ -86,22 +93,43 @@ public:
86
93
  : _Accessor{__acc}
87
94
  {}
88
95
 
96
+ _CCCL_API constexpr __restrict_accessor(_Accessor&& __acc) noexcept(
97
+ ::cuda::std::is_nothrow_move_constructible_v<_Accessor>)
98
+ : _Accessor{::cuda::std::move(__acc)}
99
+ {}
100
+
89
101
  _CCCL_TEMPLATE(typename _OtherAccessor)
90
- _CCCL_REQUIRES(
91
- ::cuda::std::is_constructible_v<_OtherAccessor> _CCCL_AND(::cuda::std::is_convertible_v<_OtherAccessor, _Accessor>))
92
- _CCCL_API constexpr __restrict_accessor(const __restrict_accessor<_OtherAccessor>& __acc) noexcept(noexcept(_Accessor{
93
- ::cuda::std::declval<_OtherAccessor>()}))
102
+ _CCCL_REQUIRES(::cuda::std::is_constructible_v<_Accessor, const _OtherAccessor&> _CCCL_AND(
103
+ ::cuda::std::is_convertible_v<const _OtherAccessor&, _Accessor>))
104
+ _CCCL_API constexpr __restrict_accessor(const __restrict_accessor<_OtherAccessor>& __acc) noexcept(
105
+ ::cuda::std::is_nothrow_constructible_v<_Accessor, const _OtherAccessor&>)
94
106
  : _Accessor{__acc}
95
107
  {}
96
108
 
97
109
  _CCCL_TEMPLATE(typename _OtherAccessor)
98
- _CCCL_REQUIRES(::cuda::std::is_constructible_v<_OtherAccessor> _CCCL_AND(
99
- !::cuda::std::is_convertible_v<_OtherAccessor, _Accessor>))
110
+ _CCCL_REQUIRES(::cuda::std::is_constructible_v<_Accessor, const _OtherAccessor&> _CCCL_AND(
111
+ !::cuda::std::is_convertible_v<const _OtherAccessor&, _Accessor>))
100
112
  _CCCL_API constexpr explicit __restrict_accessor(const __restrict_accessor<_OtherAccessor>& __acc) noexcept(
101
- noexcept(_Accessor{::cuda::std::declval<_OtherAccessor>()}))
113
+ ::cuda::std::is_nothrow_constructible_v<_Accessor, const _OtherAccessor&>)
102
114
  : _Accessor{__acc}
103
115
  {}
104
116
 
117
+ _CCCL_TEMPLATE(typename _OtherAccessor)
118
+ _CCCL_REQUIRES(::cuda::std::is_constructible_v<_Accessor, _OtherAccessor> _CCCL_AND(
119
+ ::cuda::std::is_convertible_v<_OtherAccessor, _Accessor>))
120
+ _CCCL_API constexpr __restrict_accessor(__restrict_accessor<_OtherAccessor>&& __acc) noexcept(
121
+ ::cuda::std::is_nothrow_constructible_v<_Accessor, _OtherAccessor>)
122
+ : _Accessor{::cuda::std::move(__acc)}
123
+ {}
124
+
125
+ _CCCL_TEMPLATE(typename _OtherAccessor)
126
+ _CCCL_REQUIRES(::cuda::std::is_constructible_v<_Accessor, _OtherAccessor> _CCCL_AND(
127
+ !::cuda::std::is_convertible_v<_OtherAccessor, _Accessor>))
128
+ _CCCL_API constexpr explicit __restrict_accessor(__restrict_accessor<_OtherAccessor>&& __acc) noexcept(
129
+ ::cuda::std::is_nothrow_constructible_v<_Accessor, _OtherAccessor>)
130
+ : _Accessor{::cuda::std::move(__acc)}
131
+ {}
132
+
105
133
  _CCCL_API constexpr reference access(__element_type* _CCCL_RESTRICT __p, size_t __i) const
106
134
  noexcept(__is_access_noexcept)
107
135
  {
@@ -115,6 +143,8 @@ public:
115
143
  }
116
144
  };
117
145
 
146
+ _CCCL_DIAG_POP
147
+
118
148
  _CCCL_END_NAMESPACE_CUDA
119
149
 
120
150
  #include <cuda/std/__cccl/epilogue.h>