cuda-cccl 0.3.0__cp313-cp313-manylinux_2_24_aarch64.whl → 0.3.2__cp313-cp313-manylinux_2_24_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cuda-cccl might be problematic. Click here for more details.

Files changed (294) hide show
  1. cuda/cccl/cooperative/__init__.py +7 -1
  2. cuda/cccl/cooperative/experimental/__init__.py +21 -5
  3. cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +2 -5
  4. cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +2 -5
  5. cuda/cccl/headers/include/cub/agent/agent_for.cuh +2 -5
  6. cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +354 -572
  7. cuda/cccl/headers/include/cub/agent/agent_merge.cuh +23 -21
  8. cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +21 -3
  9. cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +2 -5
  10. cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +2 -5
  11. cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +2 -5
  12. cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +2 -5
  13. cuda/cccl/headers/include/cub/agent/agent_rle.cuh +2 -5
  14. cuda/cccl/headers/include/cub/agent/agent_scan.cuh +5 -1
  15. cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +2 -5
  16. cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +2 -5
  17. cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +2 -5
  18. cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +2 -5
  19. cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +2 -5
  20. cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +22 -5
  21. cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +6 -8
  22. cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +24 -14
  23. cuda/cccl/headers/include/cub/block/block_exchange.cuh +5 -0
  24. cuda/cccl/headers/include/cub/block/block_histogram.cuh +4 -0
  25. cuda/cccl/headers/include/cub/block/block_load.cuh +4 -0
  26. cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +4 -2
  27. cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +4 -2
  28. cuda/cccl/headers/include/cub/block/block_reduce.cuh +1 -0
  29. cuda/cccl/headers/include/cub/block/block_scan.cuh +12 -2
  30. cuda/cccl/headers/include/cub/block/block_store.cuh +3 -2
  31. cuda/cccl/headers/include/cub/detail/device_memory_resource.cuh +1 -0
  32. cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +34 -30
  33. cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +1 -1
  34. cuda/cccl/headers/include/cub/device/device_for.cuh +118 -40
  35. cuda/cccl/headers/include/cub/device/device_reduce.cuh +6 -7
  36. cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +170 -260
  37. cuda/cccl/headers/include/cub/device/device_transform.cuh +122 -91
  38. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +6 -7
  39. cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +2 -11
  40. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +12 -29
  41. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +2 -7
  42. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh +0 -1
  43. cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +2 -3
  44. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +4 -5
  45. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +0 -1
  46. cuda/cccl/headers/include/cub/device/dispatch/dispatch_topk.cuh +3 -5
  47. cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +13 -5
  48. cuda/cccl/headers/include/cub/device/dispatch/kernels/for_each.cuh +72 -37
  49. cuda/cccl/headers/include/cub/device/dispatch/kernels/reduce.cuh +2 -5
  50. cuda/cccl/headers/include/cub/device/dispatch/kernels/scan.cuh +2 -5
  51. cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_reduce.cuh +2 -5
  52. cuda/cccl/headers/include/cub/device/dispatch/kernels/transform.cuh +22 -27
  53. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +2 -5
  54. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +2 -5
  55. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +2 -5
  56. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +2 -5
  57. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +2 -5
  58. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +8 -0
  59. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +2 -5
  60. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +2 -5
  61. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +2 -5
  62. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +2 -5
  63. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +2 -5
  64. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +2 -5
  65. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +2 -5
  66. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +61 -70
  67. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +10 -0
  68. cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +24 -17
  69. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +3 -2
  70. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +3 -2
  71. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +2 -2
  72. cuda/cccl/headers/include/cub/warp/warp_load.cuh +6 -6
  73. cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +7 -2
  74. cuda/cccl/headers/include/cub/warp/warp_scan.cuh +7 -3
  75. cuda/cccl/headers/include/cub/warp/warp_store.cuh +1 -0
  76. cuda/cccl/headers/include/cuda/__algorithm/common.h +1 -1
  77. cuda/cccl/headers/include/cuda/__algorithm/copy.h +1 -1
  78. cuda/cccl/headers/include/cuda/__algorithm/fill.h +1 -1
  79. cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +19 -0
  80. cuda/cccl/headers/include/cuda/__cccl_config +1 -0
  81. cuda/cccl/headers/include/cuda/__cmath/fast_modulo_division.h +3 -74
  82. cuda/cccl/headers/include/cuda/__cmath/mul_hi.h +146 -0
  83. cuda/cccl/headers/include/cuda/__complex/get_real_imag.h +0 -4
  84. cuda/cccl/headers/include/cuda/__device/all_devices.h +46 -143
  85. cuda/cccl/headers/include/cuda/__device/arch_id.h +176 -0
  86. cuda/cccl/headers/include/cuda/__device/arch_traits.h +247 -323
  87. cuda/cccl/headers/include/cuda/__device/attributes.h +174 -123
  88. cuda/cccl/headers/include/cuda/__device/compute_capability.h +171 -0
  89. cuda/cccl/headers/include/cuda/__device/device_ref.h +27 -49
  90. cuda/cccl/headers/include/cuda/__device/physical_device.h +100 -96
  91. cuda/cccl/headers/include/cuda/__driver/driver_api.h +105 -3
  92. cuda/cccl/headers/include/cuda/__event/event.h +27 -26
  93. cuda/cccl/headers/include/cuda/__event/event_ref.h +5 -5
  94. cuda/cccl/headers/include/cuda/__event/timed_event.h +10 -7
  95. cuda/cccl/headers/include/cuda/__fwd/devices.h +44 -0
  96. cuda/cccl/headers/include/cuda/__fwd/zip_iterator.h +9 -0
  97. cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h +46 -31
  98. cuda/cccl/headers/include/cuda/__iterator/strided_iterator.h +79 -47
  99. cuda/cccl/headers/include/cuda/__iterator/tabulate_output_iterator.h +59 -36
  100. cuda/cccl/headers/include/cuda/__iterator/transform_input_output_iterator.h +79 -49
  101. cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +74 -48
  102. cuda/cccl/headers/include/cuda/__iterator/transform_output_iterator.h +80 -55
  103. cuda/cccl/headers/include/cuda/__iterator/zip_common.h +148 -0
  104. cuda/cccl/headers/include/cuda/__iterator/zip_iterator.h +21 -137
  105. cuda/cccl/headers/include/cuda/__iterator/zip_transform_iterator.h +592 -0
  106. cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +127 -60
  107. cuda/cccl/headers/include/cuda/__mdspan/host_device_mdspan.h +178 -3
  108. cuda/cccl/headers/include/cuda/__mdspan/restrict_accessor.h +38 -8
  109. cuda/cccl/headers/include/cuda/__mdspan/restrict_mdspan.h +67 -1
  110. cuda/cccl/headers/include/cuda/__memory/ptr_in_range.h +93 -0
  111. cuda/cccl/headers/include/cuda/__memory_resource/get_memory_resource.h +4 -4
  112. cuda/cccl/headers/include/cuda/__memory_resource/properties.h +44 -0
  113. cuda/cccl/headers/include/cuda/__memory_resource/resource.h +1 -1
  114. cuda/cccl/headers/include/cuda/__memory_resource/resource_ref.h +4 -6
  115. cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +2 -1
  116. cuda/cccl/headers/include/cuda/__runtime/ensure_current_context.h +9 -7
  117. cuda/cccl/headers/include/cuda/__stream/stream.h +8 -8
  118. cuda/cccl/headers/include/cuda/__stream/stream_ref.h +18 -16
  119. cuda/cccl/headers/include/cuda/__utility/basic_any.h +1 -1
  120. cuda/cccl/headers/include/cuda/__utility/in_range.h +65 -0
  121. cuda/cccl/headers/include/cuda/algorithm +1 -1
  122. cuda/cccl/headers/include/cuda/cmath +1 -0
  123. cuda/cccl/headers/include/cuda/devices +13 -0
  124. cuda/cccl/headers/include/cuda/iterator +1 -0
  125. cuda/cccl/headers/include/cuda/memory +1 -0
  126. cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h +2 -2
  127. cuda/cccl/headers/include/cuda/std/__algorithm/find.h +1 -1
  128. cuda/cccl/headers/include/cuda/std/__algorithm/includes.h +2 -4
  129. cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h +1 -1
  130. cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h +7 -15
  131. cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h +1 -1
  132. cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h +1 -2
  133. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h +2 -2
  134. cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h +1 -1
  135. cuda/cccl/headers/include/cuda/std/__bit/countl.h +8 -1
  136. cuda/cccl/headers/include/cuda/std/__bit/countr.h +2 -2
  137. cuda/cccl/headers/include/cuda/std/__bit/reference.h +11 -11
  138. cuda/cccl/headers/include/cuda/std/__cccl/algorithm_wrapper.h +36 -0
  139. cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +46 -49
  140. cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h +6 -0
  141. cuda/cccl/headers/include/cuda/std/__cccl/host_std_lib.h +52 -0
  142. cuda/cccl/headers/include/cuda/std/__cccl/memory_wrapper.h +36 -0
  143. cuda/cccl/headers/include/cuda/std/__cccl/numeric_wrapper.h +36 -0
  144. cuda/cccl/headers/include/cuda/std/__chrono/duration.h +16 -16
  145. cuda/cccl/headers/include/cuda/std/__chrono/steady_clock.h +5 -5
  146. cuda/cccl/headers/include/cuda/std/__chrono/system_clock.h +5 -5
  147. cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +3 -2
  148. cuda/cccl/headers/include/cuda/std/__complex/complex.h +3 -2
  149. cuda/cccl/headers/include/cuda/std/__complex/literals.h +14 -34
  150. cuda/cccl/headers/include/cuda/std/__complex/nvbf16.h +2 -1
  151. cuda/cccl/headers/include/cuda/std/__complex/nvfp16.h +4 -3
  152. cuda/cccl/headers/include/cuda/std/__concepts/invocable.h +2 -2
  153. cuda/cccl/headers/include/cuda/std/__cstdlib/malloc.h +3 -2
  154. cuda/cccl/headers/include/cuda/std/__floating_point/fp.h +1 -1
  155. cuda/cccl/headers/include/cuda/std/__functional/bind.h +10 -13
  156. cuda/cccl/headers/include/cuda/std/__functional/function.h +5 -8
  157. cuda/cccl/headers/include/cuda/std/__functional/invoke.h +71 -335
  158. cuda/cccl/headers/include/cuda/std/__functional/mem_fn.h +1 -2
  159. cuda/cccl/headers/include/cuda/std/__functional/reference_wrapper.h +3 -3
  160. cuda/cccl/headers/include/cuda/std/__functional/weak_result_type.h +0 -6
  161. cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +13 -0
  162. cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +13 -0
  163. cuda/cccl/headers/include/cuda/std/__fwd/complex.h +13 -4
  164. cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +23 -0
  165. cuda/cccl/headers/include/cuda/std/__fwd/pair.h +13 -0
  166. cuda/cccl/headers/include/cuda/std/__fwd/string.h +22 -0
  167. cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +14 -0
  168. cuda/cccl/headers/include/cuda/std/__internal/features.h +0 -5
  169. cuda/cccl/headers/include/cuda/std/__internal/namespaces.h +21 -0
  170. cuda/cccl/headers/include/cuda/std/__iterator/iterator_traits.h +5 -5
  171. cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +7 -1
  172. cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +53 -39
  173. cuda/cccl/headers/include/cuda/std/__memory/allocator.h +3 -3
  174. cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +1 -3
  175. cuda/cccl/headers/include/cuda/std/__optional/optional_base.h +1 -0
  176. cuda/cccl/headers/include/cuda/std/__ranges/compressed_movable_box.h +892 -0
  177. cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +2 -2
  178. cuda/cccl/headers/include/cuda/std/__tuple_dir/make_tuple_types.h +23 -1
  179. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +4 -0
  180. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like_ext.h +4 -0
  181. cuda/cccl/headers/include/cuda/std/__type_traits/is_primary_template.h +7 -5
  182. cuda/cccl/headers/include/cuda/std/__type_traits/result_of.h +1 -1
  183. cuda/cccl/headers/include/cuda/std/__utility/pair.h +0 -5
  184. cuda/cccl/headers/include/cuda/std/bitset +1 -1
  185. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +15 -12
  186. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/variant +11 -9
  187. cuda/cccl/headers/include/cuda/std/inplace_vector +4 -4
  188. cuda/cccl/headers/include/cuda/std/numbers +5 -0
  189. cuda/cccl/headers/include/cuda/std/string_view +155 -13
  190. cuda/cccl/headers/include/cuda/std/version +1 -4
  191. cuda/cccl/headers/include/cuda/stream_ref +5 -0
  192. cuda/cccl/headers/include/cuda/utility +1 -0
  193. cuda/cccl/headers/include/nv/target +7 -2
  194. cuda/cccl/headers/include/thrust/allocate_unique.h +1 -1
  195. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +309 -33
  196. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +151 -4
  197. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +60 -3
  198. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +45 -3
  199. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +31 -6
  200. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +29 -16
  201. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +41 -4
  202. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.h +42 -4
  203. cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +3 -3
  204. cuda/cccl/headers/include/thrust/detail/integer_math.h +3 -20
  205. cuda/cccl/headers/include/thrust/detail/internal_functional.h +1 -1
  206. cuda/cccl/headers/include/thrust/detail/memory_algorithms.h +1 -1
  207. cuda/cccl/headers/include/thrust/detail/temporary_array.h +1 -1
  208. cuda/cccl/headers/include/thrust/detail/type_traits.h +1 -1
  209. cuda/cccl/headers/include/thrust/device_delete.h +18 -3
  210. cuda/cccl/headers/include/thrust/device_free.h +16 -3
  211. cuda/cccl/headers/include/thrust/device_new.h +29 -8
  212. cuda/cccl/headers/include/thrust/host_vector.h +1 -1
  213. cuda/cccl/headers/include/thrust/iterator/iterator_traits.h +11 -0
  214. cuda/cccl/headers/include/thrust/iterator/tabulate_output_iterator.h +5 -2
  215. cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +1 -1
  216. cuda/cccl/headers/include/thrust/mr/pool.h +1 -1
  217. cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +33 -0
  218. cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +13 -115
  219. cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +8 -2
  220. cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +7 -7
  221. cuda/cccl/parallel/experimental/__init__.py +21 -74
  222. cuda/compute/__init__.py +79 -0
  223. cuda/{cccl/parallel/experimental → compute}/_bindings.pyi +43 -1
  224. cuda/{cccl/parallel/experimental → compute}/_bindings_impl.pyx +157 -8
  225. cuda/{cccl/parallel/experimental → compute}/algorithms/_histogram.py +2 -2
  226. cuda/{cccl/parallel/experimental → compute}/algorithms/_merge_sort.py +2 -2
  227. cuda/{cccl/parallel/experimental → compute}/algorithms/_radix_sort.py +3 -3
  228. cuda/{cccl/parallel/experimental → compute}/algorithms/_reduce.py +2 -2
  229. cuda/{cccl/parallel/experimental → compute}/algorithms/_scan.py +112 -40
  230. cuda/{cccl/parallel/experimental → compute}/algorithms/_segmented_reduce.py +2 -2
  231. cuda/{cccl/parallel/experimental → compute}/algorithms/_three_way_partition.py +2 -2
  232. cuda/{cccl/parallel/experimental → compute}/algorithms/_transform.py +36 -15
  233. cuda/{cccl/parallel/experimental → compute}/algorithms/_unique_by_key.py +2 -2
  234. cuda/compute/cu12/_bindings_impl.cpython-313-aarch64-linux-gnu.so +0 -0
  235. cuda/{cccl/parallel/experimental → compute}/cu12/cccl/libcccl.c.parallel.so +0 -0
  236. cuda/compute/cu13/_bindings_impl.cpython-313-aarch64-linux-gnu.so +0 -0
  237. cuda/{cccl/parallel/experimental → compute}/cu13/cccl/libcccl.c.parallel.so +0 -0
  238. cuda/{cccl/parallel/experimental → compute}/iterators/__init__.py +2 -0
  239. cuda/{cccl/parallel/experimental → compute}/iterators/_factories.py +36 -8
  240. cuda/{cccl/parallel/experimental → compute}/iterators/_iterators.py +206 -1
  241. cuda/{cccl/parallel/experimental → compute}/numba_utils.py +2 -2
  242. cuda/{cccl/parallel/experimental → compute}/struct.py +2 -2
  243. cuda/{cccl/parallel/experimental → compute}/typing.py +2 -0
  244. cuda/coop/__init__.py +8 -0
  245. cuda/{cccl/cooperative/experimental → coop}/_nvrtc.py +3 -2
  246. cuda/{cccl/cooperative/experimental → coop}/_scan_op.py +3 -3
  247. cuda/{cccl/cooperative/experimental → coop}/_types.py +2 -2
  248. cuda/{cccl/cooperative/experimental → coop}/_typing.py +1 -1
  249. cuda/{cccl/cooperative/experimental → coop}/block/__init__.py +6 -6
  250. cuda/{cccl/cooperative/experimental → coop}/block/_block_exchange.py +4 -4
  251. cuda/{cccl/cooperative/experimental → coop}/block/_block_load_store.py +6 -6
  252. cuda/{cccl/cooperative/experimental → coop}/block/_block_merge_sort.py +4 -4
  253. cuda/{cccl/cooperative/experimental → coop}/block/_block_radix_sort.py +6 -6
  254. cuda/{cccl/cooperative/experimental → coop}/block/_block_reduce.py +6 -6
  255. cuda/{cccl/cooperative/experimental → coop}/block/_block_scan.py +7 -7
  256. cuda/coop/warp/__init__.py +9 -0
  257. cuda/{cccl/cooperative/experimental → coop}/warp/_warp_merge_sort.py +3 -3
  258. cuda/{cccl/cooperative/experimental → coop}/warp/_warp_reduce.py +6 -6
  259. cuda/{cccl/cooperative/experimental → coop}/warp/_warp_scan.py +4 -4
  260. {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/METADATA +1 -1
  261. {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/RECORD +275 -276
  262. cuda/cccl/cooperative/experimental/warp/__init__.py +0 -9
  263. cuda/cccl/headers/include/cub/device/dispatch/dispatch_advance_iterators.cuh +0 -111
  264. cuda/cccl/headers/include/thrust/detail/algorithm_wrapper.h +0 -37
  265. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.inl +0 -371
  266. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.inl +0 -242
  267. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.inl +0 -137
  268. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.inl +0 -99
  269. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.inl +0 -68
  270. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.inl +0 -86
  271. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.inl +0 -79
  272. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.inl +0 -98
  273. cuda/cccl/headers/include/thrust/detail/device_delete.inl +0 -52
  274. cuda/cccl/headers/include/thrust/detail/device_free.inl +0 -47
  275. cuda/cccl/headers/include/thrust/detail/device_new.inl +0 -61
  276. cuda/cccl/headers/include/thrust/detail/memory_wrapper.h +0 -40
  277. cuda/cccl/headers/include/thrust/detail/numeric_wrapper.h +0 -37
  278. cuda/cccl/parallel/experimental/.gitignore +0 -4
  279. cuda/cccl/parallel/experimental/cu12/_bindings_impl.cpython-313-aarch64-linux-gnu.so +0 -0
  280. cuda/cccl/parallel/experimental/cu13/_bindings_impl.cpython-313-aarch64-linux-gnu.so +0 -0
  281. /cuda/{cccl/parallel/experimental → compute}/_bindings.py +0 -0
  282. /cuda/{cccl/parallel/experimental → compute}/_caching.py +0 -0
  283. /cuda/{cccl/parallel/experimental → compute}/_cccl_interop.py +0 -0
  284. /cuda/{cccl/parallel/experimental → compute}/_utils/__init__.py +0 -0
  285. /cuda/{cccl/parallel/experimental → compute}/_utils/protocols.py +0 -0
  286. /cuda/{cccl/parallel/experimental → compute}/_utils/temp_storage_buffer.py +0 -0
  287. /cuda/{cccl/parallel/experimental → compute}/algorithms/__init__.py +0 -0
  288. /cuda/{cccl/parallel/experimental → compute}/cccl/.gitkeep +0 -0
  289. /cuda/{cccl/parallel/experimental → compute}/iterators/_zip_iterator.py +0 -0
  290. /cuda/{cccl/parallel/experimental → compute}/op.py +0 -0
  291. /cuda/{cccl/cooperative/experimental → coop}/_caching.py +0 -0
  292. /cuda/{cccl/cooperative/experimental → coop}/_common.py +0 -0
  293. {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/WHEEL +0 -0
  294. {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/licenses/LICENSE +0 -0
@@ -22,9 +22,6 @@
22
22
  #endif // no system header
23
23
 
24
24
  #include <cuda/std/__concepts/concept_macros.h>
25
- #include <cuda/std/__type_traits/add_lvalue_reference.h>
26
- #include <cuda/std/__type_traits/conditional.h>
27
- #include <cuda/std/__type_traits/copy_cvref.h>
28
25
  #include <cuda/std/__type_traits/decay.h>
29
26
  #include <cuda/std/__type_traits/enable_if.h>
30
27
  #include <cuda/std/__type_traits/integral_constant.h>
@@ -36,12 +33,9 @@
36
33
  #include <cuda/std/__type_traits/is_same.h>
37
34
  #include <cuda/std/__type_traits/is_void.h>
38
35
  #include <cuda/std/__type_traits/nat.h>
39
- #include <cuda/std/__type_traits/remove_cv.h>
40
36
  #include <cuda/std/__utility/declval.h>
41
37
  #include <cuda/std/__utility/forward.h>
42
38
 
43
- // TODO: Disentangle the type traits and ::cuda::std::invoke properly
44
-
45
39
  #include <cuda/std/__cccl/prologue.h>
46
40
 
47
41
  _CCCL_BEGIN_NAMESPACE_CUDA_STD
@@ -51,220 +45,6 @@ struct __any
51
45
  _CCCL_API inline __any(...);
52
46
  };
53
47
 
54
- template <class _MP, bool _IsMemberFunctionPtr, bool _IsMemberObjectPtr>
55
- struct __member_pointer_traits_imp
56
- {};
57
-
58
- template <class _Rp, class _Class, class... _Param>
59
- struct __member_pointer_traits_imp<_Rp (_Class::*)(_Param...), true, false>
60
- {
61
- using _ClassType = _Class;
62
- using _ReturnType = _Rp;
63
- using _FnType = _Rp (*)(_Param...);
64
- };
65
-
66
- template <class _Rp, class _Class, class... _Param>
67
- struct __member_pointer_traits_imp<_Rp (_Class::*)(_Param..., ...), true, false>
68
- {
69
- using _ClassType = _Class;
70
- using _ReturnType = _Rp;
71
- using _FnType = _Rp (*)(_Param..., ...);
72
- };
73
-
74
- template <class _Rp, class _Class, class... _Param>
75
- struct __member_pointer_traits_imp<_Rp (_Class::*)(_Param...) const, true, false>
76
- {
77
- using _ClassType = _Class const;
78
- using _ReturnType = _Rp;
79
- using _FnType = _Rp (*)(_Param...);
80
- };
81
-
82
- template <class _Rp, class _Class, class... _Param>
83
- struct __member_pointer_traits_imp<_Rp (_Class::*)(_Param..., ...) const, true, false>
84
- {
85
- using _ClassType = _Class const;
86
- using _ReturnType = _Rp;
87
- using _FnType = _Rp (*)(_Param..., ...);
88
- };
89
-
90
- template <class _Rp, class _Class, class... _Param>
91
- struct __member_pointer_traits_imp<_Rp (_Class::*)(_Param...) volatile, true, false>
92
- {
93
- using _ClassType = _Class volatile;
94
- using _ReturnType = _Rp;
95
- using _FnType = _Rp (*)(_Param...);
96
- };
97
-
98
- template <class _Rp, class _Class, class... _Param>
99
- struct __member_pointer_traits_imp<_Rp (_Class::*)(_Param..., ...) volatile, true, false>
100
- {
101
- using _ClassType = _Class volatile;
102
- using _ReturnType = _Rp;
103
- using _FnType = _Rp (*)(_Param..., ...);
104
- };
105
-
106
- template <class _Rp, class _Class, class... _Param>
107
- struct __member_pointer_traits_imp<_Rp (_Class::*)(_Param...) const volatile, true, false>
108
- {
109
- using _ClassType = _Class const volatile;
110
- using _ReturnType = _Rp;
111
- using _FnType = _Rp (*)(_Param...);
112
- };
113
-
114
- template <class _Rp, class _Class, class... _Param>
115
- struct __member_pointer_traits_imp<_Rp (_Class::*)(_Param..., ...) const volatile, true, false>
116
- {
117
- using _ClassType = _Class const volatile;
118
- using _ReturnType = _Rp;
119
- using _FnType = _Rp (*)(_Param..., ...);
120
- };
121
-
122
- template <class _Rp, class _Class, class... _Param>
123
- struct __member_pointer_traits_imp<_Rp (_Class::*)(_Param...) &, true, false>
124
- {
125
- using _ClassType = _Class&;
126
- using _ReturnType = _Rp;
127
- using _FnType = _Rp (*)(_Param...);
128
- };
129
-
130
- template <class _Rp, class _Class, class... _Param>
131
- struct __member_pointer_traits_imp<_Rp (_Class::*)(_Param..., ...) &, true, false>
132
- {
133
- using _ClassType = _Class&;
134
- using _ReturnType = _Rp;
135
- using _FnType = _Rp (*)(_Param..., ...);
136
- };
137
-
138
- template <class _Rp, class _Class, class... _Param>
139
- struct __member_pointer_traits_imp<_Rp (_Class::*)(_Param...) const&, true, false>
140
- {
141
- using _ClassType = _Class const&;
142
- using _ReturnType = _Rp;
143
- using _FnType = _Rp (*)(_Param...);
144
- };
145
-
146
- template <class _Rp, class _Class, class... _Param>
147
- struct __member_pointer_traits_imp<_Rp (_Class::*)(_Param..., ...) const&, true, false>
148
- {
149
- using _ClassType = _Class const&;
150
- using _ReturnType = _Rp;
151
- using _FnType = _Rp (*)(_Param..., ...);
152
- };
153
-
154
- template <class _Rp, class _Class, class... _Param>
155
- struct __member_pointer_traits_imp<_Rp (_Class::*)(_Param...) volatile&, true, false>
156
- {
157
- using _ClassType = _Class volatile&;
158
- using _ReturnType = _Rp;
159
- using _FnType = _Rp (*)(_Param...);
160
- };
161
-
162
- template <class _Rp, class _Class, class... _Param>
163
- struct __member_pointer_traits_imp<_Rp (_Class::*)(_Param..., ...) volatile&, true, false>
164
- {
165
- using _ClassType = _Class volatile&;
166
- using _ReturnType = _Rp;
167
- using _FnType = _Rp (*)(_Param..., ...);
168
- };
169
-
170
- template <class _Rp, class _Class, class... _Param>
171
- struct __member_pointer_traits_imp<_Rp (_Class::*)(_Param...) const volatile&, true, false>
172
- {
173
- using _ClassType = _Class const volatile&;
174
- using _ReturnType = _Rp;
175
- using _FnType = _Rp (*)(_Param...);
176
- };
177
-
178
- template <class _Rp, class _Class, class... _Param>
179
- struct __member_pointer_traits_imp<_Rp (_Class::*)(_Param..., ...) const volatile&, true, false>
180
- {
181
- using _ClassType = _Class const volatile&;
182
- using _ReturnType = _Rp;
183
- using _FnType = _Rp (*)(_Param..., ...);
184
- };
185
-
186
- template <class _Rp, class _Class, class... _Param>
187
- struct __member_pointer_traits_imp<_Rp (_Class::*)(_Param...) &&, true, false>
188
- {
189
- using _ClassType = _Class&&;
190
- using _ReturnType = _Rp;
191
- using _FnType = _Rp (*)(_Param...);
192
- };
193
-
194
- template <class _Rp, class _Class, class... _Param>
195
- struct __member_pointer_traits_imp<_Rp (_Class::*)(_Param..., ...) &&, true, false>
196
- {
197
- using _ClassType = _Class&&;
198
- using _ReturnType = _Rp;
199
- using _FnType = _Rp (*)(_Param..., ...);
200
- };
201
-
202
- template <class _Rp, class _Class, class... _Param>
203
- struct __member_pointer_traits_imp<_Rp (_Class::*)(_Param...) const&&, true, false>
204
- {
205
- using _ClassType = _Class const&&;
206
- using _ReturnType = _Rp;
207
- using _FnType = _Rp (*)(_Param...);
208
- };
209
-
210
- template <class _Rp, class _Class, class... _Param>
211
- struct __member_pointer_traits_imp<_Rp (_Class::*)(_Param..., ...) const&&, true, false>
212
- {
213
- using _ClassType = _Class const&&;
214
- using _ReturnType = _Rp;
215
- using _FnType = _Rp (*)(_Param..., ...);
216
- };
217
-
218
- template <class _Rp, class _Class, class... _Param>
219
- struct __member_pointer_traits_imp<_Rp (_Class::*)(_Param...) volatile&&, true, false>
220
- {
221
- using _ClassType = _Class volatile&&;
222
- using _ReturnType = _Rp;
223
- using _FnType = _Rp (*)(_Param...);
224
- };
225
-
226
- template <class _Rp, class _Class, class... _Param>
227
- struct __member_pointer_traits_imp<_Rp (_Class::*)(_Param..., ...) volatile&&, true, false>
228
- {
229
- using _ClassType = _Class volatile&&;
230
- using _ReturnType = _Rp;
231
- using _FnType = _Rp (*)(_Param..., ...);
232
- };
233
-
234
- template <class _Rp, class _Class, class... _Param>
235
- struct __member_pointer_traits_imp<_Rp (_Class::*)(_Param...) const volatile&&, true, false>
236
- {
237
- using _ClassType = _Class const volatile&&;
238
- using _ReturnType = _Rp;
239
- using _FnType = _Rp (*)(_Param...);
240
- };
241
-
242
- template <class _Rp, class _Class, class... _Param>
243
- struct __member_pointer_traits_imp<_Rp (_Class::*)(_Param..., ...) const volatile&&, true, false>
244
- {
245
- using _ClassType = _Class const volatile&&;
246
- using _ReturnType = _Rp;
247
- using _FnType = _Rp (*)(_Param..., ...);
248
- };
249
-
250
- template <class _Rp, class _Class>
251
- struct __member_pointer_traits_imp<_Rp _Class::*, false, true>
252
- {
253
- using _ClassType = _Class;
254
- using _ReturnType = _Rp;
255
- };
256
-
257
- template <class _MP>
258
- struct __member_pointer_traits
259
- : public __member_pointer_traits_imp<remove_cv_t<_MP>,
260
- is_member_function_pointer<_MP>::value,
261
- is_member_object_pointer<_MP>::value>
262
- {
263
- // typedef ... _ClassType;
264
- // typedef ... _ReturnType;
265
- // typedef ... _FnType;
266
- };
267
-
268
48
  template <class _DecayedFp>
269
49
  struct __member_pointer_class_type
270
50
  {};
@@ -275,47 +55,46 @@ struct __member_pointer_class_type<_Ret _ClassType::*>
275
55
  using type = _ClassType;
276
56
  };
277
57
 
58
+ template <class _DecayedFp>
59
+ using __member_pointer_class_type_t = typename __member_pointer_class_type<_DecayedFp>::type;
60
+
278
61
  template <class _Fp,
279
62
  class _A0,
280
63
  class _DecayFp = decay_t<_Fp>,
281
- class _DecayA0 = typename decay<_A0>::type,
282
- class _ClassT = typename __member_pointer_class_type<_DecayFp>::type>
283
- using __enable_if_bullet1 =
284
- enable_if_t<is_member_function_pointer<_DecayFp>::value && is_base_of<_ClassT, _DecayA0>::value>;
64
+ class _DecayA0 = decay_t<_A0>,
65
+ class _ClassT = __member_pointer_class_type_t<_DecayFp>>
66
+ using __enable_if_bullet1 = enable_if_t<is_member_function_pointer_v<_DecayFp> && is_base_of_v<_ClassT, _DecayA0>>;
285
67
 
286
- template <class _Fp, class _A0, class _DecayFp = decay_t<_Fp>, class _DecayA0 = typename decay<_A0>::type>
68
+ template <class _Fp, class _A0, class _DecayFp = decay_t<_Fp>, class _DecayA0 = decay_t<_A0>>
287
69
  using __enable_if_bullet2 =
288
- enable_if_t<is_member_function_pointer<_DecayFp>::value && __cccl_is_reference_wrapper_v<_DecayA0>>;
70
+ enable_if_t<is_member_function_pointer_v<_DecayFp> && __cccl_is_reference_wrapper_v<_DecayA0>>;
289
71
 
290
72
  template <class _Fp,
291
73
  class _A0,
292
74
  class _DecayFp = decay_t<_Fp>,
293
- class _DecayA0 = typename decay<_A0>::type,
294
- class _ClassT = typename __member_pointer_class_type<_DecayFp>::type>
295
- using __enable_if_bullet3 =
296
- enable_if_t<is_member_function_pointer<_DecayFp>::value && !is_base_of<_ClassT, _DecayA0>::value
297
- && !__cccl_is_reference_wrapper_v<_DecayA0>>;
75
+ class _DecayA0 = decay_t<_A0>,
76
+ class _ClassT = __member_pointer_class_type_t<_DecayFp>>
77
+ using __enable_if_bullet3 = enable_if_t<is_member_function_pointer_v<_DecayFp> && !is_base_of_v<_ClassT, _DecayA0>
78
+ && !__cccl_is_reference_wrapper_v<_DecayA0>>;
298
79
 
299
80
  template <class _Fp,
300
81
  class _A0,
301
82
  class _DecayFp = decay_t<_Fp>,
302
- class _DecayA0 = typename decay<_A0>::type,
303
- class _ClassT = typename __member_pointer_class_type<_DecayFp>::type>
304
- using __enable_if_bullet4 =
305
- enable_if_t<is_member_object_pointer<_DecayFp>::value && is_base_of<_ClassT, _DecayA0>::value>;
83
+ class _DecayA0 = decay_t<_A0>,
84
+ class _ClassT = __member_pointer_class_type_t<_DecayFp>>
85
+ using __enable_if_bullet4 = enable_if_t<is_member_object_pointer_v<_DecayFp> && is_base_of_v<_ClassT, _DecayA0>>;
306
86
 
307
- template <class _Fp, class _A0, class _DecayFp = decay_t<_Fp>, class _DecayA0 = typename decay<_A0>::type>
87
+ template <class _Fp, class _A0, class _DecayFp = decay_t<_Fp>, class _DecayA0 = decay_t<_A0>>
308
88
  using __enable_if_bullet5 =
309
- enable_if_t<is_member_object_pointer<_DecayFp>::value && __cccl_is_reference_wrapper_v<_DecayA0>>;
89
+ enable_if_t<is_member_object_pointer_v<_DecayFp> && __cccl_is_reference_wrapper_v<_DecayA0>>;
310
90
 
311
91
  template <class _Fp,
312
92
  class _A0,
313
93
  class _DecayFp = decay_t<_Fp>,
314
- class _DecayA0 = typename decay<_A0>::type,
315
- class _ClassT = typename __member_pointer_class_type<_DecayFp>::type>
316
- using __enable_if_bullet6 =
317
- enable_if_t<is_member_object_pointer<_DecayFp>::value && !is_base_of<_ClassT, _DecayA0>::value
318
- && !__cccl_is_reference_wrapper_v<_DecayA0>>;
94
+ class _DecayA0 = decay_t<_A0>,
95
+ class _ClassT = __member_pointer_class_type_t<_DecayFp>>
96
+ using __enable_if_bullet6 = enable_if_t<is_member_object_pointer_v<_DecayFp> && !is_base_of_v<_ClassT, _DecayA0>
97
+ && !__cccl_is_reference_wrapper_v<_DecayA0>>;
319
98
 
320
99
  // __invoke forward declarations
321
100
 
@@ -393,65 +172,23 @@ __invoke(_Fp&& __f, _Args&&... __args) noexcept(noexcept(static_cast<_Fp&&>(__f)
393
172
  return static_cast<_Fp&&>(__f)(static_cast<_Args&&>(__args)...);
394
173
  }
395
174
 
396
- // __invocable
397
- template <class _Ret, class _Fp, class... _Args>
398
- struct __invocable_r
399
- {
400
- template <class _XFp, class... _XArgs>
401
- _CCCL_API inline static decltype(::cuda::std::__invoke(
402
- ::cuda::std::declval<_XFp>(), ::cuda::std::declval<_XArgs>()...))
403
- __try_call(int);
404
-
405
- template <class _XFp, class... _XArgs>
406
- _CCCL_API inline static __nat __try_call(...);
407
-
408
- // FIXME: Check that _Ret, _Fp, and _Args... are all complete types, cv void,
409
- // or incomplete array types as required by the standard.
410
- using _Result = decltype(__try_call<_Fp, _Args...>(0));
411
-
412
- using type = conditional_t<_IsNotSame<_Result, __nat>::value,
413
- conditional_t<is_void<_Ret>::value, true_type, __is_core_convertible<_Result, _Ret>>,
414
- false_type>;
415
- static const bool value = type::value;
416
- };
175
+ // __is_invocable
417
176
  template <class _Fp, class... _Args>
418
- using __invocable = __invocable_r<void, _Fp, _Args...>;
419
-
420
- template <bool _IsInvocable, bool _IsCVVoid, class _Ret, class _Fp, class... _Args>
421
- struct __nothrow_invocable_r_imp
422
- {
423
- static const bool value = false;
424
- };
425
-
426
- template <class _Ret, class _Fp, class... _Args>
427
- struct __nothrow_invocable_r_imp<true, false, _Ret, _Fp, _Args...>
428
- {
429
- using _ThisT = __nothrow_invocable_r_imp;
430
-
431
- template <class _Tp>
432
- _CCCL_API inline static void __test_noexcept(_Tp) noexcept;
177
+ using __invoke_result_t =
178
+ decltype(::cuda::std::__invoke(::cuda::std::declval<_Fp>(), ::cuda::std::declval<_Args>()...));
433
179
 
434
- static const bool value =
435
- noexcept(_ThisT::__test_noexcept<_Ret>(::cuda::std::__invoke(declval<_Fp>(), ::cuda::std::declval<_Args>()...)));
436
- };
437
-
438
- template <class _Ret, class _Fp, class... _Args>
439
- struct __nothrow_invocable_r_imp<true, true, _Ret, _Fp, _Args...>
440
- {
441
- static const bool value =
442
- noexcept(::cuda::std::__invoke(::cuda::std::declval<_Fp>(), ::cuda::std::declval<_Args>()...));
443
- };
180
+ template <class _Fp, class... _Args>
181
+ _CCCL_CONCEPT __is_invocable =
182
+ _CCCL_REQUIRES_EXPR((_Fp, variadic _Args))(requires(!is_same_v<__nat, __invoke_result_t<_Fp, _Args...>>));
444
183
 
445
184
  template <class _Ret, class _Fp, class... _Args>
446
- using __nothrow_invocable_r =
447
- __nothrow_invocable_r_imp<__invocable_r<_Ret, _Fp, _Args...>::value, is_void<_Ret>::value, _Ret, _Fp, _Args...>;
448
-
449
- template <class _Fp, class... _Args>
450
- using __nothrow_invocable = __nothrow_invocable_r_imp<__invocable<_Fp, _Args...>::value, true, void, _Fp, _Args...>;
185
+ _CCCL_CONCEPT __is_invocable_r = _CCCL_REQUIRES_EXPR((_Ret, _Fp, variadic _Args))(
186
+ requires(__is_invocable<_Fp, _Args...>),
187
+ requires((is_void_v<_Ret> || __is_core_convertible<__invoke_result_t<_Fp, _Args...>, _Ret>::value)));
451
188
 
452
189
  template <class _Fp, class... _Args>
453
- struct _CCCL_TYPE_VISIBILITY_DEFAULT __invoke_of //
454
- : public enable_if<__invocable<_Fp, _Args...>::value, typename __invocable_r<void, _Fp, _Args...>::_Result>
190
+ struct _CCCL_TYPE_VISIBILITY_DEFAULT invoke_result //
191
+ : public enable_if<__is_invocable<_Fp, _Args...>, __invoke_result_t<_Fp, _Args...>>
455
192
  {
456
193
  #if _CCCL_CUDA_COMPILER(NVCC) && defined(__CUDACC_EXTENDED_LAMBDA__) && !_CCCL_DEVICE_COMPILATION()
457
194
  # if _CCCL_CUDACC_BELOW(12, 3)
@@ -473,64 +210,57 @@ struct _CCCL_TYPE_VISIBILITY_DEFAULT __invoke_of //
473
210
  #endif
474
211
  };
475
212
 
476
- template <class _Ret, bool = is_void<_Ret>::value>
477
- struct __invoke_void_return_wrapper
478
- {
479
- template <class... _Args>
480
- _CCCL_API static constexpr _Ret __call(_Args&&... __args)
481
- {
482
- return ::cuda::std::__invoke(::cuda::std::forward<_Args>(__args)...);
483
- }
484
- };
485
-
486
- template <class _Ret>
487
- struct __invoke_void_return_wrapper<_Ret, true>
488
- {
489
- template <class... _Args>
490
- _CCCL_API static constexpr void __call(_Args&&... __args)
491
- {
492
- ::cuda::std::__invoke(::cuda::std::forward<_Args>(__args)...);
493
- }
494
- };
495
-
496
213
  // is_invocable
497
214
 
498
215
  template <class _Fn, class... _Args>
499
- struct _CCCL_TYPE_VISIBILITY_DEFAULT is_invocable : integral_constant<bool, __invocable<_Fn, _Args...>::value>
216
+ struct _CCCL_TYPE_VISIBILITY_DEFAULT is_invocable : bool_constant<__is_invocable<_Fn, _Args...>>
500
217
  {};
501
218
 
502
219
  template <class _Ret, class _Fn, class... _Args>
503
- struct _CCCL_TYPE_VISIBILITY_DEFAULT is_invocable_r : integral_constant<bool, __invocable_r<_Ret, _Fn, _Args...>::value>
220
+ struct _CCCL_TYPE_VISIBILITY_DEFAULT is_invocable_r : bool_constant<__is_invocable_r<_Ret, _Fn, _Args...>>
504
221
  {};
505
222
 
506
223
  template <class _Fn, class... _Args>
507
- inline constexpr bool is_invocable_v = is_invocable<_Fn, _Args...>::value;
224
+ inline constexpr bool is_invocable_v = __is_invocable<_Fn, _Args...>;
508
225
 
509
226
  template <class _Ret, class _Fn, class... _Args>
510
- inline constexpr bool is_invocable_r_v = is_invocable_r<_Ret, _Fn, _Args...>::value;
227
+ inline constexpr bool is_invocable_r_v = __is_invocable_r<_Ret, _Fn, _Args...>;
511
228
 
512
229
  // is_nothrow_invocable
513
230
 
514
- template <class _Fn, class... _Args>
515
- struct _CCCL_TYPE_VISIBILITY_DEFAULT
516
- is_nothrow_invocable : integral_constant<bool, __nothrow_invocable<_Fn, _Args...>::value>
517
- {};
231
+ template <class _Tp>
232
+ _CCCL_API constexpr void __cccl_test_noexcept_conversion(_Tp) noexcept;
518
233
 
519
- template <class _Ret, class _Fn, class... _Args>
520
- struct _CCCL_TYPE_VISIBILITY_DEFAULT
521
- is_nothrow_invocable_r : integral_constant<bool, __nothrow_invocable_r<_Ret, _Fn, _Args...>::value>
522
- {};
234
+ template <bool _IsInvocable, bool _IsCVVoid, class _Ret, class _Fp, class... _Args>
235
+ inline constexpr bool __nothrow_invocable_r_imp = false;
523
236
 
524
- template <class _Fn, class... _Args>
525
- inline constexpr bool is_nothrow_invocable_v = is_nothrow_invocable<_Fn, _Args...>::value;
237
+ template <class _Ret, class _Fp, class... _Args>
238
+ inline constexpr bool __nothrow_invocable_r_imp<true, false, _Ret, _Fp, _Args...> =
239
+ noexcept(::cuda::std::__cccl_test_noexcept_conversion<_Ret>(
240
+ ::cuda::std::__invoke(declval<_Fp>(), ::cuda::std::declval<_Args>()...)));
526
241
 
527
- template <class _Ret, class _Fn, class... _Args>
528
- inline constexpr bool is_nothrow_invocable_r_v = is_nothrow_invocable_r<_Ret, _Fn, _Args...>::value;
242
+ template <class _Ret, class _Fp, class... _Args>
243
+ inline constexpr bool __nothrow_invocable_r_imp<true, true, _Ret, _Fp, _Args...> =
244
+ noexcept(::cuda::std::__invoke(::cuda::std::declval<_Fp>(), ::cuda::std::declval<_Args>()...));
245
+
246
+ template <class _Fp, class... _Args>
247
+ inline constexpr bool is_nothrow_invocable_v =
248
+ __nothrow_invocable_r_imp<__is_invocable<_Fp, _Args...>, true, void, _Fp, _Args...>;
249
+
250
+ template <class _Ret, class _Fp, class... _Args>
251
+ inline constexpr bool is_nothrow_invocable_r_v =
252
+ __nothrow_invocable_r_imp<__is_invocable_r<_Ret, _Fp, _Args...>, is_void_v<_Ret>, _Ret, _Fp, _Args...>;
529
253
 
530
254
  template <class _Fn, class... _Args>
531
- struct _CCCL_TYPE_VISIBILITY_DEFAULT invoke_result : __invoke_of<_Fn, _Args...>
255
+ struct _CCCL_TYPE_VISIBILITY_DEFAULT is_nothrow_invocable : bool_constant<is_nothrow_invocable_v<_Fn, _Args...>>
532
256
  {};
533
257
 
258
+ template <class _Ret, class _Fn, class... _Args>
259
+ struct _CCCL_TYPE_VISIBILITY_DEFAULT
260
+ is_nothrow_invocable_r : bool_constant<is_nothrow_invocable_r_v<_Ret, _Fn, _Args...>>
261
+ {};
262
+
263
+ // Not going directly through __invoke_result_t because we want the additional device lambda checks in invoke_result
534
264
  template <class _Fn, class... _Args>
535
265
  using invoke_result_t = typename invoke_result<_Fn, _Args...>::type;
536
266
 
@@ -545,13 +275,19 @@ _CCCL_TEMPLATE(class _Ret, class _Fn, class... _Args)
545
275
  _CCCL_REQUIRES(is_invocable_r_v<_Ret, _Fn, _Args...>)
546
276
  _CCCL_API constexpr _Ret invoke_r(_Fn&& __f, _Args&&... __args) noexcept(is_nothrow_invocable_r_v<_Ret, _Fn, _Args...>)
547
277
  {
548
- return __invoke_void_return_wrapper<_Ret>::__call(
549
- ::cuda::std::forward<_Fn>(__f), ::cuda::std::forward<_Args>(__args)...);
278
+ if constexpr (is_void_v<_Ret>)
279
+ {
280
+ ::cuda::std::__invoke(::cuda::std::forward<_Fn>(__f), ::cuda::std::forward<_Args>(__args)...);
281
+ }
282
+ else
283
+ {
284
+ return ::cuda::std::__invoke(::cuda::std::forward<_Fn>(__f), ::cuda::std::forward<_Args>(__args)...);
285
+ }
550
286
  }
551
287
 
552
288
  /// The type of intermediate accumulator (according to P2322R6)
553
289
  template <typename Invocable, typename InputT, typename InitT = InputT>
554
- using __accumulator_t = typename decay<typename ::cuda::std::__invoke_of<Invocable, InitT, InputT>::type>::type;
290
+ using __accumulator_t = decay_t<invoke_result_t<Invocable, InitT, InputT>>;
555
291
 
556
292
  _CCCL_END_NAMESPACE_CUDA_STD
557
293
 
@@ -47,8 +47,7 @@ public:
47
47
 
48
48
  // invoke
49
49
  template <class... _ArgTypes>
50
- _CCCL_API inline _CCCL_CONSTEXPR_CXX20 typename __invoke_return<type, _ArgTypes...>::type
51
- operator()(_ArgTypes&&... __args) const
50
+ _CCCL_API inline _CCCL_CONSTEXPR_CXX20 invoke_result_t<type, _ArgTypes...> operator()(_ArgTypes&&... __args) const
52
51
  {
53
52
  return ::cuda::std::__invoke(__f_, ::cuda::std::forward<_ArgTypes>(__args)...);
54
53
  }
@@ -67,10 +67,10 @@ public:
67
67
 
68
68
  // invoke
69
69
  template <class... _ArgTypes>
70
- _CCCL_API inline _CCCL_CONSTEXPR_CXX20 typename __invoke_of<type&, _ArgTypes...>::type
71
- operator()(_ArgTypes&&... __args) const noexcept(is_nothrow_invocable_v<_Tp&, _ArgTypes...>)
70
+ _CCCL_API inline _CCCL_CONSTEXPR_CXX20 invoke_result_t<type&, _ArgTypes...> operator()(_ArgTypes&&... __args) const
71
+ noexcept(is_nothrow_invocable_v<_Tp&, _ArgTypes...>)
72
72
  {
73
- return ::cuda::std::__invoke(get(), ::cuda::std::forward<_ArgTypes>(__args)...);
73
+ return ::cuda::std::invoke(get(), ::cuda::std::forward<_ArgTypes>(__args)...);
74
74
  }
75
75
  };
76
76
 
@@ -255,12 +255,6 @@ struct __weak_result_type<_Rp (_Cp::*)(_A1, _A2, _A3...) const volatile>
255
255
  #endif
256
256
  };
257
257
 
258
- template <class _Tp, class... _Args>
259
- struct __invoke_return
260
- {
261
- using type = decltype(::cuda::std::__invoke(declval<_Tp>(), declval<_Args>()...));
262
- };
263
-
264
258
  _CCCL_END_NAMESPACE_CUDA_STD
265
259
 
266
260
  #include <cuda/std/__cccl/epilogue.h>
@@ -23,6 +23,19 @@
23
23
 
24
24
  #include <cuda/std/__cccl/prologue.h>
25
25
 
26
+ // std:: forward declarations
27
+
28
+ #if _CCCL_HAS_HOST_STD_LIB()
29
+ _CCCL_BEGIN_NAMESPACE_STD
30
+
31
+ template <class _Tp>
32
+ class allocator;
33
+
34
+ _CCCL_END_NAMESPACE_STD
35
+ #endif // _CCCL_HAS_HOST_STD_LIB()
36
+
37
+ // cuda::std:: forward declarations
38
+
26
39
  _CCCL_BEGIN_NAMESPACE_CUDA_STD
27
40
 
28
41
  template <class _Tp>
@@ -23,6 +23,19 @@
23
23
 
24
24
  #include <cuda/std/__cccl/prologue.h>
25
25
 
26
+ // std:: forward declarations
27
+
28
+ #if _CCCL_HAS_HOST_STD_LIB()
29
+ _CCCL_BEGIN_NAMESPACE_STD
30
+
31
+ template <class _CharT>
32
+ struct char_traits;
33
+
34
+ _CCCL_END_NAMESPACE_STD
35
+ #endif // _CCCL_HAS_HOST_STD_LIB()
36
+
37
+ // cuda::std:: forward declarations
38
+
26
39
  _CCCL_BEGIN_NAMESPACE_CUDA_STD
27
40
 
28
41
  template <class _CharT>
@@ -20,12 +20,21 @@
20
20
  # pragma system_header
21
21
  #endif // no system header
22
22
 
23
- #if !_CCCL_COMPILER(NVRTC)
24
- # include <complex>
25
- #endif // !_CCCL_COMPILER(NVRTC)
26
-
27
23
  #include <cuda/std/__cccl/prologue.h>
28
24
 
25
+ // std:: forward declarations
26
+
27
+ #if _CCCL_HAS_HOST_STD_LIB()
28
+ _CCCL_BEGIN_NAMESPACE_STD
29
+
30
+ template <class>
31
+ class complex;
32
+
33
+ _CCCL_END_NAMESPACE_STD
34
+ #endif // _CCCL_HAS_HOST_STD_LIB()
35
+
36
+ // cuda::std:: forward declarations
37
+
29
38
  _CCCL_BEGIN_NAMESPACE_CUDA_STD
30
39
 
31
40
  template <class _Tp>
@@ -83,6 +83,29 @@ inline constexpr bool __is_std_mdspan_v = false;
83
83
  template <class _ElementType, class _Extents, class _LayoutPolicy, class _AccessorPolicy>
84
84
  inline constexpr bool __is_std_mdspan_v<mdspan<_ElementType, _Extents, _LayoutPolicy, _AccessorPolicy>> = true;
85
85
 
86
+ template <typename _Layout>
87
+ inline constexpr bool __is_any_mdspan_layout_mapping_left_v = false;
88
+
89
+ template <typename _Extents>
90
+ inline constexpr bool __is_any_mdspan_layout_mapping_left_v<layout_left::mapping<_Extents>> = true;
91
+
92
+ template <typename _Layout>
93
+ inline constexpr bool __is_any_mdspan_layout_mapping_right_v = false;
94
+
95
+ template <typename _Extents>
96
+ inline constexpr bool __is_any_mdspan_layout_mapping_right_v<layout_right::mapping<_Extents>> = true;
97
+
98
+ template <typename _Layout>
99
+ inline constexpr bool __is_any_mdspan_layout_mapping_left_or_right_v =
100
+ __is_any_mdspan_layout_mapping_left_v<_Layout> || __is_any_mdspan_layout_mapping_right_v<_Layout>;
101
+
102
+ // TODO (fbusato): Add support for layout_right_padded and layout_left_padded
103
+ // template<>
104
+ // inline constexpr bool __is_any_mdspan_layout_mapping_right_v<layout_right_padded> = true;
105
+
106
+ // template<>
107
+ // inline constexpr bool __is_any_mdspan_layout_mapping_right_v<layout_left_padded> = true;
108
+
86
109
  _CCCL_END_NAMESPACE_CUDA_STD
87
110
 
88
111
  #include <cuda/std/__cccl/epilogue.h>
@@ -22,6 +22,19 @@
22
22
 
23
23
  #include <cuda/std/__cccl/prologue.h>
24
24
 
25
+ // std:: forward declarations
26
+
27
+ #if _CCCL_HAS_HOST_STD_LIB()
28
+ _CCCL_BEGIN_NAMESPACE_STD
29
+
30
+ template <class, class>
31
+ struct pair;
32
+
33
+ _CCCL_END_NAMESPACE_STD
34
+ #endif // _CCCL_HAS_HOST_STD_LIB()
35
+
36
+ // cuda::std:: forward declarations
37
+
25
38
  _CCCL_BEGIN_NAMESPACE_CUDA_STD
26
39
 
27
40
  template <class, class>