cuda-cccl 0.3.2__cp313-cp313-manylinux_2_24_aarch64.whl → 0.3.4__cp313-cp313-manylinux_2_24_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cuda-cccl might be problematic. Click here for more details.

Files changed (911) hide show
  1. cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +12 -38
  2. cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +16 -40
  3. cuda/cccl/headers/include/cub/agent/agent_for.cuh +2 -28
  4. cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +24 -56
  5. cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +12 -38
  6. cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +31 -56
  7. cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +31 -35
  8. cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +47 -48
  9. cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +39 -42
  10. cuda/cccl/headers/include/cub/agent/agent_reduce.cuh +33 -60
  11. cuda/cccl/headers/include/cub/agent/agent_reduce_by_key.cuh +18 -44
  12. cuda/cccl/headers/include/cub/agent/agent_rle.cuh +26 -55
  13. cuda/cccl/headers/include/cub/agent/agent_scan.cuh +22 -49
  14. cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +15 -41
  15. cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +9 -35
  16. cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +20 -49
  17. cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +14 -40
  18. cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +18 -40
  19. cuda/cccl/headers/include/cub/agent/agent_topk.cuh +0 -2
  20. cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +20 -46
  21. cuda/cccl/headers/include/cub/agent/single_pass_scan_operators.cuh +3 -28
  22. cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +7 -31
  23. cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +10 -34
  24. cuda/cccl/headers/include/cub/block/block_exchange.cuh +120 -154
  25. cuda/cccl/headers/include/cub/block/block_histogram.cuh +28 -52
  26. cuda/cccl/headers/include/cub/block/block_load.cuh +124 -146
  27. cuda/cccl/headers/include/cub/block/block_load_to_shared.cuh +0 -16
  28. cuda/cccl/headers/include/cub/block/block_merge_sort.cuh +58 -87
  29. cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +81 -100
  30. cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +92 -156
  31. cuda/cccl/headers/include/cub/block/block_raking_layout.cuh +8 -32
  32. cuda/cccl/headers/include/cub/block/block_reduce.cuh +21 -46
  33. cuda/cccl/headers/include/cub/block/block_run_length_decode.cuh +51 -79
  34. cuda/cccl/headers/include/cub/block/block_scan.cuh +94 -401
  35. cuda/cccl/headers/include/cub/block/block_shuffle.cuh +10 -34
  36. cuda/cccl/headers/include/cub/block/block_store.cuh +73 -97
  37. cuda/cccl/headers/include/cub/block/radix_rank_sort_operations.cuh +2 -29
  38. cuda/cccl/headers/include/cub/block/specializations/block_histogram_atomic.cuh +5 -29
  39. cuda/cccl/headers/include/cub/block/specializations/block_histogram_sort.cuh +25 -49
  40. cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking.cuh +12 -34
  41. cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking_commutative_only.cuh +10 -34
  42. cuda/cccl/headers/include/cub/block/specializations/block_reduce_warp_reductions.cuh +3 -27
  43. cuda/cccl/headers/include/cub/block/specializations/block_scan_raking.cuh +12 -36
  44. cuda/cccl/headers/include/cub/block/specializations/block_scan_warp_scans.cuh +9 -33
  45. cuda/cccl/headers/include/cub/config.cuh +2 -26
  46. cuda/cccl/headers/include/cub/cub.cuh +3 -27
  47. cuda/cccl/headers/include/cub/detail/array_utils.cuh +2 -26
  48. cuda/cccl/headers/include/cub/detail/choose_offset.cuh +2 -28
  49. cuda/cccl/headers/include/cub/detail/detect_cuda_runtime.cuh +3 -27
  50. cuda/cccl/headers/include/cub/detail/device_double_buffer.cuh +0 -2
  51. cuda/cccl/headers/include/cub/detail/device_memory_resource.cuh +1 -3
  52. cuda/cccl/headers/include/cub/detail/fast_modulo_division.cuh +2 -28
  53. cuda/cccl/headers/include/cub/detail/integer_utils.cuh +0 -2
  54. cuda/cccl/headers/include/cub/detail/launcher/cuda_driver.cuh +0 -2
  55. cuda/cccl/headers/include/cub/detail/launcher/cuda_runtime.cuh +0 -2
  56. cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +0 -2
  57. cuda/cccl/headers/include/cub/detail/ptx-json/README.md +7 -12
  58. cuda/cccl/headers/include/cub/detail/ptx-json/array.h +6 -33
  59. cuda/cccl/headers/include/cub/detail/ptx-json/json.h +13 -36
  60. cuda/cccl/headers/include/cub/detail/ptx-json/object.h +9 -38
  61. cuda/cccl/headers/include/cub/detail/ptx-json/string.h +58 -32
  62. cuda/cccl/headers/include/cub/detail/ptx-json/value.h +51 -51
  63. cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +7 -31
  64. cuda/cccl/headers/include/cub/detail/rfa.cuh +2 -27
  65. cuda/cccl/headers/include/cub/detail/strong_load.cuh +3 -29
  66. cuda/cccl/headers/include/cub/detail/strong_store.cuh +3 -29
  67. cuda/cccl/headers/include/cub/detail/temporary_storage.cuh +2 -9
  68. cuda/cccl/headers/include/cub/detail/type_traits.cuh +0 -2
  69. cuda/cccl/headers/include/cub/detail/uninitialized_copy.cuh +6 -31
  70. cuda/cccl/headers/include/cub/detail/unsafe_bitcast.cuh +2 -25
  71. cuda/cccl/headers/include/cub/device/device_adjacent_difference.cuh +2 -26
  72. cuda/cccl/headers/include/cub/device/device_for.cuh +3 -5
  73. cuda/cccl/headers/include/cub/device/device_histogram.cuh +3 -27
  74. cuda/cccl/headers/include/cub/device/device_memcpy.cuh +2 -26
  75. cuda/cccl/headers/include/cub/device/device_merge_sort.cuh +2 -26
  76. cuda/cccl/headers/include/cub/device/device_partition.cuh +3 -27
  77. cuda/cccl/headers/include/cub/device/device_radix_sort.cuh +3 -27
  78. cuda/cccl/headers/include/cub/device/device_reduce.cuh +10 -31
  79. cuda/cccl/headers/include/cub/device/device_run_length_encode.cuh +3 -27
  80. cuda/cccl/headers/include/cub/device/device_scan.cuh +16 -34
  81. cuda/cccl/headers/include/cub/device/device_segmented_radix_sort.cuh +3 -27
  82. cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +3 -27
  83. cuda/cccl/headers/include/cub/device/device_segmented_sort.cuh +2 -26
  84. cuda/cccl/headers/include/cub/device/device_select.cuh +3 -27
  85. cuda/cccl/headers/include/cub/device/dispatch/dispatch_adjacent_difference.cuh +2 -28
  86. cuda/cccl/headers/include/cub/device/dispatch/dispatch_batch_memcpy.cuh +2 -27
  87. cuda/cccl/headers/include/cub/device/dispatch/dispatch_copy_mdspan.cuh +0 -2
  88. cuda/cccl/headers/include/cub/device/dispatch/dispatch_for.cuh +3 -29
  89. cuda/cccl/headers/include/cub/device/dispatch/dispatch_histogram.cuh +14 -34
  90. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge_sort.cuh +5 -30
  91. cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +4 -29
  92. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +5 -32
  93. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_by_key.cuh +3 -29
  94. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +2 -29
  95. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh +1 -2
  96. cuda/cccl/headers/include/cub/device/dispatch/dispatch_rle.cuh +47 -59
  97. cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan.cuh +21 -30
  98. cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan_by_key.cuh +2 -27
  99. cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +3 -27
  100. cuda/cccl/headers/include/cub/device/dispatch/dispatch_select_if.cuh +3 -27
  101. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +0 -2
  102. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +51 -36
  103. cuda/cccl/headers/include/cub/device/dispatch/dispatch_three_way_partition.cuh +3 -28
  104. cuda/cccl/headers/include/cub/device/dispatch/dispatch_topk.cuh +0 -1
  105. cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +27 -55
  106. cuda/cccl/headers/include/cub/device/dispatch/dispatch_unique_by_key.cuh +4 -28
  107. cuda/cccl/headers/include/cub/device/dispatch/kernels/{for_each.cuh → kernel_for_each.cuh} +0 -2
  108. cuda/cccl/headers/include/cub/device/dispatch/kernels/{histogram.cuh → kernel_histogram.cuh} +149 -157
  109. cuda/cccl/headers/include/cub/device/dispatch/kernels/{merge_sort.cuh → kernel_merge_sort.cuh} +0 -2
  110. cuda/cccl/headers/include/cub/device/dispatch/kernels/{radix_sort.cuh → kernel_radix_sort.cuh} +0 -2
  111. cuda/cccl/headers/include/cub/device/dispatch/kernels/{reduce.cuh → kernel_reduce.cuh} +2 -28
  112. cuda/cccl/headers/include/cub/device/dispatch/kernels/{scan.cuh → kernel_scan.cuh} +2 -28
  113. cuda/cccl/headers/include/cub/device/dispatch/kernels/{segmented_reduce.cuh → kernel_segmented_reduce.cuh} +3 -29
  114. cuda/cccl/headers/include/cub/device/dispatch/kernels/{segmented_sort.cuh → kernel_segmented_sort.cuh} +0 -1
  115. cuda/cccl/headers/include/cub/device/dispatch/kernels/{three_way_partition.cuh → kernel_three_way_partition.cuh} +0 -1
  116. cuda/cccl/headers/include/cub/device/dispatch/kernels/{transform.cuh → kernel_transform.cuh} +11 -11
  117. cuda/cccl/headers/include/cub/device/dispatch/kernels/{unique_by_key.cuh → kernel_unique_by_key.cuh} +0 -1
  118. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +2 -26
  119. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +2 -26
  120. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +2 -28
  121. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +6 -26
  122. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +2 -26
  123. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +5 -31
  124. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +31 -33
  125. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce.cuh +15 -40
  126. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +2 -26
  127. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +2 -28
  128. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +20 -44
  129. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +2 -26
  130. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +20 -45
  131. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_select_if.cuh +2 -27
  132. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +11 -36
  133. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_topk.cuh +0 -1
  134. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +2 -27
  135. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +14 -40
  136. cuda/cccl/headers/include/cub/grid/grid_even_share.cuh +3 -27
  137. cuda/cccl/headers/include/cub/grid/grid_mapping.cuh +3 -27
  138. cuda/cccl/headers/include/cub/grid/grid_queue.cuh +3 -27
  139. cuda/cccl/headers/include/cub/iterator/arg_index_input_iterator.cuh +3 -27
  140. cuda/cccl/headers/include/cub/iterator/cache_modified_input_iterator.cuh +3 -27
  141. cuda/cccl/headers/include/cub/iterator/cache_modified_output_iterator.cuh +3 -27
  142. cuda/cccl/headers/include/cub/iterator/tex_obj_input_iterator.cuh +3 -27
  143. cuda/cccl/headers/include/cub/thread/thread_load.cuh +3 -28
  144. cuda/cccl/headers/include/cub/thread/thread_operators.cuh +3 -27
  145. cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +3 -26
  146. cuda/cccl/headers/include/cub/thread/thread_scan.cuh +3 -29
  147. cuda/cccl/headers/include/cub/thread/thread_search.cuh +3 -27
  148. cuda/cccl/headers/include/cub/thread/thread_simd.cuh +0 -2
  149. cuda/cccl/headers/include/cub/thread/thread_sort.cuh +2 -26
  150. cuda/cccl/headers/include/cub/thread/thread_store.cuh +3 -27
  151. cuda/cccl/headers/include/cub/util_allocator.cuh +3 -27
  152. cuda/cccl/headers/include/cub/util_arch.cuh +3 -29
  153. cuda/cccl/headers/include/cub/util_cpp_dialect.cuh +2 -26
  154. cuda/cccl/headers/include/cub/util_debug.cuh +3 -27
  155. cuda/cccl/headers/include/cub/util_device.cuh +18 -59
  156. cuda/cccl/headers/include/cub/util_macro.cuh +4 -28
  157. cuda/cccl/headers/include/cub/util_math.cuh +2 -28
  158. cuda/cccl/headers/include/cub/util_namespace.cuh +3 -28
  159. cuda/cccl/headers/include/cub/util_policy_wrapper_t.cuh +3 -27
  160. cuda/cccl/headers/include/cub/util_ptx.cuh +6 -30
  161. cuda/cccl/headers/include/cub/util_temporary_storage.cuh +3 -29
  162. cuda/cccl/headers/include/cub/util_type.cuh +5 -32
  163. cuda/cccl/headers/include/cub/util_vsmem.cuh +2 -28
  164. cuda/cccl/headers/include/cub/version.cuh +2 -26
  165. cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_shfl.cuh +10 -35
  166. cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_smem.cuh +5 -30
  167. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +15 -39
  168. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +5 -35
  169. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +22 -46
  170. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_smem.cuh +3 -27
  171. cuda/cccl/headers/include/cub/warp/warp_exchange.cuh +2 -26
  172. cuda/cccl/headers/include/cub/warp/warp_load.cuh +4 -27
  173. cuda/cccl/headers/include/cub/warp/warp_merge_sort.cuh +2 -26
  174. cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +3 -22
  175. cuda/cccl/headers/include/cub/warp/warp_scan.cuh +3 -27
  176. cuda/cccl/headers/include/cub/warp/warp_store.cuh +4 -27
  177. cuda/cccl/headers/include/cub/warp/warp_utils.cuh +0 -2
  178. cuda/cccl/headers/include/cuda/__barrier/barrier.h +1 -1
  179. cuda/cccl/headers/include/cuda/__barrier/barrier_arrive_tx.h +0 -1
  180. cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +277 -235
  181. cuda/cccl/headers/include/cuda/__barrier/barrier_expect_tx.h +0 -1
  182. cuda/cccl/headers/include/cuda/__driver/driver_api.h +13 -0
  183. cuda/cccl/headers/include/cuda/__execution/determinism.h +0 -2
  184. cuda/cccl/headers/include/cuda/__execution/output_ordering.h +0 -2
  185. cuda/cccl/headers/include/cuda/__functional/maximum.h +25 -7
  186. cuda/cccl/headers/include/cuda/__functional/minimum.h +25 -7
  187. cuda/cccl/headers/include/cuda/__functional/minimum_maximum_common.h +52 -0
  188. cuda/cccl/headers/include/cuda/__functional/proclaim_return_type.h +0 -2
  189. cuda/cccl/headers/include/cuda/__iterator/counting_iterator.h +13 -4
  190. cuda/cccl/headers/include/cuda/__iterator/zip_function.h +4 -2
  191. cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +0 -1
  192. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_bulk_shared_global.h +28 -7
  193. cuda/cccl/headers/include/cuda/__memcpy_async/dispatch_memcpy_async.h +1 -1
  194. cuda/cccl/headers/include/cuda/__memcpy_async/elect_one.h +52 -0
  195. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_tx.h +2 -3
  196. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_completion.h +1 -7
  197. cuda/cccl/headers/include/cuda/__memcpy_async/try_get_barrier_handle.h +0 -1
  198. cuda/cccl/headers/include/cuda/__memory/get_device_address.h +1 -1
  199. cuda/cccl/headers/include/cuda/__memory/ranges_overlap.h +126 -0
  200. cuda/cccl/headers/include/cuda/__memory_resource/any_resource.h +898 -0
  201. cuda/cccl/headers/include/cuda/__memory_resource/device_memory_pool.h +149 -0
  202. cuda/cccl/headers/include/cuda/__memory_resource/get_property.h +3 -3
  203. cuda/cccl/headers/include/cuda/__memory_resource/legacy_managed_memory_resource.h +148 -0
  204. cuda/cccl/headers/include/cuda/__memory_resource/legacy_pinned_memory_resource.h +139 -0
  205. cuda/cccl/headers/include/cuda/__memory_resource/managed_memory_pool.h +146 -0
  206. cuda/cccl/headers/include/cuda/__memory_resource/memory_resource_base.h +578 -0
  207. cuda/cccl/headers/include/cuda/__memory_resource/pinned_memory_pool.h +188 -0
  208. cuda/cccl/headers/include/cuda/__memory_resource/properties.h +3 -3
  209. cuda/cccl/headers/include/cuda/__memory_resource/resource.h +37 -3
  210. cuda/cccl/headers/include/cuda/__numeric/add_overflow.h +13 -3
  211. cuda/cccl/headers/include/cuda/__numeric/div_overflow.h +150 -0
  212. cuda/cccl/headers/include/cuda/__numeric/overflow_cast.h +2 -2
  213. cuda/cccl/headers/include/cuda/__numeric/sub_overflow.h +344 -0
  214. cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +0 -6
  215. cuda/cccl/headers/include/cuda/__ptx/instructions/shfl_sync.h +1 -1
  216. cuda/cccl/headers/include/cuda/__ptx/pragmas/enable_smem_spilling.h +47 -0
  217. cuda/cccl/headers/include/cuda/{std/__cuda → __runtime}/api_wrapper.h +3 -3
  218. cuda/cccl/headers/include/cuda/__stream/get_stream.h +0 -1
  219. cuda/cccl/headers/include/cuda/{__fwd/barrier_native_handle.h → __stream/internal_streams.h} +17 -15
  220. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_ptr.h +2 -2
  221. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_value.h +1 -0
  222. cuda/cccl/headers/include/cuda/__utility/__basic_any/semiregular.h +1 -0
  223. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtcall.h +2 -1
  224. cuda/cccl/headers/include/cuda/barrier +42 -16
  225. cuda/cccl/headers/include/cuda/memory +1 -0
  226. cuda/cccl/headers/include/cuda/memory_resource +6 -1
  227. cuda/cccl/headers/include/cuda/numeric +2 -0
  228. cuda/cccl/headers/include/cuda/pipeline +3 -2
  229. cuda/cccl/headers/include/cuda/ptx +1 -0
  230. cuda/cccl/headers/include/cuda/std/__algorithm/unique_copy.h +0 -2
  231. cuda/cccl/headers/include/cuda/std/__atomic/api/reference.h +1 -1
  232. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_derived.h +115 -58
  233. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated.h +844 -378
  234. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated_helper.h +12 -5
  235. cuda/cccl/headers/include/cuda/std/__atomic/functions/host.h +31 -0
  236. cuda/cccl/headers/include/cuda/std/__atomic/types/small.h +10 -0
  237. cuda/cccl/headers/include/cuda/std/__atomic/types.h +2 -3
  238. cuda/cccl/headers/include/cuda/std/__bit/byteswap.h +37 -13
  239. cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +0 -28
  240. cuda/cccl/headers/include/cuda/std/__cccl/dialect.h +7 -0
  241. cuda/cccl/headers/include/cuda/std/__cccl/epilogue.h +10 -0
  242. cuda/cccl/headers/include/cuda/std/__cccl/exceptions.h +2 -45
  243. cuda/cccl/headers/include/cuda/std/__cccl/is_non_narrowing_convertible.h +0 -2
  244. cuda/cccl/headers/include/cuda/std/__cccl/prologue.h +8 -0
  245. cuda/cccl/headers/include/cuda/std/__chrono/calendar.h +0 -2
  246. cuda/cccl/headers/include/cuda/std/__chrono/day.h +0 -2
  247. cuda/cccl/headers/include/cuda/std/__chrono/duration.h +13 -17
  248. cuda/cccl/headers/include/cuda/std/__chrono/file_clock.h +0 -2
  249. cuda/cccl/headers/include/cuda/std/__chrono/high_resolution_clock.h +0 -2
  250. cuda/cccl/headers/include/cuda/std/__chrono/month.h +0 -2
  251. cuda/cccl/headers/include/cuda/std/__chrono/steady_clock.h +0 -2
  252. cuda/cccl/headers/include/cuda/std/__chrono/system_clock.h +0 -2
  253. cuda/cccl/headers/include/cuda/std/__chrono/time_point.h +5 -8
  254. cuda/cccl/headers/include/cuda/std/__chrono/year.h +0 -2
  255. cuda/cccl/headers/include/cuda/std/__cmath/error_functions.h +4 -0
  256. cuda/cccl/headers/include/cuda/std/__cmath/exponential_functions.h +2 -3
  257. cuda/cccl/headers/include/cuda/std/__cmath/fdim.h +4 -0
  258. cuda/cccl/headers/include/cuda/std/__cmath/fma.h +4 -0
  259. cuda/cccl/headers/include/cuda/std/__cmath/fpclassify.h +2 -3
  260. cuda/cccl/headers/include/cuda/std/__cmath/gamma.h +2 -3
  261. cuda/cccl/headers/include/cuda/std/__cmath/hyperbolic_functions.h +2 -3
  262. cuda/cccl/headers/include/cuda/std/__cmath/hypot.h +2 -3
  263. cuda/cccl/headers/include/cuda/std/__cmath/inverse_hyperbolic_functions.h +2 -3
  264. cuda/cccl/headers/include/cuda/std/__cmath/inverse_trigonometric_functions.h +2 -3
  265. cuda/cccl/headers/include/cuda/std/__cmath/isfinite.h +2 -3
  266. cuda/cccl/headers/include/cuda/std/__cmath/isinf.h +2 -3
  267. cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +2 -3
  268. cuda/cccl/headers/include/cuda/std/__cmath/logarithms.h +2 -3
  269. cuda/cccl/headers/include/cuda/std/__cmath/min_max.h +2 -2
  270. cuda/cccl/headers/include/cuda/std/__cmath/remainder.h +4 -0
  271. cuda/cccl/headers/include/cuda/std/__cmath/roots.h +2 -3
  272. cuda/cccl/headers/include/cuda/std/__cmath/rounding_functions.h +2 -3
  273. cuda/cccl/headers/include/cuda/std/__cmath/traits.h +4 -0
  274. cuda/cccl/headers/include/cuda/std/__cmath/trigonometric_functions.h +2 -3
  275. cuda/cccl/headers/include/cuda/std/__complex/complex.h +0 -6
  276. cuda/cccl/headers/include/cuda/std/__complex/exponential_functions.h +2 -2
  277. cuda/cccl/headers/include/cuda/std/__concepts/concept_macros.h +27 -1
  278. cuda/cccl/headers/include/cuda/std/__concepts/equality_comparable.h +2 -4
  279. cuda/cccl/headers/include/cuda/std/__exception/cuda_error.h +15 -36
  280. cuda/cccl/headers/include/cuda/std/__exception/exception_macros.h +93 -0
  281. cuda/cccl/headers/include/cuda/std/{detail/libcxx/include/stdexcept → __exception/throw_error.h} +3 -3
  282. cuda/cccl/headers/include/cuda/std/__expected/expected.h +28 -43
  283. cuda/cccl/headers/include/cuda/std/__expected/unexpected.h +2 -10
  284. cuda/cccl/headers/include/cuda/std/__format/format_arg_store.h +2 -2
  285. cuda/cccl/headers/include/cuda/std/__functional/bind.h +6 -6
  286. cuda/cccl/headers/include/cuda/std/__functional/function.h +2 -6
  287. cuda/cccl/headers/include/cuda/std/__functional/invoke.h +5 -5
  288. cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +5 -0
  289. cuda/cccl/headers/include/cuda/std/__fwd/array.h +2 -2
  290. cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +12 -0
  291. cuda/cccl/headers/include/cuda/std/__fwd/expected.h +46 -0
  292. cuda/cccl/headers/include/cuda/std/__fwd/get.h +21 -22
  293. cuda/cccl/headers/include/cuda/std/{detail/libcxx/include/iosfwd → __fwd/ios.h} +5 -10
  294. cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +19 -10
  295. cuda/cccl/headers/include/cuda/std/__fwd/optional.h +2 -2
  296. cuda/cccl/headers/include/cuda/std/__fwd/reference_wrapper.h +5 -0
  297. cuda/cccl/headers/include/cuda/std/__fwd/span.h +2 -2
  298. cuda/cccl/headers/include/cuda/std/__fwd/string.h +7 -0
  299. cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +18 -0
  300. cuda/cccl/headers/include/cuda/std/__fwd/tuple.h +3 -0
  301. cuda/cccl/headers/include/cuda/std/__fwd/unexpected.h +40 -0
  302. cuda/cccl/headers/include/cuda/std/{__type_traits/is_reference_wrapper.h → __fwd/variant.h} +16 -15
  303. cuda/cccl/headers/include/cuda/std/__internal/features.h +14 -0
  304. cuda/cccl/headers/include/cuda/std/__iterator/istream_iterator.h +1 -1
  305. cuda/cccl/headers/include/cuda/std/__iterator/istreambuf_iterator.h +1 -1
  306. cuda/cccl/headers/include/cuda/std/__iterator/iter_swap.h +58 -40
  307. cuda/cccl/headers/include/cuda/std/__iterator/ostream_iterator.h +1 -1
  308. cuda/cccl/headers/include/cuda/std/__iterator/ostreambuf_iterator.h +1 -1
  309. cuda/cccl/headers/include/cuda/std/__iterator/reverse_iterator.h +0 -5
  310. cuda/cccl/headers/include/cuda/std/__limits/numeric_limits.h +4 -18
  311. cuda/cccl/headers/include/cuda/std/__linalg/conj_if_needed.h +1 -2
  312. cuda/cccl/headers/include/cuda/std/__linalg/conjugate_transposed.h +0 -2
  313. cuda/cccl/headers/include/cuda/std/__linalg/conjugated.h +0 -2
  314. cuda/cccl/headers/include/cuda/std/__linalg/scaled.h +0 -4
  315. cuda/cccl/headers/include/cuda/std/__linalg/transposed.h +0 -5
  316. cuda/cccl/headers/include/cuda/std/__mdspan/concepts.h +3 -10
  317. cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +4 -15
  318. cuda/cccl/headers/include/cuda/std/__mdspan/layout_left.h +4 -4
  319. cuda/cccl/headers/include/cuda/std/__mdspan/layout_right.h +4 -4
  320. cuda/cccl/headers/include/cuda/std/__mdspan/layout_stride.h +2 -4
  321. cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +3 -3
  322. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_helper.h +1 -1
  323. cuda/cccl/headers/include/cuda/std/__memory/allocator_arg_t.h +1 -0
  324. cuda/cccl/headers/include/cuda/std/__memory/allocator_traits.h +6 -12
  325. cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +1 -5
  326. cuda/cccl/headers/include/cuda/std/__memory/is_sufficiently_aligned.h +7 -2
  327. cuda/cccl/headers/include/cuda/std/__memory/uninitialized_algorithms.h +1 -0
  328. cuda/cccl/headers/include/cuda/std/__memory/uses_allocator.h +5 -0
  329. cuda/cccl/headers/include/cuda/std/__new/allocate.h +5 -0
  330. cuda/cccl/headers/include/cuda/{__barrier/barrier_native_handle.h → std/__new/device_new.h} +9 -24
  331. cuda/cccl/headers/include/cuda/std/__new_ +1 -0
  332. cuda/cccl/headers/include/cuda/std/__optional/optional.h +5 -4
  333. cuda/cccl/headers/include/cuda/std/__optional/optional_ref.h +4 -4
  334. cuda/cccl/headers/include/cuda/std/__random/linear_congruential_engine.h +1 -1
  335. cuda/cccl/headers/include/cuda/std/__random/philox_engine.h +562 -0
  336. cuda/cccl/headers/include/cuda/std/__random/seed_seq.h +204 -0
  337. cuda/cccl/headers/include/cuda/std/__random_ +2 -0
  338. cuda/cccl/headers/include/cuda/std/__ranges/concepts.h +7 -19
  339. cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +2 -4
  340. cuda/cccl/headers/include/cuda/std/__ranges/owning_view.h +5 -4
  341. cuda/cccl/headers/include/cuda/std/__ranges/repeat_view.h +1 -1
  342. cuda/cccl/headers/include/cuda/std/__string/string_view.h +5 -5
  343. cuda/cccl/headers/include/cuda/std/__tuple_dir/apply.h +82 -0
  344. cuda/cccl/headers/include/cuda/std/__tuple_dir/get.h +122 -0
  345. cuda/cccl/headers/include/cuda/std/__tuple_dir/sfinae_helpers.h +0 -160
  346. cuda/cccl/headers/include/cuda/std/__tuple_dir/structured_bindings.h +123 -129
  347. cuda/cccl/headers/include/cuda/std/__tuple_dir/tie.h +55 -0
  348. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple.h +457 -0
  349. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_cat.h +158 -0
  350. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_constraints.h +286 -0
  351. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_element.h +7 -0
  352. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_leaf.h +452 -0
  353. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +1 -2
  354. cuda/cccl/headers/include/cuda/std/__type_traits/is_comparable.h +78 -0
  355. cuda/cccl/headers/include/cuda/std/__type_traits/is_convertible.h +1 -1
  356. cuda/cccl/headers/include/cuda/std/__type_traits/is_fully_bounded_array.h +47 -0
  357. cuda/cccl/headers/include/cuda/std/__type_traits/is_swappable.h +0 -2
  358. cuda/cccl/headers/include/cuda/std/__utility/in_place.h +4 -24
  359. cuda/cccl/headers/include/cuda/std/__utility/integer_sequence.h +0 -2
  360. cuda/cccl/headers/include/cuda/std/__utility/pair.h +20 -20
  361. cuda/cccl/headers/include/cuda/std/__utility/rel_ops.h +0 -2
  362. cuda/cccl/headers/include/cuda/std/__variant/bad_variant_access.h +74 -0
  363. cuda/cccl/headers/include/cuda/std/__variant/comparison.h +207 -0
  364. cuda/cccl/headers/include/cuda/std/__variant/get.h +192 -0
  365. cuda/cccl/headers/include/cuda/std/__variant/hash.h +82 -0
  366. cuda/cccl/headers/include/cuda/std/__variant/sfinae_helpers.h +89 -0
  367. cuda/cccl/headers/include/cuda/std/__variant/variant.h +250 -0
  368. cuda/cccl/headers/include/cuda/std/__variant/variant_access.h +70 -0
  369. cuda/cccl/headers/include/cuda/std/__variant/variant_base.h +683 -0
  370. cuda/cccl/headers/include/cuda/std/__variant/variant_constraints.h +135 -0
  371. cuda/cccl/headers/include/cuda/std/__variant/variant_match.h +126 -0
  372. cuda/cccl/headers/include/cuda/std/__variant/variant_traits.h +184 -0
  373. cuda/cccl/headers/include/cuda/std/__variant/variant_visit.h +225 -0
  374. cuda/cccl/headers/include/cuda/std/__variant/visit.h +148 -0
  375. cuda/cccl/headers/include/cuda/std/array +1 -1
  376. cuda/cccl/headers/include/cuda/std/atomic +1 -1
  377. cuda/cccl/headers/include/cuda/std/bitset +2 -10
  378. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +6 -6
  379. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/algorithm +1 -4
  380. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/chrono +3 -6
  381. cuda/cccl/headers/include/cuda/std/functional +1 -1
  382. cuda/cccl/headers/include/cuda/std/initializer_list +8 -0
  383. cuda/cccl/headers/include/cuda/std/inplace_vector +6 -5
  384. cuda/cccl/headers/include/cuda/std/iterator +1 -1
  385. cuda/cccl/headers/include/cuda/std/numbers +0 -2
  386. cuda/cccl/headers/include/cuda/std/ratio +2 -2
  387. cuda/cccl/headers/include/cuda/std/span +2 -2
  388. cuda/cccl/headers/include/cuda/std/string_view +24 -42
  389. cuda/cccl/headers/include/cuda/std/tuple +18 -1
  390. cuda/cccl/headers/include/cuda/std/type_traits +0 -1
  391. cuda/cccl/headers/include/cuda/std/variant +8 -1
  392. cuda/cccl/headers/include/nv/target +2 -6
  393. cuda/cccl/headers/include/thrust/detail/adjacent_difference.inl +15 -2
  394. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +0 -2
  395. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +0 -1
  396. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +0 -1
  397. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +0 -2
  398. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +0 -2
  399. cuda/cccl/headers/include/thrust/detail/allocator/no_throw_allocator.h +0 -2
  400. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +0 -2
  401. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +0 -2
  402. cuda/cccl/headers/include/thrust/detail/allocator_aware_execution_policy.h +0 -4
  403. cuda/cccl/headers/include/thrust/detail/binary_search.inl +14 -2
  404. cuda/cccl/headers/include/thrust/detail/complex/arithmetic.h +2 -7
  405. cuda/cccl/headers/include/thrust/detail/complex/c99math.h +2 -8
  406. cuda/cccl/headers/include/thrust/detail/complex/catrig.h +2 -8
  407. cuda/cccl/headers/include/thrust/detail/complex/catrigf.h +2 -8
  408. cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +2 -8
  409. cuda/cccl/headers/include/thrust/detail/complex/ccoshf.h +2 -8
  410. cuda/cccl/headers/include/thrust/detail/complex/cexp.h +2 -7
  411. cuda/cccl/headers/include/thrust/detail/complex/cexpf.h +2 -8
  412. cuda/cccl/headers/include/thrust/detail/complex/clog.h +2 -8
  413. cuda/cccl/headers/include/thrust/detail/complex/clogf.h +2 -8
  414. cuda/cccl/headers/include/thrust/detail/complex/cproj.h +2 -7
  415. cuda/cccl/headers/include/thrust/detail/complex/csinh.h +2 -8
  416. cuda/cccl/headers/include/thrust/detail/complex/csinhf.h +2 -8
  417. cuda/cccl/headers/include/thrust/detail/complex/csqrt.h +2 -8
  418. cuda/cccl/headers/include/thrust/detail/complex/csqrtf.h +2 -8
  419. cuda/cccl/headers/include/thrust/detail/complex/ctanh.h +2 -8
  420. cuda/cccl/headers/include/thrust/detail/complex/ctanhf.h +2 -8
  421. cuda/cccl/headers/include/thrust/detail/complex/math_private.h +2 -8
  422. cuda/cccl/headers/include/thrust/detail/config/device_system.h +2 -0
  423. cuda/cccl/headers/include/thrust/detail/config/host_system.h +2 -0
  424. cuda/cccl/headers/include/thrust/detail/config/namespace.h +0 -1
  425. cuda/cccl/headers/include/thrust/detail/contiguous_storage.h +0 -2
  426. cuda/cccl/headers/include/thrust/detail/contiguous_storage.inl +0 -2
  427. cuda/cccl/headers/include/thrust/detail/copy.h +0 -2
  428. cuda/cccl/headers/include/thrust/detail/copy.inl +14 -4
  429. cuda/cccl/headers/include/thrust/detail/copy_if.inl +14 -2
  430. cuda/cccl/headers/include/thrust/detail/count.inl +14 -2
  431. cuda/cccl/headers/include/thrust/detail/equal.inl +14 -2
  432. cuda/cccl/headers/include/thrust/detail/execute_with_allocator.h +4 -5
  433. cuda/cccl/headers/include/thrust/detail/extrema.inl +14 -2
  434. cuda/cccl/headers/include/thrust/detail/fill.inl +14 -2
  435. cuda/cccl/headers/include/thrust/detail/find.inl +14 -2
  436. cuda/cccl/headers/include/thrust/detail/for_each.inl +14 -2
  437. cuda/cccl/headers/include/thrust/detail/functional/actor.h +2 -5
  438. cuda/cccl/headers/include/thrust/detail/functional/operators.h +2 -5
  439. cuda/cccl/headers/include/thrust/detail/gather.inl +14 -2
  440. cuda/cccl/headers/include/thrust/detail/generate.inl +14 -2
  441. cuda/cccl/headers/include/thrust/detail/get_iterator_value.h +0 -2
  442. cuda/cccl/headers/include/thrust/detail/inner_product.inl +14 -2
  443. cuda/cccl/headers/include/thrust/detail/internal_functional.h +1 -0
  444. cuda/cccl/headers/include/thrust/detail/logical.inl +14 -2
  445. cuda/cccl/headers/include/thrust/detail/malloc_and_free.h +13 -1
  446. cuda/cccl/headers/include/thrust/detail/merge.inl +14 -2
  447. cuda/cccl/headers/include/thrust/detail/mismatch.inl +14 -2
  448. cuda/cccl/headers/include/thrust/detail/overlapped_copy.h +0 -4
  449. cuda/cccl/headers/include/thrust/detail/partition.inl +14 -2
  450. cuda/cccl/headers/include/thrust/detail/random_bijection.h +0 -2
  451. cuda/cccl/headers/include/thrust/detail/range/head_flags.h +0 -2
  452. cuda/cccl/headers/include/thrust/detail/range/tail_flags.h +0 -2
  453. cuda/cccl/headers/include/thrust/detail/raw_reference_cast.h +0 -6
  454. cuda/cccl/headers/include/thrust/detail/reduce.inl +21 -3
  455. cuda/cccl/headers/include/thrust/detail/reference.h +27 -3
  456. cuda/cccl/headers/include/thrust/detail/remove.inl +14 -2
  457. cuda/cccl/headers/include/thrust/detail/replace.inl +14 -2
  458. cuda/cccl/headers/include/thrust/detail/reverse.inl +14 -2
  459. cuda/cccl/headers/include/thrust/detail/scan.inl +21 -3
  460. cuda/cccl/headers/include/thrust/detail/scatter.inl +14 -2
  461. cuda/cccl/headers/include/thrust/detail/sequence.inl +13 -1
  462. cuda/cccl/headers/include/thrust/detail/set_operations.inl +13 -1
  463. cuda/cccl/headers/include/thrust/detail/sort.inl +13 -1
  464. cuda/cccl/headers/include/thrust/detail/static_assert.h +0 -2
  465. cuda/cccl/headers/include/thrust/detail/static_map.h +0 -3
  466. cuda/cccl/headers/include/thrust/detail/swap_ranges.inl +13 -1
  467. cuda/cccl/headers/include/thrust/detail/tabulate.inl +14 -2
  468. cuda/cccl/headers/include/thrust/detail/temporary_array.h +0 -4
  469. cuda/cccl/headers/include/thrust/detail/temporary_array.inl +0 -1
  470. cuda/cccl/headers/include/thrust/detail/temporary_buffer.h +14 -3
  471. cuda/cccl/headers/include/thrust/detail/transform_reduce.inl +13 -1
  472. cuda/cccl/headers/include/thrust/detail/transform_scan.inl +13 -1
  473. cuda/cccl/headers/include/thrust/detail/trivial_sequence.h +0 -2
  474. cuda/cccl/headers/include/thrust/detail/tuple_meta_transform.h +0 -2
  475. cuda/cccl/headers/include/thrust/detail/type_traits/is_call_possible.h +2 -7
  476. cuda/cccl/headers/include/thrust/detail/type_traits/is_commutative.h +0 -2
  477. cuda/cccl/headers/include/thrust/detail/type_traits/is_thrust_pointer.h +0 -4
  478. cuda/cccl/headers/include/thrust/detail/type_traits/pointer_traits.h +0 -4
  479. cuda/cccl/headers/include/thrust/detail/uninitialized_copy.inl +14 -2
  480. cuda/cccl/headers/include/thrust/detail/uninitialized_fill.inl +14 -2
  481. cuda/cccl/headers/include/thrust/detail/unique.inl +21 -3
  482. cuda/cccl/headers/include/thrust/detail/vector_base.h +0 -2
  483. cuda/cccl/headers/include/thrust/detail/vector_base.inl +0 -2
  484. cuda/cccl/headers/include/thrust/execution_policy.h +10 -9
  485. cuda/cccl/headers/include/thrust/functional.h +0 -2
  486. cuda/cccl/headers/include/thrust/iterator/detail/device_system_tag.h +9 -4
  487. cuda/cccl/headers/include/thrust/iterator/detail/host_system_tag.h +8 -4
  488. cuda/cccl/headers/include/thrust/iterator/detail/iterator_adaptor_base.h +0 -1
  489. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h +0 -1
  490. cuda/cccl/headers/include/thrust/iterator/detail/iterator_facade_category.h +0 -1
  491. cuda/cccl/headers/include/thrust/iterator/detail/minimum_system.h +0 -1
  492. cuda/cccl/headers/include/thrust/iterator/detail/tagged_iterator.h +0 -1
  493. cuda/cccl/headers/include/thrust/iterator/detail/tuple_of_iterator_references.h +2 -6
  494. cuda/cccl/headers/include/thrust/iterator/transform_input_output_iterator.h +0 -1
  495. cuda/cccl/headers/include/thrust/iterator/transform_iterator.h +0 -2
  496. cuda/cccl/headers/include/thrust/mr/allocator.h +0 -2
  497. cuda/cccl/headers/include/thrust/mr/device_memory_resource.h +9 -4
  498. cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +10 -10
  499. cuda/cccl/headers/include/thrust/mr/disjoint_sync_pool.h +0 -2
  500. cuda/cccl/headers/include/thrust/mr/disjoint_tls_pool.h +0 -2
  501. cuda/cccl/headers/include/thrust/mr/fancy_pointer_resource.h +0 -2
  502. cuda/cccl/headers/include/thrust/mr/host_memory_resource.h +8 -4
  503. cuda/cccl/headers/include/thrust/mr/memory_resource.h +0 -2
  504. cuda/cccl/headers/include/thrust/mr/new.h +0 -2
  505. cuda/cccl/headers/include/thrust/mr/polymorphic_adaptor.h +0 -2
  506. cuda/cccl/headers/include/thrust/mr/pool.h +10 -10
  507. cuda/cccl/headers/include/thrust/mr/pool_options.h +4 -6
  508. cuda/cccl/headers/include/thrust/mr/sync_pool.h +0 -2
  509. cuda/cccl/headers/include/thrust/mr/tls_pool.h +0 -2
  510. cuda/cccl/headers/include/thrust/mr/validator.h +0 -2
  511. cuda/cccl/headers/include/thrust/per_device_resource.h +13 -1
  512. cuda/cccl/headers/include/thrust/random/detail/discard_block_engine.inl +0 -2
  513. cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine.inl +0 -2
  514. cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine_discard.h +2 -9
  515. cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine.inl +0 -2
  516. cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h +2 -9
  517. cuda/cccl/headers/include/thrust/random/detail/mod.h +2 -9
  518. cuda/cccl/headers/include/thrust/random/detail/normal_distribution.inl +0 -2
  519. cuda/cccl/headers/include/thrust/random/detail/normal_distribution_base.h +2 -7
  520. cuda/cccl/headers/include/thrust/random/detail/random_core_access.h +2 -9
  521. cuda/cccl/headers/include/thrust/random/detail/subtract_with_carry_engine.inl +0 -2
  522. cuda/cccl/headers/include/thrust/random/detail/uniform_int_distribution.inl +0 -2
  523. cuda/cccl/headers/include/thrust/random/detail/uniform_real_distribution.inl +0 -2
  524. cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine.inl +0 -2
  525. cuda/cccl/headers/include/thrust/random/discard_block_engine.h +0 -2
  526. cuda/cccl/headers/include/thrust/random/linear_congruential_engine.h +0 -2
  527. cuda/cccl/headers/include/thrust/random/linear_feedback_shift_engine.h +0 -2
  528. cuda/cccl/headers/include/thrust/random/normal_distribution.h +0 -2
  529. cuda/cccl/headers/include/thrust/random/subtract_with_carry_engine.h +0 -2
  530. cuda/cccl/headers/include/thrust/random/uniform_int_distribution.h +0 -2
  531. cuda/cccl/headers/include/thrust/random/uniform_real_distribution.h +0 -2
  532. cuda/cccl/headers/include/thrust/random/xor_combine_engine.h +0 -2
  533. cuda/cccl/headers/include/thrust/random.h +0 -2
  534. cuda/cccl/headers/include/thrust/system/cpp/detail/execution_policy.h +15 -11
  535. cuda/cccl/headers/include/thrust/system/cpp/detail/memory.inl +2 -7
  536. cuda/cccl/headers/include/thrust/system/cpp/memory.h +0 -1
  537. cuda/cccl/headers/include/thrust/system/cpp/memory_resource.h +0 -2
  538. cuda/cccl/headers/include/thrust/system/cpp/pointer.h +0 -2
  539. cuda/cccl/headers/include/thrust/system/cpp/vector.h +0 -1
  540. cuda/cccl/headers/include/thrust/system/cuda/detail/adjacent_difference.h +0 -4
  541. cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +0 -1
  542. cuda/cccl/headers/include/thrust/system/cuda/detail/copy_if.h +0 -4
  543. cuda/cccl/headers/include/thrust/system/cuda/detail/core/agent_launcher.h +2 -9
  544. cuda/cccl/headers/include/thrust/system/cuda/detail/core/triple_chevron_launch.h +4 -32
  545. cuda/cccl/headers/include/thrust/system/cuda/detail/core/util.h +2 -9
  546. cuda/cccl/headers/include/thrust/system/cuda/detail/count.h +0 -2
  547. cuda/cccl/headers/include/thrust/system/cuda/detail/cross_system.h +0 -2
  548. cuda/cccl/headers/include/thrust/system/cuda/detail/dispatch.h +23 -2
  549. cuda/cccl/headers/include/thrust/system/cuda/detail/equal.h +0 -2
  550. cuda/cccl/headers/include/thrust/system/cuda/detail/error.inl +2 -11
  551. cuda/cccl/headers/include/thrust/system/cuda/detail/execution_policy.h +2 -0
  552. cuda/cccl/headers/include/thrust/system/cuda/detail/extrema.h +0 -4
  553. cuda/cccl/headers/include/thrust/system/cuda/detail/fill.h +0 -1
  554. cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +0 -5
  555. cuda/cccl/headers/include/thrust/system/cuda/detail/for_each.h +0 -1
  556. cuda/cccl/headers/include/thrust/system/cuda/detail/gather.h +0 -2
  557. cuda/cccl/headers/include/thrust/system/cuda/detail/generate.h +0 -2
  558. cuda/cccl/headers/include/thrust/system/cuda/detail/iter_swap.h +0 -1
  559. cuda/cccl/headers/include/thrust/system/cuda/detail/make_unsigned_special.h +2 -8
  560. cuda/cccl/headers/include/thrust/system/cuda/detail/malloc_and_free.h +0 -2
  561. cuda/cccl/headers/include/thrust/system/cuda/detail/memory.inl +0 -2
  562. cuda/cccl/headers/include/thrust/system/cuda/detail/merge.h +2 -26
  563. cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +7 -142
  564. cuda/cccl/headers/include/thrust/system/cuda/detail/parallel_for.h +0 -2
  565. cuda/cccl/headers/include/thrust/system/cuda/detail/partition.h +0 -4
  566. cuda/cccl/headers/include/thrust/system/cuda/detail/per_device_resource.h +0 -2
  567. cuda/cccl/headers/include/thrust/system/cuda/detail/reduce.h +0 -5
  568. cuda/cccl/headers/include/thrust/system/cuda/detail/reduce_by_key.h +0 -4
  569. cuda/cccl/headers/include/thrust/system/cuda/detail/remove.h +0 -2
  570. cuda/cccl/headers/include/thrust/system/cuda/detail/replace.h +0 -1
  571. cuda/cccl/headers/include/thrust/system/cuda/detail/reverse.h +0 -4
  572. cuda/cccl/headers/include/thrust/system/cuda/detail/scan.h +0 -4
  573. cuda/cccl/headers/include/thrust/system/cuda/detail/scan_by_key.h +0 -3
  574. cuda/cccl/headers/include/thrust/system/cuda/detail/scatter.h +0 -2
  575. cuda/cccl/headers/include/thrust/system/cuda/detail/set_operations.h +3 -5
  576. cuda/cccl/headers/include/thrust/system/cuda/detail/sort.h +8 -10
  577. cuda/cccl/headers/include/thrust/system/cuda/detail/temporary_buffer.h +0 -2
  578. cuda/cccl/headers/include/thrust/system/cuda/detail/transform.h +0 -1
  579. cuda/cccl/headers/include/thrust/system/cuda/detail/transform_reduce.h +0 -4
  580. cuda/cccl/headers/include/thrust/system/cuda/detail/transform_scan.h +0 -2
  581. cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_copy.h +1 -7
  582. cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_fill.h +2 -7
  583. cuda/cccl/headers/include/thrust/system/cuda/detail/unique.h +0 -3
  584. cuda/cccl/headers/include/thrust/system/cuda/detail/unique_by_key.h +0 -4
  585. cuda/cccl/headers/include/thrust/system/cuda/error.h +2 -11
  586. cuda/cccl/headers/include/thrust/system/cuda/memory.h +2 -6
  587. cuda/cccl/headers/include/thrust/system/cuda/memory_resource.h +2 -9
  588. cuda/cccl/headers/include/thrust/system/cuda/pointer.h +2 -7
  589. cuda/cccl/headers/include/thrust/system/cuda/vector.h +2 -6
  590. cuda/cccl/headers/include/thrust/system/detail/bad_alloc.h +0 -2
  591. cuda/cccl/headers/include/thrust/system/detail/errno.h +0 -2
  592. cuda/cccl/headers/include/thrust/system/detail/error_category.inl +0 -4
  593. cuda/cccl/headers/include/thrust/system/detail/error_code.inl +0 -2
  594. cuda/cccl/headers/include/thrust/system/detail/error_condition.inl +0 -2
  595. cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.h +0 -2
  596. cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.inl +0 -2
  597. cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.h +0 -2
  598. cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.inl +0 -3
  599. cuda/cccl/headers/include/thrust/system/detail/generic/copy.h +0 -2
  600. cuda/cccl/headers/include/thrust/system/detail/generic/copy.inl +0 -2
  601. cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.h +0 -2
  602. cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.inl +0 -3
  603. cuda/cccl/headers/include/thrust/system/detail/generic/count.h +0 -2
  604. cuda/cccl/headers/include/thrust/system/detail/generic/count.inl +0 -2
  605. cuda/cccl/headers/include/thrust/system/detail/generic/equal.h +0 -2
  606. cuda/cccl/headers/include/thrust/system/detail/generic/equal.inl +0 -2
  607. cuda/cccl/headers/include/thrust/system/detail/generic/extrema.h +0 -2
  608. cuda/cccl/headers/include/thrust/system/detail/generic/extrema.inl +0 -3
  609. cuda/cccl/headers/include/thrust/system/detail/generic/fill.h +0 -2
  610. cuda/cccl/headers/include/thrust/system/detail/generic/find.h +0 -2
  611. cuda/cccl/headers/include/thrust/system/detail/generic/find.inl +0 -2
  612. cuda/cccl/headers/include/thrust/system/detail/generic/for_each.h +0 -2
  613. cuda/cccl/headers/include/thrust/system/detail/generic/gather.h +0 -2
  614. cuda/cccl/headers/include/thrust/system/detail/generic/gather.inl +0 -2
  615. cuda/cccl/headers/include/thrust/system/detail/generic/generate.h +0 -2
  616. cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.h +0 -2
  617. cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.inl +0 -2
  618. cuda/cccl/headers/include/thrust/system/detail/generic/logical.h +0 -2
  619. cuda/cccl/headers/include/thrust/system/detail/generic/memory.h +0 -2
  620. cuda/cccl/headers/include/thrust/system/detail/generic/memory.inl +0 -3
  621. cuda/cccl/headers/include/thrust/system/detail/generic/merge.h +0 -2
  622. cuda/cccl/headers/include/thrust/system/detail/generic/merge.inl +0 -2
  623. cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.h +0 -2
  624. cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.inl +0 -2
  625. cuda/cccl/headers/include/thrust/system/detail/generic/partition.h +0 -2
  626. cuda/cccl/headers/include/thrust/system/detail/generic/partition.inl +0 -2
  627. cuda/cccl/headers/include/thrust/system/detail/generic/per_device_resource.h +0 -2
  628. cuda/cccl/headers/include/thrust/system/detail/generic/reduce.h +0 -2
  629. cuda/cccl/headers/include/thrust/system/detail/generic/reduce.inl +0 -2
  630. cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.h +0 -2
  631. cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.inl +0 -3
  632. cuda/cccl/headers/include/thrust/system/detail/generic/remove.h +0 -2
  633. cuda/cccl/headers/include/thrust/system/detail/generic/remove.inl +0 -2
  634. cuda/cccl/headers/include/thrust/system/detail/generic/replace.h +0 -2
  635. cuda/cccl/headers/include/thrust/system/detail/generic/replace.inl +0 -3
  636. cuda/cccl/headers/include/thrust/system/detail/generic/reverse.h +0 -2
  637. cuda/cccl/headers/include/thrust/system/detail/generic/reverse.inl +0 -2
  638. cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.h +0 -2
  639. cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.inl +0 -2
  640. cuda/cccl/headers/include/thrust/system/detail/generic/scan.h +26 -12
  641. cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.h +0 -2
  642. cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.inl +0 -3
  643. cuda/cccl/headers/include/thrust/system/detail/generic/scatter.h +0 -2
  644. cuda/cccl/headers/include/thrust/system/detail/generic/scatter.inl +0 -2
  645. cuda/cccl/headers/include/thrust/system/detail/generic/select_system.h +0 -1
  646. cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.h +0 -2
  647. cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.inl +0 -2
  648. cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.h +0 -2
  649. cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.inl +0 -2
  650. cuda/cccl/headers/include/thrust/system/detail/generic/sort.h +0 -2
  651. cuda/cccl/headers/include/thrust/system/detail/generic/sort.inl +0 -2
  652. cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.h +0 -2
  653. cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.inl +0 -3
  654. cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.h +0 -2
  655. cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.inl +0 -2
  656. cuda/cccl/headers/include/thrust/system/detail/generic/tag.h +0 -2
  657. cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.h +0 -2
  658. cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.inl +0 -2
  659. cuda/cccl/headers/include/thrust/system/detail/generic/transform.h +0 -2
  660. cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.h +0 -2
  661. cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.inl +0 -2
  662. cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.h +0 -2
  663. cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.inl +0 -2
  664. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.h +0 -2
  665. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.inl +2 -4
  666. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.h +0 -2
  667. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.inl +0 -3
  668. cuda/cccl/headers/include/thrust/system/detail/generic/unique.h +0 -2
  669. cuda/cccl/headers/include/thrust/system/detail/generic/unique.inl +0 -2
  670. cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.h +0 -2
  671. cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.inl +0 -2
  672. cuda/cccl/headers/include/thrust/system/detail/internal/decompose.h +0 -2
  673. cuda/cccl/headers/include/thrust/system/detail/sequential/adjacent_difference.h +0 -2
  674. cuda/cccl/headers/include/thrust/system/detail/sequential/assign_value.h +0 -2
  675. cuda/cccl/headers/include/thrust/system/detail/sequential/binary_search.h +0 -2
  676. cuda/cccl/headers/include/thrust/system/detail/sequential/copy.h +76 -5
  677. cuda/cccl/headers/include/thrust/system/detail/sequential/copy_backward.h +0 -2
  678. cuda/cccl/headers/include/thrust/system/detail/sequential/copy_if.h +0 -2
  679. cuda/cccl/headers/include/thrust/system/detail/sequential/extrema.h +0 -2
  680. cuda/cccl/headers/include/thrust/system/detail/sequential/find.h +0 -2
  681. cuda/cccl/headers/include/thrust/system/detail/sequential/for_each.h +0 -2
  682. cuda/cccl/headers/include/thrust/system/detail/sequential/general_copy.h +0 -3
  683. cuda/cccl/headers/include/thrust/system/detail/sequential/get_value.h +0 -2
  684. cuda/cccl/headers/include/thrust/system/detail/sequential/insertion_sort.h +0 -2
  685. cuda/cccl/headers/include/thrust/system/detail/sequential/iter_swap.h +0 -2
  686. cuda/cccl/headers/include/thrust/system/detail/sequential/malloc_and_free.h +0 -2
  687. cuda/cccl/headers/include/thrust/system/detail/sequential/merge.h +78 -6
  688. cuda/cccl/headers/include/thrust/system/detail/sequential/partition.h +0 -4
  689. cuda/cccl/headers/include/thrust/system/detail/sequential/reduce.h +0 -2
  690. cuda/cccl/headers/include/thrust/system/detail/sequential/reduce_by_key.h +0 -2
  691. cuda/cccl/headers/include/thrust/system/detail/sequential/remove.h +0 -2
  692. cuda/cccl/headers/include/thrust/system/detail/sequential/scan.h +0 -2
  693. cuda/cccl/headers/include/thrust/system/detail/sequential/scan_by_key.h +0 -2
  694. cuda/cccl/headers/include/thrust/system/detail/sequential/set_operations.h +0 -2
  695. cuda/cccl/headers/include/thrust/system/detail/sequential/sort.h +67 -6
  696. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.h +310 -11
  697. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.h +78 -5
  698. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.h +543 -7
  699. cuda/cccl/headers/include/thrust/system/detail/sequential/trivial_copy.h +0 -2
  700. cuda/cccl/headers/include/thrust/system/detail/sequential/unique.h +0 -2
  701. cuda/cccl/headers/include/thrust/system/detail/sequential/unique_by_key.h +0 -2
  702. cuda/cccl/headers/include/thrust/system/detail/system_error.inl +0 -2
  703. cuda/cccl/headers/include/thrust/system/error_code.h +0 -4
  704. cuda/cccl/headers/include/thrust/system/omp/detail/adjacent_difference.h +5 -25
  705. cuda/cccl/headers/include/thrust/system/omp/detail/assign_value.h +2 -15
  706. cuda/cccl/headers/include/thrust/system/omp/detail/binary_search.h +5 -25
  707. cuda/cccl/headers/include/thrust/system/omp/detail/copy.h +40 -29
  708. cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.h +11 -28
  709. cuda/cccl/headers/include/thrust/system/omp/detail/count.h +2 -15
  710. cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.h +26 -28
  711. cuda/cccl/headers/include/thrust/system/omp/detail/equal.h +2 -15
  712. cuda/cccl/headers/include/thrust/system/omp/detail/execution_policy.h +18 -13
  713. cuda/cccl/headers/include/thrust/system/omp/detail/extrema.h +5 -25
  714. cuda/cccl/headers/include/thrust/system/omp/detail/fill.h +2 -15
  715. cuda/cccl/headers/include/thrust/system/omp/detail/find.h +5 -25
  716. cuda/cccl/headers/include/thrust/system/omp/detail/for_each.h +47 -30
  717. cuda/cccl/headers/include/thrust/system/omp/detail/gather.h +2 -15
  718. cuda/cccl/headers/include/thrust/system/omp/detail/generate.h +2 -15
  719. cuda/cccl/headers/include/thrust/system/omp/detail/get_value.h +2 -15
  720. cuda/cccl/headers/include/thrust/system/omp/detail/inner_product.h +2 -15
  721. cuda/cccl/headers/include/thrust/system/omp/detail/iter_swap.h +2 -15
  722. cuda/cccl/headers/include/thrust/system/omp/detail/logical.h +2 -15
  723. cuda/cccl/headers/include/thrust/system/omp/detail/malloc_and_free.h +2 -15
  724. cuda/cccl/headers/include/thrust/system/omp/detail/merge.h +2 -15
  725. cuda/cccl/headers/include/thrust/system/omp/detail/mismatch.h +2 -15
  726. cuda/cccl/headers/include/thrust/system/omp/detail/partition.h +26 -31
  727. cuda/cccl/headers/include/thrust/system/omp/detail/per_device_resource.h +2 -15
  728. cuda/cccl/headers/include/thrust/system/omp/detail/pragma_omp.h +2 -26
  729. cuda/cccl/headers/include/thrust/system/omp/detail/reduce.h +35 -27
  730. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.h +13 -28
  731. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.h +56 -28
  732. cuda/cccl/headers/include/thrust/system/omp/detail/remove.h +26 -31
  733. cuda/cccl/headers/include/thrust/system/omp/detail/replace.h +2 -15
  734. cuda/cccl/headers/include/thrust/system/omp/detail/reverse.h +2 -15
  735. cuda/cccl/headers/include/thrust/system/omp/detail/scan.h +176 -17
  736. cuda/cccl/headers/include/thrust/system/omp/detail/scan_by_key.h +8 -15
  737. cuda/cccl/headers/include/thrust/system/omp/detail/scatter.h +2 -15
  738. cuda/cccl/headers/include/thrust/system/omp/detail/sequence.h +2 -15
  739. cuda/cccl/headers/include/thrust/system/omp/detail/set_operations.h +2 -15
  740. cuda/cccl/headers/include/thrust/system/omp/detail/sort.h +213 -28
  741. cuda/cccl/headers/include/thrust/system/omp/detail/swap_ranges.h +2 -15
  742. cuda/cccl/headers/include/thrust/system/omp/detail/tabulate.h +2 -15
  743. cuda/cccl/headers/include/thrust/system/omp/detail/temporary_buffer.h +2 -15
  744. cuda/cccl/headers/include/thrust/system/omp/detail/transform.h +2 -15
  745. cuda/cccl/headers/include/thrust/system/omp/detail/transform_reduce.h +2 -15
  746. cuda/cccl/headers/include/thrust/system/omp/detail/transform_scan.h +2 -15
  747. cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_copy.h +2 -15
  748. cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_fill.h +2 -15
  749. cuda/cccl/headers/include/thrust/system/omp/detail/unique.h +21 -30
  750. cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.h +17 -29
  751. cuda/cccl/headers/include/thrust/system/omp/memory.h +51 -9
  752. cuda/cccl/headers/include/thrust/system/omp/memory_resource.h +3 -7
  753. cuda/cccl/headers/include/thrust/system/omp/pointer.h +3 -7
  754. cuda/cccl/headers/include/thrust/system/omp/vector.h +3 -6
  755. cuda/cccl/headers/include/thrust/system/system_error.h +0 -2
  756. cuda/cccl/headers/include/thrust/system/tbb/detail/adjacent_difference.h +4 -25
  757. cuda/cccl/headers/include/thrust/system/tbb/detail/assign_value.h +2 -15
  758. cuda/cccl/headers/include/thrust/system/tbb/detail/binary_search.h +2 -15
  759. cuda/cccl/headers/include/thrust/system/tbb/detail/copy.h +38 -29
  760. cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.h +91 -24
  761. cuda/cccl/headers/include/thrust/system/tbb/detail/count.h +2 -15
  762. cuda/cccl/headers/include/thrust/system/tbb/detail/equal.h +2 -15
  763. cuda/cccl/headers/include/thrust/system/tbb/detail/execution_policy.h +17 -13
  764. cuda/cccl/headers/include/thrust/system/tbb/detail/extrema.h +4 -25
  765. cuda/cccl/headers/include/thrust/system/tbb/detail/fill.h +2 -15
  766. cuda/cccl/headers/include/thrust/system/tbb/detail/find.h +4 -25
  767. cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.h +47 -28
  768. cuda/cccl/headers/include/thrust/system/tbb/detail/gather.h +2 -15
  769. cuda/cccl/headers/include/thrust/system/tbb/detail/generate.h +2 -15
  770. cuda/cccl/headers/include/thrust/system/tbb/detail/get_value.h +2 -15
  771. cuda/cccl/headers/include/thrust/system/tbb/detail/inner_product.h +2 -15
  772. cuda/cccl/headers/include/thrust/system/tbb/detail/iter_swap.h +2 -15
  773. cuda/cccl/headers/include/thrust/system/tbb/detail/logical.h +2 -15
  774. cuda/cccl/headers/include/thrust/system/tbb/detail/malloc_and_free.h +2 -15
  775. cuda/cccl/headers/include/thrust/system/tbb/detail/merge.h +254 -29
  776. cuda/cccl/headers/include/thrust/system/tbb/detail/mismatch.h +2 -15
  777. cuda/cccl/headers/include/thrust/system/tbb/detail/partition.h +25 -31
  778. cuda/cccl/headers/include/thrust/system/tbb/detail/per_device_resource.h +2 -15
  779. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.h +95 -29
  780. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.h +345 -28
  781. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_intervals.h +4 -26
  782. cuda/cccl/headers/include/thrust/system/tbb/detail/remove.h +32 -42
  783. cuda/cccl/headers/include/thrust/system/tbb/detail/replace.h +2 -15
  784. cuda/cccl/headers/include/thrust/system/tbb/detail/reverse.h +2 -15
  785. cuda/cccl/headers/include/thrust/system/tbb/detail/scan.h +265 -30
  786. cuda/cccl/headers/include/thrust/system/tbb/detail/scan_by_key.h +7 -17
  787. cuda/cccl/headers/include/thrust/system/tbb/detail/scatter.h +2 -15
  788. cuda/cccl/headers/include/thrust/system/tbb/detail/sequence.h +2 -15
  789. cuda/cccl/headers/include/thrust/system/tbb/detail/set_operations.h +2 -15
  790. cuda/cccl/headers/include/thrust/system/tbb/detail/sort.h +244 -32
  791. cuda/cccl/headers/include/thrust/system/tbb/detail/swap_ranges.h +2 -15
  792. cuda/cccl/headers/include/thrust/system/tbb/detail/tabulate.h +2 -15
  793. cuda/cccl/headers/include/thrust/system/tbb/detail/temporary_buffer.h +2 -15
  794. cuda/cccl/headers/include/thrust/system/tbb/detail/transform.h +2 -15
  795. cuda/cccl/headers/include/thrust/system/tbb/detail/transform_reduce.h +2 -15
  796. cuda/cccl/headers/include/thrust/system/tbb/detail/transform_scan.h +2 -15
  797. cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_copy.h +2 -15
  798. cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_fill.h +2 -15
  799. cuda/cccl/headers/include/thrust/system/tbb/detail/unique.h +23 -33
  800. cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.h +16 -29
  801. cuda/cccl/headers/include/thrust/system/tbb/memory.h +52 -24
  802. cuda/cccl/headers/include/thrust/system/tbb/memory_resource.h +4 -22
  803. cuda/cccl/headers/include/thrust/system/tbb/pointer.h +4 -22
  804. cuda/cccl/headers/include/thrust/system/tbb/vector.h +4 -21
  805. cuda/cccl/headers/include/thrust/transform.h +14 -3
  806. cuda/cccl/headers/include/thrust/type_traits/integer_sequence.h +0 -4
  807. cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +0 -1
  808. cuda/cccl/headers/include/thrust/type_traits/is_operator_less_or_greater_function_object.h +0 -4
  809. cuda/cccl/headers/include/thrust/type_traits/is_operator_plus_function_object.h +0 -4
  810. cuda/cccl/headers/include/thrust/type_traits/is_trivially_relocatable.h +0 -4
  811. cuda/cccl/headers/include/thrust/universal_allocator.h +8 -0
  812. cuda/cccl/headers/include/thrust/universal_vector.h +9 -0
  813. cuda/cccl/headers/include/thrust/zip_function.h +2 -28
  814. cuda/compute/__init__.py +4 -0
  815. cuda/compute/_bindings.pyi +26 -3
  816. cuda/compute/_bindings_impl.pyx +143 -1
  817. cuda/compute/algorithms/__init__.py +9 -5
  818. cuda/compute/algorithms/_sort/__init__.py +23 -0
  819. cuda/compute/algorithms/{_merge_sort.py → _sort/_merge_sort.py} +10 -10
  820. cuda/compute/algorithms/{_radix_sort.py → _sort/_radix_sort.py} +9 -58
  821. cuda/compute/algorithms/_sort/_segmented_sort.py +288 -0
  822. cuda/compute/algorithms/_sort/_sort_common.py +52 -0
  823. cuda/compute/cu12/_bindings_impl.cpython-313-aarch64-linux-gnu.so +0 -0
  824. cuda/compute/cu12/cccl/libcccl.c.parallel.so +0 -0
  825. cuda/compute/cu13/_bindings_impl.cpython-313-aarch64-linux-gnu.so +0 -0
  826. cuda/compute/cu13/cccl/libcccl.c.parallel.so +0 -0
  827. cuda_cccl-0.3.4.dist-info/METADATA +78 -0
  828. {cuda_cccl-0.3.2.dist-info → cuda_cccl-0.3.4.dist-info}/RECORD +830 -867
  829. cuda/cccl/headers/include/cuda/__memory_resource/resource_ref.h +0 -652
  830. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/tuple +0 -1365
  831. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/variant +0 -2144
  832. cuda/cccl/headers/include/thrust/detail/integer_math.h +0 -113
  833. cuda/cccl/headers/include/thrust/system/detail/adl/adjacent_difference.h +0 -51
  834. cuda/cccl/headers/include/thrust/system/detail/adl/assign_value.h +0 -51
  835. cuda/cccl/headers/include/thrust/system/detail/adl/binary_search.h +0 -51
  836. cuda/cccl/headers/include/thrust/system/detail/adl/copy.h +0 -51
  837. cuda/cccl/headers/include/thrust/system/detail/adl/copy_if.h +0 -52
  838. cuda/cccl/headers/include/thrust/system/detail/adl/count.h +0 -51
  839. cuda/cccl/headers/include/thrust/system/detail/adl/equal.h +0 -51
  840. cuda/cccl/headers/include/thrust/system/detail/adl/extrema.h +0 -51
  841. cuda/cccl/headers/include/thrust/system/detail/adl/fill.h +0 -51
  842. cuda/cccl/headers/include/thrust/system/detail/adl/find.h +0 -51
  843. cuda/cccl/headers/include/thrust/system/detail/adl/for_each.h +0 -51
  844. cuda/cccl/headers/include/thrust/system/detail/adl/gather.h +0 -51
  845. cuda/cccl/headers/include/thrust/system/detail/adl/generate.h +0 -51
  846. cuda/cccl/headers/include/thrust/system/detail/adl/get_value.h +0 -51
  847. cuda/cccl/headers/include/thrust/system/detail/adl/inner_product.h +0 -51
  848. cuda/cccl/headers/include/thrust/system/detail/adl/iter_swap.h +0 -51
  849. cuda/cccl/headers/include/thrust/system/detail/adl/logical.h +0 -51
  850. cuda/cccl/headers/include/thrust/system/detail/adl/malloc_and_free.h +0 -51
  851. cuda/cccl/headers/include/thrust/system/detail/adl/merge.h +0 -51
  852. cuda/cccl/headers/include/thrust/system/detail/adl/mismatch.h +0 -51
  853. cuda/cccl/headers/include/thrust/system/detail/adl/partition.h +0 -51
  854. cuda/cccl/headers/include/thrust/system/detail/adl/per_device_resource.h +0 -51
  855. cuda/cccl/headers/include/thrust/system/detail/adl/reduce.h +0 -51
  856. cuda/cccl/headers/include/thrust/system/detail/adl/reduce_by_key.h +0 -51
  857. cuda/cccl/headers/include/thrust/system/detail/adl/remove.h +0 -51
  858. cuda/cccl/headers/include/thrust/system/detail/adl/replace.h +0 -51
  859. cuda/cccl/headers/include/thrust/system/detail/adl/reverse.h +0 -51
  860. cuda/cccl/headers/include/thrust/system/detail/adl/scan.h +0 -51
  861. cuda/cccl/headers/include/thrust/system/detail/adl/scan_by_key.h +0 -51
  862. cuda/cccl/headers/include/thrust/system/detail/adl/scatter.h +0 -51
  863. cuda/cccl/headers/include/thrust/system/detail/adl/sequence.h +0 -51
  864. cuda/cccl/headers/include/thrust/system/detail/adl/set_operations.h +0 -51
  865. cuda/cccl/headers/include/thrust/system/detail/adl/sort.h +0 -51
  866. cuda/cccl/headers/include/thrust/system/detail/adl/swap_ranges.h +0 -51
  867. cuda/cccl/headers/include/thrust/system/detail/adl/tabulate.h +0 -51
  868. cuda/cccl/headers/include/thrust/system/detail/adl/temporary_buffer.h +0 -51
  869. cuda/cccl/headers/include/thrust/system/detail/adl/transform.h +0 -51
  870. cuda/cccl/headers/include/thrust/system/detail/adl/transform_reduce.h +0 -51
  871. cuda/cccl/headers/include/thrust/system/detail/adl/transform_scan.h +0 -51
  872. cuda/cccl/headers/include/thrust/system/detail/adl/uninitialized_copy.h +0 -51
  873. cuda/cccl/headers/include/thrust/system/detail/adl/uninitialized_fill.h +0 -51
  874. cuda/cccl/headers/include/thrust/system/detail/adl/unique.h +0 -51
  875. cuda/cccl/headers/include/thrust/system/detail/adl/unique_by_key.h +0 -51
  876. cuda/cccl/headers/include/thrust/system/detail/generic/scan.inl +0 -85
  877. cuda/cccl/headers/include/thrust/system/detail/sequential/copy.inl +0 -119
  878. cuda/cccl/headers/include/thrust/system/detail/sequential/merge.inl +0 -145
  879. cuda/cccl/headers/include/thrust/system/detail/sequential/sort.inl +0 -116
  880. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.inl +0 -356
  881. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.inl +0 -124
  882. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.inl +0 -586
  883. cuda/cccl/headers/include/thrust/system/omp/detail/copy.inl +0 -74
  884. cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.inl +0 -59
  885. cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.inl +0 -65
  886. cuda/cccl/headers/include/thrust/system/omp/detail/for_each.inl +0 -87
  887. cuda/cccl/headers/include/thrust/system/omp/detail/memory.inl +0 -93
  888. cuda/cccl/headers/include/thrust/system/omp/detail/partition.inl +0 -102
  889. cuda/cccl/headers/include/thrust/system/omp/detail/reduce.inl +0 -78
  890. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.inl +0 -65
  891. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.inl +0 -103
  892. cuda/cccl/headers/include/thrust/system/omp/detail/remove.inl +0 -87
  893. cuda/cccl/headers/include/thrust/system/omp/detail/sort.inl +0 -265
  894. cuda/cccl/headers/include/thrust/system/omp/detail/unique.inl +0 -71
  895. cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.inl +0 -75
  896. cuda/cccl/headers/include/thrust/system/tbb/detail/copy.inl +0 -73
  897. cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.inl +0 -136
  898. cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.inl +0 -91
  899. cuda/cccl/headers/include/thrust/system/tbb/detail/memory.inl +0 -94
  900. cuda/cccl/headers/include/thrust/system/tbb/detail/merge.inl +0 -327
  901. cuda/cccl/headers/include/thrust/system/tbb/detail/partition.inl +0 -98
  902. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.inl +0 -137
  903. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.inl +0 -400
  904. cuda/cccl/headers/include/thrust/system/tbb/detail/remove.inl +0 -87
  905. cuda/cccl/headers/include/thrust/system/tbb/detail/scan.inl +0 -312
  906. cuda/cccl/headers/include/thrust/system/tbb/detail/sort.inl +0 -295
  907. cuda/cccl/headers/include/thrust/system/tbb/detail/unique.inl +0 -71
  908. cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.inl +0 -75
  909. cuda_cccl-0.3.2.dist-info/METADATA +0 -42
  910. {cuda_cccl-0.3.2.dist-info → cuda_cccl-0.3.4.dist-info}/WHEEL +0 -0
  911. {cuda_cccl-0.3.2.dist-info → cuda_cccl-0.3.4.dist-info}/licenses/LICENSE +0 -0
@@ -37,6 +37,8 @@
37
37
  #include <thrust/mr/memory_resource.h>
38
38
  #include <thrust/mr/pool_options.h>
39
39
 
40
+ #include <cuda/__cmath/ilog.h>
41
+ #include <cuda/__cmath/pow2.h>
40
42
  #include <cuda/std/__cccl/algorithm_wrapper.h>
41
43
  #include <cuda/std/cassert>
42
44
  #include <cuda/std/cstdint>
@@ -44,7 +46,6 @@
44
46
  THRUST_NAMESPACE_BEGIN
45
47
  namespace mr
46
48
  {
47
-
48
49
  /** \addtogroup memory_resources Memory Resources
49
50
  * \ingroup memory_management
50
51
  * \{
@@ -112,7 +113,7 @@ public:
112
113
  unsynchronized_pool_resource(Upstream* upstream, pool_options options = get_default_options())
113
114
  : m_upstream(upstream)
114
115
  , m_options(options)
115
- , m_smallest_block_log2(detail::log2_ri(m_options.smallest_block_size))
116
+ , m_smallest_block_log2(::cuda::ceil_ilog2(m_options.smallest_block_size))
116
117
  , m_pools(upstream)
117
118
  , m_allocated()
118
119
  , m_oversized()
@@ -121,7 +122,7 @@ public:
121
122
  assert(m_options.validate());
122
123
 
123
124
  pool p = {block_descriptor_ptr(), 0};
124
- m_pools.resize(detail::log2_ri(m_options.largest_block_size) - m_smallest_block_log2 + 1, p);
125
+ m_pools.resize(::cuda::ceil_ilog2(m_options.largest_block_size) - m_smallest_block_log2 + 1, p);
125
126
  }
126
127
 
127
128
  // TODO: C++11: use delegating constructors
@@ -133,7 +134,7 @@ public:
133
134
  unsynchronized_pool_resource(pool_options options = get_default_options())
134
135
  : m_upstream(get_global_resource<Upstream>())
135
136
  , m_options(options)
136
- , m_smallest_block_log2(detail::log2_ri(m_options.smallest_block_size))
137
+ , m_smallest_block_log2(::cuda::ceil_ilog2(m_options.smallest_block_size))
137
138
  , m_pools(get_global_resource<Upstream>())
138
139
  , m_allocated()
139
140
  , m_oversized()
@@ -142,7 +143,7 @@ public:
142
143
  assert(m_options.validate());
143
144
 
144
145
  pool p = {block_descriptor_ptr(), 0};
145
- m_pools.resize(detail::log2_ri(m_options.largest_block_size) - m_smallest_block_log2 + 1, p);
146
+ m_pools.resize(::cuda::ceil_ilog2(m_options.largest_block_size) - m_smallest_block_log2 + 1, p);
146
147
  }
147
148
 
148
149
  /*! Destructor. Releases all held memory to upstream.
@@ -260,7 +261,7 @@ public:
260
261
  do_allocate(std::size_t bytes, std::size_t alignment = THRUST_MR_DEFAULT_ALIGNMENT) override
261
262
  {
262
263
  bytes = (std::max) (bytes, m_options.smallest_block_size);
263
- assert(detail::is_power_of_2(alignment));
264
+ assert(::cuda::is_power_of_two(alignment));
264
265
 
265
266
  // an oversized and/or overaligned allocation requested; needs to be allocated separately
266
267
  if (bytes > m_options.largest_block_size || alignment > m_options.alignment)
@@ -371,7 +372,7 @@ public:
371
372
 
372
373
  // the request is NOT for oversized and/or overaligned memory
373
374
  // allocate a block from an appropriate bucket
374
- std::size_t bytes_log2 = thrust::detail::log2_ri(bytes);
375
+ std::size_t bytes_log2 = ::cuda::ceil_ilog2(bytes);
375
376
  std::size_t bucket_idx = bytes_log2 - m_smallest_block_log2;
376
377
  pool& bucket = thrust::raw_reference_cast(m_pools[bucket_idx]);
377
378
 
@@ -439,7 +440,7 @@ public:
439
440
  virtual void do_deallocate(void_ptr p, std::size_t n, std::size_t alignment = THRUST_MR_DEFAULT_ALIGNMENT) override
440
441
  {
441
442
  n = (std::max) (n, m_options.smallest_block_size);
442
- assert(detail::is_power_of_2(alignment));
443
+ assert(::cuda::is_power_of_two(alignment));
443
444
 
444
445
  // verify that the pointer is at least as aligned as claimed
445
446
  assert(reinterpret_cast<::cuda::std::intmax_t>(void_ptr_traits::get(p)) % alignment == 0);
@@ -504,7 +505,7 @@ public:
504
505
  }
505
506
 
506
507
  // push the block to the front of the appropriate bucket's free list
507
- std::size_t n_log2 = thrust::detail::log2_ri(n);
508
+ std::size_t n_log2 = ::cuda::ceil_ilog2(n);
508
509
  std::size_t bucket_idx = n_log2 - m_smallest_block_log2;
509
510
  pool& bucket = thrust::raw_reference_cast(m_pools[bucket_idx]);
510
511
 
@@ -521,6 +522,5 @@ public:
521
522
 
522
523
  /*! \} // memory_resources
523
524
  */
524
-
525
525
  } // namespace mr
526
526
  THRUST_NAMESPACE_END
@@ -32,14 +32,13 @@
32
32
  #endif // no system header
33
33
 
34
34
  #include <thrust/detail/config/memory_resource.h>
35
- #include <thrust/detail/integer_math.h>
36
35
 
36
+ #include <cuda/__cmath/pow2.h>
37
37
  #include <cuda/std/cstddef>
38
38
 
39
39
  THRUST_NAMESPACE_BEGIN
40
40
  namespace mr
41
41
  {
42
-
43
42
  /*! \addtogroup memory_resources Memory Resources
44
43
  * \ingroup memory_management
45
44
  * \{
@@ -104,15 +103,15 @@ struct pool_options
104
103
  */
105
104
  bool validate() const
106
105
  {
107
- if (!detail::is_power_of_2(smallest_block_size))
106
+ if (smallest_block_size != 0 && !::cuda::is_power_of_two(smallest_block_size))
108
107
  {
109
108
  return false;
110
109
  }
111
- if (!detail::is_power_of_2(largest_block_size))
110
+ if (largest_block_size != 0 && !::cuda::is_power_of_two(largest_block_size))
112
111
  {
113
112
  return false;
114
113
  }
115
- if (!detail::is_power_of_2(alignment))
114
+ if (alignment != 0 && !::cuda::is_power_of_two(alignment))
116
115
  {
117
116
  return false;
118
117
  }
@@ -169,6 +168,5 @@ struct pool_options
169
168
 
170
169
  /*! \} // memory_resources
171
170
  */
172
-
173
171
  } // namespace mr
174
172
  THRUST_NAMESPACE_END
@@ -36,7 +36,6 @@
36
36
  THRUST_NAMESPACE_BEGIN
37
37
  namespace mr
38
38
  {
39
-
40
39
  /*! \addtogroup memory_resources Memory Resources
41
40
  * \ingroup memory_management
42
41
  * \{
@@ -109,6 +108,5 @@ private:
109
108
 
110
109
  /*! \} // memory_resources
111
110
  */
112
-
113
111
  } // namespace mr
114
112
  THRUST_NAMESPACE_END
@@ -34,7 +34,6 @@
34
34
  THRUST_NAMESPACE_BEGIN
35
35
  namespace mr
36
36
  {
37
-
38
37
  /*! \addtogroup memory_resources Memory Resources
39
38
  * \ingroup memory_management
40
39
  * \{
@@ -59,6 +58,5 @@ _CCCL_HOST thrust::mr::unsynchronized_pool_resource<Upstream>& tls_pool(Upstream
59
58
 
60
59
  /*! \}
61
60
  */
62
-
63
61
  } // namespace mr
64
62
  THRUST_NAMESPACE_END
@@ -34,7 +34,6 @@
34
34
  THRUST_NAMESPACE_BEGIN
35
35
  namespace mr
36
36
  {
37
-
38
37
  template <typename MR>
39
38
  struct validator
40
39
  {
@@ -51,6 +50,5 @@ struct validator2
51
50
  template <typename T>
52
51
  struct validator2<T, T> : private validator<T>
53
52
  {};
54
-
55
53
  } // namespace mr
56
54
  THRUST_NAMESPACE_END
@@ -27,8 +27,20 @@
27
27
  #endif // no system header
28
28
  #include <thrust/detail/execution_policy.h>
29
29
  #include <thrust/mr/allocator.h>
30
- #include <thrust/system/detail/adl/per_device_resource.h>
30
+
31
+ // Include all active backend system implementations (generic, sequential, host and device)
31
32
  #include <thrust/system/detail/generic/per_device_resource.h>
33
+ #include <thrust/system/detail/sequential/per_device_resource.h>
34
+ #include __THRUST_HOST_SYSTEM_ALGORITH_DETAIL_HEADER_INCLUDE(per_device_resource.h)
35
+ #include __THRUST_DEVICE_SYSTEM_ALGORITH_DETAIL_HEADER_INCLUDE(per_device_resource.h)
36
+
37
+ // Some build systems need a hint to know which files we actually include
38
+ #if 0
39
+ # include <thrust/system/cpp/detail/per_device_resource.h>
40
+ # include <thrust/system/cuda/detail/per_device_resource.h>
41
+ # include <thrust/system/omp/detail/per_device_resource.h>
42
+ # include <thrust/system/tbb/detail/per_device_resource.h>
43
+ #endif
32
44
 
33
45
  THRUST_NAMESPACE_BEGIN
34
46
 
@@ -32,7 +32,6 @@ THRUST_NAMESPACE_BEGIN
32
32
 
33
33
  namespace random
34
34
  {
35
-
36
35
  template <typename Engine, size_t p, size_t r>
37
36
  _CCCL_HOST_DEVICE discard_block_engine<Engine, p, r>::discard_block_engine()
38
37
  : m_e()
@@ -178,7 +177,6 @@ operator!=(const discard_block_engine<Engine, p, r>& lhs, const discard_block_en
178
177
  {
179
178
  return !(lhs == rhs);
180
179
  }
181
-
182
180
  } // namespace random
183
181
 
184
182
  THRUST_NAMESPACE_END
@@ -34,7 +34,6 @@ THRUST_NAMESPACE_BEGIN
34
34
 
35
35
  namespace random
36
36
  {
37
-
38
37
  template <typename UIntType, UIntType a, UIntType c, UIntType m>
39
38
  _CCCL_HOST_DEVICE linear_congruential_engine<UIntType, a, c, m>::linear_congruential_engine(result_type s)
40
39
  {
@@ -149,7 +148,6 @@ operator>>(std::basic_istream<CharT, Traits>& is, linear_congruential_engine<UIn
149
148
  {
150
149
  return detail::random_core_access::stream_in(is, e);
151
150
  }
152
-
153
151
  } // namespace random
154
152
 
155
153
  THRUST_NAMESPACE_END
@@ -32,12 +32,8 @@
32
32
 
33
33
  THRUST_NAMESPACE_BEGIN
34
34
 
35
- namespace random
35
+ namespace random::detail
36
36
  {
37
-
38
- namespace detail
39
- {
40
-
41
37
  template <typename UIntType, UIntType a, unsigned long long c, UIntType m>
42
38
  struct linear_congruential_engine_discard_implementation
43
39
  {
@@ -96,9 +92,6 @@ struct linear_congruential_engine_discard
96
92
  linear_congruential_engine_discard_implementation<result_type, a, c, m>::discard(lcg.m_x, z);
97
93
  }
98
94
  }; // end linear_congruential_engine_discard
99
-
100
- } // namespace detail
101
-
102
- } // namespace random
95
+ } // namespace random::detail
103
96
 
104
97
  THRUST_NAMESPACE_END
@@ -32,7 +32,6 @@ THRUST_NAMESPACE_BEGIN
32
32
 
33
33
  namespace random
34
34
  {
35
-
36
35
  template <typename UIntType, size_t w, size_t k, size_t q, size_t s>
37
36
  _CCCL_HOST_DEVICE linear_feedback_shift_engine<UIntType, w, k, q, s>::linear_feedback_shift_engine(result_type value)
38
37
  {
@@ -145,7 +144,6 @@ operator>>(std::basic_istream<CharT, Traits>& is, linear_feedback_shift_engine<U
145
144
  {
146
145
  return thrust::random::detail::random_core_access::stream_in(is, e);
147
146
  }
148
-
149
147
  } // namespace random
150
148
 
151
149
  THRUST_NAMESPACE_END
@@ -28,12 +28,8 @@
28
28
 
29
29
  THRUST_NAMESPACE_BEGIN
30
30
 
31
- namespace random
31
+ namespace random::detail
32
32
  {
33
-
34
- namespace detail
35
- {
36
-
37
33
  template <typename T, int w, int i = w - 1>
38
34
  struct linear_feedback_shift_engine_wordmask
39
35
  {
@@ -45,9 +41,6 @@ struct linear_feedback_shift_engine_wordmask<T, w, 0>
45
41
  {
46
42
  static const T value = 0;
47
43
  }; // end linear_feedback_shift_engine_wordmask
48
-
49
- } // namespace detail
50
-
51
- } // namespace random
44
+ } // namespace random::detail
52
45
 
53
46
  THRUST_NAMESPACE_END
@@ -28,12 +28,8 @@
28
28
 
29
29
  THRUST_NAMESPACE_BEGIN
30
30
 
31
- namespace random
31
+ namespace random::detail
32
32
  {
33
-
34
- namespace detail
35
- {
36
-
37
33
  template <typename T, T a, T c, T m, bool = (m == 0)>
38
34
  struct static_mod
39
35
  {
@@ -93,9 +89,6 @@ _CCCL_HOST_DEVICE T mod(T x)
93
89
  static_mod<T, a, c, m> f;
94
90
  return f(x);
95
91
  } // end static_mod
96
-
97
- } // namespace detail
98
-
99
- } // namespace random
92
+ } // namespace random::detail
100
93
 
101
94
  THRUST_NAMESPACE_END
@@ -36,7 +36,6 @@ THRUST_NAMESPACE_BEGIN
36
36
 
37
37
  namespace random
38
38
  {
39
-
40
39
  template <typename RealType>
41
40
  _CCCL_HOST_DEVICE normal_distribution<RealType>::normal_distribution(RealType a, RealType b)
42
41
  : super_t()
@@ -181,7 +180,6 @@ std::basic_istream<CharT, Traits>& operator>>(std::basic_istream<CharT, Traits>&
181
180
  {
182
181
  return thrust::random::detail::random_core_access::stream_in(is, d);
183
182
  }
184
-
185
183
  } // namespace random
186
184
 
187
185
  THRUST_NAMESPACE_END
@@ -39,11 +39,8 @@
39
39
  #include <cuda/std/limits>
40
40
 
41
41
  THRUST_NAMESPACE_BEGIN
42
- namespace random
42
+ namespace random::detail
43
43
  {
44
- namespace detail
45
- {
46
-
47
44
  // this version samples the normal distribution directly
48
45
  // and uses the non-standard math function erfcinv
49
46
  template <typename RealType>
@@ -154,7 +151,5 @@ struct normal_distribution_base
154
151
  using type = normal_distribution_portable<RealType>;
155
152
  #endif
156
153
  };
157
-
158
- } // namespace detail
159
- } // namespace random
154
+ } // namespace random::detail
160
155
  THRUST_NAMESPACE_END
@@ -28,12 +28,8 @@
28
28
 
29
29
  THRUST_NAMESPACE_BEGIN
30
30
 
31
- namespace random
31
+ namespace random::detail
32
32
  {
33
-
34
- namespace detail
35
- {
36
-
37
33
  struct random_core_access
38
34
  {
39
35
  template <typename OStream, typename EngineOrDistribution>
@@ -55,9 +51,6 @@ struct random_core_access
55
51
  }
56
52
 
57
53
  }; // end random_core_access
58
-
59
- } // namespace detail
60
-
61
- } // namespace random
54
+ } // namespace random::detail
62
55
 
63
56
  THRUST_NAMESPACE_END
@@ -35,7 +35,6 @@ THRUST_NAMESPACE_BEGIN
35
35
 
36
36
  namespace random
37
37
  {
38
-
39
38
  template <typename UIntType, size_t w, size_t s, size_t r>
40
39
  _CCCL_HOST_DEVICE subtract_with_carry_engine<UIntType, w, s, r>::subtract_with_carry_engine(result_type value)
41
40
  {
@@ -195,7 +194,6 @@ _CCCL_HOST_DEVICE bool operator!=(const subtract_with_carry_engine<UIntType, w,
195
194
  {
196
195
  return !(lhs == rhs);
197
196
  }
198
-
199
197
  } // namespace random
200
198
 
201
199
  THRUST_NAMESPACE_END
@@ -34,7 +34,6 @@ THRUST_NAMESPACE_BEGIN
34
34
 
35
35
  namespace random
36
36
  {
37
-
38
37
  template <typename IntType>
39
38
  _CCCL_HOST_DEVICE uniform_int_distribution<IntType>::uniform_int_distribution(IntType a, IntType b)
40
39
  : m_param(a, b)
@@ -192,7 +191,6 @@ operator>>(std::basic_istream<CharT, Traits>& is, uniform_int_distribution<IntTy
192
191
  {
193
192
  return thrust::random::detail::random_core_access::stream_in(is, d);
194
193
  }
195
-
196
194
  } // namespace random
197
195
 
198
196
  THRUST_NAMESPACE_END
@@ -34,7 +34,6 @@ THRUST_NAMESPACE_BEGIN
34
34
 
35
35
  namespace random
36
36
  {
37
-
38
37
  template <typename RealType>
39
38
  _CCCL_HOST_DEVICE uniform_real_distribution<RealType>::uniform_real_distribution(RealType a, RealType b)
40
39
  : m_param(a, b)
@@ -194,7 +193,6 @@ operator>>(std::basic_istream<CharT, Traits>& is, uniform_real_distribution<Real
194
193
  {
195
194
  return thrust::random::detail::random_core_access::stream_in(is, d);
196
195
  }
197
-
198
196
  } // namespace random
199
197
 
200
198
  THRUST_NAMESPACE_END
@@ -33,7 +33,6 @@ THRUST_NAMESPACE_BEGIN
33
33
 
34
34
  namespace random
35
35
  {
36
-
37
36
  template <typename Engine1, size_t s1, typename Engine2, size_t s2>
38
37
  _CCCL_HOST_DEVICE xor_combine_engine<Engine1, s1, Engine2, s2>::xor_combine_engine()
39
38
  : m_b1()
@@ -177,7 +176,6 @@ _CCCL_HOST_DEVICE bool operator!=(const xor_combine_engine<Engine1, s1, Engine2,
177
176
  {
178
177
  return !(lhs == rhs);
179
178
  }
180
-
181
179
  } // namespace random
182
180
 
183
181
  THRUST_NAMESPACE_END
@@ -41,7 +41,6 @@ THRUST_NAMESPACE_BEGIN
41
41
 
42
42
  namespace random
43
43
  {
44
-
45
44
  /*! \addtogroup random_number_engine_adaptors Random Number Engine Adaptor Class Templates
46
45
  * \ingroup random
47
46
  * \{
@@ -229,7 +228,6 @@ operator>>(std::basic_istream<CharT, Traits>& is, discard_block_engine<Engine, p
229
228
 
230
229
  /*! \} // end random_number_engine_adaptors
231
230
  */
232
-
233
231
  } // namespace random
234
232
 
235
233
  // import names into thrust::
@@ -40,7 +40,6 @@ THRUST_NAMESPACE_BEGIN
40
40
 
41
41
  namespace random
42
42
  {
43
-
44
43
  /*! \addtogroup random_number_engine_templates Random Number Engine Class Templates
45
44
  * \ingroup random
46
45
  * \{
@@ -276,7 +275,6 @@ using minstd_rand = linear_congruential_engine<std::uint32_t, 48271, 0, 21474836
276
275
 
277
276
  /*! \} // predefined_random
278
277
  */
279
-
280
278
  } // namespace random
281
279
 
282
280
  // import names into thrust::
@@ -49,7 +49,6 @@ THRUST_NAMESPACE_BEGIN
49
49
 
50
50
  namespace random
51
51
  {
52
-
53
52
  /*! \addtogroup random_number_engine_templates
54
53
  * \{
55
54
  */
@@ -206,7 +205,6 @@ operator>>(std::basic_istream<CharT, Traits>& is, linear_feedback_shift_engine<U
206
205
 
207
206
  /*! \} // end random_number_engine_templates
208
207
  */
209
-
210
208
  } // namespace random
211
209
 
212
210
  // import names into thrust::
@@ -39,7 +39,6 @@ THRUST_NAMESPACE_BEGIN
39
39
 
40
40
  namespace random
41
41
  {
42
-
43
42
  /*! \addtogroup random_number_distributions
44
43
  * \{
45
44
  */
@@ -247,7 +246,6 @@ std::basic_istream<CharT, Traits>& operator>>(std::basic_istream<CharT, Traits>&
247
246
 
248
247
  /*! \} // end random_number_distributions
249
248
  */
250
-
251
249
  } // namespace random
252
250
 
253
251
  using random::normal_distribution;
@@ -42,7 +42,6 @@ THRUST_NAMESPACE_BEGIN
42
42
 
43
43
  namespace random
44
44
  {
45
-
46
45
  /*! \addtogroup random_number_engine_templates
47
46
  * \{
48
47
  */
@@ -234,7 +233,6 @@ using ranlux48_base = subtract_with_carry_engine<std::uint64_t, 48, 5, 12>;
234
233
 
235
234
  /*! \} // end predefined_random
236
235
  */
237
-
238
236
  } // namespace random
239
237
 
240
238
  // import names into thrust::
@@ -40,7 +40,6 @@ THRUST_NAMESPACE_BEGIN
40
40
 
41
41
  namespace random
42
42
  {
43
-
44
43
  /*! \addtogroup random_number_distributions Random Number Distributions Class Templates
45
44
  * \ingroup random
46
45
  * \{
@@ -251,7 +250,6 @@ operator>>(std::basic_istream<CharT, Traits>& is, uniform_int_distribution<IntTy
251
250
 
252
251
  /*! \} // end random_number_distributions
253
252
  */
254
-
255
253
  } // namespace random
256
254
 
257
255
  using random::uniform_int_distribution;
@@ -38,7 +38,6 @@ THRUST_NAMESPACE_BEGIN
38
38
 
39
39
  namespace random
40
40
  {
41
-
42
41
  /*! \addtogroup random_number_distributions
43
42
  * \{
44
43
  */
@@ -248,7 +247,6 @@ operator>>(std::basic_istream<CharT, Traits>& is, uniform_real_distribution<Real
248
247
 
249
248
  /*! \} // end random_number_distributions
250
249
  */
251
-
252
250
  } // namespace random
253
251
 
254
252
  using random::uniform_real_distribution;
@@ -45,7 +45,6 @@ THRUST_NAMESPACE_BEGIN
45
45
 
46
46
  namespace random
47
47
  {
48
-
49
48
  /*! \addtogroup random_number_engine_adaptors
50
49
  * \{
51
50
  */
@@ -244,7 +243,6 @@ operator>>(std::basic_istream<CharT, Traits>& is, xor_combine_engine<Engine1_, s
244
243
 
245
244
  /*! \} // end random_number_engine_adaptors
246
245
  */
247
-
248
246
  } // namespace random
249
247
 
250
248
  // import names into thrust::
@@ -58,7 +58,6 @@ THRUST_NAMESPACE_BEGIN
58
58
  */
59
59
  namespace random
60
60
  {
61
-
62
61
  /*! \addtogroup predefined_random Random Number Engines with Predefined Parameters
63
62
  * \ingroup random
64
63
  * \{
@@ -105,7 +104,6 @@ using default_random_engine = minstd_rand;
105
104
 
106
105
  /*! \} // end predefined_random
107
106
  */
108
-
109
107
  } // namespace random
110
108
 
111
109
  /*! \} // end random
@@ -19,16 +19,11 @@
19
19
  THRUST_NAMESPACE_BEGIN
20
20
  namespace system::cpp
21
21
  {
22
- //! \addtogroup execution_policies
23
- //! \{
24
-
25
- //! \p thrust::cpp::tag is a type representing Thrust's standard C++ backend system in C++'s type system.
26
- //! Iterators "tagged" with a type which is convertible to \p cpp::tag assert that they may be
27
- //! "dispatched" to algorithm implementations in the \p cpp system.
22
+ namespace detail
23
+ {
24
+ // note: the tag and execution policy need to be defined in the same namespace as the algorithms for ADL to find them
28
25
  struct tag;
29
26
 
30
- //! \p thrust::cpp::execution_policy is the base class for all Thrust parallel execution
31
- //! policies which are derived from Thrust's standard C++ backend system.
32
27
  template <typename DerivedPolicy>
33
28
  struct execution_policy;
34
29
 
@@ -53,14 +48,24 @@ struct execution_policy : system::detail::sequential::execution_policy<Derived>
53
48
  }
54
49
  };
55
50
 
56
- namespace detail
57
- {
58
51
  struct par_t
59
52
  : execution_policy<par_t>
60
53
  , thrust::detail::allocator_aware_execution_policy<execution_policy>
61
54
  {};
62
55
  } // namespace detail
63
56
 
57
+ //! \addtogroup execution_policies
58
+ //! \{
59
+
60
+ //! \p thrust::cpp::tag is a type representing Thrust's standard C++ backend system in C++'s type system.
61
+ //! Iterators "tagged" with a type which is convertible to \p cpp::tag assert that they may be
62
+ //! "dispatched" to algorithm implementations in the \p cpp system.
63
+ using detail::tag;
64
+
65
+ //! \p thrust::cpp::execution_policy is the base class for all Thrust parallel execution
66
+ //! policies which are derived from Thrust's standard C++ backend system.
67
+ using detail::execution_policy;
68
+
64
69
  //! \p thrust::system::cpp::par is the parallel execution policy associated with Thrust's standard C++ backend system.
65
70
  //!
66
71
  //! Instead of relying on implicit algorithm dispatch through iterator system tags, users may directly target Thrust's
@@ -96,7 +101,6 @@ struct par_t
96
101
  _CCCL_GLOBAL_CONSTANT detail::par_t par;
97
102
 
98
103
  //! \}
99
-
100
104
  } // namespace system::cpp
101
105
 
102
106
  // aliases: