cuda-cccl 0.1.3.1.0.dev1486__cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cuda-cccl might be problematic. Click here for more details.

Files changed (1819)
  1. cuda/cccl/__init__.py +14 -0
  2. cuda/cccl/cooperative/__init__.py +3 -0
  3. cuda/cccl/cooperative/experimental/__init__.py +8 -0
  4. cuda/cccl/cooperative/experimental/_caching.py +48 -0
  5. cuda/cccl/cooperative/experimental/_common.py +276 -0
  6. cuda/cccl/cooperative/experimental/_nvrtc.py +91 -0
  7. cuda/cccl/cooperative/experimental/_scan_op.py +181 -0
  8. cuda/cccl/cooperative/experimental/_types.py +953 -0
  9. cuda/cccl/cooperative/experimental/_typing.py +107 -0
  10. cuda/cccl/cooperative/experimental/block/__init__.py +33 -0
  11. cuda/cccl/cooperative/experimental/block/_block_load_store.py +215 -0
  12. cuda/cccl/cooperative/experimental/block/_block_merge_sort.py +125 -0
  13. cuda/cccl/cooperative/experimental/block/_block_radix_sort.py +214 -0
  14. cuda/cccl/cooperative/experimental/block/_block_reduce.py +294 -0
  15. cuda/cccl/cooperative/experimental/block/_block_scan.py +983 -0
  16. cuda/cccl/cooperative/experimental/warp/__init__.py +9 -0
  17. cuda/cccl/cooperative/experimental/warp/_warp_merge_sort.py +98 -0
  18. cuda/cccl/cooperative/experimental/warp/_warp_reduce.py +153 -0
  19. cuda/cccl/cooperative/experimental/warp/_warp_scan.py +78 -0
  20. cuda/cccl/headers/__init__.py +7 -0
  21. cuda/cccl/headers/include/__init__.py +1 -0
  22. cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +261 -0
  23. cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +1181 -0
  24. cuda/cccl/headers/include/cub/agent/agent_for.cuh +84 -0
  25. cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +919 -0
  26. cuda/cccl/headers/include/cub/agent/agent_merge.cuh +227 -0
  27. cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +752 -0
  28. cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +766 -0
  29. cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +286 -0
  30. cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +704 -0
  31. cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +557 -0
  32. cuda/cccl/headers/include/cub/agent/agent_reduce.cuh +678 -0
  33. cuda/cccl/headers/include/cub/agent/agent_reduce_by_key.cuh +804 -0
  34. cuda/cccl/headers/include/cub/agent/agent_rle.cuh +997 -0
  35. cuda/cccl/headers/include/cub/agent/agent_scan.cuh +561 -0
  36. cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +473 -0
  37. cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +292 -0
  38. cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +1032 -0
  39. cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +342 -0
  40. cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +592 -0
  41. cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +614 -0
  42. cuda/cccl/headers/include/cub/agent/single_pass_scan_operators.cuh +1346 -0
  43. cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +965 -0
  44. cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +1217 -0
  45. cuda/cccl/headers/include/cub/block/block_exchange.cuh +1306 -0
  46. cuda/cccl/headers/include/cub/block/block_histogram.cuh +420 -0
  47. cuda/cccl/headers/include/cub/block/block_load.cuh +1259 -0
  48. cuda/cccl/headers/include/cub/block/block_merge_sort.cuh +787 -0
  49. cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +1218 -0
  50. cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +2193 -0
  51. cuda/cccl/headers/include/cub/block/block_raking_layout.cuh +150 -0
  52. cuda/cccl/headers/include/cub/block/block_reduce.cuh +629 -0
  53. cuda/cccl/headers/include/cub/block/block_run_length_decode.cuh +437 -0
  54. cuda/cccl/headers/include/cub/block/block_scan.cuh +2600 -0
  55. cuda/cccl/headers/include/cub/block/block_shuffle.cuh +346 -0
  56. cuda/cccl/headers/include/cub/block/block_store.cuh +1246 -0
  57. cuda/cccl/headers/include/cub/block/radix_rank_sort_operations.cuh +620 -0
  58. cuda/cccl/headers/include/cub/block/specializations/block_histogram_atomic.cuh +86 -0
  59. cuda/cccl/headers/include/cub/block/specializations/block_histogram_sort.cuh +240 -0
  60. cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking.cuh +252 -0
  61. cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking_commutative_only.cuh +238 -0
  62. cuda/cccl/headers/include/cub/block/specializations/block_reduce_warp_reductions.cuh +259 -0
  63. cuda/cccl/headers/include/cub/block/specializations/block_scan_raking.cuh +790 -0
  64. cuda/cccl/headers/include/cub/block/specializations/block_scan_warp_scans.cuh +538 -0
  65. cuda/cccl/headers/include/cub/config.cuh +60 -0
  66. cuda/cccl/headers/include/cub/cub.cuh +112 -0
  67. cuda/cccl/headers/include/cub/detail/array_utils.cuh +77 -0
  68. cuda/cccl/headers/include/cub/detail/choose_offset.cuh +155 -0
  69. cuda/cccl/headers/include/cub/detail/detect_cuda_runtime.cuh +93 -0
  70. cuda/cccl/headers/include/cub/detail/device_double_buffer.cuh +96 -0
  71. cuda/cccl/headers/include/cub/detail/fast_modulo_division.cuh +246 -0
  72. cuda/cccl/headers/include/cub/detail/launcher/cuda_driver.cuh +120 -0
  73. cuda/cccl/headers/include/cub/detail/launcher/cuda_runtime.cuh +74 -0
  74. cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +118 -0
  75. cuda/cccl/headers/include/cub/detail/ptx-json/README.md +71 -0
  76. cuda/cccl/headers/include/cub/detail/ptx-json/array.h +68 -0
  77. cuda/cccl/headers/include/cub/detail/ptx-json/json.h +61 -0
  78. cuda/cccl/headers/include/cub/detail/ptx-json/object.h +100 -0
  79. cuda/cccl/headers/include/cub/detail/ptx-json/string.h +71 -0
  80. cuda/cccl/headers/include/cub/detail/ptx-json/value.h +93 -0
  81. cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +63 -0
  82. cuda/cccl/headers/include/cub/detail/rfa.cuh +724 -0
  83. cuda/cccl/headers/include/cub/detail/strong_load.cuh +189 -0
  84. cuda/cccl/headers/include/cub/detail/strong_store.cuh +220 -0
  85. cuda/cccl/headers/include/cub/detail/temporary_storage.cuh +355 -0
  86. cuda/cccl/headers/include/cub/detail/type_traits.cuh +206 -0
  87. cuda/cccl/headers/include/cub/detail/uninitialized_copy.cuh +72 -0
  88. cuda/cccl/headers/include/cub/detail/unsafe_bitcast.cuh +56 -0
  89. cuda/cccl/headers/include/cub/device/device_adjacent_difference.cuh +596 -0
  90. cuda/cccl/headers/include/cub/device/device_copy.cuh +187 -0
  91. cuda/cccl/headers/include/cub/device/device_for.cuh +994 -0
  92. cuda/cccl/headers/include/cub/device/device_histogram.cuh +1507 -0
  93. cuda/cccl/headers/include/cub/device/device_memcpy.cuh +195 -0
  94. cuda/cccl/headers/include/cub/device/device_merge.cuh +202 -0
  95. cuda/cccl/headers/include/cub/device/device_merge_sort.cuh +979 -0
  96. cuda/cccl/headers/include/cub/device/device_partition.cuh +664 -0
  97. cuda/cccl/headers/include/cub/device/device_radix_sort.cuh +3431 -0
  98. cuda/cccl/headers/include/cub/device/device_reduce.cuh +1387 -0
  99. cuda/cccl/headers/include/cub/device/device_run_length_encode.cuh +368 -0
  100. cuda/cccl/headers/include/cub/device/device_scan.cuh +1901 -0
  101. cuda/cccl/headers/include/cub/device/device_segmented_radix_sort.cuh +1496 -0
  102. cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +1512 -0
  103. cuda/cccl/headers/include/cub/device/device_segmented_sort.cuh +2811 -0
  104. cuda/cccl/headers/include/cub/device/device_select.cuh +1224 -0
  105. cuda/cccl/headers/include/cub/device/device_transform.cuh +313 -0
  106. cuda/cccl/headers/include/cub/device/dispatch/dispatch_adjacent_difference.cuh +314 -0
  107. cuda/cccl/headers/include/cub/device/dispatch/dispatch_advance_iterators.cuh +109 -0
  108. cuda/cccl/headers/include/cub/device/dispatch/dispatch_batch_memcpy.cuh +718 -0
  109. cuda/cccl/headers/include/cub/device/dispatch/dispatch_common.cuh +45 -0
  110. cuda/cccl/headers/include/cub/device/dispatch/dispatch_for.cuh +197 -0
  111. cuda/cccl/headers/include/cub/device/dispatch/dispatch_histogram.cuh +1051 -0
  112. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +305 -0
  113. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge_sort.cuh +473 -0
  114. cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +1748 -0
  115. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +1316 -0
  116. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_by_key.cuh +625 -0
  117. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +502 -0
  118. cuda/cccl/headers/include/cub/device/dispatch/dispatch_rle.cuh +548 -0
  119. cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan.cuh +497 -0
  120. cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan_by_key.cuh +598 -0
  121. cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +1374 -0
  122. cuda/cccl/headers/include/cub/device/dispatch/dispatch_select_if.cuh +838 -0
  123. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +341 -0
  124. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +439 -0
  125. cuda/cccl/headers/include/cub/device/dispatch/dispatch_three_way_partition.cuh +552 -0
  126. cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +397 -0
  127. cuda/cccl/headers/include/cub/device/dispatch/dispatch_unique_by_key.cuh +543 -0
  128. cuda/cccl/headers/include/cub/device/dispatch/kernels/for_each.cuh +218 -0
  129. cuda/cccl/headers/include/cub/device/dispatch/kernels/histogram.cuh +505 -0
  130. cuda/cccl/headers/include/cub/device/dispatch/kernels/merge_sort.cuh +338 -0
  131. cuda/cccl/headers/include/cub/device/dispatch/kernels/radix_sort.cuh +799 -0
  132. cuda/cccl/headers/include/cub/device/dispatch/kernels/reduce.cuh +523 -0
  133. cuda/cccl/headers/include/cub/device/dispatch/kernels/scan.cuh +194 -0
  134. cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_reduce.cuh +330 -0
  135. cuda/cccl/headers/include/cub/device/dispatch/kernels/transform.cuh +437 -0
  136. cuda/cccl/headers/include/cub/device/dispatch/kernels/unique_by_key.cuh +176 -0
  137. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +70 -0
  138. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +121 -0
  139. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +63 -0
  140. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +278 -0
  141. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +91 -0
  142. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +118 -0
  143. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +1068 -0
  144. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce.cuh +397 -0
  145. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +945 -0
  146. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +675 -0
  147. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +555 -0
  148. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +1013 -0
  149. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +249 -0
  150. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_select_if.cuh +1587 -0
  151. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +407 -0
  152. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +283 -0
  153. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +874 -0
  154. cuda/cccl/headers/include/cub/grid/grid_even_share.cuh +215 -0
  155. cuda/cccl/headers/include/cub/grid/grid_mapping.cuh +106 -0
  156. cuda/cccl/headers/include/cub/grid/grid_queue.cuh +202 -0
  157. cuda/cccl/headers/include/cub/iterator/arg_index_input_iterator.cuh +256 -0
  158. cuda/cccl/headers/include/cub/iterator/cache_modified_input_iterator.cuh +238 -0
  159. cuda/cccl/headers/include/cub/iterator/cache_modified_output_iterator.cuh +252 -0
  160. cuda/cccl/headers/include/cub/iterator/tex_obj_input_iterator.cuh +322 -0
  161. cuda/cccl/headers/include/cub/thread/thread_load.cuh +347 -0
  162. cuda/cccl/headers/include/cub/thread/thread_operators.cuh +629 -0
  163. cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +504 -0
  164. cuda/cccl/headers/include/cub/thread/thread_scan.cuh +340 -0
  165. cuda/cccl/headers/include/cub/thread/thread_search.cuh +198 -0
  166. cuda/cccl/headers/include/cub/thread/thread_simd.cuh +406 -0
  167. cuda/cccl/headers/include/cub/thread/thread_sort.cuh +101 -0
  168. cuda/cccl/headers/include/cub/thread/thread_store.cuh +364 -0
  169. cuda/cccl/headers/include/cub/util_allocator.cuh +921 -0
  170. cuda/cccl/headers/include/cub/util_arch.cuh +163 -0
  171. cuda/cccl/headers/include/cub/util_cpp_dialect.cuh +95 -0
  172. cuda/cccl/headers/include/cub/util_debug.cuh +207 -0
  173. cuda/cccl/headers/include/cub/util_device.cuh +779 -0
  174. cuda/cccl/headers/include/cub/util_macro.cuh +91 -0
  175. cuda/cccl/headers/include/cub/util_math.cuh +115 -0
  176. cuda/cccl/headers/include/cub/util_namespace.cuh +176 -0
  177. cuda/cccl/headers/include/cub/util_policy_wrapper_t.cuh +55 -0
  178. cuda/cccl/headers/include/cub/util_ptx.cuh +513 -0
  179. cuda/cccl/headers/include/cub/util_temporary_storage.cuh +122 -0
  180. cuda/cccl/headers/include/cub/util_type.cuh +1111 -0
  181. cuda/cccl/headers/include/cub/util_vsmem.cuh +251 -0
  182. cuda/cccl/headers/include/cub/version.cuh +89 -0
  183. cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_shfl.cuh +329 -0
  184. cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_smem.cuh +177 -0
  185. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +729 -0
  186. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +405 -0
  187. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +688 -0
  188. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_smem.cuh +437 -0
  189. cuda/cccl/headers/include/cub/warp/warp_exchange.cuh +405 -0
  190. cuda/cccl/headers/include/cub/warp/warp_load.cuh +614 -0
  191. cuda/cccl/headers/include/cub/warp/warp_merge_sort.cuh +169 -0
  192. cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +822 -0
  193. cuda/cccl/headers/include/cub/warp/warp_scan.cuh +1156 -0
  194. cuda/cccl/headers/include/cub/warp/warp_store.cuh +520 -0
  195. cuda/cccl/headers/include/cuda/__annotated_ptr/access_property.h +169 -0
  196. cuda/cccl/headers/include/cuda/__annotated_ptr/access_property_encoding.h +172 -0
  197. cuda/cccl/headers/include/cuda/__annotated_ptr/annotated_ptr.h +210 -0
  198. cuda/cccl/headers/include/cuda/__annotated_ptr/annotated_ptr_base.h +100 -0
  199. cuda/cccl/headers/include/cuda/__annotated_ptr/apply_access_property.h +84 -0
  200. cuda/cccl/headers/include/cuda/__annotated_ptr/associate_access_property.h +127 -0
  201. cuda/cccl/headers/include/cuda/__annotated_ptr/createpolicy.h +209 -0
  202. cuda/cccl/headers/include/cuda/__atomic/atomic.h +145 -0
  203. cuda/cccl/headers/include/cuda/__barrier/aligned_size.h +61 -0
  204. cuda/cccl/headers/include/cuda/__barrier/async_contract_fulfillment.h +39 -0
  205. cuda/cccl/headers/include/cuda/__barrier/barrier.h +66 -0
  206. cuda/cccl/headers/include/cuda/__barrier/barrier_arrive_tx.h +100 -0
  207. cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +454 -0
  208. cuda/cccl/headers/include/cuda/__barrier/barrier_expect_tx.h +72 -0
  209. cuda/cccl/headers/include/cuda/__barrier/barrier_native_handle.h +45 -0
  210. cuda/cccl/headers/include/cuda/__barrier/barrier_thread_scope.h +61 -0
  211. cuda/cccl/headers/include/cuda/__bit/bit_reverse.h +171 -0
  212. cuda/cccl/headers/include/cuda/__bit/bitfield.h +122 -0
  213. cuda/cccl/headers/include/cuda/__bit/bitmask.h +88 -0
  214. cuda/cccl/headers/include/cuda/__cccl_config +36 -0
  215. cuda/cccl/headers/include/cuda/__cmath/ceil_div.h +126 -0
  216. cuda/cccl/headers/include/cuda/__cmath/ilog.h +195 -0
  217. cuda/cccl/headers/include/cuda/__cmath/ipow.h +107 -0
  218. cuda/cccl/headers/include/cuda/__cmath/isqrt.h +80 -0
  219. cuda/cccl/headers/include/cuda/__cmath/neg.h +47 -0
  220. cuda/cccl/headers/include/cuda/__cmath/pow2.h +74 -0
  221. cuda/cccl/headers/include/cuda/__cmath/round_down.h +104 -0
  222. cuda/cccl/headers/include/cuda/__cmath/round_up.h +106 -0
  223. cuda/cccl/headers/include/cuda/__cmath/uabs.h +57 -0
  224. cuda/cccl/headers/include/cuda/__execution/determinism.h +90 -0
  225. cuda/cccl/headers/include/cuda/__execution/require.h +67 -0
  226. cuda/cccl/headers/include/cuda/__execution/tune.h +62 -0
  227. cuda/cccl/headers/include/cuda/__functional/address_stability.h +131 -0
  228. cuda/cccl/headers/include/cuda/__functional/for_each_canceled.h +279 -0
  229. cuda/cccl/headers/include/cuda/__functional/get_device_address.h +58 -0
  230. cuda/cccl/headers/include/cuda/__functional/maximum.h +58 -0
  231. cuda/cccl/headers/include/cuda/__functional/minimum.h +58 -0
  232. cuda/cccl/headers/include/cuda/__functional/proclaim_return_type.h +108 -0
  233. cuda/cccl/headers/include/cuda/__fwd/barrier.h +38 -0
  234. cuda/cccl/headers/include/cuda/__fwd/barrier_native_handle.h +42 -0
  235. cuda/cccl/headers/include/cuda/__fwd/get_stream.h +38 -0
  236. cuda/cccl/headers/include/cuda/__fwd/pipeline.h +37 -0
  237. cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h +261 -0
  238. cuda/cccl/headers/include/cuda/__iterator/counting_iterator.h +407 -0
  239. cuda/cccl/headers/include/cuda/__iterator/discard_iterator.h +314 -0
  240. cuda/cccl/headers/include/cuda/__iterator/strided_iterator.h +323 -0
  241. cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +481 -0
  242. cuda/cccl/headers/include/cuda/__latch/latch.h +44 -0
  243. cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +457 -0
  244. cuda/cccl/headers/include/cuda/__mdspan/host_device_mdspan.h +63 -0
  245. cuda/cccl/headers/include/cuda/__mdspan/restrict_accessor.h +123 -0
  246. cuda/cccl/headers/include/cuda/__mdspan/restrict_mdspan.h +51 -0
  247. cuda/cccl/headers/include/cuda/__memcpy_async/check_preconditions.h +79 -0
  248. cuda/cccl/headers/include/cuda/__memcpy_async/completion_mechanism.h +47 -0
  249. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_bulk_shared_global.h +60 -0
  250. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_fallback.h +72 -0
  251. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_shared_global.h +98 -0
  252. cuda/cccl/headers/include/cuda/__memcpy_async/dispatch_memcpy_async.h +162 -0
  253. cuda/cccl/headers/include/cuda/__memcpy_async/is_local_smem_barrier.h +49 -0
  254. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async.h +179 -0
  255. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_barrier.h +99 -0
  256. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_tx.h +99 -0
  257. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_completion.h +170 -0
  258. cuda/cccl/headers/include/cuda/__memcpy_async/try_get_barrier_handle.h +59 -0
  259. cuda/cccl/headers/include/cuda/__memory/address_space.h +86 -0
  260. cuda/cccl/headers/include/cuda/__memory_resource/get_memory_resource.h +94 -0
  261. cuda/cccl/headers/include/cuda/__memory_resource/get_property.h +158 -0
  262. cuda/cccl/headers/include/cuda/__memory_resource/properties.h +73 -0
  263. cuda/cccl/headers/include/cuda/__memory_resource/resource.h +129 -0
  264. cuda/cccl/headers/include/cuda/__memory_resource/resource_ref.h +653 -0
  265. cuda/cccl/headers/include/cuda/__numeric/narrow.h +108 -0
  266. cuda/cccl/headers/include/cuda/__numeric/overflow_cast.h +57 -0
  267. cuda/cccl/headers/include/cuda/__numeric/overflow_result.h +43 -0
  268. cuda/cccl/headers/include/cuda/__nvtx/nvtx.h +101 -0
  269. cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +2982 -0
  270. cuda/cccl/headers/include/cuda/__ptx/instructions/barrier_cluster.h +43 -0
  271. cuda/cccl/headers/include/cuda/__ptx/instructions/bfind.h +41 -0
  272. cuda/cccl/headers/include/cuda/__ptx/instructions/bmsk.h +41 -0
  273. cuda/cccl/headers/include/cuda/__ptx/instructions/clusterlaunchcontrol.h +41 -0
  274. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk.h +44 -0
  275. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_commit_group.h +43 -0
  276. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_tensor.h +45 -0
  277. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_wait_group.h +43 -0
  278. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_mbarrier_arrive.h +42 -0
  279. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_reduce_async_bulk.h +60 -0
  280. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_reduce_async_bulk_tensor.h +43 -0
  281. cuda/cccl/headers/include/cuda/__ptx/instructions/elect_sync.h +41 -0
  282. cuda/cccl/headers/include/cuda/__ptx/instructions/exit.h +41 -0
  283. cuda/cccl/headers/include/cuda/__ptx/instructions/fence.h +49 -0
  284. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/barrier_cluster.h +115 -0
  285. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/bfind.h +190 -0
  286. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/bmsk.h +54 -0
  287. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/clusterlaunchcontrol.h +240 -0
  288. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk.h +193 -0
  289. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_commit_group.h +25 -0
  290. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_multicast.h +52 -0
  291. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor.h +957 -0
  292. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_gather_scatter.h +288 -0
  293. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_multicast.h +596 -0
  294. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_wait_group.h +46 -0
  295. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_mbarrier_arrive.h +26 -0
  296. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_mbarrier_arrive_noinc.h +26 -0
  297. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk.h +1445 -0
  298. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_bf16.h +132 -0
  299. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_f16.h +117 -0
  300. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_tensor.h +601 -0
  301. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/elect_sync.h +36 -0
  302. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/exit.h +25 -0
  303. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence.h +208 -0
  304. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_mbarrier_init.h +31 -0
  305. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_alias.h +25 -0
  306. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_async.h +58 -0
  307. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_async_generic_sync_restrict.h +62 -0
  308. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_tensormap_generic.h +101 -0
  309. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_sync_restrict.h +62 -0
  310. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/get_sreg.h +949 -0
  311. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/getctarank.h +32 -0
  312. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/ld.h +15074 -0
  313. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive.h +385 -0
  314. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive_expect_tx.h +176 -0
  315. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive_no_complete.h +34 -0
  316. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_expect_tx.h +94 -0
  317. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_init.h +27 -0
  318. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_test_wait.h +137 -0
  319. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_test_wait_parity.h +138 -0
  320. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_try_wait.h +280 -0
  321. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_try_wait_parity.h +282 -0
  322. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_ld_reduce.h +2148 -0
  323. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_red.h +1272 -0
  324. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_st.h +228 -0
  325. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/prmt.h +230 -0
  326. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/red_async.h +430 -0
  327. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/shl.h +96 -0
  328. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/shr.h +168 -0
  329. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st.h +1830 -0
  330. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st_async.h +123 -0
  331. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st_bulk.h +31 -0
  332. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_alloc.h +105 -0
  333. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_commit.h +81 -0
  334. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_cp.h +612 -0
  335. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_fence.h +44 -0
  336. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_ld.h +4446 -0
  337. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_mma.h +4061 -0
  338. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_mma_ws.h +6438 -0
  339. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_shift.h +36 -0
  340. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_st.h +4582 -0
  341. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_wait.h +44 -0
  342. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tensormap_cp_fenceproxy.h +67 -0
  343. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tensormap_replace.h +750 -0
  344. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/trap.h +25 -0
  345. cuda/cccl/headers/include/cuda/__ptx/instructions/get_sreg.h +43 -0
  346. cuda/cccl/headers/include/cuda/__ptx/instructions/getctarank.h +43 -0
  347. cuda/cccl/headers/include/cuda/__ptx/instructions/ld.h +41 -0
  348. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_arrive.h +45 -0
  349. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_expect_tx.h +41 -0
  350. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_init.h +43 -0
  351. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_wait.h +46 -0
  352. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_ld_reduce.h +41 -0
  353. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_red.h +41 -0
  354. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_st.h +41 -0
  355. cuda/cccl/headers/include/cuda/__ptx/instructions/prmt.h +41 -0
  356. cuda/cccl/headers/include/cuda/__ptx/instructions/red_async.h +43 -0
  357. cuda/cccl/headers/include/cuda/__ptx/instructions/shfl_sync.h +275 -0
  358. cuda/cccl/headers/include/cuda/__ptx/instructions/shl.h +41 -0
  359. cuda/cccl/headers/include/cuda/__ptx/instructions/shr.h +41 -0
  360. cuda/cccl/headers/include/cuda/__ptx/instructions/st.h +41 -0
  361. cuda/cccl/headers/include/cuda/__ptx/instructions/st_async.h +43 -0
  362. cuda/cccl/headers/include/cuda/__ptx/instructions/st_bulk.h +41 -0
  363. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_alloc.h +41 -0
  364. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_commit.h +41 -0
  365. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_cp.h +41 -0
  366. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_fence.h +41 -0
  367. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_ld.h +41 -0
  368. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_mma.h +41 -0
  369. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_mma_ws.h +41 -0
  370. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_shift.h +41 -0
  371. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_st.h +41 -0
  372. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_wait.h +41 -0
  373. cuda/cccl/headers/include/cuda/__ptx/instructions/tensormap_cp_fenceproxy.h +43 -0
  374. cuda/cccl/headers/include/cuda/__ptx/instructions/tensormap_replace.h +43 -0
  375. cuda/cccl/headers/include/cuda/__ptx/instructions/trap.h +41 -0
  376. cuda/cccl/headers/include/cuda/__ptx/ptx_dot_variants.h +230 -0
  377. cuda/cccl/headers/include/cuda/__ptx/ptx_helper_functions.h +151 -0
  378. cuda/cccl/headers/include/cuda/__semaphore/counting_semaphore.h +53 -0
  379. cuda/cccl/headers/include/cuda/__stream/get_stream.h +97 -0
  380. cuda/cccl/headers/include/cuda/__stream/stream_ref.h +165 -0
  381. cuda/cccl/headers/include/cuda/__type_traits/is_floating_point.h +47 -0
  382. cuda/cccl/headers/include/cuda/__warp/lane_mask.h +326 -0
  383. cuda/cccl/headers/include/cuda/__warp/warp_match_all.h +66 -0
  384. cuda/cccl/headers/include/cuda/__warp/warp_shuffle.h +249 -0
  385. cuda/cccl/headers/include/cuda/access_property +26 -0
  386. cuda/cccl/headers/include/cuda/annotated_ptr +29 -0
  387. cuda/cccl/headers/include/cuda/atomic +27 -0
  388. cuda/cccl/headers/include/cuda/barrier +262 -0
  389. cuda/cccl/headers/include/cuda/bit +29 -0
  390. cuda/cccl/headers/include/cuda/cmath +35 -0
  391. cuda/cccl/headers/include/cuda/discard_memory +61 -0
  392. cuda/cccl/headers/include/cuda/functional +31 -0
  393. cuda/cccl/headers/include/cuda/iterator +31 -0
  394. cuda/cccl/headers/include/cuda/latch +27 -0
  395. cuda/cccl/headers/include/cuda/mdspan +28 -0
  396. cuda/cccl/headers/include/cuda/memory +28 -0
  397. cuda/cccl/headers/include/cuda/memory_resource +41 -0
  398. cuda/cccl/headers/include/cuda/numeric +28 -0
  399. cuda/cccl/headers/include/cuda/pipeline +579 -0
  400. cuda/cccl/headers/include/cuda/ptx +118 -0
  401. cuda/cccl/headers/include/cuda/semaphore +31 -0
  402. cuda/cccl/headers/include/cuda/std/__algorithm/adjacent_find.h +60 -0
  403. cuda/cccl/headers/include/cuda/std/__algorithm/all_of.h +46 -0
  404. cuda/cccl/headers/include/cuda/std/__algorithm/any_of.h +46 -0
  405. cuda/cccl/headers/include/cuda/std/__algorithm/binary_search.h +52 -0
  406. cuda/cccl/headers/include/cuda/std/__algorithm/clamp.h +48 -0
  407. cuda/cccl/headers/include/cuda/std/__algorithm/comp.h +64 -0
  408. cuda/cccl/headers/include/cuda/std/__algorithm/comp_ref_type.h +85 -0
  409. cuda/cccl/headers/include/cuda/std/__algorithm/copy.h +143 -0
  410. cuda/cccl/headers/include/cuda/std/__algorithm/copy_backward.h +79 -0
  411. cuda/cccl/headers/include/cuda/std/__algorithm/copy_if.h +47 -0
  412. cuda/cccl/headers/include/cuda/std/__algorithm/copy_n.h +74 -0
  413. cuda/cccl/headers/include/cuda/std/__algorithm/count.h +49 -0
  414. cuda/cccl/headers/include/cuda/std/__algorithm/count_if.h +49 -0
  415. cuda/cccl/headers/include/cuda/std/__algorithm/equal.h +129 -0
  416. cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h +101 -0
  417. cuda/cccl/headers/include/cuda/std/__algorithm/fill.h +58 -0
  418. cuda/cccl/headers/include/cuda/std/__algorithm/fill_n.h +51 -0
  419. cuda/cccl/headers/include/cuda/std/__algorithm/find.h +64 -0
  420. cuda/cccl/headers/include/cuda/std/__algorithm/find_end.h +225 -0
  421. cuda/cccl/headers/include/cuda/std/__algorithm/find_first_of.h +73 -0
  422. cuda/cccl/headers/include/cuda/std/__algorithm/find_if.h +46 -0
  423. cuda/cccl/headers/include/cuda/std/__algorithm/find_if_not.h +46 -0
  424. cuda/cccl/headers/include/cuda/std/__algorithm/for_each.h +42 -0
  425. cuda/cccl/headers/include/cuda/std/__algorithm/for_each_n.h +48 -0
  426. cuda/cccl/headers/include/cuda/std/__algorithm/generate.h +41 -0
  427. cuda/cccl/headers/include/cuda/std/__algorithm/generate_n.h +46 -0
  428. cuda/cccl/headers/include/cuda/std/__algorithm/half_positive.h +49 -0
  429. cuda/cccl/headers/include/cuda/std/__algorithm/includes.h +92 -0
  430. cuda/cccl/headers/include/cuda/std/__algorithm/is_heap.h +51 -0
  431. cuda/cccl/headers/include/cuda/std/__algorithm/is_heap_until.h +83 -0
  432. cuda/cccl/headers/include/cuda/std/__algorithm/is_partitioned.h +58 -0
  433. cuda/cccl/headers/include/cuda/std/__algorithm/is_permutation.h +252 -0
  434. cuda/cccl/headers/include/cuda/std/__algorithm/is_sorted.h +50 -0
  435. cuda/cccl/headers/include/cuda/std/__algorithm/is_sorted_until.h +69 -0
  436. cuda/cccl/headers/include/cuda/std/__algorithm/iter_swap.h +82 -0
  437. cuda/cccl/headers/include/cuda/std/__algorithm/iterator_operations.h +188 -0
  438. cuda/cccl/headers/include/cuda/std/__algorithm/lexicographical_compare.h +68 -0
  439. cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h +83 -0
  440. cuda/cccl/headers/include/cuda/std/__algorithm/make_heap.h +72 -0
  441. cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h +96 -0
  442. cuda/cccl/headers/include/cuda/std/__algorithm/max.h +62 -0
  443. cuda/cccl/headers/include/cuda/std/__algorithm/max_element.h +70 -0
  444. cuda/cccl/headers/include/cuda/std/__algorithm/merge.h +89 -0
  445. cuda/cccl/headers/include/cuda/std/__algorithm/min.h +62 -0
  446. cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h +88 -0
  447. cuda/cccl/headers/include/cuda/std/__algorithm/minmax.h +71 -0
  448. cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h +141 -0
  449. cuda/cccl/headers/include/cuda/std/__algorithm/mismatch.h +83 -0
  450. cuda/cccl/headers/include/cuda/std/__algorithm/move.h +88 -0
  451. cuda/cccl/headers/include/cuda/std/__algorithm/move_backward.h +84 -0
  452. cuda/cccl/headers/include/cuda/std/__algorithm/next_permutation.h +89 -0
  453. cuda/cccl/headers/include/cuda/std/__algorithm/none_of.h +46 -0
  454. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort.h +102 -0
  455. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h +122 -0
  456. cuda/cccl/headers/include/cuda/std/__algorithm/partition.h +121 -0
  457. cuda/cccl/headers/include/cuda/std/__algorithm/partition_copy.h +59 -0
  458. cuda/cccl/headers/include/cuda/std/__algorithm/partition_point.h +61 -0
  459. cuda/cccl/headers/include/cuda/std/__algorithm/pop_heap.h +95 -0
  460. cuda/cccl/headers/include/cuda/std/__algorithm/prev_permutation.h +89 -0
  461. cuda/cccl/headers/include/cuda/std/__algorithm/push_heap.h +103 -0
  462. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_iterator_concept.h +65 -0
  463. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_min.h +99 -0
  464. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_min_element.h +69 -0
  465. cuda/cccl/headers/include/cuda/std/__algorithm/remove.h +55 -0
  466. cuda/cccl/headers/include/cuda/std/__algorithm/remove_copy.h +47 -0
  467. cuda/cccl/headers/include/cuda/std/__algorithm/remove_copy_if.h +47 -0
  468. cuda/cccl/headers/include/cuda/std/__algorithm/remove_if.h +56 -0
  469. cuda/cccl/headers/include/cuda/std/__algorithm/replace.h +45 -0
  470. cuda/cccl/headers/include/cuda/std/__algorithm/replace_copy.h +54 -0
  471. cuda/cccl/headers/include/cuda/std/__algorithm/replace_copy_if.h +50 -0
  472. cuda/cccl/headers/include/cuda/std/__algorithm/replace_if.h +45 -0
  473. cuda/cccl/headers/include/cuda/std/__algorithm/reverse.h +81 -0
  474. cuda/cccl/headers/include/cuda/std/__algorithm/reverse_copy.h +43 -0
  475. cuda/cccl/headers/include/cuda/std/__algorithm/rotate.h +264 -0
  476. cuda/cccl/headers/include/cuda/std/__algorithm/rotate_copy.h +40 -0
  477. cuda/cccl/headers/include/cuda/std/__algorithm/search.h +185 -0
  478. cuda/cccl/headers/include/cuda/std/__algorithm/search_n.h +163 -0
  479. cuda/cccl/headers/include/cuda/std/__algorithm/set_difference.h +95 -0
  480. cuda/cccl/headers/include/cuda/std/__algorithm/set_intersection.h +123 -0
  481. cuda/cccl/headers/include/cuda/std/__algorithm/set_symmetric_difference.h +135 -0
  482. cuda/cccl/headers/include/cuda/std/__algorithm/set_union.h +129 -0
  483. cuda/cccl/headers/include/cuda/std/__algorithm/shift_left.h +84 -0
  484. cuda/cccl/headers/include/cuda/std/__algorithm/shift_right.h +144 -0
  485. cuda/cccl/headers/include/cuda/std/__algorithm/sift_down.h +139 -0
  486. cuda/cccl/headers/include/cuda/std/__algorithm/sort_heap.h +72 -0
  487. cuda/cccl/headers/include/cuda/std/__algorithm/swap_ranges.h +78 -0
  488. cuda/cccl/headers/include/cuda/std/__algorithm/transform.h +59 -0
  489. cuda/cccl/headers/include/cuda/std/__algorithm/unique.h +77 -0
  490. cuda/cccl/headers/include/cuda/std/__algorithm/unique_copy.h +156 -0
  491. cuda/cccl/headers/include/cuda/std/__algorithm/unwrap_iter.h +96 -0
  492. cuda/cccl/headers/include/cuda/std/__algorithm/unwrap_range.h +127 -0
  493. cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h +83 -0
  494. cuda/cccl/headers/include/cuda/std/__algorithm_ +26 -0
  495. cuda/cccl/headers/include/cuda/std/__atomic/api/common.h +192 -0
  496. cuda/cccl/headers/include/cuda/std/__atomic/api/owned.h +138 -0
  497. cuda/cccl/headers/include/cuda/std/__atomic/api/reference.h +118 -0
  498. cuda/cccl/headers/include/cuda/std/__atomic/functions/common.h +58 -0
  499. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_local.h +218 -0
  500. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_derived.h +401 -0
  501. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated.h +3971 -0
  502. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated_helper.h +177 -0
  503. cuda/cccl/headers/include/cuda/std/__atomic/functions/host.h +211 -0
  504. cuda/cccl/headers/include/cuda/std/__atomic/functions.h +33 -0
  505. cuda/cccl/headers/include/cuda/std/__atomic/order.h +159 -0
  506. cuda/cccl/headers/include/cuda/std/__atomic/platform/msvc_to_builtins.h +654 -0
  507. cuda/cccl/headers/include/cuda/std/__atomic/platform.h +93 -0
  508. cuda/cccl/headers/include/cuda/std/__atomic/scopes.h +105 -0
  509. cuda/cccl/headers/include/cuda/std/__atomic/types/base.h +250 -0
  510. cuda/cccl/headers/include/cuda/std/__atomic/types/common.h +105 -0
  511. cuda/cccl/headers/include/cuda/std/__atomic/types/locked.h +225 -0
  512. cuda/cccl/headers/include/cuda/std/__atomic/types/reference.h +73 -0
  513. cuda/cccl/headers/include/cuda/std/__atomic/types/small.h +228 -0
  514. cuda/cccl/headers/include/cuda/std/__atomic/types.h +52 -0
  515. cuda/cccl/headers/include/cuda/std/__atomic/wait/notify_wait.h +95 -0
  516. cuda/cccl/headers/include/cuda/std/__atomic/wait/polling.h +65 -0
  517. cuda/cccl/headers/include/cuda/std/__barrier/barrier.h +227 -0
  518. cuda/cccl/headers/include/cuda/std/__barrier/empty_completion.h +37 -0
  519. cuda/cccl/headers/include/cuda/std/__barrier/poll_tester.h +84 -0
  520. cuda/cccl/headers/include/cuda/std/__bit/bit_cast.h +77 -0
  521. cuda/cccl/headers/include/cuda/std/__bit/byteswap.h +183 -0
  522. cuda/cccl/headers/include/cuda/std/__bit/countl.h +167 -0
  523. cuda/cccl/headers/include/cuda/std/__bit/countr.h +185 -0
  524. cuda/cccl/headers/include/cuda/std/__bit/endian.h +39 -0
  525. cuda/cccl/headers/include/cuda/std/__bit/has_single_bit.h +43 -0
  526. cuda/cccl/headers/include/cuda/std/__bit/integral.h +124 -0
  527. cuda/cccl/headers/include/cuda/std/__bit/popcount.h +154 -0
  528. cuda/cccl/headers/include/cuda/std/__bit/reference.h +1274 -0
  529. cuda/cccl/headers/include/cuda/std/__bit/rotate.h +94 -0
  530. cuda/cccl/headers/include/cuda/std/__cccl/architecture.h +78 -0
  531. cuda/cccl/headers/include/cuda/std/__cccl/assert.h +146 -0
  532. cuda/cccl/headers/include/cuda/std/__cccl/attributes.h +207 -0
  533. cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +1343 -0
  534. cuda/cccl/headers/include/cuda/std/__cccl/compiler.h +216 -0
  535. cuda/cccl/headers/include/cuda/std/__cccl/cuda_capabilities.h +43 -0
  536. cuda/cccl/headers/include/cuda/std/__cccl/cuda_toolkit.h +53 -0
  537. cuda/cccl/headers/include/cuda/std/__cccl/deprecated.h +69 -0
  538. cuda/cccl/headers/include/cuda/std/__cccl/diagnostic.h +129 -0
  539. cuda/cccl/headers/include/cuda/std/__cccl/dialect.h +124 -0
  540. cuda/cccl/headers/include/cuda/std/__cccl/epilogue.h +326 -0
  541. cuda/cccl/headers/include/cuda/std/__cccl/exceptions.h +35 -0
  542. cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h +68 -0
  543. cuda/cccl/headers/include/cuda/std/__cccl/extended_data_types.h +129 -0
  544. cuda/cccl/headers/include/cuda/std/__cccl/is_non_narrowing_convertible.h +73 -0
  545. cuda/cccl/headers/include/cuda/std/__cccl/os.h +48 -0
  546. cuda/cccl/headers/include/cuda/std/__cccl/preprocessor.h +1234 -0
  547. cuda/cccl/headers/include/cuda/std/__cccl/prologue.h +267 -0
  548. cuda/cccl/headers/include/cuda/std/__cccl/ptx_isa.h +176 -0
  549. cuda/cccl/headers/include/cuda/std/__cccl/rtti.h +72 -0
  550. cuda/cccl/headers/include/cuda/std/__cccl/sequence_access.h +87 -0
  551. cuda/cccl/headers/include/cuda/std/__cccl/system_header.h +38 -0
  552. cuda/cccl/headers/include/cuda/std/__cccl/unreachable.h +31 -0
  553. cuda/cccl/headers/include/cuda/std/__cccl/version.h +26 -0
  554. cuda/cccl/headers/include/cuda/std/__cccl/visibility.h +112 -0
  555. cuda/cccl/headers/include/cuda/std/__charconv/chars_format.h +81 -0
  556. cuda/cccl/headers/include/cuda/std/__charconv/from_chars_result.h +56 -0
  557. cuda/cccl/headers/include/cuda/std/__charconv/to_chars.h +148 -0
  558. cuda/cccl/headers/include/cuda/std/__charconv/to_chars_result.h +56 -0
  559. cuda/cccl/headers/include/cuda/std/__charconv_ +30 -0
  560. cuda/cccl/headers/include/cuda/std/__cmath/abs.h +240 -0
  561. cuda/cccl/headers/include/cuda/std/__cmath/copysign.h +187 -0
  562. cuda/cccl/headers/include/cuda/std/__cmath/exponential_functions.h +620 -0
  563. cuda/cccl/headers/include/cuda/std/__cmath/fpclassify.h +207 -0
  564. cuda/cccl/headers/include/cuda/std/__cmath/gamma.h +181 -0
  565. cuda/cccl/headers/include/cuda/std/__cmath/hyperbolic_functions.h +250 -0
  566. cuda/cccl/headers/include/cuda/std/__cmath/hypot.h +213 -0
  567. cuda/cccl/headers/include/cuda/std/__cmath/inverse_hyperbolic_functions.h +250 -0
  568. cuda/cccl/headers/include/cuda/std/__cmath/inverse_trigonometric_functions.h +323 -0
  569. cuda/cccl/headers/include/cuda/std/__cmath/isfinite.h +163 -0
  570. cuda/cccl/headers/include/cuda/std/__cmath/isinf.h +201 -0
  571. cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +176 -0
  572. cuda/cccl/headers/include/cuda/std/__cmath/isnormal.h +129 -0
  573. cuda/cccl/headers/include/cuda/std/__cmath/lerp.h +106 -0
  574. cuda/cccl/headers/include/cuda/std/__cmath/logarithms.h +503 -0
  575. cuda/cccl/headers/include/cuda/std/__cmath/min_max.h +236 -0
  576. cuda/cccl/headers/include/cuda/std/__cmath/nvbf16.h +58 -0
  577. cuda/cccl/headers/include/cuda/std/__cmath/nvfp16.h +58 -0
  578. cuda/cccl/headers/include/cuda/std/__cmath/roots.h +180 -0
  579. cuda/cccl/headers/include/cuda/std/__cmath/rounding_functions.h +877 -0
  580. cuda/cccl/headers/include/cuda/std/__cmath/signbit.h +155 -0
  581. cuda/cccl/headers/include/cuda/std/__cmath/traits.h +170 -0
  582. cuda/cccl/headers/include/cuda/std/__cmath/trigonometric_functions.h +292 -0
  583. cuda/cccl/headers/include/cuda/std/__complex/nvbf16.h +351 -0
  584. cuda/cccl/headers/include/cuda/std/__complex/nvfp16.h +350 -0
  585. cuda/cccl/headers/include/cuda/std/__complex/vector_support.h +135 -0
  586. cuda/cccl/headers/include/cuda/std/__concepts/arithmetic.h +56 -0
  587. cuda/cccl/headers/include/cuda/std/__concepts/assignable.h +64 -0
  588. cuda/cccl/headers/include/cuda/std/__concepts/boolean_testable.h +63 -0
  589. cuda/cccl/headers/include/cuda/std/__concepts/class_or_enum.h +46 -0
  590. cuda/cccl/headers/include/cuda/std/__concepts/common_reference_with.h +69 -0
  591. cuda/cccl/headers/include/cuda/std/__concepts/common_with.h +82 -0
  592. cuda/cccl/headers/include/cuda/std/__concepts/concept_macros.h +274 -0
  593. cuda/cccl/headers/include/cuda/std/__concepts/constructible.h +107 -0
  594. cuda/cccl/headers/include/cuda/std/__concepts/convertible_to.h +71 -0
  595. cuda/cccl/headers/include/cuda/std/__concepts/copyable.h +60 -0
  596. cuda/cccl/headers/include/cuda/std/__concepts/derived_from.h +57 -0
  597. cuda/cccl/headers/include/cuda/std/__concepts/destructible.h +76 -0
  598. cuda/cccl/headers/include/cuda/std/__concepts/different_from.h +38 -0
  599. cuda/cccl/headers/include/cuda/std/__concepts/equality_comparable.h +100 -0
  600. cuda/cccl/headers/include/cuda/std/__concepts/invocable.h +80 -0
  601. cuda/cccl/headers/include/cuda/std/__concepts/movable.h +58 -0
  602. cuda/cccl/headers/include/cuda/std/__concepts/predicate.h +54 -0
  603. cuda/cccl/headers/include/cuda/std/__concepts/regular.h +54 -0
  604. cuda/cccl/headers/include/cuda/std/__concepts/relation.h +77 -0
  605. cuda/cccl/headers/include/cuda/std/__concepts/same_as.h +42 -0
  606. cuda/cccl/headers/include/cuda/std/__concepts/semiregular.h +54 -0
  607. cuda/cccl/headers/include/cuda/std/__concepts/swappable.h +206 -0
  608. cuda/cccl/headers/include/cuda/std/__concepts/totally_ordered.h +101 -0
  609. cuda/cccl/headers/include/cuda/std/__cstddef/byte.h +113 -0
  610. cuda/cccl/headers/include/cuda/std/__cstddef/types.h +52 -0
  611. cuda/cccl/headers/include/cuda/std/__cstdlib/abs.h +57 -0
  612. cuda/cccl/headers/include/cuda/std/__cstdlib/aligned_alloc.h +66 -0
  613. cuda/cccl/headers/include/cuda/std/__cstdlib/div.h +96 -0
  614. cuda/cccl/headers/include/cuda/std/__cstdlib/malloc.h +69 -0
  615. cuda/cccl/headers/include/cuda/std/__cuda/api_wrapper.h +62 -0
  616. cuda/cccl/headers/include/cuda/std/__cuda/ensure_current_device.h +72 -0
  617. cuda/cccl/headers/include/cuda/std/__exception/cuda_error.h +143 -0
  618. cuda/cccl/headers/include/cuda/std/__exception/terminate.h +73 -0
  619. cuda/cccl/headers/include/cuda/std/__execution/env.h +436 -0
  620. cuda/cccl/headers/include/cuda/std/__expected/bad_expected_access.h +127 -0
  621. cuda/cccl/headers/include/cuda/std/__expected/expected.h +2002 -0
  622. cuda/cccl/headers/include/cuda/std/__expected/expected_base.h +1078 -0
  623. cuda/cccl/headers/include/cuda/std/__expected/unexpect.h +37 -0
  624. cuda/cccl/headers/include/cuda/std/__expected/unexpected.h +178 -0
  625. cuda/cccl/headers/include/cuda/std/__floating_point/arithmetic.h +56 -0
  626. cuda/cccl/headers/include/cuda/std/__floating_point/cast.h +809 -0
  627. cuda/cccl/headers/include/cuda/std/__floating_point/cccl_fp.h +125 -0
  628. cuda/cccl/headers/include/cuda/std/__floating_point/common_type.h +48 -0
  629. cuda/cccl/headers/include/cuda/std/__floating_point/constants.h +172 -0
  630. cuda/cccl/headers/include/cuda/std/__floating_point/conversion_rank_order.h +103 -0
  631. cuda/cccl/headers/include/cuda/std/__floating_point/format.h +162 -0
  632. cuda/cccl/headers/include/cuda/std/__floating_point/fp.h +39 -0
  633. cuda/cccl/headers/include/cuda/std/__floating_point/mask.h +64 -0
  634. cuda/cccl/headers/include/cuda/std/__floating_point/native_type.h +81 -0
  635. cuda/cccl/headers/include/cuda/std/__floating_point/nvfp_types.h +58 -0
  636. cuda/cccl/headers/include/cuda/std/__floating_point/overflow_handler.h +139 -0
  637. cuda/cccl/headers/include/cuda/std/__floating_point/properties.h +229 -0
  638. cuda/cccl/headers/include/cuda/std/__floating_point/storage.h +248 -0
  639. cuda/cccl/headers/include/cuda/std/__floating_point/traits.h +172 -0
  640. cuda/cccl/headers/include/cuda/std/__functional/binary_function.h +63 -0
  641. cuda/cccl/headers/include/cuda/std/__functional/binary_negate.h +65 -0
  642. cuda/cccl/headers/include/cuda/std/__functional/bind.h +352 -0
  643. cuda/cccl/headers/include/cuda/std/__functional/bind_back.h +88 -0
  644. cuda/cccl/headers/include/cuda/std/__functional/bind_front.h +73 -0
  645. cuda/cccl/headers/include/cuda/std/__functional/binder1st.h +75 -0
  646. cuda/cccl/headers/include/cuda/std/__functional/binder2nd.h +75 -0
  647. cuda/cccl/headers/include/cuda/std/__functional/compose.h +69 -0
  648. cuda/cccl/headers/include/cuda/std/__functional/default_searcher.h +75 -0
  649. cuda/cccl/headers/include/cuda/std/__functional/function.h +1277 -0
  650. cuda/cccl/headers/include/cuda/std/__functional/hash.h +650 -0
  651. cuda/cccl/headers/include/cuda/std/__functional/identity.h +61 -0
  652. cuda/cccl/headers/include/cuda/std/__functional/invoke.h +560 -0
  653. cuda/cccl/headers/include/cuda/std/__functional/is_transparent.h +43 -0
  654. cuda/cccl/headers/include/cuda/std/__functional/mem_fn.h +67 -0
  655. cuda/cccl/headers/include/cuda/std/__functional/mem_fun_ref.h +214 -0
  656. cuda/cccl/headers/include/cuda/std/__functional/not_fn.h +121 -0
  657. cuda/cccl/headers/include/cuda/std/__functional/operations.h +534 -0
  658. cuda/cccl/headers/include/cuda/std/__functional/perfect_forward.h +127 -0
  659. cuda/cccl/headers/include/cuda/std/__functional/pointer_to_binary_function.h +65 -0
  660. cuda/cccl/headers/include/cuda/std/__functional/pointer_to_unary_function.h +64 -0
  661. cuda/cccl/headers/include/cuda/std/__functional/ranges_operations.h +113 -0
  662. cuda/cccl/headers/include/cuda/std/__functional/reference_wrapper.h +113 -0
  663. cuda/cccl/headers/include/cuda/std/__functional/unary_function.h +62 -0
  664. cuda/cccl/headers/include/cuda/std/__functional/unary_negate.h +67 -0
  665. cuda/cccl/headers/include/cuda/std/__functional/unwrap_ref.h +56 -0
  666. cuda/cccl/headers/include/cuda/std/__functional/weak_result_type.h +278 -0
  667. cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +35 -0
  668. cuda/cccl/headers/include/cuda/std/__fwd/array.h +36 -0
  669. cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +49 -0
  670. cuda/cccl/headers/include/cuda/std/__fwd/complex.h +34 -0
  671. cuda/cccl/headers/include/cuda/std/__fwd/fp.h +37 -0
  672. cuda/cccl/headers/include/cuda/std/__fwd/get.h +123 -0
  673. cuda/cccl/headers/include/cuda/std/__fwd/hash.h +34 -0
  674. cuda/cccl/headers/include/cuda/std/__fwd/iterator_traits.h +40 -0
  675. cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +73 -0
  676. cuda/cccl/headers/include/cuda/std/__fwd/memory_resource.h +37 -0
  677. cuda/cccl/headers/include/cuda/std/__fwd/pair.h +34 -0
  678. cuda/cccl/headers/include/cuda/std/__fwd/reference_wrapper.h +34 -0
  679. cuda/cccl/headers/include/cuda/std/__fwd/span.h +38 -0
  680. cuda/cccl/headers/include/cuda/std/__fwd/string.h +83 -0
  681. cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +59 -0
  682. cuda/cccl/headers/include/cuda/std/__fwd/subrange.h +55 -0
  683. cuda/cccl/headers/include/cuda/std/__fwd/tuple.h +34 -0
  684. cuda/cccl/headers/include/cuda/std/__internal/cpp_dialect.h +44 -0
  685. cuda/cccl/headers/include/cuda/std/__internal/features.h +71 -0
  686. cuda/cccl/headers/include/cuda/std/__internal/namespaces.h +102 -0
  687. cuda/cccl/headers/include/cuda/std/__iterator/access.h +132 -0
  688. cuda/cccl/headers/include/cuda/std/__iterator/advance.h +230 -0
  689. cuda/cccl/headers/include/cuda/std/__iterator/back_insert_iterator.h +103 -0
  690. cuda/cccl/headers/include/cuda/std/__iterator/bounded_iter.h +264 -0
  691. cuda/cccl/headers/include/cuda/std/__iterator/concepts.h +608 -0
  692. cuda/cccl/headers/include/cuda/std/__iterator/counted_iterator.h +469 -0
  693. cuda/cccl/headers/include/cuda/std/__iterator/data.h +63 -0
  694. cuda/cccl/headers/include/cuda/std/__iterator/default_sentinel.h +36 -0
  695. cuda/cccl/headers/include/cuda/std/__iterator/distance.h +126 -0
  696. cuda/cccl/headers/include/cuda/std/__iterator/empty.h +54 -0
  697. cuda/cccl/headers/include/cuda/std/__iterator/erase_if_container.h +53 -0
  698. cuda/cccl/headers/include/cuda/std/__iterator/front_insert_iterator.h +98 -0
  699. cuda/cccl/headers/include/cuda/std/__iterator/incrementable_traits.h +152 -0
  700. cuda/cccl/headers/include/cuda/std/__iterator/indirectly_comparable.h +55 -0
  701. cuda/cccl/headers/include/cuda/std/__iterator/insert_iterator.h +105 -0
  702. cuda/cccl/headers/include/cuda/std/__iterator/istream_iterator.h +141 -0
  703. cuda/cccl/headers/include/cuda/std/__iterator/istreambuf_iterator.h +161 -0
  704. cuda/cccl/headers/include/cuda/std/__iterator/iter_move.h +161 -0
  705. cuda/cccl/headers/include/cuda/std/__iterator/iter_swap.h +163 -0
  706. cuda/cccl/headers/include/cuda/std/__iterator/iterator.h +44 -0
  707. cuda/cccl/headers/include/cuda/std/__iterator/iterator_traits.h +935 -0
  708. cuda/cccl/headers/include/cuda/std/__iterator/mergeable.h +72 -0
  709. cuda/cccl/headers/include/cuda/std/__iterator/move_iterator.h +401 -0
  710. cuda/cccl/headers/include/cuda/std/__iterator/move_sentinel.h +73 -0
  711. cuda/cccl/headers/include/cuda/std/__iterator/next.h +102 -0
  712. cuda/cccl/headers/include/cuda/std/__iterator/ostream_iterator.h +99 -0
  713. cuda/cccl/headers/include/cuda/std/__iterator/ostreambuf_iterator.h +101 -0
  714. cuda/cccl/headers/include/cuda/std/__iterator/permutable.h +54 -0
  715. cuda/cccl/headers/include/cuda/std/__iterator/prev.h +92 -0
  716. cuda/cccl/headers/include/cuda/std/__iterator/projected.h +61 -0
  717. cuda/cccl/headers/include/cuda/std/__iterator/readable_traits.h +185 -0
  718. cuda/cccl/headers/include/cuda/std/__iterator/reverse_access.h +146 -0
  719. cuda/cccl/headers/include/cuda/std/__iterator/reverse_iterator.h +615 -0
  720. cuda/cccl/headers/include/cuda/std/__iterator/size.h +69 -0
  721. cuda/cccl/headers/include/cuda/std/__iterator/sortable.h +55 -0
  722. cuda/cccl/headers/include/cuda/std/__iterator/unreachable_sentinel.h +88 -0
  723. cuda/cccl/headers/include/cuda/std/__iterator/wrap_iter.h +259 -0
  724. cuda/cccl/headers/include/cuda/std/__latch/latch.h +88 -0
  725. cuda/cccl/headers/include/cuda/std/__limits/numeric_limits.h +617 -0
  726. cuda/cccl/headers/include/cuda/std/__limits/numeric_limits_ext.h +781 -0
  727. cuda/cccl/headers/include/cuda/std/__linalg/conj_if_needed.h +78 -0
  728. cuda/cccl/headers/include/cuda/std/__linalg/conjugate_transposed.h +55 -0
  729. cuda/cccl/headers/include/cuda/std/__linalg/conjugated.h +140 -0
  730. cuda/cccl/headers/include/cuda/std/__linalg/scaled.h +134 -0
  731. cuda/cccl/headers/include/cuda/std/__linalg/transposed.h +328 -0
  732. cuda/cccl/headers/include/cuda/std/__mdspan/aligned_accessor.h +100 -0
  733. cuda/cccl/headers/include/cuda/std/__mdspan/concepts.h +139 -0
  734. cuda/cccl/headers/include/cuda/std/__mdspan/default_accessor.h +74 -0
  735. cuda/cccl/headers/include/cuda/std/__mdspan/empty_base.h +363 -0
  736. cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +765 -0
  737. cuda/cccl/headers/include/cuda/std/__mdspan/layout_left.h +317 -0
  738. cuda/cccl/headers/include/cuda/std/__mdspan/layout_right.h +310 -0
  739. cuda/cccl/headers/include/cuda/std/__mdspan/layout_stride.h +615 -0
  740. cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +512 -0
  741. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_extents.h +193 -0
  742. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_helper.h +190 -0
  743. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_mapping.h +347 -0
  744. cuda/cccl/headers/include/cuda/std/__memory/addressof.h +64 -0
  745. cuda/cccl/headers/include/cuda/std/__memory/align.h +87 -0
  746. cuda/cccl/headers/include/cuda/std/__memory/allocate_at_least.h +81 -0
  747. cuda/cccl/headers/include/cuda/std/__memory/allocation_guard.h +100 -0
  748. cuda/cccl/headers/include/cuda/std/__memory/allocator.h +320 -0
  749. cuda/cccl/headers/include/cuda/std/__memory/allocator_arg_t.h +84 -0
  750. cuda/cccl/headers/include/cuda/std/__memory/allocator_destructor.h +59 -0
  751. cuda/cccl/headers/include/cuda/std/__memory/allocator_traits.h +569 -0
  752. cuda/cccl/headers/include/cuda/std/__memory/assume_aligned.h +60 -0
  753. cuda/cccl/headers/include/cuda/std/__memory/builtin_new_allocator.h +87 -0
  754. cuda/cccl/headers/include/cuda/std/__memory/compressed_pair.h +231 -0
  755. cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +248 -0
  756. cuda/cccl/headers/include/cuda/std/__memory/destruct_n.h +91 -0
  757. cuda/cccl/headers/include/cuda/std/__memory/is_sufficiently_aligned.h +43 -0
  758. cuda/cccl/headers/include/cuda/std/__memory/pointer_traits.h +260 -0
  759. cuda/cccl/headers/include/cuda/std/__memory/temporary_buffer.h +92 -0
  760. cuda/cccl/headers/include/cuda/std/__memory/uninitialized_algorithms.h +686 -0
  761. cuda/cccl/headers/include/cuda/std/__memory/unique_ptr.h +771 -0
  762. cuda/cccl/headers/include/cuda/std/__memory/uses_allocator.h +55 -0
  763. cuda/cccl/headers/include/cuda/std/__memory/voidify.h +41 -0
  764. cuda/cccl/headers/include/cuda/std/__memory_ +34 -0
  765. cuda/cccl/headers/include/cuda/std/__new/allocate.h +126 -0
  766. cuda/cccl/headers/include/cuda/std/__new/bad_alloc.h +57 -0
  767. cuda/cccl/headers/include/cuda/std/__new/launder.h +49 -0
  768. cuda/cccl/headers/include/cuda/std/__new_ +29 -0
  769. cuda/cccl/headers/include/cuda/std/__numeric/accumulate.h +57 -0
  770. cuda/cccl/headers/include/cuda/std/__numeric/adjacent_difference.h +72 -0
  771. cuda/cccl/headers/include/cuda/std/__numeric/exclusive_scan.h +66 -0
  772. cuda/cccl/headers/include/cuda/std/__numeric/gcd_lcm.h +80 -0
  773. cuda/cccl/headers/include/cuda/std/__numeric/inclusive_scan.h +73 -0
  774. cuda/cccl/headers/include/cuda/std/__numeric/inner_product.h +62 -0
  775. cuda/cccl/headers/include/cuda/std/__numeric/iota.h +42 -0
  776. cuda/cccl/headers/include/cuda/std/__numeric/midpoint.h +100 -0
  777. cuda/cccl/headers/include/cuda/std/__numeric/partial_sum.h +70 -0
  778. cuda/cccl/headers/include/cuda/std/__numeric/reduce.h +61 -0
  779. cuda/cccl/headers/include/cuda/std/__numeric/transform_exclusive_scan.h +51 -0
  780. cuda/cccl/headers/include/cuda/std/__numeric/transform_inclusive_scan.h +65 -0
  781. cuda/cccl/headers/include/cuda/std/__numeric/transform_reduce.h +72 -0
  782. cuda/cccl/headers/include/cuda/std/__ranges/access.h +304 -0
  783. cuda/cccl/headers/include/cuda/std/__ranges/all.h +97 -0
  784. cuda/cccl/headers/include/cuda/std/__ranges/concepts.h +313 -0
  785. cuda/cccl/headers/include/cuda/std/__ranges/counted.h +90 -0
  786. cuda/cccl/headers/include/cuda/std/__ranges/dangling.h +54 -0
  787. cuda/cccl/headers/include/cuda/std/__ranges/data.h +136 -0
  788. cuda/cccl/headers/include/cuda/std/__ranges/empty.h +111 -0
  789. cuda/cccl/headers/include/cuda/std/__ranges/empty_view.h +77 -0
  790. cuda/cccl/headers/include/cuda/std/__ranges/enable_borrowed_range.h +41 -0
  791. cuda/cccl/headers/include/cuda/std/__ranges/enable_view.h +77 -0
  792. cuda/cccl/headers/include/cuda/std/__ranges/from_range.h +36 -0
  793. cuda/cccl/headers/include/cuda/std/__ranges/iota_view.h +271 -0
  794. cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +410 -0
  795. cuda/cccl/headers/include/cuda/std/__ranges/owning_view.h +161 -0
  796. cuda/cccl/headers/include/cuda/std/__ranges/range_adaptor.h +114 -0
  797. cuda/cccl/headers/include/cuda/std/__ranges/rbegin.h +175 -0
  798. cuda/cccl/headers/include/cuda/std/__ranges/ref_view.h +121 -0
  799. cuda/cccl/headers/include/cuda/std/__ranges/rend.h +182 -0
  800. cuda/cccl/headers/include/cuda/std/__ranges/repeat_view.h +343 -0
  801. cuda/cccl/headers/include/cuda/std/__ranges/single_view.h +156 -0
  802. cuda/cccl/headers/include/cuda/std/__ranges/size.h +200 -0
  803. cuda/cccl/headers/include/cuda/std/__ranges/subrange.h +513 -0
  804. cuda/cccl/headers/include/cuda/std/__ranges/take_while_view.h +263 -0
  805. cuda/cccl/headers/include/cuda/std/__ranges/transform_view.h +531 -0
  806. cuda/cccl/headers/include/cuda/std/__ranges/unwrap_end.h +53 -0
  807. cuda/cccl/headers/include/cuda/std/__ranges/view_interface.h +181 -0
  808. cuda/cccl/headers/include/cuda/std/__ranges/views.h +38 -0
  809. cuda/cccl/headers/include/cuda/std/__semaphore/atomic_semaphore.h +233 -0
  810. cuda/cccl/headers/include/cuda/std/__semaphore/counting_semaphore.h +51 -0
  811. cuda/cccl/headers/include/cuda/std/__string/char_traits.h +191 -0
  812. cuda/cccl/headers/include/cuda/std/__string/constexpr_c_functions.h +591 -0
  813. cuda/cccl/headers/include/cuda/std/__string/helper_functions.h +299 -0
  814. cuda/cccl/headers/include/cuda/std/__string/string_view.h +244 -0
  815. cuda/cccl/headers/include/cuda/std/__string_ +29 -0
  816. cuda/cccl/headers/include/cuda/std/__system_error/errc.h +51 -0
  817. cuda/cccl/headers/include/cuda/std/__system_error_ +26 -0
  818. cuda/cccl/headers/include/cuda/std/__thread/threading_support.h +105 -0
  819. cuda/cccl/headers/include/cuda/std/__thread/threading_support_cuda.h +47 -0
  820. cuda/cccl/headers/include/cuda/std/__thread/threading_support_external.h +41 -0
  821. cuda/cccl/headers/include/cuda/std/__thread/threading_support_pthread.h +144 -0
  822. cuda/cccl/headers/include/cuda/std/__thread/threading_support_win32.h +87 -0
  823. cuda/cccl/headers/include/cuda/std/__tuple_dir/ignore.h +51 -0
  824. cuda/cccl/headers/include/cuda/std/__tuple_dir/make_tuple_types.h +98 -0
  825. cuda/cccl/headers/include/cuda/std/__tuple_dir/sfinae_helpers.h +236 -0
  826. cuda/cccl/headers/include/cuda/std/__tuple_dir/structured_bindings.h +216 -0
  827. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_element.h +70 -0
  828. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_indices.h +44 -0
  829. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +90 -0
  830. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like_ext.h +73 -0
  831. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_size.h +79 -0
  832. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_types.h +35 -0
  833. cuda/cccl/headers/include/cuda/std/__tuple_dir/vector_types.h +242 -0
  834. cuda/cccl/headers/include/cuda/std/__type_traits/add_const.h +40 -0
  835. cuda/cccl/headers/include/cuda/std/__type_traits/add_cv.h +40 -0
  836. cuda/cccl/headers/include/cuda/std/__type_traits/add_lvalue_reference.h +62 -0
  837. cuda/cccl/headers/include/cuda/std/__type_traits/add_pointer.h +65 -0
  838. cuda/cccl/headers/include/cuda/std/__type_traits/add_rvalue_reference.h +62 -0
  839. cuda/cccl/headers/include/cuda/std/__type_traits/add_volatile.h +40 -0
  840. cuda/cccl/headers/include/cuda/std/__type_traits/aligned_storage.h +149 -0
  841. cuda/cccl/headers/include/cuda/std/__type_traits/aligned_union.h +62 -0
  842. cuda/cccl/headers/include/cuda/std/__type_traits/alignment_of.h +41 -0
  843. cuda/cccl/headers/include/cuda/std/__type_traits/always_false.h +35 -0
  844. cuda/cccl/headers/include/cuda/std/__type_traits/can_extract_key.h +69 -0
  845. cuda/cccl/headers/include/cuda/std/__type_traits/common_reference.h +262 -0
  846. cuda/cccl/headers/include/cuda/std/__type_traits/common_type.h +174 -0
  847. cuda/cccl/headers/include/cuda/std/__type_traits/conditional.h +65 -0
  848. cuda/cccl/headers/include/cuda/std/__type_traits/conjunction.h +67 -0
  849. cuda/cccl/headers/include/cuda/std/__type_traits/copy_cv.h +50 -0
  850. cuda/cccl/headers/include/cuda/std/__type_traits/copy_cvref.h +148 -0
  851. cuda/cccl/headers/include/cuda/std/__type_traits/decay.h +83 -0
  852. cuda/cccl/headers/include/cuda/std/__type_traits/dependent_type.h +35 -0
  853. cuda/cccl/headers/include/cuda/std/__type_traits/disjunction.h +77 -0
  854. cuda/cccl/headers/include/cuda/std/__type_traits/enable_if.h +43 -0
  855. cuda/cccl/headers/include/cuda/std/__type_traits/extent.h +68 -0
  856. cuda/cccl/headers/include/cuda/std/__type_traits/fold.h +47 -0
  857. cuda/cccl/headers/include/cuda/std/__type_traits/has_unique_object_representation.h +47 -0
  858. cuda/cccl/headers/include/cuda/std/__type_traits/has_virtual_destructor.h +51 -0
  859. cuda/cccl/headers/include/cuda/std/__type_traits/integral_constant.h +62 -0
  860. cuda/cccl/headers/include/cuda/std/__type_traits/is_abstract.h +40 -0
  861. cuda/cccl/headers/include/cuda/std/__type_traits/is_aggregate.h +44 -0
  862. cuda/cccl/headers/include/cuda/std/__type_traits/is_allocator.h +46 -0
  863. cuda/cccl/headers/include/cuda/std/__type_traits/is_arithmetic.h +42 -0
  864. cuda/cccl/headers/include/cuda/std/__type_traits/is_array.h +62 -0
  865. cuda/cccl/headers/include/cuda/std/__type_traits/is_assignable.h +78 -0
  866. cuda/cccl/headers/include/cuda/std/__type_traits/is_base_of.h +83 -0
  867. cuda/cccl/headers/include/cuda/std/__type_traits/is_bounded_array.h +44 -0
  868. cuda/cccl/headers/include/cuda/std/__type_traits/is_callable.h +60 -0
  869. cuda/cccl/headers/include/cuda/std/__type_traits/is_char_like_type.h +38 -0
  870. cuda/cccl/headers/include/cuda/std/__type_traits/is_class.h +68 -0
  871. cuda/cccl/headers/include/cuda/std/__type_traits/is_compound.h +54 -0
  872. cuda/cccl/headers/include/cuda/std/__type_traits/is_const.h +56 -0
  873. cuda/cccl/headers/include/cuda/std/__type_traits/is_constant_evaluated.h +51 -0
  874. cuda/cccl/headers/include/cuda/std/__type_traits/is_constructible.h +174 -0
  875. cuda/cccl/headers/include/cuda/std/__type_traits/is_convertible.h +214 -0
  876. cuda/cccl/headers/include/cuda/std/__type_traits/is_copy_assignable.h +43 -0
  877. cuda/cccl/headers/include/cuda/std/__type_traits/is_copy_constructible.h +43 -0
  878. cuda/cccl/headers/include/cuda/std/__type_traits/is_core_convertible.h +47 -0
  879. cuda/cccl/headers/include/cuda/std/__type_traits/is_corresponding_member.h +43 -0
  880. cuda/cccl/headers/include/cuda/std/__type_traits/is_default_constructible.h +40 -0
  881. cuda/cccl/headers/include/cuda/std/__type_traits/is_destructible.h +115 -0
  882. cuda/cccl/headers/include/cuda/std/__type_traits/is_empty.h +73 -0
  883. cuda/cccl/headers/include/cuda/std/__type_traits/is_enum.h +68 -0
  884. cuda/cccl/headers/include/cuda/std/__type_traits/is_extended_arithmetic.h +38 -0
  885. cuda/cccl/headers/include/cuda/std/__type_traits/is_extended_floating_point.h +81 -0
  886. cuda/cccl/headers/include/cuda/std/__type_traits/is_final.h +56 -0
  887. cuda/cccl/headers/include/cuda/std/__type_traits/is_floating_point.h +53 -0
  888. cuda/cccl/headers/include/cuda/std/__type_traits/is_function.h +61 -0
  889. cuda/cccl/headers/include/cuda/std/__type_traits/is_fundamental.h +56 -0
  890. cuda/cccl/headers/include/cuda/std/__type_traits/is_implicitly_default_constructible.h +57 -0
  891. cuda/cccl/headers/include/cuda/std/__type_traits/is_integer.h +45 -0
  892. cuda/cccl/headers/include/cuda/std/__type_traits/is_integral.h +123 -0
  893. cuda/cccl/headers/include/cuda/std/__type_traits/is_layout_compatible.h +45 -0
  894. cuda/cccl/headers/include/cuda/std/__type_traits/is_literal_type.h +59 -0
  895. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_function_pointer.h +79 -0
  896. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_object_pointer.h +57 -0
  897. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_pointer.h +57 -0
  898. cuda/cccl/headers/include/cuda/std/__type_traits/is_move_assignable.h +43 -0
  899. cuda/cccl/headers/include/cuda/std/__type_traits/is_move_constructible.h +42 -0
  900. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_assignable.h +70 -0
  901. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_constructible.h +84 -0
  902. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_convertible.h +59 -0
  903. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_copy_assignable.h +60 -0
  904. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_copy_constructible.h +43 -0
  905. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_default_constructible.h +54 -0
  906. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_destructible.h +79 -0
  907. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_move_assignable.h +60 -0
  908. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_move_constructible.h +42 -0
  909. cuda/cccl/headers/include/cuda/std/__type_traits/is_null_pointer.h +43 -0
  910. cuda/cccl/headers/include/cuda/std/__type_traits/is_object.h +57 -0
  911. cuda/cccl/headers/include/cuda/std/__type_traits/is_one_of.h +37 -0
  912. cuda/cccl/headers/include/cuda/std/__type_traits/is_pod.h +62 -0
  913. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer.h +60 -0
  914. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer_interconvertible_base_of.h +87 -0
  915. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer_interconvertible_with_class.h +43 -0
  916. cuda/cccl/headers/include/cuda/std/__type_traits/is_polymorphic.h +63 -0
  917. cuda/cccl/headers/include/cuda/std/__type_traits/is_primary_template.h +119 -0
  918. cuda/cccl/headers/include/cuda/std/__type_traits/is_reference.h +95 -0
  919. cuda/cccl/headers/include/cuda/std/__type_traits/is_reference_wrapper.h +50 -0
  920. cuda/cccl/headers/include/cuda/std/__type_traits/is_referenceable.h +55 -0
  921. cuda/cccl/headers/include/cuda/std/__type_traits/is_same.h +84 -0
  922. cuda/cccl/headers/include/cuda/std/__type_traits/is_scalar.h +60 -0
  923. cuda/cccl/headers/include/cuda/std/__type_traits/is_scoped_enum.h +49 -0
  924. cuda/cccl/headers/include/cuda/std/__type_traits/is_signed.h +65 -0
  925. cuda/cccl/headers/include/cuda/std/__type_traits/is_signed_integer.h +59 -0
  926. cuda/cccl/headers/include/cuda/std/__type_traits/is_standard_layout.h +57 -0
  927. cuda/cccl/headers/include/cuda/std/__type_traits/is_swappable.h +203 -0
  928. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivial.h +56 -0
  929. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_assignable.h +70 -0
  930. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_constructible.h +82 -0
  931. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copy_assignable.h +60 -0
  932. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copy_constructible.h +61 -0
  933. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copyable.h +56 -0
  934. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_default_constructible.h +55 -0
  935. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_destructible.h +73 -0
  936. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_move_assignable.h +60 -0
  937. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_move_constructible.h +58 -0
  938. cuda/cccl/headers/include/cuda/std/__type_traits/is_unbounded_array.h +43 -0
  939. cuda/cccl/headers/include/cuda/std/__type_traits/is_union.h +57 -0
  940. cuda/cccl/headers/include/cuda/std/__type_traits/is_unsigned.h +66 -0
  941. cuda/cccl/headers/include/cuda/std/__type_traits/is_unsigned_integer.h +59 -0
  942. cuda/cccl/headers/include/cuda/std/__type_traits/is_valid_expansion.h +41 -0
  943. cuda/cccl/headers/include/cuda/std/__type_traits/is_void.h +55 -0
  944. cuda/cccl/headers/include/cuda/std/__type_traits/is_volatile.h +56 -0
  945. cuda/cccl/headers/include/cuda/std/__type_traits/lazy.h +35 -0
  946. cuda/cccl/headers/include/cuda/std/__type_traits/make_const_lvalue_ref.h +36 -0
  947. cuda/cccl/headers/include/cuda/std/__type_traits/make_nbit_int.h +107 -0
  948. cuda/cccl/headers/include/cuda/std/__type_traits/make_signed.h +140 -0
  949. cuda/cccl/headers/include/cuda/std/__type_traits/make_unsigned.h +151 -0
  950. cuda/cccl/headers/include/cuda/std/__type_traits/maybe_const.h +36 -0
  951. cuda/cccl/headers/include/cuda/std/__type_traits/nat.h +39 -0
  952. cuda/cccl/headers/include/cuda/std/__type_traits/negation.h +44 -0
  953. cuda/cccl/headers/include/cuda/std/__type_traits/num_bits.h +123 -0
  954. cuda/cccl/headers/include/cuda/std/__type_traits/promote.h +163 -0
  955. cuda/cccl/headers/include/cuda/std/__type_traits/rank.h +60 -0
  956. cuda/cccl/headers/include/cuda/std/__type_traits/reference_constructs_from_temporary.h +57 -0
  957. cuda/cccl/headers/include/cuda/std/__type_traits/reference_converts_from_temporary.h +56 -0
  958. cuda/cccl/headers/include/cuda/std/__type_traits/remove_all_extents.h +66 -0
  959. cuda/cccl/headers/include/cuda/std/__type_traits/remove_const.h +59 -0
  960. cuda/cccl/headers/include/cuda/std/__type_traits/remove_const_ref.h +37 -0
  961. cuda/cccl/headers/include/cuda/std/__type_traits/remove_cv.h +57 -0
  962. cuda/cccl/headers/include/cuda/std/__type_traits/remove_cvref.h +57 -0
  963. cuda/cccl/headers/include/cuda/std/__type_traits/remove_extent.h +65 -0
  964. cuda/cccl/headers/include/cuda/std/__type_traits/remove_pointer.h +73 -0
  965. cuda/cccl/headers/include/cuda/std/__type_traits/remove_reference.h +72 -0
  966. cuda/cccl/headers/include/cuda/std/__type_traits/remove_volatile.h +58 -0
  967. cuda/cccl/headers/include/cuda/std/__type_traits/result_of.h +47 -0
  968. cuda/cccl/headers/include/cuda/std/__type_traits/type_identity.h +40 -0
  969. cuda/cccl/headers/include/cuda/std/__type_traits/type_list.h +1069 -0
  970. cuda/cccl/headers/include/cuda/std/__type_traits/type_set.h +132 -0
  971. cuda/cccl/headers/include/cuda/std/__type_traits/underlying_type.h +66 -0
  972. cuda/cccl/headers/include/cuda/std/__type_traits/void_t.h +34 -0
  973. cuda/cccl/headers/include/cuda/std/__utility/as_const.h +52 -0
  974. cuda/cccl/headers/include/cuda/std/__utility/auto_cast.h +32 -0
  975. cuda/cccl/headers/include/cuda/std/__utility/cmp.h +116 -0
  976. cuda/cccl/headers/include/cuda/std/__utility/convert_to_integral.h +103 -0
  977. cuda/cccl/headers/include/cuda/std/__utility/declval.h +63 -0
  978. cuda/cccl/headers/include/cuda/std/__utility/exception_guard.h +162 -0
  979. cuda/cccl/headers/include/cuda/std/__utility/exchange.h +46 -0
  980. cuda/cccl/headers/include/cuda/std/__utility/forward.h +59 -0
  981. cuda/cccl/headers/include/cuda/std/__utility/forward_like.h +56 -0
  982. cuda/cccl/headers/include/cuda/std/__utility/in_place.h +77 -0
  983. cuda/cccl/headers/include/cuda/std/__utility/integer_sequence.h +251 -0
  984. cuda/cccl/headers/include/cuda/std/__utility/monostate.h +99 -0
  985. cuda/cccl/headers/include/cuda/std/__utility/move.h +75 -0
  986. cuda/cccl/headers/include/cuda/std/__utility/pair.h +808 -0
  987. cuda/cccl/headers/include/cuda/std/__utility/piecewise_construct.h +37 -0
  988. cuda/cccl/headers/include/cuda/std/__utility/pod_tuple.h +763 -0
  989. cuda/cccl/headers/include/cuda/std/__utility/priority_tag.h +40 -0
  990. cuda/cccl/headers/include/cuda/std/__utility/rel_ops.h +63 -0
  991. cuda/cccl/headers/include/cuda/std/__utility/swap.h +65 -0
  992. cuda/cccl/headers/include/cuda/std/__utility/to_underlying.h +40 -0
  993. cuda/cccl/headers/include/cuda/std/__utility/typeid.h +425 -0
  994. cuda/cccl/headers/include/cuda/std/__utility/unreachable.h +37 -0
  995. cuda/cccl/headers/include/cuda/std/array +527 -0
  996. cuda/cccl/headers/include/cuda/std/atomic +823 -0
  997. cuda/cccl/headers/include/cuda/std/barrier +43 -0
  998. cuda/cccl/headers/include/cuda/std/bit +35 -0
  999. cuda/cccl/headers/include/cuda/std/bitset +1026 -0
  1000. cuda/cccl/headers/include/cuda/std/cassert +28 -0
  1001. cuda/cccl/headers/include/cuda/std/ccomplex +15 -0
  1002. cuda/cccl/headers/include/cuda/std/cfloat +59 -0
  1003. cuda/cccl/headers/include/cuda/std/chrono +26 -0
  1004. cuda/cccl/headers/include/cuda/std/climits +61 -0
  1005. cuda/cccl/headers/include/cuda/std/cmath +25 -0
  1006. cuda/cccl/headers/include/cuda/std/complex +25 -0
  1007. cuda/cccl/headers/include/cuda/std/concepts +48 -0
  1008. cuda/cccl/headers/include/cuda/std/cstddef +28 -0
  1009. cuda/cccl/headers/include/cuda/std/cstdint +178 -0
  1010. cuda/cccl/headers/include/cuda/std/cstdlib +30 -0
  1011. cuda/cccl/headers/include/cuda/std/cstring +111 -0
  1012. cuda/cccl/headers/include/cuda/std/ctime +147 -0
  1013. cuda/cccl/headers/include/cuda/std/detail/__config +45 -0
  1014. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +258 -0
  1015. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/algorithm +2692 -0
  1016. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/chrono +3689 -0
  1017. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/cmath +685 -0
  1018. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/complex +1610 -0
  1019. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/iosfwd +128 -0
  1020. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/optional +1786 -0
  1021. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/stdexcept +120 -0
  1022. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/tuple +1378 -0
  1023. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/variant +2160 -0
  1024. cuda/cccl/headers/include/cuda/std/execution +27 -0
  1025. cuda/cccl/headers/include/cuda/std/expected +30 -0
  1026. cuda/cccl/headers/include/cuda/std/functional +56 -0
  1027. cuda/cccl/headers/include/cuda/std/initializer_list +36 -0
  1028. cuda/cccl/headers/include/cuda/std/inplace_vector +2171 -0
  1029. cuda/cccl/headers/include/cuda/std/iterator +70 -0
  1030. cuda/cccl/headers/include/cuda/std/latch +34 -0
  1031. cuda/cccl/headers/include/cuda/std/limits +28 -0
  1032. cuda/cccl/headers/include/cuda/std/linalg +30 -0
  1033. cuda/cccl/headers/include/cuda/std/mdspan +38 -0
  1034. cuda/cccl/headers/include/cuda/std/memory +39 -0
  1035. cuda/cccl/headers/include/cuda/std/numbers +335 -0
  1036. cuda/cccl/headers/include/cuda/std/numeric +41 -0
  1037. cuda/cccl/headers/include/cuda/std/optional +25 -0
  1038. cuda/cccl/headers/include/cuda/std/ranges +68 -0
  1039. cuda/cccl/headers/include/cuda/std/ratio +417 -0
  1040. cuda/cccl/headers/include/cuda/std/semaphore +31 -0
  1041. cuda/cccl/headers/include/cuda/std/source_location +83 -0
  1042. cuda/cccl/headers/include/cuda/std/span +640 -0
  1043. cuda/cccl/headers/include/cuda/std/string_view +814 -0
  1044. cuda/cccl/headers/include/cuda/std/tuple +26 -0
  1045. cuda/cccl/headers/include/cuda/std/type_traits +176 -0
  1046. cuda/cccl/headers/include/cuda/std/utility +70 -0
  1047. cuda/cccl/headers/include/cuda/std/variant +25 -0
  1048. cuda/cccl/headers/include/cuda/std/version +245 -0
  1049. cuda/cccl/headers/include/cuda/stream_ref +54 -0
  1050. cuda/cccl/headers/include/cuda/type_traits +27 -0
  1051. cuda/cccl/headers/include/cuda/version +16 -0
  1052. cuda/cccl/headers/include/cuda/warp +28 -0
  1053. cuda/cccl/headers/include/cuda/work_stealing +26 -0
  1054. cuda/cccl/headers/include/nv/detail/__preprocessor +169 -0
  1055. cuda/cccl/headers/include/nv/detail/__target_macros +599 -0
  1056. cuda/cccl/headers/include/nv/target +229 -0
  1057. cuda/cccl/headers/include/thrust/addressof.h +22 -0
  1058. cuda/cccl/headers/include/thrust/adjacent_difference.h +254 -0
  1059. cuda/cccl/headers/include/thrust/advance.h +59 -0
  1060. cuda/cccl/headers/include/thrust/allocate_unique.h +299 -0
  1061. cuda/cccl/headers/include/thrust/binary_search.h +1910 -0
  1062. cuda/cccl/headers/include/thrust/complex.h +859 -0
  1063. cuda/cccl/headers/include/thrust/copy.h +506 -0
  1064. cuda/cccl/headers/include/thrust/count.h +245 -0
  1065. cuda/cccl/headers/include/thrust/detail/adjacent_difference.inl +95 -0
  1066. cuda/cccl/headers/include/thrust/detail/algorithm_wrapper.h +37 -0
  1067. cuda/cccl/headers/include/thrust/detail/alignment.h +81 -0
  1068. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +350 -0
  1069. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.inl +371 -0
  1070. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +45 -0
  1071. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.inl +242 -0
  1072. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +39 -0
  1073. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.inl +137 -0
  1074. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +39 -0
  1075. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.inl +99 -0
  1076. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +53 -0
  1077. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.inl +68 -0
  1078. cuda/cccl/headers/include/thrust/detail/allocator/no_throw_allocator.h +76 -0
  1079. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +102 -0
  1080. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.inl +86 -0
  1081. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +79 -0
  1082. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.inl +81 -0
  1083. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.h +39 -0
  1084. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.inl +98 -0
  1085. cuda/cccl/headers/include/thrust/detail/allocator_aware_execution_policy.h +99 -0
  1086. cuda/cccl/headers/include/thrust/detail/binary_search.inl +525 -0
  1087. cuda/cccl/headers/include/thrust/detail/caching_allocator.h +47 -0
  1088. cuda/cccl/headers/include/thrust/detail/complex/arithmetic.h +255 -0
  1089. cuda/cccl/headers/include/thrust/detail/complex/c99math.h +64 -0
  1090. cuda/cccl/headers/include/thrust/detail/complex/catrig.h +875 -0
  1091. cuda/cccl/headers/include/thrust/detail/complex/catrigf.h +589 -0
  1092. cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +233 -0
  1093. cuda/cccl/headers/include/thrust/detail/complex/ccoshf.h +161 -0
  1094. cuda/cccl/headers/include/thrust/detail/complex/cexp.h +195 -0
  1095. cuda/cccl/headers/include/thrust/detail/complex/cexpf.h +173 -0
  1096. cuda/cccl/headers/include/thrust/detail/complex/clog.h +223 -0
  1097. cuda/cccl/headers/include/thrust/detail/complex/clogf.h +210 -0
  1098. cuda/cccl/headers/include/thrust/detail/complex/complex.inl +263 -0
  1099. cuda/cccl/headers/include/thrust/detail/complex/cpow.h +50 -0
  1100. cuda/cccl/headers/include/thrust/detail/complex/cproj.h +81 -0
  1101. cuda/cccl/headers/include/thrust/detail/complex/csinh.h +228 -0
  1102. cuda/cccl/headers/include/thrust/detail/complex/csinhf.h +168 -0
  1103. cuda/cccl/headers/include/thrust/detail/complex/csqrt.h +178 -0
  1104. cuda/cccl/headers/include/thrust/detail/complex/csqrtf.h +174 -0
  1105. cuda/cccl/headers/include/thrust/detail/complex/ctanh.h +208 -0
  1106. cuda/cccl/headers/include/thrust/detail/complex/ctanhf.h +133 -0
  1107. cuda/cccl/headers/include/thrust/detail/complex/math_private.h +138 -0
  1108. cuda/cccl/headers/include/thrust/detail/complex/stream.h +73 -0
  1109. cuda/cccl/headers/include/thrust/detail/config/compiler.h +38 -0
  1110. cuda/cccl/headers/include/thrust/detail/config/config.h +43 -0
  1111. cuda/cccl/headers/include/thrust/detail/config/cpp_dialect.h +78 -0
  1112. cuda/cccl/headers/include/thrust/detail/config/device_system.h +55 -0
  1113. cuda/cccl/headers/include/thrust/detail/config/host_system.h +48 -0
  1114. cuda/cccl/headers/include/thrust/detail/config/memory_resource.h +41 -0
  1115. cuda/cccl/headers/include/thrust/detail/config/namespace.h +162 -0
  1116. cuda/cccl/headers/include/thrust/detail/config/simple_defines.h +48 -0
  1117. cuda/cccl/headers/include/thrust/detail/config.h +36 -0
  1118. cuda/cccl/headers/include/thrust/detail/contiguous_storage.h +228 -0
  1119. cuda/cccl/headers/include/thrust/detail/contiguous_storage.inl +273 -0
  1120. cuda/cccl/headers/include/thrust/detail/copy.h +72 -0
  1121. cuda/cccl/headers/include/thrust/detail/copy.inl +129 -0
  1122. cuda/cccl/headers/include/thrust/detail/copy_if.h +62 -0
  1123. cuda/cccl/headers/include/thrust/detail/copy_if.inl +102 -0
  1124. cuda/cccl/headers/include/thrust/detail/count.h +55 -0
  1125. cuda/cccl/headers/include/thrust/detail/count.inl +89 -0
  1126. cuda/cccl/headers/include/thrust/detail/device_delete.inl +52 -0
  1127. cuda/cccl/headers/include/thrust/detail/device_free.inl +47 -0
  1128. cuda/cccl/headers/include/thrust/detail/device_malloc.inl +60 -0
  1129. cuda/cccl/headers/include/thrust/detail/device_new.inl +61 -0
  1130. cuda/cccl/headers/include/thrust/detail/device_ptr.inl +48 -0
  1131. cuda/cccl/headers/include/thrust/detail/equal.inl +93 -0
  1132. cuda/cccl/headers/include/thrust/detail/event_error.h +160 -0
  1133. cuda/cccl/headers/include/thrust/detail/execute_with_allocator.h +80 -0
  1134. cuda/cccl/headers/include/thrust/detail/execute_with_allocator_fwd.h +61 -0
  1135. cuda/cccl/headers/include/thrust/detail/execution_policy.h +80 -0
  1136. cuda/cccl/headers/include/thrust/detail/extrema.inl +184 -0
  1137. cuda/cccl/headers/include/thrust/detail/fill.inl +86 -0
  1138. cuda/cccl/headers/include/thrust/detail/find.inl +113 -0
  1139. cuda/cccl/headers/include/thrust/detail/for_each.inl +84 -0
  1140. cuda/cccl/headers/include/thrust/detail/function.h +49 -0
  1141. cuda/cccl/headers/include/thrust/detail/functional/actor.h +214 -0
  1142. cuda/cccl/headers/include/thrust/detail/functional/operators.h +386 -0
  1143. cuda/cccl/headers/include/thrust/detail/gather.inl +173 -0
  1144. cuda/cccl/headers/include/thrust/detail/generate.inl +86 -0
  1145. cuda/cccl/headers/include/thrust/detail/get_iterator_value.h +62 -0
  1146. cuda/cccl/headers/include/thrust/detail/inner_product.inl +118 -0
  1147. cuda/cccl/headers/include/thrust/detail/integer_math.h +130 -0
  1148. cuda/cccl/headers/include/thrust/detail/internal_functional.h +285 -0
  1149. cuda/cccl/headers/include/thrust/detail/logical.inl +113 -0
  1150. cuda/cccl/headers/include/thrust/detail/malloc_and_free.h +92 -0
  1151. cuda/cccl/headers/include/thrust/detail/malloc_and_free_fwd.h +45 -0
  1152. cuda/cccl/headers/include/thrust/detail/memory_algorithms.h +209 -0
  1153. cuda/cccl/headers/include/thrust/detail/memory_wrapper.h +40 -0
  1154. cuda/cccl/headers/include/thrust/detail/merge.inl +276 -0
  1155. cuda/cccl/headers/include/thrust/detail/mismatch.inl +94 -0
  1156. cuda/cccl/headers/include/thrust/detail/mpl/math.h +164 -0
  1157. cuda/cccl/headers/include/thrust/detail/numeric_wrapper.h +37 -0
  1158. cuda/cccl/headers/include/thrust/detail/overlapped_copy.h +124 -0
  1159. cuda/cccl/headers/include/thrust/detail/partition.inl +378 -0
  1160. cuda/cccl/headers/include/thrust/detail/pointer.h +217 -0
  1161. cuda/cccl/headers/include/thrust/detail/pointer.inl +172 -0
  1162. cuda/cccl/headers/include/thrust/detail/preprocessor.h +652 -0
  1163. cuda/cccl/headers/include/thrust/detail/random_bijection.h +177 -0
  1164. cuda/cccl/headers/include/thrust/detail/range/head_flags.h +116 -0
  1165. cuda/cccl/headers/include/thrust/detail/range/tail_flags.h +130 -0
  1166. cuda/cccl/headers/include/thrust/detail/raw_pointer_cast.h +52 -0
  1167. cuda/cccl/headers/include/thrust/detail/raw_reference_cast.h +189 -0
  1168. cuda/cccl/headers/include/thrust/detail/reduce.inl +377 -0
  1169. cuda/cccl/headers/include/thrust/detail/reference.h +500 -0
  1170. cuda/cccl/headers/include/thrust/detail/reference_forward_declaration.h +35 -0
  1171. cuda/cccl/headers/include/thrust/detail/remove.inl +213 -0
  1172. cuda/cccl/headers/include/thrust/detail/replace.inl +231 -0
  1173. cuda/cccl/headers/include/thrust/detail/reverse.inl +88 -0
  1174. cuda/cccl/headers/include/thrust/detail/scan.inl +518 -0
  1175. cuda/cccl/headers/include/thrust/detail/scatter.inl +157 -0
  1176. cuda/cccl/headers/include/thrust/detail/seq.h +54 -0
  1177. cuda/cccl/headers/include/thrust/detail/sequence.inl +109 -0
  1178. cuda/cccl/headers/include/thrust/detail/set_operations.inl +981 -0
  1179. cuda/cccl/headers/include/thrust/detail/shuffle.inl +86 -0
  1180. cuda/cccl/headers/include/thrust/detail/sort.inl +373 -0
  1181. cuda/cccl/headers/include/thrust/detail/static_assert.h +58 -0
  1182. cuda/cccl/headers/include/thrust/detail/static_map.h +167 -0
  1183. cuda/cccl/headers/include/thrust/detail/swap_ranges.inl +65 -0
  1184. cuda/cccl/headers/include/thrust/detail/tabulate.inl +62 -0
  1185. cuda/cccl/headers/include/thrust/detail/temporary_array.h +153 -0
  1186. cuda/cccl/headers/include/thrust/detail/temporary_array.inl +138 -0
  1187. cuda/cccl/headers/include/thrust/detail/temporary_buffer.h +81 -0
  1188. cuda/cccl/headers/include/thrust/detail/transform.inl +250 -0
  1189. cuda/cccl/headers/include/thrust/detail/transform_reduce.inl +69 -0
  1190. cuda/cccl/headers/include/thrust/detail/transform_scan.inl +161 -0
  1191. cuda/cccl/headers/include/thrust/detail/trivial_sequence.h +131 -0
  1192. cuda/cccl/headers/include/thrust/detail/tuple_meta_transform.h +61 -0
  1193. cuda/cccl/headers/include/thrust/detail/type_deduction.h +62 -0
  1194. cuda/cccl/headers/include/thrust/detail/type_traits/has_member_function.h +47 -0
  1195. cuda/cccl/headers/include/thrust/detail/type_traits/has_nested_type.h +43 -0
  1196. cuda/cccl/headers/include/thrust/detail/type_traits/is_call_possible.h +167 -0
  1197. cuda/cccl/headers/include/thrust/detail/type_traits/is_commutative.h +69 -0
  1198. cuda/cccl/headers/include/thrust/detail/type_traits/is_metafunction_defined.h +39 -0
  1199. cuda/cccl/headers/include/thrust/detail/type_traits/is_thrust_pointer.h +60 -0
  1200. cuda/cccl/headers/include/thrust/detail/type_traits/iterator/is_discard_iterator.h +44 -0
  1201. cuda/cccl/headers/include/thrust/detail/type_traits/iterator/is_output_iterator.h +46 -0
  1202. cuda/cccl/headers/include/thrust/detail/type_traits/minimum_type.h +89 -0
  1203. cuda/cccl/headers/include/thrust/detail/type_traits/pointer_traits.h +332 -0
  1204. cuda/cccl/headers/include/thrust/detail/type_traits.h +136 -0
  1205. cuda/cccl/headers/include/thrust/detail/uninitialized_copy.inl +90 -0
  1206. cuda/cccl/headers/include/thrust/detail/uninitialized_fill.inl +86 -0
  1207. cuda/cccl/headers/include/thrust/detail/unique.inl +373 -0
  1208. cuda/cccl/headers/include/thrust/detail/use_default.h +34 -0
  1209. cuda/cccl/headers/include/thrust/detail/util/align.h +59 -0
  1210. cuda/cccl/headers/include/thrust/detail/vector_base.h +630 -0
  1211. cuda/cccl/headers/include/thrust/detail/vector_base.inl +1242 -0
  1212. cuda/cccl/headers/include/thrust/device_allocator.h +134 -0
  1213. cuda/cccl/headers/include/thrust/device_delete.h +59 -0
  1214. cuda/cccl/headers/include/thrust/device_free.h +72 -0
  1215. cuda/cccl/headers/include/thrust/device_make_unique.h +56 -0
  1216. cuda/cccl/headers/include/thrust/device_malloc.h +108 -0
  1217. cuda/cccl/headers/include/thrust/device_malloc_allocator.h +190 -0
  1218. cuda/cccl/headers/include/thrust/device_new.h +91 -0
  1219. cuda/cccl/headers/include/thrust/device_new_allocator.h +179 -0
  1220. cuda/cccl/headers/include/thrust/device_ptr.h +202 -0
  1221. cuda/cccl/headers/include/thrust/device_reference.h +986 -0
  1222. cuda/cccl/headers/include/thrust/device_vector.h +574 -0
  1223. cuda/cccl/headers/include/thrust/distance.h +43 -0
  1224. cuda/cccl/headers/include/thrust/equal.h +247 -0
  1225. cuda/cccl/headers/include/thrust/execution_policy.h +384 -0
  1226. cuda/cccl/headers/include/thrust/extrema.h +657 -0
  1227. cuda/cccl/headers/include/thrust/fill.h +201 -0
  1228. cuda/cccl/headers/include/thrust/find.h +382 -0
  1229. cuda/cccl/headers/include/thrust/for_each.h +261 -0
  1230. cuda/cccl/headers/include/thrust/functional.h +396 -0
  1231. cuda/cccl/headers/include/thrust/gather.h +464 -0
  1232. cuda/cccl/headers/include/thrust/generate.h +193 -0
  1233. cuda/cccl/headers/include/thrust/host_vector.h +576 -0
  1234. cuda/cccl/headers/include/thrust/inner_product.h +264 -0
  1235. cuda/cccl/headers/include/thrust/iterator/constant_iterator.h +219 -0
  1236. cuda/cccl/headers/include/thrust/iterator/counting_iterator.h +335 -0
  1237. cuda/cccl/headers/include/thrust/iterator/detail/any_assign.h +48 -0
  1238. cuda/cccl/headers/include/thrust/iterator/detail/any_system_tag.h +43 -0
  1239. cuda/cccl/headers/include/thrust/iterator/detail/device_system_tag.h +38 -0
  1240. cuda/cccl/headers/include/thrust/iterator/detail/host_system_tag.h +38 -0
  1241. cuda/cccl/headers/include/thrust/iterator/detail/iterator_adaptor_base.h +81 -0
  1242. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_to_system.h +51 -0
  1243. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_to_traversal.h +62 -0
  1244. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h +57 -0
  1245. cuda/cccl/headers/include/thrust/iterator/detail/iterator_facade_category.h +199 -0
  1246. cuda/cccl/headers/include/thrust/iterator/detail/iterator_traversal_tags.h +50 -0
  1247. cuda/cccl/headers/include/thrust/iterator/detail/minimum_system.h +53 -0
  1248. cuda/cccl/headers/include/thrust/iterator/detail/normal_iterator.h +69 -0
  1249. cuda/cccl/headers/include/thrust/iterator/detail/retag.h +104 -0
  1250. cuda/cccl/headers/include/thrust/iterator/detail/tagged_iterator.h +81 -0
  1251. cuda/cccl/headers/include/thrust/iterator/detail/tuple_of_iterator_references.h +174 -0
  1252. cuda/cccl/headers/include/thrust/iterator/discard_iterator.h +164 -0
  1253. cuda/cccl/headers/include/thrust/iterator/iterator_adaptor.h +251 -0
  1254. cuda/cccl/headers/include/thrust/iterator/iterator_categories.h +215 -0
  1255. cuda/cccl/headers/include/thrust/iterator/iterator_facade.h +660 -0
  1256. cuda/cccl/headers/include/thrust/iterator/iterator_traits.h +245 -0
  1257. cuda/cccl/headers/include/thrust/iterator/offset_iterator.h +192 -0
  1258. cuda/cccl/headers/include/thrust/iterator/permutation_iterator.h +204 -0
  1259. cuda/cccl/headers/include/thrust/iterator/retag.h +74 -0
  1260. cuda/cccl/headers/include/thrust/iterator/reverse_iterator.h +221 -0
  1261. cuda/cccl/headers/include/thrust/iterator/shuffle_iterator.h +184 -0
  1262. cuda/cccl/headers/include/thrust/iterator/strided_iterator.h +152 -0
  1263. cuda/cccl/headers/include/thrust/iterator/tabulate_output_iterator.h +149 -0
  1264. cuda/cccl/headers/include/thrust/iterator/transform_input_output_iterator.h +226 -0
  1265. cuda/cccl/headers/include/thrust/iterator/transform_iterator.h +351 -0
  1266. cuda/cccl/headers/include/thrust/iterator/transform_output_iterator.h +190 -0
  1267. cuda/cccl/headers/include/thrust/iterator/zip_iterator.h +357 -0
  1268. cuda/cccl/headers/include/thrust/logical.h +290 -0
  1269. cuda/cccl/headers/include/thrust/memory.h +395 -0
  1270. cuda/cccl/headers/include/thrust/merge.h +725 -0
  1271. cuda/cccl/headers/include/thrust/mismatch.h +261 -0
  1272. cuda/cccl/headers/include/thrust/mr/allocator.h +229 -0
  1273. cuda/cccl/headers/include/thrust/mr/device_memory_resource.h +41 -0
  1274. cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +526 -0
  1275. cuda/cccl/headers/include/thrust/mr/disjoint_sync_pool.h +118 -0
  1276. cuda/cccl/headers/include/thrust/mr/disjoint_tls_pool.h +68 -0
  1277. cuda/cccl/headers/include/thrust/mr/fancy_pointer_resource.h +67 -0
  1278. cuda/cccl/headers/include/thrust/mr/host_memory_resource.h +38 -0
  1279. cuda/cccl/headers/include/thrust/mr/memory_resource.h +217 -0
  1280. cuda/cccl/headers/include/thrust/mr/new.h +100 -0
  1281. cuda/cccl/headers/include/thrust/mr/polymorphic_adaptor.h +63 -0
  1282. cuda/cccl/headers/include/thrust/mr/pool.h +526 -0
  1283. cuda/cccl/headers/include/thrust/mr/pool_options.h +174 -0
  1284. cuda/cccl/headers/include/thrust/mr/sync_pool.h +114 -0
  1285. cuda/cccl/headers/include/thrust/mr/tls_pool.h +65 -0
  1286. cuda/cccl/headers/include/thrust/mr/universal_memory_resource.h +29 -0
  1287. cuda/cccl/headers/include/thrust/mr/validator.h +56 -0
  1288. cuda/cccl/headers/include/thrust/pair.h +102 -0
  1289. cuda/cccl/headers/include/thrust/partition.h +1383 -0
  1290. cuda/cccl/headers/include/thrust/per_device_resource.h +98 -0
  1291. cuda/cccl/headers/include/thrust/random/detail/discard_block_engine.inl +184 -0
  1292. cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine.inl +155 -0
  1293. cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine_discard.h +104 -0
  1294. cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine.inl +151 -0
  1295. cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h +53 -0
  1296. cuda/cccl/headers/include/thrust/random/detail/mod.h +101 -0
  1297. cuda/cccl/headers/include/thrust/random/detail/normal_distribution.inl +187 -0
  1298. cuda/cccl/headers/include/thrust/random/detail/normal_distribution_base.h +160 -0
  1299. cuda/cccl/headers/include/thrust/random/detail/random_core_access.h +63 -0
  1300. cuda/cccl/headers/include/thrust/random/detail/subtract_with_carry_engine.inl +201 -0
  1301. cuda/cccl/headers/include/thrust/random/detail/uniform_int_distribution.inl +198 -0
  1302. cuda/cccl/headers/include/thrust/random/detail/uniform_real_distribution.inl +198 -0
  1303. cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine.inl +183 -0
  1304. cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine_max.h +217 -0
  1305. cuda/cccl/headers/include/thrust/random/discard_block_engine.h +240 -0
  1306. cuda/cccl/headers/include/thrust/random/linear_congruential_engine.h +289 -0
  1307. cuda/cccl/headers/include/thrust/random/linear_feedback_shift_engine.h +217 -0
  1308. cuda/cccl/headers/include/thrust/random/normal_distribution.h +257 -0
  1309. cuda/cccl/headers/include/thrust/random/subtract_with_carry_engine.h +247 -0
  1310. cuda/cccl/headers/include/thrust/random/uniform_int_distribution.h +261 -0
  1311. cuda/cccl/headers/include/thrust/random/uniform_real_distribution.h +258 -0
  1312. cuda/cccl/headers/include/thrust/random/xor_combine_engine.h +255 -0
  1313. cuda/cccl/headers/include/thrust/random.h +120 -0
  1314. cuda/cccl/headers/include/thrust/reduce.h +1112 -0
  1315. cuda/cccl/headers/include/thrust/remove.h +768 -0
  1316. cuda/cccl/headers/include/thrust/replace.h +827 -0
  1317. cuda/cccl/headers/include/thrust/reverse.h +213 -0
  1318. cuda/cccl/headers/include/thrust/scan.h +1671 -0
  1319. cuda/cccl/headers/include/thrust/scatter.h +446 -0
  1320. cuda/cccl/headers/include/thrust/sequence.h +277 -0
  1321. cuda/cccl/headers/include/thrust/set_operations.h +3026 -0
  1322. cuda/cccl/headers/include/thrust/shuffle.h +182 -0
  1323. cuda/cccl/headers/include/thrust/sort.h +1320 -0
  1324. cuda/cccl/headers/include/thrust/swap.h +147 -0
  1325. cuda/cccl/headers/include/thrust/system/cpp/detail/adjacent_difference.h +30 -0
  1326. cuda/cccl/headers/include/thrust/system/cpp/detail/assign_value.h +30 -0
  1327. cuda/cccl/headers/include/thrust/system/cpp/detail/binary_search.h +32 -0
  1328. cuda/cccl/headers/include/thrust/system/cpp/detail/copy.h +30 -0
  1329. cuda/cccl/headers/include/thrust/system/cpp/detail/copy_if.h +30 -0
  1330. cuda/cccl/headers/include/thrust/system/cpp/detail/count.h +29 -0
  1331. cuda/cccl/headers/include/thrust/system/cpp/detail/equal.h +29 -0
  1332. cuda/cccl/headers/include/thrust/system/cpp/detail/execution_policy.h +90 -0
  1333. cuda/cccl/headers/include/thrust/system/cpp/detail/extrema.h +30 -0
  1334. cuda/cccl/headers/include/thrust/system/cpp/detail/fill.h +29 -0
  1335. cuda/cccl/headers/include/thrust/system/cpp/detail/find.h +30 -0
  1336. cuda/cccl/headers/include/thrust/system/cpp/detail/for_each.h +30 -0
  1337. cuda/cccl/headers/include/thrust/system/cpp/detail/gather.h +29 -0
  1338. cuda/cccl/headers/include/thrust/system/cpp/detail/generate.h +29 -0
  1339. cuda/cccl/headers/include/thrust/system/cpp/detail/get_value.h +30 -0
  1340. cuda/cccl/headers/include/thrust/system/cpp/detail/inner_product.h +29 -0
  1341. cuda/cccl/headers/include/thrust/system/cpp/detail/iter_swap.h +30 -0
  1342. cuda/cccl/headers/include/thrust/system/cpp/detail/logical.h +29 -0
  1343. cuda/cccl/headers/include/thrust/system/cpp/detail/malloc_and_free.h +30 -0
  1344. cuda/cccl/headers/include/thrust/system/cpp/detail/memory.inl +60 -0
  1345. cuda/cccl/headers/include/thrust/system/cpp/detail/merge.h +30 -0
  1346. cuda/cccl/headers/include/thrust/system/cpp/detail/mismatch.h +29 -0
  1347. cuda/cccl/headers/include/thrust/system/cpp/detail/par.h +62 -0
  1348. cuda/cccl/headers/include/thrust/system/cpp/detail/partition.h +30 -0
  1349. cuda/cccl/headers/include/thrust/system/cpp/detail/per_device_resource.h +29 -0
  1350. cuda/cccl/headers/include/thrust/system/cpp/detail/reduce.h +30 -0
  1351. cuda/cccl/headers/include/thrust/system/cpp/detail/reduce_by_key.h +30 -0
  1352. cuda/cccl/headers/include/thrust/system/cpp/detail/remove.h +30 -0
  1353. cuda/cccl/headers/include/thrust/system/cpp/detail/replace.h +29 -0
  1354. cuda/cccl/headers/include/thrust/system/cpp/detail/reverse.h +29 -0
  1355. cuda/cccl/headers/include/thrust/system/cpp/detail/scan.h +30 -0
  1356. cuda/cccl/headers/include/thrust/system/cpp/detail/scan_by_key.h +30 -0
  1357. cuda/cccl/headers/include/thrust/system/cpp/detail/scatter.h +29 -0
  1358. cuda/cccl/headers/include/thrust/system/cpp/detail/sequence.h +29 -0
  1359. cuda/cccl/headers/include/thrust/system/cpp/detail/set_operations.h +30 -0
  1360. cuda/cccl/headers/include/thrust/system/cpp/detail/sort.h +30 -0
  1361. cuda/cccl/headers/include/thrust/system/cpp/detail/swap_ranges.h +29 -0
  1362. cuda/cccl/headers/include/thrust/system/cpp/detail/tabulate.h +29 -0
  1363. cuda/cccl/headers/include/thrust/system/cpp/detail/temporary_buffer.h +29 -0
  1364. cuda/cccl/headers/include/thrust/system/cpp/detail/transform.h +29 -0
  1365. cuda/cccl/headers/include/thrust/system/cpp/detail/transform_reduce.h +29 -0
  1366. cuda/cccl/headers/include/thrust/system/cpp/detail/transform_scan.h +29 -0
  1367. cuda/cccl/headers/include/thrust/system/cpp/detail/uninitialized_copy.h +29 -0
  1368. cuda/cccl/headers/include/thrust/system/cpp/detail/uninitialized_fill.h +29 -0
  1369. cuda/cccl/headers/include/thrust/system/cpp/detail/unique.h +30 -0
  1370. cuda/cccl/headers/include/thrust/system/cpp/detail/unique_by_key.h +30 -0
  1371. cuda/cccl/headers/include/thrust/system/cpp/detail/vector.inl +130 -0
  1372. cuda/cccl/headers/include/thrust/system/cpp/execution_policy.h +161 -0
  1373. cuda/cccl/headers/include/thrust/system/cpp/memory.h +109 -0
  1374. cuda/cccl/headers/include/thrust/system/cpp/memory_resource.h +75 -0
  1375. cuda/cccl/headers/include/thrust/system/cpp/pointer.h +119 -0
  1376. cuda/cccl/headers/include/thrust/system/cpp/vector.h +99 -0
  1377. cuda/cccl/headers/include/thrust/system/cuda/config.h +123 -0
  1378. cuda/cccl/headers/include/thrust/system/cuda/detail/adjacent_difference.h +219 -0
  1379. cuda/cccl/headers/include/thrust/system/cuda/detail/assign_value.h +124 -0
  1380. cuda/cccl/headers/include/thrust/system/cuda/detail/binary_search.h +29 -0
  1381. cuda/cccl/headers/include/thrust/system/cuda/detail/cdp_dispatch.h +72 -0
  1382. cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +129 -0
  1383. cuda/cccl/headers/include/thrust/system/cuda/detail/copy_if.h +255 -0
  1384. cuda/cccl/headers/include/thrust/system/cuda/detail/core/agent_launcher.h +289 -0
  1385. cuda/cccl/headers/include/thrust/system/cuda/detail/core/load_iterator.h +58 -0
  1386. cuda/cccl/headers/include/thrust/system/cuda/detail/core/make_load_iterator.h +60 -0
  1387. cuda/cccl/headers/include/thrust/system/cuda/detail/core/triple_chevron_launch.h +191 -0
  1388. cuda/cccl/headers/include/thrust/system/cuda/detail/core/util.h +630 -0
  1389. cuda/cccl/headers/include/thrust/system/cuda/detail/count.h +75 -0
  1390. cuda/cccl/headers/include/thrust/system/cuda/detail/cross_system.h +243 -0
  1391. cuda/cccl/headers/include/thrust/system/cuda/detail/dispatch.h +210 -0
  1392. cuda/cccl/headers/include/thrust/system/cuda/detail/equal.h +64 -0
  1393. cuda/cccl/headers/include/thrust/system/cuda/detail/error.inl +96 -0
  1394. cuda/cccl/headers/include/thrust/system/cuda/detail/execution_policy.h +113 -0
  1395. cuda/cccl/headers/include/thrust/system/cuda/detail/extrema.h +476 -0
  1396. cuda/cccl/headers/include/thrust/system/cuda/detail/fill.h +82 -0
  1397. cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +272 -0
  1398. cuda/cccl/headers/include/thrust/system/cuda/detail/for_each.h +83 -0
  1399. cuda/cccl/headers/include/thrust/system/cuda/detail/gather.h +91 -0
  1400. cuda/cccl/headers/include/thrust/system/cuda/detail/generate.h +85 -0
  1401. cuda/cccl/headers/include/thrust/system/cuda/detail/get_value.h +65 -0
  1402. cuda/cccl/headers/include/thrust/system/cuda/detail/inner_product.h +75 -0
  1403. cuda/cccl/headers/include/thrust/system/cuda/detail/internal/copy_cross_system.h +204 -0
  1404. cuda/cccl/headers/include/thrust/system/cuda/detail/internal/copy_device_to_device.h +98 -0
  1405. cuda/cccl/headers/include/thrust/system/cuda/detail/iter_swap.h +69 -0
  1406. cuda/cccl/headers/include/thrust/system/cuda/detail/logical.h +29 -0
  1407. cuda/cccl/headers/include/thrust/system/cuda/detail/make_unsigned_special.h +61 -0
  1408. cuda/cccl/headers/include/thrust/system/cuda/detail/malloc_and_free.h +121 -0
  1409. cuda/cccl/headers/include/thrust/system/cuda/detail/memory.inl +57 -0
  1410. cuda/cccl/headers/include/thrust/system/cuda/detail/merge.h +228 -0
  1411. cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +217 -0
  1412. cuda/cccl/headers/include/thrust/system/cuda/detail/par.h +237 -0
  1413. cuda/cccl/headers/include/thrust/system/cuda/detail/par_to_seq.h +95 -0
  1414. cuda/cccl/headers/include/thrust/system/cuda/detail/parallel_for.h +81 -0
  1415. cuda/cccl/headers/include/thrust/system/cuda/detail/partition.h +405 -0
  1416. cuda/cccl/headers/include/thrust/system/cuda/detail/per_device_resource.h +72 -0
  1417. cuda/cccl/headers/include/thrust/system/cuda/detail/reduce.h +961 -0
  1418. cuda/cccl/headers/include/thrust/system/cuda/detail/reduce_by_key.h +1000 -0
  1419. cuda/cccl/headers/include/thrust/system/cuda/detail/remove.h +107 -0
  1420. cuda/cccl/headers/include/thrust/system/cuda/detail/replace.h +164 -0
  1421. cuda/cccl/headers/include/thrust/system/cuda/detail/reverse.h +88 -0
  1422. cuda/cccl/headers/include/thrust/system/cuda/detail/scan.h +342 -0
  1423. cuda/cccl/headers/include/thrust/system/cuda/detail/scan_by_key.h +415 -0
  1424. cuda/cccl/headers/include/thrust/system/cuda/detail/scatter.h +79 -0
  1425. cuda/cccl/headers/include/thrust/system/cuda/detail/sequence.h +29 -0
  1426. cuda/cccl/headers/include/thrust/system/cuda/detail/set_operations.h +1736 -0
  1427. cuda/cccl/headers/include/thrust/system/cuda/detail/sort.h +482 -0
  1428. cuda/cccl/headers/include/thrust/system/cuda/detail/swap_ranges.h +75 -0
  1429. cuda/cccl/headers/include/thrust/system/cuda/detail/tabulate.h +75 -0
  1430. cuda/cccl/headers/include/thrust/system/cuda/detail/temporary_buffer.h +132 -0
  1431. cuda/cccl/headers/include/thrust/system/cuda/detail/terminate.h +53 -0
  1432. cuda/cccl/headers/include/thrust/system/cuda/detail/transform.h +403 -0
  1433. cuda/cccl/headers/include/thrust/system/cuda/detail/transform_reduce.h +143 -0
  1434. cuda/cccl/headers/include/thrust/system/cuda/detail/transform_scan.h +119 -0
  1435. cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_copy.h +94 -0
  1436. cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_fill.h +91 -0
  1437. cuda/cccl/headers/include/thrust/system/cuda/detail/unique.h +648 -0
  1438. cuda/cccl/headers/include/thrust/system/cuda/detail/unique_by_key.h +311 -0
  1439. cuda/cccl/headers/include/thrust/system/cuda/detail/util.h +251 -0
  1440. cuda/cccl/headers/include/thrust/system/cuda/error.h +175 -0
  1441. cuda/cccl/headers/include/thrust/system/cuda/execution_policy.h +39 -0
  1442. cuda/cccl/headers/include/thrust/system/cuda/memory.h +122 -0
  1443. cuda/cccl/headers/include/thrust/system/cuda/memory_resource.h +122 -0
  1444. cuda/cccl/headers/include/thrust/system/cuda/pointer.h +140 -0
  1445. cuda/cccl/headers/include/thrust/system/cuda/vector.h +108 -0
  1446. cuda/cccl/headers/include/thrust/system/detail/adl/adjacent_difference.h +51 -0
  1447. cuda/cccl/headers/include/thrust/system/detail/adl/assign_value.h +51 -0
  1448. cuda/cccl/headers/include/thrust/system/detail/adl/binary_search.h +51 -0
  1449. cuda/cccl/headers/include/thrust/system/detail/adl/copy.h +51 -0
  1450. cuda/cccl/headers/include/thrust/system/detail/adl/copy_if.h +52 -0
  1451. cuda/cccl/headers/include/thrust/system/detail/adl/count.h +51 -0
  1452. cuda/cccl/headers/include/thrust/system/detail/adl/equal.h +51 -0
  1453. cuda/cccl/headers/include/thrust/system/detail/adl/extrema.h +51 -0
  1454. cuda/cccl/headers/include/thrust/system/detail/adl/fill.h +51 -0
  1455. cuda/cccl/headers/include/thrust/system/detail/adl/find.h +51 -0
  1456. cuda/cccl/headers/include/thrust/system/detail/adl/for_each.h +51 -0
  1457. cuda/cccl/headers/include/thrust/system/detail/adl/gather.h +51 -0
  1458. cuda/cccl/headers/include/thrust/system/detail/adl/generate.h +51 -0
  1459. cuda/cccl/headers/include/thrust/system/detail/adl/get_value.h +51 -0
  1460. cuda/cccl/headers/include/thrust/system/detail/adl/inner_product.h +51 -0
  1461. cuda/cccl/headers/include/thrust/system/detail/adl/iter_swap.h +51 -0
  1462. cuda/cccl/headers/include/thrust/system/detail/adl/logical.h +51 -0
  1463. cuda/cccl/headers/include/thrust/system/detail/adl/malloc_and_free.h +51 -0
  1464. cuda/cccl/headers/include/thrust/system/detail/adl/merge.h +51 -0
  1465. cuda/cccl/headers/include/thrust/system/detail/adl/mismatch.h +51 -0
  1466. cuda/cccl/headers/include/thrust/system/detail/adl/partition.h +51 -0
  1467. cuda/cccl/headers/include/thrust/system/detail/adl/per_device_resource.h +48 -0
  1468. cuda/cccl/headers/include/thrust/system/detail/adl/reduce.h +51 -0
  1469. cuda/cccl/headers/include/thrust/system/detail/adl/reduce_by_key.h +51 -0
  1470. cuda/cccl/headers/include/thrust/system/detail/adl/remove.h +51 -0
  1471. cuda/cccl/headers/include/thrust/system/detail/adl/replace.h +51 -0
  1472. cuda/cccl/headers/include/thrust/system/detail/adl/reverse.h +51 -0
  1473. cuda/cccl/headers/include/thrust/system/detail/adl/scan.h +51 -0
  1474. cuda/cccl/headers/include/thrust/system/detail/adl/scan_by_key.h +51 -0
  1475. cuda/cccl/headers/include/thrust/system/detail/adl/scatter.h +51 -0
  1476. cuda/cccl/headers/include/thrust/system/detail/adl/sequence.h +51 -0
  1477. cuda/cccl/headers/include/thrust/system/detail/adl/set_operations.h +51 -0
  1478. cuda/cccl/headers/include/thrust/system/detail/adl/sort.h +51 -0
  1479. cuda/cccl/headers/include/thrust/system/detail/adl/swap_ranges.h +51 -0
  1480. cuda/cccl/headers/include/thrust/system/detail/adl/tabulate.h +51 -0
  1481. cuda/cccl/headers/include/thrust/system/detail/adl/temporary_buffer.h +51 -0
  1482. cuda/cccl/headers/include/thrust/system/detail/adl/transform.h +51 -0
  1483. cuda/cccl/headers/include/thrust/system/detail/adl/transform_reduce.h +51 -0
  1484. cuda/cccl/headers/include/thrust/system/detail/adl/transform_scan.h +51 -0
  1485. cuda/cccl/headers/include/thrust/system/detail/adl/uninitialized_copy.h +51 -0
  1486. cuda/cccl/headers/include/thrust/system/detail/adl/uninitialized_fill.h +51 -0
  1487. cuda/cccl/headers/include/thrust/system/detail/adl/unique.h +51 -0
  1488. cuda/cccl/headers/include/thrust/system/detail/adl/unique_by_key.h +51 -0
  1489. cuda/cccl/headers/include/thrust/system/detail/bad_alloc.h +64 -0
  1490. cuda/cccl/headers/include/thrust/system/detail/errno.h +125 -0
  1491. cuda/cccl/headers/include/thrust/system/detail/error_category.inl +302 -0
  1492. cuda/cccl/headers/include/thrust/system/detail/error_code.inl +173 -0
  1493. cuda/cccl/headers/include/thrust/system/detail/error_condition.inl +121 -0
  1494. cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.h +59 -0
  1495. cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.inl +85 -0
  1496. cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.h +167 -0
  1497. cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.inl +391 -0
  1498. cuda/cccl/headers/include/thrust/system/detail/generic/copy.h +51 -0
  1499. cuda/cccl/headers/include/thrust/system/detail/generic/copy.inl +70 -0
  1500. cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.h +64 -0
  1501. cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.inl +152 -0
  1502. cuda/cccl/headers/include/thrust/system/detail/generic/count.h +54 -0
  1503. cuda/cccl/headers/include/thrust/system/detail/generic/count.inl +90 -0
  1504. cuda/cccl/headers/include/thrust/system/detail/generic/equal.h +55 -0
  1505. cuda/cccl/headers/include/thrust/system/detail/generic/equal.inl +66 -0
  1506. cuda/cccl/headers/include/thrust/system/detail/generic/extrema.h +72 -0
  1507. cuda/cccl/headers/include/thrust/system/detail/generic/extrema.inl +258 -0
  1508. cuda/cccl/headers/include/thrust/system/detail/generic/fill.h +60 -0
  1509. cuda/cccl/headers/include/thrust/system/detail/generic/find.h +55 -0
  1510. cuda/cccl/headers/include/thrust/system/detail/generic/find.inl +143 -0
  1511. cuda/cccl/headers/include/thrust/system/detail/generic/for_each.h +64 -0
  1512. cuda/cccl/headers/include/thrust/system/detail/generic/gather.h +79 -0
  1513. cuda/cccl/headers/include/thrust/system/detail/generic/gather.inl +102 -0
  1514. cuda/cccl/headers/include/thrust/system/detail/generic/generate.h +51 -0
  1515. cuda/cccl/headers/include/thrust/system/detail/generic/generate.inl +63 -0
  1516. cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.h +66 -0
  1517. cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.inl +78 -0
  1518. cuda/cccl/headers/include/thrust/system/detail/generic/logical.h +65 -0
  1519. cuda/cccl/headers/include/thrust/system/detail/generic/memory.h +70 -0
  1520. cuda/cccl/headers/include/thrust/system/detail/generic/memory.inl +83 -0
  1521. cuda/cccl/headers/include/thrust/system/detail/generic/merge.h +105 -0
  1522. cuda/cccl/headers/include/thrust/system/detail/generic/merge.inl +154 -0
  1523. cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.h +55 -0
  1524. cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.inl +74 -0
  1525. cuda/cccl/headers/include/thrust/system/detail/generic/partition.h +135 -0
  1526. cuda/cccl/headers/include/thrust/system/detail/generic/partition.inl +213 -0
  1527. cuda/cccl/headers/include/thrust/system/detail/generic/per_device_resource.h +49 -0
  1528. cuda/cccl/headers/include/thrust/system/detail/generic/reduce.h +77 -0
  1529. cuda/cccl/headers/include/thrust/system/detail/generic/reduce.inl +106 -0
  1530. cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.h +89 -0
  1531. cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.inl +192 -0
  1532. cuda/cccl/headers/include/thrust/system/detail/generic/remove.h +92 -0
  1533. cuda/cccl/headers/include/thrust/system/detail/generic/remove.inl +127 -0
  1534. cuda/cccl/headers/include/thrust/system/detail/generic/replace.h +101 -0
  1535. cuda/cccl/headers/include/thrust/system/detail/generic/replace.inl +181 -0
  1536. cuda/cccl/headers/include/thrust/system/detail/generic/reverse.h +54 -0
  1537. cuda/cccl/headers/include/thrust/system/detail/generic/reverse.inl +72 -0
  1538. cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.h +78 -0
  1539. cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.inl +141 -0
  1540. cuda/cccl/headers/include/thrust/system/detail/generic/scan.h +78 -0
  1541. cuda/cccl/headers/include/thrust/system/detail/generic/scan.inl +91 -0
  1542. cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.h +132 -0
  1543. cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.inl +238 -0
  1544. cuda/cccl/headers/include/thrust/system/detail/generic/scatter.h +79 -0
  1545. cuda/cccl/headers/include/thrust/system/detail/generic/scatter.inl +91 -0
  1546. cuda/cccl/headers/include/thrust/system/detail/generic/select_system.h +96 -0
  1547. cuda/cccl/headers/include/thrust/system/detail/generic/sequence.h +55 -0
  1548. cuda/cccl/headers/include/thrust/system/detail/generic/sequence.inl +95 -0
  1549. cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.h +288 -0
  1550. cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.inl +482 -0
  1551. cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.h +60 -0
  1552. cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.inl +131 -0
  1553. cuda/cccl/headers/include/thrust/system/detail/generic/sort.h +119 -0
  1554. cuda/cccl/headers/include/thrust/system/detail/generic/sort.inl +181 -0
  1555. cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.h +50 -0
  1556. cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.inl +82 -0
  1557. cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.h +47 -0
  1558. cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.inl +60 -0
  1559. cuda/cccl/headers/include/thrust/system/detail/generic/tag.h +53 -0
  1560. cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.h +60 -0
  1561. cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.inl +88 -0
  1562. cuda/cccl/headers/include/thrust/system/detail/generic/transform.h +109 -0
  1563. cuda/cccl/headers/include/thrust/system/detail/generic/transform.inl +185 -0
  1564. cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.h +56 -0
  1565. cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.inl +62 -0
  1566. cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.h +86 -0
  1567. cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.inl +119 -0
  1568. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.h +51 -0
  1569. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.inl +172 -0
  1570. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.h +51 -0
  1571. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.inl +121 -0
  1572. cuda/cccl/headers/include/thrust/system/detail/generic/unique.h +77 -0
  1573. cuda/cccl/headers/include/thrust/system/detail/generic/unique.inl +119 -0
  1574. cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.h +87 -0
  1575. cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.inl +132 -0
  1576. cuda/cccl/headers/include/thrust/system/detail/internal/decompose.h +123 -0
  1577. cuda/cccl/headers/include/thrust/system/detail/sequential/adjacent_difference.h +76 -0
  1578. cuda/cccl/headers/include/thrust/system/detail/sequential/assign_value.h +48 -0
  1579. cuda/cccl/headers/include/thrust/system/detail/sequential/binary_search.h +142 -0
  1580. cuda/cccl/headers/include/thrust/system/detail/sequential/copy.h +55 -0
  1581. cuda/cccl/headers/include/thrust/system/detail/sequential/copy.inl +125 -0
  1582. cuda/cccl/headers/include/thrust/system/detail/sequential/copy_backward.h +55 -0
  1583. cuda/cccl/headers/include/thrust/system/detail/sequential/copy_if.h +77 -0
  1584. cuda/cccl/headers/include/thrust/system/detail/sequential/count.h +29 -0
  1585. cuda/cccl/headers/include/thrust/system/detail/sequential/equal.h +29 -0
  1586. cuda/cccl/headers/include/thrust/system/detail/sequential/execution_policy.h +78 -0
  1587. cuda/cccl/headers/include/thrust/system/detail/sequential/extrema.h +116 -0
  1588. cuda/cccl/headers/include/thrust/system/detail/sequential/fill.h +29 -0
  1589. cuda/cccl/headers/include/thrust/system/detail/sequential/find.h +68 -0
  1590. cuda/cccl/headers/include/thrust/system/detail/sequential/for_each.h +80 -0
  1591. cuda/cccl/headers/include/thrust/system/detail/sequential/gather.h +29 -0
  1592. cuda/cccl/headers/include/thrust/system/detail/sequential/general_copy.h +129 -0
  1593. cuda/cccl/headers/include/thrust/system/detail/sequential/generate.h +29 -0
  1594. cuda/cccl/headers/include/thrust/system/detail/sequential/get_value.h +49 -0
  1595. cuda/cccl/headers/include/thrust/system/detail/sequential/inner_product.h +29 -0
  1596. cuda/cccl/headers/include/thrust/system/detail/sequential/insertion_sort.h +147 -0
  1597. cuda/cccl/headers/include/thrust/system/detail/sequential/iter_swap.h +51 -0
  1598. cuda/cccl/headers/include/thrust/system/detail/sequential/logical.h +29 -0
  1599. cuda/cccl/headers/include/thrust/system/detail/sequential/malloc_and_free.h +56 -0
  1600. cuda/cccl/headers/include/thrust/system/detail/sequential/merge.h +81 -0
  1601. cuda/cccl/headers/include/thrust/system/detail/sequential/merge.inl +151 -0
  1602. cuda/cccl/headers/include/thrust/system/detail/sequential/mismatch.h +29 -0
  1603. cuda/cccl/headers/include/thrust/system/detail/sequential/partition.h +309 -0
  1604. cuda/cccl/headers/include/thrust/system/detail/sequential/per_device_resource.h +29 -0
  1605. cuda/cccl/headers/include/thrust/system/detail/sequential/reduce.h +70 -0
  1606. cuda/cccl/headers/include/thrust/system/detail/sequential/reduce_by_key.h +104 -0
  1607. cuda/cccl/headers/include/thrust/system/detail/sequential/remove.h +185 -0
  1608. cuda/cccl/headers/include/thrust/system/detail/sequential/replace.h +29 -0
  1609. cuda/cccl/headers/include/thrust/system/detail/sequential/reverse.h +29 -0
  1610. cuda/cccl/headers/include/thrust/system/detail/sequential/scan.h +160 -0
  1611. cuda/cccl/headers/include/thrust/system/detail/sequential/scan_by_key.h +151 -0
  1612. cuda/cccl/headers/include/thrust/system/detail/sequential/scatter.h +29 -0
  1613. cuda/cccl/headers/include/thrust/system/detail/sequential/sequence.h +29 -0
  1614. cuda/cccl/headers/include/thrust/system/detail/sequential/set_operations.h +212 -0
  1615. cuda/cccl/headers/include/thrust/system/detail/sequential/sort.h +65 -0
  1616. cuda/cccl/headers/include/thrust/system/detail/sequential/sort.inl +187 -0
  1617. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.h +61 -0
  1618. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.inl +362 -0
  1619. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.h +54 -0
  1620. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.inl +130 -0
  1621. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.h +54 -0
  1622. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.inl +592 -0
  1623. cuda/cccl/headers/include/thrust/system/detail/sequential/swap_ranges.h +29 -0
  1624. cuda/cccl/headers/include/thrust/system/detail/sequential/tabulate.h +29 -0
  1625. cuda/cccl/headers/include/thrust/system/detail/sequential/temporary_buffer.h +29 -0
  1626. cuda/cccl/headers/include/thrust/system/detail/sequential/transform.h +29 -0
  1627. cuda/cccl/headers/include/thrust/system/detail/sequential/transform_reduce.h +29 -0
  1628. cuda/cccl/headers/include/thrust/system/detail/sequential/transform_scan.h +29 -0
  1629. cuda/cccl/headers/include/thrust/system/detail/sequential/trivial_copy.h +64 -0
  1630. cuda/cccl/headers/include/thrust/system/detail/sequential/uninitialized_copy.h +29 -0
  1631. cuda/cccl/headers/include/thrust/system/detail/sequential/uninitialized_fill.h +29 -0
  1632. cuda/cccl/headers/include/thrust/system/detail/sequential/unique.h +121 -0
  1633. cuda/cccl/headers/include/thrust/system/detail/sequential/unique_by_key.h +112 -0
  1634. cuda/cccl/headers/include/thrust/system/detail/system_error.inl +108 -0
  1635. cuda/cccl/headers/include/thrust/system/error_code.h +512 -0
  1636. cuda/cccl/headers/include/thrust/system/omp/detail/adjacent_difference.h +54 -0
  1637. cuda/cccl/headers/include/thrust/system/omp/detail/assign_value.h +30 -0
  1638. cuda/cccl/headers/include/thrust/system/omp/detail/binary_search.h +77 -0
  1639. cuda/cccl/headers/include/thrust/system/omp/detail/copy.h +50 -0
  1640. cuda/cccl/headers/include/thrust/system/omp/detail/copy.inl +74 -0
  1641. cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.h +56 -0
  1642. cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.inl +59 -0
  1643. cuda/cccl/headers/include/thrust/system/omp/detail/count.h +30 -0
  1644. cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.h +50 -0
  1645. cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.inl +65 -0
  1646. cuda/cccl/headers/include/thrust/system/omp/detail/equal.h +30 -0
  1647. cuda/cccl/headers/include/thrust/system/omp/detail/execution_policy.h +113 -0
  1648. cuda/cccl/headers/include/thrust/system/omp/detail/extrema.h +66 -0
  1649. cuda/cccl/headers/include/thrust/system/omp/detail/fill.h +30 -0
  1650. cuda/cccl/headers/include/thrust/system/omp/detail/find.h +53 -0
  1651. cuda/cccl/headers/include/thrust/system/omp/detail/for_each.h +56 -0
  1652. cuda/cccl/headers/include/thrust/system/omp/detail/for_each.inl +87 -0
  1653. cuda/cccl/headers/include/thrust/system/omp/detail/gather.h +30 -0
  1654. cuda/cccl/headers/include/thrust/system/omp/detail/generate.h +30 -0
  1655. cuda/cccl/headers/include/thrust/system/omp/detail/get_value.h +30 -0
  1656. cuda/cccl/headers/include/thrust/system/omp/detail/inner_product.h +30 -0
  1657. cuda/cccl/headers/include/thrust/system/omp/detail/iter_swap.h +30 -0
  1658. cuda/cccl/headers/include/thrust/system/omp/detail/logical.h +30 -0
  1659. cuda/cccl/headers/include/thrust/system/omp/detail/malloc_and_free.h +30 -0
  1660. cuda/cccl/headers/include/thrust/system/omp/detail/memory.inl +93 -0
  1661. cuda/cccl/headers/include/thrust/system/omp/detail/merge.h +30 -0
  1662. cuda/cccl/headers/include/thrust/system/omp/detail/mismatch.h +30 -0
  1663. cuda/cccl/headers/include/thrust/system/omp/detail/par.h +62 -0
  1664. cuda/cccl/headers/include/thrust/system/omp/detail/partition.h +88 -0
  1665. cuda/cccl/headers/include/thrust/system/omp/detail/partition.inl +102 -0
  1666. cuda/cccl/headers/include/thrust/system/omp/detail/per_device_resource.h +29 -0
  1667. cuda/cccl/headers/include/thrust/system/omp/detail/pragma_omp.h +54 -0
  1668. cuda/cccl/headers/include/thrust/system/omp/detail/reduce.h +54 -0
  1669. cuda/cccl/headers/include/thrust/system/omp/detail/reduce.inl +78 -0
  1670. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.h +64 -0
  1671. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.inl +65 -0
  1672. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.h +59 -0
  1673. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.inl +103 -0
  1674. cuda/cccl/headers/include/thrust/system/omp/detail/remove.h +72 -0
  1675. cuda/cccl/headers/include/thrust/system/omp/detail/remove.inl +87 -0
  1676. cuda/cccl/headers/include/thrust/system/omp/detail/replace.h +30 -0
  1677. cuda/cccl/headers/include/thrust/system/omp/detail/reverse.h +30 -0
  1678. cuda/cccl/headers/include/thrust/system/omp/detail/scan.h +30 -0
  1679. cuda/cccl/headers/include/thrust/system/omp/detail/scan_by_key.h +30 -0
  1680. cuda/cccl/headers/include/thrust/system/omp/detail/scatter.h +30 -0
  1681. cuda/cccl/headers/include/thrust/system/omp/detail/sequence.h +30 -0
  1682. cuda/cccl/headers/include/thrust/system/omp/detail/set_operations.h +30 -0
  1683. cuda/cccl/headers/include/thrust/system/omp/detail/sort.h +60 -0
  1684. cuda/cccl/headers/include/thrust/system/omp/detail/sort.inl +259 -0
  1685. cuda/cccl/headers/include/thrust/system/omp/detail/swap_ranges.h +30 -0
  1686. cuda/cccl/headers/include/thrust/system/omp/detail/tabulate.h +30 -0
  1687. cuda/cccl/headers/include/thrust/system/omp/detail/temporary_buffer.h +29 -0
  1688. cuda/cccl/headers/include/thrust/system/omp/detail/transform.h +30 -0
  1689. cuda/cccl/headers/include/thrust/system/omp/detail/transform_reduce.h +30 -0
  1690. cuda/cccl/headers/include/thrust/system/omp/detail/transform_scan.h +30 -0
  1691. cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_copy.h +30 -0
  1692. cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_fill.h +30 -0
  1693. cuda/cccl/headers/include/thrust/system/omp/detail/unique.h +60 -0
  1694. cuda/cccl/headers/include/thrust/system/omp/detail/unique.inl +71 -0
  1695. cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.h +67 -0
  1696. cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.inl +75 -0
  1697. cuda/cccl/headers/include/thrust/system/omp/execution_policy.h +160 -0
  1698. cuda/cccl/headers/include/thrust/system/omp/memory.h +111 -0
  1699. cuda/cccl/headers/include/thrust/system/omp/memory_resource.h +75 -0
  1700. cuda/cccl/headers/include/thrust/system/omp/pointer.h +120 -0
  1701. cuda/cccl/headers/include/thrust/system/omp/vector.h +99 -0
  1702. cuda/cccl/headers/include/thrust/system/system_error.h +184 -0
  1703. cuda/cccl/headers/include/thrust/system/tbb/detail/adjacent_difference.h +54 -0
  1704. cuda/cccl/headers/include/thrust/system/tbb/detail/assign_value.h +30 -0
  1705. cuda/cccl/headers/include/thrust/system/tbb/detail/binary_search.h +30 -0
  1706. cuda/cccl/headers/include/thrust/system/tbb/detail/copy.h +50 -0
  1707. cuda/cccl/headers/include/thrust/system/tbb/detail/copy.inl +73 -0
  1708. cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.h +47 -0
  1709. cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.inl +136 -0
  1710. cuda/cccl/headers/include/thrust/system/tbb/detail/count.h +30 -0
  1711. cuda/cccl/headers/include/thrust/system/tbb/detail/equal.h +30 -0
  1712. cuda/cccl/headers/include/thrust/system/tbb/detail/execution_policy.h +92 -0
  1713. cuda/cccl/headers/include/thrust/system/tbb/detail/extrema.h +66 -0
  1714. cuda/cccl/headers/include/thrust/system/tbb/detail/fill.h +30 -0
  1715. cuda/cccl/headers/include/thrust/system/tbb/detail/find.h +49 -0
  1716. cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.h +51 -0
  1717. cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.inl +91 -0
  1718. cuda/cccl/headers/include/thrust/system/tbb/detail/gather.h +30 -0
  1719. cuda/cccl/headers/include/thrust/system/tbb/detail/generate.h +30 -0
  1720. cuda/cccl/headers/include/thrust/system/tbb/detail/get_value.h +30 -0
  1721. cuda/cccl/headers/include/thrust/system/tbb/detail/inner_product.h +30 -0
  1722. cuda/cccl/headers/include/thrust/system/tbb/detail/iter_swap.h +30 -0
  1723. cuda/cccl/headers/include/thrust/system/tbb/detail/logical.h +30 -0
  1724. cuda/cccl/headers/include/thrust/system/tbb/detail/malloc_and_free.h +30 -0
  1725. cuda/cccl/headers/include/thrust/system/tbb/detail/memory.inl +94 -0
  1726. cuda/cccl/headers/include/thrust/system/tbb/detail/merge.h +77 -0
  1727. cuda/cccl/headers/include/thrust/system/tbb/detail/merge.inl +327 -0
  1728. cuda/cccl/headers/include/thrust/system/tbb/detail/mismatch.h +30 -0
  1729. cuda/cccl/headers/include/thrust/system/tbb/detail/par.h +62 -0
  1730. cuda/cccl/headers/include/thrust/system/tbb/detail/partition.h +84 -0
  1731. cuda/cccl/headers/include/thrust/system/tbb/detail/partition.inl +98 -0
  1732. cuda/cccl/headers/include/thrust/system/tbb/detail/per_device_resource.h +29 -0
  1733. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.h +54 -0
  1734. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.inl +137 -0
  1735. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.h +61 -0
  1736. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.inl +400 -0
  1737. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_intervals.h +140 -0
  1738. cuda/cccl/headers/include/thrust/system/tbb/detail/remove.h +76 -0
  1739. cuda/cccl/headers/include/thrust/system/tbb/detail/remove.inl +87 -0
  1740. cuda/cccl/headers/include/thrust/system/tbb/detail/replace.h +30 -0
  1741. cuda/cccl/headers/include/thrust/system/tbb/detail/reverse.h +30 -0
  1742. cuda/cccl/headers/include/thrust/system/tbb/detail/scan.h +59 -0
  1743. cuda/cccl/headers/include/thrust/system/tbb/detail/scan.inl +312 -0
  1744. cuda/cccl/headers/include/thrust/system/tbb/detail/scan_by_key.h +30 -0
  1745. cuda/cccl/headers/include/thrust/system/tbb/detail/scatter.h +30 -0
  1746. cuda/cccl/headers/include/thrust/system/tbb/detail/sequence.h +30 -0
  1747. cuda/cccl/headers/include/thrust/system/tbb/detail/set_operations.h +30 -0
  1748. cuda/cccl/headers/include/thrust/system/tbb/detail/sort.h +60 -0
  1749. cuda/cccl/headers/include/thrust/system/tbb/detail/sort.inl +295 -0
  1750. cuda/cccl/headers/include/thrust/system/tbb/detail/swap_ranges.h +30 -0
  1751. cuda/cccl/headers/include/thrust/system/tbb/detail/tabulate.h +30 -0
  1752. cuda/cccl/headers/include/thrust/system/tbb/detail/temporary_buffer.h +29 -0
  1753. cuda/cccl/headers/include/thrust/system/tbb/detail/transform.h +30 -0
  1754. cuda/cccl/headers/include/thrust/system/tbb/detail/transform_reduce.h +30 -0
  1755. cuda/cccl/headers/include/thrust/system/tbb/detail/transform_scan.h +30 -0
  1756. cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_copy.h +30 -0
  1757. cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_fill.h +30 -0
  1758. cuda/cccl/headers/include/thrust/system/tbb/detail/unique.h +60 -0
  1759. cuda/cccl/headers/include/thrust/system/tbb/detail/unique.inl +71 -0
  1760. cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.h +67 -0
  1761. cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.inl +75 -0
  1762. cuda/cccl/headers/include/thrust/system/tbb/execution_policy.h +160 -0
  1763. cuda/cccl/headers/include/thrust/system/tbb/memory.h +111 -0
  1764. cuda/cccl/headers/include/thrust/system/tbb/memory_resource.h +75 -0
  1765. cuda/cccl/headers/include/thrust/system/tbb/pointer.h +120 -0
  1766. cuda/cccl/headers/include/thrust/system/tbb/vector.h +99 -0
  1767. cuda/cccl/headers/include/thrust/system_error.h +57 -0
  1768. cuda/cccl/headers/include/thrust/tabulate.h +125 -0
  1769. cuda/cccl/headers/include/thrust/transform.h +903 -0
  1770. cuda/cccl/headers/include/thrust/transform_reduce.h +190 -0
  1771. cuda/cccl/headers/include/thrust/transform_scan.h +442 -0
  1772. cuda/cccl/headers/include/thrust/tuple.h +142 -0
  1773. cuda/cccl/headers/include/thrust/type_traits/integer_sequence.h +261 -0
  1774. cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +182 -0
  1775. cuda/cccl/headers/include/thrust/type_traits/is_execution_policy.h +65 -0
  1776. cuda/cccl/headers/include/thrust/type_traits/is_operator_less_or_greater_function_object.h +184 -0
  1777. cuda/cccl/headers/include/thrust/type_traits/is_operator_plus_function_object.h +116 -0
  1778. cuda/cccl/headers/include/thrust/type_traits/is_trivially_relocatable.h +306 -0
  1779. cuda/cccl/headers/include/thrust/type_traits/logical_metafunctions.h +42 -0
  1780. cuda/cccl/headers/include/thrust/type_traits/unwrap_contiguous_iterator.h +93 -0
  1781. cuda/cccl/headers/include/thrust/uninitialized_copy.h +300 -0
  1782. cuda/cccl/headers/include/thrust/uninitialized_fill.h +268 -0
  1783. cuda/cccl/headers/include/thrust/unique.h +1090 -0
  1784. cuda/cccl/headers/include/thrust/universal_allocator.h +90 -0
  1785. cuda/cccl/headers/include/thrust/universal_ptr.h +34 -0
  1786. cuda/cccl/headers/include/thrust/universal_vector.h +71 -0
  1787. cuda/cccl/headers/include/thrust/version.h +93 -0
  1788. cuda/cccl/headers/include/thrust/zip_function.h +176 -0
  1789. cuda/cccl/headers/include_paths.py +72 -0
  1790. cuda/cccl/parallel/__init__.py +3 -0
  1791. cuda/cccl/parallel/experimental/__init__.py +3 -0
  1792. cuda/cccl/parallel/experimental/_bindings.py +24 -0
  1793. cuda/cccl/parallel/experimental/_bindings.pyi +388 -0
  1794. cuda/cccl/parallel/experimental/_bindings_impl.cpython-310-x86_64-linux-gnu.so +0 -0
  1795. cuda/cccl/parallel/experimental/_bindings_impl.pyx +2158 -0
  1796. cuda/cccl/parallel/experimental/_caching.py +71 -0
  1797. cuda/cccl/parallel/experimental/_cccl_interop.py +371 -0
  1798. cuda/cccl/parallel/experimental/_utils/__init__.py +0 -0
  1799. cuda/cccl/parallel/experimental/_utils/protocols.py +132 -0
  1800. cuda/cccl/parallel/experimental/algorithms/__init__.py +28 -0
  1801. cuda/cccl/parallel/experimental/algorithms/_merge_sort.py +172 -0
  1802. cuda/cccl/parallel/experimental/algorithms/_radix_sort.py +244 -0
  1803. cuda/cccl/parallel/experimental/algorithms/_reduce.py +136 -0
  1804. cuda/cccl/parallel/experimental/algorithms/_scan.py +179 -0
  1805. cuda/cccl/parallel/experimental/algorithms/_segmented_reduce.py +183 -0
  1806. cuda/cccl/parallel/experimental/algorithms/_transform.py +213 -0
  1807. cuda/cccl/parallel/experimental/algorithms/_unique_by_key.py +179 -0
  1808. cuda/cccl/parallel/experimental/cccl/.gitkeep +0 -0
  1809. cuda/cccl/parallel/experimental/cccl/libcccl.c.parallel.so +0 -0
  1810. cuda/cccl/parallel/experimental/iterators/__init__.py +157 -0
  1811. cuda/cccl/parallel/experimental/iterators/_iterators.py +650 -0
  1812. cuda/cccl/parallel/experimental/numba_utils.py +6 -0
  1813. cuda/cccl/parallel/experimental/struct.py +150 -0
  1814. cuda/cccl/parallel/experimental/typing.py +27 -0
  1815. cuda/cccl/py.typed +0 -0
  1816. cuda_cccl-0.1.3.1.0.dev1486.dist-info/METADATA +29 -0
  1817. cuda_cccl-0.1.3.1.0.dev1486.dist-info/RECORD +1819 -0
  1818. cuda_cccl-0.1.3.1.0.dev1486.dist-info/WHEEL +6 -0
  1819. cuda_cccl-0.1.3.1.0.dev1486.dist-info/licenses/LICENSE +1 -0
@@ -0,0 +1,2692 @@
1
+ //===----------------------------------------------------------------------===//
2
+ //
3
+ // Part of libcu++, the C++ Standard Library for your entire system,
4
+ // under the Apache License v2.0 with LLVM Exceptions.
5
+ // See https://llvm.org/LICENSE.txt for license information.
6
+ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7
+ // SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
8
+ //
9
+ //===----------------------------------------------------------------------===//
10
+
11
+ #ifndef _LIBCUDACXX_ALGORITHM
12
+ #define _LIBCUDACXX_ALGORITHM
13
+
14
+ /*
15
+ algorithm synopsis
16
+
17
+ #include <initializer_list>
18
+
19
+ namespace std
20
+ {
21
+
22
+ template <class InputIterator, class Predicate>
23
+ constexpr bool // constexpr in C++20
24
+ all_of(InputIterator first, InputIterator last, Predicate pred);
25
+
26
+ template <class InputIterator, class Predicate>
27
+ constexpr bool // constexpr in C++20
28
+ any_of(InputIterator first, InputIterator last, Predicate pred);
29
+
30
+ template <class InputIterator, class Predicate>
31
+ constexpr bool // constexpr in C++20
32
+ none_of(InputIterator first, InputIterator last, Predicate pred);
33
+
34
+ template <class InputIterator, class Function>
35
+ constexpr Function // constexpr in C++20
36
+ for_each(InputIterator first, InputIterator last, Function f);
37
+
38
+ template<class InputIterator, class Size, class Function>
39
+ constexpr InputIterator // constexpr in C++20
40
+ for_each_n(InputIterator first, Size n, Function f); // C++17
41
+
42
+ template <class InputIterator, class T>
43
+ constexpr InputIterator // constexpr in C++20
44
+ find(InputIterator first, InputIterator last, const T& value);
45
+
46
+ template <class InputIterator, class Predicate>
47
+ constexpr InputIterator // constexpr in C++20
48
+ find_if(InputIterator first, InputIterator last, Predicate pred);
49
+
50
+ template<class InputIterator, class Predicate>
51
+ constexpr InputIterator // constexpr in C++20
52
+ find_if_not(InputIterator first, InputIterator last, Predicate pred);
53
+
54
+ template <class ForwardIterator1, class ForwardIterator2>
55
+ constexpr ForwardIterator1 // constexpr in C++20
56
+ find_end(ForwardIterator1 first1, ForwardIterator1 last1,
57
+ ForwardIterator2 first2, ForwardIterator2 last2);
58
+
59
+ template <class ForwardIterator1, class ForwardIterator2, class BinaryPredicate>
60
+ constexpr ForwardIterator1 // constexpr in C++20
61
+ find_end(ForwardIterator1 first1, ForwardIterator1 last1,
62
+ ForwardIterator2 first2, ForwardIterator2 last2, BinaryPredicate pred);
63
+
64
+ template <class ForwardIterator1, class ForwardIterator2>
65
+ constexpr ForwardIterator1 // constexpr in C++20
66
+ find_first_of(ForwardIterator1 first1, ForwardIterator1 last1,
67
+ ForwardIterator2 first2, ForwardIterator2 last2);
68
+
69
+ template <class ForwardIterator1, class ForwardIterator2, class BinaryPredicate>
70
+ constexpr ForwardIterator1 // constexpr in C++20
71
+ find_first_of(ForwardIterator1 first1, ForwardIterator1 last1,
72
+ ForwardIterator2 first2, ForwardIterator2 last2, BinaryPredicate pred);
73
+
74
+ template <class ForwardIterator>
75
+ constexpr ForwardIterator // constexpr in C++20
76
+ adjacent_find(ForwardIterator first, ForwardIterator last);
77
+
78
+ template <class ForwardIterator, class BinaryPredicate>
79
+ constexpr ForwardIterator // constexpr in C++20
80
+ adjacent_find(ForwardIterator first, ForwardIterator last, BinaryPredicate pred);
81
+
82
+ template <class InputIterator, class T>
83
+ constexpr typename iterator_traits<InputIterator>::difference_type // constexpr in C++20
84
+ count(InputIterator first, InputIterator last, const T& value);
85
+
86
+ template <class InputIterator, class Predicate>
87
+ constexpr typename iterator_traits<InputIterator>::difference_type // constexpr in C++20
88
+ count_if(InputIterator first, InputIterator last, Predicate pred);
89
+
90
+ template <class InputIterator1, class InputIterator2>
91
+ constexpr pair<InputIterator1, InputIterator2> // constexpr in C++20
92
+ mismatch(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2);
93
+
94
+ template <class InputIterator1, class InputIterator2>
95
+ constexpr pair<InputIterator1, InputIterator2> // constexpr in C++20
96
+ mismatch(InputIterator1 first1, InputIterator1 last1,
97
+ InputIterator2 first2, InputIterator2 last2); // **C++14**
98
+
99
+ template <class InputIterator1, class InputIterator2, class BinaryPredicate>
100
+ constexpr pair<InputIterator1, InputIterator2> // constexpr in C++20
101
+ mismatch(InputIterator1 first1, InputIterator1 last1,
102
+ InputIterator2 first2, BinaryPredicate pred);
103
+
104
+ template <class InputIterator1, class InputIterator2, class BinaryPredicate>
105
+ constexpr pair<InputIterator1, InputIterator2> // constexpr in C++20
106
+ mismatch(InputIterator1 first1, InputIterator1 last1,
107
+ InputIterator2 first2, InputIterator2 last2,
108
+ BinaryPredicate pred); // **C++14**
109
+
110
+ template <class InputIterator1, class InputIterator2>
111
+ constexpr bool // constexpr in C++20
112
+ equal(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2);
113
+
114
+ template <class InputIterator1, class InputIterator2>
115
+ constexpr bool // constexpr in C++20
116
+ equal(InputIterator1 first1, InputIterator1 last1,
117
+ InputIterator2 first2, InputIterator2 last2); // **C++14**
118
+
119
+ template <class InputIterator1, class InputIterator2, class BinaryPredicate>
120
+ constexpr bool // constexpr in C++20
121
+ equal(InputIterator1 first1, InputIterator1 last1,
122
+ InputIterator2 first2, BinaryPredicate pred);
123
+
124
+ template <class InputIterator1, class InputIterator2, class BinaryPredicate>
125
+ constexpr bool // constexpr in C++20
126
+ equal(InputIterator1 first1, InputIterator1 last1,
127
+ InputIterator2 first2, InputIterator2 last2,
128
+ BinaryPredicate pred); // **C++14**
129
+
130
+ template<class ForwardIterator1, class ForwardIterator2>
131
+ constexpr bool // constexpr in C++20
132
+ is_permutation(ForwardIterator1 first1, ForwardIterator1 last1,
133
+ ForwardIterator2 first2);
134
+
135
+ template<class ForwardIterator1, class ForwardIterator2>
136
+ constexpr bool // constexpr in C++20
137
+ is_permutation(ForwardIterator1 first1, ForwardIterator1 last1,
138
+ ForwardIterator2 first2, ForwardIterator2 last2); // **C++14**
139
+
140
+ template<class ForwardIterator1, class ForwardIterator2, class BinaryPredicate>
141
+ constexpr bool // constexpr in C++20
142
+ is_permutation(ForwardIterator1 first1, ForwardIterator1 last1,
143
+ ForwardIterator2 first2, BinaryPredicate pred);
144
+
145
+ template<class ForwardIterator1, class ForwardIterator2, class BinaryPredicate>
146
+ constexpr bool // constexpr in C++20
147
+ is_permutation(ForwardIterator1 first1, ForwardIterator1 last1,
148
+ ForwardIterator2 first2, ForwardIterator2 last2,
149
+ BinaryPredicate pred); // **C++14**
150
+
151
+ template <class ForwardIterator1, class ForwardIterator2>
152
+ constexpr ForwardIterator1 // constexpr in C++20
153
+ search(ForwardIterator1 first1, ForwardIterator1 last1,
154
+ ForwardIterator2 first2, ForwardIterator2 last2);
155
+
156
+ template <class ForwardIterator1, class ForwardIterator2, class BinaryPredicate>
157
+ constexpr ForwardIterator1 // constexpr in C++20
158
+ search(ForwardIterator1 first1, ForwardIterator1 last1,
159
+ ForwardIterator2 first2, ForwardIterator2 last2, BinaryPredicate pred);
160
+
161
+ template <class ForwardIterator, class Size, class T>
162
+ constexpr ForwardIterator // constexpr in C++20
163
+ search_n(ForwardIterator first, ForwardIterator last, Size count, const T& value);
164
+
165
+ template <class ForwardIterator, class Size, class T, class BinaryPredicate>
166
+ constexpr ForwardIterator // constexpr in C++20
167
+ search_n(ForwardIterator first, ForwardIterator last,
168
+ Size count, const T& value, BinaryPredicate pred);
169
+
170
+ template <class InputIterator, class OutputIterator>
171
+ OutputIterator
172
+ copy(InputIterator first, InputIterator last, OutputIterator result);
173
+
174
+ template<class InputIterator, class OutputIterator, class Predicate>
175
+ OutputIterator
176
+ copy_if(InputIterator first, InputIterator last,
177
+ OutputIterator result, Predicate pred);
178
+
179
+ template<class InputIterator, class Size, class OutputIterator>
180
+ OutputIterator
181
+ copy_n(InputIterator first, Size n, OutputIterator result);
182
+
183
+ template <class BidirectionalIterator1, class BidirectionalIterator2>
184
+ BidirectionalIterator2
185
+ copy_backward(BidirectionalIterator1 first, BidirectionalIterator1 last,
186
+ BidirectionalIterator2 result);
187
+
188
+ template <class ForwardIterator1, class ForwardIterator2>
189
+ ForwardIterator2
190
+ swap_ranges(ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterator2 first2);
191
+
192
+ template <class ForwardIterator1, class ForwardIterator2>
193
+ void
194
+ iter_swap(ForwardIterator1 a, ForwardIterator2 b);
195
+
196
+ template <class InputIterator, class OutputIterator, class UnaryOperation>
197
+ constexpr OutputIterator // constexpr in C++20
198
+ transform(InputIterator first, InputIterator last, OutputIterator result, UnaryOperation op);
199
+
200
+ template <class InputIterator1, class InputIterator2, class OutputIterator, class BinaryOperation>
201
+ constexpr OutputIterator // constexpr in C++20
202
+ transform(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2,
203
+ OutputIterator result, BinaryOperation binary_op);
204
+
205
+ template <class ForwardIterator, class T>
206
+ constexpr void // constexpr in C++20
207
+ replace(ForwardIterator first, ForwardIterator last, const T& old_value, const T& new_value);
208
+
209
+ template <class ForwardIterator, class Predicate, class T>
210
+ constexpr void // constexpr in C++20
211
+ replace_if(ForwardIterator first, ForwardIterator last, Predicate pred, const T& new_value);
212
+
213
+ template <class InputIterator, class OutputIterator, class T>
214
+ constexpr OutputIterator // constexpr in C++20
215
+ replace_copy(InputIterator first, InputIterator last, OutputIterator result,
216
+ const T& old_value, const T& new_value);
217
+
218
+ template <class InputIterator, class OutputIterator, class Predicate, class T>
219
+ constexpr OutputIterator // constexpr in C++20
220
+ replace_copy_if(InputIterator first, InputIterator last, OutputIterator result, Predicate pred, const T& new_value);
221
+
222
+ template <class ForwardIterator, class T>
223
+ constexpr void // constexpr in C++20
224
+ fill(ForwardIterator first, ForwardIterator last, const T& value);
225
+
226
+ template <class OutputIterator, class Size, class T>
227
+ constexpr OutputIterator // constexpr in C++20
228
+ fill_n(OutputIterator first, Size n, const T& value);
229
+
230
+ template <class ForwardIterator, class Generator>
231
+ constexpr void // constexpr in C++20
232
+ generate(ForwardIterator first, ForwardIterator last, Generator gen);
233
+
234
+ template <class OutputIterator, class Size, class Generator>
235
+ constexpr OutputIterator // constexpr in C++20
236
+ generate_n(OutputIterator first, Size n, Generator gen);
237
+
238
+ template <class ForwardIterator, class T>
239
+ constexpr ForwardIterator // constexpr in C++20
240
+ remove(ForwardIterator first, ForwardIterator last, const T& value);
241
+
242
+ template <class ForwardIterator, class Predicate>
243
+ constexpr ForwardIterator // constexpr in C++20
244
+ remove_if(ForwardIterator first, ForwardIterator last, Predicate pred);
245
+
246
+ template <class InputIterator, class OutputIterator, class T>
247
+ constexpr OutputIterator // constexpr in C++20
248
+ remove_copy(InputIterator first, InputIterator last, OutputIterator result, const T& value);
249
+
250
+ template <class InputIterator, class OutputIterator, class Predicate>
251
+ constexpr OutputIterator // constexpr in C++20
252
+ remove_copy_if(InputIterator first, InputIterator last, OutputIterator result, Predicate pred);
253
+
254
+ template <class ForwardIterator>
255
+ ForwardIterator
256
+ unique(ForwardIterator first, ForwardIterator last);
257
+
258
+ template <class ForwardIterator, class BinaryPredicate>
259
+ ForwardIterator
260
+ unique(ForwardIterator first, ForwardIterator last, BinaryPredicate pred);
261
+
262
+ template <class InputIterator, class OutputIterator>
263
+ OutputIterator
264
+ unique_copy(InputIterator first, InputIterator last, OutputIterator result);
265
+
266
+ template <class InputIterator, class OutputIterator, class BinaryPredicate>
267
+ OutputIterator
268
+ unique_copy(InputIterator first, InputIterator last, OutputIterator result, BinaryPredicate pred);
269
+
270
+ template <class BidirectionalIterator>
271
+ void
272
+ reverse(BidirectionalIterator first, BidirectionalIterator last);
273
+
274
+ template <class BidirectionalIterator, class OutputIterator>
275
+ constexpr OutputIterator // constexpr in C++20
276
+ reverse_copy(BidirectionalIterator first, BidirectionalIterator last, OutputIterator result);
277
+
278
+ template <class ForwardIterator>
279
+ ForwardIterator
280
+ rotate(ForwardIterator first, ForwardIterator middle, ForwardIterator last);
281
+
282
+ template <class ForwardIterator, class OutputIterator>
283
+ OutputIterator
284
+ rotate_copy(ForwardIterator first, ForwardIterator middle, ForwardIterator last, OutputIterator result);
285
+
286
+ template <class RandomAccessIterator>
287
+ void
288
+ random_shuffle(RandomAccessIterator first, RandomAccessIterator last); // deprecated in C++14, removed in C++17
289
+
290
+ template <class RandomAccessIterator, class RandomNumberGenerator>
291
+ void
292
+ random_shuffle(RandomAccessIterator first, RandomAccessIterator last,
293
+ RandomNumberGenerator& rand); // deprecated in C++14, removed in C++17
294
+
295
+ template<class PopulationIterator, class SampleIterator,
296
+ class Distance, class UniformRandomBitGenerator>
297
+ SampleIterator sample(PopulationIterator first, PopulationIterator last,
298
+ SampleIterator out, Distance n,
299
+ UniformRandomBitGenerator&& g); // C++17
300
+
301
+ template<class RandomAccessIterator, class UniformRandomNumberGenerator>
302
+ void shuffle(RandomAccessIterator first, RandomAccessIterator last,
303
+ UniformRandomNumberGenerator&& g);
304
+
305
+ template <class InputIterator, class Predicate>
306
+ constexpr bool // constexpr in C++20
307
+ is_partitioned(InputIterator first, InputIterator last, Predicate pred);
308
+
309
+ template <class ForwardIterator, class Predicate>
310
+ ForwardIterator
311
+ partition(ForwardIterator first, ForwardIterator last, Predicate pred);
312
+
313
+ template <class InputIterator, class OutputIterator1,
314
+ class OutputIterator2, class Predicate>
315
+ constexpr pair<OutputIterator1, OutputIterator2> // constexpr in C++20
316
+ partition_copy(InputIterator first, InputIterator last,
317
+ OutputIterator1 out_true, OutputIterator2 out_false,
318
+ Predicate pred);
319
+
320
+ template <class ForwardIterator, class Predicate>
321
+ ForwardIterator
322
+ stable_partition(ForwardIterator first, ForwardIterator last, Predicate pred);
323
+
324
+ template<class ForwardIterator, class Predicate>
325
+ constexpr ForwardIterator // constexpr in C++20
326
+ partition_point(ForwardIterator first, ForwardIterator last, Predicate pred);
327
+
328
+ template <class ForwardIterator>
329
+ constexpr bool // constexpr in C++20
330
+ is_sorted(ForwardIterator first, ForwardIterator last);
331
+
332
+ template <class ForwardIterator, class Compare>
333
+ constexpr bool // constexpr in C++20
334
+ is_sorted(ForwardIterator first, ForwardIterator last, Compare comp);
335
+
336
+ template<class ForwardIterator>
337
+ constexpr ForwardIterator // constexpr in C++20
338
+ is_sorted_until(ForwardIterator first, ForwardIterator last);
339
+
340
+ template <class ForwardIterator, class Compare>
341
+ constexpr ForwardIterator // constexpr in C++20
342
+ is_sorted_until(ForwardIterator first, ForwardIterator last, Compare comp);
343
+
344
+ template <class RandomAccessIterator>
345
+ void
346
+ sort(RandomAccessIterator first, RandomAccessIterator last);
347
+
348
+ template <class RandomAccessIterator, class Compare>
349
+ void
350
+ sort(RandomAccessIterator first, RandomAccessIterator last, Compare comp);
351
+
352
+ template <class RandomAccessIterator>
353
+ void
354
+ stable_sort(RandomAccessIterator first, RandomAccessIterator last);
355
+
356
+ template <class RandomAccessIterator, class Compare>
357
+ void
358
+ stable_sort(RandomAccessIterator first, RandomAccessIterator last, Compare comp);
359
+
360
+ template <class RandomAccessIterator>
361
+ void
362
+ partial_sort(RandomAccessIterator first, RandomAccessIterator middle, RandomAccessIterator last);
363
+
364
+ template <class RandomAccessIterator, class Compare>
365
+ void
366
+ partial_sort(RandomAccessIterator first, RandomAccessIterator middle, RandomAccessIterator last, Compare comp);
367
+
368
+ template <class InputIterator, class RandomAccessIterator>
369
+ RandomAccessIterator
370
+ partial_sort_copy(InputIterator first, InputIterator last,
371
+ RandomAccessIterator result_first, RandomAccessIterator result_last);
372
+
373
+ template <class InputIterator, class RandomAccessIterator, class Compare>
374
+ RandomAccessIterator
375
+ partial_sort_copy(InputIterator first, InputIterator last,
376
+ RandomAccessIterator result_first, RandomAccessIterator result_last, Compare comp);
377
+
378
+ template <class RandomAccessIterator>
379
+ void
380
+ nth_element(RandomAccessIterator first, RandomAccessIterator nth, RandomAccessIterator last);
381
+
382
+ template <class RandomAccessIterator, class Compare>
383
+ void
384
+ nth_element(RandomAccessIterator first, RandomAccessIterator nth, RandomAccessIterator last, Compare comp);
385
+
386
+ template <class ForwardIterator, class T>
387
+ constexpr ForwardIterator // constexpr in C++20
388
+ lower_bound(ForwardIterator first, ForwardIterator last, const T& value);
389
+
390
+ template <class ForwardIterator, class T, class Compare>
391
+ constexpr ForwardIterator // constexpr in C++20
392
+ lower_bound(ForwardIterator first, ForwardIterator last, const T& value, Compare comp);
393
+
394
+ template <class ForwardIterator, class T>
395
+ constexpr ForwardIterator // constexpr in C++20
396
+ upper_bound(ForwardIterator first, ForwardIterator last, const T& value);
397
+
398
+ template <class ForwardIterator, class T, class Compare>
399
+ constexpr ForwardIterator // constexpr in C++20
400
+ upper_bound(ForwardIterator first, ForwardIterator last, const T& value, Compare comp);
401
+
402
+ template <class ForwardIterator, class T>
403
+ constexpr pair<ForwardIterator, ForwardIterator> // constexpr in C++20
404
+ equal_range(ForwardIterator first, ForwardIterator last, const T& value);
405
+
406
+ template <class ForwardIterator, class T, class Compare>
407
+ constexpr pair<ForwardIterator, ForwardIterator> // constexpr in C++20
408
+ equal_range(ForwardIterator first, ForwardIterator last, const T& value, Compare comp);
409
+
410
+ template <class ForwardIterator, class T>
411
+ constexpr bool // constexpr in C++20
412
+ binary_search(ForwardIterator first, ForwardIterator last, const T& value);
413
+
414
+ template <class ForwardIterator, class T, class Compare>
415
+ constexpr bool // constexpr in C++20
416
+ binary_search(ForwardIterator first, ForwardIterator last, const T& value, Compare comp);
417
+
418
+ template <class InputIterator1, class InputIterator2, class OutputIterator>
419
+ OutputIterator
420
+ merge(InputIterator1 first1, InputIterator1 last1,
421
+ InputIterator2 first2, InputIterator2 last2, OutputIterator result);
422
+
423
+ template <class InputIterator1, class InputIterator2, class OutputIterator, class Compare>
424
+ OutputIterator
425
+ merge(InputIterator1 first1, InputIterator1 last1,
426
+ InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp);
427
+
428
+ template <class BidirectionalIterator>
429
+ void
430
+ inplace_merge(BidirectionalIterator first, BidirectionalIterator middle, BidirectionalIterator last);
431
+
432
+ template <class BidirectionalIterator, class Compare>
433
+ void
434
+ inplace_merge(BidirectionalIterator first, BidirectionalIterator middle, BidirectionalIterator last, Compare comp);
435
+
436
+ template <class InputIterator1, class InputIterator2>
437
+ constexpr bool // constexpr in C++20
438
+ includes(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2);
439
+
440
+ template <class InputIterator1, class InputIterator2, class Compare>
441
+ constexpr bool // constexpr in C++20
442
+ includes(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, Compare comp);
443
+
444
+ template <class InputIterator1, class InputIterator2, class OutputIterator>
445
+ OutputIterator
446
+ set_union(InputIterator1 first1, InputIterator1 last1,
447
+ InputIterator2 first2, InputIterator2 last2, OutputIterator result);
448
+
449
+ template <class InputIterator1, class InputIterator2, class OutputIterator, class Compare>
450
+ OutputIterator
451
+ set_union(InputIterator1 first1, InputIterator1 last1,
452
+ InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp);
453
+
454
+ template <class InputIterator1, class InputIterator2, class OutputIterator>
455
+ constexpr OutputIterator // constexpr in C++20
456
+ set_intersection(InputIterator1 first1, InputIterator1 last1,
457
+ InputIterator2 first2, InputIterator2 last2, OutputIterator result);
458
+
459
+ template <class InputIterator1, class InputIterator2, class OutputIterator, class Compare>
460
+ constexpr OutputIterator // constexpr in C++20
461
+ set_intersection(InputIterator1 first1, InputIterator1 last1,
462
+ InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp);
463
+
464
+ template <class InputIterator1, class InputIterator2, class OutputIterator>
465
+ OutputIterator
466
+ set_difference(InputIterator1 first1, InputIterator1 last1,
467
+ InputIterator2 first2, InputIterator2 last2, OutputIterator result);
468
+
469
+ template <class InputIterator1, class InputIterator2, class OutputIterator, class Compare>
470
+ OutputIterator
471
+ set_difference(InputIterator1 first1, InputIterator1 last1,
472
+ InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp);
473
+
474
+ template <class InputIterator1, class InputIterator2, class OutputIterator>
475
+ OutputIterator
476
+ set_symmetric_difference(InputIterator1 first1, InputIterator1 last1,
477
+ InputIterator2 first2, InputIterator2 last2, OutputIterator result);
478
+
479
+ template <class InputIterator1, class InputIterator2, class OutputIterator, class Compare>
480
+ OutputIterator
481
+ set_symmetric_difference(InputIterator1 first1, InputIterator1 last1,
482
+ InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp);
483
+
484
+ template <class RandomAccessIterator>
485
+ void
486
+ push_heap(RandomAccessIterator first, RandomAccessIterator last);
487
+
488
+ template <class RandomAccessIterator, class Compare>
489
+ void
490
+ push_heap(RandomAccessIterator first, RandomAccessIterator last, Compare comp);
491
+
492
+ template <class RandomAccessIterator>
493
+ void
494
+ pop_heap(RandomAccessIterator first, RandomAccessIterator last);
495
+
496
+ template <class RandomAccessIterator, class Compare>
497
+ void
498
+ pop_heap(RandomAccessIterator first, RandomAccessIterator last, Compare comp);
499
+
500
+ template <class RandomAccessIterator>
501
+ void
502
+ make_heap(RandomAccessIterator first, RandomAccessIterator last);
503
+
504
+ template <class RandomAccessIterator, class Compare>
505
+ void
506
+ make_heap(RandomAccessIterator first, RandomAccessIterator last, Compare comp);
507
+
508
+ template <class RandomAccessIterator>
509
+ void
510
+ sort_heap(RandomAccessIterator first, RandomAccessIterator last);
511
+
512
+ template <class RandomAccessIterator, class Compare>
513
+ void
514
+ sort_heap(RandomAccessIterator first, RandomAccessIterator last, Compare comp);
515
+
516
+ template <class RandomAccessIterator>
517
+ constexpr bool // constexpr in C++20
518
+ is_heap(RandomAccessIterator first, RandomAccessIterator last);
519
+
520
+ template <class RandomAccessIterator, class Compare>
521
+ constexpr bool // constexpr in C++20
522
+ is_heap(RandomAccessIterator first, RandomAccessIterator last, Compare comp);
523
+
524
+ template <class RandomAccessIterator>
525
+ constexpr RandomAccessIterator // constexpr in C++20
526
+ is_heap_until(RandomAccessIterator first, RandomAccessIterator last);
527
+
528
+ template <class RandomAccessIterator, class Compare>
529
+ constexpr RandomAccessIterator // constexpr in C++20
530
+ is_heap_until(RandomAccessIterator first, RandomAccessIterator last, Compare comp);
531
+
532
+ template <class ForwardIterator>
533
+ ForwardIterator
534
+ min_element(ForwardIterator first, ForwardIterator last); // constexpr in C++14
535
+
536
+ template <class ForwardIterator, class Compare>
537
+ ForwardIterator
538
+ min_element(ForwardIterator first, ForwardIterator last, Compare comp); // constexpr in C++14
539
+
540
+ template <class T>
541
+ const T&
542
+ min(const T& a, const T& b); // constexpr in C++14
543
+
544
+ template <class T, class Compare>
545
+ const T&
546
+ min(const T& a, const T& b, Compare comp); // constexpr in C++14
547
+
548
+ template<class T>
549
+ T
550
+ min(::std::initializer_list<T> t); // constexpr in C++14
551
+
552
+ template<class T, class Compare>
553
+ T
554
+ min(::std::initializer_list<T> t, Compare comp); // constexpr in C++14
555
+
556
+ template<class T>
557
+ constexpr const T& clamp( const T& v, const T& lo, const T& hi ); // C++17
558
+
559
+ template<class T, class Compare>
560
+ constexpr const T& clamp( const T& v, const T& lo, const T& hi, Compare comp ); // C++17
561
+
562
+ template <class ForwardIterator>
563
+ ForwardIterator
564
+ max_element(ForwardIterator first, ForwardIterator last); // constexpr in C++14
565
+
566
+ template <class ForwardIterator, class Compare>
567
+ ForwardIterator
568
+ max_element(ForwardIterator first, ForwardIterator last, Compare comp); // constexpr in C++14
569
+
570
+ template <class T>
571
+ const T&
572
+ max(const T& a, const T& b); // constexpr in C++14
573
+
574
+ template <class T, class Compare>
575
+ const T&
576
+ max(const T& a, const T& b, Compare comp); // constexpr in C++14
577
+
578
+ template<class T>
579
+ T
580
+ max(::std::initializer_list<T> t); // constexpr in C++14
581
+
582
+ template<class T, class Compare>
583
+ T
584
+ max(::std::initializer_list<T> t, Compare comp); // constexpr in C++14
585
+
586
+ template<class ForwardIterator>
587
+ pair<ForwardIterator, ForwardIterator>
588
+ minmax_element(ForwardIterator first, ForwardIterator last); // constexpr in C++14
589
+
590
+ template<class ForwardIterator, class Compare>
591
+ pair<ForwardIterator, ForwardIterator>
592
+ minmax_element(ForwardIterator first, ForwardIterator last, Compare comp); // constexpr in C++14
593
+
594
+ template<class T>
595
+ pair<const T&, const T&>
596
+ minmax(const T& a, const T& b); // constexpr in C++14
597
+
598
+ template<class T, class Compare>
599
+ pair<const T&, const T&>
600
+ minmax(const T& a, const T& b, Compare comp); // constexpr in C++14
601
+
602
+ template<class T>
603
+ pair<T, T>
604
+ minmax(::std::initializer_list<T> t); // constexpr in C++14
605
+
606
+ template<class T, class Compare>
607
+ pair<T, T>
608
+ minmax(::std::initializer_list<T> t, Compare comp); // constexpr in C++14
609
+
610
+ template <class InputIterator1, class InputIterator2>
611
+ constexpr bool // constexpr in C++20
612
+ lexicographical_compare(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2);
613
+
614
+ template <class InputIterator1, class InputIterator2, class Compare>
615
+ constexpr bool // constexpr in C++20
616
+ lexicographical_compare(InputIterator1 first1, InputIterator1 last1,
617
+ InputIterator2 first2, InputIterator2 last2, Compare comp);
618
+
619
+ template <class BidirectionalIterator>
620
+ bool
621
+ next_permutation(BidirectionalIterator first, BidirectionalIterator last);
622
+
623
+ template <class BidirectionalIterator, class Compare>
624
+ bool
625
+ next_permutation(BidirectionalIterator first, BidirectionalIterator last, Compare comp);
626
+
627
+ template <class BidirectionalIterator>
628
+ bool
629
+ prev_permutation(BidirectionalIterator first, BidirectionalIterator last);
630
+
631
+ template <class BidirectionalIterator, class Compare>
632
+ bool
633
+ prev_permutation(BidirectionalIterator first, BidirectionalIterator last, Compare comp);
634
+
635
+ } // std
636
+
637
+ */
638
+ #include <cuda/std/detail/__config>
639
+
640
+ #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
641
+ # pragma GCC system_header
642
+ #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
643
+ # pragma clang system_header
644
+ #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
645
+ # pragma system_header
646
+ #endif // no system header
647
+
648
+ #include <cuda/std/__algorithm/adjacent_find.h>
649
+ #include <cuda/std/__algorithm/all_of.h>
650
+ #include <cuda/std/__algorithm/any_of.h>
651
+ #include <cuda/std/__algorithm/binary_search.h>
652
+ #include <cuda/std/__algorithm/clamp.h>
653
+ #include <cuda/std/__algorithm/comp.h>
654
+ #include <cuda/std/__algorithm/comp_ref_type.h>
655
+ #include <cuda/std/__algorithm/copy.h>
656
+ #include <cuda/std/__algorithm/copy_backward.h>
657
+ #include <cuda/std/__algorithm/copy_if.h>
658
+ #include <cuda/std/__algorithm/copy_n.h>
659
+ #include <cuda/std/__algorithm/count.h>
660
+ #include <cuda/std/__algorithm/count_if.h>
661
+ #include <cuda/std/__algorithm/equal.h>
662
+ #include <cuda/std/__algorithm/equal_range.h>
663
+ #include <cuda/std/__algorithm/fill.h>
664
+ #include <cuda/std/__algorithm/fill_n.h>
665
+ #include <cuda/std/__algorithm/find.h>
666
+ #include <cuda/std/__algorithm/find_end.h>
667
+ #include <cuda/std/__algorithm/find_first_of.h>
668
+ #include <cuda/std/__algorithm/find_if.h>
669
+ #include <cuda/std/__algorithm/find_if_not.h>
670
+ #include <cuda/std/__algorithm/for_each.h>
671
+ #include <cuda/std/__algorithm/for_each_n.h>
672
+ #include <cuda/std/__algorithm/generate.h>
673
+ #include <cuda/std/__algorithm/generate_n.h>
674
+ #include <cuda/std/__algorithm/half_positive.h>
675
+ #include <cuda/std/__algorithm/includes.h>
676
+ #include <cuda/std/__algorithm/is_heap.h>
677
+ #include <cuda/std/__algorithm/is_heap_until.h>
678
+ #include <cuda/std/__algorithm/is_partitioned.h>
679
+ #include <cuda/std/__algorithm/is_permutation.h>
680
+ #include <cuda/std/__algorithm/is_sorted.h>
681
+ #include <cuda/std/__algorithm/is_sorted_until.h>
682
+ #include <cuda/std/__algorithm/iter_swap.h>
683
+ #include <cuda/std/__algorithm/iterator_operations.h>
684
+ #include <cuda/std/__algorithm/lexicographical_compare.h>
685
+ #include <cuda/std/__algorithm/lower_bound.h>
686
+ #include <cuda/std/__algorithm/make_heap.h>
687
+ #include <cuda/std/__algorithm/make_projected.h>
688
+ #include <cuda/std/__algorithm/max.h>
689
+ #include <cuda/std/__algorithm/max_element.h>
690
+ #include <cuda/std/__algorithm/merge.h>
691
+ #include <cuda/std/__algorithm/min.h>
692
+ #include <cuda/std/__algorithm/min_element.h>
693
+ #include <cuda/std/__algorithm/minmax.h>
694
+ #include <cuda/std/__algorithm/minmax_element.h>
695
+ #include <cuda/std/__algorithm/mismatch.h>
696
+ #include <cuda/std/__algorithm/move.h>
697
+ #include <cuda/std/__algorithm/move_backward.h>
698
+ #include <cuda/std/__algorithm/next_permutation.h>
699
+ #include <cuda/std/__algorithm/none_of.h>
700
+ #include <cuda/std/__algorithm/partial_sort.h>
701
+ #include <cuda/std/__algorithm/partial_sort_copy.h>
702
+ #include <cuda/std/__algorithm/partition.h>
703
+ #include <cuda/std/__algorithm/partition_copy.h>
704
+ #include <cuda/std/__algorithm/partition_point.h>
705
+ #include <cuda/std/__algorithm/pop_heap.h>
706
+ #include <cuda/std/__algorithm/prev_permutation.h>
707
+ #include <cuda/std/__algorithm/push_heap.h>
708
+ #include <cuda/std/__algorithm/ranges_iterator_concept.h>
709
+ #include <cuda/std/__algorithm/ranges_min.h>
710
+ #include <cuda/std/__algorithm/ranges_min_element.h>
711
+ #include <cuda/std/__algorithm/remove.h>
712
+ #include <cuda/std/__algorithm/remove_copy.h>
713
+ #include <cuda/std/__algorithm/remove_copy_if.h>
714
+ #include <cuda/std/__algorithm/remove_if.h>
715
+ #include <cuda/std/__algorithm/replace.h>
716
+ #include <cuda/std/__algorithm/replace_copy.h>
717
+ #include <cuda/std/__algorithm/replace_copy_if.h>
718
+ #include <cuda/std/__algorithm/replace_if.h>
719
+ #include <cuda/std/__algorithm/reverse.h>
720
+ #include <cuda/std/__algorithm/reverse_copy.h>
721
+ #include <cuda/std/__algorithm/rotate.h>
722
+ #include <cuda/std/__algorithm/rotate_copy.h>
723
+ #include <cuda/std/__algorithm/search.h>
724
+ #include <cuda/std/__algorithm/search_n.h>
725
+ #include <cuda/std/__algorithm/set_difference.h>
726
+ #include <cuda/std/__algorithm/set_intersection.h>
727
+ #include <cuda/std/__algorithm/set_symmetric_difference.h>
728
+ #include <cuda/std/__algorithm/set_union.h>
729
+ #include <cuda/std/__algorithm/shift_left.h>
730
+ #include <cuda/std/__algorithm/shift_right.h>
731
+ #include <cuda/std/__algorithm/sift_down.h>
732
+ #include <cuda/std/__algorithm/sort_heap.h>
733
+ #include <cuda/std/__algorithm/swap_ranges.h>
734
+ #include <cuda/std/__algorithm/transform.h>
735
+ #include <cuda/std/__algorithm/unique.h>
736
+ #include <cuda/std/__algorithm/unique_copy.h>
737
+ #include <cuda/std/__algorithm/upper_bound.h>
738
+ #include <cuda/std/__iterator/distance.h>
739
+ #include <cuda/std/__iterator/iterator_traits.h>
740
+ #include <cuda/std/__iterator/move_iterator.h>
741
+ #include <cuda/std/__iterator/next.h>
742
+ #include <cuda/std/__iterator/prev.h>
743
+ #include <cuda/std/__iterator/reverse_iterator.h>
744
+ #include <cuda/std/__iterator/wrap_iter.h>
745
+ #include <cuda/std/__memory/destruct_n.h>
746
+ #include <cuda/std/__memory/temporary_buffer.h>
747
+ #include <cuda/std/__type_traits/common_type.h>
748
+ #include <cuda/std/__type_traits/enable_if.h>
749
+ #include <cuda/std/__type_traits/is_integral.h>
750
+ #include <cuda/std/__type_traits/is_same.h>
751
+ #include <cuda/std/__type_traits/is_trivially_copy_assignable.h>
752
+ #include <cuda/std/__type_traits/make_unsigned.h>
753
+ #include <cuda/std/__type_traits/remove_const.h>
754
+ #include <cuda/std/bit>
755
+ #include <cuda/std/climits>
756
+ #include <cuda/std/cstddef>
757
+ #include <cuda/std/functional>
758
+ #include <cuda/std/initializer_list>
759
+ #include <cuda/std/type_traits>
760
+ #include <cuda/std/version>
761
+
762
+ #include <cuda/std/__cccl/prologue.h>
763
+
764
+ _LIBCUDACXX_BEGIN_NAMESPACE_STD
765
+
766
+ #ifndef __cuda_std__
767
+
768
+ template <class _Predicate> // Predicate adaptor: the unary call negates the result, the binary call swaps the operands.
769
+ class __invert // invert the sense of a comparison
770
+ {
771
+ private:
772
+ _Predicate __p_; // wrapped predicate, held by value
773
+
774
+ public:
775
+ _LIBCUDACXX_HIDE_FROM_ABI __invert() {} // __p_ is default-initialized
776
+
777
+ _LIBCUDACXX_HIDE_FROM_ABI explicit __invert(_Predicate __p)
778
+ : __p_(__p)
779
+ {}
780
+
781
+ template <class _T1>
782
+ _LIBCUDACXX_HIDE_FROM_ABI bool operator()(const _T1& __x)
783
+ {
784
+ return !__p_(__x); // unary form: logical negation of the wrapped predicate
785
+ }
786
+
787
+ template <class _T1, class _T2>
788
+ _LIBCUDACXX_HIDE_FROM_ABI bool operator()(const _T1& __x, const _T2& __y)
789
+ {
790
+ return __p_(__y, __x); // binary form: same predicate with the arguments exchanged (result is not negated)
791
+ }
792
+ };
793
+
794
+ // random_shuffle
795
+
796
+ // __independent_bits_engine
797
+
798
+ template <unsigned long long _Xp, size_t _Rp> // Compile-time scan for the highest set bit of _Xp, starting at bit index _Rp.
799
+ struct __log2_imp
800
+ {
801
+ static const size_t value = _Xp & ((unsigned long long) (1) << _Rp) ? _Rp : __log2_imp<_Xp, _Rp - 1>::value; // bit _Rp set -> _Rp, otherwise try the next lower bit
802
+ };
803
+
804
+ template <unsigned long long _Xp> // Recursion terminator: bit index 0 reached.
805
+ struct __log2_imp<_Xp, 0>
806
+ {
807
+ static const size_t value = 0;
808
+ };
809
+
810
+ template <size_t _Rp> // Special case _Xp == 0: no bit is set.
811
+ struct __log2_imp<0, _Rp>
812
+ {
813
+ static const size_t value = _Rp + 1; // one more than the starting bit index
814
+ };
815
+
816
+ template <class _UIntType, _UIntType _Xp> // Entry point: starts the scan at the top bit index of _UIntType.
817
+ struct __log2
818
+ {
819
+ static const size_t value = __log2_imp<_Xp, sizeof(_UIntType) * CHAR_BIT - 1>::value; // for _Xp == 0 this yields sizeof(_UIntType) * CHAR_BIT
820
+ };
821
+
822
+ template <class _Engine, class _UIntType> // Adaptor that builds a __w-bit _UIntType value from one or more calls to a referenced engine.
822
+ class __independent_bits_engine
823
+ {
824
+ public:
825
+ // types
826
+ typedef _UIntType result_type;
827
+
828
+ private:
829
+ typedef typename _Engine::result_type _Engine_result_type;
830
+ typedef conditional_t<sizeof(_Engine_result_type) <= sizeof(result_type), result_type, _Engine_result_type> // result_type unless the engine's type is larger
831
+ _Working_result_type;
832
+
833
+ _Engine& __e_; // underlying engine, held by reference (not owned)
834
+ size_t __w_; // requested number of bits per generated value
835
+ size_t __w0_; // bits taken from each of the first __n0_ draws
836
+ size_t __n_; // total engine draws per generated value
837
+ size_t __n0_; // number of draws that contribute __w0_ bits; the rest contribute __w0_ + 1
838
+ _Working_result_type __y0_; // rejection bound for the __w0_-bit draws
839
+ _Working_result_type __y1_; // rejection bound for the (__w0_ + 1)-bit draws
840
+ _Engine_result_type __mask0_; // mask applied to the __w0_-bit draws
841
+ _Engine_result_type __mask1_; // mask applied to the (__w0_ + 1)-bit draws
842
+
843
+ static constexpr _Working_result_type _Rp = _Engine::max() - _Engine::min() + _Working_result_type(1); // number of distinct engine outputs; operator() treats _Rp == 0 as a separate case
844
+ static constexpr size_t __m = __log2<_Working_result_type, _Rp>::value;
845
+ static constexpr size_t _WDt = numeric_limits<_Working_result_type>::digits;
846
+ static constexpr size_t _EDt = numeric_limits<_Engine_result_type>::digits;
847
+
848
+ public:
849
+ // constructors and seeding functions
850
+ __independent_bits_engine(_Engine& __e, size_t __w);
851
+
852
+ // generating functions
853
+ result_type operator()()
854
+ {
855
+ return __eval(integral_constant<bool, _Rp != 0>()); // compile-time dispatch: false_type when _Rp == 0, true_type otherwise
856
+ }
857
+
858
+ private:
859
+ result_type __eval(false_type);
860
+ result_type __eval(true_type);
861
+ };
863
+
864
+ template <class _Engine, class _UIntType> // Constructor: precomputes draw counts, per-draw bit widths, rejection bounds and masks for __w bits.
865
+ __independent_bits_engine<_Engine, _UIntType>::__independent_bits_engine(_Engine& __e, size_t __w)
866
+ : __e_(__e)
867
+ , __w_(__w)
868
+ {
869
+ __n_ = __w_ / __m + (__w_ % __m != 0); // ceil(__w_ / __m)
870
+ __w0_ = __w_ / __n_; // bits per draw, rounded down
871
+ if (_Rp == 0)
872
+ {
873
+ __y0_ = _Rp; // i.e. 0
874
+ }
875
+ else if (__w0_ < _WDt) // guard keeps the shift count below the digit count of the working type
876
+ {
877
+ __y0_ = (_Rp >> __w0_) << __w0_; // _Rp with its low __w0_ bits cleared
878
+ }
879
+ else
880
+ {
881
+ __y0_ = 0;
882
+ }
883
+ if (_Rp - __y0_ > __y0_ / __n_) // when this holds, use one more draw and recompute the width and bound
884
+ {
885
+ ++__n_;
886
+ __w0_ = __w_ / __n_;
887
+ if (__w0_ < _WDt)
888
+ {
889
+ __y0_ = (_Rp >> __w0_) << __w0_;
890
+ }
891
+ else
892
+ {
893
+ __y0_ = 0;
894
+ }
895
+ }
896
+ __n0_ = __n_ - __w_ % __n_; // the remaining __w_ % __n_ draws take __w0_ + 1 bits each (see __eval(true_type))
897
+ if (__w0_ < _WDt - 1)
898
+ {
899
+ __y1_ = (_Rp >> (__w0_ + 1)) << (__w0_ + 1); // _Rp with its low __w0_ + 1 bits cleared
900
+ }
901
+ else
902
+ {
903
+ __y1_ = 0;
904
+ }
905
+ __mask0_ = __w0_ > 0 ? _Engine_result_type(~0) >> (_EDt - __w0_) : _Engine_result_type(0); // low __w0_ bits set, or 0 when __w0_ == 0
906
+ __mask1_ = __w0_ < _EDt - 1 ? _Engine_result_type(~0) >> (_EDt - (__w0_ + 1)) : _Engine_result_type(~0); // low __w0_ + 1 bits set, or all bits
907
+ }
908
+
909
+ template <class _Engine, class _UIntType> // _Rp == 0 overload: a single engine draw, masked with __mask0_.
910
+ inline _UIntType __independent_bits_engine<_Engine, _UIntType>::__eval(false_type)
911
+ {
912
+ return static_cast<result_type>(__e_() & __mask0_);
913
+ }
914
+
915
+ template <class _Engine, class _UIntType> // _Rp != 0 overload: concatenates masked bits from __n_ draws, redrawing any value at or above the rejection bound.
916
+ _UIntType __independent_bits_engine<_Engine, _UIntType>::__eval(true_type)
917
+ {
918
+ const size_t _WRt = numeric_limits<result_type>::digits;
919
+ result_type _Sp = 0; // accumulated result
920
+ for (size_t __k = 0; __k < __n0_; ++__k) // first __n0_ draws: __w0_ bits each
921
+ {
922
+ _Engine_result_type __u;
923
+ do
924
+ {
925
+ __u = __e_() - _Engine::min(); // shift the draw so it starts at 0
926
+ } while (__u >= __y0_); // redraw until below the bound __y0_
927
+ if (__w0_ < _WRt) // only shift when the count is below the digit count of result_type
928
+ {
929
+ _Sp <<= __w0_;
930
+ }
931
+ else
932
+ {
933
+ _Sp = 0;
934
+ }
935
+ _Sp += __u & __mask0_;
936
+ }
937
+ for (size_t __k = __n0_; __k < __n_; ++__k) // remaining draws: __w0_ + 1 bits each
938
+ {
939
+ _Engine_result_type __u;
940
+ do
941
+ {
942
+ __u = __e_() - _Engine::min();
943
+ } while (__u >= __y1_); // redraw until below the bound __y1_
944
+ if (__w0_ < _WRt - 1)
945
+ {
946
+ _Sp <<= __w0_ + 1;
947
+ }
948
+ else
949
+ {
950
+ _Sp = 0;
951
+ }
952
+ _Sp += __u & __mask1_;
953
+ }
954
+ return _Sp;
955
+ }
956
+
957
+ // uniform_int_distribution
958
+
959
+ template <class _IntType = int> // Integer distribution over the closed range [a, b]; the bounds live in a param_type.
960
+ class uniform_int_distribution
961
+ {
962
+ public:
963
+ // types
964
+ typedef _IntType result_type;
965
+
966
+ class param_type // value type holding the two bounds
967
+ {
968
+ result_type __a_; // lower bound
969
+ result_type __b_; // upper bound
970
+
971
+ public:
972
+ typedef uniform_int_distribution distribution_type;
973
+
974
+ explicit param_type(result_type __a = 0, result_type __b = numeric_limits<result_type>::max()) // defaults: a = 0, b = largest result_type value
975
+ : __a_(__a)
976
+ , __b_(__b)
977
+ {}
978
+
979
+ result_type a() const
980
+ {
981
+ return __a_;
982
+ }
983
+ result_type b() const
984
+ {
985
+ return __b_;
986
+ }
987
+
988
+ friend bool operator==(const param_type& __x, const param_type& __y) // equal when both bounds match
989
+ {
990
+ return __x.__a_ == __y.__a_ && __x.__b_ == __y.__b_;
991
+ }
992
+ friend bool operator!=(const param_type& __x, const param_type& __y)
993
+ {
994
+ return !(__x == __y);
995
+ }
996
+ };
997
+
998
+ private:
999
+ param_type __p_; // the distribution's own parameters
1000
+
1001
+ public:
1002
+ // constructors and reset functions
1003
+ explicit uniform_int_distribution(result_type __a = 0, result_type __b = numeric_limits<result_type>::max())
1004
+ : __p_(param_type(__a, __b))
1005
+ {}
1006
+ explicit uniform_int_distribution(const param_type& __p)
1007
+ : __p_(__p)
1008
+ {}
1009
+ void reset() {} // no-op: the only member is __p_
1010
+
1011
+ // generating functions
1012
+ template <class _URNG>
1013
+ result_type operator()(_URNG& __g) // draws using the stored parameters
1014
+ {
1015
+ return (*this)(__g, __p_);
1016
+ }
1017
+ template <class _URNG>
1018
+ result_type operator()(_URNG& __g, const param_type& __p); // draws using caller-supplied parameters; defined out of line
1019
+
1020
+ // property functions
1021
+ result_type a() const
1022
+ {
1023
+ return __p_.a();
1024
+ }
1025
+ result_type b() const
1026
+ {
1027
+ return __p_.b();
1028
+ }
1029
+
1030
+ param_type param() const
1031
+ {
1032
+ return __p_;
1033
+ }
1034
+ void param(const param_type& __p)
1035
+ {
1036
+ __p_ = __p;
1037
+ }
1038
+
1039
+ result_type min() const // same as a()
1040
+ {
1041
+ return a();
1042
+ }
1043
+ result_type max() const // same as b()
1044
+ {
1045
+ return b();
1046
+ }
1047
+
1048
+ friend bool operator==(const uniform_int_distribution& __x, const uniform_int_distribution& __y) // compares the stored parameters
1049
+ {
1050
+ return __x.__p_ == __y.__p_;
1051
+ }
1052
+ friend bool operator!=(const uniform_int_distribution& __x, const uniform_int_distribution& __y)
1053
+ {
1054
+ return !(__x == __y);
1055
+ }
1056
+ };
1057
+
1058
+ template <class _IntType> // Draws a value in [__p.a(), __p.b()] from __g by generating __w-bit candidates and rejecting those >= the range size.
1059
+ template <class _URNG>
1060
+ typename uniform_int_distribution<_IntType>::result_type
1061
+ uniform_int_distribution<_IntType>::operator()(_URNG& __g, const param_type& __p)
1062
+ _CCCL_NO_SANITIZE("unsigned-integer-overflow")
1063
+ {
1064
+ typedef conditional_t<sizeof(result_type) <= sizeof(uint32_t), uint32_t, uint64_t> _UIntType; // 32-bit unsigned work type unless result_type is larger
1065
+ const _UIntType _Rp = _UIntType(__p.b()) - _UIntType(__p.a()) + _UIntType(1); // range size b - a + 1, computed in unsigned arithmetic
1066
+ if (_Rp == 1) // a == b: only one possible value, the generator is not consulted
1067
+ {
1068
+ return __p.a();
1069
+ }
1070
+ const size_t _Dt = numeric_limits<_UIntType>::digits;
1071
+ typedef __independent_bits_engine<_URNG, _UIntType> _Eng;
1072
+ if (_Rp == 0) // range size wrapped to 0 in _UIntType: return a full _Dt-bit draw unfiltered
1073
+ {
1074
+ return static_cast<result_type>(_Eng(__g, _Dt)());
1075
+ }
1076
+ size_t __w = _Dt - _CUDA_VSTD::countl_zero(_Rp) - 1; // index of the highest set bit of _Rp
1077
+ if ((_Rp & (std::numeric_limits<_UIntType>::max() >> (_Dt - __w))) != 0) // any lower bit set: _Rp is not a power of two, so one more bit is needed
1078
+ {
1079
+ ++__w;
1080
+ }
1081
+ _Eng __e(__g, __w);
1082
+ _UIntType __u;
1083
+ do
1084
+ {
1085
+ __u = __e();
1086
+ } while (__u >= _Rp); // redraw candidates that fall outside [0, _Rp)
1087
+ return static_cast<result_type>(__u + __p.a()); // shift back so the result starts at a
1088
+ }
1089
+
1090
+ template <class _PopulationIterator, class _SampleIterator, class _Distance, class _UniformRandomNumberGenerator> // Single-pass sampling overload (input_iterator_tag): writes through __output_iter by index.
1091
+ _LIBCUDACXX_HIDE_FROM_ABI _SampleIterator __sample(
1092
+ _PopulationIterator __first,
1093
+ _PopulationIterator __last,
1094
+ _SampleIterator __output_iter,
1095
+ _Distance __n,
1096
+ _UniformRandomNumberGenerator& __g,
1097
+ input_iterator_tag)
1098
+ {
1099
+ _Distance __k = 0; // number of population elements consumed so far
1100
+ for (; __first != __last && __k < __n; ++__first, (void) ++__k) // copy the first min(__n, population size) elements straight to the output
1101
+ {
1102
+ __output_iter[__k] = *__first;
1103
+ }
1104
+ _Distance __sz = __k; // number of output slots filled
1105
+ for (; __first != __last; ++__first, (void) ++__k) // every later element may overwrite one filled slot
1106
+ {
1107
+ _Distance __r = _CUDA_VSTD::uniform_int_distribution<_Distance>(0, __k)(__g); // index drawn from [0, __k]
1108
+ if (__r < __sz) // only indices that land inside the filled slots cause a replacement
1109
+ {
1110
+ __output_iter[__r] = *__first;
1111
+ }
1112
+ }
1113
+ return __output_iter + _CUDA_VSTD::min(__n, __k); // one past the last written slot
1114
+ }
1115
+
1116
+ template <class _PopulationIterator, class _SampleIterator, class _Distance, class _UniformRandomNumberGenerator> // Multi-pass sampling overload (forward_iterator_tag): measures the range first, then emits selected elements in order.
1117
+ _LIBCUDACXX_HIDE_FROM_ABI _SampleIterator __sample(
1118
+ _PopulationIterator __first,
1119
+ _PopulationIterator __last,
1120
+ _SampleIterator __output_iter,
1121
+ _Distance __n,
1122
+ _UniformRandomNumberGenerator& __g,
1123
+ forward_iterator_tag)
1124
+ {
1125
+ _Distance __unsampled_sz = _CUDA_VSTD::distance(__first, __last); // population elements not yet visited
1126
+ for (__n = _CUDA_VSTD::min(__n, __unsampled_sz); __n != 0; ++__first) // __n is clamped to the population size and counts the picks still needed
1127
+ {
1128
+ _Distance __r = _CUDA_VSTD::uniform_int_distribution<_Distance>(0, --__unsampled_sz)(__g); // index drawn from [0, remaining - 1]
1129
+ if (__r < __n) // select the current element
1130
+ {
1131
+ *__output_iter++ = *__first;
1132
+ --__n;
1133
+ }
1134
+ }
1135
+ return __output_iter; // one past the last element written
1136
+ }
1137
+
1138
+ template <class _PopulationIterator, class _SampleIterator, class _Distance, class _UniformRandomNumberGenerator> // Front end: validates the iterator categories and __n, then dispatches on the population iterator's category.
1139
+ _LIBCUDACXX_HIDE_FROM_ABI _SampleIterator __sample(
1140
+ _PopulationIterator __first,
1141
+ _PopulationIterator __last,
1142
+ _SampleIterator __output_iter,
1143
+ _Distance __n,
1144
+ _UniformRandomNumberGenerator& __g)
1145
+ {
1146
+ typedef typename iterator_traits<_PopulationIterator>::iterator_category _PopCategory;
1147
+ typedef typename iterator_traits<_PopulationIterator>::difference_type _Difference;
1148
+ static_assert(__is_cpp17_forward_iterator<_PopulationIterator>::value // a random-access output is only required when the population is not at least forward
1149
+ || __is_cpp17_random_access_iterator<_SampleIterator>::value,
1150
+ "SampleIterator must meet the requirements of RandomAccessIterator");
1151
+ typedef typename common_type<_Distance, _Difference>::type _CommonType; // the count is passed on in the common type of _Distance and the iterator's difference_type
1152
+ _CCCL_ASSERT(__n >= 0, "N must be a non-negative number."); // zero is accepted, so the message says non-negative rather than positive
1153
+ return _CUDA_VSTD::__sample(__first, __last, __output_iter, _CommonType(__n), __g, _PopCategory());
1154
+ }
1155
+
1156
+ template <class _PopulationIterator, class _SampleIterator, class _Distance, class _UniformRandomNumberGenerator> // Public entry point: forwards all arguments to __sample.
1157
+ _LIBCUDACXX_HIDE_FROM_ABI _SampleIterator sample(
1158
+ _PopulationIterator __first,
1159
+ _PopulationIterator __last,
1160
+ _SampleIterator __output_iter,
1161
+ _Distance __n,
1162
+ _UniformRandomNumberGenerator&& __g)
1163
+ {
1164
+ return _CUDA_VSTD::__sample(__first, __last, __output_iter, __n, __g); // __g is passed on as an lvalue
1165
+ }
1166
+
1167
+ template <class _RandomAccessIterator, class _UniformRandomNumberGenerator> // Randomly reorders [__first, __last) in place, drawing one index per position from __g.
1168
+ _LIBCUDACXX_HIDE_FROM_ABI void
1169
+ shuffle(_RandomAccessIterator __first, _RandomAccessIterator __last, _UniformRandomNumberGenerator&& __g)
1170
+ {
1171
+ typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
1172
+ typedef uniform_int_distribution<ptrdiff_t> _Dp;
1173
+ typedef typename _Dp::param_type _Pp;
1174
+ difference_type __d = __last - __first; // number of elements
1175
+ if (__d > 1) // ranges of 0 or 1 elements are left untouched
1176
+ {
1177
+ _Dp __uid; // one distribution object, re-parameterized on each iteration
1178
+ for (--__last, (void) --__d; __first < __last; ++__first, (void) --__d) // __d is the offset from __first to the last element
1179
+ {
1180
+ difference_type __i = __uid(__g, _Pp(0, __d)); // offset drawn from [0, __d]
1181
+ if (__i != difference_type(0)) // offset 0 would swap the element with itself
1182
+ {
1183
+ swap(*__first, *(__first + __i));
1184
+ }
1185
+ }
1186
+ }
1187
+ }
1188
+
1189
+ // stable_partition
1190
+
1191
+ template <class _Predicate, class _ForwardIterator, class _Distance, class _Pair> // Forward-iterator worker: partitions via the buffer __p when it is large enough, otherwise splits the range, recurses and rotates.
1192
+ _CCCL_HOST_DEVICE _ForwardIterator __stable_partition(
1193
+ _ForwardIterator __first,
1194
+ _ForwardIterator __last,
1195
+ _Predicate __pred,
1196
+ _Distance __len,
1197
+ _Pair __p,
1198
+ forward_iterator_tag __fit)
1199
+ {
1200
+ // *__first is known to be false
1201
+ // __len >= 1
1202
+ if (__len == 1)
1203
+ {
1204
+ return __first;
1205
+ }
1206
+ if (__len == 2)
1207
+ {
1208
+ _ForwardIterator __m = __first;
1209
+ if (__pred(*++__m)) // second element is true: exchange it with the leading false
1210
+ {
1211
+ swap(*__first, *__m);
1212
+ return __m;
1213
+ }
1214
+ return __first;
1215
+ }
1216
+ if (__len <= __p.second)
1217
+ { // The buffer is big enough to use
1218
+ typedef typename iterator_traits<_ForwardIterator>::value_type value_type;
1219
+ __destruct_n __d(0); // incremented after each placement-new below
1220
+ unique_ptr<value_type, __destruct_n&> __h(__p.first, __d); // uses __d as its deleter
1221
+ // Move the falses into the temporary buffer, and the trues to the front of the line
1222
+ // Update __first to always point to the end of the trues
1223
+ value_type* __t = __p.first; // next free slot in the buffer
1224
+ ::new (__t) value_type(_CUDA_VSTD::move(*__first));
1225
+ __d.__incr((value_type*) 0);
1226
+ ++__t;
1227
+ _ForwardIterator __i = __first;
1228
+ while (++__i != __last)
1229
+ {
1230
+ if (__pred(*__i))
1231
+ {
1232
+ *__first = _CUDA_VSTD::move(*__i);
1233
+ ++__first;
1234
+ }
1235
+ else
1236
+ {
1237
+ ::new (__t) value_type(_CUDA_VSTD::move(*__i));
1238
+ __d.__incr((value_type*) 0);
1239
+ ++__t;
1240
+ }
1241
+ }
1242
+ // All trues now at start of range, all falses in buffer
1243
+ // Move falses back into range, but don't mess up __first which points to first false
1244
+ __i = __first;
1245
+ for (value_type* __t2 = __p.first; __t2 < __t; ++__t2, (void) ++__i)
1246
+ {
1247
+ *__i = _CUDA_VSTD::move(*__t2);
1248
+ }
1249
+ // __h destructs moved-from values out of the temp buffer, but doesn't deallocate buffer
1250
+ return __first;
1251
+ }
1252
+ // Else not enough buffer, do in place
1253
+ // __len >= 3
1254
+ _ForwardIterator __m = __first;
1255
+ _Distance __len2 = __len / 2; // __len2 >= 2
1256
+ _CUDA_VSTD::advance(__m, __len2);
1257
+ // recurse on [__first, __m), *__first known to be false
1258
+ // F?????????????????
1259
+ // f m l
1260
+ typedef add_lvalue_reference_t<_Predicate> _PredRef; // the recursive calls receive the predicate by reference
1261
+ _ForwardIterator __first_false = __stable_partition<_PredRef>(__first, __m, __pred, __len2, __p, __fit);
1262
+ // TTTFFFFF??????????
1263
+ // f ff m l
1264
+ // recurse on [__m1, __last), after advancing __m1 from __m until *__m1 is false
1265
+ _ForwardIterator __m1 = __m;
1266
+ _ForwardIterator __second_false = __last; // value used when the whole second half turns out to be true
1267
+ _Distance __len_half = __len - __len2; // length of [__m1, __last), kept in step with __m1
1268
+ while (__pred(*__m1))
1269
+ {
1270
+ if (++__m1 == __last)
1271
+ {
1272
+ goto __second_half_done;
1273
+ }
1274
+ --__len_half;
1275
+ }
1276
+ // TTTFFFFFTTTF??????
1277
+ // f ff m m1 l
1278
+ __second_false = __stable_partition<_PredRef>(__m1, __last, __pred, __len_half, __p, __fit);
1279
+ __second_half_done:
1280
+ // TTTFFFFFTTTTTFFFFF
1281
+ // f ff m sf l
1282
+ return _CUDA_VSTD::rotate(__first_false, __m, __second_false);
1283
+ // TTTTTTTTFFFFFFFFFF
1284
+ // |
1285
+ }
1286
+
1287
+ template <class _Predicate, class _ForwardIterator> // Forward-iterator entry point: skips the leading trues, optionally obtains a temporary buffer, then calls the worker overload.
1288
+ _CCCL_HOST_DEVICE _ForwardIterator
1289
+ __stable_partition(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred, forward_iterator_tag)
1290
+ {
1291
+ const unsigned __alloc_limit = 3; // might want to make this a function of trivial assignment
1292
+ // Either prove all true and return __first or point to first false
1293
+ while (true)
1294
+ {
1295
+ if (__first == __last) // every element satisfied __pred (or the range was empty)
1296
+ {
1297
+ return __first;
1298
+ }
1299
+ if (!__pred(*__first))
1300
+ {
1301
+ break;
1302
+ }
1303
+ ++__first;
1304
+ }
1305
+ // We now have a reduced range [__first, __last)
1306
+ // *__first is known to be false
1307
+ typedef typename iterator_traits<_ForwardIterator>::difference_type difference_type;
1308
+ typedef typename iterator_traits<_ForwardIterator>::value_type value_type;
1309
+ difference_type __len = _CUDA_VSTD::distance(__first, __last);
1310
+ pair<value_type*, ptrdiff_t> __p(0, 0); // (buffer pointer, capacity); stays (0, 0) when no buffer is requested
1311
+ unique_ptr<value_type, __return_temporary_buffer> __h; // holds the buffer pointer once one is obtained
1312
+ if (__len >= __alloc_limit) // a buffer is only requested for ranges of at least __alloc_limit elements
1313
+ {
1314
+ __p = _CUDA_VSTD::get_temporary_buffer<value_type>(__len);
1315
+ __h.reset(__p.first);
1316
+ }
1317
+ return __stable_partition<add_lvalue_reference_t<_Predicate>>( // the worker receives the predicate by reference
1318
+ __first, __last, __pred, __len, __p, forward_iterator_tag());
1319
+ }
1320
+
1321
+ template <class _Predicate, class _BidirectionalIterator, class _Distance, class _Pair>
1322
+ _CCCL_HOST_DEVICE _BidirectionalIterator __stable_partition(
1323
+ _BidirectionalIterator __first,
1324
+ _BidirectionalIterator __last,
1325
+ _Predicate __pred,
1326
+ _Distance __len,
1327
+ _Pair __p,
1328
+ bidirectional_iterator_tag __bit)
1329
+ {
1330
+ // *__first is known to be false
1331
+ // *__last is known to be true
1332
+ // __len >= 2
1333
+ if (__len == 2)
1334
+ {
1335
+ swap(*__first, *__last);
1336
+ return __last;
1337
+ }
1338
+ if (__len == 3)
1339
+ {
1340
+ _BidirectionalIterator __m = __first;
1341
+ if (__pred(*++__m))
1342
+ {
1343
+ swap(*__first, *__m);
1344
+ swap(*__m, *__last);
1345
+ return __last;
1346
+ }
1347
+ swap(*__m, *__last);
1348
+ swap(*__first, *__m);
1349
+ return __m;
1350
+ }
1351
+ if (__len <= __p.second)
1352
+ { // The buffer is big enough to use
1353
+ typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type;
1354
+ __destruct_n __d(0);
1355
+ unique_ptr<value_type, __destruct_n&> __h(__p.first, __d);
1356
+ // Move the falses into the temporary buffer, and the trues to the front of the line
1357
+ // Update __first to always point to the end of the trues
1358
+ value_type* __t = __p.first;
1359
+ ::new (__t) value_type(_CUDA_VSTD::move(*__first));
1360
+ __d.__incr((value_type*) 0);
1361
+ ++__t;
1362
+ _BidirectionalIterator __i = __first;
1363
+ while (++__i != __last)
1364
+ {
1365
+ if (__pred(*__i))
1366
+ {
1367
+ *__first = _CUDA_VSTD::move(*__i);
1368
+ ++__first;
1369
+ }
1370
+ else
1371
+ {
1372
+ ::new (__t) value_type(_CUDA_VSTD::move(*__i));
1373
+ __d.__incr((value_type*) 0);
1374
+ ++__t;
1375
+ }
1376
+ }
1377
+ // move *__last, known to be true
1378
+ *__first = _CUDA_VSTD::move(*__i);
1379
+ __i = ++__first;
1380
+ // All trues now at start of range, all falses in buffer
1381
+ // Move falses back into range, but don't mess up __first which points to first false
1382
+ for (value_type* __t2 = __p.first; __t2 < __t; ++__t2, (void) ++__i)
1383
+ {
1384
+ *__i = _CUDA_VSTD::move(*__t2);
1385
+ }
1386
+ // __h destructs moved-from values out of the temp buffer, but doesn't deallocate buffer
1387
+ return __first;
1388
+ }
1389
+ // Else not enough buffer, do in place
1390
+ // __len >= 4
1391
+ _BidirectionalIterator __m = __first;
1392
+ _Distance __len2 = __len / 2; // __len2 >= 2
1393
+ _CUDA_VSTD::advance(__m, __len2);
1394
+ // recurse on [__first, __m-1], except reduce __m-1 until *(__m-1) is true, *__first know to be false
1395
+ // F????????????????T
1396
+ // f m l
1397
+ _BidirectionalIterator __m1 = __m;
1398
+ _BidirectionalIterator __first_false = __first;
1399
+ _Distance __len_half = __len2;
1400
+ while (!__pred(*--__m1))
1401
+ {
1402
+ if (__m1 == __first)
1403
+ {
1404
+ goto __first_half_done;
1405
+ }
1406
+ --__len_half;
1407
+ }
1408
+ // F???TFFF?????????T
1409
+ // f m1 m l
1410
+ typedef add_lvalue_reference_t<_Predicate> _PredRef;
1411
+ __first_false = __stable_partition<_PredRef>(__first, __m1, __pred, __len_half, __p, __bit);
1412
+ __first_half_done:
1413
+ // TTTFFFFF?????????T
1414
+ // f ff m l
1415
+ // recurse on [__m, __last], except increase __m until *(__m) is false, *__last know to be true
1416
+ __m1 = __m;
1417
+ _BidirectionalIterator __second_false = __last;
1418
+ ++__second_false;
1419
+ __len_half = __len - __len2;
1420
+ while (__pred(*__m1))
1421
+ {
1422
+ if (++__m1 == __last)
1423
+ {
1424
+ goto __second_half_done;
1425
+ }
1426
+ --__len_half;
1427
+ }
1428
+ // TTTFFFFFTTTF?????T
1429
+ // f ff m m1 l
1430
+ __second_false = __stable_partition<_PredRef>(__m1, __last, __pred, __len_half, __p, __bit);
1431
+ __second_half_done:
1432
+ // TTTFFFFFTTTTTFFFFF
1433
+ // f ff m sf l
1434
+ return _CUDA_VSTD::rotate(__first_false, __m, __second_false);
1435
+ // TTTTTTTTFFFFFFFFFF
1436
+ // |
1437
+ }
1438
+
1439
+ template <class _Predicate, class _BidirectionalIterator>
1440
+ _CCCL_HOST_DEVICE _BidirectionalIterator __stable_partition(
1441
+ _BidirectionalIterator __first, _BidirectionalIterator __last, _Predicate __pred, bidirectional_iterator_tag)
1442
+ {
1443
+ typedef typename iterator_traits<_BidirectionalIterator>::difference_type difference_type;
1444
+ typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type;
1445
+ const difference_type __alloc_limit = 4; // might want to make this a function of trivial assignment
1446
+ // Either prove all true and return __first or point to first false
1447
+ while (true)
1448
+ {
1449
+ if (__first == __last)
1450
+ {
1451
+ return __first;
1452
+ }
1453
+ if (!__pred(*__first))
1454
+ {
1455
+ break;
1456
+ }
1457
+ ++__first;
1458
+ }
1459
+ // __first points to first false, everything prior to __first is already set.
1460
+ // Either prove [__first, __last) is all false and return __first, or point __last to last true
1461
+ do
1462
+ {
1463
+ if (__first == --__last)
1464
+ {
1465
+ return __first;
1466
+ }
1467
+ } while (!__pred(*__last));
1468
+ // We now have a reduced range [__first, __last]
1469
+ // *__first is known to be false
1470
+ // *__last is known to be true
1471
+ // __len >= 2
1472
+ difference_type __len = _CUDA_VSTD::distance(__first, __last) + 1;
1473
+ pair<value_type*, ptrdiff_t> __p(0, 0);
1474
+ unique_ptr<value_type, __return_temporary_buffer> __h;
1475
+ if (__len >= __alloc_limit)
1476
+ {
1477
+ __p = _CUDA_VSTD::get_temporary_buffer<value_type>(__len);
1478
+ __h.reset(__p.first);
1479
+ }
1480
+ return __stable_partition<add_lvalue_reference_t<_Predicate>>(
1481
+ __first, __last, __pred, __len, __p, bidirectional_iterator_tag());
1482
+ }
1483
+
1484
+ template <class _ForwardIterator, class _Predicate>
1485
+ _LIBCUDACXX_HIDE_FROM_ABI _ForwardIterator
1486
+ stable_partition(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred)
1487
+ {
1488
+ return __stable_partition<add_lvalue_reference_t<_Predicate>>(
1489
+ __first, __last, __pred, typename iterator_traits<_ForwardIterator>::iterator_category());
1490
+ }
1491
+
1492
+ // sort
1493
+
1494
+ // stable, 2-3 compares, 0-2 swaps
1495
+
1496
+ template <class _Compare, class _ForwardIterator>
1497
+ _CCCL_HOST_DEVICE unsigned __sort3(_ForwardIterator __x, _ForwardIterator __y, _ForwardIterator __z, _Compare __c)
1498
+ {
1499
+ unsigned __r = 0;
1500
+ if (!__c(*__y, *__x)) // if x <= y
1501
+ {
1502
+ if (!__c(*__z, *__y)) // if y <= z
1503
+ {
1504
+ return __r; // x <= y && y <= z
1505
+ // x <= y && y > z
1506
+ }
1507
+ swap(*__y, *__z); // x <= z && y < z
1508
+ __r = 1;
1509
+ if (__c(*__y, *__x)) // if x > y
1510
+ {
1511
+ swap(*__x, *__y); // x < y && y <= z
1512
+ __r = 2;
1513
+ }
1514
+ return __r; // x <= y && y < z
1515
+ }
1516
+ if (__c(*__z, *__y)) // x > y, if y > z
1517
+ {
1518
+ swap(*__x, *__z); // x < y && y < z
1519
+ __r = 1;
1520
+ return __r;
1521
+ }
1522
+ swap(*__x, *__y); // x > y && y <= z
1523
+ __r = 1; // x < y && x <= z
1524
+ if (__c(*__z, *__y)) // if y > z
1525
+ {
1526
+ swap(*__y, *__z); // x <= y && y < z
1527
+ __r = 2;
1528
+ }
1529
+ return __r;
1530
+ } // x <= y && y <= z
1531
+
1532
+ // stable, 3-6 compares, 0-5 swaps
1533
+
1534
+ template <class _Compare, class _ForwardIterator>
1535
+ _CCCL_HOST_DEVICE unsigned
1536
+ __sort4(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3, _ForwardIterator __x4, _Compare __c)
1537
+ {
1538
+ unsigned __r = __sort3<_Compare>(__x1, __x2, __x3, __c);
1539
+ if (__c(*__x4, *__x3))
1540
+ {
1541
+ swap(*__x3, *__x4);
1542
+ ++__r;
1543
+ if (__c(*__x3, *__x2))
1544
+ {
1545
+ swap(*__x2, *__x3);
1546
+ ++__r;
1547
+ if (__c(*__x2, *__x1))
1548
+ {
1549
+ swap(*__x1, *__x2);
1550
+ ++__r;
1551
+ }
1552
+ }
1553
+ }
1554
+ return __r;
1555
+ }
1556
+
1557
+ // stable, 4-10 compares, 0-9 swaps
1558
+
1559
+ template <class _Compare, class _ForwardIterator>
1560
+ _CCCL_VISIBILITY_HIDDEN _CCCL_HOST_DEVICE unsigned __sort5(
1561
+ _ForwardIterator __x1,
1562
+ _ForwardIterator __x2,
1563
+ _ForwardIterator __x3,
1564
+ _ForwardIterator __x4,
1565
+ _ForwardIterator __x5,
1566
+ _Compare __c)
1567
+ {
1568
+ unsigned __r = __sort4<_Compare>(__x1, __x2, __x3, __x4, __c);
1569
+ if (__c(*__x5, *__x4))
1570
+ {
1571
+ swap(*__x4, *__x5);
1572
+ ++__r;
1573
+ if (__c(*__x4, *__x3))
1574
+ {
1575
+ swap(*__x3, *__x4);
1576
+ ++__r;
1577
+ if (__c(*__x3, *__x2))
1578
+ {
1579
+ swap(*__x2, *__x3);
1580
+ ++__r;
1581
+ if (__c(*__x2, *__x1))
1582
+ {
1583
+ swap(*__x1, *__x2);
1584
+ ++__r;
1585
+ }
1586
+ }
1587
+ }
1588
+ }
1589
+ return __r;
1590
+ }
1591
+
1592
+ // Assumes size > 0
1593
+ template <class _Compare, class _BirdirectionalIterator>
1594
+ _CCCL_HOST_DEVICE void __selection_sort(_BirdirectionalIterator __first, _BirdirectionalIterator __last, _Compare __comp)
1595
+ {
1596
+ _BirdirectionalIterator __lm1 = __last;
1597
+ for (--__lm1; __first != __lm1; ++__first)
1598
+ {
1599
+ _BirdirectionalIterator __i =
1600
+ _CUDA_VSTD::min_element<_BirdirectionalIterator, add_lvalue_reference_t<_Compare>>(__first, __last, __comp);
1601
+ if (__i != __first)
1602
+ {
1603
+ swap(*__first, *__i);
1604
+ }
1605
+ }
1606
+ }
1607
+
1608
+ template <class _Compare, class _BirdirectionalIterator>
1609
+ _CCCL_HOST_DEVICE void __insertion_sort(_BirdirectionalIterator __first, _BirdirectionalIterator __last, _Compare __comp)
1610
+ {
1611
+ typedef typename iterator_traits<_BirdirectionalIterator>::value_type value_type;
1612
+ if (__first != __last)
1613
+ {
1614
+ _BirdirectionalIterator __i = __first;
1615
+ for (++__i; __i != __last; ++__i)
1616
+ {
1617
+ _BirdirectionalIterator __j = __i;
1618
+ value_type __t(_CUDA_VSTD::move(*__j));
1619
+ for (_BirdirectionalIterator __k = __i; __k != __first && __comp(__t, *--__k); --__j)
1620
+ {
1621
+ *__j = _CUDA_VSTD::move(*__k);
1622
+ }
1623
+ *__j = _CUDA_VSTD::move(__t);
1624
+ }
1625
+ }
1626
+ }
1627
+
1628
+ template <class _Compare, class _RandomAccessIterator>
1629
+ _CCCL_HOST_DEVICE void __insertion_sort_3(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp)
1630
+ {
1631
+ typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
1632
+ _RandomAccessIterator __j = __first + 2;
1633
+ __sort3<_Compare>(__first, __first + 1, __j, __comp);
1634
+ for (_RandomAccessIterator __i = __j + 1; __i != __last; ++__i)
1635
+ {
1636
+ if (__comp(*__i, *__j))
1637
+ {
1638
+ value_type __t(_CUDA_VSTD::move(*__i));
1639
+ _RandomAccessIterator __k = __j;
1640
+ __j = __i;
1641
+ do
1642
+ {
1643
+ *__j = _CUDA_VSTD::move(*__k);
1644
+ __j = __k;
1645
+ } while (__j != __first && __comp(__t, *--__k));
1646
+ *__j = _CUDA_VSTD::move(__t);
1647
+ }
1648
+ __j = __i;
1649
+ }
1650
+ }
1651
+
1652
+ template <class _Compare, class _RandomAccessIterator>
1653
+ _CCCL_HOST_DEVICE bool
1654
+ __insertion_sort_incomplete(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp)
1655
+ {
1656
+ switch (__last - __first)
1657
+ {
1658
+ case 0:
1659
+ case 1:
1660
+ return true;
1661
+ case 2:
1662
+ if (__comp(*--__last, *__first))
1663
+ {
1664
+ swap(*__first, *__last);
1665
+ }
1666
+ return true;
1667
+ case 3:
1668
+ _CUDA_VSTD::__sort3<_Compare>(__first, __first + 1, --__last, __comp);
1669
+ return true;
1670
+ case 4:
1671
+ _CUDA_VSTD::__sort4<_Compare>(__first, __first + 1, __first + 2, --__last, __comp);
1672
+ return true;
1673
+ case 5:
1674
+ _CUDA_VSTD::__sort5<_Compare>(__first, __first + 1, __first + 2, __first + 3, --__last, __comp);
1675
+ return true;
1676
+ }
1677
+ typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
1678
+ _RandomAccessIterator __j = __first + 2;
1679
+ __sort3<_Compare>(__first, __first + 1, __j, __comp);
1680
+ const unsigned __limit = 8;
1681
+ unsigned __count = 0;
1682
+ for (_RandomAccessIterator __i = __j + 1; __i != __last; ++__i)
1683
+ {
1684
+ if (__comp(*__i, *__j))
1685
+ {
1686
+ value_type __t(_CUDA_VSTD::move(*__i));
1687
+ _RandomAccessIterator __k = __j;
1688
+ __j = __i;
1689
+ do
1690
+ {
1691
+ *__j = _CUDA_VSTD::move(*__k);
1692
+ __j = __k;
1693
+ } while (__j != __first && __comp(__t, *--__k));
1694
+ *__j = _CUDA_VSTD::move(__t);
1695
+ if (++__count == __limit)
1696
+ {
1697
+ return ++__i == __last;
1698
+ }
1699
+ }
1700
+ __j = __i;
1701
+ }
1702
+ return true;
1703
+ }
1704
+
1705
+ template <class _Compare, class _BirdirectionalIterator>
1706
+ _CCCL_HOST_DEVICE void __insertion_sort_move(
1707
+ _BirdirectionalIterator __first1,
1708
+ _BirdirectionalIterator __last1,
1709
+ typename iterator_traits<_BirdirectionalIterator>::value_type* __first2,
1710
+ _Compare __comp)
1711
+ {
1712
+ typedef typename iterator_traits<_BirdirectionalIterator>::value_type value_type;
1713
+ if (__first1 != __last1)
1714
+ {
1715
+ __destruct_n __d(0);
1716
+ unique_ptr<value_type, __destruct_n&> __h(__first2, __d);
1717
+ value_type* __last2 = __first2;
1718
+ ::new (__last2) value_type(_CUDA_VSTD::move(*__first1));
1719
+ __d.__incr((value_type*) 0);
1720
+ for (++__last2; ++__first1 != __last1; ++__last2)
1721
+ {
1722
+ value_type* __j2 = __last2;
1723
+ value_type* __i2 = __j2;
1724
+ if (__comp(*__first1, *--__i2))
1725
+ {
1726
+ ::new (__j2) value_type(_CUDA_VSTD::move(*__i2));
1727
+ __d.__incr((value_type*) 0);
1728
+ for (--__j2; __i2 != __first2 && __comp(*__first1, *--__i2); --__j2)
1729
+ {
1730
+ *__j2 = _CUDA_VSTD::move(*__i2);
1731
+ }
1732
+ *__j2 = _CUDA_VSTD::move(*__first1);
1733
+ }
1734
+ else
1735
+ {
1736
+ ::new (__j2) value_type(_CUDA_VSTD::move(*__first1));
1737
+ __d.__incr((value_type*) 0);
1738
+ }
1739
+ }
1740
+ __h.release();
1741
+ }
1742
+ }
1743
+
1744
+ template <class _Compare, class _RandomAccessIterator>
1745
+ _CCCL_HOST_DEVICE void __sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp)
1746
+ {
1747
+ // _Compare is known to be a reference type
1748
+ typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
1749
+ typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
1750
+ const difference_type __limit =
1751
+ is_trivially_copy_constructible<value_type>::value && is_trivially_copy_assignable<value_type>::value ? 30 : 6;
1752
+ while (true)
1753
+ {
1754
+ __restart:
1755
+ difference_type __len = __last - __first;
1756
+ switch (__len)
1757
+ {
1758
+ case 0:
1759
+ case 1:
1760
+ return;
1761
+ case 2:
1762
+ if (__comp(*--__last, *__first))
1763
+ {
1764
+ swap(*__first, *__last);
1765
+ }
1766
+ return;
1767
+ case 3:
1768
+ _CUDA_VSTD::__sort3<_Compare>(__first, __first + 1, --__last, __comp);
1769
+ return;
1770
+ case 4:
1771
+ _CUDA_VSTD::__sort4<_Compare>(__first, __first + 1, __first + 2, --__last, __comp);
1772
+ return;
1773
+ case 5:
1774
+ _CUDA_VSTD::__sort5<_Compare>(__first, __first + 1, __first + 2, __first + 3, --__last, __comp);
1775
+ return;
1776
+ }
1777
+ if (__len <= __limit)
1778
+ {
1779
+ _CUDA_VSTD::__insertion_sort_3<_Compare>(__first, __last, __comp);
1780
+ return;
1781
+ }
1782
+ // __len > 5
1783
+ _RandomAccessIterator __m = __first;
1784
+ _RandomAccessIterator __lm1 = __last;
1785
+ --__lm1;
1786
+ unsigned __n_swaps;
1787
+ {
1788
+ difference_type __delta;
1789
+ if (__len >= 1000)
1790
+ {
1791
+ __delta = __len / 2;
1792
+ __m += __delta;
1793
+ __delta /= 2;
1794
+ __n_swaps = _CUDA_VSTD::__sort5<_Compare>(__first, __first + __delta, __m, __m + __delta, __lm1, __comp);
1795
+ }
1796
+ else
1797
+ {
1798
+ __delta = __len / 2;
1799
+ __m += __delta;
1800
+ __n_swaps = _CUDA_VSTD::__sort3<_Compare>(__first, __m, __lm1, __comp);
1801
+ }
1802
+ }
1803
+ // *__m is median
1804
+ // partition [__first, __m) < *__m and *__m <= [__m, __last)
1805
+ // (this inhibits tossing elements equivalent to __m around unnecessarily)
1806
+ _RandomAccessIterator __i = __first;
1807
+ _RandomAccessIterator __j = __lm1;
1808
+ // j points beyond range to be tested, *__m is known to be <= *__lm1
1809
+ // The search going up is known to be guarded but the search coming down isn't.
1810
+ // Prime the downward search with a guard.
1811
+ if (!__comp(*__i, *__m)) // if *__first == *__m
1812
+ {
1813
+ // *__first == *__m, *__first doesn't go in first part
1814
+ // manually guard downward moving __j against __i
1815
+ while (true)
1816
+ {
1817
+ if (__i == --__j)
1818
+ {
1819
+ // *__first == *__m, *__m <= all other elements
1820
+ // Partition instead into [__first, __i) == *__first and *__first < [__i, __last)
1821
+ ++__i; // __first + 1
1822
+ __j = __last;
1823
+ if (!__comp(*__first, *--__j)) // we need a guard if *__first == *(__last-1)
1824
+ {
1825
+ while (true)
1826
+ {
1827
+ if (__i == __j)
1828
+ {
1829
+ return; // [__first, __last) all equivalent elements
1830
+ }
1831
+ if (__comp(*__first, *__i))
1832
+ {
1833
+ swap(*__i, *__j);
1834
+ ++__n_swaps;
1835
+ ++__i;
1836
+ break;
1837
+ }
1838
+ ++__i;
1839
+ }
1840
+ }
1841
+ // [__first, __i) == *__first and *__first < [__j, __last) and __j == __last - 1
1842
+ if (__i == __j)
1843
+ {
1844
+ return;
1845
+ }
1846
+ while (true)
1847
+ {
1848
+ while (!__comp(*__first, *__i))
1849
+ {
1850
+ ++__i;
1851
+ }
1852
+ while (__comp(*__first, *--__j))
1853
+ ;
1854
+ if (__i >= __j)
1855
+ {
1856
+ break;
1857
+ }
1858
+ swap(*__i, *__j);
1859
+ ++__n_swaps;
1860
+ ++__i;
1861
+ }
1862
+ // [__first, __i) == *__first and *__first < [__i, __last)
1863
+ // The first part is sorted, sort the second part
1864
+ // _CUDA_VSTD::__sort<_Compare>(__i, __last, __comp);
1865
+ __first = __i;
1866
+ goto __restart;
1867
+ }
1868
+ if (__comp(*__j, *__m))
1869
+ {
1870
+ swap(*__i, *__j);
1871
+ ++__n_swaps;
1872
+ break; // found guard for downward moving __j, now use unguarded partition
1873
+ }
1874
+ }
1875
+ }
1876
+ // It is known that *__i < *__m
1877
+ ++__i;
1878
+ // j points beyond range to be tested, *__m is known to be <= *__lm1
1879
+ // if not yet partitioned...
1880
+ if (__i < __j)
1881
+ {
1882
+ // known that *(__i - 1) < *__m
1883
+ // known that __i <= __m
1884
+ while (true)
1885
+ {
1886
+ // __m still guards upward moving __i
1887
+ while (__comp(*__i, *__m))
1888
+ {
1889
+ ++__i;
1890
+ }
1891
+ // It is now known that a guard exists for downward moving __j
1892
+ while (!__comp(*--__j, *__m))
1893
+ ;
1894
+ if (__i > __j)
1895
+ {
1896
+ break;
1897
+ }
1898
+ swap(*__i, *__j);
1899
+ ++__n_swaps;
1900
+ // It is known that __m != __j
1901
+ // If __m just moved, follow it
1902
+ if (__m == __i)
1903
+ {
1904
+ __m = __j;
1905
+ }
1906
+ ++__i;
1907
+ }
1908
+ }
1909
+ // [__first, __i) < *__m and *__m <= [__i, __last)
1910
+ if (__i != __m && __comp(*__m, *__i))
1911
+ {
1912
+ swap(*__i, *__m);
1913
+ ++__n_swaps;
1914
+ }
1915
+ // [__first, __i) < *__i and *__i <= [__i+1, __last)
1916
+ // If we were given a perfect partition, see if insertion sort is quick...
1917
+ if (__n_swaps == 0)
1918
+ {
1919
+ bool __fs = _CUDA_VSTD::__insertion_sort_incomplete<_Compare>(__first, __i, __comp);
1920
+ if (_CUDA_VSTD::__insertion_sort_incomplete<_Compare>(__i + 1, __last, __comp))
1921
+ {
1922
+ if (__fs)
1923
+ {
1924
+ return;
1925
+ }
1926
+ __last = __i;
1927
+ continue;
1928
+ }
1929
+ else
1930
+ {
1931
+ if (__fs)
1932
+ {
1933
+ __first = ++__i;
1934
+ continue;
1935
+ }
1936
+ }
1937
+ }
1938
+ // sort smaller range with recursive call and larger with tail recursion elimination
1939
+ if (__i - __first < __last - __i)
1940
+ {
1941
+ _CUDA_VSTD::__sort<_Compare>(__first, __i, __comp);
1942
+ // _CUDA_VSTD::__sort<_Compare>(__i+1, __last, __comp);
1943
+ __first = ++__i;
1944
+ }
1945
+ else
1946
+ {
1947
+ _CUDA_VSTD::__sort<_Compare>(__i + 1, __last, __comp);
1948
+ // _CUDA_VSTD::__sort<_Compare>(__first, __i, __comp);
1949
+ __last = __i;
1950
+ }
1951
+ }
1952
+ }
1953
+
1954
+ // This forwarder keeps the top call and the recursive calls using the same instantiation, forcing a reference _Compare
1955
+ template <class _RandomAccessIterator, class _Compare>
1956
+ _LIBCUDACXX_HIDE_FROM_ABI void sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp)
1957
+ {
1958
+ using _Comp_ref = __comp_ref_type<_Compare>;
1959
+ _CUDA_VSTD::__sort<_Comp_ref>(__first, __last, _Comp_ref(__comp));
1960
+ }
1961
+
1962
+ template <class _RandomAccessIterator>
1963
+ _LIBCUDACXX_HIDE_FROM_ABI void sort(_RandomAccessIterator __first, _RandomAccessIterator __last)
1964
+ {
1965
+ _CUDA_VSTD::sort(__first, __last, __less{});
1966
+ }
1967
+
1968
+ template <class _Tp>
1969
+ _LIBCUDACXX_HIDE_FROM_ABI void sort(_Tp** __first, _Tp** __last)
1970
+ {
1971
+ _CUDA_VSTD::sort((size_t*) __first, (size_t*) __last, __less{});
1972
+ }
1973
+
1974
+ template <class _Tp>
1975
+ _LIBCUDACXX_HIDE_FROM_ABI void sort(__wrap_iter<_Tp*> __first, __wrap_iter<_Tp*> __last)
1976
+ {
1977
+ _CUDA_VSTD::sort(__first.base(), __last.base());
1978
+ }
1979
+
1980
+ template <class _Tp, class _Compare>
1981
+ _LIBCUDACXX_HIDE_FROM_ABI void sort(__wrap_iter<_Tp*> __first, __wrap_iter<_Tp*> __last, _Compare __comp)
1982
+ {
1983
+ typedef add_lvalue_reference_t<_Compare> _Comp_ref;
1984
+ _CUDA_VSTD::sort<_Tp*, _Comp_ref>(__first.base(), __last.base(), __comp);
1985
+ }
1986
+
1987
+ _LIBCUDACXX_EXTERN_TEMPLATE(_LIBCUDACXX_HIDE_FROM_ABI void __sort<__less&, char*>(char*, char*, __less&))
1988
+ _LIBCUDACXX_EXTERN_TEMPLATE(_LIBCUDACXX_HIDE_FROM_ABI void __sort<__less&, wchar_t*>(wchar_t*, wchar_t*, __less&))
1989
+ _LIBCUDACXX_EXTERN_TEMPLATE(_LIBCUDACXX_HIDE_FROM_ABI void __sort<__less&, signed*>(signed*, signed*, __less&))
1990
+ _LIBCUDACXX_EXTERN_TEMPLATE(
1991
+ _LIBCUDACXX_HIDE_FROM_ABI void __sort<__less&, unsigned char*>(unsigned char*, unsigned char*, __less&))
1992
+ _LIBCUDACXX_EXTERN_TEMPLATE(_LIBCUDACXX_HIDE_FROM_ABI void __sort<__less&, short*>(short*, short*, __less&))
1993
+ _LIBCUDACXX_EXTERN_TEMPLATE(
1994
+ _LIBCUDACXX_HIDE_FROM_ABI void __sort<__less&, unsigned short*>(unsigned short*, unsigned short*, __less&))
1995
+ _LIBCUDACXX_EXTERN_TEMPLATE(_LIBCUDACXX_HIDE_FROM_ABI void __sort<__less&, int*>(int*, int*, __less&))
1996
+ _LIBCUDACXX_EXTERN_TEMPLATE(_LIBCUDACXX_HIDE_FROM_ABI void __sort<__less&, unsigned*>(unsigned*, unsigned*, __less&))
1997
+ _LIBCUDACXX_EXTERN_TEMPLATE(_LIBCUDACXX_HIDE_FROM_ABI void __sort<__less&, long*>(long*, long*, __less&))
1998
+ _LIBCUDACXX_EXTERN_TEMPLATE(
1999
+ _LIBCUDACXX_HIDE_FROM_ABI void __sort<__less&, unsigned long*>(unsigned long*, unsigned long*, __less&))
2000
+ _LIBCUDACXX_EXTERN_TEMPLATE(_LIBCUDACXX_HIDE_FROM_ABI void __sort<__less&, long long*>(long long*, long long*, __less&))
2001
+ _LIBCUDACXX_EXTERN_TEMPLATE(_LIBCUDACXX_HIDE_FROM_ABI void __sort<__less&, unsigned long long*>(
2002
+ unsigned long long*, unsigned long long*, __less&))
2003
+ _LIBCUDACXX_EXTERN_TEMPLATE(_LIBCUDACXX_HIDE_FROM_ABI void __sort<__less&, float*>(float*, float*, __less&))
2004
+ _LIBCUDACXX_EXTERN_TEMPLATE(_LIBCUDACXX_HIDE_FROM_ABI void __sort<__less&, double*>(double*, double*, __less&))
2005
+ _LIBCUDACXX_EXTERN_TEMPLATE(
2006
+ _LIBCUDACXX_HIDE_FROM_ABI void __sort<__less&, long double*>(long double*, long double*, __less&))
2007
+
2008
+ _LIBCUDACXX_EXTERN_TEMPLATE(
2009
+ _LIBCUDACXX_HIDE_FROM_ABI bool __insertion_sort_incomplete<__less&, char*>(char*, char*, __less&))
2010
+ _LIBCUDACXX_EXTERN_TEMPLATE(
2011
+ _LIBCUDACXX_HIDE_FROM_ABI bool __insertion_sort_incomplete<__less&, wchar_t*>(wchar_t*, wchar_t*, __less&))
2012
+ _LIBCUDACXX_EXTERN_TEMPLATE(_LIBCUDACXX_HIDE_FROM_ABI bool __insertion_sort_incomplete<__less&, signed char*>(
2013
+ signed char*, signed char*, __less&))
2014
+ _LIBCUDACXX_EXTERN_TEMPLATE(_LIBCUDACXX_HIDE_FROM_ABI bool __insertion_sort_incomplete<__less&, unsigned char*>(
2015
+ unsigned char*, unsigned char*, __less&))
2016
+ _LIBCUDACXX_EXTERN_TEMPLATE(
2017
+ _LIBCUDACXX_HIDE_FROM_ABI bool __insertion_sort_incomplete<__less&, short*>(short*, short*, __less&))
2018
+ _LIBCUDACXX_EXTERN_TEMPLATE(_LIBCUDACXX_HIDE_FROM_ABI bool __insertion_sort_incomplete<__less&, unsigned short*>(
2019
+ unsigned short*, unsigned short*, __less&))
2020
+ _LIBCUDACXX_EXTERN_TEMPLATE(
2021
+ _LIBCUDACXX_HIDE_FROM_ABI bool __insertion_sort_incomplete<__less&, int*>(int*, int*, __less&))
2022
+ _LIBCUDACXX_EXTERN_TEMPLATE(
2023
+ _LIBCUDACXX_HIDE_FROM_ABI bool __insertion_sort_incomplete<__less&, unsigned*>(unsigned*, unsigned*, __less&))
2024
+ _LIBCUDACXX_EXTERN_TEMPLATE(
2025
+ _LIBCUDACXX_HIDE_FROM_ABI bool __insertion_sort_incomplete<__less&, long*>(long*, long*, __less&))
2026
+ _LIBCUDACXX_EXTERN_TEMPLATE(_LIBCUDACXX_HIDE_FROM_ABI bool __insertion_sort_incomplete<__less&, unsigned long*>(
2027
+ unsigned long*, unsigned long*, __less&))
2028
+ _LIBCUDACXX_EXTERN_TEMPLATE(
2029
+ _LIBCUDACXX_HIDE_FROM_ABI bool __insertion_sort_incomplete<__less&, long long*>(long long*, long long*, __less&))
2030
+ _LIBCUDACXX_EXTERN_TEMPLATE(_LIBCUDACXX_HIDE_FROM_ABI bool __insertion_sort_incomplete<__less&, unsigned long long*>(
2031
+ unsigned long long*, unsigned long long*, __less&))
2032
+ _LIBCUDACXX_EXTERN_TEMPLATE(
2033
+ _LIBCUDACXX_HIDE_FROM_ABI bool __insertion_sort_incomplete<__less&, float*>(float*, float*, __less&))
2034
+ _LIBCUDACXX_EXTERN_TEMPLATE(
2035
+ _LIBCUDACXX_HIDE_FROM_ABI bool __insertion_sort_incomplete<__less&, double*>(double*, double*, __less&))
2036
+ _LIBCUDACXX_EXTERN_TEMPLATE(_LIBCUDACXX_HIDE_FROM_ABI bool __insertion_sort_incomplete<__less&, long double*>(
2037
+ long double*, long double*, __less&))
2038
+
2039
+ _LIBCUDACXX_EXTERN_TEMPLATE(_LIBCUDACXX_HIDE_FROM_ABI unsigned __sort5<__less&, long double*>(
2040
+ long double*, long double*, long double*, long double*, long double*, __less&))
2041
+
2042
+ // inplace_merge
2043
+
2044
+ template <class _Compare, class _InputIterator1, class _InputIterator2, class _OutputIterator>
2045
+ _CCCL_HOST_DEVICE void __half_inplace_merge(
2046
+ _InputIterator1 __first1,
2047
+ _InputIterator1 __last1,
2048
+ _InputIterator2 __first2,
2049
+ _InputIterator2 __last2,
2050
+ _OutputIterator __result,
2051
+ _Compare __comp)
2052
+ {
2053
+ for (; __first1 != __last1; ++__result)
2054
+ {
2055
+ if (__first2 == __last2)
2056
+ {
2057
+ _CUDA_VSTD::move(__first1, __last1, __result);
2058
+ return;
2059
+ }
2060
+
2061
+ if (__comp(*__first2, *__first1))
2062
+ {
2063
+ *__result = _CUDA_VSTD::move(*__first2);
2064
+ ++__first2;
2065
+ }
2066
+ else
2067
+ {
2068
+ *__result = _CUDA_VSTD::move(*__first1);
2069
+ ++__first1;
2070
+ }
2071
+ }
2072
+ // __first2 through __last2 are already in the right spot.
2073
+ }
2074
+
2075
+ template <class _Compare, class _BidirectionalIterator>
2076
+ _CCCL_HOST_DEVICE void __buffered_inplace_merge(
2077
+ _BidirectionalIterator __first,
2078
+ _BidirectionalIterator __middle,
2079
+ _BidirectionalIterator __last,
2080
+ _Compare __comp,
2081
+ typename iterator_traits<_BidirectionalIterator>::difference_type __len1,
2082
+ typename iterator_traits<_BidirectionalIterator>::difference_type __len2,
2083
+ typename iterator_traits<_BidirectionalIterator>::value_type* __buff)
2084
+ {
2085
+ typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type;
2086
+ __destruct_n __d(0);
2087
+ unique_ptr<value_type, __destruct_n&> __h2(__buff, __d);
2088
+ if (__len1 <= __len2)
2089
+ {
2090
+ value_type* __p = __buff;
2091
+ for (_BidirectionalIterator __i = __first; __i != __middle; __d.__incr((value_type*) 0), (void) ++__i, (void) ++__p)
2092
+ {
2093
+ ::new (__p) value_type(_CUDA_VSTD::move(*__i));
2094
+ }
2095
+ __half_inplace_merge(__buff, __p, __middle, __last, __first, __comp);
2096
+ }
2097
+ else
2098
+ {
2099
+ value_type* __p = __buff;
2100
+ for (_BidirectionalIterator __i = __middle; __i != __last; __d.__incr((value_type*) 0), (void) ++__i, (void) ++__p)
2101
+ {
2102
+ ::new (__p) value_type(_CUDA_VSTD::move(*__i));
2103
+ }
2104
+ typedef reverse_iterator<_BidirectionalIterator> _RBi;
2105
+ typedef reverse_iterator<value_type*> _Rv;
2106
+ __half_inplace_merge(_Rv(__p), _Rv(__buff), _RBi(__middle), _RBi(__first), _RBi(__last), __invert<_Compare>(__comp));
2107
+ }
2108
+ }
2109
+
2110
+ template <class _Compare, class _BidirectionalIterator>
2111
+ _CCCL_HOST_DEVICE void __inplace_merge(
2112
+ _BidirectionalIterator __first,
2113
+ _BidirectionalIterator __middle,
2114
+ _BidirectionalIterator __last,
2115
+ _Compare __comp,
2116
+ typename iterator_traits<_BidirectionalIterator>::difference_type __len1,
2117
+ typename iterator_traits<_BidirectionalIterator>::difference_type __len2,
2118
+ typename iterator_traits<_BidirectionalIterator>::value_type* __buff,
2119
+ ptrdiff_t __buff_size)
2120
+ {
2121
+ typedef typename iterator_traits<_BidirectionalIterator>::difference_type difference_type;
2122
+ while (true)
2123
+ {
2124
+ // if __middle == __last, we're done
2125
+ if (__len2 == 0)
2126
+ {
2127
+ return;
2128
+ }
2129
+ if (__len1 <= __buff_size || __len2 <= __buff_size)
2130
+ {
2131
+ return __buffered_inplace_merge<_Compare>(__first, __middle, __last, __comp, __len1, __len2, __buff);
2132
+ }
2133
+ // shrink [__first, __middle) as much as possible (with no moves), returning if it shrinks to 0
2134
+ for (; true; ++__first, (void) --__len1)
2135
+ {
2136
+ if (__len1 == 0)
2137
+ {
2138
+ return;
2139
+ }
2140
+ if (__comp(*__middle, *__first))
2141
+ {
2142
+ break;
2143
+ }
2144
+ }
2145
+ // __first < __middle < __last
2146
+ // *__first > *__middle
2147
+ // partition [__first, __m1) [__m1, __middle) [__middle, __m2) [__m2, __last) such that
2148
+ // all elements in:
2149
+ // [__first, __m1) <= [__middle, __m2)
2150
+ // [__middle, __m2) < [__m1, __middle)
2151
+ // [__m1, __middle) <= [__m2, __last)
2152
+ // and __m1 or __m2 is in the middle of its range
2153
+ _BidirectionalIterator __m1; // "median" of [__first, __middle)
2154
+ _BidirectionalIterator __m2; // "median" of [__middle, __last)
2155
+ difference_type __len11; // distance(__first, __m1)
2156
+ difference_type __len21; // distance(__middle, __m2)
2157
+ // binary search smaller range
2158
+ if (__len1 < __len2)
2159
+ { // __len >= 1, __len2 >= 2
2160
+ __len21 = __len2 / 2;
2161
+ __m2 = __middle;
2162
+ _CUDA_VSTD::advance(__m2, __len21);
2163
+ __m1 = __upper_bound<_Compare>(__first, __middle, *__m2, __comp);
2164
+ __len11 = _CUDA_VSTD::distance(__first, __m1);
2165
+ }
2166
+ else
2167
+ {
2168
+ if (__len1 == 1)
2169
+ { // __len1 >= __len2 && __len2 > 0, therefore __len2 == 1
2170
+ // It is known *__first > *__middle
2171
+ swap(*__first, *__middle);
2172
+ return;
2173
+ }
2174
+ // __len1 >= 2, __len2 >= 1
2175
+ __len11 = __len1 / 2;
2176
+ __m1 = __first;
2177
+ _CUDA_VSTD::advance(__m1, __len11);
2178
+ __m2 = __lower_bound<_Compare>(__middle, __last, *__m1, __comp);
2179
+ __len21 = _CUDA_VSTD::distance(__middle, __m2);
2180
+ }
2181
+ difference_type __len12 = __len1 - __len11; // distance(__m1, __middle)
2182
+ difference_type __len22 = __len2 - __len21; // distance(__m2, __last)
2183
+ // [__first, __m1) [__m1, __middle) [__middle, __m2) [__m2, __last)
2184
+ // swap middle two partitions
2185
+ __middle = _CUDA_VSTD::rotate(__m1, __middle, __m2);
2186
+ // __len12 and __len21 now have swapped meanings
2187
+ // merge smaller range with recursive call and larger with tail recursion elimination
2188
+ if (__len11 + __len21 < __len12 + __len22)
2189
+ {
2190
+ __inplace_merge<_Compare>(__first, __m1, __middle, __comp, __len11, __len21, __buff, __buff_size);
2191
+ // __inplace_merge<_Compare>(__middle, __m2, __last, __comp, __len12, __len22, __buff, __buff_size);
2192
+ __first = __middle;
2193
+ __middle = __m2;
2194
+ __len1 = __len12;
2195
+ __len2 = __len22;
2196
+ }
2197
+ else
2198
+ {
2199
+ __inplace_merge<_Compare>(__middle, __m2, __last, __comp, __len12, __len22, __buff, __buff_size);
2200
+ // __inplace_merge<_Compare>(__first, __m1, __middle, __comp, __len11, __len21, __buff, __buff_size);
2201
+ __last = __middle;
2202
+ __middle = __m1;
2203
+ __len1 = __len11;
2204
+ __len2 = __len21;
2205
+ }
2206
+ }
2207
+ }
2208
+
2209
// inplace_merge (with comparator)
//
// Public entry point: merges the two adjacent ranges [__first, __middle) and
// [__middle, __last) by forwarding to __inplace_merge.
//
// The lengths of both ranges are measured with distance(), and a temporary buffer
// sized for the shorter of the two is requested through get_temporary_buffer.
// The buffer pointer is handed to a unique_ptr whose deleter type is
// __return_temporary_buffer, so it is tied to the lifetime of __h.
// NOTE(review): __buf.second is forwarded as the buffer size; presumably it is the
// number of elements actually obtained (which may be smaller than requested) -
// confirm against get_temporary_buffer.
+ template <class _BidirectionalIterator, class _Compare>
2210
+ _LIBCUDACXX_HIDE_FROM_ABI void inplace_merge(
2211
+ _BidirectionalIterator __first, _BidirectionalIterator __middle, _BidirectionalIterator __last, _Compare __comp)
2212
+ {
2213
+ typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type;
2214
+ typedef typename iterator_traits<_BidirectionalIterator>::difference_type difference_type;
2215
+ difference_type __len1 = _CUDA_VSTD::distance(__first, __middle);
2216
+ difference_type __len2 = _CUDA_VSTD::distance(__middle, __last);
2217
+ difference_type __buf_size = _CUDA_VSTD::min(__len1, __len2); // scratch space for the shorter range only
2218
+ pair<value_type*, ptrdiff_t> __buf = _CUDA_VSTD::get_temporary_buffer<value_type>(__buf_size);
2219
+ unique_ptr<value_type, __return_temporary_buffer> __h(__buf.first); // ties the buffer to this scope
2220
+ using _Comp_ref = __comp_ref_type<_Compare>; // comparator type handed to the internal algorithm
2221
+ return _CUDA_VSTD::__inplace_merge<_Comp_ref>(
2222
+ __first, __middle, __last, __comp, __len1, __len2, __buf.first, __buf.second);
2223
+ }
2224
+
2225
// inplace_merge (default comparator)
//
// Convenience overload: forwards to the four-argument inplace_merge with a
// default-constructed __less object as the comparator.
+ template <class _BidirectionalIterator>
2226
+ _LIBCUDACXX_HIDE_FROM_ABI void
2227
+ inplace_merge(_BidirectionalIterator __first, _BidirectionalIterator __middle, _BidirectionalIterator __last)
2228
+ {
2229
+ _CUDA_VSTD::inplace_merge(__first, __middle, __last, __less{});
2230
+ }
2231
+
2232
+ // stable_sort
2233
+
2234
// __merge_move_construct
//
// Merges [__first1, __last1) and [__first2, __last2) into the storage starting at
// __result. Every output element is created with placement new from a moved-from
// input element, i.e. the destination is treated as unconstructed storage.
//
// An element is taken from the second range only when __comp(*__first2, *__first1)
// is true; otherwise the element of the first range is taken, so equivalent
// elements from the first range come first in the output.
//
// __d is advanced via __incr after each construction and serves as the deleter of
// __h, which holds the start of the output. On normal completion __h is released.
// NOTE(review): __destruct_n is defined elsewhere; presumably it destroys the
// elements constructed so far if a move or comparison throws - confirm.
+ template <class _Compare, class _InputIterator1, class _InputIterator2>
2235
+ _CCCL_HOST_DEVICE void __merge_move_construct(
2236
+ _InputIterator1 __first1,
2237
+ _InputIterator1 __last1,
2238
+ _InputIterator2 __first2,
2239
+ _InputIterator2 __last2,
2240
+ typename iterator_traits<_InputIterator1>::value_type* __result,
2241
+ _Compare __comp)
2242
+ {
2243
+ typedef typename iterator_traits<_InputIterator1>::value_type value_type;
2244
+ __destruct_n __d(0); // counts the elements constructed so far
2245
+ unique_ptr<value_type, __destruct_n&> __h(__result, __d); // guards the start of the output
2246
+ for (; true; ++__result)
2247
+ {
2248
+ if (__first1 == __last1) // first range exhausted: move the rest of the second range
2249
+ {
2250
+ for (; __first2 != __last2; ++__first2, ++__result, (void) __d.__incr((value_type*) 0))
2251
+ {
2252
+ ::new (__result) value_type(_CUDA_VSTD::move(*__first2));
2253
+ }
2254
+ __h.release(); // done: detach the guard so its deleter does not run
2255
+ return;
2256
+ }
2257
+ if (__first2 == __last2) // second range exhausted: move the rest of the first range
2258
+ {
2259
+ for (; __first1 != __last1; ++__first1, ++__result, (void) __d.__incr((value_type*) 0))
2260
+ {
2261
+ ::new (__result) value_type(_CUDA_VSTD::move(*__first1));
2262
+ }
2263
+ __h.release(); // done: detach the guard so its deleter does not run
2264
+ return;
2265
+ }
2266
+ if (__comp(*__first2, *__first1)) // strictly smaller element in the second range
2267
+ {
2268
+ ::new (__result) value_type(_CUDA_VSTD::move(*__first2));
2269
+ __d.__incr((value_type*) 0);
2270
+ ++__first2;
2271
+ }
2272
+ else
2273
+ {
2274
+ ::new (__result) value_type(_CUDA_VSTD::move(*__first1));
2275
+ __d.__incr((value_type*) 0);
2276
+ ++__first1;
2277
+ }
2278
+ }
2279
+ }
2280
+
2281
// __merge_move_assign
//
// Merges [__first1, __last1) and [__first2, __last2) into the output sequence
// starting at __result. Each output element is written by move assignment through
// the output iterator, so the destination must already be assignable through it.
//
// An element is taken from the second range only when __comp(*__first2, *__first1)
// is true; otherwise the element of the first range is taken, so equivalent
// elements from the first range come first in the output.
+ template <class _Compare, class _InputIterator1, class _InputIterator2, class _OutputIterator>
2282
+ _CCCL_HOST_DEVICE void __merge_move_assign(
2283
+ _InputIterator1 __first1,
2284
+ _InputIterator1 __last1,
2285
+ _InputIterator2 __first2,
2286
+ _InputIterator2 __last2,
2287
+ _OutputIterator __result,
2288
+ _Compare __comp)
2289
+ {
2290
+ for (; __first1 != __last1; ++__result)
2291
+ {
2292
+ if (__first2 == __last2) // second range exhausted: move the rest of the first range
2293
+ {
2294
+ for (; __first1 != __last1; ++__first1, (void) ++__result)
2295
+ {
2296
+ *__result = _CUDA_VSTD::move(*__first1);
2297
+ }
2298
+ return;
2299
+ }
2300
+ if (__comp(*__first2, *__first1)) // strictly smaller element in the second range
2301
+ {
2302
+ *__result = _CUDA_VSTD::move(*__first2);
2303
+ ++__first2;
2304
+ }
2305
+ else
2306
+ {
2307
+ *__result = _CUDA_VSTD::move(*__first1);
2308
+ ++__first1;
2309
+ }
2310
+ }
2311
+ for (; __first2 != __last2; ++__first2, (void) ++__result) // first range exhausted: move what is left of the second
2312
+ {
2313
+ *__result = _CUDA_VSTD::move(*__first2);
2314
+ }
2315
+ }
2316
+
2317
// __stable_sort (forward declaration)
//
// Declared ahead of its definition because __stable_sort_move, defined next, calls
// __stable_sort, while __stable_sort in turn calls __stable_sort_move.
+ template <class _Compare, class _RandomAccessIterator>
2318
+ _CCCL_HOST_DEVICE void __stable_sort(
2319
+ _RandomAccessIterator __first,
2320
+ _RandomAccessIterator __last,
2321
+ _Compare __comp,
2322
+ typename iterator_traits<_RandomAccessIterator>::difference_type __len,
2323
+ typename iterator_traits<_RandomAccessIterator>::value_type* __buff,
2324
+ ptrdiff_t __buff_size);
2325
+
2326
// __stable_sort_move
//
// Sorts the __len elements of [__first1, __last1) and leaves the sorted sequence in
// the storage starting at __first2. Output elements are created with placement new
// (directly here for lengths 1 and 2, via __insertion_sort_move or
// __merge_move_construct otherwise), i.e. __first2 is treated as unconstructed
// storage.
//
//   __len == 0 : nothing to do
//   __len == 1 : move-construct the single element
//   __len == 2 : move-construct both elements in the order given by __comp
//   __len <= 8 : delegate to __insertion_sort_move
//   otherwise  : sort each half in place with __stable_sort, lending it the
//                matching part of __first2 as scratch buffer, then merge the two
//                sorted halves into __first2 with __merge_move_construct
+ template <class _Compare, class _RandomAccessIterator>
2327
+ _CCCL_HOST_DEVICE void __stable_sort_move(
2328
+ _RandomAccessIterator __first1,
2329
+ _RandomAccessIterator __last1,
2330
+ _Compare __comp,
2331
+ typename iterator_traits<_RandomAccessIterator>::difference_type __len,
2332
+ typename iterator_traits<_RandomAccessIterator>::value_type* __first2)
2333
+ {
2334
+ typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
2335
+ switch (__len)
2336
+ {
2337
+ case 0:
2338
+ return;
2339
+ case 1:
2340
+ ::new (__first2) value_type(_CUDA_VSTD::move(*__first1));
2341
+ return;
2342
+ case 2:
2343
+ __destruct_n __d(0); // counts constructed outputs; presumably destroys them on failure - confirm
2344
+ unique_ptr<value_type, __destruct_n&> __h2(__first2, __d);
2345
+ if (__comp(*--__last1, *__first1)) // __last1 now refers to the second element
2346
+ {
2347
+ ::new (__first2) value_type(_CUDA_VSTD::move(*__last1));
2348
+ __d.__incr((value_type*) 0); // one output exists while the second is being constructed
2349
+ ++__first2;
2350
+ ::new (__first2) value_type(_CUDA_VSTD::move(*__first1));
2351
+ }
2352
+ else
2353
+ {
2354
+ ::new (__first2) value_type(_CUDA_VSTD::move(*__first1));
2355
+ __d.__incr((value_type*) 0); // one output exists while the second is being constructed
2356
+ ++__first2;
2357
+ ::new (__first2) value_type(_CUDA_VSTD::move(*__last1));
2358
+ }
2359
+ __h2.release(); // both outputs constructed: detach the guard
2360
+ return;
2361
+ }
2362
+ if (__len <= 8)
2363
+ {
2364
+ __insertion_sort_move<_Compare>(__first1, __last1, __first2, __comp);
2365
+ return;
2366
+ }
2367
+ typename iterator_traits<_RandomAccessIterator>::difference_type __l2 = __len / 2;
2368
+ _RandomAccessIterator __m = __first1 + __l2;
2369
+ __stable_sort<_Compare>(__first1, __m, __comp, __l2, __first2, __l2); // first half, in place
2370
+ __stable_sort<_Compare>(__m, __last1, __comp, __len - __l2, __first2 + __l2, __len - __l2); // second half, in place
2371
+ __merge_move_construct<_Compare>(__first1, __m, __m, __last1, __first2, __comp); // merged result lands in __first2
2372
+ }
2373
+
2374
// __stable_sort_switch
//
// Length threshold used by the stable sort code: `value` is 128 when
// is_trivially_copy_assignable<_Tp>::value is true and 0 otherwise.
// __stable_sort falls back to __insertion_sort for lengths up to this value, and
// stable_sort only requests a temporary buffer for lengths above it.
+ template <class _Tp>
2375
+ struct __stable_sort_switch
2376
+ {
2377
+ static const unsigned value = 128 * is_trivially_copy_assignable<_Tp>::value;
2378
+ };
2379
+
2380
// __stable_sort
//
// Sorts the __len elements of [__first, __last) in place, optionally using the
// scratch storage __buff, which has room for __buff_size elements.
//
//   __len <= 1 : nothing to do
//   __len == 2 : swap the two elements if __comp says they are out of order
//   __len <= __stable_sort_switch<value_type>::value : delegate to __insertion_sort
//                (never taken when that value is 0)
//   __len <= __buff_size : sort-move each half into __buff with __stable_sort_move,
//                then merge the halves back into [__first, __last) with
//                __merge_move_assign
//   otherwise  : sort both halves recursively and combine them with __inplace_merge
//
// In the buffered path __d records how many elements of __buff have been
// constructed (via __set) and acts as the deleter of __h2.
// NOTE(review): __destruct_n is defined elsewhere; presumably it destroys that many
// buffer elements when __h2 goes out of scope - confirm.
+ template <class _Compare, class _RandomAccessIterator>
2381
+ _CCCL_HOST_DEVICE void __stable_sort(
2382
+ _RandomAccessIterator __first,
2383
+ _RandomAccessIterator __last,
2384
+ _Compare __comp,
2385
+ typename iterator_traits<_RandomAccessIterator>::difference_type __len,
2386
+ typename iterator_traits<_RandomAccessIterator>::value_type* __buff,
2387
+ ptrdiff_t __buff_size)
2388
+ {
2389
+ typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
2390
+ typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
2391
+ switch (__len)
2392
+ {
2393
+ case 0:
2394
+ case 1:
2395
+ return;
2396
+ case 2:
2397
+ if (__comp(*--__last, *__first)) // __last now refers to the second element
2398
+ {
2399
+ swap(*__first, *__last);
2400
+ }
2401
+ return;
2402
+ }
2403
+ if (__len <= static_cast<difference_type>(__stable_sort_switch<value_type>::value))
2404
+ {
2405
+ __insertion_sort<_Compare>(__first, __last, __comp);
2406
+ return;
2407
+ }
2408
+ typename iterator_traits<_RandomAccessIterator>::difference_type __l2 = __len / 2;
2409
+ _RandomAccessIterator __m = __first + __l2;
2410
+ if (__len <= __buff_size) // the whole range fits into the scratch buffer
2411
+ {
2412
+ __destruct_n __d(0);
2413
+ unique_ptr<value_type, __destruct_n&> __h2(__buff, __d);
2414
+ __stable_sort_move<_Compare>(__first, __m, __comp, __l2, __buff);
2415
+ __d.__set(__l2, (value_type*) 0); // first __l2 buffer elements are now constructed
2416
+ __stable_sort_move<_Compare>(__m, __last, __comp, __len - __l2, __buff + __l2);
2417
+ __d.__set(__len, (value_type*) 0); // all __len buffer elements are now constructed
2418
+ __merge_move_assign<_Compare>(__buff, __buff + __l2, __buff + __l2, __buff + __len, __first, __comp);
2419
+ // __merge<_Compare>(move_iterator<value_type*>(__buff),
2420
+ // move_iterator<value_type*>(__buff + __l2),
2421
+ // move_iterator<_RandomAccessIterator>(__buff + __l2),
2422
+ // move_iterator<_RandomAccessIterator>(__buff + __len),
2423
+ // __first, __comp);
2424
+ return;
2425
+ }
2426
+ __stable_sort<_Compare>(__first, __m, __comp, __l2, __buff, __buff_size);
2427
+ __stable_sort<_Compare>(__m, __last, __comp, __len - __l2, __buff, __buff_size);
2428
+ __inplace_merge<_Compare>(__first, __m, __last, __comp, __l2, __len - __l2, __buff, __buff_size);
2429
+ }
2430
+
2431
// stable_sort (with comparator)
//
// Public entry point: sorts [__first, __last) by forwarding to __stable_sort.
//
// A temporary buffer for __len elements is requested only when __len exceeds
// __stable_sort_switch<value_type>::value. Otherwise __buf stays (0, 0) and
// __stable_sort is called with a null buffer of size 0.
// The buffer pointer is handed to a unique_ptr whose deleter type is
// __return_temporary_buffer, so it is tied to the lifetime of __h.
// NOTE(review): __buf.second is forwarded as the buffer size; presumably it is the
// number of elements actually obtained - confirm against get_temporary_buffer.
+ template <class _RandomAccessIterator, class _Compare>
2432
+ _LIBCUDACXX_HIDE_FROM_ABI void stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp)
2433
+ {
2434
+ typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
2435
+ typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
2436
+ difference_type __len = __last - __first;
2437
+ pair<value_type*, ptrdiff_t> __buf(0, 0); // no buffer unless the range is long enough
2438
+ unique_ptr<value_type, __return_temporary_buffer> __h;
2439
+ if (__len > static_cast<difference_type>(__stable_sort_switch<value_type>::value))
2440
+ {
2441
+ __buf = _CUDA_VSTD::get_temporary_buffer<value_type>(__len);
2442
+ __h.reset(__buf.first); // ties the buffer to this scope
2443
+ }
2444
+ using _Comp_ref = __comp_ref_type<_Compare>; // comparator type handed to the internal algorithm
2445
+ __stable_sort<_Comp_ref>(__first, __last, __comp, __len, __buf.first, __buf.second);
2446
+ }
2447
+
2448
// stable_sort (default comparator)
//
// Convenience overload: forwards to the three-argument stable_sort with a
// default-constructed __less object as the comparator.
+ template <class _RandomAccessIterator>
2449
+ _LIBCUDACXX_HIDE_FROM_ABI void stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last)
2450
+ {
2451
+ _CUDA_VSTD::stable_sort(__first, __last, __less{});
2452
+ }
2453
+
2454
+ // nth_element
2455
+
2456
// __nth_element
//
// Iterative selection on [__first, __last) around the position __nth. Each pass of
// the outer loop either returns or narrows [__first, __last) to the sub-range that
// still contains __nth (the recursive calls this replaces are kept as comments).
//
// Per pass:
//   * __nth == __last, or length 0/1: return.
//   * length 2: compare-and-swap; length 3: __sort3; length <= __limit (7):
//     __selection_sort; then return.
//   * otherwise pick a pivot by applying __sort3 to the first, middle and last
//     elements and partition the range around it. __n_swaps accumulates the value
//     obtained from __sort3 plus one for every swap done while partitioning.
//   * if the first element is equivalent to the pivot and no smaller element is
//     found when scanning down, the range is instead partitioned into elements
//     equivalent to *__first and elements greater than it.
//   * if no swap happened at all, the side containing __nth is checked for already
//     being sorted, in which case the function returns early.
//
// NOTE(review): presumably this implements the std::nth_element contract (the
// element at __nth ends up where it would be in a fully sorted range); that
// postcondition depends on helpers not visible in this block - confirm.
+ template <class _Compare, class _RandomAccessIterator>
2457
+ _CCCL_HOST_DEVICE void
2458
+ __nth_element(_RandomAccessIterator __first, _RandomAccessIterator __nth, _RandomAccessIterator __last, _Compare __comp)
2459
+ {
2460
+ // _Compare is known to be a reference type
2461
+ typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
2462
+ const difference_type __limit = 7;
2463
+ while (true)
2464
+ {
2465
+ __restart:
2466
+ if (__nth == __last)
2467
+ {
2468
+ return;
2469
+ }
2470
+ difference_type __len = __last - __first;
2471
+ switch (__len)
2472
+ {
2473
+ case 0:
2474
+ case 1:
2475
+ return;
2476
+ case 2:
2477
+ if (__comp(*--__last, *__first))
2478
+ {
2479
+ swap(*__first, *__last);
2480
+ }
2481
+ return;
2482
+ case 3: {
2483
+ _RandomAccessIterator __m = __first;
2484
+ _CUDA_VSTD::__sort3<_Compare>(__first, ++__m, --__last, __comp);
2485
+ return;
2486
+ }
2487
+ }
2488
+ if (__len <= __limit)
2489
+ {
2490
+ __selection_sort<_Compare>(__first, __last, __comp);
2491
+ return;
2492
+ }
2493
+ // __len > __limit >= 3
2494
+ _RandomAccessIterator __m = __first + __len / 2;
2495
+ _RandomAccessIterator __lm1 = __last;
2496
+ unsigned __n_swaps = _CUDA_VSTD::__sort3<_Compare>(__first, __m, --__lm1, __comp);
2497
+ // *__m is median
2498
+ // partition [__first, __m) < *__m and *__m <= [__m, __last)
2499
+ // (this inhibits tossing elements equivalent to __m around unnecessarily)
2500
+ _RandomAccessIterator __i = __first;
2501
+ _RandomAccessIterator __j = __lm1;
2502
+ // j points beyond range to be tested, *__lm1 is known to be <= *__m
2503
+ // The search going up is known to be guarded but the search coming down isn't.
2504
+ // Prime the downward search with a guard.
2505
+ if (!__comp(*__i, *__m)) // if *__first == *__m
2506
+ {
2507
+ // *__first == *__m, *__first doesn't go in first part
2508
+ // manually guard downward moving __j against __i
2509
+ while (true)
2510
+ {
2511
+ if (__i == --__j)
2512
+ {
2513
+ // *__first == *__m, *__m <= all other elements
2514
+ // Partition instead into [__first, __i) == *__first and *__first < [__i, __last)
2515
+ ++__i; // __first + 1
2516
+ __j = __last;
2517
+ if (!__comp(*__first, *--__j)) // we need a guard if *__first == *(__last-1)
2518
+ {
2519
+ while (true)
2520
+ {
2521
+ if (__i == __j)
2522
+ {
2523
+ return; // [__first, __last) all equivalent elements
2524
+ }
2525
+ if (__comp(*__first, *__i))
2526
+ {
2527
+ swap(*__i, *__j);
2528
+ ++__n_swaps;
2529
+ ++__i;
2530
+ break;
2531
+ }
2532
+ ++__i;
2533
+ }
2534
+ }
2535
+ // [__first, __i) == *__first and *__first < [__j, __last) and __j == __last - 1
2536
+ if (__i == __j)
2537
+ {
2538
+ return;
2539
+ }
2540
+ while (true)
2541
+ {
2542
+ while (!__comp(*__first, *__i))
2543
+ {
2544
+ ++__i;
2545
+ }
2546
+ while (__comp(*__first, *--__j))
2547
+ ;
2548
+ if (__i >= __j)
2549
+ {
2550
+ break;
2551
+ }
2552
+ swap(*__i, *__j);
2553
+ ++__n_swaps;
2554
+ ++__i;
2555
+ }
2556
+ // [__first, __i) == *__first and *__first < [__i, __last)
2557
+ // The first part is sorted,
2558
+ if (__nth < __i)
2559
+ {
2560
+ return;
2561
+ }
2562
+ // __nth_element the second part
2563
+ // __nth_element<_Compare>(__i, __nth, __last, __comp);
2564
+ __first = __i;
2565
+ goto __restart;
2566
+ }
2567
+ if (__comp(*__j, *__m))
2568
+ {
2569
+ swap(*__i, *__j);
2570
+ ++__n_swaps;
2571
+ break; // found guard for downward moving __j, now use unguarded partition
2572
+ }
2573
+ }
2574
+ }
2575
+ ++__i;
2576
+ // j points beyond range to be tested, *__lm1 is known to be <= *__m
2577
+ // if not yet partitioned...
2578
+ if (__i < __j)
2579
+ {
2580
+ // known that *(__i - 1) < *__m
2581
+ while (true)
2582
+ {
2583
+ // __m still guards upward moving __i
2584
+ while (__comp(*__i, *__m))
2585
+ {
2586
+ ++__i;
2587
+ }
2588
+ // It is now known that a guard exists for downward moving __j
2589
+ while (!__comp(*--__j, *__m))
2590
+ ;
2591
+ if (__i >= __j)
2592
+ {
2593
+ break;
2594
+ }
2595
+ swap(*__i, *__j);
2596
+ ++__n_swaps;
2597
+ // It is known that __m != __j
2598
+ // If __m just moved, follow it
2599
+ if (__m == __i)
2600
+ {
2601
+ __m = __j;
2602
+ }
2603
+ ++__i;
2604
+ }
2605
+ }
2606
+ // [__first, __i) < *__m and *__m <= [__i, __last)
2607
+ if (__i != __m && __comp(*__m, *__i))
2608
+ {
2609
+ swap(*__i, *__m);
2610
+ ++__n_swaps;
2611
+ }
2612
+ // [__first, __i) < *__i and *__i <= [__i+1, __last)
2613
+ if (__nth == __i)
2614
+ {
2615
+ return;
2616
+ }
2617
+ if (__n_swaps == 0)
2618
+ {
2619
+ // We were given a perfectly partitioned sequence. Coincidence?
2620
+ if (__nth < __i)
2621
+ {
2622
+ // Check for [__first, __i) already sorted
2623
+ __j = __m = __first;
2624
+ while (++__j != __i)
2625
+ {
2626
+ if (__comp(*__j, *__m))
2627
+ {
2628
+ // not yet sorted, so sort
2629
+ goto not_sorted;
2630
+ }
2631
+ __m = __j;
2632
+ }
2633
+ // [__first, __i) sorted
2634
+ return;
2635
+ }
2636
+ else
2637
+ {
2638
+ // Check for [__i, __last) already sorted
2639
+ __j = __m = __i;
2640
+ while (++__j != __last)
2641
+ {
2642
+ if (__comp(*__j, *__m))
2643
+ {
2644
+ // not yet sorted, so sort
2645
+ goto not_sorted;
2646
+ }
2647
+ __m = __j;
2648
+ }
2649
+ // [__i, __last) sorted
2650
+ return;
2651
+ }
2652
+ }
2653
+ not_sorted:
2654
+ // __nth_element on range containing __nth
2655
+ if (__nth < __i)
2656
+ {
2657
+ // __nth_element<_Compare>(__first, __nth, __i, __comp);
2658
+ __last = __i;
2659
+ }
2660
+ else
2661
+ {
2662
+ // __nth_element<_Compare>(__i+1, __nth, __last, __comp);
2663
+ __first = ++__i;
2664
+ }
2665
+ }
2666
+ }
2667
+
2668
// nth_element (with comparator)
//
// Public entry point: forwards to __nth_element, instantiated with
// __comp_ref_type<_Compare> as the comparator type.
+ template <class _RandomAccessIterator, class _Compare>
2669
+ _LIBCUDACXX_HIDE_FROM_ABI void
2670
+ nth_element(_RandomAccessIterator __first, _RandomAccessIterator __nth, _RandomAccessIterator __last, _Compare __comp)
2671
+ {
2672
+ using _Comp_ref = __comp_ref_type<_Compare>;
2673
+ __nth_element<_Comp_ref>(__first, __nth, __last, __comp);
2674
+ }
2675
+
2676
// nth_element (default comparator)
//
// Convenience overload: forwards to the four-argument nth_element with a
// default-constructed __less object as the comparator.
+ template <class _RandomAccessIterator>
2677
+ _LIBCUDACXX_HIDE_FROM_ABI void
2678
+ nth_element(_RandomAccessIterator __first, _RandomAccessIterator __nth, _RandomAccessIterator __last)
2679
+ {
2680
+ _CUDA_VSTD::nth_element(__first, __nth, __last, __less{});
2681
+ }
2682
+
2683
+ #endif
2684
+ _LIBCUDACXX_END_NAMESPACE_STD
2685
+
2686
+ #if defined(_LIBCUDACXX_HAS_PARALLEL_ALGORITHMS) && _CCCL_STD_VER >= 2017
2687
+ # include <__pstl_algorithm>
2688
+ #endif
2689
+
2690
+ #include <cuda/std/__cccl/epilogue.h>
2691
+
2692
+ #endif // _LIBCUDACXX_ALGORITHM