cuda-cccl 0.1.3.1.0.dev1678__cp312-cp312-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cuda-cccl might be problematic. Click here for more details.

Files changed (1860) hide show
  1. cuda/cccl/__init__.py +14 -0
  2. cuda/cccl/cooperative/__init__.py +3 -0
  3. cuda/cccl/cooperative/experimental/__init__.py +8 -0
  4. cuda/cccl/cooperative/experimental/_caching.py +48 -0
  5. cuda/cccl/cooperative/experimental/_common.py +273 -0
  6. cuda/cccl/cooperative/experimental/_nvrtc.py +91 -0
  7. cuda/cccl/cooperative/experimental/_scan_op.py +181 -0
  8. cuda/cccl/cooperative/experimental/_types.py +935 -0
  9. cuda/cccl/cooperative/experimental/_typing.py +107 -0
  10. cuda/cccl/cooperative/experimental/block/__init__.py +33 -0
  11. cuda/cccl/cooperative/experimental/block/_block_load_store.py +215 -0
  12. cuda/cccl/cooperative/experimental/block/_block_merge_sort.py +125 -0
  13. cuda/cccl/cooperative/experimental/block/_block_radix_sort.py +214 -0
  14. cuda/cccl/cooperative/experimental/block/_block_reduce.py +294 -0
  15. cuda/cccl/cooperative/experimental/block/_block_scan.py +983 -0
  16. cuda/cccl/cooperative/experimental/warp/__init__.py +9 -0
  17. cuda/cccl/cooperative/experimental/warp/_warp_merge_sort.py +98 -0
  18. cuda/cccl/cooperative/experimental/warp/_warp_reduce.py +153 -0
  19. cuda/cccl/cooperative/experimental/warp/_warp_scan.py +78 -0
  20. cuda/cccl/headers/__init__.py +7 -0
  21. cuda/cccl/headers/include/__init__.py +1 -0
  22. cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +261 -0
  23. cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +1181 -0
  24. cuda/cccl/headers/include/cub/agent/agent_for.cuh +84 -0
  25. cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +925 -0
  26. cuda/cccl/headers/include/cub/agent/agent_merge.cuh +227 -0
  27. cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +753 -0
  28. cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +766 -0
  29. cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +286 -0
  30. cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +704 -0
  31. cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +557 -0
  32. cuda/cccl/headers/include/cub/agent/agent_reduce.cuh +678 -0
  33. cuda/cccl/headers/include/cub/agent/agent_reduce_by_key.cuh +804 -0
  34. cuda/cccl/headers/include/cub/agent/agent_rle.cuh +997 -0
  35. cuda/cccl/headers/include/cub/agent/agent_scan.cuh +561 -0
  36. cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +473 -0
  37. cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +292 -0
  38. cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +1032 -0
  39. cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +342 -0
  40. cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +592 -0
  41. cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +614 -0
  42. cuda/cccl/headers/include/cub/agent/single_pass_scan_operators.cuh +1346 -0
  43. cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +965 -0
  44. cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +1217 -0
  45. cuda/cccl/headers/include/cub/block/block_exchange.cuh +1306 -0
  46. cuda/cccl/headers/include/cub/block/block_histogram.cuh +420 -0
  47. cuda/cccl/headers/include/cub/block/block_load.cuh +1259 -0
  48. cuda/cccl/headers/include/cub/block/block_merge_sort.cuh +787 -0
  49. cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +1218 -0
  50. cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +2193 -0
  51. cuda/cccl/headers/include/cub/block/block_raking_layout.cuh +150 -0
  52. cuda/cccl/headers/include/cub/block/block_reduce.cuh +629 -0
  53. cuda/cccl/headers/include/cub/block/block_run_length_decode.cuh +437 -0
  54. cuda/cccl/headers/include/cub/block/block_scan.cuh +2583 -0
  55. cuda/cccl/headers/include/cub/block/block_shuffle.cuh +346 -0
  56. cuda/cccl/headers/include/cub/block/block_store.cuh +1246 -0
  57. cuda/cccl/headers/include/cub/block/radix_rank_sort_operations.cuh +620 -0
  58. cuda/cccl/headers/include/cub/block/specializations/block_histogram_atomic.cuh +86 -0
  59. cuda/cccl/headers/include/cub/block/specializations/block_histogram_sort.cuh +240 -0
  60. cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking.cuh +252 -0
  61. cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking_commutative_only.cuh +238 -0
  62. cuda/cccl/headers/include/cub/block/specializations/block_reduce_warp_reductions.cuh +259 -0
  63. cuda/cccl/headers/include/cub/block/specializations/block_scan_raking.cuh +790 -0
  64. cuda/cccl/headers/include/cub/block/specializations/block_scan_warp_scans.cuh +538 -0
  65. cuda/cccl/headers/include/cub/config.cuh +60 -0
  66. cuda/cccl/headers/include/cub/cub.cuh +112 -0
  67. cuda/cccl/headers/include/cub/detail/array_utils.cuh +77 -0
  68. cuda/cccl/headers/include/cub/detail/choose_offset.cuh +155 -0
  69. cuda/cccl/headers/include/cub/detail/detect_cuda_runtime.cuh +93 -0
  70. cuda/cccl/headers/include/cub/detail/device_double_buffer.cuh +96 -0
  71. cuda/cccl/headers/include/cub/detail/fast_modulo_division.cuh +246 -0
  72. cuda/cccl/headers/include/cub/detail/launcher/cuda_driver.cuh +120 -0
  73. cuda/cccl/headers/include/cub/detail/launcher/cuda_runtime.cuh +74 -0
  74. cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +118 -0
  75. cuda/cccl/headers/include/cub/detail/ptx-json/README.md +71 -0
  76. cuda/cccl/headers/include/cub/detail/ptx-json/array.h +68 -0
  77. cuda/cccl/headers/include/cub/detail/ptx-json/json.h +61 -0
  78. cuda/cccl/headers/include/cub/detail/ptx-json/object.h +100 -0
  79. cuda/cccl/headers/include/cub/detail/ptx-json/string.h +71 -0
  80. cuda/cccl/headers/include/cub/detail/ptx-json/value.h +93 -0
  81. cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +63 -0
  82. cuda/cccl/headers/include/cub/detail/rfa.cuh +724 -0
  83. cuda/cccl/headers/include/cub/detail/strong_load.cuh +189 -0
  84. cuda/cccl/headers/include/cub/detail/strong_store.cuh +220 -0
  85. cuda/cccl/headers/include/cub/detail/temporary_storage.cuh +384 -0
  86. cuda/cccl/headers/include/cub/detail/type_traits.cuh +206 -0
  87. cuda/cccl/headers/include/cub/detail/uninitialized_copy.cuh +72 -0
  88. cuda/cccl/headers/include/cub/detail/unsafe_bitcast.cuh +56 -0
  89. cuda/cccl/headers/include/cub/device/device_adjacent_difference.cuh +596 -0
  90. cuda/cccl/headers/include/cub/device/device_copy.cuh +187 -0
  91. cuda/cccl/headers/include/cub/device/device_for.cuh +990 -0
  92. cuda/cccl/headers/include/cub/device/device_histogram.cuh +1507 -0
  93. cuda/cccl/headers/include/cub/device/device_memcpy.cuh +195 -0
  94. cuda/cccl/headers/include/cub/device/device_merge.cuh +202 -0
  95. cuda/cccl/headers/include/cub/device/device_merge_sort.cuh +979 -0
  96. cuda/cccl/headers/include/cub/device/device_partition.cuh +664 -0
  97. cuda/cccl/headers/include/cub/device/device_radix_sort.cuh +3435 -0
  98. cuda/cccl/headers/include/cub/device/device_reduce.cuh +1815 -0
  99. cuda/cccl/headers/include/cub/device/device_run_length_encode.cuh +368 -0
  100. cuda/cccl/headers/include/cub/device/device_scan.cuh +1901 -0
  101. cuda/cccl/headers/include/cub/device/device_segmented_radix_sort.cuh +1496 -0
  102. cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +1512 -0
  103. cuda/cccl/headers/include/cub/device/device_segmented_sort.cuh +2811 -0
  104. cuda/cccl/headers/include/cub/device/device_select.cuh +1224 -0
  105. cuda/cccl/headers/include/cub/device/device_transform.cuh +313 -0
  106. cuda/cccl/headers/include/cub/device/dispatch/dispatch_adjacent_difference.cuh +314 -0
  107. cuda/cccl/headers/include/cub/device/dispatch/dispatch_advance_iterators.cuh +109 -0
  108. cuda/cccl/headers/include/cub/device/dispatch/dispatch_batch_memcpy.cuh +718 -0
  109. cuda/cccl/headers/include/cub/device/dispatch/dispatch_common.cuh +45 -0
  110. cuda/cccl/headers/include/cub/device/dispatch/dispatch_for.cuh +197 -0
  111. cuda/cccl/headers/include/cub/device/dispatch/dispatch_histogram.cuh +1051 -0
  112. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +305 -0
  113. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge_sort.cuh +473 -0
  114. cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +1748 -0
  115. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +1316 -0
  116. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_by_key.cuh +625 -0
  117. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +497 -0
  118. cuda/cccl/headers/include/cub/device/dispatch/dispatch_rle.cuh +548 -0
  119. cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan.cuh +497 -0
  120. cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan_by_key.cuh +598 -0
  121. cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +1374 -0
  122. cuda/cccl/headers/include/cub/device/dispatch/dispatch_select_if.cuh +838 -0
  123. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +341 -0
  124. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +439 -0
  125. cuda/cccl/headers/include/cub/device/dispatch/dispatch_three_way_partition.cuh +552 -0
  126. cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +467 -0
  127. cuda/cccl/headers/include/cub/device/dispatch/dispatch_unique_by_key.cuh +543 -0
  128. cuda/cccl/headers/include/cub/device/dispatch/kernels/for_each.cuh +218 -0
  129. cuda/cccl/headers/include/cub/device/dispatch/kernels/histogram.cuh +505 -0
  130. cuda/cccl/headers/include/cub/device/dispatch/kernels/merge_sort.cuh +338 -0
  131. cuda/cccl/headers/include/cub/device/dispatch/kernels/radix_sort.cuh +799 -0
  132. cuda/cccl/headers/include/cub/device/dispatch/kernels/reduce.cuh +525 -0
  133. cuda/cccl/headers/include/cub/device/dispatch/kernels/scan.cuh +194 -0
  134. cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_reduce.cuh +330 -0
  135. cuda/cccl/headers/include/cub/device/dispatch/kernels/transform.cuh +936 -0
  136. cuda/cccl/headers/include/cub/device/dispatch/kernels/unique_by_key.cuh +176 -0
  137. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +70 -0
  138. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +121 -0
  139. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +63 -0
  140. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +278 -0
  141. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +91 -0
  142. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +118 -0
  143. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +1068 -0
  144. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce.cuh +397 -0
  145. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +945 -0
  146. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +675 -0
  147. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +555 -0
  148. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +1013 -0
  149. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +249 -0
  150. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_select_if.cuh +1587 -0
  151. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +407 -0
  152. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +353 -0
  153. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +874 -0
  154. cuda/cccl/headers/include/cub/grid/grid_even_share.cuh +215 -0
  155. cuda/cccl/headers/include/cub/grid/grid_mapping.cuh +106 -0
  156. cuda/cccl/headers/include/cub/grid/grid_queue.cuh +202 -0
  157. cuda/cccl/headers/include/cub/iterator/arg_index_input_iterator.cuh +256 -0
  158. cuda/cccl/headers/include/cub/iterator/cache_modified_input_iterator.cuh +238 -0
  159. cuda/cccl/headers/include/cub/iterator/cache_modified_output_iterator.cuh +252 -0
  160. cuda/cccl/headers/include/cub/iterator/tex_obj_input_iterator.cuh +322 -0
  161. cuda/cccl/headers/include/cub/thread/thread_load.cuh +347 -0
  162. cuda/cccl/headers/include/cub/thread/thread_operators.cuh +629 -0
  163. cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +504 -0
  164. cuda/cccl/headers/include/cub/thread/thread_scan.cuh +340 -0
  165. cuda/cccl/headers/include/cub/thread/thread_search.cuh +198 -0
  166. cuda/cccl/headers/include/cub/thread/thread_simd.cuh +406 -0
  167. cuda/cccl/headers/include/cub/thread/thread_sort.cuh +101 -0
  168. cuda/cccl/headers/include/cub/thread/thread_store.cuh +364 -0
  169. cuda/cccl/headers/include/cub/util_allocator.cuh +921 -0
  170. cuda/cccl/headers/include/cub/util_arch.cuh +167 -0
  171. cuda/cccl/headers/include/cub/util_cpp_dialect.cuh +95 -0
  172. cuda/cccl/headers/include/cub/util_debug.cuh +207 -0
  173. cuda/cccl/headers/include/cub/util_device.cuh +779 -0
  174. cuda/cccl/headers/include/cub/util_macro.cuh +91 -0
  175. cuda/cccl/headers/include/cub/util_math.cuh +115 -0
  176. cuda/cccl/headers/include/cub/util_namespace.cuh +176 -0
  177. cuda/cccl/headers/include/cub/util_policy_wrapper_t.cuh +55 -0
  178. cuda/cccl/headers/include/cub/util_ptx.cuh +513 -0
  179. cuda/cccl/headers/include/cub/util_temporary_storage.cuh +122 -0
  180. cuda/cccl/headers/include/cub/util_type.cuh +1136 -0
  181. cuda/cccl/headers/include/cub/util_vsmem.cuh +251 -0
  182. cuda/cccl/headers/include/cub/version.cuh +89 -0
  183. cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_shfl.cuh +329 -0
  184. cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_smem.cuh +177 -0
  185. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +729 -0
  186. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +405 -0
  187. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +688 -0
  188. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_smem.cuh +437 -0
  189. cuda/cccl/headers/include/cub/warp/warp_exchange.cuh +405 -0
  190. cuda/cccl/headers/include/cub/warp/warp_load.cuh +614 -0
  191. cuda/cccl/headers/include/cub/warp/warp_merge_sort.cuh +169 -0
  192. cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +822 -0
  193. cuda/cccl/headers/include/cub/warp/warp_scan.cuh +1156 -0
  194. cuda/cccl/headers/include/cub/warp/warp_store.cuh +520 -0
  195. cuda/cccl/headers/include/cuda/__annotated_ptr/access_property.h +165 -0
  196. cuda/cccl/headers/include/cuda/__annotated_ptr/access_property_encoding.h +172 -0
  197. cuda/cccl/headers/include/cuda/__annotated_ptr/annotated_ptr.h +210 -0
  198. cuda/cccl/headers/include/cuda/__annotated_ptr/annotated_ptr_base.h +100 -0
  199. cuda/cccl/headers/include/cuda/__annotated_ptr/apply_access_property.h +84 -0
  200. cuda/cccl/headers/include/cuda/__annotated_ptr/associate_access_property.h +127 -0
  201. cuda/cccl/headers/include/cuda/__annotated_ptr/createpolicy.h +209 -0
  202. cuda/cccl/headers/include/cuda/__atomic/atomic.h +145 -0
  203. cuda/cccl/headers/include/cuda/__barrier/aligned_size.h +61 -0
  204. cuda/cccl/headers/include/cuda/__barrier/async_contract_fulfillment.h +39 -0
  205. cuda/cccl/headers/include/cuda/__barrier/barrier.h +65 -0
  206. cuda/cccl/headers/include/cuda/__barrier/barrier_arrive_tx.h +100 -0
  207. cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +454 -0
  208. cuda/cccl/headers/include/cuda/__barrier/barrier_expect_tx.h +72 -0
  209. cuda/cccl/headers/include/cuda/__barrier/barrier_native_handle.h +45 -0
  210. cuda/cccl/headers/include/cuda/__barrier/barrier_thread_scope.h +60 -0
  211. cuda/cccl/headers/include/cuda/__bit/bit_reverse.h +171 -0
  212. cuda/cccl/headers/include/cuda/__bit/bitfield.h +121 -0
  213. cuda/cccl/headers/include/cuda/__bit/bitmask.h +88 -0
  214. cuda/cccl/headers/include/cuda/__cccl_config +36 -0
  215. cuda/cccl/headers/include/cuda/__cmath/ceil_div.h +124 -0
  216. cuda/cccl/headers/include/cuda/__cmath/ilog.h +195 -0
  217. cuda/cccl/headers/include/cuda/__cmath/ipow.h +107 -0
  218. cuda/cccl/headers/include/cuda/__cmath/isqrt.h +80 -0
  219. cuda/cccl/headers/include/cuda/__cmath/neg.h +47 -0
  220. cuda/cccl/headers/include/cuda/__cmath/pow2.h +74 -0
  221. cuda/cccl/headers/include/cuda/__cmath/round_down.h +102 -0
  222. cuda/cccl/headers/include/cuda/__cmath/round_up.h +104 -0
  223. cuda/cccl/headers/include/cuda/__cmath/uabs.h +57 -0
  224. cuda/cccl/headers/include/cuda/__execution/determinism.h +90 -0
  225. cuda/cccl/headers/include/cuda/__execution/require.h +74 -0
  226. cuda/cccl/headers/include/cuda/__execution/tune.h +69 -0
  227. cuda/cccl/headers/include/cuda/__functional/address_stability.h +131 -0
  228. cuda/cccl/headers/include/cuda/__functional/for_each_canceled.h +276 -0
  229. cuda/cccl/headers/include/cuda/__functional/get_device_address.h +58 -0
  230. cuda/cccl/headers/include/cuda/__functional/maximum.h +58 -0
  231. cuda/cccl/headers/include/cuda/__functional/minimum.h +58 -0
  232. cuda/cccl/headers/include/cuda/__functional/proclaim_return_type.h +108 -0
  233. cuda/cccl/headers/include/cuda/__fwd/barrier.h +38 -0
  234. cuda/cccl/headers/include/cuda/__fwd/barrier_native_handle.h +42 -0
  235. cuda/cccl/headers/include/cuda/__fwd/get_stream.h +38 -0
  236. cuda/cccl/headers/include/cuda/__fwd/pipeline.h +37 -0
  237. cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h +257 -0
  238. cuda/cccl/headers/include/cuda/__iterator/counting_iterator.h +460 -0
  239. cuda/cccl/headers/include/cuda/__iterator/discard_iterator.h +314 -0
  240. cuda/cccl/headers/include/cuda/__iterator/permutation_iterator.h +421 -0
  241. cuda/cccl/headers/include/cuda/__iterator/strided_iterator.h +321 -0
  242. cuda/cccl/headers/include/cuda/__iterator/tabulate_output_iterator.h +333 -0
  243. cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +465 -0
  244. cuda/cccl/headers/include/cuda/__iterator/transform_output_iterator.h +456 -0
  245. cuda/cccl/headers/include/cuda/__latch/latch.h +44 -0
  246. cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +462 -0
  247. cuda/cccl/headers/include/cuda/__mdspan/host_device_mdspan.h +63 -0
  248. cuda/cccl/headers/include/cuda/__mdspan/restrict_accessor.h +122 -0
  249. cuda/cccl/headers/include/cuda/__mdspan/restrict_mdspan.h +51 -0
  250. cuda/cccl/headers/include/cuda/__memcpy_async/check_preconditions.h +79 -0
  251. cuda/cccl/headers/include/cuda/__memcpy_async/completion_mechanism.h +47 -0
  252. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_bulk_shared_global.h +60 -0
  253. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_fallback.h +72 -0
  254. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_shared_global.h +98 -0
  255. cuda/cccl/headers/include/cuda/__memcpy_async/dispatch_memcpy_async.h +162 -0
  256. cuda/cccl/headers/include/cuda/__memcpy_async/is_local_smem_barrier.h +49 -0
  257. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async.h +179 -0
  258. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_barrier.h +99 -0
  259. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_tx.h +99 -0
  260. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_completion.h +170 -0
  261. cuda/cccl/headers/include/cuda/__memcpy_async/try_get_barrier_handle.h +59 -0
  262. cuda/cccl/headers/include/cuda/__memory/address_space.h +86 -0
  263. cuda/cccl/headers/include/cuda/__memory/align_down.h +56 -0
  264. cuda/cccl/headers/include/cuda/__memory/align_up.h +56 -0
  265. cuda/cccl/headers/include/cuda/__memory/is_aligned.h +47 -0
  266. cuda/cccl/headers/include/cuda/__memory/ptr_rebind.h +75 -0
  267. cuda/cccl/headers/include/cuda/__memory_resource/get_memory_resource.h +94 -0
  268. cuda/cccl/headers/include/cuda/__memory_resource/get_property.h +157 -0
  269. cuda/cccl/headers/include/cuda/__memory_resource/properties.h +73 -0
  270. cuda/cccl/headers/include/cuda/__memory_resource/resource.h +129 -0
  271. cuda/cccl/headers/include/cuda/__memory_resource/resource_ref.h +653 -0
  272. cuda/cccl/headers/include/cuda/__numeric/narrow.h +108 -0
  273. cuda/cccl/headers/include/cuda/__numeric/overflow_cast.h +57 -0
  274. cuda/cccl/headers/include/cuda/__numeric/overflow_result.h +43 -0
  275. cuda/cccl/headers/include/cuda/__nvtx/nvtx.h +101 -0
  276. cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +2982 -0
  277. cuda/cccl/headers/include/cuda/__ptx/instructions/barrier_cluster.h +43 -0
  278. cuda/cccl/headers/include/cuda/__ptx/instructions/bfind.h +41 -0
  279. cuda/cccl/headers/include/cuda/__ptx/instructions/bmsk.h +41 -0
  280. cuda/cccl/headers/include/cuda/__ptx/instructions/clusterlaunchcontrol.h +41 -0
  281. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk.h +44 -0
  282. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_commit_group.h +43 -0
  283. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_tensor.h +45 -0
  284. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_wait_group.h +43 -0
  285. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_mbarrier_arrive.h +42 -0
  286. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_reduce_async_bulk.h +60 -0
  287. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_reduce_async_bulk_tensor.h +43 -0
  288. cuda/cccl/headers/include/cuda/__ptx/instructions/elect_sync.h +41 -0
  289. cuda/cccl/headers/include/cuda/__ptx/instructions/exit.h +41 -0
  290. cuda/cccl/headers/include/cuda/__ptx/instructions/fence.h +49 -0
  291. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/barrier_cluster.h +115 -0
  292. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/bfind.h +190 -0
  293. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/bmsk.h +54 -0
  294. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/clusterlaunchcontrol.h +240 -0
  295. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk.h +193 -0
  296. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_commit_group.h +25 -0
  297. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_multicast.h +52 -0
  298. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor.h +957 -0
  299. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_gather_scatter.h +288 -0
  300. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_multicast.h +596 -0
  301. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_wait_group.h +46 -0
  302. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_mbarrier_arrive.h +26 -0
  303. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_mbarrier_arrive_noinc.h +26 -0
  304. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk.h +1445 -0
  305. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_bf16.h +132 -0
  306. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_f16.h +117 -0
  307. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_tensor.h +601 -0
  308. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/elect_sync.h +36 -0
  309. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/exit.h +25 -0
  310. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence.h +208 -0
  311. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_mbarrier_init.h +31 -0
  312. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_alias.h +25 -0
  313. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_async.h +58 -0
  314. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_async_generic_sync_restrict.h +62 -0
  315. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_tensormap_generic.h +101 -0
  316. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_sync_restrict.h +62 -0
  317. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/get_sreg.h +949 -0
  318. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/getctarank.h +32 -0
  319. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/ld.h +15074 -0
  320. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive.h +385 -0
  321. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive_expect_tx.h +176 -0
  322. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive_no_complete.h +34 -0
  323. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_expect_tx.h +94 -0
  324. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_init.h +27 -0
  325. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_test_wait.h +137 -0
  326. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_test_wait_parity.h +138 -0
  327. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_try_wait.h +280 -0
  328. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_try_wait_parity.h +282 -0
  329. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_ld_reduce.h +2148 -0
  330. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_red.h +1272 -0
  331. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_st.h +228 -0
  332. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/prmt.h +230 -0
  333. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/red_async.h +430 -0
  334. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/shl.h +96 -0
  335. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/shr.h +168 -0
  336. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st.h +1830 -0
  337. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st_async.h +123 -0
  338. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st_bulk.h +31 -0
  339. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_alloc.h +105 -0
  340. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_commit.h +81 -0
  341. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_cp.h +612 -0
  342. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_fence.h +44 -0
  343. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_ld.h +4446 -0
  344. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_mma.h +4061 -0
  345. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_mma_ws.h +6438 -0
  346. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_shift.h +36 -0
  347. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_st.h +4582 -0
  348. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_wait.h +44 -0
  349. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tensormap_cp_fenceproxy.h +67 -0
  350. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tensormap_replace.h +750 -0
  351. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/trap.h +25 -0
  352. cuda/cccl/headers/include/cuda/__ptx/instructions/get_sreg.h +43 -0
  353. cuda/cccl/headers/include/cuda/__ptx/instructions/getctarank.h +43 -0
  354. cuda/cccl/headers/include/cuda/__ptx/instructions/ld.h +41 -0
  355. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_arrive.h +45 -0
  356. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_expect_tx.h +41 -0
  357. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_init.h +43 -0
  358. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_wait.h +46 -0
  359. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_ld_reduce.h +41 -0
  360. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_red.h +41 -0
  361. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_st.h +41 -0
  362. cuda/cccl/headers/include/cuda/__ptx/instructions/prmt.h +41 -0
  363. cuda/cccl/headers/include/cuda/__ptx/instructions/red_async.h +43 -0
  364. cuda/cccl/headers/include/cuda/__ptx/instructions/shfl_sync.h +244 -0
  365. cuda/cccl/headers/include/cuda/__ptx/instructions/shl.h +41 -0
  366. cuda/cccl/headers/include/cuda/__ptx/instructions/shr.h +41 -0
  367. cuda/cccl/headers/include/cuda/__ptx/instructions/st.h +41 -0
  368. cuda/cccl/headers/include/cuda/__ptx/instructions/st_async.h +43 -0
  369. cuda/cccl/headers/include/cuda/__ptx/instructions/st_bulk.h +41 -0
  370. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_alloc.h +41 -0
  371. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_commit.h +41 -0
  372. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_cp.h +41 -0
  373. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_fence.h +41 -0
  374. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_ld.h +41 -0
  375. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_mma.h +41 -0
  376. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_mma_ws.h +41 -0
  377. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_shift.h +41 -0
  378. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_st.h +41 -0
  379. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_wait.h +41 -0
  380. cuda/cccl/headers/include/cuda/__ptx/instructions/tensormap_cp_fenceproxy.h +43 -0
  381. cuda/cccl/headers/include/cuda/__ptx/instructions/tensormap_replace.h +43 -0
  382. cuda/cccl/headers/include/cuda/__ptx/instructions/trap.h +41 -0
  383. cuda/cccl/headers/include/cuda/__ptx/ptx_dot_variants.h +230 -0
  384. cuda/cccl/headers/include/cuda/__ptx/ptx_helper_functions.h +151 -0
  385. cuda/cccl/headers/include/cuda/__semaphore/counting_semaphore.h +53 -0
  386. cuda/cccl/headers/include/cuda/__stream/get_stream.h +110 -0
  387. cuda/cccl/headers/include/cuda/__stream/stream_ref.h +163 -0
  388. cuda/cccl/headers/include/cuda/__type_traits/is_floating_point.h +47 -0
  389. cuda/cccl/headers/include/cuda/__utility/static_for.h +74 -0
  390. cuda/cccl/headers/include/cuda/__warp/lane_mask.h +326 -0
  391. cuda/cccl/headers/include/cuda/__warp/warp_match_all.h +65 -0
  392. cuda/cccl/headers/include/cuda/__warp/warp_shuffle.h +249 -0
  393. cuda/cccl/headers/include/cuda/access_property +26 -0
  394. cuda/cccl/headers/include/cuda/annotated_ptr +29 -0
  395. cuda/cccl/headers/include/cuda/atomic +27 -0
  396. cuda/cccl/headers/include/cuda/barrier +262 -0
  397. cuda/cccl/headers/include/cuda/bit +29 -0
  398. cuda/cccl/headers/include/cuda/cmath +35 -0
  399. cuda/cccl/headers/include/cuda/discard_memory +60 -0
  400. cuda/cccl/headers/include/cuda/functional +31 -0
  401. cuda/cccl/headers/include/cuda/iterator +34 -0
  402. cuda/cccl/headers/include/cuda/latch +27 -0
  403. cuda/cccl/headers/include/cuda/mdspan +28 -0
  404. cuda/cccl/headers/include/cuda/memory +32 -0
  405. cuda/cccl/headers/include/cuda/memory_resource +41 -0
  406. cuda/cccl/headers/include/cuda/numeric +28 -0
  407. cuda/cccl/headers/include/cuda/pipeline +577 -0
  408. cuda/cccl/headers/include/cuda/ptx +124 -0
  409. cuda/cccl/headers/include/cuda/semaphore +31 -0
  410. cuda/cccl/headers/include/cuda/std/__algorithm/adjacent_find.h +59 -0
  411. cuda/cccl/headers/include/cuda/std/__algorithm/all_of.h +45 -0
  412. cuda/cccl/headers/include/cuda/std/__algorithm/any_of.h +45 -0
  413. cuda/cccl/headers/include/cuda/std/__algorithm/binary_search.h +52 -0
  414. cuda/cccl/headers/include/cuda/std/__algorithm/clamp.h +48 -0
  415. cuda/cccl/headers/include/cuda/std/__algorithm/comp.h +64 -0
  416. cuda/cccl/headers/include/cuda/std/__algorithm/comp_ref_type.h +85 -0
  417. cuda/cccl/headers/include/cuda/std/__algorithm/copy.h +142 -0
  418. cuda/cccl/headers/include/cuda/std/__algorithm/copy_backward.h +80 -0
  419. cuda/cccl/headers/include/cuda/std/__algorithm/copy_if.h +47 -0
  420. cuda/cccl/headers/include/cuda/std/__algorithm/copy_n.h +73 -0
  421. cuda/cccl/headers/include/cuda/std/__algorithm/count.h +49 -0
  422. cuda/cccl/headers/include/cuda/std/__algorithm/count_if.h +49 -0
  423. cuda/cccl/headers/include/cuda/std/__algorithm/equal.h +128 -0
  424. cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h +101 -0
  425. cuda/cccl/headers/include/cuda/std/__algorithm/fill.h +58 -0
  426. cuda/cccl/headers/include/cuda/std/__algorithm/fill_n.h +51 -0
  427. cuda/cccl/headers/include/cuda/std/__algorithm/find.h +62 -0
  428. cuda/cccl/headers/include/cuda/std/__algorithm/find_end.h +225 -0
  429. cuda/cccl/headers/include/cuda/std/__algorithm/find_first_of.h +73 -0
  430. cuda/cccl/headers/include/cuda/std/__algorithm/find_if.h +46 -0
  431. cuda/cccl/headers/include/cuda/std/__algorithm/find_if_not.h +46 -0
  432. cuda/cccl/headers/include/cuda/std/__algorithm/for_each.h +42 -0
  433. cuda/cccl/headers/include/cuda/std/__algorithm/for_each_n.h +48 -0
  434. cuda/cccl/headers/include/cuda/std/__algorithm/generate.h +41 -0
  435. cuda/cccl/headers/include/cuda/std/__algorithm/generate_n.h +46 -0
  436. cuda/cccl/headers/include/cuda/std/__algorithm/half_positive.h +49 -0
  437. cuda/cccl/headers/include/cuda/std/__algorithm/includes.h +92 -0
  438. cuda/cccl/headers/include/cuda/std/__algorithm/is_heap.h +50 -0
  439. cuda/cccl/headers/include/cuda/std/__algorithm/is_heap_until.h +83 -0
  440. cuda/cccl/headers/include/cuda/std/__algorithm/is_partitioned.h +57 -0
  441. cuda/cccl/headers/include/cuda/std/__algorithm/is_permutation.h +252 -0
  442. cuda/cccl/headers/include/cuda/std/__algorithm/is_sorted.h +49 -0
  443. cuda/cccl/headers/include/cuda/std/__algorithm/is_sorted_until.h +68 -0
  444. cuda/cccl/headers/include/cuda/std/__algorithm/iter_swap.h +82 -0
  445. cuda/cccl/headers/include/cuda/std/__algorithm/iterator_operations.h +185 -0
  446. cuda/cccl/headers/include/cuda/std/__algorithm/lexicographical_compare.h +68 -0
  447. cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h +82 -0
  448. cuda/cccl/headers/include/cuda/std/__algorithm/make_heap.h +70 -0
  449. cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h +96 -0
  450. cuda/cccl/headers/include/cuda/std/__algorithm/max.h +62 -0
  451. cuda/cccl/headers/include/cuda/std/__algorithm/max_element.h +68 -0
  452. cuda/cccl/headers/include/cuda/std/__algorithm/merge.h +89 -0
  453. cuda/cccl/headers/include/cuda/std/__algorithm/min.h +62 -0
  454. cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h +87 -0
  455. cuda/cccl/headers/include/cuda/std/__algorithm/minmax.h +66 -0
  456. cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h +140 -0
  457. cuda/cccl/headers/include/cuda/std/__algorithm/mismatch.h +83 -0
  458. cuda/cccl/headers/include/cuda/std/__algorithm/move.h +87 -0
  459. cuda/cccl/headers/include/cuda/std/__algorithm/move_backward.h +84 -0
  460. cuda/cccl/headers/include/cuda/std/__algorithm/next_permutation.h +88 -0
  461. cuda/cccl/headers/include/cuda/std/__algorithm/none_of.h +45 -0
  462. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort.h +102 -0
  463. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h +122 -0
  464. cuda/cccl/headers/include/cuda/std/__algorithm/partition.h +120 -0
  465. cuda/cccl/headers/include/cuda/std/__algorithm/partition_copy.h +59 -0
  466. cuda/cccl/headers/include/cuda/std/__algorithm/partition_point.h +61 -0
  467. cuda/cccl/headers/include/cuda/std/__algorithm/pop_heap.h +94 -0
  468. cuda/cccl/headers/include/cuda/std/__algorithm/prev_permutation.h +88 -0
  469. cuda/cccl/headers/include/cuda/std/__algorithm/push_heap.h +101 -0
  470. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_iterator_concept.h +65 -0
  471. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_min.h +98 -0
  472. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_min_element.h +68 -0
  473. cuda/cccl/headers/include/cuda/std/__algorithm/remove.h +55 -0
  474. cuda/cccl/headers/include/cuda/std/__algorithm/remove_copy.h +47 -0
  475. cuda/cccl/headers/include/cuda/std/__algorithm/remove_copy_if.h +47 -0
  476. cuda/cccl/headers/include/cuda/std/__algorithm/remove_if.h +56 -0
  477. cuda/cccl/headers/include/cuda/std/__algorithm/replace.h +45 -0
  478. cuda/cccl/headers/include/cuda/std/__algorithm/replace_copy.h +54 -0
  479. cuda/cccl/headers/include/cuda/std/__algorithm/replace_copy_if.h +50 -0
  480. cuda/cccl/headers/include/cuda/std/__algorithm/replace_if.h +45 -0
  481. cuda/cccl/headers/include/cuda/std/__algorithm/reverse.h +81 -0
  482. cuda/cccl/headers/include/cuda/std/__algorithm/reverse_copy.h +43 -0
  483. cuda/cccl/headers/include/cuda/std/__algorithm/rotate.h +261 -0
  484. cuda/cccl/headers/include/cuda/std/__algorithm/rotate_copy.h +40 -0
  485. cuda/cccl/headers/include/cuda/std/__algorithm/search.h +185 -0
  486. cuda/cccl/headers/include/cuda/std/__algorithm/search_n.h +163 -0
  487. cuda/cccl/headers/include/cuda/std/__algorithm/set_difference.h +95 -0
  488. cuda/cccl/headers/include/cuda/std/__algorithm/set_intersection.h +122 -0
  489. cuda/cccl/headers/include/cuda/std/__algorithm/set_symmetric_difference.h +134 -0
  490. cuda/cccl/headers/include/cuda/std/__algorithm/set_union.h +128 -0
  491. cuda/cccl/headers/include/cuda/std/__algorithm/shift_left.h +84 -0
  492. cuda/cccl/headers/include/cuda/std/__algorithm/shift_right.h +144 -0
  493. cuda/cccl/headers/include/cuda/std/__algorithm/sift_down.h +139 -0
  494. cuda/cccl/headers/include/cuda/std/__algorithm/sort_heap.h +70 -0
  495. cuda/cccl/headers/include/cuda/std/__algorithm/swap_ranges.h +78 -0
  496. cuda/cccl/headers/include/cuda/std/__algorithm/transform.h +59 -0
  497. cuda/cccl/headers/include/cuda/std/__algorithm/unique.h +76 -0
  498. cuda/cccl/headers/include/cuda/std/__algorithm/unique_copy.h +155 -0
  499. cuda/cccl/headers/include/cuda/std/__algorithm/unwrap_iter.h +95 -0
  500. cuda/cccl/headers/include/cuda/std/__algorithm/unwrap_range.h +126 -0
  501. cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h +83 -0
  502. cuda/cccl/headers/include/cuda/std/__algorithm_ +26 -0
  503. cuda/cccl/headers/include/cuda/std/__atomic/api/common.h +192 -0
  504. cuda/cccl/headers/include/cuda/std/__atomic/api/owned.h +138 -0
  505. cuda/cccl/headers/include/cuda/std/__atomic/api/reference.h +118 -0
  506. cuda/cccl/headers/include/cuda/std/__atomic/functions/common.h +58 -0
  507. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_local.h +218 -0
  508. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_derived.h +401 -0
  509. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated.h +3971 -0
  510. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated_helper.h +177 -0
  511. cuda/cccl/headers/include/cuda/std/__atomic/functions/host.h +211 -0
  512. cuda/cccl/headers/include/cuda/std/__atomic/functions.h +33 -0
  513. cuda/cccl/headers/include/cuda/std/__atomic/order.h +159 -0
  514. cuda/cccl/headers/include/cuda/std/__atomic/platform/msvc_to_builtins.h +654 -0
  515. cuda/cccl/headers/include/cuda/std/__atomic/platform.h +93 -0
  516. cuda/cccl/headers/include/cuda/std/__atomic/scopes.h +105 -0
  517. cuda/cccl/headers/include/cuda/std/__atomic/types/base.h +250 -0
  518. cuda/cccl/headers/include/cuda/std/__atomic/types/common.h +105 -0
  519. cuda/cccl/headers/include/cuda/std/__atomic/types/locked.h +225 -0
  520. cuda/cccl/headers/include/cuda/std/__atomic/types/reference.h +73 -0
  521. cuda/cccl/headers/include/cuda/std/__atomic/types/small.h +228 -0
  522. cuda/cccl/headers/include/cuda/std/__atomic/types.h +52 -0
  523. cuda/cccl/headers/include/cuda/std/__atomic/wait/notify_wait.h +95 -0
  524. cuda/cccl/headers/include/cuda/std/__atomic/wait/polling.h +65 -0
  525. cuda/cccl/headers/include/cuda/std/__barrier/barrier.h +227 -0
  526. cuda/cccl/headers/include/cuda/std/__barrier/empty_completion.h +37 -0
  527. cuda/cccl/headers/include/cuda/std/__barrier/poll_tester.h +82 -0
  528. cuda/cccl/headers/include/cuda/std/__bit/bit_cast.h +77 -0
  529. cuda/cccl/headers/include/cuda/std/__bit/byteswap.h +183 -0
  530. cuda/cccl/headers/include/cuda/std/__bit/countl.h +167 -0
  531. cuda/cccl/headers/include/cuda/std/__bit/countr.h +185 -0
  532. cuda/cccl/headers/include/cuda/std/__bit/endian.h +39 -0
  533. cuda/cccl/headers/include/cuda/std/__bit/has_single_bit.h +43 -0
  534. cuda/cccl/headers/include/cuda/std/__bit/integral.h +124 -0
  535. cuda/cccl/headers/include/cuda/std/__bit/popcount.h +154 -0
  536. cuda/cccl/headers/include/cuda/std/__bit/reference.h +1270 -0
  537. cuda/cccl/headers/include/cuda/std/__bit/rotate.h +94 -0
  538. cuda/cccl/headers/include/cuda/std/__cccl/architecture.h +78 -0
  539. cuda/cccl/headers/include/cuda/std/__cccl/assert.h +150 -0
  540. cuda/cccl/headers/include/cuda/std/__cccl/attributes.h +207 -0
  541. cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +787 -0
  542. cuda/cccl/headers/include/cuda/std/__cccl/compiler.h +217 -0
  543. cuda/cccl/headers/include/cuda/std/__cccl/cuda_capabilities.h +43 -0
  544. cuda/cccl/headers/include/cuda/std/__cccl/cuda_toolkit.h +53 -0
  545. cuda/cccl/headers/include/cuda/std/__cccl/deprecated.h +69 -0
  546. cuda/cccl/headers/include/cuda/std/__cccl/diagnostic.h +128 -0
  547. cuda/cccl/headers/include/cuda/std/__cccl/dialect.h +126 -0
  548. cuda/cccl/headers/include/cuda/std/__cccl/epilogue.h +326 -0
  549. cuda/cccl/headers/include/cuda/std/__cccl/exceptions.h +79 -0
  550. cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h +68 -0
  551. cuda/cccl/headers/include/cuda/std/__cccl/extended_data_types.h +133 -0
  552. cuda/cccl/headers/include/cuda/std/__cccl/is_non_narrowing_convertible.h +73 -0
  553. cuda/cccl/headers/include/cuda/std/__cccl/os.h +48 -0
  554. cuda/cccl/headers/include/cuda/std/__cccl/preprocessor.h +1276 -0
  555. cuda/cccl/headers/include/cuda/std/__cccl/prologue.h +267 -0
  556. cuda/cccl/headers/include/cuda/std/__cccl/ptx_isa.h +176 -0
  557. cuda/cccl/headers/include/cuda/std/__cccl/rtti.h +72 -0
  558. cuda/cccl/headers/include/cuda/std/__cccl/sequence_access.h +87 -0
  559. cuda/cccl/headers/include/cuda/std/__cccl/system_header.h +38 -0
  560. cuda/cccl/headers/include/cuda/std/__cccl/unreachable.h +31 -0
  561. cuda/cccl/headers/include/cuda/std/__cccl/version.h +26 -0
  562. cuda/cccl/headers/include/cuda/std/__cccl/visibility.h +115 -0
  563. cuda/cccl/headers/include/cuda/std/__charconv/chars_format.h +81 -0
  564. cuda/cccl/headers/include/cuda/std/__charconv/from_chars_result.h +56 -0
  565. cuda/cccl/headers/include/cuda/std/__charconv/to_chars.h +148 -0
  566. cuda/cccl/headers/include/cuda/std/__charconv/to_chars_result.h +56 -0
  567. cuda/cccl/headers/include/cuda/std/__charconv_ +30 -0
  568. cuda/cccl/headers/include/cuda/std/__cmath/abs.h +246 -0
  569. cuda/cccl/headers/include/cuda/std/__cmath/copysign.h +193 -0
  570. cuda/cccl/headers/include/cuda/std/__cmath/exponential_functions.h +724 -0
  571. cuda/cccl/headers/include/cuda/std/__cmath/fma.h +125 -0
  572. cuda/cccl/headers/include/cuda/std/__cmath/fpclassify.h +216 -0
  573. cuda/cccl/headers/include/cuda/std/__cmath/gamma.h +205 -0
  574. cuda/cccl/headers/include/cuda/std/__cmath/hyperbolic_functions.h +286 -0
  575. cuda/cccl/headers/include/cuda/std/__cmath/hypot.h +224 -0
  576. cuda/cccl/headers/include/cuda/std/__cmath/inverse_hyperbolic_functions.h +286 -0
  577. cuda/cccl/headers/include/cuda/std/__cmath/inverse_trigonometric_functions.h +371 -0
  578. cuda/cccl/headers/include/cuda/std/__cmath/isfinite.h +167 -0
  579. cuda/cccl/headers/include/cuda/std/__cmath/isinf.h +205 -0
  580. cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +180 -0
  581. cuda/cccl/headers/include/cuda/std/__cmath/isnormal.h +138 -0
  582. cuda/cccl/headers/include/cuda/std/__cmath/lerp.h +104 -0
  583. cuda/cccl/headers/include/cuda/std/__cmath/logarithms.h +582 -0
  584. cuda/cccl/headers/include/cuda/std/__cmath/min_max.h +248 -0
  585. cuda/cccl/headers/include/cuda/std/__cmath/modulo.h +208 -0
  586. cuda/cccl/headers/include/cuda/std/__cmath/nvbf16.h +58 -0
  587. cuda/cccl/headers/include/cuda/std/__cmath/nvfp16.h +58 -0
  588. cuda/cccl/headers/include/cuda/std/__cmath/remainder.h +206 -0
  589. cuda/cccl/headers/include/cuda/std/__cmath/roots.h +199 -0
  590. cuda/cccl/headers/include/cuda/std/__cmath/rounding_functions.h +984 -0
  591. cuda/cccl/headers/include/cuda/std/__cmath/signbit.h +155 -0
  592. cuda/cccl/headers/include/cuda/std/__cmath/traits.h +170 -0
  593. cuda/cccl/headers/include/cuda/std/__cmath/trigonometric_functions.h +328 -0
  594. cuda/cccl/headers/include/cuda/std/__complex/arg.h +84 -0
  595. cuda/cccl/headers/include/cuda/std/__complex/complex.h +676 -0
  596. cuda/cccl/headers/include/cuda/std/__complex/exponential_functions.h +388 -0
  597. cuda/cccl/headers/include/cuda/std/__complex/hyperbolic_functions.h +117 -0
  598. cuda/cccl/headers/include/cuda/std/__complex/inverse_hyperbolic_functions.h +215 -0
  599. cuda/cccl/headers/include/cuda/std/__complex/inverse_trigonometric_functions.h +131 -0
  600. cuda/cccl/headers/include/cuda/std/__complex/literals.h +106 -0
  601. cuda/cccl/headers/include/cuda/std/__complex/logarithms.h +53 -0
  602. cuda/cccl/headers/include/cuda/std/__complex/math.h +159 -0
  603. cuda/cccl/headers/include/cuda/std/__complex/nvbf16.h +323 -0
  604. cuda/cccl/headers/include/cuda/std/__complex/nvfp16.h +322 -0
  605. cuda/cccl/headers/include/cuda/std/__complex/roots.h +64 -0
  606. cuda/cccl/headers/include/cuda/std/__complex/trigonometric_functions.h +61 -0
  607. cuda/cccl/headers/include/cuda/std/__complex/tuple.h +107 -0
  608. cuda/cccl/headers/include/cuda/std/__complex/vector_support.h +131 -0
  609. cuda/cccl/headers/include/cuda/std/__concepts/arithmetic.h +56 -0
  610. cuda/cccl/headers/include/cuda/std/__concepts/assignable.h +64 -0
  611. cuda/cccl/headers/include/cuda/std/__concepts/boolean_testable.h +63 -0
  612. cuda/cccl/headers/include/cuda/std/__concepts/class_or_enum.h +46 -0
  613. cuda/cccl/headers/include/cuda/std/__concepts/common_reference_with.h +69 -0
  614. cuda/cccl/headers/include/cuda/std/__concepts/common_with.h +82 -0
  615. cuda/cccl/headers/include/cuda/std/__concepts/concept_macros.h +273 -0
  616. cuda/cccl/headers/include/cuda/std/__concepts/constructible.h +107 -0
  617. cuda/cccl/headers/include/cuda/std/__concepts/convertible_to.h +71 -0
  618. cuda/cccl/headers/include/cuda/std/__concepts/copyable.h +60 -0
  619. cuda/cccl/headers/include/cuda/std/__concepts/derived_from.h +57 -0
  620. cuda/cccl/headers/include/cuda/std/__concepts/destructible.h +76 -0
  621. cuda/cccl/headers/include/cuda/std/__concepts/different_from.h +38 -0
  622. cuda/cccl/headers/include/cuda/std/__concepts/equality_comparable.h +100 -0
  623. cuda/cccl/headers/include/cuda/std/__concepts/invocable.h +80 -0
  624. cuda/cccl/headers/include/cuda/std/__concepts/movable.h +58 -0
  625. cuda/cccl/headers/include/cuda/std/__concepts/predicate.h +54 -0
  626. cuda/cccl/headers/include/cuda/std/__concepts/regular.h +54 -0
  627. cuda/cccl/headers/include/cuda/std/__concepts/relation.h +77 -0
  628. cuda/cccl/headers/include/cuda/std/__concepts/same_as.h +42 -0
  629. cuda/cccl/headers/include/cuda/std/__concepts/semiregular.h +54 -0
  630. cuda/cccl/headers/include/cuda/std/__concepts/swappable.h +206 -0
  631. cuda/cccl/headers/include/cuda/std/__concepts/totally_ordered.h +101 -0
  632. cuda/cccl/headers/include/cuda/std/__cstddef/byte.h +113 -0
  633. cuda/cccl/headers/include/cuda/std/__cstddef/types.h +52 -0
  634. cuda/cccl/headers/include/cuda/std/__cstdlib/abs.h +57 -0
  635. cuda/cccl/headers/include/cuda/std/__cstdlib/aligned_alloc.h +66 -0
  636. cuda/cccl/headers/include/cuda/std/__cstdlib/div.h +96 -0
  637. cuda/cccl/headers/include/cuda/std/__cstdlib/malloc.h +69 -0
  638. cuda/cccl/headers/include/cuda/std/__cuda/api_wrapper.h +62 -0
  639. cuda/cccl/headers/include/cuda/std/__cuda/ensure_current_device.h +72 -0
  640. cuda/cccl/headers/include/cuda/std/__exception/cuda_error.h +142 -0
  641. cuda/cccl/headers/include/cuda/std/__exception/terminate.h +73 -0
  642. cuda/cccl/headers/include/cuda/std/__execution/env.h +436 -0
  643. cuda/cccl/headers/include/cuda/std/__expected/bad_expected_access.h +127 -0
  644. cuda/cccl/headers/include/cuda/std/__expected/expected.h +2001 -0
  645. cuda/cccl/headers/include/cuda/std/__expected/expected_base.h +1080 -0
  646. cuda/cccl/headers/include/cuda/std/__expected/unexpect.h +37 -0
  647. cuda/cccl/headers/include/cuda/std/__expected/unexpected.h +175 -0
  648. cuda/cccl/headers/include/cuda/std/__floating_point/arithmetic.h +56 -0
  649. cuda/cccl/headers/include/cuda/std/__floating_point/cast.h +809 -0
  650. cuda/cccl/headers/include/cuda/std/__floating_point/cccl_fp.h +125 -0
  651. cuda/cccl/headers/include/cuda/std/__floating_point/common_type.h +48 -0
  652. cuda/cccl/headers/include/cuda/std/__floating_point/constants.h +172 -0
  653. cuda/cccl/headers/include/cuda/std/__floating_point/conversion_rank_order.h +103 -0
  654. cuda/cccl/headers/include/cuda/std/__floating_point/format.h +162 -0
  655. cuda/cccl/headers/include/cuda/std/__floating_point/fp.h +39 -0
  656. cuda/cccl/headers/include/cuda/std/__floating_point/mask.h +64 -0
  657. cuda/cccl/headers/include/cuda/std/__floating_point/native_type.h +81 -0
  658. cuda/cccl/headers/include/cuda/std/__floating_point/nvfp_types.h +58 -0
  659. cuda/cccl/headers/include/cuda/std/__floating_point/overflow_handler.h +139 -0
  660. cuda/cccl/headers/include/cuda/std/__floating_point/properties.h +229 -0
  661. cuda/cccl/headers/include/cuda/std/__floating_point/storage.h +248 -0
  662. cuda/cccl/headers/include/cuda/std/__floating_point/traits.h +172 -0
  663. cuda/cccl/headers/include/cuda/std/__format/format_error.h +76 -0
  664. cuda/cccl/headers/include/cuda/std/__format/format_parse_context.h +128 -0
  665. cuda/cccl/headers/include/cuda/std/__format_ +28 -0
  666. cuda/cccl/headers/include/cuda/std/__functional/binary_function.h +63 -0
  667. cuda/cccl/headers/include/cuda/std/__functional/binary_negate.h +65 -0
  668. cuda/cccl/headers/include/cuda/std/__functional/bind.h +337 -0
  669. cuda/cccl/headers/include/cuda/std/__functional/bind_back.h +80 -0
  670. cuda/cccl/headers/include/cuda/std/__functional/bind_front.h +73 -0
  671. cuda/cccl/headers/include/cuda/std/__functional/binder1st.h +74 -0
  672. cuda/cccl/headers/include/cuda/std/__functional/binder2nd.h +74 -0
  673. cuda/cccl/headers/include/cuda/std/__functional/compose.h +68 -0
  674. cuda/cccl/headers/include/cuda/std/__functional/default_searcher.h +75 -0
  675. cuda/cccl/headers/include/cuda/std/__functional/function.h +1277 -0
  676. cuda/cccl/headers/include/cuda/std/__functional/hash.h +650 -0
  677. cuda/cccl/headers/include/cuda/std/__functional/identity.h +61 -0
  678. cuda/cccl/headers/include/cuda/std/__functional/invoke.h +558 -0
  679. cuda/cccl/headers/include/cuda/std/__functional/is_transparent.h +43 -0
  680. cuda/cccl/headers/include/cuda/std/__functional/mem_fn.h +67 -0
  681. cuda/cccl/headers/include/cuda/std/__functional/mem_fun_ref.h +213 -0
  682. cuda/cccl/headers/include/cuda/std/__functional/not_fn.h +120 -0
  683. cuda/cccl/headers/include/cuda/std/__functional/operations.h +534 -0
  684. cuda/cccl/headers/include/cuda/std/__functional/perfect_forward.h +127 -0
  685. cuda/cccl/headers/include/cuda/std/__functional/pointer_to_binary_function.h +65 -0
  686. cuda/cccl/headers/include/cuda/std/__functional/pointer_to_unary_function.h +64 -0
  687. cuda/cccl/headers/include/cuda/std/__functional/ranges_operations.h +113 -0
  688. cuda/cccl/headers/include/cuda/std/__functional/reference_wrapper.h +113 -0
  689. cuda/cccl/headers/include/cuda/std/__functional/unary_function.h +62 -0
  690. cuda/cccl/headers/include/cuda/std/__functional/unary_negate.h +65 -0
  691. cuda/cccl/headers/include/cuda/std/__functional/unwrap_ref.h +56 -0
  692. cuda/cccl/headers/include/cuda/std/__functional/weak_result_type.h +277 -0
  693. cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +35 -0
  694. cuda/cccl/headers/include/cuda/std/__fwd/array.h +36 -0
  695. cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +49 -0
  696. cuda/cccl/headers/include/cuda/std/__fwd/complex.h +34 -0
  697. cuda/cccl/headers/include/cuda/std/__fwd/fp.h +37 -0
  698. cuda/cccl/headers/include/cuda/std/__fwd/get.h +123 -0
  699. cuda/cccl/headers/include/cuda/std/__fwd/hash.h +34 -0
  700. cuda/cccl/headers/include/cuda/std/__fwd/iterator_traits.h +40 -0
  701. cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +73 -0
  702. cuda/cccl/headers/include/cuda/std/__fwd/memory_resource.h +37 -0
  703. cuda/cccl/headers/include/cuda/std/__fwd/optional.h +39 -0
  704. cuda/cccl/headers/include/cuda/std/__fwd/pair.h +34 -0
  705. cuda/cccl/headers/include/cuda/std/__fwd/reference_wrapper.h +34 -0
  706. cuda/cccl/headers/include/cuda/std/__fwd/span.h +38 -0
  707. cuda/cccl/headers/include/cuda/std/__fwd/string.h +83 -0
  708. cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +59 -0
  709. cuda/cccl/headers/include/cuda/std/__fwd/subrange.h +55 -0
  710. cuda/cccl/headers/include/cuda/std/__fwd/tuple.h +34 -0
  711. cuda/cccl/headers/include/cuda/std/__internal/cpp_dialect.h +44 -0
  712. cuda/cccl/headers/include/cuda/std/__internal/features.h +71 -0
  713. cuda/cccl/headers/include/cuda/std/__internal/namespaces.h +102 -0
  714. cuda/cccl/headers/include/cuda/std/__iterator/access.h +128 -0
  715. cuda/cccl/headers/include/cuda/std/__iterator/advance.h +228 -0
  716. cuda/cccl/headers/include/cuda/std/__iterator/back_insert_iterator.h +100 -0
  717. cuda/cccl/headers/include/cuda/std/__iterator/bounded_iter.h +254 -0
  718. cuda/cccl/headers/include/cuda/std/__iterator/concepts.h +645 -0
  719. cuda/cccl/headers/include/cuda/std/__iterator/counted_iterator.h +464 -0
  720. cuda/cccl/headers/include/cuda/std/__iterator/data.h +61 -0
  721. cuda/cccl/headers/include/cuda/std/__iterator/default_sentinel.h +36 -0
  722. cuda/cccl/headers/include/cuda/std/__iterator/distance.h +126 -0
  723. cuda/cccl/headers/include/cuda/std/__iterator/empty.h +53 -0
  724. cuda/cccl/headers/include/cuda/std/__iterator/erase_if_container.h +53 -0
  725. cuda/cccl/headers/include/cuda/std/__iterator/front_insert_iterator.h +95 -0
  726. cuda/cccl/headers/include/cuda/std/__iterator/incrementable_traits.h +152 -0
  727. cuda/cccl/headers/include/cuda/std/__iterator/indirectly_comparable.h +55 -0
  728. cuda/cccl/headers/include/cuda/std/__iterator/insert_iterator.h +102 -0
  729. cuda/cccl/headers/include/cuda/std/__iterator/istream_iterator.h +140 -0
  730. cuda/cccl/headers/include/cuda/std/__iterator/istreambuf_iterator.h +160 -0
  731. cuda/cccl/headers/include/cuda/std/__iterator/iter_move.h +161 -0
  732. cuda/cccl/headers/include/cuda/std/__iterator/iter_swap.h +163 -0
  733. cuda/cccl/headers/include/cuda/std/__iterator/iterator.h +44 -0
  734. cuda/cccl/headers/include/cuda/std/__iterator/iterator_traits.h +932 -0
  735. cuda/cccl/headers/include/cuda/std/__iterator/mergeable.h +72 -0
  736. cuda/cccl/headers/include/cuda/std/__iterator/move_iterator.h +400 -0
  737. cuda/cccl/headers/include/cuda/std/__iterator/move_sentinel.h +73 -0
  738. cuda/cccl/headers/include/cuda/std/__iterator/next.h +101 -0
  739. cuda/cccl/headers/include/cuda/std/__iterator/ostream_iterator.h +98 -0
  740. cuda/cccl/headers/include/cuda/std/__iterator/ostreambuf_iterator.h +100 -0
  741. cuda/cccl/headers/include/cuda/std/__iterator/permutable.h +54 -0
  742. cuda/cccl/headers/include/cuda/std/__iterator/prev.h +91 -0
  743. cuda/cccl/headers/include/cuda/std/__iterator/projected.h +61 -0
  744. cuda/cccl/headers/include/cuda/std/__iterator/readable_traits.h +185 -0
  745. cuda/cccl/headers/include/cuda/std/__iterator/reverse_access.h +142 -0
  746. cuda/cccl/headers/include/cuda/std/__iterator/reverse_iterator.h +605 -0
  747. cuda/cccl/headers/include/cuda/std/__iterator/size.h +69 -0
  748. cuda/cccl/headers/include/cuda/std/__iterator/sortable.h +55 -0
  749. cuda/cccl/headers/include/cuda/std/__iterator/unreachable_sentinel.h +84 -0
  750. cuda/cccl/headers/include/cuda/std/__iterator/wrap_iter.h +247 -0
  751. cuda/cccl/headers/include/cuda/std/__latch/latch.h +88 -0
  752. cuda/cccl/headers/include/cuda/std/__limits/numeric_limits.h +617 -0
  753. cuda/cccl/headers/include/cuda/std/__limits/numeric_limits_ext.h +781 -0
  754. cuda/cccl/headers/include/cuda/std/__linalg/conj_if_needed.h +78 -0
  755. cuda/cccl/headers/include/cuda/std/__linalg/conjugate_transposed.h +54 -0
  756. cuda/cccl/headers/include/cuda/std/__linalg/conjugated.h +139 -0
  757. cuda/cccl/headers/include/cuda/std/__linalg/scaled.h +132 -0
  758. cuda/cccl/headers/include/cuda/std/__linalg/transposed.h +322 -0
  759. cuda/cccl/headers/include/cuda/std/__mdspan/aligned_accessor.h +98 -0
  760. cuda/cccl/headers/include/cuda/std/__mdspan/concepts.h +139 -0
  761. cuda/cccl/headers/include/cuda/std/__mdspan/default_accessor.h +73 -0
  762. cuda/cccl/headers/include/cuda/std/__mdspan/empty_base.h +358 -0
  763. cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +757 -0
  764. cuda/cccl/headers/include/cuda/std/__mdspan/layout_left.h +315 -0
  765. cuda/cccl/headers/include/cuda/std/__mdspan/layout_right.h +308 -0
  766. cuda/cccl/headers/include/cuda/std/__mdspan/layout_stride.h +605 -0
  767. cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +507 -0
  768. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_extents.h +193 -0
  769. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_helper.h +189 -0
  770. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_mapping.h +344 -0
  771. cuda/cccl/headers/include/cuda/std/__memory/addressof.h +64 -0
  772. cuda/cccl/headers/include/cuda/std/__memory/align.h +67 -0
  773. cuda/cccl/headers/include/cuda/std/__memory/allocate_at_least.h +81 -0
  774. cuda/cccl/headers/include/cuda/std/__memory/allocation_guard.h +100 -0
  775. cuda/cccl/headers/include/cuda/std/__memory/allocator.h +320 -0
  776. cuda/cccl/headers/include/cuda/std/__memory/allocator_arg_t.h +84 -0
  777. cuda/cccl/headers/include/cuda/std/__memory/allocator_destructor.h +59 -0
  778. cuda/cccl/headers/include/cuda/std/__memory/allocator_traits.h +555 -0
  779. cuda/cccl/headers/include/cuda/std/__memory/assume_aligned.h +60 -0
  780. cuda/cccl/headers/include/cuda/std/__memory/builtin_new_allocator.h +87 -0
  781. cuda/cccl/headers/include/cuda/std/__memory/compressed_pair.h +230 -0
  782. cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +248 -0
  783. cuda/cccl/headers/include/cuda/std/__memory/destruct_n.h +91 -0
  784. cuda/cccl/headers/include/cuda/std/__memory/is_sufficiently_aligned.h +43 -0
  785. cuda/cccl/headers/include/cuda/std/__memory/pointer_traits.h +261 -0
  786. cuda/cccl/headers/include/cuda/std/__memory/runtime_assume_aligned.h +62 -0
  787. cuda/cccl/headers/include/cuda/std/__memory/temporary_buffer.h +92 -0
  788. cuda/cccl/headers/include/cuda/std/__memory/uninitialized_algorithms.h +683 -0
  789. cuda/cccl/headers/include/cuda/std/__memory/unique_ptr.h +768 -0
  790. cuda/cccl/headers/include/cuda/std/__memory/uses_allocator.h +55 -0
  791. cuda/cccl/headers/include/cuda/std/__memory/voidify.h +41 -0
  792. cuda/cccl/headers/include/cuda/std/__memory_ +34 -0
  793. cuda/cccl/headers/include/cuda/std/__new/allocate.h +126 -0
  794. cuda/cccl/headers/include/cuda/std/__new/bad_alloc.h +57 -0
  795. cuda/cccl/headers/include/cuda/std/__new/launder.h +49 -0
  796. cuda/cccl/headers/include/cuda/std/__new_ +29 -0
  797. cuda/cccl/headers/include/cuda/std/__numeric/accumulate.h +56 -0
  798. cuda/cccl/headers/include/cuda/std/__numeric/adjacent_difference.h +72 -0
  799. cuda/cccl/headers/include/cuda/std/__numeric/exclusive_scan.h +66 -0
  800. cuda/cccl/headers/include/cuda/std/__numeric/gcd_lcm.h +80 -0
  801. cuda/cccl/headers/include/cuda/std/__numeric/inclusive_scan.h +73 -0
  802. cuda/cccl/headers/include/cuda/std/__numeric/inner_product.h +62 -0
  803. cuda/cccl/headers/include/cuda/std/__numeric/iota.h +42 -0
  804. cuda/cccl/headers/include/cuda/std/__numeric/midpoint.h +100 -0
  805. cuda/cccl/headers/include/cuda/std/__numeric/partial_sum.h +69 -0
  806. cuda/cccl/headers/include/cuda/std/__numeric/reduce.h +60 -0
  807. cuda/cccl/headers/include/cuda/std/__numeric/transform_exclusive_scan.h +51 -0
  808. cuda/cccl/headers/include/cuda/std/__numeric/transform_inclusive_scan.h +65 -0
  809. cuda/cccl/headers/include/cuda/std/__numeric/transform_reduce.h +72 -0
  810. cuda/cccl/headers/include/cuda/std/__optional/bad_optional_access.h +75 -0
  811. cuda/cccl/headers/include/cuda/std/__optional/hash.h +53 -0
  812. cuda/cccl/headers/include/cuda/std/__optional/make_optional.h +61 -0
  813. cuda/cccl/headers/include/cuda/std/__optional/nullopt.h +43 -0
  814. cuda/cccl/headers/include/cuda/std/__optional/optional.h +900 -0
  815. cuda/cccl/headers/include/cuda/std/__optional/optional_base.h +430 -0
  816. cuda/cccl/headers/include/cuda/std/__optional/optional_ref.h +324 -0
  817. cuda/cccl/headers/include/cuda/std/__random/generate_canonical.h +56 -0
  818. cuda/cccl/headers/include/cuda/std/__random/is_seed_sequence.h +39 -0
  819. cuda/cccl/headers/include/cuda/std/__random/is_valid.h +106 -0
  820. cuda/cccl/headers/include/cuda/std/__random/linear_congruential_engine.h +397 -0
  821. cuda/cccl/headers/include/cuda/std/__random/uniform_int_distribution.h +335 -0
  822. cuda/cccl/headers/include/cuda/std/__random/uniform_real_distribution.h +183 -0
  823. cuda/cccl/headers/include/cuda/std/__random_ +29 -0
  824. cuda/cccl/headers/include/cuda/std/__ranges/access.h +303 -0
  825. cuda/cccl/headers/include/cuda/std/__ranges/all.h +97 -0
  826. cuda/cccl/headers/include/cuda/std/__ranges/concepts.h +313 -0
  827. cuda/cccl/headers/include/cuda/std/__ranges/counted.h +90 -0
  828. cuda/cccl/headers/include/cuda/std/__ranges/dangling.h +54 -0
  829. cuda/cccl/headers/include/cuda/std/__ranges/data.h +136 -0
  830. cuda/cccl/headers/include/cuda/std/__ranges/empty.h +109 -0
  831. cuda/cccl/headers/include/cuda/std/__ranges/empty_view.h +77 -0
  832. cuda/cccl/headers/include/cuda/std/__ranges/enable_borrowed_range.h +41 -0
  833. cuda/cccl/headers/include/cuda/std/__ranges/enable_view.h +77 -0
  834. cuda/cccl/headers/include/cuda/std/__ranges/from_range.h +36 -0
  835. cuda/cccl/headers/include/cuda/std/__ranges/iota_view.h +266 -0
  836. cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +410 -0
  837. cuda/cccl/headers/include/cuda/std/__ranges/owning_view.h +161 -0
  838. cuda/cccl/headers/include/cuda/std/__ranges/range_adaptor.h +113 -0
  839. cuda/cccl/headers/include/cuda/std/__ranges/rbegin.h +174 -0
  840. cuda/cccl/headers/include/cuda/std/__ranges/ref_view.h +121 -0
  841. cuda/cccl/headers/include/cuda/std/__ranges/rend.h +181 -0
  842. cuda/cccl/headers/include/cuda/std/__ranges/repeat_view.h +345 -0
  843. cuda/cccl/headers/include/cuda/std/__ranges/single_view.h +155 -0
  844. cuda/cccl/headers/include/cuda/std/__ranges/size.h +199 -0
  845. cuda/cccl/headers/include/cuda/std/__ranges/subrange.h +513 -0
  846. cuda/cccl/headers/include/cuda/std/__ranges/take_view.h +475 -0
  847. cuda/cccl/headers/include/cuda/std/__ranges/take_while_view.h +259 -0
  848. cuda/cccl/headers/include/cuda/std/__ranges/transform_view.h +522 -0
  849. cuda/cccl/headers/include/cuda/std/__ranges/unwrap_end.h +53 -0
  850. cuda/cccl/headers/include/cuda/std/__ranges/view_interface.h +181 -0
  851. cuda/cccl/headers/include/cuda/std/__ranges/views.h +38 -0
  852. cuda/cccl/headers/include/cuda/std/__semaphore/atomic_semaphore.h +233 -0
  853. cuda/cccl/headers/include/cuda/std/__semaphore/counting_semaphore.h +51 -0
  854. cuda/cccl/headers/include/cuda/std/__string/char_traits.h +191 -0
  855. cuda/cccl/headers/include/cuda/std/__string/constexpr_c_functions.h +581 -0
  856. cuda/cccl/headers/include/cuda/std/__string/helper_functions.h +296 -0
  857. cuda/cccl/headers/include/cuda/std/__string/string_view.h +244 -0
  858. cuda/cccl/headers/include/cuda/std/__string_ +29 -0
  859. cuda/cccl/headers/include/cuda/std/__system_error/errc.h +51 -0
  860. cuda/cccl/headers/include/cuda/std/__system_error_ +26 -0
  861. cuda/cccl/headers/include/cuda/std/__thread/threading_support.h +105 -0
  862. cuda/cccl/headers/include/cuda/std/__thread/threading_support_cuda.h +47 -0
  863. cuda/cccl/headers/include/cuda/std/__thread/threading_support_external.h +41 -0
  864. cuda/cccl/headers/include/cuda/std/__thread/threading_support_pthread.h +142 -0
  865. cuda/cccl/headers/include/cuda/std/__thread/threading_support_win32.h +87 -0
  866. cuda/cccl/headers/include/cuda/std/__tuple_dir/ignore.h +51 -0
  867. cuda/cccl/headers/include/cuda/std/__tuple_dir/make_tuple_types.h +98 -0
  868. cuda/cccl/headers/include/cuda/std/__tuple_dir/sfinae_helpers.h +269 -0
  869. cuda/cccl/headers/include/cuda/std/__tuple_dir/structured_bindings.h +216 -0
  870. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_element.h +70 -0
  871. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_indices.h +44 -0
  872. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +90 -0
  873. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like_ext.h +73 -0
  874. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_size.h +79 -0
  875. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_types.h +35 -0
  876. cuda/cccl/headers/include/cuda/std/__tuple_dir/vector_types.h +277 -0
  877. cuda/cccl/headers/include/cuda/std/__type_traits/add_const.h +40 -0
  878. cuda/cccl/headers/include/cuda/std/__type_traits/add_cv.h +40 -0
  879. cuda/cccl/headers/include/cuda/std/__type_traits/add_lvalue_reference.h +62 -0
  880. cuda/cccl/headers/include/cuda/std/__type_traits/add_pointer.h +65 -0
  881. cuda/cccl/headers/include/cuda/std/__type_traits/add_rvalue_reference.h +62 -0
  882. cuda/cccl/headers/include/cuda/std/__type_traits/add_volatile.h +40 -0
  883. cuda/cccl/headers/include/cuda/std/__type_traits/aligned_storage.h +149 -0
  884. cuda/cccl/headers/include/cuda/std/__type_traits/aligned_union.h +62 -0
  885. cuda/cccl/headers/include/cuda/std/__type_traits/alignment_of.h +41 -0
  886. cuda/cccl/headers/include/cuda/std/__type_traits/always_false.h +35 -0
  887. cuda/cccl/headers/include/cuda/std/__type_traits/can_extract_key.h +69 -0
  888. cuda/cccl/headers/include/cuda/std/__type_traits/common_reference.h +262 -0
  889. cuda/cccl/headers/include/cuda/std/__type_traits/common_type.h +174 -0
  890. cuda/cccl/headers/include/cuda/std/__type_traits/conditional.h +65 -0
  891. cuda/cccl/headers/include/cuda/std/__type_traits/conjunction.h +67 -0
  892. cuda/cccl/headers/include/cuda/std/__type_traits/copy_cv.h +50 -0
  893. cuda/cccl/headers/include/cuda/std/__type_traits/copy_cvref.h +148 -0
  894. cuda/cccl/headers/include/cuda/std/__type_traits/decay.h +83 -0
  895. cuda/cccl/headers/include/cuda/std/__type_traits/dependent_type.h +35 -0
  896. cuda/cccl/headers/include/cuda/std/__type_traits/disjunction.h +77 -0
  897. cuda/cccl/headers/include/cuda/std/__type_traits/enable_if.h +43 -0
  898. cuda/cccl/headers/include/cuda/std/__type_traits/extent.h +68 -0
  899. cuda/cccl/headers/include/cuda/std/__type_traits/fold.h +47 -0
  900. cuda/cccl/headers/include/cuda/std/__type_traits/has_unique_object_representation.h +47 -0
  901. cuda/cccl/headers/include/cuda/std/__type_traits/has_virtual_destructor.h +51 -0
  902. cuda/cccl/headers/include/cuda/std/__type_traits/integral_constant.h +62 -0
  903. cuda/cccl/headers/include/cuda/std/__type_traits/is_abstract.h +40 -0
  904. cuda/cccl/headers/include/cuda/std/__type_traits/is_aggregate.h +44 -0
  905. cuda/cccl/headers/include/cuda/std/__type_traits/is_allocator.h +46 -0
  906. cuda/cccl/headers/include/cuda/std/__type_traits/is_arithmetic.h +42 -0
  907. cuda/cccl/headers/include/cuda/std/__type_traits/is_array.h +62 -0
  908. cuda/cccl/headers/include/cuda/std/__type_traits/is_assignable.h +78 -0
  909. cuda/cccl/headers/include/cuda/std/__type_traits/is_base_of.h +83 -0
  910. cuda/cccl/headers/include/cuda/std/__type_traits/is_bounded_array.h +44 -0
  911. cuda/cccl/headers/include/cuda/std/__type_traits/is_callable.h +60 -0
  912. cuda/cccl/headers/include/cuda/std/__type_traits/is_char_like_type.h +38 -0
  913. cuda/cccl/headers/include/cuda/std/__type_traits/is_class.h +68 -0
  914. cuda/cccl/headers/include/cuda/std/__type_traits/is_compound.h +54 -0
  915. cuda/cccl/headers/include/cuda/std/__type_traits/is_const.h +56 -0
  916. cuda/cccl/headers/include/cuda/std/__type_traits/is_constant_evaluated.h +51 -0
  917. cuda/cccl/headers/include/cuda/std/__type_traits/is_constructible.h +174 -0
  918. cuda/cccl/headers/include/cuda/std/__type_traits/is_convertible.h +214 -0
  919. cuda/cccl/headers/include/cuda/std/__type_traits/is_copy_assignable.h +43 -0
  920. cuda/cccl/headers/include/cuda/std/__type_traits/is_copy_constructible.h +43 -0
  921. cuda/cccl/headers/include/cuda/std/__type_traits/is_core_convertible.h +47 -0
  922. cuda/cccl/headers/include/cuda/std/__type_traits/is_corresponding_member.h +42 -0
  923. cuda/cccl/headers/include/cuda/std/__type_traits/is_default_constructible.h +40 -0
  924. cuda/cccl/headers/include/cuda/std/__type_traits/is_destructible.h +115 -0
  925. cuda/cccl/headers/include/cuda/std/__type_traits/is_empty.h +73 -0
  926. cuda/cccl/headers/include/cuda/std/__type_traits/is_enum.h +68 -0
  927. cuda/cccl/headers/include/cuda/std/__type_traits/is_extended_arithmetic.h +38 -0
  928. cuda/cccl/headers/include/cuda/std/__type_traits/is_extended_floating_point.h +81 -0
  929. cuda/cccl/headers/include/cuda/std/__type_traits/is_final.h +56 -0
  930. cuda/cccl/headers/include/cuda/std/__type_traits/is_floating_point.h +53 -0
  931. cuda/cccl/headers/include/cuda/std/__type_traits/is_function.h +61 -0
  932. cuda/cccl/headers/include/cuda/std/__type_traits/is_fundamental.h +56 -0
  933. cuda/cccl/headers/include/cuda/std/__type_traits/is_implicitly_default_constructible.h +57 -0
  934. cuda/cccl/headers/include/cuda/std/__type_traits/is_integer.h +45 -0
  935. cuda/cccl/headers/include/cuda/std/__type_traits/is_integral.h +123 -0
  936. cuda/cccl/headers/include/cuda/std/__type_traits/is_layout_compatible.h +45 -0
  937. cuda/cccl/headers/include/cuda/std/__type_traits/is_literal_type.h +59 -0
  938. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_function_pointer.h +79 -0
  939. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_object_pointer.h +57 -0
  940. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_pointer.h +57 -0
  941. cuda/cccl/headers/include/cuda/std/__type_traits/is_move_assignable.h +43 -0
  942. cuda/cccl/headers/include/cuda/std/__type_traits/is_move_constructible.h +42 -0
  943. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_assignable.h +70 -0
  944. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_constructible.h +84 -0
  945. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_convertible.h +59 -0
  946. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_copy_assignable.h +60 -0
  947. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_copy_constructible.h +43 -0
  948. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_default_constructible.h +54 -0
  949. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_destructible.h +82 -0
  950. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_move_assignable.h +60 -0
  951. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_move_constructible.h +42 -0
  952. cuda/cccl/headers/include/cuda/std/__type_traits/is_null_pointer.h +43 -0
  953. cuda/cccl/headers/include/cuda/std/__type_traits/is_object.h +57 -0
  954. cuda/cccl/headers/include/cuda/std/__type_traits/is_one_of.h +37 -0
  955. cuda/cccl/headers/include/cuda/std/__type_traits/is_pod.h +62 -0
  956. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer.h +60 -0
  957. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer_interconvertible_base_of.h +87 -0
  958. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer_interconvertible_with_class.h +42 -0
  959. cuda/cccl/headers/include/cuda/std/__type_traits/is_polymorphic.h +63 -0
  960. cuda/cccl/headers/include/cuda/std/__type_traits/is_primary_template.h +119 -0
  961. cuda/cccl/headers/include/cuda/std/__type_traits/is_reference.h +95 -0
  962. cuda/cccl/headers/include/cuda/std/__type_traits/is_reference_wrapper.h +50 -0
  963. cuda/cccl/headers/include/cuda/std/__type_traits/is_referenceable.h +55 -0
  964. cuda/cccl/headers/include/cuda/std/__type_traits/is_same.h +84 -0
  965. cuda/cccl/headers/include/cuda/std/__type_traits/is_scalar.h +60 -0
  966. cuda/cccl/headers/include/cuda/std/__type_traits/is_scoped_enum.h +49 -0
  967. cuda/cccl/headers/include/cuda/std/__type_traits/is_signed.h +65 -0
  968. cuda/cccl/headers/include/cuda/std/__type_traits/is_signed_integer.h +59 -0
  969. cuda/cccl/headers/include/cuda/std/__type_traits/is_standard_layout.h +57 -0
  970. cuda/cccl/headers/include/cuda/std/__type_traits/is_swappable.h +201 -0
  971. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivial.h +56 -0
  972. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_assignable.h +70 -0
  973. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_constructible.h +82 -0
  974. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copy_assignable.h +60 -0
  975. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copy_constructible.h +61 -0
  976. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copyable.h +56 -0
  977. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_default_constructible.h +55 -0
  978. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_destructible.h +73 -0
  979. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_move_assignable.h +60 -0
  980. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_move_constructible.h +58 -0
  981. cuda/cccl/headers/include/cuda/std/__type_traits/is_unbounded_array.h +43 -0
  982. cuda/cccl/headers/include/cuda/std/__type_traits/is_union.h +57 -0
  983. cuda/cccl/headers/include/cuda/std/__type_traits/is_unsigned.h +66 -0
  984. cuda/cccl/headers/include/cuda/std/__type_traits/is_unsigned_integer.h +59 -0
  985. cuda/cccl/headers/include/cuda/std/__type_traits/is_valid_expansion.h +41 -0
  986. cuda/cccl/headers/include/cuda/std/__type_traits/is_void.h +55 -0
  987. cuda/cccl/headers/include/cuda/std/__type_traits/is_volatile.h +56 -0
  988. cuda/cccl/headers/include/cuda/std/__type_traits/lazy.h +35 -0
  989. cuda/cccl/headers/include/cuda/std/__type_traits/make_const_lvalue_ref.h +36 -0
  990. cuda/cccl/headers/include/cuda/std/__type_traits/make_nbit_int.h +107 -0
  991. cuda/cccl/headers/include/cuda/std/__type_traits/make_signed.h +140 -0
  992. cuda/cccl/headers/include/cuda/std/__type_traits/make_unsigned.h +151 -0
  993. cuda/cccl/headers/include/cuda/std/__type_traits/maybe_const.h +36 -0
  994. cuda/cccl/headers/include/cuda/std/__type_traits/nat.h +39 -0
  995. cuda/cccl/headers/include/cuda/std/__type_traits/negation.h +44 -0
  996. cuda/cccl/headers/include/cuda/std/__type_traits/num_bits.h +123 -0
  997. cuda/cccl/headers/include/cuda/std/__type_traits/promote.h +163 -0
  998. cuda/cccl/headers/include/cuda/std/__type_traits/rank.h +60 -0
  999. cuda/cccl/headers/include/cuda/std/__type_traits/reference_constructs_from_temporary.h +57 -0
  1000. cuda/cccl/headers/include/cuda/std/__type_traits/reference_converts_from_temporary.h +56 -0
  1001. cuda/cccl/headers/include/cuda/std/__type_traits/remove_all_extents.h +66 -0
  1002. cuda/cccl/headers/include/cuda/std/__type_traits/remove_const.h +59 -0
  1003. cuda/cccl/headers/include/cuda/std/__type_traits/remove_const_ref.h +37 -0
  1004. cuda/cccl/headers/include/cuda/std/__type_traits/remove_cv.h +57 -0
  1005. cuda/cccl/headers/include/cuda/std/__type_traits/remove_cvref.h +57 -0
  1006. cuda/cccl/headers/include/cuda/std/__type_traits/remove_extent.h +65 -0
  1007. cuda/cccl/headers/include/cuda/std/__type_traits/remove_pointer.h +73 -0
  1008. cuda/cccl/headers/include/cuda/std/__type_traits/remove_reference.h +72 -0
  1009. cuda/cccl/headers/include/cuda/std/__type_traits/remove_volatile.h +58 -0
  1010. cuda/cccl/headers/include/cuda/std/__type_traits/result_of.h +47 -0
  1011. cuda/cccl/headers/include/cuda/std/__type_traits/type_identity.h +40 -0
  1012. cuda/cccl/headers/include/cuda/std/__type_traits/type_list.h +1067 -0
  1013. cuda/cccl/headers/include/cuda/std/__type_traits/type_set.h +132 -0
  1014. cuda/cccl/headers/include/cuda/std/__type_traits/underlying_type.h +66 -0
  1015. cuda/cccl/headers/include/cuda/std/__type_traits/void_t.h +34 -0
  1016. cuda/cccl/headers/include/cuda/std/__utility/as_const.h +52 -0
  1017. cuda/cccl/headers/include/cuda/std/__utility/auto_cast.h +32 -0
  1018. cuda/cccl/headers/include/cuda/std/__utility/cmp.h +116 -0
  1019. cuda/cccl/headers/include/cuda/std/__utility/convert_to_integral.h +101 -0
  1020. cuda/cccl/headers/include/cuda/std/__utility/declval.h +63 -0
  1021. cuda/cccl/headers/include/cuda/std/__utility/exception_guard.h +161 -0
  1022. cuda/cccl/headers/include/cuda/std/__utility/exchange.h +46 -0
  1023. cuda/cccl/headers/include/cuda/std/__utility/forward.h +59 -0
  1024. cuda/cccl/headers/include/cuda/std/__utility/forward_like.h +55 -0
  1025. cuda/cccl/headers/include/cuda/std/__utility/in_place.h +77 -0
  1026. cuda/cccl/headers/include/cuda/std/__utility/integer_sequence.h +251 -0
  1027. cuda/cccl/headers/include/cuda/std/__utility/monostate.h +99 -0
  1028. cuda/cccl/headers/include/cuda/std/__utility/move.h +74 -0
  1029. cuda/cccl/headers/include/cuda/std/__utility/pair.h +802 -0
  1030. cuda/cccl/headers/include/cuda/std/__utility/piecewise_construct.h +37 -0
  1031. cuda/cccl/headers/include/cuda/std/__utility/pod_tuple.h +510 -0
  1032. cuda/cccl/headers/include/cuda/std/__utility/priority_tag.h +40 -0
  1033. cuda/cccl/headers/include/cuda/std/__utility/rel_ops.h +63 -0
  1034. cuda/cccl/headers/include/cuda/std/__utility/swap.h +64 -0
  1035. cuda/cccl/headers/include/cuda/std/__utility/to_underlying.h +40 -0
  1036. cuda/cccl/headers/include/cuda/std/__utility/typeid.h +421 -0
  1037. cuda/cccl/headers/include/cuda/std/__utility/undefined.h +34 -0
  1038. cuda/cccl/headers/include/cuda/std/__utility/unreachable.h +37 -0
  1039. cuda/cccl/headers/include/cuda/std/array +520 -0
  1040. cuda/cccl/headers/include/cuda/std/atomic +818 -0
  1041. cuda/cccl/headers/include/cuda/std/barrier +43 -0
  1042. cuda/cccl/headers/include/cuda/std/bit +35 -0
  1043. cuda/cccl/headers/include/cuda/std/bitset +994 -0
  1044. cuda/cccl/headers/include/cuda/std/cassert +28 -0
  1045. cuda/cccl/headers/include/cuda/std/ccomplex +15 -0
  1046. cuda/cccl/headers/include/cuda/std/cfloat +59 -0
  1047. cuda/cccl/headers/include/cuda/std/chrono +26 -0
  1048. cuda/cccl/headers/include/cuda/std/climits +61 -0
  1049. cuda/cccl/headers/include/cuda/std/cmath +25 -0
  1050. cuda/cccl/headers/include/cuda/std/complex +50 -0
  1051. cuda/cccl/headers/include/cuda/std/concepts +48 -0
  1052. cuda/cccl/headers/include/cuda/std/cstddef +28 -0
  1053. cuda/cccl/headers/include/cuda/std/cstdint +178 -0
  1054. cuda/cccl/headers/include/cuda/std/cstdlib +30 -0
  1055. cuda/cccl/headers/include/cuda/std/cstring +110 -0
  1056. cuda/cccl/headers/include/cuda/std/ctime +152 -0
  1057. cuda/cccl/headers/include/cuda/std/detail/__config +45 -0
  1058. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +235 -0
  1059. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/algorithm +1720 -0
  1060. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/chrono +3628 -0
  1061. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/cmath +667 -0
  1062. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/iosfwd +128 -0
  1063. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/stdexcept +120 -0
  1064. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/tuple +1367 -0
  1065. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/variant +2154 -0
  1066. cuda/cccl/headers/include/cuda/std/execution +27 -0
  1067. cuda/cccl/headers/include/cuda/std/expected +30 -0
  1068. cuda/cccl/headers/include/cuda/std/functional +56 -0
  1069. cuda/cccl/headers/include/cuda/std/initializer_list +36 -0
  1070. cuda/cccl/headers/include/cuda/std/inplace_vector +2163 -0
  1071. cuda/cccl/headers/include/cuda/std/iterator +70 -0
  1072. cuda/cccl/headers/include/cuda/std/latch +34 -0
  1073. cuda/cccl/headers/include/cuda/std/limits +28 -0
  1074. cuda/cccl/headers/include/cuda/std/linalg +30 -0
  1075. cuda/cccl/headers/include/cuda/std/mdspan +38 -0
  1076. cuda/cccl/headers/include/cuda/std/memory +39 -0
  1077. cuda/cccl/headers/include/cuda/std/numbers +335 -0
  1078. cuda/cccl/headers/include/cuda/std/numeric +41 -0
  1079. cuda/cccl/headers/include/cuda/std/optional +31 -0
  1080. cuda/cccl/headers/include/cuda/std/ranges +69 -0
  1081. cuda/cccl/headers/include/cuda/std/ratio +417 -0
  1082. cuda/cccl/headers/include/cuda/std/semaphore +31 -0
  1083. cuda/cccl/headers/include/cuda/std/source_location +83 -0
  1084. cuda/cccl/headers/include/cuda/std/span +640 -0
  1085. cuda/cccl/headers/include/cuda/std/string_view +788 -0
  1086. cuda/cccl/headers/include/cuda/std/tuple +26 -0
  1087. cuda/cccl/headers/include/cuda/std/type_traits +176 -0
  1088. cuda/cccl/headers/include/cuda/std/utility +70 -0
  1089. cuda/cccl/headers/include/cuda/std/variant +25 -0
  1090. cuda/cccl/headers/include/cuda/std/version +245 -0
  1091. cuda/cccl/headers/include/cuda/stream_ref +54 -0
  1092. cuda/cccl/headers/include/cuda/type_traits +27 -0
  1093. cuda/cccl/headers/include/cuda/utility +27 -0
  1094. cuda/cccl/headers/include/cuda/version +16 -0
  1095. cuda/cccl/headers/include/cuda/warp +28 -0
  1096. cuda/cccl/headers/include/cuda/work_stealing +26 -0
  1097. cuda/cccl/headers/include/nv/detail/__preprocessor +169 -0
  1098. cuda/cccl/headers/include/nv/detail/__target_macros +641 -0
  1099. cuda/cccl/headers/include/nv/target +240 -0
  1100. cuda/cccl/headers/include/thrust/addressof.h +22 -0
  1101. cuda/cccl/headers/include/thrust/adjacent_difference.h +254 -0
  1102. cuda/cccl/headers/include/thrust/advance.h +59 -0
  1103. cuda/cccl/headers/include/thrust/allocate_unique.h +299 -0
  1104. cuda/cccl/headers/include/thrust/binary_search.h +1910 -0
  1105. cuda/cccl/headers/include/thrust/complex.h +859 -0
  1106. cuda/cccl/headers/include/thrust/copy.h +506 -0
  1107. cuda/cccl/headers/include/thrust/count.h +245 -0
  1108. cuda/cccl/headers/include/thrust/detail/adjacent_difference.inl +95 -0
  1109. cuda/cccl/headers/include/thrust/detail/algorithm_wrapper.h +37 -0
  1110. cuda/cccl/headers/include/thrust/detail/alignment.h +81 -0
  1111. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +350 -0
  1112. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.inl +371 -0
  1113. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +45 -0
  1114. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.inl +242 -0
  1115. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +39 -0
  1116. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.inl +137 -0
  1117. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +39 -0
  1118. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.inl +99 -0
  1119. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +53 -0
  1120. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.inl +68 -0
  1121. cuda/cccl/headers/include/thrust/detail/allocator/no_throw_allocator.h +76 -0
  1122. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +102 -0
  1123. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.inl +86 -0
  1124. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +79 -0
  1125. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.inl +81 -0
  1126. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.h +39 -0
  1127. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.inl +98 -0
  1128. cuda/cccl/headers/include/thrust/detail/allocator_aware_execution_policy.h +99 -0
  1129. cuda/cccl/headers/include/thrust/detail/binary_search.inl +525 -0
  1130. cuda/cccl/headers/include/thrust/detail/caching_allocator.h +47 -0
  1131. cuda/cccl/headers/include/thrust/detail/complex/arithmetic.h +255 -0
  1132. cuda/cccl/headers/include/thrust/detail/complex/c99math.h +64 -0
  1133. cuda/cccl/headers/include/thrust/detail/complex/catrig.h +875 -0
  1134. cuda/cccl/headers/include/thrust/detail/complex/catrigf.h +589 -0
  1135. cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +233 -0
  1136. cuda/cccl/headers/include/thrust/detail/complex/ccoshf.h +161 -0
  1137. cuda/cccl/headers/include/thrust/detail/complex/cexp.h +195 -0
  1138. cuda/cccl/headers/include/thrust/detail/complex/cexpf.h +173 -0
  1139. cuda/cccl/headers/include/thrust/detail/complex/clog.h +223 -0
  1140. cuda/cccl/headers/include/thrust/detail/complex/clogf.h +210 -0
  1141. cuda/cccl/headers/include/thrust/detail/complex/complex.inl +263 -0
  1142. cuda/cccl/headers/include/thrust/detail/complex/cpow.h +50 -0
  1143. cuda/cccl/headers/include/thrust/detail/complex/cproj.h +81 -0
  1144. cuda/cccl/headers/include/thrust/detail/complex/csinh.h +228 -0
  1145. cuda/cccl/headers/include/thrust/detail/complex/csinhf.h +168 -0
  1146. cuda/cccl/headers/include/thrust/detail/complex/csqrt.h +178 -0
  1147. cuda/cccl/headers/include/thrust/detail/complex/csqrtf.h +174 -0
  1148. cuda/cccl/headers/include/thrust/detail/complex/ctanh.h +208 -0
  1149. cuda/cccl/headers/include/thrust/detail/complex/ctanhf.h +133 -0
  1150. cuda/cccl/headers/include/thrust/detail/complex/math_private.h +138 -0
  1151. cuda/cccl/headers/include/thrust/detail/complex/stream.h +73 -0
  1152. cuda/cccl/headers/include/thrust/detail/config/compiler.h +38 -0
  1153. cuda/cccl/headers/include/thrust/detail/config/config.h +43 -0
  1154. cuda/cccl/headers/include/thrust/detail/config/cpp_dialect.h +78 -0
  1155. cuda/cccl/headers/include/thrust/detail/config/device_system.h +55 -0
  1156. cuda/cccl/headers/include/thrust/detail/config/host_system.h +48 -0
  1157. cuda/cccl/headers/include/thrust/detail/config/memory_resource.h +41 -0
  1158. cuda/cccl/headers/include/thrust/detail/config/namespace.h +162 -0
  1159. cuda/cccl/headers/include/thrust/detail/config/simple_defines.h +48 -0
  1160. cuda/cccl/headers/include/thrust/detail/config.h +36 -0
  1161. cuda/cccl/headers/include/thrust/detail/contiguous_storage.h +228 -0
  1162. cuda/cccl/headers/include/thrust/detail/contiguous_storage.inl +273 -0
  1163. cuda/cccl/headers/include/thrust/detail/copy.h +72 -0
  1164. cuda/cccl/headers/include/thrust/detail/copy.inl +129 -0
  1165. cuda/cccl/headers/include/thrust/detail/copy_if.h +62 -0
  1166. cuda/cccl/headers/include/thrust/detail/copy_if.inl +102 -0
  1167. cuda/cccl/headers/include/thrust/detail/count.h +55 -0
  1168. cuda/cccl/headers/include/thrust/detail/count.inl +89 -0
  1169. cuda/cccl/headers/include/thrust/detail/device_delete.inl +52 -0
  1170. cuda/cccl/headers/include/thrust/detail/device_free.inl +47 -0
  1171. cuda/cccl/headers/include/thrust/detail/device_malloc.inl +60 -0
  1172. cuda/cccl/headers/include/thrust/detail/device_new.inl +61 -0
  1173. cuda/cccl/headers/include/thrust/detail/device_ptr.inl +48 -0
  1174. cuda/cccl/headers/include/thrust/detail/equal.inl +93 -0
  1175. cuda/cccl/headers/include/thrust/detail/event_error.h +160 -0
  1176. cuda/cccl/headers/include/thrust/detail/execute_with_allocator.h +80 -0
  1177. cuda/cccl/headers/include/thrust/detail/execute_with_allocator_fwd.h +61 -0
  1178. cuda/cccl/headers/include/thrust/detail/execution_policy.h +80 -0
  1179. cuda/cccl/headers/include/thrust/detail/extrema.inl +184 -0
  1180. cuda/cccl/headers/include/thrust/detail/fill.inl +86 -0
  1181. cuda/cccl/headers/include/thrust/detail/find.inl +113 -0
  1182. cuda/cccl/headers/include/thrust/detail/for_each.inl +84 -0
  1183. cuda/cccl/headers/include/thrust/detail/function.h +49 -0
  1184. cuda/cccl/headers/include/thrust/detail/functional/actor.h +214 -0
  1185. cuda/cccl/headers/include/thrust/detail/functional/operators.h +386 -0
  1186. cuda/cccl/headers/include/thrust/detail/gather.inl +173 -0
  1187. cuda/cccl/headers/include/thrust/detail/generate.inl +86 -0
  1188. cuda/cccl/headers/include/thrust/detail/get_iterator_value.h +62 -0
  1189. cuda/cccl/headers/include/thrust/detail/inner_product.inl +118 -0
  1190. cuda/cccl/headers/include/thrust/detail/integer_math.h +130 -0
  1191. cuda/cccl/headers/include/thrust/detail/internal_functional.h +289 -0
  1192. cuda/cccl/headers/include/thrust/detail/logical.inl +113 -0
  1193. cuda/cccl/headers/include/thrust/detail/malloc_and_free.h +77 -0
  1194. cuda/cccl/headers/include/thrust/detail/malloc_and_free_fwd.h +45 -0
  1195. cuda/cccl/headers/include/thrust/detail/memory_algorithms.h +209 -0
  1196. cuda/cccl/headers/include/thrust/detail/memory_wrapper.h +40 -0
  1197. cuda/cccl/headers/include/thrust/detail/merge.inl +276 -0
  1198. cuda/cccl/headers/include/thrust/detail/mismatch.inl +94 -0
  1199. cuda/cccl/headers/include/thrust/detail/mpl/math.h +164 -0
  1200. cuda/cccl/headers/include/thrust/detail/numeric_wrapper.h +37 -0
  1201. cuda/cccl/headers/include/thrust/detail/overlapped_copy.h +124 -0
  1202. cuda/cccl/headers/include/thrust/detail/partition.inl +378 -0
  1203. cuda/cccl/headers/include/thrust/detail/pointer.h +217 -0
  1204. cuda/cccl/headers/include/thrust/detail/pointer.inl +172 -0
  1205. cuda/cccl/headers/include/thrust/detail/preprocessor.h +652 -0
  1206. cuda/cccl/headers/include/thrust/detail/random_bijection.h +177 -0
  1207. cuda/cccl/headers/include/thrust/detail/range/head_flags.h +116 -0
  1208. cuda/cccl/headers/include/thrust/detail/range/tail_flags.h +130 -0
  1209. cuda/cccl/headers/include/thrust/detail/raw_pointer_cast.h +52 -0
  1210. cuda/cccl/headers/include/thrust/detail/raw_reference_cast.h +189 -0
  1211. cuda/cccl/headers/include/thrust/detail/reduce.inl +377 -0
  1212. cuda/cccl/headers/include/thrust/detail/reference.h +500 -0
  1213. cuda/cccl/headers/include/thrust/detail/reference_forward_declaration.h +35 -0
  1214. cuda/cccl/headers/include/thrust/detail/remove.inl +213 -0
  1215. cuda/cccl/headers/include/thrust/detail/replace.inl +231 -0
  1216. cuda/cccl/headers/include/thrust/detail/reverse.inl +88 -0
  1217. cuda/cccl/headers/include/thrust/detail/scan.inl +518 -0
  1218. cuda/cccl/headers/include/thrust/detail/scatter.inl +157 -0
  1219. cuda/cccl/headers/include/thrust/detail/seq.h +54 -0
  1220. cuda/cccl/headers/include/thrust/detail/sequence.inl +109 -0
  1221. cuda/cccl/headers/include/thrust/detail/set_operations.inl +981 -0
  1222. cuda/cccl/headers/include/thrust/detail/shuffle.inl +86 -0
  1223. cuda/cccl/headers/include/thrust/detail/sort.inl +373 -0
  1224. cuda/cccl/headers/include/thrust/detail/static_assert.h +58 -0
  1225. cuda/cccl/headers/include/thrust/detail/static_map.h +167 -0
  1226. cuda/cccl/headers/include/thrust/detail/swap_ranges.inl +65 -0
  1227. cuda/cccl/headers/include/thrust/detail/tabulate.inl +62 -0
  1228. cuda/cccl/headers/include/thrust/detail/temporary_array.h +153 -0
  1229. cuda/cccl/headers/include/thrust/detail/temporary_array.inl +120 -0
  1230. cuda/cccl/headers/include/thrust/detail/temporary_buffer.h +81 -0
  1231. cuda/cccl/headers/include/thrust/detail/transform_reduce.inl +69 -0
  1232. cuda/cccl/headers/include/thrust/detail/transform_scan.inl +161 -0
  1233. cuda/cccl/headers/include/thrust/detail/trivial_sequence.h +130 -0
  1234. cuda/cccl/headers/include/thrust/detail/tuple_meta_transform.h +61 -0
  1235. cuda/cccl/headers/include/thrust/detail/type_deduction.h +62 -0
  1236. cuda/cccl/headers/include/thrust/detail/type_traits/has_member_function.h +47 -0
  1237. cuda/cccl/headers/include/thrust/detail/type_traits/has_nested_type.h +43 -0
  1238. cuda/cccl/headers/include/thrust/detail/type_traits/is_call_possible.h +167 -0
  1239. cuda/cccl/headers/include/thrust/detail/type_traits/is_commutative.h +69 -0
  1240. cuda/cccl/headers/include/thrust/detail/type_traits/is_metafunction_defined.h +39 -0
  1241. cuda/cccl/headers/include/thrust/detail/type_traits/is_thrust_pointer.h +59 -0
  1242. cuda/cccl/headers/include/thrust/detail/type_traits/iterator/is_discard_iterator.h +44 -0
  1243. cuda/cccl/headers/include/thrust/detail/type_traits/iterator/is_output_iterator.h +46 -0
  1244. cuda/cccl/headers/include/thrust/detail/type_traits/minimum_type.h +89 -0
  1245. cuda/cccl/headers/include/thrust/detail/type_traits/pointer_traits.h +332 -0
  1246. cuda/cccl/headers/include/thrust/detail/type_traits.h +136 -0
  1247. cuda/cccl/headers/include/thrust/detail/uninitialized_copy.inl +90 -0
  1248. cuda/cccl/headers/include/thrust/detail/uninitialized_fill.inl +86 -0
  1249. cuda/cccl/headers/include/thrust/detail/unique.inl +373 -0
  1250. cuda/cccl/headers/include/thrust/detail/use_default.h +34 -0
  1251. cuda/cccl/headers/include/thrust/detail/util/align.h +59 -0
  1252. cuda/cccl/headers/include/thrust/detail/vector_base.h +615 -0
  1253. cuda/cccl/headers/include/thrust/detail/vector_base.inl +1212 -0
  1254. cuda/cccl/headers/include/thrust/device_allocator.h +134 -0
  1255. cuda/cccl/headers/include/thrust/device_delete.h +59 -0
  1256. cuda/cccl/headers/include/thrust/device_free.h +72 -0
  1257. cuda/cccl/headers/include/thrust/device_make_unique.h +56 -0
  1258. cuda/cccl/headers/include/thrust/device_malloc.h +108 -0
  1259. cuda/cccl/headers/include/thrust/device_malloc_allocator.h +190 -0
  1260. cuda/cccl/headers/include/thrust/device_new.h +91 -0
  1261. cuda/cccl/headers/include/thrust/device_new_allocator.h +179 -0
  1262. cuda/cccl/headers/include/thrust/device_ptr.h +202 -0
  1263. cuda/cccl/headers/include/thrust/device_reference.h +986 -0
  1264. cuda/cccl/headers/include/thrust/device_vector.h +574 -0
  1265. cuda/cccl/headers/include/thrust/distance.h +43 -0
  1266. cuda/cccl/headers/include/thrust/equal.h +247 -0
  1267. cuda/cccl/headers/include/thrust/execution_policy.h +384 -0
  1268. cuda/cccl/headers/include/thrust/extrema.h +657 -0
  1269. cuda/cccl/headers/include/thrust/fill.h +201 -0
  1270. cuda/cccl/headers/include/thrust/find.h +382 -0
  1271. cuda/cccl/headers/include/thrust/for_each.h +261 -0
  1272. cuda/cccl/headers/include/thrust/functional.h +396 -0
  1273. cuda/cccl/headers/include/thrust/gather.h +464 -0
  1274. cuda/cccl/headers/include/thrust/generate.h +193 -0
  1275. cuda/cccl/headers/include/thrust/host_vector.h +576 -0
  1276. cuda/cccl/headers/include/thrust/inner_product.h +264 -0
  1277. cuda/cccl/headers/include/thrust/iterator/constant_iterator.h +219 -0
  1278. cuda/cccl/headers/include/thrust/iterator/counting_iterator.h +335 -0
  1279. cuda/cccl/headers/include/thrust/iterator/detail/any_assign.h +48 -0
  1280. cuda/cccl/headers/include/thrust/iterator/detail/any_system_tag.h +43 -0
  1281. cuda/cccl/headers/include/thrust/iterator/detail/device_system_tag.h +38 -0
  1282. cuda/cccl/headers/include/thrust/iterator/detail/host_system_tag.h +38 -0
  1283. cuda/cccl/headers/include/thrust/iterator/detail/iterator_adaptor_base.h +81 -0
  1284. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_to_system.h +51 -0
  1285. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_to_traversal.h +62 -0
  1286. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h +57 -0
  1287. cuda/cccl/headers/include/thrust/iterator/detail/iterator_facade_category.h +199 -0
  1288. cuda/cccl/headers/include/thrust/iterator/detail/iterator_traversal_tags.h +50 -0
  1289. cuda/cccl/headers/include/thrust/iterator/detail/minimum_system.h +53 -0
  1290. cuda/cccl/headers/include/thrust/iterator/detail/normal_iterator.h +69 -0
  1291. cuda/cccl/headers/include/thrust/iterator/detail/retag.h +104 -0
  1292. cuda/cccl/headers/include/thrust/iterator/detail/tagged_iterator.h +81 -0
  1293. cuda/cccl/headers/include/thrust/iterator/detail/tuple_of_iterator_references.h +174 -0
  1294. cuda/cccl/headers/include/thrust/iterator/discard_iterator.h +163 -0
  1295. cuda/cccl/headers/include/thrust/iterator/iterator_adaptor.h +251 -0
  1296. cuda/cccl/headers/include/thrust/iterator/iterator_categories.h +215 -0
  1297. cuda/cccl/headers/include/thrust/iterator/iterator_facade.h +660 -0
  1298. cuda/cccl/headers/include/thrust/iterator/iterator_traits.h +275 -0
  1299. cuda/cccl/headers/include/thrust/iterator/offset_iterator.h +192 -0
  1300. cuda/cccl/headers/include/thrust/iterator/permutation_iterator.h +204 -0
  1301. cuda/cccl/headers/include/thrust/iterator/retag.h +74 -0
  1302. cuda/cccl/headers/include/thrust/iterator/reverse_iterator.h +221 -0
  1303. cuda/cccl/headers/include/thrust/iterator/shuffle_iterator.h +184 -0
  1304. cuda/cccl/headers/include/thrust/iterator/strided_iterator.h +152 -0
  1305. cuda/cccl/headers/include/thrust/iterator/tabulate_output_iterator.h +149 -0
  1306. cuda/cccl/headers/include/thrust/iterator/transform_input_output_iterator.h +226 -0
  1307. cuda/cccl/headers/include/thrust/iterator/transform_iterator.h +351 -0
  1308. cuda/cccl/headers/include/thrust/iterator/transform_output_iterator.h +190 -0
  1309. cuda/cccl/headers/include/thrust/iterator/zip_iterator.h +357 -0
  1310. cuda/cccl/headers/include/thrust/logical.h +290 -0
  1311. cuda/cccl/headers/include/thrust/memory.h +395 -0
  1312. cuda/cccl/headers/include/thrust/merge.h +725 -0
  1313. cuda/cccl/headers/include/thrust/mismatch.h +261 -0
  1314. cuda/cccl/headers/include/thrust/mr/allocator.h +229 -0
  1315. cuda/cccl/headers/include/thrust/mr/device_memory_resource.h +41 -0
  1316. cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +526 -0
  1317. cuda/cccl/headers/include/thrust/mr/disjoint_sync_pool.h +118 -0
  1318. cuda/cccl/headers/include/thrust/mr/disjoint_tls_pool.h +68 -0
  1319. cuda/cccl/headers/include/thrust/mr/fancy_pointer_resource.h +67 -0
  1320. cuda/cccl/headers/include/thrust/mr/host_memory_resource.h +38 -0
  1321. cuda/cccl/headers/include/thrust/mr/memory_resource.h +217 -0
  1322. cuda/cccl/headers/include/thrust/mr/new.h +100 -0
  1323. cuda/cccl/headers/include/thrust/mr/polymorphic_adaptor.h +63 -0
  1324. cuda/cccl/headers/include/thrust/mr/pool.h +526 -0
  1325. cuda/cccl/headers/include/thrust/mr/pool_options.h +174 -0
  1326. cuda/cccl/headers/include/thrust/mr/sync_pool.h +114 -0
  1327. cuda/cccl/headers/include/thrust/mr/tls_pool.h +65 -0
  1328. cuda/cccl/headers/include/thrust/mr/universal_memory_resource.h +29 -0
  1329. cuda/cccl/headers/include/thrust/mr/validator.h +56 -0
  1330. cuda/cccl/headers/include/thrust/pair.h +102 -0
  1331. cuda/cccl/headers/include/thrust/partition.h +1383 -0
  1332. cuda/cccl/headers/include/thrust/per_device_resource.h +98 -0
  1333. cuda/cccl/headers/include/thrust/random/detail/discard_block_engine.inl +184 -0
  1334. cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine.inl +155 -0
  1335. cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine_discard.h +104 -0
  1336. cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine.inl +151 -0
  1337. cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h +53 -0
  1338. cuda/cccl/headers/include/thrust/random/detail/mod.h +101 -0
  1339. cuda/cccl/headers/include/thrust/random/detail/normal_distribution.inl +187 -0
  1340. cuda/cccl/headers/include/thrust/random/detail/normal_distribution_base.h +160 -0
  1341. cuda/cccl/headers/include/thrust/random/detail/random_core_access.h +63 -0
  1342. cuda/cccl/headers/include/thrust/random/detail/subtract_with_carry_engine.inl +201 -0
  1343. cuda/cccl/headers/include/thrust/random/detail/uniform_int_distribution.inl +198 -0
  1344. cuda/cccl/headers/include/thrust/random/detail/uniform_real_distribution.inl +198 -0
  1345. cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine.inl +183 -0
  1346. cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine_max.h +217 -0
  1347. cuda/cccl/headers/include/thrust/random/discard_block_engine.h +240 -0
  1348. cuda/cccl/headers/include/thrust/random/linear_congruential_engine.h +289 -0
  1349. cuda/cccl/headers/include/thrust/random/linear_feedback_shift_engine.h +217 -0
  1350. cuda/cccl/headers/include/thrust/random/normal_distribution.h +257 -0
  1351. cuda/cccl/headers/include/thrust/random/subtract_with_carry_engine.h +247 -0
  1352. cuda/cccl/headers/include/thrust/random/uniform_int_distribution.h +261 -0
  1353. cuda/cccl/headers/include/thrust/random/uniform_real_distribution.h +258 -0
  1354. cuda/cccl/headers/include/thrust/random/xor_combine_engine.h +255 -0
  1355. cuda/cccl/headers/include/thrust/random.h +120 -0
  1356. cuda/cccl/headers/include/thrust/reduce.h +1112 -0
  1357. cuda/cccl/headers/include/thrust/remove.h +768 -0
  1358. cuda/cccl/headers/include/thrust/replace.h +827 -0
  1359. cuda/cccl/headers/include/thrust/reverse.h +213 -0
  1360. cuda/cccl/headers/include/thrust/scan.h +1671 -0
  1361. cuda/cccl/headers/include/thrust/scatter.h +446 -0
  1362. cuda/cccl/headers/include/thrust/sequence.h +277 -0
  1363. cuda/cccl/headers/include/thrust/set_operations.h +3026 -0
  1364. cuda/cccl/headers/include/thrust/shuffle.h +182 -0
  1365. cuda/cccl/headers/include/thrust/sort.h +1320 -0
  1366. cuda/cccl/headers/include/thrust/swap.h +147 -0
  1367. cuda/cccl/headers/include/thrust/system/cpp/detail/adjacent_difference.h +30 -0
  1368. cuda/cccl/headers/include/thrust/system/cpp/detail/assign_value.h +30 -0
  1369. cuda/cccl/headers/include/thrust/system/cpp/detail/binary_search.h +32 -0
  1370. cuda/cccl/headers/include/thrust/system/cpp/detail/copy.h +30 -0
  1371. cuda/cccl/headers/include/thrust/system/cpp/detail/copy_if.h +30 -0
  1372. cuda/cccl/headers/include/thrust/system/cpp/detail/count.h +29 -0
  1373. cuda/cccl/headers/include/thrust/system/cpp/detail/equal.h +29 -0
  1374. cuda/cccl/headers/include/thrust/system/cpp/detail/execution_policy.h +90 -0
  1375. cuda/cccl/headers/include/thrust/system/cpp/detail/extrema.h +30 -0
  1376. cuda/cccl/headers/include/thrust/system/cpp/detail/fill.h +29 -0
  1377. cuda/cccl/headers/include/thrust/system/cpp/detail/find.h +30 -0
  1378. cuda/cccl/headers/include/thrust/system/cpp/detail/for_each.h +30 -0
  1379. cuda/cccl/headers/include/thrust/system/cpp/detail/gather.h +29 -0
  1380. cuda/cccl/headers/include/thrust/system/cpp/detail/generate.h +29 -0
  1381. cuda/cccl/headers/include/thrust/system/cpp/detail/get_value.h +30 -0
  1382. cuda/cccl/headers/include/thrust/system/cpp/detail/inner_product.h +29 -0
  1383. cuda/cccl/headers/include/thrust/system/cpp/detail/iter_swap.h +30 -0
  1384. cuda/cccl/headers/include/thrust/system/cpp/detail/logical.h +29 -0
  1385. cuda/cccl/headers/include/thrust/system/cpp/detail/malloc_and_free.h +30 -0
  1386. cuda/cccl/headers/include/thrust/system/cpp/detail/memory.inl +60 -0
  1387. cuda/cccl/headers/include/thrust/system/cpp/detail/merge.h +30 -0
  1388. cuda/cccl/headers/include/thrust/system/cpp/detail/mismatch.h +29 -0
  1389. cuda/cccl/headers/include/thrust/system/cpp/detail/par.h +62 -0
  1390. cuda/cccl/headers/include/thrust/system/cpp/detail/partition.h +30 -0
  1391. cuda/cccl/headers/include/thrust/system/cpp/detail/per_device_resource.h +29 -0
  1392. cuda/cccl/headers/include/thrust/system/cpp/detail/reduce.h +30 -0
  1393. cuda/cccl/headers/include/thrust/system/cpp/detail/reduce_by_key.h +30 -0
  1394. cuda/cccl/headers/include/thrust/system/cpp/detail/remove.h +30 -0
  1395. cuda/cccl/headers/include/thrust/system/cpp/detail/replace.h +29 -0
  1396. cuda/cccl/headers/include/thrust/system/cpp/detail/reverse.h +29 -0
  1397. cuda/cccl/headers/include/thrust/system/cpp/detail/scan.h +30 -0
  1398. cuda/cccl/headers/include/thrust/system/cpp/detail/scan_by_key.h +30 -0
  1399. cuda/cccl/headers/include/thrust/system/cpp/detail/scatter.h +29 -0
  1400. cuda/cccl/headers/include/thrust/system/cpp/detail/sequence.h +29 -0
  1401. cuda/cccl/headers/include/thrust/system/cpp/detail/set_operations.h +30 -0
  1402. cuda/cccl/headers/include/thrust/system/cpp/detail/sort.h +30 -0
  1403. cuda/cccl/headers/include/thrust/system/cpp/detail/swap_ranges.h +29 -0
  1404. cuda/cccl/headers/include/thrust/system/cpp/detail/tabulate.h +29 -0
  1405. cuda/cccl/headers/include/thrust/system/cpp/detail/temporary_buffer.h +29 -0
  1406. cuda/cccl/headers/include/thrust/system/cpp/detail/transform.h +29 -0
  1407. cuda/cccl/headers/include/thrust/system/cpp/detail/transform_reduce.h +29 -0
  1408. cuda/cccl/headers/include/thrust/system/cpp/detail/transform_scan.h +29 -0
  1409. cuda/cccl/headers/include/thrust/system/cpp/detail/uninitialized_copy.h +29 -0
  1410. cuda/cccl/headers/include/thrust/system/cpp/detail/uninitialized_fill.h +29 -0
  1411. cuda/cccl/headers/include/thrust/system/cpp/detail/unique.h +30 -0
  1412. cuda/cccl/headers/include/thrust/system/cpp/detail/unique_by_key.h +30 -0
  1413. cuda/cccl/headers/include/thrust/system/cpp/detail/vector.inl +130 -0
  1414. cuda/cccl/headers/include/thrust/system/cpp/execution_policy.h +161 -0
  1415. cuda/cccl/headers/include/thrust/system/cpp/memory.h +109 -0
  1416. cuda/cccl/headers/include/thrust/system/cpp/memory_resource.h +75 -0
  1417. cuda/cccl/headers/include/thrust/system/cpp/pointer.h +123 -0
  1418. cuda/cccl/headers/include/thrust/system/cpp/vector.h +99 -0
  1419. cuda/cccl/headers/include/thrust/system/cuda/config.h +123 -0
  1420. cuda/cccl/headers/include/thrust/system/cuda/detail/adjacent_difference.h +219 -0
  1421. cuda/cccl/headers/include/thrust/system/cuda/detail/assign_value.h +124 -0
  1422. cuda/cccl/headers/include/thrust/system/cuda/detail/binary_search.h +29 -0
  1423. cuda/cccl/headers/include/thrust/system/cuda/detail/cdp_dispatch.h +72 -0
  1424. cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +129 -0
  1425. cuda/cccl/headers/include/thrust/system/cuda/detail/copy_if.h +255 -0
  1426. cuda/cccl/headers/include/thrust/system/cuda/detail/core/agent_launcher.h +289 -0
  1427. cuda/cccl/headers/include/thrust/system/cuda/detail/core/load_iterator.h +58 -0
  1428. cuda/cccl/headers/include/thrust/system/cuda/detail/core/make_load_iterator.h +53 -0
  1429. cuda/cccl/headers/include/thrust/system/cuda/detail/core/triple_chevron_launch.h +191 -0
  1430. cuda/cccl/headers/include/thrust/system/cuda/detail/core/util.h +611 -0
  1431. cuda/cccl/headers/include/thrust/system/cuda/detail/count.h +75 -0
  1432. cuda/cccl/headers/include/thrust/system/cuda/detail/cross_system.h +243 -0
  1433. cuda/cccl/headers/include/thrust/system/cuda/detail/dispatch.h +210 -0
  1434. cuda/cccl/headers/include/thrust/system/cuda/detail/equal.h +64 -0
  1435. cuda/cccl/headers/include/thrust/system/cuda/detail/error.inl +96 -0
  1436. cuda/cccl/headers/include/thrust/system/cuda/detail/execution_policy.h +113 -0
  1437. cuda/cccl/headers/include/thrust/system/cuda/detail/extrema.h +476 -0
  1438. cuda/cccl/headers/include/thrust/system/cuda/detail/fill.h +82 -0
  1439. cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +272 -0
  1440. cuda/cccl/headers/include/thrust/system/cuda/detail/for_each.h +83 -0
  1441. cuda/cccl/headers/include/thrust/system/cuda/detail/gather.h +91 -0
  1442. cuda/cccl/headers/include/thrust/system/cuda/detail/generate.h +85 -0
  1443. cuda/cccl/headers/include/thrust/system/cuda/detail/get_value.h +65 -0
  1444. cuda/cccl/headers/include/thrust/system/cuda/detail/inner_product.h +75 -0
  1445. cuda/cccl/headers/include/thrust/system/cuda/detail/internal/copy_cross_system.h +204 -0
  1446. cuda/cccl/headers/include/thrust/system/cuda/detail/internal/copy_device_to_device.h +89 -0
  1447. cuda/cccl/headers/include/thrust/system/cuda/detail/iter_swap.h +69 -0
  1448. cuda/cccl/headers/include/thrust/system/cuda/detail/logical.h +29 -0
  1449. cuda/cccl/headers/include/thrust/system/cuda/detail/make_unsigned_special.h +61 -0
  1450. cuda/cccl/headers/include/thrust/system/cuda/detail/malloc_and_free.h +121 -0
  1451. cuda/cccl/headers/include/thrust/system/cuda/detail/memory.inl +57 -0
  1452. cuda/cccl/headers/include/thrust/system/cuda/detail/merge.h +228 -0
  1453. cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +217 -0
  1454. cuda/cccl/headers/include/thrust/system/cuda/detail/par.h +237 -0
  1455. cuda/cccl/headers/include/thrust/system/cuda/detail/par_to_seq.h +95 -0
  1456. cuda/cccl/headers/include/thrust/system/cuda/detail/parallel_for.h +81 -0
  1457. cuda/cccl/headers/include/thrust/system/cuda/detail/partition.h +405 -0
  1458. cuda/cccl/headers/include/thrust/system/cuda/detail/per_device_resource.h +72 -0
  1459. cuda/cccl/headers/include/thrust/system/cuda/detail/reduce.h +781 -0
  1460. cuda/cccl/headers/include/thrust/system/cuda/detail/reduce_by_key.h +1000 -0
  1461. cuda/cccl/headers/include/thrust/system/cuda/detail/remove.h +107 -0
  1462. cuda/cccl/headers/include/thrust/system/cuda/detail/replace.h +152 -0
  1463. cuda/cccl/headers/include/thrust/system/cuda/detail/reverse.h +88 -0
  1464. cuda/cccl/headers/include/thrust/system/cuda/detail/scan.h +342 -0
  1465. cuda/cccl/headers/include/thrust/system/cuda/detail/scan_by_key.h +415 -0
  1466. cuda/cccl/headers/include/thrust/system/cuda/detail/scatter.h +79 -0
  1467. cuda/cccl/headers/include/thrust/system/cuda/detail/sequence.h +29 -0
  1468. cuda/cccl/headers/include/thrust/system/cuda/detail/set_operations.h +1736 -0
  1469. cuda/cccl/headers/include/thrust/system/cuda/detail/sort.h +482 -0
  1470. cuda/cccl/headers/include/thrust/system/cuda/detail/swap_ranges.h +75 -0
  1471. cuda/cccl/headers/include/thrust/system/cuda/detail/tabulate.h +75 -0
  1472. cuda/cccl/headers/include/thrust/system/cuda/detail/temporary_buffer.h +132 -0
  1473. cuda/cccl/headers/include/thrust/system/cuda/detail/terminate.h +53 -0
  1474. cuda/cccl/headers/include/thrust/system/cuda/detail/transform.h +403 -0
  1475. cuda/cccl/headers/include/thrust/system/cuda/detail/transform_reduce.h +143 -0
  1476. cuda/cccl/headers/include/thrust/system/cuda/detail/transform_scan.h +119 -0
  1477. cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_copy.h +94 -0
  1478. cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_fill.h +91 -0
  1479. cuda/cccl/headers/include/thrust/system/cuda/detail/unique.h +646 -0
  1480. cuda/cccl/headers/include/thrust/system/cuda/detail/unique_by_key.h +311 -0
  1481. cuda/cccl/headers/include/thrust/system/cuda/detail/util.h +251 -0
  1482. cuda/cccl/headers/include/thrust/system/cuda/error.h +168 -0
  1483. cuda/cccl/headers/include/thrust/system/cuda/execution_policy.h +39 -0
  1484. cuda/cccl/headers/include/thrust/system/cuda/memory.h +122 -0
  1485. cuda/cccl/headers/include/thrust/system/cuda/memory_resource.h +122 -0
  1486. cuda/cccl/headers/include/thrust/system/cuda/pointer.h +160 -0
  1487. cuda/cccl/headers/include/thrust/system/cuda/vector.h +108 -0
  1488. cuda/cccl/headers/include/thrust/system/detail/adl/adjacent_difference.h +51 -0
  1489. cuda/cccl/headers/include/thrust/system/detail/adl/assign_value.h +51 -0
  1490. cuda/cccl/headers/include/thrust/system/detail/adl/binary_search.h +51 -0
  1491. cuda/cccl/headers/include/thrust/system/detail/adl/copy.h +51 -0
  1492. cuda/cccl/headers/include/thrust/system/detail/adl/copy_if.h +52 -0
  1493. cuda/cccl/headers/include/thrust/system/detail/adl/count.h +51 -0
  1494. cuda/cccl/headers/include/thrust/system/detail/adl/equal.h +51 -0
  1495. cuda/cccl/headers/include/thrust/system/detail/adl/extrema.h +51 -0
  1496. cuda/cccl/headers/include/thrust/system/detail/adl/fill.h +51 -0
  1497. cuda/cccl/headers/include/thrust/system/detail/adl/find.h +51 -0
  1498. cuda/cccl/headers/include/thrust/system/detail/adl/for_each.h +51 -0
  1499. cuda/cccl/headers/include/thrust/system/detail/adl/gather.h +51 -0
  1500. cuda/cccl/headers/include/thrust/system/detail/adl/generate.h +51 -0
  1501. cuda/cccl/headers/include/thrust/system/detail/adl/get_value.h +51 -0
  1502. cuda/cccl/headers/include/thrust/system/detail/adl/inner_product.h +51 -0
  1503. cuda/cccl/headers/include/thrust/system/detail/adl/iter_swap.h +51 -0
  1504. cuda/cccl/headers/include/thrust/system/detail/adl/logical.h +51 -0
  1505. cuda/cccl/headers/include/thrust/system/detail/adl/malloc_and_free.h +51 -0
  1506. cuda/cccl/headers/include/thrust/system/detail/adl/merge.h +51 -0
  1507. cuda/cccl/headers/include/thrust/system/detail/adl/mismatch.h +51 -0
  1508. cuda/cccl/headers/include/thrust/system/detail/adl/partition.h +51 -0
  1509. cuda/cccl/headers/include/thrust/system/detail/adl/per_device_resource.h +48 -0
  1510. cuda/cccl/headers/include/thrust/system/detail/adl/reduce.h +51 -0
  1511. cuda/cccl/headers/include/thrust/system/detail/adl/reduce_by_key.h +51 -0
  1512. cuda/cccl/headers/include/thrust/system/detail/adl/remove.h +51 -0
  1513. cuda/cccl/headers/include/thrust/system/detail/adl/replace.h +51 -0
  1514. cuda/cccl/headers/include/thrust/system/detail/adl/reverse.h +51 -0
  1515. cuda/cccl/headers/include/thrust/system/detail/adl/scan.h +51 -0
  1516. cuda/cccl/headers/include/thrust/system/detail/adl/scan_by_key.h +51 -0
  1517. cuda/cccl/headers/include/thrust/system/detail/adl/scatter.h +51 -0
  1518. cuda/cccl/headers/include/thrust/system/detail/adl/sequence.h +51 -0
  1519. cuda/cccl/headers/include/thrust/system/detail/adl/set_operations.h +51 -0
  1520. cuda/cccl/headers/include/thrust/system/detail/adl/sort.h +51 -0
  1521. cuda/cccl/headers/include/thrust/system/detail/adl/swap_ranges.h +51 -0
  1522. cuda/cccl/headers/include/thrust/system/detail/adl/tabulate.h +51 -0
  1523. cuda/cccl/headers/include/thrust/system/detail/adl/temporary_buffer.h +51 -0
  1524. cuda/cccl/headers/include/thrust/system/detail/adl/transform.h +51 -0
  1525. cuda/cccl/headers/include/thrust/system/detail/adl/transform_reduce.h +51 -0
  1526. cuda/cccl/headers/include/thrust/system/detail/adl/transform_scan.h +51 -0
  1527. cuda/cccl/headers/include/thrust/system/detail/adl/uninitialized_copy.h +51 -0
  1528. cuda/cccl/headers/include/thrust/system/detail/adl/uninitialized_fill.h +51 -0
  1529. cuda/cccl/headers/include/thrust/system/detail/adl/unique.h +51 -0
  1530. cuda/cccl/headers/include/thrust/system/detail/adl/unique_by_key.h +51 -0
  1531. cuda/cccl/headers/include/thrust/system/detail/bad_alloc.h +64 -0
  1532. cuda/cccl/headers/include/thrust/system/detail/errno.h +125 -0
  1533. cuda/cccl/headers/include/thrust/system/detail/error_category.inl +302 -0
  1534. cuda/cccl/headers/include/thrust/system/detail/error_code.inl +173 -0
  1535. cuda/cccl/headers/include/thrust/system/detail/error_condition.inl +121 -0
  1536. cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.h +59 -0
  1537. cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.inl +85 -0
  1538. cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.h +167 -0
  1539. cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.inl +391 -0
  1540. cuda/cccl/headers/include/thrust/system/detail/generic/copy.h +51 -0
  1541. cuda/cccl/headers/include/thrust/system/detail/generic/copy.inl +70 -0
  1542. cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.h +64 -0
  1543. cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.inl +152 -0
  1544. cuda/cccl/headers/include/thrust/system/detail/generic/count.h +54 -0
  1545. cuda/cccl/headers/include/thrust/system/detail/generic/count.inl +90 -0
  1546. cuda/cccl/headers/include/thrust/system/detail/generic/equal.h +55 -0
  1547. cuda/cccl/headers/include/thrust/system/detail/generic/equal.inl +66 -0
  1548. cuda/cccl/headers/include/thrust/system/detail/generic/extrema.h +72 -0
  1549. cuda/cccl/headers/include/thrust/system/detail/generic/extrema.inl +258 -0
  1550. cuda/cccl/headers/include/thrust/system/detail/generic/fill.h +60 -0
  1551. cuda/cccl/headers/include/thrust/system/detail/generic/find.h +55 -0
  1552. cuda/cccl/headers/include/thrust/system/detail/generic/find.inl +143 -0
  1553. cuda/cccl/headers/include/thrust/system/detail/generic/for_each.h +64 -0
  1554. cuda/cccl/headers/include/thrust/system/detail/generic/gather.h +79 -0
  1555. cuda/cccl/headers/include/thrust/system/detail/generic/gather.inl +102 -0
  1556. cuda/cccl/headers/include/thrust/system/detail/generic/generate.h +51 -0
  1557. cuda/cccl/headers/include/thrust/system/detail/generic/generate.inl +63 -0
  1558. cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.h +66 -0
  1559. cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.inl +78 -0
  1560. cuda/cccl/headers/include/thrust/system/detail/generic/logical.h +65 -0
  1561. cuda/cccl/headers/include/thrust/system/detail/generic/memory.h +70 -0
  1562. cuda/cccl/headers/include/thrust/system/detail/generic/memory.inl +83 -0
  1563. cuda/cccl/headers/include/thrust/system/detail/generic/merge.h +105 -0
  1564. cuda/cccl/headers/include/thrust/system/detail/generic/merge.inl +154 -0
  1565. cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.h +55 -0
  1566. cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.inl +74 -0
  1567. cuda/cccl/headers/include/thrust/system/detail/generic/partition.h +135 -0
  1568. cuda/cccl/headers/include/thrust/system/detail/generic/partition.inl +213 -0
  1569. cuda/cccl/headers/include/thrust/system/detail/generic/per_device_resource.h +49 -0
  1570. cuda/cccl/headers/include/thrust/system/detail/generic/reduce.h +77 -0
  1571. cuda/cccl/headers/include/thrust/system/detail/generic/reduce.inl +106 -0
  1572. cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.h +89 -0
  1573. cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.inl +192 -0
  1574. cuda/cccl/headers/include/thrust/system/detail/generic/remove.h +92 -0
  1575. cuda/cccl/headers/include/thrust/system/detail/generic/remove.inl +127 -0
  1576. cuda/cccl/headers/include/thrust/system/detail/generic/replace.h +101 -0
  1577. cuda/cccl/headers/include/thrust/system/detail/generic/replace.inl +181 -0
  1578. cuda/cccl/headers/include/thrust/system/detail/generic/reverse.h +54 -0
  1579. cuda/cccl/headers/include/thrust/system/detail/generic/reverse.inl +72 -0
  1580. cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.h +78 -0
  1581. cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.inl +141 -0
  1582. cuda/cccl/headers/include/thrust/system/detail/generic/scan.h +78 -0
  1583. cuda/cccl/headers/include/thrust/system/detail/generic/scan.inl +91 -0
  1584. cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.h +132 -0
  1585. cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.inl +238 -0
  1586. cuda/cccl/headers/include/thrust/system/detail/generic/scatter.h +79 -0
  1587. cuda/cccl/headers/include/thrust/system/detail/generic/scatter.inl +91 -0
  1588. cuda/cccl/headers/include/thrust/system/detail/generic/select_system.h +96 -0
  1589. cuda/cccl/headers/include/thrust/system/detail/generic/sequence.h +70 -0
  1590. cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.h +288 -0
  1591. cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.inl +482 -0
  1592. cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.h +60 -0
  1593. cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.inl +131 -0
  1594. cuda/cccl/headers/include/thrust/system/detail/generic/sort.h +119 -0
  1595. cuda/cccl/headers/include/thrust/system/detail/generic/sort.inl +181 -0
  1596. cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.h +50 -0
  1597. cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.inl +82 -0
  1598. cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.h +47 -0
  1599. cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.inl +60 -0
  1600. cuda/cccl/headers/include/thrust/system/detail/generic/tag.h +53 -0
  1601. cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.h +60 -0
  1602. cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.inl +88 -0
  1603. cuda/cccl/headers/include/thrust/system/detail/generic/transform.h +395 -0
  1604. cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.h +56 -0
  1605. cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.inl +62 -0
  1606. cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.h +86 -0
  1607. cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.inl +119 -0
  1608. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.h +51 -0
  1609. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.inl +172 -0
  1610. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.h +51 -0
  1611. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.inl +121 -0
  1612. cuda/cccl/headers/include/thrust/system/detail/generic/unique.h +77 -0
  1613. cuda/cccl/headers/include/thrust/system/detail/generic/unique.inl +119 -0
  1614. cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.h +87 -0
  1615. cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.inl +132 -0
  1616. cuda/cccl/headers/include/thrust/system/detail/internal/decompose.h +123 -0
  1617. cuda/cccl/headers/include/thrust/system/detail/sequential/adjacent_difference.h +76 -0
  1618. cuda/cccl/headers/include/thrust/system/detail/sequential/assign_value.h +48 -0
  1619. cuda/cccl/headers/include/thrust/system/detail/sequential/binary_search.h +142 -0
  1620. cuda/cccl/headers/include/thrust/system/detail/sequential/copy.h +55 -0
  1621. cuda/cccl/headers/include/thrust/system/detail/sequential/copy.inl +125 -0
  1622. cuda/cccl/headers/include/thrust/system/detail/sequential/copy_backward.h +55 -0
  1623. cuda/cccl/headers/include/thrust/system/detail/sequential/copy_if.h +77 -0
  1624. cuda/cccl/headers/include/thrust/system/detail/sequential/count.h +29 -0
  1625. cuda/cccl/headers/include/thrust/system/detail/sequential/equal.h +29 -0
  1626. cuda/cccl/headers/include/thrust/system/detail/sequential/execution_policy.h +78 -0
  1627. cuda/cccl/headers/include/thrust/system/detail/sequential/extrema.h +116 -0
  1628. cuda/cccl/headers/include/thrust/system/detail/sequential/fill.h +29 -0
  1629. cuda/cccl/headers/include/thrust/system/detail/sequential/find.h +68 -0
  1630. cuda/cccl/headers/include/thrust/system/detail/sequential/for_each.h +80 -0
  1631. cuda/cccl/headers/include/thrust/system/detail/sequential/gather.h +29 -0
  1632. cuda/cccl/headers/include/thrust/system/detail/sequential/general_copy.h +129 -0
  1633. cuda/cccl/headers/include/thrust/system/detail/sequential/generate.h +29 -0
  1634. cuda/cccl/headers/include/thrust/system/detail/sequential/get_value.h +49 -0
  1635. cuda/cccl/headers/include/thrust/system/detail/sequential/inner_product.h +29 -0
  1636. cuda/cccl/headers/include/thrust/system/detail/sequential/insertion_sort.h +147 -0
  1637. cuda/cccl/headers/include/thrust/system/detail/sequential/iter_swap.h +51 -0
  1638. cuda/cccl/headers/include/thrust/system/detail/sequential/logical.h +29 -0
  1639. cuda/cccl/headers/include/thrust/system/detail/sequential/malloc_and_free.h +56 -0
  1640. cuda/cccl/headers/include/thrust/system/detail/sequential/merge.h +81 -0
  1641. cuda/cccl/headers/include/thrust/system/detail/sequential/merge.inl +151 -0
  1642. cuda/cccl/headers/include/thrust/system/detail/sequential/mismatch.h +29 -0
  1643. cuda/cccl/headers/include/thrust/system/detail/sequential/partition.h +309 -0
  1644. cuda/cccl/headers/include/thrust/system/detail/sequential/per_device_resource.h +29 -0
  1645. cuda/cccl/headers/include/thrust/system/detail/sequential/reduce.h +70 -0
  1646. cuda/cccl/headers/include/thrust/system/detail/sequential/reduce_by_key.h +104 -0
  1647. cuda/cccl/headers/include/thrust/system/detail/sequential/remove.h +185 -0
  1648. cuda/cccl/headers/include/thrust/system/detail/sequential/replace.h +29 -0
  1649. cuda/cccl/headers/include/thrust/system/detail/sequential/reverse.h +29 -0
  1650. cuda/cccl/headers/include/thrust/system/detail/sequential/scan.h +160 -0
  1651. cuda/cccl/headers/include/thrust/system/detail/sequential/scan_by_key.h +151 -0
  1652. cuda/cccl/headers/include/thrust/system/detail/sequential/scatter.h +29 -0
  1653. cuda/cccl/headers/include/thrust/system/detail/sequential/sequence.h +29 -0
  1654. cuda/cccl/headers/include/thrust/system/detail/sequential/set_operations.h +212 -0
  1655. cuda/cccl/headers/include/thrust/system/detail/sequential/sort.h +65 -0
  1656. cuda/cccl/headers/include/thrust/system/detail/sequential/sort.inl +116 -0
  1657. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.h +61 -0
  1658. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.inl +362 -0
  1659. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.h +54 -0
  1660. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.inl +130 -0
  1661. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.h +54 -0
  1662. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.inl +592 -0
  1663. cuda/cccl/headers/include/thrust/system/detail/sequential/swap_ranges.h +29 -0
  1664. cuda/cccl/headers/include/thrust/system/detail/sequential/tabulate.h +29 -0
  1665. cuda/cccl/headers/include/thrust/system/detail/sequential/temporary_buffer.h +29 -0
  1666. cuda/cccl/headers/include/thrust/system/detail/sequential/transform.h +29 -0
  1667. cuda/cccl/headers/include/thrust/system/detail/sequential/transform_reduce.h +29 -0
  1668. cuda/cccl/headers/include/thrust/system/detail/sequential/transform_scan.h +29 -0
  1669. cuda/cccl/headers/include/thrust/system/detail/sequential/trivial_copy.h +64 -0
  1670. cuda/cccl/headers/include/thrust/system/detail/sequential/uninitialized_copy.h +29 -0
  1671. cuda/cccl/headers/include/thrust/system/detail/sequential/uninitialized_fill.h +29 -0
  1672. cuda/cccl/headers/include/thrust/system/detail/sequential/unique.h +121 -0
  1673. cuda/cccl/headers/include/thrust/system/detail/sequential/unique_by_key.h +112 -0
  1674. cuda/cccl/headers/include/thrust/system/detail/system_error.inl +108 -0
  1675. cuda/cccl/headers/include/thrust/system/error_code.h +512 -0
  1676. cuda/cccl/headers/include/thrust/system/omp/detail/adjacent_difference.h +54 -0
  1677. cuda/cccl/headers/include/thrust/system/omp/detail/assign_value.h +30 -0
  1678. cuda/cccl/headers/include/thrust/system/omp/detail/binary_search.h +77 -0
  1679. cuda/cccl/headers/include/thrust/system/omp/detail/copy.h +50 -0
  1680. cuda/cccl/headers/include/thrust/system/omp/detail/copy.inl +74 -0
  1681. cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.h +56 -0
  1682. cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.inl +59 -0
  1683. cuda/cccl/headers/include/thrust/system/omp/detail/count.h +30 -0
  1684. cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.h +50 -0
  1685. cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.inl +65 -0
  1686. cuda/cccl/headers/include/thrust/system/omp/detail/equal.h +30 -0
  1687. cuda/cccl/headers/include/thrust/system/omp/detail/execution_policy.h +113 -0
  1688. cuda/cccl/headers/include/thrust/system/omp/detail/extrema.h +66 -0
  1689. cuda/cccl/headers/include/thrust/system/omp/detail/fill.h +30 -0
  1690. cuda/cccl/headers/include/thrust/system/omp/detail/find.h +53 -0
  1691. cuda/cccl/headers/include/thrust/system/omp/detail/for_each.h +56 -0
  1692. cuda/cccl/headers/include/thrust/system/omp/detail/for_each.inl +87 -0
  1693. cuda/cccl/headers/include/thrust/system/omp/detail/gather.h +30 -0
  1694. cuda/cccl/headers/include/thrust/system/omp/detail/generate.h +30 -0
  1695. cuda/cccl/headers/include/thrust/system/omp/detail/get_value.h +30 -0
  1696. cuda/cccl/headers/include/thrust/system/omp/detail/inner_product.h +30 -0
  1697. cuda/cccl/headers/include/thrust/system/omp/detail/iter_swap.h +30 -0
  1698. cuda/cccl/headers/include/thrust/system/omp/detail/logical.h +30 -0
  1699. cuda/cccl/headers/include/thrust/system/omp/detail/malloc_and_free.h +30 -0
  1700. cuda/cccl/headers/include/thrust/system/omp/detail/memory.inl +93 -0
  1701. cuda/cccl/headers/include/thrust/system/omp/detail/merge.h +30 -0
  1702. cuda/cccl/headers/include/thrust/system/omp/detail/mismatch.h +30 -0
  1703. cuda/cccl/headers/include/thrust/system/omp/detail/par.h +62 -0
  1704. cuda/cccl/headers/include/thrust/system/omp/detail/partition.h +88 -0
  1705. cuda/cccl/headers/include/thrust/system/omp/detail/partition.inl +102 -0
  1706. cuda/cccl/headers/include/thrust/system/omp/detail/per_device_resource.h +29 -0
  1707. cuda/cccl/headers/include/thrust/system/omp/detail/pragma_omp.h +54 -0
  1708. cuda/cccl/headers/include/thrust/system/omp/detail/reduce.h +54 -0
  1709. cuda/cccl/headers/include/thrust/system/omp/detail/reduce.inl +78 -0
  1710. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.h +64 -0
  1711. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.inl +65 -0
  1712. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.h +59 -0
  1713. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.inl +103 -0
  1714. cuda/cccl/headers/include/thrust/system/omp/detail/remove.h +72 -0
  1715. cuda/cccl/headers/include/thrust/system/omp/detail/remove.inl +87 -0
  1716. cuda/cccl/headers/include/thrust/system/omp/detail/replace.h +30 -0
  1717. cuda/cccl/headers/include/thrust/system/omp/detail/reverse.h +30 -0
  1718. cuda/cccl/headers/include/thrust/system/omp/detail/scan.h +30 -0
  1719. cuda/cccl/headers/include/thrust/system/omp/detail/scan_by_key.h +30 -0
  1720. cuda/cccl/headers/include/thrust/system/omp/detail/scatter.h +30 -0
  1721. cuda/cccl/headers/include/thrust/system/omp/detail/sequence.h +30 -0
  1722. cuda/cccl/headers/include/thrust/system/omp/detail/set_operations.h +30 -0
  1723. cuda/cccl/headers/include/thrust/system/omp/detail/sort.h +60 -0
  1724. cuda/cccl/headers/include/thrust/system/omp/detail/sort.inl +265 -0
  1725. cuda/cccl/headers/include/thrust/system/omp/detail/swap_ranges.h +30 -0
  1726. cuda/cccl/headers/include/thrust/system/omp/detail/tabulate.h +30 -0
  1727. cuda/cccl/headers/include/thrust/system/omp/detail/temporary_buffer.h +29 -0
  1728. cuda/cccl/headers/include/thrust/system/omp/detail/transform.h +30 -0
  1729. cuda/cccl/headers/include/thrust/system/omp/detail/transform_reduce.h +30 -0
  1730. cuda/cccl/headers/include/thrust/system/omp/detail/transform_scan.h +30 -0
  1731. cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_copy.h +30 -0
  1732. cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_fill.h +30 -0
  1733. cuda/cccl/headers/include/thrust/system/omp/detail/unique.h +60 -0
  1734. cuda/cccl/headers/include/thrust/system/omp/detail/unique.inl +71 -0
  1735. cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.h +67 -0
  1736. cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.inl +75 -0
  1737. cuda/cccl/headers/include/thrust/system/omp/execution_policy.h +160 -0
  1738. cuda/cccl/headers/include/thrust/system/omp/memory.h +111 -0
  1739. cuda/cccl/headers/include/thrust/system/omp/memory_resource.h +75 -0
  1740. cuda/cccl/headers/include/thrust/system/omp/pointer.h +124 -0
  1741. cuda/cccl/headers/include/thrust/system/omp/vector.h +99 -0
  1742. cuda/cccl/headers/include/thrust/system/system_error.h +184 -0
  1743. cuda/cccl/headers/include/thrust/system/tbb/detail/adjacent_difference.h +54 -0
  1744. cuda/cccl/headers/include/thrust/system/tbb/detail/assign_value.h +30 -0
  1745. cuda/cccl/headers/include/thrust/system/tbb/detail/binary_search.h +30 -0
  1746. cuda/cccl/headers/include/thrust/system/tbb/detail/copy.h +50 -0
  1747. cuda/cccl/headers/include/thrust/system/tbb/detail/copy.inl +73 -0
  1748. cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.h +47 -0
  1749. cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.inl +136 -0
  1750. cuda/cccl/headers/include/thrust/system/tbb/detail/count.h +30 -0
  1751. cuda/cccl/headers/include/thrust/system/tbb/detail/equal.h +30 -0
  1752. cuda/cccl/headers/include/thrust/system/tbb/detail/execution_policy.h +92 -0
  1753. cuda/cccl/headers/include/thrust/system/tbb/detail/extrema.h +66 -0
  1754. cuda/cccl/headers/include/thrust/system/tbb/detail/fill.h +30 -0
  1755. cuda/cccl/headers/include/thrust/system/tbb/detail/find.h +49 -0
  1756. cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.h +51 -0
  1757. cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.inl +91 -0
  1758. cuda/cccl/headers/include/thrust/system/tbb/detail/gather.h +30 -0
  1759. cuda/cccl/headers/include/thrust/system/tbb/detail/generate.h +30 -0
  1760. cuda/cccl/headers/include/thrust/system/tbb/detail/get_value.h +30 -0
  1761. cuda/cccl/headers/include/thrust/system/tbb/detail/inner_product.h +30 -0
  1762. cuda/cccl/headers/include/thrust/system/tbb/detail/iter_swap.h +30 -0
  1763. cuda/cccl/headers/include/thrust/system/tbb/detail/logical.h +30 -0
  1764. cuda/cccl/headers/include/thrust/system/tbb/detail/malloc_and_free.h +30 -0
  1765. cuda/cccl/headers/include/thrust/system/tbb/detail/memory.inl +94 -0
  1766. cuda/cccl/headers/include/thrust/system/tbb/detail/merge.h +77 -0
  1767. cuda/cccl/headers/include/thrust/system/tbb/detail/merge.inl +327 -0
  1768. cuda/cccl/headers/include/thrust/system/tbb/detail/mismatch.h +30 -0
  1769. cuda/cccl/headers/include/thrust/system/tbb/detail/par.h +62 -0
  1770. cuda/cccl/headers/include/thrust/system/tbb/detail/partition.h +84 -0
  1771. cuda/cccl/headers/include/thrust/system/tbb/detail/partition.inl +98 -0
  1772. cuda/cccl/headers/include/thrust/system/tbb/detail/per_device_resource.h +29 -0
  1773. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.h +54 -0
  1774. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.inl +137 -0
  1775. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.h +61 -0
  1776. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.inl +400 -0
  1777. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_intervals.h +140 -0
  1778. cuda/cccl/headers/include/thrust/system/tbb/detail/remove.h +76 -0
  1779. cuda/cccl/headers/include/thrust/system/tbb/detail/remove.inl +87 -0
  1780. cuda/cccl/headers/include/thrust/system/tbb/detail/replace.h +30 -0
  1781. cuda/cccl/headers/include/thrust/system/tbb/detail/reverse.h +30 -0
  1782. cuda/cccl/headers/include/thrust/system/tbb/detail/scan.h +59 -0
  1783. cuda/cccl/headers/include/thrust/system/tbb/detail/scan.inl +312 -0
  1784. cuda/cccl/headers/include/thrust/system/tbb/detail/scan_by_key.h +30 -0
  1785. cuda/cccl/headers/include/thrust/system/tbb/detail/scatter.h +30 -0
  1786. cuda/cccl/headers/include/thrust/system/tbb/detail/sequence.h +30 -0
  1787. cuda/cccl/headers/include/thrust/system/tbb/detail/set_operations.h +30 -0
  1788. cuda/cccl/headers/include/thrust/system/tbb/detail/sort.h +60 -0
  1789. cuda/cccl/headers/include/thrust/system/tbb/detail/sort.inl +295 -0
  1790. cuda/cccl/headers/include/thrust/system/tbb/detail/swap_ranges.h +30 -0
  1791. cuda/cccl/headers/include/thrust/system/tbb/detail/tabulate.h +30 -0
  1792. cuda/cccl/headers/include/thrust/system/tbb/detail/temporary_buffer.h +29 -0
  1793. cuda/cccl/headers/include/thrust/system/tbb/detail/transform.h +30 -0
  1794. cuda/cccl/headers/include/thrust/system/tbb/detail/transform_reduce.h +30 -0
  1795. cuda/cccl/headers/include/thrust/system/tbb/detail/transform_scan.h +30 -0
  1796. cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_copy.h +30 -0
  1797. cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_fill.h +30 -0
  1798. cuda/cccl/headers/include/thrust/system/tbb/detail/unique.h +60 -0
  1799. cuda/cccl/headers/include/thrust/system/tbb/detail/unique.inl +71 -0
  1800. cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.h +67 -0
  1801. cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.inl +75 -0
  1802. cuda/cccl/headers/include/thrust/system/tbb/execution_policy.h +160 -0
  1803. cuda/cccl/headers/include/thrust/system/tbb/memory.h +111 -0
  1804. cuda/cccl/headers/include/thrust/system/tbb/memory_resource.h +75 -0
  1805. cuda/cccl/headers/include/thrust/system/tbb/pointer.h +124 -0
  1806. cuda/cccl/headers/include/thrust/system/tbb/vector.h +99 -0
  1807. cuda/cccl/headers/include/thrust/system_error.h +57 -0
  1808. cuda/cccl/headers/include/thrust/tabulate.h +125 -0
  1809. cuda/cccl/headers/include/thrust/transform.h +1045 -0
  1810. cuda/cccl/headers/include/thrust/transform_reduce.h +190 -0
  1811. cuda/cccl/headers/include/thrust/transform_scan.h +442 -0
  1812. cuda/cccl/headers/include/thrust/tuple.h +142 -0
  1813. cuda/cccl/headers/include/thrust/type_traits/integer_sequence.h +261 -0
  1814. cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +154 -0
  1815. cuda/cccl/headers/include/thrust/type_traits/is_execution_policy.h +65 -0
  1816. cuda/cccl/headers/include/thrust/type_traits/is_operator_less_or_greater_function_object.h +184 -0
  1817. cuda/cccl/headers/include/thrust/type_traits/is_operator_plus_function_object.h +116 -0
  1818. cuda/cccl/headers/include/thrust/type_traits/is_trivially_relocatable.h +336 -0
  1819. cuda/cccl/headers/include/thrust/type_traits/logical_metafunctions.h +42 -0
  1820. cuda/cccl/headers/include/thrust/type_traits/unwrap_contiguous_iterator.h +96 -0
  1821. cuda/cccl/headers/include/thrust/uninitialized_copy.h +300 -0
  1822. cuda/cccl/headers/include/thrust/uninitialized_fill.h +268 -0
  1823. cuda/cccl/headers/include/thrust/unique.h +1090 -0
  1824. cuda/cccl/headers/include/thrust/universal_allocator.h +93 -0
  1825. cuda/cccl/headers/include/thrust/universal_ptr.h +34 -0
  1826. cuda/cccl/headers/include/thrust/universal_vector.h +71 -0
  1827. cuda/cccl/headers/include/thrust/version.h +93 -0
  1828. cuda/cccl/headers/include/thrust/zip_function.h +176 -0
  1829. cuda/cccl/headers/include_paths.py +72 -0
  1830. cuda/cccl/parallel/__init__.py +9 -0
  1831. cuda/cccl/parallel/experimental/__init__.py +47 -0
  1832. cuda/cccl/parallel/experimental/_bindings.py +24 -0
  1833. cuda/cccl/parallel/experimental/_bindings.pyi +388 -0
  1834. cuda/cccl/parallel/experimental/_bindings_impl.cpython-312-x86_64-linux-gnu.so +0 -0
  1835. cuda/cccl/parallel/experimental/_bindings_impl.pyx +2158 -0
  1836. cuda/cccl/parallel/experimental/_caching.py +71 -0
  1837. cuda/cccl/parallel/experimental/_cccl_interop.py +382 -0
  1838. cuda/cccl/parallel/experimental/_utils/__init__.py +0 -0
  1839. cuda/cccl/parallel/experimental/_utils/protocols.py +132 -0
  1840. cuda/cccl/parallel/experimental/algorithms/__init__.py +28 -0
  1841. cuda/cccl/parallel/experimental/algorithms/_merge_sort.py +172 -0
  1842. cuda/cccl/parallel/experimental/algorithms/_radix_sort.py +244 -0
  1843. cuda/cccl/parallel/experimental/algorithms/_reduce.py +136 -0
  1844. cuda/cccl/parallel/experimental/algorithms/_scan.py +179 -0
  1845. cuda/cccl/parallel/experimental/algorithms/_segmented_reduce.py +183 -0
  1846. cuda/cccl/parallel/experimental/algorithms/_transform.py +213 -0
  1847. cuda/cccl/parallel/experimental/algorithms/_unique_by_key.py +179 -0
  1848. cuda/cccl/parallel/experimental/cccl/.gitkeep +0 -0
  1849. cuda/cccl/parallel/experimental/cccl/libcccl.c.parallel.so +0 -0
  1850. cuda/cccl/parallel/experimental/iterators/__init__.py +17 -0
  1851. cuda/cccl/parallel/experimental/iterators/_factories.py +157 -0
  1852. cuda/cccl/parallel/experimental/iterators/_iterators.py +650 -0
  1853. cuda/cccl/parallel/experimental/numba_utils.py +6 -0
  1854. cuda/cccl/parallel/experimental/struct.py +150 -0
  1855. cuda/cccl/parallel/experimental/typing.py +27 -0
  1856. cuda/cccl/py.typed +0 -0
  1857. cuda_cccl-0.1.3.1.0.dev1678.dist-info/METADATA +28 -0
  1858. cuda_cccl-0.1.3.1.0.dev1678.dist-info/RECORD +1860 -0
  1859. cuda_cccl-0.1.3.1.0.dev1678.dist-info/WHEEL +6 -0
  1860. cuda_cccl-0.1.3.1.0.dev1678.dist-info/licenses/LICENSE +1 -0
@@ -0,0 +1,1112 @@
1
+ /*
2
+ * Copyright 2008-2013 NVIDIA Corporation
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License");
5
+ * you may not use this file except in compliance with the License.
6
+ * You may obtain a copy of the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing, software
11
+ * distributed under the License is distributed on an "AS IS" BASIS,
12
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ * See the License for the specific language governing permissions and
14
+ * limitations under the License.
15
+ */
16
+
17
+ /*! \file thrust/reduce.h
18
+ * \brief Functions for reducing a range to a single value
19
+ */
20
+
21
+ #pragma once
22
+
23
+ #include <thrust/detail/config.h>
24
+
25
+ #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
26
+ # pragma GCC system_header
27
+ #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
28
+ # pragma clang system_header
29
+ #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
30
+ # pragma system_header
31
+ #endif // no system header
32
+ #include <thrust/detail/execution_policy.h>
33
+ #include <thrust/iterator/iterator_traits.h>
34
+ #include <thrust/pair.h>
35
+
36
+ THRUST_NAMESPACE_BEGIN
37
+
38
+ /*! \addtogroup reductions
39
+ * \{
40
+ */
41
+
42
+ /*! \p reduce is a generalization of summation: it computes the sum (or some
43
+ * other binary operation) of all the elements in the range <tt>[first,
44
+ * last)</tt>. This version of \p reduce uses \c 0 as the initial value of the
45
+ * reduction. \p reduce is similar to the C++ Standard Template Library's
46
+ * <tt>std::accumulate</tt>. The primary difference between the two functions
47
+ * is that <tt>std::accumulate</tt> guarantees the order of summation, while
48
+ * \p reduce requires associativity of the binary operation to parallelize
49
+ * the reduction.
50
+ *
51
+ * Note that \p reduce also assumes that the binary reduction operator (in this
52
+ * case operator+) is commutative. If the reduction operator is not commutative
53
+ * then \p thrust::reduce should not be used. Instead, one could use
54
+ * \p inclusive_scan (which does not require commutativity) and select the
55
+ * last element of the output array.
56
+ *
57
+ * The algorithm's execution is parallelized as determined by \p exec.
58
+ *
59
+ * \param exec The execution policy to use for parallelization.
60
+ * \param first The beginning of the sequence.
61
+ * \param last The end of the sequence.
62
+ * \return The result of the reduction.
63
+ *
64
+ * \tparam DerivedPolicy The name of the derived execution policy.
65
+ * \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
66
+ * Iterator</a> and if \c x and \c y are objects of \p InputIterator's \c value_type, then <tt>x + y</tt> is defined and
67
+ * is convertible to \p InputIterator's \c value_type. If \c T is \c InputIterator's \c value_type, then <tt>T(0)</tt>
68
+ * is defined.
69
+ *
70
+ * The following code snippet demonstrates how to use \p reduce to compute
71
+ * the sum of a sequence of integers using the \p thrust::host execution policy for parallelization:
72
+ *
73
+ * \code
74
+ * #include <thrust/reduce.h>
75
+ * #include <thrust/execution_policy.h>
76
+ * ...
77
+ * int data[6] = {1, 0, 2, 2, 1, 3};
78
+ * int result = thrust::reduce(thrust::host, data, data + 6);
79
+ *
80
+ * // result == 9
81
+ * \endcode
82
+ *
83
+ * \see https://en.cppreference.com/w/cpp/algorithm/accumulate
84
+ */
85
+ template <typename DerivedPolicy, typename InputIterator>
86
+ _CCCL_HOST_DEVICE detail::it_value_t<InputIterator>
87
+ reduce(const thrust::detail::execution_policy_base<DerivedPolicy>& exec, InputIterator first, InputIterator last);
88
+
89
+ /*! \p reduce is a generalization of summation: it computes the sum (or some
90
+ * other binary operation) of all the elements in the range <tt>[first,
91
+ * last)</tt>. This version of \p reduce uses \c 0 as the initial value of the
92
+ * reduction. \p reduce is similar to the C++ Standard Template Library's
93
+ * <tt>std::accumulate</tt>. The primary difference between the two functions
94
+ * is that <tt>std::accumulate</tt> guarantees the order of summation, while
95
+ * \p reduce requires associativity of the binary operation to parallelize
96
+ * the reduction.
97
+ *
98
+ * Note that \p reduce also assumes that the binary reduction operator (in this
99
+ * case operator+) is commutative. If the reduction operator is not commutative
100
+ * then \p thrust::reduce should not be used. Instead, one could use
101
+ * \p inclusive_scan (which does not require commutativity) and select the
102
+ * last element of the output array.
103
+ *
104
+ * \param first The beginning of the sequence.
105
+ * \param last The end of the sequence.
106
+ * \return The result of the reduction.
107
+ *
108
+ * \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
109
+ * Iterator</a> and if \c x and \c y are objects of \p InputIterator's \c value_type, then <tt>x + y</tt> is defined and
110
+ * is convertible to \p InputIterator's \c value_type. If \c T is \c InputIterator's \c value_type, then <tt>T(0)</tt>
111
+ * is defined.
112
+ *
113
+ * The following code snippet demonstrates how to use \p reduce to compute
114
+ * the sum of a sequence of integers.
115
+ *
116
+ * \code
117
+ * #include <thrust/reduce.h>
118
+ * ...
119
+ * int data[6] = {1, 0, 2, 2, 1, 3};
120
+ * int result = thrust::reduce(data, data + 6);
121
+ *
122
+ * // result == 9
123
+ * \endcode
124
+ *
125
+ * \see https://en.cppreference.com/w/cpp/algorithm/accumulate
126
+ */
127
+ template <typename InputIterator>
128
+ detail::it_value_t<InputIterator> reduce(InputIterator first, InputIterator last);
129
+
130
+ /*! \p reduce is a generalization of summation: it computes the sum (or some
131
+ * other binary operation) of all the elements in the range <tt>[first,
132
+ * last)</tt>. This version of \p reduce uses \p init as the initial value of the
133
+ * reduction. \p reduce is similar to the C++ Standard Template Library's
134
+ * <tt>std::accumulate</tt>. The primary difference between the two functions
135
+ * is that <tt>std::accumulate</tt> guarantees the order of summation, while
136
+ * \p reduce requires associativity of the binary operation to parallelize
137
+ * the reduction.
138
+ *
139
+ * Note that \p reduce also assumes that the binary reduction operator (in this
140
+ * case operator+) is commutative. If the reduction operator is not commutative
141
+ * then \p thrust::reduce should not be used. Instead, one could use
142
+ * \p inclusive_scan (which does not require commutativity) and select the
143
+ * last element of the output array.
144
+ *
145
+ * The algorithm's execution is parallelized as determined by \p exec.
146
+ *
147
+ * \param exec The execution policy to use for parallelization.
148
+ * \param first The beginning of the input sequence.
149
+ * \param last The end of the input sequence.
150
+ * \param init The initial value.
151
+ * \return The result of the reduction.
152
+ *
153
+ * \tparam DerivedPolicy The name of the derived execution policy.
154
+ * \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
155
+ * Iterator</a> and if \c x and \c y are objects of \p InputIterator's \c value_type, then <tt>x + y</tt> is defined and
156
+ * is convertible to \p T. \tparam T is convertible to \p InputIterator's \c value_type.
157
+ *
158
+ * The following code snippet demonstrates how to use \p reduce to compute
159
+ * the sum of a sequence of integers including an initialization value using the \p thrust::host
160
+ * execution policy for parallelization:
161
+ *
162
+ * \code
163
+ * #include <thrust/reduce.h>
164
+ * #include <thrust/execution_policy.h>
165
+ * ...
166
+ * int data[6] = {1, 0, 2, 2, 1, 3};
167
+ * int result = thrust::reduce(thrust::host, data, data + 6, 1);
168
+ *
169
+ * // result == 10
170
+ * \endcode
171
+ *
172
+ * \see https://en.cppreference.com/w/cpp/algorithm/accumulate
173
+ */
174
+ template <typename DerivedPolicy, typename InputIterator, typename T>
175
+ _CCCL_HOST_DEVICE T reduce(
176
+ const thrust::detail::execution_policy_base<DerivedPolicy>& exec, InputIterator first, InputIterator last, T init);
177
+
178
+ /*! \p reduce is a generalization of summation: it computes the sum (or some
179
+ * other binary operation) of all the elements in the range <tt>[first,
180
+ * last)</tt>. This version of \p reduce uses \p init as the initial value of the
181
+ * reduction. \p reduce is similar to the C++ Standard Template Library's
182
+ * <tt>std::accumulate</tt>. The primary difference between the two functions
183
+ * is that <tt>std::accumulate</tt> guarantees the order of summation, while
184
+ * \p reduce requires associativity of the binary operation to parallelize
185
+ * the reduction.
186
+ *
187
+ * Note that \p reduce also assumes that the binary reduction operator (in this
188
+ * case operator+) is commutative. If the reduction operator is not commutative
189
+ * then \p thrust::reduce should not be used. Instead, one could use
190
+ * \p inclusive_scan (which does not require commutativity) and select the
191
+ * last element of the output array.
192
+ *
193
+ * \param first The beginning of the input sequence.
194
+ * \param last The end of the input sequence.
195
+ * \param init The initial value.
196
+ * \return The result of the reduction.
197
+ *
198
+ * \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
199
+ * Iterator</a> and if \c x and \c y are objects of \p InputIterator's \c value_type, then <tt>x + y</tt> is defined and
200
+ * is convertible to \p T. \tparam T is convertible to \p InputIterator's \c value_type.
201
+ *
202
+ * The following code snippet demonstrates how to use \p reduce to compute
203
+ * the sum of a sequence of integers including an initialization value.
204
+ *
205
+ * \code
206
+ * #include <thrust/reduce.h>
207
+ * ...
208
+ * int data[6] = {1, 0, 2, 2, 1, 3};
209
+ * int result = thrust::reduce(data, data + 6, 1);
210
+ *
211
+ * // result == 10
212
+ * \endcode
213
+ *
214
+ * \see https://en.cppreference.com/w/cpp/algorithm/accumulate
215
+ */
216
+ template <typename InputIterator, typename T>
217
+ T reduce(InputIterator first, InputIterator last, T init);
218
+
219
+ /*! \p reduce is a generalization of summation: it computes the sum (or some
220
+ * other binary operation) of all the elements in the range <tt>[first,
221
+ * last)</tt>. This version of \p reduce uses \p init as the initial value of the
222
+ * reduction and \p binary_op as the binary function used for summation. \p reduce
223
+ * is similar to the C++ Standard Template Library's <tt>std::accumulate</tt>.
224
+ * The primary difference between the two functions is that <tt>std::accumulate</tt>
225
+ * guarantees the order of summation, while \p reduce requires associativity of
226
+ * \p binary_op to parallelize the reduction.
227
+ *
228
+ * Note that \p reduce also assumes that the binary reduction operator (in this
229
+ * case \p binary_op) is commutative. If the reduction operator is not commutative
230
+ * then \p thrust::reduce should not be used. Instead, one could use
231
+ * \p inclusive_scan (which does not require commutativity) and select the
232
+ * last element of the output array.
233
+ *
234
+ * The algorithm's execution is parallelized as determined by \p exec.
235
+ *
236
+ * \param exec The execution policy to use for parallelization.
237
+ * \param first The beginning of the input sequence.
238
+ * \param last The end of the input sequence.
239
+ * \param init The initial value.
240
+ * \param binary_op The binary function used to 'sum' values.
241
+ * \return The result of the reduction.
242
+ *
243
+ * \tparam DerivedPolicy The name of the derived execution policy.
244
+ * \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
245
+ * Iterator</a> and \c InputIterator's \c value_type is convertible to \c T.
246
+ * \tparam T is a model of <a href="https://en.cppreference.com/w/cpp/named_req/CopyAssignable">Assignable</a>, and is
247
+ * convertible to \p BinaryFunction's first and second argument type.
248
+ * \tparam BinaryFunction The function's return type must be convertible to \p OutputType.
249
+ *
250
+ * The following code snippet demonstrates how to use \p reduce to
251
+ * compute the maximum value of a sequence of integers using the \p thrust::host execution policy
252
+ * for parallelization:
253
+ *
254
+ * \code
255
+ * #include <thrust/reduce.h>
256
+ * #include <thrust/functional.h>
257
+ * #include <thrust/execution_policy.h>
258
+ * ...
259
+ * int data[6] = {1, 0, 2, 2, 1, 3};
260
+ * int result = thrust::reduce(thrust::host,
261
+ * data, data + 6,
262
+ * -1,
263
+ * ::cuda::maximum<int>());
264
+ * // result == 3
265
+ * \endcode
266
+ *
267
+ * \see https://en.cppreference.com/w/cpp/algorithm/accumulate
268
+ * \see transform_reduce
269
+ */
270
+ template <typename DerivedPolicy, typename InputIterator, typename T, typename BinaryFunction>
271
+ _CCCL_HOST_DEVICE T reduce(
272
+ const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
273
+ InputIterator first,
274
+ InputIterator last,
275
+ T init,
276
+ BinaryFunction binary_op);
277
+
278
+ /*! \p reduce is a generalization of summation: it computes the sum (or some
279
+ * other binary operation) of all the elements in the range <tt>[first,
280
+ * last)</tt>. This version of \p reduce uses \p init as the initial value of the
281
+ * reduction and \p binary_op as the binary function used for summation. \p reduce
282
+ * is similar to the C++ Standard Template Library's <tt>std::accumulate</tt>.
283
+ * The primary difference between the two functions is that <tt>std::accumulate</tt>
284
+ * guarantees the order of summation, while \p reduce requires associativity of
285
+ * \p binary_op to parallelize the reduction.
286
+ *
287
+ * Note that \p reduce also assumes that the binary reduction operator (in this
288
+ * case \p binary_op) is commutative. If the reduction operator is not commutative
289
+ * then \p thrust::reduce should not be used. Instead, one could use
290
+ * \p inclusive_scan (which does not require commutativity) and select the
291
+ * last element of the output array.
292
+ *
293
+ * \param first The beginning of the input sequence.
294
+ * \param last The end of the input sequence.
295
+ * \param init The initial value.
296
+ * \param binary_op The binary function used to 'sum' values.
297
+ * \return The result of the reduction.
298
+ *
299
+ * \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
300
+ * Iterator</a> and \c InputIterator's \c value_type is convertible to \c T.
301
+ * \tparam T is a model of <a href="https://en.cppreference.com/w/cpp/named_req/CopyAssignable">Assignable</a>, and is
302
+ * convertible to \p BinaryFunction's first and second argument type.
303
+ * \tparam BinaryFunction The function's return type must be convertible to \p OutputType.
304
+ *
305
+ * The following code snippet demonstrates how to use \p reduce to
306
+ * compute the maximum value of a sequence of integers.
307
+ *
308
+ * \code
309
+ * #include <thrust/reduce.h>
310
+ * #include <thrust/functional.h>
311
+ * ...
312
+ * int data[6] = {1, 0, 2, 2, 1, 3};
313
+ * int result = thrust::reduce(data, data + 6,
314
+ * -1,
315
+ * ::cuda::maximum<int>());
316
+ * // result == 3
317
+ * \endcode
318
+ *
319
+ * \see https://en.cppreference.com/w/cpp/algorithm/accumulate
320
+ * \see transform_reduce
321
+ */
322
+ template <typename InputIterator, typename T, typename BinaryFunction>
323
+ T reduce(InputIterator first, InputIterator last, T init, BinaryFunction binary_op);
324
+
325
+ /*! \p reduce_into is a generalization of summation: it computes the sum (or some
326
+ * other binary operation) of all the elements in the range <tt>[first,
327
+ * last)</tt>. This version of \p reduce_into uses \c 0 as the initial value of the
328
+ * reduction. \p reduce_into is similar to the C++ Standard Template Library's
329
+ * <tt>std::accumulate</tt>. The primary difference between the two functions
330
+ * is that <tt>std::accumulate</tt> guarantees the order of summation, while
331
+ * \p reduce_into requires associativity of the binary operation to parallelize
332
+ * the reduction.
333
+ *
334
+ * Note that \p reduce_into also assumes that the binary reduction operator (in this
335
+ * case operator+) is commutative. If the reduction operator is not commutative
336
+ * then \p reduce_into should not be used. Instead, one could use \p inclusive_scan
337
+ * (which does not require commutativity) and select the last element of the
338
+ * output array.
339
+ *
340
+ * Unlike \p reduce, \p reduce_into does not return the reduction result.
341
+ * Instead, it is written to <tt>*output</tt>. Thus, when \p exec is
342
+ * \p thrust::cuda::par_nosync, this algorithm does not wait for the work it
343
+ * launches to complete. Additionally, you can use \p reduce_into to avoid
344
+ * copying the reduction result from device memory to host memory.
345
+ *
346
+ * The algorithm's execution is parallelized as determined by \p exec.
347
+ *
348
+ * \param exec The execution policy to use for parallelization.
349
+ * \param first The beginning of the sequence.
350
+ * \param last The end of the sequence.
351
+ * \param output The location the reduction will be written to.
352
+ *
353
+ * \tparam DerivedPolicy The name of the derived execution policy.
354
+ * \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
355
+ * Iterator</a> and if \c x and \c y are objects of \p InputIterator's \c value_type, then <tt>x + y</tt> is defined and
356
+ * is convertible to \p InputIterator's \c value_type. If \c T is \c InputIterator's \c value_type, then <tt>T(0)</tt>
357
+ * is defined.
358
+ * \tparam OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output
359
+ * Iterator</a> and \c OutputIterator's \c value_type is assignable from \c InputIterator's \c value_type.
360
+ *
361
+ * The following code snippet demonstrates how to use \p reduce_into to compute
362
+ * the sum of a sequence of integers using the \p thrust::device execution policy for parallelization:
363
+ *
364
+ * \code
365
+ * #include <thrust/reduce.h>
366
+ * #include <thrust/device_vector.h>
367
+ * #include <thrust/execution_policy.h>
368
+ *
369
+ * thrust::device_vector<int> data{1, 0, 2, 2, 1, 3};
370
+ * thrust::device_vector<int> output(1);
371
+ * thrust::reduce_into(thrust::device, data.begin(), data.end(), output.begin());
372
+ * // output[0] == 9
373
+ * \endcode
374
+ *
375
+ * \see https://en.cppreference.com/w/cpp/algorithm/accumulate
376
+ * \see reduce
377
+ */
378
+ template <typename DerivedPolicy, typename InputIterator, typename OutputIterator>
379
+ _CCCL_HOST_DEVICE void reduce_into(
380
+ const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
381
+ InputIterator first,
382
+ InputIterator last,
383
+ OutputIterator output);
384
+
385
+ /*! \p reduce_into is a generalization of summation: it computes the sum (or some
386
+ * other binary operation) of all the elements in the range <tt>[first,
387
+ * last)</tt>. This version of \p reduce_into uses \c 0 as the initial value of the
388
+ * reduction. \p reduce_into is similar to the C++ Standard Template Library's
389
+ * <tt>std::accumulate</tt>. The primary difference between the two functions
390
+ * is that <tt>std::accumulate</tt> guarantees the order of summation, while
391
+ * \p reduce_into requires associativity of the binary operation to parallelize
392
+ * the reduction.
393
+ *
394
+ * Note that \p reduce_into also assumes that the binary reduction operator (in this
395
+ * case operator+) is commutative. If the reduction operator is not commutative
396
+ * then \p reduce_into should not be used. Instead, one could use \p inclusive_scan
397
+ * (which does not require commutativity) and select the last element of the
398
+ * output array.
399
+ *
400
+ * Unlike \p reduce, \p reduce_into does not return the reduction result.
401
+ * Instead, it is written to <tt>*output</tt>. Thus, when \p exec is
402
+ * \p thrust::cuda::par_nosync, this algorithm does not wait for the work it
403
+ * launches to complete. Additionally, you can use \p reduce_into to avoid
404
+ * copying the reduction result from device memory to host memory.
405
+ *
406
+ * \param first The beginning of the sequence.
407
+ * \param last The end of the sequence.
408
+ * \param output The location the reduction will be written to.
409
+ *
410
+ * \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
411
+ * Iterator</a> and if \c x and \c y are objects of \p InputIterator's \c value_type, then <tt>x + y</tt> is defined and
412
+ * is convertible to \p InputIterator's \c value_type. If \c T is \c InputIterator's \c value_type, then <tt>T(0)</tt>
413
+ * is defined.
414
+ * \tparam OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output
415
+ * Iterator</a> and \c OutputIterator's \c value_type is assignable from \c InputIterator's \c value_type.
416
+ *
417
+ * The following code snippet demonstrates how to use \p reduce_into to compute
418
+ * the sum of a sequence of integers.
419
+ *
420
+ * \code
421
+ * #include <thrust/reduce.h>
422
+ * #include <thrust/device_vector.h>
423
+ * #include <thrust/execution_policy.h>
424
+ *
425
+ * thrust::device_vector<int> data{1, 0, 2, 2, 1, 3};
426
+ * thrust::device_vector<int> output(1);
427
+ * thrust::reduce_into(data.begin(), data.end(), output.begin());
428
+ * // output[0] == 9
429
+ * \endcode
430
+ *
431
+ * \see https://en.cppreference.com/w/cpp/algorithm/accumulate
432
+ * \see reduce
433
+ */
434
+ template <typename InputIterator, typename OutputIterator>
435
+ void reduce_into(InputIterator first, InputIterator last, OutputIterator output);
436
+
437
+ /*! \p reduce_into is a generalization of summation: it computes the sum (or some
438
+ * other binary operation) of all the elements in the range <tt>[first,
439
+ * last)</tt>. This version of \p reduce_into uses \p init as the initial value of the
440
+ * reduction. \p reduce_into is similar to the C++ Standard Template Library's
441
+ * <tt>std::accumulate</tt>. The primary difference between the two functions
442
+ * is that <tt>std::accumulate</tt> guarantees the order of summation, while
443
+ * \p reduce_into requires associativity of the binary operation to parallelize
444
+ * the reduction.
445
+ *
446
+ * Note that \p reduce_into also assumes that the binary reduction operator (in this
447
+ * case operator+) is commutative. If the reduction operator is not commutative
448
+ * then \p reduce_into should not be used. Instead, one could use \p inclusive_scan
449
+ * (which does not require commutativity) and select the last element of the
450
+ * output array.
451
+ *
452
+ * Unlike \p reduce, \p reduce_into does not return the reduction result.
453
+ * Instead, it is written to <tt>*output</tt>. Thus, when \p exec is
454
+ * \p thrust::cuda::par_nosync, this algorithm does not wait for the work it
455
+ * launches to complete. Additionally, you can use \p reduce_into to avoid
456
+ * copying the reduction result from device memory to host memory.
457
+ *
458
+ * The algorithm's execution is parallelized as determined by \p exec.
459
+ *
460
+ * \param exec The execution policy to use for parallelization.
461
+ * \param first The beginning of the input sequence.
462
+ * \param last The end of the input sequence.
463
+ * \param output The location the reduction will be written to.
464
+ * \param init The initial value.
465
+ *
466
+ * \tparam DerivedPolicy The name of the derived execution policy.
467
+ * \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
468
+ * Iterator</a> and if \c x and \c y are objects of \p InputIterator's \c value_type, then <tt>x + y</tt> is defined and
469
+ * is convertible to \p T.
470
+ * \tparam OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output
471
+ * Iterator</a> and \c OutputIterator's \c value_type is assignable from \c T.
472
+ * \tparam T is convertible to \p InputIterator's \c value_type.
473
+ *
474
+ * The following code snippet demonstrates how to use \p reduce_into to compute
475
+ * the sum of a sequence of integers including an initialization value using the
476
+ * \p thrust::device execution policy for parallelization:
477
+ *
478
+ * \code
479
+ * #include <thrust/reduce.h>
480
+ * #include <thrust/device_vector.h>
481
+ * #include <thrust/execution_policy.h>
482
+ *
483
+ * thrust::device_vector<int> data{1, 0, 2, 2, 1, 3};
484
+ * thrust::device_vector<int> output(1);
485
+ * thrust::reduce_into(thrust::device, data.begin(), data.end(), output.begin(), 1);
486
+ * // output[0] == 10
487
+ * \endcode
488
+ *
489
+ * \see https://en.cppreference.com/w/cpp/algorithm/accumulate
490
+ * \see reduce
491
+ */
492
+ template <typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename T>
493
+ _CCCL_HOST_DEVICE void reduce_into(
494
+ const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
495
+ InputIterator first,
496
+ InputIterator last,
497
+ OutputIterator output,
498
+ T init);
499
+
500
+ /*! \p reduce_into is a generalization of summation: it computes the sum (or some
501
+ * other binary operation) of all the elements in the range <tt>[first,
502
+ * last)</tt>. This version of \p reduce_into uses \p init as the initial value of the
503
+ * reduction. \p reduce_into is similar to the C++ Standard Template Library's
504
+ * <tt>std::accumulate</tt>. The primary difference between the two functions
505
+ * is that <tt>std::accumulate</tt> guarantees the order of summation, while
506
+ * \p reduce_into requires associativity of the binary operation to parallelize
507
+ * the reduction.
508
+ *
509
+ * Note that \p reduce_into also assumes that the binary reduction operator (in this
510
+ * case operator+) is commutative. If the reduction operator is not commutative
511
+ * then \p reduce_into should not be used. Instead, one could use \p inclusive_scan
512
+ * (which does not require commutativity) and select the last element of the
513
+ * output array.
514
+ *
515
+ * Unlike \p reduce, \p reduce_into does not return the reduction result.
516
+ * Instead, it is written to <tt>*output</tt>. Thus, when \p exec is
517
+ * \p thrust::cuda::par_nosync, this algorithm does not wait for the work it
518
+ * launches to complete. Additionally, you can use \p reduce_into to avoid
519
+ * copying the reduction result from device memory to host memory.
520
+ *
521
+ * \param first The beginning of the input sequence.
522
+ * \param last The end of the input sequence.
523
+ * \param output The location the reduction will be written to.
524
+ * \param init The initial value.
525
+ *
526
+ * \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
527
+ * Iterator</a> and if \c x and \c y are objects of \p InputIterator's \c value_type, then <tt>x + y</tt> is defined and
528
+ * is convertible to \p T.
529
+ * \tparam OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output
530
+ * Iterator</a> and \c OutputIterator's \c value_type is assignable from \c T.
531
+ * \tparam T is convertible to \p InputIterator's \c value_type.
532
+ *
533
+ * The following code snippet demonstrates how to use \p reduce_into to compute
534
+ * the sum of a sequence of integers including an initialization value.
535
+ *
536
+ * \code
537
+ * #include <thrust/reduce.h>
538
+ * #include <thrust/device_vector.h>
539
+ * #include <thrust/execution_policy.h>
540
+ *
541
+ * thrust::device_vector<int> data{1, 0, 2, 2, 1, 3};
542
+ * thrust::device_vector<int> output(1);
543
+ * thrust::reduce_into(data.begin(), data.end(), output.begin(), 1);
544
+ * // output[0] == 10
545
+ * \endcode
546
+ *
547
+ * \see https://en.cppreference.com/w/cpp/algorithm/accumulate
548
+ * \see reduce
549
+ */
550
+ template <typename InputIterator, typename OutputIterator, typename T>
551
+ void reduce_into(InputIterator first, InputIterator last, OutputIterator output, T init);
552
+
553
+ /*! \p reduce_into is a generalization of summation: it computes the sum (or some
554
+ * other binary operation) of all the elements in the range <tt>[first,
555
+ * last)</tt>. This version of \p reduce_into uses \p init as the initial value of the
556
+ * reduction and \p binary_op as the binary function used for summation. \p reduce_into
557
+ * is similar to the C++ Standard Template Library's <tt>std::accumulate</tt>.
558
+ * The primary difference between the two functions is that <tt>std::accumulate</tt>
559
+ * guarantees the order of summation, while \p reduce_into requires associativity of
560
+ * \p binary_op to parallelize the reduction.
561
+ *
562
+ * Note that \p reduce_into also assumes that the binary reduction operator (in this
563
+ * case \p binary_op) is commutative. If the reduction operator is not commutative
564
+ * then \p reduce_into should not be used. Instead, one could use \p inclusive_scan
565
+ * (which does not require commutativity) and select the last element of the
566
+ * output array.
567
+ *
568
+ * Unlike \p reduce, \p reduce_into does not return the reduction result.
569
+ * Instead, it is written to <tt>*output</tt>. Thus, when \p exec is
570
+ * \p thrust::cuda::par_nosync, this algorithm does not wait for the work it
571
+ * launches to complete. Additionally, you can use \p reduce_into to avoid
572
+ * copying the reduction result from device memory to host memory.
573
+ *
574
+ * The algorithm's execution is parallelized as determined by \p exec.
575
+ *
576
+ * \param exec The execution policy to use for parallelization.
577
+ * \param first The beginning of the input sequence.
578
+ * \param last The end of the input sequence.
579
+ * \param output The location the reduction will be written to.
580
+ * \param init The initial value.
581
+ * \param binary_op The binary function used to 'sum' values.
582
+ * \return The result of the reduction.
583
+ *
584
+ * \tparam DerivedPolicy The name of the derived execution policy.
585
+ * \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
586
+ * Iterator</a> and \c InputIterator's \c value_type is convertible to \c T.
587
+ * \tparam OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output
588
+ * Iterator</a> and \c OutputIterator's \c value_type is assignable from \c T.
589
+ * \tparam T is a model of <a href="https://en.cppreference.com/w/cpp/named_req/CopyAssignable">Assignable</a>, and is
590
+ * convertible to \p BinaryFunction's first and second argument type.
591
+ * \tparam BinaryFunction The function's return type must be convertible to \p OutputType.
592
+ *
593
+ * The following code snippet demonstrates how to use \p reduce_into to
594
+ * compute the maximum value of a sequence of integers using the \p thrust::device
595
+ * execution policy for parallelization:
596
+ *
597
+ * \code
598
+ * #include <cuda/functional>
599
+ * #include <thrust/reduce.h>
600
+ * #include <thrust/device_vector.h>
601
+ * #include <thrust/execution_policy.h>
602
+ *
603
+ * thrust::device_vector<int> data{1, 0, 2, 2, 1, 3};
604
+ * thrust::device_vector<int> output(1);
605
+ * thrust::reduce_into(thrust::device,
606
+ * data.begin(), data.end(), output.begin(), -1,
607
+ * cuda::maximum{});
608
+ * // output[0] == 3
609
+ * \endcode
610
+ *
611
+ * \see https://en.cppreference.com/w/cpp/algorithm/accumulate
612
+ * \see reduce
613
+ * \see transform_reduce
614
+ * \see transform_reduce_into
615
+ */
616
+ template <typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename T, typename BinaryFunction>
617
+ _CCCL_HOST_DEVICE void reduce_into(
618
+ const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
619
+ InputIterator first,
620
+ InputIterator last,
621
+ OutputIterator output,
622
+ T init,
623
+ BinaryFunction binary_op);
624
+
625
+ /*! \p reduce_into is a generalization of summation: it computes the sum (or some
626
+ * other binary operation) of all the elements in the range <tt>[first,
627
+ * last)</tt>. This version of \p reduce_into uses \p init as the initial value of the
628
+ * reduction and \p binary_op as the binary function used for summation. \p reduce_into
629
+ * is similar to the C++ Standard Template Library's <tt>std::accumulate</tt>.
630
+ * The primary difference between the two functions is that <tt>std::accumulate</tt>
631
+ * guarantees the order of summation, while \p reduce_into requires associativity of
632
+ * \p binary_op to parallelize the reduction.
633
+ *
634
+ * Note that \p reduce_into also assumes that the binary reduction operator (in this
635
+ * case \p binary_op) is commutative. If the reduction operator is not commutative
636
+ * then \p thrust::reduce_into should not be used. Instead, one could use
637
+ * \p inclusive_scan (which does not require commutativity) and select the
638
+ * last element of the output array.
639
+ *
640
+ * \param first The beginning of the input sequence.
641
+ * \param last The end of the input sequence.
642
+ * \param init The initial value.
643
+ * \param binary_op The binary function used to 'sum' values.
644
+ * \return The result of the reduction.
645
+ *
646
+ * \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
647
+ * Iterator</a> and \c InputIterator's \c value_type is convertible to \c T.
648
+ * \tparam OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output
649
+ * Iterator</a> and \c OutputIterator's \c value_type is assignable from \c T.
650
+ * \tparam T is a model of <a href="https://en.cppreference.com/w/cpp/named_req/CopyAssignable">Assignable</a>, and is
651
+ * convertible to \p BinaryFunction's first and second argument type.
652
+ * \tparam BinaryFunction The function's return type must be convertible to \p OutputType.
653
+ *
654
+ * The following code snippet demonstrates how to use \p reduce_into to
655
+ * compute the maximum value of a sequence of integers.
656
+ *
657
+ * \code
658
+ * #include <cuda/functional>
659
+ * #include <thrust/reduce.h>
660
+ * #include <thrust/device_vector.h>
661
+ * #include <thrust/execution_policy.h>
662
+ *
663
+ * thrust::device_vector<int> data{1, 0, 2, 2, 1, 3};
664
+ * thrust::device_vector<int> output(1);
665
+ * thrust::reduce_into(data.begin(), data.end(), output.begin(), -1,
666
+ * cuda::maximum{});
667
+ * // output[0] == 3
668
+ * \endcode
669
+ *
670
+ * \see https://en.cppreference.com/w/cpp/algorithm/accumulate
671
+ * \see reduce
672
+ * \see transform_reduce
673
+ * \see transform_reduce_into
674
+ */
675
+ template <typename InputIterator, typename OutputIterator, typename T, typename BinaryFunction>
676
+ void reduce_into(InputIterator first, InputIterator last, OutputIterator output, T init, BinaryFunction binary_op);
677
+
678
+ /*! \p reduce_by_key is a generalization of \p reduce to key-value pairs.
679
+ * For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
680
+ * that are equal, \p reduce_by_key copies the first element of the group to the
681
+ * \c keys_output. The corresponding values in the range are reduced using the
682
+ * \c plus and the result copied to \c values_output.
683
+ *
684
+ * This version of \p reduce_by_key uses the function object \c equal_to
685
+ * to test for equality and \c plus to reduce values with equal keys.
686
+ *
687
+ * The algorithm's execution is parallelized as determined by \p exec.
688
+ *
689
+ * \param exec The execution policy to use for parallelization.
690
+ * \param keys_first The beginning of the input key range.
691
+ * \param keys_last The end of the input key range.
692
+ * \param values_first The beginning of the input value range.
693
+ * \param keys_output The beginning of the output key range.
694
+ * \param values_output The beginning of the output value range.
695
+ * \return A pair of iterators at end of the ranges <tt>[keys_output, keys_output_last)</tt> and <tt>[values_output,
696
+ * values_output_last)</tt>.
697
+ *
698
+ * \tparam DerivedPolicy The name of the derived execution policy.
699
+ * \tparam InputIterator1 is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
700
+ * Iterator</a>, \tparam InputIterator2 is a model of <a
701
+ * href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input Iterator</a>, \tparam OutputIterator1 is a
702
+ * model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a> and and \p
703
+ * InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type. \tparam OutputIterator2 is a
704
+ * model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a> and and \p
705
+ * InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type.
706
+ *
707
+ * \pre The input ranges shall not overlap either output range.
708
+ *
709
+ * The following code snippet demonstrates how to use \p reduce_by_key to
710
+ * compact a sequence of key/value pairs and sum values with equal keys using the \p thrust::host
711
+ * execution policy for parallelization:
712
+ *
713
+ * \code
714
+ * #include <thrust/reduce.h>
715
+ * #include <thrust/execution_policy.h>
716
+ * ...
717
+ * const int N = 7;
718
+ * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
719
+ * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
720
+ * int C[N]; // output keys
721
+ * int D[N]; // output values
722
+ *
723
+ * thrust::pair<int*,int*> new_end;
724
+ * new_end = thrust::reduce_by_key(thrust::host, A, A + N, B, C, D);
725
+ *
726
+ * // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
727
+ * // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4.
728
+ * \endcode
729
+ *
730
+ * \see reduce
731
+ * \see unique_copy
732
+ * \see unique_by_key
733
+ * \see unique_by_key_copy
734
+ */
735
+ template <typename DerivedPolicy,
736
+ typename InputIterator1,
737
+ typename InputIterator2,
738
+ typename OutputIterator1,
739
+ typename OutputIterator2>
740
+ _CCCL_HOST_DEVICE thrust::pair<OutputIterator1, OutputIterator2> reduce_by_key(
741
+ const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
742
+ InputIterator1 keys_first,
743
+ InputIterator1 keys_last,
744
+ InputIterator2 values_first,
745
+ OutputIterator1 keys_output,
746
+ OutputIterator2 values_output);
747
+
748
+ /*! \p reduce_by_key is a generalization of \p reduce to key-value pairs.
749
+ * For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
750
+ * that are equal, \p reduce_by_key copies the first element of the group to the
751
+ * \c keys_output. The corresponding values in the range are reduced using the
752
+ * \c plus and the result copied to \c values_output.
753
+ *
754
+ * This version of \p reduce_by_key uses the function object \c equal_to
755
+ * to test for equality and \c plus to reduce values with equal keys.
756
+ *
757
+ * \param keys_first The beginning of the input key range.
758
+ * \param keys_last The end of the input key range.
759
+ * \param values_first The beginning of the input value range.
760
+ * \param keys_output The beginning of the output key range.
761
+ * \param values_output The beginning of the output value range.
762
+ * \return A pair of iterators at end of the ranges <tt>[keys_output, keys_output_last)</tt> and <tt>[values_output,
763
+ * values_output_last)</tt>.
764
+ *
765
+ * \tparam InputIterator1 is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
766
+ * Iterator</a>, \tparam InputIterator2 is a model of <a
767
+ * href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input Iterator</a>, \tparam OutputIterator1 is a
768
+ * model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a> and and \p
769
+ * InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type. \tparam OutputIterator2 is a
770
+ * model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a> and and \p
771
+ * InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type.
772
+ *
773
+ * \pre The input ranges shall not overlap either output range.
774
+ *
775
+ * The following code snippet demonstrates how to use \p reduce_by_key to
776
+ * compact a sequence of key/value pairs and sum values with equal keys.
777
+ *
778
+ * \code
779
+ * #include <thrust/reduce.h>
780
+ * ...
781
+ * const int N = 7;
782
+ * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
783
+ * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
784
+ * int C[N]; // output keys
785
+ * int D[N]; // output values
786
+ *
787
+ * thrust::pair<int*,int*> new_end;
788
+ * new_end = thrust::reduce_by_key(A, A + N, B, C, D);
789
+ *
790
+ * // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
791
+ * // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4.
792
+ * \endcode
793
+ *
794
+ * \see reduce
795
+ * \see unique_copy
796
+ * \see unique_by_key
797
+ * \see unique_by_key_copy
798
+ */
799
+ template <typename InputIterator1, typename InputIterator2, typename OutputIterator1, typename OutputIterator2>
800
+ thrust::pair<OutputIterator1, OutputIterator2> reduce_by_key(
801
+ InputIterator1 keys_first,
802
+ InputIterator1 keys_last,
803
+ InputIterator2 values_first,
804
+ OutputIterator1 keys_output,
805
+ OutputIterator2 values_output);
806
+
807
+ /*! \p reduce_by_key is a generalization of \p reduce to key-value pairs.
808
+ * For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
809
+ * that are equal, \p reduce_by_key copies the first element of the group to the
810
+ * \c keys_output. The corresponding values in the range are reduced using the
811
+ * \c plus and the result copied to \c values_output.
812
+ *
813
+ * This version of \p reduce_by_key uses the function object \c binary_pred
814
+ * to test for equality and \c plus to reduce values with equal keys.
815
+ *
816
+ * The algorithm's execution is parallelized as determined by \p exec.
817
+ *
818
+ * \param exec The execution policy to use for parallelization.
819
+ * \param keys_first The beginning of the input key range.
820
+ * \param keys_last The end of the input key range.
821
+ * \param values_first The beginning of the input value range.
822
+ * \param keys_output The beginning of the output key range.
823
+ * \param values_output The beginning of the output value range.
824
+ * \param binary_pred The binary predicate used to determine equality.
825
+ * \return A pair of iterators at end of the ranges <tt>[keys_output, keys_output_last)</tt> and <tt>[values_output,
826
+ * values_output_last)</tt>.
827
+ *
828
+ * \tparam DerivedPolicy The name of the derived execution policy.
829
+ * \tparam InputIterator1 is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
830
+ * Iterator</a>, \tparam InputIterator2 is a model of <a
831
+ * href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input Iterator</a>, \tparam OutputIterator1 is a
832
+ * model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a> and and \p
833
+ * InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type. \tparam OutputIterator2 is a
834
+ * model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a> and and \p
835
+ * InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type. \tparam BinaryPredicate is a
836
+ * model of <a href="https://en.cppreference.com/w/cpp/named_req/BinaryPredicate">Binary Predicate</a>.
837
+ *
838
+ * \pre The input ranges shall not overlap either output range.
839
+ *
840
+ * The following code snippet demonstrates how to use \p reduce_by_key to
841
+ * compact a sequence of key/value pairs and sum values with equal keys using the \p thrust::host
842
+ * execution policy for parallelization:
843
+ *
844
+ * \code
845
+ * #include <thrust/reduce.h>
846
+ * #include <thrust/execution_policy.h>
847
+ * ...
848
+ * const int N = 7;
849
+ * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
850
+ * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
851
+ * int C[N]; // output keys
852
+ * int D[N]; // output values
853
+ *
854
+ * thrust::pair<int*,int*> new_end;
855
+ * ::cuda::std::equal_to<int> binary_pred;
856
+ * new_end = thrust::reduce_by_key(thrust::host, A, A + N, B, C, D, binary_pred);
857
+ *
858
+ * // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
859
+ * // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4.
860
+ * \endcode
861
+ *
862
+ * \see reduce
863
+ * \see unique_copy
864
+ * \see unique_by_key
865
+ * \see unique_by_key_copy
866
+ */
867
+ template <typename DerivedPolicy,
868
+ typename InputIterator1,
869
+ typename InputIterator2,
870
+ typename OutputIterator1,
871
+ typename OutputIterator2,
872
+ typename BinaryPredicate>
873
+ _CCCL_HOST_DEVICE thrust::pair<OutputIterator1, OutputIterator2> reduce_by_key(
874
+ const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
875
+ InputIterator1 keys_first,
876
+ InputIterator1 keys_last,
877
+ InputIterator2 values_first,
878
+ OutputIterator1 keys_output,
879
+ OutputIterator2 values_output,
880
+ BinaryPredicate binary_pred);
881
+
882
+ /*! \p reduce_by_key is a generalization of \p reduce to key-value pairs.
883
+ * For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
884
+ * that are equal, \p reduce_by_key copies the first element of the group to the
885
+ * \c keys_output. The corresponding values in the range are reduced using the
886
+ * \c plus and the result copied to \c values_output.
887
+ *
888
+ * This version of \p reduce_by_key uses the function object \c binary_pred
889
+ * to test for equality and \c plus to reduce values with equal keys.
890
+ *
891
+ * \param keys_first The beginning of the input key range.
892
+ * \param keys_last The end of the input key range.
893
+ * \param values_first The beginning of the input value range.
894
+ * \param keys_output The beginning of the output key range.
895
+ * \param values_output The beginning of the output value range.
896
+ * \param binary_pred The binary predicate used to determine equality.
897
+ * \return A pair of iterators at end of the ranges <tt>[keys_output, keys_output_last)</tt> and <tt>[values_output,
898
+ * values_output_last)</tt>.
899
+ *
900
+ * \tparam InputIterator1 is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
901
+ * Iterator</a>, \tparam InputIterator2 is a model of <a
902
+ * href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input Iterator</a>, \tparam OutputIterator1 is a
903
+ * model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a> and and \p
904
+ * InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type. \tparam OutputIterator2 is a
905
+ * model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a> and and \p
906
+ * InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type. \tparam BinaryPredicate is a
907
+ * model of <a href="https://en.cppreference.com/w/cpp/named_req/BinaryPredicate">Binary Predicate</a>.
908
+ *
909
+ * \pre The input ranges shall not overlap either output range.
910
+ *
911
+ * The following code snippet demonstrates how to use \p reduce_by_key to
912
+ * compact a sequence of key/value pairs and sum values with equal keys.
913
+ *
914
+ * \code
915
+ * #include <thrust/reduce.h>
916
+ * ...
917
+ * const int N = 7;
918
+ * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
919
+ * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
920
+ * int C[N]; // output keys
921
+ * int D[N]; // output values
922
+ *
923
+ * thrust::pair<int*,int*> new_end;
924
+ * ::cuda::std::equal_to<int> binary_pred;
925
+ * new_end = thrust::reduce_by_key(A, A + N, B, C, D, binary_pred);
926
+ *
927
+ * // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
928
+ * // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4.
929
+ * \endcode
930
+ *
931
+ * \see reduce
932
+ * \see unique_copy
933
+ * \see unique_by_key
934
+ * \see unique_by_key_copy
935
+ */
936
+ template <typename InputIterator1,
937
+ typename InputIterator2,
938
+ typename OutputIterator1,
939
+ typename OutputIterator2,
940
+ typename BinaryPredicate>
941
+ thrust::pair<OutputIterator1, OutputIterator2> reduce_by_key(
942
+ InputIterator1 keys_first,
943
+ InputIterator1 keys_last,
944
+ InputIterator2 values_first,
945
+ OutputIterator1 keys_output,
946
+ OutputIterator2 values_output,
947
+ BinaryPredicate binary_pred);
948
+
949
+ /*! \p reduce_by_key is a generalization of \p reduce to key-value pairs.
950
+ * For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
951
+ * that are equal, \p reduce_by_key copies the first element of the group to the
952
+ * \c keys_output. The corresponding values in the range are reduced using the
953
+ * \c BinaryFunction \c binary_op and the result copied to \c values_output.
954
+ * Specifically, if consecutive key iterators \c i and \c (i + 1) are
955
+ * such that <tt>binary_pred(*i, *(i+1))</tt> is \c true, then the corresponding
956
+ * values are reduced to a single value with \c binary_op.
957
+ *
958
+ * This version of \p reduce_by_key uses the function object \c binary_pred
959
+ * to test for equality and \c binary_op to reduce values with equal keys.
960
+ *
961
+ * The algorithm's execution is parallelized as determined by \p exec.
962
+ *
963
+ * \param exec The execution policy to use for parallelization.
964
+ * \param keys_first The beginning of the input key range.
965
+ * \param keys_last The end of the input key range.
966
+ * \param values_first The beginning of the input value range.
967
+ * \param keys_output The beginning of the output key range.
968
+ * \param values_output The beginning of the output value range.
969
+ * \param binary_pred The binary predicate used to determine equality.
970
+ * \param binary_op The binary function used to accumulate values.
971
+ * \return A pair of iterators at end of the ranges <tt>[keys_output, keys_output_last)</tt> and <tt>[values_output,
972
+ * values_output_last)</tt>.
973
+ *
974
+ * \tparam DerivedPolicy The name of the derived execution policy.
975
+ * \tparam InputIterator1 is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
976
+ * Iterator</a>, \tparam InputIterator2 is a model of <a
977
+ * href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input Iterator</a>, \tparam OutputIterator1 is a
978
+ * model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a> and and \p
979
+ * InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type. \tparam OutputIterator2 is a
980
+ * model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a> and and \p
981
+ * InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type. \tparam BinaryPredicate is a
982
+ * model of <a href="https://en.cppreference.com/w/cpp/named_req/BinaryPredicate">Binary Predicate</a>.
983
+ * \tparam BinaryFunction The function's return type must be convertible to \c OutputIterator2's \c value_type.
984
+ *
985
+ * \pre The input ranges shall not overlap either output range.
986
+ *
987
+ * The following code snippet demonstrates how to use \p reduce_by_key to
988
+ * compact a sequence of key/value pairs and sum values with equal keys using the \p thrust::host
989
+ * execution policy for parallelization:
990
+ *
991
+ * \code
992
+ * #include <thrust/reduce.h>
993
+ * #include <thrust/execution_policy.h>
994
+ * ...
995
+ * const int N = 7;
996
+ * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
997
+ * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
998
+ * int C[N]; // output keys
999
+ * int D[N]; // output values
1000
+ *
1001
+ * thrust::pair<int*,int*> new_end;
1002
+ * ::cuda::std::equal_to<int> binary_pred;
1003
+ * ::cuda::std::plus<int> binary_op;
1004
+ * new_end = thrust::reduce_by_key(thrust::host, A, A + N, B, C, D, binary_pred, binary_op);
1005
+ *
1006
+ * // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
1007
+ * // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4.
1008
+ * \endcode
1009
+ *
1010
+ * \see reduce
1011
+ * \see unique_copy
1012
+ * \see unique_by_key
1013
+ * \see unique_by_key_copy
1014
+ */
1015
+ template <typename DerivedPolicy,
1016
+ typename InputIterator1,
1017
+ typename InputIterator2,
1018
+ typename OutputIterator1,
1019
+ typename OutputIterator2,
1020
+ typename BinaryPredicate,
1021
+ typename BinaryFunction>
1022
+ _CCCL_HOST_DEVICE thrust::pair<OutputIterator1, OutputIterator2> reduce_by_key(
1023
+ const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
1024
+ InputIterator1 keys_first,
1025
+ InputIterator1 keys_last,
1026
+ InputIterator2 values_first,
1027
+ OutputIterator1 keys_output,
1028
+ OutputIterator2 values_output,
1029
+ BinaryPredicate binary_pred,
1030
+ BinaryFunction binary_op);
1031
+
1032
+ /*! \p reduce_by_key is a generalization of \p reduce to key-value pairs.
1033
+ * For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
1034
+ * that are equal, \p reduce_by_key copies the first element of the group to the
1035
+ * \c keys_output. The corresponding values in the range are reduced using the
1036
+ * \c BinaryFunction \c binary_op and the result copied to \c values_output.
1037
+ * Specifically, if consecutive key iterators \c i and \c (i + 1) are
1038
+ * such that <tt>binary_pred(*i, *(i+1))</tt> is \c true, then the corresponding
1039
+ * values are reduced to a single value with \c binary_op.
1040
+ *
1041
+ * This version of \p reduce_by_key uses the function object \c binary_pred
1042
+ * to test for equality and \c binary_op to reduce values with equal keys.
1043
+ *
1044
+ * \param keys_first The beginning of the input key range.
1045
+ * \param keys_last The end of the input key range.
1046
+ * \param values_first The beginning of the input value range.
1047
+ * \param keys_output The beginning of the output key range.
1048
+ * \param values_output The beginning of the output value range.
1049
+ * \param binary_pred The binary predicate used to determine equality.
1050
+ * \param binary_op The binary function used to accumulate values.
1051
+ * \return A pair of iterators at end of the ranges <tt>[keys_output, keys_output_last)</tt> and <tt>[values_output,
1052
+ * values_output_last)</tt>.
1053
+ *
1054
+ * \tparam InputIterator1 is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
1055
+ * Iterator</a>, \tparam InputIterator2 is a model of <a
1056
+ * href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input Iterator</a>, \tparam OutputIterator1 is a
1057
+ * model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a> and and \p
1058
+ * InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type. \tparam OutputIterator2 is a
1059
+ * model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a> and and \p
1060
+ * InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type. \tparam BinaryPredicate is a
1061
+ * model of <a href="https://en.cppreference.com/w/cpp/named_req/BinaryPredicate">Binary Predicate</a>.
1062
+ * \tparam BinaryFunction The function's return type must be convertible to \c OutputIterator2's \c value_type.
1063
+ *
1064
+ * \pre The input ranges shall not overlap either output range.
1065
+ *
1066
+ * The following code snippet demonstrates how to use \p reduce_by_key to
1067
+ * compact a sequence of key/value pairs and sum values with equal keys.
1068
+ *
1069
+ * \code
1070
+ * #include <thrust/reduce.h>
1071
+ * ...
1072
+ * const int N = 7;
1073
+ * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
1074
+ * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
1075
+ * int C[N]; // output keys
1076
+ * int D[N]; // output values
1077
+ *
1078
+ * thrust::pair<int*,int*> new_end;
1079
+ * ::cuda::std::equal_to<int> binary_pred;
1080
+ * ::cuda::std::plus<int> binary_op;
1081
+ * new_end = thrust::reduce_by_key(A, A + N, B, C, D, binary_pred, binary_op);
1082
+ *
1083
+ * // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
1084
+ * // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4.
1085
+ * \endcode
1086
+ *
1087
+ * \see reduce
1088
+ * \see unique_copy
1089
+ * \see unique_by_key
1090
+ * \see unique_by_key_copy
1091
+ */
1092
+ template <typename InputIterator1,
1093
+ typename InputIterator2,
1094
+ typename OutputIterator1,
1095
+ typename OutputIterator2,
1096
+ typename BinaryPredicate,
1097
+ typename BinaryFunction>
1098
+ thrust::pair<OutputIterator1, OutputIterator2> reduce_by_key(
1099
+ InputIterator1 keys_first,
1100
+ InputIterator1 keys_last,
1101
+ InputIterator2 values_first,
1102
+ OutputIterator1 keys_output,
1103
+ OutputIterator2 values_output,
1104
+ BinaryPredicate binary_pred,
1105
+ BinaryFunction binary_op);
1106
+
1107
+ /*! \} // end reductions
1108
+ */
1109
+
1110
+ THRUST_NAMESPACE_END
1111
+
1112
+ #include <thrust/detail/reduce.inl>