cuda-cccl 0.3.3__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cuda-cccl might be problematic. Click here for more details.

Files changed (1968):
  1. cuda/cccl/__init__.py +27 -0
  2. cuda/cccl/_cuda_version_utils.py +24 -0
  3. cuda/cccl/cooperative/__init__.py +9 -0
  4. cuda/cccl/cooperative/experimental/__init__.py +24 -0
  5. cuda/cccl/headers/__init__.py +7 -0
  6. cuda/cccl/headers/include/__init__.py +1 -0
  7. cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +259 -0
  8. cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +1182 -0
  9. cuda/cccl/headers/include/cub/agent/agent_for.cuh +81 -0
  10. cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +709 -0
  11. cuda/cccl/headers/include/cub/agent/agent_merge.cuh +234 -0
  12. cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +748 -0
  13. cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +786 -0
  14. cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +286 -0
  15. cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +703 -0
  16. cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +555 -0
  17. cuda/cccl/headers/include/cub/agent/agent_reduce.cuh +619 -0
  18. cuda/cccl/headers/include/cub/agent/agent_reduce_by_key.cuh +806 -0
  19. cuda/cccl/headers/include/cub/agent/agent_rle.cuh +1124 -0
  20. cuda/cccl/headers/include/cub/agent/agent_scan.cuh +589 -0
  21. cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +474 -0
  22. cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +289 -0
  23. cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +1117 -0
  24. cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +346 -0
  25. cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +606 -0
  26. cuda/cccl/headers/include/cub/agent/agent_topk.cuh +764 -0
  27. cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +631 -0
  28. cuda/cccl/headers/include/cub/agent/single_pass_scan_operators.cuh +1424 -0
  29. cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +963 -0
  30. cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +1227 -0
  31. cuda/cccl/headers/include/cub/block/block_exchange.cuh +1313 -0
  32. cuda/cccl/headers/include/cub/block/block_histogram.cuh +424 -0
  33. cuda/cccl/headers/include/cub/block/block_load.cuh +1264 -0
  34. cuda/cccl/headers/include/cub/block/block_load_to_shared.cuh +432 -0
  35. cuda/cccl/headers/include/cub/block/block_merge_sort.cuh +800 -0
  36. cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +1225 -0
  37. cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +2196 -0
  38. cuda/cccl/headers/include/cub/block/block_raking_layout.cuh +150 -0
  39. cuda/cccl/headers/include/cub/block/block_reduce.cuh +667 -0
  40. cuda/cccl/headers/include/cub/block/block_run_length_decode.cuh +434 -0
  41. cuda/cccl/headers/include/cub/block/block_scan.cuh +2315 -0
  42. cuda/cccl/headers/include/cub/block/block_shuffle.cuh +346 -0
  43. cuda/cccl/headers/include/cub/block/block_store.cuh +1247 -0
  44. cuda/cccl/headers/include/cub/block/radix_rank_sort_operations.cuh +624 -0
  45. cuda/cccl/headers/include/cub/block/specializations/block_histogram_atomic.cuh +86 -0
  46. cuda/cccl/headers/include/cub/block/specializations/block_histogram_sort.cuh +240 -0
  47. cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking.cuh +252 -0
  48. cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking_commutative_only.cuh +238 -0
  49. cuda/cccl/headers/include/cub/block/specializations/block_reduce_warp_reductions.cuh +281 -0
  50. cuda/cccl/headers/include/cub/block/specializations/block_scan_raking.cuh +790 -0
  51. cuda/cccl/headers/include/cub/block/specializations/block_scan_warp_scans.cuh +538 -0
  52. cuda/cccl/headers/include/cub/config.cuh +53 -0
  53. cuda/cccl/headers/include/cub/cub.cuh +120 -0
  54. cuda/cccl/headers/include/cub/detail/array_utils.cuh +78 -0
  55. cuda/cccl/headers/include/cub/detail/choose_offset.cuh +161 -0
  56. cuda/cccl/headers/include/cub/detail/detect_cuda_runtime.cuh +74 -0
  57. cuda/cccl/headers/include/cub/detail/device_double_buffer.cuh +96 -0
  58. cuda/cccl/headers/include/cub/detail/device_memory_resource.cuh +62 -0
  59. cuda/cccl/headers/include/cub/detail/fast_modulo_division.cuh +253 -0
  60. cuda/cccl/headers/include/cub/detail/integer_utils.cuh +88 -0
  61. cuda/cccl/headers/include/cub/detail/launcher/cuda_driver.cuh +142 -0
  62. cuda/cccl/headers/include/cub/detail/launcher/cuda_runtime.cuh +100 -0
  63. cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +114 -0
  64. cuda/cccl/headers/include/cub/detail/ptx-json/README.md +71 -0
  65. cuda/cccl/headers/include/cub/detail/ptx-json/array.h +68 -0
  66. cuda/cccl/headers/include/cub/detail/ptx-json/json.h +62 -0
  67. cuda/cccl/headers/include/cub/detail/ptx-json/object.h +100 -0
  68. cuda/cccl/headers/include/cub/detail/ptx-json/string.h +53 -0
  69. cuda/cccl/headers/include/cub/detail/ptx-json/value.h +95 -0
  70. cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +63 -0
  71. cuda/cccl/headers/include/cub/detail/rfa.cuh +731 -0
  72. cuda/cccl/headers/include/cub/detail/strong_load.cuh +189 -0
  73. cuda/cccl/headers/include/cub/detail/strong_store.cuh +220 -0
  74. cuda/cccl/headers/include/cub/detail/temporary_storage.cuh +384 -0
  75. cuda/cccl/headers/include/cub/detail/type_traits.cuh +187 -0
  76. cuda/cccl/headers/include/cub/detail/uninitialized_copy.cuh +73 -0
  77. cuda/cccl/headers/include/cub/detail/unsafe_bitcast.cuh +56 -0
  78. cuda/cccl/headers/include/cub/device/device_adjacent_difference.cuh +596 -0
  79. cuda/cccl/headers/include/cub/device/device_copy.cuh +276 -0
  80. cuda/cccl/headers/include/cub/device/device_for.cuh +1063 -0
  81. cuda/cccl/headers/include/cub/device/device_histogram.cuh +1509 -0
  82. cuda/cccl/headers/include/cub/device/device_memcpy.cuh +195 -0
  83. cuda/cccl/headers/include/cub/device/device_merge.cuh +203 -0
  84. cuda/cccl/headers/include/cub/device/device_merge_sort.cuh +979 -0
  85. cuda/cccl/headers/include/cub/device/device_partition.cuh +668 -0
  86. cuda/cccl/headers/include/cub/device/device_radix_sort.cuh +3437 -0
  87. cuda/cccl/headers/include/cub/device/device_reduce.cuh +2518 -0
  88. cuda/cccl/headers/include/cub/device/device_run_length_encode.cuh +370 -0
  89. cuda/cccl/headers/include/cub/device/device_scan.cuh +2212 -0
  90. cuda/cccl/headers/include/cub/device/device_segmented_radix_sort.cuh +1496 -0
  91. cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +1430 -0
  92. cuda/cccl/headers/include/cub/device/device_segmented_sort.cuh +2811 -0
  93. cuda/cccl/headers/include/cub/device/device_select.cuh +1228 -0
  94. cuda/cccl/headers/include/cub/device/device_topk.cuh +511 -0
  95. cuda/cccl/headers/include/cub/device/device_transform.cuh +668 -0
  96. cuda/cccl/headers/include/cub/device/dispatch/dispatch_adjacent_difference.cuh +315 -0
  97. cuda/cccl/headers/include/cub/device/dispatch/dispatch_batch_memcpy.cuh +719 -0
  98. cuda/cccl/headers/include/cub/device/dispatch/dispatch_common.cuh +43 -0
  99. cuda/cccl/headers/include/cub/device/dispatch/dispatch_copy_mdspan.cuh +79 -0
  100. cuda/cccl/headers/include/cub/device/dispatch/dispatch_for.cuh +198 -0
  101. cuda/cccl/headers/include/cub/device/dispatch/dispatch_histogram.cuh +1046 -0
  102. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +303 -0
  103. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge_sort.cuh +473 -0
  104. cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +1744 -0
  105. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +1310 -0
  106. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_by_key.cuh +655 -0
  107. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +531 -0
  108. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh +313 -0
  109. cuda/cccl/headers/include/cub/device/dispatch/dispatch_rle.cuh +615 -0
  110. cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan.cuh +517 -0
  111. cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan_by_key.cuh +602 -0
  112. cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +975 -0
  113. cuda/cccl/headers/include/cub/device/dispatch/dispatch_select_if.cuh +842 -0
  114. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +341 -0
  115. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +440 -0
  116. cuda/cccl/headers/include/cub/device/dispatch/dispatch_three_way_partition.cuh +389 -0
  117. cuda/cccl/headers/include/cub/device/dispatch/dispatch_topk.cuh +627 -0
  118. cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +569 -0
  119. cuda/cccl/headers/include/cub/device/dispatch/dispatch_unique_by_key.cuh +545 -0
  120. cuda/cccl/headers/include/cub/device/dispatch/kernels/for_each.cuh +261 -0
  121. cuda/cccl/headers/include/cub/device/dispatch/kernels/histogram.cuh +505 -0
  122. cuda/cccl/headers/include/cub/device/dispatch/kernels/merge_sort.cuh +334 -0
  123. cuda/cccl/headers/include/cub/device/dispatch/kernels/radix_sort.cuh +803 -0
  124. cuda/cccl/headers/include/cub/device/dispatch/kernels/reduce.cuh +583 -0
  125. cuda/cccl/headers/include/cub/device/dispatch/kernels/scan.cuh +189 -0
  126. cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_reduce.cuh +321 -0
  127. cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_sort.cuh +522 -0
  128. cuda/cccl/headers/include/cub/device/dispatch/kernels/three_way_partition.cuh +201 -0
  129. cuda/cccl/headers/include/cub/device/dispatch/kernels/transform.cuh +1028 -0
  130. cuda/cccl/headers/include/cub/device/dispatch/kernels/unique_by_key.cuh +176 -0
  131. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +67 -0
  132. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +118 -0
  133. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +60 -0
  134. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +275 -0
  135. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +76 -0
  136. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +126 -0
  137. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +1065 -0
  138. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce.cuh +493 -0
  139. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +942 -0
  140. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +673 -0
  141. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +618 -0
  142. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +1010 -0
  143. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +398 -0
  144. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_select_if.cuh +1588 -0
  145. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +440 -0
  146. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_topk.cuh +85 -0
  147. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +481 -0
  148. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +884 -0
  149. cuda/cccl/headers/include/cub/grid/grid_even_share.cuh +227 -0
  150. cuda/cccl/headers/include/cub/grid/grid_mapping.cuh +106 -0
  151. cuda/cccl/headers/include/cub/grid/grid_queue.cuh +202 -0
  152. cuda/cccl/headers/include/cub/iterator/arg_index_input_iterator.cuh +254 -0
  153. cuda/cccl/headers/include/cub/iterator/cache_modified_input_iterator.cuh +259 -0
  154. cuda/cccl/headers/include/cub/iterator/cache_modified_output_iterator.cuh +250 -0
  155. cuda/cccl/headers/include/cub/iterator/tex_obj_input_iterator.cuh +320 -0
  156. cuda/cccl/headers/include/cub/thread/thread_load.cuh +349 -0
  157. cuda/cccl/headers/include/cub/thread/thread_operators.cuh +688 -0
  158. cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +548 -0
  159. cuda/cccl/headers/include/cub/thread/thread_scan.cuh +498 -0
  160. cuda/cccl/headers/include/cub/thread/thread_search.cuh +199 -0
  161. cuda/cccl/headers/include/cub/thread/thread_simd.cuh +458 -0
  162. cuda/cccl/headers/include/cub/thread/thread_sort.cuh +102 -0
  163. cuda/cccl/headers/include/cub/thread/thread_store.cuh +365 -0
  164. cuda/cccl/headers/include/cub/util_allocator.cuh +921 -0
  165. cuda/cccl/headers/include/cub/util_arch.cuh +167 -0
  166. cuda/cccl/headers/include/cub/util_cpp_dialect.cuh +95 -0
  167. cuda/cccl/headers/include/cub/util_debug.cuh +207 -0
  168. cuda/cccl/headers/include/cub/util_device.cuh +800 -0
  169. cuda/cccl/headers/include/cub/util_macro.cuh +97 -0
  170. cuda/cccl/headers/include/cub/util_math.cuh +118 -0
  171. cuda/cccl/headers/include/cub/util_namespace.cuh +176 -0
  172. cuda/cccl/headers/include/cub/util_policy_wrapper_t.cuh +55 -0
  173. cuda/cccl/headers/include/cub/util_ptx.cuh +513 -0
  174. cuda/cccl/headers/include/cub/util_temporary_storage.cuh +122 -0
  175. cuda/cccl/headers/include/cub/util_type.cuh +1120 -0
  176. cuda/cccl/headers/include/cub/util_vsmem.cuh +253 -0
  177. cuda/cccl/headers/include/cub/version.cuh +89 -0
  178. cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_shfl.cuh +329 -0
  179. cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_smem.cuh +177 -0
  180. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +737 -0
  181. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +408 -0
  182. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +952 -0
  183. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_smem.cuh +715 -0
  184. cuda/cccl/headers/include/cub/warp/warp_exchange.cuh +405 -0
  185. cuda/cccl/headers/include/cub/warp/warp_load.cuh +614 -0
  186. cuda/cccl/headers/include/cub/warp/warp_merge_sort.cuh +169 -0
  187. cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +829 -0
  188. cuda/cccl/headers/include/cub/warp/warp_scan.cuh +1890 -0
  189. cuda/cccl/headers/include/cub/warp/warp_store.cuh +521 -0
  190. cuda/cccl/headers/include/cub/warp/warp_utils.cuh +61 -0
  191. cuda/cccl/headers/include/cuda/__algorithm/common.h +68 -0
  192. cuda/cccl/headers/include/cuda/__algorithm/copy.h +196 -0
  193. cuda/cccl/headers/include/cuda/__algorithm/fill.h +107 -0
  194. cuda/cccl/headers/include/cuda/__annotated_ptr/access_property.h +165 -0
  195. cuda/cccl/headers/include/cuda/__annotated_ptr/access_property_encoding.h +172 -0
  196. cuda/cccl/headers/include/cuda/__annotated_ptr/annotated_ptr.h +217 -0
  197. cuda/cccl/headers/include/cuda/__annotated_ptr/annotated_ptr_base.h +100 -0
  198. cuda/cccl/headers/include/cuda/__annotated_ptr/apply_access_property.h +83 -0
  199. cuda/cccl/headers/include/cuda/__annotated_ptr/associate_access_property.h +128 -0
  200. cuda/cccl/headers/include/cuda/__annotated_ptr/createpolicy.h +210 -0
  201. cuda/cccl/headers/include/cuda/__atomic/atomic.h +145 -0
  202. cuda/cccl/headers/include/cuda/__barrier/async_contract_fulfillment.h +39 -0
  203. cuda/cccl/headers/include/cuda/__barrier/barrier.h +65 -0
  204. cuda/cccl/headers/include/cuda/__barrier/barrier_arrive_tx.h +102 -0
  205. cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +487 -0
  206. cuda/cccl/headers/include/cuda/__barrier/barrier_expect_tx.h +74 -0
  207. cuda/cccl/headers/include/cuda/__barrier/barrier_native_handle.h +45 -0
  208. cuda/cccl/headers/include/cuda/__barrier/barrier_thread_scope.h +60 -0
  209. cuda/cccl/headers/include/cuda/__bit/bit_reverse.h +171 -0
  210. cuda/cccl/headers/include/cuda/__bit/bitfield.h +122 -0
  211. cuda/cccl/headers/include/cuda/__bit/bitmask.h +90 -0
  212. cuda/cccl/headers/include/cuda/__cccl_config +37 -0
  213. cuda/cccl/headers/include/cuda/__cmath/ceil_div.h +124 -0
  214. cuda/cccl/headers/include/cuda/__cmath/fast_modulo_division.h +178 -0
  215. cuda/cccl/headers/include/cuda/__cmath/ilog.h +195 -0
  216. cuda/cccl/headers/include/cuda/__cmath/ipow.h +107 -0
  217. cuda/cccl/headers/include/cuda/__cmath/isqrt.h +80 -0
  218. cuda/cccl/headers/include/cuda/__cmath/mul_hi.h +146 -0
  219. cuda/cccl/headers/include/cuda/__cmath/neg.h +47 -0
  220. cuda/cccl/headers/include/cuda/__cmath/pow2.h +74 -0
  221. cuda/cccl/headers/include/cuda/__cmath/round_down.h +102 -0
  222. cuda/cccl/headers/include/cuda/__cmath/round_up.h +104 -0
  223. cuda/cccl/headers/include/cuda/__cmath/uabs.h +57 -0
  224. cuda/cccl/headers/include/cuda/__complex/complex.h +238 -0
  225. cuda/cccl/headers/include/cuda/__complex/get_real_imag.h +89 -0
  226. cuda/cccl/headers/include/cuda/__complex/traits.h +64 -0
  227. cuda/cccl/headers/include/cuda/__complex_ +28 -0
  228. cuda/cccl/headers/include/cuda/__device/all_devices.h +140 -0
  229. cuda/cccl/headers/include/cuda/__device/arch_id.h +176 -0
  230. cuda/cccl/headers/include/cuda/__device/arch_traits.h +537 -0
  231. cuda/cccl/headers/include/cuda/__device/attributes.h +772 -0
  232. cuda/cccl/headers/include/cuda/__device/compute_capability.h +171 -0
  233. cuda/cccl/headers/include/cuda/__device/device_ref.h +156 -0
  234. cuda/cccl/headers/include/cuda/__device/physical_device.h +172 -0
  235. cuda/cccl/headers/include/cuda/__driver/driver_api.h +835 -0
  236. cuda/cccl/headers/include/cuda/__event/event.h +171 -0
  237. cuda/cccl/headers/include/cuda/__event/event_ref.h +157 -0
  238. cuda/cccl/headers/include/cuda/__event/timed_event.h +120 -0
  239. cuda/cccl/headers/include/cuda/__execution/determinism.h +91 -0
  240. cuda/cccl/headers/include/cuda/__execution/output_ordering.h +89 -0
  241. cuda/cccl/headers/include/cuda/__execution/require.h +75 -0
  242. cuda/cccl/headers/include/cuda/__execution/tune.h +70 -0
  243. cuda/cccl/headers/include/cuda/__functional/address_stability.h +131 -0
  244. cuda/cccl/headers/include/cuda/__functional/for_each_canceled.h +321 -0
  245. cuda/cccl/headers/include/cuda/__functional/maximum.h +58 -0
  246. cuda/cccl/headers/include/cuda/__functional/minimum.h +58 -0
  247. cuda/cccl/headers/include/cuda/__functional/proclaim_return_type.h +108 -0
  248. cuda/cccl/headers/include/cuda/__fwd/barrier.h +38 -0
  249. cuda/cccl/headers/include/cuda/__fwd/barrier_native_handle.h +42 -0
  250. cuda/cccl/headers/include/cuda/__fwd/complex.h +48 -0
  251. cuda/cccl/headers/include/cuda/__fwd/devices.h +44 -0
  252. cuda/cccl/headers/include/cuda/__fwd/get_stream.h +38 -0
  253. cuda/cccl/headers/include/cuda/__fwd/pipeline.h +37 -0
  254. cuda/cccl/headers/include/cuda/__fwd/zip_iterator.h +58 -0
  255. cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h +315 -0
  256. cuda/cccl/headers/include/cuda/__iterator/counting_iterator.h +483 -0
  257. cuda/cccl/headers/include/cuda/__iterator/discard_iterator.h +324 -0
  258. cuda/cccl/headers/include/cuda/__iterator/permutation_iterator.h +456 -0
  259. cuda/cccl/headers/include/cuda/__iterator/shuffle_iterator.h +334 -0
  260. cuda/cccl/headers/include/cuda/__iterator/strided_iterator.h +418 -0
  261. cuda/cccl/headers/include/cuda/__iterator/tabulate_output_iterator.h +367 -0
  262. cuda/cccl/headers/include/cuda/__iterator/transform_input_output_iterator.h +528 -0
  263. cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +527 -0
  264. cuda/cccl/headers/include/cuda/__iterator/transform_output_iterator.h +486 -0
  265. cuda/cccl/headers/include/cuda/__iterator/zip_common.h +148 -0
  266. cuda/cccl/headers/include/cuda/__iterator/zip_function.h +112 -0
  267. cuda/cccl/headers/include/cuda/__iterator/zip_iterator.h +557 -0
  268. cuda/cccl/headers/include/cuda/__iterator/zip_transform_iterator.h +592 -0
  269. cuda/cccl/headers/include/cuda/__latch/latch.h +44 -0
  270. cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +533 -0
  271. cuda/cccl/headers/include/cuda/__mdspan/host_device_mdspan.h +238 -0
  272. cuda/cccl/headers/include/cuda/__mdspan/restrict_accessor.h +152 -0
  273. cuda/cccl/headers/include/cuda/__mdspan/restrict_mdspan.h +117 -0
  274. cuda/cccl/headers/include/cuda/__memcpy_async/check_preconditions.h +79 -0
  275. cuda/cccl/headers/include/cuda/__memcpy_async/completion_mechanism.h +47 -0
  276. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_bulk_shared_global.h +60 -0
  277. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_fallback.h +72 -0
  278. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_shared_global.h +148 -0
  279. cuda/cccl/headers/include/cuda/__memcpy_async/dispatch_memcpy_async.h +165 -0
  280. cuda/cccl/headers/include/cuda/__memcpy_async/is_local_smem_barrier.h +53 -0
  281. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async.h +179 -0
  282. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_barrier.h +99 -0
  283. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_tx.h +104 -0
  284. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_completion.h +170 -0
  285. cuda/cccl/headers/include/cuda/__memcpy_async/try_get_barrier_handle.h +59 -0
  286. cuda/cccl/headers/include/cuda/__memory/address_space.h +227 -0
  287. cuda/cccl/headers/include/cuda/__memory/align_down.h +56 -0
  288. cuda/cccl/headers/include/cuda/__memory/align_up.h +56 -0
  289. cuda/cccl/headers/include/cuda/__memory/aligned_size.h +61 -0
  290. cuda/cccl/headers/include/cuda/__memory/check_address.h +111 -0
  291. cuda/cccl/headers/include/cuda/__memory/discard_memory.h +64 -0
  292. cuda/cccl/headers/include/cuda/__memory/get_device_address.h +58 -0
  293. cuda/cccl/headers/include/cuda/__memory/is_aligned.h +47 -0
  294. cuda/cccl/headers/include/cuda/__memory/ptr_in_range.h +93 -0
  295. cuda/cccl/headers/include/cuda/__memory/ptr_rebind.h +75 -0
  296. cuda/cccl/headers/include/cuda/__memory_resource/get_memory_resource.h +82 -0
  297. cuda/cccl/headers/include/cuda/__memory_resource/get_property.h +153 -0
  298. cuda/cccl/headers/include/cuda/__memory_resource/properties.h +113 -0
  299. cuda/cccl/headers/include/cuda/__memory_resource/resource.h +125 -0
  300. cuda/cccl/headers/include/cuda/__memory_resource/resource_ref.h +652 -0
  301. cuda/cccl/headers/include/cuda/__numeric/add_overflow.h +306 -0
  302. cuda/cccl/headers/include/cuda/__numeric/narrow.h +108 -0
  303. cuda/cccl/headers/include/cuda/__numeric/overflow_cast.h +59 -0
  304. cuda/cccl/headers/include/cuda/__numeric/overflow_result.h +43 -0
  305. cuda/cccl/headers/include/cuda/__nvtx/nvtx.h +120 -0
  306. cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +2983 -0
  307. cuda/cccl/headers/include/cuda/__ptx/instructions/barrier_cluster.h +43 -0
  308. cuda/cccl/headers/include/cuda/__ptx/instructions/bfind.h +41 -0
  309. cuda/cccl/headers/include/cuda/__ptx/instructions/bmsk.h +41 -0
  310. cuda/cccl/headers/include/cuda/__ptx/instructions/clusterlaunchcontrol.h +41 -0
  311. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk.h +44 -0
  312. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_commit_group.h +43 -0
  313. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_tensor.h +45 -0
  314. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_wait_group.h +43 -0
  315. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_mbarrier_arrive.h +42 -0
  316. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_reduce_async_bulk.h +60 -0
  317. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_reduce_async_bulk_tensor.h +43 -0
  318. cuda/cccl/headers/include/cuda/__ptx/instructions/elect_sync.h +41 -0
  319. cuda/cccl/headers/include/cuda/__ptx/instructions/exit.h +41 -0
  320. cuda/cccl/headers/include/cuda/__ptx/instructions/fence.h +49 -0
  321. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/barrier_cluster.h +115 -0
  322. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/bfind.h +190 -0
  323. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/bmsk.h +54 -0
  324. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/clusterlaunchcontrol.h +242 -0
  325. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk.h +197 -0
  326. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_commit_group.h +25 -0
  327. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_multicast.h +54 -0
  328. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor.h +997 -0
  329. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_gather_scatter.h +318 -0
  330. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_multicast.h +671 -0
  331. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_wait_group.h +46 -0
  332. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_mbarrier_arrive.h +26 -0
  333. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_mbarrier_arrive_noinc.h +26 -0
  334. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk.h +1470 -0
  335. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_bf16.h +132 -0
  336. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_f16.h +132 -0
  337. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_tensor.h +601 -0
  338. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/elect_sync.h +36 -0
  339. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/exit.h +25 -0
  340. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence.h +208 -0
  341. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_mbarrier_init.h +31 -0
  342. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_alias.h +25 -0
  343. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_async.h +58 -0
  344. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_async_generic_sync_restrict.h +64 -0
  345. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_tensormap_generic.h +102 -0
  346. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_sync_restrict.h +64 -0
  347. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/get_sreg.h +949 -0
  348. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/getctarank.h +32 -0
  349. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/ld.h +5542 -0
  350. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive.h +399 -0
  351. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive_expect_tx.h +184 -0
  352. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive_no_complete.h +34 -0
  353. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_expect_tx.h +102 -0
  354. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_init.h +27 -0
  355. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_test_wait.h +143 -0
  356. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_test_wait_parity.h +144 -0
  357. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_try_wait.h +286 -0
  358. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_try_wait_parity.h +290 -0
  359. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_ld_reduce.h +2202 -0
  360. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_red.h +1362 -0
  361. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_st.h +236 -0
  362. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/prmt.h +230 -0
  363. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/red_async.h +460 -0
  364. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/shl.h +96 -0
  365. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/shr.h +168 -0
  366. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st.h +1490 -0
  367. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st_async.h +123 -0
  368. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st_bulk.h +31 -0
  369. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_alloc.h +132 -0
  370. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_commit.h +99 -0
  371. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_cp.h +765 -0
  372. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_fence.h +58 -0
  373. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_ld.h +4927 -0
  374. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_mma.h +4291 -0
  375. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_mma_ws.h +7110 -0
  376. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_shift.h +42 -0
  377. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_st.h +5063 -0
  378. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_wait.h +56 -0
  379. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tensormap_cp_fenceproxy.h +71 -0
  380. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tensormap_replace.h +1030 -0
  381. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/trap.h +25 -0
  382. cuda/cccl/headers/include/cuda/__ptx/instructions/get_sreg.h +43 -0
  383. cuda/cccl/headers/include/cuda/__ptx/instructions/getctarank.h +43 -0
  384. cuda/cccl/headers/include/cuda/__ptx/instructions/ld.h +41 -0
  385. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_arrive.h +45 -0
  386. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_expect_tx.h +41 -0
  387. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_init.h +43 -0
  388. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_wait.h +46 -0
  389. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_ld_reduce.h +41 -0
  390. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_red.h +41 -0
  391. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_st.h +41 -0
  392. cuda/cccl/headers/include/cuda/__ptx/instructions/prmt.h +41 -0
  393. cuda/cccl/headers/include/cuda/__ptx/instructions/red_async.h +43 -0
  394. cuda/cccl/headers/include/cuda/__ptx/instructions/shfl_sync.h +244 -0
  395. cuda/cccl/headers/include/cuda/__ptx/instructions/shl.h +41 -0
  396. cuda/cccl/headers/include/cuda/__ptx/instructions/shr.h +41 -0
  397. cuda/cccl/headers/include/cuda/__ptx/instructions/st.h +41 -0
  398. cuda/cccl/headers/include/cuda/__ptx/instructions/st_async.h +43 -0
  399. cuda/cccl/headers/include/cuda/__ptx/instructions/st_bulk.h +41 -0
  400. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_alloc.h +41 -0
  401. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_commit.h +41 -0
  402. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_cp.h +41 -0
  403. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_fence.h +41 -0
  404. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_ld.h +41 -0
  405. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_mma.h +41 -0
  406. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_mma_ws.h +41 -0
  407. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_shift.h +41 -0
  408. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_st.h +41 -0
  409. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_wait.h +41 -0
  410. cuda/cccl/headers/include/cuda/__ptx/instructions/tensormap_cp_fenceproxy.h +43 -0
  411. cuda/cccl/headers/include/cuda/__ptx/instructions/tensormap_replace.h +43 -0
  412. cuda/cccl/headers/include/cuda/__ptx/instructions/trap.h +41 -0
  413. cuda/cccl/headers/include/cuda/__ptx/pragmas/enable_smem_spilling.h +47 -0
  414. cuda/cccl/headers/include/cuda/__ptx/ptx_dot_variants.h +230 -0
  415. cuda/cccl/headers/include/cuda/__ptx/ptx_helper_functions.h +176 -0
  416. cuda/cccl/headers/include/cuda/__random/feistel_bijection.h +105 -0
  417. cuda/cccl/headers/include/cuda/__random/random_bijection.h +88 -0
  418. cuda/cccl/headers/include/cuda/__runtime/ensure_current_context.h +99 -0
  419. cuda/cccl/headers/include/cuda/__runtime/types.h +41 -0
  420. cuda/cccl/headers/include/cuda/__semaphore/counting_semaphore.h +53 -0
  421. cuda/cccl/headers/include/cuda/__stream/get_stream.h +110 -0
  422. cuda/cccl/headers/include/cuda/__stream/stream.h +141 -0
  423. cuda/cccl/headers/include/cuda/__stream/stream_ref.h +303 -0
  424. cuda/cccl/headers/include/cuda/__type_traits/is_floating_point.h +47 -0
  425. cuda/cccl/headers/include/cuda/__type_traits/is_specialization_of.h +37 -0
  426. cuda/cccl/headers/include/cuda/__utility/__basic_any/access.h +88 -0
  427. cuda/cccl/headers/include/cuda/__utility/__basic_any/any_cast.h +83 -0
  428. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_base.h +148 -0
  429. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_from.h +96 -0
  430. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_fwd.h +128 -0
  431. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_ptr.h +304 -0
  432. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_ref.h +337 -0
  433. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_value.h +590 -0
  434. cuda/cccl/headers/include/cuda/__utility/__basic_any/conversions.h +169 -0
  435. cuda/cccl/headers/include/cuda/__utility/__basic_any/dynamic_any_cast.h +107 -0
  436. cuda/cccl/headers/include/cuda/__utility/__basic_any/interfaces.h +359 -0
  437. cuda/cccl/headers/include/cuda/__utility/__basic_any/iset.h +142 -0
  438. cuda/cccl/headers/include/cuda/__utility/__basic_any/overrides.h +64 -0
  439. cuda/cccl/headers/include/cuda/__utility/__basic_any/rtti.h +257 -0
  440. cuda/cccl/headers/include/cuda/__utility/__basic_any/semiregular.h +322 -0
  441. cuda/cccl/headers/include/cuda/__utility/__basic_any/storage.h +79 -0
  442. cuda/cccl/headers/include/cuda/__utility/__basic_any/tagged_ptr.h +58 -0
  443. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtcall.h +162 -0
  444. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_functions.h +184 -0
  445. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_ptrs.h +80 -0
  446. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_tables.h +155 -0
  447. cuda/cccl/headers/include/cuda/__utility/basic_any.h +507 -0
  448. cuda/cccl/headers/include/cuda/__utility/immovable.h +50 -0
  449. cuda/cccl/headers/include/cuda/__utility/in_range.h +65 -0
  450. cuda/cccl/headers/include/cuda/__utility/inherit.h +36 -0
  451. cuda/cccl/headers/include/cuda/__utility/no_init.h +29 -0
  452. cuda/cccl/headers/include/cuda/__utility/static_for.h +79 -0
  453. cuda/cccl/headers/include/cuda/__warp/lane_mask.h +326 -0
  454. cuda/cccl/headers/include/cuda/__warp/warp_match_all.h +65 -0
  455. cuda/cccl/headers/include/cuda/__warp/warp_shuffle.h +251 -0
  456. cuda/cccl/headers/include/cuda/access_property +26 -0
  457. cuda/cccl/headers/include/cuda/algorithm +27 -0
  458. cuda/cccl/headers/include/cuda/annotated_ptr +29 -0
  459. cuda/cccl/headers/include/cuda/atomic +27 -0
  460. cuda/cccl/headers/include/cuda/barrier +267 -0
  461. cuda/cccl/headers/include/cuda/bit +29 -0
  462. cuda/cccl/headers/include/cuda/cmath +37 -0
  463. cuda/cccl/headers/include/cuda/devices +33 -0
  464. cuda/cccl/headers/include/cuda/discard_memory +32 -0
  465. cuda/cccl/headers/include/cuda/functional +32 -0
  466. cuda/cccl/headers/include/cuda/iterator +39 -0
  467. cuda/cccl/headers/include/cuda/latch +27 -0
  468. cuda/cccl/headers/include/cuda/mdspan +28 -0
  469. cuda/cccl/headers/include/cuda/memory +35 -0
  470. cuda/cccl/headers/include/cuda/memory_resource +35 -0
  471. cuda/cccl/headers/include/cuda/numeric +29 -0
  472. cuda/cccl/headers/include/cuda/pipeline +579 -0
  473. cuda/cccl/headers/include/cuda/ptx +129 -0
  474. cuda/cccl/headers/include/cuda/semaphore +31 -0
  475. cuda/cccl/headers/include/cuda/std/__algorithm/adjacent_find.h +59 -0
  476. cuda/cccl/headers/include/cuda/std/__algorithm/all_of.h +45 -0
  477. cuda/cccl/headers/include/cuda/std/__algorithm/any_of.h +45 -0
  478. cuda/cccl/headers/include/cuda/std/__algorithm/binary_search.h +53 -0
  479. cuda/cccl/headers/include/cuda/std/__algorithm/clamp.h +48 -0
  480. cuda/cccl/headers/include/cuda/std/__algorithm/comp.h +58 -0
  481. cuda/cccl/headers/include/cuda/std/__algorithm/comp_ref_type.h +85 -0
  482. cuda/cccl/headers/include/cuda/std/__algorithm/copy.h +142 -0
  483. cuda/cccl/headers/include/cuda/std/__algorithm/copy_backward.h +80 -0
  484. cuda/cccl/headers/include/cuda/std/__algorithm/copy_if.h +47 -0
  485. cuda/cccl/headers/include/cuda/std/__algorithm/copy_n.h +73 -0
  486. cuda/cccl/headers/include/cuda/std/__algorithm/count.h +49 -0
  487. cuda/cccl/headers/include/cuda/std/__algorithm/count_if.h +49 -0
  488. cuda/cccl/headers/include/cuda/std/__algorithm/equal.h +128 -0
  489. cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h +101 -0
  490. cuda/cccl/headers/include/cuda/std/__algorithm/fill.h +58 -0
  491. cuda/cccl/headers/include/cuda/std/__algorithm/fill_n.h +51 -0
  492. cuda/cccl/headers/include/cuda/std/__algorithm/find.h +62 -0
  493. cuda/cccl/headers/include/cuda/std/__algorithm/find_end.h +225 -0
  494. cuda/cccl/headers/include/cuda/std/__algorithm/find_first_of.h +73 -0
  495. cuda/cccl/headers/include/cuda/std/__algorithm/find_if.h +46 -0
  496. cuda/cccl/headers/include/cuda/std/__algorithm/find_if_not.h +46 -0
  497. cuda/cccl/headers/include/cuda/std/__algorithm/for_each.h +42 -0
  498. cuda/cccl/headers/include/cuda/std/__algorithm/for_each_n.h +48 -0
  499. cuda/cccl/headers/include/cuda/std/__algorithm/generate.h +41 -0
  500. cuda/cccl/headers/include/cuda/std/__algorithm/generate_n.h +46 -0
  501. cuda/cccl/headers/include/cuda/std/__algorithm/half_positive.h +49 -0
  502. cuda/cccl/headers/include/cuda/std/__algorithm/in_fun_result.h +55 -0
  503. cuda/cccl/headers/include/cuda/std/__algorithm/includes.h +90 -0
  504. cuda/cccl/headers/include/cuda/std/__algorithm/is_heap.h +50 -0
  505. cuda/cccl/headers/include/cuda/std/__algorithm/is_heap_until.h +83 -0
  506. cuda/cccl/headers/include/cuda/std/__algorithm/is_partitioned.h +57 -0
  507. cuda/cccl/headers/include/cuda/std/__algorithm/is_permutation.h +252 -0
  508. cuda/cccl/headers/include/cuda/std/__algorithm/is_sorted.h +49 -0
  509. cuda/cccl/headers/include/cuda/std/__algorithm/is_sorted_until.h +68 -0
  510. cuda/cccl/headers/include/cuda/std/__algorithm/iter_swap.h +82 -0
  511. cuda/cccl/headers/include/cuda/std/__algorithm/iterator_operations.h +185 -0
  512. cuda/cccl/headers/include/cuda/std/__algorithm/lexicographical_compare.h +68 -0
  513. cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h +82 -0
  514. cuda/cccl/headers/include/cuda/std/__algorithm/make_heap.h +70 -0
  515. cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h +88 -0
  516. cuda/cccl/headers/include/cuda/std/__algorithm/max.h +62 -0
  517. cuda/cccl/headers/include/cuda/std/__algorithm/max_element.h +67 -0
  518. cuda/cccl/headers/include/cuda/std/__algorithm/merge.h +89 -0
  519. cuda/cccl/headers/include/cuda/std/__algorithm/min.h +62 -0
  520. cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h +87 -0
  521. cuda/cccl/headers/include/cuda/std/__algorithm/minmax.h +66 -0
  522. cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h +139 -0
  523. cuda/cccl/headers/include/cuda/std/__algorithm/mismatch.h +83 -0
  524. cuda/cccl/headers/include/cuda/std/__algorithm/move.h +86 -0
  525. cuda/cccl/headers/include/cuda/std/__algorithm/move_backward.h +84 -0
  526. cuda/cccl/headers/include/cuda/std/__algorithm/next_permutation.h +88 -0
  527. cuda/cccl/headers/include/cuda/std/__algorithm/none_of.h +45 -0
  528. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort.h +102 -0
  529. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h +122 -0
  530. cuda/cccl/headers/include/cuda/std/__algorithm/partition.h +120 -0
  531. cuda/cccl/headers/include/cuda/std/__algorithm/partition_copy.h +59 -0
  532. cuda/cccl/headers/include/cuda/std/__algorithm/partition_point.h +61 -0
  533. cuda/cccl/headers/include/cuda/std/__algorithm/pop_heap.h +93 -0
  534. cuda/cccl/headers/include/cuda/std/__algorithm/prev_permutation.h +88 -0
  535. cuda/cccl/headers/include/cuda/std/__algorithm/push_heap.h +100 -0
  536. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_for_each.h +84 -0
  537. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_for_each_n.h +68 -0
  538. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_iterator_concept.h +65 -0
  539. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_min.h +98 -0
  540. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_min_element.h +68 -0
  541. cuda/cccl/headers/include/cuda/std/__algorithm/remove.h +55 -0
  542. cuda/cccl/headers/include/cuda/std/__algorithm/remove_copy.h +47 -0
  543. cuda/cccl/headers/include/cuda/std/__algorithm/remove_copy_if.h +47 -0
  544. cuda/cccl/headers/include/cuda/std/__algorithm/remove_if.h +56 -0
  545. cuda/cccl/headers/include/cuda/std/__algorithm/replace.h +45 -0
  546. cuda/cccl/headers/include/cuda/std/__algorithm/replace_copy.h +54 -0
  547. cuda/cccl/headers/include/cuda/std/__algorithm/replace_copy_if.h +50 -0
  548. cuda/cccl/headers/include/cuda/std/__algorithm/replace_if.h +45 -0
  549. cuda/cccl/headers/include/cuda/std/__algorithm/reverse.h +81 -0
  550. cuda/cccl/headers/include/cuda/std/__algorithm/reverse_copy.h +43 -0
  551. cuda/cccl/headers/include/cuda/std/__algorithm/rotate.h +261 -0
  552. cuda/cccl/headers/include/cuda/std/__algorithm/rotate_copy.h +40 -0
  553. cuda/cccl/headers/include/cuda/std/__algorithm/search.h +185 -0
  554. cuda/cccl/headers/include/cuda/std/__algorithm/search_n.h +163 -0
  555. cuda/cccl/headers/include/cuda/std/__algorithm/set_difference.h +95 -0
  556. cuda/cccl/headers/include/cuda/std/__algorithm/set_intersection.h +122 -0
  557. cuda/cccl/headers/include/cuda/std/__algorithm/set_symmetric_difference.h +134 -0
  558. cuda/cccl/headers/include/cuda/std/__algorithm/set_union.h +128 -0
  559. cuda/cccl/headers/include/cuda/std/__algorithm/shift_left.h +84 -0
  560. cuda/cccl/headers/include/cuda/std/__algorithm/shift_right.h +144 -0
  561. cuda/cccl/headers/include/cuda/std/__algorithm/sift_down.h +139 -0
  562. cuda/cccl/headers/include/cuda/std/__algorithm/sort_heap.h +70 -0
  563. cuda/cccl/headers/include/cuda/std/__algorithm/swap_ranges.h +78 -0
  564. cuda/cccl/headers/include/cuda/std/__algorithm/transform.h +59 -0
  565. cuda/cccl/headers/include/cuda/std/__algorithm/unique.h +76 -0
  566. cuda/cccl/headers/include/cuda/std/__algorithm/unique_copy.h +155 -0
  567. cuda/cccl/headers/include/cuda/std/__algorithm/unwrap_iter.h +95 -0
  568. cuda/cccl/headers/include/cuda/std/__algorithm/unwrap_range.h +126 -0
  569. cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h +83 -0
  570. cuda/cccl/headers/include/cuda/std/__algorithm_ +26 -0
  571. cuda/cccl/headers/include/cuda/std/__atomic/api/common.h +192 -0
  572. cuda/cccl/headers/include/cuda/std/__atomic/api/owned.h +136 -0
  573. cuda/cccl/headers/include/cuda/std/__atomic/api/reference.h +118 -0
  574. cuda/cccl/headers/include/cuda/std/__atomic/functions/common.h +58 -0
  575. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_local.h +208 -0
  576. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_derived.h +401 -0
  577. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated.h +3971 -0
  578. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated_helper.h +177 -0
  579. cuda/cccl/headers/include/cuda/std/__atomic/functions/host.h +211 -0
  580. cuda/cccl/headers/include/cuda/std/__atomic/functions.h +33 -0
  581. cuda/cccl/headers/include/cuda/std/__atomic/order.h +159 -0
  582. cuda/cccl/headers/include/cuda/std/__atomic/platform/msvc_to_builtins.h +654 -0
  583. cuda/cccl/headers/include/cuda/std/__atomic/platform.h +93 -0
  584. cuda/cccl/headers/include/cuda/std/__atomic/scopes.h +105 -0
  585. cuda/cccl/headers/include/cuda/std/__atomic/types/base.h +249 -0
  586. cuda/cccl/headers/include/cuda/std/__atomic/types/common.h +104 -0
  587. cuda/cccl/headers/include/cuda/std/__atomic/types/locked.h +225 -0
  588. cuda/cccl/headers/include/cuda/std/__atomic/types/reference.h +72 -0
  589. cuda/cccl/headers/include/cuda/std/__atomic/types/small.h +228 -0
  590. cuda/cccl/headers/include/cuda/std/__atomic/types.h +52 -0
  591. cuda/cccl/headers/include/cuda/std/__atomic/wait/notify_wait.h +95 -0
  592. cuda/cccl/headers/include/cuda/std/__atomic/wait/polling.h +65 -0
  593. cuda/cccl/headers/include/cuda/std/__barrier/barrier.h +227 -0
  594. cuda/cccl/headers/include/cuda/std/__barrier/empty_completion.h +37 -0
  595. cuda/cccl/headers/include/cuda/std/__barrier/poll_tester.h +82 -0
  596. cuda/cccl/headers/include/cuda/std/__bit/bit_cast.h +76 -0
  597. cuda/cccl/headers/include/cuda/std/__bit/byteswap.h +185 -0
  598. cuda/cccl/headers/include/cuda/std/__bit/countl.h +174 -0
  599. cuda/cccl/headers/include/cuda/std/__bit/countr.h +185 -0
  600. cuda/cccl/headers/include/cuda/std/__bit/endian.h +39 -0
  601. cuda/cccl/headers/include/cuda/std/__bit/has_single_bit.h +43 -0
  602. cuda/cccl/headers/include/cuda/std/__bit/integral.h +126 -0
  603. cuda/cccl/headers/include/cuda/std/__bit/popcount.h +154 -0
  604. cuda/cccl/headers/include/cuda/std/__bit/reference.h +1272 -0
  605. cuda/cccl/headers/include/cuda/std/__bit/rotate.h +94 -0
  606. cuda/cccl/headers/include/cuda/std/__cccl/algorithm_wrapper.h +36 -0
  607. cuda/cccl/headers/include/cuda/std/__cccl/architecture.h +78 -0
  608. cuda/cccl/headers/include/cuda/std/__cccl/assert.h +161 -0
  609. cuda/cccl/headers/include/cuda/std/__cccl/attributes.h +206 -0
  610. cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +673 -0
  611. cuda/cccl/headers/include/cuda/std/__cccl/compiler.h +217 -0
  612. cuda/cccl/headers/include/cuda/std/__cccl/cuda_capabilities.h +51 -0
  613. cuda/cccl/headers/include/cuda/std/__cccl/cuda_toolkit.h +56 -0
  614. cuda/cccl/headers/include/cuda/std/__cccl/deprecated.h +88 -0
  615. cuda/cccl/headers/include/cuda/std/__cccl/diagnostic.h +131 -0
  616. cuda/cccl/headers/include/cuda/std/__cccl/dialect.h +123 -0
  617. cuda/cccl/headers/include/cuda/std/__cccl/epilogue.h +344 -0
  618. cuda/cccl/headers/include/cuda/std/__cccl/exceptions.h +91 -0
  619. cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h +74 -0
  620. cuda/cccl/headers/include/cuda/std/__cccl/extended_data_types.h +160 -0
  621. cuda/cccl/headers/include/cuda/std/__cccl/host_std_lib.h +52 -0
  622. cuda/cccl/headers/include/cuda/std/__cccl/is_non_narrowing_convertible.h +73 -0
  623. cuda/cccl/headers/include/cuda/std/__cccl/memory_wrapper.h +36 -0
  624. cuda/cccl/headers/include/cuda/std/__cccl/numeric_wrapper.h +36 -0
  625. cuda/cccl/headers/include/cuda/std/__cccl/os.h +54 -0
  626. cuda/cccl/headers/include/cuda/std/__cccl/preprocessor.h +1286 -0
  627. cuda/cccl/headers/include/cuda/std/__cccl/prologue.h +281 -0
  628. cuda/cccl/headers/include/cuda/std/__cccl/ptx_isa.h +253 -0
  629. cuda/cccl/headers/include/cuda/std/__cccl/rtti.h +72 -0
  630. cuda/cccl/headers/include/cuda/std/__cccl/sequence_access.h +87 -0
  631. cuda/cccl/headers/include/cuda/std/__cccl/system_header.h +38 -0
  632. cuda/cccl/headers/include/cuda/std/__cccl/unreachable.h +31 -0
  633. cuda/cccl/headers/include/cuda/std/__cccl/version.h +26 -0
  634. cuda/cccl/headers/include/cuda/std/__cccl/visibility.h +171 -0
  635. cuda/cccl/headers/include/cuda/std/__charconv/chars_format.h +81 -0
  636. cuda/cccl/headers/include/cuda/std/__charconv/from_chars.h +154 -0
  637. cuda/cccl/headers/include/cuda/std/__charconv/from_chars_result.h +56 -0
  638. cuda/cccl/headers/include/cuda/std/__charconv/to_chars.h +148 -0
  639. cuda/cccl/headers/include/cuda/std/__charconv/to_chars_result.h +56 -0
  640. cuda/cccl/headers/include/cuda/std/__charconv_ +31 -0
  641. cuda/cccl/headers/include/cuda/std/__chrono/calendar.h +54 -0
  642. cuda/cccl/headers/include/cuda/std/__chrono/day.h +162 -0
  643. cuda/cccl/headers/include/cuda/std/__chrono/duration.h +503 -0
  644. cuda/cccl/headers/include/cuda/std/__chrono/file_clock.h +55 -0
  645. cuda/cccl/headers/include/cuda/std/__chrono/high_resolution_clock.h +46 -0
  646. cuda/cccl/headers/include/cuda/std/__chrono/month.h +187 -0
  647. cuda/cccl/headers/include/cuda/std/__chrono/steady_clock.h +60 -0
  648. cuda/cccl/headers/include/cuda/std/__chrono/system_clock.h +80 -0
  649. cuda/cccl/headers/include/cuda/std/__chrono/time_point.h +259 -0
  650. cuda/cccl/headers/include/cuda/std/__chrono/year.h +186 -0
  651. cuda/cccl/headers/include/cuda/std/__cmath/abs.h +127 -0
  652. cuda/cccl/headers/include/cuda/std/__cmath/copysign.h +88 -0
  653. cuda/cccl/headers/include/cuda/std/__cmath/error_functions.h +200 -0
  654. cuda/cccl/headers/include/cuda/std/__cmath/exponential_functions.h +784 -0
  655. cuda/cccl/headers/include/cuda/std/__cmath/fdim.h +118 -0
  656. cuda/cccl/headers/include/cuda/std/__cmath/fma.h +125 -0
  657. cuda/cccl/headers/include/cuda/std/__cmath/fpclassify.h +231 -0
  658. cuda/cccl/headers/include/cuda/std/__cmath/gamma.h +205 -0
  659. cuda/cccl/headers/include/cuda/std/__cmath/hyperbolic_functions.h +286 -0
  660. cuda/cccl/headers/include/cuda/std/__cmath/hypot.h +221 -0
  661. cuda/cccl/headers/include/cuda/std/__cmath/inverse_hyperbolic_functions.h +286 -0
  662. cuda/cccl/headers/include/cuda/std/__cmath/inverse_trigonometric_functions.h +371 -0
  663. cuda/cccl/headers/include/cuda/std/__cmath/isfinite.h +167 -0
  664. cuda/cccl/headers/include/cuda/std/__cmath/isinf.h +205 -0
  665. cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +186 -0
  666. cuda/cccl/headers/include/cuda/std/__cmath/isnormal.h +138 -0
  667. cuda/cccl/headers/include/cuda/std/__cmath/lerp.h +101 -0
  668. cuda/cccl/headers/include/cuda/std/__cmath/logarithms.h +534 -0
  669. cuda/cccl/headers/include/cuda/std/__cmath/min_max.h +287 -0
  670. cuda/cccl/headers/include/cuda/std/__cmath/modulo.h +208 -0
  671. cuda/cccl/headers/include/cuda/std/__cmath/nan.h +54 -0
  672. cuda/cccl/headers/include/cuda/std/__cmath/remainder.h +206 -0
  673. cuda/cccl/headers/include/cuda/std/__cmath/roots.h +199 -0
  674. cuda/cccl/headers/include/cuda/std/__cmath/rounding_functions.h +984 -0
  675. cuda/cccl/headers/include/cuda/std/__cmath/signbit.h +56 -0
  676. cuda/cccl/headers/include/cuda/std/__cmath/traits.h +238 -0
  677. cuda/cccl/headers/include/cuda/std/__cmath/trigonometric_functions.h +328 -0
  678. cuda/cccl/headers/include/cuda/std/__complex/arg.h +84 -0
  679. cuda/cccl/headers/include/cuda/std/__complex/complex.h +669 -0
  680. cuda/cccl/headers/include/cuda/std/__complex/exponential_functions.h +411 -0
  681. cuda/cccl/headers/include/cuda/std/__complex/hyperbolic_functions.h +117 -0
  682. cuda/cccl/headers/include/cuda/std/__complex/inverse_hyperbolic_functions.h +216 -0
  683. cuda/cccl/headers/include/cuda/std/__complex/inverse_trigonometric_functions.h +131 -0
  684. cuda/cccl/headers/include/cuda/std/__complex/literals.h +86 -0
  685. cuda/cccl/headers/include/cuda/std/__complex/logarithms.h +303 -0
  686. cuda/cccl/headers/include/cuda/std/__complex/math.h +159 -0
  687. cuda/cccl/headers/include/cuda/std/__complex/nvbf16.h +323 -0
  688. cuda/cccl/headers/include/cuda/std/__complex/nvfp16.h +322 -0
  689. cuda/cccl/headers/include/cuda/std/__complex/roots.h +214 -0
  690. cuda/cccl/headers/include/cuda/std/__complex/trigonometric_functions.h +61 -0
  691. cuda/cccl/headers/include/cuda/std/__complex/tuple.h +107 -0
  692. cuda/cccl/headers/include/cuda/std/__complex/vector_support.h +130 -0
  693. cuda/cccl/headers/include/cuda/std/__concepts/arithmetic.h +56 -0
  694. cuda/cccl/headers/include/cuda/std/__concepts/assignable.h +64 -0
  695. cuda/cccl/headers/include/cuda/std/__concepts/boolean_testable.h +63 -0
  696. cuda/cccl/headers/include/cuda/std/__concepts/class_or_enum.h +45 -0
  697. cuda/cccl/headers/include/cuda/std/__concepts/common_reference_with.h +69 -0
  698. cuda/cccl/headers/include/cuda/std/__concepts/common_with.h +82 -0
  699. cuda/cccl/headers/include/cuda/std/__concepts/concept_macros.h +341 -0
  700. cuda/cccl/headers/include/cuda/std/__concepts/constructible.h +174 -0
  701. cuda/cccl/headers/include/cuda/std/__concepts/convertible_to.h +70 -0
  702. cuda/cccl/headers/include/cuda/std/__concepts/copyable.h +60 -0
  703. cuda/cccl/headers/include/cuda/std/__concepts/derived_from.h +56 -0
  704. cuda/cccl/headers/include/cuda/std/__concepts/destructible.h +76 -0
  705. cuda/cccl/headers/include/cuda/std/__concepts/different_from.h +38 -0
  706. cuda/cccl/headers/include/cuda/std/__concepts/equality_comparable.h +100 -0
  707. cuda/cccl/headers/include/cuda/std/__concepts/invocable.h +80 -0
  708. cuda/cccl/headers/include/cuda/std/__concepts/movable.h +58 -0
  709. cuda/cccl/headers/include/cuda/std/__concepts/predicate.h +54 -0
  710. cuda/cccl/headers/include/cuda/std/__concepts/regular.h +54 -0
  711. cuda/cccl/headers/include/cuda/std/__concepts/relation.h +77 -0
  712. cuda/cccl/headers/include/cuda/std/__concepts/same_as.h +39 -0
  713. cuda/cccl/headers/include/cuda/std/__concepts/semiregular.h +54 -0
  714. cuda/cccl/headers/include/cuda/std/__concepts/swappable.h +206 -0
  715. cuda/cccl/headers/include/cuda/std/__concepts/totally_ordered.h +101 -0
  716. cuda/cccl/headers/include/cuda/std/__cstddef/byte.h +113 -0
  717. cuda/cccl/headers/include/cuda/std/__cstddef/types.h +52 -0
  718. cuda/cccl/headers/include/cuda/std/__cstdlib/abs.h +57 -0
  719. cuda/cccl/headers/include/cuda/std/__cstdlib/aligned_alloc.h +66 -0
  720. cuda/cccl/headers/include/cuda/std/__cstdlib/div.h +96 -0
  721. cuda/cccl/headers/include/cuda/std/__cstdlib/malloc.h +70 -0
  722. cuda/cccl/headers/include/cuda/std/__cstring/memcpy.h +61 -0
  723. cuda/cccl/headers/include/cuda/std/__cstring/memset.h +46 -0
  724. cuda/cccl/headers/include/cuda/std/__cuda/api_wrapper.h +62 -0
  725. cuda/cccl/headers/include/cuda/std/__exception/cuda_error.h +139 -0
  726. cuda/cccl/headers/include/cuda/std/__exception/terminate.h +73 -0
  727. cuda/cccl/headers/include/cuda/std/__execution/env.h +455 -0
  728. cuda/cccl/headers/include/cuda/std/__execution/policy.h +88 -0
  729. cuda/cccl/headers/include/cuda/std/__expected/bad_expected_access.h +127 -0
  730. cuda/cccl/headers/include/cuda/std/__expected/expected.h +1941 -0
  731. cuda/cccl/headers/include/cuda/std/__expected/expected_base.h +1050 -0
  732. cuda/cccl/headers/include/cuda/std/__expected/unexpect.h +37 -0
  733. cuda/cccl/headers/include/cuda/std/__expected/unexpected.h +165 -0
  734. cuda/cccl/headers/include/cuda/std/__floating_point/arithmetic.h +56 -0
  735. cuda/cccl/headers/include/cuda/std/__floating_point/cast.h +812 -0
  736. cuda/cccl/headers/include/cuda/std/__floating_point/cccl_fp.h +125 -0
  737. cuda/cccl/headers/include/cuda/std/__floating_point/common_type.h +48 -0
  738. cuda/cccl/headers/include/cuda/std/__floating_point/constants.h +376 -0
  739. cuda/cccl/headers/include/cuda/std/__floating_point/conversion_rank_order.h +124 -0
  740. cuda/cccl/headers/include/cuda/std/__floating_point/cuda_fp_types.h +116 -0
  741. cuda/cccl/headers/include/cuda/std/__floating_point/decompose.h +69 -0
  742. cuda/cccl/headers/include/cuda/std/__floating_point/format.h +162 -0
  743. cuda/cccl/headers/include/cuda/std/__floating_point/fp.h +40 -0
  744. cuda/cccl/headers/include/cuda/std/__floating_point/mask.h +78 -0
  745. cuda/cccl/headers/include/cuda/std/__floating_point/native_type.h +81 -0
  746. cuda/cccl/headers/include/cuda/std/__floating_point/overflow_handler.h +139 -0
  747. cuda/cccl/headers/include/cuda/std/__floating_point/properties.h +229 -0
  748. cuda/cccl/headers/include/cuda/std/__floating_point/storage.h +248 -0
  749. cuda/cccl/headers/include/cuda/std/__floating_point/traits.h +172 -0
  750. cuda/cccl/headers/include/cuda/std/__format/buffer.h +48 -0
  751. cuda/cccl/headers/include/cuda/std/__format/concepts.h +69 -0
  752. cuda/cccl/headers/include/cuda/std/__format/format_arg.h +282 -0
  753. cuda/cccl/headers/include/cuda/std/__format/format_arg_store.h +279 -0
  754. cuda/cccl/headers/include/cuda/std/__format/format_args.h +122 -0
  755. cuda/cccl/headers/include/cuda/std/__format/format_context.h +92 -0
  756. cuda/cccl/headers/include/cuda/std/__format/format_error.h +76 -0
  757. cuda/cccl/headers/include/cuda/std/__format/format_integral.h +237 -0
  758. cuda/cccl/headers/include/cuda/std/__format/format_parse_context.h +124 -0
  759. cuda/cccl/headers/include/cuda/std/__format/format_spec_parser.h +1230 -0
  760. cuda/cccl/headers/include/cuda/std/__format/formatter.h +59 -0
  761. cuda/cccl/headers/include/cuda/std/__format/formatters/bool.h +101 -0
  762. cuda/cccl/headers/include/cuda/std/__format/formatters/char.h +124 -0
  763. cuda/cccl/headers/include/cuda/std/__format/formatters/fp.h +101 -0
  764. cuda/cccl/headers/include/cuda/std/__format/formatters/int.h +174 -0
  765. cuda/cccl/headers/include/cuda/std/__format/formatters/ptr.h +104 -0
  766. cuda/cccl/headers/include/cuda/std/__format/formatters/str.h +178 -0
  767. cuda/cccl/headers/include/cuda/std/__format/output_utils.h +272 -0
  768. cuda/cccl/headers/include/cuda/std/__format/parse_arg_id.h +138 -0
  769. cuda/cccl/headers/include/cuda/std/__format_ +45 -0
  770. cuda/cccl/headers/include/cuda/std/__functional/binary_function.h +63 -0
  771. cuda/cccl/headers/include/cuda/std/__functional/binary_negate.h +65 -0
  772. cuda/cccl/headers/include/cuda/std/__functional/bind.h +334 -0
  773. cuda/cccl/headers/include/cuda/std/__functional/bind_back.h +80 -0
  774. cuda/cccl/headers/include/cuda/std/__functional/bind_front.h +73 -0
  775. cuda/cccl/headers/include/cuda/std/__functional/binder1st.h +74 -0
  776. cuda/cccl/headers/include/cuda/std/__functional/binder2nd.h +74 -0
  777. cuda/cccl/headers/include/cuda/std/__functional/compose.h +68 -0
  778. cuda/cccl/headers/include/cuda/std/__functional/default_searcher.h +75 -0
  779. cuda/cccl/headers/include/cuda/std/__functional/function.h +1275 -0
  780. cuda/cccl/headers/include/cuda/std/__functional/hash.h +649 -0
  781. cuda/cccl/headers/include/cuda/std/__functional/identity.h +57 -0
  782. cuda/cccl/headers/include/cuda/std/__functional/invoke.h +296 -0
  783. cuda/cccl/headers/include/cuda/std/__functional/is_transparent.h +41 -0
  784. cuda/cccl/headers/include/cuda/std/__functional/mem_fn.h +66 -0
  785. cuda/cccl/headers/include/cuda/std/__functional/mem_fun_ref.h +211 -0
  786. cuda/cccl/headers/include/cuda/std/__functional/not_fn.h +120 -0
  787. cuda/cccl/headers/include/cuda/std/__functional/operations.h +534 -0
  788. cuda/cccl/headers/include/cuda/std/__functional/perfect_forward.h +128 -0
  789. cuda/cccl/headers/include/cuda/std/__functional/pointer_to_binary_function.h +64 -0
  790. cuda/cccl/headers/include/cuda/std/__functional/pointer_to_unary_function.h +63 -0
  791. cuda/cccl/headers/include/cuda/std/__functional/ranges_operations.h +113 -0
  792. cuda/cccl/headers/include/cuda/std/__functional/reference_wrapper.h +113 -0
  793. cuda/cccl/headers/include/cuda/std/__functional/unary_function.h +62 -0
  794. cuda/cccl/headers/include/cuda/std/__functional/unary_negate.h +65 -0
  795. cuda/cccl/headers/include/cuda/std/__functional/unwrap_ref.h +56 -0
  796. cuda/cccl/headers/include/cuda/std/__functional/weak_result_type.h +262 -0
  797. cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +53 -0
  798. cuda/cccl/headers/include/cuda/std/__fwd/array.h +42 -0
  799. cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +74 -0
  800. cuda/cccl/headers/include/cuda/std/__fwd/complex.h +75 -0
  801. cuda/cccl/headers/include/cuda/std/__fwd/expected.h +46 -0
  802. cuda/cccl/headers/include/cuda/std/__fwd/format.h +84 -0
  803. cuda/cccl/headers/include/cuda/std/__fwd/fp.h +37 -0
  804. cuda/cccl/headers/include/cuda/std/__fwd/get.h +123 -0
  805. cuda/cccl/headers/include/cuda/std/__fwd/hash.h +34 -0
  806. cuda/cccl/headers/include/cuda/std/__fwd/iterator.h +43 -0
  807. cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +122 -0
  808. cuda/cccl/headers/include/cuda/std/__fwd/memory_resource.h +37 -0
  809. cuda/cccl/headers/include/cuda/std/__fwd/optional.h +39 -0
  810. cuda/cccl/headers/include/cuda/std/__fwd/pair.h +47 -0
  811. cuda/cccl/headers/include/cuda/std/__fwd/reference_wrapper.h +34 -0
  812. cuda/cccl/headers/include/cuda/std/__fwd/span.h +45 -0
  813. cuda/cccl/headers/include/cuda/std/__fwd/string.h +112 -0
  814. cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +91 -0
  815. cuda/cccl/headers/include/cuda/std/__fwd/subrange.h +55 -0
  816. cuda/cccl/headers/include/cuda/std/__fwd/tuple.h +34 -0
  817. cuda/cccl/headers/include/cuda/std/__fwd/unexpected.h +40 -0
  818. cuda/cccl/headers/include/cuda/std/__internal/cpp_dialect.h +44 -0
  819. cuda/cccl/headers/include/cuda/std/__internal/features.h +72 -0
  820. cuda/cccl/headers/include/cuda/std/__internal/namespaces.h +143 -0
  821. cuda/cccl/headers/include/cuda/std/__iterator/access.h +128 -0
  822. cuda/cccl/headers/include/cuda/std/__iterator/advance.h +228 -0
  823. cuda/cccl/headers/include/cuda/std/__iterator/back_insert_iterator.h +163 -0
  824. cuda/cccl/headers/include/cuda/std/__iterator/bounded_iter.h +253 -0
  825. cuda/cccl/headers/include/cuda/std/__iterator/concepts.h +645 -0
  826. cuda/cccl/headers/include/cuda/std/__iterator/counted_iterator.h +464 -0
  827. cuda/cccl/headers/include/cuda/std/__iterator/data.h +61 -0
  828. cuda/cccl/headers/include/cuda/std/__iterator/default_sentinel.h +36 -0
  829. cuda/cccl/headers/include/cuda/std/__iterator/distance.h +126 -0
  830. cuda/cccl/headers/include/cuda/std/__iterator/empty.h +53 -0
  831. cuda/cccl/headers/include/cuda/std/__iterator/erase_if_container.h +53 -0
  832. cuda/cccl/headers/include/cuda/std/__iterator/front_insert_iterator.h +99 -0
  833. cuda/cccl/headers/include/cuda/std/__iterator/incrementable_traits.h +143 -0
  834. cuda/cccl/headers/include/cuda/std/__iterator/indirectly_comparable.h +55 -0
  835. cuda/cccl/headers/include/cuda/std/__iterator/insert_iterator.h +107 -0
  836. cuda/cccl/headers/include/cuda/std/__iterator/istream_iterator.h +146 -0
  837. cuda/cccl/headers/include/cuda/std/__iterator/istreambuf_iterator.h +161 -0
  838. cuda/cccl/headers/include/cuda/std/__iterator/iter_move.h +161 -0
  839. cuda/cccl/headers/include/cuda/std/__iterator/iter_swap.h +163 -0
  840. cuda/cccl/headers/include/cuda/std/__iterator/iterator.h +44 -0
  841. cuda/cccl/headers/include/cuda/std/__iterator/iterator_traits.h +847 -0
  842. cuda/cccl/headers/include/cuda/std/__iterator/mergeable.h +72 -0
  843. cuda/cccl/headers/include/cuda/std/__iterator/move_iterator.h +432 -0
  844. cuda/cccl/headers/include/cuda/std/__iterator/move_sentinel.h +73 -0
  845. cuda/cccl/headers/include/cuda/std/__iterator/next.h +101 -0
  846. cuda/cccl/headers/include/cuda/std/__iterator/ostream_iterator.h +95 -0
  847. cuda/cccl/headers/include/cuda/std/__iterator/ostreambuf_iterator.h +100 -0
  848. cuda/cccl/headers/include/cuda/std/__iterator/permutable.h +54 -0
  849. cuda/cccl/headers/include/cuda/std/__iterator/prev.h +90 -0
  850. cuda/cccl/headers/include/cuda/std/__iterator/projected.h +61 -0
  851. cuda/cccl/headers/include/cuda/std/__iterator/readable_traits.h +156 -0
  852. cuda/cccl/headers/include/cuda/std/__iterator/reverse_access.h +142 -0
  853. cuda/cccl/headers/include/cuda/std/__iterator/reverse_iterator.h +371 -0
  854. cuda/cccl/headers/include/cuda/std/__iterator/size.h +69 -0
  855. cuda/cccl/headers/include/cuda/std/__iterator/sortable.h +55 -0
  856. cuda/cccl/headers/include/cuda/std/__iterator/unreachable_sentinel.h +84 -0
  857. cuda/cccl/headers/include/cuda/std/__iterator/wrap_iter.h +245 -0
  858. cuda/cccl/headers/include/cuda/std/__latch/latch.h +88 -0
  859. cuda/cccl/headers/include/cuda/std/__limits/numeric_limits.h +617 -0
  860. cuda/cccl/headers/include/cuda/std/__limits/numeric_limits_ext.h +753 -0
  861. cuda/cccl/headers/include/cuda/std/__linalg/conj_if_needed.h +78 -0
  862. cuda/cccl/headers/include/cuda/std/__linalg/conjugate_transposed.h +54 -0
  863. cuda/cccl/headers/include/cuda/std/__linalg/conjugated.h +139 -0
  864. cuda/cccl/headers/include/cuda/std/__linalg/scaled.h +132 -0
  865. cuda/cccl/headers/include/cuda/std/__linalg/transposed.h +321 -0
  866. cuda/cccl/headers/include/cuda/std/__mdspan/aligned_accessor.h +97 -0
  867. cuda/cccl/headers/include/cuda/std/__mdspan/concepts.h +139 -0
  868. cuda/cccl/headers/include/cuda/std/__mdspan/default_accessor.h +73 -0
  869. cuda/cccl/headers/include/cuda/std/__mdspan/empty_base.h +352 -0
  870. cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +759 -0
  871. cuda/cccl/headers/include/cuda/std/__mdspan/layout_left.h +314 -0
  872. cuda/cccl/headers/include/cuda/std/__mdspan/layout_right.h +307 -0
  873. cuda/cccl/headers/include/cuda/std/__mdspan/layout_stride.h +605 -0
  874. cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +512 -0
  875. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_extents.h +193 -0
  876. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_helper.h +189 -0
  877. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_mapping.h +344 -0
  878. cuda/cccl/headers/include/cuda/std/__memory/addressof.h +67 -0
  879. cuda/cccl/headers/include/cuda/std/__memory/align.h +67 -0
  880. cuda/cccl/headers/include/cuda/std/__memory/allocate_at_least.h +81 -0
  881. cuda/cccl/headers/include/cuda/std/__memory/allocation_guard.h +100 -0
  882. cuda/cccl/headers/include/cuda/std/__memory/allocator.h +320 -0
  883. cuda/cccl/headers/include/cuda/std/__memory/allocator_arg_t.h +84 -0
  884. cuda/cccl/headers/include/cuda/std/__memory/allocator_destructor.h +59 -0
  885. cuda/cccl/headers/include/cuda/std/__memory/allocator_traits.h +525 -0
  886. cuda/cccl/headers/include/cuda/std/__memory/assume_aligned.h +60 -0
  887. cuda/cccl/headers/include/cuda/std/__memory/builtin_new_allocator.h +87 -0
  888. cuda/cccl/headers/include/cuda/std/__memory/compressed_pair.h +225 -0
  889. cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +246 -0
  890. cuda/cccl/headers/include/cuda/std/__memory/destruct_n.h +91 -0
  891. cuda/cccl/headers/include/cuda/std/__memory/is_sufficiently_aligned.h +46 -0
  892. cuda/cccl/headers/include/cuda/std/__memory/pointer_traits.h +246 -0
  893. cuda/cccl/headers/include/cuda/std/__memory/runtime_assume_aligned.h +62 -0
  894. cuda/cccl/headers/include/cuda/std/__memory/temporary_buffer.h +92 -0
  895. cuda/cccl/headers/include/cuda/std/__memory/uninitialized_algorithms.h +678 -0
  896. cuda/cccl/headers/include/cuda/std/__memory/unique_ptr.h +765 -0
  897. cuda/cccl/headers/include/cuda/std/__memory/uses_allocator.h +54 -0
  898. cuda/cccl/headers/include/cuda/std/__memory/voidify.h +41 -0
  899. cuda/cccl/headers/include/cuda/std/__memory_ +34 -0
  900. cuda/cccl/headers/include/cuda/std/__new/allocate.h +126 -0
  901. cuda/cccl/headers/include/cuda/std/__new/bad_alloc.h +57 -0
  902. cuda/cccl/headers/include/cuda/std/__new/launder.h +53 -0
  903. cuda/cccl/headers/include/cuda/std/__new_ +29 -0
  904. cuda/cccl/headers/include/cuda/std/__numeric/accumulate.h +56 -0
  905. cuda/cccl/headers/include/cuda/std/__numeric/adjacent_difference.h +72 -0
  906. cuda/cccl/headers/include/cuda/std/__numeric/exclusive_scan.h +66 -0
  907. cuda/cccl/headers/include/cuda/std/__numeric/gcd_lcm.h +78 -0
  908. cuda/cccl/headers/include/cuda/std/__numeric/inclusive_scan.h +73 -0
  909. cuda/cccl/headers/include/cuda/std/__numeric/inner_product.h +62 -0
  910. cuda/cccl/headers/include/cuda/std/__numeric/iota.h +42 -0
  911. cuda/cccl/headers/include/cuda/std/__numeric/midpoint.h +97 -0
  912. cuda/cccl/headers/include/cuda/std/__numeric/partial_sum.h +69 -0
  913. cuda/cccl/headers/include/cuda/std/__numeric/reduce.h +60 -0
  914. cuda/cccl/headers/include/cuda/std/__numeric/transform_exclusive_scan.h +51 -0
  915. cuda/cccl/headers/include/cuda/std/__numeric/transform_inclusive_scan.h +65 -0
  916. cuda/cccl/headers/include/cuda/std/__numeric/transform_reduce.h +72 -0
  917. cuda/cccl/headers/include/cuda/std/__optional/bad_optional_access.h +74 -0
  918. cuda/cccl/headers/include/cuda/std/__optional/hash.h +53 -0
  919. cuda/cccl/headers/include/cuda/std/__optional/make_optional.h +61 -0
  920. cuda/cccl/headers/include/cuda/std/__optional/nullopt.h +43 -0
  921. cuda/cccl/headers/include/cuda/std/__optional/optional.h +859 -0
  922. cuda/cccl/headers/include/cuda/std/__optional/optional_base.h +433 -0
  923. cuda/cccl/headers/include/cuda/std/__optional/optional_ref.h +324 -0
  924. cuda/cccl/headers/include/cuda/std/__random/generate_canonical.h +56 -0
  925. cuda/cccl/headers/include/cuda/std/__random/is_seed_sequence.h +39 -0
  926. cuda/cccl/headers/include/cuda/std/__random/is_valid.h +106 -0
  927. cuda/cccl/headers/include/cuda/std/__random/linear_congruential_engine.h +398 -0
  928. cuda/cccl/headers/include/cuda/std/__random/uniform_int_distribution.h +335 -0
  929. cuda/cccl/headers/include/cuda/std/__random/uniform_real_distribution.h +183 -0
  930. cuda/cccl/headers/include/cuda/std/__random_ +29 -0
  931. cuda/cccl/headers/include/cuda/std/__ranges/access.h +303 -0
  932. cuda/cccl/headers/include/cuda/std/__ranges/all.h +98 -0
  933. cuda/cccl/headers/include/cuda/std/__ranges/compressed_movable_box.h +892 -0
  934. cuda/cccl/headers/include/cuda/std/__ranges/concepts.h +302 -0
  935. cuda/cccl/headers/include/cuda/std/__ranges/counted.h +90 -0
  936. cuda/cccl/headers/include/cuda/std/__ranges/dangling.h +54 -0
  937. cuda/cccl/headers/include/cuda/std/__ranges/data.h +136 -0
  938. cuda/cccl/headers/include/cuda/std/__ranges/empty.h +109 -0
  939. cuda/cccl/headers/include/cuda/std/__ranges/empty_view.h +77 -0
  940. cuda/cccl/headers/include/cuda/std/__ranges/enable_borrowed_range.h +41 -0
  941. cuda/cccl/headers/include/cuda/std/__ranges/enable_view.h +78 -0
  942. cuda/cccl/headers/include/cuda/std/__ranges/from_range.h +36 -0
  943. cuda/cccl/headers/include/cuda/std/__ranges/iota_view.h +266 -0
  944. cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +410 -0
  945. cuda/cccl/headers/include/cuda/std/__ranges/owning_view.h +162 -0
  946. cuda/cccl/headers/include/cuda/std/__ranges/range_adaptor.h +110 -0
  947. cuda/cccl/headers/include/cuda/std/__ranges/rbegin.h +175 -0
  948. cuda/cccl/headers/include/cuda/std/__ranges/ref_view.h +121 -0
  949. cuda/cccl/headers/include/cuda/std/__ranges/rend.h +182 -0
  950. cuda/cccl/headers/include/cuda/std/__ranges/repeat_view.h +345 -0
  951. cuda/cccl/headers/include/cuda/std/__ranges/single_view.h +155 -0
  952. cuda/cccl/headers/include/cuda/std/__ranges/size.h +201 -0
  953. cuda/cccl/headers/include/cuda/std/__ranges/subrange.h +513 -0
  954. cuda/cccl/headers/include/cuda/std/__ranges/take_view.h +476 -0
  955. cuda/cccl/headers/include/cuda/std/__ranges/take_while_view.h +259 -0
  956. cuda/cccl/headers/include/cuda/std/__ranges/transform_view.h +522 -0
  957. cuda/cccl/headers/include/cuda/std/__ranges/unwrap_end.h +53 -0
  958. cuda/cccl/headers/include/cuda/std/__ranges/view_interface.h +183 -0
  959. cuda/cccl/headers/include/cuda/std/__ranges/views.h +38 -0
  960. cuda/cccl/headers/include/cuda/std/__semaphore/atomic_semaphore.h +234 -0
  961. cuda/cccl/headers/include/cuda/std/__semaphore/counting_semaphore.h +51 -0
  962. cuda/cccl/headers/include/cuda/std/__string/char_traits.h +191 -0
  963. cuda/cccl/headers/include/cuda/std/__string/constexpr_c_functions.h +581 -0
  964. cuda/cccl/headers/include/cuda/std/__string/helper_functions.h +296 -0
  965. cuda/cccl/headers/include/cuda/std/__string/string_view.h +244 -0
  966. cuda/cccl/headers/include/cuda/std/__string_ +29 -0
  967. cuda/cccl/headers/include/cuda/std/__system_error/errc.h +51 -0
  968. cuda/cccl/headers/include/cuda/std/__system_error_ +26 -0
  969. cuda/cccl/headers/include/cuda/std/__thread/threading_support.h +106 -0
  970. cuda/cccl/headers/include/cuda/std/__thread/threading_support_cuda.h +47 -0
  971. cuda/cccl/headers/include/cuda/std/__thread/threading_support_external.h +41 -0
  972. cuda/cccl/headers/include/cuda/std/__thread/threading_support_pthread.h +143 -0
  973. cuda/cccl/headers/include/cuda/std/__thread/threading_support_win32.h +87 -0
  974. cuda/cccl/headers/include/cuda/std/__tuple_dir/ignore.h +51 -0
  975. cuda/cccl/headers/include/cuda/std/__tuple_dir/make_tuple_types.h +120 -0
  976. cuda/cccl/headers/include/cuda/std/__tuple_dir/sfinae_helpers.h +260 -0
  977. cuda/cccl/headers/include/cuda/std/__tuple_dir/structured_bindings.h +212 -0
  978. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_element.h +70 -0
  979. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_indices.h +44 -0
  980. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +84 -0
  981. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like_ext.h +68 -0
  982. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_size.h +79 -0
  983. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_types.h +35 -0
  984. cuda/cccl/headers/include/cuda/std/__tuple_dir/vector_types.h +290 -0
  985. cuda/cccl/headers/include/cuda/std/__type_traits/add_const.h +40 -0
  986. cuda/cccl/headers/include/cuda/std/__type_traits/add_cv.h +40 -0
  987. cuda/cccl/headers/include/cuda/std/__type_traits/add_lvalue_reference.h +62 -0
  988. cuda/cccl/headers/include/cuda/std/__type_traits/add_pointer.h +65 -0
  989. cuda/cccl/headers/include/cuda/std/__type_traits/add_rvalue_reference.h +62 -0
  990. cuda/cccl/headers/include/cuda/std/__type_traits/add_volatile.h +40 -0
  991. cuda/cccl/headers/include/cuda/std/__type_traits/aligned_storage.h +149 -0
  992. cuda/cccl/headers/include/cuda/std/__type_traits/aligned_union.h +62 -0
  993. cuda/cccl/headers/include/cuda/std/__type_traits/alignment_of.h +41 -0
  994. cuda/cccl/headers/include/cuda/std/__type_traits/always_false.h +35 -0
  995. cuda/cccl/headers/include/cuda/std/__type_traits/can_extract_key.h +68 -0
  996. cuda/cccl/headers/include/cuda/std/__type_traits/common_reference.h +262 -0
  997. cuda/cccl/headers/include/cuda/std/__type_traits/common_type.h +173 -0
  998. cuda/cccl/headers/include/cuda/std/__type_traits/conditional.h +65 -0
  999. cuda/cccl/headers/include/cuda/std/__type_traits/conjunction.h +67 -0
  1000. cuda/cccl/headers/include/cuda/std/__type_traits/copy_cv.h +50 -0
  1001. cuda/cccl/headers/include/cuda/std/__type_traits/copy_cvref.h +148 -0
  1002. cuda/cccl/headers/include/cuda/std/__type_traits/decay.h +83 -0
  1003. cuda/cccl/headers/include/cuda/std/__type_traits/dependent_type.h +35 -0
  1004. cuda/cccl/headers/include/cuda/std/__type_traits/disjunction.h +77 -0
  1005. cuda/cccl/headers/include/cuda/std/__type_traits/enable_if.h +43 -0
  1006. cuda/cccl/headers/include/cuda/std/__type_traits/extent.h +68 -0
  1007. cuda/cccl/headers/include/cuda/std/__type_traits/fold.h +47 -0
  1008. cuda/cccl/headers/include/cuda/std/__type_traits/has_unique_object_representation.h +46 -0
  1009. cuda/cccl/headers/include/cuda/std/__type_traits/has_virtual_destructor.h +42 -0
  1010. cuda/cccl/headers/include/cuda/std/__type_traits/integral_constant.h +62 -0
  1011. cuda/cccl/headers/include/cuda/std/__type_traits/is_abstract.h +42 -0
  1012. cuda/cccl/headers/include/cuda/std/__type_traits/is_aggregate.h +42 -0
  1013. cuda/cccl/headers/include/cuda/std/__type_traits/is_allocator.h +46 -0
  1014. cuda/cccl/headers/include/cuda/std/__type_traits/is_arithmetic.h +42 -0
  1015. cuda/cccl/headers/include/cuda/std/__type_traits/is_array.h +62 -0
  1016. cuda/cccl/headers/include/cuda/std/__type_traits/is_assignable.h +78 -0
  1017. cuda/cccl/headers/include/cuda/std/__type_traits/is_base_of.h +42 -0
  1018. cuda/cccl/headers/include/cuda/std/__type_traits/is_bounded_array.h +44 -0
  1019. cuda/cccl/headers/include/cuda/std/__type_traits/is_callable.h +60 -0
  1020. cuda/cccl/headers/include/cuda/std/__type_traits/is_char_like_type.h +38 -0
  1021. cuda/cccl/headers/include/cuda/std/__type_traits/is_class.h +42 -0
  1022. cuda/cccl/headers/include/cuda/std/__type_traits/is_compound.h +58 -0
  1023. cuda/cccl/headers/include/cuda/std/__type_traits/is_const.h +56 -0
  1024. cuda/cccl/headers/include/cuda/std/__type_traits/is_constant_evaluated.h +51 -0
  1025. cuda/cccl/headers/include/cuda/std/__type_traits/is_constructible.h +174 -0
  1026. cuda/cccl/headers/include/cuda/std/__type_traits/is_convertible.h +211 -0
  1027. cuda/cccl/headers/include/cuda/std/__type_traits/is_copy_assignable.h +43 -0
  1028. cuda/cccl/headers/include/cuda/std/__type_traits/is_copy_constructible.h +43 -0
  1029. cuda/cccl/headers/include/cuda/std/__type_traits/is_core_convertible.h +47 -0
  1030. cuda/cccl/headers/include/cuda/std/__type_traits/is_corresponding_member.h +42 -0
  1031. cuda/cccl/headers/include/cuda/std/__type_traits/is_default_constructible.h +40 -0
  1032. cuda/cccl/headers/include/cuda/std/__type_traits/is_destructible.h +115 -0
  1033. cuda/cccl/headers/include/cuda/std/__type_traits/is_empty.h +42 -0
  1034. cuda/cccl/headers/include/cuda/std/__type_traits/is_enum.h +42 -0
  1035. cuda/cccl/headers/include/cuda/std/__type_traits/is_execution_policy.h +81 -0
  1036. cuda/cccl/headers/include/cuda/std/__type_traits/is_extended_arithmetic.h +38 -0
  1037. cuda/cccl/headers/include/cuda/std/__type_traits/is_extended_floating_point.h +79 -0
  1038. cuda/cccl/headers/include/cuda/std/__type_traits/is_final.h +42 -0
  1039. cuda/cccl/headers/include/cuda/std/__type_traits/is_floating_point.h +53 -0
  1040. cuda/cccl/headers/include/cuda/std/__type_traits/is_function.h +61 -0
  1041. cuda/cccl/headers/include/cuda/std/__type_traits/is_fundamental.h +56 -0
  1042. cuda/cccl/headers/include/cuda/std/__type_traits/is_implicitly_default_constructible.h +57 -0
  1043. cuda/cccl/headers/include/cuda/std/__type_traits/is_integer.h +45 -0
  1044. cuda/cccl/headers/include/cuda/std/__type_traits/is_integral.h +123 -0
  1045. cuda/cccl/headers/include/cuda/std/__type_traits/is_layout_compatible.h +45 -0
  1046. cuda/cccl/headers/include/cuda/std/__type_traits/is_literal_type.h +42 -0
  1047. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_function_pointer.h +79 -0
  1048. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_object_pointer.h +57 -0
  1049. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_pointer.h +57 -0
  1050. cuda/cccl/headers/include/cuda/std/__type_traits/is_move_assignable.h +43 -0
  1051. cuda/cccl/headers/include/cuda/std/__type_traits/is_move_constructible.h +42 -0
  1052. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_assignable.h +70 -0
  1053. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_constructible.h +84 -0
  1054. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_convertible.h +59 -0
  1055. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_copy_assignable.h +60 -0
  1056. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_copy_constructible.h +43 -0
  1057. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_default_constructible.h +54 -0
  1058. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_destructible.h +82 -0
  1059. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_move_assignable.h +60 -0
  1060. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_move_constructible.h +42 -0
  1061. cuda/cccl/headers/include/cuda/std/__type_traits/is_null_pointer.h +43 -0
  1062. cuda/cccl/headers/include/cuda/std/__type_traits/is_object.h +57 -0
  1063. cuda/cccl/headers/include/cuda/std/__type_traits/is_one_of.h +37 -0
  1064. cuda/cccl/headers/include/cuda/std/__type_traits/is_pod.h +42 -0
  1065. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer.h +60 -0
  1066. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer_interconvertible_base_of.h +84 -0
  1067. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer_interconvertible_with_class.h +42 -0
  1068. cuda/cccl/headers/include/cuda/std/__type_traits/is_polymorphic.h +42 -0
  1069. cuda/cccl/headers/include/cuda/std/__type_traits/is_primary_template.h +121 -0
  1070. cuda/cccl/headers/include/cuda/std/__type_traits/is_reference.h +95 -0
  1071. cuda/cccl/headers/include/cuda/std/__type_traits/is_reference_wrapper.h +50 -0
  1072. cuda/cccl/headers/include/cuda/std/__type_traits/is_referenceable.h +55 -0
  1073. cuda/cccl/headers/include/cuda/std/__type_traits/is_same.h +88 -0
  1074. cuda/cccl/headers/include/cuda/std/__type_traits/is_scalar.h +60 -0
  1075. cuda/cccl/headers/include/cuda/std/__type_traits/is_scoped_enum.h +49 -0
  1076. cuda/cccl/headers/include/cuda/std/__type_traits/is_signed.h +65 -0
  1077. cuda/cccl/headers/include/cuda/std/__type_traits/is_signed_integer.h +59 -0
  1078. cuda/cccl/headers/include/cuda/std/__type_traits/is_standard_layout.h +42 -0
  1079. cuda/cccl/headers/include/cuda/std/__type_traits/is_swappable.h +202 -0
  1080. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivial.h +42 -0
  1081. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_assignable.h +43 -0
  1082. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_constructible.h +43 -0
  1083. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copy_assignable.h +46 -0
  1084. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copy_constructible.h +45 -0
  1085. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copyable.h +42 -0
  1086. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_default_constructible.h +42 -0
  1087. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_destructible.h +58 -0
  1088. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_move_assignable.h +45 -0
  1089. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_move_constructible.h +44 -0
  1090. cuda/cccl/headers/include/cuda/std/__type_traits/is_unbounded_array.h +43 -0
  1091. cuda/cccl/headers/include/cuda/std/__type_traits/is_union.h +42 -0
  1092. cuda/cccl/headers/include/cuda/std/__type_traits/is_unsigned.h +66 -0
  1093. cuda/cccl/headers/include/cuda/std/__type_traits/is_unsigned_integer.h +59 -0
  1094. cuda/cccl/headers/include/cuda/std/__type_traits/is_valid_expansion.h +41 -0
  1095. cuda/cccl/headers/include/cuda/std/__type_traits/is_void.h +55 -0
  1096. cuda/cccl/headers/include/cuda/std/__type_traits/is_volatile.h +56 -0
  1097. cuda/cccl/headers/include/cuda/std/__type_traits/lazy.h +35 -0
  1098. cuda/cccl/headers/include/cuda/std/__type_traits/make_const_lvalue_ref.h +36 -0
  1099. cuda/cccl/headers/include/cuda/std/__type_traits/make_nbit_int.h +107 -0
  1100. cuda/cccl/headers/include/cuda/std/__type_traits/make_signed.h +140 -0
  1101. cuda/cccl/headers/include/cuda/std/__type_traits/make_unsigned.h +151 -0
  1102. cuda/cccl/headers/include/cuda/std/__type_traits/maybe_const.h +36 -0
  1103. cuda/cccl/headers/include/cuda/std/__type_traits/nat.h +39 -0
  1104. cuda/cccl/headers/include/cuda/std/__type_traits/negation.h +44 -0
  1105. cuda/cccl/headers/include/cuda/std/__type_traits/num_bits.h +122 -0
  1106. cuda/cccl/headers/include/cuda/std/__type_traits/promote.h +163 -0
  1107. cuda/cccl/headers/include/cuda/std/__type_traits/rank.h +60 -0
  1108. cuda/cccl/headers/include/cuda/std/__type_traits/reference_constructs_from_temporary.h +57 -0
  1109. cuda/cccl/headers/include/cuda/std/__type_traits/reference_converts_from_temporary.h +56 -0
  1110. cuda/cccl/headers/include/cuda/std/__type_traits/remove_all_extents.h +66 -0
  1111. cuda/cccl/headers/include/cuda/std/__type_traits/remove_const.h +59 -0
  1112. cuda/cccl/headers/include/cuda/std/__type_traits/remove_const_ref.h +37 -0
  1113. cuda/cccl/headers/include/cuda/std/__type_traits/remove_cv.h +57 -0
  1114. cuda/cccl/headers/include/cuda/std/__type_traits/remove_cvref.h +57 -0
  1115. cuda/cccl/headers/include/cuda/std/__type_traits/remove_extent.h +65 -0
  1116. cuda/cccl/headers/include/cuda/std/__type_traits/remove_pointer.h +73 -0
  1117. cuda/cccl/headers/include/cuda/std/__type_traits/remove_reference.h +72 -0
  1118. cuda/cccl/headers/include/cuda/std/__type_traits/remove_volatile.h +58 -0
  1119. cuda/cccl/headers/include/cuda/std/__type_traits/result_of.h +47 -0
  1120. cuda/cccl/headers/include/cuda/std/__type_traits/type_identity.h +40 -0
  1121. cuda/cccl/headers/include/cuda/std/__type_traits/type_list.h +1067 -0
  1122. cuda/cccl/headers/include/cuda/std/__type_traits/type_set.h +131 -0
  1123. cuda/cccl/headers/include/cuda/std/__type_traits/underlying_type.h +52 -0
  1124. cuda/cccl/headers/include/cuda/std/__type_traits/void_t.h +34 -0
  1125. cuda/cccl/headers/include/cuda/std/__utility/as_const.h +52 -0
  1126. cuda/cccl/headers/include/cuda/std/__utility/auto_cast.h +34 -0
  1127. cuda/cccl/headers/include/cuda/std/__utility/cmp.h +116 -0
  1128. cuda/cccl/headers/include/cuda/std/__utility/convert_to_integral.h +101 -0
  1129. cuda/cccl/headers/include/cuda/std/__utility/declval.h +76 -0
  1130. cuda/cccl/headers/include/cuda/std/__utility/exception_guard.h +161 -0
  1131. cuda/cccl/headers/include/cuda/std/__utility/exchange.h +46 -0
  1132. cuda/cccl/headers/include/cuda/std/__utility/forward.h +59 -0
  1133. cuda/cccl/headers/include/cuda/std/__utility/forward_like.h +55 -0
  1134. cuda/cccl/headers/include/cuda/std/__utility/in_place.h +86 -0
  1135. cuda/cccl/headers/include/cuda/std/__utility/integer_sequence.h +251 -0
  1136. cuda/cccl/headers/include/cuda/std/__utility/monostate.h +99 -0
  1137. cuda/cccl/headers/include/cuda/std/__utility/move.h +74 -0
  1138. cuda/cccl/headers/include/cuda/std/__utility/pair.h +791 -0
  1139. cuda/cccl/headers/include/cuda/std/__utility/piecewise_construct.h +37 -0
  1140. cuda/cccl/headers/include/cuda/std/__utility/pod_tuple.h +527 -0
  1141. cuda/cccl/headers/include/cuda/std/__utility/priority_tag.h +40 -0
  1142. cuda/cccl/headers/include/cuda/std/__utility/rel_ops.h +63 -0
  1143. cuda/cccl/headers/include/cuda/std/__utility/swap.h +64 -0
  1144. cuda/cccl/headers/include/cuda/std/__utility/to_underlying.h +40 -0
  1145. cuda/cccl/headers/include/cuda/std/__utility/typeid.h +421 -0
  1146. cuda/cccl/headers/include/cuda/std/__utility/undefined.h +34 -0
  1147. cuda/cccl/headers/include/cuda/std/__utility/unreachable.h +37 -0
  1148. cuda/cccl/headers/include/cuda/std/array +518 -0
  1149. cuda/cccl/headers/include/cuda/std/atomic +810 -0
  1150. cuda/cccl/headers/include/cuda/std/barrier +42 -0
  1151. cuda/cccl/headers/include/cuda/std/bit +35 -0
  1152. cuda/cccl/headers/include/cuda/std/bitset +994 -0
  1153. cuda/cccl/headers/include/cuda/std/cassert +28 -0
  1154. cuda/cccl/headers/include/cuda/std/ccomplex +15 -0
  1155. cuda/cccl/headers/include/cuda/std/cfloat +59 -0
  1156. cuda/cccl/headers/include/cuda/std/chrono +26 -0
  1157. cuda/cccl/headers/include/cuda/std/climits +61 -0
  1158. cuda/cccl/headers/include/cuda/std/cmath +87 -0
  1159. cuda/cccl/headers/include/cuda/std/complex +50 -0
  1160. cuda/cccl/headers/include/cuda/std/concepts +48 -0
  1161. cuda/cccl/headers/include/cuda/std/cstddef +28 -0
  1162. cuda/cccl/headers/include/cuda/std/cstdint +178 -0
  1163. cuda/cccl/headers/include/cuda/std/cstdlib +30 -0
  1164. cuda/cccl/headers/include/cuda/std/cstring +110 -0
  1165. cuda/cccl/headers/include/cuda/std/ctime +154 -0
  1166. cuda/cccl/headers/include/cuda/std/detail/__config +45 -0
  1167. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +207 -0
  1168. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/algorithm +1721 -0
  1169. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/chrono +2509 -0
  1170. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/iosfwd +128 -0
  1171. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/stdexcept +120 -0
  1172. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/tuple +1365 -0
  1173. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/variant +2144 -0
  1174. cuda/cccl/headers/include/cuda/std/execution +29 -0
  1175. cuda/cccl/headers/include/cuda/std/expected +30 -0
  1176. cuda/cccl/headers/include/cuda/std/functional +56 -0
  1177. cuda/cccl/headers/include/cuda/std/initializer_list +44 -0
  1178. cuda/cccl/headers/include/cuda/std/inplace_vector +2170 -0
  1179. cuda/cccl/headers/include/cuda/std/iterator +70 -0
  1180. cuda/cccl/headers/include/cuda/std/latch +34 -0
  1181. cuda/cccl/headers/include/cuda/std/limits +28 -0
  1182. cuda/cccl/headers/include/cuda/std/linalg +30 -0
  1183. cuda/cccl/headers/include/cuda/std/mdspan +38 -0
  1184. cuda/cccl/headers/include/cuda/std/memory +39 -0
  1185. cuda/cccl/headers/include/cuda/std/numbers +346 -0
  1186. cuda/cccl/headers/include/cuda/std/numeric +41 -0
  1187. cuda/cccl/headers/include/cuda/std/optional +31 -0
  1188. cuda/cccl/headers/include/cuda/std/ranges +69 -0
  1189. cuda/cccl/headers/include/cuda/std/ratio +416 -0
  1190. cuda/cccl/headers/include/cuda/std/semaphore +31 -0
  1191. cuda/cccl/headers/include/cuda/std/source_location +83 -0
  1192. cuda/cccl/headers/include/cuda/std/span +628 -0
  1193. cuda/cccl/headers/include/cuda/std/string_view +925 -0
  1194. cuda/cccl/headers/include/cuda/std/tuple +26 -0
  1195. cuda/cccl/headers/include/cuda/std/type_traits +177 -0
  1196. cuda/cccl/headers/include/cuda/std/utility +70 -0
  1197. cuda/cccl/headers/include/cuda/std/variant +25 -0
  1198. cuda/cccl/headers/include/cuda/std/version +240 -0
  1199. cuda/cccl/headers/include/cuda/stream +31 -0
  1200. cuda/cccl/headers/include/cuda/stream_ref +59 -0
  1201. cuda/cccl/headers/include/cuda/type_traits +27 -0
  1202. cuda/cccl/headers/include/cuda/utility +28 -0
  1203. cuda/cccl/headers/include/cuda/version +16 -0
  1204. cuda/cccl/headers/include/cuda/warp +28 -0
  1205. cuda/cccl/headers/include/cuda/work_stealing +26 -0
  1206. cuda/cccl/headers/include/nv/detail/__preprocessor +169 -0
  1207. cuda/cccl/headers/include/nv/detail/__target_macros +718 -0
  1208. cuda/cccl/headers/include/nv/target +240 -0
  1209. cuda/cccl/headers/include/thrust/addressof.h +22 -0
  1210. cuda/cccl/headers/include/thrust/adjacent_difference.h +254 -0
  1211. cuda/cccl/headers/include/thrust/advance.h +57 -0
  1212. cuda/cccl/headers/include/thrust/allocate_unique.h +299 -0
  1213. cuda/cccl/headers/include/thrust/binary_search.h +1910 -0
  1214. cuda/cccl/headers/include/thrust/complex.h +858 -0
  1215. cuda/cccl/headers/include/thrust/copy.h +506 -0
  1216. cuda/cccl/headers/include/thrust/count.h +245 -0
  1217. cuda/cccl/headers/include/thrust/detail/adjacent_difference.inl +95 -0
  1218. cuda/cccl/headers/include/thrust/detail/alignment.h +81 -0
  1219. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +626 -0
  1220. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +192 -0
  1221. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +96 -0
  1222. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +81 -0
  1223. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +78 -0
  1224. cuda/cccl/headers/include/thrust/detail/allocator/no_throw_allocator.h +76 -0
  1225. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +115 -0
  1226. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +116 -0
  1227. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.h +77 -0
  1228. cuda/cccl/headers/include/thrust/detail/allocator_aware_execution_policy.h +99 -0
  1229. cuda/cccl/headers/include/thrust/detail/binary_search.inl +525 -0
  1230. cuda/cccl/headers/include/thrust/detail/caching_allocator.h +47 -0
  1231. cuda/cccl/headers/include/thrust/detail/complex/arithmetic.h +255 -0
  1232. cuda/cccl/headers/include/thrust/detail/complex/c99math.h +64 -0
  1233. cuda/cccl/headers/include/thrust/detail/complex/catrig.h +875 -0
  1234. cuda/cccl/headers/include/thrust/detail/complex/catrigf.h +589 -0
  1235. cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +233 -0
  1236. cuda/cccl/headers/include/thrust/detail/complex/ccoshf.h +161 -0
  1237. cuda/cccl/headers/include/thrust/detail/complex/cexp.h +195 -0
  1238. cuda/cccl/headers/include/thrust/detail/complex/cexpf.h +173 -0
  1239. cuda/cccl/headers/include/thrust/detail/complex/clog.h +223 -0
  1240. cuda/cccl/headers/include/thrust/detail/complex/clogf.h +210 -0
  1241. cuda/cccl/headers/include/thrust/detail/complex/complex.inl +263 -0
  1242. cuda/cccl/headers/include/thrust/detail/complex/cpow.h +50 -0
  1243. cuda/cccl/headers/include/thrust/detail/complex/cproj.h +81 -0
  1244. cuda/cccl/headers/include/thrust/detail/complex/csinh.h +228 -0
  1245. cuda/cccl/headers/include/thrust/detail/complex/csinhf.h +168 -0
  1246. cuda/cccl/headers/include/thrust/detail/complex/csqrt.h +178 -0
  1247. cuda/cccl/headers/include/thrust/detail/complex/csqrtf.h +174 -0
  1248. cuda/cccl/headers/include/thrust/detail/complex/ctanh.h +208 -0
  1249. cuda/cccl/headers/include/thrust/detail/complex/ctanhf.h +133 -0
  1250. cuda/cccl/headers/include/thrust/detail/complex/math_private.h +138 -0
  1251. cuda/cccl/headers/include/thrust/detail/complex/stream.h +73 -0
  1252. cuda/cccl/headers/include/thrust/detail/config/compiler.h +38 -0
  1253. cuda/cccl/headers/include/thrust/detail/config/config.h +43 -0
  1254. cuda/cccl/headers/include/thrust/detail/config/cpp_dialect.h +78 -0
  1255. cuda/cccl/headers/include/thrust/detail/config/device_system.h +55 -0
  1256. cuda/cccl/headers/include/thrust/detail/config/host_system.h +48 -0
  1257. cuda/cccl/headers/include/thrust/detail/config/memory_resource.h +41 -0
  1258. cuda/cccl/headers/include/thrust/detail/config/namespace.h +162 -0
  1259. cuda/cccl/headers/include/thrust/detail/config/simple_defines.h +48 -0
  1260. cuda/cccl/headers/include/thrust/detail/config.h +36 -0
  1261. cuda/cccl/headers/include/thrust/detail/contiguous_storage.h +228 -0
  1262. cuda/cccl/headers/include/thrust/detail/contiguous_storage.inl +273 -0
  1263. cuda/cccl/headers/include/thrust/detail/copy.h +72 -0
  1264. cuda/cccl/headers/include/thrust/detail/copy.inl +129 -0
  1265. cuda/cccl/headers/include/thrust/detail/copy_if.h +62 -0
  1266. cuda/cccl/headers/include/thrust/detail/copy_if.inl +102 -0
  1267. cuda/cccl/headers/include/thrust/detail/count.h +55 -0
  1268. cuda/cccl/headers/include/thrust/detail/count.inl +89 -0
  1269. cuda/cccl/headers/include/thrust/detail/device_ptr.inl +48 -0
  1270. cuda/cccl/headers/include/thrust/detail/equal.inl +93 -0
  1271. cuda/cccl/headers/include/thrust/detail/event_error.h +160 -0
  1272. cuda/cccl/headers/include/thrust/detail/execute_with_allocator.h +81 -0
  1273. cuda/cccl/headers/include/thrust/detail/execute_with_allocator_fwd.h +61 -0
  1274. cuda/cccl/headers/include/thrust/detail/execution_policy.h +120 -0
  1275. cuda/cccl/headers/include/thrust/detail/extrema.inl +184 -0
  1276. cuda/cccl/headers/include/thrust/detail/fill.inl +86 -0
  1277. cuda/cccl/headers/include/thrust/detail/find.inl +113 -0
  1278. cuda/cccl/headers/include/thrust/detail/for_each.inl +84 -0
  1279. cuda/cccl/headers/include/thrust/detail/function.h +49 -0
  1280. cuda/cccl/headers/include/thrust/detail/functional/actor.h +214 -0
  1281. cuda/cccl/headers/include/thrust/detail/functional/operators.h +386 -0
  1282. cuda/cccl/headers/include/thrust/detail/gather.inl +173 -0
  1283. cuda/cccl/headers/include/thrust/detail/generate.inl +86 -0
  1284. cuda/cccl/headers/include/thrust/detail/get_iterator_value.h +62 -0
  1285. cuda/cccl/headers/include/thrust/detail/inner_product.inl +118 -0
  1286. cuda/cccl/headers/include/thrust/detail/internal_functional.h +328 -0
  1287. cuda/cccl/headers/include/thrust/detail/logical.inl +113 -0
  1288. cuda/cccl/headers/include/thrust/detail/malloc_and_free.h +77 -0
  1289. cuda/cccl/headers/include/thrust/detail/malloc_and_free_fwd.h +45 -0
  1290. cuda/cccl/headers/include/thrust/detail/memory_algorithms.h +209 -0
  1291. cuda/cccl/headers/include/thrust/detail/merge.inl +276 -0
  1292. cuda/cccl/headers/include/thrust/detail/mismatch.inl +94 -0
  1293. cuda/cccl/headers/include/thrust/detail/overlapped_copy.h +124 -0
  1294. cuda/cccl/headers/include/thrust/detail/partition.inl +378 -0
  1295. cuda/cccl/headers/include/thrust/detail/pointer.h +309 -0
  1296. cuda/cccl/headers/include/thrust/detail/preprocessor.h +652 -0
  1297. cuda/cccl/headers/include/thrust/detail/random_bijection.h +177 -0
  1298. cuda/cccl/headers/include/thrust/detail/range/head_flags.h +116 -0
  1299. cuda/cccl/headers/include/thrust/detail/range/tail_flags.h +130 -0
  1300. cuda/cccl/headers/include/thrust/detail/raw_pointer_cast.h +52 -0
  1301. cuda/cccl/headers/include/thrust/detail/raw_reference_cast.h +192 -0
  1302. cuda/cccl/headers/include/thrust/detail/reduce.inl +377 -0
  1303. cuda/cccl/headers/include/thrust/detail/reference.h +494 -0
  1304. cuda/cccl/headers/include/thrust/detail/reference_forward_declaration.h +35 -0
  1305. cuda/cccl/headers/include/thrust/detail/remove.inl +213 -0
  1306. cuda/cccl/headers/include/thrust/detail/replace.inl +231 -0
  1307. cuda/cccl/headers/include/thrust/detail/reverse.inl +88 -0
  1308. cuda/cccl/headers/include/thrust/detail/scan.inl +518 -0
  1309. cuda/cccl/headers/include/thrust/detail/scatter.inl +157 -0
  1310. cuda/cccl/headers/include/thrust/detail/seq.h +66 -0
  1311. cuda/cccl/headers/include/thrust/detail/sequence.inl +109 -0
  1312. cuda/cccl/headers/include/thrust/detail/set_operations.inl +981 -0
  1313. cuda/cccl/headers/include/thrust/detail/shuffle.inl +86 -0
  1314. cuda/cccl/headers/include/thrust/detail/sort.inl +373 -0
  1315. cuda/cccl/headers/include/thrust/detail/static_assert.h +58 -0
  1316. cuda/cccl/headers/include/thrust/detail/static_map.h +167 -0
  1317. cuda/cccl/headers/include/thrust/detail/swap_ranges.inl +65 -0
  1318. cuda/cccl/headers/include/thrust/detail/tabulate.inl +62 -0
  1319. cuda/cccl/headers/include/thrust/detail/temporary_array.h +153 -0
  1320. cuda/cccl/headers/include/thrust/detail/temporary_array.inl +120 -0
  1321. cuda/cccl/headers/include/thrust/detail/temporary_buffer.h +81 -0
  1322. cuda/cccl/headers/include/thrust/detail/transform_reduce.inl +69 -0
  1323. cuda/cccl/headers/include/thrust/detail/transform_scan.inl +161 -0
  1324. cuda/cccl/headers/include/thrust/detail/trivial_sequence.h +130 -0
  1325. cuda/cccl/headers/include/thrust/detail/tuple_meta_transform.h +61 -0
  1326. cuda/cccl/headers/include/thrust/detail/type_deduction.h +62 -0
  1327. cuda/cccl/headers/include/thrust/detail/type_traits/has_member_function.h +47 -0
  1328. cuda/cccl/headers/include/thrust/detail/type_traits/has_nested_type.h +43 -0
  1329. cuda/cccl/headers/include/thrust/detail/type_traits/is_call_possible.h +167 -0
  1330. cuda/cccl/headers/include/thrust/detail/type_traits/is_commutative.h +69 -0
  1331. cuda/cccl/headers/include/thrust/detail/type_traits/is_metafunction_defined.h +39 -0
  1332. cuda/cccl/headers/include/thrust/detail/type_traits/is_thrust_pointer.h +59 -0
  1333. cuda/cccl/headers/include/thrust/detail/type_traits/iterator/is_output_iterator.h +46 -0
  1334. cuda/cccl/headers/include/thrust/detail/type_traits/minimum_type.h +89 -0
  1335. cuda/cccl/headers/include/thrust/detail/type_traits/pointer_traits.h +332 -0
  1336. cuda/cccl/headers/include/thrust/detail/type_traits.h +136 -0
  1337. cuda/cccl/headers/include/thrust/detail/uninitialized_copy.inl +90 -0
  1338. cuda/cccl/headers/include/thrust/detail/uninitialized_fill.inl +86 -0
  1339. cuda/cccl/headers/include/thrust/detail/unique.inl +373 -0
  1340. cuda/cccl/headers/include/thrust/detail/use_default.h +34 -0
  1341. cuda/cccl/headers/include/thrust/detail/vector_base.h +613 -0
  1342. cuda/cccl/headers/include/thrust/detail/vector_base.inl +1210 -0
  1343. cuda/cccl/headers/include/thrust/device_allocator.h +134 -0
  1344. cuda/cccl/headers/include/thrust/device_delete.h +74 -0
  1345. cuda/cccl/headers/include/thrust/device_free.h +85 -0
  1346. cuda/cccl/headers/include/thrust/device_make_unique.h +56 -0
  1347. cuda/cccl/headers/include/thrust/device_malloc.h +84 -0
  1348. cuda/cccl/headers/include/thrust/device_malloc_allocator.h +190 -0
  1349. cuda/cccl/headers/include/thrust/device_new.h +112 -0
  1350. cuda/cccl/headers/include/thrust/device_new_allocator.h +179 -0
  1351. cuda/cccl/headers/include/thrust/device_ptr.h +196 -0
  1352. cuda/cccl/headers/include/thrust/device_reference.h +983 -0
  1353. cuda/cccl/headers/include/thrust/device_vector.h +576 -0
  1354. cuda/cccl/headers/include/thrust/distance.h +43 -0
  1355. cuda/cccl/headers/include/thrust/equal.h +247 -0
  1356. cuda/cccl/headers/include/thrust/execution_policy.h +251 -0
  1357. cuda/cccl/headers/include/thrust/extrema.h +657 -0
  1358. cuda/cccl/headers/include/thrust/fill.h +200 -0
  1359. cuda/cccl/headers/include/thrust/find.h +382 -0
  1360. cuda/cccl/headers/include/thrust/for_each.h +261 -0
  1361. cuda/cccl/headers/include/thrust/functional.h +395 -0
  1362. cuda/cccl/headers/include/thrust/gather.h +464 -0
  1363. cuda/cccl/headers/include/thrust/generate.h +193 -0
  1364. cuda/cccl/headers/include/thrust/host_vector.h +576 -0
  1365. cuda/cccl/headers/include/thrust/inner_product.h +264 -0
  1366. cuda/cccl/headers/include/thrust/iterator/constant_iterator.h +221 -0
  1367. cuda/cccl/headers/include/thrust/iterator/counting_iterator.h +335 -0
  1368. cuda/cccl/headers/include/thrust/iterator/detail/any_assign.h +48 -0
  1369. cuda/cccl/headers/include/thrust/iterator/detail/any_system_tag.h +43 -0
  1370. cuda/cccl/headers/include/thrust/iterator/detail/device_system_tag.h +38 -0
  1371. cuda/cccl/headers/include/thrust/iterator/detail/host_system_tag.h +38 -0
  1372. cuda/cccl/headers/include/thrust/iterator/detail/iterator_adaptor_base.h +81 -0
  1373. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_to_system.h +60 -0
  1374. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_to_traversal.h +65 -0
  1375. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h +57 -0
  1376. cuda/cccl/headers/include/thrust/iterator/detail/iterator_facade_category.h +182 -0
  1377. cuda/cccl/headers/include/thrust/iterator/detail/minimum_system.h +58 -0
  1378. cuda/cccl/headers/include/thrust/iterator/detail/normal_iterator.h +69 -0
  1379. cuda/cccl/headers/include/thrust/iterator/detail/retag.h +104 -0
  1380. cuda/cccl/headers/include/thrust/iterator/detail/tagged_iterator.h +81 -0
  1381. cuda/cccl/headers/include/thrust/iterator/detail/tuple_of_iterator_references.h +174 -0
  1382. cuda/cccl/headers/include/thrust/iterator/discard_iterator.h +163 -0
  1383. cuda/cccl/headers/include/thrust/iterator/iterator_adaptor.h +251 -0
  1384. cuda/cccl/headers/include/thrust/iterator/iterator_categories.h +211 -0
  1385. cuda/cccl/headers/include/thrust/iterator/iterator_facade.h +659 -0
  1386. cuda/cccl/headers/include/thrust/iterator/iterator_traits.h +334 -0
  1387. cuda/cccl/headers/include/thrust/iterator/iterator_traversal_tags.h +64 -0
  1388. cuda/cccl/headers/include/thrust/iterator/offset_iterator.h +194 -0
  1389. cuda/cccl/headers/include/thrust/iterator/permutation_iterator.h +204 -0
  1390. cuda/cccl/headers/include/thrust/iterator/retag.h +72 -0
  1391. cuda/cccl/headers/include/thrust/iterator/reverse_iterator.h +51 -0
  1392. cuda/cccl/headers/include/thrust/iterator/shuffle_iterator.h +185 -0
  1393. cuda/cccl/headers/include/thrust/iterator/strided_iterator.h +152 -0
  1394. cuda/cccl/headers/include/thrust/iterator/tabulate_output_iterator.h +152 -0
  1395. cuda/cccl/headers/include/thrust/iterator/transform_input_output_iterator.h +226 -0
  1396. cuda/cccl/headers/include/thrust/iterator/transform_iterator.h +351 -0
  1397. cuda/cccl/headers/include/thrust/iterator/transform_output_iterator.h +190 -0
  1398. cuda/cccl/headers/include/thrust/iterator/zip_iterator.h +359 -0
  1399. cuda/cccl/headers/include/thrust/logical.h +290 -0
  1400. cuda/cccl/headers/include/thrust/memory.h +299 -0
  1401. cuda/cccl/headers/include/thrust/merge.h +725 -0
  1402. cuda/cccl/headers/include/thrust/mismatch.h +261 -0
  1403. cuda/cccl/headers/include/thrust/mr/allocator.h +229 -0
  1404. cuda/cccl/headers/include/thrust/mr/device_memory_resource.h +41 -0
  1405. cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +528 -0
  1406. cuda/cccl/headers/include/thrust/mr/disjoint_sync_pool.h +118 -0
  1407. cuda/cccl/headers/include/thrust/mr/disjoint_tls_pool.h +67 -0
  1408. cuda/cccl/headers/include/thrust/mr/fancy_pointer_resource.h +67 -0
  1409. cuda/cccl/headers/include/thrust/mr/host_memory_resource.h +38 -0
  1410. cuda/cccl/headers/include/thrust/mr/memory_resource.h +217 -0
  1411. cuda/cccl/headers/include/thrust/mr/new.h +100 -0
  1412. cuda/cccl/headers/include/thrust/mr/polymorphic_adaptor.h +63 -0
  1413. cuda/cccl/headers/include/thrust/mr/pool.h +528 -0
  1414. cuda/cccl/headers/include/thrust/mr/pool_options.h +174 -0
  1415. cuda/cccl/headers/include/thrust/mr/sync_pool.h +114 -0
  1416. cuda/cccl/headers/include/thrust/mr/tls_pool.h +64 -0
  1417. cuda/cccl/headers/include/thrust/mr/universal_memory_resource.h +29 -0
  1418. cuda/cccl/headers/include/thrust/mr/validator.h +56 -0
  1419. cuda/cccl/headers/include/thrust/pair.h +99 -0
  1420. cuda/cccl/headers/include/thrust/partition.h +1391 -0
  1421. cuda/cccl/headers/include/thrust/per_device_resource.h +98 -0
  1422. cuda/cccl/headers/include/thrust/random/detail/discard_block_engine.inl +184 -0
  1423. cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine.inl +155 -0
  1424. cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine_discard.h +104 -0
  1425. cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine.inl +151 -0
  1426. cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h +53 -0
  1427. cuda/cccl/headers/include/thrust/random/detail/mod.h +101 -0
  1428. cuda/cccl/headers/include/thrust/random/detail/normal_distribution.inl +187 -0
  1429. cuda/cccl/headers/include/thrust/random/detail/normal_distribution_base.h +160 -0
  1430. cuda/cccl/headers/include/thrust/random/detail/random_core_access.h +63 -0
  1431. cuda/cccl/headers/include/thrust/random/detail/subtract_with_carry_engine.inl +201 -0
  1432. cuda/cccl/headers/include/thrust/random/detail/uniform_int_distribution.inl +198 -0
  1433. cuda/cccl/headers/include/thrust/random/detail/uniform_real_distribution.inl +200 -0
  1434. cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine.inl +183 -0
  1435. cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine_max.h +187 -0
  1436. cuda/cccl/headers/include/thrust/random/discard_block_engine.h +240 -0
  1437. cuda/cccl/headers/include/thrust/random/linear_congruential_engine.h +289 -0
  1438. cuda/cccl/headers/include/thrust/random/linear_feedback_shift_engine.h +217 -0
  1439. cuda/cccl/headers/include/thrust/random/normal_distribution.h +257 -0
  1440. cuda/cccl/headers/include/thrust/random/subtract_with_carry_engine.h +247 -0
  1441. cuda/cccl/headers/include/thrust/random/uniform_int_distribution.h +261 -0
  1442. cuda/cccl/headers/include/thrust/random/uniform_real_distribution.h +258 -0
  1443. cuda/cccl/headers/include/thrust/random/xor_combine_engine.h +255 -0
  1444. cuda/cccl/headers/include/thrust/random.h +120 -0
  1445. cuda/cccl/headers/include/thrust/reduce.h +1113 -0
  1446. cuda/cccl/headers/include/thrust/remove.h +768 -0
  1447. cuda/cccl/headers/include/thrust/replace.h +826 -0
  1448. cuda/cccl/headers/include/thrust/reverse.h +215 -0
  1449. cuda/cccl/headers/include/thrust/scan.h +1671 -0
  1450. cuda/cccl/headers/include/thrust/scatter.h +446 -0
  1451. cuda/cccl/headers/include/thrust/sequence.h +277 -0
  1452. cuda/cccl/headers/include/thrust/set_operations.h +3026 -0
  1453. cuda/cccl/headers/include/thrust/shuffle.h +182 -0
  1454. cuda/cccl/headers/include/thrust/sort.h +1320 -0
  1455. cuda/cccl/headers/include/thrust/swap.h +147 -0
  1456. cuda/cccl/headers/include/thrust/system/cpp/detail/adjacent_difference.h +30 -0
  1457. cuda/cccl/headers/include/thrust/system/cpp/detail/assign_value.h +30 -0
  1458. cuda/cccl/headers/include/thrust/system/cpp/detail/binary_search.h +32 -0
  1459. cuda/cccl/headers/include/thrust/system/cpp/detail/copy.h +30 -0
  1460. cuda/cccl/headers/include/thrust/system/cpp/detail/copy_if.h +30 -0
  1461. cuda/cccl/headers/include/thrust/system/cpp/detail/count.h +29 -0
  1462. cuda/cccl/headers/include/thrust/system/cpp/detail/equal.h +29 -0
  1463. cuda/cccl/headers/include/thrust/system/cpp/detail/execution_policy.h +109 -0
  1464. cuda/cccl/headers/include/thrust/system/cpp/detail/extrema.h +30 -0
  1465. cuda/cccl/headers/include/thrust/system/cpp/detail/fill.h +29 -0
  1466. cuda/cccl/headers/include/thrust/system/cpp/detail/find.h +30 -0
  1467. cuda/cccl/headers/include/thrust/system/cpp/detail/for_each.h +30 -0
  1468. cuda/cccl/headers/include/thrust/system/cpp/detail/gather.h +29 -0
  1469. cuda/cccl/headers/include/thrust/system/cpp/detail/generate.h +29 -0
  1470. cuda/cccl/headers/include/thrust/system/cpp/detail/get_value.h +30 -0
  1471. cuda/cccl/headers/include/thrust/system/cpp/detail/inner_product.h +29 -0
  1472. cuda/cccl/headers/include/thrust/system/cpp/detail/iter_swap.h +30 -0
  1473. cuda/cccl/headers/include/thrust/system/cpp/detail/logical.h +29 -0
  1474. cuda/cccl/headers/include/thrust/system/cpp/detail/malloc_and_free.h +30 -0
  1475. cuda/cccl/headers/include/thrust/system/cpp/detail/memory.inl +60 -0
  1476. cuda/cccl/headers/include/thrust/system/cpp/detail/merge.h +30 -0
  1477. cuda/cccl/headers/include/thrust/system/cpp/detail/mismatch.h +29 -0
  1478. cuda/cccl/headers/include/thrust/system/cpp/detail/partition.h +30 -0
  1479. cuda/cccl/headers/include/thrust/system/cpp/detail/per_device_resource.h +29 -0
  1480. cuda/cccl/headers/include/thrust/system/cpp/detail/reduce.h +30 -0
  1481. cuda/cccl/headers/include/thrust/system/cpp/detail/reduce_by_key.h +30 -0
  1482. cuda/cccl/headers/include/thrust/system/cpp/detail/remove.h +30 -0
  1483. cuda/cccl/headers/include/thrust/system/cpp/detail/replace.h +29 -0
  1484. cuda/cccl/headers/include/thrust/system/cpp/detail/reverse.h +29 -0
  1485. cuda/cccl/headers/include/thrust/system/cpp/detail/scan.h +30 -0
  1486. cuda/cccl/headers/include/thrust/system/cpp/detail/scan_by_key.h +30 -0
  1487. cuda/cccl/headers/include/thrust/system/cpp/detail/scatter.h +29 -0
  1488. cuda/cccl/headers/include/thrust/system/cpp/detail/sequence.h +29 -0
  1489. cuda/cccl/headers/include/thrust/system/cpp/detail/set_operations.h +30 -0
  1490. cuda/cccl/headers/include/thrust/system/cpp/detail/sort.h +30 -0
  1491. cuda/cccl/headers/include/thrust/system/cpp/detail/swap_ranges.h +29 -0
  1492. cuda/cccl/headers/include/thrust/system/cpp/detail/tabulate.h +29 -0
  1493. cuda/cccl/headers/include/thrust/system/cpp/detail/temporary_buffer.h +29 -0
  1494. cuda/cccl/headers/include/thrust/system/cpp/detail/transform.h +29 -0
  1495. cuda/cccl/headers/include/thrust/system/cpp/detail/transform_reduce.h +29 -0
  1496. cuda/cccl/headers/include/thrust/system/cpp/detail/transform_scan.h +29 -0
  1497. cuda/cccl/headers/include/thrust/system/cpp/detail/uninitialized_copy.h +29 -0
  1498. cuda/cccl/headers/include/thrust/system/cpp/detail/uninitialized_fill.h +29 -0
  1499. cuda/cccl/headers/include/thrust/system/cpp/detail/unique.h +30 -0
  1500. cuda/cccl/headers/include/thrust/system/cpp/detail/unique_by_key.h +30 -0
  1501. cuda/cccl/headers/include/thrust/system/cpp/execution_policy.h +63 -0
  1502. cuda/cccl/headers/include/thrust/system/cpp/memory.h +106 -0
  1503. cuda/cccl/headers/include/thrust/system/cpp/memory_resource.h +72 -0
  1504. cuda/cccl/headers/include/thrust/system/cpp/pointer.h +120 -0
  1505. cuda/cccl/headers/include/thrust/system/cpp/vector.h +96 -0
  1506. cuda/cccl/headers/include/thrust/system/cuda/config.h +126 -0
  1507. cuda/cccl/headers/include/thrust/system/cuda/detail/adjacent_difference.h +219 -0
  1508. cuda/cccl/headers/include/thrust/system/cuda/detail/assign_value.h +124 -0
  1509. cuda/cccl/headers/include/thrust/system/cuda/detail/binary_search.h +29 -0
  1510. cuda/cccl/headers/include/thrust/system/cuda/detail/cdp_dispatch.h +72 -0
  1511. cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +273 -0
  1512. cuda/cccl/headers/include/thrust/system/cuda/detail/copy_if.h +255 -0
  1513. cuda/cccl/headers/include/thrust/system/cuda/detail/core/agent_launcher.h +289 -0
  1514. cuda/cccl/headers/include/thrust/system/cuda/detail/core/triple_chevron_launch.h +191 -0
  1515. cuda/cccl/headers/include/thrust/system/cuda/detail/core/util.h +593 -0
  1516. cuda/cccl/headers/include/thrust/system/cuda/detail/count.h +75 -0
  1517. cuda/cccl/headers/include/thrust/system/cuda/detail/cross_system.h +243 -0
  1518. cuda/cccl/headers/include/thrust/system/cuda/detail/dispatch.h +233 -0
  1519. cuda/cccl/headers/include/thrust/system/cuda/detail/equal.h +64 -0
  1520. cuda/cccl/headers/include/thrust/system/cuda/detail/error.inl +96 -0
  1521. cuda/cccl/headers/include/thrust/system/cuda/detail/execution_policy.h +264 -0
  1522. cuda/cccl/headers/include/thrust/system/cuda/detail/extrema.h +476 -0
  1523. cuda/cccl/headers/include/thrust/system/cuda/detail/fill.h +100 -0
  1524. cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +170 -0
  1525. cuda/cccl/headers/include/thrust/system/cuda/detail/for_each.h +83 -0
  1526. cuda/cccl/headers/include/thrust/system/cuda/detail/gather.h +91 -0
  1527. cuda/cccl/headers/include/thrust/system/cuda/detail/generate.h +60 -0
  1528. cuda/cccl/headers/include/thrust/system/cuda/detail/get_value.h +65 -0
  1529. cuda/cccl/headers/include/thrust/system/cuda/detail/inner_product.h +75 -0
  1530. cuda/cccl/headers/include/thrust/system/cuda/detail/iter_swap.h +80 -0
  1531. cuda/cccl/headers/include/thrust/system/cuda/detail/logical.h +29 -0
  1532. cuda/cccl/headers/include/thrust/system/cuda/detail/make_unsigned_special.h +61 -0
  1533. cuda/cccl/headers/include/thrust/system/cuda/detail/malloc_and_free.h +121 -0
  1534. cuda/cccl/headers/include/thrust/system/cuda/detail/memory.inl +57 -0
  1535. cuda/cccl/headers/include/thrust/system/cuda/detail/merge.h +228 -0
  1536. cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +223 -0
  1537. cuda/cccl/headers/include/thrust/system/cuda/detail/parallel_for.h +81 -0
  1538. cuda/cccl/headers/include/thrust/system/cuda/detail/partition.h +405 -0
  1539. cuda/cccl/headers/include/thrust/system/cuda/detail/per_device_resource.h +72 -0
  1540. cuda/cccl/headers/include/thrust/system/cuda/detail/reduce.h +785 -0
  1541. cuda/cccl/headers/include/thrust/system/cuda/detail/reduce_by_key.h +1001 -0
  1542. cuda/cccl/headers/include/thrust/system/cuda/detail/remove.h +107 -0
  1543. cuda/cccl/headers/include/thrust/system/cuda/detail/replace.h +122 -0
  1544. cuda/cccl/headers/include/thrust/system/cuda/detail/reverse.h +87 -0
  1545. cuda/cccl/headers/include/thrust/system/cuda/detail/scan.h +341 -0
  1546. cuda/cccl/headers/include/thrust/system/cuda/detail/scan_by_key.h +414 -0
  1547. cuda/cccl/headers/include/thrust/system/cuda/detail/scatter.h +91 -0
  1548. cuda/cccl/headers/include/thrust/system/cuda/detail/sequence.h +29 -0
  1549. cuda/cccl/headers/include/thrust/system/cuda/detail/set_operations.h +1734 -0
  1550. cuda/cccl/headers/include/thrust/system/cuda/detail/sort.h +469 -0
  1551. cuda/cccl/headers/include/thrust/system/cuda/detail/swap_ranges.h +98 -0
  1552. cuda/cccl/headers/include/thrust/system/cuda/detail/tabulate.h +61 -0
  1553. cuda/cccl/headers/include/thrust/system/cuda/detail/temporary_buffer.h +132 -0
  1554. cuda/cccl/headers/include/thrust/system/cuda/detail/terminate.h +53 -0
  1555. cuda/cccl/headers/include/thrust/system/cuda/detail/transform.h +429 -0
  1556. cuda/cccl/headers/include/thrust/system/cuda/detail/transform_reduce.h +143 -0
  1557. cuda/cccl/headers/include/thrust/system/cuda/detail/transform_scan.h +119 -0
  1558. cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_copy.h +117 -0
  1559. cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_fill.h +105 -0
  1560. cuda/cccl/headers/include/thrust/system/cuda/detail/unique.h +289 -0
  1561. cuda/cccl/headers/include/thrust/system/cuda/detail/unique_by_key.h +310 -0
  1562. cuda/cccl/headers/include/thrust/system/cuda/detail/util.h +253 -0
  1563. cuda/cccl/headers/include/thrust/system/cuda/error.h +168 -0
  1564. cuda/cccl/headers/include/thrust/system/cuda/execution_policy.h +15 -0
  1565. cuda/cccl/headers/include/thrust/system/cuda/memory.h +122 -0
  1566. cuda/cccl/headers/include/thrust/system/cuda/memory_resource.h +122 -0
  1567. cuda/cccl/headers/include/thrust/system/cuda/pointer.h +160 -0
  1568. cuda/cccl/headers/include/thrust/system/cuda/vector.h +108 -0
  1569. cuda/cccl/headers/include/thrust/system/detail/adl/adjacent_difference.h +51 -0
  1570. cuda/cccl/headers/include/thrust/system/detail/adl/assign_value.h +51 -0
  1571. cuda/cccl/headers/include/thrust/system/detail/adl/binary_search.h +51 -0
  1572. cuda/cccl/headers/include/thrust/system/detail/adl/copy.h +51 -0
  1573. cuda/cccl/headers/include/thrust/system/detail/adl/copy_if.h +52 -0
  1574. cuda/cccl/headers/include/thrust/system/detail/adl/count.h +51 -0
  1575. cuda/cccl/headers/include/thrust/system/detail/adl/equal.h +51 -0
  1576. cuda/cccl/headers/include/thrust/system/detail/adl/extrema.h +51 -0
  1577. cuda/cccl/headers/include/thrust/system/detail/adl/fill.h +51 -0
  1578. cuda/cccl/headers/include/thrust/system/detail/adl/find.h +51 -0
  1579. cuda/cccl/headers/include/thrust/system/detail/adl/for_each.h +51 -0
  1580. cuda/cccl/headers/include/thrust/system/detail/adl/gather.h +51 -0
  1581. cuda/cccl/headers/include/thrust/system/detail/adl/generate.h +51 -0
  1582. cuda/cccl/headers/include/thrust/system/detail/adl/get_value.h +51 -0
  1583. cuda/cccl/headers/include/thrust/system/detail/adl/inner_product.h +51 -0
  1584. cuda/cccl/headers/include/thrust/system/detail/adl/iter_swap.h +51 -0
  1585. cuda/cccl/headers/include/thrust/system/detail/adl/logical.h +51 -0
  1586. cuda/cccl/headers/include/thrust/system/detail/adl/malloc_and_free.h +51 -0
  1587. cuda/cccl/headers/include/thrust/system/detail/adl/merge.h +51 -0
  1588. cuda/cccl/headers/include/thrust/system/detail/adl/mismatch.h +51 -0
  1589. cuda/cccl/headers/include/thrust/system/detail/adl/partition.h +51 -0
  1590. cuda/cccl/headers/include/thrust/system/detail/adl/per_device_resource.h +51 -0
  1591. cuda/cccl/headers/include/thrust/system/detail/adl/reduce.h +51 -0
  1592. cuda/cccl/headers/include/thrust/system/detail/adl/reduce_by_key.h +51 -0
  1593. cuda/cccl/headers/include/thrust/system/detail/adl/remove.h +51 -0
  1594. cuda/cccl/headers/include/thrust/system/detail/adl/replace.h +51 -0
  1595. cuda/cccl/headers/include/thrust/system/detail/adl/reverse.h +51 -0
  1596. cuda/cccl/headers/include/thrust/system/detail/adl/scan.h +51 -0
  1597. cuda/cccl/headers/include/thrust/system/detail/adl/scan_by_key.h +51 -0
  1598. cuda/cccl/headers/include/thrust/system/detail/adl/scatter.h +51 -0
  1599. cuda/cccl/headers/include/thrust/system/detail/adl/sequence.h +51 -0
  1600. cuda/cccl/headers/include/thrust/system/detail/adl/set_operations.h +51 -0
  1601. cuda/cccl/headers/include/thrust/system/detail/adl/sort.h +51 -0
  1602. cuda/cccl/headers/include/thrust/system/detail/adl/swap_ranges.h +51 -0
  1603. cuda/cccl/headers/include/thrust/system/detail/adl/tabulate.h +51 -0
  1604. cuda/cccl/headers/include/thrust/system/detail/adl/temporary_buffer.h +51 -0
  1605. cuda/cccl/headers/include/thrust/system/detail/adl/transform.h +51 -0
  1606. cuda/cccl/headers/include/thrust/system/detail/adl/transform_reduce.h +51 -0
  1607. cuda/cccl/headers/include/thrust/system/detail/adl/transform_scan.h +51 -0
  1608. cuda/cccl/headers/include/thrust/system/detail/adl/uninitialized_copy.h +51 -0
  1609. cuda/cccl/headers/include/thrust/system/detail/adl/uninitialized_fill.h +51 -0
  1610. cuda/cccl/headers/include/thrust/system/detail/adl/unique.h +51 -0
  1611. cuda/cccl/headers/include/thrust/system/detail/adl/unique_by_key.h +51 -0
  1612. cuda/cccl/headers/include/thrust/system/detail/bad_alloc.h +61 -0
  1613. cuda/cccl/headers/include/thrust/system/detail/errno.h +120 -0
  1614. cuda/cccl/headers/include/thrust/system/detail/error_category.inl +302 -0
  1615. cuda/cccl/headers/include/thrust/system/detail/error_code.inl +173 -0
  1616. cuda/cccl/headers/include/thrust/system/detail/error_condition.inl +121 -0
  1617. cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.h +53 -0
  1618. cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.inl +79 -0
  1619. cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.h +161 -0
  1620. cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.inl +384 -0
  1621. cuda/cccl/headers/include/thrust/system/detail/generic/copy.h +45 -0
  1622. cuda/cccl/headers/include/thrust/system/detail/generic/copy.inl +64 -0
  1623. cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.h +58 -0
  1624. cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.inl +146 -0
  1625. cuda/cccl/headers/include/thrust/system/detail/generic/count.h +48 -0
  1626. cuda/cccl/headers/include/thrust/system/detail/generic/count.inl +84 -0
  1627. cuda/cccl/headers/include/thrust/system/detail/generic/equal.h +49 -0
  1628. cuda/cccl/headers/include/thrust/system/detail/generic/equal.inl +60 -0
  1629. cuda/cccl/headers/include/thrust/system/detail/generic/extrema.h +66 -0
  1630. cuda/cccl/headers/include/thrust/system/detail/generic/extrema.inl +252 -0
  1631. cuda/cccl/headers/include/thrust/system/detail/generic/fill.h +54 -0
  1632. cuda/cccl/headers/include/thrust/system/detail/generic/find.h +49 -0
  1633. cuda/cccl/headers/include/thrust/system/detail/generic/find.inl +137 -0
  1634. cuda/cccl/headers/include/thrust/system/detail/generic/for_each.h +58 -0
  1635. cuda/cccl/headers/include/thrust/system/detail/generic/gather.h +73 -0
  1636. cuda/cccl/headers/include/thrust/system/detail/generic/gather.inl +96 -0
  1637. cuda/cccl/headers/include/thrust/system/detail/generic/generate.h +45 -0
  1638. cuda/cccl/headers/include/thrust/system/detail/generic/generate.inl +63 -0
  1639. cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.h +60 -0
  1640. cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.inl +72 -0
  1641. cuda/cccl/headers/include/thrust/system/detail/generic/logical.h +59 -0
  1642. cuda/cccl/headers/include/thrust/system/detail/generic/memory.h +64 -0
  1643. cuda/cccl/headers/include/thrust/system/detail/generic/memory.inl +86 -0
  1644. cuda/cccl/headers/include/thrust/system/detail/generic/merge.h +99 -0
  1645. cuda/cccl/headers/include/thrust/system/detail/generic/merge.inl +148 -0
  1646. cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.h +49 -0
  1647. cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.inl +68 -0
  1648. cuda/cccl/headers/include/thrust/system/detail/generic/partition.h +129 -0
  1649. cuda/cccl/headers/include/thrust/system/detail/generic/partition.inl +207 -0
  1650. cuda/cccl/headers/include/thrust/system/detail/generic/per_device_resource.h +43 -0
  1651. cuda/cccl/headers/include/thrust/system/detail/generic/reduce.h +71 -0
  1652. cuda/cccl/headers/include/thrust/system/detail/generic/reduce.inl +100 -0
  1653. cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.h +83 -0
  1654. cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.inl +186 -0
  1655. cuda/cccl/headers/include/thrust/system/detail/generic/remove.h +86 -0
  1656. cuda/cccl/headers/include/thrust/system/detail/generic/remove.inl +121 -0
  1657. cuda/cccl/headers/include/thrust/system/detail/generic/replace.h +95 -0
  1658. cuda/cccl/headers/include/thrust/system/detail/generic/replace.inl +175 -0
  1659. cuda/cccl/headers/include/thrust/system/detail/generic/reverse.h +48 -0
  1660. cuda/cccl/headers/include/thrust/system/detail/generic/reverse.inl +67 -0
  1661. cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.h +63 -0
  1662. cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.inl +126 -0
  1663. cuda/cccl/headers/include/thrust/system/detail/generic/scan.h +72 -0
  1664. cuda/cccl/headers/include/thrust/system/detail/generic/scan.inl +85 -0
  1665. cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.h +126 -0
  1666. cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.inl +232 -0
  1667. cuda/cccl/headers/include/thrust/system/detail/generic/scatter.h +73 -0
  1668. cuda/cccl/headers/include/thrust/system/detail/generic/scatter.inl +85 -0
  1669. cuda/cccl/headers/include/thrust/system/detail/generic/select_system.h +104 -0
  1670. cuda/cccl/headers/include/thrust/system/detail/generic/sequence.h +70 -0
  1671. cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.h +282 -0
  1672. cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.inl +476 -0
  1673. cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.h +54 -0
  1674. cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.inl +125 -0
  1675. cuda/cccl/headers/include/thrust/system/detail/generic/sort.h +113 -0
  1676. cuda/cccl/headers/include/thrust/system/detail/generic/sort.inl +175 -0
  1677. cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.h +44 -0
  1678. cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.inl +76 -0
  1679. cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.h +41 -0
  1680. cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.inl +54 -0
  1681. cuda/cccl/headers/include/thrust/system/detail/generic/tag.h +47 -0
  1682. cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.h +54 -0
  1683. cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.inl +82 -0
  1684. cuda/cccl/headers/include/thrust/system/detail/generic/transform.h +395 -0
  1685. cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.h +50 -0
  1686. cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.inl +56 -0
  1687. cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.h +80 -0
  1688. cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.inl +113 -0
  1689. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.h +45 -0
  1690. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.inl +166 -0
  1691. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.h +45 -0
  1692. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.inl +115 -0
  1693. cuda/cccl/headers/include/thrust/system/detail/generic/unique.h +71 -0
  1694. cuda/cccl/headers/include/thrust/system/detail/generic/unique.inl +113 -0
  1695. cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.h +81 -0
  1696. cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.inl +126 -0
  1697. cuda/cccl/headers/include/thrust/system/detail/internal/decompose.h +117 -0
  1698. cuda/cccl/headers/include/thrust/system/detail/sequential/adjacent_difference.h +70 -0
  1699. cuda/cccl/headers/include/thrust/system/detail/sequential/assign_value.h +42 -0
  1700. cuda/cccl/headers/include/thrust/system/detail/sequential/binary_search.h +136 -0
  1701. cuda/cccl/headers/include/thrust/system/detail/sequential/copy.h +49 -0
  1702. cuda/cccl/headers/include/thrust/system/detail/sequential/copy.inl +119 -0
  1703. cuda/cccl/headers/include/thrust/system/detail/sequential/copy_backward.h +49 -0
  1704. cuda/cccl/headers/include/thrust/system/detail/sequential/copy_if.h +71 -0
  1705. cuda/cccl/headers/include/thrust/system/detail/sequential/count.h +29 -0
  1706. cuda/cccl/headers/include/thrust/system/detail/sequential/equal.h +29 -0
  1707. cuda/cccl/headers/include/thrust/system/detail/sequential/execution_policy.h +52 -0
  1708. cuda/cccl/headers/include/thrust/system/detail/sequential/extrema.h +110 -0
  1709. cuda/cccl/headers/include/thrust/system/detail/sequential/fill.h +29 -0
  1710. cuda/cccl/headers/include/thrust/system/detail/sequential/find.h +62 -0
  1711. cuda/cccl/headers/include/thrust/system/detail/sequential/for_each.h +74 -0
  1712. cuda/cccl/headers/include/thrust/system/detail/sequential/gather.h +29 -0
  1713. cuda/cccl/headers/include/thrust/system/detail/sequential/general_copy.h +123 -0
  1714. cuda/cccl/headers/include/thrust/system/detail/sequential/generate.h +29 -0
  1715. cuda/cccl/headers/include/thrust/system/detail/sequential/get_value.h +43 -0
  1716. cuda/cccl/headers/include/thrust/system/detail/sequential/inner_product.h +29 -0
  1717. cuda/cccl/headers/include/thrust/system/detail/sequential/insertion_sort.h +141 -0
  1718. cuda/cccl/headers/include/thrust/system/detail/sequential/iter_swap.h +45 -0
  1719. cuda/cccl/headers/include/thrust/system/detail/sequential/logical.h +29 -0
  1720. cuda/cccl/headers/include/thrust/system/detail/sequential/malloc_and_free.h +50 -0
  1721. cuda/cccl/headers/include/thrust/system/detail/sequential/merge.h +75 -0
  1722. cuda/cccl/headers/include/thrust/system/detail/sequential/merge.inl +145 -0
  1723. cuda/cccl/headers/include/thrust/system/detail/sequential/mismatch.h +29 -0
  1724. cuda/cccl/headers/include/thrust/system/detail/sequential/partition.h +301 -0
  1725. cuda/cccl/headers/include/thrust/system/detail/sequential/per_device_resource.h +29 -0
  1726. cuda/cccl/headers/include/thrust/system/detail/sequential/reduce.h +64 -0
  1727. cuda/cccl/headers/include/thrust/system/detail/sequential/reduce_by_key.h +98 -0
  1728. cuda/cccl/headers/include/thrust/system/detail/sequential/remove.h +179 -0
  1729. cuda/cccl/headers/include/thrust/system/detail/sequential/replace.h +29 -0
  1730. cuda/cccl/headers/include/thrust/system/detail/sequential/reverse.h +29 -0
  1731. cuda/cccl/headers/include/thrust/system/detail/sequential/scan.h +154 -0
  1732. cuda/cccl/headers/include/thrust/system/detail/sequential/scan_by_key.h +145 -0
  1733. cuda/cccl/headers/include/thrust/system/detail/sequential/scatter.h +29 -0
  1734. cuda/cccl/headers/include/thrust/system/detail/sequential/sequence.h +29 -0
  1735. cuda/cccl/headers/include/thrust/system/detail/sequential/set_operations.h +206 -0
  1736. cuda/cccl/headers/include/thrust/system/detail/sequential/sort.h +59 -0
  1737. cuda/cccl/headers/include/thrust/system/detail/sequential/sort.inl +116 -0
  1738. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.h +55 -0
  1739. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.inl +356 -0
  1740. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.h +48 -0
  1741. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.inl +124 -0
  1742. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.h +48 -0
  1743. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.inl +586 -0
  1744. cuda/cccl/headers/include/thrust/system/detail/sequential/swap_ranges.h +29 -0
  1745. cuda/cccl/headers/include/thrust/system/detail/sequential/tabulate.h +29 -0
  1746. cuda/cccl/headers/include/thrust/system/detail/sequential/temporary_buffer.h +29 -0
  1747. cuda/cccl/headers/include/thrust/system/detail/sequential/transform.h +29 -0
  1748. cuda/cccl/headers/include/thrust/system/detail/sequential/transform_reduce.h +29 -0
  1749. cuda/cccl/headers/include/thrust/system/detail/sequential/transform_scan.h +29 -0
  1750. cuda/cccl/headers/include/thrust/system/detail/sequential/trivial_copy.h +58 -0
  1751. cuda/cccl/headers/include/thrust/system/detail/sequential/uninitialized_copy.h +29 -0
  1752. cuda/cccl/headers/include/thrust/system/detail/sequential/uninitialized_fill.h +29 -0
  1753. cuda/cccl/headers/include/thrust/system/detail/sequential/unique.h +115 -0
  1754. cuda/cccl/headers/include/thrust/system/detail/sequential/unique_by_key.h +106 -0
  1755. cuda/cccl/headers/include/thrust/system/detail/system_error.inl +108 -0
  1756. cuda/cccl/headers/include/thrust/system/error_code.h +512 -0
  1757. cuda/cccl/headers/include/thrust/system/omp/detail/adjacent_difference.h +54 -0
  1758. cuda/cccl/headers/include/thrust/system/omp/detail/assign_value.h +30 -0
  1759. cuda/cccl/headers/include/thrust/system/omp/detail/binary_search.h +77 -0
  1760. cuda/cccl/headers/include/thrust/system/omp/detail/copy.h +50 -0
  1761. cuda/cccl/headers/include/thrust/system/omp/detail/copy.inl +74 -0
  1762. cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.h +56 -0
  1763. cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.inl +59 -0
  1764. cuda/cccl/headers/include/thrust/system/omp/detail/count.h +30 -0
  1765. cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.h +50 -0
  1766. cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.inl +65 -0
  1767. cuda/cccl/headers/include/thrust/system/omp/detail/equal.h +30 -0
  1768. cuda/cccl/headers/include/thrust/system/omp/detail/execution_policy.h +127 -0
  1769. cuda/cccl/headers/include/thrust/system/omp/detail/extrema.h +66 -0
  1770. cuda/cccl/headers/include/thrust/system/omp/detail/fill.h +30 -0
  1771. cuda/cccl/headers/include/thrust/system/omp/detail/find.h +53 -0
  1772. cuda/cccl/headers/include/thrust/system/omp/detail/for_each.h +56 -0
  1773. cuda/cccl/headers/include/thrust/system/omp/detail/for_each.inl +87 -0
  1774. cuda/cccl/headers/include/thrust/system/omp/detail/gather.h +30 -0
  1775. cuda/cccl/headers/include/thrust/system/omp/detail/generate.h +30 -0
  1776. cuda/cccl/headers/include/thrust/system/omp/detail/get_value.h +30 -0
  1777. cuda/cccl/headers/include/thrust/system/omp/detail/inner_product.h +30 -0
  1778. cuda/cccl/headers/include/thrust/system/omp/detail/iter_swap.h +30 -0
  1779. cuda/cccl/headers/include/thrust/system/omp/detail/logical.h +30 -0
  1780. cuda/cccl/headers/include/thrust/system/omp/detail/malloc_and_free.h +30 -0
  1781. cuda/cccl/headers/include/thrust/system/omp/detail/memory.inl +93 -0
  1782. cuda/cccl/headers/include/thrust/system/omp/detail/merge.h +30 -0
  1783. cuda/cccl/headers/include/thrust/system/omp/detail/mismatch.h +30 -0
  1784. cuda/cccl/headers/include/thrust/system/omp/detail/partition.h +88 -0
  1785. cuda/cccl/headers/include/thrust/system/omp/detail/partition.inl +102 -0
  1786. cuda/cccl/headers/include/thrust/system/omp/detail/per_device_resource.h +29 -0
  1787. cuda/cccl/headers/include/thrust/system/omp/detail/pragma_omp.h +54 -0
  1788. cuda/cccl/headers/include/thrust/system/omp/detail/reduce.h +54 -0
  1789. cuda/cccl/headers/include/thrust/system/omp/detail/reduce.inl +78 -0
  1790. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.h +64 -0
  1791. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.inl +65 -0
  1792. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.h +59 -0
  1793. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.inl +103 -0
  1794. cuda/cccl/headers/include/thrust/system/omp/detail/remove.h +72 -0
  1795. cuda/cccl/headers/include/thrust/system/omp/detail/remove.inl +87 -0
  1796. cuda/cccl/headers/include/thrust/system/omp/detail/replace.h +30 -0
  1797. cuda/cccl/headers/include/thrust/system/omp/detail/reverse.h +30 -0
  1798. cuda/cccl/headers/include/thrust/system/omp/detail/scan.h +73 -0
  1799. cuda/cccl/headers/include/thrust/system/omp/detail/scan.inl +172 -0
  1800. cuda/cccl/headers/include/thrust/system/omp/detail/scan_by_key.h +36 -0
  1801. cuda/cccl/headers/include/thrust/system/omp/detail/scatter.h +30 -0
  1802. cuda/cccl/headers/include/thrust/system/omp/detail/sequence.h +30 -0
  1803. cuda/cccl/headers/include/thrust/system/omp/detail/set_operations.h +30 -0
  1804. cuda/cccl/headers/include/thrust/system/omp/detail/sort.h +60 -0
  1805. cuda/cccl/headers/include/thrust/system/omp/detail/sort.inl +265 -0
  1806. cuda/cccl/headers/include/thrust/system/omp/detail/swap_ranges.h +30 -0
  1807. cuda/cccl/headers/include/thrust/system/omp/detail/tabulate.h +30 -0
  1808. cuda/cccl/headers/include/thrust/system/omp/detail/temporary_buffer.h +29 -0
  1809. cuda/cccl/headers/include/thrust/system/omp/detail/transform.h +30 -0
  1810. cuda/cccl/headers/include/thrust/system/omp/detail/transform_reduce.h +30 -0
  1811. cuda/cccl/headers/include/thrust/system/omp/detail/transform_scan.h +30 -0
  1812. cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_copy.h +30 -0
  1813. cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_fill.h +30 -0
  1814. cuda/cccl/headers/include/thrust/system/omp/detail/unique.h +60 -0
  1815. cuda/cccl/headers/include/thrust/system/omp/detail/unique.inl +71 -0
  1816. cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.h +67 -0
  1817. cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.inl +75 -0
  1818. cuda/cccl/headers/include/thrust/system/omp/execution_policy.h +62 -0
  1819. cuda/cccl/headers/include/thrust/system/omp/memory.h +111 -0
  1820. cuda/cccl/headers/include/thrust/system/omp/memory_resource.h +75 -0
  1821. cuda/cccl/headers/include/thrust/system/omp/pointer.h +124 -0
  1822. cuda/cccl/headers/include/thrust/system/omp/vector.h +99 -0
  1823. cuda/cccl/headers/include/thrust/system/system_error.h +185 -0
  1824. cuda/cccl/headers/include/thrust/system/tbb/detail/adjacent_difference.h +54 -0
  1825. cuda/cccl/headers/include/thrust/system/tbb/detail/assign_value.h +30 -0
  1826. cuda/cccl/headers/include/thrust/system/tbb/detail/binary_search.h +30 -0
  1827. cuda/cccl/headers/include/thrust/system/tbb/detail/copy.h +50 -0
  1828. cuda/cccl/headers/include/thrust/system/tbb/detail/copy.inl +73 -0
  1829. cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.h +47 -0
  1830. cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.inl +136 -0
  1831. cuda/cccl/headers/include/thrust/system/tbb/detail/count.h +30 -0
  1832. cuda/cccl/headers/include/thrust/system/tbb/detail/equal.h +30 -0
  1833. cuda/cccl/headers/include/thrust/system/tbb/detail/execution_policy.h +109 -0
  1834. cuda/cccl/headers/include/thrust/system/tbb/detail/extrema.h +66 -0
  1835. cuda/cccl/headers/include/thrust/system/tbb/detail/fill.h +30 -0
  1836. cuda/cccl/headers/include/thrust/system/tbb/detail/find.h +49 -0
  1837. cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.h +51 -0
  1838. cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.inl +91 -0
  1839. cuda/cccl/headers/include/thrust/system/tbb/detail/gather.h +30 -0
  1840. cuda/cccl/headers/include/thrust/system/tbb/detail/generate.h +30 -0
  1841. cuda/cccl/headers/include/thrust/system/tbb/detail/get_value.h +30 -0
  1842. cuda/cccl/headers/include/thrust/system/tbb/detail/inner_product.h +30 -0
  1843. cuda/cccl/headers/include/thrust/system/tbb/detail/iter_swap.h +30 -0
  1844. cuda/cccl/headers/include/thrust/system/tbb/detail/logical.h +30 -0
  1845. cuda/cccl/headers/include/thrust/system/tbb/detail/malloc_and_free.h +30 -0
  1846. cuda/cccl/headers/include/thrust/system/tbb/detail/memory.inl +94 -0
  1847. cuda/cccl/headers/include/thrust/system/tbb/detail/merge.h +77 -0
  1848. cuda/cccl/headers/include/thrust/system/tbb/detail/merge.inl +327 -0
  1849. cuda/cccl/headers/include/thrust/system/tbb/detail/mismatch.h +30 -0
  1850. cuda/cccl/headers/include/thrust/system/tbb/detail/partition.h +84 -0
  1851. cuda/cccl/headers/include/thrust/system/tbb/detail/partition.inl +98 -0
  1852. cuda/cccl/headers/include/thrust/system/tbb/detail/per_device_resource.h +29 -0
  1853. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.h +54 -0
  1854. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.inl +137 -0
  1855. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.h +61 -0
  1856. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.inl +400 -0
  1857. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_intervals.h +140 -0
  1858. cuda/cccl/headers/include/thrust/system/tbb/detail/remove.h +76 -0
  1859. cuda/cccl/headers/include/thrust/system/tbb/detail/remove.inl +87 -0
  1860. cuda/cccl/headers/include/thrust/system/tbb/detail/replace.h +30 -0
  1861. cuda/cccl/headers/include/thrust/system/tbb/detail/reverse.h +30 -0
  1862. cuda/cccl/headers/include/thrust/system/tbb/detail/scan.h +59 -0
  1863. cuda/cccl/headers/include/thrust/system/tbb/detail/scan.inl +312 -0
  1864. cuda/cccl/headers/include/thrust/system/tbb/detail/scan_by_key.h +33 -0
  1865. cuda/cccl/headers/include/thrust/system/tbb/detail/scatter.h +30 -0
  1866. cuda/cccl/headers/include/thrust/system/tbb/detail/sequence.h +30 -0
  1867. cuda/cccl/headers/include/thrust/system/tbb/detail/set_operations.h +30 -0
  1868. cuda/cccl/headers/include/thrust/system/tbb/detail/sort.h +60 -0
  1869. cuda/cccl/headers/include/thrust/system/tbb/detail/sort.inl +295 -0
  1870. cuda/cccl/headers/include/thrust/system/tbb/detail/swap_ranges.h +30 -0
  1871. cuda/cccl/headers/include/thrust/system/tbb/detail/tabulate.h +30 -0
  1872. cuda/cccl/headers/include/thrust/system/tbb/detail/temporary_buffer.h +29 -0
  1873. cuda/cccl/headers/include/thrust/system/tbb/detail/transform.h +30 -0
  1874. cuda/cccl/headers/include/thrust/system/tbb/detail/transform_reduce.h +30 -0
  1875. cuda/cccl/headers/include/thrust/system/tbb/detail/transform_scan.h +30 -0
  1876. cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_copy.h +30 -0
  1877. cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_fill.h +30 -0
  1878. cuda/cccl/headers/include/thrust/system/tbb/detail/unique.h +60 -0
  1879. cuda/cccl/headers/include/thrust/system/tbb/detail/unique.inl +71 -0
  1880. cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.h +67 -0
  1881. cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.inl +75 -0
  1882. cuda/cccl/headers/include/thrust/system/tbb/execution_policy.h +62 -0
  1883. cuda/cccl/headers/include/thrust/system/tbb/memory.h +111 -0
  1884. cuda/cccl/headers/include/thrust/system/tbb/memory_resource.h +75 -0
  1885. cuda/cccl/headers/include/thrust/system/tbb/pointer.h +124 -0
  1886. cuda/cccl/headers/include/thrust/system/tbb/vector.h +99 -0
  1887. cuda/cccl/headers/include/thrust/system_error.h +57 -0
  1888. cuda/cccl/headers/include/thrust/tabulate.h +125 -0
  1889. cuda/cccl/headers/include/thrust/transform.h +1045 -0
  1890. cuda/cccl/headers/include/thrust/transform_reduce.h +190 -0
  1891. cuda/cccl/headers/include/thrust/transform_scan.h +442 -0
  1892. cuda/cccl/headers/include/thrust/tuple.h +139 -0
  1893. cuda/cccl/headers/include/thrust/type_traits/integer_sequence.h +261 -0
  1894. cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +154 -0
  1895. cuda/cccl/headers/include/thrust/type_traits/is_execution_policy.h +65 -0
  1896. cuda/cccl/headers/include/thrust/type_traits/is_operator_less_or_greater_function_object.h +184 -0
  1897. cuda/cccl/headers/include/thrust/type_traits/is_operator_plus_function_object.h +116 -0
  1898. cuda/cccl/headers/include/thrust/type_traits/is_trivially_relocatable.h +336 -0
  1899. cuda/cccl/headers/include/thrust/type_traits/logical_metafunctions.h +42 -0
  1900. cuda/cccl/headers/include/thrust/type_traits/unwrap_contiguous_iterator.h +63 -0
  1901. cuda/cccl/headers/include/thrust/uninitialized_copy.h +300 -0
  1902. cuda/cccl/headers/include/thrust/uninitialized_fill.h +268 -0
  1903. cuda/cccl/headers/include/thrust/unique.h +1088 -0
  1904. cuda/cccl/headers/include/thrust/universal_allocator.h +93 -0
  1905. cuda/cccl/headers/include/thrust/universal_ptr.h +34 -0
  1906. cuda/cccl/headers/include/thrust/universal_vector.h +71 -0
  1907. cuda/cccl/headers/include/thrust/version.h +93 -0
  1908. cuda/cccl/headers/include/thrust/zip_function.h +176 -0
  1909. cuda/cccl/headers/include_paths.py +51 -0
  1910. cuda/cccl/parallel/__init__.py +9 -0
  1911. cuda/cccl/parallel/experimental/__init__.py +24 -0
  1912. cuda/cccl/py.typed +0 -0
  1913. cuda/compute/__init__.py +79 -0
  1914. cuda/compute/_bindings.py +79 -0
  1915. cuda/compute/_bindings.pyi +475 -0
  1916. cuda/compute/_bindings_impl.pyx +2273 -0
  1917. cuda/compute/_caching.py +71 -0
  1918. cuda/compute/_cccl_interop.py +422 -0
  1919. cuda/compute/_utils/__init__.py +0 -0
  1920. cuda/compute/_utils/protocols.py +132 -0
  1921. cuda/compute/_utils/temp_storage_buffer.py +86 -0
  1922. cuda/compute/algorithms/__init__.py +54 -0
  1923. cuda/compute/algorithms/_histogram.py +243 -0
  1924. cuda/compute/algorithms/_merge_sort.py +225 -0
  1925. cuda/compute/algorithms/_radix_sort.py +312 -0
  1926. cuda/compute/algorithms/_reduce.py +182 -0
  1927. cuda/compute/algorithms/_scan.py +331 -0
  1928. cuda/compute/algorithms/_segmented_reduce.py +257 -0
  1929. cuda/compute/algorithms/_three_way_partition.py +261 -0
  1930. cuda/compute/algorithms/_transform.py +329 -0
  1931. cuda/compute/algorithms/_unique_by_key.py +252 -0
  1932. cuda/compute/cccl/.gitkeep +0 -0
  1933. cuda/compute/cu12/_bindings_impl.cp313-win_amd64.pyd +0 -0
  1934. cuda/compute/cu12/cccl/cccl.c.parallel.dll +0 -0
  1935. cuda/compute/cu12/cccl/cccl.c.parallel.lib +0 -0
  1936. cuda/compute/cu13/_bindings_impl.cp313-win_amd64.pyd +0 -0
  1937. cuda/compute/cu13/cccl/cccl.c.parallel.dll +0 -0
  1938. cuda/compute/cu13/cccl/cccl.c.parallel.lib +0 -0
  1939. cuda/compute/iterators/__init__.py +21 -0
  1940. cuda/compute/iterators/_factories.py +219 -0
  1941. cuda/compute/iterators/_iterators.py +817 -0
  1942. cuda/compute/iterators/_zip_iterator.py +199 -0
  1943. cuda/compute/numba_utils.py +53 -0
  1944. cuda/compute/op.py +3 -0
  1945. cuda/compute/struct.py +272 -0
  1946. cuda/compute/typing.py +37 -0
  1947. cuda/coop/__init__.py +8 -0
  1948. cuda/coop/_caching.py +48 -0
  1949. cuda/coop/_common.py +275 -0
  1950. cuda/coop/_nvrtc.py +92 -0
  1951. cuda/coop/_scan_op.py +181 -0
  1952. cuda/coop/_types.py +937 -0
  1953. cuda/coop/_typing.py +107 -0
  1954. cuda/coop/block/__init__.py +39 -0
  1955. cuda/coop/block/_block_exchange.py +251 -0
  1956. cuda/coop/block/_block_load_store.py +215 -0
  1957. cuda/coop/block/_block_merge_sort.py +125 -0
  1958. cuda/coop/block/_block_radix_sort.py +214 -0
  1959. cuda/coop/block/_block_reduce.py +294 -0
  1960. cuda/coop/block/_block_scan.py +983 -0
  1961. cuda/coop/warp/__init__.py +9 -0
  1962. cuda/coop/warp/_warp_merge_sort.py +92 -0
  1963. cuda/coop/warp/_warp_reduce.py +153 -0
  1964. cuda/coop/warp/_warp_scan.py +78 -0
  1965. cuda_cccl-0.3.3.dist-info/METADATA +41 -0
  1966. cuda_cccl-0.3.3.dist-info/RECORD +1968 -0
  1967. cuda_cccl-0.3.3.dist-info/WHEEL +5 -0
  1968. cuda_cccl-0.3.3.dist-info/licenses/LICENSE +1 -0
@@ -0,0 +1,1671 @@
1
+ /*
2
+ * Copyright 2008-2013 NVIDIA Corporation
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License");
5
+ * you may not use this file except in compliance with the License.
6
+ * You may obtain a copy of the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing, software
11
+ * distributed under the License is distributed on an "AS IS" BASIS,
12
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ * See the License for the specific language governing permissions and
14
+ * limitations under the License.
15
+ */
16
+
17
+ /*! \file scan.h
18
+ * \brief Functions for computing prefix sums
19
+ */
20
+
21
+ #pragma once
22
+
23
+ #include <thrust/detail/config.h>
24
+
25
+ #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
26
+ # pragma GCC system_header
27
+ #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
28
+ # pragma clang system_header
29
+ #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
30
+ # pragma system_header
31
+ #endif // no system header
32
+ #include <thrust/detail/execution_policy.h>
33
+
34
+ THRUST_NAMESPACE_BEGIN
35
+
36
+ /*! \addtogroup algorithms
37
+ */
38
+
39
+ /*! \addtogroup prefixsums Prefix Sums
40
+ * \ingroup algorithms
41
+ * \{
42
+ */
43
+
44
+ /*! \p inclusive_scan computes an inclusive prefix sum operation. The
45
+ * term 'inclusive' means that each result includes the corresponding
46
+ * input operand in the partial sum. More precisely, <tt>*first</tt> is
47
+ * assigned to <tt>*result</tt> and the sum of <tt>*first</tt> and
48
+ * <tt>*(first + 1)</tt> is assigned to <tt>*(result + 1)</tt>, and so on.
49
+ * This version of \p inclusive_scan assumes plus as the associative operator.
50
+ * When the input and output sequences are the same, the scan is performed
51
+ * in-place.
52
+ *
53
+ * \p inclusive_scan is similar to \c std::partial_sum in the STL. The primary
54
+ * difference between the two functions is that \c std::partial_sum guarantees
55
+ * a serial summation order, while \p inclusive_scan requires associativity of
56
+ * the binary operation to parallelize the prefix sum.
57
+ *
58
+ * Results are not deterministic for pseudo-associative operators (e.g.,
59
+ * addition of floating-point types). Results for pseudo-associative
60
+ * operators may vary from run to run.
61
+ *
62
+ * The algorithm's execution is parallelized as determined by \p exec.
63
+ *
64
+ * \param exec The execution policy to use for parallelization.
65
+ * \param first The beginning of the input sequence.
66
+ * \param last The end of the input sequence.
67
+ * \param result The beginning of the output sequence.
68
+ * \return The end of the output sequence.
69
+ *
70
+ * \tparam DerivedPolicy The name of the derived execution policy.
71
+ * \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
72
+ * Iterator</a> and \c InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type.
72
+ * \tparam OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output
74
+ * Iterator</a>, and if \c x and \c y are objects of \c OutputIterator's \c value_type, then <tt>x + y</tt> is defined.
75
+ * If \c T is \c OutputIterator's \c value_type, then <tt>T(0)</tt> is defined.
76
+ *
77
+ * \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range
78
+ * <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
79
+ *
80
+ * The following code snippet demonstrates how to use \p inclusive_scan to compute an in-place
81
+ * prefix sum using the \p thrust::host execution policy for parallelization:
82
+ *
83
+ * \code
84
+ * #include <thrust/scan.h>
85
+ * #include <thrust/execution_policy.h>
86
+ * ...
87
+ *
88
+ * int data[6] = {1, 0, 2, 2, 1, 3};
89
+ *
90
+ * thrust::inclusive_scan(thrust::host, data, data + 6, data); // in-place scan
91
+ *
92
+ * // data is now {1, 1, 3, 5, 6, 9}
93
+ * \endcode
94
+ *
95
+ * \see https://en.cppreference.com/w/cpp/algorithm/partial_sum
96
+ *
97
+ */
98
+ template <typename DerivedPolicy, typename InputIterator, typename OutputIterator>
99
+ _CCCL_HOST_DEVICE OutputIterator inclusive_scan(
100
+ const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
101
+ InputIterator first,
102
+ InputIterator last,
103
+ OutputIterator result);
104
+
105
+ /*! \p inclusive_scan computes an inclusive prefix sum operation. The
106
+ * term 'inclusive' means that each result includes the corresponding
107
+ * input operand in the partial sum. More precisely, <tt>*first</tt> is
108
+ * assigned to <tt>*result</tt> and the sum of <tt>*first</tt> and
109
+ * <tt>*(first + 1)</tt> is assigned to <tt>*(result + 1)</tt>, and so on.
110
+ * This version of \p inclusive_scan assumes plus as the associative operator.
111
+ * When the input and output sequences are the same, the scan is performed
112
+ * in-place.
113
+ *
114
+ * \p inclusive_scan is similar to \c std::partial_sum in the STL. The primary
115
+ * difference between the two functions is that \c std::partial_sum guarantees
116
+ * a serial summation order, while \p inclusive_scan requires associativity of
117
+ * the binary operation to parallelize the prefix sum.
118
+ *
119
+ * Results are not deterministic for pseudo-associative operators (e.g.,
120
+ * addition of floating-point types). Results for pseudo-associative
121
+ * operators may vary from run to run.
122
+ *
123
+ * \param first The beginning of the input sequence.
124
+ * \param last The end of the input sequence.
125
+ * \param result The beginning of the output sequence.
126
+ * \return The end of the output sequence.
127
+ *
128
+ * \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
129
+ * Iterator</a> and \c InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type.
129
+ * \tparam OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output
131
+ * Iterator</a>, and if \c x and \c y are objects of \c OutputIterator's \c value_type, then <tt>x + y</tt> is defined.
132
+ * If \c T is \c OutputIterator's \c value_type, then <tt>T(0)</tt> is defined.
133
+ *
134
+ * \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range
135
+ * <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
136
+ *
137
+ * The following code snippet demonstrates how to use \p inclusive_scan to compute an in-place prefix sum:
138
+ *
139
+ * \code
140
+ * #include <thrust/scan.h>
141
+ *
142
+ * int data[6] = {1, 0, 2, 2, 1, 3};
143
+ *
144
+ * thrust::inclusive_scan(data, data + 6, data); // in-place scan
145
+ *
146
+ * // data is now {1, 1, 3, 5, 6, 9}
147
+ * \endcode
148
+ *
149
+ * \see https://en.cppreference.com/w/cpp/algorithm/partial_sum
150
+ *
151
+ */
152
+ template <typename InputIterator, typename OutputIterator>
153
+ OutputIterator inclusive_scan(InputIterator first, InputIterator last, OutputIterator result);
154
+
155
+ /*! \p inclusive_scan computes an inclusive prefix sum operation. The
156
+ * term 'inclusive' means that each result includes the corresponding
157
+ * input operand in the partial sum. When the input and output sequences
158
+ * are the same, the scan is performed in-place.
159
+ *
160
+ * \p inclusive_scan is similar to \c std::partial_sum in the STL. The primary
161
+ * difference between the two functions is that \c std::partial_sum guarantees
162
+ * a serial summation order, while \p inclusive_scan requires associativity of
163
+ * the binary operation to parallelize the prefix sum.
164
+ *
165
+ * Results are not deterministic for pseudo-associative operators (e.g.,
166
+ * addition of floating-point types). Results for pseudo-associative
167
+ * operators may vary from run to run.
168
+ *
169
+ * The algorithm's execution is parallelized as determined by \p exec.
170
+ *
171
+ * \param exec The execution policy to use for parallelization.
172
+ * \param first The beginning of the input sequence.
173
+ * \param last The end of the input sequence.
174
+ * \param result The beginning of the output sequence.
175
+ * \param binary_op The associative operator used to 'sum' values.
176
+ * \return The end of the output sequence.
177
+ *
178
+ * \tparam DerivedPolicy The name of the derived execution policy.
179
+ * \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
180
+ * Iterator</a> and \c InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type.
180
+ * \tparam OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a>
182
+ * and \c OutputIterator's \c value_type is convertible to
183
+ * both \c AssociativeOperator's first and second argument type.
184
+ * \tparam AssociativeOperator The function's return type must be convertible to \c OutputIterator's \c value_type.
185
+ *
186
+ * \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range
187
+ * <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
188
+ *
189
+ * The following code snippet demonstrates how to use \p inclusive_scan to compute an in-place
190
+ * running maximum using the \p thrust::host execution policy for parallelization:
191
+ *
192
+ * \code
193
+ * int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8};
194
+ *
195
+ * ::cuda::maximum<int> binary_op;
196
+ *
197
+ * thrust::inclusive_scan(thrust::host, data, data + 10, data, binary_op); // in-place scan
198
+ *
199
+ * // data is now {-5, 0, 2, 2, 2, 4, 4, 4, 4, 8}
200
+ * \endcode
201
+ *
202
+ * \see https://en.cppreference.com/w/cpp/algorithm/partial_sum
203
+ */
204
+ template <typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename AssociativeOperator>
205
+ _CCCL_HOST_DEVICE OutputIterator inclusive_scan(
206
+ const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
207
+ InputIterator first,
208
+ InputIterator last,
209
+ OutputIterator result,
210
+ AssociativeOperator binary_op);
211
+
212
+ /*! \p inclusive_scan computes an inclusive prefix sum operation. The
213
+ * term 'inclusive' means that each result includes the corresponding
214
+ * input operand in the partial sum. When the input and output sequences
215
+ * are the same, the scan is performed in-place.
216
+ *
217
+ * \p inclusive_scan is similar to \c std::partial_sum in the STL. The primary
218
+ * difference between the two functions is that \c std::partial_sum guarantees
219
+ * a serial summation order, while \p inclusive_scan requires associativity of
220
+ * the binary operation to parallelize the prefix sum.
221
+ *
222
+ * Results are not deterministic for pseudo-associative operators (e.g.,
223
+ * addition of floating-point types). Results for pseudo-associative
224
+ * operators may vary from run to run.
225
+ *
226
+ * \param first The beginning of the input sequence.
227
+ * \param last The end of the input sequence.
228
+ * \param result The beginning of the output sequence.
229
+ * \param binary_op The associative operator used to 'sum' values.
230
+ * \return The end of the output sequence.
231
+ *
232
+ * \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
233
+ * Iterator</a> and \c InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type. \tparam
234
+ * OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a>
235
+ * and \c OutputIterator's \c value_type is convertible to
236
+ * both \c AssociativeOperator's first and second argument type.
237
+ * \tparam AssociativeOperator The function's return type must be convertible to \c OutputIterator's \c value_type.
238
+ *
239
+ * \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range
240
+ * <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
241
+ *
242
+ * The following code snippet demonstrates how to use \p inclusive_scan:
243
+ *
244
+ * \code
245
+ * int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8};
246
+ *
247
+ * ::cuda::maximum<int> binary_op;
248
+ *
249
+ * thrust::inclusive_scan(data, data + 10, data, binary_op); // in-place scan
250
+ *
251
+ * // data is now {-5, 0, 2, 2, 2, 4, 4, 4, 4, 8}
252
+ * \endcode
253
+ *
254
+ * \see https://en.cppreference.com/w/cpp/algorithm/partial_sum
255
+ */
256
+ template <typename InputIterator, typename OutputIterator, typename AssociativeOperator>
257
+ OutputIterator
258
+ inclusive_scan(InputIterator first, InputIterator last, OutputIterator result, AssociativeOperator binary_op);
259
+
260
+ /*! \p inclusive_scan computes an inclusive prefix sum operation. The
261
+ * term 'inclusive' means that each result includes the corresponding
262
+ * input operand in the partial sum. More precisely,
263
+ * <tt>binary_op(init, \*first)</tt> is assigned to <tt>*result</tt>
264
+ * and so on. This version of \p inclusive_scan requires both an associative
265
+ * operator and an initial value \p init. When the input and
266
+ * output sequences are the same, the scan is performed in-place.
267
+ *
268
+ * Results are not deterministic for pseudo-associative operators (e.g.,
269
+ * addition of floating-point types). Results for pseudo-associative
270
+ * operators may vary from run to run.
271
+ *
272
+ * The algorithm's execution is parallelized as determined by \p exec.
273
+ *
274
+ * \param exec The execution policy to use for parallelization.
275
+ * \param first The beginning of the input sequence.
276
+ * \param last The end of the input sequence.
277
+ * \param result The beginning of the output sequence.
278
+ * \param init The initial value.
279
+ * \param binary_op The associative operator used to 'sum' values.
280
+ * \return The end of the output sequence.
281
+ *
282
+ * \tparam DerivedPolicy The name of the derived execution policy.
283
+ * \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
284
+ * Iterator</a> and \c InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type. \tparam
285
+ * OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a>
286
+ * and \c OutputIterator's \c value_type is convertible to both \c AssociativeOperator's first and second argument type.
287
+ * \tparam T is convertible to \c OutputIterator's \c value_type.
288
+ * \tparam AssociativeOperator The function's return type must be convertible to \c OutputIterator's \c value_type.
289
+ *
290
+ * \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range <tt>[result, result + (last -
291
+ * first))</tt> shall not overlap otherwise.
292
+ *
293
+ * The following code snippet demonstrates how to use \p inclusive_scan with initial value to compute an in-place
294
+ * prefix sum using the \p thrust::host execution policy for parallelization:
295
+ *
296
+ * \code
297
+ * int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8};
298
+ * thrust::inclusive_scan(thrust::host, data, data + 10, data, 1, ::cuda::maximum<>{}); // in-place scan
299
+ * // data is now {1, 1, 2, 2, 2, 4, 4, 4, 4, 8}
300
+ * \endcode
301
+ *
302
+ * \see https://en.cppreference.com/w/cpp/algorithm/partial_sum
303
+ */
304
+ template <typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename T, typename AssociativeOperator>
305
+ _CCCL_HOST_DEVICE OutputIterator inclusive_scan(
306
+ const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
307
+ InputIterator first,
308
+ InputIterator last,
309
+ OutputIterator result,
310
+ T init,
311
+ AssociativeOperator binary_op);
312
+
313
+ /*! \p inclusive_scan computes an inclusive prefix sum operation. The
314
+ * term 'inclusive' means that each result includes the corresponding
315
+ * input operand in the partial sum. More precisely,
316
+ * <tt>binary_op(init, \*first)</tt> is assigned to <tt>*result</tt>
317
+ * and so on. This version of \p inclusive_scan requires both an associative
318
+ * operator and an initial value \p init. When the input and
319
+ * output sequences are the same, the scan is performed in-place.
320
+ *
321
+ * Results are not deterministic for pseudo-associative operators (e.g.,
322
+ * addition of floating-point types). Results for pseudo-associative
323
+ * operators may vary from run to run.
324
+ *
325
+ * \param first The beginning of the input sequence.
326
+ * \param last The end of the input sequence.
327
+ * \param result The beginning of the output sequence.
328
+ * \param init The initial value.
329
+ * \param binary_op The associative operator used to 'sum' values.
330
+ * \return The end of the output sequence.
331
+ *
332
+ * \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
333
+ * Iterator</a> and \c InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type. \tparam
334
+ * OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a>
335
+ * and \c OutputIterator's \c value_type is convertible to both \c AssociativeOperator's first and second argument type.
336
+ * \tparam T is convertible to \c OutputIterator's \c value_type.
337
+ * \tparam AssociativeOperator The function's return type must be convertible to \c OutputIterator's \c value_type.
338
+ *
339
+ * \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range <tt>[result, result + (last -
340
+ * first))</tt> shall not overlap otherwise.
341
+ *
342
+ * The following code snippet demonstrates how to use \p inclusive_scan with initial value:
343
+ *
344
+ * \code
345
+ * int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8};
346
+ * ::cuda::maximum<int> binary_op;
347
+ * thrust::inclusive_scan(data, data + 10, data, 1, binary_op); // in-place scan
348
+ * // data is now {1, 1, 2, 2, 2, 4, 4, 4, 4, 8}
349
+ * \endcode
350
+ *
351
+ * \see https://en.cppreference.com/w/cpp/algorithm/partial_sum
352
+ */
353
+ template <typename InputIterator, typename OutputIterator, typename T, typename AssociativeOperator>
354
+ OutputIterator
355
+ inclusive_scan(InputIterator first, InputIterator last, OutputIterator result, T init, AssociativeOperator binary_op);
356
+
357
+ /*! \p exclusive_scan computes an exclusive prefix sum operation. The
358
+ * term 'exclusive' means that each result does not include the
359
+ * corresponding input operand in the partial sum. More precisely,
360
+ * <tt>0</tt> is assigned to <tt>*result</tt> and the sum of
361
+ * <tt>0</tt> and <tt>*first</tt> is assigned to <tt>*(result + 1)</tt>,
362
+ * and so on. This version of \p exclusive_scan assumes plus as the
363
+ * associative operator and \c 0 as the initial value. When the input and
364
+ * output sequences are the same, the scan is performed in-place.
365
+ *
366
+ * Results are not deterministic for pseudo-associative operators (e.g.,
367
+ * addition of floating-point types). Results for pseudo-associative
368
+ * operators may vary from run to run.
369
+ *
370
+ * The algorithm's execution is parallelized as determined by \p exec.
371
+ *
372
+ * \param exec The execution policy to use for parallelization.
373
+ * \param first The beginning of the input sequence.
374
+ * \param last The end of the input sequence.
375
+ * \param result The beginning of the output sequence.
376
+ * \return The end of the output sequence.
377
+ *
378
+ * \tparam DerivedPolicy The name of the derived execution policy.
379
+ * \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
380
+ * Iterator</a> and \c InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type. \tparam
381
+ * OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output
382
+ * Iterator</a>, and if \c x and \c y are objects of \c OutputIterator's \c value_type, then <tt>x + y</tt> is defined.
383
+ * If \c T is \c OutputIterator's \c value_type, then <tt>T(0)</tt> is defined.
384
+ *
385
+ * \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range
386
+ * <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
387
+ *
388
+ * The following code snippet demonstrates how to use \p exclusive_scan to compute an in-place
389
+ * prefix sum using the \p thrust::host execution policy for parallelization:
390
+ *
391
+ * \code
392
+ * #include <thrust/scan.h>
393
+ * #include <thrust/execution_policy.h>
394
+ * ...
395
+ *
396
+ * int data[6] = {1, 0, 2, 2, 1, 3};
397
+ *
398
+ * thrust::exclusive_scan(thrust::host, data, data + 6, data); // in-place scan
399
+ *
400
+ * // data is now {0, 1, 1, 3, 5, 6}
401
+ * \endcode
402
+ *
403
+ * \see https://en.cppreference.com/w/cpp/algorithm/partial_sum
404
+ */
405
+ template <typename DerivedPolicy, typename InputIterator, typename OutputIterator>
406
+ _CCCL_HOST_DEVICE OutputIterator exclusive_scan(
407
+ const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
408
+ InputIterator first,
409
+ InputIterator last,
410
+ OutputIterator result);
411
+
412
+ /*! \p exclusive_scan computes an exclusive prefix sum operation. The
413
+ * term 'exclusive' means that each result does not include the
414
+ * corresponding input operand in the partial sum. More precisely,
415
+ * <tt>0</tt> is assigned to <tt>*result</tt> and the sum of
416
+ * <tt>0</tt> and <tt>*first</tt> is assigned to <tt>*(result + 1)</tt>,
417
+ * and so on. This version of \p exclusive_scan assumes plus as the
418
+ * associative operator and \c 0 as the initial value. When the input and
419
+ * output sequences are the same, the scan is performed in-place.
420
+ *
421
+ * Results are not deterministic for pseudo-associative operators (e.g.,
422
+ * addition of floating-point types). Results for pseudo-associative
423
+ * operators may vary from run to run.
424
+ *
425
+ * \param first The beginning of the input sequence.
426
+ * \param last The end of the input sequence.
427
+ * \param result The beginning of the output sequence.
428
+ * \return The end of the output sequence.
429
+ *
430
+ * \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
431
+ * Iterator</a> and \c InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type. \tparam
432
+ * OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output
433
+ * Iterator</a>, and if \c x and \c y are objects of \c OutputIterator's \c value_type, then <tt>x + y</tt> is defined.
434
+ * If \c T is \c OutputIterator's \c value_type, then <tt>T(0)</tt> is defined.
435
+ *
436
+ * \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range
437
+ * <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
438
+ *
439
+ * The following code snippet demonstrates how to use \p exclusive_scan:
440
+ *
441
+ * \code
442
+ * #include <thrust/scan.h>
443
+ *
444
+ * int data[6] = {1, 0, 2, 2, 1, 3};
445
+ *
446
+ * thrust::exclusive_scan(data, data + 6, data); // in-place scan
447
+ *
448
+ * // data is now {0, 1, 1, 3, 5, 6}
449
+ * \endcode
450
+ *
451
+ * \see https://en.cppreference.com/w/cpp/algorithm/partial_sum
452
+ */
453
+ template <typename InputIterator, typename OutputIterator>
454
+ OutputIterator exclusive_scan(InputIterator first, InputIterator last, OutputIterator result);
455
+
456
+ /*! \p exclusive_scan computes an exclusive prefix sum operation. The
457
+ * term 'exclusive' means that each result does not include the
458
+ * corresponding input operand in the partial sum. More precisely,
459
+ * \p init is assigned to <tt>*result</tt> and the sum of \p init and
460
+ * <tt>*first</tt> is assigned to <tt>*(result + 1)</tt>, and so on.
461
+ * This version of \p exclusive_scan assumes plus as the associative
462
+ * operator but requires an initial value \p init. When the input and
463
+ * output sequences are the same, the scan is performed in-place.
464
+ *
465
+ * Results are not deterministic for pseudo-associative operators (e.g.,
466
+ * addition of floating-point types). Results for pseudo-associative
467
+ * operators may vary from run to run.
468
+ *
469
+ * The algorithm's execution is parallelized as determined by \p exec.
470
+ *
471
+ * \param exec The execution policy to use for parallelization.
472
+ * \param first The beginning of the input sequence.
473
+ * \param last The end of the input sequence.
474
+ * \param result The beginning of the output sequence.
475
+ * \param init The initial value.
476
+ * \return The end of the output sequence.
477
+ *
478
+ * \tparam DerivedPolicy The name of the derived execution policy.
479
+ * \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
480
+ * Iterator</a> and \c InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type. \tparam
481
+ * OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output
482
+ * Iterator</a>, and if \c x and \c y are objects of \c OutputIterator's \c value_type, then <tt>x + y</tt> is defined.
483
+ * \tparam T is convertible to \c OutputIterator's \c value_type.
484
+ *
485
+ * \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range
486
+ * <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
487
+ *
488
+ * The following code snippet demonstrates how to use \p exclusive_scan to compute an in-place
489
+ * prefix sum using the \p thrust::host execution policy for parallelization:
490
+ *
491
+ * \code
492
+ * #include <thrust/scan.h>
493
+ * #include <thrust/execution_policy.h>
494
+ *
495
+ * int data[6] = {1, 0, 2, 2, 1, 3};
496
+ *
497
+ * thrust::exclusive_scan(thrust::host, data, data + 6, data, 4); // in-place scan
498
+ *
499
+ * // data is now {4, 5, 5, 7, 9, 10}
500
+ * \endcode
501
+ *
502
+ * \see https://en.cppreference.com/w/cpp/algorithm/partial_sum
503
+ */
504
+ template <typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename T>
505
+ _CCCL_HOST_DEVICE OutputIterator exclusive_scan(
506
+ const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
507
+ InputIterator first,
508
+ InputIterator last,
509
+ OutputIterator result,
510
+ T init);
511
+
512
+ /*! \p exclusive_scan computes an exclusive prefix sum operation. The
513
+ * term 'exclusive' means that each result does not include the
514
+ * corresponding input operand in the partial sum. More precisely,
515
+ * \p init is assigned to <tt>*result</tt> and the sum of \p init and
516
+ * <tt>*first</tt> is assigned to <tt>*(result + 1)</tt>, and so on.
517
+ * This version of \p exclusive_scan assumes plus as the associative
518
+ * operator but requires an initial value \p init. When the input and
519
+ * output sequences are the same, the scan is performed in-place.
520
+ *
521
+ * Results are not deterministic for pseudo-associative operators (e.g.,
522
+ * addition of floating-point types). Results for pseudo-associative
523
+ * operators may vary from run to run.
524
+ *
525
+ * \param first The beginning of the input sequence.
526
+ * \param last The end of the input sequence.
527
+ * \param result The beginning of the output sequence.
528
+ * \param init The initial value.
529
+ * \return The end of the output sequence.
530
+ *
531
+ * \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
532
+ * Iterator</a> and \c InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type. \tparam
533
+ * OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output
534
+ * Iterator</a>, and if \c x and \c y are objects of \c OutputIterator's \c value_type, then <tt>x + y</tt> is defined.
535
+ * \tparam T is convertible to \c OutputIterator's \c value_type.
536
+ *
537
+ * \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range
538
+ * <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
539
+ *
540
+ * The following code snippet demonstrates how to use \p exclusive_scan:
541
+ *
542
+ * \code
543
+ * #include <thrust/scan.h>
544
+ *
545
+ * int data[6] = {1, 0, 2, 2, 1, 3};
546
+ *
547
+ * thrust::exclusive_scan(data, data + 6, data, 4); // in-place scan
548
+ *
549
+ * // data is now {4, 5, 5, 7, 9, 10}
550
+ * \endcode
551
+ *
552
+ * \see https://en.cppreference.com/w/cpp/algorithm/partial_sum
553
+ */
554
+ template <typename InputIterator, typename OutputIterator, typename T>
555
+ OutputIterator exclusive_scan(InputIterator first, InputIterator last, OutputIterator result, T init);
556
+
557
+ /*! \p exclusive_scan computes an exclusive prefix sum operation. The
558
+ * term 'exclusive' means that each result does not include the
559
+ * corresponding input operand in the partial sum. More precisely,
560
+ * \p init is assigned to <tt>\*result</tt> and the value
561
+ * <tt>binary_op(init, \*first)</tt> is assigned to <tt>\*(result + 1)</tt>,
562
+ * and so on. This version of the function requires both an associative
563
+ * operator and an initial value \p init. When the input and output
564
+ * sequences are the same, the scan is performed in-place.
565
+ *
566
+ * Results are not deterministic for pseudo-associative operators (e.g.,
567
+ * addition of floating-point types). Results for pseudo-associative
568
+ * operators may vary from run to run.
569
+ *
570
+ * The algorithm's execution is parallelized as determined by \p exec.
571
+ *
572
+ * \param exec The execution policy to use for parallelization.
573
+ * \param first The beginning of the input sequence.
574
+ * \param last The end of the input sequence.
575
+ * \param result The beginning of the output sequence.
576
+ * \param init The initial value.
577
+ * \param binary_op The associative operator used to 'sum' values.
578
+ * \return The end of the output sequence.
579
+ *
580
+ * \tparam DerivedPolicy The name of the derived execution policy.
581
+ * \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
582
+ * Iterator</a> and \c InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type. \tparam
583
+ * OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a>
584
+ * and \c OutputIterator's \c value_type is convertible to
585
+ * both \c AssociativeOperator's first and second argument type.
586
+ * \tparam T is convertible to \c OutputIterator's \c value_type.
587
+ * \tparam AssociativeOperator The function's return type must be convertible to \c OutputIterator's \c value_type.
588
+ *
589
+ * \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range
590
+ * <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
591
+ *
592
+ * The following code snippet demonstrates how to use \p exclusive_scan to compute an in-place
593
+ * prefix sum using the \p thrust::host execution policy for parallelization:
594
+ *
595
+ * \code
596
+ * #include <thrust/scan.h>
597
+ * #include <thrust/functional.h>
598
+ * #include <thrust/execution_policy.h>
599
+ * ...
600
+ *
601
+ * int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8};
602
+ *
603
+ * ::cuda::maximum<int> binary_op;
604
+ *
605
+ * thrust::exclusive_scan(thrust::host, data, data + 10, data, 1, binary_op); // in-place scan
606
+ *
607
+ * // data is now {1, 1, 1, 2, 2, 2, 4, 4, 4, 4}
608
+ * \endcode
609
+ *
610
+ * \see https://en.cppreference.com/w/cpp/algorithm/partial_sum
611
+ */
612
+ template <typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename T, typename AssociativeOperator>
613
+ _CCCL_HOST_DEVICE OutputIterator exclusive_scan(
614
+ const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
615
+ InputIterator first,
616
+ InputIterator last,
617
+ OutputIterator result,
618
+ T init,
619
+ AssociativeOperator binary_op);
620
+
621
+ /*! \p exclusive_scan computes an exclusive prefix sum operation. The
622
+ * term 'exclusive' means that each result does not include the
623
+ * corresponding input operand in the partial sum. More precisely,
624
+ * \p init is assigned to <tt>\*result</tt> and the value
625
+ * <tt>binary_op(init, \*first)</tt> is assigned to <tt>\*(result + 1)</tt>,
626
+ * and so on. This version of the function requires both an associative
627
+ * operator and an initial value \p init. When the input and output
628
+ * sequences are the same, the scan is performed in-place.
629
+ *
630
+ * Results are not deterministic for pseudo-associative operators (e.g.,
631
+ * addition of floating-point types). Results for pseudo-associative
632
+ * operators may vary from run to run.
633
+ *
634
+ * \param first The beginning of the input sequence.
635
+ * \param last The end of the input sequence.
636
+ * \param result The beginning of the output sequence.
637
+ * \param init The initial value.
638
+ * \param binary_op The associative operator used to 'sum' values.
639
+ * \return The end of the output sequence.
640
+ *
641
+ * \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
642
+ * Iterator</a> and \c InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type. \tparam
643
+ * OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a>
644
+ * and \c OutputIterator's \c value_type is convertible to
645
+ * both \c AssociativeOperator's first and second argument type.
646
+ * \tparam T is convertible to \c OutputIterator's \c value_type.
647
+ * \tparam AssociativeOperator The function's return type must be convertible to \c OutputIterator's \c value_type.
648
+ *
649
+ * \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range
650
+ * <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
651
+ *
652
+ * The following code snippet demonstrates how to use \p exclusive_scan:
653
+ *
654
+ * \code
655
+ * #include <thrust/scan.h>
656
+ * #include <thrust/functional.h>
657
+ *
658
+ * int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8};
659
+ *
660
+ * ::cuda::maximum<int> binary_op;
661
+ *
662
+ * thrust::exclusive_scan(data, data + 10, data, 1, binary_op); // in-place scan
663
+ *
664
+ * // data is now {1, 1, 1, 2, 2, 2, 4, 4, 4, 4}
665
+ * \endcode
666
+ *
667
+ * \see https://en.cppreference.com/w/cpp/algorithm/partial_sum
668
+ */
669
+ template <typename InputIterator, typename OutputIterator, typename T, typename AssociativeOperator>
670
+ OutputIterator
671
+ exclusive_scan(InputIterator first, InputIterator last, OutputIterator result, T init, AssociativeOperator binary_op);
672
+
673
+ /*! \addtogroup segmentedprefixsums Segmented Prefix Sums
674
+ * \ingroup prefixsums
675
+ * \{
676
+ */
677
+
678
+ /*! \p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix
679
+ * sum operation. The term 'inclusive' means that each result includes
680
+ * the corresponding input operand in the partial sum. The term 'segmented'
681
+ * means that the partial sums are broken into distinct segments. In other
682
+ * words, within each segment a separate inclusive scan operation is computed.
683
+ * Refer to the code sample below for example usage.
684
+ *
685
+ * This version of \p inclusive_scan_by_key assumes \c equal_to as the binary
686
+ * predicate used to compare adjacent keys. Specifically, consecutive iterators
687
+ * <tt>i</tt> and <tt>i+1</tt> in the range <tt>[first1, last1)</tt>
688
+ * belong to the same segment if <tt>*i == *(i+1)</tt>, and belong to
689
+ * different segments otherwise.
690
+ *
691
+ * This version of \p inclusive_scan_by_key assumes \c plus as the associative
692
+ * operator used to perform the prefix sum. When the input and output sequences
693
+ * are the same, the scan is performed in-place.
694
+ *
695
+ * Results are not deterministic for pseudo-associative operators (e.g.,
696
+ * addition of floating-point types). Results for pseudo-associative
697
+ * operators may vary from run to run.
698
+ *
699
+ * The algorithm's execution is parallelized as determined by \p exec.
700
+ *
701
+ * \param exec The execution policy to use for parallelization.
702
+ * \param first1 The beginning of the key sequence.
703
+ * \param last1 The end of the key sequence.
704
+ * \param first2 The beginning of the input value sequence.
705
+ * \param result The beginning of the output value sequence.
706
+ * \return The end of the output sequence.
707
+ *
708
+ * \tparam DerivedPolicy The name of the derived execution policy.
709
+ * \tparam InputIterator1 is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
710
+ * Iterator</a> \tparam InputIterator2 is a model of <a
711
+ * href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input Iterator</a> and \c InputIterator2's \c
712
+ * value_type is convertible to \c OutputIterator's \c value_type. \tparam OutputIterator is a model of <a
713
+ * href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a>, and if \c x and \c y are
714
+ * objects of \c OutputIterator's \c value_type, then <tt>x + y</tt> is defined.
715
+ *
716
+ * \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range
717
+ * <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
718
+ * \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and the range
719
+ * <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
720
+ *
721
+ * The following code snippet demonstrates how to use \p inclusive_scan_by_key using the \p thrust::host
722
+ * execution policy for parallelization:
723
+ *
724
+ * \code
725
+ * #include <thrust/scan.h>
726
+ * #include <thrust/execution_policy.h>
727
+ * ...
728
+ *
729
+ * int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
730
+ * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
731
+ *
732
+ * thrust::inclusive_scan_by_key(thrust::host, keys, keys + 10, data, data); // in-place scan
733
+ *
734
+ * // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4};
735
+ * \endcode
736
+ *
737
+ * \see inclusive_scan
738
+ * \see exclusive_scan_by_key
739
+ *
740
+ */
741
+ template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator>
742
+ _CCCL_HOST_DEVICE OutputIterator inclusive_scan_by_key(
743
+ const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
744
+ InputIterator1 first1,
745
+ InputIterator1 last1,
746
+ InputIterator2 first2,
747
+ OutputIterator result);
748
+
749
+ /*! \p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix
750
+ * sum operation. The term 'inclusive' means that each result includes
751
+ * the corresponding input operand in the partial sum. The term 'segmented'
752
+ * means that the partial sums are broken into distinct segments. In other
753
+ * words, within each segment a separate inclusive scan operation is computed.
754
+ * Refer to the code sample below for example usage.
755
+ *
756
+ * This version of \p inclusive_scan_by_key assumes \c equal_to as the binary
757
+ * predicate used to compare adjacent keys. Specifically, consecutive iterators
758
+ * <tt>i</tt> and <tt>i+1</tt> in the range <tt>[first1, last1)</tt>
759
+ * belong to the same segment if <tt>*i == *(i+1)</tt>, and belong to
760
+ * different segments otherwise.
761
+ *
762
+ * This version of \p inclusive_scan_by_key assumes \c plus as the associative
763
+ * operator used to perform the prefix sum. When the input and output sequences
764
+ * are the same, the scan is performed in-place.
765
+ *
766
+ * Results are not deterministic for pseudo-associative operators (e.g.,
767
+ * addition of floating-point types). Results for pseudo-associative
768
+ * operators may vary from run to run.
769
+ *
770
+ * \param first1 The beginning of the key sequence.
771
+ * \param last1 The end of the key sequence.
772
+ * \param first2 The beginning of the input value sequence.
773
+ * \param result The beginning of the output value sequence.
774
+ * \return The end of the output sequence.
775
+ *
776
+ * \tparam InputIterator1 is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
777
+ * Iterator</a> \tparam InputIterator2 is a model of <a
778
+ * href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input Iterator</a> and \c InputIterator2's \c
779
+ * value_type is convertible to \c OutputIterator's \c value_type. \tparam OutputIterator is a model of <a
780
+ * href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a>, and if \c x and \c y are
781
+ * objects of \c OutputIterator's \c value_type, then <tt>x + y</tt> is defined.
782
+ *
783
+ * \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range
784
+ * <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
785
+ * \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and the range
786
+ * <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
787
+ *
788
+ * The following code snippet demonstrates how to use \p inclusive_scan_by_key:
789
+ *
790
+ * \code
791
+ * #include <thrust/scan.h>
792
+ *
793
+ * int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
794
+ * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
795
+ *
796
+ * thrust::inclusive_scan_by_key(keys, keys + 10, data, data); // in-place scan
797
+ *
798
+ * // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4};
799
+ * \endcode
800
+ *
801
+ * \see inclusive_scan
802
+ * \see exclusive_scan_by_key
803
+ *
804
+ */
805
+ template <typename InputIterator1, typename InputIterator2, typename OutputIterator>
806
+ OutputIterator
807
+ inclusive_scan_by_key(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator result);
808
+
809
+ /*! \p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix
810
+ * sum operation. The term 'inclusive' means that each result includes
811
+ * the corresponding input operand in the partial sum. The term 'segmented'
812
+ * means that the partial sums are broken into distinct segments. In other
813
+ * words, within each segment a separate inclusive scan operation is computed.
814
+ * Refer to the code sample below for example usage.
815
+ *
816
+ * This version of \p inclusive_scan_by_key uses the binary predicate
817
+ * \c binary_pred to compare adjacent keys. Specifically, consecutive iterators
818
+ * <tt>i</tt> and <tt>i+1</tt> in the range <tt>[first1, last1)</tt>
819
+ * belong to the same segment if <tt>binary_pred(*i, *(i+1))</tt> is true, and belong to
820
+ * different segments otherwise.
821
+ *
822
+ * This version of \p inclusive_scan_by_key assumes \c plus as the associative
823
+ * operator used to perform the prefix sum. When the input and output sequences
824
+ * are the same, the scan is performed in-place.
825
+ *
826
+ * Results are not deterministic for pseudo-associative operators (e.g.,
827
+ * addition of floating-point types). Results for pseudo-associative
828
+ * operators may vary from run to run.
829
+ *
830
+ * The algorithm's execution is parallelized as determined by \p exec.
831
+ *
832
+ * \param exec The execution policy to use for parallelization.
833
+ * \param first1 The beginning of the key sequence.
834
+ * \param last1 The end of the key sequence.
835
+ * \param first2 The beginning of the input value sequence.
836
+ * \param result The beginning of the output value sequence.
837
+ * \param binary_pred The binary predicate used to determine equality of keys.
838
+ * \return The end of the output sequence.
839
+ *
840
+ * \tparam DerivedPolicy The name of the derived execution policy.
841
+ * \tparam InputIterator1 is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
842
+ * Iterator</a> \tparam InputIterator2 is a model of <a
843
+ * href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input Iterator</a> and \c InputIterator2's \c
844
+ * value_type is convertible to \c OutputIterator's \c value_type. \tparam OutputIterator is a model of <a
845
+ * href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a>, and if \c x and \c y are
846
+ * objects of \c OutputIterator's \c value_type, then <tt>x + y</tt> is defined. \tparam BinaryPredicate is a
847
+ * model of <a href="https://en.cppreference.com/w/cpp/named_req/BinaryPredicate">Binary Predicate</a>.
848
+ *
849
+ * \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range
850
+ * <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
851
+ * \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and the range
852
+ * <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
853
+ *
854
+ * The following code snippet demonstrates how to use \p inclusive_scan_by_key using the \p thrust::host
855
+ * execution policy for parallelization:
856
+ *
857
+ * \code
858
+ * #include <thrust/scan.h>
859
+ * #include <thrust/functional.h>
860
+ * #include <thrust/execution_policy.h>
861
+ * ...
862
+ *
863
+ * int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
864
+ * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
865
+ *
866
+ * ::cuda::std::equal_to<int> binary_pred;
867
+ *
868
+ * thrust::inclusive_scan_by_key(thrust::host, keys, keys + 10, data, data, binary_pred); // in-place scan
869
+ *
870
+ * // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4};
871
+ * \endcode
872
+ *
873
+ * \see inclusive_scan
874
+ * \see exclusive_scan_by_key
875
+ *
876
+ */
877
+ template <typename DerivedPolicy,
878
+ typename InputIterator1,
879
+ typename InputIterator2,
880
+ typename OutputIterator,
881
+ typename BinaryPredicate>
882
+ _CCCL_HOST_DEVICE OutputIterator inclusive_scan_by_key(
883
+ const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
884
+ InputIterator1 first1,
885
+ InputIterator1 last1,
886
+ InputIterator2 first2,
887
+ OutputIterator result,
888
+ BinaryPredicate binary_pred);
889
+
890
+ /*! \p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix
891
+ * sum operation. The term 'inclusive' means that each result includes
892
+ * the corresponding input operand in the partial sum. The term 'segmented'
893
+ * means that the partial sums are broken into distinct segments. In other
894
+ * words, within each segment a separate inclusive scan operation is computed.
895
+ * Refer to the code sample below for example usage.
896
+ *
897
+ * This version of \p inclusive_scan_by_key uses the binary predicate
898
+ * \c binary_pred to compare adjacent keys. Specifically, consecutive iterators
899
+ * <tt>i</tt> and <tt>i+1</tt> in the range <tt>[first1, last1)</tt>
900
+ * belong to the same segment if <tt>binary_pred(*i, *(i+1))</tt> is true, and belong to
901
+ * different segments otherwise.
902
+ *
903
+ * This version of \p inclusive_scan_by_key assumes \c plus as the associative
904
+ * operator used to perform the prefix sum. When the input and output sequences
905
+ * are the same, the scan is performed in-place.
906
+ *
907
+ * Results are not deterministic for pseudo-associative operators (e.g.,
908
+ * addition of floating-point types). Results for pseudo-associative
909
+ * operators may vary from run to run.
910
+ *
911
+ * \param first1 The beginning of the key sequence.
912
+ * \param last1 The end of the key sequence.
913
+ * \param first2 The beginning of the input value sequence.
914
+ * \param result The beginning of the output value sequence.
915
+ * \param binary_pred The binary predicate used to determine equality of keys.
916
+ * \return The end of the output sequence.
917
+ *
918
+ * \tparam InputIterator1 is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
919
+ * Iterator</a> \tparam InputIterator2 is a model of <a
920
+ * href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input Iterator</a> and \c InputIterator2's \c
921
+ * value_type is convertible to \c OutputIterator's \c value_type. \tparam OutputIterator is a model of <a
922
+ * href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a>, and if \c x and \c y are
923
+ * objects of \c OutputIterator's \c value_type, then <tt>x + y</tt> is defined. \tparam BinaryPredicate is a
924
+ * model of <a href="https://en.cppreference.com/w/cpp/named_req/BinaryPredicate">Binary Predicate</a>.
925
+ *
926
+ * \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range
927
+ * <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
928
+ * \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and the range
929
+ * <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
930
+ *
931
+ * The following code snippet demonstrates how to use \p inclusive_scan_by_key
932
+ *
933
+ * \code
934
+ * #include <thrust/scan.h>
935
+ * #include <thrust/functional.h>
936
+ *
937
+ * int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
938
+ * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
939
+ *
940
+ * ::cuda::std::equal_to<int> binary_pred;
941
+ *
942
+ * thrust::inclusive_scan_by_key(keys, keys + 10, data, data, binary_pred); // in-place scan
943
+ *
944
+ * // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4};
945
+ * \endcode
946
+ *
947
+ * \see inclusive_scan
948
+ * \see exclusive_scan_by_key
949
+ *
950
+ */
951
+ template <typename InputIterator1, typename InputIterator2, typename OutputIterator, typename BinaryPredicate>
952
+ OutputIterator inclusive_scan_by_key(
953
+ InputIterator1 first1,
954
+ InputIterator1 last1,
955
+ InputIterator2 first2,
956
+ OutputIterator result,
957
+ BinaryPredicate binary_pred);
958
+
959
+ /*! \p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix
960
+ * sum operation. The term 'inclusive' means that each result includes
961
+ * the corresponding input operand in the partial sum. The term 'segmented'
962
+ * means that the partial sums are broken into distinct segments. In other
963
+ * words, within each segment a separate inclusive scan operation is computed.
964
+ * Refer to the code sample below for example usage.
965
+ *
966
+ * This version of \p inclusive_scan_by_key uses the binary predicate
967
+ * \c binary_pred to compare adjacent keys. Specifically, consecutive iterators
968
+ * <tt>i</tt> and <tt>i+1</tt> in the range <tt>[first1, last1)</tt>
969
+ * belong to the same segment if <tt>binary_pred(*i, *(i+1))</tt> is true, and belong to
970
+ * different segments otherwise.
971
+ *
972
+ * This version of \p inclusive_scan_by_key uses the associative operator
973
+ * \c binary_op to perform the prefix sum. When the input and output sequences
974
+ * are the same, the scan is performed in-place.
975
+ *
976
+ * Results are not deterministic for pseudo-associative operators (e.g.,
977
+ * addition of floating-point types). Results for pseudo-associative
978
+ * operators may vary from run to run.
979
+ *
980
+ * The algorithm's execution is parallelized as determined by \p exec.
981
+ *
982
+ * \param exec The execution policy to use for parallelization.
983
+ * \param first1 The beginning of the key sequence.
984
+ * \param last1 The end of the key sequence.
985
+ * \param first2 The beginning of the input value sequence.
986
+ * \param result The beginning of the output value sequence.
987
+ * \param binary_pred The binary predicate used to determine equality of keys.
988
+ * \param binary_op The associative operator used to 'sum' values.
989
+ * \return The end of the output sequence.
990
+ *
991
+ * \tparam DerivedPolicy The name of the derived execution policy.
992
+ * \tparam InputIterator1 is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
993
+ * Iterator</a> \tparam InputIterator2 is a model of <a
994
+ * href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input Iterator</a> and \c InputIterator2's \c
995
+ * value_type is convertible to \c OutputIterator's \c value_type. \tparam OutputIterator is a model of <a
996
+ * href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a>, and if \c x and \c y are
997
+ * objects of \c OutputIterator's \c value_type, then <tt>binary_op(x,y)</tt> is defined. \tparam BinaryPredicate is a
998
+ * model of <a href="https://en.cppreference.com/w/cpp/named_req/BinaryPredicate">Binary Predicate</a>. \tparam
999
+ * AssociativeOperator The function's return type must be convertible to \c OutputIterator's \c value_type.
1000
+ *
1001
+ * \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range
1002
+ * <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
1003
+ * \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and the range
1004
+ * <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
1005
+ *
1006
+ * The following code snippet demonstrates how to use \p inclusive_scan_by_key using the \p thrust::host
1007
+ * execution policy for parallelization:
1008
+ *
1009
+ * \code
1010
+ * #include <thrust/scan.h>
1011
+ * #include <thrust/functional.h>
1012
+ * #include <thrust/execution_policy.h>
1013
+ * ...
1014
+ *
1015
+ * int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
1016
+ * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
1017
+ *
1018
+ * ::cuda::std::equal_to<int> binary_pred;
1019
+ * ::cuda::std::plus<int> binary_op;
1020
+ *
1021
+ * thrust::inclusive_scan_by_key(thrust::host, keys, keys + 10, data, data, binary_pred, binary_op); // in-place scan
1022
+ *
1023
+ * // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4};
1024
+ * \endcode
1025
+ *
1026
+ * \see inclusive_scan
1027
+ * \see exclusive_scan_by_key
1028
+ *
1029
+ */
1030
+ template <typename DerivedPolicy,
1031
+ typename InputIterator1,
1032
+ typename InputIterator2,
1033
+ typename OutputIterator,
1034
+ typename BinaryPredicate,
1035
+ typename AssociativeOperator>
1036
+ _CCCL_HOST_DEVICE OutputIterator inclusive_scan_by_key(
1037
+ const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
1038
+ InputIterator1 first1,
1039
+ InputIterator1 last1,
1040
+ InputIterator2 first2,
1041
+ OutputIterator result,
1042
+ BinaryPredicate binary_pred,
1043
+ AssociativeOperator binary_op);
1044
+
1045
+ /*! \p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix
1046
+ * sum operation. The term 'inclusive' means that each result includes
1047
+ * the corresponding input operand in the partial sum. The term 'segmented'
1048
+ * means that the partial sums are broken into distinct segments. In other
1049
+ * words, within each segment a separate inclusive scan operation is computed.
1050
+ * Refer to the code sample below for example usage.
1051
+ *
1052
+ * This version of \p inclusive_scan_by_key uses the binary predicate
1053
+ * \c binary_pred to compare adjacent keys. Specifically, consecutive iterators
1054
+ * <tt>i</tt> and <tt>i+1</tt> in the range <tt>[first1, last1)</tt>
1055
+ * belong to the same segment if <tt>binary_pred(*i, *(i+1))</tt> is true, and belong to
1056
+ * different segments otherwise.
1057
+ *
1058
+ * Results are not deterministic for pseudo-associative operators (e.g.,
1059
+ * addition of floating-point types). Results for pseudo-associative
1060
+ * operators may vary from run to run.
1061
+ *
1062
+ * This version of \p inclusive_scan_by_key uses the associative operator
1063
+ * \c binary_op to perform the prefix sum. When the input and output sequences
1064
+ * are the same, the scan is performed in-place.
1065
+ *
1066
+ * \param first1 The beginning of the key sequence.
1067
+ * \param last1 The end of the key sequence.
1068
+ * \param first2 The beginning of the input value sequence.
1069
+ * \param result The beginning of the output value sequence.
1070
+ * \param binary_pred The binary predicate used to determine equality of keys.
1071
+ * \param binary_op The associative operator used to 'sum' values.
1072
+ * \return The end of the output sequence.
1073
+ *
1074
+ * \tparam InputIterator1 is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
1075
+ * Iterator</a> \tparam InputIterator2 is a model of <a
1076
+ * href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input Iterator</a> and \c InputIterator2's \c
1077
+ * value_type is convertible to \c OutputIterator's \c value_type. \tparam OutputIterator is a model of <a
1078
+ * href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a>, and if \c x and \c y are
1079
+ * objects of \c OutputIterator's \c value_type, then <tt>binary_op(x,y)</tt> is defined. \tparam BinaryPredicate is a
1080
+ * model of <a href="https://en.cppreference.com/w/cpp/named_req/BinaryPredicate">Binary Predicate</a>. \tparam
1081
+ * AssociativeOperator The function's return type must be convertible to \c OutputIterator's \c value_type.
1082
+ *
1083
+ * \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range
1084
+ * <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
1085
+ * \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and the range
1086
+ * <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
1087
+ *
1088
+ * The following code snippet demonstrates how to use \p inclusive_scan_by_key
1089
+ *
1090
+ * \code
1091
+ * #include <thrust/scan.h>
1092
+ * #include <thrust/functional.h>
1093
+ *
1094
+ * int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
1095
+ * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
1096
+ *
1097
+ * ::cuda::std::equal_to<int> binary_pred;
1098
+ * ::cuda::std::plus<int> binary_op;
1099
+ *
1100
+ * thrust::inclusive_scan_by_key(keys, keys + 10, data, data, binary_pred, binary_op); // in-place scan
1101
+ *
1102
+ * // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4};
1103
+ * \endcode
1104
+ *
1105
+ * \see inclusive_scan
1106
+ * \see exclusive_scan_by_key
1107
+ *
1108
+ */
1109
+ template <typename InputIterator1,
1110
+ typename InputIterator2,
1111
+ typename OutputIterator,
1112
+ typename BinaryPredicate,
1113
+ typename AssociativeOperator>
1114
+ OutputIterator inclusive_scan_by_key(
1115
+ InputIterator1 first1,
1116
+ InputIterator1 last1,
1117
+ InputIterator2 first2,
1118
+ OutputIterator result,
1119
+ BinaryPredicate binary_pred,
1120
+ AssociativeOperator binary_op);
1121
+
1122
+ /*! \p exclusive_scan_by_key computes an exclusive segmented prefix sum.
1123
+ *
1124
+ * This version of \p exclusive_scan_by_key uses the value \c 0 to
1125
+ * initialize the exclusive scan operation.
1126
+ *
1127
+ * This version of \p exclusive_scan_by_key assumes \c plus as the associative
1128
+ * operator used to perform the prefix sum. When the input and output sequences
1129
+ * are the same, the scan is performed in-place.
1130
+ *
1131
+ * This version of \p exclusive_scan_by_key assumes \c equal_to as the binary
1132
+ * predicate used to compare adjacent keys. Specifically, consecutive iterators
1133
+ * <tt>i</tt> and <tt>i+1</tt> in the range <tt>[first1, last1)</tt>
1134
+ * belong to the same segment if <tt>*i == *(i+1)</tt>, and belong to
1135
+ * different segments otherwise.
1136
+ *
1137
+ * Results are not deterministic for pseudo-associative operators (e.g.,
1138
+ * addition of floating-point types). Results for pseudo-associative
1139
+ * operators may vary from run to run.
1140
+ *
1141
+ * Refer to the most general form of \p exclusive_scan_by_key for additional details.
1142
+ *
1143
+ * The algorithm's execution is parallelized as determined by \p exec.
1144
+ *
1145
+ * \param exec The execution policy to use for parallelization.
1146
+ * \param first1 The beginning of the key sequence.
1147
+ * \param last1 The end of the key sequence.
1148
+ * \param first2 The beginning of the input value sequence.
1149
+ * \param result The beginning of the output value sequence.
1150
+ *
1151
+ * \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range
1152
+ * <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
1153
+ * \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and the range
1154
+ * <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
1155
+ *
1156
+ * The following code snippet demonstrates how to use \p exclusive_scan_by_key using the
1157
+ * \p thrust::host execution policy for parallelization:
1158
+ *
1159
+ * \code
1160
+ * #include <thrust/scan.h>
1161
+ * #include <thrust/execution_policy.h>
1162
+ * ...
1163
+ *
1164
+ * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
1165
+ * int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
1166
+ *
1167
+ * thrust::exclusive_scan_by_key(thrust::host, keys, keys + 10, vals, vals); // in-place scan
1168
+ *
1169
+ * // vals is now {0, 1, 2, 0, 1, 0, 0, 1, 2, 3};
1170
+ * \endcode
1171
+ *
1172
+ * \see exclusive_scan
1173
+ *
1174
+ */
1175
+ template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator>
1176
+ _CCCL_HOST_DEVICE OutputIterator exclusive_scan_by_key(
1177
+ const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
1178
+ InputIterator1 first1,
1179
+ InputIterator1 last1,
1180
+ InputIterator2 first2,
1181
+ OutputIterator result);
1182
+
1183
+ /*! \p exclusive_scan_by_key computes an exclusive segmented prefix sum.
1184
+ *
1185
+ * This version of \p exclusive_scan_by_key uses the value \c 0 to
1186
+ * initialize the exclusive scan operation.
1187
+ *
1188
+ * This version of \p exclusive_scan_by_key assumes \c plus as the associative
1189
+ * operator used to perform the prefix sum. When the input and output sequences
1190
+ * are the same, the scan is performed in-place.
1191
+ *
1192
+ * This version of \p exclusive_scan_by_key assumes \c equal_to as the binary
1193
+ * predicate used to compare adjacent keys. Specifically, consecutive iterators
1194
+ * <tt>i</tt> and <tt>i+1</tt> in the range <tt>[first1, last1)</tt>
1195
+ * belong to the same segment if <tt>*i == *(i+1)</tt>, and belong to
1196
+ * different segments otherwise.
1197
+ *
1198
+ * Results are not deterministic for pseudo-associative operators (e.g.,
1199
+ * addition of floating-point types). Results for pseudo-associative
1200
+ * operators may vary from run to run.
1201
+ *
1202
+ * Refer to the most general form of \p exclusive_scan_by_key for additional details.
1203
+ *
1204
+ * \param first1 The beginning of the key sequence.
1205
+ * \param last1 The end of the key sequence.
1206
+ * \param first2 The beginning of the input value sequence.
1207
+ * \param result The beginning of the output value sequence.
1208
+ *
1209
+ * \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range
1210
+ * <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
1211
+ * \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and the range
1212
+ * <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
1213
+ *
1214
+ * The following code snippet demonstrates how to use \p exclusive_scan_by_key.
1215
+ *
1216
+ * \code
1217
+ * #include <thrust/scan.h>
1218
+ *
1219
+ * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
1220
+ * int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
1221
+ *
1222
+ * thrust::exclusive_scan_by_key(keys, keys + 10, vals, vals); // in-place scan
1223
+ *
1224
+ * // vals is now {0, 1, 2, 0, 1, 0, 0, 1, 2, 3};
1225
+ * \endcode
1226
+ *
1227
+ * \see exclusive_scan
1228
+ *
1229
+ */
1230
+ template <typename InputIterator1, typename InputIterator2, typename OutputIterator>
1231
+ OutputIterator
1232
+ exclusive_scan_by_key(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator result);
1233
+
1234
+ /*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix
1235
+ * sum operation. The term 'exclusive' means that each result does not include
1236
+ * the corresponding input operand in the partial sum. The term 'segmented'
1237
+ * means that the partial sums are broken into distinct segments. In other
1238
+ * words, within each segment a separate exclusive scan operation is computed.
1239
+ * Refer to the code sample below for example usage.
1240
+ *
1241
+ * This version of \p exclusive_scan_by_key uses the value \c init to
1242
+ * initialize the exclusive scan operation.
1243
+ *
1244
+ * Results are not deterministic for pseudo-associative operators (e.g.,
1245
+ * addition of floating-point types). Results for pseudo-associative
1246
+ * operators may vary from run to run.
1247
+ *
1248
+ * The algorithm's execution is parallelized as determined by \p exec.
1249
+ *
1250
+ * \param exec The execution policy to use for parallelization.
1251
+ * \param first1 The beginning of the key sequence.
1252
+ * \param last1 The end of the key sequence.
1253
+ * \param first2 The beginning of the input value sequence.
1254
+ * \param result The beginning of the output value sequence.
1255
+ * \param init The initial value of the exclusive sum.
1256
+ * \return The end of the output sequence.
1257
+ *
1258
+ * \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range
1259
+ * <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
1260
+ * \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and the range
1261
+ * <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
1262
+ *
1263
+ * The following code snippet demonstrates how to use \p exclusive_scan_by_key using the \p
1264
+ * thrust::host execution policy for parallelization:
1265
+ *
1266
+ * \code
1267
+ * #include <thrust/scan.h>
1268
+ * #include <thrust/functional.h>
1269
+ * #include <thrust/execution_policy.h>
1270
+ * ...
1271
+ *
1272
+ * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
1273
+ * int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
1274
+ *
1275
+ * int init = 5;
1276
+ *
1277
+ * thrust::exclusive_scan_by_key(thrust::host, keys, keys + 10, vals, vals, init); // in-place scan
1278
+ *
1279
+ * // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8};
1280
+ * \endcode
1281
+ *
1282
+ * \see exclusive_scan
1283
+ * \see inclusive_scan_by_key
1284
+ *
1285
+ */
1286
+ template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator, typename T>
1287
+ _CCCL_HOST_DEVICE OutputIterator exclusive_scan_by_key(
1288
+ const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
1289
+ InputIterator1 first1,
1290
+ InputIterator1 last1,
1291
+ InputIterator2 first2,
1292
+ OutputIterator result,
1293
+ T init);
1294
+
1295
+ /*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix
1296
+ * sum operation. The term 'exclusive' means that each result does not include
1297
+ * the corresponding input operand in the partial sum. The term 'segmented'
1298
+ * means that the partial sums are broken into distinct segments. In other
1299
+ * words, within each segment a separate exclusive scan operation is computed.
1300
+ * Refer to the code sample below for example usage.
1301
+ *
1302
+ * This version of \p exclusive_scan_by_key uses the value \c init to
1303
+ * initialize the exclusive scan operation.
1304
+ *
1305
+ * Results are not deterministic for pseudo-associative operators (e.g.,
1306
+ * addition of floating-point types). Results for pseudo-associative
1307
+ * operators may vary from run to run.
1308
+ *
1309
+ * \param first1 The beginning of the key sequence.
1310
+ * \param last1 The end of the key sequence.
1311
+ * \param first2 The beginning of the input value sequence.
1312
+ * \param result The beginning of the output value sequence.
1313
+ * \param init The initial value of the exclusive sum.
1314
+ * \return The end of the output sequence.
1315
+ *
1316
+ * \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range
1317
+ * <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
1318
+ * \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and the range
1319
+ * <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
1320
+ *
1321
+ * The following code snippet demonstrates how to use \p exclusive_scan_by_key
1322
+ *
1323
+ * \code
1324
+ * #include <thrust/scan.h>
1325
+ * #include <thrust/functional.h>
1326
+ *
1327
+ * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
1328
+ * int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
1329
+ *
1330
+ * int init = 5;
1331
+ *
1332
+ * thrust::exclusive_scan_by_key(keys, keys + 10, vals, vals, init); // in-place scan
1333
+ *
1334
+ * // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8};
1335
+ * \endcode
1336
+ *
1337
+ * \see exclusive_scan
1338
+ * \see inclusive_scan_by_key
1339
+ *
1340
+ */
1341
+ template <typename InputIterator1, typename InputIterator2, typename OutputIterator, typename T>
1342
+ OutputIterator exclusive_scan_by_key(
1343
+ InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator result, T init);
1344
+
1345
+ /*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix
1346
+ * sum operation. The term 'exclusive' means that each result does not include
1347
+ * the corresponding input operand in the partial sum. The term 'segmented'
1348
+ * means that the partial sums are broken into distinct segments. In other
1349
+ * words, within each segment a separate exclusive scan operation is computed.
1350
+ * Refer to the code sample below for example usage.
1351
+ *
1352
+ * This version of \p exclusive_scan_by_key uses the value \c init to
1353
+ * initialize the exclusive scan operation.
1354
+ *
1355
+ * This version of \p exclusive_scan_by_key uses the binary predicate \c binary_pred
1356
+ * to compare adjacent keys. Specifically, consecutive iterators <tt>i</tt> and
1357
+ * <tt>i+1</tt> in the range <tt>[first1, last1)</tt> belong to the same segment if
1358
+ * <tt>binary_pred(*i, *(i+1))</tt> is true, and belong to different segments otherwise.
1359
+ *
1360
+ * Results are not deterministic for pseudo-associative operators (e.g.,
1361
+ * addition of floating-point types). Results for pseudo-associative
1362
+ * operators may vary from run to run.
1363
+ *
1364
+ * The algorithm's execution is parallelized as determined by \p exec.
1365
+ *
1366
+ * \param exec The execution policy to use for parallelization.
1367
+ * \param first1 The beginning of the key sequence.
1368
+ * \param last1 The end of the key sequence.
1369
+ * \param first2 The beginning of the input value sequence.
1370
+ * \param result The beginning of the output value sequence.
1371
+ * \param init The initial value of the exclusive sum.
1372
+ * \param binary_pred The binary predicate used to determine equality of keys.
1373
+ * \return The end of the output sequence.
1374
+ *
1375
+ * \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range
1376
+ * <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
1377
+ * \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and the range
1378
+ * <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
1379
+ *
1380
+ * The following code snippet demonstrates how to use \p exclusive_scan_by_key using the
1381
+ * \p thrust::host execution policy for parallelization:
1382
+ *
1383
+ * \code
1384
+ * #include <thrust/scan.h>
1385
+ * #include <thrust/functional.h>
1386
+ * #include <thrust/execution_policy.h>
1387
+ * ...
1388
+ *
1389
+ * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
1390
+ * int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
1391
+ *
1392
+ * int init = 5;
1393
+ *
1394
+ * ::cuda::std::equal_to<int> binary_pred;
1395
+ *
1396
+ * thrust::exclusive_scan_by_key(thrust::host, keys, keys + 10, vals, vals, init, binary_pred); // in-place scan
1397
+ *
1398
+ * // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8};
1399
+ * \endcode
1400
+ *
1401
+ * \see exclusive_scan
1402
+ * \see inclusive_scan_by_key
1403
+ *
1404
+ */
1405
+ template <typename DerivedPolicy,
1406
+ typename InputIterator1,
1407
+ typename InputIterator2,
1408
+ typename OutputIterator,
1409
+ typename T,
1410
+ typename BinaryPredicate>
1411
+ _CCCL_HOST_DEVICE OutputIterator exclusive_scan_by_key(
1412
+ const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
1413
+ InputIterator1 first1,
1414
+ InputIterator1 last1,
1415
+ InputIterator2 first2,
1416
+ OutputIterator result,
1417
+ T init,
1418
+ BinaryPredicate binary_pred);
1419
+
1420
+ /*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix
1421
+ * sum operation. The term 'exclusive' means that each result does not include
1422
+ * the corresponding input operand in the partial sum. The term 'segmented'
1423
+ * means that the partial sums are broken into distinct segments. In other
1424
+ * words, within each segment a separate exclusive scan operation is computed.
1425
+ * Refer to the code sample below for example usage.
1426
+ *
1427
+ * This version of \p exclusive_scan_by_key uses the value \c init to
1428
+ * initialize the exclusive scan operation.
1429
+ *
1430
+ * This version of \p exclusive_scan_by_key uses the binary predicate \c binary_pred
1431
+ * to compare adjacent keys. Specifically, consecutive iterators <tt>i</tt> and
1432
+ * <tt>i+1</tt> in the range <tt>[first1, last1)</tt> belong to the same segment if
1433
+ * <tt>binary_pred(*i, *(i+1))</tt> is true, and belong to different segments otherwise.
1434
+ *
1435
+ * Results are not deterministic for pseudo-associative operators (e.g.,
1436
+ * addition of floating-point types). Results for pseudo-associative
1437
+ * operators may vary from run to run.
1438
+ *
1439
+ * \param first1 The beginning of the key sequence.
1440
+ * \param last1 The end of the key sequence.
1441
+ * \param first2 The beginning of the input value sequence.
1442
+ * \param result The beginning of the output value sequence.
1443
+ * \param init The initial value of the exclusive sum.
1444
+ * \param binary_pred The binary predicate used to determine equality of keys.
1445
+ * \return The end of the output sequence.
1446
+ *
1447
+ * \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range
1448
+ * <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
1449
+ * \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and the range
1450
+ * <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
1451
+ *
1452
+ * The following code snippet demonstrates how to use \p exclusive_scan_by_key
1453
+ *
1454
+ * \code
1455
+ * #include <thrust/scan.h>
1456
+ * #include <thrust/functional.h>
1457
+ *
1458
+ * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
1459
+ * int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
1460
+ *
1461
+ * int init = 5;
1462
+ *
1463
+ * ::cuda::std::equal_to<int> binary_pred;
1464
+ *
1465
+ * thrust::exclusive_scan_by_key(keys, keys + 10, vals, vals, init, binary_pred); // in-place scan
1466
+ *
1467
+ * // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8};
1468
+ * \endcode
1469
+ *
1470
+ * \see exclusive_scan
1471
+ * \see inclusive_scan_by_key
1472
+ *
1473
+ */
1474
+ template <typename InputIterator1, typename InputIterator2, typename OutputIterator, typename T, typename BinaryPredicate>
1475
+ OutputIterator exclusive_scan_by_key(
1476
+ InputIterator1 first1,
1477
+ InputIterator1 last1,
1478
+ InputIterator2 first2,
1479
+ OutputIterator result,
1480
+ T init,
1481
+ BinaryPredicate binary_pred);
1482
+
1483
+ /*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix
1484
+ * sum operation. The term 'exclusive' means that each result does not include
1485
+ * the corresponding input operand in the partial sum. The term 'segmented'
1486
+ * means that the partial sums are broken into distinct segments. In other
1487
+ * words, within each segment a separate exclusive scan operation is computed.
1488
+ * Refer to the code sample below for example usage.
1489
+ *
1490
+ * This version of \p exclusive_scan_by_key uses the value \c init to
1491
+ * initialize the exclusive scan operation.
1492
+ *
1493
+ * This version of \p exclusive_scan_by_key uses the binary predicate \c binary_pred
1494
+ * to compare adjacent keys. Specifically, consecutive iterators <tt>i</tt> and
1495
+ * <tt>i+1</tt> in the range <tt>[first1, last1)</tt> belong to the same segment if
1496
+ * <tt>binary_pred(*i, *(i+1))</tt> is true, and belong to different segments otherwise.
1497
+ *
1498
+ * This version of \p exclusive_scan_by_key uses the associative operator
1499
+ * \c binary_op to perform the prefix sum. When the input and output sequences
1500
+ * are the same, the scan is performed in-place.
1501
+ *
1502
+ * Results are not deterministic for pseudo-associative operators (e.g.,
1503
+ * addition of floating-point types). Results for pseudo-associative
1504
+ * operators may vary from run to run.
1505
+ *
1506
+ * The algorithm's execution is parallelized as determined by \p exec.
1507
+ *
1508
+ * \param exec The execution policy to use for parallelization.
1509
+ * \param first1 The beginning of the key sequence.
1510
+ * \param last1 The end of the key sequence.
1511
+ * \param first2 The beginning of the input value sequence.
1512
+ * \param result The beginning of the output value sequence.
1513
+ * \param init The initial value of the exclusive sum.
1514
+ * \param binary_pred The binary predicate used to determine equality of keys.
1515
+ * \param binary_op The associative operator used to 'sum' values.
1516
+ * \return The end of the output sequence.
1517
+ *
1518
+ * \tparam DerivedPolicy The name of the derived execution policy.
1519
+ * \tparam InputIterator1 is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input Iterator</a>.
1520
+ * \tparam InputIterator2 is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input Iterator</a>
1521
+ * and \c InputIterator2's \c value_type is convertible to \c OutputIterator's \c value_type.
1522
+ * \tparam OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a>,
1523
+ * and if \c x and \c y are objects of \c OutputIterator's \c value_type,
1524
+ * then <tt>binary_op(x,y)</tt> is defined.
1525
+ * \tparam T is convertible to \c OutputIterator's \c value_type.
1526
+ * \tparam BinaryPredicate is a model of <a href="https://en.cppreference.com/w/cpp/named_req/BinaryPredicate">Binary Predicate</a>.
1527
+ * \tparam AssociativeOperator The function's return type must be convertible to \c OutputIterator's \c value_type.
1528
+ *
1529
+ * \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range
1530
+ * <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
1531
+ * \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and the range
1532
+ * <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
1533
+ *
1534
+ * The following code snippet demonstrates how to use \p exclusive_scan_by_key using the
1535
+ * \p thrust::host execution policy for parallelization:
1536
+ *
1537
+ * \code
1538
+ * #include <thrust/scan.h>
1539
+ * #include <thrust/functional.h>
1540
+ * #include <thrust/execution_policy.h>
1541
+ * ...
1542
+ *
1543
+ * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
1544
+ * int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
1545
+ *
1546
+ * int init = 5;
1547
+ *
1548
+ * ::cuda::std::equal_to<int> binary_pred;
1549
+ * ::cuda::std::plus<int> binary_op;
1550
+ *
1551
+ * // in-place scan
1552
+ * thrust::exclusive_scan_by_key(thrust::host, keys, keys + 10, vals, vals, init, binary_pred, binary_op);
1553
+ *
1554
+ * // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8};
1555
+ * \endcode
1556
+ *
1557
+ * \see exclusive_scan
1558
+ * \see inclusive_scan_by_key
1559
+ *
1560
+ */
1561
+ template <typename DerivedPolicy,
1562
+ typename InputIterator1,
1563
+ typename InputIterator2,
1564
+ typename OutputIterator,
1565
+ typename T,
1566
+ typename BinaryPredicate,
1567
+ typename AssociativeOperator>
1568
+ _CCCL_HOST_DEVICE OutputIterator exclusive_scan_by_key(
1569
+ const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
1570
+ InputIterator1 first1,
1571
+ InputIterator1 last1,
1572
+ InputIterator2 first2,
1573
+ OutputIterator result,
1574
+ T init,
1575
+ BinaryPredicate binary_pred,
1576
+ AssociativeOperator binary_op);
1577
+
1578
+ /*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix
1579
+ * sum operation. The term 'exclusive' means that each result does not include
1580
+ * the corresponding input operand in the partial sum. The term 'segmented'
1581
+ * means that the partial sums are broken into distinct segments. In other
1582
+ * words, within each segment a separate exclusive scan operation is computed.
1583
+ * Refer to the code sample below for example usage.
1584
+ *
1585
+ * This version of \p exclusive_scan_by_key uses the value \c init to
1586
+ * initialize the exclusive scan operation.
1587
+ *
1588
+ * This version of \p exclusive_scan_by_key uses the binary predicate \c binary_pred
1589
+ * to compare adjacent keys. Specifically, consecutive iterators <tt>i</tt> and
1590
+ * <tt>i+1</tt> in the range <tt>[first1, last1)</tt> belong to the same segment if
1591
+ * <tt>binary_pred(*i, *(i+1))</tt> is true, and belong to different segments otherwise.
1592
+ *
1593
+ * This version of \p exclusive_scan_by_key uses the associative operator
1594
+ * \c binary_op to perform the prefix sum. When the input and output sequences
1595
+ * are the same, the scan is performed in-place.
1596
+ *
1597
+ * Results are not deterministic for pseudo-associative operators (e.g.,
1598
+ * addition of floating-point types). Results for pseudo-associative
1599
+ * operators may vary from run to run.
1600
+ *
1601
+ * \param first1 The beginning of the key sequence.
1602
+ * \param last1 The end of the key sequence.
1603
+ * \param first2 The beginning of the input value sequence.
1604
+ * \param result The beginning of the output value sequence.
1605
+ * \param init The initial value of the exclusive sum.
1606
+ * \param binary_pred The binary predicate used to determine equality of keys.
1607
+ * \param binary_op The associative operator used to 'sum' values.
1608
+ * \return The end of the output sequence.
1609
+ *
1610
+ * \tparam InputIterator1 is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input Iterator</a>.
1611
+ * \tparam InputIterator2 is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input Iterator</a>
1612
+ * and \c InputIterator2's \c value_type is convertible to \c OutputIterator's \c value_type.
1613
+ * \tparam OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a>,
1614
+ * and if \c x and \c y are objects of \c OutputIterator's \c value_type,
1615
+ * then <tt>binary_op(x,y)</tt> is defined.
1616
+ * \tparam T is convertible to \c OutputIterator's \c value_type.
1617
+ * \tparam BinaryPredicate is a model of <a href="https://en.cppreference.com/w/cpp/named_req/BinaryPredicate">Binary Predicate</a>.
1618
+ * \tparam AssociativeOperator The function's return type must be convertible to \c OutputIterator's \c value_type.
1619
+ *
1620
+ * \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range
1621
+ * <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
1622
+ * \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and the range
1623
+ * <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
1624
+ *
1625
+ * The following code snippet demonstrates how to use \p exclusive_scan_by_key
1626
+ *
1627
+ * \code
1628
+ * #include <thrust/scan.h>
1629
+ * #include <thrust/functional.h>
1630
+ *
1631
+ * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
1632
+ * int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
1633
+ *
1634
+ * int init = 5;
1635
+ *
1636
+ * ::cuda::std::equal_to<int> binary_pred;
1637
+ * ::cuda::std::plus<int> binary_op;
1638
+ *
1639
+ * thrust::exclusive_scan_by_key(keys, keys + 10, vals, vals, init, binary_pred, binary_op); // in-place scan
1640
+ *
1641
+ * // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8};
1642
+ * \endcode
1643
+ *
1644
+ * \see exclusive_scan
1645
+ * \see inclusive_scan_by_key
1646
+ *
1647
+ */
1648
+ template <typename InputIterator1,
1649
+ typename InputIterator2,
1650
+ typename OutputIterator,
1651
+ typename T,
1652
+ typename BinaryPredicate,
1653
+ typename AssociativeOperator>
1654
+ OutputIterator exclusive_scan_by_key(
1655
+ InputIterator1 first1,
1656
+ InputIterator1 last1,
1657
+ InputIterator2 first2,
1658
+ OutputIterator result,
1659
+ T init,
1660
+ BinaryPredicate binary_pred,
1661
+ AssociativeOperator binary_op);
1662
+
1663
+ /*! \} // end segmentedprefixsums
1664
+ */
1665
+
1666
+ /*! \} // end prefix sums
1667
+ */
1668
+
1669
+ THRUST_NAMESPACE_END
1670
+
1671
+ #include <thrust/detail/scan.inl>