cuda-cccl 0.1.3.1.0.dev1486__cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cuda-cccl might be problematic. Click here for more details.

Files changed (1819):
  1. cuda/cccl/__init__.py +14 -0
  2. cuda/cccl/cooperative/__init__.py +3 -0
  3. cuda/cccl/cooperative/experimental/__init__.py +8 -0
  4. cuda/cccl/cooperative/experimental/_caching.py +48 -0
  5. cuda/cccl/cooperative/experimental/_common.py +276 -0
  6. cuda/cccl/cooperative/experimental/_nvrtc.py +91 -0
  7. cuda/cccl/cooperative/experimental/_scan_op.py +181 -0
  8. cuda/cccl/cooperative/experimental/_types.py +953 -0
  9. cuda/cccl/cooperative/experimental/_typing.py +107 -0
  10. cuda/cccl/cooperative/experimental/block/__init__.py +33 -0
  11. cuda/cccl/cooperative/experimental/block/_block_load_store.py +215 -0
  12. cuda/cccl/cooperative/experimental/block/_block_merge_sort.py +125 -0
  13. cuda/cccl/cooperative/experimental/block/_block_radix_sort.py +214 -0
  14. cuda/cccl/cooperative/experimental/block/_block_reduce.py +294 -0
  15. cuda/cccl/cooperative/experimental/block/_block_scan.py +983 -0
  16. cuda/cccl/cooperative/experimental/warp/__init__.py +9 -0
  17. cuda/cccl/cooperative/experimental/warp/_warp_merge_sort.py +98 -0
  18. cuda/cccl/cooperative/experimental/warp/_warp_reduce.py +153 -0
  19. cuda/cccl/cooperative/experimental/warp/_warp_scan.py +78 -0
  20. cuda/cccl/headers/__init__.py +7 -0
  21. cuda/cccl/headers/include/__init__.py +1 -0
  22. cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +261 -0
  23. cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +1181 -0
  24. cuda/cccl/headers/include/cub/agent/agent_for.cuh +84 -0
  25. cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +919 -0
  26. cuda/cccl/headers/include/cub/agent/agent_merge.cuh +227 -0
  27. cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +752 -0
  28. cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +766 -0
  29. cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +286 -0
  30. cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +704 -0
  31. cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +557 -0
  32. cuda/cccl/headers/include/cub/agent/agent_reduce.cuh +678 -0
  33. cuda/cccl/headers/include/cub/agent/agent_reduce_by_key.cuh +804 -0
  34. cuda/cccl/headers/include/cub/agent/agent_rle.cuh +997 -0
  35. cuda/cccl/headers/include/cub/agent/agent_scan.cuh +561 -0
  36. cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +473 -0
  37. cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +292 -0
  38. cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +1032 -0
  39. cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +342 -0
  40. cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +592 -0
  41. cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +614 -0
  42. cuda/cccl/headers/include/cub/agent/single_pass_scan_operators.cuh +1346 -0
  43. cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +965 -0
  44. cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +1217 -0
  45. cuda/cccl/headers/include/cub/block/block_exchange.cuh +1306 -0
  46. cuda/cccl/headers/include/cub/block/block_histogram.cuh +420 -0
  47. cuda/cccl/headers/include/cub/block/block_load.cuh +1259 -0
  48. cuda/cccl/headers/include/cub/block/block_merge_sort.cuh +787 -0
  49. cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +1218 -0
  50. cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +2193 -0
  51. cuda/cccl/headers/include/cub/block/block_raking_layout.cuh +150 -0
  52. cuda/cccl/headers/include/cub/block/block_reduce.cuh +629 -0
  53. cuda/cccl/headers/include/cub/block/block_run_length_decode.cuh +437 -0
  54. cuda/cccl/headers/include/cub/block/block_scan.cuh +2600 -0
  55. cuda/cccl/headers/include/cub/block/block_shuffle.cuh +346 -0
  56. cuda/cccl/headers/include/cub/block/block_store.cuh +1246 -0
  57. cuda/cccl/headers/include/cub/block/radix_rank_sort_operations.cuh +620 -0
  58. cuda/cccl/headers/include/cub/block/specializations/block_histogram_atomic.cuh +86 -0
  59. cuda/cccl/headers/include/cub/block/specializations/block_histogram_sort.cuh +240 -0
  60. cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking.cuh +252 -0
  61. cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking_commutative_only.cuh +238 -0
  62. cuda/cccl/headers/include/cub/block/specializations/block_reduce_warp_reductions.cuh +259 -0
  63. cuda/cccl/headers/include/cub/block/specializations/block_scan_raking.cuh +790 -0
  64. cuda/cccl/headers/include/cub/block/specializations/block_scan_warp_scans.cuh +538 -0
  65. cuda/cccl/headers/include/cub/config.cuh +60 -0
  66. cuda/cccl/headers/include/cub/cub.cuh +112 -0
  67. cuda/cccl/headers/include/cub/detail/array_utils.cuh +77 -0
  68. cuda/cccl/headers/include/cub/detail/choose_offset.cuh +155 -0
  69. cuda/cccl/headers/include/cub/detail/detect_cuda_runtime.cuh +93 -0
  70. cuda/cccl/headers/include/cub/detail/device_double_buffer.cuh +96 -0
  71. cuda/cccl/headers/include/cub/detail/fast_modulo_division.cuh +246 -0
  72. cuda/cccl/headers/include/cub/detail/launcher/cuda_driver.cuh +120 -0
  73. cuda/cccl/headers/include/cub/detail/launcher/cuda_runtime.cuh +74 -0
  74. cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +118 -0
  75. cuda/cccl/headers/include/cub/detail/ptx-json/README.md +71 -0
  76. cuda/cccl/headers/include/cub/detail/ptx-json/array.h +68 -0
  77. cuda/cccl/headers/include/cub/detail/ptx-json/json.h +61 -0
  78. cuda/cccl/headers/include/cub/detail/ptx-json/object.h +100 -0
  79. cuda/cccl/headers/include/cub/detail/ptx-json/string.h +71 -0
  80. cuda/cccl/headers/include/cub/detail/ptx-json/value.h +93 -0
  81. cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +63 -0
  82. cuda/cccl/headers/include/cub/detail/rfa.cuh +724 -0
  83. cuda/cccl/headers/include/cub/detail/strong_load.cuh +189 -0
  84. cuda/cccl/headers/include/cub/detail/strong_store.cuh +220 -0
  85. cuda/cccl/headers/include/cub/detail/temporary_storage.cuh +355 -0
  86. cuda/cccl/headers/include/cub/detail/type_traits.cuh +206 -0
  87. cuda/cccl/headers/include/cub/detail/uninitialized_copy.cuh +72 -0
  88. cuda/cccl/headers/include/cub/detail/unsafe_bitcast.cuh +56 -0
  89. cuda/cccl/headers/include/cub/device/device_adjacent_difference.cuh +596 -0
  90. cuda/cccl/headers/include/cub/device/device_copy.cuh +187 -0
  91. cuda/cccl/headers/include/cub/device/device_for.cuh +994 -0
  92. cuda/cccl/headers/include/cub/device/device_histogram.cuh +1507 -0
  93. cuda/cccl/headers/include/cub/device/device_memcpy.cuh +195 -0
  94. cuda/cccl/headers/include/cub/device/device_merge.cuh +202 -0
  95. cuda/cccl/headers/include/cub/device/device_merge_sort.cuh +979 -0
  96. cuda/cccl/headers/include/cub/device/device_partition.cuh +664 -0
  97. cuda/cccl/headers/include/cub/device/device_radix_sort.cuh +3431 -0
  98. cuda/cccl/headers/include/cub/device/device_reduce.cuh +1387 -0
  99. cuda/cccl/headers/include/cub/device/device_run_length_encode.cuh +368 -0
  100. cuda/cccl/headers/include/cub/device/device_scan.cuh +1901 -0
  101. cuda/cccl/headers/include/cub/device/device_segmented_radix_sort.cuh +1496 -0
  102. cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +1512 -0
  103. cuda/cccl/headers/include/cub/device/device_segmented_sort.cuh +2811 -0
  104. cuda/cccl/headers/include/cub/device/device_select.cuh +1224 -0
  105. cuda/cccl/headers/include/cub/device/device_transform.cuh +313 -0
  106. cuda/cccl/headers/include/cub/device/dispatch/dispatch_adjacent_difference.cuh +314 -0
  107. cuda/cccl/headers/include/cub/device/dispatch/dispatch_advance_iterators.cuh +109 -0
  108. cuda/cccl/headers/include/cub/device/dispatch/dispatch_batch_memcpy.cuh +718 -0
  109. cuda/cccl/headers/include/cub/device/dispatch/dispatch_common.cuh +45 -0
  110. cuda/cccl/headers/include/cub/device/dispatch/dispatch_for.cuh +197 -0
  111. cuda/cccl/headers/include/cub/device/dispatch/dispatch_histogram.cuh +1051 -0
  112. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +305 -0
  113. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge_sort.cuh +473 -0
  114. cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +1748 -0
  115. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +1316 -0
  116. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_by_key.cuh +625 -0
  117. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +502 -0
  118. cuda/cccl/headers/include/cub/device/dispatch/dispatch_rle.cuh +548 -0
  119. cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan.cuh +497 -0
  120. cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan_by_key.cuh +598 -0
  121. cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +1374 -0
  122. cuda/cccl/headers/include/cub/device/dispatch/dispatch_select_if.cuh +838 -0
  123. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +341 -0
  124. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +439 -0
  125. cuda/cccl/headers/include/cub/device/dispatch/dispatch_three_way_partition.cuh +552 -0
  126. cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +397 -0
  127. cuda/cccl/headers/include/cub/device/dispatch/dispatch_unique_by_key.cuh +543 -0
  128. cuda/cccl/headers/include/cub/device/dispatch/kernels/for_each.cuh +218 -0
  129. cuda/cccl/headers/include/cub/device/dispatch/kernels/histogram.cuh +505 -0
  130. cuda/cccl/headers/include/cub/device/dispatch/kernels/merge_sort.cuh +338 -0
  131. cuda/cccl/headers/include/cub/device/dispatch/kernels/radix_sort.cuh +799 -0
  132. cuda/cccl/headers/include/cub/device/dispatch/kernels/reduce.cuh +523 -0
  133. cuda/cccl/headers/include/cub/device/dispatch/kernels/scan.cuh +194 -0
  134. cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_reduce.cuh +330 -0
  135. cuda/cccl/headers/include/cub/device/dispatch/kernels/transform.cuh +437 -0
  136. cuda/cccl/headers/include/cub/device/dispatch/kernels/unique_by_key.cuh +176 -0
  137. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +70 -0
  138. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +121 -0
  139. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +63 -0
  140. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +278 -0
  141. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +91 -0
  142. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +118 -0
  143. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +1068 -0
  144. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce.cuh +397 -0
  145. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +945 -0
  146. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +675 -0
  147. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +555 -0
  148. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +1013 -0
  149. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +249 -0
  150. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_select_if.cuh +1587 -0
  151. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +407 -0
  152. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +283 -0
  153. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +874 -0
  154. cuda/cccl/headers/include/cub/grid/grid_even_share.cuh +215 -0
  155. cuda/cccl/headers/include/cub/grid/grid_mapping.cuh +106 -0
  156. cuda/cccl/headers/include/cub/grid/grid_queue.cuh +202 -0
  157. cuda/cccl/headers/include/cub/iterator/arg_index_input_iterator.cuh +256 -0
  158. cuda/cccl/headers/include/cub/iterator/cache_modified_input_iterator.cuh +238 -0
  159. cuda/cccl/headers/include/cub/iterator/cache_modified_output_iterator.cuh +252 -0
  160. cuda/cccl/headers/include/cub/iterator/tex_obj_input_iterator.cuh +322 -0
  161. cuda/cccl/headers/include/cub/thread/thread_load.cuh +347 -0
  162. cuda/cccl/headers/include/cub/thread/thread_operators.cuh +629 -0
  163. cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +504 -0
  164. cuda/cccl/headers/include/cub/thread/thread_scan.cuh +340 -0
  165. cuda/cccl/headers/include/cub/thread/thread_search.cuh +198 -0
  166. cuda/cccl/headers/include/cub/thread/thread_simd.cuh +406 -0
  167. cuda/cccl/headers/include/cub/thread/thread_sort.cuh +101 -0
  168. cuda/cccl/headers/include/cub/thread/thread_store.cuh +364 -0
  169. cuda/cccl/headers/include/cub/util_allocator.cuh +921 -0
  170. cuda/cccl/headers/include/cub/util_arch.cuh +163 -0
  171. cuda/cccl/headers/include/cub/util_cpp_dialect.cuh +95 -0
  172. cuda/cccl/headers/include/cub/util_debug.cuh +207 -0
  173. cuda/cccl/headers/include/cub/util_device.cuh +779 -0
  174. cuda/cccl/headers/include/cub/util_macro.cuh +91 -0
  175. cuda/cccl/headers/include/cub/util_math.cuh +115 -0
  176. cuda/cccl/headers/include/cub/util_namespace.cuh +176 -0
  177. cuda/cccl/headers/include/cub/util_policy_wrapper_t.cuh +55 -0
  178. cuda/cccl/headers/include/cub/util_ptx.cuh +513 -0
  179. cuda/cccl/headers/include/cub/util_temporary_storage.cuh +122 -0
  180. cuda/cccl/headers/include/cub/util_type.cuh +1111 -0
  181. cuda/cccl/headers/include/cub/util_vsmem.cuh +251 -0
  182. cuda/cccl/headers/include/cub/version.cuh +89 -0
  183. cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_shfl.cuh +329 -0
  184. cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_smem.cuh +177 -0
  185. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +729 -0
  186. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +405 -0
  187. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +688 -0
  188. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_smem.cuh +437 -0
  189. cuda/cccl/headers/include/cub/warp/warp_exchange.cuh +405 -0
  190. cuda/cccl/headers/include/cub/warp/warp_load.cuh +614 -0
  191. cuda/cccl/headers/include/cub/warp/warp_merge_sort.cuh +169 -0
  192. cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +822 -0
  193. cuda/cccl/headers/include/cub/warp/warp_scan.cuh +1156 -0
  194. cuda/cccl/headers/include/cub/warp/warp_store.cuh +520 -0
  195. cuda/cccl/headers/include/cuda/__annotated_ptr/access_property.h +169 -0
  196. cuda/cccl/headers/include/cuda/__annotated_ptr/access_property_encoding.h +172 -0
  197. cuda/cccl/headers/include/cuda/__annotated_ptr/annotated_ptr.h +210 -0
  198. cuda/cccl/headers/include/cuda/__annotated_ptr/annotated_ptr_base.h +100 -0
  199. cuda/cccl/headers/include/cuda/__annotated_ptr/apply_access_property.h +84 -0
  200. cuda/cccl/headers/include/cuda/__annotated_ptr/associate_access_property.h +127 -0
  201. cuda/cccl/headers/include/cuda/__annotated_ptr/createpolicy.h +209 -0
  202. cuda/cccl/headers/include/cuda/__atomic/atomic.h +145 -0
  203. cuda/cccl/headers/include/cuda/__barrier/aligned_size.h +61 -0
  204. cuda/cccl/headers/include/cuda/__barrier/async_contract_fulfillment.h +39 -0
  205. cuda/cccl/headers/include/cuda/__barrier/barrier.h +66 -0
  206. cuda/cccl/headers/include/cuda/__barrier/barrier_arrive_tx.h +100 -0
  207. cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +454 -0
  208. cuda/cccl/headers/include/cuda/__barrier/barrier_expect_tx.h +72 -0
  209. cuda/cccl/headers/include/cuda/__barrier/barrier_native_handle.h +45 -0
  210. cuda/cccl/headers/include/cuda/__barrier/barrier_thread_scope.h +61 -0
  211. cuda/cccl/headers/include/cuda/__bit/bit_reverse.h +171 -0
  212. cuda/cccl/headers/include/cuda/__bit/bitfield.h +122 -0
  213. cuda/cccl/headers/include/cuda/__bit/bitmask.h +88 -0
  214. cuda/cccl/headers/include/cuda/__cccl_config +36 -0
  215. cuda/cccl/headers/include/cuda/__cmath/ceil_div.h +126 -0
  216. cuda/cccl/headers/include/cuda/__cmath/ilog.h +195 -0
  217. cuda/cccl/headers/include/cuda/__cmath/ipow.h +107 -0
  218. cuda/cccl/headers/include/cuda/__cmath/isqrt.h +80 -0
  219. cuda/cccl/headers/include/cuda/__cmath/neg.h +47 -0
  220. cuda/cccl/headers/include/cuda/__cmath/pow2.h +74 -0
  221. cuda/cccl/headers/include/cuda/__cmath/round_down.h +104 -0
  222. cuda/cccl/headers/include/cuda/__cmath/round_up.h +106 -0
  223. cuda/cccl/headers/include/cuda/__cmath/uabs.h +57 -0
  224. cuda/cccl/headers/include/cuda/__execution/determinism.h +90 -0
  225. cuda/cccl/headers/include/cuda/__execution/require.h +67 -0
  226. cuda/cccl/headers/include/cuda/__execution/tune.h +62 -0
  227. cuda/cccl/headers/include/cuda/__functional/address_stability.h +131 -0
  228. cuda/cccl/headers/include/cuda/__functional/for_each_canceled.h +279 -0
  229. cuda/cccl/headers/include/cuda/__functional/get_device_address.h +58 -0
  230. cuda/cccl/headers/include/cuda/__functional/maximum.h +58 -0
  231. cuda/cccl/headers/include/cuda/__functional/minimum.h +58 -0
  232. cuda/cccl/headers/include/cuda/__functional/proclaim_return_type.h +108 -0
  233. cuda/cccl/headers/include/cuda/__fwd/barrier.h +38 -0
  234. cuda/cccl/headers/include/cuda/__fwd/barrier_native_handle.h +42 -0
  235. cuda/cccl/headers/include/cuda/__fwd/get_stream.h +38 -0
  236. cuda/cccl/headers/include/cuda/__fwd/pipeline.h +37 -0
  237. cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h +261 -0
  238. cuda/cccl/headers/include/cuda/__iterator/counting_iterator.h +407 -0
  239. cuda/cccl/headers/include/cuda/__iterator/discard_iterator.h +314 -0
  240. cuda/cccl/headers/include/cuda/__iterator/strided_iterator.h +323 -0
  241. cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +481 -0
  242. cuda/cccl/headers/include/cuda/__latch/latch.h +44 -0
  243. cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +457 -0
  244. cuda/cccl/headers/include/cuda/__mdspan/host_device_mdspan.h +63 -0
  245. cuda/cccl/headers/include/cuda/__mdspan/restrict_accessor.h +123 -0
  246. cuda/cccl/headers/include/cuda/__mdspan/restrict_mdspan.h +51 -0
  247. cuda/cccl/headers/include/cuda/__memcpy_async/check_preconditions.h +79 -0
  248. cuda/cccl/headers/include/cuda/__memcpy_async/completion_mechanism.h +47 -0
  249. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_bulk_shared_global.h +60 -0
  250. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_fallback.h +72 -0
  251. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_shared_global.h +98 -0
  252. cuda/cccl/headers/include/cuda/__memcpy_async/dispatch_memcpy_async.h +162 -0
  253. cuda/cccl/headers/include/cuda/__memcpy_async/is_local_smem_barrier.h +49 -0
  254. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async.h +179 -0
  255. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_barrier.h +99 -0
  256. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_tx.h +99 -0
  257. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_completion.h +170 -0
  258. cuda/cccl/headers/include/cuda/__memcpy_async/try_get_barrier_handle.h +59 -0
  259. cuda/cccl/headers/include/cuda/__memory/address_space.h +86 -0
  260. cuda/cccl/headers/include/cuda/__memory_resource/get_memory_resource.h +94 -0
  261. cuda/cccl/headers/include/cuda/__memory_resource/get_property.h +158 -0
  262. cuda/cccl/headers/include/cuda/__memory_resource/properties.h +73 -0
  263. cuda/cccl/headers/include/cuda/__memory_resource/resource.h +129 -0
  264. cuda/cccl/headers/include/cuda/__memory_resource/resource_ref.h +653 -0
  265. cuda/cccl/headers/include/cuda/__numeric/narrow.h +108 -0
  266. cuda/cccl/headers/include/cuda/__numeric/overflow_cast.h +57 -0
  267. cuda/cccl/headers/include/cuda/__numeric/overflow_result.h +43 -0
  268. cuda/cccl/headers/include/cuda/__nvtx/nvtx.h +101 -0
  269. cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +2982 -0
  270. cuda/cccl/headers/include/cuda/__ptx/instructions/barrier_cluster.h +43 -0
  271. cuda/cccl/headers/include/cuda/__ptx/instructions/bfind.h +41 -0
  272. cuda/cccl/headers/include/cuda/__ptx/instructions/bmsk.h +41 -0
  273. cuda/cccl/headers/include/cuda/__ptx/instructions/clusterlaunchcontrol.h +41 -0
  274. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk.h +44 -0
  275. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_commit_group.h +43 -0
  276. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_tensor.h +45 -0
  277. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_wait_group.h +43 -0
  278. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_mbarrier_arrive.h +42 -0
  279. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_reduce_async_bulk.h +60 -0
  280. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_reduce_async_bulk_tensor.h +43 -0
  281. cuda/cccl/headers/include/cuda/__ptx/instructions/elect_sync.h +41 -0
  282. cuda/cccl/headers/include/cuda/__ptx/instructions/exit.h +41 -0
  283. cuda/cccl/headers/include/cuda/__ptx/instructions/fence.h +49 -0
  284. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/barrier_cluster.h +115 -0
  285. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/bfind.h +190 -0
  286. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/bmsk.h +54 -0
  287. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/clusterlaunchcontrol.h +240 -0
  288. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk.h +193 -0
  289. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_commit_group.h +25 -0
  290. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_multicast.h +52 -0
  291. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor.h +957 -0
  292. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_gather_scatter.h +288 -0
  293. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_multicast.h +596 -0
  294. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_wait_group.h +46 -0
  295. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_mbarrier_arrive.h +26 -0
  296. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_mbarrier_arrive_noinc.h +26 -0
  297. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk.h +1445 -0
  298. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_bf16.h +132 -0
  299. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_f16.h +117 -0
  300. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_tensor.h +601 -0
  301. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/elect_sync.h +36 -0
  302. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/exit.h +25 -0
  303. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence.h +208 -0
  304. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_mbarrier_init.h +31 -0
  305. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_alias.h +25 -0
  306. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_async.h +58 -0
  307. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_async_generic_sync_restrict.h +62 -0
  308. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_tensormap_generic.h +101 -0
  309. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_sync_restrict.h +62 -0
  310. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/get_sreg.h +949 -0
  311. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/getctarank.h +32 -0
  312. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/ld.h +15074 -0
  313. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive.h +385 -0
  314. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive_expect_tx.h +176 -0
  315. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive_no_complete.h +34 -0
  316. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_expect_tx.h +94 -0
  317. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_init.h +27 -0
  318. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_test_wait.h +137 -0
  319. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_test_wait_parity.h +138 -0
  320. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_try_wait.h +280 -0
  321. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_try_wait_parity.h +282 -0
  322. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_ld_reduce.h +2148 -0
  323. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_red.h +1272 -0
  324. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_st.h +228 -0
  325. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/prmt.h +230 -0
  326. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/red_async.h +430 -0
  327. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/shl.h +96 -0
  328. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/shr.h +168 -0
  329. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st.h +1830 -0
  330. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st_async.h +123 -0
  331. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st_bulk.h +31 -0
  332. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_alloc.h +105 -0
  333. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_commit.h +81 -0
  334. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_cp.h +612 -0
  335. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_fence.h +44 -0
  336. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_ld.h +4446 -0
  337. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_mma.h +4061 -0
  338. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_mma_ws.h +6438 -0
  339. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_shift.h +36 -0
  340. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_st.h +4582 -0
  341. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_wait.h +44 -0
  342. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tensormap_cp_fenceproxy.h +67 -0
  343. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tensormap_replace.h +750 -0
  344. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/trap.h +25 -0
  345. cuda/cccl/headers/include/cuda/__ptx/instructions/get_sreg.h +43 -0
  346. cuda/cccl/headers/include/cuda/__ptx/instructions/getctarank.h +43 -0
  347. cuda/cccl/headers/include/cuda/__ptx/instructions/ld.h +41 -0
  348. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_arrive.h +45 -0
  349. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_expect_tx.h +41 -0
  350. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_init.h +43 -0
  351. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_wait.h +46 -0
  352. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_ld_reduce.h +41 -0
  353. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_red.h +41 -0
  354. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_st.h +41 -0
  355. cuda/cccl/headers/include/cuda/__ptx/instructions/prmt.h +41 -0
  356. cuda/cccl/headers/include/cuda/__ptx/instructions/red_async.h +43 -0
  357. cuda/cccl/headers/include/cuda/__ptx/instructions/shfl_sync.h +275 -0
  358. cuda/cccl/headers/include/cuda/__ptx/instructions/shl.h +41 -0
  359. cuda/cccl/headers/include/cuda/__ptx/instructions/shr.h +41 -0
  360. cuda/cccl/headers/include/cuda/__ptx/instructions/st.h +41 -0
  361. cuda/cccl/headers/include/cuda/__ptx/instructions/st_async.h +43 -0
  362. cuda/cccl/headers/include/cuda/__ptx/instructions/st_bulk.h +41 -0
  363. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_alloc.h +41 -0
  364. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_commit.h +41 -0
  365. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_cp.h +41 -0
  366. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_fence.h +41 -0
  367. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_ld.h +41 -0
  368. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_mma.h +41 -0
  369. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_mma_ws.h +41 -0
  370. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_shift.h +41 -0
  371. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_st.h +41 -0
  372. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_wait.h +41 -0
  373. cuda/cccl/headers/include/cuda/__ptx/instructions/tensormap_cp_fenceproxy.h +43 -0
  374. cuda/cccl/headers/include/cuda/__ptx/instructions/tensormap_replace.h +43 -0
  375. cuda/cccl/headers/include/cuda/__ptx/instructions/trap.h +41 -0
  376. cuda/cccl/headers/include/cuda/__ptx/ptx_dot_variants.h +230 -0
  377. cuda/cccl/headers/include/cuda/__ptx/ptx_helper_functions.h +151 -0
  378. cuda/cccl/headers/include/cuda/__semaphore/counting_semaphore.h +53 -0
  379. cuda/cccl/headers/include/cuda/__stream/get_stream.h +97 -0
  380. cuda/cccl/headers/include/cuda/__stream/stream_ref.h +165 -0
  381. cuda/cccl/headers/include/cuda/__type_traits/is_floating_point.h +47 -0
  382. cuda/cccl/headers/include/cuda/__warp/lane_mask.h +326 -0
  383. cuda/cccl/headers/include/cuda/__warp/warp_match_all.h +66 -0
  384. cuda/cccl/headers/include/cuda/__warp/warp_shuffle.h +249 -0
  385. cuda/cccl/headers/include/cuda/access_property +26 -0
  386. cuda/cccl/headers/include/cuda/annotated_ptr +29 -0
  387. cuda/cccl/headers/include/cuda/atomic +27 -0
  388. cuda/cccl/headers/include/cuda/barrier +262 -0
  389. cuda/cccl/headers/include/cuda/bit +29 -0
  390. cuda/cccl/headers/include/cuda/cmath +35 -0
  391. cuda/cccl/headers/include/cuda/discard_memory +61 -0
  392. cuda/cccl/headers/include/cuda/functional +31 -0
  393. cuda/cccl/headers/include/cuda/iterator +31 -0
  394. cuda/cccl/headers/include/cuda/latch +27 -0
  395. cuda/cccl/headers/include/cuda/mdspan +28 -0
  396. cuda/cccl/headers/include/cuda/memory +28 -0
  397. cuda/cccl/headers/include/cuda/memory_resource +41 -0
  398. cuda/cccl/headers/include/cuda/numeric +28 -0
  399. cuda/cccl/headers/include/cuda/pipeline +579 -0
  400. cuda/cccl/headers/include/cuda/ptx +118 -0
  401. cuda/cccl/headers/include/cuda/semaphore +31 -0
  402. cuda/cccl/headers/include/cuda/std/__algorithm/adjacent_find.h +60 -0
  403. cuda/cccl/headers/include/cuda/std/__algorithm/all_of.h +46 -0
  404. cuda/cccl/headers/include/cuda/std/__algorithm/any_of.h +46 -0
  405. cuda/cccl/headers/include/cuda/std/__algorithm/binary_search.h +52 -0
  406. cuda/cccl/headers/include/cuda/std/__algorithm/clamp.h +48 -0
  407. cuda/cccl/headers/include/cuda/std/__algorithm/comp.h +64 -0
  408. cuda/cccl/headers/include/cuda/std/__algorithm/comp_ref_type.h +85 -0
  409. cuda/cccl/headers/include/cuda/std/__algorithm/copy.h +143 -0
  410. cuda/cccl/headers/include/cuda/std/__algorithm/copy_backward.h +79 -0
  411. cuda/cccl/headers/include/cuda/std/__algorithm/copy_if.h +47 -0
  412. cuda/cccl/headers/include/cuda/std/__algorithm/copy_n.h +74 -0
  413. cuda/cccl/headers/include/cuda/std/__algorithm/count.h +49 -0
  414. cuda/cccl/headers/include/cuda/std/__algorithm/count_if.h +49 -0
  415. cuda/cccl/headers/include/cuda/std/__algorithm/equal.h +129 -0
  416. cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h +101 -0
  417. cuda/cccl/headers/include/cuda/std/__algorithm/fill.h +58 -0
  418. cuda/cccl/headers/include/cuda/std/__algorithm/fill_n.h +51 -0
  419. cuda/cccl/headers/include/cuda/std/__algorithm/find.h +64 -0
  420. cuda/cccl/headers/include/cuda/std/__algorithm/find_end.h +225 -0
  421. cuda/cccl/headers/include/cuda/std/__algorithm/find_first_of.h +73 -0
  422. cuda/cccl/headers/include/cuda/std/__algorithm/find_if.h +46 -0
  423. cuda/cccl/headers/include/cuda/std/__algorithm/find_if_not.h +46 -0
  424. cuda/cccl/headers/include/cuda/std/__algorithm/for_each.h +42 -0
  425. cuda/cccl/headers/include/cuda/std/__algorithm/for_each_n.h +48 -0
  426. cuda/cccl/headers/include/cuda/std/__algorithm/generate.h +41 -0
  427. cuda/cccl/headers/include/cuda/std/__algorithm/generate_n.h +46 -0
  428. cuda/cccl/headers/include/cuda/std/__algorithm/half_positive.h +49 -0
  429. cuda/cccl/headers/include/cuda/std/__algorithm/includes.h +92 -0
  430. cuda/cccl/headers/include/cuda/std/__algorithm/is_heap.h +51 -0
  431. cuda/cccl/headers/include/cuda/std/__algorithm/is_heap_until.h +83 -0
  432. cuda/cccl/headers/include/cuda/std/__algorithm/is_partitioned.h +58 -0
  433. cuda/cccl/headers/include/cuda/std/__algorithm/is_permutation.h +252 -0
  434. cuda/cccl/headers/include/cuda/std/__algorithm/is_sorted.h +50 -0
  435. cuda/cccl/headers/include/cuda/std/__algorithm/is_sorted_until.h +69 -0
  436. cuda/cccl/headers/include/cuda/std/__algorithm/iter_swap.h +82 -0
  437. cuda/cccl/headers/include/cuda/std/__algorithm/iterator_operations.h +188 -0
  438. cuda/cccl/headers/include/cuda/std/__algorithm/lexicographical_compare.h +68 -0
  439. cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h +83 -0
  440. cuda/cccl/headers/include/cuda/std/__algorithm/make_heap.h +72 -0
  441. cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h +96 -0
  442. cuda/cccl/headers/include/cuda/std/__algorithm/max.h +62 -0
  443. cuda/cccl/headers/include/cuda/std/__algorithm/max_element.h +70 -0
  444. cuda/cccl/headers/include/cuda/std/__algorithm/merge.h +89 -0
  445. cuda/cccl/headers/include/cuda/std/__algorithm/min.h +62 -0
  446. cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h +88 -0
  447. cuda/cccl/headers/include/cuda/std/__algorithm/minmax.h +71 -0
  448. cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h +141 -0
  449. cuda/cccl/headers/include/cuda/std/__algorithm/mismatch.h +83 -0
  450. cuda/cccl/headers/include/cuda/std/__algorithm/move.h +88 -0
  451. cuda/cccl/headers/include/cuda/std/__algorithm/move_backward.h +84 -0
  452. cuda/cccl/headers/include/cuda/std/__algorithm/next_permutation.h +89 -0
  453. cuda/cccl/headers/include/cuda/std/__algorithm/none_of.h +46 -0
  454. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort.h +102 -0
  455. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h +122 -0
  456. cuda/cccl/headers/include/cuda/std/__algorithm/partition.h +121 -0
  457. cuda/cccl/headers/include/cuda/std/__algorithm/partition_copy.h +59 -0
  458. cuda/cccl/headers/include/cuda/std/__algorithm/partition_point.h +61 -0
  459. cuda/cccl/headers/include/cuda/std/__algorithm/pop_heap.h +95 -0
  460. cuda/cccl/headers/include/cuda/std/__algorithm/prev_permutation.h +89 -0
  461. cuda/cccl/headers/include/cuda/std/__algorithm/push_heap.h +103 -0
  462. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_iterator_concept.h +65 -0
  463. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_min.h +99 -0
  464. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_min_element.h +69 -0
  465. cuda/cccl/headers/include/cuda/std/__algorithm/remove.h +55 -0
  466. cuda/cccl/headers/include/cuda/std/__algorithm/remove_copy.h +47 -0
  467. cuda/cccl/headers/include/cuda/std/__algorithm/remove_copy_if.h +47 -0
  468. cuda/cccl/headers/include/cuda/std/__algorithm/remove_if.h +56 -0
  469. cuda/cccl/headers/include/cuda/std/__algorithm/replace.h +45 -0
  470. cuda/cccl/headers/include/cuda/std/__algorithm/replace_copy.h +54 -0
  471. cuda/cccl/headers/include/cuda/std/__algorithm/replace_copy_if.h +50 -0
  472. cuda/cccl/headers/include/cuda/std/__algorithm/replace_if.h +45 -0
  473. cuda/cccl/headers/include/cuda/std/__algorithm/reverse.h +81 -0
  474. cuda/cccl/headers/include/cuda/std/__algorithm/reverse_copy.h +43 -0
  475. cuda/cccl/headers/include/cuda/std/__algorithm/rotate.h +264 -0
  476. cuda/cccl/headers/include/cuda/std/__algorithm/rotate_copy.h +40 -0
  477. cuda/cccl/headers/include/cuda/std/__algorithm/search.h +185 -0
  478. cuda/cccl/headers/include/cuda/std/__algorithm/search_n.h +163 -0
  479. cuda/cccl/headers/include/cuda/std/__algorithm/set_difference.h +95 -0
  480. cuda/cccl/headers/include/cuda/std/__algorithm/set_intersection.h +123 -0
  481. cuda/cccl/headers/include/cuda/std/__algorithm/set_symmetric_difference.h +135 -0
  482. cuda/cccl/headers/include/cuda/std/__algorithm/set_union.h +129 -0
  483. cuda/cccl/headers/include/cuda/std/__algorithm/shift_left.h +84 -0
  484. cuda/cccl/headers/include/cuda/std/__algorithm/shift_right.h +144 -0
  485. cuda/cccl/headers/include/cuda/std/__algorithm/sift_down.h +139 -0
  486. cuda/cccl/headers/include/cuda/std/__algorithm/sort_heap.h +72 -0
  487. cuda/cccl/headers/include/cuda/std/__algorithm/swap_ranges.h +78 -0
  488. cuda/cccl/headers/include/cuda/std/__algorithm/transform.h +59 -0
  489. cuda/cccl/headers/include/cuda/std/__algorithm/unique.h +77 -0
  490. cuda/cccl/headers/include/cuda/std/__algorithm/unique_copy.h +156 -0
  491. cuda/cccl/headers/include/cuda/std/__algorithm/unwrap_iter.h +96 -0
  492. cuda/cccl/headers/include/cuda/std/__algorithm/unwrap_range.h +127 -0
  493. cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h +83 -0
  494. cuda/cccl/headers/include/cuda/std/__algorithm_ +26 -0
  495. cuda/cccl/headers/include/cuda/std/__atomic/api/common.h +192 -0
  496. cuda/cccl/headers/include/cuda/std/__atomic/api/owned.h +138 -0
  497. cuda/cccl/headers/include/cuda/std/__atomic/api/reference.h +118 -0
  498. cuda/cccl/headers/include/cuda/std/__atomic/functions/common.h +58 -0
  499. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_local.h +218 -0
  500. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_derived.h +401 -0
  501. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated.h +3971 -0
  502. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated_helper.h +177 -0
  503. cuda/cccl/headers/include/cuda/std/__atomic/functions/host.h +211 -0
  504. cuda/cccl/headers/include/cuda/std/__atomic/functions.h +33 -0
  505. cuda/cccl/headers/include/cuda/std/__atomic/order.h +159 -0
  506. cuda/cccl/headers/include/cuda/std/__atomic/platform/msvc_to_builtins.h +654 -0
  507. cuda/cccl/headers/include/cuda/std/__atomic/platform.h +93 -0
  508. cuda/cccl/headers/include/cuda/std/__atomic/scopes.h +105 -0
  509. cuda/cccl/headers/include/cuda/std/__atomic/types/base.h +250 -0
  510. cuda/cccl/headers/include/cuda/std/__atomic/types/common.h +105 -0
  511. cuda/cccl/headers/include/cuda/std/__atomic/types/locked.h +225 -0
  512. cuda/cccl/headers/include/cuda/std/__atomic/types/reference.h +73 -0
  513. cuda/cccl/headers/include/cuda/std/__atomic/types/small.h +228 -0
  514. cuda/cccl/headers/include/cuda/std/__atomic/types.h +52 -0
  515. cuda/cccl/headers/include/cuda/std/__atomic/wait/notify_wait.h +95 -0
  516. cuda/cccl/headers/include/cuda/std/__atomic/wait/polling.h +65 -0
  517. cuda/cccl/headers/include/cuda/std/__barrier/barrier.h +227 -0
  518. cuda/cccl/headers/include/cuda/std/__barrier/empty_completion.h +37 -0
  519. cuda/cccl/headers/include/cuda/std/__barrier/poll_tester.h +84 -0
  520. cuda/cccl/headers/include/cuda/std/__bit/bit_cast.h +77 -0
  521. cuda/cccl/headers/include/cuda/std/__bit/byteswap.h +183 -0
  522. cuda/cccl/headers/include/cuda/std/__bit/countl.h +167 -0
  523. cuda/cccl/headers/include/cuda/std/__bit/countr.h +185 -0
  524. cuda/cccl/headers/include/cuda/std/__bit/endian.h +39 -0
  525. cuda/cccl/headers/include/cuda/std/__bit/has_single_bit.h +43 -0
  526. cuda/cccl/headers/include/cuda/std/__bit/integral.h +124 -0
  527. cuda/cccl/headers/include/cuda/std/__bit/popcount.h +154 -0
  528. cuda/cccl/headers/include/cuda/std/__bit/reference.h +1274 -0
  529. cuda/cccl/headers/include/cuda/std/__bit/rotate.h +94 -0
  530. cuda/cccl/headers/include/cuda/std/__cccl/architecture.h +78 -0
  531. cuda/cccl/headers/include/cuda/std/__cccl/assert.h +146 -0
  532. cuda/cccl/headers/include/cuda/std/__cccl/attributes.h +207 -0
  533. cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +1343 -0
  534. cuda/cccl/headers/include/cuda/std/__cccl/compiler.h +216 -0
  535. cuda/cccl/headers/include/cuda/std/__cccl/cuda_capabilities.h +43 -0
  536. cuda/cccl/headers/include/cuda/std/__cccl/cuda_toolkit.h +53 -0
  537. cuda/cccl/headers/include/cuda/std/__cccl/deprecated.h +69 -0
  538. cuda/cccl/headers/include/cuda/std/__cccl/diagnostic.h +129 -0
  539. cuda/cccl/headers/include/cuda/std/__cccl/dialect.h +124 -0
  540. cuda/cccl/headers/include/cuda/std/__cccl/epilogue.h +326 -0
  541. cuda/cccl/headers/include/cuda/std/__cccl/exceptions.h +35 -0
  542. cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h +68 -0
  543. cuda/cccl/headers/include/cuda/std/__cccl/extended_data_types.h +129 -0
  544. cuda/cccl/headers/include/cuda/std/__cccl/is_non_narrowing_convertible.h +73 -0
  545. cuda/cccl/headers/include/cuda/std/__cccl/os.h +48 -0
  546. cuda/cccl/headers/include/cuda/std/__cccl/preprocessor.h +1234 -0
  547. cuda/cccl/headers/include/cuda/std/__cccl/prologue.h +267 -0
  548. cuda/cccl/headers/include/cuda/std/__cccl/ptx_isa.h +176 -0
  549. cuda/cccl/headers/include/cuda/std/__cccl/rtti.h +72 -0
  550. cuda/cccl/headers/include/cuda/std/__cccl/sequence_access.h +87 -0
  551. cuda/cccl/headers/include/cuda/std/__cccl/system_header.h +38 -0
  552. cuda/cccl/headers/include/cuda/std/__cccl/unreachable.h +31 -0
  553. cuda/cccl/headers/include/cuda/std/__cccl/version.h +26 -0
  554. cuda/cccl/headers/include/cuda/std/__cccl/visibility.h +112 -0
  555. cuda/cccl/headers/include/cuda/std/__charconv/chars_format.h +81 -0
  556. cuda/cccl/headers/include/cuda/std/__charconv/from_chars_result.h +56 -0
  557. cuda/cccl/headers/include/cuda/std/__charconv/to_chars.h +148 -0
  558. cuda/cccl/headers/include/cuda/std/__charconv/to_chars_result.h +56 -0
  559. cuda/cccl/headers/include/cuda/std/__charconv_ +30 -0
  560. cuda/cccl/headers/include/cuda/std/__cmath/abs.h +240 -0
  561. cuda/cccl/headers/include/cuda/std/__cmath/copysign.h +187 -0
  562. cuda/cccl/headers/include/cuda/std/__cmath/exponential_functions.h +620 -0
  563. cuda/cccl/headers/include/cuda/std/__cmath/fpclassify.h +207 -0
  564. cuda/cccl/headers/include/cuda/std/__cmath/gamma.h +181 -0
  565. cuda/cccl/headers/include/cuda/std/__cmath/hyperbolic_functions.h +250 -0
  566. cuda/cccl/headers/include/cuda/std/__cmath/hypot.h +213 -0
  567. cuda/cccl/headers/include/cuda/std/__cmath/inverse_hyperbolic_functions.h +250 -0
  568. cuda/cccl/headers/include/cuda/std/__cmath/inverse_trigonometric_functions.h +323 -0
  569. cuda/cccl/headers/include/cuda/std/__cmath/isfinite.h +163 -0
  570. cuda/cccl/headers/include/cuda/std/__cmath/isinf.h +201 -0
  571. cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +176 -0
  572. cuda/cccl/headers/include/cuda/std/__cmath/isnormal.h +129 -0
  573. cuda/cccl/headers/include/cuda/std/__cmath/lerp.h +106 -0
  574. cuda/cccl/headers/include/cuda/std/__cmath/logarithms.h +503 -0
  575. cuda/cccl/headers/include/cuda/std/__cmath/min_max.h +236 -0
  576. cuda/cccl/headers/include/cuda/std/__cmath/nvbf16.h +58 -0
  577. cuda/cccl/headers/include/cuda/std/__cmath/nvfp16.h +58 -0
  578. cuda/cccl/headers/include/cuda/std/__cmath/roots.h +180 -0
  579. cuda/cccl/headers/include/cuda/std/__cmath/rounding_functions.h +877 -0
  580. cuda/cccl/headers/include/cuda/std/__cmath/signbit.h +155 -0
  581. cuda/cccl/headers/include/cuda/std/__cmath/traits.h +170 -0
  582. cuda/cccl/headers/include/cuda/std/__cmath/trigonometric_functions.h +292 -0
  583. cuda/cccl/headers/include/cuda/std/__complex/nvbf16.h +351 -0
  584. cuda/cccl/headers/include/cuda/std/__complex/nvfp16.h +350 -0
  585. cuda/cccl/headers/include/cuda/std/__complex/vector_support.h +135 -0
  586. cuda/cccl/headers/include/cuda/std/__concepts/arithmetic.h +56 -0
  587. cuda/cccl/headers/include/cuda/std/__concepts/assignable.h +64 -0
  588. cuda/cccl/headers/include/cuda/std/__concepts/boolean_testable.h +63 -0
  589. cuda/cccl/headers/include/cuda/std/__concepts/class_or_enum.h +46 -0
  590. cuda/cccl/headers/include/cuda/std/__concepts/common_reference_with.h +69 -0
  591. cuda/cccl/headers/include/cuda/std/__concepts/common_with.h +82 -0
  592. cuda/cccl/headers/include/cuda/std/__concepts/concept_macros.h +274 -0
  593. cuda/cccl/headers/include/cuda/std/__concepts/constructible.h +107 -0
  594. cuda/cccl/headers/include/cuda/std/__concepts/convertible_to.h +71 -0
  595. cuda/cccl/headers/include/cuda/std/__concepts/copyable.h +60 -0
  596. cuda/cccl/headers/include/cuda/std/__concepts/derived_from.h +57 -0
  597. cuda/cccl/headers/include/cuda/std/__concepts/destructible.h +76 -0
  598. cuda/cccl/headers/include/cuda/std/__concepts/different_from.h +38 -0
  599. cuda/cccl/headers/include/cuda/std/__concepts/equality_comparable.h +100 -0
  600. cuda/cccl/headers/include/cuda/std/__concepts/invocable.h +80 -0
  601. cuda/cccl/headers/include/cuda/std/__concepts/movable.h +58 -0
  602. cuda/cccl/headers/include/cuda/std/__concepts/predicate.h +54 -0
  603. cuda/cccl/headers/include/cuda/std/__concepts/regular.h +54 -0
  604. cuda/cccl/headers/include/cuda/std/__concepts/relation.h +77 -0
  605. cuda/cccl/headers/include/cuda/std/__concepts/same_as.h +42 -0
  606. cuda/cccl/headers/include/cuda/std/__concepts/semiregular.h +54 -0
  607. cuda/cccl/headers/include/cuda/std/__concepts/swappable.h +206 -0
  608. cuda/cccl/headers/include/cuda/std/__concepts/totally_ordered.h +101 -0
  609. cuda/cccl/headers/include/cuda/std/__cstddef/byte.h +113 -0
  610. cuda/cccl/headers/include/cuda/std/__cstddef/types.h +52 -0
  611. cuda/cccl/headers/include/cuda/std/__cstdlib/abs.h +57 -0
  612. cuda/cccl/headers/include/cuda/std/__cstdlib/aligned_alloc.h +66 -0
  613. cuda/cccl/headers/include/cuda/std/__cstdlib/div.h +96 -0
  614. cuda/cccl/headers/include/cuda/std/__cstdlib/malloc.h +69 -0
  615. cuda/cccl/headers/include/cuda/std/__cuda/api_wrapper.h +62 -0
  616. cuda/cccl/headers/include/cuda/std/__cuda/ensure_current_device.h +72 -0
  617. cuda/cccl/headers/include/cuda/std/__exception/cuda_error.h +143 -0
  618. cuda/cccl/headers/include/cuda/std/__exception/terminate.h +73 -0
  619. cuda/cccl/headers/include/cuda/std/__execution/env.h +436 -0
  620. cuda/cccl/headers/include/cuda/std/__expected/bad_expected_access.h +127 -0
  621. cuda/cccl/headers/include/cuda/std/__expected/expected.h +2002 -0
  622. cuda/cccl/headers/include/cuda/std/__expected/expected_base.h +1078 -0
  623. cuda/cccl/headers/include/cuda/std/__expected/unexpect.h +37 -0
  624. cuda/cccl/headers/include/cuda/std/__expected/unexpected.h +178 -0
  625. cuda/cccl/headers/include/cuda/std/__floating_point/arithmetic.h +56 -0
  626. cuda/cccl/headers/include/cuda/std/__floating_point/cast.h +809 -0
  627. cuda/cccl/headers/include/cuda/std/__floating_point/cccl_fp.h +125 -0
  628. cuda/cccl/headers/include/cuda/std/__floating_point/common_type.h +48 -0
  629. cuda/cccl/headers/include/cuda/std/__floating_point/constants.h +172 -0
  630. cuda/cccl/headers/include/cuda/std/__floating_point/conversion_rank_order.h +103 -0
  631. cuda/cccl/headers/include/cuda/std/__floating_point/format.h +162 -0
  632. cuda/cccl/headers/include/cuda/std/__floating_point/fp.h +39 -0
  633. cuda/cccl/headers/include/cuda/std/__floating_point/mask.h +64 -0
  634. cuda/cccl/headers/include/cuda/std/__floating_point/native_type.h +81 -0
  635. cuda/cccl/headers/include/cuda/std/__floating_point/nvfp_types.h +58 -0
  636. cuda/cccl/headers/include/cuda/std/__floating_point/overflow_handler.h +139 -0
  637. cuda/cccl/headers/include/cuda/std/__floating_point/properties.h +229 -0
  638. cuda/cccl/headers/include/cuda/std/__floating_point/storage.h +248 -0
  639. cuda/cccl/headers/include/cuda/std/__floating_point/traits.h +172 -0
  640. cuda/cccl/headers/include/cuda/std/__functional/binary_function.h +63 -0
  641. cuda/cccl/headers/include/cuda/std/__functional/binary_negate.h +65 -0
  642. cuda/cccl/headers/include/cuda/std/__functional/bind.h +352 -0
  643. cuda/cccl/headers/include/cuda/std/__functional/bind_back.h +88 -0
  644. cuda/cccl/headers/include/cuda/std/__functional/bind_front.h +73 -0
  645. cuda/cccl/headers/include/cuda/std/__functional/binder1st.h +75 -0
  646. cuda/cccl/headers/include/cuda/std/__functional/binder2nd.h +75 -0
  647. cuda/cccl/headers/include/cuda/std/__functional/compose.h +69 -0
  648. cuda/cccl/headers/include/cuda/std/__functional/default_searcher.h +75 -0
  649. cuda/cccl/headers/include/cuda/std/__functional/function.h +1277 -0
  650. cuda/cccl/headers/include/cuda/std/__functional/hash.h +650 -0
  651. cuda/cccl/headers/include/cuda/std/__functional/identity.h +61 -0
  652. cuda/cccl/headers/include/cuda/std/__functional/invoke.h +560 -0
  653. cuda/cccl/headers/include/cuda/std/__functional/is_transparent.h +43 -0
  654. cuda/cccl/headers/include/cuda/std/__functional/mem_fn.h +67 -0
  655. cuda/cccl/headers/include/cuda/std/__functional/mem_fun_ref.h +214 -0
  656. cuda/cccl/headers/include/cuda/std/__functional/not_fn.h +121 -0
  657. cuda/cccl/headers/include/cuda/std/__functional/operations.h +534 -0
  658. cuda/cccl/headers/include/cuda/std/__functional/perfect_forward.h +127 -0
  659. cuda/cccl/headers/include/cuda/std/__functional/pointer_to_binary_function.h +65 -0
  660. cuda/cccl/headers/include/cuda/std/__functional/pointer_to_unary_function.h +64 -0
  661. cuda/cccl/headers/include/cuda/std/__functional/ranges_operations.h +113 -0
  662. cuda/cccl/headers/include/cuda/std/__functional/reference_wrapper.h +113 -0
  663. cuda/cccl/headers/include/cuda/std/__functional/unary_function.h +62 -0
  664. cuda/cccl/headers/include/cuda/std/__functional/unary_negate.h +67 -0
  665. cuda/cccl/headers/include/cuda/std/__functional/unwrap_ref.h +56 -0
  666. cuda/cccl/headers/include/cuda/std/__functional/weak_result_type.h +278 -0
  667. cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +35 -0
  668. cuda/cccl/headers/include/cuda/std/__fwd/array.h +36 -0
  669. cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +49 -0
  670. cuda/cccl/headers/include/cuda/std/__fwd/complex.h +34 -0
  671. cuda/cccl/headers/include/cuda/std/__fwd/fp.h +37 -0
  672. cuda/cccl/headers/include/cuda/std/__fwd/get.h +123 -0
  673. cuda/cccl/headers/include/cuda/std/__fwd/hash.h +34 -0
  674. cuda/cccl/headers/include/cuda/std/__fwd/iterator_traits.h +40 -0
  675. cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +73 -0
  676. cuda/cccl/headers/include/cuda/std/__fwd/memory_resource.h +37 -0
  677. cuda/cccl/headers/include/cuda/std/__fwd/pair.h +34 -0
  678. cuda/cccl/headers/include/cuda/std/__fwd/reference_wrapper.h +34 -0
  679. cuda/cccl/headers/include/cuda/std/__fwd/span.h +38 -0
  680. cuda/cccl/headers/include/cuda/std/__fwd/string.h +83 -0
  681. cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +59 -0
  682. cuda/cccl/headers/include/cuda/std/__fwd/subrange.h +55 -0
  683. cuda/cccl/headers/include/cuda/std/__fwd/tuple.h +34 -0
  684. cuda/cccl/headers/include/cuda/std/__internal/cpp_dialect.h +44 -0
  685. cuda/cccl/headers/include/cuda/std/__internal/features.h +71 -0
  686. cuda/cccl/headers/include/cuda/std/__internal/namespaces.h +102 -0
  687. cuda/cccl/headers/include/cuda/std/__iterator/access.h +132 -0
  688. cuda/cccl/headers/include/cuda/std/__iterator/advance.h +230 -0
  689. cuda/cccl/headers/include/cuda/std/__iterator/back_insert_iterator.h +103 -0
  690. cuda/cccl/headers/include/cuda/std/__iterator/bounded_iter.h +264 -0
  691. cuda/cccl/headers/include/cuda/std/__iterator/concepts.h +608 -0
  692. cuda/cccl/headers/include/cuda/std/__iterator/counted_iterator.h +469 -0
  693. cuda/cccl/headers/include/cuda/std/__iterator/data.h +63 -0
  694. cuda/cccl/headers/include/cuda/std/__iterator/default_sentinel.h +36 -0
  695. cuda/cccl/headers/include/cuda/std/__iterator/distance.h +126 -0
  696. cuda/cccl/headers/include/cuda/std/__iterator/empty.h +54 -0
  697. cuda/cccl/headers/include/cuda/std/__iterator/erase_if_container.h +53 -0
  698. cuda/cccl/headers/include/cuda/std/__iterator/front_insert_iterator.h +98 -0
  699. cuda/cccl/headers/include/cuda/std/__iterator/incrementable_traits.h +152 -0
  700. cuda/cccl/headers/include/cuda/std/__iterator/indirectly_comparable.h +55 -0
  701. cuda/cccl/headers/include/cuda/std/__iterator/insert_iterator.h +105 -0
  702. cuda/cccl/headers/include/cuda/std/__iterator/istream_iterator.h +141 -0
  703. cuda/cccl/headers/include/cuda/std/__iterator/istreambuf_iterator.h +161 -0
  704. cuda/cccl/headers/include/cuda/std/__iterator/iter_move.h +161 -0
  705. cuda/cccl/headers/include/cuda/std/__iterator/iter_swap.h +163 -0
  706. cuda/cccl/headers/include/cuda/std/__iterator/iterator.h +44 -0
  707. cuda/cccl/headers/include/cuda/std/__iterator/iterator_traits.h +935 -0
  708. cuda/cccl/headers/include/cuda/std/__iterator/mergeable.h +72 -0
  709. cuda/cccl/headers/include/cuda/std/__iterator/move_iterator.h +401 -0
  710. cuda/cccl/headers/include/cuda/std/__iterator/move_sentinel.h +73 -0
  711. cuda/cccl/headers/include/cuda/std/__iterator/next.h +102 -0
  712. cuda/cccl/headers/include/cuda/std/__iterator/ostream_iterator.h +99 -0
  713. cuda/cccl/headers/include/cuda/std/__iterator/ostreambuf_iterator.h +101 -0
  714. cuda/cccl/headers/include/cuda/std/__iterator/permutable.h +54 -0
  715. cuda/cccl/headers/include/cuda/std/__iterator/prev.h +92 -0
  716. cuda/cccl/headers/include/cuda/std/__iterator/projected.h +61 -0
  717. cuda/cccl/headers/include/cuda/std/__iterator/readable_traits.h +185 -0
  718. cuda/cccl/headers/include/cuda/std/__iterator/reverse_access.h +146 -0
  719. cuda/cccl/headers/include/cuda/std/__iterator/reverse_iterator.h +615 -0
  720. cuda/cccl/headers/include/cuda/std/__iterator/size.h +69 -0
  721. cuda/cccl/headers/include/cuda/std/__iterator/sortable.h +55 -0
  722. cuda/cccl/headers/include/cuda/std/__iterator/unreachable_sentinel.h +88 -0
  723. cuda/cccl/headers/include/cuda/std/__iterator/wrap_iter.h +259 -0
  724. cuda/cccl/headers/include/cuda/std/__latch/latch.h +88 -0
  725. cuda/cccl/headers/include/cuda/std/__limits/numeric_limits.h +617 -0
  726. cuda/cccl/headers/include/cuda/std/__limits/numeric_limits_ext.h +781 -0
  727. cuda/cccl/headers/include/cuda/std/__linalg/conj_if_needed.h +78 -0
  728. cuda/cccl/headers/include/cuda/std/__linalg/conjugate_transposed.h +55 -0
  729. cuda/cccl/headers/include/cuda/std/__linalg/conjugated.h +140 -0
  730. cuda/cccl/headers/include/cuda/std/__linalg/scaled.h +134 -0
  731. cuda/cccl/headers/include/cuda/std/__linalg/transposed.h +328 -0
  732. cuda/cccl/headers/include/cuda/std/__mdspan/aligned_accessor.h +100 -0
  733. cuda/cccl/headers/include/cuda/std/__mdspan/concepts.h +139 -0
  734. cuda/cccl/headers/include/cuda/std/__mdspan/default_accessor.h +74 -0
  735. cuda/cccl/headers/include/cuda/std/__mdspan/empty_base.h +363 -0
  736. cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +765 -0
  737. cuda/cccl/headers/include/cuda/std/__mdspan/layout_left.h +317 -0
  738. cuda/cccl/headers/include/cuda/std/__mdspan/layout_right.h +310 -0
  739. cuda/cccl/headers/include/cuda/std/__mdspan/layout_stride.h +615 -0
  740. cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +512 -0
  741. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_extents.h +193 -0
  742. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_helper.h +190 -0
  743. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_mapping.h +347 -0
  744. cuda/cccl/headers/include/cuda/std/__memory/addressof.h +64 -0
  745. cuda/cccl/headers/include/cuda/std/__memory/align.h +87 -0
  746. cuda/cccl/headers/include/cuda/std/__memory/allocate_at_least.h +81 -0
  747. cuda/cccl/headers/include/cuda/std/__memory/allocation_guard.h +100 -0
  748. cuda/cccl/headers/include/cuda/std/__memory/allocator.h +320 -0
  749. cuda/cccl/headers/include/cuda/std/__memory/allocator_arg_t.h +84 -0
  750. cuda/cccl/headers/include/cuda/std/__memory/allocator_destructor.h +59 -0
  751. cuda/cccl/headers/include/cuda/std/__memory/allocator_traits.h +569 -0
  752. cuda/cccl/headers/include/cuda/std/__memory/assume_aligned.h +60 -0
  753. cuda/cccl/headers/include/cuda/std/__memory/builtin_new_allocator.h +87 -0
  754. cuda/cccl/headers/include/cuda/std/__memory/compressed_pair.h +231 -0
  755. cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +248 -0
  756. cuda/cccl/headers/include/cuda/std/__memory/destruct_n.h +91 -0
  757. cuda/cccl/headers/include/cuda/std/__memory/is_sufficiently_aligned.h +43 -0
  758. cuda/cccl/headers/include/cuda/std/__memory/pointer_traits.h +260 -0
  759. cuda/cccl/headers/include/cuda/std/__memory/temporary_buffer.h +92 -0
  760. cuda/cccl/headers/include/cuda/std/__memory/uninitialized_algorithms.h +686 -0
  761. cuda/cccl/headers/include/cuda/std/__memory/unique_ptr.h +771 -0
  762. cuda/cccl/headers/include/cuda/std/__memory/uses_allocator.h +55 -0
  763. cuda/cccl/headers/include/cuda/std/__memory/voidify.h +41 -0
  764. cuda/cccl/headers/include/cuda/std/__memory_ +34 -0
  765. cuda/cccl/headers/include/cuda/std/__new/allocate.h +126 -0
  766. cuda/cccl/headers/include/cuda/std/__new/bad_alloc.h +57 -0
  767. cuda/cccl/headers/include/cuda/std/__new/launder.h +49 -0
  768. cuda/cccl/headers/include/cuda/std/__new_ +29 -0
  769. cuda/cccl/headers/include/cuda/std/__numeric/accumulate.h +57 -0
  770. cuda/cccl/headers/include/cuda/std/__numeric/adjacent_difference.h +72 -0
  771. cuda/cccl/headers/include/cuda/std/__numeric/exclusive_scan.h +66 -0
  772. cuda/cccl/headers/include/cuda/std/__numeric/gcd_lcm.h +80 -0
  773. cuda/cccl/headers/include/cuda/std/__numeric/inclusive_scan.h +73 -0
  774. cuda/cccl/headers/include/cuda/std/__numeric/inner_product.h +62 -0
  775. cuda/cccl/headers/include/cuda/std/__numeric/iota.h +42 -0
  776. cuda/cccl/headers/include/cuda/std/__numeric/midpoint.h +100 -0
  777. cuda/cccl/headers/include/cuda/std/__numeric/partial_sum.h +70 -0
  778. cuda/cccl/headers/include/cuda/std/__numeric/reduce.h +61 -0
  779. cuda/cccl/headers/include/cuda/std/__numeric/transform_exclusive_scan.h +51 -0
  780. cuda/cccl/headers/include/cuda/std/__numeric/transform_inclusive_scan.h +65 -0
  781. cuda/cccl/headers/include/cuda/std/__numeric/transform_reduce.h +72 -0
  782. cuda/cccl/headers/include/cuda/std/__ranges/access.h +304 -0
  783. cuda/cccl/headers/include/cuda/std/__ranges/all.h +97 -0
  784. cuda/cccl/headers/include/cuda/std/__ranges/concepts.h +313 -0
  785. cuda/cccl/headers/include/cuda/std/__ranges/counted.h +90 -0
  786. cuda/cccl/headers/include/cuda/std/__ranges/dangling.h +54 -0
  787. cuda/cccl/headers/include/cuda/std/__ranges/data.h +136 -0
  788. cuda/cccl/headers/include/cuda/std/__ranges/empty.h +111 -0
  789. cuda/cccl/headers/include/cuda/std/__ranges/empty_view.h +77 -0
  790. cuda/cccl/headers/include/cuda/std/__ranges/enable_borrowed_range.h +41 -0
  791. cuda/cccl/headers/include/cuda/std/__ranges/enable_view.h +77 -0
  792. cuda/cccl/headers/include/cuda/std/__ranges/from_range.h +36 -0
  793. cuda/cccl/headers/include/cuda/std/__ranges/iota_view.h +271 -0
  794. cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +410 -0
  795. cuda/cccl/headers/include/cuda/std/__ranges/owning_view.h +161 -0
  796. cuda/cccl/headers/include/cuda/std/__ranges/range_adaptor.h +114 -0
  797. cuda/cccl/headers/include/cuda/std/__ranges/rbegin.h +175 -0
  798. cuda/cccl/headers/include/cuda/std/__ranges/ref_view.h +121 -0
  799. cuda/cccl/headers/include/cuda/std/__ranges/rend.h +182 -0
  800. cuda/cccl/headers/include/cuda/std/__ranges/repeat_view.h +343 -0
  801. cuda/cccl/headers/include/cuda/std/__ranges/single_view.h +156 -0
  802. cuda/cccl/headers/include/cuda/std/__ranges/size.h +200 -0
  803. cuda/cccl/headers/include/cuda/std/__ranges/subrange.h +513 -0
  804. cuda/cccl/headers/include/cuda/std/__ranges/take_while_view.h +263 -0
  805. cuda/cccl/headers/include/cuda/std/__ranges/transform_view.h +531 -0
  806. cuda/cccl/headers/include/cuda/std/__ranges/unwrap_end.h +53 -0
  807. cuda/cccl/headers/include/cuda/std/__ranges/view_interface.h +181 -0
  808. cuda/cccl/headers/include/cuda/std/__ranges/views.h +38 -0
  809. cuda/cccl/headers/include/cuda/std/__semaphore/atomic_semaphore.h +233 -0
  810. cuda/cccl/headers/include/cuda/std/__semaphore/counting_semaphore.h +51 -0
  811. cuda/cccl/headers/include/cuda/std/__string/char_traits.h +191 -0
  812. cuda/cccl/headers/include/cuda/std/__string/constexpr_c_functions.h +591 -0
  813. cuda/cccl/headers/include/cuda/std/__string/helper_functions.h +299 -0
  814. cuda/cccl/headers/include/cuda/std/__string/string_view.h +244 -0
  815. cuda/cccl/headers/include/cuda/std/__string_ +29 -0
  816. cuda/cccl/headers/include/cuda/std/__system_error/errc.h +51 -0
  817. cuda/cccl/headers/include/cuda/std/__system_error_ +26 -0
  818. cuda/cccl/headers/include/cuda/std/__thread/threading_support.h +105 -0
  819. cuda/cccl/headers/include/cuda/std/__thread/threading_support_cuda.h +47 -0
  820. cuda/cccl/headers/include/cuda/std/__thread/threading_support_external.h +41 -0
  821. cuda/cccl/headers/include/cuda/std/__thread/threading_support_pthread.h +144 -0
  822. cuda/cccl/headers/include/cuda/std/__thread/threading_support_win32.h +87 -0
  823. cuda/cccl/headers/include/cuda/std/__tuple_dir/ignore.h +51 -0
  824. cuda/cccl/headers/include/cuda/std/__tuple_dir/make_tuple_types.h +98 -0
  825. cuda/cccl/headers/include/cuda/std/__tuple_dir/sfinae_helpers.h +236 -0
  826. cuda/cccl/headers/include/cuda/std/__tuple_dir/structured_bindings.h +216 -0
  827. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_element.h +70 -0
  828. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_indices.h +44 -0
  829. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +90 -0
  830. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like_ext.h +73 -0
  831. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_size.h +79 -0
  832. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_types.h +35 -0
  833. cuda/cccl/headers/include/cuda/std/__tuple_dir/vector_types.h +242 -0
  834. cuda/cccl/headers/include/cuda/std/__type_traits/add_const.h +40 -0
  835. cuda/cccl/headers/include/cuda/std/__type_traits/add_cv.h +40 -0
  836. cuda/cccl/headers/include/cuda/std/__type_traits/add_lvalue_reference.h +62 -0
  837. cuda/cccl/headers/include/cuda/std/__type_traits/add_pointer.h +65 -0
  838. cuda/cccl/headers/include/cuda/std/__type_traits/add_rvalue_reference.h +62 -0
  839. cuda/cccl/headers/include/cuda/std/__type_traits/add_volatile.h +40 -0
  840. cuda/cccl/headers/include/cuda/std/__type_traits/aligned_storage.h +149 -0
  841. cuda/cccl/headers/include/cuda/std/__type_traits/aligned_union.h +62 -0
  842. cuda/cccl/headers/include/cuda/std/__type_traits/alignment_of.h +41 -0
  843. cuda/cccl/headers/include/cuda/std/__type_traits/always_false.h +35 -0
  844. cuda/cccl/headers/include/cuda/std/__type_traits/can_extract_key.h +69 -0
  845. cuda/cccl/headers/include/cuda/std/__type_traits/common_reference.h +262 -0
  846. cuda/cccl/headers/include/cuda/std/__type_traits/common_type.h +174 -0
  847. cuda/cccl/headers/include/cuda/std/__type_traits/conditional.h +65 -0
  848. cuda/cccl/headers/include/cuda/std/__type_traits/conjunction.h +67 -0
  849. cuda/cccl/headers/include/cuda/std/__type_traits/copy_cv.h +50 -0
  850. cuda/cccl/headers/include/cuda/std/__type_traits/copy_cvref.h +148 -0
  851. cuda/cccl/headers/include/cuda/std/__type_traits/decay.h +83 -0
  852. cuda/cccl/headers/include/cuda/std/__type_traits/dependent_type.h +35 -0
  853. cuda/cccl/headers/include/cuda/std/__type_traits/disjunction.h +77 -0
  854. cuda/cccl/headers/include/cuda/std/__type_traits/enable_if.h +43 -0
  855. cuda/cccl/headers/include/cuda/std/__type_traits/extent.h +68 -0
  856. cuda/cccl/headers/include/cuda/std/__type_traits/fold.h +47 -0
  857. cuda/cccl/headers/include/cuda/std/__type_traits/has_unique_object_representation.h +47 -0
  858. cuda/cccl/headers/include/cuda/std/__type_traits/has_virtual_destructor.h +51 -0
  859. cuda/cccl/headers/include/cuda/std/__type_traits/integral_constant.h +62 -0
  860. cuda/cccl/headers/include/cuda/std/__type_traits/is_abstract.h +40 -0
  861. cuda/cccl/headers/include/cuda/std/__type_traits/is_aggregate.h +44 -0
  862. cuda/cccl/headers/include/cuda/std/__type_traits/is_allocator.h +46 -0
  863. cuda/cccl/headers/include/cuda/std/__type_traits/is_arithmetic.h +42 -0
  864. cuda/cccl/headers/include/cuda/std/__type_traits/is_array.h +62 -0
  865. cuda/cccl/headers/include/cuda/std/__type_traits/is_assignable.h +78 -0
  866. cuda/cccl/headers/include/cuda/std/__type_traits/is_base_of.h +83 -0
  867. cuda/cccl/headers/include/cuda/std/__type_traits/is_bounded_array.h +44 -0
  868. cuda/cccl/headers/include/cuda/std/__type_traits/is_callable.h +60 -0
  869. cuda/cccl/headers/include/cuda/std/__type_traits/is_char_like_type.h +38 -0
  870. cuda/cccl/headers/include/cuda/std/__type_traits/is_class.h +68 -0
  871. cuda/cccl/headers/include/cuda/std/__type_traits/is_compound.h +54 -0
  872. cuda/cccl/headers/include/cuda/std/__type_traits/is_const.h +56 -0
  873. cuda/cccl/headers/include/cuda/std/__type_traits/is_constant_evaluated.h +51 -0
  874. cuda/cccl/headers/include/cuda/std/__type_traits/is_constructible.h +174 -0
  875. cuda/cccl/headers/include/cuda/std/__type_traits/is_convertible.h +214 -0
  876. cuda/cccl/headers/include/cuda/std/__type_traits/is_copy_assignable.h +43 -0
  877. cuda/cccl/headers/include/cuda/std/__type_traits/is_copy_constructible.h +43 -0
  878. cuda/cccl/headers/include/cuda/std/__type_traits/is_core_convertible.h +47 -0
  879. cuda/cccl/headers/include/cuda/std/__type_traits/is_corresponding_member.h +43 -0
  880. cuda/cccl/headers/include/cuda/std/__type_traits/is_default_constructible.h +40 -0
  881. cuda/cccl/headers/include/cuda/std/__type_traits/is_destructible.h +115 -0
  882. cuda/cccl/headers/include/cuda/std/__type_traits/is_empty.h +73 -0
  883. cuda/cccl/headers/include/cuda/std/__type_traits/is_enum.h +68 -0
  884. cuda/cccl/headers/include/cuda/std/__type_traits/is_extended_arithmetic.h +38 -0
  885. cuda/cccl/headers/include/cuda/std/__type_traits/is_extended_floating_point.h +81 -0
  886. cuda/cccl/headers/include/cuda/std/__type_traits/is_final.h +56 -0
  887. cuda/cccl/headers/include/cuda/std/__type_traits/is_floating_point.h +53 -0
  888. cuda/cccl/headers/include/cuda/std/__type_traits/is_function.h +61 -0
  889. cuda/cccl/headers/include/cuda/std/__type_traits/is_fundamental.h +56 -0
  890. cuda/cccl/headers/include/cuda/std/__type_traits/is_implicitly_default_constructible.h +57 -0
  891. cuda/cccl/headers/include/cuda/std/__type_traits/is_integer.h +45 -0
  892. cuda/cccl/headers/include/cuda/std/__type_traits/is_integral.h +123 -0
  893. cuda/cccl/headers/include/cuda/std/__type_traits/is_layout_compatible.h +45 -0
  894. cuda/cccl/headers/include/cuda/std/__type_traits/is_literal_type.h +59 -0
  895. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_function_pointer.h +79 -0
  896. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_object_pointer.h +57 -0
  897. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_pointer.h +57 -0
  898. cuda/cccl/headers/include/cuda/std/__type_traits/is_move_assignable.h +43 -0
  899. cuda/cccl/headers/include/cuda/std/__type_traits/is_move_constructible.h +42 -0
  900. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_assignable.h +70 -0
  901. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_constructible.h +84 -0
  902. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_convertible.h +59 -0
  903. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_copy_assignable.h +60 -0
  904. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_copy_constructible.h +43 -0
  905. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_default_constructible.h +54 -0
  906. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_destructible.h +79 -0
  907. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_move_assignable.h +60 -0
  908. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_move_constructible.h +42 -0
  909. cuda/cccl/headers/include/cuda/std/__type_traits/is_null_pointer.h +43 -0
  910. cuda/cccl/headers/include/cuda/std/__type_traits/is_object.h +57 -0
  911. cuda/cccl/headers/include/cuda/std/__type_traits/is_one_of.h +37 -0
  912. cuda/cccl/headers/include/cuda/std/__type_traits/is_pod.h +62 -0
  913. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer.h +60 -0
  914. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer_interconvertible_base_of.h +87 -0
  915. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer_interconvertible_with_class.h +43 -0
  916. cuda/cccl/headers/include/cuda/std/__type_traits/is_polymorphic.h +63 -0
  917. cuda/cccl/headers/include/cuda/std/__type_traits/is_primary_template.h +119 -0
  918. cuda/cccl/headers/include/cuda/std/__type_traits/is_reference.h +95 -0
  919. cuda/cccl/headers/include/cuda/std/__type_traits/is_reference_wrapper.h +50 -0
  920. cuda/cccl/headers/include/cuda/std/__type_traits/is_referenceable.h +55 -0
  921. cuda/cccl/headers/include/cuda/std/__type_traits/is_same.h +84 -0
  922. cuda/cccl/headers/include/cuda/std/__type_traits/is_scalar.h +60 -0
  923. cuda/cccl/headers/include/cuda/std/__type_traits/is_scoped_enum.h +49 -0
  924. cuda/cccl/headers/include/cuda/std/__type_traits/is_signed.h +65 -0
  925. cuda/cccl/headers/include/cuda/std/__type_traits/is_signed_integer.h +59 -0
  926. cuda/cccl/headers/include/cuda/std/__type_traits/is_standard_layout.h +57 -0
  927. cuda/cccl/headers/include/cuda/std/__type_traits/is_swappable.h +203 -0
  928. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivial.h +56 -0
  929. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_assignable.h +70 -0
  930. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_constructible.h +82 -0
  931. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copy_assignable.h +60 -0
  932. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copy_constructible.h +61 -0
  933. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copyable.h +56 -0
  934. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_default_constructible.h +55 -0
  935. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_destructible.h +73 -0
  936. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_move_assignable.h +60 -0
  937. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_move_constructible.h +58 -0
  938. cuda/cccl/headers/include/cuda/std/__type_traits/is_unbounded_array.h +43 -0
  939. cuda/cccl/headers/include/cuda/std/__type_traits/is_union.h +57 -0
  940. cuda/cccl/headers/include/cuda/std/__type_traits/is_unsigned.h +66 -0
  941. cuda/cccl/headers/include/cuda/std/__type_traits/is_unsigned_integer.h +59 -0
  942. cuda/cccl/headers/include/cuda/std/__type_traits/is_valid_expansion.h +41 -0
  943. cuda/cccl/headers/include/cuda/std/__type_traits/is_void.h +55 -0
  944. cuda/cccl/headers/include/cuda/std/__type_traits/is_volatile.h +56 -0
  945. cuda/cccl/headers/include/cuda/std/__type_traits/lazy.h +35 -0
  946. cuda/cccl/headers/include/cuda/std/__type_traits/make_const_lvalue_ref.h +36 -0
  947. cuda/cccl/headers/include/cuda/std/__type_traits/make_nbit_int.h +107 -0
  948. cuda/cccl/headers/include/cuda/std/__type_traits/make_signed.h +140 -0
  949. cuda/cccl/headers/include/cuda/std/__type_traits/make_unsigned.h +151 -0
  950. cuda/cccl/headers/include/cuda/std/__type_traits/maybe_const.h +36 -0
  951. cuda/cccl/headers/include/cuda/std/__type_traits/nat.h +39 -0
  952. cuda/cccl/headers/include/cuda/std/__type_traits/negation.h +44 -0
  953. cuda/cccl/headers/include/cuda/std/__type_traits/num_bits.h +123 -0
  954. cuda/cccl/headers/include/cuda/std/__type_traits/promote.h +163 -0
  955. cuda/cccl/headers/include/cuda/std/__type_traits/rank.h +60 -0
  956. cuda/cccl/headers/include/cuda/std/__type_traits/reference_constructs_from_temporary.h +57 -0
  957. cuda/cccl/headers/include/cuda/std/__type_traits/reference_converts_from_temporary.h +56 -0
  958. cuda/cccl/headers/include/cuda/std/__type_traits/remove_all_extents.h +66 -0
  959. cuda/cccl/headers/include/cuda/std/__type_traits/remove_const.h +59 -0
  960. cuda/cccl/headers/include/cuda/std/__type_traits/remove_const_ref.h +37 -0
  961. cuda/cccl/headers/include/cuda/std/__type_traits/remove_cv.h +57 -0
  962. cuda/cccl/headers/include/cuda/std/__type_traits/remove_cvref.h +57 -0
  963. cuda/cccl/headers/include/cuda/std/__type_traits/remove_extent.h +65 -0
  964. cuda/cccl/headers/include/cuda/std/__type_traits/remove_pointer.h +73 -0
  965. cuda/cccl/headers/include/cuda/std/__type_traits/remove_reference.h +72 -0
  966. cuda/cccl/headers/include/cuda/std/__type_traits/remove_volatile.h +58 -0
  967. cuda/cccl/headers/include/cuda/std/__type_traits/result_of.h +47 -0
  968. cuda/cccl/headers/include/cuda/std/__type_traits/type_identity.h +40 -0
  969. cuda/cccl/headers/include/cuda/std/__type_traits/type_list.h +1069 -0
  970. cuda/cccl/headers/include/cuda/std/__type_traits/type_set.h +132 -0
  971. cuda/cccl/headers/include/cuda/std/__type_traits/underlying_type.h +66 -0
  972. cuda/cccl/headers/include/cuda/std/__type_traits/void_t.h +34 -0
  973. cuda/cccl/headers/include/cuda/std/__utility/as_const.h +52 -0
  974. cuda/cccl/headers/include/cuda/std/__utility/auto_cast.h +32 -0
  975. cuda/cccl/headers/include/cuda/std/__utility/cmp.h +116 -0
  976. cuda/cccl/headers/include/cuda/std/__utility/convert_to_integral.h +103 -0
  977. cuda/cccl/headers/include/cuda/std/__utility/declval.h +63 -0
  978. cuda/cccl/headers/include/cuda/std/__utility/exception_guard.h +162 -0
  979. cuda/cccl/headers/include/cuda/std/__utility/exchange.h +46 -0
  980. cuda/cccl/headers/include/cuda/std/__utility/forward.h +59 -0
  981. cuda/cccl/headers/include/cuda/std/__utility/forward_like.h +56 -0
  982. cuda/cccl/headers/include/cuda/std/__utility/in_place.h +77 -0
  983. cuda/cccl/headers/include/cuda/std/__utility/integer_sequence.h +251 -0
  984. cuda/cccl/headers/include/cuda/std/__utility/monostate.h +99 -0
  985. cuda/cccl/headers/include/cuda/std/__utility/move.h +75 -0
  986. cuda/cccl/headers/include/cuda/std/__utility/pair.h +808 -0
  987. cuda/cccl/headers/include/cuda/std/__utility/piecewise_construct.h +37 -0
  988. cuda/cccl/headers/include/cuda/std/__utility/pod_tuple.h +763 -0
  989. cuda/cccl/headers/include/cuda/std/__utility/priority_tag.h +40 -0
  990. cuda/cccl/headers/include/cuda/std/__utility/rel_ops.h +63 -0
  991. cuda/cccl/headers/include/cuda/std/__utility/swap.h +65 -0
  992. cuda/cccl/headers/include/cuda/std/__utility/to_underlying.h +40 -0
  993. cuda/cccl/headers/include/cuda/std/__utility/typeid.h +425 -0
  994. cuda/cccl/headers/include/cuda/std/__utility/unreachable.h +37 -0
  995. cuda/cccl/headers/include/cuda/std/array +527 -0
  996. cuda/cccl/headers/include/cuda/std/atomic +823 -0
  997. cuda/cccl/headers/include/cuda/std/barrier +43 -0
  998. cuda/cccl/headers/include/cuda/std/bit +35 -0
  999. cuda/cccl/headers/include/cuda/std/bitset +1026 -0
  1000. cuda/cccl/headers/include/cuda/std/cassert +28 -0
  1001. cuda/cccl/headers/include/cuda/std/ccomplex +15 -0
  1002. cuda/cccl/headers/include/cuda/std/cfloat +59 -0
  1003. cuda/cccl/headers/include/cuda/std/chrono +26 -0
  1004. cuda/cccl/headers/include/cuda/std/climits +61 -0
  1005. cuda/cccl/headers/include/cuda/std/cmath +25 -0
  1006. cuda/cccl/headers/include/cuda/std/complex +25 -0
  1007. cuda/cccl/headers/include/cuda/std/concepts +48 -0
  1008. cuda/cccl/headers/include/cuda/std/cstddef +28 -0
  1009. cuda/cccl/headers/include/cuda/std/cstdint +178 -0
  1010. cuda/cccl/headers/include/cuda/std/cstdlib +30 -0
  1011. cuda/cccl/headers/include/cuda/std/cstring +111 -0
  1012. cuda/cccl/headers/include/cuda/std/ctime +147 -0
  1013. cuda/cccl/headers/include/cuda/std/detail/__config +45 -0
  1014. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +258 -0
  1015. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/algorithm +2692 -0
  1016. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/chrono +3689 -0
  1017. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/cmath +685 -0
  1018. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/complex +1610 -0
  1019. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/iosfwd +128 -0
  1020. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/optional +1786 -0
  1021. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/stdexcept +120 -0
  1022. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/tuple +1378 -0
  1023. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/variant +2160 -0
  1024. cuda/cccl/headers/include/cuda/std/execution +27 -0
  1025. cuda/cccl/headers/include/cuda/std/expected +30 -0
  1026. cuda/cccl/headers/include/cuda/std/functional +56 -0
  1027. cuda/cccl/headers/include/cuda/std/initializer_list +36 -0
  1028. cuda/cccl/headers/include/cuda/std/inplace_vector +2171 -0
  1029. cuda/cccl/headers/include/cuda/std/iterator +70 -0
  1030. cuda/cccl/headers/include/cuda/std/latch +34 -0
  1031. cuda/cccl/headers/include/cuda/std/limits +28 -0
  1032. cuda/cccl/headers/include/cuda/std/linalg +30 -0
  1033. cuda/cccl/headers/include/cuda/std/mdspan +38 -0
  1034. cuda/cccl/headers/include/cuda/std/memory +39 -0
  1035. cuda/cccl/headers/include/cuda/std/numbers +335 -0
  1036. cuda/cccl/headers/include/cuda/std/numeric +41 -0
  1037. cuda/cccl/headers/include/cuda/std/optional +25 -0
  1038. cuda/cccl/headers/include/cuda/std/ranges +68 -0
  1039. cuda/cccl/headers/include/cuda/std/ratio +417 -0
  1040. cuda/cccl/headers/include/cuda/std/semaphore +31 -0
  1041. cuda/cccl/headers/include/cuda/std/source_location +83 -0
  1042. cuda/cccl/headers/include/cuda/std/span +640 -0
  1043. cuda/cccl/headers/include/cuda/std/string_view +814 -0
  1044. cuda/cccl/headers/include/cuda/std/tuple +26 -0
  1045. cuda/cccl/headers/include/cuda/std/type_traits +176 -0
  1046. cuda/cccl/headers/include/cuda/std/utility +70 -0
  1047. cuda/cccl/headers/include/cuda/std/variant +25 -0
  1048. cuda/cccl/headers/include/cuda/std/version +245 -0
  1049. cuda/cccl/headers/include/cuda/stream_ref +54 -0
  1050. cuda/cccl/headers/include/cuda/type_traits +27 -0
  1051. cuda/cccl/headers/include/cuda/version +16 -0
  1052. cuda/cccl/headers/include/cuda/warp +28 -0
  1053. cuda/cccl/headers/include/cuda/work_stealing +26 -0
  1054. cuda/cccl/headers/include/nv/detail/__preprocessor +169 -0
  1055. cuda/cccl/headers/include/nv/detail/__target_macros +599 -0
  1056. cuda/cccl/headers/include/nv/target +229 -0
  1057. cuda/cccl/headers/include/thrust/addressof.h +22 -0
  1058. cuda/cccl/headers/include/thrust/adjacent_difference.h +254 -0
  1059. cuda/cccl/headers/include/thrust/advance.h +59 -0
  1060. cuda/cccl/headers/include/thrust/allocate_unique.h +299 -0
  1061. cuda/cccl/headers/include/thrust/binary_search.h +1910 -0
  1062. cuda/cccl/headers/include/thrust/complex.h +859 -0
  1063. cuda/cccl/headers/include/thrust/copy.h +506 -0
  1064. cuda/cccl/headers/include/thrust/count.h +245 -0
  1065. cuda/cccl/headers/include/thrust/detail/adjacent_difference.inl +95 -0
  1066. cuda/cccl/headers/include/thrust/detail/algorithm_wrapper.h +37 -0
  1067. cuda/cccl/headers/include/thrust/detail/alignment.h +81 -0
  1068. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +350 -0
  1069. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.inl +371 -0
  1070. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +45 -0
  1071. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.inl +242 -0
  1072. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +39 -0
  1073. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.inl +137 -0
  1074. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +39 -0
  1075. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.inl +99 -0
  1076. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +53 -0
  1077. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.inl +68 -0
  1078. cuda/cccl/headers/include/thrust/detail/allocator/no_throw_allocator.h +76 -0
  1079. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +102 -0
  1080. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.inl +86 -0
  1081. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +79 -0
  1082. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.inl +81 -0
  1083. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.h +39 -0
  1084. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.inl +98 -0
  1085. cuda/cccl/headers/include/thrust/detail/allocator_aware_execution_policy.h +99 -0
  1086. cuda/cccl/headers/include/thrust/detail/binary_search.inl +525 -0
  1087. cuda/cccl/headers/include/thrust/detail/caching_allocator.h +47 -0
  1088. cuda/cccl/headers/include/thrust/detail/complex/arithmetic.h +255 -0
  1089. cuda/cccl/headers/include/thrust/detail/complex/c99math.h +64 -0
  1090. cuda/cccl/headers/include/thrust/detail/complex/catrig.h +875 -0
  1091. cuda/cccl/headers/include/thrust/detail/complex/catrigf.h +589 -0
  1092. cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +233 -0
  1093. cuda/cccl/headers/include/thrust/detail/complex/ccoshf.h +161 -0
  1094. cuda/cccl/headers/include/thrust/detail/complex/cexp.h +195 -0
  1095. cuda/cccl/headers/include/thrust/detail/complex/cexpf.h +173 -0
  1096. cuda/cccl/headers/include/thrust/detail/complex/clog.h +223 -0
  1097. cuda/cccl/headers/include/thrust/detail/complex/clogf.h +210 -0
  1098. cuda/cccl/headers/include/thrust/detail/complex/complex.inl +263 -0
  1099. cuda/cccl/headers/include/thrust/detail/complex/cpow.h +50 -0
  1100. cuda/cccl/headers/include/thrust/detail/complex/cproj.h +81 -0
  1101. cuda/cccl/headers/include/thrust/detail/complex/csinh.h +228 -0
  1102. cuda/cccl/headers/include/thrust/detail/complex/csinhf.h +168 -0
  1103. cuda/cccl/headers/include/thrust/detail/complex/csqrt.h +178 -0
  1104. cuda/cccl/headers/include/thrust/detail/complex/csqrtf.h +174 -0
  1105. cuda/cccl/headers/include/thrust/detail/complex/ctanh.h +208 -0
  1106. cuda/cccl/headers/include/thrust/detail/complex/ctanhf.h +133 -0
  1107. cuda/cccl/headers/include/thrust/detail/complex/math_private.h +138 -0
  1108. cuda/cccl/headers/include/thrust/detail/complex/stream.h +73 -0
  1109. cuda/cccl/headers/include/thrust/detail/config/compiler.h +38 -0
  1110. cuda/cccl/headers/include/thrust/detail/config/config.h +43 -0
  1111. cuda/cccl/headers/include/thrust/detail/config/cpp_dialect.h +78 -0
  1112. cuda/cccl/headers/include/thrust/detail/config/device_system.h +55 -0
  1113. cuda/cccl/headers/include/thrust/detail/config/host_system.h +48 -0
  1114. cuda/cccl/headers/include/thrust/detail/config/memory_resource.h +41 -0
  1115. cuda/cccl/headers/include/thrust/detail/config/namespace.h +162 -0
  1116. cuda/cccl/headers/include/thrust/detail/config/simple_defines.h +48 -0
  1117. cuda/cccl/headers/include/thrust/detail/config.h +36 -0
  1118. cuda/cccl/headers/include/thrust/detail/contiguous_storage.h +228 -0
  1119. cuda/cccl/headers/include/thrust/detail/contiguous_storage.inl +273 -0
  1120. cuda/cccl/headers/include/thrust/detail/copy.h +72 -0
  1121. cuda/cccl/headers/include/thrust/detail/copy.inl +129 -0
  1122. cuda/cccl/headers/include/thrust/detail/copy_if.h +62 -0
  1123. cuda/cccl/headers/include/thrust/detail/copy_if.inl +102 -0
  1124. cuda/cccl/headers/include/thrust/detail/count.h +55 -0
  1125. cuda/cccl/headers/include/thrust/detail/count.inl +89 -0
  1126. cuda/cccl/headers/include/thrust/detail/device_delete.inl +52 -0
  1127. cuda/cccl/headers/include/thrust/detail/device_free.inl +47 -0
  1128. cuda/cccl/headers/include/thrust/detail/device_malloc.inl +60 -0
  1129. cuda/cccl/headers/include/thrust/detail/device_new.inl +61 -0
  1130. cuda/cccl/headers/include/thrust/detail/device_ptr.inl +48 -0
  1131. cuda/cccl/headers/include/thrust/detail/equal.inl +93 -0
  1132. cuda/cccl/headers/include/thrust/detail/event_error.h +160 -0
  1133. cuda/cccl/headers/include/thrust/detail/execute_with_allocator.h +80 -0
  1134. cuda/cccl/headers/include/thrust/detail/execute_with_allocator_fwd.h +61 -0
  1135. cuda/cccl/headers/include/thrust/detail/execution_policy.h +80 -0
  1136. cuda/cccl/headers/include/thrust/detail/extrema.inl +184 -0
  1137. cuda/cccl/headers/include/thrust/detail/fill.inl +86 -0
  1138. cuda/cccl/headers/include/thrust/detail/find.inl +113 -0
  1139. cuda/cccl/headers/include/thrust/detail/for_each.inl +84 -0
  1140. cuda/cccl/headers/include/thrust/detail/function.h +49 -0
  1141. cuda/cccl/headers/include/thrust/detail/functional/actor.h +214 -0
  1142. cuda/cccl/headers/include/thrust/detail/functional/operators.h +386 -0
  1143. cuda/cccl/headers/include/thrust/detail/gather.inl +173 -0
  1144. cuda/cccl/headers/include/thrust/detail/generate.inl +86 -0
  1145. cuda/cccl/headers/include/thrust/detail/get_iterator_value.h +62 -0
  1146. cuda/cccl/headers/include/thrust/detail/inner_product.inl +118 -0
  1147. cuda/cccl/headers/include/thrust/detail/integer_math.h +130 -0
  1148. cuda/cccl/headers/include/thrust/detail/internal_functional.h +285 -0
  1149. cuda/cccl/headers/include/thrust/detail/logical.inl +113 -0
  1150. cuda/cccl/headers/include/thrust/detail/malloc_and_free.h +92 -0
  1151. cuda/cccl/headers/include/thrust/detail/malloc_and_free_fwd.h +45 -0
  1152. cuda/cccl/headers/include/thrust/detail/memory_algorithms.h +209 -0
  1153. cuda/cccl/headers/include/thrust/detail/memory_wrapper.h +40 -0
  1154. cuda/cccl/headers/include/thrust/detail/merge.inl +276 -0
  1155. cuda/cccl/headers/include/thrust/detail/mismatch.inl +94 -0
  1156. cuda/cccl/headers/include/thrust/detail/mpl/math.h +164 -0
  1157. cuda/cccl/headers/include/thrust/detail/numeric_wrapper.h +37 -0
  1158. cuda/cccl/headers/include/thrust/detail/overlapped_copy.h +124 -0
  1159. cuda/cccl/headers/include/thrust/detail/partition.inl +378 -0
  1160. cuda/cccl/headers/include/thrust/detail/pointer.h +217 -0
  1161. cuda/cccl/headers/include/thrust/detail/pointer.inl +172 -0
  1162. cuda/cccl/headers/include/thrust/detail/preprocessor.h +652 -0
  1163. cuda/cccl/headers/include/thrust/detail/random_bijection.h +177 -0
  1164. cuda/cccl/headers/include/thrust/detail/range/head_flags.h +116 -0
  1165. cuda/cccl/headers/include/thrust/detail/range/tail_flags.h +130 -0
  1166. cuda/cccl/headers/include/thrust/detail/raw_pointer_cast.h +52 -0
  1167. cuda/cccl/headers/include/thrust/detail/raw_reference_cast.h +189 -0
  1168. cuda/cccl/headers/include/thrust/detail/reduce.inl +377 -0
  1169. cuda/cccl/headers/include/thrust/detail/reference.h +500 -0
  1170. cuda/cccl/headers/include/thrust/detail/reference_forward_declaration.h +35 -0
  1171. cuda/cccl/headers/include/thrust/detail/remove.inl +213 -0
  1172. cuda/cccl/headers/include/thrust/detail/replace.inl +231 -0
  1173. cuda/cccl/headers/include/thrust/detail/reverse.inl +88 -0
  1174. cuda/cccl/headers/include/thrust/detail/scan.inl +518 -0
  1175. cuda/cccl/headers/include/thrust/detail/scatter.inl +157 -0
  1176. cuda/cccl/headers/include/thrust/detail/seq.h +54 -0
  1177. cuda/cccl/headers/include/thrust/detail/sequence.inl +109 -0
  1178. cuda/cccl/headers/include/thrust/detail/set_operations.inl +981 -0
  1179. cuda/cccl/headers/include/thrust/detail/shuffle.inl +86 -0
  1180. cuda/cccl/headers/include/thrust/detail/sort.inl +373 -0
  1181. cuda/cccl/headers/include/thrust/detail/static_assert.h +58 -0
  1182. cuda/cccl/headers/include/thrust/detail/static_map.h +167 -0
  1183. cuda/cccl/headers/include/thrust/detail/swap_ranges.inl +65 -0
  1184. cuda/cccl/headers/include/thrust/detail/tabulate.inl +62 -0
  1185. cuda/cccl/headers/include/thrust/detail/temporary_array.h +153 -0
  1186. cuda/cccl/headers/include/thrust/detail/temporary_array.inl +138 -0
  1187. cuda/cccl/headers/include/thrust/detail/temporary_buffer.h +81 -0
  1188. cuda/cccl/headers/include/thrust/detail/transform.inl +250 -0
  1189. cuda/cccl/headers/include/thrust/detail/transform_reduce.inl +69 -0
  1190. cuda/cccl/headers/include/thrust/detail/transform_scan.inl +161 -0
  1191. cuda/cccl/headers/include/thrust/detail/trivial_sequence.h +131 -0
  1192. cuda/cccl/headers/include/thrust/detail/tuple_meta_transform.h +61 -0
  1193. cuda/cccl/headers/include/thrust/detail/type_deduction.h +62 -0
  1194. cuda/cccl/headers/include/thrust/detail/type_traits/has_member_function.h +47 -0
  1195. cuda/cccl/headers/include/thrust/detail/type_traits/has_nested_type.h +43 -0
  1196. cuda/cccl/headers/include/thrust/detail/type_traits/is_call_possible.h +167 -0
  1197. cuda/cccl/headers/include/thrust/detail/type_traits/is_commutative.h +69 -0
  1198. cuda/cccl/headers/include/thrust/detail/type_traits/is_metafunction_defined.h +39 -0
  1199. cuda/cccl/headers/include/thrust/detail/type_traits/is_thrust_pointer.h +60 -0
  1200. cuda/cccl/headers/include/thrust/detail/type_traits/iterator/is_discard_iterator.h +44 -0
  1201. cuda/cccl/headers/include/thrust/detail/type_traits/iterator/is_output_iterator.h +46 -0
  1202. cuda/cccl/headers/include/thrust/detail/type_traits/minimum_type.h +89 -0
  1203. cuda/cccl/headers/include/thrust/detail/type_traits/pointer_traits.h +332 -0
  1204. cuda/cccl/headers/include/thrust/detail/type_traits.h +136 -0
  1205. cuda/cccl/headers/include/thrust/detail/uninitialized_copy.inl +90 -0
  1206. cuda/cccl/headers/include/thrust/detail/uninitialized_fill.inl +86 -0
  1207. cuda/cccl/headers/include/thrust/detail/unique.inl +373 -0
  1208. cuda/cccl/headers/include/thrust/detail/use_default.h +34 -0
  1209. cuda/cccl/headers/include/thrust/detail/util/align.h +59 -0
  1210. cuda/cccl/headers/include/thrust/detail/vector_base.h +630 -0
  1211. cuda/cccl/headers/include/thrust/detail/vector_base.inl +1242 -0
  1212. cuda/cccl/headers/include/thrust/device_allocator.h +134 -0
  1213. cuda/cccl/headers/include/thrust/device_delete.h +59 -0
  1214. cuda/cccl/headers/include/thrust/device_free.h +72 -0
  1215. cuda/cccl/headers/include/thrust/device_make_unique.h +56 -0
  1216. cuda/cccl/headers/include/thrust/device_malloc.h +108 -0
  1217. cuda/cccl/headers/include/thrust/device_malloc_allocator.h +190 -0
  1218. cuda/cccl/headers/include/thrust/device_new.h +91 -0
  1219. cuda/cccl/headers/include/thrust/device_new_allocator.h +179 -0
  1220. cuda/cccl/headers/include/thrust/device_ptr.h +202 -0
  1221. cuda/cccl/headers/include/thrust/device_reference.h +986 -0
  1222. cuda/cccl/headers/include/thrust/device_vector.h +574 -0
  1223. cuda/cccl/headers/include/thrust/distance.h +43 -0
  1224. cuda/cccl/headers/include/thrust/equal.h +247 -0
  1225. cuda/cccl/headers/include/thrust/execution_policy.h +384 -0
  1226. cuda/cccl/headers/include/thrust/extrema.h +657 -0
  1227. cuda/cccl/headers/include/thrust/fill.h +201 -0
  1228. cuda/cccl/headers/include/thrust/find.h +382 -0
  1229. cuda/cccl/headers/include/thrust/for_each.h +261 -0
  1230. cuda/cccl/headers/include/thrust/functional.h +396 -0
  1231. cuda/cccl/headers/include/thrust/gather.h +464 -0
  1232. cuda/cccl/headers/include/thrust/generate.h +193 -0
  1233. cuda/cccl/headers/include/thrust/host_vector.h +576 -0
  1234. cuda/cccl/headers/include/thrust/inner_product.h +264 -0
  1235. cuda/cccl/headers/include/thrust/iterator/constant_iterator.h +219 -0
  1236. cuda/cccl/headers/include/thrust/iterator/counting_iterator.h +335 -0
  1237. cuda/cccl/headers/include/thrust/iterator/detail/any_assign.h +48 -0
  1238. cuda/cccl/headers/include/thrust/iterator/detail/any_system_tag.h +43 -0
  1239. cuda/cccl/headers/include/thrust/iterator/detail/device_system_tag.h +38 -0
  1240. cuda/cccl/headers/include/thrust/iterator/detail/host_system_tag.h +38 -0
  1241. cuda/cccl/headers/include/thrust/iterator/detail/iterator_adaptor_base.h +81 -0
  1242. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_to_system.h +51 -0
  1243. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_to_traversal.h +62 -0
  1244. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h +57 -0
  1245. cuda/cccl/headers/include/thrust/iterator/detail/iterator_facade_category.h +199 -0
  1246. cuda/cccl/headers/include/thrust/iterator/detail/iterator_traversal_tags.h +50 -0
  1247. cuda/cccl/headers/include/thrust/iterator/detail/minimum_system.h +53 -0
  1248. cuda/cccl/headers/include/thrust/iterator/detail/normal_iterator.h +69 -0
  1249. cuda/cccl/headers/include/thrust/iterator/detail/retag.h +104 -0
  1250. cuda/cccl/headers/include/thrust/iterator/detail/tagged_iterator.h +81 -0
  1251. cuda/cccl/headers/include/thrust/iterator/detail/tuple_of_iterator_references.h +174 -0
  1252. cuda/cccl/headers/include/thrust/iterator/discard_iterator.h +164 -0
  1253. cuda/cccl/headers/include/thrust/iterator/iterator_adaptor.h +251 -0
  1254. cuda/cccl/headers/include/thrust/iterator/iterator_categories.h +215 -0
  1255. cuda/cccl/headers/include/thrust/iterator/iterator_facade.h +660 -0
  1256. cuda/cccl/headers/include/thrust/iterator/iterator_traits.h +245 -0
  1257. cuda/cccl/headers/include/thrust/iterator/offset_iterator.h +192 -0
  1258. cuda/cccl/headers/include/thrust/iterator/permutation_iterator.h +204 -0
  1259. cuda/cccl/headers/include/thrust/iterator/retag.h +74 -0
  1260. cuda/cccl/headers/include/thrust/iterator/reverse_iterator.h +221 -0
  1261. cuda/cccl/headers/include/thrust/iterator/shuffle_iterator.h +184 -0
  1262. cuda/cccl/headers/include/thrust/iterator/strided_iterator.h +152 -0
  1263. cuda/cccl/headers/include/thrust/iterator/tabulate_output_iterator.h +149 -0
  1264. cuda/cccl/headers/include/thrust/iterator/transform_input_output_iterator.h +226 -0
  1265. cuda/cccl/headers/include/thrust/iterator/transform_iterator.h +351 -0
  1266. cuda/cccl/headers/include/thrust/iterator/transform_output_iterator.h +190 -0
  1267. cuda/cccl/headers/include/thrust/iterator/zip_iterator.h +357 -0
  1268. cuda/cccl/headers/include/thrust/logical.h +290 -0
  1269. cuda/cccl/headers/include/thrust/memory.h +395 -0
  1270. cuda/cccl/headers/include/thrust/merge.h +725 -0
  1271. cuda/cccl/headers/include/thrust/mismatch.h +261 -0
  1272. cuda/cccl/headers/include/thrust/mr/allocator.h +229 -0
  1273. cuda/cccl/headers/include/thrust/mr/device_memory_resource.h +41 -0
  1274. cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +526 -0
  1275. cuda/cccl/headers/include/thrust/mr/disjoint_sync_pool.h +118 -0
  1276. cuda/cccl/headers/include/thrust/mr/disjoint_tls_pool.h +68 -0
  1277. cuda/cccl/headers/include/thrust/mr/fancy_pointer_resource.h +67 -0
  1278. cuda/cccl/headers/include/thrust/mr/host_memory_resource.h +38 -0
  1279. cuda/cccl/headers/include/thrust/mr/memory_resource.h +217 -0
  1280. cuda/cccl/headers/include/thrust/mr/new.h +100 -0
  1281. cuda/cccl/headers/include/thrust/mr/polymorphic_adaptor.h +63 -0
  1282. cuda/cccl/headers/include/thrust/mr/pool.h +526 -0
  1283. cuda/cccl/headers/include/thrust/mr/pool_options.h +174 -0
  1284. cuda/cccl/headers/include/thrust/mr/sync_pool.h +114 -0
  1285. cuda/cccl/headers/include/thrust/mr/tls_pool.h +65 -0
  1286. cuda/cccl/headers/include/thrust/mr/universal_memory_resource.h +29 -0
  1287. cuda/cccl/headers/include/thrust/mr/validator.h +56 -0
  1288. cuda/cccl/headers/include/thrust/pair.h +102 -0
  1289. cuda/cccl/headers/include/thrust/partition.h +1383 -0
  1290. cuda/cccl/headers/include/thrust/per_device_resource.h +98 -0
  1291. cuda/cccl/headers/include/thrust/random/detail/discard_block_engine.inl +184 -0
  1292. cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine.inl +155 -0
  1293. cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine_discard.h +104 -0
  1294. cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine.inl +151 -0
  1295. cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h +53 -0
  1296. cuda/cccl/headers/include/thrust/random/detail/mod.h +101 -0
  1297. cuda/cccl/headers/include/thrust/random/detail/normal_distribution.inl +187 -0
  1298. cuda/cccl/headers/include/thrust/random/detail/normal_distribution_base.h +160 -0
  1299. cuda/cccl/headers/include/thrust/random/detail/random_core_access.h +63 -0
  1300. cuda/cccl/headers/include/thrust/random/detail/subtract_with_carry_engine.inl +201 -0
  1301. cuda/cccl/headers/include/thrust/random/detail/uniform_int_distribution.inl +198 -0
  1302. cuda/cccl/headers/include/thrust/random/detail/uniform_real_distribution.inl +198 -0
  1303. cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine.inl +183 -0
  1304. cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine_max.h +217 -0
  1305. cuda/cccl/headers/include/thrust/random/discard_block_engine.h +240 -0
  1306. cuda/cccl/headers/include/thrust/random/linear_congruential_engine.h +289 -0
  1307. cuda/cccl/headers/include/thrust/random/linear_feedback_shift_engine.h +217 -0
  1308. cuda/cccl/headers/include/thrust/random/normal_distribution.h +257 -0
  1309. cuda/cccl/headers/include/thrust/random/subtract_with_carry_engine.h +247 -0
  1310. cuda/cccl/headers/include/thrust/random/uniform_int_distribution.h +261 -0
  1311. cuda/cccl/headers/include/thrust/random/uniform_real_distribution.h +258 -0
  1312. cuda/cccl/headers/include/thrust/random/xor_combine_engine.h +255 -0
  1313. cuda/cccl/headers/include/thrust/random.h +120 -0
  1314. cuda/cccl/headers/include/thrust/reduce.h +1112 -0
  1315. cuda/cccl/headers/include/thrust/remove.h +768 -0
  1316. cuda/cccl/headers/include/thrust/replace.h +827 -0
  1317. cuda/cccl/headers/include/thrust/reverse.h +213 -0
  1318. cuda/cccl/headers/include/thrust/scan.h +1671 -0
  1319. cuda/cccl/headers/include/thrust/scatter.h +446 -0
  1320. cuda/cccl/headers/include/thrust/sequence.h +277 -0
  1321. cuda/cccl/headers/include/thrust/set_operations.h +3026 -0
  1322. cuda/cccl/headers/include/thrust/shuffle.h +182 -0
  1323. cuda/cccl/headers/include/thrust/sort.h +1320 -0
  1324. cuda/cccl/headers/include/thrust/swap.h +147 -0
  1325. cuda/cccl/headers/include/thrust/system/cpp/detail/adjacent_difference.h +30 -0
  1326. cuda/cccl/headers/include/thrust/system/cpp/detail/assign_value.h +30 -0
  1327. cuda/cccl/headers/include/thrust/system/cpp/detail/binary_search.h +32 -0
  1328. cuda/cccl/headers/include/thrust/system/cpp/detail/copy.h +30 -0
  1329. cuda/cccl/headers/include/thrust/system/cpp/detail/copy_if.h +30 -0
  1330. cuda/cccl/headers/include/thrust/system/cpp/detail/count.h +29 -0
  1331. cuda/cccl/headers/include/thrust/system/cpp/detail/equal.h +29 -0
  1332. cuda/cccl/headers/include/thrust/system/cpp/detail/execution_policy.h +90 -0
  1333. cuda/cccl/headers/include/thrust/system/cpp/detail/extrema.h +30 -0
  1334. cuda/cccl/headers/include/thrust/system/cpp/detail/fill.h +29 -0
  1335. cuda/cccl/headers/include/thrust/system/cpp/detail/find.h +30 -0
  1336. cuda/cccl/headers/include/thrust/system/cpp/detail/for_each.h +30 -0
  1337. cuda/cccl/headers/include/thrust/system/cpp/detail/gather.h +29 -0
  1338. cuda/cccl/headers/include/thrust/system/cpp/detail/generate.h +29 -0
  1339. cuda/cccl/headers/include/thrust/system/cpp/detail/get_value.h +30 -0
  1340. cuda/cccl/headers/include/thrust/system/cpp/detail/inner_product.h +29 -0
  1341. cuda/cccl/headers/include/thrust/system/cpp/detail/iter_swap.h +30 -0
  1342. cuda/cccl/headers/include/thrust/system/cpp/detail/logical.h +29 -0
  1343. cuda/cccl/headers/include/thrust/system/cpp/detail/malloc_and_free.h +30 -0
  1344. cuda/cccl/headers/include/thrust/system/cpp/detail/memory.inl +60 -0
  1345. cuda/cccl/headers/include/thrust/system/cpp/detail/merge.h +30 -0
  1346. cuda/cccl/headers/include/thrust/system/cpp/detail/mismatch.h +29 -0
  1347. cuda/cccl/headers/include/thrust/system/cpp/detail/par.h +62 -0
  1348. cuda/cccl/headers/include/thrust/system/cpp/detail/partition.h +30 -0
  1349. cuda/cccl/headers/include/thrust/system/cpp/detail/per_device_resource.h +29 -0
  1350. cuda/cccl/headers/include/thrust/system/cpp/detail/reduce.h +30 -0
  1351. cuda/cccl/headers/include/thrust/system/cpp/detail/reduce_by_key.h +30 -0
  1352. cuda/cccl/headers/include/thrust/system/cpp/detail/remove.h +30 -0
  1353. cuda/cccl/headers/include/thrust/system/cpp/detail/replace.h +29 -0
  1354. cuda/cccl/headers/include/thrust/system/cpp/detail/reverse.h +29 -0
  1355. cuda/cccl/headers/include/thrust/system/cpp/detail/scan.h +30 -0
  1356. cuda/cccl/headers/include/thrust/system/cpp/detail/scan_by_key.h +30 -0
  1357. cuda/cccl/headers/include/thrust/system/cpp/detail/scatter.h +29 -0
  1358. cuda/cccl/headers/include/thrust/system/cpp/detail/sequence.h +29 -0
  1359. cuda/cccl/headers/include/thrust/system/cpp/detail/set_operations.h +30 -0
  1360. cuda/cccl/headers/include/thrust/system/cpp/detail/sort.h +30 -0
  1361. cuda/cccl/headers/include/thrust/system/cpp/detail/swap_ranges.h +29 -0
  1362. cuda/cccl/headers/include/thrust/system/cpp/detail/tabulate.h +29 -0
  1363. cuda/cccl/headers/include/thrust/system/cpp/detail/temporary_buffer.h +29 -0
  1364. cuda/cccl/headers/include/thrust/system/cpp/detail/transform.h +29 -0
  1365. cuda/cccl/headers/include/thrust/system/cpp/detail/transform_reduce.h +29 -0
  1366. cuda/cccl/headers/include/thrust/system/cpp/detail/transform_scan.h +29 -0
  1367. cuda/cccl/headers/include/thrust/system/cpp/detail/uninitialized_copy.h +29 -0
  1368. cuda/cccl/headers/include/thrust/system/cpp/detail/uninitialized_fill.h +29 -0
  1369. cuda/cccl/headers/include/thrust/system/cpp/detail/unique.h +30 -0
  1370. cuda/cccl/headers/include/thrust/system/cpp/detail/unique_by_key.h +30 -0
  1371. cuda/cccl/headers/include/thrust/system/cpp/detail/vector.inl +130 -0
  1372. cuda/cccl/headers/include/thrust/system/cpp/execution_policy.h +161 -0
  1373. cuda/cccl/headers/include/thrust/system/cpp/memory.h +109 -0
  1374. cuda/cccl/headers/include/thrust/system/cpp/memory_resource.h +75 -0
  1375. cuda/cccl/headers/include/thrust/system/cpp/pointer.h +119 -0
  1376. cuda/cccl/headers/include/thrust/system/cpp/vector.h +99 -0
  1377. cuda/cccl/headers/include/thrust/system/cuda/config.h +123 -0
  1378. cuda/cccl/headers/include/thrust/system/cuda/detail/adjacent_difference.h +219 -0
  1379. cuda/cccl/headers/include/thrust/system/cuda/detail/assign_value.h +124 -0
  1380. cuda/cccl/headers/include/thrust/system/cuda/detail/binary_search.h +29 -0
  1381. cuda/cccl/headers/include/thrust/system/cuda/detail/cdp_dispatch.h +72 -0
  1382. cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +129 -0
  1383. cuda/cccl/headers/include/thrust/system/cuda/detail/copy_if.h +255 -0
  1384. cuda/cccl/headers/include/thrust/system/cuda/detail/core/agent_launcher.h +289 -0
  1385. cuda/cccl/headers/include/thrust/system/cuda/detail/core/load_iterator.h +58 -0
  1386. cuda/cccl/headers/include/thrust/system/cuda/detail/core/make_load_iterator.h +60 -0
  1387. cuda/cccl/headers/include/thrust/system/cuda/detail/core/triple_chevron_launch.h +191 -0
  1388. cuda/cccl/headers/include/thrust/system/cuda/detail/core/util.h +630 -0
  1389. cuda/cccl/headers/include/thrust/system/cuda/detail/count.h +75 -0
  1390. cuda/cccl/headers/include/thrust/system/cuda/detail/cross_system.h +243 -0
  1391. cuda/cccl/headers/include/thrust/system/cuda/detail/dispatch.h +210 -0
  1392. cuda/cccl/headers/include/thrust/system/cuda/detail/equal.h +64 -0
  1393. cuda/cccl/headers/include/thrust/system/cuda/detail/error.inl +96 -0
  1394. cuda/cccl/headers/include/thrust/system/cuda/detail/execution_policy.h +113 -0
  1395. cuda/cccl/headers/include/thrust/system/cuda/detail/extrema.h +476 -0
  1396. cuda/cccl/headers/include/thrust/system/cuda/detail/fill.h +82 -0
  1397. cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +272 -0
  1398. cuda/cccl/headers/include/thrust/system/cuda/detail/for_each.h +83 -0
  1399. cuda/cccl/headers/include/thrust/system/cuda/detail/gather.h +91 -0
  1400. cuda/cccl/headers/include/thrust/system/cuda/detail/generate.h +85 -0
  1401. cuda/cccl/headers/include/thrust/system/cuda/detail/get_value.h +65 -0
  1402. cuda/cccl/headers/include/thrust/system/cuda/detail/inner_product.h +75 -0
  1403. cuda/cccl/headers/include/thrust/system/cuda/detail/internal/copy_cross_system.h +204 -0
  1404. cuda/cccl/headers/include/thrust/system/cuda/detail/internal/copy_device_to_device.h +98 -0
  1405. cuda/cccl/headers/include/thrust/system/cuda/detail/iter_swap.h +69 -0
  1406. cuda/cccl/headers/include/thrust/system/cuda/detail/logical.h +29 -0
  1407. cuda/cccl/headers/include/thrust/system/cuda/detail/make_unsigned_special.h +61 -0
  1408. cuda/cccl/headers/include/thrust/system/cuda/detail/malloc_and_free.h +121 -0
  1409. cuda/cccl/headers/include/thrust/system/cuda/detail/memory.inl +57 -0
  1410. cuda/cccl/headers/include/thrust/system/cuda/detail/merge.h +228 -0
  1411. cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +217 -0
  1412. cuda/cccl/headers/include/thrust/system/cuda/detail/par.h +237 -0
  1413. cuda/cccl/headers/include/thrust/system/cuda/detail/par_to_seq.h +95 -0
  1414. cuda/cccl/headers/include/thrust/system/cuda/detail/parallel_for.h +81 -0
  1415. cuda/cccl/headers/include/thrust/system/cuda/detail/partition.h +405 -0
  1416. cuda/cccl/headers/include/thrust/system/cuda/detail/per_device_resource.h +72 -0
  1417. cuda/cccl/headers/include/thrust/system/cuda/detail/reduce.h +961 -0
  1418. cuda/cccl/headers/include/thrust/system/cuda/detail/reduce_by_key.h +1000 -0
  1419. cuda/cccl/headers/include/thrust/system/cuda/detail/remove.h +107 -0
  1420. cuda/cccl/headers/include/thrust/system/cuda/detail/replace.h +164 -0
  1421. cuda/cccl/headers/include/thrust/system/cuda/detail/reverse.h +88 -0
  1422. cuda/cccl/headers/include/thrust/system/cuda/detail/scan.h +342 -0
  1423. cuda/cccl/headers/include/thrust/system/cuda/detail/scan_by_key.h +415 -0
  1424. cuda/cccl/headers/include/thrust/system/cuda/detail/scatter.h +79 -0
  1425. cuda/cccl/headers/include/thrust/system/cuda/detail/sequence.h +29 -0
  1426. cuda/cccl/headers/include/thrust/system/cuda/detail/set_operations.h +1736 -0
  1427. cuda/cccl/headers/include/thrust/system/cuda/detail/sort.h +482 -0
  1428. cuda/cccl/headers/include/thrust/system/cuda/detail/swap_ranges.h +75 -0
  1429. cuda/cccl/headers/include/thrust/system/cuda/detail/tabulate.h +75 -0
  1430. cuda/cccl/headers/include/thrust/system/cuda/detail/temporary_buffer.h +132 -0
  1431. cuda/cccl/headers/include/thrust/system/cuda/detail/terminate.h +53 -0
  1432. cuda/cccl/headers/include/thrust/system/cuda/detail/transform.h +403 -0
  1433. cuda/cccl/headers/include/thrust/system/cuda/detail/transform_reduce.h +143 -0
  1434. cuda/cccl/headers/include/thrust/system/cuda/detail/transform_scan.h +119 -0
  1435. cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_copy.h +94 -0
  1436. cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_fill.h +91 -0
  1437. cuda/cccl/headers/include/thrust/system/cuda/detail/unique.h +648 -0
  1438. cuda/cccl/headers/include/thrust/system/cuda/detail/unique_by_key.h +311 -0
  1439. cuda/cccl/headers/include/thrust/system/cuda/detail/util.h +251 -0
  1440. cuda/cccl/headers/include/thrust/system/cuda/error.h +175 -0
  1441. cuda/cccl/headers/include/thrust/system/cuda/execution_policy.h +39 -0
  1442. cuda/cccl/headers/include/thrust/system/cuda/memory.h +122 -0
  1443. cuda/cccl/headers/include/thrust/system/cuda/memory_resource.h +122 -0
  1444. cuda/cccl/headers/include/thrust/system/cuda/pointer.h +140 -0
  1445. cuda/cccl/headers/include/thrust/system/cuda/vector.h +108 -0
  1446. cuda/cccl/headers/include/thrust/system/detail/adl/adjacent_difference.h +51 -0
  1447. cuda/cccl/headers/include/thrust/system/detail/adl/assign_value.h +51 -0
  1448. cuda/cccl/headers/include/thrust/system/detail/adl/binary_search.h +51 -0
  1449. cuda/cccl/headers/include/thrust/system/detail/adl/copy.h +51 -0
  1450. cuda/cccl/headers/include/thrust/system/detail/adl/copy_if.h +52 -0
  1451. cuda/cccl/headers/include/thrust/system/detail/adl/count.h +51 -0
  1452. cuda/cccl/headers/include/thrust/system/detail/adl/equal.h +51 -0
  1453. cuda/cccl/headers/include/thrust/system/detail/adl/extrema.h +51 -0
  1454. cuda/cccl/headers/include/thrust/system/detail/adl/fill.h +51 -0
  1455. cuda/cccl/headers/include/thrust/system/detail/adl/find.h +51 -0
  1456. cuda/cccl/headers/include/thrust/system/detail/adl/for_each.h +51 -0
  1457. cuda/cccl/headers/include/thrust/system/detail/adl/gather.h +51 -0
  1458. cuda/cccl/headers/include/thrust/system/detail/adl/generate.h +51 -0
  1459. cuda/cccl/headers/include/thrust/system/detail/adl/get_value.h +51 -0
  1460. cuda/cccl/headers/include/thrust/system/detail/adl/inner_product.h +51 -0
  1461. cuda/cccl/headers/include/thrust/system/detail/adl/iter_swap.h +51 -0
  1462. cuda/cccl/headers/include/thrust/system/detail/adl/logical.h +51 -0
  1463. cuda/cccl/headers/include/thrust/system/detail/adl/malloc_and_free.h +51 -0
  1464. cuda/cccl/headers/include/thrust/system/detail/adl/merge.h +51 -0
  1465. cuda/cccl/headers/include/thrust/system/detail/adl/mismatch.h +51 -0
  1466. cuda/cccl/headers/include/thrust/system/detail/adl/partition.h +51 -0
  1467. cuda/cccl/headers/include/thrust/system/detail/adl/per_device_resource.h +48 -0
  1468. cuda/cccl/headers/include/thrust/system/detail/adl/reduce.h +51 -0
  1469. cuda/cccl/headers/include/thrust/system/detail/adl/reduce_by_key.h +51 -0
  1470. cuda/cccl/headers/include/thrust/system/detail/adl/remove.h +51 -0
  1471. cuda/cccl/headers/include/thrust/system/detail/adl/replace.h +51 -0
  1472. cuda/cccl/headers/include/thrust/system/detail/adl/reverse.h +51 -0
  1473. cuda/cccl/headers/include/thrust/system/detail/adl/scan.h +51 -0
  1474. cuda/cccl/headers/include/thrust/system/detail/adl/scan_by_key.h +51 -0
  1475. cuda/cccl/headers/include/thrust/system/detail/adl/scatter.h +51 -0
  1476. cuda/cccl/headers/include/thrust/system/detail/adl/sequence.h +51 -0
  1477. cuda/cccl/headers/include/thrust/system/detail/adl/set_operations.h +51 -0
  1478. cuda/cccl/headers/include/thrust/system/detail/adl/sort.h +51 -0
  1479. cuda/cccl/headers/include/thrust/system/detail/adl/swap_ranges.h +51 -0
  1480. cuda/cccl/headers/include/thrust/system/detail/adl/tabulate.h +51 -0
  1481. cuda/cccl/headers/include/thrust/system/detail/adl/temporary_buffer.h +51 -0
  1482. cuda/cccl/headers/include/thrust/system/detail/adl/transform.h +51 -0
  1483. cuda/cccl/headers/include/thrust/system/detail/adl/transform_reduce.h +51 -0
  1484. cuda/cccl/headers/include/thrust/system/detail/adl/transform_scan.h +51 -0
  1485. cuda/cccl/headers/include/thrust/system/detail/adl/uninitialized_copy.h +51 -0
  1486. cuda/cccl/headers/include/thrust/system/detail/adl/uninitialized_fill.h +51 -0
  1487. cuda/cccl/headers/include/thrust/system/detail/adl/unique.h +51 -0
  1488. cuda/cccl/headers/include/thrust/system/detail/adl/unique_by_key.h +51 -0
  1489. cuda/cccl/headers/include/thrust/system/detail/bad_alloc.h +64 -0
  1490. cuda/cccl/headers/include/thrust/system/detail/errno.h +125 -0
  1491. cuda/cccl/headers/include/thrust/system/detail/error_category.inl +302 -0
  1492. cuda/cccl/headers/include/thrust/system/detail/error_code.inl +173 -0
  1493. cuda/cccl/headers/include/thrust/system/detail/error_condition.inl +121 -0
  1494. cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.h +59 -0
  1495. cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.inl +85 -0
  1496. cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.h +167 -0
  1497. cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.inl +391 -0
  1498. cuda/cccl/headers/include/thrust/system/detail/generic/copy.h +51 -0
  1499. cuda/cccl/headers/include/thrust/system/detail/generic/copy.inl +70 -0
  1500. cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.h +64 -0
  1501. cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.inl +152 -0
  1502. cuda/cccl/headers/include/thrust/system/detail/generic/count.h +54 -0
  1503. cuda/cccl/headers/include/thrust/system/detail/generic/count.inl +90 -0
  1504. cuda/cccl/headers/include/thrust/system/detail/generic/equal.h +55 -0
  1505. cuda/cccl/headers/include/thrust/system/detail/generic/equal.inl +66 -0
  1506. cuda/cccl/headers/include/thrust/system/detail/generic/extrema.h +72 -0
  1507. cuda/cccl/headers/include/thrust/system/detail/generic/extrema.inl +258 -0
  1508. cuda/cccl/headers/include/thrust/system/detail/generic/fill.h +60 -0
  1509. cuda/cccl/headers/include/thrust/system/detail/generic/find.h +55 -0
  1510. cuda/cccl/headers/include/thrust/system/detail/generic/find.inl +143 -0
  1511. cuda/cccl/headers/include/thrust/system/detail/generic/for_each.h +64 -0
  1512. cuda/cccl/headers/include/thrust/system/detail/generic/gather.h +79 -0
  1513. cuda/cccl/headers/include/thrust/system/detail/generic/gather.inl +102 -0
  1514. cuda/cccl/headers/include/thrust/system/detail/generic/generate.h +51 -0
  1515. cuda/cccl/headers/include/thrust/system/detail/generic/generate.inl +63 -0
  1516. cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.h +66 -0
  1517. cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.inl +78 -0
  1518. cuda/cccl/headers/include/thrust/system/detail/generic/logical.h +65 -0
  1519. cuda/cccl/headers/include/thrust/system/detail/generic/memory.h +70 -0
  1520. cuda/cccl/headers/include/thrust/system/detail/generic/memory.inl +83 -0
  1521. cuda/cccl/headers/include/thrust/system/detail/generic/merge.h +105 -0
  1522. cuda/cccl/headers/include/thrust/system/detail/generic/merge.inl +154 -0
  1523. cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.h +55 -0
  1524. cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.inl +74 -0
  1525. cuda/cccl/headers/include/thrust/system/detail/generic/partition.h +135 -0
  1526. cuda/cccl/headers/include/thrust/system/detail/generic/partition.inl +213 -0
  1527. cuda/cccl/headers/include/thrust/system/detail/generic/per_device_resource.h +49 -0
  1528. cuda/cccl/headers/include/thrust/system/detail/generic/reduce.h +77 -0
  1529. cuda/cccl/headers/include/thrust/system/detail/generic/reduce.inl +106 -0
  1530. cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.h +89 -0
  1531. cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.inl +192 -0
  1532. cuda/cccl/headers/include/thrust/system/detail/generic/remove.h +92 -0
  1533. cuda/cccl/headers/include/thrust/system/detail/generic/remove.inl +127 -0
  1534. cuda/cccl/headers/include/thrust/system/detail/generic/replace.h +101 -0
  1535. cuda/cccl/headers/include/thrust/system/detail/generic/replace.inl +181 -0
  1536. cuda/cccl/headers/include/thrust/system/detail/generic/reverse.h +54 -0
  1537. cuda/cccl/headers/include/thrust/system/detail/generic/reverse.inl +72 -0
  1538. cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.h +78 -0
  1539. cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.inl +141 -0
  1540. cuda/cccl/headers/include/thrust/system/detail/generic/scan.h +78 -0
  1541. cuda/cccl/headers/include/thrust/system/detail/generic/scan.inl +91 -0
  1542. cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.h +132 -0
  1543. cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.inl +238 -0
  1544. cuda/cccl/headers/include/thrust/system/detail/generic/scatter.h +79 -0
  1545. cuda/cccl/headers/include/thrust/system/detail/generic/scatter.inl +91 -0
  1546. cuda/cccl/headers/include/thrust/system/detail/generic/select_system.h +96 -0
  1547. cuda/cccl/headers/include/thrust/system/detail/generic/sequence.h +55 -0
  1548. cuda/cccl/headers/include/thrust/system/detail/generic/sequence.inl +95 -0
  1549. cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.h +288 -0
  1550. cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.inl +482 -0
  1551. cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.h +60 -0
  1552. cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.inl +131 -0
  1553. cuda/cccl/headers/include/thrust/system/detail/generic/sort.h +119 -0
  1554. cuda/cccl/headers/include/thrust/system/detail/generic/sort.inl +181 -0
  1555. cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.h +50 -0
  1556. cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.inl +82 -0
  1557. cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.h +47 -0
  1558. cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.inl +60 -0
  1559. cuda/cccl/headers/include/thrust/system/detail/generic/tag.h +53 -0
  1560. cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.h +60 -0
  1561. cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.inl +88 -0
  1562. cuda/cccl/headers/include/thrust/system/detail/generic/transform.h +109 -0
  1563. cuda/cccl/headers/include/thrust/system/detail/generic/transform.inl +185 -0
  1564. cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.h +56 -0
  1565. cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.inl +62 -0
  1566. cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.h +86 -0
  1567. cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.inl +119 -0
  1568. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.h +51 -0
  1569. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.inl +172 -0
  1570. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.h +51 -0
  1571. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.inl +121 -0
  1572. cuda/cccl/headers/include/thrust/system/detail/generic/unique.h +77 -0
  1573. cuda/cccl/headers/include/thrust/system/detail/generic/unique.inl +119 -0
  1574. cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.h +87 -0
  1575. cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.inl +132 -0
  1576. cuda/cccl/headers/include/thrust/system/detail/internal/decompose.h +123 -0
  1577. cuda/cccl/headers/include/thrust/system/detail/sequential/adjacent_difference.h +76 -0
  1578. cuda/cccl/headers/include/thrust/system/detail/sequential/assign_value.h +48 -0
  1579. cuda/cccl/headers/include/thrust/system/detail/sequential/binary_search.h +142 -0
  1580. cuda/cccl/headers/include/thrust/system/detail/sequential/copy.h +55 -0
  1581. cuda/cccl/headers/include/thrust/system/detail/sequential/copy.inl +125 -0
  1582. cuda/cccl/headers/include/thrust/system/detail/sequential/copy_backward.h +55 -0
  1583. cuda/cccl/headers/include/thrust/system/detail/sequential/copy_if.h +77 -0
  1584. cuda/cccl/headers/include/thrust/system/detail/sequential/count.h +29 -0
  1585. cuda/cccl/headers/include/thrust/system/detail/sequential/equal.h +29 -0
  1586. cuda/cccl/headers/include/thrust/system/detail/sequential/execution_policy.h +78 -0
  1587. cuda/cccl/headers/include/thrust/system/detail/sequential/extrema.h +116 -0
  1588. cuda/cccl/headers/include/thrust/system/detail/sequential/fill.h +29 -0
  1589. cuda/cccl/headers/include/thrust/system/detail/sequential/find.h +68 -0
  1590. cuda/cccl/headers/include/thrust/system/detail/sequential/for_each.h +80 -0
  1591. cuda/cccl/headers/include/thrust/system/detail/sequential/gather.h +29 -0
  1592. cuda/cccl/headers/include/thrust/system/detail/sequential/general_copy.h +129 -0
  1593. cuda/cccl/headers/include/thrust/system/detail/sequential/generate.h +29 -0
  1594. cuda/cccl/headers/include/thrust/system/detail/sequential/get_value.h +49 -0
  1595. cuda/cccl/headers/include/thrust/system/detail/sequential/inner_product.h +29 -0
  1596. cuda/cccl/headers/include/thrust/system/detail/sequential/insertion_sort.h +147 -0
  1597. cuda/cccl/headers/include/thrust/system/detail/sequential/iter_swap.h +51 -0
  1598. cuda/cccl/headers/include/thrust/system/detail/sequential/logical.h +29 -0
  1599. cuda/cccl/headers/include/thrust/system/detail/sequential/malloc_and_free.h +56 -0
  1600. cuda/cccl/headers/include/thrust/system/detail/sequential/merge.h +81 -0
  1601. cuda/cccl/headers/include/thrust/system/detail/sequential/merge.inl +151 -0
  1602. cuda/cccl/headers/include/thrust/system/detail/sequential/mismatch.h +29 -0
  1603. cuda/cccl/headers/include/thrust/system/detail/sequential/partition.h +309 -0
  1604. cuda/cccl/headers/include/thrust/system/detail/sequential/per_device_resource.h +29 -0
  1605. cuda/cccl/headers/include/thrust/system/detail/sequential/reduce.h +70 -0
  1606. cuda/cccl/headers/include/thrust/system/detail/sequential/reduce_by_key.h +104 -0
  1607. cuda/cccl/headers/include/thrust/system/detail/sequential/remove.h +185 -0
  1608. cuda/cccl/headers/include/thrust/system/detail/sequential/replace.h +29 -0
  1609. cuda/cccl/headers/include/thrust/system/detail/sequential/reverse.h +29 -0
  1610. cuda/cccl/headers/include/thrust/system/detail/sequential/scan.h +160 -0
  1611. cuda/cccl/headers/include/thrust/system/detail/sequential/scan_by_key.h +151 -0
  1612. cuda/cccl/headers/include/thrust/system/detail/sequential/scatter.h +29 -0
  1613. cuda/cccl/headers/include/thrust/system/detail/sequential/sequence.h +29 -0
  1614. cuda/cccl/headers/include/thrust/system/detail/sequential/set_operations.h +212 -0
  1615. cuda/cccl/headers/include/thrust/system/detail/sequential/sort.h +65 -0
  1616. cuda/cccl/headers/include/thrust/system/detail/sequential/sort.inl +187 -0
  1617. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.h +61 -0
  1618. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.inl +362 -0
  1619. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.h +54 -0
  1620. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.inl +130 -0
  1621. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.h +54 -0
  1622. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.inl +592 -0
  1623. cuda/cccl/headers/include/thrust/system/detail/sequential/swap_ranges.h +29 -0
  1624. cuda/cccl/headers/include/thrust/system/detail/sequential/tabulate.h +29 -0
  1625. cuda/cccl/headers/include/thrust/system/detail/sequential/temporary_buffer.h +29 -0
  1626. cuda/cccl/headers/include/thrust/system/detail/sequential/transform.h +29 -0
  1627. cuda/cccl/headers/include/thrust/system/detail/sequential/transform_reduce.h +29 -0
  1628. cuda/cccl/headers/include/thrust/system/detail/sequential/transform_scan.h +29 -0
  1629. cuda/cccl/headers/include/thrust/system/detail/sequential/trivial_copy.h +64 -0
  1630. cuda/cccl/headers/include/thrust/system/detail/sequential/uninitialized_copy.h +29 -0
  1631. cuda/cccl/headers/include/thrust/system/detail/sequential/uninitialized_fill.h +29 -0
  1632. cuda/cccl/headers/include/thrust/system/detail/sequential/unique.h +121 -0
  1633. cuda/cccl/headers/include/thrust/system/detail/sequential/unique_by_key.h +112 -0
  1634. cuda/cccl/headers/include/thrust/system/detail/system_error.inl +108 -0
  1635. cuda/cccl/headers/include/thrust/system/error_code.h +512 -0
  1636. cuda/cccl/headers/include/thrust/system/omp/detail/adjacent_difference.h +54 -0
  1637. cuda/cccl/headers/include/thrust/system/omp/detail/assign_value.h +30 -0
  1638. cuda/cccl/headers/include/thrust/system/omp/detail/binary_search.h +77 -0
  1639. cuda/cccl/headers/include/thrust/system/omp/detail/copy.h +50 -0
  1640. cuda/cccl/headers/include/thrust/system/omp/detail/copy.inl +74 -0
  1641. cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.h +56 -0
  1642. cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.inl +59 -0
  1643. cuda/cccl/headers/include/thrust/system/omp/detail/count.h +30 -0
  1644. cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.h +50 -0
  1645. cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.inl +65 -0
  1646. cuda/cccl/headers/include/thrust/system/omp/detail/equal.h +30 -0
  1647. cuda/cccl/headers/include/thrust/system/omp/detail/execution_policy.h +113 -0
  1648. cuda/cccl/headers/include/thrust/system/omp/detail/extrema.h +66 -0
  1649. cuda/cccl/headers/include/thrust/system/omp/detail/fill.h +30 -0
  1650. cuda/cccl/headers/include/thrust/system/omp/detail/find.h +53 -0
  1651. cuda/cccl/headers/include/thrust/system/omp/detail/for_each.h +56 -0
  1652. cuda/cccl/headers/include/thrust/system/omp/detail/for_each.inl +87 -0
  1653. cuda/cccl/headers/include/thrust/system/omp/detail/gather.h +30 -0
  1654. cuda/cccl/headers/include/thrust/system/omp/detail/generate.h +30 -0
  1655. cuda/cccl/headers/include/thrust/system/omp/detail/get_value.h +30 -0
  1656. cuda/cccl/headers/include/thrust/system/omp/detail/inner_product.h +30 -0
  1657. cuda/cccl/headers/include/thrust/system/omp/detail/iter_swap.h +30 -0
  1658. cuda/cccl/headers/include/thrust/system/omp/detail/logical.h +30 -0
  1659. cuda/cccl/headers/include/thrust/system/omp/detail/malloc_and_free.h +30 -0
  1660. cuda/cccl/headers/include/thrust/system/omp/detail/memory.inl +93 -0
  1661. cuda/cccl/headers/include/thrust/system/omp/detail/merge.h +30 -0
  1662. cuda/cccl/headers/include/thrust/system/omp/detail/mismatch.h +30 -0
  1663. cuda/cccl/headers/include/thrust/system/omp/detail/par.h +62 -0
  1664. cuda/cccl/headers/include/thrust/system/omp/detail/partition.h +88 -0
  1665. cuda/cccl/headers/include/thrust/system/omp/detail/partition.inl +102 -0
  1666. cuda/cccl/headers/include/thrust/system/omp/detail/per_device_resource.h +29 -0
  1667. cuda/cccl/headers/include/thrust/system/omp/detail/pragma_omp.h +54 -0
  1668. cuda/cccl/headers/include/thrust/system/omp/detail/reduce.h +54 -0
  1669. cuda/cccl/headers/include/thrust/system/omp/detail/reduce.inl +78 -0
  1670. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.h +64 -0
  1671. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.inl +65 -0
  1672. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.h +59 -0
  1673. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.inl +103 -0
  1674. cuda/cccl/headers/include/thrust/system/omp/detail/remove.h +72 -0
  1675. cuda/cccl/headers/include/thrust/system/omp/detail/remove.inl +87 -0
  1676. cuda/cccl/headers/include/thrust/system/omp/detail/replace.h +30 -0
  1677. cuda/cccl/headers/include/thrust/system/omp/detail/reverse.h +30 -0
  1678. cuda/cccl/headers/include/thrust/system/omp/detail/scan.h +30 -0
  1679. cuda/cccl/headers/include/thrust/system/omp/detail/scan_by_key.h +30 -0
  1680. cuda/cccl/headers/include/thrust/system/omp/detail/scatter.h +30 -0
  1681. cuda/cccl/headers/include/thrust/system/omp/detail/sequence.h +30 -0
  1682. cuda/cccl/headers/include/thrust/system/omp/detail/set_operations.h +30 -0
  1683. cuda/cccl/headers/include/thrust/system/omp/detail/sort.h +60 -0
  1684. cuda/cccl/headers/include/thrust/system/omp/detail/sort.inl +259 -0
  1685. cuda/cccl/headers/include/thrust/system/omp/detail/swap_ranges.h +30 -0
  1686. cuda/cccl/headers/include/thrust/system/omp/detail/tabulate.h +30 -0
  1687. cuda/cccl/headers/include/thrust/system/omp/detail/temporary_buffer.h +29 -0
  1688. cuda/cccl/headers/include/thrust/system/omp/detail/transform.h +30 -0
  1689. cuda/cccl/headers/include/thrust/system/omp/detail/transform_reduce.h +30 -0
  1690. cuda/cccl/headers/include/thrust/system/omp/detail/transform_scan.h +30 -0
  1691. cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_copy.h +30 -0
  1692. cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_fill.h +30 -0
  1693. cuda/cccl/headers/include/thrust/system/omp/detail/unique.h +60 -0
  1694. cuda/cccl/headers/include/thrust/system/omp/detail/unique.inl +71 -0
  1695. cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.h +67 -0
  1696. cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.inl +75 -0
  1697. cuda/cccl/headers/include/thrust/system/omp/execution_policy.h +160 -0
  1698. cuda/cccl/headers/include/thrust/system/omp/memory.h +111 -0
  1699. cuda/cccl/headers/include/thrust/system/omp/memory_resource.h +75 -0
  1700. cuda/cccl/headers/include/thrust/system/omp/pointer.h +120 -0
  1701. cuda/cccl/headers/include/thrust/system/omp/vector.h +99 -0
  1702. cuda/cccl/headers/include/thrust/system/system_error.h +184 -0
  1703. cuda/cccl/headers/include/thrust/system/tbb/detail/adjacent_difference.h +54 -0
  1704. cuda/cccl/headers/include/thrust/system/tbb/detail/assign_value.h +30 -0
  1705. cuda/cccl/headers/include/thrust/system/tbb/detail/binary_search.h +30 -0
  1706. cuda/cccl/headers/include/thrust/system/tbb/detail/copy.h +50 -0
  1707. cuda/cccl/headers/include/thrust/system/tbb/detail/copy.inl +73 -0
  1708. cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.h +47 -0
  1709. cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.inl +136 -0
  1710. cuda/cccl/headers/include/thrust/system/tbb/detail/count.h +30 -0
  1711. cuda/cccl/headers/include/thrust/system/tbb/detail/equal.h +30 -0
  1712. cuda/cccl/headers/include/thrust/system/tbb/detail/execution_policy.h +92 -0
  1713. cuda/cccl/headers/include/thrust/system/tbb/detail/extrema.h +66 -0
  1714. cuda/cccl/headers/include/thrust/system/tbb/detail/fill.h +30 -0
  1715. cuda/cccl/headers/include/thrust/system/tbb/detail/find.h +49 -0
  1716. cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.h +51 -0
  1717. cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.inl +91 -0
  1718. cuda/cccl/headers/include/thrust/system/tbb/detail/gather.h +30 -0
  1719. cuda/cccl/headers/include/thrust/system/tbb/detail/generate.h +30 -0
  1720. cuda/cccl/headers/include/thrust/system/tbb/detail/get_value.h +30 -0
  1721. cuda/cccl/headers/include/thrust/system/tbb/detail/inner_product.h +30 -0
  1722. cuda/cccl/headers/include/thrust/system/tbb/detail/iter_swap.h +30 -0
  1723. cuda/cccl/headers/include/thrust/system/tbb/detail/logical.h +30 -0
  1724. cuda/cccl/headers/include/thrust/system/tbb/detail/malloc_and_free.h +30 -0
  1725. cuda/cccl/headers/include/thrust/system/tbb/detail/memory.inl +94 -0
  1726. cuda/cccl/headers/include/thrust/system/tbb/detail/merge.h +77 -0
  1727. cuda/cccl/headers/include/thrust/system/tbb/detail/merge.inl +327 -0
  1728. cuda/cccl/headers/include/thrust/system/tbb/detail/mismatch.h +30 -0
  1729. cuda/cccl/headers/include/thrust/system/tbb/detail/par.h +62 -0
  1730. cuda/cccl/headers/include/thrust/system/tbb/detail/partition.h +84 -0
  1731. cuda/cccl/headers/include/thrust/system/tbb/detail/partition.inl +98 -0
  1732. cuda/cccl/headers/include/thrust/system/tbb/detail/per_device_resource.h +29 -0
  1733. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.h +54 -0
  1734. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.inl +137 -0
  1735. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.h +61 -0
  1736. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.inl +400 -0
  1737. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_intervals.h +140 -0
  1738. cuda/cccl/headers/include/thrust/system/tbb/detail/remove.h +76 -0
  1739. cuda/cccl/headers/include/thrust/system/tbb/detail/remove.inl +87 -0
  1740. cuda/cccl/headers/include/thrust/system/tbb/detail/replace.h +30 -0
  1741. cuda/cccl/headers/include/thrust/system/tbb/detail/reverse.h +30 -0
  1742. cuda/cccl/headers/include/thrust/system/tbb/detail/scan.h +59 -0
  1743. cuda/cccl/headers/include/thrust/system/tbb/detail/scan.inl +312 -0
  1744. cuda/cccl/headers/include/thrust/system/tbb/detail/scan_by_key.h +30 -0
  1745. cuda/cccl/headers/include/thrust/system/tbb/detail/scatter.h +30 -0
  1746. cuda/cccl/headers/include/thrust/system/tbb/detail/sequence.h +30 -0
  1747. cuda/cccl/headers/include/thrust/system/tbb/detail/set_operations.h +30 -0
  1748. cuda/cccl/headers/include/thrust/system/tbb/detail/sort.h +60 -0
  1749. cuda/cccl/headers/include/thrust/system/tbb/detail/sort.inl +295 -0
  1750. cuda/cccl/headers/include/thrust/system/tbb/detail/swap_ranges.h +30 -0
  1751. cuda/cccl/headers/include/thrust/system/tbb/detail/tabulate.h +30 -0
  1752. cuda/cccl/headers/include/thrust/system/tbb/detail/temporary_buffer.h +29 -0
  1753. cuda/cccl/headers/include/thrust/system/tbb/detail/transform.h +30 -0
  1754. cuda/cccl/headers/include/thrust/system/tbb/detail/transform_reduce.h +30 -0
  1755. cuda/cccl/headers/include/thrust/system/tbb/detail/transform_scan.h +30 -0
  1756. cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_copy.h +30 -0
  1757. cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_fill.h +30 -0
  1758. cuda/cccl/headers/include/thrust/system/tbb/detail/unique.h +60 -0
  1759. cuda/cccl/headers/include/thrust/system/tbb/detail/unique.inl +71 -0
  1760. cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.h +67 -0
  1761. cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.inl +75 -0
  1762. cuda/cccl/headers/include/thrust/system/tbb/execution_policy.h +160 -0
  1763. cuda/cccl/headers/include/thrust/system/tbb/memory.h +111 -0
  1764. cuda/cccl/headers/include/thrust/system/tbb/memory_resource.h +75 -0
  1765. cuda/cccl/headers/include/thrust/system/tbb/pointer.h +120 -0
  1766. cuda/cccl/headers/include/thrust/system/tbb/vector.h +99 -0
  1767. cuda/cccl/headers/include/thrust/system_error.h +57 -0
  1768. cuda/cccl/headers/include/thrust/tabulate.h +125 -0
  1769. cuda/cccl/headers/include/thrust/transform.h +903 -0
  1770. cuda/cccl/headers/include/thrust/transform_reduce.h +190 -0
  1771. cuda/cccl/headers/include/thrust/transform_scan.h +442 -0
  1772. cuda/cccl/headers/include/thrust/tuple.h +142 -0
  1773. cuda/cccl/headers/include/thrust/type_traits/integer_sequence.h +261 -0
  1774. cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +182 -0
  1775. cuda/cccl/headers/include/thrust/type_traits/is_execution_policy.h +65 -0
  1776. cuda/cccl/headers/include/thrust/type_traits/is_operator_less_or_greater_function_object.h +184 -0
  1777. cuda/cccl/headers/include/thrust/type_traits/is_operator_plus_function_object.h +116 -0
  1778. cuda/cccl/headers/include/thrust/type_traits/is_trivially_relocatable.h +306 -0
  1779. cuda/cccl/headers/include/thrust/type_traits/logical_metafunctions.h +42 -0
  1780. cuda/cccl/headers/include/thrust/type_traits/unwrap_contiguous_iterator.h +93 -0
  1781. cuda/cccl/headers/include/thrust/uninitialized_copy.h +300 -0
  1782. cuda/cccl/headers/include/thrust/uninitialized_fill.h +268 -0
  1783. cuda/cccl/headers/include/thrust/unique.h +1090 -0
  1784. cuda/cccl/headers/include/thrust/universal_allocator.h +90 -0
  1785. cuda/cccl/headers/include/thrust/universal_ptr.h +34 -0
  1786. cuda/cccl/headers/include/thrust/universal_vector.h +71 -0
  1787. cuda/cccl/headers/include/thrust/version.h +93 -0
  1788. cuda/cccl/headers/include/thrust/zip_function.h +176 -0
  1789. cuda/cccl/headers/include_paths.py +72 -0
  1790. cuda/cccl/parallel/__init__.py +3 -0
  1791. cuda/cccl/parallel/experimental/__init__.py +3 -0
  1792. cuda/cccl/parallel/experimental/_bindings.py +24 -0
  1793. cuda/cccl/parallel/experimental/_bindings.pyi +388 -0
  1794. cuda/cccl/parallel/experimental/_bindings_impl.cpython-311-x86_64-linux-gnu.so +0 -0
  1795. cuda/cccl/parallel/experimental/_bindings_impl.pyx +2158 -0
  1796. cuda/cccl/parallel/experimental/_caching.py +71 -0
  1797. cuda/cccl/parallel/experimental/_cccl_interop.py +371 -0
  1798. cuda/cccl/parallel/experimental/_utils/__init__.py +0 -0
  1799. cuda/cccl/parallel/experimental/_utils/protocols.py +132 -0
  1800. cuda/cccl/parallel/experimental/algorithms/__init__.py +28 -0
  1801. cuda/cccl/parallel/experimental/algorithms/_merge_sort.py +172 -0
  1802. cuda/cccl/parallel/experimental/algorithms/_radix_sort.py +244 -0
  1803. cuda/cccl/parallel/experimental/algorithms/_reduce.py +136 -0
  1804. cuda/cccl/parallel/experimental/algorithms/_scan.py +179 -0
  1805. cuda/cccl/parallel/experimental/algorithms/_segmented_reduce.py +183 -0
  1806. cuda/cccl/parallel/experimental/algorithms/_transform.py +213 -0
  1807. cuda/cccl/parallel/experimental/algorithms/_unique_by_key.py +179 -0
  1808. cuda/cccl/parallel/experimental/cccl/.gitkeep +0 -0
  1809. cuda/cccl/parallel/experimental/cccl/libcccl.c.parallel.so +0 -0
  1810. cuda/cccl/parallel/experimental/iterators/__init__.py +157 -0
  1811. cuda/cccl/parallel/experimental/iterators/_iterators.py +650 -0
  1812. cuda/cccl/parallel/experimental/numba_utils.py +6 -0
  1813. cuda/cccl/parallel/experimental/struct.py +150 -0
  1814. cuda/cccl/parallel/experimental/typing.py +27 -0
  1815. cuda/cccl/py.typed +0 -0
  1816. cuda_cccl-0.1.3.1.0.dev1486.dist-info/METADATA +29 -0
  1817. cuda_cccl-0.1.3.1.0.dev1486.dist-info/RECORD +1819 -0
  1818. cuda_cccl-0.1.3.1.0.dev1486.dist-info/WHEEL +6 -0
  1819. cuda_cccl-0.1.3.1.0.dev1486.dist-info/licenses/LICENSE +1 -0
@@ -0,0 +1,2158 @@
1
+ # distutils: language = c++
2
+ # cython: language_level=3
3
+ # cython: linetrace=True
4
+
5
+ # Python signatures are declared in the companion Python stub file _bindings.pyi
6
+ # Make sure to update the PYI file whenever the Python API changes, so that Python
7
+ # static type checkers such as mypy green-light cuda.cccl.parallel
8
+
9
+ from libc.string cimport memset, memcpy
10
+ from libc.stdint cimport uint8_t, uint32_t, uint64_t, int64_t, uintptr_t
11
+ from cpython.bytes cimport PyBytes_FromStringAndSize
12
+
13
+ from cpython.buffer cimport (
14
+ Py_buffer, PyBUF_SIMPLE, PyBUF_ANY_CONTIGUOUS,
15
+ PyBuffer_Release, PyObject_CheckBuffer, PyObject_GetBuffer
16
+ )
17
+ from cpython.pycapsule cimport (
18
+ PyCapsule_CheckExact, PyCapsule_IsValid, PyCapsule_GetPointer
19
+ )
20
+
21
+ import ctypes
22
+
23
# Names taken from the CUDA driver header. The three structs are declared
# without fields; this block only builds pointer typedefs from them.
cdef extern from "<cuda.h>":
    cdef struct OpaqueCUstream_st
    cdef struct OpaqueCUkernel_st
    cdef struct OpaqueCUlibrary_st

    # Status code type, declared here as a plain int.
    ctypedef int CUresult
    # Handle types: pointers to the opaque structs above.
    ctypedef OpaqueCUstream_st *CUstream
    ctypedef OpaqueCUkernel_st *CUkernel
    ctypedef OpaqueCUlibrary_st *CUlibrary
32
+
33
+
34
# Enumerations and structs declared by the CCCL C header "cccl/c/types.h".
# The Python-facing wrapper classes later in this file hold instances of these.
cdef extern from "cccl/c/types.h":
    # Identifiers for the element types.
    ctypedef enum cccl_type_enum:
        CCCL_INT8
        CCCL_INT16
        CCCL_INT32
        CCCL_INT64
        CCCL_UINT8
        CCCL_UINT16
        CCCL_UINT32
        CCCL_UINT64
        CCCL_FLOAT32
        CCCL_FLOAT64
        CCCL_STORAGE

    # Operator kind: stateless or stateful.
    ctypedef enum cccl_op_kind_t:
        CCCL_STATELESS
        CCCL_STATEFUL

    # Iterator kind: pointer or iterator.
    ctypedef enum cccl_iterator_kind_t:
        CCCL_POINTER
        CCCL_ITERATOR

    # Type descriptor: size, alignment and type identifier.
    cdef struct cccl_type_info:
        size_t size
        size_t alignment
        cccl_type_enum type

    # Operator descriptor: kind, name, LTO-IR blob with its size, and a
    # state blob with its size and alignment.
    cdef struct cccl_op_t:
        cccl_op_kind_t type
        const char* name
        const char* ltoir
        size_t ltoir_size
        size_t size
        size_t alignment
        void *state

    # Value descriptor: type info plus a pointer to the value's bytes.
    cdef struct cccl_value_t:
        cccl_type_info type
        void *state

    # Offset argument of the host advance callback, signed or unsigned.
    cdef union cccl_increment_t:
        int64_t signed_offset
        uint64_t unsigned_offset

    # Signature of the host-side advance callback; declared nogil.
    ctypedef void (*cccl_host_op_fn_ptr_t)(void *, cccl_increment_t) nogil

    # Iterator descriptor: state blob (size/alignment/pointer), kind,
    # advance and dereference operators, value type, host advance callback.
    cdef struct cccl_iterator_t:
        size_t size
        size_t alignment
        cccl_iterator_kind_t type
        cccl_op_t advance
        cccl_op_t dereference
        cccl_type_info value_type
        void *state
        cccl_host_op_fn_ptr_t host_advance

    # Sort direction.
    ctypedef enum cccl_sort_order_t:
        CCCL_ASCENDING
        CCCL_DESCENDING
93
+
94
cdef void arg_type_check(
    str arg_name,
    object expected_type,
    object arg
) except *:
    """
    Raise TypeError unless `arg` is an instance of `expected_type`.

    `arg_name` is only used to build the error message.
    """
    if isinstance(arg, expected_type):
        return
    raise TypeError(
        f"Expected {arg_name} to have type '{expected_type}', "
        f"got '{type(arg)}'"
    )
104
+
105
+
106
cdef class IntEnumerationMember:
    """
    One member of an integer enumeration.

    Besides its name and integer value, the member stores the class of the
    enumeration it belongs to, which is what the type checks rely on.
    """
    cdef object parent_class
    cdef str enum_name
    cdef str attr_name
    cdef int attr_value

    def __cinit__(self, object parent_class, str enum_name, str attr_name, int attr_value):
        self.attr_value = attr_value
        self.attr_name = attr_name
        self.enum_name = enum_name
        self.parent_class = parent_class

    cdef str get_repr_str(self):
        # Rendered as "<EnumName.MEMBER: value>".
        cdef str qualified = f"{self.enum_name}.{self.attr_name}"
        return f"<{qualified}: {self.attr_value}>"

    def __repr__(self):
        return self.get_repr_str()

    def __str__(self):
        return self.get_repr_str()

    @property
    def parent_class(self):
        "Type of parental enumeration"
        return self.parent_class

    @property
    def name(self):
        "Name of the enumeration member"
        return self.attr_name

    @property
    def value(self):
        "Integer value of the enumeration member"
        return self.attr_value

    def __int__(self):
        return int(self.attr_value)

    def __hash__(self):
        # Hash over (concrete type, owning enumeration class, value).
        return hash((type(self), self.parent_class, <object>self.attr_value))

    def __eq__(self, other):
        cdef IntEnumerationMember rhs
        cdef bint same_type = type(other) == type(self)
        if not same_type:
            return False
        rhs = <IntEnumerationMember>other
        if self.attr_value != rhs.attr_value:
            return False
        return self.parent_class == rhs.parent_class
159
+
160
+
161
cdef class IntEnumerationBase:
    """
    Common base of the enumeration classes.

    Stores the enumeration's display name, which starts as "Undefined"
    and is overwritten by each subclass in its own `__cinit__`.
    """
    cdef str enum_name

    def __cinit__(self):
        self.enum_name = "Undefined"

    cdef str _display(self):
        # Shared text for repr() and str().
        return f"<enum '{self.enum_name}'>"

    @property
    def __name__(self):
        return self.enum_name

    def __repr__(self):
        return self._display()

    def __str__(self):
        return self._display()
176
+
177
+
178
cdef class Enumeration_CCCLType(IntEnumerationBase):
    """
    Enumeration of CCCL types

    Each member is created once in `__cinit__` and exposed through a
    read-only property named after the member.
    """
    cdef IntEnumerationMember _int8, _int16, _int32, _int64
    cdef IntEnumerationMember _uint8, _uint16, _uint32, _uint64
    cdef IntEnumerationMember _float32, _float64
    cdef IntEnumerationMember _storage

    def __cinit__(self):
        # The name must be set first: the make_* factories read it.
        self.enum_name = "TypeEnum"
        self._int8 = self.make_INT8()
        self._int16 = self.make_INT16()
        self._int32 = self.make_INT32()
        self._int64 = self.make_INT64()
        self._uint8 = self.make_UINT8()
        self._uint16 = self.make_UINT16()
        self._uint32 = self.make_UINT32()
        self._uint64 = self.make_UINT64()
        self._float32 = self.make_FLOAT32()
        self._float64 = self.make_FLOAT64()
        self._storage = self.make_STORAGE()

    # Factories: one per member, pairing the public name with the C enum value.

    cdef IntEnumerationMember make_INT8(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "INT8", cccl_type_enum.CCCL_INT8
        )

    cdef IntEnumerationMember make_INT16(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "INT16", cccl_type_enum.CCCL_INT16
        )

    cdef IntEnumerationMember make_INT32(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "INT32", cccl_type_enum.CCCL_INT32
        )

    cdef IntEnumerationMember make_INT64(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "INT64", cccl_type_enum.CCCL_INT64
        )

    cdef IntEnumerationMember make_UINT8(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "UINT8", cccl_type_enum.CCCL_UINT8
        )

    cdef IntEnumerationMember make_UINT16(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "UINT16", cccl_type_enum.CCCL_UINT16
        )

    cdef IntEnumerationMember make_UINT32(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "UINT32", cccl_type_enum.CCCL_UINT32
        )

    cdef IntEnumerationMember make_UINT64(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "UINT64", cccl_type_enum.CCCL_UINT64
        )

    cdef IntEnumerationMember make_FLOAT32(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "FLOAT32", cccl_type_enum.CCCL_FLOAT32
        )

    cdef IntEnumerationMember make_FLOAT64(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "FLOAT64", cccl_type_enum.CCCL_FLOAT64
        )

    cdef IntEnumerationMember make_STORAGE(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "STORAGE", cccl_type_enum.CCCL_STORAGE
        )

    # Read-only accessors for the members created in __cinit__.

    @property
    def INT8(self):
        return self._int8

    @property
    def INT16(self):
        return self._int16

    @property
    def INT32(self):
        return self._int32

    @property
    def INT64(self):
        return self._int64

    @property
    def UINT8(self):
        return self._uint8

    @property
    def UINT16(self):
        return self._uint16

    @property
    def UINT32(self):
        return self._uint32

    @property
    def UINT64(self):
        return self._uint64

    @property
    def FLOAT32(self):
        return self._float32

    @property
    def FLOAT64(self):
        return self._float64

    @property
    def STORAGE(self):
        return self._storage
350
+
351
+
352
cdef class Enumeration_OpKind(IntEnumerationBase):
    """
    Enumeration of operator kinds

    Exposes the members STATELESS and STATEFUL as read-only properties.
    """
    cdef IntEnumerationMember _stateless, _stateful

    def __cinit__(self):
        # The name must be set first: the make_* factories read it.
        self.enum_name = "OpKindEnum"
        self._stateless = self.make_STATELESS()
        self._stateful = self.make_STATEFUL()

    cdef IntEnumerationMember make_STATELESS(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "STATELESS", cccl_op_kind_t.CCCL_STATELESS
        )

    cdef IntEnumerationMember make_STATEFUL(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "STATEFUL", cccl_op_kind_t.CCCL_STATEFUL
        )

    @property
    def STATELESS(self):
        return self._stateless

    @property
    def STATEFUL(self):
        return self._stateful
388
+
389
+
390
cdef class Enumeration_IteratorKind(IntEnumerationBase):
    """
    Enumeration of iterator kinds

    Exposes the members POINTER and ITERATOR as read-only properties.
    """
    cdef IntEnumerationMember _pointer, _iterator

    def __cinit__(self):
        # The name must be set first: the make_* factories read it.
        self.enum_name = "IteratorKindEnum"
        self._pointer = self.make_POINTER()
        self._iterator = self.make_ITERATOR()

    cdef IntEnumerationMember make_POINTER(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "POINTER", cccl_iterator_kind_t.CCCL_POINTER
        )

    cdef IntEnumerationMember make_ITERATOR(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "ITERATOR", cccl_iterator_kind_t.CCCL_ITERATOR
        )

    @property
    def POINTER(self):
        return self._pointer

    @property
    def ITERATOR(self):
        return self._iterator
425
+
426
cdef class Enumeration_SortOrder(IntEnumerationBase):
    """
    Enumeration of sort orders (ascending or descending)

    Exposes the members ASCENDING and DESCENDING as read-only properties.
    """
    cdef IntEnumerationMember _ascending, _descending

    def __cinit__(self):
        # The name must be set first: the make_* factories read it.
        self.enum_name = "SortOrder"
        self._ascending = self.make_ASCENDING()
        self._descending = self.make_DESCENDING()

    cdef IntEnumerationMember make_ASCENDING(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "ASCENDING", cccl_sort_order_t.CCCL_ASCENDING
        )

    cdef IntEnumerationMember make_DESCENDING(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "DESCENDING", cccl_sort_order_t.CCCL_DESCENDING
        )

    @property
    def ASCENDING(self):
        return self._ascending

    @property
    def DESCENDING(self):
        return self._descending
461
+
462
+
463
# Module-level instances of the enumeration classes; their members are
# reached as attributes, e.g. `OpKind.STATELESS` (used as a default below).
TypeEnum = Enumeration_CCCLType()
OpKind = Enumeration_OpKind()
IteratorKind = Enumeration_IteratorKind()
SortOrder = Enumeration_SortOrder()
467
+
468
cpdef bint is_TypeEnum(IntEnumerationMember attr):
    """
    Return True if attribute is a member of TypeEnum enumeration

    None is accepted by the typed argument and yields False.
    """
    # Reading a C attribute of None is unchecked unless the `nonecheck`
    # directive is enabled, so None has to be handled before the access.
    if attr is None:
        return False
    return attr.parent_class is Enumeration_CCCLType
471
+
472
+
473
cpdef bint is_OpKind(IntEnumerationMember attr):
    """
    Return True if attribute is a member of OpKind enumeration

    None is accepted by the typed argument and yields False.
    """
    # Reading a C attribute of None is unchecked unless the `nonecheck`
    # directive is enabled, so None has to be handled before the access.
    if attr is None:
        return False
    return attr.parent_class is Enumeration_OpKind
476
+
477
+
478
cpdef bint is_IteratorKind(IntEnumerationMember attr):
    """
    Return True if attribute is a member of IteratorKind enumeration

    None is accepted by the typed argument and yields False.
    """
    # Reading a C attribute of None is unchecked unless the `nonecheck`
    # directive is enabled, so None has to be handled before the access.
    if attr is None:
        return False
    return attr.parent_class is Enumeration_IteratorKind
481
+
482
cpdef bint is_SortOrder(IntEnumerationMember attr):
    """
    Return True if attribute is a member of SortOrder enumeration

    None is accepted by the typed argument and yields False.
    """
    # Reading a C attribute of None is unchecked unless the `nonecheck`
    # directive is enabled, so None has to be handled before the access.
    if attr is None:
        return False
    return attr.parent_class is Enumeration_SortOrder
485
+
486
+
487
cdef void _validate_alignment(int alignment) except *:
    """
    Alignment must be positive integer and a power of two
    that can be represented by uint32_t type.

    Raises:
        ValueError: if `alignment` is smaller than 1 or is not a power of two.
    """
    cdef uint32_t val
    if alignment < 1:
        # Zero is rejected as well, so the requirement is "positive",
        # not merely "non-negative".
        raise ValueError(
            "Alignment must be positive, "
            f"got {alignment}."
        )
    val = <uint32_t>alignment
    # A power of two has a single bit set, so clearing its lowest set bit gives 0.
    if (val & (val - 1)) != 0:
        raise ValueError(
            "Alignment must be a power of two, "
            f"got {alignment}"
        )
504
+
505
+
506
cdef class Op:
    """
    Represents CCCL Operation

    Args:
        name (str):
            Name of the operation
        operator_type (OpKind):
            Whether operator is stateless or stateful
        ltoir (bytes):
            The LTOIR for the operation compiled for device
        state (bytes, optional):
            State for the stateful operation.
        state_alignment (int, optional):
            Alignment of the state struct. Default: `1`.

    When both `name` and `ltoir` are omitted they default to an empty
    string and empty bytes; an omitted `state` defaults to empty bytes.
    """
    # The struct below stores raw pointers into these bytes objects, so the
    # instance keeps references to them for as long as it lives.
    cdef bytes op_encoded_name
    cdef bytes ltoir_bytes
    cdef bytes state_bytes
    cdef cccl_op_t op_data

    cdef void _set_members(self, cccl_op_kind_t op_type, str name, bytes lto_ir, bytes state, int state_alignment):
        # Begin with an all-zero struct.
        memset(&self.op_data, 0, sizeof(cccl_op_t))

        # Take ownership of the Python objects first ...
        self.op_encoded_name = name.encode("utf-8")
        self.ltoir_bytes = lto_ir
        self.state_bytes = state

        # ... then point the struct fields at their contents.
        self.op_data.type = op_type
        self.op_data.name = <const char *>self.op_encoded_name
        self.op_data.ltoir = <const char *>lto_ir
        self.op_data.ltoir_size = len(lto_ir)
        self.op_data.size = len(state)
        self.op_data.alignment = state_alignment
        self.op_data.state = <void *><const char *>state

    def __cinit__(self, /, *, name = None, operator_type = OpKind.STATELESS, ltoir = None, state = None, state_alignment = 1):
        if name is None and ltoir is None:
            name, ltoir = "", b""
        state = b"" if state is None else state

        # Checked in this order; the first mismatch raises TypeError.
        for label, expected, given in (
            ("name", str, name),
            ("ltoir", bytes, ltoir),
            ("state", bytes, state),
            ("state_alignment", int, state_alignment),
            ("operator_type", IntEnumerationMember, operator_type),
        ):
            arg_type_check(label, expected, given)

        if not is_OpKind(operator_type):
            raise TypeError(
                "The operator_type argument should be an enumerator of operator kinds"
            )
        _validate_alignment(state_alignment)
        self._set_members(
            <cccl_op_kind_t> operator_type.value,
            <str> name,
            <bytes> ltoir,
            <bytes> state,
            <int> state_alignment
        )

    cdef void set_state(self, bytes state):
        # NOTE(review): op_data.size is not refreshed here, so it keeps the
        # length of the state given at construction - confirm that callers
        # only ever replace the state with bytes of the same length.
        self.state_bytes = state
        self.op_data.state = <void *><const char *>state

    @property
    def state(self):
        return self.state_bytes

    @state.setter
    def state(self, bytes new_value):
        self.set_state(<bytes>new_value)

    @property
    def name(self):
        return self.op_encoded_name.decode("utf-8")

    @property
    def ltoir(self):
        return self.ltoir_bytes

    @property
    def state_alignment(self):
        return self.op_data.alignment

    @property
    def state_typenum(self):
        # Returns the `type` field of the struct, i.e. the operator kind.
        return self.op_data.type

    def as_bytes(self):
        "Debugging utility to view memory content of library struct"
        cdef uint8_t[:] raw = bytearray(sizeof(self.op_data))
        memcpy(&raw[0], &self.op_data, sizeof(self.op_data))
        return bytes(raw)
603
+
604
+
605
cdef class TypeInfo:
    """
    Represents CCCL type info structure

    Args:
        size (int):
            Size of the type in bytes.
        alignment (int):
            Alignment of the type in bytes.
        type_enum (IntEnumerationMember):
            Enumeration member identifying the type.

    Raises:
        ValueError: if `size` is smaller than 1 or `alignment` is invalid.
        TypeError: if `type_enum` is None or not a member of TypeEnum.
    """
    cdef cccl_type_info type_info

    def __cinit__(self, int size, int alignment, IntEnumerationMember type_enum):
        if size < 1:
            raise ValueError("Size argument must be positive")
        _validate_alignment(alignment)
        # The typed argument still admits None; reject it here, before any
        # of its C attributes is read.
        if type_enum is None or not is_TypeEnum(type_enum):
            raise TypeError(
                "The type argument should be enum of CCCL types"
            )
        self.type_info.size = size
        self.type_info.alignment = alignment
        self.type_info.type = <cccl_type_enum> type_enum.value

    @property
    def size(self):
        return self.type_info.size

    @property
    def alignment(self):
        return self.type_info.alignment

    @property
    def typenum(self):
        return self.type_info.type

    def as_bytes(self):
        "Debugging utility to view memory content of library struct"
        cdef uint8_t[:] mem_view = bytearray(sizeof(self.type_info))
        memcpy(&mem_view[0], &self.type_info, sizeof(self.type_info))
        return bytes(mem_view)
648
+
649
+
650
cdef class Value:
    """
    Represents CCCL value structure

    Args:
        value_type (TypeInfo):
            type descriptor
        state (object):
            state of the value type. Object is expected to
            implement Python buffer protocol and be able to provide
            simple contiguous array of type `uint8_t`.

    Raises:
        TypeError: if `value_type` is None.
    """
    # state_obj keeps the buffer alive; value_data.state points into it.
    cdef uint8_t[::1] state_obj
    cdef TypeInfo value_type
    cdef cccl_value_t value_data

    def __cinit__(self, TypeInfo value_type, uint8_t[::1] state):
        # The typed argument still admits None; reject it before its C
        # struct member is copied.
        if value_type is None:
            raise TypeError("value_type must be a TypeInfo instance, got None")
        self.state_obj = state
        self.value_type = value_type
        self.value_data.type = value_type.type_info
        self.value_data.state = <void *>&state[0]

    @property
    def type(self):
        return self.value_type

    @property
    def state(self):
        return self.state_obj

    @state.setter
    def state(self, uint8_t[::1] new_value):
        # Only a buffer of the same length may replace the current state.
        if (len(self.state_obj) == len(new_value)):
            self.state_obj = new_value
            self.value_data.state = <void *>&self.state_obj[0]
        else:
            raise ValueError("Size mismatch")

    def as_bytes(self):
        "Debugging utility to view memory of native struct"
        cdef uint8_t[:] mem_view = bytearray(sizeof(self.value_data))
        memcpy(&mem_view[0], &self.value_data, sizeof(self.value_data))
        return bytes(mem_view)
693
+
694
+
695
cdef void ensure_buffer(object o) except *:
    "Raise TypeError unless `o` supports the Python buffer protocol"
    if PyObject_CheckBuffer(o):
        return
    raise TypeError(
        "Object with buffer protocol expected, "
        f"got {type(o)}"
    )
701
+
702
+
703
cdef void * get_buffer_pointer(object o, size_t *size):
    """
    Return the address of the buffer exported by `o`.

    If `size` is not NULL, the buffer length in bytes is stored in
    `size[0]` (0 when the buffer cannot be acquired).

    The buffer view is released before returning, so the returned pointer
    is only usable while the caller keeps `o` alive.

    Raises:
        RuntimeError: if a simple contiguous buffer cannot be obtained.
    """
    cdef int status = 0
    cdef void *ptr = NULL
    cdef Py_buffer view

    status = PyObject_GetBuffer(o, &view, PyBUF_SIMPLE | PyBUF_ANY_CONTIGUOUS)
    if status != 0:  # pragma: no cover
        # `size` is optional on the success path below, so it must be
        # guarded on the failure path too.
        if size is not NULL:
            size[0] = 0
        raise RuntimeError(
            "Can not access simple contiguous buffer"
        )

    ptr = view.buf
    if size is not NULL:
        size[0] = <size_t>view.len
    PyBuffer_Release(&view)

    return ptr
721
+
722
+
723
cdef void * ctypes_typed_pointer_payload_ptr(object ctypes_typed_ptr):
    """
    Get pointer to the value buffer represented by ctypes.pointer(ctypes_val)

    The argument's own buffer is read as a single size_t, and that value
    is returned as the address.
    """
    cdef size_t nbytes = 0
    cdef size_t *slot = NULL
    ensure_buffer(ctypes_typed_ptr)
    slot = <size_t *>get_buffer_pointer(ctypes_typed_ptr, &nbytes)
    return <void *>(slot[0])
730
+
731
+
732
cdef void * ctypes_value_ptr(object ctypes_cdata):
    """
    Get pointer to the value buffer behind ctypes_val

    The argument must support the buffer protocol; the address of its
    buffer is returned and the buffer length is discarded.
    """
    cdef size_t unused_nbytes = 0
    ensure_buffer(ctypes_cdata)
    return get_buffer_pointer(ctypes_cdata, &unused_nbytes)
737
+
738
+
739
cdef inline void * int_as_ptr(size_t ptr_val):
    "Reinterpret an integer address as an untyped pointer"
    cdef void *address = <void *>ptr_val
    return address
741
+
742
+
743
cdef class StateBase:
    """
    Pairs a raw pointer with a Python object reference.

    ``ref`` is stored next to ``ptr``; subclasses use it to hold on to the
    Python object the pointer was derived from.
    """
    cdef void *ptr
    cdef object ref

    def __cinit__(self):
        # Start out empty: null pointer, no associated object.
        self.ref = None
        self.ptr = NULL

    cdef inline void set_state(self, void *ptr, object ref):
        """Store the pointer and its associated reference together."""
        self.ref = ref
        self.ptr = ptr

    @property
    def pointer(self):
        """The stored pointer as an unsigned integer."""
        cdef size_t address = <size_t>self.ptr
        return address

    @property
    def reference(self):
        """The Python object stored alongside the pointer (may be ``None``)."""
        return self.ref
762
+
763
+
764
cdef class Pointer(StateBase):
    """
    Represents the pointer value.

    Args:
        arg: one of
            * ``int`` - used directly as the address, no reference is kept;
            * typed ctypes pointer (``ctypes._Pointer``) - the address it
              stores is used and ``arg`` is kept as the reference;
            * ``ctypes.c_void_p`` - its ``value`` is used (a NULL
              ``c_void_p`` reports ``value is None`` and maps to address 0)
              and ``arg`` is kept as the reference.

    Raises:
        TypeError: for any other argument type.
    """

    def __cinit__(self, arg):
        cdef void *ptr
        cdef object ref

        if isinstance(arg, int):
            ptr = int_as_ptr(arg)
            ref = None
        elif isinstance(arg, ctypes._Pointer):
            ptr = ctypes_typed_pointer_payload_ptr(arg)
            ref = arg
        elif isinstance(arg, ctypes.c_void_p):
            # ctypes reports a NULL c_void_p as ``None``; treat it as 0
            # instead of failing the integer conversion.
            ptr = int_as_ptr(arg.value or 0)
            ref = arg
        else:
            raise TypeError(
                "Expect ctypes pointer, ctypes.c_void_p, or integer, "
                f"got type {type(arg)}"
            )
        self.set_state(ptr, ref)
786
+
787
+
788
def make_pointer_object(ptr, owner):
    """
    Build a ``Pointer`` for address ``ptr`` that keeps ``owner`` as its reference.

    Args:
        ptr: ``int`` address or ``ctypes.c_void_p`` (a NULL ``c_void_p``,
            whose ``value`` is ``None``, maps to address 0).
        owner: object stored as the resulting ``Pointer``'s reference.

    Returns:
        Pointer: new object with the given address and reference.

    Raises:
        TypeError: if ``ptr`` is neither an ``int`` nor a ``ctypes.c_void_p``.
    """
    cdef Pointer res = Pointer(0)

    if isinstance(ptr, int):
        res.ptr = int_as_ptr(ptr)
    elif isinstance(ptr, ctypes.c_void_p):
        # ``value`` is None for a NULL c_void_p; convert that to 0.
        res.ptr = int_as_ptr(ptr.value or 0)
    else:
        raise TypeError(
            "First argument must be an integer, or ctypes.c_void_p, "
            f"got {type(ptr)}"
        )
    res.ref = owner
    return res
802
+
803
+
804
cdef class IteratorState(StateBase):
    """
    Represents blob referenced by pointer.

    Args:
        arg: either a typed ctypes pointer (the blob is ``arg.contents`` and
            its size is ``ctypes.sizeof`` of it), or any object supporting
            the buffer protocol (the blob is the exported buffer).

    Raises:
        TypeError: if ``arg`` is neither of the above.

    The instance itself exports the blob through the buffer protocol as a
    1-d array of unsigned bytes.
    """
    cdef size_t state_nbytes
    # Storage for the shape/strides arrays handed out by __getbuffer__.
    # They live in the object so the pointers stay valid for as long as the
    # exporter does, independent of where the consumer keeps its Py_buffer.
    cdef Py_ssize_t buf_shape[1]
    cdef Py_ssize_t buf_strides[1]

    def __cinit__(self, arg):
        cdef size_t buffer_size = 0
        cdef void *ptr = NULL
        cdef object ref = None

        # StateBase.__cinit__ has already run at this point, so no explicit
        # base-class call is needed here.
        if isinstance(arg, ctypes._Pointer):
            ptr = ctypes_typed_pointer_payload_ptr(arg)
            ref = arg.contents
            self.state_nbytes = ctypes.sizeof(ref)
        elif PyObject_CheckBuffer(arg):
            ptr = get_buffer_pointer(arg, &buffer_size)
            ref = arg
            self.state_nbytes = buffer_size
        else:
            raise TypeError(
                "Expected a ctypes pointer with content, or object of type bytes or bytearray, "
                f"got type {type(arg)}"
            )
        self.set_state(ptr, ref)

    cdef inline size_t get_size(self):
        """Size of the blob in bytes (C-level accessor)."""
        return self.state_nbytes

    @property
    def size(self):
        """Size of the blob in bytes."""
        return self.state_nbytes

    def __getbuffer__(self, Py_buffer *buffer, int flags):
        cdef Py_ssize_t cast_size = <Py_ssize_t>self.state_nbytes
        self.buf_shape[0] = cast_size
        self.buf_strides[0] = 1  # contiguous bytes: stride equals itemsize
        buffer.buf = <char *>self.ptr
        buffer.obj = self
        buffer.len = cast_size
        buffer.readonly = 0
        buffer.itemsize = 1
        buffer.format = "B"  # unsigned char
        buffer.ndim = 1
        buffer.shape = self.buf_shape
        buffer.strides = self.buf_strides
        buffer.suboffsets = NULL
        buffer.internal = NULL

    def __releasebuffer__(self, Py_buffer *buffer):
        # Nothing is allocated per export, so there is nothing to release.
        pass
852
+
853
+
854
+ cdef const char *function_ptr_capsule_name = "void (void *, cccl_increment_t)";
855
+
856
cdef bint is_function_pointer_capsule(object o) noexcept:
    """
    Tell whether ``o`` is exactly a capsule that is valid under the name
    held in ``function_ptr_capsule_name``.
    """
    if not PyCapsule_CheckExact(o):
        return False
    return PyCapsule_IsValid(o, function_ptr_capsule_name)
865
+
866
+
867
cdef inline void* get_function_pointer_from_capsule(object cap) except *:
    """Extract the pointer stored in ``cap`` under ``function_ptr_capsule_name``."""
    cdef void *stored = PyCapsule_GetPointer(cap, function_ptr_capsule_name)
    return stored
869
+
870
+
871
cdef cccl_host_op_fn_ptr_t unbox_host_advance_fn(object host_fn_obj) except *:
    """
    Convert a Python-level description of a host function into a C function pointer.

    Args:
        host_fn_obj: one of
            * ctypes function pointer (``ctypes._CFuncPtr``);
            * ``int`` holding the function address;
            * ``ctypes.c_void_p`` holding the function address (a NULL
              ``c_void_p``, whose ``value`` is ``None``, yields NULL);
            * capsule accepted by ``is_function_pointer_capsule``.

    Returns:
        The function pointer cast to ``cccl_host_op_fn_ptr_t``.

    Raises:
        TypeError: for any other argument type.
    """
    cdef void *fn_ptr = NULL
    if isinstance(host_fn_obj, ctypes._CFuncPtr):
        # the _CFuncPtr object encapsulates a pointer to the function pointer
        fn_ptr = ctypes_typed_pointer_payload_ptr(host_fn_obj)
        return <cccl_host_op_fn_ptr_t>fn_ptr

    if isinstance(host_fn_obj, int):
        fn_ptr = <void *><uintptr_t>host_fn_obj
        return <cccl_host_op_fn_ptr_t>fn_ptr

    if isinstance(host_fn_obj, ctypes.c_void_p):
        # ``value`` is None for a NULL c_void_p; map it to address 0 rather
        # than failing the integer conversion.
        fn_ptr = <void *><uintptr_t>(host_fn_obj.value or 0)
        return <cccl_host_op_fn_ptr_t>fn_ptr

    if is_function_pointer_capsule(host_fn_obj):
        fn_ptr = get_function_pointer_from_capsule(host_fn_obj)
        return <cccl_host_op_fn_ptr_t>fn_ptr

    raise TypeError(
        "Expected ctypes function pointer, ctypes.c_void_p, integer or a named capsule, "
        f"got {type(host_fn_obj)}"
    )
894
+
895
+
896
cdef class Iterator:
    """
    Represents CCCL iterator.

    Args:
        alignment (int):
            Alignment of the iterator state
        iterator_type (IntEnumerationMember):
            The type of iterator, `IteratorKind.POINTER` or
            `IteratorKind.ITERATOR`
        advance_fn (Op):
            Descriptor for user-defined `advance` function
            compiled for device
        dereference_fn (Op):
            Descriptor for user-defined `dereference` or `assign`
            function compiled for device
        value_type (TypeInfo):
            Descriptor of the type addressed by the iterator
        state (object, optional):
            Python object for the state of the iterator. For iterators of
            type `ITERATOR` the state must be an `IteratorState` instance.
            For iterators of type `POINTER` the state may be an integer
            convertible to `uintptr_t`, or a `Pointer` instance.
            Value `None` represents absence of iterator state.
        host_advance_fn (object, optional):
            Python object for host callable function to advance state by a given
            increment. The argument may only be set for iterators of type
            `IteratorKind.ITERATOR` and raise an exception otherwise. Supported
            types are `int` or `ctypes.c_void_p` (raw pointer), ctypes function
            pointer, or a Python capsule with name `"void (void *, cccl_increment_t)"`.
    """
    cdef Op advance
    cdef Op dereference
    cdef object state_obj
    cdef object host_advance_obj
    cdef cccl_iterator_t iter_data

    def __cinit__(self,
        int alignment,
        IntEnumerationMember iterator_type,
        Op advance_fn,
        Op dereference_fn,
        TypeInfo value_type,
        state=None,
        host_advance_fn=None
    ):
        cdef cccl_iterator_kind_t it_kind
        _validate_alignment(alignment)
        if not is_IteratorKind(iterator_type):
            raise TypeError("iterator_type must describe iterator kind")
        it_kind = iterator_type.value
        if it_kind == cccl_iterator_kind_t.CCCL_POINTER:
            if state is None:
                self.state_obj = None
                self.iter_data.size = 0
                self.iter_data.state = NULL
            elif isinstance(state, int):
                self.state_obj = None
                self.iter_data.size = 0
                self.iter_data.state = int_as_ptr(state)
            elif isinstance(state, Pointer):
                # Keep the Pointer's reference so the object it was derived
                # from stays referenced by this iterator.
                self.state_obj = state.reference
                self.iter_data.size = 0
                self.iter_data.state = (<Pointer>state).ptr
            else:
                raise TypeError(
                    "Expect for Iterator of kind POINTER, state must have type Pointer or int, "
                    f"got {type(state)}"
                )
            if host_advance_fn is not None:
                raise ValueError(
                    "host_advance_fn must be set to None for iterators of kind POINTER"
                )
            self.iter_data.host_advance = NULL
            self.host_advance_obj = None
        elif it_kind == cccl_iterator_kind_t.CCCL_ITERATOR:
            if state is None:
                self.state_obj = None
                self.iter_data.size = 0
                self.iter_data.state = NULL
            elif isinstance(state, IteratorState):
                self.state_obj = state.reference
                self.iter_data.size = (<IteratorState>state).size
                self.iter_data.state = (<IteratorState>state).ptr
            else:
                raise TypeError(
                    "For Iterator of kind ITERATOR, state must have type IteratorState, "
                    f"got type {type(state)}"
                )
            if host_advance_fn is not None:
                self.iter_data.host_advance = unbox_host_advance_fn(host_advance_fn)
                self.host_advance_obj = host_advance_fn
            else:
                self.iter_data.host_advance = NULL
                self.host_advance_obj = None
        else:  # pragma: no cover
            raise ValueError("Unrecognized iterator kind")
        # Hold the Op objects; their op_data structs are copied into iter_data.
        self.advance = advance_fn
        self.dereference = dereference_fn
        self.iter_data.alignment = alignment
        self.iter_data.type = <cccl_iterator_kind_t> it_kind
        self.iter_data.advance = self.advance.op_data
        self.iter_data.dereference = self.dereference.op_data
        self.iter_data.value_type = value_type.type_info

    @property
    def advance_op(self):
        """The ``Op`` given as ``advance_fn``."""
        return self.advance

    @property
    def dereference_or_assign_op(self):
        """The ``Op`` given as ``dereference_fn``."""
        return self.dereference

    @property
    def state(self):
        """
        Iterator state: the pointer value as an integer for POINTER
        iterators, otherwise the Python object backing the state.
        """
        if self.iter_data.type == cccl_iterator_kind_t.CCCL_POINTER:
            return <size_t>self.iter_data.state
        else:
            return self.state_obj

    @state.setter
    def state(self, new_value):
        """
        Replace the iterator state.

        POINTER iterators accept ``Pointer``, ``int`` or ``None``.
        ITERATOR iterators accept ``IteratorState``, ``Pointer`` (only if a
        state size is already recorded), a buffer-protocol object, or ``None``.

        Raises:
            TypeError: if ``new_value`` has an unsupported type.
            ValueError: if a ``Pointer`` is assigned to an ITERATOR iterator
                whose recorded state size is 0.
        """
        cdef void *buf_ptr = NULL
        cdef size_t buf_size = 0
        cdef cccl_iterator_kind_t it_kind = self.iter_data.type
        if it_kind == cccl_iterator_kind_t.CCCL_POINTER:
            if isinstance(new_value, Pointer):
                self.state_obj = (<Pointer>new_value).ref
                self.iter_data.size = 0
                self.iter_data.state = (<Pointer>new_value).ptr
            elif isinstance(new_value, int):
                self.state_obj = None
                self.iter_data.size = 0
                self.iter_data.state = int_as_ptr(new_value)
            elif new_value is None:
                self.state_obj = None
                self.iter_data.size = 0
                self.iter_data.state = NULL
            else:
                raise TypeError(
                    "For iterator with type POINTER, state value must have type int or type Pointer, "
                    f"got type {type(new_value)}"
                )
        elif it_kind == cccl_iterator_kind_t.CCCL_ITERATOR:
            if isinstance(new_value, IteratorState):
                self.state_obj = new_value.reference
                self.iter_data.size = (<IteratorState>new_value).size
                self.iter_data.state = (<IteratorState>new_value).ptr
            elif isinstance(new_value, Pointer):
                # A Pointer carries no size; validate before touching any
                # field so a rejected assignment leaves the iterator intact.
                if self.iter_data.size == 0:
                    raise ValueError("Assigning incomplete state value to iterator without state size information")
                self.state_obj = new_value.reference
                self.iter_data.state = (<Pointer>new_value).ptr
            elif PyObject_CheckBuffer(new_value):
                # Query into locals first; commit only after the call succeeded.
                buf_ptr = get_buffer_pointer(new_value, &buf_size)
                self.iter_data.state = buf_ptr
                self.iter_data.size = buf_size
                self.state_obj = new_value
            elif new_value is None:
                self.state_obj = None
                self.iter_data.size = 0
                self.iter_data.state = NULL
            else:
                raise TypeError(
                    "For iterator with type ITERATOR, state value must have type IteratorState or type bytes, "
                    f"got type {type(new_value)}"
                )
        else:
            raise TypeError("The new value should be an integer for iterators of POINTER kind, and bytes for ITERATOR kind")

    @property
    def type(self):
        """``IteratorKind.POINTER`` or ``IteratorKind.ITERATOR``."""
        cdef cccl_iterator_kind_t it_kind = self.iter_data.type
        if it_kind == cccl_iterator_kind_t.CCCL_POINTER:
            return IteratorKind.POINTER
        else:
            return IteratorKind.ITERATOR

    def is_kind_pointer(self):
        """True if this iterator is of kind POINTER."""
        cdef cccl_iterator_kind_t it_kind = self.iter_data.type
        return (it_kind == cccl_iterator_kind_t.CCCL_POINTER)

    def is_kind_iterator(self):
        """True if this iterator is of kind ITERATOR."""
        cdef cccl_iterator_kind_t it_kind = self.iter_data.type
        return (it_kind == cccl_iterator_kind_t.CCCL_ITERATOR)

    def as_bytes(self):
        "Debugging utility to get memory view into library struct"
        cdef uint8_t[:] mem_view = bytearray(sizeof(self.iter_data))
        memcpy(&mem_view[0], &self.iter_data, sizeof(self.iter_data))
        return bytes(mem_view)

    @property
    def host_advance_fn(self):
        """The object last accepted as host advance function, or ``None``."""
        return self.host_advance_obj

    @host_advance_fn.setter
    def host_advance_fn(self, func):
        """
        Set or clear (``None``) the host advance function.

        Raises:
            ValueError: if the iterator is not of kind ITERATOR.
            TypeError: if ``func`` cannot be converted to a function pointer.
        """
        if (self.iter_data.type == cccl_iterator_kind_t.CCCL_ITERATOR):
            if func is not None:
                self.iter_data.host_advance = unbox_host_advance_fn(func)
                self.host_advance_obj = func
            else:
                self.iter_data.host_advance = NULL
                self.host_advance_obj = None
        else:
            raise ValueError(
                "host_advance_fn can only be set for iterators of kind ITERATOR"
            )
1102
+
1103
+
1104
cdef class CommonData:
    """
    Compute capability and include-path settings passed to the build calls.

    The four paths are kept as UTF-8 encoded ``bytes`` so that the
    ``*_get_c_str`` accessors can return C strings backed by objects this
    instance owns.
    """
    cdef int cc_major
    cdef int cc_minor
    cdef bytes encoded_cub_path
    cdef bytes encoded_thrust_path
    cdef bytes encoded_libcudacxx_path
    cdef bytes encoded_ctk_path

    def __cinit__(self, int cc_major, int cc_minor, str cub_path, str thrust_path, str libcudacxx_path, str ctk_path):
        self.cc_major, self.cc_minor = cc_major, cc_minor
        self.encoded_cub_path = cub_path.encode("utf-8")
        self.encoded_thrust_path = thrust_path.encode("utf-8")
        self.encoded_libcudacxx_path = libcudacxx_path.encode("utf-8")
        self.encoded_ctk_path = ctk_path.encode("utf-8")

    # --- C-level accessors -------------------------------------------------

    cdef inline int get_cc_major(self):
        return self.cc_major

    cdef inline int get_cc_minor(self):
        return self.cc_minor

    cdef inline const char * cub_path_get_c_str(self):
        return <const char *>self.encoded_cub_path

    cdef inline const char * thrust_path_get_c_str(self):
        return <const char *>self.encoded_thrust_path

    cdef inline const char * libcudacxx_path_get_c_str(self):
        return <const char *>self.encoded_libcudacxx_path

    cdef inline const char * ctk_path_get_c_str(self):
        return <const char *>self.encoded_ctk_path

    # --- Python-level read-only views --------------------------------------

    @property
    def compute_capability(self):
        """``(major, minor)`` tuple."""
        return (self.cc_major, self.cc_minor)

    @property
    def cub_path(self):
        """CUB path decoded back to ``str``."""
        cdef bytes raw = self.encoded_cub_path
        return raw.decode("utf-8")

    @property
    def ctk_path(self):
        """CTK path decoded back to ``str``."""
        cdef bytes raw = self.encoded_ctk_path
        return raw.decode("utf-8")

    @property
    def thrust_path(self):
        """Thrust path decoded back to ``str``."""
        cdef bytes raw = self.encoded_thrust_path
        return raw.decode("utf-8")

    @property
    def libcudacxx_path(self):
        """libcudacxx path decoded back to ``str``."""
        cdef bytes raw = self.encoded_libcudacxx_path
        return raw.decode("utf-8")
1157
+
1158
+ # --------------
1159
+ # DeviceReduce
1160
+ # --------------
1161
+
1162
# Declarations from the CCCL C reduce header. Parameter names mirror the
# arguments passed by DeviceReduceBuildResult in this file.
cdef extern from "cccl/c/reduce.h":
    cdef struct cccl_device_reduce_build_result_t 'cccl_device_reduce_build_result_t':
        const char* cubin
        size_t cubin_size

    cdef CUresult cccl_device_reduce_build(
        cccl_device_reduce_build_result_t *build_ptr,
        cccl_iterator_t d_in,
        cccl_iterator_t d_out,
        cccl_op_t op,
        cccl_value_t init,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    ) nogil

    cdef CUresult cccl_device_reduce(
        cccl_device_reduce_build_result_t build,
        void *d_temp_storage,
        size_t *temp_storage_bytes,
        cccl_iterator_t d_in,
        cccl_iterator_t d_out,
        uint64_t num_items,
        cccl_op_t op,
        cccl_value_t init,
        CUstream stream
    ) nogil

    cdef CUresult cccl_device_reduce_cleanup(
        cccl_device_reduce_build_result_t *build_ptr
    ) nogil
1191
+
1192
+
1193
cdef class DeviceReduceBuildResult:
    """
    Owns a ``cccl_device_reduce_build_result_t``.

    Construction calls ``cccl_device_reduce_build``; deallocation calls
    ``cccl_device_reduce_cleanup`` on the same struct.

    Raises:
        RuntimeError: if the build call returns a non-zero status.
    """
    cdef cccl_device_reduce_build_result_t build_data

    def __cinit__(
        DeviceReduceBuildResult self,
        Iterator d_in,
        Iterator d_out,
        Op op,
        Value h_init,
        CommonData common_data
    ):
        cdef CUresult status = -1
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()
        memset(&self.build_data, 0, sizeof(cccl_device_reduce_build_result_t))

        with nogil:
            status = cccl_device_reduce_build(
                &self.build_data,
                d_in.iter_data,
                d_out.iter_data,
                op.op_data,
                h_init.value_data,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(
                f"Failed building reduce, error code: {status}"
            )

    def __dealloc__(DeviceReduceBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_reduce_cleanup(&self.build_data)
        if (status != 0):
            # Exceptions cannot usefully propagate from __dealloc__; report only.
            print(f"Return code {status} encountered during reduce result cleanup")

    cpdef size_t compute(
        DeviceReduceBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_in,
        Iterator d_out,
        size_t num_items,
        Op op,
        Value h_init,
        stream
    ):
        """
        Call ``cccl_device_reduce`` with this build result.

        Args:
            temp_storage_ptr: integer address of temporary storage; a falsy
                value is passed to the library as NULL.
            temp_storage_bytes: size of the temporary storage in bytes.
            d_in, d_out: input and output iterators.
            num_items: number of items.
            op: reduction operator.
            h_init: initial value.
            stream: integer stream handle; a falsy value is passed as NULL.

        Returns:
            The temporary-storage byte count as left by the library call
            (it is passed by pointer). Returned as ``size_t`` so values of
            2 GiB and above are not truncated.

        Raises:
            RuntimeError: if the library call returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_reduce(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_in.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                op.op_data,
                h_init.value_data,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing reduce, error code: {status}"
            )
        return storage_sz

    def _get_cubin(self):
        """Return the first ``cubin_size`` bytes of the build's ``cubin`` as ``bytes``."""
        return self.build_data.cubin[:self.build_data.cubin_size]
1275
+
1276
+ # ------------
1277
+ # DeviceScan
1278
+ # ------------
1279
+
1280
+
1281
# Declarations from the CCCL C scan header. Parameter names mirror the
# arguments passed by DeviceScanBuildResult in this file.
cdef extern from "cccl/c/scan.h":
    ctypedef bint _Bool

    cdef struct cccl_device_scan_build_result_t 'cccl_device_scan_build_result_t':
        const char* cubin
        size_t cubin_size

    cdef CUresult cccl_device_scan_build(
        cccl_device_scan_build_result_t *build_ptr,
        cccl_iterator_t d_in,
        cccl_iterator_t d_out,
        cccl_op_t op,
        cccl_value_t init,
        _Bool force_inclusive,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    ) nogil

    cdef CUresult cccl_device_exclusive_scan(
        cccl_device_scan_build_result_t build,
        void *d_temp_storage,
        size_t *temp_storage_bytes,
        cccl_iterator_t d_in,
        cccl_iterator_t d_out,
        uint64_t num_items,
        cccl_op_t op,
        cccl_value_t init,
        CUstream stream
    ) nogil

    cdef CUresult cccl_device_inclusive_scan(
        cccl_device_scan_build_result_t build,
        void *d_temp_storage,
        size_t *temp_storage_bytes,
        cccl_iterator_t d_in,
        cccl_iterator_t d_out,
        uint64_t num_items,
        cccl_op_t op,
        cccl_value_t init,
        CUstream stream
    ) nogil

    cdef CUresult cccl_device_scan_cleanup(
        cccl_device_scan_build_result_t *build_ptr
    ) nogil
1325
+
1326
+
1327
cdef class DeviceScanBuildResult:
    """
    Owns a ``cccl_device_scan_build_result_t``.

    Construction calls ``cccl_device_scan_build``; deallocation calls
    ``cccl_device_scan_cleanup`` on the same struct.

    Raises:
        RuntimeError: if the build call returns a non-zero status.
    """
    cdef cccl_device_scan_build_result_t build_data

    def __cinit__(
        DeviceScanBuildResult self,
        Iterator d_in,
        Iterator d_out,
        Op op,
        Value h_init,
        bint force_inclusive,
        CommonData common_data
    ):
        cdef CUresult status = -1
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()
        memset(&self.build_data, 0, sizeof(cccl_device_scan_build_result_t))

        with nogil:
            status = cccl_device_scan_build(
                &self.build_data,
                d_in.iter_data,
                d_out.iter_data,
                op.op_data,
                h_init.value_data,
                force_inclusive,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(f"Error {status} building scan")

    def __dealloc__(DeviceScanBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_scan_cleanup(&self.build_data)
        if (status != 0):
            # Exceptions cannot usefully propagate from __dealloc__; report only.
            print(f"Return code {status} encountered during scan result cleanup")

    cpdef size_t compute_inclusive(
        DeviceScanBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_in,
        Iterator d_out,
        size_t num_items,
        Op op,
        Value h_init,
        stream
    ):
        """
        Call ``cccl_device_inclusive_scan`` with this build result.

        A falsy ``temp_storage_ptr`` or ``stream`` is passed to the library
        as NULL.

        Returns:
            The temporary-storage byte count as left by the library call
            (it is passed by pointer). Returned as ``size_t`` so values of
            2 GiB and above are not truncated.

        Raises:
            RuntimeError: if the library call returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_inclusive_scan(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_in.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                op.op_data,
                h_init.value_data,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing inclusive scan, error code: {status}"
            )
        return storage_sz

    cpdef size_t compute_exclusive(
        DeviceScanBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_in,
        Iterator d_out,
        size_t num_items,
        Op op,
        Value h_init,
        stream
    ):
        """
        Call ``cccl_device_exclusive_scan`` with this build result.

        A falsy ``temp_storage_ptr`` or ``stream`` is passed to the library
        as NULL.

        Returns:
            The temporary-storage byte count as left by the library call
            (it is passed by pointer). Returned as ``size_t`` so values of
            2 GiB and above are not truncated.

        Raises:
            RuntimeError: if the library call returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_exclusive_scan(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_in.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                op.op_data,
                h_init.value_data,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing exclusive scan, error code: {status}"
            )
        return storage_sz

    def _get_cubin(self):
        """Return the first ``cubin_size`` bytes of the build's ``cubin`` as ``bytes``."""
        return self.build_data.cubin[:self.build_data.cubin_size]
1443
+
1444
+ # -----------------------
1445
+ # DeviceSegmentedReduce
1446
+ # -----------------------
1447
+
1448
+
1449
# Declarations from the CCCL C segmented-reduce header. Parameter names
# mirror the arguments passed by DeviceSegmentedReduceBuildResult in this file.
cdef extern from "cccl/c/segmented_reduce.h":
    cdef struct cccl_device_segmented_reduce_build_result_t 'cccl_device_segmented_reduce_build_result_t':
        const char* cubin
        size_t cubin_size

    cdef CUresult cccl_device_segmented_reduce_build(
        cccl_device_segmented_reduce_build_result_t *bld_ptr,
        cccl_iterator_t d_in,
        cccl_iterator_t d_out,
        cccl_iterator_t start_offsets,
        cccl_iterator_t end_offsets,
        cccl_op_t op,
        cccl_value_t init,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    ) nogil

    cdef CUresult cccl_device_segmented_reduce(
        cccl_device_segmented_reduce_build_result_t build,
        void *d_temp_storage,
        size_t *temp_storage_bytes,
        cccl_iterator_t d_in,
        cccl_iterator_t d_out,
        uint64_t num_items,
        cccl_iterator_t start_offsets,
        cccl_iterator_t end_offsets,
        cccl_op_t op,
        cccl_value_t init,
        CUstream stream
    ) nogil

    cdef CUresult cccl_device_segmented_reduce_cleanup(
        cccl_device_segmented_reduce_build_result_t* bld_ptr
    ) nogil
1482
+
1483
+
1484
cdef class DeviceSegmentedReduceBuildResult:
    """
    Owns a ``cccl_device_segmented_reduce_build_result_t``.

    Construction calls ``cccl_device_segmented_reduce_build``; deallocation
    calls ``cccl_device_segmented_reduce_cleanup`` on the same struct.

    Raises:
        RuntimeError: if the build call returns a non-zero status.
    """
    cdef cccl_device_segmented_reduce_build_result_t build_data

    def __cinit__(
        DeviceSegmentedReduceBuildResult self,
        Iterator d_in,
        Iterator d_out,
        Iterator start_offsets,
        Iterator end_offsets,
        Op op,
        Value h_init,
        CommonData common_data
    ):
        cdef CUresult status = -1
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        memset(&self.build_data, 0, sizeof(cccl_device_segmented_reduce_build_result_t))
        with nogil:
            status = cccl_device_segmented_reduce_build(
                &self.build_data,
                d_in.iter_data,
                d_out.iter_data,
                start_offsets.iter_data,
                end_offsets.iter_data,
                op.op_data,
                h_init.value_data,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(
                f"Failed building segmented_reduce, error code: {status}"
            )

    def __dealloc__(DeviceSegmentedReduceBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_segmented_reduce_cleanup(&self.build_data)
        if (status != 0):
            # Exceptions cannot usefully propagate from __dealloc__; report only.
            print(f"Return code {status} encountered during segmented_reduce result cleanup")

    cpdef size_t compute(
        DeviceSegmentedReduceBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_in,
        Iterator d_out,
        size_t num_items,
        Iterator start_offsets,
        Iterator end_offsets,
        Op op,
        Value h_init,
        stream
    ):
        """
        Call ``cccl_device_segmented_reduce`` with this build result.

        A falsy ``temp_storage_ptr`` or ``stream`` is passed to the library
        as NULL.

        Returns:
            The temporary-storage byte count as left by the library call
            (it is passed by pointer). Returned as ``size_t`` so values of
            2 GiB and above are not truncated.

        Raises:
            RuntimeError: if the library call returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_segmented_reduce(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_in.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                start_offsets.iter_data,
                end_offsets.iter_data,
                op.op_data,
                h_init.value_data,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing segmented_reduce, error code: {status}"
            )
        return storage_sz

    def _get_cubin(self):
        """Return the first ``cubin_size`` bytes of the build's ``cubin`` as ``bytes``."""
        return self.build_data.cubin[:self.build_data.cubin_size]
1574
+ # -----------------
1575
+ # DeviceMergeSort
1576
+ # -----------------
1577
+
1578
+
1579
# Declarations from the CCCL C merge-sort header. Parameter names mirror the
# arguments passed by DeviceMergeSortBuildResult in this file.
cdef extern from "cccl/c/merge_sort.h":
    cdef struct cccl_device_merge_sort_build_result_t 'cccl_device_merge_sort_build_result_t':
        const char* cubin
        size_t cubin_size

    cdef CUresult cccl_device_merge_sort_build(
        cccl_device_merge_sort_build_result_t *bld_ptr,
        cccl_iterator_t d_in_keys,
        cccl_iterator_t d_in_items,
        cccl_iterator_t d_out_keys,
        cccl_iterator_t d_out_items,
        cccl_op_t op,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    ) nogil

    cdef CUresult cccl_device_merge_sort(
        cccl_device_merge_sort_build_result_t build,
        void *d_temp_storage,
        size_t *temp_storage_bytes,
        cccl_iterator_t d_in_keys,
        cccl_iterator_t d_in_items,
        cccl_iterator_t d_out_keys,
        cccl_iterator_t d_out_items,
        uint64_t num_items,
        cccl_op_t op,
        CUstream stream
    ) nogil

    cdef CUresult cccl_device_merge_sort_cleanup(
        cccl_device_merge_sort_build_result_t* bld_ptr
    ) nogil
1610
+
1611
+
1612
cdef class DeviceMergeSortBuildResult:
    """
    Owns a ``cccl_device_merge_sort_build_result_t``.

    Construction calls ``cccl_device_merge_sort_build``; deallocation calls
    ``cccl_device_merge_sort_cleanup`` on the same struct.

    Raises:
        RuntimeError: if the build call returns a non-zero status.
    """
    cdef cccl_device_merge_sort_build_result_t build_data

    def __cinit__(
        DeviceMergeSortBuildResult self,
        Iterator d_in_keys,
        Iterator d_in_items,
        Iterator d_out_keys,
        Iterator d_out_items,
        Op op,
        CommonData common_data
    ):
        cdef CUresult status = -1
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        memset(&self.build_data, 0, sizeof(cccl_device_merge_sort_build_result_t))
        with nogil:
            status = cccl_device_merge_sort_build(
                &self.build_data,
                d_in_keys.iter_data,
                d_in_items.iter_data,
                d_out_keys.iter_data,
                d_out_items.iter_data,
                op.op_data,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(
                f"Failed building merge_sort, error code: {status}"
            )

    def __dealloc__(DeviceMergeSortBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_merge_sort_cleanup(&self.build_data)
        if (status != 0):
            # Exceptions cannot usefully propagate from __dealloc__; report only.
            print(f"Return code {status} encountered during merge_sort result cleanup")

    cpdef size_t compute(
        DeviceMergeSortBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_in_keys,
        Iterator d_in_items,
        Iterator d_out_keys,
        Iterator d_out_items,
        size_t num_items,
        Op op,
        stream
    ):
        """
        Call ``cccl_device_merge_sort`` with this build result.

        A falsy ``temp_storage_ptr`` or ``stream`` is passed to the library
        as NULL.

        Returns:
            The temporary-storage byte count as left by the library call
            (it is passed by pointer). Returned as ``size_t`` so values of
            2 GiB and above are not truncated.

        Raises:
            RuntimeError: if the library call returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL
        with nogil:
            status = cccl_device_merge_sort(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_in_keys.iter_data,
                d_in_items.iter_data,
                d_out_keys.iter_data,
                d_out_items.iter_data,
                <uint64_t>num_items,
                op.op_data,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing merge_sort, error code: {status}"
            )
        return storage_sz

    def _get_cubin(self):
        """Return the first ``cubin_size`` bytes of the build's ``cubin`` as ``bytes``."""
        return self.build_data.cubin[:self.build_data.cubin_size]
1698
+
1699
+
1700
+ # -------------------
1701
+ # DeviceUniqueByKey
1702
+ # -------------------
1703
+
1704
# Declarations from the CCCL C unique-by-key header. The trailing build
# parameters are named after the arguments DeviceUniqueByKeyBuildResult
# passes in this file.
cdef extern from "cccl/c/unique_by_key.h":
    cdef struct cccl_device_unique_by_key_build_result_t 'cccl_device_unique_by_key_build_result_t':
        const char* cubin
        size_t cubin_size

    cdef CUresult cccl_device_unique_by_key_build(
        cccl_device_unique_by_key_build_result_t *build_ptr,
        cccl_iterator_t d_keys_in,
        cccl_iterator_t d_values_in,
        cccl_iterator_t d_keys_out,
        cccl_iterator_t d_values_out,
        cccl_iterator_t d_num_selected_out,
        cccl_op_t comparison_op,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    ) nogil

    cdef CUresult cccl_device_unique_by_key(
        cccl_device_unique_by_key_build_result_t build,
        void *d_storage_ptr,
        size_t *d_storage_nbytes,
        cccl_iterator_t d_keys_in,
        cccl_iterator_t d_values_in,
        cccl_iterator_t d_keys_out,
        cccl_iterator_t d_values_out,
        cccl_iterator_t d_num_selected_out,
        cccl_op_t comparison_op,
        size_t num_items,
        CUstream stream
    ) nogil

    cdef CUresult cccl_device_unique_by_key_cleanup(
        cccl_device_unique_by_key_build_result_t *build_ptr
    ) nogil
1738
+
1739
+
1740
cdef class DeviceUniqueByKeyBuildResult:
    """Holds a ``cccl_device_unique_by_key_build_result_t`` for the object's lifetime.

    The struct is filled by ``cccl_device_unique_by_key_build`` in
    ``__cinit__``, passed by value to ``cccl_device_unique_by_key`` in
    :meth:`compute`, and handed to ``cccl_device_unique_by_key_cleanup`` in
    ``__dealloc__``.
    """
    cdef cccl_device_unique_by_key_build_result_t build_data

    def __cinit__(
        DeviceUniqueByKeyBuildResult self,
        Iterator d_keys_in,
        Iterator d_values_in,
        Iterator d_keys_out,
        Iterator d_values_out,
        Iterator d_num_selected_out,
        Op comparison_op,
        CommonData common_data
    ):
        # Raises RuntimeError when the build call returns a non-zero status.
        cdef CUresult status = -1
        # Everything needed from ``common_data`` is copied into C locals
        # first, because the build call below runs without the GIL.
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        # Zero the struct before building so that ``__dealloc__`` never hands
        # uninitialised memory to the cleanup routine.
        memset(&self.build_data, 0, sizeof(cccl_device_unique_by_key_build_result_t))
        with nogil:
            status = cccl_device_unique_by_key_build(
                &self.build_data,
                d_keys_in.iter_data,
                d_values_in.iter_data,
                d_keys_out.iter_data,
                d_values_out.iter_data,
                d_num_selected_out.iter_data,
                comparison_op.op_data,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(
                f"Failed building unique_by_key, error code: {status}"
            )

    def __dealloc__(DeviceUniqueByKeyBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_unique_by_key_cleanup(&self.build_data)
        # A failed cleanup is reported with ``print`` rather than raised.
        if (status != 0):
            print(f"Return code {status} encountered during unique_by_key result cleanup")

    cpdef size_t compute(
        DeviceUniqueByKeyBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_keys_in,
        Iterator d_values_in,
        Iterator d_keys_out,
        Iterator d_values_out,
        Iterator d_num_selected_out,
        Op comparison_op,
        size_t num_items,
        stream
    ):
        """Call ``cccl_device_unique_by_key`` with this object's build result.

        Parameters
        ----------
        temp_storage_ptr
            Integer address of the temporary storage; any falsy value is
            passed to the C routine as ``NULL``.
        temp_storage_bytes
            Integer converted to ``size_t`` and passed to the C routine by
            pointer.
        d_keys_in, d_values_in, d_keys_out, d_values_out, d_num_selected_out
            Iterators whose ``iter_data`` is forwarded unchanged.
        comparison_op
            Operator whose ``op_data`` is forwarded unchanged.
        num_items
            Item count forwarded to the C routine.
        stream
            Integer stream handle; any falsy value is passed as ``NULL``.

        Returns
        -------
        The ``size_t`` storage size as it stands after the call.
        NOTE(review): presumably the C routine writes the required size
        through the pointer when the storage pointer is ``NULL`` -- confirm
        against the C header.

        Raises
        ------
        RuntimeError
            If the C routine returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_unique_by_key(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_keys_in.iter_data,
                d_values_in.iter_data,
                d_keys_out.iter_data,
                d_values_out.iter_data,
                d_num_selected_out.iter_data,
                comparison_op.op_data,
                <uint64_t>num_items,
                c_stream
            )

        if status != 0:
            raise RuntimeError(
                f"Failed executing unique_by_key, error code: {status}"
            )
        # The return type is ``size_t`` so the byte count is not narrowed to
        # a C ``int`` on its way back to the caller.
        return storage_sz

    def _get_cubin(self):
        """Return the first ``cubin_size`` bytes of ``build_data.cubin``."""
        return self.build_data.cubin[:self.build_data.cubin_size]
1831
+
1832
+ # -----------------
1833
+ # DeviceRadixSort
1834
+ # -----------------
1835
+
1836
cdef extern from "cccl/c/radix_sort.h":
    # Fields of the build result that this module reads (see ``_get_cubin``).
    struct cccl_device_radix_sort_build_result_t 'cccl_device_radix_sort_build_result_t':
        const char* cubin
        size_t cubin_size

    # Build step: populates ``*build_ptr``.  The trailing six parameters are
    # named after the values passed at the call site in this module.
    CUresult cccl_device_radix_sort_build(
        cccl_device_radix_sort_build_result_t *build_ptr,
        cccl_sort_order_t sort_order,
        cccl_iterator_t d_keys_in,
        cccl_iterator_t d_values_in,
        cccl_op_t decomposer,
        const char* decomposer_return_type,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    ) nogil

    # Execution step: the storage size and ``selector`` are passed by pointer.
    CUresult cccl_device_radix_sort(
        cccl_device_radix_sort_build_result_t build,
        void *d_storage_ptr,
        size_t *d_storage_nbytes,
        cccl_iterator_t d_keys_in,
        cccl_iterator_t d_keys_out,
        cccl_iterator_t d_values_in,
        cccl_iterator_t d_values_out,
        cccl_op_t decomposer,
        size_t num_items,
        int begin_bit,
        int end_bit,
        bint is_overwrite_okay,
        int* selector,
        CUstream stream
    ) nogil

    # Cleanup step for a build result.
    CUresult cccl_device_radix_sort_cleanup(
        cccl_device_radix_sort_build_result_t *build_ptr,
    ) nogil
1871
+
1872
+
1873
cdef class DeviceRadixSortBuildResult:
    """Holds a ``cccl_device_radix_sort_build_result_t`` for the object's lifetime.

    The struct is filled by ``cccl_device_radix_sort_build`` in ``__cinit__``,
    passed by value to ``cccl_device_radix_sort`` in :meth:`compute`, and
    handed to ``cccl_device_radix_sort_cleanup`` in ``__dealloc__``.
    """
    cdef cccl_device_radix_sort_build_result_t build_data

    def __cinit__(
        DeviceRadixSortBuildResult self,
        cccl_sort_order_t order,
        Iterator d_keys_in,
        Iterator d_values_in,
        Op decomposer_op,
        const char* decomposer_return_type,
        CommonData common_data
    ):
        # Raises RuntimeError when the build call returns a non-zero status.
        cdef CUresult status = -1
        # Everything needed from ``common_data`` is copied into C locals
        # first, because the build call below runs without the GIL.
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        # Zero the struct before building so that ``__dealloc__`` never hands
        # uninitialised memory to the cleanup routine.
        memset(&self.build_data, 0, sizeof(cccl_device_radix_sort_build_result_t))
        with nogil:
            status = cccl_device_radix_sort_build(
                &self.build_data,
                order,
                d_keys_in.iter_data,
                d_values_in.iter_data,
                decomposer_op.op_data,
                decomposer_return_type,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(
                f"Failed building radix_sort, error code: {status}"
            )

    def __dealloc__(DeviceRadixSortBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_radix_sort_cleanup(&self.build_data)
        # A failed cleanup is reported with ``print`` rather than raised.
        if (status != 0):
            print(f"Return code {status} encountered during radix_sort result cleanup")

    cpdef tuple compute(
        DeviceRadixSortBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_keys_in,
        Iterator d_keys_out,
        Iterator d_values_in,
        Iterator d_values_out,
        Op decomposer_op,
        size_t num_items,
        int begin_bit,
        int end_bit,
        bint is_overwrite_okay,
        selector,
        stream
    ):
        """Call ``cccl_device_radix_sort`` with this object's build result.

        Parameters
        ----------
        temp_storage_ptr
            Integer address of the temporary storage; any falsy value is
            passed to the C routine as ``NULL``.
        temp_storage_bytes
            Integer converted to ``size_t`` and passed to the C routine by
            pointer.
        d_keys_in, d_keys_out, d_values_in, d_values_out
            Iterators whose ``iter_data`` is forwarded unchanged.
        decomposer_op
            Operator whose ``op_data`` is forwarded unchanged.
        num_items, begin_bit, end_bit, is_overwrite_okay
            Forwarded to the C routine as given.
        selector
            Integer converted to a C ``int`` and passed to the C routine by
            pointer.
        stream
            Integer stream handle; any falsy value is passed as ``NULL``.

        Returns
        -------
        tuple
            ``(storage size, selector)`` as they stand after the call.
            NOTE(review): both are passed by pointer, so the C routine may
            update them -- confirm the exact semantics against the C header.

        Raises
        ------
        RuntimeError
            If the C routine returns a non-zero status.
        """
        cdef CUresult status = -1
        # Addresses go through ``uintptr_t``, matching the other build-result
        # classes in this module.
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef int selector_int = <int>selector
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_radix_sort(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_keys_in.iter_data,
                d_keys_out.iter_data,
                d_values_in.iter_data,
                d_values_out.iter_data,
                decomposer_op.op_data,
                <uint64_t>num_items,
                begin_bit,
                end_bit,
                is_overwrite_okay,
                &selector_int,
                c_stream
            )

        if status != 0:
            # The sort order is chosen when the object is built, so the
            # message must not claim a particular direction.
            raise RuntimeError(
                f"Failed executing radix_sort, error code: {status}"
            )
        return <object>storage_sz, <object>selector_int

    def _get_cubin(self):
        """Return the first ``cubin_size`` bytes of ``build_data.cubin``."""
        return self.build_data.cubin[:self.build_data.cubin_size]
1970
+
1971
+ # --------------------------------------------
1972
+ # DeviceUnaryTransform/DeviceBinaryTransform
1973
+ # --------------------------------------------
1974
cdef extern from "cccl/c/transform.h":
    # Fields of the build result that this module reads (see ``_get_cubin``).
    # One result type is shared by the unary and binary entry points.
    struct cccl_device_transform_build_result_t:
        const char* cubin
        size_t cubin_size

    # Unary build step: populates ``*build_ptr``.  The trailing six
    # parameters are named after the values passed at the call site.
    CUresult cccl_device_unary_transform_build(
        cccl_device_transform_build_result_t *build_ptr,
        cccl_iterator_t d_in,
        cccl_iterator_t d_out,
        cccl_op_t op,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    ) nogil

    # Unary execution step.
    CUresult cccl_device_unary_transform(
        cccl_device_transform_build_result_t build,
        cccl_iterator_t d_in,
        cccl_iterator_t d_out,
        uint64_t num_items,
        cccl_op_t op,
        CUstream stream
    ) nogil

    # Binary build step: same shape as the unary one, with two inputs.
    CUresult cccl_device_binary_transform_build(
        cccl_device_transform_build_result_t* build_ptr,
        cccl_iterator_t d_in1,
        cccl_iterator_t d_in2,
        cccl_iterator_t d_out,
        cccl_op_t op,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    ) nogil

    # Binary execution step.
    CUresult cccl_device_binary_transform(
        cccl_device_transform_build_result_t build,
        cccl_iterator_t d_in1,
        cccl_iterator_t d_in2,
        cccl_iterator_t d_out,
        uint64_t num_items,
        cccl_op_t op,
        CUstream stream
    ) nogil

    # Cleanup step shared by both kinds of build result.
    CUresult cccl_device_transform_cleanup(
        cccl_device_transform_build_result_t *build_ptr,
    ) nogil
2016
+
2017
+
2018
cdef class DeviceUnaryTransform:
    """Holds a ``cccl_device_transform_build_result_t`` built for a unary transform.

    The struct is filled by ``cccl_device_unary_transform_build`` in
    ``__cinit__``, passed by value to ``cccl_device_unary_transform`` in
    :meth:`compute`, and handed to ``cccl_device_transform_cleanup`` in
    ``__dealloc__``.
    """
    cdef cccl_device_transform_build_result_t build_data

    def __cinit__(
        self,
        Iterator d_in,
        Iterator d_out,
        Op op,
        CommonData common_data
    ):
        # Raises RuntimeError when the build call returns a non-zero status.
        cdef CUresult status = -1
        # Everything needed from ``common_data`` is copied into C locals
        # first, because the build call below runs without the GIL.
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        # Zero the struct before building so that ``__dealloc__`` never hands
        # uninitialised memory to the cleanup routine.
        memset(&self.build_data, 0, sizeof(cccl_device_transform_build_result_t))

        with nogil:
            status = cccl_device_unary_transform_build(
                &self.build_data,
                d_in.iter_data,
                d_out.iter_data,
                op.op_data,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            # Include the status code, as the other build-result classes do.
            raise RuntimeError(
                f"Failed to build unary transform, error code: {status}"
            )

    def __dealloc__(DeviceUnaryTransform self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_transform_cleanup(&self.build_data)
        # A failed cleanup is reported with ``print`` rather than raised.
        if (status != 0):
            print(f"Return code {status} encountered during unary transform result cleanup")

    # ``except *`` is spelled out so that the RuntimeError raised below
    # reaches the caller whatever the compiler's default for ``void``
    # functions is.
    cpdef void compute(
        DeviceUnaryTransform self,
        Iterator d_in,
        Iterator d_out,
        size_t num_items,
        Op op,
        stream
    ) except *:
        """Call ``cccl_device_unary_transform`` with this object's build result.

        Parameters
        ----------
        d_in, d_out
            Iterators whose ``iter_data`` is forwarded unchanged.
        num_items
            Item count forwarded to the C routine.
        op
            Operator whose ``op_data`` is forwarded unchanged.
        stream
            Integer stream handle; any falsy value is passed as ``NULL``.

        Raises
        ------
        RuntimeError
            If the C routine returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL
        with nogil:
            status = cccl_device_unary_transform(
                self.build_data,
                d_in.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                op.op_data,
                c_stream
            )
        if (status != 0):
            raise RuntimeError(
                f"Failed to compute unary transform, error code: {status}"
            )

    def _get_cubin(self):
        """Return the first ``cubin_size`` bytes of ``build_data.cubin``."""
        return self.build_data.cubin[:self.build_data.cubin_size]
2086
+
2087
+
2088
cdef class DeviceBinaryTransform:
    """Holds a ``cccl_device_transform_build_result_t`` built for a binary transform.

    The struct is filled by ``cccl_device_binary_transform_build`` in
    ``__cinit__``, passed by value to ``cccl_device_binary_transform`` in
    :meth:`compute`, and handed to ``cccl_device_transform_cleanup`` in
    ``__dealloc__``.
    """
    cdef cccl_device_transform_build_result_t build_data

    def __cinit__(
        self,
        Iterator d_in1,
        Iterator d_in2,
        Iterator d_out,
        Op op,
        CommonData common_data
    ):
        # Raises RuntimeError when the build call returns a non-zero status.
        cdef CUresult status = -1
        # Everything needed from ``common_data`` is copied into C locals
        # first, because the build call below runs without the GIL.
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        # Zero the struct before building so that ``__dealloc__`` never hands
        # uninitialised memory to the cleanup routine.
        memset(&self.build_data, 0, sizeof(cccl_device_transform_build_result_t))

        with nogil:
            status = cccl_device_binary_transform_build(
                &self.build_data,
                d_in1.iter_data,
                d_in2.iter_data,
                d_out.iter_data,
                op.op_data,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            # Include the status code, as the other build-result classes do.
            raise RuntimeError(
                f"Failed to build binary transform, error code: {status}"
            )

    def __dealloc__(DeviceBinaryTransform self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_transform_cleanup(&self.build_data)
        # A failed cleanup is reported with ``print`` rather than raised.
        if (status != 0):
            print(f"Return code {status} encountered during binary transform result cleanup")

    # ``except *`` is spelled out so that the RuntimeError raised below
    # reaches the caller whatever the compiler's default for ``void``
    # functions is.
    cpdef void compute(
        DeviceBinaryTransform self,
        Iterator d_in1,
        Iterator d_in2,
        Iterator d_out,
        size_t num_items,
        Op op,
        stream
    ) except *:
        """Call ``cccl_device_binary_transform`` with this object's build result.

        Parameters
        ----------
        d_in1, d_in2, d_out
            Iterators whose ``iter_data`` is forwarded unchanged.
        num_items
            Item count forwarded to the C routine.
        op
            Operator whose ``op_data`` is forwarded unchanged.
        stream
            Integer stream handle; any falsy value is passed as ``NULL``.

        Raises
        ------
        RuntimeError
            If the C routine returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL
        with nogil:
            status = cccl_device_binary_transform(
                self.build_data,
                d_in1.iter_data,
                d_in2.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                op.op_data,
                c_stream
            )
        if (status != 0):
            raise RuntimeError(
                f"Failed to compute binary transform, error code: {status}"
            )

    def _get_cubin(self):
        """Return the first ``cubin_size`` bytes of ``build_data.cubin``."""
        return self.build_data.cubin[:self.build_data.cubin_size]