cuda-cccl 0.4.3__cp312-cp312-manylinux_2_26_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2024)
  1. cuda/cccl/__init__.py +27 -0
  2. cuda/cccl/_cuda_version_utils.py +24 -0
  3. cuda/cccl/cooperative/__init__.py +9 -0
  4. cuda/cccl/cooperative/experimental/__init__.py +24 -0
  5. cuda/cccl/headers/__init__.py +7 -0
  6. cuda/cccl/headers/include/__init__.py +1 -0
  7. cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +233 -0
  8. cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +1158 -0
  9. cuda/cccl/headers/include/cub/agent/agent_for.cuh +55 -0
  10. cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +699 -0
  11. cuda/cccl/headers/include/cub/agent/agent_merge.cuh +365 -0
  12. cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +721 -0
  13. cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +756 -0
  14. cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +277 -0
  15. cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +715 -0
  16. cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +546 -0
  17. cuda/cccl/headers/include/cub/agent/agent_reduce.cuh +592 -0
  18. cuda/cccl/headers/include/cub/agent/agent_reduce_by_key.cuh +780 -0
  19. cuda/cccl/headers/include/cub/agent/agent_rle.cuh +1092 -0
  20. cuda/cccl/headers/include/cub/agent/agent_scan.cuh +564 -0
  21. cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +448 -0
  22. cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +263 -0
  23. cuda/cccl/headers/include/cub/agent/agent_segmented_scan.cuh +292 -0
  24. cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +1090 -0
  25. cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +346 -0
  26. cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +584 -0
  27. cuda/cccl/headers/include/cub/agent/agent_topk.cuh +762 -0
  28. cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +599 -0
  29. cuda/cccl/headers/include/cub/agent/single_pass_scan_operators.cuh +1384 -0
  30. cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +939 -0
  31. cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +1200 -0
  32. cuda/cccl/headers/include/cub/block/block_exchange.cuh +1279 -0
  33. cuda/cccl/headers/include/cub/block/block_histogram.cuh +396 -0
  34. cuda/cccl/headers/include/cub/block/block_load.cuh +1269 -0
  35. cuda/cccl/headers/include/cub/block/block_load_to_shared.cuh +437 -0
  36. cuda/cccl/headers/include/cub/block/block_merge_sort.cuh +771 -0
  37. cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +1215 -0
  38. cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +2129 -0
  39. cuda/cccl/headers/include/cub/block/block_raking_layout.cuh +124 -0
  40. cuda/cccl/headers/include/cub/block/block_reduce.cuh +661 -0
  41. cuda/cccl/headers/include/cub/block/block_run_length_decode.cuh +406 -0
  42. cuda/cccl/headers/include/cub/block/block_scan.cuh +2168 -0
  43. cuda/cccl/headers/include/cub/block/block_shuffle.cuh +319 -0
  44. cuda/cccl/headers/include/cub/block/block_store.cuh +1238 -0
  45. cuda/cccl/headers/include/cub/block/radix_rank_sort_operations.cuh +597 -0
  46. cuda/cccl/headers/include/cub/block/specializations/block_histogram_atomic.cuh +62 -0
  47. cuda/cccl/headers/include/cub/block/specializations/block_histogram_sort.cuh +209 -0
  48. cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking.cuh +230 -0
  49. cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking_commutative_only.cuh +207 -0
  50. cuda/cccl/headers/include/cub/block/specializations/block_reduce_warp_reductions.cuh +257 -0
  51. cuda/cccl/headers/include/cub/block/specializations/block_scan_raking.cuh +766 -0
  52. cuda/cccl/headers/include/cub/block/specializations/block_scan_warp_scans.cuh +514 -0
  53. cuda/cccl/headers/include/cub/config.cuh +29 -0
  54. cuda/cccl/headers/include/cub/cub.cuh +96 -0
  55. cuda/cccl/headers/include/cub/detail/array_utils.cuh +54 -0
  56. cuda/cccl/headers/include/cub/detail/choose_offset.cuh +135 -0
  57. cuda/cccl/headers/include/cub/detail/detect_cuda_runtime.cuh +50 -0
  58. cuda/cccl/headers/include/cub/detail/device_double_buffer.cuh +94 -0
  59. cuda/cccl/headers/include/cub/detail/device_memory_resource.cuh +60 -0
  60. cuda/cccl/headers/include/cub/detail/env_dispatch.cuh +87 -0
  61. cuda/cccl/headers/include/cub/detail/fast_modulo_division.cuh +227 -0
  62. cuda/cccl/headers/include/cub/detail/integer_utils.cuh +87 -0
  63. cuda/cccl/headers/include/cub/detail/launcher/cuda_driver.cuh +149 -0
  64. cuda/cccl/headers/include/cub/detail/launcher/cuda_runtime.cuh +103 -0
  65. cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +112 -0
  66. cuda/cccl/headers/include/cub/detail/ptx-json/array.cuh +41 -0
  67. cuda/cccl/headers/include/cub/detail/ptx-json/json.cuh +39 -0
  68. cuda/cccl/headers/include/cub/detail/ptx-json/object.cuh +71 -0
  69. cuda/cccl/headers/include/cub/detail/ptx-json/string.cuh +79 -0
  70. cuda/cccl/headers/include/cub/detail/ptx-json/value.cuh +95 -0
  71. cuda/cccl/headers/include/cub/detail/ptx-json-parser.cuh +39 -0
  72. cuda/cccl/headers/include/cub/detail/rfa.cuh +706 -0
  73. cuda/cccl/headers/include/cub/detail/strong_load.cuh +163 -0
  74. cuda/cccl/headers/include/cub/detail/strong_store.cuh +194 -0
  75. cuda/cccl/headers/include/cub/detail/temporary_storage.cuh +377 -0
  76. cuda/cccl/headers/include/cub/detail/type_traits.cuh +185 -0
  77. cuda/cccl/headers/include/cub/detail/uninitialized_copy.cuh +48 -0
  78. cuda/cccl/headers/include/cub/detail/unsafe_bitcast.cuh +33 -0
  79. cuda/cccl/headers/include/cub/device/device_adjacent_difference.cuh +572 -0
  80. cuda/cccl/headers/include/cub/device/device_copy.cuh +276 -0
  81. cuda/cccl/headers/include/cub/device/device_for.cuh +1061 -0
  82. cuda/cccl/headers/include/cub/device/device_histogram.cuh +1485 -0
  83. cuda/cccl/headers/include/cub/device/device_memcpy.cuh +171 -0
  84. cuda/cccl/headers/include/cub/device/device_merge.cuh +203 -0
  85. cuda/cccl/headers/include/cub/device/device_merge_sort.cuh +955 -0
  86. cuda/cccl/headers/include/cub/device/device_partition.cuh +644 -0
  87. cuda/cccl/headers/include/cub/device/device_radix_sort.cuh +3413 -0
  88. cuda/cccl/headers/include/cub/device/device_reduce.cuh +2303 -0
  89. cuda/cccl/headers/include/cub/device/device_run_length_encode.cuh +346 -0
  90. cuda/cccl/headers/include/cub/device/device_scan.cuh +2152 -0
  91. cuda/cccl/headers/include/cub/device/device_segmented_radix_sort.cuh +1472 -0
  92. cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +1635 -0
  93. cuda/cccl/headers/include/cub/device/device_segmented_scan.cuh +1398 -0
  94. cuda/cccl/headers/include/cub/device/device_segmented_sort.cuh +2787 -0
  95. cuda/cccl/headers/include/cub/device/device_select.cuh +1204 -0
  96. cuda/cccl/headers/include/cub/device/device_topk.cuh +521 -0
  97. cuda/cccl/headers/include/cub/device/device_transform.cuh +666 -0
  98. cuda/cccl/headers/include/cub/device/dispatch/dispatch_adjacent_difference.cuh +289 -0
  99. cuda/cccl/headers/include/cub/device/dispatch/dispatch_batch_memcpy.cuh +694 -0
  100. cuda/cccl/headers/include/cub/device/dispatch/dispatch_common.cuh +50 -0
  101. cuda/cccl/headers/include/cub/device/dispatch/dispatch_copy_mdspan.cuh +77 -0
  102. cuda/cccl/headers/include/cub/device/dispatch/dispatch_fixed_size_segmented_reduce.cuh +349 -0
  103. cuda/cccl/headers/include/cub/device/dispatch/dispatch_for.cuh +160 -0
  104. cuda/cccl/headers/include/cub/device/dispatch/dispatch_histogram.cuh +1849 -0
  105. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +317 -0
  106. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge_sort.cuh +429 -0
  107. cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +1066 -0
  108. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +830 -0
  109. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_by_key.cuh +629 -0
  110. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +479 -0
  111. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh +256 -0
  112. cuda/cccl/headers/include/cub/device/dispatch/dispatch_rle.cuh +603 -0
  113. cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan.cuh +447 -0
  114. cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan_by_key.cuh +545 -0
  115. cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_radix_sort.cuh +638 -0
  116. cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_reduce.cuh +410 -0
  117. cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_scan.cuh +278 -0
  118. cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +899 -0
  119. cuda/cccl/headers/include/cub/device/dispatch/dispatch_select_if.cuh +831 -0
  120. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +321 -0
  121. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +454 -0
  122. cuda/cccl/headers/include/cub/device/dispatch/dispatch_three_way_partition.cuh +364 -0
  123. cuda/cccl/headers/include/cub/device/dispatch/dispatch_topk.cuh +626 -0
  124. cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +527 -0
  125. cuda/cccl/headers/include/cub/device/dispatch/dispatch_unique_by_key.cuh +472 -0
  126. cuda/cccl/headers/include/cub/device/dispatch/kernels/kernel_for_each.cuh +259 -0
  127. cuda/cccl/headers/include/cub/device/dispatch/kernels/kernel_histogram.cuh +669 -0
  128. cuda/cccl/headers/include/cub/device/dispatch/kernels/kernel_merge_sort.cuh +332 -0
  129. cuda/cccl/headers/include/cub/device/dispatch/kernels/kernel_radix_sort.cuh +553 -0
  130. cuda/cccl/headers/include/cub/device/dispatch/kernels/kernel_reduce.cuh +584 -0
  131. cuda/cccl/headers/include/cub/device/dispatch/kernels/kernel_scan.cuh +178 -0
  132. cuda/cccl/headers/include/cub/device/dispatch/kernels/kernel_segmented_radix_sort.cuh +262 -0
  133. cuda/cccl/headers/include/cub/device/dispatch/kernels/kernel_segmented_reduce.cuh +295 -0
  134. cuda/cccl/headers/include/cub/device/dispatch/kernels/kernel_segmented_scan.cuh +77 -0
  135. cuda/cccl/headers/include/cub/device/dispatch/kernels/kernel_segmented_sort.cuh +521 -0
  136. cuda/cccl/headers/include/cub/device/dispatch/kernels/kernel_three_way_partition.cuh +200 -0
  137. cuda/cccl/headers/include/cub/device/dispatch/kernels/kernel_transform.cuh +1049 -0
  138. cuda/cccl/headers/include/cub/device/dispatch/kernels/kernel_unique_by_key.cuh +175 -0
  139. cuda/cccl/headers/include/cub/device/dispatch/tuning/common.cuh +97 -0
  140. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +43 -0
  141. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +94 -0
  142. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +34 -0
  143. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +268 -0
  144. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +108 -0
  145. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +100 -0
  146. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +1045 -0
  147. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce.cuh +681 -0
  148. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +918 -0
  149. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +647 -0
  150. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +571 -0
  151. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +986 -0
  152. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_scan.cuh +108 -0
  153. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +373 -0
  154. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_select_if.cuh +1563 -0
  155. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +415 -0
  156. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_topk.cuh +84 -0
  157. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +476 -0
  158. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +858 -0
  159. cuda/cccl/headers/include/cub/grid/grid_even_share.cuh +203 -0
  160. cuda/cccl/headers/include/cub/grid/grid_mapping.cuh +82 -0
  161. cuda/cccl/headers/include/cub/grid/grid_queue.cuh +175 -0
  162. cuda/cccl/headers/include/cub/iterator/arg_index_input_iterator.cuh +230 -0
  163. cuda/cccl/headers/include/cub/iterator/cache_modified_input_iterator.cuh +235 -0
  164. cuda/cccl/headers/include/cub/iterator/cache_modified_output_iterator.cuh +226 -0
  165. cuda/cccl/headers/include/cub/iterator/tex_obj_input_iterator.cuh +293 -0
  166. cuda/cccl/headers/include/cub/thread/thread_load.cuh +353 -0
  167. cuda/cccl/headers/include/cub/thread/thread_operators.cuh +664 -0
  168. cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +525 -0
  169. cuda/cccl/headers/include/cub/thread/thread_scan.cuh +472 -0
  170. cuda/cccl/headers/include/cub/thread/thread_search.cuh +214 -0
  171. cuda/cccl/headers/include/cub/thread/thread_simd.cuh +456 -0
  172. cuda/cccl/headers/include/cub/thread/thread_sort.cuh +78 -0
  173. cuda/cccl/headers/include/cub/thread/thread_store.cuh +341 -0
  174. cuda/cccl/headers/include/cub/util_allocator.cuh +897 -0
  175. cuda/cccl/headers/include/cub/util_arch.cuh +176 -0
  176. cuda/cccl/headers/include/cub/util_cpp_dialect.cuh +71 -0
  177. cuda/cccl/headers/include/cub/util_debug.cuh +183 -0
  178. cuda/cccl/headers/include/cub/util_device.cuh +838 -0
  179. cuda/cccl/headers/include/cub/util_macro.cuh +73 -0
  180. cuda/cccl/headers/include/cub/util_math.cuh +92 -0
  181. cuda/cccl/headers/include/cub/util_namespace.cuh +152 -0
  182. cuda/cccl/headers/include/cub/util_policy_wrapper_t.cuh +31 -0
  183. cuda/cccl/headers/include/cub/util_ptx.cuh +483 -0
  184. cuda/cccl/headers/include/cub/util_temporary_storage.cuh +93 -0
  185. cuda/cccl/headers/include/cub/util_type.cuh +1084 -0
  186. cuda/cccl/headers/include/cub/util_vsmem.cuh +227 -0
  187. cuda/cccl/headers/include/cub/version.cuh +65 -0
  188. cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_shfl.cuh +304 -0
  189. cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_smem.cuh +152 -0
  190. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +567 -0
  191. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +378 -0
  192. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +922 -0
  193. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_smem.cuh +691 -0
  194. cuda/cccl/headers/include/cub/warp/warp_exchange.cuh +381 -0
  195. cuda/cccl/headers/include/cub/warp/warp_load.cuh +591 -0
  196. cuda/cccl/headers/include/cub/warp/warp_merge_sort.cuh +145 -0
  197. cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +810 -0
  198. cuda/cccl/headers/include/cub/warp/warp_scan.cuh +1863 -0
  199. cuda/cccl/headers/include/cub/warp/warp_store.cuh +498 -0
  200. cuda/cccl/headers/include/cub/warp/warp_utils.cuh +59 -0
  201. cuda/cccl/headers/include/cuda/__algorithm/common.h +68 -0
  202. cuda/cccl/headers/include/cuda/__algorithm/copy.h +199 -0
  203. cuda/cccl/headers/include/cuda/__algorithm/fill.h +110 -0
  204. cuda/cccl/headers/include/cuda/__annotated_ptr/access_property.h +165 -0
  205. cuda/cccl/headers/include/cuda/__annotated_ptr/access_property_encoding.h +171 -0
  206. cuda/cccl/headers/include/cuda/__annotated_ptr/annotated_ptr.h +216 -0
  207. cuda/cccl/headers/include/cuda/__annotated_ptr/annotated_ptr_base.h +100 -0
  208. cuda/cccl/headers/include/cuda/__annotated_ptr/apply_access_property.h +83 -0
  209. cuda/cccl/headers/include/cuda/__annotated_ptr/associate_access_property.h +128 -0
  210. cuda/cccl/headers/include/cuda/__annotated_ptr/createpolicy.h +210 -0
  211. cuda/cccl/headers/include/cuda/__atomic/atomic.h +145 -0
  212. cuda/cccl/headers/include/cuda/__barrier/async_contract_fulfillment.h +39 -0
  213. cuda/cccl/headers/include/cuda/__barrier/barrier.h +65 -0
  214. cuda/cccl/headers/include/cuda/__barrier/barrier_arrive_tx.h +101 -0
  215. cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +528 -0
  216. cuda/cccl/headers/include/cuda/__barrier/barrier_expect_tx.h +73 -0
  217. cuda/cccl/headers/include/cuda/__barrier/barrier_thread_scope.h +60 -0
  218. cuda/cccl/headers/include/cuda/__bit/bit_reverse.h +198 -0
  219. cuda/cccl/headers/include/cuda/__bit/bitfield.h +121 -0
  220. cuda/cccl/headers/include/cuda/__bit/bitmask.h +89 -0
  221. cuda/cccl/headers/include/cuda/__cccl_config +38 -0
  222. cuda/cccl/headers/include/cuda/__cmath/ceil_div.h +123 -0
  223. cuda/cccl/headers/include/cuda/__cmath/fast_modulo_division.h +178 -0
  224. cuda/cccl/headers/include/cuda/__cmath/ilog.h +194 -0
  225. cuda/cccl/headers/include/cuda/__cmath/ipow.h +111 -0
  226. cuda/cccl/headers/include/cuda/__cmath/isqrt.h +80 -0
  227. cuda/cccl/headers/include/cuda/__cmath/mul_hi.h +145 -0
  228. cuda/cccl/headers/include/cuda/__cmath/neg.h +47 -0
  229. cuda/cccl/headers/include/cuda/__cmath/pow2.h +74 -0
  230. cuda/cccl/headers/include/cuda/__cmath/round_down.h +102 -0
  231. cuda/cccl/headers/include/cuda/__cmath/round_up.h +104 -0
  232. cuda/cccl/headers/include/cuda/__cmath/sincos.h +134 -0
  233. cuda/cccl/headers/include/cuda/__cmath/uabs.h +57 -0
  234. cuda/cccl/headers/include/cuda/__complex/complex.h +238 -0
  235. cuda/cccl/headers/include/cuda/__complex/get_real_imag.h +89 -0
  236. cuda/cccl/headers/include/cuda/__complex/traits.h +64 -0
  237. cuda/cccl/headers/include/cuda/__complex_ +28 -0
  238. cuda/cccl/headers/include/cuda/__container/buffer.h +891 -0
  239. cuda/cccl/headers/include/cuda/__container/heterogeneous_iterator.h +436 -0
  240. cuda/cccl/headers/include/cuda/__container/uninitialized_async_buffer.h +416 -0
  241. cuda/cccl/headers/include/cuda/__device/all_devices.h +140 -0
  242. cuda/cccl/headers/include/cuda/__device/arch_id.h +194 -0
  243. cuda/cccl/headers/include/cuda/__device/arch_traits.h +553 -0
  244. cuda/cccl/headers/include/cuda/__device/attributes.h +772 -0
  245. cuda/cccl/headers/include/cuda/__device/compute_capability.h +172 -0
  246. cuda/cccl/headers/include/cuda/__device/device_ref.h +168 -0
  247. cuda/cccl/headers/include/cuda/__device/physical_device.h +178 -0
  248. cuda/cccl/headers/include/cuda/__driver/driver_api.h +1041 -0
  249. cuda/cccl/headers/include/cuda/__event/event.h +171 -0
  250. cuda/cccl/headers/include/cuda/__event/event_ref.h +157 -0
  251. cuda/cccl/headers/include/cuda/__event/timed_event.h +120 -0
  252. cuda/cccl/headers/include/cuda/__execution/determinism.h +89 -0
  253. cuda/cccl/headers/include/cuda/__execution/output_ordering.h +87 -0
  254. cuda/cccl/headers/include/cuda/__execution/policy.h +53 -0
  255. cuda/cccl/headers/include/cuda/__execution/require.h +75 -0
  256. cuda/cccl/headers/include/cuda/__execution/tune.h +70 -0
  257. cuda/cccl/headers/include/cuda/__functional/address_stability.h +131 -0
  258. cuda/cccl/headers/include/cuda/__functional/for_each_canceled.h +321 -0
  259. cuda/cccl/headers/include/cuda/__functional/maximum.h +77 -0
  260. cuda/cccl/headers/include/cuda/__functional/minimum.h +77 -0
  261. cuda/cccl/headers/include/cuda/__functional/minimum_maximum_common.h +52 -0
  262. cuda/cccl/headers/include/cuda/__functional/proclaim_return_type.h +106 -0
  263. cuda/cccl/headers/include/cuda/__fwd/barrier.h +38 -0
  264. cuda/cccl/headers/include/cuda/__fwd/complex.h +48 -0
  265. cuda/cccl/headers/include/cuda/__fwd/devices.h +44 -0
  266. cuda/cccl/headers/include/cuda/__fwd/execution_policy.h +47 -0
  267. cuda/cccl/headers/include/cuda/__fwd/get_stream.h +38 -0
  268. cuda/cccl/headers/include/cuda/__fwd/pipeline.h +37 -0
  269. cuda/cccl/headers/include/cuda/__fwd/zip_iterator.h +58 -0
  270. cuda/cccl/headers/include/cuda/__hierarchy/dimensions.h +162 -0
  271. cuda/cccl/headers/include/cuda/__hierarchy/hierarchy_dimensions.h +986 -0
  272. cuda/cccl/headers/include/cuda/__hierarchy/hierarchy_levels.h +494 -0
  273. cuda/cccl/headers/include/cuda/__hierarchy/level_dimensions.h +225 -0
  274. cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h +315 -0
  275. cuda/cccl/headers/include/cuda/__iterator/counting_iterator.h +490 -0
  276. cuda/cccl/headers/include/cuda/__iterator/discard_iterator.h +324 -0
  277. cuda/cccl/headers/include/cuda/__iterator/permutation_iterator.h +456 -0
  278. cuda/cccl/headers/include/cuda/__iterator/shuffle_iterator.h +334 -0
  279. cuda/cccl/headers/include/cuda/__iterator/strided_iterator.h +418 -0
  280. cuda/cccl/headers/include/cuda/__iterator/tabulate_output_iterator.h +367 -0
  281. cuda/cccl/headers/include/cuda/__iterator/transform_input_output_iterator.h +528 -0
  282. cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +527 -0
  283. cuda/cccl/headers/include/cuda/__iterator/transform_output_iterator.h +486 -0
  284. cuda/cccl/headers/include/cuda/__iterator/zip_common.h +147 -0
  285. cuda/cccl/headers/include/cuda/__iterator/zip_function.h +114 -0
  286. cuda/cccl/headers/include/cuda/__iterator/zip_iterator.h +555 -0
  287. cuda/cccl/headers/include/cuda/__iterator/zip_transform_iterator.h +589 -0
  288. cuda/cccl/headers/include/cuda/__latch/latch.h +44 -0
  289. cuda/cccl/headers/include/cuda/__launch/configuration.h +754 -0
  290. cuda/cccl/headers/include/cuda/__launch/host_launch.h +115 -0
  291. cuda/cccl/headers/include/cuda/__launch/launch.h +334 -0
  292. cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +531 -0
  293. cuda/cccl/headers/include/cuda/__mdspan/host_device_mdspan.h +239 -0
  294. cuda/cccl/headers/include/cuda/__mdspan/restrict_accessor.h +152 -0
  295. cuda/cccl/headers/include/cuda/__mdspan/restrict_mdspan.h +118 -0
  296. cuda/cccl/headers/include/cuda/__mdspan/shared_memory_accessor.h +208 -0
  297. cuda/cccl/headers/include/cuda/__mdspan/shared_memory_mdspan.h +129 -0
  298. cuda/cccl/headers/include/cuda/__memcpy_async/check_preconditions.h +79 -0
  299. cuda/cccl/headers/include/cuda/__memcpy_async/completion_mechanism.h +47 -0
  300. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_bulk_shared_global.h +77 -0
  301. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_fallback.h +72 -0
  302. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_shared_global.h +148 -0
  303. cuda/cccl/headers/include/cuda/__memcpy_async/dispatch_memcpy_async.h +165 -0
  304. cuda/cccl/headers/include/cuda/__memcpy_async/elect_one.h +52 -0
  305. cuda/cccl/headers/include/cuda/__memcpy_async/is_local_smem_barrier.h +53 -0
  306. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async.h +179 -0
  307. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_barrier.h +99 -0
  308. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_tx.h +102 -0
  309. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_completion.h +164 -0
  310. cuda/cccl/headers/include/cuda/__memcpy_async/try_get_barrier_handle.h +57 -0
  311. cuda/cccl/headers/include/cuda/__memory/address_space.h +256 -0
  312. cuda/cccl/headers/include/cuda/__memory/align_down.h +77 -0
  313. cuda/cccl/headers/include/cuda/__memory/align_up.h +77 -0
  314. cuda/cccl/headers/include/cuda/__memory/aligned_size.h +61 -0
  315. cuda/cccl/headers/include/cuda/__memory/check_address.h +111 -0
  316. cuda/cccl/headers/include/cuda/__memory/discard_memory.h +64 -0
  317. cuda/cccl/headers/include/cuda/__memory/get_device_address.h +58 -0
  318. cuda/cccl/headers/include/cuda/__memory/is_aligned.h +60 -0
  319. cuda/cccl/headers/include/cuda/__memory/is_pointer_accessible.h +278 -0
  320. cuda/cccl/headers/include/cuda/__memory/ptr_in_range.h +92 -0
  321. cuda/cccl/headers/include/cuda/__memory/ptr_rebind.h +75 -0
  322. cuda/cccl/headers/include/cuda/__memory/ranges_overlap.h +125 -0
  323. cuda/cccl/headers/include/cuda/__memory_pool/device_memory_pool.h +166 -0
  324. cuda/cccl/headers/include/cuda/__memory_pool/managed_memory_pool.h +161 -0
  325. cuda/cccl/headers/include/cuda/__memory_pool/memory_pool_base.h +644 -0
  326. cuda/cccl/headers/include/cuda/__memory_pool/pinned_memory_pool.h +218 -0
  327. cuda/cccl/headers/include/cuda/__memory_resource/any_resource.h +882 -0
  328. cuda/cccl/headers/include/cuda/__memory_resource/get_memory_resource.h +82 -0
  329. cuda/cccl/headers/include/cuda/__memory_resource/get_property.h +153 -0
  330. cuda/cccl/headers/include/cuda/__memory_resource/legacy_managed_memory_resource.h +148 -0
  331. cuda/cccl/headers/include/cuda/__memory_resource/legacy_pinned_memory_resource.h +141 -0
  332. cuda/cccl/headers/include/cuda/__memory_resource/properties.h +130 -0
  333. cuda/cccl/headers/include/cuda/__memory_resource/resource.h +135 -0
  334. cuda/cccl/headers/include/cuda/__memory_resource/shared_resource.h +261 -0
  335. cuda/cccl/headers/include/cuda/__memory_resource/synchronous_resource_adapter.h +136 -0
  336. cuda/cccl/headers/include/cuda/__numeric/add_overflow.h +331 -0
  337. cuda/cccl/headers/include/cuda/__numeric/div_overflow.h +150 -0
  338. cuda/cccl/headers/include/cuda/__numeric/narrow.h +108 -0
  339. cuda/cccl/headers/include/cuda/__numeric/overflow_cast.h +59 -0
  340. cuda/cccl/headers/include/cuda/__numeric/overflow_result.h +43 -0
  341. cuda/cccl/headers/include/cuda/__numeric/sub_overflow.h +359 -0
  342. cuda/cccl/headers/include/cuda/__nvtx/nvtx.h +120 -0
  343. cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +2977 -0
  344. cuda/cccl/headers/include/cuda/__ptx/instructions/barrier_cluster.h +43 -0
  345. cuda/cccl/headers/include/cuda/__ptx/instructions/bfind.h +41 -0
  346. cuda/cccl/headers/include/cuda/__ptx/instructions/bmsk.h +41 -0
  347. cuda/cccl/headers/include/cuda/__ptx/instructions/clusterlaunchcontrol.h +41 -0
  348. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk.h +44 -0
  349. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_commit_group.h +43 -0
  350. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_tensor.h +45 -0
  351. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_wait_group.h +43 -0
  352. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_mbarrier_arrive.h +42 -0
  353. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_reduce_async_bulk.h +60 -0
  354. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_reduce_async_bulk_tensor.h +43 -0
  355. cuda/cccl/headers/include/cuda/__ptx/instructions/elect_sync.h +41 -0
  356. cuda/cccl/headers/include/cuda/__ptx/instructions/exit.h +41 -0
  357. cuda/cccl/headers/include/cuda/__ptx/instructions/fence.h +49 -0
  358. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/barrier_cluster.h +115 -0
  359. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/bfind.h +190 -0
  360. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/bmsk.h +54 -0
  361. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/clusterlaunchcontrol.h +240 -0
  362. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk.h +245 -0
  363. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_commit_group.h +25 -0
  364. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_multicast.h +52 -0
  365. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor.h +977 -0
  366. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_gather_scatter.h +302 -0
  367. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_multicast.h +631 -0
  368. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_wait_group.h +46 -0
  369. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_mbarrier_arrive.h +26 -0
  370. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_mbarrier_arrive_noinc.h +26 -0
  371. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk.h +1470 -0
  372. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_bf16.h +132 -0
  373. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_f16.h +132 -0
  374. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_tensor.h +601 -0
  375. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/elect_sync.h +36 -0
  376. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/exit.h +25 -0
  377. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence.h +208 -0
  378. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_mbarrier_init.h +31 -0
  379. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_alias.h +25 -0
  380. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_async.h +58 -0
  381. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_async_generic_sync_restrict.h +64 -0
  382. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_tensormap_generic.h +102 -0
  383. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_sync_restrict.h +64 -0
  384. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/get_sreg.h +949 -0
  385. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/getctarank.h +32 -0
  386. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/ld.h +5542 -0
  387. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive.h +399 -0
  388. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive_expect_tx.h +184 -0
  389. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive_no_complete.h +34 -0
  390. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_expect_tx.h +102 -0
  391. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_init.h +27 -0
  392. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_inval.h +26 -0
  393. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_test_wait.h +143 -0
  394. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_test_wait_parity.h +144 -0
  395. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_try_wait.h +286 -0
  396. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_try_wait_parity.h +290 -0
  397. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_ld_reduce.h +2202 -0
  398. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_red.h +1362 -0
  399. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_st.h +236 -0
  400. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/prmt.h +230 -0
  401. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/red_async.h +460 -0
  402. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/setmaxnreg.h +58 -0
  403. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/shl.h +96 -0
  404. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/shr.h +168 -0
  405. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st.h +1490 -0
  406. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st_async.h +123 -0
  407. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st_bulk.h +31 -0
  408. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_alloc.h +120 -0
  409. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_commit.h +91 -0
  410. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_cp.h +693 -0
  411. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_fence.h +50 -0
  412. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_ld.h +11437 -0
  413. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_mma.h +6513 -0
  414. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_mma_ws.h +6726 -0
  415. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_shift.h +40 -0
  416. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_st.h +4767 -0
  417. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_wait.h +48 -0
  418. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tensormap_cp_fenceproxy.h +71 -0
  419. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tensormap_replace.h +886 -0
  420. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/trap.h +25 -0
  421. cuda/cccl/headers/include/cuda/__ptx/instructions/get_sreg.h +43 -0
  422. cuda/cccl/headers/include/cuda/__ptx/instructions/getctarank.h +43 -0
  423. cuda/cccl/headers/include/cuda/__ptx/instructions/ld.h +41 -0
  424. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_arrive.h +45 -0
  425. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_expect_tx.h +41 -0
  426. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_init.h +43 -0
  427. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_inval.h +41 -0
  428. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_wait.h +46 -0
  429. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_ld_reduce.h +41 -0
  430. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_red.h +41 -0
  431. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_st.h +41 -0
  432. cuda/cccl/headers/include/cuda/__ptx/instructions/prmt.h +41 -0
  433. cuda/cccl/headers/include/cuda/__ptx/instructions/red_async.h +43 -0
  434. cuda/cccl/headers/include/cuda/__ptx/instructions/setmaxnreg.h +41 -0
  435. cuda/cccl/headers/include/cuda/__ptx/instructions/shfl_sync.h +244 -0
  436. cuda/cccl/headers/include/cuda/__ptx/instructions/shl.h +41 -0
  437. cuda/cccl/headers/include/cuda/__ptx/instructions/shr.h +41 -0
  438. cuda/cccl/headers/include/cuda/__ptx/instructions/st.h +41 -0
  439. cuda/cccl/headers/include/cuda/__ptx/instructions/st_async.h +43 -0
  440. cuda/cccl/headers/include/cuda/__ptx/instructions/st_bulk.h +41 -0
  441. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_alloc.h +41 -0
  442. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_commit.h +41 -0
  443. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_cp.h +41 -0
  444. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_fence.h +41 -0
  445. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_ld.h +41 -0
  446. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_mma.h +41 -0
  447. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_mma_ws.h +41 -0
  448. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_shift.h +41 -0
  449. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_st.h +41 -0
  450. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_wait.h +41 -0
  451. cuda/cccl/headers/include/cuda/__ptx/instructions/tensormap_cp_fenceproxy.h +43 -0
  452. cuda/cccl/headers/include/cuda/__ptx/instructions/tensormap_replace.h +43 -0
  453. cuda/cccl/headers/include/cuda/__ptx/instructions/trap.h +41 -0
  454. cuda/cccl/headers/include/cuda/__ptx/pragmas/enable_smem_spilling.h +47 -0
  455. cuda/cccl/headers/include/cuda/__ptx/ptx_dot_variants.h +230 -0
  456. cuda/cccl/headers/include/cuda/__ptx/ptx_helper_functions.h +178 -0
  457. cuda/cccl/headers/include/cuda/__random/feistel_bijection.h +105 -0
  458. cuda/cccl/headers/include/cuda/__random/pcg_engine.h +398 -0
  459. cuda/cccl/headers/include/cuda/__random/random_bijection.h +88 -0
  460. cuda/cccl/headers/include/cuda/__runtime/api_wrapper.h +62 -0
  461. cuda/cccl/headers/include/cuda/__runtime/ensure_current_context.h +99 -0
  462. cuda/cccl/headers/include/cuda/__runtime/types.h +41 -0
  463. cuda/cccl/headers/include/cuda/__semaphore/counting_semaphore.h +53 -0
  464. cuda/cccl/headers/include/cuda/__stream/get_stream.h +109 -0
  465. cuda/cccl/headers/include/cuda/__stream/internal_streams.h +49 -0
  466. cuda/cccl/headers/include/cuda/__stream/invalid_stream.h +47 -0
  467. cuda/cccl/headers/include/cuda/__stream/launch_transform.h +193 -0
  468. cuda/cccl/headers/include/cuda/__stream/stream.h +145 -0
  469. cuda/cccl/headers/include/cuda/__stream/stream_ref.h +356 -0
  470. cuda/cccl/headers/include/cuda/__tma/make_tma_descriptor.h +657 -0
  471. cuda/cccl/headers/include/cuda/__type_traits/is_floating_point.h +47 -0
  472. cuda/cccl/headers/include/cuda/__type_traits/is_instantiable_with.h +47 -0
  473. cuda/cccl/headers/include/cuda/__type_traits/is_specialization_of.h +37 -0
  474. cuda/cccl/headers/include/cuda/__type_traits/vector_type.h +355 -0
  475. cuda/cccl/headers/include/cuda/__utility/__basic_any/access.h +88 -0
  476. cuda/cccl/headers/include/cuda/__utility/__basic_any/any_cast.h +83 -0
  477. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_base.h +148 -0
  478. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_from.h +96 -0
  479. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_fwd.h +128 -0
  480. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_ptr.h +304 -0
  481. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_ref.h +337 -0
  482. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_value.h +611 -0
  483. cuda/cccl/headers/include/cuda/__utility/__basic_any/conversions.h +170 -0
  484. cuda/cccl/headers/include/cuda/__utility/__basic_any/dynamic_any_cast.h +147 -0
  485. cuda/cccl/headers/include/cuda/__utility/__basic_any/interfaces.h +359 -0
  486. cuda/cccl/headers/include/cuda/__utility/__basic_any/iset.h +142 -0
  487. cuda/cccl/headers/include/cuda/__utility/__basic_any/overrides.h +64 -0
  488. cuda/cccl/headers/include/cuda/__utility/__basic_any/rtti.h +256 -0
  489. cuda/cccl/headers/include/cuda/__utility/__basic_any/semiregular.h +323 -0
  490. cuda/cccl/headers/include/cuda/__utility/__basic_any/storage.h +79 -0
  491. cuda/cccl/headers/include/cuda/__utility/__basic_any/tagged_ptr.h +58 -0
  492. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtcall.h +162 -0
  493. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_functions.h +183 -0
  494. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_ptrs.h +80 -0
  495. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_tables.h +155 -0
  496. cuda/cccl/headers/include/cuda/__utility/basic_any.h +507 -0
  497. cuda/cccl/headers/include/cuda/__utility/immovable.h +50 -0
  498. cuda/cccl/headers/include/cuda/__utility/in_range.h +65 -0
  499. cuda/cccl/headers/include/cuda/__utility/inherit.h +36 -0
  500. cuda/cccl/headers/include/cuda/__utility/no_init.h +29 -0
  501. cuda/cccl/headers/include/cuda/__utility/static_for.h +79 -0
  502. cuda/cccl/headers/include/cuda/__warp/lane_mask.h +326 -0
  503. cuda/cccl/headers/include/cuda/__warp/warp_match_all.h +65 -0
  504. cuda/cccl/headers/include/cuda/__warp/warp_shuffle.h +251 -0
  505. cuda/cccl/headers/include/cuda/access_property +26 -0
  506. cuda/cccl/headers/include/cuda/algorithm +28 -0
  507. cuda/cccl/headers/include/cuda/annotated_ptr +29 -0
  508. cuda/cccl/headers/include/cuda/atomic +27 -0
  509. cuda/cccl/headers/include/cuda/barrier +293 -0
  510. cuda/cccl/headers/include/cuda/bit +29 -0
  511. cuda/cccl/headers/include/cuda/buffer +27 -0
  512. cuda/cccl/headers/include/cuda/cmath +38 -0
  513. cuda/cccl/headers/include/cuda/devices +33 -0
  514. cuda/cccl/headers/include/cuda/discard_memory +32 -0
  515. cuda/cccl/headers/include/cuda/functional +32 -0
  516. cuda/cccl/headers/include/cuda/hierarchy +28 -0
  517. cuda/cccl/headers/include/cuda/iterator +39 -0
  518. cuda/cccl/headers/include/cuda/latch +27 -0
  519. cuda/cccl/headers/include/cuda/launch +28 -0
  520. cuda/cccl/headers/include/cuda/mdspan +29 -0
  521. cuda/cccl/headers/include/cuda/memory +37 -0
  522. cuda/cccl/headers/include/cuda/memory_pool +27 -0
  523. cuda/cccl/headers/include/cuda/memory_resource +41 -0
  524. cuda/cccl/headers/include/cuda/numeric +31 -0
  525. cuda/cccl/headers/include/cuda/pipeline +580 -0
  526. cuda/cccl/headers/include/cuda/ptx +131 -0
  527. cuda/cccl/headers/include/cuda/semaphore +31 -0
  528. cuda/cccl/headers/include/cuda/std/__algorithm/adjacent_find.h +59 -0
  529. cuda/cccl/headers/include/cuda/std/__algorithm/all_of.h +45 -0
  530. cuda/cccl/headers/include/cuda/std/__algorithm/any_of.h +45 -0
  531. cuda/cccl/headers/include/cuda/std/__algorithm/binary_search.h +53 -0
  532. cuda/cccl/headers/include/cuda/std/__algorithm/clamp.h +48 -0
  533. cuda/cccl/headers/include/cuda/std/__algorithm/comp.h +58 -0
  534. cuda/cccl/headers/include/cuda/std/__algorithm/comp_ref_type.h +85 -0
  535. cuda/cccl/headers/include/cuda/std/__algorithm/copy.h +143 -0
  536. cuda/cccl/headers/include/cuda/std/__algorithm/copy_backward.h +80 -0
  537. cuda/cccl/headers/include/cuda/std/__algorithm/copy_if.h +47 -0
  538. cuda/cccl/headers/include/cuda/std/__algorithm/copy_n.h +73 -0
  539. cuda/cccl/headers/include/cuda/std/__algorithm/count.h +49 -0
  540. cuda/cccl/headers/include/cuda/std/__algorithm/count_if.h +49 -0
  541. cuda/cccl/headers/include/cuda/std/__algorithm/equal.h +128 -0
  542. cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h +101 -0
  543. cuda/cccl/headers/include/cuda/std/__algorithm/fill.h +58 -0
  544. cuda/cccl/headers/include/cuda/std/__algorithm/fill_n.h +51 -0
  545. cuda/cccl/headers/include/cuda/std/__algorithm/find.h +62 -0
  546. cuda/cccl/headers/include/cuda/std/__algorithm/find_end.h +225 -0
  547. cuda/cccl/headers/include/cuda/std/__algorithm/find_first_of.h +73 -0
  548. cuda/cccl/headers/include/cuda/std/__algorithm/find_if.h +46 -0
  549. cuda/cccl/headers/include/cuda/std/__algorithm/find_if_not.h +46 -0
  550. cuda/cccl/headers/include/cuda/std/__algorithm/for_each.h +42 -0
  551. cuda/cccl/headers/include/cuda/std/__algorithm/for_each_n.h +48 -0
  552. cuda/cccl/headers/include/cuda/std/__algorithm/generate.h +41 -0
  553. cuda/cccl/headers/include/cuda/std/__algorithm/generate_n.h +46 -0
  554. cuda/cccl/headers/include/cuda/std/__algorithm/half_positive.h +49 -0
  555. cuda/cccl/headers/include/cuda/std/__algorithm/in_fun_result.h +55 -0
  556. cuda/cccl/headers/include/cuda/std/__algorithm/includes.h +90 -0
  557. cuda/cccl/headers/include/cuda/std/__algorithm/inplace_merge.h +293 -0
  558. cuda/cccl/headers/include/cuda/std/__algorithm/is_heap.h +50 -0
  559. cuda/cccl/headers/include/cuda/std/__algorithm/is_heap_until.h +83 -0
  560. cuda/cccl/headers/include/cuda/std/__algorithm/is_partitioned.h +57 -0
  561. cuda/cccl/headers/include/cuda/std/__algorithm/is_permutation.h +252 -0
  562. cuda/cccl/headers/include/cuda/std/__algorithm/is_sorted.h +49 -0
  563. cuda/cccl/headers/include/cuda/std/__algorithm/is_sorted_until.h +68 -0
  564. cuda/cccl/headers/include/cuda/std/__algorithm/iter_swap.h +82 -0
  565. cuda/cccl/headers/include/cuda/std/__algorithm/iterator_operations.h +185 -0
  566. cuda/cccl/headers/include/cuda/std/__algorithm/lexicographical_compare.h +68 -0
  567. cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h +82 -0
  568. cuda/cccl/headers/include/cuda/std/__algorithm/make_heap.h +70 -0
  569. cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h +88 -0
  570. cuda/cccl/headers/include/cuda/std/__algorithm/max.h +62 -0
  571. cuda/cccl/headers/include/cuda/std/__algorithm/max_element.h +67 -0
  572. cuda/cccl/headers/include/cuda/std/__algorithm/merge.h +89 -0
  573. cuda/cccl/headers/include/cuda/std/__algorithm/min.h +62 -0
  574. cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h +87 -0
  575. cuda/cccl/headers/include/cuda/std/__algorithm/minmax.h +66 -0
  576. cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h +139 -0
  577. cuda/cccl/headers/include/cuda/std/__algorithm/mismatch.h +83 -0
  578. cuda/cccl/headers/include/cuda/std/__algorithm/move.h +91 -0
  579. cuda/cccl/headers/include/cuda/std/__algorithm/move_backward.h +84 -0
  580. cuda/cccl/headers/include/cuda/std/__algorithm/next_permutation.h +88 -0
  581. cuda/cccl/headers/include/cuda/std/__algorithm/none_of.h +45 -0
  582. cuda/cccl/headers/include/cuda/std/__algorithm/nth_element.h +309 -0
  583. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort.h +102 -0
  584. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h +122 -0
  585. cuda/cccl/headers/include/cuda/std/__algorithm/partition.h +120 -0
  586. cuda/cccl/headers/include/cuda/std/__algorithm/partition_copy.h +59 -0
  587. cuda/cccl/headers/include/cuda/std/__algorithm/partition_point.h +61 -0
  588. cuda/cccl/headers/include/cuda/std/__algorithm/pop_heap.h +93 -0
  589. cuda/cccl/headers/include/cuda/std/__algorithm/prev_permutation.h +88 -0
  590. cuda/cccl/headers/include/cuda/std/__algorithm/push_heap.h +100 -0
  591. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_find_if.h +78 -0
  592. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_find_if_not.h +85 -0
  593. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_for_each.h +84 -0
  594. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_for_each_n.h +68 -0
  595. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_iterator_concept.h +65 -0
  596. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_min.h +97 -0
  597. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_min_element.h +68 -0
  598. cuda/cccl/headers/include/cuda/std/__algorithm/remove.h +55 -0
  599. cuda/cccl/headers/include/cuda/std/__algorithm/remove_copy.h +47 -0
  600. cuda/cccl/headers/include/cuda/std/__algorithm/remove_copy_if.h +47 -0
  601. cuda/cccl/headers/include/cuda/std/__algorithm/remove_if.h +56 -0
  602. cuda/cccl/headers/include/cuda/std/__algorithm/replace.h +45 -0
  603. cuda/cccl/headers/include/cuda/std/__algorithm/replace_copy.h +54 -0
  604. cuda/cccl/headers/include/cuda/std/__algorithm/replace_copy_if.h +50 -0
  605. cuda/cccl/headers/include/cuda/std/__algorithm/replace_if.h +45 -0
  606. cuda/cccl/headers/include/cuda/std/__algorithm/reverse.h +81 -0
  607. cuda/cccl/headers/include/cuda/std/__algorithm/reverse_copy.h +43 -0
  608. cuda/cccl/headers/include/cuda/std/__algorithm/rotate.h +261 -0
  609. cuda/cccl/headers/include/cuda/std/__algorithm/rotate_copy.h +40 -0
  610. cuda/cccl/headers/include/cuda/std/__algorithm/sample.h +116 -0
  611. cuda/cccl/headers/include/cuda/std/__algorithm/search.h +185 -0
  612. cuda/cccl/headers/include/cuda/std/__algorithm/search_n.h +163 -0
  613. cuda/cccl/headers/include/cuda/std/__algorithm/set_difference.h +95 -0
  614. cuda/cccl/headers/include/cuda/std/__algorithm/set_intersection.h +122 -0
  615. cuda/cccl/headers/include/cuda/std/__algorithm/set_symmetric_difference.h +134 -0
  616. cuda/cccl/headers/include/cuda/std/__algorithm/set_union.h +128 -0
  617. cuda/cccl/headers/include/cuda/std/__algorithm/shift_left.h +84 -0
  618. cuda/cccl/headers/include/cuda/std/__algorithm/shift_right.h +144 -0
  619. cuda/cccl/headers/include/cuda/std/__algorithm/shuffle.h +71 -0
  620. cuda/cccl/headers/include/cuda/std/__algorithm/sift_down.h +139 -0
  621. cuda/cccl/headers/include/cuda/std/__algorithm/sort.h +1097 -0
  622. cuda/cccl/headers/include/cuda/std/__algorithm/sort_heap.h +70 -0
  623. cuda/cccl/headers/include/cuda/std/__algorithm/stable_partition.h +359 -0
  624. cuda/cccl/headers/include/cuda/std/__algorithm/stable_sort.h +321 -0
  625. cuda/cccl/headers/include/cuda/std/__algorithm/swap_ranges.h +78 -0
  626. cuda/cccl/headers/include/cuda/std/__algorithm/transform.h +59 -0
  627. cuda/cccl/headers/include/cuda/std/__algorithm/unique.h +76 -0
  628. cuda/cccl/headers/include/cuda/std/__algorithm/unique_copy.h +153 -0
  629. cuda/cccl/headers/include/cuda/std/__algorithm/unwrap_iter.h +95 -0
  630. cuda/cccl/headers/include/cuda/std/__algorithm/unwrap_range.h +126 -0
  631. cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h +83 -0
  632. cuda/cccl/headers/include/cuda/std/__atomic/api/common.h +192 -0
  633. cuda/cccl/headers/include/cuda/std/__atomic/api/owned.h +136 -0
  634. cuda/cccl/headers/include/cuda/std/__atomic/api/reference.h +118 -0
  635. cuda/cccl/headers/include/cuda/std/__atomic/functions/common.h +58 -0
  636. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_local.h +208 -0
  637. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_derived.h +458 -0
  638. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated.h +4436 -0
  639. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated_helper.h +184 -0
  640. cuda/cccl/headers/include/cuda/std/__atomic/functions/host.h +242 -0
  641. cuda/cccl/headers/include/cuda/std/__atomic/functions.h +33 -0
  642. cuda/cccl/headers/include/cuda/std/__atomic/order.h +158 -0
  643. cuda/cccl/headers/include/cuda/std/__atomic/platform/msvc_to_builtins.h +654 -0
  644. cuda/cccl/headers/include/cuda/std/__atomic/platform.h +93 -0
  645. cuda/cccl/headers/include/cuda/std/__atomic/scopes.h +105 -0
  646. cuda/cccl/headers/include/cuda/std/__atomic/types/base.h +242 -0
  647. cuda/cccl/headers/include/cuda/std/__atomic/types/common.h +103 -0
  648. cuda/cccl/headers/include/cuda/std/__atomic/types/locked.h +225 -0
  649. cuda/cccl/headers/include/cuda/std/__atomic/types/reference.h +72 -0
  650. cuda/cccl/headers/include/cuda/std/__atomic/types/small.h +238 -0
  651. cuda/cccl/headers/include/cuda/std/__atomic/types.h +51 -0
  652. cuda/cccl/headers/include/cuda/std/__atomic/wait/notify_wait.h +95 -0
  653. cuda/cccl/headers/include/cuda/std/__atomic/wait/polling.h +65 -0
  654. cuda/cccl/headers/include/cuda/std/__barrier/barrier.h +227 -0
  655. cuda/cccl/headers/include/cuda/std/__barrier/empty_completion.h +37 -0
  656. cuda/cccl/headers/include/cuda/std/__barrier/poll_tester.h +82 -0
  657. cuda/cccl/headers/include/cuda/std/__bit/bit_cast.h +81 -0
  658. cuda/cccl/headers/include/cuda/std/__bit/blsr.h +51 -0
  659. cuda/cccl/headers/include/cuda/std/__bit/byteswap.h +209 -0
  660. cuda/cccl/headers/include/cuda/std/__bit/countl.h +191 -0
  661. cuda/cccl/headers/include/cuda/std/__bit/countr.h +202 -0
  662. cuda/cccl/headers/include/cuda/std/__bit/endian.h +39 -0
  663. cuda/cccl/headers/include/cuda/std/__bit/has_single_bit.h +43 -0
  664. cuda/cccl/headers/include/cuda/std/__bit/integral.h +125 -0
  665. cuda/cccl/headers/include/cuda/std/__bit/popcount.h +172 -0
  666. cuda/cccl/headers/include/cuda/std/__bit/reference.h +1272 -0
  667. cuda/cccl/headers/include/cuda/std/__bit/rotate.h +185 -0
  668. cuda/cccl/headers/include/cuda/std/__cccl/algorithm_wrapper.h +36 -0
  669. cuda/cccl/headers/include/cuda/std/__cccl/architecture.h +78 -0
  670. cuda/cccl/headers/include/cuda/std/__cccl/assert.h +161 -0
  671. cuda/cccl/headers/include/cuda/std/__cccl/attributes.h +206 -0
  672. cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +494 -0
  673. cuda/cccl/headers/include/cuda/std/__cccl/compiler.h +213 -0
  674. cuda/cccl/headers/include/cuda/std/__cccl/cuda_capabilities.h +51 -0
  675. cuda/cccl/headers/include/cuda/std/__cccl/cuda_toolkit.h +56 -0
  676. cuda/cccl/headers/include/cuda/std/__cccl/deprecated.h +87 -0
  677. cuda/cccl/headers/include/cuda/std/__cccl/diagnostic.h +131 -0
  678. cuda/cccl/headers/include/cuda/std/__cccl/dialect.h +197 -0
  679. cuda/cccl/headers/include/cuda/std/__cccl/epilogue.h +355 -0
  680. cuda/cccl/headers/include/cuda/std/__cccl/exceptions.h +36 -0
  681. cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h +74 -0
  682. cuda/cccl/headers/include/cuda/std/__cccl/extended_data_types.h +139 -0
  683. cuda/cccl/headers/include/cuda/std/__cccl/host_std_lib.h +59 -0
  684. cuda/cccl/headers/include/cuda/std/__cccl/is_non_narrowing_convertible.h +71 -0
  685. cuda/cccl/headers/include/cuda/std/__cccl/memory_wrapper.h +36 -0
  686. cuda/cccl/headers/include/cuda/std/__cccl/numeric_wrapper.h +36 -0
  687. cuda/cccl/headers/include/cuda/std/__cccl/os.h +54 -0
  688. cuda/cccl/headers/include/cuda/std/__cccl/preprocessor.h +1288 -0
  689. cuda/cccl/headers/include/cuda/std/__cccl/prologue.h +312 -0
  690. cuda/cccl/headers/include/cuda/std/__cccl/ptx_isa.h +363 -0
  691. cuda/cccl/headers/include/cuda/std/__cccl/rtti.h +72 -0
  692. cuda/cccl/headers/include/cuda/std/__cccl/sequence_access.h +87 -0
  693. cuda/cccl/headers/include/cuda/std/__cccl/system_header.h +38 -0
  694. cuda/cccl/headers/include/cuda/std/__cccl/unreachable.h +31 -0
  695. cuda/cccl/headers/include/cuda/std/__cccl/version.h +26 -0
  696. cuda/cccl/headers/include/cuda/std/__cccl/visibility.h +171 -0
  697. cuda/cccl/headers/include/cuda/std/__charconv/chars_format.h +81 -0
  698. cuda/cccl/headers/include/cuda/std/__charconv/from_chars.h +171 -0
  699. cuda/cccl/headers/include/cuda/std/__charconv/from_chars_result.h +56 -0
  700. cuda/cccl/headers/include/cuda/std/__charconv/to_chars.h +192 -0
  701. cuda/cccl/headers/include/cuda/std/__charconv/to_chars_result.h +56 -0
  702. cuda/cccl/headers/include/cuda/std/__chrono/calendar.h +52 -0
  703. cuda/cccl/headers/include/cuda/std/__chrono/day.h +160 -0
  704. cuda/cccl/headers/include/cuda/std/__chrono/duration.h +499 -0
  705. cuda/cccl/headers/include/cuda/std/__chrono/file_clock.h +53 -0
  706. cuda/cccl/headers/include/cuda/std/__chrono/high_resolution_clock.h +44 -0
  707. cuda/cccl/headers/include/cuda/std/__chrono/month.h +185 -0
  708. cuda/cccl/headers/include/cuda/std/__chrono/steady_clock.h +58 -0
  709. cuda/cccl/headers/include/cuda/std/__chrono/system_clock.h +78 -0
  710. cuda/cccl/headers/include/cuda/std/__chrono/time_point.h +255 -0
  711. cuda/cccl/headers/include/cuda/std/__chrono/year.h +184 -0
  712. cuda/cccl/headers/include/cuda/std/__cmath/abs.h +127 -0
  713. cuda/cccl/headers/include/cuda/std/__cmath/copysign.h +88 -0
  714. cuda/cccl/headers/include/cuda/std/__cmath/error_functions.h +204 -0
  715. cuda/cccl/headers/include/cuda/std/__cmath/exponential_functions.h +783 -0
  716. cuda/cccl/headers/include/cuda/std/__cmath/fdim.h +122 -0
  717. cuda/cccl/headers/include/cuda/std/__cmath/fma.h +129 -0
  718. cuda/cccl/headers/include/cuda/std/__cmath/fpclassify.h +230 -0
  719. cuda/cccl/headers/include/cuda/std/__cmath/gamma.h +204 -0
  720. cuda/cccl/headers/include/cuda/std/__cmath/hyperbolic_functions.h +285 -0
  721. cuda/cccl/headers/include/cuda/std/__cmath/hypot.h +220 -0
  722. cuda/cccl/headers/include/cuda/std/__cmath/inverse_hyperbolic_functions.h +285 -0
  723. cuda/cccl/headers/include/cuda/std/__cmath/inverse_trigonometric_functions.h +370 -0
  724. cuda/cccl/headers/include/cuda/std/__cmath/isfinite.h +166 -0
  725. cuda/cccl/headers/include/cuda/std/__cmath/isinf.h +203 -0
  726. cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +184 -0
  727. cuda/cccl/headers/include/cuda/std/__cmath/isnormal.h +138 -0
  728. cuda/cccl/headers/include/cuda/std/__cmath/lerp.h +101 -0
  729. cuda/cccl/headers/include/cuda/std/__cmath/logarithms.h +533 -0
  730. cuda/cccl/headers/include/cuda/std/__cmath/min_max.h +287 -0
  731. cuda/cccl/headers/include/cuda/std/__cmath/modulo.h +208 -0
  732. cuda/cccl/headers/include/cuda/std/__cmath/nan.h +54 -0
  733. cuda/cccl/headers/include/cuda/std/__cmath/remainder.h +210 -0
  734. cuda/cccl/headers/include/cuda/std/__cmath/roots.h +198 -0
  735. cuda/cccl/headers/include/cuda/std/__cmath/rounding_functions.h +983 -0
  736. cuda/cccl/headers/include/cuda/std/__cmath/signbit.h +56 -0
  737. cuda/cccl/headers/include/cuda/std/__cmath/traits.h +242 -0
  738. cuda/cccl/headers/include/cuda/std/__cmath/trigonometric_functions.h +327 -0
  739. cuda/cccl/headers/include/cuda/std/__complex/arg.h +84 -0
  740. cuda/cccl/headers/include/cuda/std/__complex/complex.h +669 -0
  741. cuda/cccl/headers/include/cuda/std/__complex/exponential_functions.h +403 -0
  742. cuda/cccl/headers/include/cuda/std/__complex/hyperbolic_functions.h +119 -0
  743. cuda/cccl/headers/include/cuda/std/__complex/inverse_hyperbolic_functions.h +522 -0
  744. cuda/cccl/headers/include/cuda/std/__complex/inverse_trigonometric_functions.h +131 -0
  745. cuda/cccl/headers/include/cuda/std/__complex/literals.h +86 -0
  746. cuda/cccl/headers/include/cuda/std/__complex/logarithms.h +302 -0
  747. cuda/cccl/headers/include/cuda/std/__complex/math.h +161 -0
  748. cuda/cccl/headers/include/cuda/std/__complex/nvbf16.h +323 -0
  749. cuda/cccl/headers/include/cuda/std/__complex/nvfp16.h +322 -0
  750. cuda/cccl/headers/include/cuda/std/__complex/roots.h +214 -0
  751. cuda/cccl/headers/include/cuda/std/__complex/trigonometric_functions.h +61 -0
  752. cuda/cccl/headers/include/cuda/std/__complex/tuple.h +107 -0
  753. cuda/cccl/headers/include/cuda/std/__complex/vector_support.h +130 -0
  754. cuda/cccl/headers/include/cuda/std/__concepts/arithmetic.h +56 -0
  755. cuda/cccl/headers/include/cuda/std/__concepts/assignable.h +64 -0
  756. cuda/cccl/headers/include/cuda/std/__concepts/boolean_testable.h +63 -0
  757. cuda/cccl/headers/include/cuda/std/__concepts/class_or_enum.h +45 -0
  758. cuda/cccl/headers/include/cuda/std/__concepts/common_reference_with.h +69 -0
  759. cuda/cccl/headers/include/cuda/std/__concepts/common_with.h +82 -0
  760. cuda/cccl/headers/include/cuda/std/__concepts/concept_macros.h +385 -0
  761. cuda/cccl/headers/include/cuda/std/__concepts/constructible.h +174 -0
  762. cuda/cccl/headers/include/cuda/std/__concepts/convertible_to.h +70 -0
  763. cuda/cccl/headers/include/cuda/std/__concepts/copyable.h +60 -0
  764. cuda/cccl/headers/include/cuda/std/__concepts/derived_from.h +56 -0
  765. cuda/cccl/headers/include/cuda/std/__concepts/destructible.h +76 -0
  766. cuda/cccl/headers/include/cuda/std/__concepts/different_from.h +38 -0
  767. cuda/cccl/headers/include/cuda/std/__concepts/equality_comparable.h +98 -0
  768. cuda/cccl/headers/include/cuda/std/__concepts/invocable.h +80 -0
  769. cuda/cccl/headers/include/cuda/std/__concepts/movable.h +58 -0
  770. cuda/cccl/headers/include/cuda/std/__concepts/predicate.h +54 -0
  771. cuda/cccl/headers/include/cuda/std/__concepts/regular.h +54 -0
  772. cuda/cccl/headers/include/cuda/std/__concepts/relation.h +77 -0
  773. cuda/cccl/headers/include/cuda/std/__concepts/same_as.h +39 -0
  774. cuda/cccl/headers/include/cuda/std/__concepts/semiregular.h +54 -0
  775. cuda/cccl/headers/include/cuda/std/__concepts/swappable.h +206 -0
  776. cuda/cccl/headers/include/cuda/std/__concepts/totally_ordered.h +101 -0
  777. cuda/cccl/headers/include/cuda/std/__cstddef/byte.h +113 -0
  778. cuda/cccl/headers/include/cuda/std/__cstddef/types.h +52 -0
  779. cuda/cccl/headers/include/cuda/std/__cstdlib/abs.h +57 -0
  780. cuda/cccl/headers/include/cuda/std/__cstdlib/aligned_alloc.h +66 -0
  781. cuda/cccl/headers/include/cuda/std/__cstdlib/div.h +96 -0
  782. cuda/cccl/headers/include/cuda/std/__cstdlib/malloc.h +70 -0
  783. cuda/cccl/headers/include/cuda/std/__cstring/memcpy.h +61 -0
  784. cuda/cccl/headers/include/cuda/std/__cstring/memset.h +46 -0
  785. cuda/cccl/headers/include/cuda/std/__exception/cuda_error.h +110 -0
  786. cuda/cccl/headers/include/cuda/std/__exception/exception_macros.h +108 -0
  787. cuda/cccl/headers/include/cuda/std/__exception/format_error.h +62 -0
  788. cuda/cccl/headers/include/cuda/std/__exception/msg_storage.h +41 -0
  789. cuda/cccl/headers/include/cuda/std/__exception/terminate.h +74 -0
  790. cuda/cccl/headers/include/cuda/std/__exception/throw_error.h +120 -0
  791. cuda/cccl/headers/include/cuda/std/__execution/env.h +455 -0
  792. cuda/cccl/headers/include/cuda/std/__execution/policy.h +90 -0
  793. cuda/cccl/headers/include/cuda/std/__expected/bad_expected_access.h +127 -0
  794. cuda/cccl/headers/include/cuda/std/__expected/expected.h +1941 -0
  795. cuda/cccl/headers/include/cuda/std/__expected/expected_base.h +1051 -0
  796. cuda/cccl/headers/include/cuda/std/__expected/unexpect.h +37 -0
  797. cuda/cccl/headers/include/cuda/std/__expected/unexpected.h +164 -0
  798. cuda/cccl/headers/include/cuda/std/__floating_point/arithmetic.h +56 -0
  799. cuda/cccl/headers/include/cuda/std/__floating_point/cast.h +812 -0
  800. cuda/cccl/headers/include/cuda/std/__floating_point/cccl_fp.h +125 -0
  801. cuda/cccl/headers/include/cuda/std/__floating_point/common_type.h +48 -0
  802. cuda/cccl/headers/include/cuda/std/__floating_point/constants.h +375 -0
  803. cuda/cccl/headers/include/cuda/std/__floating_point/conversion_rank_order.h +124 -0
  804. cuda/cccl/headers/include/cuda/std/__floating_point/cuda_fp_types.h +116 -0
  805. cuda/cccl/headers/include/cuda/std/__floating_point/decompose.h +126 -0
  806. cuda/cccl/headers/include/cuda/std/__floating_point/format.h +162 -0
  807. cuda/cccl/headers/include/cuda/std/__floating_point/fp.h +40 -0
  808. cuda/cccl/headers/include/cuda/std/__floating_point/mask.h +78 -0
  809. cuda/cccl/headers/include/cuda/std/__floating_point/native_type.h +81 -0
  810. cuda/cccl/headers/include/cuda/std/__floating_point/overflow_handler.h +139 -0
  811. cuda/cccl/headers/include/cuda/std/__floating_point/properties.h +229 -0
  812. cuda/cccl/headers/include/cuda/std/__floating_point/storage.h +248 -0
  813. cuda/cccl/headers/include/cuda/std/__floating_point/traits.h +172 -0
  814. cuda/cccl/headers/include/cuda/std/__format/buffer.h +48 -0
  815. cuda/cccl/headers/include/cuda/std/__format/concepts.h +69 -0
  816. cuda/cccl/headers/include/cuda/std/__format/format_arg.h +282 -0
  817. cuda/cccl/headers/include/cuda/std/__format/format_arg_store.h +279 -0
  818. cuda/cccl/headers/include/cuda/std/__format/format_args.h +122 -0
  819. cuda/cccl/headers/include/cuda/std/__format/format_context.h +93 -0
  820. cuda/cccl/headers/include/cuda/std/__format/format_error.h +76 -0
  821. cuda/cccl/headers/include/cuda/std/__format/format_integral.h +237 -0
  822. cuda/cccl/headers/include/cuda/std/__format/format_parse_context.h +124 -0
  823. cuda/cccl/headers/include/cuda/std/__format/format_spec_parser.h +1265 -0
  824. cuda/cccl/headers/include/cuda/std/__format/formatter.h +59 -0
  825. cuda/cccl/headers/include/cuda/std/__format/formatters/bool.h +101 -0
  826. cuda/cccl/headers/include/cuda/std/__format/formatters/char.h +124 -0
  827. cuda/cccl/headers/include/cuda/std/__format/formatters/fp.h +101 -0
  828. cuda/cccl/headers/include/cuda/std/__format/formatters/int.h +174 -0
  829. cuda/cccl/headers/include/cuda/std/__format/formatters/ptr.h +104 -0
  830. cuda/cccl/headers/include/cuda/std/__format/formatters/str.h +178 -0
  831. cuda/cccl/headers/include/cuda/std/__format/output_utils.h +272 -0
  832. cuda/cccl/headers/include/cuda/std/__format/parse_arg_id.h +138 -0
  833. cuda/cccl/headers/include/cuda/std/__format_ +45 -0
  834. cuda/cccl/headers/include/cuda/std/__functional/binary_function.h +63 -0
  835. cuda/cccl/headers/include/cuda/std/__functional/binary_negate.h +65 -0
  836. cuda/cccl/headers/include/cuda/std/__functional/bind.h +334 -0
  837. cuda/cccl/headers/include/cuda/std/__functional/bind_back.h +81 -0
  838. cuda/cccl/headers/include/cuda/std/__functional/bind_front.h +74 -0
  839. cuda/cccl/headers/include/cuda/std/__functional/binder1st.h +74 -0
  840. cuda/cccl/headers/include/cuda/std/__functional/binder2nd.h +74 -0
  841. cuda/cccl/headers/include/cuda/std/__functional/compose.h +69 -0
  842. cuda/cccl/headers/include/cuda/std/__functional/default_searcher.h +76 -0
  843. cuda/cccl/headers/include/cuda/std/__functional/function.h +1271 -0
  844. cuda/cccl/headers/include/cuda/std/__functional/hash.h +649 -0
  845. cuda/cccl/headers/include/cuda/std/__functional/identity.h +57 -0
  846. cuda/cccl/headers/include/cuda/std/__functional/invoke.h +298 -0
  847. cuda/cccl/headers/include/cuda/std/__functional/is_transparent.h +41 -0
  848. cuda/cccl/headers/include/cuda/std/__functional/mem_fn.h +66 -0
  849. cuda/cccl/headers/include/cuda/std/__functional/mem_fun_ref.h +211 -0
  850. cuda/cccl/headers/include/cuda/std/__functional/not_fn.h +120 -0
  851. cuda/cccl/headers/include/cuda/std/__functional/operations.h +535 -0
  852. cuda/cccl/headers/include/cuda/std/__functional/perfect_forward.h +128 -0
  853. cuda/cccl/headers/include/cuda/std/__functional/pointer_to_binary_function.h +64 -0
  854. cuda/cccl/headers/include/cuda/std/__functional/pointer_to_unary_function.h +63 -0
  855. cuda/cccl/headers/include/cuda/std/__functional/ranges_operations.h +113 -0
  856. cuda/cccl/headers/include/cuda/std/__functional/reference_wrapper.h +114 -0
  857. cuda/cccl/headers/include/cuda/std/__functional/unary_function.h +62 -0
  858. cuda/cccl/headers/include/cuda/std/__functional/unary_negate.h +65 -0
  859. cuda/cccl/headers/include/cuda/std/__functional/unwrap_ref.h +56 -0
  860. cuda/cccl/headers/include/cuda/std/__functional/weak_result_type.h +262 -0
  861. cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +53 -0
  862. cuda/cccl/headers/include/cuda/std/__fwd/array.h +42 -0
  863. cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +74 -0
  864. cuda/cccl/headers/include/cuda/std/__fwd/complex.h +75 -0
  865. cuda/cccl/headers/include/cuda/std/__fwd/execution_policy.h +73 -0
  866. cuda/cccl/headers/include/cuda/std/__fwd/expected.h +46 -0
  867. cuda/cccl/headers/include/cuda/std/__fwd/format.h +84 -0
  868. cuda/cccl/headers/include/cuda/std/__fwd/fp.h +37 -0
  869. cuda/cccl/headers/include/cuda/std/__fwd/get.h +122 -0
  870. cuda/cccl/headers/include/cuda/std/__fwd/hash.h +34 -0
  871. cuda/cccl/headers/include/cuda/std/__fwd/ios.h +123 -0
  872. cuda/cccl/headers/include/cuda/std/__fwd/iterator.h +43 -0
  873. cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +122 -0
  874. cuda/cccl/headers/include/cuda/std/__fwd/memory_resource.h +37 -0
  875. cuda/cccl/headers/include/cuda/std/__fwd/optional.h +39 -0
  876. cuda/cccl/headers/include/cuda/std/__fwd/pair.h +47 -0
  877. cuda/cccl/headers/include/cuda/std/__fwd/reference_wrapper.h +39 -0
  878. cuda/cccl/headers/include/cuda/std/__fwd/span.h +45 -0
  879. cuda/cccl/headers/include/cuda/std/__fwd/string.h +112 -0
  880. cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +91 -0
  881. cuda/cccl/headers/include/cuda/std/__fwd/subrange.h +55 -0
  882. cuda/cccl/headers/include/cuda/std/__fwd/tuple.h +37 -0
  883. cuda/cccl/headers/include/cuda/std/__fwd/unexpected.h +40 -0
  884. cuda/cccl/headers/include/cuda/std/__fwd/variant.h +51 -0
  885. cuda/cccl/headers/include/cuda/std/__internal/atomic.h +55 -0
  886. cuda/cccl/headers/include/cuda/std/__internal/cpp_dialect.h +44 -0
  887. cuda/cccl/headers/include/cuda/std/__internal/features.h +104 -0
  888. cuda/cccl/headers/include/cuda/std/__internal/namespaces.h +132 -0
  889. cuda/cccl/headers/include/cuda/std/__internal/pstl_config.h +32 -0
  890. cuda/cccl/headers/include/cuda/std/__internal/thread_api.h +58 -0
  891. cuda/cccl/headers/include/cuda/std/__internal/version.h +52 -0
  892. cuda/cccl/headers/include/cuda/std/__iterator/access.h +128 -0
  893. cuda/cccl/headers/include/cuda/std/__iterator/advance.h +227 -0
  894. cuda/cccl/headers/include/cuda/std/__iterator/back_insert_iterator.h +164 -0
  895. cuda/cccl/headers/include/cuda/std/__iterator/bounded_iter.h +253 -0
  896. cuda/cccl/headers/include/cuda/std/__iterator/concepts.h +645 -0
  897. cuda/cccl/headers/include/cuda/std/__iterator/counted_iterator.h +465 -0
  898. cuda/cccl/headers/include/cuda/std/__iterator/data.h +61 -0
  899. cuda/cccl/headers/include/cuda/std/__iterator/default_sentinel.h +36 -0
  900. cuda/cccl/headers/include/cuda/std/__iterator/distance.h +124 -0
  901. cuda/cccl/headers/include/cuda/std/__iterator/empty.h +53 -0
  902. cuda/cccl/headers/include/cuda/std/__iterator/erase_if_container.h +53 -0
  903. cuda/cccl/headers/include/cuda/std/__iterator/front_insert_iterator.h +100 -0
  904. cuda/cccl/headers/include/cuda/std/__iterator/incrementable_traits.h +143 -0
  905. cuda/cccl/headers/include/cuda/std/__iterator/indirectly_comparable.h +55 -0
  906. cuda/cccl/headers/include/cuda/std/__iterator/insert_iterator.h +107 -0
  907. cuda/cccl/headers/include/cuda/std/__iterator/istream_iterator.h +146 -0
  908. cuda/cccl/headers/include/cuda/std/__iterator/istreambuf_iterator.h +161 -0
  909. cuda/cccl/headers/include/cuda/std/__iterator/iter_move.h +161 -0
  910. cuda/cccl/headers/include/cuda/std/__iterator/iter_swap.h +181 -0
  911. cuda/cccl/headers/include/cuda/std/__iterator/iterator.h +44 -0
  912. cuda/cccl/headers/include/cuda/std/__iterator/iterator_traits.h +847 -0
  913. cuda/cccl/headers/include/cuda/std/__iterator/mergeable.h +72 -0
  914. cuda/cccl/headers/include/cuda/std/__iterator/move_iterator.h +432 -0
  915. cuda/cccl/headers/include/cuda/std/__iterator/move_sentinel.h +73 -0
  916. cuda/cccl/headers/include/cuda/std/__iterator/next.h +101 -0
  917. cuda/cccl/headers/include/cuda/std/__iterator/ostream_iterator.h +95 -0
  918. cuda/cccl/headers/include/cuda/std/__iterator/ostreambuf_iterator.h +100 -0
  919. cuda/cccl/headers/include/cuda/std/__iterator/permutable.h +54 -0
  920. cuda/cccl/headers/include/cuda/std/__iterator/prev.h +90 -0
  921. cuda/cccl/headers/include/cuda/std/__iterator/projected.h +61 -0
  922. cuda/cccl/headers/include/cuda/std/__iterator/readable_traits.h +156 -0
  923. cuda/cccl/headers/include/cuda/std/__iterator/reverse_access.h +142 -0
  924. cuda/cccl/headers/include/cuda/std/__iterator/reverse_iterator.h +366 -0
  925. cuda/cccl/headers/include/cuda/std/__iterator/size.h +69 -0
  926. cuda/cccl/headers/include/cuda/std/__iterator/sortable.h +55 -0
  927. cuda/cccl/headers/include/cuda/std/__iterator/unreachable_sentinel.h +84 -0
  928. cuda/cccl/headers/include/cuda/std/__iterator/wrap_iter.h +245 -0
  929. cuda/cccl/headers/include/cuda/std/__latch/latch.h +88 -0
  930. cuda/cccl/headers/include/cuda/std/__limits/numeric_limits.h +603 -0
  931. cuda/cccl/headers/include/cuda/std/__limits/numeric_limits_ext.h +753 -0
  932. cuda/cccl/headers/include/cuda/std/__linalg/conj_if_needed.h +76 -0
  933. cuda/cccl/headers/include/cuda/std/__linalg/conjugate_transposed.h +52 -0
  934. cuda/cccl/headers/include/cuda/std/__linalg/conjugated.h +136 -0
  935. cuda/cccl/headers/include/cuda/std/__linalg/scaled.h +128 -0
  936. cuda/cccl/headers/include/cuda/std/__linalg/transposed.h +315 -0
  937. cuda/cccl/headers/include/cuda/std/__mdspan/aligned_accessor.h +97 -0
  938. cuda/cccl/headers/include/cuda/std/__mdspan/concepts.h +137 -0
  939. cuda/cccl/headers/include/cuda/std/__mdspan/default_accessor.h +73 -0
  940. cuda/cccl/headers/include/cuda/std/__mdspan/empty_base.h +348 -0
  941. cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +749 -0
  942. cuda/cccl/headers/include/cuda/std/__mdspan/layout_left.h +314 -0
  943. cuda/cccl/headers/include/cuda/std/__mdspan/layout_right.h +307 -0
  944. cuda/cccl/headers/include/cuda/std/__mdspan/layout_stride.h +598 -0
  945. cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +515 -0
  946. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_extents.h +190 -0
  947. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_helper.h +187 -0
  948. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_mapping.h +339 -0
  949. cuda/cccl/headers/include/cuda/std/__memory/addressof.h +90 -0
  950. cuda/cccl/headers/include/cuda/std/__memory/align.h +67 -0
  951. cuda/cccl/headers/include/cuda/std/__memory/allocate_at_least.h +82 -0
  952. cuda/cccl/headers/include/cuda/std/__memory/allocation_guard.h +100 -0
  953. cuda/cccl/headers/include/cuda/std/__memory/allocator.h +327 -0
  954. cuda/cccl/headers/include/cuda/std/__memory/allocator_arg_t.h +85 -0
  955. cuda/cccl/headers/include/cuda/std/__memory/allocator_destructor.h +59 -0
  956. cuda/cccl/headers/include/cuda/std/__memory/allocator_traits.h +517 -0
  957. cuda/cccl/headers/include/cuda/std/__memory/assume_aligned.h +59 -0
  958. cuda/cccl/headers/include/cuda/std/__memory/builtin_new_allocator.h +87 -0
  959. cuda/cccl/headers/include/cuda/std/__memory/compressed_pair.h +225 -0
  960. cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +241 -0
  961. cuda/cccl/headers/include/cuda/std/__memory/destruct_n.h +91 -0
  962. cuda/cccl/headers/include/cuda/std/__memory/is_sufficiently_aligned.h +51 -0
  963. cuda/cccl/headers/include/cuda/std/__memory/pointer_traits.h +246 -0
  964. cuda/cccl/headers/include/cuda/std/__memory/runtime_assume_aligned.h +62 -0
  965. cuda/cccl/headers/include/cuda/std/__memory/temporary_buffer.h +99 -0
  966. cuda/cccl/headers/include/cuda/std/__memory/uninitialized_algorithms.h +678 -0
  967. cuda/cccl/headers/include/cuda/std/__memory/unique_ptr.h +765 -0
  968. cuda/cccl/headers/include/cuda/std/__memory/uses_allocator.h +59 -0
  969. cuda/cccl/headers/include/cuda/std/__memory/voidify.h +41 -0
  970. cuda/cccl/headers/include/cuda/std/__memory_ +34 -0
  971. cuda/cccl/headers/include/cuda/std/__new/allocate.h +131 -0
  972. cuda/cccl/headers/include/cuda/std/__new/bad_alloc.h +57 -0
  973. cuda/cccl/headers/include/cuda/std/__new/device_new.h +30 -0
  974. cuda/cccl/headers/include/cuda/std/__new/launder.h +53 -0
  975. cuda/cccl/headers/include/cuda/std/__new_ +30 -0
  976. cuda/cccl/headers/include/cuda/std/__numeric/accumulate.h +56 -0
  977. cuda/cccl/headers/include/cuda/std/__numeric/adjacent_difference.h +72 -0
  978. cuda/cccl/headers/include/cuda/std/__numeric/exclusive_scan.h +66 -0
  979. cuda/cccl/headers/include/cuda/std/__numeric/gcd_lcm.h +78 -0
  980. cuda/cccl/headers/include/cuda/std/__numeric/inclusive_scan.h +73 -0
  981. cuda/cccl/headers/include/cuda/std/__numeric/inner_product.h +62 -0
  982. cuda/cccl/headers/include/cuda/std/__numeric/iota.h +42 -0
  983. cuda/cccl/headers/include/cuda/std/__numeric/midpoint.h +97 -0
  984. cuda/cccl/headers/include/cuda/std/__numeric/partial_sum.h +69 -0
  985. cuda/cccl/headers/include/cuda/std/__numeric/reduce.h +60 -0
  986. cuda/cccl/headers/include/cuda/std/__numeric/transform_exclusive_scan.h +51 -0
  987. cuda/cccl/headers/include/cuda/std/__numeric/transform_inclusive_scan.h +65 -0
  988. cuda/cccl/headers/include/cuda/std/__numeric/transform_reduce.h +72 -0
  989. cuda/cccl/headers/include/cuda/std/__optional/bad_optional_access.h +74 -0
  990. cuda/cccl/headers/include/cuda/std/__optional/hash.h +53 -0
  991. cuda/cccl/headers/include/cuda/std/__optional/make_optional.h +61 -0
  992. cuda/cccl/headers/include/cuda/std/__optional/nullopt.h +43 -0
  993. cuda/cccl/headers/include/cuda/std/__optional/optional.h +861 -0
  994. cuda/cccl/headers/include/cuda/std/__optional/optional_base.h +439 -0
  995. cuda/cccl/headers/include/cuda/std/__optional/optional_ref.h +324 -0
  996. cuda/cccl/headers/include/cuda/std/__pstl/cuda/for_each_n.h +97 -0
  997. cuda/cccl/headers/include/cuda/std/__pstl/dispatch.h +123 -0
  998. cuda/cccl/headers/include/cuda/std/__pstl/for_each.h +71 -0
  999. cuda/cccl/headers/include/cuda/std/__pstl/for_each_n.h +68 -0
  1000. cuda/cccl/headers/include/cuda/std/__random/bernoulli_distribution.h +173 -0
  1001. cuda/cccl/headers/include/cuda/std/__random/binomial_distribution.h +254 -0
  1002. cuda/cccl/headers/include/cuda/std/__random/cauchy_distribution.h +192 -0
  1003. cuda/cccl/headers/include/cuda/std/__random/chi_squared_distribution.h +179 -0
  1004. cuda/cccl/headers/include/cuda/std/__random/exponential_distribution.h +187 -0
  1005. cuda/cccl/headers/include/cuda/std/__random/extreme_value_distribution.h +196 -0
  1006. cuda/cccl/headers/include/cuda/std/__random/fisher_f_distribution.h +196 -0
  1007. cuda/cccl/headers/include/cuda/std/__random/gamma_distribution.h +257 -0
  1008. cuda/cccl/headers/include/cuda/std/__random/generate_canonical.h +56 -0
  1009. cuda/cccl/headers/include/cuda/std/__random/geometric_distribution.h +179 -0
  1010. cuda/cccl/headers/include/cuda/std/__random/is_seed_sequence.h +39 -0
  1011. cuda/cccl/headers/include/cuda/std/__random/is_valid.h +70 -0
  1012. cuda/cccl/headers/include/cuda/std/__random/linear_congruential_engine.h +398 -0
  1013. cuda/cccl/headers/include/cuda/std/__random/lognormal_distribution.h +174 -0
  1014. cuda/cccl/headers/include/cuda/std/__random/negative_binomial_distribution.h +212 -0
  1015. cuda/cccl/headers/include/cuda/std/__random/normal_distribution.h +232 -0
  1016. cuda/cccl/headers/include/cuda/std/__random/philox_engine.h +562 -0
  1017. cuda/cccl/headers/include/cuda/std/__random/poisson_distribution.h +338 -0
  1018. cuda/cccl/headers/include/cuda/std/__random/seed_seq.h +204 -0
  1019. cuda/cccl/headers/include/cuda/std/__random/student_t_distribution.h +186 -0
  1020. cuda/cccl/headers/include/cuda/std/__random/uniform_int_distribution.h +341 -0
  1021. cuda/cccl/headers/include/cuda/std/__random/uniform_real_distribution.h +192 -0
  1022. cuda/cccl/headers/include/cuda/std/__random/weibull_distribution.h +189 -0
  1023. cuda/cccl/headers/include/cuda/std/__random_ +47 -0
  1024. cuda/cccl/headers/include/cuda/std/__ranges/access.h +303 -0
  1025. cuda/cccl/headers/include/cuda/std/__ranges/all.h +98 -0
  1026. cuda/cccl/headers/include/cuda/std/__ranges/compressed_movable_box.h +889 -0
  1027. cuda/cccl/headers/include/cuda/std/__ranges/concepts.h +302 -0
  1028. cuda/cccl/headers/include/cuda/std/__ranges/counted.h +90 -0
  1029. cuda/cccl/headers/include/cuda/std/__ranges/dangling.h +54 -0
  1030. cuda/cccl/headers/include/cuda/std/__ranges/data.h +136 -0
  1031. cuda/cccl/headers/include/cuda/std/__ranges/drop_view.h +389 -0
  1032. cuda/cccl/headers/include/cuda/std/__ranges/empty.h +109 -0
  1033. cuda/cccl/headers/include/cuda/std/__ranges/empty_view.h +77 -0
  1034. cuda/cccl/headers/include/cuda/std/__ranges/enable_borrowed_range.h +41 -0
  1035. cuda/cccl/headers/include/cuda/std/__ranges/enable_view.h +78 -0
  1036. cuda/cccl/headers/include/cuda/std/__ranges/from_range.h +36 -0
  1037. cuda/cccl/headers/include/cuda/std/__ranges/iota_view.h +264 -0
  1038. cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +409 -0
  1039. cuda/cccl/headers/include/cuda/std/__ranges/non_propagating_cache.h +210 -0
  1040. cuda/cccl/headers/include/cuda/std/__ranges/owning_view.h +163 -0
  1041. cuda/cccl/headers/include/cuda/std/__ranges/range_adaptor.h +111 -0
  1042. cuda/cccl/headers/include/cuda/std/__ranges/rbegin.h +175 -0
  1043. cuda/cccl/headers/include/cuda/std/__ranges/ref_view.h +121 -0
  1044. cuda/cccl/headers/include/cuda/std/__ranges/rend.h +182 -0
  1045. cuda/cccl/headers/include/cuda/std/__ranges/repeat_view.h +346 -0
  1046. cuda/cccl/headers/include/cuda/std/__ranges/single_view.h +155 -0
  1047. cuda/cccl/headers/include/cuda/std/__ranges/size.h +201 -0
  1048. cuda/cccl/headers/include/cuda/std/__ranges/subrange.h +510 -0
  1049. cuda/cccl/headers/include/cuda/std/__ranges/take_view.h +472 -0
  1050. cuda/cccl/headers/include/cuda/std/__ranges/take_while_view.h +259 -0
  1051. cuda/cccl/headers/include/cuda/std/__ranges/transform_view.h +522 -0
  1052. cuda/cccl/headers/include/cuda/std/__ranges/unwrap_end.h +52 -0
  1053. cuda/cccl/headers/include/cuda/std/__ranges/view_interface.h +183 -0
  1054. cuda/cccl/headers/include/cuda/std/__ranges/views.h +38 -0
  1055. cuda/cccl/headers/include/cuda/std/__semaphore/atomic_semaphore.h +234 -0
  1056. cuda/cccl/headers/include/cuda/std/__semaphore/counting_semaphore.h +51 -0
  1057. cuda/cccl/headers/include/cuda/std/__string/char_traits.h +190 -0
  1058. cuda/cccl/headers/include/cuda/std/__string/constexpr_c_functions.h +580 -0
  1059. cuda/cccl/headers/include/cuda/std/__string/helper_functions.h +296 -0
  1060. cuda/cccl/headers/include/cuda/std/__string/string_view.h +244 -0
  1061. cuda/cccl/headers/include/cuda/std/__string_ +29 -0
  1062. cuda/cccl/headers/include/cuda/std/__system_error/errc.h +51 -0
  1063. cuda/cccl/headers/include/cuda/std/__system_error_ +26 -0
  1064. cuda/cccl/headers/include/cuda/std/__thread/threading_support.h +106 -0
  1065. cuda/cccl/headers/include/cuda/std/__thread/threading_support_cuda.h +47 -0
  1066. cuda/cccl/headers/include/cuda/std/__thread/threading_support_external.h +41 -0
  1067. cuda/cccl/headers/include/cuda/std/__thread/threading_support_pthread.h +143 -0
  1068. cuda/cccl/headers/include/cuda/std/__thread/threading_support_win32.h +87 -0
  1069. cuda/cccl/headers/include/cuda/std/__tuple_dir/apply.h +82 -0
  1070. cuda/cccl/headers/include/cuda/std/__tuple_dir/get.h +122 -0
  1071. cuda/cccl/headers/include/cuda/std/__tuple_dir/ignore.h +51 -0
  1072. cuda/cccl/headers/include/cuda/std/__tuple_dir/make_tuple_types.h +120 -0
  1073. cuda/cccl/headers/include/cuda/std/__tuple_dir/sfinae_helpers.h +100 -0
  1074. cuda/cccl/headers/include/cuda/std/__tuple_dir/structured_bindings.h +212 -0
  1075. cuda/cccl/headers/include/cuda/std/__tuple_dir/tie.h +55 -0
  1076. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple.h +457 -0
  1077. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_cat.h +158 -0
  1078. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_constraints.h +286 -0
  1079. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_element.h +77 -0
  1080. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_indices.h +44 -0
  1081. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_leaf.h +452 -0
  1082. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +83 -0
  1083. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like_ext.h +68 -0
  1084. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_size.h +79 -0
  1085. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_types.h +35 -0
  1086. cuda/cccl/headers/include/cuda/std/__tuple_dir/vector_types.h +290 -0
  1087. cuda/cccl/headers/include/cuda/std/__type_traits/add_const.h +40 -0
  1088. cuda/cccl/headers/include/cuda/std/__type_traits/add_cv.h +40 -0
  1089. cuda/cccl/headers/include/cuda/std/__type_traits/add_lvalue_reference.h +62 -0
  1090. cuda/cccl/headers/include/cuda/std/__type_traits/add_pointer.h +65 -0
  1091. cuda/cccl/headers/include/cuda/std/__type_traits/add_rvalue_reference.h +62 -0
  1092. cuda/cccl/headers/include/cuda/std/__type_traits/add_volatile.h +40 -0
  1093. cuda/cccl/headers/include/cuda/std/__type_traits/aligned_storage.h +155 -0
  1094. cuda/cccl/headers/include/cuda/std/__type_traits/aligned_union.h +62 -0
  1095. cuda/cccl/headers/include/cuda/std/__type_traits/alignment_of.h +41 -0
  1096. cuda/cccl/headers/include/cuda/std/__type_traits/always_false.h +35 -0
  1097. cuda/cccl/headers/include/cuda/std/__type_traits/can_extract_key.h +68 -0
  1098. cuda/cccl/headers/include/cuda/std/__type_traits/common_reference.h +262 -0
  1099. cuda/cccl/headers/include/cuda/std/__type_traits/common_type.h +173 -0
  1100. cuda/cccl/headers/include/cuda/std/__type_traits/conditional.h +65 -0
  1101. cuda/cccl/headers/include/cuda/std/__type_traits/conjunction.h +67 -0
  1102. cuda/cccl/headers/include/cuda/std/__type_traits/copy_cv.h +50 -0
  1103. cuda/cccl/headers/include/cuda/std/__type_traits/copy_cvref.h +148 -0
  1104. cuda/cccl/headers/include/cuda/std/__type_traits/decay.h +83 -0
  1105. cuda/cccl/headers/include/cuda/std/__type_traits/dependent_type.h +35 -0
  1106. cuda/cccl/headers/include/cuda/std/__type_traits/disjunction.h +77 -0
  1107. cuda/cccl/headers/include/cuda/std/__type_traits/enable_if.h +43 -0
  1108. cuda/cccl/headers/include/cuda/std/__type_traits/extent.h +68 -0
  1109. cuda/cccl/headers/include/cuda/std/__type_traits/fold.h +47 -0
  1110. cuda/cccl/headers/include/cuda/std/__type_traits/has_unique_object_representation.h +46 -0
  1111. cuda/cccl/headers/include/cuda/std/__type_traits/has_virtual_destructor.h +42 -0
  1112. cuda/cccl/headers/include/cuda/std/__type_traits/integral_constant.h +62 -0
  1113. cuda/cccl/headers/include/cuda/std/__type_traits/is_abstract.h +42 -0
  1114. cuda/cccl/headers/include/cuda/std/__type_traits/is_aggregate.h +42 -0
  1115. cuda/cccl/headers/include/cuda/std/__type_traits/is_allocator.h +46 -0
  1116. cuda/cccl/headers/include/cuda/std/__type_traits/is_arithmetic.h +42 -0
  1117. cuda/cccl/headers/include/cuda/std/__type_traits/is_array.h +62 -0
  1118. cuda/cccl/headers/include/cuda/std/__type_traits/is_assignable.h +78 -0
  1119. cuda/cccl/headers/include/cuda/std/__type_traits/is_base_of.h +42 -0
  1120. cuda/cccl/headers/include/cuda/std/__type_traits/is_bounded_array.h +44 -0
  1121. cuda/cccl/headers/include/cuda/std/__type_traits/is_callable.h +60 -0
  1122. cuda/cccl/headers/include/cuda/std/__type_traits/is_char_like_type.h +38 -0
  1123. cuda/cccl/headers/include/cuda/std/__type_traits/is_class.h +42 -0
  1124. cuda/cccl/headers/include/cuda/std/__type_traits/is_comparable.h +78 -0
  1125. cuda/cccl/headers/include/cuda/std/__type_traits/is_compound.h +58 -0
  1126. cuda/cccl/headers/include/cuda/std/__type_traits/is_const.h +56 -0
  1127. cuda/cccl/headers/include/cuda/std/__type_traits/is_constant_evaluated.h +49 -0
  1128. cuda/cccl/headers/include/cuda/std/__type_traits/is_constructible.h +174 -0
  1129. cuda/cccl/headers/include/cuda/std/__type_traits/is_convertible.h +211 -0
  1130. cuda/cccl/headers/include/cuda/std/__type_traits/is_copy_assignable.h +43 -0
  1131. cuda/cccl/headers/include/cuda/std/__type_traits/is_copy_constructible.h +43 -0
  1132. cuda/cccl/headers/include/cuda/std/__type_traits/is_core_convertible.h +47 -0
  1133. cuda/cccl/headers/include/cuda/std/__type_traits/is_corresponding_member.h +42 -0
  1134. cuda/cccl/headers/include/cuda/std/__type_traits/is_default_constructible.h +40 -0
  1135. cuda/cccl/headers/include/cuda/std/__type_traits/is_destructible.h +115 -0
  1136. cuda/cccl/headers/include/cuda/std/__type_traits/is_empty.h +42 -0
  1137. cuda/cccl/headers/include/cuda/std/__type_traits/is_enum.h +42 -0
  1138. cuda/cccl/headers/include/cuda/std/__type_traits/is_execution_policy.h +63 -0
  1139. cuda/cccl/headers/include/cuda/std/__type_traits/is_extended_arithmetic.h +38 -0
  1140. cuda/cccl/headers/include/cuda/std/__type_traits/is_extended_floating_point.h +79 -0
  1141. cuda/cccl/headers/include/cuda/std/__type_traits/is_final.h +42 -0
  1142. cuda/cccl/headers/include/cuda/std/__type_traits/is_floating_point.h +53 -0
  1143. cuda/cccl/headers/include/cuda/std/__type_traits/is_fully_bounded_array.h +47 -0
  1144. cuda/cccl/headers/include/cuda/std/__type_traits/is_function.h +61 -0
  1145. cuda/cccl/headers/include/cuda/std/__type_traits/is_fundamental.h +56 -0
  1146. cuda/cccl/headers/include/cuda/std/__type_traits/is_implicitly_default_constructible.h +57 -0
  1147. cuda/cccl/headers/include/cuda/std/__type_traits/is_integer.h +45 -0
  1148. cuda/cccl/headers/include/cuda/std/__type_traits/is_integral.h +123 -0
  1149. cuda/cccl/headers/include/cuda/std/__type_traits/is_layout_compatible.h +45 -0
  1150. cuda/cccl/headers/include/cuda/std/__type_traits/is_literal_type.h +42 -0
  1151. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_function_pointer.h +79 -0
  1152. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_object_pointer.h +57 -0
  1153. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_pointer.h +57 -0
  1154. cuda/cccl/headers/include/cuda/std/__type_traits/is_move_assignable.h +43 -0
  1155. cuda/cccl/headers/include/cuda/std/__type_traits/is_move_constructible.h +42 -0
  1156. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_assignable.h +70 -0
  1157. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_constructible.h +72 -0
  1158. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_convertible.h +59 -0
  1159. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_copy_assignable.h +60 -0
  1160. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_copy_constructible.h +43 -0
  1161. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_default_constructible.h +54 -0
  1162. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_destructible.h +82 -0
  1163. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_move_assignable.h +60 -0
  1164. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_move_constructible.h +42 -0
  1165. cuda/cccl/headers/include/cuda/std/__type_traits/is_null_pointer.h +43 -0
  1166. cuda/cccl/headers/include/cuda/std/__type_traits/is_object.h +57 -0
  1167. cuda/cccl/headers/include/cuda/std/__type_traits/is_one_of.h +37 -0
  1168. cuda/cccl/headers/include/cuda/std/__type_traits/is_pod.h +42 -0
  1169. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer.h +60 -0
  1170. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer_interconvertible_base_of.h +84 -0
  1171. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer_interconvertible_with_class.h +42 -0
  1172. cuda/cccl/headers/include/cuda/std/__type_traits/is_polymorphic.h +42 -0
  1173. cuda/cccl/headers/include/cuda/std/__type_traits/is_primary_template.h +121 -0
  1174. cuda/cccl/headers/include/cuda/std/__type_traits/is_reference.h +95 -0
  1175. cuda/cccl/headers/include/cuda/std/__type_traits/is_referenceable.h +55 -0
  1176. cuda/cccl/headers/include/cuda/std/__type_traits/is_same.h +88 -0
  1177. cuda/cccl/headers/include/cuda/std/__type_traits/is_scalar.h +60 -0
  1178. cuda/cccl/headers/include/cuda/std/__type_traits/is_scoped_enum.h +49 -0
  1179. cuda/cccl/headers/include/cuda/std/__type_traits/is_signed.h +65 -0
  1180. cuda/cccl/headers/include/cuda/std/__type_traits/is_signed_integer.h +59 -0
  1181. cuda/cccl/headers/include/cuda/std/__type_traits/is_standard_layout.h +42 -0
  1182. cuda/cccl/headers/include/cuda/std/__type_traits/is_swappable.h +200 -0
  1183. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivial.h +42 -0
  1184. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_assignable.h +43 -0
  1185. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_constructible.h +43 -0
  1186. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copy_assignable.h +46 -0
  1187. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copy_constructible.h +45 -0
  1188. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copyable.h +42 -0
  1189. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_default_constructible.h +42 -0
  1190. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_destructible.h +58 -0
  1191. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_move_assignable.h +45 -0
  1192. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_move_constructible.h +44 -0
  1193. cuda/cccl/headers/include/cuda/std/__type_traits/is_unbounded_array.h +43 -0
  1194. cuda/cccl/headers/include/cuda/std/__type_traits/is_union.h +42 -0
  1195. cuda/cccl/headers/include/cuda/std/__type_traits/is_unsigned.h +66 -0
  1196. cuda/cccl/headers/include/cuda/std/__type_traits/is_unsigned_integer.h +59 -0
  1197. cuda/cccl/headers/include/cuda/std/__type_traits/is_valid_expansion.h +41 -0
  1198. cuda/cccl/headers/include/cuda/std/__type_traits/is_void.h +55 -0
  1199. cuda/cccl/headers/include/cuda/std/__type_traits/is_volatile.h +56 -0
  1200. cuda/cccl/headers/include/cuda/std/__type_traits/lazy.h +35 -0
  1201. cuda/cccl/headers/include/cuda/std/__type_traits/make_const_lvalue_ref.h +36 -0
  1202. cuda/cccl/headers/include/cuda/std/__type_traits/make_nbit_int.h +107 -0
  1203. cuda/cccl/headers/include/cuda/std/__type_traits/make_signed.h +140 -0
  1204. cuda/cccl/headers/include/cuda/std/__type_traits/make_unsigned.h +151 -0
  1205. cuda/cccl/headers/include/cuda/std/__type_traits/maybe_const.h +36 -0
  1206. cuda/cccl/headers/include/cuda/std/__type_traits/nat.h +39 -0
  1207. cuda/cccl/headers/include/cuda/std/__type_traits/negation.h +44 -0
  1208. cuda/cccl/headers/include/cuda/std/__type_traits/num_bits.h +122 -0
  1209. cuda/cccl/headers/include/cuda/std/__type_traits/promote.h +163 -0
  1210. cuda/cccl/headers/include/cuda/std/__type_traits/rank.h +60 -0
  1211. cuda/cccl/headers/include/cuda/std/__type_traits/reference_constructs_from_temporary.h +57 -0
  1212. cuda/cccl/headers/include/cuda/std/__type_traits/reference_converts_from_temporary.h +56 -0
  1213. cuda/cccl/headers/include/cuda/std/__type_traits/remove_all_extents.h +66 -0
  1214. cuda/cccl/headers/include/cuda/std/__type_traits/remove_const.h +59 -0
  1215. cuda/cccl/headers/include/cuda/std/__type_traits/remove_const_ref.h +37 -0
  1216. cuda/cccl/headers/include/cuda/std/__type_traits/remove_cv.h +57 -0
  1217. cuda/cccl/headers/include/cuda/std/__type_traits/remove_cvref.h +57 -0
  1218. cuda/cccl/headers/include/cuda/std/__type_traits/remove_extent.h +65 -0
  1219. cuda/cccl/headers/include/cuda/std/__type_traits/remove_pointer.h +73 -0
  1220. cuda/cccl/headers/include/cuda/std/__type_traits/remove_reference.h +72 -0
  1221. cuda/cccl/headers/include/cuda/std/__type_traits/remove_volatile.h +58 -0
  1222. cuda/cccl/headers/include/cuda/std/__type_traits/result_of.h +47 -0
  1223. cuda/cccl/headers/include/cuda/std/__type_traits/type_identity.h +40 -0
  1224. cuda/cccl/headers/include/cuda/std/__type_traits/type_list.h +1067 -0
  1225. cuda/cccl/headers/include/cuda/std/__type_traits/type_set.h +131 -0
  1226. cuda/cccl/headers/include/cuda/std/__type_traits/underlying_type.h +52 -0
  1227. cuda/cccl/headers/include/cuda/std/__type_traits/void_t.h +34 -0
  1228. cuda/cccl/headers/include/cuda/std/__utility/as_const.h +73 -0
  1229. cuda/cccl/headers/include/cuda/std/__utility/auto_cast.h +34 -0
  1230. cuda/cccl/headers/include/cuda/std/__utility/cmp.h +114 -0
  1231. cuda/cccl/headers/include/cuda/std/__utility/convert_to_integral.h +101 -0
  1232. cuda/cccl/headers/include/cuda/std/__utility/ctad_support.h +27 -0
  1233. cuda/cccl/headers/include/cuda/std/__utility/declval.h +76 -0
  1234. cuda/cccl/headers/include/cuda/std/__utility/delegate_constructors.h +51 -0
  1235. cuda/cccl/headers/include/cuda/std/__utility/exception_guard.h +162 -0
  1236. cuda/cccl/headers/include/cuda/std/__utility/exchange.h +46 -0
  1237. cuda/cccl/headers/include/cuda/std/__utility/forward.h +82 -0
  1238. cuda/cccl/headers/include/cuda/std/__utility/forward_like.h +82 -0
  1239. cuda/cccl/headers/include/cuda/std/__utility/in_place.h +86 -0
  1240. cuda/cccl/headers/include/cuda/std/__utility/integer_sequence.h +249 -0
  1241. cuda/cccl/headers/include/cuda/std/__utility/monostate.h +99 -0
  1242. cuda/cccl/headers/include/cuda/std/__utility/move.h +126 -0
  1243. cuda/cccl/headers/include/cuda/std/__utility/pair.h +791 -0
  1244. cuda/cccl/headers/include/cuda/std/__utility/piecewise_construct.h +37 -0
  1245. cuda/cccl/headers/include/cuda/std/__utility/pod_tuple.h +425 -0
  1246. cuda/cccl/headers/include/cuda/std/__utility/priority_tag.h +40 -0
  1247. cuda/cccl/headers/include/cuda/std/__utility/rel_ops.h +61 -0
  1248. cuda/cccl/headers/include/cuda/std/__utility/swap.h +64 -0
  1249. cuda/cccl/headers/include/cuda/std/__utility/to_underlying.h +40 -0
  1250. cuda/cccl/headers/include/cuda/std/__utility/typeid.h +421 -0
  1251. cuda/cccl/headers/include/cuda/std/__utility/undefined.h +34 -0
  1252. cuda/cccl/headers/include/cuda/std/__utility/unreachable.h +37 -0
  1253. cuda/cccl/headers/include/cuda/std/__variant/bad_variant_access.h +74 -0
  1254. cuda/cccl/headers/include/cuda/std/__variant/comparison.h +207 -0
  1255. cuda/cccl/headers/include/cuda/std/__variant/get.h +192 -0
  1256. cuda/cccl/headers/include/cuda/std/__variant/hash.h +82 -0
  1257. cuda/cccl/headers/include/cuda/std/__variant/sfinae_helpers.h +89 -0
  1258. cuda/cccl/headers/include/cuda/std/__variant/variant.h +250 -0
  1259. cuda/cccl/headers/include/cuda/std/__variant/variant_access.h +70 -0
  1260. cuda/cccl/headers/include/cuda/std/__variant/variant_base.h +683 -0
  1261. cuda/cccl/headers/include/cuda/std/__variant/variant_constraints.h +135 -0
  1262. cuda/cccl/headers/include/cuda/std/__variant/variant_match.h +126 -0
  1263. cuda/cccl/headers/include/cuda/std/__variant/variant_traits.h +184 -0
  1264. cuda/cccl/headers/include/cuda/std/__variant/variant_visit.h +225 -0
  1265. cuda/cccl/headers/include/cuda/std/__variant/visit.h +148 -0
  1266. cuda/cccl/headers/include/cuda/std/algorithm +138 -0
  1267. cuda/cccl/headers/include/cuda/std/array +519 -0
  1268. cuda/cccl/headers/include/cuda/std/atomic +810 -0
  1269. cuda/cccl/headers/include/cuda/std/barrier +42 -0
  1270. cuda/cccl/headers/include/cuda/std/bit +35 -0
  1271. cuda/cccl/headers/include/cuda/std/bitset +986 -0
  1272. cuda/cccl/headers/include/cuda/std/cassert +28 -0
  1273. cuda/cccl/headers/include/cuda/std/ccomplex +15 -0
  1274. cuda/cccl/headers/include/cuda/std/cfloat +59 -0
  1275. cuda/cccl/headers/include/cuda/std/charconv +31 -0
  1276. cuda/cccl/headers/include/cuda/std/chrono +26 -0
  1277. cuda/cccl/headers/include/cuda/std/climits +61 -0
  1278. cuda/cccl/headers/include/cuda/std/cmath +87 -0
  1279. cuda/cccl/headers/include/cuda/std/complex +50 -0
  1280. cuda/cccl/headers/include/cuda/std/concepts +48 -0
  1281. cuda/cccl/headers/include/cuda/std/cstddef +28 -0
  1282. cuda/cccl/headers/include/cuda/std/cstdint +178 -0
  1283. cuda/cccl/headers/include/cuda/std/cstdlib +31 -0
  1284. cuda/cccl/headers/include/cuda/std/cstring +110 -0
  1285. cuda/cccl/headers/include/cuda/std/ctime +155 -0
  1286. cuda/cccl/headers/include/cuda/std/detail/__config +22 -0
  1287. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/chrono +2506 -0
  1288. cuda/cccl/headers/include/cuda/std/execution +29 -0
  1289. cuda/cccl/headers/include/cuda/std/expected +30 -0
  1290. cuda/cccl/headers/include/cuda/std/functional +56 -0
  1291. cuda/cccl/headers/include/cuda/std/initializer_list +44 -0
  1292. cuda/cccl/headers/include/cuda/std/inplace_vector +2171 -0
  1293. cuda/cccl/headers/include/cuda/std/iterator +70 -0
  1294. cuda/cccl/headers/include/cuda/std/latch +34 -0
  1295. cuda/cccl/headers/include/cuda/std/limits +28 -0
  1296. cuda/cccl/headers/include/cuda/std/linalg +30 -0
  1297. cuda/cccl/headers/include/cuda/std/mdspan +38 -0
  1298. cuda/cccl/headers/include/cuda/std/memory +40 -0
  1299. cuda/cccl/headers/include/cuda/std/numbers +344 -0
  1300. cuda/cccl/headers/include/cuda/std/numeric +41 -0
  1301. cuda/cccl/headers/include/cuda/std/optional +31 -0
  1302. cuda/cccl/headers/include/cuda/std/ranges +70 -0
  1303. cuda/cccl/headers/include/cuda/std/ratio +416 -0
  1304. cuda/cccl/headers/include/cuda/std/semaphore +31 -0
  1305. cuda/cccl/headers/include/cuda/std/source_location +107 -0
  1306. cuda/cccl/headers/include/cuda/std/span +599 -0
  1307. cuda/cccl/headers/include/cuda/std/string_view +924 -0
  1308. cuda/cccl/headers/include/cuda/std/tuple +43 -0
  1309. cuda/cccl/headers/include/cuda/std/type_traits +176 -0
  1310. cuda/cccl/headers/include/cuda/std/utility +70 -0
  1311. cuda/cccl/headers/include/cuda/std/variant +32 -0
  1312. cuda/cccl/headers/include/cuda/std/version +240 -0
  1313. cuda/cccl/headers/include/cuda/stream +32 -0
  1314. cuda/cccl/headers/include/cuda/stream_ref +59 -0
  1315. cuda/cccl/headers/include/cuda/tma +25 -0
  1316. cuda/cccl/headers/include/cuda/type_traits +27 -0
  1317. cuda/cccl/headers/include/cuda/utility +28 -0
  1318. cuda/cccl/headers/include/cuda/version +16 -0
  1319. cuda/cccl/headers/include/cuda/warp +28 -0
  1320. cuda/cccl/headers/include/cuda/work_stealing +26 -0
  1321. cuda/cccl/headers/include/nv/detail/__preprocessor +169 -0
  1322. cuda/cccl/headers/include/nv/detail/__target_macros +739 -0
  1323. cuda/cccl/headers/include/nv/target +241 -0
  1324. cuda/cccl/headers/include/thrust/addressof.h +22 -0
  1325. cuda/cccl/headers/include/thrust/adjacent_difference.h +254 -0
  1326. cuda/cccl/headers/include/thrust/advance.h +60 -0
  1327. cuda/cccl/headers/include/thrust/allocate_unique.h +301 -0
  1328. cuda/cccl/headers/include/thrust/binary_search.h +1911 -0
  1329. cuda/cccl/headers/include/thrust/complex.h +859 -0
  1330. cuda/cccl/headers/include/thrust/copy.h +506 -0
  1331. cuda/cccl/headers/include/thrust/count.h +245 -0
  1332. cuda/cccl/headers/include/thrust/detail/adjacent_difference.inl +108 -0
  1333. cuda/cccl/headers/include/thrust/detail/alignment.h +81 -0
  1334. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +629 -0
  1335. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +193 -0
  1336. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +95 -0
  1337. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +79 -0
  1338. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +76 -0
  1339. cuda/cccl/headers/include/thrust/detail/allocator/no_throw_allocator.h +74 -0
  1340. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +113 -0
  1341. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +114 -0
  1342. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.h +77 -0
  1343. cuda/cccl/headers/include/thrust/detail/allocator_aware_execution_policy.h +96 -0
  1344. cuda/cccl/headers/include/thrust/detail/binary_search.inl +537 -0
  1345. cuda/cccl/headers/include/thrust/detail/caching_allocator.h +47 -0
  1346. cuda/cccl/headers/include/thrust/detail/complex/arithmetic.h +210 -0
  1347. cuda/cccl/headers/include/thrust/detail/complex/catrig.h +877 -0
  1348. cuda/cccl/headers/include/thrust/detail/complex/catrigf.h +591 -0
  1349. cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +234 -0
  1350. cuda/cccl/headers/include/thrust/detail/complex/ccoshf.h +162 -0
  1351. cuda/cccl/headers/include/thrust/detail/complex/cexp.h +194 -0
  1352. cuda/cccl/headers/include/thrust/detail/complex/cexpf.h +170 -0
  1353. cuda/cccl/headers/include/thrust/detail/complex/clog.h +222 -0
  1354. cuda/cccl/headers/include/thrust/detail/complex/clogf.h +209 -0
  1355. cuda/cccl/headers/include/thrust/detail/complex/complex.inl +263 -0
  1356. cuda/cccl/headers/include/thrust/detail/complex/cpow.h +53 -0
  1357. cuda/cccl/headers/include/thrust/detail/complex/cproj.h +75 -0
  1358. cuda/cccl/headers/include/thrust/detail/complex/csinh.h +228 -0
  1359. cuda/cccl/headers/include/thrust/detail/complex/csinhf.h +169 -0
  1360. cuda/cccl/headers/include/thrust/detail/complex/csqrt.h +178 -0
  1361. cuda/cccl/headers/include/thrust/detail/complex/csqrtf.h +174 -0
  1362. cuda/cccl/headers/include/thrust/detail/complex/ctanh.h +212 -0
  1363. cuda/cccl/headers/include/thrust/detail/complex/ctanhf.h +136 -0
  1364. cuda/cccl/headers/include/thrust/detail/complex/math_private.h +130 -0
  1365. cuda/cccl/headers/include/thrust/detail/complex/stream.h +73 -0
  1366. cuda/cccl/headers/include/thrust/detail/config/compiler.h +38 -0
  1367. cuda/cccl/headers/include/thrust/detail/config/config.h +43 -0
  1368. cuda/cccl/headers/include/thrust/detail/config/cpp_dialect.h +78 -0
  1369. cuda/cccl/headers/include/thrust/detail/config/device_system.h +57 -0
  1370. cuda/cccl/headers/include/thrust/detail/config/host_system.h +50 -0
  1371. cuda/cccl/headers/include/thrust/detail/config/memory_resource.h +41 -0
  1372. cuda/cccl/headers/include/thrust/detail/config/namespace.h +164 -0
  1373. cuda/cccl/headers/include/thrust/detail/config/simple_defines.h +48 -0
  1374. cuda/cccl/headers/include/thrust/detail/config.h +36 -0
  1375. cuda/cccl/headers/include/thrust/detail/contiguous_storage.h +227 -0
  1376. cuda/cccl/headers/include/thrust/detail/contiguous_storage.inl +272 -0
  1377. cuda/cccl/headers/include/thrust/detail/copy.h +70 -0
  1378. cuda/cccl/headers/include/thrust/detail/copy.inl +146 -0
  1379. cuda/cccl/headers/include/thrust/detail/copy_if.h +62 -0
  1380. cuda/cccl/headers/include/thrust/detail/copy_if.inl +114 -0
  1381. cuda/cccl/headers/include/thrust/detail/count.h +55 -0
  1382. cuda/cccl/headers/include/thrust/detail/count.inl +101 -0
  1383. cuda/cccl/headers/include/thrust/detail/device_ptr.inl +48 -0
  1384. cuda/cccl/headers/include/thrust/detail/equal.inl +105 -0
  1385. cuda/cccl/headers/include/thrust/detail/event_error.h +160 -0
  1386. cuda/cccl/headers/include/thrust/detail/execute_with_allocator.h +79 -0
  1387. cuda/cccl/headers/include/thrust/detail/execute_with_allocator_fwd.h +61 -0
  1388. cuda/cccl/headers/include/thrust/detail/execution_policy.h +120 -0
  1389. cuda/cccl/headers/include/thrust/detail/extrema.inl +196 -0
  1390. cuda/cccl/headers/include/thrust/detail/fill.inl +97 -0
  1391. cuda/cccl/headers/include/thrust/detail/find.inl +125 -0
  1392. cuda/cccl/headers/include/thrust/detail/for_each.inl +96 -0
  1393. cuda/cccl/headers/include/thrust/detail/function.h +49 -0
  1394. cuda/cccl/headers/include/thrust/detail/functional/actor.h +213 -0
  1395. cuda/cccl/headers/include/thrust/detail/functional/operators.h +384 -0
  1396. cuda/cccl/headers/include/thrust/detail/gather.inl +185 -0
  1397. cuda/cccl/headers/include/thrust/detail/generate.inl +97 -0
  1398. cuda/cccl/headers/include/thrust/detail/get_iterator_value.h +60 -0
  1399. cuda/cccl/headers/include/thrust/detail/inner_product.inl +130 -0
  1400. cuda/cccl/headers/include/thrust/detail/internal_functional.h +335 -0
  1401. cuda/cccl/headers/include/thrust/detail/logical.inl +125 -0
  1402. cuda/cccl/headers/include/thrust/detail/malloc_and_free.h +89 -0
  1403. cuda/cccl/headers/include/thrust/detail/malloc_and_free_fwd.h +45 -0
  1404. cuda/cccl/headers/include/thrust/detail/memory_algorithms.h +208 -0
  1405. cuda/cccl/headers/include/thrust/detail/merge.inl +288 -0
  1406. cuda/cccl/headers/include/thrust/detail/mismatch.inl +107 -0
  1407. cuda/cccl/headers/include/thrust/detail/nvtx_policy.h +41 -0
  1408. cuda/cccl/headers/include/thrust/detail/overlapped_copy.h +120 -0
  1409. cuda/cccl/headers/include/thrust/detail/partition.inl +390 -0
  1410. cuda/cccl/headers/include/thrust/detail/pointer.h +313 -0
  1411. cuda/cccl/headers/include/thrust/detail/preprocessor.h +652 -0
  1412. cuda/cccl/headers/include/thrust/detail/random_bijection.h +177 -0
  1413. cuda/cccl/headers/include/thrust/detail/range/head_flags.h +114 -0
  1414. cuda/cccl/headers/include/thrust/detail/range/tail_flags.h +128 -0
  1415. cuda/cccl/headers/include/thrust/detail/raw_pointer_cast.h +52 -0
  1416. cuda/cccl/headers/include/thrust/detail/raw_reference_cast.h +191 -0
  1417. cuda/cccl/headers/include/thrust/detail/reduce.inl +396 -0
  1418. cuda/cccl/headers/include/thrust/detail/reference.h +521 -0
  1419. cuda/cccl/headers/include/thrust/detail/reference_forward_declaration.h +35 -0
  1420. cuda/cccl/headers/include/thrust/detail/remove.inl +225 -0
  1421. cuda/cccl/headers/include/thrust/detail/replace.inl +243 -0
  1422. cuda/cccl/headers/include/thrust/detail/reverse.inl +100 -0
  1423. cuda/cccl/headers/include/thrust/detail/scan.inl +536 -0
  1424. cuda/cccl/headers/include/thrust/detail/scatter.inl +169 -0
  1425. cuda/cccl/headers/include/thrust/detail/seq.h +66 -0
  1426. cuda/cccl/headers/include/thrust/detail/sequence.inl +121 -0
  1427. cuda/cccl/headers/include/thrust/detail/set_operations.inl +993 -0
  1428. cuda/cccl/headers/include/thrust/detail/shuffle.inl +86 -0
  1429. cuda/cccl/headers/include/thrust/detail/sort.inl +385 -0
  1430. cuda/cccl/headers/include/thrust/detail/static_assert.h +56 -0
  1431. cuda/cccl/headers/include/thrust/detail/static_map.h +164 -0
  1432. cuda/cccl/headers/include/thrust/detail/swap_ranges.inl +77 -0
  1433. cuda/cccl/headers/include/thrust/detail/tabulate.inl +74 -0
  1434. cuda/cccl/headers/include/thrust/detail/temporary_array.h +150 -0
  1435. cuda/cccl/headers/include/thrust/detail/temporary_array.inl +121 -0
  1436. cuda/cccl/headers/include/thrust/detail/temporary_buffer.h +93 -0
  1437. cuda/cccl/headers/include/thrust/detail/transform_reduce.inl +81 -0
  1438. cuda/cccl/headers/include/thrust/detail/transform_scan.inl +173 -0
  1439. cuda/cccl/headers/include/thrust/detail/trivial_sequence.h +128 -0
  1440. cuda/cccl/headers/include/thrust/detail/tuple_meta_transform.h +59 -0
  1441. cuda/cccl/headers/include/thrust/detail/type_deduction.h +61 -0
  1442. cuda/cccl/headers/include/thrust/detail/type_traits/has_member_function.h +50 -0
  1443. cuda/cccl/headers/include/thrust/detail/type_traits/has_nested_type.h +43 -0
  1444. cuda/cccl/headers/include/thrust/detail/type_traits/is_call_possible.h +162 -0
  1445. cuda/cccl/headers/include/thrust/detail/type_traits/is_commutative.h +67 -0
  1446. cuda/cccl/headers/include/thrust/detail/type_traits/is_metafunction_defined.h +39 -0
  1447. cuda/cccl/headers/include/thrust/detail/type_traits/is_thrust_pointer.h +55 -0
  1448. cuda/cccl/headers/include/thrust/detail/type_traits/iterator/is_output_iterator.h +48 -0
  1449. cuda/cccl/headers/include/thrust/detail/type_traits/minimum_type.h +91 -0
  1450. cuda/cccl/headers/include/thrust/detail/type_traits/pointer_traits.h +332 -0
  1451. cuda/cccl/headers/include/thrust/detail/type_traits.h +143 -0
  1452. cuda/cccl/headers/include/thrust/detail/uninitialized_copy.inl +102 -0
  1453. cuda/cccl/headers/include/thrust/detail/uninitialized_fill.inl +97 -0
  1454. cuda/cccl/headers/include/thrust/detail/unique.inl +391 -0
  1455. cuda/cccl/headers/include/thrust/detail/use_default.h +34 -0
  1456. cuda/cccl/headers/include/thrust/detail/vector_base.h +613 -0
  1457. cuda/cccl/headers/include/thrust/detail/vector_base.inl +1216 -0
  1458. cuda/cccl/headers/include/thrust/device_allocator.h +134 -0
  1459. cuda/cccl/headers/include/thrust/device_delete.h +74 -0
  1460. cuda/cccl/headers/include/thrust/device_free.h +85 -0
  1461. cuda/cccl/headers/include/thrust/device_make_unique.h +56 -0
  1462. cuda/cccl/headers/include/thrust/device_malloc.h +84 -0
  1463. cuda/cccl/headers/include/thrust/device_malloc_allocator.h +190 -0
  1464. cuda/cccl/headers/include/thrust/device_new.h +112 -0
  1465. cuda/cccl/headers/include/thrust/device_new_allocator.h +179 -0
  1466. cuda/cccl/headers/include/thrust/device_ptr.h +196 -0
  1467. cuda/cccl/headers/include/thrust/device_reference.h +983 -0
  1468. cuda/cccl/headers/include/thrust/device_vector.h +576 -0
  1469. cuda/cccl/headers/include/thrust/distance.h +44 -0
  1470. cuda/cccl/headers/include/thrust/equal.h +247 -0
  1471. cuda/cccl/headers/include/thrust/execution_policy.h +252 -0
  1472. cuda/cccl/headers/include/thrust/extrema.h +657 -0
  1473. cuda/cccl/headers/include/thrust/fill.h +200 -0
  1474. cuda/cccl/headers/include/thrust/find.h +382 -0
  1475. cuda/cccl/headers/include/thrust/for_each.h +261 -0
  1476. cuda/cccl/headers/include/thrust/functional.h +399 -0
  1477. cuda/cccl/headers/include/thrust/gather.h +464 -0
  1478. cuda/cccl/headers/include/thrust/generate.h +193 -0
  1479. cuda/cccl/headers/include/thrust/host_vector.h +576 -0
  1480. cuda/cccl/headers/include/thrust/inner_product.h +264 -0
  1481. cuda/cccl/headers/include/thrust/iterator/constant_iterator.h +221 -0
  1482. cuda/cccl/headers/include/thrust/iterator/counting_iterator.h +338 -0
  1483. cuda/cccl/headers/include/thrust/iterator/detail/any_assign.h +48 -0
  1484. cuda/cccl/headers/include/thrust/iterator/detail/any_system_tag.h +43 -0
  1485. cuda/cccl/headers/include/thrust/iterator/detail/device_system_tag.h +43 -0
  1486. cuda/cccl/headers/include/thrust/iterator/detail/host_system_tag.h +42 -0
  1487. cuda/cccl/headers/include/thrust/iterator/detail/iterator_adaptor_base.h +83 -0
  1488. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_to_system.h +60 -0
  1489. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_to_traversal.h +65 -0
  1490. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h +56 -0
  1491. cuda/cccl/headers/include/thrust/iterator/detail/iterator_facade_category.h +184 -0
  1492. cuda/cccl/headers/include/thrust/iterator/detail/minimum_system.h +55 -0
  1493. cuda/cccl/headers/include/thrust/iterator/detail/normal_iterator.h +69 -0
  1494. cuda/cccl/headers/include/thrust/iterator/detail/retag.h +104 -0
  1495. cuda/cccl/headers/include/thrust/iterator/detail/tagged_iterator.h +80 -0
  1496. cuda/cccl/headers/include/thrust/iterator/detail/tuple_of_iterator_references.h +171 -0
  1497. cuda/cccl/headers/include/thrust/iterator/discard_iterator.h +163 -0
  1498. cuda/cccl/headers/include/thrust/iterator/iterator_adaptor.h +251 -0
  1499. cuda/cccl/headers/include/thrust/iterator/iterator_categories.h +211 -0
  1500. cuda/cccl/headers/include/thrust/iterator/iterator_facade.h +659 -0
  1501. cuda/cccl/headers/include/thrust/iterator/iterator_traits.h +334 -0
  1502. cuda/cccl/headers/include/thrust/iterator/iterator_traversal_tags.h +64 -0
  1503. cuda/cccl/headers/include/thrust/iterator/offset_iterator.h +194 -0
  1504. cuda/cccl/headers/include/thrust/iterator/permutation_iterator.h +204 -0
  1505. cuda/cccl/headers/include/thrust/iterator/retag.h +72 -0
  1506. cuda/cccl/headers/include/thrust/iterator/reverse_iterator.h +51 -0
  1507. cuda/cccl/headers/include/thrust/iterator/shuffle_iterator.h +190 -0
  1508. cuda/cccl/headers/include/thrust/iterator/strided_iterator.h +152 -0
  1509. cuda/cccl/headers/include/thrust/iterator/tabulate_output_iterator.h +152 -0
  1510. cuda/cccl/headers/include/thrust/iterator/transform_input_output_iterator.h +225 -0
  1511. cuda/cccl/headers/include/thrust/iterator/transform_iterator.h +353 -0
  1512. cuda/cccl/headers/include/thrust/iterator/transform_output_iterator.h +190 -0
  1513. cuda/cccl/headers/include/thrust/iterator/zip_iterator.h +365 -0
  1514. cuda/cccl/headers/include/thrust/logical.h +290 -0
  1515. cuda/cccl/headers/include/thrust/memory.h +299 -0
  1516. cuda/cccl/headers/include/thrust/merge.h +726 -0
  1517. cuda/cccl/headers/include/thrust/mismatch.h +262 -0
  1518. cuda/cccl/headers/include/thrust/mr/allocator.h +227 -0
  1519. cuda/cccl/headers/include/thrust/mr/device_memory_resource.h +46 -0
  1520. cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +526 -0
  1521. cuda/cccl/headers/include/thrust/mr/disjoint_sync_pool.h +116 -0
  1522. cuda/cccl/headers/include/thrust/mr/disjoint_tls_pool.h +65 -0
  1523. cuda/cccl/headers/include/thrust/mr/fancy_pointer_resource.h +65 -0
  1524. cuda/cccl/headers/include/thrust/mr/host_memory_resource.h +42 -0
  1525. cuda/cccl/headers/include/thrust/mr/memory_resource.h +215 -0
  1526. cuda/cccl/headers/include/thrust/mr/new.h +98 -0
  1527. cuda/cccl/headers/include/thrust/mr/polymorphic_adaptor.h +61 -0
  1528. cuda/cccl/headers/include/thrust/mr/pool.h +526 -0
  1529. cuda/cccl/headers/include/thrust/mr/pool_options.h +172 -0
  1530. cuda/cccl/headers/include/thrust/mr/sync_pool.h +112 -0
  1531. cuda/cccl/headers/include/thrust/mr/tls_pool.h +62 -0
  1532. cuda/cccl/headers/include/thrust/mr/universal_memory_resource.h +29 -0
  1533. cuda/cccl/headers/include/thrust/mr/validator.h +54 -0
  1534. cuda/cccl/headers/include/thrust/pair.h +102 -0
  1535. cuda/cccl/headers/include/thrust/partition.h +1392 -0
  1536. cuda/cccl/headers/include/thrust/per_device_resource.h +110 -0
  1537. cuda/cccl/headers/include/thrust/random/detail/discard_block_engine.inl +182 -0
  1538. cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine.inl +153 -0
  1539. cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine_discard.h +97 -0
  1540. cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine.inl +149 -0
  1541. cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h +46 -0
  1542. cuda/cccl/headers/include/thrust/random/detail/mod.h +94 -0
  1543. cuda/cccl/headers/include/thrust/random/detail/normal_distribution.inl +185 -0
  1544. cuda/cccl/headers/include/thrust/random/detail/normal_distribution_base.h +157 -0
  1545. cuda/cccl/headers/include/thrust/random/detail/random_core_access.h +56 -0
  1546. cuda/cccl/headers/include/thrust/random/detail/subtract_with_carry_engine.inl +199 -0
  1547. cuda/cccl/headers/include/thrust/random/detail/uniform_int_distribution.inl +196 -0
  1548. cuda/cccl/headers/include/thrust/random/detail/uniform_real_distribution.inl +198 -0
  1549. cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine.inl +181 -0
  1550. cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine_max.h +186 -0
  1551. cuda/cccl/headers/include/thrust/random/discard_block_engine.h +238 -0
  1552. cuda/cccl/headers/include/thrust/random/linear_congruential_engine.h +287 -0
  1553. cuda/cccl/headers/include/thrust/random/linear_feedback_shift_engine.h +215 -0
  1554. cuda/cccl/headers/include/thrust/random/normal_distribution.h +256 -0
  1555. cuda/cccl/headers/include/thrust/random/subtract_with_carry_engine.h +245 -0
  1556. cuda/cccl/headers/include/thrust/random/uniform_int_distribution.h +259 -0
  1557. cuda/cccl/headers/include/thrust/random/uniform_real_distribution.h +257 -0
  1558. cuda/cccl/headers/include/thrust/random/xor_combine_engine.h +253 -0
  1559. cuda/cccl/headers/include/thrust/random.h +118 -0
  1560. cuda/cccl/headers/include/thrust/reduce.h +1114 -0
  1561. cuda/cccl/headers/include/thrust/remove.h +768 -0
  1562. cuda/cccl/headers/include/thrust/replace.h +826 -0
  1563. cuda/cccl/headers/include/thrust/reverse.h +215 -0
  1564. cuda/cccl/headers/include/thrust/scan.h +1671 -0
  1565. cuda/cccl/headers/include/thrust/scatter.h +446 -0
  1566. cuda/cccl/headers/include/thrust/sequence.h +277 -0
  1567. cuda/cccl/headers/include/thrust/set_operations.h +3027 -0
  1568. cuda/cccl/headers/include/thrust/shuffle.h +182 -0
  1569. cuda/cccl/headers/include/thrust/sort.h +1320 -0
  1570. cuda/cccl/headers/include/thrust/swap.h +147 -0
  1571. cuda/cccl/headers/include/thrust/system/cpp/detail/adjacent_difference.h +30 -0
  1572. cuda/cccl/headers/include/thrust/system/cpp/detail/assign_value.h +30 -0
  1573. cuda/cccl/headers/include/thrust/system/cpp/detail/binary_search.h +32 -0
  1574. cuda/cccl/headers/include/thrust/system/cpp/detail/copy.h +30 -0
  1575. cuda/cccl/headers/include/thrust/system/cpp/detail/copy_if.h +30 -0
  1576. cuda/cccl/headers/include/thrust/system/cpp/detail/count.h +29 -0
  1577. cuda/cccl/headers/include/thrust/system/cpp/detail/equal.h +29 -0
  1578. cuda/cccl/headers/include/thrust/system/cpp/detail/execution_policy.h +113 -0
  1579. cuda/cccl/headers/include/thrust/system/cpp/detail/extrema.h +30 -0
  1580. cuda/cccl/headers/include/thrust/system/cpp/detail/fill.h +29 -0
  1581. cuda/cccl/headers/include/thrust/system/cpp/detail/find.h +30 -0
  1582. cuda/cccl/headers/include/thrust/system/cpp/detail/for_each.h +30 -0
  1583. cuda/cccl/headers/include/thrust/system/cpp/detail/gather.h +29 -0
  1584. cuda/cccl/headers/include/thrust/system/cpp/detail/generate.h +29 -0
  1585. cuda/cccl/headers/include/thrust/system/cpp/detail/get_value.h +30 -0
  1586. cuda/cccl/headers/include/thrust/system/cpp/detail/inner_product.h +29 -0
  1587. cuda/cccl/headers/include/thrust/system/cpp/detail/iter_swap.h +30 -0
  1588. cuda/cccl/headers/include/thrust/system/cpp/detail/logical.h +29 -0
  1589. cuda/cccl/headers/include/thrust/system/cpp/detail/malloc_and_free.h +30 -0
  1590. cuda/cccl/headers/include/thrust/system/cpp/detail/memory.inl +55 -0
  1591. cuda/cccl/headers/include/thrust/system/cpp/detail/merge.h +30 -0
  1592. cuda/cccl/headers/include/thrust/system/cpp/detail/mismatch.h +29 -0
  1593. cuda/cccl/headers/include/thrust/system/cpp/detail/partition.h +30 -0
  1594. cuda/cccl/headers/include/thrust/system/cpp/detail/per_device_resource.h +29 -0
  1595. cuda/cccl/headers/include/thrust/system/cpp/detail/reduce.h +30 -0
  1596. cuda/cccl/headers/include/thrust/system/cpp/detail/reduce_by_key.h +30 -0
  1597. cuda/cccl/headers/include/thrust/system/cpp/detail/remove.h +30 -0
  1598. cuda/cccl/headers/include/thrust/system/cpp/detail/replace.h +29 -0
  1599. cuda/cccl/headers/include/thrust/system/cpp/detail/reverse.h +29 -0
  1600. cuda/cccl/headers/include/thrust/system/cpp/detail/scan.h +30 -0
  1601. cuda/cccl/headers/include/thrust/system/cpp/detail/scan_by_key.h +30 -0
  1602. cuda/cccl/headers/include/thrust/system/cpp/detail/scatter.h +29 -0
  1603. cuda/cccl/headers/include/thrust/system/cpp/detail/sequence.h +29 -0
  1604. cuda/cccl/headers/include/thrust/system/cpp/detail/set_operations.h +30 -0
  1605. cuda/cccl/headers/include/thrust/system/cpp/detail/sort.h +30 -0
  1606. cuda/cccl/headers/include/thrust/system/cpp/detail/swap_ranges.h +29 -0
  1607. cuda/cccl/headers/include/thrust/system/cpp/detail/tabulate.h +29 -0
  1608. cuda/cccl/headers/include/thrust/system/cpp/detail/temporary_buffer.h +29 -0
  1609. cuda/cccl/headers/include/thrust/system/cpp/detail/transform.h +29 -0
  1610. cuda/cccl/headers/include/thrust/system/cpp/detail/transform_reduce.h +29 -0
  1611. cuda/cccl/headers/include/thrust/system/cpp/detail/transform_scan.h +29 -0
  1612. cuda/cccl/headers/include/thrust/system/cpp/detail/uninitialized_copy.h +29 -0
  1613. cuda/cccl/headers/include/thrust/system/cpp/detail/uninitialized_fill.h +29 -0
  1614. cuda/cccl/headers/include/thrust/system/cpp/detail/unique.h +30 -0
  1615. cuda/cccl/headers/include/thrust/system/cpp/detail/unique_by_key.h +30 -0
  1616. cuda/cccl/headers/include/thrust/system/cpp/execution_policy.h +63 -0
  1617. cuda/cccl/headers/include/thrust/system/cpp/memory.h +105 -0
  1618. cuda/cccl/headers/include/thrust/system/cpp/memory_resource.h +70 -0
  1619. cuda/cccl/headers/include/thrust/system/cpp/pointer.h +118 -0
  1620. cuda/cccl/headers/include/thrust/system/cpp/vector.h +95 -0
  1621. cuda/cccl/headers/include/thrust/system/cuda/config.h +126 -0
  1622. cuda/cccl/headers/include/thrust/system/cuda/detail/adjacent_difference.h +218 -0
  1623. cuda/cccl/headers/include/thrust/system/cuda/detail/assign_value.h +124 -0
  1624. cuda/cccl/headers/include/thrust/system/cuda/detail/binary_search.h +29 -0
  1625. cuda/cccl/headers/include/thrust/system/cuda/detail/cdp_dispatch.h +72 -0
  1626. cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +272 -0
  1627. cuda/cccl/headers/include/thrust/system/cuda/detail/copy_if.h +251 -0
  1628. cuda/cccl/headers/include/thrust/system/cuda/detail/core/agent_launcher.h +280 -0
  1629. cuda/cccl/headers/include/thrust/system/cuda/detail/core/triple_chevron_launch.h +162 -0
  1630. cuda/cccl/headers/include/thrust/system/cuda/detail/core/util.h +578 -0
  1631. cuda/cccl/headers/include/thrust/system/cuda/detail/count.h +75 -0
  1632. cuda/cccl/headers/include/thrust/system/cuda/detail/cross_system.h +241 -0
  1633. cuda/cccl/headers/include/thrust/system/cuda/detail/dispatch.h +230 -0
  1634. cuda/cccl/headers/include/thrust/system/cuda/detail/equal.h +62 -0
  1635. cuda/cccl/headers/include/thrust/system/cuda/detail/error.inl +87 -0
  1636. cuda/cccl/headers/include/thrust/system/cuda/detail/execution_policy.h +266 -0
  1637. cuda/cccl/headers/include/thrust/system/cuda/detail/extrema.h +473 -0
  1638. cuda/cccl/headers/include/thrust/system/cuda/detail/fill.h +99 -0
  1639. cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +165 -0
  1640. cuda/cccl/headers/include/thrust/system/cuda/detail/for_each.h +83 -0
  1641. cuda/cccl/headers/include/thrust/system/cuda/detail/gather.h +89 -0
  1642. cuda/cccl/headers/include/thrust/system/cuda/detail/generate.h +59 -0
  1643. cuda/cccl/headers/include/thrust/system/cuda/detail/get_value.h +65 -0
  1644. cuda/cccl/headers/include/thrust/system/cuda/detail/inner_product.h +77 -0
  1645. cuda/cccl/headers/include/thrust/system/cuda/detail/iter_swap.h +79 -0
  1646. cuda/cccl/headers/include/thrust/system/cuda/detail/logical.h +29 -0
  1647. cuda/cccl/headers/include/thrust/system/cuda/detail/make_unsigned_special.h +55 -0
  1648. cuda/cccl/headers/include/thrust/system/cuda/detail/malloc_and_free.h +119 -0
  1649. cuda/cccl/headers/include/thrust/system/cuda/detail/memory.inl +55 -0
  1650. cuda/cccl/headers/include/thrust/system/cuda/detail/merge.h +205 -0
  1651. cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +88 -0
  1652. cuda/cccl/headers/include/thrust/system/cuda/detail/parallel_for.h +79 -0
  1653. cuda/cccl/headers/include/thrust/system/cuda/detail/partition.h +401 -0
  1654. cuda/cccl/headers/include/thrust/system/cuda/detail/per_device_resource.h +70 -0
  1655. cuda/cccl/headers/include/thrust/system/cuda/detail/reduce.h +774 -0
  1656. cuda/cccl/headers/include/thrust/system/cuda/detail/reduce_by_key.h +994 -0
  1657. cuda/cccl/headers/include/thrust/system/cuda/detail/remove.h +105 -0
  1658. cuda/cccl/headers/include/thrust/system/cuda/detail/replace.h +121 -0
  1659. cuda/cccl/headers/include/thrust/system/cuda/detail/reverse.h +83 -0
  1660. cuda/cccl/headers/include/thrust/system/cuda/detail/scan.h +340 -0
  1661. cuda/cccl/headers/include/thrust/system/cuda/detail/scan_by_key.h +412 -0
  1662. cuda/cccl/headers/include/thrust/system/cuda/detail/scatter.h +90 -0
  1663. cuda/cccl/headers/include/thrust/system/cuda/detail/sequence.h +29 -0
  1664. cuda/cccl/headers/include/thrust/system/cuda/detail/set_operations.h +1722 -0
  1665. cuda/cccl/headers/include/thrust/system/cuda/detail/sort.h +473 -0
  1666. cuda/cccl/headers/include/thrust/system/cuda/detail/swap_ranges.h +99 -0
  1667. cuda/cccl/headers/include/thrust/system/cuda/detail/tabulate.h +62 -0
  1668. cuda/cccl/headers/include/thrust/system/cuda/detail/temporary_buffer.h +132 -0
  1669. cuda/cccl/headers/include/thrust/system/cuda/detail/terminate.h +53 -0
  1670. cuda/cccl/headers/include/thrust/system/cuda/detail/transform.h +428 -0
  1671. cuda/cccl/headers/include/thrust/system/cuda/detail/transform_reduce.h +139 -0
  1672. cuda/cccl/headers/include/thrust/system/cuda/detail/transform_scan.h +117 -0
  1673. cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_copy.h +114 -0
  1674. cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_fill.h +102 -0
  1675. cuda/cccl/headers/include/thrust/system/cuda/detail/unique.h +288 -0
  1676. cuda/cccl/headers/include/thrust/system/cuda/detail/unique_by_key.h +307 -0
  1677. cuda/cccl/headers/include/thrust/system/cuda/detail/util.h +253 -0
  1678. cuda/cccl/headers/include/thrust/system/cuda/error.h +159 -0
  1679. cuda/cccl/headers/include/thrust/system/cuda/execution_policy.h +15 -0
  1680. cuda/cccl/headers/include/thrust/system/cuda/memory.h +118 -0
  1681. cuda/cccl/headers/include/thrust/system/cuda/memory_resource.h +115 -0
  1682. cuda/cccl/headers/include/thrust/system/cuda/pointer.h +155 -0
  1683. cuda/cccl/headers/include/thrust/system/cuda/vector.h +104 -0
  1684. cuda/cccl/headers/include/thrust/system/detail/bad_alloc.h +59 -0
  1685. cuda/cccl/headers/include/thrust/system/detail/errno.h +118 -0
  1686. cuda/cccl/headers/include/thrust/system/detail/error_category.inl +298 -0
  1687. cuda/cccl/headers/include/thrust/system/detail/error_code.inl +171 -0
  1688. cuda/cccl/headers/include/thrust/system/detail/error_condition.inl +119 -0
  1689. cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.h +51 -0
  1690. cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.inl +77 -0
  1691. cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.h +159 -0
  1692. cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.inl +370 -0
  1693. cuda/cccl/headers/include/thrust/system/detail/generic/copy.h +43 -0
  1694. cuda/cccl/headers/include/thrust/system/detail/generic/copy.inl +62 -0
  1695. cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.h +56 -0
  1696. cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.inl +145 -0
  1697. cuda/cccl/headers/include/thrust/system/detail/generic/count.h +46 -0
  1698. cuda/cccl/headers/include/thrust/system/detail/generic/count.inl +82 -0
  1699. cuda/cccl/headers/include/thrust/system/detail/generic/equal.h +47 -0
  1700. cuda/cccl/headers/include/thrust/system/detail/generic/equal.inl +58 -0
  1701. cuda/cccl/headers/include/thrust/system/detail/generic/extrema.h +65 -0
  1702. cuda/cccl/headers/include/thrust/system/detail/generic/extrema.inl +246 -0
  1703. cuda/cccl/headers/include/thrust/system/detail/generic/fill.h +52 -0
  1704. cuda/cccl/headers/include/thrust/system/detail/generic/find.h +47 -0
  1705. cuda/cccl/headers/include/thrust/system/detail/generic/find.inl +135 -0
  1706. cuda/cccl/headers/include/thrust/system/detail/generic/for_each.h +56 -0
  1707. cuda/cccl/headers/include/thrust/system/detail/generic/gather.h +71 -0
  1708. cuda/cccl/headers/include/thrust/system/detail/generic/gather.inl +94 -0
  1709. cuda/cccl/headers/include/thrust/system/detail/generic/generate.h +43 -0
  1710. cuda/cccl/headers/include/thrust/system/detail/generic/generate.inl +64 -0
  1711. cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.h +58 -0
  1712. cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.inl +70 -0
  1713. cuda/cccl/headers/include/thrust/system/detail/generic/logical.h +57 -0
  1714. cuda/cccl/headers/include/thrust/system/detail/generic/memory.h +61 -0
  1715. cuda/cccl/headers/include/thrust/system/detail/generic/memory.inl +83 -0
  1716. cuda/cccl/headers/include/thrust/system/detail/generic/merge.h +97 -0
  1717. cuda/cccl/headers/include/thrust/system/detail/generic/merge.inl +146 -0
  1718. cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.h +47 -0
  1719. cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.inl +67 -0
  1720. cuda/cccl/headers/include/thrust/system/detail/generic/partition.h +127 -0
  1721. cuda/cccl/headers/include/thrust/system/detail/generic/partition.inl +208 -0
  1722. cuda/cccl/headers/include/thrust/system/detail/generic/per_device_resource.h +41 -0
  1723. cuda/cccl/headers/include/thrust/system/detail/generic/reduce.h +69 -0
  1724. cuda/cccl/headers/include/thrust/system/detail/generic/reduce.inl +98 -0
  1725. cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.h +81 -0
  1726. cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.inl +183 -0
  1727. cuda/cccl/headers/include/thrust/system/detail/generic/remove.h +84 -0
  1728. cuda/cccl/headers/include/thrust/system/detail/generic/remove.inl +119 -0
  1729. cuda/cccl/headers/include/thrust/system/detail/generic/replace.h +93 -0
  1730. cuda/cccl/headers/include/thrust/system/detail/generic/replace.inl +172 -0
  1731. cuda/cccl/headers/include/thrust/system/detail/generic/reverse.h +46 -0
  1732. cuda/cccl/headers/include/thrust/system/detail/generic/reverse.inl +65 -0
  1733. cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.h +61 -0
  1734. cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.inl +125 -0
  1735. cuda/cccl/headers/include/thrust/system/detail/generic/scan.h +86 -0
  1736. cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.h +124 -0
  1737. cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.inl +229 -0
  1738. cuda/cccl/headers/include/thrust/system/detail/generic/scatter.h +71 -0
  1739. cuda/cccl/headers/include/thrust/system/detail/generic/scatter.inl +83 -0
  1740. cuda/cccl/headers/include/thrust/system/detail/generic/select_system.h +105 -0
  1741. cuda/cccl/headers/include/thrust/system/detail/generic/sequence.h +70 -0
  1742. cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.h +281 -0
  1743. cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.inl +474 -0
  1744. cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.h +52 -0
  1745. cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.inl +123 -0
  1746. cuda/cccl/headers/include/thrust/system/detail/generic/sort.h +111 -0
  1747. cuda/cccl/headers/include/thrust/system/detail/generic/sort.inl +176 -0
  1748. cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.h +42 -0
  1749. cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.inl +73 -0
  1750. cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.h +39 -0
  1751. cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.inl +54 -0
  1752. cuda/cccl/headers/include/thrust/system/detail/generic/tag.h +45 -0
  1753. cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.h +53 -0
  1754. cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.inl +81 -0
  1755. cuda/cccl/headers/include/thrust/system/detail/generic/transform.h +393 -0
  1756. cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.h +48 -0
  1757. cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.inl +54 -0
  1758. cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.h +78 -0
  1759. cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.inl +111 -0
  1760. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.h +43 -0
  1761. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.inl +164 -0
  1762. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.h +43 -0
  1763. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.inl +112 -0
  1764. cuda/cccl/headers/include/thrust/system/detail/generic/unique.h +69 -0
  1765. cuda/cccl/headers/include/thrust/system/detail/generic/unique.inl +112 -0
  1766. cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.h +80 -0
  1767. cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.inl +124 -0
  1768. cuda/cccl/headers/include/thrust/system/detail/internal/decompose.h +115 -0
  1769. cuda/cccl/headers/include/thrust/system/detail/sequential/adjacent_difference.h +68 -0
  1770. cuda/cccl/headers/include/thrust/system/detail/sequential/assign_value.h +40 -0
  1771. cuda/cccl/headers/include/thrust/system/detail/sequential/binary_search.h +136 -0
  1772. cuda/cccl/headers/include/thrust/system/detail/sequential/copy.h +120 -0
  1773. cuda/cccl/headers/include/thrust/system/detail/sequential/copy_backward.h +47 -0
  1774. cuda/cccl/headers/include/thrust/system/detail/sequential/copy_if.h +69 -0
  1775. cuda/cccl/headers/include/thrust/system/detail/sequential/count.h +29 -0
  1776. cuda/cccl/headers/include/thrust/system/detail/sequential/equal.h +29 -0
  1777. cuda/cccl/headers/include/thrust/system/detail/sequential/execution_policy.h +52 -0
  1778. cuda/cccl/headers/include/thrust/system/detail/sequential/extrema.h +109 -0
  1779. cuda/cccl/headers/include/thrust/system/detail/sequential/fill.h +29 -0
  1780. cuda/cccl/headers/include/thrust/system/detail/sequential/find.h +60 -0
  1781. cuda/cccl/headers/include/thrust/system/detail/sequential/for_each.h +72 -0
  1782. cuda/cccl/headers/include/thrust/system/detail/sequential/gather.h +29 -0
  1783. cuda/cccl/headers/include/thrust/system/detail/sequential/general_copy.h +120 -0
  1784. cuda/cccl/headers/include/thrust/system/detail/sequential/generate.h +29 -0
  1785. cuda/cccl/headers/include/thrust/system/detail/sequential/get_value.h +41 -0
  1786. cuda/cccl/headers/include/thrust/system/detail/sequential/inner_product.h +29 -0
  1787. cuda/cccl/headers/include/thrust/system/detail/sequential/insertion_sort.h +139 -0
  1788. cuda/cccl/headers/include/thrust/system/detail/sequential/iter_swap.h +43 -0
  1789. cuda/cccl/headers/include/thrust/system/detail/sequential/logical.h +29 -0
  1790. cuda/cccl/headers/include/thrust/system/detail/sequential/malloc_and_free.h +48 -0
  1791. cuda/cccl/headers/include/thrust/system/detail/sequential/merge.h +147 -0
  1792. cuda/cccl/headers/include/thrust/system/detail/sequential/mismatch.h +29 -0
  1793. cuda/cccl/headers/include/thrust/system/detail/sequential/partition.h +298 -0
  1794. cuda/cccl/headers/include/thrust/system/detail/sequential/per_device_resource.h +29 -0
  1795. cuda/cccl/headers/include/thrust/system/detail/sequential/reduce.h +62 -0
  1796. cuda/cccl/headers/include/thrust/system/detail/sequential/reduce_by_key.h +97 -0
  1797. cuda/cccl/headers/include/thrust/system/detail/sequential/remove.h +177 -0
  1798. cuda/cccl/headers/include/thrust/system/detail/sequential/replace.h +29 -0
  1799. cuda/cccl/headers/include/thrust/system/detail/sequential/reverse.h +29 -0
  1800. cuda/cccl/headers/include/thrust/system/detail/sequential/scan.h +152 -0
  1801. cuda/cccl/headers/include/thrust/system/detail/sequential/scan_by_key.h +143 -0
  1802. cuda/cccl/headers/include/thrust/system/detail/sequential/scatter.h +29 -0
  1803. cuda/cccl/headers/include/thrust/system/detail/sequential/sequence.h +29 -0
  1804. cuda/cccl/headers/include/thrust/system/detail/sequential/set_operations.h +204 -0
  1805. cuda/cccl/headers/include/thrust/system/detail/sequential/sort.h +120 -0
  1806. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.h +353 -0
  1807. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.h +121 -0
  1808. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.h +584 -0
  1809. cuda/cccl/headers/include/thrust/system/detail/sequential/swap_ranges.h +29 -0
  1810. cuda/cccl/headers/include/thrust/system/detail/sequential/tabulate.h +29 -0
  1811. cuda/cccl/headers/include/thrust/system/detail/sequential/temporary_buffer.h +29 -0
  1812. cuda/cccl/headers/include/thrust/system/detail/sequential/transform.h +29 -0
  1813. cuda/cccl/headers/include/thrust/system/detail/sequential/transform_reduce.h +29 -0
  1814. cuda/cccl/headers/include/thrust/system/detail/sequential/transform_scan.h +29 -0
  1815. cuda/cccl/headers/include/thrust/system/detail/sequential/trivial_copy.h +56 -0
  1816. cuda/cccl/headers/include/thrust/system/detail/sequential/uninitialized_copy.h +29 -0
  1817. cuda/cccl/headers/include/thrust/system/detail/sequential/uninitialized_fill.h +29 -0
  1818. cuda/cccl/headers/include/thrust/system/detail/sequential/unique.h +112 -0
  1819. cuda/cccl/headers/include/thrust/system/detail/sequential/unique_by_key.h +105 -0
  1820. cuda/cccl/headers/include/thrust/system/detail/system_error.inl +106 -0
  1821. cuda/cccl/headers/include/thrust/system/error_code.h +508 -0
  1822. cuda/cccl/headers/include/thrust/system/omp/detail/adjacent_difference.h +34 -0
  1823. cuda/cccl/headers/include/thrust/system/omp/detail/assign_value.h +17 -0
  1824. cuda/cccl/headers/include/thrust/system/omp/detail/binary_search.h +57 -0
  1825. cuda/cccl/headers/include/thrust/system/omp/detail/copy.h +61 -0
  1826. cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.h +39 -0
  1827. cuda/cccl/headers/include/thrust/system/omp/detail/count.h +17 -0
  1828. cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.h +48 -0
  1829. cuda/cccl/headers/include/thrust/system/omp/detail/equal.h +17 -0
  1830. cuda/cccl/headers/include/thrust/system/omp/detail/execution_policy.h +132 -0
  1831. cuda/cccl/headers/include/thrust/system/omp/detail/extrema.h +46 -0
  1832. cuda/cccl/headers/include/thrust/system/omp/detail/fill.h +17 -0
  1833. cuda/cccl/headers/include/thrust/system/omp/detail/find.h +33 -0
  1834. cuda/cccl/headers/include/thrust/system/omp/detail/for_each.h +74 -0
  1835. cuda/cccl/headers/include/thrust/system/omp/detail/gather.h +17 -0
  1836. cuda/cccl/headers/include/thrust/system/omp/detail/generate.h +17 -0
  1837. cuda/cccl/headers/include/thrust/system/omp/detail/get_value.h +17 -0
  1838. cuda/cccl/headers/include/thrust/system/omp/detail/inner_product.h +17 -0
  1839. cuda/cccl/headers/include/thrust/system/omp/detail/iter_swap.h +17 -0
  1840. cuda/cccl/headers/include/thrust/system/omp/detail/logical.h +17 -0
  1841. cuda/cccl/headers/include/thrust/system/omp/detail/malloc_and_free.h +17 -0
  1842. cuda/cccl/headers/include/thrust/system/omp/detail/merge.h +17 -0
  1843. cuda/cccl/headers/include/thrust/system/omp/detail/mismatch.h +17 -0
  1844. cuda/cccl/headers/include/thrust/system/omp/detail/partition.h +84 -0
  1845. cuda/cccl/headers/include/thrust/system/omp/detail/per_device_resource.h +16 -0
  1846. cuda/cccl/headers/include/thrust/system/omp/detail/pragma_omp.h +30 -0
  1847. cuda/cccl/headers/include/thrust/system/omp/detail/reduce.h +63 -0
  1848. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.h +48 -0
  1849. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.h +87 -0
  1850. cuda/cccl/headers/include/thrust/system/omp/detail/remove.h +67 -0
  1851. cuda/cccl/headers/include/thrust/system/omp/detail/replace.h +17 -0
  1852. cuda/cccl/headers/include/thrust/system/omp/detail/reverse.h +17 -0
  1853. cuda/cccl/headers/include/thrust/system/omp/detail/scan.h +216 -0
  1854. cuda/cccl/headers/include/thrust/system/omp/detail/scan_by_key.h +23 -0
  1855. cuda/cccl/headers/include/thrust/system/omp/detail/scatter.h +17 -0
  1856. cuda/cccl/headers/include/thrust/system/omp/detail/sequence.h +17 -0
  1857. cuda/cccl/headers/include/thrust/system/omp/detail/set_operations.h +17 -0
  1858. cuda/cccl/headers/include/thrust/system/omp/detail/sort.h +245 -0
  1859. cuda/cccl/headers/include/thrust/system/omp/detail/swap_ranges.h +17 -0
  1860. cuda/cccl/headers/include/thrust/system/omp/detail/tabulate.h +17 -0
  1861. cuda/cccl/headers/include/thrust/system/omp/detail/temporary_buffer.h +16 -0
  1862. cuda/cccl/headers/include/thrust/system/omp/detail/transform.h +17 -0
  1863. cuda/cccl/headers/include/thrust/system/omp/detail/transform_reduce.h +17 -0
  1864. cuda/cccl/headers/include/thrust/system/omp/detail/transform_scan.h +17 -0
  1865. cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_copy.h +17 -0
  1866. cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_fill.h +17 -0
  1867. cuda/cccl/headers/include/thrust/system/omp/detail/unique.h +52 -0
  1868. cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.h +56 -0
  1869. cuda/cccl/headers/include/thrust/system/omp/execution_policy.h +62 -0
  1870. cuda/cccl/headers/include/thrust/system/omp/memory.h +153 -0
  1871. cuda/cccl/headers/include/thrust/system/omp/memory_resource.h +71 -0
  1872. cuda/cccl/headers/include/thrust/system/omp/pointer.h +120 -0
  1873. cuda/cccl/headers/include/thrust/system/omp/vector.h +96 -0
  1874. cuda/cccl/headers/include/thrust/system/system_error.h +183 -0
  1875. cuda/cccl/headers/include/thrust/system/tbb/detail/adjacent_difference.h +33 -0
  1876. cuda/cccl/headers/include/thrust/system/tbb/detail/assign_value.h +17 -0
  1877. cuda/cccl/headers/include/thrust/system/tbb/detail/binary_search.h +17 -0
  1878. cuda/cccl/headers/include/thrust/system/tbb/detail/copy.h +59 -0
  1879. cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.h +117 -0
  1880. cuda/cccl/headers/include/thrust/system/tbb/detail/count.h +17 -0
  1881. cuda/cccl/headers/include/thrust/system/tbb/detail/equal.h +17 -0
  1882. cuda/cccl/headers/include/thrust/system/tbb/detail/execution_policy.h +113 -0
  1883. cuda/cccl/headers/include/thrust/system/tbb/detail/extrema.h +45 -0
  1884. cuda/cccl/headers/include/thrust/system/tbb/detail/fill.h +17 -0
  1885. cuda/cccl/headers/include/thrust/system/tbb/detail/find.h +28 -0
  1886. cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.h +72 -0
  1887. cuda/cccl/headers/include/thrust/system/tbb/detail/gather.h +17 -0
  1888. cuda/cccl/headers/include/thrust/system/tbb/detail/generate.h +17 -0
  1889. cuda/cccl/headers/include/thrust/system/tbb/detail/get_value.h +17 -0
  1890. cuda/cccl/headers/include/thrust/system/tbb/detail/inner_product.h +17 -0
  1891. cuda/cccl/headers/include/thrust/system/tbb/detail/iter_swap.h +17 -0
  1892. cuda/cccl/headers/include/thrust/system/tbb/detail/logical.h +17 -0
  1893. cuda/cccl/headers/include/thrust/system/tbb/detail/malloc_and_free.h +17 -0
  1894. cuda/cccl/headers/include/thrust/system/tbb/detail/merge.h +302 -0
  1895. cuda/cccl/headers/include/thrust/system/tbb/detail/mismatch.h +17 -0
  1896. cuda/cccl/headers/include/thrust/system/tbb/detail/partition.h +79 -0
  1897. cuda/cccl/headers/include/thrust/system/tbb/detail/per_device_resource.h +16 -0
  1898. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.h +121 -0
  1899. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.h +378 -0
  1900. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_intervals.h +118 -0
  1901. cuda/cccl/headers/include/thrust/system/tbb/detail/remove.h +66 -0
  1902. cuda/cccl/headers/include/thrust/system/tbb/detail/replace.h +17 -0
  1903. cuda/cccl/headers/include/thrust/system/tbb/detail/reverse.h +17 -0
  1904. cuda/cccl/headers/include/thrust/system/tbb/detail/scan.h +294 -0
  1905. cuda/cccl/headers/include/thrust/system/tbb/detail/scan_by_key.h +20 -0
  1906. cuda/cccl/headers/include/thrust/system/tbb/detail/scatter.h +17 -0
  1907. cuda/cccl/headers/include/thrust/system/tbb/detail/sequence.h +17 -0
  1908. cuda/cccl/headers/include/thrust/system/tbb/detail/set_operations.h +17 -0
  1909. cuda/cccl/headers/include/thrust/system/tbb/detail/sort.h +273 -0
  1910. cuda/cccl/headers/include/thrust/system/tbb/detail/swap_ranges.h +17 -0
  1911. cuda/cccl/headers/include/thrust/system/tbb/detail/tabulate.h +17 -0
  1912. cuda/cccl/headers/include/thrust/system/tbb/detail/temporary_buffer.h +16 -0
  1913. cuda/cccl/headers/include/thrust/system/tbb/detail/transform.h +17 -0
  1914. cuda/cccl/headers/include/thrust/system/tbb/detail/transform_reduce.h +17 -0
  1915. cuda/cccl/headers/include/thrust/system/tbb/detail/transform_scan.h +17 -0
  1916. cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_copy.h +17 -0
  1917. cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_fill.h +17 -0
  1918. cuda/cccl/headers/include/thrust/system/tbb/detail/unique.h +49 -0
  1919. cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.h +55 -0
  1920. cuda/cccl/headers/include/thrust/system/tbb/execution_policy.h +62 -0
  1921. cuda/cccl/headers/include/thrust/system/tbb/memory.h +139 -0
  1922. cuda/cccl/headers/include/thrust/system/tbb/memory_resource.h +57 -0
  1923. cuda/cccl/headers/include/thrust/system/tbb/pointer.h +106 -0
  1924. cuda/cccl/headers/include/thrust/system/tbb/vector.h +82 -0
  1925. cuda/cccl/headers/include/thrust/system_error.h +57 -0
  1926. cuda/cccl/headers/include/thrust/tabulate.h +125 -0
  1927. cuda/cccl/headers/include/thrust/transform.h +1056 -0
  1928. cuda/cccl/headers/include/thrust/transform_reduce.h +190 -0
  1929. cuda/cccl/headers/include/thrust/transform_scan.h +442 -0
  1930. cuda/cccl/headers/include/thrust/tuple.h +139 -0
  1931. cuda/cccl/headers/include/thrust/type_traits/integer_sequence.h +256 -0
  1932. cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +156 -0
  1933. cuda/cccl/headers/include/thrust/type_traits/is_execution_policy.h +65 -0
  1934. cuda/cccl/headers/include/thrust/type_traits/is_operator_less_or_greater_function_object.h +180 -0
  1935. cuda/cccl/headers/include/thrust/type_traits/is_operator_plus_function_object.h +112 -0
  1936. cuda/cccl/headers/include/thrust/type_traits/is_trivially_relocatable.h +333 -0
  1937. cuda/cccl/headers/include/thrust/type_traits/logical_metafunctions.h +42 -0
  1938. cuda/cccl/headers/include/thrust/type_traits/unwrap_contiguous_iterator.h +63 -0
  1939. cuda/cccl/headers/include/thrust/uninitialized_copy.h +300 -0
  1940. cuda/cccl/headers/include/thrust/uninitialized_fill.h +268 -0
  1941. cuda/cccl/headers/include/thrust/unique.h +1089 -0
  1942. cuda/cccl/headers/include/thrust/universal_allocator.h +101 -0
  1943. cuda/cccl/headers/include/thrust/universal_ptr.h +34 -0
  1944. cuda/cccl/headers/include/thrust/universal_vector.h +80 -0
  1945. cuda/cccl/headers/include/thrust/version.h +93 -0
  1946. cuda/cccl/headers/include/thrust/zip_function.h +149 -0
  1947. cuda/cccl/headers/include_paths.py +51 -0
  1948. cuda/cccl/headers/lib/cmake/cccl/cccl-config-version.cmake +25 -0
  1949. cuda/cccl/headers/lib/cmake/cccl/cccl-config.cmake +143 -0
  1950. cuda/cccl/headers/lib/cmake/cub/cub-config-version.cmake +29 -0
  1951. cuda/cccl/headers/lib/cmake/cub/cub-config.cmake +172 -0
  1952. cuda/cccl/headers/lib/cmake/cub/cub-header-search.cmake +15 -0
  1953. cuda/cccl/headers/lib/cmake/libcudacxx/libcudacxx-config-version.cmake +37 -0
  1954. cuda/cccl/headers/lib/cmake/libcudacxx/libcudacxx-config.cmake +297 -0
  1955. cuda/cccl/headers/lib/cmake/libcudacxx/libcudacxx-header-search.cmake +15 -0
  1956. cuda/cccl/headers/lib/cmake/thrust/FindTBB.cmake +498 -0
  1957. cuda/cccl/headers/lib/cmake/thrust/README.md +258 -0
  1958. cuda/cccl/headers/lib/cmake/thrust/thrust-config-version.cmake +37 -0
  1959. cuda/cccl/headers/lib/cmake/thrust/thrust-config.cmake +983 -0
  1960. cuda/cccl/headers/lib/cmake/thrust/thrust-header-search.cmake +15 -0
  1961. cuda/cccl/parallel/__init__.py +9 -0
  1962. cuda/cccl/parallel/experimental/__init__.py +24 -0
  1963. cuda/cccl/py.typed +0 -0
  1964. cuda/compute/__init__.py +91 -0
  1965. cuda/compute/_bindings.py +79 -0
  1966. cuda/compute/_bindings.pyi +516 -0
  1967. cuda/compute/_bindings_impl.pyx +2470 -0
  1968. cuda/compute/_caching.py +83 -0
  1969. cuda/compute/_cccl_interop.py +354 -0
  1970. cuda/compute/_odr_helpers.py +238 -0
  1971. cuda/compute/_utils/__init__.py +0 -0
  1972. cuda/compute/_utils/protocols.py +145 -0
  1973. cuda/compute/_utils/temp_storage_buffer.py +87 -0
  1974. cuda/compute/algorithms/__init__.py +62 -0
  1975. cuda/compute/algorithms/_histogram.py +243 -0
  1976. cuda/compute/algorithms/_reduce.py +205 -0
  1977. cuda/compute/algorithms/_scan.py +344 -0
  1978. cuda/compute/algorithms/_segmented_reduce.py +265 -0
  1979. cuda/compute/algorithms/_select.py +196 -0
  1980. cuda/compute/algorithms/_sort/__init__.py +23 -0
  1981. cuda/compute/algorithms/_sort/_merge_sort.py +235 -0
  1982. cuda/compute/algorithms/_sort/_radix_sort.py +263 -0
  1983. cuda/compute/algorithms/_sort/_segmented_sort.py +288 -0
  1984. cuda/compute/algorithms/_sort/_sort_common.py +52 -0
  1985. cuda/compute/algorithms/_three_way_partition.py +292 -0
  1986. cuda/compute/algorithms/_transform.py +317 -0
  1987. cuda/compute/algorithms/_unique_by_key.py +259 -0
  1988. cuda/compute/cccl/.gitkeep +0 -0
  1989. cuda/compute/cu12/_bindings_impl.cpython-312-x86_64-linux-gnu.so +0 -0
  1990. cuda/compute/cu12/cccl/libcccl.c.parallel.so +0 -0
  1991. cuda/compute/cu13/_bindings_impl.cpython-312-x86_64-linux-gnu.so +0 -0
  1992. cuda/compute/cu13/cccl/libcccl.c.parallel.so +0 -0
  1993. cuda/compute/determinism.py +3 -0
  1994. cuda/compute/iterators/__init__.py +23 -0
  1995. cuda/compute/iterators/_factories.py +251 -0
  1996. cuda/compute/iterators/_iterators.py +680 -0
  1997. cuda/compute/iterators/_permutation_iterator.py +266 -0
  1998. cuda/compute/iterators/_zip_iterator.py +268 -0
  1999. cuda/compute/numba_utils.py +54 -0
  2000. cuda/compute/op.py +140 -0
  2001. cuda/compute/struct.py +520 -0
  2002. cuda/compute/typing.py +36 -0
  2003. cuda/coop/__init__.py +8 -0
  2004. cuda/coop/_caching.py +48 -0
  2005. cuda/coop/_common.py +275 -0
  2006. cuda/coop/_nvrtc.py +92 -0
  2007. cuda/coop/_scan_op.py +181 -0
  2008. cuda/coop/_types.py +937 -0
  2009. cuda/coop/_typing.py +107 -0
  2010. cuda/coop/block/__init__.py +39 -0
  2011. cuda/coop/block/_block_exchange.py +251 -0
  2012. cuda/coop/block/_block_load_store.py +215 -0
  2013. cuda/coop/block/_block_merge_sort.py +125 -0
  2014. cuda/coop/block/_block_radix_sort.py +214 -0
  2015. cuda/coop/block/_block_reduce.py +294 -0
  2016. cuda/coop/block/_block_scan.py +983 -0
  2017. cuda/coop/warp/__init__.py +9 -0
  2018. cuda/coop/warp/_warp_merge_sort.py +92 -0
  2019. cuda/coop/warp/_warp_reduce.py +153 -0
  2020. cuda/coop/warp/_warp_scan.py +78 -0
  2021. cuda_cccl-0.4.3.dist-info/METADATA +84 -0
  2022. cuda_cccl-0.4.3.dist-info/RECORD +2024 -0
  2023. cuda_cccl-0.4.3.dist-info/WHEEL +5 -0
  2024. cuda_cccl-0.4.3.dist-info/licenses/LICENSE +1 -0
@@ -0,0 +1,2470 @@
1
+ # distutils: language = c++
2
+ # cython: language_level=3
3
+ # cython: linetrace=True
4
+
5
+ # Python signatures are declared in the companion Python stub file _bindings.pyi.
6
+ # Make sure to update the PYI file whenever the Python API changes, so that
7
+ # static type checkers such as mypy keep green-lighting cuda.compute.
8
+
9
+ from libc.string cimport memset, memcpy
10
+ from libc.stdint cimport uint8_t, uint32_t, uint64_t, int64_t, uintptr_t
11
+ from cpython.bytes cimport PyBytes_FromStringAndSize
12
+
13
+ from cpython.buffer cimport (
14
+ Py_buffer, PyBUF_SIMPLE, PyBUF_ANY_CONTIGUOUS,
15
+ PyBuffer_Release, PyObject_CheckBuffer, PyObject_GetBuffer
16
+ )
17
+ from cpython.pycapsule cimport (
18
+ PyCapsule_CheckExact, PyCapsule_IsValid, PyCapsule_GetPointer
19
+ )
20
+
21
+ import ctypes
22
+ from enum import IntEnum
23
# Names taken from the CUDA header <cuda.h>. The structs are declared
# without members, so Cython code can only handle them through the pointer
# typedefs below.
cdef extern from "<cuda.h>":
    cdef struct OpaqueCUstream_st
    cdef struct OpaqueCUkernel_st
    cdef struct OpaqueCUlibrary_st

    # declared to Cython as a plain int
    ctypedef int CUresult
    # handle types: pointers to the member-less structs declared above
    ctypedef OpaqueCUstream_st *CUstream
    ctypedef OpaqueCUkernel_st *CUkernel
    ctypedef OpaqueCUlibrary_st *CUlibrary
32
+
33
+
34
# Declarations taken from the C header cccl/c/types.h. For the enums that
# carry quoted strings, the string is the C identifier behind the shorter
# Cython-side enumerator name. Enums declared with `cpdef` are also usable
# from Python code.
cdef extern from "cccl/c/types.h":
    # Type identifiers stored in cccl_type_info.type
    cpdef enum cccl_type_enum:
        INT8 "CCCL_INT8"
        INT16 "CCCL_INT16"
        INT32 "CCCL_INT32"
        INT64 "CCCL_INT64"
        UINT8 "CCCL_UINT8"
        UINT16 "CCCL_UINT16"
        UINT32 "CCCL_UINT32"
        UINT64 "CCCL_UINT64"
        FLOAT16 "CCCL_FLOAT16"
        FLOAT32 "CCCL_FLOAT32"
        FLOAT64 "CCCL_FLOAT64"
        STORAGE "CCCL_STORAGE"
        BOOLEAN "CCCL_BOOLEAN"

    # Operation kinds stored in cccl_op_t.type. Besides STATELESS and
    # STATEFUL, the members are named after arithmetic, comparison, logical
    # and bitwise operators.
    cpdef enum cccl_op_kind_t:
        STATELESS "CCCL_STATELESS"
        STATEFUL "CCCL_STATEFUL"
        PLUS "CCCL_PLUS"
        MINUS "CCCL_MINUS"
        MULTIPLIES "CCCL_MULTIPLIES"
        DIVIDES "CCCL_DIVIDES"
        MODULUS "CCCL_MODULUS"
        EQUAL_TO "CCCL_EQUAL_TO"
        NOT_EQUAL_TO "CCCL_NOT_EQUAL_TO"
        GREATER "CCCL_GREATER"
        LESS "CCCL_LESS"
        GREATER_EQUAL "CCCL_GREATER_EQUAL"
        LESS_EQUAL "CCCL_LESS_EQUAL"
        LOGICAL_AND "CCCL_LOGICAL_AND"
        LOGICAL_OR "CCCL_LOGICAL_OR"
        LOGICAL_NOT "CCCL_LOGICAL_NOT"
        BIT_AND "CCCL_BIT_AND"
        BIT_OR "CCCL_BIT_OR"
        BIT_XOR "CCCL_BIT_XOR"
        BIT_NOT "CCCL_BIT_NOT"
        IDENTITY "CCCL_IDENTITY"
        NEGATE "CCCL_NEGATE"
        MINIMUM "CCCL_MINIMUM"
        MAXIMUM "CCCL_MAXIMUM"

    # Iterator kinds stored in cccl_iterator_t.type
    cpdef enum cccl_iterator_kind_t:
        POINTER "CCCL_POINTER"
        ITERATOR "CCCL_ITERATOR"

    # Size, alignment and type identifier of a value
    cdef struct cccl_type_info:
        size_t size
        size_t alignment
        cccl_type_enum type

    # Stored in cccl_op_t.code_type; presumably tells LTO-IR apart from C++
    # source, going by the enumerator names -- TODO confirm against the header
    cdef enum cccl_op_code_type:
        CCCL_OP_LTOIR
        CCCL_OP_CPP_SOURCE

    # Description of an operation: its kind, name, code buffer and state
    # buffer
    cdef struct cccl_op_t:
        cccl_op_kind_t type
        const char* name
        # code buffer, its length and its cccl_op_code_type
        const char* code
        size_t code_size
        cccl_op_code_type code_type
        # size and alignment accompanying the `state` pointer
        size_t size
        size_t alignment
        void *state

    # A type description together with an untyped pointer
    cdef struct cccl_value_t:
        cccl_type_info type
        void *state

    # 64-bit offset that can be read as either signed or unsigned
    cdef union cccl_increment_t:
        int64_t signed_offset
        uint64_t unsigned_offset

    # Pointer to a function taking (void *, cccl_increment_t) and returning
    # nothing; declared nogil, so it may be called without holding the GIL
    ctypedef void (*cccl_host_op_fn_ptr_t)(void *, cccl_increment_t) nogil

    # Description of an iterator: its kind, the advance and dereference
    # operations, the value type, a state pointer with its size and
    # alignment, and a host-side advance function pointer
    cdef struct cccl_iterator_t:
        size_t size
        size_t alignment
        cccl_iterator_kind_t type
        cccl_op_t advance
        cccl_op_t dereference
        cccl_type_info value_type
        void *state
        cccl_host_op_fn_ptr_t host_advance

    cpdef enum cccl_sort_order_t:
        ASCENDING "CCCL_ASCENDING"
        DESCENDING "CCCL_DESCENDING"

    cpdef enum cccl_init_kind_t:
        VALUE_INIT "CCCL_VALUE_INIT"
        FUTURE_VALUE_INIT "CCCL_FUTURE_VALUE_INIT"
        NO_INIT "CCCL_NO_INIT"

    cpdef enum cccl_determinism_t:
        NOT_GUARANTEED "CCCL_NOT_GUARANTEED"
        RUN_TO_RUN "CCCL_RUN_TO_RUN"
        GPU_TO_GPU "CCCL_GPU_TO_GPU"
132
+
133
cdef void arg_type_check(
    str arg_name,
    object expected_type,
    object arg
) except *:
    """
    Verify that ``arg`` is an instance of ``expected_type``.

    Args:
        arg_name: Name of the argument, used in the error message.
        expected_type: Type (or tuple of types) accepted by ``isinstance``.
        arg: The value to check.

    Raises:
        TypeError: if ``arg`` is not an instance of ``expected_type``.
    """
    # nothing to report when the value already has an accepted type
    if isinstance(arg, expected_type):
        return
    actual_type = type(arg)
    raise TypeError(
        f"Expected {arg_name} to have type '{expected_type}', "
        f"got '{actual_type}'"
    )
143
+
144
# Module-level aliases that bind shorter names to the cpdef enums declared
# in the cccl/c/types.h extern block above.
OpKind = cccl_op_kind_t
TypeEnum = cccl_type_enum
IteratorKind = cccl_iterator_kind_t
SortOrder = cccl_sort_order_t
InitKind = cccl_init_kind_t
Determinism = cccl_determinism_t
150
+
151
cdef void _validate_alignment(int alignment) except *:
    """
    Check that ``alignment`` is a positive power of two.

    Args:
        alignment: Candidate alignment value.

    Raises:
        ValueError: if ``alignment`` is smaller than 1, or is not a power
            of two.
    """
    cdef uint32_t val
    if alignment < 1:
        # zero is rejected too, so the requirement is "positive",
        # not "non-negative"
        raise ValueError(
            "Alignment must be positive, "
            f"got {alignment}."
        )
    # a power of two has a single bit set, so clearing the lowest set bit
    # with val & (val - 1) must leave zero
    val = <uint32_t>alignment
    if (val & (val - 1)) != 0:
        raise ValueError(
            "Alignment must be a power of two, "
            f"got {alignment}"
        )
168
+
169
+
170
+ cdef class Op:
171
+ """
172
+ Represents CCCL Operation
173
+
174
+ Args:
175
+ name (str):
176
+ Name of the operation
177
+ operator_type (OpKind):
178
+ Whether operator is stateless or stateful
179
+ ltoir (bytes):
180
+ The LTOIR for the operation compiled for device
181
+ state (bytes, optional):
182
+ State for the stateful operation.
183
+ state_alignment (int, optional):
184
+ Alignment of the state struct. Default: `1`.
185
+ """
186
+ # need Python owner of memory used for operator name
187
+ cdef bytes op_encoded_name
188
+ cdef bytes code_bytes
189
+ cdef bytes state_bytes
190
+ cdef cccl_op_t op_data
191
+
192
+
193
+ cdef void _set_members(self, cccl_op_kind_t op_type, str name, bytes lto_ir, bytes state, int state_alignment):
194
+ memset(&self.op_data, 0, sizeof(cccl_op_t))
195
+ # Reference Python objects in the class to ensure lifetime
196
+ self.op_encoded_name = name.encode("utf-8")
197
+ self.code_bytes = lto_ir
198
+ self.state_bytes = state
199
+ # set fields of op_data struct
200
+ self.op_data.type = op_type
201
+ self.op_data.name = <const char *>self.op_encoded_name
202
+ self.op_data.code = <const char *>lto_ir
203
+ self.op_data.code_size = len(lto_ir)
204
+ self.op_data.code_type = cccl_op_code_type.CCCL_OP_LTOIR
205
+ self.op_data.size = len(state)
206
+ self.op_data.alignment = state_alignment
207
+ self.op_data.state = <void *><const char *>state
208
+
209
+
210
+ def __cinit__(self, /, *, name = None, operator_type = None, ltoir = None, state = None, state_alignment = 1):
211
+ if name is None and ltoir is None:
212
+ name = ""
213
+ ltoir = b""
214
+ if state is None:
215
+ state = b""
216
+ if operator_type is None:
217
+ operator_type = OpKind.STATELESS
218
+ arg_type_check(arg_name="name", expected_type=str, arg=name)
219
+ arg_type_check(arg_name="ltoir", expected_type=bytes, arg=ltoir)
220
+ arg_type_check(arg_name="state", expected_type=bytes, arg=state)
221
+ arg_type_check(arg_name="state_alignment", expected_type=int, arg=state_alignment)
222
+ if not isinstance(operator_type, OpKind):
223
+ raise TypeError(
224
+ f"The operator_type argument should be an enumerator of operator kinds"
225
+ )
226
+ _validate_alignment(state_alignment)
227
+ self._set_members(
228
+ <cccl_op_kind_t> operator_type.value,
229
+ <str> name,
230
+ <bytes> ltoir,
231
+ <bytes> state,
232
+ <int> state_alignment
233
+ )
234
+
235
+
236
+     cdef void set_state(self, bytes state):
+         # Replace the operator state blob.
+         # The owning reference is stored first so the buffer behind the raw
+         # pointer stays alive, then pointer AND size are refreshed together.
+         # Previously only the pointer was updated, so after assigning a blob
+         # of a different length op_data.size still described the old buffer.
+         self.state_bytes = state
+         self.op_data.state = <void *><const char *>self.state_bytes
+         self.op_data.size = len(self.state_bytes)
239
+
240
+     @property
+     def state(self):
+         "The bytes object currently backing the operator state"
+         return self.state_bytes
+
+     @state.setter
+     def state(self, bytes new_value):
+         # Delegates to the C-level setter, which stores the bytes object and
+         # re-points the native struct at it.
+         self.set_state(<bytes>new_value)
247
+
248
+     @property
+     def name(self):
+         "Operator name, decoded from the UTF-8 bytes kept for the native struct"
+         encoded = self.op_encoded_name
+         return encoded.decode("utf-8")
251
+
252
+     @property
+     def ltoir(self):
+         "Backward compatibility property; returns the same bytes object as `code`"
+         return self.code_bytes
256
+
257
+     @property
+     def code(self):
+         "The bytes object holding the operator's code blob"
+         return self.code_bytes
260
+
261
+     @property
+     def state_alignment(self):
+         "Alignment value stored in the native operator struct"
+         return self.op_data.alignment
264
+
265
+     @property
+     def state_typenum(self):
+         "Value of the `type` field of the native operator struct (the operator kind)"
+         return self.op_data.type
268
+
269
+     def as_bytes(self):
+         "Debugging utility to view memory content of library struct"
+         # Copy the raw struct into a scratch bytearray, then return an
+         # immutable snapshot of it.
+         cdef size_t nbytes = sizeof(self.op_data)
+         cdef uint8_t[:] raw = bytearray(nbytes)
+         memcpy(&raw[0], &self.op_data, nbytes)
+         return bytes(raw)
274
+
275
+
276
+ cdef class TypeInfo:
+     """
+     Python wrapper around the native ``cccl_type_info`` struct.
+
+     Args:
+         size (int):
+             Size of the type in bytes; must be at least 1.
+         alignment (int):
+             Alignment of the type in bytes; checked by ``_validate_alignment``.
+         type_enum (TypeEnum):
+             Enumeration member identifying the type.
+
+     Raises:
+         ValueError: if ``size`` is smaller than 1.
+     """
+     cdef cccl_type_info type_info
+
+     def __cinit__(self, int size, int alignment, cccl_type_enum type_enum):
+         if size <= 0:
+             raise ValueError("Size argument must be positive")
+         _validate_alignment(alignment)
+
+         # Inputs accepted: record them in the native struct.
+         self.type_info.size = size
+         self.type_info.alignment = alignment
+         self.type_info.type = type_enum
+
+     @property
+     def size(self):
+         "Size in bytes, as stored in the native struct"
+         return self.type_info.size
+
+     @property
+     def alignment(self):
+         "Alignment in bytes, as stored in the native struct"
+         return self.type_info.alignment
+
+     @property
+     def typenum(self):
+         "Type enumerator, as stored in the native struct"
+         return self.type_info.type
+
+     def as_bytes(self):
+         "Debugging utility to view memory content of library struct"
+         cdef size_t nbytes = sizeof(self.type_info)
+         cdef uint8_t[:] raw = bytearray(nbytes)
+         memcpy(&raw[0], &self.type_info, nbytes)
+         return bytes(raw)
315
+
316
+
317
+ cdef class Value:
+     """
+     Python wrapper around the native ``cccl_value_t`` struct.
+
+     Args:
+         value_type (TypeInfo):
+             type descriptor
+         state (object):
+             state of the value type. Object is expected to
+             implement Python buffer protocol and be able to provide
+             simple contiguous array of type `uint8_t`.
+     """
+     # The memoryview keeps the exporting object alive while the native
+     # struct holds a raw pointer to its first byte.
+     cdef uint8_t[::1] state_obj
+     cdef TypeInfo value_type
+     cdef cccl_value_t value_data
+
+     def __cinit__(self, TypeInfo value_type, uint8_t[::1] state):
+         self.state_obj = state
+         self.value_type = value_type
+         # The type descriptor is copied by value; the state is referenced.
+         self.value_data.type = value_type.type_info
+         self.value_data.state = <void *>&state[0]
+
+     @property
+     def type(self):
+         "The TypeInfo object passed at construction"
+         return self.value_type
+
+     @property
+     def state(self):
+         "Memoryview over the buffer currently backing the value"
+         return self.state_obj
+
+     @state.setter
+     def state(self, uint8_t[::1] new_value):
+         # Only a buffer of the same length as the current one is accepted.
+         if len(self.state_obj) != len(new_value):
+             raise ValueError("Size mismatch")
+         self.state_obj = new_value
+         self.value_data.state = <void *>&self.state_obj[0]
+
+     def as_bytes(self):
+         "Debugging utility to view memory of native struct"
+         cdef size_t nbytes = sizeof(self.value_data)
+         cdef uint8_t[:] raw = bytearray(nbytes)
+         memcpy(&raw[0], &self.value_data, nbytes)
+         return bytes(raw)
360
+
361
+
362
+ cdef void ensure_buffer(object o) except *:
+     # Raise TypeError unless `o` supports the Python buffer protocol.
+     if PyObject_CheckBuffer(o):
+         return
+     raise TypeError(
+         "Object with buffer protocol expected, "
+         f"got {type(o)}"
+     )
368
+
369
+
370
+ cdef void * get_buffer_pointer(object o, size_t *size):
+     # Return the address of the contiguous buffer exported by `o`.
+     #
+     # o:    object implementing the buffer protocol
+     # size: optional out-parameter; when not NULL it receives the buffer
+     #       length in bytes (0 on failure)
+     #
+     # The buffer view is released before returning, so the returned pointer
+     # is only usable while the caller keeps `o` (and its storage) alive.
+     cdef int status = 0
+     cdef void *ptr = NULL
+     cdef Py_buffer view
+
+     status = PyObject_GetBuffer(o, &view, PyBUF_SIMPLE | PyBUF_ANY_CONTIGUOUS)
+     if status != 0: # pragma: no cover
+         # `size` is optional; guard the write here exactly like the success
+         # path does, instead of dereferencing a possibly NULL pointer.
+         if size is not NULL:
+             size[0] = 0
+         raise RuntimeError(
+             "Can not access simple contiguous buffer"
+         )
+
+     ptr = view.buf
+     if size is not NULL:
+         size[0] = <size_t>view.len
+     PyBuffer_Release(&view)
+
+     return ptr
388
+
389
+
390
+ cdef void * ctypes_typed_pointer_payload_ptr(object ctypes_typed_ptr):
+     "Get pointer to the value buffer represented by ctypes.pointer(ctypes_val)"
+     cdef size_t nbytes = 0
+     cdef size_t *slot = NULL
+
+     ensure_buffer(ctypes_typed_ptr)
+     # The object's own buffer is read as one pointer-sized integer, which is
+     # then returned as the address of the payload.
+     slot = <size_t *>get_buffer_pointer(ctypes_typed_ptr, &nbytes)
+     return <void *>(slot[0])
397
+
398
+
399
+ cdef void * ctypes_value_ptr(object ctypes_cdata):
+     "Get pointer to the value buffer behind ctypes_val"
+     # The buffer length is requested but not used by this helper.
+     cdef size_t nbytes = 0
+     cdef void *data = NULL
+
+     ensure_buffer(ctypes_cdata)
+     data = get_buffer_pointer(ctypes_cdata, &nbytes)
+     return data
404
+
405
+
406
+ cdef inline void * int_as_ptr(size_t ptr_val):
+     # Reinterpret an unsigned integer as a raw address.
+     cdef void *address = <void *>(ptr_val)
+     return address
408
+
409
+
410
+ cdef class StateBase:
+     # Pairs a raw address with the Python object that is stored alongside it
+     # as `ref` (exposed as `reference`).
+     cdef void *ptr
+     cdef object ref
+
+     def __cinit__(self):
+         # Start out empty: no address, no reference.
+         self.ref = None
+         self.ptr = NULL
+
+     cdef inline void set_state(self, void *ptr, object ref):
+         self.ref = ref
+         self.ptr = ptr
+
+     @property
+     def pointer(self):
+         "The stored address as a Python integer"
+         return <size_t>self.ptr
+
+     @property
+     def reference(self):
+         "The Python object stored together with the address (may be None)"
+         return self.ref
429
+
430
+
431
+ cdef class Pointer(StateBase):
+     """
+     Represents the pointer value
+
+     Args:
+         arg: one of
+             * ``int`` - used directly as the address, no object is referenced;
+             * typed ctypes pointer (``ctypes._Pointer``) - the address it
+               holds is used and the pointer object is kept as reference;
+             * ``ctypes.c_void_p`` - its value is used as the address (a NULL
+               ``c_void_p`` yields address 0) and the object is kept as
+               reference.
+
+     Raises:
+         TypeError: for any other argument type.
+     """
+
+     def __cinit__(self, arg):
+         cdef void *ptr
+         cdef object ref
+
+         if isinstance(arg, int):
+             ptr = int_as_ptr(arg)
+             ref = None
+         elif isinstance(arg, ctypes._Pointer):
+             ptr = ctypes_typed_pointer_payload_ptr(arg)
+             ref = arg
+         elif isinstance(arg, ctypes.c_void_p):
+             # ctypes reports a NULL c_void_p as value None, which cannot be
+             # converted to an integer; map it to address 0 instead of
+             # failing with an unrelated TypeError.
+             ptr = int_as_ptr(arg.value or 0)
+             ref = arg
+         else:
+             raise TypeError(
+                 "Expect ctypes pointer, integers, or PointerProxy, "
+                 f"got type {type(arg)}"
+             )
+         self.set_state(ptr, ref)
453
+
454
+
455
+ def make_pointer_object(ptr, owner):
+     """
+     Build a Pointer for address `ptr` that references `owner`.
+
+     Args:
+         ptr: address as ``int`` or ``ctypes.c_void_p`` (a NULL ``c_void_p``
+             yields address 0).
+         owner: arbitrary Python object stored as the Pointer's reference.
+
+     Returns:
+         Pointer: new object holding the address and the owner.
+
+     Raises:
+         TypeError: if `ptr` is neither an int nor a ``ctypes.c_void_p``.
+     """
+     cdef Pointer res = Pointer(0)
+
+     if isinstance(ptr, int):
+         res.ptr = int_as_ptr(ptr)
+     elif isinstance(ptr, ctypes.c_void_p):
+         # ctypes reports a NULL c_void_p as value None; treat it as address 0
+         # rather than failing on the integer conversion.
+         res.ptr = int_as_ptr(ptr.value or 0)
+     else:
+         raise TypeError(
+             "First argument must be an integer, or ctypes.c_void_p, "
+             f"got {type(ptr)}"
+         )
+     res.ref = owner
+     return res
469
+
470
+
471
+ cdef class IteratorState(StateBase):
+     """
+     Represents blob referenced by pointer
+
+     Args:
+         arg: either a typed ctypes pointer (the pointee is referenced and its
+             ``ctypes.sizeof`` is recorded as the size), or any object
+             implementing the buffer protocol (the object itself is referenced
+             and its buffer length is recorded as the size).
+
+     Raises:
+         TypeError: for any other argument type.
+     """
+     cdef size_t state_nbytes
+
+     def __cinit__(self, arg):
+         cdef size_t nbytes = 0
+         cdef void *data = NULL
+         cdef object owner = None
+
+         super().__init__()
+         if isinstance(arg, ctypes._Pointer):
+             data = ctypes_typed_pointer_payload_ptr(arg)
+             owner = arg.contents
+             self.state_nbytes = ctypes.sizeof(owner)
+         elif PyObject_CheckBuffer(arg):
+             data = get_buffer_pointer(arg, &nbytes)
+             owner = arg
+             self.state_nbytes = nbytes
+         else:
+             raise TypeError(
+                 "Expected a ctypes pointer with content, or object of type bytes or bytearray, "
+                 f"got type {type(arg)}"
+             )
+         self.set_state(data, owner)
+
+     cdef inline size_t get_size(self):
+         return self.state_nbytes
+
+     @property
+     def size(self):
+         "Size of the referenced blob in bytes"
+         return self.state_nbytes
+
+     def __getbuffer__(self, Py_buffer *buffer, int flags):
+         # Export the blob as a 1-d buffer of unsigned bytes. `flags` is not
+         # consulted and the buffer is always reported as writable.
+         cdef Py_ssize_t nbytes = <Py_ssize_t>self.state_nbytes
+
+         buffer.obj = self
+         buffer.buf = <char *>self.ptr
+         buffer.len = nbytes
+         buffer.readonly = 0
+
+         # Element layout: single bytes.
+         buffer.format = "B"  # unsigned char
+         buffer.itemsize = 1
+
+         # One dimension; the shape aliases the stored size and the stride
+         # aliases the itemsize field of this very Py_buffer struct.
+         buffer.ndim = 1
+         buffer.shape = <Py_ssize_t *>&self.state_nbytes
+         buffer.strides = &buffer.itemsize
+         buffer.suboffsets = NULL
+         buffer.internal = NULL
+
+     def __releasebuffer__(self, Py_buffer *buffer):
+         # Nothing to release.
+         pass
519
+
520
+
521
+ # Capsule name that identifies a host advance function pointer.
+ cdef const char *function_ptr_capsule_name = "void (void *, cccl_increment_t)"
+
+ cdef bint is_function_pointer_capsule(object o) noexcept:
+     """
+     Returns non-zero if input is a valid capsule with
+     name 'void (void *, cccl_increment_t)'.
+     """
+     # Anything that is not exactly a capsule is rejected before the name
+     # check is attempted.
+     if not PyCapsule_CheckExact(o):
+         return False
+     return PyCapsule_IsValid(o, function_ptr_capsule_name)
532
+
533
+
534
+ cdef inline void* get_function_pointer_from_capsule(object cap) except *:
+     # Look the pointer up under the expected capsule name.
+     cdef void *fn_ptr = PyCapsule_GetPointer(cap, function_ptr_capsule_name)
+     return fn_ptr
536
+
537
+
538
+ cdef cccl_host_op_fn_ptr_t unbox_host_advance_fn(object host_fn_obj) except *:
+     # Convert a Python-level description of a host function into a raw
+     # cccl_host_op_fn_ptr_t. Accepted inputs, tried in this order:
+     #   * ctypes function pointer object,
+     #   * int (raw address),
+     #   * ctypes.c_void_p (raw address; a NULL c_void_p yields NULL),
+     #   * capsule named 'void (void *, cccl_increment_t)'.
+     # Raises TypeError for anything else.
+     cdef void *fn_ptr = NULL
+     if isinstance(host_fn_obj, ctypes._CFuncPtr):
+         # the _CFuncPtr object encapsulates a pointer to the function pointer
+         fn_ptr = ctypes_typed_pointer_payload_ptr(host_fn_obj)
+         return <cccl_host_op_fn_ptr_t>fn_ptr
+
+     if isinstance(host_fn_obj, int):
+         fn_ptr = <void *><uintptr_t>host_fn_obj
+         return <cccl_host_op_fn_ptr_t>fn_ptr
+
+     if isinstance(host_fn_obj, ctypes.c_void_p):
+         # ctypes reports a NULL c_void_p as value None; map it to address 0
+         # instead of failing on the integer conversion.
+         fn_ptr = <void *><uintptr_t>(host_fn_obj.value or 0)
+         return <cccl_host_op_fn_ptr_t>fn_ptr
+
+     if is_function_pointer_capsule(host_fn_obj):
+         fn_ptr = get_function_pointer_from_capsule(host_fn_obj)
+         return <cccl_host_op_fn_ptr_t>fn_ptr
+
+     raise TypeError(
+         "Expected ctypes function pointer, ctypes.c_void_p, integer or a named capsule, "
+         f"got {type(host_fn_obj)}"
+     )
561
+
562
+
563
+ cdef class Iterator:
+     """
+     Represents CCCL iterator.
+
+     Args:
+         alignment (int):
+             Alignment of the iterator state
+         iterator_type (IteratorKind):
+             The type of iterator, `IteratorKind.POINTER` or
+             `IteratorKind.ITERATOR`
+         advance_fn (Op):
+             Descriptor for user-defined `advance` function
+             compiled for device
+         dereference_fn (Op):
+             Descriptor for user-defined `dereference` or `assign`
+             function compiled for device
+         value_type (TypeInfo):
+             Descriptor of the type addressed by the iterator
+         state (object, optional):
+             Python object for the state of the iterator. For iterators of
+             kind `ITERATOR` the constructor expects an `IteratorState`.
+             For iterators of kind `POINTER` the constructor expects an
+             integer address or a `Pointer`.
+             Value `None` represents absence of iterator state.
+         host_advance_fn (object, optional):
+             Python object for host callable function to advance state by a given
+             increment. The argument may only be set for iterators of type
+             `IteratorKind.ITERATOR` and raise an exception otherwise. Supported
+             types are `int` or `ctypes.c_void_p` (raw pointer), ctypes function
+             pointer, or a Python capsule with name `"void (void *, cccl_increment_t)"`.
+
+     Raises:
+         TypeError: if `state` has a type not accepted for the iterator kind.
+         ValueError: if `host_advance_fn` is given for a `POINTER` iterator.
+     """
+     # Op objects are kept so the buffers referenced by their native structs
+     # stay alive; the structs themselves are copied into iter_data.
+     cdef Op advance
+     cdef Op dereference
+     # Python object stored alongside the raw state pointer.
+     cdef object state_obj
+     cdef object host_advance_obj
+     cdef cccl_iterator_t iter_data
+
+     def __cinit__(self,
+         int alignment,
+         cccl_iterator_kind_t iterator_type,
+         Op advance_fn,
+         Op dereference_fn,
+         TypeInfo value_type,
+         state=None,
+         host_advance_fn=None
+     ):
+         cdef cccl_iterator_kind_t it_kind
+         _validate_alignment(alignment)
+         it_kind = iterator_type
+         if it_kind == cccl_iterator_kind_t.POINTER:
+             # Pointer-kind iterators carry only an address; size stays 0.
+             if state is None:
+                 self.state_obj = None
+                 self.iter_data.size = 0
+                 self.iter_data.state = NULL
+             elif isinstance(state, int):
+                 self.state_obj = None
+                 self.iter_data.size = 0
+                 self.iter_data.state = int_as_ptr(state)
+             elif isinstance(state, Pointer):
+                 self.state_obj = state.reference
+                 self.iter_data.size = 0
+                 self.iter_data.state = (<Pointer>state).ptr
+             else:
+                 raise TypeError(
+                     "Expect for Iterator of kind POINTER, state must have type Pointer or int, "
+                     f"got {type(state)}"
+                 )
+             if host_advance_fn is not None:
+                 raise ValueError(
+                     "host_advance_fn must be set to None for iterators of kind POINTER"
+                 )
+             self.iter_data.host_advance = NULL
+             self.host_advance_obj = None
+         elif it_kind == cccl_iterator_kind_t.ITERATOR:
+             if state is None:
+                 self.state_obj = None
+                 self.iter_data.size = 0
+                 self.iter_data.state = NULL
+             elif isinstance(state, IteratorState):
+                 self.state_obj = state.reference
+                 self.iter_data.size = (<IteratorState>state).size
+                 self.iter_data.state = (<IteratorState>state).ptr
+             else:
+                 raise TypeError(
+                     "For Iterator of kind ITERATOR, state must have type IteratorState, "
+                     f"got type {type(state)}"
+                 )
+             if host_advance_fn is not None:
+                 self.iter_data.host_advance = unbox_host_advance_fn(host_advance_fn)
+                 self.host_advance_obj = host_advance_fn
+             else:
+                 self.iter_data.host_advance = NULL
+                 self.host_advance_obj = None
+         else: # pragma: no cover
+             raise ValueError("Unrecognized iterator kind")
+         self.advance = advance_fn
+         self.dereference = dereference_fn
+         self.iter_data.alignment = alignment
+         self.iter_data.type = <cccl_iterator_kind_t> it_kind
+         # Native op structs and the value type are copied by value.
+         self.iter_data.advance = self.advance.op_data
+         self.iter_data.dereference = self.dereference.op_data
+         self.iter_data.value_type = value_type.type_info
+
+     @property
+     def advance_op(self):
+         "The Op passed as `advance_fn`"
+         return self.advance
+
+     @property
+     def dereference_or_assign_op(self):
+         "The Op passed as `dereference_fn`"
+         return self.dereference
+
+     @property
+     def state(self):
+         "Integer address for POINTER iterators, stored state object otherwise"
+         if self.iter_data.type == cccl_iterator_kind_t.POINTER:
+             return <size_t>self.iter_data.state
+         else:
+             return self.state_obj
+
+     @state.setter
+     def state(self, new_value):
+         cdef cccl_iterator_kind_t it_kind = self.iter_data.type
+         if it_kind == cccl_iterator_kind_t.POINTER:
+             if isinstance(new_value, Pointer):
+                 self.state_obj = (<Pointer>new_value).ref
+                 self.iter_data.size = 0
+                 self.iter_data.state = (<Pointer>new_value).ptr
+             elif isinstance(new_value, int):
+                 self.state_obj = None
+                 self.iter_data.size = 0
+                 self.iter_data.state = int_as_ptr(new_value)
+             elif new_value is None:
+                 self.state_obj = None
+                 self.iter_data.size = 0
+                 self.iter_data.state = NULL
+             else:
+                 raise TypeError(
+                     "For iterator with type POINTER, state value must have type int or type Pointer, "
+                     f"got type {type(new_value)}"
+                 )
+         elif it_kind == cccl_iterator_kind_t.ITERATOR:
+             if isinstance(new_value, IteratorState):
+                 self.state_obj = new_value.reference
+                 self.iter_data.size = (<IteratorState>new_value).size
+                 self.iter_data.state = (<IteratorState>new_value).ptr
+             elif isinstance(new_value, Pointer):
+                 # A Pointer carries no size, so it is only acceptable when a
+                 # size is already recorded. Validate BEFORE touching any
+                 # member: a rejected assignment must not replace state_obj
+                 # while iter_data.state still refers to the previous buffer.
+                 if self.iter_data.size == 0:
+                     raise ValueError("Assigning incomplete state value to iterator without state size information")
+                 self.state_obj = new_value.reference
+                 self.iter_data.state = (<Pointer>new_value).ptr
+             elif PyObject_CheckBuffer(new_value):
+                 self.iter_data.state = get_buffer_pointer(new_value, &self.iter_data.size)
+                 self.state_obj = new_value
+             elif new_value is None:
+                 self.state_obj = None
+                 self.iter_data.size = 0
+                 self.iter_data.state = NULL
+             else:
+                 raise TypeError(
+                     "For iterator with type ITERATOR, state value must have type IteratorState or type bytes, "
+                     f"got type {type(new_value)}"
+                 )
+         else:
+             raise TypeError("The new value should be an integer for iterators of POINTER kind, and bytes for ITERATOR kind")
+
+     @property
+     def type(self):
+         "IteratorKind.POINTER for pointer iterators, IteratorKind.ITERATOR otherwise"
+         cdef cccl_iterator_kind_t it_kind = self.iter_data.type
+         if it_kind == cccl_iterator_kind_t.POINTER:
+             return IteratorKind.POINTER
+         else:
+             return IteratorKind.ITERATOR
+
+     @property
+     def value_type(self):
+         "A new TypeInfo built from the value type stored in the native struct"
+         cdef cccl_type_info type_info = self.iter_data.value_type
+         return TypeInfo(type_info.size, type_info.alignment, type_info.type)
+
+     def is_kind_pointer(self):
+         "True if the iterator kind is POINTER"
+         cdef cccl_iterator_kind_t it_kind = self.iter_data.type
+         return (it_kind == cccl_iterator_kind_t.POINTER)
+
+     def is_kind_iterator(self):
+         "True if the iterator kind is ITERATOR"
+         cdef cccl_iterator_kind_t it_kind = self.iter_data.type
+         return (it_kind == cccl_iterator_kind_t.ITERATOR)
+
+     def as_bytes(self):
+         "Debugging utility to get memory view into library struct"
+         cdef uint8_t[:] mem_view = bytearray(sizeof(self.iter_data))
+         memcpy(&mem_view[0], &self.iter_data, sizeof(self.iter_data))
+         return bytes(mem_view)
+
+     @property
+     def host_advance_fn(self):
+         "The object last supplied as host advance function, or None"
+         return self.host_advance_obj
+
+     @host_advance_fn.setter
+     def host_advance_fn(self, func):
+         if (self.iter_data.type == cccl_iterator_kind_t.ITERATOR):
+             if func is not None:
+                 self.iter_data.host_advance = unbox_host_advance_fn(func)
+                 self.host_advance_obj = func
+             else:
+                 self.iter_data.host_advance = NULL
+                 self.host_advance_obj = None
+         else:
+             # Same exception type as before, now with an explanation that
+             # matches the constructor's rule for POINTER iterators.
+             raise ValueError(
+                 "host_advance_fn can only be set for iterators of kind ITERATOR"
+             )
770
+ else:
771
+ raise ValueError
772
+
773
+
774
+ cdef class CommonData:
+     # Bundles the compute capability and the four path strings that are
+     # passed to the native build functions. Paths are stored UTF-8 encoded
+     # so that C strings can be handed out without re-encoding.
+     cdef int cc_major
+     cdef int cc_minor
+     cdef bytes encoded_cub_path
+     cdef bytes encoded_thrust_path
+     cdef bytes encoded_libcudacxx_path
+     cdef bytes encoded_ctk_path
+
+     def __cinit__(self, int cc_major, int cc_minor, str cub_path, str thrust_path, str libcudacxx_path, str ctk_path):
+         self.cc_major = cc_major
+         self.cc_minor = cc_minor
+         self.encoded_cub_path = cub_path.encode("utf-8")
+         self.encoded_thrust_path = thrust_path.encode("utf-8")
+         self.encoded_libcudacxx_path = libcudacxx_path.encode("utf-8")
+         self.encoded_ctk_path = ctk_path.encode("utf-8")
+
+     cdef inline int get_cc_major(self):
+         return self.cc_major
+
+     cdef inline int get_cc_minor(self):
+         return self.cc_minor
+
+     # The *_get_c_str accessors return NULL for an empty path and otherwise
+     # a pointer into the bytes object owned by this instance.
+     cdef inline const char * cub_path_get_c_str(self):
+         if not self.encoded_cub_path:
+             return NULL
+         return <const char *>self.encoded_cub_path
+
+     cdef inline const char * thrust_path_get_c_str(self):
+         if not self.encoded_thrust_path:
+             return NULL
+         return <const char *>self.encoded_thrust_path
+
+     cdef inline const char * libcudacxx_path_get_c_str(self):
+         if not self.encoded_libcudacxx_path:
+             return NULL
+         return <const char *>self.encoded_libcudacxx_path
+
+     cdef inline const char * ctk_path_get_c_str(self):
+         if not self.encoded_ctk_path:
+             return NULL
+         return <const char *>self.encoded_ctk_path
+
+     @property
+     def compute_capability(self):
+         "Tuple (cc_major, cc_minor)"
+         return (self.cc_major, self.cc_minor)
+
+     @property
+     def cub_path(self):
+         "CUB path as str"
+         encoded = self.encoded_cub_path
+         return encoded.decode("utf-8")
+
+     @property
+     def ctk_path(self):
+         "CTK path as str"
+         encoded = self.encoded_ctk_path
+         return encoded.decode("utf-8")
+
+     @property
+     def thrust_path(self):
+         "Thrust path as str"
+         encoded = self.encoded_thrust_path
+         return encoded.decode("utf-8")
+
+     @property
+     def libcudacxx_path(self):
+         "libcudacxx path as str"
+         encoded = self.encoded_libcudacxx_path
+         return encoded.decode("utf-8")
827
+
828
+ # --------------
829
+ # DeviceReduce
830
+ # --------------
831
+
832
+ cdef extern from "cccl/c/reduce.h":
+     # Only the cubin fields of the build result are declared here.
+     cdef struct cccl_device_reduce_build_result_t 'cccl_device_reduce_build_result_t':
+         const char* cubin
+         size_t cubin_size
+
+     # Parameter names below mirror the arguments passed by
+     # DeviceReduceBuildResult; they carry no meaning for the C compiler.
+     cdef CUresult cccl_device_reduce_build(
+         cccl_device_reduce_build_result_t* build_ptr,
+         cccl_iterator_t d_in,
+         cccl_iterator_t d_out,
+         cccl_op_t op,
+         cccl_value_t h_init,
+         cccl_determinism_t determinism,
+         int cc_major,
+         int cc_minor,
+         const char* cub_path,
+         const char* thrust_path,
+         const char* libcudacxx_path,
+         const char* ctk_path
+     ) nogil
+
+     cdef CUresult cccl_device_reduce(
+         cccl_device_reduce_build_result_t build,
+         void *temp_storage,
+         size_t *temp_storage_bytes,
+         cccl_iterator_t d_in,
+         cccl_iterator_t d_out,
+         uint64_t num_items,
+         cccl_op_t op,
+         cccl_value_t h_init,
+         CUstream stream
+     ) nogil
+
+     cdef CUresult cccl_device_reduce_nondeterministic(
+         cccl_device_reduce_build_result_t build,
+         void *temp_storage,
+         size_t *temp_storage_bytes,
+         cccl_iterator_t d_in,
+         cccl_iterator_t d_out,
+         uint64_t num_items,
+         cccl_op_t op,
+         cccl_value_t h_init,
+         CUstream stream
+     ) nogil
+
+     cdef CUresult cccl_device_reduce_cleanup(
+         cccl_device_reduce_build_result_t* build_ptr
+     ) nogil
874
+
875
+
876
+ cdef class DeviceReduceBuildResult:
+     # Owns a native reduce build result: it is built in __cinit__, used by
+     # the compute methods, and handed to the native cleanup in __dealloc__.
+     cdef cccl_device_reduce_build_result_t build_data
+
+     def __cinit__(
+         DeviceReduceBuildResult self,
+         Iterator d_in,
+         Iterator d_out,
+         Op op,
+         Value h_init,
+         cccl_determinism_t determinism,
+         CommonData common_data
+     ):
+         cdef CUresult rc = -1
+         # Plain C values are extracted up front so that the native call can
+         # run without the GIL.
+         cdef int major = common_data.get_cc_major()
+         cdef int minor = common_data.get_cc_minor()
+         cdef const char *cub_dir = common_data.cub_path_get_c_str()
+         cdef const char *thrust_dir = common_data.thrust_path_get_c_str()
+         cdef const char *libcudacxx_dir = common_data.libcudacxx_path_get_c_str()
+         cdef const char *ctk_dir = common_data.ctk_path_get_c_str()
+         memset(&self.build_data, 0, sizeof(cccl_device_reduce_build_result_t))
+
+         with nogil:
+             rc = cccl_device_reduce_build(
+                 &self.build_data,
+                 d_in.iter_data,
+                 d_out.iter_data,
+                 op.op_data,
+                 h_init.value_data,
+                 determinism,
+                 major,
+                 minor,
+                 cub_dir,
+                 thrust_dir,
+                 libcudacxx_dir,
+                 ctk_dir,
+             )
+         if rc != 0:
+             raise RuntimeError(
+                 f"Failed building reduce, error code: {rc}"
+             )
+
+     def __dealloc__(DeviceReduceBuildResult self):
+         cdef CUresult rc = -1
+         with nogil:
+             rc = cccl_device_reduce_cleanup(&self.build_data)
+         # A failed cleanup is only reported, never raised.
+         if (rc != 0):
+             print(f"Return code {rc} encountered during reduce result cleanup")
+
+     cpdef int compute(
+         DeviceReduceBuildResult self,
+         temp_storage_ptr,
+         temp_storage_bytes,
+         Iterator d_in,
+         Iterator d_out,
+         size_t num_items,
+         Op op,
+         Value h_init,
+         stream
+     ):
+         # Runs the native reduce and returns the temp-storage size written
+         # back by the native call. Falsy temp_storage_ptr / stream map to NULL.
+         cdef CUresult rc = -1
+         cdef void *scratch = NULL
+         cdef size_t scratch_nbytes = 0
+         cdef CUstream cu_stream = NULL
+
+         if temp_storage_ptr:
+             scratch = <void *><uintptr_t>temp_storage_ptr
+         scratch_nbytes = <size_t>temp_storage_bytes
+         if stream:
+             cu_stream = <CUstream><uintptr_t>(stream)
+
+         with nogil:
+             rc = cccl_device_reduce(
+                 self.build_data,
+                 scratch,
+                 &scratch_nbytes,
+                 d_in.iter_data,
+                 d_out.iter_data,
+                 <uint64_t>num_items,
+                 op.op_data,
+                 h_init.value_data,
+                 cu_stream
+             )
+         if rc != 0:
+             raise RuntimeError(
+                 f"Failed executing reduce, error code: {rc}"
+             )
+         return scratch_nbytes
+
+     cpdef int compute_nondeterministic(
+         DeviceReduceBuildResult self,
+         temp_storage_ptr,
+         temp_storage_bytes,
+         Iterator d_in,
+         Iterator d_out,
+         size_t num_items,
+         Op op,
+         Value h_init,
+         stream
+     ):
+         # Same contract as compute(), but calls the nondeterministic entry point.
+         cdef CUresult rc = -1
+         cdef void *scratch = NULL
+         cdef size_t scratch_nbytes = 0
+         cdef CUstream cu_stream = NULL
+
+         if temp_storage_ptr:
+             scratch = <void *><uintptr_t>temp_storage_ptr
+         scratch_nbytes = <size_t>temp_storage_bytes
+         if stream:
+             cu_stream = <CUstream><uintptr_t>(stream)
+
+         with nogil:
+             rc = cccl_device_reduce_nondeterministic(
+                 self.build_data,
+                 scratch,
+                 &scratch_nbytes,
+                 d_in.iter_data,
+                 d_out.iter_data,
+                 <uint64_t>num_items,
+                 op.op_data,
+                 h_init.value_data,
+                 cu_stream
+             )
+         if rc != 0:
+             raise RuntimeError(
+                 f"Failed executing reduce not guaranteed determinism, error code: {rc}"
+             )
+         return scratch_nbytes
+
+     def _get_cubin(self):
+         # Copies the cubin blob referenced by the build result into bytes.
+         cdef const char *blob = <const char*>self.build_data.cubin
+         return PyBytes_FromStringAndSize(blob, self.build_data.cubin_size)
997
+
998
+ # ------------
999
+ # DeviceScan
1000
+ # ------------
1001
+
1002
+
1003
+ cdef extern from "cccl/c/scan.h":
+     ctypedef bint _Bool
+
+     # Only the cubin fields of the build result are declared here.
+     cdef struct cccl_device_scan_build_result_t 'cccl_device_scan_build_result_t':
+         const char* cubin
+         size_t cubin_size
+
+     # Parameter names below mirror the arguments passed by
+     # DeviceScanBuildResult; they carry no meaning for the C compiler.
+     cdef CUresult cccl_device_scan_build(
+         cccl_device_scan_build_result_t* build_ptr,
+         cccl_iterator_t d_in,
+         cccl_iterator_t d_out,
+         cccl_op_t op,
+         cccl_type_info init_type,
+         _Bool force_inclusive,
+         cccl_init_kind_t init_kind,
+         int cc_major,
+         int cc_minor,
+         const char* cub_path,
+         const char* thrust_path,
+         const char* libcudacxx_path,
+         const char* ctk_path
+     ) nogil
+
+     # Scans taking the initial value as a host-side value.
+     cdef CUresult cccl_device_exclusive_scan(
+         cccl_device_scan_build_result_t build,
+         void *temp_storage,
+         size_t *temp_storage_bytes,
+         cccl_iterator_t d_in,
+         cccl_iterator_t d_out,
+         uint64_t num_items,
+         cccl_op_t op,
+         cccl_value_t init_value,
+         CUstream stream
+     ) nogil
+
+     cdef CUresult cccl_device_inclusive_scan(
+         cccl_device_scan_build_result_t build,
+         void *temp_storage,
+         size_t *temp_storage_bytes,
+         cccl_iterator_t d_in,
+         cccl_iterator_t d_out,
+         uint64_t num_items,
+         cccl_op_t op,
+         cccl_value_t init_value,
+         CUstream stream
+     ) nogil
+
+     # "Future value" scans take the initial value as an iterator.
+     cdef CUresult cccl_device_exclusive_scan_future_value(
+         cccl_device_scan_build_result_t build,
+         void *temp_storage,
+         size_t *temp_storage_bytes,
+         cccl_iterator_t d_in,
+         cccl_iterator_t d_out,
+         uint64_t num_items,
+         cccl_op_t op,
+         cccl_iterator_t init_value,
+         CUstream stream
+     ) nogil
+
+     cdef CUresult cccl_device_inclusive_scan_future_value(
+         cccl_device_scan_build_result_t build,
+         void *temp_storage,
+         size_t *temp_storage_bytes,
+         cccl_iterator_t d_in,
+         cccl_iterator_t d_out,
+         uint64_t num_items,
+         cccl_op_t op,
+         cccl_iterator_t init_value,
+         CUstream stream
+     ) nogil
+
+     # Inclusive scan without any initial value argument.
+     cdef CUresult cccl_device_inclusive_scan_no_init(
+         cccl_device_scan_build_result_t build,
+         void *temp_storage,
+         size_t *temp_storage_bytes,
+         cccl_iterator_t d_in,
+         cccl_iterator_t d_out,
+         uint64_t num_items,
+         cccl_op_t op,
+         CUstream stream
+     ) nogil
+
+     cdef CUresult cccl_device_scan_cleanup(
+         cccl_device_scan_build_result_t* build_ptr
+     ) nogil
1083
+
1084
+
1085
+ cdef class DeviceScanBuildResult:
1086
+ cdef cccl_device_scan_build_result_t build_data
1087
+
1088
+ def __cinit__(
1089
+ DeviceScanBuildResult self,
1090
+ Iterator d_in,
1091
+ Iterator d_out,
1092
+ Op op,
1093
+ TypeInfo init_type,
1094
+ bint force_inclusive,
1095
+ cccl_init_kind_t init_kind,
1096
+ CommonData common_data
1097
+ ):
1098
+ cdef CUresult status = -1
1099
+ cdef int cc_major = common_data.get_cc_major()
1100
+ cdef int cc_minor = common_data.get_cc_minor()
1101
+ cdef const char *cub_path = common_data.cub_path_get_c_str()
1102
+ cdef const char *thrust_path = common_data.thrust_path_get_c_str()
1103
+ cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
1104
+ cdef const char *ctk_path = common_data.ctk_path_get_c_str()
1105
+ memset(&self.build_data, 0, sizeof(cccl_device_scan_build_result_t))
1106
+
1107
+ with nogil:
1108
+ status = cccl_device_scan_build(
1109
+ &self.build_data,
1110
+ d_in.iter_data,
1111
+ d_out.iter_data,
1112
+ op.op_data,
1113
+ init_type.type_info,
1114
+ force_inclusive,
1115
+ init_kind,
1116
+ cc_major,
1117
+ cc_minor,
1118
+ cub_path,
1119
+ thrust_path,
1120
+ libcudacxx_path,
1121
+ ctk_path,
1122
+ )
1123
+ if status != 0:
1124
+ raise RuntimeError(f"Error {status} building scan")
1125
+
1126
+ def __dealloc__(DeviceScanBuildResult self):
1127
+ cdef CUresult status = -1
1128
+ with nogil:
1129
+ status = cccl_device_scan_cleanup(&self.build_data)
1130
+ if (status != 0):
1131
+ print(f"Return code {status} encountered during scan result cleanup")
1132
+
1133
+ cpdef int compute_inclusive(
1134
+ DeviceScanBuildResult self,
1135
+ temp_storage_ptr,
1136
+ temp_storage_bytes,
1137
+ Iterator d_in,
1138
+ Iterator d_out,
1139
+ size_t num_items,
1140
+ Op op,
1141
+ Value init_value,
1142
+ stream
1143
+ ):
1144
+ cdef CUresult status = -1
1145
+ cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
1146
+ cdef size_t storage_sz = <size_t>temp_storage_bytes
1147
+ cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL
1148
+
1149
+ with nogil:
1150
+ status = cccl_device_inclusive_scan(
1151
+ self.build_data,
1152
+ storage_ptr,
1153
+ &storage_sz,
1154
+ d_in.iter_data,
1155
+ d_out.iter_data,
1156
+ <uint64_t>num_items,
1157
+ op.op_data,
1158
+ init_value.value_data,
1159
+ c_stream
1160
+ )
1161
+ if status != 0:
1162
+ raise RuntimeError(
1163
+ f"Failed executing inclusive scan, error code: {status}"
1164
+ )
1165
+ return storage_sz
1166
+
1167
+ cpdef int compute_exclusive(
1168
+ DeviceScanBuildResult self,
1169
+ temp_storage_ptr,
1170
+ temp_storage_bytes,
1171
+ Iterator d_in,
1172
+ Iterator d_out,
1173
+ size_t num_items,
1174
+ Op op,
1175
+ Value init_value,
1176
+ stream
1177
+ ):
1178
+ cdef CUresult status = -1
1179
+ cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
1180
+ cdef size_t storage_sz = <size_t>temp_storage_bytes
1181
+ cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL
1182
+
1183
+ with nogil:
1184
+ status = cccl_device_exclusive_scan(
1185
+ self.build_data,
1186
+ storage_ptr,
1187
+ &storage_sz,
1188
+ d_in.iter_data,
1189
+ d_out.iter_data,
1190
+ <uint64_t>num_items,
1191
+ op.op_data,
1192
+ init_value.value_data,
1193
+ c_stream
1194
+ )
1195
+ if status != 0:
1196
+ raise RuntimeError(
1197
+ f"Failed executing exclusive scan, error code: {status}"
1198
+ )
1199
+ return storage_sz
1200
+
1201
+ cpdef int compute_inclusive_future_value(
1202
+ DeviceScanBuildResult self,
1203
+ temp_storage_ptr,
1204
+ temp_storage_bytes,
1205
+ Iterator d_in,
1206
+ Iterator d_out,
1207
+ size_t num_items,
1208
+ Op op,
1209
+ Iterator init_value,
1210
+ stream
1211
+ ):
1212
+ cdef CUresult status = -1
1213
+ cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
1214
+ cdef size_t storage_sz = <size_t>temp_storage_bytes
1215
+ cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL
1216
+
1217
+ with nogil:
1218
+ status = cccl_device_inclusive_scan_future_value(
1219
+ self.build_data,
1220
+ storage_ptr,
1221
+ &storage_sz,
1222
+ d_in.iter_data,
1223
+ d_out.iter_data,
1224
+ <uint64_t>num_items,
1225
+ op.op_data,
1226
+ init_value.iter_data,
1227
+ c_stream
1228
+ )
1229
+ if status != 0:
1230
+ raise RuntimeError(
1231
+ f"Failed executing inclusive scan, error code: {status}"
1232
+ )
1233
+ return storage_sz
1234
+
1235
+ cpdef int compute_exclusive_future_value(
1236
+ DeviceScanBuildResult self,
1237
+ temp_storage_ptr,
1238
+ temp_storage_bytes,
1239
+ Iterator d_in,
1240
+ Iterator d_out,
1241
+ size_t num_items,
1242
+ Op op,
1243
+ Iterator init_value,
1244
+ stream
1245
+ ):
1246
+ cdef CUresult status = -1
1247
+ cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
1248
+ cdef size_t storage_sz = <size_t>temp_storage_bytes
1249
+ cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL
1250
+
1251
+ with nogil:
1252
+ status = cccl_device_exclusive_scan_future_value(
1253
+ self.build_data,
1254
+ storage_ptr,
1255
+ &storage_sz,
1256
+ d_in.iter_data,
1257
+ d_out.iter_data,
1258
+ <uint64_t>num_items,
1259
+ op.op_data,
1260
+ init_value.iter_data,
1261
+ c_stream
1262
+ )
1263
+ if status != 0:
1264
+ raise RuntimeError(
1265
+ f"Failed executing exclusive scan, error code: {status}"
1266
+ )
1267
+ return storage_sz
1268
+
1269
cpdef size_t compute_inclusive_no_init(
    DeviceScanBuildResult self,
    temp_storage_ptr,
    temp_storage_bytes,
    Iterator d_in,
    Iterator d_out,
    size_t num_items,
    Op op,
    object init_value,
    stream
):
    """
    Call ``cccl_device_inclusive_scan_no_init`` with this object's build data.

    Args:
        temp_storage_ptr: Integer address of the temporary storage. Any falsy
            value (``None``, ``0``) is forwarded as ``NULL``.
        temp_storage_bytes: Storage size; handed to the C call by pointer.
        d_in: Input iterator.
        d_out: Output iterator.
        num_items: Item count forwarded to the C call.
        op: Scan operator.
        init_value: Accepted but not used by this method; nothing is
            forwarded to the C call for it.
        stream: Integer stream handle. Any falsy value is forwarded as ``NULL``.

    Returns:
        The storage-size variable after the call. The C function receives a
        pointer to it, so it may have been updated.

    Raises:
        RuntimeError: If the C call returns a non-zero status.
    """
    cdef CUresult status = -1
    cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
    cdef size_t storage_sz = <size_t>temp_storage_bytes
    cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

    # Only C-level data is touched inside the call, so the GIL is released.
    with nogil:
        status = cccl_device_inclusive_scan_no_init(
            self.build_data,
            storage_ptr,
            &storage_sz,
            d_in.iter_data,
            d_out.iter_data,
            <uint64_t>num_items,
            op.op_data,
            c_stream
        )
    if status != 0:
        raise RuntimeError(
            f"Failed executing inclusive scan, error code: {status}"
        )
    # The return type is size_t (was int) so sizes above INT_MAX are not truncated.
    return storage_sz
1301
+
1302
def _get_cubin(self):
    """Return the bytes at ``build_data.cubin`` (``cubin_size`` of them) as a ``bytes`` object."""
    cdef const char *image = <const char*>self.build_data.cubin
    # PyBytes_FromStringAndSize takes a Py_ssize_t length; convert once here.
    cdef Py_ssize_t image_len = self.build_data.cubin_size
    return PyBytes_FromStringAndSize(image, image_len)
1307
+
1308
+ # -----------------------
1309
+ # DeviceSegmentedReduce
1310
+ # -----------------------
1311
+
1312
+
1313
cdef extern from "cccl/c/segmented_reduce.h":
    # Only the members read from this module are declared here.
    cdef struct cccl_device_segmented_reduce_build_result_t 'cccl_device_segmented_reduce_build_result_t':
        const char* cubin
        size_t cubin_size

    # Parameter names mirror the arguments passed by
    # DeviceSegmentedReduceBuildResult.__cinit__.
    cdef CUresult cccl_device_segmented_reduce_build(
        cccl_device_segmented_reduce_build_result_t *build_ptr,
        cccl_iterator_t d_in,
        cccl_iterator_t d_out,
        cccl_iterator_t start_offsets,
        cccl_iterator_t end_offsets,
        cccl_op_t op,
        cccl_value_t h_init,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    ) nogil

    # Parameter names mirror the arguments passed by
    # DeviceSegmentedReduceBuildResult.compute.
    cdef CUresult cccl_device_segmented_reduce(
        cccl_device_segmented_reduce_build_result_t build,
        void *storage_ptr,
        size_t *storage_sz,
        cccl_iterator_t d_in,
        cccl_iterator_t d_out,
        uint64_t num_items,
        cccl_iterator_t start_offsets,
        cccl_iterator_t end_offsets,
        cccl_op_t op,
        cccl_value_t h_init,
        CUstream stream
    ) nogil

    cdef CUresult cccl_device_segmented_reduce_cleanup(
        cccl_device_segmented_reduce_build_result_t* bld_ptr
    ) nogil
1346
+
1347
+
1348
cdef class DeviceSegmentedReduceBuildResult:
    """
    Owner of a ``cccl_device_segmented_reduce_build_result_t``.

    The struct is filled in by ``cccl_device_segmented_reduce_build`` when the
    object is created and handed to ``cccl_device_segmented_reduce_cleanup``
    when the object is deallocated.
    """
    cdef cccl_device_segmented_reduce_build_result_t build_data

    def __cinit__(
        DeviceSegmentedReduceBuildResult self,
        Iterator d_in,
        Iterator d_out,
        Iterator start_offsets,
        Iterator end_offsets,
        Op op,
        Value h_init,
        CommonData common_data
    ):
        # Read everything needed from common_data into C locals first, so the
        # build call can run without the GIL.
        cdef CUresult status = -1
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        # Start from an all-zero struct before the C library fills it in.
        memset(&self.build_data, 0, sizeof(cccl_device_segmented_reduce_build_result_t))
        with nogil:
            status = cccl_device_segmented_reduce_build(
                &self.build_data,
                d_in.iter_data,
                d_out.iter_data,
                start_offsets.iter_data,
                end_offsets.iter_data,
                op.op_data,
                h_init.value_data,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(
                f"Failed building segmented_reduce, error code: {status}"
            )

    def __dealloc__(DeviceSegmentedReduceBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_segmented_reduce_cleanup(&self.build_data)
        # A failed cleanup is reported with print() rather than raised.
        if (status != 0):
            print(f"Return code {status} encountered during segmented_reduce result cleanup")

    cpdef size_t compute(
        DeviceSegmentedReduceBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_in,
        Iterator d_out,
        size_t num_items,
        Iterator start_offsets,
        Iterator end_offsets,
        Op op,
        Value h_init,
        stream
    ):
        """
        Call ``cccl_device_segmented_reduce`` with this object's build data.

        ``temp_storage_ptr`` and ``stream`` are integer addresses/handles; any
        falsy value is forwarded as ``NULL``.

        Returns:
            The storage-size variable after the call. The C function receives
            a pointer to it, so it may have been updated.

        Raises:
            RuntimeError: If the C call returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_segmented_reduce(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_in.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                start_offsets.iter_data,
                end_offsets.iter_data,
                op.op_data,
                h_init.value_data,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing segmented_reduce, error code: {status}"
            )
        # The return type is size_t (was int) so sizes above INT_MAX are not truncated.
        return storage_sz

    def _get_cubin(self):
        """Return the bytes at ``build_data.cubin`` (``cubin_size`` of them) as ``bytes``."""
        return PyBytes_FromStringAndSize(
            <const char*>self.build_data.cubin,
            self.build_data.cubin_size
        )
1441
+
1442
+ # -----------------
1443
+ # DeviceMergeSort
1444
+ # -----------------
1445
+
1446
+
1447
cdef extern from "cccl/c/merge_sort.h":
    # Only the members read from this module are declared here.
    cdef struct cccl_device_merge_sort_build_result_t 'cccl_device_merge_sort_build_result_t':
        const char* cubin
        size_t cubin_size

    # Trailing parameter names mirror the arguments passed by
    # DeviceMergeSortBuildResult.__cinit__.
    cdef CUresult cccl_device_merge_sort_build(
        cccl_device_merge_sort_build_result_t *bld_ptr,
        cccl_iterator_t d_in_keys,
        cccl_iterator_t d_in_items,
        cccl_iterator_t d_out_keys,
        cccl_iterator_t d_out_items,
        cccl_op_t op,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    ) nogil

    # Parameter names mirror the arguments passed by
    # DeviceMergeSortBuildResult.compute.
    cdef CUresult cccl_device_merge_sort(
        cccl_device_merge_sort_build_result_t build,
        void *storage_ptr,
        size_t *storage_sz,
        cccl_iterator_t d_in_keys,
        cccl_iterator_t d_in_items,
        cccl_iterator_t d_out_keys,
        cccl_iterator_t d_out_items,
        uint64_t num_items,
        cccl_op_t op,
        CUstream stream
    ) nogil

    cdef CUresult cccl_device_merge_sort_cleanup(
        cccl_device_merge_sort_build_result_t* bld_ptr
    ) nogil
1478
+
1479
+
1480
cdef class DeviceMergeSortBuildResult:
    """
    Owner of a ``cccl_device_merge_sort_build_result_t``.

    The struct is filled in by ``cccl_device_merge_sort_build`` when the object
    is created and handed to ``cccl_device_merge_sort_cleanup`` when the object
    is deallocated.
    """
    cdef cccl_device_merge_sort_build_result_t build_data

    def __cinit__(
        DeviceMergeSortBuildResult self,
        Iterator d_in_keys,
        Iterator d_in_items,
        Iterator d_out_keys,
        Iterator d_out_items,
        Op op,
        CommonData common_data
    ):
        # Read everything needed from common_data into C locals first, so the
        # build call can run without the GIL.
        cdef CUresult status = -1
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        # Start from an all-zero struct before the C library fills it in.
        memset(&self.build_data, 0, sizeof(cccl_device_merge_sort_build_result_t))
        with nogil:
            status = cccl_device_merge_sort_build(
                &self.build_data,
                d_in_keys.iter_data,
                d_in_items.iter_data,
                d_out_keys.iter_data,
                d_out_items.iter_data,
                op.op_data,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(
                f"Failed building merge_sort, error code: {status}"
            )

    def __dealloc__(DeviceMergeSortBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_merge_sort_cleanup(&self.build_data)
        # A failed cleanup is reported with print() rather than raised.
        if (status != 0):
            print(f"Return code {status} encountered during merge_sort result cleanup")

    cpdef size_t compute(
        DeviceMergeSortBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_in_keys,
        Iterator d_in_items,
        Iterator d_out_keys,
        Iterator d_out_items,
        size_t num_items,
        Op op,
        stream
    ):
        """
        Call ``cccl_device_merge_sort`` with this object's build data.

        ``temp_storage_ptr`` and ``stream`` are integer addresses/handles; any
        falsy value is forwarded as ``NULL``.

        Returns:
            The storage-size variable after the call. The C function receives
            a pointer to it, so it may have been updated.

        Raises:
            RuntimeError: If the C call returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_merge_sort(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_in_keys.iter_data,
                d_in_items.iter_data,
                d_out_keys.iter_data,
                d_out_items.iter_data,
                <uint64_t>num_items,
                op.op_data,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing merge_sort, error code: {status}"
            )
        # The return type is size_t (was int) so sizes above INT_MAX are not truncated.
        return storage_sz

    def _get_cubin(self):
        """Return the bytes at ``build_data.cubin`` (``cubin_size`` of them) as ``bytes``."""
        return PyBytes_FromStringAndSize(
            <const char*>self.build_data.cubin,
            self.build_data.cubin_size
        )
1569
+
1570
+
1571
+ # -------------------
1572
+ # DeviceUniqueByKey
1573
+ # -------------------
1574
+
1575
cdef extern from "cccl/c/unique_by_key.h":
    # Only the members read from this module are declared here.
    cdef struct cccl_device_unique_by_key_build_result_t 'cccl_device_unique_by_key_build_result_t':
        const char* cubin
        size_t cubin_size

    # Trailing parameter names mirror the arguments passed by
    # DeviceUniqueByKeyBuildResult.__cinit__.
    cdef CUresult cccl_device_unique_by_key_build(
        cccl_device_unique_by_key_build_result_t *build_ptr,
        cccl_iterator_t d_keys_in,
        cccl_iterator_t d_values_in,
        cccl_iterator_t d_keys_out,
        cccl_iterator_t d_values_out,
        cccl_iterator_t d_num_selected_out,
        cccl_op_t comparison_op,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    ) nogil

    cdef CUresult cccl_device_unique_by_key(
        cccl_device_unique_by_key_build_result_t build,
        void *d_storage_ptr,
        size_t *d_storage_nbytes,
        cccl_iterator_t d_keys_in,
        cccl_iterator_t d_values_in,
        cccl_iterator_t d_keys_out,
        cccl_iterator_t d_values_out,
        cccl_iterator_t d_num_selected_out,
        cccl_op_t comparison_op,
        size_t num_items,
        CUstream stream
    ) nogil

    cdef CUresult cccl_device_unique_by_key_cleanup(
        cccl_device_unique_by_key_build_result_t *build_ptr,
    ) nogil
1609
+
1610
+
1611
cdef class DeviceUniqueByKeyBuildResult:
    """
    Owner of a ``cccl_device_unique_by_key_build_result_t``.

    The struct is filled in by ``cccl_device_unique_by_key_build`` when the
    object is created and handed to ``cccl_device_unique_by_key_cleanup`` when
    the object is deallocated.
    """
    cdef cccl_device_unique_by_key_build_result_t build_data

    def __cinit__(
        DeviceUniqueByKeyBuildResult self,
        Iterator d_keys_in,
        Iterator d_values_in,
        Iterator d_keys_out,
        Iterator d_values_out,
        Iterator d_num_selected_out,
        Op comparison_op,
        CommonData common_data
    ):
        # Read everything needed from common_data into C locals first, so the
        # build call can run without the GIL.
        cdef CUresult status = -1
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        # Start from an all-zero struct before the C library fills it in.
        memset(&self.build_data, 0, sizeof(cccl_device_unique_by_key_build_result_t))
        with nogil:
            status = cccl_device_unique_by_key_build(
                &self.build_data,
                d_keys_in.iter_data,
                d_values_in.iter_data,
                d_keys_out.iter_data,
                d_values_out.iter_data,
                d_num_selected_out.iter_data,
                comparison_op.op_data,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(
                f"Failed building unique_by_key, error code: {status}"
            )

    def __dealloc__(DeviceUniqueByKeyBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_unique_by_key_cleanup(&self.build_data)
        # A failed cleanup is reported with print() rather than raised.
        if (status != 0):
            print(f"Return code {status} encountered during unique_by_key result cleanup")

    cpdef size_t compute(
        DeviceUniqueByKeyBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_keys_in,
        Iterator d_values_in,
        Iterator d_keys_out,
        Iterator d_values_out,
        Iterator d_num_selected_out,
        Op comparison_op,
        size_t num_items,
        stream
    ):
        """
        Call ``cccl_device_unique_by_key`` with this object's build data.

        ``temp_storage_ptr`` and ``stream`` are integer addresses/handles; any
        falsy value is forwarded as ``NULL``.

        Returns:
            The storage-size variable after the call. The C function receives
            a pointer to it, so it may have been updated.

        Raises:
            RuntimeError: If the C call returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_unique_by_key(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_keys_in.iter_data,
                d_values_in.iter_data,
                d_keys_out.iter_data,
                d_values_out.iter_data,
                d_num_selected_out.iter_data,
                comparison_op.op_data,
                # Already size_t, which is the declared parameter type.
                num_items,
                c_stream
            )

        if status != 0:
            raise RuntimeError(
                f"Failed executing unique_by_key, error code: {status}"
            )
        # The return type is size_t (was int) so sizes above INT_MAX are not truncated.
        return storage_sz

    def _get_cubin(self):
        """Return the bytes at ``build_data.cubin`` (``cubin_size`` of them) as ``bytes``."""
        return PyBytes_FromStringAndSize(
            <const char*>self.build_data.cubin,
            self.build_data.cubin_size
        )
1705
+
1706
+ # -----------------
1707
+ # DeviceRadixSort
1708
+ # -----------------
1709
+
1710
cdef extern from "cccl/c/radix_sort.h":
    # Only the members read from this module are declared here.
    cdef struct cccl_device_radix_sort_build_result_t 'cccl_device_radix_sort_build_result_t':
        const char* cubin
        size_t cubin_size

    # Trailing parameter names mirror the arguments passed by
    # DeviceRadixSortBuildResult.__cinit__.
    cdef CUresult cccl_device_radix_sort_build(
        cccl_device_radix_sort_build_result_t *build_ptr,
        cccl_sort_order_t sort_order,
        cccl_iterator_t d_keys_in,
        cccl_iterator_t d_values_in,
        cccl_op_t decomposer,
        const char* decomposer_return_type,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    ) nogil

    cdef CUresult cccl_device_radix_sort(
        cccl_device_radix_sort_build_result_t build,
        void *d_storage_ptr,
        size_t *d_storage_nbytes,
        cccl_iterator_t d_keys_in,
        cccl_iterator_t d_keys_out,
        cccl_iterator_t d_values_in,
        cccl_iterator_t d_values_out,
        cccl_op_t decomposer,
        size_t num_items,
        int begin_bit,
        int end_bit,
        bint is_overwrite_okay,
        int* selector,
        CUstream stream
    ) nogil

    cdef CUresult cccl_device_radix_sort_cleanup(
        cccl_device_radix_sort_build_result_t *build_ptr,
    ) nogil
1745
+
1746
+
1747
cdef class DeviceRadixSortBuildResult:
    """
    Owner of a ``cccl_device_radix_sort_build_result_t``.

    The struct is filled in by ``cccl_device_radix_sort_build`` when the object
    is created and handed to ``cccl_device_radix_sort_cleanup`` when the object
    is deallocated.
    """
    cdef cccl_device_radix_sort_build_result_t build_data

    def __cinit__(
        DeviceRadixSortBuildResult self,
        cccl_sort_order_t order,
        Iterator d_keys_in,
        Iterator d_values_in,
        Op decomposer_op,
        const char* decomposer_return_type,
        CommonData common_data
    ):
        # Read everything needed from common_data into C locals first, so the
        # build call can run without the GIL.
        cdef CUresult status = -1
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        # Start from an all-zero struct before the C library fills it in.
        memset(&self.build_data, 0, sizeof(cccl_device_radix_sort_build_result_t))
        with nogil:
            status = cccl_device_radix_sort_build(
                &self.build_data,
                order,
                d_keys_in.iter_data,
                d_values_in.iter_data,
                decomposer_op.op_data,
                decomposer_return_type,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(
                f"Failed building radix_sort, error code: {status}"
            )

    def __dealloc__(DeviceRadixSortBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_radix_sort_cleanup(&self.build_data)
        # A failed cleanup is reported with print() rather than raised.
        if (status != 0):
            print(f"Return code {status} encountered during radix_sort result cleanup")

    cpdef tuple compute(
        DeviceRadixSortBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_keys_in,
        Iterator d_keys_out,
        Iterator d_values_in,
        Iterator d_values_out,
        Op decomposer_op,
        size_t num_items,
        int begin_bit,
        int end_bit,
        bint is_overwrite_okay,
        selector,
        stream
    ):
        """
        Call ``cccl_device_radix_sort`` with this object's build data.

        ``temp_storage_ptr`` and ``stream`` are integer addresses/handles; any
        falsy value is forwarded as ``NULL``. ``selector`` is converted to a C
        ``int`` and handed to the C call by pointer.

        Returns:
            A ``(storage_size, selector)`` tuple holding the two by-pointer
            variables after the call.

        Raises:
            RuntimeError: If the C call returns a non-zero status.
        """
        cdef CUresult status = -1
        # uintptr_t is used for the integer-to-pointer round trip, matching
        # the other build-result classes in this module.
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef int selector_int = <int>selector
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_radix_sort(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_keys_in.iter_data,
                d_keys_out.iter_data,
                d_values_in.iter_data,
                d_values_out.iter_data,
                decomposer_op.op_data,
                # Already size_t, which is the declared parameter type.
                num_items,
                begin_bit,
                end_bit,
                is_overwrite_okay,
                &selector_int,
                c_stream
            )

        if status != 0:
            raise RuntimeError(
                f"Failed executing radix_sort, error code: {status}"
            )
        return <object>storage_sz, <object>selector_int

    def _get_cubin(self):
        """Return the bytes at ``build_data.cubin`` (``cubin_size`` of them) as ``bytes``."""
        return PyBytes_FromStringAndSize(
            <const char*>self.build_data.cubin,
            self.build_data.cubin_size
        )
1847
+
1848
+ # --------------------------------------------
1849
+ # DeviceUnaryTransform/DeviceBinaryTransform
1850
+ # --------------------------------------------
1851
cdef extern from "cccl/c/transform.h":
    # One build-result type is shared by the unary and binary entry points.
    # Only the members read from this module are declared here.
    cdef struct cccl_device_transform_build_result_t:
        const char* cubin
        size_t cubin_size

    # --- unary -----------------------------------------------------------
    # Trailing parameter names mirror the arguments passed by
    # DeviceUnaryTransform.__cinit__.
    cdef CUresult cccl_device_unary_transform_build(
        cccl_device_transform_build_result_t *build_ptr,
        cccl_iterator_t d_in,
        cccl_iterator_t d_out,
        cccl_op_t op,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    ) nogil

    cdef CUresult cccl_device_unary_transform(
        cccl_device_transform_build_result_t build,
        cccl_iterator_t d_in,
        cccl_iterator_t d_out,
        uint64_t num_items,
        cccl_op_t op,
        CUstream stream
    ) nogil

    # --- binary ----------------------------------------------------------
    # Trailing parameter names mirror the arguments passed by
    # DeviceBinaryTransform.__cinit__.
    cdef CUresult cccl_device_binary_transform_build(
        cccl_device_transform_build_result_t* build_ptr,
        cccl_iterator_t d_in1,
        cccl_iterator_t d_in2,
        cccl_iterator_t d_out,
        cccl_op_t op,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    ) nogil

    cdef CUresult cccl_device_binary_transform(
        cccl_device_transform_build_result_t build,
        cccl_iterator_t d_in1,
        cccl_iterator_t d_in2,
        cccl_iterator_t d_out,
        uint64_t num_items,
        cccl_op_t op,
        CUstream stream
    ) nogil

    # --- shared cleanup --------------------------------------------------
    cdef CUresult cccl_device_transform_cleanup(
        cccl_device_transform_build_result_t *build_ptr,
    ) nogil
1893
+
1894
+
1895
cdef class DeviceUnaryTransform:
    """
    Owner of a ``cccl_device_transform_build_result_t`` built for a unary transform.

    The struct is filled in by ``cccl_device_unary_transform_build`` when the
    object is created and handed to ``cccl_device_transform_cleanup`` when the
    object is deallocated.
    """
    cdef cccl_device_transform_build_result_t build_data

    def __cinit__(
        self,
        Iterator d_in,
        Iterator d_out,
        Op op,
        CommonData common_data
    ):
        # Start from an all-zero struct before the C library fills it in.
        memset(&self.build_data, 0, sizeof(cccl_device_transform_build_result_t))

        # Read everything needed from common_data into C locals, so the build
        # call can run without the GIL.
        cdef CUresult status = -1
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        with nogil:
            status = cccl_device_unary_transform_build(
                &self.build_data,
                d_in.iter_data,
                d_out.iter_data,
                op.op_data,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            # The status code is appended, as the other build-result classes
            # do; the original message text is kept as the prefix.
            raise RuntimeError(
                f"Failed to build unary transform, error code: {status}"
            )

    def __dealloc__(DeviceUnaryTransform self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_transform_cleanup(&self.build_data)
        # A failed cleanup is reported with print() rather than raised.
        if (status != 0):
            print(f"Return code {status} encountered during unary transform result cleanup")

    cpdef void compute(
        DeviceUnaryTransform self,
        Iterator d_in,
        Iterator d_out,
        size_t num_items,
        Op op,
        stream
    ):
        """
        Call ``cccl_device_unary_transform`` with this object's build data.

        ``stream`` is an integer stream handle; any falsy value is forwarded
        as ``NULL``.

        Raises:
            RuntimeError: If the C call returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL
        with nogil:
            status = cccl_device_unary_transform(
                self.build_data,
                d_in.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                op.op_data,
                c_stream
            )
        if (status != 0):
            raise RuntimeError(
                f"Failed to compute unary transform, error code: {status}"
            )

    def _get_cubin(self):
        """Return the bytes at ``build_data.cubin`` (``cubin_size`` of them) as ``bytes``."""
        return PyBytes_FromStringAndSize(
            <const char*>self.build_data.cubin,
            self.build_data.cubin_size
        )
1966
+
1967
+
1968
cdef class DeviceBinaryTransform:
    """
    Owner of a ``cccl_device_transform_build_result_t`` built for a binary transform.

    The struct is filled in by ``cccl_device_binary_transform_build`` when the
    object is created and handed to ``cccl_device_transform_cleanup`` when the
    object is deallocated.
    """
    cdef cccl_device_transform_build_result_t build_data

    def __cinit__(
        self,
        Iterator d_in1,
        Iterator d_in2,
        Iterator d_out,
        Op op,
        CommonData common_data
    ):
        # Start from an all-zero struct before the C library fills it in.
        memset(&self.build_data, 0, sizeof(cccl_device_transform_build_result_t))

        # Read everything needed from common_data into C locals, so the build
        # call can run without the GIL.
        cdef CUresult status = -1
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        with nogil:
            status = cccl_device_binary_transform_build(
                &self.build_data,
                d_in1.iter_data,
                d_in2.iter_data,
                d_out.iter_data,
                op.op_data,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            # The status code is appended, as the other build-result classes
            # do; the original message text is kept as the prefix.
            raise RuntimeError(
                f"Failed to build binary transform, error code: {status}"
            )

    def __dealloc__(DeviceBinaryTransform self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_transform_cleanup(&self.build_data)
        # A failed cleanup is reported with print() rather than raised.
        if (status != 0):
            print(f"Return code {status} encountered during binary transform result cleanup")

    cpdef void compute(
        DeviceBinaryTransform self,
        Iterator d_in1,
        Iterator d_in2,
        Iterator d_out,
        size_t num_items,
        Op op,
        stream
    ):
        """
        Call ``cccl_device_binary_transform`` with this object's build data.

        ``stream`` is an integer stream handle; any falsy value is forwarded
        as ``NULL``.

        Raises:
            RuntimeError: If the C call returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL
        with nogil:
            status = cccl_device_binary_transform(
                self.build_data,
                d_in1.iter_data,
                d_in2.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                op.op_data,
                c_stream
            )
        if (status != 0):
            raise RuntimeError(
                f"Failed to compute binary transform, error code: {status}"
            )

    def _get_cubin(self):
        """Return the bytes at ``build_data.cubin`` (``cubin_size`` of them) as ``bytes``."""
        return PyBytes_FromStringAndSize(
            <const char*>self.build_data.cubin,
            self.build_data.cubin_size
        )
2042
+
2043
+
2044
+ # -----------------
2045
+ # DeviceHistogram
2046
+ # -----------------
2047
cdef extern from "cccl/c/histogram.h":
    # Only the members read from this module are declared here.
    cdef struct cccl_device_histogram_build_result_t 'cccl_device_histogram_build_result_t':
        const char* cubin
        size_t cubin_size

    # Trailing parameter names mirror the arguments passed by
    # DeviceHistogramBuildResult.__cinit__.
    cdef CUresult cccl_device_histogram_build(
        cccl_device_histogram_build_result_t *build_ptr,
        int num_channels,
        int num_active_channels,
        cccl_iterator_t d_samples,
        int num_output_levels_val,
        cccl_iterator_t d_output_histograms,
        cccl_value_t h_levels,
        int64_t num_rows,
        int64_t row_stride_samples,
        bint is_evenly_segmented,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    ) nogil

    cdef CUresult cccl_device_histogram_even(
        cccl_device_histogram_build_result_t build,
        void *d_storage_ptr,
        size_t *d_storage_nbytes,
        cccl_iterator_t d_samples,
        cccl_iterator_t d_output_histograms,
        cccl_value_t num_output_levels,
        cccl_value_t lower_level,
        cccl_value_t upper_level,
        int64_t num_row_pixels,
        int64_t num_rows,
        int64_t row_stride_samples,
        CUstream stream
    ) nogil

    cdef CUresult cccl_device_histogram_cleanup(
        cccl_device_histogram_build_result_t *build_ptr,
    ) nogil
2084
+
2085
+
2086
cdef class DeviceHistogramBuildResult:
    """
    Owner of a ``cccl_device_histogram_build_result_t``.

    The struct is filled in by ``cccl_device_histogram_build`` when the object
    is created and handed to ``cccl_device_histogram_cleanup`` when the object
    is deallocated.
    """
    cdef cccl_device_histogram_build_result_t build_data

    def __cinit__(
        DeviceHistogramBuildResult self,
        int num_channels,
        int num_active_channels,
        Iterator d_samples,
        int num_levels,
        Iterator d_histogram,
        Value h_levels,
        # Declared int64_t (was int) to match the C prototype, so values
        # above INT_MAX are accepted instead of raising OverflowError.
        int64_t num_rows,
        int64_t row_stride_samples,
        bint is_evenly_segmented,
        CommonData common_data
    ):
        # Read everything needed from common_data into C locals first, so the
        # build call can run without the GIL.
        cdef CUresult status = -1
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        # Start from an all-zero struct before the C library fills it in.
        memset(&self.build_data, 0, sizeof(cccl_device_histogram_build_result_t))
        with nogil:
            status = cccl_device_histogram_build(
                &self.build_data,
                num_channels,
                num_active_channels,
                d_samples.iter_data,
                num_levels,
                d_histogram.iter_data,
                h_levels.value_data,
                num_rows,
                row_stride_samples,
                is_evenly_segmented,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(
                f"Failed building histogram, error code: {status}"
            )

    def __dealloc__(DeviceHistogramBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_histogram_cleanup(&self.build_data)
        # A failed cleanup is reported with print() rather than raised.
        if (status != 0):
            print(f"Return code {status} encountered during histogram result cleanup")

    cpdef size_t compute_even(
        DeviceHistogramBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_samples,
        Iterator d_histogram,
        Value h_num_output_levels,
        Value h_lower_level,
        Value h_upper_level,
        int64_t num_row_pixels,
        int64_t num_rows,
        int64_t row_stride_samples,
        stream
    ):
        """
        Call ``cccl_device_histogram_even`` with this object's build data.

        ``temp_storage_ptr`` and ``stream`` are integer addresses/handles; any
        falsy value is forwarded as ``NULL``. The three extent arguments are
        ``int64_t``, matching the C prototype.

        Returns:
            The storage-size variable after the call. The C function receives
            a pointer to it, so it may have been updated.

        Raises:
            RuntimeError: If the C call returns a non-zero status.
        """
        cdef CUresult status = -1
        # uintptr_t is used for the integer-to-pointer round trip, matching
        # the other build-result classes in this module.
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_histogram_even(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_samples.iter_data,
                d_histogram.iter_data,
                h_num_output_levels.value_data,
                h_lower_level.value_data,
                h_upper_level.value_data,
                num_row_pixels,
                num_rows,
                row_stride_samples,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing histogram, error code: {status}"
            )
        # The return type is size_t (was int) so sizes above INT_MAX are not truncated.
        return storage_sz

    def _get_cubin(self):
        """Return the bytes at ``build_data.cubin`` (``cubin_size`` of them) as ``bytes``."""
        return PyBytes_FromStringAndSize(
            <const char*>self.build_data.cubin,
            self.build_data.cubin_size
        )
2189
+
2190
+
2191
+ # ----------------------------------
2192
+ # DeviceThreeWayPartitionBuildResult
2193
+ # ----------------------------------
2194
cdef extern from "cccl/c/three_way_partition.h":
    # Only the members read from this module are declared here.
    cdef struct cccl_device_three_way_partition_build_result_t 'cccl_device_three_way_partition_build_result_t':
        const char* cubin
        size_t cubin_size

    # Trailing parameter names mirror the arguments passed by
    # DeviceThreeWayPartitionBuildResult.__cinit__.
    cdef CUresult cccl_device_three_way_partition_build(
        cccl_device_three_way_partition_build_result_t *build_ptr,
        cccl_iterator_t d_in,
        cccl_iterator_t d_first_part_out,
        cccl_iterator_t d_second_part_out,
        cccl_iterator_t d_unselected_out,
        cccl_iterator_t d_num_selected_out,
        cccl_op_t select_first_part_op,
        cccl_op_t select_second_part_op,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    ) nogil

    # Written with an explicit `cdef` like its siblings; inside a
    # `cdef extern` block the keyword is optional.
    cdef CUresult cccl_device_three_way_partition(
        cccl_device_three_way_partition_build_result_t build,
        void* d_temp_storage,
        size_t* temp_storage_bytes,
        cccl_iterator_t d_in,
        cccl_iterator_t d_first_part_out,
        cccl_iterator_t d_second_part_out,
        cccl_iterator_t d_unselected_out,
        cccl_iterator_t d_num_selected_out,
        cccl_op_t select_first_part_op,
        cccl_op_t select_second_part_op,
        int64_t num_items,
        CUstream stream
    ) nogil

    cdef CUresult cccl_device_three_way_partition_cleanup(
        cccl_device_three_way_partition_build_result_t *build_ptr
    ) nogil
2229
+
2230
+
2231
+ cdef class DeviceThreeWayPartitionBuildResult:
2232
+ cdef cccl_device_three_way_partition_build_result_t build_data
2233
+
2234
+ def __dealloc__(DeviceThreeWayPartitionBuildResult self):
2235
+ cdef CUresult status = -1
2236
+ with nogil:
2237
+ status = cccl_device_three_way_partition_cleanup(&self.build_data)
2238
+ if (status != 0):
2239
+ print(f"Return code {status} encountered during three_way_partition result cleanup")
2240
+
2241
+
2242
+ def __cinit__(
2243
+ DeviceThreeWayPartitionBuildResult self,
2244
+ Iterator d_in,
2245
+ Iterator d_first_part_out,
2246
+ Iterator d_second_part_out,
2247
+ Iterator d_unselected_out,
2248
+ Iterator d_num_selected_out,
2249
+ Op select_first_part_op,
2250
+ Op select_second_part_op,
2251
+ CommonData common_data
2252
+ ):
2253
+ cdef CUresult status = -1
2254
+ cdef int cc_major = common_data.get_cc_major()
2255
+ cdef int cc_minor = common_data.get_cc_minor()
2256
+ cdef const char *cub_path = common_data.cub_path_get_c_str()
2257
+ cdef const char *thrust_path = common_data.thrust_path_get_c_str()
2258
+ cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
2259
+ cdef const char *ctk_path = common_data.ctk_path_get_c_str()
2260
+
2261
+ memset(&self.build_data, 0, sizeof(cccl_device_three_way_partition_build_result_t))
2262
+ with nogil:
2263
+ status = cccl_device_three_way_partition_build(
2264
+ &self.build_data,
2265
+ d_in.iter_data,
2266
+ d_first_part_out.iter_data,
2267
+ d_second_part_out.iter_data,
2268
+ d_unselected_out.iter_data,
2269
+ d_num_selected_out.iter_data,
2270
+ select_first_part_op.op_data,
2271
+ select_second_part_op.op_data,
2272
+ cc_major,
2273
+ cc_minor,
2274
+ cub_path,
2275
+ thrust_path,
2276
+ libcudacxx_path,
2277
+ ctk_path,
2278
+ )
2279
+ if status != 0:
2280
+ raise RuntimeError(
2281
+ f"Failed building three_way_partition, error code: {status}"
2282
+ )
2283
+
2284
cpdef size_t compute(
    DeviceThreeWayPartitionBuildResult self,
    temp_storage_ptr,
    temp_storage_bytes,
    Iterator d_in,
    Iterator d_first_part_out,
    Iterator d_second_part_out,
    Iterator d_unselected_out,
    Iterator d_num_selected_out,
    Op select_first_part_op,
    Op select_second_part_op,
    size_t num_items,
    stream
):
    """Call ``cccl_device_three_way_partition`` with this build result.

    Parameters
    ----------
    temp_storage_ptr
        Integer address of the temporary storage. Any falsy value
        (``None``, ``0``) is forwarded to the C API as ``NULL``.
    temp_storage_bytes
        Size value converted to ``size_t`` and passed to the C API by
        address.
    d_in, d_first_part_out, d_second_part_out, d_unselected_out, d_num_selected_out
        Iterator wrappers whose ``iter_data`` members are forwarded as-is.
    select_first_part_op, select_second_part_op
        Operator wrappers whose ``op_data`` members are forwarded as-is.
    num_items
        Item count, forwarded as a 64-bit unsigned integer.
    stream
        Integer stream handle. Any falsy value is forwarded as ``NULL``.

    Returns
    -------
    size_t
        The temporary-storage byte count as it stands after the C call.
        NOTE(review): presumably the C API writes the required size here
        when the storage pointer is NULL -- confirm against the C header.

    Raises
    ------
    RuntimeError
        If the C API returns a non-zero status.
    """
    # The return type is size_t (not int) so the byte count is handed back
    # at full width instead of being narrowed to a C int.
    cdef CUresult status = -1
    cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
    cdef size_t storage_sz = <size_t>temp_storage_bytes
    cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

    # All Python objects were converted to C values above, so the call
    # itself can run with the GIL released.
    with nogil:
        status = cccl_device_three_way_partition(
            self.build_data,
            storage_ptr,
            &storage_sz,
            d_in.iter_data,
            d_first_part_out.iter_data,
            d_second_part_out.iter_data,
            d_unselected_out.iter_data,
            d_num_selected_out.iter_data,
            select_first_part_op.op_data,
            select_second_part_op.op_data,
            <uint64_t>num_items,
            c_stream
        )
    if status != 0:
        raise RuntimeError(
            f"Failed executing three_way_partition, error code: {status}"
        )
    return storage_sz
2323
+
2324
def _get_cubin(self):
    """Return the cubin stored in the build result as a ``bytes`` object.

    The pointer and length are read from ``self.build_data`` and handed to
    ``PyBytes_FromStringAndSize``, which creates a new bytes object from
    them.
    """
    cdef const char *cubin_ptr = <const char*>self.build_data.cubin
    return PyBytes_FromStringAndSize(cubin_ptr, self.build_data.cubin_size)
2329
+
2330
+
2331
+ # -------------------
2332
+ # DeviceSegmentedSort
2333
+ # -------------------
2334
+
2335
cdef extern from "cccl/c/segmented_sort.h":
    # Build-result struct. The two fields declared here are the ones this
    # module reads; the underlying C struct may contain more.
    cdef struct cccl_device_segmented_sort_build_result_t 'cccl_device_segmented_sort_build_result_t':
        const char* cubin
        size_t cubin_size

    # Builds the segmented-sort kernels into *build_ptr.
    # The fourth iterator is named d_values_in to agree with the caller in
    # DeviceSegmentedSortBuildResult.__cinit__, which passes
    # d_values_in.iter_data in that position.
    # NOTE(review): confirm the name against the C header.
    # The trailing parameters are named after the values passed at that
    # same call site.
    cdef CUresult cccl_device_segmented_sort_build(
        cccl_device_segmented_sort_build_result_t *build_ptr,
        cccl_sort_order_t sort_order,
        cccl_iterator_t d_keys_in,
        cccl_iterator_t d_values_in,
        cccl_iterator_t begin_offset_in,
        cccl_iterator_t end_offset_in,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    ) nogil

    # Executes the segmented sort. temp_storage_bytes and selector are
    # passed by address, so the callee may update them.
    cdef CUresult cccl_device_segmented_sort(
        cccl_device_segmented_sort_build_result_t build,
        void* d_temp_storage,
        size_t* temp_storage_bytes,
        cccl_iterator_t d_keys_in,
        cccl_iterator_t d_keys_out,
        cccl_iterator_t d_values_in,
        cccl_iterator_t d_values_out,
        int64_t num_items,
        int64_t num_segments,
        cccl_iterator_t start_offset_in,
        cccl_iterator_t end_offset_in,
        bint is_overwrite_okay,
        int* selector,
        CUstream stream
    ) nogil

    # Cleans up a build result; called from
    # DeviceSegmentedSortBuildResult.__dealloc__.
    cdef CUresult cccl_device_segmented_sort_cleanup(
        cccl_device_segmented_sort_build_result_t* build_ptr
    ) nogil
2370
+
2371
+ cdef class DeviceSegmentedSortBuildResult:
2372
+ cdef cccl_device_segmented_sort_build_result_t build_data
2373
+
2374
def __dealloc__(DeviceSegmentedSortBuildResult self):
    # Hand the build result back to the C library for cleanup. A failure
    # is only reported with print(); nothing is raised from here.
    cdef CUresult status = -1
    cdef cccl_device_segmented_sort_build_result_t *build_ptr = &self.build_data

    with nogil:
        status = cccl_device_segmented_sort_cleanup(build_ptr)

    if status == 0:
        return
    print(f"Return code {status} encountered during segmented_sort result cleanup")
2380
+
2381
def __cinit__(
    DeviceSegmentedSortBuildResult self,
    cccl_sort_order_t order,
    Iterator d_keys_in,
    Iterator d_values_in,
    Iterator begin_offset_in,
    Iterator end_offset_in,
    CommonData common_data,
):
    # Build the segmented-sort result for the given sort order, iterators
    # and toolchain settings. Raises RuntimeError when the C build call
    # reports a non-zero status.
    cdef CUresult status = -1

    # Pull every value needed from common_data into plain C locals first,
    # so the build call below can run without the GIL.
    cdef int major = common_data.get_cc_major()
    cdef int minor = common_data.get_cc_minor()
    cdef const char *cub_dir = common_data.cub_path_get_c_str()
    cdef const char *thrust_dir = common_data.thrust_path_get_c_str()
    cdef const char *libcudacxx_dir = common_data.libcudacxx_path_get_c_str()
    cdef const char *ctk_dir = common_data.ctk_path_get_c_str()
    cdef cccl_device_segmented_sort_build_result_t *result_ptr = &self.build_data

    # Zero the struct before handing it to the builder.
    memset(result_ptr, 0, sizeof(cccl_device_segmented_sort_build_result_t))

    with nogil:
        status = cccl_device_segmented_sort_build(
            result_ptr,
            order,
            d_keys_in.iter_data,
            d_values_in.iter_data,
            begin_offset_in.iter_data,
            end_offset_in.iter_data,
            major,
            minor,
            cub_dir,
            thrust_dir,
            libcudacxx_dir,
            ctk_dir,
        )

    if status == 0:
        return
    raise RuntimeError(f"Failed building segmented_sort, error code: {status}")
2418
+
2419
cpdef tuple compute(
    DeviceSegmentedSortBuildResult self,
    temp_storage_ptr,
    temp_storage_bytes,
    Iterator d_keys_in,
    Iterator d_keys_out,
    Iterator d_values_in,
    Iterator d_values_out,
    size_t num_items,
    size_t num_segments,
    Iterator start_offset_in,
    Iterator end_offset_in,
    bint is_overwrite_okay,
    selector,
    stream
):
    """Call ``cccl_device_segmented_sort`` with this build result.

    Parameters
    ----------
    temp_storage_ptr
        Integer address of the temporary storage. Any falsy value
        (``None``, ``0``) is forwarded to the C API as ``NULL``.
    temp_storage_bytes
        Size value converted to ``size_t`` and passed to the C API by
        address.
    d_keys_in, d_keys_out, d_values_in, d_values_out
        Iterator wrappers whose ``iter_data`` members are forwarded as-is.
    num_items, num_segments
        Counts forwarded as ``int64_t``, the type the C prototype declares.
    start_offset_in, end_offset_in
        Iterator wrappers for the segment offsets.
    is_overwrite_okay
        Flag forwarded unchanged to the C API.
    selector
        Converted to a C ``int`` and passed to the C API by address.
    stream
        Integer stream handle. Any falsy value is forwarded as ``NULL``.

    Returns
    -------
    tuple
        ``(temp_storage_bytes, selector)`` holding the two by-address
        values as they stand after the C call.

    Raises
    ------
    RuntimeError
        If the C API returns a non-zero status.
    """
    cdef CUresult status = -1
    # Addresses go through uintptr_t, the same conversion the three-way
    # partition compute() in this file uses.
    cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
    cdef size_t storage_sz = <size_t>temp_storage_bytes
    cdef int selector_int = <int>selector
    cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

    with nogil:
        status = cccl_device_segmented_sort(
            self.build_data,
            storage_ptr,
            &storage_sz,
            d_keys_in.iter_data,
            d_keys_out.iter_data,
            d_values_in.iter_data,
            d_values_out.iter_data,
            # Cast to the declared parameter type (int64_t) rather than
            # uint64_t, so no implicit unsigned-to-signed conversion is
            # left to the C compiler.
            <int64_t>num_items,
            <int64_t>num_segments,
            start_offset_in.iter_data,
            end_offset_in.iter_data,
            is_overwrite_okay,
            &selector_int,
            c_stream
        )

    if status != 0:
        raise RuntimeError(
            f"Failed executing segmented_sort, error code: {status}"
        )
    return <object>storage_sz, <object>selector_int
2464
+
2465
+
2466
def _get_cubin(self):
    """Return the cubin stored in the build result as a ``bytes`` object.

    The pointer and length are read from ``self.build_data`` and handed to
    ``PyBytes_FromStringAndSize``, which creates a new bytes object from
    them.
    """
    cdef const char *cubin_ptr = <const char*>self.build_data.cubin
    return PyBytes_FromStringAndSize(cubin_ptr, self.build_data.cubin_size)