cuda-cccl 0.3.3__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cuda-cccl might be problematic. Click here for more details.

Files changed (1968):
  1. cuda/cccl/__init__.py +27 -0
  2. cuda/cccl/_cuda_version_utils.py +24 -0
  3. cuda/cccl/cooperative/__init__.py +9 -0
  4. cuda/cccl/cooperative/experimental/__init__.py +24 -0
  5. cuda/cccl/headers/__init__.py +7 -0
  6. cuda/cccl/headers/include/__init__.py +1 -0
  7. cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +259 -0
  8. cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +1182 -0
  9. cuda/cccl/headers/include/cub/agent/agent_for.cuh +81 -0
  10. cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +709 -0
  11. cuda/cccl/headers/include/cub/agent/agent_merge.cuh +234 -0
  12. cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +748 -0
  13. cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +786 -0
  14. cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +286 -0
  15. cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +703 -0
  16. cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +555 -0
  17. cuda/cccl/headers/include/cub/agent/agent_reduce.cuh +619 -0
  18. cuda/cccl/headers/include/cub/agent/agent_reduce_by_key.cuh +806 -0
  19. cuda/cccl/headers/include/cub/agent/agent_rle.cuh +1124 -0
  20. cuda/cccl/headers/include/cub/agent/agent_scan.cuh +589 -0
  21. cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +474 -0
  22. cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +289 -0
  23. cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +1117 -0
  24. cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +346 -0
  25. cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +606 -0
  26. cuda/cccl/headers/include/cub/agent/agent_topk.cuh +764 -0
  27. cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +631 -0
  28. cuda/cccl/headers/include/cub/agent/single_pass_scan_operators.cuh +1424 -0
  29. cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +963 -0
  30. cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +1227 -0
  31. cuda/cccl/headers/include/cub/block/block_exchange.cuh +1313 -0
  32. cuda/cccl/headers/include/cub/block/block_histogram.cuh +424 -0
  33. cuda/cccl/headers/include/cub/block/block_load.cuh +1264 -0
  34. cuda/cccl/headers/include/cub/block/block_load_to_shared.cuh +432 -0
  35. cuda/cccl/headers/include/cub/block/block_merge_sort.cuh +800 -0
  36. cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +1225 -0
  37. cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +2196 -0
  38. cuda/cccl/headers/include/cub/block/block_raking_layout.cuh +150 -0
  39. cuda/cccl/headers/include/cub/block/block_reduce.cuh +667 -0
  40. cuda/cccl/headers/include/cub/block/block_run_length_decode.cuh +434 -0
  41. cuda/cccl/headers/include/cub/block/block_scan.cuh +2315 -0
  42. cuda/cccl/headers/include/cub/block/block_shuffle.cuh +346 -0
  43. cuda/cccl/headers/include/cub/block/block_store.cuh +1247 -0
  44. cuda/cccl/headers/include/cub/block/radix_rank_sort_operations.cuh +624 -0
  45. cuda/cccl/headers/include/cub/block/specializations/block_histogram_atomic.cuh +86 -0
  46. cuda/cccl/headers/include/cub/block/specializations/block_histogram_sort.cuh +240 -0
  47. cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking.cuh +252 -0
  48. cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking_commutative_only.cuh +238 -0
  49. cuda/cccl/headers/include/cub/block/specializations/block_reduce_warp_reductions.cuh +281 -0
  50. cuda/cccl/headers/include/cub/block/specializations/block_scan_raking.cuh +790 -0
  51. cuda/cccl/headers/include/cub/block/specializations/block_scan_warp_scans.cuh +538 -0
  52. cuda/cccl/headers/include/cub/config.cuh +53 -0
  53. cuda/cccl/headers/include/cub/cub.cuh +120 -0
  54. cuda/cccl/headers/include/cub/detail/array_utils.cuh +78 -0
  55. cuda/cccl/headers/include/cub/detail/choose_offset.cuh +161 -0
  56. cuda/cccl/headers/include/cub/detail/detect_cuda_runtime.cuh +74 -0
  57. cuda/cccl/headers/include/cub/detail/device_double_buffer.cuh +96 -0
  58. cuda/cccl/headers/include/cub/detail/device_memory_resource.cuh +62 -0
  59. cuda/cccl/headers/include/cub/detail/fast_modulo_division.cuh +253 -0
  60. cuda/cccl/headers/include/cub/detail/integer_utils.cuh +88 -0
  61. cuda/cccl/headers/include/cub/detail/launcher/cuda_driver.cuh +142 -0
  62. cuda/cccl/headers/include/cub/detail/launcher/cuda_runtime.cuh +100 -0
  63. cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +114 -0
  64. cuda/cccl/headers/include/cub/detail/ptx-json/README.md +71 -0
  65. cuda/cccl/headers/include/cub/detail/ptx-json/array.h +68 -0
  66. cuda/cccl/headers/include/cub/detail/ptx-json/json.h +62 -0
  67. cuda/cccl/headers/include/cub/detail/ptx-json/object.h +100 -0
  68. cuda/cccl/headers/include/cub/detail/ptx-json/string.h +53 -0
  69. cuda/cccl/headers/include/cub/detail/ptx-json/value.h +95 -0
  70. cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +63 -0
  71. cuda/cccl/headers/include/cub/detail/rfa.cuh +731 -0
  72. cuda/cccl/headers/include/cub/detail/strong_load.cuh +189 -0
  73. cuda/cccl/headers/include/cub/detail/strong_store.cuh +220 -0
  74. cuda/cccl/headers/include/cub/detail/temporary_storage.cuh +384 -0
  75. cuda/cccl/headers/include/cub/detail/type_traits.cuh +187 -0
  76. cuda/cccl/headers/include/cub/detail/uninitialized_copy.cuh +73 -0
  77. cuda/cccl/headers/include/cub/detail/unsafe_bitcast.cuh +56 -0
  78. cuda/cccl/headers/include/cub/device/device_adjacent_difference.cuh +596 -0
  79. cuda/cccl/headers/include/cub/device/device_copy.cuh +276 -0
  80. cuda/cccl/headers/include/cub/device/device_for.cuh +1063 -0
  81. cuda/cccl/headers/include/cub/device/device_histogram.cuh +1509 -0
  82. cuda/cccl/headers/include/cub/device/device_memcpy.cuh +195 -0
  83. cuda/cccl/headers/include/cub/device/device_merge.cuh +203 -0
  84. cuda/cccl/headers/include/cub/device/device_merge_sort.cuh +979 -0
  85. cuda/cccl/headers/include/cub/device/device_partition.cuh +668 -0
  86. cuda/cccl/headers/include/cub/device/device_radix_sort.cuh +3437 -0
  87. cuda/cccl/headers/include/cub/device/device_reduce.cuh +2518 -0
  88. cuda/cccl/headers/include/cub/device/device_run_length_encode.cuh +370 -0
  89. cuda/cccl/headers/include/cub/device/device_scan.cuh +2212 -0
  90. cuda/cccl/headers/include/cub/device/device_segmented_radix_sort.cuh +1496 -0
  91. cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +1430 -0
  92. cuda/cccl/headers/include/cub/device/device_segmented_sort.cuh +2811 -0
  93. cuda/cccl/headers/include/cub/device/device_select.cuh +1228 -0
  94. cuda/cccl/headers/include/cub/device/device_topk.cuh +511 -0
  95. cuda/cccl/headers/include/cub/device/device_transform.cuh +668 -0
  96. cuda/cccl/headers/include/cub/device/dispatch/dispatch_adjacent_difference.cuh +315 -0
  97. cuda/cccl/headers/include/cub/device/dispatch/dispatch_batch_memcpy.cuh +719 -0
  98. cuda/cccl/headers/include/cub/device/dispatch/dispatch_common.cuh +43 -0
  99. cuda/cccl/headers/include/cub/device/dispatch/dispatch_copy_mdspan.cuh +79 -0
  100. cuda/cccl/headers/include/cub/device/dispatch/dispatch_for.cuh +198 -0
  101. cuda/cccl/headers/include/cub/device/dispatch/dispatch_histogram.cuh +1046 -0
  102. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +303 -0
  103. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge_sort.cuh +473 -0
  104. cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +1744 -0
  105. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +1310 -0
  106. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_by_key.cuh +655 -0
  107. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +531 -0
  108. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh +313 -0
  109. cuda/cccl/headers/include/cub/device/dispatch/dispatch_rle.cuh +615 -0
  110. cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan.cuh +517 -0
  111. cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan_by_key.cuh +602 -0
  112. cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +975 -0
  113. cuda/cccl/headers/include/cub/device/dispatch/dispatch_select_if.cuh +842 -0
  114. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +341 -0
  115. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +440 -0
  116. cuda/cccl/headers/include/cub/device/dispatch/dispatch_three_way_partition.cuh +389 -0
  117. cuda/cccl/headers/include/cub/device/dispatch/dispatch_topk.cuh +627 -0
  118. cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +569 -0
  119. cuda/cccl/headers/include/cub/device/dispatch/dispatch_unique_by_key.cuh +545 -0
  120. cuda/cccl/headers/include/cub/device/dispatch/kernels/for_each.cuh +261 -0
  121. cuda/cccl/headers/include/cub/device/dispatch/kernels/histogram.cuh +505 -0
  122. cuda/cccl/headers/include/cub/device/dispatch/kernels/merge_sort.cuh +334 -0
  123. cuda/cccl/headers/include/cub/device/dispatch/kernels/radix_sort.cuh +803 -0
  124. cuda/cccl/headers/include/cub/device/dispatch/kernels/reduce.cuh +583 -0
  125. cuda/cccl/headers/include/cub/device/dispatch/kernels/scan.cuh +189 -0
  126. cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_reduce.cuh +321 -0
  127. cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_sort.cuh +522 -0
  128. cuda/cccl/headers/include/cub/device/dispatch/kernels/three_way_partition.cuh +201 -0
  129. cuda/cccl/headers/include/cub/device/dispatch/kernels/transform.cuh +1028 -0
  130. cuda/cccl/headers/include/cub/device/dispatch/kernels/unique_by_key.cuh +176 -0
  131. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +67 -0
  132. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +118 -0
  133. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +60 -0
  134. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +275 -0
  135. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +76 -0
  136. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +126 -0
  137. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +1065 -0
  138. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce.cuh +493 -0
  139. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +942 -0
  140. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +673 -0
  141. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +618 -0
  142. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +1010 -0
  143. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +398 -0
  144. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_select_if.cuh +1588 -0
  145. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +440 -0
  146. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_topk.cuh +85 -0
  147. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +481 -0
  148. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +884 -0
  149. cuda/cccl/headers/include/cub/grid/grid_even_share.cuh +227 -0
  150. cuda/cccl/headers/include/cub/grid/grid_mapping.cuh +106 -0
  151. cuda/cccl/headers/include/cub/grid/grid_queue.cuh +202 -0
  152. cuda/cccl/headers/include/cub/iterator/arg_index_input_iterator.cuh +254 -0
  153. cuda/cccl/headers/include/cub/iterator/cache_modified_input_iterator.cuh +259 -0
  154. cuda/cccl/headers/include/cub/iterator/cache_modified_output_iterator.cuh +250 -0
  155. cuda/cccl/headers/include/cub/iterator/tex_obj_input_iterator.cuh +320 -0
  156. cuda/cccl/headers/include/cub/thread/thread_load.cuh +349 -0
  157. cuda/cccl/headers/include/cub/thread/thread_operators.cuh +688 -0
  158. cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +548 -0
  159. cuda/cccl/headers/include/cub/thread/thread_scan.cuh +498 -0
  160. cuda/cccl/headers/include/cub/thread/thread_search.cuh +199 -0
  161. cuda/cccl/headers/include/cub/thread/thread_simd.cuh +458 -0
  162. cuda/cccl/headers/include/cub/thread/thread_sort.cuh +102 -0
  163. cuda/cccl/headers/include/cub/thread/thread_store.cuh +365 -0
  164. cuda/cccl/headers/include/cub/util_allocator.cuh +921 -0
  165. cuda/cccl/headers/include/cub/util_arch.cuh +167 -0
  166. cuda/cccl/headers/include/cub/util_cpp_dialect.cuh +95 -0
  167. cuda/cccl/headers/include/cub/util_debug.cuh +207 -0
  168. cuda/cccl/headers/include/cub/util_device.cuh +800 -0
  169. cuda/cccl/headers/include/cub/util_macro.cuh +97 -0
  170. cuda/cccl/headers/include/cub/util_math.cuh +118 -0
  171. cuda/cccl/headers/include/cub/util_namespace.cuh +176 -0
  172. cuda/cccl/headers/include/cub/util_policy_wrapper_t.cuh +55 -0
  173. cuda/cccl/headers/include/cub/util_ptx.cuh +513 -0
  174. cuda/cccl/headers/include/cub/util_temporary_storage.cuh +122 -0
  175. cuda/cccl/headers/include/cub/util_type.cuh +1120 -0
  176. cuda/cccl/headers/include/cub/util_vsmem.cuh +253 -0
  177. cuda/cccl/headers/include/cub/version.cuh +89 -0
  178. cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_shfl.cuh +329 -0
  179. cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_smem.cuh +177 -0
  180. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +737 -0
  181. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +408 -0
  182. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +952 -0
  183. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_smem.cuh +715 -0
  184. cuda/cccl/headers/include/cub/warp/warp_exchange.cuh +405 -0
  185. cuda/cccl/headers/include/cub/warp/warp_load.cuh +614 -0
  186. cuda/cccl/headers/include/cub/warp/warp_merge_sort.cuh +169 -0
  187. cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +829 -0
  188. cuda/cccl/headers/include/cub/warp/warp_scan.cuh +1890 -0
  189. cuda/cccl/headers/include/cub/warp/warp_store.cuh +521 -0
  190. cuda/cccl/headers/include/cub/warp/warp_utils.cuh +61 -0
  191. cuda/cccl/headers/include/cuda/__algorithm/common.h +68 -0
  192. cuda/cccl/headers/include/cuda/__algorithm/copy.h +196 -0
  193. cuda/cccl/headers/include/cuda/__algorithm/fill.h +107 -0
  194. cuda/cccl/headers/include/cuda/__annotated_ptr/access_property.h +165 -0
  195. cuda/cccl/headers/include/cuda/__annotated_ptr/access_property_encoding.h +172 -0
  196. cuda/cccl/headers/include/cuda/__annotated_ptr/annotated_ptr.h +217 -0
  197. cuda/cccl/headers/include/cuda/__annotated_ptr/annotated_ptr_base.h +100 -0
  198. cuda/cccl/headers/include/cuda/__annotated_ptr/apply_access_property.h +83 -0
  199. cuda/cccl/headers/include/cuda/__annotated_ptr/associate_access_property.h +128 -0
  200. cuda/cccl/headers/include/cuda/__annotated_ptr/createpolicy.h +210 -0
  201. cuda/cccl/headers/include/cuda/__atomic/atomic.h +145 -0
  202. cuda/cccl/headers/include/cuda/__barrier/async_contract_fulfillment.h +39 -0
  203. cuda/cccl/headers/include/cuda/__barrier/barrier.h +65 -0
  204. cuda/cccl/headers/include/cuda/__barrier/barrier_arrive_tx.h +102 -0
  205. cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +487 -0
  206. cuda/cccl/headers/include/cuda/__barrier/barrier_expect_tx.h +74 -0
  207. cuda/cccl/headers/include/cuda/__barrier/barrier_native_handle.h +45 -0
  208. cuda/cccl/headers/include/cuda/__barrier/barrier_thread_scope.h +60 -0
  209. cuda/cccl/headers/include/cuda/__bit/bit_reverse.h +171 -0
  210. cuda/cccl/headers/include/cuda/__bit/bitfield.h +122 -0
  211. cuda/cccl/headers/include/cuda/__bit/bitmask.h +90 -0
  212. cuda/cccl/headers/include/cuda/__cccl_config +37 -0
  213. cuda/cccl/headers/include/cuda/__cmath/ceil_div.h +124 -0
  214. cuda/cccl/headers/include/cuda/__cmath/fast_modulo_division.h +178 -0
  215. cuda/cccl/headers/include/cuda/__cmath/ilog.h +195 -0
  216. cuda/cccl/headers/include/cuda/__cmath/ipow.h +107 -0
  217. cuda/cccl/headers/include/cuda/__cmath/isqrt.h +80 -0
  218. cuda/cccl/headers/include/cuda/__cmath/mul_hi.h +146 -0
  219. cuda/cccl/headers/include/cuda/__cmath/neg.h +47 -0
  220. cuda/cccl/headers/include/cuda/__cmath/pow2.h +74 -0
  221. cuda/cccl/headers/include/cuda/__cmath/round_down.h +102 -0
  222. cuda/cccl/headers/include/cuda/__cmath/round_up.h +104 -0
  223. cuda/cccl/headers/include/cuda/__cmath/uabs.h +57 -0
  224. cuda/cccl/headers/include/cuda/__complex/complex.h +238 -0
  225. cuda/cccl/headers/include/cuda/__complex/get_real_imag.h +89 -0
  226. cuda/cccl/headers/include/cuda/__complex/traits.h +64 -0
  227. cuda/cccl/headers/include/cuda/__complex_ +28 -0
  228. cuda/cccl/headers/include/cuda/__device/all_devices.h +140 -0
  229. cuda/cccl/headers/include/cuda/__device/arch_id.h +176 -0
  230. cuda/cccl/headers/include/cuda/__device/arch_traits.h +537 -0
  231. cuda/cccl/headers/include/cuda/__device/attributes.h +772 -0
  232. cuda/cccl/headers/include/cuda/__device/compute_capability.h +171 -0
  233. cuda/cccl/headers/include/cuda/__device/device_ref.h +156 -0
  234. cuda/cccl/headers/include/cuda/__device/physical_device.h +172 -0
  235. cuda/cccl/headers/include/cuda/__driver/driver_api.h +835 -0
  236. cuda/cccl/headers/include/cuda/__event/event.h +171 -0
  237. cuda/cccl/headers/include/cuda/__event/event_ref.h +157 -0
  238. cuda/cccl/headers/include/cuda/__event/timed_event.h +120 -0
  239. cuda/cccl/headers/include/cuda/__execution/determinism.h +91 -0
  240. cuda/cccl/headers/include/cuda/__execution/output_ordering.h +89 -0
  241. cuda/cccl/headers/include/cuda/__execution/require.h +75 -0
  242. cuda/cccl/headers/include/cuda/__execution/tune.h +70 -0
  243. cuda/cccl/headers/include/cuda/__functional/address_stability.h +131 -0
  244. cuda/cccl/headers/include/cuda/__functional/for_each_canceled.h +321 -0
  245. cuda/cccl/headers/include/cuda/__functional/maximum.h +58 -0
  246. cuda/cccl/headers/include/cuda/__functional/minimum.h +58 -0
  247. cuda/cccl/headers/include/cuda/__functional/proclaim_return_type.h +108 -0
  248. cuda/cccl/headers/include/cuda/__fwd/barrier.h +38 -0
  249. cuda/cccl/headers/include/cuda/__fwd/barrier_native_handle.h +42 -0
  250. cuda/cccl/headers/include/cuda/__fwd/complex.h +48 -0
  251. cuda/cccl/headers/include/cuda/__fwd/devices.h +44 -0
  252. cuda/cccl/headers/include/cuda/__fwd/get_stream.h +38 -0
  253. cuda/cccl/headers/include/cuda/__fwd/pipeline.h +37 -0
  254. cuda/cccl/headers/include/cuda/__fwd/zip_iterator.h +58 -0
  255. cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h +315 -0
  256. cuda/cccl/headers/include/cuda/__iterator/counting_iterator.h +483 -0
  257. cuda/cccl/headers/include/cuda/__iterator/discard_iterator.h +324 -0
  258. cuda/cccl/headers/include/cuda/__iterator/permutation_iterator.h +456 -0
  259. cuda/cccl/headers/include/cuda/__iterator/shuffle_iterator.h +334 -0
  260. cuda/cccl/headers/include/cuda/__iterator/strided_iterator.h +418 -0
  261. cuda/cccl/headers/include/cuda/__iterator/tabulate_output_iterator.h +367 -0
  262. cuda/cccl/headers/include/cuda/__iterator/transform_input_output_iterator.h +528 -0
  263. cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +527 -0
  264. cuda/cccl/headers/include/cuda/__iterator/transform_output_iterator.h +486 -0
  265. cuda/cccl/headers/include/cuda/__iterator/zip_common.h +148 -0
  266. cuda/cccl/headers/include/cuda/__iterator/zip_function.h +112 -0
  267. cuda/cccl/headers/include/cuda/__iterator/zip_iterator.h +557 -0
  268. cuda/cccl/headers/include/cuda/__iterator/zip_transform_iterator.h +592 -0
  269. cuda/cccl/headers/include/cuda/__latch/latch.h +44 -0
  270. cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +533 -0
  271. cuda/cccl/headers/include/cuda/__mdspan/host_device_mdspan.h +238 -0
  272. cuda/cccl/headers/include/cuda/__mdspan/restrict_accessor.h +152 -0
  273. cuda/cccl/headers/include/cuda/__mdspan/restrict_mdspan.h +117 -0
  274. cuda/cccl/headers/include/cuda/__memcpy_async/check_preconditions.h +79 -0
  275. cuda/cccl/headers/include/cuda/__memcpy_async/completion_mechanism.h +47 -0
  276. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_bulk_shared_global.h +60 -0
  277. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_fallback.h +72 -0
  278. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_shared_global.h +148 -0
  279. cuda/cccl/headers/include/cuda/__memcpy_async/dispatch_memcpy_async.h +165 -0
  280. cuda/cccl/headers/include/cuda/__memcpy_async/is_local_smem_barrier.h +53 -0
  281. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async.h +179 -0
  282. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_barrier.h +99 -0
  283. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_tx.h +104 -0
  284. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_completion.h +170 -0
  285. cuda/cccl/headers/include/cuda/__memcpy_async/try_get_barrier_handle.h +59 -0
  286. cuda/cccl/headers/include/cuda/__memory/address_space.h +227 -0
  287. cuda/cccl/headers/include/cuda/__memory/align_down.h +56 -0
  288. cuda/cccl/headers/include/cuda/__memory/align_up.h +56 -0
  289. cuda/cccl/headers/include/cuda/__memory/aligned_size.h +61 -0
  290. cuda/cccl/headers/include/cuda/__memory/check_address.h +111 -0
  291. cuda/cccl/headers/include/cuda/__memory/discard_memory.h +64 -0
  292. cuda/cccl/headers/include/cuda/__memory/get_device_address.h +58 -0
  293. cuda/cccl/headers/include/cuda/__memory/is_aligned.h +47 -0
  294. cuda/cccl/headers/include/cuda/__memory/ptr_in_range.h +93 -0
  295. cuda/cccl/headers/include/cuda/__memory/ptr_rebind.h +75 -0
  296. cuda/cccl/headers/include/cuda/__memory_resource/get_memory_resource.h +82 -0
  297. cuda/cccl/headers/include/cuda/__memory_resource/get_property.h +153 -0
  298. cuda/cccl/headers/include/cuda/__memory_resource/properties.h +113 -0
  299. cuda/cccl/headers/include/cuda/__memory_resource/resource.h +125 -0
  300. cuda/cccl/headers/include/cuda/__memory_resource/resource_ref.h +652 -0
  301. cuda/cccl/headers/include/cuda/__numeric/add_overflow.h +306 -0
  302. cuda/cccl/headers/include/cuda/__numeric/narrow.h +108 -0
  303. cuda/cccl/headers/include/cuda/__numeric/overflow_cast.h +59 -0
  304. cuda/cccl/headers/include/cuda/__numeric/overflow_result.h +43 -0
  305. cuda/cccl/headers/include/cuda/__nvtx/nvtx.h +120 -0
  306. cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +2983 -0
  307. cuda/cccl/headers/include/cuda/__ptx/instructions/barrier_cluster.h +43 -0
  308. cuda/cccl/headers/include/cuda/__ptx/instructions/bfind.h +41 -0
  309. cuda/cccl/headers/include/cuda/__ptx/instructions/bmsk.h +41 -0
  310. cuda/cccl/headers/include/cuda/__ptx/instructions/clusterlaunchcontrol.h +41 -0
  311. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk.h +44 -0
  312. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_commit_group.h +43 -0
  313. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_tensor.h +45 -0
  314. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_wait_group.h +43 -0
  315. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_mbarrier_arrive.h +42 -0
  316. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_reduce_async_bulk.h +60 -0
  317. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_reduce_async_bulk_tensor.h +43 -0
  318. cuda/cccl/headers/include/cuda/__ptx/instructions/elect_sync.h +41 -0
  319. cuda/cccl/headers/include/cuda/__ptx/instructions/exit.h +41 -0
  320. cuda/cccl/headers/include/cuda/__ptx/instructions/fence.h +49 -0
  321. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/barrier_cluster.h +115 -0
  322. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/bfind.h +190 -0
  323. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/bmsk.h +54 -0
  324. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/clusterlaunchcontrol.h +242 -0
  325. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk.h +197 -0
  326. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_commit_group.h +25 -0
  327. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_multicast.h +54 -0
  328. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor.h +997 -0
  329. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_gather_scatter.h +318 -0
  330. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_multicast.h +671 -0
  331. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_wait_group.h +46 -0
  332. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_mbarrier_arrive.h +26 -0
  333. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_mbarrier_arrive_noinc.h +26 -0
  334. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk.h +1470 -0
  335. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_bf16.h +132 -0
  336. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_f16.h +132 -0
  337. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_tensor.h +601 -0
  338. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/elect_sync.h +36 -0
  339. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/exit.h +25 -0
  340. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence.h +208 -0
  341. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_mbarrier_init.h +31 -0
  342. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_alias.h +25 -0
  343. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_async.h +58 -0
  344. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_async_generic_sync_restrict.h +64 -0
  345. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_tensormap_generic.h +102 -0
  346. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_sync_restrict.h +64 -0
  347. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/get_sreg.h +949 -0
  348. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/getctarank.h +32 -0
  349. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/ld.h +5542 -0
  350. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive.h +399 -0
  351. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive_expect_tx.h +184 -0
  352. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive_no_complete.h +34 -0
  353. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_expect_tx.h +102 -0
  354. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_init.h +27 -0
  355. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_test_wait.h +143 -0
  356. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_test_wait_parity.h +144 -0
  357. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_try_wait.h +286 -0
  358. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_try_wait_parity.h +290 -0
  359. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_ld_reduce.h +2202 -0
  360. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_red.h +1362 -0
  361. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_st.h +236 -0
  362. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/prmt.h +230 -0
  363. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/red_async.h +460 -0
  364. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/shl.h +96 -0
  365. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/shr.h +168 -0
  366. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st.h +1490 -0
  367. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st_async.h +123 -0
  368. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st_bulk.h +31 -0
  369. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_alloc.h +132 -0
  370. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_commit.h +99 -0
  371. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_cp.h +765 -0
  372. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_fence.h +58 -0
  373. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_ld.h +4927 -0
  374. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_mma.h +4291 -0
  375. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_mma_ws.h +7110 -0
  376. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_shift.h +42 -0
  377. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_st.h +5063 -0
  378. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_wait.h +56 -0
  379. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tensormap_cp_fenceproxy.h +71 -0
  380. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tensormap_replace.h +1030 -0
  381. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/trap.h +25 -0
  382. cuda/cccl/headers/include/cuda/__ptx/instructions/get_sreg.h +43 -0
  383. cuda/cccl/headers/include/cuda/__ptx/instructions/getctarank.h +43 -0
  384. cuda/cccl/headers/include/cuda/__ptx/instructions/ld.h +41 -0
  385. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_arrive.h +45 -0
  386. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_expect_tx.h +41 -0
  387. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_init.h +43 -0
  388. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_wait.h +46 -0
  389. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_ld_reduce.h +41 -0
  390. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_red.h +41 -0
  391. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_st.h +41 -0
  392. cuda/cccl/headers/include/cuda/__ptx/instructions/prmt.h +41 -0
  393. cuda/cccl/headers/include/cuda/__ptx/instructions/red_async.h +43 -0
  394. cuda/cccl/headers/include/cuda/__ptx/instructions/shfl_sync.h +244 -0
  395. cuda/cccl/headers/include/cuda/__ptx/instructions/shl.h +41 -0
  396. cuda/cccl/headers/include/cuda/__ptx/instructions/shr.h +41 -0
  397. cuda/cccl/headers/include/cuda/__ptx/instructions/st.h +41 -0
  398. cuda/cccl/headers/include/cuda/__ptx/instructions/st_async.h +43 -0
  399. cuda/cccl/headers/include/cuda/__ptx/instructions/st_bulk.h +41 -0
  400. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_alloc.h +41 -0
  401. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_commit.h +41 -0
  402. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_cp.h +41 -0
  403. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_fence.h +41 -0
  404. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_ld.h +41 -0
  405. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_mma.h +41 -0
  406. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_mma_ws.h +41 -0
  407. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_shift.h +41 -0
  408. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_st.h +41 -0
  409. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_wait.h +41 -0
  410. cuda/cccl/headers/include/cuda/__ptx/instructions/tensormap_cp_fenceproxy.h +43 -0
  411. cuda/cccl/headers/include/cuda/__ptx/instructions/tensormap_replace.h +43 -0
  412. cuda/cccl/headers/include/cuda/__ptx/instructions/trap.h +41 -0
  413. cuda/cccl/headers/include/cuda/__ptx/pragmas/enable_smem_spilling.h +47 -0
  414. cuda/cccl/headers/include/cuda/__ptx/ptx_dot_variants.h +230 -0
  415. cuda/cccl/headers/include/cuda/__ptx/ptx_helper_functions.h +176 -0
  416. cuda/cccl/headers/include/cuda/__random/feistel_bijection.h +105 -0
  417. cuda/cccl/headers/include/cuda/__random/random_bijection.h +88 -0
  418. cuda/cccl/headers/include/cuda/__runtime/ensure_current_context.h +99 -0
  419. cuda/cccl/headers/include/cuda/__runtime/types.h +41 -0
  420. cuda/cccl/headers/include/cuda/__semaphore/counting_semaphore.h +53 -0
  421. cuda/cccl/headers/include/cuda/__stream/get_stream.h +110 -0
  422. cuda/cccl/headers/include/cuda/__stream/stream.h +141 -0
  423. cuda/cccl/headers/include/cuda/__stream/stream_ref.h +303 -0
  424. cuda/cccl/headers/include/cuda/__type_traits/is_floating_point.h +47 -0
  425. cuda/cccl/headers/include/cuda/__type_traits/is_specialization_of.h +37 -0
  426. cuda/cccl/headers/include/cuda/__utility/__basic_any/access.h +88 -0
  427. cuda/cccl/headers/include/cuda/__utility/__basic_any/any_cast.h +83 -0
  428. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_base.h +148 -0
  429. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_from.h +96 -0
  430. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_fwd.h +128 -0
  431. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_ptr.h +304 -0
  432. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_ref.h +337 -0
  433. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_value.h +590 -0
  434. cuda/cccl/headers/include/cuda/__utility/__basic_any/conversions.h +169 -0
  435. cuda/cccl/headers/include/cuda/__utility/__basic_any/dynamic_any_cast.h +107 -0
  436. cuda/cccl/headers/include/cuda/__utility/__basic_any/interfaces.h +359 -0
  437. cuda/cccl/headers/include/cuda/__utility/__basic_any/iset.h +142 -0
  438. cuda/cccl/headers/include/cuda/__utility/__basic_any/overrides.h +64 -0
  439. cuda/cccl/headers/include/cuda/__utility/__basic_any/rtti.h +257 -0
  440. cuda/cccl/headers/include/cuda/__utility/__basic_any/semiregular.h +322 -0
  441. cuda/cccl/headers/include/cuda/__utility/__basic_any/storage.h +79 -0
  442. cuda/cccl/headers/include/cuda/__utility/__basic_any/tagged_ptr.h +58 -0
  443. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtcall.h +162 -0
  444. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_functions.h +184 -0
  445. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_ptrs.h +80 -0
  446. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_tables.h +155 -0
  447. cuda/cccl/headers/include/cuda/__utility/basic_any.h +507 -0
  448. cuda/cccl/headers/include/cuda/__utility/immovable.h +50 -0
  449. cuda/cccl/headers/include/cuda/__utility/in_range.h +65 -0
  450. cuda/cccl/headers/include/cuda/__utility/inherit.h +36 -0
  451. cuda/cccl/headers/include/cuda/__utility/no_init.h +29 -0
  452. cuda/cccl/headers/include/cuda/__utility/static_for.h +79 -0
  453. cuda/cccl/headers/include/cuda/__warp/lane_mask.h +326 -0
  454. cuda/cccl/headers/include/cuda/__warp/warp_match_all.h +65 -0
  455. cuda/cccl/headers/include/cuda/__warp/warp_shuffle.h +251 -0
  456. cuda/cccl/headers/include/cuda/access_property +26 -0
  457. cuda/cccl/headers/include/cuda/algorithm +27 -0
  458. cuda/cccl/headers/include/cuda/annotated_ptr +29 -0
  459. cuda/cccl/headers/include/cuda/atomic +27 -0
  460. cuda/cccl/headers/include/cuda/barrier +267 -0
  461. cuda/cccl/headers/include/cuda/bit +29 -0
  462. cuda/cccl/headers/include/cuda/cmath +37 -0
  463. cuda/cccl/headers/include/cuda/devices +33 -0
  464. cuda/cccl/headers/include/cuda/discard_memory +32 -0
  465. cuda/cccl/headers/include/cuda/functional +32 -0
  466. cuda/cccl/headers/include/cuda/iterator +39 -0
  467. cuda/cccl/headers/include/cuda/latch +27 -0
  468. cuda/cccl/headers/include/cuda/mdspan +28 -0
  469. cuda/cccl/headers/include/cuda/memory +35 -0
  470. cuda/cccl/headers/include/cuda/memory_resource +35 -0
  471. cuda/cccl/headers/include/cuda/numeric +29 -0
  472. cuda/cccl/headers/include/cuda/pipeline +579 -0
  473. cuda/cccl/headers/include/cuda/ptx +129 -0
  474. cuda/cccl/headers/include/cuda/semaphore +31 -0
  475. cuda/cccl/headers/include/cuda/std/__algorithm/adjacent_find.h +59 -0
  476. cuda/cccl/headers/include/cuda/std/__algorithm/all_of.h +45 -0
  477. cuda/cccl/headers/include/cuda/std/__algorithm/any_of.h +45 -0
  478. cuda/cccl/headers/include/cuda/std/__algorithm/binary_search.h +53 -0
  479. cuda/cccl/headers/include/cuda/std/__algorithm/clamp.h +48 -0
  480. cuda/cccl/headers/include/cuda/std/__algorithm/comp.h +58 -0
  481. cuda/cccl/headers/include/cuda/std/__algorithm/comp_ref_type.h +85 -0
  482. cuda/cccl/headers/include/cuda/std/__algorithm/copy.h +142 -0
  483. cuda/cccl/headers/include/cuda/std/__algorithm/copy_backward.h +80 -0
  484. cuda/cccl/headers/include/cuda/std/__algorithm/copy_if.h +47 -0
  485. cuda/cccl/headers/include/cuda/std/__algorithm/copy_n.h +73 -0
  486. cuda/cccl/headers/include/cuda/std/__algorithm/count.h +49 -0
  487. cuda/cccl/headers/include/cuda/std/__algorithm/count_if.h +49 -0
  488. cuda/cccl/headers/include/cuda/std/__algorithm/equal.h +128 -0
  489. cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h +101 -0
  490. cuda/cccl/headers/include/cuda/std/__algorithm/fill.h +58 -0
  491. cuda/cccl/headers/include/cuda/std/__algorithm/fill_n.h +51 -0
  492. cuda/cccl/headers/include/cuda/std/__algorithm/find.h +62 -0
  493. cuda/cccl/headers/include/cuda/std/__algorithm/find_end.h +225 -0
  494. cuda/cccl/headers/include/cuda/std/__algorithm/find_first_of.h +73 -0
  495. cuda/cccl/headers/include/cuda/std/__algorithm/find_if.h +46 -0
  496. cuda/cccl/headers/include/cuda/std/__algorithm/find_if_not.h +46 -0
  497. cuda/cccl/headers/include/cuda/std/__algorithm/for_each.h +42 -0
  498. cuda/cccl/headers/include/cuda/std/__algorithm/for_each_n.h +48 -0
  499. cuda/cccl/headers/include/cuda/std/__algorithm/generate.h +41 -0
  500. cuda/cccl/headers/include/cuda/std/__algorithm/generate_n.h +46 -0
  501. cuda/cccl/headers/include/cuda/std/__algorithm/half_positive.h +49 -0
  502. cuda/cccl/headers/include/cuda/std/__algorithm/in_fun_result.h +55 -0
  503. cuda/cccl/headers/include/cuda/std/__algorithm/includes.h +90 -0
  504. cuda/cccl/headers/include/cuda/std/__algorithm/is_heap.h +50 -0
  505. cuda/cccl/headers/include/cuda/std/__algorithm/is_heap_until.h +83 -0
  506. cuda/cccl/headers/include/cuda/std/__algorithm/is_partitioned.h +57 -0
  507. cuda/cccl/headers/include/cuda/std/__algorithm/is_permutation.h +252 -0
  508. cuda/cccl/headers/include/cuda/std/__algorithm/is_sorted.h +49 -0
  509. cuda/cccl/headers/include/cuda/std/__algorithm/is_sorted_until.h +68 -0
  510. cuda/cccl/headers/include/cuda/std/__algorithm/iter_swap.h +82 -0
  511. cuda/cccl/headers/include/cuda/std/__algorithm/iterator_operations.h +185 -0
  512. cuda/cccl/headers/include/cuda/std/__algorithm/lexicographical_compare.h +68 -0
  513. cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h +82 -0
  514. cuda/cccl/headers/include/cuda/std/__algorithm/make_heap.h +70 -0
  515. cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h +88 -0
  516. cuda/cccl/headers/include/cuda/std/__algorithm/max.h +62 -0
  517. cuda/cccl/headers/include/cuda/std/__algorithm/max_element.h +67 -0
  518. cuda/cccl/headers/include/cuda/std/__algorithm/merge.h +89 -0
  519. cuda/cccl/headers/include/cuda/std/__algorithm/min.h +62 -0
  520. cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h +87 -0
  521. cuda/cccl/headers/include/cuda/std/__algorithm/minmax.h +66 -0
  522. cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h +139 -0
  523. cuda/cccl/headers/include/cuda/std/__algorithm/mismatch.h +83 -0
  524. cuda/cccl/headers/include/cuda/std/__algorithm/move.h +86 -0
  525. cuda/cccl/headers/include/cuda/std/__algorithm/move_backward.h +84 -0
  526. cuda/cccl/headers/include/cuda/std/__algorithm/next_permutation.h +88 -0
  527. cuda/cccl/headers/include/cuda/std/__algorithm/none_of.h +45 -0
  528. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort.h +102 -0
  529. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h +122 -0
  530. cuda/cccl/headers/include/cuda/std/__algorithm/partition.h +120 -0
  531. cuda/cccl/headers/include/cuda/std/__algorithm/partition_copy.h +59 -0
  532. cuda/cccl/headers/include/cuda/std/__algorithm/partition_point.h +61 -0
  533. cuda/cccl/headers/include/cuda/std/__algorithm/pop_heap.h +93 -0
  534. cuda/cccl/headers/include/cuda/std/__algorithm/prev_permutation.h +88 -0
  535. cuda/cccl/headers/include/cuda/std/__algorithm/push_heap.h +100 -0
  536. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_for_each.h +84 -0
  537. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_for_each_n.h +68 -0
  538. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_iterator_concept.h +65 -0
  539. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_min.h +98 -0
  540. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_min_element.h +68 -0
  541. cuda/cccl/headers/include/cuda/std/__algorithm/remove.h +55 -0
  542. cuda/cccl/headers/include/cuda/std/__algorithm/remove_copy.h +47 -0
  543. cuda/cccl/headers/include/cuda/std/__algorithm/remove_copy_if.h +47 -0
  544. cuda/cccl/headers/include/cuda/std/__algorithm/remove_if.h +56 -0
  545. cuda/cccl/headers/include/cuda/std/__algorithm/replace.h +45 -0
  546. cuda/cccl/headers/include/cuda/std/__algorithm/replace_copy.h +54 -0
  547. cuda/cccl/headers/include/cuda/std/__algorithm/replace_copy_if.h +50 -0
  548. cuda/cccl/headers/include/cuda/std/__algorithm/replace_if.h +45 -0
  549. cuda/cccl/headers/include/cuda/std/__algorithm/reverse.h +81 -0
  550. cuda/cccl/headers/include/cuda/std/__algorithm/reverse_copy.h +43 -0
  551. cuda/cccl/headers/include/cuda/std/__algorithm/rotate.h +261 -0
  552. cuda/cccl/headers/include/cuda/std/__algorithm/rotate_copy.h +40 -0
  553. cuda/cccl/headers/include/cuda/std/__algorithm/search.h +185 -0
  554. cuda/cccl/headers/include/cuda/std/__algorithm/search_n.h +163 -0
  555. cuda/cccl/headers/include/cuda/std/__algorithm/set_difference.h +95 -0
  556. cuda/cccl/headers/include/cuda/std/__algorithm/set_intersection.h +122 -0
  557. cuda/cccl/headers/include/cuda/std/__algorithm/set_symmetric_difference.h +134 -0
  558. cuda/cccl/headers/include/cuda/std/__algorithm/set_union.h +128 -0
  559. cuda/cccl/headers/include/cuda/std/__algorithm/shift_left.h +84 -0
  560. cuda/cccl/headers/include/cuda/std/__algorithm/shift_right.h +144 -0
  561. cuda/cccl/headers/include/cuda/std/__algorithm/sift_down.h +139 -0
  562. cuda/cccl/headers/include/cuda/std/__algorithm/sort_heap.h +70 -0
  563. cuda/cccl/headers/include/cuda/std/__algorithm/swap_ranges.h +78 -0
  564. cuda/cccl/headers/include/cuda/std/__algorithm/transform.h +59 -0
  565. cuda/cccl/headers/include/cuda/std/__algorithm/unique.h +76 -0
  566. cuda/cccl/headers/include/cuda/std/__algorithm/unique_copy.h +155 -0
  567. cuda/cccl/headers/include/cuda/std/__algorithm/unwrap_iter.h +95 -0
  568. cuda/cccl/headers/include/cuda/std/__algorithm/unwrap_range.h +126 -0
  569. cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h +83 -0
  570. cuda/cccl/headers/include/cuda/std/__algorithm_ +26 -0
  571. cuda/cccl/headers/include/cuda/std/__atomic/api/common.h +192 -0
  572. cuda/cccl/headers/include/cuda/std/__atomic/api/owned.h +136 -0
  573. cuda/cccl/headers/include/cuda/std/__atomic/api/reference.h +118 -0
  574. cuda/cccl/headers/include/cuda/std/__atomic/functions/common.h +58 -0
  575. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_local.h +208 -0
  576. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_derived.h +401 -0
  577. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated.h +3971 -0
  578. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated_helper.h +177 -0
  579. cuda/cccl/headers/include/cuda/std/__atomic/functions/host.h +211 -0
  580. cuda/cccl/headers/include/cuda/std/__atomic/functions.h +33 -0
  581. cuda/cccl/headers/include/cuda/std/__atomic/order.h +159 -0
  582. cuda/cccl/headers/include/cuda/std/__atomic/platform/msvc_to_builtins.h +654 -0
  583. cuda/cccl/headers/include/cuda/std/__atomic/platform.h +93 -0
  584. cuda/cccl/headers/include/cuda/std/__atomic/scopes.h +105 -0
  585. cuda/cccl/headers/include/cuda/std/__atomic/types/base.h +249 -0
  586. cuda/cccl/headers/include/cuda/std/__atomic/types/common.h +104 -0
  587. cuda/cccl/headers/include/cuda/std/__atomic/types/locked.h +225 -0
  588. cuda/cccl/headers/include/cuda/std/__atomic/types/reference.h +72 -0
  589. cuda/cccl/headers/include/cuda/std/__atomic/types/small.h +228 -0
  590. cuda/cccl/headers/include/cuda/std/__atomic/types.h +52 -0
  591. cuda/cccl/headers/include/cuda/std/__atomic/wait/notify_wait.h +95 -0
  592. cuda/cccl/headers/include/cuda/std/__atomic/wait/polling.h +65 -0
  593. cuda/cccl/headers/include/cuda/std/__barrier/barrier.h +227 -0
  594. cuda/cccl/headers/include/cuda/std/__barrier/empty_completion.h +37 -0
  595. cuda/cccl/headers/include/cuda/std/__barrier/poll_tester.h +82 -0
  596. cuda/cccl/headers/include/cuda/std/__bit/bit_cast.h +76 -0
  597. cuda/cccl/headers/include/cuda/std/__bit/byteswap.h +185 -0
  598. cuda/cccl/headers/include/cuda/std/__bit/countl.h +174 -0
  599. cuda/cccl/headers/include/cuda/std/__bit/countr.h +185 -0
  600. cuda/cccl/headers/include/cuda/std/__bit/endian.h +39 -0
  601. cuda/cccl/headers/include/cuda/std/__bit/has_single_bit.h +43 -0
  602. cuda/cccl/headers/include/cuda/std/__bit/integral.h +126 -0
  603. cuda/cccl/headers/include/cuda/std/__bit/popcount.h +154 -0
  604. cuda/cccl/headers/include/cuda/std/__bit/reference.h +1272 -0
  605. cuda/cccl/headers/include/cuda/std/__bit/rotate.h +94 -0
  606. cuda/cccl/headers/include/cuda/std/__cccl/algorithm_wrapper.h +36 -0
  607. cuda/cccl/headers/include/cuda/std/__cccl/architecture.h +78 -0
  608. cuda/cccl/headers/include/cuda/std/__cccl/assert.h +161 -0
  609. cuda/cccl/headers/include/cuda/std/__cccl/attributes.h +206 -0
  610. cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +673 -0
  611. cuda/cccl/headers/include/cuda/std/__cccl/compiler.h +217 -0
  612. cuda/cccl/headers/include/cuda/std/__cccl/cuda_capabilities.h +51 -0
  613. cuda/cccl/headers/include/cuda/std/__cccl/cuda_toolkit.h +56 -0
  614. cuda/cccl/headers/include/cuda/std/__cccl/deprecated.h +88 -0
  615. cuda/cccl/headers/include/cuda/std/__cccl/diagnostic.h +131 -0
  616. cuda/cccl/headers/include/cuda/std/__cccl/dialect.h +123 -0
  617. cuda/cccl/headers/include/cuda/std/__cccl/epilogue.h +344 -0
  618. cuda/cccl/headers/include/cuda/std/__cccl/exceptions.h +91 -0
  619. cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h +74 -0
  620. cuda/cccl/headers/include/cuda/std/__cccl/extended_data_types.h +160 -0
  621. cuda/cccl/headers/include/cuda/std/__cccl/host_std_lib.h +52 -0
  622. cuda/cccl/headers/include/cuda/std/__cccl/is_non_narrowing_convertible.h +73 -0
  623. cuda/cccl/headers/include/cuda/std/__cccl/memory_wrapper.h +36 -0
  624. cuda/cccl/headers/include/cuda/std/__cccl/numeric_wrapper.h +36 -0
  625. cuda/cccl/headers/include/cuda/std/__cccl/os.h +54 -0
  626. cuda/cccl/headers/include/cuda/std/__cccl/preprocessor.h +1286 -0
  627. cuda/cccl/headers/include/cuda/std/__cccl/prologue.h +281 -0
  628. cuda/cccl/headers/include/cuda/std/__cccl/ptx_isa.h +253 -0
  629. cuda/cccl/headers/include/cuda/std/__cccl/rtti.h +72 -0
  630. cuda/cccl/headers/include/cuda/std/__cccl/sequence_access.h +87 -0
  631. cuda/cccl/headers/include/cuda/std/__cccl/system_header.h +38 -0
  632. cuda/cccl/headers/include/cuda/std/__cccl/unreachable.h +31 -0
  633. cuda/cccl/headers/include/cuda/std/__cccl/version.h +26 -0
  634. cuda/cccl/headers/include/cuda/std/__cccl/visibility.h +171 -0
  635. cuda/cccl/headers/include/cuda/std/__charconv/chars_format.h +81 -0
  636. cuda/cccl/headers/include/cuda/std/__charconv/from_chars.h +154 -0
  637. cuda/cccl/headers/include/cuda/std/__charconv/from_chars_result.h +56 -0
  638. cuda/cccl/headers/include/cuda/std/__charconv/to_chars.h +148 -0
  639. cuda/cccl/headers/include/cuda/std/__charconv/to_chars_result.h +56 -0
  640. cuda/cccl/headers/include/cuda/std/__charconv_ +31 -0
  641. cuda/cccl/headers/include/cuda/std/__chrono/calendar.h +54 -0
  642. cuda/cccl/headers/include/cuda/std/__chrono/day.h +162 -0
  643. cuda/cccl/headers/include/cuda/std/__chrono/duration.h +503 -0
  644. cuda/cccl/headers/include/cuda/std/__chrono/file_clock.h +55 -0
  645. cuda/cccl/headers/include/cuda/std/__chrono/high_resolution_clock.h +46 -0
  646. cuda/cccl/headers/include/cuda/std/__chrono/month.h +187 -0
  647. cuda/cccl/headers/include/cuda/std/__chrono/steady_clock.h +60 -0
  648. cuda/cccl/headers/include/cuda/std/__chrono/system_clock.h +80 -0
  649. cuda/cccl/headers/include/cuda/std/__chrono/time_point.h +259 -0
  650. cuda/cccl/headers/include/cuda/std/__chrono/year.h +186 -0
  651. cuda/cccl/headers/include/cuda/std/__cmath/abs.h +127 -0
  652. cuda/cccl/headers/include/cuda/std/__cmath/copysign.h +88 -0
  653. cuda/cccl/headers/include/cuda/std/__cmath/error_functions.h +200 -0
  654. cuda/cccl/headers/include/cuda/std/__cmath/exponential_functions.h +784 -0
  655. cuda/cccl/headers/include/cuda/std/__cmath/fdim.h +118 -0
  656. cuda/cccl/headers/include/cuda/std/__cmath/fma.h +125 -0
  657. cuda/cccl/headers/include/cuda/std/__cmath/fpclassify.h +231 -0
  658. cuda/cccl/headers/include/cuda/std/__cmath/gamma.h +205 -0
  659. cuda/cccl/headers/include/cuda/std/__cmath/hyperbolic_functions.h +286 -0
  660. cuda/cccl/headers/include/cuda/std/__cmath/hypot.h +221 -0
  661. cuda/cccl/headers/include/cuda/std/__cmath/inverse_hyperbolic_functions.h +286 -0
  662. cuda/cccl/headers/include/cuda/std/__cmath/inverse_trigonometric_functions.h +371 -0
  663. cuda/cccl/headers/include/cuda/std/__cmath/isfinite.h +167 -0
  664. cuda/cccl/headers/include/cuda/std/__cmath/isinf.h +205 -0
  665. cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +186 -0
  666. cuda/cccl/headers/include/cuda/std/__cmath/isnormal.h +138 -0
  667. cuda/cccl/headers/include/cuda/std/__cmath/lerp.h +101 -0
  668. cuda/cccl/headers/include/cuda/std/__cmath/logarithms.h +534 -0
  669. cuda/cccl/headers/include/cuda/std/__cmath/min_max.h +287 -0
  670. cuda/cccl/headers/include/cuda/std/__cmath/modulo.h +208 -0
  671. cuda/cccl/headers/include/cuda/std/__cmath/nan.h +54 -0
  672. cuda/cccl/headers/include/cuda/std/__cmath/remainder.h +206 -0
  673. cuda/cccl/headers/include/cuda/std/__cmath/roots.h +199 -0
  674. cuda/cccl/headers/include/cuda/std/__cmath/rounding_functions.h +984 -0
  675. cuda/cccl/headers/include/cuda/std/__cmath/signbit.h +56 -0
  676. cuda/cccl/headers/include/cuda/std/__cmath/traits.h +238 -0
  677. cuda/cccl/headers/include/cuda/std/__cmath/trigonometric_functions.h +328 -0
  678. cuda/cccl/headers/include/cuda/std/__complex/arg.h +84 -0
  679. cuda/cccl/headers/include/cuda/std/__complex/complex.h +669 -0
  680. cuda/cccl/headers/include/cuda/std/__complex/exponential_functions.h +411 -0
  681. cuda/cccl/headers/include/cuda/std/__complex/hyperbolic_functions.h +117 -0
  682. cuda/cccl/headers/include/cuda/std/__complex/inverse_hyperbolic_functions.h +216 -0
  683. cuda/cccl/headers/include/cuda/std/__complex/inverse_trigonometric_functions.h +131 -0
  684. cuda/cccl/headers/include/cuda/std/__complex/literals.h +86 -0
  685. cuda/cccl/headers/include/cuda/std/__complex/logarithms.h +303 -0
  686. cuda/cccl/headers/include/cuda/std/__complex/math.h +159 -0
  687. cuda/cccl/headers/include/cuda/std/__complex/nvbf16.h +323 -0
  688. cuda/cccl/headers/include/cuda/std/__complex/nvfp16.h +322 -0
  689. cuda/cccl/headers/include/cuda/std/__complex/roots.h +214 -0
  690. cuda/cccl/headers/include/cuda/std/__complex/trigonometric_functions.h +61 -0
  691. cuda/cccl/headers/include/cuda/std/__complex/tuple.h +107 -0
  692. cuda/cccl/headers/include/cuda/std/__complex/vector_support.h +130 -0
  693. cuda/cccl/headers/include/cuda/std/__concepts/arithmetic.h +56 -0
  694. cuda/cccl/headers/include/cuda/std/__concepts/assignable.h +64 -0
  695. cuda/cccl/headers/include/cuda/std/__concepts/boolean_testable.h +63 -0
  696. cuda/cccl/headers/include/cuda/std/__concepts/class_or_enum.h +45 -0
  697. cuda/cccl/headers/include/cuda/std/__concepts/common_reference_with.h +69 -0
  698. cuda/cccl/headers/include/cuda/std/__concepts/common_with.h +82 -0
  699. cuda/cccl/headers/include/cuda/std/__concepts/concept_macros.h +341 -0
  700. cuda/cccl/headers/include/cuda/std/__concepts/constructible.h +174 -0
  701. cuda/cccl/headers/include/cuda/std/__concepts/convertible_to.h +70 -0
  702. cuda/cccl/headers/include/cuda/std/__concepts/copyable.h +60 -0
  703. cuda/cccl/headers/include/cuda/std/__concepts/derived_from.h +56 -0
  704. cuda/cccl/headers/include/cuda/std/__concepts/destructible.h +76 -0
  705. cuda/cccl/headers/include/cuda/std/__concepts/different_from.h +38 -0
  706. cuda/cccl/headers/include/cuda/std/__concepts/equality_comparable.h +100 -0
  707. cuda/cccl/headers/include/cuda/std/__concepts/invocable.h +80 -0
  708. cuda/cccl/headers/include/cuda/std/__concepts/movable.h +58 -0
  709. cuda/cccl/headers/include/cuda/std/__concepts/predicate.h +54 -0
  710. cuda/cccl/headers/include/cuda/std/__concepts/regular.h +54 -0
  711. cuda/cccl/headers/include/cuda/std/__concepts/relation.h +77 -0
  712. cuda/cccl/headers/include/cuda/std/__concepts/same_as.h +39 -0
  713. cuda/cccl/headers/include/cuda/std/__concepts/semiregular.h +54 -0
  714. cuda/cccl/headers/include/cuda/std/__concepts/swappable.h +206 -0
  715. cuda/cccl/headers/include/cuda/std/__concepts/totally_ordered.h +101 -0
  716. cuda/cccl/headers/include/cuda/std/__cstddef/byte.h +113 -0
  717. cuda/cccl/headers/include/cuda/std/__cstddef/types.h +52 -0
  718. cuda/cccl/headers/include/cuda/std/__cstdlib/abs.h +57 -0
  719. cuda/cccl/headers/include/cuda/std/__cstdlib/aligned_alloc.h +66 -0
  720. cuda/cccl/headers/include/cuda/std/__cstdlib/div.h +96 -0
  721. cuda/cccl/headers/include/cuda/std/__cstdlib/malloc.h +70 -0
  722. cuda/cccl/headers/include/cuda/std/__cstring/memcpy.h +61 -0
  723. cuda/cccl/headers/include/cuda/std/__cstring/memset.h +46 -0
  724. cuda/cccl/headers/include/cuda/std/__cuda/api_wrapper.h +62 -0
  725. cuda/cccl/headers/include/cuda/std/__exception/cuda_error.h +139 -0
  726. cuda/cccl/headers/include/cuda/std/__exception/terminate.h +73 -0
  727. cuda/cccl/headers/include/cuda/std/__execution/env.h +455 -0
  728. cuda/cccl/headers/include/cuda/std/__execution/policy.h +88 -0
  729. cuda/cccl/headers/include/cuda/std/__expected/bad_expected_access.h +127 -0
  730. cuda/cccl/headers/include/cuda/std/__expected/expected.h +1941 -0
  731. cuda/cccl/headers/include/cuda/std/__expected/expected_base.h +1050 -0
  732. cuda/cccl/headers/include/cuda/std/__expected/unexpect.h +37 -0
  733. cuda/cccl/headers/include/cuda/std/__expected/unexpected.h +165 -0
  734. cuda/cccl/headers/include/cuda/std/__floating_point/arithmetic.h +56 -0
  735. cuda/cccl/headers/include/cuda/std/__floating_point/cast.h +812 -0
  736. cuda/cccl/headers/include/cuda/std/__floating_point/cccl_fp.h +125 -0
  737. cuda/cccl/headers/include/cuda/std/__floating_point/common_type.h +48 -0
  738. cuda/cccl/headers/include/cuda/std/__floating_point/constants.h +376 -0
  739. cuda/cccl/headers/include/cuda/std/__floating_point/conversion_rank_order.h +124 -0
  740. cuda/cccl/headers/include/cuda/std/__floating_point/cuda_fp_types.h +116 -0
  741. cuda/cccl/headers/include/cuda/std/__floating_point/decompose.h +69 -0
  742. cuda/cccl/headers/include/cuda/std/__floating_point/format.h +162 -0
  743. cuda/cccl/headers/include/cuda/std/__floating_point/fp.h +40 -0
  744. cuda/cccl/headers/include/cuda/std/__floating_point/mask.h +78 -0
  745. cuda/cccl/headers/include/cuda/std/__floating_point/native_type.h +81 -0
  746. cuda/cccl/headers/include/cuda/std/__floating_point/overflow_handler.h +139 -0
  747. cuda/cccl/headers/include/cuda/std/__floating_point/properties.h +229 -0
  748. cuda/cccl/headers/include/cuda/std/__floating_point/storage.h +248 -0
  749. cuda/cccl/headers/include/cuda/std/__floating_point/traits.h +172 -0
  750. cuda/cccl/headers/include/cuda/std/__format/buffer.h +48 -0
  751. cuda/cccl/headers/include/cuda/std/__format/concepts.h +69 -0
  752. cuda/cccl/headers/include/cuda/std/__format/format_arg.h +282 -0
  753. cuda/cccl/headers/include/cuda/std/__format/format_arg_store.h +279 -0
  754. cuda/cccl/headers/include/cuda/std/__format/format_args.h +122 -0
  755. cuda/cccl/headers/include/cuda/std/__format/format_context.h +92 -0
  756. cuda/cccl/headers/include/cuda/std/__format/format_error.h +76 -0
  757. cuda/cccl/headers/include/cuda/std/__format/format_integral.h +237 -0
  758. cuda/cccl/headers/include/cuda/std/__format/format_parse_context.h +124 -0
  759. cuda/cccl/headers/include/cuda/std/__format/format_spec_parser.h +1230 -0
  760. cuda/cccl/headers/include/cuda/std/__format/formatter.h +59 -0
  761. cuda/cccl/headers/include/cuda/std/__format/formatters/bool.h +101 -0
  762. cuda/cccl/headers/include/cuda/std/__format/formatters/char.h +124 -0
  763. cuda/cccl/headers/include/cuda/std/__format/formatters/fp.h +101 -0
  764. cuda/cccl/headers/include/cuda/std/__format/formatters/int.h +174 -0
  765. cuda/cccl/headers/include/cuda/std/__format/formatters/ptr.h +104 -0
  766. cuda/cccl/headers/include/cuda/std/__format/formatters/str.h +178 -0
  767. cuda/cccl/headers/include/cuda/std/__format/output_utils.h +272 -0
  768. cuda/cccl/headers/include/cuda/std/__format/parse_arg_id.h +138 -0
  769. cuda/cccl/headers/include/cuda/std/__format_ +45 -0
  770. cuda/cccl/headers/include/cuda/std/__functional/binary_function.h +63 -0
  771. cuda/cccl/headers/include/cuda/std/__functional/binary_negate.h +65 -0
  772. cuda/cccl/headers/include/cuda/std/__functional/bind.h +334 -0
  773. cuda/cccl/headers/include/cuda/std/__functional/bind_back.h +80 -0
  774. cuda/cccl/headers/include/cuda/std/__functional/bind_front.h +73 -0
  775. cuda/cccl/headers/include/cuda/std/__functional/binder1st.h +74 -0
  776. cuda/cccl/headers/include/cuda/std/__functional/binder2nd.h +74 -0
  777. cuda/cccl/headers/include/cuda/std/__functional/compose.h +68 -0
  778. cuda/cccl/headers/include/cuda/std/__functional/default_searcher.h +75 -0
  779. cuda/cccl/headers/include/cuda/std/__functional/function.h +1275 -0
  780. cuda/cccl/headers/include/cuda/std/__functional/hash.h +649 -0
  781. cuda/cccl/headers/include/cuda/std/__functional/identity.h +57 -0
  782. cuda/cccl/headers/include/cuda/std/__functional/invoke.h +296 -0
  783. cuda/cccl/headers/include/cuda/std/__functional/is_transparent.h +41 -0
  784. cuda/cccl/headers/include/cuda/std/__functional/mem_fn.h +66 -0
  785. cuda/cccl/headers/include/cuda/std/__functional/mem_fun_ref.h +211 -0
  786. cuda/cccl/headers/include/cuda/std/__functional/not_fn.h +120 -0
  787. cuda/cccl/headers/include/cuda/std/__functional/operations.h +534 -0
  788. cuda/cccl/headers/include/cuda/std/__functional/perfect_forward.h +128 -0
  789. cuda/cccl/headers/include/cuda/std/__functional/pointer_to_binary_function.h +64 -0
  790. cuda/cccl/headers/include/cuda/std/__functional/pointer_to_unary_function.h +63 -0
  791. cuda/cccl/headers/include/cuda/std/__functional/ranges_operations.h +113 -0
  792. cuda/cccl/headers/include/cuda/std/__functional/reference_wrapper.h +113 -0
  793. cuda/cccl/headers/include/cuda/std/__functional/unary_function.h +62 -0
  794. cuda/cccl/headers/include/cuda/std/__functional/unary_negate.h +65 -0
  795. cuda/cccl/headers/include/cuda/std/__functional/unwrap_ref.h +56 -0
  796. cuda/cccl/headers/include/cuda/std/__functional/weak_result_type.h +262 -0
  797. cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +53 -0
  798. cuda/cccl/headers/include/cuda/std/__fwd/array.h +42 -0
  799. cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +74 -0
  800. cuda/cccl/headers/include/cuda/std/__fwd/complex.h +75 -0
  801. cuda/cccl/headers/include/cuda/std/__fwd/expected.h +46 -0
  802. cuda/cccl/headers/include/cuda/std/__fwd/format.h +84 -0
  803. cuda/cccl/headers/include/cuda/std/__fwd/fp.h +37 -0
  804. cuda/cccl/headers/include/cuda/std/__fwd/get.h +123 -0
  805. cuda/cccl/headers/include/cuda/std/__fwd/hash.h +34 -0
  806. cuda/cccl/headers/include/cuda/std/__fwd/iterator.h +43 -0
  807. cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +122 -0
  808. cuda/cccl/headers/include/cuda/std/__fwd/memory_resource.h +37 -0
  809. cuda/cccl/headers/include/cuda/std/__fwd/optional.h +39 -0
  810. cuda/cccl/headers/include/cuda/std/__fwd/pair.h +47 -0
  811. cuda/cccl/headers/include/cuda/std/__fwd/reference_wrapper.h +34 -0
  812. cuda/cccl/headers/include/cuda/std/__fwd/span.h +45 -0
  813. cuda/cccl/headers/include/cuda/std/__fwd/string.h +112 -0
  814. cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +91 -0
  815. cuda/cccl/headers/include/cuda/std/__fwd/subrange.h +55 -0
  816. cuda/cccl/headers/include/cuda/std/__fwd/tuple.h +34 -0
  817. cuda/cccl/headers/include/cuda/std/__fwd/unexpected.h +40 -0
  818. cuda/cccl/headers/include/cuda/std/__internal/cpp_dialect.h +44 -0
  819. cuda/cccl/headers/include/cuda/std/__internal/features.h +72 -0
  820. cuda/cccl/headers/include/cuda/std/__internal/namespaces.h +143 -0
  821. cuda/cccl/headers/include/cuda/std/__iterator/access.h +128 -0
  822. cuda/cccl/headers/include/cuda/std/__iterator/advance.h +228 -0
  823. cuda/cccl/headers/include/cuda/std/__iterator/back_insert_iterator.h +163 -0
  824. cuda/cccl/headers/include/cuda/std/__iterator/bounded_iter.h +253 -0
  825. cuda/cccl/headers/include/cuda/std/__iterator/concepts.h +645 -0
  826. cuda/cccl/headers/include/cuda/std/__iterator/counted_iterator.h +464 -0
  827. cuda/cccl/headers/include/cuda/std/__iterator/data.h +61 -0
  828. cuda/cccl/headers/include/cuda/std/__iterator/default_sentinel.h +36 -0
  829. cuda/cccl/headers/include/cuda/std/__iterator/distance.h +126 -0
  830. cuda/cccl/headers/include/cuda/std/__iterator/empty.h +53 -0
  831. cuda/cccl/headers/include/cuda/std/__iterator/erase_if_container.h +53 -0
  832. cuda/cccl/headers/include/cuda/std/__iterator/front_insert_iterator.h +99 -0
  833. cuda/cccl/headers/include/cuda/std/__iterator/incrementable_traits.h +143 -0
  834. cuda/cccl/headers/include/cuda/std/__iterator/indirectly_comparable.h +55 -0
  835. cuda/cccl/headers/include/cuda/std/__iterator/insert_iterator.h +107 -0
  836. cuda/cccl/headers/include/cuda/std/__iterator/istream_iterator.h +146 -0
  837. cuda/cccl/headers/include/cuda/std/__iterator/istreambuf_iterator.h +161 -0
  838. cuda/cccl/headers/include/cuda/std/__iterator/iter_move.h +161 -0
  839. cuda/cccl/headers/include/cuda/std/__iterator/iter_swap.h +163 -0
  840. cuda/cccl/headers/include/cuda/std/__iterator/iterator.h +44 -0
  841. cuda/cccl/headers/include/cuda/std/__iterator/iterator_traits.h +847 -0
  842. cuda/cccl/headers/include/cuda/std/__iterator/mergeable.h +72 -0
  843. cuda/cccl/headers/include/cuda/std/__iterator/move_iterator.h +432 -0
  844. cuda/cccl/headers/include/cuda/std/__iterator/move_sentinel.h +73 -0
  845. cuda/cccl/headers/include/cuda/std/__iterator/next.h +101 -0
  846. cuda/cccl/headers/include/cuda/std/__iterator/ostream_iterator.h +95 -0
  847. cuda/cccl/headers/include/cuda/std/__iterator/ostreambuf_iterator.h +100 -0
  848. cuda/cccl/headers/include/cuda/std/__iterator/permutable.h +54 -0
  849. cuda/cccl/headers/include/cuda/std/__iterator/prev.h +90 -0
  850. cuda/cccl/headers/include/cuda/std/__iterator/projected.h +61 -0
  851. cuda/cccl/headers/include/cuda/std/__iterator/readable_traits.h +156 -0
  852. cuda/cccl/headers/include/cuda/std/__iterator/reverse_access.h +142 -0
  853. cuda/cccl/headers/include/cuda/std/__iterator/reverse_iterator.h +371 -0
  854. cuda/cccl/headers/include/cuda/std/__iterator/size.h +69 -0
  855. cuda/cccl/headers/include/cuda/std/__iterator/sortable.h +55 -0
  856. cuda/cccl/headers/include/cuda/std/__iterator/unreachable_sentinel.h +84 -0
  857. cuda/cccl/headers/include/cuda/std/__iterator/wrap_iter.h +245 -0
  858. cuda/cccl/headers/include/cuda/std/__latch/latch.h +88 -0
  859. cuda/cccl/headers/include/cuda/std/__limits/numeric_limits.h +617 -0
  860. cuda/cccl/headers/include/cuda/std/__limits/numeric_limits_ext.h +753 -0
  861. cuda/cccl/headers/include/cuda/std/__linalg/conj_if_needed.h +78 -0
  862. cuda/cccl/headers/include/cuda/std/__linalg/conjugate_transposed.h +54 -0
  863. cuda/cccl/headers/include/cuda/std/__linalg/conjugated.h +139 -0
  864. cuda/cccl/headers/include/cuda/std/__linalg/scaled.h +132 -0
  865. cuda/cccl/headers/include/cuda/std/__linalg/transposed.h +321 -0
  866. cuda/cccl/headers/include/cuda/std/__mdspan/aligned_accessor.h +97 -0
  867. cuda/cccl/headers/include/cuda/std/__mdspan/concepts.h +139 -0
  868. cuda/cccl/headers/include/cuda/std/__mdspan/default_accessor.h +73 -0
  869. cuda/cccl/headers/include/cuda/std/__mdspan/empty_base.h +352 -0
  870. cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +759 -0
  871. cuda/cccl/headers/include/cuda/std/__mdspan/layout_left.h +314 -0
  872. cuda/cccl/headers/include/cuda/std/__mdspan/layout_right.h +307 -0
  873. cuda/cccl/headers/include/cuda/std/__mdspan/layout_stride.h +605 -0
  874. cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +512 -0
  875. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_extents.h +193 -0
  876. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_helper.h +189 -0
  877. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_mapping.h +344 -0
  878. cuda/cccl/headers/include/cuda/std/__memory/addressof.h +67 -0
  879. cuda/cccl/headers/include/cuda/std/__memory/align.h +67 -0
  880. cuda/cccl/headers/include/cuda/std/__memory/allocate_at_least.h +81 -0
  881. cuda/cccl/headers/include/cuda/std/__memory/allocation_guard.h +100 -0
  882. cuda/cccl/headers/include/cuda/std/__memory/allocator.h +320 -0
  883. cuda/cccl/headers/include/cuda/std/__memory/allocator_arg_t.h +84 -0
  884. cuda/cccl/headers/include/cuda/std/__memory/allocator_destructor.h +59 -0
  885. cuda/cccl/headers/include/cuda/std/__memory/allocator_traits.h +525 -0
  886. cuda/cccl/headers/include/cuda/std/__memory/assume_aligned.h +60 -0
  887. cuda/cccl/headers/include/cuda/std/__memory/builtin_new_allocator.h +87 -0
  888. cuda/cccl/headers/include/cuda/std/__memory/compressed_pair.h +225 -0
  889. cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +246 -0
  890. cuda/cccl/headers/include/cuda/std/__memory/destruct_n.h +91 -0
  891. cuda/cccl/headers/include/cuda/std/__memory/is_sufficiently_aligned.h +46 -0
  892. cuda/cccl/headers/include/cuda/std/__memory/pointer_traits.h +246 -0
  893. cuda/cccl/headers/include/cuda/std/__memory/runtime_assume_aligned.h +62 -0
  894. cuda/cccl/headers/include/cuda/std/__memory/temporary_buffer.h +92 -0
  895. cuda/cccl/headers/include/cuda/std/__memory/uninitialized_algorithms.h +678 -0
  896. cuda/cccl/headers/include/cuda/std/__memory/unique_ptr.h +765 -0
  897. cuda/cccl/headers/include/cuda/std/__memory/uses_allocator.h +54 -0
  898. cuda/cccl/headers/include/cuda/std/__memory/voidify.h +41 -0
  899. cuda/cccl/headers/include/cuda/std/__memory_ +34 -0
  900. cuda/cccl/headers/include/cuda/std/__new/allocate.h +126 -0
  901. cuda/cccl/headers/include/cuda/std/__new/bad_alloc.h +57 -0
  902. cuda/cccl/headers/include/cuda/std/__new/launder.h +53 -0
  903. cuda/cccl/headers/include/cuda/std/__new_ +29 -0
  904. cuda/cccl/headers/include/cuda/std/__numeric/accumulate.h +56 -0
  905. cuda/cccl/headers/include/cuda/std/__numeric/adjacent_difference.h +72 -0
  906. cuda/cccl/headers/include/cuda/std/__numeric/exclusive_scan.h +66 -0
  907. cuda/cccl/headers/include/cuda/std/__numeric/gcd_lcm.h +78 -0
  908. cuda/cccl/headers/include/cuda/std/__numeric/inclusive_scan.h +73 -0
  909. cuda/cccl/headers/include/cuda/std/__numeric/inner_product.h +62 -0
  910. cuda/cccl/headers/include/cuda/std/__numeric/iota.h +42 -0
  911. cuda/cccl/headers/include/cuda/std/__numeric/midpoint.h +97 -0
  912. cuda/cccl/headers/include/cuda/std/__numeric/partial_sum.h +69 -0
  913. cuda/cccl/headers/include/cuda/std/__numeric/reduce.h +60 -0
  914. cuda/cccl/headers/include/cuda/std/__numeric/transform_exclusive_scan.h +51 -0
  915. cuda/cccl/headers/include/cuda/std/__numeric/transform_inclusive_scan.h +65 -0
  916. cuda/cccl/headers/include/cuda/std/__numeric/transform_reduce.h +72 -0
  917. cuda/cccl/headers/include/cuda/std/__optional/bad_optional_access.h +74 -0
  918. cuda/cccl/headers/include/cuda/std/__optional/hash.h +53 -0
  919. cuda/cccl/headers/include/cuda/std/__optional/make_optional.h +61 -0
  920. cuda/cccl/headers/include/cuda/std/__optional/nullopt.h +43 -0
  921. cuda/cccl/headers/include/cuda/std/__optional/optional.h +859 -0
  922. cuda/cccl/headers/include/cuda/std/__optional/optional_base.h +433 -0
  923. cuda/cccl/headers/include/cuda/std/__optional/optional_ref.h +324 -0
  924. cuda/cccl/headers/include/cuda/std/__random/generate_canonical.h +56 -0
  925. cuda/cccl/headers/include/cuda/std/__random/is_seed_sequence.h +39 -0
  926. cuda/cccl/headers/include/cuda/std/__random/is_valid.h +106 -0
  927. cuda/cccl/headers/include/cuda/std/__random/linear_congruential_engine.h +398 -0
  928. cuda/cccl/headers/include/cuda/std/__random/uniform_int_distribution.h +335 -0
  929. cuda/cccl/headers/include/cuda/std/__random/uniform_real_distribution.h +183 -0
  930. cuda/cccl/headers/include/cuda/std/__random_ +29 -0
  931. cuda/cccl/headers/include/cuda/std/__ranges/access.h +303 -0
  932. cuda/cccl/headers/include/cuda/std/__ranges/all.h +98 -0
  933. cuda/cccl/headers/include/cuda/std/__ranges/compressed_movable_box.h +892 -0
  934. cuda/cccl/headers/include/cuda/std/__ranges/concepts.h +302 -0
  935. cuda/cccl/headers/include/cuda/std/__ranges/counted.h +90 -0
  936. cuda/cccl/headers/include/cuda/std/__ranges/dangling.h +54 -0
  937. cuda/cccl/headers/include/cuda/std/__ranges/data.h +136 -0
  938. cuda/cccl/headers/include/cuda/std/__ranges/empty.h +109 -0
  939. cuda/cccl/headers/include/cuda/std/__ranges/empty_view.h +77 -0
  940. cuda/cccl/headers/include/cuda/std/__ranges/enable_borrowed_range.h +41 -0
  941. cuda/cccl/headers/include/cuda/std/__ranges/enable_view.h +78 -0
  942. cuda/cccl/headers/include/cuda/std/__ranges/from_range.h +36 -0
  943. cuda/cccl/headers/include/cuda/std/__ranges/iota_view.h +266 -0
  944. cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +410 -0
  945. cuda/cccl/headers/include/cuda/std/__ranges/owning_view.h +162 -0
  946. cuda/cccl/headers/include/cuda/std/__ranges/range_adaptor.h +110 -0
  947. cuda/cccl/headers/include/cuda/std/__ranges/rbegin.h +175 -0
  948. cuda/cccl/headers/include/cuda/std/__ranges/ref_view.h +121 -0
  949. cuda/cccl/headers/include/cuda/std/__ranges/rend.h +182 -0
  950. cuda/cccl/headers/include/cuda/std/__ranges/repeat_view.h +345 -0
  951. cuda/cccl/headers/include/cuda/std/__ranges/single_view.h +155 -0
  952. cuda/cccl/headers/include/cuda/std/__ranges/size.h +201 -0
  953. cuda/cccl/headers/include/cuda/std/__ranges/subrange.h +513 -0
  954. cuda/cccl/headers/include/cuda/std/__ranges/take_view.h +476 -0
  955. cuda/cccl/headers/include/cuda/std/__ranges/take_while_view.h +259 -0
  956. cuda/cccl/headers/include/cuda/std/__ranges/transform_view.h +522 -0
  957. cuda/cccl/headers/include/cuda/std/__ranges/unwrap_end.h +53 -0
  958. cuda/cccl/headers/include/cuda/std/__ranges/view_interface.h +183 -0
  959. cuda/cccl/headers/include/cuda/std/__ranges/views.h +38 -0
  960. cuda/cccl/headers/include/cuda/std/__semaphore/atomic_semaphore.h +234 -0
  961. cuda/cccl/headers/include/cuda/std/__semaphore/counting_semaphore.h +51 -0
  962. cuda/cccl/headers/include/cuda/std/__string/char_traits.h +191 -0
  963. cuda/cccl/headers/include/cuda/std/__string/constexpr_c_functions.h +581 -0
  964. cuda/cccl/headers/include/cuda/std/__string/helper_functions.h +296 -0
  965. cuda/cccl/headers/include/cuda/std/__string/string_view.h +244 -0
  966. cuda/cccl/headers/include/cuda/std/__string_ +29 -0
  967. cuda/cccl/headers/include/cuda/std/__system_error/errc.h +51 -0
  968. cuda/cccl/headers/include/cuda/std/__system_error_ +26 -0
  969. cuda/cccl/headers/include/cuda/std/__thread/threading_support.h +106 -0
  970. cuda/cccl/headers/include/cuda/std/__thread/threading_support_cuda.h +47 -0
  971. cuda/cccl/headers/include/cuda/std/__thread/threading_support_external.h +41 -0
  972. cuda/cccl/headers/include/cuda/std/__thread/threading_support_pthread.h +143 -0
  973. cuda/cccl/headers/include/cuda/std/__thread/threading_support_win32.h +87 -0
  974. cuda/cccl/headers/include/cuda/std/__tuple_dir/ignore.h +51 -0
  975. cuda/cccl/headers/include/cuda/std/__tuple_dir/make_tuple_types.h +120 -0
  976. cuda/cccl/headers/include/cuda/std/__tuple_dir/sfinae_helpers.h +260 -0
  977. cuda/cccl/headers/include/cuda/std/__tuple_dir/structured_bindings.h +212 -0
  978. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_element.h +70 -0
  979. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_indices.h +44 -0
  980. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +84 -0
  981. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like_ext.h +68 -0
  982. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_size.h +79 -0
  983. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_types.h +35 -0
  984. cuda/cccl/headers/include/cuda/std/__tuple_dir/vector_types.h +290 -0
  985. cuda/cccl/headers/include/cuda/std/__type_traits/add_const.h +40 -0
  986. cuda/cccl/headers/include/cuda/std/__type_traits/add_cv.h +40 -0
  987. cuda/cccl/headers/include/cuda/std/__type_traits/add_lvalue_reference.h +62 -0
  988. cuda/cccl/headers/include/cuda/std/__type_traits/add_pointer.h +65 -0
  989. cuda/cccl/headers/include/cuda/std/__type_traits/add_rvalue_reference.h +62 -0
  990. cuda/cccl/headers/include/cuda/std/__type_traits/add_volatile.h +40 -0
  991. cuda/cccl/headers/include/cuda/std/__type_traits/aligned_storage.h +149 -0
  992. cuda/cccl/headers/include/cuda/std/__type_traits/aligned_union.h +62 -0
  993. cuda/cccl/headers/include/cuda/std/__type_traits/alignment_of.h +41 -0
  994. cuda/cccl/headers/include/cuda/std/__type_traits/always_false.h +35 -0
  995. cuda/cccl/headers/include/cuda/std/__type_traits/can_extract_key.h +68 -0
  996. cuda/cccl/headers/include/cuda/std/__type_traits/common_reference.h +262 -0
  997. cuda/cccl/headers/include/cuda/std/__type_traits/common_type.h +173 -0
  998. cuda/cccl/headers/include/cuda/std/__type_traits/conditional.h +65 -0
  999. cuda/cccl/headers/include/cuda/std/__type_traits/conjunction.h +67 -0
  1000. cuda/cccl/headers/include/cuda/std/__type_traits/copy_cv.h +50 -0
  1001. cuda/cccl/headers/include/cuda/std/__type_traits/copy_cvref.h +148 -0
  1002. cuda/cccl/headers/include/cuda/std/__type_traits/decay.h +83 -0
  1003. cuda/cccl/headers/include/cuda/std/__type_traits/dependent_type.h +35 -0
  1004. cuda/cccl/headers/include/cuda/std/__type_traits/disjunction.h +77 -0
  1005. cuda/cccl/headers/include/cuda/std/__type_traits/enable_if.h +43 -0
  1006. cuda/cccl/headers/include/cuda/std/__type_traits/extent.h +68 -0
  1007. cuda/cccl/headers/include/cuda/std/__type_traits/fold.h +47 -0
  1008. cuda/cccl/headers/include/cuda/std/__type_traits/has_unique_object_representation.h +46 -0
  1009. cuda/cccl/headers/include/cuda/std/__type_traits/has_virtual_destructor.h +42 -0
  1010. cuda/cccl/headers/include/cuda/std/__type_traits/integral_constant.h +62 -0
  1011. cuda/cccl/headers/include/cuda/std/__type_traits/is_abstract.h +42 -0
  1012. cuda/cccl/headers/include/cuda/std/__type_traits/is_aggregate.h +42 -0
  1013. cuda/cccl/headers/include/cuda/std/__type_traits/is_allocator.h +46 -0
  1014. cuda/cccl/headers/include/cuda/std/__type_traits/is_arithmetic.h +42 -0
  1015. cuda/cccl/headers/include/cuda/std/__type_traits/is_array.h +62 -0
  1016. cuda/cccl/headers/include/cuda/std/__type_traits/is_assignable.h +78 -0
  1017. cuda/cccl/headers/include/cuda/std/__type_traits/is_base_of.h +42 -0
  1018. cuda/cccl/headers/include/cuda/std/__type_traits/is_bounded_array.h +44 -0
  1019. cuda/cccl/headers/include/cuda/std/__type_traits/is_callable.h +60 -0
  1020. cuda/cccl/headers/include/cuda/std/__type_traits/is_char_like_type.h +38 -0
  1021. cuda/cccl/headers/include/cuda/std/__type_traits/is_class.h +42 -0
  1022. cuda/cccl/headers/include/cuda/std/__type_traits/is_compound.h +58 -0
  1023. cuda/cccl/headers/include/cuda/std/__type_traits/is_const.h +56 -0
  1024. cuda/cccl/headers/include/cuda/std/__type_traits/is_constant_evaluated.h +51 -0
  1025. cuda/cccl/headers/include/cuda/std/__type_traits/is_constructible.h +174 -0
  1026. cuda/cccl/headers/include/cuda/std/__type_traits/is_convertible.h +211 -0
  1027. cuda/cccl/headers/include/cuda/std/__type_traits/is_copy_assignable.h +43 -0
  1028. cuda/cccl/headers/include/cuda/std/__type_traits/is_copy_constructible.h +43 -0
  1029. cuda/cccl/headers/include/cuda/std/__type_traits/is_core_convertible.h +47 -0
  1030. cuda/cccl/headers/include/cuda/std/__type_traits/is_corresponding_member.h +42 -0
  1031. cuda/cccl/headers/include/cuda/std/__type_traits/is_default_constructible.h +40 -0
  1032. cuda/cccl/headers/include/cuda/std/__type_traits/is_destructible.h +115 -0
  1033. cuda/cccl/headers/include/cuda/std/__type_traits/is_empty.h +42 -0
  1034. cuda/cccl/headers/include/cuda/std/__type_traits/is_enum.h +42 -0
  1035. cuda/cccl/headers/include/cuda/std/__type_traits/is_execution_policy.h +81 -0
  1036. cuda/cccl/headers/include/cuda/std/__type_traits/is_extended_arithmetic.h +38 -0
  1037. cuda/cccl/headers/include/cuda/std/__type_traits/is_extended_floating_point.h +79 -0
  1038. cuda/cccl/headers/include/cuda/std/__type_traits/is_final.h +42 -0
  1039. cuda/cccl/headers/include/cuda/std/__type_traits/is_floating_point.h +53 -0
  1040. cuda/cccl/headers/include/cuda/std/__type_traits/is_function.h +61 -0
  1041. cuda/cccl/headers/include/cuda/std/__type_traits/is_fundamental.h +56 -0
  1042. cuda/cccl/headers/include/cuda/std/__type_traits/is_implicitly_default_constructible.h +57 -0
  1043. cuda/cccl/headers/include/cuda/std/__type_traits/is_integer.h +45 -0
  1044. cuda/cccl/headers/include/cuda/std/__type_traits/is_integral.h +123 -0
  1045. cuda/cccl/headers/include/cuda/std/__type_traits/is_layout_compatible.h +45 -0
  1046. cuda/cccl/headers/include/cuda/std/__type_traits/is_literal_type.h +42 -0
  1047. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_function_pointer.h +79 -0
  1048. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_object_pointer.h +57 -0
  1049. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_pointer.h +57 -0
  1050. cuda/cccl/headers/include/cuda/std/__type_traits/is_move_assignable.h +43 -0
  1051. cuda/cccl/headers/include/cuda/std/__type_traits/is_move_constructible.h +42 -0
  1052. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_assignable.h +70 -0
  1053. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_constructible.h +84 -0
  1054. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_convertible.h +59 -0
  1055. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_copy_assignable.h +60 -0
  1056. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_copy_constructible.h +43 -0
  1057. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_default_constructible.h +54 -0
  1058. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_destructible.h +82 -0
  1059. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_move_assignable.h +60 -0
  1060. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_move_constructible.h +42 -0
  1061. cuda/cccl/headers/include/cuda/std/__type_traits/is_null_pointer.h +43 -0
  1062. cuda/cccl/headers/include/cuda/std/__type_traits/is_object.h +57 -0
  1063. cuda/cccl/headers/include/cuda/std/__type_traits/is_one_of.h +37 -0
  1064. cuda/cccl/headers/include/cuda/std/__type_traits/is_pod.h +42 -0
  1065. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer.h +60 -0
  1066. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer_interconvertible_base_of.h +84 -0
  1067. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer_interconvertible_with_class.h +42 -0
  1068. cuda/cccl/headers/include/cuda/std/__type_traits/is_polymorphic.h +42 -0
  1069. cuda/cccl/headers/include/cuda/std/__type_traits/is_primary_template.h +121 -0
  1070. cuda/cccl/headers/include/cuda/std/__type_traits/is_reference.h +95 -0
  1071. cuda/cccl/headers/include/cuda/std/__type_traits/is_reference_wrapper.h +50 -0
  1072. cuda/cccl/headers/include/cuda/std/__type_traits/is_referenceable.h +55 -0
  1073. cuda/cccl/headers/include/cuda/std/__type_traits/is_same.h +88 -0
  1074. cuda/cccl/headers/include/cuda/std/__type_traits/is_scalar.h +60 -0
  1075. cuda/cccl/headers/include/cuda/std/__type_traits/is_scoped_enum.h +49 -0
  1076. cuda/cccl/headers/include/cuda/std/__type_traits/is_signed.h +65 -0
  1077. cuda/cccl/headers/include/cuda/std/__type_traits/is_signed_integer.h +59 -0
  1078. cuda/cccl/headers/include/cuda/std/__type_traits/is_standard_layout.h +42 -0
  1079. cuda/cccl/headers/include/cuda/std/__type_traits/is_swappable.h +202 -0
  1080. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivial.h +42 -0
  1081. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_assignable.h +43 -0
  1082. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_constructible.h +43 -0
  1083. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copy_assignable.h +46 -0
  1084. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copy_constructible.h +45 -0
  1085. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copyable.h +42 -0
  1086. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_default_constructible.h +42 -0
  1087. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_destructible.h +58 -0
  1088. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_move_assignable.h +45 -0
  1089. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_move_constructible.h +44 -0
  1090. cuda/cccl/headers/include/cuda/std/__type_traits/is_unbounded_array.h +43 -0
  1091. cuda/cccl/headers/include/cuda/std/__type_traits/is_union.h +42 -0
  1092. cuda/cccl/headers/include/cuda/std/__type_traits/is_unsigned.h +66 -0
  1093. cuda/cccl/headers/include/cuda/std/__type_traits/is_unsigned_integer.h +59 -0
  1094. cuda/cccl/headers/include/cuda/std/__type_traits/is_valid_expansion.h +41 -0
  1095. cuda/cccl/headers/include/cuda/std/__type_traits/is_void.h +55 -0
  1096. cuda/cccl/headers/include/cuda/std/__type_traits/is_volatile.h +56 -0
  1097. cuda/cccl/headers/include/cuda/std/__type_traits/lazy.h +35 -0
  1098. cuda/cccl/headers/include/cuda/std/__type_traits/make_const_lvalue_ref.h +36 -0
  1099. cuda/cccl/headers/include/cuda/std/__type_traits/make_nbit_int.h +107 -0
  1100. cuda/cccl/headers/include/cuda/std/__type_traits/make_signed.h +140 -0
  1101. cuda/cccl/headers/include/cuda/std/__type_traits/make_unsigned.h +151 -0
  1102. cuda/cccl/headers/include/cuda/std/__type_traits/maybe_const.h +36 -0
  1103. cuda/cccl/headers/include/cuda/std/__type_traits/nat.h +39 -0
  1104. cuda/cccl/headers/include/cuda/std/__type_traits/negation.h +44 -0
  1105. cuda/cccl/headers/include/cuda/std/__type_traits/num_bits.h +122 -0
  1106. cuda/cccl/headers/include/cuda/std/__type_traits/promote.h +163 -0
  1107. cuda/cccl/headers/include/cuda/std/__type_traits/rank.h +60 -0
  1108. cuda/cccl/headers/include/cuda/std/__type_traits/reference_constructs_from_temporary.h +57 -0
  1109. cuda/cccl/headers/include/cuda/std/__type_traits/reference_converts_from_temporary.h +56 -0
  1110. cuda/cccl/headers/include/cuda/std/__type_traits/remove_all_extents.h +66 -0
  1111. cuda/cccl/headers/include/cuda/std/__type_traits/remove_const.h +59 -0
  1112. cuda/cccl/headers/include/cuda/std/__type_traits/remove_const_ref.h +37 -0
  1113. cuda/cccl/headers/include/cuda/std/__type_traits/remove_cv.h +57 -0
  1114. cuda/cccl/headers/include/cuda/std/__type_traits/remove_cvref.h +57 -0
  1115. cuda/cccl/headers/include/cuda/std/__type_traits/remove_extent.h +65 -0
  1116. cuda/cccl/headers/include/cuda/std/__type_traits/remove_pointer.h +73 -0
  1117. cuda/cccl/headers/include/cuda/std/__type_traits/remove_reference.h +72 -0
  1118. cuda/cccl/headers/include/cuda/std/__type_traits/remove_volatile.h +58 -0
  1119. cuda/cccl/headers/include/cuda/std/__type_traits/result_of.h +47 -0
  1120. cuda/cccl/headers/include/cuda/std/__type_traits/type_identity.h +40 -0
  1121. cuda/cccl/headers/include/cuda/std/__type_traits/type_list.h +1067 -0
  1122. cuda/cccl/headers/include/cuda/std/__type_traits/type_set.h +131 -0
  1123. cuda/cccl/headers/include/cuda/std/__type_traits/underlying_type.h +52 -0
  1124. cuda/cccl/headers/include/cuda/std/__type_traits/void_t.h +34 -0
  1125. cuda/cccl/headers/include/cuda/std/__utility/as_const.h +52 -0
  1126. cuda/cccl/headers/include/cuda/std/__utility/auto_cast.h +34 -0
  1127. cuda/cccl/headers/include/cuda/std/__utility/cmp.h +116 -0
  1128. cuda/cccl/headers/include/cuda/std/__utility/convert_to_integral.h +101 -0
  1129. cuda/cccl/headers/include/cuda/std/__utility/declval.h +76 -0
  1130. cuda/cccl/headers/include/cuda/std/__utility/exception_guard.h +161 -0
  1131. cuda/cccl/headers/include/cuda/std/__utility/exchange.h +46 -0
  1132. cuda/cccl/headers/include/cuda/std/__utility/forward.h +59 -0
  1133. cuda/cccl/headers/include/cuda/std/__utility/forward_like.h +55 -0
  1134. cuda/cccl/headers/include/cuda/std/__utility/in_place.h +86 -0
  1135. cuda/cccl/headers/include/cuda/std/__utility/integer_sequence.h +251 -0
  1136. cuda/cccl/headers/include/cuda/std/__utility/monostate.h +99 -0
  1137. cuda/cccl/headers/include/cuda/std/__utility/move.h +74 -0
  1138. cuda/cccl/headers/include/cuda/std/__utility/pair.h +791 -0
  1139. cuda/cccl/headers/include/cuda/std/__utility/piecewise_construct.h +37 -0
  1140. cuda/cccl/headers/include/cuda/std/__utility/pod_tuple.h +527 -0
  1141. cuda/cccl/headers/include/cuda/std/__utility/priority_tag.h +40 -0
  1142. cuda/cccl/headers/include/cuda/std/__utility/rel_ops.h +63 -0
  1143. cuda/cccl/headers/include/cuda/std/__utility/swap.h +64 -0
  1144. cuda/cccl/headers/include/cuda/std/__utility/to_underlying.h +40 -0
  1145. cuda/cccl/headers/include/cuda/std/__utility/typeid.h +421 -0
  1146. cuda/cccl/headers/include/cuda/std/__utility/undefined.h +34 -0
  1147. cuda/cccl/headers/include/cuda/std/__utility/unreachable.h +37 -0
  1148. cuda/cccl/headers/include/cuda/std/array +518 -0
  1149. cuda/cccl/headers/include/cuda/std/atomic +810 -0
  1150. cuda/cccl/headers/include/cuda/std/barrier +42 -0
  1151. cuda/cccl/headers/include/cuda/std/bit +35 -0
  1152. cuda/cccl/headers/include/cuda/std/bitset +994 -0
  1153. cuda/cccl/headers/include/cuda/std/cassert +28 -0
  1154. cuda/cccl/headers/include/cuda/std/ccomplex +15 -0
  1155. cuda/cccl/headers/include/cuda/std/cfloat +59 -0
  1156. cuda/cccl/headers/include/cuda/std/chrono +26 -0
  1157. cuda/cccl/headers/include/cuda/std/climits +61 -0
  1158. cuda/cccl/headers/include/cuda/std/cmath +87 -0
  1159. cuda/cccl/headers/include/cuda/std/complex +50 -0
  1160. cuda/cccl/headers/include/cuda/std/concepts +48 -0
  1161. cuda/cccl/headers/include/cuda/std/cstddef +28 -0
  1162. cuda/cccl/headers/include/cuda/std/cstdint +178 -0
  1163. cuda/cccl/headers/include/cuda/std/cstdlib +30 -0
  1164. cuda/cccl/headers/include/cuda/std/cstring +110 -0
  1165. cuda/cccl/headers/include/cuda/std/ctime +154 -0
  1166. cuda/cccl/headers/include/cuda/std/detail/__config +45 -0
  1167. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +207 -0
  1168. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/algorithm +1721 -0
  1169. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/chrono +2509 -0
  1170. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/iosfwd +128 -0
  1171. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/stdexcept +120 -0
  1172. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/tuple +1365 -0
  1173. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/variant +2144 -0
  1174. cuda/cccl/headers/include/cuda/std/execution +29 -0
  1175. cuda/cccl/headers/include/cuda/std/expected +30 -0
  1176. cuda/cccl/headers/include/cuda/std/functional +56 -0
  1177. cuda/cccl/headers/include/cuda/std/initializer_list +44 -0
  1178. cuda/cccl/headers/include/cuda/std/inplace_vector +2170 -0
  1179. cuda/cccl/headers/include/cuda/std/iterator +70 -0
  1180. cuda/cccl/headers/include/cuda/std/latch +34 -0
  1181. cuda/cccl/headers/include/cuda/std/limits +28 -0
  1182. cuda/cccl/headers/include/cuda/std/linalg +30 -0
  1183. cuda/cccl/headers/include/cuda/std/mdspan +38 -0
  1184. cuda/cccl/headers/include/cuda/std/memory +39 -0
  1185. cuda/cccl/headers/include/cuda/std/numbers +346 -0
  1186. cuda/cccl/headers/include/cuda/std/numeric +41 -0
  1187. cuda/cccl/headers/include/cuda/std/optional +31 -0
  1188. cuda/cccl/headers/include/cuda/std/ranges +69 -0
  1189. cuda/cccl/headers/include/cuda/std/ratio +416 -0
  1190. cuda/cccl/headers/include/cuda/std/semaphore +31 -0
  1191. cuda/cccl/headers/include/cuda/std/source_location +83 -0
  1192. cuda/cccl/headers/include/cuda/std/span +628 -0
  1193. cuda/cccl/headers/include/cuda/std/string_view +925 -0
  1194. cuda/cccl/headers/include/cuda/std/tuple +26 -0
  1195. cuda/cccl/headers/include/cuda/std/type_traits +177 -0
  1196. cuda/cccl/headers/include/cuda/std/utility +70 -0
  1197. cuda/cccl/headers/include/cuda/std/variant +25 -0
  1198. cuda/cccl/headers/include/cuda/std/version +240 -0
  1199. cuda/cccl/headers/include/cuda/stream +31 -0
  1200. cuda/cccl/headers/include/cuda/stream_ref +59 -0
  1201. cuda/cccl/headers/include/cuda/type_traits +27 -0
  1202. cuda/cccl/headers/include/cuda/utility +28 -0
  1203. cuda/cccl/headers/include/cuda/version +16 -0
  1204. cuda/cccl/headers/include/cuda/warp +28 -0
  1205. cuda/cccl/headers/include/cuda/work_stealing +26 -0
  1206. cuda/cccl/headers/include/nv/detail/__preprocessor +169 -0
  1207. cuda/cccl/headers/include/nv/detail/__target_macros +718 -0
  1208. cuda/cccl/headers/include/nv/target +240 -0
  1209. cuda/cccl/headers/include/thrust/addressof.h +22 -0
  1210. cuda/cccl/headers/include/thrust/adjacent_difference.h +254 -0
  1211. cuda/cccl/headers/include/thrust/advance.h +57 -0
  1212. cuda/cccl/headers/include/thrust/allocate_unique.h +299 -0
  1213. cuda/cccl/headers/include/thrust/binary_search.h +1910 -0
  1214. cuda/cccl/headers/include/thrust/complex.h +858 -0
  1215. cuda/cccl/headers/include/thrust/copy.h +506 -0
  1216. cuda/cccl/headers/include/thrust/count.h +245 -0
  1217. cuda/cccl/headers/include/thrust/detail/adjacent_difference.inl +95 -0
  1218. cuda/cccl/headers/include/thrust/detail/alignment.h +81 -0
  1219. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +626 -0
  1220. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +192 -0
  1221. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +96 -0
  1222. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +81 -0
  1223. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +78 -0
  1224. cuda/cccl/headers/include/thrust/detail/allocator/no_throw_allocator.h +76 -0
  1225. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +115 -0
  1226. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +116 -0
  1227. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.h +77 -0
  1228. cuda/cccl/headers/include/thrust/detail/allocator_aware_execution_policy.h +99 -0
  1229. cuda/cccl/headers/include/thrust/detail/binary_search.inl +525 -0
  1230. cuda/cccl/headers/include/thrust/detail/caching_allocator.h +47 -0
  1231. cuda/cccl/headers/include/thrust/detail/complex/arithmetic.h +255 -0
  1232. cuda/cccl/headers/include/thrust/detail/complex/c99math.h +64 -0
  1233. cuda/cccl/headers/include/thrust/detail/complex/catrig.h +875 -0
  1234. cuda/cccl/headers/include/thrust/detail/complex/catrigf.h +589 -0
  1235. cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +233 -0
  1236. cuda/cccl/headers/include/thrust/detail/complex/ccoshf.h +161 -0
  1237. cuda/cccl/headers/include/thrust/detail/complex/cexp.h +195 -0
  1238. cuda/cccl/headers/include/thrust/detail/complex/cexpf.h +173 -0
  1239. cuda/cccl/headers/include/thrust/detail/complex/clog.h +223 -0
  1240. cuda/cccl/headers/include/thrust/detail/complex/clogf.h +210 -0
  1241. cuda/cccl/headers/include/thrust/detail/complex/complex.inl +263 -0
  1242. cuda/cccl/headers/include/thrust/detail/complex/cpow.h +50 -0
  1243. cuda/cccl/headers/include/thrust/detail/complex/cproj.h +81 -0
  1244. cuda/cccl/headers/include/thrust/detail/complex/csinh.h +228 -0
  1245. cuda/cccl/headers/include/thrust/detail/complex/csinhf.h +168 -0
  1246. cuda/cccl/headers/include/thrust/detail/complex/csqrt.h +178 -0
  1247. cuda/cccl/headers/include/thrust/detail/complex/csqrtf.h +174 -0
  1248. cuda/cccl/headers/include/thrust/detail/complex/ctanh.h +208 -0
  1249. cuda/cccl/headers/include/thrust/detail/complex/ctanhf.h +133 -0
  1250. cuda/cccl/headers/include/thrust/detail/complex/math_private.h +138 -0
  1251. cuda/cccl/headers/include/thrust/detail/complex/stream.h +73 -0
  1252. cuda/cccl/headers/include/thrust/detail/config/compiler.h +38 -0
  1253. cuda/cccl/headers/include/thrust/detail/config/config.h +43 -0
  1254. cuda/cccl/headers/include/thrust/detail/config/cpp_dialect.h +78 -0
  1255. cuda/cccl/headers/include/thrust/detail/config/device_system.h +55 -0
  1256. cuda/cccl/headers/include/thrust/detail/config/host_system.h +48 -0
  1257. cuda/cccl/headers/include/thrust/detail/config/memory_resource.h +41 -0
  1258. cuda/cccl/headers/include/thrust/detail/config/namespace.h +162 -0
  1259. cuda/cccl/headers/include/thrust/detail/config/simple_defines.h +48 -0
  1260. cuda/cccl/headers/include/thrust/detail/config.h +36 -0
  1261. cuda/cccl/headers/include/thrust/detail/contiguous_storage.h +228 -0
  1262. cuda/cccl/headers/include/thrust/detail/contiguous_storage.inl +273 -0
  1263. cuda/cccl/headers/include/thrust/detail/copy.h +72 -0
  1264. cuda/cccl/headers/include/thrust/detail/copy.inl +129 -0
  1265. cuda/cccl/headers/include/thrust/detail/copy_if.h +62 -0
  1266. cuda/cccl/headers/include/thrust/detail/copy_if.inl +102 -0
  1267. cuda/cccl/headers/include/thrust/detail/count.h +55 -0
  1268. cuda/cccl/headers/include/thrust/detail/count.inl +89 -0
  1269. cuda/cccl/headers/include/thrust/detail/device_ptr.inl +48 -0
  1270. cuda/cccl/headers/include/thrust/detail/equal.inl +93 -0
  1271. cuda/cccl/headers/include/thrust/detail/event_error.h +160 -0
  1272. cuda/cccl/headers/include/thrust/detail/execute_with_allocator.h +81 -0
  1273. cuda/cccl/headers/include/thrust/detail/execute_with_allocator_fwd.h +61 -0
  1274. cuda/cccl/headers/include/thrust/detail/execution_policy.h +120 -0
  1275. cuda/cccl/headers/include/thrust/detail/extrema.inl +184 -0
  1276. cuda/cccl/headers/include/thrust/detail/fill.inl +86 -0
  1277. cuda/cccl/headers/include/thrust/detail/find.inl +113 -0
  1278. cuda/cccl/headers/include/thrust/detail/for_each.inl +84 -0
  1279. cuda/cccl/headers/include/thrust/detail/function.h +49 -0
  1280. cuda/cccl/headers/include/thrust/detail/functional/actor.h +214 -0
  1281. cuda/cccl/headers/include/thrust/detail/functional/operators.h +386 -0
  1282. cuda/cccl/headers/include/thrust/detail/gather.inl +173 -0
  1283. cuda/cccl/headers/include/thrust/detail/generate.inl +86 -0
  1284. cuda/cccl/headers/include/thrust/detail/get_iterator_value.h +62 -0
  1285. cuda/cccl/headers/include/thrust/detail/inner_product.inl +118 -0
  1286. cuda/cccl/headers/include/thrust/detail/internal_functional.h +328 -0
  1287. cuda/cccl/headers/include/thrust/detail/logical.inl +113 -0
  1288. cuda/cccl/headers/include/thrust/detail/malloc_and_free.h +77 -0
  1289. cuda/cccl/headers/include/thrust/detail/malloc_and_free_fwd.h +45 -0
  1290. cuda/cccl/headers/include/thrust/detail/memory_algorithms.h +209 -0
  1291. cuda/cccl/headers/include/thrust/detail/merge.inl +276 -0
  1292. cuda/cccl/headers/include/thrust/detail/mismatch.inl +94 -0
  1293. cuda/cccl/headers/include/thrust/detail/overlapped_copy.h +124 -0
  1294. cuda/cccl/headers/include/thrust/detail/partition.inl +378 -0
  1295. cuda/cccl/headers/include/thrust/detail/pointer.h +309 -0
  1296. cuda/cccl/headers/include/thrust/detail/preprocessor.h +652 -0
  1297. cuda/cccl/headers/include/thrust/detail/random_bijection.h +177 -0
  1298. cuda/cccl/headers/include/thrust/detail/range/head_flags.h +116 -0
  1299. cuda/cccl/headers/include/thrust/detail/range/tail_flags.h +130 -0
  1300. cuda/cccl/headers/include/thrust/detail/raw_pointer_cast.h +52 -0
  1301. cuda/cccl/headers/include/thrust/detail/raw_reference_cast.h +192 -0
  1302. cuda/cccl/headers/include/thrust/detail/reduce.inl +377 -0
  1303. cuda/cccl/headers/include/thrust/detail/reference.h +494 -0
  1304. cuda/cccl/headers/include/thrust/detail/reference_forward_declaration.h +35 -0
  1305. cuda/cccl/headers/include/thrust/detail/remove.inl +213 -0
  1306. cuda/cccl/headers/include/thrust/detail/replace.inl +231 -0
  1307. cuda/cccl/headers/include/thrust/detail/reverse.inl +88 -0
  1308. cuda/cccl/headers/include/thrust/detail/scan.inl +518 -0
  1309. cuda/cccl/headers/include/thrust/detail/scatter.inl +157 -0
  1310. cuda/cccl/headers/include/thrust/detail/seq.h +66 -0
  1311. cuda/cccl/headers/include/thrust/detail/sequence.inl +109 -0
  1312. cuda/cccl/headers/include/thrust/detail/set_operations.inl +981 -0
  1313. cuda/cccl/headers/include/thrust/detail/shuffle.inl +86 -0
  1314. cuda/cccl/headers/include/thrust/detail/sort.inl +373 -0
  1315. cuda/cccl/headers/include/thrust/detail/static_assert.h +58 -0
  1316. cuda/cccl/headers/include/thrust/detail/static_map.h +167 -0
  1317. cuda/cccl/headers/include/thrust/detail/swap_ranges.inl +65 -0
  1318. cuda/cccl/headers/include/thrust/detail/tabulate.inl +62 -0
  1319. cuda/cccl/headers/include/thrust/detail/temporary_array.h +153 -0
  1320. cuda/cccl/headers/include/thrust/detail/temporary_array.inl +120 -0
  1321. cuda/cccl/headers/include/thrust/detail/temporary_buffer.h +81 -0
  1322. cuda/cccl/headers/include/thrust/detail/transform_reduce.inl +69 -0
  1323. cuda/cccl/headers/include/thrust/detail/transform_scan.inl +161 -0
  1324. cuda/cccl/headers/include/thrust/detail/trivial_sequence.h +130 -0
  1325. cuda/cccl/headers/include/thrust/detail/tuple_meta_transform.h +61 -0
  1326. cuda/cccl/headers/include/thrust/detail/type_deduction.h +62 -0
  1327. cuda/cccl/headers/include/thrust/detail/type_traits/has_member_function.h +47 -0
  1328. cuda/cccl/headers/include/thrust/detail/type_traits/has_nested_type.h +43 -0
  1329. cuda/cccl/headers/include/thrust/detail/type_traits/is_call_possible.h +167 -0
  1330. cuda/cccl/headers/include/thrust/detail/type_traits/is_commutative.h +69 -0
  1331. cuda/cccl/headers/include/thrust/detail/type_traits/is_metafunction_defined.h +39 -0
  1332. cuda/cccl/headers/include/thrust/detail/type_traits/is_thrust_pointer.h +59 -0
  1333. cuda/cccl/headers/include/thrust/detail/type_traits/iterator/is_output_iterator.h +46 -0
  1334. cuda/cccl/headers/include/thrust/detail/type_traits/minimum_type.h +89 -0
  1335. cuda/cccl/headers/include/thrust/detail/type_traits/pointer_traits.h +332 -0
  1336. cuda/cccl/headers/include/thrust/detail/type_traits.h +136 -0
  1337. cuda/cccl/headers/include/thrust/detail/uninitialized_copy.inl +90 -0
  1338. cuda/cccl/headers/include/thrust/detail/uninitialized_fill.inl +86 -0
  1339. cuda/cccl/headers/include/thrust/detail/unique.inl +373 -0
  1340. cuda/cccl/headers/include/thrust/detail/use_default.h +34 -0
  1341. cuda/cccl/headers/include/thrust/detail/vector_base.h +613 -0
  1342. cuda/cccl/headers/include/thrust/detail/vector_base.inl +1210 -0
  1343. cuda/cccl/headers/include/thrust/device_allocator.h +134 -0
  1344. cuda/cccl/headers/include/thrust/device_delete.h +74 -0
  1345. cuda/cccl/headers/include/thrust/device_free.h +85 -0
  1346. cuda/cccl/headers/include/thrust/device_make_unique.h +56 -0
  1347. cuda/cccl/headers/include/thrust/device_malloc.h +84 -0
  1348. cuda/cccl/headers/include/thrust/device_malloc_allocator.h +190 -0
  1349. cuda/cccl/headers/include/thrust/device_new.h +112 -0
  1350. cuda/cccl/headers/include/thrust/device_new_allocator.h +179 -0
  1351. cuda/cccl/headers/include/thrust/device_ptr.h +196 -0
  1352. cuda/cccl/headers/include/thrust/device_reference.h +983 -0
  1353. cuda/cccl/headers/include/thrust/device_vector.h +576 -0
  1354. cuda/cccl/headers/include/thrust/distance.h +43 -0
  1355. cuda/cccl/headers/include/thrust/equal.h +247 -0
  1356. cuda/cccl/headers/include/thrust/execution_policy.h +251 -0
  1357. cuda/cccl/headers/include/thrust/extrema.h +657 -0
  1358. cuda/cccl/headers/include/thrust/fill.h +200 -0
  1359. cuda/cccl/headers/include/thrust/find.h +382 -0
  1360. cuda/cccl/headers/include/thrust/for_each.h +261 -0
  1361. cuda/cccl/headers/include/thrust/functional.h +395 -0
  1362. cuda/cccl/headers/include/thrust/gather.h +464 -0
  1363. cuda/cccl/headers/include/thrust/generate.h +193 -0
  1364. cuda/cccl/headers/include/thrust/host_vector.h +576 -0
  1365. cuda/cccl/headers/include/thrust/inner_product.h +264 -0
  1366. cuda/cccl/headers/include/thrust/iterator/constant_iterator.h +221 -0
  1367. cuda/cccl/headers/include/thrust/iterator/counting_iterator.h +335 -0
  1368. cuda/cccl/headers/include/thrust/iterator/detail/any_assign.h +48 -0
  1369. cuda/cccl/headers/include/thrust/iterator/detail/any_system_tag.h +43 -0
  1370. cuda/cccl/headers/include/thrust/iterator/detail/device_system_tag.h +38 -0
  1371. cuda/cccl/headers/include/thrust/iterator/detail/host_system_tag.h +38 -0
  1372. cuda/cccl/headers/include/thrust/iterator/detail/iterator_adaptor_base.h +81 -0
  1373. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_to_system.h +60 -0
  1374. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_to_traversal.h +65 -0
  1375. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h +57 -0
  1376. cuda/cccl/headers/include/thrust/iterator/detail/iterator_facade_category.h +182 -0
  1377. cuda/cccl/headers/include/thrust/iterator/detail/minimum_system.h +58 -0
  1378. cuda/cccl/headers/include/thrust/iterator/detail/normal_iterator.h +69 -0
  1379. cuda/cccl/headers/include/thrust/iterator/detail/retag.h +104 -0
  1380. cuda/cccl/headers/include/thrust/iterator/detail/tagged_iterator.h +81 -0
  1381. cuda/cccl/headers/include/thrust/iterator/detail/tuple_of_iterator_references.h +174 -0
  1382. cuda/cccl/headers/include/thrust/iterator/discard_iterator.h +163 -0
  1383. cuda/cccl/headers/include/thrust/iterator/iterator_adaptor.h +251 -0
  1384. cuda/cccl/headers/include/thrust/iterator/iterator_categories.h +211 -0
  1385. cuda/cccl/headers/include/thrust/iterator/iterator_facade.h +659 -0
  1386. cuda/cccl/headers/include/thrust/iterator/iterator_traits.h +334 -0
  1387. cuda/cccl/headers/include/thrust/iterator/iterator_traversal_tags.h +64 -0
  1388. cuda/cccl/headers/include/thrust/iterator/offset_iterator.h +194 -0
  1389. cuda/cccl/headers/include/thrust/iterator/permutation_iterator.h +204 -0
  1390. cuda/cccl/headers/include/thrust/iterator/retag.h +72 -0
  1391. cuda/cccl/headers/include/thrust/iterator/reverse_iterator.h +51 -0
  1392. cuda/cccl/headers/include/thrust/iterator/shuffle_iterator.h +185 -0
  1393. cuda/cccl/headers/include/thrust/iterator/strided_iterator.h +152 -0
  1394. cuda/cccl/headers/include/thrust/iterator/tabulate_output_iterator.h +152 -0
  1395. cuda/cccl/headers/include/thrust/iterator/transform_input_output_iterator.h +226 -0
  1396. cuda/cccl/headers/include/thrust/iterator/transform_iterator.h +351 -0
  1397. cuda/cccl/headers/include/thrust/iterator/transform_output_iterator.h +190 -0
  1398. cuda/cccl/headers/include/thrust/iterator/zip_iterator.h +359 -0
  1399. cuda/cccl/headers/include/thrust/logical.h +290 -0
  1400. cuda/cccl/headers/include/thrust/memory.h +299 -0
  1401. cuda/cccl/headers/include/thrust/merge.h +725 -0
  1402. cuda/cccl/headers/include/thrust/mismatch.h +261 -0
  1403. cuda/cccl/headers/include/thrust/mr/allocator.h +229 -0
  1404. cuda/cccl/headers/include/thrust/mr/device_memory_resource.h +41 -0
  1405. cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +528 -0
  1406. cuda/cccl/headers/include/thrust/mr/disjoint_sync_pool.h +118 -0
  1407. cuda/cccl/headers/include/thrust/mr/disjoint_tls_pool.h +67 -0
  1408. cuda/cccl/headers/include/thrust/mr/fancy_pointer_resource.h +67 -0
  1409. cuda/cccl/headers/include/thrust/mr/host_memory_resource.h +38 -0
  1410. cuda/cccl/headers/include/thrust/mr/memory_resource.h +217 -0
  1411. cuda/cccl/headers/include/thrust/mr/new.h +100 -0
  1412. cuda/cccl/headers/include/thrust/mr/polymorphic_adaptor.h +63 -0
  1413. cuda/cccl/headers/include/thrust/mr/pool.h +528 -0
  1414. cuda/cccl/headers/include/thrust/mr/pool_options.h +174 -0
  1415. cuda/cccl/headers/include/thrust/mr/sync_pool.h +114 -0
  1416. cuda/cccl/headers/include/thrust/mr/tls_pool.h +64 -0
  1417. cuda/cccl/headers/include/thrust/mr/universal_memory_resource.h +29 -0
  1418. cuda/cccl/headers/include/thrust/mr/validator.h +56 -0
  1419. cuda/cccl/headers/include/thrust/pair.h +99 -0
  1420. cuda/cccl/headers/include/thrust/partition.h +1391 -0
  1421. cuda/cccl/headers/include/thrust/per_device_resource.h +98 -0
  1422. cuda/cccl/headers/include/thrust/random/detail/discard_block_engine.inl +184 -0
  1423. cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine.inl +155 -0
  1424. cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine_discard.h +104 -0
  1425. cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine.inl +151 -0
  1426. cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h +53 -0
  1427. cuda/cccl/headers/include/thrust/random/detail/mod.h +101 -0
  1428. cuda/cccl/headers/include/thrust/random/detail/normal_distribution.inl +187 -0
  1429. cuda/cccl/headers/include/thrust/random/detail/normal_distribution_base.h +160 -0
  1430. cuda/cccl/headers/include/thrust/random/detail/random_core_access.h +63 -0
  1431. cuda/cccl/headers/include/thrust/random/detail/subtract_with_carry_engine.inl +201 -0
  1432. cuda/cccl/headers/include/thrust/random/detail/uniform_int_distribution.inl +198 -0
  1433. cuda/cccl/headers/include/thrust/random/detail/uniform_real_distribution.inl +200 -0
  1434. cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine.inl +183 -0
  1435. cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine_max.h +187 -0
  1436. cuda/cccl/headers/include/thrust/random/discard_block_engine.h +240 -0
  1437. cuda/cccl/headers/include/thrust/random/linear_congruential_engine.h +289 -0
  1438. cuda/cccl/headers/include/thrust/random/linear_feedback_shift_engine.h +217 -0
  1439. cuda/cccl/headers/include/thrust/random/normal_distribution.h +257 -0
  1440. cuda/cccl/headers/include/thrust/random/subtract_with_carry_engine.h +247 -0
  1441. cuda/cccl/headers/include/thrust/random/uniform_int_distribution.h +261 -0
  1442. cuda/cccl/headers/include/thrust/random/uniform_real_distribution.h +258 -0
  1443. cuda/cccl/headers/include/thrust/random/xor_combine_engine.h +255 -0
  1444. cuda/cccl/headers/include/thrust/random.h +120 -0
  1445. cuda/cccl/headers/include/thrust/reduce.h +1113 -0
  1446. cuda/cccl/headers/include/thrust/remove.h +768 -0
  1447. cuda/cccl/headers/include/thrust/replace.h +826 -0
  1448. cuda/cccl/headers/include/thrust/reverse.h +215 -0
  1449. cuda/cccl/headers/include/thrust/scan.h +1671 -0
  1450. cuda/cccl/headers/include/thrust/scatter.h +446 -0
  1451. cuda/cccl/headers/include/thrust/sequence.h +277 -0
  1452. cuda/cccl/headers/include/thrust/set_operations.h +3026 -0
  1453. cuda/cccl/headers/include/thrust/shuffle.h +182 -0
  1454. cuda/cccl/headers/include/thrust/sort.h +1320 -0
  1455. cuda/cccl/headers/include/thrust/swap.h +147 -0
  1456. cuda/cccl/headers/include/thrust/system/cpp/detail/adjacent_difference.h +30 -0
  1457. cuda/cccl/headers/include/thrust/system/cpp/detail/assign_value.h +30 -0
  1458. cuda/cccl/headers/include/thrust/system/cpp/detail/binary_search.h +32 -0
  1459. cuda/cccl/headers/include/thrust/system/cpp/detail/copy.h +30 -0
  1460. cuda/cccl/headers/include/thrust/system/cpp/detail/copy_if.h +30 -0
  1461. cuda/cccl/headers/include/thrust/system/cpp/detail/count.h +29 -0
  1462. cuda/cccl/headers/include/thrust/system/cpp/detail/equal.h +29 -0
  1463. cuda/cccl/headers/include/thrust/system/cpp/detail/execution_policy.h +109 -0
  1464. cuda/cccl/headers/include/thrust/system/cpp/detail/extrema.h +30 -0
  1465. cuda/cccl/headers/include/thrust/system/cpp/detail/fill.h +29 -0
  1466. cuda/cccl/headers/include/thrust/system/cpp/detail/find.h +30 -0
  1467. cuda/cccl/headers/include/thrust/system/cpp/detail/for_each.h +30 -0
  1468. cuda/cccl/headers/include/thrust/system/cpp/detail/gather.h +29 -0
  1469. cuda/cccl/headers/include/thrust/system/cpp/detail/generate.h +29 -0
  1470. cuda/cccl/headers/include/thrust/system/cpp/detail/get_value.h +30 -0
  1471. cuda/cccl/headers/include/thrust/system/cpp/detail/inner_product.h +29 -0
  1472. cuda/cccl/headers/include/thrust/system/cpp/detail/iter_swap.h +30 -0
  1473. cuda/cccl/headers/include/thrust/system/cpp/detail/logical.h +29 -0
  1474. cuda/cccl/headers/include/thrust/system/cpp/detail/malloc_and_free.h +30 -0
  1475. cuda/cccl/headers/include/thrust/system/cpp/detail/memory.inl +60 -0
  1476. cuda/cccl/headers/include/thrust/system/cpp/detail/merge.h +30 -0
  1477. cuda/cccl/headers/include/thrust/system/cpp/detail/mismatch.h +29 -0
  1478. cuda/cccl/headers/include/thrust/system/cpp/detail/partition.h +30 -0
  1479. cuda/cccl/headers/include/thrust/system/cpp/detail/per_device_resource.h +29 -0
  1480. cuda/cccl/headers/include/thrust/system/cpp/detail/reduce.h +30 -0
  1481. cuda/cccl/headers/include/thrust/system/cpp/detail/reduce_by_key.h +30 -0
  1482. cuda/cccl/headers/include/thrust/system/cpp/detail/remove.h +30 -0
  1483. cuda/cccl/headers/include/thrust/system/cpp/detail/replace.h +29 -0
  1484. cuda/cccl/headers/include/thrust/system/cpp/detail/reverse.h +29 -0
  1485. cuda/cccl/headers/include/thrust/system/cpp/detail/scan.h +30 -0
  1486. cuda/cccl/headers/include/thrust/system/cpp/detail/scan_by_key.h +30 -0
  1487. cuda/cccl/headers/include/thrust/system/cpp/detail/scatter.h +29 -0
  1488. cuda/cccl/headers/include/thrust/system/cpp/detail/sequence.h +29 -0
  1489. cuda/cccl/headers/include/thrust/system/cpp/detail/set_operations.h +30 -0
  1490. cuda/cccl/headers/include/thrust/system/cpp/detail/sort.h +30 -0
  1491. cuda/cccl/headers/include/thrust/system/cpp/detail/swap_ranges.h +29 -0
  1492. cuda/cccl/headers/include/thrust/system/cpp/detail/tabulate.h +29 -0
  1493. cuda/cccl/headers/include/thrust/system/cpp/detail/temporary_buffer.h +29 -0
  1494. cuda/cccl/headers/include/thrust/system/cpp/detail/transform.h +29 -0
  1495. cuda/cccl/headers/include/thrust/system/cpp/detail/transform_reduce.h +29 -0
  1496. cuda/cccl/headers/include/thrust/system/cpp/detail/transform_scan.h +29 -0
  1497. cuda/cccl/headers/include/thrust/system/cpp/detail/uninitialized_copy.h +29 -0
  1498. cuda/cccl/headers/include/thrust/system/cpp/detail/uninitialized_fill.h +29 -0
  1499. cuda/cccl/headers/include/thrust/system/cpp/detail/unique.h +30 -0
  1500. cuda/cccl/headers/include/thrust/system/cpp/detail/unique_by_key.h +30 -0
  1501. cuda/cccl/headers/include/thrust/system/cpp/execution_policy.h +63 -0
  1502. cuda/cccl/headers/include/thrust/system/cpp/memory.h +106 -0
  1503. cuda/cccl/headers/include/thrust/system/cpp/memory_resource.h +72 -0
  1504. cuda/cccl/headers/include/thrust/system/cpp/pointer.h +120 -0
  1505. cuda/cccl/headers/include/thrust/system/cpp/vector.h +96 -0
  1506. cuda/cccl/headers/include/thrust/system/cuda/config.h +126 -0
  1507. cuda/cccl/headers/include/thrust/system/cuda/detail/adjacent_difference.h +219 -0
  1508. cuda/cccl/headers/include/thrust/system/cuda/detail/assign_value.h +124 -0
  1509. cuda/cccl/headers/include/thrust/system/cuda/detail/binary_search.h +29 -0
  1510. cuda/cccl/headers/include/thrust/system/cuda/detail/cdp_dispatch.h +72 -0
  1511. cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +273 -0
  1512. cuda/cccl/headers/include/thrust/system/cuda/detail/copy_if.h +255 -0
  1513. cuda/cccl/headers/include/thrust/system/cuda/detail/core/agent_launcher.h +289 -0
  1514. cuda/cccl/headers/include/thrust/system/cuda/detail/core/triple_chevron_launch.h +191 -0
  1515. cuda/cccl/headers/include/thrust/system/cuda/detail/core/util.h +593 -0
  1516. cuda/cccl/headers/include/thrust/system/cuda/detail/count.h +75 -0
  1517. cuda/cccl/headers/include/thrust/system/cuda/detail/cross_system.h +243 -0
  1518. cuda/cccl/headers/include/thrust/system/cuda/detail/dispatch.h +233 -0
  1519. cuda/cccl/headers/include/thrust/system/cuda/detail/equal.h +64 -0
  1520. cuda/cccl/headers/include/thrust/system/cuda/detail/error.inl +96 -0
  1521. cuda/cccl/headers/include/thrust/system/cuda/detail/execution_policy.h +264 -0
  1522. cuda/cccl/headers/include/thrust/system/cuda/detail/extrema.h +476 -0
  1523. cuda/cccl/headers/include/thrust/system/cuda/detail/fill.h +100 -0
  1524. cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +170 -0
  1525. cuda/cccl/headers/include/thrust/system/cuda/detail/for_each.h +83 -0
  1526. cuda/cccl/headers/include/thrust/system/cuda/detail/gather.h +91 -0
  1527. cuda/cccl/headers/include/thrust/system/cuda/detail/generate.h +60 -0
  1528. cuda/cccl/headers/include/thrust/system/cuda/detail/get_value.h +65 -0
  1529. cuda/cccl/headers/include/thrust/system/cuda/detail/inner_product.h +75 -0
  1530. cuda/cccl/headers/include/thrust/system/cuda/detail/iter_swap.h +80 -0
  1531. cuda/cccl/headers/include/thrust/system/cuda/detail/logical.h +29 -0
  1532. cuda/cccl/headers/include/thrust/system/cuda/detail/make_unsigned_special.h +61 -0
  1533. cuda/cccl/headers/include/thrust/system/cuda/detail/malloc_and_free.h +121 -0
  1534. cuda/cccl/headers/include/thrust/system/cuda/detail/memory.inl +57 -0
  1535. cuda/cccl/headers/include/thrust/system/cuda/detail/merge.h +228 -0
  1536. cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +223 -0
  1537. cuda/cccl/headers/include/thrust/system/cuda/detail/parallel_for.h +81 -0
  1538. cuda/cccl/headers/include/thrust/system/cuda/detail/partition.h +405 -0
  1539. cuda/cccl/headers/include/thrust/system/cuda/detail/per_device_resource.h +72 -0
  1540. cuda/cccl/headers/include/thrust/system/cuda/detail/reduce.h +785 -0
  1541. cuda/cccl/headers/include/thrust/system/cuda/detail/reduce_by_key.h +1001 -0
  1542. cuda/cccl/headers/include/thrust/system/cuda/detail/remove.h +107 -0
  1543. cuda/cccl/headers/include/thrust/system/cuda/detail/replace.h +122 -0
  1544. cuda/cccl/headers/include/thrust/system/cuda/detail/reverse.h +87 -0
  1545. cuda/cccl/headers/include/thrust/system/cuda/detail/scan.h +341 -0
  1546. cuda/cccl/headers/include/thrust/system/cuda/detail/scan_by_key.h +414 -0
  1547. cuda/cccl/headers/include/thrust/system/cuda/detail/scatter.h +91 -0
  1548. cuda/cccl/headers/include/thrust/system/cuda/detail/sequence.h +29 -0
  1549. cuda/cccl/headers/include/thrust/system/cuda/detail/set_operations.h +1734 -0
  1550. cuda/cccl/headers/include/thrust/system/cuda/detail/sort.h +469 -0
  1551. cuda/cccl/headers/include/thrust/system/cuda/detail/swap_ranges.h +98 -0
  1552. cuda/cccl/headers/include/thrust/system/cuda/detail/tabulate.h +61 -0
  1553. cuda/cccl/headers/include/thrust/system/cuda/detail/temporary_buffer.h +132 -0
  1554. cuda/cccl/headers/include/thrust/system/cuda/detail/terminate.h +53 -0
  1555. cuda/cccl/headers/include/thrust/system/cuda/detail/transform.h +429 -0
  1556. cuda/cccl/headers/include/thrust/system/cuda/detail/transform_reduce.h +143 -0
  1557. cuda/cccl/headers/include/thrust/system/cuda/detail/transform_scan.h +119 -0
  1558. cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_copy.h +117 -0
  1559. cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_fill.h +105 -0
  1560. cuda/cccl/headers/include/thrust/system/cuda/detail/unique.h +289 -0
  1561. cuda/cccl/headers/include/thrust/system/cuda/detail/unique_by_key.h +310 -0
  1562. cuda/cccl/headers/include/thrust/system/cuda/detail/util.h +253 -0
  1563. cuda/cccl/headers/include/thrust/system/cuda/error.h +168 -0
  1564. cuda/cccl/headers/include/thrust/system/cuda/execution_policy.h +15 -0
  1565. cuda/cccl/headers/include/thrust/system/cuda/memory.h +122 -0
  1566. cuda/cccl/headers/include/thrust/system/cuda/memory_resource.h +122 -0
  1567. cuda/cccl/headers/include/thrust/system/cuda/pointer.h +160 -0
  1568. cuda/cccl/headers/include/thrust/system/cuda/vector.h +108 -0
  1569. cuda/cccl/headers/include/thrust/system/detail/adl/adjacent_difference.h +51 -0
  1570. cuda/cccl/headers/include/thrust/system/detail/adl/assign_value.h +51 -0
  1571. cuda/cccl/headers/include/thrust/system/detail/adl/binary_search.h +51 -0
  1572. cuda/cccl/headers/include/thrust/system/detail/adl/copy.h +51 -0
  1573. cuda/cccl/headers/include/thrust/system/detail/adl/copy_if.h +52 -0
  1574. cuda/cccl/headers/include/thrust/system/detail/adl/count.h +51 -0
  1575. cuda/cccl/headers/include/thrust/system/detail/adl/equal.h +51 -0
  1576. cuda/cccl/headers/include/thrust/system/detail/adl/extrema.h +51 -0
  1577. cuda/cccl/headers/include/thrust/system/detail/adl/fill.h +51 -0
  1578. cuda/cccl/headers/include/thrust/system/detail/adl/find.h +51 -0
  1579. cuda/cccl/headers/include/thrust/system/detail/adl/for_each.h +51 -0
  1580. cuda/cccl/headers/include/thrust/system/detail/adl/gather.h +51 -0
  1581. cuda/cccl/headers/include/thrust/system/detail/adl/generate.h +51 -0
  1582. cuda/cccl/headers/include/thrust/system/detail/adl/get_value.h +51 -0
  1583. cuda/cccl/headers/include/thrust/system/detail/adl/inner_product.h +51 -0
  1584. cuda/cccl/headers/include/thrust/system/detail/adl/iter_swap.h +51 -0
  1585. cuda/cccl/headers/include/thrust/system/detail/adl/logical.h +51 -0
  1586. cuda/cccl/headers/include/thrust/system/detail/adl/malloc_and_free.h +51 -0
  1587. cuda/cccl/headers/include/thrust/system/detail/adl/merge.h +51 -0
  1588. cuda/cccl/headers/include/thrust/system/detail/adl/mismatch.h +51 -0
  1589. cuda/cccl/headers/include/thrust/system/detail/adl/partition.h +51 -0
  1590. cuda/cccl/headers/include/thrust/system/detail/adl/per_device_resource.h +51 -0
  1591. cuda/cccl/headers/include/thrust/system/detail/adl/reduce.h +51 -0
  1592. cuda/cccl/headers/include/thrust/system/detail/adl/reduce_by_key.h +51 -0
  1593. cuda/cccl/headers/include/thrust/system/detail/adl/remove.h +51 -0
  1594. cuda/cccl/headers/include/thrust/system/detail/adl/replace.h +51 -0
  1595. cuda/cccl/headers/include/thrust/system/detail/adl/reverse.h +51 -0
  1596. cuda/cccl/headers/include/thrust/system/detail/adl/scan.h +51 -0
  1597. cuda/cccl/headers/include/thrust/system/detail/adl/scan_by_key.h +51 -0
  1598. cuda/cccl/headers/include/thrust/system/detail/adl/scatter.h +51 -0
  1599. cuda/cccl/headers/include/thrust/system/detail/adl/sequence.h +51 -0
  1600. cuda/cccl/headers/include/thrust/system/detail/adl/set_operations.h +51 -0
  1601. cuda/cccl/headers/include/thrust/system/detail/adl/sort.h +51 -0
  1602. cuda/cccl/headers/include/thrust/system/detail/adl/swap_ranges.h +51 -0
  1603. cuda/cccl/headers/include/thrust/system/detail/adl/tabulate.h +51 -0
  1604. cuda/cccl/headers/include/thrust/system/detail/adl/temporary_buffer.h +51 -0
  1605. cuda/cccl/headers/include/thrust/system/detail/adl/transform.h +51 -0
  1606. cuda/cccl/headers/include/thrust/system/detail/adl/transform_reduce.h +51 -0
  1607. cuda/cccl/headers/include/thrust/system/detail/adl/transform_scan.h +51 -0
  1608. cuda/cccl/headers/include/thrust/system/detail/adl/uninitialized_copy.h +51 -0
  1609. cuda/cccl/headers/include/thrust/system/detail/adl/uninitialized_fill.h +51 -0
  1610. cuda/cccl/headers/include/thrust/system/detail/adl/unique.h +51 -0
  1611. cuda/cccl/headers/include/thrust/system/detail/adl/unique_by_key.h +51 -0
  1612. cuda/cccl/headers/include/thrust/system/detail/bad_alloc.h +61 -0
  1613. cuda/cccl/headers/include/thrust/system/detail/errno.h +120 -0
  1614. cuda/cccl/headers/include/thrust/system/detail/error_category.inl +302 -0
  1615. cuda/cccl/headers/include/thrust/system/detail/error_code.inl +173 -0
  1616. cuda/cccl/headers/include/thrust/system/detail/error_condition.inl +121 -0
  1617. cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.h +53 -0
  1618. cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.inl +79 -0
  1619. cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.h +161 -0
  1620. cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.inl +384 -0
  1621. cuda/cccl/headers/include/thrust/system/detail/generic/copy.h +45 -0
  1622. cuda/cccl/headers/include/thrust/system/detail/generic/copy.inl +64 -0
  1623. cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.h +58 -0
  1624. cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.inl +146 -0
  1625. cuda/cccl/headers/include/thrust/system/detail/generic/count.h +48 -0
  1626. cuda/cccl/headers/include/thrust/system/detail/generic/count.inl +84 -0
  1627. cuda/cccl/headers/include/thrust/system/detail/generic/equal.h +49 -0
  1628. cuda/cccl/headers/include/thrust/system/detail/generic/equal.inl +60 -0
  1629. cuda/cccl/headers/include/thrust/system/detail/generic/extrema.h +66 -0
  1630. cuda/cccl/headers/include/thrust/system/detail/generic/extrema.inl +252 -0
  1631. cuda/cccl/headers/include/thrust/system/detail/generic/fill.h +54 -0
  1632. cuda/cccl/headers/include/thrust/system/detail/generic/find.h +49 -0
  1633. cuda/cccl/headers/include/thrust/system/detail/generic/find.inl +137 -0
  1634. cuda/cccl/headers/include/thrust/system/detail/generic/for_each.h +58 -0
  1635. cuda/cccl/headers/include/thrust/system/detail/generic/gather.h +73 -0
  1636. cuda/cccl/headers/include/thrust/system/detail/generic/gather.inl +96 -0
  1637. cuda/cccl/headers/include/thrust/system/detail/generic/generate.h +45 -0
  1638. cuda/cccl/headers/include/thrust/system/detail/generic/generate.inl +63 -0
  1639. cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.h +60 -0
  1640. cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.inl +72 -0
  1641. cuda/cccl/headers/include/thrust/system/detail/generic/logical.h +59 -0
  1642. cuda/cccl/headers/include/thrust/system/detail/generic/memory.h +64 -0
  1643. cuda/cccl/headers/include/thrust/system/detail/generic/memory.inl +86 -0
  1644. cuda/cccl/headers/include/thrust/system/detail/generic/merge.h +99 -0
  1645. cuda/cccl/headers/include/thrust/system/detail/generic/merge.inl +148 -0
  1646. cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.h +49 -0
  1647. cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.inl +68 -0
  1648. cuda/cccl/headers/include/thrust/system/detail/generic/partition.h +129 -0
  1649. cuda/cccl/headers/include/thrust/system/detail/generic/partition.inl +207 -0
  1650. cuda/cccl/headers/include/thrust/system/detail/generic/per_device_resource.h +43 -0
  1651. cuda/cccl/headers/include/thrust/system/detail/generic/reduce.h +71 -0
  1652. cuda/cccl/headers/include/thrust/system/detail/generic/reduce.inl +100 -0
  1653. cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.h +83 -0
  1654. cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.inl +186 -0
  1655. cuda/cccl/headers/include/thrust/system/detail/generic/remove.h +86 -0
  1656. cuda/cccl/headers/include/thrust/system/detail/generic/remove.inl +121 -0
  1657. cuda/cccl/headers/include/thrust/system/detail/generic/replace.h +95 -0
  1658. cuda/cccl/headers/include/thrust/system/detail/generic/replace.inl +175 -0
  1659. cuda/cccl/headers/include/thrust/system/detail/generic/reverse.h +48 -0
  1660. cuda/cccl/headers/include/thrust/system/detail/generic/reverse.inl +67 -0
  1661. cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.h +63 -0
  1662. cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.inl +126 -0
  1663. cuda/cccl/headers/include/thrust/system/detail/generic/scan.h +72 -0
  1664. cuda/cccl/headers/include/thrust/system/detail/generic/scan.inl +85 -0
  1665. cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.h +126 -0
  1666. cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.inl +232 -0
  1667. cuda/cccl/headers/include/thrust/system/detail/generic/scatter.h +73 -0
  1668. cuda/cccl/headers/include/thrust/system/detail/generic/scatter.inl +85 -0
  1669. cuda/cccl/headers/include/thrust/system/detail/generic/select_system.h +104 -0
  1670. cuda/cccl/headers/include/thrust/system/detail/generic/sequence.h +70 -0
  1671. cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.h +282 -0
  1672. cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.inl +476 -0
  1673. cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.h +54 -0
  1674. cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.inl +125 -0
  1675. cuda/cccl/headers/include/thrust/system/detail/generic/sort.h +113 -0
  1676. cuda/cccl/headers/include/thrust/system/detail/generic/sort.inl +175 -0
  1677. cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.h +44 -0
  1678. cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.inl +76 -0
  1679. cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.h +41 -0
  1680. cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.inl +54 -0
  1681. cuda/cccl/headers/include/thrust/system/detail/generic/tag.h +47 -0
  1682. cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.h +54 -0
  1683. cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.inl +82 -0
  1684. cuda/cccl/headers/include/thrust/system/detail/generic/transform.h +395 -0
  1685. cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.h +50 -0
  1686. cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.inl +56 -0
  1687. cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.h +80 -0
  1688. cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.inl +113 -0
  1689. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.h +45 -0
  1690. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.inl +166 -0
  1691. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.h +45 -0
  1692. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.inl +115 -0
  1693. cuda/cccl/headers/include/thrust/system/detail/generic/unique.h +71 -0
  1694. cuda/cccl/headers/include/thrust/system/detail/generic/unique.inl +113 -0
  1695. cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.h +81 -0
  1696. cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.inl +126 -0
  1697. cuda/cccl/headers/include/thrust/system/detail/internal/decompose.h +117 -0
  1698. cuda/cccl/headers/include/thrust/system/detail/sequential/adjacent_difference.h +70 -0
  1699. cuda/cccl/headers/include/thrust/system/detail/sequential/assign_value.h +42 -0
  1700. cuda/cccl/headers/include/thrust/system/detail/sequential/binary_search.h +136 -0
  1701. cuda/cccl/headers/include/thrust/system/detail/sequential/copy.h +49 -0
  1702. cuda/cccl/headers/include/thrust/system/detail/sequential/copy.inl +119 -0
  1703. cuda/cccl/headers/include/thrust/system/detail/sequential/copy_backward.h +49 -0
  1704. cuda/cccl/headers/include/thrust/system/detail/sequential/copy_if.h +71 -0
  1705. cuda/cccl/headers/include/thrust/system/detail/sequential/count.h +29 -0
  1706. cuda/cccl/headers/include/thrust/system/detail/sequential/equal.h +29 -0
  1707. cuda/cccl/headers/include/thrust/system/detail/sequential/execution_policy.h +52 -0
  1708. cuda/cccl/headers/include/thrust/system/detail/sequential/extrema.h +110 -0
  1709. cuda/cccl/headers/include/thrust/system/detail/sequential/fill.h +29 -0
  1710. cuda/cccl/headers/include/thrust/system/detail/sequential/find.h +62 -0
  1711. cuda/cccl/headers/include/thrust/system/detail/sequential/for_each.h +74 -0
  1712. cuda/cccl/headers/include/thrust/system/detail/sequential/gather.h +29 -0
  1713. cuda/cccl/headers/include/thrust/system/detail/sequential/general_copy.h +123 -0
  1714. cuda/cccl/headers/include/thrust/system/detail/sequential/generate.h +29 -0
  1715. cuda/cccl/headers/include/thrust/system/detail/sequential/get_value.h +43 -0
  1716. cuda/cccl/headers/include/thrust/system/detail/sequential/inner_product.h +29 -0
  1717. cuda/cccl/headers/include/thrust/system/detail/sequential/insertion_sort.h +141 -0
  1718. cuda/cccl/headers/include/thrust/system/detail/sequential/iter_swap.h +45 -0
  1719. cuda/cccl/headers/include/thrust/system/detail/sequential/logical.h +29 -0
  1720. cuda/cccl/headers/include/thrust/system/detail/sequential/malloc_and_free.h +50 -0
  1721. cuda/cccl/headers/include/thrust/system/detail/sequential/merge.h +75 -0
  1722. cuda/cccl/headers/include/thrust/system/detail/sequential/merge.inl +145 -0
  1723. cuda/cccl/headers/include/thrust/system/detail/sequential/mismatch.h +29 -0
  1724. cuda/cccl/headers/include/thrust/system/detail/sequential/partition.h +301 -0
  1725. cuda/cccl/headers/include/thrust/system/detail/sequential/per_device_resource.h +29 -0
  1726. cuda/cccl/headers/include/thrust/system/detail/sequential/reduce.h +64 -0
  1727. cuda/cccl/headers/include/thrust/system/detail/sequential/reduce_by_key.h +98 -0
  1728. cuda/cccl/headers/include/thrust/system/detail/sequential/remove.h +179 -0
  1729. cuda/cccl/headers/include/thrust/system/detail/sequential/replace.h +29 -0
  1730. cuda/cccl/headers/include/thrust/system/detail/sequential/reverse.h +29 -0
  1731. cuda/cccl/headers/include/thrust/system/detail/sequential/scan.h +154 -0
  1732. cuda/cccl/headers/include/thrust/system/detail/sequential/scan_by_key.h +145 -0
  1733. cuda/cccl/headers/include/thrust/system/detail/sequential/scatter.h +29 -0
  1734. cuda/cccl/headers/include/thrust/system/detail/sequential/sequence.h +29 -0
  1735. cuda/cccl/headers/include/thrust/system/detail/sequential/set_operations.h +206 -0
  1736. cuda/cccl/headers/include/thrust/system/detail/sequential/sort.h +59 -0
  1737. cuda/cccl/headers/include/thrust/system/detail/sequential/sort.inl +116 -0
  1738. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.h +55 -0
  1739. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.inl +356 -0
  1740. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.h +48 -0
  1741. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.inl +124 -0
  1742. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.h +48 -0
  1743. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.inl +586 -0
  1744. cuda/cccl/headers/include/thrust/system/detail/sequential/swap_ranges.h +29 -0
  1745. cuda/cccl/headers/include/thrust/system/detail/sequential/tabulate.h +29 -0
  1746. cuda/cccl/headers/include/thrust/system/detail/sequential/temporary_buffer.h +29 -0
  1747. cuda/cccl/headers/include/thrust/system/detail/sequential/transform.h +29 -0
  1748. cuda/cccl/headers/include/thrust/system/detail/sequential/transform_reduce.h +29 -0
  1749. cuda/cccl/headers/include/thrust/system/detail/sequential/transform_scan.h +29 -0
  1750. cuda/cccl/headers/include/thrust/system/detail/sequential/trivial_copy.h +58 -0
  1751. cuda/cccl/headers/include/thrust/system/detail/sequential/uninitialized_copy.h +29 -0
  1752. cuda/cccl/headers/include/thrust/system/detail/sequential/uninitialized_fill.h +29 -0
  1753. cuda/cccl/headers/include/thrust/system/detail/sequential/unique.h +115 -0
  1754. cuda/cccl/headers/include/thrust/system/detail/sequential/unique_by_key.h +106 -0
  1755. cuda/cccl/headers/include/thrust/system/detail/system_error.inl +108 -0
  1756. cuda/cccl/headers/include/thrust/system/error_code.h +512 -0
  1757. cuda/cccl/headers/include/thrust/system/omp/detail/adjacent_difference.h +54 -0
  1758. cuda/cccl/headers/include/thrust/system/omp/detail/assign_value.h +30 -0
  1759. cuda/cccl/headers/include/thrust/system/omp/detail/binary_search.h +77 -0
  1760. cuda/cccl/headers/include/thrust/system/omp/detail/copy.h +50 -0
  1761. cuda/cccl/headers/include/thrust/system/omp/detail/copy.inl +74 -0
  1762. cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.h +56 -0
  1763. cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.inl +59 -0
  1764. cuda/cccl/headers/include/thrust/system/omp/detail/count.h +30 -0
  1765. cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.h +50 -0
  1766. cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.inl +65 -0
  1767. cuda/cccl/headers/include/thrust/system/omp/detail/equal.h +30 -0
  1768. cuda/cccl/headers/include/thrust/system/omp/detail/execution_policy.h +127 -0
  1769. cuda/cccl/headers/include/thrust/system/omp/detail/extrema.h +66 -0
  1770. cuda/cccl/headers/include/thrust/system/omp/detail/fill.h +30 -0
  1771. cuda/cccl/headers/include/thrust/system/omp/detail/find.h +53 -0
  1772. cuda/cccl/headers/include/thrust/system/omp/detail/for_each.h +56 -0
  1773. cuda/cccl/headers/include/thrust/system/omp/detail/for_each.inl +87 -0
  1774. cuda/cccl/headers/include/thrust/system/omp/detail/gather.h +30 -0
  1775. cuda/cccl/headers/include/thrust/system/omp/detail/generate.h +30 -0
  1776. cuda/cccl/headers/include/thrust/system/omp/detail/get_value.h +30 -0
  1777. cuda/cccl/headers/include/thrust/system/omp/detail/inner_product.h +30 -0
  1778. cuda/cccl/headers/include/thrust/system/omp/detail/iter_swap.h +30 -0
  1779. cuda/cccl/headers/include/thrust/system/omp/detail/logical.h +30 -0
  1780. cuda/cccl/headers/include/thrust/system/omp/detail/malloc_and_free.h +30 -0
  1781. cuda/cccl/headers/include/thrust/system/omp/detail/memory.inl +93 -0
  1782. cuda/cccl/headers/include/thrust/system/omp/detail/merge.h +30 -0
  1783. cuda/cccl/headers/include/thrust/system/omp/detail/mismatch.h +30 -0
  1784. cuda/cccl/headers/include/thrust/system/omp/detail/partition.h +88 -0
  1785. cuda/cccl/headers/include/thrust/system/omp/detail/partition.inl +102 -0
  1786. cuda/cccl/headers/include/thrust/system/omp/detail/per_device_resource.h +29 -0
  1787. cuda/cccl/headers/include/thrust/system/omp/detail/pragma_omp.h +54 -0
  1788. cuda/cccl/headers/include/thrust/system/omp/detail/reduce.h +54 -0
  1789. cuda/cccl/headers/include/thrust/system/omp/detail/reduce.inl +78 -0
  1790. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.h +64 -0
  1791. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.inl +65 -0
  1792. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.h +59 -0
  1793. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.inl +103 -0
  1794. cuda/cccl/headers/include/thrust/system/omp/detail/remove.h +72 -0
  1795. cuda/cccl/headers/include/thrust/system/omp/detail/remove.inl +87 -0
  1796. cuda/cccl/headers/include/thrust/system/omp/detail/replace.h +30 -0
  1797. cuda/cccl/headers/include/thrust/system/omp/detail/reverse.h +30 -0
  1798. cuda/cccl/headers/include/thrust/system/omp/detail/scan.h +73 -0
  1799. cuda/cccl/headers/include/thrust/system/omp/detail/scan.inl +172 -0
  1800. cuda/cccl/headers/include/thrust/system/omp/detail/scan_by_key.h +36 -0
  1801. cuda/cccl/headers/include/thrust/system/omp/detail/scatter.h +30 -0
  1802. cuda/cccl/headers/include/thrust/system/omp/detail/sequence.h +30 -0
  1803. cuda/cccl/headers/include/thrust/system/omp/detail/set_operations.h +30 -0
  1804. cuda/cccl/headers/include/thrust/system/omp/detail/sort.h +60 -0
  1805. cuda/cccl/headers/include/thrust/system/omp/detail/sort.inl +265 -0
  1806. cuda/cccl/headers/include/thrust/system/omp/detail/swap_ranges.h +30 -0
  1807. cuda/cccl/headers/include/thrust/system/omp/detail/tabulate.h +30 -0
  1808. cuda/cccl/headers/include/thrust/system/omp/detail/temporary_buffer.h +29 -0
  1809. cuda/cccl/headers/include/thrust/system/omp/detail/transform.h +30 -0
  1810. cuda/cccl/headers/include/thrust/system/omp/detail/transform_reduce.h +30 -0
  1811. cuda/cccl/headers/include/thrust/system/omp/detail/transform_scan.h +30 -0
  1812. cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_copy.h +30 -0
  1813. cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_fill.h +30 -0
  1814. cuda/cccl/headers/include/thrust/system/omp/detail/unique.h +60 -0
  1815. cuda/cccl/headers/include/thrust/system/omp/detail/unique.inl +71 -0
  1816. cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.h +67 -0
  1817. cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.inl +75 -0
  1818. cuda/cccl/headers/include/thrust/system/omp/execution_policy.h +62 -0
  1819. cuda/cccl/headers/include/thrust/system/omp/memory.h +111 -0
  1820. cuda/cccl/headers/include/thrust/system/omp/memory_resource.h +75 -0
  1821. cuda/cccl/headers/include/thrust/system/omp/pointer.h +124 -0
  1822. cuda/cccl/headers/include/thrust/system/omp/vector.h +99 -0
  1823. cuda/cccl/headers/include/thrust/system/system_error.h +185 -0
  1824. cuda/cccl/headers/include/thrust/system/tbb/detail/adjacent_difference.h +54 -0
  1825. cuda/cccl/headers/include/thrust/system/tbb/detail/assign_value.h +30 -0
  1826. cuda/cccl/headers/include/thrust/system/tbb/detail/binary_search.h +30 -0
  1827. cuda/cccl/headers/include/thrust/system/tbb/detail/copy.h +50 -0
  1828. cuda/cccl/headers/include/thrust/system/tbb/detail/copy.inl +73 -0
  1829. cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.h +47 -0
  1830. cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.inl +136 -0
  1831. cuda/cccl/headers/include/thrust/system/tbb/detail/count.h +30 -0
  1832. cuda/cccl/headers/include/thrust/system/tbb/detail/equal.h +30 -0
  1833. cuda/cccl/headers/include/thrust/system/tbb/detail/execution_policy.h +109 -0
  1834. cuda/cccl/headers/include/thrust/system/tbb/detail/extrema.h +66 -0
  1835. cuda/cccl/headers/include/thrust/system/tbb/detail/fill.h +30 -0
  1836. cuda/cccl/headers/include/thrust/system/tbb/detail/find.h +49 -0
  1837. cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.h +51 -0
  1838. cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.inl +91 -0
  1839. cuda/cccl/headers/include/thrust/system/tbb/detail/gather.h +30 -0
  1840. cuda/cccl/headers/include/thrust/system/tbb/detail/generate.h +30 -0
  1841. cuda/cccl/headers/include/thrust/system/tbb/detail/get_value.h +30 -0
  1842. cuda/cccl/headers/include/thrust/system/tbb/detail/inner_product.h +30 -0
  1843. cuda/cccl/headers/include/thrust/system/tbb/detail/iter_swap.h +30 -0
  1844. cuda/cccl/headers/include/thrust/system/tbb/detail/logical.h +30 -0
  1845. cuda/cccl/headers/include/thrust/system/tbb/detail/malloc_and_free.h +30 -0
  1846. cuda/cccl/headers/include/thrust/system/tbb/detail/memory.inl +94 -0
  1847. cuda/cccl/headers/include/thrust/system/tbb/detail/merge.h +77 -0
  1848. cuda/cccl/headers/include/thrust/system/tbb/detail/merge.inl +327 -0
  1849. cuda/cccl/headers/include/thrust/system/tbb/detail/mismatch.h +30 -0
  1850. cuda/cccl/headers/include/thrust/system/tbb/detail/partition.h +84 -0
  1851. cuda/cccl/headers/include/thrust/system/tbb/detail/partition.inl +98 -0
  1852. cuda/cccl/headers/include/thrust/system/tbb/detail/per_device_resource.h +29 -0
  1853. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.h +54 -0
  1854. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.inl +137 -0
  1855. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.h +61 -0
  1856. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.inl +400 -0
  1857. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_intervals.h +140 -0
  1858. cuda/cccl/headers/include/thrust/system/tbb/detail/remove.h +76 -0
  1859. cuda/cccl/headers/include/thrust/system/tbb/detail/remove.inl +87 -0
  1860. cuda/cccl/headers/include/thrust/system/tbb/detail/replace.h +30 -0
  1861. cuda/cccl/headers/include/thrust/system/tbb/detail/reverse.h +30 -0
  1862. cuda/cccl/headers/include/thrust/system/tbb/detail/scan.h +59 -0
  1863. cuda/cccl/headers/include/thrust/system/tbb/detail/scan.inl +312 -0
  1864. cuda/cccl/headers/include/thrust/system/tbb/detail/scan_by_key.h +33 -0
  1865. cuda/cccl/headers/include/thrust/system/tbb/detail/scatter.h +30 -0
  1866. cuda/cccl/headers/include/thrust/system/tbb/detail/sequence.h +30 -0
  1867. cuda/cccl/headers/include/thrust/system/tbb/detail/set_operations.h +30 -0
  1868. cuda/cccl/headers/include/thrust/system/tbb/detail/sort.h +60 -0
  1869. cuda/cccl/headers/include/thrust/system/tbb/detail/sort.inl +295 -0
  1870. cuda/cccl/headers/include/thrust/system/tbb/detail/swap_ranges.h +30 -0
  1871. cuda/cccl/headers/include/thrust/system/tbb/detail/tabulate.h +30 -0
  1872. cuda/cccl/headers/include/thrust/system/tbb/detail/temporary_buffer.h +29 -0
  1873. cuda/cccl/headers/include/thrust/system/tbb/detail/transform.h +30 -0
  1874. cuda/cccl/headers/include/thrust/system/tbb/detail/transform_reduce.h +30 -0
  1875. cuda/cccl/headers/include/thrust/system/tbb/detail/transform_scan.h +30 -0
  1876. cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_copy.h +30 -0
  1877. cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_fill.h +30 -0
  1878. cuda/cccl/headers/include/thrust/system/tbb/detail/unique.h +60 -0
  1879. cuda/cccl/headers/include/thrust/system/tbb/detail/unique.inl +71 -0
  1880. cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.h +67 -0
  1881. cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.inl +75 -0
  1882. cuda/cccl/headers/include/thrust/system/tbb/execution_policy.h +62 -0
  1883. cuda/cccl/headers/include/thrust/system/tbb/memory.h +111 -0
  1884. cuda/cccl/headers/include/thrust/system/tbb/memory_resource.h +75 -0
  1885. cuda/cccl/headers/include/thrust/system/tbb/pointer.h +124 -0
  1886. cuda/cccl/headers/include/thrust/system/tbb/vector.h +99 -0
  1887. cuda/cccl/headers/include/thrust/system_error.h +57 -0
  1888. cuda/cccl/headers/include/thrust/tabulate.h +125 -0
  1889. cuda/cccl/headers/include/thrust/transform.h +1045 -0
  1890. cuda/cccl/headers/include/thrust/transform_reduce.h +190 -0
  1891. cuda/cccl/headers/include/thrust/transform_scan.h +442 -0
  1892. cuda/cccl/headers/include/thrust/tuple.h +139 -0
  1893. cuda/cccl/headers/include/thrust/type_traits/integer_sequence.h +261 -0
  1894. cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +154 -0
  1895. cuda/cccl/headers/include/thrust/type_traits/is_execution_policy.h +65 -0
  1896. cuda/cccl/headers/include/thrust/type_traits/is_operator_less_or_greater_function_object.h +184 -0
  1897. cuda/cccl/headers/include/thrust/type_traits/is_operator_plus_function_object.h +116 -0
  1898. cuda/cccl/headers/include/thrust/type_traits/is_trivially_relocatable.h +336 -0
  1899. cuda/cccl/headers/include/thrust/type_traits/logical_metafunctions.h +42 -0
  1900. cuda/cccl/headers/include/thrust/type_traits/unwrap_contiguous_iterator.h +63 -0
  1901. cuda/cccl/headers/include/thrust/uninitialized_copy.h +300 -0
  1902. cuda/cccl/headers/include/thrust/uninitialized_fill.h +268 -0
  1903. cuda/cccl/headers/include/thrust/unique.h +1088 -0
  1904. cuda/cccl/headers/include/thrust/universal_allocator.h +93 -0
  1905. cuda/cccl/headers/include/thrust/universal_ptr.h +34 -0
  1906. cuda/cccl/headers/include/thrust/universal_vector.h +71 -0
  1907. cuda/cccl/headers/include/thrust/version.h +93 -0
  1908. cuda/cccl/headers/include/thrust/zip_function.h +176 -0
  1909. cuda/cccl/headers/include_paths.py +51 -0
  1910. cuda/cccl/parallel/__init__.py +9 -0
  1911. cuda/cccl/parallel/experimental/__init__.py +24 -0
  1912. cuda/cccl/py.typed +0 -0
  1913. cuda/compute/__init__.py +79 -0
  1914. cuda/compute/_bindings.py +79 -0
  1915. cuda/compute/_bindings.pyi +475 -0
  1916. cuda/compute/_bindings_impl.pyx +2273 -0
  1917. cuda/compute/_caching.py +71 -0
  1918. cuda/compute/_cccl_interop.py +422 -0
  1919. cuda/compute/_utils/__init__.py +0 -0
  1920. cuda/compute/_utils/protocols.py +132 -0
  1921. cuda/compute/_utils/temp_storage_buffer.py +86 -0
  1922. cuda/compute/algorithms/__init__.py +54 -0
  1923. cuda/compute/algorithms/_histogram.py +243 -0
  1924. cuda/compute/algorithms/_merge_sort.py +225 -0
  1925. cuda/compute/algorithms/_radix_sort.py +312 -0
  1926. cuda/compute/algorithms/_reduce.py +182 -0
  1927. cuda/compute/algorithms/_scan.py +331 -0
  1928. cuda/compute/algorithms/_segmented_reduce.py +257 -0
  1929. cuda/compute/algorithms/_three_way_partition.py +261 -0
  1930. cuda/compute/algorithms/_transform.py +329 -0
  1931. cuda/compute/algorithms/_unique_by_key.py +252 -0
  1932. cuda/compute/cccl/.gitkeep +0 -0
  1933. cuda/compute/cu12/_bindings_impl.cp313-win_amd64.pyd +0 -0
  1934. cuda/compute/cu12/cccl/cccl.c.parallel.dll +0 -0
  1935. cuda/compute/cu12/cccl/cccl.c.parallel.lib +0 -0
  1936. cuda/compute/cu13/_bindings_impl.cp313-win_amd64.pyd +0 -0
  1937. cuda/compute/cu13/cccl/cccl.c.parallel.dll +0 -0
  1938. cuda/compute/cu13/cccl/cccl.c.parallel.lib +0 -0
  1939. cuda/compute/iterators/__init__.py +21 -0
  1940. cuda/compute/iterators/_factories.py +219 -0
  1941. cuda/compute/iterators/_iterators.py +817 -0
  1942. cuda/compute/iterators/_zip_iterator.py +199 -0
  1943. cuda/compute/numba_utils.py +53 -0
  1944. cuda/compute/op.py +3 -0
  1945. cuda/compute/struct.py +272 -0
  1946. cuda/compute/typing.py +37 -0
  1947. cuda/coop/__init__.py +8 -0
  1948. cuda/coop/_caching.py +48 -0
  1949. cuda/coop/_common.py +275 -0
  1950. cuda/coop/_nvrtc.py +92 -0
  1951. cuda/coop/_scan_op.py +181 -0
  1952. cuda/coop/_types.py +937 -0
  1953. cuda/coop/_typing.py +107 -0
  1954. cuda/coop/block/__init__.py +39 -0
  1955. cuda/coop/block/_block_exchange.py +251 -0
  1956. cuda/coop/block/_block_load_store.py +215 -0
  1957. cuda/coop/block/_block_merge_sort.py +125 -0
  1958. cuda/coop/block/_block_radix_sort.py +214 -0
  1959. cuda/coop/block/_block_reduce.py +294 -0
  1960. cuda/coop/block/_block_scan.py +983 -0
  1961. cuda/coop/warp/__init__.py +9 -0
  1962. cuda/coop/warp/_warp_merge_sort.py +92 -0
  1963. cuda/coop/warp/_warp_reduce.py +153 -0
  1964. cuda/coop/warp/_warp_scan.py +78 -0
  1965. cuda_cccl-0.3.3.dist-info/METADATA +41 -0
  1966. cuda_cccl-0.3.3.dist-info/RECORD +1968 -0
  1967. cuda_cccl-0.3.3.dist-info/WHEEL +5 -0
  1968. cuda_cccl-0.3.3.dist-info/licenses/LICENSE +1 -0
@@ -0,0 +1,2273 @@
1
+ # distutils: language = c++
2
+ # cython: language_level=3
3
+ # cython: linetrace=True
4
+
5
+ # Python signatures are declared in the companion Python stub file _bindings.pyi
6
+ # Make sure to update PYI with change to Python API to ensure that Python
7
+ # static type checker tools like mypy green-lights cuda.compute
8
+
9
+ from libc.string cimport memset, memcpy
10
+ from libc.stdint cimport uint8_t, uint32_t, uint64_t, int64_t, uintptr_t
11
+ from cpython.bytes cimport PyBytes_FromStringAndSize
12
+
13
+ from cpython.buffer cimport (
14
+ Py_buffer, PyBUF_SIMPLE, PyBUF_ANY_CONTIGUOUS,
15
+ PyBuffer_Release, PyObject_CheckBuffer, PyObject_GetBuffer
16
+ )
17
+ from cpython.pycapsule cimport (
18
+ PyCapsule_CheckExact, PyCapsule_IsValid, PyCapsule_GetPointer
19
+ )
20
+
21
+ import ctypes
22
+ from enum import IntEnum
23
+ cdef extern from "<cuda.h>":
24
+ cdef struct OpaqueCUstream_st
25
+ cdef struct OpaqueCUkernel_st
26
+ cdef struct OpaqueCUlibrary_st
27
+
28
+ ctypedef int CUresult
29
+ ctypedef OpaqueCUstream_st *CUstream
30
+ ctypedef OpaqueCUkernel_st *CUkernel
31
+ ctypedef OpaqueCUlibrary_st *CUlibrary
32
+
33
+
34
+ cdef extern from "cccl/c/types.h":
35
+ cpdef enum cccl_type_enum:
36
+ INT8 "CCCL_INT8"
37
+ INT16 "CCCL_INT16"
38
+ INT32 "CCCL_INT32"
39
+ INT64 "CCCL_INT64"
40
+ UINT8 "CCCL_UINT8"
41
+ UINT16 "CCCL_UINT16"
42
+ UINT32 "CCCL_UINT32"
43
+ UINT64 "CCCL_UINT64"
44
+ FLOAT16 "CCCL_FLOAT16"
45
+ FLOAT32 "CCCL_FLOAT32"
46
+ FLOAT64 "CCCL_FLOAT64"
47
+ STORAGE "CCCL_STORAGE"
48
+ BOOLEAN "CCCL_BOOLEAN"
49
+
50
+ cpdef enum cccl_op_kind_t:
51
+ STATELESS "CCCL_STATELESS"
52
+ STATEFUL "CCCL_STATEFUL"
53
+ PLUS "CCCL_PLUS"
54
+ MINUS "CCCL_MINUS"
55
+ MULTIPLIES "CCCL_MULTIPLIES"
56
+ DIVIDES "CCCL_DIVIDES"
57
+ MODULUS "CCCL_MODULUS"
58
+ EQUAL_TO "CCCL_EQUAL_TO"
59
+ NOT_EQUAL_TO "CCCL_NOT_EQUAL_TO"
60
+ GREATER "CCCL_GREATER"
61
+ LESS "CCCL_LESS"
62
+ GREATER_EQUAL "CCCL_GREATER_EQUAL"
63
+ LESS_EQUAL "CCCL_LESS_EQUAL"
64
+ LOGICAL_AND "CCCL_LOGICAL_AND"
65
+ LOGICAL_OR "CCCL_LOGICAL_OR"
66
+ LOGICAL_NOT "CCCL_LOGICAL_NOT"
67
+ BIT_AND "CCCL_BIT_AND"
68
+ BIT_OR "CCCL_BIT_OR"
69
+ BIT_XOR "CCCL_BIT_XOR"
70
+ BIT_NOT "CCCL_BIT_NOT"
71
+ IDENTITY "CCCL_IDENTITY"
72
+ NEGATE "CCCL_NEGATE"
73
+ MINIMUM "CCCL_MINIMUM"
74
+ MAXIMUM "CCCL_MAXIMUM"
75
+
76
+ cpdef enum cccl_iterator_kind_t:
77
+ POINTER "CCCL_POINTER"
78
+ ITERATOR "CCCL_ITERATOR"
79
+
80
+ cdef struct cccl_type_info:
81
+ size_t size
82
+ size_t alignment
83
+ cccl_type_enum type
84
+
85
+ cdef enum cccl_op_code_type:
86
+ CCCL_OP_LTOIR
87
+ CCCL_OP_CPP_SOURCE
88
+
89
+ cdef struct cccl_op_t:
90
+ cccl_op_kind_t type
91
+ const char* name
92
+ const char* code
93
+ size_t code_size
94
+ cccl_op_code_type code_type
95
+ size_t size
96
+ size_t alignment
97
+ void *state
98
+
99
+ cdef struct cccl_value_t:
100
+ cccl_type_info type
101
+ void *state
102
+
103
+ cdef union cccl_increment_t:
104
+ int64_t signed_offset
105
+ uint64_t unsigned_offset
106
+
107
+ ctypedef void (*cccl_host_op_fn_ptr_t)(void *, cccl_increment_t) nogil
108
+
109
+ cdef struct cccl_iterator_t:
110
+ size_t size
111
+ size_t alignment
112
+ cccl_iterator_kind_t type
113
+ cccl_op_t advance
114
+ cccl_op_t dereference
115
+ cccl_type_info value_type
116
+ void *state
117
+ cccl_host_op_fn_ptr_t host_advance
118
+
119
+ cpdef enum cccl_sort_order_t:
120
+ ASCENDING "CCCL_ASCENDING"
121
+ DESCENDING "CCCL_DESCENDING"
122
+
123
+ cpdef enum cccl_init_kind_t:
124
+ VALUE_INIT "CCCL_VALUE_INIT"
125
+ FUTURE_VALUE_INIT "CCCL_FUTURE_VALUE_INIT"
126
+ NO_INIT "CCCL_NO_INIT"
127
+
128
cdef void arg_type_check(
    str arg_name,
    object expected_type,
    object arg
) except *:
    """
    Raise ``TypeError`` unless ``arg`` is an instance of ``expected_type``.

    ``arg_name`` is only used to label the offending argument in the
    error message.
    """
    if isinstance(arg, expected_type):
        return
    actual_type = type(arg)
    raise TypeError(
        f"Expected {arg_name} to have type '{expected_type}', "
        f"got '{actual_type}'"
    )
138
+
139
# Module-level aliases that give the C enum types declared above
# shorter names.
TypeEnum = cccl_type_enum
OpKind = cccl_op_kind_t
IteratorKind = cccl_iterator_kind_t
SortOrder = cccl_sort_order_t
InitKind = cccl_init_kind_t
144
+
145
cdef void _validate_alignment(int alignment) except *:
    """
    Check that ``alignment`` is a positive integer and a power of two
    that can be represented by uint32_t type.

    Raises:
        ValueError: if ``alignment`` is smaller than 1 or is not a
            power of two.
    """
    cdef uint32_t val
    if alignment < 1:
        # Zero is rejected as well, so the requirement is "positive",
        # not merely "non-negative".
        raise ValueError(
            "Alignment must be positive, "
            f"got {alignment}."
        )
    # alignment >= 1 at this point, so the unsigned cast keeps the value
    val = <uint32_t>alignment
    # A power of two has a single bit set; clearing the lowest set bit
    # must therefore leave zero.
    if (val & (val - 1)) != 0:
        raise ValueError(
            "Alignment must be a power of two, "
            f"got {alignment}"
        )
162
+
163
+
164
cdef class Op:
    """
    Wrapper around the native ``cccl_op_t`` operation descriptor.

    All constructor arguments are keyword-only.

    Args:
        name (str):
            Name of the operation.
        operator_type (OpKind):
            Kind of the operation, e.g. stateless or stateful.
            ``OpKind.STATELESS`` is used when omitted.
        ltoir (bytes):
            The LTOIR for the operation compiled for device.
        state (bytes, optional):
            State for the stateful operation. ``b""`` is used when omitted.
        state_alignment (int, optional):
            Alignment of the state struct. Default: `1`.

    When both ``name`` and ``ltoir`` are omitted, they default to ``""``
    and ``b""`` respectively.
    """
    # op_data stores raw pointers into these Python objects, so the
    # instance keeps references to them to guarantee their lifetime.
    cdef bytes op_encoded_name
    cdef bytes code_bytes
    cdef bytes state_bytes
    cdef cccl_op_t op_data

    cdef void _set_members(self, cccl_op_kind_t op_type, str name, bytes lto_ir, bytes state, int state_alignment):
        """Take ownership of the Python objects and fill in ``op_data``."""
        # Keep the owners alive first ...
        self.op_encoded_name = name.encode("utf-8")
        self.code_bytes = lto_ir
        self.state_bytes = state
        # ... then populate the zero-initialized native struct
        memset(&self.op_data, 0, sizeof(cccl_op_t))
        self.op_data.type = op_type
        self.op_data.code_type = cccl_op_code_type.CCCL_OP_LTOIR
        self.op_data.name = <const char *>self.op_encoded_name
        self.op_data.code = <const char *>lto_ir
        self.op_data.code_size = len(lto_ir)
        self.op_data.state = <void *><const char *>state
        self.op_data.size = len(state)
        self.op_data.alignment = state_alignment

    def __cinit__(self, /, *, name = None, operator_type = None, ltoir = None, state = None, state_alignment = 1):
        # Substitute defaults for omitted arguments
        if name is None and ltoir is None:
            name, ltoir = "", b""
        state = b"" if state is None else state
        operator_type = OpKind.STATELESS if operator_type is None else operator_type

        # Validate argument types in declaration order
        for label, required_type, value in (
            ("name", str, name),
            ("ltoir", bytes, ltoir),
            ("state", bytes, state),
            ("state_alignment", int, state_alignment),
        ):
            arg_type_check(arg_name=label, expected_type=required_type, arg=value)
        if not isinstance(operator_type, OpKind):
            raise TypeError(
                "The operator_type argument should be an enumerator of operator kinds"
            )
        _validate_alignment(state_alignment)

        self._set_members(
            <cccl_op_kind_t> operator_type.value,
            <str> name,
            <bytes> ltoir,
            <bytes> state,
            <int> state_alignment
        )

    cdef void set_state(self, bytes state):
        """
        Replace the state buffer referenced by the native struct.

        NOTE(review): ``op_data.size`` is not refreshed here; presumably the
        new state is expected to have the same length as the old one --
        confirm against callers.
        """
        self.state_bytes = state
        self.op_data.state = <void *><const char *>state

    @property
    def state(self):
        "State bytes currently referenced by the operation"
        return self.state_bytes

    @state.setter
    def state(self, bytes new_value):
        self.set_state(<bytes>new_value)

    @property
    def name(self):
        "Name of the operation, decoded from its UTF-8 encoded form"
        return self.op_encoded_name.decode("utf-8")

    @property
    def ltoir(self):
        # Backward compatibility property, same value as `code`
        return self.code_bytes

    @property
    def code(self):
        "Code bytes of the operation"
        return self.code_bytes

    @property
    def state_alignment(self):
        "Value of the `alignment` field of the native struct"
        return self.op_data.alignment

    @property
    def state_typenum(self):
        "Value of the `type` field (operation kind) of the native struct"
        return self.op_data.type

    def as_bytes(self):
        "Debugging utility to view memory content of library struct"
        return PyBytes_FromStringAndSize(
            <const char *>&self.op_data, sizeof(self.op_data)
        )
268
+
269
+
270
cdef class TypeInfo:
    """
    Wrapper around the native ``cccl_type_info`` structure.

    Args:
        size (int):
            Size of the type in bytes. Must be at least 1.
        alignment (int):
            Alignment of the type in bytes. Must be a positive power of two.
        type_enum (TypeEnum):
            Enumeration member identifying the type.

    Raises:
        ValueError: if ``size`` is smaller than 1 or ``alignment`` is
            rejected by ``_validate_alignment``.
    """
    cdef cccl_type_info type_info

    def __cinit__(self, int size, int alignment, cccl_type_enum type_enum):
        if not (size >= 1):
            raise ValueError("Size argument must be positive")
        _validate_alignment(alignment)
        # All inputs are validated; populate the native struct
        self.type_info.type = type_enum
        self.type_info.size = size
        self.type_info.alignment = alignment

    @property
    def size(self):
        "Size of the type in bytes"
        return self.type_info.size

    @property
    def alignment(self):
        "Alignment of the type in bytes"
        return self.type_info.alignment

    @property
    def typenum(self):
        "Enumerator identifying the type"
        return self.type_info.type

    def as_bytes(self):
        "Debugging utility to view memory content of library struct"
        return PyBytes_FromStringAndSize(
            <const char *>&self.type_info, sizeof(self.type_info)
        )
309
+
310
+
311
cdef class Value:
    """
    Represents CCCL value structure

    Args:
        value_type (TypeInfo):
            type descriptor. ``None`` is rejected.
        state (object):
            state of the value type. Object is expected to
            implement Python buffer protocol and be able to provide
            simple contiguous array of type `uint8_t`. ``None`` is rejected.
    """
    # Memoryview keeps the underlying buffer alive while value_data.state
    # points into it.
    cdef uint8_t[::1] state_obj
    cdef TypeInfo value_type
    cdef cccl_value_t value_data

    # `not None` is required: typed arguments accept None by default, and
    # the body dereferences both arguments without any None check.
    def __cinit__(self, TypeInfo value_type not None, uint8_t[::1] state not None):
        self.state_obj = state
        self.value_type = value_type
        self.value_data.type = value_type.type_info
        self.value_data.state = <void *>&state[0]

    @property
    def type(self):
        "Type descriptor passed at construction"
        return self.value_type

    @property
    def state(self):
        "Memoryview over the current state buffer"
        return self.state_obj

    @state.setter
    def state(self, uint8_t[::1] new_value not None):
        # The replacement must have exactly the same length as the
        # current state buffer.
        if len(self.state_obj) != len(new_value):
            raise ValueError("Size mismatch")
        self.state_obj = new_value
        self.value_data.state = <void *>&self.state_obj[0]

    def as_bytes(self):
        "Debugging utility to view memory of native struct"
        cdef uint8_t[:] mem_view = bytearray(sizeof(self.value_data))
        memcpy(&mem_view[0], &self.value_data, sizeof(self.value_data))
        return bytes(mem_view)
354
+
355
+
356
cdef void ensure_buffer(object o) except *:
    """Raise ``TypeError`` if ``o`` does not support the buffer protocol."""
    if PyObject_CheckBuffer(o):
        return
    raise TypeError(
        "Object with buffer protocol expected, "
        f"got {type(o)}"
    )
362
+
363
+
364
cdef void * get_buffer_pointer(object o, size_t *size):
    """
    Return the address of the contiguous buffer exported by ``o``.

    If ``size`` is not NULL, the length of the buffer in bytes is stored
    in ``size[0]`` (zero is stored when the buffer can not be acquired).

    The buffer view is released before returning, so the returned address
    is only meaningful while ``o`` keeps the underlying memory alive.

    Raises:
        RuntimeError: if the buffer request reports a non-zero status.
    """
    cdef int status = 0
    cdef void *ptr = NULL
    cdef Py_buffer view

    status = PyObject_GetBuffer(o, &view, PyBUF_SIMPLE | PyBUF_ANY_CONTIGUOUS)
    if status != 0:  # pragma: no cover
        # `size` is optional; guard the write just like the success path does
        if size is not NULL:
            size[0] = 0
        raise RuntimeError(
            "Can not access simple contiguous buffer"
        )

    ptr = view.buf
    if size is not NULL:
        size[0] = <size_t>view.len
    PyBuffer_Release(&view)

    return ptr
382
+
383
+
384
cdef void * ctypes_typed_pointer_payload_ptr(object ctypes_typed_ptr):
    """
    Return the address stored inside a ctypes pointer-like object.

    The object's buffer is read as one pointer-sized integer, and that
    integer is returned as a ``void *``.
    """
    cdef size_t nbytes = 0
    cdef size_t *slot = NULL
    cdef size_t payload = 0
    ensure_buffer(ctypes_typed_ptr)
    slot = <size_t *>get_buffer_pointer(ctypes_typed_ptr, &nbytes)
    payload = slot[0]
    return <void *>(payload)
391
+
392
+
393
cdef void * ctypes_value_ptr(object ctypes_cdata):
    """Return the address of the buffer exported by `ctypes_cdata` itself."""
    cdef size_t nbytes = 0
    cdef void *data = NULL
    ensure_buffer(ctypes_cdata)
    data = get_buffer_pointer(ctypes_cdata, &nbytes)
    return data
398
+
399
+
400
cdef inline void * int_as_ptr(size_t ptr_val):
    """Reinterpret an unsigned integer address as ``void *``."""
    cdef void *as_pointer = <void *>(ptr_val)
    return as_pointer
402
+
403
+
404
cdef class StateBase:
    """Pairs a raw address with the Python object stored alongside it."""
    cdef void *ptr
    cdef object ref

    def __cinit__(self):
        # Start out empty: no owner object, NULL address.
        self.ref = None
        self.ptr = NULL

    cdef inline void set_state(self, void *ptr, object ref):
        """Store the address and its associated Python object together."""
        self.ref = ref
        self.ptr = ptr

    @property
    def pointer(self):
        """The stored address as an integer."""
        cdef size_t address = <size_t>self.ptr
        return address

    @property
    def reference(self):
        """The Python object stored with the address (may be ``None``)."""
        return self.ref
423
+
424
+
425
cdef class Pointer(StateBase):
    """
    Represents the pointer value.

    Args:
        arg: one of
            * ``int`` - used directly as the address; no object is retained;
            * typed ctypes pointer (``ctypes._Pointer``) - the address stored
              in the pointer object is used and `arg` is retained;
            * ``ctypes.c_void_p`` - its ``value`` is used (a NULL
              ``c_void_p`` maps to address 0) and `arg` is retained.

    Raises:
        TypeError: for any other argument type.
    """

    def __cinit__(self, arg):
        cdef void *ptr
        cdef object ref

        if isinstance(arg, int):
            ptr = int_as_ptr(arg)
            ref = None
        elif isinstance(arg, ctypes._Pointer):
            ptr = ctypes_typed_pointer_payload_ptr(arg)
            ref = arg
        elif isinstance(arg, ctypes.c_void_p):
            # ctypes reports a NULL c_void_p as value None, which cannot be
            # converted to size_t; map it to address 0 explicitly.
            ptr = int_as_ptr(arg.value or 0)
            ref = arg
        else:
            raise TypeError(
                "Expect ctypes pointer, integers, or PointerProxy, "
                f"got type {type(arg)}"
            )
        self.set_state(ptr, ref)
447
+
448
+
449
def make_pointer_object(ptr, owner):
    """
    Build a ``Pointer`` for address `ptr` that retains `owner`.

    Args:
        ptr: ``int`` address or ``ctypes.c_void_p`` (a NULL ``c_void_p``
            maps to address 0).
        owner: arbitrary object stored as the pointer's reference.

    Returns:
        Pointer: new instance with the given address and reference.

    Raises:
        TypeError: if `ptr` is neither ``int`` nor ``ctypes.c_void_p``.
    """
    cdef Pointer res = Pointer(0)

    if isinstance(ptr, int):
        res.ptr = int_as_ptr(ptr)
    elif isinstance(ptr, ctypes.c_void_p):
        # c_void_p.value is None for a NULL pointer; treat it as address 0.
        res.ptr = int_as_ptr(ptr.value or 0)
    else:
        raise TypeError(
            "First argument must be an integer, or ctypes.c_void_p, "
            f"got {type(ptr)}"
        )
    res.ref = owner
    return res
463
+
464
+
465
cdef class IteratorState(StateBase):
    """
    Represents blob referenced by pointer.

    Accepts either a typed ctypes pointer (the blob is ``arg.contents``) or
    any object supporting the buffer protocol (the blob is the buffer).
    """
    # Size of the referenced blob in bytes.
    cdef size_t state_nbytes

    def __cinit__(self, arg):
        cdef size_t nbytes = 0
        cdef void *data = NULL
        cdef object keepalive = None
        cdef bint is_typed_pointer

        super().__init__()
        is_typed_pointer = isinstance(arg, ctypes._Pointer)
        if not is_typed_pointer and not PyObject_CheckBuffer(arg):
            raise TypeError(
                "Expected a ctypes pointer with content, or object of type bytes or bytearray, "
                f"got type {type(arg)}"
            )

        if is_typed_pointer:
            # Address stored in the pointer; size comes from the pointee.
            data = ctypes_typed_pointer_payload_ptr(arg)
            keepalive = arg.contents
            self.state_nbytes = ctypes.sizeof(keepalive)
        else:
            data = get_buffer_pointer(arg, &nbytes)
            keepalive = arg
            self.state_nbytes = nbytes
        self.set_state(data, keepalive)

    cdef inline size_t get_size(self):
        """Blob size in bytes (C-level accessor)."""
        return self.state_nbytes

    @property
    def size(self):
        """Blob size in bytes."""
        return self.state_nbytes

    def __getbuffer__(self, Py_buffer *buffer, int flags):
        # Export the blob as a 1-d array of unsigned bytes.
        cdef Py_ssize_t length = <Py_ssize_t>self.state_nbytes
        buffer.obj = self
        buffer.buf = <char *>self.ptr
        buffer.len = length
        buffer.itemsize = 1
        buffer.readonly = 0
        buffer.ndim = 1
        buffer.format = "B"  # unsigned char
        # Shape aliases the size field; the stride aliases itemsize (1).
        buffer.shape = <Py_ssize_t *>&self.state_nbytes
        buffer.strides = &buffer.itemsize
        buffer.suboffsets = NULL
        buffer.internal = NULL

    def __releasebuffer__(self, Py_buffer *buffer):
        # Nothing was allocated in __getbuffer__.
        pass
513
+
514
+
515
# Capsule name checked by is_function_pointer_capsule() and used by
# get_function_pointer_from_capsule() to fetch the wrapped pointer.
cdef const char *function_ptr_capsule_name = "void (void *, cccl_increment_t)";
516
+
517
cdef bint is_function_pointer_capsule(object o) noexcept:
    """
    Tell whether `o` is an exact PyCapsule that is valid under the
    name 'void (void *, cccl_increment_t)'. Returns non-zero if so.
    """
    if not PyCapsule_CheckExact(o):
        return False
    return PyCapsule_IsValid(o, function_ptr_capsule_name)
526
+
527
+
528
cdef inline void* get_function_pointer_from_capsule(object cap) except *:
    """Fetch the pointer stored in `cap` under the expected capsule name."""
    cdef void *fn_ptr = PyCapsule_GetPointer(cap, function_ptr_capsule_name)
    return fn_ptr
530
+
531
+
532
cdef cccl_host_op_fn_ptr_t unbox_host_advance_fn(object host_fn_obj) except *:
    """
    Convert a Python-level function handle into a host function pointer.

    Accepted inputs, checked in this order:
        * ctypes function pointer (``ctypes._CFuncPtr``);
        * ``int`` holding the raw address;
        * ``ctypes.c_void_p`` (a NULL ``c_void_p`` yields a NULL pointer);
        * capsule accepted by ``is_function_pointer_capsule``.

    Raises:
        TypeError: if `host_fn_obj` is none of the above.
    """
    cdef void *fn_ptr = NULL
    if isinstance(host_fn_obj, ctypes._CFuncPtr):
        # the _CFuncPtr object encapsulates a pointer to the function pointer
        fn_ptr = ctypes_typed_pointer_payload_ptr(host_fn_obj)
        return <cccl_host_op_fn_ptr_t>fn_ptr

    if isinstance(host_fn_obj, int):
        fn_ptr = <void *><uintptr_t>host_fn_obj
        return <cccl_host_op_fn_ptr_t>fn_ptr

    if isinstance(host_fn_obj, ctypes.c_void_p):
        # c_void_p.value is None for a NULL pointer; None cannot be converted
        # to uintptr_t, so substitute 0.
        fn_ptr = <void *><uintptr_t>(host_fn_obj.value or 0)
        return <cccl_host_op_fn_ptr_t>fn_ptr

    if is_function_pointer_capsule(host_fn_obj):
        fn_ptr = get_function_pointer_from_capsule(host_fn_obj)
        return <cccl_host_op_fn_ptr_t>fn_ptr

    raise TypeError(
        "Expected ctypes function pointer, ctypes.c_void_p, integer or a named capsule, "
        f"got {type(host_fn_obj)}"
    )
555
+
556
+
557
cdef class Iterator:
    """
    Represents CCCL iterator.

    Args:
        alignment (int):
            Alignment of the iterator state
        iterator_type (IteratorKind):
            The type of iterator, `IteratorKind.POINTER` or
            `IteratorKind.ITERATOR`
        advance_fn (Op):
            Descriptor for user-defined `advance` function
            compiled for device
        dereference_fn (Op):
            Descriptor for user-defined `dereference` or `assign`
            function compiled for device
        value_type (TypeInfo):
            Descriptor of the type addressed by the iterator
        state (object, optional):
            Python object for the state of the iterator. For iterators of
            kind `ITERATOR` the constructor accepts an `IteratorState`.
            For iterators of kind `POINTER` it accepts an `int` address or
            a `Pointer`. Value `None` represents absence of iterator state.
            The `state` property setter additionally accepts, for kind
            `ITERATOR`, a `Pointer` (only when a state size is already
            known) or any object supporting the buffer protocol.
        host_advance_fn (object, optional):
            Python object for host callable function to advance state by a given
            increment. The argument may only be set for iterators of type
            `IteratorKind.ITERATOR` and raise an exception otherwise. Supported
            types are `int` or `ctypes.c_void_p` (raw pointer), ctypes function
            pointer, or a Python capsule with name `"void (void *, cccl_increment_t)"`.
    """
    cdef Op advance
    cdef Op dereference
    # Object retained so memory behind iter_data.state stays alive.
    cdef object state_obj
    # Object retained so the host advance function pointer stays valid.
    cdef object host_advance_obj
    cdef cccl_iterator_t iter_data

    def __cinit__(self,
        int alignment,
        cccl_iterator_kind_t iterator_type,
        Op advance_fn,
        Op dereference_fn,
        TypeInfo value_type,
        state=None,
        host_advance_fn=None
    ):
        cdef cccl_iterator_kind_t it_kind
        _validate_alignment(alignment)
        it_kind = iterator_type
        if it_kind == cccl_iterator_kind_t.POINTER:
            # Pointer iterators carry only an address; size stays 0.
            if state is None:
                self.state_obj = None
                self.iter_data.size = 0
                self.iter_data.state = NULL
            elif isinstance(state, int):
                self.state_obj = None
                self.iter_data.size = 0
                self.iter_data.state = int_as_ptr(state)
            elif isinstance(state, Pointer):
                self.state_obj = state.reference
                self.iter_data.size = 0
                self.iter_data.state = (<Pointer>state).ptr
            else:
                raise TypeError(
                    "Expect for Iterator of kind POINTER, state must have type Pointer or int, "
                    f"got {type(state)}"
                )
            if host_advance_fn is not None:
                raise ValueError(
                    "host_advance_fn must be set to None for iterators of kind POINTER"
                )
            self.iter_data.host_advance = NULL
            self.host_advance_obj = None
        elif it_kind == cccl_iterator_kind_t.ITERATOR:
            if state is None:
                self.state_obj = None
                self.iter_data.size = 0
                self.iter_data.state = NULL
            elif isinstance(state, IteratorState):
                self.state_obj = state.reference
                self.iter_data.size = (<IteratorState>state).size
                self.iter_data.state = (<IteratorState>state).ptr
            else:
                raise TypeError(
                    "For Iterator of kind ITERATOR, state must have type IteratorState, "
                    f"got type {type(state)}"
                )
            if host_advance_fn is not None:
                self.iter_data.host_advance = unbox_host_advance_fn(host_advance_fn)
                self.host_advance_obj = host_advance_fn
            else:
                self.iter_data.host_advance = NULL
                self.host_advance_obj = None
        else:  # pragma: no cover
            raise ValueError("Unrecognized iterator kind")
        self.advance = advance_fn
        self.dereference = dereference_fn
        self.iter_data.alignment = alignment
        self.iter_data.type = <cccl_iterator_kind_t> it_kind
        self.iter_data.advance = self.advance.op_data
        self.iter_data.dereference = self.dereference.op_data
        self.iter_data.value_type = value_type.type_info

    @property
    def advance_op(self):
        """The `Op` passed as `advance_fn`."""
        return self.advance

    @property
    def dereference_or_assign_op(self):
        """The `Op` passed as `dereference_fn`."""
        return self.dereference

    @property
    def state(self):
        """
        Iterator state: the address as an integer for kind POINTER,
        otherwise the retained state object.
        """
        if self.iter_data.type == cccl_iterator_kind_t.POINTER:
            return <size_t>self.iter_data.state
        else:
            return self.state_obj

    @state.setter
    def state(self, new_value):
        cdef size_t buf_size = 0
        cdef void *buf_ptr = NULL
        cdef cccl_iterator_kind_t it_kind = self.iter_data.type
        if it_kind == cccl_iterator_kind_t.POINTER:
            if isinstance(new_value, Pointer):
                self.state_obj = (<Pointer>new_value).ref
                self.iter_data.size = 0
                self.iter_data.state = (<Pointer>new_value).ptr
            elif isinstance(new_value, int):
                self.state_obj = None
                self.iter_data.size = 0
                self.iter_data.state = int_as_ptr(new_value)
            elif new_value is None:
                self.state_obj = None
                self.iter_data.size = 0
                self.iter_data.state = NULL
            else:
                raise TypeError(
                    "For iterator with type POINTER, state value must have type int or type Pointer, "
                    f"got type {type(new_value)}"
                )
        elif it_kind == cccl_iterator_kind_t.ITERATOR:
            if isinstance(new_value, IteratorState):
                self.state_obj = new_value.reference
                self.iter_data.size = (<IteratorState>new_value).size
                self.iter_data.state = (<IteratorState>new_value).ptr
            elif isinstance(new_value, Pointer):
                # Validate before touching any field: replacing state_obj
                # first would drop the previous owner while iter_data.state
                # still points into its memory.
                if self.iter_data.size == 0:
                    raise ValueError("Assigning incomplete state value to iterator without state size information")
                self.state_obj = new_value.reference
                self.iter_data.state = (<Pointer>new_value).ptr
            elif PyObject_CheckBuffer(new_value):
                # Go through locals so a failed buffer request leaves the
                # current size/state pair untouched.
                buf_ptr = get_buffer_pointer(new_value, &buf_size)
                self.iter_data.state = buf_ptr
                self.iter_data.size = buf_size
                self.state_obj = new_value
            elif new_value is None:
                self.state_obj = None
                self.iter_data.size = 0
                self.iter_data.state = NULL
            else:
                raise TypeError(
                    "For iterator with type ITERATOR, state value must have type IteratorState or type bytes, "
                    f"got type {type(new_value)}"
                )
        else:
            raise TypeError("The new value should be an integer for iterators of POINTER kind, and bytes for ITERATOR kind")

    @property
    def type(self):
        """`IteratorKind.POINTER` or `IteratorKind.ITERATOR`."""
        cdef cccl_iterator_kind_t it_kind = self.iter_data.type
        if it_kind == cccl_iterator_kind_t.POINTER:
            return IteratorKind.POINTER
        else:
            return IteratorKind.ITERATOR

    @property
    def value_type(self):
        """A new `TypeInfo` built from the stored value type descriptor."""
        cdef cccl_type_info type_info = self.iter_data.value_type
        return TypeInfo(type_info.size, type_info.alignment, type_info.type)

    def is_kind_pointer(self):
        """True when the iterator kind is POINTER."""
        cdef cccl_iterator_kind_t it_kind = self.iter_data.type
        return (it_kind == cccl_iterator_kind_t.POINTER)

    def is_kind_iterator(self):
        """True when the iterator kind is ITERATOR."""
        cdef cccl_iterator_kind_t it_kind = self.iter_data.type
        return (it_kind == cccl_iterator_kind_t.ITERATOR)

    def as_bytes(self):
        "Debugging utility to get memory view into library struct"
        cdef uint8_t[:] mem_view = bytearray(sizeof(self.iter_data))
        memcpy(&mem_view[0], &self.iter_data, sizeof(self.iter_data))
        return bytes(mem_view)

    @property
    def host_advance_fn(self):
        """The object supplied as host advance function, or ``None``."""
        return self.host_advance_obj

    @host_advance_fn.setter
    def host_advance_fn(self, func):
        if (self.iter_data.type == cccl_iterator_kind_t.ITERATOR):
            if func is not None:
                self.iter_data.host_advance = unbox_host_advance_fn(func)
                self.host_advance_obj = func
            else:
                self.iter_data.host_advance = NULL
                self.host_advance_obj = None
        else:
            raise ValueError(
                "host_advance_fn can only be set for iterators of kind ITERATOR"
            )
764
+ else:
765
+ raise ValueError
766
+
767
+
768
cdef class CommonData:
    """
    Build settings shared by the build-result classes: compute capability
    and the four include/toolkit paths, kept as UTF-8 encoded bytes.
    """
    cdef int cc_major
    cdef int cc_minor
    cdef bytes encoded_cub_path
    cdef bytes encoded_thrust_path
    cdef bytes encoded_libcudacxx_path
    cdef bytes encoded_ctk_path

    def __cinit__(self, int cc_major, int cc_minor, str cub_path, str thrust_path, str libcudacxx_path, str ctk_path):
        self.cc_major = cc_major
        self.cc_minor = cc_minor
        # Encode once; the C-string getters hand out pointers into these.
        self.encoded_cub_path = cub_path.encode("utf-8")
        self.encoded_thrust_path = thrust_path.encode("utf-8")
        self.encoded_libcudacxx_path = libcudacxx_path.encode("utf-8")
        self.encoded_ctk_path = ctk_path.encode("utf-8")

    cdef inline int get_cc_major(self):
        return self.cc_major

    cdef inline int get_cc_minor(self):
        return self.cc_minor

    cdef inline const char * cub_path_get_c_str(self):
        # Empty path is reported as NULL rather than "".
        if self.encoded_cub_path:
            return <const char *>self.encoded_cub_path
        return NULL

    cdef inline const char * thrust_path_get_c_str(self):
        if self.encoded_thrust_path:
            return <const char *>self.encoded_thrust_path
        return NULL

    cdef inline const char * libcudacxx_path_get_c_str(self):
        if self.encoded_libcudacxx_path:
            return <const char *>self.encoded_libcudacxx_path
        return NULL

    cdef inline const char * ctk_path_get_c_str(self):
        if self.encoded_ctk_path:
            return <const char *>self.encoded_ctk_path
        return NULL

    @property
    def compute_capability(self):
        """Tuple ``(cc_major, cc_minor)``."""
        major = self.cc_major
        minor = self.cc_minor
        return (major, minor)

    @property
    def cub_path(self):
        """CUB path as ``str``."""
        raw = self.encoded_cub_path
        return raw.decode("utf-8")

    @property
    def ctk_path(self):
        """CTK path as ``str``."""
        raw = self.encoded_ctk_path
        return raw.decode("utf-8")

    @property
    def thrust_path(self):
        """Thrust path as ``str``."""
        raw = self.encoded_thrust_path
        return raw.decode("utf-8")

    @property
    def libcudacxx_path(self):
        """libcudacxx path as ``str``."""
        raw = self.encoded_libcudacxx_path
        return raw.decode("utf-8")
821
+
822
+ # --------------
823
+ # DeviceReduce
824
+ # --------------
825
+
826
# C declarations from cccl/c/reduce.h used by DeviceReduceBuildResult.
cdef extern from "cccl/c/reduce.h":
    # Only the fields read by this module (_get_cubin) are declared.
    cdef struct cccl_device_reduce_build_result_t 'cccl_device_reduce_build_result_t':
        const char* cubin
        size_t cubin_size

    # Called from DeviceReduceBuildResult.__cinit__ with: result struct,
    # input iterator, output iterator, op, initial value, cc major/minor,
    # and the cub / thrust / libcudacxx / ctk paths.
    cdef CUresult cccl_device_reduce_build(
        cccl_device_reduce_build_result_t*,
        cccl_iterator_t,
        cccl_iterator_t,
        cccl_op_t,
        cccl_value_t,
        int, int, const char*, const char*, const char*, const char*
    ) nogil

    # Called from DeviceReduceBuildResult.compute with: build result,
    # temp storage pointer, pointer to temp storage size, input iterator,
    # output iterator, number of items, op, initial value, stream.
    cdef CUresult cccl_device_reduce(
        cccl_device_reduce_build_result_t,
        void *,
        size_t *,
        cccl_iterator_t,
        cccl_iterator_t,
        uint64_t,
        cccl_op_t,
        cccl_value_t,
        CUstream
    ) nogil

    # Called from DeviceReduceBuildResult.__dealloc__.
    cdef CUresult cccl_device_reduce_cleanup(
        cccl_device_reduce_build_result_t*
    ) nogil
855
+
856
+
857
cdef class DeviceReduceBuildResult:
    """
    Owns a ``cccl_device_reduce_build_result_t``.

    The constructor calls ``cccl_device_reduce_build``; ``compute`` calls
    ``cccl_device_reduce``; deallocation calls
    ``cccl_device_reduce_cleanup``.
    """
    cdef cccl_device_reduce_build_result_t build_data

    def __cinit__(
        DeviceReduceBuildResult self,
        Iterator d_in,
        Iterator d_out,
        Op op,
        Value h_init,
        CommonData common_data
    ):
        cdef CUresult status = -1
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()
        # Zero the struct first so cleanup in __dealloc__ sees defined
        # contents even if the build call fails.
        memset(&self.build_data, 0, sizeof(cccl_device_reduce_build_result_t))

        with nogil:
            status = cccl_device_reduce_build(
                &self.build_data,
                d_in.iter_data,
                d_out.iter_data,
                op.op_data,
                h_init.value_data,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(
                f"Failed building reduce, error code: {status}"
            )

    def __dealloc__(DeviceReduceBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_reduce_cleanup(&self.build_data)
        # Raising from __dealloc__ is not useful; report instead.
        if (status != 0):
            print(f"Return code {status} encountered during reduce result cleanup")

    cpdef size_t compute(
        DeviceReduceBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_in,
        Iterator d_out,
        size_t num_items,
        Op op,
        Value h_init,
        stream
    ):
        """
        Run ``cccl_device_reduce`` with this build result.

        Args:
            temp_storage_ptr: integer address of temporary storage; a falsy
                value is passed to the library as NULL.
            temp_storage_bytes: size of the temporary storage in bytes.
            d_in, d_out: input and output iterators.
            num_items: number of items to process.
            op: reduction operator descriptor.
            h_init: initial value.
            stream: integer stream handle; a falsy value is passed as NULL.

        Returns:
            The temporary-storage size after the call (the library receives
            a pointer to it and may update it). Declared as ``size_t`` so
            sizes above ``INT_MAX`` are not truncated.

        Raises:
            RuntimeError: if the library returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_reduce(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_in.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                op.op_data,
                h_init.value_data,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing reduce, error code: {status}"
            )
        return storage_sz

    def _get_cubin(self):
        """Return a ``bytes`` copy of the cubin held by the build result."""
        return PyBytes_FromStringAndSize(
            <const char*>self.build_data.cubin,
            self.build_data.cubin_size
        )
942
+
943
+ # ------------
944
+ # DeviceScan
945
+ # ------------
946
+
947
+
948
# C declarations from cccl/c/scan.h used by DeviceScanBuildResult.
cdef extern from "cccl/c/scan.h":
    # Lets the C `_Bool` parameter below be written in Cython terms.
    ctypedef bint _Bool

    # Only the fields read by this module (_get_cubin) are declared.
    cdef struct cccl_device_scan_build_result_t 'cccl_device_scan_build_result_t':
        const char* cubin
        size_t cubin_size

    # Called from DeviceScanBuildResult.__cinit__ with: result struct,
    # input iterator, output iterator, op, init type, force_inclusive flag,
    # init kind, cc major/minor, and the cub / thrust / libcudacxx / ctk paths.
    cdef CUresult cccl_device_scan_build(
        cccl_device_scan_build_result_t*,
        cccl_iterator_t,
        cccl_iterator_t,
        cccl_op_t,
        cccl_type_info,
        _Bool,
        cccl_init_kind_t,
        int, int, const char*, const char*, const char*, const char*
    ) nogil

    # The run functions below are called with: build result, temp storage
    # pointer, pointer to temp storage size, input iterator, output
    # iterator, number of items, op, [initial value], stream.

    # Initial value passed as cccl_value_t.
    cdef CUresult cccl_device_exclusive_scan(
        cccl_device_scan_build_result_t,
        void *,
        size_t *,
        cccl_iterator_t,
        cccl_iterator_t,
        uint64_t,
        cccl_op_t,
        cccl_value_t,
        CUstream
    ) nogil

    # Initial value passed as cccl_value_t.
    cdef CUresult cccl_device_inclusive_scan(
        cccl_device_scan_build_result_t,
        void *,
        size_t *,
        cccl_iterator_t,
        cccl_iterator_t,
        uint64_t,
        cccl_op_t,
        cccl_value_t,
        CUstream
    ) nogil

    # Initial value passed as an iterator.
    cdef CUresult cccl_device_exclusive_scan_future_value(
        cccl_device_scan_build_result_t,
        void *,
        size_t *,
        cccl_iterator_t,
        cccl_iterator_t,
        uint64_t,
        cccl_op_t,
        cccl_iterator_t,
        CUstream
    ) nogil

    # Initial value passed as an iterator.
    cdef CUresult cccl_device_inclusive_scan_future_value(
        cccl_device_scan_build_result_t,
        void *,
        size_t *,
        cccl_iterator_t,
        cccl_iterator_t,
        uint64_t,
        cccl_op_t,
        cccl_iterator_t,
        CUstream
    ) nogil

    # No initial-value argument.
    cdef CUresult cccl_device_inclusive_scan_no_init(
        cccl_device_scan_build_result_t,
        void *,
        size_t *,
        cccl_iterator_t,
        cccl_iterator_t,
        uint64_t,
        cccl_op_t,
        CUstream
    ) nogil

    # Called from DeviceScanBuildResult.__dealloc__.
    cdef CUresult cccl_device_scan_cleanup(
        cccl_device_scan_build_result_t*
    ) nogil
1028
+
1029
+
1030
cdef class DeviceScanBuildResult:
    """
    Owns a ``cccl_device_scan_build_result_t``.

    The constructor calls ``cccl_device_scan_build``; each ``compute_*``
    method calls the matching ``cccl_device_*_scan*`` entry point;
    deallocation calls ``cccl_device_scan_cleanup``.

    All ``compute_*`` methods share these conventions:
        * ``temp_storage_ptr`` / ``stream`` are integer handles; a falsy
          value is passed to the library as NULL;
        * the return value is the temporary-storage size after the call
          (the library receives a pointer to it and may update it),
          declared as ``size_t`` so sizes above ``INT_MAX`` are not
          truncated;
        * ``RuntimeError`` is raised on a non-zero library status.
    """
    cdef cccl_device_scan_build_result_t build_data

    def __cinit__(
        DeviceScanBuildResult self,
        Iterator d_in,
        Iterator d_out,
        Op op,
        TypeInfo init_type,
        bint force_inclusive,
        cccl_init_kind_t init_kind,
        CommonData common_data
    ):
        cdef CUresult status = -1
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()
        # Zero the struct first so cleanup in __dealloc__ sees defined
        # contents even if the build call fails.
        memset(&self.build_data, 0, sizeof(cccl_device_scan_build_result_t))

        with nogil:
            status = cccl_device_scan_build(
                &self.build_data,
                d_in.iter_data,
                d_out.iter_data,
                op.op_data,
                init_type.type_info,
                force_inclusive,
                init_kind,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(f"Error {status} building scan")

    def __dealloc__(DeviceScanBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_scan_cleanup(&self.build_data)
        # Raising from __dealloc__ is not useful; report instead.
        if (status != 0):
            print(f"Return code {status} encountered during scan result cleanup")

    cpdef size_t compute_inclusive(
        DeviceScanBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_in,
        Iterator d_out,
        size_t num_items,
        Op op,
        Value init_value,
        stream
    ):
        """Run ``cccl_device_inclusive_scan`` with a ``Value`` initial value."""
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_inclusive_scan(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_in.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                op.op_data,
                init_value.value_data,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing inclusive scan, error code: {status}"
            )
        return storage_sz

    cpdef size_t compute_exclusive(
        DeviceScanBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_in,
        Iterator d_out,
        size_t num_items,
        Op op,
        Value init_value,
        stream
    ):
        """Run ``cccl_device_exclusive_scan`` with a ``Value`` initial value."""
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_exclusive_scan(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_in.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                op.op_data,
                init_value.value_data,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing exclusive scan, error code: {status}"
            )
        return storage_sz

    cpdef size_t compute_inclusive_future_value(
        DeviceScanBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_in,
        Iterator d_out,
        size_t num_items,
        Op op,
        Iterator init_value,
        stream
    ):
        """
        Run ``cccl_device_inclusive_scan_future_value``; the initial value
        is supplied as an ``Iterator``.
        """
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_inclusive_scan_future_value(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_in.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                op.op_data,
                init_value.iter_data,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing inclusive scan, error code: {status}"
            )
        return storage_sz

    cpdef size_t compute_exclusive_future_value(
        DeviceScanBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_in,
        Iterator d_out,
        size_t num_items,
        Op op,
        Iterator init_value,
        stream
    ):
        """
        Run ``cccl_device_exclusive_scan_future_value``; the initial value
        is supplied as an ``Iterator``.
        """
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_exclusive_scan_future_value(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_in.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                op.op_data,
                init_value.iter_data,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing exclusive scan, error code: {status}"
            )
        return storage_sz

    cpdef size_t compute_inclusive_no_init(
        DeviceScanBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_in,
        Iterator d_out,
        size_t num_items,
        Op op,
        object init_value,
        stream
    ):
        """
        Run ``cccl_device_inclusive_scan_no_init``.

        `init_value` is accepted so the signature matches the other
        ``compute_*`` methods; it is not passed to the library.
        """
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_inclusive_scan_no_init(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_in.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                op.op_data,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing inclusive scan, error code: {status}"
            )
        return storage_sz

    def _get_cubin(self):
        """Return a ``bytes`` copy of the cubin held by the build result."""
        return PyBytes_FromStringAndSize(
            <const char*>self.build_data.cubin,
            self.build_data.cubin_size
        )
1252
+
1253
+ # -----------------------
1254
+ # DeviceSegmentedReduce
1255
+ # -----------------------
1256
+
1257
+
1258
# C declarations from cccl/c/segmented_reduce.h used by
# DeviceSegmentedReduceBuildResult.
cdef extern from "cccl/c/segmented_reduce.h":
    # Only the fields read by this module (_get_cubin) are declared.
    cdef struct cccl_device_segmented_reduce_build_result_t 'cccl_device_segmented_reduce_build_result_t':
        const char* cubin
        size_t cubin_size

    # Called from __cinit__ with: result struct, input iterator, output
    # iterator, start offsets, end offsets, op, initial value,
    # cc major/minor, and the cub / thrust / libcudacxx / ctk paths.
    cdef CUresult cccl_device_segmented_reduce_build(
        cccl_device_segmented_reduce_build_result_t*,
        cccl_iterator_t,
        cccl_iterator_t,
        cccl_iterator_t,
        cccl_iterator_t,
        cccl_op_t,
        cccl_value_t,
        int, int, const char*, const char*, const char*, const char*
    ) nogil

    # Called from compute with: build result, temp storage pointer,
    # pointer to temp storage size, input iterator, output iterator,
    # num_items, start offsets, end offsets, op, initial value, stream.
    cdef CUresult cccl_device_segmented_reduce(
        cccl_device_segmented_reduce_build_result_t,
        void *,
        size_t *,
        cccl_iterator_t,
        cccl_iterator_t,
        uint64_t,
        cccl_iterator_t,
        cccl_iterator_t,
        cccl_op_t,
        cccl_value_t,
        CUstream
    ) nogil

    # Called from __dealloc__.
    cdef CUresult cccl_device_segmented_reduce_cleanup(
        cccl_device_segmented_reduce_build_result_t* bld_ptr
    ) nogil
1291
+
1292
+
1293
cdef class DeviceSegmentedReduceBuildResult:
    """
    Owns a ``cccl_device_segmented_reduce_build_result_t``.

    The constructor calls ``cccl_device_segmented_reduce_build``;
    ``compute`` calls ``cccl_device_segmented_reduce``; deallocation calls
    ``cccl_device_segmented_reduce_cleanup``.
    """
    cdef cccl_device_segmented_reduce_build_result_t build_data

    def __cinit__(
        DeviceSegmentedReduceBuildResult self,
        Iterator d_in,
        Iterator d_out,
        Iterator start_offsets,
        Iterator end_offsets,
        Op op,
        Value h_init,
        CommonData common_data
    ):
        cdef CUresult status = -1
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        # Zero the struct first so cleanup in __dealloc__ sees defined
        # contents even if the build call fails.
        memset(&self.build_data, 0, sizeof(cccl_device_segmented_reduce_build_result_t))
        with nogil:
            status = cccl_device_segmented_reduce_build(
                &self.build_data,
                d_in.iter_data,
                d_out.iter_data,
                start_offsets.iter_data,
                end_offsets.iter_data,
                op.op_data,
                h_init.value_data,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(
                f"Failed building segmented_reduce, error code: {status}"
            )

    def __dealloc__(DeviceSegmentedReduceBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_segmented_reduce_cleanup(&self.build_data)
        # Raising from __dealloc__ is not useful; report instead.
        if (status != 0):
            print(f"Return code {status} encountered during segmented_reduce result cleanup")

    cpdef size_t compute(
        DeviceSegmentedReduceBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_in,
        Iterator d_out,
        size_t num_items,
        Iterator start_offsets,
        Iterator end_offsets,
        Op op,
        Value h_init,
        stream
    ):
        """
        Run ``cccl_device_segmented_reduce`` with this build result.

        Args:
            temp_storage_ptr: integer address of temporary storage; a falsy
                value is passed to the library as NULL.
            temp_storage_bytes: size of the temporary storage in bytes.
            d_in, d_out: input and output iterators.
            num_items: count forwarded to the library as ``uint64_t``.
            start_offsets, end_offsets: segment offset iterators.
            op: reduction operator descriptor.
            h_init: initial value.
            stream: integer stream handle; a falsy value is passed as NULL.

        Returns:
            The temporary-storage size after the call (the library receives
            a pointer to it and may update it). Declared as ``size_t`` so
            sizes above ``INT_MAX`` are not truncated.

        Raises:
            RuntimeError: if the library returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_segmented_reduce(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_in.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                start_offsets.iter_data,
                end_offsets.iter_data,
                op.op_data,
                h_init.value_data,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing segmented_reduce, error code: {status}"
            )
        return storage_sz

    def _get_cubin(self):
        """Return a ``bytes`` copy of the cubin held by the build result."""
        return PyBytes_FromStringAndSize(
            <const char*>self.build_data.cubin,
            self.build_data.cubin_size
        )
1386
+
1387
+ # -----------------
1388
+ # DeviceMergeSort
1389
+ # -----------------
1390
+
1391
+
1392
# C declarations from cccl/c/merge_sort.h used by DeviceMergeSortBuildResult.
cdef extern from "cccl/c/merge_sort.h":
    # Only the cubin pointer and its size are declared here.
    cdef struct cccl_device_merge_sort_build_result_t 'cccl_device_merge_sort_build_result_t':
        const char* cubin
        size_t cubin_size

    # Trailing arguments, as passed by DeviceMergeSortBuildResult.__cinit__:
    # op, cc major/minor, and the cub / thrust / libcudacxx / ctk paths.
    cdef CUresult cccl_device_merge_sort_build(
        cccl_device_merge_sort_build_result_t *bld_ptr,
        cccl_iterator_t d_in_keys,
        cccl_iterator_t d_in_items,
        cccl_iterator_t d_out_keys,
        cccl_iterator_t d_out_items,
        cccl_op_t,
        int, int, const char*, const char*, const char*, const char*
    ) nogil

    # NOTE(review): presumably the four iterators follow the same
    # keys/items order as the build function above - confirm against the header.
    cdef CUresult cccl_device_merge_sort(
        cccl_device_merge_sort_build_result_t,
        void *,
        size_t *,
        cccl_iterator_t,
        cccl_iterator_t,
        cccl_iterator_t,
        cccl_iterator_t,
        uint64_t,
        cccl_op_t,
        CUstream
    ) nogil

    # Called from DeviceMergeSortBuildResult.__dealloc__.
    cdef CUresult cccl_device_merge_sort_cleanup(
        cccl_device_merge_sort_build_result_t* bld_ptr
    ) nogil
1423
+
1424
+
1425
cdef class DeviceMergeSortBuildResult:
    """Wrapper around a ``cccl_device_merge_sort_build_result_t``.

    ``__cinit__`` fills the build result via ``cccl_device_merge_sort_build``,
    ``compute`` forwards to ``cccl_device_merge_sort`` and ``__dealloc__``
    hands the result to ``cccl_device_merge_sort_cleanup``.
    """
    cdef cccl_device_merge_sort_build_result_t build_data

    def __cinit__(
        DeviceMergeSortBuildResult self,
        Iterator d_in_keys,
        Iterator d_in_items,
        Iterator d_out_keys,
        Iterator d_out_items,
        Op op,
        CommonData common_data
    ):
        """Build the merge_sort result.

        Raises:
            RuntimeError: if ``cccl_device_merge_sort_build`` returns a
                nonzero status.
        """
        cdef CUresult status = -1
        # Everything needed from ``common_data`` is read before the GIL is
        # released for the build call.
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        memset(&self.build_data, 0, sizeof(cccl_device_merge_sort_build_result_t))
        with nogil:
            status = cccl_device_merge_sort_build(
                &self.build_data,
                d_in_keys.iter_data,
                d_in_items.iter_data,
                d_out_keys.iter_data,
                d_out_items.iter_data,
                op.op_data,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(
                f"Failed building merge_sort, error code: {status}"
            )

    def __dealloc__(DeviceMergeSortBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_merge_sort_cleanup(&self.build_data)
        # A failed cleanup is reported with print() rather than raised.
        if (status != 0):
            print(f"Return code {status} encountered during merge_sort result cleanup")

    cpdef size_t compute(
        DeviceMergeSortBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_in_keys,
        Iterator d_in_items,
        Iterator d_out_keys,
        Iterator d_out_items,
        size_t num_items,
        Op op,
        stream
    ):
        """Call ``cccl_device_merge_sort`` with this build result.

        A falsy ``temp_storage_ptr`` is passed to the C call as ``NULL``; a
        falsy ``stream`` is passed as a ``NULL`` stream.

        Returns:
            The temporary-storage size as left by the C call in the size
            variable it receives by pointer. The return type is ``size_t``
            (not ``int``) so sizes above ``INT_MAX`` are not truncated.

        Raises:
            RuntimeError: if the C call returns a nonzero status.
        """
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL
        with nogil:
            status = cccl_device_merge_sort(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_in_keys.iter_data,
                d_in_items.iter_data,
                d_out_keys.iter_data,
                d_out_items.iter_data,
                <uint64_t>num_items,
                op.op_data,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing merge_sort, error code: {status}"
            )
        return storage_sz

    def _get_cubin(self):
        """Return the cubin stored in the build result as ``bytes``."""
        return PyBytes_FromStringAndSize(
            <const char*>self.build_data.cubin,
            self.build_data.cubin_size
        )
1514
+
1515
+
1516
+ # -------------------
1517
+ # DeviceUniqueByKey
1518
+ # -------------------
1519
+
1520
cdef extern from "cccl/c/unique_by_key.h":
    # Fields of the build result that this module accesses.
    cdef struct cccl_device_unique_by_key_build_result_t:
        const char* cubin
        size_t cubin_size

    # Trailing arguments are named after what DeviceUniqueByKeyBuildResult
    # passes in those positions.
    cdef CUresult cccl_device_unique_by_key_build(
        cccl_device_unique_by_key_build_result_t *build_ptr,
        cccl_iterator_t d_keys_in,
        cccl_iterator_t d_values_in,
        cccl_iterator_t d_keys_out,
        cccl_iterator_t d_values_out,
        cccl_iterator_t d_num_selected_out,
        cccl_op_t comparison_op,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    ) nogil

    cdef CUresult cccl_device_unique_by_key(
        cccl_device_unique_by_key_build_result_t build,
        void *d_storage_ptr,
        size_t *d_storage_nbytes,
        cccl_iterator_t d_keys_in,
        cccl_iterator_t d_values_in,
        cccl_iterator_t d_keys_out,
        cccl_iterator_t d_values_out,
        cccl_iterator_t d_num_selected_out,
        cccl_op_t comparison_op,
        size_t num_items,
        CUstream stream
    ) nogil

    cdef CUresult cccl_device_unique_by_key_cleanup(
        cccl_device_unique_by_key_build_result_t *build_ptr
    ) nogil
1554
+
1555
+
1556
cdef class DeviceUniqueByKeyBuildResult:
    """Wrapper around a ``cccl_device_unique_by_key_build_result_t``.

    ``__cinit__`` fills the build result via
    ``cccl_device_unique_by_key_build``, ``compute`` forwards to
    ``cccl_device_unique_by_key`` and ``__dealloc__`` hands the result to
    ``cccl_device_unique_by_key_cleanup``.
    """
    cdef cccl_device_unique_by_key_build_result_t build_data

    def __cinit__(
        DeviceUniqueByKeyBuildResult self,
        Iterator d_keys_in,
        Iterator d_values_in,
        Iterator d_keys_out,
        Iterator d_values_out,
        Iterator d_num_selected_out,
        Op comparison_op,
        CommonData common_data
    ):
        """Build the unique_by_key result.

        Raises:
            RuntimeError: if ``cccl_device_unique_by_key_build`` returns a
                nonzero status.
        """
        cdef CUresult status = -1
        # Everything needed from ``common_data`` is read before the GIL is
        # released for the build call.
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        memset(&self.build_data, 0, sizeof(cccl_device_unique_by_key_build_result_t))
        with nogil:
            status = cccl_device_unique_by_key_build(
                &self.build_data,
                d_keys_in.iter_data,
                d_values_in.iter_data,
                d_keys_out.iter_data,
                d_values_out.iter_data,
                d_num_selected_out.iter_data,
                comparison_op.op_data,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(
                f"Failed building unique_by_key, error code: {status}"
            )

    def __dealloc__(DeviceUniqueByKeyBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_unique_by_key_cleanup(&self.build_data)
        # A failed cleanup is reported with print() rather than raised.
        if (status != 0):
            print(f"Return code {status} encountered during unique_by_key result cleanup")

    cpdef size_t compute(
        DeviceUniqueByKeyBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_keys_in,
        Iterator d_values_in,
        Iterator d_keys_out,
        Iterator d_values_out,
        Iterator d_num_selected_out,
        Op comparison_op,
        size_t num_items,
        stream
    ):
        """Call ``cccl_device_unique_by_key`` with this build result.

        A falsy ``temp_storage_ptr`` is passed to the C call as ``NULL``; a
        falsy ``stream`` is passed as a ``NULL`` stream.

        Returns:
            The temporary-storage size as left by the C call in the size
            variable it receives by pointer. The return type is ``size_t``
            (not ``int``) so sizes above ``INT_MAX`` are not truncated.

        Raises:
            RuntimeError: if the C call returns a nonzero status.
        """
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_unique_by_key(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_keys_in.iter_data,
                d_values_in.iter_data,
                d_keys_out.iter_data,
                d_values_out.iter_data,
                d_num_selected_out.iter_data,
                comparison_op.op_data,
                <uint64_t>num_items,
                c_stream
            )

        if status != 0:
            raise RuntimeError(
                f"Failed executing unique_by_key, error code: {status}"
            )
        return storage_sz

    def _get_cubin(self):
        """Return the cubin stored in the build result as ``bytes``."""
        return PyBytes_FromStringAndSize(
            <const char*>self.build_data.cubin,
            self.build_data.cubin_size
        )
1650
+
1651
+ # -----------------
1652
+ # DeviceRadixSort
1653
+ # -----------------
1654
+
1655
cdef extern from "cccl/c/radix_sort.h":
    # Fields of the build result that this module accesses.
    cdef struct cccl_device_radix_sort_build_result_t:
        const char* cubin
        size_t cubin_size

    # Trailing arguments are named after what DeviceRadixSortBuildResult
    # passes in those positions.
    cdef CUresult cccl_device_radix_sort_build(
        cccl_device_radix_sort_build_result_t *build_ptr,
        cccl_sort_order_t sort_order,
        cccl_iterator_t d_keys_in,
        cccl_iterator_t d_values_in,
        cccl_op_t decomposer,
        const char* decomposer_return_type,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    ) nogil

    cdef CUresult cccl_device_radix_sort(
        cccl_device_radix_sort_build_result_t build,
        void *d_storage_ptr,
        size_t *d_storage_nbytes,
        cccl_iterator_t d_keys_in,
        cccl_iterator_t d_keys_out,
        cccl_iterator_t d_values_in,
        cccl_iterator_t d_values_out,
        cccl_op_t decomposer,
        size_t num_items,
        int begin_bit,
        int end_bit,
        bint is_overwrite_okay,
        int* selector,
        CUstream stream
    ) nogil

    cdef CUresult cccl_device_radix_sort_cleanup(
        cccl_device_radix_sort_build_result_t *build_ptr
    ) nogil
1690
+
1691
+
1692
cdef class DeviceRadixSortBuildResult:
    """Wrapper around a ``cccl_device_radix_sort_build_result_t``.

    ``__cinit__`` fills the build result via ``cccl_device_radix_sort_build``
    for the requested sort order, ``compute`` forwards to
    ``cccl_device_radix_sort`` and ``__dealloc__`` hands the result to
    ``cccl_device_radix_sort_cleanup``.
    """
    cdef cccl_device_radix_sort_build_result_t build_data

    def __cinit__(
        DeviceRadixSortBuildResult self,
        cccl_sort_order_t order,
        Iterator d_keys_in,
        Iterator d_values_in,
        Op decomposer_op,
        const char* decomposer_return_type,
        CommonData common_data
    ):
        """Build the radix_sort result.

        Raises:
            RuntimeError: if ``cccl_device_radix_sort_build`` returns a
                nonzero status.
        """
        cdef CUresult status = -1
        # Everything needed from ``common_data`` is read before the GIL is
        # released for the build call.
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        memset(&self.build_data, 0, sizeof(cccl_device_radix_sort_build_result_t))
        with nogil:
            status = cccl_device_radix_sort_build(
                &self.build_data,
                order,
                d_keys_in.iter_data,
                d_values_in.iter_data,
                decomposer_op.op_data,
                decomposer_return_type,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(
                f"Failed building radix_sort, error code: {status}"
            )

    def __dealloc__(DeviceRadixSortBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_radix_sort_cleanup(&self.build_data)
        # A failed cleanup is reported with print() rather than raised.
        if (status != 0):
            print(f"Return code {status} encountered during radix_sort result cleanup")

    cpdef tuple compute(
        DeviceRadixSortBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_keys_in,
        Iterator d_keys_out,
        Iterator d_values_in,
        Iterator d_values_out,
        Op decomposer_op,
        size_t num_items,
        int begin_bit,
        int end_bit,
        bint is_overwrite_okay,
        selector,
        stream
    ):
        """Call ``cccl_device_radix_sort`` with this build result.

        A falsy ``temp_storage_ptr`` is passed to the C call as ``NULL``; a
        falsy ``stream`` is passed as a ``NULL`` stream. ``selector`` is
        copied into a C ``int`` that the C call receives by pointer.

        Returns:
            ``(storage_size, selector)``: the values left by the C call in
            the size and selector variables it receives by pointer.

        Raises:
            RuntimeError: if the C call returns a nonzero status.
        """
        cdef CUresult status = -1
        # uintptr_t is the integer-to-pointer cast used by the other wrappers
        # in this file.
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef int selector_int = <int>selector
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_radix_sort(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_keys_in.iter_data,
                d_keys_out.iter_data,
                d_values_in.iter_data,
                d_values_out.iter_data,
                decomposer_op.op_data,
                <uint64_t>num_items,
                begin_bit,
                end_bit,
                is_overwrite_okay,
                &selector_int,
                c_stream
            )

        if status != 0:
            # The sort order is fixed at build time and may be either
            # direction, so the message does not name one.
            raise RuntimeError(
                f"Failed executing radix_sort, error code: {status}"
            )
        return <object>storage_sz, <object>selector_int

    def _get_cubin(self):
        """Return the cubin stored in the build result as ``bytes``."""
        return PyBytes_FromStringAndSize(
            <const char*>self.build_data.cubin,
            self.build_data.cubin_size
        )
1792
+
1793
+ # --------------------------------------------
1794
+ # DeviceUnaryTransform/DeviceBinaryTransform
1795
+ # --------------------------------------------
1796
cdef extern from "cccl/c/transform.h":
    # One build-result type is shared by the unary and binary transform
    # entry points declared below.
    cdef struct cccl_device_transform_build_result_t:
        const char* cubin
        size_t cubin_size

    # Trailing arguments are named after what DeviceUnaryTransform passes in
    # those positions.
    cdef CUresult cccl_device_unary_transform_build(
        cccl_device_transform_build_result_t *build_ptr,
        cccl_iterator_t d_in,
        cccl_iterator_t d_out,
        cccl_op_t op,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    ) nogil

    cdef CUresult cccl_device_unary_transform(
        cccl_device_transform_build_result_t build,
        cccl_iterator_t d_in,
        cccl_iterator_t d_out,
        uint64_t num_items,
        cccl_op_t op,
        CUstream stream
    ) nogil

    # Trailing arguments are named after what DeviceBinaryTransform passes in
    # those positions.
    cdef CUresult cccl_device_binary_transform_build(
        cccl_device_transform_build_result_t *build_ptr,
        cccl_iterator_t d_in1,
        cccl_iterator_t d_in2,
        cccl_iterator_t d_out,
        cccl_op_t op,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    ) nogil

    cdef CUresult cccl_device_binary_transform(
        cccl_device_transform_build_result_t build,
        cccl_iterator_t d_in1,
        cccl_iterator_t d_in2,
        cccl_iterator_t d_out,
        uint64_t num_items,
        cccl_op_t op,
        CUstream stream
    ) nogil

    cdef CUresult cccl_device_transform_cleanup(
        cccl_device_transform_build_result_t *build_ptr
    ) nogil
1838
+
1839
+
1840
cdef class DeviceUnaryTransform:
    """Wrapper around a ``cccl_device_transform_build_result_t`` built for a
    unary transform.

    ``__cinit__`` fills the build result via
    ``cccl_device_unary_transform_build``, ``compute`` forwards to
    ``cccl_device_unary_transform`` and ``__dealloc__`` hands the result to
    ``cccl_device_transform_cleanup``.
    """
    cdef cccl_device_transform_build_result_t build_data

    def __cinit__(
        self,
        Iterator d_in,
        Iterator d_out,
        Op op,
        CommonData common_data
    ):
        """Build the unary transform.

        Raises:
            RuntimeError: if ``cccl_device_unary_transform_build`` returns a
                nonzero status; the message carries the status code.
        """
        cdef CUresult status = -1
        # Everything needed from ``common_data`` is read before the GIL is
        # released for the build call.
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        memset(&self.build_data, 0, sizeof(cccl_device_transform_build_result_t))
        with nogil:
            status = cccl_device_unary_transform_build(
                &self.build_data,
                d_in.iter_data,
                d_out.iter_data,
                op.op_data,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            # Include the status code, as the other wrappers in this file do.
            raise RuntimeError(
                f"Failed to build unary transform, error code: {status}"
            )

    def __dealloc__(DeviceUnaryTransform self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_transform_cleanup(&self.build_data)
        # A failed cleanup is reported with print() rather than raised.
        if (status != 0):
            print(f"Return code {status} encountered during unary transform result cleanup")

    cpdef void compute(
        DeviceUnaryTransform self,
        Iterator d_in,
        Iterator d_out,
        size_t num_items,
        Op op,
        stream
    ):
        """Call ``cccl_device_unary_transform`` with this build result.

        A falsy ``stream`` is passed to the C call as a ``NULL`` stream.

        Raises:
            RuntimeError: if the C call returns a nonzero status; the
                message carries the status code.
        """
        cdef CUresult status = -1
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL
        with nogil:
            status = cccl_device_unary_transform(
                self.build_data,
                d_in.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                op.op_data,
                c_stream
            )
        if (status != 0):
            raise RuntimeError(
                f"Failed to compute unary transform, error code: {status}"
            )

    def _get_cubin(self):
        """Return the cubin stored in the build result as ``bytes``."""
        return PyBytes_FromStringAndSize(
            <const char*>self.build_data.cubin,
            self.build_data.cubin_size
        )
1911
+
1912
+
1913
cdef class DeviceBinaryTransform:
    """Wrapper around a ``cccl_device_transform_build_result_t`` built for a
    binary transform.

    ``__cinit__`` fills the build result via
    ``cccl_device_binary_transform_build``, ``compute`` forwards to
    ``cccl_device_binary_transform`` and ``__dealloc__`` hands the result to
    ``cccl_device_transform_cleanup``.
    """
    cdef cccl_device_transform_build_result_t build_data

    def __cinit__(
        self,
        Iterator d_in1,
        Iterator d_in2,
        Iterator d_out,
        Op op,
        CommonData common_data
    ):
        """Build the binary transform.

        Raises:
            RuntimeError: if ``cccl_device_binary_transform_build`` returns a
                nonzero status; the message carries the status code.
        """
        cdef CUresult status = -1
        # Everything needed from ``common_data`` is read before the GIL is
        # released for the build call.
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        memset(&self.build_data, 0, sizeof(cccl_device_transform_build_result_t))
        with nogil:
            status = cccl_device_binary_transform_build(
                &self.build_data,
                d_in1.iter_data,
                d_in2.iter_data,
                d_out.iter_data,
                op.op_data,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            # Include the status code, as the other wrappers in this file do.
            raise RuntimeError(
                f"Failed to build binary transform, error code: {status}"
            )

    def __dealloc__(DeviceBinaryTransform self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_transform_cleanup(&self.build_data)
        # A failed cleanup is reported with print() rather than raised.
        if (status != 0):
            print(f"Return code {status} encountered during binary transform result cleanup")

    cpdef void compute(
        DeviceBinaryTransform self,
        Iterator d_in1,
        Iterator d_in2,
        Iterator d_out,
        size_t num_items,
        Op op,
        stream
    ):
        """Call ``cccl_device_binary_transform`` with this build result.

        A falsy ``stream`` is passed to the C call as a ``NULL`` stream.

        Raises:
            RuntimeError: if the C call returns a nonzero status; the
                message carries the status code.
        """
        cdef CUresult status = -1
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL
        with nogil:
            status = cccl_device_binary_transform(
                self.build_data,
                d_in1.iter_data,
                d_in2.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                op.op_data,
                c_stream
            )
        if (status != 0):
            raise RuntimeError(
                f"Failed to compute binary transform, error code: {status}"
            )

    def _get_cubin(self):
        """Return the cubin stored in the build result as ``bytes``."""
        return PyBytes_FromStringAndSize(
            <const char*>self.build_data.cubin,
            self.build_data.cubin_size
        )
1987
+
1988
+
1989
+ # -----------------
1990
+ # DeviceHistogram
1991
+ # -----------------
1992
cdef extern from "cccl/c/histogram.h":
    # Fields of the build result that this module accesses.
    cdef struct cccl_device_histogram_build_result_t:
        const char* cubin
        size_t cubin_size

    # Trailing arguments are named after what DeviceHistogramBuildResult
    # passes in those positions.
    cdef CUresult cccl_device_histogram_build(
        cccl_device_histogram_build_result_t *build_ptr,
        int num_channels,
        int num_active_channels,
        cccl_iterator_t d_samples,
        int num_output_levels_val,
        cccl_iterator_t d_output_histograms,
        cccl_value_t h_levels,
        int64_t num_rows,
        int64_t row_stride_samples,
        bint is_evenly_segmented,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    ) nogil

    cdef CUresult cccl_device_histogram_even(
        cccl_device_histogram_build_result_t build,
        void *d_storage_ptr,
        size_t *d_storage_nbytes,
        cccl_iterator_t d_samples,
        cccl_iterator_t d_output_histograms,
        cccl_value_t num_output_levels,
        cccl_value_t lower_level,
        cccl_value_t upper_level,
        int64_t num_row_pixels,
        int64_t num_rows,
        int64_t row_stride_samples,
        CUstream stream
    ) nogil

    cdef CUresult cccl_device_histogram_cleanup(
        cccl_device_histogram_build_result_t *build_ptr
    ) nogil
2029
+
2030
+
2031
cdef class DeviceHistogramBuildResult:
    """Wrapper around a ``cccl_device_histogram_build_result_t``.

    ``__cinit__`` fills the build result via ``cccl_device_histogram_build``,
    ``compute_even`` forwards to ``cccl_device_histogram_even`` and
    ``__dealloc__`` hands the result to ``cccl_device_histogram_cleanup``.
    """
    cdef cccl_device_histogram_build_result_t build_data

    def __cinit__(
        DeviceHistogramBuildResult self,
        int num_channels,
        int num_active_channels,
        Iterator d_samples,
        int num_levels,
        Iterator d_histogram,
        Value h_levels,
        int64_t num_rows,
        int64_t row_stride_samples,
        bint is_evenly_segmented,
        CommonData common_data
    ):
        """Build the histogram result.

        ``num_rows`` and ``row_stride_samples`` are typed ``int64_t`` to match
        the parameters of ``cccl_device_histogram_build`` they are passed to.

        Raises:
            RuntimeError: if ``cccl_device_histogram_build`` returns a
                nonzero status.
        """
        cdef CUresult status = -1
        # Everything needed from ``common_data`` is read before the GIL is
        # released for the build call.
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        memset(&self.build_data, 0, sizeof(cccl_device_histogram_build_result_t))
        with nogil:
            status = cccl_device_histogram_build(
                &self.build_data,
                num_channels,
                num_active_channels,
                d_samples.iter_data,
                num_levels,
                d_histogram.iter_data,
                h_levels.value_data,
                num_rows,
                row_stride_samples,
                is_evenly_segmented,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(
                f"Failed building histogram, error code: {status}"
            )

    def __dealloc__(DeviceHistogramBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_histogram_cleanup(&self.build_data)
        # A failed cleanup is reported with print() rather than raised.
        if (status != 0):
            print(f"Return code {status} encountered during histogram result cleanup")

    cpdef size_t compute_even(
        DeviceHistogramBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_samples,
        Iterator d_histogram,
        Value h_num_output_levels,
        Value h_lower_level,
        Value h_upper_level,
        int64_t num_row_pixels,
        int64_t num_rows,
        int64_t row_stride_samples,
        stream
    ):
        """Call ``cccl_device_histogram_even`` with this build result.

        A falsy ``temp_storage_ptr`` is passed to the C call as ``NULL``; a
        falsy ``stream`` is passed as a ``NULL`` stream. The three size
        arguments are typed ``int64_t`` to match the C declaration.

        Returns:
            The temporary-storage size as left by the C call in the size
            variable it receives by pointer. The return type is ``size_t``
            (not ``int``) so sizes above ``INT_MAX`` are not truncated.

        Raises:
            RuntimeError: if the C call returns a nonzero status.
        """
        cdef CUresult status = -1
        # uintptr_t is the integer-to-pointer cast used by the other wrappers
        # in this file.
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_histogram_even(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_samples.iter_data,
                d_histogram.iter_data,
                h_num_output_levels.value_data,
                h_lower_level.value_data,
                h_upper_level.value_data,
                num_row_pixels,
                num_rows,
                row_stride_samples,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing histogram, error code: {status}"
            )
        return storage_sz

    def _get_cubin(self):
        """Return the cubin stored in the build result as ``bytes``."""
        return PyBytes_FromStringAndSize(
            <const char*>self.build_data.cubin,
            self.build_data.cubin_size
        )
2134
+
2135
+
2136
+ # ----------------------------------
2137
+ # DeviceThreeWayPartitionBuildResult
2138
+ # ----------------------------------
2139
cdef extern from "cccl/c/three_way_partition.h":
    # Fields of the build result that this module accesses.
    cdef struct cccl_device_three_way_partition_build_result_t:
        const char* cubin
        size_t cubin_size

    # Trailing arguments are named after what
    # DeviceThreeWayPartitionBuildResult passes in those positions.
    cdef CUresult cccl_device_three_way_partition_build(
        cccl_device_three_way_partition_build_result_t *build_ptr,
        cccl_iterator_t d_in,
        cccl_iterator_t d_first_part_out,
        cccl_iterator_t d_second_part_out,
        cccl_iterator_t d_unselected_out,
        cccl_iterator_t d_num_selected_out,
        cccl_op_t select_first_part_op,
        cccl_op_t select_second_part_op,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    ) nogil

    cdef CUresult cccl_device_three_way_partition(
        cccl_device_three_way_partition_build_result_t build,
        void* d_temp_storage,
        size_t* temp_storage_bytes,
        cccl_iterator_t d_in,
        cccl_iterator_t d_first_part_out,
        cccl_iterator_t d_second_part_out,
        cccl_iterator_t d_unselected_out,
        cccl_iterator_t d_num_selected_out,
        cccl_op_t select_first_part_op,
        cccl_op_t select_second_part_op,
        int64_t num_items,
        CUstream stream
    ) nogil

    cdef CUresult cccl_device_three_way_partition_cleanup(
        cccl_device_three_way_partition_build_result_t *build_ptr
    ) nogil
2174
+
2175
+
2176
cdef class DeviceThreeWayPartitionBuildResult:
    """Wrapper around a ``cccl_device_three_way_partition_build_result_t``.

    ``__cinit__`` fills the build result via
    ``cccl_device_three_way_partition_build``, ``compute`` forwards to
    ``cccl_device_three_way_partition`` and ``__dealloc__`` hands the result
    to ``cccl_device_three_way_partition_cleanup``.
    """
    cdef cccl_device_three_way_partition_build_result_t build_data

    def __cinit__(
        DeviceThreeWayPartitionBuildResult self,
        Iterator d_in,
        Iterator d_first_part_out,
        Iterator d_second_part_out,
        Iterator d_unselected_out,
        Iterator d_num_selected_out,
        Op select_first_part_op,
        Op select_second_part_op,
        CommonData common_data
    ):
        """Build the three_way_partition result.

        Raises:
            RuntimeError: if ``cccl_device_three_way_partition_build``
                returns a nonzero status.
        """
        cdef CUresult status = -1
        # Everything needed from ``common_data`` is read before the GIL is
        # released for the build call.
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        memset(&self.build_data, 0, sizeof(cccl_device_three_way_partition_build_result_t))
        with nogil:
            status = cccl_device_three_way_partition_build(
                &self.build_data,
                d_in.iter_data,
                d_first_part_out.iter_data,
                d_second_part_out.iter_data,
                d_unselected_out.iter_data,
                d_num_selected_out.iter_data,
                select_first_part_op.op_data,
                select_second_part_op.op_data,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(
                f"Failed building three_way_partition, error code: {status}"
            )

    def __dealloc__(DeviceThreeWayPartitionBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_three_way_partition_cleanup(&self.build_data)
        # A failed cleanup is reported with print() rather than raised.
        if (status != 0):
            print(f"Return code {status} encountered during three_way_partition result cleanup")

    cpdef size_t compute(
        DeviceThreeWayPartitionBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_in,
        Iterator d_first_part_out,
        Iterator d_second_part_out,
        Iterator d_unselected_out,
        Iterator d_num_selected_out,
        Op select_first_part_op,
        Op select_second_part_op,
        size_t num_items,
        stream
    ):
        """Call ``cccl_device_three_way_partition`` with this build result.

        A falsy ``temp_storage_ptr`` is passed to the C call as ``NULL``; a
        falsy ``stream`` is passed as a ``NULL`` stream.

        Returns:
            The temporary-storage size as left by the C call in the size
            variable it receives by pointer. The return type is ``size_t``
            (not ``int``) so sizes above ``INT_MAX`` are not truncated.

        Raises:
            RuntimeError: if the C call returns a nonzero status.
        """
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_three_way_partition(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_in.iter_data,
                d_first_part_out.iter_data,
                d_second_part_out.iter_data,
                d_unselected_out.iter_data,
                d_num_selected_out.iter_data,
                select_first_part_op.op_data,
                select_second_part_op.op_data,
                # Cast to the type the extern declaration gives this parameter.
                <int64_t>num_items,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing three_way_partition, error code: {status}"
            )
        return storage_sz

    def _get_cubin(self):
        """Return the cubin stored in the build result as ``bytes``."""
        return PyBytes_FromStringAndSize(
            <const char*>self.build_data.cubin,
            self.build_data.cubin_size
        )