cuda-cccl 0.1.3.2.0.dev271__cp310-cp310-manylinux_2_26_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cuda-cccl might be problematic. Click here for more details.

Files changed (1947)
  1. cuda/cccl/__init__.py +27 -0
  2. cuda/cccl/_cuda_version_utils.py +46 -0
  3. cuda/cccl/cooperative/__init__.py +3 -0
  4. cuda/cccl/cooperative/experimental/__init__.py +8 -0
  5. cuda/cccl/cooperative/experimental/_caching.py +48 -0
  6. cuda/cccl/cooperative/experimental/_common.py +273 -0
  7. cuda/cccl/cooperative/experimental/_nvrtc.py +91 -0
  8. cuda/cccl/cooperative/experimental/_scan_op.py +181 -0
  9. cuda/cccl/cooperative/experimental/_types.py +937 -0
  10. cuda/cccl/cooperative/experimental/_typing.py +107 -0
  11. cuda/cccl/cooperative/experimental/block/__init__.py +39 -0
  12. cuda/cccl/cooperative/experimental/block/_block_exchange.py +251 -0
  13. cuda/cccl/cooperative/experimental/block/_block_load_store.py +215 -0
  14. cuda/cccl/cooperative/experimental/block/_block_merge_sort.py +125 -0
  15. cuda/cccl/cooperative/experimental/block/_block_radix_sort.py +214 -0
  16. cuda/cccl/cooperative/experimental/block/_block_reduce.py +294 -0
  17. cuda/cccl/cooperative/experimental/block/_block_scan.py +983 -0
  18. cuda/cccl/cooperative/experimental/warp/__init__.py +9 -0
  19. cuda/cccl/cooperative/experimental/warp/_warp_merge_sort.py +92 -0
  20. cuda/cccl/cooperative/experimental/warp/_warp_reduce.py +153 -0
  21. cuda/cccl/cooperative/experimental/warp/_warp_scan.py +78 -0
  22. cuda/cccl/headers/__init__.py +7 -0
  23. cuda/cccl/headers/include/__init__.py +1 -0
  24. cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +262 -0
  25. cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +1181 -0
  26. cuda/cccl/headers/include/cub/agent/agent_for.cuh +84 -0
  27. cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +925 -0
  28. cuda/cccl/headers/include/cub/agent/agent_merge.cuh +226 -0
  29. cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +730 -0
  30. cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +766 -0
  31. cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +286 -0
  32. cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +704 -0
  33. cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +557 -0
  34. cuda/cccl/headers/include/cub/agent/agent_reduce.cuh +632 -0
  35. cuda/cccl/headers/include/cub/agent/agent_reduce_by_key.cuh +804 -0
  36. cuda/cccl/headers/include/cub/agent/agent_rle.cuh +1124 -0
  37. cuda/cccl/headers/include/cub/agent/agent_scan.cuh +561 -0
  38. cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +473 -0
  39. cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +292 -0
  40. cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +1114 -0
  41. cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +341 -0
  42. cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +592 -0
  43. cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +614 -0
  44. cuda/cccl/headers/include/cub/agent/single_pass_scan_operators.cuh +1342 -0
  45. cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +965 -0
  46. cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +1217 -0
  47. cuda/cccl/headers/include/cub/block/block_exchange.cuh +1306 -0
  48. cuda/cccl/headers/include/cub/block/block_histogram.cuh +420 -0
  49. cuda/cccl/headers/include/cub/block/block_load.cuh +1260 -0
  50. cuda/cccl/headers/include/cub/block/block_merge_sort.cuh +787 -0
  51. cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +1218 -0
  52. cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +2193 -0
  53. cuda/cccl/headers/include/cub/block/block_raking_layout.cuh +150 -0
  54. cuda/cccl/headers/include/cub/block/block_reduce.cuh +665 -0
  55. cuda/cccl/headers/include/cub/block/block_run_length_decode.cuh +437 -0
  56. cuda/cccl/headers/include/cub/block/block_scan.cuh +2583 -0
  57. cuda/cccl/headers/include/cub/block/block_shuffle.cuh +346 -0
  58. cuda/cccl/headers/include/cub/block/block_store.cuh +1246 -0
  59. cuda/cccl/headers/include/cub/block/radix_rank_sort_operations.cuh +620 -0
  60. cuda/cccl/headers/include/cub/block/specializations/block_histogram_atomic.cuh +86 -0
  61. cuda/cccl/headers/include/cub/block/specializations/block_histogram_sort.cuh +240 -0
  62. cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking.cuh +252 -0
  63. cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking_commutative_only.cuh +238 -0
  64. cuda/cccl/headers/include/cub/block/specializations/block_reduce_warp_reductions.cuh +281 -0
  65. cuda/cccl/headers/include/cub/block/specializations/block_scan_raking.cuh +790 -0
  66. cuda/cccl/headers/include/cub/block/specializations/block_scan_warp_scans.cuh +538 -0
  67. cuda/cccl/headers/include/cub/config.cuh +53 -0
  68. cuda/cccl/headers/include/cub/cub.cuh +112 -0
  69. cuda/cccl/headers/include/cub/detail/array_utils.cuh +77 -0
  70. cuda/cccl/headers/include/cub/detail/choose_offset.cuh +155 -0
  71. cuda/cccl/headers/include/cub/detail/detect_cuda_runtime.cuh +93 -0
  72. cuda/cccl/headers/include/cub/detail/device_double_buffer.cuh +96 -0
  73. cuda/cccl/headers/include/cub/detail/fast_modulo_division.cuh +246 -0
  74. cuda/cccl/headers/include/cub/detail/integer_utils.cuh +84 -0
  75. cuda/cccl/headers/include/cub/detail/launcher/cuda_driver.cuh +142 -0
  76. cuda/cccl/headers/include/cub/detail/launcher/cuda_runtime.cuh +100 -0
  77. cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +118 -0
  78. cuda/cccl/headers/include/cub/detail/ptx-json/README.md +71 -0
  79. cuda/cccl/headers/include/cub/detail/ptx-json/array.h +68 -0
  80. cuda/cccl/headers/include/cub/detail/ptx-json/json.h +61 -0
  81. cuda/cccl/headers/include/cub/detail/ptx-json/object.h +100 -0
  82. cuda/cccl/headers/include/cub/detail/ptx-json/string.h +71 -0
  83. cuda/cccl/headers/include/cub/detail/ptx-json/value.h +93 -0
  84. cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +63 -0
  85. cuda/cccl/headers/include/cub/detail/rfa.cuh +724 -0
  86. cuda/cccl/headers/include/cub/detail/strong_load.cuh +189 -0
  87. cuda/cccl/headers/include/cub/detail/strong_store.cuh +220 -0
  88. cuda/cccl/headers/include/cub/detail/temporary_storage.cuh +384 -0
  89. cuda/cccl/headers/include/cub/detail/type_traits.cuh +179 -0
  90. cuda/cccl/headers/include/cub/detail/uninitialized_copy.cuh +72 -0
  91. cuda/cccl/headers/include/cub/detail/unsafe_bitcast.cuh +56 -0
  92. cuda/cccl/headers/include/cub/device/device_adjacent_difference.cuh +596 -0
  93. cuda/cccl/headers/include/cub/device/device_copy.cuh +187 -0
  94. cuda/cccl/headers/include/cub/device/device_for.cuh +990 -0
  95. cuda/cccl/headers/include/cub/device/device_histogram.cuh +1507 -0
  96. cuda/cccl/headers/include/cub/device/device_memcpy.cuh +195 -0
  97. cuda/cccl/headers/include/cub/device/device_merge.cuh +202 -0
  98. cuda/cccl/headers/include/cub/device/device_merge_sort.cuh +979 -0
  99. cuda/cccl/headers/include/cub/device/device_partition.cuh +664 -0
  100. cuda/cccl/headers/include/cub/device/device_radix_sort.cuh +3435 -0
  101. cuda/cccl/headers/include/cub/device/device_reduce.cuh +1898 -0
  102. cuda/cccl/headers/include/cub/device/device_run_length_encode.cuh +370 -0
  103. cuda/cccl/headers/include/cub/device/device_scan.cuh +1899 -0
  104. cuda/cccl/headers/include/cub/device/device_segmented_radix_sort.cuh +1496 -0
  105. cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +1512 -0
  106. cuda/cccl/headers/include/cub/device/device_segmented_sort.cuh +2811 -0
  107. cuda/cccl/headers/include/cub/device/device_select.cuh +1224 -0
  108. cuda/cccl/headers/include/cub/device/device_transform.cuh +545 -0
  109. cuda/cccl/headers/include/cub/device/dispatch/dispatch_adjacent_difference.cuh +314 -0
  110. cuda/cccl/headers/include/cub/device/dispatch/dispatch_advance_iterators.cuh +109 -0
  111. cuda/cccl/headers/include/cub/device/dispatch/dispatch_batch_memcpy.cuh +718 -0
  112. cuda/cccl/headers/include/cub/device/dispatch/dispatch_common.cuh +45 -0
  113. cuda/cccl/headers/include/cub/device/dispatch/dispatch_for.cuh +197 -0
  114. cuda/cccl/headers/include/cub/device/dispatch/dispatch_histogram.cuh +1042 -0
  115. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +305 -0
  116. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge_sort.cuh +473 -0
  117. cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +1749 -0
  118. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +1316 -0
  119. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_by_key.cuh +656 -0
  120. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +497 -0
  121. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh +313 -0
  122. cuda/cccl/headers/include/cub/device/dispatch/dispatch_rle.cuh +612 -0
  123. cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan.cuh +497 -0
  124. cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan_by_key.cuh +598 -0
  125. cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +916 -0
  126. cuda/cccl/headers/include/cub/device/dispatch/dispatch_select_if.cuh +838 -0
  127. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +341 -0
  128. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +441 -0
  129. cuda/cccl/headers/include/cub/device/dispatch/dispatch_three_way_partition.cuh +455 -0
  130. cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +558 -0
  131. cuda/cccl/headers/include/cub/device/dispatch/dispatch_unique_by_key.cuh +543 -0
  132. cuda/cccl/headers/include/cub/device/dispatch/kernels/for_each.cuh +218 -0
  133. cuda/cccl/headers/include/cub/device/dispatch/kernels/histogram.cuh +505 -0
  134. cuda/cccl/headers/include/cub/device/dispatch/kernels/merge_sort.cuh +334 -0
  135. cuda/cccl/headers/include/cub/device/dispatch/kernels/radix_sort.cuh +799 -0
  136. cuda/cccl/headers/include/cub/device/dispatch/kernels/reduce.cuh +591 -0
  137. cuda/cccl/headers/include/cub/device/dispatch/kernels/scan.cuh +194 -0
  138. cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_reduce.cuh +330 -0
  139. cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_sort.cuh +475 -0
  140. cuda/cccl/headers/include/cub/device/dispatch/kernels/three_way_partition.cuh +121 -0
  141. cuda/cccl/headers/include/cub/device/dispatch/kernels/transform.cuh +987 -0
  142. cuda/cccl/headers/include/cub/device/dispatch/kernels/unique_by_key.cuh +176 -0
  143. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +70 -0
  144. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +121 -0
  145. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +63 -0
  146. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +278 -0
  147. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +91 -0
  148. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +118 -0
  149. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +1068 -0
  150. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce.cuh +493 -0
  151. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +945 -0
  152. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +675 -0
  153. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +609 -0
  154. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +1013 -0
  155. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +249 -0
  156. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_select_if.cuh +1587 -0
  157. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +407 -0
  158. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +448 -0
  159. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +874 -0
  160. cuda/cccl/headers/include/cub/grid/grid_even_share.cuh +226 -0
  161. cuda/cccl/headers/include/cub/grid/grid_mapping.cuh +106 -0
  162. cuda/cccl/headers/include/cub/grid/grid_queue.cuh +202 -0
  163. cuda/cccl/headers/include/cub/iterator/arg_index_input_iterator.cuh +256 -0
  164. cuda/cccl/headers/include/cub/iterator/cache_modified_input_iterator.cuh +260 -0
  165. cuda/cccl/headers/include/cub/iterator/cache_modified_output_iterator.cuh +252 -0
  166. cuda/cccl/headers/include/cub/iterator/tex_obj_input_iterator.cuh +322 -0
  167. cuda/cccl/headers/include/cub/thread/thread_load.cuh +347 -0
  168. cuda/cccl/headers/include/cub/thread/thread_operators.cuh +684 -0
  169. cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +547 -0
  170. cuda/cccl/headers/include/cub/thread/thread_scan.cuh +498 -0
  171. cuda/cccl/headers/include/cub/thread/thread_search.cuh +198 -0
  172. cuda/cccl/headers/include/cub/thread/thread_simd.cuh +464 -0
  173. cuda/cccl/headers/include/cub/thread/thread_sort.cuh +101 -0
  174. cuda/cccl/headers/include/cub/thread/thread_store.cuh +364 -0
  175. cuda/cccl/headers/include/cub/util_allocator.cuh +921 -0
  176. cuda/cccl/headers/include/cub/util_arch.cuh +167 -0
  177. cuda/cccl/headers/include/cub/util_cpp_dialect.cuh +95 -0
  178. cuda/cccl/headers/include/cub/util_debug.cuh +207 -0
  179. cuda/cccl/headers/include/cub/util_device.cuh +779 -0
  180. cuda/cccl/headers/include/cub/util_macro.cuh +99 -0
  181. cuda/cccl/headers/include/cub/util_math.cuh +115 -0
  182. cuda/cccl/headers/include/cub/util_namespace.cuh +176 -0
  183. cuda/cccl/headers/include/cub/util_policy_wrapper_t.cuh +55 -0
  184. cuda/cccl/headers/include/cub/util_ptx.cuh +513 -0
  185. cuda/cccl/headers/include/cub/util_temporary_storage.cuh +122 -0
  186. cuda/cccl/headers/include/cub/util_type.cuh +1136 -0
  187. cuda/cccl/headers/include/cub/util_vsmem.cuh +251 -0
  188. cuda/cccl/headers/include/cub/version.cuh +89 -0
  189. cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_shfl.cuh +329 -0
  190. cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_smem.cuh +177 -0
  191. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +729 -0
  192. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +405 -0
  193. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +950 -0
  194. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_smem.cuh +713 -0
  195. cuda/cccl/headers/include/cub/warp/warp_exchange.cuh +405 -0
  196. cuda/cccl/headers/include/cub/warp/warp_load.cuh +614 -0
  197. cuda/cccl/headers/include/cub/warp/warp_merge_sort.cuh +169 -0
  198. cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +822 -0
  199. cuda/cccl/headers/include/cub/warp/warp_scan.cuh +1885 -0
  200. cuda/cccl/headers/include/cub/warp/warp_store.cuh +520 -0
  201. cuda/cccl/headers/include/cub/warp/warp_utils.cuh +61 -0
  202. cuda/cccl/headers/include/cuda/__algorithm/common.h +68 -0
  203. cuda/cccl/headers/include/cuda/__algorithm/copy.h +143 -0
  204. cuda/cccl/headers/include/cuda/__algorithm/fill.h +107 -0
  205. cuda/cccl/headers/include/cuda/__annotated_ptr/access_property.h +165 -0
  206. cuda/cccl/headers/include/cuda/__annotated_ptr/access_property_encoding.h +172 -0
  207. cuda/cccl/headers/include/cuda/__annotated_ptr/annotated_ptr.h +217 -0
  208. cuda/cccl/headers/include/cuda/__annotated_ptr/annotated_ptr_base.h +100 -0
  209. cuda/cccl/headers/include/cuda/__annotated_ptr/apply_access_property.h +83 -0
  210. cuda/cccl/headers/include/cuda/__annotated_ptr/associate_access_property.h +128 -0
  211. cuda/cccl/headers/include/cuda/__annotated_ptr/createpolicy.h +210 -0
  212. cuda/cccl/headers/include/cuda/__atomic/atomic.h +145 -0
  213. cuda/cccl/headers/include/cuda/__barrier/async_contract_fulfillment.h +39 -0
  214. cuda/cccl/headers/include/cuda/__barrier/barrier.h +65 -0
  215. cuda/cccl/headers/include/cuda/__barrier/barrier_arrive_tx.h +102 -0
  216. cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +466 -0
  217. cuda/cccl/headers/include/cuda/__barrier/barrier_expect_tx.h +74 -0
  218. cuda/cccl/headers/include/cuda/__barrier/barrier_native_handle.h +45 -0
  219. cuda/cccl/headers/include/cuda/__barrier/barrier_thread_scope.h +60 -0
  220. cuda/cccl/headers/include/cuda/__bit/bit_reverse.h +171 -0
  221. cuda/cccl/headers/include/cuda/__bit/bitfield.h +122 -0
  222. cuda/cccl/headers/include/cuda/__bit/bitmask.h +90 -0
  223. cuda/cccl/headers/include/cuda/__cccl_config +36 -0
  224. cuda/cccl/headers/include/cuda/__cmath/ceil_div.h +124 -0
  225. cuda/cccl/headers/include/cuda/__cmath/fast_modulo_division.h +249 -0
  226. cuda/cccl/headers/include/cuda/__cmath/ilog.h +195 -0
  227. cuda/cccl/headers/include/cuda/__cmath/ipow.h +107 -0
  228. cuda/cccl/headers/include/cuda/__cmath/isqrt.h +80 -0
  229. cuda/cccl/headers/include/cuda/__cmath/neg.h +47 -0
  230. cuda/cccl/headers/include/cuda/__cmath/pow2.h +74 -0
  231. cuda/cccl/headers/include/cuda/__cmath/round_down.h +102 -0
  232. cuda/cccl/headers/include/cuda/__cmath/round_up.h +104 -0
  233. cuda/cccl/headers/include/cuda/__cmath/uabs.h +57 -0
  234. cuda/cccl/headers/include/cuda/__device/all_devices.h +240 -0
  235. cuda/cccl/headers/include/cuda/__device/arch_traits.h +613 -0
  236. cuda/cccl/headers/include/cuda/__device/attributes.h +721 -0
  237. cuda/cccl/headers/include/cuda/__device/device_ref.h +176 -0
  238. cuda/cccl/headers/include/cuda/__device/physical_device.h +168 -0
  239. cuda/cccl/headers/include/cuda/__driver/driver_api.h +503 -0
  240. cuda/cccl/headers/include/cuda/__event/event.h +171 -0
  241. cuda/cccl/headers/include/cuda/__event/event_ref.h +158 -0
  242. cuda/cccl/headers/include/cuda/__event/timed_event.h +118 -0
  243. cuda/cccl/headers/include/cuda/__execution/determinism.h +91 -0
  244. cuda/cccl/headers/include/cuda/__execution/require.h +75 -0
  245. cuda/cccl/headers/include/cuda/__execution/tune.h +70 -0
  246. cuda/cccl/headers/include/cuda/__functional/address_stability.h +131 -0
  247. cuda/cccl/headers/include/cuda/__functional/for_each_canceled.h +276 -0
  248. cuda/cccl/headers/include/cuda/__functional/maximum.h +58 -0
  249. cuda/cccl/headers/include/cuda/__functional/minimum.h +58 -0
  250. cuda/cccl/headers/include/cuda/__functional/proclaim_return_type.h +109 -0
  251. cuda/cccl/headers/include/cuda/__fwd/barrier.h +38 -0
  252. cuda/cccl/headers/include/cuda/__fwd/barrier_native_handle.h +42 -0
  253. cuda/cccl/headers/include/cuda/__fwd/get_stream.h +38 -0
  254. cuda/cccl/headers/include/cuda/__fwd/pipeline.h +37 -0
  255. cuda/cccl/headers/include/cuda/__fwd/zip_iterator.h +49 -0
  256. cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h +257 -0
  257. cuda/cccl/headers/include/cuda/__iterator/counting_iterator.h +460 -0
  258. cuda/cccl/headers/include/cuda/__iterator/discard_iterator.h +314 -0
  259. cuda/cccl/headers/include/cuda/__iterator/permutation_iterator.h +424 -0
  260. cuda/cccl/headers/include/cuda/__iterator/shuffle_iterator.h +292 -0
  261. cuda/cccl/headers/include/cuda/__iterator/strided_iterator.h +321 -0
  262. cuda/cccl/headers/include/cuda/__iterator/tabulate_output_iterator.h +335 -0
  263. cuda/cccl/headers/include/cuda/__iterator/transform_input_output_iterator.h +501 -0
  264. cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +496 -0
  265. cuda/cccl/headers/include/cuda/__iterator/transform_output_iterator.h +452 -0
  266. cuda/cccl/headers/include/cuda/__iterator/zip_function.h +94 -0
  267. cuda/cccl/headers/include/cuda/__iterator/zip_iterator.h +539 -0
  268. cuda/cccl/headers/include/cuda/__latch/latch.h +44 -0
  269. cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +462 -0
  270. cuda/cccl/headers/include/cuda/__mdspan/host_device_mdspan.h +63 -0
  271. cuda/cccl/headers/include/cuda/__mdspan/restrict_accessor.h +122 -0
  272. cuda/cccl/headers/include/cuda/__mdspan/restrict_mdspan.h +51 -0
  273. cuda/cccl/headers/include/cuda/__memcpy_async/check_preconditions.h +79 -0
  274. cuda/cccl/headers/include/cuda/__memcpy_async/completion_mechanism.h +47 -0
  275. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_bulk_shared_global.h +60 -0
  276. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_fallback.h +72 -0
  277. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_shared_global.h +148 -0
  278. cuda/cccl/headers/include/cuda/__memcpy_async/dispatch_memcpy_async.h +165 -0
  279. cuda/cccl/headers/include/cuda/__memcpy_async/is_local_smem_barrier.h +53 -0
  280. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async.h +179 -0
  281. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_barrier.h +99 -0
  282. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_tx.h +104 -0
  283. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_completion.h +170 -0
  284. cuda/cccl/headers/include/cuda/__memcpy_async/try_get_barrier_handle.h +59 -0
  285. cuda/cccl/headers/include/cuda/__memory/address_space.h +211 -0
  286. cuda/cccl/headers/include/cuda/__memory/align_down.h +56 -0
  287. cuda/cccl/headers/include/cuda/__memory/align_up.h +56 -0
  288. cuda/cccl/headers/include/cuda/__memory/aligned_size.h +61 -0
  289. cuda/cccl/headers/include/cuda/__memory/check_address.h +106 -0
  290. cuda/cccl/headers/include/cuda/__memory/discard_memory.h +64 -0
  291. cuda/cccl/headers/include/cuda/__memory/get_device_address.h +58 -0
  292. cuda/cccl/headers/include/cuda/__memory/is_aligned.h +47 -0
  293. cuda/cccl/headers/include/cuda/__memory/ptr_rebind.h +75 -0
  294. cuda/cccl/headers/include/cuda/__memory_resource/get_memory_resource.h +82 -0
  295. cuda/cccl/headers/include/cuda/__memory_resource/get_property.h +153 -0
  296. cuda/cccl/headers/include/cuda/__memory_resource/properties.h +69 -0
  297. cuda/cccl/headers/include/cuda/__memory_resource/resource.h +125 -0
  298. cuda/cccl/headers/include/cuda/__memory_resource/resource_ref.h +654 -0
  299. cuda/cccl/headers/include/cuda/__numeric/add_overflow.h +306 -0
  300. cuda/cccl/headers/include/cuda/__numeric/narrow.h +108 -0
  301. cuda/cccl/headers/include/cuda/__numeric/overflow_cast.h +59 -0
  302. cuda/cccl/headers/include/cuda/__numeric/overflow_result.h +43 -0
  303. cuda/cccl/headers/include/cuda/__nvtx/nvtx.h +120 -0
  304. cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +2982 -0
  305. cuda/cccl/headers/include/cuda/__ptx/instructions/barrier_cluster.h +43 -0
  306. cuda/cccl/headers/include/cuda/__ptx/instructions/bfind.h +41 -0
  307. cuda/cccl/headers/include/cuda/__ptx/instructions/bmsk.h +41 -0
  308. cuda/cccl/headers/include/cuda/__ptx/instructions/clusterlaunchcontrol.h +41 -0
  309. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk.h +44 -0
  310. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_commit_group.h +43 -0
  311. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_tensor.h +45 -0
  312. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_wait_group.h +43 -0
  313. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_mbarrier_arrive.h +42 -0
  314. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_reduce_async_bulk.h +60 -0
  315. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_reduce_async_bulk_tensor.h +43 -0
  316. cuda/cccl/headers/include/cuda/__ptx/instructions/elect_sync.h +41 -0
  317. cuda/cccl/headers/include/cuda/__ptx/instructions/exit.h +41 -0
  318. cuda/cccl/headers/include/cuda/__ptx/instructions/fence.h +49 -0
  319. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/barrier_cluster.h +115 -0
  320. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/bfind.h +190 -0
  321. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/bmsk.h +54 -0
  322. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/clusterlaunchcontrol.h +242 -0
  323. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk.h +197 -0
  324. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_commit_group.h +25 -0
  325. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_multicast.h +54 -0
  326. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor.h +997 -0
  327. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_gather_scatter.h +318 -0
  328. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_multicast.h +671 -0
  329. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_wait_group.h +46 -0
  330. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_mbarrier_arrive.h +26 -0
  331. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_mbarrier_arrive_noinc.h +26 -0
  332. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk.h +1470 -0
  333. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_bf16.h +132 -0
  334. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_f16.h +132 -0
  335. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_tensor.h +601 -0
  336. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/elect_sync.h +36 -0
  337. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/exit.h +25 -0
  338. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence.h +208 -0
  339. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_mbarrier_init.h +31 -0
  340. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_alias.h +25 -0
  341. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_async.h +58 -0
  342. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_async_generic_sync_restrict.h +64 -0
  343. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_tensormap_generic.h +102 -0
  344. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_sync_restrict.h +64 -0
  345. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/get_sreg.h +949 -0
  346. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/getctarank.h +32 -0
  347. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/ld.h +5542 -0
  348. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive.h +399 -0
  349. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive_expect_tx.h +184 -0
  350. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive_no_complete.h +34 -0
  351. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_expect_tx.h +102 -0
  352. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_init.h +27 -0
  353. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_test_wait.h +143 -0
  354. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_test_wait_parity.h +144 -0
  355. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_try_wait.h +286 -0
  356. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_try_wait_parity.h +290 -0
  357. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_ld_reduce.h +2202 -0
  358. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_red.h +1362 -0
  359. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_st.h +236 -0
  360. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/prmt.h +230 -0
  361. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/red_async.h +460 -0
  362. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/shl.h +96 -0
  363. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/shr.h +168 -0
  364. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st.h +1490 -0
  365. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st_async.h +123 -0
  366. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st_bulk.h +31 -0
  367. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_alloc.h +132 -0
  368. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_commit.h +99 -0
  369. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_cp.h +765 -0
  370. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_fence.h +58 -0
  371. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_ld.h +4927 -0
  372. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_mma.h +4291 -0
  373. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_mma_ws.h +7110 -0
  374. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_shift.h +42 -0
  375. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_st.h +5063 -0
  376. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_wait.h +56 -0
  377. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tensormap_cp_fenceproxy.h +71 -0
  378. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tensormap_replace.h +1030 -0
  379. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/trap.h +25 -0
  380. cuda/cccl/headers/include/cuda/__ptx/instructions/get_sreg.h +43 -0
  381. cuda/cccl/headers/include/cuda/__ptx/instructions/getctarank.h +43 -0
  382. cuda/cccl/headers/include/cuda/__ptx/instructions/ld.h +41 -0
  383. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_arrive.h +45 -0
  384. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_expect_tx.h +41 -0
  385. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_init.h +43 -0
  386. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_wait.h +46 -0
  387. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_ld_reduce.h +41 -0
  388. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_red.h +41 -0
  389. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_st.h +41 -0
  390. cuda/cccl/headers/include/cuda/__ptx/instructions/prmt.h +41 -0
  391. cuda/cccl/headers/include/cuda/__ptx/instructions/red_async.h +43 -0
  392. cuda/cccl/headers/include/cuda/__ptx/instructions/shfl_sync.h +244 -0
  393. cuda/cccl/headers/include/cuda/__ptx/instructions/shl.h +41 -0
  394. cuda/cccl/headers/include/cuda/__ptx/instructions/shr.h +41 -0
  395. cuda/cccl/headers/include/cuda/__ptx/instructions/st.h +41 -0
  396. cuda/cccl/headers/include/cuda/__ptx/instructions/st_async.h +43 -0
  397. cuda/cccl/headers/include/cuda/__ptx/instructions/st_bulk.h +41 -0
  398. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_alloc.h +41 -0
  399. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_commit.h +41 -0
  400. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_cp.h +41 -0
  401. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_fence.h +41 -0
  402. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_ld.h +41 -0
  403. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_mma.h +41 -0
  404. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_mma_ws.h +41 -0
  405. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_shift.h +41 -0
  406. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_st.h +41 -0
  407. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_wait.h +41 -0
  408. cuda/cccl/headers/include/cuda/__ptx/instructions/tensormap_cp_fenceproxy.h +43 -0
  409. cuda/cccl/headers/include/cuda/__ptx/instructions/tensormap_replace.h +43 -0
  410. cuda/cccl/headers/include/cuda/__ptx/instructions/trap.h +41 -0
  411. cuda/cccl/headers/include/cuda/__ptx/ptx_dot_variants.h +230 -0
  412. cuda/cccl/headers/include/cuda/__ptx/ptx_helper_functions.h +176 -0
  413. cuda/cccl/headers/include/cuda/__random/feistel_bijection.h +105 -0
  414. cuda/cccl/headers/include/cuda/__random/random_bijection.h +88 -0
  415. cuda/cccl/headers/include/cuda/__runtime/ensure_current_context.h +97 -0
  416. cuda/cccl/headers/include/cuda/__semaphore/counting_semaphore.h +53 -0
  417. cuda/cccl/headers/include/cuda/__stream/get_stream.h +110 -0
  418. cuda/cccl/headers/include/cuda/__stream/stream.h +142 -0
  419. cuda/cccl/headers/include/cuda/__stream/stream_ref.h +296 -0
  420. cuda/cccl/headers/include/cuda/__type_traits/is_floating_point.h +47 -0
  421. cuda/cccl/headers/include/cuda/__type_traits/is_specialization_of.h +37 -0
  422. cuda/cccl/headers/include/cuda/__utility/__basic_any/access.h +88 -0
  423. cuda/cccl/headers/include/cuda/__utility/__basic_any/any_cast.h +83 -0
  424. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_base.h +148 -0
  425. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_from.h +96 -0
  426. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_fwd.h +128 -0
  427. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_ptr.h +304 -0
  428. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_ref.h +337 -0
  429. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_value.h +521 -0
  430. cuda/cccl/headers/include/cuda/__utility/__basic_any/conversions.h +169 -0
  431. cuda/cccl/headers/include/cuda/__utility/__basic_any/dynamic_any_cast.h +107 -0
  432. cuda/cccl/headers/include/cuda/__utility/__basic_any/interfaces.h +359 -0
  433. cuda/cccl/headers/include/cuda/__utility/__basic_any/iset.h +142 -0
  434. cuda/cccl/headers/include/cuda/__utility/__basic_any/overrides.h +64 -0
  435. cuda/cccl/headers/include/cuda/__utility/__basic_any/rtti.h +257 -0
  436. cuda/cccl/headers/include/cuda/__utility/__basic_any/semiregular.h +322 -0
  437. cuda/cccl/headers/include/cuda/__utility/__basic_any/storage.h +78 -0
  438. cuda/cccl/headers/include/cuda/__utility/__basic_any/tagged_ptr.h +58 -0
  439. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtcall.h +162 -0
  440. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_functions.h +184 -0
  441. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_ptrs.h +80 -0
  442. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_tables.h +155 -0
  443. cuda/cccl/headers/include/cuda/__utility/basic_any.h +507 -0
  444. cuda/cccl/headers/include/cuda/__utility/immovable.h +50 -0
  445. cuda/cccl/headers/include/cuda/__utility/inherit.h +36 -0
  446. cuda/cccl/headers/include/cuda/__utility/no_init.h +29 -0
  447. cuda/cccl/headers/include/cuda/__utility/static_for.h +79 -0
  448. cuda/cccl/headers/include/cuda/__warp/lane_mask.h +326 -0
  449. cuda/cccl/headers/include/cuda/__warp/warp_match_all.h +65 -0
  450. cuda/cccl/headers/include/cuda/__warp/warp_shuffle.h +251 -0
  451. cuda/cccl/headers/include/cuda/access_property +26 -0
  452. cuda/cccl/headers/include/cuda/algorithm +27 -0
  453. cuda/cccl/headers/include/cuda/annotated_ptr +29 -0
  454. cuda/cccl/headers/include/cuda/atomic +27 -0
  455. cuda/cccl/headers/include/cuda/barrier +267 -0
  456. cuda/cccl/headers/include/cuda/bit +29 -0
  457. cuda/cccl/headers/include/cuda/cmath +36 -0
  458. cuda/cccl/headers/include/cuda/devices +20 -0
  459. cuda/cccl/headers/include/cuda/discard_memory +32 -0
  460. cuda/cccl/headers/include/cuda/functional +32 -0
  461. cuda/cccl/headers/include/cuda/iterator +38 -0
  462. cuda/cccl/headers/include/cuda/latch +27 -0
  463. cuda/cccl/headers/include/cuda/mdspan +28 -0
  464. cuda/cccl/headers/include/cuda/memory +34 -0
  465. cuda/cccl/headers/include/cuda/memory_resource +35 -0
  466. cuda/cccl/headers/include/cuda/numeric +29 -0
  467. cuda/cccl/headers/include/cuda/pipeline +578 -0
  468. cuda/cccl/headers/include/cuda/ptx +128 -0
  469. cuda/cccl/headers/include/cuda/semaphore +31 -0
  470. cuda/cccl/headers/include/cuda/std/__algorithm/adjacent_find.h +59 -0
  471. cuda/cccl/headers/include/cuda/std/__algorithm/all_of.h +45 -0
  472. cuda/cccl/headers/include/cuda/std/__algorithm/any_of.h +45 -0
  473. cuda/cccl/headers/include/cuda/std/__algorithm/binary_search.h +53 -0
  474. cuda/cccl/headers/include/cuda/std/__algorithm/clamp.h +48 -0
  475. cuda/cccl/headers/include/cuda/std/__algorithm/comp.h +64 -0
  476. cuda/cccl/headers/include/cuda/std/__algorithm/comp_ref_type.h +85 -0
  477. cuda/cccl/headers/include/cuda/std/__algorithm/copy.h +142 -0
  478. cuda/cccl/headers/include/cuda/std/__algorithm/copy_backward.h +80 -0
  479. cuda/cccl/headers/include/cuda/std/__algorithm/copy_if.h +47 -0
  480. cuda/cccl/headers/include/cuda/std/__algorithm/copy_n.h +73 -0
  481. cuda/cccl/headers/include/cuda/std/__algorithm/count.h +49 -0
  482. cuda/cccl/headers/include/cuda/std/__algorithm/count_if.h +49 -0
  483. cuda/cccl/headers/include/cuda/std/__algorithm/equal.h +128 -0
  484. cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h +101 -0
  485. cuda/cccl/headers/include/cuda/std/__algorithm/fill.h +58 -0
  486. cuda/cccl/headers/include/cuda/std/__algorithm/fill_n.h +51 -0
  487. cuda/cccl/headers/include/cuda/std/__algorithm/find.h +62 -0
  488. cuda/cccl/headers/include/cuda/std/__algorithm/find_end.h +225 -0
  489. cuda/cccl/headers/include/cuda/std/__algorithm/find_first_of.h +73 -0
  490. cuda/cccl/headers/include/cuda/std/__algorithm/find_if.h +46 -0
  491. cuda/cccl/headers/include/cuda/std/__algorithm/find_if_not.h +46 -0
  492. cuda/cccl/headers/include/cuda/std/__algorithm/for_each.h +42 -0
  493. cuda/cccl/headers/include/cuda/std/__algorithm/for_each_n.h +48 -0
  494. cuda/cccl/headers/include/cuda/std/__algorithm/generate.h +41 -0
  495. cuda/cccl/headers/include/cuda/std/__algorithm/generate_n.h +46 -0
  496. cuda/cccl/headers/include/cuda/std/__algorithm/half_positive.h +49 -0
  497. cuda/cccl/headers/include/cuda/std/__algorithm/in_fun_result.h +55 -0
  498. cuda/cccl/headers/include/cuda/std/__algorithm/includes.h +92 -0
  499. cuda/cccl/headers/include/cuda/std/__algorithm/is_heap.h +50 -0
  500. cuda/cccl/headers/include/cuda/std/__algorithm/is_heap_until.h +83 -0
  501. cuda/cccl/headers/include/cuda/std/__algorithm/is_partitioned.h +57 -0
  502. cuda/cccl/headers/include/cuda/std/__algorithm/is_permutation.h +252 -0
  503. cuda/cccl/headers/include/cuda/std/__algorithm/is_sorted.h +49 -0
  504. cuda/cccl/headers/include/cuda/std/__algorithm/is_sorted_until.h +68 -0
  505. cuda/cccl/headers/include/cuda/std/__algorithm/iter_swap.h +82 -0
  506. cuda/cccl/headers/include/cuda/std/__algorithm/iterator_operations.h +185 -0
  507. cuda/cccl/headers/include/cuda/std/__algorithm/lexicographical_compare.h +68 -0
  508. cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h +82 -0
  509. cuda/cccl/headers/include/cuda/std/__algorithm/make_heap.h +70 -0
  510. cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h +96 -0
  511. cuda/cccl/headers/include/cuda/std/__algorithm/max.h +62 -0
  512. cuda/cccl/headers/include/cuda/std/__algorithm/max_element.h +68 -0
  513. cuda/cccl/headers/include/cuda/std/__algorithm/merge.h +89 -0
  514. cuda/cccl/headers/include/cuda/std/__algorithm/min.h +62 -0
  515. cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h +87 -0
  516. cuda/cccl/headers/include/cuda/std/__algorithm/minmax.h +66 -0
  517. cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h +141 -0
  518. cuda/cccl/headers/include/cuda/std/__algorithm/mismatch.h +83 -0
  519. cuda/cccl/headers/include/cuda/std/__algorithm/move.h +86 -0
  520. cuda/cccl/headers/include/cuda/std/__algorithm/move_backward.h +84 -0
  521. cuda/cccl/headers/include/cuda/std/__algorithm/next_permutation.h +88 -0
  522. cuda/cccl/headers/include/cuda/std/__algorithm/none_of.h +45 -0
  523. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort.h +102 -0
  524. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h +122 -0
  525. cuda/cccl/headers/include/cuda/std/__algorithm/partition.h +120 -0
  526. cuda/cccl/headers/include/cuda/std/__algorithm/partition_copy.h +59 -0
  527. cuda/cccl/headers/include/cuda/std/__algorithm/partition_point.h +61 -0
  528. cuda/cccl/headers/include/cuda/std/__algorithm/pop_heap.h +94 -0
  529. cuda/cccl/headers/include/cuda/std/__algorithm/prev_permutation.h +88 -0
  530. cuda/cccl/headers/include/cuda/std/__algorithm/push_heap.h +101 -0
  531. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_for_each.h +84 -0
  532. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_for_each_n.h +68 -0
  533. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_iterator_concept.h +65 -0
  534. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_min.h +98 -0
  535. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_min_element.h +68 -0
  536. cuda/cccl/headers/include/cuda/std/__algorithm/remove.h +55 -0
  537. cuda/cccl/headers/include/cuda/std/__algorithm/remove_copy.h +47 -0
  538. cuda/cccl/headers/include/cuda/std/__algorithm/remove_copy_if.h +47 -0
  539. cuda/cccl/headers/include/cuda/std/__algorithm/remove_if.h +56 -0
  540. cuda/cccl/headers/include/cuda/std/__algorithm/replace.h +45 -0
  541. cuda/cccl/headers/include/cuda/std/__algorithm/replace_copy.h +54 -0
  542. cuda/cccl/headers/include/cuda/std/__algorithm/replace_copy_if.h +50 -0
  543. cuda/cccl/headers/include/cuda/std/__algorithm/replace_if.h +45 -0
  544. cuda/cccl/headers/include/cuda/std/__algorithm/reverse.h +81 -0
  545. cuda/cccl/headers/include/cuda/std/__algorithm/reverse_copy.h +43 -0
  546. cuda/cccl/headers/include/cuda/std/__algorithm/rotate.h +261 -0
  547. cuda/cccl/headers/include/cuda/std/__algorithm/rotate_copy.h +40 -0
  548. cuda/cccl/headers/include/cuda/std/__algorithm/search.h +185 -0
  549. cuda/cccl/headers/include/cuda/std/__algorithm/search_n.h +163 -0
  550. cuda/cccl/headers/include/cuda/std/__algorithm/set_difference.h +95 -0
  551. cuda/cccl/headers/include/cuda/std/__algorithm/set_intersection.h +122 -0
  552. cuda/cccl/headers/include/cuda/std/__algorithm/set_symmetric_difference.h +134 -0
  553. cuda/cccl/headers/include/cuda/std/__algorithm/set_union.h +128 -0
  554. cuda/cccl/headers/include/cuda/std/__algorithm/shift_left.h +84 -0
  555. cuda/cccl/headers/include/cuda/std/__algorithm/shift_right.h +144 -0
  556. cuda/cccl/headers/include/cuda/std/__algorithm/sift_down.h +139 -0
  557. cuda/cccl/headers/include/cuda/std/__algorithm/sort_heap.h +70 -0
  558. cuda/cccl/headers/include/cuda/std/__algorithm/swap_ranges.h +78 -0
  559. cuda/cccl/headers/include/cuda/std/__algorithm/transform.h +59 -0
  560. cuda/cccl/headers/include/cuda/std/__algorithm/unique.h +76 -0
  561. cuda/cccl/headers/include/cuda/std/__algorithm/unique_copy.h +155 -0
  562. cuda/cccl/headers/include/cuda/std/__algorithm/unwrap_iter.h +95 -0
  563. cuda/cccl/headers/include/cuda/std/__algorithm/unwrap_range.h +126 -0
  564. cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h +83 -0
  565. cuda/cccl/headers/include/cuda/std/__algorithm_ +26 -0
  566. cuda/cccl/headers/include/cuda/std/__atomic/api/common.h +192 -0
  567. cuda/cccl/headers/include/cuda/std/__atomic/api/owned.h +138 -0
  568. cuda/cccl/headers/include/cuda/std/__atomic/api/reference.h +118 -0
  569. cuda/cccl/headers/include/cuda/std/__atomic/functions/common.h +58 -0
  570. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_local.h +208 -0
  571. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_derived.h +401 -0
  572. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated.h +3971 -0
  573. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated_helper.h +177 -0
  574. cuda/cccl/headers/include/cuda/std/__atomic/functions/host.h +211 -0
  575. cuda/cccl/headers/include/cuda/std/__atomic/functions.h +33 -0
  576. cuda/cccl/headers/include/cuda/std/__atomic/order.h +159 -0
  577. cuda/cccl/headers/include/cuda/std/__atomic/platform/msvc_to_builtins.h +654 -0
  578. cuda/cccl/headers/include/cuda/std/__atomic/platform.h +93 -0
  579. cuda/cccl/headers/include/cuda/std/__atomic/scopes.h +105 -0
  580. cuda/cccl/headers/include/cuda/std/__atomic/types/base.h +249 -0
  581. cuda/cccl/headers/include/cuda/std/__atomic/types/common.h +104 -0
  582. cuda/cccl/headers/include/cuda/std/__atomic/types/locked.h +225 -0
  583. cuda/cccl/headers/include/cuda/std/__atomic/types/reference.h +72 -0
  584. cuda/cccl/headers/include/cuda/std/__atomic/types/small.h +228 -0
  585. cuda/cccl/headers/include/cuda/std/__atomic/types.h +52 -0
  586. cuda/cccl/headers/include/cuda/std/__atomic/wait/notify_wait.h +95 -0
  587. cuda/cccl/headers/include/cuda/std/__atomic/wait/polling.h +65 -0
  588. cuda/cccl/headers/include/cuda/std/__barrier/barrier.h +227 -0
  589. cuda/cccl/headers/include/cuda/std/__barrier/empty_completion.h +37 -0
  590. cuda/cccl/headers/include/cuda/std/__barrier/poll_tester.h +82 -0
  591. cuda/cccl/headers/include/cuda/std/__bit/bit_cast.h +76 -0
  592. cuda/cccl/headers/include/cuda/std/__bit/byteswap.h +185 -0
  593. cuda/cccl/headers/include/cuda/std/__bit/countl.h +167 -0
  594. cuda/cccl/headers/include/cuda/std/__bit/countr.h +185 -0
  595. cuda/cccl/headers/include/cuda/std/__bit/endian.h +39 -0
  596. cuda/cccl/headers/include/cuda/std/__bit/has_single_bit.h +43 -0
  597. cuda/cccl/headers/include/cuda/std/__bit/integral.h +126 -0
  598. cuda/cccl/headers/include/cuda/std/__bit/popcount.h +154 -0
  599. cuda/cccl/headers/include/cuda/std/__bit/reference.h +1272 -0
  600. cuda/cccl/headers/include/cuda/std/__bit/rotate.h +94 -0
  601. cuda/cccl/headers/include/cuda/std/__cccl/architecture.h +78 -0
  602. cuda/cccl/headers/include/cuda/std/__cccl/assert.h +150 -0
  603. cuda/cccl/headers/include/cuda/std/__cccl/attributes.h +206 -0
  604. cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +787 -0
  605. cuda/cccl/headers/include/cuda/std/__cccl/compiler.h +217 -0
  606. cuda/cccl/headers/include/cuda/std/__cccl/cuda_capabilities.h +51 -0
  607. cuda/cccl/headers/include/cuda/std/__cccl/cuda_toolkit.h +53 -0
  608. cuda/cccl/headers/include/cuda/std/__cccl/deprecated.h +69 -0
  609. cuda/cccl/headers/include/cuda/std/__cccl/diagnostic.h +131 -0
  610. cuda/cccl/headers/include/cuda/std/__cccl/dialect.h +123 -0
  611. cuda/cccl/headers/include/cuda/std/__cccl/epilogue.h +344 -0
  612. cuda/cccl/headers/include/cuda/std/__cccl/exceptions.h +79 -0
  613. cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h +68 -0
  614. cuda/cccl/headers/include/cuda/std/__cccl/extended_data_types.h +133 -0
  615. cuda/cccl/headers/include/cuda/std/__cccl/is_non_narrowing_convertible.h +73 -0
  616. cuda/cccl/headers/include/cuda/std/__cccl/os.h +48 -0
  617. cuda/cccl/headers/include/cuda/std/__cccl/preprocessor.h +1276 -0
  618. cuda/cccl/headers/include/cuda/std/__cccl/prologue.h +281 -0
  619. cuda/cccl/headers/include/cuda/std/__cccl/ptx_isa.h +253 -0
  620. cuda/cccl/headers/include/cuda/std/__cccl/rtti.h +72 -0
  621. cuda/cccl/headers/include/cuda/std/__cccl/sequence_access.h +87 -0
  622. cuda/cccl/headers/include/cuda/std/__cccl/system_header.h +38 -0
  623. cuda/cccl/headers/include/cuda/std/__cccl/unreachable.h +31 -0
  624. cuda/cccl/headers/include/cuda/std/__cccl/version.h +26 -0
  625. cuda/cccl/headers/include/cuda/std/__cccl/visibility.h +146 -0
  626. cuda/cccl/headers/include/cuda/std/__charconv/chars_format.h +81 -0
  627. cuda/cccl/headers/include/cuda/std/__charconv/from_chars.h +154 -0
  628. cuda/cccl/headers/include/cuda/std/__charconv/from_chars_result.h +56 -0
  629. cuda/cccl/headers/include/cuda/std/__charconv/to_chars.h +148 -0
  630. cuda/cccl/headers/include/cuda/std/__charconv/to_chars_result.h +56 -0
  631. cuda/cccl/headers/include/cuda/std/__charconv_ +31 -0
  632. cuda/cccl/headers/include/cuda/std/__cmath/abs.h +127 -0
  633. cuda/cccl/headers/include/cuda/std/__cmath/copysign.h +88 -0
  634. cuda/cccl/headers/include/cuda/std/__cmath/exponential_functions.h +724 -0
  635. cuda/cccl/headers/include/cuda/std/__cmath/fma.h +125 -0
  636. cuda/cccl/headers/include/cuda/std/__cmath/fpclassify.h +216 -0
  637. cuda/cccl/headers/include/cuda/std/__cmath/gamma.h +205 -0
  638. cuda/cccl/headers/include/cuda/std/__cmath/hyperbolic_functions.h +286 -0
  639. cuda/cccl/headers/include/cuda/std/__cmath/hypot.h +221 -0
  640. cuda/cccl/headers/include/cuda/std/__cmath/inverse_hyperbolic_functions.h +286 -0
  641. cuda/cccl/headers/include/cuda/std/__cmath/inverse_trigonometric_functions.h +371 -0
  642. cuda/cccl/headers/include/cuda/std/__cmath/isfinite.h +167 -0
  643. cuda/cccl/headers/include/cuda/std/__cmath/isinf.h +205 -0
  644. cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +180 -0
  645. cuda/cccl/headers/include/cuda/std/__cmath/isnormal.h +138 -0
  646. cuda/cccl/headers/include/cuda/std/__cmath/lerp.h +101 -0
  647. cuda/cccl/headers/include/cuda/std/__cmath/logarithms.h +582 -0
  648. cuda/cccl/headers/include/cuda/std/__cmath/min_max.h +260 -0
  649. cuda/cccl/headers/include/cuda/std/__cmath/modulo.h +208 -0
  650. cuda/cccl/headers/include/cuda/std/__cmath/remainder.h +206 -0
  651. cuda/cccl/headers/include/cuda/std/__cmath/roots.h +199 -0
  652. cuda/cccl/headers/include/cuda/std/__cmath/rounding_functions.h +984 -0
  653. cuda/cccl/headers/include/cuda/std/__cmath/signbit.h +56 -0
  654. cuda/cccl/headers/include/cuda/std/__cmath/traits.h +238 -0
  655. cuda/cccl/headers/include/cuda/std/__cmath/trigonometric_functions.h +328 -0
  656. cuda/cccl/headers/include/cuda/std/__complex/arg.h +84 -0
  657. cuda/cccl/headers/include/cuda/std/__complex/complex.h +676 -0
  658. cuda/cccl/headers/include/cuda/std/__complex/exponential_functions.h +411 -0
  659. cuda/cccl/headers/include/cuda/std/__complex/hyperbolic_functions.h +117 -0
  660. cuda/cccl/headers/include/cuda/std/__complex/inverse_hyperbolic_functions.h +216 -0
  661. cuda/cccl/headers/include/cuda/std/__complex/inverse_trigonometric_functions.h +131 -0
  662. cuda/cccl/headers/include/cuda/std/__complex/literals.h +106 -0
  663. cuda/cccl/headers/include/cuda/std/__complex/logarithms.h +302 -0
  664. cuda/cccl/headers/include/cuda/std/__complex/math.h +159 -0
  665. cuda/cccl/headers/include/cuda/std/__complex/nvbf16.h +322 -0
  666. cuda/cccl/headers/include/cuda/std/__complex/nvfp16.h +321 -0
  667. cuda/cccl/headers/include/cuda/std/__complex/roots.h +214 -0
  668. cuda/cccl/headers/include/cuda/std/__complex/trigonometric_functions.h +61 -0
  669. cuda/cccl/headers/include/cuda/std/__complex/tuple.h +107 -0
  670. cuda/cccl/headers/include/cuda/std/__complex/vector_support.h +130 -0
  671. cuda/cccl/headers/include/cuda/std/__concepts/arithmetic.h +56 -0
  672. cuda/cccl/headers/include/cuda/std/__concepts/assignable.h +64 -0
  673. cuda/cccl/headers/include/cuda/std/__concepts/boolean_testable.h +63 -0
  674. cuda/cccl/headers/include/cuda/std/__concepts/class_or_enum.h +45 -0
  675. cuda/cccl/headers/include/cuda/std/__concepts/common_reference_with.h +69 -0
  676. cuda/cccl/headers/include/cuda/std/__concepts/common_with.h +82 -0
  677. cuda/cccl/headers/include/cuda/std/__concepts/concept_macros.h +274 -0
  678. cuda/cccl/headers/include/cuda/std/__concepts/constructible.h +107 -0
  679. cuda/cccl/headers/include/cuda/std/__concepts/convertible_to.h +70 -0
  680. cuda/cccl/headers/include/cuda/std/__concepts/copyable.h +60 -0
  681. cuda/cccl/headers/include/cuda/std/__concepts/derived_from.h +56 -0
  682. cuda/cccl/headers/include/cuda/std/__concepts/destructible.h +76 -0
  683. cuda/cccl/headers/include/cuda/std/__concepts/different_from.h +38 -0
  684. cuda/cccl/headers/include/cuda/std/__concepts/equality_comparable.h +100 -0
  685. cuda/cccl/headers/include/cuda/std/__concepts/invocable.h +80 -0
  686. cuda/cccl/headers/include/cuda/std/__concepts/movable.h +58 -0
  687. cuda/cccl/headers/include/cuda/std/__concepts/predicate.h +54 -0
  688. cuda/cccl/headers/include/cuda/std/__concepts/regular.h +54 -0
  689. cuda/cccl/headers/include/cuda/std/__concepts/relation.h +77 -0
  690. cuda/cccl/headers/include/cuda/std/__concepts/same_as.h +42 -0
  691. cuda/cccl/headers/include/cuda/std/__concepts/semiregular.h +54 -0
  692. cuda/cccl/headers/include/cuda/std/__concepts/swappable.h +206 -0
  693. cuda/cccl/headers/include/cuda/std/__concepts/totally_ordered.h +101 -0
  694. cuda/cccl/headers/include/cuda/std/__cstddef/byte.h +113 -0
  695. cuda/cccl/headers/include/cuda/std/__cstddef/types.h +52 -0
  696. cuda/cccl/headers/include/cuda/std/__cstdlib/abs.h +57 -0
  697. cuda/cccl/headers/include/cuda/std/__cstdlib/aligned_alloc.h +66 -0
  698. cuda/cccl/headers/include/cuda/std/__cstdlib/div.h +96 -0
  699. cuda/cccl/headers/include/cuda/std/__cstdlib/malloc.h +69 -0
  700. cuda/cccl/headers/include/cuda/std/__cstring/memcpy.h +58 -0
  701. cuda/cccl/headers/include/cuda/std/__cstring/memset.h +46 -0
  702. cuda/cccl/headers/include/cuda/std/__cuda/api_wrapper.h +62 -0
  703. cuda/cccl/headers/include/cuda/std/__cuda/ensure_current_device.h +72 -0
  704. cuda/cccl/headers/include/cuda/std/__exception/cuda_error.h +146 -0
  705. cuda/cccl/headers/include/cuda/std/__exception/terminate.h +73 -0
  706. cuda/cccl/headers/include/cuda/std/__execution/env.h +455 -0
  707. cuda/cccl/headers/include/cuda/std/__execution/policy.h +88 -0
  708. cuda/cccl/headers/include/cuda/std/__expected/bad_expected_access.h +127 -0
  709. cuda/cccl/headers/include/cuda/std/__expected/expected.h +1963 -0
  710. cuda/cccl/headers/include/cuda/std/__expected/expected_base.h +1050 -0
  711. cuda/cccl/headers/include/cuda/std/__expected/unexpect.h +37 -0
  712. cuda/cccl/headers/include/cuda/std/__expected/unexpected.h +172 -0
  713. cuda/cccl/headers/include/cuda/std/__floating_point/arithmetic.h +56 -0
  714. cuda/cccl/headers/include/cuda/std/__floating_point/cast.h +809 -0
  715. cuda/cccl/headers/include/cuda/std/__floating_point/cccl_fp.h +125 -0
  716. cuda/cccl/headers/include/cuda/std/__floating_point/common_type.h +48 -0
  717. cuda/cccl/headers/include/cuda/std/__floating_point/constants.h +374 -0
  718. cuda/cccl/headers/include/cuda/std/__floating_point/conversion_rank_order.h +124 -0
  719. cuda/cccl/headers/include/cuda/std/__floating_point/cuda_fp_types.h +113 -0
  720. cuda/cccl/headers/include/cuda/std/__floating_point/format.h +162 -0
  721. cuda/cccl/headers/include/cuda/std/__floating_point/fp.h +39 -0
  722. cuda/cccl/headers/include/cuda/std/__floating_point/mask.h +72 -0
  723. cuda/cccl/headers/include/cuda/std/__floating_point/native_type.h +81 -0
  724. cuda/cccl/headers/include/cuda/std/__floating_point/overflow_handler.h +139 -0
  725. cuda/cccl/headers/include/cuda/std/__floating_point/properties.h +229 -0
  726. cuda/cccl/headers/include/cuda/std/__floating_point/storage.h +248 -0
  727. cuda/cccl/headers/include/cuda/std/__floating_point/traits.h +172 -0
  728. cuda/cccl/headers/include/cuda/std/__format/buffer.h +48 -0
  729. cuda/cccl/headers/include/cuda/std/__format/concepts.h +69 -0
  730. cuda/cccl/headers/include/cuda/std/__format/format_arg.h +282 -0
  731. cuda/cccl/headers/include/cuda/std/__format/format_arg_store.h +279 -0
  732. cuda/cccl/headers/include/cuda/std/__format/format_args.h +122 -0
  733. cuda/cccl/headers/include/cuda/std/__format/format_context.h +92 -0
  734. cuda/cccl/headers/include/cuda/std/__format/format_error.h +76 -0
  735. cuda/cccl/headers/include/cuda/std/__format/format_integral.h +237 -0
  736. cuda/cccl/headers/include/cuda/std/__format/format_parse_context.h +124 -0
  737. cuda/cccl/headers/include/cuda/std/__format/format_spec_parser.h +1230 -0
  738. cuda/cccl/headers/include/cuda/std/__format/formatter.h +59 -0
  739. cuda/cccl/headers/include/cuda/std/__format/formatters/bool.h +101 -0
  740. cuda/cccl/headers/include/cuda/std/__format/formatters/char.h +124 -0
  741. cuda/cccl/headers/include/cuda/std/__format/formatters/fp.h +101 -0
  742. cuda/cccl/headers/include/cuda/std/__format/formatters/int.h +174 -0
  743. cuda/cccl/headers/include/cuda/std/__format/formatters/ptr.h +104 -0
  744. cuda/cccl/headers/include/cuda/std/__format/formatters/str.h +178 -0
  745. cuda/cccl/headers/include/cuda/std/__format/output_utils.h +272 -0
  746. cuda/cccl/headers/include/cuda/std/__format/parse_arg_id.h +138 -0
  747. cuda/cccl/headers/include/cuda/std/__format_ +45 -0
  748. cuda/cccl/headers/include/cuda/std/__functional/binary_function.h +63 -0
  749. cuda/cccl/headers/include/cuda/std/__functional/binary_negate.h +65 -0
  750. cuda/cccl/headers/include/cuda/std/__functional/bind.h +337 -0
  751. cuda/cccl/headers/include/cuda/std/__functional/bind_back.h +80 -0
  752. cuda/cccl/headers/include/cuda/std/__functional/bind_front.h +73 -0
  753. cuda/cccl/headers/include/cuda/std/__functional/binder1st.h +74 -0
  754. cuda/cccl/headers/include/cuda/std/__functional/binder2nd.h +74 -0
  755. cuda/cccl/headers/include/cuda/std/__functional/compose.h +68 -0
  756. cuda/cccl/headers/include/cuda/std/__functional/default_searcher.h +75 -0
  757. cuda/cccl/headers/include/cuda/std/__functional/function.h +1279 -0
  758. cuda/cccl/headers/include/cuda/std/__functional/hash.h +650 -0
  759. cuda/cccl/headers/include/cuda/std/__functional/identity.h +61 -0
  760. cuda/cccl/headers/include/cuda/std/__functional/invoke.h +560 -0
  761. cuda/cccl/headers/include/cuda/std/__functional/is_transparent.h +43 -0
  762. cuda/cccl/headers/include/cuda/std/__functional/mem_fn.h +67 -0
  763. cuda/cccl/headers/include/cuda/std/__functional/mem_fun_ref.h +213 -0
  764. cuda/cccl/headers/include/cuda/std/__functional/not_fn.h +120 -0
  765. cuda/cccl/headers/include/cuda/std/__functional/operations.h +534 -0
  766. cuda/cccl/headers/include/cuda/std/__functional/perfect_forward.h +128 -0
  767. cuda/cccl/headers/include/cuda/std/__functional/pointer_to_binary_function.h +65 -0
  768. cuda/cccl/headers/include/cuda/std/__functional/pointer_to_unary_function.h +64 -0
  769. cuda/cccl/headers/include/cuda/std/__functional/ranges_operations.h +113 -0
  770. cuda/cccl/headers/include/cuda/std/__functional/reference_wrapper.h +113 -0
  771. cuda/cccl/headers/include/cuda/std/__functional/unary_function.h +62 -0
  772. cuda/cccl/headers/include/cuda/std/__functional/unary_negate.h +65 -0
  773. cuda/cccl/headers/include/cuda/std/__functional/unwrap_ref.h +56 -0
  774. cuda/cccl/headers/include/cuda/std/__functional/weak_result_type.h +277 -0
  775. cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +35 -0
  776. cuda/cccl/headers/include/cuda/std/__fwd/array.h +42 -0
  777. cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +49 -0
  778. cuda/cccl/headers/include/cuda/std/__fwd/complex.h +34 -0
  779. cuda/cccl/headers/include/cuda/std/__fwd/format.h +84 -0
  780. cuda/cccl/headers/include/cuda/std/__fwd/fp.h +37 -0
  781. cuda/cccl/headers/include/cuda/std/__fwd/get.h +123 -0
  782. cuda/cccl/headers/include/cuda/std/__fwd/hash.h +34 -0
  783. cuda/cccl/headers/include/cuda/std/__fwd/iterator.h +43 -0
  784. cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +90 -0
  785. cuda/cccl/headers/include/cuda/std/__fwd/memory_resource.h +37 -0
  786. cuda/cccl/headers/include/cuda/std/__fwd/optional.h +39 -0
  787. cuda/cccl/headers/include/cuda/std/__fwd/pair.h +34 -0
  788. cuda/cccl/headers/include/cuda/std/__fwd/reference_wrapper.h +34 -0
  789. cuda/cccl/headers/include/cuda/std/__fwd/span.h +45 -0
  790. cuda/cccl/headers/include/cuda/std/__fwd/string.h +83 -0
  791. cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +59 -0
  792. cuda/cccl/headers/include/cuda/std/__fwd/subrange.h +55 -0
  793. cuda/cccl/headers/include/cuda/std/__fwd/tuple.h +34 -0
  794. cuda/cccl/headers/include/cuda/std/__internal/cpp_dialect.h +44 -0
  795. cuda/cccl/headers/include/cuda/std/__internal/features.h +71 -0
  796. cuda/cccl/headers/include/cuda/std/__internal/namespaces.h +122 -0
  797. cuda/cccl/headers/include/cuda/std/__iterator/access.h +128 -0
  798. cuda/cccl/headers/include/cuda/std/__iterator/advance.h +228 -0
  799. cuda/cccl/headers/include/cuda/std/__iterator/back_insert_iterator.h +163 -0
  800. cuda/cccl/headers/include/cuda/std/__iterator/bounded_iter.h +254 -0
  801. cuda/cccl/headers/include/cuda/std/__iterator/concepts.h +645 -0
  802. cuda/cccl/headers/include/cuda/std/__iterator/counted_iterator.h +464 -0
  803. cuda/cccl/headers/include/cuda/std/__iterator/data.h +61 -0
  804. cuda/cccl/headers/include/cuda/std/__iterator/default_sentinel.h +36 -0
  805. cuda/cccl/headers/include/cuda/std/__iterator/distance.h +126 -0
  806. cuda/cccl/headers/include/cuda/std/__iterator/empty.h +53 -0
  807. cuda/cccl/headers/include/cuda/std/__iterator/erase_if_container.h +53 -0
  808. cuda/cccl/headers/include/cuda/std/__iterator/front_insert_iterator.h +99 -0
  809. cuda/cccl/headers/include/cuda/std/__iterator/incrementable_traits.h +150 -0
  810. cuda/cccl/headers/include/cuda/std/__iterator/indirectly_comparable.h +55 -0
  811. cuda/cccl/headers/include/cuda/std/__iterator/insert_iterator.h +107 -0
  812. cuda/cccl/headers/include/cuda/std/__iterator/istream_iterator.h +146 -0
  813. cuda/cccl/headers/include/cuda/std/__iterator/istreambuf_iterator.h +161 -0
  814. cuda/cccl/headers/include/cuda/std/__iterator/iter_move.h +161 -0
  815. cuda/cccl/headers/include/cuda/std/__iterator/iter_swap.h +163 -0
  816. cuda/cccl/headers/include/cuda/std/__iterator/iterator.h +44 -0
  817. cuda/cccl/headers/include/cuda/std/__iterator/iterator_traits.h +932 -0
  818. cuda/cccl/headers/include/cuda/std/__iterator/mergeable.h +72 -0
  819. cuda/cccl/headers/include/cuda/std/__iterator/move_iterator.h +433 -0
  820. cuda/cccl/headers/include/cuda/std/__iterator/move_sentinel.h +73 -0
  821. cuda/cccl/headers/include/cuda/std/__iterator/next.h +101 -0
  822. cuda/cccl/headers/include/cuda/std/__iterator/ostream_iterator.h +95 -0
  823. cuda/cccl/headers/include/cuda/std/__iterator/ostreambuf_iterator.h +100 -0
  824. cuda/cccl/headers/include/cuda/std/__iterator/permutable.h +54 -0
  825. cuda/cccl/headers/include/cuda/std/__iterator/prev.h +91 -0
  826. cuda/cccl/headers/include/cuda/std/__iterator/projected.h +61 -0
  827. cuda/cccl/headers/include/cuda/std/__iterator/readable_traits.h +185 -0
  828. cuda/cccl/headers/include/cuda/std/__iterator/reverse_access.h +142 -0
  829. cuda/cccl/headers/include/cuda/std/__iterator/reverse_iterator.h +371 -0
  830. cuda/cccl/headers/include/cuda/std/__iterator/size.h +69 -0
  831. cuda/cccl/headers/include/cuda/std/__iterator/sortable.h +55 -0
  832. cuda/cccl/headers/include/cuda/std/__iterator/unreachable_sentinel.h +84 -0
  833. cuda/cccl/headers/include/cuda/std/__iterator/wrap_iter.h +247 -0
  834. cuda/cccl/headers/include/cuda/std/__latch/latch.h +88 -0
  835. cuda/cccl/headers/include/cuda/std/__limits/numeric_limits.h +617 -0
  836. cuda/cccl/headers/include/cuda/std/__limits/numeric_limits_ext.h +753 -0
  837. cuda/cccl/headers/include/cuda/std/__linalg/conj_if_needed.h +78 -0
  838. cuda/cccl/headers/include/cuda/std/__linalg/conjugate_transposed.h +54 -0
  839. cuda/cccl/headers/include/cuda/std/__linalg/conjugated.h +139 -0
  840. cuda/cccl/headers/include/cuda/std/__linalg/scaled.h +132 -0
  841. cuda/cccl/headers/include/cuda/std/__linalg/transposed.h +321 -0
  842. cuda/cccl/headers/include/cuda/std/__mdspan/aligned_accessor.h +97 -0
  843. cuda/cccl/headers/include/cuda/std/__mdspan/concepts.h +138 -0
  844. cuda/cccl/headers/include/cuda/std/__mdspan/default_accessor.h +73 -0
  845. cuda/cccl/headers/include/cuda/std/__mdspan/empty_base.h +352 -0
  846. cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +757 -0
  847. cuda/cccl/headers/include/cuda/std/__mdspan/layout_left.h +314 -0
  848. cuda/cccl/headers/include/cuda/std/__mdspan/layout_right.h +307 -0
  849. cuda/cccl/headers/include/cuda/std/__mdspan/layout_stride.h +605 -0
  850. cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +499 -0
  851. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_extents.h +193 -0
  852. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_helper.h +189 -0
  853. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_mapping.h +344 -0
  854. cuda/cccl/headers/include/cuda/std/__memory/addressof.h +64 -0
  855. cuda/cccl/headers/include/cuda/std/__memory/align.h +67 -0
  856. cuda/cccl/headers/include/cuda/std/__memory/allocate_at_least.h +81 -0
  857. cuda/cccl/headers/include/cuda/std/__memory/allocation_guard.h +100 -0
  858. cuda/cccl/headers/include/cuda/std/__memory/allocator.h +320 -0
  859. cuda/cccl/headers/include/cuda/std/__memory/allocator_arg_t.h +84 -0
  860. cuda/cccl/headers/include/cuda/std/__memory/allocator_destructor.h +59 -0
  861. cuda/cccl/headers/include/cuda/std/__memory/allocator_traits.h +552 -0
  862. cuda/cccl/headers/include/cuda/std/__memory/assume_aligned.h +60 -0
  863. cuda/cccl/headers/include/cuda/std/__memory/builtin_new_allocator.h +87 -0
  864. cuda/cccl/headers/include/cuda/std/__memory/compressed_pair.h +225 -0
  865. cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +248 -0
  866. cuda/cccl/headers/include/cuda/std/__memory/destruct_n.h +91 -0
  867. cuda/cccl/headers/include/cuda/std/__memory/is_sufficiently_aligned.h +43 -0
  868. cuda/cccl/headers/include/cuda/std/__memory/pointer_traits.h +261 -0
  869. cuda/cccl/headers/include/cuda/std/__memory/runtime_assume_aligned.h +62 -0
  870. cuda/cccl/headers/include/cuda/std/__memory/temporary_buffer.h +92 -0
  871. cuda/cccl/headers/include/cuda/std/__memory/uninitialized_algorithms.h +682 -0
  872. cuda/cccl/headers/include/cuda/std/__memory/unique_ptr.h +767 -0
  873. cuda/cccl/headers/include/cuda/std/__memory/uses_allocator.h +54 -0
  874. cuda/cccl/headers/include/cuda/std/__memory/voidify.h +41 -0
  875. cuda/cccl/headers/include/cuda/std/__memory_ +34 -0
  876. cuda/cccl/headers/include/cuda/std/__new/allocate.h +126 -0
  877. cuda/cccl/headers/include/cuda/std/__new/bad_alloc.h +57 -0
  878. cuda/cccl/headers/include/cuda/std/__new/launder.h +49 -0
  879. cuda/cccl/headers/include/cuda/std/__new_ +29 -0
  880. cuda/cccl/headers/include/cuda/std/__numeric/accumulate.h +56 -0
  881. cuda/cccl/headers/include/cuda/std/__numeric/adjacent_difference.h +72 -0
  882. cuda/cccl/headers/include/cuda/std/__numeric/exclusive_scan.h +66 -0
  883. cuda/cccl/headers/include/cuda/std/__numeric/gcd_lcm.h +78 -0
  884. cuda/cccl/headers/include/cuda/std/__numeric/inclusive_scan.h +73 -0
  885. cuda/cccl/headers/include/cuda/std/__numeric/inner_product.h +62 -0
  886. cuda/cccl/headers/include/cuda/std/__numeric/iota.h +42 -0
  887. cuda/cccl/headers/include/cuda/std/__numeric/midpoint.h +97 -0
  888. cuda/cccl/headers/include/cuda/std/__numeric/partial_sum.h +69 -0
  889. cuda/cccl/headers/include/cuda/std/__numeric/reduce.h +60 -0
  890. cuda/cccl/headers/include/cuda/std/__numeric/transform_exclusive_scan.h +51 -0
  891. cuda/cccl/headers/include/cuda/std/__numeric/transform_inclusive_scan.h +65 -0
  892. cuda/cccl/headers/include/cuda/std/__numeric/transform_reduce.h +72 -0
  893. cuda/cccl/headers/include/cuda/std/__optional/bad_optional_access.h +74 -0
  894. cuda/cccl/headers/include/cuda/std/__optional/hash.h +53 -0
  895. cuda/cccl/headers/include/cuda/std/__optional/make_optional.h +61 -0
  896. cuda/cccl/headers/include/cuda/std/__optional/nullopt.h +43 -0
  897. cuda/cccl/headers/include/cuda/std/__optional/optional.h +859 -0
  898. cuda/cccl/headers/include/cuda/std/__optional/optional_base.h +432 -0
  899. cuda/cccl/headers/include/cuda/std/__optional/optional_ref.h +324 -0
  900. cuda/cccl/headers/include/cuda/std/__random/generate_canonical.h +56 -0
  901. cuda/cccl/headers/include/cuda/std/__random/is_seed_sequence.h +39 -0
  902. cuda/cccl/headers/include/cuda/std/__random/is_valid.h +106 -0
  903. cuda/cccl/headers/include/cuda/std/__random/linear_congruential_engine.h +398 -0
  904. cuda/cccl/headers/include/cuda/std/__random/uniform_int_distribution.h +335 -0
  905. cuda/cccl/headers/include/cuda/std/__random/uniform_real_distribution.h +183 -0
  906. cuda/cccl/headers/include/cuda/std/__random_ +29 -0
  907. cuda/cccl/headers/include/cuda/std/__ranges/access.h +303 -0
  908. cuda/cccl/headers/include/cuda/std/__ranges/all.h +98 -0
  909. cuda/cccl/headers/include/cuda/std/__ranges/concepts.h +314 -0
  910. cuda/cccl/headers/include/cuda/std/__ranges/counted.h +90 -0
  911. cuda/cccl/headers/include/cuda/std/__ranges/dangling.h +54 -0
  912. cuda/cccl/headers/include/cuda/std/__ranges/data.h +136 -0
  913. cuda/cccl/headers/include/cuda/std/__ranges/empty.h +109 -0
  914. cuda/cccl/headers/include/cuda/std/__ranges/empty_view.h +77 -0
  915. cuda/cccl/headers/include/cuda/std/__ranges/enable_borrowed_range.h +41 -0
  916. cuda/cccl/headers/include/cuda/std/__ranges/enable_view.h +78 -0
  917. cuda/cccl/headers/include/cuda/std/__ranges/from_range.h +36 -0
  918. cuda/cccl/headers/include/cuda/std/__ranges/iota_view.h +266 -0
  919. cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +410 -0
  920. cuda/cccl/headers/include/cuda/std/__ranges/owning_view.h +161 -0
  921. cuda/cccl/headers/include/cuda/std/__ranges/range_adaptor.h +110 -0
  922. cuda/cccl/headers/include/cuda/std/__ranges/rbegin.h +175 -0
  923. cuda/cccl/headers/include/cuda/std/__ranges/ref_view.h +121 -0
  924. cuda/cccl/headers/include/cuda/std/__ranges/rend.h +182 -0
  925. cuda/cccl/headers/include/cuda/std/__ranges/repeat_view.h +345 -0
  926. cuda/cccl/headers/include/cuda/std/__ranges/single_view.h +155 -0
  927. cuda/cccl/headers/include/cuda/std/__ranges/size.h +201 -0
  928. cuda/cccl/headers/include/cuda/std/__ranges/subrange.h +513 -0
  929. cuda/cccl/headers/include/cuda/std/__ranges/take_view.h +476 -0
  930. cuda/cccl/headers/include/cuda/std/__ranges/take_while_view.h +259 -0
  931. cuda/cccl/headers/include/cuda/std/__ranges/transform_view.h +522 -0
  932. cuda/cccl/headers/include/cuda/std/__ranges/unwrap_end.h +53 -0
  933. cuda/cccl/headers/include/cuda/std/__ranges/view_interface.h +183 -0
  934. cuda/cccl/headers/include/cuda/std/__ranges/views.h +38 -0
  935. cuda/cccl/headers/include/cuda/std/__semaphore/atomic_semaphore.h +233 -0
  936. cuda/cccl/headers/include/cuda/std/__semaphore/counting_semaphore.h +51 -0
  937. cuda/cccl/headers/include/cuda/std/__string/char_traits.h +191 -0
  938. cuda/cccl/headers/include/cuda/std/__string/constexpr_c_functions.h +581 -0
  939. cuda/cccl/headers/include/cuda/std/__string/helper_functions.h +296 -0
  940. cuda/cccl/headers/include/cuda/std/__string/string_view.h +244 -0
  941. cuda/cccl/headers/include/cuda/std/__string_ +29 -0
  942. cuda/cccl/headers/include/cuda/std/__system_error/errc.h +51 -0
  943. cuda/cccl/headers/include/cuda/std/__system_error_ +26 -0
  944. cuda/cccl/headers/include/cuda/std/__thread/threading_support.h +105 -0
  945. cuda/cccl/headers/include/cuda/std/__thread/threading_support_cuda.h +47 -0
  946. cuda/cccl/headers/include/cuda/std/__thread/threading_support_external.h +41 -0
  947. cuda/cccl/headers/include/cuda/std/__thread/threading_support_pthread.h +143 -0
  948. cuda/cccl/headers/include/cuda/std/__thread/threading_support_win32.h +87 -0
  949. cuda/cccl/headers/include/cuda/std/__tuple_dir/ignore.h +51 -0
  950. cuda/cccl/headers/include/cuda/std/__tuple_dir/make_tuple_types.h +98 -0
  951. cuda/cccl/headers/include/cuda/std/__tuple_dir/sfinae_helpers.h +269 -0
  952. cuda/cccl/headers/include/cuda/std/__tuple_dir/structured_bindings.h +218 -0
  953. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_element.h +70 -0
  954. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_indices.h +44 -0
  955. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +90 -0
  956. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like_ext.h +73 -0
  957. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_size.h +79 -0
  958. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_types.h +35 -0
  959. cuda/cccl/headers/include/cuda/std/__tuple_dir/vector_types.h +291 -0
  960. cuda/cccl/headers/include/cuda/std/__type_traits/add_const.h +40 -0
  961. cuda/cccl/headers/include/cuda/std/__type_traits/add_cv.h +40 -0
  962. cuda/cccl/headers/include/cuda/std/__type_traits/add_lvalue_reference.h +62 -0
  963. cuda/cccl/headers/include/cuda/std/__type_traits/add_pointer.h +65 -0
  964. cuda/cccl/headers/include/cuda/std/__type_traits/add_rvalue_reference.h +62 -0
  965. cuda/cccl/headers/include/cuda/std/__type_traits/add_volatile.h +40 -0
  966. cuda/cccl/headers/include/cuda/std/__type_traits/aligned_storage.h +149 -0
  967. cuda/cccl/headers/include/cuda/std/__type_traits/aligned_union.h +62 -0
  968. cuda/cccl/headers/include/cuda/std/__type_traits/alignment_of.h +41 -0
  969. cuda/cccl/headers/include/cuda/std/__type_traits/always_false.h +35 -0
  970. cuda/cccl/headers/include/cuda/std/__type_traits/can_extract_key.h +69 -0
  971. cuda/cccl/headers/include/cuda/std/__type_traits/common_reference.h +262 -0
  972. cuda/cccl/headers/include/cuda/std/__type_traits/common_type.h +173 -0
  973. cuda/cccl/headers/include/cuda/std/__type_traits/conditional.h +65 -0
  974. cuda/cccl/headers/include/cuda/std/__type_traits/conjunction.h +67 -0
  975. cuda/cccl/headers/include/cuda/std/__type_traits/copy_cv.h +50 -0
  976. cuda/cccl/headers/include/cuda/std/__type_traits/copy_cvref.h +148 -0
  977. cuda/cccl/headers/include/cuda/std/__type_traits/decay.h +83 -0
  978. cuda/cccl/headers/include/cuda/std/__type_traits/dependent_type.h +35 -0
  979. cuda/cccl/headers/include/cuda/std/__type_traits/disjunction.h +77 -0
  980. cuda/cccl/headers/include/cuda/std/__type_traits/enable_if.h +43 -0
  981. cuda/cccl/headers/include/cuda/std/__type_traits/extent.h +68 -0
  982. cuda/cccl/headers/include/cuda/std/__type_traits/fold.h +47 -0
  983. cuda/cccl/headers/include/cuda/std/__type_traits/has_unique_object_representation.h +47 -0
  984. cuda/cccl/headers/include/cuda/std/__type_traits/has_virtual_destructor.h +51 -0
  985. cuda/cccl/headers/include/cuda/std/__type_traits/integral_constant.h +62 -0
  986. cuda/cccl/headers/include/cuda/std/__type_traits/is_abstract.h +40 -0
  987. cuda/cccl/headers/include/cuda/std/__type_traits/is_aggregate.h +44 -0
  988. cuda/cccl/headers/include/cuda/std/__type_traits/is_allocator.h +46 -0
  989. cuda/cccl/headers/include/cuda/std/__type_traits/is_arithmetic.h +42 -0
  990. cuda/cccl/headers/include/cuda/std/__type_traits/is_array.h +62 -0
  991. cuda/cccl/headers/include/cuda/std/__type_traits/is_assignable.h +78 -0
  992. cuda/cccl/headers/include/cuda/std/__type_traits/is_base_of.h +83 -0
  993. cuda/cccl/headers/include/cuda/std/__type_traits/is_bounded_array.h +44 -0
  994. cuda/cccl/headers/include/cuda/std/__type_traits/is_callable.h +60 -0
  995. cuda/cccl/headers/include/cuda/std/__type_traits/is_char_like_type.h +38 -0
  996. cuda/cccl/headers/include/cuda/std/__type_traits/is_class.h +68 -0
  997. cuda/cccl/headers/include/cuda/std/__type_traits/is_compound.h +54 -0
  998. cuda/cccl/headers/include/cuda/std/__type_traits/is_const.h +56 -0
  999. cuda/cccl/headers/include/cuda/std/__type_traits/is_constant_evaluated.h +51 -0
  1000. cuda/cccl/headers/include/cuda/std/__type_traits/is_constructible.h +174 -0
  1001. cuda/cccl/headers/include/cuda/std/__type_traits/is_convertible.h +214 -0
  1002. cuda/cccl/headers/include/cuda/std/__type_traits/is_copy_assignable.h +43 -0
  1003. cuda/cccl/headers/include/cuda/std/__type_traits/is_copy_constructible.h +43 -0
  1004. cuda/cccl/headers/include/cuda/std/__type_traits/is_core_convertible.h +47 -0
  1005. cuda/cccl/headers/include/cuda/std/__type_traits/is_corresponding_member.h +42 -0
  1006. cuda/cccl/headers/include/cuda/std/__type_traits/is_default_constructible.h +40 -0
  1007. cuda/cccl/headers/include/cuda/std/__type_traits/is_destructible.h +115 -0
  1008. cuda/cccl/headers/include/cuda/std/__type_traits/is_empty.h +73 -0
  1009. cuda/cccl/headers/include/cuda/std/__type_traits/is_enum.h +68 -0
  1010. cuda/cccl/headers/include/cuda/std/__type_traits/is_execution_policy.h +81 -0
  1011. cuda/cccl/headers/include/cuda/std/__type_traits/is_extended_arithmetic.h +38 -0
  1012. cuda/cccl/headers/include/cuda/std/__type_traits/is_extended_floating_point.h +81 -0
  1013. cuda/cccl/headers/include/cuda/std/__type_traits/is_final.h +56 -0
  1014. cuda/cccl/headers/include/cuda/std/__type_traits/is_floating_point.h +53 -0
  1015. cuda/cccl/headers/include/cuda/std/__type_traits/is_function.h +61 -0
  1016. cuda/cccl/headers/include/cuda/std/__type_traits/is_fundamental.h +56 -0
  1017. cuda/cccl/headers/include/cuda/std/__type_traits/is_implicitly_default_constructible.h +57 -0
  1018. cuda/cccl/headers/include/cuda/std/__type_traits/is_integer.h +45 -0
  1019. cuda/cccl/headers/include/cuda/std/__type_traits/is_integral.h +123 -0
  1020. cuda/cccl/headers/include/cuda/std/__type_traits/is_layout_compatible.h +45 -0
  1021. cuda/cccl/headers/include/cuda/std/__type_traits/is_literal_type.h +59 -0
  1022. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_function_pointer.h +79 -0
  1023. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_object_pointer.h +57 -0
  1024. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_pointer.h +57 -0
  1025. cuda/cccl/headers/include/cuda/std/__type_traits/is_move_assignable.h +43 -0
  1026. cuda/cccl/headers/include/cuda/std/__type_traits/is_move_constructible.h +42 -0
  1027. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_assignable.h +70 -0
  1028. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_constructible.h +84 -0
  1029. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_convertible.h +59 -0
  1030. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_copy_assignable.h +60 -0
  1031. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_copy_constructible.h +43 -0
  1032. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_default_constructible.h +54 -0
  1033. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_destructible.h +82 -0
  1034. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_move_assignable.h +60 -0
  1035. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_move_constructible.h +42 -0
  1036. cuda/cccl/headers/include/cuda/std/__type_traits/is_null_pointer.h +43 -0
  1037. cuda/cccl/headers/include/cuda/std/__type_traits/is_object.h +57 -0
  1038. cuda/cccl/headers/include/cuda/std/__type_traits/is_one_of.h +37 -0
  1039. cuda/cccl/headers/include/cuda/std/__type_traits/is_pod.h +62 -0
  1040. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer.h +60 -0
  1041. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer_interconvertible_base_of.h +84 -0
  1042. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer_interconvertible_with_class.h +42 -0
  1043. cuda/cccl/headers/include/cuda/std/__type_traits/is_polymorphic.h +63 -0
  1044. cuda/cccl/headers/include/cuda/std/__type_traits/is_primary_template.h +119 -0
  1045. cuda/cccl/headers/include/cuda/std/__type_traits/is_reference.h +95 -0
  1046. cuda/cccl/headers/include/cuda/std/__type_traits/is_reference_wrapper.h +50 -0
  1047. cuda/cccl/headers/include/cuda/std/__type_traits/is_referenceable.h +55 -0
  1048. cuda/cccl/headers/include/cuda/std/__type_traits/is_same.h +84 -0
  1049. cuda/cccl/headers/include/cuda/std/__type_traits/is_scalar.h +60 -0
  1050. cuda/cccl/headers/include/cuda/std/__type_traits/is_scoped_enum.h +49 -0
  1051. cuda/cccl/headers/include/cuda/std/__type_traits/is_signed.h +65 -0
  1052. cuda/cccl/headers/include/cuda/std/__type_traits/is_signed_integer.h +59 -0
  1053. cuda/cccl/headers/include/cuda/std/__type_traits/is_standard_layout.h +57 -0
  1054. cuda/cccl/headers/include/cuda/std/__type_traits/is_swappable.h +201 -0
  1055. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivial.h +56 -0
  1056. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_assignable.h +70 -0
  1057. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_constructible.h +82 -0
  1058. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copy_assignable.h +60 -0
  1059. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copy_constructible.h +61 -0
  1060. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copyable.h +56 -0
  1061. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_default_constructible.h +55 -0
  1062. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_destructible.h +73 -0
  1063. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_move_assignable.h +60 -0
  1064. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_move_constructible.h +58 -0
  1065. cuda/cccl/headers/include/cuda/std/__type_traits/is_unbounded_array.h +43 -0
  1066. cuda/cccl/headers/include/cuda/std/__type_traits/is_union.h +57 -0
  1067. cuda/cccl/headers/include/cuda/std/__type_traits/is_unsigned.h +66 -0
  1068. cuda/cccl/headers/include/cuda/std/__type_traits/is_unsigned_integer.h +59 -0
  1069. cuda/cccl/headers/include/cuda/std/__type_traits/is_valid_expansion.h +41 -0
  1070. cuda/cccl/headers/include/cuda/std/__type_traits/is_void.h +55 -0
  1071. cuda/cccl/headers/include/cuda/std/__type_traits/is_volatile.h +56 -0
  1072. cuda/cccl/headers/include/cuda/std/__type_traits/lazy.h +35 -0
  1073. cuda/cccl/headers/include/cuda/std/__type_traits/make_const_lvalue_ref.h +36 -0
  1074. cuda/cccl/headers/include/cuda/std/__type_traits/make_nbit_int.h +107 -0
  1075. cuda/cccl/headers/include/cuda/std/__type_traits/make_signed.h +140 -0
  1076. cuda/cccl/headers/include/cuda/std/__type_traits/make_unsigned.h +151 -0
  1077. cuda/cccl/headers/include/cuda/std/__type_traits/maybe_const.h +36 -0
  1078. cuda/cccl/headers/include/cuda/std/__type_traits/nat.h +39 -0
  1079. cuda/cccl/headers/include/cuda/std/__type_traits/negation.h +44 -0
  1080. cuda/cccl/headers/include/cuda/std/__type_traits/num_bits.h +123 -0
  1081. cuda/cccl/headers/include/cuda/std/__type_traits/promote.h +163 -0
  1082. cuda/cccl/headers/include/cuda/std/__type_traits/rank.h +60 -0
  1083. cuda/cccl/headers/include/cuda/std/__type_traits/reference_constructs_from_temporary.h +57 -0
  1084. cuda/cccl/headers/include/cuda/std/__type_traits/reference_converts_from_temporary.h +56 -0
  1085. cuda/cccl/headers/include/cuda/std/__type_traits/remove_all_extents.h +66 -0
  1086. cuda/cccl/headers/include/cuda/std/__type_traits/remove_const.h +59 -0
  1087. cuda/cccl/headers/include/cuda/std/__type_traits/remove_const_ref.h +37 -0
  1088. cuda/cccl/headers/include/cuda/std/__type_traits/remove_cv.h +57 -0
  1089. cuda/cccl/headers/include/cuda/std/__type_traits/remove_cvref.h +57 -0
  1090. cuda/cccl/headers/include/cuda/std/__type_traits/remove_extent.h +65 -0
  1091. cuda/cccl/headers/include/cuda/std/__type_traits/remove_pointer.h +73 -0
  1092. cuda/cccl/headers/include/cuda/std/__type_traits/remove_reference.h +72 -0
  1093. cuda/cccl/headers/include/cuda/std/__type_traits/remove_volatile.h +58 -0
  1094. cuda/cccl/headers/include/cuda/std/__type_traits/result_of.h +47 -0
  1095. cuda/cccl/headers/include/cuda/std/__type_traits/type_identity.h +40 -0
  1096. cuda/cccl/headers/include/cuda/std/__type_traits/type_list.h +1067 -0
  1097. cuda/cccl/headers/include/cuda/std/__type_traits/type_set.h +131 -0
  1098. cuda/cccl/headers/include/cuda/std/__type_traits/underlying_type.h +66 -0
  1099. cuda/cccl/headers/include/cuda/std/__type_traits/void_t.h +34 -0
  1100. cuda/cccl/headers/include/cuda/std/__utility/as_const.h +52 -0
  1101. cuda/cccl/headers/include/cuda/std/__utility/auto_cast.h +34 -0
  1102. cuda/cccl/headers/include/cuda/std/__utility/cmp.h +116 -0
  1103. cuda/cccl/headers/include/cuda/std/__utility/convert_to_integral.h +101 -0
  1104. cuda/cccl/headers/include/cuda/std/__utility/declval.h +63 -0
  1105. cuda/cccl/headers/include/cuda/std/__utility/exception_guard.h +161 -0
  1106. cuda/cccl/headers/include/cuda/std/__utility/exchange.h +46 -0
  1107. cuda/cccl/headers/include/cuda/std/__utility/forward.h +59 -0
  1108. cuda/cccl/headers/include/cuda/std/__utility/forward_like.h +55 -0
  1109. cuda/cccl/headers/include/cuda/std/__utility/in_place.h +77 -0
  1110. cuda/cccl/headers/include/cuda/std/__utility/integer_sequence.h +251 -0
  1111. cuda/cccl/headers/include/cuda/std/__utility/monostate.h +99 -0
  1112. cuda/cccl/headers/include/cuda/std/__utility/move.h +74 -0
  1113. cuda/cccl/headers/include/cuda/std/__utility/pair.h +797 -0
  1114. cuda/cccl/headers/include/cuda/std/__utility/piecewise_construct.h +37 -0
  1115. cuda/cccl/headers/include/cuda/std/__utility/pod_tuple.h +527 -0
  1116. cuda/cccl/headers/include/cuda/std/__utility/priority_tag.h +40 -0
  1117. cuda/cccl/headers/include/cuda/std/__utility/rel_ops.h +63 -0
  1118. cuda/cccl/headers/include/cuda/std/__utility/swap.h +64 -0
  1119. cuda/cccl/headers/include/cuda/std/__utility/to_underlying.h +40 -0
  1120. cuda/cccl/headers/include/cuda/std/__utility/typeid.h +421 -0
  1121. cuda/cccl/headers/include/cuda/std/__utility/undefined.h +34 -0
  1122. cuda/cccl/headers/include/cuda/std/__utility/unreachable.h +37 -0
  1123. cuda/cccl/headers/include/cuda/std/array +518 -0
  1124. cuda/cccl/headers/include/cuda/std/atomic +818 -0
  1125. cuda/cccl/headers/include/cuda/std/barrier +42 -0
  1126. cuda/cccl/headers/include/cuda/std/bit +35 -0
  1127. cuda/cccl/headers/include/cuda/std/bitset +994 -0
  1128. cuda/cccl/headers/include/cuda/std/cassert +28 -0
  1129. cuda/cccl/headers/include/cuda/std/ccomplex +15 -0
  1130. cuda/cccl/headers/include/cuda/std/cfloat +59 -0
  1131. cuda/cccl/headers/include/cuda/std/chrono +26 -0
  1132. cuda/cccl/headers/include/cuda/std/climits +61 -0
  1133. cuda/cccl/headers/include/cuda/std/cmath +25 -0
  1134. cuda/cccl/headers/include/cuda/std/complex +50 -0
  1135. cuda/cccl/headers/include/cuda/std/concepts +48 -0
  1136. cuda/cccl/headers/include/cuda/std/cstddef +28 -0
  1137. cuda/cccl/headers/include/cuda/std/cstdint +178 -0
  1138. cuda/cccl/headers/include/cuda/std/cstdlib +30 -0
  1139. cuda/cccl/headers/include/cuda/std/cstring +110 -0
  1140. cuda/cccl/headers/include/cuda/std/ctime +154 -0
  1141. cuda/cccl/headers/include/cuda/std/detail/__config +45 -0
  1142. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +235 -0
  1143. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/algorithm +1722 -0
  1144. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/chrono +3630 -0
  1145. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/cmath +520 -0
  1146. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/iosfwd +128 -0
  1147. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/stdexcept +120 -0
  1148. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/tuple +1365 -0
  1149. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/variant +2142 -0
  1150. cuda/cccl/headers/include/cuda/std/execution +29 -0
  1151. cuda/cccl/headers/include/cuda/std/expected +30 -0
  1152. cuda/cccl/headers/include/cuda/std/functional +56 -0
  1153. cuda/cccl/headers/include/cuda/std/initializer_list +36 -0
  1154. cuda/cccl/headers/include/cuda/std/inplace_vector +2170 -0
  1155. cuda/cccl/headers/include/cuda/std/iterator +70 -0
  1156. cuda/cccl/headers/include/cuda/std/latch +34 -0
  1157. cuda/cccl/headers/include/cuda/std/limits +28 -0
  1158. cuda/cccl/headers/include/cuda/std/linalg +30 -0
  1159. cuda/cccl/headers/include/cuda/std/mdspan +38 -0
  1160. cuda/cccl/headers/include/cuda/std/memory +39 -0
  1161. cuda/cccl/headers/include/cuda/std/numbers +342 -0
  1162. cuda/cccl/headers/include/cuda/std/numeric +41 -0
  1163. cuda/cccl/headers/include/cuda/std/optional +31 -0
  1164. cuda/cccl/headers/include/cuda/std/ranges +69 -0
  1165. cuda/cccl/headers/include/cuda/std/ratio +417 -0
  1166. cuda/cccl/headers/include/cuda/std/semaphore +31 -0
  1167. cuda/cccl/headers/include/cuda/std/source_location +83 -0
  1168. cuda/cccl/headers/include/cuda/std/span +628 -0
  1169. cuda/cccl/headers/include/cuda/std/string_view +799 -0
  1170. cuda/cccl/headers/include/cuda/std/tuple +26 -0
  1171. cuda/cccl/headers/include/cuda/std/type_traits +177 -0
  1172. cuda/cccl/headers/include/cuda/std/utility +70 -0
  1173. cuda/cccl/headers/include/cuda/std/variant +25 -0
  1174. cuda/cccl/headers/include/cuda/std/version +245 -0
  1175. cuda/cccl/headers/include/cuda/stream +31 -0
  1176. cuda/cccl/headers/include/cuda/stream_ref +54 -0
  1177. cuda/cccl/headers/include/cuda/type_traits +27 -0
  1178. cuda/cccl/headers/include/cuda/utility +27 -0
  1179. cuda/cccl/headers/include/cuda/version +16 -0
  1180. cuda/cccl/headers/include/cuda/warp +28 -0
  1181. cuda/cccl/headers/include/cuda/work_stealing +26 -0
  1182. cuda/cccl/headers/include/nv/detail/__preprocessor +169 -0
  1183. cuda/cccl/headers/include/nv/detail/__target_macros +718 -0
  1184. cuda/cccl/headers/include/nv/target +235 -0
  1185. cuda/cccl/headers/include/thrust/addressof.h +22 -0
  1186. cuda/cccl/headers/include/thrust/adjacent_difference.h +254 -0
  1187. cuda/cccl/headers/include/thrust/advance.h +59 -0
  1188. cuda/cccl/headers/include/thrust/allocate_unique.h +299 -0
  1189. cuda/cccl/headers/include/thrust/binary_search.h +1910 -0
  1190. cuda/cccl/headers/include/thrust/complex.h +858 -0
  1191. cuda/cccl/headers/include/thrust/copy.h +506 -0
  1192. cuda/cccl/headers/include/thrust/count.h +245 -0
  1193. cuda/cccl/headers/include/thrust/detail/adjacent_difference.inl +95 -0
  1194. cuda/cccl/headers/include/thrust/detail/algorithm_wrapper.h +37 -0
  1195. cuda/cccl/headers/include/thrust/detail/alignment.h +81 -0
  1196. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +350 -0
  1197. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.inl +371 -0
  1198. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +45 -0
  1199. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.inl +242 -0
  1200. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +39 -0
  1201. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.inl +137 -0
  1202. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +39 -0
  1203. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.inl +99 -0
  1204. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +53 -0
  1205. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.inl +68 -0
  1206. cuda/cccl/headers/include/thrust/detail/allocator/no_throw_allocator.h +76 -0
  1207. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +102 -0
  1208. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.inl +86 -0
  1209. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +79 -0
  1210. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.inl +79 -0
  1211. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.h +39 -0
  1212. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.inl +98 -0
  1213. cuda/cccl/headers/include/thrust/detail/allocator_aware_execution_policy.h +99 -0
  1214. cuda/cccl/headers/include/thrust/detail/binary_search.inl +525 -0
  1215. cuda/cccl/headers/include/thrust/detail/caching_allocator.h +47 -0
  1216. cuda/cccl/headers/include/thrust/detail/complex/arithmetic.h +255 -0
  1217. cuda/cccl/headers/include/thrust/detail/complex/c99math.h +64 -0
  1218. cuda/cccl/headers/include/thrust/detail/complex/catrig.h +875 -0
  1219. cuda/cccl/headers/include/thrust/detail/complex/catrigf.h +589 -0
  1220. cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +233 -0
  1221. cuda/cccl/headers/include/thrust/detail/complex/ccoshf.h +161 -0
  1222. cuda/cccl/headers/include/thrust/detail/complex/cexp.h +195 -0
  1223. cuda/cccl/headers/include/thrust/detail/complex/cexpf.h +173 -0
  1224. cuda/cccl/headers/include/thrust/detail/complex/clog.h +223 -0
  1225. cuda/cccl/headers/include/thrust/detail/complex/clogf.h +210 -0
  1226. cuda/cccl/headers/include/thrust/detail/complex/complex.inl +263 -0
  1227. cuda/cccl/headers/include/thrust/detail/complex/cpow.h +50 -0
  1228. cuda/cccl/headers/include/thrust/detail/complex/cproj.h +81 -0
  1229. cuda/cccl/headers/include/thrust/detail/complex/csinh.h +228 -0
  1230. cuda/cccl/headers/include/thrust/detail/complex/csinhf.h +168 -0
  1231. cuda/cccl/headers/include/thrust/detail/complex/csqrt.h +178 -0
  1232. cuda/cccl/headers/include/thrust/detail/complex/csqrtf.h +174 -0
  1233. cuda/cccl/headers/include/thrust/detail/complex/ctanh.h +208 -0
  1234. cuda/cccl/headers/include/thrust/detail/complex/ctanhf.h +133 -0
  1235. cuda/cccl/headers/include/thrust/detail/complex/math_private.h +138 -0
  1236. cuda/cccl/headers/include/thrust/detail/complex/stream.h +73 -0
  1237. cuda/cccl/headers/include/thrust/detail/config/compiler.h +38 -0
  1238. cuda/cccl/headers/include/thrust/detail/config/config.h +43 -0
  1239. cuda/cccl/headers/include/thrust/detail/config/cpp_dialect.h +78 -0
  1240. cuda/cccl/headers/include/thrust/detail/config/device_system.h +55 -0
  1241. cuda/cccl/headers/include/thrust/detail/config/host_system.h +48 -0
  1242. cuda/cccl/headers/include/thrust/detail/config/memory_resource.h +41 -0
  1243. cuda/cccl/headers/include/thrust/detail/config/namespace.h +162 -0
  1244. cuda/cccl/headers/include/thrust/detail/config/simple_defines.h +48 -0
  1245. cuda/cccl/headers/include/thrust/detail/config.h +36 -0
  1246. cuda/cccl/headers/include/thrust/detail/contiguous_storage.h +228 -0
  1247. cuda/cccl/headers/include/thrust/detail/contiguous_storage.inl +273 -0
  1248. cuda/cccl/headers/include/thrust/detail/copy.h +72 -0
  1249. cuda/cccl/headers/include/thrust/detail/copy.inl +129 -0
  1250. cuda/cccl/headers/include/thrust/detail/copy_if.h +62 -0
  1251. cuda/cccl/headers/include/thrust/detail/copy_if.inl +102 -0
  1252. cuda/cccl/headers/include/thrust/detail/count.h +55 -0
  1253. cuda/cccl/headers/include/thrust/detail/count.inl +89 -0
  1254. cuda/cccl/headers/include/thrust/detail/device_delete.inl +52 -0
  1255. cuda/cccl/headers/include/thrust/detail/device_free.inl +47 -0
  1256. cuda/cccl/headers/include/thrust/detail/device_new.inl +61 -0
  1257. cuda/cccl/headers/include/thrust/detail/device_ptr.inl +48 -0
  1258. cuda/cccl/headers/include/thrust/detail/equal.inl +93 -0
  1259. cuda/cccl/headers/include/thrust/detail/event_error.h +160 -0
  1260. cuda/cccl/headers/include/thrust/detail/execute_with_allocator.h +80 -0
  1261. cuda/cccl/headers/include/thrust/detail/execute_with_allocator_fwd.h +61 -0
  1262. cuda/cccl/headers/include/thrust/detail/execution_policy.h +80 -0
  1263. cuda/cccl/headers/include/thrust/detail/extrema.inl +184 -0
  1264. cuda/cccl/headers/include/thrust/detail/fill.inl +86 -0
  1265. cuda/cccl/headers/include/thrust/detail/find.inl +113 -0
  1266. cuda/cccl/headers/include/thrust/detail/for_each.inl +84 -0
  1267. cuda/cccl/headers/include/thrust/detail/function.h +49 -0
  1268. cuda/cccl/headers/include/thrust/detail/functional/actor.h +214 -0
  1269. cuda/cccl/headers/include/thrust/detail/functional/operators.h +386 -0
  1270. cuda/cccl/headers/include/thrust/detail/gather.inl +173 -0
  1271. cuda/cccl/headers/include/thrust/detail/generate.inl +86 -0
  1272. cuda/cccl/headers/include/thrust/detail/get_iterator_value.h +62 -0
  1273. cuda/cccl/headers/include/thrust/detail/inner_product.inl +118 -0
  1274. cuda/cccl/headers/include/thrust/detail/integer_math.h +130 -0
  1275. cuda/cccl/headers/include/thrust/detail/internal_functional.h +293 -0
  1276. cuda/cccl/headers/include/thrust/detail/logical.inl +113 -0
  1277. cuda/cccl/headers/include/thrust/detail/malloc_and_free.h +77 -0
  1278. cuda/cccl/headers/include/thrust/detail/malloc_and_free_fwd.h +45 -0
  1279. cuda/cccl/headers/include/thrust/detail/memory_algorithms.h +209 -0
  1280. cuda/cccl/headers/include/thrust/detail/memory_wrapper.h +40 -0
  1281. cuda/cccl/headers/include/thrust/detail/merge.inl +276 -0
  1282. cuda/cccl/headers/include/thrust/detail/mismatch.inl +94 -0
  1283. cuda/cccl/headers/include/thrust/detail/mpl/math.h +164 -0
  1284. cuda/cccl/headers/include/thrust/detail/numeric_wrapper.h +37 -0
  1285. cuda/cccl/headers/include/thrust/detail/overlapped_copy.h +124 -0
  1286. cuda/cccl/headers/include/thrust/detail/partition.inl +378 -0
  1287. cuda/cccl/headers/include/thrust/detail/pointer.h +309 -0
  1288. cuda/cccl/headers/include/thrust/detail/preprocessor.h +652 -0
  1289. cuda/cccl/headers/include/thrust/detail/random_bijection.h +177 -0
  1290. cuda/cccl/headers/include/thrust/detail/range/head_flags.h +116 -0
  1291. cuda/cccl/headers/include/thrust/detail/range/tail_flags.h +130 -0
  1292. cuda/cccl/headers/include/thrust/detail/raw_pointer_cast.h +52 -0
  1293. cuda/cccl/headers/include/thrust/detail/raw_reference_cast.h +192 -0
  1294. cuda/cccl/headers/include/thrust/detail/reduce.inl +377 -0
  1295. cuda/cccl/headers/include/thrust/detail/reference.h +500 -0
  1296. cuda/cccl/headers/include/thrust/detail/reference_forward_declaration.h +35 -0
  1297. cuda/cccl/headers/include/thrust/detail/remove.inl +213 -0
  1298. cuda/cccl/headers/include/thrust/detail/replace.inl +231 -0
  1299. cuda/cccl/headers/include/thrust/detail/reverse.inl +88 -0
  1300. cuda/cccl/headers/include/thrust/detail/scan.inl +518 -0
  1301. cuda/cccl/headers/include/thrust/detail/scatter.inl +157 -0
  1302. cuda/cccl/headers/include/thrust/detail/seq.h +54 -0
  1303. cuda/cccl/headers/include/thrust/detail/sequence.inl +109 -0
  1304. cuda/cccl/headers/include/thrust/detail/set_operations.inl +981 -0
  1305. cuda/cccl/headers/include/thrust/detail/shuffle.inl +86 -0
  1306. cuda/cccl/headers/include/thrust/detail/sort.inl +373 -0
  1307. cuda/cccl/headers/include/thrust/detail/static_assert.h +58 -0
  1308. cuda/cccl/headers/include/thrust/detail/static_map.h +167 -0
  1309. cuda/cccl/headers/include/thrust/detail/swap_ranges.inl +65 -0
  1310. cuda/cccl/headers/include/thrust/detail/tabulate.inl +62 -0
  1311. cuda/cccl/headers/include/thrust/detail/temporary_array.h +153 -0
  1312. cuda/cccl/headers/include/thrust/detail/temporary_array.inl +120 -0
  1313. cuda/cccl/headers/include/thrust/detail/temporary_buffer.h +81 -0
  1314. cuda/cccl/headers/include/thrust/detail/transform_reduce.inl +69 -0
  1315. cuda/cccl/headers/include/thrust/detail/transform_scan.inl +161 -0
  1316. cuda/cccl/headers/include/thrust/detail/trivial_sequence.h +130 -0
  1317. cuda/cccl/headers/include/thrust/detail/tuple_meta_transform.h +61 -0
  1318. cuda/cccl/headers/include/thrust/detail/type_deduction.h +62 -0
  1319. cuda/cccl/headers/include/thrust/detail/type_traits/has_member_function.h +47 -0
  1320. cuda/cccl/headers/include/thrust/detail/type_traits/has_nested_type.h +43 -0
  1321. cuda/cccl/headers/include/thrust/detail/type_traits/is_call_possible.h +167 -0
  1322. cuda/cccl/headers/include/thrust/detail/type_traits/is_commutative.h +69 -0
  1323. cuda/cccl/headers/include/thrust/detail/type_traits/is_metafunction_defined.h +39 -0
  1324. cuda/cccl/headers/include/thrust/detail/type_traits/is_thrust_pointer.h +59 -0
  1325. cuda/cccl/headers/include/thrust/detail/type_traits/iterator/is_discard_iterator.h +44 -0
  1326. cuda/cccl/headers/include/thrust/detail/type_traits/iterator/is_output_iterator.h +46 -0
  1327. cuda/cccl/headers/include/thrust/detail/type_traits/minimum_type.h +89 -0
  1328. cuda/cccl/headers/include/thrust/detail/type_traits/pointer_traits.h +332 -0
  1329. cuda/cccl/headers/include/thrust/detail/type_traits.h +136 -0
  1330. cuda/cccl/headers/include/thrust/detail/uninitialized_copy.inl +90 -0
  1331. cuda/cccl/headers/include/thrust/detail/uninitialized_fill.inl +86 -0
  1332. cuda/cccl/headers/include/thrust/detail/unique.inl +373 -0
  1333. cuda/cccl/headers/include/thrust/detail/use_default.h +34 -0
  1334. cuda/cccl/headers/include/thrust/detail/util/align.h +59 -0
  1335. cuda/cccl/headers/include/thrust/detail/vector_base.h +615 -0
  1336. cuda/cccl/headers/include/thrust/detail/vector_base.inl +1212 -0
  1337. cuda/cccl/headers/include/thrust/device_allocator.h +134 -0
  1338. cuda/cccl/headers/include/thrust/device_delete.h +59 -0
  1339. cuda/cccl/headers/include/thrust/device_free.h +72 -0
  1340. cuda/cccl/headers/include/thrust/device_make_unique.h +56 -0
  1341. cuda/cccl/headers/include/thrust/device_malloc.h +84 -0
  1342. cuda/cccl/headers/include/thrust/device_malloc_allocator.h +190 -0
  1343. cuda/cccl/headers/include/thrust/device_new.h +91 -0
  1344. cuda/cccl/headers/include/thrust/device_new_allocator.h +179 -0
  1345. cuda/cccl/headers/include/thrust/device_ptr.h +196 -0
  1346. cuda/cccl/headers/include/thrust/device_reference.h +983 -0
  1347. cuda/cccl/headers/include/thrust/device_vector.h +576 -0
  1348. cuda/cccl/headers/include/thrust/distance.h +43 -0
  1349. cuda/cccl/headers/include/thrust/equal.h +247 -0
  1350. cuda/cccl/headers/include/thrust/execution_policy.h +384 -0
  1351. cuda/cccl/headers/include/thrust/extrema.h +657 -0
  1352. cuda/cccl/headers/include/thrust/fill.h +200 -0
  1353. cuda/cccl/headers/include/thrust/find.h +382 -0
  1354. cuda/cccl/headers/include/thrust/for_each.h +261 -0
  1355. cuda/cccl/headers/include/thrust/functional.h +396 -0
  1356. cuda/cccl/headers/include/thrust/gather.h +464 -0
  1357. cuda/cccl/headers/include/thrust/generate.h +193 -0
  1358. cuda/cccl/headers/include/thrust/host_vector.h +576 -0
  1359. cuda/cccl/headers/include/thrust/inner_product.h +264 -0
  1360. cuda/cccl/headers/include/thrust/iterator/constant_iterator.h +221 -0
  1361. cuda/cccl/headers/include/thrust/iterator/counting_iterator.h +335 -0
  1362. cuda/cccl/headers/include/thrust/iterator/detail/any_assign.h +48 -0
  1363. cuda/cccl/headers/include/thrust/iterator/detail/any_system_tag.h +43 -0
  1364. cuda/cccl/headers/include/thrust/iterator/detail/device_system_tag.h +38 -0
  1365. cuda/cccl/headers/include/thrust/iterator/detail/host_system_tag.h +38 -0
  1366. cuda/cccl/headers/include/thrust/iterator/detail/iterator_adaptor_base.h +81 -0
  1367. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_to_system.h +51 -0
  1368. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_to_traversal.h +62 -0
  1369. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h +57 -0
  1370. cuda/cccl/headers/include/thrust/iterator/detail/iterator_facade_category.h +199 -0
  1371. cuda/cccl/headers/include/thrust/iterator/detail/iterator_traversal_tags.h +50 -0
  1372. cuda/cccl/headers/include/thrust/iterator/detail/minimum_system.h +53 -0
  1373. cuda/cccl/headers/include/thrust/iterator/detail/normal_iterator.h +69 -0
  1374. cuda/cccl/headers/include/thrust/iterator/detail/retag.h +104 -0
  1375. cuda/cccl/headers/include/thrust/iterator/detail/tagged_iterator.h +81 -0
  1376. cuda/cccl/headers/include/thrust/iterator/detail/tuple_of_iterator_references.h +174 -0
  1377. cuda/cccl/headers/include/thrust/iterator/discard_iterator.h +163 -0
  1378. cuda/cccl/headers/include/thrust/iterator/iterator_adaptor.h +251 -0
  1379. cuda/cccl/headers/include/thrust/iterator/iterator_categories.h +215 -0
  1380. cuda/cccl/headers/include/thrust/iterator/iterator_facade.h +660 -0
  1381. cuda/cccl/headers/include/thrust/iterator/iterator_traits.h +311 -0
  1382. cuda/cccl/headers/include/thrust/iterator/offset_iterator.h +194 -0
  1383. cuda/cccl/headers/include/thrust/iterator/permutation_iterator.h +204 -0
  1384. cuda/cccl/headers/include/thrust/iterator/retag.h +72 -0
  1385. cuda/cccl/headers/include/thrust/iterator/reverse_iterator.h +51 -0
  1386. cuda/cccl/headers/include/thrust/iterator/shuffle_iterator.h +185 -0
  1387. cuda/cccl/headers/include/thrust/iterator/strided_iterator.h +152 -0
  1388. cuda/cccl/headers/include/thrust/iterator/tabulate_output_iterator.h +149 -0
  1389. cuda/cccl/headers/include/thrust/iterator/transform_input_output_iterator.h +226 -0
  1390. cuda/cccl/headers/include/thrust/iterator/transform_iterator.h +351 -0
  1391. cuda/cccl/headers/include/thrust/iterator/transform_output_iterator.h +190 -0
  1392. cuda/cccl/headers/include/thrust/iterator/zip_iterator.h +359 -0
  1393. cuda/cccl/headers/include/thrust/logical.h +290 -0
  1394. cuda/cccl/headers/include/thrust/memory.h +299 -0
  1395. cuda/cccl/headers/include/thrust/merge.h +725 -0
  1396. cuda/cccl/headers/include/thrust/mismatch.h +261 -0
  1397. cuda/cccl/headers/include/thrust/mr/allocator.h +229 -0
  1398. cuda/cccl/headers/include/thrust/mr/device_memory_resource.h +41 -0
  1399. cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +526 -0
  1400. cuda/cccl/headers/include/thrust/mr/disjoint_sync_pool.h +118 -0
  1401. cuda/cccl/headers/include/thrust/mr/disjoint_tls_pool.h +67 -0
  1402. cuda/cccl/headers/include/thrust/mr/fancy_pointer_resource.h +67 -0
  1403. cuda/cccl/headers/include/thrust/mr/host_memory_resource.h +38 -0
  1404. cuda/cccl/headers/include/thrust/mr/memory_resource.h +217 -0
  1405. cuda/cccl/headers/include/thrust/mr/new.h +100 -0
  1406. cuda/cccl/headers/include/thrust/mr/polymorphic_adaptor.h +63 -0
  1407. cuda/cccl/headers/include/thrust/mr/pool.h +526 -0
  1408. cuda/cccl/headers/include/thrust/mr/pool_options.h +174 -0
  1409. cuda/cccl/headers/include/thrust/mr/sync_pool.h +114 -0
  1410. cuda/cccl/headers/include/thrust/mr/tls_pool.h +64 -0
  1411. cuda/cccl/headers/include/thrust/mr/universal_memory_resource.h +29 -0
  1412. cuda/cccl/headers/include/thrust/mr/validator.h +56 -0
  1413. cuda/cccl/headers/include/thrust/pair.h +99 -0
  1414. cuda/cccl/headers/include/thrust/partition.h +1391 -0
  1415. cuda/cccl/headers/include/thrust/per_device_resource.h +98 -0
  1416. cuda/cccl/headers/include/thrust/random/detail/discard_block_engine.inl +184 -0
  1417. cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine.inl +155 -0
  1418. cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine_discard.h +104 -0
  1419. cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine.inl +151 -0
  1420. cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h +53 -0
  1421. cuda/cccl/headers/include/thrust/random/detail/mod.h +101 -0
  1422. cuda/cccl/headers/include/thrust/random/detail/normal_distribution.inl +187 -0
  1423. cuda/cccl/headers/include/thrust/random/detail/normal_distribution_base.h +160 -0
  1424. cuda/cccl/headers/include/thrust/random/detail/random_core_access.h +63 -0
  1425. cuda/cccl/headers/include/thrust/random/detail/subtract_with_carry_engine.inl +201 -0
  1426. cuda/cccl/headers/include/thrust/random/detail/uniform_int_distribution.inl +198 -0
  1427. cuda/cccl/headers/include/thrust/random/detail/uniform_real_distribution.inl +200 -0
  1428. cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine.inl +183 -0
  1429. cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine_max.h +217 -0
  1430. cuda/cccl/headers/include/thrust/random/discard_block_engine.h +240 -0
  1431. cuda/cccl/headers/include/thrust/random/linear_congruential_engine.h +289 -0
  1432. cuda/cccl/headers/include/thrust/random/linear_feedback_shift_engine.h +217 -0
  1433. cuda/cccl/headers/include/thrust/random/normal_distribution.h +257 -0
  1434. cuda/cccl/headers/include/thrust/random/subtract_with_carry_engine.h +247 -0
  1435. cuda/cccl/headers/include/thrust/random/uniform_int_distribution.h +261 -0
  1436. cuda/cccl/headers/include/thrust/random/uniform_real_distribution.h +258 -0
  1437. cuda/cccl/headers/include/thrust/random/xor_combine_engine.h +255 -0
  1438. cuda/cccl/headers/include/thrust/random.h +120 -0
  1439. cuda/cccl/headers/include/thrust/reduce.h +1113 -0
  1440. cuda/cccl/headers/include/thrust/remove.h +768 -0
  1441. cuda/cccl/headers/include/thrust/replace.h +826 -0
  1442. cuda/cccl/headers/include/thrust/reverse.h +215 -0
  1443. cuda/cccl/headers/include/thrust/scan.h +1671 -0
  1444. cuda/cccl/headers/include/thrust/scatter.h +446 -0
  1445. cuda/cccl/headers/include/thrust/sequence.h +277 -0
  1446. cuda/cccl/headers/include/thrust/set_operations.h +3026 -0
  1447. cuda/cccl/headers/include/thrust/shuffle.h +182 -0
  1448. cuda/cccl/headers/include/thrust/sort.h +1320 -0
  1449. cuda/cccl/headers/include/thrust/swap.h +147 -0
  1450. cuda/cccl/headers/include/thrust/system/cpp/detail/adjacent_difference.h +30 -0
  1451. cuda/cccl/headers/include/thrust/system/cpp/detail/assign_value.h +30 -0
  1452. cuda/cccl/headers/include/thrust/system/cpp/detail/binary_search.h +32 -0
  1453. cuda/cccl/headers/include/thrust/system/cpp/detail/copy.h +30 -0
  1454. cuda/cccl/headers/include/thrust/system/cpp/detail/copy_if.h +30 -0
  1455. cuda/cccl/headers/include/thrust/system/cpp/detail/count.h +29 -0
  1456. cuda/cccl/headers/include/thrust/system/cpp/detail/equal.h +29 -0
  1457. cuda/cccl/headers/include/thrust/system/cpp/detail/execution_policy.h +90 -0
  1458. cuda/cccl/headers/include/thrust/system/cpp/detail/extrema.h +30 -0
  1459. cuda/cccl/headers/include/thrust/system/cpp/detail/fill.h +29 -0
  1460. cuda/cccl/headers/include/thrust/system/cpp/detail/find.h +30 -0
  1461. cuda/cccl/headers/include/thrust/system/cpp/detail/for_each.h +30 -0
  1462. cuda/cccl/headers/include/thrust/system/cpp/detail/gather.h +29 -0
  1463. cuda/cccl/headers/include/thrust/system/cpp/detail/generate.h +29 -0
  1464. cuda/cccl/headers/include/thrust/system/cpp/detail/get_value.h +30 -0
  1465. cuda/cccl/headers/include/thrust/system/cpp/detail/inner_product.h +29 -0
  1466. cuda/cccl/headers/include/thrust/system/cpp/detail/iter_swap.h +30 -0
  1467. cuda/cccl/headers/include/thrust/system/cpp/detail/logical.h +29 -0
  1468. cuda/cccl/headers/include/thrust/system/cpp/detail/malloc_and_free.h +30 -0
  1469. cuda/cccl/headers/include/thrust/system/cpp/detail/memory.inl +60 -0
  1470. cuda/cccl/headers/include/thrust/system/cpp/detail/merge.h +30 -0
  1471. cuda/cccl/headers/include/thrust/system/cpp/detail/mismatch.h +29 -0
  1472. cuda/cccl/headers/include/thrust/system/cpp/detail/par.h +62 -0
  1473. cuda/cccl/headers/include/thrust/system/cpp/detail/partition.h +30 -0
  1474. cuda/cccl/headers/include/thrust/system/cpp/detail/per_device_resource.h +29 -0
  1475. cuda/cccl/headers/include/thrust/system/cpp/detail/reduce.h +30 -0
  1476. cuda/cccl/headers/include/thrust/system/cpp/detail/reduce_by_key.h +30 -0
  1477. cuda/cccl/headers/include/thrust/system/cpp/detail/remove.h +30 -0
  1478. cuda/cccl/headers/include/thrust/system/cpp/detail/replace.h +29 -0
  1479. cuda/cccl/headers/include/thrust/system/cpp/detail/reverse.h +29 -0
  1480. cuda/cccl/headers/include/thrust/system/cpp/detail/scan.h +30 -0
  1481. cuda/cccl/headers/include/thrust/system/cpp/detail/scan_by_key.h +30 -0
  1482. cuda/cccl/headers/include/thrust/system/cpp/detail/scatter.h +29 -0
  1483. cuda/cccl/headers/include/thrust/system/cpp/detail/sequence.h +29 -0
  1484. cuda/cccl/headers/include/thrust/system/cpp/detail/set_operations.h +30 -0
  1485. cuda/cccl/headers/include/thrust/system/cpp/detail/sort.h +30 -0
  1486. cuda/cccl/headers/include/thrust/system/cpp/detail/swap_ranges.h +29 -0
  1487. cuda/cccl/headers/include/thrust/system/cpp/detail/tabulate.h +29 -0
  1488. cuda/cccl/headers/include/thrust/system/cpp/detail/temporary_buffer.h +29 -0
  1489. cuda/cccl/headers/include/thrust/system/cpp/detail/transform.h +29 -0
  1490. cuda/cccl/headers/include/thrust/system/cpp/detail/transform_reduce.h +29 -0
  1491. cuda/cccl/headers/include/thrust/system/cpp/detail/transform_scan.h +29 -0
  1492. cuda/cccl/headers/include/thrust/system/cpp/detail/uninitialized_copy.h +29 -0
  1493. cuda/cccl/headers/include/thrust/system/cpp/detail/uninitialized_fill.h +29 -0
  1494. cuda/cccl/headers/include/thrust/system/cpp/detail/unique.h +30 -0
  1495. cuda/cccl/headers/include/thrust/system/cpp/detail/unique_by_key.h +30 -0
  1496. cuda/cccl/headers/include/thrust/system/cpp/execution_policy.h +158 -0
  1497. cuda/cccl/headers/include/thrust/system/cpp/memory.h +109 -0
  1498. cuda/cccl/headers/include/thrust/system/cpp/memory_resource.h +75 -0
  1499. cuda/cccl/headers/include/thrust/system/cpp/pointer.h +123 -0
  1500. cuda/cccl/headers/include/thrust/system/cpp/vector.h +99 -0
  1501. cuda/cccl/headers/include/thrust/system/cuda/config.h +123 -0
  1502. cuda/cccl/headers/include/thrust/system/cuda/detail/adjacent_difference.h +219 -0
  1503. cuda/cccl/headers/include/thrust/system/cuda/detail/assign_value.h +124 -0
  1504. cuda/cccl/headers/include/thrust/system/cuda/detail/binary_search.h +29 -0
  1505. cuda/cccl/headers/include/thrust/system/cuda/detail/cdp_dispatch.h +72 -0
  1506. cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +129 -0
  1507. cuda/cccl/headers/include/thrust/system/cuda/detail/copy_if.h +255 -0
  1508. cuda/cccl/headers/include/thrust/system/cuda/detail/core/agent_launcher.h +289 -0
  1509. cuda/cccl/headers/include/thrust/system/cuda/detail/core/triple_chevron_launch.h +191 -0
  1510. cuda/cccl/headers/include/thrust/system/cuda/detail/core/util.h +609 -0
  1511. cuda/cccl/headers/include/thrust/system/cuda/detail/count.h +75 -0
  1512. cuda/cccl/headers/include/thrust/system/cuda/detail/cross_system.h +243 -0
  1513. cuda/cccl/headers/include/thrust/system/cuda/detail/dispatch.h +210 -0
  1514. cuda/cccl/headers/include/thrust/system/cuda/detail/equal.h +64 -0
  1515. cuda/cccl/headers/include/thrust/system/cuda/detail/error.inl +96 -0
  1516. cuda/cccl/headers/include/thrust/system/cuda/detail/execution_policy.h +113 -0
  1517. cuda/cccl/headers/include/thrust/system/cuda/detail/extrema.h +476 -0
  1518. cuda/cccl/headers/include/thrust/system/cuda/detail/fill.h +82 -0
  1519. cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +272 -0
  1520. cuda/cccl/headers/include/thrust/system/cuda/detail/for_each.h +83 -0
  1521. cuda/cccl/headers/include/thrust/system/cuda/detail/gather.h +91 -0
  1522. cuda/cccl/headers/include/thrust/system/cuda/detail/generate.h +85 -0
  1523. cuda/cccl/headers/include/thrust/system/cuda/detail/get_value.h +65 -0
  1524. cuda/cccl/headers/include/thrust/system/cuda/detail/inner_product.h +75 -0
  1525. cuda/cccl/headers/include/thrust/system/cuda/detail/internal/copy_cross_system.h +204 -0
  1526. cuda/cccl/headers/include/thrust/system/cuda/detail/internal/copy_device_to_device.h +92 -0
  1527. cuda/cccl/headers/include/thrust/system/cuda/detail/iter_swap.h +69 -0
  1528. cuda/cccl/headers/include/thrust/system/cuda/detail/logical.h +29 -0
  1529. cuda/cccl/headers/include/thrust/system/cuda/detail/make_unsigned_special.h +61 -0
  1530. cuda/cccl/headers/include/thrust/system/cuda/detail/malloc_and_free.h +121 -0
  1531. cuda/cccl/headers/include/thrust/system/cuda/detail/memory.inl +57 -0
  1532. cuda/cccl/headers/include/thrust/system/cuda/detail/merge.h +228 -0
  1533. cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +217 -0
  1534. cuda/cccl/headers/include/thrust/system/cuda/detail/par.h +237 -0
  1535. cuda/cccl/headers/include/thrust/system/cuda/detail/par_to_seq.h +95 -0
  1536. cuda/cccl/headers/include/thrust/system/cuda/detail/parallel_for.h +81 -0
  1537. cuda/cccl/headers/include/thrust/system/cuda/detail/partition.h +405 -0
  1538. cuda/cccl/headers/include/thrust/system/cuda/detail/per_device_resource.h +72 -0
  1539. cuda/cccl/headers/include/thrust/system/cuda/detail/reduce.h +782 -0
  1540. cuda/cccl/headers/include/thrust/system/cuda/detail/reduce_by_key.h +1001 -0
  1541. cuda/cccl/headers/include/thrust/system/cuda/detail/remove.h +107 -0
  1542. cuda/cccl/headers/include/thrust/system/cuda/detail/replace.h +152 -0
  1543. cuda/cccl/headers/include/thrust/system/cuda/detail/reverse.h +87 -0
  1544. cuda/cccl/headers/include/thrust/system/cuda/detail/scan.h +342 -0
  1545. cuda/cccl/headers/include/thrust/system/cuda/detail/scan_by_key.h +415 -0
  1546. cuda/cccl/headers/include/thrust/system/cuda/detail/scatter.h +79 -0
  1547. cuda/cccl/headers/include/thrust/system/cuda/detail/sequence.h +29 -0
  1548. cuda/cccl/headers/include/thrust/system/cuda/detail/set_operations.h +1738 -0
  1549. cuda/cccl/headers/include/thrust/system/cuda/detail/sort.h +482 -0
  1550. cuda/cccl/headers/include/thrust/system/cuda/detail/swap_ranges.h +98 -0
  1551. cuda/cccl/headers/include/thrust/system/cuda/detail/tabulate.h +75 -0
  1552. cuda/cccl/headers/include/thrust/system/cuda/detail/temporary_buffer.h +132 -0
  1553. cuda/cccl/headers/include/thrust/system/cuda/detail/terminate.h +53 -0
  1554. cuda/cccl/headers/include/thrust/system/cuda/detail/transform.h +415 -0
  1555. cuda/cccl/headers/include/thrust/system/cuda/detail/transform_reduce.h +143 -0
  1556. cuda/cccl/headers/include/thrust/system/cuda/detail/transform_scan.h +119 -0
  1557. cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_copy.h +114 -0
  1558. cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_fill.h +91 -0
  1559. cuda/cccl/headers/include/thrust/system/cuda/detail/unique.h +289 -0
  1560. cuda/cccl/headers/include/thrust/system/cuda/detail/unique_by_key.h +311 -0
  1561. cuda/cccl/headers/include/thrust/system/cuda/detail/util.h +251 -0
  1562. cuda/cccl/headers/include/thrust/system/cuda/error.h +168 -0
  1563. cuda/cccl/headers/include/thrust/system/cuda/execution_policy.h +39 -0
  1564. cuda/cccl/headers/include/thrust/system/cuda/memory.h +122 -0
  1565. cuda/cccl/headers/include/thrust/system/cuda/memory_resource.h +122 -0
  1566. cuda/cccl/headers/include/thrust/system/cuda/pointer.h +160 -0
  1567. cuda/cccl/headers/include/thrust/system/cuda/vector.h +108 -0
  1568. cuda/cccl/headers/include/thrust/system/detail/adl/adjacent_difference.h +51 -0
  1569. cuda/cccl/headers/include/thrust/system/detail/adl/assign_value.h +51 -0
  1570. cuda/cccl/headers/include/thrust/system/detail/adl/binary_search.h +51 -0
  1571. cuda/cccl/headers/include/thrust/system/detail/adl/copy.h +51 -0
  1572. cuda/cccl/headers/include/thrust/system/detail/adl/copy_if.h +52 -0
  1573. cuda/cccl/headers/include/thrust/system/detail/adl/count.h +51 -0
  1574. cuda/cccl/headers/include/thrust/system/detail/adl/equal.h +51 -0
  1575. cuda/cccl/headers/include/thrust/system/detail/adl/extrema.h +51 -0
  1576. cuda/cccl/headers/include/thrust/system/detail/adl/fill.h +51 -0
  1577. cuda/cccl/headers/include/thrust/system/detail/adl/find.h +51 -0
  1578. cuda/cccl/headers/include/thrust/system/detail/adl/for_each.h +51 -0
  1579. cuda/cccl/headers/include/thrust/system/detail/adl/gather.h +51 -0
  1580. cuda/cccl/headers/include/thrust/system/detail/adl/generate.h +51 -0
  1581. cuda/cccl/headers/include/thrust/system/detail/adl/get_value.h +51 -0
  1582. cuda/cccl/headers/include/thrust/system/detail/adl/inner_product.h +51 -0
  1583. cuda/cccl/headers/include/thrust/system/detail/adl/iter_swap.h +51 -0
  1584. cuda/cccl/headers/include/thrust/system/detail/adl/logical.h +51 -0
  1585. cuda/cccl/headers/include/thrust/system/detail/adl/malloc_and_free.h +51 -0
  1586. cuda/cccl/headers/include/thrust/system/detail/adl/merge.h +51 -0
  1587. cuda/cccl/headers/include/thrust/system/detail/adl/mismatch.h +51 -0
  1588. cuda/cccl/headers/include/thrust/system/detail/adl/partition.h +51 -0
  1589. cuda/cccl/headers/include/thrust/system/detail/adl/per_device_resource.h +51 -0
  1590. cuda/cccl/headers/include/thrust/system/detail/adl/reduce.h +51 -0
  1591. cuda/cccl/headers/include/thrust/system/detail/adl/reduce_by_key.h +51 -0
  1592. cuda/cccl/headers/include/thrust/system/detail/adl/remove.h +51 -0
  1593. cuda/cccl/headers/include/thrust/system/detail/adl/replace.h +51 -0
  1594. cuda/cccl/headers/include/thrust/system/detail/adl/reverse.h +51 -0
  1595. cuda/cccl/headers/include/thrust/system/detail/adl/scan.h +51 -0
  1596. cuda/cccl/headers/include/thrust/system/detail/adl/scan_by_key.h +51 -0
  1597. cuda/cccl/headers/include/thrust/system/detail/adl/scatter.h +51 -0
  1598. cuda/cccl/headers/include/thrust/system/detail/adl/sequence.h +51 -0
  1599. cuda/cccl/headers/include/thrust/system/detail/adl/set_operations.h +51 -0
  1600. cuda/cccl/headers/include/thrust/system/detail/adl/sort.h +51 -0
  1601. cuda/cccl/headers/include/thrust/system/detail/adl/swap_ranges.h +51 -0
  1602. cuda/cccl/headers/include/thrust/system/detail/adl/tabulate.h +51 -0
  1603. cuda/cccl/headers/include/thrust/system/detail/adl/temporary_buffer.h +51 -0
  1604. cuda/cccl/headers/include/thrust/system/detail/adl/transform.h +51 -0
  1605. cuda/cccl/headers/include/thrust/system/detail/adl/transform_reduce.h +51 -0
  1606. cuda/cccl/headers/include/thrust/system/detail/adl/transform_scan.h +51 -0
  1607. cuda/cccl/headers/include/thrust/system/detail/adl/uninitialized_copy.h +51 -0
  1608. cuda/cccl/headers/include/thrust/system/detail/adl/uninitialized_fill.h +51 -0
  1609. cuda/cccl/headers/include/thrust/system/detail/adl/unique.h +51 -0
  1610. cuda/cccl/headers/include/thrust/system/detail/adl/unique_by_key.h +51 -0
  1611. cuda/cccl/headers/include/thrust/system/detail/bad_alloc.h +64 -0
  1612. cuda/cccl/headers/include/thrust/system/detail/errno.h +125 -0
  1613. cuda/cccl/headers/include/thrust/system/detail/error_category.inl +302 -0
  1614. cuda/cccl/headers/include/thrust/system/detail/error_code.inl +173 -0
  1615. cuda/cccl/headers/include/thrust/system/detail/error_condition.inl +121 -0
  1616. cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.h +59 -0
  1617. cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.inl +85 -0
  1618. cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.h +167 -0
  1619. cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.inl +391 -0
  1620. cuda/cccl/headers/include/thrust/system/detail/generic/copy.h +51 -0
  1621. cuda/cccl/headers/include/thrust/system/detail/generic/copy.inl +70 -0
  1622. cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.h +64 -0
  1623. cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.inl +152 -0
  1624. cuda/cccl/headers/include/thrust/system/detail/generic/count.h +54 -0
  1625. cuda/cccl/headers/include/thrust/system/detail/generic/count.inl +90 -0
  1626. cuda/cccl/headers/include/thrust/system/detail/generic/equal.h +55 -0
  1627. cuda/cccl/headers/include/thrust/system/detail/generic/equal.inl +66 -0
  1628. cuda/cccl/headers/include/thrust/system/detail/generic/extrema.h +72 -0
  1629. cuda/cccl/headers/include/thrust/system/detail/generic/extrema.inl +258 -0
  1630. cuda/cccl/headers/include/thrust/system/detail/generic/fill.h +60 -0
  1631. cuda/cccl/headers/include/thrust/system/detail/generic/find.h +55 -0
  1632. cuda/cccl/headers/include/thrust/system/detail/generic/find.inl +143 -0
  1633. cuda/cccl/headers/include/thrust/system/detail/generic/for_each.h +64 -0
  1634. cuda/cccl/headers/include/thrust/system/detail/generic/gather.h +79 -0
  1635. cuda/cccl/headers/include/thrust/system/detail/generic/gather.inl +102 -0
  1636. cuda/cccl/headers/include/thrust/system/detail/generic/generate.h +51 -0
  1637. cuda/cccl/headers/include/thrust/system/detail/generic/generate.inl +63 -0
  1638. cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.h +66 -0
  1639. cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.inl +78 -0
  1640. cuda/cccl/headers/include/thrust/system/detail/generic/logical.h +65 -0
  1641. cuda/cccl/headers/include/thrust/system/detail/generic/memory.h +70 -0
  1642. cuda/cccl/headers/include/thrust/system/detail/generic/memory.inl +92 -0
  1643. cuda/cccl/headers/include/thrust/system/detail/generic/merge.h +105 -0
  1644. cuda/cccl/headers/include/thrust/system/detail/generic/merge.inl +154 -0
  1645. cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.h +55 -0
  1646. cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.inl +74 -0
  1647. cuda/cccl/headers/include/thrust/system/detail/generic/partition.h +135 -0
  1648. cuda/cccl/headers/include/thrust/system/detail/generic/partition.inl +213 -0
  1649. cuda/cccl/headers/include/thrust/system/detail/generic/per_device_resource.h +49 -0
  1650. cuda/cccl/headers/include/thrust/system/detail/generic/reduce.h +77 -0
  1651. cuda/cccl/headers/include/thrust/system/detail/generic/reduce.inl +106 -0
  1652. cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.h +89 -0
  1653. cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.inl +192 -0
  1654. cuda/cccl/headers/include/thrust/system/detail/generic/remove.h +92 -0
  1655. cuda/cccl/headers/include/thrust/system/detail/generic/remove.inl +127 -0
  1656. cuda/cccl/headers/include/thrust/system/detail/generic/replace.h +101 -0
  1657. cuda/cccl/headers/include/thrust/system/detail/generic/replace.inl +181 -0
  1658. cuda/cccl/headers/include/thrust/system/detail/generic/reverse.h +54 -0
  1659. cuda/cccl/headers/include/thrust/system/detail/generic/reverse.inl +73 -0
  1660. cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.h +78 -0
  1661. cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.inl +141 -0
  1662. cuda/cccl/headers/include/thrust/system/detail/generic/scan.h +78 -0
  1663. cuda/cccl/headers/include/thrust/system/detail/generic/scan.inl +91 -0
  1664. cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.h +132 -0
  1665. cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.inl +238 -0
  1666. cuda/cccl/headers/include/thrust/system/detail/generic/scatter.h +79 -0
  1667. cuda/cccl/headers/include/thrust/system/detail/generic/scatter.inl +91 -0
  1668. cuda/cccl/headers/include/thrust/system/detail/generic/select_system.h +96 -0
  1669. cuda/cccl/headers/include/thrust/system/detail/generic/sequence.h +70 -0
  1670. cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.h +288 -0
  1671. cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.inl +482 -0
  1672. cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.h +60 -0
  1673. cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.inl +131 -0
  1674. cuda/cccl/headers/include/thrust/system/detail/generic/sort.h +119 -0
  1675. cuda/cccl/headers/include/thrust/system/detail/generic/sort.inl +181 -0
  1676. cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.h +50 -0
  1677. cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.inl +82 -0
  1678. cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.h +47 -0
  1679. cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.inl +60 -0
  1680. cuda/cccl/headers/include/thrust/system/detail/generic/tag.h +53 -0
  1681. cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.h +60 -0
  1682. cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.inl +88 -0
  1683. cuda/cccl/headers/include/thrust/system/detail/generic/transform.h +395 -0
  1684. cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.h +56 -0
  1685. cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.inl +62 -0
  1686. cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.h +86 -0
  1687. cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.inl +119 -0
  1688. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.h +51 -0
  1689. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.inl +172 -0
  1690. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.h +51 -0
  1691. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.inl +121 -0
  1692. cuda/cccl/headers/include/thrust/system/detail/generic/unique.h +77 -0
  1693. cuda/cccl/headers/include/thrust/system/detail/generic/unique.inl +119 -0
  1694. cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.h +87 -0
  1695. cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.inl +132 -0
  1696. cuda/cccl/headers/include/thrust/system/detail/internal/decompose.h +123 -0
  1697. cuda/cccl/headers/include/thrust/system/detail/sequential/adjacent_difference.h +76 -0
  1698. cuda/cccl/headers/include/thrust/system/detail/sequential/assign_value.h +48 -0
  1699. cuda/cccl/headers/include/thrust/system/detail/sequential/binary_search.h +142 -0
  1700. cuda/cccl/headers/include/thrust/system/detail/sequential/copy.h +55 -0
  1701. cuda/cccl/headers/include/thrust/system/detail/sequential/copy.inl +125 -0
  1702. cuda/cccl/headers/include/thrust/system/detail/sequential/copy_backward.h +55 -0
  1703. cuda/cccl/headers/include/thrust/system/detail/sequential/copy_if.h +77 -0
  1704. cuda/cccl/headers/include/thrust/system/detail/sequential/count.h +29 -0
  1705. cuda/cccl/headers/include/thrust/system/detail/sequential/equal.h +29 -0
  1706. cuda/cccl/headers/include/thrust/system/detail/sequential/execution_policy.h +78 -0
  1707. cuda/cccl/headers/include/thrust/system/detail/sequential/extrema.h +116 -0
  1708. cuda/cccl/headers/include/thrust/system/detail/sequential/fill.h +29 -0
  1709. cuda/cccl/headers/include/thrust/system/detail/sequential/find.h +68 -0
  1710. cuda/cccl/headers/include/thrust/system/detail/sequential/for_each.h +80 -0
  1711. cuda/cccl/headers/include/thrust/system/detail/sequential/gather.h +29 -0
  1712. cuda/cccl/headers/include/thrust/system/detail/sequential/general_copy.h +129 -0
  1713. cuda/cccl/headers/include/thrust/system/detail/sequential/generate.h +29 -0
  1714. cuda/cccl/headers/include/thrust/system/detail/sequential/get_value.h +49 -0
  1715. cuda/cccl/headers/include/thrust/system/detail/sequential/inner_product.h +29 -0
  1716. cuda/cccl/headers/include/thrust/system/detail/sequential/insertion_sort.h +147 -0
  1717. cuda/cccl/headers/include/thrust/system/detail/sequential/iter_swap.h +51 -0
  1718. cuda/cccl/headers/include/thrust/system/detail/sequential/logical.h +29 -0
  1719. cuda/cccl/headers/include/thrust/system/detail/sequential/malloc_and_free.h +56 -0
  1720. cuda/cccl/headers/include/thrust/system/detail/sequential/merge.h +81 -0
  1721. cuda/cccl/headers/include/thrust/system/detail/sequential/merge.inl +151 -0
  1722. cuda/cccl/headers/include/thrust/system/detail/sequential/mismatch.h +29 -0
  1723. cuda/cccl/headers/include/thrust/system/detail/sequential/partition.h +309 -0
  1724. cuda/cccl/headers/include/thrust/system/detail/sequential/per_device_resource.h +29 -0
  1725. cuda/cccl/headers/include/thrust/system/detail/sequential/reduce.h +70 -0
  1726. cuda/cccl/headers/include/thrust/system/detail/sequential/reduce_by_key.h +104 -0
  1727. cuda/cccl/headers/include/thrust/system/detail/sequential/remove.h +185 -0
  1728. cuda/cccl/headers/include/thrust/system/detail/sequential/replace.h +29 -0
  1729. cuda/cccl/headers/include/thrust/system/detail/sequential/reverse.h +29 -0
  1730. cuda/cccl/headers/include/thrust/system/detail/sequential/scan.h +160 -0
  1731. cuda/cccl/headers/include/thrust/system/detail/sequential/scan_by_key.h +151 -0
  1732. cuda/cccl/headers/include/thrust/system/detail/sequential/scatter.h +29 -0
  1733. cuda/cccl/headers/include/thrust/system/detail/sequential/sequence.h +29 -0
  1734. cuda/cccl/headers/include/thrust/system/detail/sequential/set_operations.h +212 -0
  1735. cuda/cccl/headers/include/thrust/system/detail/sequential/sort.h +65 -0
  1736. cuda/cccl/headers/include/thrust/system/detail/sequential/sort.inl +116 -0
  1737. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.h +61 -0
  1738. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.inl +362 -0
  1739. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.h +54 -0
  1740. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.inl +130 -0
  1741. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.h +54 -0
  1742. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.inl +592 -0
  1743. cuda/cccl/headers/include/thrust/system/detail/sequential/swap_ranges.h +29 -0
  1744. cuda/cccl/headers/include/thrust/system/detail/sequential/tabulate.h +29 -0
  1745. cuda/cccl/headers/include/thrust/system/detail/sequential/temporary_buffer.h +29 -0
  1746. cuda/cccl/headers/include/thrust/system/detail/sequential/transform.h +29 -0
  1747. cuda/cccl/headers/include/thrust/system/detail/sequential/transform_reduce.h +29 -0
  1748. cuda/cccl/headers/include/thrust/system/detail/sequential/transform_scan.h +29 -0
  1749. cuda/cccl/headers/include/thrust/system/detail/sequential/trivial_copy.h +64 -0
  1750. cuda/cccl/headers/include/thrust/system/detail/sequential/uninitialized_copy.h +29 -0
  1751. cuda/cccl/headers/include/thrust/system/detail/sequential/uninitialized_fill.h +29 -0
  1752. cuda/cccl/headers/include/thrust/system/detail/sequential/unique.h +121 -0
  1753. cuda/cccl/headers/include/thrust/system/detail/sequential/unique_by_key.h +112 -0
  1754. cuda/cccl/headers/include/thrust/system/detail/system_error.inl +108 -0
  1755. cuda/cccl/headers/include/thrust/system/error_code.h +512 -0
  1756. cuda/cccl/headers/include/thrust/system/omp/detail/adjacent_difference.h +54 -0
  1757. cuda/cccl/headers/include/thrust/system/omp/detail/assign_value.h +30 -0
  1758. cuda/cccl/headers/include/thrust/system/omp/detail/binary_search.h +77 -0
  1759. cuda/cccl/headers/include/thrust/system/omp/detail/copy.h +50 -0
  1760. cuda/cccl/headers/include/thrust/system/omp/detail/copy.inl +74 -0
  1761. cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.h +56 -0
  1762. cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.inl +59 -0
  1763. cuda/cccl/headers/include/thrust/system/omp/detail/count.h +30 -0
  1764. cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.h +50 -0
  1765. cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.inl +65 -0
  1766. cuda/cccl/headers/include/thrust/system/omp/detail/equal.h +30 -0
  1767. cuda/cccl/headers/include/thrust/system/omp/detail/execution_policy.h +113 -0
  1768. cuda/cccl/headers/include/thrust/system/omp/detail/extrema.h +66 -0
  1769. cuda/cccl/headers/include/thrust/system/omp/detail/fill.h +30 -0
  1770. cuda/cccl/headers/include/thrust/system/omp/detail/find.h +53 -0
  1771. cuda/cccl/headers/include/thrust/system/omp/detail/for_each.h +56 -0
  1772. cuda/cccl/headers/include/thrust/system/omp/detail/for_each.inl +87 -0
  1773. cuda/cccl/headers/include/thrust/system/omp/detail/gather.h +30 -0
  1774. cuda/cccl/headers/include/thrust/system/omp/detail/generate.h +30 -0
  1775. cuda/cccl/headers/include/thrust/system/omp/detail/get_value.h +30 -0
  1776. cuda/cccl/headers/include/thrust/system/omp/detail/inner_product.h +30 -0
  1777. cuda/cccl/headers/include/thrust/system/omp/detail/iter_swap.h +30 -0
  1778. cuda/cccl/headers/include/thrust/system/omp/detail/logical.h +30 -0
  1779. cuda/cccl/headers/include/thrust/system/omp/detail/malloc_and_free.h +30 -0
  1780. cuda/cccl/headers/include/thrust/system/omp/detail/memory.inl +93 -0
  1781. cuda/cccl/headers/include/thrust/system/omp/detail/merge.h +30 -0
  1782. cuda/cccl/headers/include/thrust/system/omp/detail/mismatch.h +30 -0
  1783. cuda/cccl/headers/include/thrust/system/omp/detail/par.h +62 -0
  1784. cuda/cccl/headers/include/thrust/system/omp/detail/partition.h +88 -0
  1785. cuda/cccl/headers/include/thrust/system/omp/detail/partition.inl +102 -0
  1786. cuda/cccl/headers/include/thrust/system/omp/detail/per_device_resource.h +29 -0
  1787. cuda/cccl/headers/include/thrust/system/omp/detail/pragma_omp.h +54 -0
  1788. cuda/cccl/headers/include/thrust/system/omp/detail/reduce.h +54 -0
  1789. cuda/cccl/headers/include/thrust/system/omp/detail/reduce.inl +78 -0
  1790. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.h +64 -0
  1791. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.inl +65 -0
  1792. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.h +59 -0
  1793. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.inl +103 -0
  1794. cuda/cccl/headers/include/thrust/system/omp/detail/remove.h +72 -0
  1795. cuda/cccl/headers/include/thrust/system/omp/detail/remove.inl +87 -0
  1796. cuda/cccl/headers/include/thrust/system/omp/detail/replace.h +30 -0
  1797. cuda/cccl/headers/include/thrust/system/omp/detail/reverse.h +30 -0
  1798. cuda/cccl/headers/include/thrust/system/omp/detail/scan.h +30 -0
  1799. cuda/cccl/headers/include/thrust/system/omp/detail/scan_by_key.h +30 -0
  1800. cuda/cccl/headers/include/thrust/system/omp/detail/scatter.h +30 -0
  1801. cuda/cccl/headers/include/thrust/system/omp/detail/sequence.h +30 -0
  1802. cuda/cccl/headers/include/thrust/system/omp/detail/set_operations.h +30 -0
  1803. cuda/cccl/headers/include/thrust/system/omp/detail/sort.h +60 -0
  1804. cuda/cccl/headers/include/thrust/system/omp/detail/sort.inl +265 -0
  1805. cuda/cccl/headers/include/thrust/system/omp/detail/swap_ranges.h +30 -0
  1806. cuda/cccl/headers/include/thrust/system/omp/detail/tabulate.h +30 -0
  1807. cuda/cccl/headers/include/thrust/system/omp/detail/temporary_buffer.h +29 -0
  1808. cuda/cccl/headers/include/thrust/system/omp/detail/transform.h +30 -0
  1809. cuda/cccl/headers/include/thrust/system/omp/detail/transform_reduce.h +30 -0
  1810. cuda/cccl/headers/include/thrust/system/omp/detail/transform_scan.h +30 -0
  1811. cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_copy.h +30 -0
  1812. cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_fill.h +30 -0
  1813. cuda/cccl/headers/include/thrust/system/omp/detail/unique.h +60 -0
  1814. cuda/cccl/headers/include/thrust/system/omp/detail/unique.inl +71 -0
  1815. cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.h +67 -0
  1816. cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.inl +75 -0
  1817. cuda/cccl/headers/include/thrust/system/omp/execution_policy.h +157 -0
  1818. cuda/cccl/headers/include/thrust/system/omp/memory.h +111 -0
  1819. cuda/cccl/headers/include/thrust/system/omp/memory_resource.h +75 -0
  1820. cuda/cccl/headers/include/thrust/system/omp/pointer.h +124 -0
  1821. cuda/cccl/headers/include/thrust/system/omp/vector.h +99 -0
  1822. cuda/cccl/headers/include/thrust/system/system_error.h +185 -0
  1823. cuda/cccl/headers/include/thrust/system/tbb/detail/adjacent_difference.h +54 -0
  1824. cuda/cccl/headers/include/thrust/system/tbb/detail/assign_value.h +30 -0
  1825. cuda/cccl/headers/include/thrust/system/tbb/detail/binary_search.h +30 -0
  1826. cuda/cccl/headers/include/thrust/system/tbb/detail/copy.h +50 -0
  1827. cuda/cccl/headers/include/thrust/system/tbb/detail/copy.inl +73 -0
  1828. cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.h +47 -0
  1829. cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.inl +136 -0
  1830. cuda/cccl/headers/include/thrust/system/tbb/detail/count.h +30 -0
  1831. cuda/cccl/headers/include/thrust/system/tbb/detail/equal.h +30 -0
  1832. cuda/cccl/headers/include/thrust/system/tbb/detail/execution_policy.h +92 -0
  1833. cuda/cccl/headers/include/thrust/system/tbb/detail/extrema.h +66 -0
  1834. cuda/cccl/headers/include/thrust/system/tbb/detail/fill.h +30 -0
  1835. cuda/cccl/headers/include/thrust/system/tbb/detail/find.h +49 -0
  1836. cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.h +51 -0
  1837. cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.inl +91 -0
  1838. cuda/cccl/headers/include/thrust/system/tbb/detail/gather.h +30 -0
  1839. cuda/cccl/headers/include/thrust/system/tbb/detail/generate.h +30 -0
  1840. cuda/cccl/headers/include/thrust/system/tbb/detail/get_value.h +30 -0
  1841. cuda/cccl/headers/include/thrust/system/tbb/detail/inner_product.h +30 -0
  1842. cuda/cccl/headers/include/thrust/system/tbb/detail/iter_swap.h +30 -0
  1843. cuda/cccl/headers/include/thrust/system/tbb/detail/logical.h +30 -0
  1844. cuda/cccl/headers/include/thrust/system/tbb/detail/malloc_and_free.h +30 -0
  1845. cuda/cccl/headers/include/thrust/system/tbb/detail/memory.inl +94 -0
  1846. cuda/cccl/headers/include/thrust/system/tbb/detail/merge.h +77 -0
  1847. cuda/cccl/headers/include/thrust/system/tbb/detail/merge.inl +327 -0
  1848. cuda/cccl/headers/include/thrust/system/tbb/detail/mismatch.h +30 -0
  1849. cuda/cccl/headers/include/thrust/system/tbb/detail/par.h +62 -0
  1850. cuda/cccl/headers/include/thrust/system/tbb/detail/partition.h +84 -0
  1851. cuda/cccl/headers/include/thrust/system/tbb/detail/partition.inl +98 -0
  1852. cuda/cccl/headers/include/thrust/system/tbb/detail/per_device_resource.h +29 -0
  1853. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.h +54 -0
  1854. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.inl +137 -0
  1855. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.h +61 -0
  1856. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.inl +400 -0
  1857. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_intervals.h +140 -0
  1858. cuda/cccl/headers/include/thrust/system/tbb/detail/remove.h +76 -0
  1859. cuda/cccl/headers/include/thrust/system/tbb/detail/remove.inl +87 -0
  1860. cuda/cccl/headers/include/thrust/system/tbb/detail/replace.h +30 -0
  1861. cuda/cccl/headers/include/thrust/system/tbb/detail/reverse.h +30 -0
  1862. cuda/cccl/headers/include/thrust/system/tbb/detail/scan.h +59 -0
  1863. cuda/cccl/headers/include/thrust/system/tbb/detail/scan.inl +312 -0
  1864. cuda/cccl/headers/include/thrust/system/tbb/detail/scan_by_key.h +30 -0
  1865. cuda/cccl/headers/include/thrust/system/tbb/detail/scatter.h +30 -0
  1866. cuda/cccl/headers/include/thrust/system/tbb/detail/sequence.h +30 -0
  1867. cuda/cccl/headers/include/thrust/system/tbb/detail/set_operations.h +30 -0
  1868. cuda/cccl/headers/include/thrust/system/tbb/detail/sort.h +60 -0
  1869. cuda/cccl/headers/include/thrust/system/tbb/detail/sort.inl +295 -0
  1870. cuda/cccl/headers/include/thrust/system/tbb/detail/swap_ranges.h +30 -0
  1871. cuda/cccl/headers/include/thrust/system/tbb/detail/tabulate.h +30 -0
  1872. cuda/cccl/headers/include/thrust/system/tbb/detail/temporary_buffer.h +29 -0
  1873. cuda/cccl/headers/include/thrust/system/tbb/detail/transform.h +30 -0
  1874. cuda/cccl/headers/include/thrust/system/tbb/detail/transform_reduce.h +30 -0
  1875. cuda/cccl/headers/include/thrust/system/tbb/detail/transform_scan.h +30 -0
  1876. cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_copy.h +30 -0
  1877. cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_fill.h +30 -0
  1878. cuda/cccl/headers/include/thrust/system/tbb/detail/unique.h +60 -0
  1879. cuda/cccl/headers/include/thrust/system/tbb/detail/unique.inl +71 -0
  1880. cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.h +67 -0
  1881. cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.inl +75 -0
  1882. cuda/cccl/headers/include/thrust/system/tbb/execution_policy.h +157 -0
  1883. cuda/cccl/headers/include/thrust/system/tbb/memory.h +111 -0
  1884. cuda/cccl/headers/include/thrust/system/tbb/memory_resource.h +75 -0
  1885. cuda/cccl/headers/include/thrust/system/tbb/pointer.h +124 -0
  1886. cuda/cccl/headers/include/thrust/system/tbb/vector.h +99 -0
  1887. cuda/cccl/headers/include/thrust/system_error.h +57 -0
  1888. cuda/cccl/headers/include/thrust/tabulate.h +125 -0
  1889. cuda/cccl/headers/include/thrust/transform.h +1045 -0
  1890. cuda/cccl/headers/include/thrust/transform_reduce.h +190 -0
  1891. cuda/cccl/headers/include/thrust/transform_scan.h +442 -0
  1892. cuda/cccl/headers/include/thrust/tuple.h +139 -0
  1893. cuda/cccl/headers/include/thrust/type_traits/integer_sequence.h +261 -0
  1894. cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +154 -0
  1895. cuda/cccl/headers/include/thrust/type_traits/is_execution_policy.h +65 -0
  1896. cuda/cccl/headers/include/thrust/type_traits/is_operator_less_or_greater_function_object.h +184 -0
  1897. cuda/cccl/headers/include/thrust/type_traits/is_operator_plus_function_object.h +116 -0
  1898. cuda/cccl/headers/include/thrust/type_traits/is_trivially_relocatable.h +336 -0
  1899. cuda/cccl/headers/include/thrust/type_traits/logical_metafunctions.h +42 -0
  1900. cuda/cccl/headers/include/thrust/type_traits/unwrap_contiguous_iterator.h +96 -0
  1901. cuda/cccl/headers/include/thrust/uninitialized_copy.h +300 -0
  1902. cuda/cccl/headers/include/thrust/uninitialized_fill.h +268 -0
  1903. cuda/cccl/headers/include/thrust/unique.h +1088 -0
  1904. cuda/cccl/headers/include/thrust/universal_allocator.h +93 -0
  1905. cuda/cccl/headers/include/thrust/universal_ptr.h +34 -0
  1906. cuda/cccl/headers/include/thrust/universal_vector.h +71 -0
  1907. cuda/cccl/headers/include/thrust/version.h +93 -0
  1908. cuda/cccl/headers/include/thrust/zip_function.h +176 -0
  1909. cuda/cccl/headers/include_paths.py +54 -0
  1910. cuda/cccl/parallel/__init__.py +9 -0
  1911. cuda/cccl/parallel/experimental/.gitignore +4 -0
  1912. cuda/cccl/parallel/experimental/__init__.py +75 -0
  1913. cuda/cccl/parallel/experimental/_bindings.py +56 -0
  1914. cuda/cccl/parallel/experimental/_bindings.pyi +405 -0
  1915. cuda/cccl/parallel/experimental/_bindings_impl.pyx +1957 -0
  1916. cuda/cccl/parallel/experimental/_caching.py +71 -0
  1917. cuda/cccl/parallel/experimental/_cccl_interop.py +396 -0
  1918. cuda/cccl/parallel/experimental/_utils/__init__.py +0 -0
  1919. cuda/cccl/parallel/experimental/_utils/protocols.py +132 -0
  1920. cuda/cccl/parallel/experimental/_utils/temp_storage_buffer.py +86 -0
  1921. cuda/cccl/parallel/experimental/algorithms/__init__.py +50 -0
  1922. cuda/cccl/parallel/experimental/algorithms/_histogram.py +243 -0
  1923. cuda/cccl/parallel/experimental/algorithms/_merge_sort.py +225 -0
  1924. cuda/cccl/parallel/experimental/algorithms/_radix_sort.py +312 -0
  1925. cuda/cccl/parallel/experimental/algorithms/_reduce.py +184 -0
  1926. cuda/cccl/parallel/experimental/algorithms/_scan.py +261 -0
  1927. cuda/cccl/parallel/experimental/algorithms/_segmented_reduce.py +257 -0
  1928. cuda/cccl/parallel/experimental/algorithms/_transform.py +308 -0
  1929. cuda/cccl/parallel/experimental/algorithms/_unique_by_key.py +252 -0
  1930. cuda/cccl/parallel/experimental/cccl/.gitkeep +0 -0
  1931. cuda/cccl/parallel/experimental/cu12/_bindings_impl.cpython-310-x86_64-linux-gnu.so +0 -0
  1932. cuda/cccl/parallel/experimental/cu12/cccl/libcccl.c.parallel.so +0 -0
  1933. cuda/cccl/parallel/experimental/cu13/_bindings_impl.cpython-310-x86_64-linux-gnu.so +0 -0
  1934. cuda/cccl/parallel/experimental/cu13/cccl/libcccl.c.parallel.so +0 -0
  1935. cuda/cccl/parallel/experimental/iterators/__init__.py +21 -0
  1936. cuda/cccl/parallel/experimental/iterators/_factories.py +214 -0
  1937. cuda/cccl/parallel/experimental/iterators/_iterators.py +627 -0
  1938. cuda/cccl/parallel/experimental/iterators/_zip_iterator.py +207 -0
  1939. cuda/cccl/parallel/experimental/numba_utils.py +6 -0
  1940. cuda/cccl/parallel/experimental/op.py +3 -0
  1941. cuda/cccl/parallel/experimental/struct.py +272 -0
  1942. cuda/cccl/parallel/experimental/typing.py +35 -0
  1943. cuda/cccl/py.typed +0 -0
  1944. cuda_cccl-0.1.3.2.0.dev271.dist-info/METADATA +40 -0
  1945. cuda_cccl-0.1.3.2.0.dev271.dist-info/RECORD +1947 -0
  1946. cuda_cccl-0.1.3.2.0.dev271.dist-info/WHEEL +5 -0
  1947. cuda_cccl-0.1.3.2.0.dev271.dist-info/licenses/LICENSE +1 -0
@@ -0,0 +1,1957 @@
1
+ # distutils: language = c++
2
+ # cython: language_level=3
3
+ # cython: linetrace=True
4
+
5
+ # Python signatures are declared in the companion Python stub file _bindings.pyi
6
+ # Make sure to update PYI with change to Python API to ensure that Python
7
+ # static type checker tools like mypy green-lights cuda.cccl.parallel
8
+
9
+ from libc.string cimport memset, memcpy
10
+ from libc.stdint cimport uint8_t, uint32_t, uint64_t, int64_t, uintptr_t
11
+ from cpython.bytes cimport PyBytes_FromStringAndSize
12
+
13
+ from cpython.buffer cimport (
14
+ Py_buffer, PyBUF_SIMPLE, PyBUF_ANY_CONTIGUOUS,
15
+ PyBuffer_Release, PyObject_CheckBuffer, PyObject_GetBuffer
16
+ )
17
+ from cpython.pycapsule cimport (
18
+ PyCapsule_CheckExact, PyCapsule_IsValid, PyCapsule_GetPointer
19
+ )
20
+
21
+ import ctypes
22
+ from enum import IntEnum
23
+ cdef extern from "<cuda.h>":
24
+ cdef struct OpaqueCUstream_st
25
+ cdef struct OpaqueCUkernel_st
26
+ cdef struct OpaqueCUlibrary_st
27
+
28
+ ctypedef int CUresult
29
+ ctypedef OpaqueCUstream_st *CUstream
30
+ ctypedef OpaqueCUkernel_st *CUkernel
31
+ ctypedef OpaqueCUlibrary_st *CUlibrary
32
+
33
+
34
+ cdef extern from "cccl/c/types.h":
35
+ cpdef enum cccl_type_enum:
36
+ INT8 "CCCL_INT8"
37
+ INT16 "CCCL_INT16"
38
+ INT32 "CCCL_INT32"
39
+ INT64 "CCCL_INT64"
40
+ UINT8 "CCCL_UINT8"
41
+ UINT16 "CCCL_UINT16"
42
+ UINT32 "CCCL_UINT32"
43
+ UINT64 "CCCL_UINT64"
44
+ FLOAT16 "CCCL_FLOAT16"
45
+ FLOAT32 "CCCL_FLOAT32"
46
+ FLOAT64 "CCCL_FLOAT64"
47
+ STORAGE "CCCL_STORAGE"
48
+ BOOLEAN "CCCL_BOOLEAN"
49
+
50
+ cpdef enum cccl_op_kind_t:
51
+ STATELESS "CCCL_STATELESS"
52
+ STATEFUL "CCCL_STATEFUL"
53
+ PLUS "CCCL_PLUS"
54
+ MINUS "CCCL_MINUS"
55
+ MULTIPLIES "CCCL_MULTIPLIES"
56
+ DIVIDES "CCCL_DIVIDES"
57
+ MODULUS "CCCL_MODULUS"
58
+ EQUAL_TO "CCCL_EQUAL_TO"
59
+ NOT_EQUAL_TO "CCCL_NOT_EQUAL_TO"
60
+ GREATER "CCCL_GREATER"
61
+ LESS "CCCL_LESS"
62
+ GREATER_EQUAL "CCCL_GREATER_EQUAL"
63
+ LESS_EQUAL "CCCL_LESS_EQUAL"
64
+ LOGICAL_AND "CCCL_LOGICAL_AND"
65
+ LOGICAL_OR "CCCL_LOGICAL_OR"
66
+ LOGICAL_NOT "CCCL_LOGICAL_NOT"
67
+ BIT_AND "CCCL_BIT_AND"
68
+ BIT_OR "CCCL_BIT_OR"
69
+ BIT_XOR "CCCL_BIT_XOR"
70
+ BIT_NOT "CCCL_BIT_NOT"
71
+ IDENTITY "CCCL_IDENTITY"
72
+ NEGATE "CCCL_NEGATE"
73
+ MINIMUM "CCCL_MINIMUM"
74
+ MAXIMUM "CCCL_MAXIMUM"
75
+
76
+ cpdef enum cccl_iterator_kind_t:
77
+ POINTER "CCCL_POINTER"
78
+ ITERATOR "CCCL_ITERATOR"
79
+
80
+ cdef struct cccl_type_info:
81
+ size_t size
82
+ size_t alignment
83
+ cccl_type_enum type
84
+
85
+ cdef enum cccl_op_code_type:
86
+ CCCL_OP_LTOIR
87
+ CCCL_OP_CPP_SOURCE
88
+
89
+ cdef struct cccl_op_t:
90
+ cccl_op_kind_t type
91
+ const char* name
92
+ const char* code
93
+ size_t code_size
94
+ cccl_op_code_type code_type
95
+ size_t size
96
+ size_t alignment
97
+ void *state
98
+
99
+ cdef struct cccl_value_t:
100
+ cccl_type_info type
101
+ void *state
102
+
103
+ cdef union cccl_increment_t:
104
+ int64_t signed_offset
105
+ uint64_t unsigned_offset
106
+
107
+ ctypedef void (*cccl_host_op_fn_ptr_t)(void *, cccl_increment_t) nogil
108
+
109
+ cdef struct cccl_iterator_t:
110
+ size_t size
111
+ size_t alignment
112
+ cccl_iterator_kind_t type
113
+ cccl_op_t advance
114
+ cccl_op_t dereference
115
+ cccl_type_info value_type
116
+ void *state
117
+ cccl_host_op_fn_ptr_t host_advance
118
+
119
+ cpdef enum cccl_sort_order_t:
120
+ ASCENDING "CCCL_ASCENDING"
121
+ DESCENDING "CCCL_DESCENDING"
122
+
123
+
124
cdef void arg_type_check(
    str arg_name,
    object expected_type,
    object arg
) except *:
    """
    Raise ``TypeError`` unless ``arg`` is an instance of ``expected_type``.

    ``arg_name`` is only used to label the offending argument in the
    error message.
    """
    if isinstance(arg, expected_type):
        return
    raise TypeError(
        f"Expected {arg_name} to have type '{expected_type}', "
        f"got '{type(arg)}'"
    )
134
+
135
+ OpKind = cccl_op_kind_t
136
+ TypeEnum = cccl_type_enum
137
+ IteratorKind = cccl_iterator_kind_t
138
+ SortOrder = cccl_sort_order_t
139
+
140
cdef void _validate_alignment(int alignment) except *:
    """
    Check that ``alignment`` is a positive integer and a power of two
    that can be represented by uint32_t type.

    Raises:
        ValueError: if ``alignment`` is smaller than 1, or if it is
            not a power of two.
    """
    cdef uint32_t val
    if alignment < 1:
        # Zero is rejected too, so the requirement is "positive",
        # not merely "non-negative".
        raise ValueError(
            "Alignment must be positive, "
            f"got {alignment}."
        )
    val = <uint32_t>alignment
    # A power of two has exactly one bit set, so clearing the lowest
    # set bit via val & (val - 1) must yield zero.
    if (val & (val - 1)) != 0:
        raise ValueError(
            "Alignment must be a power of two, "
            f"got {alignment}"
        )
157
+
158
+
159
cdef class Op:
    """
    Represents CCCL Operation

    Args:
        name (str):
            Name of the operation
        operator_type (OpKind):
            Whether operator is stateless or stateful
        ltoir (bytes):
            The LTOIR for the operation compiled for device
        state (bytes, optional):
            State for the stateful operation.
        state_alignment (int, optional):
            Alignment of the state struct. Default: `1`.

    Raises:
        TypeError: if an argument has an unexpected type.
        ValueError: if ``state_alignment`` is not a positive power of two.
    """
    # Python owners of the memory that op_data points into; they must
    # stay referenced for as long as op_data is in use.
    cdef bytes op_encoded_name
    cdef bytes code_bytes
    cdef bytes state_bytes
    cdef cccl_op_t op_data


    cdef void _set_members(self, cccl_op_kind_t op_type, str name, bytes lto_ir, bytes state, int state_alignment):
        memset(&self.op_data, 0, sizeof(cccl_op_t))
        # Reference Python objects in the class to ensure lifetime
        self.op_encoded_name = name.encode("utf-8")
        self.code_bytes = lto_ir
        self.state_bytes = state
        # set fields of op_data struct
        self.op_data.type = op_type
        self.op_data.name = <const char *>self.op_encoded_name
        self.op_data.code = <const char *>lto_ir
        self.op_data.code_size = len(lto_ir)
        self.op_data.code_type = cccl_op_code_type.CCCL_OP_LTOIR
        self.op_data.size = len(state)
        self.op_data.alignment = state_alignment
        self.op_data.state = <void *><const char *>state


    def __cinit__(self, /, *, name = None, operator_type = None, ltoir = None, state = None, state_alignment = 1):
        # With neither name nor code given, build an empty operation
        if name is None and ltoir is None:
            name = ""
            ltoir = b""
        if state is None:
            state = b""
        if operator_type is None:
            operator_type = OpKind.STATELESS
        arg_type_check(arg_name="name", expected_type=str, arg=name)
        arg_type_check(arg_name="ltoir", expected_type=bytes, arg=ltoir)
        arg_type_check(arg_name="state", expected_type=bytes, arg=state)
        arg_type_check(arg_name="state_alignment", expected_type=int, arg=state_alignment)
        if not isinstance(operator_type, OpKind):
            raise TypeError(
                "The operator_type argument should be an enumerator of operator kinds"
            )
        _validate_alignment(state_alignment)
        self._set_members(
            <cccl_op_kind_t> operator_type.value,
            <str> name,
            <bytes> ltoir,
            <bytes> state,
            <int> state_alignment
        )


    cdef void set_state(self, bytes state):
        # Coerce first: if `state` is None the coercion raises before
        # any member is modified, so the old owner is not dropped while
        # op_data.state still points at it.
        self.op_data.state = <void *><const char *>state
        # Keep the recorded size in sync with the buffer that
        # op_data.state now points to.
        self.op_data.size = len(state)
        self.state_bytes = state

    @property
    def state(self):
        "Bytes object holding the operation state"
        return self.state_bytes

    @state.setter
    def state(self, bytes new_value):
        self.set_state(<bytes>new_value)

    @property
    def name(self):
        "Name of the operation, decoded from its UTF-8 encoding"
        return self.op_encoded_name.decode("utf-8")

    @property
    def ltoir(self):
        # Backward compatibility property
        return self.code_bytes

    @property
    def code(self):
        "Bytes object holding the operation code"
        return self.code_bytes

    @property
    def state_alignment(self):
        "Value of the `alignment` field of the native struct"
        return self.op_data.alignment

    @property
    def state_typenum(self):
        "Value of the `type` field of the native struct"
        return self.op_data.type

    def as_bytes(self):
        "Debugging utility to view memory content of library struct"
        cdef uint8_t[:] mem_view = bytearray(sizeof(self.op_data))
        memcpy(&mem_view[0], &self.op_data, sizeof(self.op_data))
        return bytes(mem_view)
263
+
264
+
265
cdef class TypeInfo:
    """
    Python wrapper around the CCCL type info structure

    Args:
        size (int):
            Size of the type in bytes. Must be positive.
        alignment (int):
            Alignment of the type in bytes. Must be a positive
            power of two.
        type_enum (TypeEnum):
            Enumeration member identifying the type.
    """
    cdef cccl_type_info type_info

    def __cinit__(self, int size, int alignment, cccl_type_enum type_enum):
        if not (size >= 1):
            raise ValueError("Size argument must be positive")
        _validate_alignment(alignment)
        # both arguments are validated, populate the native struct
        self.type_info.type = type_enum
        self.type_info.alignment = alignment
        self.type_info.size = size

    @property
    def size(self):
        "Size of the type in bytes"
        return self.type_info.size

    @property
    def alignment(self):
        "Alignment of the type in bytes"
        return self.type_info.alignment

    @property
    def typenum(self):
        "Enumerator identifying the type"
        return self.type_info.type

    def as_bytes(self):
        "Debugging utility to view memory content of library struct"
        cdef size_t nbytes = sizeof(self.type_info)
        cdef uint8_t[:] raw = bytearray(nbytes)
        memcpy(&raw[0], &self.type_info, nbytes)
        return bytes(raw)
304
+
305
+
306
cdef class Value:
    """
    Python wrapper around the CCCL value structure

    Args:
        value_type (TypeInfo):
            type descriptor
        state (object):
            state of the value type. Object is expected to
            implement Python buffer protocol and be able to provide
            simple contiguous array of type `uint8_t`.
    """
    # memoryview that keeps the state buffer referenced
    cdef uint8_t[::1] state_obj
    cdef TypeInfo value_type
    cdef cccl_value_t value_data

    def __cinit__(self, TypeInfo value_type, uint8_t[::1] state):
        self.state_obj = state
        self.value_type = value_type
        self.value_data.state = <void *>&state[0]
        self.value_data.type = value_type.type_info

    @property
    def type(self):
        "Type descriptor given at construction"
        return self.value_type

    @property
    def state(self):
        "Memoryview of the state buffer"
        return self.state_obj

    @state.setter
    def state(self, uint8_t[::1] new_value):
        # the replacement buffer must have the same length as the current one
        if len(self.state_obj) != len(new_value):
            raise ValueError("Size mismatch")
        self.state_obj = new_value
        self.value_data.state = <void *>&self.state_obj[0]

    def as_bytes(self):
        "Debugging utility to view memory of native struct"
        cdef size_t nbytes = sizeof(self.value_data)
        cdef uint8_t[:] raw = bytearray(nbytes)
        memcpy(&raw[0], &self.value_data, nbytes)
        return bytes(raw)
349
+
350
+
351
cdef void ensure_buffer(object o) except *:
    "Raise TypeError unless `o` supports the Python buffer protocol"
    if PyObject_CheckBuffer(o):
        return
    raise TypeError(
        "Object with buffer protocol expected, "
        f"got {type(o)}"
    )
357
+
358
+
359
cdef void * get_buffer_pointer(object o, size_t *size):
    """
    Return the address of the simple contiguous buffer exported by `o`.

    When `size` is not NULL, the buffer length in bytes is stored in
    `size[0]` (zero if the buffer could not be acquired).

    Raises:
        RuntimeError: if the buffer can not be acquired.

    NOTE(review): the buffer view is released before returning, so the
    returned address presumably stays usable only while the caller keeps
    `o` alive - confirm against callers.
    """
    cdef int status = 0
    cdef void *ptr = NULL
    cdef Py_buffer view

    status = PyObject_GetBuffer(o, &view, PyBUF_SIMPLE | PyBUF_ANY_CONTIGUOUS)
    if status != 0: # pragma: no cover
        # `size` is optional, same as on the success path below
        if size is not NULL:
            size[0] = 0
        raise RuntimeError(
            "Can not access simple contiguous buffer"
        )

    ptr = view.buf
    if size is not NULL:
        size[0] = <size_t>view.len
    PyBuffer_Release(&view)

    return ptr
377
+
378
+
379
cdef void * ctypes_typed_pointer_payload_ptr(object ctypes_typed_ptr):
    "Get pointer to the value buffer represented by ctypes.pointer(ctypes_val)"
    cdef size_t nbytes = 0
    cdef size_t *slot = NULL
    cdef size_t address = 0
    ensure_buffer(ctypes_typed_ptr)
    # the exported buffer is read as one pointer-sized integer: the address
    slot = <size_t *>get_buffer_pointer(ctypes_typed_ptr, &nbytes)
    address = slot[0]
    return <void *>address
386
+
387
+
388
cdef void * ctypes_value_ptr(object ctypes_cdata):
    """
    Get pointer to the value buffer behind `ctypes_cdata`.

    Unlike `ctypes_typed_pointer_payload_ptr`, the address of the exposed
    buffer itself is returned, not a word read from it.
    """
    cdef size_t nbytes = 0
    cdef void *data_ptr = NULL

    ensure_buffer(ctypes_cdata)
    data_ptr = get_buffer_pointer(ctypes_cdata, &nbytes)
    return data_ptr
393
+
394
+
395
cdef inline void * int_as_ptr(size_t ptr_val):
    """Reinterpret the integer `ptr_val` as an untyped pointer."""
    cdef void *as_pointer = <void *>ptr_val
    return as_pointer
397
+
398
+
399
cdef class StateBase:
    """
    Base holder for a raw address together with an associated Python object.

    Both members start out empty (`NULL` / `None`) and are filled in by
    subclasses through `set_state`.
    """
    # raw address, exposed to Python as an integer via `pointer`
    cdef void *ptr
    # Python object stored next to the address, exposed via `reference`
    cdef object ref

    def __cinit__(self):
        self.ptr = NULL
        self.ref = None

    cdef inline void set_state(self, void *ptr, object ref):
        """Store the address and its associated Python object together."""
        self.ptr = ptr
        self.ref = ref

    @property
    def pointer(self):
        """The stored address as an unsigned integer."""
        cdef size_t address = <size_t>self.ptr
        return address

    @property
    def reference(self):
        """The Python object stored alongside the address (may be None)."""
        return self.ref
418
+
419
+
420
cdef class Pointer(StateBase):
    """
    Represents the pointer value.

    Args:
        arg: one of
            * `int` - used directly as the address, no reference is stored;
            * typed ctypes pointer (`ctypes._Pointer` instance) - the address
              it holds is used, and the pointer object is stored as reference;
            * `ctypes.c_void_p` - its value is used as the address (a null
              `c_void_p` gives a NULL pointer), and the object is stored as
              reference.

    Raises:
        TypeError: if `arg` has any other type.
    """

    def __cinit__(self, arg):
        cdef void *ptr
        cdef object ref

        if isinstance(arg, int):
            ptr = int_as_ptr(arg)
            ref = None
        elif isinstance(arg, ctypes._Pointer):
            ptr = ctypes_typed_pointer_payload_ptr(arg)
            ref = arg
        elif isinstance(arg, ctypes.c_void_p):
            # ctypes reports a null c_void_p as `None`, not 0; map it to 0 so
            # that a null pointer is accepted instead of failing conversion
            ptr = int_as_ptr(arg.value or 0)
            ref = arg
        else:
            raise TypeError(
                "Expect ctypes pointer, integers, or PointerProxy, "
                f"got type {type(arg)}"
            )
        self.set_state(ptr, ref)
442
+
443
+
444
def make_pointer_object(ptr, owner):
    """
    Build a `Pointer` for address `ptr` whose reference is `owner`.

    Args:
        ptr: address given as `int` or `ctypes.c_void_p`. A null `c_void_p`
            (whose `value` is `None`) gives a NULL pointer.
        owner: arbitrary Python object stored as the reference of the result.

    Returns:
        Pointer: new object with `pointer == ptr` and `reference is owner`.

    Raises:
        TypeError: if `ptr` is neither an `int` nor a `ctypes.c_void_p`.
    """
    cdef Pointer res = Pointer(0)

    if isinstance(ptr, int):
        res.ptr = int_as_ptr(ptr)
    elif isinstance(ptr, ctypes.c_void_p):
        # `value` of a null c_void_p is None, which is not convertible to size_t
        res.ptr = int_as_ptr(ptr.value or 0)
    else:
        raise TypeError(
            "First argument must be an integer, or ctypes.c_void_p, "
            f"got {type(ptr)}"
        )
    res.ref = owner
    return res
458
+
459
+
460
cdef class IteratorState(StateBase):
    """
    Represents blob referenced by pointer.

    Args:
        arg: either a typed ctypes pointer, in which case the blob is the
            pointed-to object and its size is `ctypes.sizeof` of the
            pointer's contents, or any object supporting the buffer protocol,
            in which case the blob is the exposed buffer.

    Raises:
        TypeError: if `arg` is neither of the above.

    Instances expose the blob through the buffer protocol as a writable,
    one-dimensional array of unsigned bytes.
    """
    # size of the referenced blob in bytes
    cdef size_t state_nbytes

    def __cinit__(self, arg):
        cdef size_t nbytes = 0
        cdef void *blob_ptr = NULL
        cdef object blob_ref = None
        cdef bint is_typed_ptr

        super().__init__()
        is_typed_ptr = isinstance(arg, ctypes._Pointer)
        if not is_typed_ptr and not PyObject_CheckBuffer(arg):
            raise TypeError(
                "Expected a ctypes pointer with content, or object of type bytes or bytearray, "
                f"got type {type(arg)}"
            )

        if is_typed_ptr:
            # typed pointers are checked first: take the address they hold,
            # not the address of the pointer object's own buffer
            blob_ptr = ctypes_typed_pointer_payload_ptr(arg)
            blob_ref = arg.contents
            self.state_nbytes = ctypes.sizeof(blob_ref)
        else:
            blob_ptr = get_buffer_pointer(arg, &nbytes)
            blob_ref = arg
            self.state_nbytes = nbytes
        self.set_state(blob_ptr, blob_ref)

    cdef inline size_t get_size(self):
        """C-level accessor for the blob size in bytes."""
        return self.state_nbytes

    @property
    def size(self):
        """Size of the blob in bytes."""
        return self.state_nbytes

    def __getbuffer__(self, Py_buffer *buffer, int flags):
        # Describe the blob as a flat array of `state_nbytes` unsigned bytes.
        buffer.obj = self
        buffer.buf = <char *>self.ptr
        buffer.len = <Py_ssize_t>self.state_nbytes
        buffer.readonly = 0
        buffer.format = "B"  # unsigned char
        buffer.itemsize = 1
        buffer.ndim = 1
        # the single extent is read straight from the stored byte count
        buffer.shape = <Py_ssize_t *>&self.state_nbytes
        # stride of one item equals the item size, which lives in the view
        buffer.strides = &buffer.itemsize
        buffer.suboffsets = NULL
        buffer.internal = NULL

    def __releasebuffer__(self, Py_buffer *buffer):
        # nothing was allocated in __getbuffer__, so nothing to free
        pass
508
+
509
+
510
+ cdef const char *function_ptr_capsule_name = "void (void *, cccl_increment_t)";
511
+
512
cdef bint is_function_pointer_capsule(object o) noexcept:
    """
    Check whether `o` is exactly a capsule that is valid under the
    name 'void (void *, cccl_increment_t)'.

    Returns non-zero when both conditions hold, zero otherwise.
    """
    if not PyCapsule_CheckExact(o):
        return False
    return PyCapsule_IsValid(o, function_ptr_capsule_name)
521
+
522
+
523
cdef inline void* get_function_pointer_from_capsule(object cap) except *:
    """Fetch the pointer stored in `cap` under `function_ptr_capsule_name`."""
    cdef void *stored_ptr = PyCapsule_GetPointer(cap, function_ptr_capsule_name)
    return stored_ptr
525
+
526
+
527
cdef cccl_host_op_fn_ptr_t unbox_host_advance_fn(object host_fn_obj) except *:
    """
    Convert a Python-level description of a host function into a C pointer.

    Args:
        host_fn_obj: one of
            * ctypes function pointer (`ctypes._CFuncPtr` instance);
            * `int` holding the raw address;
            * `ctypes.c_void_p` holding the raw address (a null `c_void_p`
              gives a NULL function pointer, same as integer 0);
            * capsule named 'void (void *, cccl_increment_t)'.

    Returns:
        The address reinterpreted as `cccl_host_op_fn_ptr_t`.

    Raises:
        TypeError: if `host_fn_obj` is of an unsupported type.
    """
    cdef void *fn_ptr = NULL
    if isinstance(host_fn_obj, ctypes._CFuncPtr):
        # the _CFuncPtr object encapsulates a pointer to the function pointer
        fn_ptr = ctypes_typed_pointer_payload_ptr(host_fn_obj)
        return <cccl_host_op_fn_ptr_t>fn_ptr

    if isinstance(host_fn_obj, int):
        fn_ptr = <void *><uintptr_t>host_fn_obj
        return <cccl_host_op_fn_ptr_t>fn_ptr

    if isinstance(host_fn_obj, ctypes.c_void_p):
        # `value` is None for a null c_void_p; None can not be cast to uintptr_t
        fn_ptr = <void *><uintptr_t>(host_fn_obj.value or 0)
        return <cccl_host_op_fn_ptr_t>fn_ptr

    if is_function_pointer_capsule(host_fn_obj):
        fn_ptr = get_function_pointer_from_capsule(host_fn_obj)
        return <cccl_host_op_fn_ptr_t>fn_ptr

    raise TypeError(
        "Expected ctypes function pointer, ctypes.c_void_p, integer or a named capsule, "
        f"got {type(host_fn_obj)}"
    )
550
+
551
+
552
cdef class Iterator:
    """
    Represents CCCL iterator.

    Args:
        alignment (int):
            Alignment of the iterator state
        iterator_type (IteratorKind):
            The type of iterator, `IteratorKind.POINTER` or
            `IteratorKind.ITERATOR`
        advance_fn (Op):
            Descriptor for user-defined `advance` function
            compiled for device
        dereference_fn (Op):
            Descriptor for user-defined `dereference` or `assign`
            function compiled for device
        value_type (TypeInfo):
            Descriptor of the type addressed by the iterator
        state (object, optional):
            Python object for the state of the iterator. For iterators of
            type `ITERATOR` the constructor expects an `IteratorState`
            instance. For iterators of type `POINTER` the constructor
            expects an integer or a `Pointer` instance.
            Value `None` represents absence of iterator state.
        host_advance_fn (object, optional):
            Python object for host callable function to advance state by a given
            increment. The argument may only be set for iterators of type
            `IteratorKind.ITERATOR` and raise an exception otherwise. Supported
            types are `int` or `ctypes.c_void_p` (raw pointer), ctypes function
            pointer, or a Python capsule with name `"void (void *, cccl_increment_t)"`.

    Raises:
        TypeError: if `state` has a type not supported for the iterator kind.
        ValueError: if `host_advance_fn` is given for a `POINTER` iterator.
    """
    cdef Op advance
    cdef Op dereference
    # Python object stored next to `iter_data.state`
    cdef object state_obj
    # Python object the host advance function pointer was unboxed from
    cdef object host_advance_obj
    cdef cccl_iterator_t iter_data

    def __cinit__(self,
        int alignment,
        cccl_iterator_kind_t iterator_type,
        Op advance_fn,
        Op dereference_fn,
        TypeInfo value_type,
        state=None,
        host_advance_fn=None
    ):
        cdef cccl_iterator_kind_t it_kind
        _validate_alignment(alignment)
        it_kind = iterator_type
        if it_kind == cccl_iterator_kind_t.POINTER:
            if state is None:
                self.state_obj = None
                self.iter_data.size = 0
                self.iter_data.state = NULL
            elif isinstance(state, int):
                self.state_obj = None
                self.iter_data.size = 0
                self.iter_data.state = int_as_ptr(state)
            elif isinstance(state, Pointer):
                self.state_obj = state.reference
                self.iter_data.size = 0
                self.iter_data.state = (<Pointer>state).ptr
            else:
                raise TypeError(
                    "Expect for Iterator of kind POINTER, state must have type Pointer or int, "
                    f"got {type(state)}"
                )
            if host_advance_fn is not None:
                raise ValueError(
                    "host_advance_fn must be set to None for iterators of kind POINTER"
                )
            self.iter_data.host_advance = NULL
            self.host_advance_obj = None
        elif it_kind == cccl_iterator_kind_t.ITERATOR:
            if state is None:
                self.state_obj = None
                self.iter_data.size = 0
                self.iter_data.state = NULL
            elif isinstance(state, IteratorState):
                self.state_obj = state.reference
                self.iter_data.size = (<IteratorState>state).size
                self.iter_data.state = (<IteratorState>state).ptr
            else:
                raise TypeError(
                    "For Iterator of kind ITERATOR, state must have type IteratorState, "
                    f"got type {type(state)}"
                )
            if host_advance_fn is not None:
                self.iter_data.host_advance = unbox_host_advance_fn(host_advance_fn)
                self.host_advance_obj = host_advance_fn
            else:
                self.iter_data.host_advance = NULL
                self.host_advance_obj = None
        else:  # pragma: no cover
            raise ValueError("Unrecognized iterator kind")
        self.advance = advance_fn
        self.dereference = dereference_fn
        self.iter_data.alignment = alignment
        self.iter_data.type = <cccl_iterator_kind_t> it_kind
        self.iter_data.advance = self.advance.op_data
        self.iter_data.dereference = self.dereference.op_data
        self.iter_data.value_type = value_type.type_info

    @property
    def advance_op(self):
        """The `Op` descriptor passed as `advance_fn`."""
        return self.advance

    @property
    def dereference_or_assign_op(self):
        """The `Op` descriptor passed as `dereference_fn`."""
        return self.dereference

    @property
    def state(self):
        """
        Iterator state.

        For `POINTER` iterators this is the address as an integer, for
        `ITERATOR` iterators it is the Python object stored for the state.
        """
        if self.iter_data.type == cccl_iterator_kind_t.POINTER:
            return <size_t>self.iter_data.state
        else:
            return self.state_obj

    @state.setter
    def state(self, new_value):
        cdef cccl_iterator_kind_t it_kind = self.iter_data.type
        if it_kind == cccl_iterator_kind_t.POINTER:
            if isinstance(new_value, Pointer):
                self.state_obj = (<Pointer>new_value).ref
                self.iter_data.size = 0
                self.iter_data.state = (<Pointer>new_value).ptr
            elif isinstance(new_value, int):
                self.state_obj = None
                self.iter_data.size = 0
                self.iter_data.state = int_as_ptr(new_value)
            elif new_value is None:
                self.state_obj = None
                self.iter_data.size = 0
                self.iter_data.state = NULL
            else:
                raise TypeError(
                    "For iterator with type POINTER, state value must have type int or type Pointer, "
                    f"got type {type(new_value)}"
                )
        elif it_kind == cccl_iterator_kind_t.ITERATOR:
            if isinstance(new_value, IteratorState):
                self.state_obj = new_value.reference
                self.iter_data.size = (<IteratorState>new_value).size
                self.iter_data.state = (<IteratorState>new_value).ptr
            elif isinstance(new_value, Pointer):
                # A bare Pointer carries no size, so the previously recorded
                # size is reused. Validate before touching any member so a
                # rejected assignment leaves the iterator unchanged.
                if self.iter_data.size == 0:
                    raise ValueError("Assigning incomplete state value to iterator without state size information")
                self.state_obj = new_value.reference
                self.iter_data.state = (<Pointer>new_value).ptr
            elif PyObject_CheckBuffer(new_value):
                self.iter_data.state = get_buffer_pointer(new_value, &self.iter_data.size)
                self.state_obj = new_value
            elif new_value is None:
                self.state_obj = None
                self.iter_data.size = 0
                self.iter_data.state = NULL
            else:
                raise TypeError(
                    "For iterator with type ITERATOR, state value must have type IteratorState or type bytes, "
                    f"got type {type(new_value)}"
                )
        else:
            raise TypeError("The new value should be an integer for iterators of POINTER kind, and bytes for ITERATOR kind")

    @property
    def type(self):
        """`IteratorKind.POINTER` or `IteratorKind.ITERATOR`."""
        cdef cccl_iterator_kind_t it_kind = self.iter_data.type
        if it_kind == cccl_iterator_kind_t.POINTER:
            return IteratorKind.POINTER
        else:
            return IteratorKind.ITERATOR

    def is_kind_pointer(self):
        """Return True if the iterator kind is `POINTER`."""
        cdef cccl_iterator_kind_t it_kind = self.iter_data.type
        return (it_kind == cccl_iterator_kind_t.POINTER)

    def is_kind_iterator(self):
        """Return True if the iterator kind is `ITERATOR`."""
        cdef cccl_iterator_kind_t it_kind = self.iter_data.type
        return (it_kind == cccl_iterator_kind_t.ITERATOR)

    def as_bytes(self):
        "Debugging utility to get a copy of the library struct as bytes"
        cdef uint8_t[:] mem_view = bytearray(sizeof(self.iter_data))
        memcpy(&mem_view[0], &self.iter_data, sizeof(self.iter_data))
        return bytes(mem_view)

    @property
    def host_advance_fn(self):
        """The object the host advance function was set from, or None."""
        return self.host_advance_obj

    @host_advance_fn.setter
    def host_advance_fn(self, func):
        if (self.iter_data.type == cccl_iterator_kind_t.ITERATOR):
            if func is not None:
                self.iter_data.host_advance = unbox_host_advance_fn(func)
                self.host_advance_obj = func
            else:
                self.iter_data.host_advance = NULL
                self.host_advance_obj = None
        else:
            raise ValueError(
                "host_advance_fn can only be set for iterators of kind ITERATOR"
            )
756
+
757
+
758
cdef class CommonData:
    """
    Bundle of build settings: a compute capability pair and four paths.

    The paths are given as `str` and kept UTF-8 encoded, so that C string
    pointers into them can be produced by the `*_get_c_str` helpers.

    Args:
        cc_major (int): major part of the compute capability
        cc_minor (int): minor part of the compute capability
        cub_path (str): stored as the CUB path
        thrust_path (str): stored as the Thrust path
        libcudacxx_path (str): stored as the libcudacxx path
        ctk_path (str): stored as the CTK path
    """
    cdef int cc_major
    cdef int cc_minor
    cdef bytes encoded_cub_path
    cdef bytes encoded_thrust_path
    cdef bytes encoded_libcudacxx_path
    cdef bytes encoded_ctk_path

    def __cinit__(self, int cc_major, int cc_minor, str cub_path, str thrust_path, str libcudacxx_path, str ctk_path):
        self.cc_major = cc_major
        self.cc_minor = cc_minor
        self.encoded_cub_path = cub_path.encode("utf-8")
        self.encoded_thrust_path = thrust_path.encode("utf-8")
        self.encoded_libcudacxx_path = libcudacxx_path.encode("utf-8")
        self.encoded_ctk_path = ctk_path.encode("utf-8")

    cdef inline int get_cc_major(self):
        return self.cc_major

    cdef inline int get_cc_minor(self):
        return self.cc_minor

    # Each helper below yields a pointer into the stored bytes object, or
    # NULL when the stored path is empty.

    cdef inline const char * cub_path_get_c_str(self):
        if self.encoded_cub_path:
            return <const char *>self.encoded_cub_path
        return NULL

    cdef inline const char * thrust_path_get_c_str(self):
        if self.encoded_thrust_path:
            return <const char *>self.encoded_thrust_path
        return NULL

    cdef inline const char * libcudacxx_path_get_c_str(self):
        if self.encoded_libcudacxx_path:
            return <const char *>self.encoded_libcudacxx_path
        return NULL

    cdef inline const char * ctk_path_get_c_str(self):
        if self.encoded_ctk_path:
            return <const char *>self.encoded_ctk_path
        return NULL

    @property
    def compute_capability(self):
        """Tuple `(cc_major, cc_minor)`."""
        return (self.cc_major, self.cc_minor)

    @property
    def cub_path(self):
        """CUB path decoded back to `str`."""
        return self.encoded_cub_path.decode("utf-8")

    @property
    def ctk_path(self):
        """CTK path decoded back to `str`."""
        return self.encoded_ctk_path.decode("utf-8")

    @property
    def thrust_path(self):
        """Thrust path decoded back to `str`."""
        return self.encoded_thrust_path.decode("utf-8")

    @property
    def libcudacxx_path(self):
        """libcudacxx path decoded back to `str`."""
        return self.encoded_libcudacxx_path.decode("utf-8")
811
+
812
+ # --------------
813
+ # DeviceReduce
814
+ # --------------
815
+
816
+ cdef extern from "cccl/c/reduce.h":
817
+ cdef struct cccl_device_reduce_build_result_t 'cccl_device_reduce_build_result_t':
818
+ const char* cubin
819
+ size_t cubin_size
820
+
821
+ cdef CUresult cccl_device_reduce_build(
822
+ cccl_device_reduce_build_result_t*,
823
+ cccl_iterator_t,
824
+ cccl_iterator_t,
825
+ cccl_op_t,
826
+ cccl_value_t,
827
+ int, int, const char*, const char*, const char*, const char*
828
+ ) nogil
829
+
830
+ cdef CUresult cccl_device_reduce(
831
+ cccl_device_reduce_build_result_t,
832
+ void *,
833
+ size_t *,
834
+ cccl_iterator_t,
835
+ cccl_iterator_t,
836
+ uint64_t,
837
+ cccl_op_t,
838
+ cccl_value_t,
839
+ CUstream
840
+ ) nogil
841
+
842
+ cdef CUresult cccl_device_reduce_cleanup(
843
+ cccl_device_reduce_build_result_t*
844
+ ) nogil
845
+
846
+
847
cdef class DeviceReduceBuildResult:
    """
    Holds the native result of `cccl_device_reduce_build`.

    Construction calls `cccl_device_reduce_build` with the GIL released and
    raises `RuntimeError` on a non-zero status. Deallocation calls
    `cccl_device_reduce_cleanup` on the stored struct.

    Args:
        d_in (Iterator): input iterator descriptor
        d_out (Iterator): output iterator descriptor
        op (Op): operator descriptor
        h_init (Value): initial value descriptor
        common_data (CommonData): compute capability and paths
    """
    cdef cccl_device_reduce_build_result_t build_data

    def __cinit__(
        DeviceReduceBuildResult self,
        Iterator d_in,
        Iterator d_out,
        Op op,
        Value h_init,
        CommonData common_data
    ):
        cdef CUresult status = -1
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()
        # start from an all-zero struct before handing it to the library
        memset(&self.build_data, 0, sizeof(cccl_device_reduce_build_result_t))

        with nogil:
            status = cccl_device_reduce_build(
                &self.build_data,
                d_in.iter_data,
                d_out.iter_data,
                op.op_data,
                h_init.value_data,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(
                f"Failed building reduce, error code: {status}"
            )

    def __dealloc__(DeviceReduceBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_reduce_cleanup(&self.build_data)
        # a failure is only reported by printing, no exception is raised here
        if (status != 0):
            print(f"Return code {status} encountered during reduce result cleanup")

    cpdef size_t compute(
        DeviceReduceBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_in,
        Iterator d_out,
        size_t num_items,
        Op op,
        Value h_init,
        stream
    ):
        """
        Call `cccl_device_reduce` with the GIL released.

        Args:
            temp_storage_ptr: integer address of temporary storage; a falsy
                value (e.g. `None` or 0) is passed as NULL
            temp_storage_bytes: size of temporary storage, converted to `size_t`
            d_in, d_out (Iterator): input and output iterator descriptors
            num_items: number of items, passed as `uint64_t`
            op (Op): operator descriptor
            h_init (Value): initial value descriptor
            stream: integer stream handle; a falsy value is passed as NULL

        Returns:
            The temporary storage size as left by the library call in the
            `size_t` it receives by pointer. The return type is `size_t`
            rather than `int` so that sizes of 2 GiB and above are not
            truncated.

        Raises:
            RuntimeError: if the library call returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_reduce(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_in.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                op.op_data,
                h_init.value_data,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing reduce, error code: {status}"
            )
        return storage_sz

    def _get_cubin(self):
        """Return the first `cubin_size` bytes at `build_data.cubin` as bytes."""
        return self.build_data.cubin[:self.build_data.cubin_size]
929
+
930
+ # ------------
931
+ # DeviceScan
932
+ # ------------
933
+
934
+
935
+ cdef extern from "cccl/c/scan.h":
936
+ ctypedef bint _Bool
937
+
938
+ cdef struct cccl_device_scan_build_result_t 'cccl_device_scan_build_result_t':
939
+ const char* cubin
940
+ size_t cubin_size
941
+
942
+ cdef CUresult cccl_device_scan_build(
943
+ cccl_device_scan_build_result_t*,
944
+ cccl_iterator_t,
945
+ cccl_iterator_t,
946
+ cccl_op_t,
947
+ cccl_value_t,
948
+ _Bool,
949
+ int, int, const char*, const char*, const char*, const char*
950
+ ) nogil
951
+
952
+ cdef CUresult cccl_device_exclusive_scan(
953
+ cccl_device_scan_build_result_t,
954
+ void *,
955
+ size_t *,
956
+ cccl_iterator_t,
957
+ cccl_iterator_t,
958
+ uint64_t,
959
+ cccl_op_t,
960
+ cccl_value_t,
961
+ CUstream
962
+ ) nogil
963
+
964
+ cdef CUresult cccl_device_inclusive_scan(
965
+ cccl_device_scan_build_result_t,
966
+ void *,
967
+ size_t *,
968
+ cccl_iterator_t,
969
+ cccl_iterator_t,
970
+ uint64_t,
971
+ cccl_op_t,
972
+ cccl_value_t,
973
+ CUstream
974
+ ) nogil
975
+
976
+ cdef CUresult cccl_device_scan_cleanup(
977
+ cccl_device_scan_build_result_t*
978
+ ) nogil
979
+
980
+
981
cdef class DeviceScanBuildResult:
    """
    Holds the native result of `cccl_device_scan_build`.

    Construction calls `cccl_device_scan_build` with the GIL released and
    raises `RuntimeError` on a non-zero status. Deallocation calls
    `cccl_device_scan_cleanup` on the stored struct.

    Args:
        d_in (Iterator): input iterator descriptor
        d_out (Iterator): output iterator descriptor
        op (Op): operator descriptor
        h_init (Value): initial value descriptor
        force_inclusive (bool): forwarded unchanged to the build call
        common_data (CommonData): compute capability and paths
    """
    cdef cccl_device_scan_build_result_t build_data

    def __cinit__(
        DeviceScanBuildResult self,
        Iterator d_in,
        Iterator d_out,
        Op op,
        Value h_init,
        bint force_inclusive,
        CommonData common_data
    ):
        cdef CUresult status = -1
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()
        # start from an all-zero struct before handing it to the library
        memset(&self.build_data, 0, sizeof(cccl_device_scan_build_result_t))

        with nogil:
            status = cccl_device_scan_build(
                &self.build_data,
                d_in.iter_data,
                d_out.iter_data,
                op.op_data,
                h_init.value_data,
                force_inclusive,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(f"Error {status} building scan")

    def __dealloc__(DeviceScanBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_scan_cleanup(&self.build_data)
        # a failure is only reported by printing, no exception is raised here
        if (status != 0):
            print(f"Return code {status} encountered during scan result cleanup")

    cpdef size_t compute_inclusive(
        DeviceScanBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_in,
        Iterator d_out,
        size_t num_items,
        Op op,
        Value h_init,
        stream
    ):
        """
        Call `cccl_device_inclusive_scan` with the GIL released.

        Args:
            temp_storage_ptr: integer address of temporary storage; a falsy
                value (e.g. `None` or 0) is passed as NULL
            temp_storage_bytes: size of temporary storage, converted to `size_t`
            d_in, d_out (Iterator): input and output iterator descriptors
            num_items: number of items, passed as `uint64_t`
            op (Op): operator descriptor
            h_init (Value): initial value descriptor
            stream: integer stream handle; a falsy value is passed as NULL

        Returns:
            The temporary storage size as left by the library call in the
            `size_t` it receives by pointer. The return type is `size_t`
            rather than `int` so that sizes of 2 GiB and above are not
            truncated.

        Raises:
            RuntimeError: if the library call returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_inclusive_scan(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_in.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                op.op_data,
                h_init.value_data,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing inclusive scan, error code: {status}"
            )
        return storage_sz

    cpdef size_t compute_exclusive(
        DeviceScanBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_in,
        Iterator d_out,
        size_t num_items,
        Op op,
        Value h_init,
        stream
    ):
        """
        Call `cccl_device_exclusive_scan` with the GIL released.

        Arguments, return value and raised exceptions follow the same
        conventions as `compute_inclusive`; only the library entry point and
        the error message differ.
        """
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_exclusive_scan(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_in.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                op.op_data,
                h_init.value_data,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing exclusive scan, error code: {status}"
            )
        return storage_sz

    def _get_cubin(self):
        """Return the first `cubin_size` bytes at `build_data.cubin` as bytes."""
        return self.build_data.cubin[:self.build_data.cubin_size]
1097
+
1098
+ # -----------------------
1099
+ # DeviceSegmentedReduce
1100
+ # -----------------------
1101
+
1102
+
1103
+ cdef extern from "cccl/c/segmented_reduce.h":
1104
+ cdef struct cccl_device_segmented_reduce_build_result_t 'cccl_device_segmented_reduce_build_result_t':
1105
+ const char* cubin
1106
+ size_t cubin_size
1107
+
1108
+ cdef CUresult cccl_device_segmented_reduce_build(
1109
+ cccl_device_segmented_reduce_build_result_t*,
1110
+ cccl_iterator_t,
1111
+ cccl_iterator_t,
1112
+ cccl_iterator_t,
1113
+ cccl_iterator_t,
1114
+ cccl_op_t,
1115
+ cccl_value_t,
1116
+ int, int, const char*, const char*, const char*, const char*
1117
+ ) nogil
1118
+
1119
+ cdef CUresult cccl_device_segmented_reduce(
1120
+ cccl_device_segmented_reduce_build_result_t,
1121
+ void *,
1122
+ size_t *,
1123
+ cccl_iterator_t,
1124
+ cccl_iterator_t,
1125
+ uint64_t,
1126
+ cccl_iterator_t,
1127
+ cccl_iterator_t,
1128
+ cccl_op_t,
1129
+ cccl_value_t,
1130
+ CUstream
1131
+ ) nogil
1132
+
1133
+ cdef CUresult cccl_device_segmented_reduce_cleanup(
1134
+ cccl_device_segmented_reduce_build_result_t* bld_ptr
1135
+ ) nogil
1136
+
1137
+
1138
cdef class DeviceSegmentedReduceBuildResult:
    """
    Holds the native result of `cccl_device_segmented_reduce_build`.

    Construction calls `cccl_device_segmented_reduce_build` with the GIL
    released and raises `RuntimeError` on a non-zero status. Deallocation
    calls `cccl_device_segmented_reduce_cleanup` on the stored struct.

    Args:
        d_in (Iterator): input iterator descriptor
        d_out (Iterator): output iterator descriptor
        start_offsets (Iterator): iterator descriptor for start offsets
        end_offsets (Iterator): iterator descriptor for end offsets
        op (Op): operator descriptor
        h_init (Value): initial value descriptor
        common_data (CommonData): compute capability and paths
    """
    cdef cccl_device_segmented_reduce_build_result_t build_data

    def __cinit__(
        DeviceSegmentedReduceBuildResult self,
        Iterator d_in,
        Iterator d_out,
        Iterator start_offsets,
        Iterator end_offsets,
        Op op,
        Value h_init,
        CommonData common_data
    ):
        cdef CUresult status = -1
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        # start from an all-zero struct before handing it to the library
        memset(&self.build_data, 0, sizeof(cccl_device_segmented_reduce_build_result_t))
        with nogil:
            status = cccl_device_segmented_reduce_build(
                &self.build_data,
                d_in.iter_data,
                d_out.iter_data,
                start_offsets.iter_data,
                end_offsets.iter_data,
                op.op_data,
                h_init.value_data,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(
                f"Failed building segmented_reduce, error code: {status}"
            )

    def __dealloc__(DeviceSegmentedReduceBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_segmented_reduce_cleanup(&self.build_data)
        # a failure is only reported by printing, no exception is raised here
        if (status != 0):
            print(f"Return code {status} encountered during segmented_reduce result cleanup")

    cpdef size_t compute(
        DeviceSegmentedReduceBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_in,
        Iterator d_out,
        size_t num_items,
        Iterator start_offsets,
        Iterator end_offsets,
        Op op,
        Value h_init,
        stream
    ):
        """
        Call `cccl_device_segmented_reduce` with the GIL released.

        Args:
            temp_storage_ptr: integer address of temporary storage; a falsy
                value (e.g. `None` or 0) is passed as NULL
            temp_storage_bytes: size of temporary storage, converted to `size_t`
            d_in, d_out (Iterator): input and output iterator descriptors
            num_items: passed to the library as `uint64_t`
            start_offsets, end_offsets (Iterator): offset iterator descriptors
            op (Op): operator descriptor
            h_init (Value): initial value descriptor
            stream: integer stream handle; a falsy value is passed as NULL

        Returns:
            The temporary storage size as left by the library call in the
            `size_t` it receives by pointer. The return type is `size_t`
            rather than `int` so that sizes of 2 GiB and above are not
            truncated.

        Raises:
            RuntimeError: if the library call returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_segmented_reduce(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_in.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                start_offsets.iter_data,
                end_offsets.iter_data,
                op.op_data,
                h_init.value_data,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing segmented_reduce, error code: {status}"
            )
        return storage_sz

    def _get_cubin(self):
        """Return the first `cubin_size` bytes at `build_data.cubin` as bytes."""
        return self.build_data.cubin[:self.build_data.cubin_size]
1228
+
1229
+ # -----------------
1230
+ # DeviceMergeSort
1231
+ # -----------------
1232
+
1233
+
1234
+ cdef extern from "cccl/c/merge_sort.h":
1235
+ cdef struct cccl_device_merge_sort_build_result_t 'cccl_device_merge_sort_build_result_t':
1236
+ const char* cubin
1237
+ size_t cubin_size
1238
+
1239
+ cdef CUresult cccl_device_merge_sort_build(
1240
+ cccl_device_merge_sort_build_result_t *bld_ptr,
1241
+ cccl_iterator_t d_in_keys,
1242
+ cccl_iterator_t d_in_items,
1243
+ cccl_iterator_t d_out_keys,
1244
+ cccl_iterator_t d_out_items,
1245
+ cccl_op_t,
1246
+ int, int, const char*, const char*, const char*, const char*
1247
+ ) nogil
1248
+
1249
+ cdef CUresult cccl_device_merge_sort(
1250
+ cccl_device_merge_sort_build_result_t,
1251
+ void *,
1252
+ size_t *,
1253
+ cccl_iterator_t,
1254
+ cccl_iterator_t,
1255
+ cccl_iterator_t,
1256
+ cccl_iterator_t,
1257
+ uint64_t,
1258
+ cccl_op_t,
1259
+ CUstream
1260
+ ) nogil
1261
+
1262
+ cdef CUresult cccl_device_merge_sort_cleanup(
1263
+ cccl_device_merge_sort_build_result_t* bld_ptr
1264
+ ) nogil
1265
+
1266
+
1267
cdef class DeviceMergeSortBuildResult:
    """
    Holds the native result of `cccl_device_merge_sort_build`.

    Construction calls `cccl_device_merge_sort_build` with the GIL released
    and raises `RuntimeError` on a non-zero status. Deallocation calls
    `cccl_device_merge_sort_cleanup` on the stored struct.

    Args:
        d_in_keys (Iterator): input keys iterator descriptor
        d_in_items (Iterator): input items iterator descriptor
        d_out_keys (Iterator): output keys iterator descriptor
        d_out_items (Iterator): output items iterator descriptor
        op (Op): operator descriptor
        common_data (CommonData): compute capability and paths
    """
    cdef cccl_device_merge_sort_build_result_t build_data

    def __cinit__(
        DeviceMergeSortBuildResult self,
        Iterator d_in_keys,
        Iterator d_in_items,
        Iterator d_out_keys,
        Iterator d_out_items,
        Op op,
        CommonData common_data
    ):
        cdef CUresult status = -1
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        # start from an all-zero struct before handing it to the library
        memset(&self.build_data, 0, sizeof(cccl_device_merge_sort_build_result_t))
        with nogil:
            status = cccl_device_merge_sort_build(
                &self.build_data,
                d_in_keys.iter_data,
                d_in_items.iter_data,
                d_out_keys.iter_data,
                d_out_items.iter_data,
                op.op_data,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(
                f"Failed building merge_sort, error code: {status}"
            )

    def __dealloc__(DeviceMergeSortBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_merge_sort_cleanup(&self.build_data)
        # a failure is only reported by printing, no exception is raised here
        if (status != 0):
            print(f"Return code {status} encountered during merge_sort result cleanup")

    cpdef size_t compute(
        DeviceMergeSortBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_in_keys,
        Iterator d_in_items,
        Iterator d_out_keys,
        Iterator d_out_items,
        size_t num_items,
        Op op,
        stream
    ):
        """
        Call `cccl_device_merge_sort` with the GIL released.

        Args:
            temp_storage_ptr: integer address of temporary storage; a falsy
                value (e.g. `None` or 0) is passed as NULL
            temp_storage_bytes: size of temporary storage, converted to `size_t`
            d_in_keys, d_in_items (Iterator): input iterator descriptors
            d_out_keys, d_out_items (Iterator): output iterator descriptors
            num_items: number of items, passed as `uint64_t`
            op (Op): operator descriptor
            stream: integer stream handle; a falsy value is passed as NULL

        Returns:
            The temporary storage size as left by the library call in the
            `size_t` it receives by pointer. The return type is `size_t`
            rather than `int` so that sizes of 2 GiB and above are not
            truncated.

        Raises:
            RuntimeError: if the library call returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL
        with nogil:
            status = cccl_device_merge_sort(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_in_keys.iter_data,
                d_in_items.iter_data,
                d_out_keys.iter_data,
                d_out_items.iter_data,
                <uint64_t>num_items,
                op.op_data,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing merge_sort, error code: {status}"
            )
        return storage_sz

    def _get_cubin(self):
        """Return the first `cubin_size` bytes at `build_data.cubin` as bytes."""
        return self.build_data.cubin[:self.build_data.cubin_size]
1353
+
1354
+
1355
+ # -------------------
1356
+ # DeviceUniqueByKey
1357
+ # -------------------
1358
+
1359
+ cdef extern from "cccl/c/unique_by_key.h":
1360
+ cdef struct cccl_device_unique_by_key_build_result_t 'cccl_device_unique_by_key_build_result_t':
1361
+ const char* cubin
1362
+ size_t cubin_size
1363
+
1364
+
1365
+ cdef CUresult cccl_device_unique_by_key_build(
1366
+ cccl_device_unique_by_key_build_result_t *build_ptr,
1367
+ cccl_iterator_t d_keys_in,
1368
+ cccl_iterator_t d_values_in,
1369
+ cccl_iterator_t d_keys_out,
1370
+ cccl_iterator_t d_values_out,
1371
+ cccl_iterator_t d_num_selected_out,
1372
+ cccl_op_t comparison_op,
1373
+ int, int, const char *, const char *, const char *, const char *
1374
+ ) nogil
1375
+
1376
+ cdef CUresult cccl_device_unique_by_key(
1377
+ cccl_device_unique_by_key_build_result_t build,
1378
+ void *d_storage_ptr,
1379
+ size_t *d_storage_nbytes,
1380
+ cccl_iterator_t d_keys_in,
1381
+ cccl_iterator_t d_values_in,
1382
+ cccl_iterator_t d_keys_out,
1383
+ cccl_iterator_t d_values_out,
1384
+ cccl_iterator_t d_num_selected_out,
1385
+ cccl_op_t comparison_op,
1386
+ size_t num_items,
1387
+ CUstream stream
1388
+ ) nogil
1389
+
1390
+ cdef CUresult cccl_device_unique_by_key_cleanup(
1391
+ cccl_device_unique_by_key_build_result_t *build_ptr,
1392
+ ) nogil
1393
+
1394
+
1395
cdef class DeviceUniqueByKeyBuildResult:
    """Wrapper around a ``cccl_device_unique_by_key_build_result_t``.

    The struct is filled by ``cccl_device_unique_by_key_build`` in
    ``__cinit__``, passed by value to ``cccl_device_unique_by_key`` in
    ``compute`` and handed to ``cccl_device_unique_by_key_cleanup`` in
    ``__dealloc__``.
    """
    cdef cccl_device_unique_by_key_build_result_t build_data

    def __cinit__(
        DeviceUniqueByKeyBuildResult self,
        Iterator d_keys_in,
        Iterator d_values_in,
        Iterator d_keys_out,
        Iterator d_values_out,
        Iterator d_num_selected_out,
        Op comparison_op,
        CommonData common_data
    ):
        """Run the build step; raises ``RuntimeError`` on a non-zero status."""
        cdef CUresult rc = -1
        # Everything the nogil section needs is pulled out of common_data first.
        cdef int major = common_data.get_cc_major()
        cdef int minor = common_data.get_cc_minor()
        cdef const char *cub_dir = common_data.cub_path_get_c_str()
        cdef const char *thrust_dir = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_dir = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_dir = common_data.ctk_path_get_c_str()

        # Zero the struct before the C library writes into it.
        memset(&self.build_data, 0, sizeof(cccl_device_unique_by_key_build_result_t))
        with nogil:
            rc = cccl_device_unique_by_key_build(
                &self.build_data,
                d_keys_in.iter_data,
                d_values_in.iter_data,
                d_keys_out.iter_data,
                d_values_out.iter_data,
                d_num_selected_out.iter_data,
                comparison_op.op_data,
                major,
                minor,
                cub_dir,
                thrust_dir,
                libcudacxx_dir,
                ctk_dir,
            )
        if rc == 0:
            return
        raise RuntimeError(
            f"Failed building unique_by_key, error code: {rc}"
        )

    def __dealloc__(DeviceUniqueByKeyBuildResult self):
        """Run the cleanup step; a non-zero status is printed, not raised."""
        cdef CUresult rc = -1
        with nogil:
            rc = cccl_device_unique_by_key_cleanup(&self.build_data)
        if rc == 0:
            return
        print(f"Return code {rc} encountered during unique_by_key result cleanup")

    cpdef int compute(
        DeviceUniqueByKeyBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_keys_in,
        Iterator d_values_in,
        Iterator d_keys_out,
        Iterator d_values_out,
        Iterator d_num_selected_out,
        Op comparison_op,
        size_t num_items,
        stream
    ):
        """Invoke ``cccl_device_unique_by_key`` with the stored build result.

        ``temp_storage_ptr`` and ``stream`` are integer handles; a falsy value
        is forwarded as ``NULL``. ``temp_storage_bytes`` is passed by pointer
        and its value after the call is returned. Raises ``RuntimeError`` on a
        non-zero status.

        NOTE(review): the returned local is a ``size_t`` while the declared
        return type is ``int``.
        """
        cdef CUresult rc = -1
        cdef void *scratch = NULL
        cdef size_t scratch_nbytes = 0
        cdef CUstream c_stream = NULL

        # Conversions happen in the same order as the arguments are declared.
        if temp_storage_ptr:
            scratch = <void *><uintptr_t>temp_storage_ptr
        scratch_nbytes = <size_t>temp_storage_bytes
        if stream:
            c_stream = <CUstream><uintptr_t>(stream)

        with nogil:
            rc = cccl_device_unique_by_key(
                self.build_data,
                scratch,
                &scratch_nbytes,
                d_keys_in.iter_data,
                d_values_in.iter_data,
                d_keys_out.iter_data,
                d_values_out.iter_data,
                d_num_selected_out.iter_data,
                comparison_op.op_data,
                <uint64_t>num_items,
                c_stream
            )

        if rc != 0:
            raise RuntimeError(
                f"Failed executing unique_by_key, error code: {rc}"
            )
        return scratch_nbytes

    def _get_cubin(self):
        """Return ``build_data.cubin`` sliced to ``build_data.cubin_size`` bytes."""
        return self.build_data.cubin[0:self.build_data.cubin_size]
1486
+
1487
+ # -----------------
1488
+ # DeviceRadixSort
1489
+ # -----------------
1490
+
1491
cdef extern from "cccl/c/radix_sort.h":
    # Build result handed by pointer to the build/cleanup entry points and by
    # value to the run entry point. Only the fields this file reads are declared.
    cdef struct cccl_device_radix_sort_build_result_t 'cccl_device_radix_sort_build_result_t':
        const char* cubin
        size_t cubin_size

    # Build step. The trailing six parameters are named after the values the
    # wrapper class passes at its call site.
    cdef CUresult cccl_device_radix_sort_build(
        cccl_device_radix_sort_build_result_t *build_ptr,
        cccl_sort_order_t sort_order,
        cccl_iterator_t d_keys_in,
        cccl_iterator_t d_values_in,
        cccl_op_t decomposer,
        const char* decomposer_return_type,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    ) nogil

    # Run step. d_storage_nbytes and selector are passed by pointer.
    cdef CUresult cccl_device_radix_sort(
        cccl_device_radix_sort_build_result_t build,
        void *d_storage_ptr,
        size_t *d_storage_nbytes,
        cccl_iterator_t d_keys_in,
        cccl_iterator_t d_keys_out,
        cccl_iterator_t d_values_in,
        cccl_iterator_t d_values_out,
        cccl_op_t decomposer,
        size_t num_items,
        int begin_bit,
        int end_bit,
        bint is_overwrite_okay,
        int* selector,
        CUstream stream
    ) nogil

    # Cleanup step for a build result.
    cdef CUresult cccl_device_radix_sort_cleanup(
        cccl_device_radix_sort_build_result_t *build_ptr
    ) nogil
1526
+
1527
+
1528
cdef class DeviceRadixSortBuildResult:
    """Wrapper around a ``cccl_device_radix_sort_build_result_t``.

    The struct is filled by ``cccl_device_radix_sort_build`` in ``__cinit__``
    (the sort order is fixed at that point), passed by value to
    ``cccl_device_radix_sort`` in ``compute`` and handed to
    ``cccl_device_radix_sort_cleanup`` in ``__dealloc__``.
    """
    cdef cccl_device_radix_sort_build_result_t build_data

    def __dealloc__(DeviceRadixSortBuildResult self):
        """Run the cleanup step; a non-zero status is printed, not raised."""
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_radix_sort_cleanup(&self.build_data)
        if status != 0:
            print(f"Return code {status} encountered during radix_sort result cleanup")

    def __cinit__(
        DeviceRadixSortBuildResult self,
        cccl_sort_order_t order,
        Iterator d_keys_in,
        Iterator d_values_in,
        Op decomposer_op,
        const char* decomposer_return_type,
        CommonData common_data
    ):
        """Run the build step; raises ``RuntimeError`` on a non-zero status."""
        cdef CUresult status = -1
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        # Zero the struct before the C library writes into it.
        memset(&self.build_data, 0, sizeof(cccl_device_radix_sort_build_result_t))
        with nogil:
            status = cccl_device_radix_sort_build(
                &self.build_data,
                order,
                d_keys_in.iter_data,
                d_values_in.iter_data,
                decomposer_op.op_data,
                decomposer_return_type,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(
                f"Failed building radix_sort, error code: {status}"
            )

    cpdef tuple compute(
        DeviceRadixSortBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_keys_in,
        Iterator d_keys_out,
        Iterator d_values_in,
        Iterator d_values_out,
        Op decomposer_op,
        size_t num_items,
        int begin_bit,
        int end_bit,
        bint is_overwrite_okay,
        selector,
        stream
    ):
        """Invoke ``cccl_device_radix_sort`` with the stored build result.

        ``temp_storage_ptr`` and ``stream`` are integer handles; a falsy value
        is forwarded as ``NULL``. ``temp_storage_bytes`` and ``selector`` are
        copied into C locals that are passed by pointer.

        Returns a ``(storage_bytes, selector)`` tuple holding the values of
        those two locals after the call. Raises ``RuntimeError`` on a non-zero
        status.
        """
        cdef CUresult status = -1
        # uintptr_t matches the pointer casts used by the other wrappers in this file.
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef int selector_int = <int>selector
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_radix_sort(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_keys_in.iter_data,
                d_keys_out.iter_data,
                d_values_in.iter_data,
                d_values_out.iter_data,
                decomposer_op.op_data,
                <uint64_t>num_items,
                begin_bit,
                end_bit,
                is_overwrite_okay,
                &selector_int,
                c_stream
            )

        if status != 0:
            # The sort order is chosen at build time, so the message must not
            # claim a particular direction.
            raise RuntimeError(
                f"Failed executing radix_sort, error code: {status}"
            )
        return <object>storage_sz, <object>selector_int

    def _get_cubin(self):
        """Return ``build_data.cubin`` sliced to ``build_data.cubin_size`` bytes."""
        return self.build_data.cubin[:self.build_data.cubin_size]
1625
+
1626
+ # --------------------------------------------
1627
+ # DeviceUnaryTransform/DeviceBinaryTransform
1628
+ # --------------------------------------------
1629
cdef extern from "cccl/c/transform.h":
    # Build result shared by the unary and binary transform entry points.
    # Only the fields this file reads are declared.
    cdef struct cccl_device_transform_build_result_t:
        const char* cubin
        size_t cubin_size

    # Unary build step. The trailing six parameters are named after the
    # values the wrapper class passes at its call site.
    cdef CUresult cccl_device_unary_transform_build(
        cccl_device_transform_build_result_t *build_ptr,
        cccl_iterator_t d_in,
        cccl_iterator_t d_out,
        cccl_op_t op,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    ) nogil

    # Unary run step.
    cdef CUresult cccl_device_unary_transform(
        cccl_device_transform_build_result_t build,
        cccl_iterator_t d_in,
        cccl_iterator_t d_out,
        uint64_t num_items,
        cccl_op_t op,
        CUstream stream
    ) nogil

    # Binary build step; same trailing parameters as the unary build.
    cdef CUresult cccl_device_binary_transform_build(
        cccl_device_transform_build_result_t* build_ptr,
        cccl_iterator_t d_in1,
        cccl_iterator_t d_in2,
        cccl_iterator_t d_out,
        cccl_op_t op,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    ) nogil

    # Binary run step.
    cdef CUresult cccl_device_binary_transform(
        cccl_device_transform_build_result_t build,
        cccl_iterator_t d_in1,
        cccl_iterator_t d_in2,
        cccl_iterator_t d_out,
        uint64_t num_items,
        cccl_op_t op,
        CUstream stream
    ) nogil

    # Cleanup step used for both unary and binary build results.
    cdef CUresult cccl_device_transform_cleanup(
        cccl_device_transform_build_result_t *build_ptr
    ) nogil
1671
+
1672
+
1673
cdef class DeviceUnaryTransform:
    """Wrapper around a ``cccl_device_transform_build_result_t`` built for a unary op.

    The struct is filled by ``cccl_device_unary_transform_build`` in
    ``__cinit__``, passed by value to ``cccl_device_unary_transform`` in
    ``compute`` and handed to ``cccl_device_transform_cleanup`` in
    ``__dealloc__``.
    """
    cdef cccl_device_transform_build_result_t build_data

    def __cinit__(
        self,
        Iterator d_in,
        Iterator d_out,
        Op op,
        CommonData common_data
    ):
        """Run the unary build step; raises ``RuntimeError`` on a non-zero status."""
        cdef CUresult rc = -1
        cdef int major
        cdef int minor
        cdef const char *cub_dir
        cdef const char *thrust_dir
        cdef const char *libcudacxx_dir
        cdef const char *ctk_dir

        # The struct is zeroed before common_data is queried, as in the
        # original statement order.
        memset(&self.build_data, 0, sizeof(cccl_device_transform_build_result_t))

        major = common_data.get_cc_major()
        minor = common_data.get_cc_minor()
        cub_dir = common_data.cub_path_get_c_str()
        thrust_dir = common_data.thrust_path_get_c_str()
        libcudacxx_dir = common_data.libcudacxx_path_get_c_str()
        ctk_dir = common_data.ctk_path_get_c_str()

        with nogil:
            rc = cccl_device_unary_transform_build(
                &self.build_data,
                d_in.iter_data,
                d_out.iter_data,
                op.op_data,
                major,
                minor,
                cub_dir,
                thrust_dir,
                libcudacxx_dir,
                ctk_dir,
            )
        if rc == 0:
            return
        raise RuntimeError("Failed to build unary transform")

    def __dealloc__(DeviceUnaryTransform self):
        """Run the cleanup step; a non-zero status is printed, not raised."""
        cdef CUresult rc = -1
        with nogil:
            rc = cccl_device_transform_cleanup(&self.build_data)
        if rc == 0:
            return
        print(f"Return code {rc} encountered during unary transform result cleanup")

    cpdef void compute(
        DeviceUnaryTransform self,
        Iterator d_in,
        Iterator d_out,
        size_t num_items,
        Op op,
        stream
    ):
        """Invoke ``cccl_device_unary_transform`` with the stored build result.

        ``stream`` is an integer handle; a falsy value is forwarded as ``NULL``.
        Raises ``RuntimeError`` on a non-zero status.
        """
        cdef CUresult rc = -1
        cdef CUstream c_stream = NULL
        if stream:
            c_stream = <CUstream><uintptr_t>(stream)
        with nogil:
            rc = cccl_device_unary_transform(
                self.build_data,
                d_in.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                op.op_data,
                c_stream
            )
        if rc != 0:
            raise RuntimeError("Failed to compute unary transform")

    def _get_cubin(self):
        """Return ``build_data.cubin`` sliced to ``build_data.cubin_size`` bytes."""
        return self.build_data.cubin[0:self.build_data.cubin_size]
1741
+
1742
+
1743
cdef class DeviceBinaryTransform:
    """Wrapper around a ``cccl_device_transform_build_result_t`` built for a binary op.

    The struct is filled by ``cccl_device_binary_transform_build`` in
    ``__cinit__``, passed by value to ``cccl_device_binary_transform`` in
    ``compute`` and handed to ``cccl_device_transform_cleanup`` in
    ``__dealloc__``.
    """
    cdef cccl_device_transform_build_result_t build_data

    def __cinit__(
        self,
        Iterator d_in1,
        Iterator d_in2,
        Iterator d_out,
        Op op,
        CommonData common_data
    ):
        """Run the binary build step; raises ``RuntimeError`` on a non-zero status."""
        cdef CUresult rc = -1
        cdef int major
        cdef int minor
        cdef const char *cub_dir
        cdef const char *thrust_dir
        cdef const char *libcudacxx_dir
        cdef const char *ctk_dir

        # The struct is zeroed before common_data is queried, as in the
        # original statement order.
        memset(&self.build_data, 0, sizeof(cccl_device_transform_build_result_t))

        major = common_data.get_cc_major()
        minor = common_data.get_cc_minor()
        cub_dir = common_data.cub_path_get_c_str()
        thrust_dir = common_data.thrust_path_get_c_str()
        libcudacxx_dir = common_data.libcudacxx_path_get_c_str()
        ctk_dir = common_data.ctk_path_get_c_str()

        with nogil:
            rc = cccl_device_binary_transform_build(
                &self.build_data,
                d_in1.iter_data,
                d_in2.iter_data,
                d_out.iter_data,
                op.op_data,
                major,
                minor,
                cub_dir,
                thrust_dir,
                libcudacxx_dir,
                ctk_dir,
            )
        if rc == 0:
            return
        raise RuntimeError("Failed to build binary transform")

    def __dealloc__(DeviceBinaryTransform self):
        """Run the cleanup step; a non-zero status is printed, not raised."""
        cdef CUresult rc = -1
        with nogil:
            rc = cccl_device_transform_cleanup(&self.build_data)
        if rc == 0:
            return
        print(f"Return code {rc} encountered during binary transform result cleanup")

    cpdef void compute(
        DeviceBinaryTransform self,
        Iterator d_in1,
        Iterator d_in2,
        Iterator d_out,
        size_t num_items,
        Op op,
        stream
    ):
        """Invoke ``cccl_device_binary_transform`` with the stored build result.

        ``stream`` is an integer handle; a falsy value is forwarded as ``NULL``.
        Raises ``RuntimeError`` on a non-zero status.
        """
        cdef CUresult rc = -1
        cdef CUstream c_stream = NULL
        if stream:
            c_stream = <CUstream><uintptr_t>(stream)
        with nogil:
            rc = cccl_device_binary_transform(
                self.build_data,
                d_in1.iter_data,
                d_in2.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                op.op_data,
                c_stream
            )
        if rc != 0:
            raise RuntimeError("Failed to compute binary transform")

    def _get_cubin(self):
        """Return ``build_data.cubin`` sliced to ``build_data.cubin_size`` bytes."""
        return self.build_data.cubin[0:self.build_data.cubin_size]
1814
+
1815
+
1816
+ # -----------------
1817
+ # DeviceHistogram
1818
+ # -----------------
1819
cdef extern from "cccl/c/histogram.h":
    # Build result handed by pointer to the build/cleanup entry points and by
    # value to the run entry point. Only the fields this file reads are declared.
    cdef struct cccl_device_histogram_build_result_t 'cccl_device_histogram_build_result_t':
        const char* cubin
        size_t cubin_size

    # Build step. The trailing six parameters are named after the values the
    # wrapper class passes at its call site.
    cdef CUresult cccl_device_histogram_build(
        cccl_device_histogram_build_result_t *build_ptr,
        int num_channels,
        int num_active_channels,
        cccl_iterator_t d_samples,
        int num_output_levels_val,
        cccl_iterator_t d_output_histograms,
        cccl_value_t h_levels,
        int64_t num_rows,
        int64_t row_stride_samples,
        bint is_evenly_segmented,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    ) nogil

    # Run step for the "even" variant. The extent parameters are 64-bit.
    cdef CUresult cccl_device_histogram_even(
        cccl_device_histogram_build_result_t build,
        void *d_storage_ptr,
        size_t *d_storage_nbytes,
        cccl_iterator_t d_samples,
        cccl_iterator_t d_output_histograms,
        cccl_value_t num_output_levels,
        cccl_value_t lower_level,
        cccl_value_t upper_level,
        int64_t num_row_pixels,
        int64_t num_rows,
        int64_t row_stride_samples,
        CUstream stream
    ) nogil

    # Cleanup step for a build result.
    cdef CUresult cccl_device_histogram_cleanup(
        cccl_device_histogram_build_result_t *build_ptr
    ) nogil
1856
+
1857
+
1858
cdef class DeviceHistogramBuildResult:
    """Wrapper around a ``cccl_device_histogram_build_result_t``.

    The struct is filled by ``cccl_device_histogram_build`` in ``__cinit__``,
    passed by value to ``cccl_device_histogram_even`` in ``compute_even`` and
    handed to ``cccl_device_histogram_cleanup`` in ``__dealloc__``.
    """
    cdef cccl_device_histogram_build_result_t build_data

    def __dealloc__(DeviceHistogramBuildResult self):
        """Run the cleanup step; a non-zero status is printed, not raised."""
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_histogram_cleanup(&self.build_data)
        if status != 0:
            print(f"Return code {status} encountered during histogram result cleanup")

    def __cinit__(
        DeviceHistogramBuildResult self,
        int num_channels,
        int num_active_channels,
        Iterator d_samples,
        int num_levels,
        Iterator d_histogram,
        Value h_levels,
        int64_t num_rows,
        int64_t row_stride_samples,
        bint is_evenly_segmented,
        CommonData common_data
    ):
        """Run the build step; raises ``RuntimeError`` on a non-zero status.

        ``num_rows`` and ``row_stride_samples`` are accepted as 64-bit integers
        because ``cccl_device_histogram_build`` declares them as ``int64_t``.
        """
        cdef CUresult status = -1
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        # Zero the struct before the C library writes into it.
        memset(&self.build_data, 0, sizeof(cccl_device_histogram_build_result_t))
        with nogil:
            status = cccl_device_histogram_build(
                &self.build_data,
                num_channels,
                num_active_channels,
                d_samples.iter_data,
                num_levels,
                d_histogram.iter_data,
                h_levels.value_data,
                num_rows,
                row_stride_samples,
                is_evenly_segmented,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(
                f"Failed building histogram, error code: {status}"
            )

    cpdef int compute_even(
        DeviceHistogramBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_samples,
        Iterator d_histogram,
        Value h_num_output_levels,
        Value h_lower_level,
        Value h_upper_level,
        int64_t num_row_pixels,
        int64_t num_rows,
        int64_t row_stride_samples,
        stream
    ):
        """Invoke ``cccl_device_histogram_even`` with the stored build result.

        ``temp_storage_ptr`` and ``stream`` are integer handles; a falsy value
        is forwarded as ``NULL``. ``temp_storage_bytes`` is passed by pointer
        and its value after the call is returned. The three extent arguments
        are 64-bit so that values of 2**31 or more reach the C API, which
        declares them as ``int64_t``, instead of failing argument conversion.
        Raises ``RuntimeError`` on a non-zero status.

        NOTE(review): the returned local is a ``size_t`` while the declared
        return type is ``int``.
        """
        cdef CUresult status = -1
        # uintptr_t matches the pointer casts used by the other wrappers in this file.
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_histogram_even(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_samples.iter_data,
                d_histogram.iter_data,
                h_num_output_levels.value_data,
                h_lower_level.value_data,
                h_upper_level.value_data,
                num_row_pixels,
                num_rows,
                row_stride_samples,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing histogram, error code: {status}"
            )
        return storage_sz

    def _get_cubin(self):
        """Return ``build_data.cubin`` sliced to ``build_data.cubin_size`` bytes."""
        return self.build_data.cubin[:self.build_data.cubin_size]