cuda-cccl 0.3.4__cp311-cp311-manylinux_2_24_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cuda-cccl might be problematic. Click here for more details.

Files changed (1926)
  1. cuda/cccl/__init__.py +27 -0
  2. cuda/cccl/_cuda_version_utils.py +24 -0
  3. cuda/cccl/cooperative/__init__.py +9 -0
  4. cuda/cccl/cooperative/experimental/__init__.py +24 -0
  5. cuda/cccl/headers/__init__.py +7 -0
  6. cuda/cccl/headers/include/__init__.py +1 -0
  7. cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +233 -0
  8. cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +1158 -0
  9. cuda/cccl/headers/include/cub/agent/agent_for.cuh +55 -0
  10. cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +677 -0
  11. cuda/cccl/headers/include/cub/agent/agent_merge.cuh +234 -0
  12. cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +722 -0
  13. cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +761 -0
  14. cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +282 -0
  15. cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +702 -0
  16. cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +552 -0
  17. cuda/cccl/headers/include/cub/agent/agent_reduce.cuh +592 -0
  18. cuda/cccl/headers/include/cub/agent/agent_reduce_by_key.cuh +780 -0
  19. cuda/cccl/headers/include/cub/agent/agent_rle.cuh +1095 -0
  20. cuda/cccl/headers/include/cub/agent/agent_scan.cuh +562 -0
  21. cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +448 -0
  22. cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +263 -0
  23. cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +1088 -0
  24. cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +320 -0
  25. cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +584 -0
  26. cuda/cccl/headers/include/cub/agent/agent_topk.cuh +762 -0
  27. cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +605 -0
  28. cuda/cccl/headers/include/cub/agent/single_pass_scan_operators.cuh +1399 -0
  29. cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +939 -0
  30. cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +1203 -0
  31. cuda/cccl/headers/include/cub/block/block_exchange.cuh +1279 -0
  32. cuda/cccl/headers/include/cub/block/block_histogram.cuh +400 -0
  33. cuda/cccl/headers/include/cub/block/block_load.cuh +1242 -0
  34. cuda/cccl/headers/include/cub/block/block_load_to_shared.cuh +416 -0
  35. cuda/cccl/headers/include/cub/block/block_merge_sort.cuh +771 -0
  36. cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +1203 -0
  37. cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +2132 -0
  38. cuda/cccl/headers/include/cub/block/block_raking_layout.cuh +126 -0
  39. cuda/cccl/headers/include/cub/block/block_reduce.cuh +642 -0
  40. cuda/cccl/headers/include/cub/block/block_run_length_decode.cuh +406 -0
  41. cuda/cccl/headers/include/cub/block/block_scan.cuh +2287 -0
  42. cuda/cccl/headers/include/cub/block/block_shuffle.cuh +322 -0
  43. cuda/cccl/headers/include/cub/block/block_store.cuh +1223 -0
  44. cuda/cccl/headers/include/cub/block/radix_rank_sort_operations.cuh +597 -0
  45. cuda/cccl/headers/include/cub/block/specializations/block_histogram_atomic.cuh +62 -0
  46. cuda/cccl/headers/include/cub/block/specializations/block_histogram_sort.cuh +216 -0
  47. cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking.cuh +230 -0
  48. cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking_commutative_only.cuh +214 -0
  49. cuda/cccl/headers/include/cub/block/specializations/block_reduce_warp_reductions.cuh +257 -0
  50. cuda/cccl/headers/include/cub/block/specializations/block_scan_raking.cuh +766 -0
  51. cuda/cccl/headers/include/cub/block/specializations/block_scan_warp_scans.cuh +514 -0
  52. cuda/cccl/headers/include/cub/config.cuh +29 -0
  53. cuda/cccl/headers/include/cub/cub.cuh +96 -0
  54. cuda/cccl/headers/include/cub/detail/array_utils.cuh +54 -0
  55. cuda/cccl/headers/include/cub/detail/choose_offset.cuh +135 -0
  56. cuda/cccl/headers/include/cub/detail/detect_cuda_runtime.cuh +50 -0
  57. cuda/cccl/headers/include/cub/detail/device_double_buffer.cuh +94 -0
  58. cuda/cccl/headers/include/cub/detail/device_memory_resource.cuh +60 -0
  59. cuda/cccl/headers/include/cub/detail/fast_modulo_division.cuh +227 -0
  60. cuda/cccl/headers/include/cub/detail/integer_utils.cuh +86 -0
  61. cuda/cccl/headers/include/cub/detail/launcher/cuda_driver.cuh +140 -0
  62. cuda/cccl/headers/include/cub/detail/launcher/cuda_runtime.cuh +98 -0
  63. cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +112 -0
  64. cuda/cccl/headers/include/cub/detail/ptx-json/README.md +66 -0
  65. cuda/cccl/headers/include/cub/detail/ptx-json/array.h +41 -0
  66. cuda/cccl/headers/include/cub/detail/ptx-json/json.h +39 -0
  67. cuda/cccl/headers/include/cub/detail/ptx-json/object.h +71 -0
  68. cuda/cccl/headers/include/cub/detail/ptx-json/string.h +79 -0
  69. cuda/cccl/headers/include/cub/detail/ptx-json/value.h +95 -0
  70. cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +39 -0
  71. cuda/cccl/headers/include/cub/detail/rfa.cuh +706 -0
  72. cuda/cccl/headers/include/cub/detail/strong_load.cuh +163 -0
  73. cuda/cccl/headers/include/cub/detail/strong_store.cuh +194 -0
  74. cuda/cccl/headers/include/cub/detail/temporary_storage.cuh +377 -0
  75. cuda/cccl/headers/include/cub/detail/type_traits.cuh +185 -0
  76. cuda/cccl/headers/include/cub/detail/uninitialized_copy.cuh +48 -0
  77. cuda/cccl/headers/include/cub/detail/unsafe_bitcast.cuh +33 -0
  78. cuda/cccl/headers/include/cub/device/device_adjacent_difference.cuh +572 -0
  79. cuda/cccl/headers/include/cub/device/device_copy.cuh +276 -0
  80. cuda/cccl/headers/include/cub/device/device_for.cuh +1061 -0
  81. cuda/cccl/headers/include/cub/device/device_histogram.cuh +1485 -0
  82. cuda/cccl/headers/include/cub/device/device_memcpy.cuh +171 -0
  83. cuda/cccl/headers/include/cub/device/device_merge.cuh +203 -0
  84. cuda/cccl/headers/include/cub/device/device_merge_sort.cuh +955 -0
  85. cuda/cccl/headers/include/cub/device/device_partition.cuh +644 -0
  86. cuda/cccl/headers/include/cub/device/device_radix_sort.cuh +3413 -0
  87. cuda/cccl/headers/include/cub/device/device_reduce.cuh +2497 -0
  88. cuda/cccl/headers/include/cub/device/device_run_length_encode.cuh +346 -0
  89. cuda/cccl/headers/include/cub/device/device_scan.cuh +2187 -0
  90. cuda/cccl/headers/include/cub/device/device_segmented_radix_sort.cuh +1472 -0
  91. cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +1406 -0
  92. cuda/cccl/headers/include/cub/device/device_segmented_sort.cuh +2787 -0
  93. cuda/cccl/headers/include/cub/device/device_select.cuh +1204 -0
  94. cuda/cccl/headers/include/cub/device/device_topk.cuh +511 -0
  95. cuda/cccl/headers/include/cub/device/device_transform.cuh +668 -0
  96. cuda/cccl/headers/include/cub/device/dispatch/dispatch_adjacent_difference.cuh +289 -0
  97. cuda/cccl/headers/include/cub/device/dispatch/dispatch_batch_memcpy.cuh +694 -0
  98. cuda/cccl/headers/include/cub/device/dispatch/dispatch_common.cuh +43 -0
  99. cuda/cccl/headers/include/cub/device/dispatch/dispatch_copy_mdspan.cuh +77 -0
  100. cuda/cccl/headers/include/cub/device/dispatch/dispatch_for.cuh +172 -0
  101. cuda/cccl/headers/include/cub/device/dispatch/dispatch_histogram.cuh +1026 -0
  102. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +303 -0
  103. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge_sort.cuh +449 -0
  104. cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +1719 -0
  105. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +1283 -0
  106. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_by_key.cuh +629 -0
  107. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +504 -0
  108. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh +312 -0
  109. cuda/cccl/headers/include/cub/device/dispatch/dispatch_rle.cuh +603 -0
  110. cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan.cuh +491 -0
  111. cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan_by_key.cuh +577 -0
  112. cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +951 -0
  113. cuda/cccl/headers/include/cub/device/dispatch/dispatch_select_if.cuh +818 -0
  114. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +339 -0
  115. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +455 -0
  116. cuda/cccl/headers/include/cub/device/dispatch/dispatch_three_way_partition.cuh +364 -0
  117. cuda/cccl/headers/include/cub/device/dispatch/dispatch_topk.cuh +626 -0
  118. cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +541 -0
  119. cuda/cccl/headers/include/cub/device/dispatch/dispatch_unique_by_key.cuh +521 -0
  120. cuda/cccl/headers/include/cub/device/dispatch/kernels/kernel_for_each.cuh +259 -0
  121. cuda/cccl/headers/include/cub/device/dispatch/kernels/kernel_histogram.cuh +497 -0
  122. cuda/cccl/headers/include/cub/device/dispatch/kernels/kernel_merge_sort.cuh +332 -0
  123. cuda/cccl/headers/include/cub/device/dispatch/kernels/kernel_radix_sort.cuh +801 -0
  124. cuda/cccl/headers/include/cub/device/dispatch/kernels/kernel_reduce.cuh +557 -0
  125. cuda/cccl/headers/include/cub/device/dispatch/kernels/kernel_scan.cuh +163 -0
  126. cuda/cccl/headers/include/cub/device/dispatch/kernels/kernel_segmented_reduce.cuh +295 -0
  127. cuda/cccl/headers/include/cub/device/dispatch/kernels/kernel_segmented_sort.cuh +521 -0
  128. cuda/cccl/headers/include/cub/device/dispatch/kernels/kernel_three_way_partition.cuh +200 -0
  129. cuda/cccl/headers/include/cub/device/dispatch/kernels/kernel_transform.cuh +1028 -0
  130. cuda/cccl/headers/include/cub/device/dispatch/kernels/kernel_unique_by_key.cuh +175 -0
  131. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +43 -0
  132. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +94 -0
  133. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +34 -0
  134. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +255 -0
  135. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +52 -0
  136. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +100 -0
  137. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +1063 -0
  138. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce.cuh +468 -0
  139. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +918 -0
  140. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +647 -0
  141. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +594 -0
  142. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +986 -0
  143. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +373 -0
  144. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_select_if.cuh +1563 -0
  145. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +415 -0
  146. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_topk.cuh +84 -0
  147. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +456 -0
  148. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +858 -0
  149. cuda/cccl/headers/include/cub/grid/grid_even_share.cuh +203 -0
  150. cuda/cccl/headers/include/cub/grid/grid_mapping.cuh +82 -0
  151. cuda/cccl/headers/include/cub/grid/grid_queue.cuh +178 -0
  152. cuda/cccl/headers/include/cub/iterator/arg_index_input_iterator.cuh +230 -0
  153. cuda/cccl/headers/include/cub/iterator/cache_modified_input_iterator.cuh +235 -0
  154. cuda/cccl/headers/include/cub/iterator/cache_modified_output_iterator.cuh +226 -0
  155. cuda/cccl/headers/include/cub/iterator/tex_obj_input_iterator.cuh +296 -0
  156. cuda/cccl/headers/include/cub/thread/thread_load.cuh +324 -0
  157. cuda/cccl/headers/include/cub/thread/thread_operators.cuh +664 -0
  158. cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +525 -0
  159. cuda/cccl/headers/include/cub/thread/thread_scan.cuh +472 -0
  160. cuda/cccl/headers/include/cub/thread/thread_search.cuh +175 -0
  161. cuda/cccl/headers/include/cub/thread/thread_simd.cuh +456 -0
  162. cuda/cccl/headers/include/cub/thread/thread_sort.cuh +78 -0
  163. cuda/cccl/headers/include/cub/thread/thread_store.cuh +341 -0
  164. cuda/cccl/headers/include/cub/util_allocator.cuh +897 -0
  165. cuda/cccl/headers/include/cub/util_arch.cuh +141 -0
  166. cuda/cccl/headers/include/cub/util_cpp_dialect.cuh +71 -0
  167. cuda/cccl/headers/include/cub/util_debug.cuh +183 -0
  168. cuda/cccl/headers/include/cub/util_device.cuh +759 -0
  169. cuda/cccl/headers/include/cub/util_macro.cuh +73 -0
  170. cuda/cccl/headers/include/cub/util_math.cuh +92 -0
  171. cuda/cccl/headers/include/cub/util_namespace.cuh +151 -0
  172. cuda/cccl/headers/include/cub/util_policy_wrapper_t.cuh +31 -0
  173. cuda/cccl/headers/include/cub/util_ptx.cuh +489 -0
  174. cuda/cccl/headers/include/cub/util_temporary_storage.cuh +96 -0
  175. cuda/cccl/headers/include/cub/util_type.cuh +1093 -0
  176. cuda/cccl/headers/include/cub/util_vsmem.cuh +227 -0
  177. cuda/cccl/headers/include/cub/version.cuh +65 -0
  178. cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_shfl.cuh +304 -0
  179. cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_smem.cuh +152 -0
  180. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +713 -0
  181. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +378 -0
  182. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +928 -0
  183. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_smem.cuh +691 -0
  184. cuda/cccl/headers/include/cub/warp/warp_exchange.cuh +381 -0
  185. cuda/cccl/headers/include/cub/warp/warp_load.cuh +591 -0
  186. cuda/cccl/headers/include/cub/warp/warp_merge_sort.cuh +145 -0
  187. cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +810 -0
  188. cuda/cccl/headers/include/cub/warp/warp_scan.cuh +1866 -0
  189. cuda/cccl/headers/include/cub/warp/warp_store.cuh +498 -0
  190. cuda/cccl/headers/include/cub/warp/warp_utils.cuh +59 -0
  191. cuda/cccl/headers/include/cuda/__algorithm/common.h +68 -0
  192. cuda/cccl/headers/include/cuda/__algorithm/copy.h +196 -0
  193. cuda/cccl/headers/include/cuda/__algorithm/fill.h +107 -0
  194. cuda/cccl/headers/include/cuda/__annotated_ptr/access_property.h +165 -0
  195. cuda/cccl/headers/include/cuda/__annotated_ptr/access_property_encoding.h +172 -0
  196. cuda/cccl/headers/include/cuda/__annotated_ptr/annotated_ptr.h +217 -0
  197. cuda/cccl/headers/include/cuda/__annotated_ptr/annotated_ptr_base.h +100 -0
  198. cuda/cccl/headers/include/cuda/__annotated_ptr/apply_access_property.h +83 -0
  199. cuda/cccl/headers/include/cuda/__annotated_ptr/associate_access_property.h +128 -0
  200. cuda/cccl/headers/include/cuda/__annotated_ptr/createpolicy.h +210 -0
  201. cuda/cccl/headers/include/cuda/__atomic/atomic.h +145 -0
  202. cuda/cccl/headers/include/cuda/__barrier/async_contract_fulfillment.h +39 -0
  203. cuda/cccl/headers/include/cuda/__barrier/barrier.h +65 -0
  204. cuda/cccl/headers/include/cuda/__barrier/barrier_arrive_tx.h +101 -0
  205. cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +529 -0
  206. cuda/cccl/headers/include/cuda/__barrier/barrier_expect_tx.h +73 -0
  207. cuda/cccl/headers/include/cuda/__barrier/barrier_thread_scope.h +60 -0
  208. cuda/cccl/headers/include/cuda/__bit/bit_reverse.h +171 -0
  209. cuda/cccl/headers/include/cuda/__bit/bitfield.h +122 -0
  210. cuda/cccl/headers/include/cuda/__bit/bitmask.h +90 -0
  211. cuda/cccl/headers/include/cuda/__cccl_config +37 -0
  212. cuda/cccl/headers/include/cuda/__cmath/ceil_div.h +124 -0
  213. cuda/cccl/headers/include/cuda/__cmath/fast_modulo_division.h +178 -0
  214. cuda/cccl/headers/include/cuda/__cmath/ilog.h +195 -0
  215. cuda/cccl/headers/include/cuda/__cmath/ipow.h +107 -0
  216. cuda/cccl/headers/include/cuda/__cmath/isqrt.h +80 -0
  217. cuda/cccl/headers/include/cuda/__cmath/mul_hi.h +146 -0
  218. cuda/cccl/headers/include/cuda/__cmath/neg.h +47 -0
  219. cuda/cccl/headers/include/cuda/__cmath/pow2.h +74 -0
  220. cuda/cccl/headers/include/cuda/__cmath/round_down.h +102 -0
  221. cuda/cccl/headers/include/cuda/__cmath/round_up.h +104 -0
  222. cuda/cccl/headers/include/cuda/__cmath/uabs.h +57 -0
  223. cuda/cccl/headers/include/cuda/__complex/complex.h +238 -0
  224. cuda/cccl/headers/include/cuda/__complex/get_real_imag.h +89 -0
  225. cuda/cccl/headers/include/cuda/__complex/traits.h +64 -0
  226. cuda/cccl/headers/include/cuda/__complex_ +28 -0
  227. cuda/cccl/headers/include/cuda/__device/all_devices.h +140 -0
  228. cuda/cccl/headers/include/cuda/__device/arch_id.h +176 -0
  229. cuda/cccl/headers/include/cuda/__device/arch_traits.h +537 -0
  230. cuda/cccl/headers/include/cuda/__device/attributes.h +772 -0
  231. cuda/cccl/headers/include/cuda/__device/compute_capability.h +171 -0
  232. cuda/cccl/headers/include/cuda/__device/device_ref.h +156 -0
  233. cuda/cccl/headers/include/cuda/__device/physical_device.h +172 -0
  234. cuda/cccl/headers/include/cuda/__driver/driver_api.h +848 -0
  235. cuda/cccl/headers/include/cuda/__event/event.h +171 -0
  236. cuda/cccl/headers/include/cuda/__event/event_ref.h +157 -0
  237. cuda/cccl/headers/include/cuda/__event/timed_event.h +120 -0
  238. cuda/cccl/headers/include/cuda/__execution/determinism.h +89 -0
  239. cuda/cccl/headers/include/cuda/__execution/output_ordering.h +87 -0
  240. cuda/cccl/headers/include/cuda/__execution/require.h +75 -0
  241. cuda/cccl/headers/include/cuda/__execution/tune.h +70 -0
  242. cuda/cccl/headers/include/cuda/__functional/address_stability.h +131 -0
  243. cuda/cccl/headers/include/cuda/__functional/for_each_canceled.h +321 -0
  244. cuda/cccl/headers/include/cuda/__functional/maximum.h +76 -0
  245. cuda/cccl/headers/include/cuda/__functional/minimum.h +76 -0
  246. cuda/cccl/headers/include/cuda/__functional/minimum_maximum_common.h +52 -0
  247. cuda/cccl/headers/include/cuda/__functional/proclaim_return_type.h +106 -0
  248. cuda/cccl/headers/include/cuda/__fwd/barrier.h +38 -0
  249. cuda/cccl/headers/include/cuda/__fwd/complex.h +48 -0
  250. cuda/cccl/headers/include/cuda/__fwd/devices.h +44 -0
  251. cuda/cccl/headers/include/cuda/__fwd/get_stream.h +38 -0
  252. cuda/cccl/headers/include/cuda/__fwd/pipeline.h +37 -0
  253. cuda/cccl/headers/include/cuda/__fwd/zip_iterator.h +58 -0
  254. cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h +315 -0
  255. cuda/cccl/headers/include/cuda/__iterator/counting_iterator.h +492 -0
  256. cuda/cccl/headers/include/cuda/__iterator/discard_iterator.h +324 -0
  257. cuda/cccl/headers/include/cuda/__iterator/permutation_iterator.h +456 -0
  258. cuda/cccl/headers/include/cuda/__iterator/shuffle_iterator.h +334 -0
  259. cuda/cccl/headers/include/cuda/__iterator/strided_iterator.h +418 -0
  260. cuda/cccl/headers/include/cuda/__iterator/tabulate_output_iterator.h +367 -0
  261. cuda/cccl/headers/include/cuda/__iterator/transform_input_output_iterator.h +528 -0
  262. cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +527 -0
  263. cuda/cccl/headers/include/cuda/__iterator/transform_output_iterator.h +486 -0
  264. cuda/cccl/headers/include/cuda/__iterator/zip_common.h +148 -0
  265. cuda/cccl/headers/include/cuda/__iterator/zip_function.h +114 -0
  266. cuda/cccl/headers/include/cuda/__iterator/zip_iterator.h +557 -0
  267. cuda/cccl/headers/include/cuda/__iterator/zip_transform_iterator.h +592 -0
  268. cuda/cccl/headers/include/cuda/__latch/latch.h +44 -0
  269. cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +532 -0
  270. cuda/cccl/headers/include/cuda/__mdspan/host_device_mdspan.h +238 -0
  271. cuda/cccl/headers/include/cuda/__mdspan/restrict_accessor.h +152 -0
  272. cuda/cccl/headers/include/cuda/__mdspan/restrict_mdspan.h +117 -0
  273. cuda/cccl/headers/include/cuda/__memcpy_async/check_preconditions.h +79 -0
  274. cuda/cccl/headers/include/cuda/__memcpy_async/completion_mechanism.h +47 -0
  275. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_bulk_shared_global.h +81 -0
  276. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_fallback.h +72 -0
  277. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_shared_global.h +148 -0
  278. cuda/cccl/headers/include/cuda/__memcpy_async/dispatch_memcpy_async.h +165 -0
  279. cuda/cccl/headers/include/cuda/__memcpy_async/elect_one.h +52 -0
  280. cuda/cccl/headers/include/cuda/__memcpy_async/is_local_smem_barrier.h +53 -0
  281. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async.h +179 -0
  282. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_barrier.h +99 -0
  283. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_tx.h +103 -0
  284. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_completion.h +164 -0
  285. cuda/cccl/headers/include/cuda/__memcpy_async/try_get_barrier_handle.h +58 -0
  286. cuda/cccl/headers/include/cuda/__memory/address_space.h +227 -0
  287. cuda/cccl/headers/include/cuda/__memory/align_down.h +56 -0
  288. cuda/cccl/headers/include/cuda/__memory/align_up.h +56 -0
  289. cuda/cccl/headers/include/cuda/__memory/aligned_size.h +61 -0
  290. cuda/cccl/headers/include/cuda/__memory/check_address.h +111 -0
  291. cuda/cccl/headers/include/cuda/__memory/discard_memory.h +64 -0
  292. cuda/cccl/headers/include/cuda/__memory/get_device_address.h +58 -0
  293. cuda/cccl/headers/include/cuda/__memory/is_aligned.h +47 -0
  294. cuda/cccl/headers/include/cuda/__memory/ptr_in_range.h +93 -0
  295. cuda/cccl/headers/include/cuda/__memory/ptr_rebind.h +75 -0
  296. cuda/cccl/headers/include/cuda/__memory/ranges_overlap.h +126 -0
  297. cuda/cccl/headers/include/cuda/__memory_resource/any_resource.h +898 -0
  298. cuda/cccl/headers/include/cuda/__memory_resource/device_memory_pool.h +149 -0
  299. cuda/cccl/headers/include/cuda/__memory_resource/get_memory_resource.h +82 -0
  300. cuda/cccl/headers/include/cuda/__memory_resource/get_property.h +153 -0
  301. cuda/cccl/headers/include/cuda/__memory_resource/legacy_managed_memory_resource.h +148 -0
  302. cuda/cccl/headers/include/cuda/__memory_resource/legacy_pinned_memory_resource.h +139 -0
  303. cuda/cccl/headers/include/cuda/__memory_resource/managed_memory_pool.h +146 -0
  304. cuda/cccl/headers/include/cuda/__memory_resource/memory_resource_base.h +578 -0
  305. cuda/cccl/headers/include/cuda/__memory_resource/pinned_memory_pool.h +188 -0
  306. cuda/cccl/headers/include/cuda/__memory_resource/properties.h +113 -0
  307. cuda/cccl/headers/include/cuda/__memory_resource/resource.h +159 -0
  308. cuda/cccl/headers/include/cuda/__numeric/add_overflow.h +316 -0
  309. cuda/cccl/headers/include/cuda/__numeric/div_overflow.h +150 -0
  310. cuda/cccl/headers/include/cuda/__numeric/narrow.h +108 -0
  311. cuda/cccl/headers/include/cuda/__numeric/overflow_cast.h +59 -0
  312. cuda/cccl/headers/include/cuda/__numeric/overflow_result.h +43 -0
  313. cuda/cccl/headers/include/cuda/__numeric/sub_overflow.h +344 -0
  314. cuda/cccl/headers/include/cuda/__nvtx/nvtx.h +120 -0
  315. cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +2977 -0
  316. cuda/cccl/headers/include/cuda/__ptx/instructions/barrier_cluster.h +43 -0
  317. cuda/cccl/headers/include/cuda/__ptx/instructions/bfind.h +41 -0
  318. cuda/cccl/headers/include/cuda/__ptx/instructions/bmsk.h +41 -0
  319. cuda/cccl/headers/include/cuda/__ptx/instructions/clusterlaunchcontrol.h +41 -0
  320. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk.h +44 -0
  321. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_commit_group.h +43 -0
  322. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_tensor.h +45 -0
  323. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_wait_group.h +43 -0
  324. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_mbarrier_arrive.h +42 -0
  325. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_reduce_async_bulk.h +60 -0
  326. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_reduce_async_bulk_tensor.h +43 -0
  327. cuda/cccl/headers/include/cuda/__ptx/instructions/elect_sync.h +41 -0
  328. cuda/cccl/headers/include/cuda/__ptx/instructions/exit.h +41 -0
  329. cuda/cccl/headers/include/cuda/__ptx/instructions/fence.h +49 -0
  330. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/barrier_cluster.h +115 -0
  331. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/bfind.h +190 -0
  332. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/bmsk.h +54 -0
  333. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/clusterlaunchcontrol.h +242 -0
  334. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk.h +197 -0
  335. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_commit_group.h +25 -0
  336. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_multicast.h +54 -0
  337. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor.h +997 -0
  338. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_gather_scatter.h +318 -0
  339. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_multicast.h +671 -0
  340. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_wait_group.h +46 -0
  341. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_mbarrier_arrive.h +26 -0
  342. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_mbarrier_arrive_noinc.h +26 -0
  343. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk.h +1470 -0
  344. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_bf16.h +132 -0
  345. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_f16.h +132 -0
  346. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_tensor.h +601 -0
  347. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/elect_sync.h +36 -0
  348. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/exit.h +25 -0
  349. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence.h +208 -0
  350. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_mbarrier_init.h +31 -0
  351. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_alias.h +25 -0
  352. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_async.h +58 -0
  353. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_async_generic_sync_restrict.h +64 -0
  354. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_tensormap_generic.h +102 -0
  355. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_sync_restrict.h +64 -0
  356. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/get_sreg.h +949 -0
  357. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/getctarank.h +32 -0
  358. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/ld.h +5542 -0
  359. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive.h +399 -0
  360. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive_expect_tx.h +184 -0
  361. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive_no_complete.h +34 -0
  362. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_expect_tx.h +102 -0
  363. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_init.h +27 -0
  364. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_test_wait.h +143 -0
  365. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_test_wait_parity.h +144 -0
  366. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_try_wait.h +286 -0
  367. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_try_wait_parity.h +290 -0
  368. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_ld_reduce.h +2202 -0
  369. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_red.h +1362 -0
  370. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_st.h +236 -0
  371. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/prmt.h +230 -0
  372. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/red_async.h +460 -0
  373. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/shl.h +96 -0
  374. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/shr.h +168 -0
  375. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st.h +1490 -0
  376. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st_async.h +123 -0
  377. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st_bulk.h +31 -0
  378. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_alloc.h +132 -0
  379. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_commit.h +99 -0
  380. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_cp.h +765 -0
  381. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_fence.h +58 -0
  382. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_ld.h +4927 -0
  383. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_mma.h +4291 -0
  384. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_mma_ws.h +7110 -0
  385. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_shift.h +42 -0
  386. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_st.h +5063 -0
  387. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_wait.h +56 -0
  388. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tensormap_cp_fenceproxy.h +71 -0
  389. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tensormap_replace.h +1030 -0
  390. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/trap.h +25 -0
  391. cuda/cccl/headers/include/cuda/__ptx/instructions/get_sreg.h +43 -0
  392. cuda/cccl/headers/include/cuda/__ptx/instructions/getctarank.h +43 -0
  393. cuda/cccl/headers/include/cuda/__ptx/instructions/ld.h +41 -0
  394. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_arrive.h +45 -0
  395. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_expect_tx.h +41 -0
  396. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_init.h +43 -0
  397. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_wait.h +46 -0
  398. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_ld_reduce.h +41 -0
  399. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_red.h +41 -0
  400. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_st.h +41 -0
  401. cuda/cccl/headers/include/cuda/__ptx/instructions/prmt.h +41 -0
  402. cuda/cccl/headers/include/cuda/__ptx/instructions/red_async.h +43 -0
  403. cuda/cccl/headers/include/cuda/__ptx/instructions/shfl_sync.h +244 -0
  404. cuda/cccl/headers/include/cuda/__ptx/instructions/shl.h +41 -0
  405. cuda/cccl/headers/include/cuda/__ptx/instructions/shr.h +41 -0
  406. cuda/cccl/headers/include/cuda/__ptx/instructions/st.h +41 -0
  407. cuda/cccl/headers/include/cuda/__ptx/instructions/st_async.h +43 -0
  408. cuda/cccl/headers/include/cuda/__ptx/instructions/st_bulk.h +41 -0
  409. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_alloc.h +41 -0
  410. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_commit.h +41 -0
  411. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_cp.h +41 -0
  412. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_fence.h +41 -0
  413. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_ld.h +41 -0
  414. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_mma.h +41 -0
  415. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_mma_ws.h +41 -0
  416. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_shift.h +41 -0
  417. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_st.h +41 -0
  418. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_wait.h +41 -0
  419. cuda/cccl/headers/include/cuda/__ptx/instructions/tensormap_cp_fenceproxy.h +43 -0
  420. cuda/cccl/headers/include/cuda/__ptx/instructions/tensormap_replace.h +43 -0
  421. cuda/cccl/headers/include/cuda/__ptx/instructions/trap.h +41 -0
  422. cuda/cccl/headers/include/cuda/__ptx/pragmas/enable_smem_spilling.h +47 -0
  423. cuda/cccl/headers/include/cuda/__ptx/ptx_dot_variants.h +230 -0
  424. cuda/cccl/headers/include/cuda/__ptx/ptx_helper_functions.h +176 -0
  425. cuda/cccl/headers/include/cuda/__random/feistel_bijection.h +105 -0
  426. cuda/cccl/headers/include/cuda/__random/random_bijection.h +88 -0
  427. cuda/cccl/headers/include/cuda/__runtime/api_wrapper.h +62 -0
  428. cuda/cccl/headers/include/cuda/__runtime/ensure_current_context.h +99 -0
  429. cuda/cccl/headers/include/cuda/__runtime/types.h +41 -0
  430. cuda/cccl/headers/include/cuda/__semaphore/counting_semaphore.h +53 -0
  431. cuda/cccl/headers/include/cuda/__stream/get_stream.h +109 -0
  432. cuda/cccl/headers/include/cuda/__stream/internal_streams.h +44 -0
  433. cuda/cccl/headers/include/cuda/__stream/stream.h +141 -0
  434. cuda/cccl/headers/include/cuda/__stream/stream_ref.h +303 -0
  435. cuda/cccl/headers/include/cuda/__type_traits/is_floating_point.h +47 -0
  436. cuda/cccl/headers/include/cuda/__type_traits/is_specialization_of.h +37 -0
  437. cuda/cccl/headers/include/cuda/__utility/__basic_any/access.h +88 -0
  438. cuda/cccl/headers/include/cuda/__utility/__basic_any/any_cast.h +83 -0
  439. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_base.h +148 -0
  440. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_from.h +96 -0
  441. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_fwd.h +128 -0
  442. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_ptr.h +304 -0
  443. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_ref.h +337 -0
  444. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_value.h +591 -0
  445. cuda/cccl/headers/include/cuda/__utility/__basic_any/conversions.h +169 -0
  446. cuda/cccl/headers/include/cuda/__utility/__basic_any/dynamic_any_cast.h +107 -0
  447. cuda/cccl/headers/include/cuda/__utility/__basic_any/interfaces.h +359 -0
  448. cuda/cccl/headers/include/cuda/__utility/__basic_any/iset.h +142 -0
  449. cuda/cccl/headers/include/cuda/__utility/__basic_any/overrides.h +64 -0
  450. cuda/cccl/headers/include/cuda/__utility/__basic_any/rtti.h +257 -0
  451. cuda/cccl/headers/include/cuda/__utility/__basic_any/semiregular.h +323 -0
  452. cuda/cccl/headers/include/cuda/__utility/__basic_any/storage.h +79 -0
  453. cuda/cccl/headers/include/cuda/__utility/__basic_any/tagged_ptr.h +58 -0
  454. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtcall.h +163 -0
  455. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_functions.h +184 -0
  456. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_ptrs.h +80 -0
  457. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_tables.h +155 -0
  458. cuda/cccl/headers/include/cuda/__utility/basic_any.h +507 -0
  459. cuda/cccl/headers/include/cuda/__utility/immovable.h +50 -0
  460. cuda/cccl/headers/include/cuda/__utility/in_range.h +65 -0
  461. cuda/cccl/headers/include/cuda/__utility/inherit.h +36 -0
  462. cuda/cccl/headers/include/cuda/__utility/no_init.h +29 -0
  463. cuda/cccl/headers/include/cuda/__utility/static_for.h +79 -0
  464. cuda/cccl/headers/include/cuda/__warp/lane_mask.h +326 -0
  465. cuda/cccl/headers/include/cuda/__warp/warp_match_all.h +65 -0
  466. cuda/cccl/headers/include/cuda/__warp/warp_shuffle.h +251 -0
  467. cuda/cccl/headers/include/cuda/access_property +26 -0
  468. cuda/cccl/headers/include/cuda/algorithm +27 -0
  469. cuda/cccl/headers/include/cuda/annotated_ptr +29 -0
  470. cuda/cccl/headers/include/cuda/atomic +27 -0
  471. cuda/cccl/headers/include/cuda/barrier +293 -0
  472. cuda/cccl/headers/include/cuda/bit +29 -0
  473. cuda/cccl/headers/include/cuda/cmath +37 -0
  474. cuda/cccl/headers/include/cuda/devices +33 -0
  475. cuda/cccl/headers/include/cuda/discard_memory +32 -0
  476. cuda/cccl/headers/include/cuda/functional +32 -0
  477. cuda/cccl/headers/include/cuda/iterator +39 -0
  478. cuda/cccl/headers/include/cuda/latch +27 -0
  479. cuda/cccl/headers/include/cuda/mdspan +28 -0
  480. cuda/cccl/headers/include/cuda/memory +36 -0
  481. cuda/cccl/headers/include/cuda/memory_resource +40 -0
  482. cuda/cccl/headers/include/cuda/numeric +31 -0
  483. cuda/cccl/headers/include/cuda/pipeline +580 -0
  484. cuda/cccl/headers/include/cuda/ptx +129 -0
  485. cuda/cccl/headers/include/cuda/semaphore +31 -0
  486. cuda/cccl/headers/include/cuda/std/__algorithm/adjacent_find.h +59 -0
  487. cuda/cccl/headers/include/cuda/std/__algorithm/all_of.h +45 -0
  488. cuda/cccl/headers/include/cuda/std/__algorithm/any_of.h +45 -0
  489. cuda/cccl/headers/include/cuda/std/__algorithm/binary_search.h +53 -0
  490. cuda/cccl/headers/include/cuda/std/__algorithm/clamp.h +48 -0
  491. cuda/cccl/headers/include/cuda/std/__algorithm/comp.h +58 -0
  492. cuda/cccl/headers/include/cuda/std/__algorithm/comp_ref_type.h +85 -0
  493. cuda/cccl/headers/include/cuda/std/__algorithm/copy.h +142 -0
  494. cuda/cccl/headers/include/cuda/std/__algorithm/copy_backward.h +80 -0
  495. cuda/cccl/headers/include/cuda/std/__algorithm/copy_if.h +47 -0
  496. cuda/cccl/headers/include/cuda/std/__algorithm/copy_n.h +73 -0
  497. cuda/cccl/headers/include/cuda/std/__algorithm/count.h +49 -0
  498. cuda/cccl/headers/include/cuda/std/__algorithm/count_if.h +49 -0
  499. cuda/cccl/headers/include/cuda/std/__algorithm/equal.h +128 -0
  500. cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h +101 -0
  501. cuda/cccl/headers/include/cuda/std/__algorithm/fill.h +58 -0
  502. cuda/cccl/headers/include/cuda/std/__algorithm/fill_n.h +51 -0
  503. cuda/cccl/headers/include/cuda/std/__algorithm/find.h +62 -0
  504. cuda/cccl/headers/include/cuda/std/__algorithm/find_end.h +225 -0
  505. cuda/cccl/headers/include/cuda/std/__algorithm/find_first_of.h +73 -0
  506. cuda/cccl/headers/include/cuda/std/__algorithm/find_if.h +46 -0
  507. cuda/cccl/headers/include/cuda/std/__algorithm/find_if_not.h +46 -0
  508. cuda/cccl/headers/include/cuda/std/__algorithm/for_each.h +42 -0
  509. cuda/cccl/headers/include/cuda/std/__algorithm/for_each_n.h +48 -0
  510. cuda/cccl/headers/include/cuda/std/__algorithm/generate.h +41 -0
  511. cuda/cccl/headers/include/cuda/std/__algorithm/generate_n.h +46 -0
  512. cuda/cccl/headers/include/cuda/std/__algorithm/half_positive.h +49 -0
  513. cuda/cccl/headers/include/cuda/std/__algorithm/in_fun_result.h +55 -0
  514. cuda/cccl/headers/include/cuda/std/__algorithm/includes.h +90 -0
  515. cuda/cccl/headers/include/cuda/std/__algorithm/is_heap.h +50 -0
  516. cuda/cccl/headers/include/cuda/std/__algorithm/is_heap_until.h +83 -0
  517. cuda/cccl/headers/include/cuda/std/__algorithm/is_partitioned.h +57 -0
  518. cuda/cccl/headers/include/cuda/std/__algorithm/is_permutation.h +252 -0
  519. cuda/cccl/headers/include/cuda/std/__algorithm/is_sorted.h +49 -0
  520. cuda/cccl/headers/include/cuda/std/__algorithm/is_sorted_until.h +68 -0
  521. cuda/cccl/headers/include/cuda/std/__algorithm/iter_swap.h +82 -0
  522. cuda/cccl/headers/include/cuda/std/__algorithm/iterator_operations.h +185 -0
  523. cuda/cccl/headers/include/cuda/std/__algorithm/lexicographical_compare.h +68 -0
  524. cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h +82 -0
  525. cuda/cccl/headers/include/cuda/std/__algorithm/make_heap.h +70 -0
  526. cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h +88 -0
  527. cuda/cccl/headers/include/cuda/std/__algorithm/max.h +62 -0
  528. cuda/cccl/headers/include/cuda/std/__algorithm/max_element.h +67 -0
  529. cuda/cccl/headers/include/cuda/std/__algorithm/merge.h +89 -0
  530. cuda/cccl/headers/include/cuda/std/__algorithm/min.h +62 -0
  531. cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h +87 -0
  532. cuda/cccl/headers/include/cuda/std/__algorithm/minmax.h +66 -0
  533. cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h +139 -0
  534. cuda/cccl/headers/include/cuda/std/__algorithm/mismatch.h +83 -0
  535. cuda/cccl/headers/include/cuda/std/__algorithm/move.h +86 -0
  536. cuda/cccl/headers/include/cuda/std/__algorithm/move_backward.h +84 -0
  537. cuda/cccl/headers/include/cuda/std/__algorithm/next_permutation.h +88 -0
  538. cuda/cccl/headers/include/cuda/std/__algorithm/none_of.h +45 -0
  539. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort.h +102 -0
  540. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h +122 -0
  541. cuda/cccl/headers/include/cuda/std/__algorithm/partition.h +120 -0
  542. cuda/cccl/headers/include/cuda/std/__algorithm/partition_copy.h +59 -0
  543. cuda/cccl/headers/include/cuda/std/__algorithm/partition_point.h +61 -0
  544. cuda/cccl/headers/include/cuda/std/__algorithm/pop_heap.h +93 -0
  545. cuda/cccl/headers/include/cuda/std/__algorithm/prev_permutation.h +88 -0
  546. cuda/cccl/headers/include/cuda/std/__algorithm/push_heap.h +100 -0
  547. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_for_each.h +84 -0
  548. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_for_each_n.h +68 -0
  549. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_iterator_concept.h +65 -0
  550. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_min.h +98 -0
  551. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_min_element.h +68 -0
  552. cuda/cccl/headers/include/cuda/std/__algorithm/remove.h +55 -0
  553. cuda/cccl/headers/include/cuda/std/__algorithm/remove_copy.h +47 -0
  554. cuda/cccl/headers/include/cuda/std/__algorithm/remove_copy_if.h +47 -0
  555. cuda/cccl/headers/include/cuda/std/__algorithm/remove_if.h +56 -0
  556. cuda/cccl/headers/include/cuda/std/__algorithm/replace.h +45 -0
  557. cuda/cccl/headers/include/cuda/std/__algorithm/replace_copy.h +54 -0
  558. cuda/cccl/headers/include/cuda/std/__algorithm/replace_copy_if.h +50 -0
  559. cuda/cccl/headers/include/cuda/std/__algorithm/replace_if.h +45 -0
  560. cuda/cccl/headers/include/cuda/std/__algorithm/reverse.h +81 -0
  561. cuda/cccl/headers/include/cuda/std/__algorithm/reverse_copy.h +43 -0
  562. cuda/cccl/headers/include/cuda/std/__algorithm/rotate.h +261 -0
  563. cuda/cccl/headers/include/cuda/std/__algorithm/rotate_copy.h +40 -0
  564. cuda/cccl/headers/include/cuda/std/__algorithm/search.h +185 -0
  565. cuda/cccl/headers/include/cuda/std/__algorithm/search_n.h +163 -0
  566. cuda/cccl/headers/include/cuda/std/__algorithm/set_difference.h +95 -0
  567. cuda/cccl/headers/include/cuda/std/__algorithm/set_intersection.h +122 -0
  568. cuda/cccl/headers/include/cuda/std/__algorithm/set_symmetric_difference.h +134 -0
  569. cuda/cccl/headers/include/cuda/std/__algorithm/set_union.h +128 -0
  570. cuda/cccl/headers/include/cuda/std/__algorithm/shift_left.h +84 -0
  571. cuda/cccl/headers/include/cuda/std/__algorithm/shift_right.h +144 -0
  572. cuda/cccl/headers/include/cuda/std/__algorithm/sift_down.h +139 -0
  573. cuda/cccl/headers/include/cuda/std/__algorithm/sort_heap.h +70 -0
  574. cuda/cccl/headers/include/cuda/std/__algorithm/swap_ranges.h +78 -0
  575. cuda/cccl/headers/include/cuda/std/__algorithm/transform.h +59 -0
  576. cuda/cccl/headers/include/cuda/std/__algorithm/unique.h +76 -0
  577. cuda/cccl/headers/include/cuda/std/__algorithm/unique_copy.h +153 -0
  578. cuda/cccl/headers/include/cuda/std/__algorithm/unwrap_iter.h +95 -0
  579. cuda/cccl/headers/include/cuda/std/__algorithm/unwrap_range.h +126 -0
  580. cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h +83 -0
  581. cuda/cccl/headers/include/cuda/std/__algorithm_ +26 -0
  582. cuda/cccl/headers/include/cuda/std/__atomic/api/common.h +192 -0
  583. cuda/cccl/headers/include/cuda/std/__atomic/api/owned.h +136 -0
  584. cuda/cccl/headers/include/cuda/std/__atomic/api/reference.h +118 -0
  585. cuda/cccl/headers/include/cuda/std/__atomic/functions/common.h +58 -0
  586. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_local.h +208 -0
  587. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_derived.h +458 -0
  588. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated.h +4437 -0
  589. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated_helper.h +184 -0
  590. cuda/cccl/headers/include/cuda/std/__atomic/functions/host.h +242 -0
  591. cuda/cccl/headers/include/cuda/std/__atomic/functions.h +33 -0
  592. cuda/cccl/headers/include/cuda/std/__atomic/order.h +159 -0
  593. cuda/cccl/headers/include/cuda/std/__atomic/platform/msvc_to_builtins.h +654 -0
  594. cuda/cccl/headers/include/cuda/std/__atomic/platform.h +93 -0
  595. cuda/cccl/headers/include/cuda/std/__atomic/scopes.h +105 -0
  596. cuda/cccl/headers/include/cuda/std/__atomic/types/base.h +249 -0
  597. cuda/cccl/headers/include/cuda/std/__atomic/types/common.h +104 -0
  598. cuda/cccl/headers/include/cuda/std/__atomic/types/locked.h +225 -0
  599. cuda/cccl/headers/include/cuda/std/__atomic/types/reference.h +72 -0
  600. cuda/cccl/headers/include/cuda/std/__atomic/types/small.h +238 -0
  601. cuda/cccl/headers/include/cuda/std/__atomic/types.h +51 -0
  602. cuda/cccl/headers/include/cuda/std/__atomic/wait/notify_wait.h +95 -0
  603. cuda/cccl/headers/include/cuda/std/__atomic/wait/polling.h +65 -0
  604. cuda/cccl/headers/include/cuda/std/__barrier/barrier.h +227 -0
  605. cuda/cccl/headers/include/cuda/std/__barrier/empty_completion.h +37 -0
  606. cuda/cccl/headers/include/cuda/std/__barrier/poll_tester.h +82 -0
  607. cuda/cccl/headers/include/cuda/std/__bit/bit_cast.h +76 -0
  608. cuda/cccl/headers/include/cuda/std/__bit/byteswap.h +209 -0
  609. cuda/cccl/headers/include/cuda/std/__bit/countl.h +174 -0
  610. cuda/cccl/headers/include/cuda/std/__bit/countr.h +185 -0
  611. cuda/cccl/headers/include/cuda/std/__bit/endian.h +39 -0
  612. cuda/cccl/headers/include/cuda/std/__bit/has_single_bit.h +43 -0
  613. cuda/cccl/headers/include/cuda/std/__bit/integral.h +126 -0
  614. cuda/cccl/headers/include/cuda/std/__bit/popcount.h +154 -0
  615. cuda/cccl/headers/include/cuda/std/__bit/reference.h +1272 -0
  616. cuda/cccl/headers/include/cuda/std/__bit/rotate.h +94 -0
  617. cuda/cccl/headers/include/cuda/std/__cccl/algorithm_wrapper.h +36 -0
  618. cuda/cccl/headers/include/cuda/std/__cccl/architecture.h +78 -0
  619. cuda/cccl/headers/include/cuda/std/__cccl/assert.h +161 -0
  620. cuda/cccl/headers/include/cuda/std/__cccl/attributes.h +206 -0
  621. cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +645 -0
  622. cuda/cccl/headers/include/cuda/std/__cccl/compiler.h +217 -0
  623. cuda/cccl/headers/include/cuda/std/__cccl/cuda_capabilities.h +51 -0
  624. cuda/cccl/headers/include/cuda/std/__cccl/cuda_toolkit.h +56 -0
  625. cuda/cccl/headers/include/cuda/std/__cccl/deprecated.h +88 -0
  626. cuda/cccl/headers/include/cuda/std/__cccl/diagnostic.h +131 -0
  627. cuda/cccl/headers/include/cuda/std/__cccl/dialect.h +130 -0
  628. cuda/cccl/headers/include/cuda/std/__cccl/epilogue.h +354 -0
  629. cuda/cccl/headers/include/cuda/std/__cccl/exceptions.h +36 -0
  630. cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h +74 -0
  631. cuda/cccl/headers/include/cuda/std/__cccl/extended_data_types.h +160 -0
  632. cuda/cccl/headers/include/cuda/std/__cccl/host_std_lib.h +52 -0
  633. cuda/cccl/headers/include/cuda/std/__cccl/is_non_narrowing_convertible.h +71 -0
  634. cuda/cccl/headers/include/cuda/std/__cccl/memory_wrapper.h +36 -0
  635. cuda/cccl/headers/include/cuda/std/__cccl/numeric_wrapper.h +36 -0
  636. cuda/cccl/headers/include/cuda/std/__cccl/os.h +54 -0
  637. cuda/cccl/headers/include/cuda/std/__cccl/preprocessor.h +1286 -0
  638. cuda/cccl/headers/include/cuda/std/__cccl/prologue.h +289 -0
  639. cuda/cccl/headers/include/cuda/std/__cccl/ptx_isa.h +253 -0
  640. cuda/cccl/headers/include/cuda/std/__cccl/rtti.h +72 -0
  641. cuda/cccl/headers/include/cuda/std/__cccl/sequence_access.h +87 -0
  642. cuda/cccl/headers/include/cuda/std/__cccl/system_header.h +38 -0
  643. cuda/cccl/headers/include/cuda/std/__cccl/unreachable.h +31 -0
  644. cuda/cccl/headers/include/cuda/std/__cccl/version.h +26 -0
  645. cuda/cccl/headers/include/cuda/std/__cccl/visibility.h +171 -0
  646. cuda/cccl/headers/include/cuda/std/__charconv/chars_format.h +81 -0
  647. cuda/cccl/headers/include/cuda/std/__charconv/from_chars.h +154 -0
  648. cuda/cccl/headers/include/cuda/std/__charconv/from_chars_result.h +56 -0
  649. cuda/cccl/headers/include/cuda/std/__charconv/to_chars.h +148 -0
  650. cuda/cccl/headers/include/cuda/std/__charconv/to_chars_result.h +56 -0
  651. cuda/cccl/headers/include/cuda/std/__charconv_ +31 -0
  652. cuda/cccl/headers/include/cuda/std/__chrono/calendar.h +52 -0
  653. cuda/cccl/headers/include/cuda/std/__chrono/day.h +160 -0
  654. cuda/cccl/headers/include/cuda/std/__chrono/duration.h +499 -0
  655. cuda/cccl/headers/include/cuda/std/__chrono/file_clock.h +53 -0
  656. cuda/cccl/headers/include/cuda/std/__chrono/high_resolution_clock.h +44 -0
  657. cuda/cccl/headers/include/cuda/std/__chrono/month.h +185 -0
  658. cuda/cccl/headers/include/cuda/std/__chrono/steady_clock.h +58 -0
  659. cuda/cccl/headers/include/cuda/std/__chrono/system_clock.h +78 -0
  660. cuda/cccl/headers/include/cuda/std/__chrono/time_point.h +255 -0
  661. cuda/cccl/headers/include/cuda/std/__chrono/year.h +184 -0
  662. cuda/cccl/headers/include/cuda/std/__cmath/abs.h +127 -0
  663. cuda/cccl/headers/include/cuda/std/__cmath/copysign.h +88 -0
  664. cuda/cccl/headers/include/cuda/std/__cmath/error_functions.h +204 -0
  665. cuda/cccl/headers/include/cuda/std/__cmath/exponential_functions.h +783 -0
  666. cuda/cccl/headers/include/cuda/std/__cmath/fdim.h +122 -0
  667. cuda/cccl/headers/include/cuda/std/__cmath/fma.h +129 -0
  668. cuda/cccl/headers/include/cuda/std/__cmath/fpclassify.h +230 -0
  669. cuda/cccl/headers/include/cuda/std/__cmath/gamma.h +204 -0
  670. cuda/cccl/headers/include/cuda/std/__cmath/hyperbolic_functions.h +285 -0
  671. cuda/cccl/headers/include/cuda/std/__cmath/hypot.h +220 -0
  672. cuda/cccl/headers/include/cuda/std/__cmath/inverse_hyperbolic_functions.h +285 -0
  673. cuda/cccl/headers/include/cuda/std/__cmath/inverse_trigonometric_functions.h +370 -0
  674. cuda/cccl/headers/include/cuda/std/__cmath/isfinite.h +166 -0
  675. cuda/cccl/headers/include/cuda/std/__cmath/isinf.h +204 -0
  676. cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +185 -0
  677. cuda/cccl/headers/include/cuda/std/__cmath/isnormal.h +138 -0
  678. cuda/cccl/headers/include/cuda/std/__cmath/lerp.h +101 -0
  679. cuda/cccl/headers/include/cuda/std/__cmath/logarithms.h +533 -0
  680. cuda/cccl/headers/include/cuda/std/__cmath/min_max.h +287 -0
  681. cuda/cccl/headers/include/cuda/std/__cmath/modulo.h +208 -0
  682. cuda/cccl/headers/include/cuda/std/__cmath/nan.h +54 -0
  683. cuda/cccl/headers/include/cuda/std/__cmath/remainder.h +210 -0
  684. cuda/cccl/headers/include/cuda/std/__cmath/roots.h +198 -0
  685. cuda/cccl/headers/include/cuda/std/__cmath/rounding_functions.h +983 -0
  686. cuda/cccl/headers/include/cuda/std/__cmath/signbit.h +56 -0
  687. cuda/cccl/headers/include/cuda/std/__cmath/traits.h +242 -0
  688. cuda/cccl/headers/include/cuda/std/__cmath/trigonometric_functions.h +327 -0
  689. cuda/cccl/headers/include/cuda/std/__complex/arg.h +84 -0
  690. cuda/cccl/headers/include/cuda/std/__complex/complex.h +669 -0
  691. cuda/cccl/headers/include/cuda/std/__complex/exponential_functions.h +411 -0
  692. cuda/cccl/headers/include/cuda/std/__complex/hyperbolic_functions.h +117 -0
  693. cuda/cccl/headers/include/cuda/std/__complex/inverse_hyperbolic_functions.h +216 -0
  694. cuda/cccl/headers/include/cuda/std/__complex/inverse_trigonometric_functions.h +131 -0
  695. cuda/cccl/headers/include/cuda/std/__complex/literals.h +86 -0
  696. cuda/cccl/headers/include/cuda/std/__complex/logarithms.h +303 -0
  697. cuda/cccl/headers/include/cuda/std/__complex/math.h +159 -0
  698. cuda/cccl/headers/include/cuda/std/__complex/nvbf16.h +323 -0
  699. cuda/cccl/headers/include/cuda/std/__complex/nvfp16.h +322 -0
  700. cuda/cccl/headers/include/cuda/std/__complex/roots.h +214 -0
  701. cuda/cccl/headers/include/cuda/std/__complex/trigonometric_functions.h +61 -0
  702. cuda/cccl/headers/include/cuda/std/__complex/tuple.h +107 -0
  703. cuda/cccl/headers/include/cuda/std/__complex/vector_support.h +130 -0
  704. cuda/cccl/headers/include/cuda/std/__concepts/arithmetic.h +56 -0
  705. cuda/cccl/headers/include/cuda/std/__concepts/assignable.h +64 -0
  706. cuda/cccl/headers/include/cuda/std/__concepts/boolean_testable.h +63 -0
  707. cuda/cccl/headers/include/cuda/std/__concepts/class_or_enum.h +45 -0
  708. cuda/cccl/headers/include/cuda/std/__concepts/common_reference_with.h +69 -0
  709. cuda/cccl/headers/include/cuda/std/__concepts/common_with.h +82 -0
  710. cuda/cccl/headers/include/cuda/std/__concepts/concept_macros.h +367 -0
  711. cuda/cccl/headers/include/cuda/std/__concepts/constructible.h +174 -0
  712. cuda/cccl/headers/include/cuda/std/__concepts/convertible_to.h +70 -0
  713. cuda/cccl/headers/include/cuda/std/__concepts/copyable.h +60 -0
  714. cuda/cccl/headers/include/cuda/std/__concepts/derived_from.h +56 -0
  715. cuda/cccl/headers/include/cuda/std/__concepts/destructible.h +76 -0
  716. cuda/cccl/headers/include/cuda/std/__concepts/different_from.h +38 -0
  717. cuda/cccl/headers/include/cuda/std/__concepts/equality_comparable.h +98 -0
  718. cuda/cccl/headers/include/cuda/std/__concepts/invocable.h +80 -0
  719. cuda/cccl/headers/include/cuda/std/__concepts/movable.h +58 -0
  720. cuda/cccl/headers/include/cuda/std/__concepts/predicate.h +54 -0
  721. cuda/cccl/headers/include/cuda/std/__concepts/regular.h +54 -0
  722. cuda/cccl/headers/include/cuda/std/__concepts/relation.h +77 -0
  723. cuda/cccl/headers/include/cuda/std/__concepts/same_as.h +39 -0
  724. cuda/cccl/headers/include/cuda/std/__concepts/semiregular.h +54 -0
  725. cuda/cccl/headers/include/cuda/std/__concepts/swappable.h +206 -0
  726. cuda/cccl/headers/include/cuda/std/__concepts/totally_ordered.h +101 -0
  727. cuda/cccl/headers/include/cuda/std/__cstddef/byte.h +113 -0
  728. cuda/cccl/headers/include/cuda/std/__cstddef/types.h +52 -0
  729. cuda/cccl/headers/include/cuda/std/__cstdlib/abs.h +57 -0
  730. cuda/cccl/headers/include/cuda/std/__cstdlib/aligned_alloc.h +66 -0
  731. cuda/cccl/headers/include/cuda/std/__cstdlib/div.h +96 -0
  732. cuda/cccl/headers/include/cuda/std/__cstdlib/malloc.h +70 -0
  733. cuda/cccl/headers/include/cuda/std/__cstring/memcpy.h +61 -0
  734. cuda/cccl/headers/include/cuda/std/__cstring/memset.h +46 -0
  735. cuda/cccl/headers/include/cuda/std/__exception/cuda_error.h +118 -0
  736. cuda/cccl/headers/include/cuda/std/__exception/exception_macros.h +93 -0
  737. cuda/cccl/headers/include/cuda/std/__exception/terminate.h +73 -0
  738. cuda/cccl/headers/include/cuda/std/__exception/throw_error.h +120 -0
  739. cuda/cccl/headers/include/cuda/std/__execution/env.h +455 -0
  740. cuda/cccl/headers/include/cuda/std/__execution/policy.h +88 -0
  741. cuda/cccl/headers/include/cuda/std/__expected/bad_expected_access.h +127 -0
  742. cuda/cccl/headers/include/cuda/std/__expected/expected.h +1941 -0
  743. cuda/cccl/headers/include/cuda/std/__expected/expected_base.h +1050 -0
  744. cuda/cccl/headers/include/cuda/std/__expected/unexpect.h +37 -0
  745. cuda/cccl/headers/include/cuda/std/__expected/unexpected.h +164 -0
  746. cuda/cccl/headers/include/cuda/std/__floating_point/arithmetic.h +56 -0
  747. cuda/cccl/headers/include/cuda/std/__floating_point/cast.h +812 -0
  748. cuda/cccl/headers/include/cuda/std/__floating_point/cccl_fp.h +125 -0
  749. cuda/cccl/headers/include/cuda/std/__floating_point/common_type.h +48 -0
  750. cuda/cccl/headers/include/cuda/std/__floating_point/constants.h +376 -0
  751. cuda/cccl/headers/include/cuda/std/__floating_point/conversion_rank_order.h +124 -0
  752. cuda/cccl/headers/include/cuda/std/__floating_point/cuda_fp_types.h +116 -0
  753. cuda/cccl/headers/include/cuda/std/__floating_point/decompose.h +69 -0
  754. cuda/cccl/headers/include/cuda/std/__floating_point/format.h +162 -0
  755. cuda/cccl/headers/include/cuda/std/__floating_point/fp.h +40 -0
  756. cuda/cccl/headers/include/cuda/std/__floating_point/mask.h +78 -0
  757. cuda/cccl/headers/include/cuda/std/__floating_point/native_type.h +81 -0
  758. cuda/cccl/headers/include/cuda/std/__floating_point/overflow_handler.h +139 -0
  759. cuda/cccl/headers/include/cuda/std/__floating_point/properties.h +229 -0
  760. cuda/cccl/headers/include/cuda/std/__floating_point/storage.h +248 -0
  761. cuda/cccl/headers/include/cuda/std/__floating_point/traits.h +172 -0
  762. cuda/cccl/headers/include/cuda/std/__format/buffer.h +48 -0
  763. cuda/cccl/headers/include/cuda/std/__format/concepts.h +69 -0
  764. cuda/cccl/headers/include/cuda/std/__format/format_arg.h +282 -0
  765. cuda/cccl/headers/include/cuda/std/__format/format_arg_store.h +279 -0
  766. cuda/cccl/headers/include/cuda/std/__format/format_args.h +122 -0
  767. cuda/cccl/headers/include/cuda/std/__format/format_context.h +92 -0
  768. cuda/cccl/headers/include/cuda/std/__format/format_error.h +76 -0
  769. cuda/cccl/headers/include/cuda/std/__format/format_integral.h +237 -0
  770. cuda/cccl/headers/include/cuda/std/__format/format_parse_context.h +124 -0
  771. cuda/cccl/headers/include/cuda/std/__format/format_spec_parser.h +1230 -0
  772. cuda/cccl/headers/include/cuda/std/__format/formatter.h +59 -0
  773. cuda/cccl/headers/include/cuda/std/__format/formatters/bool.h +101 -0
  774. cuda/cccl/headers/include/cuda/std/__format/formatters/char.h +124 -0
  775. cuda/cccl/headers/include/cuda/std/__format/formatters/fp.h +101 -0
  776. cuda/cccl/headers/include/cuda/std/__format/formatters/int.h +174 -0
  777. cuda/cccl/headers/include/cuda/std/__format/formatters/ptr.h +104 -0
  778. cuda/cccl/headers/include/cuda/std/__format/formatters/str.h +178 -0
  779. cuda/cccl/headers/include/cuda/std/__format/output_utils.h +272 -0
  780. cuda/cccl/headers/include/cuda/std/__format/parse_arg_id.h +138 -0
  781. cuda/cccl/headers/include/cuda/std/__format_ +45 -0
  782. cuda/cccl/headers/include/cuda/std/__functional/binary_function.h +63 -0
  783. cuda/cccl/headers/include/cuda/std/__functional/binary_negate.h +65 -0
  784. cuda/cccl/headers/include/cuda/std/__functional/bind.h +334 -0
  785. cuda/cccl/headers/include/cuda/std/__functional/bind_back.h +80 -0
  786. cuda/cccl/headers/include/cuda/std/__functional/bind_front.h +73 -0
  787. cuda/cccl/headers/include/cuda/std/__functional/binder1st.h +74 -0
  788. cuda/cccl/headers/include/cuda/std/__functional/binder2nd.h +74 -0
  789. cuda/cccl/headers/include/cuda/std/__functional/compose.h +68 -0
  790. cuda/cccl/headers/include/cuda/std/__functional/default_searcher.h +75 -0
  791. cuda/cccl/headers/include/cuda/std/__functional/function.h +1271 -0
  792. cuda/cccl/headers/include/cuda/std/__functional/hash.h +649 -0
  793. cuda/cccl/headers/include/cuda/std/__functional/identity.h +57 -0
  794. cuda/cccl/headers/include/cuda/std/__functional/invoke.h +296 -0
  795. cuda/cccl/headers/include/cuda/std/__functional/is_transparent.h +41 -0
  796. cuda/cccl/headers/include/cuda/std/__functional/mem_fn.h +66 -0
  797. cuda/cccl/headers/include/cuda/std/__functional/mem_fun_ref.h +211 -0
  798. cuda/cccl/headers/include/cuda/std/__functional/not_fn.h +120 -0
  799. cuda/cccl/headers/include/cuda/std/__functional/operations.h +534 -0
  800. cuda/cccl/headers/include/cuda/std/__functional/perfect_forward.h +128 -0
  801. cuda/cccl/headers/include/cuda/std/__functional/pointer_to_binary_function.h +64 -0
  802. cuda/cccl/headers/include/cuda/std/__functional/pointer_to_unary_function.h +63 -0
  803. cuda/cccl/headers/include/cuda/std/__functional/ranges_operations.h +113 -0
  804. cuda/cccl/headers/include/cuda/std/__functional/reference_wrapper.h +113 -0
  805. cuda/cccl/headers/include/cuda/std/__functional/unary_function.h +62 -0
  806. cuda/cccl/headers/include/cuda/std/__functional/unary_negate.h +65 -0
  807. cuda/cccl/headers/include/cuda/std/__functional/unwrap_ref.h +56 -0
  808. cuda/cccl/headers/include/cuda/std/__functional/weak_result_type.h +262 -0
  809. cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +53 -0
  810. cuda/cccl/headers/include/cuda/std/__fwd/array.h +42 -0
  811. cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +74 -0
  812. cuda/cccl/headers/include/cuda/std/__fwd/complex.h +75 -0
  813. cuda/cccl/headers/include/cuda/std/__fwd/expected.h +46 -0
  814. cuda/cccl/headers/include/cuda/std/__fwd/format.h +84 -0
  815. cuda/cccl/headers/include/cuda/std/__fwd/fp.h +37 -0
  816. cuda/cccl/headers/include/cuda/std/__fwd/get.h +122 -0
  817. cuda/cccl/headers/include/cuda/std/__fwd/hash.h +34 -0
  818. cuda/cccl/headers/include/cuda/std/__fwd/ios.h +123 -0
  819. cuda/cccl/headers/include/cuda/std/__fwd/iterator.h +43 -0
  820. cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +122 -0
  821. cuda/cccl/headers/include/cuda/std/__fwd/memory_resource.h +37 -0
  822. cuda/cccl/headers/include/cuda/std/__fwd/optional.h +39 -0
  823. cuda/cccl/headers/include/cuda/std/__fwd/pair.h +47 -0
  824. cuda/cccl/headers/include/cuda/std/__fwd/reference_wrapper.h +39 -0
  825. cuda/cccl/headers/include/cuda/std/__fwd/span.h +45 -0
  826. cuda/cccl/headers/include/cuda/std/__fwd/string.h +112 -0
  827. cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +91 -0
  828. cuda/cccl/headers/include/cuda/std/__fwd/subrange.h +55 -0
  829. cuda/cccl/headers/include/cuda/std/__fwd/tuple.h +37 -0
  830. cuda/cccl/headers/include/cuda/std/__fwd/unexpected.h +40 -0
  831. cuda/cccl/headers/include/cuda/std/__fwd/variant.h +51 -0
  832. cuda/cccl/headers/include/cuda/std/__internal/cpp_dialect.h +44 -0
  833. cuda/cccl/headers/include/cuda/std/__internal/features.h +86 -0
  834. cuda/cccl/headers/include/cuda/std/__internal/namespaces.h +143 -0
  835. cuda/cccl/headers/include/cuda/std/__iterator/access.h +128 -0
  836. cuda/cccl/headers/include/cuda/std/__iterator/advance.h +228 -0
  837. cuda/cccl/headers/include/cuda/std/__iterator/back_insert_iterator.h +163 -0
  838. cuda/cccl/headers/include/cuda/std/__iterator/bounded_iter.h +253 -0
  839. cuda/cccl/headers/include/cuda/std/__iterator/concepts.h +645 -0
  840. cuda/cccl/headers/include/cuda/std/__iterator/counted_iterator.h +464 -0
  841. cuda/cccl/headers/include/cuda/std/__iterator/data.h +61 -0
  842. cuda/cccl/headers/include/cuda/std/__iterator/default_sentinel.h +36 -0
  843. cuda/cccl/headers/include/cuda/std/__iterator/distance.h +126 -0
  844. cuda/cccl/headers/include/cuda/std/__iterator/empty.h +53 -0
  845. cuda/cccl/headers/include/cuda/std/__iterator/erase_if_container.h +53 -0
  846. cuda/cccl/headers/include/cuda/std/__iterator/front_insert_iterator.h +99 -0
  847. cuda/cccl/headers/include/cuda/std/__iterator/incrementable_traits.h +143 -0
  848. cuda/cccl/headers/include/cuda/std/__iterator/indirectly_comparable.h +55 -0
  849. cuda/cccl/headers/include/cuda/std/__iterator/insert_iterator.h +107 -0
  850. cuda/cccl/headers/include/cuda/std/__iterator/istream_iterator.h +146 -0
  851. cuda/cccl/headers/include/cuda/std/__iterator/istreambuf_iterator.h +161 -0
  852. cuda/cccl/headers/include/cuda/std/__iterator/iter_move.h +161 -0
  853. cuda/cccl/headers/include/cuda/std/__iterator/iter_swap.h +181 -0
  854. cuda/cccl/headers/include/cuda/std/__iterator/iterator.h +44 -0
  855. cuda/cccl/headers/include/cuda/std/__iterator/iterator_traits.h +847 -0
  856. cuda/cccl/headers/include/cuda/std/__iterator/mergeable.h +72 -0
  857. cuda/cccl/headers/include/cuda/std/__iterator/move_iterator.h +432 -0
  858. cuda/cccl/headers/include/cuda/std/__iterator/move_sentinel.h +73 -0
  859. cuda/cccl/headers/include/cuda/std/__iterator/next.h +101 -0
  860. cuda/cccl/headers/include/cuda/std/__iterator/ostream_iterator.h +95 -0
  861. cuda/cccl/headers/include/cuda/std/__iterator/ostreambuf_iterator.h +100 -0
  862. cuda/cccl/headers/include/cuda/std/__iterator/permutable.h +54 -0
  863. cuda/cccl/headers/include/cuda/std/__iterator/prev.h +90 -0
  864. cuda/cccl/headers/include/cuda/std/__iterator/projected.h +61 -0
  865. cuda/cccl/headers/include/cuda/std/__iterator/readable_traits.h +156 -0
  866. cuda/cccl/headers/include/cuda/std/__iterator/reverse_access.h +142 -0
  867. cuda/cccl/headers/include/cuda/std/__iterator/reverse_iterator.h +366 -0
  868. cuda/cccl/headers/include/cuda/std/__iterator/size.h +69 -0
  869. cuda/cccl/headers/include/cuda/std/__iterator/sortable.h +55 -0
  870. cuda/cccl/headers/include/cuda/std/__iterator/unreachable_sentinel.h +84 -0
  871. cuda/cccl/headers/include/cuda/std/__iterator/wrap_iter.h +245 -0
  872. cuda/cccl/headers/include/cuda/std/__latch/latch.h +88 -0
  873. cuda/cccl/headers/include/cuda/std/__limits/numeric_limits.h +603 -0
  874. cuda/cccl/headers/include/cuda/std/__limits/numeric_limits_ext.h +753 -0
  875. cuda/cccl/headers/include/cuda/std/__linalg/conj_if_needed.h +77 -0
  876. cuda/cccl/headers/include/cuda/std/__linalg/conjugate_transposed.h +52 -0
  877. cuda/cccl/headers/include/cuda/std/__linalg/conjugated.h +137 -0
  878. cuda/cccl/headers/include/cuda/std/__linalg/scaled.h +128 -0
  879. cuda/cccl/headers/include/cuda/std/__linalg/transposed.h +316 -0
  880. cuda/cccl/headers/include/cuda/std/__mdspan/aligned_accessor.h +97 -0
  881. cuda/cccl/headers/include/cuda/std/__mdspan/concepts.h +137 -0
  882. cuda/cccl/headers/include/cuda/std/__mdspan/default_accessor.h +73 -0
  883. cuda/cccl/headers/include/cuda/std/__mdspan/empty_base.h +352 -0
  884. cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +753 -0
  885. cuda/cccl/headers/include/cuda/std/__mdspan/layout_left.h +314 -0
  886. cuda/cccl/headers/include/cuda/std/__mdspan/layout_right.h +307 -0
  887. cuda/cccl/headers/include/cuda/std/__mdspan/layout_stride.h +603 -0
  888. cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +512 -0
  889. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_extents.h +193 -0
  890. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_helper.h +189 -0
  891. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_mapping.h +344 -0
  892. cuda/cccl/headers/include/cuda/std/__memory/addressof.h +67 -0
  893. cuda/cccl/headers/include/cuda/std/__memory/align.h +67 -0
  894. cuda/cccl/headers/include/cuda/std/__memory/allocate_at_least.h +81 -0
  895. cuda/cccl/headers/include/cuda/std/__memory/allocation_guard.h +100 -0
  896. cuda/cccl/headers/include/cuda/std/__memory/allocator.h +320 -0
  897. cuda/cccl/headers/include/cuda/std/__memory/allocator_arg_t.h +85 -0
  898. cuda/cccl/headers/include/cuda/std/__memory/allocator_destructor.h +59 -0
  899. cuda/cccl/headers/include/cuda/std/__memory/allocator_traits.h +526 -0
  900. cuda/cccl/headers/include/cuda/std/__memory/assume_aligned.h +60 -0
  901. cuda/cccl/headers/include/cuda/std/__memory/builtin_new_allocator.h +87 -0
  902. cuda/cccl/headers/include/cuda/std/__memory/compressed_pair.h +225 -0
  903. cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +242 -0
  904. cuda/cccl/headers/include/cuda/std/__memory/destruct_n.h +91 -0
  905. cuda/cccl/headers/include/cuda/std/__memory/is_sufficiently_aligned.h +51 -0
  906. cuda/cccl/headers/include/cuda/std/__memory/pointer_traits.h +246 -0
  907. cuda/cccl/headers/include/cuda/std/__memory/runtime_assume_aligned.h +62 -0
  908. cuda/cccl/headers/include/cuda/std/__memory/temporary_buffer.h +92 -0
  909. cuda/cccl/headers/include/cuda/std/__memory/uninitialized_algorithms.h +679 -0
  910. cuda/cccl/headers/include/cuda/std/__memory/unique_ptr.h +765 -0
  911. cuda/cccl/headers/include/cuda/std/__memory/uses_allocator.h +59 -0
  912. cuda/cccl/headers/include/cuda/std/__memory/voidify.h +41 -0
  913. cuda/cccl/headers/include/cuda/std/__memory_ +34 -0
  914. cuda/cccl/headers/include/cuda/std/__new/allocate.h +131 -0
  915. cuda/cccl/headers/include/cuda/std/__new/bad_alloc.h +57 -0
  916. cuda/cccl/headers/include/cuda/std/__new/device_new.h +30 -0
  917. cuda/cccl/headers/include/cuda/std/__new/launder.h +53 -0
  918. cuda/cccl/headers/include/cuda/std/__new_ +30 -0
  919. cuda/cccl/headers/include/cuda/std/__numeric/accumulate.h +56 -0
  920. cuda/cccl/headers/include/cuda/std/__numeric/adjacent_difference.h +72 -0
  921. cuda/cccl/headers/include/cuda/std/__numeric/exclusive_scan.h +66 -0
  922. cuda/cccl/headers/include/cuda/std/__numeric/gcd_lcm.h +78 -0
  923. cuda/cccl/headers/include/cuda/std/__numeric/inclusive_scan.h +73 -0
  924. cuda/cccl/headers/include/cuda/std/__numeric/inner_product.h +62 -0
  925. cuda/cccl/headers/include/cuda/std/__numeric/iota.h +42 -0
  926. cuda/cccl/headers/include/cuda/std/__numeric/midpoint.h +97 -0
  927. cuda/cccl/headers/include/cuda/std/__numeric/partial_sum.h +69 -0
  928. cuda/cccl/headers/include/cuda/std/__numeric/reduce.h +60 -0
  929. cuda/cccl/headers/include/cuda/std/__numeric/transform_exclusive_scan.h +51 -0
  930. cuda/cccl/headers/include/cuda/std/__numeric/transform_inclusive_scan.h +65 -0
  931. cuda/cccl/headers/include/cuda/std/__numeric/transform_reduce.h +72 -0
  932. cuda/cccl/headers/include/cuda/std/__optional/bad_optional_access.h +74 -0
  933. cuda/cccl/headers/include/cuda/std/__optional/hash.h +53 -0
  934. cuda/cccl/headers/include/cuda/std/__optional/make_optional.h +61 -0
  935. cuda/cccl/headers/include/cuda/std/__optional/nullopt.h +43 -0
  936. cuda/cccl/headers/include/cuda/std/__optional/optional.h +860 -0
  937. cuda/cccl/headers/include/cuda/std/__optional/optional_base.h +433 -0
  938. cuda/cccl/headers/include/cuda/std/__optional/optional_ref.h +324 -0
  939. cuda/cccl/headers/include/cuda/std/__random/generate_canonical.h +56 -0
  940. cuda/cccl/headers/include/cuda/std/__random/is_seed_sequence.h +39 -0
  941. cuda/cccl/headers/include/cuda/std/__random/is_valid.h +106 -0
  942. cuda/cccl/headers/include/cuda/std/__random/linear_congruential_engine.h +398 -0
  943. cuda/cccl/headers/include/cuda/std/__random/philox_engine.h +562 -0
  944. cuda/cccl/headers/include/cuda/std/__random/seed_seq.h +204 -0
  945. cuda/cccl/headers/include/cuda/std/__random/uniform_int_distribution.h +335 -0
  946. cuda/cccl/headers/include/cuda/std/__random/uniform_real_distribution.h +183 -0
  947. cuda/cccl/headers/include/cuda/std/__random_ +31 -0
  948. cuda/cccl/headers/include/cuda/std/__ranges/access.h +303 -0
  949. cuda/cccl/headers/include/cuda/std/__ranges/all.h +98 -0
  950. cuda/cccl/headers/include/cuda/std/__ranges/compressed_movable_box.h +892 -0
  951. cuda/cccl/headers/include/cuda/std/__ranges/concepts.h +302 -0
  952. cuda/cccl/headers/include/cuda/std/__ranges/counted.h +90 -0
  953. cuda/cccl/headers/include/cuda/std/__ranges/dangling.h +54 -0
  954. cuda/cccl/headers/include/cuda/std/__ranges/data.h +136 -0
  955. cuda/cccl/headers/include/cuda/std/__ranges/empty.h +109 -0
  956. cuda/cccl/headers/include/cuda/std/__ranges/empty_view.h +77 -0
  957. cuda/cccl/headers/include/cuda/std/__ranges/enable_borrowed_range.h +41 -0
  958. cuda/cccl/headers/include/cuda/std/__ranges/enable_view.h +78 -0
  959. cuda/cccl/headers/include/cuda/std/__ranges/from_range.h +36 -0
  960. cuda/cccl/headers/include/cuda/std/__ranges/iota_view.h +266 -0
  961. cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +408 -0
  962. cuda/cccl/headers/include/cuda/std/__ranges/owning_view.h +162 -0
  963. cuda/cccl/headers/include/cuda/std/__ranges/range_adaptor.h +110 -0
  964. cuda/cccl/headers/include/cuda/std/__ranges/rbegin.h +175 -0
  965. cuda/cccl/headers/include/cuda/std/__ranges/ref_view.h +121 -0
  966. cuda/cccl/headers/include/cuda/std/__ranges/rend.h +182 -0
  967. cuda/cccl/headers/include/cuda/std/__ranges/repeat_view.h +345 -0
  968. cuda/cccl/headers/include/cuda/std/__ranges/single_view.h +155 -0
  969. cuda/cccl/headers/include/cuda/std/__ranges/size.h +201 -0
  970. cuda/cccl/headers/include/cuda/std/__ranges/subrange.h +513 -0
  971. cuda/cccl/headers/include/cuda/std/__ranges/take_view.h +476 -0
  972. cuda/cccl/headers/include/cuda/std/__ranges/take_while_view.h +259 -0
  973. cuda/cccl/headers/include/cuda/std/__ranges/transform_view.h +522 -0
  974. cuda/cccl/headers/include/cuda/std/__ranges/unwrap_end.h +53 -0
  975. cuda/cccl/headers/include/cuda/std/__ranges/view_interface.h +183 -0
  976. cuda/cccl/headers/include/cuda/std/__ranges/views.h +38 -0
  977. cuda/cccl/headers/include/cuda/std/__semaphore/atomic_semaphore.h +234 -0
  978. cuda/cccl/headers/include/cuda/std/__semaphore/counting_semaphore.h +51 -0
  979. cuda/cccl/headers/include/cuda/std/__string/char_traits.h +191 -0
  980. cuda/cccl/headers/include/cuda/std/__string/constexpr_c_functions.h +581 -0
  981. cuda/cccl/headers/include/cuda/std/__string/helper_functions.h +296 -0
  982. cuda/cccl/headers/include/cuda/std/__string/string_view.h +244 -0
  983. cuda/cccl/headers/include/cuda/std/__string_ +29 -0
  984. cuda/cccl/headers/include/cuda/std/__system_error/errc.h +51 -0
  985. cuda/cccl/headers/include/cuda/std/__system_error_ +26 -0
  986. cuda/cccl/headers/include/cuda/std/__thread/threading_support.h +106 -0
  987. cuda/cccl/headers/include/cuda/std/__thread/threading_support_cuda.h +47 -0
  988. cuda/cccl/headers/include/cuda/std/__thread/threading_support_external.h +41 -0
  989. cuda/cccl/headers/include/cuda/std/__thread/threading_support_pthread.h +143 -0
  990. cuda/cccl/headers/include/cuda/std/__thread/threading_support_win32.h +87 -0
  991. cuda/cccl/headers/include/cuda/std/__tuple_dir/apply.h +82 -0
  992. cuda/cccl/headers/include/cuda/std/__tuple_dir/get.h +122 -0
  993. cuda/cccl/headers/include/cuda/std/__tuple_dir/ignore.h +51 -0
  994. cuda/cccl/headers/include/cuda/std/__tuple_dir/make_tuple_types.h +120 -0
  995. cuda/cccl/headers/include/cuda/std/__tuple_dir/sfinae_helpers.h +100 -0
  996. cuda/cccl/headers/include/cuda/std/__tuple_dir/structured_bindings.h +212 -0
  997. cuda/cccl/headers/include/cuda/std/__tuple_dir/tie.h +55 -0
  998. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple.h +457 -0
  999. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_cat.h +158 -0
  1000. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_constraints.h +286 -0
  1001. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_element.h +77 -0
  1002. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_indices.h +44 -0
  1003. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_leaf.h +452 -0
  1004. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +83 -0
  1005. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like_ext.h +68 -0
  1006. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_size.h +79 -0
  1007. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_types.h +35 -0
  1008. cuda/cccl/headers/include/cuda/std/__tuple_dir/vector_types.h +290 -0
  1009. cuda/cccl/headers/include/cuda/std/__type_traits/add_const.h +40 -0
  1010. cuda/cccl/headers/include/cuda/std/__type_traits/add_cv.h +40 -0
  1011. cuda/cccl/headers/include/cuda/std/__type_traits/add_lvalue_reference.h +62 -0
  1012. cuda/cccl/headers/include/cuda/std/__type_traits/add_pointer.h +65 -0
  1013. cuda/cccl/headers/include/cuda/std/__type_traits/add_rvalue_reference.h +62 -0
  1014. cuda/cccl/headers/include/cuda/std/__type_traits/add_volatile.h +40 -0
  1015. cuda/cccl/headers/include/cuda/std/__type_traits/aligned_storage.h +149 -0
  1016. cuda/cccl/headers/include/cuda/std/__type_traits/aligned_union.h +62 -0
  1017. cuda/cccl/headers/include/cuda/std/__type_traits/alignment_of.h +41 -0
  1018. cuda/cccl/headers/include/cuda/std/__type_traits/always_false.h +35 -0
  1019. cuda/cccl/headers/include/cuda/std/__type_traits/can_extract_key.h +68 -0
  1020. cuda/cccl/headers/include/cuda/std/__type_traits/common_reference.h +262 -0
  1021. cuda/cccl/headers/include/cuda/std/__type_traits/common_type.h +173 -0
  1022. cuda/cccl/headers/include/cuda/std/__type_traits/conditional.h +65 -0
  1023. cuda/cccl/headers/include/cuda/std/__type_traits/conjunction.h +67 -0
  1024. cuda/cccl/headers/include/cuda/std/__type_traits/copy_cv.h +50 -0
  1025. cuda/cccl/headers/include/cuda/std/__type_traits/copy_cvref.h +148 -0
  1026. cuda/cccl/headers/include/cuda/std/__type_traits/decay.h +83 -0
  1027. cuda/cccl/headers/include/cuda/std/__type_traits/dependent_type.h +35 -0
  1028. cuda/cccl/headers/include/cuda/std/__type_traits/disjunction.h +77 -0
  1029. cuda/cccl/headers/include/cuda/std/__type_traits/enable_if.h +43 -0
  1030. cuda/cccl/headers/include/cuda/std/__type_traits/extent.h +68 -0
  1031. cuda/cccl/headers/include/cuda/std/__type_traits/fold.h +47 -0
  1032. cuda/cccl/headers/include/cuda/std/__type_traits/has_unique_object_representation.h +46 -0
  1033. cuda/cccl/headers/include/cuda/std/__type_traits/has_virtual_destructor.h +42 -0
  1034. cuda/cccl/headers/include/cuda/std/__type_traits/integral_constant.h +62 -0
  1035. cuda/cccl/headers/include/cuda/std/__type_traits/is_abstract.h +42 -0
  1036. cuda/cccl/headers/include/cuda/std/__type_traits/is_aggregate.h +42 -0
  1037. cuda/cccl/headers/include/cuda/std/__type_traits/is_allocator.h +46 -0
  1038. cuda/cccl/headers/include/cuda/std/__type_traits/is_arithmetic.h +42 -0
  1039. cuda/cccl/headers/include/cuda/std/__type_traits/is_array.h +62 -0
  1040. cuda/cccl/headers/include/cuda/std/__type_traits/is_assignable.h +78 -0
  1041. cuda/cccl/headers/include/cuda/std/__type_traits/is_base_of.h +42 -0
  1042. cuda/cccl/headers/include/cuda/std/__type_traits/is_bounded_array.h +44 -0
  1043. cuda/cccl/headers/include/cuda/std/__type_traits/is_callable.h +60 -0
  1044. cuda/cccl/headers/include/cuda/std/__type_traits/is_char_like_type.h +38 -0
  1045. cuda/cccl/headers/include/cuda/std/__type_traits/is_class.h +42 -0
  1046. cuda/cccl/headers/include/cuda/std/__type_traits/is_comparable.h +78 -0
  1047. cuda/cccl/headers/include/cuda/std/__type_traits/is_compound.h +58 -0
  1048. cuda/cccl/headers/include/cuda/std/__type_traits/is_const.h +56 -0
  1049. cuda/cccl/headers/include/cuda/std/__type_traits/is_constant_evaluated.h +51 -0
  1050. cuda/cccl/headers/include/cuda/std/__type_traits/is_constructible.h +174 -0
  1051. cuda/cccl/headers/include/cuda/std/__type_traits/is_convertible.h +211 -0
  1052. cuda/cccl/headers/include/cuda/std/__type_traits/is_copy_assignable.h +43 -0
  1053. cuda/cccl/headers/include/cuda/std/__type_traits/is_copy_constructible.h +43 -0
  1054. cuda/cccl/headers/include/cuda/std/__type_traits/is_core_convertible.h +47 -0
  1055. cuda/cccl/headers/include/cuda/std/__type_traits/is_corresponding_member.h +42 -0
  1056. cuda/cccl/headers/include/cuda/std/__type_traits/is_default_constructible.h +40 -0
  1057. cuda/cccl/headers/include/cuda/std/__type_traits/is_destructible.h +115 -0
  1058. cuda/cccl/headers/include/cuda/std/__type_traits/is_empty.h +42 -0
  1059. cuda/cccl/headers/include/cuda/std/__type_traits/is_enum.h +42 -0
  1060. cuda/cccl/headers/include/cuda/std/__type_traits/is_execution_policy.h +81 -0
  1061. cuda/cccl/headers/include/cuda/std/__type_traits/is_extended_arithmetic.h +38 -0
  1062. cuda/cccl/headers/include/cuda/std/__type_traits/is_extended_floating_point.h +79 -0
  1063. cuda/cccl/headers/include/cuda/std/__type_traits/is_final.h +42 -0
  1064. cuda/cccl/headers/include/cuda/std/__type_traits/is_floating_point.h +53 -0
  1065. cuda/cccl/headers/include/cuda/std/__type_traits/is_fully_bounded_array.h +47 -0
  1066. cuda/cccl/headers/include/cuda/std/__type_traits/is_function.h +61 -0
  1067. cuda/cccl/headers/include/cuda/std/__type_traits/is_fundamental.h +56 -0
  1068. cuda/cccl/headers/include/cuda/std/__type_traits/is_implicitly_default_constructible.h +57 -0
  1069. cuda/cccl/headers/include/cuda/std/__type_traits/is_integer.h +45 -0
  1070. cuda/cccl/headers/include/cuda/std/__type_traits/is_integral.h +123 -0
  1071. cuda/cccl/headers/include/cuda/std/__type_traits/is_layout_compatible.h +45 -0
  1072. cuda/cccl/headers/include/cuda/std/__type_traits/is_literal_type.h +42 -0
  1073. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_function_pointer.h +79 -0
  1074. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_object_pointer.h +57 -0
  1075. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_pointer.h +57 -0
  1076. cuda/cccl/headers/include/cuda/std/__type_traits/is_move_assignable.h +43 -0
  1077. cuda/cccl/headers/include/cuda/std/__type_traits/is_move_constructible.h +42 -0
  1078. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_assignable.h +70 -0
  1079. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_constructible.h +84 -0
  1080. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_convertible.h +59 -0
  1081. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_copy_assignable.h +60 -0
  1082. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_copy_constructible.h +43 -0
  1083. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_default_constructible.h +54 -0
  1084. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_destructible.h +82 -0
  1085. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_move_assignable.h +60 -0
  1086. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_move_constructible.h +42 -0
  1087. cuda/cccl/headers/include/cuda/std/__type_traits/is_null_pointer.h +43 -0
  1088. cuda/cccl/headers/include/cuda/std/__type_traits/is_object.h +57 -0
  1089. cuda/cccl/headers/include/cuda/std/__type_traits/is_one_of.h +37 -0
  1090. cuda/cccl/headers/include/cuda/std/__type_traits/is_pod.h +42 -0
  1091. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer.h +60 -0
  1092. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer_interconvertible_base_of.h +84 -0
  1093. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer_interconvertible_with_class.h +42 -0
  1094. cuda/cccl/headers/include/cuda/std/__type_traits/is_polymorphic.h +42 -0
  1095. cuda/cccl/headers/include/cuda/std/__type_traits/is_primary_template.h +121 -0
  1096. cuda/cccl/headers/include/cuda/std/__type_traits/is_reference.h +95 -0
  1097. cuda/cccl/headers/include/cuda/std/__type_traits/is_referenceable.h +55 -0
  1098. cuda/cccl/headers/include/cuda/std/__type_traits/is_same.h +88 -0
  1099. cuda/cccl/headers/include/cuda/std/__type_traits/is_scalar.h +60 -0
  1100. cuda/cccl/headers/include/cuda/std/__type_traits/is_scoped_enum.h +49 -0
  1101. cuda/cccl/headers/include/cuda/std/__type_traits/is_signed.h +65 -0
  1102. cuda/cccl/headers/include/cuda/std/__type_traits/is_signed_integer.h +59 -0
  1103. cuda/cccl/headers/include/cuda/std/__type_traits/is_standard_layout.h +42 -0
  1104. cuda/cccl/headers/include/cuda/std/__type_traits/is_swappable.h +200 -0
  1105. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivial.h +42 -0
  1106. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_assignable.h +43 -0
  1107. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_constructible.h +43 -0
  1108. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copy_assignable.h +46 -0
  1109. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copy_constructible.h +45 -0
  1110. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copyable.h +42 -0
  1111. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_default_constructible.h +42 -0
  1112. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_destructible.h +58 -0
  1113. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_move_assignable.h +45 -0
  1114. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_move_constructible.h +44 -0
  1115. cuda/cccl/headers/include/cuda/std/__type_traits/is_unbounded_array.h +43 -0
  1116. cuda/cccl/headers/include/cuda/std/__type_traits/is_union.h +42 -0
  1117. cuda/cccl/headers/include/cuda/std/__type_traits/is_unsigned.h +66 -0
  1118. cuda/cccl/headers/include/cuda/std/__type_traits/is_unsigned_integer.h +59 -0
  1119. cuda/cccl/headers/include/cuda/std/__type_traits/is_valid_expansion.h +41 -0
  1120. cuda/cccl/headers/include/cuda/std/__type_traits/is_void.h +55 -0
  1121. cuda/cccl/headers/include/cuda/std/__type_traits/is_volatile.h +56 -0
  1122. cuda/cccl/headers/include/cuda/std/__type_traits/lazy.h +35 -0
  1123. cuda/cccl/headers/include/cuda/std/__type_traits/make_const_lvalue_ref.h +36 -0
  1124. cuda/cccl/headers/include/cuda/std/__type_traits/make_nbit_int.h +107 -0
  1125. cuda/cccl/headers/include/cuda/std/__type_traits/make_signed.h +140 -0
  1126. cuda/cccl/headers/include/cuda/std/__type_traits/make_unsigned.h +151 -0
  1127. cuda/cccl/headers/include/cuda/std/__type_traits/maybe_const.h +36 -0
  1128. cuda/cccl/headers/include/cuda/std/__type_traits/nat.h +39 -0
  1129. cuda/cccl/headers/include/cuda/std/__type_traits/negation.h +44 -0
  1130. cuda/cccl/headers/include/cuda/std/__type_traits/num_bits.h +122 -0
  1131. cuda/cccl/headers/include/cuda/std/__type_traits/promote.h +163 -0
  1132. cuda/cccl/headers/include/cuda/std/__type_traits/rank.h +60 -0
  1133. cuda/cccl/headers/include/cuda/std/__type_traits/reference_constructs_from_temporary.h +57 -0
  1134. cuda/cccl/headers/include/cuda/std/__type_traits/reference_converts_from_temporary.h +56 -0
  1135. cuda/cccl/headers/include/cuda/std/__type_traits/remove_all_extents.h +66 -0
  1136. cuda/cccl/headers/include/cuda/std/__type_traits/remove_const.h +59 -0
  1137. cuda/cccl/headers/include/cuda/std/__type_traits/remove_const_ref.h +37 -0
  1138. cuda/cccl/headers/include/cuda/std/__type_traits/remove_cv.h +57 -0
  1139. cuda/cccl/headers/include/cuda/std/__type_traits/remove_cvref.h +57 -0
  1140. cuda/cccl/headers/include/cuda/std/__type_traits/remove_extent.h +65 -0
  1141. cuda/cccl/headers/include/cuda/std/__type_traits/remove_pointer.h +73 -0
  1142. cuda/cccl/headers/include/cuda/std/__type_traits/remove_reference.h +72 -0
  1143. cuda/cccl/headers/include/cuda/std/__type_traits/remove_volatile.h +58 -0
  1144. cuda/cccl/headers/include/cuda/std/__type_traits/result_of.h +47 -0
  1145. cuda/cccl/headers/include/cuda/std/__type_traits/type_identity.h +40 -0
  1146. cuda/cccl/headers/include/cuda/std/__type_traits/type_list.h +1067 -0
  1147. cuda/cccl/headers/include/cuda/std/__type_traits/type_set.h +131 -0
  1148. cuda/cccl/headers/include/cuda/std/__type_traits/underlying_type.h +52 -0
  1149. cuda/cccl/headers/include/cuda/std/__type_traits/void_t.h +34 -0
  1150. cuda/cccl/headers/include/cuda/std/__utility/as_const.h +52 -0
  1151. cuda/cccl/headers/include/cuda/std/__utility/auto_cast.h +34 -0
  1152. cuda/cccl/headers/include/cuda/std/__utility/cmp.h +116 -0
  1153. cuda/cccl/headers/include/cuda/std/__utility/convert_to_integral.h +101 -0
  1154. cuda/cccl/headers/include/cuda/std/__utility/declval.h +76 -0
  1155. cuda/cccl/headers/include/cuda/std/__utility/exception_guard.h +161 -0
  1156. cuda/cccl/headers/include/cuda/std/__utility/exchange.h +46 -0
  1157. cuda/cccl/headers/include/cuda/std/__utility/forward.h +59 -0
  1158. cuda/cccl/headers/include/cuda/std/__utility/forward_like.h +55 -0
  1159. cuda/cccl/headers/include/cuda/std/__utility/in_place.h +86 -0
  1160. cuda/cccl/headers/include/cuda/std/__utility/integer_sequence.h +249 -0
  1161. cuda/cccl/headers/include/cuda/std/__utility/monostate.h +99 -0
  1162. cuda/cccl/headers/include/cuda/std/__utility/move.h +74 -0
  1163. cuda/cccl/headers/include/cuda/std/__utility/pair.h +791 -0
  1164. cuda/cccl/headers/include/cuda/std/__utility/piecewise_construct.h +37 -0
  1165. cuda/cccl/headers/include/cuda/std/__utility/pod_tuple.h +527 -0
  1166. cuda/cccl/headers/include/cuda/std/__utility/priority_tag.h +40 -0
  1167. cuda/cccl/headers/include/cuda/std/__utility/rel_ops.h +61 -0
  1168. cuda/cccl/headers/include/cuda/std/__utility/swap.h +64 -0
  1169. cuda/cccl/headers/include/cuda/std/__utility/to_underlying.h +40 -0
  1170. cuda/cccl/headers/include/cuda/std/__utility/typeid.h +421 -0
  1171. cuda/cccl/headers/include/cuda/std/__utility/undefined.h +34 -0
  1172. cuda/cccl/headers/include/cuda/std/__utility/unreachable.h +37 -0
  1173. cuda/cccl/headers/include/cuda/std/__variant/bad_variant_access.h +74 -0
  1174. cuda/cccl/headers/include/cuda/std/__variant/comparison.h +207 -0
  1175. cuda/cccl/headers/include/cuda/std/__variant/get.h +192 -0
  1176. cuda/cccl/headers/include/cuda/std/__variant/hash.h +82 -0
  1177. cuda/cccl/headers/include/cuda/std/__variant/sfinae_helpers.h +89 -0
  1178. cuda/cccl/headers/include/cuda/std/__variant/variant.h +250 -0
  1179. cuda/cccl/headers/include/cuda/std/__variant/variant_access.h +70 -0
  1180. cuda/cccl/headers/include/cuda/std/__variant/variant_base.h +683 -0
  1181. cuda/cccl/headers/include/cuda/std/__variant/variant_constraints.h +135 -0
  1182. cuda/cccl/headers/include/cuda/std/__variant/variant_match.h +126 -0
  1183. cuda/cccl/headers/include/cuda/std/__variant/variant_traits.h +184 -0
  1184. cuda/cccl/headers/include/cuda/std/__variant/variant_visit.h +225 -0
  1185. cuda/cccl/headers/include/cuda/std/__variant/visit.h +148 -0
  1186. cuda/cccl/headers/include/cuda/std/array +518 -0
  1187. cuda/cccl/headers/include/cuda/std/atomic +810 -0
  1188. cuda/cccl/headers/include/cuda/std/barrier +42 -0
  1189. cuda/cccl/headers/include/cuda/std/bit +35 -0
  1190. cuda/cccl/headers/include/cuda/std/bitset +986 -0
  1191. cuda/cccl/headers/include/cuda/std/cassert +28 -0
  1192. cuda/cccl/headers/include/cuda/std/ccomplex +15 -0
  1193. cuda/cccl/headers/include/cuda/std/cfloat +59 -0
  1194. cuda/cccl/headers/include/cuda/std/chrono +26 -0
  1195. cuda/cccl/headers/include/cuda/std/climits +61 -0
  1196. cuda/cccl/headers/include/cuda/std/cmath +87 -0
  1197. cuda/cccl/headers/include/cuda/std/complex +50 -0
  1198. cuda/cccl/headers/include/cuda/std/concepts +48 -0
  1199. cuda/cccl/headers/include/cuda/std/cstddef +28 -0
  1200. cuda/cccl/headers/include/cuda/std/cstdint +178 -0
  1201. cuda/cccl/headers/include/cuda/std/cstdlib +30 -0
  1202. cuda/cccl/headers/include/cuda/std/cstring +110 -0
  1203. cuda/cccl/headers/include/cuda/std/ctime +154 -0
  1204. cuda/cccl/headers/include/cuda/std/detail/__config +45 -0
  1205. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +207 -0
  1206. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/algorithm +1718 -0
  1207. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/chrono +2506 -0
  1208. cuda/cccl/headers/include/cuda/std/execution +29 -0
  1209. cuda/cccl/headers/include/cuda/std/expected +30 -0
  1210. cuda/cccl/headers/include/cuda/std/functional +56 -0
  1211. cuda/cccl/headers/include/cuda/std/initializer_list +44 -0
  1212. cuda/cccl/headers/include/cuda/std/inplace_vector +2171 -0
  1213. cuda/cccl/headers/include/cuda/std/iterator +70 -0
  1214. cuda/cccl/headers/include/cuda/std/latch +34 -0
  1215. cuda/cccl/headers/include/cuda/std/limits +28 -0
  1216. cuda/cccl/headers/include/cuda/std/linalg +30 -0
  1217. cuda/cccl/headers/include/cuda/std/mdspan +38 -0
  1218. cuda/cccl/headers/include/cuda/std/memory +39 -0
  1219. cuda/cccl/headers/include/cuda/std/numbers +344 -0
  1220. cuda/cccl/headers/include/cuda/std/numeric +41 -0
  1221. cuda/cccl/headers/include/cuda/std/optional +31 -0
  1222. cuda/cccl/headers/include/cuda/std/ranges +69 -0
  1223. cuda/cccl/headers/include/cuda/std/ratio +416 -0
  1224. cuda/cccl/headers/include/cuda/std/semaphore +31 -0
  1225. cuda/cccl/headers/include/cuda/std/source_location +83 -0
  1226. cuda/cccl/headers/include/cuda/std/span +628 -0
  1227. cuda/cccl/headers/include/cuda/std/string_view +923 -0
  1228. cuda/cccl/headers/include/cuda/std/tuple +43 -0
  1229. cuda/cccl/headers/include/cuda/std/type_traits +176 -0
  1230. cuda/cccl/headers/include/cuda/std/utility +70 -0
  1231. cuda/cccl/headers/include/cuda/std/variant +32 -0
  1232. cuda/cccl/headers/include/cuda/std/version +240 -0
  1233. cuda/cccl/headers/include/cuda/stream +31 -0
  1234. cuda/cccl/headers/include/cuda/stream_ref +59 -0
  1235. cuda/cccl/headers/include/cuda/type_traits +27 -0
  1236. cuda/cccl/headers/include/cuda/utility +28 -0
  1237. cuda/cccl/headers/include/cuda/version +16 -0
  1238. cuda/cccl/headers/include/cuda/warp +28 -0
  1239. cuda/cccl/headers/include/cuda/work_stealing +26 -0
  1240. cuda/cccl/headers/include/nv/detail/__preprocessor +169 -0
  1241. cuda/cccl/headers/include/nv/detail/__target_macros +718 -0
  1242. cuda/cccl/headers/include/nv/target +236 -0
  1243. cuda/cccl/headers/include/thrust/addressof.h +22 -0
  1244. cuda/cccl/headers/include/thrust/adjacent_difference.h +254 -0
  1245. cuda/cccl/headers/include/thrust/advance.h +57 -0
  1246. cuda/cccl/headers/include/thrust/allocate_unique.h +299 -0
  1247. cuda/cccl/headers/include/thrust/binary_search.h +1910 -0
  1248. cuda/cccl/headers/include/thrust/complex.h +858 -0
  1249. cuda/cccl/headers/include/thrust/copy.h +506 -0
  1250. cuda/cccl/headers/include/thrust/count.h +245 -0
  1251. cuda/cccl/headers/include/thrust/detail/adjacent_difference.inl +108 -0
  1252. cuda/cccl/headers/include/thrust/detail/alignment.h +81 -0
  1253. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +624 -0
  1254. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +191 -0
  1255. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +95 -0
  1256. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +79 -0
  1257. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +76 -0
  1258. cuda/cccl/headers/include/thrust/detail/allocator/no_throw_allocator.h +74 -0
  1259. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +113 -0
  1260. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +114 -0
  1261. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.h +77 -0
  1262. cuda/cccl/headers/include/thrust/detail/allocator_aware_execution_policy.h +95 -0
  1263. cuda/cccl/headers/include/thrust/detail/binary_search.inl +537 -0
  1264. cuda/cccl/headers/include/thrust/detail/caching_allocator.h +47 -0
  1265. cuda/cccl/headers/include/thrust/detail/complex/arithmetic.h +250 -0
  1266. cuda/cccl/headers/include/thrust/detail/complex/c99math.h +58 -0
  1267. cuda/cccl/headers/include/thrust/detail/complex/catrig.h +869 -0
  1268. cuda/cccl/headers/include/thrust/detail/complex/catrigf.h +583 -0
  1269. cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +227 -0
  1270. cuda/cccl/headers/include/thrust/detail/complex/ccoshf.h +155 -0
  1271. cuda/cccl/headers/include/thrust/detail/complex/cexp.h +190 -0
  1272. cuda/cccl/headers/include/thrust/detail/complex/cexpf.h +167 -0
  1273. cuda/cccl/headers/include/thrust/detail/complex/clog.h +217 -0
  1274. cuda/cccl/headers/include/thrust/detail/complex/clogf.h +204 -0
  1275. cuda/cccl/headers/include/thrust/detail/complex/complex.inl +263 -0
  1276. cuda/cccl/headers/include/thrust/detail/complex/cpow.h +50 -0
  1277. cuda/cccl/headers/include/thrust/detail/complex/cproj.h +76 -0
  1278. cuda/cccl/headers/include/thrust/detail/complex/csinh.h +222 -0
  1279. cuda/cccl/headers/include/thrust/detail/complex/csinhf.h +162 -0
  1280. cuda/cccl/headers/include/thrust/detail/complex/csqrt.h +172 -0
  1281. cuda/cccl/headers/include/thrust/detail/complex/csqrtf.h +168 -0
  1282. cuda/cccl/headers/include/thrust/detail/complex/ctanh.h +202 -0
  1283. cuda/cccl/headers/include/thrust/detail/complex/ctanhf.h +127 -0
  1284. cuda/cccl/headers/include/thrust/detail/complex/math_private.h +132 -0
  1285. cuda/cccl/headers/include/thrust/detail/complex/stream.h +73 -0
  1286. cuda/cccl/headers/include/thrust/detail/config/compiler.h +38 -0
  1287. cuda/cccl/headers/include/thrust/detail/config/config.h +43 -0
  1288. cuda/cccl/headers/include/thrust/detail/config/cpp_dialect.h +78 -0
  1289. cuda/cccl/headers/include/thrust/detail/config/device_system.h +57 -0
  1290. cuda/cccl/headers/include/thrust/detail/config/host_system.h +50 -0
  1291. cuda/cccl/headers/include/thrust/detail/config/memory_resource.h +41 -0
  1292. cuda/cccl/headers/include/thrust/detail/config/namespace.h +161 -0
  1293. cuda/cccl/headers/include/thrust/detail/config/simple_defines.h +48 -0
  1294. cuda/cccl/headers/include/thrust/detail/config.h +36 -0
  1295. cuda/cccl/headers/include/thrust/detail/contiguous_storage.h +226 -0
  1296. cuda/cccl/headers/include/thrust/detail/contiguous_storage.inl +271 -0
  1297. cuda/cccl/headers/include/thrust/detail/copy.h +70 -0
  1298. cuda/cccl/headers/include/thrust/detail/copy.inl +139 -0
  1299. cuda/cccl/headers/include/thrust/detail/copy_if.h +62 -0
  1300. cuda/cccl/headers/include/thrust/detail/copy_if.inl +114 -0
  1301. cuda/cccl/headers/include/thrust/detail/count.h +55 -0
  1302. cuda/cccl/headers/include/thrust/detail/count.inl +101 -0
  1303. cuda/cccl/headers/include/thrust/detail/device_ptr.inl +48 -0
  1304. cuda/cccl/headers/include/thrust/detail/equal.inl +105 -0
  1305. cuda/cccl/headers/include/thrust/detail/event_error.h +160 -0
  1306. cuda/cccl/headers/include/thrust/detail/execute_with_allocator.h +79 -0
  1307. cuda/cccl/headers/include/thrust/detail/execute_with_allocator_fwd.h +61 -0
  1308. cuda/cccl/headers/include/thrust/detail/execution_policy.h +120 -0
  1309. cuda/cccl/headers/include/thrust/detail/extrema.inl +196 -0
  1310. cuda/cccl/headers/include/thrust/detail/fill.inl +98 -0
  1311. cuda/cccl/headers/include/thrust/detail/find.inl +125 -0
  1312. cuda/cccl/headers/include/thrust/detail/for_each.inl +96 -0
  1313. cuda/cccl/headers/include/thrust/detail/function.h +49 -0
  1314. cuda/cccl/headers/include/thrust/detail/functional/actor.h +211 -0
  1315. cuda/cccl/headers/include/thrust/detail/functional/operators.h +383 -0
  1316. cuda/cccl/headers/include/thrust/detail/gather.inl +185 -0
  1317. cuda/cccl/headers/include/thrust/detail/generate.inl +98 -0
  1318. cuda/cccl/headers/include/thrust/detail/get_iterator_value.h +60 -0
  1319. cuda/cccl/headers/include/thrust/detail/inner_product.inl +130 -0
  1320. cuda/cccl/headers/include/thrust/detail/internal_functional.h +329 -0
  1321. cuda/cccl/headers/include/thrust/detail/logical.inl +125 -0
  1322. cuda/cccl/headers/include/thrust/detail/malloc_and_free.h +89 -0
  1323. cuda/cccl/headers/include/thrust/detail/malloc_and_free_fwd.h +45 -0
  1324. cuda/cccl/headers/include/thrust/detail/memory_algorithms.h +209 -0
  1325. cuda/cccl/headers/include/thrust/detail/merge.inl +288 -0
  1326. cuda/cccl/headers/include/thrust/detail/mismatch.inl +106 -0
  1327. cuda/cccl/headers/include/thrust/detail/overlapped_copy.h +120 -0
  1328. cuda/cccl/headers/include/thrust/detail/partition.inl +390 -0
  1329. cuda/cccl/headers/include/thrust/detail/pointer.h +309 -0
  1330. cuda/cccl/headers/include/thrust/detail/preprocessor.h +652 -0
  1331. cuda/cccl/headers/include/thrust/detail/random_bijection.h +175 -0
  1332. cuda/cccl/headers/include/thrust/detail/range/head_flags.h +114 -0
  1333. cuda/cccl/headers/include/thrust/detail/range/tail_flags.h +128 -0
  1334. cuda/cccl/headers/include/thrust/detail/raw_pointer_cast.h +52 -0
  1335. cuda/cccl/headers/include/thrust/detail/raw_reference_cast.h +186 -0
  1336. cuda/cccl/headers/include/thrust/detail/reduce.inl +395 -0
  1337. cuda/cccl/headers/include/thrust/detail/reference.h +518 -0
  1338. cuda/cccl/headers/include/thrust/detail/reference_forward_declaration.h +35 -0
  1339. cuda/cccl/headers/include/thrust/detail/remove.inl +225 -0
  1340. cuda/cccl/headers/include/thrust/detail/replace.inl +243 -0
  1341. cuda/cccl/headers/include/thrust/detail/reverse.inl +100 -0
  1342. cuda/cccl/headers/include/thrust/detail/scan.inl +536 -0
  1343. cuda/cccl/headers/include/thrust/detail/scatter.inl +169 -0
  1344. cuda/cccl/headers/include/thrust/detail/seq.h +66 -0
  1345. cuda/cccl/headers/include/thrust/detail/sequence.inl +121 -0
  1346. cuda/cccl/headers/include/thrust/detail/set_operations.inl +993 -0
  1347. cuda/cccl/headers/include/thrust/detail/shuffle.inl +86 -0
  1348. cuda/cccl/headers/include/thrust/detail/sort.inl +385 -0
  1349. cuda/cccl/headers/include/thrust/detail/static_assert.h +56 -0
  1350. cuda/cccl/headers/include/thrust/detail/static_map.h +164 -0
  1351. cuda/cccl/headers/include/thrust/detail/swap_ranges.inl +77 -0
  1352. cuda/cccl/headers/include/thrust/detail/tabulate.inl +74 -0
  1353. cuda/cccl/headers/include/thrust/detail/temporary_array.h +149 -0
  1354. cuda/cccl/headers/include/thrust/detail/temporary_array.inl +119 -0
  1355. cuda/cccl/headers/include/thrust/detail/temporary_buffer.h +92 -0
  1356. cuda/cccl/headers/include/thrust/detail/transform_reduce.inl +81 -0
  1357. cuda/cccl/headers/include/thrust/detail/transform_scan.inl +173 -0
  1358. cuda/cccl/headers/include/thrust/detail/trivial_sequence.h +128 -0
  1359. cuda/cccl/headers/include/thrust/detail/tuple_meta_transform.h +59 -0
  1360. cuda/cccl/headers/include/thrust/detail/type_deduction.h +62 -0
  1361. cuda/cccl/headers/include/thrust/detail/type_traits/has_member_function.h +47 -0
  1362. cuda/cccl/headers/include/thrust/detail/type_traits/has_nested_type.h +43 -0
  1363. cuda/cccl/headers/include/thrust/detail/type_traits/is_call_possible.h +162 -0
  1364. cuda/cccl/headers/include/thrust/detail/type_traits/is_commutative.h +67 -0
  1365. cuda/cccl/headers/include/thrust/detail/type_traits/is_metafunction_defined.h +39 -0
  1366. cuda/cccl/headers/include/thrust/detail/type_traits/is_thrust_pointer.h +55 -0
  1367. cuda/cccl/headers/include/thrust/detail/type_traits/iterator/is_output_iterator.h +46 -0
  1368. cuda/cccl/headers/include/thrust/detail/type_traits/minimum_type.h +89 -0
  1369. cuda/cccl/headers/include/thrust/detail/type_traits/pointer_traits.h +328 -0
  1370. cuda/cccl/headers/include/thrust/detail/type_traits.h +136 -0
  1371. cuda/cccl/headers/include/thrust/detail/uninitialized_copy.inl +102 -0
  1372. cuda/cccl/headers/include/thrust/detail/uninitialized_fill.inl +98 -0
  1373. cuda/cccl/headers/include/thrust/detail/unique.inl +391 -0
  1374. cuda/cccl/headers/include/thrust/detail/use_default.h +34 -0
  1375. cuda/cccl/headers/include/thrust/detail/vector_base.h +611 -0
  1376. cuda/cccl/headers/include/thrust/detail/vector_base.inl +1208 -0
  1377. cuda/cccl/headers/include/thrust/device_allocator.h +134 -0
  1378. cuda/cccl/headers/include/thrust/device_delete.h +74 -0
  1379. cuda/cccl/headers/include/thrust/device_free.h +85 -0
  1380. cuda/cccl/headers/include/thrust/device_make_unique.h +56 -0
  1381. cuda/cccl/headers/include/thrust/device_malloc.h +84 -0
  1382. cuda/cccl/headers/include/thrust/device_malloc_allocator.h +190 -0
  1383. cuda/cccl/headers/include/thrust/device_new.h +112 -0
  1384. cuda/cccl/headers/include/thrust/device_new_allocator.h +179 -0
  1385. cuda/cccl/headers/include/thrust/device_ptr.h +196 -0
  1386. cuda/cccl/headers/include/thrust/device_reference.h +983 -0
  1387. cuda/cccl/headers/include/thrust/device_vector.h +576 -0
  1388. cuda/cccl/headers/include/thrust/distance.h +43 -0
  1389. cuda/cccl/headers/include/thrust/equal.h +247 -0
  1390. cuda/cccl/headers/include/thrust/execution_policy.h +252 -0
  1391. cuda/cccl/headers/include/thrust/extrema.h +657 -0
  1392. cuda/cccl/headers/include/thrust/fill.h +200 -0
  1393. cuda/cccl/headers/include/thrust/find.h +382 -0
  1394. cuda/cccl/headers/include/thrust/for_each.h +261 -0
  1395. cuda/cccl/headers/include/thrust/functional.h +393 -0
  1396. cuda/cccl/headers/include/thrust/gather.h +464 -0
  1397. cuda/cccl/headers/include/thrust/generate.h +193 -0
  1398. cuda/cccl/headers/include/thrust/host_vector.h +576 -0
  1399. cuda/cccl/headers/include/thrust/inner_product.h +264 -0
  1400. cuda/cccl/headers/include/thrust/iterator/constant_iterator.h +221 -0
  1401. cuda/cccl/headers/include/thrust/iterator/counting_iterator.h +335 -0
  1402. cuda/cccl/headers/include/thrust/iterator/detail/any_assign.h +48 -0
  1403. cuda/cccl/headers/include/thrust/iterator/detail/any_system_tag.h +43 -0
  1404. cuda/cccl/headers/include/thrust/iterator/detail/device_system_tag.h +43 -0
  1405. cuda/cccl/headers/include/thrust/iterator/detail/host_system_tag.h +42 -0
  1406. cuda/cccl/headers/include/thrust/iterator/detail/iterator_adaptor_base.h +80 -0
  1407. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_to_system.h +60 -0
  1408. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_to_traversal.h +65 -0
  1409. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h +56 -0
  1410. cuda/cccl/headers/include/thrust/iterator/detail/iterator_facade_category.h +181 -0
  1411. cuda/cccl/headers/include/thrust/iterator/detail/minimum_system.h +57 -0
  1412. cuda/cccl/headers/include/thrust/iterator/detail/normal_iterator.h +69 -0
  1413. cuda/cccl/headers/include/thrust/iterator/detail/retag.h +104 -0
  1414. cuda/cccl/headers/include/thrust/iterator/detail/tagged_iterator.h +80 -0
  1415. cuda/cccl/headers/include/thrust/iterator/detail/tuple_of_iterator_references.h +170 -0
  1416. cuda/cccl/headers/include/thrust/iterator/discard_iterator.h +163 -0
  1417. cuda/cccl/headers/include/thrust/iterator/iterator_adaptor.h +251 -0
  1418. cuda/cccl/headers/include/thrust/iterator/iterator_categories.h +211 -0
  1419. cuda/cccl/headers/include/thrust/iterator/iterator_facade.h +659 -0
  1420. cuda/cccl/headers/include/thrust/iterator/iterator_traits.h +334 -0
  1421. cuda/cccl/headers/include/thrust/iterator/iterator_traversal_tags.h +64 -0
  1422. cuda/cccl/headers/include/thrust/iterator/offset_iterator.h +194 -0
  1423. cuda/cccl/headers/include/thrust/iterator/permutation_iterator.h +204 -0
  1424. cuda/cccl/headers/include/thrust/iterator/retag.h +72 -0
  1425. cuda/cccl/headers/include/thrust/iterator/reverse_iterator.h +51 -0
  1426. cuda/cccl/headers/include/thrust/iterator/shuffle_iterator.h +185 -0
  1427. cuda/cccl/headers/include/thrust/iterator/strided_iterator.h +152 -0
  1428. cuda/cccl/headers/include/thrust/iterator/tabulate_output_iterator.h +152 -0
  1429. cuda/cccl/headers/include/thrust/iterator/transform_input_output_iterator.h +225 -0
  1430. cuda/cccl/headers/include/thrust/iterator/transform_iterator.h +349 -0
  1431. cuda/cccl/headers/include/thrust/iterator/transform_output_iterator.h +190 -0
  1432. cuda/cccl/headers/include/thrust/iterator/zip_iterator.h +359 -0
  1433. cuda/cccl/headers/include/thrust/logical.h +290 -0
  1434. cuda/cccl/headers/include/thrust/memory.h +299 -0
  1435. cuda/cccl/headers/include/thrust/merge.h +725 -0
  1436. cuda/cccl/headers/include/thrust/mismatch.h +261 -0
  1437. cuda/cccl/headers/include/thrust/mr/allocator.h +227 -0
  1438. cuda/cccl/headers/include/thrust/mr/device_memory_resource.h +46 -0
  1439. cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +526 -0
  1440. cuda/cccl/headers/include/thrust/mr/disjoint_sync_pool.h +116 -0
  1441. cuda/cccl/headers/include/thrust/mr/disjoint_tls_pool.h +65 -0
  1442. cuda/cccl/headers/include/thrust/mr/fancy_pointer_resource.h +65 -0
  1443. cuda/cccl/headers/include/thrust/mr/host_memory_resource.h +42 -0
  1444. cuda/cccl/headers/include/thrust/mr/memory_resource.h +215 -0
  1445. cuda/cccl/headers/include/thrust/mr/new.h +98 -0
  1446. cuda/cccl/headers/include/thrust/mr/polymorphic_adaptor.h +61 -0
  1447. cuda/cccl/headers/include/thrust/mr/pool.h +526 -0
  1448. cuda/cccl/headers/include/thrust/mr/pool_options.h +172 -0
  1449. cuda/cccl/headers/include/thrust/mr/sync_pool.h +112 -0
  1450. cuda/cccl/headers/include/thrust/mr/tls_pool.h +62 -0
  1451. cuda/cccl/headers/include/thrust/mr/universal_memory_resource.h +29 -0
  1452. cuda/cccl/headers/include/thrust/mr/validator.h +54 -0
  1453. cuda/cccl/headers/include/thrust/pair.h +99 -0
  1454. cuda/cccl/headers/include/thrust/partition.h +1391 -0
  1455. cuda/cccl/headers/include/thrust/per_device_resource.h +110 -0
  1456. cuda/cccl/headers/include/thrust/random/detail/discard_block_engine.inl +182 -0
  1457. cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine.inl +153 -0
  1458. cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine_discard.h +97 -0
  1459. cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine.inl +149 -0
  1460. cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h +46 -0
  1461. cuda/cccl/headers/include/thrust/random/detail/mod.h +94 -0
  1462. cuda/cccl/headers/include/thrust/random/detail/normal_distribution.inl +185 -0
  1463. cuda/cccl/headers/include/thrust/random/detail/normal_distribution_base.h +155 -0
  1464. cuda/cccl/headers/include/thrust/random/detail/random_core_access.h +56 -0
  1465. cuda/cccl/headers/include/thrust/random/detail/subtract_with_carry_engine.inl +199 -0
  1466. cuda/cccl/headers/include/thrust/random/detail/uniform_int_distribution.inl +196 -0
  1467. cuda/cccl/headers/include/thrust/random/detail/uniform_real_distribution.inl +198 -0
  1468. cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine.inl +181 -0
  1469. cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine_max.h +187 -0
  1470. cuda/cccl/headers/include/thrust/random/discard_block_engine.h +238 -0
  1471. cuda/cccl/headers/include/thrust/random/linear_congruential_engine.h +287 -0
  1472. cuda/cccl/headers/include/thrust/random/linear_feedback_shift_engine.h +215 -0
  1473. cuda/cccl/headers/include/thrust/random/normal_distribution.h +255 -0
  1474. cuda/cccl/headers/include/thrust/random/subtract_with_carry_engine.h +245 -0
  1475. cuda/cccl/headers/include/thrust/random/uniform_int_distribution.h +259 -0
  1476. cuda/cccl/headers/include/thrust/random/uniform_real_distribution.h +256 -0
  1477. cuda/cccl/headers/include/thrust/random/xor_combine_engine.h +253 -0
  1478. cuda/cccl/headers/include/thrust/random.h +118 -0
  1479. cuda/cccl/headers/include/thrust/reduce.h +1113 -0
  1480. cuda/cccl/headers/include/thrust/remove.h +768 -0
  1481. cuda/cccl/headers/include/thrust/replace.h +826 -0
  1482. cuda/cccl/headers/include/thrust/reverse.h +215 -0
  1483. cuda/cccl/headers/include/thrust/scan.h +1671 -0
  1484. cuda/cccl/headers/include/thrust/scatter.h +446 -0
  1485. cuda/cccl/headers/include/thrust/sequence.h +277 -0
  1486. cuda/cccl/headers/include/thrust/set_operations.h +3026 -0
  1487. cuda/cccl/headers/include/thrust/shuffle.h +182 -0
  1488. cuda/cccl/headers/include/thrust/sort.h +1320 -0
  1489. cuda/cccl/headers/include/thrust/swap.h +147 -0
  1490. cuda/cccl/headers/include/thrust/system/cpp/detail/adjacent_difference.h +30 -0
  1491. cuda/cccl/headers/include/thrust/system/cpp/detail/assign_value.h +30 -0
  1492. cuda/cccl/headers/include/thrust/system/cpp/detail/binary_search.h +32 -0
  1493. cuda/cccl/headers/include/thrust/system/cpp/detail/copy.h +30 -0
  1494. cuda/cccl/headers/include/thrust/system/cpp/detail/copy_if.h +30 -0
  1495. cuda/cccl/headers/include/thrust/system/cpp/detail/count.h +29 -0
  1496. cuda/cccl/headers/include/thrust/system/cpp/detail/equal.h +29 -0
  1497. cuda/cccl/headers/include/thrust/system/cpp/detail/execution_policy.h +113 -0
  1498. cuda/cccl/headers/include/thrust/system/cpp/detail/extrema.h +30 -0
  1499. cuda/cccl/headers/include/thrust/system/cpp/detail/fill.h +29 -0
  1500. cuda/cccl/headers/include/thrust/system/cpp/detail/find.h +30 -0
  1501. cuda/cccl/headers/include/thrust/system/cpp/detail/for_each.h +30 -0
  1502. cuda/cccl/headers/include/thrust/system/cpp/detail/gather.h +29 -0
  1503. cuda/cccl/headers/include/thrust/system/cpp/detail/generate.h +29 -0
  1504. cuda/cccl/headers/include/thrust/system/cpp/detail/get_value.h +30 -0
  1505. cuda/cccl/headers/include/thrust/system/cpp/detail/inner_product.h +29 -0
  1506. cuda/cccl/headers/include/thrust/system/cpp/detail/iter_swap.h +30 -0
  1507. cuda/cccl/headers/include/thrust/system/cpp/detail/logical.h +29 -0
  1508. cuda/cccl/headers/include/thrust/system/cpp/detail/malloc_and_free.h +30 -0
  1509. cuda/cccl/headers/include/thrust/system/cpp/detail/memory.inl +55 -0
  1510. cuda/cccl/headers/include/thrust/system/cpp/detail/merge.h +30 -0
  1511. cuda/cccl/headers/include/thrust/system/cpp/detail/mismatch.h +29 -0
  1512. cuda/cccl/headers/include/thrust/system/cpp/detail/partition.h +30 -0
  1513. cuda/cccl/headers/include/thrust/system/cpp/detail/per_device_resource.h +29 -0
  1514. cuda/cccl/headers/include/thrust/system/cpp/detail/reduce.h +30 -0
  1515. cuda/cccl/headers/include/thrust/system/cpp/detail/reduce_by_key.h +30 -0
  1516. cuda/cccl/headers/include/thrust/system/cpp/detail/remove.h +30 -0
  1517. cuda/cccl/headers/include/thrust/system/cpp/detail/replace.h +29 -0
  1518. cuda/cccl/headers/include/thrust/system/cpp/detail/reverse.h +29 -0
  1519. cuda/cccl/headers/include/thrust/system/cpp/detail/scan.h +30 -0
  1520. cuda/cccl/headers/include/thrust/system/cpp/detail/scan_by_key.h +30 -0
  1521. cuda/cccl/headers/include/thrust/system/cpp/detail/scatter.h +29 -0
  1522. cuda/cccl/headers/include/thrust/system/cpp/detail/sequence.h +29 -0
  1523. cuda/cccl/headers/include/thrust/system/cpp/detail/set_operations.h +30 -0
  1524. cuda/cccl/headers/include/thrust/system/cpp/detail/sort.h +30 -0
  1525. cuda/cccl/headers/include/thrust/system/cpp/detail/swap_ranges.h +29 -0
  1526. cuda/cccl/headers/include/thrust/system/cpp/detail/tabulate.h +29 -0
  1527. cuda/cccl/headers/include/thrust/system/cpp/detail/temporary_buffer.h +29 -0
  1528. cuda/cccl/headers/include/thrust/system/cpp/detail/transform.h +29 -0
  1529. cuda/cccl/headers/include/thrust/system/cpp/detail/transform_reduce.h +29 -0
  1530. cuda/cccl/headers/include/thrust/system/cpp/detail/transform_scan.h +29 -0
  1531. cuda/cccl/headers/include/thrust/system/cpp/detail/uninitialized_copy.h +29 -0
  1532. cuda/cccl/headers/include/thrust/system/cpp/detail/uninitialized_fill.h +29 -0
  1533. cuda/cccl/headers/include/thrust/system/cpp/detail/unique.h +30 -0
  1534. cuda/cccl/headers/include/thrust/system/cpp/detail/unique_by_key.h +30 -0
  1535. cuda/cccl/headers/include/thrust/system/cpp/execution_policy.h +63 -0
  1536. cuda/cccl/headers/include/thrust/system/cpp/memory.h +105 -0
  1537. cuda/cccl/headers/include/thrust/system/cpp/memory_resource.h +70 -0
  1538. cuda/cccl/headers/include/thrust/system/cpp/pointer.h +118 -0
  1539. cuda/cccl/headers/include/thrust/system/cpp/vector.h +95 -0
  1540. cuda/cccl/headers/include/thrust/system/cuda/config.h +126 -0
  1541. cuda/cccl/headers/include/thrust/system/cuda/detail/adjacent_difference.h +215 -0
  1542. cuda/cccl/headers/include/thrust/system/cuda/detail/assign_value.h +124 -0
  1543. cuda/cccl/headers/include/thrust/system/cuda/detail/binary_search.h +29 -0
  1544. cuda/cccl/headers/include/thrust/system/cuda/detail/cdp_dispatch.h +72 -0
  1545. cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +272 -0
  1546. cuda/cccl/headers/include/thrust/system/cuda/detail/copy_if.h +251 -0
  1547. cuda/cccl/headers/include/thrust/system/cuda/detail/core/agent_launcher.h +282 -0
  1548. cuda/cccl/headers/include/thrust/system/cuda/detail/core/triple_chevron_launch.h +163 -0
  1549. cuda/cccl/headers/include/thrust/system/cuda/detail/core/util.h +586 -0
  1550. cuda/cccl/headers/include/thrust/system/cuda/detail/count.h +73 -0
  1551. cuda/cccl/headers/include/thrust/system/cuda/detail/cross_system.h +241 -0
  1552. cuda/cccl/headers/include/thrust/system/cuda/detail/dispatch.h +231 -0
  1553. cuda/cccl/headers/include/thrust/system/cuda/detail/equal.h +62 -0
  1554. cuda/cccl/headers/include/thrust/system/cuda/detail/error.inl +87 -0
  1555. cuda/cccl/headers/include/thrust/system/cuda/detail/execution_policy.h +266 -0
  1556. cuda/cccl/headers/include/thrust/system/cuda/detail/extrema.h +472 -0
  1557. cuda/cccl/headers/include/thrust/system/cuda/detail/fill.h +99 -0
  1558. cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +165 -0
  1559. cuda/cccl/headers/include/thrust/system/cuda/detail/for_each.h +82 -0
  1560. cuda/cccl/headers/include/thrust/system/cuda/detail/gather.h +89 -0
  1561. cuda/cccl/headers/include/thrust/system/cuda/detail/generate.h +58 -0
  1562. cuda/cccl/headers/include/thrust/system/cuda/detail/get_value.h +65 -0
  1563. cuda/cccl/headers/include/thrust/system/cuda/detail/inner_product.h +75 -0
  1564. cuda/cccl/headers/include/thrust/system/cuda/detail/iter_swap.h +79 -0
  1565. cuda/cccl/headers/include/thrust/system/cuda/detail/logical.h +29 -0
  1566. cuda/cccl/headers/include/thrust/system/cuda/detail/make_unsigned_special.h +55 -0
  1567. cuda/cccl/headers/include/thrust/system/cuda/detail/malloc_and_free.h +119 -0
  1568. cuda/cccl/headers/include/thrust/system/cuda/detail/memory.inl +55 -0
  1569. cuda/cccl/headers/include/thrust/system/cuda/detail/merge.h +204 -0
  1570. cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +88 -0
  1571. cuda/cccl/headers/include/thrust/system/cuda/detail/parallel_for.h +79 -0
  1572. cuda/cccl/headers/include/thrust/system/cuda/detail/partition.h +401 -0
  1573. cuda/cccl/headers/include/thrust/system/cuda/detail/per_device_resource.h +70 -0
  1574. cuda/cccl/headers/include/thrust/system/cuda/detail/reduce.h +780 -0
  1575. cuda/cccl/headers/include/thrust/system/cuda/detail/reduce_by_key.h +997 -0
  1576. cuda/cccl/headers/include/thrust/system/cuda/detail/remove.h +105 -0
  1577. cuda/cccl/headers/include/thrust/system/cuda/detail/replace.h +121 -0
  1578. cuda/cccl/headers/include/thrust/system/cuda/detail/reverse.h +83 -0
  1579. cuda/cccl/headers/include/thrust/system/cuda/detail/scan.h +338 -0
  1580. cuda/cccl/headers/include/thrust/system/cuda/detail/scan_by_key.h +411 -0
  1581. cuda/cccl/headers/include/thrust/system/cuda/detail/scatter.h +89 -0
  1582. cuda/cccl/headers/include/thrust/system/cuda/detail/sequence.h +29 -0
  1583. cuda/cccl/headers/include/thrust/system/cuda/detail/set_operations.h +1732 -0
  1584. cuda/cccl/headers/include/thrust/system/cuda/detail/sort.h +468 -0
  1585. cuda/cccl/headers/include/thrust/system/cuda/detail/swap_ranges.h +98 -0
  1586. cuda/cccl/headers/include/thrust/system/cuda/detail/tabulate.h +61 -0
  1587. cuda/cccl/headers/include/thrust/system/cuda/detail/temporary_buffer.h +130 -0
  1588. cuda/cccl/headers/include/thrust/system/cuda/detail/terminate.h +53 -0
  1589. cuda/cccl/headers/include/thrust/system/cuda/detail/transform.h +428 -0
  1590. cuda/cccl/headers/include/thrust/system/cuda/detail/transform_reduce.h +139 -0
  1591. cuda/cccl/headers/include/thrust/system/cuda/detail/transform_scan.h +117 -0
  1592. cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_copy.h +111 -0
  1593. cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_fill.h +100 -0
  1594. cuda/cccl/headers/include/thrust/system/cuda/detail/unique.h +286 -0
  1595. cuda/cccl/headers/include/thrust/system/cuda/detail/unique_by_key.h +306 -0
  1596. cuda/cccl/headers/include/thrust/system/cuda/detail/util.h +253 -0
  1597. cuda/cccl/headers/include/thrust/system/cuda/error.h +159 -0
  1598. cuda/cccl/headers/include/thrust/system/cuda/execution_policy.h +15 -0
  1599. cuda/cccl/headers/include/thrust/system/cuda/memory.h +118 -0
  1600. cuda/cccl/headers/include/thrust/system/cuda/memory_resource.h +115 -0
  1601. cuda/cccl/headers/include/thrust/system/cuda/pointer.h +155 -0
  1602. cuda/cccl/headers/include/thrust/system/cuda/vector.h +104 -0
  1603. cuda/cccl/headers/include/thrust/system/detail/bad_alloc.h +59 -0
  1604. cuda/cccl/headers/include/thrust/system/detail/errno.h +118 -0
  1605. cuda/cccl/headers/include/thrust/system/detail/error_category.inl +298 -0
  1606. cuda/cccl/headers/include/thrust/system/detail/error_code.inl +171 -0
  1607. cuda/cccl/headers/include/thrust/system/detail/error_condition.inl +119 -0
  1608. cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.h +51 -0
  1609. cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.inl +77 -0
  1610. cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.h +159 -0
  1611. cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.inl +381 -0
  1612. cuda/cccl/headers/include/thrust/system/detail/generic/copy.h +43 -0
  1613. cuda/cccl/headers/include/thrust/system/detail/generic/copy.inl +62 -0
  1614. cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.h +56 -0
  1615. cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.inl +143 -0
  1616. cuda/cccl/headers/include/thrust/system/detail/generic/count.h +46 -0
  1617. cuda/cccl/headers/include/thrust/system/detail/generic/count.inl +82 -0
  1618. cuda/cccl/headers/include/thrust/system/detail/generic/equal.h +47 -0
  1619. cuda/cccl/headers/include/thrust/system/detail/generic/equal.inl +58 -0
  1620. cuda/cccl/headers/include/thrust/system/detail/generic/extrema.h +64 -0
  1621. cuda/cccl/headers/include/thrust/system/detail/generic/extrema.inl +249 -0
  1622. cuda/cccl/headers/include/thrust/system/detail/generic/fill.h +52 -0
  1623. cuda/cccl/headers/include/thrust/system/detail/generic/find.h +47 -0
  1624. cuda/cccl/headers/include/thrust/system/detail/generic/find.inl +135 -0
  1625. cuda/cccl/headers/include/thrust/system/detail/generic/for_each.h +56 -0
  1626. cuda/cccl/headers/include/thrust/system/detail/generic/gather.h +71 -0
  1627. cuda/cccl/headers/include/thrust/system/detail/generic/gather.inl +94 -0
  1628. cuda/cccl/headers/include/thrust/system/detail/generic/generate.h +43 -0
  1629. cuda/cccl/headers/include/thrust/system/detail/generic/generate.inl +63 -0
  1630. cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.h +58 -0
  1631. cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.inl +70 -0
  1632. cuda/cccl/headers/include/thrust/system/detail/generic/logical.h +57 -0
  1633. cuda/cccl/headers/include/thrust/system/detail/generic/memory.h +62 -0
  1634. cuda/cccl/headers/include/thrust/system/detail/generic/memory.inl +83 -0
  1635. cuda/cccl/headers/include/thrust/system/detail/generic/merge.h +97 -0
  1636. cuda/cccl/headers/include/thrust/system/detail/generic/merge.inl +146 -0
  1637. cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.h +47 -0
  1638. cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.inl +66 -0
  1639. cuda/cccl/headers/include/thrust/system/detail/generic/partition.h +127 -0
  1640. cuda/cccl/headers/include/thrust/system/detail/generic/partition.inl +205 -0
  1641. cuda/cccl/headers/include/thrust/system/detail/generic/per_device_resource.h +41 -0
  1642. cuda/cccl/headers/include/thrust/system/detail/generic/reduce.h +69 -0
  1643. cuda/cccl/headers/include/thrust/system/detail/generic/reduce.inl +98 -0
  1644. cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.h +81 -0
  1645. cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.inl +183 -0
  1646. cuda/cccl/headers/include/thrust/system/detail/generic/remove.h +84 -0
  1647. cuda/cccl/headers/include/thrust/system/detail/generic/remove.inl +119 -0
  1648. cuda/cccl/headers/include/thrust/system/detail/generic/replace.h +93 -0
  1649. cuda/cccl/headers/include/thrust/system/detail/generic/replace.inl +172 -0
  1650. cuda/cccl/headers/include/thrust/system/detail/generic/reverse.h +46 -0
  1651. cuda/cccl/headers/include/thrust/system/detail/generic/reverse.inl +65 -0
  1652. cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.h +61 -0
  1653. cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.inl +124 -0
  1654. cuda/cccl/headers/include/thrust/system/detail/generic/scan.h +86 -0
  1655. cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.h +124 -0
  1656. cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.inl +229 -0
  1657. cuda/cccl/headers/include/thrust/system/detail/generic/scatter.h +71 -0
  1658. cuda/cccl/headers/include/thrust/system/detail/generic/scatter.inl +83 -0
  1659. cuda/cccl/headers/include/thrust/system/detail/generic/select_system.h +103 -0
  1660. cuda/cccl/headers/include/thrust/system/detail/generic/sequence.h +70 -0
  1661. cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.h +280 -0
  1662. cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.inl +474 -0
  1663. cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.h +52 -0
  1664. cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.inl +123 -0
  1665. cuda/cccl/headers/include/thrust/system/detail/generic/sort.h +111 -0
  1666. cuda/cccl/headers/include/thrust/system/detail/generic/sort.inl +173 -0
  1667. cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.h +42 -0
  1668. cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.inl +73 -0
  1669. cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.h +39 -0
  1670. cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.inl +52 -0
  1671. cuda/cccl/headers/include/thrust/system/detail/generic/tag.h +45 -0
  1672. cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.h +52 -0
  1673. cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.inl +80 -0
  1674. cuda/cccl/headers/include/thrust/system/detail/generic/transform.h +393 -0
  1675. cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.h +48 -0
  1676. cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.inl +54 -0
  1677. cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.h +78 -0
  1678. cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.inl +111 -0
  1679. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.h +43 -0
  1680. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.inl +164 -0
  1681. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.h +43 -0
  1682. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.inl +112 -0
  1683. cuda/cccl/headers/include/thrust/system/detail/generic/unique.h +69 -0
  1684. cuda/cccl/headers/include/thrust/system/detail/generic/unique.inl +111 -0
  1685. cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.h +79 -0
  1686. cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.inl +124 -0
  1687. cuda/cccl/headers/include/thrust/system/detail/internal/decompose.h +115 -0
  1688. cuda/cccl/headers/include/thrust/system/detail/sequential/adjacent_difference.h +68 -0
  1689. cuda/cccl/headers/include/thrust/system/detail/sequential/assign_value.h +40 -0
  1690. cuda/cccl/headers/include/thrust/system/detail/sequential/binary_search.h +134 -0
  1691. cuda/cccl/headers/include/thrust/system/detail/sequential/copy.h +120 -0
  1692. cuda/cccl/headers/include/thrust/system/detail/sequential/copy_backward.h +47 -0
  1693. cuda/cccl/headers/include/thrust/system/detail/sequential/copy_if.h +69 -0
  1694. cuda/cccl/headers/include/thrust/system/detail/sequential/count.h +29 -0
  1695. cuda/cccl/headers/include/thrust/system/detail/sequential/equal.h +29 -0
  1696. cuda/cccl/headers/include/thrust/system/detail/sequential/execution_policy.h +52 -0
  1697. cuda/cccl/headers/include/thrust/system/detail/sequential/extrema.h +108 -0
  1698. cuda/cccl/headers/include/thrust/system/detail/sequential/fill.h +29 -0
  1699. cuda/cccl/headers/include/thrust/system/detail/sequential/find.h +60 -0
  1700. cuda/cccl/headers/include/thrust/system/detail/sequential/for_each.h +72 -0
  1701. cuda/cccl/headers/include/thrust/system/detail/sequential/gather.h +29 -0
  1702. cuda/cccl/headers/include/thrust/system/detail/sequential/general_copy.h +120 -0
  1703. cuda/cccl/headers/include/thrust/system/detail/sequential/generate.h +29 -0
  1704. cuda/cccl/headers/include/thrust/system/detail/sequential/get_value.h +41 -0
  1705. cuda/cccl/headers/include/thrust/system/detail/sequential/inner_product.h +29 -0
  1706. cuda/cccl/headers/include/thrust/system/detail/sequential/insertion_sort.h +139 -0
  1707. cuda/cccl/headers/include/thrust/system/detail/sequential/iter_swap.h +43 -0
  1708. cuda/cccl/headers/include/thrust/system/detail/sequential/logical.h +29 -0
  1709. cuda/cccl/headers/include/thrust/system/detail/sequential/malloc_and_free.h +48 -0
  1710. cuda/cccl/headers/include/thrust/system/detail/sequential/merge.h +147 -0
  1711. cuda/cccl/headers/include/thrust/system/detail/sequential/mismatch.h +29 -0
  1712. cuda/cccl/headers/include/thrust/system/detail/sequential/partition.h +297 -0
  1713. cuda/cccl/headers/include/thrust/system/detail/sequential/per_device_resource.h +29 -0
  1714. cuda/cccl/headers/include/thrust/system/detail/sequential/reduce.h +62 -0
  1715. cuda/cccl/headers/include/thrust/system/detail/sequential/reduce_by_key.h +96 -0
  1716. cuda/cccl/headers/include/thrust/system/detail/sequential/remove.h +177 -0
  1717. cuda/cccl/headers/include/thrust/system/detail/sequential/replace.h +29 -0
  1718. cuda/cccl/headers/include/thrust/system/detail/sequential/reverse.h +29 -0
  1719. cuda/cccl/headers/include/thrust/system/detail/sequential/scan.h +152 -0
  1720. cuda/cccl/headers/include/thrust/system/detail/sequential/scan_by_key.h +143 -0
  1721. cuda/cccl/headers/include/thrust/system/detail/sequential/scatter.h +29 -0
  1722. cuda/cccl/headers/include/thrust/system/detail/sequential/sequence.h +29 -0
  1723. cuda/cccl/headers/include/thrust/system/detail/sequential/set_operations.h +204 -0
  1724. cuda/cccl/headers/include/thrust/system/detail/sequential/sort.h +120 -0
  1725. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.h +354 -0
  1726. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.h +121 -0
  1727. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.h +584 -0
  1728. cuda/cccl/headers/include/thrust/system/detail/sequential/swap_ranges.h +29 -0
  1729. cuda/cccl/headers/include/thrust/system/detail/sequential/tabulate.h +29 -0
  1730. cuda/cccl/headers/include/thrust/system/detail/sequential/temporary_buffer.h +29 -0
  1731. cuda/cccl/headers/include/thrust/system/detail/sequential/transform.h +29 -0
  1732. cuda/cccl/headers/include/thrust/system/detail/sequential/transform_reduce.h +29 -0
  1733. cuda/cccl/headers/include/thrust/system/detail/sequential/transform_scan.h +29 -0
  1734. cuda/cccl/headers/include/thrust/system/detail/sequential/trivial_copy.h +56 -0
  1735. cuda/cccl/headers/include/thrust/system/detail/sequential/uninitialized_copy.h +29 -0
  1736. cuda/cccl/headers/include/thrust/system/detail/sequential/uninitialized_fill.h +29 -0
  1737. cuda/cccl/headers/include/thrust/system/detail/sequential/unique.h +113 -0
  1738. cuda/cccl/headers/include/thrust/system/detail/sequential/unique_by_key.h +104 -0
  1739. cuda/cccl/headers/include/thrust/system/detail/system_error.inl +106 -0
  1740. cuda/cccl/headers/include/thrust/system/error_code.h +508 -0
  1741. cuda/cccl/headers/include/thrust/system/omp/detail/adjacent_difference.h +34 -0
  1742. cuda/cccl/headers/include/thrust/system/omp/detail/assign_value.h +17 -0
  1743. cuda/cccl/headers/include/thrust/system/omp/detail/binary_search.h +57 -0
  1744. cuda/cccl/headers/include/thrust/system/omp/detail/copy.h +61 -0
  1745. cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.h +39 -0
  1746. cuda/cccl/headers/include/thrust/system/omp/detail/count.h +17 -0
  1747. cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.h +48 -0
  1748. cuda/cccl/headers/include/thrust/system/omp/detail/equal.h +17 -0
  1749. cuda/cccl/headers/include/thrust/system/omp/detail/execution_policy.h +132 -0
  1750. cuda/cccl/headers/include/thrust/system/omp/detail/extrema.h +46 -0
  1751. cuda/cccl/headers/include/thrust/system/omp/detail/fill.h +17 -0
  1752. cuda/cccl/headers/include/thrust/system/omp/detail/find.h +33 -0
  1753. cuda/cccl/headers/include/thrust/system/omp/detail/for_each.h +73 -0
  1754. cuda/cccl/headers/include/thrust/system/omp/detail/gather.h +17 -0
  1755. cuda/cccl/headers/include/thrust/system/omp/detail/generate.h +17 -0
  1756. cuda/cccl/headers/include/thrust/system/omp/detail/get_value.h +17 -0
  1757. cuda/cccl/headers/include/thrust/system/omp/detail/inner_product.h +17 -0
  1758. cuda/cccl/headers/include/thrust/system/omp/detail/iter_swap.h +17 -0
  1759. cuda/cccl/headers/include/thrust/system/omp/detail/logical.h +17 -0
  1760. cuda/cccl/headers/include/thrust/system/omp/detail/malloc_and_free.h +17 -0
  1761. cuda/cccl/headers/include/thrust/system/omp/detail/merge.h +17 -0
  1762. cuda/cccl/headers/include/thrust/system/omp/detail/mismatch.h +17 -0
  1763. cuda/cccl/headers/include/thrust/system/omp/detail/partition.h +83 -0
  1764. cuda/cccl/headers/include/thrust/system/omp/detail/per_device_resource.h +16 -0
  1765. cuda/cccl/headers/include/thrust/system/omp/detail/pragma_omp.h +30 -0
  1766. cuda/cccl/headers/include/thrust/system/omp/detail/reduce.h +62 -0
  1767. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.h +49 -0
  1768. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.h +87 -0
  1769. cuda/cccl/headers/include/thrust/system/omp/detail/remove.h +67 -0
  1770. cuda/cccl/headers/include/thrust/system/omp/detail/replace.h +17 -0
  1771. cuda/cccl/headers/include/thrust/system/omp/detail/reverse.h +17 -0
  1772. cuda/cccl/headers/include/thrust/system/omp/detail/scan.h +189 -0
  1773. cuda/cccl/headers/include/thrust/system/omp/detail/scan_by_key.h +23 -0
  1774. cuda/cccl/headers/include/thrust/system/omp/detail/scatter.h +17 -0
  1775. cuda/cccl/headers/include/thrust/system/omp/detail/sequence.h +17 -0
  1776. cuda/cccl/headers/include/thrust/system/omp/detail/set_operations.h +17 -0
  1777. cuda/cccl/headers/include/thrust/system/omp/detail/sort.h +245 -0
  1778. cuda/cccl/headers/include/thrust/system/omp/detail/swap_ranges.h +17 -0
  1779. cuda/cccl/headers/include/thrust/system/omp/detail/tabulate.h +17 -0
  1780. cuda/cccl/headers/include/thrust/system/omp/detail/temporary_buffer.h +16 -0
  1781. cuda/cccl/headers/include/thrust/system/omp/detail/transform.h +17 -0
  1782. cuda/cccl/headers/include/thrust/system/omp/detail/transform_reduce.h +17 -0
  1783. cuda/cccl/headers/include/thrust/system/omp/detail/transform_scan.h +17 -0
  1784. cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_copy.h +17 -0
  1785. cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_fill.h +17 -0
  1786. cuda/cccl/headers/include/thrust/system/omp/detail/unique.h +51 -0
  1787. cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.h +55 -0
  1788. cuda/cccl/headers/include/thrust/system/omp/execution_policy.h +62 -0
  1789. cuda/cccl/headers/include/thrust/system/omp/memory.h +153 -0
  1790. cuda/cccl/headers/include/thrust/system/omp/memory_resource.h +71 -0
  1791. cuda/cccl/headers/include/thrust/system/omp/pointer.h +120 -0
  1792. cuda/cccl/headers/include/thrust/system/omp/vector.h +96 -0
  1793. cuda/cccl/headers/include/thrust/system/system_error.h +183 -0
  1794. cuda/cccl/headers/include/thrust/system/tbb/detail/adjacent_difference.h +33 -0
  1795. cuda/cccl/headers/include/thrust/system/tbb/detail/assign_value.h +17 -0
  1796. cuda/cccl/headers/include/thrust/system/tbb/detail/binary_search.h +17 -0
  1797. cuda/cccl/headers/include/thrust/system/tbb/detail/copy.h +59 -0
  1798. cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.h +114 -0
  1799. cuda/cccl/headers/include/thrust/system/tbb/detail/count.h +17 -0
  1800. cuda/cccl/headers/include/thrust/system/tbb/detail/equal.h +17 -0
  1801. cuda/cccl/headers/include/thrust/system/tbb/detail/execution_policy.h +113 -0
  1802. cuda/cccl/headers/include/thrust/system/tbb/detail/extrema.h +45 -0
  1803. cuda/cccl/headers/include/thrust/system/tbb/detail/fill.h +17 -0
  1804. cuda/cccl/headers/include/thrust/system/tbb/detail/find.h +28 -0
  1805. cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.h +70 -0
  1806. cuda/cccl/headers/include/thrust/system/tbb/detail/gather.h +17 -0
  1807. cuda/cccl/headers/include/thrust/system/tbb/detail/generate.h +17 -0
  1808. cuda/cccl/headers/include/thrust/system/tbb/detail/get_value.h +17 -0
  1809. cuda/cccl/headers/include/thrust/system/tbb/detail/inner_product.h +17 -0
  1810. cuda/cccl/headers/include/thrust/system/tbb/detail/iter_swap.h +17 -0
  1811. cuda/cccl/headers/include/thrust/system/tbb/detail/logical.h +17 -0
  1812. cuda/cccl/headers/include/thrust/system/tbb/detail/malloc_and_free.h +17 -0
  1813. cuda/cccl/headers/include/thrust/system/tbb/detail/merge.h +302 -0
  1814. cuda/cccl/headers/include/thrust/system/tbb/detail/mismatch.h +17 -0
  1815. cuda/cccl/headers/include/thrust/system/tbb/detail/partition.h +78 -0
  1816. cuda/cccl/headers/include/thrust/system/tbb/detail/per_device_resource.h +16 -0
  1817. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.h +120 -0
  1818. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.h +378 -0
  1819. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_intervals.h +118 -0
  1820. cuda/cccl/headers/include/thrust/system/tbb/detail/remove.h +66 -0
  1821. cuda/cccl/headers/include/thrust/system/tbb/detail/replace.h +17 -0
  1822. cuda/cccl/headers/include/thrust/system/tbb/detail/reverse.h +17 -0
  1823. cuda/cccl/headers/include/thrust/system/tbb/detail/scan.h +294 -0
  1824. cuda/cccl/headers/include/thrust/system/tbb/detail/scan_by_key.h +20 -0
  1825. cuda/cccl/headers/include/thrust/system/tbb/detail/scatter.h +17 -0
  1826. cuda/cccl/headers/include/thrust/system/tbb/detail/sequence.h +17 -0
  1827. cuda/cccl/headers/include/thrust/system/tbb/detail/set_operations.h +17 -0
  1828. cuda/cccl/headers/include/thrust/system/tbb/detail/sort.h +272 -0
  1829. cuda/cccl/headers/include/thrust/system/tbb/detail/swap_ranges.h +17 -0
  1830. cuda/cccl/headers/include/thrust/system/tbb/detail/tabulate.h +17 -0
  1831. cuda/cccl/headers/include/thrust/system/tbb/detail/temporary_buffer.h +16 -0
  1832. cuda/cccl/headers/include/thrust/system/tbb/detail/transform.h +17 -0
  1833. cuda/cccl/headers/include/thrust/system/tbb/detail/transform_reduce.h +17 -0
  1834. cuda/cccl/headers/include/thrust/system/tbb/detail/transform_scan.h +17 -0
  1835. cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_copy.h +17 -0
  1836. cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_fill.h +17 -0
  1837. cuda/cccl/headers/include/thrust/system/tbb/detail/unique.h +50 -0
  1838. cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.h +54 -0
  1839. cuda/cccl/headers/include/thrust/system/tbb/execution_policy.h +62 -0
  1840. cuda/cccl/headers/include/thrust/system/tbb/memory.h +139 -0
  1841. cuda/cccl/headers/include/thrust/system/tbb/memory_resource.h +57 -0
  1842. cuda/cccl/headers/include/thrust/system/tbb/pointer.h +106 -0
  1843. cuda/cccl/headers/include/thrust/system/tbb/vector.h +82 -0
  1844. cuda/cccl/headers/include/thrust/system_error.h +57 -0
  1845. cuda/cccl/headers/include/thrust/tabulate.h +125 -0
  1846. cuda/cccl/headers/include/thrust/transform.h +1056 -0
  1847. cuda/cccl/headers/include/thrust/transform_reduce.h +190 -0
  1848. cuda/cccl/headers/include/thrust/transform_scan.h +442 -0
  1849. cuda/cccl/headers/include/thrust/tuple.h +139 -0
  1850. cuda/cccl/headers/include/thrust/type_traits/integer_sequence.h +257 -0
  1851. cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +153 -0
  1852. cuda/cccl/headers/include/thrust/type_traits/is_execution_policy.h +65 -0
  1853. cuda/cccl/headers/include/thrust/type_traits/is_operator_less_or_greater_function_object.h +180 -0
  1854. cuda/cccl/headers/include/thrust/type_traits/is_operator_plus_function_object.h +112 -0
  1855. cuda/cccl/headers/include/thrust/type_traits/is_trivially_relocatable.h +332 -0
  1856. cuda/cccl/headers/include/thrust/type_traits/logical_metafunctions.h +42 -0
  1857. cuda/cccl/headers/include/thrust/type_traits/unwrap_contiguous_iterator.h +63 -0
  1858. cuda/cccl/headers/include/thrust/uninitialized_copy.h +300 -0
  1859. cuda/cccl/headers/include/thrust/uninitialized_fill.h +268 -0
  1860. cuda/cccl/headers/include/thrust/unique.h +1088 -0
  1861. cuda/cccl/headers/include/thrust/universal_allocator.h +101 -0
  1862. cuda/cccl/headers/include/thrust/universal_ptr.h +34 -0
  1863. cuda/cccl/headers/include/thrust/universal_vector.h +80 -0
  1864. cuda/cccl/headers/include/thrust/version.h +93 -0
  1865. cuda/cccl/headers/include/thrust/zip_function.h +150 -0
  1866. cuda/cccl/headers/include_paths.py +51 -0
  1867. cuda/cccl/parallel/__init__.py +9 -0
  1868. cuda/cccl/parallel/experimental/__init__.py +24 -0
  1869. cuda/cccl/py.typed +0 -0
  1870. cuda/compute/__init__.py +83 -0
  1871. cuda/compute/_bindings.py +79 -0
  1872. cuda/compute/_bindings.pyi +498 -0
  1873. cuda/compute/_bindings_impl.pyx +2415 -0
  1874. cuda/compute/_caching.py +71 -0
  1875. cuda/compute/_cccl_interop.py +422 -0
  1876. cuda/compute/_utils/__init__.py +0 -0
  1877. cuda/compute/_utils/protocols.py +132 -0
  1878. cuda/compute/_utils/temp_storage_buffer.py +86 -0
  1879. cuda/compute/algorithms/__init__.py +58 -0
  1880. cuda/compute/algorithms/_histogram.py +243 -0
  1881. cuda/compute/algorithms/_reduce.py +182 -0
  1882. cuda/compute/algorithms/_scan.py +331 -0
  1883. cuda/compute/algorithms/_segmented_reduce.py +257 -0
  1884. cuda/compute/algorithms/_sort/__init__.py +23 -0
  1885. cuda/compute/algorithms/_sort/_merge_sort.py +225 -0
  1886. cuda/compute/algorithms/_sort/_radix_sort.py +263 -0
  1887. cuda/compute/algorithms/_sort/_segmented_sort.py +288 -0
  1888. cuda/compute/algorithms/_sort/_sort_common.py +52 -0
  1889. cuda/compute/algorithms/_three_way_partition.py +261 -0
  1890. cuda/compute/algorithms/_transform.py +329 -0
  1891. cuda/compute/algorithms/_unique_by_key.py +252 -0
  1892. cuda/compute/cccl/.gitkeep +0 -0
  1893. cuda/compute/cu12/_bindings_impl.cpython-311-aarch64-linux-gnu.so +0 -0
  1894. cuda/compute/cu12/cccl/libcccl.c.parallel.so +0 -0
  1895. cuda/compute/cu13/_bindings_impl.cpython-311-aarch64-linux-gnu.so +0 -0
  1896. cuda/compute/cu13/cccl/libcccl.c.parallel.so +0 -0
  1897. cuda/compute/iterators/__init__.py +21 -0
  1898. cuda/compute/iterators/_factories.py +219 -0
  1899. cuda/compute/iterators/_iterators.py +817 -0
  1900. cuda/compute/iterators/_zip_iterator.py +199 -0
  1901. cuda/compute/numba_utils.py +53 -0
  1902. cuda/compute/op.py +3 -0
  1903. cuda/compute/struct.py +272 -0
  1904. cuda/compute/typing.py +37 -0
  1905. cuda/coop/__init__.py +8 -0
  1906. cuda/coop/_caching.py +48 -0
  1907. cuda/coop/_common.py +275 -0
  1908. cuda/coop/_nvrtc.py +92 -0
  1909. cuda/coop/_scan_op.py +181 -0
  1910. cuda/coop/_types.py +937 -0
  1911. cuda/coop/_typing.py +107 -0
  1912. cuda/coop/block/__init__.py +39 -0
  1913. cuda/coop/block/_block_exchange.py +251 -0
  1914. cuda/coop/block/_block_load_store.py +215 -0
  1915. cuda/coop/block/_block_merge_sort.py +125 -0
  1916. cuda/coop/block/_block_radix_sort.py +214 -0
  1917. cuda/coop/block/_block_reduce.py +294 -0
  1918. cuda/coop/block/_block_scan.py +983 -0
  1919. cuda/coop/warp/__init__.py +9 -0
  1920. cuda/coop/warp/_warp_merge_sort.py +92 -0
  1921. cuda/coop/warp/_warp_reduce.py +153 -0
  1922. cuda/coop/warp/_warp_scan.py +78 -0
  1923. cuda_cccl-0.3.4.dist-info/METADATA +78 -0
  1924. cuda_cccl-0.3.4.dist-info/RECORD +1926 -0
  1925. cuda_cccl-0.3.4.dist-info/WHEEL +5 -0
  1926. cuda_cccl-0.3.4.dist-info/licenses/LICENSE +1 -0
@@ -0,0 +1,1485 @@
1
+ // SPDX-FileCopyrightText: Copyright (c) 2011, Duane Merrill. All rights reserved.
2
+ // SPDX-FileCopyrightText: Copyright (c) 2011-2022, NVIDIA CORPORATION. All rights reserved.
3
+ // SPDX-License-Identifier: BSD-3
4
+
5
+ //! @file
6
+ //! cub::DeviceHistogram provides device-wide parallel operations for constructing histogram(s) from a sequence of
7
+ //! sample data residing within device-accessible memory.
8
+
9
+ #pragma once
10
+
11
+ #include <cub/config.cuh>
12
+
13
+ #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
14
+ # pragma GCC system_header
15
+ #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
16
+ # pragma clang system_header
17
+ #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
18
+ # pragma system_header
19
+ #endif // no system header
20
+
21
+ #include <cub/device/dispatch/dispatch_histogram.cuh>
22
+
23
+ #include <cuda/std/__algorithm/copy.h>
24
+ #include <cuda/std/__type_traits/integral_constant.h>
25
+ #include <cuda/std/__type_traits/remove_const.h>
26
+ #include <cuda/std/array>
27
+ #include <cuda/std/limits>
28
+
29
+ CUB_NAMESPACE_BEGIN
30
+
31
+ //! @rst
32
+ //! DeviceHistogram provides device-wide parallel operations for constructing histogram(s) from a sequence of
33
+ //! sample data residing within device-accessible memory.
34
+ //!
35
+ //! Overview
36
+ //! ++++++++++++++++++++++++++
37
+ //!
38
+ //! A `histogram <http://en.wikipedia.org/wiki/Histogram>`_ counts the number of observations that fall into each
39
+ //! of the disjoint categories (known as *bins*).
40
+ //!
41
+ //! Usage Considerations
42
+ //! ++++++++++++++++++++++++++
43
+ //!
44
+ //! @cdp_class{DeviceHistogram}
45
+ //!
46
+ //! @endrst
47
+ struct DeviceHistogram
48
+ {
49
+ //! @name Evenly-segmented bin ranges
50
+ //! @{
51
+
52
+ //! @rst
53
+ //! Computes an intensity histogram from a sequence of data samples using equal-width bins.
54
+ //!
55
+ //! - The number of histogram bins is (``num_levels - 1``)
56
+ //! - All bins comprise the same width of sample values: ``(upper_level - lower_level) / (num_levels - 1)``.
57
+ //! - If the common type of ``SampleT`` and ``LevelT`` is of integral type, the bin for a sample is
58
+ //! computed as ``(sample - lower_level) * (num_levels - 1) / (upper_level - lower_level)``, rounded
59
+ //! down to the nearest whole number. To protect against potential overflows, if the product
60
+ //! ``(upper_level - lower_level) * (num_levels - 1)`` exceeds the number representable by an
61
+ //! ``uint64_t``, the cuda error ``cudaErrorInvalidValue`` is returned. If the common type is 128
62
+ //! bits wide, bin computation will use 128-bit arithmetic and ``cudaErrorInvalidValue`` will only
63
+ //! be returned if bin computation would overflow for 128-bit arithmetic.
64
+ //! - The ranges ``[d_samples, d_samples + num_samples)`` and
65
+ //! ``[d_histogram, d_histogram + num_levels - 1)`` shall not overlap in any way.
66
+ //! - ``cuda::std::common_type<LevelT, SampleT>`` must be valid, and both LevelT and SampleT must be valid
67
+ //! arithmetic types. The common type must be convertible to ``int`` and trivially copyable.
68
+ //! - @devicestorage
69
+ //!
70
+ //! Snippet
71
+ //! +++++++
72
+ //!
73
+ //! The code snippet below illustrates the computation of a six-bin histogram
74
+ //! from a sequence of float samples
75
+ //!
76
+ //! .. code-block:: c++
77
+ //!
78
+ //! #include <cub/cub.cuh> // or equivalently <cub/device/device_histogram.cuh>
79
+ //!
80
+ //! // Declare, allocate, and initialize device-accessible pointers for
81
+ //! // input samples and output histogram
82
+ //! int num_samples; // e.g., 10
83
+ //! float* d_samples; // e.g., [2.2, 6.1, 7.1, 2.9, 3.5, 0.3, 2.9, 2.1, 6.1, 999.5]
84
+ //! int* d_histogram; // e.g., [ -, -, -, -, -, -]
85
+ //! int num_levels; // e.g., 7 (seven level boundaries for six bins)
86
+ //! float lower_level; // e.g., 0.0 (lower sample value boundary of lowest bin)
87
+ //! float upper_level; // e.g., 12.0 (upper sample value boundary of upper bin)
88
+ //! ...
89
+ //!
90
+ //! // Determine temporary device storage requirements
91
+ //! void* d_temp_storage = nullptr;
92
+ //! size_t temp_storage_bytes = 0;
93
+ //! cub::DeviceHistogram::HistogramEven(
94
+ //! d_temp_storage, temp_storage_bytes,
95
+ //! d_samples, d_histogram, num_levels,
96
+ //! lower_level, upper_level, num_samples);
97
+ //!
98
+ //! // Allocate temporary storage
99
+ //! cudaMalloc(&d_temp_storage, temp_storage_bytes);
100
+ //!
101
+ //! // Compute histograms
102
+ //! cub::DeviceHistogram::HistogramEven(
103
+ //! d_temp_storage, temp_storage_bytes,
104
+ //! d_samples, d_histogram, num_levels,
105
+ //! lower_level, upper_level, num_samples);
106
+ //!
107
+ //! // d_histogram <-- [1, 5, 0, 3, 0, 0];
108
+ //!
109
+ //! @endrst
110
+ //!
111
+ //! @tparam SampleIteratorT
112
+ //! **[inferred]** Random-access input iterator type for reading input samples @iterator
113
+ //!
114
+ //! @tparam CounterT
115
+ //! **[inferred]** Integer type for histogram bin counters
116
+ //!
117
+ //! @tparam LevelT
118
+ //! **[inferred]** Type for specifying boundaries (levels)
119
+ //!
120
+ //! @tparam OffsetT
121
+ //! **[inferred]** Signed integer type for sequence offsets, list lengths,
122
+ //! pointer differences, etc. @offset_size1
123
+ //!
124
+ //! @param[in] d_temp_storage
125
+ //! Device-accessible allocation of temporary storage. When `nullptr`, the
126
+ //! required allocation size is written to `temp_storage_bytes` and no
127
+ //! work is done.
128
+ //!
129
+ //! @param[in,out] temp_storage_bytes
130
+ //! Reference to size in bytes of `d_temp_storage` allocation
131
+ //!
132
+ //! @param[in] d_samples
133
+ //! The pointer to the input sequence of data samples.
134
+ //!
135
+ //! @param[out] d_histogram
136
+ //! The pointer to the histogram counter output array of length
137
+ //! `num_levels - 1`.
138
+ //!
139
+ //! @param[in] num_levels
140
+ //! The number of boundaries (levels) for delineating histogram samples.
141
+ //! Implies that the number of bins is `num_levels - 1`.
142
+ //!
143
+ //! @param[in] lower_level
144
+ //! The lower sample value bound (inclusive) for the lowest histogram bin.
145
+ //!
146
+ //! @param[in] upper_level
147
+ //! The upper sample value bound (exclusive) for the highest histogram bin.
148
+ //!
149
+ //! @param[in] num_samples
150
+ //! The number of input samples (i.e., the length of `d_samples`)
151
+ //!
152
+ //! @param[in] stream
153
+ //! @rst
154
+ //! **[optional]** CUDA stream to launch kernels within. Default is stream\ :sub:`0`.
155
+ //! @endrst
156
+ template <typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT>
157
+ CUB_RUNTIME_FUNCTION static cudaError_t HistogramEven(
158
+ void* d_temp_storage,
159
+ size_t& temp_storage_bytes,
160
+ SampleIteratorT d_samples,
161
+ CounterT* d_histogram,
162
+ int num_levels,
163
+ LevelT lower_level,
164
+ LevelT upper_level,
165
+ OffsetT num_samples,
166
+ cudaStream_t stream = 0)
167
+ { // 1D convenience overload: performs no work itself, only forwards to MultiHistogramEven.
168
+ /// The sample value type of the input iterator
169
+ using SampleT = cub::detail::it_value_t<SampleIteratorT>;
170
+ return MultiHistogramEven<1, 1>( // presumably NUM_CHANNELS = NUM_ACTIVE_CHANNELS = 1 -- confirm against MultiHistogramEven
171
+ d_temp_storage,
172
+ temp_storage_bytes,
173
+ d_samples,
174
+ ::cuda::std::array{d_histogram}, // each scalar argument is wrapped in a one-element array
175
+ ::cuda::std::array{num_levels},
176
+ ::cuda::std::array{lower_level},
177
+ ::cuda::std::array{upper_level},
178
+ num_samples, // same position as num_row_samples in the 2D HistogramEven overload
179
+ static_cast<OffsetT>(1), // same position as num_rows in the 2D overload: the whole input is one row
180
+ sizeof(SampleT) * num_samples, // same position as row_stride_bytes in the 2D overload: byte size of that one row
181
+ stream);
182
+ }
183
+
184
+ //! @rst
185
+ //! Computes an intensity histogram from a sequence of data samples using equal-width bins.
186
+ //!
187
+ //! - A two-dimensional *region of interest* within ``d_samples`` can be specified using
188
+ //! the ``num_row_samples``, ``num_rows``, and ``row_stride_bytes`` parameters.
189
+ //! - The row stride must be a whole multiple of the sample data type
190
+ //! size, i.e., ``(row_stride_bytes % sizeof(SampleT)) == 0``.
191
+ //! - The number of histogram bins is (``num_levels - 1``)
192
+ //! - All bins comprise the same width of sample values: ``(upper_level - lower_level) / (num_levels - 1)``
193
+ //! - If the common type of ``SampleT`` and ``LevelT`` is of integral type, the bin for a sample is
194
+ //! computed as ``(sample - lower_level) * (num_levels - 1) / (upper_level - lower_level)``, rounded
195
+ //! down to the nearest whole number. To protect against potential overflows, if the product
196
+ //! ``(upper_level - lower_level) * (num_levels - 1)`` exceeds the number representable by an
197
+ //! ``uint64_t``, the cuda error ``cudaErrorInvalidValue`` is returned. If the common type is 128
198
+ //! bits wide, bin computation will use 128-bit arithmetic and ``cudaErrorInvalidValue`` will only
199
+ //! be returned if bin computation would overflow for 128-bit arithmetic.
200
+ //! - For a given row ``r`` in ``[0, num_rows)``, let
201
+ //! ``row_begin = d_samples + r * row_stride_bytes / sizeof(SampleT)`` and
202
+ //! ``row_end = row_begin + num_row_samples``. The ranges
203
+ //! ``[row_begin, row_end)`` and ``[d_histogram, d_histogram + num_levels - 1)``
204
+ //! shall not overlap in any way.
205
+ //! - ``cuda::std::common_type<LevelT, SampleT>`` must be valid, and both LevelT
206
+ //! and SampleT must be valid arithmetic types. The common type must be
207
+ //! convertible to ``int`` and trivially copyable.
208
+ //! - @devicestorage
209
+ //!
210
+ //! Snippet
211
+ //! +++++++
212
+ //!
213
+ //! The code snippet below illustrates the computation of a six-bin histogram
214
+ //! from a 2x5 region of interest within a flattened 2x7 array of float samples.
215
+ //!
216
+ //! .. code-block:: c++
217
+ //!
218
+ //! #include <cub/cub.cuh> // or equivalently <cub/device/device_histogram.cuh>
219
+ //!
220
+ //! // Declare, allocate, and initialize device-accessible pointers for
221
+ //! // input samples and output histogram
222
+ //! int num_row_samples; // e.g., 5
223
+ //! int num_rows; // e.g., 2;
224
+ //! size_t row_stride_bytes; // e.g., 7 * sizeof(float)
225
+ //! float* d_samples; // e.g., [2.2, 6.1, 7.1, 2.9, 3.5, -, -,
226
+ //! // 0.3, 2.9, 2.1, 6.1, 999.5, -, -]
227
+ //! int* d_histogram; // e.g., [ -, -, -, -, -, -]
228
+ //! int num_levels; // e.g., 7 (seven level boundaries for six bins)
229
+ //! float lower_level; // e.g., 0.0 (lower sample value boundary of lowest bin)
230
+ //! float upper_level; // e.g., 12.0 (upper sample value boundary of upper bin)
231
+ //! ...
232
+ //!
233
+ //! // Determine temporary device storage requirements
234
+ //! void* d_temp_storage = nullptr;
235
+ //! size_t temp_storage_bytes = 0;
236
+ //! cub::DeviceHistogram::HistogramEven(
237
+ //! d_temp_storage, temp_storage_bytes,
238
+ //! d_samples, d_histogram, num_levels, lower_level, upper_level,
239
+ //! num_row_samples, num_rows, row_stride_bytes);
240
+ //!
241
+ //! // Allocate temporary storage
242
+ //! cudaMalloc(&d_temp_storage, temp_storage_bytes);
243
+ //!
244
+ //! // Compute histograms
245
+ //! cub::DeviceHistogram::HistogramEven(
246
+ //! d_temp_storage, temp_storage_bytes,
247
+ //! d_samples, d_histogram, num_levels, lower_level, upper_level,
248
+ //! num_row_samples, num_rows, row_stride_bytes);
249
+ //!
250
+ //! // d_histogram <-- [1, 5, 0, 3, 0, 0];
251
+ //!
252
+ //! @endrst
253
+ //!
254
+ //! @tparam SampleIteratorT
255
+ //! **[inferred]** Random-access input iterator type for reading
256
+ //! input samples. @iterator
257
+ //!
258
+ //! @tparam CounterT
259
+ //! **[inferred]** Integer type for histogram bin counters
260
+ //!
261
+ //! @tparam LevelT
262
+ //! **[inferred]** Type for specifying boundaries (levels)
263
+ //!
264
+ //! @tparam OffsetT
265
+ //! **[inferred]** Signed integer type for sequence offsets, list lengths,
266
+ //! pointer differences, etc. @offset_size1
267
+ //!
268
+ //! @param[in] d_temp_storage
269
+ //! Device-accessible allocation of temporary storage. When `nullptr`, the
270
+ //! required allocation size is written to `temp_storage_bytes` and no
271
+ //! work is done.
272
+ //!
273
+ //! @param[in,out] temp_storage_bytes
274
+ //! Reference to size in bytes of `d_temp_storage` allocation
275
+ //!
276
+ //! @param[in] d_samples
277
+ //! The pointer to the input sequence of data samples.
278
+ //!
279
+ //! @param[out] d_histogram
280
+ //! The pointer to the histogram counter output array of
281
+ //! length `num_levels - 1`.
282
+ //!
283
+ //! @param[in] num_levels
284
+ //! The number of boundaries (levels) for delineating histogram samples.
285
+ //! Implies that the number of bins is `num_levels - 1`.
286
+ //!
287
+ //! @param[in] lower_level
288
+ //! The lower sample value bound (inclusive) for the lowest histogram bin.
289
+ //!
290
+ //! @param[in] upper_level
291
+ //! The upper sample value bound (exclusive) for the highest histogram bin.
292
+ //!
293
+ //! @param[in] num_row_samples
294
+ //! The number of data samples per row in the region of interest
295
+ //!
296
+ //! @param[in] num_rows
297
+ //! The number of rows in the region of interest
298
+ //!
299
+ //! @param[in] row_stride_bytes
300
+ //! The number of bytes between starts of consecutive rows in
301
+ //! the region of interest
302
+ //!
303
+ //! @param[in] stream
304
+ //! @rst
305
+ //! **[optional]** CUDA stream to launch kernels within. Default is stream\ :sub:`0`.
306
+ //! @endrst
307
+ template <typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT>
308
+ CUB_RUNTIME_FUNCTION static cudaError_t HistogramEven(
309
+ void* d_temp_storage,
310
+ size_t& temp_storage_bytes,
311
+ SampleIteratorT d_samples,
312
+ CounterT* d_histogram,
313
+ int num_levels,
314
+ LevelT lower_level,
315
+ LevelT upper_level,
316
+ OffsetT num_row_samples,
317
+ OffsetT num_rows,
318
+ size_t row_stride_bytes,
319
+ cudaStream_t stream = 0)
320
+ { // 2D region-of-interest overload: performs no work itself, only forwards to MultiHistogramEven.
321
+ return MultiHistogramEven<1, 1>( // presumably NUM_CHANNELS = NUM_ACTIVE_CHANNELS = 1 -- confirm against MultiHistogramEven
322
+ d_temp_storage,
323
+ temp_storage_bytes,
324
+ d_samples,
325
+ ::cuda::std::array{d_histogram}, // each scalar argument is wrapped in a one-element array
326
+ ::cuda::std::array{num_levels},
327
+ ::cuda::std::array{lower_level},
328
+ ::cuda::std::array{upper_level},
329
+ num_row_samples, // region-of-interest geometry is passed through unchanged
330
+ num_rows,
331
+ row_stride_bytes,
332
+ stream);
333
+ }
334
+
335
+ //! @rst
336
+ //! Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using
337
+ //! equal-width bins.
338
+ //!
339
+ //! - The input is a sequence of *pixel* structures, where each pixel comprises
340
+ //! a record of ``NUM_CHANNELS`` consecutive data samples
341
+ //! (e.g., an *RGBA* pixel).
342
+ //! - ``NUM_CHANNELS`` can be up to 4.
343
+ //! - Of the ``NUM_CHANNELS`` specified, the function will only compute
344
+ //! histograms for the first ``NUM_ACTIVE_CHANNELS``
345
+ //! (e.g., only *RGB* histograms from *RGBA* pixel samples).
346
+ //! - The number of histogram bins for channel\ :sub:`i` is ``num_levels[i] - 1``.
347
+ //! - For channel\ :sub:`i`, the range of values for all histogram bins have the same width:
348
+ //! ``(upper_level[i] - lower_level[i]) / (num_levels[i] - 1)``
349
+ //! - If the common type of sample and level is of integral type, the bin for a sample is
350
+ //! computed as ``(sample - lower_level[i]) * (num_levels - 1) / (upper_level[i] - lower_level[i])``, round down
351
+ //! to the nearest whole number. To protect against potential overflows, if, for any channel ``i``, the product
352
+ //! ``(upper_level[i] - lower_level[i]) * (num_levels[i] - 1)`` exceeds the number representable by an ``uint64_t``,
353
+ //! the cuda error ``cudaErrorInvalidValue`` is returned. If the common type is 128 bits wide, bin computation
354
+ //! will use 128-bit arithmetic and ``cudaErrorInvalidValue`` will only be returned if bin
355
+ //! computation would overflow for 128-bit arithmetic.
356
+ //! - For a given channel ``c`` in ``[0, NUM_ACTIVE_CHANNELS)``, the ranges
357
+ //! ``[d_samples, d_samples + NUM_CHANNELS * num_pixels)`` and
358
+ //! ``[d_histogram[c], d_histogram[c] + num_levels[c] - 1)`` shall not overlap in any way.
359
+ //! - ``cuda::std::common_type<LevelT, SampleT>`` must be valid, and both LevelT
360
+ //! and SampleT must be valid arithmetic types.
361
+ //! The common type must be convertible to ``int`` and trivially copyable.
362
+ //! - @devicestorage
363
+ //!
364
+ //! Snippet
365
+ //! +++++++
366
+ //!
367
+ //! The code snippet below illustrates the computation of three 256-bin *RGB* histograms
368
+ //! from a quad-channel sequence of *RGBA* pixels (8 bits per channel per pixel)
369
+ //!
370
+ //! .. code-block:: c++
371
+ //!
372
+ //! #include <cub/cub.cuh> // or equivalently <cub/device/device_histogram.cuh>
373
+ //!
374
+ //! // Declare, allocate, and initialize device-accessible pointers for
375
+ //! // input samples and output histograms
376
+ //! int num_pixels; // e.g., 5
377
+ //! unsigned char* d_samples; // e.g., [(2, 6, 7, 5), (3, 0, 2, 1), (7, 0, 6, 2),
378
+ //! // (0, 6, 7, 5), (3, 0, 2, 6)]
379
+ //! int* d_histogram[3]; // e.g., three device pointers to three device buffers,
380
+ //! // each allocated with 256 integer counters
381
+ //! int num_levels[3]; // e.g., {257, 257, 257};
382
+ //! unsigned int lower_level[3]; // e.g., {0, 0, 0};
383
+ //! unsigned int upper_level[3]; // e.g., {256, 256, 256};
384
+ //! ...
385
+ //!
386
+ //! // Determine temporary device storage requirements
387
+ //! void* d_temp_storage = nullptr;
388
+ //! size_t temp_storage_bytes = 0;
389
+ //! cub::DeviceHistogram::MultiHistogramEven<4, 3>(
390
+ //! d_temp_storage, temp_storage_bytes,
391
+ //! d_samples, d_histogram, num_levels,
392
+ //! lower_level, upper_level, num_pixels);
393
+ //!
394
+ //! // Allocate temporary storage
395
+ //! cudaMalloc(&d_temp_storage, temp_storage_bytes);
396
+ //!
397
+ //! // Compute histograms
398
+ //! cub::DeviceHistogram::MultiHistogramEven<4, 3>(
399
+ //! d_temp_storage, temp_storage_bytes,
400
+ //! d_samples, d_histogram, num_levels,
401
+ //! lower_level, upper_level, num_pixels);
402
+ //!
403
+ //! // d_histogram <-- [ [1, 0, 1, 2, 0, 0, 0, 1, 0, 0, 0, ..., 0],
404
+ //! // [3, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, ..., 0],
405
+ //! // [0, 0, 2, 0, 0, 0, 1, 2, 0, 0, 0, ..., 0] ]
406
+ //!
407
+ //! @endrst
408
+ //!
409
+ //! @tparam NUM_CHANNELS
410
+ //! Number of channels interleaved in the input data (may be greater than
411
+ //! the number of channels being actively histogrammed)
412
+ //!
413
+ //! @tparam NUM_ACTIVE_CHANNELS
414
+ //! **[inferred]** Number of channels actively being histogrammed
415
+ //!
416
+ //! @tparam SampleIteratorT
417
+ //! **[inferred]** Random-access input iterator type for reading
418
+ //! input samples. @iterator
419
+ //!
420
+ //! @tparam CounterT
421
+ //! **[inferred]** Integer type for histogram bin counters
422
+ //!
423
+ //! @tparam LevelT
424
+ //! **[inferred]** Type for specifying boundaries (levels)
425
+ //!
426
+ //! @tparam OffsetT
427
+ //! **[inferred]** Signed integer type for sequence offsets, list lengths,
428
+ //! pointer differences, etc. @offset_size1
429
+ //!
430
+ //! @param[in] d_temp_storage
431
+ //! Device-accessible allocation of temporary storage. When `nullptr`, the
432
+ //! required allocation size is written to `temp_storage_bytes` and no
433
+ //! work is done.
434
+ //!
435
+ //! @param[in,out] temp_storage_bytes
436
+ //! Reference to size in bytes of `d_temp_storage` allocation
437
+ //!
438
+ //! @param[in] d_samples
439
+ //! The pointer to the multi-channel input sequence of data samples.
440
+ //! The samples from different channels are assumed to be interleaved
441
+ //! (e.g., an array of 32-bit pixels where each pixel consists of four
442
+ //! *RGBA* 8-bit samples).
443
+ //!
444
+ //! @param[out] d_histogram
445
+ //! @rst
446
+ //! The pointers to the histogram counter output arrays, one for each active
447
+ //! channel. For channel\ :sub:`i`, the allocation length of
448
+ //! ``d_histogram[i]`` should be ``num_levels[i] - 1``.
449
+ //! @endrst
450
+ //!
451
+ //! @param[in] num_levels
452
+ //! @rst
453
+ //! The number of boundaries (levels) for delineating histogram samples in each active channel.
454
+ //! Implies that the number of bins for channel\ :sub:`i` is ``num_levels[i] - 1``.
455
+ //! @endrst
456
+ //!
457
+ //! @param[in] lower_level
458
+ //! The lower sample value bound (inclusive) for the lowest histogram bin in each active channel.
459
+ //!
460
+ //! @param[in] upper_level
461
+ //! The upper sample value bound (exclusive) for the highest histogram bin in each active channel.
462
+ //!
463
+ //! @param[in] num_pixels
464
+ //! The number of multi-channel pixels (i.e., the length of `d_samples / NUM_CHANNELS`)
465
+ //!
466
+ //! @param[in] stream
467
+ //! @rst
468
+ //! **[optional]** CUDA stream to launch kernels within. Default is stream\ :sub:`0`.
469
+ //! @endrst
470
+ template <int NUM_CHANNELS,
471
+ int NUM_ACTIVE_CHANNELS,
472
+ typename SampleIteratorT,
473
+ typename CounterT,
474
+ typename LevelT,
475
+ typename OffsetT>
476
+ CUB_RUNTIME_FUNCTION static cudaError_t MultiHistogramEven(
477
+ void* d_temp_storage,
478
+ size_t& temp_storage_bytes,
479
+ SampleIteratorT d_samples,
480
+ ::cuda::std::array<CounterT*, NUM_ACTIVE_CHANNELS> d_histogram,
481
+ ::cuda::std::array<int, NUM_ACTIVE_CHANNELS> num_levels,
482
+ ::cuda::std::array<LevelT, NUM_ACTIVE_CHANNELS> lower_level,
483
+ ::cuda::std::array<LevelT, NUM_ACTIVE_CHANNELS> upper_level,
484
+ OffsetT num_pixels,
485
+ cudaStream_t stream = 0)
486
+ { // 1D convenience overload: expresses the flat pixel sequence as a one-row region and forwards to the 2D overload.
487
+ /// The sample value type of the input iterator (needed below to compute the row size in bytes)
488
+ using SampleT = cub::detail::it_value_t<SampleIteratorT>;
489
+
490
+ return MultiHistogramEven<NUM_CHANNELS, NUM_ACTIVE_CHANNELS>(
491
+ d_temp_storage,
492
+ temp_storage_bytes,
493
+ d_samples,
494
+ d_histogram,
495
+ num_levels,
496
+ lower_level,
497
+ upper_level,
498
+ num_pixels, // num_row_pixels: the single row holds every pixel
499
+ static_cast<OffsetT>(1), // num_rows: exactly one row
500
+ sizeof(SampleT) * NUM_CHANNELS * num_pixels, // row_stride_bytes: byte size of that one row
501
+ stream);
502
+ }
503
+
504
+ private:
505
+ template <size_t N, typename T>
506
+ _CCCL_HOST_DEVICE static auto to_array(T* ptr)
507
+ { // Copies the N elements starting at ptr into a cuda::std::array that is returned by value.
508
+ ::cuda::std::array<::cuda::std::remove_const_t<T>, N> a{}; // element type is T with const removed; value-initialized
509
+ ::cuda::std::copy(ptr, ptr + N, a.begin()); // reads ptr[0] .. ptr[N - 1]
510
+ return a;
511
+ }
512
+
513
+ public:
514
+ //! Deprecated [Since 3.0]: C-array overload of the 1D MultiHistogramEven; prefer the overload taking cuda::std::array arguments.
515
+ template <int NUM_CHANNELS,
516
+ int NUM_ACTIVE_CHANNELS,
517
+ typename SampleIteratorT,
518
+ typename CounterT,
519
+ typename LevelT,
520
+ typename OffsetT>
521
+ CCCL_DEPRECATED_BECAUSE("Prefer the new overload taking cuda::std::arrays") CUB_RUNTIME_FUNCTION static cudaError_t
522
+ MultiHistogramEven(
523
+ void* d_temp_storage,
524
+ size_t& temp_storage_bytes,
525
+ SampleIteratorT d_samples,
526
+ CounterT* d_histogram[NUM_ACTIVE_CHANNELS],
527
+ const int num_levels[NUM_ACTIVE_CHANNELS],
528
+ const LevelT lower_level[NUM_ACTIVE_CHANNELS],
529
+ const LevelT upper_level[NUM_ACTIVE_CHANNELS],
530
+ OffsetT num_pixels,
531
+ cudaStream_t stream = 0)
532
+ {
533
+ // Copy the first NUM_ACTIVE_CHANNELS entries of each C array into a cuda::std::array (via to_array)
534
+ // and forward to the cuda::std::array overload; its cudaError_t result is returned unchanged.
535
+ return MultiHistogramEven<NUM_CHANNELS, NUM_ACTIVE_CHANNELS>(
536
+ d_temp_storage,
537
+ temp_storage_bytes,
538
+ d_samples,
539
+ to_array<NUM_ACTIVE_CHANNELS>(d_histogram),
540
+ to_array<NUM_ACTIVE_CHANNELS>(num_levels),
541
+ to_array<NUM_ACTIVE_CHANNELS>(lower_level),
542
+ to_array<NUM_ACTIVE_CHANNELS>(upper_level),
543
+ num_pixels,
544
+ stream);
545
+ }
546
+
547
+ //! @rst
548
+ //! Computes per-channel intensity histograms from a sequence of
549
+ //! multi-channel "pixel" data samples using equal-width bins.
550
+ //!
551
+ //! - The input is a sequence of *pixel* structures, where each pixel
552
+ //! comprises a record of ``NUM_CHANNELS`` consecutive data samples (e.g., an *RGBA* pixel).
553
+ //! - ``NUM_CHANNELS`` can be up to 4.
554
+ //! - Of the ``NUM_CHANNELS`` specified, the function will only compute
555
+ //! histograms for the first ``NUM_ACTIVE_CHANNELS`` (e.g., only *RGB*
556
+ //! histograms from *RGBA* pixel samples).
557
+ //! - A two-dimensional *region of interest* within ``d_samples`` can be
558
+ //! specified using the ``num_row_samples``, ``num_rows``, and ``row_stride_bytes`` parameters.
559
+ //! - The row stride must be a whole multiple of the sample data type
560
+ //! size, i.e., ``(row_stride_bytes % sizeof(SampleT)) == 0``.
561
+ //! - The number of histogram bins for channel\ :sub:`i` is ``num_levels[i] - 1``.
562
+ //! - For channel\ :sub:`i`, the range of values for all histogram bins have the same width:
563
+ //! ``(upper_level[i] - lower_level[i]) / (num_levels[i] - 1)``
564
+ //! - If the common type of sample and level is of integral type, the bin for a sample is
565
+ //! computed as ``(sample - lower_level[i]) * (num_levels - 1) / (upper_level[i] - lower_level[i])``,
566
+ //! round down to the nearest whole number. To protect against potential overflows, if, for any channel ``i``,
567
+ //! the product ``(upper_level[i] - lower_level[i]) * (num_levels[i] - 1)`` exceeds the number representable by
568
+ //! an ``uint64_t``, the cuda error ``cudaErrorInvalidValue`` is returned.
569
+ //! If the common type is 128 bits wide, bin computation will use 128-bit arithmetic and ``cudaErrorInvalidValue``
570
+ //! will only be returned if bin computation would overflow for 128-bit arithmetic.
571
+ //! - For a given row ``r`` in ``[0, num_rows)``, and sample ``s`` in
572
+ //! ``[0, num_row_pixels)``, let
573
+ //! ``row_begin = d_samples + r * row_stride_bytes / sizeof(SampleT)``,
574
+ //! ``sample_begin = row_begin + s * NUM_CHANNELS``, and
575
+ //! ``sample_end = sample_begin + NUM_ACTIVE_CHANNELS``. For a given channel ``c`` in
576
+ //! ``[0, NUM_ACTIVE_CHANNELS)``, the ranges
577
+ //! ``[sample_begin, sample_end)`` and
578
+ //! ``[d_histogram[c], d_histogram[c] + num_levels[c] - 1)`` shall not overlap in any way.
579
+ //! - ``cuda::std::common_type<LevelT, SampleT>`` must be valid, and both LevelT
580
+ //! and SampleT must be valid arithmetic types. The common type must be
581
+ //! convertible to ``int`` and trivially copyable.
582
+ //! - @devicestorage
583
+ //!
584
+ //! Snippet
585
+ //! +++++++
586
+ //!
587
+ //! The code snippet below illustrates the computation of three 256-bin
588
+ //! *RGB* histograms from a 2x3 region of interest within a flattened 2x4
589
+ //! array of quad-channel *RGBA* pixels (8 bits per channel per pixel).
590
+ //!
591
+ //! .. code-block:: c++
592
+ //!
593
+ //! #include <cub/cub.cuh> // or equivalently <cub/device/device_histogram.cuh>
594
+ //!
595
+ //! // Declare, allocate, and initialize device-accessible pointers for input
596
+ //! // samples and output histograms
597
+ //! int num_row_pixels; // e.g., 3
598
+ //! int num_rows; // e.g., 2
599
+ //! size_t row_stride_bytes; // e.g., 4 * sizeof(unsigned char) * NUM_CHANNELS
600
+ //! unsigned char* d_samples; // e.g., [(2, 6, 7, 5), (3, 0, 2, 1), (7, 0, 6, 2), (-, -, -, -),
601
+ //! // (0, 6, 7, 5), (3, 0, 2, 6), (1, 1, 1, 1), (-, -, -, -)]
602
+ //! int* d_histogram[3]; // e.g., three device pointers to three device buffers,
603
+ //! // each allocated with 256 integer counters
604
+ //! int num_levels[3]; // e.g., {257, 257, 257};
605
+ //! unsigned int lower_level[3]; // e.g., {0, 0, 0};
606
+ //! unsigned int upper_level[3]; // e.g., {256, 256, 256};
607
+ //! ...
608
+ //!
609
+ //! // Determine temporary device storage requirements
610
+ //! void* d_temp_storage = nullptr;
611
+ //! size_t temp_storage_bytes = 0;
612
+ //! cub::DeviceHistogram::MultiHistogramEven<4, 3>(
613
+ //! d_temp_storage, temp_storage_bytes,
614
+ //! d_samples, d_histogram, num_levels, lower_level, upper_level,
615
+ //! num_row_pixels, num_rows, row_stride_bytes);
616
+ //!
617
+ //! // Allocate temporary storage
618
+ //! cudaMalloc(&d_temp_storage, temp_storage_bytes);
619
+ //!
620
+ //! // Compute histograms
621
+ //! cub::DeviceHistogram::MultiHistogramEven<4, 3>(
622
+ //! d_temp_storage, temp_storage_bytes,
623
+ //! d_samples, d_histogram, num_levels, lower_level, upper_level,
624
+ //! num_row_pixels, num_rows, row_stride_bytes);
625
+ //!
626
+ //! // d_histogram <-- [ [1, 1, 1, 2, 0, 0, 0, 1, 0, 0, 0, ..., 0],
627
+ //! // [3, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, ..., 0],
628
+ //! // [0, 1, 2, 0, 0, 0, 1, 2, 0, 0, 0, ..., 0] ]
629
+ //!
630
+ //! @endrst
631
+ //!
632
+ //! @tparam NUM_CHANNELS
633
+ //! Number of channels interleaved in the input data (may be greater than
634
+ //! the number of channels being actively histogrammed)
635
+ //!
636
+ //! @tparam NUM_ACTIVE_CHANNELS
637
+ //! **[inferred]** Number of channels actively being histogrammed
638
+ //!
639
+ //! @tparam SampleIteratorT
640
+ //! **[inferred]** Random-access input iterator type for reading input
641
+ //! samples. @iterator
642
+ //!
643
+ //! @tparam CounterT
644
+ //! **[inferred]** Integer type for histogram bin counters
645
+ //!
646
+ //! @tparam LevelT
647
+ //! **[inferred]** Type for specifying boundaries (levels)
648
+ //!
649
+ //! @tparam OffsetT
650
+ //! **[inferred]** Signed integer type for sequence offsets, list lengths,
651
+ //! pointer differences, etc. @offset_size1
652
+ //!
653
+ //! @param[in] d_temp_storage
654
+ //! Device-accessible allocation of temporary storage. When `nullptr`, the
655
+ //! required allocation size is written to `temp_storage_bytes` and no
656
+ //! work is done.
657
+ //!
658
+ //! @param[in,out] temp_storage_bytes
659
+ //! Reference to size in bytes of `d_temp_storage` allocation
660
+ //!
661
+ //! @param[in] d_samples
662
+ //! The pointer to the multi-channel input sequence of data samples. The
663
+ //! samples from different channels are assumed to be interleaved (e.g.,
664
+ //! an array of 32-bit pixels where each pixel consists of four
665
+ //! *RGBA* 8-bit samples).
666
+ //!
667
+ //! @param[out] d_histogram
668
+ //! @rst
669
+ //! The pointers to the histogram counter output arrays, one for each
670
+ //! active channel. For channel\ :sub:`i`, the allocation length
671
+ //! of ``d_histogram[i]`` should be ``num_levels[i] - 1``.
672
+ //! @endrst
673
+ //!
674
+ //! @param[in] num_levels
675
+ //! @rst
676
+ //! The number of boundaries (levels) for delineating histogram samples in each active channel.
677
+ //! Implies that the number of bins for channel\ :sub:`i` is ``num_levels[i] - 1``.
678
+ //! @endrst
679
+ //!
680
+ //! @param[in] lower_level
681
+ //! The lower sample value bound (inclusive) for the lowest histogram bin in each active channel.
682
+ //!
683
+ //! @param[in] upper_level
684
+ //! The upper sample value bound (exclusive) for the highest histogram bin in each active channel.
685
+ //!
686
+ //! @param[in] num_row_pixels
687
+ //! The number of multi-channel pixels per row in the region of interest
688
+ //!
689
+ //! @param[in] num_rows
690
+ //! The number of rows in the region of interest
691
+ //!
692
+ //! @param[in] row_stride_bytes
693
+ //! The number of bytes between starts of consecutive rows in the region of
694
+ //! interest
695
+ //!
696
+ //! @param[in] stream
697
+ //! @rst
698
+ //! **[optional]** CUDA stream to launch kernels within. Default is stream\ :sub:`0`.
699
+ //! @endrst
700
+ template <int NUM_CHANNELS,
701
+ int NUM_ACTIVE_CHANNELS,
702
+ typename SampleIteratorT,
703
+ typename CounterT,
704
+ typename LevelT,
705
+ typename OffsetT>
706
+ CUB_RUNTIME_FUNCTION static cudaError_t MultiHistogramEven(
707
+ void* d_temp_storage,
708
+ size_t& temp_storage_bytes,
709
+ SampleIteratorT d_samples,
710
+ ::cuda::std::array<CounterT*, NUM_ACTIVE_CHANNELS> d_histogram,
711
+ ::cuda::std::array<int, NUM_ACTIVE_CHANNELS> num_levels,
712
+ ::cuda::std::array<LevelT, NUM_ACTIVE_CHANNELS> lower_level,
713
+ ::cuda::std::array<LevelT, NUM_ACTIVE_CHANNELS> upper_level,
714
+ OffsetT num_row_pixels,
715
+ OffsetT num_rows,
716
+ size_t row_stride_bytes,
717
+ cudaStream_t stream = 0)
718
+ { // Entry point that selects the offset type and hands the work to DispatchHistogram::DispatchEven.
719
+ _CCCL_NVTX_RANGE_SCOPE_IF(d_temp_storage, "cub::DeviceHistogram::MultiHistogramEven"); // NOTE(review): macro defined elsewhere; presumably opens the NVTX range only when d_temp_storage is non-null
720
+
721
+ /// The sample value type of the input iterator
722
+ using SampleT = cub::detail::it_value_t<SampleIteratorT>;
723
+ ::cuda::std::bool_constant<sizeof(SampleT) == 1> is_byte_sample; // compile-time tag (true for one-byte samples) passed to DispatchEven
724
+
725
+ if constexpr (sizeof(OffsetT) > sizeof(int)) // the narrowing path is only compiled when OffsetT is wider than int
726
+ {
727
+ if ((unsigned long long) (num_rows * row_stride_bytes) < (unsigned long long) INT_MAX) // total byte extent of all rows is below INT_MAX
728
+ {
729
+ // Down-convert OffsetT data type: dispatch with int offsets instead of OffsetT
730
+ return DispatchHistogram<NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, LevelT, int>::DispatchEven(
731
+ d_temp_storage,
732
+ temp_storage_bytes,
733
+ d_samples,
734
+ d_histogram,
735
+ num_levels,
736
+ lower_level,
737
+ upper_level,
738
+ (int) num_row_pixels,
739
+ (int) num_rows,
740
+ (int) (row_stride_bytes / sizeof(SampleT)), // row stride converted from bytes to a count of samples
741
+ stream,
742
+ is_byte_sample);
743
+ }
744
+ }
745
+ // Reached when OffsetT is not wider than int or the extent check above failed: dispatch with the caller's OffsetT.
746
+ return DispatchHistogram<NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, LevelT, OffsetT>::DispatchEven(
747
+ d_temp_storage,
748
+ temp_storage_bytes,
749
+ d_samples,
750
+ d_histogram,
751
+ num_levels,
752
+ lower_level,
753
+ upper_level,
754
+ num_row_pixels,
755
+ num_rows,
756
+ (OffsetT) (row_stride_bytes / sizeof(SampleT)), // row stride converted from bytes to a count of samples
757
+ stream,
758
+ is_byte_sample);
759
+ }
760
+
761
+ //! Deprecated [Since 3.0]: C-array overload of the 2D MultiHistogramEven; prefer the overload taking cuda::std::array arguments.
762
+ template <int NUM_CHANNELS,
763
+ int NUM_ACTIVE_CHANNELS,
764
+ typename SampleIteratorT,
765
+ typename CounterT,
766
+ typename LevelT,
767
+ typename OffsetT>
768
+ CCCL_DEPRECATED_BECAUSE("Prefer the new overload taking cuda::std::arrays") CUB_RUNTIME_FUNCTION static cudaError_t
769
+ MultiHistogramEven(
770
+ void* d_temp_storage,
771
+ size_t& temp_storage_bytes,
772
+ SampleIteratorT d_samples,
773
+ CounterT* d_histogram[NUM_ACTIVE_CHANNELS],
774
+ const int num_levels[NUM_ACTIVE_CHANNELS],
775
+ const LevelT lower_level[NUM_ACTIVE_CHANNELS],
776
+ const LevelT upper_level[NUM_ACTIVE_CHANNELS],
777
+ OffsetT num_row_pixels,
778
+ OffsetT num_rows,
779
+ size_t row_stride_bytes,
780
+ cudaStream_t stream = 0)
781
+ { // Converts each C array to a cuda::std::array via to_array and forwards; the result is returned unchanged.
782
+ return MultiHistogramEven<NUM_CHANNELS, NUM_ACTIVE_CHANNELS>(
783
+ d_temp_storage,
784
+ temp_storage_bytes,
785
+ d_samples,
786
+ to_array<NUM_ACTIVE_CHANNELS>(d_histogram), // copies the first NUM_ACTIVE_CHANNELS entries of each array
787
+ to_array<NUM_ACTIVE_CHANNELS>(num_levels),
788
+ to_array<NUM_ACTIVE_CHANNELS>(lower_level),
789
+ to_array<NUM_ACTIVE_CHANNELS>(upper_level),
790
+ num_row_pixels,
791
+ num_rows,
792
+ row_stride_bytes,
793
+ stream);
794
+ }
795
+
796
+ //! @} end member group
797
+ //! @name Custom bin ranges
798
+ //! @{
799
+
800
+ //! @rst
801
+ //! Computes an intensity histogram from a sequence of data samples using the specified bin boundary levels.
802
+ //!
803
+ //! - The number of histogram bins is (``num_levels - 1``)
804
+ //! - The value range for bin\ :sub:`i` is ``[level[i], level[i+1])``
805
+ //! - The range ``[d_histogram, d_histogram + num_levels - 1)`` shall not
806
+ //! overlap ``[d_samples, d_samples + num_samples)`` nor
807
+ //! ``[d_levels, d_levels + num_levels)`` in any way. The ranges
808
+ //! ``[d_levels, d_levels + num_levels)`` and
809
+ //! ``[d_samples, d_samples + num_samples)`` may overlap.
810
+ //! - @devicestorage
811
+ //!
812
+ //! Snippet
813
+ //! +++++++
814
+ //!
815
+ //! The code snippet below illustrates the computation of a six-bin histogram
816
+ //! from a sequence of float samples
817
+ //!
818
+ //! .. code-block:: c++
819
+ //!
820
+ //! #include <cub/cub.cuh> // or equivalently <cub/device/device_histogram.cuh>
821
+ //!
822
+ //! // Declare, allocate, and initialize device-accessible pointers for input
823
+ //! // samples and output histogram
824
+ //! int num_samples; // e.g., 10
825
+ //! float* d_samples; // e.g., [2.2, 6.0, 7.1, 2.9, 3.5, 0.3, 2.9, 2.0, 6.1, 999.5]
826
+ //! int* d_histogram; // e.g., [ -, -, -, -, -, -]
827
+ //! int num_levels; // e.g., 7 (seven level boundaries for six bins)
828
+ //! float* d_levels; // e.g., [0.0, 2.0, 4.0, 6.0, 8.0, 12.0, 16.0]
829
+ //! ...
830
+ //!
831
+ //! // Determine temporary device storage requirements
832
+ //! void* d_temp_storage = nullptr;
833
+ //! size_t temp_storage_bytes = 0;
834
+ //! cub::DeviceHistogram::HistogramRange(
835
+ //! d_temp_storage, temp_storage_bytes,
836
+ //! d_samples, d_histogram, num_levels, d_levels, num_samples);
837
+ //!
838
+ //! // Allocate temporary storage
839
+ //! cudaMalloc(&d_temp_storage, temp_storage_bytes);
840
+ //!
841
+ //! // Compute histograms
842
+ //! cub::DeviceHistogram::HistogramRange(
843
+ //! d_temp_storage, temp_storage_bytes,
844
+ //! d_samples, d_histogram, num_levels, d_levels, num_samples);
845
+ //!
846
+ //! // d_histogram <-- [1, 5, 0, 3, 0, 0];
847
+ //!
848
+ //! @endrst
849
+ //!
850
+ //! @tparam SampleIteratorT
851
+ //! **[inferred]** Random-access input iterator type for reading
852
+ //! input samples. @iterator
853
+ //!
854
+ //! @tparam CounterT
855
+ //! **[inferred]** Integer type for histogram bin counters
856
+ //!
857
+ //! @tparam LevelT
858
+ //! **[inferred]** Type for specifying boundaries (levels)
859
+ //!
860
+ //! @tparam OffsetT
861
+ //! **[inferred]** Signed integer type for sequence offsets, list lengths,
862
+ //! pointer differences, etc. @offset_size1
863
+ //!
864
+ //! @param[in] d_temp_storage
865
+ //! Device-accessible allocation of temporary storage. When `nullptr`, the
866
+ //! required allocation size is written to `temp_storage_bytes` and no work
867
+ //! is done.
868
+ //!
869
+ //! @param[in,out] temp_storage_bytes
870
+ //! Reference to size in bytes of `d_temp_storage` allocation
871
+ //!
872
+ //! @param[in] d_samples
873
+ //! The pointer to the input sequence of data samples.
874
+ //!
875
+ //! @param[out] d_histogram
876
+ //! The pointer to the histogram counter output array of length
877
+ //! `num_levels - 1`.
878
+ //!
879
+ //! @param[in] num_levels
880
+ //! The number of boundaries (levels) for delineating histogram samples.
881
+ //! Implies that the number of bins is `num_levels - 1`.
882
+ //!
883
+ //! @param[in] d_levels
884
+ //! The pointer to the array of boundaries (levels). Bin ranges are defined
885
+ //! by consecutive boundary pairings: lower sample value boundaries are
886
+ //! inclusive and upper sample value boundaries are exclusive.
887
+ //!
888
+ //! @param[in] num_samples
889
+ //! The number of input data samples (i.e., the length of `d_samples`)
890
+ //!
891
+ //! @param[in] stream
892
+ //! @rst
893
+ //! **[optional]** CUDA stream to launch kernels within. Default is stream\ :sub:`0`.
894
+ //! @endrst
895
+ template <typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT>
896
+ CUB_RUNTIME_FUNCTION static cudaError_t HistogramRange(
897
+ void* d_temp_storage,
898
+ size_t& temp_storage_bytes,
899
+ SampleIteratorT d_samples,
900
+ CounterT* d_histogram,
901
+ int num_levels,
902
+ const LevelT* d_levels,
903
+ OffsetT num_samples,
904
+ cudaStream_t stream = 0)
905
+ { // 1D single-channel wrapper: forwards to MultiHistogramRange<1, 1> with the input described as one row.
906
+ /// The sample value type of the input iterator (needed below to compute the row size in bytes)
907
+ using SampleT = cub::detail::it_value_t<SampleIteratorT>;
908
+ return MultiHistogramRange<1, 1>(
909
+ d_temp_storage,
910
+ temp_storage_bytes,
911
+ d_samples,
912
+ ::cuda::std::array{d_histogram}, // each scalar argument is wrapped in a one-element cuda::std::array
913
+ ::cuda::std::array{num_levels},
914
+ ::cuda::std::array{d_levels},
915
+ num_samples, // samples in the single row
916
+ (OffsetT) 1, // passed in the num_rows position: exactly one row
917
+ (size_t) (sizeof(SampleT) * num_samples), // passed in the row_stride_bytes position: byte size of that row
918
+ stream);
919
+ }
920
+
921
+ //! @rst
922
+ //! Computes an intensity histogram from a sequence of data samples using the specified bin boundary levels.
923
+ //!
924
+ //! - A two-dimensional *region of interest* within ``d_samples`` can be
925
+ //! specified using the ``num_row_samples``, ``num_rows``, and ``row_stride_bytes`` parameters.
926
+ //! - The row stride must be a whole multiple of the sample data type
927
+ //! size, i.e., ``(row_stride_bytes % sizeof(SampleT)) == 0``.
928
+ //! - The number of histogram bins is (``num_levels - 1``)
929
+ //! - The value range for bin\ :sub:`i` is ``[level[i], level[i+1])``
930
+ //! - For a given row ``r`` in ``[0, num_rows)``, let
931
+ //! ``row_begin = d_samples + r * row_stride_bytes / sizeof(SampleT)`` and
932
+ //! ``row_end = row_begin + num_row_samples``. The range
933
+ //! ``[d_histogram, d_histogram + num_levels - 1)`` shall not overlap
934
+ //! ``[row_begin, row_end)`` nor ``[d_levels, d_levels + num_levels)``.
935
+ //! The ranges ``[d_levels, d_levels + num_levels)`` and ``[row_begin, row_end)`` may overlap.
936
+ //! - @devicestorage
937
+ //!
938
+ //! Snippet
939
+ //! +++++++
940
+ //!
941
+ //! The code snippet below illustrates the computation of a six-bin histogram
942
+ //! from a 2x5 region of interest within a flattened 2x7 array of float samples.
943
+ //!
944
+ //! .. code-block:: c++
945
+ //!
946
+ //! #include <cub/cub.cuh> // or equivalently <cub/device/device_histogram.cuh>
947
+ //!
948
+ //! // Declare, allocate, and initialize device-accessible pointers for input samples and
949
+ //! // output histogram
950
+ //! int num_row_samples; // e.g., 5
951
+ //! int num_rows; // e.g., 2;
952
+ //! int row_stride_bytes; // e.g., 7 * sizeof(float)
953
+ //! float* d_samples; // e.g., [2.2, 6.0, 7.1, 2.9, 3.5, -, -,
954
+ //! // 0.3, 2.9, 2.0, 6.1, 999.5, -, -]
955
+ //! int* d_histogram; // e.g., [ -, -, -, -, -, -]
956
+ //! int num_levels; // e.g., 7 (seven level boundaries for six bins)
957
+ //! float *d_levels; // e.g., [0.0, 2.0, 4.0, 6.0, 8.0, 12.0, 16.0]
958
+ //! ...
959
+ //!
960
+ //! // Determine temporary device storage requirements
961
+ //! void* d_temp_storage = nullptr;
962
+ //! size_t temp_storage_bytes = 0;
963
+ //! cub::DeviceHistogram::HistogramRange(
964
+ //! d_temp_storage, temp_storage_bytes,
965
+ //! d_samples, d_histogram, num_levels, d_levels,
966
+ //! num_row_samples, num_rows, row_stride_bytes);
967
+ //!
968
+ //! // Allocate temporary storage
969
+ //! cudaMalloc(&d_temp_storage, temp_storage_bytes);
970
+ //!
971
+ //! // Compute histograms
972
+ //! cub::DeviceHistogram::HistogramRange(
973
+ //! d_temp_storage, temp_storage_bytes,
974
+ //! d_samples, d_histogram, num_levels, d_levels,
975
+ //! num_row_samples, num_rows, row_stride_bytes);
976
+ //!
977
+ //! // d_histogram <-- [1, 5, 0, 3, 0, 0];
978
+ //!
979
+ //! @endrst
980
+ //!
981
+ //! @tparam SampleIteratorT
982
+ //! **[inferred]** Random-access input iterator type for reading
983
+ //! input samples. @iterator
984
+ //!
985
+ //! @tparam CounterT
986
+ //! **[inferred]** Integer type for histogram bin counters
987
+ //!
988
+ //! @tparam LevelT
989
+ //! **[inferred]** Type for specifying boundaries (levels)
990
+ //!
991
+ //! @tparam OffsetT
992
+ //! **[inferred]** Signed integer type for sequence offsets, list lengths,
993
+ //! pointer differences, etc. @offset_size1
994
+ //!
995
+ //! @param[in] d_temp_storage
996
+ //! Device-accessible allocation of temporary storage. When `nullptr`, the
997
+ //! required allocation size is written to `temp_storage_bytes` and no
998
+ //! work is done.
999
+ //!
1000
+ //! @param[in,out] temp_storage_bytes
1001
+ //! Reference to size in bytes of `d_temp_storage` allocation
1002
+ //!
1003
+ //! @param[in] d_samples
1004
+ //! The pointer to the input sequence of data samples.
1005
+ //!
1006
+ //! @param[out] d_histogram
1007
+ //! The pointer to the histogram counter output array of length
1008
+ //! `num_levels - 1`.
1009
+ //!
1010
+ //! @param[in] num_levels
1011
+ //! The number of boundaries (levels) for delineating histogram samples.
1012
+ //! Implies that the number of bins is `num_levels - 1`.
1013
+ //!
1014
+ //! @param[in] d_levels
1015
+ //! The pointer to the array of boundaries (levels). Bin ranges are defined
1016
+ //! by consecutive boundary pairings: lower sample value boundaries are
1017
+ //! inclusive and upper sample value boundaries are exclusive.
1018
+ //!
1019
+ //! @param[in] num_row_samples
1020
+ //! The number of data samples per row in the region of interest
1021
+ //!
1022
+ //! @param[in] num_rows
1023
+ //! The number of rows in the region of interest
1024
+ //!
1025
+ //! @param[in] row_stride_bytes
1026
+ //! The number of bytes between starts of consecutive rows in the region
1027
+ //! of interest
1028
+ //!
1029
+ //! @param[in] stream
1030
+ //! @rst
1031
+ //! **[optional]** CUDA stream to launch kernels within. Default is stream\ :sub:`0`.
1032
+ //! @endrst
1033
+ template <typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT>
1034
+ CUB_RUNTIME_FUNCTION static cudaError_t HistogramRange(
1035
+ void* d_temp_storage,
1036
+ size_t& temp_storage_bytes,
1037
+ SampleIteratorT d_samples,
1038
+ CounterT* d_histogram,
1039
+ int num_levels,
1040
+ const LevelT* d_levels,
1041
+ OffsetT num_row_samples,
1042
+ OffsetT num_rows,
1043
+ size_t row_stride_bytes,
1044
+ cudaStream_t stream = 0)
1045
+ { // 2D single-channel wrapper: forwards to MultiHistogramRange<1, 1> and returns its result unchanged.
1046
+ return MultiHistogramRange<1, 1>( // NUM_CHANNELS = 1, NUM_ACTIVE_CHANNELS = 1
1047
+ d_temp_storage,
1048
+ temp_storage_bytes,
1049
+ d_samples,
1050
+ ::cuda::std::array{d_histogram}, // each scalar argument is wrapped in a one-element cuda::std::array
1051
+ ::cuda::std::array{num_levels},
1052
+ ::cuda::std::array{d_levels},
1053
+ num_row_samples, // region-of-interest arguments are passed through unchanged
1054
+ num_rows,
1055
+ row_stride_bytes,
1056
+ stream);
1057
+ }
1058
+
1059
+ //! @rst
1060
+ //! Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples
1061
+ //! using the specified bin boundary levels.
1062
+ //!
1063
+ //! - The input is a sequence of *pixel* structures, where each pixel
1064
+ //! comprises a record of ``NUM_CHANNELS`` consecutive data samples (e.g., an *RGBA* pixel).
1065
+ //! - ``NUM_CHANNELS`` can be up to 4.
1066
+ //! - Of the ``NUM_CHANNELS`` specified, the function will only compute
1067
+ //! histograms for the first ``NUM_ACTIVE_CHANNELS`` (e.g., *RGB* histograms from *RGBA* pixel samples).
1068
+ //! - The number of histogram bins for channel\ :sub:`i` is ``num_levels[i] - 1``.
1069
+ //! - For channel\ :sub:`i`, the value range for bin\ :sub:`j` is
1070
+ //! ``[d_levels[i][j], d_levels[i][j+1])``
1071
+ //! - For given channels ``c1`` and ``c2`` in ``[0, NUM_ACTIVE_CHANNELS)``, the
1072
+ //! range ``[d_histogram[c1], d_histogram[c1] + num_levels[c1] - 1)`` shall
1073
+ //! not overlap ``[d_samples, d_samples + NUM_CHANNELS * num_pixels)`` nor
1074
+ //! ``[d_levels[c2], d_levels[c2] + num_levels[c2])`` in any way.
1075
+ //! The ranges ``[d_levels[c2], d_levels[c2] + num_levels[c2])`` and
1076
+ //! ``[d_samples, d_samples + NUM_CHANNELS * num_pixels)`` may overlap.
1077
+ //! - @devicestorage
1078
+ //!
1079
+ //! Snippet
1080
+ //! +++++++
1081
+ //!
1082
+ //! The code snippet below illustrates the computation of three 4-bin *RGB*
1083
+ //! histograms from a quad-channel sequence of *RGBA* pixels
1084
+ //! (8 bits per channel per pixel)
1085
+ //!
1086
+ //! .. code-block:: c++
1087
+ //!
1088
+ //! #include <cub/cub.cuh> // or equivalently <cub/device/device_histogram.cuh>
1089
+ //!
1090
+ //! // Declare, allocate, and initialize device-accessible pointers for
1091
+ //! // input samples and output histograms
1092
+ //! int num_pixels; // e.g., 5
1093
+ //! unsigned char *d_samples; // e.g., [(2, 6, 7, 5),(3, 0, 2, 1),(7, 0, 6, 2),
1094
+ //! // (0, 6, 7, 5),(3, 0, 2, 6)]
1095
+ //! unsigned int *d_histogram[3]; // e.g., [[ -, -, -, -],[ -, -, -, -],[ -, -, -, -]];
1096
+ //! int num_levels[3]; // e.g., {5, 5, 5};
1097
+ //! unsigned int *d_levels[3]; // e.g., [ [0, 2, 4, 6, 8],
1098
+ //! // [0, 2, 4, 6, 8],
1099
+ //! // [0, 2, 4, 6, 8] ];
1100
+ //! ...
1101
+ //!
1102
+ //! // Determine temporary device storage requirements
1103
+ //! void* d_temp_storage = nullptr;
1104
+ //! size_t temp_storage_bytes = 0;
1105
+ //! cub::DeviceHistogram::MultiHistogramRange<4, 3>(
1106
+ //! d_temp_storage, temp_storage_bytes,
1107
+ //! d_samples, d_histogram, num_levels, d_levels, num_pixels);
1108
+ //!
1109
+ //! // Allocate temporary storage
1110
+ //! cudaMalloc(&d_temp_storage, temp_storage_bytes);
1111
+ //!
1112
+ //! // Compute histograms
1113
+ //! cub::DeviceHistogram::MultiHistogramRange<4, 3>(
1114
+ //! d_temp_storage, temp_storage_bytes,
1115
+ //! d_samples, d_histogram, num_levels, d_levels, num_pixels);
1116
+ //!
1117
+ //! // d_histogram <-- [ [1, 3, 0, 1],
1118
+ //! // [3, 0, 0, 2],
1119
+ //! // [0, 2, 0, 3] ]
1120
+ //!
1121
+ //! @endrst
1122
+ //!
1123
+ //! @tparam NUM_CHANNELS
1124
+ //! Number of channels interleaved in the input data (may be greater than
1125
+ //! the number of channels being actively histogrammed)
1126
+ //!
1127
+ //! @tparam NUM_ACTIVE_CHANNELS
1128
+ //! **[inferred]** Number of channels actively being histogrammed
1129
+ //!
1130
+ //! @tparam SampleIteratorT
1131
+ //! **[inferred]** Random-access input iterator type for reading
1132
+ //! input samples. @iterator
1133
+ //!
1134
+ //! @tparam CounterT
1135
+ //! **[inferred]** Integer type for histogram bin counters
1136
+ //!
1137
+ //! @tparam LevelT
1138
+ //! **[inferred]** Type for specifying boundaries (levels)
1139
+ //!
1140
+ //! @tparam OffsetT
1141
+ //! **[inferred]** Signed integer type for sequence offsets, list lengths,
1142
+ //! pointer differences, etc. @offset_size1
1143
+ //!
1144
+ //! @param[in] d_temp_storage
1145
+ //! Device-accessible allocation of temporary storage. When `nullptr`, the
1146
+ //! required allocation size is written to `temp_storage_bytes` and no
1147
+ //! work is done.
1148
+ //!
1149
+ //! @param[in,out] temp_storage_bytes
1150
+ //! Reference to size in bytes of `d_temp_storage` allocation
1151
+ //!
1152
+ //! @param[in] d_samples
1153
+ //! The pointer to the multi-channel input sequence of data samples.
1154
+ //! The samples from different channels are assumed to be interleaved (e.g.,
1155
+ //! an array of 32-bit pixels where each pixel consists of four *RGBA*
1156
+ //! 8-bit samples).
1157
+ //!
1158
+ //! @param[out] d_histogram
1159
+ //! @rst
1160
+ //! The pointers to the histogram counter output arrays, one for each active
1161
+ //! channel. For channel\ :sub:`i`, the allocation length of
1162
+ //! ``d_histogram[i]`` should be ``num_levels[i] - 1``.
1163
+ //! @endrst
1164
+ //!
1165
+ //! @param[in] num_levels
1166
+ //! @rst
1167
+ //! The number of boundaries (levels) for delineating histogram samples in
1168
+ //! each active channel. Implies that the number of bins for
1169
+ //! channel\ :sub:`i` is ``num_levels[i] - 1``.
1170
+ //! @endrst
1171
+ //!
1172
+ //! @param[in] d_levels
1173
+ //! The pointers to the arrays of boundaries (levels), one for each active
1174
+ //! channel. Bin ranges are defined by consecutive boundary pairings: lower
1175
+ //! sample value boundaries are inclusive and upper sample value boundaries
1176
+ //! are exclusive.
1177
+ //!
1178
+ //! @param[in] num_pixels
1179
+ //! The number of multi-channel pixels (i.e., the number of samples in `d_samples` divided by `NUM_CHANNELS`)
1180
+ //!
1181
+ //! @param[in] stream
1182
+ //! @rst
1183
+ //! **[optional]** CUDA stream to launch kernels within. Default is stream\ :sub:`0`.
1184
+ //! @endrst
1185
template <int NUM_CHANNELS,
          int NUM_ACTIVE_CHANNELS,
          typename SampleIteratorT,
          typename CounterT,
          typename LevelT,
          typename OffsetT>
CUB_RUNTIME_FUNCTION static cudaError_t MultiHistogramRange(
  void* d_temp_storage,
  size_t& temp_storage_bytes,
  SampleIteratorT d_samples,
  ::cuda::std::array<CounterT*, NUM_ACTIVE_CHANNELS> d_histogram,
  ::cuda::std::array<int, NUM_ACTIVE_CHANNELS> num_levels,
  ::cuda::std::array<const LevelT*, NUM_ACTIVE_CHANNELS> d_levels,
  OffsetT num_pixels,
  cudaStream_t stream = 0)
{
  // Value type obtained from the sample iterator
  using sample_t = cub::detail::it_value_t<SampleIteratorT>;

  // The flat pixel sequence is forwarded to the region-of-interest overload as
  // a single row of `num_pixels` pixels whose stride equals the byte size of
  // that one row.
  const OffsetT single_row = static_cast<OffsetT>(1);
  const size_t row_bytes   = static_cast<size_t>(sizeof(sample_t) * NUM_CHANNELS * num_pixels);

  return MultiHistogramRange<NUM_CHANNELS, NUM_ACTIVE_CHANNELS>(
    d_temp_storage,
    temp_storage_bytes,
    d_samples,
    d_histogram,
    num_levels,
    d_levels,
    num_pixels,
    single_row,
    row_bytes,
    stream);
}
1216
+
1217
//! Deprecated [Since 3.0]: prefer the overload taking ``cuda::std::array`` arguments.
template <int NUM_CHANNELS,
          int NUM_ACTIVE_CHANNELS,
          typename SampleIteratorT,
          typename CounterT,
          typename LevelT,
          typename OffsetT>
CCCL_DEPRECATED_BECAUSE("Prefer the new overload taking cuda::std::arrays") CUB_RUNTIME_FUNCTION static cudaError_t
MultiHistogramRange(
  void* d_temp_storage,
  size_t& temp_storage_bytes,
  SampleIteratorT d_samples,
  CounterT* d_histogram[NUM_ACTIVE_CHANNELS],
  const int num_levels[NUM_ACTIVE_CHANNELS],
  const LevelT* const d_levels[NUM_ACTIVE_CHANNELS],
  OffsetT num_pixels,
  cudaStream_t stream = 0)
{
  // Repackage the per-channel C arrays with `to_array` (defined outside this
  // excerpt) so the call below selects the array-based overload.
  const auto histogram_ptrs = to_array<NUM_ACTIVE_CHANNELS>(d_histogram);
  const auto level_counts   = to_array<NUM_ACTIVE_CHANNELS>(num_levels);
  const auto level_ptrs     = to_array<NUM_ACTIVE_CHANNELS>(d_levels);

  return MultiHistogramRange<NUM_CHANNELS, NUM_ACTIVE_CHANNELS>(
    d_temp_storage, temp_storage_bytes, d_samples, histogram_ptrs, level_counts, level_ptrs, num_pixels, stream);
}
1245
+
1246
+ //! @rst
1247
+ //! Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using
1248
+ //! the specified bin boundary levels.
1249
+ //!
1250
+ //! - The input is a sequence of *pixel* structures, where each pixel comprises
1251
+ //! a record of ``NUM_CHANNELS`` consecutive data samples (e.g., an *RGBA* pixel).
1252
+ //! - ``NUM_CHANNELS`` can be up to 4.
1253
+ //! - Of the ``NUM_CHANNELS`` specified, the function will only compute
1254
+ //! histograms for the first ``NUM_ACTIVE_CHANNELS`` (e.g., *RGB* histograms from *RGBA* pixel samples).
1255
+ //! - A two-dimensional *region of interest* within ``d_samples`` can be
1256
+ //! specified using the ``num_row_samples``, ``num_rows``, and ``row_stride_bytes`` parameters.
1257
+ //! - The row stride must be a whole multiple of the sample data type
1258
+ //! size, i.e., ``(row_stride_bytes % sizeof(SampleT)) == 0``.
1259
+ //! - The number of histogram bins for channel\ :sub:`i` is ``num_levels[i] - 1``.
1260
+ //! - For channel\ :sub:`i`, bin ``j`` covers the half-open range of sample values
1261
+ //! ``[d_levels[i][j], d_levels[i][j + 1])``; the bins of a channel need not have the same width.
1262
+ //! - For a given row ``r`` in ``[0, num_rows)``, and sample ``s`` in ``[0, num_row_pixels)``, let
1263
+ //! ``row_begin = d_samples + r * row_stride_bytes / sizeof(SampleT)``,
1264
+ //! ``sample_begin = row_begin + s * NUM_CHANNELS``, and
1265
+ //! ``sample_end = sample_begin + NUM_ACTIVE_CHANNELS``. For given channels
1266
+ //! ``c1`` and ``c2`` in ``[0, NUM_ACTIVE_CHANNELS)``, the range
1267
+ //! ``[d_histogram[c1], d_histogram[c1] + num_levels[c1] - 1)`` shall not overlap
1268
+ //! ``[sample_begin, sample_end)`` nor
1269
+ //! ``[d_levels[c2], d_levels[c2] + num_levels[c2])`` in any way. The ranges
1270
+ //! ``[d_levels[c2], d_levels[c2] + num_levels[c2])`` and
1271
+ //! ``[sample_begin, sample_end)`` may overlap.
1272
+ //! - @devicestorage
1273
+ //!
1274
+ //! Snippet
1275
+ //! +++++++
1276
+ //!
1277
+ //! The code snippet below illustrates the computation of three 4-bin *RGB*
1278
+ //! histograms from a 2x3 region of interest within a flattened 2x4 array
1279
+ //! of quad-channel *RGBA* pixels (8 bits per channel per pixel).
1280
+ //!
1281
+ //!
1282
+ //! .. code-block:: c++
1283
+ //!
1284
+ //! #include <cub/cub.cuh> // or equivalently <cub/device/device_histogram.cuh>
1285
+ //!
1286
+ //! // Declare, allocate, and initialize device-accessible pointers for input
1287
+ //! // samples and output histograms
1288
+ //! int num_row_pixels; // e.g., 3
1289
+ //! int num_rows; // e.g., 2
1290
+ //! size_t row_stride_bytes; // e.g., 4 * sizeof(unsigned char) * NUM_CHANNELS
1291
+ //! unsigned char* d_samples; // e.g., [(2, 6, 7, 5),(3, 0, 2, 1),(1, 1, 1, 1),(-, -, -, -),
1292
+ //! // (7, 0, 6, 2),(0, 6, 7, 5),(3, 0, 2, 6),(-, -, -, -)]
1293
+ //! int* d_histogram[3]; // e.g., [[ -, -, -, -],[ -, -, -, -],[ -, -, -, -]];
1294
+ //! int num_levels[3]; // e.g., {5, 5, 5};
1295
+ //! unsigned int* d_levels[3]; // e.g., [ [0, 2, 4, 6, 8],
1296
+ //! // [0, 2, 4, 6, 8],
1297
+ //! // [0, 2, 4, 6, 8] ];
1298
+ //! ...
1299
+ //!
1300
+ //! // Determine temporary device storage requirements
1301
+ //! void* d_temp_storage = nullptr;
1302
+ //! size_t temp_storage_bytes = 0;
1303
+ //! cub::DeviceHistogram::MultiHistogramRange<4, 3>(
1304
+ //! d_temp_storage, temp_storage_bytes,
1305
+ //! d_samples, d_histogram, num_levels, d_levels,
1306
+ //! num_row_pixels, num_rows, row_stride_bytes);
1307
+ //!
1308
+ //! // Allocate temporary storage
1309
+ //! cudaMalloc(&d_temp_storage, temp_storage_bytes);
1310
+ //!
1311
+ //! // Compute histograms
1312
+ //! cub::DeviceHistogram::MultiHistogramRange<4, 3>(
1313
+ //! d_temp_storage, temp_storage_bytes,
1314
+ //! d_samples, d_histogram, num_levels,
1315
+ //! d_levels, num_row_pixels, num_rows, row_stride_bytes);
1316
+ //!
1317
+ //! // d_histogram <-- [ [2, 3, 0, 1],
1318
+ //! // [4, 0, 0, 2],
1319
+ //! // [1, 2, 0, 3] ]
1320
+ //!
1321
+ //! @endrst
1322
+ //!
1323
+ //! @tparam NUM_CHANNELS
1324
+ //! Number of channels interleaved in the input data (may be greater than
1325
+ //! the number of channels being actively histogrammed)
1326
+ //!
1327
+ //! @tparam NUM_ACTIVE_CHANNELS
1328
+ //! **[inferred]** Number of channels actively being histogrammed
1329
+ //!
1330
+ //! @tparam SampleIteratorT
1331
+ //! **[inferred]** Random-access input iterator type for reading input
1332
+ //! samples. @iterator
1333
+ //!
1334
+ //! @tparam CounterT
1335
+ //! **[inferred]** Integer type for histogram bin counters
1336
+ //!
1337
+ //! @tparam LevelT
1338
+ //! **[inferred]** Type for specifying boundaries (levels)
1339
+ //!
1340
+ //! @tparam OffsetT
1341
+ //! **[inferred]** Signed integer type for sequence offsets, list lengths,
1342
+ //! pointer differences, etc. @offset_size1
1343
+ //!
1344
+ //! @param[in] d_temp_storage
1345
+ //! Device-accessible allocation of temporary storage. When `nullptr`, the
1346
+ //! required allocation size is written to `temp_storage_bytes` and no work is done.
1347
+ //!
1348
+ //! @param[in,out] temp_storage_bytes
1349
+ //! Reference to size in bytes of `d_temp_storage` allocation
1350
+ //!
1351
+ //! @param[in] d_samples
1352
+ //! The pointer to the multi-channel input sequence of data samples. The
1353
+ //! samples from different channels are assumed to be interleaved (e.g., an
1354
+ //! array of 32-bit pixels where each pixel consists of four
1355
+ //! *RGBA* 8-bit samples).
1356
+ //!
1357
+ //! @param[out] d_histogram
1358
+ //! @rst
1359
+ //! The pointers to the histogram counter output arrays, one for each active
1360
+ //! channel. For channel\ :sub:`i`, the allocation length of
1361
+ //! ``d_histogram[i]`` should be ``num_levels[i] - 1``.
1362
+ //! @endrst
1363
+ //!
1364
+ //! @param[in] num_levels
1365
+ //! @rst
1366
+ //! The number of boundaries (levels) for delineating histogram samples in
1367
+ //! each active channel. Implies that the number of bins for
1368
+ //! channel\ :sub:`i` is ``num_levels[i] - 1``.
1369
+ //! @endrst
1370
+ //!
1371
+ //! @param[in] d_levels
1372
+ //! The pointers to the arrays of boundaries (levels), one for each active
1373
+ //! channel. Bin ranges are defined by consecutive boundary pairings: lower
1374
+ //! sample value boundaries are inclusive and upper sample value boundaries
1375
+ //! are exclusive.
1376
+ //!
1377
+ //! @param[in] num_row_pixels
1378
+ //! The number of multi-channel pixels per row in the region of interest
1379
+ //!
1380
+ //! @param[in] num_rows
1381
+ //! The number of rows in the region of interest
1382
+ //!
1383
+ //! @param[in] row_stride_bytes
1384
+ //! The number of bytes between starts of consecutive rows in the
1385
+ //! region of interest
1386
+ //!
1387
+ //! @param[in] stream
1388
+ //! @rst
1389
+ //! **[optional]** CUDA stream to launch kernels within. Default is stream\ :sub:`0`.
1390
+ //! @endrst
1391
template <int NUM_CHANNELS,
          int NUM_ACTIVE_CHANNELS,
          typename SampleIteratorT,
          typename CounterT,
          typename LevelT,
          typename OffsetT>
CUB_RUNTIME_FUNCTION static cudaError_t MultiHistogramRange(
  void* d_temp_storage,
  size_t& temp_storage_bytes,
  SampleIteratorT d_samples,
  ::cuda::std::array<CounterT*, NUM_ACTIVE_CHANNELS> d_histogram,
  ::cuda::std::array<int, NUM_ACTIVE_CHANNELS> num_levels,
  ::cuda::std::array<const LevelT*, NUM_ACTIVE_CHANNELS> d_levels,
  OffsetT num_row_pixels,
  OffsetT num_rows,
  size_t row_stride_bytes,
  cudaStream_t stream = 0)
{
  _CCCL_NVTX_RANGE_SCOPE_IF(d_temp_storage, "cub::DeviceHistogram::MultiHistogramRange");

  /// The sample value type of the input iterator
  using SampleT = cub::detail::it_value_t<SampleIteratorT>;
  ::cuda::std::bool_constant<sizeof(SampleT) == 1> is_byte_sample;

  if constexpr (sizeof(OffsetT) > sizeof(int))
  {
    // Decide whether the problem can be dispatched with `int` offsets.
    // Each extent is checked on its own before the product is formed: with
    // both factors below INT_MAX the 64-bit product cannot wrap, so an
    // oversized region can no longer appear small and be truncated by the
    // `(int)` casts below. Negative signed values convert to very large
    // unsigned values and are therefore rejected as well.
    constexpr auto int_limit = static_cast<unsigned long long>(INT_MAX);
    const auto row_pixels    = static_cast<unsigned long long>(num_row_pixels);
    const auto rows          = static_cast<unsigned long long>(num_rows);
    const auto stride_bytes  = static_cast<unsigned long long>(row_stride_bytes);

    const bool fits_in_int =
      row_pixels < int_limit && rows < int_limit && stride_bytes < int_limit && (rows * stride_bytes) < int_limit;

    if (fits_in_int)
    {
      // Down-convert OffsetT data type
      return DispatchHistogram<NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, LevelT, int>::DispatchRange(
        d_temp_storage,
        temp_storage_bytes,
        d_samples,
        d_histogram,
        num_levels,
        d_levels,
        (int) num_row_pixels,
        (int) num_rows,
        (int) (row_stride_bytes / sizeof(SampleT)),
        stream,
        is_byte_sample);
    }
  }

  // General path: keep the caller's offset type. The row stride is passed on
  // in units of samples rather than bytes.
  return DispatchHistogram<NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, LevelT, OffsetT>::DispatchRange(
    d_temp_storage,
    temp_storage_bytes,
    d_samples,
    d_histogram,
    num_levels,
    d_levels,
    num_row_pixels,
    num_rows,
    (OffsetT) (row_stride_bytes / sizeof(SampleT)),
    stream,
    is_byte_sample);
}
1448
+
1449
//! Deprecated [Since 3.0]: prefer the overload taking ``cuda::std::array`` arguments.
template <int NUM_CHANNELS,
          int NUM_ACTIVE_CHANNELS,
          typename SampleIteratorT,
          typename CounterT,
          typename LevelT,
          typename OffsetT>
CCCL_DEPRECATED_BECAUSE("Prefer the new overload taking cuda::std::arrays") CUB_RUNTIME_FUNCTION static cudaError_t
MultiHistogramRange(
  void* d_temp_storage,
  size_t& temp_storage_bytes,
  SampleIteratorT d_samples,
  CounterT* d_histogram[NUM_ACTIVE_CHANNELS],
  const int num_levels[NUM_ACTIVE_CHANNELS],
  const LevelT* const d_levels[NUM_ACTIVE_CHANNELS],
  OffsetT num_row_pixels,
  OffsetT num_rows,
  size_t row_stride_bytes,
  cudaStream_t stream = 0)
{
  // Repackage the per-channel C arrays with `to_array` (defined outside this
  // excerpt) so the call below selects the array-based overload.
  const auto histogram_ptrs = to_array<NUM_ACTIVE_CHANNELS>(d_histogram);
  const auto level_counts   = to_array<NUM_ACTIVE_CHANNELS>(num_levels);
  const auto level_ptrs     = to_array<NUM_ACTIVE_CHANNELS>(d_levels);

  return MultiHistogramRange<NUM_CHANNELS, NUM_ACTIVE_CHANNELS>(
    d_temp_storage,
    temp_storage_bytes,
    d_samples,
    histogram_ptrs,
    level_counts,
    level_ptrs,
    num_row_pixels,
    num_rows,
    row_stride_bytes,
    stream);
}
1481
+
1482
+ //@} end member group
1483
+ };
1484
+
1485
+ CUB_NAMESPACE_END